// Remove translation units whose source segment is duplicated from a TMX file,
// streaming the file so that it never has to be loaded into memory as a whole.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Xml;
using System.Xml.Linq;

namespace DuplicatesInTMX
{
    class Program
    {
        /// <summary>
        /// Advances <paramref name="reader"/> to the first "header" element and
        /// materializes it.
        /// </summary>
        /// <param name="reader">Reader over the TMX document.</param>
        /// <returns>
        /// The header element, or <c>null</c> when the reader reaches the end of the
        /// document without finding one. On success the reader is left positioned on
        /// the node that follows the header's end tag.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        private static XElement ReadHeader(XmlReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            reader.MoveToContent();
            while (reader.Read())
            {
                if (reader.NodeType == XmlNodeType.Element && reader.Name == "header")
                {
                    return (XElement)XNode.ReadFrom(reader);
                }
            }

            return null;
        }

        /// <summary>
        /// Lazily streams the "tu" elements found from the reader's current position,
        /// yielding only the first unit for each distinct source segment.
        /// </summary>
        /// <remarks>
        /// The source segment is the "seg" child of the first "tuv" child of a unit;
        /// two units are duplicates when the serialized XML of those "seg" elements is
        /// identical. Units without a "tuv" or without a "seg" are dropped.
        /// </remarks>
        /// <param name="reader">Reader over the TMX document; it must stay open while the result is enumerated.</param>
        /// <returns>A deferred sequence of the translation units to keep.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        private static IEnumerable<XElement> NoDuplicatedSource(XmlReader reader)
        {
            // Validate here rather than inside the iterator so that the exception is
            // thrown at call time instead of on the first MoveNext().
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            return NoDuplicatedSourceIterator(reader);
        }

        // Iterator half of NoDuplicatedSource: performs the actual streaming and filtering.
        private static IEnumerable<XElement> NoDuplicatedSourceIterator(XmlReader reader)
        {
            var seenSources = new HashSet<string>();

            while (!reader.EOF)
            {
                if (reader.NodeType != XmlNodeType.Element || reader.Name != "tu")
                {
                    // Not on a translation unit: step forward. Read() returning false
                    // also covers a closed reader, where EOF never becomes true.
                    if (!reader.Read())
                    {
                        yield break;
                    }

                    continue;
                }

                // ReadFrom consumes the whole element and leaves the reader on the node
                // after </tu>, so the loop must NOT call Read() again before inspecting
                // that node; otherwise a directly adjacent <tu> would be skipped.
                var tu = (XElement)XNode.ReadFrom(reader);

                var tuv = tu.Element("tuv");
                if (tuv == null)
                {
                    continue;
                }

                var source = tuv.Element("seg");
                if (source == null)
                {
                    continue;
                }

                // Add returns false when the source segment has been seen before.
                if (seenSources.Add(source.ToString()))
                {
                    yield return tu;
                }
            }
        }

        /// <summary>
        /// Entry point. Reads the TMX file given as the first argument and writes a
        /// copy without duplicated source segments.
        /// </summary>
        /// <param name="args">
        /// args[0]: path to the input TMX file (required).
        /// args[1]: path of the output file (optional, defaults to "output.tmx").
        /// </param>
        static void Main(string[] args)
        {
            if (!args.Any())
            {
                Console.WriteLine("You need to specify a path to TMX file!");
                return;
            }

            var outputPath = args.Length > 1 ? args[1] : "output.tmx";

            XmlReaderSettings settings = new XmlReaderSettings()
            {
                DtdProcessing = DtdProcessing.Ignore
            };

            using (XmlReader reader = XmlReader.Create(args[0], settings))
            {
                var root = new XStreamingElement("tmx");
                root.Add(new XAttribute("version", "1.4"));

                var header = ReadHeader(reader);
                if (header == null)
                {
                    throw new InvalidOperationException("There's no header in the file!");
                }

                root.Add(header);

                // XStreamingElement enumerates its content only while saving, so the
                // units are pulled from the reader one at a time during Save(), which
                // therefore has to run while the reader is still open.
                var body = new XStreamingElement("body", NoDuplicatedSource(reader));
                root.Add(body);

                root.Save(outputPath);
            }
        }
    }
}