|
@@ -0,0 +1,95 @@
|
|
|
+// Remove segments where source is duplicated from TMX file while streaming it
|
|
|
+using System;
|
|
|
+using System.Collections.Generic;
|
|
|
+using System.Linq;
|
|
|
+using System.Xml;
|
|
|
+using System.Xml.Linq;
|
|
|
+
|
|
|
+namespace DuplicatesInTMX
|
|
|
+{
|
|
|
/// <summary>
/// Command-line tool that copies a TMX file to a new file while dropping every
/// translation unit (<c>tu</c>) whose source segment has already been seen.
/// The input is read through a forward-only <see cref="XmlReader"/> and the
/// output is written through <see cref="XStreamingElement"/>, so the translation
/// units are never all held in memory at once (only the set of distinct source
/// segments is).
/// </summary>
class Program
{
    /// <summary>
    /// Advances <paramref name="reader"/> to the first <c>header</c> element that
    /// follows the document's root content node and loads it.
    /// </summary>
    /// <param name="reader">Reader positioned at or before the root element.</param>
    /// <returns>
    /// The loaded <c>header</c> element, or <c>null</c> when the reader reaches the
    /// end of the document without finding one. On success the reader is left on
    /// the node that immediately follows the header.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    internal static XElement ReadHeader(XmlReader reader)
    {
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }

        reader.MoveToContent();

        // ReadToFollowing advances node by node until it lands on an element
        // with the given qualified name, or returns false at end of input.
        if (!reader.ReadToFollowing("header"))
        {
            return null;
        }

        return (XElement)XNode.ReadFrom(reader);
    }

    /// <summary>
    /// Lazily yields the <c>tu</c> elements found from the reader's current position
    /// onwards, keeping only the first unit for each distinct source segment.
    /// The source segment is the <c>seg</c> child of the unit's first <c>tuv</c>
    /// child; units lacking either element are skipped.
    /// </summary>
    /// <param name="reader">Reader to consume; it is advanced as the result is enumerated.</param>
    /// <returns>A single-pass, deferred sequence of the retained <c>tu</c> elements.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    internal static IEnumerable<XElement> NoDuplicatedSource(XmlReader reader)
    {
        // Validate here rather than in the iterator so that a null argument fails
        // at the call site instead of on the first MoveNext().
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }

        return EnumerateUniqueSources(reader);
    }

    /// <summary>
    /// Iterator behind <see cref="NoDuplicatedSource"/>; assumes a non-null reader.
    /// </summary>
    private static IEnumerable<XElement> EnumerateUniqueSources(XmlReader reader)
    {
        var seenSources = new HashSet<string>();

        while (!reader.EOF)
        {
            bool onTranslationUnit = reader.NodeType == XmlNodeType.Element
                && reader.Name == "tu";

            if (!onTranslationUnit)
            {
                // Not a <tu>: step to the next node, stopping when none is left.
                if (!reader.Read())
                {
                    break;
                }

                continue;
            }

            // XNode.ReadFrom consumes the whole element and leaves the reader on
            // the node AFTER </tu>. Calling Read() again at this point would skip
            // that node, which loses the following <tu> whenever two units are
            // adjacent without whitespace in between.
            var tu = (XElement)XNode.ReadFrom(reader);

            var tuv = tu.Element("tuv");
            if (tuv == null)
            {
                continue;
            }

            var source = tuv.Element("seg");
            if (source == null)
            {
                continue;
            }

            // The serialized <seg> element (markup included) is the duplicate key.
            // HashSet.Add returns false when the key is already present.
            if (seenSources.Add(source.ToString()))
            {
                yield return tu;
            }
        }
    }

    /// <summary>
    /// Entry point. <c>args[0]</c> is the path of the TMX file to read; the optional
    /// <c>args[1]</c> is the path of the file to write and defaults to
    /// <c>output.tmx</c> in the current directory.
    /// </summary>
    /// <param name="args">Command-line arguments.</param>
    /// <exception cref="System.IO.InvalidDataException">
    /// The input file contains no <c>header</c> element.
    /// </exception>
    static void Main(string[] args)
    {
        if (args.Length == 0)
        {
            Console.WriteLine("You need to specify a path to TMX file!");
            return;
        }

        string outputPath = args.Length > 1 ? args[1] : "output.tmx";

        // DTD processing is switched to Ignore for the input document.
        var settings = new XmlReaderSettings
        {
            DtdProcessing = DtdProcessing.Ignore
        };

        using (XmlReader reader = XmlReader.Create(args[0], settings))
        {
            var header = ReadHeader(reader);
            if (header == null)
            {
                throw new System.IO.InvalidDataException("There's no header in the file!");
            }

            // The body content is a deferred sequence: the reader is only consumed
            // while Save() writes, so this must happen before the reader is disposed.
            var body = new XStreamingElement("body", NoDuplicatedSource(reader));

            var root = new XStreamingElement("tmx",
                new XAttribute("version", "1.4"),
                header,
                body);

            root.Save(outputPath);
        }
    }
}
|
|
|
+}
|