NoDuplicatedSource.cs 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
// Removes translation units (<tu>) whose source segment is a duplicate from a TMX file while streaming it.
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using System.Xml;
  6. using System.Xml.Linq;
  7. namespace DuplicatesInTMX
  8. {
  9. class Program
  10. {
  /// <summary>
  /// Advances <paramref name="reader"/> to the first <c>header</c> element and
  /// materializes that element as an <see cref="XElement"/>.
  /// </summary>
  /// <param name="reader">XML reader to scan; it is moved forward by this call.</param>
  /// <returns>The <c>header</c> element, or <c>null</c> when the reader runs out of nodes first.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
  static XElement ReadHeader(XmlReader reader)
  {
      if (reader == null)
      {
          throw new ArgumentNullException("reader");
      }

      // Skip anything that precedes the document content (declaration, comments, ...).
      reader.MoveToContent();

      for (bool advanced = reader.Read(); advanced; advanced = reader.Read())
      {
          bool atHeader = reader.NodeType == XmlNodeType.Element
              && reader.Name == "header";
          if (!atHeader)
          {
              continue;
          }

          // Reads the whole <header> subtree and leaves the reader just past it.
          return XElement.ReadFrom(reader) as XElement;
      }

      return null;
  }
  /// <summary>
  /// Lazily streams the <c>tu</c> elements found from the reader's current position
  /// onward, yielding only the first unit for each distinct source segment.
  /// </summary>
  /// <remarks>
  /// The source of a unit is the <c>seg</c> child of its first <c>tuv</c> child; units
  /// lacking either element are dropped. Two sources are considered equal when their
  /// serialized XML (<c>XElement.ToString()</c>) is identical.
  /// </remarks>
  /// <param name="reader">XML reader to consume; it is advanced as the result is enumerated.</param>
  /// <returns>A deferred sequence of <c>tu</c> elements with unique sources.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
  static IEnumerable<XElement> NoDuplicatedSource(XmlReader reader)
  {
      // Validate here rather than in the iterator so the exception is raised at the
      // call site instead of being deferred until the first MoveNext().
      if (reader == null)
      {
          throw new ArgumentNullException("reader");
      }

      return NoDuplicatedSourceIterator(reader);
  }

  /// <summary>
  /// Iterator behind <see cref="NoDuplicatedSource"/>; expects a non-null reader.
  /// </summary>
  static IEnumerable<XElement> NoDuplicatedSourceIterator(XmlReader reader)
  {
      var seenSources = new HashSet<string>();

      while (true)
      {
          if (reader.NodeType == XmlNodeType.Element && reader.Name == "tu")
          {
              // ReadFrom consumes the whole <tu> subtree and leaves the reader on the
              // node that follows </tu>. Calling Read() again at that point would skip
              // that node, losing every other unit when <tu> elements are adjacent.
              var tu = (XElement)XNode.ReadFrom(reader);

              var tuv = tu.Element("tuv");
              if (tuv == null)
              {
                  continue;
              }

              var source = tuv.Element("seg");
              if (source == null)
              {
                  continue;
              }

              // HashSet.Add returns false when the source has been seen before.
              if (seenSources.Add(source.ToString()))
              {
                  yield return tu;
              }
          }
          else if (!reader.Read())
          {
              // No more nodes to visit.
              yield break;
          }
      }
  }
  /// <summary>
  /// Entry point: reads the TMX file named by the first argument and writes a copy
  /// whose body contains only translation units with unique sources.
  /// </summary>
  /// <param name="args">
  /// <c>args[0]</c> is the input TMX path (required). <c>args[1]</c>, when present, is the
  /// output path; otherwise the result is written to <c>output.tmx</c>.
  /// </param>
  /// <exception cref="InvalidOperationException">The input contains no <c>header</c> element.</exception>
  static void Main(string[] args)
  {
      if (args == null || args.Length == 0)
      {
          Console.WriteLine("You need to specify a path to TMX file!");
          return;
      }

      string outputPath = args.Length > 1 ? args[1] : "output.tmx";

      // Ignore any DOCTYPE declaration instead of processing (or rejecting) the DTD.
      var settings = new XmlReaderSettings
      {
          DtdProcessing = DtdProcessing.Ignore
      };

      using (XmlReader reader = XmlReader.Create(args[0], settings))
      {
          // The header must be consumed before the body is streamed, because the
          // reader is forward-only and the body sequence is evaluated during Save().
          XElement header = ReadHeader(reader);
          if (header == null)
          {
              throw new InvalidOperationException("There's no header in the file!");
          }

          // XStreamingElement defers enumeration of the units until Save() runs,
          // so they are written out as they are read.
          var root = new XStreamingElement("tmx",
              new XAttribute("version", "1.4"),
              header,
              new XStreamingElement("body", NoDuplicatedSource(reader)));

          root.Save(outputPath);
      }
  }
  79. }
  80. }