DOCX.cs 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.IO.Compression;
  5. using System.Xml;
  6. using Newtonsoft.Json;
  7. using System.Linq;
  namespace DOCX
  {
      /// <summary>
      /// Thin wrapper around a .docx package (a ZIP archive) that can switch on
      /// change tracking and replace / restore the author names stored on comments.
      /// </summary>
      /// <remarks>
      /// Public operations report failure through a <c>(status, message)</c> tuple
      /// rather than by throwing. The wrapped <see cref="ZipArchive"/> is disposed
      /// together with this object.
      /// </remarks>
      public class Docx : IDisposable
      {
          private readonly ZipArchive _zip;

          // Default location of the author mapping file; null when the instance was
          // created from an existing ZipArchive (no file path is known in that case).
          private readonly string _authorsJson;

          private readonly XmlNamespaceManager _ns = new XmlNamespaceManager(new NameTable());

          private readonly Dictionary<string, string> _namespaces = new Dictionary<string, string>
          {
              {"w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
          };

          // Maps a real author name to the pseudonym handed out for it.
          private Dictionary<string, string> _authors = new Dictionary<string, string>();

          /// <summary>
          /// Opens the package at <paramref name="path"/> for update. The author
          /// mapping defaults to a file next to it with the extension changed to "json".
          /// </summary>
          /// <param name="path">Path of the .docx file.</param>
          public Docx(string path)
          {
              _zip = ZipFile.Open(path, ZipArchiveMode.Update);
              _authorsJson = Path.ChangeExtension(path, "json");
              LoadNamespaces();
          }

          /// <summary>
          /// Wraps an already opened archive. No default mapping file exists for such
          /// an instance, so callers must pass an explicit path to
          /// <see cref="AnonymizeComments"/> / <see cref="DeanonymizeComments"/>.
          /// </summary>
          /// <param name="zipArchive">Archive to operate on; ownership passes to this object.</param>
          /// <exception cref="ArgumentNullException"><paramref name="zipArchive"/> is null.</exception>
          public Docx(ZipArchive zipArchive)
          {
              // Fail at construction instead of with a NullReferenceException on first use.
              _zip = zipArchive ?? throw new ArgumentNullException(nameof(zipArchive));
              LoadNamespaces();
          }

          /// <summary>Registers every prefix in <c>_namespaces</c> with the XPath namespace manager.</summary>
          private void LoadNamespaces()
          {
              foreach (var item in _namespaces)
              {
                  _ns.AddNamespace(item.Key, item.Value);
              }
          }

          /// <summary>Parses an archive entry into an <see cref="XmlDocument"/>.</summary>
          /// <returns>The document and "OK", or a null document and an error description.</returns>
          private (XmlDocument doc, string message) GetXML(ZipArchiveEntry entry)
          {
              XmlDocument doc = new XmlDocument
              {
                  PreserveWhitespace = true // Disables auto-indent
              };
              try
              {
                  using (StreamReader reader = new StreamReader(entry.Open()))
                  {
                      doc.Load(reader);
                  }
              }
              catch (Exception e)
              {
                  return (null, $"Error reading {entry.Name}!\n{e}");
              }

              return (doc, "OK");
          }

          /// <summary>Replaces the content of <paramref name="entry"/> with <paramref name="doc"/>.</summary>
          /// <returns><c>(true, "OK")</c> on success, otherwise <c>false</c> and an error description.</returns>
          private (bool status, string message) SaveXML(XmlDocument doc, ZipArchiveEntry entry)
          {
              try
              {
                  using (Stream stream = entry.Open())
                  {
                      // Truncate before writing. Sizing the stream from OuterXml.Length
                      // (a UTF-16 char count) is only a guess at the encoded byte count;
                      // whenever the serialised form is shorter, old bytes or NUL padding
                      // would be left behind the XML.
                      stream.SetLength(0);
                      using (StreamWriter writer = new StreamWriter(stream))
                      {
                          doc.Save(writer);
                      }
                  }
              }
              catch (Exception e)
              {
                  return (false, $"Error saving {entry.Name}!\n{e}");
              }

              return (true, "OK");
          }

          /// <summary>Adds a <c>w:trackRevisions</c> element to the settings part unless one is already present.</summary>
          private (bool status, string message) AddTrackRevisions(ZipArchiveEntry settings)
          {
              var loadResult = GetXML(settings);
              if (loadResult.doc == null)
              {
                  return (false, loadResult.message);
              }

              XmlDocument doc = loadResult.doc;
              if (doc.SelectSingleNode("//w:trackRevisions", _ns) != null)
              {
                  return (true, "No change needed.");
              }

              if (doc.DocumentElement == null)
              {
                  return (false, "No root element in settings.xml!");
              }

              // NOTE(review): the element is appended as the last child of the root;
              // confirm that consumers accept it in that position.
              XmlElement trackRevisions = doc.CreateElement("w", "trackRevisions", _namespaces["w"]);
              doc.DocumentElement.AppendChild(trackRevisions);
              return SaveXML(doc, settings);
          }

          /// <summary>Turns on change tracking in <c>word/settings.xml</c>.</summary>
          /// <returns><c>(true, "OK")</c> on success, otherwise <c>false</c> and the reason.</returns>
          public (bool status, string message) EnableTrackedChanges()
          {
              ZipArchiveEntry settings = _zip.GetEntry(@"word/settings.xml");
              if (settings == null)
              {
                  return (false, "Can't access settings.xml!");
              }

              var result = AddTrackRevisions(settings);
              return !result.status ? (false, result.message) : (true, "OK");
          }

          /// <summary>
          /// Returns the pseudonym for <paramref name="name"/>, creating a new
          /// "AuthorN" entry in the mapping the first time a name is seen.
          /// </summary>
          private string AnonymizeName(string name)
          {
              if (_authors.TryGetValue(name, out var anonymousName))
              {
                  return anonymousName;
              }

              anonymousName = $"Author{_authors.Count + 1}";
              _authors.Add(name, anonymousName);
              return anonymousName;
          }

          /// <summary>
          /// Applies <paramref name="rename"/> to the author attribute of every
          /// <c>w:comment</c> in the entry and writes the result back.
          /// </summary>
          private (bool status, string message) RewriteAuthors(ZipArchiveEntry comments, Func<string, string> rename)
          {
              var loadResult = GetXML(comments);
              if (loadResult.doc == null)
              {
                  return (false, loadResult.message);
              }

              XmlDocument doc = loadResult.doc;
              XmlNodeList commentNodes = doc.SelectNodes("//w:comment", _ns);
              if (commentNodes == null)
              {
                  return (false, "There are no comments!");
              }

              foreach (XmlNode node in commentNodes)
              {
                  // Look the attribute up by namespace, like the XPath above, so the
                  // match does not depend on the literal prefix used in the document.
                  XmlAttribute author = (node as XmlElement)?.GetAttributeNode("author", _namespaces["w"]);
                  if (author == null)
                  {
                      continue; // A comment without an author has nothing to rewrite.
                  }

                  author.Value = rename(author.Value);
              }

              return SaveXML(doc, comments);
          }

          /// <summary>Replaces every comment author with its pseudonym.</summary>
          private (bool status, string message) AnonymizeAuthors(ZipArchiveEntry comments)
          {
              return RewriteAuthors(comments, AnonymizeName);
          }

          /// <summary>
          /// Writes the author mapping as JSON to <paramref name="path"/>, or to the
          /// default mapping file when no path is given.
          /// </summary>
          /// <returns>False when no target path is known or the file cannot be written.</returns>
          private bool SaveAuthors(string path = null)
          {
              string target = !string.IsNullOrEmpty(path) ? path : _authorsJson;
              if (string.IsNullOrEmpty(target))
              {
                  return false;
              }

              try
              {
                  using (StreamWriter sw = new StreamWriter(target))
                  using (JsonWriter writer = new JsonTextWriter(sw))
                  {
                      JsonSerializer serializer = new JsonSerializer
                      {
                          NullValueHandling = NullValueHandling.Ignore
                      };
                      serializer.Serialize(writer, _authors);
                  }
              }
              catch (IOException)
              {
                  // Reported through the return value, like every other failure here.
                  return false;
              }
              catch (UnauthorizedAccessException)
              {
                  return false;
              }

              return true;
          }

          /// <summary>
          /// Replaces the author of every comment in <c>word/comments.xml</c> with a
          /// pseudonym and stores the real-name-to-pseudonym mapping as JSON.
          /// </summary>
          /// <param name="path">Mapping file to write; defaults to the file derived from the document path.</param>
          /// <returns><c>(true, "OK")</c> on success, otherwise <c>false</c> and the reason.</returns>
          /// <remarks>
          /// The document is rewritten before the mapping is stored, so a
          /// <c>false</c> result caused by the mapping file means the comments are
          /// already anonymized.
          /// </remarks>
          public (bool status, string message) AnonymizeComments(string path = null)
          {
              ZipArchiveEntry comments = _zip.GetEntry(@"word/comments.xml");
              if (comments == null)
              {
                  return (false, "Can't access comments.xml!");
              }

              var result = AnonymizeAuthors(comments);
              if (!result.status)
              {
                  return (false, result.message);
              }

              return !SaveAuthors(path) ? (false, $"Problem saving authors to {path}!") : (true, "OK");
          }

          /// <summary>
          /// Loads the author mapping from <paramref name="path"/>, or from the
          /// default mapping file when no path is given.
          /// </summary>
          /// <returns>
          /// True when a non-empty mapping was loaded. False when the file is
          /// missing, unreadable, not valid JSON, or holds no entries.
          /// </returns>
          private bool LoadAuthors(string path = null)
          {
              if (string.IsNullOrEmpty(path))
              {
                  if (!File.Exists(_authorsJson))
                  {
                      return false;
                  }

                  path = _authorsJson;
              }

              Dictionary<string, string> loaded;
              try
              {
                  loaded = JsonConvert.DeserializeObject<Dictionary<string, string>>(File.ReadAllText(path));
              }
              catch (IOException)
              {
                  return false;
              }
              catch (UnauthorizedAccessException)
              {
                  return false;
              }
              catch (JsonException)
              {
                  return false;
              }

              // An empty file or a literal JSON null deserialises to null.
              if (loaded == null)
              {
                  return false;
              }

              _authors = loaded;
              return _authors.Count > 0;
          }

          /// <summary>Restores real author names using a pseudonym-to-real-name lookup.</summary>
          private (bool status, string message) DeanonymizeAuthors(
              ZipArchiveEntry comments, IDictionary<string, string> realNames)
          {
              return RewriteAuthors(
                  comments,
                  current => realNames.TryGetValue(current, out var name) ? name : current);
          }

          /// <summary>
          /// Restores the original comment authors in <c>word/comments.xml</c> from a
          /// previously saved mapping.
          /// </summary>
          /// <param name="path">Mapping file to read; defaults to the file derived from the document path.</param>
          /// <returns><c>(true, "OK")</c> on success, otherwise <c>false</c> and the reason.</returns>
          public (bool status, string message) DeanonymizeComments(string path = null)
          {
              if (!LoadAuthors(path))
              {
                  return (false, $"Can't load authors from {path}!");
              }

              // Build the reverse lookup locally so _authors keeps its
              // real-name -> pseudonym direction for later AnonymizeComments calls.
              var realNames = new Dictionary<string, string>();
              foreach (var pair in _authors)
              {
                  // A missing or repeated pseudonym cannot be mapped back unambiguously.
                  if (pair.Value == null || realNames.ContainsKey(pair.Value))
                  {
                      return (false, $"Can't load authors from {path}!");
                  }

                  realNames.Add(pair.Value, pair.Key);
              }

              ZipArchiveEntry comments = _zip.GetEntry(@"word/comments.xml");
              if (comments == null)
              {
                  return (false, "Can't access comments.xml!");
              }

              var result = DeanonymizeAuthors(comments, realNames);
              return !result.status ? (false, result.message) : (true, "OK");
          }

          /// <summary>Disposes the underlying archive.</summary>
          public void Dispose()
          {
              Dispose(true);
              GC.SuppressFinalize(this);
          }

          ~Docx()
          {
              Dispose(false);
          }

          /// <summary>Releases the wrapped archive when called from <see cref="Dispose()"/>.</summary>
          /// <param name="disposing">True when called from <see cref="Dispose()"/>, false from the finalizer.</param>
          protected virtual void Dispose(bool disposing)
          {
              if (disposing)
              {
                  _zip.Dispose();
              }
          }
      }
  }