Browse Source

Using binn and supporting de-anonymization

Piotr Czajkowski 5 years ago
parent
commit
fdcc9b8bbb
14 changed files with 153 additions and 138 deletions
  1. 1 0
      .gitignore
  2. 9 1
      README.md
  3. 21 4
      anonymize.c
  4. BIN
      bin/anonymize.exe
  5. BIN
      bin/mingwLibs.zip
  6. 75 12
      comments.c
  7. 7 1
      comments.h
  8. 0 46
      dict.c
  9. 0 15
      dict.h
  10. 0 33
      keyval.c
  11. 0 10
      keyval.h
  12. 7 11
      makefile
  13. 28 4
      zip.c
  14. 5 1
      zip.h

+ 1 - 0
.gitignore

@@ -1,2 +1,3 @@
 *.o
 *.dSYM
+.DS_Store

+ 9 - 1
README.md

@@ -16,6 +16,14 @@ Running it on provided *test.docx* should produce:
 	"Kowalski, Jan" is now "Author2"
 	"Piotr Fronczewski" is now "Author3"
 
-You'll need libarchive and libxml2 to compile it. It was created as learning project while I was exploring C, so use it freely, but at your own risk. Output was tested with Word 2013 and Libre Office Writer.
+A file called *test.docx.bin* (or *test2.docx.bin* when an output file is given) will be created, containing details of the transformation.
+
+You can also de-anonymize comments. A matching bin file, named "<your_original_docx>.bin", must be present.
+
+*./anonymize test.docx -d* - test.docx will be replaced with de-anonymized version.
+
+*./anonymize test.docx -d test2.docx* - de-anonymized version will be saved as test2.docx leaving original test.docx intact.
+
+You'll need libarchive, libxml2 and [binn](https://github.com/liteserver/binn) to compile it. It was created as a learning project while I was exploring C, so use it freely, but at your own risk. Output was tested with Word 2013 and LibreOffice Writer.
 
 To relieve your pain I've added compiled Windows 64 bit binary in the bin folder. It includes all necessary MingW dlls. Enjoy!

+ 21 - 4
anonymize.c

@@ -1,14 +1,31 @@
 #include <stdio.h>
 #include "zip.h"
 
+extern int action;
+
+/*
+ * Command-line entry point.
+ *   argv[1]            input DOCX (required)
+ *   argv[2]            either an output file, or "-d" to de-anonymize
+ *   argv[3]            output file when argv[2] is "-d" (optional)
+ * With no arguments the usage text is printed and the program exits with 0.
+ * NOTE(review): the results of anonymize()/deanonymize() are ignored here,
+ * so the exit status does not reflect a failed run.
+ */
 int main(int argc, char **argv) {
 	if (argc < 2) {
 		printf("Usage: %s <path_to_DOCX>\n", argv[0]);
 		printf("Optionaly provide output file as second argument.\n");
+		printf("-d as second argument will deanonymize given file. You can optionaly provide output file as third argument.\n");
+		return 0;
 	}
 
-	if (argc > 2)
-		process(argv[1], argv[2]);
-	else if (argc > 1)
-		process(argv[1], NULL);
+	if (argc > 2) {
+		if (strcmp(argv[2], "-d") == 0) {
+			/* 1 is the value comments.h names DEANONYMIZE */
+			action = 1;
+
+			if (argc > 3) {
+				deanonymize(argv[1], argv[3]);
+				return 0;
+			}
+
+			/* no output file: deanonymize() receives NULL as outfile */
+			deanonymize(argv[1], NULL);
+			return 0;
+		}
+		
+		/* argv[2] is not "-d", so it is taken as the output file */
+		anonymize(argv[1], argv[2]);
+	}
+	else
+		anonymize(argv[1], NULL);
 }

BIN
bin/anonymize.exe


BIN
bin/mingwLibs.zip


+ 75 - 12
comments.c

@@ -3,26 +3,36 @@
 #include "comments.h"
 #include "stopif.h"
 
-char* anonymizeAuthor(dictionary *authors, const xmlChar *authorName) {
+/* Report a single name mapping on stdout: "<authorName>" is now "<anonName>". */
+void printAuthors(const char *authorName, const char *anonName) {
+	fprintf(stdout, "\"%s\" is now \"%s\"\n", authorName, anonName);
+}
+
+/*
+ * Return the anonymized name stored for authorName in anonAuthors.
+ * If there is no entry yet, a new "Author<N>" name is generated, stored,
+ * printed via printAuthors() and then looked up again for the return value.
+ * The returned pointer comes from binn_object_str(); presumably it is owned
+ * by anonAuthors and must not be freed by the caller - TODO confirm.
+ */
+char* anonymizeAuthor(binn *anonAuthors, const xmlChar *authorName) {
+	/* static: numbering continues across calls for the lifetime of the process */
+	static int authorsCount = 0;
+
 	char *name = (char*)authorName;
-	char *newName = (char*)dictionary_find(authors, name);
+	char *newName = binn_object_str(anonAuthors, name);
 
 	if (newName)
 		return newName;
 
-	asprintf(&newName, "Author%d", authors->length+1);
-	dictionary_add(authors, name, newName);
+	/* NOTE(review): the asprintf() result is not checked */
+	asprintf(&newName, "Author%d", ++authorsCount);
+	/* Both directions go into the same object: real -> anonymized and */
+	/* anonymized -> real. deanonymizeAuthors() looks names up by the latter. */
+	binn_object_set_str(anonAuthors, name, newName);
+	binn_object_set_str(anonAuthors, newName, name);
+	printAuthors(name, newName);
 	free(newName);
-	return (char*)dictionary_find(authors, name);
+
+	/* newName was freed above, so fetch the stored copy again */
+	return binn_object_str(anonAuthors, name);
 }
 
-void printAuthors(const dictionary *authors) {
-	for (int i=0; i<authors->length; i++)
-		printf("\"%s\" is now \"%s\"\n", authors->pairs[i]->key, (char*)authors->pairs[i]->value);
+/*
+ * Write the serialized author map (binn_ptr/binn_size of anonAuthors) to the
+ * file named by the global binnFile.
+ * The buffer is raw binary, so the stream is opened with "wb": in text mode
+ * some platforms (notably Windows) translate newline bytes and would corrupt
+ * the file that readAuthors() later opens with "rb".
+ * Failures are reported on stdout; the function has no return value.
+ */
+void saveAuthors(binn *anonAuthors) {
+	FILE *fp = fopen(binnFile, "wb");
+	if (fp == NULL) {
+		printf("Can't write bin file (%s)!\n", binnFile);
+		return;
+	}
+
+	if (fwrite(binn_ptr(anonAuthors), binn_size(anonAuthors), 1, fp) != 1)
+		printf("Can't write bin file (%s)!\n", binnFile);
+
+	/* fclose flushes buffered data, so a failure here also means a bad file */
+	if (fclose(fp) != 0)
+		printf("Can't write bin file (%s)!\n", binnFile);
 }
 
-int processAuthors(const xmlXPathObjectPtr authors) {
-	dictionary *anonAuthors = dictionary_new();
+int anonymizeAuthors(const xmlXPathObjectPtr authors) {
+	binn *anonAuthors = binn_object();
 
 	for (int i=0; i < authors->nodesetval->nodeNr; i++){
 		xmlChar *authorName = (xmlChar*)"";		
@@ -31,12 +41,65 @@ int processAuthors(const xmlXPathObjectPtr authors) {
 		xmlNodeSetContent(authors->nodesetval->nodeTab[i], (xmlChar*)anonAuthor);
 		xmlFree(authorName);
 	}
+	
+	saveAuthors(anonAuthors);
+	binn_free(anonAuthors);
+	return 1;
+}
+
+char *data;
+
+/*
+ * Load the author map from the file named by the global binnFile.
+ * The whole file is read into the global buffer `data` and handed to
+ * binn_open(). On success the caller must release both the returned object
+ * and `data`. On any failure a message is printed, `data` is released and
+ * reset to NULL, and NULL is returned.
+ */
+binn *readAuthors() {
+	FILE *fp = fopen(binnFile, "rb");
+	if (fp == NULL) {
+		printf("Can't read bin file (%s)!\n", binnFile);
+		return NULL;
+	}
 
-	printAuthors(anonAuthors);
-	dictionary_free(anonAuthors);
+	/* Determine the file size; an empty or unseekable file cannot hold a map */
+	long fsize = -1;
+	if (fseek(fp, 0, SEEK_END) == 0)
+		fsize = ftell(fp);
+	if (fsize <= 0 || fseek(fp, 0, SEEK_SET) != 0) {
+		printf("Can't read bin file (%s)!\n", binnFile);
+		fclose(fp);
+		return NULL;
+	}
+
+	data = malloc((size_t)fsize + 1);
+	if (data == NULL || fread(data, (size_t)fsize, 1, fp) != 1) {
+		printf("Can't read bin file (%s)!\n", binnFile);
+		fclose(fp);
+		free(data);
+		data = NULL;
+		return NULL;
+	}
+	fclose(fp);
+
+	/* Keep the trailing zero byte the buffer has always carried */
+	data[fsize] = 0;
+
+	binn *obj = binn_open(data);
+	if (obj == NULL) {
+		/* Not a valid binn buffer: nobody else will free `data` */
+		printf("Can't read bin file (%s)!\n", binnFile);
+		free(data);
+		data = NULL;
+	}
+
+	return obj;
+}
+
+/*
+ * Restore original author names on every node in `authors`.
+ * The map is loaded with readAuthors(); each node's current content is used
+ * as the key, and when an entry exists the node content is replaced and the
+ * mapping is printed. Nodes without an entry are left unchanged.
+ * Returns 0 when the map cannot be loaded, 1 otherwise.
+ */
+int deanonymizeAuthors(const xmlXPathObjectPtr authors) {
+	binn *anonAuthors = readAuthors();
+	if (anonAuthors == NULL) return 0;
+
+	/* libxml2 may leave nodesetval NULL for an empty result set */
+	int nodeCount = authors->nodesetval ? authors->nodesetval->nodeNr : 0;
+
+	for (int i=0; i < nodeCount; i++){
+		xmlChar *anonName = xmlNodeGetContent(authors->nodesetval->nodeTab[i]);
+		/* No content means no key to look up */
+		if (anonName == NULL) continue;
+
+		char *author = binn_object_str(anonAuthors, (char*)anonName);
+		if (author != NULL) {
+			xmlNodeSetContent(authors->nodesetval->nodeTab[i], (xmlChar*)author);
+			printAuthors((char*)anonName, author);
+		}
+
+		xmlFree(anonName);
+	}
+
+	/* `data` is the buffer readAuthors() allocated for the map */
+	free(data);
+	binn_free(anonAuthors);
+	/* Do not leave the global pointing at freed memory */
+	data = NULL;
 	return 1;
 }
 
+/*
+ * Dispatch on the global `action`: DEANONYMIZE restores the original names,
+ * any other value anonymizes them. The chosen function's result is returned.
+ */
+int processAuthors(const xmlXPathObjectPtr authors) {
+	return (action == DEANONYMIZE)
+		? deanonymizeAuthors(authors)
+		: anonymizeAuthors(authors);
+}
+
 int anonymizeComments(XMLBuff *infile) {
 	const xmlChar *authorPath = (xmlChar*)"//w:comment/@w:author";
 

+ 7 - 1
comments.h

@@ -1,6 +1,12 @@
 #include <libxml2/libxml/xpath.h>
 #include <libxml2/libxml/xpathInternals.h>
-#include "dict.h"
+#include <binn.h>
 #include "xmlbuff.h"
 
+#define ANONYMIZE 0
+#define DEANONYMIZE 1
+
+extern char binnFile[256];
+extern int action;
+
 int anonymizeComments(XMLBuff *infile);

+ 0 - 46
dict.c

@@ -1,46 +0,0 @@
-// Borrowed from https://github.com/b-k/21st-Century-Examples
-#include <stdio.h>
-#include <stdlib.h>
-#include "dict.h"
-
-void *dictionary_not_found;
-
-dictionary *dictionary_new (void){
-	static int dnf;
-	if (!dictionary_not_found) dictionary_not_found = &dnf;
-	dictionary *out= malloc(sizeof(dictionary));
-	*out= (dictionary){ .pairs=NULL };                          
-	return out;
-} 
-
-static void dictionary_add_keyval(dictionary *in, keyval *kv){
-	in->length++;
-	in->pairs = realloc(in->pairs, sizeof(keyval*)*in->length);
-	in->pairs[in->length-1] = kv;
-}
-
-void dictionary_add(dictionary *in, char *key, void *value){
-	if (!key){fprintf(stderr, "NULL is not a valid key.\n"); abort();}
-	dictionary_add_keyval(in, keyval_new(key, value));
-}
-
-void *dictionary_find(dictionary const *in, char const *key){
-	for (int i=0; i< in->length; i++)
-		if (keyval_matches(in->pairs[i], key))    
-			return in->pairs[i]->value;
-	return NULL;
-}
-
-dictionary *dictionary_copy(dictionary *in){
-	dictionary *out = dictionary_new();
-	for (int i=0; i< in->length; i++)
-		dictionary_add_keyval(out, keyval_copy(in->pairs[i]));
-	return out;
-}
-
-void dictionary_free(dictionary *in){
-	for (int i=0; i< in->length; i++)
-		keyval_free(in->pairs[i]);
-	free(in->pairs);
-	free(in);
-}

+ 0 - 15
dict.h

@@ -1,15 +0,0 @@
-// Borrowed from https://github.com/b-k/21st-Century-Examples
-#include "keyval.h"
-
-extern void *dictionary_not_found;
-
-typedef struct dictionary{
-	keyval **pairs;
-	int length;
-} dictionary;
-
-dictionary *dictionary_new (void);
-dictionary *dictionary_copy(dictionary *in);
-void dictionary_free(dictionary *in);
-void dictionary_add(dictionary *in, char *key, void *value);
-void *dictionary_find(dictionary const *in, char const *key);

+ 0 - 33
keyval.c

@@ -1,33 +0,0 @@
-// Borrowed from https://github.com/b-k/21st-Century-Examples
-#include <stdlib.h> //malloc
-#include <string.h>
-#include "keyval.h"
-
-keyval *keyval_new(char *key, char *value){
-	keyval *out = malloc(sizeof(keyval));
-	out->key = malloc(strlen(key)+1);
-	out->value = malloc(strlen(value)+1);
-
-	strcpy(out->key, key);
-	strcpy(out->value, value);
-
-	return out;
-}
-
-/** Copy a key/value pair. The new pair has pointers to
-  the values in the old pair, not copies of their data.  */
-keyval *keyval_copy(keyval const *in){
-	keyval *out = malloc(sizeof(keyval));
-	*out = *in;
-	return out;
-}
-
-void keyval_free(keyval *in){
-	free(in->key);
-	free(in->value);
-	free(in);
-}
-
-int keyval_matches(keyval const *in, char const *key){
-	return !strcmp(in->key, key);
-}

+ 0 - 10
keyval.h

@@ -1,10 +0,0 @@
-// Borrowed from https://github.com/b-k/21st-Century-Examples
-typedef struct keyval{
-	char *key;
-	char *value;
-} keyval;
-
-keyval *keyval_new(char *key, char *value);
-keyval *keyval_copy(keyval const *in);
-void keyval_free(keyval *in);
-int keyval_matches(keyval const *in, char const *key);

+ 7 - 11
makefile

@@ -1,25 +1,21 @@
-CFLAGS=`pkg-config --cflags --libs libxml-2.0` -g -Wall -Wextra -O3 -std=c99
-LDLIBS=`pkg-config --libs libxml-2.0` -larchive
-objects=keyval.o dict.o comments.o zip.o xmlbuff.o
-mingwCFLAGS=`x86_64-w64-mingw32-pkg-config --cflags --libs libxml-2.0` -g -Wall -Wextra -O3 -std=c99
-mingwLDLIBS=`x86_64-w64-mingw32-pkg-config --libs libxml-2.0` -larchive
+CFLAGS=`pkg-config --cflags --libs libxml-2.0` -g -Wall -Wextra -O3 -std=gnu99
+LDLIBS=`pkg-config --libs libxml-2.0` -larchive -lbinn
+objects=comments.o zip.o xmlbuff.o
+mingwCFLAGS=`x86_64-w64-mingw32-pkg-config --cflags --libs libxml-2.0` -g -Wall -Wextra -O3 -std=gnu99
+mingwLDLIBS=`x86_64-w64-mingw32-pkg-config --libs libxml-2.0` -larchive -lbinn-1.0
 mingw=x86_64-w64-mingw32-gcc
-MACCFLAGS=`pkg-config --cflags --libs libxml-2.0 libarchive` -g -Wall -Wextra -O3 -std=c99
-MACLDLIBS=`pkg-config --libs libxml-2.0 libarchive`
+MACCFLAGS=`pkg-config --cflags --libs libxml-2.0 libarchive` -g -Wall -Wextra -O3 -std=gnu99
+MACLDLIBS=`pkg-config --libs libxml-2.0 libarchive` -lbinn
 
 anonymize: $(objects)
 
 bin/anonymize.exe:
-	$(mingw) $(mingwCFLAGS) -c keyval.c $(mingwLDLIBS)
-	$(mingw) $(mingwCFLAGS) -c dict.c $(mingwLDLIBS)
 	$(mingw) $(mingwCFLAGS) -c comments.c $(mingwLDLIBS)
 	$(mingw) $(mingwCFLAGS) -c zip.c $(mingwLDLIBS)
 	$(mingw) $(mingwCFLAGS) -c xmlbuff.c $(mingwLDLIBS)
 	$(mingw) $(mingwCFLAGS) anonymize.c $(objects) $(mingwLDLIBS) -o $@
 
 mac/anonymize:
-	gcc $(MACCFLAGS) -c keyval.c $(MACLDLIBS)
-	gcc $(MACCFLAGS) -c dict.c $(MACLDLIBS)
 	gcc $(MACCFLAGS) -c comments.c $(MACLDLIBS)
 	gcc $(MACCFLAGS) -c zip.c $(MACLDLIBS)
 	gcc $(MACCFLAGS) -c xmlbuff.c $(MACLDLIBS)

+ 28 - 4
zip.c

@@ -2,7 +2,7 @@
 #include "stopif.h"
 
 int processComments(struct archive *archiveOut, XMLBuff *comments) {
-	Stopif(!anonymizeComments(comments), return 0, "Can't anonymize comments!\n");
+	if (!anonymizeComments(comments)) return 0;
 
 	struct archive_entry *newEntry = archive_entry_new();
 	archive_entry_set_pathname(newEntry, comments->name);
@@ -29,7 +29,8 @@ int rewriteZIP(struct archive *archiveIn, struct archive *archiveOut) {
 		if (strcmp(commentsFile, path) == 0){
 			XMLBuff *comments = XMLBuffNew();
 			*comments = (XMLBuff){.data=buf, .size=size, .name=path};
-			Stopif(!processComments(archiveOut, comments), return 0, "Can't process comments!\n");
+			
+			if (!processComments(archiveOut, comments)) return 0;
 			XMLBuffFree(comments);
 		} else {
 			Stopif(archive_write_header(archiveOut, entryIn) != ARCHIVE_OK, return 0, "Can't write entry header!\n");
@@ -52,16 +53,39 @@ int processDOCX(const char *infile, const char *outfile) {
 
 	Stopif(archive_write_open_filename(archiveOut, outfile) != ARCHIVE_OK, return 0, "Can't create new archive %s!\n", outfile);
 
-	Stopif(!rewriteZIP(archiveIn, archiveOut), return 0, "Problems rewriting zip!\n");
+	if (!rewriteZIP(archiveIn, archiveOut)) return 0;
+	
 	Stopif(archive_read_free(archiveIn) != ARCHIVE_OK, return 0, "Can't free %s!\n", infile);
 	Stopif(archive_write_free(archiveOut) != ARCHIVE_OK, return 0, "Can't free %s!\n", outfile);
 	return 1;
 }
 
-int process(const char *infile, char *outfile) {
+/*
+ * Anonymize the comment authors of `infile`.
+ * When `outfile` is NULL or equal to `infile`, the document is rewritten in
+ * place through the temporary file "tmpFile.docx"; otherwise the result is
+ * written to `outfile` and `infile` is left untouched.
+ * The mapping file name is stored in the global binnFile as
+ * "<resulting document>.bin".
+ * Returns 1 on success and 0 on failure. On failure the original document
+ * is never removed.
+ */
+int anonymize(const char *infile, char *outfile) {
+	const int inPlace = !outfile || strcmp(infile, outfile) == 0;
+	const char *target = inPlace ? infile : outfile;
+
+	/* binnFile is a fixed-size buffer: reject paths that would not fit */
+	int len = snprintf(binnFile, sizeof binnFile, "%s.bin", target);
+	if (len < 0 || (size_t)len >= sizeof binnFile) {
+		printf("Path too long: %s\n", target);
+		return 0;
+	}
+
+	if (!inPlace)
+		return processDOCX(infile, outfile) ? 1 : 0;
+
+	const char *tmpPath = "tmpFile.docx";
+	if (!processDOCX(infile, tmpPath)) {
+		/* Best-effort cleanup; the original document stays as it was */
+		remove(tmpPath);
+		return 0;
+	}
+
+	/* rename() does not replace an existing file on every platform, */
+	/* so the old document is removed first */
+	remove(infile);
+	if (rename(tmpPath, infile) != 0) {
+		printf("Can't replace %s with %s!\n", infile, tmpPath);
+		return 0;
+	}
+	return 1;
+}
+
+int deanonymize(const char *infile, char *outfile) {
+	strcat(binnFile, infile);
+	strcat(binnFile, ".bin");
+
 	if (!outfile || strcmp(infile, outfile) == 0){
 		const char *outfile = "tmpFile.docx";
 		processDOCX(infile, outfile);
+		remove(infile);
 		rename(outfile, infile);
 	} else {
 		processDOCX(infile, outfile);

+ 5 - 1
zip.h

@@ -4,4 +4,8 @@
 #include <string.h>
 #include "comments.h"
 
-int process(char const *infile, char *outfile);
+/*
+ * Name of the .bin mapping file; anonymize()/deanonymize() build it from a
+ * document path plus ".bin".
+ * NOTE(review): binnFile and action are defined (not just declared) in this
+ * header. anonymize.c includes it, and presumably zip.c does too, so each
+ * gets its own tentative definition. That fails to link under -fno-common
+ * (the GCC default since version 10). comments.h already has the matching
+ * extern declarations; consider moving the definitions into one .c file.
+ */
+char binnFile[256];
+/* Selected mode; comments.h defines ANONYMIZE (0) and DEANONYMIZE (1). */
+int action;
+
+/* Both take the input DOCX path and an output path; main() passes NULL as */
+/* outfile when no output file was given. */
+int anonymize(char const *infile, char *outfile);
+int deanonymize(char const *infile, char *outfile);