app.py 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. import re
  2. import random
  3. TOKENIZE_RE = re.compile(r'(\w+)', re.U)
  4. def encode_text(text):
  5. output = TOKENIZE_RE.split(text)
  6. original_words = []
  7. encoded_words = []
  8. for word in output:
  9. encoded_word, encoded = encoder(word)
  10. if encoded:
  11. original_words.append(word)
  12. encoded_words.append(encoded_word)
  13. separator = '\n---weird---\n'
  14. original = sorted(original_words, key=lambda s: s.lower())
  15. original_words = ' '.join(original)
  16. encoded_text = ''.join(encoded_words)
  17. return ''.join([separator, encoded_text, separator, original_words])
  18. def encoder(word):
  19. if len(word) > 3:
  20. first_letter = word[0]
  21. last_letter = word[-1]
  22. word_center = word[1:-1]
  23. encoded_center, encoded = shuffle(word_center)
  24. encoded_word = ''.join([first_letter, encoded_center, last_letter])
  25. else:
  26. encoded_word = word
  27. encoded = False
  28. return encoded_word, encoded
  29. def shuffle(word):
  30. encoded = word
  31. word_list = list(word)
  32. encoded_list = word_list[:]
  33. encoded = False
  34. if len(set(encoded_list)) >= 2:
  35. while (encoded_list == word_list):
  36. random.shuffle(word_list)
  37. encoded = True
  38. encoded_word = ''.join(word_list)
  39. return encoded_word, encoded
  40. def decoder():
  41. pass
  42. if __name__ == '__main__':
  43. text = '''This is a long looong test sentence,
  44. with some big (biiiiig) words!'''
  45. print encode_text(text)