fork download
  1. import re
  2.  
  3. # Input
  4. text = "I've been bad but I aspire to be a better person, and behave like my dog and cat :)"
  5. a = {"animal": [ "dog", "cat", "dog and cat"], "XXX": ["I've been", "asp*", ":)"]}
  6.  
  7. class Trie():
  8. """Regex::Trie in Python. Creates a Trie out of a list of words. The trie can be exported to a Regex pattern.
  9. The corresponding Regex should match much faster than a simple Regex union."""
  10. def __init__(self):
  11. self.data = {}
  12.  
  13. def add(self, word):
  14. ref = self.data
  15. for char in word:
  16. ref[char] = char in ref and ref[char] or {}
  17. ref = ref[char]
  18. ref[''] = 1
  19.  
  20. def dump(self):
  21. return self.data
  22.  
  23. def quote(self, char):
  24. if char == '*':
  25. return r'\w*'
  26. else:
  27. return re.escape(char)
  28.  
  29. def _pattern(self, pData):
  30. data = pData
  31. if "" in data and len(data.keys()) == 1:
  32. return None
  33.  
  34. alt = []
  35. cc = []
  36. q = 0
  37. for char in sorted(data.keys()):
  38. if isinstance(data[char], dict):
  39. try:
  40. recurse = self._pattern(data[char])
  41. alt.append(self.quote(char) + recurse)
  42. except:
  43. cc.append(self.quote(char))
  44. else:
  45. q = 1
  46. cconly = not len(alt) > 0
  47.  
  48. if len(cc) > 0:
  49. if len(cc) == 1:
  50. alt.append(cc[0])
  51. else:
  52. alt.append('[' + ''.join(cc) + ']')
  53.  
  54. if len(alt) == 1:
  55. result = alt[0]
  56. else:
  57. result = "(?:" + "|".join(alt) + ")"
  58.  
  59. if q:
  60. if cconly:
  61. result += "?"
  62. else:
  63. result = "(?:%s)?" % result
  64. return result
  65.  
  66. def pattern(self):
  67. return self._pattern(self.dump())
  68.  
  69. # Creating patterns
  70. a2 = {}
  71. for k,v in a.items():
  72. trie = Trie()
  73. for w in v:
  74. trie.add(w)
  75. a2[k] = re.compile(fr"(?<!\w){trie.pattern()}(?!\w)", re.I)
  76.  
  77. for k,r in a2.items():
  78. text = r.sub(k, text)
  79.  
  80. print(text)
  81.  
Success #stdin #stdout 0.03s 9428KB
stdin
Standard input is empty
stdout
XXX bad but I XXX to be a better person, and behave like my animal XXX