import re
# Sample text that is scanned for the known phrases.
sentence = "Machine learning and data mining often employ the same methods and overlap significantly."

# Maps each abbreviation to the list of phrases it stands for.
dic = {
    'MLDM': ['machine learning and data mining'],
    'ML': ['machine learning'],
    'DM': ['data mining'],
}
def get_key(val, mapping=None):
    """Return the abbreviation whose phrase list contains ``val``.

    The comparison ignores case on both sides.

    Args:
        val: The phrase text to look up.
        mapping: Optional dict of abbreviation -> list of phrases. When
            omitted, the module-level ``dic`` is used.

    Returns:
        The first key (in dict iteration order) whose phrase list contains
        ``val``, or an empty string when no phrase matches.
    """
    if mapping is None:
        mapping = dic
    # Look up the argument itself rather than a loop variable from module
    # scope, so the function works no matter where it is called from.
    needle = val.lower()
    for k, v in mapping.items():
        if needle in map(str.lower, v):
            return k
    return ''
# Flatten the phrase lists and sort longest-first, so the alternation tries
# "machine learning and data mining" before the shorter phrases it contains.
l = sorted([v for x in dic.values() for v in x], key=len, reverse=True)
# Escape each phrase so characters such as '.' or '(' are matched literally,
# then wrap the alternation in \b so each phrase only matches as whole words.
rx = r'\b(?:{})\b'.format("|".join(map(re.escape, l)))
for m in re.finditer(rx, sentence, re.I):  # Search case insensitively
    key = get_key(m.group())
    if key:
        # Print the abbreviation and the character offset where the phrase starts.
        print("{} {}".format(key, m.start()))
aW1wb3J0IHJlCgpzZW50ZW5jZSA9ICJNYWNoaW5lIGxlYXJuaW5nIGFuZCBkYXRhIG1pbmluZyBvZnRlbiBlbXBsb3kgdGhlIHNhbWUgbWV0aG9kcyBhbmQgb3ZlcmxhcCBzaWduaWZpY2FudGx5LiIKCmRpYyA9IHsnTUxETSc6IFsnbWFjaGluZSBsZWFybmluZyBhbmQgZGF0YSBtaW5pbmcnXSwgJ01MJzogWydtYWNoaW5lIGxlYXJuaW5nJ10sCiAnRE0nOiBbJ2RhdGEgbWluaW5nJ119CgpkZWYgZ2V0X2tleSh2YWwpOgoJZm9yIGssdiBpbiBkaWMuaXRlbXMoKToKCQlpZiBtLmdyb3VwKCkubG93ZXIoKSBpbiBtYXAoc3RyLmxvd2VyLCB2KToKCQkJcmV0dXJuIGsKCXJldHVybiAnJwoKIyBGbGF0dGVuIHRoZSBsaXN0cyBpbiB2YWx1ZXMgYW5kIHNvcnQgdGhlIGxpc3QgYnkgbGVuZ3RoIGluIGRlc2NlbmRpbmcgb3JkZXIKbD1zb3J0ZWQoW3YgZm9yIHggaW4gZGljLnZhbHVlcygpIGZvciB2IGluIHhdLCBrZXk9bGVuLCByZXZlcnNlPVRydWUpCiMgQnVpbGQgdGhlIGFsdGVybmF0aW9uIGJhc2VkIHJlZ2V4IHdpdGggXGIgdG8gbWF0Y2ggZWFjaCBpdGVtIGFzIGEgd2hvbGUgd29yZCAKcng9cidcYig/Ont9KVxiJy5mb3JtYXQoInwiLmpvaW4obCkpCmZvciBtIGluIHJlLmZpbmRpdGVyKHJ4LCBzZW50ZW5jZSwgcmUuSSk6ICMgU2VhcmNoIGNhc2UgaW5zZW5zaXRpdmVseQogICAga2V5ID0gZ2V0X2tleShtLmdyb3VwKCkpCiAgICBpZiBrZXk6CgkgICAgcHJpbnQoInt9IHt9Ii5mb3JtYXQoa2V5LCBtLnN0YXJ0KCkpKQo=