import re
# Abbreviations whose periods must never be treated as sentence boundaries.
abbreviations = ["Dr.", "Mrs.", "Mr.", "Ph.d"]

# Two alternatives, tried in this order at every position:
#   1. one of the abbreviations (no capture group) -- matching it first consumes
#      its period so the second alternative cannot claim it;
#   2. group 1: a period preceded by a lowercase letter and followed by optional
#      whitespace plus an uppercase letter, i.e. a sentence boundary.
# Each abbreviation goes through re.escape so that its "." matches a literal
# period only, instead of acting as the regex "any character" wildcard.
rx = re.compile(
    r"(?:{})|((?<=[a-z])\.(?=\s*[A-Z]))".format(
        "|".join(map(re.escape, abbreviations))
    )
)

# Sample input: two sentences, one with no space after the period, followed by
# a sentence that ends in an abbreviation.
data = "Man is weak.So they die. I have a Ph.d"
# First replace each sentence-ending period with the "#!#" marker (abbreviations are left as they are).
def repl(match: "re.Match[str]") -> str:
    """Return the replacement text for one match, for use with ``re.sub``.

    Args:
        match: A match from a pattern whose group 1 captures a
            sentence-ending period and is unset for anything else matched.

    Returns:
        The marker ``"#!#"`` when group 1 took part in the match; otherwise
        the matched text itself, so the substitution leaves it unchanged.
    """
    # Group 1 is only set for a sentence boundary.
    if match.group(1) is not None:
        return "#!#"
    # Any other match is handed back verbatim.
    return match.group(0)
# Mark every sentence boundary: rx.sub calls repl for each match, which swaps
# a sentence-ending period for "#!#" and returns abbreviations unchanged.
data = rx.sub(repl, data)

# Split on the marker (plus any whitespace that follows it) and print one
# sentence per line.
for sent in re.split(r"#!#\s*", data):
    # NOTE: replace() removes every remaining period, including the ones
    # inside abbreviations, so "Ph.d" is printed as "Phd".
    print(sent.replace(".", ""))
aW1wb3J0IHJlCgphYmJyZXZpYXRpb25zID0gWyJEci4iLCAiTXJzLiIsICJNci4iLCAiUGguZCJdCnJ4ID0gcmUuY29tcGlsZShyJycnKD86e30pfCgoPzw9W2Etel0pXC4oPz1ccypbQS1aXSkpJycnLmZvcm1hdCgifCIuam9pbihhYmJyZXZpYXRpb25zKSkpCgpkYXRhID0gIk1hbiBpcyB3ZWFrLlNvIHRoZXkgZGllLiBJIGhhdmUgYSBQaC5kIgoKIyBzdWJzdGl0dXRlIHRoZW0gZmlyc3QKZGVmIHJlcGwobWF0Y2gpOgoJaWYgbWF0Y2guZ3JvdXAoMSkgaXMgbm90IE5vbmU6CgkJcmV0dXJuICIjISMiCglyZXR1cm4gbWF0Y2guZ3JvdXAoMCkKCmRhdGEgPSByeC5zdWIocmVwbCwgZGF0YSkKZm9yIHNlbnQgaW4gcmUuc3BsaXQociIjISNccyoiLCBkYXRhKToKCXByaW50KHNlbnQucmVwbGFjZSgiLiIsICIiKSkK