import re
# Sample inputs for the word-extraction pattern below.  Each string mixes
# plain words, hyphenated words, and tokens polluted by digits or symbols
# that must NOT be reported as words.
strings = [
    "there should be 9 valid words, including: a well-behave, right?",
    "blabla! bla121 {{blabla123bla.. bla-blablabla!! b;a-bla@!. blabla bla-bla-bla-bla **bla-bla",
    "{{)foo! ~~foo121 foo--foo?. foo-foo?!{. @foo-foo! f 23 foo2 f-ff-fff-ffff!.,? **foo-f",
]

# A "word" is a run of ASCII letters, optionally followed by more letter runs
# joined by single hyphens (so "foo--foo" and "foo2" do not qualify).
# The word is captured only when it is
#   * preceded by a space, a '*', or the start of a line, and
#   * followed by a space, the end of a line, or a run of . , ! ? : that is
#     itself followed by whitespace or the end of the string.
# The leading delimiter is consumed but sits outside the capture group, so
# findall() returns just the word; the trailing context is a lookahead, which
# leaves the following space available as the next word's leading delimiter.
pattern = r"(?:[ *]|^)([a-zA-Z]+(?:-[a-zA-Z]+)*)(?= |$|[.,!?:]+(?!\S))"

# Compile once outside the loop; re.M makes ^ and $ match at line boundaries.
word_re = re.compile(pattern, re.M)

for s in strings:
    print(word_re.findall(s))
aW1wb3J0IHJlCgpzdHJpbmdzID0gWwogICAgICJ0aGVyZSBzaG91bGQgYmUgOSB2YWxpZCAgd29yZHMsIGluY2x1ZGluZzogYSB3ZWxsLWJlaGF2ZSwgcmlnaHQ/IiwKICAgICAiYmxhYmxhISBibGExMjEge3tibGFibGExMjNibGEuLiBibGEtYmxhYmxhYmxhISEgYjthLWJsYUAhLiBibGFibGEgYmxhLWJsYS1ibGEtYmxhICoqYmxhLWJsYSIsCiAgICAgInt7KWZvbyEgfn5mb28xMjEgZm9vLS1mb28/LiBmb28tZm9vPyF7LiBAZm9vLWZvbyEgZiAyMyBmb28yIGYtZmYtZmZmLWZmZmYhLiw/ICAqKmZvby1mIgpdCgpwYXR0ZXJuID0gciIoPzpbICpdfF4pKFthLXpBLVpdKyg/Oi1bYS16QS1aXSspKikoPz0gfCR8Wy4sIT86XSsoPyFcUykpIgpmb3IgcyBpbiBzdHJpbmdzOgogICAgIHByaW50KHJlLmZpbmRhbGwocGF0dGVybiwgcywgcmUuTSkp
# Expected output:
# ['there', 'should', 'be', 'valid', 'words', 'including', 'a', 'well-behave', 'right']
# ['blabla', 'bla-blablabla', 'blabla', 'bla-bla-bla-bla', 'bla-bla']
# ['f', 'f-ff-fff-ffff', 'foo-f']