fork download
  1. import re
  2.  
  3. strings = [
  4. "there should be 9 valid words, including: a well-behave, right?",
  5. "blabla! bla121 {{blabla123bla.. bla-blablabla!! b;a-bla@!. blabla bla-bla-bla-bla **bla-bla",
  6. "{{)foo! ~~foo121 foo--foo?. foo-foo?!{. @foo-foo! f 23 foo2 f-ff-fff-ffff!.,? **foo-f"
  7. ]
  8.  
  9. pattern = r"(?:[ *]|^)([a-zA-Z]+(?:-[a-zA-Z]+)*)(?= |$|[.,!?:]+(?!\S))"
  10. for s in strings:
  11. print(re.findall(pattern, s, re.M))
Success #stdin #stdout 0.03s 9368KB
stdin
Standard input is empty
stdout
['there', 'should', 'be', 'valid', 'words', 'including', 'a', 'well-behave', 'right']
['blabla', 'bla-blablabla', 'blabla', 'bla-bla-bla-bla', 'bla-bla']
['f', 'f-ff-fff-ffff', 'foo-f']