fork download
  1. import re
  2.  
  3. string = """(2021-07-02 01:00:00 AM BST)
  4. ---
  5. syl.hs has joined the conversation
  6.  
  7.  
  8.  
  9. (2021-07-02 01:00:23 AM BST)
  10. ---
  11. e.wang
  12. Good Morning
  13. How're you?
  14.  
  15.  
  16.  
  17.  
  18. (2021-07-02 01:05:11 AM BST)
  19. ---
  20. wk.wang
  21. Hi, I'm Good.
  22.  
  23.  
  24.  
  25. (2021-07-02 01:08:01 AM BST)
  26. ---
  27. perter.derrek
  28. we got the update on work.
  29. It will get complete by next week.
  30.  
  31. (2021-07-15 08:59:41 PM BST)
  32. ---
  33. ad.ft has left the conversation
  34.  
  35.  
  36.  
  37.  
  38. ---
  39.  
  40. * * *"""
  41.  
  42. regex = r'''(?x)(?m)(?s) # re.X, re.M, re.S (DOTALL)
  43. (?: # start of non capturing group
  44. ^\(\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2}\ [AP]M\ BST\)\s*\r?\n # date and time
  45. (?!---\s*\r?\nad\.ft has) # next lines are not the ---\n\ad.ft etc.
  46. ---\s*\r?\n # --- line
  47. [\w.]+\s*\r?\n # name line
  48. \s* # skip leading whitespace
  49. ) # end of non-capture group
  50. # The folowing is capture group 1. Match characters until you get to the next date-time:
  51. ((?:(?!\s*\r?\n\(\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2}\ [AP]M\ BST\)).)*)# skip trailing whitespace
  52. '''
  53.  
  54. matches = re.findall(regex, string)
  55. print(matches)
Success #stdin #stdout 0.03s 9776KB
stdin
Standard input is empty
stdout
["Good Morning\nHow're you?", "Hi, I'm Good.", 'we got the update on work.\nIt will get complete by next week.']