fork download
  1. import re
  2.  
  3. pattern = r"\s*(.+?)\s*(\w{2,4} \d-\d{0,2}\w(?:\s+/\s+\w{2,4} \d-\d{0,2}\w)*)"
  4.  
  5. s = ("Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n"
  6. " EX 0-02a\n"
  7. "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n"
  8. "Vestibulum eget vestibulum sapien.\n"
  9. "Sed porta, odio id sollicitudin congue\n"
  10. " EX 0-04a\n\n"
  11. "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n"
  12. "Vestibulum eget vestibulum sapien, sed porta, odio id sollicitudin congue\n"
  13. "Lorem ipsum dolor sit amet\n"
  14. "EX 0-01a / EX 0-05a / EX 0-07a /\n"
  15. "EX 0-08a\n\n\n"
  16. "Lorem ipsum dolor sit amet, \n"
  17. "consectetur adipiscing \n"
  18. "elit.\n"
  19. "Vestibulum eget \n"
  20. "vestibulum sapien.\n"
  21. " EX 3-11b")
  22.  
  23. print(re.findall(pattern, s, re.S))
Success #stdin #stdout 0.03s 9512KB
stdin
Standard input is empty
stdout
[('Lorem ipsum dolor sit amet, consectetur adipiscing elit.', 'EX 0-02a'), ('Lorem ipsum dolor sit amet, consectetur adipiscing elit.\nVestibulum eget vestibulum sapien.\nSed porta, odio id sollicitudin congue', 'EX 0-04a'), ('Lorem ipsum dolor sit amet, consectetur adipiscing elit.\nVestibulum eget vestibulum sapien, sed porta, odio id sollicitudin congue\nLorem ipsum dolor sit amet', 'EX 0-01a / EX 0-05a / EX 0-07a /\nEX 0-08a'), ('Lorem ipsum dolor sit amet, \nconsectetur adipiscing \nelit.\nVestibulum eget \nvestibulum sapien.', 'EX 3-11b')]