import re
# Pair each block of free text with the reference code(s) that follow it.
#
#   group 1: the text block, matched lazily; with re.S the dot also matches
#            newlines, so a block may span several lines.
#   group 2: one code shaped like "EX 0-02a" (2-4 word chars, a space, a
#            digit, "-", up to two digits, one trailing word char),
#            optionally followed by more codes separated by "/" with
#            whitespace on both sides (that whitespace may be a newline).
#
# Whitespace inside a regex is significant unless re.VERBOSE is used, so the
# pattern must not contain stray spaces around quantifiers: "\w {2,4}" would
# mean "one word char followed by 2-4 spaces", not "2-4 word chars".
pattern = (
    r"\s*(.+?)\s*"
    r"(\w{2,4} \d-\d{0,2}\w(?:\s+/\s+\w{2,4} \d-\d{0,2}\w)*)"
)

# Sample input: paragraphs of filler text, each terminated by one or more
# codes. Leading whitespace before a code is absorbed by the "\s*" in the
# pattern; line breaks inside a paragraph are preserved in group 1.
s = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n"
    " EX 0-02a\n"
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n"
    "Vestibulum eget vestibulum sapien.\n"
    "Sed porta, odio id sollicitudin congue\n"
    " EX 0-04a\n\n"
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n"
    "Vestibulum eget vestibulum sapien, sed porta, odio id sollicitudin congue\n"
    "Lorem ipsum dolor sit amet\n"
    "EX 0-01a / EX 0-05a / EX 0-07a /\n"
    "EX 0-08a\n\n\n"
    "Lorem ipsum dolor sit amet, \n"
    "consectetur adipiscing \n"
    "elit.\n"
    "Vestibulum eget \n"
    "vestibulum sapien.\n"
    " EX 3-11b"
)

# Prints a list of (text, codes) tuples, one per paragraph.
print(re.findall(pattern, s, re.S))
aW1wb3J0IHJlCgpwYXR0ZXJuID0gciJccyooLis/KVxzKihcd3syLDR9IFxkLVxkezAsMn1cdyg/OlxzKy9ccytcd3syLDR9IFxkLVxkezAsMn1cdykqKSIKCnMgPSAoIkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0LCBjb25zZWN0ZXR1ciBhZGlwaXNjaW5nIGVsaXQuXG4iCiAgICAgICAgICAgICIgRVggMC0wMmFcbiIKICAgICAgICAgICAgIkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0LCBjb25zZWN0ZXR1ciBhZGlwaXNjaW5nIGVsaXQuXG4iCiAgICAgICAgICAgICJWZXN0aWJ1bHVtIGVnZXQgdmVzdGlidWx1bSBzYXBpZW4uXG4iCiAgICAgICAgICAgICJTZWQgcG9ydGEsIG9kaW8gaWQgc29sbGljaXR1ZGluIGNvbmd1ZVxuIgogICAgICAgICAgICAiICAgICAgICAgIEVYIDAtMDRhXG5cbiIKICAgICAgICAgICAgIkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0LCBjb25zZWN0ZXR1ciBhZGlwaXNjaW5nIGVsaXQuXG4iCiAgICAgICAgICAgICJWZXN0aWJ1bHVtIGVnZXQgdmVzdGlidWx1bSBzYXBpZW4sIHNlZCBwb3J0YSwgb2RpbyBpZCBzb2xsaWNpdHVkaW4gY29uZ3VlXG4iCiAgICAgICAgICAgICJMb3JlbSBpcHN1bSBkb2xvciBzaXQgYW1ldFxuIgogICAgICAgICAgICAiRVggMC0wMWEgLyBFWCAwLTA1YSAvIEVYIDAtMDdhIC9cbiIKICAgICAgICAgICAgIkVYIDAtMDhhXG5cblxuIgogICAgICAgICAgICAiTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIFxuIgogICAgICAgICAgICAiY29uc2VjdGV0dXIgYWRpcGlzY2luZyBcbiIKICAgICAgICAgICAgImVsaXQuXG4iCiAgICAgICAgICAgICJWZXN0aWJ1bHVtIGVnZXQgXG4iCiAgICAgICAgICAgICJ2ZXN0aWJ1bHVtIHNhcGllbi5cbiIKICAgICAgICAgICAgIiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIEVYIDMtMTFiIikKCnByaW50KHJlLmZpbmRhbGwocGF0dGVybiwgcywgcmUuUykp
stdout
[('Lorem ipsum dolor sit amet, consectetur adipiscing elit.', 'EX 0-02a'), ('Lorem ipsum dolor sit amet, consectetur adipiscing elit.\nVestibulum eget vestibulum sapien.\nSed porta, odio id sollicitudin congue', 'EX 0-04a'), ('Lorem ipsum dolor sit amet, consectetur adipiscing elit.\nVestibulum eget vestibulum sapien, sed porta, odio id sollicitudin congue\nLorem ipsum dolor sit amet', 'EX 0-01a / EX 0-05a / EX 0-07a /\nEX 0-08a'), ('Lorem ipsum dolor sit amet, \nconsectetur adipiscing \nelit.\nVestibulum eget \nvestibulum sapien.', 'EX 3-11b')]