import re
# Regex for one transaction row plus any wrapped description lines below it.
#   date       two digits, "/", two digits
#   desc       description text on the row itself (word characters and spaces)
#   expense    "$" followed by digits, "." and ","
#   deposit    a digit followed by digits, "." and ","
#   desc_more  zero or more following lines, stopping before a line that
#              starts with digits-"/"-digit, "continued", or "Page <digit>"
# The rest of the row after `deposit` (presumably a running balance column --
# TODO confirm) is consumed by `\s.*` and not captured.
# The gap between `expense` and `deposit` is `\s+` rather than exactly two
# whitespace characters, so rows match whatever the column padding width is.
pattern = r"(?P<date>\d{2}/\d{2})\s+(?P<desc>\w[\w ]*)(?P<expense>\$[\d.,]*)\s+(?P<deposit>\d[\d.,]*)\s.*(?P<desc_more>(?:\n(?!\d+\/\d|continued\b|Page\s+\d).*)*)"

# Sample input: transaction rows interleaved with continuation lines and
# page-break noise that must not end up in any description.
s = (" 0 0 $12,345.67 \n"
     "08/27 DEBIT CARD PURCHASE XXXXXX 5541XXXXXX $1.23 0 $123,456.78\n"
     "RACETRAC467 00004671 PLEASANTVILLEPA\n"
     "08/27 BANK FUNDS TRANSFER DB $45.67 0 $124,816.32\n"
     "TO SMITH,JOHN\n"
     "SAVINGS #0001, CONF# 8675309\n"
     "continued on next page>>>\n"
     " 987654-3210\n"
     "Page 1 of 11\n"
     "07/27 DEBIT CARD PURCHASE XXXXXX 6541XXXXXX $2.23 0 $223,456.78")


def parse_transactions(text: str) -> list:
    """Return one dict per transaction row that ``pattern`` finds in *text*.

    Each dict has the keys ``date``, ``desc``, ``expense`` and ``deposit``
    (in that order).  Continuation lines captured as ``desc_more`` are folded
    into ``desc`` and the ``desc_more`` key is removed.

    An input with no matching rows (including the empty string) yields an
    empty list.
    """
    records = []
    for match in re.finditer(pattern, text):
        fields = match.groupdict()
        wrapped = fields.pop("desc_more")
        # Join the row text with its continuation lines: each newline, together
        # with any blanks directly before it, becomes a single space.
        # NOTE: blanks at the very end (no newline after them) are kept, so a
        # row without continuation lines retains its trailing blank(s).
        fields["desc"] = re.sub(r"[^\S\n]*\n", " ", fields["desc"] + wrapped)
        records.append(fields)
    return records


for d in parse_transactions(s):
    print(d)
aW1wb3J0IHJlCgpwYXR0ZXJuID0gciIoP1A8ZGF0ZT5cZHsyfS9cZHsyfSlccysoP1A8ZGVzYz5cd1tcdyBdKikoP1A8ZXhwZW5zZT5cJFtcZC4sXSopXHN7Mn0oP1A8ZGVwb3NpdD5cZFtcZC4sXSopXHMuKig/UDxkZXNjX21vcmU+KD86XG4oPyFcZCtcL1xkfGNvbnRpbnVlZFxifFBhZ2VccytcZCkuKikqKSIKCnMgPSAoIiAgMCAgMCAgJDEyLDM0NS42NyBcbiIKICAgICAiMDgvMjcgIERFQklUIENBUkQgUFVSQ0hBU0UgWFhYWFhYIDU1NDFYWFhYWFggICQxLjIzICAwICAkMTIzLDQ1Ni43OFxuIgogICAgICJSQUNFVFJBQzQ2NyAwMDAwNDY3MSBQTEVBU0FOVFZJTExFUEFcbiIKICAgICAiMDgvMjcgIEJBTksgRlVORFMgVFJBTlNGRVIgREIgICQ0NS42NyAgMCAgJDEyNCw4MTYuMzJcbiIKICAgICAiVE8gU01JVEgsSk9ITlxuIgogICAgICJTQVZJTkdTICMwMDAxLCBDT05GIyA4Njc1MzA5XG4iCiAgICAgImNvbnRpbnVlZCBvbiBuZXh0IHBhZ2U+Pj5cbiIKICAgICAiIDk4NzY1NC0zMjEwXG4iCiAgICAgIlBhZ2UgMSBvZiAxMVxuIgogICAgICIwNy8yNyAgREVCSVQgQ0FSRCBQVVJDSEFTRSBYWFhYWFggNjU0MVhYWFhYWCAgJDIuMjMgIDAgICQyMjMsNDU2Ljc4IikKCm1hdGNoZXMgPSByZS5maW5kaXRlcihwYXR0ZXJuLCBzKQoKZm9yIF8sIG1hdGNoIGluIGVudW1lcmF0ZShtYXRjaGVzKToKICAgIGQgPSBtYXRjaC5ncm91cGRpY3QoKQogICAgZC51cGRhdGUoeydkZXNjJzogcmUuc3ViKHIiW15cU1xuXSpcbiIsICIgIiAsIG1hdGNoLmdyb3VwZGljdCgpLmdldCgnZGVzYycpICsgbWF0Y2guZ3JvdXBkaWN0KCkuZ2V0KCdkZXNjX21vcmUnKSl9KQogICAgZGVsIGRbImRlc2NfbW9yZSJdCiAgICBwcmludChkKQ==
{'date': '08/27', 'desc': 'DEBIT CARD PURCHASE XXXXXX 5541XXXXXX RACETRAC467 00004671 PLEASANTVILLEPA', 'expense': '$1.23', 'deposit': '0'}
{'date': '08/27', 'desc': 'BANK FUNDS TRANSFER DB TO SMITH,JOHN SAVINGS #0001, CONF# 8675309', 'expense': '$45.67', 'deposit': '0'}
{'date': '07/27', 'desc': 'DEBIT CARD PURCHASE XXXXXX 6541XXXXXX ', 'expense': '$2.23', 'deposit': '0'}