import re

# Matches one ":<tag>:<body>" record that begins at the start of a line
# (re.MULTILINE makes "^" match after every newline).  Both the tag and the
# body are runs of non-colon characters, so the body may span several lines
# and stops right before the next ":" or at the end of the input.
p = re.compile(r'^:([^:]+):([^:]+)', re.MULTILINE)

# Sample input: five tagged records, some with multi-line bodies.
s = ":1:text\ntext\n\n:20:text\n\n:21:text\ntext\ntext\n\n:22: \n\n:25:aa\naa\naaaaa"


def parse_records(text):
    """Split *text* into ``[tag, line, line, ...]`` lists, one per record.

    Each record is located with the module-level pattern ``p``.  The body of
    a record is cut into pieces with ``.+\\n*``: every piece is a non-empty
    line together with all newline characters that directly follow it, so
    blank lines stay attached to the preceding line instead of becoming
    separate (empty) items.

    Returns an empty list when no record is found.
    """
    return [
        [match.group(1)] + re.findall(r".+\n*", match.group(2))
        for match in p.finditer(text)
    ]


print(parse_records(s))
Standard input is empty
[['1', 'text\n', 'text\n\n'], ['20', 'text\n\n'], ['21', 'text\n', 'text\n', 'text\n\n'], ['22', ' \n\n'], ['25', 'aa\n', 'aa\n', 'aaaaa']]