"""Group request identifiers under the three-letter code that precedes them."""
import re

# Raw listing: a three-letter uppercase header line followed by the
# identifiers that belong to it, one token per line.
string = """
ARW
VR-GREQ-299684_6j
VR-GREQ-299684_6k
VR-GREQ-299606_3
VR-GREQ-299606_4
VR-GREQ-299606_5
VR-GREQ-299606_7
VR-GREQ-299606_9
VR-GREQ-299607_4
VR-GREQ-299608_1
VR-GREQ-299563_10
CZC
VR-GREQ-299684_6k
VR-GREQ-299606_6
VR-GREQ-299606_8
VR-GREQ-299640_1
VR-GREQ-299640_5
VR-GREQ-299640_6
VR-GREQ-299640_7
DUN
FB_71125_1
FRC
VR-GREQ-299659_18
VR-GREQ-299659_19
VR-GREQ-299659_28
VR-GREQ-299659_31
VR-GREQ-299659_32
"""

# A header is a line made up of exactly three uppercase ASCII letters.
rx = r'^([A-Z]{3})$'


def parse_languages(text, pattern=rx):
    """Return a dict mapping each header code to the entries listed under it.

    Args:
        text: Listing with one token per line.  A line that matches
            ``pattern`` starts a new group; every other non-blank line is an
            entry of the most recent group.
        pattern: Regular expression identifying a header line.  Its first
            capture group is used as the dictionary key.

    Returns:
        A dict of ``{code: [entry, ...]}`` with entries in input order.
        Headers that have no entries are omitted.  If a header occurs more
        than once, its entries are accumulated rather than overwritten.

    Raises:
        ValueError: If an entry appears before any header line.
    """
    header_re = re.compile(pattern)  # compiled once, outside the loop
    groups = {}
    current = None
    for raw_line in text.splitlines():
        # Strip before matching so trailing blanks or a stray '\r' cannot
        # turn a header into an entry; this also drops whitespace-only lines.
        line = raw_line.strip()
        if not line:
            continue
        header = header_re.match(line)
        if header is not None:
            current = header.group(1)
        elif current is None:
            raise ValueError('entry %r appears before any header line' % line)
        else:
            groups.setdefault(current, []).append(line)
    return groups


languages = parse_languages(string)

# Parenthesised form prints the same text on Python 2 and Python 3.
print(languages)
Standard input is empty
{'FRC': ['VR-GREQ-299659_18', 'VR-GREQ-299659_19', 'VR-GREQ-299659_28', 'VR-GREQ-299659_31', 'VR-GREQ-299659_32'], 'CZC': ['VR-GREQ-299684_6k', 'VR-GREQ-299606_6', 'VR-GREQ-299606_8', 'VR-GREQ-299640_1', 'VR-GREQ-299640_5', 'VR-GREQ-299640_6', 'VR-GREQ-299640_7'], 'DUN': ['FB_71125_1'], 'ARW': ['VR-GREQ-299684_6j', 'VR-GREQ-299684_6k', 'VR-GREQ-299606_3', 'VR-GREQ-299606_4', 'VR-GREQ-299606_5', 'VR-GREQ-299606_7', 'VR-GREQ-299606_9', 'VR-GREQ-299607_4', 'VR-GREQ-299608_1', 'VR-GREQ-299563_10']}