"""Extract "key : value" attributes from a free-form product description."""
import pprint
import re

# One "key : value" pair, piece by piece:
#   (?:\s*\+\s*)?       optional "+" that joins two pairs on the same line
#   ([^:]+)             key   - everything up to the colon
#   \s*:\s*             the colon, with optional whitespace around it
#   ([^:]+)             value - colon-free text (greedy)
#   (?=\+[^:+]*:|$)     the value must stop right before a "+" that introduces
#                       another "key:", or at an end of line (re.MULTILINE)
# Because the value is greedy, it takes the longest colon-free stretch that
# ends at one of those two places, so a "+" inside a value such as
# "Cotton + Embroidered ( 2 Mtr)" stays part of that value.
pattern = r"(?:\s*\+\s*)?([^:]+)\s*:\s*([^:]+)(?=\+[^:+]*:|$)"

# Sample description: one or two "key: value" pairs per line.
s = (
    "TOP : Cotton + Embroidered ( 2 Mtr) \n"
    "BOTTOM : Cotton + Solid (2 Mtr) \n"
    "DUPATTA : Chiffon + Lace Work ( 2 Mtr) \n"
    "TYPE : Un Stitched\n"
    "COLOUR : Multi Colour \n"
    "CONTAINS : 1 TOP WITH LINING 1 BOTTOM & 1 DUPATTA\n"
    "Country of Origin: India\n\n"
    "Top Fabric: Cotton Cambric + Top Length: 0-2.00\n"
    "Bottom Fabric: Cotton Cambric + Bottom Length: 0-2.00\n"
    "Dupatta Fabric: Nazneen + Dupatta Length: 0-2.00\n"
    "Lining Fabric: Cotton Cambric\n"
    "Type: Un Stitched\n"
    "Pattern: Printed\n"
    "Multipack: 3 Top\n"
    "Country of Origin: India"
)


def parse_attributes(text: str, attribute_pattern: str = pattern) -> dict:
    """Return a dict mapping each attribute name in *text* to its value.

    Keys and values are stripped of surrounding whitespace. Keys are
    case-sensitive ("TYPE" and "Type" are distinct entries), and when a key
    occurs more than once the last occurrence wins. Text without any
    "key: value" pair yields an empty dict.
    """
    return {
        match.group(1).strip(): match.group(2).strip()
        for match in re.finditer(attribute_pattern, text, re.MULTILINE)
    }


dictionary = parse_attributes(s)
# pprint sorts the keys, which gives the alphabetical listing when run.
pprint.pprint(dictionary)
Standard input is empty
{'BOTTOM': 'Cotton + Solid (2 Mtr)',
'Bottom Fabric': 'Cotton Cambric',
'Bottom Length': '0-2.00',
'COLOUR': 'Multi Colour',
'CONTAINS': '1 TOP WITH LINING 1 BOTTOM & 1 DUPATTA',
'Country of Origin': 'India',
'DUPATTA': 'Chiffon + Lace Work ( 2 Mtr)',
'Dupatta Fabric': 'Nazneen',
'Dupatta Length': '0-2.00',
'Lining Fabric': 'Cotton Cambric',
'Multipack': '3 Top',
'Pattern': 'Printed',
'TOP': 'Cotton + Embroidered ( 2 Mtr)',
'TYPE': 'Un Stitched',
'Top Fabric': 'Cotton Cambric',
'Top Length': '0-2.00',
'Type': 'Un Stitched'}