import re
import pprint

# Regex that pulls "key : value" pairs out of free-form product text.
#   (?:\s*\+\s*)?        optional "+" separator (with surrounding whitespace)
#                        left over from a previous pair on the same line
#   ([^:]+)              group 1: the key, a run of non-colon characters
#   \s*:\s*              the colon separator, whitespace on either side allowed
#   ([^:]+)              group 2: the value, a run of non-colon characters
#   (?=\+[^:+]*:|$)      the value must be followed either by "+ <text>:"
#                        (another pair on the same line) or by an end of line
#                        (the pattern is applied with re.MULTILINE)
# Because the value is matched greedily, a "+" inside a value (for example
# "Cotton + Embroidered") stays part of that value as long as the line end
# is reachable first while backtracking.
pattern = r"(?:\s*\+\s*)?([^:]+)\s*:\s*([^:]+)(?=\+[^:+]*:|$)"

# Sample input: two attribute listings separated by a blank line.
s = (
    "TOP : Cotton + Embroidered ( 2 Mtr) \n"
    "BOTTOM : Cotton + Solid (2 Mtr) \n"
    "DUPATTA : Chiffon + Lace Work ( 2 Mtr) \n"
    "TYPE : Un Stitched\n"
    "COLOUR : Multi Colour \n"
    "CONTAINS : 1 TOP WITH LINING 1 BOTTOM & 1 DUPATTA\n"
    "Country of Origin: India\n\n"
    "Top Fabric: Cotton Cambric + Top Length: 0-2.00\n"
    "Bottom Fabric: Cotton Cambric + Bottom Length: 0-2.00\n"
    "Dupatta Fabric: Nazneen + Dupatta Length: 0-2.00\n"
    "Lining Fabric: Cotton Cambric\n"
    "Type: Un Stitched\n"
    "Pattern: Printed\n"
    "Multipack: 3 Top\n"
    "Country of Origin: India"
)

# With two capturing groups, findall yields (key, value) tuples in match
# order. Surrounding whitespace (including the newline that precedes most
# keys) is stripped; a key that occurs more than once keeps its last value.
dictionary = {
    key.strip(): value.strip()
    for key, value in re.findall(pattern, s, re.MULTILINE)
}

pprint.pprint(dictionary)
Standard input is empty
{'BOTTOM': 'Cotton + Solid (2 Mtr)', 'Bottom Fabric': 'Cotton Cambric', 'Bottom Length': '0-2.00', 'COLOUR': 'Multi Colour', 'CONTAINS': '1 TOP WITH LINING 1 BOTTOM & 1 DUPATTA', 'Country of Origin': 'India', 'DUPATTA': 'Chiffon + Lace Work ( 2 Mtr)', 'Dupatta Fabric': 'Nazneen', 'Dupatta Length': '0-2.00', 'Lining Fabric': 'Cotton Cambric', 'Multipack': '3 Top', 'Pattern': 'Printed', 'TOP': 'Cotton + Embroidered ( 2 Mtr)', 'TYPE': 'Un Stitched', 'Top Fabric': 'Cotton Cambric', 'Top Length': '0-2.00', 'Type': 'Un Stitched'}