# Demo: pull "label : [ value ]" pairs out of a free-text report with a
# single regular expression and print them as a list of (label, value) tuples.
import re

# Pattern anatomy:
#   ([^\s:][^:]*)   label - starts with a character that is neither whitespace
#                   nor ':', then runs over anything that is not ':'.  Newlines
#                   and brackets are NOT excluded, so a label can span several
#                   lines and swallow a preceding "[ ... ]" that has no label
#                   of its own.
#   \s+\:\s+        the separating colon, with whitespace required on both sides
#                   (this is why the colons inside "XX:XX:XX" do not split).
#   \[\s*           opening bracket plus any leading padding.
#   ([^][]*)        value - anything up to the next bracket character.
#   \s+]            at least one whitespace character before the closing
#                   bracket; backtracking trims trailing padding off the value.
regex = r"([^\s:][^:]*)\s+\:\s+\[\s*([^][]*)\s+]"

# Sample report.  It deliberately contains awkward cases: a bracketed value
# with no label (": [ XXX ]"), a label whose value sits on the next line
# without a colon ("Tissue volume\n[ Normal RV ]"), repeated labels, and
# lines with trailing spaces.
s = (
    "Date : [ 2010-01-01 XX:XX:XX ] Age : [ 22 ] Sex : [ M ] : [ XXX ]\n"
    "Height(cm) : [ 145 ] Weight(kg) : [ 56.4 ] Race : [ Hispanic ]\n"
    "Spirometry : [ restrictive pattern ]\n"
    "Treatment response : [ Negative ]\n"
    "Tissue volume : [ Normal ]\n"
    "Tissue volume\n"
    "[ Normal RV ] \n"
    "Diffusing capacity : [ Normal capacity ]\n"
    "FVC Liters : [ 2.22 ] FVC Liters : [ 67 ] FVC Liters : [ 3.35 ] \n"
    "FEV1 Liters : [ 1.96 ] FEV1 Liters : [ 66 ] FEV1 Liters : [ 2.06 ] \n"
    "FEV1 / FVC % : [ 58 ] FEV1 / FVC % : [ 62 ]\n"
    "DLCO mL/mmHg/min : [ 21.5 ] DLCO mL/mmHg/min : [ 102 ]\n"
    "DLCO Adj mL/mmHg/min : [ 21.5 ] DLCO Adj mL/mmHg/min : [ 102 ]\n"
    "RV/TLC % : [ 22 ]"
)

# With two capture groups, findall returns one (label, value) tuple per match,
# in the order the matches occur in the text.
print(re.findall(regex, s))
Standard input is empty
[('Date', '2010-01-01 XX:XX:XX'), ('Age', '22'), ('Sex', 'M'), ('[ XXX ]\nHeight(cm)', '145'), ('Weight(kg)', '56.4'), ('Race', 'Hispanic'), ('Spirometry', 'restrictive pattern'), ('Treatment response', 'Negative'), ('Tissue volume', 'Normal'), ('Tissue volume\n[ Normal RV ] \nDiffusing capacity', 'Normal capacity'), ('FVC Liters', '2.22'), ('FVC Liters', '67'), ('FVC Liters', '3.35'), ('FEV1 Liters', '1.96'), ('FEV1 Liters', '66'), ('FEV1 Liters', '2.06'), ('FEV1 / FVC %', '58'), ('FEV1 / FVC %', '62'), ('DLCO mL/mmHg/min', '21.5'), ('DLCO mL/mmHg/min', '102'), ('DLCO Adj mL/mmHg/min', '21.5'), ('DLCO Adj mL/mmHg/min', '102'), ('RV/TLC %', '22')]