import re
from pprint import pprint

# Counts line of a V2000 connection table: a 3-character atom-count field,
# a 3-character bond-count field, further fields, and a trailing "V2000" tag.
_COUNTS_LINE = re.compile(r"^([ \d]{3})[ \d]{3}.*V2000\s*$")


def parse_atom_block(molfile_text):
    """Return ``[x, y, z, symbol]`` string rows for each atom of a V2000 molfile.

    The atom block is located through the counts line (the line tagged
    ``V2000``): its first 3-character field holds the number of atoms, and
    exactly that many following lines are read as atom records.  This does not
    depend on how many spaces precede the x coordinate, so large coordinates
    (which leave fewer leading spaces) and whitespace-normalised text are both
    handled, and bond lines are never mistaken for atoms.

    Args:
        molfile_text: Full text of a molfile / SDF record.

    Returns:
        A list with one ``[x, y, z, symbol]`` list of strings per atom, in
        file order.  Returns an empty list when no usable counts line is
        found.  Only the first connection table in the text is parsed.
    """
    lines = molfile_text.splitlines()
    for index, line in enumerate(lines):
        counts = _COUNTS_LINE.match(line)
        if counts is None:
            continue
        try:
            atom_count = int(counts.group(1))
        except ValueError:
            # Field was blank or had an embedded space: not a real counts line.
            continue
        first_atom = index + 1
        atom_lines = lines[first_atom:first_atom + atom_count]
        # Keep only the three coordinates and the element symbol.
        return [atom_line.split()[:4] for atom_line in atom_lines]
    return []


content = (
    "ALA\n"
    " CCTOOLS-0424210918\n\n"
    " 13 12 0 0 1 0 0 0 0 0999 V2000\n"
    " 2.2810 26.2130 12.8040 N 0 0 0 0 0\n"
    " 1.1690 26.9420 13.4110 C 0 0 0 0 0\n"
    " 1.5390 28.3440 13.8740 C 0 0 0 0 0\n"
    " 2.7090 28.6470 14.1140 O 0 0 0 0 0\n"
    " 0.6010 26.1430 14.5740 C 0 0 0 0 0\n"
    " 0.5230 29.1940 13.9970 O 0 0 0 0 0\n"
    " 2.0330 25.2730 12.4930 H 0 0 0 0 0\n"
    " 3.0800 26.1840 13.4360 H 0 0 0 0 0\n"
    " 0.3990 27.0670 12.6130 H 0 0 0 0 0\n"
    " -0.2470 26.6990 15.0370 H 0 0 0 0 0\n"
    " 0.3080 25.1100 14.2700 H 0 0 0 0 0\n"
    " 1.3840 25.8760 15.3210 H 0 0 0 0 0\n"
    " 0.7530 30.0690 14.2860 H 0 0 0 0 0\n"
    " 1 2 1 0 0 0\n"
    " 1 7 1 0 0 0\n"
    " 1 8 1 0 0 0\n"
    " 2 3 1 0 0 0\n"
    " 2 5 1 0 0 0\n"
    " 2 9 1 0 0 0\n"
    " 3 4 2 0 0 0\n"
    " 3 6 1 0 0 0\n"
    " 5 10 1 0 0 0\n"
    " 5 11 1 0 0 0\n"
    " 5 12 1 0 0 0\n"
    " 6 13 1 0 0 0\n"
    "M END\n"
    "$$$$"
)

datasplit = parse_atom_block(content)
pprint(datasplit)
Standard input is empty
[['2.2810', '26.2130', '12.8040', 'N'], ['1.1690', '26.9420', '13.4110', 'C'], ['1.5390', '28.3440', '13.8740', 'C'], ['2.7090', '28.6470', '14.1140', 'O'], ['0.6010', '26.1430', '14.5740', 'C'], ['0.5230', '29.1940', '13.9970', 'O'], ['2.0330', '25.2730', '12.4930', 'H'], ['3.0800', '26.1840', '13.4360', 'H'], ['0.3990', '27.0670', '12.6130', 'H'], ['-0.2470', '26.6990', '15.0370', 'H'], ['0.3080', '25.1100', '14.2700', 'H'], ['1.3840', '25.8760', '15.3210', 'H'], ['0.7530', '30.0690', '14.2860', 'H']]