import re from collections import defaultdict dct = {} str="""######################### ### MOSES CONFIG FILE ### ######################### # input factors [input-factors] 0 # mapping steps [mapping] 0 T 0 [distortion-limit] 6 # feature functions [feature] UnknownWordPenalty WordPenalty PhrasePenalty PhraseDictionaryMemory name=TranslationModel0 num-features=4 path=/home /gillin/jojomert/phrase-jojo/work.src-ref/training/model/phrase-table.gz input-factor=0 output-factor=0 LexicalReordering name=LexicalReordering0 num-features=6 type=wbe-msd-bidirectional-fe-allff input-factor=0 output-factor=0 path=/home/gillin/jojomert/phrase-jojo/work.src-ref/training/model/reordering-table.wbe-msd-bidirectional-fe.gz Distortion KENLM lazyken=0 name=LM0 factor=0 path=/home/gillin/jojomert/ru.kenlm order=5 # dense weights for feature functions [weight] UnknownWordPenalty0= 1 WordPenalty0= -1 PhrasePenalty0= 0.2 TranslationModel0= 0.2 0.2 0.2 0.2 LexicalReordering0= 0.3 0.3 0.3 0.3 0.3 0.3 Distortion0= 0.3 LM0= 0.5""" #get [weight] section match_weight = re.search(r"\[weight][^\n]*(?:\n(?!$|\n)[^\n]*)*", str) if match_weight: weight = match_weight.group() # get the [weight] text dct = dict([(x[0], [float(x) for x in x[1].split(" ")]) for x in re.findall(r"(\w+)\s*=\s*(.*)\s*", weight)]) print dct
Standard input is empty
{'UnknownWordPenalty0': [1.0], 'LexicalReordering0': [0.3, 0.3, 0.3, 0.3, 0.3, 0.3], 'LM0': [0.5], 'PhrasePenalty0': [0.2], 'TranslationModel0': [0.2, 0.2, 0.2, 0.2], 'Distortion0': [0.3], 'WordPenalty0': [-1.0]}