fork download
  1. import re
  2. from collections import defaultdict
  3.  
  4. dct = {}
  5.  
  6. str="""#########################
  7. ### MOSES CONFIG FILE ###
  8. #########################
  9.  
  10. # input factors
  11. [input-factors]
  12. 0
  13.  
  14. # mapping steps
  15. [mapping]
  16. 0 T 0
  17.  
  18. [distortion-limit]
  19. 6
  20.  
  21. # feature functions
  22. [feature]
  23. UnknownWordPenalty
  24. WordPenalty
  25. PhrasePenalty
  26. PhraseDictionaryMemory name=TranslationModel0 num-features=4 path=/home /gillin/jojomert/phrase-jojo/work.src-ref/training/model/phrase-table.gz input-factor=0 output-factor=0
  27. LexicalReordering name=LexicalReordering0 num-features=6 type=wbe-msd-bidirectional-fe-allff input-factor=0 output-factor=0 path=/home/gillin/jojomert/phrase-jojo/work.src-ref/training/model/reordering-table.wbe-msd-bidirectional-fe.gz
  28. Distortion
  29. KENLM lazyken=0 name=LM0 factor=0 path=/home/gillin/jojomert/ru.kenlm order=5
  30.  
  31. # dense weights for feature functions
  32. [weight]
  33. UnknownWordPenalty0= 1
  34. WordPenalty0= -1
  35. PhrasePenalty0= 0.2
  36. TranslationModel0= 0.2 0.2 0.2 0.2
  37. LexicalReordering0= 0.3 0.3 0.3 0.3 0.3 0.3
  38. Distortion0= 0.3
  39. LM0= 0.5"""
  40.  
  41. #get [weight] section
  42. match_weight = re.search(r"\[weight][^\n]*(?:\n(?!$|\n)[^\n]*)*", str)
  43. if match_weight:
  44. weight = match_weight.group() # get the [weight] text
  45. dct = dict([(x[0], [float(x) for x in x[1].split(" ")]) for x in re.findall(r"(\w+)\s*=\s*(.*)\s*", weight)])
  46.  
  47. print dct
  48.  
Success #stdin #stdout 0.02s 9128KB
stdin
Standard input is empty
stdout
{'UnknownWordPenalty0': [1.0], 'LexicalReordering0': [0.3, 0.3, 0.3, 0.3, 0.3, 0.3], 'LM0': [0.5], 'PhrasePenalty0': [0.2], 'TranslationModel0': [0.2, 0.2, 0.2, 0.2], 'Distortion0': [0.3], 'WordPenalty0': [-1.0]}