fork download
  1. import re
  2. suffixes = r"(?:adj\.|adv\.|pron\.|num\.|num\.-m|conj\.|part\.|aux\.|prep\.|n\.|v\.|m\.)"
  3. regex = fr'^(\w+?)((?:{suffixes}) .*)$'
  4.  
  5. for sentence in ['qiān\tnum. thousand', 'jiànm. (used for clothes among other items) piece']:
  6. result = re.search(regex, sentence.replace('\t', 't'))
  7. if result:
  8. print(result.groups())
  9.  
Success #stdin #stdout 0.02s 9432KB
stdin
Standard input is empty
stdout
('qiānt', 'num. thousand')
('jiàn', 'm. (used for clothes among other items) piece')