fork download
  1. import re
  2. regex = r"((?<![^_])ins\d+)_|([a-zA-Z]+[0-9]*)\{([^{}]*)\}:r\.([-()?+\d]+)?(?:_([-()?+\d]+))?"
  3. test = ["EWSR1{ENST00000397938}:r.1_1364_FLI1{ENST00000429175}:r.1046_3051",
  4. "EML4{ENST00000318522}:r.1_929_EML4{ENST00000318522}:r.903+188_903+220_ALK{ENST00000389048}:r.4080_6220",
  5. "FUS{ENST00000254108}:r.1_(608)_FUS{ENST00000254108}:r.(819)_937_DDIT3{ENST00000547303}:r.76_872",
  6. "TCF3{ENST00000262965}:r.1_1795_ins27_PBX1{ENST00000420696}:r.454_6636",
  7. "EML4{ENST00000318522}:r.?_ALK{ENST00000389048}:r.?"]
  8. res = []
  9. for s in test:
  10. for match in re.finditer(regex, s):
  11. tmp = []
  12. for groupNum in range(0, len(match.groups())):
  13. if match.group(groupNum+1):
  14. tmp.append(match.group(groupNum+1))
  15. res.append(tmp)
  16. print(res)
Success #stdin #stdout 0s 9024KB
stdin
Standard input is empty
stdout
[['EWSR1', 'ENST00000397938', '1', '1364'], ['FLI1', 'ENST00000429175', '1046', '3051'], ['EML4', 'ENST00000318522', '1', '929'], ['EML4', 'ENST00000318522', '903+188', '903+220'], ['ALK', 'ENST00000389048', '4080', '6220'], ['FUS', 'ENST00000254108', '1', '(608)'], ['FUS', 'ENST00000254108', '(819)', '937'], ['DDIT3', 'ENST00000547303', '76', '872'], ['TCF3', 'ENST00000262965', '1', '1795'], ['ins27'], ['PBX1', 'ENST00000420696', '454', '6636'], ['EML4', 'ENST00000318522', '?'], ['ALK', 'ENST00000389048', '?']]