fork download
  1. data = """
  2. >lcl|NW_001820825.1_gene_208 [locus_tag=SS1G_08319] [db_xref=GeneID:5486863] [partial=5',3'] [location=<504653..>506706] [gbkey=Gene]
  3. ATGGGCAAAGCTTCTAGGAATAAGACGAAGCATCGCGCTGATCCTACCGCAAAAACTGTTAAGCCACCCA
  4. CTGACCCAGAGCTTGCAGCAATTCGAGTTAACAAAATTCTGCCAATTCTCCAAGATTTACAAAGTGCAGA
  5. CCAGTCAAAGAGATCAACTGCTGCAACTGCCATTGCGAACCTCGTTGACGATACAAAATGTCGAAAGTTA
  6. TTCTTGAGAGAGCAAATTGTTCGTATTCTACTCGAACAAACCCTTACAGACTCAAGCATGGAAACTAGAA
  7. >lcl|NW_001820817.1_gene_205 [locus_tag=SS1G_12233] [db_xref=GeneID:5483157] [partial=5',3'] [location=complement(<502136..>503461)] [gbkey=Gene]
  8. ATGATCTGTAATACGCTCGGTGTTCCACCCTGCAACAGAATTCTTAAGAAATTCTCCGTTGGCGAGAGTC
  9. GTCTCGAAATTCAAGACTCAGTACGAGGCAAAGATGTCTACATCATTCAATCGGGTGGAGGAAAGGCCAA
  10. TGATCACTTCGTGGATCTTTGCATTATGATCTCCGCATGCAAAACTGGCTCTGCCAAGCGCGTCACTGTC
  11. GTCCTTCCTTTGTTTCCTTATTCACGACAACCTGATCTGCCATACAACAAGATTGGCGCACCACTTGCCA
  12. >lcl|NW_001820834.1_gene_1034 [locus_tag=SS1G_02099] [db_xref=GeneID:5493612] [partial=5',3'] [location=<2692251..>2693298] [gbkey=Gene]
  13. ATGGCTTCTGTTTACAAGTCATTATCAAAGACCTCTGGTCATAAAGAAGAAACCCCGACTGGTGTCAAGA
  14. AAAACAAGCAAAGAGTTTTGATCTTGTCTTCAAGAGGAATAACTTACAGGTATATAAATTTGTACCGATG
  15. CGATGCAAAAAATCGCAGGAAAATGCTAACTCTACAACTTAGACATCGACATCTCCTCAATGACCTTGCG
  16. TCCCTACTTCCCCACGGTAGGAAAGATGCGAAACTCGATACCAAGTCAAAGCTTTATCAATTGAATGAAT
  17. >lcl|NW_001820830.1_gene_400 [locus_tag=SS1G_05227] [db_xref=GeneID:5489764] [partial=5',3'] [location=complement(<1032740..>1033620)] [gbkey=Gene]
  18. ATGGCGGACGGATGTAAGTTAATTGATGTTCCTACTATTCCAGACTAATATTTGTTCTCGTCCCTACAAT
  19. GCATTCGGAACGGATGGTACTCAGTTAACTTTGTAACTAATACAACGTCTAGTAAATGACCAAAGAACTG
  20. """.splitlines()
  21.  
  22. import re
  23.  
  24. def get_tags1(data, tags, prefix='>lcl'):
  25. tags = set(tags)
  26. pattern = re.compile(r'\[(.+?)=(.+?)\]')
  27.  
  28. def parse_line(line):
  29. return {m.group(1): m.group(2) for m in pattern.finditer(line) if m.group(1) in tags}
  30.  
  31. return [parse_line(line) for line in data if prefix is None or line.startswith(prefix)]
  32.  
  33. def get_tags2(data, tags, prefix='>lcl'):
  34. tags = set(tags)
  35.  
  36. def parse_line(line):
  37. items = [tag.split(']')[0].split('=') for tag in line.split('[')[1:]]
  38. return dict(tag for tag in items if tag[0] in tags)
  39.  
  40. return [parse_line(line) for line in data if prefix is None or line.startswith(prefix)]
  41.  
  42. tags = ['locus_tag', 'location']
  43.  
  44. print('Regex')
  45. for line in get_tags1(data, tags):
  46. print(*(line[tag] for tag in tags))
  47.  
  48. print('No Regex')
  49. for line in get_tags2(data, tags):
  50. print(*(line[tag] for tag in tags))
Result: Success (time 0.01s, memory 27728KB)
stdin:
Standard input is empty
stdout:
Regex
SS1G_08319 <504653..>506706
SS1G_12233 complement(<502136..>503461)
SS1G_02099 <2692251..>2693298
SS1G_05227 complement(<1032740..>1033620)
No Regex
SS1G_08319 <504653..>506706
SS1G_12233 complement(<502136..>503461)
SS1G_02099 <2692251..>2693298
SS1G_05227 complement(<1032740..>1033620)