fork download
  1. import re
  2.  
  3. data = """
  4. >XM_024446048.1 PREDICTED: Homo sapiens mannosidase alpha class 2A member 1 (MAN2A1), transcript variant X2, mRNA
  5. CAGCCCCC
  6. TGAGCGAC
  7. TCCCTAATGTG
  8. ACAGTAAAGAA
  9. >NM_001308028.1 Homo sapiens FER tyrosine kinase (FER), transcript variant 2, mRNA
  10. CAGCCC
  11. CCGTGACGC
  12. GGGGTGGTGACT
  13. GGCTC
  14. GGTGGT
  15. GTGAC
  16. >NM_0013082323028.1 H STZ mRSN1A
  17. CAGCCC
  18. CCGTGACGC
  19. GGG
  20. GTGGTGA
  21. CTGGCTCCGGAGT
  22. CTGAGGGGTTCGG"""
  23.  
  24. rx = re.compile(r'''
  25. ^>(?P<header>.+)[\n\r]
  26. (?P<content>[\s\S]+?)(?=^>|\Z)''', re.M | re.X)
  27.  
  28. sequences = {"sequence{}".format(idx): {"header": m.group('header'), "content": m.group("content")}
  29. for idx, m in enumerate(rx.finditer(data), 1)}
  30. print(sequences)
Success #stdin #stdout 0.02s 27712KB
stdin
Standard input is empty
stdout
{'sequence1': {'header': 'XM_024446048.1 PREDICTED: Homo sapiens mannosidase alpha class 2A member 1 (MAN2A1), transcript variant X2, mRNA', 'content': 'CAGCCCCC\nTGAGCGAC\nTCCCTAATGTG\nACAGTAAAGAA\n'}, 'sequence2': {'header': 'NM_001308028.1 Homo sapiens FER tyrosine kinase (FER), transcript variant 2, mRNA', 'content': 'CAGCCC\nCCGTGACGC\nGGGGTGGTGACT\nGGCTC\nGGTGGT\nGTGAC\n'}, 'sequence3': {'header': 'NM_0013082323028.1 H STZ mRSN1A', 'content': 'CAGCCC\nCCGTGACGC\nGGG\nGTGGTGA\nCTGGCTCCGGAGT\nCTGAGGGGTTCGG'}}