fork download
  1. from collections import namedtuple
  2.  
  3. # -------------------------
  4. # Data: Multiple sequence alignment
  5. # -------------------------
  6. msa = {
  7. "human" : "TCCTGCCTCATCCTATTATTTATCGCACCTAC-GTTCAATATTACAGGCGAA-CATA-CTTACTAAAGTGTGTTAATTAATTAATGCTTGTAG",
  8. "bonobo" : "TCCTGCCCCATTACGTTATTTATCGCACCTAC-GTTCAATATTATTACCTAG-CATGATTTACTAAAGCGTGTTAATTAATTAATGCTTGTAG",
  9. "chimp" : "TCCTGCCCCATTGTATTATTTATCGCACCTAC-GTTCAATATTACGACCTAG-CATA-CCTACTAAAGTGTGTTGATTAATTAATGCTTGCAG",
  10. "gibbon" : "TTCTGACCCATCCTATTGTTGATCGCGCCTAC-GTTCAATATCCCAGCCGAG-CATA-CTTACACTAAGGTGTTAATTAATTCATGCTTGTTG",
  11. "oran-pa" : "TCCTACCTCATGCCATTATTAATCGCGCCTAATATCCAATATCCTAGCCCCACCCTC-AGTGTTTGAAGCTGCTATTTAATTTATGCTAG-AG",
  12. "gorilla" : "TCCTGCCCCATGCTACCATTTATCGCACCTAC-GTTCAATATTACAGCCGAG-CGCA-CAGTGTTCATGGTGTTAATTAATTCATGCTTGTTG",
  13. "oran-pp" : "TCCTGCCCCATGGCGTTATTGATCGCGCCTAACGTCCAATGTTCTAGCGCCC-CCTC-CCTATTGAAAGTTGTTATTTAATTTATGCTAG-AG"
  14. }
  15.  
  16. # All sequences same length
  17. L = len(next(iter(msa.values())))
  18.  
  19. # -------------------------
  20. # Fitch algorithm
  21. # -------------------------
  22.  
  23. def fitch_pass(node, column):
  24. """Returns (set of states, score) for a node at one column"""
  25. if isinstance(node, str): # leaf
  26. return { msa[node][column] }, 0
  27.  
  28. left, right = node
  29. Lset, Lscore = fitch_pass(left, column)
  30. Rset, Rscore = fitch_pass(right, column)
  31. inter = Lset & Rset
  32. if inter:
  33. return inter, Lscore + Rscore
  34. else:
  35. return (Lset | Rset), Lscore + Rscore + 1
  36.  
  37.  
  38. def fitch_score(tree):
  39. """Compute total parsimony score for a tree over all columns"""
  40. score = 0
  41. for c in range(L):
  42. _, s = fitch_pass(tree, c)
  43. score += s
  44. return score
  45.  
  46.  
  47. # -------------------------
  48. # Define trees as nested tuples
  49. # -------------------------
  50. # Topology 1: ((((human,(chimp,bonobo)),gorilla),(oran-pa,oran-pp)),gibbon)
  51. tree1 = (((( "human", ("chimp","bonobo") ), "gorilla"), ("oran-pa","oran-pp")), "gibbon")
  52.  
  53. # Topology 2: ((((human,gorilla),(chimp,bonobo)),(oran-pa,oran-pp)),gibbon)
  54. tree2 = (((("human","gorilla"),("chimp","bonobo")),("oran-pa","oran-pp")), "gibbon")
  55.  
  56. # Topology 3: ((((human,bonobo),gorilla),(oran-pa,oran-pp)),(chimp,gibbon))
  57. tree3 = (((("human","bonobo"),"gorilla"),("oran-pa","oran-pp")),("chimp","gibbon"))
  58.  
  59. # Topology 4: (human,(((gibbon,(oran-pa,oran-pp)),gorilla),(chimp,bonobo)))
  60. tree4 = ("human",((( "gibbon",("oran-pa","oran-pp") ),"gorilla"),("chimp","bonobo")))
  61.  
  62.  
  63. # -------------------------
  64. # Run and print
  65. # -------------------------
  66. for i,tree in enumerate([tree1, tree2, tree3, tree4], start=1):
  67. print(f"Tree {i} score = {fitch_score(tree)}")
  68.  
Success #stdin #stdout 0.13s 14172KB
stdin
Standard input is empty
stdout
Tree 1 score = 84
Tree 2 score = 87
Tree 3 score = 94
Tree 4 score = 84