fork download
  1. class PKNode {
  2. constructor() {
  3. this.id = -1;
  4. this.parent = null, this.child = [];
  5. this.name = this.meta = this.seq = "";
  6. this.d = -1.0; // branch length
  7. }
  8. }
  9.  
  10. function pk_parse_nh(str) {
  11. const re = /(\(|((\)?[^,;:\[\]\(\)]+|\))(:[\d.eE\-]+)?(\[[^\[\]]*\])?))/g;
  12. const str_compact = str.replace(/[;\s\n]+/g, ""); // compacted string without SPACE, new line or ';'
  13. let tree = { root:null, node:[], error:0 };
  14. let m, stack = [], leaf_cnt = {};
  15. while ((m = re.exec(str_compact)) != null) { // each match returns '(' or a node
  16. let p = new PKNode();
  17. if (m[1] == '(') {
  18. p.id = -2; // a placehold for the left '('
  19. } else if (m[3]) {
  20. if (m[3][0] == ')') { // an internal node
  21. let q;
  22. while (stack.length > 0) { // look for the matching '('
  23. q = stack.pop();
  24. if (q.id == -2) break;
  25. p.child.push(q);
  26. }
  27. p.child.reverse();
  28. if (q.id != -2) {
  29. tree.error |= 1; // ERROR: unmatched ')'
  30. break;
  31. }
  32. for (const q of p.child) q.parent = p; // set parent node
  33. if (m[3].length > 1) p.name = m[3].substr(1); // set internal name if present
  34. } else { // a leaf
  35. p.name = m[3];
  36. if (leaf_cnt[p.name] == null) leaf_cnt[p.name] = 0;
  37. if (++leaf_cnt[p.name] > 1) tree.error |= 8; // ERROR: duplicated leaf name
  38. }
  39. if (m[4]) p.d = parseFloat(m[4].substr(1)); // set branch length if present
  40. p.id = tree.node.length;
  41. tree.node.push(p);
  42. }
  43. stack.push(p);
  44. }
  45. if (stack.length != 1) tree.error |= 2; // ERROR: unmatched '('
  46. tree.root = tree.node[tree.node.length - 1];
  47. return tree;
  48. }
  49.  
  50. function pk_seq_len(node) {
  51. let len = -1;
  52. for (let i = 0; i < node.length; ++i) {
  53. const p = node[i];
  54. if (p.child.length == 0) {
  55. if (len < 0) len = p.seq.length;
  56. else if (len != p.seq.length) return -1;
  57. }
  58. }
  59. return len;
  60. }
  61.  
  62. function pk_parse_msa_simple(tree, msa_str) {
  63. let h = {};
  64. for (let i = 0; i < tree.node.length; ++i) {
  65. const p = tree.node[i];
  66. if (p.child.length == 0)
  67. h[p.name] = i;
  68. }
  69. for (const line of msa_str.split("\n")) {
  70. let t = line.split(/\s+/);
  71. if (t.length < 2) return;
  72. if (h[t[0]] == null) return;
  73. tree.node[h[t[0]]].seq += t[1];
  74. }
  75. return pk_seq_len(tree.node);
  76. }
  77.  
  78. function pk_mp1(node, col) {
  79. let R = [], c = 0;
  80. for (let i = 0; i < node.length; ++i) R[i] = {};
  81. for (let i = 0; i < node.length; ++i) {
  82. const p = node[i];
  83. if (p.child.length == 0) { // leaf
  84. R[i][p.seq[col]] = 1;
  85. } else { // internal node
  86. let h = {};
  87. for (let j = 0; j < p.child.length; ++j) {
  88. const Rj = R[p.child[j].id];
  89. for (const x in Rj) {
  90. if (h[x] == null) h[x] = 0;
  91. ++h[x];
  92. }
  93. }
  94. let inter = {}, n_inter = 0;
  95. for (const x in h)
  96. if (h[x] == p.child.length)
  97. inter[x] = 1, ++n_inter;
  98. if (n_inter > 0) R[i] = inter;
  99. else R[i] = h, ++c;
  100. }
  101. }
  102. let a = [];
  103. for (const x in R[node.length-1])
  104. a.push(x);
  105. return [c, a];
  106. }
  107.  
  108. const msa =
  109. `human TCCTGCCTCATCCTATTATTTATCGCACCTAC-GTTCAATATTACAGGCGAA-CATA-CTTACTAAAGTGTGTTAATTAATTAATGCTTGTAG
  110. bonobo TCCTGCCCCATTACGTTATTTATCGCACCTAC-GTTCAATATTATTACCTAG-CATGATTTACTAAAGCGTGTTAATTAATTAATGCTTGTAG
  111. chimp TCCTGCCCCATTGTATTATTTATCGCACCTAC-GTTCAATATTACGACCTAG-CATA-CCTACTAAAGTGTGTTGATTAATTAATGCTTGCAG
  112. gorilla TCCTGCCCCATGCTACCATTTATCGCACCTAC-GTTCAATATTACAGCCGAG-CGCA-CAGTGTTCATGGTGTTAATTAATTCATGCTTGTTG
  113. oran-pa TCCTACCTCATGCCATTATTAATCGCGCCTAATATCCAATATCCTAGCCCCACCCTC-AGTGTTTGAAGCTGCTATTTAATTTATGCTAG-AG
  114. oran-pp TCCTGCCCCATGGCGTTATTGATCGCGCCTAACGTCCAATGTTCTAGCGCCC-CCTC-CCTATTGAAAGTTGTTATTTAATTTATGCTAG-AG
  115. gibbon TTCTGACCCATCCTATTGTTGATCGCGCCTAC-GTTCAATATCCCAGCCGAG-CATA-CTTACACTAAGGTGTTAATTAATTCATGCTTGTTG`;
  116.  
  117. const trees = [
  118. "((((human,(chimp,bonobo)),gorilla),(oran-pa,oran-pp)),gibbon)",
  119. "((((human,gorilla),(chimp,bonobo)),(oran-pa,oran-pp)),gibbon)",
  120. "((((human,bonobo),gorilla),(oran-pa,oran-pp)),(chimp,gibbon))",
  121. "(human,(((gibbon,(oran-pa,oran-pp)),gorilla),(chimp,bonobo)))"];
  122.  
  123. for (let i = 0; i < trees.length; ++i) {
  124. let tree = pk_parse_nh(trees[i]);
  125. let len = pk_parse_msa_simple(tree, msa);
  126. let tot = 0;
  127. for (let i = 0; i < len; ++i) {
  128. let [c, a] = pk_mp1(tree.node, i);
  129. tot += c;
  130. }
  131. console.log(tot, trees[i]);
  132. }
  133.  
Success #stdin #stdout 0.07s 47556KB
stdin
Standard input is empty
stdout
84 ((((human,(chimp,bonobo)),gorilla),(oran-pa,oran-pp)),gibbon)
87 ((((human,gorilla),(chimp,bonobo)),(oran-pa,oran-pp)),gibbon)
94 ((((human,bonobo),gorilla),(oran-pa,oran-pp)),(chimp,gibbon))
84 (human,(((gibbon,(oran-pa,oran-pp)),gorilla),(chimp,bonobo)))