fork download
  1. #include <iostream>
  2. #include <string>
  3. #include <cstdlib>
  4. #include <ctime>
  5. #include <vector>
  6. #include <assert.h>
  7. #include <iomanip>
  8.  
  9. // demo for SO answer at http://stackoverflow.com/a/38491405/2932052
  10. using namespace std;
  11.  
  12. const string CODE = "ATCG";
  13. vector<unsigned> buildRMap()
  14. {
  15. vector<unsigned> result(256, 0);
  16. for (size_t i=0; i<CODE.size(); ++i) {
  17. result[CODE[i]]= i;
  18. }
  19. return result;
  20. }
  21. const vector<unsigned> RMAP = buildRMap();
  22.  
  23. enum { SEQ_LEN = 16 };
  24.  
  25. string randomDna()
  26. {
  27. string result(SEQ_LEN, ' ');
  28. for (size_t i=0; i<SEQ_LEN; ++i ){
  29. result[i] = CODE[rand() & 3];
  30. }
  31. return result;
  32. }
  33.  
  34. int distS(const char* const a, const char* const b)
  35. {
  36. int result = 0;
  37. for (size_t i=0; i<SEQ_LEN; ++i) {
  38. result += a[i] != b[i];
  39. }
  40. return result;
  41. }
  42.  
  43. unsigned encodeV(const string& atcg)
  44. {
  45. assert(atcg.size() == SEQ_LEN);
  46. unsigned result = 0;
  47. for (size_t i=0; i<SEQ_LEN; ++i) {
  48. result |= RMAP[atcg[i]] << (i << 1);
  49. }
  50. return result;
  51. }
  52.  
  53. int distV(const unsigned va, const unsigned vb)
  54. {
  55. const unsigned x = va ^ vb;
  56. const unsigned bn = ((x & 0xaaaaaaaa) >> 1 ) | (x & 0x55555555);
  57. return __builtin_popcount(bn);
  58. }
  59.  
  60. int main()
  61. {
  62. srand(time(0));
  63. const string a = randomDna();
  64. const unsigned va = encodeV(a);
  65. cout << a << " -> V: 0x" << setfill('0') << setw(8) << hex << va << endl;
  66. const string b = randomDna();
  67. const unsigned vb = encodeV(b);
  68. cout << b << " -> V: 0x" << setfill('0') << setw(8) << hex << vb << endl;
  69. cout << "dS=" << dec << distS(a.c_str(), b.c_str()) << endl;
  70. cout << "dV=" << dec << distV(va, vb) << endl;
  71. return 0;
  72. }
  73.  
/*
 * Sample run: Success (#stdin #stdout), 0s, 2820KB
 *
 * stdin:
 *   Standard input is empty
 *
 * stdout:
 *   GTATCTGTTGACCGGA -> V: 0x3e8d7647
 *   AATAATTTAGACAATG -> V: 0xd08c5410
 *   dS=11
 *   dV=11
 */