fork download
  1. #include <cstdlib>
  2. #include <string>
  3. #include <iostream>
  4. using namespace std;
  5.  
  6. unsigned char mask[] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
  7. #define tget(i) ( (t[(i)/8]&mask[(i)%8]) ? 1 : 0 )
  8. #define tset(i, b) t[(i)/8]=(b) ? (mask[(i)%8]|t[(i)/8]) : ((~mask[(i)%8])&t[(i)/8])
  9. #define chr(i) (cs==sizeof(int)?((int*)s)[i]:((unsigned char *)s)[i])
  10. #define isLMS(i) (i>0 && tget(i) && !tget(i-1))
  11.  
  12. // find the start or end of each bucket
  13. void getBuckets(unsigned char *s, int *bkt, int n, int K, int cs, bool end) {
  14. int i, sum = 0;
  15. for (i = 0; i <= K; i++)
  16. bkt[i] = 0; // clear all buckets
  17. for (i = 0; i < n; i++)
  18. bkt[chr(i)]++; // compute the size of each bucket
  19. for (i = 0; i <= K; i++) {
  20. sum += bkt[i];
  21. bkt[i] = end ? sum : sum - bkt[i];
  22. }
  23. }
  24. // compute SAl
  25. void induceSAl(unsigned char *t, int *SA, unsigned char *s, int *bkt, int n, int K, int cs, bool end) {
  26. int i, j;
  27. getBuckets(s, bkt, n, K, cs, end); // find starts of buckets
  28. for (i = 0; i < n; i++) {
  29. j = SA[i] - 1;
  30. if (j >= 0 && !tget(j))
  31. SA[bkt[chr(j)]++] = j;
  32. }
  33. }
  34. // compute SAs
  35. void induceSAs(unsigned char *t, int *SA, unsigned char *s, int *bkt, int n, int K, int cs, bool end) {
  36. int i, j;
  37. getBuckets(s, bkt, n, K, cs, end); // find ends of buckets
  38. for (i = n - 1; i >= 0; i--) {
  39. j = SA[i] - 1;
  40. if (j >= 0 && tget(j))
  41. SA[--bkt[chr(j)]] = j;
  42. }
  43. }
  44.  
  45. // find the suffix array SA of s[0..n-1] in {1..K}^n
  46. // require s[n-1]=0 (the sentinel!), n>=2
  47. // use a working space (excluding s and SA) of at most 2.25n+O(1) for a constant alphabet
  48. void SA_IS(unsigned char *s, int *SA, int n, int K, int cs) {
  49. int i, j;
  50. unsigned char *t = (unsigned char *) malloc(n / 8 + 1); // LS-type array in bits
  51. // Classify the type of each character
  52. tset(n-2, 0);
  53. tset(n-1, 1); // the sentinel must be in s1, important!!!
  54. for (i = n - 3; i >= 0; i--)
  55. tset(i, (chr(i)<chr(i+1) || (chr(i)==chr(i+1) && tget(i+1)==1))?1:0);
  56. // stage 1: reduce the problem by at least 1/2
  57. // sort all the S-substrings
  58. int *bkt = (int *) malloc(sizeof(int) * (K + 1)); // bucket array
  59. getBuckets(s, bkt, n, K, cs, true); // find ends of buckets
  60. for (i = 0; i < n; i++)
  61. SA[i] = -1;
  62. for (i = 1; i < n; i++)
  63. if (isLMS(i))
  64. SA[--bkt[chr(i)]] = i;
  65. induceSAl(t, SA, s, bkt, n, K, cs, false);
  66. induceSAs(t, SA, s, bkt, n, K, cs, true);
  67. free(bkt);
  68. // compact all the sorted substrings into the first n1 items of SA
  69. // 2*n1 must be not larger than n (proveable)
  70. int n1 = 0;
  71. for (i = 0; i < n; i++)
  72. if (isLMS(SA[i]))
  73. SA[n1++] = SA[i];
  74. // find the lexicographic names of all substrings
  75. for (i = n1; i < n; i++)
  76. SA[i] = -1; // init the name array buffer
  77. int name = 0, prev = -1;
  78. for (i = 0; i < n1; i++) {
  79. int pos = SA[i];
  80. bool diff = false;
  81. for (int d = 0; d < n; d++)
  82. if (prev == -1 || chr(pos+d) != chr(prev+d) || tget(pos+d) != tget(prev+d)) {
  83. diff = true;
  84. break;
  85. } else if (d > 0 && (isLMS(pos+d) || isLMS(prev+d)))
  86. break;
  87. if (diff) {
  88. name++;
  89. prev = pos;
  90. }
  91. pos = (pos % 2 == 0) ? pos / 2 : (pos - 1) / 2;
  92. SA[n1 + pos] = name - 1;
  93. }
  94. for (i = n - 1, j = n - 1; i >= n1; i--)
  95. if (SA[i] >= 0)
  96. SA[j--] = SA[i];
  97. // stage 2: solve the reduced problem
  98. // recurse if names are not yet unique
  99. int *SA1 = SA, *s1 = SA + n - n1;
  100. if (name < n1)
  101. SA_IS((unsigned char*) s1, SA1, n1, name - 1, sizeof(int));
  102. else
  103. // generate the suffix array of s1 directly
  104. for (i = 0; i < n1; i++)
  105. SA1[s1[i]] = i;
  106. // stage 3: induce the result for the original problem
  107. bkt = (int *) malloc(sizeof(int) * (K + 1)); // bucket array
  108. // put all left-most S characters into their buckets
  109. getBuckets(s, bkt, n, K, cs, true); // find ends of buckets
  110. for (i = 1, j = 0; i < n; i++)
  111. if (isLMS(i))
  112. s1[j++] = i; // get p1
  113. for (i = 0; i < n1; i++)
  114. SA1[i] = s1[SA1[i]]; // get index in s
  115. for (i = n1; i < n; i++)
  116. SA[i] = -1; // init SA[n1..n-1]
  117. for (i = n1 - 1; i >= 0; i--) {
  118. j = SA[i];
  119. SA[i] = -1;
  120. SA[--bkt[chr(j)]] = j;
  121. }
  122. induceSAl(t, SA, s, bkt, n, K, cs, false);
  123. induceSAs(t, SA, s, bkt, n, K, cs, true);
  124. free(bkt);
  125. free(t);
  126. }
  127.  
  128. const int maxn = 200000;
  129. int sa[maxn];
  130. int lcp[maxn];
  131. int _rank[maxn];
  132. unsigned char *s;
  133. int n;
  134.  
  135. void calc_lcp() {
  136. for (int i = 0; i < n; i++)
  137. _rank[sa[i]] = i;
  138. for (int i = 0, h = 0; i < n; i++) {
  139. if (_rank[i] < n - 1) {
  140. for (int j = sa[_rank[i] + 1]; s[i + h] == s[j + h]; ++h)
  141. ;
  142. lcp[_rank[i]] = h;
  143. if (h > 0)
  144. --h;
  145. }
  146. }
  147. }
  148.  
  149. int main() {
  150. string str = "CCCCC";
  151. n = str.size();
  152. s = (unsigned char*) str.c_str();
  153. SA_IS(s, sa, n + 1, 256, 1);
  154. calc_lcp();
  155.  
  156. for (int i = 0; i < n; i++) {
  157. cout << str.substr(sa[i + 1]);
  158. if (i < n - 1)
  159. cout << " " << lcp[i + 1];
  160. cout << endl;
  161. }
  162. }
Success #stdin #stdout 0s 17592KB
stdin
Standard input is empty
stdout
C 1
CC 2
CCC 3
CCCC 0
CCCCC