• Source
    1. import java.io.BufferedReader;
    2. import java.io.FileReader;
    3. import java.util.LinkedList;
    4. import java.util.List;
    5.  
    6. class RabinKarp {
    7. // radix: 65536 for java chars, can actually be much smaller since we are hashing
    8. private final static long d = Character.MAX_VALUE + 1;
    9. // prime number chosen so that d*(q+1) < 2^63-1 (fits in a java long)
    10. private final static long q = 96293;
    11. /**
    12.   * Search for the given pattern in the text using the Rabin-Karp algorithm.
    13.   * The radix-d base d and the prime number q for hashing are chosen as the
    14.   * maximum value a char can have (65536 in java) and such that d*(q+1) fits
    15.   * in a java int, i.e., q<32767.
    16.   *
    17.   * @param text
    18.   * Text to search in.
    19.   * @param pattern
    20.   * Pattern to search.
    21.   * @return Index in the text at which the first occurrence of the pattern
    22.   * appears.
    23.   */
    24. public List<Integer> findMatches(char[] text, char[] pattern) {
    25. return findMatches(text, pattern, d, q);
    26. }
    27. /**
    28.   * Search for the given pattern in the text using the Rabin-Karp algorithm.
    29.   * The radix-d base and the prime number for hashing must be provided as
    30.   * arguments.
    31.   *
    32.   * @param text
    33.   * Text to search in.
    34.   * @param pattern
    35.   * Pattern to search.
    36.   * @param d
    37.   * Base for the hashing.
    38.   * @param q
    39.   * Prime number to use in the hashing functions.
    40.   * @return Index in the text at which the first occurrence of the pattern
    41.   * appears.
    42.   */
    43. public List<Integer> findMatches(char[] text, char[] pattern, long d, long q) {
    44. // length of pattern
    45. int m = pattern.length;
    46. // do I have enough text?
    47. if (m > text.length)
    48. return new LinkedList<>();
    49. LinkedList<Integer> ret = new LinkedList<>();
    50. long p = 0;
    51. long t = 0;
    52. for (int i = 0; i < m; i++) {
    53. // I can get away with that because d*(q+1) < 2^63-1
    54. // otherwise I risk overflowing
    55. p = (p * d + pattern[i]) % q;
    56. t = (t * d + text[i]) % q;
    57. }
    58. // the most significant digit
    59. long h = powMod(d, m - 1, q); // d^(m-1) mod q
    60. int nMinusM = text.length - m;
    61. for (int i = 0; i <= nMinusM; i++) {
    62. if (p == t)
    63. if (compare(text, i, pattern))
    64. ret.add(i);
    65. if (i < nMinusM) {
    66. t -= h * text[i];
    67. // necessary because the previous subtraction most likely leads
    68. // to a negative value and java doesn't know of modulo of a
    69. // negative number.
    70. while (t < 0)
    71. t += q;
    72. t = (d * t + text[i + m]) % q;
    73. }
    74. }
    75. return ret;
    76. }
    77. /**
    78.   * Directly compares the pattern to a substring in the text starting at the
    79.   * given location.
    80.   *
    81.   * @param text
    82.   * The text.
    83.   * @param start
    84.   * Start of the substring in the text.
    85.   * @param pattern
    86.   * The pattern.
    87.   * @return True if text[start,...,start+pattern.length] equals
    88.   * pattern[0,...,pattern.length].
    89.   */
    90. private boolean compare(char[] text, int start, char[] pattern) {
    91. if (text.length - start < pattern.length)
    92. return false;
    93. for (int i = 0; i < pattern.length; i++)
    94. if (text[i + start] != pattern[i])
    95. return false;
    96. return true;
    97. }
    98. /**
    99.   * Computes the power d to the n modulo q.
    100.   *
    101.   * @param d
    102.   * @param n
    103.   * @param q
    104.   * @return d^n mod q.
    105.   */
    106. private long powMod(long d, long n, long q) {
    107. if (n == 0)
    108. return 1;
    109. if (n == 1)
    110. return d % q;
    111. long j = powMod(d, n / 2, q);
    112. j = (j * j) % q;
    113. if (n % 2 == 0)
    114. return j;
    115. return ((j * d) % q);
    116. }
    117.  
    118. public static void main(String[] args){
    119. RabinKarp matcher = new RabinKarp();
    120. System.out.println(
    121. matcher.findMatches(
    122. "hello world hell world hello".toCharArray(), // text
    123. "hello".toCharArray())); // pattern
    124. }
    125. }