import java.io.BufferedReader;
import java.io.FileReader;
import java.util.LinkedList;
import java.util.List;
class RabinKarp {
// radix: 65536 for java chars, can actually be much smaller since we are hashing
private final static long d
= Character.
MAX_VALUE + 1; // prime number chosen so that d*(q+1) < 2^63-1 (fits in a java long)
private final static long q = 96293;
/**
* Search for the given pattern in the text using the Rabin-Karp algorithm.
* The radix-d base d and the prime number q for hashing are chosen as the
* maximum value a char can have (65536 in java) and such that d*(q+1) fits
* in a java int, i.e., q<32767.
*
* @param text
* Text to search in.
* @param pattern
* Pattern to search.
* @return Index in the text at which the first occurrence of the pattern
* appears.
*/
public List<Integer> findMatches(char[] text, char[] pattern) {
return findMatches(text, pattern, d, q);
}
/**
* Search for the given pattern in the text using the Rabin-Karp algorithm.
* The radix-d base and the prime number for hashing must be provided as
* arguments.
*
* @param text
* Text to search in.
* @param pattern
* Pattern to search.
* @param d
* Base for the hashing.
* @param q
* Prime number to use in the hashing functions.
* @return Index in the text at which the first occurrence of the pattern
* appears.
*/
public List<Integer> findMatches(char[] text, char[] pattern, long d, long q) {
// length of pattern
int m = pattern.length;
// do I have enough text?
if (m > text.length)
return new LinkedList<>();
LinkedList<Integer> ret = new LinkedList<>();
long p = 0;
long t = 0;
for (int i = 0; i < m; i++) {
// I can get away with that because d*(q+1) < 2^63-1
// otherwise I risk overflowing
p = (p * d + pattern[i]) % q;
t = (t * d + text[i]) % q;
}
// the most significant digit
long h = powMod(d, m - 1, q); // d^(m-1) mod q
int nMinusM = text.length - m;
for (int i = 0; i <= nMinusM; i++) {
if (p == t)
if (compare(text, i, pattern))
ret.add(i);
if (i < nMinusM) {
t -= h * text[i];
// necessary because the previous subtraction most likely leads
// to a negative value and java doesn't know of modulo of a
// negative number.
while (t < 0)
t += q;
t = (d * t + text[i + m]) % q;
}
}
return ret;
}
/**
* Directly compares the pattern to a substring in the text starting at the
* given location.
*
* @param text
* The text.
* @param start
* Start of the substring in the text.
* @param pattern
* The pattern.
* @return True if text[start,...,start+pattern.length] equals
* pattern[0,...,pattern.length].
*/
private boolean compare(char[] text, int start, char[] pattern) {
if (text.length - start < pattern.length)
return false;
for (int i = 0; i < pattern.length; i++)
if (text[i + start] != pattern[i])
return false;
return true;
}
/**
* Computes the power d to the n modulo q.
*
* @param d
* @param n
* @param q
* @return d^n mod q.
*/
private long powMod(long d, long n, long q) {
if (n == 0)
return 1;
if (n == 1)
return d % q;
long j = powMod(d, n / 2, q);
j = (j * j) % q;
if (n % 2 == 0)
return j;
return ((j * d) % q);
}
public static void main
(String[] args
){ RabinKarp matcher = new RabinKarp();
matcher.findMatches(
"hello world hell world hello".toCharArray(), // text
"hello".toCharArray())); // pattern
}
}