/*PROMPT:
* Consider the problem of searching for genes in DNA sequences using Horspool's algorithm.
* A DNA sequence is represented by a test on the alphabet (A, C, G, T), and the gene segement
* is the pattern. Write a Java Program that:
* inputs DNA sequence and a pattern
* constructs the shift table for the pattern
* applies the Horspool's algorithm to locate the pattern in the DNA sequence
*
* Test your program with the following:
* pattern: TCCTATTCTT (it is chromosome 10)
* DNA Sequence: TTATAGATCTCGTATTCTTTTATAGATCTCCTATTCTT
*/
import java.util.*;
class Horspool {
/* ShiftTable ALGORITHM
* Fills the shift table used by Horspool's algorithm
* Input: Pattern P [0..m-1] and an alphabet of possible characters
* Output: Table[0..size-1] indexed by the alphabet's characters and
* filled with shift sizes computed with the formula:
* t(c) = the pattern's length m, if c is not among the first m-1 characters of the pattern
* else, the distance from the rightmost c among the first m-1 characters of the pattern to its last character
* for i = 0 to size -1 do Table[i] = m
* for j = 0 to m-2 do Table[P[J]]= m-1-j
* return Table
*/
public static int[] ShiftTable
(String P
) { int[] Table = new int[128];
for (int i = 0; i < 128; i++) {
Table[i] = -1;
}
for (int i = 0; i < P.length()-1; i++) {
Table[P.charAt(i)] = i;
}
return Table;
}
/*HorspoolMatching(P[0..m-1], T[0..n-1]) ALGORITHM
* Implements Horspool's algorithm for string matching
* Input: Pattern P[0..m-1] and text T[0..n-1]
* Output: The index of the left and of the first matching substring
* or -1 if their are no matches
* ShiftTable(P[0..m-1]) //generate table of shifts
* i = m-1 //position the pattern's right end
* while i <= n-1 do
* k = 0
* while k<= m-1 and P[m-1-k]=T[i-k] do
* k += 1
* if k = m
* return i-m+1
* else i = i+Table[T[i]]
* return -1
*/
int[] Table;
int k, j, m, n;
n = T.length();
m = P.length();
Table = ShiftTable(P);
k = 0;
while ( k < (n-m) ) {
j = m-1;
while ( P.charAt(j) == T.charAt(k+j)) {
j--;
if ( j < 0 )
return (k);
}
k = k + (m-1) - Table[T.charAt(k+(m-1))];
}
return -1;
}
//a function that runs the HorspoolMatching() function and prints the result.
public static void main
(String[] args
) { int q;
T = "TTATAGATCTCGTATTCTTTTATAGATCTCCTATTCTT";
P = "TCCTATTCTT";
q = HorspoolMatching (T, P);
System.
out.
println("String " + P
+ " found at pos: " + q
);
//A test using a different pattern from the string.
/* T = "TTATAGATCTCGTATTCTTTTATAGATCTCCTATTCTT";
P = "TCTCGTAT";
q = HorspoolMatching (T, P);
System.out.println("String " + P + " found at pos: " + q);*/
}
}