using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace SEA2016.FuzzySearch
{
///
/// Check if two strings are within specified Levenshtein distance from each other
/// using shift-and algorithm also known as Bitap or Baeza-Yates–Gonnet algorithm
/// https://e...content-available-to-author-only...a.org/wiki/Bitap_algorithm
///
public static class LevenshteinBitap
{
public static bool FuzzyMatch(string pattern, string text, int d)
{
int m = pattern.Length;
if (m > sizeof(UInt64) * 8 - d)
{
throw new ArgumentException("Pattern is too long");
}
if (m == 0)
{
return false;
}
var alphabet = pattern.Select(y => y).Distinct().OrderBy(y => y).ToArray();
var first_a = alphabet.First();
var last_a = alphabet.Last();
var T = new UInt64[last_a - first_a + 1];
for (int i = 0; i < T.Length; ++i)
{
T[i] = ~(0ul);
}
/* Initialize characteristic vectors T */
for (int i = 0; i < m; ++i)
{
int c = (int)(pattern[i] - first_a);
T[c] &= ~(1ul << (i + d));
}
/* Initialize the bit arrays R. */
UInt64[] R = new UInt64[d + 1];
UInt64[] prevR = new UInt64[d + 1];
for (int i = 0; i < d + 1; ++i)
{
prevR[i] = (~0ul) << (i + d);
}
UInt64 mask = 1ul << (m + d - 1);
for (int i = 0; i < text.Length; ++i)
{
char c = text[i];
UInt64 Tc = (c >= first_a && c <= last_a) ? T[(int)(c - first_a)] : (~0ul);
R[0] = (prevR[0] << 1) | Tc;
for (int j=1; j