• Source
    1. using System;
    2. using System.Collections;
    3. using System.Collections.Generic;
    4. using System.Linq;
    5. using System.Text;
    6.  
    7. namespace SEA2016.FuzzySearch
    8. {
    9. /// <summary>
    10. /// Check if two strings are within specified Levenshtein distance from each other
    11. /// using shift-and algorithm also known as Bitap or Baeza-Yates–Gonnet algorithm
    12. /// https://en.wikipedia.org/wiki/Bitap_algorithm
    13. /// </summary>
    14. public static class LevenshteinBitap
    15. {
    16. public static bool FuzzyMatch(string pattern, string text, int d)
    17. {
    18. int m = pattern.Length;
    19. if (m > sizeof(UInt64) * 8 - d)
    20. {
    21. throw new ArgumentException("Pattern is too long");
    22. }
    23.  
    24. if (m == 0)
    25. {
    26. return false;
    27. }
    28.  
    29. var alphabet = pattern.Select(y => y).Distinct().OrderBy(y => y).ToArray();
    30. var first_a = alphabet.First();
    31. var last_a = alphabet.Last();
    32.  
    33. var T = new UInt64[last_a - first_a + 1];
    34. for (int i = 0; i < T.Length; ++i)
    35. {
    36. T[i] = ~(0ul);
    37. }
    38.  
    39. /* Initialize characteristic vectors T */
    40. for (int i = 0; i < m; ++i)
    41. {
    42. int c = (int)(pattern[i] - first_a);
    43. T[c] &= ~(1ul << (i + d));
    44. }
    45.  
    46. /* Initialize the bit arrays R. */
    47. UInt64[] R = new UInt64[d + 1];
    48.  
    49. UInt64[] prevR = new UInt64[d + 1];
    50. for (int i = 0; i < d + 1; ++i)
    51. {
    52. prevR[i] = (~0ul) << (i + d);
    53. }
    54.  
    55. UInt64 mask = 1ul << (m + d - 1);
    56.  
    57. for (int i = 0; i < text.Length; ++i)
    58. {
    59. char c = text[i];
    60. UInt64 Tc = (c >= first_a && c <= last_a) ? T[(int)(c - first_a)] : (~0ul);
    61.  
    62. R[0] = (prevR[0] << 1) | Tc;
    63. for (int j=1; j<d+1; ++j)
    64. {
    65. R[j] = (prevR[j - 1]) &
    66. (R[j-1] << 1) &
    67. (prevR[j - 1] << 1) &
    68. ((prevR[j] << 1) | Tc);
    69. }
    70.  
    71. var t = R; R = prevR; prevR = t;
    72. }
    73.  
    74. if ((prevR[d] & mask) == 0)
    75. {
    76. return true;
    77. }
    78.  
    79. return false;
    80. }
    81. }
    82.  
    83. public static class Program
    84. {
    85. public static void Main(string[] args)
    86. {
    87. var words = Console.ReadLine().Split(' ');
    88.  
    89. Console.WriteLine($"Matching words {words[0]} and {words[1]} using bitap algorithm with distance {words[2]}:");
    90. Console.WriteLine(LevenshteinBitap.FuzzyMatch(words[0], words[1], int.Parse(words[2])));
    91. }
    92. }
    93. }
    94.