• Source
    1. using System;
    2. using System.Collections.Generic;
    3. using System.Linq;
    4. using System.Text;
    5.  
    6. namespace SEA2016.FuzzySearch
    7. {
    8. /// <summary>
    9. /// Calculate Levenshtein distance between two strings using Levenshtein automaton
    10. /// https://en.wikipedia.org/wiki/Levenshtein_automaton
    11. /// </summary>
    12. public static class LevenshteinAutomaton
    13. {
    14. public static bool Match(string pattern, string text, int d)
    15. {
    16. var dfa = CreateAutomaton(pattern, d).Determinize();
    17. var s = 0;
    18. for (int i=0; i<text.Length; ++i)
    19. {
    20. s = dfa.Next(s, text[i]);
    21. }
    22. return dfa.IsFinal(s);
    23. }
    24.  
    25. public static NFA CreateAutomaton(string a, int k)
    26. {
    27. if (a.Contains('*'))
    28. {
    29. throw new ArgumentException("Star is a reserved character");
    30. }
    31.  
    32. var result = new NFA();
    33. var m = a.Length + 1;
    34.  
    35. /* Create |a|*k states */
    36. for (int i = 0; i < m; ++i)
    37. {
    38. for (int j = 0; j <= k; ++j)
    39. {
    40. result.AddState(i + m * j, i == a.Length);
    41. }
    42. }
    43.  
    44. /* Create transitions */
    45. for (int i = 0; i < m; ++i)
    46. {
    47. for (int j = 0; j <= k; ++j)
    48. {
    49. if (i < m - 1)
    50. {
    51. result.AddTransition(i + m * j, i + 1 + m * j, a[i]);
    52. }
    53.  
    54. if (j < k)
    55. {
    56. if (i < m - 1)
    57. {
    58. result.AddTransition(i + m * j, i + 1 + m * (j + 1), NFA.Any);
    59. result.AddTransition(i + m * j, i + 1 + m * (j + 1), NFA.Epsilon);
    60. }
    61.  
    62. result.AddTransition(i + m * j, i + m * (j + 1), NFA.Any);
    63. }
    64. }
    65. }
    66.  
    67. return result;
    68. }
    69. }
    70.  
    71. public class DFA
    72. {
    73. private readonly List<int> star = new List<int>();
    74. private readonly List<List<Tuple<char, int>>> transitions = new List<List<Tuple<char, int>>>();
    75. private readonly HashSet<int> final = new HashSet<int>();
    76.  
    77. public void AddState(int state, bool isFinal)
    78. {
    79. if (state != transitions.Count)
    80. {
    81. throw new ArgumentException();
    82. }
    83.  
    84. star.Add(-1);
    85. transitions.Add(new List<Tuple<char, int>>());
    86. if (isFinal)
    87. {
    88. final.Add(state);
    89. }
    90. }
    91.  
    92. public void AddTransition(int from, int to, char c)
    93. {
    94. if (c == NFA.Any)
    95. {
    96. star[from] = to;
    97. }
    98. else
    99. {
    100. transitions[from].Add(new Tuple<char, int>(c, to));
    101. }
    102. }
    103.  
    104. public int Next(int s, char c)
    105. {
    106. if (s == -1) return -1;
    107. foreach (var t in transitions[s])
    108. {
    109. if (t.Item1 == c) return t.Item2;
    110. }
    111. return star[s];
    112. }
    113.  
    114. public bool IsFinal(int s)
    115. {
    116. return final.Contains(s);
    117. }
    118.  
    119. public string ToDotNotation()
    120. {
    121. var result = new StringBuilder();
    122. result.AppendLine("digraph DFA {");
    123. result.AppendLine("rankdir = LR;");
    124. result.AppendLine("orientation = Portrait;");
    125.  
    126. for (int i = 0; i < transitions.Count; ++i)
    127. {
    128. if (i == 0)
    129. {
    130. result.AppendFormat("{0}[label = \"{0}\", shape = circle, style = bold, fontsize = 14]", i);
    131. result.AppendLine();
    132. }
    133. else if (final.Contains(i))
    134. {
    135. result.AppendFormat("{0}[label = \"{0}\", shape = doublecircle, style = bold, fontsize = 14]", i);
    136. result.AppendLine();
    137. }
    138. else {
    139. result.AppendFormat("{0}[label = \"{0}\", shape = circle, style = solid, fontsize = 14]", i);
    140. result.AppendLine();
    141. }
    142.  
    143. foreach (var t in transitions[i])
    144. {
    145. result.AppendFormat("{0}->{1} [label = \"{2}\", fontsize = 14];", i, t.Item2, t.Item1);
    146. result.AppendLine();
    147. }
    148.  
    149. if (star[i] >= 0)
    150. {
    151. result.AppendFormat("{0}->{1} [label = \"*\", fontsize = 14];", i, star[i]);
    152. result.AppendLine();
    153. }
    154. }
    155.  
    156. result.AppendLine("}");
    157. return result.ToString();
    158. }
    159. }
    160.  
    161. /// <summary>
    162. /// Nondeterministic finite automaton
    163. /// https://en.wikipedia.org/wiki/Nondeterministic_finite_automaton
    164. /// </summary>
    165. public class NFA
    166. {
    167. public static char Epsilon = 'ε';
    168. public static char Any = '*';
    169.  
    170. private int initial = 0;
    171. private readonly List<int> states = new List<int>();
    172. private readonly List<Tuple<int, int, char>> transitions = new List<Tuple<int, int, char>>();
    173. private readonly HashSet<int> final = new HashSet<int>();
    174.  
    175. public void AddState(int state, bool isFinal)
    176. {
    177. states.Add(state);
    178. if (isFinal)
    179. {
    180. final.Add(state);
    181. }
    182. }
    183.  
    184. public void AddTransition(int from, int to, char c)
    185. {
    186. if (!transitions.Any(t => t.Item1 == from && t.Item2 == to && t.Item3 == c))
    187. {
    188. transitions.Add(new Tuple<int, int, char>(from, to, c));
    189. }
    190. }
    191.  
    192. public static NFA AnyOf(NFA left, NFA right)
    193. {
    194. var result = new NFA();
    195. result.AddState(0, false);
    196. result.AddTransition(0, result.Add(left), Epsilon);
    197. result.AddTransition(0, result.Add(right), Epsilon);
    198. return result;
    199. }
    200.  
    201. public static NFA Star(NFA input)
    202. {
    203. var result = new NFA();
    204.  
    205. var newFinal = result.NewState();
    206. result.AddState(newFinal, true);
    207.  
    208. var start = result.Add(input);
    209. var finals = result.final.Where(h => h != newFinal).ToArray();
    210. foreach (var final in finals)
    211. {
    212. result.AddTransition(final, start, Epsilon);
    213. }
    214. result.AddTransition(newFinal, start, Epsilon);
    215.  
    216. return result;
    217. }
    218.  
    219. public static NFA Concat(NFA left, NFA right)
    220. {
    221. var result = new NFA();
    222. result.Add(left);
    223.  
    224. var leftFinals = result.final.ToArray();
    225. result.final.Clear();
    226.  
    227. int rightInitial = result.Add(right);
    228. foreach (var final in leftFinals)
    229. {
    230. result.AddTransition(final, rightInitial, Epsilon);
    231. }
    232. return result;
    233. }
    234.  
    235. private int Add(NFA left)
    236. {
    237. int stateOffset = NewState();
    238. foreach (var s in left.states)
    239. {
    240. AddState(s + stateOffset, left.final.Contains(s));
    241. }
    242. foreach (var t in left.transitions)
    243. {
    244. AddTransition(t.Item1 + stateOffset, t.Item2 + stateOffset, t.Item3);
    245. }
    246. return stateOffset;
    247. }
    248.  
    249. private int NewState()
    250. {
    251. if (states.Count == 0)
    252. {
    253. return 0;
    254. }
    255. return states.Max() + 1;
    256. }
    257.  
    258. private void EpsilonClosure(HashSet<int> set_state)
    259. {
    260. Stack<int> frontier = new Stack<int>(set_state);
    261.  
    262. while (frontier.Count > 0)
    263. {
    264. var from_state = frontier.Pop();
    265.  
    266. foreach (var ept in FindTransitions(from_state).Where(t => t.Item3 == Epsilon))
    267. {
    268. if (set_state.Add(ept.Item2))
    269. {
    270. frontier.Push(ept.Item2);
    271. }
    272. }
    273. }
    274. }
    275.  
    276. private IEnumerable<Tuple<int, int, char>> FindTransitions(int from_state)
    277. {
    278. return transitions.Where(t => t.Item1 == from_state);
    279. }
    280.  
    281. class SetStateComparer : IEqualityComparer<HashSet<int>>
    282. {
    283. public bool Equals(HashSet<int> x, HashSet<int> y)
    284. {
    285. if (x.Count != y.Count)
    286. {
    287. return false;
    288. }
    289.  
    290. return x.All(t => y.Contains(t));
    291. }
    292.  
    293. public int GetHashCode(HashSet<int> x)
    294. {
    295. uint hash = int.MaxValue;
    296. foreach (var value in x.OrderBy(t => t))
    297. {
    298. hash ^= (uint)value + 0x9e3779b9 + (hash << 6) + (hash >> 2);
    299. }
    300. return (int)hash;
    301. }
    302. }
    303.  
    304. bool ContainsFinalState(HashSet<int> x)
    305. {
    306. return x.Intersect(final).Any();
    307. }
    308.  
    309. public string ToDotNotation()
    310. {
    311. var result = new StringBuilder();
    312. result.AppendLine("digraph NFA {");
    313. result.AppendLine("rankdir = LR;");
    314. result.AppendLine("orientation = Portrait;");
    315.  
    316. for (int i = 0; i < states.Count; ++i)
    317. {
    318. if (states[i] == initial)
    319. {
    320. result.AppendFormat("{0}[label = \"{0}\", shape = circle, style = bold, fontsize = 14]", states[i]);
    321. result.AppendLine();
    322. }
    323. else if (final.Contains(states[i]))
    324. {
    325. result.AppendFormat("{0}[label = \"{0}\", shape = doublecircle, style = bold, fontsize = 14]", states[i]);
    326. result.AppendLine();
    327. }
    328. else {
    329. result.AppendFormat("{0}[label = \"{0}\", shape = circle, style = solid, fontsize = 14]", states[i]);
    330. result.AppendLine();
    331. }
    332.  
    333. foreach (var t in transitions)
    334. {
    335. if (t.Item1 != states[i]) continue;
    336. if (t.Item3 == Any)
    337. {
    338. result.AppendFormat("{0}->{1} [label = \"*\", fontsize = 14];", t.Item1, t.Item2);
    339. result.AppendLine();
    340. }
    341. else if (t.Item3 == Epsilon)
    342. {
    343. result.AppendFormat("{0}->{1} [label = \"&epsilon;\", fontsize = 14];", t.Item1, t.Item2);
    344. result.AppendLine();
    345. }
    346. else
    347. {
    348. result.AppendFormat("{0}->{1} [label = \"{2}\", fontsize = 14];", t.Item1, t.Item2, t.Item3);
    349. result.AppendLine();
    350. }
    351. }
    352. }
    353.  
    354. result.AppendLine("}");
    355. return result.ToString();
    356. }
    357.  
    358. public DFA Determinize()
    359. {
    360. var target = new DFA();
    361.  
    362. Stack<Tuple<int, HashSet<int>>> frontier = new Stack<Tuple<int, HashSet<int>>>();
    363. Dictionary<HashSet<int>, int> seen = new Dictionary<HashSet<int>, int>(new SetStateComparer());
    364. int set_key = 0;
    365.  
    366. HashSet<int> set_initial = new HashSet<int>();
    367. set_initial.Add(0);
    368. EpsilonClosure(set_initial);
    369.  
    370. target.AddState(set_key, ContainsFinalState(set_initial));
    371.  
    372. frontier.Push(new Tuple<int, HashSet<int>>(set_key, set_initial));
    373. seen[set_initial] = set_key;
    374.  
    375. while (frontier.Count > 0)
    376. {
    377. var current = frontier.Pop();
    378. var current_key = current.Item1;
    379. var current_set = current.Item2;
    380.  
    381. HashSet<char> inputs = new HashSet<char>();
    382. foreach (var st in current_set)
    383. {
    384. foreach (var t in FindTransitions(st))
    385. {
    386. if (t.Item3 == Epsilon)
    387. {
    388. continue;
    389. }
    390. inputs.Add(t.Item3);
    391. }
    392. }
    393.  
    394. foreach (var i in inputs)
    395. {
    396. HashSet<int> new_state = new HashSet<int>();
    397.  
    398. foreach (var st in current_set)
    399. {
    400. foreach (var t in FindTransitions(st).Where(j => j.Item3 == i || j.Item3 == Any))
    401. {
    402. new_state.Add(t.Item2);
    403. }
    404. }
    405.  
    406. EpsilonClosure(new_state);
    407.  
    408. int seen_state_key;
    409. if (!seen.TryGetValue(new_state, out seen_state_key))
    410. {
    411. set_key++;
    412.  
    413. target.AddState(set_key, ContainsFinalState(new_state));
    414. target.AddTransition(current_key, set_key, i);
    415.  
    416. frontier.Push(new Tuple<int, HashSet<int>>(set_key, new_state));
    417. seen[new_state] = set_key;
    418. }
    419. else
    420. {
    421. target.AddTransition(current_key, seen_state_key, i);
    422. }
    423. }
    424. }
    425.  
    426. return target;
    427. }
    428. }
    429.  
    430. public static class Program
    431. {
    432. public static void Main(string[] args)
    433. {
    434. var words = Console.ReadLine().Split(' ');
    435.  
    436. Console.WriteLine($"Matching words {words[0]} and {words[1]} using Levenshtein automaton with distance {words[2]}:");
    437. Console.WriteLine(LevenshteinAutomaton.Match(words[0], words[1], int.Parse(words[2])));
    438. }
    439. }
    440. }
    441.