fork download
  1. using System;
  2. using System.Linq;
  3. using System.Collections.Generic;
  4. using System.Text.RegularExpressions;
  /// <summary>
  /// Console demo: builds a Unicode-aware hashtag regular expression and reports,
  /// for a fixed list of sample strings, whether each one contains a hashtag.
  /// </summary>
  public class Test
  {
      /// <summary>
      /// Assembles the hashtag pattern from character-class fragments, runs it over
      /// every sample, and writes one line per sample to standard output in the form
      /// <c>Input: &lt;sample&gt; = &lt;True|False&gt;[, match = &lt;matched text&gt;]</c>.
      /// </summary>
      public static void Main()
      {
          // Character-class fragments (written without the surrounding brackets so
          // they can be reused both in positive and in negated classes).
          string letters = @"\p{L}\p{M}";
          string digits = @"\p{Nd}";
          string extras = @"_\u200c\u200d\ua67e\u05be\u05f3\u05f4\uff5e\u301c\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7";

          string tagChars = letters + digits + extras;
          string anyTagChar = "[" + tagChars + "]";
          string anyLetter = "[" + letters + "]";

          // Pattern layout:
          //   group 1 - start of input, or one character that is neither '&' nor a tag character
          //   group 2 - the hash sign: '#' or U+FF03
          //   lookahead - the hash sign must not be followed by U+FE0F or U+20E3
          //   group 3 - the tag body: tag characters containing at least one letter/mark,
          //             so a digits-only tag such as "#1" does not match
          string pattern =
              "(^|[^&" + tagChars + @"])" +
              @"(#|\uFF03)" +
              @"(?!\uFE0F|\u20E3)" +
              "(" + anyTagChar + "*" + anyLetter + anyTagChar + "*)";
          Regex hashtag = new Regex(pattern, RegexOptions.IgnoreCase);

          // Sample inputs; "#日本語ハッシュタグ" appears twice in the list.
          string[] samples =
          {
              "#hashtag",
              "#Azərbaycanca",
              "#mûǁae",
              "#Čeština",
              "#Ċaoiṁín",
              "#Caoiṁín",
              "#táim",
              "#hag̃ua",
              "#café",
              "#עברית",
              "#אֲשֶׁר",
              "#עַל־יְדֵי",
              "#וכו׳",
              "#מ״כ",
              "#العربية",
              "#حالياً",
              "#يـﮱـَٱ",
              "#ประเทศไทย",
              "#ฟรี",
              "#日本語ハッシュタグ",
              "#日本語ハッシュタグ",
              "これはOK #ハッシュタグ",
              "これもOK。#ハッシュタグ",
              "これはダメ#ハッシュタグ",
              "#1",
              "#2"
          };

          foreach (string sample in samples)
          {
              Match found = hashtag.Match(sample);
              string report = "Input: " + sample + " = " + found.Success;
              if (found.Success)
              {
                  // The whole match is printed, so it includes the group-1 character
                  // that precedes the hash sign when there is one.
                  report += ", match = " + found.Value;
              }
              Console.WriteLine(report);
          }
      }
  }
Success #stdin #stdout 0.13s 24728KB
stdin
Standard input is empty
stdout
Input: #hashtag = True, match = #hashtag
Input: #Azərbaycanca = True, match = #Azərbaycanca
Input: #mûǁae = True, match = #mûǁae
Input: #Čeština = True, match = #Čeština
Input: #Ċaoiṁín = True, match = #Ċaoiṁín
Input: #Caoiṁín = True, match = #Caoiṁín
Input: #táim = True, match = #táim
Input: #hag̃ua = True, match = #hag̃ua
Input: #café = True, match = #café
Input: #עברית = True, match = #עברית
Input: #אֲשֶׁר = True, match = #אֲשֶׁר
Input: #עַל־יְדֵי = True, match = #עַל־יְדֵי
Input: #וכו׳ = True, match = #וכו׳
Input: #מ״כ = True, match = #מ״כ
Input: #العربية = True, match = #العربية
Input: #حالياً = True, match = #حالياً
Input: #يـﮱـَٱ = True, match = #يـﮱـَٱ
Input: #ประเทศไทย = True, match = #ประเทศไทย
Input: #ฟรี = True, match = #ฟรี
Input: #日本語ハッシュタグ = True, match = #日本語ハッシュタグ
Input: #日本語ハッシュタグ = True, match = #日本語ハッシュタグ
Input: これはOK #ハッシュタグ = True, match =  #ハッシュタグ
Input: これもOK。#ハッシュタグ = True, match = 。#ハッシュタグ
Input: これはダメ#ハッシュタグ = False
Input: #1 = False
Input: #2 = False