fork download
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Text.RegularExpressions;
  6.  
  7.  
  namespace ConsoleApplication1
  {
      /// <summary>
      /// Console demo that rewrites HTML anchor elements carrying an <c>href</c>
      /// attribute into the plain-text form <c>link text [url]</c> using one regular
      /// expression, then prints the untouched sample followed by the rewritten text.
      /// </summary>
      class Program
      {
          // Sample markup fed to the pattern. It mixes unquoted, double-quoted and
          // empty href values, an anchor whose only "href" sits inside a quoted
          // attribute value, and an anchor with a stray bare attribute before href.
          const string SampleHtml = @"
<a asdf = href= >BLANK</a>
<a href= a""'tz target=_self >ATZ</a>
<a href=/2012/02/26/world/meast/iraq-missing-soldier-id/index.html?hpt=hp_bn1 target=""_self"">Last missing U.S. soldier in Iraq ID'd</a>
<a id=""weatherLocBtn"" href=""javascript:MainLocalObj.Weather.checkInput('weather',document.localAllLookupForm.inputField.value);""><span>Go</span></a>
<a href=""javascript:CNN_handleOverlay('profile_signin_overlay')"">Log in</a>
<a no='href' here> NOT FOUND </a>
<a this href= is_ok > OK </a>
";

          // Anchor pattern; it is written to be compiled with IgnorePatternWhitespace,
          // so the spaces and line breaks below are layout only. Reading top to bottom:
          //   - "<a" that must be followed by whitespace;
          //   - a lazy skip over earlier attributes, stepping over single characters
          //     other than '>' and quotes, or over whole quoted strings as a unit;
          //   - "href" preceded by whitespace, optional whitespace, then "=";
          //   - the URL: either a quoted value closed by the same quote character
          //     (via the \1 back-reference), or, when the next non-blank character is
          //     not a quote, a possibly empty run of characters that are neither
          //     whitespace nor '>';
          //   - an atomic group consuming the rest of the opening tag;
          //   - the closing '>' of the tag, which must not be preceded by '/';
          //   - TEXT: everything, lazily, up to the first "</a" + optional whitespace + ">".
          const string AnchorPattern = @"
<a
(?=\s)
(?:[^>""']|""[^""]*""|'[^']*')*?
(?<=\s) href \s* =
(?: (?> \s* (['""]) (?<URL>.*?) \1 )
| (?> (?!\s*['""]) \s* (?<URL>[^\s>]*) (?=\s|>) )
)
(?> (?:"".*?""|'.*?'|[^>]?)+ )
(?<!/)
>
(?<TEXT>.*?)
</a \s*>
";

          // Substitution template: the captured link text followed by the URL in brackets.
          const string Replacement = "${TEXT} [${URL}]";

          // Case-insensitive matching, '.' also matches newlines, and unescaped
          // whitespace inside the pattern is ignored.
          const RegexOptions MatchOptions =
              RegexOptions.IgnorePatternWhitespace |
              RegexOptions.Singleline |
              RegexOptions.IgnoreCase;

          /// <summary>
          /// Applies the anchor pattern to the sample markup and writes the original
          /// sample, a dashed separator and the rewritten text to standard output.
          /// </summary>
          /// <param name="args">Command-line arguments; not used.</param>
          static void Main(string[] args)
          {
              string rewritten = Regex.Replace(SampleHtml, AnchorPattern, Replacement, MatchOptions);

              // Print the original first so both versions can be compared side by side.
              Console.WriteLine(SampleHtml + "\n------------\n");
              Console.WriteLine(rewritten);
          }
      }
  }
  47.  
Success #stdin #stdout 0.08s 37344KB
stdin
Standard input is empty
stdout
               <a asdf = href=  >BLANK</a>
               <a href= a"'tz target=_self >ATZ</a>
               <a href=/2012/02/26/world/meast/iraq-missing-soldier-id/index.html?hpt=hp_bn1 target="_self">Last missing U.S. soldier in Iraq ID'd</a>
               <a id="weatherLocBtn" href="javascript:MainLocalObj.Weather.checkInput('weather',document.localAllLookupForm.inputField.value);"><span>Go</span></a>
               <a href="javascript:CNN_handleOverlay('profile_signin_overlay')">Log in</a>
               <a no='href' here> NOT FOUND </a>
               <a this href= is_ok > OK </a>
            
------------


               BLANK []
               ATZ [a"'tz]
               Last missing U.S. soldier in Iraq ID'd [/2012/02/26/world/meast/iraq-missing-soldier-id/index.html?hpt=hp_bn1]
               <span>Go</span> [javascript:MainLocalObj.Weather.checkInput('weather',document.localAllLookupForm.inputField.value);]
               Log in [javascript:CNN_handleOverlay('profile_signin_overlay')]
               <a no='href' here> NOT FOUND </a>
                OK  [is_ok]