fork(23) download
  1. import java.util.regex.Pattern;
  2.  
  3. class DetectHtml
  4. {
  5. // adapted from re posted by Phil Haack and modified to match better
  6. public final static String tagStart=
  7. "\\<\\w+((\\s+\\w+(\\s*\\=\\s*(?:\".*?\"|'.*?'|[^'\"\\>\\s]+))?)+\\s*|\\s*)\\>";
  8. public final static String tagEnd=
  9. "\\</\\w+\\>";
  10. public final static String tagSelfClosing=
  11. "\\<\\w+((\\s+\\w+(\\s*\\=\\s*(?:\".*?\"|'.*?'|[^'\"\\>\\s]+))?)+\\s*|\\s*)/\\>";
  12. public final static String htmlEntity=
  13. "&[a-zA-Z][a-zA-Z0-9]+;";
  14. public final static Pattern htmlPattern=Pattern.compile(
  15. "("+tagStart+".*"+tagEnd+")|("+tagSelfClosing+")|("+htmlEntity+")",
  16. Pattern.DOTALL
  17. );
  18.  
  19. public static boolean isHtml(String s) {
  20. boolean ret=false;
  21. if (s != null) {
  22. ret=htmlPattern.matcher(s).find();
  23. }
  24. return ret;
  25. }
  26.  
  27. // test - you can delete me
  28. public static void main (String[] args) throws java.lang.Exception
  29. {
  30. String strIsNotHtml="If B<A then A>B & this is true";
  31. String strIsHtml="<span id=\"true\">If B&lt;A then A&gt;B &amp; this is true</span>";
  32. String strIsMultilineHtml="<a href=\"http://w...content-available-to-author-only...e.com/\">\nclick here\n</a>";
  33. String strBadEndTagOnly="</end>";
  34. String strBadStartTagOnly="<start>";
  35. String strEmptyBraces="These are used <> to denote an HTML tag.";
  36. String strTextWithEntities="This is an example of HTML&nbsp;escaped text";
  37.  
  38. System.out.println(strIsNotHtml + " - " + (isHtml(strIsNotHtml) ? "IS HTML" : "IS NOT HTML"));
  39. System.out.println(strIsHtml + " - " + (isHtml(strIsHtml) ? "IS HTML" : "IS NOT HTML"));
  40. System.out.println(strIsMultilineHtml + " - " + (isHtml(strIsMultilineHtml) ? "IS HTML" : "IS NOT HTML"));
  41. System.out.println(strBadEndTagOnly + " - " + (isHtml(strBadEndTagOnly) ? "IS HTML" : "IS NOT HTML"));
  42. System.out.println(strBadStartTagOnly + " - " + (isHtml(strBadStartTagOnly) ? "IS HTML" : "IS NOT HTML"));
  43. System.out.println(strEmptyBraces + " - " + (isHtml(strEmptyBraces) ? "IS HTML" : "IS NOT HTML"));
  44. System.out.println(strTextWithEntities + " - " + (isHtml(strTextWithEntities) ? "IS HTML" : "IS NOT HTML"));
  45.  
  46. }
  47.  
  48. }
Success #stdin #stdout 0.08s 380160KB
stdin
Standard input is empty
stdout
If B<A then A>B & this is true - IS NOT HTML
<span id="true">If B&lt;A then A&gt;B &amp; this is true</span> - IS HTML
<a href="http://w...content-available-to-author-only...e.com/">
click here
</a> - IS HTML
</end> - IS NOT HTML
<start> - IS NOT HTML
These are used <> to denote an HTML tag. - IS NOT HTML
This is an example of HTML&nbsp;escaped text - IS HTML