import java.util.regex.Pattern ;
/**
 * Heuristic check for whether a string contains HTML markup.
 *
 * <p>A string is considered HTML when it contains at least one of:
 * <ul>
 *   <li>a start tag followed (anywhere later in the string) by an end tag --
 *       the tag names are not required to match;</li>
 *   <li>a self-closing tag such as {@code <br/>};</li>
 *   <li>a named character entity such as {@code &nbsp;} (numeric entities
 *       like {@code &#160;} are not recognised).</li>
 * </ul>
 *
 * <p>This is a regex-based heuristic, not an HTML parser.
 */
class DetectHtml
{
    // adapted from re posted by Phil Haack and modified to match better

    /**
     * Optional attribute list shared by start tags and self-closing tags:
     * either one or more {@code name}, {@code name="v"}, {@code name='v'} or
     * {@code name=v} attributes followed by optional whitespace, or just
     * optional whitespace.
     */
    private static final String TAG_ATTRIBUTES
        = "((\\s+\\w+(\\s*\\=\\s*(?:\".*?\"|'.*?'|[^'\"\\>\\s]+))?)+\\s*|\\s*)";

    /** Regex source for a start tag, e.g. {@code <span id="x">}. */
    public final static String tagStart
        = "\\<\\w+" + TAG_ATTRIBUTES + "\\>";

    /** Regex source for an end tag, e.g. {@code </span>}. */
    public final static String tagEnd
        = "\\</\\w+\\>";

    /** Regex source for a self-closing tag, e.g. {@code <br/>} or {@code <img src='x' />}. */
    public final static String tagSelfClosing
        = "\\<\\w+" + TAG_ATTRIBUTES + "/\\>";

    /** Regex source for a named character entity, e.g. {@code &amp;}. */
    public final static String htmlEntity
        = "&[a-zA-Z][a-zA-Z0-9]+;";

    /**
     * Compiled detector: (start tag ... end tag) | self-closing tag | entity.
     * DOTALL lets the content between a start and end tag span line breaks.
     */
    public final static Pattern htmlPattern = Pattern.compile(
        "(" + tagStart + ".*" + tagEnd + ")|(" + tagSelfClosing + ")|(" + htmlEntity + ")",
        Pattern.DOTALL
    );

    /**
     * Reports whether {@code s} looks like it contains HTML.
     *
     * @param s the text to inspect; may be {@code null}
     * @return {@code true} if {@link #htmlPattern} matches anywhere in
     *         {@code s}; {@code false} otherwise, including when {@code s}
     *         is {@code null}
     */
    public static boolean isHtml(String s)
    {
        return s != null && htmlPattern.matcher(s).find();
    }

    /**
     * Small demonstration: prints each sample string followed by the verdict
     * of {@link #isHtml(String)}. Safe to delete.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String[] samples = {
            // plain text that merely contains '<', '>' and '&'
            "If B<A then A>B & this is true",
            // proper element with escaped content
            "<span id=\"true\">If B&lt;A then A&gt;B &amp; this is true</span>",
            // element whose content spans several lines
            "<a href=\"http://w...content-available-to-author-only...e.com/\">\nclick here\n</a>",
            // end tag without a start tag
            "</end>",
            // start tag without an end tag
            "<start>",
            // empty angle brackets are not a tag
            "These are used <> to denote an HTML tag.",
            // no tags, but contains a named entity
            "This is an example of HTML&nbsp;escaped text",
        };

        for (String sample : samples) {
            System.out.println(sample + " - " + (isHtml(sample) ? "IS HTML" : "IS NOT HTML"));
        }
    }
}
aW1wb3J0IGphdmEudXRpbC5yZWdleC5QYXR0ZXJuOwoKY2xhc3MgRGV0ZWN0SHRtbAp7CgkvLyBhZGFwdGVkIGZyb20gcmUgcG9zdGVkIGJ5IFBoaWwgSGFhY2sgYW5kIG1vZGlmaWVkIHRvIG1hdGNoIGJldHRlcgoJcHVibGljIGZpbmFsIHN0YXRpYyBTdHJpbmcgdGFnU3RhcnQ9CgkJIlxcPFxcdysoKFxccytcXHcrKFxccypcXD1cXHMqKD86XCIuKj9cInwnLio/J3xbXidcIlxcPlxcc10rKSk/KStcXHMqfFxccyopXFw+IjsKCXB1YmxpYyBmaW5hbCBzdGF0aWMgU3RyaW5nIHRhZ0VuZD0KICAgICAgICAiXFw8L1xcdytcXD4iOwoJcHVibGljIGZpbmFsIHN0YXRpYyBTdHJpbmcgdGFnU2VsZkNsb3Npbmc9CgkJIlxcPFxcdysoKFxccytcXHcrKFxccypcXD1cXHMqKD86XCIuKj9cInwnLio/J3xbXidcIlxcPlxcc10rKSk/KStcXHMqfFxccyopL1xcPiI7CglwdWJsaWMgZmluYWwgc3RhdGljIFN0cmluZyBodG1sRW50aXR5PQoJCSImW2EtekEtWl1bYS16QS1aMC05XSs7IjsKCXB1YmxpYyBmaW5hbCBzdGF0aWMgUGF0dGVybiBodG1sUGF0dGVybj1QYXR0ZXJuLmNvbXBpbGUoCgkgICIoIit0YWdTdGFydCsiLioiK3RhZ0VuZCsiKXwoIit0YWdTZWxmQ2xvc2luZysiKXwoIitodG1sRW50aXR5KyIpIiwKCSAgUGF0dGVybi5ET1RBTEwKCSk7CgoJcHVibGljIHN0YXRpYyBib29sZWFuIGlzSHRtbChTdHJpbmcgcykgewoJCWJvb2xlYW4gcmV0PWZhbHNlOwoJCWlmIChzICE9IG51bGwpIHsKCQkJcmV0PWh0bWxQYXR0ZXJuLm1hdGNoZXIocykuZmluZCgpOwoJCX0KCQlyZXR1cm4gcmV0OwoJfQoKCS8vIHRlc3QgLSB5b3UgY2FuIGRlbGV0ZSBtZQoJcHVibGljIHN0YXRpYyB2b2lkIG1haW4gKFN0cmluZ1tdIGFyZ3MpIHRocm93cyBqYXZhLmxhbmcuRXhjZXB0aW9uCgl7CgkJU3RyaW5nIHN0cklzTm90SHRtbD0iSWYgQjxBIHRoZW4gQT5CICYgdGhpcyBpcyB0cnVlIjsKCQlTdHJpbmcgc3RySXNIdG1sPSI8c3BhbiBpZD1cInRydWVcIj5JZiBCJmx0O0EgdGhlbiBBJmd0O0IgJmFtcDsgdGhpcyBpcyB0cnVlPC9zcGFuPiI7CgkJU3RyaW5nIHN0cklzTXVsdGlsaW5lSHRtbD0iPGEgaHJlZj1cImh0dHA6Ly93Li4uY29udGVudC1hdmFpbGFibGUtdG8tYXV0aG9yLW9ubHkuLi5lLmNvbS9cIj5cbmNsaWNrIGhlcmVcbjwvYT4iOwoJCVN0cmluZyBzdHJCYWRFbmRUYWdPbmx5PSI8L2VuZD4iOwoJCVN0cmluZyBzdHJCYWRTdGFydFRhZ09ubHk9IjxzdGFydD4iOwoJCVN0cmluZyBzdHJFbXB0eUJyYWNlcz0iVGhlc2UgYXJlIHVzZWQgPD4gdG8gZGVub3RlIGFuIEhUTUwgdGFnLiI7CgkJU3RyaW5nIHN0clRleHRXaXRoRW50aXRpZXM9IlRoaXMgaXMgYW4gZXhhbXBsZSBvZiBIVE1MJm5ic3A7ZXNjYXBlZCB0ZXh0IjsKCgkJU3lzdGVtLm91dC5wcmludGxuKHN0cklzTm90SHRtbCArICIgLSAiICsgKGlzSHRtbChzdHJJc05vdEh0bWwpID8gIklTIEhUTUwiIDogIklTIE5PVCBIVE1MIikpOwoJCVN5c3RlbS5vdXQucHJpbnRsbihzdHJJc0h0bWwg
KyAiIC0gIiArIChpc0h0bWwoc3RySXNIdG1sKSA/ICJJUyBIVE1MIiA6ICJJUyBOT1QgSFRNTCIpKTsKCQlTeXN0ZW0ub3V0LnByaW50bG4oc3RySXNNdWx0aWxpbmVIdG1sICsgIiAtICIgKyAoaXNIdG1sKHN0cklzTXVsdGlsaW5lSHRtbCkgPyAiSVMgSFRNTCIgOiAiSVMgTk9UIEhUTUwiKSk7CgkJU3lzdGVtLm91dC5wcmludGxuKHN0ckJhZEVuZFRhZ09ubHkgKyAiIC0gIiArIChpc0h0bWwoc3RyQmFkRW5kVGFnT25seSkgPyAiSVMgSFRNTCIgOiAiSVMgTk9UIEhUTUwiKSk7CgkJU3lzdGVtLm91dC5wcmludGxuKHN0ckJhZFN0YXJ0VGFnT25seSArICIgLSAiICsgKGlzSHRtbChzdHJCYWRTdGFydFRhZ09ubHkpID8gIklTIEhUTUwiIDogIklTIE5PVCBIVE1MIikpOwoJCVN5c3RlbS5vdXQucHJpbnRsbihzdHJFbXB0eUJyYWNlcyArICIgLSAiICsgKGlzSHRtbChzdHJFbXB0eUJyYWNlcykgPyAiSVMgSFRNTCIgOiAiSVMgTk9UIEhUTUwiKSk7CgkJU3lzdGVtLm91dC5wcmludGxuKHN0clRleHRXaXRoRW50aXRpZXMgKyAiIC0gIiArIChpc0h0bWwoc3RyVGV4dFdpdGhFbnRpdGllcykgPyAiSVMgSFRNTCIgOiAiSVMgTk9UIEhUTUwiKSk7CgoJfQoKfQ==