<?php

$redefs = '(?(DEFINE)
    (?<tagname> [a-z][^\s>/]*+    )
    (?<attname> [^\s>/][^\s=>/]*+    )  # first char can be pretty much anything, including =
    (?<attval>  (?>
                    "[^"]*+" |
                    \'[^\']*+\' |
                    [^\s>]*+            # unquoted values can contain quotes, = and /
                )
    ) 
    (?<attrib>  (?&attname)
                (?: \s*+
                    = \s*+
                    (?&attval)
                )?+
    )
    (?<crap>    [^\s>]    )             # most crap inside tag is ignored, will eat the last / in self closing tags
    (?<tag>     <(?&tagname)
                (?: \s*+                # spaces between attributes not required: <b/foo=">"style=color:red>bold red text</b>
                    (?>
                        (?&attrib) |    # order matters
                        (?&crap)        # if not an attribute, eat the crap
                    )
                )*+
                \s*+ /?+
                \s*+ >
    )
)';


// removes onanything attributes from all matched HTML tags
function remove_event_attributes($html){
	global $redefs;
	$re = '(?&tag)' . $redefs;
	return preg_replace("~$re~xie", 'remove_event_attributes_from_tag("$0")', $html);
}

// removes onanything attributes from a single opening tag
function remove_event_attributes_from_tag($tag){
	global $redefs;
	$re = '( ^ <(?&tagname) ) | \G \s*+ (?> ((?&attrib)) | ((?&crap)) )' . $redefs;
	return preg_replace("~$re~xie", '"$1$3"? "$0": (preg_match("/^on/i", "$2")? " ": "$0")', $tag);
}


// test
$str = '
<button onclick="..javascript instruction..">
<button onclick="..javascript instruction.." value="..">
<button onclick=..javascript_instruction..>
<button onclick=..javascript_instruction.. value>
<hello word "" ontest = "hai"x="y"onfoo=bar/baz  />
';

echo $str . "\n----------------------\n";

echo remove_event_attributes($str);

?>