<?php
// Sample HTML fragment used to demonstrate the replacement.
$input = <<<EOD
<p>sapien Cras cursus consequat nibh <a href="#">sapien</a>ac vehicula. Sed erat sapien, condimentum quis risus nec, viverra dignissim nisi. Cras sapien convallis, erat egestas tincidunt <img src="myimage.jpg" alt="sapien" >rutrum, massa enim sagittis ante, sapien.sed pellentesque lorem risus vitae enim. Curabitur hendrerit dolor facilisis <a href="sapien">sapien</a> dolor malesuada molestie.sapien</p>
EOD;

// Define the tags whose content must be excluded from replacement,
// as: array( start_string => end_string, ... );
// Everything from a start_string up to and including the next matching
// end_string is copied to the output untouched.
$ignore_tags = array(
    '<a' => '</a>',
    '<img' => '>',
    '<div' => '</div>'
);

// Show the raw input, run the parser, then show the result.
echo "Input:\n {$input} \n\n ";
$output = parse_text($input, $ignore_tags);
echo "Output:\n {$output}";
/**
 * Callback invoked by parse_text() for every "clean" piece of text, i.e. text
 * that lies outside the ignored tags.
 *
 * Prints a debug trace of the piece it received, then wraps each whole-word,
 * case-insensitive occurrence of "sapien" in a link.
 *
 * @param string $text Clean piece of text to process.
 * @return string|null Result of preg_replace() on the piece.
 */
function my_replace($text) {
    // Debug trace showing which piece is being processed.
    echo "my_replace call on: \n", $text, "\n\n";

    // your replacements here
    $pattern = "#\b(sapien)\b#i";
    $replacement = "<a href=#>\\1</a>";

    return preg_replace($pattern, $replacement, $text);
}
// main parsing function that splits text into clean and ignored parts
/**
 * Runs a replacement callback over $input while leaving ignored regions as-is.
 *
 * An ignored region begins at a start string (a key of $ignore_tags) and ends
 * with the first following occurrence of its end string (the matching value);
 * both are matched case-insensitively and as literal strings. Ignored regions,
 * including their start and end strings, are copied to the output verbatim.
 * Every stretch of text between ignored regions (a "clean" piece, possibly
 * empty) is passed through the callback and the callback's return value is
 * appended to the output instead.
 *
 * When several start strings occur in the remaining text, the one whose last
 * character comes first wins; on a tie the longer one wins. If an ignored
 * region is never closed, the rest of the input is copied verbatim. Entries
 * with an empty start string are skipped; an empty end string closes the
 * ignored region immediately after the start string.
 *
 * Note that start strings are plain prefixes: '<a' also matches '<abbr'.
 *
 * @param string        $input            Text to process.
 * @param array         $ignore_tags      Map of start_string => end_string.
 * @param callable|null $replace_callback Called with each clean piece and
 *                                        expected to return its replacement;
 *                                        defaults to my_replace().
 * @return string Processed text.
 */
function parse_text($input, $ignore_tags, $replace_callback = null) {
    $input = (string) $input;
    $length = strlen($input);
    $output = '';
    $offset = 0; // start of the part of $input that is not processed yet

    for (;;) {
        // Find the start string that completes first at or after $offset.
        $open_pos = false;
        $open_end = 0;
        $close = '';
        foreach ($ignore_tags as $open => $candidate_close) {
            $open = (string) $open;
            if ($open === '') {
                continue; // an empty start string cannot delimit anything
            }
            $pos = stripos($input, $open, $offset);
            if ($pos === false) {
                continue;
            }
            $end = $pos + strlen($open);
            if ($open_pos === false
                || $end < $open_end
                || ($end === $open_end && $pos < $open_pos)
            ) {
                $open_pos = $pos;
                $open_end = $end;
                // Remember the end string of the matched entry directly, so
                // the lookup does not depend on the letter case in $input.
                $close = (string) $candidate_close;
            }
        }

        // The clean piece runs up to the start string, or to the end of input.
        $clean_end = ($open_pos === false) ? $length : $open_pos;
        $clean = (string) substr($input, $offset, $clean_end - $offset);
        $output .= ($replace_callback === null)
            ? my_replace($clean)
            : $replace_callback($clean);

        if ($open_pos === false) {
            break; // no further ignored region
        }

        // Keep the start string exactly as it is written in the input.
        $output .= substr($input, $open_pos, $open_end - $open_pos);
        $offset = $open_end;

        // Copy everything up to and including the end string untouched.
        if ($close === '') {
            $close_end = $offset;
        } else {
            $close_pos = stripos($input, $close, $offset);
            if ($close_pos === false) {
                // Unterminated ignored region: keep the remainder as it is.
                $output .= substr($input, $offset);
                break;
            }
            $close_end = $close_pos + strlen($close);
        }
        $output .= substr($input, $offset, $close_end - $offset);
        $offset = $close_end;
    }

    return $output;
}
PD9waHAKJGlucHV0ID0gPDw8RU9ECjxwPnNhcGllbiBDcmFzIGN1cnN1cyBjb25zZXF1YXQgbmliaCA8YSBocmVmPSIjIj5zYXBpZW48L2E+YWMgdmVoaWN1bGEuIFNlZCBlcmF0IHNhcGllbiwgY29uZGltZW50dW0gcXVpcyByaXN1cyBuZWMsIHZpdmVycmEgZGlnbmlzc2ltIG5pc2kuIENyYXMgc2FwaWVuIGNvbnZhbGxpcywgZXJhdCBlZ2VzdGFzIHRpbmNpZHVudCA8aW1nIHNyYz0ibXlpbWFnZS5qcGciIGFsdD0ic2FwaWVuIiA+cnV0cnVtLCBtYXNzYSBlbmltIHNhZ2l0dGlzIGFudGUsIHNhcGllbi5zZWQgcGVsbGVudGVzcXVlIGxvcmVtIHJpc3VzIHZpdGFlIGVuaW0uIEN1cmFiaXR1ciBoZW5kcmVyaXQgZG9sb3IgZmFjaWxpc2lzIDxhIGhyZWY9InNhcGllbiI+c2FwaWVuPC9hPiBkb2xvciBtYWxlc3VhZGEgbW9sZXN0aWUuc2FwaWVuPC9wPgpFT0Q7CgovLyBkZWZpbmUgdGFncyB3aGljaCB5b3UgbmVlZCB0byBleGNsdWRlIGZyb20gcmVwbGFjZW1lbnQgCi8vIGFzOiBhcnJheSggc3RhcnRfc3RyaW5nID0+IGVuZF9zdHJpbmcsIC4uLiApOwokaWdub3JlX3RhZ3MgPSBhcnJheSgKICAgICc8YScgPT4gJzwvYT4nLAogICAgJzxpbWcnID0+ICc+JywKICAgICc8ZGl2JyA9PiAnPC9kaXY+JwopOwoKZWNobyAiSW5wdXQ6XG4geyRpbnB1dH0gXG5cbiAiOwokb3V0cHV0ID0gcGFyc2VfdGV4dCgkaW5wdXQsICRpZ25vcmVfdGFncyk7CmVjaG8gIk91dHB1dDpcbiB7JG91dHB1dH0iOwoKLy8gY2FsbGJhY2sgZnVuY3Rpb24gdGhhdCBpbnZva2VzIGV2ZXJ5IHRpbWUgd2hlbiAncGFyc2VfdGV4dCcgcGFyc2VzICdjbGVhbicgcGVhY2Ugb2YgdGV4dApmdW5jdGlvbiBteV9yZXBsYWNlKCR0ZXh0KSB7CgllY2hvICJteV9yZXBsYWNlIGNhbGwgb246IFxuIi4kdGV4dC4iXG5cbiI7CgkKICAgIC8vIHlvdXIgcmVwbGFjZW1lbnRzIGhlcmUKICAgICR0ZXh0ID0gcHJlZ19yZXBsYWNlKCIjXGIoc2FwaWVuKVxiI2kiLCAiPGEgaHJlZj0jPlxcMTwvYT4iLCAkdGV4dCk7CiAgICByZXR1cm4gJHRleHQ7Cn0KCgovLyBtYWluIHBhcnNpbmcgZnVuY3Rpb24gdGhhdCBzcGxpdCB0ZXh0IHRvIGNsZWFuIGFuZCBpZ25vcmVkIHBhcnRzCmZ1bmN0aW9uIHBhcnNlX3RleHQoJGlucHV0LCAkaWdub3JlX3RhZ3MpIHsKICAgICRvdXRwdXQgPSAnJzsKICAgICRzdHIgPSAnJzsKICAgICRpZ25vcmUgPSBmYWxzZTsKICAgICRjdXJyZW50X2lnbm9yZV90YWcgPSAnJzsKICAgICRpZ25vcmVfdGFnc19yZWdleHAgPSBpbXBsb2RlKCJ8IiwgYXJyYXlfa2V5cygkaWdub3JlX3RhZ3MpKTsKCiAgICBmb3IgKCRpID0gMDsgJGkgPCBzdHJsZW4oJGlucHV0KTsgJGkrKykgewogICAgICAgICRzdHIgLj0gJGlucHV0WyRpXTsKICAgICAgICAvLyBpZiBzdGFydHMgaWdub3JlIHRhZyBhbmQgbm90IGFscmVhZHkgJGlnbm9yZQogICAgICAgIGlmIChwcmVnX21hdGNoKCIjKHskaWdub3JlX3RhZ3NfcmVnZXhwfSkkI3NpIiwgJHN0ciwgJG0pICYmICEkaWdub3JlKSB7
CiAgICAgICAgICAgICRzdHIgPSBwcmVnX3JlcGxhY2UoIiMoeyRpZ25vcmVfdGFnc19yZWdleHB9KSQjc2kiLCAiIiwgJHN0cik7IC8vIGN1dCBhbmQgbm90IGluY2x1ZGUgaWdub3JlIHRhZwogICAgICAgICAgICAkb3V0cHV0IC49IG15X3JlcGxhY2UoJHN0cikgLiAkbVsxXTsgLy8gcmVwbGFjZSBhbGwgYmVmb3JlIGFuZCBzYXZlCiAgICAgICAgICAgICRpZ25vcmUgPSB0cnVlOwogICAgICAgICAgICAkY3VycmVudF9pZ25vcmVfdGFnID0gJG1bMV07CiAgICAgICAgICAgICRzdHIgPSAnJzsKICAgICAgICB9IC8vIGlmICRpZ25vcmUgYW5kIG1hdGNoZXMgdGhlIGVuZCBvZiB0aGUgY3VycmVudCBpZ25vcmUgdGFnCiAgICAgICAgZWxzZWlmICgkaWdub3JlICYmIHByZWdfbWF0Y2goIiMoeyRpZ25vcmVfdGFnc1skY3VycmVudF9pZ25vcmVfdGFnXX0pJCNpIiwgJHN0ciwgJG0pKSB7CiAgICAgICAgICAgICRvdXRwdXQgLj0gJHN0cjsgLy8ganVzdCBzYXZlIGFzIGl0IGlzIGN1cnJlbnQgcGVhY2UKICAgICAgICAgICAgJGlnbm9yZSA9IGZhbHNlOwogICAgICAgICAgICAkc3RyID0gJyc7CiAgICAgICAgfQogICAgfQogICAgJG91dHB1dCAuPSAoISRpZ25vcmUpID8gbXlfcmVwbGFjZSgkc3RyKSA6ICRzdHI7CiAgICByZXR1cm4gJG91dHB1dDsKfQoK
Input:
<p>sapien Cras cursus consequat nibh <a href="#">sapien</a>ac vehicula. Sed erat sapien, condimentum quis risus nec, viverra dignissim nisi. Cras sapien convallis, erat egestas tincidunt <img src="myimage.jpg" alt="sapien" >rutrum, massa enim sagittis ante, sapien.sed pellentesque lorem risus vitae enim. Curabitur hendrerit dolor facilisis <a href="sapien">sapien</a> dolor malesuada molestie.sapien</p>
my_replace call on:
<p>sapien Cras cursus consequat nibh
my_replace call on:
ac vehicula. Sed erat sapien, condimentum quis risus nec, viverra dignissim nisi. Cras sapien convallis, erat egestas tincidunt
my_replace call on:
rutrum, massa enim sagittis ante, sapien.sed pellentesque lorem risus vitae enim. Curabitur hendrerit dolor facilisis
my_replace call on:
dolor malesuada molestie.sapien</p>
Output:
<p><a href=#>sapien</a> Cras cursus consequat nibh <a href="#">sapien</a>ac vehicula. Sed erat <a href=#>sapien</a>, condimentum quis risus nec, viverra dignissim nisi. Cras <a href=#>sapien</a> convallis, erat egestas tincidunt <img src="myimage.jpg" alt="sapien" >rutrum, massa enim sagittis ante, <a href=#>sapien</a>.sed pellentesque lorem risus vitae enim. Curabitur hendrerit dolor facilisis <a href="sapien">sapien</a> dolor malesuada molestie.<a href=#>sapien</a></p>