<?php
/**
 * Replaces every link in $html that does not match the whitelist with " (SPAM) ".
 *
 * Two passes are made over the markup:
 *   1. Complete <a ... href="...">...</a> elements. The whitelist is tested
 *      against the whole element (attributes and link text), not only the href.
 *   2. Bare URLs, www./ftp. hosts, quoted URLs and e-mail addresses that are
 *      still present after pass 1.
 *
 * Every occurrence of an offending match is replaced literally, so a link
 * that appears several times is removed everywhere.
 *
 * @param string $html      Markup to clean.
 * @param string $whitelist Regular-expression body (without delimiters) describing
 *                          allowed hosts, e.g. "(google\.com|yahoo\.com)". It is
 *                          wrapped in "/.../", so a literal "/" must be escaped.
 *
 * @return string The markup with non-whitelisted links replaced by " (SPAM) ".
 */
function killSpam($html, $whitelist){

    $whitelistPattern = "/$whitelist/";

    // Pass 1: whole anchor elements. Group 1 is the complete element.
    preg_match_all('%(<(?:\s+)?a.*?href=["|\'](.*?)["|\'].*?>(.*?)<(?:\s+)?/(?:\s+)?a(?:\s+)?>)%sm', $html, $match, PREG_PATTERN_ORDER);
    // Guard against a missing group should matching fail.
    $anchors = empty($match[1]) ? array() : $match[1];
    foreach ($anchors as $anchor) {
        if (!preg_match($whitelistPattern, $anchor)) {
            // Literal replacement: building a regex from the match would break
            // on characters preg_quote() does not escape (such as the "%"
            // found in URL-encoded links).
            $html = str_replace($anchor, " (SPAM) ", $html);
        }
    }

    // Pass 2: bare or quoted URLs and e-mail addresses left in the markup.
    preg_match_all('/(\b(?:(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)[A-Z0-9+&@#\/%?=~_|$!:,.;-]*[A-Z0-9+&@#\/%=~_|$-]|((?:mailto:)?[A-Z0-9._%+-]+@[A-Z0-9._%-]+\.[A-Z]{2,6})\b)|"(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)[^"\r\n]+"|\'(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)[^\'\r\n]+\')/i', $html, $match2, PREG_PATTERN_ORDER);
    $links = empty($match2[1]) ? array() : $match2[1];
    foreach ($links as $spamsite) {
        if (!preg_match($whitelistPattern, $spamsite)) {
            $html = str_replace($spamsite, " (SPAM) ", $html);
        }
    }

    return $html;

}


// Sample markup mixing whitelisted links, spam anchors, bare URLs and an
// e-mail address. A nowdoc is used because the text needs no interpolation.
$html = <<<'LOB'
 <p>Hello world, thanks to <a href="http://m...content-available-to-author-only...e.com/about" rel="nofollow">http://mywebsite/about</a> I learned a lot. I found
  you on <a href="http://w...content-available-to-author-only...g.com" rel="nofollow">http://w...content-available-to-author-only...g.com</a>, <a href="https://google.com/search" rel="nofollow">https://google.com/search</a> and on some <a href="http://w...content-available-to-author-only...e.com" rel="nofollow">www.spamwebsite.com/refid=spammer2< /a >. www.spamme.com, http://m...content-available-to-author-only...m.com/?aff=122, http://c...content-available-to-author-only...r.com/?money=22 and spam@email.com, file://spamfile.com/file.txt ftp://s...content-available-to-author-only...p.com/file.exe </p>
LOB;

// Usage: the whitelist is a regex alternation of the hosts that may stay.
$whitelist = '(google\.com|yahoo\.com|bing\.com|nicesite\.com|mywebsite\.com)';

// Clean the sample and print the result.
$noSpam = killSpam($html, $whitelist);
echo $noSpam;