fork(3) download
  1. <?php
  2.  
  3. $html = <<< EOF
  4. <img border="0" src="imagenes/flech.gif" width="6" height="8">
  5.  
  6. <a href="escuchar-baladas-de-Albano_Y_Romina_Power.html">Albano Y Romina Power</a><br>
  7. <img border="0" src="imagenes/flech.gif" width="6" height="8">
  8.  
  9. <a href="escuchar-baladas-de-Armando_Manzanero.html">Armando Manzanero</a><br>
  10.  
  11. <a name="inicio21" href="musica-Merengue-de-Banda_Cuisillos.html">
  12. <img border="0" src="imagenes/flech.gif" width="6" height="8">Banda Cuisillos</a><br>
  13.  
  14. <a href="Musica-Baladas-Alternativas.html">Baladas Alternativas</a><br>
  15. EOF;
  16.  
  17.  
  18. $dom = new DOMDocument();
  19.  
  20. # Parse the HTML from Google.
  21. # The @ before the method call suppresses any warnings that
  22. # loadHTML might throw because of invalid HTML in the page.
  23. @$dom->loadHTML($html);
  24.  
  25. # Iterate over all the <a> tags
  26. foreach($dom->getElementsByTagName('a') as $link) {
  27.  
  28. $url = $link->getAttribute('href');
  29. $text = preg_replace('/[\r\n]/sm', '', $link->nodeValue); // remove line breaks
  30.  
  31. //if doesn't contain the banned words...
  32. if (!preg_match('/(Baladas Alternativas|another text to filter)/sm', $text)) {
  33. echo $url ." ".$text. "\n";
  34. }
  35.  
  36. }
Success #stdin #stdout 0.01s 20568KB
stdin
Standard input is empty
stdout
escuchar-baladas-de-Albano_Y_Romina_Power.html Albano Y Romina Power
escuchar-baladas-de-Armando_Manzanero.html Armando Manzanero
musica-Merengue-de-Banda_Cuisillos.html Banda Cuisillos