fork download
  1. <?php
  2.  
  /**
   * Fetch a web page and collect its title and <meta> tag values.
   *
   * The returned array may contain:
   *   - 'title': text of the first <title> element, with surrounding
   *     whitespace removed;
   *   - one entry per <meta> tag, keyed by its `property` attribute or, when
   *     that is missing, its `name` attribute, holding the `content`
   *     attribute (a later tag with the same key overwrites an earlier one);
   *   - 'description': the 'og:description' value when present, otherwise the
   *     plain 'description' meta value;
   *   - 'image': a copy of 'og:image' when present.
   *
   * @param string $url Address of the page to inspect.
   * @return array Metadata map; empty when the download fails, the response
   *               body is empty, or the markup cannot be parsed.
   */
  function get_url_metadata($url) {
      $html = _url_metadata_fetch($url);
      if ($html === false) {
          return array();
      }

      return _url_metadata_parse($html);
  }

  /**
   * Download the body at $url with cURL, following redirects.
   *
   * @param string $url Address to download.
   * @return string|false Response body, or false when the transfer fails.
   */
  function _url_metadata_fetch($url) {
      $ch = curl_init();
      if ($ch === false) {
          return false;
      }

      curl_setopt_array($ch, array(
          CURLOPT_URL            => $url,
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_FOLLOWLOCATION => true,
          // Bound the work done per call so an unresponsive host or a
          // redirect loop cannot stall the caller indefinitely.
          CURLOPT_MAXREDIRS      => 10,
          CURLOPT_CONNECTTIMEOUT => 10,
          CURLOPT_TIMEOUT        => 30,
          CURLOPT_USERAGENT      => 'Your_Bot_Name',
      ));

      $body = curl_exec($ch);

      // curl_close() has no effect since PHP 8.0, where handles became
      // objects; it is only needed to release the resource on older versions.
      if (PHP_VERSION_ID < 80000) {
          curl_close($ch);
      }

      return is_string($body) ? $body : false;
  }

  /**
   * Extract the title and meta values from an HTML string.
   *
   * @param string $html Raw markup.
   * @return array Metadata map as described on get_url_metadata().
   */
  function _url_metadata_parse($html) {
      $metadata = array();

      // loadHTML() rejects an empty source (a warning before PHP 8, a
      // ValueError since), so bail out before calling it.
      $source = _url_metadata_escape_non_ascii($html);
      if ($source === '') {
          return $metadata;
      }

      // Collect parser diagnostics internally instead of silencing them with
      // "@", then restore whatever error mode the caller had configured.
      $previous_mode = libxml_use_internal_errors(true);
      $doc = new DOMDocument();
      $loaded = $doc->loadHTML($source);
      libxml_clear_errors();
      libxml_use_internal_errors($previous_mode);

      if ($loaded === false) {
          return $metadata;
      }

      $titles = $doc->getElementsByTagName('title');
      if ($titles->length > 0) {
          $metadata['title'] = trim($titles->item(0)->textContent);
      }

      foreach ($doc->getElementsByTagName('meta') as $meta) {
          // `property` (Open Graph style) takes precedence over `name`.
          $key = $meta->getAttribute('property');
          if ($key === '') {
              $key = $meta->getAttribute('name');
          }
          if ($key !== '') {
              $metadata[$key] = $meta->getAttribute('content');
          }
      }

      // Prefer the Open Graph description over the plain meta description.
      if (isset($metadata['og:description'])) {
          $metadata['description'] = $metadata['og:description'];
      }

      if (isset($metadata['og:image'])) {
          $metadata['image'] = $metadata['og:image'];
      }

      return $metadata;
  }

  /**
   * Rewrite non-ASCII characters of a UTF-8 document as numeric entities.
   *
   * libxml's HTML parser does not assume UTF-8 when the markup declares no
   * charset, which garbles accented and non-Latin text. Entities are decoded
   * the same way whatever encoding the parser settles on. Input that is not
   * valid UTF-8 is returned untouched so the parser's own charset detection
   * still applies.
   *
   * @param string $html Raw markup.
   * @return string Markup that is pure ASCII when the input was valid UTF-8,
   *                otherwise the input unchanged.
   */
  function _url_metadata_escape_non_ascii($html) {
      // A leading UTF-8 byte order mark would otherwise turn into visible
      // text placed in front of the <html> element.
      $has_bom = strncmp($html, "\xEF\xBB\xBF", 3) === 0;
      $candidate = $has_bom ? (string) substr($html, 3) : $html;

      $escaped = preg_replace_callback(
          '/[^\x00-\x7F]/u',
          function ($match) {
              $char = $match[0];
              $length = strlen($char);
              // The lead byte of an N-byte sequence carries (7 - N) payload
              // bits; every continuation byte carries 6 more.
              $code_point = ord($char[0]) & (0xFF >> ($length + 1));
              for ($i = 1; $i < $length; $i++) {
                  $code_point = ($code_point << 6) | (ord($char[$i]) & 0x3F);
              }
              return '&#' . $code_point . ';';
          },
          $candidate
      );

      // With the "u" modifier PCRE returns null for malformed UTF-8.
      return $escaped === null ? $html : $escaped;
  }
  55.  
  // Demo run: look up the metadata of a sample page and dump the resulting
  // array in print_r's human-readable layout.
  $url = 'https://www.google.com';
  $metadata = get_url_metadata($url);

  echo print_r($metadata, true);
  61.  
  62. ?>
  63.  
Success #stdin #stdout 0.04s 25892KB
stdin
Standard input is empty
stdout
Array
(
)