<?php
/**
 * Fetch a URL and extract basic page metadata from its HTML.
 *
 * The returned array contains:
 *  - 'title'       => text of the first <title> element, when present;
 *  - one entry per <meta> tag, keyed by its `property` attribute or, when
 *    that is empty, by its `name` attribute (value = `content` attribute);
 *  - 'description' => the 'og:description' value when present, otherwise the
 *    plain 'description' meta value (if any);
 *  - 'image'       => copy of the 'og:image' value, when present.
 *
 * On any fetch or parse failure an empty array (or whatever was collected
 * so far) is returned instead of raising an error.
 *
 * @param string $url Address of the page to inspect.
 *
 * @return array Map of metadata keys to string values; empty on failure.
 */
function get_url_metadata($url) {
    $metadata = [];

    // 1. Fetch the HTML content of the URL
    $ch = curl_init();
    if ($ch === false) {
        return $metadata;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // Return the body instead of printing it
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // Follow redirects
    curl_setopt($ch, CURLOPT_USERAGENT, 'Your_Bot_Name'); // Set a user agent
    $html = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on failure. An empty body is rejected as well,
    // because DOMDocument::loadHTML() throws a ValueError for '' on PHP 8.
    if (!is_string($html) || $html === '') {
        return $metadata; // Handle errors gracefully
    }

    // 2. Use a DOM parser to extract data
    // Collect libxml parse errors internally rather than silencing with "@",
    // then restore the caller's previous libxml error-handling setting.
    $previous_libxml_setting = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml_setting);

    if ($loaded === false) {
        return $metadata;
    }

    // 2.1 Extract title
    $title_tags = $doc->getElementsByTagName('title');
    if ($title_tags->length > 0) {
        $metadata['title'] = $title_tags->item(0)->nodeValue;
    }

    // 2.2 Extract meta tags
    foreach ($doc->getElementsByTagName('meta') as $meta) {
        $property = $meta->getAttribute('property');
        $name = $meta->getAttribute('name');
        $content = $meta->getAttribute('content');

        // `property` (e.g. Open Graph tags) takes precedence over `name`.
        if (!empty($property)) {
            $metadata[$property] = $content;
        } elseif (!empty($name)) {
            $metadata[$name] = $content;
        }
    }

    // 2.3 Extract description (common case)
    // Prefer og:description; a plain 'description' meta is already stored
    // under the same key by the loop above, so it needs no extra handling.
    if (isset($metadata['og:description'])) {
        $metadata['description'] = $metadata['og:description'];
    }

    // 2.4 Extract image (common case)
    if (isset($metadata['og:image'])) {
        $metadata['image'] = $metadata['og:image'];
    }

    return $metadata;
}
// Example usage: look up the metadata of a sample page and keep the result
// in $metadata. These statements run whenever this file is executed or
// included; nothing is printed here.
$url = 'https://www.google.com';
$metadata = get_url_metadata($url);
?>
PD9waHAKCmZ1bmN0aW9uIGdldF91cmxfbWV0YWRhdGEoJHVybCkgewogICRtZXRhZGF0YSA9IGFycmF5KCk7CgogIC8vIDEuIEZldGNoIHRoZSBIVE1MIGNvbnRlbnQgb2YgdGhlIFVSTAogICRjaCA9IGN1cmxfaW5pdCgpOwogIGN1cmxfc2V0b3B0KCRjaCwgQ1VSTE9QVF9VUkwsICR1cmwpOwogIGN1cmxfc2V0b3B0KCRjaCwgQ1VSTE9QVF9SRVRVUk5UUkFOU0ZFUiwgdHJ1ZSk7CiAgY3VybF9zZXRvcHQoJGNoLCBDVVJMT1BUX0ZPTExPV0xPQ0FUSU9OLCB0cnVlKTsgLy8gRm9sbG93IHJlZGlyZWN0cwogIGN1cmxfc2V0b3B0KCRjaCwgQ1VSTE9QVF9VU0VSQUdFTlQsICdZb3VyX0JvdF9OYW1lJyk7IC8vIFNldCBhIHVzZXIgYWdlbnQKICAkaHRtbCA9IGN1cmxfZXhlYygkY2gpOwogIGN1cmxfY2xvc2UoJGNoKTsKCiAgaWYgKCRodG1sID09PSBmYWxzZSkgewogICAgcmV0dXJuICRtZXRhZGF0YTsgLy8gSGFuZGxlIGVycm9ycyBncmFjZWZ1bGx5CiAgfQoKICAvLyAyLiBVc2UgYSBET00gcGFyc2VyIHRvIGV4dHJhY3QgZGF0YQogICRkb2MgPSBuZXcgRE9NRG9jdW1lbnQoKTsKICBAJGRvYy0+bG9hZEhUTUwoJGh0bWwpOyAvLyBTdXBwcmVzcyB3YXJuaW5ncyBvbiBpbnZhbGlkIEhUTUwKCiAgLy8gMi4xIEV4dHJhY3QgdGl0bGUKICAkdGl0bGVfdGFncyA9ICRkb2MtPmdldEVsZW1lbnRzQnlUYWdOYW1lKCd0aXRsZScpOwogIGlmICgkdGl0bGVfdGFncy0+bGVuZ3RoID4gMCkgewogICAgJG1ldGFkYXRhWyd0aXRsZSddID0gJHRpdGxlX3RhZ3MtPml0ZW0oMCktPm5vZGVWYWx1ZTsKICB9CgogIC8vIDIuMiBFeHRyYWN0IG1ldGEgdGFncwogICRtZXRhX3RhZ3MgPSAkZG9jLT5nZXRFbGVtZW50c0J5VGFnTmFtZSgnbWV0YScpOwogIGZvcmVhY2ggKCRtZXRhX3RhZ3MgYXMgJG1ldGEpIHsKICAgICRwcm9wZXJ0eSA9ICRtZXRhLT5nZXRBdHRyaWJ1dGUoJ3Byb3BlcnR5Jyk7CiAgICAkbmFtZSA9ICRtZXRhLT5nZXRBdHRyaWJ1dGUoJ25hbWUnKTsKICAgICRjb250ZW50ID0gJG1ldGEtPmdldEF0dHJpYnV0ZSgnY29udGVudCcpOwoKICAgIGlmICghZW1wdHkoJHByb3BlcnR5KSkgewogICAgICAkbWV0YWRhdGFbJHByb3BlcnR5XSA9ICRjb250ZW50OwogICAgfSBlbHNlaWYgKCFlbXB0eSgkbmFtZSkpIHsKICAgICAgJG1ldGFkYXRhWyRuYW1lXSA9ICRjb250ZW50OwogICAgfQogIH0KCiAgLy8gMi4zIEV4dHJhY3QgZGVzY3JpcHRpb24gKGNvbW1vbiBjYXNlKQogIGlmIChpc3NldCgkbWV0YWRhdGFbJ29nOmRlc2NyaXB0aW9uJ10pIHx8IGlzc2V0KCRtZXRhZGF0YVsnZGVzY3JpcHRpb24nXSkpIHsKICAgICRtZXRhZGF0YVsnZGVzY3JpcHRpb24nXSA9IGlzc2V0KCRtZXRhZGF0YVsnb2c6ZGVzY3JpcHRpb24nXSkgPyAkbWV0YWRhdGFbJ29nOmRlc2NyaXB0aW9uJ10gOiAkbWV0YWRhdGFbJ2Rlc2NyaXB0aW9uJ107CiAgfQoKICAvLyAyLjQgRXh0cmFjdCBpbWFnZSAoY29tbW9uIGNhc2UpCiAgaWYgKGlzc2V0KCRtZXRhZGF0YVsn
b2c6aW1hZ2UnXSkpIHsKICAgICRtZXRhZGF0YVsnaW1hZ2UnXSA9ICRtZXRhZGF0YVsnb2c6aW1hZ2UnXTsKICB9CgogIHJldHVybiAkbWV0YWRhdGE7Cn0KCi8vIEV4YW1wbGUgdXNhZ2U6CiR1cmwgPSAnaHR0cHM6Ly93d3cuZ29vZ2xlLmNvbSc7CiRtZXRhZGF0YSA9IGdldF91cmxfbWV0YWRhdGEoJHVybCk7CgpwcmludF9yKCRtZXRhZGF0YSk7Cgo/Pgo=