#!/usr/bin/perl
use LWP;
use HTTP:: Request :: Common ;
use HTML:: TreeBuilder ;
use strict;
@ARGV == 1 or die "Specify a single URL.\n " ;
# Basic objects.
# Return the tree of the URL.
sub tree_of {
my $url = shift @_;
my $tree = HTML:: TreeBuilder -> new;
$url = ~ s/^ file\: //;
if ( $url = ~ /^ ( [ a- z] + ) \:/ ) {
# Receive the remote data.
my $ua = LWP:: UserAgent -> new( agent => "parsepage/1.0 libwww-perl" ) ;
my $req = HTTP:: Request -> new( GET => $url) ;
sub rcvdata {
my( $data, $response, $protocol) = @_;
if ( $response-> content_type( ) eq 'text/html' ) {
$tree-> parse( $data) ;
} else {
$tree-> eof;
}
}
my $resp = $ua-> request( $req, \& rcvdata) ;
if ( $resp-> is_success) {
if ( $resp-> content_type( ) ne 'text/html' ) {
$resp-> code( 410 ) ;
$resp-> message( "Not HMTL" ) ;
return ( 0 , $resp) ;
}
$tree-> eof;
$tree-> elementify( ) ;
return ( 1 , $tree) ;
} else {
return ( 0 , $resp) ;
}
} elsif( - r $url) {
$tree-> parse_file( $url) ;
$tree-> elementify( ) ;
return ( 1 , $tree) ;
} else {
return ( 0 , HTTP:: Response -> new( 404 , "Cannot read $url" ) ) ;
}
}
# Print a string, with indent and wrap.
sub prstr {
my $ind = shift @_;
my $str = shift @_;
my $newind = $ind;
my $wid = 78 - $ind;
my $newwid = $wid - 5 ;
my $oldsp = ' ' x $ind;
my $newsp = ' ' x ( $ind + 5 ) ;
my $sp = $oldsp;
return if ( $str = ~ /^ \s* $/ m) ;
$str = ~ s/^ \s+ //;
while ( length( $str) > $wid) {
$str = ~ s/^ ( .{ 1 , $wid} ) \s+ // or
$str = ~ s/^ ( \S+ ) \s+ // or last;
print "$sp$1\n " ;
$sp = $newsp;
$wid = $newwid;
#print "FRED: $wid ", length($str), "\n";
}
$str = ~ s/ \n\s*/ \n$newsp/ g;
print "$sp$str\n " ;
}
# Print the node.
sub pnode {
my $ind = shift @_;
my $node = shift @_;
if ( ref $node) {
prstr( $ind, $node-> starttag( ) ) ;
} else {
prstr( $ind, HTML:: Entities :: encode ( $node) ) ;
}
}
# Print the tree.
sub scan_tree {
my $ind = shift @_;
my $node = shift @_;
pnode( $ind, $node) ;
if ( ref $node) {
foreach my $e( $node-> content_list( ) ) {
scan_tree( $ind + 2 , $e) ;
}
if ( ! $HTML:: Tagset :: emptyElement { $node-> tag( ) } ) {
print ( ' ' x $ind) ;
print '</' , $node-> tag( ) , ">\n " ;
}
}
}
my ( $succ, $tree) = tree_of( $ARGV[ 0 ] ) ;
if ( $succ) { scan_tree( 0 , $tree) ; }
else { die "GET $ARGV[0] failed: " , $tree-> code, ": " , $tree-> message, "\n " ; }
IyEvdXNyL2Jpbi9wZXJsCgp1c2UgTFdQOwp1c2UgSFRUUDo6UmVxdWVzdDo6Q29tbW9uOwp1c2UgSFRNTDo6VHJlZUJ1aWxkZXI7CnVzZSBzdHJpY3Q7CgpAQVJHViA9PSAxIG9yIGRpZSAiU3BlY2lmeSBhIHNpbmdsZSBVUkwuXG4iOwoKIyBCYXNpYyBvYmplY3RzLgoKIyBSZXR1cm4gdGhlIHRyZWUgb2YgdGhlIFVSTC4Kc3ViIHRyZWVfb2YgewogICAgbXkgJHVybCA9IHNoaWZ0IEBfOwogICAgbXkgJHRyZWUgPSBIVE1MOjpUcmVlQnVpbGRlci0+bmV3OwoKICAgICR1cmwgPX4gcy9eZmlsZVw6Ly87CiAgICBpZigkdXJsID1+IC9eKFthLXpdKylcOi8pIHsKICAgICAgICAjIFJlY2VpdmUgdGhlIHJlbW90ZSBkYXRhLgogICAgICAgIG15ICR1YSA9IExXUDo6VXNlckFnZW50LT5uZXcoYWdlbnQgPT4gInBhcnNlcGFnZS8xLjAgbGlid3d3LXBlcmwiKTsKICAgICAgICBteSAkcmVxID0gSFRUUDo6UmVxdWVzdC0+bmV3KEdFVCA9PiAkdXJsKTsKICAgICAgICBzdWIgcmN2ZGF0YSB7CiAgICAgICAgICAgIG15KCRkYXRhLCAkcmVzcG9uc2UsICRwcm90b2NvbCkgPSBAXzsKICAgICAgICAgICAgaWYoJHJlc3BvbnNlLT5jb250ZW50X3R5cGUoKSBlcSAndGV4dC9odG1sJykgewogICAgICAgICAgICAgICAgJHRyZWUtPnBhcnNlKCRkYXRhKTsKICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgICR0cmVlLT5lb2Y7CiAgICAgICAgICAgIH0KICAgICAgICB9CiAgICAgICAgbXkgJHJlc3AgPSAkdWEtPnJlcXVlc3QoJHJlcSwgXCZyY3ZkYXRhKTsKCiAgICAgICAgaWYoJHJlc3AtPmlzX3N1Y2Nlc3MpIHsKICAgICAgICAgICAgaWYoJHJlc3AtPmNvbnRlbnRfdHlwZSgpIG5lICd0ZXh0L2h0bWwnKSB7CiAgICAgICAgICAgICAgICAkcmVzcC0+Y29kZSg0MTApOwogICAgICAgICAgICAgICAgJHJlc3AtPm1lc3NhZ2UoIk5vdCBITVRMIik7CiAgICAgICAgICAgICAgICByZXR1cm4gKDAsICRyZXNwKTsKICAgICAgICAgICAgfQogICAgICAgICAgICAkdHJlZS0+ZW9mOwogICAgICAgICAgICAkdHJlZS0+ZWxlbWVudGlmeSgpOwogICAgICAgICAgICByZXR1cm4gKDEsICR0cmVlKTsKICAgICAgICB9IGVsc2UgewogICAgICAgICAgICByZXR1cm4gKDAsICRyZXNwKTsKICAgICAgICB9CiAgICB9IGVsc2lmKC1yICR1cmwpIHsKICAgICAgICAkdHJlZS0+cGFyc2VfZmlsZSgkdXJsKTsKICAgICAgICAkdHJlZS0+ZWxlbWVudGlmeSgpOwogICAgICAgIHJldHVybiAoMSwgJHRyZWUpOwogICAgfSBlbHNlIHsKICAgICAgICByZXR1cm4gKDAsIEhUVFA6OlJlc3BvbnNlLT5uZXcoNDA0LCAiQ2Fubm90IHJlYWQgJHVybCIpKTsKICAgIH0KfQoKIyBQcmludCBhIHN0cmluZywgd2l0aCBpbmRlbnQgYW5kIHdyYXAuCnN1YiBwcnN0ciB7CiAgICBteSAkaW5kID0gc2hpZnQgQF87CiAgICBteSAkc3RyID0gc2hpZnQgQF87CiAgICBteSAkbmV3aW5kID0gJGluZDsKICAgIG15ICR3aWQgPSA3OCAtICRpbmQ7CiAgICBteSAkbmV3d2lkID0gJHdpZCAtIDU7CiAgICBteSAkb2xkc3AgPSAnICcgeCAkaW5kOwogICAgbXkgJG5ld3NwID0gJyAnIHggKCRpbmQgKyA1KTsKICAgIG15ICRzcCA9ICRvbGRzcDsKCiAgICByZXR1cm4gaWYoJHN0ciA9fiAvXlxzKiQvbSk7CgogICAgJHN0ciA9fiBzL15ccysvLzsKICAgIHdoaWxlKGxlbmd0aCgkc3RyKSA+ICR3aWQpIHsKICAgICAgICAkc3RyID1+IHMvXiguezEsJHdpZH0pXHMrLy8gb3IgCiAgICAgICAgICAgICRzdHIgPX4gcy9eKFxTKylccysvLyBvciBsYXN0OwogICAgICAgIHByaW50ICIkc3AkMVxuIjsKICAgICAgICAkc3AgPSAkbmV3c3A7CiAgICAgICAgJHdpZCA9ICRuZXd3aWQ7CiAgICAgICAgI3ByaW50ICJGUkVEOiAkd2lkICIsIGxlbmd0aCgkc3RyKSwgIlxuIjsKICAgIH0KICAgICRzdHIgPX4gcy9cblxzKi9cbiRuZXdzcC9nOwogICAgcHJpbnQgIiRzcCRzdHJcbiI7Cn0KCiMgUHJpbnQgdGhlIG5vZGUuCnN1YiBwbm9kZSB7CiAgICBteSAkaW5kID0gc2hpZnQgQF87CiAgICBteSAkbm9kZSA9IHNoaWZ0IEBfOwoKICAgIGlmKHJlZiAkbm9kZSkgewogICAgICAgIHByc3RyKCRpbmQsICRub2RlLT5zdGFydHRhZygpKTsKICAgIH0gZWxzZSB7CiAgICAgICAgcHJzdHIoJGluZCwgSFRNTDo6RW50aXRpZXM6OmVuY29kZSgkbm9kZSkpOwogICAgfQp9CgojIFByaW50IHRoZSB0cmVlLgpzdWIgc2Nhbl90cmVlIHsKICAgIG15ICRpbmQgPSBzaGlmdCBAXzsKICAgIG15ICRub2RlID0gc2hpZnQgQF87CgogICAgcG5vZGUoJGluZCwgJG5vZGUpOwogICAgaWYocmVmICRub2RlKSB7CiAgICAgICAgZm9yZWFjaCBteSAkZSgkbm9kZS0+Y29udGVudF9saXN0KCkpIHsKICAgICAgICAgICAgc2Nhbl90cmVlKCRpbmQgKyAyLCAkZSk7CiAgICAgICAgfQogICAgICAgIGlmKCEkSFRNTDo6VGFnc2V0OjplbXB0eUVsZW1lbnR7JG5vZGUtPnRhZygpfSkgewogICAgICAgICAgICBwcmludCAoJyAnIHggJGluZCk7CiAgICAgICAgICAgIHByaW50ICc8LycsICRub2RlLT50YWcoKSwgIj5cbiI7CiAgICAgICAgfQogICAgfQp9CgpteSAoJHN1Y2MsICR0cmVlKSA9IHRyZWVfb2YoJEFSR1ZbMF0pOwppZigkc3VjYykgeyBzY2FuX3RyZWUoMCwgJHRyZWUpOyB9CmVsc2UgeyBkaWUgIkdFVCAkQVJHVlswXSBmYWlsZWQ6ICIsICR0cmVlLT5jb2RlLCAiOiAiLCAkdHJlZS0+bWVzc2FnZSwgIlxuIjt9