fork download
  1. #!/usr/bin/perl
  2.  
  3. use LWP;
  4. use HTTP::Request::Common;
  5. use HTML::TreeBuilder;
  6. use strict;
  7.  
  8. @ARGV == 1 or die "Specify a single URL.\n";
  9.  
  10. # Basic objects.
  11.  
  12. # Return the tree of the URL.
  13. sub tree_of {
  14. my $url = shift @_;
  15. my $tree = HTML::TreeBuilder->new;
  16.  
  17. $url =~ s/^file\://;
  18. if($url =~ /^([a-z]+)\:/) {
  19. # Receive the remote data.
  20. my $ua = LWP::UserAgent->new(agent => "parsepage/1.0 libwww-perl");
  21. my $req = HTTP::Request->new(GET => $url);
  22. sub rcvdata {
  23. my($data, $response, $protocol) = @_;
  24. if($response->content_type() eq 'text/html') {
  25. $tree->parse($data);
  26. } else {
  27. $tree->eof;
  28. }
  29. }
  30. my $resp = $ua->request($req, \&rcvdata);
  31.  
  32. if($resp->is_success) {
  33. if($resp->content_type() ne 'text/html') {
  34. $resp->code(410);
  35. $resp->message("Not HMTL");
  36. return (0, $resp);
  37. }
  38. $tree->eof;
  39. $tree->elementify();
  40. return (1, $tree);
  41. } else {
  42. return (0, $resp);
  43. }
  44. } elsif(-r $url) {
  45. $tree->parse_file($url);
  46. $tree->elementify();
  47. return (1, $tree);
  48. } else {
  49. return (0, HTTP::Response->new(404, "Cannot read $url"));
  50. }
  51. }
  52.  
  53. # Print a string, with indent and wrap.
  54. sub prstr {
  55. my $ind = shift @_;
  56. my $str = shift @_;
  57. my $newind = $ind;
  58. my $wid = 78 - $ind;
  59. my $newwid = $wid - 5;
  60. my $oldsp = ' ' x $ind;
  61. my $newsp = ' ' x ($ind + 5);
  62. my $sp = $oldsp;
  63.  
  64. return if($str =~ /^\s*$/m);
  65.  
  66. $str =~ s/^\s+//;
  67. while(length($str) > $wid) {
  68. $str =~ s/^(.{1,$wid})\s+// or
  69. $str =~ s/^(\S+)\s+// or last;
  70. print "$sp$1\n";
  71. $sp = $newsp;
  72. $wid = $newwid;
  73. #print "FRED: $wid ", length($str), "\n";
  74. }
  75. $str =~ s/\n\s*/\n$newsp/g;
  76. print "$sp$str\n";
  77. }
  78.  
  79. # Print the node.
  80. sub pnode {
  81. my $ind = shift @_;
  82. my $node = shift @_;
  83.  
  84. if(ref $node) {
  85. prstr($ind, $node->starttag());
  86. } else {
  87. prstr($ind, HTML::Entities::encode($node));
  88. }
  89. }
  90.  
  91. # Print the tree.
  92. sub scan_tree {
  93. my $ind = shift @_;
  94. my $node = shift @_;
  95.  
  96. pnode($ind, $node);
  97. if(ref $node) {
  98. foreach my $e($node->content_list()) {
  99. scan_tree($ind + 2, $e);
  100. }
  101. if(!$HTML::Tagset::emptyElement{$node->tag()}) {
  102. print (' ' x $ind);
  103. print '</', $node->tag(), ">\n";
  104. }
  105. }
  106. }
  107.  
  108. my ($succ, $tree) = tree_of($ARGV[0]);
  109. if($succ) { scan_tree(0, $tree); }
  110. else { die "GET $ARGV[0] failed: ", $tree->code, ": ", $tree->message, "\n";}
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
===SORRY!===
Unable to find module 'LWP' in the @*INC directories.
(@*INC contains:
  /home/nIdP0b/.perl6/lib
  /usr/lib/parrot/2.7.0/languages/perl6/lib
  .)
stdout
Standard output is empty