fork download
  1. import sys
  2. from xml.etree import cElementTree as etree
  3.  
  4. try:
  5. from HTMLParser import HTMLParser
  6. except ImportError:
  7. from html.parser import HTMLParser
  8.  
  9. class LinksParser(HTMLParser):
  10. def __init__(self):
  11. HTMLParser.__init__(self)
  12. self.tb = etree.TreeBuilder()
  13.  
  14. def handle_starttag(self, tag, attributes):
  15. self.tb.start(tag, dict(attributes))
  16.  
  17. def handle_endtag(self, tag):
  18. self.tb.end(tag)
  19.  
  20. def handle_data(self, data):
  21. self.tb.data(data)
  22.  
  23. def close(self):
  24. HTMLParser.close(self)
  25. return self.tb.close()
  26.  
  27. parser = LinksParser()
  28. parser.feed(sys.stdin.read())
  29. root = parser.close()
  30. span = root.find(".//span[@itemprop='description']")
  31. etree.ElementTree(span).write(sys.stdout)
  32.  
Success #stdin #stdout 0.13s 11520KB
stdin
<root>
<span itemprop="description">
<h1>My First Heading</h1>
<p>My first <br/><br/>paragraph.</p>
</span>
</root>
stdout
<span itemprop="description">
<h1>My First Heading</h1>
<p>My first <br /><br />paragraph.</p>
</span>