fork download
  1. import json
  2.  
  3. import attr
  4. import lxml.html
  5.  
  6.  
  @attr.s
  class Child:
      """One node of the forum-map tree.

      Instances are filled in by ``get_child`` and then converted to plain
      dicts with ``attr.asdict`` for JSON serialization.
      """

      # Text content of the node's <span>; stays None for bare <ul> nodes.
      title = attr.ib(default=None)
      # First href found under span/a, or None when there is no link.
      link = attr.ib(default=None)
      # Nested child nodes. attr.Factory builds a fresh list per instance;
      # a literal ``[]`` default would be a single list shared by all instances.
      subtree = attr.ib(default=attr.Factory(list))
  12.  
  13.  
  def first(iterable, default=None):
      """Return the first item of *iterable*, or *default* if it is empty.

      Args:
          iterable: Any iterable; only its first item is consumed.
          default: Value returned when *iterable* yields nothing.

      Returns:
          The first item produced by *iterable*, otherwise *default*.
      """
      # Two-argument next() returns the fallback instead of raising
      # StopIteration, so the caller-supplied default is actually honoured.
      return next(iter(iterable), default)
  19.  
  20.  
  def get_tree(node):
      """Build the list of converted children for *node*.

      Every element yielded by ``node.iterchildren()`` is passed through
      ``get_child``; the results are returned in iteration order.
      """
      return [get_child(element) for element in node.iterchildren()]
  27.  
  28.  
  def get_child(child_node):
      """Convert one ``<ul>`` or ``<li>`` element into a plain dict.

      A ``<ul>`` contributes only a ``subtree`` built from its children.
      An ``<li>`` contributes a ``title`` (text of its direct ``<span>``),
      a ``link`` (first ``span/a/@href``, if any) and, when it contains a
      direct ``<ul>``, a ``subtree`` built from that list.

      Args:
          child_node: Element exposing ``tag``, ``find`` and ``xpath``
              (an lxml.html element in this script).

      Returns:
          ``attr.asdict`` of the populated ``Child``.

      Raises:
          ValueError: If the element's tag is neither ``ul`` nor ``li``.
      """
      child = Child()
      tag = child_node.tag
      if tag == 'ul':
          child.subtree = get_tree(child_node)
      elif tag == 'li':
          # find() returns None when the <li> has no direct <span>; leave the
          # title at its default rather than failing on None.text_content().
          span = child_node.find('span')
          if span is not None:
              child.title = span.text_content()
          child.link = first(child_node.xpath('span/a/@href'))
          ul = child_node.find('ul')
          if ul is not None:
              child.subtree = get_tree(ul)
      else:
          # ValueError is a subclass of Exception, so existing broad handlers
          # still catch it.
          raise ValueError("Unexpected tag: {}".format(tag))
      return attr.asdict(child)
  43.  
  44.  
  def main():
      """Convert the saved forum-map HTML page into a JSON tree.

      Reads ``./rutracker_cr_forum_map.html``, locates the element with id
      ``f-map``, converts its children with ``get_tree`` and writes the
      result to ``rutracker_cr_forum_map.json``.
      """
      # NOTE(review): the input is still decoded with the platform default
      # encoding, as before; confirm the saved page's real encoding.
      with open('./rutracker_cr_forum_map.html') as fin:
          html = lxml.html.fromstring(fin.read())
      fmap = html.get_element_by_id('f-map')
      tree = get_tree(fmap)
      # ensure_ascii=False emits non-ASCII characters verbatim, so the output
      # encoding is pinned to UTF-8 instead of depending on the locale.
      with open('rutracker_cr_forum_map.json', 'w', encoding='utf-8') as fout:
          json.dump(tree, fout, ensure_ascii=False, indent=4)


  if __name__ == "__main__":
      main()
  56.  
Runtime error #stdin #stdout #stderr 0.02s 30744KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "./prog.py", line 3, in <module>
    import attr
ImportError: No module named 'attr'