fork download
  1. import json
  2.  
  3. import lxml.html
  4.  
  5.  
  6. def first(iterable, default=None):
  7. try:
  8. return next(iter(iterable))
  9. except StopIteration:
  10. return None
  11.  
  12.  
  13. def get_tree(node):
  14. childs = []
  15. for child_node in node.iterchildren():
  16. child = get_child(child_node)
  17. childs.append(child)
  18. return childs
  19.  
  20.  
  21. def get_child(child_node):
  22. child = {}
  23. tag = child_node.tag
  24. if tag == 'ul':
  25. child['subtree'] = get_tree(child_node)
  26. elif tag == 'li':
  27. child['title'] = child_node.find('span').text_content()
  28. child['link'] = first(child_node.xpath('span/a/@href'))
  29. ul = child_node.find('ul')
  30. if ul is not None:
  31. child['subtree'] = get_tree(ul)
  32. else:
  33. raise Exception("Unexpected tag: {}".format(tag))
  34. return child
  35.  
  36.  
  37. def main():
  38. with open('./rutracker_cr_forum_map.html') as fin:
  39. html = lxml.html.fromstring(fin.read())
  40. fmap = html.get_element_by_id('f-map')
  41. tree = get_tree(fmap)
  42. with open('rutracker_cr_forum_map.json', 'w') as fout:
  43. json.dump(tree, fout, ensure_ascii=False, indent=4)
  44.  
  45.  
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
  48.  
Runtime error #stdin #stdout #stderr 0.04s 98240KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "./prog.py", line 47, in <module>
    main()
  File "./prog.py", line 38, in main
    with open('./rutracker_cr_forum_map.html') as fin:
FileNotFoundError: [Errno 2] No such file or directory: './rutracker_cr_forum_map.html'