fork download
  1. import scrapy
  2. import json
  3.  
  4. class RTSpider(scrapy.Spider):
  5. name = "RTSpider"
  6. start_urls = ['https://r...content-available-to-author-only...r.cr/forum/index.php?map=1']
  7.  
  8. def parse(self, response):
  9.  
  10. def getTree(node):
  11. nonlocal raw_map
  12. node_parent = node
  13. tree = {}
  14. ul_amt = range(len(raw_map.css(node+">*")))
  15. node += ">ul:nth-child({})>li:nth-child({})"
  16. for ul_position in ul_amt:
  17. li_amt = range(len(raw_map.css(
  18. node_parent+">ul:nth-child({})>*".format(ul_position+1)))
  19. )
  20. for li_position in li_amt:
  21. nodeaddr = node.format(ul_position+1, li_position+1)
  22. if raw_map.css(nodeaddr):
  23. cat_node_name = raw_map.css(nodeaddr + ">span>span::attr(title)").extract_first()
  24. forum_node_name = raw_map.css(nodeaddr + ">span a::text").extract_first()
  25. node_name = forum_node_name if forum_node_name else cat_node_name
  26. link = raw_map.css(nodeaddr + ">span a::attr(href)").extract_first()
  27. tree[node_name] = {"link": link, "subtree": getTree(nodeaddr)}
  28. return tree
  29.  
  30. raw_map = response.css("#f-map")
  31. map = getTree("#f-map")
  32. with open("crack.json", 'w', encoding='utf-8') as f:
  33. f.write(json.dumps(map, indent=4, ensure_ascii=False))
Runtime error #stdin #stdout #stderr 0.02s 28384KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "./prog.py", line 1, in <module>
    import scrapy
ImportError: No module named 'scrapy'