import scrapy
import json
class RTSpider(scrapy.Spider):
    """Spider that dumps the ``#f-map`` element of the start page as JSON.

    The nested ``<ul>``/``<li>`` structure under ``#f-map`` is converted
    into a nested dict and written to ``crack.json``.
    """

    name = "RTSpider"
    # NOTE(review): this URL looks redacted/truncated ("...") — confirm the
    # real address before running the spider.
    start_urls = ['https://r...content-available-to-author-only...r.cr/forum/index.php?map=1']

    def parse(self, response):
        """Build the tree rooted at ``#f-map`` and write it to ``crack.json``.

        Args:
            response: The downloaded page; only ``response.css`` is used.

        The file is opened relative to the current working directory,
        truncated, and written as UTF-8 JSON with a 4-space indent and
        non-ASCII characters left unescaped. Nothing is yielded.
        """
        tree = self._build_tree(response.css("#f-map"))
        with open("crack.json", 'w', encoding='utf-8') as f:
            json.dump(tree, f, indent=4, ensure_ascii=False)

    def _build_tree(self, node):
        """Return the subtree below ``node`` as a nested dict.

        Args:
            node: A selector (or selector list) whose direct ``<ul>``
                children hold the ``<li>`` entries of this level.

        Returns:
            A dict mapping each entry's name to
            ``{"link": <href or None>, "subtree": <nested dict>}``.
            Entries are inserted in document order; entries sharing a name
            on the same level overwrite each other, and an entry with no
            name at all is stored under the key ``None``.
        """
        tree = {}
        # Query relative to the current node rather than rebuilding an
        # absolute selector from the root for every single lookup.
        for item in node.xpath("./ul/li"):
            # Name taken from the link text inside the entry's <span>.
            link_text = item.xpath("./span//a/text()").extract_first()
            # Fallback name: the title attribute of the nested <span>.
            span_title = item.xpath("./span/span/@title").extract_first()
            href = item.xpath("./span//a/@href").extract_first()
            tree[link_text or span_title] = {
                "link": href,
                "subtree": self._build_tree(item),
            }
        return tree
aW1wb3J0IHNjcmFweQppbXBvcnQganNvbgoKY2xhc3MgUlRTcGlkZXIoc2NyYXB5LlNwaWRlcik6CgluYW1lID0gIlJUU3BpZGVyIgoJc3RhcnRfdXJscyA9IFsnaHR0cHM6Ly9yLi4uY29udGVudC1hdmFpbGFibGUtdG8tYXV0aG9yLW9ubHkuLi5yLmNyL2ZvcnVtL2luZGV4LnBocD9tYXA9MSddCgoJZGVmIHBhcnNlKHNlbGYsIHJlc3BvbnNlKToKCgkJZGVmIGdldFRyZWUobm9kZSk6CgkJCW5vbmxvY2FsIHJhd19tYXAKCQkJbm9kZV9wYXJlbnQgPSBub2RlCgkJCXRyZWUgPSB7fQoJCQl1bF9hbXQgPSByYW5nZShsZW4ocmF3X21hcC5jc3Mobm9kZSsiPioiKSkpCgkJCW5vZGUgKz0gIj51bDpudGgtY2hpbGQoe30pPmxpOm50aC1jaGlsZCh7fSkiCgkJCWZvciB1bF9wb3NpdGlvbiBpbiB1bF9hbXQ6CgkJCQlsaV9hbXQgPSByYW5nZShsZW4ocmF3X21hcC5jc3MoCgkJCQkJbm9kZV9wYXJlbnQrIj51bDpudGgtY2hpbGQoe30pPioiLmZvcm1hdCh1bF9wb3NpdGlvbisxKSkpCgkJCQkpCgkJCQlmb3IgbGlfcG9zaXRpb24gaW4gbGlfYW10OgoJCQkJCW5vZGVhZGRyID0gbm9kZS5mb3JtYXQodWxfcG9zaXRpb24rMSwgbGlfcG9zaXRpb24rMSkKCQkJCQlpZiByYXdfbWFwLmNzcyhub2RlYWRkcik6IAoJCQkJCQljYXRfbm9kZV9uYW1lID0gcmF3X21hcC5jc3Mobm9kZWFkZHIgKyAiPnNwYW4+c3Bhbjo6YXR0cih0aXRsZSkiKS5leHRyYWN0X2ZpcnN0KCkKCQkJCQkJZm9ydW1fbm9kZV9uYW1lID0gcmF3X21hcC5jc3Mobm9kZWFkZHIgKyAiPnNwYW4gYTo6dGV4dCIpLmV4dHJhY3RfZmlyc3QoKQoJCQkJCQlub2RlX25hbWUgPSBmb3J1bV9ub2RlX25hbWUgaWYgZm9ydW1fbm9kZV9uYW1lIGVsc2UgY2F0X25vZGVfbmFtZQoJCQkJCQlsaW5rID0gcmF3X21hcC5jc3Mobm9kZWFkZHIgKyAiPnNwYW4gYTo6YXR0cihocmVmKSIpLmV4dHJhY3RfZmlyc3QoKQoJCQkJCQl0cmVlW25vZGVfbmFtZV0gPSB7ImxpbmsiOiBsaW5rLCAic3VidHJlZSI6IGdldFRyZWUobm9kZWFkZHIpfQoJCQlyZXR1cm4gdHJlZQoKCQlyYXdfbWFwID0gcmVzcG9uc2UuY3NzKCIjZi1tYXAiKQoJCW1hcCA9IGdldFRyZWUoIiNmLW1hcCIpCgkJd2l0aCBvcGVuKCJjcmFjay5qc29uIiwgJ3cnLCBlbmNvZGluZz0ndXRmLTgnKSBhcyBmOgoJCQlmLndyaXRlKGpzb24uZHVtcHMobWFwLCBpbmRlbnQ9NCwgZW5zdXJlX2FzY2lpPUZhbHNlKSk=