fork download
  1. from bs4 import BeautifulSoup as soup
  2. from urllib.request import urlopen as uReq
  3. import urllib.request
  4. import re
  5.  
  6. lists = open("AmiAmi_mini.csv").read().split("\n")[:-1]
  7.  
  8. filename = "Ami_full.csv"
  9. f = open(filename, "w", encoding="utf-8")
  10. headers = "fig_name_eng, fig_name_jap, Scale, Size, Type, Materials, Sell_price, Price, Sale, Status, Code, date, brand, line_prod, series, char_name, sculpt\n"
  11. f.write(headers)
  12. f.close
  13.  
  14. for url in lists:
  15.  
  16. uClient = uReq(url)
  17. page_html = uClient.read()
  18. uClient.close()
  19. page_soup = soup(page_html, "html.parser")
  20.  
  21. page_soup = soup(page_html, "html.parser")
  22. page_soup.findAll("div", style="width:600px;")
  23.  
  24. f = open(filename, "a", encoding="utf-8")
  25.  
  26. info4 = page_soup.findAll("h2", {"class": "heading_10"})
  27. container4 = info4[0]
  28.  
  29. for container4 in info4:
  30. jap = container4.find('span', class_='').text
  31. if jap:
  32. Jap = jap.strip()
  33. else:
  34. Jap = "NA"
  35. r = str(container4)
  36. container5 = soup(r[:r.rindex('<br/>')], 'html.parser')
  37. if container5:
  38. eng = container5.find('h2').text.strip()
  39. else:
  40. eng = "NA"
  41.  
  42. f.write(eng.replace(",", "|") + "," + jap.replace(",", "|") + ",")
  43.  
  44. info1 = page_soup.findAll("p", {"class": "box_01"})
  45. container1 = info1[0]
  46.  
  47. for container1 in info1:
  48. Scale1 = container1.findAll(text=re.compile('Scale'))
  49. if Scale1:
  50. Scale = Scale1[0].strip(' \t\n\r')
  51. else:
  52. Scale = "NA"
  53.  
  54. Size1 = container1.findAll(text=re.compile('Size'))
  55. if Size1:
  56. Size = Size1[0].strip(' \t\n\r')
  57. else:
  58. Size = "NA"
  59.  
  60. Type1 = info1[0].next_element.strip()
  61. if Type1:
  62. Type = Type1
  63. else:
  64. Type = "NA"
  65.  
  66. Material1 = container1.findAll(text=re.compile('Material'))
  67. if Material1:
  68. Material = Material1[0].strip(' \t\n\r')
  69. else:
  70. Material = "NA"
  71.  
  72. print("Scale: "+Scale)
  73. print("Size: "+Size)
  74. print("Type: "+Type)
  75. print("Materials: "+Material)
  76. f.write(Scale.replace(",", ".") + "," + Size.replace(",", ".") + "," + Type + "," + Material + ",")
  77.  
  78. info2 = page_soup.findAll("ul")
  79. container2 = info2[4]
  80.  
  81. for container2 in info2:
  82. Sell_price1 = container2.findAll("li", {"class": "selling_price"})
  83. if Sell_price1:
  84. Sell_price = Sell_price1[0].text.strip()
  85. else:
  86. Sell_price = "NA"
  87.  
  88. Price1 = container2.findAll("li", {"class": "price"})
  89. if Price1:
  90. Price = Price1[0].findAll(text=re.compile('JPY'))[0].strip()
  91. else:
  92. Price = "NA"
  93.  
  94. sale1 = container2.li("span", {"class": "off_price"})
  95. if sale1:
  96. Sale = sale1[0].text
  97. else:
  98. Sale = "NA"
  99.  
  100. Status = container2.findAll("li", {"class": "selling_price"})
  101. if Status:
  102. Status = Status[1].text.strip()
  103. else:
  104. Status = "NA"
  105.  
  106. print("Sell price: "+Sell_price)
  107. print("Price: "+ Price)
  108. print("Sale: "+Sale)
  109. print("Status: "+Status)
  110. f.write(Sell_price.replace(",", ".") + "," + Price.replace(",", ".") + "," + Sale.replace(",", ".") + "," + Status.replace(",", ".") + ",")
  111.  
  112. info3 = page_soup.findAll("dl", {"class": "spec_data"})
  113. container3 = info3[0]
  114.  
  115. fields = ["JAN Code", "Release Date", "Brand", "Product Line",
  116. "Series Title", "Character Name", "Sculptor"]
  117.  
  118. for container3 in info3:
  119. data = []
  120. for field in fileds:
  121. tag = container3.find('dt', text=field)
  122. if tag:
  123. field_text = tag.find_next_sibling("dd").text.strip().replace(",",".")
  124. else:
  125. field_text = "NA"
  126. data.append(field_text)
  127.  
  128. f.write(",".join(data))
  129.  
  130. f.close()
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/usr/lib/python2.7/py_compile.py", line 117, in compile
    raise py_exc
py_compile.PyCompileError: Sorry: IndentationError: unexpected indent (prog.py, line 115)
stdout
Standard output is empty