fork(1) download
  1. from bs4 import BeautifulSoup as soup
  2. from urllib.request import urlopen as uReq
  3. import urllib.request
  4. import re
  5.  
  6. lists = open("AmiAmi_mini.csv").read().split("\n")[:-1]
  7.  
  8. filename = "Ami_full.csv"
  9. f = open(filename, "w", encoding="utf-8")
  10. headers = "fig_name_eng, fig_name_jap, Scale, Size, Type, Materials, Sell_price, Price, Sale, Status, Code, date, brand, line_prod, series, char_name, sculpt\n"
  11. f.write(headers)
  12. f.close
  13.  
  14. for url in lists:
  15.  
  16. uClient = uReq(url)
  17. page_html = uClient.read()
  18. uClient.close()
  19. page_soup = soup(page_html, "html.parser")
  20.  
  21. page_soup = soup(page_html, "html.parser")
  22. page_soup.findAll("div", style="width:600px;")
  23.  
  24. f = open(filename, "a", encoding="utf-8")
  25.  
  26. info4 = page_soup.findAll("h2", {"class": "heading_10"})
  27. container4 = info4[0]
  28.  
  29. for container4 in info4:
  30. jap = container4.find('span', class_='').text
  31. if jap:
  32. Jap = jap.strip()
  33. else:
  34. Jap = "NA"
  35. r = str(container4)
  36. container5 = soup(r[:r.rindex('<br/>')], 'html.parser')
  37. if container5:
  38. eng = container5.find('h2').text.strip()
  39. else:
  40. eng = "NA"
  41.  
  42. f.write(eng.replace(",", "|") + "," + jap.replace(",", "|") + ",")
  43.  
  44. info1 = page_soup.findAll("p", {"class": "box_01"})
  45. container1 = info1[0]
  46.  
  47. for container1 in info1:
  48. Scale1 = container1.findAll(text=re.compile('Scale'))
  49. if Scale1:
  50. Scale = Scale1[0].strip(' \t\n\r')
  51. else:
  52. Scale = "NA"
  53.  
  54. Size1 = container1.findAll(text=re.compile('Size'))
  55. if Size1:
  56. Size = Size1[0].strip(' \t\n\r')
  57. else:
  58. Size = "NA"
  59.  
  60. Type1 = info1[0].next_element.strip()
  61. if Type1:
  62. Type = Type1
  63. else:
  64. Type = "NA"
  65.  
  66. Material1 = container1.findAll(text=re.compile('Material'))
  67. if Material1:
  68. Material = Material1[0].strip(' \t\n\r')
  69. else:
  70. Material = "NA"
  71.  
  72. print("Scale: "+Scale)
  73. print("Size: "+Size)
  74. print("Type: "+Type)
  75. print("Materials: "+Material)
  76. f.write(Scale.replace(",", ".") + "," + Size.replace(",", ".") + "," + Type + "," + Material + ",")
  77.  
  78. info2 = page_soup.findAll("ul")
  79. container2 = info2[4]
  80.  
  81. for container2 in info2:
  82. Sell_price1 = container2.findAll("li", {"class": "selling_price"})
  83. if Sell_price1:
  84. Sell_price = Sell_price1[0].text.strip()
  85. else:
  86. Sell_price = "NA"
  87.  
  88. Price1 = container2.findAll("li", {"class": "price"})
  89. if Price1:
  90. Price = Price1[0].findAll(text=re.compile('JPY'))[0].strip()
  91. else:
  92. Price = "NA"
  93.  
  94. sale1 = container2.li("span", {"class": "off_price"})
  95. if sale1:
  96. Sale = sale1[0].text
  97. else:
  98. Sale = "NA"
  99.  
  100. Status = container2.findAll("li", {"class": "selling_price"})
  101. if Status:
  102. Status = Status[1].text.strip()
  103. else:
  104. Status = "NA"
  105.  
  106. print("Sell price: "+Sell_price)
  107. print("Price: "+ Price)
  108. print("Sale: "+Sale)
  109. print("Status: "+Status)
  110. f.write(Sell_price.replace(",", ".") + "," + Price.replace(",", ".") + "," + Sale.replace(",", ".") + "," + Status.replace(",", ".") + ",")
  111.  
  112. info3 = page_soup.findAll("dl", {"class": "spec_data"})
  113. container3 = info3[0]
  114.  
  115. for container3 in info3:
  116. code = container3.find('dt', text='JAN Code')
  117. if code:
  118. Code = code.find_next_sibling("dd").text.strip()
  119. else:
  120. Code = "NA"
  121.  
  122. Release_Date = container3.find('dt', text='Release Date')
  123. if Release_Date:
  124. date = Release_Date.find_next_sibling("dd").text.strip()
  125. else:
  126. date = "NA"
  127.  
  128. Brand = container3.find('dt', text='Brand')
  129. if Brand:
  130. brand = Brand.find_next_sibling("dd").text.strip()
  131. else:
  132. brand = "NA"
  133.  
  134. Line = container3.find('dt', text='Product Line')
  135. if Line:
  136. line_prod = Line.find_next_sibling("dd").text.strip()
  137. else:
  138. line_prod = "NA"
  139.  
  140. Series = container3.find('dt', text='Series Title')
  141. if Series:
  142. series = Series.find_next_sibling("dd").text.strip()
  143. else:
  144. series = "NA"
  145.  
  146. Name = container3.find('dt', text='Character Name')
  147. if Name:
  148. char_name = Name.find_next_sibling("dd").text.strip()
  149. else:
  150. char_name = "NA"
  151.  
  152. Sculptor = container3.find('dt', text='Sculptor')
  153. if Sculptor:
  154. sculpt = Sculptor.find_next_sibling("dd").text.strip()
  155. else:
  156. sculpt = "NA"
  157.  
  158. f.write(Code.replace(",", ".") + "," + date + "," + brand.replace(",", ".") + "," + line_prod.replace(",", ".") + "," + series.replace(",", ".") + "," + char_name.replace(",", ".") + "," + sculpt + "\n")
  159.  
  160.  
  161. f.close
Runtime error #stdin #stdout #stderr 0.05s 112576KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "prog.py", line 2, in <module>
ImportError: No module named request