from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uReq
import urllib.request
import re
# Output CSV path and its header line; every data row carries the same
# 17 columns in this order.
filename = "Ami_full.csv"
headers = "fig_name_eng, fig_name_jap, Scale, Size, Type, Materials, Sell_price, Price, Sale, Status, Code, date, brand, line_prod, series, char_name, sculpt\n"

# Placeholder written whenever a page does not provide a value.
_MISSING = "NA"

# <dt> labels read from the "spec_data" definition list, in column order
# (Code, date, brand, line_prod, series, char_name, sculpt).
_SPEC_LABELS = (
    "JAN Code",
    "Release Date",
    "Brand",
    "Product Line",
    "Series Title",
    "Character Name",
    "Sculptor",
)


def _clean(text, comma="."):
    """Return *text* made safe for the hand-built CSV row.

    Line breaks become spaces and surrounding whitespace is removed so a
    value can never split a row; commas are swapped for *comma* because
    the file is written without quoting. Empty values become "NA".
    """
    text = str(text).replace("\r", " ").replace("\n", " ").strip()
    if not text:
        return _MISSING
    return text.replace(",", comma)


def _name_fields(page):
    """Return (english, japanese) names from the first h2.heading_10."""
    heading = page.find("h2", {"class": "heading_10"})
    if heading is None:
        return "", ""

    # class_='' selects the first <span> that carries no class attribute.
    span = heading.find("span", class_="")
    japanese = span.text if span is not None else ""

    # The English name is whatever precedes the last <br/> of the heading:
    # cut the markup there and re-parse the (now truncated) <h2>.
    markup = str(heading)
    cut = markup.rfind("<br/>")
    english_node = heading
    if cut != -1:
        english_node = soup(markup[:cut], "html.parser").find("h2")
    # Without a <br/> the whole heading text is used instead of failing.
    english = english_node.text if english_node is not None else ""
    return english, japanese


def _spec_box_fields(page):
    """Return (scale, size, type, material) from the first p.box_01."""
    box = page.find("p", {"class": "box_01"})
    if box is None:
        return "", "", "", ""

    # The type is taken from the node right after the <p> tag opens; it is
    # only usable when that node is text rather than a nested tag.
    lead = box.next_element
    kind = lead if isinstance(lead, str) else ""

    scale = box.find(text=re.compile("Scale")) or ""
    size = box.find(text=re.compile("Size")) or ""
    material = box.find(text=re.compile("Material")) or ""
    return scale, size, kind, material


def _price_fields(page):
    """Return (sell_price, price, sale, status) from the price list.

    The list is located through its own items (li.selling_price, else
    li.price) rather than by iterating over every <ul> of the page, which
    would emit four extra columns per list and break the row layout.
    NOTE(review): assumes one price list per page - confirm against a
    real page.
    """
    anchor = page.find("li", {"class": "selling_price"})
    if anchor is None:
        anchor = page.find("li", {"class": "price"})
    price_list = anchor.find_parent("ul") if anchor is not None else None
    if price_list is None:
        return "", "", "", ""

    selling = price_list.find_all("li", {"class": "selling_price"})
    sell_price = selling[0].text if selling else ""
    # The status is the second selling_price item; it may be absent.
    status = selling[1].text if len(selling) > 1 else ""

    price = ""
    list_price = price_list.find("li", {"class": "price"})
    if list_price is not None:
        price = list_price.find(text=re.compile("JPY")) or ""

    # The discount badge is looked up inside the first <li> of the list.
    sale = ""
    first_item = price_list.li
    if first_item is not None:
        badge = first_item.find("span", {"class": "off_price"})
        if badge is not None:
            sale = badge.text
    return sell_price, price, sale, status


def _spec_value(spec, label):
    """Return the <dd> text following the <dt> whose text equals *label*."""
    if spec is None:
        return ""
    term = spec.find("dt", text=label)
    if term is None:
        return ""
    value = term.find_next_sibling("dd")
    return value.text if value is not None else ""


def _build_row(page):
    """Return one complete CSV line (17 fields plus newline) for *page*."""
    english, japanese = _name_fields(page)
    scale, size, kind, material = (
        _clean(value) for value in _spec_box_fields(page)
    )
    sell_price, price, sale, status = (
        _clean(value) for value in _price_fields(page)
    )
    spec = page.find("dl", {"class": "spec_data"})

    # Progress output, one line per scraped value.
    print("Scale: " + scale)
    print("Size: " + size)
    print("Type: " + kind)
    print("Materials: " + material)
    print("Sell price: " + sell_price)
    print("Price: " + price)
    print("Sale: " + sale)
    print("Status: " + status)

    # Names use "|" as the comma replacement, every other column ".".
    fields = [_clean(english, "|"), _clean(japanese, "|")]
    fields += [scale, size, kind, material, sell_price, price, sale, status]
    fields += [_clean(_spec_value(spec, label)) for label in _SPEC_LABELS]
    return ",".join(fields) + "\n"


def main(url_file="AmiAmi_mini.csv", out_file=filename):
    """Scrape every URL listed in *url_file* into the CSV *out_file*.

    *url_file* holds one URL per line; blank lines are ignored and a
    missing trailing newline no longer drops the last URL. *out_file* is
    overwritten: the header line first, then one row per URL. Each row is
    flushed as soon as it is written so finished rows survive a failure
    on a later URL. Download and parse errors are not caught.
    """
    with open(url_file) as source:
        urls = [line.strip() for line in source if line.strip()]

    with open(out_file, "w", encoding="utf-8") as out:
        out.write(headers)
        for url in urls:
            # The response is closed even when reading it fails.
            with uReq(url) as client:
                page = soup(client.read(), "html.parser")
            out.write(_build_row(page))
            out.flush()


if __name__ == "__main__":
    main()
ZnJvbSBiczQgaW1wb3J0IEJlYXV0aWZ1bFNvdXAgYXMgc291cApmcm9tIHVybGxpYi5yZXF1ZXN0IGltcG9ydCB1cmxvcGVuIGFzIHVSZXEKaW1wb3J0IHVybGxpYi5yZXF1ZXN0CmltcG9ydCByZQoKbGlzdHMgPSBvcGVuKCJBbWlBbWlfbWluaS5jc3YiKS5yZWFkKCkuc3BsaXQoIlxuIilbOi0xXQoKZmlsZW5hbWUgPSAiQW1pX2Z1bGwuY3N2IgpmID0gb3BlbihmaWxlbmFtZSwgInciLCBlbmNvZGluZz0idXRmLTgiKQpoZWFkZXJzID0gImZpZ19uYW1lX2VuZywgZmlnX25hbWVfamFwLCBTY2FsZSwgU2l6ZSwgVHlwZSwgTWF0ZXJpYWxzLCBTZWxsX3ByaWNlLCBQcmljZSwgU2FsZSwgU3RhdHVzLCBDb2RlLCBkYXRlLCBicmFuZCwgbGluZV9wcm9kLCBzZXJpZXMsIGNoYXJfbmFtZSwgc2N1bHB0XG4iCmYud3JpdGUoaGVhZGVycykKZi5jbG9zZQoKZm9yIHVybCBpbiBsaXN0czoKCiAgICB1Q2xpZW50ID0gdVJlcSh1cmwpCiAgICBwYWdlX2h0bWwgPSB1Q2xpZW50LnJlYWQoKQogICAgdUNsaWVudC5jbG9zZSgpCiAgICBwYWdlX3NvdXAgPSBzb3VwKHBhZ2VfaHRtbCwgImh0bWwucGFyc2VyIikKCiAgICBwYWdlX3NvdXAgPSBzb3VwKHBhZ2VfaHRtbCwgImh0bWwucGFyc2VyIikKICAgIHBhZ2Vfc291cC5maW5kQWxsKCJkaXYiLCBzdHlsZT0id2lkdGg6NjAwcHg7IikKCiAgICBmID0gb3BlbihmaWxlbmFtZSwgImEiLCBlbmNvZGluZz0idXRmLTgiKQoKICAgIGluZm80ID0gcGFnZV9zb3VwLmZpbmRBbGwoImgyIiwgeyJjbGFzcyI6ICJoZWFkaW5nXzEwIn0pCiAgICBjb250YWluZXI0ID0gaW5mbzRbMF0KCiAgICBmb3IgY29udGFpbmVyNCBpbiBpbmZvNDoKICAgICAgICBqYXAgPSBjb250YWluZXI0LmZpbmQoJ3NwYW4nLCBjbGFzc189JycpLnRleHQKICAgICAgICBpZiBqYXA6CiAgICAgICAgICAgIEphcCA9IGphcC5zdHJpcCgpCiAgICAgICAgZWxzZToKICAgICAgICAgICAgSmFwID0gIk5BIgogICAgICAgIHIgPSBzdHIoY29udGFpbmVyNCkKICAgICAgICBjb250YWluZXI1ID0gc291cChyWzpyLnJpbmRleCgnPGJyLz4nKV0sICdodG1sLnBhcnNlcicpCiAgICAgICAgaWYgY29udGFpbmVyNToKICAgICAgICAgICAgZW5nID0gY29udGFpbmVyNS5maW5kKCdoMicpLnRleHQuc3RyaXAoKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIGVuZyA9ICJOQSIKCiAgICAgICAgZi53cml0ZShlbmcucmVwbGFjZSgiLCIsICJ8IikgKyAiLCIgKyBqYXAucmVwbGFjZSgiLCIsICJ8IikgKyAiLCIpCgogICAgaW5mbzEgPSBwYWdlX3NvdXAuZmluZEFsbCgicCIsIHsiY2xhc3MiOiAiYm94XzAxIn0pCiAgICBjb250YWluZXIxID0gaW5mbzFbMF0KCiAgICBmb3IgY29udGFpbmVyMSBpbiBpbmZvMToKICAgICAgICBTY2FsZTEgPSBjb250YWluZXIxLmZpbmRBbGwodGV4dD1yZS5jb21waWxlKCdTY2FsZScpKQogICAgICAgIGlmIFNjYWxlMToKICAgICAgICAgICAgU2NhbGUgPSBTY2FsZTFbMF0uc3RyaXAoJyBcdFxuXHInKQogICAgICAgIGVs
c2U6CiAgICAgICAgICAgIFNjYWxlID0gIk5BIgoKICAgICAgICBTaXplMSA9IGNvbnRhaW5lcjEuZmluZEFsbCh0ZXh0PXJlLmNvbXBpbGUoJ1NpemUnKSkKICAgICAgICBpZiBTaXplMToKICAgICAgICAgICAgU2l6ZSA9IFNpemUxWzBdLnN0cmlwKCcgXHRcblxyJykKICAgICAgICBlbHNlOgogICAgICAgICAgICBTaXplID0gIk5BIgoKICAgICAgICBUeXBlMSA9IGluZm8xWzBdLm5leHRfZWxlbWVudC5zdHJpcCgpCiAgICAgICAgaWYgVHlwZTE6CiAgICAgICAgICAgIFR5cGUgPSBUeXBlMQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIFR5cGUgPSAiTkEiCgogICAgICAgIE1hdGVyaWFsMSA9IGNvbnRhaW5lcjEuZmluZEFsbCh0ZXh0PXJlLmNvbXBpbGUoJ01hdGVyaWFsJykpCiAgICAgICAgaWYgTWF0ZXJpYWwxOgogICAgICAgICAgICBNYXRlcmlhbCA9IE1hdGVyaWFsMVswXS5zdHJpcCgnIFx0XG5ccicpCiAgICAgICAgZWxzZToKICAgICAgICAgICAgTWF0ZXJpYWwgPSAiTkEiCgogICAgICAgIHByaW50KCJTY2FsZTogIitTY2FsZSkKICAgICAgICBwcmludCgiU2l6ZTogIitTaXplKQogICAgICAgIHByaW50KCJUeXBlOiAiK1R5cGUpCiAgICAgICAgcHJpbnQoIk1hdGVyaWFsczogIitNYXRlcmlhbCkKICAgICAgICBmLndyaXRlKFNjYWxlLnJlcGxhY2UoIiwiLCAiLiIpICsgIiwiICsgU2l6ZS5yZXBsYWNlKCIsIiwgIi4iKSArICIsIiArIFR5cGUgKyAiLCIgKyBNYXRlcmlhbCArICIsIikKCiAgICBpbmZvMiA9IHBhZ2Vfc291cC5maW5kQWxsKCJ1bCIpCiAgICBjb250YWluZXIyID0gaW5mbzJbNF0KCiAgICBmb3IgY29udGFpbmVyMiBpbiBpbmZvMjoKICAgICAgICBTZWxsX3ByaWNlMSA9IGNvbnRhaW5lcjIuZmluZEFsbCgibGkiLCB7ImNsYXNzIjogInNlbGxpbmdfcHJpY2UifSkKICAgICAgICBpZiBTZWxsX3ByaWNlMToKICAgICAgICAgICAgU2VsbF9wcmljZSA9IFNlbGxfcHJpY2UxWzBdLnRleHQuc3RyaXAoKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIFNlbGxfcHJpY2UgPSAiTkEiCgogICAgICAgIFByaWNlMSA9IGNvbnRhaW5lcjIuZmluZEFsbCgibGkiLCB7ImNsYXNzIjogInByaWNlIn0pCiAgICAgICAgaWYgUHJpY2UxOgogICAgICAgICAgICBQcmljZSA9IFByaWNlMVswXS5maW5kQWxsKHRleHQ9cmUuY29tcGlsZSgnSlBZJykpWzBdLnN0cmlwKCkKICAgICAgICBlbHNlOgogICAgICAgICAgICBQcmljZSA9ICJOQSIKCiAgICAgICAgc2FsZTEgPSBjb250YWluZXIyLmxpKCJzcGFuIiwgeyJjbGFzcyI6ICJvZmZfcHJpY2UifSkKICAgICAgICBpZiBzYWxlMToKICAgICAgICAgICAgU2FsZSA9IHNhbGUxWzBdLnRleHQKICAgICAgICBlbHNlOgogICAgICAgICAgICBTYWxlID0gIk5BIgoKICAgICAgICBTdGF0dXMgPSBjb250YWluZXIyLmZpbmRBbGwoImxpIiwgeyJjbGFzcyI6ICJzZWxsaW5nX3ByaWNlIn0pCiAgICAgICAgaWYgU3RhdHVzOgogICAgICAgICAgICBTdGF0dXMgPSBTdGF0dXNbMV0udGV4
dC5zdHJpcCgpCiAgICAgICAgZWxzZToKICAgICAgICAgICAgU3RhdHVzID0gIk5BIgoKICAgICAgICBwcmludCgiU2VsbCBwcmljZTogIitTZWxsX3ByaWNlKQogICAgICAgIHByaW50KCJQcmljZTogIisgUHJpY2UpCiAgICAgICAgcHJpbnQoIlNhbGU6ICIrU2FsZSkKICAgICAgICBwcmludCgiU3RhdHVzOiAiK1N0YXR1cykKICAgICAgICBmLndyaXRlKFNlbGxfcHJpY2UucmVwbGFjZSgiLCIsICIuIikgKyAiLCIgKyBQcmljZS5yZXBsYWNlKCIsIiwgIi4iKSArICIsIiArIFNhbGUucmVwbGFjZSgiLCIsICIuIikgKyAiLCIgKyBTdGF0dXMucmVwbGFjZSgiLCIsICIuIikgKyAiLCIpCgogICAgaW5mbzMgPSBwYWdlX3NvdXAuZmluZEFsbCgiZGwiLCB7ImNsYXNzIjogInNwZWNfZGF0YSJ9KQogICAgY29udGFpbmVyMyA9IGluZm8zWzBdCgogICAgZm9yIGNvbnRhaW5lcjMgaW4gaW5mbzM6CiAgICAgICAgY29kZSA9IGNvbnRhaW5lcjMuZmluZCgnZHQnLCB0ZXh0PSdKQU4gQ29kZScpCiAgICAgICAgaWYgY29kZToKICAgICAgICAgICAgQ29kZSA9IGNvZGUuZmluZF9uZXh0X3NpYmxpbmcoImRkIikudGV4dC5zdHJpcCgpCiAgICAgICAgZWxzZToKICAgICAgICAgICAgQ29kZSA9ICJOQSIKCiAgICAgICAgUmVsZWFzZV9EYXRlID0gY29udGFpbmVyMy5maW5kKCdkdCcsIHRleHQ9J1JlbGVhc2UgRGF0ZScpCiAgICAgICAgaWYgUmVsZWFzZV9EYXRlOgogICAgICAgICAgICBkYXRlID0gUmVsZWFzZV9EYXRlLmZpbmRfbmV4dF9zaWJsaW5nKCJkZCIpLnRleHQuc3RyaXAoKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIGRhdGUgPSAiTkEiCgogICAgICAgIEJyYW5kID0gY29udGFpbmVyMy5maW5kKCdkdCcsIHRleHQ9J0JyYW5kJykKICAgICAgICBpZiBCcmFuZDoKICAgICAgICAgICAgYnJhbmQgPSBCcmFuZC5maW5kX25leHRfc2libGluZygiZGQiKS50ZXh0LnN0cmlwKCkKICAgICAgICBlbHNlOgogICAgICAgICAgICBicmFuZCA9ICJOQSIKCiAgICAgICAgTGluZSA9IGNvbnRhaW5lcjMuZmluZCgnZHQnLCB0ZXh0PSdQcm9kdWN0IExpbmUnKQogICAgICAgIGlmIExpbmU6CiAgICAgICAgICAgIGxpbmVfcHJvZCA9IExpbmUuZmluZF9uZXh0X3NpYmxpbmcoImRkIikudGV4dC5zdHJpcCgpCiAgICAgICAgZWxzZToKICAgICAgICAgICAgbGluZV9wcm9kID0gIk5BIgoKICAgICAgICBTZXJpZXMgPSBjb250YWluZXIzLmZpbmQoJ2R0JywgdGV4dD0nU2VyaWVzIFRpdGxlJykKICAgICAgICBpZiBTZXJpZXM6CiAgICAgICAgICAgIHNlcmllcyA9IFNlcmllcy5maW5kX25leHRfc2libGluZygiZGQiKS50ZXh0LnN0cmlwKCkKICAgICAgICBlbHNlOgogICAgICAgICAgICBzZXJpZXMgPSAiTkEiCgogICAgICAgIE5hbWUgPSBjb250YWluZXIzLmZpbmQoJ2R0JywgdGV4dD0nQ2hhcmFjdGVyIE5hbWUnKQogICAgICAgIGlmIE5hbWU6CiAgICAgICAgICAgIGNoYXJfbmFtZSA9IE5hbWUuZmluZF9uZXh0X3NpYmxpbmcoImRkIikudGV4dC5z
dHJpcCgpCiAgICAgICAgZWxzZToKICAgICAgICAgICAgY2hhcl9uYW1lID0gIk5BIgoKICAgICAgICBTY3VscHRvciA9IGNvbnRhaW5lcjMuZmluZCgnZHQnLCB0ZXh0PSdTY3VscHRvcicpCiAgICAgICAgaWYgU2N1bHB0b3I6CiAgICAgICAgICAgIHNjdWxwdCA9IFNjdWxwdG9yLmZpbmRfbmV4dF9zaWJsaW5nKCJkZCIpLnRleHQuc3RyaXAoKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIHNjdWxwdCA9ICJOQSIKCiAgICAgICAgZi53cml0ZShDb2RlLnJlcGxhY2UoIiwiLCAiLiIpICsgIiwiICsgZGF0ZSArICIsIiArIGJyYW5kLnJlcGxhY2UoIiwiLCAiLiIpICsgIiwiICsgbGluZV9wcm9kLnJlcGxhY2UoIiwiLCAiLiIpICsgIiwiICsgc2VyaWVzLnJlcGxhY2UoIiwiLCAiLiIpICsgIiwiICsgY2hhcl9uYW1lLnJlcGxhY2UoIiwiLCAiLiIpICsgIiwiICsgc2N1bHB0ICsgIlxuIikKCgogICAgZi5jbG9zZQ==