from bs4 import BeautifulSoup
import csv
import os
import re
import requests
# Scrape (code, name) pairs from the paginated listing at BaseUrl and store
# them, de-duplicated, in KOSPI200.csv.
# NOTE(review): presumably these are the KOSPI 200 constituents (going by the
# output file name) -- confirm against the real listing URL.

OUTPUT_FILE = 'KOSPI200.csv'
BaseUrl = 'https://f...content-available-to-author-only...r.com/sise/entryJongmok.nhn?&page='
FIRST_PAGE = 1
LAST_PAGE = 21  # the original loop was range(1, 22, 1), i.e. pages 1..21
REQUEST_TIMEOUT = 10  # seconds; keeps one stalled page from hanging the run

# Raw string: '\d' in a plain literal is an invalid escape sequence.
CODE_PATTERN = re.compile(r'\d+')


def extract_code(href):
    """Return the first run of digits found in *href*, or None.

    None is returned both when *href* is missing/empty and when it contains
    no digits, so callers can simply skip the entry.
    """
    if not href:
        return None
    match = CODE_PATTERN.search(href)
    return match.group() if match else None


def unique_rows(rows):
    """Return *rows* as a list without duplicates, keeping first-seen order.

    Rows must be hashable (the scraper produces ``(code, name)`` tuples).
    """
    return list(dict.fromkeys(rows))


def parse_rows(html):
    """Return the (code, name) tuples found in one listing page.

    Every ``<td class="ctg">`` cell is inspected: the code is the first run
    of digits in the href of the cell's link and the name is the cell's
    text. Cells without a link, or whose href holds no digits, are skipped.
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = []
    for cell in soup.find_all('td', {'class': 'ctg'}):
        link = cell.a
        if link is None:
            continue
        code = extract_code(link.get('href'))
        if code is None:
            continue
        rows.append((code, cell.text))
    return rows


def main():
    """Download every listing page and write the unique rows to OUTPUT_FILE.

    A page that cannot be fetched is reported and skipped, so the run stays
    best-effort per page instead of failing silently or aborting. All rows
    are gathered in memory, de-duplicated once and written in a single pass.
    """
    # delete only if file exists
    if os.path.exists(OUTPUT_FILE):
        os.remove(OUTPUT_FILE)
    else:
        print("Sorry , I can not remove {} file.".format(OUTPUT_FILE))

    rows = []
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        try:
            response = requests.get(BaseUrl + str(page), timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as err:
            print("Skipping page {}: {}".format(page, err))
            continue
        rows.extend(parse_rows(response.text))

    # newline='' lets the csv module control line endings itself; without it
    # an extra blank line appears after every row on Windows.
    with open(OUTPUT_FILE, 'w', newline='') as out_file:
        csv.writer(out_file).writerows(unique_rows(rows))


if __name__ == "__main__":
    main()
ZnJvbSBiczQgaW1wb3J0IEJlYXV0aWZ1bFNvdXAKaW1wb3J0IGNzdgppbXBvcnQgb3MKaW1wb3J0IHJlCmltcG9ydCByZXF1ZXN0cwoKIyMgZGVsZXRlIG9ubHkgaWYgZmlsZSBleGlzdHMgIyMgCgppZiBvcy5wYXRoLmV4aXN0cygnS09TUEkyMDAuY3N2Jyk6IAogICAgb3MucmVtb3ZlKCdLT1NQSTIwMC5jc3YnKQplbHNlOgogICAgcHJpbnQoIlNvcnJ5ICwgSSBjYW4gbm90IHJlbW92ZSB7fSBmaWxlLiIuZm9ybWF0KCdLT1BTUEkyMDAuY3N2JykpCgpCYXNlVXJsID0gJ2h0dHBzOi8vZi4uLmNvbnRlbnQtYXZhaWxhYmxlLXRvLWF1dGhvci1vbmx5Li4uci5jb20vc2lzZS9lbnRyeUpvbmdtb2submhuPyZwYWdlPScKCgpmb3IgaSBpbiByYW5nZSgxLDIyLDEpOgogICAgdHJ5OiAKICAgICAgICB1cmwgPSBCYXNlVXJsICsgc3RyKGkpCiAgICAgICAgciA9IHJldXFlc3RzLmdldCh1cmwpCiAgICAgICAgc291cCA9IEJlYXV0aWZ1bFNvdXAoci50ZXh0LCdseG1sJykKICAgICAgICBpdGVtcyA9IHNvdXAuZmluZF9hbGwoJ3RkJyx7J2NsYXNzJzonY3RnJ30pCgogICAgICAgIGZvciBpdGVtIGluIGl0ZW1zOgogICAgICAgICAgICAjcHJpbnQoaXRlbSkKICAgICAgICAgICAgdHh0ID0gaXRlbS5hLmdldCgnaHJlZicpCiAgICAgICAgICAgIGsgPSByZS5zZWFyY2goJ1tcZF0rJyx0eHQpCiAgICAgICAgICAgIGlmIGs6CiAgICAgICAgICAgICAgICBjb2RlID0gay5ncm91cCgpCiAgICAgICAgICAgICAgICBuYW1lID0gaXRlbS50ZXh0CiAgICAgICAgICAgICAgICBkYXRhID0gY29kZSwgbmFtZQoKICAgICAgICAgICAgICAgIHdpdGggb3BlbiAoJ0tPU1BJMjAwLmNzdicsJ2EnKSBhcyBmOgogICAgICAgICAgICAgICAgICAgIHdyaXRlcj1jc3Yud3JpdGVyKGYpCiAgICAgICAgICAgICAgICAgICAgd3JpdGVyLndyaXRlcm93KGRhdGEpCiAgICBleGNlcHQ6CiAgICAgICAgcGFzcwogICAgZmluYWxseToKICAgICAgICB0ZW1wX2Zvcl9zb3J0ID0gW10KICAgICAgICB3aXRoIG9wZW4gKCdLT1NQSTIwMC5jc3YnLCdyJykgYXMgaW5fZmlsZToKICAgICAgICAgICAgZm9yIHNvcnRfbGluZSBpbiBpbl9maWxlOgogICAgICAgICAgICAgICAgdGVtcF9mb3Jfc29ydC5hcHBlbmQoc29ydF9saW5lKQogICAgICAgIAogICAgICAgIHdpdGggb3BlbignS09TUEkyMDAuY3N2JywndycpIGFzIG91dF9maWxlOgogICAgICAgICAgICBzZWVuID0gc2V0KCkgCiAgICAgICAgICAgIGZvciBsaW5lIGluIHRlbXBfZm9yX3NvcnQ6CiAgICAgICAgICAgICAgICBpZiBsaW5lIGluIHNlZW46IGNvbnRpbnVlIAoKICAgICAgICAgICAgICAgIHNlZW4uYWRkKGxpbmUpCiAgICAgICAgICAgICAgICBvdXRfZmlsZS53cnRlKGxpbmUpIyB5b3VyIGNvZGUgZ29lcyBoZXJl