fork(1) download
  1. #!/usr/bin/env python3
  2.  
  3. import requests
  4. from bs4 import BeautifulSoup
  5. import csv
  6.  
  7. def get_html(url):
  8. r = requests.get(url)
  9. return r.text
  10.  
  11. def get_total_pages(html):
  12. soup = BeautifulSoup(html, 'lxml')
  13. pages = soup.find('div', class_='pagination-pages').find_all('a', class_='pagination-page')[-1].get('href')
  14. total_pages = pages.split('=')[1].split('&')[0]
  15. return int(total_pages)
  16.  
  17. def write_csv(date):
  18. with open('avito.csv', 'a') as f:
  19. writer = csv.writer(f)
  20. writer.writerow((date['title'],
  21. date['price'],
  22. date['metro'],
  23. date['url']))
  24.  
  25. def get_page_date(html):
  26. soup = BeautifulSoup(html, 'lxml')
  27. ads = soup.find('div', class_='catalog-list').find_all('div', class_='item_table')
  28. for ad in ads:
  29. name = ad.find('div', class_='description').find('h3').text.strip().lower()
  30. if 'htc' in name:
  31. try:
  32. title = ad.find('div', class_='description').find('h3').text.strip()
  33. except:
  34. title= ''
  35. try:
  36. url = 'https://w...content-available-to-author-only...o.ru/' + ad.find('div', class_='description').find('h3').find('a').get('href')
  37. except:
  38. url = ''
  39. try:
  40. price = ad.find('div', class_='about').text.strip()
  41. except:
  42. price = ''
  43. try:
  44. metro = ad.find('div', class_='data').find('p').text.strip()
  45. except:
  46. metro = ''
  47. date = {'title': title,
  48. 'price': price,
  49. 'metro': metro,
  50. 'url': url}
  51. write_csv(date)
  52. else:
  53. continue
  54.  
  55. def main():
  56. url = 'https://w...content-available-to-author-only...o.ru/moskva/telefony/htc'
  57. base_url = 'https://w...content-available-to-author-only...o.ru/moskva/telefony/htc?p='
  58. total_pages = get_total_pages(get_html(url))
  59. for i in range(1, total_pages):
  60. url_gen = base_url + str(i)
  61. html = get_html(url_gen)
  62. get_page_date(html)
  63.  
  64. if __name__ == '__main__':
  65. main()
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
Traceback (most recent call last):
  File "/usr/lib/python3.5/py_compile.py", line 125, in compile
    _optimize=optimize)
  File "<frozen importlib._bootstrap_external>", line 735, in source_to_code
  File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
  File "./prog.py", line 65
    main()
          ^
SyntaxError: invalid character in identifier

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/usr/lib/python3.5/py_compile.py", line 129, in compile
    raise py_exc
py_compile.PyCompileError:   File "./prog.py", line 65
    main()
          ^
SyntaxError: invalid character in identifier

stdout
Standard output is empty