fork(2) download
  1. #!/usr/bin/env python3
  2. # requirements: pip3 install requests beautifulsoup4
  3. import requests
  4. try:
  5. from bs4 import BeautifulSoup
  6. except ImportError:
  7. print('warning: might be using BS3!')
  8. from BeautifulSoup import BeautifulSoup
  9.  
  def web_crawler(max_page):
      """Print the link and title of every ``<h3>`` entry on the catalogue pages.

      Pages are fetched in order, starting at page 1 and stopping *before*
      ``max_page``: the bound is exclusive, so ``web_crawler(2)`` fetches only
      page 1.  For each ``<h3>`` element found on a page, the raw tag, the
      resolved link target and the heading text are printed to stdout.

      Args:
          max_page: Exclusive upper bound on the page number to fetch.

      Raises:
          requests.exceptions.RequestException: If a page cannot be fetched,
              including when the server does not answer within the timeout.
      """
      # Imported locally so this function brings its own dependency with it.
      from urllib.parse import urljoin

      for page in range(1, max_page):
          url = 'http://b...content-available-to-author-only...e.com/catalogue/category/books_1/page-' + str(page) + '.html'
          # Without a timeout, requests waits indefinitely on a silent server.
          response = requests.get(url, timeout=10)
          # Hand the parser the raw bytes (.content).  Original author's note:
          # using .text here failed under BS3 with "AttributeError: 'str'
          # object has no attribute 'text'" -- bs4 is required.
          soup = BeautifulSoup(response.content, 'html.parser')
          # findAll (not find_all) is kept because the file's fallback import
          # is BeautifulSoup 3, whose API only offers this spelling.
          for heading in soup.findAll('h3'):
              print('link', heading)
              anchor = heading.find('a')
              target = anchor.get('href') if anchor is not None else None
              if target is None:
                  # No usable link in this heading; nothing to report for it.
                  continue
              # Resolve against the page URL so relative hrefs ("../x.html")
              # become valid absolute URLs instead of being glued to the host.
              href = urljoin(url, str(target))
              title = heading.string
              print('HREF', href)
              print('TITLE', title)
  24.  
  if __name__ == '__main__':
      # Run the crawl only when executed as a script, not when imported.
      # web_crawler stops before max_page, so this fetches page 1 only.
      web_crawler(2)
  26.  
Runtime error #stdin #stdout #stderr 0.01s 27704KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "./prog.py", line 3, in <module>
    import requests
ImportError: No module named 'requests'