fork download
  1. # your code goes here
  2.  
  3. # -*- coding: utf-8 -*-
  4.  
  5. import requests
  6. from bs4 import BeautifulSoup
  7.  
  def spider(max_pages):
      """Fetch one yearly-chart page and print weekend data for its first movie.

      Builds the chart URL with ``max_pages`` as the ``page`` query value,
      downloads it, selects the anchors whose ``href`` starts with
      ``/movies/?``, prints the text of the first one and hands its absolute
      URL to ``getRank``.

      Args:
          max_pages: Value placed in the ``page`` query parameter. Despite the
              name, exactly one page (this one) is requested.
      """
      # NOTE(review): the host below looks redacted in this copy of the file
      # ("...content-available-to-author-only..."); presumably the real host
      # has to be restored before the script can run -- confirm.
      host = 'http://w...content-available-to-author-only...o.com'
      url = (host + '/yearly/chart/?page=' + str(max_pages)
             + '&view=releasedate&view2=domestic&yr=2015&p=.htm')
      response = requests.get(url)

      # Name the parser explicitly: without it Beautiful Soup picks whichever
      # parser happens to be installed, so the tree can differ per machine.
      soup = BeautifulSoup(response.text, 'html.parser')

      # The attribute value is quoted because it contains "/" and "?", which
      # are not allowed in an unquoted CSS attribute value.
      for link in soup.select('td > b > font > a[href^="/movies/?"]'):
          href = host + link.get('href')
          # Single-argument print(...) behaves the same on Python 2 and 3.
          print(link.text)
          getRank(href)
          # Only the first matching link is processed.
          # NOTE(review): looks like a debugging limit -- confirm before
          # removing it to crawl every movie on the page.
          break
  19.  
  def getRank(item_url):
      """Print the weekend chart rows of one movie page.

      Adds ``page=weekend`` as the first query parameter of ``item_url``,
      fetches the resulting page, prints the HTTP status code, then prints
      every cell of each ``.chart-wide`` table row after the first one,
      labelled with a column name.

      Args:
          item_url: Absolute URL of a movie page. The new parameter is
              inserted right after the first ``?``; if the URL has no query
              string, one is started.

      Returns:
          None. Results are only printed.
      """
      # Split on the first "?" instead of slicing at a fixed offset of 37,
      # which only works when the "?" is exactly the 37th character.
      base, sep, query = item_url.partition('?')
      if sep:
          href = base + "?page=weekend&" + query
      else:
          href = item_url + "?page=weekend"

      response = requests.get(href)
      # %-formatting keeps the output identical on Python 2 and Python 3.
      print("%s for %s" % (response.status_code, href))

      # Explicit parser so the result does not depend on which optional
      # parsers are installed ("html5lib" is an alternative if it is available).
      soup = BeautifulSoup(response.content, 'html.parser')

      headers = "Date Rank WeekendGross Change Theaters Change/Avg GrossToDate Week".split()

      # The first selected row is skipped (treated as the table header).
      for row in soup.select('.chart-wide tr')[1:]:
          # Direct child tags only: iterating row.children would also yield
          # whitespace text nodes and shift cells against the header names.
          cells = row.find_all(True, recursive=False)
          for header, cell in zip(headers, cells):
              print("%s : %s" % (header, cell.text))

      # NOTE(review): hard-coded placeholder, not derived from the page. The
      # original indentation was lost, so printing it once after the loop is
      # an assumption -- confirm the intended placement and value.
      rank = 5
      print(rank)
  41.  
  # Run the scraper only when executed as a script, so that importing this
  # module does not trigger network requests.
  if __name__ == "__main__":
      spider(1)
  43.  
Runtime error #stdin #stdout #stderr 1.06s 24624KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
sh: 1: cannot create /dev/null: Permission denied
sh: 1: cannot create /dev/null: Permission denied
sh: 1: cannot create /dev/null: Permission denied
sh: 1: cannot create /dev/null: Permission denied
sh: 1: cannot create /dev/null: Permission denied
sh: 1: cannot create /dev/null: Permission denied
Traceback (most recent call last):
  File "prog.py", line 6, in <module>
ImportError: No module named bs4