fork download
  1. #!/usr/bin/env python
  2.  
  3. import scraperwiki
  4. import requests
  5. from bs4 import BeautifulSoup
  6. import string
  7.  
  8. allitems = []
  9.  
  10. uri = "http://l...content-available-to-author-only...g.com/r/discussion/new/"
  11. html = requests.get(uri)
  12. soup = BeautifulSoup(html.content)
  13. items = soup.find_all(class_="post list")
  14.  
  15. while len(items) > 0:
  16. for item in items:
  17.  
  18. if (item is not None) and (item.div is not None) and (item.div.span is not None):
  19. points = item.div.span.span
  20. itemdate = item.find(class_="date")
  21. id = string.split(item.h2.a['href'], '/')[4]
  22. title = item.h2.a.string
  23. if (points is not None) and (itemdate is not None):
  24. allitems.append([points.string, title, itemdate.string, id])
  25. unique_keys = [ 'id' ]
  26. data = { 'id':unicode(id), 'title':unicode(title), 'points':int(points.string), 'posted_on':unicode(itemdate.string)}
  27. scraperwiki.sql.save(unique_keys, data)
  28.  
  29. uri = "http://l...content-available-to-author-only...g.com/r/discussion/new/?after=t3_" + allitems[-1][3]
  30. html = requests.get(uri)
  31. soup = BeautifulSoup(html.content)
  32. items = soup.find_all(class_="post list")
  33.  
  34. print uri, len(allitems)
Runtime error #stdin #stdout #stderr 0.01s 7892KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "prog.py", line 3, in <module>
ImportError: No module named scraperwiki