fork download
  1. import urllib, urllib2, cookielib, re, operator, getpass
  2.  
  3. id = '' # Enter your id here to save typing it again.
  4. passwd = '' # Enter your password here at your own risk.
  5.  
  6. def get_num_pages_for_members():
  7. url = 'http://w...content-available-to-author-only...s.com/club/mitbbs_club_member.php?club=Topcoders&pagen=1'
  8. page = opener.open(url)
  9. pagedata = page.read()
  10.  
  11. # Grab the # total pages (text) beside the select drop-down box
  12. select_drop_down_pages = re.search(ur'</select>/(\d+)\u00D2\u00B3\]</form>', pagedata)
  13.  
  14. if select_drop_down_pages:
  15. num_pages = select_drop_down_pages.group(1)
  16. else:
  17. num_pages = 0
  18.  
  19. return int(num_pages)
  20.  
  21. def get_members_list():
  22. members = []
  23.  
  24. num_pages = get_num_pages_for_members()
  25.  
  26. for page in range(0, num_pages):
  27. url = 'http://w...content-available-to-author-only...s.com/club/mitbbs_club_member.php?club=Topcoders&pagen=%d' % (page + 1)
  28.  
  29. page = opener.open(url)
  30. pagedata = page.read()
  31.  
  32. members += re.findall(r'"/user_info/([a-zA-Z0-9]+)', pagedata)
  33.  
  34. return members
  35.  
  36. def get_page(url):
  37. page = opener.open(url)
  38. pagedata = page.read()
  39.  
  40. # [shang ye][<a href="next_page_url" class="news">xia ye</a>]
  41. next_page = re.search(ur'\u00C9\u00CF\u00D2\u00B3.+\[<a href="(.+)" class="news">\u00CF\u00C2\u00D2\u00B3</a>\]', pagedata)
  42. return (next_page, pagedata)
  43.  
  44. def sort_post_by_count(posts):
  45. sorted_post_by_count = sorted(posts.iteritems(), key = operator.itemgetter(1))
  46. sorted_post_by_count.reverse()
  47.  
  48. return sorted_post_by_count
  49.  
  50.  
  51. url = 'http://w...content-available-to-author-only...s.com/newindex/mitbbs_bbslogin.php'
  52.  
  53. if id == '':
  54. id = raw_input('Please enter your user id: ')
  55. if passwd == '':
  56. passwd = getpass.getpass()
  57.  
  58. data = [
  59. ('id', id),
  60. ('passwd', passwd),
  61. ('kick_multi', '1')] # confirm login with multi_session.
  62.  
  63. cj = cookielib.CookieJar()
  64.  
  65. opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
  66.  
  67. print 'Logging in...'
  68. response = opener.open(url, urllib.urlencode(dict(data)))
  69.  
  70. home_page = response.read()
  71.  
  72. # There's two type of error message that might return:
  73. # alert ("yong hu ming cuo wu, huo xi tong fan mang, qing chong xin deng lu!")
  74. # alert("yong hu mi ma cuo wu, qing chong xin deng lu!")
  75. # Notice the extra space character in the first error message.
  76. # D3 C3 is the gb2312 code for 'yong', and BB A7 is for 'hu'.
  77. if re.search(ur'alert\s*\("\u00D3\u00C3\u00BB\u00A7', home_page):
  78. print 'Failed to login. Please try again.'
  79. exit(1)
  80.  
  81.  
  82. print 'Login successfully.'
  83.  
  84. posts = {}
  85.  
  86. # wen zhang mo shi (first page)
  87. url = 'http://w...content-available-to-author-only...s.com/club_bbsdoc1/Topcoders_1_3.html'
  88.  
  89. while True:
  90. (next_page, pagedata) = get_page(url)
  91. print "Processing page [" + url + "]..."
  92.  
  93. # get all user names, user name can only contain alphanumeric characters, ie: [a-zA-Z0-9]
  94. users = re.findall(r'\'/user_info/([a-zA-Z0-9]+)', pagedata)
  95. for user in users:
  96. posts[user] = posts.get(user, 0) + 1
  97.  
  98. if not next_page:
  99. break;
  100.  
  101. next_page_url = next_page.group(1)
  102. url = 'http://w...content-available-to-author-only...s.com' + next_page_url
  103.  
  104.  
  105. print ""
  106. print "Members who never posted"
  107. print "------------------------"
  108. all_users = get_members_list()
  109.  
  110. for user in all_users:
  111. if posts.get(user, 0) == 0:
  112. print user
  113.  
  114.  
  115. print ""
  116. print "Members sorted by #posts"
  117. print "------------------------"
  118. posts = sort_post_by_count(posts)
  119.  
  120. for (user, num_posts) in posts:
  121. print "%s: %d" % (user, num_posts)
Not running #stdin #stdout 0s 0KB
stdin
Standard input is empty
stdout
Standard output is empty