import urllib, urllib2, cookielib, re, operator, getpass
# Login credentials. When a value is left empty the script asks for it
# interactively before logging in.
# NOTE: `id` shadows the Python builtin of the same name; the rest of the
# script refers to this variable by that name.
id = '' # Enter your id here to save typing it again.
passwd = '' # Enter your password here at your own risk.
def get_num_pages_for_members():
url = 'http://w...content-available-to-author-only...s.com/club/mitbbs_club_member.php?club=Topcoders&pagen=1'
page = opener.open(url)
pagedata = page.read()
# Grab the # total pages (text) beside the select drop-down box
select_drop_down_pages = re.search(ur'</select>/(\d+)\u00D2\u00B3\]</form>', pagedata)
if select_drop_down_pages:
num_pages = select_drop_down_pages.group(1)
else:
num_pages = 0
return int(num_pages)
def get_members_list():
    """Collect the user ids found on every page of the club member list.

    The page count comes from ``get_num_pages_for_members()``; each page is
    then downloaded through the module-level ``opener``.

    Returns:
        A list of user-id strings in the order they appear, page by page.
        The list is not de-duplicated. It is empty when the page count is 0.
    """
    members = []
    num_pages = get_num_pages_for_members()
    # The ``pagen`` query parameter is 1-based: request pages 1..num_pages.
    for page_number in range(1, num_pages + 1):
        url = 'http://w...content-available-to-author-only...s.com/club/mitbbs_club_member.php?club=Topcoders&pagen=%d' % page_number
        response = opener.open(url)
        try:
            pagedata = response.read()
        finally:
            # Release the connection even when the read fails.
            response.close()
        # Member links look like "/user_info/<id>, with alphanumeric ids.
        members.extend(re.findall(r'"/user_info/([a-zA-Z0-9]+)', pagedata))
    return members
def get_page(url):
page = opener.open(url)
pagedata = page.read()
# [shang ye][<a href="next_page_url" class="news">xia ye</a>]
next_page = re.search(ur'\u00C9\u00CF\u00D2\u00B3.+\[<a href="(.+)" class="news">\u00CF\u00C2\u00D2\u00B3</a>\]', pagedata)
return (next_page, pagedata)
def sort_post_by_count(posts):
    """Return the ``(user, count)`` pairs of ``posts``, highest count first.

    Args:
        posts: Mapping of user name to number of posts.

    Returns:
        A new list of ``(user, count)`` tuples in descending count order.
        ``posts`` itself is left untouched.
    """
    # ``items()`` (rather than the Python 2-only ``iteritems()``) works on
    # any mapping and on both Python 2 and Python 3.
    ascending = sorted(posts.items(), key=operator.itemgetter(1))
    # Reverse the stable ascending sort instead of passing reverse=True so
    # that users with equal counts keep the same relative order as before.
    return ascending[::-1]
url = 'http://w...content-available-to-author-only...s.com/newindex/mitbbs_bbslogin.php'
if id == '':
id = raw_input('Please enter your user id: ')
if passwd == '':
passwd = getpass.getpass()
data = [
('id', id),
('passwd', passwd),
('kick_multi', '1')] # confirm login with multi_session.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
print 'Logging in...'
response = opener.open(url, urllib.urlencode(dict(data)))
home_page = response.read()
# There's two type of error message that might return:
# alert ("yong hu ming cuo wu, huo xi tong fan mang, qing chong xin deng lu!")
# alert("yong hu mi ma cuo wu, qing chong xin deng lu!")
# Notice the extra space character in the first error message.
# D3 C3 is the gb2312 code for 'yong', and BB A7 is for 'hu'.
if re.search(ur'alert\s*\("\u00D3\u00C3\u00BB\u00A7', home_page):
print 'Failed to login. Please try again.'
exit(1)
print 'Login successfully.'
posts = {}
# wen zhang mo shi (first page)
url = 'http://w...content-available-to-author-only...s.com/club_bbsdoc1/Topcoders_1_3.html'
while True:
(next_page, pagedata) = get_page(url)
print "Processing page [" + url + "]..."
# get all user names, user name can only contain alphanumeric characters, ie: [a-zA-Z0-9]
users = re.findall(r'\'/user_info/([a-zA-Z0-9]+)', pagedata)
for user in users:
posts[user] = posts.get(user, 0) + 1
if not next_page:
break;
next_page_url = next_page.group(1)
url = 'http://w...content-available-to-author-only...s.com' + next_page_url
print ""
print "Members who never posted"
print "------------------------"
all_users = get_members_list()
for user in all_users:
if posts.get(user, 0) == 0:
print user
print ""
print "Members sorted by #posts"
print "------------------------"
posts = sort_post_by_count(posts)
for (user, num_posts) in posts:
print "%s: %d" % (user, num_posts)