# -*- coding: utf-8 -*-

import codecs, re, time, tnttzinfo, gethttp, tntvnlang
from datetime import datetime
from pprint import pprint

# Time zone attached to every datetime that GetAllPosts builds (label "GMT+07").
# NOTE(review): the first argument is presumably the UTC offset in minutes
# (+420 min = +7 h) -- confirm against tnttzinfo.UTCTimeZone.
VN_TZ = tnttzinfo.UTCTimeZone(+420, False, "GMT+07")
# Zone labelled "GMT-17"; GetAllPosts formats datetime.now() in this zone to
# obtain the date it treats as "yesterday".
# NOTE(review): presumably exactly 24 h behind VN_TZ -- confirm.
VN_TZ_Y = tnttzinfo.UTCTimeZone(-17*60, False, "GMT-17")

def unescape_html(s):
    """Return *s* with a fixed set of HTML entities turned back into characters.

    Handles ``&gt;``, ``&lt;``, ``&quot;``, ``&#39;`` and ``&amp;``; any other
    entity is left untouched.
    """
    # '&amp;' must stay last: decoding it earlier would let text such as
    # '&amp;lt;' be unescaped twice (to '<' instead of '&lt;').
    entity_table = (
        ('&gt;', '>'),
        ('&lt;', '<'),
        ('&quot;', '"'),
        ('&#39;', "'"),
        ('&amp;', '&'),
    )
    for entity, literal in entity_table:
        s = s.replace(entity, literal)
    return s

# Matches anything that looks like an HTML tag. Compiled once so the flag is
# really applied as a flag (see _strip_tags).
_TAG_RE = re.compile(u'<(.+?)>', re.U)
# Matches a dd-mm-YYYY date as printed by the forum.
_DMY_RE = re.compile(r'\d\d-\d\d-\d\d\d\d')


def _strip_tags(text):
    """Return *text* with every ``<...>`` tag removed and outer whitespace trimmed."""
    return _TAG_RE.sub('', text).strip()


def _resolve_date(text, today, yesterday):
    """Return the 'dd-mm-YYYY' date that *text* refers to.

    *today* / *yesterday* are substituted when *text* contains the forum's
    relative markers u'Hôm nay' / u'Hôm qua'; otherwise the first explicit
    dd-mm-YYYY date found in *text* is returned.
    """
    if u'Hôm nay' in text:
        return today
    if u'Hôm qua' in text:
        return yesterday
    return _DMY_RE.findall(text)[0]


def _make_datetime(date_str, time_str):
    """Build a VN_TZ datetime from a 'dd-mm-YYYY' date and an 'HH:MM' time."""
    day, month, year = [int(part) for part in date_str.split('-')]
    clock = [int(part) for part in time_str.split(':')]
    return datetime(year, month, day, clock[0], clock[1], tzinfo=VN_TZ)


def GetAllPosts(thread_id, page=1):
    """Download one page of a forum thread and yield its posts one by one.

    Args:
        thread_id: Thread identifier, interpolated into the thread URL.
        page: Page number of the thread to fetch (defaults to 1).

    Yields:
        ``None`` exactly once when the download returned nothing; otherwise
        one tuple per post::

            (pid, pcreated, plastedited, pcontent, uname, urank,
             ujoindate, upostcount)

        where ``pid`` and ``upostcount`` are ints, ``pcreated``,
        ``plastedited`` and ``ujoindate`` are datetimes carrying ``VN_TZ``,
        and ``pcontent``, ``uname`` and ``urank`` are text with tags removed
        and a small set of HTML entities unescaped.  ``plastedited`` equals
        ``pcreated`` for posts without a "last edited" block.

    Raises:
        IndexError: A post lacks one of the expected HTML elements.
        ValueError: A numeric or date field cannot be converted.
    """
    ##Get page source
    url = 'http://f...content-available-to-author-only...n.com/showthread.php?%s/page%s' % (thread_id, page)
    raw_page = gethttp.GetHttpSource(url)
    if not raw_page:
        # Check before decoding so an empty/None response cannot blow up in
        # .decode(), and stop right after signalling the failure.
        yield None
        return
    page_html = raw_page.decode('utf8')

    ##Get current & yesterday datetime
    currentdate = datetime.now(VN_TZ).strftime("%d-%m-%Y")
    yesterdaydate = datetime.now(VN_TZ_Y).strftime("%d-%m-%Y")

    ##Walk over every post on the page
    post_re = re.compile('<li class="postbit (.+?)<hr />(.+?)</li>', re.S | re.U)
    for post_match in post_re.finditer(page_html):
        p0 = post_match.group(0)

        ##Get post id
        pid = int(re.findall(r'class="postcounter">#(\d+)</a>', p0, re.U)[0])

        ##Get post created timestamp
        date_text = re.findall(u'<span class="date">(.+?)<span', p0, re.U)[0]
        time_text = re.findall(u'<span class="time">(.+?)</span', p0, re.U)[0]
        pcreated = _make_datetime(
            _resolve_date(date_text, currentdate, yesterdaydate), time_text)

        ##Get post last edited timestamp (falls back to the creation time)
        edit_blocks = re.findall(
            u'<blockquote class="postcontent lastedited">(.+?)</blockquote>',
            p0, re.U | re.S)
        if edit_blocks:
            edit_text = edit_blocks[0].strip()
            edit_time = re.findall(u'<span class="time">(.+?)</span', edit_text, re.U)[0]
            plastedited = _make_datetime(
                _resolve_date(edit_text, currentdate, yesterdaydate), edit_time)
        else:
            plastedited = pcreated

        ##Get post content: strip tags line by line and drop empty lines
        raw_content = re.findall(
            u'<blockquote class="postcontent restore">(.+?)</blockquote>',
            p0, re.U | re.S)[0]
        stripped_lines = (_strip_tags(line) for line in raw_content.splitlines())
        pcontent = '\n'.join(line for line in stripped_lines if line)

        ##Get poster's info: name, rank, join date, post count
        raw_name = re.findall(u'<div class="popupmenu memberaction">(.+?)</a>',
                              p0, re.U | re.S)[0]
        uname = _strip_tags(raw_name)
        raw_rank = re.findall(u'<span class="rank">(.+?)</span>', p0, re.U)[0]
        urank = _strip_tags(raw_rank)
        ustats = re.findall(u'<dl class="userstats">(.+?)</dl>', p0, re.U | re.S)[0]
        join_text = re.findall(u'<dt>Ngày tham gia</dt> <dd>(.+?)</dd>', ustats, re.U)[0]
        join_day, join_month, join_year = [int(i) for i in join_text.split('-')]
        ujoindate = datetime(join_year, join_month, join_day, tzinfo=VN_TZ)
        upostcount = re.findall(u'<dt>Bài vi&#7871;t</dt> <dd>(.+?)</dd>', ustats, re.U)[0]
        # The forum prints the count with ',' as thousands separator.
        upostcount = int(upostcount.replace(u',', u''))

        uname = unescape_html(uname)
        urank = unescape_html(urank)
        pcontent = unescape_html(pcontent)

        #      int  datetime   datetime      str      str    str    datetime     int
        yield (pid, pcreated, plastedited, pcontent, uname, urank, ujoindate, upostcount)
