import sys
from optparse import OptionParser
import urllib
import urllib2
import bs4
from collections import defaultdict
import pickle

# Page fetched by getSchoolList() to read the options of the school <select>.
# NOTE(review): the host part of both URIs looks elided/redacted
# ("...content-available-to-author-only...") -- presumably the real host must be
# restored before these requests can succeed; confirm against the original script.
MAIN_URI='http://s...content-available-to-author-only...o.com/ncaa/football/recruiting/recruit-search'
# Endpoint that getRecruits() sends the search form values to.
SEARCH_URI='http://s...content-available-to-author-only...o.com/ncaa/football/recruiting/recruit-search-results'

def handleOptions():
    """
        Builds the command-line parser and parses sys.argv.

        Returns the (options, args) tuple produced by OptionParser.parse_args().
    """
    parser = OptionParser(description='Compare common recruits from two schools based upon Rivals rankings.')

    def addFlag(shortName, longName, dest, helpText):
        # Boolean switch that defaults to off.
        parser.add_option(shortName, longName, action='store_true', dest=dest, default=False, help=helpText)

    def addValue(shortName, longName, dest, default, helpText):
        # Option that stores the string that follows it.
        parser.add_option(shortName, longName, action='store', dest=dest, default=default, help=helpText)

    addFlag('-l', '--list', 'list', 'List available schools')
    addValue('-1', '--first', 'first', '', 'First school to compare')
    addValue('-2', '--second', 'second', '', 'Second school to compare')
    addValue('-y', '--year', 'year', '2014', 'Year to compare')
    addFlag('-s', '--stats', 'stats', 'Print advanced class statistics')
    addFlag('-v', '--verbose', 'verbose', 'Use verbose output')

    # Long-form only options for caching scraped data between runs.
    addFlag('', '--save', 'save', 'Save scraped output to intermediate file')
    addFlag('', '--load', 'load', 'Load scraped output from intermediate file')

    return parser.parse_args()

class Recruit(object):
    """
        One recruit, built from a row of the search results table.

        Only plain values extracted from the row are kept; the row object
        itself is not stored on the instance.

        Attributes (all strings unless noted): pos, name, location, height,
        weight, forty, stars (int, 0 when the star cell has no span),
        rating (float, 0.0 when the cell holds no parseable number), rank,
        school.
    """
    def __init__(self, row):
        """
            row -- table row whose ``contents`` holds the ten cells in the
                   order: position, name (link), location, height, weight,
                   forty, stars, rating, rank, school (link inside a div).
        """
        cells = row.contents
        self.pos = str(cells[0].string)
        self.name = str(cells[1].a.string)
        self.location = str(cells[2].string)
        self.height = str(cells[3].string)
        self.weight = str(cells[4].string)
        self.forty = str(cells[5].string)

        # The star count is the first whitespace-separated token of the span text.
        starCell = cells[6]
        if starCell.span:
            self.stars = int(starCell.span.string.split()[0])
        else:
            self.stars = 0

        # An empty cell yields ``.string`` of None (TypeError in float());
        # non-numeric text raises ValueError. Both mean "no rating".
        try:
            self.rating = float(cells[7].string)
        except (TypeError, ValueError):
            self.rating = 0.0

        self.rank = str(cells[8].string)
        self.school = str(cells[9].div.a.string)

def getHtml(uri, values=None):
    """
        Retrieves HTML returned by the given URI.

        uri    -- address to request.
        values -- optional mapping of form fields. It is url-encoded (sequence
                  values are expanded into repeated keys) and passed as the
                  request data. Defaults to no fields.

        Returns whatever the response's read() yields. Errors raised by
        urllib2 are not caught here.
    """
    # None instead of a shared mutable {} default.
    if values is None:
        values = {}

    data = urllib.urlencode(values, True)
    req = urllib2.Request(uri, data)
    req.add_header('User-agent', 'Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11')

    f = urllib2.urlopen(req)
    try:
        return f.read()
    finally:
        # Always release the connection, even if read() fails.
        f.close()

def getSchoolList():
    """
        Returns a list of school names valid to search for.

        The names are the ``value`` attributes of the options inside the
        element with id 'school-football' on the MAIN_URI page.
    """
    page = bs4.BeautifulSoup(getHtml(MAIN_URI), 'html5lib')
    options = page.find(id='school-football').find_all('option')

    schools = []
    # The first option is skipped.
    for option in options[1:]:
        schools.append(option['value'])
    return schools

def getStartValues(soup):
    """
        Collects the page-navigation start values from a search results page.

        Looks at every <li> in the list under the element with id
        'ysr-search-results-index-top', ignoring entries whose class includes
        'summary' and entries whose button is disabled.

        Returns a sorted list of unique (button value, hidden 'start' input
        value) tuples; the second item is None when the form has no input
        named 'start'.
    """
    def hiddenStartOf(form):
        # Value of the last input named 'start', or None if there is none.
        found = None
        for field in form.find_all('input'):
            if field['name'] == 'start':
                found = field['value']
        return found

    entries = soup.find(id='ysr-search-results-index-top').ul.find_all('li')

    pairs = set()
    for entry in entries:
        isSummaryEntry = 'class' in entry.attrs and 'summary' in entry['class']
        if isSummaryEntry:
            continue

        button = entry.form.button
        if 'disabled' in button.attrs:
            continue

        pairs.add((button['value'], hiddenStartOf(entry.form)))

    return sorted(pairs)

def getRecruits(schoolName, year):
    """
        Returns a dict mapping recruit name to Recruit object for every row of
        the search results for the given school and year, across all result
        pages.

        The search is sent with offered=1, so the result presumably covers
        recruits offered by the school rather than only its commits; callers
        compare Recruit.school to pick out commits. A later row with the same
        name replaces an earlier one.
    """
    query = {'sport': 'football', 'year': year, 'college': schoolName, 'offered': '1', 'hsprospects': '1', 'prepprospects': '1', 'jucoprospects': 1, 'sort_columns': 'rivalsrating'}

    def fetchPage():
        # Runs the search with the current query and parses the response.
        return bs4.BeautifulSoup(getHtml(SEARCH_URI, query), 'html5lib')

    firstPage = fetchPage()
    pages = [firstPage]
    # Each start value read from the first page selects one further page.
    for start in getStartValues(firstPage):
        query['start'] = start
        pages.append(fetchPage())

    byName = {}
    for page in pages:
        resultRows = page.find(id='ysr-search-results').tbody.find_all('tr')
        for row in resultRows:
            player = Recruit(row)
            byName[player.name] = player

    return byName

def printStats(schoolName, recruits, isSummary=False):
    """
        Print a variety of statistics about the given recruiting class.

        schoolName -- school being reported on; a recruit counts as committed
                      when its ``school`` attribute equals this value.
        recruits   -- dict of recruit name to objects exposing ``name``,
                      ``rating``, ``stars`` and ``school``.
        isSummary  -- when True, the committed mean/median lines and the
                      commits-per-offer line are left out.

        Recruits whose rating is 0 are excluded from the rating mean and
        median. Empty inputs are reported as 0.0 instead of raising
        ZeroDivisionError. Nothing is returned; output goes to stdout.
    """
    def mean(ratings):
        if not ratings:
            return 0.0
        return sum(ratings) / float(len(ratings))

    def median(ratings):
        # ``ratings`` must already be sorted.
        count = len(ratings)
        if count == 0:
            return 0.0
        mid = count // 2
        if count % 2 == 0:
            return (ratings[mid - 1] + ratings[mid]) / 2.0
        return ratings[mid] * 1.0

    allRecruits = list(recruits.values())
    committed = [recruit for recruit in allRecruits if recruit.school == schoolName]

    offeredRatings = sorted(recruit.rating for recruit in allRecruits if recruit.rating != 0)
    committedRatings = sorted(recruit.rating for recruit in committed if recruit.rating != 0)

    #Offered mean/median
    offeredMeanRating = mean(offeredRatings)
    offeredMedianRating = median(offeredRatings)

    #Committed mean/median (the median is taken over the committed ratings)
    committedMeanRating = mean(committedRatings)
    committedMedianRating = median(committedRatings)

    #Commits/offer
    commits = len(committed)

    #Offered by star
    offeredStarBuckets = defaultdict(int)
    for recruit in allRecruits:
        offeredStarBuckets[recruit.stars] += 1

    #Commits by star
    committedStarBuckets = defaultdict(int)
    for recruit in committed:
        committedStarBuckets[recruit.stars] += 1

    #Output
    header = '%s recruits (%d):' % (schoolName, len(recruits))
    print('\n' + header)
    print('=' * len(header))
    # Sort on the rating alone so equal ratings never compare Recruit objects.
    for recruit in sorted(allRecruits, key=lambda r: r.rating, reverse=True):
        print('    %s - %s - %s' % (recruit.name, recruit.rating, recruit.school))

    print('\nAverage Offered Rating: %.2f' % offeredMeanRating)
    if not isSummary:
        print('Average Committed Rating: %.2f' % committedMeanRating)

    print('\nMedian Offered Rating: %.2f' % offeredMedianRating)
    if not isSummary:
        print('Median Committed Rating: %.2f' % committedMedianRating)

    print('\nCommits: %d' % commits)
    if not isSummary:
        if recruits:
            commitsPerOffer = float(commits) / len(recruits)
        else:
            commitsPerOffer = 0.0
        print('Commits per offer: %.2f' % commitsPerOffer)

    starTableHeader = ['# of Stars:', 'Offered:', 'Committed:']
    colWidths = [len(title) for title in starTableHeader]
    print('\n' + '    '.join(starTableHeader))
    print('    '.join(['=' * width for width in colWidths]))
    # Only the 5, 4 and 3 star rows are shown.
    for stars in range(5, 2, -1):
        print('%*d    %*d    %*d' % (colWidths[0], stars, colWidths[1], offeredStarBuckets[stars], colWidths[2], committedStarBuckets[stars]))

def dictInstersection(left, right):
    """
        Returns a dict that is the intersection between the two input dictionaries. Uses the values from the first dictionary argument.

        left  -- dict whose values are kept.
        right -- dict used only for its keys.
    """
    # A plain membership test gives the same result as intersecting key views
    # and does not depend on the Python 2 only viewkeys().
    return {name: left[name] for name in left if name in right}

def error(msg, status=1):
    """
        Writes msg to stderr (when it is non-empty) and terminates the
        program with the given exit status.
    """
    if msg:
        sys.stderr.write(msg)
    # sys.exit(status) is exactly this exception.
    raise SystemExit(status)

def main():
    """
        Command-line entry point.

        Fetches the school list, validates the requested school(s), then
        either scrapes their recruits or loads a previously saved scrape, and
        prints the requested statistics. With a second school, the summary
        covers the recruits that appear in both schools' results.
    """
    (options, args) = handleOptions()

    if options.verbose:
        print('Retrieving list of schools...')
    schoolList = getSchoolList()
    if options.list:
        print('Schools:')
        print('========')
        for school in schoolList:
            print(school)
        return

    # Names are validated case-insensitively, but later compared exactly
    # against recruit.school, so switch to the spelling used in the list.
    canonicalNames = dict((s.lower(), s) for s in schoolList)

    if options.first.lower() not in canonicalNames:
        error("First school '%s' could not be found on Rivals list." % options.first)
    options.first = canonicalNames[options.first.lower()]

    compare = len(options.second.strip()) > 0
    if compare:
        if options.second.lower() not in canonicalNames:
            error("Second school '%s' could not be found on Rivals list." % options.second)
        options.second = canonicalNames[options.second.lower()]

    pickleFile = '%s_%s_%s.pkl' % (options.year, options.first.lower().replace(' ', '-'), options.second.lower().replace(' ', '-'))

    secondRecruits = {}
    if options.load:
        # NOTE(review): pickle.load executes whatever the file describes; only
        # load files this tool wrote itself.
        with open(pickleFile, 'rb') as f:
            (options.year, options.first, firstRecruits, options.second, secondRecruits) = pickle.load(f)
    else:
        if options.verbose:
            print('Retrieving %s recruits...' % options.first)
        firstRecruits = getRecruits(options.first, options.year)
        if compare:
            if options.verbose:
                print('Retrieving %s recruits...' % options.second)
            secondRecruits = getRecruits(options.second, options.year)

        # Saving only applies to a fresh scrape; with --load there is nothing
        # new to write (and previously firstRecruits was undefined here).
        if options.save:
            tmp = (options.year, options.first, firstRecruits, options.second, secondRecruits)
            with open(pickleFile, 'wb') as f:
                pickle.dump(tmp, f)

    if options.stats:
        printStats(options.first, firstRecruits)
        if compare:
            printStats(options.second, secondRecruits)

    print('\n-------------------------------------')
    if compare:
        jointRecruits = dictInstersection(firstRecruits, secondRecruits)
        if options.stats:
            print('\n# of common recruits: %d' % len(jointRecruits))
        commonFirstRecruits = {name: recruit for (name, recruit) in jointRecruits.items() if recruit.school == options.first}
        commonSecondRecruits = {name: recruit for (name, recruit) in jointRecruits.items() if recruit.school == options.second}

        printStats(options.first, commonFirstRecruits, True)
        printStats(options.second, commonSecondRecruits, True)
    else:
        committedRecruits = {name: recruit for (name, recruit) in firstRecruits.items() if recruit.school == options.first}
        printStats(options.first, committedRecruits, True)

if __name__ == '__main__':
    main()
