# -*- coding: utf-8 -*-

'''Async checker that takes URLs from an input file (one URL per line),
builds an async queue in which each URL is paired with one proxy (one queue
item per URL), fetches the server responses through the HTTP proxies and
sorts the results into output files.
'''
import logging
from eventlet import GreenPool
from erequests import AsyncRequestFactory

# Queue of (url, proxy) pairs; filled by the script entry point.
urls_and_proxies = []

# --- Request tuning ---------------------------------------------------------
# Number of requests to make at a time (open connections).
requests_at_time = 1000
# An exception is raised if no bytes have been received on the underlying
# socket for this many seconds.
timeout_for_req = 10.001
# Timeout used while probing proxies: 30 seconds.
timeout_for_proxy_test = 30.0
# URL requested through every proxy when probing it.
url_for_proxy_tests = 'http://content-available-to-author-only.ru'

# --- File names -------------------------------------------------------------
# Input URLs.
input_file_name = 'input.txt'
# One proxy per line, e.g. http://109.202.19.26:8080
proxies_file_name = 'proxy.txt'
# Proxies that failed the probe are written here.
bad_proxies_file_name = 'bad_proxy.txt'
# One output file per status-code class, plus one for failed requests.
output_file_names = [
    '1xx.txt',
    '2xx.txt',
    '3xx.txt',
    '4xx.txt',
    '5xx.txt',
    'error.txt',
]

# --- Logging ----------------------------------------------------------------
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.WARNING,
)

class ProxyError(Exception):
    '''Raised when the proxy check leaves no working proxy to use.'''


def test_proxies_from_file(proxies_file_name):
    '''Read proxies from a file, probe each one and return the working ones.

    Every non-blank line of ``proxies_file_name`` is taken as one proxy
    (e.g. ``http://109.202.19.26:8080``).  A request for
    ``url_for_proxy_tests`` is issued through each proxy with the
    ``timeout_for_proxy_test`` timeout.  A proxy counts as working when its
    request yields an object that has a ``status_code`` attribute; anything
    else (an exception object) counts as a failure.

    Proxies that failed are written, one per line, to
    ``bad_proxies_file_name``.  The proxies file itself is not modified.

    Returns:
        list of working proxies, spelled exactly as in the file.

    Raises:
        ProxyError: if not a single proxy passed the check.
    '''
    logging.warning('test proxies')
    # Read inside ``with`` so the handle is closed right away, and skip blank
    # lines so that they are not probed as (empty) proxies.
    with open(proxies_file_name) as proxies_file:
        proxy_from_file = [
            line.strip() for line in proxies_file if line.strip()]
    requests = async_reqest(
        [(url_for_proxy_tests, {'http': proxy}) for proxy in proxy_from_file],
        timeout_for_req=timeout_for_proxy_test)
    # Collect the proxy behind every request that produced a real response.
    proxy_from_response = []
    for response in requests:
        try:
            response.status_code
        except AttributeError:
            # Failed request: an exception object was yielded instead.
            continue
        # First key of the connection's proxy manager mapping.
        # NOTE(review): presumably this is the proxy URL the adapter used -
        # confirm against the installed requests version.
        proxy_from_response.append(
            list(response.connection.proxy_manager)[0])
    # Map the reported proxies back to the file entries.  Substring matching
    # is kept because the reported value is not guaranteed to be spelled
    # exactly like the line in the file.
    proxy_checked = []
    for from_response in proxy_from_response:
        for from_file in proxy_from_file:
            if from_response in from_file:
                proxy_checked.append(from_file)
                break
    if not proxy_checked:
        # Call form of ``raise`` is valid on both Python 2 and Python 3.
        raise ProxyError('Run out proxy')
    if len(proxy_checked) != len(proxy_from_file):
        good_proxies = set(proxy_checked)
        with open(bad_proxies_file_name, 'w') as bad_proxies_file:
            for proxy in proxy_from_file:
                if proxy not in good_proxies:
                    bad_proxies_file.write(proxy + '\n')
    logging.warning('end test proxies')
    return proxy_checked

def imap(requests, size=10):
    '''Send AsyncRequest objects through a green pool and yield the results.

    Adapted from erequests; the only addition is that a failed request's
    exception gets the request URL attached to it.

    Each yielded item is either whatever ``send()`` returned for a request
    or, if ``send()`` raised, the exception object itself with an extra
    ``url`` attribute.  ``size`` is passed to ``GreenPool``.
    '''

    def _send_or_capture(request):
        try:
            outcome = request.send()
        except Exception as error:
            # The exception message only mentions the proxy, so remember
            # which URL was being requested.
            error.url = request.url
            outcome = error
        return outcome

    workers = GreenPool(size)
    for outcome in workers.imap(_send_or_capture, requests):
        yield outcome

def sort_responses_to_files(requests):
    '''Log every result and append it to the matching output file.

    An item with a ``status_code`` attribute is written as
    ``<status> | <url>`` to the file named after the first digit of the
    status (``2xx.txt``, ``4xx.txt``, ...).  Any other item is treated as an
    exception object and written as ``<url> | <exception>`` to
    ``error.txt``.  The open files are looked up in the module-level
    ``output_files`` mapping.
    '''
    logging.warning('get and sort responses')
    absent = object()  # sentinel: tells "no status_code" apart from any value
    for outcome in requests:
        status = getattr(outcome, 'status_code', absent)
        failed = status is absent
        if failed:
            # Exception object: record the URL and the error itself.
            record = '%s | %s' % (outcome.url, outcome)
        else:
            # Real response: record the status and the URL.
            record = '%s | %s' % (status, outcome.url)
        logging.warning(record)
        target = 'error.txt' if failed else str(status)[0] + 'xx.txt'
        output_files[target].write(record + '\n')

def async_reqest(urls_and_proxies,
                timeout_for_req=timeout_for_req,
                number_of_reqests=requests_at_time):
    '''Build HEAD requests for (url, proxy) pairs and hand them to imap().

    ``urls_and_proxies`` is an iterable of ``(url, proxy)`` items where
    ``proxy`` has the form ``{'http': 'ip:port'}``.  Every request is created
    with the ``timeout_for_req`` timeout; ``number_of_reqests`` is passed to
    ``imap()`` as the pool size.

    Returns the generator produced by ``imap()``.
    '''
    logging.warning('create async requests')
    pending = (
        AsyncRequestFactory.head(url, timeout=timeout_for_req, proxies=proxy)
        for url, proxy in urls_and_proxies)
    return imap(pending, number_of_reqests)

if __name__ == '__main__':
    # Create (or truncate) the output files.  The mapping must stay a
    # module-level name: sort_responses_to_files() looks it up globally.
    output_files = dict(
        (file_name, open(file_name, 'w')) for file_name in output_file_names)
    try:
        # Get and test all proxies from the proxies file.
        proxies_list = test_proxies_from_file(proxies_file_name)
        with open(input_file_name) as input_file:
            logging.warning('create list urls for async reqest')
            for url in input_file:
                url = url.strip()
                if not url:
                    # Blank line: nothing to request, do not spend a proxy.
                    continue
                if not proxies_list:
                    # Out of proxies: send what is queued, write the results,
                    # then re-read and re-test the proxies.
                    logging.warning('run queue')
                    requests = async_reqest(urls_and_proxies)
                    sort_responses_to_files(requests)
                    urls_and_proxies = []
                    # Raises ProxyError when no proxy works, so the list is
                    # never empty past this point.
                    proxies_list = test_proxies_from_file(proxies_file_name)
                    logging.warning('create list urls for async reqest')
                # Queue the current URL with its own proxy.  Doing this after
                # the refill keeps the URL that triggered the refill from
                # being dropped.
                urls_and_proxies.append(
                    (url, {'http': proxies_list.pop(0)}))
        # Send whatever is still queued once the input is exhausted.
        logging.warning('run remainder queue')
        requests = async_reqest(urls_and_proxies)
        sort_responses_to_files(requests)
    finally:
        # Flush and release the output files even if a step above failed.
        for output_file in output_files.values():
            output_file.close()