# -*- coding: utf-8 -*-

'''Asynchronous URL checker.

Reads URLs from the input file (one URL per line) in batches, requests each
of them through an HTTP proxy and sorts the responses into output files
according to the first digit of the status code.
'''
import eventlet
from erequests import AsyncRequest, AsyncRequestFactory

urls = []                       # URLs collected for the current batch
max_number_items_in_urls = 100  # maximum number of URLs in one batch
requests_in_time = 50           # number of requests to make at a time

# Timeout in seconds: an exception is raised if no bytes have been received
# on the underlying socket for this long.
timeout_for_req = 10.001
url_for_proxy_tests = 'http://content-available-to-author-only.ru'
# One proxy per line, e.g. http://109.202.19.26:8080
proxies_file_name = 'proxy.txt'
input_file_name = 'input.txt'
# One file per leading status-code digit plus one for failed requests.
# Every entry needs its own comma: adjacent string literals are silently
# concatenated into a single file name.
output_file_names = ['1xx.txt', '2xx.txt', '3xx.txt',
                     '4xx.txt', '5xx.txt', '6xx.txt', 'error.txt']
# Opening in 'w' mode truncates old output files and creates missing ones.
# The handles stay open for the whole run and are keyed by file name.
output_files = dict(
    (file_name, open(file_name, 'w')) for file_name in output_file_names)
# Load the http proxies; blank lines are skipped so that an empty string is
# never queued as a proxy address.
with open(proxies_file_name) as proxies_file:
    proxy_list = [line.strip() for line in proxies_file if line.strip()]


class ProxyError(Exception):
    '''Raised by get_proxy() when the proxy queue has no entries left.'''


def simple_proxy_test(proxy, url=url_for_proxy_tests, timeout=timeout_for_req):
    '''Send a HEAD request for url through proxy and report the outcome.

    proxy   -- proxy address used for http traffic
    url     -- page requested to probe the proxy
    timeout -- timeout value handed to the request

    Return True if the request was sent without raising, or False if any
    exception occurred (timeout, socket error etc.).
    '''
    try:
        AsyncRequest.send(
            AsyncRequestFactory.head(
                url,
                proxies={'http': proxy, 'https': ''},
                # Honour the caller's timeout instead of the global default,
                # otherwise a re-test with a longer timeout changes nothing.
                timeout=timeout))
    except Exception:
        # Any failure simply means "this proxy is not usable right now".
        return False
    return True

def get_proxy():
    '''Return the next working proxy from the module-level proxy_list queue.

    The proxy at the head of the queue is checked with simple_proxy_test().
    If that check fails it is re-tested once with timeout=30.0.  A proxy
    that passes either check is moved to the end of the queue and returned;
    a proxy that fails both checks is removed from the queue and the next
    one is tried.

    Raise ProxyError when the queue is empty or becomes empty.
    '''
    while proxy_list:
        proxy = proxy_list[0]
        if (simple_proxy_test(proxy=proxy)
                or simple_proxy_test(proxy=proxy, timeout=30.0)):
            # Rotate the working proxy to the back of the queue.  Returning
            # here also for a proxy that only passed the re-test keeps the
            # loop from probing the same slow proxy over and over.
            proxy_list.append(proxy_list.pop(0))
            return proxy
        # Both checks failed: drop the proxy and try the next one.
        del proxy_list[0]
    raise ProxyError('Run out proxy')

def imap(requests, size=10):
    """Send every request through a green pool and yield the outcomes.

    Adapted from the erequests function of the same name.  A request whose
    send() raises does not abort the iteration: the exception object itself
    is yielded instead, with the request's url attached as its ``url``
    attribute (the exception message alone only shows the proxy).

    requests -- iterable of request objects providing send() and url
    size     -- size passed to eventlet.GreenPool
    """

    def _send_or_capture(request):
        try:
            outcome = request.send()
        except Exception as error:
            error.url = request.url
            outcome = error
        return outcome

    green_pool = eventlet.GreenPool(size)
    for outcome in green_pool.imap(_send_or_capture, requests):
        yield outcome

def create_request(urls, proxy=None):
    '''Send a HEAD request for every url and record each outcome.

    urls  -- iterable of urls to request
    proxy -- proxies mapping handed to every request; defaults to
             {'http': ''} when omitted

    Each outcome is printed and appended to one of the output files: a
    response goes to '<first digit of status code>xx.txt' as
    'status | url', anything without a status_code attribute (the exception
    objects yielded by imap) goes to 'error.txt' as 'url | exception'.
    '''
    if proxy is None:
        # Built per call rather than as a shared mutable default argument.
        proxy = {'http': ''}
    pending = (
        AsyncRequestFactory.head(url, timeout=timeout_for_req, proxies=proxy)
        for url in urls)
    for result in imap(pending, requests_in_time):
        try:
            status = result.status_code
        except AttributeError:
            # Not a response: write url and exception to error.txt
            line = '%s | %s' % (result.url, result)
            print(line)
            output_files['error.txt'].write(line + '\n')
        else:
            # Write status and url to the file for this status-code class
            line = '%s | %s' % (status, result.url)
            print(line)
            output_files[str(status)[0] + 'xx.txt'].write(line + '\n')

def _next_proxy():
    '''Return the proxies mapping for the next working proxy.

    If the proxy queue has run out, it is reloaded once from the proxies
    file and get_proxy() is tried again; a ProxyError raised by that second
    attempt is not caught here.
    '''
    try:
        address = get_proxy()
    except ProxyError:
        print('Run out proxy')
        with open(proxies_file_name) as proxies_file:
            # Refill the existing list in place so that get_proxy(), which
            # reads the module-level proxy_list, sees the reloaded entries.
            proxy_list[:] = [
                line.strip() for line in proxies_file if line.strip()]
        address = get_proxy()
    return {'http': address, 'https': ''}


with open(input_file_name) as input_file:
    for url in input_file:
        url = url.strip()
        if not url:
            # Nothing to request on a blank line.
            continue
        # Always keep the url that was just read; flushing only afterwards
        # ensures the url that fills a batch is not lost.
        urls.append(url)
        if len(urls) >= max_number_items_in_urls:
            # Request the full batch and write the results
            create_request(urls, _next_proxy())
            urls = []
    if urls:
        # Remaining urls that did not fill a whole batch; they go through a
        # proxy like every other batch.
        create_request(urls, _next_proxy())