import time
import sys
import re
import requests
from tqdm import trange
from multiprocessing import Queue, Manager, Process
# Case-insensitive matcher for the path fragments "/wp-admin/", "/wp-content/"
# and "/wp-includes/". fetch() treats a hit anywhere in a response body as a
# positive result (presumably: the site runs WordPress).
re_wp = re.compile(
    r'\/wp-(admin|content|includes)\/',
    flags=re.IGNORECASE | re.UNICODE,
)
def fetch(url):
    """Request ``http://<url>`` and report whether the body matches ``re_wp``.

    Args:
        url: Host name (no scheme); it is also sent as the ``host`` header.

    Returns:
        One output line of the form ``"<url>|<flag>\\n"`` where ``<flag>`` is
        ``1`` if ``re_wp`` matches the response text, ``0`` if it does not,
        and ``e`` if anything raised while requesting or inspecting the page.
    """
    try:
        reply = requests.get(
            'http://' + url,
            headers={'host': url},
            # requests interprets a 2-tuple as (connect, read) timeouts.
            timeout=(3.05, 10.0),
        )
        verdict = int(re_wp.search(reply.text) is not None)
    except Exception:
        # Best effort: any failure is recorded as 'e' rather than propagated,
        # so a single bad host never takes a worker down.
        verdict = 'e'

    return '{}|{}\n'.format(url, verdict)
def worker(urls, results):
    """Consume hosts from *urls* and push each ``fetch`` line onto *results*.

    Args:
        urls: Queue-like object supplying hosts via ``get(timeout=...)``.
        results: Queue-like object receiving the formatted result lines.

    The loop ends (and the function returns ``None``) as soon as ``urls.get``
    raises, which includes no item arriving within 15 seconds.
    """
    while True:
        try:
            target = urls.get(timeout=15)
        except Exception:
            # Nothing arrived in time (or the queue is unusable): stop.
            return
        else:
            results.put(fetch(target))
def data_loader(urls, queue, *, max_backlog=5000, pause=10):
    """Feed every entry of *urls* into *queue*, pausing when it is backed up.

    Args:
        urls: Iterable of items to enqueue, in order.
        queue: Queue-like object providing ``qsize()`` and ``put()``.
        max_backlog: Backlog size above which the loader pauses before
            enqueueing the next item. Defaults to the previously hard-coded
            5000.
        pause: Seconds to sleep when the backlog is exceeded. Defaults to the
            previously hard-coded 10.

    The throttle is deliberately soft: the loader sleeps at most once per
    item and then enqueues it regardless of the backlog, so it always
    terminates even if nothing is consuming the queue.
    """
    for url in urls:
        if queue.qsize() > max_backlog:
            time.sleep(pause)

        queue.put(url)
def main(in_file, out_file, workers=200):
    """Probe every host listed in *in_file* and write the results to *out_file*.

    Args:
        in_file: Path to a text file with one host per line; surrounding
            whitespace is stripped and blank lines are skipped.
        out_file: Path of the file to (over)write with the lines produced by
            ``fetch``, in the order results arrive.
        workers: Number of daemon worker processes to start.

    One loader process fills a manager-backed queue with hosts, the workers
    drain it, and this process collects up to ``len(urls)`` result lines. If
    no result arrives for five minutes, collection stops early and the
    output file is left with whatever was gathered so far.
    """
    # Imported locally so the handler below can name the exact "timed out"
    # exception; only ``Empty`` is bound, so no module-level name changes.
    from queue import Empty

    with open(in_file, 'r') as f:
        urls = [url for url in map(str.strip, f) if url]

    with Manager() as man:
        pending = man.Queue()
        results = man.Queue()

        Process(target=data_loader, args=(urls, pending)).start()

        for _ in range(workers):
            Process(target=worker, args=(pending, results), daemon=True).start()

        with open(out_file, 'w') as out:
            for _ in trange(len(urls), smoothing=0.01, file=sys.stdout):
                try:
                    result = results.get(timeout=5 * 60)
                except Empty:
                    # Only a genuine timeout ends collection early. A bare
                    # ``except`` here used to swallow KeyboardInterrupt and
                    # SystemExit too, making Ctrl-C look like a timeout.
                    break

                out.write(result)
# Script entry point: read hosts from xal.txt and write results to wp_xal.txt.
if __name__ == '__main__':
    main(in_file='xal.txt', out_file='wp_xal.txt')
aW1wb3J0IHRpbWUKaW1wb3J0IHN5cwppbXBvcnQgcmUKaW1wb3J0IHJlcXVlc3RzCmZyb20gdHFkbSBpbXBvcnQgdHJhbmdlCmZyb20gbXVsdGlwcm9jZXNzaW5nIGltcG9ydCBRdWV1ZSwgTWFuYWdlciwgUHJvY2VzcwoKCnJlX3dwID0gcmUuY29tcGlsZShyJ1wvd3AtKGFkbWlufGNvbnRlbnR8aW5jbHVkZXMpXC8nLCBmbGFncz1yZS5JR05PUkVDQVNFfHJlLlVOSUNPREUpCgoKZGVmIGZldGNoKHVybCk6CiAgICB0cnk6CiAgICAgICAgaGVhZGVycyA9IHsnaG9zdCc6IHVybH0KICAgICAgICByZXNwb25zZSA9IHJlcXVlc3RzLmdldCgnaHR0cDovLycgKyB1cmwsIHRpbWVvdXQ9KDMuMDUsIDEwLjApLCBoZWFkZXJzPWhlYWRlcnMpCiAgICAgICAgcmVzdWx0ID0gMSBpZiByZV93cC5zZWFyY2gocmVzcG9uc2UudGV4dCkgZWxzZSAwCgogICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOgogICAgICAgIHJlc3VsdCA9ICdlJwoKICAgIHJldHVybiAne318e31cbicuZm9ybWF0KHVybCwgcmVzdWx0KQoKCmRlZiB3b3JrZXIodXJscywgcmVzdWx0cyk6CiAgICB3aGlsZSBUcnVlOgogICAgICAgIHRyeToKICAgICAgICAgICAgdXJsID0gdXJscy5nZXQodGltZW91dD0xNSkKICAgICAgICBleGNlcHQgRXhjZXB0aW9uOgogICAgICAgICAgICBicmVhawoKICAgICAgICByZXN1bHRzLnB1dChmZXRjaCh1cmwpKQoKCmRlZiBkYXRhX2xvYWRlcih1cmxzLCBxdWV1ZSk6CiAgICBmb3IgdXJsIGluIHVybHM6CiAgICAgICAgaWYgcXVldWUucXNpemUoKSA+IDUwMDA6CiAgICAgICAgICAgIHRpbWUuc2xlZXAoMTApCgogICAgICAgIHF1ZXVlLnB1dCh1cmwpCgoKZGVmIG1haW4oaW5fZmlsZSwgb3V0X2ZpbGUsIHdvcmtlcnM9MjAwKToKICAgIHdpdGggb3Blbihpbl9maWxlLCAncicpIGFzIGY6CiAgICAgICAgdXJscyA9IFt1cmwgZm9yIHVybCBpbiBtYXAoc3RyLnN0cmlwLCBmKSBpZiB1cmxdCgogICAgd2l0aCBNYW5hZ2VyKCkgYXMgbWFuOgogICAgICAgIHF1ZXVlID0gbWFuLlF1ZXVlKCkKICAgICAgICByZXN1bHRzID0gbWFuLlF1ZXVlKCkKCiAgICAgICAgUHJvY2Vzcyh0YXJnZXQ9ZGF0YV9sb2FkZXIsIGFyZ3M9KHVybHMsIHF1ZXVlKSkuc3RhcnQoKQoKICAgICAgICBmb3IgXyBpbiByYW5nZSh3b3JrZXJzKToKICAgICAgICAgICAgUHJvY2Vzcyh0YXJnZXQ9d29ya2VyLCBhcmdzPShxdWV1ZSwgcmVzdWx0cyksIGRhZW1vbj1UcnVlKS5zdGFydCgpCgogICAgICAgIHdpdGggb3BlbihvdXRfZmlsZSwgJ3cnKSBhcyBvdXQ6CiAgICAgICAgICAgIGZvciBpIGluIHRyYW5nZShsZW4odXJscyksIHNtb290aGluZz0wLjAxLCBmaWxlPXN5cy5zdGRvdXQpOgogICAgICAgICAgICAgICAgdHJ5OgogICAgICAgICAgICAgICAgICAgIHJlc3VsdCA9IHJlc3VsdHMuZ2V0KHRpbWVvdXQ9NSAqIDYwKQogICAgICAgICAgICAgICAgZXhjZXB0OgogICAgICAgICAgICAgICAgICAgIGJyZWFrCgogICAgICAgICAgICAgICAgb3V0LndyaXRlKHJlc3VsdCkKCgpp
ZiBfX25hbWVfXyA9PSAnX19tYWluX18nOgogICAgbWFpbigneGFsLnR4dCcsICd3cF94YWwudHh0JykKCg==