fork download
  1. import time
  2. import sys
  3. import re
  4. import requests
  5. from tqdm import trange
  6. from multiprocessing import Queue, Manager, Process
  7.  
  8.  
  9. re_wp = re.compile(r'\/wp-(admin|content|includes)\/', flags=re.IGNORECASE|re.UNICODE)
  10.  
  11.  
  12. def fetch(url):
  13. try:
  14. headers = {'host': url}
  15. response = requests.get('http://' + url, timeout=(3.05, 10.0), headers=headers)
  16. result = 1 if re_wp.search(response.text) else 0
  17.  
  18. except Exception as e:
  19. result = 'e'
  20.  
  21. return '{}|{}\n'.format(url, result)
  22.  
  23.  
  24. def worker(urls, results):
  25. while True:
  26. try:
  27. url = urls.get(timeout=15)
  28. except Exception:
  29. break
  30.  
  31. results.put(fetch(url))
  32.  
  33.  
  34. def data_loader(urls, queue):
  35. for url in urls:
  36. if queue.qsize() > 5000:
  37. time.sleep(10)
  38.  
  39. queue.put(url)
  40.  
  41.  
  42. def main(in_file, out_file, workers=200):
  43. with open(in_file, 'r') as f:
  44. urls = [url for url in map(str.strip, f) if url]
  45.  
  46. with Manager() as man:
  47. queue = man.Queue()
  48. results = man.Queue()
  49.  
  50. Process(target=data_loader, args=(urls, queue)).start()
  51.  
  52. for _ in range(workers):
  53. Process(target=worker, args=(queue, results), daemon=True).start()
  54.  
  55. with open(out_file, 'w') as out:
  56. for i in trange(len(urls), smoothing=0.01, file=sys.stdout):
  57. try:
  58. result = results.get(timeout=5 * 60)
  59. except:
  60. break
  61.  
  62. out.write(result)
  63.  
  64.  
  65. if __name__ == '__main__':
  66. main('xal.txt', 'wp_xal.txt')
  67.  
  68.  
Runtime error #stdin #stdout #stderr 0.09s 17800KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
/bin/sh: 1: cannot create /dev/null: Permission denied
/bin/sh: 1: cannot create /dev/null: Permission denied
/bin/sh: 1: cannot create /dev/null: Permission denied
/bin/sh: 1: cannot create /dev/null: Permission denied
Traceback (most recent call last):
  File "./prog.py", line 5, in <module>
ImportError: No module named 'tqdm'