fork download
  1. import random
  2. import time
  3. import multiprocessing as mp
  4.  
  5. num_partitions = 10
  6. num_workers = 4
  7.  
  8.  
  9. def init_arr():
  10. return [random.random()*10 for _ in range(10_000_000)]
  11.  
  12.  
  13. def my_func(arr):
  14. return list(map(lambda el: el**2, arr))
  15.  
  16.  
  17. def parallelize_dataframe(data, func):
  18. partion_size = len(data) // num_partitions
  19. splitted = [data[i * partion_size:(i + 1) * partion_size] for i in range(num_partitions)]
  20. pool = mp.Pool(num_workers)
  21. data = pool.map(func, splitted)
  22. pool.join()
  23. pool.close()
  24. return data
  25.  
  26.  
  27. def test1(arr):
  28. my_func(arr)
  29.  
  30.  
  31. def test2(arr):
  32. parallelize_dataframe(arr, my_func)
  33.  
  34.  
  35. class MyTimer:
  36.  
  37. def __init__(self, name):
  38. self.name = name
  39. self.started = None
  40.  
  41. def __enter__(self):
  42. self.started = time.time()
  43. return self
  44.  
  45. def __exit__(self, exc_type, exc_val, exc_tb):
  46. print(f'{time.time()-self.started} secs elapsed for {self.name}')
  47.  
  48.  
  49. if __name__ == '__main__':
  50. arr = init_arr()
  51.  
  52. with MyTimer('Sync') as sync_t:
  53. test1(arr)
  54.  
  55. with MyTimer('Parallel') as par_t:
  56. test1(arr)
  57.  
Success #stdin #stdout 3.92s 647064KB
stdin
Standard input is empty
stdout
1.2571208477020264 secs elapsed for Sync
1.2686481475830078 secs elapsed for Parallel