fork download
  1. import random
  2. import time
  3. import multiprocessing as mp
  4.  
  5. num_partitions = 20
  6. num_workers = 4
  7.  
  8. def init_arr():
  9. return [random.random()*10 for _ in range(1_000_000)]
  10.  
  11.  
  12. def calc(num):
  13. for i in range(100):
  14. num *= 0.0001
  15. return num
  16.  
  17. def my_func(arr):
  18. return list(map(calc, arr))
  19.  
  20.  
  21. def parallelize_dataframe(data, func):
  22. partion_size = len(data) // num_partitions
  23. splitted = [data[i * partion_size:(i + 1) * partion_size] for i in range(num_partitions)]
  24. with mp.Pool(num_workers) as pool:
  25. data = pool.map(func, splitted)
  26. return data
  27.  
  28.  
  29. def test1(arr):
  30. my_func(arr)
  31.  
  32.  
  33. def test2(arr):
  34. parallelize_dataframe(arr, my_func)
  35.  
  36.  
  37. class MyTimer:
  38.  
  39. def __init__(self, name):
  40. self.name = name
  41. self.started = None
  42.  
  43. def __enter__(self):
  44. self.started = time.time()
  45. return self
  46.  
  47. def __exit__(self, exc_type, exc_val, exc_tb):
  48. print(f'{time.time()-self.started} secs elapsed for {self.name}')
  49.  
  50.  
  51. if __name__ == '__main__':
  52. arr = init_arr()
  53.  
  54. with MyTimer('Sync') as sync_t:
  55. test1(arr)
  56.  
  57. with MyTimer('Parallel') as par_t:
  58. test2(arr)
  59.  
  60.  
Time limit exceeded #stdin #stdout 5s 89712KB
stdin
Standard input is empty
stdout
3.147193431854248 secs elapsed for Sync
3.5718376636505127 secs elapsed for Parallel