import random
import time
import multiprocessing as mp
 
# Number of slices the input is cut into before being handed to the pool.
num_partitions = 20
# Number of worker processes in the multiprocessing pool.
num_workers = 4
 
def init_arr():
    """Build the input list: one million random floats.

    Each element is ``random.random() * 10``, drawn in order from the
    module-level ``random`` generator.
    """
    size = 1_000_000
    scale = 10
    draw = random.random
    return [draw() * scale for _ in range(size)]


def calc(num):
    """Multiply ``num`` by 0.0001 one hundred times in a row and return it.

    The multiplications are applied one at a time rather than folded into a
    single factor, so floating-point rounding happens at every step.
    """
    remaining = 100
    while remaining > 0:
        num *= 0.0001
        remaining -= 1
    return num
 
def my_func(arr):
    """Apply ``calc`` to every element of ``arr`` and return a new list.

    Order is preserved: the i-th output is ``calc`` of the i-th input.
    """
    return [calc(item) for item in arr]
 
 
def parallelize_dataframe(data, func, partitions=None, workers=None):
    """Split ``data`` into contiguous slices and apply ``func`` to each in a pool.

    Args:
        data: Any object supporting ``len()`` and slice indexing.
        func: Callable applied to each slice. It is sent to worker
            processes, so it must be picklable.
        partitions: Number of slices to create. Defaults to the module-level
            ``num_partitions``.
        workers: Number of worker processes. Defaults to the module-level
            ``num_workers``.

    Returns:
        A list with one entry per slice, in slice order, holding
        ``func(slice)``.

    Raises:
        ValueError: If ``partitions`` is smaller than 1.
    """
    if partitions is None:
        partitions = num_partitions
    if workers is None:
        workers = num_workers
    if partitions < 1:
        raise ValueError(f'partitions must be at least 1, got {partitions}')

    # Spread the remainder over the leading slices so that no element is
    # dropped when len(data) is not a multiple of `partitions`. When it is a
    # multiple, every slice has the same size as a plain floor division gives.
    base_size, remainder = divmod(len(data), partitions)
    chunks = []
    start = 0
    for index in range(partitions):
        stop = start + base_size + (1 if index < remainder else 0)
        chunks.append(data[start:stop])
        start = stop

    with mp.Pool(workers) as pool:
        # map() blocks until every slice is processed, so the results are
        # complete before the pool is torn down on leaving the `with`.
        return pool.map(func, chunks)
 
 
def test1(arr):
    """Sequential workload: run ``my_func`` over ``arr`` in the calling process.

    The computed result is discarded and nothing is returned.
    """
    _ = my_func(arr)
 
 
def test2(arr):
    """Parallel workload: pass ``arr`` and ``my_func`` to ``parallelize_dataframe``.

    The computed result is discarded and nothing is returned.
    """
    _ = parallelize_dataframe(arr, my_func)
 
 
class MyTimer:
    """Context manager that prints how long its ``with`` body took.

    Attributes:
        name: Label included in the printed report.
        started: ``time.perf_counter()`` reading taken on entry, or ``None``
            before a block has been entered. Only differences between such
            readings are meaningful.
        elapsed: Seconds spent in the most recently finished ``with`` body,
            or ``None`` until a block has finished.
    """

    def __init__(self, name):
        self.name = name
        self.started = None
        self.elapsed = None

    def __enter__(self):
        # perf_counter() is monotonic and high-resolution; a wall clock such
        # as time.time() can jump if the system clock is adjusted mid-run.
        self.started = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.elapsed = time.perf_counter() - self.started
        print(f'{self.elapsed} secs elapsed for {self.name}')
        # Implicitly returns None, so an exception raised in the body
        # still propagates after the report is printed.
 
 
if __name__ == '__main__':
    arr = init_arr()

    # Time the sequential run first, then the pool-based run, on the same input.
    for label, workload in (('Sync', test1), ('Parallel', test2)):
        with MyTimer(label):
            workload(arr)

