import pandas as pd
import numpy as np
import ipyparallel as ipp
# Number of rows in the benchmark frame.
nrows = 20_000_000
# Benchmark input: an (nrows x 2) frame of random floats with columns "a" and "b".
target_df = pd.DataFrame(np.random.random((nrows, 2)), columns=["a", "b"])
def f(df):
    """Return the per-column sums of *df*.

    The reduction runs along axis 0 (down the rows), so the result holds
    one value per column of ``df``.
    """
    return df.sum(axis=0)
# Baseline: time f on the whole frame in the local process.
%time f(target_df)
CPU times: user 297 ms, sys: 10.6 ms, total: 308 ms Wall time: 311 ms
a 1.000086e+07 b 9.999178e+06 dtype: float64
PEN = 4
dfs = np.array_split(target_df, PEN)
c = ipp.Cluster(n=PEN)
with c as rc:
e_all = rc[:]
%time results = e_all.map_sync(f, dfs)
results
/Users/minrk/conda/lib/python3.10/site-packages/numpy/core/fromnumeric.py:57: FutureWarning: 'DataFrame.swapaxes' is deprecated and will be removed in a future version. Please use 'DataFrame.transpose' instead. return bound(*args, **kwds)
Starting 4 engines with <class 'ipyparallel.cluster.launcher.LocalEngineSetLauncher'>
0%| | 0/4 [00:00<?, ?engine/s]
CPU times: user 45.4 ms, sys: 160 ms, total: 205 ms Wall time: 922 ms Stopping engine(s): 1707385317 engine set stopped 1707385317: {'engines': {'0': {'exit_code': 0, 'pid': 89728, 'identifier': '0'}, '1': {'exit_code': 0, 'pid': 89729, 'identifier': '1'}, '2': {'exit_code': 0, 'pid': 89730, 'identifier': '2'}, '3': {'exit_code': 0, 'pid': 89731, 'identifier': '3'}}, 'exit_code': 0} Stopping controller Controller stopped: {'exit_code': 0, 'pid': 89716, 'identifier': 'ipcontroller-1707385316-tu9a-89704'}
[a 2.500426e+06 b 2.500046e+06 dtype: float64, a 2.500864e+06 b 2.500290e+06 dtype: float64, a 2.500053e+06 b 2.499262e+06 dtype: float64, a 2.499518e+06 b 2.499580e+06 dtype: float64]