import pandas as pd
import numpy as np
# Benchmark sizes: n is the number of rows, m is the exclusive upper bound
# of the integer key range. They are written as ints (not 10e6 / 1e6 floats)
# because NumPy requires integer sizes for array creation.
n = 10**7
m = 10**6
# Build the benchmark frame d with n rows: integer key x drawn from [0, m)
# and a float column y drawn from [0, 1).
%time d = pd.DataFrame({"x": np.random.randint(0,m,n), "y": np.random.random(n)})
CPU times: user 336 ms, sys: 144 ms, total: 480 ms Wall time: 482 ms
# Boolean-mask filter: keep the rows of d whose key x lies in [10, 20).
%time dd = d[(d.x>=10) & (d.x<20)]
CPU times: user 96 ms, sys: 20 ms, total: 116 ms Wall time: 115 ms
%time dd = d.sort("x")
CPU times: user 3.49 s, sys: 264 ms, total: 3.76 s Wall time: 3.77 s
# Copy d so the column added below does not touch the original frame.
%time dd = d.copy()
CPU times: user 24 ms, sys: 72 ms, total: 96 ms Wall time: 96.9 ms
# Add a derived column y2 = 2 * y to the copy.
%time dd["y2"] = 2*dd["y"]
CPU times: user 60 ms, sys: 88 ms, total: 148 ms Wall time: 64.9 ms
# Mean of y per key x on the unsorted frame; with the default as_index the
# result is a Series (confirmed by the type() check below).
%time dd = d.groupby("x")["y"].mean()
CPU times: user 1.3 s, sys: 156 ms, total: 1.46 s Wall time: 1.46 s
type(dd)
pandas.core.series.Series
# Same aggregation with as_index=False; the result is now a DataFrame
# (confirmed by the type() check below).
%time dd = d.groupby("x", as_index = False)["y"].mean()
CPU times: user 1.39 s, sys: 140 ms, total: 1.53 s Wall time: 1.53 s
type(dd)
pandas.core.frame.DataFrame
%time dk = d.sort_index(by = "x")
CPU times: user 3.48 s, sys: 240 ms, total: 3.72 s Wall time: 3.72 s
# Mean of y per key x, this time on dk instead of d (Series result form).
%time dd = dk.groupby("x")["y"].mean()
CPU times: user 284 ms, sys: 120 ms, total: 404 ms Wall time: 402 ms
# Same aggregation on dk with as_index=False.
%time dd = dk.groupby("x", as_index = False)["y"].mean()
CPU times: user 352 ms, sys: 132 ms, total: 484 ms Wall time: 485 ms
%time dm = pd.DataFrame({"x": np.random.permutation(np.arange(m))})
CPU times: user 176 ms, sys: 4 ms, total: 180 ms Wall time: 179 ms
# Join d with dm. No `on` is given, so pandas joins on the column the two
# frames share (x). Neither input is sorted by the key here.
%time dd = pd.merge(d, dm)
CPU times: user 5.38 s, sys: 504 ms, total: 5.88 s Wall time: 5.89 s
%time dmk = dm.sort_index(by = "x")
CPU times: user 212 ms, sys: 4 ms, total: 216 ms Wall time: 217 ms
# Join dk with dmk on their shared column x (no `on` given), for comparison
# with the pd.merge(d, dm) timing.
%time dd = pd.merge(dk, dmk)
CPU times: user 1.78 s, sys: 380 ms, total: 2.16 s Wall time: 2.16 s