"""Benchmark several ways of computing the arithmetic mean of a sample.

The candidates are a hand-written ``sum()/len()``, ``statistics.mean()``,
``numpy.mean()`` applied to a list, ``numpy.mean()`` applied to an existing
array, and "convert the list to an array, then ``numpy.mean()``".

Run as a script (or paste into a notebook cell) to time every candidate for
sample sizes 10 .. 100000, print system information and plot the result.
Importing the module only defines the helpers and runs a cheap sanity check.
"""

# The statistics module has been added to
# the standard library in Python 3.4
import multiprocessing
import platform
import statistics as stats
import timeit

import numpy as np


def calc_mean(samples):
    """Return the mean of ``samples`` using only ``sum()`` and ``len()``."""
    return sum(samples) / float(len(samples))


def np_mean(samples):
    """Return ``numpy.mean(samples)``; benchmarked with a list argument."""
    return np.mean(samples)


def np_mean_ary(np_array):
    """Return ``numpy.mean(np_array)``; benchmarked with an ndarray argument."""
    return np.mean(np_array)


def st_mean(samples):
    """Return ``statistics.mean(samples)``."""
    return stats.mean(samples)


def np_convert_and_mean_ary(samples):
    """Convert ``samples`` with ``numpy.array()`` and return its mean."""
    return np.mean(np.array(samples))


# Sanity check: every implementation must agree on a small known sample.
n = 1000
samples = list(range(n))
samples_array = np.arange(n)
assert (st_mean(samples)
        == np_mean(samples)
        == calc_mean(samples)
        == np_mean_ary(samples_array)
        == np_convert_and_mean_ary(samples))
print('ok')


# Names of the benchmarked functions; also the keys of ``times_n``.
funcs = ['st_mean', 'np_mean', 'calc_mean', 'np_mean_ary',
         'np_convert_and_mean_ary']
# Sample sizes to benchmark: 10, 100, ..., 100000.
orders_n = [10**n for n in range(1, 6)]
# Best timing per function, one entry per element of ``orders_n``.
# Populated by the ``__main__`` section below.
times_n = {f: [] for f in funcs}

# Functions that are timed on a pre-built ndarray.  Every other candidate
# must receive a plain Python list.
_ARRAY_INPUT_FUNCS = frozenset(['np_mean_ary'])


def benchmark_input(func_name, size):
    """Return the data ``func_name`` is benchmarked on for ``size`` samples.

    ``np_mean_ary`` gets ``numpy.arange(size)``; all other functions get
    ``list(range(size))``.  Choosing the input per function (instead of
    rebinding one shared variable) guarantees that
    ``np_convert_and_mean_ary`` really pays for the list-to-array
    conversion it is supposed to measure.
    """
    if func_name in _ARRAY_INPUT_FUNCS:
        return np.arange(size)
    return list(range(size))


def benchmark_means(orders, repeat=3, number=1000):
    """Time every function named in ``funcs`` for each sample size.

    Parameters
    ----------
    orders : iterable of int
        Sample sizes to benchmark.
    repeat : int
        Number of timing rounds per measurement; the fastest round is kept.
    number : int
        Calls per timing round.

    Returns
    -------
    dict
        Maps each function name to a list (one entry per sample size) of the
        best observed time per call in milliseconds.
    """
    results = {name: [] for name in funcs}
    for size in orders:
        for name in funcs:
            # ``globals=`` (Python 3.5+) hands the callable and its data to
            # timeit directly, so this works no matter which module is
            # ``__main__``.
            timer = timeit.Timer(
                'func(data)',
                globals={'func': globals()[name],
                         'data': benchmark_input(name, size)})
            best = min(timer.repeat(repeat=repeat, number=number))
            # Seconds for ``number`` calls -> milliseconds per call.
            results[name].append(1000.0 * best / number)
    return results


def print_sysinfo():
    """Print Python/NumPy versions and basic platform details to stdout."""
    print('\nPython version:', platform.python_version())
    print('NumPy version', np.__version__)
    print('compiler:', platform.python_compiler())
    print('\nsystem :', platform.system())
    print('release :', platform.release())
    print('machine :', platform.machine())
    print('processor :', platform.processor())
    print('interpreter:', platform.architecture()[0])
    print('CPU count :', multiprocessing.cpu_count())
    print('\n\n')


def plot_timing():
    """Plot the timings in ``times_n`` against ``orders_n`` on log-log axes.

    Reads the module-level ``orders_n`` and ``times_n``; ``times_n`` must
    already hold one timing per sample size for every function.  Also
    annotates the figure with the range of speed-ups of
    ``np_convert_and_mean_ary`` over ``st_mean``.
    """
    # Imported here so the mean helpers and the benchmark stay usable on
    # machines without matplotlib.
    import matplotlib.pyplot as plt

    labels = [('st_mean', 'statistics.mean()'),
              ('np_mean', 'numpy.mean() on list'),
              ('np_mean_ary', 'numpy.mean() on array'),
              ('calc_mean', 'sum(samples)/len(samples)'),
              ('np_convert_and_mean_ary',
               'convert to array then numpy.mean()'),
              ]

    plt.rcParams.update({'font.size': 12})
    plt.figure(figsize=(10, 8))
    for key, caption in labels:
        plt.plot(orders_n, times_n[key], alpha=0.5, label=caption,
                 marker='o', lw=3)
    plt.xlabel('sample size n')
    plt.ylabel('time per computation in milliseconds [ms]')
    plt.legend(loc=2)
    plt.grid()
    plt.xscale('log')
    plt.yscale('log')
    plt.title('Performance of different approaches for calculating sample means')

    # Speed-up of "convert then numpy.mean()" over statistics.mean() at each
    # sample size.
    speedups = [s / c for s, c in zip(times_n['st_mean'],
                                      times_n['np_convert_and_mean_ary'])]
    max_perf = max(speedups)
    min_perf = min(speedups)
    ftext = ('Converting a list to a numpy array and then using numpy.mean() \n is {:.2f}x to '
             '{:.2f}x faster than statistics.mean() on lists'
             ).format(min_perf, max_perf)
    plt.figtext(.14, .15, ftext, fontsize=11, ha='left')
    plt.show()


if __name__ == '__main__':
    # Equivalent of the notebook magic ``%matplotlib inline``.  The magic
    # itself is not valid Python, so it is issued through the IPython API
    # when available and skipped under a plain interpreter.
    try:
        get_ipython().run_line_magic('matplotlib', 'inline')
    except NameError:
        pass

    times_n.update(benchmark_means(orders_n))
    print_sysinfo()
    plot_timing()