def append_in_for_loop(list_2d): flattened = [] for sublist in list_2d: for item in sublist: flattened.append(item) return flattened def extend_in_for_loop(list_2d): flattened = [] for sublist in list_2d: flattened.extend(sublist) return flattened def list_comprehension(list_2d): return [item for sublist in list_2d for item in sublist] def flatten_via_sum(list_2d): return sum(list_2d, []) import itertools def itertools_chain_from_iterable(list_2d): return list(itertools.chain.from_iterable(list_2d)) def itertools_chain(list_2d): return list(itertools.chain(*list_2d)) import functools import operator def functools_reduce_lambda(list_2d): return functools.reduce(lambda x,y: x+y, list_2d) def functools_reduce_operator(list_2d): return functools.reduce(operator.add, map(list, list_2d)) def functools_reduce_list_add(list_2d): return functools.reduce(list.__add__, (list(sub) for sub in list_2d)) funcs = [ append_in_for_loop, list_comprehension, flatten_via_sum, itertools_chain_from_iterable, itertools_chain, functools_reduce_lambda, extend_in_for_loop, functools_reduce_operator, functools_reduce_list_add ] for f in funcs: assert (f([[1],[2,3,4],[5],[6]]) == [1, 2, 3, 4, 5, 6]),\ '%s failed test 1'% f.__name__ assert (f([[1],[2,[3],4],[5],[6]]) == [1, 2, [3], 4, 5, 6]),\ '%s failed test 2'% f.__name__ print('ok') import timeit import random import copy import numpy as np random.seed(12345) def make_copy(l): return copy.deepcopy(l) func_names = [f.__name__ for f in funcs] orders_n = [10**n for n in range(1, 5)] timings = {f:[] for f in func_names} for n in orders_n: l = [[i for i in range(random.randint(1,10))] for j in range(n)] for f in func_names: timings[f].append(min(timeit.Timer('%s(make_copy(l))' %f, 'from __main__ import %s, l, make_copy' %f) .repeat(repeat=3, number=10))) %matplotlib inline import platform import multiprocessing def print_sysinfo(): print('\nPython version :', platform.python_version()) print('compiler :', platform.python_compiler()) print('\nsystem :', platform.system()) print('release :', platform.release()) print('machine :', platform.machine()) print('processor :', platform.processor()) print('CPU count :', multiprocessing.cpu_count()) print('interpreter:', platform.architecture()[0]) print('\n\n') import matplotlib.pyplot as plt def plot(): labels_fast = {'append_in_for_loop':'.append() in for-loop', 'extend_in_for_loop':'.extend() in for-loop', 'list_comprehension': 'List comprehension', 'itertools_chain_from_iterable': 'list(itertools.chain.from_iterable(list_2d))', 'itertools_chain': 'list(itertools.chain(*list_2d))', } labels_slow = {'functools_reduce_lambda': 'functools.reduce(lambda x,y: x+y, list_2d)', 'functools_reduce_operator': 'functools.reduce(operator.add, map(list, list_2d))', 'flatten_via_sum': 'sum(list_2d, [])', 'functools_reduce_list_add': 'functools.reduce(list.__add__, (list(sub) for sub in list_2d))' } plt.rcParams.update({'font.size': 9}) fig = plt.figure(figsize=(10,8)) plt.title('Performance of different methods for flattening a list of sublists', fontsize=18) reds = ['#F87217', '#F75D59', '#FF0000', '#C24641'] for lb,col in zip(sorted(labels_slow.keys()), reds): plt.plot(orders_n, [t**2 for t in timings[lb]], alpha=0.3, label=labels_slow[lb], marker='o', lw=2, c=col, markersize=8) blues = ['#000080', '#2B60DE', '#736AFF', '#659EC7', '#46C7C7'] for lb,col in zip(sorted(labels_fast.keys()), blues): plt.plot(orders_n, [t**2 for t in timings[lb]], alpha=0.3, label=labels_fast[lb], marker='^', lw=2, c=col, markersize=10) max_perf = max( s/c for s,c in zip(timings['flatten_via_sum'], timings['list_comprehension']) ) ftext = 'A list comprehension \n >>[item for sublist '\ 'in list_2d for item in sublist]\n'\ 'is {:.2f}x faster for flattening a 2D list than \n >>sum(list_2d, [])\nat n = 10000'\ .format(max_perf) plt.figtext(.4,.15, ftext, fontsize=13, ha='left') plt.legend(loc='upper left') plt.grid() plt.ylabel('Time per computation for 10 loops (in seconds)', fontsize=12) plt.xlabel('Number of sublists', fontsize=12) plt.xscale('log') plt.yscale('log') plt.show() print_sysinfo() plot() import timeit import random import copy import numpy as np random.seed(12345) def make_copy(l): return copy.deepcopy(l) labels_fast = { 'append_in_for_loop':'.append() in for-loop', 'extend_in_for_loop':'.extend() in for-loop', 'list_comprehension': 'List comprehension', 'itertools_chain_from_iterable': 'list(itertools.chain.from_iterable(list_2d))', 'itertools_chain': 'list(itertools.chain(*list_2d))', } fast_names = [f for f in labels_fast.keys()] fast_times = [] l = [[i for i in range(random.randint(1,10))] for j in range(10**5)] for f in fast_names: fast_times.append(min(timeit.Timer('%s(make_copy(l))' %f, 'from __main__ import %s, l, make_copy' %f) .repeat(repeat=3, number=10))) plt.rcParams.update({'font.size': 12}) fast_labels = [labels_fast[l] for l in fast_names] x_pos = np.arange(len(fast_labels)) plt.figure(figsize=(8,7)) plt.bar(x_pos, fast_times, align='center', alpha=0.5) plt.xticks(x_pos, fast_labels, rotation=90) plt.axhline(y=min(fast_times), linestyle='--', color='black') plt.ylabel('Time per computation for 10 loops in seconds for n=10000') plt.title('List flattening methods with linear growth rates') plt.ylim(8,9) plt.show()