#!/usr/bin/env python
# coding: utf-8

# This notebook shows a simple example of profiling alternative methods of
# concatenating two pandas DataFrames.

# In[1]:

# get_ipython() is supplied by the IPython/Jupyter runtime; this script is a
# notebook export and is meant to be run there, not with plain `python`.
get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import numpy as np
import cProfile
from pstatsviewer import StatsViewer
from qgrid import nbinstall
nbinstall()

# In[2]:

# Construct two 5000 x 8 frames with random floats.
# Both frames share the column labels 'A'..'H'; their integer indexes are
# disjoint (0..4999 and 5000..9999).
df1 = pd.DataFrame(
    np.random.randn(5000, 8),
    columns=[chr(ord('A') + i) for i in range(8)],
    index=range(5000),
)
df2 = pd.DataFrame(
    np.random.randn(5000, 8),
    columns=[chr(ord('A') + i) for i in range(8)],
    index=range(5000, 10000),
)
# Last expression of the cell: previews the first five rows in a notebook.
df1.head(5)

# In[3]:

from qgrid import show_grid

# ## Generating stats files with cProfile:

# In[4]:


def concat_naive():
    """
    Concatenate df1 and df2 with pd.concat, 500 times over.

    The results are discarded; the function exists only to give cProfile
    a workload to measure.
    """
    for _ in range(500):
        pd.concat([df1, df2])


# Profile the call and dump the raw stats to 'naive.stats'.
cProfile.run(
    'concat_naive()',
    'naive.stats',
)

# ## Table/Grid View
#
# Provides interactive support for:
# - Scrolling
# - Filtering
# - Sorting
# - Resizing Columns

# In[5]:

slow = StatsViewer("naive.stats")
slow.table()

# ## Chart View
#
# Supports interactive generation of charts parameterized by no. of functions
# and sort order.

# In[6]:

slow.chart()

# ## Comparing Alternative Implementations

# In[7]:


def concat_fast():
    """
    Concatenate using numpy primitives instead of pd.concat.

    Stacks the underlying value arrays and index arrays of df1 and df2 and
    builds a new DataFrame from them, reusing df1's columns.  Repeated 500
    times with the results discarded, mirroring concat_naive.
    """
    for _ in range(500):
        pd.DataFrame(
            np.vstack([df1.values, df2.values]),
            columns=df1.columns,
            index=np.hstack([
                df1.index.values,
                df2.index.values,
            ])
        )


# Profile the call and dump the raw stats to 'fast.stats'.
cProfile.run(
    'concat_fast()',
    'fast.stats',
)
fast = StatsViewer("fast.stats")

# ## Comparison View
#
# Both `chart` and `grid` support comparison versions.

# In[9]:

slow.compare_table(fast, lsuffix="_slow", rsuffix="_fast")

# In[11]:

slow.compare_chart(fast, 'tottime', 25)

# ## How does it do it?
#
# - cProfile output is loaded into a pandas `DataFrame`
# - Chart View is implemented with IPython widget delegating to http://github.com/mwaskom/seaborn.
# - Table View is implemented with http://github.com/quantopian/qgrid:
#   - Built on top of IPython widgets and http://github.com/mleibman/SlickGrid.

# In[ ]: