#!/usr/bin/env python
# coding: utf-8

# # Statistical tools

# In[1]:


import addutils.toc
addutils.toc.js(ipy_notebook=True)


# With this tutorial we are going to see some of the statistical and computational tools offered by `pandas`.

# In[2]:


import datetime
import scipy.io
import numpy as np
import pandas as pd
import bokeh.plotting as bk
from IPython.display import display, HTML
from addutils import css_notebook, side_by_side2
css_notebook()


# ## 1 Percent change

# Given a `pandas.Series`, the method `pct_change` returns a new `pandas.Series` object containing the percent change over a given number of periods.

# In[3]:


# Values 10..17 with a little Gaussian noise, so the changes are not constant.
s1 = pd.Series(np.arange(10, 18) + np.random.randn(8) / 10)
pct_ch_1d = s1.pct_change() * 100
pct_ch_3d = s1.pct_change(periods=3) * 100
HTML(side_by_side2(s1, pct_ch_1d, pct_ch_3d))


# ## 2 Covariance

# Given two `pandas.Series`, the method `cov` computes the covariance between them, excluding missing values.

# In[4]:


# Random series on a business-day index. Built explicitly because
# `pandas.util.testing.makeTimeSeries` was deprecated in pandas 1.0
# and later removed.
s1 = pd.Series(np.random.randn(7),
               index=pd.date_range('2000-01-03', periods=7, freq='B'))
s2 = s1 + np.random.randn(len(s1)) / 10
HTML(side_by_side2(s1, s2))


# In[5]:


s1.cov(s2)


# It is also possible to compute the pairwise covariance of the columns of a `pandas.DataFrame` using the `pandas.DataFrame.cov` method. Here we build a random, time-indexed `pandas.DataFrame` by hand (older pandas versions offered `pandas.util.testing.makeTimeDataFrame` for this, but that module was deprecated in pandas 1.0 and later removed):

# In[6]:


d1 = pd.DataFrame(np.random.randn(30, 4),
                  index=pd.date_range('2000-01-03', periods=30, freq='B'),
                  columns=list('ABCD'))
print(d1.head())
print(d1.cov())


# ## 3 Correlation

# `pandas.Series.corr` computes the correlation between two `pandas.Series`. With the `method` parameter it is possible to choose between:
#
# * Pearson
# * Kendall
# * Spearman

# In[7]:


s1.corr(s2, method='pearson')


# As we have just seen for covariance, it is possible to call `pandas.DataFrame.corr` to obtain the pairwise correlation of the columns of a `pandas.DataFrame`.

# In[8]:


d1.corr()


# ## 4 Rolling moments and Binary rolling moments

# `pandas` also provides a lot of methods for calculating rolling moments.

# In[9]:


# The top-level `pd.rolling_*` functions no longer exist, so scanning
# `dir(pd)` for them yields an empty list. List the public methods of the
# `Rolling` window object instead; looking names up on the class avoids
# evaluating properties on the instance.
roller = pd.Series(dtype=float).rolling(2)
[n for n in dir(roller)
 if not n.startswith('_') and callable(getattr(type(roller), n, None))]


# Let's see some examples:

# In[10]:


s3 = pd.Series(np.random.randn(1000),
               index=pd.date_range('1/1/2000', periods=1000))
s3 = s3.cumsum()
# 60-period rolling statistics; the first 59 entries of each are NaN.
s3_max = s3.rolling(60).max()
s3_mean = s3.rolling(60).mean()
s3_min = s3.rolling(60).min()
data = {'cumsum': s3, 'max': s3_max, 'mean': s3_mean, 'min': s3_min}
df = pd.DataFrame(data)
df.tail()


# In[11]:


bk.output_notebook()


# In[12]:


# NOTE(review): `plot_width`/`plot_height` and `legend=` are the keyword
# names used by the original notebook; newer bokeh releases spell them
# `width`/`height` and `legend_label=` -- confirm against the installed
# bokeh version before changing.
fig = bk.figure(x_axis_type="datetime",
                tools="pan,box_zoom,reset",
                title='Rolling Moments',
                plot_width=750, plot_height=400)
fig.line(df.index, df['cumsum'], color='cadetblue', legend='Cumulative Sum')
fig.line(df.index, df['max'], color='mediumorchid', legend='Max')
fig.line(df.index, df['min'], color='mediumpurple', legend='Min')
fig.line(df.index, df['mean'], color='navy', legend='Mean')
bk.show(fig)


# `pandas.Series.cumsum` returns a new `pandas.Series` containing the cumulative sum of the given values.

# In[13]:


s4 = s3 + np.random.randn(len(s3))
# Binary rolling moment: correlate s3 with its noisy copy s4.
# (Correlating s3 with itself would only produce a constant 1.0.)
rollc = s3.rolling(window=10).corr(s4)
data2 = {'cumsum': s3, 'similar': s4, 'rolling correlation': rollc}
df2 = pd.DataFrame(data2)


# In[14]:


fig = bk.figure(x_axis_type="datetime",
                title='Rolling Correlation',
                plot_width=750, plot_height=400)
fig.line(df2.index, df2['cumsum'], color='cadetblue', legend='Cumulative Sum')
fig.line(df2.index, df2['similar'], color='mediumorchid', legend='Similar')
fig.line(df2.index, df2['rolling correlation'], color='navy', legend='Rolling Corr.')
fig.legend.location = "bottom_right"
bk.show(fig)


# ## 5 A practical example: Return indexes and cumulative returns

# In[15]:


AAPL = pd.read_csv('example_data/p03_AAPL.txt', index_col='Date', parse_dates=True)
price = AAPL['Adj Close']
display(price.tail())


# `pandas.Series.tail` returns the last n rows of a given `pandas.Series`.

# In[16]:


# NOTE: this is not the last expression of the cell, so its value is
# computed but never displayed.
price['2011-10-03'] / price['2011-3-01'] - 1
returns = price.pct_change()
ret_index = (1 + returns).cumprod()
# The first return is NaN (no previous price), so the index starts at 1.
# `.iloc` makes the positional access explicit: the Series is indexed by
# date, so a bare integer key is ambiguous.
ret_index.iloc[0] = 1
# NOTE(review): 'BM' (business month end) is the alias used by the original
# notebook; recent pandas releases rename it to 'BME' -- confirm against the
# installed pandas version.
monthly_returns = ret_index.resample('BM').last().pct_change()


# In[17]:


fig = bk.figure(x_axis_type='datetime',
                title='Monthly Returns',
                plot_width=750, plot_height=400)
fig.line(monthly_returns.index, monthly_returns)
bk.show(fig)


# ---
#
# Visit [www.add-for.com](http://www.add-for.com) for more tutorials and updates.
#
# This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.