%%html
<link rel="stylesheet" href="static/hyrule.css" type="text/css">
%pylab inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
pd.set_option('display.mpl_style', 'default')
plt.rcParams['figure.figsize'] = (20, 10)
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['hist', 'figure', 'show'] `%matplotlib` prevents importing * from pylab and numpy
# Pandas URL method
# target URL: http://www.quandl.com/api/v1/datasets/YAHOO/INDEX_GSPC.csv?trim_start=2007-01-01&trim_end=2015-02-02
# Build the Quandl request URL. Use https — the other Quandl request in this
# notebook already does, and plain http is redirected/rejected by the API.
url = "https://www.quandl.com/api/v1/datasets/YAHOO/"
url += "INDEX_GSPC"
url += ".csv?trim_start=%s-%s-%s&trim_end=%s-%s-%s" % ('2007', '01', '01', '2015', '02', '02')
sp500 = pd.read_csv(url, parse_dates = ['Date'], dayfirst = True, index_col = 'Date')
# parse_dates: parse the "Date" column as datetime objects instead of strings
# index_col: use the "Date" column as the index instead of the default RangeIndex
# dayfirst: interpret ambiguous dates as DD/MM rather than the default MM/DD
sp500.head()
Open | High | Low | Close | Volume | Adjusted Close | |
---|---|---|---|---|---|---|
Date | ||||||
2015-02-02 | 1996.67 | 2021.66 | 1980.90 | 2020.85 | 4008330000 | 2020.85 |
2015-01-30 | 2019.35 | 2023.32 | 1993.38 | 1994.99 | 4538650000 | 1994.99 |
2015-01-29 | 2002.45 | 2024.64 | 1989.18 | 2021.25 | 4127140000 | 2021.25 |
2015-01-28 | 2032.34 | 2042.49 | 2001.49 | 2002.16 | 4067530000 | 2002.16 |
2015-01-27 | 2047.86 | 2047.86 | 2019.91 | 2029.55 | 3329810000 | 2029.55 |
import pandas.io.data as web
# NOTE(review): pandas.io.data was deprecated in pandas 0.17 and later removed;
# the same DataReader API now lives in the separate pandas_datareader package.
# Confirm the pandas version pinned for this notebook before upgrading.
# Fetch daily S&P 500 (^GSPC) data from Yahoo Finance for 2007-01-01..2015-02-02;
# the tail() output below shows the columns: Open/High/Low/Close/Volume/Adj Close,
# indexed by Date.
spc = web.DataReader(name = "^GSPC", data_source = 'yahoo', start = '2007-1-1', end = '2015-2-2')
#spc.to_csv("SP500.csv") # Function to save a data frame for offline analysis
spc.tail()
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
2015-01-27 | 2047.86 | 2047.86 | 2019.91 | 2029.55 | 3329810000 | 2029.55 |
2015-01-28 | 2032.34 | 2042.49 | 2001.49 | 2002.16 | 4067530000 | 2002.16 |
2015-01-29 | 2002.45 | 2024.64 | 1989.18 | 2021.25 | 4127140000 | 2021.25 |
2015-01-30 | 2019.35 | 2023.32 | 1993.38 | 1994.99 | 4538650000 | 1994.99 |
2015-02-02 | 1996.67 | 2021.66 | 1980.90 | 2020.85 | 4008330000 | 2020.85 |
# Simulate a 1,000-step Gaussian random walk: walk[0] = 0 and
# walk[i] = walk[i-1] + z[i] for i >= 1. Note z[0] is deliberately unused,
# matching the original loop that started at i = 1.
N = 1000
z = np.random.randn(N)
# Vectorized cumulative sum replaces the Python-level accumulation loop:
# walk[i] = z[1] + ... + z[i], with a leading 0 for the starting position.
walk = np.concatenate(([0.0], np.cumsum(z[1:])))
plt.plot(walk)
[<matplotlib.lines.Line2D at 0x1038b67d0>]
# Short- and long-horizon moving averages of the S&P 500 opening price,
# plotted against the raw Open/Close series. min_periods=30 lets the
# averages start before a full window of data is available.
for column, window in (('MA42', 42), ('MA252', 252)):
    spc[column] = pd.rolling_mean(spc['Open'], window = window, min_periods = 30)
spc[['Open', 'MA252', 'MA42','Close']].plot()
plt.legend(["Actual", "Long term", "Short term","Close"])
plt.title('Trends in the SP500 data', fontsize = 20)
<matplotlib.text.Text at 0x1096ab050>
# AAPL daily prices from the Quandl WIKI dataset, 2007-01-01 .. 2015-02-02.
start = ('2007', '01', '01')
end = ('2015', '02', '02')
url = ("https://www.quandl.com/api/v1/datasets/WIKI/" + "AAPL"
       + ".csv?trim_start=%s-%s-%s&trim_end=%s-%s-%s" % (start + end))
# Parse "Date" as DD/MM datetimes and use it as the index.
AAPL = pd.read_csv(url, parse_dates = ['Date'], dayfirst = True, index_col = 'Date')
AAPL.head()
Open | High | Low | Close | Volume | Ex-Dividend | Split Ratio | Adj. Open | Adj. High | Adj. Low | Adj. Close | Adj. Volume | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2015-02-02 | 118.03 | 119.1700 | 116.08 | 118.63 | 62347947 | 0 | 1 | 118.03 | 119.1700 | 116.08 | 118.63 | 62347947 |
2015-01-30 | 118.28 | 120.0000 | 116.85 | 117.16 | 83532038 | 0 | 1 | 118.28 | 120.0000 | 116.85 | 117.16 | 83532038 |
2015-01-29 | 116.36 | 119.1900 | 115.56 | 118.90 | 83985866 | 0 | 1 | 116.36 | 119.1900 | 115.56 | 118.90 | 83985866 |
2015-01-28 | 117.56 | 118.1200 | 115.31 | 115.31 | 146420662 | 0 | 1 | 117.56 | 118.1200 | 115.31 | 115.31 | 146420662 |
2015-01-26 | 113.74 | 114.3626 | 112.80 | 113.10 | 55457420 | 0 | 1 | 113.74 | 114.3626 | 112.80 | 113.10 | 55457420 |
# Google Finance historical CSV for the GOOGLEINDEX_US:RENTAL series,
# then a 30-day rolling mean of the close (NaN until 30 observations exist).
base = "https://www.google.com/finance/historical"
params = "?q=GOOGLEINDEX_US:RENTAL&output=csv&startdate=20080101&ei=_znYVJjfMZOHsgeHv4G4Cg"
url = base + params
r = pd.read_csv(url, parse_dates = [0], index_col = 0)
r['MA30'] = pd.rolling_mean(r['Close'], window = 30, min_periods = 30)
r[['Close', 'MA30']].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x108af4ed0>
import datetime as dt
# Adjusted closing prices for six tickers over 2010-01-01 .. 2013-01-01.
# Selecting ['Adj Close'] yields a single DataFrame with one column per
# ticker (the head() output below shows AAPL/GE/IBM/KO/MSFT/PEP columns,
# indexed by Date).
df = web.get_data_yahoo(
['AAPL', 'GE', 'IBM', 'KO', 'MSFT', 'PEP'],
start = dt.datetime(2010, 1, 1),
end = dt.datetime(2013, 1, 1))['Adj Close']
df.head()
AAPL | GE | IBM | KO | MSFT | PEP | |
---|---|---|---|---|---|---|
Date | ||||||
2010-01-04 | 28.84 | 13.10 | 119.53 | 24.67 | 27.14 | 52.81 |
2010-01-05 | 28.89 | 13.16 | 118.09 | 24.37 | 27.14 | 53.45 |
2010-01-06 | 28.43 | 13.10 | 117.32 | 24.36 | 26.98 | 52.92 |
2010-01-07 | 28.38 | 13.77 | 116.92 | 24.30 | 26.70 | 52.58 |
2010-01-08 | 28.56 | 14.07 | 118.09 | 23.85 | 26.88 | 52.41 |
# Heat map of the pairwise correlations between each ticker's daily returns.
daily_returns = df.pct_change()   # percent change over a 1-day period
corr = daily_returns.corr()       # symmetric ticker-by-ticker correlation matrix
ticks = range(len(corr))
plt.imshow(corr, cmap = 'Reds', interpolation = 'none')
plt.colorbar()
plt.xticks(ticks, corr.columns)
plt.yticks(ticks, corr.columns)
plt.title("Correlation Matrix: Tech Stocks")
<matplotlib.text.Text at 0x10e52ac50>
from math import pi
from bokeh.plotting import *
# Candlestick chart of the first 50 rows of the sp500 frame.
# NOTE(review): this uses the old Bokeh procedural/global-state API
# (hold(), curplot(), module-level figure/rect/segment), which modern Bokeh
# removed — confirm the pinned Bokeh version before running.
df = pd.DataFrame(sp500)[:50]
mids = (df.Open + df.Close) / 2     # vertical centre of each candle body
spans = abs(df.Close - df.Open)     # candle body height
inc = df.Close > df.Open            # up days: close above open
dec = df.Open > df.Close            # down days: open above close
w = 12 * 60 * 60 * 1000 # sample 12 hours in ms
output_notebook() # Load the Bokeh Java-Script interactive browser
figure(x_axis_type="datetime", plot_width=1000, name="candlestick",
tools="pan,wheel_zoom,box_zoom,reset,previewsave")
hold()
# High-low wick first, then the body rectangles (light for up, red for down).
segment(df.index, df.High, df.index, df.Low, color='black')
rect(df.index[inc], mids[inc], w, spans[inc], fill_color="#D5E1DD", line_color="black")
rect(df.index[dec], mids[dec], w, spans[dec], fill_color="#F2583E", line_color="black")
curplot().title = "SP500 Candlestick"
xaxis().major_label_orientation = pi/4
grid().grid_line_alpha = 0.3
show()
import statsmodels.api as sm
import statsmodels.graphics.tsaplots as tsa
# Correlograms of the simulated random walk: autocorrelation (top) and
# partial autocorrelation (bottom), each out to 42 lags.
fig = plt.figure()
ax1 = fig.add_subplot(211)
fig = tsa.plot_acf(walk, lags = 42, ax = ax1)
ax2 = fig.add_subplot(212)
fig = tsa.plot_pacf(walk, lags = 42, ax = ax2)
# Fit an ARMA(2, 2) model to the walk and report the fit diagnostics.
arma_mod22 = sm.tsa.ARMA(walk, (2, 2)).fit()
print(arma_mod22.summary())
# Parenthesised call with %-formatting works on both Python 2 and 3; the
# original Python-2 print statement is a SyntaxError on Python 3.
print("Durbin Watson score: %s" % sm.stats.durbin_watson(arma_mod22.resid))
ARMA Model Results ============================================================================== Dep. Variable: y No. Observations: 1000 Model: ARMA(2, 2) Log Likelihood -1422.275 Method: css-mle S.D. of innovations 1.001 Date: Wed, 11 Feb 2015 AIC 2856.551 Time: 20:31:19 BIC 2885.997 Sample: 0 HQIC 2867.743 ============================================================================== coef std err z P>|z| [95.0% Conf. Int.] ------------------------------------------------------------------------------ const 5.9467 4.081 1.457 0.145 -2.052 13.945 ar.L1.y 1.9103 0.016 119.067 0.000 1.879 1.942 ar.L2.y -0.9107 0.016 -57.126 0.000 -0.942 -0.879 ma.L1.y -0.9337 0.035 -26.842 0.000 -1.002 -0.866 ma.L2.y -0.0063 0.031 -0.202 0.840 -0.067 0.055 Roots ============================================================================= Real Imaginary Modulus Frequency ----------------------------------------------------------------------------- AR.1 1.0049 +0.0000j 1.0049 0.0000 AR.2 1.0927 +0.0000j 1.0927 0.0000 MA.1 1.0633 +0.0000j 1.0633 0.0000 MA.2 -149.2299 +0.0000j 149.2299 0.5000 ----------------------------------------------------------------------------- Durbin Watson score: 1.96427270972
# CPI Data set: Explore this data set in class
# Plot and Compute the various statistics
# Explain the Correlograms
# Load the statsmodels macroeconomic sample data and index it by quarter
# (1959Q1 through 2009Q3) so the inflation series plots on a time axis.
macrodta = sm.datasets.macrodata.load_pandas().data
macrodta.index = pd.Index(sm.tsa.datetools.dates_from_range('1959Q1', '2009Q3'))
# Parenthesised print works on both Python 2 and 3 (the original
# `print macrodta.head()` statement is a SyntaxError on Python 3).
print(macrodta.head())
cpi = macrodta["infl"]
macrodta  # bare expression: echoes the frame in a notebook cell, no other effect
fig = plt.figure()
ax = fig.add_subplot(111)
ax = cpi.plot(ax = ax)
ax.legend()
year quarter realgdp realcons realinv realgovt realdpi \ 1959-03-31 1959 1 2710.349 1707.4 286.898 470.045 1886.9 1959-06-30 1959 2 2778.801 1733.7 310.859 481.301 1919.7 1959-09-30 1959 3 2775.488 1751.8 289.226 491.260 1916.4 1959-12-31 1959 4 2785.204 1753.7 299.356 484.052 1931.3 1960-03-31 1960 1 2847.699 1770.5 331.722 462.199 1955.5 cpi m1 tbilrate unemp pop infl realint 1959-03-31 28.98 139.7 2.82 5.8 177.146 0.00 0.00 1959-06-30 29.15 141.7 3.08 5.1 177.830 2.34 0.74 1959-09-30 29.35 140.5 3.82 5.3 178.657 2.74 1.09 1959-12-31 29.37 140.0 4.33 5.6 179.386 0.27 4.06 1960-03-31 29.54 139.6 3.50 5.2 180.007 2.31 1.19
<matplotlib.legend.Legend at 0x1119448d0>
# Correlograms for the inflation (CPI) series: autocorrelation on the top
# axes, partial autocorrelation below, each out to 42 lags.
fig = plt.figure()
acf_axes = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(cpi, lags=42, ax=acf_axes)
pacf_axes = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(cpi, lags=42, ax=pacf_axes)