import statsmodels.formula.api as sm
from datetime import datetime
import pandas as pd
import pandas.io.data as web
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
pd.set_option('max_columns', 30)
%matplotlib inline
Filtro de celdas para correr todo
run_console = True
update_from_yahoo = False
update_from_Quandl = False
update_from_dropbox = False
plot_on = True
Descargar datos de la web y guardar en archivo csv
if update_from_yahoo:
today = datetime.now()
ipsa = web.DataReader('^IPSA', 'yahoo', start=datetime(2003, 1, 10), end=today)['Close']
spy = web.DataReader('SPY', 'yahoo', start=datetime(1993, 1, 29), end=today)['Close']
eem = web.DataReader('EEM', 'yahoo', start=datetime(2003, 4, 15), end=today)['Close']
vea = web.DataReader('VEA', 'yahoo', start=datetime(2007, 7, 26), end=today)['Close']
df = pd.DataFrame()
df['ipsa'] = ipsa
df['spy'] = spy
df['eem'] = eem
df['vea'] = vea
df.to_csv('INDEX.csv')
if update_from_Quandl:
import Quandl
df = Quandl.get('BNP/USDCLP')
df.to_csv('USDCLP.csv')
# Datos que actualiza usuario del foro chilebolsa
if update_from_dropbox:
import urllib2
url = 'http://dl.dropboxusercontent.com/u/35775157/multifondos_afp.txt'
res = urllib2.Request(url)
csvio = urllib2.urlopen(res)
df = pd.read_csv(csvio, index_col=['<TICKER>', '<DTYYMMDD>'], parse_dates=True, usecols=['<TICKER>', '<DTYYMMDD>','<CLOSE>'])
df = df.unstack('<TICKER>')
df.columns = df.columns.droplevel()
df.to_csv('AFP.csv')
Leer datos desde archivos csv y guardar en DataFrame data
data = pd.read_csv('INDEX.csv', parse_dates=True, index_col='Date')
data['usd'] = pd.read_csv('USDCLP.csv', parse_dates=True, index_col='Date')
data.info()
data.tail()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 2816 entries, 2003-01-10 00:00:00 to 2014-04-16 00:00:00 Data columns (total 5 columns): ipsa 2816 non-null float64 spy 2739 non-null float64 eem 2674 non-null float64 vea 1633 non-null float64 usd 2816 non-null float64 dtypes: float64(5)
ipsa | spy | eem | vea | usd | |
---|---|---|---|---|---|
Date | |||||
2014-04-10 | 3853.95 | 183.16 | 41.83 | 40.99 | 547.0000 |
2014-04-11 | 3868.35 | 181.51 | 41.83 | 40.76 | 545.4900 |
2014-04-14 | 3893.51 | 182.94 | 41.74 | 40.99 | 548.7000 |
2014-04-15 | 3877.49 | 184.20 | 41.11 | 40.79 | 549.0185 |
2014-04-16 | 3912.24 | 186.13 | 41.63 | 41.33 | 554.7950 |
5 rows × 5 columns
if plot_on:
data.plot(subplots=True, figsize=(10, 10), sharex=True)
#plt.legend(loc='best')
Variación porcentual diaria de los datos en DataFrame data_pc
data_pc = data.pct_change()
data_cum = data_pc.cumsum()
if plot_on:
data_pc.plot(subplots=True, figsize=(10, 10), sharex=True)
data_cum.plot(figsize=(10, 10))
Edu7 https://twitter.com/AlephEdu sugiere la siguiente ponderación de índices para "simular" variación fondo A
edu_pc = pd.DataFrame()
edu_pc['A'] = 0.20*data_pc['spy'] + 0.15*data_pc['ipsa'] + 0.17*data_pc['eem'] + 0.06*data_pc['vea'] + 0.50*data_pc['usd']
edu_pc['A1'] = 0.21*data_pc['spy'] + 0.15*data_pc['ipsa'] + 0.125*data_pc['eem'] + 0.13*data_pc['vea'] + 0.57*data_pc['usd']
edu_cum = pd.DataFrame()
edu_cum['A'] = edu_pc['A'].cumsum()
edu_cum['A1'] = edu_pc['A1'].cumsum()
if plot_on:
edu_pc['2011':][['A','A1']].cumsum().plot(figsize=(10, 10))
Guardar valores cuota de multifondo de todas las AFP en m_afp
m_afp = pd.read_csv('AFP.csv', parse_dates=True, index_col=['<DTYYMMDD>'])
m_afp.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 4442 entries, 2002-01-02 00:00:00 to 2014-04-15 00:00:00 Data columns (total 30 columns): CAPITAL-A 2129 non-null float64 CAPITAL-B 2128 non-null float64 CAPITAL-C 2129 non-null float64 CAPITAL-D 2128 non-null float64 CAPITAL-E 2129 non-null float64 CUPRUM-A 4203 non-null float64 CUPRUM-B 4202 non-null float64 CUPRUM-C 4415 non-null float64 CUPRUM-D 4202 non-null float64 CUPRUM-E 4415 non-null float64 HABITAT-A 4192 non-null float64 HABITAT-B 4190 non-null float64 HABITAT-C 4403 non-null float64 HABITAT-D 4191 non-null float64 HABITAT-E 4401 non-null float64 MODELO-A 1246 non-null float64 MODELO-B 1245 non-null float64 MODELO-C 1246 non-null float64 MODELO-D 1245 non-null float64 MODELO-E 1246 non-null float64 PLANVITAL-A 4204 non-null float64 PLANVITAL-B 4203 non-null float64 PLANVITAL-C 4417 non-null float64 PLANVITAL-D 4204 non-null float64 PLANVITAL-E 4416 non-null float64 PROVIDA-A 4202 non-null float64 PROVIDA-B 4201 non-null float64 PROVIDA-C 4410 non-null float64 PROVIDA-D 4201 non-null float64 PROVIDA-E 4410 non-null float64 dtypes: float64(30)
Guardar valores cuota fondo A en DataFrame a_afp
a_afp = pd.DataFrame()
a_afp['CAPITAL-A'] = m_afp['CAPITAL-A']
a_afp['CUPRUM-A'] = m_afp['CUPRUM-A']
a_afp['HABITAT-A'] = m_afp['HABITAT-A']
a_afp['MODELO-A'] = m_afp['MODELO-A']
a_afp['PLANVITAL-A'] = m_afp['PLANVITAL-A']
a_afp['PROVIDA-A'] = m_afp['PROVIDA-A']
if plot_on:
a_afp['2011':].plot(figsize=(10, 10))
Ver si existe correlación de valores cuota del fondo A para distintas AFP (Efecto manada)
a_afp.corr()
CAPITAL-A | CUPRUM-A | HABITAT-A | MODELO-A | PLANVITAL-A | PROVIDA-A | |
---|---|---|---|---|---|---|
CAPITAL-A | 1.000000 | 0.997865 | 0.997788 | 0.983902 | 0.996134 | 0.999183 |
CUPRUM-A | 0.997865 | 1.000000 | 0.997959 | 0.994555 | 0.996849 | 0.999522 |
HABITAT-A | 0.997788 | 0.997959 | 1.000000 | 0.995856 | 0.999302 | 0.997767 |
MODELO-A | 0.983902 | 0.994555 | 0.995856 | 1.000000 | 0.997381 | 0.982992 |
PLANVITAL-A | 0.996134 | 0.996849 | 0.999302 | 0.997381 | 1.000000 | 0.996442 |
PROVIDA-A | 0.999183 | 0.999522 | 0.997767 | 0.982992 | 0.996442 | 1.000000 |
6 rows × 6 columns
Variación porcentual diaria de los valores cuota fondo A en DataFrame a_afp_pc
a_afp_pc = a_afp.pct_change()
Variación acumulada de valores cuota fondo A en DataFrame a_afp_cum
a_afp_cum = a_afp_pc.cumsum()
Agregar carteras A y A1 inferidas por Edu7
a_afp_pc['A'] = edu_pc['A']
a_afp_pc['A1'] = edu_pc['A1']
a_afp_cum['A'] = edu_cum['A']
a_afp_cum['A1'] = edu_cum['A1']
Dibujar gráfico variación acumulada.
if plot_on:
#Dibujar gráfico aproximado tendencias variación acumulada, ajusta inicio acumulación con distintos ejes (derecha e izquierda).
#a_afp_cum['2013':][['A','A1','CUPRUM-A']].plot(secondary_y=['A','A1'], figsize=(10, 10))
a_afp_pc['2013':][['A','A1','CUPRUM-A']].cumsum().plot(figsize=(10, 10))
Observaciones:
Dibujar gráfico variación valor cuota Fondo A y E de AFP Cuprum.
if plot_on:
m_afp['2013':][['CUPRUM-A','CUPRUM-E']].plot(figsize=(10, 10))
if plot_on:
a_afp_pc['2013':].cumsum().plot(x='CUPRUM-A', y='A', style='o', markersize=3, figsize=(10, 10))
if plot_on:
a_afp_pc['2013':][['A','A1','MODELO-A']].cumsum().plot(figsize=(10, 10))
Observaciones:
Dibujar gráfico variación valor cuota Fondo A y E de AFP Modelo.
if plot_on:
m_afp['2013':][['MODELO-A','MODELO-E']].plot(figsize=(10, 10))
if plot_on:
a_afp_pc['2013':].cumsum().plot(x='MODELO-A', y='A', style='o', markersize=3, figsize=(10, 10))
Cálculo de coeficientes por regresión lineal
rl_pc = pd.DataFrame()
rl_pc['spy'] = data_pc['spy']
rl_pc['ipsa'] = data_pc['ipsa']
rl_pc['eem'] = data_pc['eem']
rl_pc['vea'] = data_pc['vea']
rl_pc['usd'] = data_pc['usd']
rl_pc['cuprum_A'] = a_afp_pc['CUPRUM-A']
rl_pc['modelo_A'] = a_afp_pc['MODELO-A']
Estimación de cartera de Edu7 https://twitter.com/AlephEdu
Cálculo de coeficientes por regresión lineal para AFP Cuprum
result = sm.ols(formula="cuprum_A ~ spy + ipsa + eem + vea + usd", data=rl_pc['2013':].cumsum()).fit()
print result.params
Intercept -0.003812 spy 0.216213 ipsa 0.403131 eem 0.057489 vea 0.213285 usd 0.575817 dtype: float64
Observaciones:
Cálculo de coeficientes por regresión lineal para AFP Modelo
result = sm.ols(formula="modelo_A ~ spy + ipsa + eem + vea + usd", data=rl_pc['2013':].cumsum()).fit()
print result.params
Intercept -0.001324 spy 0.160597 ipsa 0.359208 eem 0.115911 vea 0.200686 usd 0.588541 dtype: float64
Observaciones:
if run_console:
%qtconsole