import pandas as pd import numpy as np %load_ext rmagic %%R -o data set.seed(3433); par(mfrow=c(1,2)); data <- rnorm(100,mean=seq(0,3,length=100),sd=seq(0.1,3,length=100)) df = pd.DataFrame(zip(data, np.linspace(0, 3, num=100)), columns=['data', 'x']) from statsmodels.formula.api import ols from statsmodels.stats.sandwich_covariance import cov_hc3 lm1 = ols('data ~ x', df).fit() cov_hc3(lm1) lm1.normalized_cov_params %%R -o x,y set.seed(343) x <- seq(0,3,length=100); y <- rcauchy(100) df = pd.DataFrame(zip(x, y), columns=['x', 'y']) from statsmodels.formula.api import rlm lm1 = ols('y ~ x', df).fit() rlm1 = rlm('y ~ x', df).fit() lm1.params rlm1.params f, (ax1, ax2) = subplots(ncols=2) ax1.plot(df['x'], df['y'], 'o', color='grey') ax1.plot(df['x'], lm1.fittedvalues, linewidth=3) ax1.plot(df['x'], rlm1.fittedvalues, 'g', linewidth=3) ax1.set_xlabel('x') ax1.set_ylabel('y') ax2.plot(df['x'], df['y'], 'o', color='grey') ax2.plot(df['x'], lm1.fittedvalues, linewidth=3) ax2.plot(df['x'], rlm1.fittedvalues, 'g', linewidth=3) ax2.set_xlabel('x') ax2.set_ylabel('y') ax2.set_ylim([-5, 5]) ax2.set_title('Zoomed in') f.set_size_inches(9, 3) f.tight_layout(); movies = pd.read_csv('../data/movies.txt', sep='\t') movies.columns = ['X', 'score', 'rating', 'genre', 'box_office', 'running_time'] movies.head() # no stepwise regression lm1 = ols('score ~ box_office + running_time', movies).fit() lm1.summary() # no regsubsets # no bic.glm