%%html
<link rel="stylesheet" href="static/hyrule.css" type="text/css">
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style('white')
# Load the mammal-sleep data set, look at all pairwise relationships,
# and summarize each numeric column.
spat = pd.read_csv('msleep.csv')
# pd.scatter_matrix was deprecated in pandas 0.20 and later removed;
# the supported location is pd.plotting.scatter_matrix.
pd.plotting.scatter_matrix(spat, figsize=[10, 10], alpha=0.2, diagonal='kde')
spat.describe()
sleep_total | sleep_rem | sleep_cycle | awake | brainwt | bodywt | |
---|---|---|---|---|---|---|
count | 83.000000 | 61.000000 | 32.000000 | 83.000000 | 56.000000 | 83.000000 |
mean | 10.433735 | 1.875410 | 0.439583 | 13.567470 | 0.281581 | 166.136349 |
std | 4.450357 | 1.298288 | 0.358680 | 4.452085 | 0.976414 | 786.839732 |
min | 1.900000 | 0.100000 | 0.116667 | 4.100000 | 0.000140 | 0.005000 |
25% | 7.850000 | 0.900000 | 0.183333 | 10.250000 | 0.002900 | 0.174000 |
50% | 10.100000 | 1.500000 | 0.333333 | 13.900000 | 0.012400 | 1.670000 |
75% | 13.750000 | 2.400000 | 0.579167 | 16.150000 | 0.125500 | 41.750000 |
max | 19.900000 | 6.600000 | 1.500000 | 22.100000 | 5.712000 | 6654.000000 |
pd.scatter_matrix(spat, figsize=[10, 10], alpha=0.2)
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x109c9f190>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b001850>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c3cf890>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c0b8850>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c3e7ad0>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c0c3910>], [<matplotlib.axes._subplots.AxesSubplot object at 0x10c709c10>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b464bd0>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c37ac50>, <matplotlib.axes._subplots.AxesSubplot object at 0x109d8fe90>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b555450>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c0fc550>], [<matplotlib.axes._subplots.AxesSubplot object at 0x10b488790>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b5d09d0>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b34f7d0>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c30b190>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b4163d0>, <matplotlib.axes._subplots.AxesSubplot object at 0x10aeaf450>], [<matplotlib.axes._subplots.AxesSubplot object at 0x10c213b10>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b737c50>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b7bb750>, <matplotlib.axes._subplots.AxesSubplot object at 0x10a417990>, <matplotlib.axes._subplots.AxesSubplot object at 0x10a1ab790>, <matplotlib.axes._subplots.AxesSubplot object at 0x10ada6110>], [<matplotlib.axes._subplots.AxesSubplot object at 0x10b014250>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b2889d0>, <matplotlib.axes._subplots.AxesSubplot object at 0x10aec1610>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c726190>, <matplotlib.axes._subplots.AxesSubplot object at 0x10accb7d0>, <matplotlib.axes._subplots.AxesSubplot object at 0x10ac02410>], [<matplotlib.axes._subplots.AxesSubplot object at 0x10a3b5b50>, 
<matplotlib.axes._subplots.AxesSubplot object at 0x10a928890>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b059310>, <matplotlib.axes._subplots.AxesSubplot object at 0x10a9f4350>, <matplotlib.axes._subplots.AxesSubplot object at 0x10a96d090>, <matplotlib.axes._subplots.AxesSubplot object at 0x10b7f4d50>]], dtype=object)
# Compare a standard-normal sample with the 'awake' column, each shown
# both as a KDE and as a histogram.
norm = pd.DataFrame({'d': np.random.normal(size=100)})  # dict key becomes a column name

# Roughly one bin per hour of the observed 'awake' range. Cast to int:
# the column holds floats, and hist() requires an integer bin count
# (a float here raises TypeError in modern matplotlib).
n_bins = int(np.abs(spat['awake'].max() - spat['awake'].min()))

fig, axes = plt.subplots(nrows=2, ncols=2)
# The 'ax' argument tells the pandas plotter which subplot axes to draw on.
norm['d'].plot(ax=axes[0, 0], kind='kde')
norm['d'].hist(ax=axes[0, 1], bins=max(1, n_bins // 2))
spat['awake'].plot(ax=axes[1, 0], kind='kde')
spat['awake'].hist(ax=axes[1, 1], bins=max(1, n_bins // 2))
print(fig)  # print() call works on both Python 2 and 3; bare 'print fig' is Python 2 only
Figure(640x440)
# Manual QQ plot: sorted draws from a normal with matched mean/std on the
# x-axis, the sorted observed 'awake' values on the y-axis. A straight
# line indicates the sample is close to normal.
theoretical = np.sort(np.random.normal(spat['awake'].mean(),
                                       spat['awake'].std(),
                                       len(spat)))
observed = np.sort(spat['awake'].values)
plt.plot(theoretical, observed, '.')
plt.show()
import statsmodels.api as sm
import scipy.stats as stats

# statsmodels does the QQ comparison for us; line='s' fits a line through
# the sample quartiles. (Check sm.qqplot() configurations with shift+tab
# to see how you can change the distribution you are testing against!)
for column in ('awake', 'sleep_rem'):
    fig = sm.qqplot(spat[column], dist=stats.distributions.norm, line='s')
    plt.show()
# Repeat the KDE/histogram comparison, this time against body weight.
fig, axes = plt.subplots(nrows=2, ncols=2)
norm['d'].plot(ax=axes[0, 0], kind='kde')
# int(...) // 2: hist() needs an integer bin count, and n_bins may be a
# float upstream ('awake' is a float column).
norm['d'].hist(ax=axes[0, 1], bins=max(1, int(n_bins) // 2))
spat['bodywt'].plot(ax=axes[1, 0], kind='kde')
spat['bodywt'].hist(ax=axes[1, 1], bins=max(1, int(n_bins) // 2))
plt.show()  # render the figure instead of leaving the AxesSubplot repr as cell output
<matplotlib.axes._subplots.AxesSubplot at 0x10ac4ed50>
# Raw relationship between body weight and time awake.
plt.figure()
plt.plot(spat.bodywt, spat.awake, '*')
plt.show()  # suppress the [<Line2D>] repr that would otherwise be the cell output
[<matplotlib.lines.Line2D at 0x10de1ced0>]
# Body weight is heavily right-skewed; a log transform makes its
# distribution and its relationship with 'awake' far easier to read.
# Left column: raw body weight; right column: log(body weight).
fig, ax = plt.subplots(nrows=2, ncols=2)
for col, weight in enumerate((spat.bodywt, np.log(spat.bodywt))):
    ax[0, col].hist(weight)
    ax[1, col].plot(weight, spat.awake, '.')
plt.show()
# QQ plots against the normal distribution: raw body weight is wildly
# non-normal, while log(body weight) hugs the reference line.
for series in (spat['bodywt'], np.log(spat['bodywt'])):
    fig = sm.qqplot(series, dist=stats.distributions.norm, line='s')
    plt.show()
import statsmodels.formula.api as smf

# OLS, or ordinary least squares, takes a y (dependent variable) and X
# (independent variables) (formula = y ~ X). Below we build an NA-free
# copy of the frame and fit single-variable linear models, on raw and
# log-log scales, to see how strong the body-weight / brain-weight
# relationship is.
#
# BUG FIX: the original called Series.dropna(inplace=True) on individual
# columns, which never removes rows from the DataFrame (and
# pd.DataFrame(spat) shares data with spat). Drop rows missing either
# column, and .copy() so the new columns don't touch the original frame.
spat_cleaned_up = spat.dropna(subset=['bodywt', 'brainwt']).copy()
spat_cleaned_up['log_bodywt'] = np.log(spat_cleaned_up['bodywt'])
spat_cleaned_up['log_brainwt'] = np.log(spat_cleaned_up['brainwt'])

# Creating a figure and axes using subplots (2 columns):
# raw scatter on the left, log-log on the right.
fig, axes = plt.subplots(nrows=1, ncols=2)

axes[0].plot(spat_cleaned_up.bodywt, spat_cleaned_up.brainwt, 'g.')
results = smf.ols(formula='brainwt ~ bodywt', data=spat_cleaned_up).fit()
print('NORMAL FIT SUMMARY')
print(results.summary())
print('')

axes[1].plot(spat_cleaned_up.log_bodywt, spat_cleaned_up.log_brainwt, 'm.')
log_results = smf.ols(formula='log_brainwt ~ log_bodywt',
                      data=spat_cleaned_up).fit()
print('LOG-LOG FIT SUMMARY')
print(log_results.summary())
print(fig)
NORMAL FIT SUMMARY OLS Regression Results ============================================================================== Dep. Variable: brainwt R-squared: 0.872 Model: OLS Adj. R-squared: 0.870 Method: Least Squares F-statistic: 367.7 Date: Mon, 09 Feb 2015 Prob (F-statistic): 9.16e-26 Time: 19:40:48 Log-Likelihood: -20.070 No. Observations: 56 AIC: 44.14 Df Residuals: 54 BIC: 48.19 Df Model: 1 ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ Intercept 0.0859 0.048 1.782 0.080 -0.011 0.183 bodywt 0.0010 5.03e-05 19.176 0.000 0.001 0.001 ============================================================================== Omnibus: 85.068 Durbin-Watson: 2.376 Prob(Omnibus): 0.000 Jarque-Bera (JB): 1330.630 Skew: 4.258 Prob(JB): 1.14e-289 Kurtosis: 25.311 Cond. No. 981. ============================================================================== LOG-LOG FIT SUMMARY OLS Regression Results ============================================================================== Dep. Variable: log_brainwt R-squared: 0.932 Model: OLS Adj. R-squared: 0.931 Method: Least Squares F-statistic: 738.4 Date: Mon, 09 Feb 2015 Prob (F-statistic): 3.56e-33 Time: 19:40:48 Log-Likelihood: -55.688 No. Observations: 56 AIC: 115.4 Df Residuals: 54 BIC: 119.4 Df Model: 1 ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ Intercept -4.7754 0.093 -51.463 0.000 -4.961 -4.589 log_bodywt 0.7652 0.028 27.173 0.000 0.709 0.822 ============================================================================== Omnibus: 4.775 Durbin-Watson: 2.240 Prob(Omnibus): 0.092 Jarque-Bera (JB): 4.060 Skew: 0.653 Prob(JB): 0.131 Kurtosis: 3.193 Cond. No. 
3.46 ============================================================================== Figure(640x440)
# Visual check of both fits. lmplot returns a FacetGrid whose rendered
# figure is the point — don't print its repr (the bare Python 2 'print'
# statements also break under Python 3).
# Original model
sns.lmplot(x='bodywt', y='brainwt', data=spat_cleaned_up)
# New log-log model
sns.lmplot(x='log_bodywt', y='log_brainwt', data=spat_cleaned_up)
plt.show()
<seaborn.axisgrid.FacetGrid object at 0x10c786d10> <seaborn.axisgrid.FacetGrid object at 0x10c786710>
# ANOVA table and per-observation influence diagnostics (Cook's distance,
# leverage, studentized residuals, DFFITS) for the log-log fit.
# print() calls instead of Python 2 print statements.
print(sm.stats.anova_lm(log_results, typ=2))
print('')
print(log_results.get_influence().summary_table())
sum_sq df F PR(>F) log_bodywt 327.602483 1 738.388555 3.562822e-33 Residual 23.958299 54 NaN NaN ================================================================================================== obs endog fitted Cook's student. hat diag dffits ext.stud. dffits value d residual internal residual -------------------------------------------------------------------------------------------------- 0 -4.167 -5.337 0.037 1.777 0.023 0.272 1.814 0.277 1 -8.146 -7.808 0.009 -0.523 0.061 -0.133 -0.519 -0.132 2 -0.860 0.119 0.089 -1.526 0.071 -0.423 -1.546 -0.428 3 -2.659 -2.756 0.000 0.147 0.023 0.023 0.146 0.022 4 -2.321 -2.714 0.004 0.597 0.023 0.092 0.593 0.092 5 -2.163 -2.089 0.000 -0.113 0.030 -0.020 -0.112 -0.020 6 -5.203 -5.018 0.001 -0.280 0.021 -0.041 -0.278 -0.040 7 -5.051 -5.439 0.004 0.589 0.024 0.092 0.586 0.091 8 -6.908 -6.928 0.000 0.031 0.043 0.007 0.031 0.007 9 -5.021 -4.775 0.001 -0.372 0.019 -0.052 -0.369 -0.052 10 -8.874 -8.829 0.000 -0.070 0.087 -0.022 -0.069 -0.021 11 -4.528 -3.817 0.011 -1.078 0.018 -0.146 -1.079 -0.146 12 -4.398 -3.948 0.004 -0.682 0.018 -0.092 -0.679 -0.092 13 -5.067 -4.369 0.010 -1.057 0.018 -0.144 -1.058 -0.144 14 1.527 1.226 0.013 0.478 0.103 0.162 0.474 0.161 15 -8.112 -7.662 0.015 -0.696 0.057 -0.172 -0.692 -0.171 16 -0.423 0.011 0.017 -0.676 0.069 -0.183 -0.672 -0.182 17 -0.870 -0.773 0.001 -0.150 0.051 -0.035 -0.148 -0.034 18 -5.655 -4.975 0.011 -1.031 0.020 -0.149 -1.031 -0.149 19 -2.163 -3.014 0.018 1.291 0.021 0.190 1.299 0.191 20 -3.665 -3.862 0.001 0.298 0.018 0.040 0.295 0.040 21 -5.298 -6.007 0.018 1.080 0.029 0.188 1.081 0.188 22 -1.124 -1.376 0.003 0.386 0.040 0.079 0.383 0.078 23 -4.401 -4.037 0.003 -0.551 0.018 -0.074 -0.547 -0.074 24 0.278 -1.617 0.157 2.898 0.036 0.561 3.124 0.605 25 1.743 1.960 0.009 -0.350 0.129 -0.135 -0.347 -0.133 26 -1.720 -3.309 0.058 2.408 0.020 0.340 2.525 0.357 27 -6.908 -6.398 0.011 -0.779 0.034 -0.147 -0.776 -0.147 28 -7.824 -7.696 0.001 -0.198 0.058 -0.049 -0.197 -0.049 29 -8.294 
-8.299 0.000 0.008 0.073 0.002 0.008 0.002 30 -4.382 -4.518 0.000 0.206 0.018 0.028 0.204 0.028 31 -4.415 -4.074 0.002 -0.515 0.018 -0.070 -0.512 -0.069 32 -1.743 -1.702 0.000 -0.062 0.035 -0.012 -0.062 -0.012 33 -0.821 -1.749 0.036 1.418 0.034 0.267 1.432 0.269 34 -1.852 -1.252 0.019 -0.920 0.042 -0.193 -0.919 -0.192 35 -1.715 -2.305 0.011 0.899 0.027 0.151 0.897 0.150 36 -6.032 -5.233 0.017 -1.214 0.022 -0.182 -1.219 -0.183 37 -4.474 -4.406 0.000 -0.103 0.018 -0.014 -0.102 -0.014 38 -2.513 -1.643 0.033 -1.331 0.036 -0.256 -1.341 -0.258 39 -3.863 -3.795 0.000 -0.103 0.018 -0.014 -0.102 -0.014 40 -6.266 -5.647 0.012 -0.941 0.026 -0.152 -0.940 -0.152 41 -3.912 -5.003 0.029 1.655 0.021 0.240 1.682 0.244 42 -6.725 -6.757 0.000 0.049 0.040 0.010 0.048 0.010 43 -6.742 -6.237 0.010 -0.771 0.032 -0.141 -0.768 -0.140 44 -5.809 -6.385 0.014 0.880 0.034 0.166 0.878 0.166 45 -5.167 -4.839 0.002 -0.497 0.020 -0.071 -0.494 -0.070 46 -5.521 -6.530 0.045 1.542 0.036 0.300 1.562 0.304 47 -8.016 -7.099 0.048 -1.410 0.046 -0.310 -1.424 -0.313 48 -1.715 -1.365 0.006 -0.536 0.040 -0.110 -0.533 -0.109 49 -3.689 -3.625 0.000 -0.097 0.018 -0.013 -0.097 -0.013 50 -1.778 -0.693 0.078 -1.673 0.053 -0.394 -1.702 -0.401 51 -5.952 -4.856 0.028 -1.662 0.020 -0.236 -1.691 -0.240 52 -5.991 -6.507 0.012 0.789 0.036 0.153 0.786 0.152 53 -4.046 -4.245 0.001 0.302 0.018 0.041 0.300 0.041 54 -3.112 -3.844 0.011 1.108 0.018 0.150 1.110 0.150 55 -2.988 -3.672 0.010 1.037 0.018 0.142 1.037 0.142 ==================================================================================================
# Does REM sleep relate to body weight? Build an NA-free frame with log
# transforms of both variables, then plot raw and log-log scatters.
df1 = spat[['bodywt', 'sleep_rem']]
dn_df1 = df1.dropna().copy()  # .copy() avoids SettingWithCopyWarning on the column assignments
dn_df1['log_bodywt'] = np.log(dn_df1.bodywt)
dn_df1['log_sleep_rem'] = np.log(dn_df1.sleep_rem)

fig, ax = plt.subplots(nrows=1, ncols=2)
ax[0].plot(dn_df1.bodywt, dn_df1.sleep_rem, '.')
# BUG FIX: the original referenced bare names log_bodywt / log_sleep_rem,
# which are undefined (NameError) — they are columns of dn_df1.
ax[1].plot(dn_df1.log_bodywt, dn_df1.log_sleep_rem, '.')
[<matplotlib.lines.Line2D at 0x110d0f350>]
dn_df1.hist()
# Fit REM sleep against body weight on raw and log-log scales, then draw
# both regressions with seaborn. print() calls replace Python 2 print
# statements.
model = smf.ols (formula = 'sleep_rem ~ bodywt',data=dn_df1)
results = model.fit()
print("Linear Regression")
print(results.summary())
print('')

model = smf.ols (formula = 'log_sleep_rem ~ log_bodywt',data=dn_df1)
results = model.fit()
print("Log Linear Regression")
print(results.summary())
print('')

# lmplot returns a FacetGrid; let the figures render rather than printing reprs.
sns.lmplot(x='bodywt',y='sleep_rem',data = dn_df1)
sns.lmplot(x='log_bodywt',y='log_sleep_rem',data = dn_df1)
Linear Regression OLS Regression Results ============================================================================== Dep. Variable: sleep_rem R-squared: 0.107 Model: OLS Adj. R-squared: 0.092 Method: Least Squares F-statistic: 7.096 Date: Mon, 09 Feb 2015 Prob (F-statistic): 0.00995 Time: 20:55:50 Log-Likelihood: -98.511 No. Observations: 61 AIC: 201.0 Df Residuals: 59 BIC: 205.2 Df Model: 1 ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ Intercept 2.0246 0.168 12.052 0.000 1.688 2.361 bodywt -0.0024 0.001 -2.664 0.010 -0.004 -0.001 ============================================================================== Omnibus: 30.662 Durbin-Watson: 2.421 Prob(Omnibus): 0.000 Jarque-Bera (JB): 62.046 Skew: 1.649 Prob(JB): 3.36e-14 Kurtosis: 6.679 Cond. No. 201. ============================================================================== Log Linear Regression OLS Regression Results ============================================================================== Dep. Variable: log_sleep_rem R-squared: 0.256 Model: OLS Adj. R-squared: 0.243 Method: Least Squares F-statistic: 20.25 Date: Mon, 09 Feb 2015 Prob (F-statistic): 3.25e-05 Time: 20:55:50 Log-Likelihood: -60.377 No. Observations: 61 AIC: 124.8 Df Residuals: 59 BIC: 129.0 Df Model: 1 ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ Intercept 0.4743 0.087 5.447 0.000 0.300 0.649 log_bodywt -0.1275 0.028 -4.500 0.000 -0.184 -0.071 ============================================================================== Omnibus: 1.614 Durbin-Watson: 2.452 Prob(Omnibus): 0.446 Jarque-Bera (JB): 0.910 Skew: 0.056 Prob(JB): 0.635 Kurtosis: 3.587 Cond. No. 
3.18 ============================================================================== <seaborn.axisgrid.FacetGrid object at 0x1113fa250> <seaborn.axisgrid.FacetGrid object at 0x11134a290>
%ls
Ex3.ipynb GA- Advanced Python.ipynb lahman.sqlite
Exercise 1.ipynb Pandas Excersie.ipynb msleep.csv
Exercise2.ipynb bikeshare.csv static/
# Load the bike-share hourly data set and preview the first rows.
df = pd.read_csv('bikeshare.csv')
df.head()
instant | dteday | season | yr | mnth | hr | holiday | weekday | workingday | weathersit | temp | atemp | hum | windspeed | casual | registered | cnt | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2011-01-01 | 1 | 0 | 1 | 0 | 0 | 6 | 0 | 1 | 0.24 | 0.2879 | 0.81 | 0 | 3 | 13 | 16 |
1 | 2 | 2011-01-01 | 1 | 0 | 1 | 1 | 0 | 6 | 0 | 1 | 0.22 | 0.2727 | 0.80 | 0 | 8 | 32 | 40 |
2 | 3 | 2011-01-01 | 1 | 0 | 1 | 2 | 0 | 6 | 0 | 1 | 0.22 | 0.2727 | 0.80 | 0 | 5 | 27 | 32 |
3 | 4 | 2011-01-01 | 1 | 0 | 1 | 3 | 0 | 6 | 0 | 1 | 0.24 | 0.2879 | 0.75 | 0 | 3 | 10 | 13 |
4 | 5 | 2011-01-01 | 1 | 0 | 1 | 4 | 0 | 6 | 0 | 1 | 0.24 | 0.2879 | 0.75 | 0 | 0 | 1 | 1 |
# Multivariate model: casual ridership vs. weather and calendar predictors.
# print() call replaces the Python 2 print statement.
model = smf.ols (formula = 'casual ~ temp+workingday+hum+windspeed',data=df)
results = model.fit()
print(results.summary())
OLS Regression Results ============================================================================== Dep. Variable: casual R-squared: 0.414 Model: OLS Adj. R-squared: 0.414 Method: Least Squares F-statistic: 3067. Date: Mon, 09 Feb 2015 Prob (F-statistic): 0.00 Time: 21:21:41 Log-Likelihood: -87761. No. Observations: 17379 AIC: 1.755e+05 Df Residuals: 17374 BIC: 1.756e+05 Df Model: 4 ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ Intercept 49.6341 1.532 32.396 0.000 46.631 52.637 temp 116.7805 1.495 78.123 0.000 113.850 119.711 workingday -34.0349 0.616 -55.217 0.000 -35.243 -32.827 hum -78.6812 1.556 -50.556 0.000 -81.732 -75.631 windspeed 3.1096 2.449 1.270 0.204 -1.690 7.909 ============================================================================== Omnibus: 7437.313 Durbin-Watson: 0.209 Prob(Omnibus): 0.000 Jarque-Bera (JB): 51135.790 Skew: 1.922 Prob(JB): 0.00 Kurtosis: 10.472 Cond. No. 14.3 ==============================================================================
# Multivariate model: registered ridership vs. temperature, apparent
# temperature, humidity, and windspeed. print() replaces Python 2 print.
model = smf.ols (formula = 'registered ~temp+atemp+hum+windspeed',data=df)
results = model.fit()
print(results.summary())
OLS Regression Results ============================================================================== Dep. Variable: registered R-squared: 0.192 Model: OLS Adj. R-squared: 0.192 Method: Least Squares F-statistic: 826.8 Date: Mon, 09 Feb 2015 Prob (F-statistic): 0.00 Time: 21:20:13 Log-Likelihood: -1.1004e+05 No. Observations: 17379 AIC: 2.201e+05 Df Residuals: 17373 BIC: 2.201e+05 Df Model: 5 ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ Intercept 112.1234 5.799 19.335 0.000 100.757 123.490 workingday 39.4587 2.221 17.763 0.000 35.105 43.813 temp 30.5000 35.374 0.862 0.389 -38.837 99.837 atemp 243.4686 39.679 6.136 0.000 165.693 321.244 hum -196.3493 5.613 -34.981 0.000 -207.351 -185.347 windspeed 36.2222 9.072 3.993 0.000 18.441 54.004 ============================================================================== Omnibus: 5030.075 Durbin-Watson: 0.492 Prob(Omnibus): 0.000 Jarque-Bera (JB): 14117.233 Skew: 1.538 Prob(JB): 0.00 Kurtosis: 6.168 Cond. No. 80.3 ==============================================================================
# Single-variable model: registered ridership vs. working-day flag.
# print() replaces Python 2 print.
model = smf.ols (formula = 'registered ~ workingday',data=df)
results = model.fit()
print(results.summary())
OLS Regression Results ============================================================================== Dep. Variable: registered R-squared: 0.018 Model: OLS Adj. R-squared: 0.018 Method: Least Squares F-statistic: 319.3 Date: Mon, 09 Feb 2015 Prob (F-statistic): 8.84e-71 Time: 21:19:53 Log-Likelihood: -1.1174e+05 No. Observations: 17379 AIC: 2.235e+05 Df Residuals: 17377 BIC: 2.235e+05 Df Model: 1 ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ Intercept 123.9639 2.020 61.371 0.000 120.005 127.923 workingday 43.6825 2.445 17.869 0.000 38.891 48.474 ============================================================================== Omnibus: 4531.445 Durbin-Watson: 0.402 Prob(Omnibus): 0.000 Jarque-Bera (JB): 10634.703 Skew: 1.469 Prob(JB): 0.00 Kurtosis: 5.461 Cond. No. 3.31 ==============================================================================
# Single-variable model: casual ridership vs. weather situation code.
# print() replaces Python 2 print.
model = smf.ols (formula = 'casual ~ weathersit',data=df)
results = model.fit()
print(results.summary())
OLS Regression Results ============================================================================== Dep. Variable: casual R-squared: 0.023 Model: OLS Adj. R-squared: 0.023 Method: Least Squares F-statistic: 414.5 Date: Mon, 09 Feb 2015 Prob (F-statistic): 4.53e-91 Time: 21:11:21 Log-Likelihood: -92198. No. Observations: 17379 AIC: 1.844e+05 Df Residuals: 17377 BIC: 1.844e+05 Df Model: 1 ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ Intercept 52.4520 0.903 58.078 0.000 50.682 54.222 weathersit -11.7701 0.578 -20.358 0.000 -12.903 -10.637 ============================================================================== Omnibus: 8997.652 Durbin-Watson: 0.136 Prob(Omnibus): 0.000 Jarque-Bera (JB): 58892.549 Skew: 2.467 Prob(JB): 0.00 Kurtosis: 10.548 Cond. No. 5.19 ==============================================================================
# Single-variable model: casual ridership vs. windspeed.
# print() replaces Python 2 print.
model = smf.ols (formula = 'casual ~ windspeed',data=df)
results = model.fit()
print(results.summary())
OLS Regression Results ============================================================================== Dep. Variable: casual R-squared: 0.008 Model: OLS Adj. R-squared: 0.008 Method: Least Squares F-statistic: 142.8 Date: Mon, 09 Feb 2015 Prob (F-statistic): 8.67e-33 Time: 21:11:57 Log-Likelihood: -92332. No. Observations: 17379 AIC: 1.847e+05 Df Residuals: 17377 BIC: 1.847e+05 Df Model: 1 ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ Intercept 28.7591 0.688 41.782 0.000 27.410 30.108 windspeed 36.3870 3.045 11.951 0.000 30.419 42.355 ============================================================================== Omnibus: 9060.548 Durbin-Watson: 0.127 Prob(Omnibus): 0.000 Jarque-Bera (JB): 59405.576 Skew: 2.489 Prob(JB): 0.00 Kurtosis: 10.567 Cond. No. 8.47 ==============================================================================
# BUG FIX: np.polyfit has no formula/data interface, so the original call
# raised TypeError (see the traceback below). The intent — a quadratic
# temperature model — belongs in smf.ols; in a patsy formula, I(...)
# evaluates the Python expression inside, so I(temp ** 2) adds the
# squared-temperature term alongside the linear one.
model = smf.ols(formula='registered ~ temp + I(temp ** 2)', data=df)
results = model.fit()
print(results.summary())
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-62-9481445353c9> in <module>() ----> 1 model = np.polyfit (formula = 'registered ~ temp ** 2',data=df) 2 results = model.fit() 3 print results.summary() TypeError: polyfit() got an unexpected keyword argument 'formula'