import pandas as pd
import numpy as np
# Read the tab-separated movies data set straight from its URL.
movies = pd.read_csv(
    'http://www.rossmanchance.com/iscam2/data/movies03RT.txt',
    sep='\t',
)
# Overwrite the file's column labels with short names that are valid
# identifiers, so they can be referenced directly in model formulas.
movies.columns = [
    'X',
    'score',
    'rating',
    'genre',
    'box_office',
    'running_time',
]
# Preview the first five rows.
movies.head()
| | X | score | rating | genre | box_office | running_time |
---|---|---|---|---|---|---|
0 | 2 Fast 2 Furious | 48.9 | PG-13 | action/adventure | 127.146 | 107 |
1 | 28 Days Later | 78.2 | R | horror | 45.065 | 113 |
2 | A Guy Thing | 39.5 | PG-13 | rom comedy | 15.545 | 101 |
3 | A Man Apart | 42.9 | R | action/adventure | 26.248 | 110 |
4 | A Mighty Wind | 79.9 | PG-13 | comedy | 17.781 | 91 |
from statsmodels.stats.anova import anova_lm
from statsmodels.formula.api import ols
# One-factor model: score explained by the rating category alone.
lm = ols(formula='score ~ rating', data=movies).fit()
# typ=1 is anova_lm's default: sequential (Type I) sums of squares.
aovObject = anova_lm(lm, typ=1)
aovObject
| | df | sum_sq | mean_sq | F | PR(>F) |
---|---|---|---|---|---|
rating | 3 | 570.123813 | 190.041271 | 0.918184 | 0.433975 |
Residual | 136 | 28148.635044 | 206.975258 | NaN | NaN |
# Estimated coefficients of the one-factor model `lm` (score ~ rating):
# an intercept plus one term per rating level other than the baseline.
lm.params
Intercept          67.650000
rating[T.PG]      -12.592857
rating[T.PG-13]   -11.814615
rating[T.R]       -12.020000
# Additive two-factor model with rating entered before genre.
lm2 = ols(formula='score ~ rating + genre', data=movies).fit()
# Sequential (Type I) table: each term is adjusted only for the terms
# listed before it in the formula, so term order matters.
aovObject2 = anova_lm(lm2, typ=1)
aovObject2
| | df | sum_sq | mean_sq | F | PR(>F) |
---|---|---|---|---|---|
rating | 3 | 570.123813 | 190.041271 | 0.973214 | 0.407720 |
genre | 12 | 3934.928021 | 327.910668 | 1.679252 | 0.079134 |
Residual | 124 | 24213.707023 | 195.271831 | NaN | NaN |
# Same two predictors as lm2, but with genre entered before rating.
lm3 = ols(formula='score ~ genre + rating', data=movies).fit()
# Sequential (Type I) sums of squares depend on term order, so the
# per-term rows differ from lm2's table while the residual row does not.
aovObject3 = anova_lm(lm3, typ=1)
aovObject3
| | df | sum_sq | mean_sq | F | PR(>F) |
---|---|---|---|---|---|
genre | 12 | 4221.505277 | 351.792106 | 1.801551 | 0.054737 |
rating | 3 | 283.546557 | 94.515519 | 0.484020 | 0.693992 |
Residual | 124 | 24213.707023 | 195.271831 | NaN | NaN |
# Extend the genre + rating model with box_office as the last term.
lm4 = ols(formula='score ~ genre + rating + box_office', data=movies).fit()
# Sequential (Type I) table: the box_office row is adjusted for both
# genre and rating, which precede it in the formula.
aovObject4 = anova_lm(lm4, typ=1)
aovObject4
| | df | sum_sq | mean_sq | F | PR(>F) |
---|---|---|---|---|---|
genre | 12 | 4221.505277 | 351.792106 | 2.186135 | 0.016198 |
rating | 3 | 283.546557 | 94.515519 | 0.587346 | 0.624421 |
box_office | 1 | 4420.588612 | 4420.588612 | 27.470780 | 0.000001 |
Residual | 123 | 19793.118411 | 160.919662 | NaN | NaN |