"""Mosquito-abundance regression walkthrough.

Reconstructed from an interactive session that had been flattened onto a
few lines.  The reusable pieces (``square``, ``fahr_to_celsius``,
``center``, ``analyze``) are defined once at module level; the
step-by-step exploration that originally ran at import time now lives in
``main()`` so importing this module has no side effects.

All ``print`` calls take a single argument so the file behaves the same
under Python 2 and Python 3.
"""

import glob

import pandas as pd

# CSV with the columns 'temperature' (deg F), 'rainfall' and 'mosquitos'.
DATA_FILE = 'A2_mosquito_data.csv'

# Regression fitted throughout the walkthrough.
FORMULA = 'mosquitos ~ temperature + rainfall'


def square(x):
    """Return ``x`` raised to the second power."""
    return x ** 2


def fahr_to_celsius(tempF):
    """Convert fahrenheit to celsius.

    Works on scalars and element-wise on pandas objects, because it only
    uses arithmetic operators.
    """
    # 9.0 (a float) keeps the division true division under Python 2 as well.
    tempC = (tempF - 32) * 5 / 9.0
    return tempC


def center(data, desired=0.0):
    """Return a new object containing ``data`` centered on ``desired``.

    The mean of ``data`` (per column for a DataFrame) is subtracted and
    ``desired`` is added, so the result has mean ``desired``.  The input
    is not modified.

    Args:
        data: object supporting ``.mean()`` and arithmetic, e.g. a pandas
            DataFrame or Series.
        desired: value the result should be centered on (default zero).

    Example:
        >>> import pandas
        >>> data = pandas.DataFrame([[0, 1], [0, 2]])
        >>> center(data).values.tolist()
        [[0.0, -0.5], [0.0, 0.5]]
    """
    return data - data.mean() + desired


def _fit_regression(data):
    """Fit the OLS model described by ``FORMULA`` and return the results."""
    # Imported here so the module stays importable without statsmodels.
    import statsmodels.api as sm

    return sm.OLS.from_formula(FORMULA, data).fit()


def _plot_observed_vs_predicted(data, parameters):
    """Draw an observed-vs-predicted scatter with a 1:1 line on a new figure."""
    # Imported here so the module stays importable without matplotlib.
    import matplotlib.pyplot as plt

    # Positional access: ``parameters`` is label-indexed, and integer keys
    # through plain ``[]`` are deprecated for such a Series.
    predicted = (parameters.iloc[0]
                 + parameters.iloc[1] * data['temperature']
                 + parameters.iloc[2] * data['rainfall'])
    plt.figure()
    plt.plot(predicted, data['mosquitos'], 'ro')
    lowest, highest = data['mosquitos'].min(), data['mosquitos'].max()
    # Reference line y = x: points on it are predicted perfectly.
    plt.plot([lowest, highest], [lowest, highest], 'k-')


def analyze(data):
    """Perform regression analysis on mosquito data.

    Takes a dataframe as input that includes columns named 'temperature',
    'rainfall', and 'mosquitos'.  Performs a multiple regression to predict
    the number of mosquitos.  Creates an observed-predicted plot of the
    result and returns the parameters of the regression.
    """
    parameters = _fit_regression(data).params
    _plot_observed_vs_predicted(data, parameters)
    return parameters


def main():
    """Run the walkthrough: explore the data, fit, plot and list data files."""
    # --- Load and inspect the raw data ---------------------------------
    data = pd.read_csv(DATA_FILE)
    print(data)
    print(data.head())
    data['temperature'] = fahr_to_celsius(data['temperature'])
    print(data.head())

    # --- Fit the regression and look at the results --------------------
    regr_results = _fit_regression(data)
    # A bare ``summary()`` only displays in a notebook; print it explicitly.
    print(regr_results.summary())
    print(regr_results.params)
    print(regr_results.rsquared)

    parameters = regr_results.params
    rsquared = regr_results.rsquared
    _plot_observed_vs_predicted(data, parameters)
    print(parameters)
    print("R^2 =  %s" % rsquared)

    # --- Small function examples ---------------------------------------
    print("Four squared is %s" % square(4))
    print("Five squared is %s" % square(5))
    print(square(3))
    two_squared = square(2)
    print(two_squared)

    original = 32.0
    final = fahr_to_celsius(original)
    # ``tempC`` exists only inside fahr_to_celsius; referring to it here
    # would raise NameError, so the returned value is printed instead.
    print(final)

    # --- Centering data ------------------------------------------------
    test_data = pd.DataFrame([[1, 1], [1, 2]])
    print(test_data)
    print(center(test_data))

    data = pd.read_csv(DATA_FILE)
    print(center(data))
    print('original mean:')
    print(data.mean())
    centered = center(data)
    print('')
    print('mean of centered data:')
    print(centered.mean())
    print('std dev before and after:')
    print(data.std())
    print('')
    print(centered.std())
    help(center)

    # --- Whole analysis through the reusable function ------------------
    data = pd.read_csv(DATA_FILE)
    data['temperature'] = fahr_to_celsius(data['temperature'])
    parameters = analyze(data)
    print(parameters)

    # --- Lists and looping over data files -----------------------------
    filenames = glob.glob('*.csv')
    print(filenames)
    mylist = [1, 'a', center]
    print(mylist)
    for filename in glob.glob('*data.csv'):
        print(filename)

    # Replaces the notebook-only ``%matplotlib inline`` magic: display the
    # figures created above when run as a plain script.
    import matplotlib.pyplot as plt
    plt.show()


if __name__ == '__main__':
    main()