# Walkthrough of basic pandas usage on the mosquito data sets: loading a CSV,
# selecting columns and rows, summary statistics, plotting, and fitting a
# simple linear regression with statsmodels.
#
# This is an IPython/Jupyter session exported as a script. The IPython magics
# are spelled as get_ipython().run_line_magic(...) calls so the file is valid
# Python syntax; it still has to be run under IPython/Jupyter.

# --- Locate the data -------------------------------------------------------
get_ipython().run_line_magic('cd', '~/swc/pandas-data/')  # IPython: cd ~/swc/pandas-data/
get_ipython().run_line_magic('pwd', '')                   # IPython: pwd

# --- Load the data ---------------------------------------------------------
import pandas

pandas.read_csv('A1_mosquito_data.csv', sep=',')
# index_col='year' makes the 'year' column the row index instead of 0..n-1.
pandas.read_csv('A1_mosquito_data.csv', sep=',', index_col='year')
data = pandas.read_csv('A1_mosquito_data.csv', sep=',', index_col='year')
print(data)
data
print(type(data))

# --- Select columns --------------------------------------------------------
print(data['temperature'])
print(data[['rainfall', 'temperature']])  # a list of names selects several columns
data.index

# --- Select rows -----------------------------------------------------------
print(data[:2])  # a plain slice on the data frame selects rows by position

# Label-based selection on the 'year' index. `.loc` replaces the `.ix`
# indexer, which was removed in pandas 1.0. Unlike positional slices, label
# slices include both endpoints.
data.loc[2004:2008]
data.loc[2004:2008]["temperature"]
data.loc[2004]

# --- Select with boolean masks ---------------------------------------------
data['temperature'][data['mosquitos'] > 200]
# Mask and label slice can be combined in either order.
data['mosquitos'][data['temperature'] > 75].loc[2005:2008]
data['mosquitos'].loc[2005:2008][data['temperature'] > 75]

# --- Summary statistics ----------------------------------------------------
print(data.mean())
print(data.max())
print(data['temperature'].min())
print(data['mosquitos'][1:3].std())

# --- Recap of the syntax used so far ---------------------------------------
abs(-1)  # call function
data.max()  # call method on an object
data[:3]  # slicing by row
data["temperature"]  # get a column of a data frame "slice a column"
dataslice = data.loc[2004:2008]  # this is a special case, we are slicing by index label with .loc

# --- Plotting --------------------------------------------------------------
get_ipython().run_line_magic('matplotlib', 'inline')  # IPython: %matplotlib inline
from matplotlib import pyplot as plt

# NOTE: `data` is rebound here to the second data set (A2).
data = pandas.read_csv('A2_mosquito_data.csv', index_col="year")
data['mosquitos'].plot()
data.plot(figsize=(13,8), marker='s')

# --- Linear regression: mosquitos as a function of temperature -------------
import statsmodels.api as sm

regr_results = sm.OLS.from_formula('mosquitos ~ temperature', data).fit()
regr_results.summary()

# Evaluate the fitted line (intercept + slope * temperature) at every
# observed temperature so it can be drawn over the raw data.
line_fit = regr_results.params['Intercept'] + regr_results.params['temperature'] * data['temperature']
plt.plot(data['temperature'], data['mosquitos'], '.', label="data")
plt.plot(data['temperature'], line_fit, 'red', label="fit")
plt.xlabel('temperature')
plt.ylabel('mosquitos')
plt.legend()