"""Explore an hourly weather CSV export with pandas.

The script previews the raw August 2013 file, loads it into a DataFrame,
prints overall and per-day temperature extremes, then downloads the July 2013
export for the same kind of data from climate.weather.gc.ca and loads that too.

Written for Python 3 (``print`` function, ``io.BytesIO``).
"""
import sys
from collections import deque
from io import BytesIO
from itertools import islice

import numpy as np
import pandas as pd

CSV_PATH = 'eng-hourly-08012013-08312013.csv'
# Number of lines skipped before the column header row.
HEADER_ROWS = 16
# The column headers contain a Latin-1 encoded degree sign, so pandas'
# default UTF-8 decoding cannot read the file.
CSV_ENCODING = 'ISO-8859-1'
TEMP_COLUMN = 'Temp (°C)'


def peek_lines(path, count=10, from_end=False, encoding=CSV_ENCODING):
    """Return up to ``count`` lines from the start or the end of a text file.

    Portable stand-in for the IPython shell escapes ``!head`` / ``!tail``
    so the script also runs outside a notebook.

    Args:
        path: File to read.
        count: Maximum number of lines to return.
        from_end: When true, return the last ``count`` lines instead of the
            first ``count``.
        encoding: Text encoding used to decode the file.

    Returns:
        A list of lines, each keeping its trailing newline.
    """
    with open(path, encoding=encoding) as handle:
        if from_end:
            # A bounded deque keeps only the final ``count`` lines in memory.
            return list(deque(handle, maxlen=count))
        return list(islice(handle, count))


def load_hourly_csv(source):
    """Load an hourly weather CSV into a DataFrame.

    Args:
        source: A file path or a binary file-like object.

    Returns:
        The parsed ``pandas.DataFrame``; the first ``HEADER_ROWS`` lines are
        skipped and the content is decoded as ``CSV_ENCODING``.
    """
    return pd.read_csv(source, skiprows=HEADER_ROWS, encoding=CSV_ENCODING)


def daily_max_temperatures(data, temp_column=TEMP_COLUMN):
    """Find the warmest reading of every day present in ``data``.

    Args:
        data: DataFrame with ``'Day'``, ``'Date/Time'``, ``'Time'`` and
            ``temp_column`` columns and a unique index.
        temp_column: Name of the temperature column.

    Returns:
        A list of ``(date, hour, max_temp)`` tuples in ascending day order,
        where ``date`` is the first 11 characters of ``'Date/Time'``.
        Days without any non-missing temperature are omitted.
    """
    # Missing readings can never be a maximum; dropping them first keeps
    # idxmax well-defined for every remaining group.
    readings = data.dropna(subset=[temp_column])
    # idxmax yields index *labels*, which is what the lookups below need.
    # (argmax yields positions within the subset and selects the wrong row.)
    hottest_labels = readings.groupby('Day')[temp_column].idxmax()
    return [
        (
            data.at[label, 'Date/Time'][:11],
            data.at[label, 'Time'],
            data.at[label, temp_column],
        )
        for label in hottest_labels
    ]


if __name__ == '__main__':
    # Look at the raw file first: start (10 and 20 lines) and end.
    sys.stdout.writelines(peek_lines(CSV_PATH))
    sys.stdout.writelines(peek_lines(CSV_PATH, count=20))
    sys.stdout.writelines(peek_lines(CSV_PATH, from_end=True))

    data = load_hourly_csv(CSV_PATH)
    print(data[0:4])
    print(data.tail(1))
    sys.stdout.writelines(peek_lines(CSV_PATH, count=1, from_end=True))
    print(data.columns)

    # Overall temperature statistics for the month.
    temps = data[TEMP_COLUMN]
    print('max:', temps.max(), 'on', data['Date/Time'][temps.idxmax()])
    print('min:', temps.min(), 'on', data['Date/Time'][temps.idxmin()])
    print('mean:', temps.mean())
    print('std dev:', temps.std())

    # Warmest hour of each day.
    for date, hour, max_temp in daily_max_temperatures(data):
        print('max temperature on', date, 'was', max_temp, 'at', hour)

    # Third-party dependency, only needed for the download step below.
    import requests

    # NOTE(review): the endpoint and the 16-line preamble reflect the 2013
    # export format -- confirm both are still valid for the live service.
    url = 'http://climate.weather.gc.ca/climateData/bulkdata_e.html'
    params = {
        'timeframe': 1,
        'stationID': 51442,
        'Year': 2013,
        'Month': 7,
        'Day': 1,
        'format': 'csv',
    }
    # A timeout prevents the script from hanging forever on a dead server.
    response = requests.get(url, params=params, timeout=30)
    # Fail here on an HTTP error instead of parsing an error page as CSV.
    response.raise_for_status()
    print(response.headers)

    # response.content is bytes, so wrap it in a binary buffer and let
    # pandas decode it with the file's encoding.
    fakefile = BytesIO(response.content)
    datajul = load_hourly_csv(fakefile)
    print(datajul.head(2))