from datetime import datetime, date, time

from dateutil.parser import parse
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the input data and parse the Date column into UTC-aware timestamps.
df = pd.read_csv("data/Data.csv")
# `utc` is a boolean flag; the previous value 'GMT' only worked because a
# non-empty string is truthy.
df.Date = pd.to_datetime(df.Date, utc=True)
#df = df.set_index('GMT')
print(df.head())

# Create a column which is only the month, day and hour of day ("M-D-H").
# This is the key used to find the average for each respective hour later.
df['hour'] = df['Date'].apply(
    lambda t: "%d-%d-%d" % (t.month, t.day, t.hour))

# Find the average for each hour... This makes sense for all the columns
# except precip.
# numeric_only=True keeps the Date column (and any other non-numeric column)
# out of the averages, so no spurious 'Date_avg' column reaches the merge.
g = df.groupby('hour').mean(numeric_only=True).reset_index()
# NOTE(review): alternative aggregation that sums Precip instead of averaging
# it; it appears disabled in the original source -- confirm which one is wanted.
# g = df.groupby('hour').agg({'Temp' : np.mean,'H_Pcnt' : np.mean,'C_Pcnt' : np.mean,'Temp_Feels' : np.mean,'Precip' : np.sum,'Radiation' : np.mean,'Wind' : np.mean}).reset_index()

# Join the per-hour averages back onto every original row. This is pandas'
# default inner join; every 'hour' key in df also exists in g because g was
# grouped from df. Averaged columns get the '_avg' suffix.
new_df = pd.merge(left=df, right=g, on='hour', suffixes=('', '_avg'))
new_df = new_df.set_index('Date')

# I don't need the dates before 2009 so I remove them here.
# Rows before the cutoff were kept until now so that they still contribute to
# the averages.


def drop_dates(indexes):
    """Return the entries of *indexes* that fall before 2009-05-01.

    A fresh list is built on every call, so repeated calls no longer
    accumulate results in shared module state.

    Args:
        indexes: iterable of datetime-like values (e.g. a DatetimeIndex).

    Returns:
        list of the values from *indexes* that are earlier than the cutoff,
        in their original order.
    """
    naive_cutoff = datetime(2009, 5, 1)
    # Timezone-aware values cannot be compared with a naive datetime, so they
    # are checked against the same instant expressed in UTC.
    aware_cutoff = pd.Timestamp("2009-05-01", tz="UTC")
    return [
        index for index in indexes
        if index < (naive_cutoff if index.tzinfo is None else aware_cutoff)
    ]


# `test` stays available at module level and holds the dropped index labels.
test = drop_dates(new_df.index)
trimmed = new_df.drop(test).sort_index().drop('hour', axis=1)
#trimmed = trimmed.drop('date', 1)
print(trimmed.head())
trimmed.Precip.plot()
#pd.write_csv("data/aggedWeather.csv")
trimmed.to_csv("aggedWeather.csv")

# NOTE(review): everything below is commented-out code that is never
# executed; consider deleting it.
# print trimmed.columns
# trimmed.drop()
# grouped = df.groupby("Date")
# #grouped['Temp'].agg([np.sum, np.mean, np.std])
# agged = grouped.agg({'Temp' : np.mean,
#                      'Humidity_Pcnt' : np.mean,
#                      'Clouds_Pcnt' : np.mean,
#                      'Temp_Feels' : np.mean,
#                      'Precip' : np.sum,
#                      'Radiation' : np.mean,
#                      'Wind' : np.mean})
# agged = np.round(agged,1)
# agged['Year'] = "holder"
# agged['Month'] = "holder"
# agged['Day'] = "holder"
# agged['Weekday'] = "holder"
# #slimmed = agged.drop(lambda x: "2004" is x)
# #slimmed
# dates = []
# def dropDates():
#     for dateString in agged.index:
#         date = parse(dateString)
#         if date < parse("2009-05-01"):
#             dates.append(dateString)
#         else:
#             agged.Year[dateString] = date.year
#             agged.Month[dateString] = date.month
#             agged.Weekday[dateString] = date.weekday()
#             agged.Day[dateString] = date.day
# dropDates()
# agged = agged.drop(dates)
# agged.head()