%matplotlib inline import requests from StringIO import StringIO import numpy as np import pandas as pd # pandas import matplotlib.pyplot as plt # module for plotting import datetime as dt # module for manipulating dates and times import numpy.linalg as lin # module for performing linear algebra operations from __future__ import division from math import log10,exp pd.options.display.mpl_style = 'default' file = 'Data/Org/0701-0930-2011.xls' df = pd.read_excel(file, header = 0, skiprows = np.arange(0,6)) files = ['Data/Org/1101-1130-2011.xls', 'Data/Org/1201-2011-0131-2012.xls', 'Data/Org/0201-0331-2012.xls','Data/Org/0401-0531-2012.xls','Data/Org/0601-0630-2012.xls', 'Data/Org/0701-0831-2012.xls','Data/Org/0901-1031-2012.xls','Data/Org/1101-1231-2012.xls', 'Data/Org/0101-0228-2013.xls', 'Data/Org/0301-0430-2013.xls','Data/Org/0501-0630-2013.xls','Data/Org/0701-0831-2013.xls', 'Data/Org/0901-1031-2013.xls','Data/Org/1101-1231-2013.xls','Data/Org/0101-0228-2014.xls', 'Data/Org/0301-0430-2014.xls', 'Data/Org/0501-0630-2014.xls', 'Data/Org/0701-0831-2014.xls', 'Data/Org/0901-1031-2014.xls'] for file in files: data = pd.read_excel(file, header = 0, skiprows = np.arange(0,6)) df = df.append(data) df.head() df.rename(columns={'Unnamed: 0':'Datetime'}, inplace=True) nonBlankColumns = ['Unnamed' not in s for s in df.columns] columns = df.columns[nonBlankColumns] df = df[columns] df = df.set_index(['Datetime']) df.index.name = None df.head() for item in df.columns: print item electricity=df[['Gund Bus-A EnergyReal - kWhr','Gund Bus-B EnergyReal - kWhr', 'Gund Bus-A PowerReal - kW','Gund Bus-B PowerReal - kW',]] electricity.head() file = 'Data/monthly electricity.csv' monthlyElectricityFromFacility = pd.read_csv(file, header=0) monthlyElectricityFromFacility monthlyElectricityFromFacility = monthlyElectricityFromFacility.set_index(['month']) monthlyElectricityFromFacility.head() monthlyElectricityFromFacility['startDate'] = pd.to_datetime(monthlyElectricityFromFacility['startDate'], format="%m/%d/%y") values = monthlyElectricityFromFacility.index.values keys = np.array(monthlyElectricityFromFacility['startDate']) dates = {} for key, value in zip(keys, values): dates[key] = value sortedDates = np.sort(dates.keys()) sortedDates = sortedDates[sortedDates > np.datetime64('2011-11-01')] months = [] monthlyElectricityOrg = np.zeros((len(sortedDates) - 1, 2)) for i in range(len(sortedDates) - 1): begin = sortedDates[i] end = sortedDates[i+1] months.append(dates[sortedDates[i]]) monthlyElectricityOrg[i, 0] = (np.round(electricity.loc[end,'Gund Bus-A EnergyReal - kWhr'] - electricity.loc[begin,'Gund Bus-A EnergyReal - kWhr'], 1)) monthlyElectricityOrg[i, 1] = (np.round(electricity.loc[end,'Gund Bus-B EnergyReal - kWhr'] - electricity.loc[begin,'Gund Bus-B EnergyReal - kWhr'], 1)) monthlyElectricity = pd.DataFrame(data = monthlyElectricityOrg, index = months, columns = ['CE603B kWh', 'CE604B kWh']) plt.figure() fig, ax = plt.subplots() fig = monthlyElectricity.plot(marker = 'o', figsize=(15,6), rot = 40, fontsize = 13, ax = ax, linestyle='') fig.set_axis_bgcolor('w') plt.xlabel('Billing month', fontsize = 15) plt.ylabel('kWh', fontsize = 15) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 13) plt.xticks(np.arange(0,len(months)),months) plt.title('Original monthly consumption from hourly data',fontsize = 17) text = 'Meter malfunction' ax.annotate(text, xy = (9, 4500000), xytext = (5, 2), fontsize = 15, textcoords = 'offset points', ha = 'center', va = 'top') ax.annotate(text, xy = (8, -4500000), xytext = (5, 2), fontsize = 15, textcoords = 'offset points', ha = 'center', va = 'bottom') ax.annotate(text, xy = (14, -2500000), xytext = (5, 2), fontsize = 15, textcoords = 'offset points', ha = 'center', va = 'bottom') ax.annotate(text, xy = (15, 2500000), xytext = (5, 2), fontsize = 15, textcoords = 'offset points', ha = 'center', va = 'top') plt.show() monthlyElectricity.loc['Aug 12','CE604B kWh'] = np.nan monthlyElectricity.loc['Sep 12','CE604B kWh'] = np.nan monthlyElectricity.loc['Feb 13','CE603B kWh'] = np.nan monthlyElectricity.loc['Mar 13','CE603B kWh'] = np.nan fig,ax = plt.subplots(1, 1,figsize=(15,8)) #ax.set_axis_bgcolor('w') plt.bar(np.arange(0, len(monthlyElectricity))-0.5,monthlyElectricity['CE603B kWh'], label='Our data processing from hourly data') plt.plot(monthlyElectricityFromFacility.loc[months,'CE603B kWh'],'or', label='Facility data') plt.xticks(np.arange(0,len(months)),months) plt.xlabel('Month',fontsize=15) plt.ylabel('kWh',fontsize=15) plt.xlim([0, len(monthlyElectricity)]) plt.legend() ax.set_xticklabels(months, rotation=40, fontsize=13) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 15) plt.title('Comparison between our data processing and facilities, Meter CE603B',fontsize=20) text = 'Meter malfunction \n estimated by facility' ax.annotate(text, xy = (14, monthlyElectricityFromFacility.loc['Feb 13','CE603B kWh']), xytext = (5, 50), fontsize = 15, arrowprops=dict(facecolor='black', shrink=0.15), textcoords = 'offset points', ha = 'center', va = 'bottom') ax.annotate(text, xy = (15, monthlyElectricityFromFacility.loc['Mar 13','CE603B kWh']), xytext = (5, 50), fontsize = 15, arrowprops=dict(facecolor='black', shrink=0.15), textcoords = 'offset points', ha = 'center', va = 'bottom') plt.show() fig,ax = plt.subplots(1, 1,figsize=(15,8)) #ax.set_axis_bgcolor('w') plt.bar(np.arange(0, len(monthlyElectricity))-0.5, monthlyElectricity['CE604B kWh'], label='Our data processing from hourly data') plt.plot(monthlyElectricityFromFacility.loc[months,'CE604B kWh'],'or', label='Facility data') plt.xticks(np.arange(0,len(months)),months) plt.xlabel('Month',fontsize=15) plt.ylabel('kWh',fontsize=15) plt.xlim([0, len(monthlyElectricity)]) plt.legend() ax.set_xticklabels(months, rotation=40, fontsize=13) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 15) plt.title('Comparison between our data processing and facilities, Meter CE604B',fontsize=20) ax.annotate(text, xy = (9, monthlyElectricityFromFacility.loc['Sep 12','CE604B kWh']), xytext = (5, 50), fontsize = 15, arrowprops=dict(facecolor='black', shrink=0.15), textcoords = 'offset points', ha = 'center', va = 'bottom') ax.annotate(text, xy = (8, monthlyElectricityFromFacility.loc['Aug 12','CE604B kWh']), xytext = (5, 50), fontsize = 15, arrowprops=dict(facecolor='black', shrink=0.15), textcoords = 'offset points', ha = 'center', va = 'bottom') plt.show() electricity['energy'] = electricity['Gund Bus-A EnergyReal - kWhr'] + electricity['Gund Bus-B EnergyReal - kWhr'] electricity['power'] = electricity['Gund Bus-A PowerReal - kW'] + electricity['Gund Bus-B PowerReal - kW'] electricity.head() # In case there are any missing hours, reindex to get the entire time span. Fill in nan data. hourlyTimestamp = pd.date_range(start = '2011/7/1', end = '2014/10/31', freq = 'H') # Somehow, reindex does not work well. October 2011 and several other hours are missing. # Basically it is just the length of original length. #electricity.reindex(hourlyTimestamp, inplace = True, fill_value = np.nan) startTime = hourlyTimestamp endTime = hourlyTimestamp + np.timedelta64(1,'h') hourlyTime = pd.DataFrame(data = np.transpose([startTime, endTime]), index = hourlyTimestamp, columns = ['startTime', 'endTime']) electricity = electricity.join(hourlyTime, how = 'outer') # Just in case, in order to use diff method, timestamp has to be in asending order. electricity.sort_index(inplace = True) hourlyEnergy = electricity.diff(periods=1)['energy'] hourlyElectricity = pd.DataFrame(data = hourlyEnergy.values, index = hourlyEnergy.index, columns = ['electricity-kWh']) hourlyElectricity = hourlyElectricity.join(hourlyTime, how = 'inner') print "Data length: ", len(hourlyElectricity)/24, " days" hourlyElectricity.head() # Filter the data, keep the NaN and generate two excels, with and without Nan hourlyElectricity.loc[abs(hourlyElectricity['electricity-kWh']) > 100000,'electricity-kWh'] = np.nan time = hourlyElectricity.index index = ((time > np.datetime64('2012-07-26')) & (time < np.datetime64('2012-08-18'))) \ | ((time > np.datetime64('2013-01-21')) & (time < np.datetime64('2013-03-08'))) hourlyElectricity.loc[index,'electricity-kWh'] = np.nan hourlyElectricityWithoutNaN = hourlyElectricity.dropna(axis=0, how='any') hourlyElectricity.to_excel('Data/hourlyElectricity.xlsx') hourlyElectricityWithoutNaN.to_excel('Data/hourlyElectricityWithoutNaN.xlsx') plt.figure() fig = hourlyElectricity.plot(fontsize = 15, figsize = (15, 6)) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 15) fig.set_axis_bgcolor('w') plt.title('All the hourly electricity data', fontsize = 16) plt.ylabel('kWh') plt.show() plt.figure() fig = hourlyElectricity.iloc[26200:27400,:].plot(marker = 'o',label='hourly electricity', fontsize = 15, figsize = (15, 6)) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 15) fig.set_axis_bgcolor('w') plt.title('Hourly electricity data of selected days', fontsize = 16) plt.ylabel('kWh') plt.legend() plt.show() dailyTimestamp = pd.date_range(start = '2011/7/1', end = '2014/10/31', freq = 'D') electricityReindexed = electricity.reindex(dailyTimestamp, inplace = False) # Just in case, in order to use diff method, timestamp has to be in asending order. electricityReindexed.sort_index(inplace = True) dailyEnergy = electricityReindexed.diff(periods=1)['energy'] dailyElectricity = pd.DataFrame(data = dailyEnergy.values, index = electricityReindexed.index - np.timedelta64(1,'D'), columns = ['electricity-kWh']) dailyElectricity['startDay'] = dailyElectricity.index dailyElectricity['endDay'] = dailyElectricity.index + np.timedelta64(1,'D') # Filter the data, keep the NaN and generate two excels, with and without Nan dailyElectricity.loc[abs(dailyElectricity['electricity-kWh']) > 2000000,'electricity-kWh'] = np.nan time = dailyElectricity.index index = ((time > np.datetime64('2012-07-26')) & (time < np.datetime64('2012-08-18'))) | ((time > np.datetime64('2013-01-21')) & (time < np.datetime64('2013-03-08'))) dailyElectricity.loc[index,'electricity-kWh'] = np.nan dailyElectricityWithoutNaN = dailyElectricity.dropna(axis=0, how='any') dailyElectricity.to_excel('Data/dailyElectricity.xlsx') dailyElectricityWithoutNaN.to_excel('Data/dailyElectricityWithoutNaN.xlsx') dailyElectricity.head() plt.figure() fig = dailyElectricity.plot(figsize = (15, 6)) fig.set_axis_bgcolor('w') plt.title('All the daily electricity data', fontsize = 16) plt.ylabel('kWh') plt.show() plt.figure() fig = dailyElectricity.iloc[1000:1130,:].plot(marker = 'o', figsize = (15, 6)) fig.set_axis_bgcolor('w') plt.title('Daily electricity data of selected days', fontsize = 16) plt.ylabel('kWh') plt.show() chilledWater = df[['Gund Main Energy - Ton-Days']] chilledWater.head() file = 'Data/monthly chilled water.csv' monthlyChilledWaterFromFacility = pd.read_csv(file, header=0) monthlyChilledWaterFromFacility.set_index(['month'], inplace = True) monthlyChilledWaterFromFacility.head() monthlyChilledWaterFromFacility['startDate'] = pd.to_datetime(monthlyChilledWaterFromFacility['startDate'], format="%m/%d/%y") values = monthlyChilledWaterFromFacility.index.values keys = np.array(monthlyChilledWaterFromFacility['startDate']) dates = {} for key, value in zip(keys, values): dates[key] = value sortedDates = np.sort(dates.keys()) sortedDates = sortedDates[sortedDates > np.datetime64('2011-11-01')] months = [] monthlyChilledWaterOrg = np.zeros((len(sortedDates) - 1)) for i in range(len(sortedDates) - 1): begin = sortedDates[i] end = sortedDates[i+1] months.append(dates[sortedDates[i]]) monthlyChilledWaterOrg[i] = (np.round(chilledWater.loc[end,:] - chilledWater.loc[begin,:], 1)) monthlyChilledWater = pd.DataFrame(data = monthlyChilledWaterOrg, index = months, columns = ['chilledWater-TonDays']) fig,ax = plt.subplots(1, 1,figsize=(15,8)) #ax.set_axis_bgcolor('w') #plt.plot(monthlyChilledWater, label='Our data processing from hourly data', marker = 'x', markersize = 15, linestyle = '') plt.bar(np.arange(len(monthlyChilledWater))-0.5, monthlyChilledWater.values, label='Our data processing from hourly data') plt.plot(monthlyChilledWaterFromFacility[5:-1]['chilledWater'],'or', label='Facility data') plt.xticks(np.arange(0,len(months)),months) plt.xlabel('Month',fontsize=15) plt.ylabel('kWh',fontsize=15) plt.xlim([0,len(months)]) plt.legend() ax.set_xticklabels(months, rotation=40, fontsize=13) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 15) plt.title('Data Validation: comparison between our data processing and facilities',fontsize=20) text = 'Match! Our processing method is valid.' ax.annotate(text, xy = (15, 2000), xytext = (5, 50), fontsize = 15, textcoords = 'offset points', ha = 'center', va = 'bottom') plt.show() hourlyTimestamp = pd.date_range(start = '2011/7/1', end = '2014/10/31', freq = 'H') chilledWater.reindex(hourlyTimestamp, inplace = True) # Just in case, in order to use diff method, timestamp has to be in asending order. chilledWater.sort_index(inplace = True) hourlyEnergy = chilledWater.diff(periods=1) hourlyChilledWater = pd.DataFrame(data = hourlyEnergy.values, index = hourlyEnergy.index, columns = ['chilledWater-TonDays']) hourlyChilledWater['startTime'] = hourlyChilledWater.index hourlyChilledWater['endTime'] = hourlyChilledWater.index + np.timedelta64(1,'h') hourlyChilledWater.loc[abs(hourlyChilledWater['chilledWater-TonDays']) > 50,'chilledWater-TonDays'] = np.nan hourlyChilledWaterWithoutNaN = hourlyChilledWater.dropna(axis=0, how='any') hourlyChilledWater.to_excel('Data/hourlyChilledWater.xlsx') hourlyChilledWaterWithoutNaN.to_excel('Data/hourlyChilledWaterWithoutNaN.xlsx') plt.figure() fig = hourlyChilledWater.plot(fontsize = 15, figsize = (15, 6)) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 15) fig.set_axis_bgcolor('w') plt.title('All the hourly chilled water data', fontsize = 16) plt.ylabel('Ton-Days') plt.show() hourlyChilledWater.head() dailyTimestamp = pd.date_range(start = '2011/7/1', end = '2014/10/31', freq = 'D') chilledWaterReindexed = chilledWater.reindex(dailyTimestamp, inplace = False) chilledWaterReindexed.sort_index(inplace = True) dailyEnergy = chilledWaterReindexed.diff(periods=1)['Gund Main Energy - Ton-Days'] dailyChilledWater = pd.DataFrame(data = dailyEnergy.values, index = chilledWaterReindexed.index - np.timedelta64(1,'D'), columns = ['chilledWater-TonDays']) dailyChilledWater['startDay'] = dailyChilledWater.index dailyChilledWater['endDay'] = dailyChilledWater.index + np.timedelta64(1,'D') dailyChilledWaterWithoutNaN = dailyChilledWater.dropna(axis=0, how='any') dailyChilledWater.to_excel('Data/dailyChilledWater.xlsx') dailyChilledWaterWithoutNaN.to_excel('Data/dailyChilledWaterWithoutNaN.xlsx') plt.figure() fig = dailyChilledWater.plot(fontsize = 15, figsize = (15, 6)) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 15) fig.set_axis_bgcolor('w') plt.title('All the daily chilled water data', fontsize = 16) plt.ylabel('Ton-Days') plt.show() dailyChilledWater.head() steam = df[['Gund Condensate FlowTotal - LBS']] steam.head() file = 'Data/monthly steam.csv' monthlySteamFromFacility = pd.read_csv(file, header=0) monthlySteamFromFacility.set_index(['month'], inplace = True) monthlySteamFromFacility.head() monthlySteamFromFacility['startDate'] = pd.to_datetime(monthlySteamFromFacility['startDate'], format="%m/%d/%Y") values = monthlySteamFromFacility.index.values keys = np.array(monthlySteamFromFacility['startDate']) dates = {} for key, value in zip(keys, values): dates[key] = value sortedDates = np.sort(dates.keys()) sortedDates = sortedDates[sortedDates > np.datetime64('2011-11-01')] months = [] monthlySteamOrg = np.zeros((len(sortedDates) - 1)) for i in range(len(sortedDates) - 1): begin = sortedDates[i] end = sortedDates[i+1] months.append(dates[sortedDates[i]]) monthlySteamOrg[i] = (np.round(steam.loc[end,:] - steam.loc[begin,:], 1)) monthlySteam = pd.DataFrame(data = monthlySteamOrg, index = months, columns = ['steam-LBS']) # 867 LBS ~= 1MMBTU steam fig,ax = plt.subplots(1, 1,figsize=(15,8)) #ax.set_axis_bgcolor('w') #plt.plot(monthlySteam/867, label='Our data processing from hourly data') plt.bar(np.arange(len(monthlySteam))-0.5, monthlySteam.values/867, label='Our data processing from hourly data') plt.plot(monthlySteamFromFacility.loc[months,'steam'],'or', label='Facility data') plt.xticks(np.arange(0,len(months)),months) plt.xlabel('Month',fontsize=15) plt.ylabel('Steam (MMBTU)',fontsize=15) plt.xlim([0,len(months)]) plt.legend() ax.set_xticklabels(months, rotation=40, fontsize=13) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 15) plt.title('Comparison between our data processing and facilities - Steam',fontsize=20) text = 'Match! Our processing method is valid.' ax.annotate(text, xy = (9, 1500), xytext = (5, 50), fontsize = 15, textcoords = 'offset points', ha = 'center', va = 'bottom') plt.show() hourlyTimestamp = pd.date_range(start = '2011/7/1', end = '2014/10/31', freq = 'H') steam.reindex(hourlyTimestamp, inplace = True) # Just in case, in order to use diff method, timestamp has to be in asending order. steam.sort_index(inplace = True) hourlyEnergy = steam.diff(periods=1) hourlySteam = pd.DataFrame(data = hourlyEnergy.values, index = hourlyEnergy.index, columns = ['steam-LBS']) hourlySteam['startTime'] = hourlySteam.index hourlySteam['endTime'] = hourlySteam.index + np.timedelta64(1,'h') hourlySteam.loc[abs(hourlySteam['steam-LBS']) > 100000,'steam-LBS'] = np.nan plt.figure() fig = hourlySteam.plot(fontsize = 15, figsize = (15, 6)) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 17) fig.set_axis_bgcolor('w') plt.title('All the hourly steam data', fontsize = 16) plt.ylabel('LBS') plt.show() hourlySteamWithoutNaN = hourlySteam.dropna(axis=0, how='any') hourlySteam.to_excel('Data/hourlySteam.xlsx') hourlySteamWithoutNaN.to_excel('Data/hourlySteamWithoutNaN.xlsx') hourlySteam.head() dailyTimestamp = pd.date_range(start = '2011/7/1', end = '2014/10/31', freq = 'D') steamReindexed = steam.reindex(dailyTimestamp, inplace = False) steamReindexed.sort_index(inplace = True) dailyEnergy = steamReindexed.diff(periods=1)['Gund Condensate FlowTotal - LBS'] dailySteam = pd.DataFrame(data = dailyEnergy.values, index = steamReindexed.index - np.timedelta64(1,'D'), columns = ['steam-LBS']) dailySteam['startDay'] = dailySteam.index dailySteam['endDay'] = dailySteam.index + np.timedelta64(1,'D') plt.figure() fig = dailySteam.plot(fontsize = 15, figsize = (15, 6)) plt.tick_params(which=u'major', reset=False, axis = 'y', labelsize = 15) fig.set_axis_bgcolor('w') plt.title('All the daily steam data', fontsize = 16) plt.ylabel('LBS') plt.show() dailySteamWithoutNaN = dailyChilledWater.dropna(axis=0, how='any') dailySteam.to_excel('Data/dailySteam.xlsx') dailySteamWithoutNaN.to_excel('Data/dailySteamWithoutNaN.xlsx') dailySteam.head() weather2014 = pd.read_excel('Data/weather-2014.xlsx') weather2014.head() weather2014 = weather2014.set_index('Datetime') weather2014 = weather2014.resample('H') weather2014.head() weather2012and2013 = pd.read_excel('Data/weather-2012-2013.xlsx') weather2012and2013.head() weather2012and2013['Datetime'] = pd.to_datetime(weather2012and2013['Datetime'], format='%Y-%m-%d-%H') weather2012and2013 = weather2012and2013.set_index('Datetime') weather2012and2013.head() # Combine two weather files hourlyWeather = weather2014.append(weather2012and2013) hourlyWeather.index.name = None hourlyWeather.sort_index(inplace = True) # Add more features # Convert relative humidity to specific humidity Mw=18.0160 # molecular weight of water Md=28.9660 # molecular weight of dry air R = 8.31432E3 # gas constant Rd = R/Md # specific gas constant for dry air Rv = R/Mw # specific gas constant for vapour Lv = 2.5e6 # heat release for condensation of water vapour [J kg-1] eps = Mw/Md #saturation pressure def esat(T): ''' get sateration pressure (units [Pa]) for a given air temperature (units [K])''' from numpy import log10 TK = 273.15 e1 = 101325.0 logTTK = log10(T/TK) esat = e1*10**(10.79586*(1-TK/T)-5.02808*logTTK+ 1.50474*1e-4*(1.-10**(-8.29692*(T/TK-1)))+ 0.42873*1e-3*(10**(4.76955*(1-TK/T))-1)-2.2195983) return esat def rh2sh(RH,p,T): '''purpose: conversion relative humidity (unitless) to specific humidity (humidity ratio) [kg/kg]''' es = esat(T) W = Mw/Md*RH*es/(p-RH*es) return W/(1.+W) p = hourlyWeather['pressure-mbar'] * 100 RH = hourlyWeather['RH-%'] / 100 T = hourlyWeather['T-C'] + 273.15 w = rh2sh(RH,p,T) hourlyWeather['humidityRatio-kg/kg'] = w hourlyWeather['coolingDegrees'] = hourlyWeather['T-C'] - 12 hourlyWeather.loc[hourlyWeather['coolingDegrees'] < 0, 'coolingDegrees'] = 0 hourlyWeather['heatingDegrees'] = 15 - hourlyWeather['T-C'] hourlyWeather.loc[hourlyWeather['heatingDegrees'] < 0, 'heatingDegrees'] = 0 hourlyWeather['dehumidification'] = hourlyWeather['humidityRatio-kg/kg'] - 0.00886 hourlyWeather.loc[hourlyWeather['dehumidification'] < 0, 'dehumidification'] = 0 #hourlyWeather.to_excel('Data/hourlyWeather.xlsx') hourlyWeather.head() plt.figure() fig = hourlyWeather.plot(y = 'T-C', figsize = (15, 6)) fig.set_axis_bgcolor('w') plt.title('All hourly temperture', fontsize = 16) plt.ylabel(r'Temperature ($\circ$C)') plt.show() plt.figure() fig = hourlyWeather.plot(y = 'solarRadiation-W/m2', figsize = (15, 6)) fig.set_axis_bgcolor('w') plt.title('All hourly solar radiation', fontsize = 16) plt.ylabel(r'$W/m^2$', fontsize = 13) plt.show() plt.figure() fig = hourlyWeather['2014-10'].plot(y = 'T-C', figsize = (15, 6), marker = 'o') fig.set_axis_bgcolor('w') plt.title('Selected hourly temperture',fontsize = 16) plt.ylabel(r'Temperature ($\circ$C)',fontsize = 13) plt.show() plt.figure() fig = hourlyWeather['2014-10'].plot(y = 'solarRadiation-W/m2', figsize = (15, 6), marker ='o') fig.set_axis_bgcolor('w') plt.title('Selected hourly solar radiation', fontsize = 16) plt.ylabel(r'$W/m^2$', fontsize = 13) plt.show() dailyWeather = hourlyWeather.resample('D') #dailyWeather.to_excel('Data/dailyWeather.xlsx') dailyWeather.head() plt.figure() fig = dailyWeather.plot(y = 'T-C', figsize = (15, 6), marker ='o') fig.set_axis_bgcolor('w') plt.title('All daily temperture', fontsize = 16) plt.ylabel(r'Temperature ($\circ$C)', fontsize = 13) plt.show() plt.figure() fig = dailyWeather['2014'].plot(y = 'T-C', figsize = (15, 6), marker ='o') fig.set_axis_bgcolor('w') plt.title('Selected daily temperture', fontsize = 16) plt.ylabel(r'Temperature ($\circ$C)', fontsize = 13) plt.show() plt.figure() fig = dailyWeather['2014'].plot(y = 'solarRadiation-W/m2', figsize = (15, 6), marker ='o') fig.set_axis_bgcolor('w') plt.title('Selected daily solar radiation', fontsize = 16) plt.ylabel(r'$W/m^2$', fontsize = 14) plt.show() holidays = pd.read_excel('Data/holidays.xlsx') holidays.head() hourlyTimestamp = pd.date_range(start = '2011/7/1', end = '2014/10/31', freq = 'H') occupancy = np.ones(len(hourlyTimestamp)) hourlyOccupancy = pd.DataFrame(data = occupancy, index = hourlyTimestamp, columns = ['occupancy']) Saturdays = hourlyOccupancy.index.weekday == 5 Sundays = hourlyOccupancy.index.weekday == 6 hourlyOccupancy.loc[Saturdays, 'occupancy'] = 0.5 hourlyOccupancy.loc[Sundays, 'occupancy'] = 0.5 for i in range(len(holidays)): timestamp = pd.date_range(start = holidays.loc[i, 'startDate'], end = holidays.loc[i, 'endDate'], freq = 'H') hourlyOccupancy.loc[timestamp, 'occupancy'] = holidays.loc[i, 'value'] #hourlyHolidays['Datetime'] = pd.to_datetime(hourlyHolidays['Datetime'], format="%Y-%m-%d %H:%M:%S") hourlyOccupancy['cosHour'] = np.cos((hourlyOccupancy.index.hour - 3) * 2 * np.pi / 24) dailyOccupancy = hourlyOccupancy.resample('D') dailyOccupancy.drop('cosHour', axis = 1, inplace = True) hourlyElectricityWithFeatures = hourlyElectricity.join(hourlyWeather, how = 'inner') hourlyElectricityWithFeatures = hourlyElectricityWithFeatures.join(hourlyOccupancy, how = 'inner') hourlyElectricityWithFeatures.dropna(axis=0, how='any', inplace = True) hourlyElectricityWithFeatures.to_excel('Data/hourlyElectricityWithFeatures.xlsx') hourlyElectricityWithFeatures.head() hourlyChilledWaterWithFeatures = hourlyChilledWater.join(hourlyWeather, how = 'inner') hourlyChilledWaterWithFeatures = hourlyChilledWaterWithFeatures.join(hourlyOccupancy, how = 'inner') hourlyChilledWaterWithFeatures.dropna(axis=0, how='any', inplace = True) hourlyChilledWaterWithFeatures.to_excel('Data/hourlyChilledWaterWithFeatures.xlsx') hourlySteamWithFeatures = hourlySteam.join(hourlyWeather, how = 'inner') hourlySteamWithFeatures = hourlySteamWithFeatures.join(hourlyOccupancy, how = 'inner') hourlySteamWithFeatures.dropna(axis=0, how='any', inplace = True) hourlySteamWithFeatures.to_excel('Data/hourlySteamWithFeatures.xlsx') dailyElectricityWithFeatures = dailyElectricity.join(dailyWeather, how = 'inner') dailyElectricityWithFeatures = dailyElectricityWithFeatures.join(dailyOccupancy, how = 'inner') dailyElectricityWithFeatures.dropna(axis=0, how='any', inplace = True) dailyElectricityWithFeatures.to_excel('Data/dailyElectricityWithFeatures.xlsx') dailyChilledWaterWithFeatures = dailyChilledWater.join(dailyWeather, how = 'inner') dailyChilledWaterWithFeatures = dailyChilledWaterWithFeatures.join(dailyOccupancy, how = 'inner') dailyChilledWaterWithFeatures.dropna(axis=0, how='any', inplace = True) dailyChilledWaterWithFeatures.to_excel('Data/dailyChilledWaterWithFeatures.xlsx') dailySteamWithFeatures = dailySteam.join(dailyWeather, how = 'inner') dailySteamWithFeatures = dailySteamWithFeatures.join(dailyOccupancy, how = 'inner') dailySteamWithFeatures.dropna(axis=0, how='any', inplace = True) dailySteamWithFeatures.to_excel('Data/dailySteamWithFeatures.xlsx')