This notebook outlines the data-crunching I did to take the 2013 grid reliability from the EIA for this Inside Energy story. (This is my first time using IPython Notebook and pandas, and I'm a newbie to programming in general, so please forgive my ugly code!)
If you have any questions about the methods or the data, email me at jordanwb@insideenergy.org.
A few notes:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('svg')
import matplotlib.pylab as plt
from IPython.display import Image, SVG
# Import original data file for 2013 grid reliability data [downloaded from EIA](http://www.eia.gov/electricity/data/eia861/)
df1 = pd.io.excel.read_excel('../DataOriginals/f8612013/Reliability_2013.xls', 'RELIABILITY_States', skiprows=1, na_values=".")
#Select non-null values for SAIDI With MED (from both IEEE and non-IEEE utilities)
df_SAIDI_wm_1 = df1[np.isfinite(df1['SAIDI With MED'])]
df_SAIDI_wm_2 = df1[np.isfinite(df1['SAIDI With MED.1'])]
#Create a single data frame
df_SAIDI_With_Med = df_SAIDI_wm_1.append(df_SAIDI_wm_2)
#Change null values to 0 and turn SAIDI With MED, Customers into a single column
df_SAIDI_With_Med = df_SAIDI_With_Med.fillna(0)
df_SAIDI_With_Med['SAIDI_WM'] = df_SAIDI_With_Med['SAIDI With MED'] + df_SAIDI_With_Med['SAIDI With MED.1']
df_SAIDI_With_Med['Customers'] = df_SAIDI_With_Med['Number of Customers'] + df_SAIDI_With_Med['Number of Customers.1']
#Create a dataframe that only has the columns we need
SAIDI_With_Med = df_SAIDI_With_Med[['Utility Name', 'State', 'SAIDI_WM', 'Customers']]
#Select non-null values for SAIDI Without MED (from both IEEE and non-IEEE utilities)
df_SAIDI_wom_1 = df1[np.isfinite(df1['SAIDI Without MED'])]
df_SAIDI_wom_2 = df1[np.isfinite(df1['SAIDI Without MED.1'])]
#Create a single data frame
df_SAIDI_Without_Med = df_SAIDI_wom_1.append(df_SAIDI_wom_2)
#Change null values to 0 and turn SAIDI With MED, Customers into a single column
df_SAIDI_Without_Med = df_SAIDI_Without_Med.fillna(0)
df_SAIDI_Without_Med['SAIDI_WOM'] = df_SAIDI_Without_Med['SAIDI Without MED'] + df_SAIDI_Without_Med['SAIDI Without MED.1']
df_SAIDI_Without_Med['Customers'] = df_SAIDI_Without_Med['Number of Customers'] + df_SAIDI_Without_Med['Number of Customers.1']
#Create a dataframe that only has the columns we need
SAIDI_Without_Med = df_SAIDI_Without_Med[['Utility Name', 'State', 'SAIDI_WOM', 'Customers']]
#Export as utility-level SAIDI data as Excel files
SAIDI_Without_Med.to_excel("SAIDI_Without_Med.xlsx")
SAIDI_With_Med.to_excel("SAIDI_With_Med.xlsx")
#Calculated total interruption time so can compute summary SAIDI values
SAIDI_With_Med['TotalInterruption'] = df_SAIDI_With_Med['SAIDI_WM'] * df_SAIDI_With_Med['Customers']
SAIDI_Without_Med['TotalInterruption'] = df_SAIDI_Without_Med['SAIDI_WOM'] * df_SAIDI_Without_Med['Customers']
TotalInterruption_With_Med = SAIDI_With_Med['TotalInterruption'].sum
TotalInterruption_Without_Med = SAIDI_Without_Med['TotalInterruption'].sum
#This was some code to add a numeric index that I could use to make a plot -- don't need it anymore!
#State_Codes = df2['State'].unique()
#State_Codes.sort()
#s = pd.Series(range(0,52), index=State_Codes)
#Aggregate SAIDI with and without major event days by state
grouped_SAIDI_With_Med = SAIDI_With_Med.groupby('State').aggregate(np.sum)
grouped_SAIDI_With_Med['StateSAIDIWithMED'] = grouped_SAIDI_With_Med['TotalInterruption'] / grouped_SAIDI_With_Med['Customers']
grouped_SAIDI_Without_Med = SAIDI_Without_Med.groupby('State').aggregate(np.sum)
grouped_SAIDI_Without_Med['StateSAIDIWithoutMED'] = grouped_SAIDI_Without_Med['TotalInterruption'] / grouped_SAIDI_Without_Med['Customers']
#Export data frames as Excel files (note -- need Excel files to do visualization in Tableau)
SAIDI_With_Med.to_excel("utils_2013_SAIDI_with_med.xlsx")
grouped_SAIDI_With_Med.to_excel("states_2013_SAIDI_with_med.xlsx")
SAIDI_Without_Med.to_excel("utils_2013_SAIDI_without_med.xlsx")
grouped_SAIDI_Without_Med.to_excel("states_2013_SAIDI_without_med.xlsx")
#Export data frames as CSV files
SAIDI_With_Med.to_csv("utils_2013_SAIDI_with_med.csv")
grouped_SAIDI_With_Med.to_csv("states_2013_SAIDI_with_med.csv")
SAIDI_Without_Med.to_csv("utils_2013_SAIDI_without_med.csv")
grouped_SAIDI_Without_Med.to_csv("states_2013_SAIDI_without_med.csv")
#Playing with plotting a little bit
bp = SAIDI_With_Med.boxplot(column='SAIDI_WM', by='State', grid=False)
#Plot state values -- TODO add state ID back in!
#plt.plot(s[df2['State']], df2['SAIDI'], 'r.', alpha=0.2)
#plt.plot(s[grouped.index], grouped['StateSAIDIWithMED'], 'b.')
#plt.yscale('log')
#plt.savefig("test.svg")
#SVG(filename="test.svg")