### Globals and Constants import csv from collections import defaultdict # Column indicies for EIA data UTILITY = 1 STATE = 3 COUNTY = 4 # Column indicies for EIA retail data UTILITY_ID = 1 CONSUMPTION = 21 SHORT_CONSUMPTION = 5 # Column indicies for Census data ID = 1 DESC = 2 POP2012 = 7 # Constants YEAR = 2012 PATH_TO_SERVICE_DATA = "data/f8612012/service_territory_%s.csv" % YEAR # Path to the defined utility service territories PATH_TO_RETAIL_DATA = "data/f8612012/retail_sales_%s.csv" % YEAR # Path to the defined utility service territories PATH_TO_SHORTRETAIL_DATA = "data/f8612012/short_form_%s_Changed.csv" % YEAR # Path to the defined utility service territories PATH_TO_POPULATION_DATA = "data/PEP_2013_PEPANNRES/PEP_2013_PEPANNRES_with_ann_with_changes.csv" STATES = { 'AK': 'Alaska','AL': 'Alabama','AR': 'Arkansas','AS': 'American Samoa','AZ': 'Arizona','CA': 'California','CO': 'Colorado','CT': 'Connecticut','DC': 'District of Columbia','DE': 'Delaware','FL': 'Florida','GA': 'Georgia','GU': 'Guam','HI': 'Hawaii','IA': 'Iowa','ID': 'Idaho','IL': 'Illinois','IN': 'Indiana','KS': 'Kansas','KY': 'Kentucky','LA': 'Louisiana','MA': 'Massachusetts','MD': 'Maryland','ME': 'Maine','MI': 'Michigan','MN': 'Minnesota','MO': 'Missouri','MP': 'Northern Mariana Islands','MS': 'Mississippi','MT': 'Montana','NA': 'National','NC': 'North Carolina','ND': 'North Dakota','NE': 'Nebraska','NH': 'New Hampshire','NJ': 'New Jersey','NM': 'New Mexico','NV': 'Nevada','NY': 'New York','OH': 'Ohio','OK': 'Oklahoma','OR': 'Oregon','PA': 'Pennsylvania','PR': 'Puerto Rico','RI': 'Rhode Island','SC': 'South Carolina','SD': 'South Dakota','TN': 'Tennessee','TX': 'Texas','UT': 'Utah','VA': 'Virginia','VI': 'Virgin Islands','VT': 'Vermont','WA': 'Washington','WI': 'Wisconsin','WV': 'West Virginia','WY': 'Wyoming'} # Global variables countyToUtility = {} # Mapping from county number to a list of utilities serving it utilityToCounty = {} # Mapping from utility id to a list of counties it serves countyToPopulation = {} # Mapping from county number to a population from census data nameToID = {} # Mapping from county name to the county code utilityToConsumption = dict() # Mapping the utility id to the total consumption in mWh utilityToPopulation = dict() # Mapping the utility id to the total county population it serves countyToConsumption = dict() # Mapping the final result of county ID to consumption in mWh def loadCensusData(): ''' Loads the mapping from county number to population into 'countyToUPopulation' and a name to ID mapping from the PATH_TO_POPULATION_DATA file. ''' f = open(PATH_TO_POPULATION_DATA) reader = csv.reader(f) reader.next() reader.next() for row in reader: # Grab the important parts of the data id = int(row[ID]) desc = row[DESC] pop2012 = row[POP2012] # Derive the 'county key' from the description column (county, state) = desc.split(',') county = county.lower().replace("county", "").replace(".", "").replace(" ", "") state = state.lower().replace(' ', '') key = state + '_' + county # correction to Lousiana county names if state == "louisiana": key = key.replace("parish", "") # Setup the two mappings nameToID[key] = id countyToPopulation[id] = float(pop2012) def loadCountytoUtilityData(): ''' Loads the mapping from county number to utilities into 'countyToUtility' from the PATH_TO_SERVICE_DATA file. ''' f = open(PATH_TO_SERVICE_DATA) reader = csv.reader(f) reader.next() for row in reader: state = STATES[ row[STATE].upper() ].lower().replace(' ', '') county = row[COUNTY].lower().replace(' ', '').replace('.', '') key = state + '_' + county utilityID = int(row[UTILITY]) try: if nameToID[key] in countyToUtility: countyToUtility[nameToID[key]].add(utilityID) else: #if key not in nameToID: # print "key %s not found" % key countyToUtility[nameToID[key]] = set([utilityID]) if utilityID in utilityToCounty: utilityToCounty[utilityID].add(nameToID[key]) else: utilityToCounty[utilityID] = set([nameToID[key]]) except Exception as e: pass #print "Exception: %s" % key def loadUtilityConsumptionData(): ''' Load the retail data as mapping from utility ID to total level of consumption in mWh. ''' # Open file as CSV and skip three header lines f = open(PATH_TO_RETAIL_DATA) reader = csv.reader(f) reader.next() reader.next() reader.next() # For each row in the reader ... for row in reader: # Read in the ID and consumption total level id = int(row[UTILITY_ID]) consumption = float(row[CONSUMPTION].replace(',', '')) # Update global mapping utilityToConsumption[id] = consumption f = open(PATH_TO_SHORTRETAIL_DATA) reader = csv.reader(f) reader.next() # For each row in the reader ... for row in reader: # Read in the ID and consumption total level id = int(row[UTILITY_ID]) consumption = float(row[SHORT_CONSUMPTION].replace(',', '')) # Update global mapping if id in utilityToConsumption: print "Whoa, we have a duplicate utility id: %d" % id utilityToConsumption[id] = consumption def deriveUtilityPopulation(): ''' Derive the total number of people in all counties served by a utility. ''' # Loop through all utility companies and the list of counties served by them for utility, counties in utilityToCounty.items(): # Calculate total population for all counties totPopulation = 0 for county in counties: totPopulation += countyToPopulation[county] # Save total population in mapping utilityToPopulation[utility] = float(totPopulation) def calculateCountyConsumption(): ''' Apply our methodology and generate a mapping of county to total energy consumption in mWh. ''' countiesWithErrors = 0 for county, utilities in countyToUtility.items(): # Since we are using defaultdicts, if we try to access data on a utility we do not have, # it'll be counted as 0. See 'Issues' section above. #countyToConsumption[county] = sum([((countyToPopulation[county] / utilityToPopulation[utility]) * # utilityToConsumption[utility]) for utility in utilities]) countySum = 0 errorCount = 0 for utility in utilities: try: countySum += ((countyToPopulation[county] / utilityToPopulation[utility]) * utilityToConsumption[utility]) except Exception as e: errorCount+=1 #print "Error for county %d and utility %d" % (county, utility) if errorCount != 0: countiesWithErrors+=1 #print "Number of errors: %d / %d" % (errorCount, len(utilities)) countyToConsumption[county] = countySum #print "Counties with errors: %d / %d" % (countiesWithErrors, len(countyToUtility.keys())) # Start by loading the census data and the service territory mapping loadCensusData() loadCountytoUtilityData() loadUtilityConsumptionData() deriveUtilityPopulation() calculateCountyConsumption() # Print random sample of county name to list of utility IDs for county, utilityList in countyToUtility.items()[0:10]: print "%s is served by utility IDs %s" % (county, list(utilityList)) # Print random sample of county name to list of utility IDs for name, countyID in nameToID.items()[0:10]: print "%s (%s) had a population of %s" % (name, countyID, countyToPopulation[countyID]) # Print random sample of utility ID to consumption data for utilityID, consumption in utilityToConsumption.items()[0:10]: if consumption != 0: print "Utility ID %s in 2012 had %s mWhs consumed" % (utilityID, consumption) from IPython.display import HTML HTML('') #HTML('') from IPython.display import HTML HTML('') #HTML('')