### Globals and Constants

import csv
from collections import defaultdict

# Column indicies for EIA data
UTILITY = 1
STATE = 3
COUNTY = 4

# Column indicies for EIA retail data
UTILITY_ID = 1
CONSUMPTION = 21
SHORT_CONSUMPTION = 5

# Column indicies for Census data
ID = 1
DESC = 2
POP2012 = 7

# Constants
YEAR = 2012
PATH_TO_SERVICE_DATA = "data/f8612012/service_territory_%s.csv" % YEAR  # Path to the defined utility service territories
PATH_TO_RETAIL_DATA = "data/f8612012/retail_sales_%s.csv" % YEAR  # Path to the defined utility service territories
PATH_TO_SHORTRETAIL_DATA = "data/f8612012/short_form_%s_Changed.csv" % YEAR  # Path to the defined utility service territories
PATH_TO_POPULATION_DATA = "data/PEP_2013_PEPANNRES/PEP_2013_PEPANNRES_with_ann_with_changes.csv"
STATES = { 'AK': 'Alaska','AL': 'Alabama','AR': 'Arkansas','AS': 'American Samoa','AZ': 'Arizona','CA': 'California','CO': 'Colorado','CT': 'Connecticut','DC': 'District of Columbia','DE': 'Delaware','FL': 'Florida','GA': 'Georgia','GU': 'Guam','HI': 'Hawaii','IA': 'Iowa','ID': 'Idaho','IL': 'Illinois','IN': 'Indiana','KS': 'Kansas','KY': 'Kentucky','LA': 'Louisiana','MA': 'Massachusetts','MD': 'Maryland','ME': 'Maine','MI': 'Michigan','MN': 'Minnesota','MO': 'Missouri','MP': 'Northern Mariana Islands','MS': 'Mississippi','MT': 'Montana','NA': 'National','NC': 'North Carolina','ND': 'North Dakota','NE': 'Nebraska','NH': 'New Hampshire','NJ': 'New Jersey','NM': 'New Mexico','NV': 'Nevada','NY': 'New York','OH': 'Ohio','OK': 'Oklahoma','OR': 'Oregon','PA': 'Pennsylvania','PR': 'Puerto Rico','RI': 'Rhode Island','SC': 'South Carolina','SD': 'South Dakota','TN': 'Tennessee','TX': 'Texas','UT': 'Utah','VA': 'Virginia','VI': 'Virgin Islands','VT': 'Vermont','WA': 'Washington','WI': 'Wisconsin','WV': 'West Virginia','WY': 'Wyoming'}


# Global variables
countyToUtility = {}        # Mapping from county number to a list of utilities serving it
utilityToCounty = {}        # Mapping from utility id to a list of counties it serves
countyToPopulation = {}     # Mapping from county number to a population from census data
nameToID = {}               # Mapping from county name to the county code
utilityToConsumption = dict()   # Mapping the utility id to the total consumption in mWh
utilityToPopulation = dict()    # Mapping the utility id to the total county population it serves
countyToConsumption = dict()    # Mapping the final result of county ID to consumption in mWh

def loadCensusData():
    ''' Loads the mapping from county number to population into 
    'countyToUPopulation' and a name to ID mapping from the PATH_TO_POPULATION_DATA file. '''
    f = open(PATH_TO_POPULATION_DATA)
    reader = csv.reader(f)
    reader.next()
    reader.next()
    for row in reader:
        # Grab the important parts of the data
        id = int(row[ID])
        desc = row[DESC]
        pop2012 = row[POP2012]

        # Derive the 'county key' from the description column
        (county, state) = desc.split(',')
        county = county.lower().replace("county", "").replace(".", "").replace(" ", "")
        state = state.lower().replace(' ', '')
        key = state + '_' + county

        # correction to Lousiana county names
        if state == "louisiana":
            key = key.replace("parish", "")

        # Setup the two mappings
        nameToID[key] = id
        countyToPopulation[id] = float(pop2012)


def loadCountytoUtilityData():
    ''' Loads the mapping from county number to utilities into 
    'countyToUtility' from the PATH_TO_SERVICE_DATA file. '''
    f = open(PATH_TO_SERVICE_DATA)
    reader = csv.reader(f)
    reader.next()
    for row in reader:
        state = STATES[ row[STATE].upper() ].lower().replace(' ', '')
        county = row[COUNTY].lower().replace(' ', '').replace('.', '')
        key = state + '_' + county
        utilityID = int(row[UTILITY])

        try:
            if nameToID[key] in countyToUtility:
                countyToUtility[nameToID[key]].add(utilityID)
            else:
                #if key not in nameToID:
                #    print "key %s not found" % key
                countyToUtility[nameToID[key]] = set([utilityID])

            if utilityID in utilityToCounty:
                utilityToCounty[utilityID].add(nameToID[key])
            else:
                utilityToCounty[utilityID] = set([nameToID[key]])
        except Exception as e:
            pass
            #print "Exception: %s" % key


def loadUtilityConsumptionData():
    ''' Load the retail data as  mapping from utility ID 
    to total level of consumption in mWh. '''
    # Open file as CSV and skip three header lines
    f = open(PATH_TO_RETAIL_DATA)
    reader = csv.reader(f)
    reader.next()
    reader.next()
    reader.next()

    # For each row in the reader ...
    for row in reader:
        # Read in the ID and consumption total level
        id = int(row[UTILITY_ID])
        consumption = float(row[CONSUMPTION].replace(',', ''))

        # Update global mapping
        utilityToConsumption[id] = consumption


    f = open(PATH_TO_SHORTRETAIL_DATA)
    reader = csv.reader(f)
    reader.next()

    # For each row in the reader ...
    for row in reader:
        # Read in the ID and consumption total level
        id = int(row[UTILITY_ID])
        consumption = float(row[SHORT_CONSUMPTION].replace(',', ''))

        # Update global mapping
        if id in utilityToConsumption:
            print "Whoa, we have a duplicate utility id: %d" % id

        utilityToConsumption[id] = consumption


def deriveUtilityPopulation():
    ''' Derive the total number of people in all counties served by a utility. '''

    # Loop through all utility companies and the list of counties served by them
    for utility, counties in utilityToCounty.items():
        # Calculate total population for all counties
        totPopulation = 0
        for county in counties:
            totPopulation += countyToPopulation[county]

        # Save total population in mapping
        utilityToPopulation[utility] = float(totPopulation)


def calculateCountyConsumption():
    ''' Apply our methodology and generate a mapping of county to total energy consumption in mWh. '''
    countiesWithErrors = 0
    for county, utilities in countyToUtility.items():
        # Since we are using defaultdicts, if we try to access data on a utility we do not have, 
        # it'll be counted as 0. See 'Issues' section above.
        #countyToConsumption[county] = sum([((countyToPopulation[county] / utilityToPopulation[utility]) * 
        #                                   utilityToConsumption[utility]) for utility in utilities])
        countySum = 0
        errorCount = 0

        for utility in utilities:
            try:
                countySum += ((countyToPopulation[county] / utilityToPopulation[utility]) * utilityToConsumption[utility])
            except Exception as e:
                errorCount+=1

                #print "Error for county %d and utility %d" % (county, utility)

        if errorCount != 0:
            countiesWithErrors+=1
            #print "Number of errors: %d / %d" % (errorCount, len(utilities))

        countyToConsumption[county] = countySum

    #print "Counties with errors: %d / %d" % (countiesWithErrors, len(countyToUtility.keys()))


# Start by loading the census data and the service territory mapping
loadCensusData()
loadCountytoUtilityData()
loadUtilityConsumptionData()
deriveUtilityPopulation()
calculateCountyConsumption()

# Print random sample of county name to list of utility IDs
for county, utilityList in countyToUtility.items()[0:10]:
  print "%s is served by utility IDs %s" % (county, list(utilityList))

# Print random sample of county name to list of utility IDs
for name, countyID in nameToID.items()[0:10]:
  print "%s (%s) had a population of %s" % (name, countyID, countyToPopulation[countyID]) 

# Print random sample of utility ID to consumption data
for utilityID, consumption in utilityToConsumption.items()[0:10]:
    if consumption != 0:
        print "Utility ID %s in 2012 had %s mWhs consumed" % (utilityID, consumption) 

from IPython.display import HTML
HTML('<iframe src=http://jquacinella.github.io/IS608_Project/map.html width=100% height=750></iframe>')
#HTML('<iframe src="file:///home/james/Code/Masters/IS608/Project/map.html" width=100% height=1000></iframe>')

from IPython.display import HTML
HTML('<iframe src=http://jquacinella.github.io/IS608_Project/map_norm.html width=100% height=750></iframe>')
#HTML('<iframe src="file:///home/james/Code/Masters/IS608/Project/map.html" width=100% height=1000></iframe>')