PG&E Gas and Electric Usage Summaries

Download your historical PG&E monthly data in CSV format and save it as files named pge_electric.csv and pge_gas.csv. You can then use this notebook to easily get a summary of your data.

See the accompanying README.md file for detailed usage instructions.

In [ ]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from IPython.display import Markdown

# Nice utility to print Markdown anywhere
md = lambda t: display(Markdown(t))

plt.plot([]), plt.close('all') # IPython bug https://github.com/ipython/ipython/issues/10383
plt.rcParams['figure.figsize'] = (7, 4)
plt.rcParams['figure.dpi'] = 100
pd.set_option('display.precision', 2)

Change these filenames according to yours if you prefer not to rename them to this convention:

In [ ]:
elec_fname = 'pge_electric.csv'
gas_fname = 'pge_gas.csv'

Let's read the customer data header from one of them:

In [ ]:
customer = {}
for line in open(elec_fname).readlines()[:4]:
    field, val = line.split(',', 1)
    field = ''.join(ch for ch in field if ch.isalnum())
    customer[field.strip()] = val.strip()

customer

Now load up the CSV files and take a look at the data

In [ ]:
def read_pge_csv(fname):
    """Read standard PG&E CSV files and clean them up a bit.
    
    Converts fields into proper date-time objects and numbers.
    """
    # Skip header with address/etc.
    df = pd.read_csv(fname, skiprows=4)
    # Make column names in TitleCase, by default they are UPPER
    df.columns = [x.title() for x in df.columns]
    # Cost column is strings with $ prepended
    df['Cost'] = df['Cost'].str.lstrip('$').astype(float)
    # Type column has needless 'billing' word
    df['Type'] = df['Type'].str.replace('billing', '').str.strip()
    # Make dates proper datetime objects
    for dates in ['Start Date', 'End Date']:
        df[dates] = pd.to_datetime(df[dates])
    # Notes column is empty, drop it
    return df.drop('Notes', axis=1)
In [ ]:
elec = read_pge_csv(elec_fname)
elec
In [ ]:
gas = read_pge_csv(gas_fname)
gas

If all looks good, then we can now make various useful plots, and below combine them into a full report:

In [ ]:
def usage_plot(df):
    """Make a usage plot from a PG&E clean dataframe
    """
    fig, ax1 = plt.subplots()
    
    col = 'Usage'
    ax1.plot('Start Date', col, '-o', label=col, data=df)
    ax1.set_xlabel('Date')
    ax1.set_ylabel(f'Monthly Usage [{df.Units.iloc[0]}]')
    ax1.grid()

    ax2 = ax1.twinx()
    col = 'Cost'
    ax2.plot('Start Date', col, '-^', color='orange', label=col, data=df)
    ax2.set_ylabel('Monthly Cost [$]');

    # ask matplotlib for the plotted objects and their labels
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc=0)
    
    kind = df.Type.iloc[0]
    ax2.set_title(f"Monthly Usage & Cost - {kind}")
    
def cost_plot(elec, gas):
    """Make a combined cost-only plot for gas+electric data
    """
    cost = elec.Cost + gas.Cost
    mean = cost.mean()

    fig, ax = plt.subplots()
    ax.axhline(mean, label=f'Mean (${mean:.2f})')
    ax.plot(elec['Start Date'], cost, '-^', color='orange', label='Cost')
    ax.set_xlabel('Date')
    ax.set_ylabel('Monthly Cost [$]')
    ax.set_title('Monthly Total Energy Cost (G&E)')
    ax.legend()
    ax.grid()
        
usage_plot(elec)
usage_plot(gas)
cost_plot(elec, gas)
In [ ]:
def annual_summ(df):
    """Make an average year from the entire dataset for a PG&E DF.
    
    Returns a DataFrame with the same structure but only 12 (named)
    months, so it can be used interchangeably with other utilities.
    """
    df_annual = df.groupby(elec['Start Date'].dt.month).mean().reset_index()
    for col in ['Type', 'Units']:
        df_annual[col] = df[col][:len(df_annual)]

    df_annual = df_annual[df.drop('End Date', axis=1).columns]
    df_annual['Start Date'] = pd.to_datetime(df_annual['Start Date'], format='%m').\
                                dt.month_name().str.slice(stop=3)

    return df_annual

elec_annual = annual_summ(elec)
gas_annual = annual_summ(gas)

elec_annual
In [ ]:
usage_plot(elec_annual)
usage_plot(gas_annual)
cost_plot(elec_annual, gas_annual)
In [ ]:
def hist(df, field, ax):
    """Customized histogram that highlights the mean.
    """
    sns.histplot(data=df, x=field, ax=ax)
    mean = df[field].mean()
    ax.axvline(mean, color='orange', label=f'mean ({mean:.1f})')
    ax.legend()
    

def usage_hist(elec, gas):
    """Show usage histograms for electric/gas DataFrames
    """
    fig, ax = plt.subplots(2, 2, figsize=(7, 5))

    start = elec['Start Date'].min()
    end = elec['Start Date'].max()
    fig.suptitle(f'Usage period: {start.date()} to {end.date()}')

    hist(elec, 'Usage', ax[0,0])
    ax[0,0].set_title(f'{elec.Type.iloc[0]} [{elec.Units.iloc[0]}]')
    hist(elec, 'Cost', ax[1,0])

    hist(gas, 'Usage', ax[0,1])
    ax[0,1].set_title(f'{gas.Type.iloc[0]} [{gas.Units.iloc[0]}]')
    hist(gas, 'Cost', ax[1,1])

    plt.tight_layout()
    
usage_hist(elec, gas)
In [ ]:
def summarize(df):
    """Summarize usage data.
    
    Slightly customized describe() call that drops quartiles
    and makes clearer column names.
    """
    out = df.describe().drop(['25%', '75%'])
    out = out.rename({'50%': 'median'})
    unit = df['Units'].iloc[0]
    name = df['Type'].iloc[0]
    out = out.rename({'Usage': f'Usage [{unit}]',
                      'Cost': f'{name} Cost [$]'}, axis=1)
    return out

summarize(elec)
In [ ]:
def monthly_report(elec, gas):
    """Print a summary report for monthly data.
    """   
    start = elec['Start Date'].min()
    end = elec['Start Date'].max()

    md('## Report for monthly data')
    md(f'**Report Period:** _{start.date()}_ to _{end.date()}_, '
       f'{elec.Type.count()} months total.')
    md('### Data summaries')
    gas_annual_c = gas.Cost.mean() * 12
    gas_annual_u = gas.Usage.mean() * 12
    elec_annual_c = elec.Cost.mean() * 12
    elec_annual_u = elec.Usage.mean() * 12
    md("**Annual averages:**")
    md(f"_Electric:_ ${elec_annual_c:.2f} (for {elec_annual_u:.0f} kWh).")
    md(f"_Gas:_ ${gas_annual_c:.2f} (for {gas_annual_u:.0f} therm).")
    
    display(pd.concat([summarize(elec), summarize(gas)], axis=1))
    md('### Histograms')
    usage_hist(elec, gas)
    plt.show()
    md('### Detailed usage/cost plots')    
    usage_plot(elec)
    usage_plot(gas)
    cost_plot(elec, gas)
    plt.show()

    
def report(elec, gas, elec_annual, gas_annual, cust):
    """Print a clean summary report based on all PG&E DataFrames.
    """
    md(f'# PG&E Usage Report')
    md(f'**`Name   `** {cust["Name"].title()}')
    md(f'**`Address`** {cust["Address"]}')
    md(f'**`Account`** {cust["AccountNumber"]}')

    monthly_report(elec, gas)

    md('## Annual Averages')
    md('### Data summaries')
    display(pd.concat([summarize(elec_annual), summarize(gas_annual)], axis=1))    
    md('# Monthly data averaged over all years')    
    usage_plot(elec_annual)
    usage_plot(gas_annual)
    cost_plot(elec_annual, gas_annual)
    
report(elec, gas, elec_annual, gas_annual, customer)

Let's run just the monthly data report over the last two years, to get a better estimate of our current usage for future planning purposes:

In [ ]:
monthly_report(elec[-24:], gas[-24:])
In [ ]: