PG&E Gas and Electric Usage Summaries

Download your historical PG&E monthly data in CSV format and save it as files named pge_electric.csv and pge_gas.csv. You can then use this notebook to easily get a summary of your data.

See the accompanying file for detailed usage instructions.

In [ ]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from IPython.display import Markdown

# Nice utility to print Markdown anywhere
md = lambda t: display(Markdown(t))

plt.plot([]), plt.close('all') # IPython bug
plt.rcParams['figure.figsize'] = (7, 4)
plt.rcParams['figure.dpi'] = 100
pd.set_option('display.precision', 2)

Change these filenames according to yours if you prefer not to rename them to this convention:

In [ ]:
elec_fname = 'pge_electric.csv'
gas_fname = 'pge_gas.csv'

Let's read the customer data header from one of them:

In [ ]:
customer = {}
for line in open(elec_fname).readlines()[:4]:
    field, val = line.split(',', 1)
    field = ''.join(ch for ch in field if ch.isalnum())
    customer[field.strip()] = val.strip()


Now load up the CSV files and take a look at the data

In [ ]:
def read_pge_csv(fname):
    """Read standard PG&E CSV files and clean them up a bit.
    Converts fields into proper date-time objects and numbers.
    # Skip header with address/etc.
    df = pd.read_csv(fname, skiprows=4)
    # Make column names in TitleCase, by default they are UPPER
    df.columns = [x.title() for x in df.columns]
    # Cost column is strings with $ prepended
    df['Cost'] = df['Cost'].str.lstrip('$').astype(float)
    # Type column has needless 'billing' word
    df['Type'] = df['Type'].str.replace('billing', '').str.strip()
    # Make dates proper datetime objects
    for dates in ['Start Date', 'End Date']:
        df[dates] = pd.to_datetime(df[dates])
    # Notes column is empty, drop it
    return df.drop('Notes', axis=1)
In [ ]:
elec = read_pge_csv(elec_fname)
In [ ]:
gas = read_pge_csv(gas_fname)

If all looks good, then we can now make various useful plots, and below combine them into a full report:

In [ ]:
def usage_plot(df):
    """Make a usage plot from a PG&E clean dataframe
    fig, ax1 = plt.subplots()
    col = 'Usage'
    ax1.plot('Start Date', col, '-o', label=col, data=df)
    ax1.set_ylabel(f'Monthly Usage [{df.Units.iloc[0]}]')

    ax2 = ax1.twinx()
    col = 'Cost'
    ax2.plot('Start Date', col, '-^', color='orange', label=col, data=df)
    ax2.set_ylabel('Monthly Cost [$]');

    # ask matplotlib for the plotted objects and their labels
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc=0)
    kind = df.Type.iloc[0]
    ax2.set_title(f"Monthly Usage & Cost - {kind}")
def cost_plot(elec, gas):
    """Make a combined cost-only plot for gas+electric data
    cost = elec.Cost + gas.Cost
    mean = cost.mean()

    fig, ax = plt.subplots()
    ax.axhline(mean, label=f'Mean (${mean:.2f})')
    ax.plot(elec['Start Date'], cost, '-^', color='orange', label='Cost')
    ax.set_ylabel('Monthly Cost [$]')
    ax.set_title('Monthly Total Energy Cost (G&E)')
cost_plot(elec, gas)
In [ ]:
def annual_summ(df):
    """Make an average year from the entire dataset for a PG&E DF.
    Returns a DataFrame with the same structure but only 12 (named)
    months, so it can be used interchangeably with other utilities.
    df_annual = df.groupby(elec['Start Date'].dt.month).mean().reset_index()
    for col in ['Type', 'Units']:
        df_annual[col] = df[col][:len(df_annual)]

    df_annual = df_annual[df.drop('End Date', axis=1).columns]
    df_annual['Start Date'] = pd.to_datetime(df_annual['Start Date'], format='%m').\

    return df_annual

elec_annual = annual_summ(elec)
gas_annual = annual_summ(gas)

In [ ]:
cost_plot(elec_annual, gas_annual)
In [ ]:
def hist(df, field, ax):
    """Customized histogram that highlights the mean.
    sns.histplot(data=df, x=field, ax=ax)
    mean = df[field].mean()
    ax.axvline(mean, color='orange', label=f'mean ({mean:.1f})')

def usage_hist(elec, gas):
    """Show usage histograms for electric/gas DataFrames
    fig, ax = plt.subplots(2, 2, figsize=(7, 5))

    start = elec['Start Date'].min()
    end = elec['Start Date'].max()
    fig.suptitle(f'Usage period: {} to {}')

    hist(elec, 'Usage', ax[0,0])
    ax[0,0].set_title(f'{elec.Type.iloc[0]} [{elec.Units.iloc[0]}]')
    hist(elec, 'Cost', ax[1,0])

    hist(gas, 'Usage', ax[0,1])
    ax[0,1].set_title(f'{gas.Type.iloc[0]} [{gas.Units.iloc[0]}]')
    hist(gas, 'Cost', ax[1,1])

usage_hist(elec, gas)
In [ ]:
def summarize(df):
    """Summarize usage data.
    Slightly customized describe() call that drops quartiles
    and makes clearer column names.
    out = df.describe().drop(['25%', '75%'])
    out = out.rename({'50%': 'median'})
    unit = df['Units'].iloc[0]
    name = df['Type'].iloc[0]
    out = out.rename({'Usage': f'Usage [{unit}]',
                      'Cost': f'{name} Cost [$]'}, axis=1)
    return out

In [ ]:
def monthly_report(elec, gas):
    """Print a summary report for monthly data.
    start = elec['Start Date'].min()
    end = elec['Start Date'].max()

    md('## Report for monthly data')
    md(f'**Report Period:** _{}_ to _{}_, '
       f'{elec.Type.count()} months total.')
    md('### Data summaries')
    gas_annual_c = gas.Cost.mean() * 12
    gas_annual_u = gas.Usage.mean() * 12
    elec_annual_c = elec.Cost.mean() * 12
    elec_annual_u = elec.Usage.mean() * 12
    md("**Annual averages:**")
    md(f"_Electric:_ ${elec_annual_c:.2f} (for {elec_annual_u:.0f} kWh).")
    md(f"_Gas:_ ${gas_annual_c:.2f} (for {gas_annual_u:.0f} therm).")
    display(pd.concat([summarize(elec), summarize(gas)], axis=1))
    md('### Histograms')
    usage_hist(elec, gas)
    md('### Detailed usage/cost plots')    
    cost_plot(elec, gas)

def report(elec, gas, elec_annual, gas_annual, cust):
    """Print a clean summary report based on all PG&E DataFrames.
    md(f'# PG&E Usage Report')
    md(f'**`Name   `** {cust["Name"].title()}')
    md(f'**`Address`** {cust["Address"]}')
    md(f'**`Account`** {cust["AccountNumber"]}')

    monthly_report(elec, gas)

    md('## Annual Averages')
    md('### Data summaries')
    display(pd.concat([summarize(elec_annual), summarize(gas_annual)], axis=1))    
    md('# Monthly data averaged over all years')    
    cost_plot(elec_annual, gas_annual)
report(elec, gas, elec_annual, gas_annual, customer)

Let's run just the monthly data report over the last two years, to get a better estimate of our current usage for future planning purposes:

In [ ]:
monthly_report(elec[-24:], gas[-24:])
In [ ]: