import pandas as pd

## Load the sample data set; the 'datetime' column is parsed into datetimes on read.
bydate_df = pd.read_csv(
    'data/bydate_shortstay_csv.csv',
    parse_dates=['datetime'],
)

# Notebook-style peeks at the raw rows: the first few, then a slice further down.
bydate_df.head()

bydate_df[1320:1350]

# Group the rows by category and bin of week for the summary statistics.
bydate_dfgrp1 = bydate_df.groupby(by=['category', 'binofweek'])

# With a GroupBy object in hand, statistics such as the mean of every field
# other than the grouping fields are one method call away.
# The result is simply another DataFrame.
bydate_dfgrp1.mean()

# Look at a slice of the group means.
bydate_dfgrp1.mean()[100:120]

# Second grouping, used below for the per-group statistics:
# category, then day of week, then bin of day.
bydate_dfgrp2 = bydate_df.groupby(by=['category', 'dayofweek', 'binofday'])

def get_occstats(group, stub=''):
    """Return a dict of summary statistics for one group of values.

    Intended to be passed to ``GroupBy.apply`` on a single selected column,
    so ``group`` is expected to be a pandas Series (anything offering
    ``count``, ``mean``, ``min``, ``max``, ``std`` and ``quantile`` works).

    Parameters
    ----------
    group : pandas.Series
        The values belonging to one group.
    stub : str, optional
        Prefix prepended to every key of the returned dict, e.g. ``'occ_'``
        yields ``'occ_mean'``. Defaults to no prefix.

    Returns
    -------
    dict
        Keys, in order: count, mean, min, max, stdev, then the percentiles
        p50, p55, ..., p95, p975, p99 -- each prefixed with ``stub``.
    """
    # (label, probability) pairs; order here fixes the key order of the result.
    percentiles = (
        ('p50', 0.5), ('p55', 0.55),
        ('p60', 0.6), ('p65', 0.65),
        ('p70', 0.7), ('p75', 0.75),
        ('p80', 0.8), ('p85', 0.85),
        ('p90', 0.9), ('p95', 0.95),
        ('p975', 0.975),
        ('p99', 0.99),
    )

    stats = {
        stub + 'count': group.count(),
        stub + 'mean': group.mean(),
        stub + 'min': group.min(),
        stub + 'max': group.max(),
        # The prefix is applied here as well so that every key follows the
        # same naming scheme when a non-empty stub is given.
        stub + 'stdev': group.std(),
    }
    for label, prob in percentiles:
        stats[stub + label] = group.quantile(prob)
    return stats

# Run get_occstats over every group, once for each of the three measures.
occ_stats = bydate_dfgrp2['occupancy'].apply(func=get_occstats)
arr_stats = bydate_dfgrp2['arrivals'].apply(func=get_occstats)
dep_stats = bydate_dfgrp2['departures'].apply(func=get_occstats)

# Notebook-style inspection of what apply() handed back: its type, its index,
# and what it looks like after unstacking.
type(occ_stats)

occ_stats.index

occ_stats.unstack(level=-1)

# Pivot the innermost index level into columns (presumably the statistic
# names produced by get_occstats -- confirm against the index shown above).
occ_stats_summary = occ_stats.unstack(level=-1)
arr_stats_summary = arr_stats.unstack(level=-1)
dep_stats_summary = dep_stats.unstack(level=-1)

# Peek into the middle of the occupancy summary table.
occ_stats_summary[200:220]

# Write each summary table to a CSV file in the working directory.
occ_stats_summary.to_csv(path_or_buf='occ_stats_summary.csv')
arr_stats_summary.to_csv(path_or_buf='arr_stats_summary.csv')
dep_stats_summary.to_csv(path_or_buf='dep_stats_summary.csv')