"""Summary statistics for occupancy, arrivals and departures.

Reads ``data/bydate_shortstay_csv.csv``, groups the rows by category,
day of week and bin of day, computes descriptive statistics for the
``occupancy``, ``arrivals`` and ``departures`` columns, and writes one
summary CSV per column.
"""
import pandas as pd

# (key suffix, probability) pairs for the percentiles reported by
# get_occstats, in output order.
_PERCENTILES = (
    ('p50', 0.5),
    ('p55', 0.55),
    ('p60', 0.6),
    ('p65', 0.65),
    ('p70', 0.7),
    ('p75', 0.75),
    ('p80', 0.8),
    ('p85', 0.85),
    ('p90', 0.9),
    ('p95', 0.95),
    ('p975', 0.975),
    ('p99', 0.99),
)


def get_occstats(group, stub=''):
    """Return a dict of descriptive statistics for one group of values.

    Args:
        group: Object exposing ``count``, ``mean``, ``min``, ``max``,
            ``std`` and ``quantile`` methods (here: the Series that
            ``SeriesGroupBy.apply`` passes in for each group).
        stub: Prefix prepended to every key of the returned dict.

    Returns:
        Dict with keys ``count``, ``mean``, ``min``, ``max``, ``stdev``
        and ``p50`` ... ``p99`` (each prefixed with ``stub``), in that
        order.
    """
    stats = {
        stub + 'count': group.count(),
        stub + 'mean': group.mean(),
        stub + 'min': group.min(),
        stub + 'max': group.max(),
        # The prefix was previously missing on this key only, which left
        # one unprefixed column whenever a non-empty stub was supplied.
        stub + 'stdev': group.std(),
    }
    for label, prob in _PERCENTILES:
        stats[stub + label] = group.quantile(prob)
    return stats


if __name__ == '__main__':
    # The bare expressions below are notebook-style inspections; their
    # values are discarded when this file runs as a plain script.

    ## Read sample data set and convert string dates to datetimes
    bydate_df = pd.read_csv('data/bydate_shortstay_csv.csv',
                            parse_dates=['datetime'])
    bydate_df.head()
    bydate_df[1320:1350]

    # Create a GroupBy object for the summary stats
    bydate_dfgrp1 = bydate_df.groupby(['category', 'binofweek'])

    # Having a group by object makes it easy to compute statistics such as
    # the mean of all of the fields other than the grouping fields.
    # You'll see that the result is simply another DataFrame.
    # NOTE(review): on pandas >= 2.0, GroupBy.mean() raises on non-numeric
    # columns; if the CSV has string columns besides the grouping keys,
    # numeric_only=True may be needed -- confirm against the data.
    bydate_dfgrp1.mean()

    # Let's explore some of the means.
    bydate_dfgrp1.mean()[100:120]

    bydate_dfgrp2 = bydate_df.groupby(['category', 'dayofweek', 'binofday'])

    # Apply get_occstats to each group of each measure; the results are
    # unstacked further down into one row per group.
    occ_stats = bydate_dfgrp2['occupancy'].apply(get_occstats)
    arr_stats = bydate_dfgrp2['arrivals'].apply(get_occstats)
    dep_stats = bydate_dfgrp2['departures'].apply(get_occstats)

    type(occ_stats)
    occ_stats.index
    occ_stats.unstack()

    occ_stats_summary = occ_stats.unstack()
    arr_stats_summary = arr_stats.unstack()
    dep_stats_summary = dep_stats.unstack()

    occ_stats_summary[200:220]  # Let's peek into the middle of the table.

    occ_stats_summary.to_csv('occ_stats_summary.csv')
    arr_stats_summary.to_csv('arr_stats_summary.csv')
    dep_stats_summary.to_csv('dep_stats_summary.csv')