import pandas
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
# Load the exported activity log. `DataFrame.from_csv` has been removed from
# pandas; `read_csv` with `index_col=0, parse_dates=True` reproduces its
# defaults, so the first CSV column becomes a parsed datetime index.
df = pandas.read_csv(
    'voomaxer_logs_for_user799_on_2015-01-03.csv',
    index_col=0,
    parse_dates=True,
)

# Keep only the rows dated from 2008-01-01 (inclusive) up to 2015-01-01
# (exclusive). `searchsorted` returns positional insertion points, so the
# slice below is positional (`iloc`).
# NOTE(review): searchsorted assumes the index is sorted ascending -- confirm
# that the export is in date order.
start = df.index.searchsorted(datetime(2008, 1, 1))
end = df.index.searchsorted(datetime(2015, 1, 1))
df = df.iloc[start:end]

# Restrict the log to the three activities compared in the later cells.
df = df[df.activity.isin(['swimming', 'cycling', 'running'])]
df.head()
date | activity | distance in meters | duration in seconds | notes | event name | created at | updated at |
---|---|---|---|---|---|---|---|
2008-01-01 12:00:00 | running | 9977.9328 | 2380 | New years day 10k. Perfect running conditions,... | NaN | 2008-01-01 00:00:00 +0000 | 2008-09-24 11:25:42 +0100 |
2008-01-01 12:00:00 | running | 10000.0000 | 2380 | New Year's Day 10k | 10,000 meter (10K) | 2008-03-26 20:07:56 +0000 | 2008-09-24 11:25:42 +0100 |
2008-01-03 12:00:00 | swimming | 1200.0000 | 1500 | Short swim | NaN | 2008-03-27 22:34:43 +0000 | 2008-09-24 11:25:42 +0100 |
2008-01-04 12:00:00 | running | 4828.0320 | 1440 | Short run after 75m bike ride in macc | NaN | 2008-01-05 00:00:00 +0000 | 2008-09-24 11:25:42 +0100 |
2008-01-04 12:00:00 | cycling | 120700.8000 | 18000 | Ride around Macclesfield (Macc-Cat & fiddle-bu... | NaN | 2008-03-27 22:35:57 +0000 | 2008-09-24 11:25:42 +0100 |
def key(x):
    """Return the ``year`` attribute of an index value (first grouping level)."""
    return x.year


def act(x):
    """Return the ``activity`` attribute of ``x``.

    Not used by the grouping below; kept so other cells that may reference
    it keep working.
    """
    return x.activity


def func(x):
    """Sum a group of distances in metres and convert the total to kilometres."""
    return x.sum() / 1000


# Group by year (callable applied to each index value) and by the 'activity'
# column. The groupers are passed as a list: newer pandas treats a tuple as a
# single key rather than as several keys.
grouped = df.groupby([key, 'activity'])
agg = grouped['distance in meters'].aggregate(func)
agg.index.names = ['year', 'activity']
agg
year activity 2008 cycling 7911.143338 running 1401.251831 swimming 179.415507 2009 cycling 8570.671679 running 1325.080813 swimming 198.885786 2010 cycling 10928.988478 running 1540.407439 swimming 279.721670 2011 cycling 8427.658511 running 1860.040048 swimming 187.732008 2012 cycling 4084.345216 running 583.600819 swimming 81.640000 2013 cycling 6965.568400 running 1221.309424 swimming 157.830000 2014 cycling 5317.035686 running 961.177376 swimming 121.926000 Name: distance in meters, dtype: float64
# Pivot the (year, activity) series into a year x activity table and reuse it
# for both the flat copy and the plot instead of unstacking twice.
aggu = agg.unstack()
# `reset_index(False)` passed False positionally as `level`; a plain
# reset_index() yields the same table with 'year' as an ordinary column.
agguf = aggu.reset_index()

ax = aggu.plot(kind='bar', stacked=False)
ax.set_ylabel('distance (km)')

# Cycling totals (km) for the two annotated years, as plain scalars so they
# can be used directly as plot coordinates.
emily = float(aggu.loc[2012, 'cycling'])
clara = float(aggu.loc[2014, 'cycling'])

# pandas bar plots place one group of bars per index row at x = 0, 1, 2, ...
# so the x coordinate of a year is its position in the table's index.
emily_x = aggu.index.get_loc(2012)
clara_x = aggu.index.get_loc(2014)

# Place each label 1500 km above the bar top, with an arrow pointing down at it.
ax.annotate('Emily', xy=(emily_x, emily), xytext=(emily_x, emily + 1500),
            arrowprops=dict(facecolor='black', shrink=0.05))
ax.annotate('Clara', xy=(clara_x, clara), xytext=(clara_x, clara + 1500),
            arrowprops=dict(facecolor='black', shrink=0.05))
<matplotlib.text.Annotation at 0x10788c1d0>