%pylab inline
import os
import glob
import pandas as pd
import numpy as np
import mpltools
from mpltools import style
from mpltools import special
style.use( 'ggplot' )
from datetime import datetime
Populating the interactive namespace from numpy and matplotlib
# Load every train/deploy CSV into its own DataFrame, then stack them into `df`.
# `frames[i]` and `keys[i]` describe the same file.
datapath = '/Users/td6301/Dropbox/reframe_challenge_to_flach_group/'
frames = []  # one DataFrame per CSV file
keys = []    # display label per CSV file, parallel to `frames`

# Train/deploy data
for f in glob.glob(os.path.join(datapath, 'train_deploy', '*.csv')):
    # Read the current file `f` (the original passed the undefined name `file`).
    # Column 4 becomes the index and is converted to datetimes below.
    frame = pd.read_csv(f, index_col=4)
    # The index is scaled by 1e9 and handed to to_datetime, which reads integers
    # as nanoseconds -- presumably the raw values are epoch seconds (TODO confirm).
    # int64 is requested explicitly so the result does not depend on the
    # platform's default integer width.
    frame.index = pd.to_datetime((frame.index.values * 1e9).astype('int64'))
    frames.append(frame)
    # Label = file name up to the first '.', with underscores shown as spaces.
    keys.append(os.path.basename(f).split('.')[0].replace('_', ' '))

# Outer index level is the file label, inner level is the timestamp.
df = pd.concat(frames, keys=keys)
# Preview the first five rows of the first loaded frame.
frames[0].head(n=5)
station | latitude | longitude | numDocks | year | month | day | hour | weekday | weekhour | ... | full_profile_1h_diff_bikes | full_profile_bikes | short_profile_24h_diff_bikes | short_profile_12h_diff_bikes | short_profile_6h_diff_bikes | short_profile_3h_diff_bikes | short_profile_2h_diff_bikes | short_profile_1h_diff_bikes | short_profile_bikes | bikes | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2012-05-31 22:00:00 | 9 | 39.467436 | -0.37735 | 30 | 2012 | 6 | 1 | 0 | Friday | 97 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3 |
2012-05-31 23:00:00 | 9 | 39.467436 | -0.37735 | 30 | 2012 | 6 | 1 | 1 | Friday | 98 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2 |
2012-06-01 00:00:00 | 9 | 39.467436 | -0.37735 | 30 | 2012 | 6 | 1 | 2 | Friday | 99 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
2012-06-01 01:00:00 | 9 | 39.467436 | -0.37735 | 30 | 2012 | 6 | 1 | 3 | Friday | 100 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 |
2012-06-01 02:00:00 | 9 | 39.467436 | -0.37735 | 30 | 2012 | 6 | 1 | 4 | Friday | 101 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
5 rows × 39 columns
# Preview the last five rows of the first loaded frame.
frames[0].tail(n=5)
station | latitude | longitude | numDocks | year | month | day | hour | weekday | weekhour | ... | full_profile_1h_diff_bikes | full_profile_bikes | short_profile_24h_diff_bikes | short_profile_12h_diff_bikes | short_profile_6h_diff_bikes | short_profile_3h_diff_bikes | short_profile_2h_diff_bikes | short_profile_1h_diff_bikes | short_profile_bikes | bikes | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2014-10-31 18:00:00 | 9 | 39.467436 | -0.37735 | 30 | 2014 | 10 | 31 | 19 | Friday | 116 | ... | 1.532258 | 19.104 | 1.50 | 3.00 | -3.75 | 11.75 | 8.00 | 5.25 | 14.75 | 17 |
2014-10-31 19:00:00 | 9 | 39.467436 | -0.37735 | 30 | 2014 | 10 | 31 | 20 | Friday | 117 | ... | -1.776000 | 17.328 | 9.00 | -5.50 | 0.75 | 6.00 | 3.25 | -2.00 | 12.75 | 15 |
2014-10-31 20:00:00 | 9 | 39.467436 | -0.37735 | 30 | 2014 | 10 | 31 | 21 | Friday | 118 | ... | -6.192000 | 11.136 | 8.75 | -7.25 | 5.00 | -0.25 | -5.50 | -3.50 | 9.25 | 18 |
2014-10-31 21:00:00 | 9 | 39.467436 | -0.37735 | 30 | 2014 | 10 | 31 | 22 | Friday | 119 | ... | -1.784000 | 9.352 | 9.25 | -5.25 | 6.75 | -5.00 | -3.00 | 0.50 | 9.75 | 27 |
2014-10-31 22:00:00 | 9 | 39.467436 | -0.37735 | 30 | 2014 | 10 | 31 | 23 | Friday | 120 | ... | -1.320000 | 8.032 | 9.25 | -6.00 | 3.50 | -2.50 | 1.00 | 0.50 | 10.25 | 29 |
5 rows × 39 columns
# Plot the `bikes` column of every loaded frame against its datetime index,
# one subplot per frame, on a square grid just large enough to hold them all.
fig = pylab.figure(figsize=(15, 15))
# subplot() needs integer row/column counts; ceil(sqrt(n)) on its own is a float.
grid_size = int(np.ceil(np.sqrt(len(frames))))
for (i, df1) in enumerate(frames):
    ax = pylab.subplot(grid_size, grid_size, i + 1)
    idx = df1.index
    vals = df1.bikes
    ax.plot_date(idx, vals, fmt="-")
    # xticks() acts on the current axes, i.e. the subplot created just above.
    pylab.xticks(rotation=70)
    ax.set_title(keys[i])
# Extra vertical spacing keeps the rotated tick labels clear of the titles below.
pylab.subplots_adjust(hspace=0.5, right=1.2)