import numpy as np
import matplotlib.pylab as plt
from matplotlib import cm
%matplotlib inline
import datetime
import time
import dateutil
# Tab-separated input file; only its first column (the date strings that are
# parsed further down) is read.
file = 'davenport_netflix_data.txt'
# dtype=str replaces the Python-2-era alias 'string', which current NumPy
# releases do not recognise as a data type.
data = np.loadtxt(file, unpack=True, usecols=(0,), delimiter='\t', dtype=str)
# Bare expression: echoes the loaded array when run as a notebook cell.
data
# I looked up Jan 3, 2009: a Saturday
#
# Turn every date string in `data` into a Unix timestamp (seconds, interpreted
# in the local time zone by time.mktime) and keep them in ascending order.
# `import dateutil` on its own does not load the `parser` submodule, so import
# it explicitly instead of relying on another package having done so.
import dateutil.parser

date_s = sorted(
    time.mktime(dateutil.parser.parse(k).timetuple()) for k in data
)
date = np.array(date_s, dtype='float')
# Running count of entries versus time: the n-th element of `date` is drawn at
# height n, against the time elapsed since the earliest timestamp expressed in
# 365-day years.
elapsed_years = (date - date.min()) / 60. / 60. / 24. / 365.
running_count = np.arange(1, len(date) + 1)
plt.plot(elapsed_years, running_count)
plt.xlabel('Years since [3 Jan 2009]')
plt.ylabel('# of Videos Watched')
# Day-of-week distribution of the viewing dates, overall and over time.
#
# `date` holds local-time timestamps (time.mktime), so across a daylight-saving
# change two consecutive midnights are 23 or 25 hours apart.  Dividing by
# 86400 s then yields day offsets such as 99.958 instead of 100, which the
# histograms would count under the neighbouring weekday.  Rounding to whole
# days removes that drift.
# NOTE(review): assumes the inputs are date-only (no time of day), as the later
# month/day/year split suggests - confirm.
elapsed_s = date - date.min()
elapsed_days = np.rint(elapsed_s / 60. / 60. / 24.)
# The earliest date is taken to be Saturday 3 Jan 2009; the +6 offset maps it
# to 6, i.e. 0 = Sunday ... 6 = Saturday.
weekday = (elapsed_days + 6) % 7

# One unit-wide bin per weekday.
h = plt.hist(weekday, bins=np.arange(0, 8, 1))

# Weekday versus elapsed time (365-day years): 100 slices along the time axis,
# 7 bins along the weekday axis.  np.histogram2d is the function pylab
# re-exports as plt.histogram2d.
h2, xi, yi = np.histogram2d(elapsed_s / 60. / 60. / 24. / 365.,
                            weekday, bins=(100, 7))
plt.figure(figsize=(15, 4))
# log10(count + 1) compresses the dynamic range and keeps empty cells at 0.
plt.imshow(np.log10(h2.T + 1), origin='lower', interpolation='nearest', cmap=cm.BuPu,
           extent=(np.min(xi), np.max(xi), np.min(yi), np.max(yi)), aspect=0.3)
plt.colorbar()
# Rebuild the timestamps in the same order as `data` (no sorting this time),
# so `date` lines up element-for-element with the rows of `data`.
# `import dateutil` on its own does not load the `parser` submodule, so import
# it explicitly.
import dateutil.parser

date_s = [time.mktime(dateutil.parser.parse(k).timetuple()) for k in data]
date = np.array(date_s, dtype='float')
# Split each '/'-separated date string into its three numeric fields; loadtxt
# takes the array of strings directly as its sequence of lines.
mo, dd, yr = np.loadtxt(data, delimiter='/').T
# Bare expression: echoes the first raw date string in a notebook cell.
data[0]
# Histogram of the month field using half-unit bins from 0 to 12.5.
hw = plt.hist(mo, bins=np.arange(26) * .5)
# Counts per (year, month) cell, drawn as a heat map.
# NOTE(review): bins=(7, 12) only lines up with whole years and months if the
# data cover exactly seven distinct years and all twelve months - confirm.
h2, xi, yi = np.histogram2d(yr, mo, bins=(7, 12))
plt.figure(figsize=(15, 5))
# log10(count + 1) keeps empty cells at 0 while compressing large counts.
log_counts = np.log10(h2.T + 1)
# Image bounds taken from the outermost bin edges on each axis.
bounds = (xi.min(), xi.max(), yi.min(), yi.max())
plt.imshow(log_counts, origin='lower', interpolation='nearest', cmap=cm.BuPu,
           extent=bounds, aspect=0.4)
plt.colorbar()