import pandas as pd %pylab inline def dateparser(datestring): return datetime.datetime.strptime(datestring,'%Y%m%d%H') total= pd.read_csv('total-volume.csv',parse_dates=[0],date_parser=dateparser) total.columns=["date","tweets"] total.head() total.index = total.date pylab.rcParams['figure.figsize'] = (16.0, 8.0) total.plot(marker="o",markerfacecolor="red") perday = total.resample("W-Mon",how="sum").plot(kind="bar") title("Tweets per Week") xlabel("Sum of Tweets per Week") model = pd.ols(y=log(total.tweets[:400]), x=total.tweets[:400].shift(-1), intercept=True) print model.summary fig=figure() sub=fig.add_subplot(111) plot(model.sm_ols.model.fit().fittedvalues,linewidth=0,marker="o") plot(log(total.tweets[:400]),color="red",marker="o",linewidth=0) t=total.tweets.tolist() t1=total.tweets.shift(-1).tolist() %load_ext rmagic tweettoday = total.tweets.tolist() tweetyesterday = total.tweets.shift(1).tolist() %%R -i tweettoday,tweetyesterday require(ggplot2) print(qplot(x=tweettoday,y=tweetyesterday,alpha=.5)) print(summary(lm(tweettoday~tweetyesterday)))