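This IPython notebook serves as an answer to a Stack Overflow question concerning time series. It builds a DataFrame with one row per second from epoch timestamps, converts the timestamps into a DatetimeIndex, resamples the data to 10-minute means, and then selects and plots a date range.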

In [1]:

# Standard library
import datetime
import time

# Third-party
import numpy as np
import pandas as pd
from pandas import DataFrame

# Record the pandas version this run used. The call form of print is valid on
# both Python 2 and Python 3, unlike the bare print statement.
print(pd.__version__)

0.11.1.dev-58642a6

In [2]:

# Build a synthetic data set with one row per second: `size` consecutive epoch
# timestamps plus two columns of uniform random values in [0, 1).
size=1000000
# Fixed epoch second (2013-10-12 13:10:37 UTC) rather than int(time.time()):
# the date-range selections later in the notebook are hard-coded to
# mid-October 2013, so a wall-clock start leaves them empty on any re-run.
start_time=1381583437
timestamp=start_time+np.arange(size)
# Seed NumPy's global generator so the random columns are identical on every run.
np.random.seed(0)
column_1=np.random.rand(size)
column_2=np.random.rand(size)
df=DataFrame({'timestamp':timestamp,'column_1':column_1,'column_2':column_2})

In [3]:

# Display the freshly built frame (three columns, one row per second).
df

Out[3]:

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000000 entries, 0 to 999999
Data columns (total 3 columns):
column_1     1000000  non-null values
column_2     1000000  non-null values
timestamp    1000000  non-null values
dtypes: float64(2), int64(1)

In [4]:

# Preview the first rows to check the column values and the integer timestamps.
df.head()

Out[4]:

	column_1	column_2	timestamp
0	0.033724	0.514638	1381583437
1	0.333073	0.423733	1381583438
2	0.843147	0.428994	1381583439
3	0.414871	0.937023	1381583440
4	0.708825	0.311550	1381583441

In [5]:

# Summary statistics (count, mean, std, min, quartiles, max) for every column.
df.describe()

Out[5]:

	column_1	column_2	timestamp
count	1000000.000000	1000000.000000	1.000000e+06
mean	0.499664	0.499976	1.382083e+09
std	0.288668	0.288601	2.886753e+05
min	0.000001	0.000000	1.381583e+09
25%	0.249752	0.250039	1.381833e+09
50%	0.500003	0.499842	1.382083e+09
75%	0.749253	0.749884	1.382333e+09
max	1.000000	0.999999	1.382583e+09

In [6]:

# Replace the integer row index with a DatetimeIndex built from the epoch-second
# timestamps. unit='s' lets pandas do the conversion itself, so the values no
# longer pass through a float multiply and a platform-width astype(int), which
# overflows where the default integer is 32 bits.
df.index=pd.to_datetime(df['timestamp'].values,unit='s')

In [7]:

# Inspect the index assigned in the previous cell.
df.index

Out[7]:

<class 'pandas.tseries.index.DatetimeIndex'>
[2013-10-12 13:10:37, ..., 2013-10-24 02:57:16]
Length: 1000000, Freq: None, Timezone: None

In [8]:

# The timestamps now live in the index, so the original column is redundant.
df=df.drop('timestamp',axis=1)

In [9]:

# Display the frame again now that the timestamp column has been removed.
df

Out[9]:

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1000000 entries, 2013-10-12 13:10:37 to 2013-10-24 02:57:16
Data columns (total 2 columns):
column_1    1000000  non-null values
column_2    1000000  non-null values
dtypes: float64(2)

In [10]:

# Downsample to 10-minute bins, taking the mean of each column within a bin.
# The result is bound back to df, so every later cell sees the resampled frame.
# NOTE(review): how= is the resample spelling of the pandas 0.11 series this
# notebook was run on; newer pandas releases reportedly expect
# df.resample('10Min').mean() instead -- confirm before running on a current install.
df=df.resample('10Min',how='mean')

In [11]:

# Display the resampled frame.
df

Out[11]:

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1667 entries, 2013-10-12 13:10:00 to 2013-10-24 02:50:00
Freq: 10T
Data columns (total 2 columns):
column_1    1667  non-null values
column_2    1667  non-null values
dtypes: float64(2)

In [12]:

# Plot the resampled columns.
df.plot()

Out[12]:

<matplotlib.axes.AxesSubplot at 0x41b0410>

In [13]:

# Select the rows between the two timestamps (both end points included) by label.
# .loc is the explicit label-based indexer; .ix was deprecated and later removed
# from pandas.
df.loc['2013-10-14 09:30':'2013-10-16 09:30']

Out[13]:

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 289 entries, 2013-10-14 09:30:00 to 2013-10-16 09:30:00
Freq: 10T
Data columns (total 2 columns):
column_1    289  non-null values
column_2    289  non-null values
dtypes: float64(2)

In [14]:

# Keep the selected window (both end points included) under its own name for plotting.
# .loc is the explicit label-based indexer; .ix was deprecated and later removed
# from pandas.
df2=df.loc['2013-10-14 09:30':'2013-10-16 09:30']

In [15]:

# Plot only the selected window held in df2.
df2.plot()

Out[15]:

<matplotlib.axes.AxesSubplot at 0x6970c10>

NB: The same selection and plot can also be done in a single expression, without first storing the slice in an intermediate variable, as shown below.

In [17]:

# Slice the rows and plot them in one expression; no intermediate frame is kept.
df[slice('2013-10-14 09:30','2013-10-16 09:30')].plot()

Out[17]:

<matplotlib.axes.AxesSubplot at 0x40da6d0>

Contact: Nipun Batra

Twitter: @nipun_batra