from datetime import datetime
from datetime import timedelta
from datetime import timezone

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# NOTE: "%matplotlib inline" is an IPython magic, not Python syntax, so it is
# kept only as a comment. Run it in a notebook cell to get inline plots.
# %matplotlib inline

# ---------------------------------------------------------------------------
# Base Python datetime / timedelta
# ---------------------------------------------------------------------------
dates = [
    datetime(2012, 5, 1, 8, 30),
    datetime(2012, 5, 1, 9, 25),
    datetime(2012, 5, 1, 15, 30),
]
dates
type(dates)
type(dates[0])

intime_dt = dates[0]
outtime_dt = dates[1]
type(intime_dt)

# The commented out lines represent attributes that ARE available in pandas Timestamp objects
# but not in Python datetime objects
print('Datetime: {}'.format(intime_dt))
print('Date: {}'.format(intime_dt.date()))
print('Month: {}'.format(intime_dt.month))
print('Day: {}'.format(intime_dt.day))
#print ('DayOfWeek: {}'.format(intime_dt.dayofweek))
print('Weekday: {}'.format(intime_dt.weekday()))
#print ('DayOfYear: {}'.format(intime_dt.dayofyear))
#print ('WeekOfYear: {}'.format(intime_dt.weekofyear))
#print ('Quarter: {}'.format(intime_dt.quarter))
print('Hour: {}'.format(intime_dt.hour))
print('Minute: {}'.format(intime_dt.minute))
print('Second: {}'.format(intime_dt.second))
print('Microsecond: {}'.format(intime_dt.microsecond))

# Subtracting two datetimes yields a datetime.timedelta.
los_td = outtime_dt - intime_dt
print(los_td)
type(los_td)

# Public (non-dunder) attributes of the timedelta object.
atts = [att for att in dir(los_td) if '__' not in att]
print(atts)

print('Timedelta: {}'.format(los_td))
print('Seconds: {}'.format(los_td.seconds))
print('Total Seconds: {}'.format(los_td.total_seconds()))
print('Microseconds: {}'.format(los_td.microseconds))
print('Resolution: {}'.format(los_td.resolution))
print('Min: {}'.format(los_td.min))
print('Max: {}'.format(los_td.max))

# Positional timedelta arguments are (days, seconds, microseconds).
timedelta(0, 0, 10)
timedelta(0, 0, 17).total_seconds()

# Public (non-dunder) attributes of the datetime object.
atts = [att for att in dir(intime_dt) if '__' not in att]
print(atts)

# ---------------------------------------------------------------------------
# numpy datetime64 / timedelta64
# ---------------------------------------------------------------------------
np.datetime64('2012-05-01 08:30:00')

dates64 = [
    np.datetime64('2012-05-01 08:30:00'),
    np.datetime64('2012-05-01 09:25:00'),
    np.datetime64('2012-05-01 15:30:00'),
]
dates64

# datetime.utcnow() is deprecated since Python 3.12. Take an aware "now" in
# UTC and drop the tzinfo to get the same naive UTC value, which is what
# np.datetime64 is given below.
dt_utcnow = datetime.now(timezone.utc).replace(tzinfo=None)
dt_utcnow

# Can create datetime64's from base Python datetimes
dt64_utcnow = np.datetime64(dt_utcnow)
dt64_utcnow

curdates64 = [
    np.datetime64('2014-06-01 08:30:00'),
    np.datetime64('2014-06-01 09:25:00'),
    np.datetime64('2014-06-01 15:30:00'),
]
curdates64

intime_dt64 = dates64[0]
outtime_dt64 = dates64[1]
print(type(intime_dt64))
print(intime_dt64)
intime_dt64

# The commented out lines represent attributes that ARE available in pandas Timestamp objects
# but not in numpy datetime64 objects
print('Datetime: {}'.format(str(intime_dt64)))
#print ('Date: {}'.format(intime_dt64.date()))
#print ('Month: {}'.format(intime_dt64.month))
#print ('Day: {}'.format(intime_dt64.day))
#print ('DayOfWeek: {}'.format(intime_dt64.dayofweek))
#print ('Weekday: {}'.format(intime_dt64.weekday()))
#print ('DayOfYear: {}'.format(intime_dt64.dayofyear))
#print ('WeekOfYear: {}'.format(intime_dt64.weekofyear))
#print ('Quarter: {}'.format(intime_dt64.quarter))
#print ('Hour: {}'.format(intime_dt64.hour))
#print ('Minute: {}'.format(intime_dt64.minute))
#print ('Second: {}'.format(intime_dt64.second))
#print ('Microsecond: {}'.format(intime_dt64.microsecond))

# Subtracting two datetime64 values yields a timedelta64.
los_dt64 = outtime_dt64 - intime_dt64
los_dt64

# Coerce it to other units
np.timedelta64(los_dt64, 'm')

# ---------------------------------------------------------------------------
# pandas Series indexed by datetimes and by periods
# ---------------------------------------------------------------------------
ts = Series(np.random.randint(1, 100, 3), index=dates)
ts
ts.index

from pandas import Period, PeriodIndex

periods = PeriodIndex([Period('2012-01'), Period('2012-02'), Period('2012-03')])
ts2 = Series(np.random.randint(1, 100, 3), index=periods)
ts2
ts2.index

# ---------------------------------------------------------------------------
# pandas Timestamp / Timedelta
# ---------------------------------------------------------------------------
from pandas import Timestamp

Timestamp('20120501 08:30:00')

timestamps = [
    Timestamp('20120501 08:30:00'),
    Timestamp('20120501 09:25:00'),
    Timestamp('20120501 15:30:00'),
]
intime_ts = timestamps[0]
outtime_ts = timestamps[1]
type(intime_ts)

# Subtracting two Timestamps yields a pandas Timedelta.
los_ptd = outtime_ts - intime_ts
print(los_ptd)
type(los_ptd)

# The pandas Timestamp data type has a number of useful attributes (dayofweek, dayofyear, weekofyear)
# that the base Python datetime type does not.
print('Datetime: {}'.format(intime_ts))
print('Date: {}'.format(intime_ts.date()))
print('Month: {}'.format(intime_ts.month))
print('Day: {}'.format(intime_ts.day))
print('DayOfWeek: {}'.format(intime_ts.dayofweek))
print('Weekday: {}'.format(intime_ts.weekday()))
print('DayOfYear: {}'.format(intime_ts.dayofyear))
# Timestamp.weekofyear was removed in pandas 2.0; the ISO calendar week
# (second field of isocalendar()) is the replacement.
print('WeekOfYear: {}'.format(intime_ts.isocalendar()[1]))
print('Quarter: {}'.format(intime_ts.quarter))
print('Hour: {}'.format(intime_ts.hour))
print('Minute: {}'.format(intime_ts.minute))
print('Second: {}'.format(intime_ts.second))
print('Microsecond: {}'.format(intime_ts.microsecond))

print('Timedelta: {}'.format(los_ptd))
print('Days: {}'.format(los_ptd.days))
print('Seconds: {}'.format(los_ptd.seconds))
print('Total Seconds: {}'.format(los_ptd.total_seconds()))
print('Microseconds: {}'.format(los_ptd.microseconds))
print('Resolution: {}'.format(los_ptd.resolution))
print('Min: {}'.format(los_ptd.min))
print('Max: {}'.format(los_ptd.max))

# ---------------------------------------------------------------------------
# Converting between datetime64, Timestamp and datetime
# ---------------------------------------------------------------------------
dt64 = np.datetime64('2014-06-01 08:30:00')
dt64
dt64.tolist()

ts = pd.Timestamp(dt64)
ts

# Timestamp.to_datetime() no longer exists; to_pydatetime() returns the
# equivalent datetime.datetime.
dt = ts.to_pydatetime()
dt

np.datetime64(dt)

# ---------------------------------------------------------------------------
# Date ranges
# ---------------------------------------------------------------------------
# A MonthEnd offset object is used instead of the 'M' string alias, which is
# deprecated for date offsets in recent pandas releases.
index = pd.date_range('2000-1-1', periods=1000, freq=pd.offsets.MonthEnd())
index

start = datetime(2014, 1, 1)
end = datetime(2014, 6, 12)
rng = pd.date_range(start, end)
rng

pd.date_range(start, end, freq='W')

# One random value per day in rng; sized from rng so the data and the index
# cannot get out of step if start/end change.
data = np.random.randint(1, 100, size=len(rng))
df = DataFrame(data, index=rng)
df.head()

# Date index slicing is easy but, BE CAREFUL, the end point is included (unlike usual Python slicing behavior).
df['2014-01-02':'2014-01-12']
df[datetime(2014, 1, 2):datetime(2014, 1, 12)]

# Month-end dates between start and end. A MonthEnd offset object is used
# instead of the 'M' string alias, which is deprecated for date offsets in
# recent pandas releases.
rng2 = pd.date_range(start, end, freq=pd.offsets.MonthEnd())
len(rng2)
data[:len(rng2)]
df2 = DataFrame(data[:len(rng2)], index=rng2)
df2

# ---------------------------------------------------------------------------
# Date offsets
# ---------------------------------------------------------------------------
#We rarely use these but they are available
from pandas.tseries.offsets import Hour, Minute

one_hour = Hour()
four_hours = Hour(4)
one_hour + four_hours
one_hour + Minute(35)

rng = pd.date_range('9/1/2012', '1/1/2014', freq=pd.offsets.MonthEnd())
rng
list(rng)

# WOM-2TUE: second Tuesday of each month.
rng = pd.date_range('9/1/2012', '1/1/2014', freq='WOM-2TUE')
list(rng)

# ---------------------------------------------------------------------------
# Shifting
# ---------------------------------------------------------------------------
ts = Series(
    np.random.randint(1, 10, 4),
    index=pd.date_range('1/2/2013', periods=4, freq=pd.offsets.MonthEnd()),
)
ts
ts.shift(periods=2)

# Common use is pct change
ts / ts.shift(1) - 1.0

# Since naive shifts leave index unchanged, some data is lost. To shift both data and index, pass in the frequency
ts.shift(1)
ts.shift(1, freq=pd.offsets.MonthEnd())

# ---------------------------------------------------------------------------
# Periods
# ---------------------------------------------------------------------------
p = pd.Period('6/1/2014', freq='M')
p
p + 6

# Ranges of periods is done much like ranges of dates
rng = pd.period_range('1/1/2010', '6/1/2014', freq='Q')
rng
list(rng)

p.asfreq('M', 'start')
p.asfreq('M', 'end')

# ---------------------------------------------------------------------------
# Resampling
# ---------------------------------------------------------------------------
rng = pd.date_range('9/1/2012', periods=100, freq='D')
ts = Series(np.random.randint(1, 25, len(rng)), index=rng)
ts

# resample() no longer accepts how=...; call the aggregation on the returned
# Resampler instead. kind='period' is replaced by converting the resulting
# index with to_period().
ts.resample(pd.offsets.MonthEnd()).mean().to_period('M')
ts.resample(pd.offsets.MonthEnd()).sum().to_period('M')
ts.resample(pd.offsets.MonthEnd()).sum()

# '15min' replaces the deprecated '15T' alias.
rng15 = pd.date_range('9/1/2012', periods=96, freq='15min')
ts15 = Series(np.random.randint(1, 25, len(rng15)), index=rng15)
ts15

ts15.resample('30min', closed='right', label='left').mean()
ts30 = ts15.resample('30min', closed='left', label='left').mean()
ts30

ts60 = ts15.resample('60min', closed='left', label='left').ohlc()
ts60

ts30
# Upsampling 30min -> 15min. A bare resample() only returns a Resampler;
# asfreq() materialises the new index, leaving NaN in the added slots.
ts30.resample('15min').asfreq()

# ---------------------------------------------------------------------------
# Stock price data
# ---------------------------------------------------------------------------
# Expects stock_px.csv in the working directory; the first column is parsed
# as dates and used as the index.
close_px_all = pd.read_csv('stock_px.csv', parse_dates=True, index_col=0)
close_px_all.head()

close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]
close_px[1:10]

close_px['AAPL'].plot()
close_px.plot()

# And here's what it looks like if we just plot one year. Notice the auto x-axis formatting.
# The .ix indexer was removed from pandas; .loc performs the same
# label-based (partial date string) selection.
close_px.loc['2009'].plot()

close_px.index
list(close_px.index)[1:25]

close_px['AAPL'].loc['01-2011':'03-2011'].plot()

# Quarterly (quarters ending in December) mean of AAPL. resample() returns a
# Resampler, so the aggregation has to be requested explicitly. The
# QuarterEnd offset object avoids the deprecated 'Q-DEC' string alias.
appl_q = close_px['AAPL'].resample(pd.offsets.QuarterEnd(startingMonth=12)).mean()
appl_q
appl_q.loc['2009':].plot()

# ---------------------------------------------------------------------------
# Rolling window statistics
# ---------------------------------------------------------------------------
# pd.rolling_mean / pd.rolling_std were removed from pandas; the
# Series.rolling(...) window object provides the same statistics.
close_px.AAPL.plot()
close_px.AAPL.rolling(250).mean().plot()

close_px.AAPL.rolling(250).mean()
# min_periods=10: a value is produced once 10 observations are in the window.
close_px.AAPL.rolling(250, min_periods=10).mean()

close_px.AAPL.plot()
close_px.AAPL.rolling(250, min_periods=10).mean().plot()

# Things like rolling standard deviation are also possible
appl_std250 = close_px.AAPL.rolling(250, min_periods=10).std()
appl_std250.plot()