#!/usr/bin/env python
# coding: utf-8

# Notebook-exported tutorial script: pandas time series basics.
# NOTE: bare expressions (e.g. a lone `ts`) were notebook cells whose value
# was displayed interactively; as a plain script they evaluate and discard.
# NOTE(review): statement order matters — np.random.seed(12345) below makes
# every random draw position-dependent, so do not reorder cells.

# In[1]:

# Global display/plotting configuration for the whole session.
import numpy as np
import pandas as pd
np.random.seed(12345)
import matplotlib.pyplot as plt
plt.rc("figure", figsize=(10, 6))
# Saved so the final cell can restore the original setting.
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_columns = 20
pd.options.display.max_rows = 20
pd.options.display.max_colwidth = 80
np.set_printoptions(precision=4, suppress=True)

# In[2]:

# Redundant re-imports kept from the notebook export.
import numpy as np
import pandas as pd

# --- datetime / timedelta basics -----------------------------------------

# In[3]:

from datetime import datetime
now = datetime.now()
now
now.year, now.month, now.day

# In[4]:

# Subtracting datetimes yields a timedelta (days + seconds components).
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
delta
delta.days
delta.seconds

# In[5]:

# timedelta(12) is 12 days; arithmetic shifts the datetime.
from datetime import timedelta
start = datetime(2011, 1, 7)
start + timedelta(12)
start - 2 * timedelta(12)

# --- string <-> datetime conversion --------------------------------------

# In[6]:

stamp = datetime(2011, 1, 3)
str(stamp)
stamp.strftime("%Y-%m-%d")

# In[7]:

value = "2011-01-03"
datetime.strptime(value, "%Y-%m-%d")
datestrs = ["7/6/2011", "8/6/2011"]
[datetime.strptime(x, "%m/%d/%Y") for x in datestrs]

# In[8]:

datestrs = ["2011-07-06 12:00:00", "2011-08-06 00:00:00"]
pd.to_datetime(datestrs)

# In[9]:

# None is converted to NaT (not-a-time); pd.isna detects it.
idx = pd.to_datetime(datestrs + [None])
idx
idx[2]
pd.isna(idx)

# --- time series indexed by DatetimeIndex --------------------------------

# In[10]:

dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
         datetime(2011, 1, 7), datetime(2011, 1, 8),
         datetime(2011, 1, 10), datetime(2011, 1, 12)]
ts = pd.Series(np.random.standard_normal(6), index=dates)
ts

# In[11]:

ts.index

# In[12]:

# Arithmetic auto-aligns on the index; missing labels produce NaN.
ts + ts[::2]

# In[13]:

ts.index.dtype

# In[14]:

stamp = ts.index[0]
stamp

# --- indexing, selection, subsetting -------------------------------------

# In[15]:

stamp = ts.index[2]
ts[stamp]

# In[16]:

# A date string is interpreted as a label.
ts["2011-01-10"]

# In[17]:

# Partial string indexing: "2001" selects the whole year.
longer_ts = pd.Series(np.random.standard_normal(1000),
                      index=pd.date_range("2000-01-01", periods=1000))
longer_ts
longer_ts["2001"]

# In[18]:

longer_ts["2001-05"]

# In[19]:

ts[datetime(2011, 1, 7):]
ts[datetime(2011, 1, 7):datetime(2011, 1, 10)]

# In[20]:

# Slicing with timestamps not present in the index still works (range query).
ts
ts["2011-01-06":"2011-01-11"]

# In[21]:

ts.truncate(after="2011-01-09")

# In[22]:

dates = pd.date_range("2000-01-01", periods=100, freq="W-WED")
long_df = pd.DataFrame(np.random.standard_normal((100, 4)),
                       index=dates,
                       columns=["Colorado", "Texas", "New York", "Ohio"])
long_df.loc["2001-05"]

# --- duplicate timestamps ------------------------------------------------

# In[23]:

dates = pd.DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-02",
                          "2000-01-02", "2000-01-03"])
dup_ts = pd.Series(np.arange(5), index=dates)
dup_ts

# In[24]:

dup_ts.index.is_unique

# In[25]:

# Selecting a duplicated label returns a Series slice, not a scalar.
dup_ts["2000-01-03"]  # not duplicated
dup_ts["2000-01-02"]  # duplicated

# In[26]:

# groupby(level=0) aggregates over the duplicate timestamps.
grouped = dup_ts.groupby(level=0)
grouped.mean()
grouped.count()

# --- date ranges and frequencies -----------------------------------------

# In[27]:

ts
resampler = ts.resample("D")
resampler

# In[28]:

index = pd.date_range("2012-04-01", "2012-06-01")
index

# In[29]:

pd.date_range(start="2012-04-01", periods=20)
pd.date_range(end="2012-06-01", periods=20)

# In[30]:

# "BM" = business month end.
# NOTE(review): "BM" (and "M", "A-DEC", "T", "H" below) are legacy frequency
# aliases deprecated in pandas >= 2.2 ("BME", "ME", "YE-DEC", "min", "h");
# confirm the target pandas version before modernizing.
pd.date_range("2000-01-01", "2000-12-01", freq="BM")

# In[31]:

pd.date_range("2012-05-02 12:56:31", periods=5)

# In[32]:

# normalize=True drops the time-of-day component (midnight timestamps).
pd.date_range("2012-05-02 12:56:31", periods=5, normalize=True)

# In[33]:

from pandas.tseries.offsets import Hour, Minute
hour = Hour()
hour

# In[34]:

four_hours = Hour(4)
four_hours

# In[35]:

pd.date_range("2000-01-01", "2000-01-03 23:59", freq="4H")

# In[36]:

# Offsets compose by addition.
Hour(2) + Minute(30)

# In[37]:

pd.date_range("2000-01-01", periods=10, freq="1h30min")

# In[38]:

# "WOM-3FRI": week-of-month anchored frequency, third Friday of each month.
monthly_dates = pd.date_range("2012-01-01", "2012-09-01", freq="WOM-3FRI")
list(monthly_dates)

# --- shifting (leading and lagging) --------------------------------------

# In[39]:

ts = pd.Series(np.random.standard_normal(4),
               index=pd.date_range("2000-01-01", periods=4, freq="M"))
ts
# Naive shift moves the data, leaving the index fixed (introduces NaN).
ts.shift(2)
ts.shift(-2)

# In[40]:

# Shifting with a freq moves the index timestamps instead of the data.
ts.shift(2, freq="M")

# In[41]:

ts.shift(3, freq="D")
ts.shift(1, freq="90T")

# In[42]:

from pandas.tseries.offsets import Day, MonthEnd
now = datetime(2011, 11, 17)
now + 3 * Day()

# In[43]:

# Anchored offsets "roll" a date forward to the next anchor point.
now + MonthEnd()
now + MonthEnd(2)

# In[44]:

offset = MonthEnd()
offset.rollforward(now)
offset.rollback(now)

# In[45]:

# Using rollforward as a groupby key buckets dates by month end.
ts = pd.Series(np.random.standard_normal(20),
               index=pd.date_range("2000-01-15", periods=20, freq="4D"))
ts
ts.groupby(MonthEnd().rollforward).mean()

# In[46]:

# Equivalent, simpler expression of the same monthly aggregation.
ts.resample("M").mean()

# --- time zones ----------------------------------------------------------

# In[47]:

import pytz
pytz.common_timezones[-5:]

# In[48]:

tz = pytz.timezone("America/New_York")
tz

# In[49]:
# --- time zone localization and conversion -------------------------------

dates = pd.date_range("2012-03-09 09:30", periods=6)
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts

# In[50]:

# A freshly built DatetimeIndex is timezone-naive (tz is None).
print(ts.index.tz)

# In[51]:

pd.date_range("2012-03-09 09:30", periods=10, tz="UTC")

# In[52]:

# tz_localize attaches a zone to naive timestamps (no clock change).
ts
ts_utc = ts.tz_localize("UTC")
ts_utc
ts_utc.index

# In[53]:

# tz_convert translates an already-localized series to another zone.
ts_utc.tz_convert("America/New_York")

# In[54]:

ts_eastern = ts.tz_localize("America/New_York")
ts_eastern.tz_convert("UTC")
ts_eastern.tz_convert("Europe/Berlin")

# In[55]:

ts.index.tz_localize("Asia/Shanghai")

# --- tz-aware Timestamp objects ------------------------------------------

# In[56]:

stamp = pd.Timestamp("2011-03-12 04:00")
stamp_utc = stamp.tz_localize("utc")
stamp_utc.tz_convert("America/New_York")

# In[57]:

stamp_moscow = pd.Timestamp("2011-03-12 04:00", tz="Europe/Moscow")
stamp_moscow

# In[58]:

# .value is the underlying UTC epoch nanoseconds; unchanged by tz_convert.
stamp_utc.value
stamp_utc.tz_convert("America/New_York").value

# In[59]:

# DST transition example: offset arithmetic respects the wall-clock jump.
stamp = pd.Timestamp("2012-03-11 01:30", tz="US/Eastern")
stamp
stamp + Hour()

# In[60]:

stamp = pd.Timestamp("2012-11-04 00:30", tz="US/Eastern")
stamp
stamp + 2 * Hour()

# In[61]:

# Combining series from different zones yields a UTC result index.
dates = pd.date_range("2012-03-07 09:30", periods=10, freq="B")
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts
ts1 = ts[:7].tz_localize("Europe/London")
ts2 = ts1[2:].tz_convert("Europe/Moscow")
result = ts1 + ts2
result.index

# --- periods and period arithmetic ---------------------------------------

# In[62]:

# "A-DEC": annual period, year ending in December.
# NOTE(review): "A-DEC", "M", "T", "H" period/frequency aliases are
# deprecated in pandas >= 2.2 — verify the target pandas version.
p = pd.Period("2011", freq="A-DEC")
p

# In[63]:

p + 5
p - 2

# In[64]:

# Difference of two like-frequency periods is an offset count.
pd.Period("2014", freq="A-DEC") - p

# In[65]:

periods = pd.period_range("2000-01-01", "2000-06-30", freq="M")
periods

# In[66]:

pd.Series(np.random.standard_normal(6), index=periods)

# In[67]:

values = ["2001Q3", "2002Q2", "2003Q1"]
index = pd.PeriodIndex(values, freq="Q-DEC")
index

# --- period frequency conversion (asfreq) --------------------------------

# In[68]:

p = pd.Period("2011", freq="A-DEC")
p
p.asfreq("M", how="start")
p.asfreq("M", how="end")
p.asfreq("M")

# In[69]:

# Fiscal year ending in June: the "2011" year spans Jul 2010 - Jun 2011.
p = pd.Period("2011", freq="A-JUN")
p
p.asfreq("M", how="start")
p.asfreq("M", how="end")

# In[70]:

p = pd.Period("Aug-2011", "M")
p.asfreq("A-JUN")

# In[71]:

periods = pd.period_range("2006", "2009", freq="A-DEC")
ts = pd.Series(np.random.standard_normal(len(periods)),
               index=periods)
ts
ts.asfreq("M", how="start")

# In[72]:

ts.asfreq("B", how="end")

# --- quarterly period frequencies ----------------------------------------

# In[73]:

# "Q-JAN": quarterly, fiscal year ending in January.
p = pd.Period("2012Q4", freq="Q-JAN")
p

# In[74]:

p.asfreq("D", how="start")
p.asfreq("D", how="end")

# In[75]:

# 4 PM on the second-to-last business day of the quarter:
# last business day minus 1, at minute resolution, plus 16*60 minutes.
p4pm = (p.asfreq("B", how="end") - 1).asfreq("T", how="start") + 16 * 60
p4pm
p4pm.to_timestamp()

# In[76]:

periods = pd.period_range("2011Q3", "2012Q4", freq="Q-JAN")
ts = pd.Series(np.arange(len(periods)), index=periods)
ts
new_periods = (periods.asfreq("B", "end") - 1).asfreq("H", "start") + 16
ts.index = new_periods.to_timestamp()
ts

# --- timestamp <-> period conversion -------------------------------------

# In[77]:

dates = pd.date_range("2000-01-01", periods=3, freq="M")
ts = pd.Series(np.random.standard_normal(3), index=dates)
ts
pts = ts.to_period()
pts

# In[78]:

dates = pd.date_range("2000-01-29", periods=6)
ts2 = pd.Series(np.random.standard_normal(6), index=dates)
ts2
ts2.to_period("M")

# In[79]:

pts = ts2.to_period()
pts
pts.to_timestamp(how="end")

# --- building a PeriodIndex from columns ---------------------------------

# In[80]:

# Requires examples/macrodata.csv relative to the working directory
# — presumably the book's accompanying dataset; verify path before running.
data = pd.read_csv("examples/macrodata.csv")
data.head(5)
data["year"]
data["quarter"]

# In[81]:

index = pd.PeriodIndex(year=data["year"], quarter=data["quarter"],
                       freq="Q-DEC")
index
data.index = index
data["infl"]

# --- resampling and frequency conversion ---------------------------------

# In[82]:

dates = pd.date_range("2000-01-01", periods=100)
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts
ts.resample("M").mean()
# NOTE(review): resample(kind="period") is deprecated in pandas >= 2.2
# (use .resample("M").mean().to_period() instead) — confirm target version.
ts.resample("M", kind="period").mean()

# --- downsampling --------------------------------------------------------

# In[83]:

dates = pd.date_range("2000-01-01", periods=12, freq="T")
ts = pd.Series(np.arange(len(dates)), index=dates)
ts

# In[84]:

ts.resample("5min").sum()

# In[85]:

# closed="right" makes the right bin edge inclusive instead of the left.
ts.resample("5min", closed="right").sum()

# In[86]:

# label="right" labels each bin with its right edge.
ts.resample("5min", closed="right", label="right").sum()

# In[87]:

# Shift the result labels back one second to mark bin ends unambiguously.
from pandas.tseries.frequencies import to_offset
result = ts.resample("5min", closed="right", label="right").sum()
result.index = result.index + to_offset("-1s")
result

# In[88]:

# Open-high-low-close aggregation per bin.
ts = pd.Series(np.random.permutation(np.arange(len(dates))), index=dates)
ts.resample("5min").ohlc()

# --- upsampling and interpolation ----------------------------------------

# In[89]:

frame = pd.DataFrame(np.random.standard_normal((2, 4)),
                     index=pd.date_range("2000-01-01", periods=2,
                                         freq="W-WED"),
                     columns=["Colorado", "Texas", "New York", "Ohio"])
frame

# In[90]:

# asfreq() upsamples without filling, leaving NaNs between observations.
df_daily = frame.resample("D").asfreq()
df_daily

# In[91]:

frame.resample("D").ffill()

# In[92]:

# limit bounds how far forward a value is propagated.
frame.resample("D").ffill(limit=2)

# In[93]:

frame.resample("W-THU").ffill()

# --- resampling with periods ---------------------------------------------

# In[94]:

frame = pd.DataFrame(np.random.standard_normal((24, 4)),
                     index=pd.period_range("1-2000", "12-2001",
                                           freq="M"),
                     columns=["Colorado", "Texas", "New York", "Ohio"])
frame.head()
annual_frame = frame.resample("A-DEC").mean()
annual_frame

# In[95]:

# Q-DEC: Quarterly, year ending in December
annual_frame.resample("Q-DEC").ffill()
annual_frame.resample("Q-DEC", convention="end").asfreq()

# In[96]:

annual_frame.resample("Q-MAR").ffill()

# --- grouped time resampling (pd.Grouper) --------------------------------

# In[97]:

N = 15
times = pd.date_range("2017-05-20 00:00", freq="1min", periods=N)
df = pd.DataFrame({"time": times,
                   "value": np.arange(N)})
df

# In[98]:

df.set_index("time").resample("5min").count()

# In[99]:

# Three rows per timestamp, keyed "a"/"b"/"c".
df2 = pd.DataFrame({"time": times.repeat(3),
                    "key": np.tile(["a", "b", "c"], N),
                    "value": np.arange(N * 3.)})
df2.head(7)

# In[100]:

time_key = pd.Grouper(freq="5min")

# In[101]:

# Group by key AND 5-minute time bucket simultaneously.
resampled = (df2.set_index("time")
             .groupby(["key", time_key])
             .sum())
resampled
resampled.reset_index()

# --- moving window functions ---------------------------------------------

# In[102]:

# Requires examples/stock_px.csv — presumably the book's dataset; verify.
close_px_all = pd.read_csv("examples/stock_px.csv",
                           parse_dates=True, index_col=0)
close_px = close_px_all[["AAPL", "MSFT", "XOM"]]
close_px = close_px.resample("B").ffill()

# In[103]:

close_px["AAPL"].plot()
close_px["AAPL"].rolling(250).mean().plot()

# In[104]:

# min_periods lets the window emit values before it is fully populated.
plt.figure()
std250 = close_px["AAPL"].pct_change().rolling(250, min_periods=10).std()
std250[5:12]
std250.plot()

# In[105]:

expanding_mean = std250.expanding().mean()

# In[106]:

plt.figure()

# In[107]:

plt.style.use('grayscale')
close_px.rolling(60).mean().plot(logy=True)

# In[108]:

# Time-based window: all observations within a trailing 20 calendar days.
close_px.rolling("20D").mean()

# --- exponentially weighted functions ------------------------------------

# In[109]:

plt.figure()

# In[110]:

aapl_px = close_px["AAPL"]["2006":"2007"]
ma30 = aapl_px.rolling(30, min_periods=20).mean()
ewma30 = aapl_px.ewm(span=30).mean()
aapl_px.plot(style="k-", label="Price")
ma30.plot(style="k--", label="Simple Moving Avg")
ewma30.plot(style="k-", label="EW MA")
plt.legend()

# --- binary moving window functions --------------------------------------

# In[111]:

plt.figure()

# In[112]:

spx_px = close_px_all["SPX"]
spx_rets = spx_px.pct_change()
returns = close_px.pct_change()

# In[113]:

# Rolling 125-day correlation of AAPL returns with the S&P 500.
corr = returns["AAPL"].rolling(125, min_periods=100).corr(spx_rets)
corr.plot()

# In[114]:

plt.figure()

# In[115]:

# Same, computed column-wise for all three stocks at once.
corr = returns.rolling(125, min_periods=100).corr(spx_rets)
corr.plot()

# --- user-defined moving window functions --------------------------------

# In[116]:

plt.figure()

# In[117]:

from scipy.stats import percentileofscore

def score_at_2percent(x):
    # Percentile rank of a 2% return within the window's distribution.
    return percentileofscore(x, 0.02)

result = returns["AAPL"].rolling(250).apply(score_at_2percent)
result.plot()

# In[118]:

# In[119]:

# Restore the display setting saved at the top of the script.
pd.options.display.max_rows = PREVIOUS_MAX_ROWS