import pandas as pd
import numpy as np
import datetime as dt
from datetime import datetime as dt
t=pd.Timestamp
lu = pd.DataFrame({ 'name' : ['Bill','Elon','Larry','Jeff','Marissa'],
'feels' : ['charitable','Alcoa envy','Elon envy','like the number 7','sassy'],
'last ate' : [t('20151209'),t('20151201'),t('20151208'),t('20151208'),t('20151209')],
'boxers' : [True,True,True,False,True]})
af = pd.DataFrame({ 'name' : ['Bill','Elon','Larry','Elon','Jeff','Larry','Larry'],
'address' : ['in my computer','moon','internet','mars','cardboard box','autonomous car','every where'],
'sq_ft' : [2,2135,69,84535, 1.32, 54,168],
'forks' : [7,1,2,1,0,np.nan,1]})
rand_dates=[t('20141202'),t('20130804'),t('20120508'),t('20150411'),
t('20141209'),t('20091023'),t('20130921'),t('20110102'),
t('20130728'),t('20141119'),t('20151024'),t('20130824')]
df = pd.DataFrame({ 'name' : ['Elon','Bill','Larry','Elon','Jeff','Larry','Larry','Bill','Larry','Elon','Marissa','Jeff'],
'activity' : ['slept','tripped','spoke','swam','spooked','liked','whistled','up dog','smiled','donated','grant men paternity leave','fondled'],
'date' : rand_dates})
lu
boxers | feels | last ate | name | |
---|---|---|---|---|
0 | True | charitable | 2015-12-09 | Bill |
1 | True | Alcoa envy | 2015-12-01 | Elon |
2 | True | Elon envy | 2015-12-08 | Larry |
3 | False | like the number 7 | 2015-12-08 | Jeff |
4 | True | sassy | 2015-12-09 | Marissa |
af
df
#af.groupby(['name','address']).size()
af.name.value_counts()
Larry 3 Elon 2 Jeff 1 Bill 1 Name: name, dtype: int64
# Exhibit A
# boxers feels last ate name addresses
#0 True Elon envy 2015-12-08 Larry 3
#1 True Alcoa envy 2015-12-01 Elon 2
#2 False like the number 7 2015-12-08 Jeff 1
#3 True charitable 2015-12-09 Bill 1
df.groupby(['name','date']).size()
name date Bill 2011-01-02 1 2013-08-04 1 Elon 2014-11-19 1 2014-12-02 1 2015-04-11 1 Jeff 2013-08-24 1 2014-12-09 1 Larry 2009-10-23 1 2012-05-08 1 2013-07-28 1 2013-09-21 1 Marissa 2015-10-24 1 dtype: int64
# Exhibit B - Calculate time deltas
# name time_delta
# Bill Timedelta('945 days 00:00:00')
# Elon Timedelta('143 days 00:00:00')
# Jeff Timedelta('472 days 00:00:00')
# Larry Timedelta('1429 days 00:00:00')
# Marissa Timedelta('0 days 00:00:00')
# Exhibit C - Rank time deltas (this is easy)
# name time_delta
# Larry Timedelta('1429 days 00:00:00')
# Bill Timedelta('945 days 00:00:00')
# Jeff Timedelta('472 days 00:00:00')
# Elon Timedelta('143 days 00:00:00')
# Marissa Timedelta('0 days 00:00:00')
# Exhibit D - Add to and re-rank the table built in Exhibit A according to time_delta
# boxers feels last ate name addresses time_delta
# 0 True Elon envy 2015-12-08 Larry 3 1429 days 00:00:00
# 1 True charitable 2015-12-09 Bill 1 945 days 00:00:00
# 2 False like the number 7 2015-12-08 Jeff 1 472 days 00:00:00
# 3 True Alcoa envy 2015-12-01 Elon 2 143 days 00:00:00
# 4 True sassy 2015-12-09 Marissa NaN 0 days 00:00:00