import pandas as pd
import sys
%matplotlib inline
print('Python version ' + sys.version)
print('Pandas version ' + pd.__version__)
Python version 3.7.4 (default, Aug 9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)] Pandas version 1.0.5
df1 = pd.DataFrame({'col1':[pd.Timestamp('20130102000030'),
pd.Timestamp('2013-01-03 00:00:30'),
pd.Timestamp('1/4/2013 000030')],
'col2':[1,10,18]
})
df1
col1 | col2 | |
---|---|---|
0 | 2013-01-02 00:00:30 | 1 |
1 | 2013-01-03 00:00:30 | 10 |
2 | 2013-01-04 00:00:30 | 18 |
df1 = df1.set_index('col1')
df1
col2 | |
---|---|
col1 | |
2013-01-02 00:00:30 | 1 |
2013-01-03 00:00:30 | 10 |
2013-01-04 00:00:30 | 18 |
d = {'col2':[22,10,113]}
i = [pd.Timestamp('20130102'),
pd.Timestamp('2013-01-03'),
pd.Timestamp('1/4/2013')]
df2 = pd.DataFrame(data=d, index = i)
df2.index.name = 'col1'
df2
col2 | |
---|---|
col1 | |
2013-01-02 | 22 |
2013-01-03 | 10 |
2013-01-04 | 113 |
# If we try to add the data frames together, we do not get the results we want.
df2+df1
col2 | |
---|---|
col1 | |
2013-01-02 00:00:00 | NaN |
2013-01-02 00:00:30 | NaN |
2013-01-03 00:00:00 | NaN |
2013-01-03 00:00:30 | NaN |
2013-01-04 00:00:00 | NaN |
2013-01-04 00:00:30 | NaN |
# Make the index of df2 the same as the index of df1
# Fill the missing values with previous known value
#
#2013-01-02 00:00:00 => 22
#2013-01-02 00:00:30 => 22
#2013-01-03 00:00:00 => 10
#2013-01-03 00:00:00 => 10
#2013-01-04 00:00:00 => 113
#2013-01-04 00:00:00 => 113
df2.reindex(df1.index, method='pad')
col2 | |
---|---|
col1 | |
2013-01-02 00:00:30 | 22 |
2013-01-03 00:00:30 | 10 |
2013-01-04 00:00:30 | 113 |
# Now we can add them
df2 = df2.reindex(df1.index, method='pad')
df1+df2
col2 | |
---|---|
col1 | |
2013-01-02 00:00:30 | 23 |
2013-01-03 00:00:30 | 20 |
2013-01-04 00:00:30 | 131 |
df1 = pd.DataFrame([1,2,3])
df1
0 | |
---|---|
0 | 1 |
1 | 2 |
2 | 3 |
df2 = pd.DataFrame([4,5,6])
df2
0 | |
---|---|
0 | 4 |
1 | 5 |
2 | 6 |
pd.concat([df1,df2])
0 | |
---|---|
0 | 1 |
1 | 2 |
2 | 3 |
0 | 4 |
1 | 5 |
2 | 6 |
d = {'col1':[22,10,113]}
i = [pd.Timestamp('1/1/2013'),
pd.Timestamp('1/2/2013'),
pd.Timestamp('1/3/2013')]
df1 = pd.DataFrame(data=d, index = i)
df1
col1 | |
---|---|
2013-01-01 | 22 |
2013-01-02 | 10 |
2013-01-03 | 113 |
d = {'col2':[5,5]}
i = [pd.Timestamp('1/1/2013'),
pd.Timestamp('1/3/2013')]
df2 = pd.DataFrame(data=d, index = i)
df2
col2 | |
---|---|
2013-01-01 | 5 |
2013-01-03 | 5 |
df1.merge(df2, left_index=True, right_index=True, how='left')
col1 | col2 | |
---|---|---|
2013-01-01 | 22 | 5.0 |
2013-01-02 | 10 | NaN |
2013-01-03 | 113 | 5.0 |
This tutorial was created by HEDARO