In [1]:
from pandas import DataFrame
import pandas as pd
%matplotlib inline
In [2]:
print 'Pandas Version: ' + pd.__version__
Pandas Version: 0.13.0rc1

Merge

I have two dataframes that have dates as their index. The problem is that one of the dataframes has a timestamp and this is preventing me from adding the dataframes together. How can I match up the time stamps?

In [3]:
df1 = DataFrame({'col1':[pd.Timestamp('20130102000030'),
                         pd.Timestamp('2013-01-03 00:00:30'),
                         pd.Timestamp('1/4/2013 000030')],
                 'col2':[1,10,18]
                 })
df1
Out[3]:
col1 col2
0 2013-01-02 00:00:30 1
1 2013-01-03 00:00:30 10
2 2013-01-04 00:00:30 18

3 rows × 2 columns

In [4]:
df1 = df1.set_index('col1')
df1
Out[4]:
col2
col1
2013-01-02 00:00:30 1
2013-01-03 00:00:30 10
2013-01-04 00:00:30 18

3 rows × 1 columns

In [5]:
d = {'col2':[22,10,113]}

i = [pd.Timestamp('20130102'),
     pd.Timestamp('2013-01-03'),
     pd.Timestamp('1/4/2013')]
                 

df2 = DataFrame(data=d, index = i)
df2.index.name = 'col1'
df2
Out[5]:
col2
col1
2013-01-02 22
2013-01-03 10
2013-01-04 113

3 rows × 1 columns

In [6]:
# If we try to add the data frames together, we do not get the results we want.
df2+df1
Out[6]:
col2
col1
2013-01-02 00:00:00 NaN
2013-01-02 00:00:30 NaN
2013-01-03 00:00:00 NaN
2013-01-03 00:00:30 NaN
2013-01-04 00:00:00 NaN
2013-01-04 00:00:30 NaN

6 rows × 1 columns

In [7]:
# Make the index of df2 the same as the index of df1
# Fill the missing values with previous known value
#
#2013-01-02 00:00:00 => 22
#2013-01-02 00:00:30 => 22 
#2013-01-03 00:00:00 => 10
#2013-01-03 00:00:00 => 10
#2013-01-04 00:00:00 => 113
#2013-01-04 00:00:00 => 113
df2.reindex(df1.index, method='pad')
Out[7]:
col2
col1
2013-01-02 00:00:30 22
2013-01-03 00:00:30 10
2013-01-04 00:00:30 113

3 rows × 1 columns

In [8]:
# Now we can add them
df2 = df2.reindex(df1.index, method='pad')
df1+df2
Out[8]:
col2
col1
2013-01-02 00:00:30 23
2013-01-03 00:00:30 20
2013-01-04 00:00:30 131

3 rows × 1 columns

How do I add two dataframes together by row?

In [9]:
df1 = DataFrame([1,2,3])
df1
Out[9]:
0
0 1
1 2
2 3

3 rows × 1 columns

In [10]:
df2 = DataFrame([4,5,6])
df2
Out[10]:
0
0 4
1 5
2 6

3 rows × 1 columns

In [11]:
pd.concat([df1,df2])
Out[11]:
0
0 1
1 2
2 3
0 4
1 5
2 6

6 rows × 1 columns

How do I join two data frames by index?

In [12]:
d = {'col1':[22,10,113]}

i = [pd.Timestamp('1/1/2013'),
     pd.Timestamp('1/2/2013'),
     pd.Timestamp('1/3/2013')]
                 

df1 = DataFrame(data=d, index = i)
df1
Out[12]:
col1
2013-01-01 22
2013-01-02 10
2013-01-03 113

3 rows × 1 columns

In [13]:
d = {'col2':[5,5]}

i = [pd.Timestamp('1/1/2013'),
     pd.Timestamp('1/3/2013')]
                 

df2 = DataFrame(data=d, index = i)
df2
Out[13]:
col2
2013-01-01 5
2013-01-03 5

2 rows × 1 columns

In [14]:
df1.merge(df2, left_index=True, right_index=True, how='left')
Out[14]:
col1 col2
2013-01-01 22 5
2013-01-02 10 NaN
2013-01-03 113 5

3 rows × 2 columns

Author: David Rojas LLC

Back to top