import pandas as pd
import numpy as np
import sys
%matplotlib inline
print('Python version ' + sys.version)
print('Pandas version ' + pd.__version__)
Python version 3.7.4 (default, Aug 9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)] Pandas version 1.3.5
df = pd.DataFrame({'group1':["a","a","b","b"],
'value':[10,20,30,40]
})
df
group1 | value | |
---|---|---|
0 | a | 10 |
1 | a | 20 |
2 | b | 30 |
3 | b | 40 |
group = df.groupby('group1')
group.agg([len,sum])
value | ||
---|---|---|
len | sum | |
group1 | ||
a | 2 | 30 |
b | 2 | 70 |
df = pd.DataFrame({'labels':["a","a","b","b"],
'value':[10,20,30,40]
})
df
labels | value | |
---|---|---|
0 | a | 10 |
1 | a | 20 |
2 | b | 30 |
3 | b | 40 |
group = df.groupby('labels')['value']
df['value.sum'] = group.transform('sum')
df
labels | value | value.sum | |
---|---|---|---|
0 | a | 10 | 30 |
1 | a | 20 | 30 |
2 | b | 30 | 70 |
3 | b | 40 | 70 |
df = pd.DataFrame({'col1':[pd.Timestamp('20130102000030'),
pd.Timestamp('2013-02-03 00:00:30'),
pd.Timestamp('3/4/2013 000030')]
})
df
col1 | |
---|---|
0 | 2013-01-02 00:00:30 |
1 | 2013-02-03 00:00:30 |
2 | 2013-03-04 00:00:30 |
df['MonthNumber'] = df['col1'].apply(lambda x: x.month)
df['Day'] = df['col1'].apply(lambda x: x.day)
df['Year'] = df['col1'].apply(lambda x: x.year)
df['MonthName'] = df['col1'].apply(lambda x: x.strftime('%B'))
df['WeekDay'] = df['col1'].apply(lambda x: x.strftime('%A'))
df
col1 | MonthNumber | Day | Year | MonthName | WeekDay | |
---|---|---|---|---|---|---|
0 | 2013-01-02 00:00:30 | 1 | 2 | 2013 | January | Wednesday |
1 | 2013-02-03 00:00:30 | 2 | 3 | 2013 | February | Sunday |
2 | 2013-03-04 00:00:30 | 3 | 4 | 2013 | March | Monday |
df = pd.DataFrame({'col1':['minus','minus','positive','nan'],
'col2':[10,20,30,40]
})
df
col1 | col2 | |
---|---|---|
0 | minus | 10 |
1 | minus | 20 |
2 | positive | 30 |
3 | nan | 40 |
df['col3'] = df['col2']*df['col1'].apply(lambda x: -1 if x=='minus' else (1 if x=='positive' else np.nan))
df
col1 | col2 | col3 | |
---|---|---|---|
0 | minus | 10 | -10.0 |
1 | minus | 20 | -20.0 |
2 | positive | 30 | 30.0 |
3 | nan | 40 | NaN |
df = pd.DataFrame({'group1':['a','a','a','b','b','b'],
'group2':['c','c','d','d','d','e'],
'value1':[1.1,2,3,4,5,6],
'value2':[7.1,8,9,10,11,12]
})
df
group1 | group2 | value1 | value2 | |
---|---|---|---|---|
0 | a | c | 1.1 | 7.1 |
1 | a | c | 2.0 | 8.0 |
2 | a | d | 3.0 | 9.0 |
3 | b | d | 4.0 | 10.0 |
4 | b | d | 5.0 | 11.0 |
5 | b | e | 6.0 | 12.0 |
group = df.groupby(['group1','group2'])
def Half(x):
return x.sum()
df['new'] = group['value1'].transform(Half)
df
group1 | group2 | value1 | value2 | new | |
---|---|---|---|---|---|
0 | a | c | 1.1 | 7.1 | 3.1 |
1 | a | c | 2.0 | 8.0 | 3.1 |
2 | a | d | 3.0 | 9.0 | 3.0 |
3 | b | d | 4.0 | 10.0 | 9.0 |
4 | b | d | 5.0 | 11.0 | 9.0 |
5 | b | e | 6.0 | 12.0 | 6.0 |
# For multiple functions
def HalfPlus(x):
return x.sum() + 1
newcol = group['value1'].agg([Half,HalfPlus])
newcol
Half | HalfPlus | ||
---|---|---|---|
group1 | group2 | ||
a | c | 3.1 | 4.1 |
d | 3.0 | 4.0 | |
b | d | 9.0 | 10.0 |
e | 6.0 | 7.0 |
df.merge(newcol, left_on=['group1','group2'], right_index=True)
group1 | group2 | value1 | value2 | new | Half | HalfPlus | |
---|---|---|---|---|---|---|---|
0 | a | c | 1.1 | 7.1 | 3.1 | 3.1 | 4.1 |
1 | a | c | 2.0 | 8.0 | 3.1 | 3.1 | 4.1 |
2 | a | d | 3.0 | 9.0 | 3.0 | 3.0 | 4.0 |
3 | b | d | 4.0 | 10.0 | 9.0 | 9.0 | 10.0 |
4 | b | d | 5.0 | 11.0 | 9.0 | 9.0 | 10.0 |
5 | b | e | 6.0 | 12.0 | 6.0 | 6.0 | 7.0 |
df1 = pd.DataFrame(data=[26371, 1755, 2], index=[-9999, 240, 138.99], columns=['value'])
df1
value | |
---|---|
-9999.00 | 26371 |
240.00 | 1755 |
138.99 | 2 |
df2 = pd.DataFrame(data=[26371, 1755, 6, 4], index=[-9999, 240, 113.03, 110], columns=['value'])
df2
value | |
---|---|
-9999.00 | 26371 |
240.00 | 1755 |
113.03 | 6 |
110.00 | 4 |
# If you simply add them, you will get null values
# were the index does not match
df1 + df2
value | |
---|---|
-9999.00 | 52742.0 |
110.00 | NaN |
113.03 | NaN |
138.99 | NaN |
240.00 | 3510.0 |
# Here we fix this issue
df1.add(df2, fill_value=0)
value | |
---|---|
-9999.00 | 52742.0 |
110.00 | 4.0 |
113.03 | 6.0 |
138.99 | 2.0 |
240.00 | 3510.0 |
This tutorial was created by HEDARO