# Lesson 6

Let's take a look at the groupby function.

In :
# Import libraries
import pandas as pd
import sys

In :
# Report the interpreter and pandas versions this notebook was run with
print(f'Python version {sys.version}')
print(f'Pandas version {pd.__version__}')

Python version 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]
Pandas version 1.0.5

In :
# Toy data: two constant numeric columns plus a 'letter' column to group on
d = {
    'one': [1] * 5,
    'two': [2] * 5,
    'letter': ['a', 'a', 'b', 'b', 'c'],
}

# Build the dataframe from the dict of columns and display it
df = pd.DataFrame(data=d)
df

Out:
one two letter
0 1 2 a
1 1 2 a
2 1 2 b
3 1 2 b
4 1 2 c
In :
# Split the rows into groups keyed by the values of the 'letter' column
one = df.groupby(by='letter')

# Total the remaining columns within each group and display the result
one.sum()

Out:
one two
letter
a 2 4
b 2 4
c 1 2
In :
# Group on two columns at once; both grouping keys become index levels
letterone = df.groupby(by=['letter', 'one']).sum()
letterone

Out:
two
letter one
a 1 4
b 1 4
c 1 2
In :
# Inspect the index of the grouped result (a MultiIndex of letter/one pairs)
letterone.index

Out:
MultiIndex([('a', 1),
('b', 1),
('c', 1)],
names=['letter', 'one'])

You may not want the columns you are grouping by to become your index; this is easily achieved by passing `as_index=False`, as shown below.

In :
# as_index=False keeps the grouping keys as ordinary columns
letterone = df.groupby(by=['letter', 'one'], as_index=False).sum()
letterone

Out:
letter one two
0 a 1 4
1 b 1 4
2 c 1 2
In :
# Inspect the index again: with as_index=False it is a plain integer index
letterone.index

Out:
Int64Index([0, 1, 2], dtype='int64')

This tutorial was created by HEDARO