Pandas set up

In [1]:
# Import libraries
from pandas import DataFrame
import pandas as pd
import datetime
%matplotlib inline

print 'Pandas version: ' + pd.__version__
Pandas version: 0.12.0
In [2]:
# Create dummy data for examples
sample = DataFrame(data = {'Date':[pd.datetime(2013,1,1), pd.datetime(2014,1,1), pd.datetime(2015,1,1), pd.datetime(2016,1,1)],
                           'Quarter 1':[100,200,300,400],
                           'Quarter 2':[20,30,40,50],
                           'Quarter 3':[15,20,25,30],
                           'Quarter 4':[80,90,100,110]})

sample
Out[2]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4
0 2013-01-01 00:00:00 100 20 15 80
1 2014-01-01 00:00:00 200 30 20 90
2 2015-01-01 00:00:00 300 40 25 100
3 2016-01-01 00:00:00 400 50 30 110
In [3]:
# How to add a column and sum horizontally
# How to add a column and compute the average
# How to add a column and compute the percentage of Total Sales
# How to sort by a column
# How to filter by a value
# How to create a column chart
# How to create a pivot table
# How to perform a vlookup
# How to perform an IF/THEN statement

How to add a column and sum horizontally

In [4]:
# Make a copy of our test data
df = sample.copy(deep=True)

df['Total Yearly Sales'] = df.sum(axis=1)
df
Out[4]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4 Total Yearly Sales
0 2013-01-01 00:00:00 100 20 15 80 215
1 2014-01-01 00:00:00 200 30 20 90 340
2 2015-01-01 00:00:00 300 40 25 100 465
3 2016-01-01 00:00:00 400 50 30 110 590

How to add a column and compute the average

In [5]:
# Columns we are interested in
cols = ['Quarter 1','Quarter 2','Quarter 3','Quarter 4']

df['Average'] = df[cols].mean(axis=1)
df
Out[5]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4 Total Yearly Sales Average
0 2013-01-01 00:00:00 100 20 15 80 215 53.75
1 2014-01-01 00:00:00 200 30 20 90 340 85.00
2 2015-01-01 00:00:00 300 40 25 100 465 116.25
3 2016-01-01 00:00:00 400 50 30 110 590 147.50

How to add a column and compute the percentage of Total Sales

In [6]:
TotalSales = df['Total Yearly Sales'].sum()

df['Percentage of Sales'] = df['Total Yearly Sales'].apply(lambda x: 100.0*x/TotalSales)
df
Out[6]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4 Total Yearly Sales Average Percentage of Sales
0 2013-01-01 00:00:00 100 20 15 80 215 53.75 13.354037
1 2014-01-01 00:00:00 200 30 20 90 340 85.00 21.118012
2 2015-01-01 00:00:00 300 40 25 100 465 116.25 28.881988
3 2016-01-01 00:00:00 400 50 30 110 590 147.50 36.645963

How to sort by a column

In [7]:
df.sort(columns = 'Quarter 1', ascending=False)
Out[7]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4 Total Yearly Sales Average Percentage of Sales
3 2016-01-01 00:00:00 400 50 30 110 590 147.50 36.645963
2 2015-01-01 00:00:00 300 40 25 100 465 116.25 28.881988
1 2014-01-01 00:00:00 200 30 20 90 340 85.00 21.118012
0 2013-01-01 00:00:00 100 20 15 80 215 53.75 13.354037

How to filter by a value

In [8]:
mask = df['Average'] == 85

# Filter where average is equal to 85
df[mask]
Out[8]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4 Total Yearly Sales Average Percentage of Sales
1 2014-01-01 00:00:00 200 30 20 90 340 85 21.118012

How to create a column chart

In [9]:
df[cols].plot(kind='bar');

How to create a pivot table

In [10]:
# Create dummy data for examples
sample2 = DataFrame(data = {'Date':[pd.datetime(2013,1,1), pd.datetime(2013,1,1), pd.datetime(2013,1,1), pd.datetime(2013,1,1)],
                            'Type':['A','A','B','B'],
                            'Amount':[80,90,100,110]})

sample2
Out[10]:
Amount Date Type
0 80 2013-01-01 00:00:00 A
1 90 2013-01-01 00:00:00 A
2 100 2013-01-01 00:00:00 B
3 110 2013-01-01 00:00:00 B
In [11]:
# Make a copy of our test data
df2 = sample2.copy(deep=True)

df2.pivot_table(values='Amount', rows='Date', cols='Type', aggfunc='sum')
Out[11]:
Type A B
Date
2013-01-01 170 210

How to perform a vlookup

In [12]:
# Create lookup table
tbl = pd.Series({'A':'letter A', 
                 'B':'letter B'})


df2['Type'] = df2['Type'].map(tbl)
df2
Out[12]:
Amount Date Type
0 80 2013-01-01 00:00:00 letter A
1 90 2013-01-01 00:00:00 letter A
2 100 2013-01-01 00:00:00 letter B
3 110 2013-01-01 00:00:00 letter B

How to perform an IF/THEN statement

In [13]:
# =If(Amount >= 100, "yes", "no")
df2['yes/no'] = df2['Amount'].apply(lambda x: 'yes' if x >= 100 else 'no')
df2
Out[13]:
Amount Date Type yes/no
0 80 2013-01-01 00:00:00 letter A no
1 90 2013-01-01 00:00:00 letter A no
2 100 2013-01-01 00:00:00 letter B yes
3 110 2013-01-01 00:00:00 letter B yes

Author: David Rojas LLC