Pandas set up

In [1]:
# Import libraries
import pandas as pd
import datetime
import sys
%matplotlib inline

print 'Python version ' + sys.version
print 'Pandas version ' + pd.__version__
Python version 2.7.5 |Anaconda 2.0.1 (64-bit)| (default, Jul  1 2013, 12:37:52) [MSC v.1500 64 bit (AMD64)]
Pandas version 0.14.1

In [2]:
# Create dummy data for examples
sample = pd.DataFrame(data = {'Date':[pd.datetime(2013,1,1), pd.datetime(2014,1,1), pd.datetime(2015,1,1), pd.datetime(2016,1,1)],
                           'Quarter 1':[100,200,300,400],
                           'Quarter 2':[20,30,40,50],
                           'Quarter 3':[15,20,25,30],
                           'Quarter 4':[80,90,100,110]})

sample
Out[2]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4
0 2013-01-01 100 20 15 80
1 2014-01-01 200 30 20 90
2 2015-01-01 300 40 25 100
3 2016-01-01 400 50 30 110
In [3]:
# How to add a column and sum horizontally
# How to add a column and compute the average
# How to add a column and compute the percentage of Total Sales
# How to sort by a column
# How to filter by a value
# How to create a column chart
# How to create a pivot table
# How to perform a vlookup
# How to perform an IF/THEN statement

How to add a column and sum horizontally

In [4]:
# Make a copy of our test data
df = sample.copy(deep=True)

df['Total Yearly Sales'] = df.sum(axis=1)
df
Out[4]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4 Total Yearly Sales
0 2013-01-01 100 20 15 80 215
1 2014-01-01 200 30 20 90 340
2 2015-01-01 300 40 25 100 465
3 2016-01-01 400 50 30 110 590

How to add a column and compute the average

In [5]:
# Columns we are interested in
cols = ['Quarter 1','Quarter 2','Quarter 3','Quarter 4']

df['Average'] = df[cols].mean(axis=1)
df
Out[5]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4 Total Yearly Sales Average
0 2013-01-01 100 20 15 80 215 53.75
1 2014-01-01 200 30 20 90 340 85.00
2 2015-01-01 300 40 25 100 465 116.25
3 2016-01-01 400 50 30 110 590 147.50

How to add a column and compute the percentage of Total Sales

In [6]:
TotalSales = df['Total Yearly Sales'].sum()

df['Percentage of Sales'] = df['Total Yearly Sales'].apply(lambda x: 100.0*x/TotalSales)
df
Out[6]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4 Total Yearly Sales Average Percentage of Sales
0 2013-01-01 100 20 15 80 215 53.75 13.354037
1 2014-01-01 200 30 20 90 340 85.00 21.118012
2 2015-01-01 300 40 25 100 465 116.25 28.881988
3 2016-01-01 400 50 30 110 590 147.50 36.645963

How to sort by a column

In [7]:
df.sort(columns = 'Quarter 1', ascending=False)
Out[7]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4 Total Yearly Sales Average Percentage of Sales
3 2016-01-01 400 50 30 110 590 147.50 36.645963
2 2015-01-01 300 40 25 100 465 116.25 28.881988
1 2014-01-01 200 30 20 90 340 85.00 21.118012
0 2013-01-01 100 20 15 80 215 53.75 13.354037

How to filter by a value

In [8]:
mask = df['Average'] == 85

# Filter where average is equal to 85
df[mask]
Out[8]:
Date Quarter 1 Quarter 2 Quarter 3 Quarter 4 Total Yearly Sales Average Percentage of Sales
1 2014-01-01 200 30 20 90 340 85 21.118012

How to create a column chart

In [9]:
df[cols].plot(kind='bar');

How to create a pivot table

In [11]:
# Create dummy data for examples
sample2 = pd.DataFrame(data = {'Date':[pd.datetime(2013,1,1), pd.datetime(2013,1,1), pd.datetime(2013,1,1), pd.datetime(2013,1,1)],
                            'Type':['A','A','B','B'],
                            'Amount':[80,90,100,110]})

sample2
Out[11]:
Amount Date Type
0 80 2013-01-01 A
1 90 2013-01-01 A
2 100 2013-01-01 B
3 110 2013-01-01 B
In [13]:
# Make a copy of our test data
df2 = sample2.copy(deep=True)

df2.pivot_table(values='Amount', rows='Date', columns='Type', aggfunc='sum')
Out[13]:
Type A B
Date
2013-01-01 170 210

How to perform a vlookup

In [14]:
# Create lookup table
tbl = pd.Series({'A':'letter A', 
                 'B':'letter B'})


df2['Type'] = df2['Type'].map(tbl)
df2
Out[14]:
Amount Date Type
0 80 2013-01-01 letter A
1 90 2013-01-01 letter A
2 100 2013-01-01 letter B
3 110 2013-01-01 letter B

How to perform an IF/THEN statement

In [15]:
# =If(Amount >= 100, "yes", "no")
df2['yes/no'] = df2['Amount'].apply(lambda x: 'yes' if x >= 100 else 'no')
df2
Out[15]:
Amount Date Type yes/no
0 80 2013-01-01 letter A no
1 90 2013-01-01 letter A no
2 100 2013-01-01 letter B yes
3 110 2013-01-01 letter B yes

Author: David Rojas LLC