apply
on a pandas Series
and DataFrame
lambda
functions work
# numpy and pandas related imports
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
Let's make two Series and a DataFrame to use for our example
# for example, using lower and uppercase English letters
import string
# ascii_lowercase / ascii_uppercase are locale-independent and available on
# both Python 2 and Python 3 (string.lowercase / string.uppercase are not)
string.ascii_lowercase, string.ascii_uppercase
('abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
# we can make a list composed of the individual lowercase letters
# (ascii_lowercase works on both Python 2 and Python 3)
list(string.ascii_lowercase)
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
# create a pandas Series out of the list of lowercase letters
# (ascii_lowercase is locale-independent and exists on Python 2 and 3;
# print(...) with parentheses also works on both)
lower = Series(list(string.ascii_lowercase), name='lower')
print(type(lower))
lower.head()
<class 'pandas.core.series.Series'>
0 a 1 b 2 c 3 d 4 e Name: lower, dtype: object
# create a pandas Series out of the list of uppercase letters
# (ascii_uppercase is locale-independent and exists on Python 2 and 3)
upper = Series(list(string.ascii_uppercase), name='upper')
# concatenate the two Series as columns, using axis=1
# axis=0 (the default) would instead stack them end-to-end into a single 52-element Series
df = pd.concat((lower, upper), axis=1)
df.head()
lower | upper | |
---|---|---|
0 | a | A |
1 | b | B |
2 | c | C |
3 | d | D |
4 | e | E |
5 rows × 2 columns
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.apply.html:
Series.apply(func, convert_dtype=True, args=(), **kwds)
Invoke function on values of Series.
# Let's start by using Series.apply
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.apply.html
# first of all, it's useful to find a way to use apply to return the exact same Series
def identity(s):
    """Return the argument unchanged.

    Passed to ``apply``, this yields a result equal to the original
    object, which makes it a handy baseline for seeing what ``apply`` does.
    """
    return s
lower.apply(identity)
0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j 10 k 11 l 12 m 13 n 14 o 15 p 16 q 17 r 18 s 19 t 20 u 21 v 22 w 23 x 24 y 25 z Name: lower, dtype: object
# show that identity yields the same Series -- first on element by element basis
lower.apply(identity) == lower
0 True 1 True 2 True 3 True 4 True 5 True 6 True 7 True 8 True 9 True 10 True 11 True 12 True 13 True 14 True 15 True 16 True 17 True 18 True 19 True 20 True 21 True 22 True 23 True 24 True 25 True Name: lower, dtype: bool
# Check that match happens for every element in the Series using numpy.all
# http://docs.scipy.org/doc/numpy/reference/generated/numpy.all.html
np.all(lower.apply(identity) == lower)
True
lambda
Sometimes it's convenient to write functions using lambda, especially short functions that do a simple transformation of their parameters. Only functions whose body is a single expression can be rewritten with lambda.
def add_preface(s):
    """Return the string ``s`` prefixed with ``'letter '``."""
    return 'letter ' + s
lower.apply(add_preface)
0 letter a 1 letter b 2 letter c 3 letter d 4 letter e 5 letter f 6 letter g 7 letter h 8 letter i 9 letter j 10 letter k 11 letter l 12 letter m 13 letter n 14 letter o 15 letter p 16 letter q 17 letter r 18 letter s 19 letter t 20 letter u 21 letter v 22 letter w 23 letter x 24 letter y 25 letter z Name: lower, dtype: object
# rewrite with lambda
lower.apply(lambda s: 'letter ' + s)
0 letter a 1 letter b 2 letter c 3 letter d 4 letter e 5 letter f 6 letter g 7 letter h 8 letter i 9 letter j 10 letter k 11 letter l 12 letter m 13 letter n 14 letter o 15 letter p 16 letter q 17 letter r 18 letter s 19 letter t 20 letter u 21 letter v 22 letter w 23 letter x 24 letter y 25 letter z Name: lower, dtype: object
Another illustration of using apply
-- using ord
and chr
# ord: Given a string of length one, return an integer representing the Unicode code
# point of the character when the argument is a unicode object, or the value of the
# byte when the argument is an 8-bit string.
# http://docs.python.org/2.7/library/functions.html#ord
ord('a')
97
# chr: Return a string of one character whose ASCII code is the integer i.
# http://docs.python.org/2.7/library/functions.html#chr
chr(97)
'a'
# show that for the case of 'a', chr(ord()) returns what we start with:'a'
chr(ord('a')) == 'a'
True
# we can test whether chr reverses ord for all the lower case letters
# note how we chain two apply together
np.all(lower.apply(ord).apply(chr) == lower)
True
Note that we read off a specific series from the DataFrame
type(df.upper)
pandas.core.series.Series
# transform
df.upper.apply(lambda s: s.lower())
0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j 10 k 11 l 12 m 13 n 14 o 15 p 16 q 17 r 18 s 19 t 20 u 21 v 22 w 23 x 24 y 25 z Name: upper, dtype: object
apply
can also be applied to a DataFrame
http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.apply.html
DataFrame.apply(func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds)
Applies function along input axis of DataFrame.
Objects passed to functions are Series objects having index either the DataFrame’s index (axis=0) or the columns (axis=1). Return type depends on whether passed function aggregates, or the reduce argument if the DataFrame is empty.
# let's show that, for the identity function, applying along columns (axis=0)
# or along rows (axis=1) gives the same result
def identity(s):
    """Return the argument unchanged.

    With ``DataFrame.apply`` the argument is a Series (a column for
    axis=0, a row for axis=1); returning it as-is rebuilds the same frame.
    """
    return s
np.all(df.apply(identity, axis=0) == df.apply(identity, axis=1))
True
# for each column, first lower and then upper, return the index
def index(s):
    """Return the index of the Series ``s``.

    With ``DataFrame.apply(..., axis=0)`` each ``s`` is a column, so its
    index holds the DataFrame's row labels.
    """
    return s.index
df.apply(index, axis=0)
lower | upper | |
---|---|---|
0 | 0 | 0 |
1 | 1 | 1 |
2 | 2 | 2 |
3 | 3 | 3 |
4 | 4 | 4 |
5 | 5 | 5 |
6 | 6 | 6 |
7 | 7 | 7 |
8 | 8 | 8 |
9 | 9 | 9 |
10 | 10 | 10 |
11 | 11 | 11 |
12 | 12 | 12 |
13 | 13 | 13 |
14 | 14 | 14 |
15 | 15 | 15 |
16 | 16 | 16 |
17 | 17 | 17 |
18 | 18 | 18 |
19 | 19 | 19 |
20 | 20 | 20 |
21 | 21 | 21 |
22 | 22 | 22 |
23 | 23 | 23 |
24 | 24 | 24 |
25 | 25 | 25 |
26 rows × 2 columns
# for each row (axis=1), first lower and then upper, return the index
# (which are the column names)
def index(s):
    """Return the index of the Series ``s``.

    With ``DataFrame.apply(..., axis=1)`` each ``s`` is a row, so its
    index holds the DataFrame's column names.
    """
    return s.index
df.apply(index, axis=1)
lower | upper | |
---|---|---|
0 | lower | upper |
1 | lower | upper |
2 | lower | upper |
3 | lower | upper |
4 | lower | upper |
5 | lower | upper |
6 | lower | upper |
7 | lower | upper |
8 | lower | upper |
9 | lower | upper |
10 | lower | upper |
11 | lower | upper |
12 | lower | upper |
13 | lower | upper |
14 | lower | upper |
15 | lower | upper |
16 | lower | upper |
17 | lower | upper |
18 | lower | upper |
19 | lower | upper |
20 | lower | upper |
21 | lower | upper |
22 | lower | upper |
23 | lower | upper |
24 | lower | upper |
25 | lower | upper |
26 rows × 2 columns
# it might be easier to see the difference between axis=0 vs axis=1
# by using join
# Consider what you get with
"".join(df.lower)
'abcdefghijklmnopqrstuvwxyz'
# Now compare (axis=0)
df.apply(lambda s: "".join(s), axis=0)
lower abcdefghijklmnopqrstuvwxyz upper ABCDEFGHIJKLMNOPQRSTUVWXYZ dtype: object
# join with axis=1
df.apply(lambda s: "".join(s), axis=1)
0 aA 1 bB 2 cC 3 dD 4 eE 5 fF 6 gG 7 hH 8 iI 9 jJ 10 kK 11 lL 12 mM 13 nN 14 oO 15 pP 16 qQ 17 rR 18 sS 19 tT 20 uU 21 vV 22 wW 23 xX 24 yY 25 zZ dtype: object
# note that you can use the index labels inside the function passed to apply
df.apply(lambda s: s['upper'] + s['lower'], axis=1)
0 Aa 1 Bb 2 Cc 3 Dd 4 Ee 5 Ff 6 Gg 7 Hh 8 Ii 9 Jj 10 Kk 11 Ll 12 Mm 13 Nn 14 Oo 15 Pp 16 Qq 17 Rr 18 Ss 19 Tt 20 Uu 21 Vv 22 Ww 23 Xx 24 Yy 25 Zz dtype: object