"""Tutorial: element-wise and axis-wise ``apply`` on pandas Series/DataFrame.

This reads like an export of an interactive (notebook/REPL) session: many
lines are bare expressions whose value is meant to be displayed there. Run
as a plain script, only the single ``print`` call produces output.

The code is written to run unchanged on both Python 2 and Python 3.
"""

# numpy and pandas related imports
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

# for example, using lower and uppercase English letters
import string

# ascii_lowercase / ascii_uppercase are locale-independent and exist on both
# Python 2 and Python 3 (string.lowercase / string.uppercase are Python 2 only)
string.ascii_lowercase, string.ascii_uppercase

# we can make a list composed of the individual lowercase letters
list(string.ascii_lowercase)

# create a pandas Series out of the list of lowercase letters
lower = Series(list(string.ascii_lowercase), name='lower')
print(type(lower))
lower.head()

# create a pandas Series out of the list of uppercase letters
upper = Series(list(string.ascii_uppercase), name='upper')

# concatenate the two Series as columns, using axis=1
# (axis=0 would instead stack them end to end into a single 52-element Series)
df = pd.concat((lower, upper), axis=1)
df.head()


# Let's start by using Series.apply
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.apply.html

# first of all, it's useful to find a way to use apply to return the exact
# same Series
def identity(s):
    """Return ``s`` unchanged."""
    return s


lower.apply(identity)

# show that identity yields the same Series -- first on element by element
# basis
lower.apply(identity) == lower

# Check that match happens for every element in the Series using numpy.all
# http://docs.scipy.org/doc/numpy/reference/generated/numpy.all.html
np.all(lower.apply(identity) == lower)


def add_preface(s):
    """Return the string ``s`` prefixed with ``'letter '``."""
    return 'letter ' + s


lower.apply(add_preface)

# rewrite with lambda
lower.apply(lambda s: 'letter ' + s)

# ord: Given a string of length one, return an integer representing the
# Unicode code point of the character when the argument is a unicode object,
# or the value of the byte when the argument is an 8-bit string.
# http://docs.python.org/2.7/library/functions.html#ord
ord('a')

# chr: Return a string of one character whose ASCII code is the integer i.
# http://docs.python.org/2.7/library/functions.html#chr
chr(97)

# show that for the case of 'a', chr(ord()) returns what we start with: 'a'
chr(ord('a')) == 'a'

# we can test whether chr reverses ord for all the lower case letters
# note how we chain two apply together
np.all(lower.apply(ord).apply(chr) == lower)

# bracket indexing is used for columns throughout: the column names 'upper'
# and 'lower' are also str method names, so attribute access is easy to misread
type(df['upper'])

# transform
df['upper'].apply(lambda s: s.lower())

# let's show that whether we use apply on columns (axis=0) or rows (axis=1),
# we get the same result (reusing identity from above)
np.all(df.apply(identity, axis=0) == df.apply(identity, axis=1))


def index(s):
    """Return the ``index`` attribute of ``s``."""
    return s.index


# for each column, first lower and then upper, return the index
df.apply(index, axis=0)

# for each row (axis=1), return the index
# (which are the column names: lower and then upper)
df.apply(index, axis=1)

# it might be easier to see the difference between axis=0 vs axis=1
# by using join

# Consider what you get with
"".join(df['lower'])

# Now compare (axis=0)
df.apply(lambda s: "".join(s), axis=0)

# join with axis=1
df.apply(lambda s: "".join(s), axis=1)

# note that you can use the index labels inside the function passed to apply
df.apply(lambda s: s['upper'] + s['lower'], axis=1)