"""Python equivalents of R's basic data types, shown with NumPy and pandas.

Each section mirrors one R concept (character, numeric, integer, logical,
vector, list, matrix, data frame, factor, NA, subsetting) and prints the
Python counterpart so the two can be compared side by side.
"""
import numpy as np
import pandas as pd

# R 'character' is equivalent to a Python string.
firstName = 'jeff'
print(type(firstName), firstName)

# R 'numeric' is equivalent to a Python float.
heightCM = 188.2
print(type(heightCM), heightCM)

# R 'integer' is equivalent to a Python int.
numberSons = 1
print(type(numberSons), numberSons)

# R 'logical' is equivalent to a Python bool.
teachingCoursera = True
print(type(teachingCoursera), teachingCoursera)

# R 'vectors' are equivalent to a NumPy array or a Python list
# (arrays are used everywhere here for consistency).
heights = np.array([188.2, 181.3, 193.4])
print(heights)
firstNames = np.array(['jeff', 'roger', 'andrew', 'brian'])
print(firstNames)

# R 'list' is equivalent to a Python dictionary; unlike a data frame, the
# values may have different lengths.
vector1 = np.array([188.2, 181.3, 193.4])
vector2 = np.array(['jeff', 'roger', 'andrew', 'brian'])
myList = dict(heights=vector1, firstNames=vector2)
print(myList)
print(myList['heights'])
print(myList['firstNames'])

# R 'matrices' are equivalent to a two-dimensional NumPy array.
myMatrix = np.array([[1, 2], [3, 4]])
print(myMatrix)

# R data frame is equivalent to a pandas DataFrame.
# This first example doesn't work because the input arrays have different
# lengths (3 vs. 4); uncommenting the constructor call raises a ValueError
# complaining that all arrays must be the same length.
vector1 = np.array([188.2, 181.3, 193.4])
vector2 = np.array(['jeff', 'roger', 'andrew', 'brian'])
# myDataFrame = pd.DataFrame(dict(heights = vector1, firstNames = vector2))

# Data frame -- fixed: both columns now have four entries.
vector1 = np.array([188.2, 181.3, 193.4, 192.3])
vector2 = np.array(['jeff', 'roger', 'andrew', 'brian'])
myDataFrame = pd.DataFrame(dict(heights=vector1, firstNames=vector2))
# A bare expression only displays in an interactive session; print it so the
# script shows the frame too.
print(myDataFrame)

# R 'factors' are equivalent to a pandas Categorical.
smoker = np.array(['yes', 'no', 'yes', 'yes'])
# Categorical.from_array is gone from current pandas; the constructor is the
# supported way to build a Categorical from an array.
smokerFactor = pd.Categorical(smoker)
print(smokerFactor)

# R's NA missing value is equivalent to NaN.
# NaN and isnan live in the numpy namespace; they are not builtins.
vector1 = np.array([188.2, 181.3, 193.4, np.nan])
print(vector1)
print(np.isnan(vector1))

# Subsetting.
vector1 = np.array([188.2, 181.3, 193.4, 192.3])
vector2 = np.array(['jeff', 'roger', 'andrew', 'brian'])
myDataFrame = pd.DataFrame(dict(heights=vector1, firstNames=vector2))
print('------------------')
print(vector1[0])
print('------------------')
print(vector1[[0, 1, 3]])
print('------------------')
# .ix was removed from pandas; .iloc gives the same positional selection
# (first row, first two columns).
print(myDataFrame.iloc[0, 0:2])  # appears transposed as compared to R
print('------------------')
print(myDataFrame['firstNames'])  # there's no 'Levels' as in R
print('------------------')
print(myDataFrame[myDataFrame['firstNames'] == 'jeff'])
print('------------------')
print(myDataFrame[myDataFrame['heights'] < 190])