import pandas as pd
# Load the NBA census CSV (relative path, so it must sit in the working directory)
nba_df = pd.read_csv("NBA-Census-10.14.2013.csv")
# Look at the first ten rows of the dataframe.
# .head() on its own only returns 5 rows, so ask for 10 explicitly to match
# the ten-row preview displayed below and the slice-based alternative in the next cell.
nba_df.head(10)
Name | Age | Team | POS | # | 2013 $ | Ht (In.) | WT | EXP | 1st Year | DOB | School | City | State (Province, Territory, Etc..) | Country | Race | HS Only | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Gee, Alonzo | 26 | Cavaliers | F | 33 | $3,250,000 | 78 | 219 | 4 | 2009 | 5/29/1987 | Alabama | Riviera Beach, FL | Florida | US | Black | No |
1 | Wallace, Gerald | 31 | Celtics | F | 45 | $10,105,855 | 79 | 220 | 12 | 2001 | 7/23/1982 | Alabama | Sylacauga, AL | Alabama | US | Black | No |
2 | Williams, Mo | 30 | Trail Blazers | G | 25 | $2,652,000 | 73 | 195 | 10 | 2003 | 12/19/1982 | Alabama | Jackson, MS | Mississippi | US | Black | No |
3 | Gladness, Mickell | 27 | Magic | C | 40 | $762,195 | 83 | 220 | 2 | 2011 | 7/26/1986 | Alabama A&M | Birmingham, AL | Alabama | US | Black | No |
4 | Jefferson, Richard | 33 | Jazz | F | 44 | $11,046,000 | 79 | 230 | 12 | 2001 | 6/21/1980 | Arizona | Los Angeles, CA | California | US | Black | No |
5 | Hill, Solomon | 22 | Pacers | F | 9 | $1,246,680 | 79 | 220 | 0 | 2013 | 3/18/1991 | Arizona | Los Angeles, CA | California | US | Black | No |
6 | Budinger, Chase | 25 | Timberwolves | F | 10 | $5,000,000 | 79 | 218 | 4 | 2009 | 5/22/1988 | Arizona | Encinitas, CA | California | US | White | No |
7 | Williams, Derrick | 22 | Timberwolves | F | 7 | $5,016,960 | 80 | 241 | 2 | 2011 | 5/25/1991 | Arizona | La Mirada, CA | California | US | Black | No |
8 | Hill, Jordan | 26 | Lakers | F/C | 27 | $3,563,600 | 82 | 235 | 1 | 2012 | 7/27/1987 | Arizona | Newberry, SC | South Carolina | US | Black | No |
9 | Frye, Channing | 30 | Suns | F/C | 8 | $6,500,000 | 83 | 245 | 8 | 2005 | 5/17/1983 | Arizona | White Plains, NY | New York | US | Black | No |
# ...or grab the first ten rows with a positional slice
nba_df.iloc[:10]
Name | Age | Team | POS | # | 2013 $ | Ht (In.) | WT | EXP | 1st Year | DOB | School | City | State (Province, Territory, Etc..) | Country | Race | HS Only | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Gee, Alonzo | 26 | Cavaliers | F | 33 | $3,250,000 | 78 | 219 | 4 | 2009 | 5/29/1987 | Alabama | Riviera Beach, FL | Florida | US | Black | No |
1 | Wallace, Gerald | 31 | Celtics | F | 45 | $10,105,855 | 79 | 220 | 12 | 2001 | 7/23/1982 | Alabama | Sylacauga, AL | Alabama | US | Black | No |
2 | Williams, Mo | 30 | Trail Blazers | G | 25 | $2,652,000 | 73 | 195 | 10 | 2003 | 12/19/1982 | Alabama | Jackson, MS | Mississippi | US | Black | No |
3 | Gladness, Mickell | 27 | Magic | C | 40 | $762,195 | 83 | 220 | 2 | 2011 | 7/26/1986 | Alabama A&M | Birmingham, AL | Alabama | US | Black | No |
4 | Jefferson, Richard | 33 | Jazz | F | 44 | $11,046,000 | 79 | 230 | 12 | 2001 | 6/21/1980 | Arizona | Los Angeles, CA | California | US | Black | No |
5 | Hill, Solomon | 22 | Pacers | F | 9 | $1,246,680 | 79 | 220 | 0 | 2013 | 3/18/1991 | Arizona | Los Angeles, CA | California | US | Black | No |
6 | Budinger, Chase | 25 | Timberwolves | F | 10 | $5,000,000 | 79 | 218 | 4 | 2009 | 5/22/1988 | Arizona | Encinitas, CA | California | US | White | No |
7 | Williams, Derrick | 22 | Timberwolves | F | 7 | $5,016,960 | 80 | 241 | 2 | 2011 | 5/25/1991 | Arizona | La Mirada, CA | California | US | Black | No |
8 | Hill, Jordan | 26 | Lakers | F/C | 27 | $3,563,600 | 82 | 235 | 1 | 2012 | 7/27/1987 | Arizona | Newberry, SC | South Carolina | US | Black | No |
9 | Frye, Channing | 30 | Suns | F/C | 8 | $6,500,000 | 83 | 245 | 8 | 2005 | 5/17/1983 | Arizona | White Plains, NY | New York | US | Black | No |
# Count how many players fall into each category of a column
# (.value_counts() is for labels/groups; for numerical data reach for .describe())
nba_df.loc[:, "POS"].value_counts()
G 175 F 142 F/C 74 G/F 70 C 67 dtype: int64
# Keep only the rows that match one characteristic (here: position is "F")
nba_df.loc[nba_df["POS"] == "F"].head()
Name | Age | Team | POS | # | 2013 $ | Ht (In.) | WT | EXP | 1st Year | DOB | School | City | State (Province, Territory, Etc..) | Country | Race | HS Only | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Gee, Alonzo | 26 | Cavaliers | F | 33 | $3,250,000 | 78 | 219 | 4 | 2009 | 5/29/1987 | Alabama | Riviera Beach, FL | Florida | US | Black | No |
1 | Wallace, Gerald | 31 | Celtics | F | 45 | $10,105,855 | 79 | 220 | 12 | 2001 | 7/23/1982 | Alabama | Sylacauga, AL | Alabama | US | Black | No |
4 | Jefferson, Richard | 33 | Jazz | F | 44 | $11,046,000 | 79 | 230 | 12 | 2001 | 6/21/1980 | Arizona | Los Angeles, CA | California | US | Black | No |
5 | Hill, Solomon | 22 | Pacers | F | 9 | $1,246,680 | 79 | 220 | 0 | 2013 | 3/18/1991 | Arizona | Los Angeles, CA | California | US | Black | No |
6 | Budinger, Chase | 25 | Timberwolves | F | 10 | $5,000,000 | 79 | 218 | 4 | 2009 | 5/22/1988 | Arizona | Encinitas, CA | California | US | White | No |
# Keep only the rows that match every one of several characteristics:
# wrap each condition in parentheses and join them with & ("and")
nba_df.loc[(nba_df["POS"] == "F") & (nba_df["HS Only"] == "No")].head()
Name | Age | Team | POS | # | 2013 $ | Ht (In.) | WT | EXP | 1st Year | DOB | School | City | State (Province, Territory, Etc..) | Country | Race | HS Only | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Gee, Alonzo | 26 | Cavaliers | F | 33 | $3,250,000 | 78 | 219 | 4 | 2009 | 5/29/1987 | Alabama | Riviera Beach, FL | Florida | US | Black | No |
1 | Wallace, Gerald | 31 | Celtics | F | 45 | $10,105,855 | 79 | 220 | 12 | 2001 | 7/23/1982 | Alabama | Sylacauga, AL | Alabama | US | Black | No |
4 | Jefferson, Richard | 33 | Jazz | F | 44 | $11,046,000 | 79 | 230 | 12 | 2001 | 6/21/1980 | Arizona | Los Angeles, CA | California | US | Black | No |
5 | Hill, Solomon | 22 | Pacers | F | 9 | $1,246,680 | 79 | 220 | 0 | 2013 | 3/18/1991 | Arizona | Los Angeles, CA | California | US | Black | No |
6 | Budinger, Chase | 25 | Timberwolves | F | 10 | $5,000,000 | 79 | 218 | 4 | 2009 | 5/22/1988 | Arizona | Encinitas, CA | California | US | White | No |
# Keep the rows that match any one of several values:
# .isin() is shorthand for chaining == comparisons together with | ("or")
nba_df[nba_df["POS"].isin(["F", "G"])].head()
Name | Age | Team | POS | # | 2013 $ | Ht (In.) | WT | EXP | 1st Year | DOB | School | City | State (Province, Territory, Etc..) | Country | Race | HS Only | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Gee, Alonzo | 26 | Cavaliers | F | 33 | $3,250,000 | 78 | 219 | 4 | 2009 | 5/29/1987 | Alabama | Riviera Beach, FL | Florida | US | Black | No |
1 | Wallace, Gerald | 31 | Celtics | F | 45 | $10,105,855 | 79 | 220 | 12 | 2001 | 7/23/1982 | Alabama | Sylacauga, AL | Alabama | US | Black | No |
2 | Williams, Mo | 30 | Trail Blazers | G | 25 | $2,652,000 | 73 | 195 | 10 | 2003 | 12/19/1982 | Alabama | Jackson, MS | Mississippi | US | Black | No |
4 | Jefferson, Richard | 33 | Jazz | F | 44 | $11,046,000 | 79 | 230 | 12 | 2001 | 6/21/1980 | Arizona | Los Angeles, CA | California | US | Black | No |
5 | Hill, Solomon | 22 | Pacers | F | 9 | $1,246,680 | 79 | 220 | 0 | 2013 | 3/18/1991 | Arizona | Los Angeles, CA | California | US | Black | No |
# Pull out the rows where a column is missing (NaN/null)
nba_df[nba_df["Race"].isnull()].head()
Name | Age | Team | POS | # | 2013 $ | Ht (In.) | WT | EXP | 1st Year | DOB | School | City | State (Province, Territory, Etc..) | Country | Race | HS Only | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
300 | Karasev, Sergey | 19 | Cavaliers | G/F | 10 | $1,467,840 | 79 | 203 | 0 | 2013 | 10/26/1993 | n/a | Saint Petersburg | n/a | Russia | NaN | No |
# Pull out the rows where a column is NOT missing: ~ flips the boolean mask
nba_df[~nba_df["Race"].isnull()].head()
Name | Age | Team | POS | # | 2013 $ | Ht (In.) | WT | EXP | 1st Year | DOB | School | City | State (Province, Territory, Etc..) | Country | Race | HS Only | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Gee, Alonzo | 26 | Cavaliers | F | 33 | $3,250,000 | 78 | 219 | 4 | 2009 | 5/29/1987 | Alabama | Riviera Beach, FL | Florida | US | Black | No |
1 | Wallace, Gerald | 31 | Celtics | F | 45 | $10,105,855 | 79 | 220 | 12 | 2001 | 7/23/1982 | Alabama | Sylacauga, AL | Alabama | US | Black | No |
2 | Williams, Mo | 30 | Trail Blazers | G | 25 | $2,652,000 | 73 | 195 | 10 | 2003 | 12/19/1982 | Alabama | Jackson, MS | Mississippi | US | Black | No |
3 | Gladness, Mickell | 27 | Magic | C | 40 | $762,195 | 83 | 220 | 2 | 2011 | 7/26/1986 | Alabama A&M | Birmingham, AL | Alabama | US | Black | No |
4 | Jefferson, Richard | 33 | Jazz | F | 44 | $11,046,000 | 79 | 230 | 12 | 2001 | 6/21/1980 | Arizona | Los Angeles, CA | California | US | Black | No |
# ...or ask for the non-missing rows directly with .notnull()
nba_df[nba_df["Race"].notnull()].head()
Name | Age | Team | POS | # | 2013 $ | Ht (In.) | WT | EXP | 1st Year | DOB | School | City | State (Province, Territory, Etc..) | Country | Race | HS Only | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Gee, Alonzo | 26 | Cavaliers | F | 33 | $3,250,000 | 78 | 219 | 4 | 2009 | 5/29/1987 | Alabama | Riviera Beach, FL | Florida | US | Black | No |
1 | Wallace, Gerald | 31 | Celtics | F | 45 | $10,105,855 | 79 | 220 | 12 | 2001 | 7/23/1982 | Alabama | Sylacauga, AL | Alabama | US | Black | No |
2 | Williams, Mo | 30 | Trail Blazers | G | 25 | $2,652,000 | 73 | 195 | 10 | 2003 | 12/19/1982 | Alabama | Jackson, MS | Mississippi | US | Black | No |
3 | Gladness, Mickell | 27 | Magic | C | 40 | $762,195 | 83 | 220 | 2 | 2011 | 7/26/1986 | Alabama A&M | Birmingham, AL | Alabama | US | Black | No |
4 | Jefferson, Richard | 33 | Jazz | F | 44 | $11,046,000 | 79 | 230 | 12 | 2001 | 6/21/1980 | Arizona | Los Angeles, CA | California | US | Black | No |
# Retrieve everyone whose position is not exactly "G"
# (combined positions such as "G/F" are different labels, so those rows are kept)
nba_df[nba_df["POS"] != "G"].head()
Name | Age | Team | POS | # | 2013 $ | Ht (In.) | WT | EXP | 1st Year | DOB | School | City | State (Province, Territory, Etc..) | Country | Race | HS Only | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Gee, Alonzo | 26 | Cavaliers | F | 33 | $3,250,000 | 78 | 219 | 4 | 2009 | 5/29/1987 | Alabama | Riviera Beach, FL | Florida | US | Black | No |
1 | Wallace, Gerald | 31 | Celtics | F | 45 | $10,105,855 | 79 | 220 | 12 | 2001 | 7/23/1982 | Alabama | Sylacauga, AL | Alabama | US | Black | No |
3 | Gladness, Mickell | 27 | Magic | C | 40 | $762,195 | 83 | 220 | 2 | 2011 | 7/26/1986 | Alabama A&M | Birmingham, AL | Alabama | US | Black | No |
4 | Jefferson, Richard | 33 | Jazz | F | 44 | $11,046,000 | 79 | 230 | 12 | 2001 | 6/21/1980 | Arizona | Los Angeles, CA | California | US | Black | No |
5 | Hill, Solomon | 22 | Pacers | F | 9 | $1,246,680 | 79 | 220 | 0 | 2013 | 3/18/1991 | Arizona | Los Angeles, CA | California | US | Black | No |
# Summary statistics (count, mean, std, min, quartiles, max) for a numerical column
# For labels or groups, .value_counts() is the better tool
nba_df.Age.describe()
count 528.000000 mean 26.242424 std 4.178868 min 18.000000 25% 23.000000 50% 25.000000 75% 29.000000 max 39.000000 dtype: float64
# Summary statistics of a numerical column, computed separately for each group
nba_df.groupby(by="POS")["Age"].describe()
POS C count 67.000000 mean 26.208955 std 3.800069 min 19.000000 25% 23.500000 50% 26.000000 75% 28.000000 max 36.000000 F count 142.000000 mean 26.352113 std 4.122585 min 20.000000 25% 23.000000 50% 25.500000 75% 29.000000 max 37.000000 F/C count 74.000000 mean 27.175676 std 4.142523 min 20.000000 25% 24.000000 50% 26.000000 75% 30.000000 max 39.000000 G count 175.000000 mean 25.725714 std 4.364719 min 19.000000 25% 22.000000 50% 25.000000 75% 28.000000 max 39.000000 G/F count 70.000000 mean 26.357143 std 4.121473 min 18.000000 25% 23.000000 50% 26.000000 75% 28.750000 max 36.000000 dtype: float64
# Get rid of columns you don't want with .drop (axis=1 means "columns")
# The result has to be assigned to a new (or the same) variable to keep it
nba_df = nba_df.drop("City", axis=1)
nba_df.columns
Index([u'Name', u'Age', u'Team', u'POS', u'#', u'2013 $', u'Ht (In.)', u'WT', u'EXP', u'1st Year', u'DOB', u'School', u'State (Province, Territory, Etc..)', u'Country', u'Race', u'HS Only'], dtype='object')
# Derive a new column from an existing one (inches -> centimetres)
nba_df["Ht (Cm.)"] = 2.54 * nba_df["Ht (In.)"]
nba_df.head(2)
Name | Age | Team | POS | # | 2013 $ | Ht (In.) | WT | EXP | 1st Year | DOB | School | State (Province, Territory, Etc..) | Country | Race | HS Only | Ht (Cm.) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Gee, Alonzo | 26 | Cavaliers | F | 33 | $3,250,000 | 78 | 219 | 4 | 2009 | 5/29/1987 | Alabama | Florida | US | Black | No | 198.12 |
1 | Wallace, Gerald | 31 | Celtics | F | 45 | $10,105,855 | 79 | 220 | 12 | 2001 | 7/23/1982 | Alabama | Alabama | US | Black | No | 200.66 |
# Apply a string method to every value in a column
# The .str accessor is what lets you treat the column's values as strings
nba_df.loc[:, "Name"].str.lower()
0 gee, alonzo 1 wallace, gerald 2 williams, mo 3 gladness, mickell 4 jefferson, richard 5 hill, solomon 6 budinger, chase 7 williams, derrick 8 hill, jordan 9 frye, channing 10 bayless, jerryd 11 terry, jason 12 fogg, kyle 13 iguodala, andre 14 boateng, eric ... 513 alexander, joe 514 fischer, d'or 515 ebanks, devin 516 johnson, amir 517 martin, kevin 518 evans, jeremy 519 lee, courtney 520 mekel, gal 521 murry, toure' 522 stiemsma, greg 523 leuer, jon 524 landry, marcus 525 harris, devin 526 west, david 527 crawford, jordan Name: Name, Length: 528, dtype: object
# Do more intense manipulation with .apply + an external function
# You will always forget to do axis=1, so remember it!
# Just treat row like a dictionary, it goes one at a time
def do_i_like_them(row):
    """Return True when the player in ``row`` is 31 or older, else False.

    ``row`` only needs to support ``row["Age"]`` lookup, so a dataframe row
    or a plain dict both work.
    """
    # bool() keeps the result a plain Python bool even if the comparison
    # hands back a NumPy boolean
    return bool(row["Age"] >= 31)
# Run the function once per row (axis=1) and store the answers in a new column
nba_df["Liked"] = nba_df.apply(func=do_i_like_them, axis=1)
nba_df.loc[:, "Liked"].value_counts()
False 439 True 89 dtype: int64
# OPEN QUESTION: HOW DO YOU ADD A ROW TO A DATAFRAME!!!!!!!
# Get one row of a dataframe (it comes back as a Series keyed by column name)
# .ix was removed in pandas 1.0; with integer row labels it looked rows up by
# label, which is exactly what .loc does
nba_df.loc[0]
# Use .iloc[0] instead when you want the row by position rather than by label
Name Gee, Alonzo Age 26 Team Cavaliers POS F # 33 2013 $ $3,250,000 Ht (In.) 78 WT 219 EXP 4 1st Year 2009 DOB 5/29/1987 School Alabama State (Province, Territory, Etc..) Florida Country US Race Black HS Only No Ht (Cm.) 198.12 Liked False Name: 0, dtype: object
# For loops with dataframes
# Can't do `for row in nba_df`, gotta use iterrows(), which hands back
# an (index, row) pair on every pass
for index, row in nba_df.iterrows():
    # Parenthesised single-argument print runs on both Python 2 and Python 3
    print(str(index) + ": " + row["Name"])
0: Gee, Alonzo 1: Wallace, Gerald 2: Williams, Mo 3: Gladness, Mickell 4: Jefferson, Richard 5: Hill, Solomon 6: Budinger, Chase 7: Williams, Derrick 8: Hill, Jordan 9: Frye, Channing 10: Bayless, Jerryd 11: Terry, Jason 12: Fogg, Kyle 13: Iguodala, Andre 14: Boateng, Eric 15: Diogu, Ike 16: Ayres, Jeff 17: Harden, James 18: Felix, Carrick 19: Pargo, Jannero 20: Beverley, Patrick 21: Johnson, Joe 22: Brewer, Ronnie 23: Fisher, Derek 24: Miller, Quincy 25: Acy, Quincy 26: Jones, Perry 27: Udoh, Ekpe 28: Clark, Ian 29: Andersen, Chris 30: Jackson, Reggie 31: Dudley, Jared 32: O'Bryant, Patrick 33: Davies, Brandon 34: Fredette, Jimmer 35: Mack, Shelvin 36: Hayward, Gordon 37: Anderson, Ryan 38: Crabbe, Allen 39: Griffin, Eric 40: Taylor, Jermaine 41: Kaman, Chris 42: Martin, Kenyon 43: Maxiell, Jason 44: Stephenson, Lance 45: Booker, Trevor 46: Cole, Norris 47: Perkins, Kendrick 48: Copeland, Chris 49: Burks, Alec 50: Billups, Chauncey 51: Roberson, Andr� 52: Smith, Jason 53: Gordon, Ben 54: Thabeet, Hasheem 55: Drummond, Andre 56: Adrien, Jeff 57: Butler, Caron 58: Villanueva, Charlie 59: Gay, Rudy 60: Armstrong, Hilton 61: Okafor, Emeka 62: Walker, Kemba 63: Allen, Ray 64: Price, A.J. 65: Lamb, Jeremy 66: Tolliver, Anthony 67: Korver, Kyle 68: Stoudemire, Amar'e� 69: Curry, Stephen 70: Wright, Chris 71: Roberts, Brian 72: Koshwal, Mac 73: Chandler, Wilson 74: Green, Willie 75: McCallum, Ray 76: Chandler, Tyson 77: Irving, Kyrie 78: Boozer, Carlos 79: Deng, Luol 80: Battier, Shane 81: Kelly, Ryan 82: Thomas, Lance 83: Maggette, Corey 84: McRoberts, Josh 85: Brand, Elton 86: Plumlee, Mason 87: Plumlee, Miles 88: Redick, J. J. 
89: Rivers, Austin 90: Curry, Seth 91: Henderson, Gerald 92: Dunleavy, Mike 93: Singler, Kyle 94: James, Mike 95: Stuckey, Rodney 96: O'Neal, Jermaine 97: Lewis, Rashard 98: Garnett, Kevin 99: Horford, Al 100: Murphy, Erik 101: Noah, Joakim 102: Parsons, Chandler 103: Haslem, Udonis 104: Bonner, Matt 105: Speights, Marreese 106: Lee, David 107: Calathes, Nick 108: Beal, Bradley 109: Miller, Mike 110: Brewer, Corey 111: James, Bernard 112: Singleton, Chris 113: Douglas, Toney 114: McGuire, Dominic 115: Ely, Melvin 116: Smith, Greg 117: George, Paul 118: Mensah-Bonsu, Pops 119: Sims, Henry 120: Hibbert, Roy 121: Green, Jeff 122: Porter, Otto 123: Monroe, Greg 124: Thompson, Hollis 125: Caldwell-Pope, Kentavious 126: Wilkins, Damien 127: Young, Thaddeus 128: Lawal, Gani 129: Bosh, Chris 130: Favors, Derrick 131: Jack, Jarrett 132: Morrow, Anthony 133: Bynum, Will 134: Shumpert, Iman 135: Rice, Glen 136: Brown, Kwame 137: Olynyk, Kelly 138: Harris, Elias 139: Turiaf, Ronny 140: Sacre, Robert 141: Daye, Austin 142: Green, Gerald 143: Lin, Jeremy 144: Cousin, Marcus 145: Leonard, Meyers 146: Cook, Brian 147: Williams, Deron 148: White, DJ 149: Zeller, Cody 150: Oladipo, Victor 151: Gordon, Eric 152: Machado, Scott 153: Evans, Reggie 154: White, Royce 155: Garrett, Diante 156: Hill, George 157: Henry, Xavier 158: Aldrich, Cole 159: Withey, Jeff 160: Pierce, Paul 161: Arthur, Darrell 162: Jackson, Darnell 163: Morris, Markieff 164: Morris, Marcus 165: Robinson, Thomas 166: Collison, Nick 167: Hinrich, Kirk 168: Chalmers, Mario 169: McLemore, Ben 170: Taylor, Tyshawn 171: Rush, Brandon 172: Beasley, Michael 173: McGruder, Rodney 174: Noel, Nerlens 175: Mohammed, Nazr 176: Cousins, DeMarcus 177: Harrellson, Josh 178: Kidd-Gilchrist, Michael 179: Prince, Tayshaun 180: Miller, Darius 181: Jones, Terrence 182: Hayes, Chuck 183: Patterson, Patrick 184: Davis, Anthony 185: Knight, Brandon 186: Teague, Marquis 187: Rondo, Rajon 188: Meeks, Jodie 189: Lamb, Doron 190: Bledsoe, Eric 
191: Goodwin, Archie 192: Wall, John 193: Bogans, Keith 194: Butler, Rasual 195: Ellis, Monta 196: McCollum, C. J. 197: Millsap, Paul 198: Garc�a, Francisco 199: Dieng, Gorgui 200: Clark, Earl 201: Smith, Chris 202: Siva, Peyton 203: Bryant, Kobe 204: Randolph, Anthony 205: Hamilton, Justin 206: Bass, Brandon 207: Davis, Glen 208: Johnson, Chris 209: Thornton, Marcus 210: Temple, Garrett 211: Crowder, Jae 212: Novak, Steve 213: Blue, Vander 214: Wade, Dwyane 215: Lockett, Trent 216: Johnson-Odom, Darius 217: Buycks, Dwight 218: Butler, Jimmy 219: Hayward, Lazar 220: Matthews, Wesley 221: Len, Alex 222: Blake, Steve 223: V�squez, Greivis 224: Gaffney, Tony 225: Camby, Marcus 226: Williams, Shawne 227: Rose, Derrick 228: Williams, Elliot 229: Evans, Tyreke 230: Barton, Will 231: Thomas, Adonis 232: Douglas-Roberts, Chris 233: Kadji, Kenny 234: Jones, DeQuan 235: Larkin, Shane 236: Jones, James 237: Salmons, John 238: Morris, Darius 239: Crawford, Jamal 240: Burke, Trey 241: Hardaway, Tim, Jr. 
242: Harris, Manny 243: Randolph, Zach 244: Green, Draymond 245: Brown, Shannon 246: Richardson, Jason 247: Anderson, Alan 248: Dawson, Eric 249: Humphries, Kris 250: Williams, Rodney 251: Becton-Buckner, Reginald 252: Moultrie, Arnett 253: Varnado, Jarvis 254: Bost, Dee 255: Carroll, DeMarre 256: Pressey, Phil 257: Faried, Kenneth 258: Canaan, Isaiah 259: Morais, Carlos 260: Scola, Luis 261: Prigioni, Pablo 262: Gin�bili, Manu 263: Delfino, Carlos 264: Parker, Tony 265: Teletovi?, Mirza 266: Splitter, Tiago 267: Varej�o, Anderson 268: Faverani, V�tor 269: Nen� 270: Biyombo, Bismack 271: Ibaka, Serge 272: Vesel�, Jan 273: Freeland, Joel 274: Gobert, Rudy 275: Mahinmi, Ian 276: Diaw, Boris 277: Fournier, Evan 278: De Colo, Nando 279: Batum, Nicolas 280: Shengelia, Tornike 281: Pachulia, Zaza 282: Nowitzki, Dirk 283: Ohlbrecht, Tim 284: Schr�der, Dennis 285: Antetokounmpo, Giannis 286: Seraphin, Kevin 287: Casspi, Omri 288: Gallinari, Danilo 289: Datome, Luigi 290: Belinelli, Marco 291: Valan?i?nas, Jonas 292: Motiej?nas, Donatas 293: Anti?, Pero 294: Gortat, Marcin 295: Bargnani, Andrea 296: Biedri?�, Andris 297: Mozgov, Timofey 298: Kirilenko, Andrei 299: Shved, Alexey 300: Karasev, Sergey 301: Raduljica, Miroslav 302: Udrih, Beno 303: Gasol, Marc 304: Claver, V�ctor 305: Gasol, Pau 306: Calder�n, Jos� 307: Rubio, Ricky 308: Jerebko, Jonas 309: Kanter, Enes 310: Sefolosha, Thabo 311: A??k, �mer 312: T�rko?lu, Hedo 313: ?lyasova, Ersan 314: Kravtsov, Viacheslav 315: Pekovi?, Nikola 316: Kuzmi?, Ognjen 317: Dubljevic, Bojan 318: Dragi?, Goran 319: Nedovi?, Nemanja 320: McGee, JaVale 321: Sessions, Ramon 322: Snell, Tony 323: Granger, Danny 324: O'Quinn, Kyle 325: Haywood, Brendan 326: Henson, John 327: Jamison, Antawn 328: Davis, Ed 329: Williams, Marvin 330: Barnes, Harrison 331: Zeller, Tyler 332: Wright, Brandan 333: Hansbrough, Tyler 334: Felton, Raymond 335: Ellington, Wayne 336: Lawson, Ty 337: Marshall, Kendall 338: Bullock, Reggie 339: Carter, Vince 340: 
Green, Danny 341: Leslie, C. J. 342: Powell, Josh 343: Howell, Richard 344: Hickson, J. J. 345: Brown, Lorenzo 346: Mitchell, Tony 347: Barea, Jos� Juan 348: Silas, Xavier 349: Diop, DeSagana 350: Smith, Josh 351: Jennings, Brandon 352: Koufos, Kosta 353: Oden, Greg 354: Sullinger, Jared 355: Mullens, Byron 356: Conley, Mike 357: Turner, Evan 358: Lighty, David 359: Osby, Romero 360: Griffin, Blake 361: Anderson, James 362: Lucas III, John 363: Graham, Stephen 364: Allen, Tony 365: Bazemore, Kent 366: Singler, E. J. 367: Ridnour, Luke 368: Brooks, Aaron 369: Cunningham, Jared 370: Livingston, Shaun 371: Adams, Steven 372: Gray, Aaron 373: Blair, DeJuan 374: Jefferson, Al 375: Gomes, Ryan 376: Brooks, MarShon 377: Landry, Carl 378: Moore, E'Twaun 379: Hummel, Robbie 380: Harris, Mike 381: Thompson, Jason 382: Jeffers, Othyus 383: N'Diaye, Hamady 384: Jones, Dwayne 385: Nelson, Jameer 386: Reed, Willie 387: Mills, Patrick 388: McConnell, Mickey 389: Tyler, Jeremy� 390: Franklin, Jamaal 391: Leonard, Kawhi 392: Nash, Steve 393: Webster, Martell 394: Dalembert, Samuel 395: Miles, C. J. 396: Balkman, Renaldo 397: Westbrook, Charlie 398: Wolters, Nate 399: Jones, Solomon 400: Williams, Louis 401: Wright, Dorell 402: Blatche, Andray 403: Ledo, Ricky 404: Vu?evi?, Nikola 405: Dedmon, Dewayne 406: Gibson, Taj 407: Mayo, O. J. 408: Young, Nick 409: DeRozan, DeMar� 410: Smith, J. R. 411: Nicholson, Andrew 412: Lee, Michael 413: World Peace, Metta 414: Harkless, Maurice 415: Kennedy, D. J. 416: Bynum, Andrew 417: Dellavedova, Matthew 418: Harrington, Al 419: James, LeBron 420: Lopez, Brook 421: Lopez, Robin 422: Childress, Josh 423: Fields, Landry 424: Outlaw, Travis 425: Howard, Dwight 426: Melo, Fab 427: Joseph, Kris 428: Southerland, James 429: Anthony, Carmelo 430: Onuaku, Arinze 431: Carter-Williams, Michael 432: Waiters, Dion 433: Johnson, Wesley 434: Allen, Lavoy 435: Wyatt, Khalif 436: Christmas, Dionte 437: Harris, Tobias 438: Watson, C. J. 
439: Covington, Robert 440: Hudson, Lester 441: Thompson, Tristan 442: Joseph, Cory 443: Pittman, Dexter 444: James, Damion 445: Tucker, P. J. 446: Durant, Kevin 447: Aldridge, LaMarcus 448: Bradley, Avery 449: Ivey, Royal 450: Augustin, D. J. 451: Hamilton, Jordan 452: Jordan, DeAndre 453: Middleton, Khris 454: Sloan, Donald 455: Stone, Julyan 456: Neal, Gary 457: Johnson, Orlando 458: Nunnally, James 459: Mbah a Moute, Luc 460: Gadzuric, Dan 461: Hollins, Ryan 462: Barnes, Matt 463: Love, Kevin 464: Collison, Darren 465: Drew, Larry 466: Farmar, Jordan 467: Holiday, Jrue 468: Lee, Malcolm 469: Westbrook, Russell 470: Watson, Earl 471: Afflalo, Arron 472: Muhammad, Shabazz 473: Ariza, Trevor 474: Anthony, Joel 475: Bennett, Anthony 476: Amundson, Lou 477: Marion, Shawn 478: Ay�n, Gustavo 479: Bogut, Andrew 480: Miller, Andre 481: Price, Ronnie 482: Howard, Ron 483: Ezeli, Festus 484: Taylor, Jeffery 485: Jenkins, John 486: Cunningham, Dante 487: Wayns, Maalik 488: Foye, Randy 489: Lowry, Kyle 490: Scott, Mike 491: Mason, Jr., Roger 492: Sanders, Larry 493: Daniels, Troy 494: Maynor, Eric 495: Williams, Reggie 496: Johnson, James 497: Aminu, Al-Farouq 498: Paul, Chris 499: Teague, Jeff 500: Smith, Ish 501: Duncan, Tim 502: Hawes, Spencer 503: Wroten, Tony 504: Gaddy, Abdul 505: Thomas, Isaiah 506: Robinson, Nate 507: Ross, Terrence 508: Pondexter, Quincy 509: Holiday, Justin 510: Baynes, Aron 511: Thompson, Klay 512: Lillard, Damian 513: Alexander, Joe 514: Fischer, D'or 515: Ebanks, Devin 516: Johnson, Amir 517: Martin, Kevin 518: Evans, Jeremy 519: Lee, Courtney 520: Mekel, Gal 521: Murry, Toure' 522: Stiemsma, Greg 523: Leuer, Jon 524: Landry, Marcus 525: Harris, Devin 526: West, David 527: Crawford, Jordan
# Group by as many columns as you like:
# pass the column names as a list (hence the square brackets)
nba_df.groupby(by=["POS", "Race"])["Age"].describe()
POS Race C Black count 36.000000 mean 26.972222 std 3.974822 min 19.000000 25% 24.750000 50% 26.000000 75% 30.250000 max 36.000000 Hispanic count 1.000000 mean 28.000000 std NaN min 28.000000 25% 28.000000 50% 28.000000 75% 28.000000 ... G/F Mixed mean 24.000000 std 1.414214 min 23.000000 25% 23.500000 50% 24.000000 75% 24.500000 max 25.000000 White count 6.000000 mean 28.333333 std 4.802777 min 23.000000 25% 24.250000 50% 28.500000 75% 32.750000 max 33.000000 Length: 160, dtype: float64
# Histograms
# Shows you the spread of one numerical value
# (no bins argument is passed here, so the library's default bin count is used)
nba_df["Age"].hist()
<matplotlib.axes.AxesSubplot at 0x112af1c90>
# Cathy says there should always be 30 bins
nba_df["Age"].hist(bins=30)
# Cathy is never wrong
<matplotlib.axes.AxesSubplot at 0x112b07990>
# Scatterplots show you the relationship between two numerical values
# If the points fall along a line the two values are related; otherwise they aren't
nba_df.plot(x="Ht (In.)", y="WT", kind="scatter")
<matplotlib.axes.AxesSubplot at 0x112c60490>
# You can also scatterplot like this, handing the x and y columns straight to matplotlib
# NOTE(review): `plt` is not imported in the visible source -- presumably
# `import matplotlib.pyplot as plt` (or a %pylab magic) ran earlier; confirm
plt.scatter(nba_df["Ht (In.)"], nba_df["WT"])
<matplotlib.collections.PathCollection at 0x11330c750>
# Bar Chart
# Count the rows in each position group, then draw one bar per position
pos_size = nba_df.groupby("POS").size()
# Parenthesised single-argument print runs on both Python 2 and Python 3
print(pos_size)
pos_size.plot(kind='bar', title="Position")
POS C 67 F 142 F/C 74 G 175 G/F 70 dtype: int64
<matplotlib.axes.AxesSubplot at 0x117387910>