# special IPython command to prepare the notebook for matplotlib
%matplotlib inline
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt
import requests
from pattern import web
import re
from numpy.random import rand
from numpy import arange
In this notebook we fetch data about draft prospects from the Steelers Depot website.
http://www.steelersdepot.com/2014-nfl-combine-results-defensive-linemen/ has a table for position groups for different years that summarizes their combine performance. We will combine the data for all years into a single pandas DataFrame and visualize the combine performance.
Our first step is to fetch the content from a relevant webpage.
Follow the link below to see the structure of the webpage we'll be working on.
http://www.steelersdepot.com/2014-nfl-combine-results-defensive-linemen/
We'll use the python requests module to get the text of the webpage.
# URLs for the four Steelers Depot combine-results pages we scrape:
# defensive linemen and linebackers, for the 2014 and 2013 combines.
url0 = 'http://www.steelersdepot.com/2014-nfl-combine-results-defensive-linemen/'
# used later
url1 = 'http://www.steelersdepot.com/2014-nfl-combine-results-linebackers/'
url2 = 'http://www.steelersdepot.com/2013-nfl-combine-results-defensive-linemen/'
url3 = 'http://www.steelersdepot.com/2013-nfl-combine-results-linebackers/'
# Fetch the first page; .text is the response body decoded to a string.
website_html = requests.get(url0).text
We now have a string of text that's structured like this.
<!DOCTYPE html>
<html>
<body>
<h1>My First Heading</h1>;
<p>My first paragraph.</p>
</body>
</html>
We'll use the python module pattern to read and understand the html for us. That will make it easy for us to extract the elements that we're interested in.
def get_html_table(html):
    """Parse raw HTML text and return the combine-results tables.

    Uses the pattern.web DOM to pull out every table carrying the CSS
    class "tableizer-table sortable" (the Steelers Depot data tables).

    Args:
        html: a string of HTML, e.g. the text of a requests response.

    Returns:
        A list of pattern.web Element objects, one per matching table
        (exactly one on the pages this notebook scrapes).
    """
    document = web.Element(html)
    # Filtering by class keeps only the data tables we care about; a
    # generic document('table') query would return every table on the
    # page and could be iterated over the same way.
    return document.by_class('tableizer-table sortable')
# Parse the scraped HTML; we expect a list holding exactly one table.
data_table = get_html_table(website_html)
# Sanity check (Python 2 print statements): how many tables matched,
# and the class attribute of the first one.
print "table length: %d" %len(data_table)
print data_table[0].attributes
table length: 1 {u'class': u'tableizer-table sortable'}
At this point we've used pattern to extract the table that we want from the html we scraped from the Steelers Depot website.
We now want to build a dataframe with the player name as the row label and combine activity as the column label.
Here is what's going on with this code. When the pattern module parses the html it returns a list of all the tables that match the class name that we provided. In this case there's only one table that matches the class name, but that single table is still returned in a list of length 1. So in the first step we're just extracting the table from the list.
table[0]
In the next step we're doing something similar. We're getting all of the elements of type ('tr'). ('tr') equates to the rows of the table. Again, the result is returned as a list. Since we're only interested in the headers, which are in the first row, we pull out just the first row.
table[0]('tr')[0]
Now we want the actual text in each cell (i.e. the actual column labels). Selecting ('th') returns a list of all the ('th') elements.
th_s = table[0]('tr')[0]('th')
That gives us a list of elements that look like this
<th><span style="font-size: small;">PLAYER</span></th>
<th><span style="font-size: small;">COLLEGE</span></th>
<th><span style="font-size: small;">POS</span></th>
Now that we have a list of the ('th') elements we extract the actual text. Concretely, if we have the element
<th><span style="font-size: small;">PLAYER</span></th>
Then we want to extract the text "PLAYER" which we accomplish with the following code
[td('span')[0].content if len(td('span')) != 0 else u"n/a"
for td in tds]
We're going to build a dictionary of dictionaries. The key is going to be the player name, the 2nd key will be the column labels, the values will be the combine result for each category.
def clean_num(measurement):
    """Convert a feet-and-inches string like ``8'10"`` to decimal feet.

    "DNP" (did not participate) is passed through unchanged so callers
    can detect missing results. Any other value is split on the foot
    mark (') and inch mark (") and converted to feet as a float
    (feet + inches / 12).

    Args:
        measurement: text from a combine table cell, e.g. ``8'10"``
            or "DNP".

    Returns:
        A float number of feet, or the string "DNP" for missing data.
    """
    # Fixed: the parameter was named `str`, shadowing the builtin, and
    # the result went through a redundant `res = 0` temporary.
    if measurement == "DNP":
        return "DNP"
    # nums[0] is the feet part, nums[1] the inches part.
    nums = re.split(r"'|\"", measurement)
    try:
        return float(nums[0]) + float(nums[1]) / 12
    except ValueError:
        # Some cells carry a stray trailing character on the feet part;
        # drop it and retry — presumably a non-ASCII quote mark in the
        # scraped text (TODO confirm against the raw pages).
        return float(nums[0][:-1]) + float(nums[1]) / 12
def get_data(table):
    """Extract per-player combine results from a parsed HTML table.

    Builds a dict-of-dicts keyed first by player name (the first cell
    of each data row) and then by column header, e.g.
    result["Donald, Aaron"]["VJ"] -> 32.0.

    The 2014 pages wrap each cell's text in a <span>, while older pages
    put the text directly in the th/td element; the layout is detected
    from the first header cell and both forms are handled.

    Args:
        table: a list of pattern.web DOM table elements; only table[0]
            is read (get_html_table returns one matching table).

    Returns:
        defaultdict mapping player name -> {header: value}, with the
        VJ, BJ and WGT columns converted to numbers.
    """
    result = defaultdict(dict)
    # we get the first row in the table and all the th elements
    # this gives us a list of all the column headers.
    th_s = table[0]('tr')[0]('th')
    # 2014-style tables wrap header text in <span>; older ones do not.
    format_2014 = True
    if th_s[0]('span') == []:
        format_2014 = False
    # use a little python magic to extract the actual text
    if format_2014:
        headers = [th('span')[0].content for th in th_s]
    else:
        headers = [th.content for th in th_s]
        # Older tables label the jump columns "VERT"/"JUMP"; rename
        # them so all years share column names and concat cleanly.
        headers[headers.index("VERT")] = "VJ"
        headers[headers.index("JUMP")] = "BJ"
    # we're going to work with the vertical jump data
    # we'll use the indices below to convert text values to number values
    bj_idx = headers.index("BJ")
    vj_idx = headers.index("VJ")
    wgt_idx = headers.index("WGT")
    # throw away the header row
    rows = table[0]('tr')[1:]
    for row in rows:
        tds = row('td')
        if format_2014:
            # Cells with no <span> (empty cells) are recorded as u"n/a".
            row_data = [td('span')[0].content if len(td('span')) != 0 else u"n/a"
                        for td in tds]
        else:
            row_data = [td.content for td in tds]
        # Convert the columns we plot later into numbers. A "DNP"
        # vertical jump becomes 0 so it can be masked out downstream.
        row_data[vj_idx] = float(row_data[vj_idx]) if row_data[vj_idx] != u"DNP" else 0
        row_data[bj_idx] = clean_num(row_data[bj_idx])
        row_data[wgt_idx] = int(row_data[wgt_idx])
        subdict = dict(zip(headers, row_data))
        # row_data[0] is the player's name (first column) — outer key.
        result[row_data[0]] = subdict
    return result
# Build the nested dict {player: {column: value}} from the parsed table.
result = get_data(data_table)
# quick check that we put the data into the dict successfully.
# for key in result:
#     print (str(result[key]['VJ']) + ", "),
# create dataframe: outer keys (player names) become the row index,
# inner keys (the column headers) become the columns.
df = pd.DataFrame.from_dict(result, orient='index')
# Sort the column labels alphabetically (axis=1 acts on columns).
# NOTE(review): DataFrame.sort was deprecated and later removed; a
# modern pandas needs df.sort_index(axis=1) here — confirm the pandas
# version this notebook targets.
df.sort(axis=1,inplace=True)
print df
10(1) 10(2) 20S 225 3C 40(1) 40(2) 40(O) ARMS \ Bromley, Jason 1.72 1.79 n/a 26 7.92 4.93 5.13 5.06 33 1/2 Carrethers, Ryan 1.78 1.88 n/a 32 7.89 5.44 5.5 5.47 31 3/4 Clarke, William 1.69 1.63 n/a 22 7.26 4.72 4.75 4.77 34 5/8 Clowney, Jadeveon 1.56 1.59 n/a 21 7.27 4.47 4.48 4.53 34 1/2 Coleman, Deandre DNP DNP n/a 24 DNP DNP DNP DNP 34 3/8 Crichton, Scott 1.62 1.76 4.29 24 7.19 4.78 4.82 4.84 32 3/4 Donald, Aaron 1.59 1.63 4.39 35 7.11 4.65 4.66 4.68 32 5/8 Ealy, Kony 1.72 1.66 n/a 22 6.83 4.84 4.84 4.92 34 1/4 Easley, Dominique DNP DNP n/a 26 DNP DNP DNP DNP 32 7/8 Edebali, Kasim 1.64 N/A 4.35 19 7.2 4.69 N/A 4.79 32 3/4 Ellis, Justin 1.88 N/A n/a 25 7.81 5.15 5.12 5.27 33 Enemkpali, Ikemefuna 1.75 N/A n/a 28 7.67 4.9 N/A 5.01 33 1/8 Ferguson, Ego DNP DNP n/a 24 DNP DNP DNP DNP 32 1/4 Ford, Donald DNP DNP n/a DNP DNP DNP DNP DNP 32 7/8 Gayle, Jadon 1.6 N/A 4.27 26 7.19 4.65 4.6 4.7 32 3/8 Hageman, Ra'shede 1.81 1.75 n/a 32 7.87 4.97 5.02 5.02 34 1/4 Hart, Taylor DNP DNP n/a 21 DNP DNP DNP DNP 32 3/4 Hyder, Kerry 1.75 N/A 4.33 20 7.23 4.97 N/A 5.1 33 1/2 Jeffcoat, Jackson 1.6 N/A 4.18 18 6.97 4.6 N/A 4.63 33 7/8 Jernigan, Timmy 1.72 1.75 n/a 27 DNP 5.09 4.93 5.06 31 5/8 Johnson, Anthony 1.88 1.88 n/a 20 7.93 5.25 5.25 5.24 33 Jones, Daquan 1.81 1.87 n/a 25 7.76 5.28 5.34 5.35 33 1/2 Jones, Howard 1.58 1.66 4.41 21 7.16 4.6 4.57 4.60 34 1/8 Kerr, Zachariah 1.78 1.72 n/a 28 7.93 5.07 5.03 5.08 32 7/8 Lawrence, Demarcus 1.63 1.62 4.31 20 7.46 4.72 4.81 4.8 33 3/4 Lynch, Aaron DNP DNP n/a 18 DNP DNP DNP DNP 34 Manumaleuna, Eathyn 1.68 1.69 n/a 29 7.78 5.2 5.15 5.16 32 1/4 Marsh, Cassius 1.69 1.66 4.25 14 7.08 4.89 4.89 4.89 32 3/4 Martin, Kareem 1.6 1.53 4.33 22 7.2 4.72 4.68 4.72 35 Mauro, Joshua 1.78 1.75 n/a 21 7.43 5.11 5.16 5.21 33 Mccullers, Daniel DNP DNP n/a 27 DNP DNP DNP DNP 36 5/8 Mims, Tevin 1.69 1.69 n/a 17 DNP 4.95 4.95 4.95 33 1/2 Moore, Zachary 1.56 1.63 n/a 23 7.41 4.82 4.87 4.84 33 3/4 Newsome, Jonathan 1.62 1.66 n/a 21 7.31 4.69 4.75 4.73 33 1/4 
Nix, Louis 1.85 1.87 n/a DNP 8.29 5.35 5.37 5.42 33 Pagan, Jeoffrey 1.72 1.75 n/a DNP DNP 4.94 5.12 DNP 33 Palepoi, Tenny DNP DNP n/a 31 7.67 DNP DNP 5.1 30 1/2 Pennel, Michael 1.75 1.75 n/a 23 7.94 5.2 5.19 5.23 33 3/8 Quarles, Kelcy 1.81 1.87 n/a 27 DNP 5 5.1 5.03 33 1/4 Ramsey, Kaleb DNP DNP n/a 36 DNP DNP DNP DNP 32 7/8 Reid, Caraun 1.69 1.69 n/a 20 7.59 4.9 5 4.91 33 Sam, Michael 1.72 1.75 n/a 17 7.8 4.79 4.84 4.91 33 3/8 Smith, Chris 1.59 1.63 n/a 28 7.55 4.54 4.69 4.71 34 1/8 Smith, Marcus 1.57 1.6 n/a 23 7.48 4.63 4.65 4.68 34 Stephen, Shamar 1.81 INJ n/a 25 DNP 5.17 INJ 5.25 33 1/8 Stinson, Edward DNP DNP n/a DNP DNP DNP DNP DNP 33 3/4 Sutton, William 1.75 1.82 n/a 24 7.93 5.37 5.47 5.36 31 1 /4 Thomas, Robert DNP DNP n/a 32 DNP DNP DNP DNP 33 7/8 Thornton, Khyri 1.72 1.72 n/a 28 7.83 5.05 4.94 5.03 32 1/2 Tuitt, Stephon DNP DNP n/a 31 DNP DNP DNP DNP 34 3/4 Uko, George 1.75 1.79 n/a 18 DNP 4.9 4.88 4.99 33 1/4 Urban, Brent DNP DNP n/a DNP DNP DNP DNP DNP 34 1/4 Webster, Larry 1.63 1.57 n/a 17 7.29 4.62 4.6 4.58 33 1/2 Westbrooks, Ethan 1.75 1.75 n/a 19 7.3 4.97 4.85 4.9 33 1/4 Whaley, Christopher DNP DNP n/a DNP DNP DNP DNP DNP 32 1/8 Wynn, Kerry 1.6 INJ n/a 31 DNP 5 INJ 4.97 31 3/4 BJ COLLEGE HANDS HGT \ Bromley, Jason 8.833333 Styracuse 9 1/4 6031 Carrethers, Ryan 7.333333 Arkansas St. 9 3/8 6012 Clarke, William 9.333333 West Virginia 9 7/8 6061 Clowney, Jadeveon 10.33333 South Carolina 10 6052 Coleman, Deandre DNP California 10 1/4 6050 Crichton, Scott 9 Oregon St. 
10 1/8 6027 Donald, Aaron 9.666667 Pittsburgh 9 7/8 6006 Ealy, Kony 9.5 Missouri 9 1/2 6040 Easley, Dominique DNP Florida 9 3/4 6020 Edebali, Kasim 9.583333 Boston College 9 1/2 6020 Ellis, Justin 7.666667 Louisiana Tech 10 1/8 6014 Enemkpali, Ikemefuna 9.333333 Louisiana Tech 9 3/4 6006 Ferguson, Ego DNP LSU 10 3/4 6030 Ford, Donald DNP Auburn 10 1/4 6021 Gayle, Jadon 10.16667 Virginia Tech 9 5/8 6036 Hageman, Ra'shede 9.5 Minnesota 10 1/4 6057 Hart, Taylor DNP Oregon 10 1/8 6060 Hyder, Kerry 9 Texas Tech 9 3/4 6024 Jeffcoat, Jackson 10.25 Texas 9 5/8 6030 Jernigan, Timmy 8.5 Florida St. 9 5/8 6015 Johnson, Anthony 8.5 LSU 10 3/8 6024 Jones, Daquan 8.416667 Penn St. 9 5/8 6036 Jones, Howard 10.33333 Shepherd 9 1/2 6024 Kerr, Zachariah 8.25 Delaware 9 3/4 6013 Lawrence, Demarcus 9.416667 Boise St. 11 6027 Lynch, Aaron DNP South Florida 10 1/4 6050 Manumaleuna, Eathyn 8.416667 BYU 10 6022 Marsh, Cassius 9 UCLA 9 1/2 6040 Martin, Kareem 10.75 North Carolina 10 6057 Mauro, Joshua 9.666667 Stanford 9 1/2 6057 Mccullers, Daniel 8.083333 Tennessee 11 6070 Mims, Tevin 9.083333 South Florida 9 3/8 6040 Moore, Zachary 10.25 Concordia-SP 9 5/8 6054 Newsome, Jonathan 9.75 Ball St. 9 5/8 6025 Nix, Louis 8.083333 Notre Dame 9 7/8 6023 Pagan, Jeoffrey DNP Alabama 9 5/8 6030 Palepoi, Tenny 9.25 Utah 9 7/8 6014 Pennel, Michael 8.5 Colorado St. Pueblo 9 7/8 6042 Quarles, Kelcy 8.5 South Carolina 9 3/8 6036 Ramsey, Kaleb DNP Boston College 9 3/4 6030 Reid, Caraun 8.833333 Princeton 10 1/2 6021 Sam, Michael 9.5 Missouri 9 3/8 6020 Smith, Chris 10.08333 Arkansas 9 1/2 6010 Smith, Marcus 10.08333 Louisville 10 6033 Stephen, Shamar 8.583333 Connecticut 10 6047 Stinson, Edward DNP Alabama 9 3/8 6030 Sutton, William 8.25 Arizona St. 
10 6004 Thomas, Robert DNP Arkansas 10 6010 Thornton, Khyri 9.333333 Southern Miss 9 1/2 6025 Tuitt, Stephon DNP Notre Dame 10 6050 Uko, George 9.166667 USC 10 1/8 6025 Urban, Brent DNP Virginia 9 3/4 6070 Webster, Larry 10.25 Bloomsburg 10 1/8 6056 Westbrooks, Ethan 9.166667 West Texas A&M 9 5/8 6034 Whaley, Christopher DNP Texas 9 1/8 6030 Wynn, Kerry 9.416667 Richmond 9 1/8 6047 PLAYER POS VJ WGT Bromley, Jason Bromley, Jason DL 33.5 306 Carrethers, Ryan Carrethers, Ryan DL 26.0 337 Clarke, William Clarke, William DL 32.0 271 Clowney, Jadeveon Clowney, Jadeveon DL 37.5 266 Coleman, Deandre Coleman, Deandre DL 0.0 314 Crichton, Scott Crichton, Scott DL 31.5 273 Donald, Aaron Donald, Aaron DL 32.0 285 Ealy, Kony Ealy, Kony DL 31.0 273 Easley, Dominique Easley, Dominique DL 0.0 288 Edebali, Kasim Edebali, Kasim DL 34.5 253 Ellis, Justin Ellis, Justin DL 28.0 334 Enemkpali, Ikemefuna Enemkpali, Ikemefuna DL 34.0 261 Ferguson, Ego Ferguson, Ego DL 0.0 315 Ford, Donald Ford, Donald DL 0.0 252 Gayle, Jadon Gayle, Jadon DL 37.0 259 Hageman, Ra'shede Hageman, Ra'shede DL 35.5 310 Hart, Taylor Hart, Taylor DL 0.0 281 Hyder, Kerry Hyder, Kerry DL 29.5 290 Jeffcoat, Jackson Jeffcoat, Jackson DL 36.0 247 Jernigan, Timmy Jernigan, Timmy DL 29.5 299 Johnson, Anthony Johnson, Anthony DL 24.5 308 Jones, Daquan Jones, Daquan DL 27.5 322 Jones, Howard Jones, Howard DL 40.5 235 Kerr, Zachariah Kerr, Zachariah DL 28.5 326 Lawrence, Demarcus Lawrence, Demarcus DL 34.5 251 Lynch, Aaron Lynch, Aaron DL 0.0 249 Manumaleuna, Eathyn Manumaleuna, Eathyn DL 28.0 296 Marsh, Cassius Marsh, Cassius DL 32.0 252 Martin, Kareem Martin, Kareem DL 35.5 272 Mauro, Joshua Mauro, Joshua DL 32.0 271 Mccullers, Daniel Mccullers, Daniel DL 20.5 352 Mims, Tevin Mims, Tevin DL 27.5 260 Moore, Zachary Moore, Zachary DL 33.5 269 Newsome, Jonathan Newsome, Jonathan DL 34.0 247 Nix, Louis Nix, Louis DL 25.5 331 Pagan, Jeoffrey Pagan, Jeoffrey DL 0.0 310 Palepoi, Tenny Palepoi, Tenny DL 30.5 298 Pennel, Michael 
Pennel, Michael DL 28.5 332 Quarles, Kelcy Quarles, Kelcy DL 23.5 297 Ramsey, Kaleb Ramsey, Kaleb DL 0.0 297 Reid, Caraun Reid, Caraun DL 26.5 302 Sam, Michael Sam, Michael DL 25.5 261 Smith, Chris Smith, Chris DL 37.0 266 Smith, Marcus Smith, Marcus DL 35.0 251 Stephen, Shamar Stephen, Shamar DL 30.5 309 Stinson, Edward Stinson, Edward DL 0.0 287 Sutton, William Sutton, William DL 28.5 303 Thomas, Robert Thomas, Robert DL 0.0 327 Thornton, Khyri Thornton, Khyri DL 29.0 304 Tuitt, Stephon Tuitt, Stephon DL 0.0 304 Uko, George Uko, George DL 29.5 284 Urban, Brent Urban, Brent DL 0.0 295 Webster, Larry Webster, Larry DL 36.5 252 Westbrooks, Ethan Westbrooks, Ethan DL 29.0 267 Whaley, Christopher Whaley, Christopher DL 0.0 269 Wynn, Kerry Wynn, Kerry DL 34.0 266 [56 rows x 17 columns]
These are just a few functions that show you how you can access the data once you have it in the dataframe.
# A few examples of indexing into the DataFrame.
# iloc selects by integer position: first two rows, first two columns.
subtable = df.iloc[0:2, 0:2]
print "subtable"
print subtable
print ""
df.head()
# Select a single column by label.
column = df['VJ']
print "column"
print column
print ""
# NOTE(review): .ix was deprecated and later removed from pandas; a
# modern pandas needs .iloc for these positional lookups — confirm the
# pandas version this notebook targets.
row = df.ix[0] #row 0
print "row"
print row
print ""
rows = df.ix[:2] #rows 0,1
print "rows"
print rows
print ""
subtable 10(1) 10(2) Bromley, Jason 1.72 1.79 Carrethers, Ryan 1.78 1.88 [2 rows x 2 columns] column Bromley, Jason 33.5 Carrethers, Ryan 26.0 Clarke, William 32.0 Clowney, Jadeveon 37.5 Coleman, Deandre 0.0 Crichton, Scott 31.5 Donald, Aaron 32.0 Ealy, Kony 31.0 Easley, Dominique 0.0 Edebali, Kasim 34.5 Ellis, Justin 28.0 Enemkpali, Ikemefuna 34.0 Ferguson, Ego 0.0 Ford, Donald 0.0 Gayle, Jadon 37.0 Hageman, Ra'shede 35.5 Hart, Taylor 0.0 Hyder, Kerry 29.5 Jeffcoat, Jackson 36.0 Jernigan, Timmy 29.5 Johnson, Anthony 24.5 Jones, Daquan 27.5 Jones, Howard 40.5 Kerr, Zachariah 28.5 Lawrence, Demarcus 34.5 Lynch, Aaron 0.0 Manumaleuna, Eathyn 28.0 Marsh, Cassius 32.0 Martin, Kareem 35.5 Mauro, Joshua 32.0 Mccullers, Daniel 20.5 Mims, Tevin 27.5 Moore, Zachary 33.5 Newsome, Jonathan 34.0 Nix, Louis 25.5 Pagan, Jeoffrey 0.0 Palepoi, Tenny 30.5 Pennel, Michael 28.5 Quarles, Kelcy 23.5 Ramsey, Kaleb 0.0 Reid, Caraun 26.5 Sam, Michael 25.5 Smith, Chris 37.0 Smith, Marcus 35.0 Stephen, Shamar 30.5 Stinson, Edward 0.0 Sutton, William 28.5 Thomas, Robert 0.0 Thornton, Khyri 29.0 Tuitt, Stephon 0.0 Uko, George 29.5 Urban, Brent 0.0 Webster, Larry 36.5 Westbrooks, Ethan 29.0 Whaley, Christopher 0.0 Wynn, Kerry 34.0 Name: VJ, Length: 56, dtype: float64 row 10(1) 1.72 10(2) 1.79 20S n/a 225 26 3C 7.92 40(1) 4.93 40(2) 5.13 40(O) 5.06 ARMS 33 1/2 BJ 8.833333 COLLEGE Styracuse HANDS 9 1/4 HGT 6031 PLAYER Bromley, Jason POS DL VJ 33.5 WGT 306 Name: Bromley, Jason, dtype: object rows 10(1) 10(2) 20S 225 3C 40(1) 40(2) 40(O) ARMS \ Bromley, Jason 1.72 1.79 n/a 26 7.92 4.93 5.13 5.06 33 1/2 Carrethers, Ryan 1.78 1.88 n/a 32 7.89 5.44 5.5 5.47 31 3/4 BJ COLLEGE HANDS HGT PLAYER POS \ Bromley, Jason 8.833333 Styracuse 9 1/4 6031 Bromley, Jason DL Carrethers, Ryan 7.333333 Arkansas St. 9 3/8 6012 Carrethers, Ryan DL VJ WGT Bromley, Jason 33.5 306 Carrethers, Ryan 26.0 337 [2 rows x 17 columns]
Our first step is to start visualizing the data. In this case I'm just going to look at a bar chart of the DL vertical jump results. We can quickly compare the different players.
# extract the vertical jump information from the dataframe.
# NOTE(review): bar_width and opacity appear unused in this cell.
bar_width = 0.35
opacity = 0.4
col = df["VJ"]
# generate the y positions for the horizontal bars, offset by half a
# unit so the bars sit between integer tick positions.
pos = [i + .5 for i in arange(len(col))]
# Here we set the width and height of the figure (in inches).
fig = plt.figure(figsize=(4,10))
# Add a subplot, 1 row, 1 col
ax = fig.add_subplot(111)
ax.set_title("Vertical jump")
# One horizontal bar per player; bar length = vertical jump in inches.
ax.barh(pos, col.values, align='center', height=0.3)
ax.set_yticks(pos)
# Label each bar with the player name (the DataFrame index).
_ = ax.set_yticklabels(col.index, fontsize="small")
Howard Jones sure looks good but when I dig a little deeper I see that he's only 235 lbs. I think we can agree that a 350 lb player with a 40" vertical is a little more interesting than a 235 lb player. So let's look at a scatter plot of vertical jump versus weight.
As you would expect we can see a downward sloping trend. In English, the heavier the player, the smaller the vertical jump.
However it's interesting to note the players that are above the trend.
#filter out the zero entries (players who did not jump were stored as 0)
mask = df['VJ'] > 0
subdf = df[mask]
fig, ax = plt.subplots()
# Scatter of weight (x) against vertical jump (y) to expose the trend.
ax.scatter(subdf['WGT'], subdf['VJ'])
ax.set_xlabel('Weight')
ax.set_ylabel('Vertical Jump')
# Call out Hageman, who jumps well above the trend for his weight.
ax.annotate("Hageman", xy=(310, 35.5), xytext=(330,40),arrowprops=dict(facecolor="black",shrink=0.05))
# plt.scatter(subdf['WGT'],subdf['VJ'])
<matplotlib.text.Annotation at 0x10469da0>
One of my pet peeves is irrelevant groupings. Here's a description from AdvancedNFLStats.com
An irrelevant grouping is achieved by picking certain information, say: attempts, receptions and >percentage of team attempts. And then showing that by this cooked-up collection of data points, Shonn >Greene is analogous to Jim Brown. This often involves arbitrary end points.
As a starting point I start with the largest possible set of data and then follow the data. Here's where the power of the computer really shines. Now that we've already built the machinery to parse the data, let's add the data for defensive linemen from 2013 as well as the data for linebackers from 2014 and 2013.
# Fetch the remaining position-group pages, parse each one exactly as
# before, and stack all four tables into a single DataFrame. The pages
# are requested in the same order as the url1, url2, url3 definitions.
frames = [df]
for url in (url1, url2, url3):
    page_html = requests.get(url).text
    page_table = get_html_table(page_html)
    page_dict = get_data(page_table)
    frames.append(pd.DataFrame.from_dict(page_dict, orient='index'))
df_all = pd.concat(frames)
Let's run that plot again except this time we'll look at DL and LBs from 2013 and 2014.
# extract the vertical jump information from the dataframe.
df_VJ = df_all[df_all.VJ > 0]
# Sort ascending by vertical jump so the chart reads shortest-to-longest.
# NOTE(review): df_VJ is a filtered slice of df_all, so an inplace sort
# can trigger SettingWithCopyWarning on newer pandas; DataFrame.sort
# itself was also removed in favor of sort_values — confirm the pandas
# version this notebook targets.
df_VJ.sort(["VJ"], ascending=[1], inplace=True)
# NOTE(review): bar_width and opacity appear unused in this cell.
bar_width = 0.35
opacity = 0.4
col = df_VJ["VJ"]
# generate the y positions for the horizontal bars, offset by half a
# unit so the bars sit between integer tick positions.
pos = [i + .5 for i in arange(len(col))]
# Here we set the width and height of the figure (tall, to fit all names)
fig = plt.figure(figsize=(4,40))
# Add a subplot, 1 row, 1 col
ax = fig.add_subplot(111)
ax.set_title("Vertical jump")
ax.barh(pos, col.values, align='center', height=0.3)
ax.set_yticks(pos)
# Vertical grid lines make bar lengths easier to compare.
ax.xaxis.grid(True)
_ = ax.set_yticklabels(col.index, fontsize="small")