%matplotlib inline
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import urllib
import json
from collections import namedtuple
import numpy as np
import sqlite3
If necessary, create the database file:
def create_db():
conn = sqlite3.connect('nfl_teams.db')
c = conn.cursor()
c.execute('''CREATE TABLE IF NOT EXISTS players (number int, name text, pos text, height int, weight int, dob text, team text)''')
conn.commit()
conn.close()
Initialize stuff:
#Use teams.txt to get the team abbreviations and cities:
all_city = []
all_abbr = []
all_name = []
with open("teams.txt","r") as f:
for line in f:
line = line.rstrip()
line = line.split(" - ")
all_city.append(line[1])
all_abbr.append(line[0].rstrip())
all_name.append(line[2].title())
def feet2inches(instr):
""" Convert a string in f the form X'Y" into an integer number of inches"""
instr = "".join([c for c in instr if c not in ('"',' ')])
instr = instr.split("'")
instr = [int(c) for c in instr]
return int(12*instr[0] + instr[1])
Now loop over teams, extract player stats from the HTML and dump them to nfl_teams.db
players = []
create_db()
url_base = "http://www.nfl.com/teams/roster?team="
for abbr, city, name in zip(all_abbr,all_city,all_name):
print "Parsing team : ",city,name
url = url_base+abbr
soup = BeautifulSoup(urllib.urlopen(url) )
for row in soup.findAll('table')[1].tbody.findAll('tr'):
try:
number = int(row.findAll('td')[0].contents[0])
name = str(row.findAll('td')[1].a.contents[0])
pos = str(row.findAll('td')[2].contents[0])
height = feet2inches(str(row.findAll('td')[4].contents[0]))
weight = int(row.findAll('td')[5].contents[0])
dob = str(row.findAll('td')[6].contents[0])
player_record = (number,name,pos,height,weight,dob,abbr)
conn = sqlite3.connect('nfl_teams.db')
c = conn.cursor()
c.execute("INSERT INTO players VALUES (?,?,?,?,?,?,?)",player_record)
conn.commit()
conn.close()
players.append(player_record)
except:
pass
print " --done."
Parsing team : Seattle Seahawks --done. Parsing team : Arizona Cardinals --done. Parsing team : Tampa Bay Buccaneers --done. Parsing team : Washington Redskins --done. Parsing team : Carolina Panthers --done. Parsing team : Jacksonville Jaguars --done. Parsing team : Houston Texans --done. Parsing team : Atlanta Falcons --done. Parsing team : Buffalo Bills --done. Parsing team : Chicago Bears --done. Parsing team : Cincinnati Bengals --done. Parsing team : Cleveland Browns --done. Parsing team : Baltimore Ravens --done. Parsing team : Dallas Cowboys --done. Parsing team : Denver Broncos --done. Parsing team : Detroit Lions --done. Parsing team : Green Bay Packers --done. Parsing team : Tennessee Titans --done. Parsing team : Indianapolis Colts --done. Parsing team : Kansas City Chiefs --done. Parsing team : Oakland Raiders --done. Parsing team : St. Louis Rams --done. Parsing team : Miami Dolphins --done. Parsing team : Minnesota Vikings --done. Parsing team : New England Patriots --done. Parsing team : New Orleans Saints --done. Parsing team : NY Giants Giants --done. Parsing team : NY Jets Jets --done. Parsing team : Philadelphia Eagles --done. Parsing team : Pittsburgh Steelers --done. Parsing team : San Diego Chargers --done. Parsing team : San Francisco 49Ers --done. Parsing team : Seattle Seahawks --done. Parsing team : Arizona Cardinals --done. Parsing team : Tampa Bay Buccaneers --done. Parsing team : Washington Redskins --done. Parsing team : Carolina Panthers --done. Parsing team : Jacksonville Jaguars --done. Parsing team : Houston Texans --done.