In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import urllib
import json
from collections import namedtuple
import numpy as np
import sqlite3

If necessary, create the database file:

In [2]:
def create_db(db_path='nfl_teams.db'):
    """Create the SQLite database file and its ``players`` table if missing.

    Parameters
    ----------
    db_path : str, optional
        Path of the SQLite file to open (it is created if it does not exist).
        Defaults to 'nfl_teams.db' in the current working directory.

    The table is created with ``CREATE TABLE IF NOT EXISTS``, so calling this
    function repeatedly is safe and leaves existing rows untouched.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS players (number int, name text, pos text, height int, weight int, dob text, team text)''')
        conn.commit()
    finally:
        # Always release the connection, even if the statement fails.
        conn.close()

Load the team abbreviations, cities and names from teams.txt, and define a helper that converts heights to inches:

In [9]:
# Use teams.txt to get the team abbreviations, cities and names.
# Each non-blank line is split on " - " into: abbreviation, city, name.
all_city = []
all_abbr = []
all_name = []
with open("teams.txt", "r") as f:
    for line in f:
        line = line.rstrip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at the end of the
            # file) instead of failing with IndexError on the split below.
            continue
        fields = line.split(" - ")
        all_abbr.append(fields[0].rstrip())
        all_city.append(fields[1])
        # Team names are stored title-cased.
        all_name.append(fields[2].title())
In [4]:
def feet2inches(instr):
    """ Convert a string in f the form X'Y" into an integer number of inches"""
    instr = "".join([c for c in instr if c not in ('"',' ')])
    instr = instr.split("'")
    instr = [int(c) for c in instr]
    return int(12*instr[0] + instr[1])

Now loop over teams, extract player stats from the HTML and dump them to nfl_teams.db

In [6]:
# Scrape every team's roster page and store one row per player in the
# `players` table of nfl_teams.db; the same tuples are collected in `players`.
# NOTE: rows are only ever appended, so re-running this cell against an
# existing database inserts every player again.
players = []
create_db()

url_base = "http://www.nfl.com/teams/roster?team="
# One connection for the whole run instead of one per player row.
conn = sqlite3.connect('nfl_teams.db')
try:
    for abbr, city, team_name in zip(all_abbr, all_city, all_name):
        print("Parsing team :  %s %s" % (city, team_name))
        soup = BeautifulSoup(urllib.urlopen(url_base + abbr))

        team_records = []
        # Player rows are read from the body of the second <table> on the page.
        for row in soup.findAll('table')[1].tbody.findAll('tr'):
            cells = row.findAll('td')
            try:
                number = int(cells[0].contents[0])
                player_name = str(cells[1].a.contents[0])
                pos = str(cells[2].contents[0])
                height = feet2inches(str(cells[4].contents[0]))
                weight = int(cells[5].contents[0])
                dob = str(cells[6].contents[0])
            except (AttributeError, IndexError, TypeError, ValueError):
                # Best effort: skip rows whose cells are missing or malformed.
                # Database errors and KeyboardInterrupt are no longer hidden.
                continue
            team_records.append(
                (number, player_name, pos, height, weight, dob, abbr))

        conn.executemany("INSERT INTO players VALUES (?,?,?,?,?,?,?)",
                         team_records)
        conn.commit()
        players.extend(team_records)
        print("   --done.")
finally:
    conn.close()
Parsing team :  Seattle Seahawks
   --done.
Parsing team :  Arizona Cardinals
   --done.
Parsing team :  Tampa Bay Buccaneers
   --done.
Parsing team :  Washington Redskins
   --done.
Parsing team :  Carolina Panthers
   --done.
Parsing team :  Jacksonville Jaguars
   --done.
Parsing team :  Houston Texans
   --done.
Parsing team :  Atlanta Falcons
   --done.
Parsing team :  Buffalo Bills
   --done.
Parsing team :  Chicago Bears
   --done.
Parsing team :  Cincinnati Bengals
   --done.
Parsing team :  Cleveland Browns
   --done.
Parsing team :  Baltimore Ravens
   --done.
Parsing team :  Dallas Cowboys
   --done.
Parsing team :  Denver Broncos
   --done.
Parsing team :  Detroit Lions
   --done.
Parsing team :  Green Bay Packers
   --done.
Parsing team :  Tennessee Titans
   --done.
Parsing team :  Indianapolis Colts
   --done.
Parsing team :  Kansas City Chiefs
   --done.
Parsing team :  Oakland Raiders
   --done.
Parsing team :  St. Louis Rams
   --done.
Parsing team :  Miami Dolphins
   --done.
Parsing team :  Minnesota Vikings
   --done.
Parsing team :  New England Patriots
   --done.
Parsing team :  New Orleans Saints
   --done.
Parsing team :  NY Giants Giants
   --done.
Parsing team :  NY Jets Jets
   --done.
Parsing team :  Philadelphia Eagles
   --done.
Parsing team :  Pittsburgh Steelers
   --done.
Parsing team :  San Diego Chargers
   --done.
Parsing team :  San Francisco 49Ers
   --done.
Parsing team :  Seattle Seahawks
   --done.
Parsing team :  Arizona Cardinals
   --done.
Parsing team :  Tampa Bay Buccaneers
   --done.
Parsing team :  Washington Redskins
   --done.
Parsing team :  Carolina Panthers
   --done.
Parsing team :  Jacksonville Jaguars
   --done.
Parsing team :  Houston Texans
   --done.