Introduction Pandas Dataframes

In [119]:
import vincent
import pandas as pd
from pandas import DataFrame, Series
vincent.core.initialize_notebook()
pd.set_option('display.max_columns', None)

Importing CSV Data

In [120]:
columns = ['Rk','G','Date','Age','Tm','Home/Away','Opp','Win/Lose','GS','MP','FG','FGA','FG%','3P','3PA','3P%','FT','FTA','FT%','ORB','DRB','TRB','AST','STL','BLK','TOV','PF','PTS','GmSc','+/-']
data = pd.read_csv('kevin.csv', names=columns)
#data.head(10)
data[data.Opp =='ATL'].head()
Out[120]:
Rk G Date Age Tm Home/Away Opp Win/Lose GS MP FG FGA FG% 3P 3PA 3P% FT FTA FT% ORB DRB TRB AST STL BLK TOV PF PTS GmSc +/-
2 3 3 2012-11-04 24-036 OKC NaN ATL L (-9) 1 42:05 7 17 0.412 1 4 0.25 7 8 0.875 1 11 12 8 3 2 6 3 22 19.3 -8
24 25 25 2012-12-19 24-081 OKC @ ATL W (+8) 1 40:52 14 23 0.609 4 8 0.50 9 10 0.900 1 12 13 3 0 2 6 1 41 31.5 2

Deleting Columns

In [30]:
del data['Rk']
del data['Home/Away']
del data['Tm']
del data['Opp']
del data['GS']
del data['Win/Lose']
data.head()
Out[30]:
G Date Age MP FG FGA FG% 3P 3PA 3P% FT FTA FT% ORB DRB TRB AST STL BLK TOV PF PTS GmSc +/-
0 1 2012-11-01 24-033 40:39 9 18 0.500 1 2 0.500 4 5 0.800 2 12 14 5 2 0 4 1 23 19.7 2
1 2 2012-11-02 24-034 42:41 7 14 0.500 1 3 0.333 8 12 0.667 2 15 17 7 0 2 6 1 23 20.2 13
2 3 2012-11-04 24-036 42:05 7 17 0.412 1 4 0.250 7 8 0.875 1 11 12 8 3 2 6 3 22 19.3 -8
3 4 2012-11-06 24-038 29:09 4 11 0.364 1 4 0.250 6 6 1.000 0 6 6 3 2 0 4 3 15 9.6 23
4 5 2012-11-08 24-040 38:23 11 19 0.579 0 2 0.000 2 2 1.000 1 3 4 1 3 3 6 1 24 16.1 -3

Field Goals Made / Field Goals Attempted Per Minute

In [64]:
temp = data[['MP', 'FG', 'FGA']]
temp.head()
Out[64]:
MP FG FGA
0 40:39 9 18
1 42:41 7 14
2 42:05 7 17
3 29:09 4 11
4 38:23 11 19
In [65]:
import time
import datetime

def str_to_minutes(minutes):
    minutes = str(minutes)
    minutes = time.strptime(minutes, '%M:%S')
    return datetime.timedelta(minutes=minutes.tm_min, seconds=minutes.tm_sec).total_seconds()/60
    
    
In [66]:
temp['MP'] = temp['MP'].map(str_to_minutes)
In [67]:
temp.head()
Out[67]:
MP FG FGA
0 40.650000 9 18
1 42.683333 7 14
2 42.083333 7 17
3 29.150000 4 11
4 38.383333 11 19
In [71]:
temp['FGA/M'] = temp['FGA'] / temp['MP']
temp['FG/M'] = temp['FG'] / temp['MP']
temp.head()
temp.describe()
Out[71]:
MP FG FGA FGA/M FG/M
count 81.000000 81.000000 81.000000 81.000000 81.000000
mean 38.504527 9.024691 17.691358 0.455967 0.234632
std 5.771923 2.554287 5.001605 0.094907 0.057639
min 23.766667 4.000000 8.000000 0.267499 0.095390
25% 35.766667 7.000000 14.000000 0.387812 0.202703
50% 39.100000 9.000000 17.000000 0.447344 0.232288
75% 42.083333 10.000000 21.000000 0.513619 0.267857
max 49.666667 16.000000 31.000000 0.699029 0.411487

Grouping By

In [114]:
data.head()
Out[114]:
Rk G Date Age Tm Home/Away Opp Win/Lose GS MP FG FGA FG% 3P 3PA 3P% FT FTA FT% ORB DRB TRB AST STL BLK TOV PF PTS GmSc +/-
0 1 1 2012-11-01 24-033 OKC @ SAS L (-2) 1 40:39 9 18 0.500 1 2 0.500 4 5 0.800 2 12 14 5 2 0 4 1 23 19.7 2
1 2 2 2012-11-02 24-034 OKC NaN POR W (+14) 1 42:41 7 14 0.500 1 3 0.333 8 12 0.667 2 15 17 7 0 2 6 1 23 20.2 13
2 3 3 2012-11-04 24-036 OKC NaN ATL L (-9) 1 42:05 7 17 0.412 1 4 0.250 7 8 0.875 1 11 12 8 3 2 6 3 22 19.3 -8
3 4 4 2012-11-06 24-038 OKC NaN TOR W (+20) 1 29:09 4 11 0.364 1 4 0.250 6 6 1.000 0 6 6 3 2 0 4 3 15 9.6 23
4 5 5 2012-11-08 24-040 OKC @ CHI W (+6) 1 38:23 11 19 0.579 0 2 0.000 2 2 1.000 1 3 4 1 3 3 6 1 24 16.1 -3
In [115]:
group_by_opp = data.groupby('Opp')
In [116]:
field_goal_per_team = group_by_opp.sum()[['FGA', 'FG']]
In [117]:
data[data.Opp == 'ATL']
Out[117]:
Rk G Date Age Tm Home/Away Opp Win/Lose GS MP FG FGA FG% 3P 3PA 3P% FT FTA FT% ORB DRB TRB AST STL BLK TOV PF PTS GmSc +/-
2 3 3 2012-11-04 24-036 OKC NaN ATL L (-9) 1 42:05 7 17 0.412 1 4 0.25 7 8 0.875 1 11 12 8 3 2 6 3 22 19.3 -8
24 25 25 2012-12-19 24-081 OKC @ ATL W (+8) 1 40:52 14 23 0.609 4 8 0.50 9 10 0.900 1 12 13 3 0 2 6 1 41 31.5 2
In [123]:
field_goal_per_team
stacked = vincent.StackedBar(field_goal_per_team)
stacked.legend(title="Field Goals")
stacked.scales['x'].padding = 0.1
print stacked.to_json()
{
  "axes": [
    {
      "scale": "x",
      "type": "x"
    },
    {
      "scale": "y",
      "type": "y"
    }
  ],
  "data": [
    {
      "name": "table",
      "values": [
        {
          "col": "FGA",
          "idx": "ATL",
          "val": 40
        },
        {
          "col": "FG",
          "idx": "ATL",
          "val": 21
        },
        {
          "col": "FGA",
          "idx": "BOS",
          "val": 36
        },
        {
          "col": "FG",
          "idx": "BOS",
          "val": 15
        },
        {
          "col": "FGA",
          "idx": "BRK",
          "val": 33
        },
        {
          "col": "FG",
          "idx": "BRK",
          "val": 20
        },
        {
          "col": "FGA",
          "idx": "CHA",
          "val": 20
        },
        {
          "col": "FG",
          "idx": "CHA",
          "val": 12
        },
        {
          "col": "FGA",
          "idx": "CHI",
          "val": 38
        },
        {
          "col": "FG",
          "idx": "CHI",
          "val": 17
        },
        {
          "col": "FGA",
          "idx": "CLE",
          "val": 37
        },
        {
          "col": "FG",
          "idx": "CLE",
          "val": 17
        },
        {
          "col": "FGA",
          "idx": "DAL",
          "val": 89
        },
        {
          "col": "FG",
          "idx": "DAL",
          "val": 42
        },
        {
          "col": "FGA",
          "idx": "DEN",
          "val": 75
        },
        {
          "col": "FG",
          "idx": "DEN",
          "val": 33
        },
        {
          "col": "FGA",
          "idx": "DET",
          "val": 38
        },
        {
          "col": "FG",
          "idx": "DET",
          "val": 17
        },
        {
          "col": "FGA",
          "idx": "GSW",
          "val": 67
        },
        {
          "col": "FG",
          "idx": "GSW",
          "val": 38
        },
        {
          "col": "FGA",
          "idx": "HOU",
          "val": 53
        },
        {
          "col": "FG",
          "idx": "HOU",
          "val": 25
        },
        {
          "col": "FGA",
          "idx": "IND",
          "val": 45
        },
        {
          "col": "FG",
          "idx": "IND",
          "val": 22
        },
        {
          "col": "FGA",
          "idx": "LAC",
          "val": 63
        },
        {
          "col": "FG",
          "idx": "LAC",
          "val": 29
        },
        {
          "col": "FGA",
          "idx": "LAL",
          "val": 92
        },
        {
          "col": "FG",
          "idx": "LAL",
          "val": 45
        },
        {
          "col": "FGA",
          "idx": "MEM",
          "val": 65
        },
        {
          "col": "FG",
          "idx": "MEM",
          "val": 33
        },
        {
          "col": "FGA",
          "idx": "MIA",
          "val": 45
        },
        {
          "col": "FG",
          "idx": "MIA",
          "val": 23
        },
        {
          "col": "FGA",
          "idx": "MIL",
          "val": 19
        },
        {
          "col": "FG",
          "idx": "MIL",
          "val": 10
        },
        {
          "col": "FGA",
          "idx": "MIN",
          "val": 74
        },
        {
          "col": "FG",
          "idx": "MIN",
          "val": 44
        },
        {
          "col": "FGA",
          "idx": "NOH",
          "val": 53
        },
        {
          "col": "FG",
          "idx": "NOH",
          "val": 32
        },
        {
          "col": "FGA",
          "idx": "NYK",
          "val": 37
        },
        {
          "col": "FG",
          "idx": "NYK",
          "val": 16
        },
        {
          "col": "FGA",
          "idx": "ORL",
          "val": 29
        },
        {
          "col": "FG",
          "idx": "ORL",
          "val": 15
        },
        {
          "col": "FGA",
          "idx": "PHI",
          "val": 37
        },
        {
          "col": "FG",
          "idx": "PHI",
          "val": 18
        },
        {
          "col": "FGA",
          "idx": "PHO",
          "val": 76
        },
        {
          "col": "FG",
          "idx": "PHO",
          "val": 37
        },
        {
          "col": "FGA",
          "idx": "POR",
          "val": 63
        },
        {
          "col": "FG",
          "idx": "POR",
          "val": 36
        },
        {
          "col": "FGA",
          "idx": "SAC",
          "val": 42
        },
        {
          "col": "FG",
          "idx": "SAC",
          "val": 27
        },
        {
          "col": "FGA",
          "idx": "SAS",
          "val": 64
        },
        {
          "col": "FG",
          "idx": "SAS",
          "val": 32
        },
        {
          "col": "FGA",
          "idx": "TOR",
          "val": 22
        },
        {
          "col": "FG",
          "idx": "TOR",
          "val": 10
        },
        {
          "col": "FGA",
          "idx": "UTA",
          "val": 52
        },
        {
          "col": "FG",
          "idx": "UTA",
          "val": 32
        },
        {
          "col": "FGA",
          "idx": "WAS",
          "val": 29
        },
        {
          "col": "FG",
          "idx": "WAS",
          "val": 13
        }
      ]
    },
    {
      "name": "stats",
      "source": "table",
      "transform": [
        {
          "keys": [
            "data.idx"
          ],
          "type": "facet"
        },
        {
          "type": "stats",
          "value": "data.val"
        }
      ]
    }
  ],
  "height": 500,
  "legends": [
    {
      "fill": "color",
      "offset": 0,
      "title": "Field Goals"
    }
  ],
  "marks": [
    {
      "from": {
        "data": "table",
        "transform": [
          {
            "keys": [
              "data.col"
            ],
            "type": "facet"
          },
          {
            "height": "data.val",
            "point": "data.idx",
            "type": "stack"
          }
        ]
      },
      "marks": [
        {
          "properties": {
            "enter": {
              "fill": {
                "field": "data.col",
                "scale": "color"
              },
              "width": {
                "band": true,
                "offset": -1,
                "scale": "x"
              },
              "x": {
                "field": "data.idx",
                "scale": "x"
              },
              "y": {
                "field": "y",
                "scale": "y"
              },
              "y2": {
                "field": "y2",
                "scale": "y"
              }
            }
          },
          "type": "rect"
        }
      ],
      "type": "group"
    }
  ],
  "padding": {
    "bottom": 50,
    "left": 50,
    "right": 100,
    "top": 10
  },
  "scales": [
    {
      "domain": {
        "data": "table",
        "field": "data.idx"
      },
      "name": "x",
      "padding": 0.1,
      "range": "width",
      "type": "ordinal"
    },
    {
      "domain": {
        "data": "stats",
        "field": "sum"
      },
      "name": "y",
      "nice": true,
      "range": "height",
      "type": "linear"
    },
    {
      "domain": {
        "data": "table",
        "field": "data.col"
      },
      "name": "color",
      "range": "category20",
      "type": "ordinal"
    }
  ],
  "width": 960
}