import os
import numpy as np
import pandas as pd
#from pandas.api.types import CategoricalDtype
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from sklearn.preprocessing import MinMaxScaler
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
# Seasons to load; add more years to analyse several seasons together.
years = [2020]

# Download each season's play-by-play file.  The frames are collected and
# concatenated once: DataFrame.append was deprecated in pandas 1.4 and
# removed in 2.0, and appending inside the loop re-copied the accumulated
# frame on every pass.
season_frames = []
for i in years:
    i_data = pd.read_csv(
        'https://github.com/guga31bb/nflfastR-data/blob/master/data/'
        'play_by_play_' + str(i) + '.csv.gz?raw=True',
        compression='gzip',
        low_memory=False,
    )
    season_frames.append(i_data)

if season_frames:
    # ignore_index gives every play a unique row label even when several
    # seasons are stacked; sort_index(axis=1) keeps the alphabetical column
    # order that append(sort=True) used to produce.
    data = pd.concat(season_frames, ignore_index=True).sort_index(axis=1)
else:
    # pd.concat raises on an empty list; keep the old "empty frame" result.
    data = pd.DataFrame()
# Fragments of column names this passing analysis never uses.  Every column
# whose name contains at least one fragment is discarded.
unused_column_fragments = [
    'epa', 'wp', 'home', 'away', 'tackle', 'fumble', 'punt', 'kick',
    'xyac', 'prob', 'stadium', 'blocked', 'extra', 'drive', 'fantasy',
    'field_goal', 'jersey', 'lateral', 'pass_def', 'qb_hit', 'return',
    'run', 'rush', 'two', '_down_', 'end_', '_post', 'series_', 'td_',
    'penalty',
]
unused_columns = [
    column for column in data.columns
    if any(fragment in column for fragment in unused_column_fragments)
]
data = data.drop(columns=unused_columns)

# Keep only pass plays.  A boolean mask is used instead of drop(index=...):
# dropping by label removes every row sharing that label, which discards
# the wrong rows whenever the index contains duplicates.
data = data[data.play_type_nfl == 'PASS']

# Remaining individual columns that are not needed (raises KeyError if any
# of them is missing, as before).
data = data.drop(columns=['cp', 'cpoe', 'ep', 'pass_attempt', 'pass_oe', 'sp', 'touchback', 'weather'])
data.shape
# Output: (18983, 95)
# Show the column names that remain after the drops above.
data.columns
# Output: Index(['aborted_play', 'air_yards', 'complete_pass', 'defteam', 'defteam_score', 'defteam_timeouts_remaining', 'desc', 'div_game', 'down', 'first_down', 'game_date', 'game_half', 'game_id', 'game_seconds_remaining', 'goal_to_go', 'half_seconds_remaining', 'id', 'incomplete_pass', 'interception', 'interception_player_id', 'interception_player_name', 'location', 'name', 'nfl_api_id', 'no_huddle', 'old_game_id', 'order_sequence', 'out_of_bounds', 'pass', 'pass_length', 'pass_location', 'pass_touchdown', 'passer', 'passer_id', 'passer_player_id', 'passer_player_name', 'passing_yards', 'play', 'play_clock', 'play_deleted', 'play_id', 'play_type', 'play_type_nfl', 'posteam', 'posteam_score', 'posteam_timeouts_remaining', 'posteam_type', 'qb_dropback', 'qb_kneel', 'qb_scramble', 'qb_spike', 'qtr', 'quarter_end', 'quarter_seconds_remaining', 'receiver', 'receiver_id', 'receiver_player_id', 'receiver_player_name', 'receiving_yards', 'replay_or_challenge', 'replay_or_challenge_result', 'result', 'roof', 'sack', 'safety', 'score_differential', 'season', 'season_type', 'series', 'shotgun', 'side_of_field', 'special', 'special_teams_play', 'spread_line', 'st_play_type', 'start_time', 'success', 'surface', 'temp', 'time', 'time_of_day', 'timeout', 'timeout_team', 'total', 'total_line', 'touchdown', 'week', 'wind', 'xpass', 'yardline_100', 'yards_after_catch', 'yards_gained', 'ydsnet', 'ydstogo', 'yrdln'], dtype='object')
# https://stackoverflow.com/questions/40372030/pandas-round-to-the-nearest-n
def custom_round(x, base=5):
    """Snap ``x`` down to the nearest multiple of ``base``.

    Despite the name this floors rather than rounds: 12 becomes 10 and
    -3 becomes -5 for the default base of 5.  Works on anything that
    supports ``//`` and ``*`` (plain numbers, pandas Series, ...).
    """
    bucket_index = x // base
    return bucket_index * base
def plot_completions_violin(week='ALL', team='ALL'):
    """Draw split violin plots of air yards for complete vs. incomplete passes.

    One violin is drawn per pass location (left / middle / right), split by
    the ``complete_pass`` flag.  Rows are taken from the module-level
    ``data`` frame.

    Args:
        week: Week to plot (int or numeric string), or 'ALL' for every week.
        team: Value matched against ``posteam``, or 'ALL' for every team.

    Returns:
        None.  The figure is shown with ``plt.show()``; nothing is drawn
        when no rows match the selection.
    """
    # Build the row selection incrementally; 'ALL' leaves a filter off.
    selected = np.ones(len(data), dtype=bool)
    if week != 'ALL':
        selected &= (data.week == int(week)).to_numpy()
    if team != 'ALL':
        selected &= (data.posteam == team).to_numpy()
    plot_subdata = data[selected]

    total_passes = len(plot_subdata)
    if total_passes == 0:
        return None
    # Series.sum skips missing values, so int() cannot be handed a NaN.
    completions = int(plot_subdata.complete_pass.sum())

    # The theme must be set before the figure exists: axes pick up their
    # face colour from rcParams when they are created, so setting it after
    # plotting only affected the *next* figure.
    sns.set_theme(rc={
        'axes.facecolor': '#196F0C',
        'figure.facecolor': '#196F0C',
        'axes.labelcolor': 'white',
        'axes.titlecolor': 'white',
        'axes.titlesize': 20,
        'xtick.color': 'white',
        'ytick.color': 'white',
    })

    plt.figure(figsize=(15, 5))
    ax = sns.violinplot(data=plot_subdata,
                        x='air_yards', y='pass_location', order=['left', 'middle', 'right'],
                        hue='complete_pass', split=True, hue_order=[1.0, 0.0],
                        inner='quartile',
                        orient='h',
                        palette={1.0: '#1479FC', 0.0: '0.85'},
                        linewidth=0.5)
    ax.xaxis.set_major_locator(plt.MultipleLocator(10))
    ax.xaxis.set_minor_locator(plt.MultipleLocator(5))
    # The on/off flag is passed positionally: its keyword was renamed from
    # ``b`` to ``visible`` in Matplotlib 3.5 and ``b`` no longer works in
    # current releases, while the positional form works in every version.
    ax.grid(True, which='minor', axis='x', linewidth=0.5)

    plt.setp(ax.collections, edgecolor='black')
    plt.xlabel('Air Yards (from LOS)')
    plt.ylabel('Pass Location')
    plt.title(f'Passing success ({completions}/{total_passes})')
    plt.legend(title = 'Completions')
    plt.show()
# Dropdown choices for the interactive plots: every week and every possession
# team found in the data, each followed by an 'ALL' entry.  Appending the
# string turns the week numbers into strings as well, which is why the plot
# functions convert the chosen week back with int().
weeks = np.append(data.week.unique(), 'ALL')
teams = np.append(np.sort(data.posteam.unique()), 'ALL')
@interact
def f(week=weeks, team=teams):
    """Redraw the violin plot for the week and team chosen in the dropdowns."""
    return plot_completions_violin(week=week, team=team)
# Output: interactive(children=(Dropdown(description='week', options=('1', '2', '3', '4', '5', '6', '7', '8', '9', '10',…
#one_game = data[(data.week == 12) & (data.posteam == 'KC')].copy()
def prepare_passing_accuracy_data(one_game):
    """Aggregate pass plays into completion counts per air-yard bucket and location.

    Args:
        one_game: DataFrame of pass plays with ``air_yards``,
            ``pass_location`` and ``complete_pass`` columns.  It is left
            untouched; the bucket column is added to a copy.

    Returns:
        A new DataFrame with one row per (air-yard bucket, pass location)
        pair and the columns ``air_yards_rounded``, ``location``
        (capitalized), ``completions``, ``passes``, ``location_int``,
        ``accuracy`` (completions / passes) and ``text``
        ("completions/passes"), sorted by ``location_int``.
    """
    # assign() returns a new frame, so neither the caller's frame nor a
    # slice of the global data gets an extra column written into it (the
    # in-place write also triggered SettingWithCopyWarning on slices).
    bucketed = one_game.assign(air_yards_rounded=custom_round(one_game.air_yards))

    summary = (
        bucketed
        .groupby(by=['air_yards_rounded', 'pass_location'], as_index=False)
        .agg(
            completions=('complete_pass', 'sum'),
            passes=('complete_pass', 'count'),
        )
        .rename(columns={'pass_location': 'location'})
    )

    # Numeric sort key: right=0, middle=1, left=2.  map() is used rather than
    # replace() because it builds a numeric column directly instead of
    # substituting values inside a string column and relying on downcasting.
    summary['location_int'] = summary.location.map({'left': 2, 'middle': 1, 'right': 0})
    summary['location'] = summary.location.str.capitalize()
    summary['accuracy'] = summary.completions / summary.passes
    summary['text'] = summary.completions.astype('int64').astype(str) + '/' + summary.passes.astype(str)
    return summary.sort_values(by=['location_int'])
def normalize_passes_size(passes):
    """Scale pass counts linearly into a marker-size range.

    The target range depends on the largest count:
    above 1000 -> 1000..7500, above 50 -> 1000..6000, otherwise 500..5000.
    The smallest count maps to the bottom of the range and the largest to
    the top.

    Args:
        passes: Series (or array) of pass counts.

    Returns:
        Float sizes with the same shape/index as ``passes``.  When every
        count is identical (including a single value) there is nothing to
        spread, so every entry gets the bottom of the range instead of the
        NaN that a 0/0 division would produce.
    """
    smallest = passes.min()
    largest = passes.max()
    if largest > 1000:
        min_range, max_range = (1000, 7500)
    elif largest > 50:
        min_range, max_range = (1000, 6000)
    else:
        min_range, max_range = (500, 5000)

    spread = largest - smallest
    if spread == 0:
        # Multiplying by 0.0 keeps the input's container type and index.
        return passes * 0.0 + min_range
    return (max_range - min_range) * (passes - smallest) / spread + min_range
# Sanity check: the largest count is above 1000, so the sizes should span 1000 to 7500.
normalize_passes_size(pd.Series([10000,200, 40, 20]))
# Output: 0 7500.000000 1 1117.234469 2 1013.026052 3 1000.000000 dtype: float64
def plot_completions_matplotlib(week='ALL', team='ALL'):
    """Draw a bubble chart of passing accuracy by air-yard bucket and location.

    Each bubble is one (air-yard bucket, pass location) pair: its size
    reflects the number of passes, its colour the completion rate, and its
    label reads "completions/passes".  Rows come from the module-level
    ``data`` frame.

    Args:
        week: Week to plot (int or numeric string), or 'ALL' for every week.
        team: Value matched against ``posteam``, or 'ALL' for every team.

    Returns:
        None.  The figure is shown with ``plt.show()``; nothing is drawn
        when the selection contains no passes.
    """
    # Build the row selection incrementally; 'ALL' leaves a filter off.
    selected = np.ones(len(data), dtype=bool)
    if week != 'ALL':
        selected &= (data.week == int(week)).to_numpy()
    if team != 'ALL':
        selected &= (data.posteam == team).to_numpy()

    # Hand the helper a copy so it can never write into the global frame.
    one_game = prepare_passing_accuracy_data(data[selected].copy())
    if one_game.empty:
        # Same behaviour as plot_completions_violin: nothing to plot.
        return None

    total_completions = int(one_game.completions.sum())
    total_passes = int(one_game.passes.sum())

    fig, ax = plt.subplots(1, 1, figsize=(20, 10))
    plt.xlim(-20, 70)
    plt.ylim(-1, 3)
    plt.xticks(fontsize=20, color='white')
    plt.yticks(fontsize=20, color='white')
    ax.xaxis.set_major_locator(plt.MultipleLocator(10))
    ax.xaxis.set_minor_locator(plt.MultipleLocator(5))
    # The on/off flag is passed positionally: its keyword was renamed from
    # ``b`` to ``visible`` in Matplotlib 3.5 and ``b`` no longer works in
    # current releases, while the positional form works in every version.
    ax.grid(True, which='both', axis='x', linewidth=2.5)
    ax.grid(False, which='both', axis='y')
    # Keep the grid lines behind the bubbles.
    ax.set_axisbelow(True)

    one_game['passes_size'] = normalize_passes_size(one_game.passes)
    plt.scatter(x=one_game.air_yards_rounded,
                y=one_game.location,
                s=one_game.passes_size,
                c=one_game.accuracy,
                cmap='Oranges',
                edgecolors='black')
    # Label every bubble with its "completions/passes" text.
    for x, y, text in zip(one_game.air_yards_rounded, one_game.location, one_game.text):
        plt.text(x=x, y=y, s=text,
                 ha = 'center', va = 'center',
                 fontsize=15)

    fig.set_facecolor('#196F0C')
    ax.set_facecolor('#196F0C')
    plt.xlabel('Air yards (from LOS)', fontsize=15, color='white')
    plt.ylabel('')
    plt.title(f'Pass success for Week={week} and Team={team} ({total_completions}/{total_passes})', fontsize=30, color='white')
    plt.show()
# Restore Matplotlib's default rcParams, undoing global style changes such as
# the sns.set_theme(rc=...) call made in plot_completions_violin.
plt.rcParams.update(plt.rcParamsDefault)
#plot_completions_matplotlib(5, 'KC')
@interact
def f(week=weeks, team=teams):
    """Redraw the bubble chart for the week and team chosen in the dropdowns."""
    return plot_completions_matplotlib(week=week, team=team)
# Output: interactive(children=(Dropdown(description='week', options=('1', '2', '3', '4', '5', '6', '7', '8', '9', '10',…
# Static Altair bubble chart of passing accuracy for one team/week selection.
team = 'KC'
week = 'ALL'

# Build the row selection incrementally; 'ALL' leaves a filter off.
selected = np.ones(len(data), dtype=bool)
if week != 'ALL':
    selected &= (data.week == int(week)).to_numpy()
if team != 'ALL':
    selected &= (data.posteam == team).to_numpy()

# Aggregate a copy: passing `data` itself (or a slice of it) lets the helper
# write its bucket column into the global frame / raise SettingWithCopyWarning.
one_game = prepare_passing_accuracy_data(data[selected].copy())
total_completions = int(one_game.completions.sum())
total_passes = int(one_game.passes.sum())

# Bubble layer: position = (air-yard bucket, location), size = pass count,
# colour = completion rate.
points = alt.Chart(one_game).mark_circle().encode(
    alt.X('air_yards_rounded', title='Air Yards from Line of Scrimmage',
          axis=alt.Axis(values=list(range(-20,70,5))),
          scale=alt.Scale(domain=[-20, 70])),
    alt.Y('location', title=None),
    alt.Size('passes', legend=None),
    alt.Color('accuracy', scale=alt.Scale(scheme='lightgreyred'), legend=alt.Legend(title='Accuracy')),
)

# Text layer: the "completions/passes" label centred on each bubble.
text = alt.Chart(one_game).mark_text(
    align='center',
    baseline='middle',
    fontSize=10,
    color='black'
).encode(
    x='air_yards_rounded',
    y='location',
    text='text'
)

# Layer both marks and apply the green "field" styling.  The bare expression
# is the cell's result.
alt.layer(
    points,
    text
).properties(
    title=f'Pass success for Week={week} and Team={team} ({total_completions}/{total_passes})',
    #subtitle='here',
    width=1000,
    height=500
).configure(
    background='#196F0C'
).configure_axis(
    labelColor='white',
    labelFontSize=20,
    titleColor='white',
    titleFontSize=30,
).configure_scale(
    maxSize=6000
).configure_title(
    color='white',
    fontSize=30
).configure_view(
    fill='#19500C'
)#.interactive()
# Season-wide aggregation: completions and attempts per team, air-yard bucket
# and pass location, exported as JSON records for the interactive chart.
season = data.assign(air_yards_rounded=custom_round(data.air_yards))
season = (
    season
    .groupby(by=['posteam', 'air_yards_rounded', 'pass_location'], as_index=False)
    .agg(
        completions=('complete_pass', 'sum'),
        passes=('complete_pass', 'count'),
    )
    .rename(columns={'posteam': 'team', 'pass_location': 'location'})
)
# All derived columns are computed from the aggregated frame as it stands
# here, so the sort key still sees the lower-case location names.
season = season.assign(
    location_int=season.location.replace({'left': 2, 'middle': 1, 'right': 0}),
    location=season.location.str.capitalize(),
    accuracy=season.completions / season.passes,
    text=season.completions.astype('int64').astype(str) + '/' + season.passes.astype(str),
)
# reset_index() without drop=True keeps the old row labels as an 'index'
# column, so that column is part of the exported records too.
season = season.sort_values(by=['location_int']).reset_index()
season_2020_url = 'season_2020.json'
season.to_json(season_2020_url, orient='records')
season.shape
# Output: (1093, 9)
# Interactive season chart: a dropdown picks the team and the layered chart is
# filtered to that team.
# Dropdown listing every team in `season` (sorted), bound to the 'team' field;
# the selection starts on 'NE'.
input_dropdown = alt.binding_select(options=np.sort(season.team.unique()), name='Team: ')
selection = alt.selection_single(fields=['team'], bind=input_dropdown, init={'team': 'NE'})
# The chart loads its data from this URL instead of the in-memory frame.
# NOTE(review): presumably a published copy of the season_2020.json written
# above - confirm the hosted file is kept in sync.
github_season_2020_url = 'https://raw.githubusercontent.com/javendano585/NFL_Data/main/season_2020.json'
# Bubble layer: x = air-yard bucket, y = pass location, size = pass count,
# colour = accuracy; the tooltip shows the "completions/passes" text.
# Field types (:Q / :N) are spelled out because the data is given as a URL.
points = alt.Chart(github_season_2020_url).mark_circle().encode(
alt.X('air_yards_rounded:Q', title='Air Yards from Line of Scrimmage',
axis=alt.Axis(values=list(range(-20,70,5))),
scale=alt.Scale(domain=[-20, 70])),
alt.Y('location:N', title=None),
alt.Size('passes:Q', legend=None),
alt.Color('accuracy:Q', scale=alt.Scale(scheme='lightgreyred'), legend=alt.Legend(title='Accuracy')),
tooltip='text:N'
)
# Text layer: the "completions/passes" label centred on each bubble position.
text = alt.Chart(github_season_2020_url).mark_text(
align='center',
baseline='middle',
fontSize=10,
color='black'
).encode(
x='air_yards_rounded:Q',
y='location:N',
text='text:N'
)
# Layer both marks, apply the green "field" styling, then attach the team
# selection and filter the chart by it.
passing_chart = alt.layer(
points,
text
).properties(
title={
"text": 'Pass success for 2020 Season',
#"subtitle": f'Accuracy = {(total_completions / total_passes):.3f}% ({total_completions}/{total_passes})'
},
width=1000,
height=500
).configure(
background='#196F0C'
).configure_axis(
labelColor='white',
labelFontSize=20,
titleColor='white',
titleFontSize=30,
).configure_scale(
maxSize=6000
).configure_title(
anchor='start',
color='white',
fontSize=30,
subtitleFontSize=15,
subtitleColor='white'
).configure_view(
fill='#19500C'
).add_selection(
selection
).transform_filter(
selection
)#.interactive()
# Write the chart to an HTML file; the bare name on the last line is
# presumably there so the notebook displays the chart.
passing_chart.save('Passing_2020.html')
passing_chart