"""Exploratory analysis of the Germany vs Argentina match event data.

This source is a notebook export: bare expressions below are the tables a
notebook renders when they end a cell, and are kept as-is.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import Image

from footyscripts.footyviz import draw_events, draw_pitch, type_names

# plotting settings
# Equivalent of the `%matplotlib inline` cell magic, spelled as plain Python
# so the file parses; it still requires an IPython kernel to run.
get_ipython().run_line_magic('matplotlib', 'inline')
# `pd.options.display.mpl_style` no longer exists in pandas; matplotlib style
# sheets are the replacement. 'ggplot' is used here -- pick another sheet if a
# different look is wanted.
plt.style.use('ggplot')

df = pd.read_csv("../datasets/germany-vs-argentina-731830.csv",
                 encoding='utf-8', index_col=0)

# standard dimensions
x_size = 105.0
y_size = 68.0
box_height = 16.5 * 2 + 7.32
box_width = 16.5
y_box_start = y_size / 2 - box_height / 2
y_box_end = y_size / 2 + box_height / 2

# event type codes used throughout the analysis
PASS = 1
GOAL = 16
SHOT_TYPES = [13, 14, 15, 16]

# scale of dataset is 100 by 100. Normalizing for a standard soccer pitch size
for column, size in (('x', x_size), ('y', y_size),
                     ('to_x', x_size), ('to_y', y_size)):
    df[column] = df[column] / 100 * size

# creating some measures and classifiers from the original
df['count'] = 1
df['dx'] = df['to_x'] - df['x']
df['dy'] = df['to_y'] - df['y']
df['distance'] = np.sqrt(df['dx'] ** 2 + df['dy'] ** 2)
df['fivemin'] = np.floor(df['min'] / 5) * 5
df['type_name'] = df['type'].map(type_names.get)
df['to_box'] = ((df['to_x'] > x_size - box_width)
                & (y_box_start < df['to_y']) & (df['to_y'] < y_box_end))
df['from_box'] = ((df['x'] > x_size - box_width)
                  & (y_box_start < df['y']) & (df['y'] < y_box_end))
df['on_offense'] = df['x'] > x_size / 2

# preslicing of the main DataFrame in smaller DFs that will be reused along
# the notebook
dfPeriod1 = df[df['period'] == 1]
dfP1Shots = dfPeriod1[dfPeriod1['type'].isin(SHOT_TYPES)]
dfPeriod2 = df[df['period'] == 2]
dfP2Shots = dfPeriod2[dfPeriod2['type'].isin(SHOT_TYPES)]
dfExtraTime = df[df['period'] > 2]
dfETShots = dfExtraTime[dfExtraTime['type'].isin(SHOT_TYPES)]


def label_defenses():
    """Caption the two halves of the current pitch plot with the defending team."""
    captions = ((x_size / 4, "Germany's defense"),
                (x_size * 3 / 4, "Argentina's defense"))
    for x_pos, caption in captions:
        plt.text(x_pos, -3, caption, color='black',
                 bbox=dict(facecolor='white', alpha=0.5),
                 horizontalalignment='center')


def box_entry_passes(events):
    """Return the passes in *events* that start outside the box and end inside it.

    The mask is built from *events* itself, so it always lines up with the
    frame being filtered.
    """
    return events[events['to_box'] & ~events['from_box']
                  & (events['type'] == PASS)]


def box_entry_summary(events):
    """Per team: mean outcome and number of passes played into the box."""
    return (box_entry_passes(events)
            .groupby(['team_name'])
            .agg({'outcome': 'mean', 'count': 'sum'}))


def draw_box_entries(events, shots):
    """Draw a pitch with the passes into the box from *events* plus *shots*.

    Passes with outcome 1 are drawn normally, passes with outcome 0 faded,
    and shots in a separate colour.
    """
    entries = box_entry_passes(events)
    draw_pitch()
    draw_events(entries[entries['outcome'] == 1], mirror_away=True)
    draw_events(entries[entries['outcome'] == 0], mirror_away=True, alpha=0.2)
    draw_events(shots, mirror_away=True, base_color='#a93e3e')
    label_defenses()


def plot_profile(events, shots, title, label_offset=30,
                 fill_argentina_gaps=False):
    """Plot Germany's mean x minus Argentina's mean x for every minute.

    Parameters
    ----------
    events : DataFrame
        Events of the period to profile.
    shots : DataFrame
        Shots of the same period; each one is annotated on the chart.
    title : str
        Chart title.
    label_offset : number
        Vertical distance between an annotated point and its label.
    fill_argentina_gaps : bool
        When true, minutes without Argentina events count as 0 for Argentina
        instead of leaving a gap (used for extra time).

    Returns
    -------
    (fig, avg_x) : the matplotlib figure and the per-minute difference Series.
    """
    fig = plt.figure(figsize=(12, 4))

    # Select 'x' before aggregating: only that column is needed, and the
    # frame also holds text columns that cannot be averaged.
    germany_x = (events[events['team_name'] == 'Germany']
                 .groupby('min')['x'].mean())
    argentina_x = (events[events['team_name'] == 'Argentina']
                   .groupby('min')['x'].mean())
    if fill_argentina_gaps:
        argentina_x = argentina_x.reindex(events['min'].unique(), fill_value=0)
    avg_x = germany_x - argentina_x

    # Positive and negative parts are stacked separately; NaN (a minute
    # missing for one team) fails both comparisons and is drawn as 0.
    minutes = list(avg_x.index.values)
    plt.stackplot(minutes, [value if value > 0 else 0 for value in avg_x])
    plt.stackplot(minutes, [value if value < 0 else 0 for value in avg_x])

    for _, shot in shots.iterrows():
        minute = shot['min']
        height = avg_x.loc[minute]
        # Germany's labels go above the point, Argentina's below.
        side = 1 if shot['team_name'] == 'Germany' else -1
        # The label is passed positionally: the keyword for it was renamed
        # from `s` to `text` in matplotlib.
        plt.annotate(shot['type_name'] + ' (' + shot['team_name'][0] + ")",
                     xy=(minute, height),
                     xytext=(minute - 5, height + label_offset * side),
                     arrowprops=dict(facecolor='black'))

    plt.gca().set_xlabel('minute')
    plt.title(title)
    return fig, avg_x


# ---- first half ----
fig, avg_x = plot_profile(dfPeriod1, dfP1Shots, "First Half Profile")

draw_pitch()
draw_events(dfPeriod1[(dfPeriod1['type'] == PASS)
                      & (dfPeriod1['outcome'] == 1)
                      & (dfPeriod1['team_name'] == 'Argentina')],
            mirror_away=True)
label_defenses()
plt.title("Argentina's passes during the first half")

dfPeriod1.groupby('team_name').agg({'x': 'mean', 'on_offense': 'mean'})

dfPeriod1[dfPeriod1['type'] == PASS].groupby('team_name').agg({'outcome': 'mean'})

draw_box_entries(dfPeriod1, dfP1Shots)

box_entry_summary(dfPeriod1)

# ---- Christoph Kramer ----
# Copy the slice: new columns are added below and must not be written through
# a filtered view of `df`.
dfKramer = df[df['player_name'] == 'Christoph Kramer'].copy()
pd.pivot_table(dfKramer, values='count', index='type_name', columns='min',
               aggfunc='sum', fill_value=0)

dfKramer['action'] = dfKramer['outcome'].map(str) + '-' + dfKramer['type_name']
dfKramer['action'].unique()

score = {'1-LINEUP': 0,
         '1-RUN WITH BALL': 0.5,
         '1-RECEPTION': 0,
         '1-PASS': 1,
         '0-PASS': -1,
         '0-TACKLE (NO CONTROL)': 0,
         '1-CLEAR BALL (OUT OF PITCH)': 0.5,
         '0-LOST CONTROL OF BALL': -1,
         '1-SUBSTITUTION (OFF)': 0}
dfKramer['score'] = dfKramer['action'].map(score.get)

(dfKramer.groupby('min')['score'].sum()
 .reindex(range(32), fill_value=0)
 .plot(kind='bar'))
plt.annotate('Injury', (19, 0.5), (14, 1.1), arrowprops=dict(facecolor='black'))
plt.annotate('Substitution', (31, 0), (22, 1.6),
             arrowprops=dict(facecolor='black'))
plt.gca().set_xlabel('minute')
plt.gca().set_ylabel('no. events')

# ---- second half ----
fig, avg_x = plot_profile(dfPeriod2, dfP2Shots, "Second Half Profile")

dfPeriod2.groupby('team_name').agg({'x': 'mean', 'on_offense': 'mean'})

dfPeriod2[dfPeriod2['type'] == PASS].groupby('team_name').agg({'outcome': 'mean'})

draw_box_entries(dfPeriod2, dfP2Shots)

box_entry_summary(dfPeriod2)

# ---- extra time ----
fig, avg_x = plot_profile(dfExtraTime, dfETShots, "Extra Time Profile",
                          label_offset=20, fill_argentina_gaps=True)

df.groupby(['team_name', 'period']).agg(
    {'count': 'sum', 'x': 'mean', 'on_offense': 'mean'})

# ---- after the goal ----
goal_ix = df[df['type'] == GOAL].index[0]
# Label-based slice: every event recorded after the first goal.
dfAfterGoal = df.loc[goal_ix + 1:]
dfAfterGoalShots = dfAfterGoal[dfAfterGoal['type'].isin(SHOT_TYPES)]

dfAfterGoal.groupby(['team_name', 'period']).agg(
    {'count': 'sum', 'x': 'mean', 'on_offense': 'mean'})

draw_box_entries(dfAfterGoal, dfAfterGoalShots)

dfAfterGoalShots[['min', 'player_name', 'team_name', 'type_name']]

# ---- build-up to the goal ----
goal = goal_ix
# The 30 index labels leading up to the goal, goal included (`.loc` slices are
# inclusive); copied because a column is added below.
dfGoal = df.loc[goal - 30:goal].copy()

draw_pitch()
draw_events(dfGoal[dfGoal.team_name == 'Germany'], base_color='white')
draw_events(dfGoal[dfGoal.team_name == 'Argentina'], base_color='cyan')

# Germany's players involved in the play
dfGoal['progression'] = dfGoal['to_x'] - dfGoal['x']
# 101 is a type code that is not named anywhere in this file.
dfGoal[dfGoal['type'].isin([PASS, 101, GOAL])][['player_name', 'type_name', 'progression']]

# passing accuracy
# NOTE(review): this aggregates over every event type, not only passes --
# confirm whether a `type == PASS` filter was intended.
(df.groupby(['player_name', 'team_name'])
 .agg({'count': 'sum', 'outcome': 'mean'})
 .sort_values('count', ascending=False))

# shots
pd.pivot_table(df[df['type'].isin(SHOT_TYPES)],
               values='count', aggfunc='sum',
               index=['player_name', 'team_name'], columns='type_name',
               fill_value=0, margins=True).sort_values('All', ascending=False)

# defensive play
pd.pivot_table(df[df['type'].isin([7, 8, 49])],
               values='count', aggfunc='sum',
               index=['player_name', 'team_name'], columns='type_name',
               fill_value=0, margins=True).sort_values('All', ascending=False)

Image(url='http://i.minus.com/ibpQVB7fHa5NDj.gif')