#!/usr/bin/env python # coding: utf-8 # # MLS National TV Lineup # # By Christian McDonald # # Major League Soccer announced their [2018 national TV lineup](https://www.mlssoccer.com/post/2018/01/04/mls-unveils-2018-us-and-canada-national-broadcast-schedule) on Thursday, Jan. 4th. A quick scan of the lineup seemed heavy on Atlanta, Seattle, Portland and the L.A. and N.Y. teams, but it was hard to know for sure. I didn't see Houston or Dallas very often, nor our maybe-Texas-team-to-be Columbus Crew. # # So I thought I would practice some analysis skills and take a look at the matchups, seeing who was favored in the schedule. Surely TV market size has a lot to do with these results, but we needed another reason to talk about soccer in the office. # # ## Goals # - games by team # - breakdown by language # - Who is a home more # # ## To Do # - This is not DRY. I need to turn the chart/table making into a function, which I could then feed the language table into it. # - I sure would like to add a title to the charts. Perhaps leather is too limited, but it does show how to do it in the [leather docs](http://leather.readthedocs.io/), but it doesn't work the same inside agate. # # ## Of note # - The data was scraped using the Chrome Scraper tool, inserted into a [Google Spreadsheet and cleaned up](https://docs.google.com/spreadsheets/d/e/2PACX-1vT1wVvMBp-OBmOm0tDfc6LZuPoz6D8X7NppUkHn6O51-KoIqjA67S7E9SIjaT3Oo_HT73nEeUUaz_48/pubhtml), then downloaded as CSV. # - The result was used in the story [Columbus Crew SC schedule provides limited exposure for Texas audience](http://www.statesman.com/sports/columbus-crew-schedule-provides-limited-exposure-for-texas-audience/Um8mpSm4HisuWR9ieEeXVI/) # ## What I see ... # # Atlanta is clearly the darling overall, and why not after hosting the [most-attended MLS game ever](http://mercedesbenzstadium.com/atlanta-united-break-mls-attendance-record-4th-largest-attended-match-world/) last year in the new Mercedes Benz stadium. # # Atlanta is also a decent TV market, [ranking 10th for 2016-2017](http://www.nielsen.com/content/dam/corporate/us/en/docs/solutions/measurement/television/2016-2017-nielsen-local-dma-ranks.pdf), the latest rankings I could find. # # MLS is clearly taking advantage of the innaugural season of LAFC to showcase the left-coast big-city rivalry, along with three showings of the New York derby. The popular Seattle and Portland teams are up there with still-relatively-new teams NYC FC and Orlando. # # Austin's maybe-team-to-be gets only two national matches, and the other Texas teams don't fare too well on the English-language channels. That said, FC Dallas and the Houston Dynamo do top the Spanish matchups. # # Atlanta also hosts the most games, with the LA teams and English-language channel Portland games also high on the list. # # See the output below: # In[1]: import agate import warnings warnings.filterwarnings('ignore') # In[2]: raw = agate.Table.from_csv('../data/mlstv.csv') print(raw) # In[3]: # filtering for language both = raw english = raw.where(lambda r: r['Spanish'] is False) spanish = raw.where(lambda r: r['Spanish'] is True) print('len of both: {}'.format(len(both))) print('len of english: {}'.format(len(english))) print('len of spanish: {}'.format(len(spanish))) # ## All National TV # In[4]: # create a single list of all teams in matchups home = both.select(['Home', 'Date']).rename(column_names = ['team', 'date']) visitor = both.select(['Visitor', 'Date']).rename(column_names = ['team', 'date']) teams = agate.Table.merge([visitor, home]) print('len of visitor: {}'.format(len(visitor))) print('len of home: {}'.format(len(home))) print('len of teams: {}\n'.format(len(teams))) teams_pivot = teams.pivot('team').rename(column_names = {'Count': 'Matches'}) teams_chart = teams_pivot.order_by('team').order_by('Matches', reverse=True) teams_chart.print_table(max_rows=None, max_column_width=None) teams_chart.to_csv('../data/all_tv.csv') teams_chart.bar_chart('team', 'Matches','../charts/all_tv.svg') teams_chart.bar_chart('team', 'Matches') # ## English language # In[5]: en_visitor = english.select(['Visitor', 'Date']).rename(column_names = ['team', 'date']) en_home = english.select(['Home', 'Date']).rename(column_names = ['team', 'date']) en_teams = agate.Table.merge([en_visitor, en_home]) print('len of en_visitor: {}'.format(len(en_visitor))) print('len of en_home: {}'.format(len(en_home))) print('len of en_teams: {}\n'.format(len(en_teams))) en_teams_pivot = en_teams.pivot('team').rename(column_names = {'Count': 'Matches'}) en_teams_chart = en_teams_pivot.order_by('team').order_by('Matches', reverse=True) en_teams_chart.to_csv('../data/en_tv.csv') en_teams_chart.print_table(max_rows=None, max_column_width=None) en_teams_chart.bar_chart('team', 'Matches','../charts/en_tv.svg') en_teams_chart.bar_chart('team', 'Matches') # ## Spanish language # In[6]: es_visitor = spanish.select(['Visitor', 'Date']).rename(column_names = ['team', 'date']) es_home = spanish.select(['Home', 'Date']).rename(column_names = ['team', 'date']) es_teams = agate.Table.merge([es_visitor, es_home]) print('len of es_visitor: {}'.format(len(es_visitor))) print('len of es_home: {}'.format(len(es_home))) print('len of es_teams: {}\n'.format(len(es_teams))) es_teams_pivot = es_teams.pivot('team').rename(column_names = {'Count': 'Matches'}) es_teams_chart = es_teams_pivot.order_by('team').order_by('Matches', reverse=True) es_teams_chart.to_csv('../data/es_tv.csv') es_teams_chart.print_table(max_rows=None, max_column_width=None) es_teams_chart.bar_chart('team', 'Matches','../charts/es_tv.svg') es_teams_chart.bar_chart('team', 'Matches') # In[7]: la_derby = both.where(lambda row: row['Match'] in { 'LA Galaxy vs. LAFC', 'LAFC vs. LA Galaxy'}).order_by('Date') la_derby.select(['Date', 'Match', 'Network', 'Spanish']).print_table(max_column_width=None) # In[8]: ny_derby = both.where(lambda row: row['Match'] in { 'New York City FC vs. New York Red Bulls', 'New York Red Bulls vs. New York City FC'}).order_by('Date') ny_derby.select(['Date', 'Match', 'Network', 'Spanish']).print_table(max_column_width=None) # ## Who hosts the most? # # Who hosts the most home games on National TV? # In[9]: home_team_pivot = both.pivot('Home').rename(column_names = {'Count': 'Matches'}).order_by( 'Matches', reverse=True ) home_team_pivot.print_table() # In[10]: # Hosting on English national TV en_home_team_pivot = english.pivot('Home').rename(column_names = {'Count': 'Matches'}).order_by( 'Matches', reverse=True ) en_home_team_pivot.print_table() # In[11]: # Hosting on English national TV es_home_team_pivot = spanish.pivot('Home').rename(column_names = {'Count': 'Matches'}).order_by( 'Matches', reverse=True ) es_home_team_pivot.print_table() # ## Does the same match appear on more than one network? # # I pivot by both Date and Match, which will count records that have the both. I order in reverse to show there are no cases where two matchups appear more than one. I did test this by creating a fake dupe record to make sure it works. # In[12]: dupe_check = both.pivot(['Date', 'Match']) dupe_check.order_by('Count', reverse=True).limit(5).print_table(max_column_width=None) # In[ ]: