In [1]:
url_base='http://www.wrc.com/service/sasCacheApi.php?route={stub}'
In [2]:
itinerary_stub='rallies/{rallyId}/itinerary'
startlists_stub='rallies/{rallyId}/entries'
penalties_stub='rallies/{rallyId}/penalties'
retirements_stub='rallies/{rallyId}/retirements'
stagewinners_stub='rallies/{rallyId}/stagewinners'


overall_stub='stages/{stageId}/results?rallyId={rallyId}'
split_times_stub ='stages/{stageId}/splittimes?rallyId={rallyId}'
stage_times_stage_stub='stages/{stageId}/stagetimes?rallyId={rallyId}'
stage_times_overall_stub='stages/{stageId}/results?rallyId={rallyId}'

championship_stub='championships/{championshipId}'
championship_results_stub='championships/{championshipId}/results'
In [10]:
meta={'rallyId':30}

Scrape page for rallyid

The rallyid used in the API seems to differ from IDs in the data returned by the API, so we need to scrape the rally pages to get the actual ID.

In [5]:
import requests
import re
from bs4 import BeautifulSoup 
In [302]:
results_main_url='http://www.wrc.com/en/wrc/results/wales/stage-times/page/416-238---.html#'
In [ ]:
html=requests.get(results_main_url)
soup=BeautifulSoup(html.content, "html5lib")
#BeautifulSoup has a routine - find_all() - that will find all the HTML tags of a particular sort
#Links are represented in HTML pages in the form <a href="http//example.com/page.html">link text</a>
#Grab all the <a> (anchor) tags...
souplist=soup.findAll("li",{'class':'flag'})
In [310]:
items={}
for s in souplist:
    href=s.find('a')['href']
    if href:
        title=s.find('img')['title']
        title = 'Monaco' if title == 'Monte Carlo' else title
        items[title]=href
items
Out[310]:
{'Argentina': 'http://www.wrc.com/en/wrc/results/argentina/stage-times/page/346-230---.html',
 'Australia': 'http://www.wrc.com/en/wrc/results/australia/stage-times/page/392-235---.html',
 'Finland': 'http://www.wrc.com/en/wrc/results/finland/stage-times/page/376-233---.html',
 'France': 'http://www.wrc.com/en/wrc/results/france/stage-times/page/400-236---.html',
 'Germany': 'http://www.wrc.com/en/wrc/results/germany/stage-times/page/384-234---.html',
 'Italy': 'http://www.wrc.com/en/wrc/results/italy/stage-times/page/360-231---.html',
 'Mexico': 'http://www.wrc.com/en/wrc/results/mexico/stage-times/page/334-228---.html',
 'Monaco': 'http://www.wrc.com/en/wrc/results/monte-carlo/stage-times/page/318-226---.html',
 'Portugal': 'http://www.wrc.com/en/wrc/results/portugal/stage-times/page/342-229---.html',
 'Spain': 'http://www.wrc.com/en/wrc/results/spain/stage-times/page/408-237---.html',
 'Sweden': 'http://www.wrc.com/en/wrc/results/sweden/stage-times/page/326-227---.html',
 'Turkey': 'http://www.wrc.com/en/wrc/results/turkey/stage-times/page/368-232---.html',
 'Wales': 'http://www.wrc.com/en/wrc/results/wales/stage-times/page/416-238---.html'}

Note that the names don't properly match any single column in other tables. Need to match on either country or location in championship table - or change "Monte Carlo" to "Monaco".

In [244]:
url='http://www.wrc.com/en/wrc/results/wales/stage-times/page/416-238---.html'
html=requests.get(url)
In [275]:
m = re.search("var rallyId = '(.+?)'", html.text)
In [276]:
if m:
    print(m.group(1))
40
In [6]:
import pandas as pd
import requests
In [311]:
rallyids={}
for item in items:
    html=requests.get(items[item])
    m = re.search("var rallyId = '(.+?)'", html.text)
    if m:
        rallyids[item] = m.group(1)
rallyids 
Out[311]:
{'Argentina': '34',
 'Australia': '42',
 'Finland': '37',
 'France': '33',
 'Germany': '38',
 'Italy': '36',
 'Mexico': '32',
 'Monaco': '30',
 'Portugal': '35',
 'Spain': '41',
 'Sweden': '31',
 'Turkey': '39',
 'Wales': '40'}

Itinerary

In [12]:
itinerary_json=requests.get( url_base.format(stub=itinerary_stub.format(**meta) ) ).json()
#itinerary_json

If no data is returned, we get an empty list.

Need to check that we do get a response, eg if itinerary_json:

In [14]:
from pandas.io.json import json_normalize

itinerary_event = json_normalize(itinerary_json).drop('itineraryLegs', axis=1)
itinerary_event
Out[14]:
eventId itineraryId name priority
0 26 31 Itinerary 0
In [222]:
itinerary_legs = json_normalize(itinerary_json, record_path='itineraryLegs',
                                meta='eventId').drop('itinerarySections', axis=1)
#?don't need eventId?
itinerary_legs
Out[222]:
itineraryId itineraryLegId legDate name order startListId status eventId
0 31 46 2018-01-25 Thursday 25th January 2018 1 66 Running 26
1 31 47 2018-01-26 Friday 26th January 2018 2 68 Running 26
2 31 48 2018-01-27 Saturday 27th January 2018 3 70 Running 26
3 31 49 2018-01-28 Sunday 28th January 2018 4 73 Running 26
In [205]:
itinerary_sections = json_normalize(itinerary_json, ['itineraryLegs', 'itinerarySections'],
                                    meta='eventId').drop(['stages','controls'],axis=1)
#?don't need eventId?
itinerary_sections
Out[205]:
itineraryLegId itinerarySectionId name order eventId
0 46 106 Section 1 1 26
1 47 107 Section 2 2 26
2 47 108 Section 3 3 26
3 48 109 Section 4 4 26
4 48 110 Section 5 5 26
5 48 111 Section 6 6 26
6 49 112 Section 7 7 26
7 49 113 Section 8 8 26
In [238]:
itinerary_stages=json_normalize(itinerary_json['itineraryLegs'],
                                ['itinerarySections','stages'],
                                meta = 'itineraryLegId')
itinerary_stages.head()
Out[238]:
code distance eventId name number stageId stageType status timingPrecision itineraryLegId
0 SS1 36.69 26 Thoard - Sisteron 1 289 SpecialStage Completed Tenth 46
1 SS2 25.49 26 Bayons - Breziers 1 2 287 SpecialStage Completed Tenth 46
2 SS3 26.72 26 Vitrolles - Oze 1 3 286 SpecialStage Completed Tenth 47
3 SS4 30.54 26 Roussieux - Eygalayes 1 4 285 SpecialStage Completed Tenth 47
4 SS5 15.18 26 Vaumeilh - Claret 1 5 284 SpecialStage Completed Tenth 47
In [86]:
meta['stages']=itinerary_stages['stageId'].tolist()
In [239]:
itinerary_controls=json_normalize(itinerary_json['itineraryLegs'], 
                                  ['itinerarySections','controls'] , meta='itineraryLegId')
itinerary_controls['stageId'] = itinerary_controls['stageId'].fillna(-1).astype(int)
itinerary_controls.head()
Out[239]:
code controlId distance eventId firstCarDueDateTime firstCarDueDateTimeLocal location stageId status targetDuration targetDurationMs timingPrecision type itineraryLegId
0 TC0 1066 NaN 26 2018-01-25T16:50:00 2018-01-25T17:50:00+01:00 Monaco - Depart Day 1 0 Completed 00:00:00 0 Minute TimeControl 46
1 TC1 1133 192.71 26 2018-01-25T20:40:00 2018-01-25T21:40:00+01:00 Thoard 289 Completed 03:50:00 13800000 Minute TimeControl 46
2 SS1 1134 36.69 26 2018-01-25T20:43:00 2018-01-25T21:43:00+01:00 Thoard - Sisteron 289 Completed 00:03:00 180000 Minute StageStart 46
3 SF1 1135 NaN 26 None None Thoard - Sisteron 289 Completed 00:00:00 0 Tenth FlyingFinish 46
4 TC2 1123 30.71 26 2018-01-25T21:48:00 2018-01-25T22:48:00+01:00 Bayons 287 Completed 01:05:00 3900000 Minute TimeControl 46

Startlists

In [116]:
startlists_json=requests.get( url_base.format(stub=startlists_stub.format(**meta) ) ).json()
#startlists_json
In [117]:
startlists = json_normalize(startlists_json).drop('eventClasses', axis=1)
startlists.head()
Out[117]:
codriver.abbvName codriver.code codriver.country.countryId codriver.country.iso2 codriver.country.iso3 codriver.country.name codriver.countryId codriver.firstName codriver.fullName codriver.lastName ... groupId identifier manufacturer.logoFilename manufacturer.manufacturerId manufacturer.name manufacturerId priority status tyreManufacturer vehicleModel
0 J. INGRASSIA ING 76 FR FRA France 76 Julien Julien INGRASSIA INGRASSIA ... 10 1 ford 26 Ford 26 P1 Entry Michelin FIESTA WRC
1 D. BARRITT BAR 235 GB GBR United Kingdom of Great Britain and Northern I... 235 Daniel Daniel BARRITT BARRITT ... 10 2 ford 26 Ford 26 P1 Entry Michelin FIESTA WRC
2 X. PANSERI PAN 76 FR FRA France 76 Xavier Xavier PANSERI PANSERI ... 10 3 ford 26 Ford 26 P1 Entry Michelin FIESTA WRC
3 A. JAEGER JAE 166 NO NOR Norway 166 Anders Anders JAEGER JAEGER ... 10 4 hyundai 33 Hyundai 33 P1 Rejoined Michelin i20 WRC
4 N. GILSOUL GIL 22 BE BEL Belgium 22 Nicolas Nicolas GILSOUL GILSOUL ... 10 5 hyundai 33 Hyundai 33 P1 Entry Michelin i20 WRC

5 rows × 42 columns

In [78]:
startlist_classes = json_normalize(startlists_json,['eventClasses'], 'entryId' )
startlist_classes.head()
Out[78]:
eventClassId eventId name entryId
0 103 26 RC1 1483
1 103 26 RC1 1484
2 103 26 RC1 1485
3 103 26 RC1 1486
4 103 26 RC1 1487

Penalties

In [118]:
penalties_json=requests.get( url_base.format(stub=penalties_stub.format(**meta) ) ).json()
#penalties_json
In [81]:
penalties = json_normalize(penalties_json)
penalties.head()
Out[81]:
controlId entryId penaltyDuration penaltyDurationMs penaltyId reason
0 1123 1496 PT40S 40000 38 4 MN LATE
1 1123 1493 PT10S 10000 39 1 MN LATE
2 1123 1524 PT30S 30000 40 3 MN LATE
3 1123 1516 PT20S 20000 41 2 MN LATE
4 1123 1517 PT30S 30000 42 3 MN LATE

Retirements

In [119]:
retirements_json=requests.get( url_base.format(stub=retirements_stub.format(**meta) ) ).json()
#retirements_json
In [83]:
retirements = json_normalize(retirements_json)
retirements.head()
Out[83]:
controlId entryId reason retirementDateTime retirementDateTimeLocal retirementId
0 1135 1518 Mechanical 2018-01-25T21:40:00Z 2018-01-25T22:40:00+01:00 17
1 1135 1538 Off Road 2018-01-25T22:00:00Z 2018-01-25T23:00:00+01:00 18
2 1135 1546 Mechanical 2018-01-25T22:05:00Z 2018-01-25T23:05:00+01:00 19
3 1123 1528 Mechanical 2018-01-25T23:15:00Z 2018-01-26T00:15:00+01:00 20
4 1123 1534 Mechanical 2018-01-25T23:15:00Z 2018-01-26T00:15:00+01:00 21

 Stagewinners

In [120]:
stagewinners_json=requests.get( url_base.format(stub=stagewinners_stub.format(**meta) ) ).json()
#stagewinners_json
In [85]:
stagewinners = json_normalize(stagewinners_json)
stagewinners.head()
Out[85]:
elapsedDuration elapsedDurationMs entryId stageId stageName
0 00:14:53.2000000 893200 1483 287 Bayons - Breziers 1
1 00:23:16.6000000 1396600 1483 289 Thoard - Sisteron
2 00:08:42.6000000 522600 1484 284 Vaumeilh - Claret 1
3 00:18:25.3000000 1105300 1483 285 Roussieux - Eygalayes 1
4 00:16:32.3000000 992300 1490 286 Vitrolles - Oze 1

Stage Iterator

TO DO: For following stage based tables, create a generic stage iterator function.

 Overall

In [90]:
meta2=meta
stage_overall = pd.DataFrame()
for stageId in meta['stages']:
    meta2['stageId']=stageId
    _stage_overall_json=requests.get( url_base.format(stub=overall_stub.format(**meta2) ) ).json()
    _stage_overall = json_normalize(stage_overall_json)
    _stage_overall['stageId'] = stageId
    stage_overall = pd.concat([stage_overall, _stage_overall])
stage_overall.head()
Out[90]:
diffFirst diffFirstMs diffPrev diffPrevMs entryId penaltyTime penaltyTimeMs position stageTime stageTimeMs totalTime totalTimeMs stageId
0 PT0S 0 PT0S 0 1483 PT0S 0 1 PT38M9.8S 2289800 PT38M9.8S 2289800 289
1 PT17.3S 17300 PT17.3S 17300 1486 PT0S 0 2 PT38M27.1S 2307100 PT38M27.1S 2307100 289
2 PT37.4S 37400 PT20.1S 20100 1491 PT0S 0 3 PT38M47.2S 2327200 PT38M47.2S 2327200 289
3 PT42.4S 42400 PT5S 5000 1490 PT0S 0 4 PT38M52.2S 2332200 PT38M52.2S 2332200 289
4 PT55.4S 55400 PT13S 13000 1489 PT0S 0 5 PT39M5.2S 2345200 PT39M5.2S 2345200 289

Split Times

In [103]:
split_times=pd.DataFrame()
for stageId in meta['stages']:
    meta2['stageId']=stageId
    _stage_split_times_json=requests.get( url_base.format(stub=split_times_stub.format(**meta2) ) ).json()
    _stage_split_times = json_normalize(_stage_split_times_json)
    _stage_split_times['stageId'] = stageId
    split_times = pd.concat([split_times, _stage_split_times])
split_times.head()
Out[103]:
elapsedDuration elapsedDurationMs entryId splitDateTime splitDateTimeLocal splitPointId splitPointTimeId startDateTime startDateTimeLocal stageId
0 PT23M18.17S 1398170 1487 2018-01-25T21:08:18.17Z 2018-01-25T22:08:18.17+01:00 577 13338 2018-01-25T20:45:00 2018-01-25T21:45:00+01:00 289
1 PT19M47.803S 1187803 1489 2018-01-25T21:08:47.803Z 2018-01-25T22:08:47.803+01:00 577 13339 2018-01-25T20:49:00 2018-01-25T21:49:00+01:00 289
2 PT19M37.267S 1177267 1490 2018-01-25T21:06:37.267Z 2018-01-25T22:06:37.267+01:00 577 13341 2018-01-25T20:47:00 2018-01-25T21:47:00+01:00 289
3 PT19M20.088S 1160088 1488 2018-01-25T21:12:20.088Z 2018-01-25T22:12:20.088+01:00 577 13349 2018-01-25T20:53:00 2018-01-25T21:53:00+01:00 289
4 PT22M42.864S 1362864 1484 2018-01-25T21:13:42.864Z 2018-01-25T22:13:42.864+01:00 577 13352 2018-01-25T20:51:00 2018-01-25T21:51:00+01:00 289
In [195]:
split_times[(split_times['entryId']==1487) & (split_times['stageId']==289)]
Out[195]:
elapsedDuration elapsedDurationMs entryId splitDateTime splitDateTimeLocal splitPointId splitPointTimeId startDateTime startDateTimeLocal stageId
0 PT23M18.17S 1398170 1487 2018-01-25T21:08:18.17Z 2018-01-25T22:08:18.17+01:00 577 13338 2018-01-25T20:45:00 2018-01-25T21:45:00+01:00 289
65 PT19M46.543S 1186543 1487 2018-01-25T21:04:46.543Z 2018-01-25T22:04:46.543+01:00 580 13330 2018-01-25T20:45:00 2018-01-25T21:45:00+01:00 289
85 PT19M11.78S 1151780 1487 2018-01-25T21:10:11.78Z 2018-01-25T22:10:11.78+01:00 580 13461 2018-01-25T20:45:00 2018-01-25T21:45:00+01:00 289
129 PT17M27.061S 1047061 1487 2018-01-25T21:02:27.061Z 2018-01-25T22:02:27.061+01:00 581 13325 2018-01-25T20:45:00 2018-01-25T21:45:00+01:00 289
191 PT7M57.321S 477321 1487 2018-01-25T20:52:57.321Z 2018-01-25T21:52:57.321+01:00 582 13318 2018-01-25T20:45:00 2018-01-25T21:45:00+01:00 289
287 PT4M9.425S 249425 1487 2018-01-25T20:49:09.4266667Z 2018-01-25T21:49:09.4266667+01:00 614 13439 2018-01-25T20:45:00 2018-01-25T21:45:00+01:00 289

Stage Times - Stage

In [111]:
stage_times_stage=pd.DataFrame()
for stageId in meta['stages']:
    meta2['stageId']=stageId
    _stage_times_stage_json=requests.get( url_base.format(stub=stage_times_stage_stub.format(**meta2) ) ).json()
    _stage_times_stage = json_normalize(_stage_times_stage_json)
    _stage_times_stage['stageId'] = stageId
    stage_times_stage = pd.concat([stage_times_stage, _stage_times_stage])
stage_times_stage.head()
Out[111]:
diffFirst diffFirstMs diffPrev diffPrevMs elapsedDuration elapsedDurationMs entryId position source stageId stageTimeId status
0 00:00:00 0.0 00:00:00 0.0 00:23:16.6000000 1396600.0 1483 1 Default 289 6639 Completed
1 00:00:07.7000000 7700.0 00:00:07.7000000 7700.0 00:23:24.3000000 1404300.0 1486 2 Default 289 6657 Completed
2 00:00:19.4000000 19400.0 00:00:11.7000000 11700.0 00:23:36 1416000.0 1491 3 Assessed 289 6655 Completed
3 00:00:19.7000000 19700.0 00:00:00.3000000 300.0 00:23:36.3000000 1416300.0 1488 4 Default 289 6649 Completed
4 00:00:24.6000000 24600.0 00:00:04.9000000 4900.0 00:23:41.2000000 1421200.0 1493 5 Default 289 6653 Completed

Stage Times - Overall

In [112]:
stage_times_overall=pd.DataFrame()
for stageId in meta['stages']:
    meta2['stageId']=stageId
    _stage_times_overall_json=requests.get( url_base.format(stub=stage_times_overall_stub.format(**meta2) ) ).json()
    _stage_times_overall = json_normalize(_stage_times_overall_json)
    _stage_times_overall['stageId'] = stageId
    stage_times_overall = pd.concat([stage_times_overall, _stage_times_overall])
stage_times_overall.head()
Out[112]:
diffFirst diffFirstMs diffPrev diffPrevMs entryId penaltyTime penaltyTimeMs position stageTime stageTimeMs totalTime totalTimeMs stageId
0 PT0S 0 PT0S 0 1483 PT0S 0 1 PT23M16.6S 1396600 PT23M16.6S 1396600 289
1 PT7.7S 7700 PT7.7S 7700 1486 PT0S 0 2 PT23M24.3S 1404300 PT23M24.3S 1404300 289
2 PT19.4S 19400 PT11.7S 11700 1491 PT0S 0 3 PT23M36S 1416000 PT23M36S 1416000 289
3 PT24.6S 24600 PT5.2S 5200 1493 PT0S 0 4 PT23M41.2S 1421200 PT23M41.2S 1421200 289
4 PT37.9S 37900 PT13.3S 13300 1490 PT0S 0 5 PT23M54.5S 1434500 PT23M54.5S 1434500 289

 Championships

In [241]:
#http://www.wrc.com/en/wrc/results/championship-standings/page/4176----.html
championshipClasses = { 'WRC': {
                                    'Driver': '6',
                                    'Co-Driver': '7',
                                    'Manufacturers': '8'
                                  },
                                  'WRC 2': {
                                    'Driver': '10',
                                    'Co-Driver': '11',
                                    'Manufacturers': '9'
                                  },
                                  'WRC 3': {
                                    'Driver': '13',
                                    'Co-Driver': '14',
                                    'Manufacturers': '12'
                                  },
                                  'JWRC': {
                                    'Driver': '15',
                                    'Co-Driver': '16'
                                  }
                      }

We can extract the javascript that declares the championship codes from the HTML page and convert it to a Python dict.

In [263]:
import json
import re
import requests

url='http://www.wrc.com/en/wrc/results/championship-standings/page/4176----.html'

html2=requests.get(url).text
In [300]:
m = re.search("var championshipClasses = (.*?);", html2, re.DOTALL)
mm=m.group(1).replace('\n','').replace("'",'"')
d=json.loads(mm)
#https://stackoverflow.com/a/35758583/454773
championshipClasses={k.replace(' ', ''): v for k, v in d.items()}
championshipClasses
Out[300]:
{'JWRC': {'Co-Driver': '16', 'Driver': '15'},
 'WRC': {'Co-Driver': '7', 'Driver': '6', 'Manufacturers': '8'},
 'WRC2': {'Co-Driver': '11', 'Driver': '10', 'Manufacturers': '9'},
 'WRC3': {'Co-Driver': '14', 'Driver': '13', 'Manufacturers': '12'}}
In [184]:
drivers = 6
codrivers = 7
manufacturer = 8

championships={}

champ_num = drivers
In [186]:
meta2['championshipId']= champ_num
championship_json=requests.get( url_base.format(stub=championship_stub.format(**meta2) ) ).json()
#championship_json
In [187]:
championship = json_normalize(championship_json).drop(['championshipEntries','championshipRounds'], axis=1)
championship.head()
Out[187]:
championshipId fieldFiveDescription fieldFourDescription fieldOneDescription fieldThreeDescription fieldTwoDescription name seasonId type
0 6 TyreManufacturer Manufacturer FirstName CountryISO3 LastName FIA World Rally Championship for Drivers 1 Person
In [188]:
championships={}
championship_dict = championship.to_dict()
championships[champ_num] = {c:championship_dict[c][0] for c in championship_dict}
championships
Out[188]:
{6: {'championshipId': 6,
  'fieldFiveDescription': 'TyreManufacturer',
  'fieldFourDescription': 'Manufacturer',
  'fieldOneDescription': 'FirstName',
  'fieldThreeDescription': 'CountryISO3',
  'fieldTwoDescription': 'LastName',
  'name': 'FIA World Rally Championship for Drivers',
  'seasonId': 1,
  'type': 'Person'}}
In [191]:
renamer={c.replace('Description',''):championships[champ_num][c] for c in championships[champ_num] if c.startswith('field')}
renamer
Out[191]:
{'fieldFive': 'TyreManufacturer',
 'fieldFour': 'Manufacturer',
 'fieldOne': 'FirstName',
 'fieldThree': 'CountryISO3',
 'fieldTwo': 'LastName'}
In [192]:
championship_entries = json_normalize(championship_json,['championshipEntries'] )
championship_entries = championship_entries.rename(columns=renamer)
championship_entries = championship_entries[[c for c in _championship_entries.columns if c!='']]
championship_entries
Out[192]:
championshipEntryId championshipId entrantId TyreManufacturer Manufacturer FirstName CountryISO3 LastName manufacturerId personId tyreManufacturer
0 31 6 None Michelin ford Sébastien FRA OGIER 26 670 Michelin
1 32 6 None Michelin toyota Ott EST TÄNAK 84 524 Michelin
2 33 6 None Michelin toyota Jari-Matti FIN LATVALA 84 526 Michelin
3 34 6 None Michelin toyota Esapekka FIN LAPPI 84 548 Michelin
4 35 6 None Michelin citroen Kris GBR MEEKE 13 550 Michelin
5 36 6 None Michelin ford Elfyn GBR EVANS 26 534 Michelin
6 37 6 None Michelin hyundai Thierry BEL NEUVILLE 33 522 Michelin
7 38 6 None Michelin ford Bryan FRA BOUFFIER 26 536 Michelin
8 39 6 None Michelin citroen Craig IRL BREEN 13 552 Michelin
9 40 6 None Michelin skoda Jan CZE KOPECKY 77 564 Michelin
10 45 6 None Michelin hyundai Andreas NOR MIKKELSEN 33 538 Michelin
In [161]:
championship_rounds = json_normalize(championship_json,['championshipRounds'] ).drop('event', axis=1)
championship_rounds
Out[161]:
championshipId eventId order
0 8 26 1
1 8 27 2
2 8 28 3
3 8 29 4
4 8 30 5
5 8 31 6
6 8 32 7
7 8 33 8
8 8 34 9
9 8 35 10
10 8 36 11
11 8 37 12
12 8 38 13
In [162]:
_events_json = json_normalize(championship_json,['championshipRounds' ])['event']
championship_events = json_normalize(_events_json)
championship_events.head()
Out[162]:
categories country.countryId country.iso2 country.iso3 country.name countryId eventId finishDate location mode name organiserUrl slug startDate surfaces timeZoneId timeZoneName timeZoneOffset trackingEventId
0 None 147 MC MCO Monaco 147 26 2018-01-28 Monte Carlo Rally Rallye Monte-Carlo https://sas.blob.core.windows.net/cdn/EventSma... rallye-monte-carlo-2018 2018-01-25 W. Europe Standard Time (UTC+01:00) Amsterdam, Berlin, Bern, Rome, Sto... 60 2586
1 None 215 SE SWE Sweden 215 27 2018-02-18 Sweden Rally Rally Sweden rally-sweden-2018 2018-02-15 W. Europe Standard Time (UTC+01:00) Amsterdam, Berlin, Bern, Rome, Sto... 60 2587
2 None 144 MX MEX Mexico 144 28 2018-03-11 Leon Rally Rally Guanajuato México rally-guanajuato-mexico-2018 2018-03-08 Central Standard Time (Mexico) (UTC-06:00) Guadalajara, Mexico City, Monterrey -360 2589
3 None 76 FR FRA France 76 29 2018-04-08 Bastia Rally Tour de Corse tour-de-corse-2018 2018-04-05 Jordan Standard Time (UTC+02:00) Amman 120 2590
4 None 11 AR ARG Argentina 11 30 2018-04-29 Villa Carlos Paz Rally YPF Rally Argentina ypf-rally-argentina-2018 2018-04-26 Argentina Standard Time (UTC-03:00) City of Buenos Aires -180 2591

Championship Results

In [166]:
championship_results_json=requests.get( url_base.format(stub=championship_results_stub.format(**meta2) ) ).json()
championship_results = json_normalize(championship_results_json)
championship_results.head()
Out[166]:
championshipEntryId championshipId dropped eventId pointsBreakdown position publishedStatus status totalPoints
0 31 6 False 26 25+1 1 Published Finished 26
1 32 6 False 26 18+0 2 Published Finished 18
2 33 6 False 26 15+2 3 Published Finished 17
3 34 6 False 26 6+0 7 Published Finished 6
4 35 6 False 26 12+5 4 Published Finished 17