Notebook
#This is a literal approach and is DEPRECATED
def getRally_URLs(results_main_url=None):
    '''
    Scrape the WRC results index page and return a dict mapping
    rally name -> results-page URL.

    results_main_url: page to scrape; defaults to the WRC stage-times index page.
    Returns: dict of {rally title: href}.
    '''
    if results_main_url is None:
        results_main_url='http://www.wrc.com/en/wrc/results/wales/stage-times/page/416-238---.html#'
    html=requests.get(results_main_url)
    soup=BeautifulSoup(html.content, "html5lib")
    #BeautifulSoup has a routine - find_all() - that will find all the HTML tags of a particular sort
    #Links are represented in HTML pages as anchor tags: <a href="...">link text</a>
    #Grab all the list items carrying a country flag...
    souplist=soup.findAll("li",{'class':'flag'})
    items={}
    for s in souplist:
        href=s.find('a')['href']
        if href:
            title=s.find('img')['title']
            #Normalise the event name used elsewhere in this file
            title = 'Monaco' if title == 'Monte Carlo' else title
            items[title]=href
    return items

def listRallies(display=True, **kwargs):
    '''
    Print the known rally names (display=True), or return the
    {rally name: URL} dict (display=False).

    kwargs are passed through to getRally_URLs().
    '''
    rallyURLs = getRally_URLs(**kwargs)
    if display:
        print( ', '.join(rallyURLs.keys()) )
    else:
        #BUGFIX: previously returned the unrelated function object `getRallyIDs`,
        #leaving the computed rallyURLs dict unused.
        return rallyURLs
#This approach makes literal calls to an original HTML page and is DEPRECATED
def _getRallyID(rallyURL):
    '''
    Fetch a rally results page and pull out the embedded rallyId,
    or None when the page carries no `var rallyId = '...'` snippet.
    '''
    page = requests.get(rallyURL)
    match = re.search("var rallyId = '(.+?)'", page.text)
    return match.group(1) if match else None

def getRallyIDs(rally=None,results_main_url=None):
    '''
    Return a dict mapping rally name -> rallyId, scraped per rally page.

    rally: when given and known, restrict the lookup to that single rally.
    results_main_url: passed through to getRally_URLs().
    '''
    lookup = getRally_URLs(results_main_url)
    #if we know the rally, just get that one..
    if rally in lookup:
        lookup = {rally: lookup[rally]}
    return {event: _getRallyID(url) for event, url in lookup.items()}
def _stage_iterator(meta, stub):
    ''' Iterate through a list of stageId values and get requested resource. '''
    params = {'rallyId': meta['rallyId']}
    frames = [pd.DataFrame()]
    for stage_id in meta['stages']:
        params['stageId'] = stage_id
        #Resolve the per-stage API URL from the shared stub templates
        resource_url = url_base.format(stub=stubs[stub].format(**params))
        payload = requests.get(resource_url).json()
        frame = json_normalize(payload)
        frame['stageId'] = stage_id
        frames.append(frame)
    #Single concat of all per-stage frames (sort=False, as before)
    return pd.concat(frames, sort=False)
def championship_tablesOLD(champ_class=None, champ_typ=None):
    ''' Get all championship tables in a particular championship and / or class.

    champ_class: championship class code (None -> all classes).
    champ_typ: championship type code (None -> all types within each class).
    Returns a 5-tuple:
        (championship_lookup, championship_results, championship_entries_all,
         championship_rounds, championship_events)
    where championship_entries_all is a dict keyed by championship type.
    '''
    #if championship is None then get all
    championship_lookup = pd.DataFrame()
    championship_entries_all = {}
    championship_rounds = pd.DataFrame()
    championship_events = pd.DataFrame()
    championship_results = pd.DataFrame()
    championship_codes = _get_championship_codes()
    _class_codes = championship_codes.keys() if champ_class is None else [champ_class]
    for champClass in _class_codes:
        _champ_typ = championship_codes[champClass].keys() if champ_typ is None else [champ_typ]
        for champType in _champ_typ:
            if champType not in championship_entries_all:
                championship_entries_all[champType] = pd.DataFrame()
            champ_num = championship_codes[champClass][champType]
            meta2={'championshipId': champ_num}
            championship_url = url_base.format(stub=stubs['championship'].format(**meta2) )
            print(championship_url)
            championship_json=requests.get( championship_url ).json()
            if championship_json:
                #Flatten the top-level record, keeping the nested lists for later passes
                _championship_lookup = json_normalize(championship_json).drop(['championshipEntries','championshipRounds'], axis=1)
                _championship_lookup['_codeClass'] = champClass
                _championship_lookup['_codeTyp'] = champType
                championship_lookup = pd.concat([championship_lookup,_championship_lookup],sort=True)
                championships={}
                championship_dict = _championship_lookup.to_dict()
                championships[champ_num] = {c:championship_dict[c][0] for c in championship_dict}
                #Map the generic fieldNDescription columns onto their real names
                renamer={c.replace('Description',''):championships[champ_num][c] for c in championships[champ_num] if c.startswith('field')}
                _championship_entries = json_normalize(championship_json,['championshipEntries'] )
                _championship_entries = _championship_entries.rename(columns=renamer)
                #Drop columns whose renamed label came out empty
                _championship_entries = _championship_entries[[c for c in _championship_entries.columns if c!='']]
                #pd.concat sort=False to retain current behaviour
                championship_entries_all[champType] = pd.concat([championship_entries_all[champType],_championship_entries],sort=False)
                _championship_rounds = json_normalize(championship_json,['championshipRounds'] ).drop('event', axis=1)
                championship_rounds = pd.concat([championship_rounds,_championship_rounds],sort=False).drop_duplicates()
                _events_json = json_normalize(championship_json,['championshipRounds' ])['event']
                _championship_events = json_normalize(_events_json)
                #TO DO: Season id -> https://www.wrc.com/service/sasCacheApi.php?route=seasons/
                # TO DO: list of championships: eg https://www.wrc.com/service/sasCacheApi.php?route=seasons/4
                #Below also available as eg https://www.wrc.com/service/sasCacheApi.php?route=seasons/4/championships/24
                championship_events = pd.concat([championship_events,_championship_events],sort=False).drop_duplicates()
                _championship_results = _get_single_json_table(meta2, 'championship_results')
                championship_results = pd.concat([championship_results, _championship_results],sort=False)
    for k in championship_entries_all:
        #BUGFIX: reset_index returns a new frame; the result was previously discarded,
        #so the intended index reset never happened. Assign it back.
        championship_entries_all[k] = championship_entries_all[k].reset_index(drop=True)
        if k in ['Driver', 'Co-Driver']:
            championship_entries_all[k] = championship_entries_all[k].rename(columns={'TyreManufacturer':'ManufacturerTyre'})
    return championship_lookup.reset_index(drop=True), \
           championship_results.reset_index(drop=True), \
           championship_entries_all, \
           championship_rounds.reset_index(drop=True), \
           championship_events.reset_index(drop=True)
#Notebook scratch cell: resolve rally metadata then fetch its itinerary.
#NOTE(review): `set_rallyId2`, `getItinerary`, `name` and `year` are presumably
#defined in earlier notebook cells not shown here — confirm before running.
_meta=set_rallyId2(name, year)
#Keep only the fourth element of the itinerary tuple as _s; the rest is discarded here.
_,_,_,_s,_ = getItinerary(_meta)
#Bare expression: displays _meta when run as the last line of a notebook cell.
_meta