Driver Splits¶

Script to rebase the split times for a stage and display them relative to a specified driver.

The intention is to generate a report on a stage that is meaningful to a specified driver.

Ideally the report should:

show where the driver finished on the stage (stage rank)
show the running stage delta at each split compared to each other driver
show the extent to which a driver gained or lost time on each split compared to each other driver
show the start order (so that this can be related to stage rank)
identify the overall position at the end of the stage for each driver
show whether overall positions were gained or lost after the stage (not implemented yet; need a +=- column)

Ideally, we'd use drivercodes, but these are not necessarily unique. For example, there are duplicates in RC2.

In [2]:

if __name__=='__main__':
    %load_ext autoreload
    %autoreload 2

In [59]:

import notebookimport

if __name__=='__main__':
    typ = 'overall' #this defines ???
    #rebase='overallleader' #TO DO
    rebase='OGI'#'PAD'
    MAXINSPLITDELTA=20 #set xlim on the within split delta
    ss='SS3'
    
    #The drivercode inbuilds some intelligence
    drivercode=rebase

In [60]:

sr = __import__("Charts - Stage Results")
ssd = __import__("Charts - Split Sector Delta")

In [61]:

#!pip3 install pytablewriter

Set up a connection to a simple SQLite database, and specify some metadata relating to the actual rally we are interested in.

In [113]:

import os
import sqlite3
import pandas as pd
import pytablewriter
import six
from numpy import NaN

#dbname='wrc18.db'
#dbname='france18.db'
#conn = sqlite3.connect(dbname)

if __name__=='__main__':
    #dbname='wrc18.db'
    dbname='sweden19.db'
    conn = sqlite3.connect(dbname)
    rally='Sweden'
    rc='RC1'
    year=2019
    #ss='SS4'

In [63]:

if __name__=='__main__':
    #This doesn't appear to be used elsewhere in this notebook
    #May support logic for checking stage status?
    stagedetails = sr.dbGetRallyStages(conn, rally).sort_values('number')
    stagedetails.head()

In [64]:

if __name__=='__main__':
    #Let's see what data is available to us in the stagerank_overall table
    stagerank_overall = sr.getEnrichedStageRank(conn, rally, rc=rc, typ='overall')
    print(stagerank_overall.columns)
    display(stagerank_overall.head())

Index(['diffFirst', 'diffFirstMs', 'diffPrev', 'diffPrevMs', 'entryId',
       'penaltyTime', 'penaltyTimeMs', 'position', 'stageTime', 'stageTimeMs',
       'totalTime', 'totalTimeMs', 'stageId', 'class', 'code', 'distance',
       'name', 'snum', 'drivercode', 'entrant.name', 'classrank',
       'gainedClassPos', 'gainedClassLead', 'classPosDiff', 'lostClassLead',
       'retainedClassLead', 'gainedTime', 'gainedOverallPos',
       'gainedOverallLead', 'overallPosDiff', 'lostOverallLead',
       'retainedOverallLead', 'firstinarow'],
      dtype='object')

	diffFirst	diffFirstMs	diffPrev	diffPrevMs	entryId	penaltyTime	position	stageTime	stageTimeMs	...	lostClassLead	retainedClassLead	gainedTime	gainedOverallPos	gainedOverallLead	lostOverallLead	retainedOverallLead	firstinarow
0	PT0S	0	PT0S	0	3533	PT0S	1	PT1M34.9S	94900	...	False	False	False	False	False	False	False	1
1	PT0.8S	800	PT0.8S	800	3526	PT0S	2	PT1M35.7S	95700	...	False	False	False	False	False	False	False	0
2	PT1.1S	1100	PT0.3S	300	3538	PT0S	3	PT1M36S	96000	...	False	False	False	False	False	False	False	0
3	PT1.9S	1900	PT0.8S	800	3531	PT0S	4	PT1M36.8S	96800	...	False	False	False	False	False	False	False	0
4	PT2S	2000	PT0.1S	100	3532	PT0S	5	PT1M36.9S	96900	...	False	False	False	False	False	False	False	0

5 rows × 33 columns

In [65]:

if __name__=='__main__':
    #Get the total stage time for specified driver on each stage
    #We can then subtract this from each driver's time to get their times as rebased delta times
    #  compared to the the specified driver
    rebaser = stagerank_overall[stagerank_overall['drivercode']==drivercode][['code','totalTimeMs']].set_index('code').to_dict(orient='dict')['totalTimeMs']
    display(rebaser)

{'SS1': 95700,
 'SS2': 708900,
 'SS3': 1483400,
 'SS4': 2010900,
 'SS5': 2642200,
 'SS6': 3826100,
 'SS7': 4767800,
 'SS8': 5538100,
 'SS9': 6227000,
 'SS10': 6980100,
 'SS11': 7475000,
 'SS12': 8315400,
 'SS13': 9081300,
 'SS14': 9581400,
 'SS15': 9678400,
 'SS16': 9797400,
 'SS17': 10463900}

In [66]:

def rebaseOverallRallyTime(stagerank_overall, drivercode):
    ''' Rebase overall stage rank relative to a specified driver. '''
    #Get the time for each stage for a particular driver
    rebaser = stagerank_overall[stagerank_overall['drivercode']==drivercode][['code','totalTimeMs']].set_index('code').to_dict(orient='dict')['totalTimeMs']
    #The stagerank_overall['code'].map(rebaser) returns the total time for each stage achieved by the rebase driver
    # stagerank_overall['code'] identifies the stage
    #Subtract this rebase time from the overall stage time for each driver by stage
    stagerank_overall['rebased'] = stagerank_overall['totalTimeMs'] - stagerank_overall['code'].map(rebaser)
    return stagerank_overall

In [67]:

if __name__=='__main__':
    #Preview the stagerank_overall contents for a particular stage
    display(stagerank_overall[stagerank_overall['code']==ss][['drivercode','position','totalTimeMs','code']])

	drivercode	position	totalTimeMs	code
28	TÄN	1	1474000	SS3
29	SUN	2	1477500	SS3
30	NEU	3	1478200	SS3
31	LAT	4	1479400	SS3
32	OGI	5	1483400	SS3
33	LAP	6	1486500	SS3
34	MIK	7	1488600	SS3
35	MEE	8	1495600	SS3
36	LOE	9	1509100	SS3
37	EVA	10	1518000	SS3
38	TID	11	1522600	SS3
39	TUO	22	1585400	SS3
40	BER	24	1607200	SS3
41	GRÖ	25	1610400	SS3

In [68]:

def rebased_stage_stagerank(conn,rally,ss,drivercode,rc='RC1',typ='overall'):
    ''' Calculate the rebased time for each driver, in a specified stage (ss),
        relative to a specified driver (drivercode).
        Returns columns: ['position','totalTimeMs','code','rebased','Overall Time']
    '''
    stagerank_overall = sr.getEnrichedStageRank(conn, rally, rc=rc, typ=typ)
    zz=rebaseOverallRallyTime(stagerank_overall, drivercode)#, ss)
    #Get the rebased times for a particular stage
    #The position corresponds to either overall or stage pos
    zz=zz[zz['code']==ss][['drivercode','position','totalTimeMs','code', 'rebased']].set_index('drivercode')
    #Scale down the time from milliseconds to seconds
    zz['Overall Time']=-zz['rebased']/1000
    return zz

In [69]:

if __name__=='__main__':
    zz=rebased_stage_stagerank(conn,rally,ss, drivercode, rc)
    display(zz)

	position	totalTimeMs	code	rebased	Overall Time
drivercode
TÄN	1	1474000	SS3	-9400	9.4
SUN	2	1477500	SS3	-5900	5.9
NEU	3	1478200	SS3	-5200	5.2
LAT	4	1479400	SS3	-4000	4.0
OGI	5	1483400	SS3	0	0.0
LAP	6	1486500	SS3	3100	-3.1
MIK	7	1488600	SS3	5200	-5.2
MEE	8	1495600	SS3	12200	-12.2
LOE	9	1509100	SS3	25700	-25.7
EVA	10	1518000	SS3	34600	-34.6
TID	11	1522600	SS3	39200	-39.2
TUO	22	1585400	SS3	102000	-102.0
BER	24	1607200	SS3	123800	-123.8
GRÖ	25	1610400	SS3	127000	-127.0

In [70]:

if __name__=='__main__':
    display(stagerank_overall.columns)

Index(['diffFirst', 'diffFirstMs', 'diffPrev', 'diffPrevMs', 'entryId',
       'penaltyTime', 'penaltyTimeMs', 'position', 'stageTime', 'stageTimeMs',
       'totalTime', 'totalTimeMs', 'stageId', 'class', 'code', 'distance',
       'name', 'snum', 'drivercode', 'entrant.name', 'classrank',
       'gainedClassPos', 'gainedClassLead', 'classPosDiff', 'lostClassLead',
       'retainedClassLead', 'gainedTime', 'gainedOverallPos',
       'gainedOverallLead', 'overallPosDiff', 'lostOverallLead',
       'retainedOverallLead', 'firstinarow'],
      dtype='object')

In [71]:

if __name__=='__main__':
    #Preview a long format dataframe describing position and stage code for a specified driver
    #This appears not be be referenced anywhere else in this notebook
    stagerank_stage = sr.getEnrichedStageRank(conn, rally, rc=rc, typ='stage')
    stagerank_stage[stagerank_stage['drivercode']==rebase][['position','code']]

In [72]:

if __name__=='__main__':
    sr.dbGetStageRank(conn, rally, rc, 'overall', stages='SS8').columns

In [73]:

if __name__=='__main__':
    #If there are no splits, ssd.dbGetSplits should optionally get the overall times from elsewhere as a single split
    splits = ssd.dbGetSplits(conn,rally,ss,rc)#, forcesingle=True)

    elapseddurations=ssd.getElapsedDurations(splits)
    display(elapseddurations.head())

	drivercode	elapsedDurationS	startDateTime	section
0	OGI	194.2	2019-02-15T08:08:00	1
1	OGI	379.5	2019-02-15T08:08:00	2
2	OGI	450.6	2019-02-15T08:08:00	3
3	OGI	668.1	2019-02-15T08:08:00	4
65	OGI	774.5	2019-02-15T08:08:00	5

In [74]:

def getRoadPosition(conn,rally,rc='RC1',stages=None):
    ''' Get road position for each driver for a given stage.
    
        NOTE:
        The start time is only available from stages with split times recorded.
        We can't get road position for stages with no splits.
    
    '''
    
    #TO DO - this doesn't seem to work on stage with no splits?
    roadPos=sr.dbGetStageStart(conn, rally, rc, stages)
    roadPos=roadPos[['drivercode','startDateTime','startpos']]
    roadPos.columns=['drivercode','startDateTime','Road Position']
    roadPos = roadPos.set_index('drivercode')
    return roadPos

In [75]:

if __name__=='__main__':
    roadPos = getRoadPosition(conn,rally,rc,ss)
    display(roadPos)

	startDateTime	Road Position
drivercode
OGI	2019-02-15T08:08:00	1
NEU	2019-02-15T08:10:00	2
TÄN	2019-02-15T08:12:00	3
MEE	2019-02-15T08:14:00	4
LOE	2019-02-15T08:16:00	5
LAT	2019-02-15T08:18:00	6
SUN	2019-02-15T08:20:00	7
TID	2019-02-15T08:22:00	8
MIK	2019-02-15T08:24:00	9
LAP	2019-02-15T08:26:00	10
EVA	2019-02-15T08:28:00	11
GRÖ	2019-02-15T08:30:00	12
BER	2019-02-15T08:32:00	13
TUO	2019-02-15T08:34:00	14

In [76]:

def waypoint_rank(splitdurations, on='section',by='elapsedDurationS'):
    ''' Return rank at each waypoint. '''
    splitdurations['split_pos'] = splitdurations.groupby(on)[by].rank(method='min',na_option='keep')#.astype(int)
    
    #For diff to first, do we want first at each waypoint or first at end of stage?
    #Use diff to driver in first at each waypoint
    splitdurations['gapToStageLeader'] = splitdurations[by] - splitdurations.groupby(on)[by].transform('min')
    #For each group, rebase relative to that time
    return splitdurations

In [77]:

if __name__=='__main__':
    rebasedelapseddurations = ssd.rebaseElapsedDurations(elapseddurations, drivercode)
    #This returns columns of the form: drivercode	elapsedDurationS	startDateTime	section	rebased
    #If there are no splits, this is currently an empty dataframe
    rebasedelapseddurations = waypoint_rank(rebasedelapseddurations,by = 'elapsedDurationS')
    display(rebasedelapseddurations.head())

	drivercode	elapsedDurationS	startDateTime	section	split_pos	gapToStageLeader
0	OGI	194.2	2019-02-15T08:08:00	1	7.0	2.2
1	OGI	379.5	2019-02-15T08:08:00	2	7.0	5.7
2	OGI	450.6	2019-02-15T08:08:00	3	7.0	5.1
3	OGI	668.1	2019-02-15T08:08:00	4	7.0	8.6
65	OGI	774.5	2019-02-15T08:08:00	5	7.0	7.5

In [78]:

if __name__=='__main__':
    rebasedelapseddurations

In [79]:

from dakar_utils import sparklineStep, sparkline2, moveColumn

def pivotRebasedElapsedDurations(rebasedelapseddurations, ss):
    ''' Pivot rebased elapsed durations (that is, deltas relative target).
        Rows give stage delta at each split for a specific driver.
        
        Returns columns of the form: ['1','2','3','SS9 Overall']
    '''
    if rebasedelapseddurations.empty:
        return pd.DataFrame(columns=['drivercode']).set_index('drivercode')
    
    rbe=-rebasedelapseddurations.pivot('drivercode','section','rebased')
    
    
    #TO DO: DRY stuff here - mungeForSparkLine function, maybe?
    #Add in a bar chart to identify evolving gap at each waypoint
    #Gap refers to the difference between driver
    col='Rebase Gap'
    rbe[col] = rbe[[c for c in rbe.columns ]].values.tolist()
    rbe[col] = rbe[col].apply(lambda x: [-y for y in x])
    rbe[col] = rbe[col].apply(sparkline2, typ='bar', dot=False)
    
    rbe.columns=list(rbe.columns)[:-2]+['{} Overall'.format(ss), 'Rebase Gap']
    rbe=rbe.sort_values(rbe.columns[-2],ascending = False)
    
    rbe2=rebasedelapseddurations.pivot('drivercode','section','split_pos')
    rbe2.columns=['{}_pos'.format(i) for i in rbe2.columns]
    col='Waypoint Rank'
    rbe2[col] = rbe2[[c for c in rbe2.columns ]].values.tolist()
    rbe2[col] = rbe2[col].apply(lambda x: [-y for y in x])
    rbe2[col] = rbe2[col].apply(sparklineStep)
    rbe = pd.merge(rbe,rbe2[col],left_index=True,right_index=True)
    
    rbe2=rebasedelapseddurations.pivot('drivercode','section','gapToStageLeader')
    rbe2.columns=['{}_pos'.format(i) for i in rbe2.columns]
    col='gapToStageLeader'
    rbe2[col] = rbe2[[c for c in rbe2.columns ]].values.tolist()
    rbe2[col] = rbe2[col].apply(lambda x: [-y for y in x])
    rbe2[col] = rbe2[col].apply(sparkline2, typ='bar', dot=True)
    rbe = pd.merge(rbe,rbe2[col],left_index=True,right_index=True)

    return rbe

if __name__=='__main__':
    rbe = pivotRebasedElapsedDurations(rebasedelapseddurations, ss)

In [80]:

if __name__=='__main__':
    display(rbe)

	1	2	3	4	SS3 Overall	Rebase Gap	Waypoint Rank	gapToStageLeader
drivercode
SUN	2.2	5.7	5.1	8.6	7.5	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
TÄN	1.6	4.1	3.8	6.9	6.4	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
NEU	0.3	3.5	3.7	6.7	6.0	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
LAT	0.8	2.9	3.0	6.0	5.1	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
LAP	0.7	2.8	2.2	5.1	2.9	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
MIK	0.8	2.0	1.1	2.8	0.2	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
OGI	-0.0	-0.0	-0.0	-0.0	-0.0	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
MEE	-0.1	-0.7	-2.3	-1.9	-4.5	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
LOE	-1.6	-2.2	-3.0	-3.9	-6.6	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
EVA	-4.3	-2.7	-4.7	-5.0	-8.8	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
TID	-5.9	-8.4	-9.2	-12.8	-16.4	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
TUO	-10.3	-17.7	-22.7	-33.7	-41.5	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
BER	-10.0	-18.1	-21.4	-60.4	-68.5	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...
GRÖ	-9.0	-15.9	-19.3	-28.0	-73.5	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...

In [81]:

#https://pandas.pydata.org/pandas-docs/stable/style.html
def color_negative(val):
    """
    Takes a scalar and returns a string with
    the css property `'color: red'` for negative
    strings, black otherwise.
    """
    if isinstance(val, str): return ''
    elif val and (isinstance(val,int) or isinstance(val,float)):
        color = 'green' if val < 0 else 'red' if val > 0  else 'black'
    else:
        color='white'
    return 'color: %s' % color

In [82]:

if __name__=='__main__':
    #test of applying style to pandas dataframe
    #Is this really fown to pandas to fail gracefully if df is empty??
    s = rbe.style.applymap(color_negative)
    display(s)

	1	2	3	4	SS3 Overall
drivercode
SUN	2.2	5.7	5.1	8.6	7.5
TÄN	1.6	4.1	3.8	6.9	6.4
NEU	0.3	3.5	3.7	6.7	6
LAT	0.8	2.9	3	6	5.1
LAP	0.7	2.8	2.2	5.1	2.9
MIK	0.8	2	1.1	2.8	0.2
OGI	-0	-0	-0	-0	-0
MEE	-0.1	-0.7	-2.3	-1.9	-4.5
LOE	-1.6	-2.2	-3	-3.9	-6.6
EVA	-4.3	-2.7	-4.7	-5	-8.8
TID	-5.9	-8.4	-9.2	-12.8	-16.4
TUO	-10.3	-17.7	-22.7	-33.7	-41.5
BER	-10	-18.1	-21.4	-60.4	-68.5
GRÖ	-9	-15.9	-19.3	-28	-73.5

In [83]:

# TO DO:
# - calculate stage position at each split
# - calculate rank within that sector

In [84]:

if __name__=='__main__':
    #splitdurations are the time in each sector (time take to get from one split to the next)
    #But what if there are no splits? We get an empty dataframe...
    splitdurations = ssd.getSplitDurationsFromSplits(conn,rally,ss,rc)
    #splitdurations = waypoint_rank(splitdurations, 'section','stageTimeDurationMs' )
    display(splitdurations)#.head()

	drivercode	splitDurationS	startDateTime	stageTimeDurationMs	section
0	OGI	194.2	2019-02-15T08:08:00	774500.0	1
1	OGI	185.3	2019-02-15T08:08:00	774500.0	2
2	OGI	71.1	2019-02-15T08:08:00	774500.0	3
3	OGI	217.5	2019-02-15T08:08:00	774500.0	4
65	OGI	106.4	2019-02-15T08:08:00	774500.0	5
4	NEU	193.9	2019-02-15T08:10:00	768500.0	1
5	NEU	182.1	2019-02-15T08:10:00	768500.0	2
6	NEU	70.9	2019-02-15T08:10:00	768500.0	3
7	NEU	214.5	2019-02-15T08:10:00	768500.0	4
64	NEU	107.1	2019-02-15T08:10:00	768500.0	5
8	TÄN	192.6	2019-02-15T08:12:00	768100.0	1
9	TÄN	182.8	2019-02-15T08:12:00	768100.0	2
10	TÄN	71.4	2019-02-15T08:12:00	768100.0	3
11	TÄN	214.4	2019-02-15T08:12:00	768100.0	4
69	TÄN	106.9	2019-02-15T08:12:00	768100.0	5
12	MEE	194.3	2019-02-15T08:14:00	779000.0	1
13	MEE	185.9	2019-02-15T08:14:00	779000.0	2
14	MEE	72.7	2019-02-15T08:14:00	779000.0	3
15	MEE	217.1	2019-02-15T08:14:00	779000.0	4
62	MEE	109.0	2019-02-15T08:14:00	779000.0	5
16	LOE	195.8	2019-02-15T08:16:00	781100.0	1
17	LOE	185.9	2019-02-15T08:16:00	781100.0	2
18	LOE	71.9	2019-02-15T08:16:00	781100.0	3
19	LOE	218.4	2019-02-15T08:16:00	781100.0	4
61	LOE	109.1	2019-02-15T08:16:00	781100.0	5
20	LAT	193.4	2019-02-15T08:18:00	769400.0	1
21	LAT	183.2	2019-02-15T08:18:00	769400.0	2
22	LAT	71.0	2019-02-15T08:18:00	769400.0	3
23	LAT	214.5	2019-02-15T08:18:00	769400.0	4
60	LAT	107.3	2019-02-15T08:18:00	769400.0	5
...	...	...	...	...	...
32	MIK	193.4	2019-02-15T08:24:00	774300.0	1
33	MIK	184.1	2019-02-15T08:24:00	774300.0	2
34	MIK	72.0	2019-02-15T08:24:00	774300.0	3
35	MIK	215.8	2019-02-15T08:24:00	774300.0	4
63	MIK	109.0	2019-02-15T08:24:00	774300.0	5
36	LAP	193.5	2019-02-15T08:26:00	771600.0	1
37	LAP	183.2	2019-02-15T08:26:00	771600.0	2
38	LAP	71.7	2019-02-15T08:26:00	771600.0	3
39	LAP	214.6	2019-02-15T08:26:00	771600.0	4
59	LAP	108.6	2019-02-15T08:26:00	771600.0	5
40	EVA	198.5	2019-02-15T08:28:00	783300.0	1
41	EVA	183.7	2019-02-15T08:28:00	783300.0	2
42	EVA	73.1	2019-02-15T08:28:00	783300.0	3
43	EVA	217.8	2019-02-15T08:28:00	783300.0	4
57	EVA	110.2	2019-02-15T08:28:00	783300.0	5
44	GRÖ	203.2	2019-02-15T08:30:00	848000.0	1
45	GRÖ	192.2	2019-02-15T08:30:00	848000.0	2
46	GRÖ	74.5	2019-02-15T08:30:00	848000.0	3
47	GRÖ	226.2	2019-02-15T08:30:00	848000.0	4
58	GRÖ	151.9	2019-02-15T08:30:00	848000.0	5
48	BER	204.2	2019-02-15T08:32:00	843000.0	1
49	BER	193.4	2019-02-15T08:32:00	843000.0	2
50	BER	74.4	2019-02-15T08:32:00	843000.0	3
51	BER	256.5	2019-02-15T08:32:00	843000.0	4
56	BER	114.5	2019-02-15T08:32:00	843000.0	5
52	TUO	204.5	2019-02-15T08:34:00	816000.0	1
53	TUO	192.7	2019-02-15T08:34:00	816000.0	2
54	TUO	76.1	2019-02-15T08:34:00	816000.0	3
55	TUO	228.5	2019-02-15T08:34:00	816000.0	4
68	TUO	114.2	2019-02-15T08:34:00	816000.0	5

70 rows × 5 columns

In [85]:

if __name__=='__main__':
    #This will be an empty dataframe if there are no splits
    rebasedSplits = ssd.rebaseSplitDurations(splitdurations, drivercode)
    display(rebasedSplits.head())

	drivercode	splitDurationS	startDateTime	stageTimeDurationMs	section
0	OGI	194.2	2019-02-15T08:08:00	774500.0	1
1	OGI	185.3	2019-02-15T08:08:00	774500.0	2
2	OGI	71.1	2019-02-15T08:08:00	774500.0	3
3	OGI	217.5	2019-02-15T08:08:00	774500.0	4
65	OGI	106.4	2019-02-15T08:08:00	774500.0	5

In [86]:

if __name__=='__main__':
    #preview what's available as a splitduration
    display(splitdurations[splitdurations['drivercode'].isin( ['PAD','NEU'])])

	drivercode	splitDurationS	startDateTime	stageTimeDurationMs	section	rebased
4	NEU	193.9	2019-02-15T08:10:00	768500.0	1	-0.3
5	NEU	182.1	2019-02-15T08:10:00	768500.0	2	-3.2
6	NEU	70.9	2019-02-15T08:10:00	768500.0	3	-0.2
7	NEU	214.5	2019-02-15T08:10:00	768500.0	4	-3.0
64	NEU	107.1	2019-02-15T08:10:00	768500.0	5	0.7

In [87]:

def pivotRebasedSplits(rebasedSplits):
    ''' For each driver row, find the split. '''
    
    #If there are no splits...
    if rebasedSplits.empty:
        return pd.DataFrame(columns=['drivercode']).set_index('drivercode')
    
    rbp=-rebasedSplits.pivot('drivercode','section','rebased')
    rbp.columns=['D{}'.format(c) for c in rbp.columns]
    rbp.sort_values(rbp.columns[-1],ascending =True)
    return rbp

if __name__=='__main__':
    rbp = pivotRebasedSplits(rebasedSplits)
    display(rbp)

	D1	D2	D3	D4	D5
drivercode
BER	-10.0	-8.1	-3.3	-39.0	-8.1
EVA	-4.3	1.6	-2.0	-0.3	-3.8
GRÖ	-9.0	-6.9	-3.4	-8.7	-45.5
LAP	0.7	2.1	-0.6	2.9	-2.2
LAT	0.8	2.1	0.1	3.0	-0.9
LOE	-1.6	-0.6	-0.8	-0.9	-2.7
MEE	-0.1	-0.6	-1.6	0.4	-2.6
MIK	0.8	1.2	-0.9	1.7	-2.6
NEU	0.3	3.2	0.2	3.0	-0.7
OGI	-0.0	-0.0	-0.0	-0.0	-0.0
SUN	2.2	3.5	-0.6	3.5	-1.1
TID	-5.9	-2.5	-0.8	-3.6	-3.6
TUO	-10.3	-7.4	-5.0	-11.0	-7.8
TÄN	1.6	2.5	-0.3	3.1	-0.5

In [88]:

if __name__=='__main__':
    #Just remind ourselves of what is available in the road position data
    display(roadPos)

	startDateTime	Road Position
drivercode
OGI	2019-02-15T08:08:00	1
NEU	2019-02-15T08:10:00	2
TÄN	2019-02-15T08:12:00	3
MEE	2019-02-15T08:14:00	4
LOE	2019-02-15T08:16:00	5
LAT	2019-02-15T08:18:00	6
SUN	2019-02-15T08:20:00	7
TID	2019-02-15T08:22:00	8
MIK	2019-02-15T08:24:00	9
LAP	2019-02-15T08:26:00	10
EVA	2019-02-15T08:28:00	11
GRÖ	2019-02-15T08:30:00	12
BER	2019-02-15T08:32:00	13
TUO	2019-02-15T08:34:00	14

In [89]:

def getDriverSplitReportBaseDataframe(rbe,rbp, zz, roadPos, stageresult, ss):
    #TO DO: return empty w/ proper colnames
    if roadPos.empty: return pd.DataFrame()
    ''' Create a base dataframe for the rebased driver split report. '''
    
    stageresult.columns = ['drivercode','Stage Rank']
    rb2 = pd.merge(rbe,stageresult.set_index('drivercode'),left_index=True, right_index=True)

    rb2=pd.merge(rb2,zz[['position','Overall Time']],left_index=True, right_index=True)
    rb2.rename(columns={'position': 'Overall Position'}, inplace=True)
    
    #The following is calculated rather than being based on the actual timing data / result for the previous stage
    #Would be better to explicitly grab data for previous stage, along with previous ranking
    #display(rb2[['Overall Time','{} Overall'.format(ss)]])
    rb2['Previous'] =  rb2['Overall Time'] - rb2['{} Overall'.format(ss)]
    #Related to this, would be useful to have an overall places gained / lost column
    
    rb2=pd.merge(rb2,rbp,left_index=True, right_index=True)
    rb2=pd.merge(rb2,roadPos[['Road Position']],left_index=True, right_index=True)
    cols=rb2.columns.tolist()
    #Reorder the columns - move Road Position to first column
    rb2=rb2[[cols[-1]]+cols[:-1]]
    
    #reorder cols
    prev = rb2['Previous']
    rb2.drop(labels=['Previous'], axis=1,inplace = True)
    rb2.insert(1, 'Previous', prev)
    
    moveColumn(rb2,'Waypoint Rank',right_of='Previous')
    moveColumn(rb2,'Rebase Gap',right_of='Waypoint Rank')
    #The following line is not correctly locating... it's offsetting by 1 pos to right?
    moveColumn(rb2,'gapToStageLeader',right_of='Overall Position')
    
    
    return rb2

if __name__=='__main__':
    stageresult=sr.getEnrichedStageRank(conn, rally, stages=ss, rc=rc,typ='stage')[['drivercode','position']]
    rb2=getDriverSplitReportBaseDataframe(rbe,rbp, zz, roadPos, stageresult, ss)
    display(rb2)

	Road Position	Previous	Waypoint Rank	Rebase Gap	1	2	3	4	SS3 Overall	Stage Rank	Overall Position	Overall Time	gapToStageLeader	D1	D2	D3	D4	D5
drivercode
SUN	7	-1.6	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	2.2	5.7	5.1	8.6	7.5	1	2	5.9	<img src="data:image/png;base64,iVBORw0KGgoAAA...	2.2	3.5	-0.6	3.5	-1.1
TÄN	3	3.0	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	1.6	4.1	3.8	6.9	6.4	2	1	9.4	<img src="data:image/png;base64,iVBORw0KGgoAAA...	1.6	2.5	-0.3	3.1	-0.5
NEU	2	-0.8	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	0.3	3.5	3.7	6.7	6.0	3	3	5.2	<img src="data:image/png;base64,iVBORw0KGgoAAA...	0.3	3.2	0.2	3.0	-0.7
LAT	6	-1.1	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	0.8	2.9	3.0	6.0	5.1	4	4	4.0	<img src="data:image/png;base64,iVBORw0KGgoAAA...	0.8	2.1	0.1	3.0	-0.9
LAP	10	-6.0	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	0.7	2.8	2.2	5.1	2.9	5	6	-3.1	<img src="data:image/png;base64,iVBORw0KGgoAAA...	0.7	2.1	-0.6	2.9	-2.2
MIK	9	-5.4	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	0.8	2.0	1.1	2.8	0.2	6	7	-5.2	<img src="data:image/png;base64,iVBORw0KGgoAAA...	0.8	1.2	-0.9	1.7	-2.6
OGI	1	0.0	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-0.0	-0.0	-0.0	-0.0	-0.0	7	5	0.0	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-0.0	-0.0	-0.0	-0.0	-0.0
MEE	4	-7.7	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-0.1	-0.7	-2.3	-1.9	-4.5	8	8	-12.2	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-0.1	-0.6	-1.6	0.4	-2.6
LOE	5	-19.1	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-1.6	-2.2	-3.0	-3.9	-6.6	9	9	-25.7	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-1.6	-0.6	-0.8	-0.9	-2.7
EVA	11	-25.8	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-4.3	-2.7	-4.7	-5.0	-8.8	10	10	-34.6	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-4.3	1.6	-2.0	-0.3	-3.8
TID	8	-22.8	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-5.9	-8.4	-9.2	-12.8	-16.4	11	11	-39.2	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-5.9	-2.5	-0.8	-3.6	-3.6
TUO	14	-60.5	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-10.3	-17.7	-22.7	-33.7	-41.5	18	22	-102.0	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-10.3	-7.4	-5.0	-11.0	-7.8
BER	13	-55.3	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-10.0	-18.1	-21.4	-60.4	-68.5	26	24	-123.8	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-10.0	-8.1	-3.3	-39.0	-8.1
GRÖ	12	-53.5	<img src="data:image/png;base64,iVBORw0KGgoAAA...	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-9.0	-15.9	-19.3	-28.0	-73.5	29	25	-127.0	<img src="data:image/png;base64,iVBORw0KGgoAAA...	-9.0	-6.9	-3.4	-8.7	-45.5

In [90]:

if __name__=='__main__':
    display(rb2.dtypes)

Road Position         int64
Previous            float64
Waypoint Rank        object
Rebase Gap           object
1                   float64
2                   float64
3                   float64
4                   float64
SS3 Overall         float64
Stage Rank            int64
Overall Position      int64
Overall Time        float64
gapToStageLeader     object
D1                  float64
D2                  float64
D3                  float64
D4                  float64
D5                  float64
dtype: object

In [91]:

#There seems to be missing tenths?
#Elapsed durations are provided in milliseconds. Need to round correctly to tenths?
#Elapsed times grabbed from ssd.dbGetSplits(conn,rally,ss,rc)

def cleanDriverSplitReportBaseDataframe(rb2, ss):
    ''' Tidy up the driver split report dataframe, replacing 0 values with NaNs that can be hidden.
        Check column names and data types. '''
    
    #TO DO: set proper colnames
    if rb2.empty: return rb2
    
    rb2=rb2.replace(0,NaN)
    #rb2=rb2.fillna('') #This casts columns containing NA to object type which means we can't use nan processing
    
    rb2['Road Position']=rb2['Road Position'].astype(float)
    return rb2

def __styleDriverSplitReportBaseDataframe(rb2, ss):
    ''' Test if basic dataframe styling.
        DEPRECATED. '''
    s=rb2.fillna('').style.applymap(color_negative,
                                    subset=[c for c in rb2.columns if isinstance(c, int) and c not in ['Overall Position', 'Road Position']])
    #data.style.applymap(highlight_cols, subset=pd.IndexSlice[:, ['B', 'C']])

    s.set_caption("{}: running split times and deltas within each split.".format(ss))
    return s
    
if __name__=='__main__':
    rb2c = cleanDriverSplitReportBaseDataframe(rb2.copy(), ss)
    s = __styleDriverSplitReportBaseDataframe(rb2c, ss)
    

In [92]:

from IPython.core.display import HTML

if __name__=='__main__':
    html=s.render()
    display(HTML(html))

SS3: running split times and deltas within each split.
	Road Position	Previous	1	2	3	4	SS3 Overall	Stage Rank	Overall Position	Overall Time	D1	D2	D3	D4	D5
drivercode
SUN	7	-1.6	2.2	5.7	5.1	8.6	7.5	1	2	5.9	2.2	3.5	-0.6	3.5	-1.1
TÄN	3	3	1.6	4.1	3.8	6.9	6.4	2	1	9.4	1.6	2.5	-0.3	3.1	-0.5
NEU	2	-0.8	0.3	3.5	3.7	6.7	6	3	3	5.2	0.3	3.2	0.2	3	-0.7
LAT	6	-1.1	0.8	2.9	3	6	5.1	4	4	4	0.8	2.1	0.1	3	-0.9
LAP	10	-6	0.7	2.8	2.2	5.1	2.9	5	6	-3.1	0.7	2.1	-0.6	2.9	-2.2
MIK	9	-5.4	0.8	2	1.1	2.8	0.2	6	7	-5.2	0.8	1.2	-0.9	1.7	-2.6
OGI	1							7	5
MEE	4	-7.7	-0.1	-0.7	-2.3	-1.9	-4.5	8	8	-12.2	-0.1	-0.6	-1.6	0.4	-2.6
LOE	5	-19.1	-1.6	-2.2	-3	-3.9	-6.6	9	9	-25.7	-1.6	-0.6	-0.8	-0.9	-2.7
EVA	11	-25.8	-4.3	-2.7	-4.7	-5	-8.8	10	10	-34.6	-4.3	1.6	-2	-0.3	-3.8
TID	8	-22.8	-5.9	-8.4	-9.2	-12.8	-16.4	11	11	-39.2	-5.9	-2.5	-0.8	-3.6	-3.6
TUO	14	-60.5	-10.3	-17.7	-22.7	-33.7	-41.5	18	22	-102	-10.3	-7.4	-5	-11	-7.8
BER	13	-55.3	-10	-18.1	-21.4	-60.4	-68.5	26	24	-123.8	-10	-8.1	-3.3	-39	-8.1
GRÖ	12	-53.5	-9	-15.9	-19.3	-28	-73.5	29	25	-127	-9	-6.9	-3.4	-8.7	-45.5

In [93]:

from math import nan
def bg_color(s):
    ''' Set background colour sensitive to time gained or lost.
    '''
    attrs=[]
    for _s in s:
        if _s < 0:
            attr = 'background-color: green; color: white'
        elif _s > 0: 
            attr = 'background-color: red; color: white'
        else:
            attr = ''
        attrs.append(attr)
    return attrs

In [94]:

import seaborn as sns

def moreStyleDriverSplitReportBaseDataframe(rb2,ss, caption=None):
    ''' Style the driver split report dataframe. '''
    
    if rb2.empty: return ''
        
    def _subsetter(cols, items):
        ''' Generate a subset of valid columns from a list. '''
        return [c for c in cols if c in items]
    
    
    #https://community.modeanalytics.com/gallery/python_dataframe_styling/
    # Set CSS properties for th elements in dataframe
    th_props = [
      ('font-size', '11px'),
      ('text-align', 'center'),
      ('font-weight', 'bold'),
      ('color', '#6d6d6d'),
      ('background-color', '#f7f7f9')
      ]

    # Set CSS properties for td elements in dataframe
    td_props = [
      ('font-size', '11px')
      ]

    # Set table styles
    styles = [
      dict(selector="th", props=th_props),
      dict(selector="td", props=td_props)
      ]
    
    #Define colour palettes
    #cmg = sns.light_palette("green", as_cmap=True)
    #The blue palette helps us scale the Road Position column
    # This may help us to help identify any obvious road position effect when sorting stage times by stage rank
    cm=sns.light_palette((210, 90, 60), input="husl",as_cmap=True)

    s2=(rb2.style
        .background_gradient(cmap=cm, subset=_subsetter(rb2.columns, ['Road Position']))
        .applymap(color_negative,
                  subset=[c for c in rb2.columns if isinstance(c, int) and c not in ['Overall Position', 'Road Position']])
        .highlight_min(subset=_subsetter(rb2.columns, ['Overall Position']), color='lightgrey')
        .highlight_max(subset=_subsetter(rb2.columns, ['Overall Time']), color='lightgrey')
        .highlight_max(subset=_subsetter(rb2.columns, ['Previous']), color='lightgrey')
        .apply(bg_color,subset=_subsetter(rb2.columns, ['{} Overall'.format(ss),'{} Overall*'.format(ss), 'Overall Time', 'Previous']))
        .bar(subset=[c for c in rb2.columns if str(c).startswith('D')], align='zero', color=[ '#5fba7d','#d65f5f'])
        .set_table_styles(styles)
        
        #.format({'total_amt_usd_pct_diff': "{:.2%}"})
       )
    
    if caption is not None:
        s2.set_caption(caption)

    #nan issue: https://github.com/pandas-dev/pandas/issues/21527
    return s2.render().replace('nan','')

if __name__=='__main__':
    rb2c = cleanDriverSplitReportBaseDataframe(rb2.copy(), ss)
    s2 = moreStyleDriverSplitReportBaseDataframe(rb2c, ss)
    display(HTML(s2))

	Road Position	Previous	1	2	3	4	SS3 Overall	Stage Rank	Overall Position	Overall Time	D1	D2	D3	D4	D5
drivercode
SUN	7	-1.6	2.2	5.7	5.1	8.6	7.5	1	2	5.9	2.2	3.5	-0.6	3.5	-1.1
TÄN	3	3	1.6	4.1	3.8	6.9	6.4	2	1	9.4	1.6	2.5	-0.3	3.1	-0.5
NEU	2	-0.8	0.3	3.5	3.7	6.7	6	3	3	5.2	0.3	3.2	0.2	3	-0.7
LAT	6	-1.1	0.8	2.9	3	6	5.1	4	4	4	0.8	2.1	0.1	3	-0.9
LAP	10	-6	0.7	2.8	2.2	5.1	2.9	5	6	-3.1	0.7	2.1	-0.6	2.9	-2.2
MIK	9	-5.4	0.8	2	1.1	2.8	0.2	6	7	-5.2	0.8	1.2	-0.9	1.7	-2.6
OGI	1							7	5
MEE	4	-7.7	-0.1	-0.7	-2.3	-1.9	-4.5	8	8	-12.2	-0.1	-0.6	-1.6	0.4	-2.6
LOE	5	-19.1	-1.6	-2.2	-3	-3.9	-6.6	9	9	-25.7	-1.6	-0.6	-0.8	-0.9	-2.7
EVA	11	-25.8	-4.3	-2.7	-4.7	-5	-8.8	10	10	-34.6	-4.3	1.6	-2	-0.3	-3.8
TID	8	-22.8	-5.9	-8.4	-9.2	-12.8	-16.4	11	11	-39.2	-5.9	-2.5	-0.8	-3.6	-3.6
TUO	14	-60.5	-10.3	-17.7	-22.7	-33.7	-41.5	18	22	-102	-10.3	-7.4	-5	-11	-7.8
BER	13	-55.3	-10	-18.1	-21.4	-60.4	-68.5	26	24	-123.8	-10	-8.1	-3.3	-39	-8.1
GRÖ	12	-53.5	-9	-15.9	-19.3	-28	-73.5	29	25	-127	-9	-6.9	-3.4	-8.7	-45.5

In [95]:

if __name__=='__main__':
    sr.dbGetStageRank(conn, rally, rc, typ='stage', stages=ss)[['position','drivercode','classrank']]
#'overall':'stage_times_overall', 'stage_times_overall':'stage_times_overall',
#              'stage':'stage_times_stage', 'stage_times_stage':'stage_times_stage'
#sr.getEnrichedStageRank(conn, rally, typ=typ)

In [ ]:

In [96]:

if __name__=='__main__':
    sr.getDriverCodeBy(conn, rally, ss,'stage')

In [97]:

if __name__=='__main__':
    ss

In [98]:

if __name__=='__main__':
    sr.getEnrichedStageRank(conn, rally, stages=ss,rc=rc,typ='stage')

In [99]:

if __name__=='__main__':
    rebased_stage_stagerank(conn,rally,ss,drivercode,rc=rc, typ='overall')

In [100]:

def getDriverStageReport(conn, rally, ss, drivercode, rc='RC1', typ='overall', order=None, caption=None):
    ''' Generate a dataframe to report overall stage result. '''
    #'Previous',' SS9 Overall', 'Overall Position'	'Overall Time'; stage position by sort order
    
    if order is None: order='stage'
    #change cols depending on what report / sort order ie. remove redundant col
    
    #Get the overall results, rebased
    zz = rebased_stage_stagerank(conn,rally,ss,drivercode,rc=rc, typ='overall')
    zz.rename(columns={'position':'Overall Position'}, inplace=True)
    
    #Get stage result - does it need to be enriched?
    stageresult=sr.getEnrichedStageRank(conn, rally, stages=ss,rc=rc, typ='stage')

    stagerebaser = stageresult[stageresult['drivercode']==drivercode][['code','elapsedDurationMs']].set_index('code').to_dict(orient='dict')['elapsedDurationMs']
    #The stagerank_overall['code'].map(rebaser) returns the total time for each stage achieved by the rebase driver
    # stagerank_overall['code'] identifies the stage
    #Subtract this rebase time from the overall stage time for each driver by stage

    stcol='{} Time'.format(ss)
    sdeltacol='{} Overall'.format(ss)
    stageresult[sdeltacol] = -(stageresult['elapsedDurationMs'] - stageresult['code'].map(stagerebaser))
    stageresult=stageresult[['drivercode', 'position','elapsedDuration', sdeltacol,'elapsedDurationMs']]
    stageresult.columns=['drivercode', 'Stage Rank',stcol, sdeltacol,'stageDurationMs']
    stageresult[stcol] = stageresult[stcol].str.replace('00000','')
    
    combined = pd.merge(zz,stageresult, on='drivercode' )
    
    combined[sdeltacol] = combined[sdeltacol]/1000
    combined['Previous'] = (combined['Overall Time']-combined[sdeltacol])
    
    _tmp=combined[['drivercode','Previous','Stage Rank',stcol,sdeltacol,'Overall Position','Overall Time']].replace(0,NaN).set_index('drivercode')

    if order=='overall':
        combined=combined.sort_values('Overall Position', ascending=True)
    elif order=='previous':
        combined=combined.fillna(0).sort_values('Previous', ascending=False).replace(0,NaN)
    elif order=='stage':
        combined=combined.sort_values('Stage Rank', ascending=True)
    else:
        #Default is stage order
        combined=combined.sort_values('Stage Rank', ascending=True)
    
    s2 = moreStyleDriverSplitReportBaseDataframe(_tmp, ss, caption)
    return s2

if __name__=='__main__':
    s2=getDriverStageReport(conn, rally, ss, drivercode)
    display(HTML(s2))

	Previous	Stage Rank	SS3 Time	SS3 Overall	Overall Position	Overall Time
drivercode
TÄN	3	2	00:12:48.10	6.4	1	9.4
SUN	-1.6	1	00:12:47	7.5	2	5.9
NEU	-0.8	3	00:12:48.50	6	3	5.2
LAT	-1.1	4	00:12:49.40	5.1	4	4
OGI		7	00:12:54.50		5
LAP	-6	5	00:12:51.60	2.9	6	-3.1
MIK	-5.4	6	00:12:54.30	0.2	7	-5.2
MEE	-7.7	8	00:12:59	-4.5	8	-12.2
LOE	-19.1	9	00:13:01.10	-6.6	9	-25.7
EVA	-25.8	10	00:13:03.30	-8.8	10	-34.6
TID	-22.8	11	00:13:10.90	-16.4	11	-39.2
TUO	-60.5	18	00:13:36	-41.5	22	-102
BER	-55.3	26	00:14:03	-68.5	24	-123.8
GRÖ	-53.5	29	00:14:08	-73.5	25	-127

In [101]:

def getDriverSplitsReport(conn, rally, ss, drivercode, rc='RC1', typ='overall', 
                          order=None, caption=None, bars=True, dropcols=None):
    ''' Generate dataframe report relative to a given driver on a given stage.
            order: sorts table according to: overall | previous | roadpos
            
        At the moment, the splits reporter doesn't report anything if there are no splits.
        In this case, default to a simple overal stage (without splits) reporter table.
    '''
    
    dropcols = [] if dropcols is None else dropcols
    #TO DO - this needs to fail gracefully if there are no splits
    
    #Allow the drivercode to be relative to a position
    #if drivercode=='firstonroad':
        #allow things like onroad1, onroad2?
    #    drivercode=
    #elif drivercode=='previousfirst':
        #allow things like previous1, previous2?
    #    drivercode = 
    #elif drivercode = 'stagewinner':
        #allowthings like stage1, stage2?
    #    drivercode = 
    
    
    #Get the overall results, rebased
    zz = rebased_stage_stagerank(conn,rally,ss,drivercode,rc=rc, typ=typ)
    
    #Get the road position
    roadPos = getRoadPosition(conn,rally,rc,ss)
    if roadPos.empty:
        #Should we automatically offer the stagetable report as an alternative
        return getDriverStageReport(conn, rally, ss, drivercode, rc=rc, order=order, caption=caption)
    
    #Get the splits
    splits = ssd.dbGetSplits(conn,rally,ss,rc)
    elapseddurations=ssd.getElapsedDurations(splits)
    
    #Rebase the split elapsed durations
    rebasedelapseddurations = ssd.rebaseElapsedDurations(elapseddurations, drivercode)
    rebasedelapseddurations = waypoint_rank(rebasedelapseddurations,by = 'elapsedDurationS')
    rbe = pivotRebasedElapsedDurations(rebasedelapseddurations, ss)
    
    #splitdurations are the time in each sector (time take to get from one split to the next)
    splitdurations = ssd.getSplitDurationsFromSplits(conn,rally,ss,rc)
    rebasedSplits = ssd.rebaseSplitDurations(splitdurations, drivercode)

    rbp = pivotRebasedSplits(rebasedSplits)

    #Get stage result to merge in stage position
    stageresult=sr.getEnrichedStageRank(conn, rally, rc=rc, stages=ss,typ='stage')[['drivercode','position']]

    rb2=getDriverSplitReportBaseDataframe(rbe, rbp, zz, roadPos, stageresult, ss)
    rb2 = cleanDriverSplitReportBaseDataframe(rb2, ss)
    if not bars:
        rb2=rb2.drop([c for c in rb2.columns if str(c).startswith('D')], axis=1)
        
    if ss=='SS1':
        rb2['Previous']=NaN

    if order=='overall':
        rb2=rb2.sort_values('Overall Position', ascending=True)
        #Remove the redundant column
        rb2=rb2.drop(['Overall Position'], axis=1)
        #rb2=rb2.rename(columns={'Overall Position':'{} Overall*'.format(ss)})
    elif order=='previous':
        rb2=rb2.fillna(0).sort_values('Previous', ascending=False).replace(0,NaN)
        #rb2 = rb2.rename(columns={'Previous':'Previous*'})
    elif order=='roadpos':
        rb2=rb2.sort_values('Road Position', ascending=True)
        #rb2 = rb2.rename(columns={'Road Position':'Road Position*'})
    elif order=='stage':
        rb2.sort_values('Stage Rank', ascending=True)
        #Remove the redundant column
        rb2=rb2.drop(['Stage Rank'], axis=1)
    else:
        #Default is stage order
        rb2.sort_values('Stage Rank', ascending=True)
        #Remove the redundant column
        rb2=rb2.drop(['Stage Rank'], axis=1)
        rb2 = rb2.rename(columns={'{} Overall'.format(ss):'{} Overall*'.format(ss)})

    if caption =='auto':
        caption = 'Rebased stage split times for {}{}.'.format('{}, '.format(drivercode), ss)

    dc = [c for c in dropcols if c in rb2.columns]
    rb2 = rb2.drop(columns=dc)
        
    #s = styleDriverSplitReportBaseDataframe(rb2, ss)
    s2 = moreStyleDriverSplitReportBaseDataframe(rb2,ss, caption)
    return s2

if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, ss, drivercode, rc, typ)#, caption='auto')
    display(HTML(s2))

	Road Position	Previous	1	2	3	4	SS3 Overall*	Overall Position	Overall Time	D1	D2	D3	D4	D5
drivercode
SUN	7	-1.6	2.2	5.7	5.1	8.6	7.5	2	5.9	2.2	3.5	-0.6	3.5	-1.1
TÄN	3	3	1.6	4.1	3.8	6.9	6.4	1	9.4	1.6	2.5	-0.3	3.1	-0.5
NEU	2	-0.8	0.3	3.5	3.7	6.7	6	3	5.2	0.3	3.2	0.2	3	-0.7
LAT	6	-1.1	0.8	2.9	3	6	5.1	4	4	0.8	2.1	0.1	3	-0.9
LAP	10	-6	0.7	2.8	2.2	5.1	2.9	6	-3.1	0.7	2.1	-0.6	2.9	-2.2
MIK	9	-5.4	0.8	2	1.1	2.8	0.2	7	-5.2	0.8	1.2	-0.9	1.7	-2.6
OGI	1							5
MEE	4	-7.7	-0.1	-0.7	-2.3	-1.9	-4.5	8	-12.2	-0.1	-0.6	-1.6	0.4	-2.6
LOE	5	-19.1	-1.6	-2.2	-3	-3.9	-6.6	9	-25.7	-1.6	-0.6	-0.8	-0.9	-2.7
EVA	11	-25.8	-4.3	-2.7	-4.7	-5	-8.8	10	-34.6	-4.3	1.6	-2	-0.3	-3.8
TID	8	-22.8	-5.9	-8.4	-9.2	-12.8	-16.4	11	-39.2	-5.9	-2.5	-0.8	-3.6	-3.6
TUO	14	-60.5	-10.3	-17.7	-22.7	-33.7	-41.5	22	-102	-10.3	-7.4	-5	-11	-7.8
BER	13	-55.3	-10	-18.1	-21.4	-60.4	-68.5	24	-123.8	-10	-8.1	-3.3	-39	-8.1
GRÖ	12	-53.5	-9	-15.9	-19.3	-28	-73.5	25	-127	-9	-6.9	-3.4	-8.7	-45.5

In [114]:

if __name__=='__main__':
    ss='SS19'
    d='TÄN'
    s2 = getDriverSplitsReport(conn, rally, ss, d, rc, typ)
    display(HTML(s2))

	Road Position	Previous	1	2	SS19 Overall*	Overall Position	Overall Time	D1	D2	D3
drivercode
TÄN	13					1
NEU	11	-53.1	-2	-2.9	-3.6	3	-56.7	-2	-0.9	-0.7
EVA	9	-63.7	-2.5	-4	-4.5	5	-68.2	-2.5	-1.5	-0.5
OGI	2	-1454.3	-0.5	-3.2	-4.7	16	-1459	-0.5	-2.7	-1.4
LAP	12	-48.7	-4.5	-7	-5	2	-53.7	-4.5	-2.5	2.1
MEE	8	-93.6	-2.5	-4.3	-5.2	6	-98.8	-2.5	-1.8	-0.9
LAT	4	-878.6	-4.1	-4.4	-5.8	14	-884.4	-4.1	-0.3	-1.4
LOE	7	-102.8	-2.8	-5.3	-6.9	7	-109.7	-2.8	-2.5	-1.6
MIK	10	-57.3	-3.6	-6.5	-8.1	4	-65.4	-3.6	-2.9	-1.6
SUN	3	-976.2	-2.6	-5.7	-8.3	15	-984.5	-2.6	-3.1	-2.6
TID	6	-204.7	-8.8	-11.4	-13	8	-217.7	-8.8	-2.6	-1.6
GRÖ	1	-2529.6	-9	-13.4	-17.5	17	-2547.1	-9	-4.4	-4.1
BER	5	-784.4	-13.3	-17.9	-20.9	13	-805.3	-13.3	-4.6	-3
TUO	14	-480.5	-13.4	-17.9	-20.9	9	-501.4	-13.4	-4.4	-3

In [115]:

if __name__=='__main__':
    from dakar_utils import getTablePNG
    getTablePNG(s2, fnstub='stage_{}_{}'.format(ss,d),scale_factor=5)

In [48]:

if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS10', 'PAD', rc, typ, 'roadpos')
    display(HTML(s2))

	Road Position	Stage Rank	Overall Position
drivercode
GRÖ	1	10	50
OGI	2	1	43
LAT	3	5	27
BER	4	20	26
TUO	5	13	16
TID	6	11	9
MEE	7	4	6
NEU	8	3	5
LOE	9	9	7
LAP	10	6	4
EVA	11	8	3
MIK	12	7	2
TÄN	13	2	1
SUN	14	30	8

In [124]:

if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS20', 'TÄN', rc, typ,'stage')
    display(HTML(s2))

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-124-ecf828d653a6> in <module>
      1 if __name__=='__main__':
----> 2     s2 = getDriverSplitsReport(conn, rally, 'SS20', 'TÄN', rc, typ,'stage')
      3     display(HTML(s2))

<ipython-input-119-cae16c32d3b8> in getDriverSplitsReport(conn, rally, ss, drivercode, rc, typ, order, caption, bars)
     28     if roadPos.empty:
     29         #Should we automatically offer the stagetable report as an alternative
---> 30         return getDriverStageReport(conn, rally, ss, drivercode, order=order, caption=caption)
     31 
     32     #Get the splits

<ipython-input-118-22ae797e3ff1> in getDriverStageReport(conn, rally, ss, drivercode, rc, typ, order, caption)
     11 
     12     #Get stage result - does it need to be enriched?
---> 13     stageresult=sr.getEnrichedStageRank(conn, rally, stages=ss,typ='stage')
     14 
     15     stagerebaser = stageresult[stageresult['drivercode']==drivercode][['code','elapsedDurationMs']].set_index('code').to_dict(orient='dict')['elapsedDurationMs']

~/Documents/GitHub/WRC_sketches/doodles/Charts - Stage Results.ipynb in getEnrichedStageRank(conn, rally, rc, typ, stages)

~/Documents/GitHub/WRC_sketches/doodles/Charts - Stage Results.ipynb in dbGetCombinedStageRanks(conn, rally, rc, typ, stages)

~/Documents/GitHub/WRC_sketches/doodles/Charts - Stage Results.ipynb in dbGetStageRank(conn, rally, rc, typ, stages)

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in rank(self, method, ascending, na_option, pct, axis)
   1844         return self._cython_transform('rank', numeric_only=False,
   1845                                       ties_method=method, ascending=ascending,
-> 1846                                       na_option=na_option, pct=pct, axis=axis)
   1847 
   1848     @Substitution(name='groupby')

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _cython_transform(self, how, numeric_only, **kwargs)
    805             try:
    806                 result, names = self.grouper.transform(obj.values, how,
--> 807                                                        **kwargs)
    808             except NotImplementedError:
    809                 continue

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in transform(self, values, how, axis, **kwargs)
    549 
    550     def transform(self, values, how, axis=0, **kwargs):
--> 551         return self._cython_operation('transform', values, how, axis, **kwargs)
    552 
    553     def _aggregate(self, result, counts, values, comp_ids, agg_func,

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in _cython_operation(self, kind, values, how, axis, min_count, **kwargs)
    509             result = self._transform(
    510                 result, values, labels, func, is_numeric, is_datetimelike,
--> 511                 **kwargs)
    512 
    513         if is_integer_dtype(result) and not is_datetimelike:

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in _transform(self, result, values, comp_ids, transform_func, is_numeric, is_datetimelike, **kwargs)
    582                                comp_ids, is_datetimelike, **kwargs)
    583         else:
--> 584             transform_func(result, values, comp_ids, is_datetimelike, **kwargs)
    585 
    586         return result

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in wrapper(*args, **kwargs)
    395 
    396                 def wrapper(*args, **kwargs):
--> 397                     return f(afunc, *args, **kwargs)
    398 
    399                 # need to curry our sub-function

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in <lambda>(func, a, b, c, d, **kwargs)
    346                     kwargs.get('ascending', True),
    347                     kwargs.get('pct', False),
--> 348                     kwargs.get('na_option', 'keep')
    349                 )
    350             }

TypeError: 'NoneType' object is not callable

In [125]:

if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS18', 'OGI', rc, typ)
    display(HTML(s2))

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-125-04d69b1892a5> in <module>
      1 if __name__=='__main__':
----> 2     s2 = getDriverSplitsReport(conn, rally, 'SS18', 'OGI', rc, typ)
      3     display(HTML(s2))

<ipython-input-119-cae16c32d3b8> in getDriverSplitsReport(conn, rally, ss, drivercode, rc, typ, order, caption, bars)
     28     if roadPos.empty:
     29         #Should we automatically offer the stagetable report as an alternative
---> 30         return getDriverStageReport(conn, rally, ss, drivercode, order=order, caption=caption)
     31 
     32     #Get the splits

<ipython-input-118-22ae797e3ff1> in getDriverStageReport(conn, rally, ss, drivercode, rc, typ, order, caption)
     11 
     12     #Get stage result - does it need to be enriched?
---> 13     stageresult=sr.getEnrichedStageRank(conn, rally, stages=ss,typ='stage')
     14 
     15     stagerebaser = stageresult[stageresult['drivercode']==drivercode][['code','elapsedDurationMs']].set_index('code').to_dict(orient='dict')['elapsedDurationMs']

~/Documents/GitHub/WRC_sketches/doodles/Charts - Stage Results.ipynb in getEnrichedStageRank(conn, rally, rc, typ, stages)

~/Documents/GitHub/WRC_sketches/doodles/Charts - Stage Results.ipynb in dbGetCombinedStageRanks(conn, rally, rc, typ, stages)

~/Documents/GitHub/WRC_sketches/doodles/Charts - Stage Results.ipynb in dbGetStageRank(conn, rally, rc, typ, stages)

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in rank(self, method, ascending, na_option, pct, axis)
   1844         return self._cython_transform('rank', numeric_only=False,
   1845                                       ties_method=method, ascending=ascending,
-> 1846                                       na_option=na_option, pct=pct, axis=axis)
   1847 
   1848     @Substitution(name='groupby')

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _cython_transform(self, how, numeric_only, **kwargs)
    805             try:
    806                 result, names = self.grouper.transform(obj.values, how,
--> 807                                                        **kwargs)
    808             except NotImplementedError:
    809                 continue

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in transform(self, values, how, axis, **kwargs)
    549 
    550     def transform(self, values, how, axis=0, **kwargs):
--> 551         return self._cython_operation('transform', values, how, axis, **kwargs)
    552 
    553     def _aggregate(self, result, counts, values, comp_ids, agg_func,

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in _cython_operation(self, kind, values, how, axis, min_count, **kwargs)
    509             result = self._transform(
    510                 result, values, labels, func, is_numeric, is_datetimelike,
--> 511                 **kwargs)
    512 
    513         if is_integer_dtype(result) and not is_datetimelike:

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in _transform(self, result, values, comp_ids, transform_func, is_numeric, is_datetimelike, **kwargs)
    582                                comp_ids, is_datetimelike, **kwargs)
    583         else:
--> 584             transform_func(result, values, comp_ids, is_datetimelike, **kwargs)
    585 
    586         return result

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in wrapper(*args, **kwargs)
    395 
    396                 def wrapper(*args, **kwargs):
--> 397                     return f(afunc, *args, **kwargs)
    398 
    399                 # need to curry our sub-function

/usr/local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in <lambda>(func, a, b, c, d, **kwargs)
    346                     kwargs.get('ascending', True),
    347                     kwargs.get('pct', False),
--> 348                     kwargs.get('na_option', 'keep')
    349                 )
    350             }

TypeError: 'NoneType' object is not callable

Problem with the bars is that the range is different in each column; ideally we want the same range in each column; could do this with two dummy rows to force max and min values?

In [89]:

if __name__=='__main__':
    #Example for pandas issue https://github.com/pandas-dev/pandas/issues/21526
    import pandas as pd
    import numpy as np
    
    df=pd.DataFrame({'x1':list(np.random.randint(-10,10,size=10))+[-500,1000, -1000],
               'y1':list(np.random.randint(-5,5,size=13)),'y2':list(np.random.randint(-2,3,size=13)) })
    
    display(df.style.bar( align='zero', color=[ '#5fba7d','#d65f5f']))

	x1	y1	y2
0	-4	2	-1
1	-2	0	1
2	1	4	2
3	7	-3	1
4	-7	0	0
5	1	-3	-2
6	-4	-3	-1
7	5	-4	1
8	3	1	0
9	9	-4	1
10	-500	-4	2
11	1000	1	1
12	-1000	1	-2

In [90]:

if __name__=='__main__':
    #clip lets us set a max limiting range although it means we lose the actual value?
    df['x2']= df['x1'].clip(upper=10, lower=-10)
    display(df.style.bar( align='zero', color=[ '#d65f5f','#5fba7d']))

	x1	y1	y2	x2
0	-4	2	-1	-4
1	-2	0	1	-2
2	1	4	2	1
3	7	-3	1	7
4	-7	0	0	-7
5	1	-3	-2	1
6	-4	-3	-1	-4
7	5	-4	1	5
8	3	1	0	3
9	9	-4	1	9
10	-500	-4	2	-10
11	1000	1	1	10
12	-1000	1	-2	-10

In [91]:

if __name__=='__main__':
    #for pandas 0.24 ? https://github.com/pandas-dev/pandas/pull/21548
    df['x2']= df['x1'].clip(upper=10, lower=-10)
    #Set axis=None for table wide range?
    #display(df.style.bar( align='zero', axis=None, color=[ '#d65f5f','#5fba7d']))
    

In [ ]: