Notebook
#This is a literal approach and is DEPRECATED
def getRally_URLs(results_main_url=None):
    '''
    Scrape the WRC results index page and return a dict mapping
    rally name -> results-page URL.

    results_main_url: page to scrape; defaults to the WRC stage-times index page.
    Returns: dict of {rally title: href}.
    '''
    if results_main_url is None:
        results_main_url='http://www.wrc.com/en/wrc/results/wales/stage-times/page/416-238---.html#'
    html=requests.get(results_main_url)
    soup=BeautifulSoup(html.content, "html5lib")
    #BeautifulSoup has a routine - find_all() - that will find all the HTML tags of a particular sort
    #Links are represented in HTML pages as anchor tags: <a href="...">link text</a>
    #Grab all the list items carrying a country flag...
    souplist=soup.findAll("li",{'class':'flag'})
    items={}
    for s in souplist:
        href=s.find('a')['href']
        if href:
            title=s.find('img')['title']
            #Normalise the event name used elsewhere in this file
            title = 'Monaco' if title == 'Monte Carlo' else title
            items[title]=href
    return items

def listRallies(display=True, **kwargs):
    '''
    Print the known rally names (display=True), or return the
    {rally name: URL} dict (display=False).

    kwargs are passed through to getRally_URLs().
    '''
    rallyURLs = getRally_URLs(**kwargs)
    if display:
        print( ', '.join(rallyURLs.keys()) )
    else:
        #BUGFIX: previously returned the unrelated function object `getRallyIDs`,
        #leaving the computed rallyURLs dict unused.
        return rallyURLs
#This approach makes literal calls to an original HTML page and is DEPRECATED
def _getRallyID(rallyURL):
    '''
    Fetch a rally results page and pull out the embedded rallyId,
    or None when the page carries no `var rallyId = '...'` snippet.
    '''
    page = requests.get(rallyURL)
    match = re.search("var rallyId = '(.+?)'", page.text)
    return match.group(1) if match else None

def getRallyIDs(rally=None,results_main_url=None):
    '''
    Return a dict mapping rally name -> rallyId, scraped per rally page.

    rally: when given and known, restrict the lookup to that single rally.
    results_main_url: passed through to getRally_URLs().
    '''
    lookup = getRally_URLs(results_main_url)
    #if we know the rally, just get that one..
    if rally in lookup:
        lookup = {rally: lookup[rally]}
    return {event: _getRallyID(url) for event, url in lookup.items()}
def _stage_iterator(meta, stub):
    ''' Iterate through a list of stageId values and get requested resource. '''
    params = {'rallyId': meta['rallyId']}
    frames = [pd.DataFrame()]
    for stage_id in meta['stages']:
        params['stageId'] = stage_id
        #Resolve the per-stage API URL from the shared stub templates
        resource_url = url_base.format(stub=stubs[stub].format(**params))
        payload = requests.get(resource_url).json()
        frame = json_normalize(payload)
        frame['stageId'] = stage_id
        frames.append(frame)
    #Single concat of all per-stage frames (sort=False, as before)
    return pd.concat(frames, sort=False)
def championship_tablesOLD(champ_class=None, champ_typ=None):
    ''' Get all championship tables in a particular championship and / or class.

    champ_class: championship class code (None -> all classes).
    champ_typ: championship type code (None -> all types within each class).
    Returns a 5-tuple:
        (championship_lookup, championship_results, championship_entries_all,
         championship_rounds, championship_events)
    where championship_entries_all is a dict keyed by championship type.
    '''
    #if championship is None then get all
    championship_lookup = pd.DataFrame()
    championship_entries_all = {}
    championship_rounds = pd.DataFrame()
    championship_events = pd.DataFrame()
    championship_results = pd.DataFrame()
    championship_codes = _get_championship_codes()
    _class_codes = championship_codes.keys() if champ_class is None else [champ_class]
    for champClass in _class_codes:
        _champ_typ = championship_codes[champClass].keys() if champ_typ is None else [champ_typ]
        for champType in _champ_typ:
            if champType not in championship_entries_all:
                championship_entries_all[champType] = pd.DataFrame()
            champ_num = championship_codes[champClass][champType]
            meta2={'championshipId': champ_num}
            championship_url = url_base.format(stub=stubs['championship'].format(**meta2) )
            print(championship_url)
            championship_json=requests.get( championship_url ).json()
            if championship_json:
                #Flatten the top-level record, keeping the nested lists for later passes
                _championship_lookup = json_normalize(championship_json).drop(['championshipEntries','championshipRounds'], axis=1)
                _championship_lookup['_codeClass'] = champClass
                _championship_lookup['_codeTyp'] = champType
                championship_lookup = pd.concat([championship_lookup,_championship_lookup],sort=True)
                championships={}
                championship_dict = _championship_lookup.to_dict()
                championships[champ_num] = {c:championship_dict[c][0] for c in championship_dict}
                #Map the generic fieldNDescription columns onto their real names
                renamer={c.replace('Description',''):championships[champ_num][c] for c in championships[champ_num] if c.startswith('field')}
                _championship_entries = json_normalize(championship_json,['championshipEntries'] )
                _championship_entries = _championship_entries.rename(columns=renamer)
                #Drop columns whose renamed label came out empty
                _championship_entries = _championship_entries[[c for c in _championship_entries.columns if c!='']]
                #pd.concat sort=False to retain current behaviour
                championship_entries_all[champType] = pd.concat([championship_entries_all[champType],_championship_entries],sort=False)
                _championship_rounds = json_normalize(championship_json,['championshipRounds'] ).drop('event', axis=1)
                championship_rounds = pd.concat([championship_rounds,_championship_rounds],sort=False).drop_duplicates()
                _events_json = json_normalize(championship_json,['championshipRounds' ])['event']
                _championship_events = json_normalize(_events_json)
                #TO DO: Season id -> https://www.wrc.com/service/sasCacheApi.php?route=seasons/
                # TO DO: list of championships: eg https://www.wrc.com/service/sasCacheApi.php?route=seasons/4
                #Below also available as eg https://www.wrc.com/service/sasCacheApi.php?route=seasons/4/championships/24
                championship_events = pd.concat([championship_events,_championship_events],sort=False).drop_duplicates()
                _championship_results = _get_single_json_table(meta2, 'championship_results')
                championship_results = pd.concat([championship_results, _championship_results],sort=False)
    for k in championship_entries_all:
        #BUGFIX: reset_index returns a new frame; the result was previously discarded,
        #so the intended index reset never happened. Assign it back.
        championship_entries_all[k] = championship_entries_all[k].reset_index(drop=True)
        if k in ['Driver', 'Co-Driver']:
            championship_entries_all[k] = championship_entries_all[k].rename(columns={'TyreManufacturer':'ManufacturerTyre'})
    return championship_lookup.reset_index(drop=True), \
           championship_results.reset_index(drop=True), \
           championship_entries_all, \
           championship_rounds.reset_index(drop=True), \
           championship_events.reset_index(drop=True)
#Notebook scratch cell: resolve rally metadata then fetch its itinerary.
#NOTE(review): `set_rallyId2`, `getItinerary`, `name` and `year` are presumably
#defined in earlier notebook cells not shown here — confirm before running.
_meta=set_rallyId2(name, year)
#Keep only the fourth element of the itinerary tuple as _s; the rest is discarded here.
_,_,_,_s,_ = getItinerary(_meta)
#Bare expression: displays _meta when run as the last line of a notebook cell.
_meta