#!/usr/bin/env python
# coding: utf-8

# In[1]:

"""
conda install pandas numpy seaborn folium basemap
pip install ckanapi cufflinks
"""
get_ipython().run_line_magic('matplotlib', 'nbagg')
from matplotlib import pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})
import matplotlib.ticker as tick
from mpl_toolkits.basemap import Basemap
import folium
import numpy as np  # Used throughout for log scaling and aggregation
import pandas as pd
from ckanapi import RemoteCKAN
import cufflinks
import seaborn
from IPython.display import display  # Makes display() work when run as a script
graph_figsize = (10, 6)
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)


# # Data Science with Python
#
# ## AKA How am I expected to follow that?
#
# Andrew Bolster
# * [bolster.online](https://bolster.online)
# * Tweets [@bolster](https://twitter.bolster.online)
# * GitHubs at [andrewbolster](https://github.com/andrewbolster)
# * Works at [Sensum Co](https://sensum.co) **We're hiring DS/DevOps**
# * Plays at [Farset Labs](https://www.farsetlabs.org.uk)
# * **[THIS NOTEBOOK IS AT present.bolster.online](http://present.bolster.online)**
# * [Also available in source at presentgh.bolster.online](http://presentgh.bolster.online)

# ## Points of Order
# ### Soz Colin, Python's older
# > In February 1991, van Rossum published the code (labeled version 0.9.0) to alt.sources - [Wikipedia](https://en.wikipedia.org/wiki/History_of_Python)
#
# > Python reached version 1.0 in January 1994. - [Wikipedia](https://en.wikipedia.org/wiki/History_of_Python)
#
# _PS Don't even think about using anything < Python 3.5 for new projects; it's awesome_
#
# ### Python and R share a massive amount
# * including a `ggplot` mapping
# * `pandas` in particular basically lifted the DataFrame structure

# In[2]:

# Pandas does reads
df = pd.read_csv("https://www.jumpingrivers.com/data/movie.txt")
# It graps
display(df.head())

# In[3]:

df.Rating.mean()  # it means

# In[4]:

f, ax = plt.subplots()
df.Rating.plot.hist(ax=ax)  # it histz

# In[5]:

df.describe()  # it describz

# * `pip` -> `cran`
#   * 114304 packages with tests, documentation, and binary bundles (`wheels`, so if you don't want to build, you can bin)
# * the `anaconda` distribution is even nicer and my preferred version

# ![](so-2017.png)

# ![](http://kgullikson88.github.io/blog/Images/PypiGraph.png)

# ![](http://kgullikson88.github.io/blog/Figures/DegreeDistribution.png)

# # Zee Plan
# * (Honorary Mention) [CKANApi](https://github.com/ckan/ckanapi)
# * Pandas
# * Seaborn
# * ~~Patsy *(If I don't run massively over time)*~~ I ran massively over time and got distracted by
# * Cufflinks
# * Basemap

# ## Pandas
# * Data Import / Export (Not really shown, but ask me about it)
# * *Actually Decent* Datetime operation
# * Boiled-in Stats and Grouping

# ## Seaborn
# * Pretty Pretty Graphs
# * Fantastic "I have no idea what I'm looking at" explorations

# ## ~~Patsy~~
# * R-like statistical modelling building on `statsmodels` and `pandas`
# * *IANASIJP1OTV* - I Am Not A Statistician I Just Play 1 On TV

# ## Cufflinks
# * "Prettier, Interactive, Seaborn" - Me 2016

# ## Basemap
# * Everything looks better on a map

# # Zee Data
# ## DF - [Contracts Awarded by Central Procurement Directorate](https://www.opendatani.gov.uk/dataset/contracts-awarded-by-central-procurement-directorate-in-the-2016-2017-year)
#
# From [OpenDataNI](https://www.opendatani.gov.uk), released under the [UK OGL v3](http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/)
#
# ## `ckanapi`
# * Wrapper to talk to CKAN datasets 'easily'
# * I discovered this 3 days ago so have only scratched the surface...
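# Under the hood, `ckanapi` is just a convenience wrapper over CKAN's HTTP
# "action" API. A minimal sketch of the equivalent raw call using `requests`
# (the `ckan_action` helper is my own, hypothetical — shown only to illustrate
# what the wrapper saves you):

# In[ ]:

import requests

def ckan_action(base_url, action, **params):
    """Call a CKAN action endpoint directly and return its 'result' payload.

    Assumes the standard CKAN API layout (/api/3/action/<action>)."""
    response = requests.get(
        '{}/api/3/action/{}'.format(base_url.rstrip('/'), action),
        params=params,
    )
    response.raise_for_status()
    payload = response.json()
    if not payload['success']:  # CKAN wraps every response in a success/result envelope
        raise RuntimeError(payload.get('error'))
    return payload['result']

# Roughly equivalent to the RemoteCKAN(...).action.group_list(...) call used below:
# ckan_action('https://www.opendatani.gov.uk', 'group_list', id='data-explorer')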
# ## Getting a list of dataset groups

# In[6]:

ua = 'ItsBolster/29.5 (+http://farsetlab.org.uk/)'
demo = RemoteCKAN('https://www.opendatani.gov.uk/', user_agent=ua)
groups = demo.action.group_list(id='data-explorer')
print(groups)

# ## Getting stats on Datasets per group

# In[7]:

group_data = demo.action.group_list(id='data-explorer', all_fields=True)
group_data

# JSON is ugly as sin
#
# `pandas` can help with that

# In[8]:

pd.DataFrame(group_data).head()

# In[9]:

pd.DataFrame(group_data).keys()

# ## Basic Graphing with `matplotlib` and `pandas`

# In[10]:

df = pd.DataFrame(group_data).set_index('title')['package_count'].sort_values()
df

# In[11]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = df.plot.barh(ax=ax, rot=45, title='Number of Open Datasets by Category')

# ## Acquiring Datastores from OpenDataNI directly
# I've not explored the datastore query language in `ckanapi`, so these are magic numbers taken from the OpenDataNI website

# In[12]:

# Contracts awarded by CPD Construction Division
construction_resource_id = '6cc96ec3-9ec5-426d-9e5d-f67a9423d0ab'
# Contracts awarded by CPD Supplies and Services
supplies_resource_id = 'd5b993f2-4f6f-4e94-9f45-c77b98201438'

# Let's start with the Construction Division (the smaller dataset: 71 records)
#
# Also: Fun Python Language Construct:
# ### Generators
# * Lazy evaluation of iterables

# In[13]:

def dataset_generator(resource_id):
    """A generator that yields records from a given dataset resource id."""
    offset = 0
    while True:
        datastore_page = demo.action.datastore_search(resource_id=resource_id, offset=offset)
        if not datastore_page['records']:
            return  # Raising StopIteration inside a generator is a no-no since PEP 479; just return
        for record in datastore_page['records']:
            yield record  # Execution is passed back to the caller here
        offset += len(datastore_page['records'])  # Advance by a whole page, not one record at a time

# In[14]:

df = pd.DataFrame.from_records(dataset_generator(construction_resource_id))
df.head()

# ## How much?
# `ckanapi` unfortunately doesn't do any data-type introspection, so everything appears as strings initially
#
# Easily fixed with `pandas`

# In[15]:

# Floatify monies
df['Awarded value'] = df['Awarded value'].astype(float)
df['Awarded value'].mean()

# Pandas has built-in stats capability to easily describe columns

# In[16]:

(df['Awarded value'] / 1000).describe()  # NOTE in £k

# In[17]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = (df['Awarded value'] / 1000).plot.hist(ax=ax)

# In[18]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = (df['Awarded value'] / 1000).plot.hist(ax=ax, logy=True)

# In[19]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = (df['Awarded value'] / 1000).plot.hist(ax=ax, logy=True, cumulative=True)
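# Because `dataset_generator` is lazy, nothing is downloaded until records are
# actually consumed; a quick sketch using the standard library's
# `itertools.islice` to peek at a few records without pulling the whole
# datastore:

# In[ ]:

from itertools import islice

# Only the first API page is fetched, and only five records are yielded
first_five = list(islice(dataset_generator(construction_resource_id), 5))
len(first_five)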
# Open Datasets (esp. Gov) are notoriously bad at "Coding" entries, so we end up with variations in:
# * Case (NI vs ni, Ltd vs LTD, WITHHELD vs Withheld)
# * Abbreviations (NI vs Northern Ireland)

# In[20]:

# Note withheld, RPS Ireland, WYG
company_list = df['Company name'].unique()
print("There are {} unique company names".format(len(company_list)), sorted(company_list))

# So we perform some (greedy, naive, probably broken) input cleaning

# In[21]:

text_fields = ['City', 'Basis for DAC Award', 'Company name', 'Contract awarded by', 'Title', 'Street']
interesting_fields = None
for text_field in text_fields:
    df[text_field] = df[text_field].str.lower().str.strip()
    df[text_field] = df[text_field].str.replace(' ltd', '')
    df[text_field] = df[text_field].str.replace(' uk', '')
    df[text_field] = df[text_field].str.replace(r' \(ni\)', '')
company_list = df['Company name'].unique()
print("There are now {} unique company names".format(len(company_list)), sorted(company_list))

# In some cases there are columns that you either don't care about or that don't contain any meaningful data for analysis; so drop them

# In[22]:

df['Basis for DAC Award'].describe()

# In[23]:

df.drop('Basis for DAC Award', axis=1, inplace=True)

# ## Who gave out what?
# As well as the buggered coding for `Company name`, department names are also buggered with abbreviations and inconsistencies

# In[24]:

dept_list = df['Contract awarded by'].unique()
print("There are {} unique dept. names".format(len(dept_list)), sorted(dept_list))

# In[25]:

df['Contract awarded by'].str.endswith(' ni').mean()

# ## How do you solve a problem like gov. depts.
# * Department names are long, ugly and inconsistent
# * Department abbreviations are gobbledegook and inconsistent too (DARD/DAERA)
# * Set "Optimistic" conversions between abbrevs. and depts.

# In[26]:

df['Contract awarded by'] = df['Contract awarded by'].str.replace(' ni', '')
# Edge case for misuse of DE/DENI
df.replace(to_replace={'Contract awarded by': {'deni': 'de'}}, inplace=True)
department_replacements = {
    'department for communities': 'dfc',
    'department for infrastructure': 'dfi',
    'department for infrastructure transport': 'dfi',
    'department for employment and learning': 'dfe/dfc',
    'department for the economy': 'dfe',
    'department of agriculture environment and rural affairs': 'daera',
    'department of education': 'de',
    'department of finance': 'df',
    'department of health': 'dh',
    'department of justice': 'doj',
    'department of justice courts & tribunals service': 'doj',
    'intertrade ireland': 'iti',
    'invest northern ireland': 'ini',
    'northern ireland office': 'nio',
    'northern ireland prison service': 'nips',
    'ni public health agency': 'pha',
    'police service of northern ireland': 'psni',
    'the executive office': 'teo'
}
df.replace(to_replace={'Contract awarded by': department_replacements})['Contract awarded by'].unique()

# In[27]:

df.replace(to_replace={'Contract awarded by': department_replacements}, inplace=True)
df.head()

# In[28]:

construction_df = df.copy()
construction_df['Division'] = 'construction'

# In[29]:

construction_df.head()

# # And what about services?

# In[30]:

df = pd.DataFrame.from_records(dataset_generator(supplies_resource_id))
df.head()

# "Helpful" renaming of the "Basis for DAC Award" column — well done, DF...
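# Since exactly the same cleaning gets reapplied to the services dataset in
# the next section, a reusable helper would avoid the copy-paste; a sketch
# (the `clean_text_fields` name is mine, not part of the original notebook):

# In[ ]:

def clean_text_fields(df, text_fields):
    """Lower-case, strip, and drop the noisiest suffixes from text columns, in place."""
    for text_field in text_fields:
        df[text_field] = df[text_field].str.lower().str.strip()
        for noise in (' ltd', ' uk', r' \(ni\)'):
            df[text_field] = df[text_field].str.replace(noise, '')
    return df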
# ## Everything old is new again
# Do it all over again, and fix the column naming inconsistency

# In[31]:

df.rename(columns={'Basis of Award': 'Basis for DAC Award'}, inplace=True)
text_fields = ['City', 'Basis for DAC Award', 'Company name', 'Contract awarded by', 'Title', 'Street']
interesting_fields = None
for text_field in text_fields:
    df[text_field] = df[text_field].str.lower().str.strip()
    df[text_field] = df[text_field].str.replace(' ltd', '')
    df[text_field] = df[text_field].str.replace(' uk', '')
    df[text_field] = df[text_field].str.replace(r' \(ni\)', '')
df['Contract awarded by'] = df['Contract awarded by'].str.replace(' ni', '')  # Drop ni
df.replace(to_replace={'Contract awarded by': {'deni': 'de'}}, inplace=True)  # Edge case for misuse of DE/DENI
df.replace(to_replace={'Contract awarded by': department_replacements}, inplace=True)  # Depts -> Abbrevs

# In[32]:

df.head()  # WTF is 1*?

# ## WTF is 1*?

# In[33]:

# Boolean operators yield Series objects!
(df['Awarded value'] == '1*').mean()

# 11.7% of Service contracts are 'withheld', as per the guidance on the ODNI page:
#
# > For certain types of contract it is difficult to predict the estimated contract value, so this has been recorded as 1*.
#
# Yey for communicating the meaning, boo for hiding and hand flapping.
#
# _Plan: Replace 1* with `None` and hope pandas can cope (it can)_
#
# _Cynical Hypothesis: 1*'s are more likely to be direct (unadvertised) awards_

# ## WTF is DAC
# [Direct Award Contract](https://www.finance-ni.gov.uk/publications/cpd-direct-award-contract-form-dac), or "We asked for these guys and no one else"

# In[34]:

df['Basis for DAC Award'].describe()

# Let's keep those, but what do they look like?

# In[35]:

df[df['Basis for DAC Award'].str.len() > 0]['Basis for DAC Award'].unique()

# regulation32 basically means "it was a direct tender". Interesting but not relevant

# ### Sidebar: WTF was **extreme urgency**?

# In[36]:

urgency = df[df['Basis for DAC Award'].str.contains('extreme').fillna(False)][
    ['Awarded value', 'Company name', 'Title', 'Contract awarded by', 'Date of Award']]
urgency.iloc[0], urgency.iloc[0]['Title']

# So M$FT got £2.5m to fix welfare reform
# > "as for reasons of extreme urgency brought about by events unforeseeable by the department."
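# An alternative to manually swapping 1* for `None` (as done in the next
# section) is to let pandas coerce anything non-numeric to NaN in a single
# pass; a sketch, not what the rest of the notebook does:

# In[ ]:

# errors='coerce' turns unparseable values (like '1*') into NaN
pd.to_numeric(df['Awarded value'].replace('[£,]', '', regex=True), errors='coerce').describe()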
# ## Question: How many 1*'s were Direct Tenders, and is this different from the overall ratio?

# In[37]:

direct_awards_df = df[df['Basis for DAC Award'].str.len() > 0]
direct_awards_df.shape[0], (direct_awards_df['Awarded value'] == '1*').mean()

# The _Cynical Hypothesis_ was *wrong*; if anything, DAC tenders are _slightly less_ likely to be withheld
#
# Nothing to be gained by keeping the 1* value, so we replace it with `None`

# In[38]:

df.replace(to_replace={'Awarded value': {'1*': None}}, inplace=True)
df.head()

# Now we can follow the money

# ## Following the Money
# We need to get rid of £ and , so we can safely convert the string to a float

# In[39]:

df['Awarded value'] = df['Awarded value'].replace('[£,]', '', regex=True).astype(float)
df.head()

# In[40]:

df.sort_values(by='Awarded value', ascending=False).head()

# In[41]:

services_df = df.copy()
services_df['Division'] = 'services'

# # Graphs: Still on Services Only

# In[42]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = services_df.groupby('Contract awarded by')['Awarded value'].sum().sort_values().plot.pie(ax=ax)

# In[43]:

def make_autopct(total):
    """Build an autopct callback showing both percentage and absolute £m value."""
    def my_autopct(pct):
        val = int(round(pct * total / 100.0))
        return '{p:.1f}% (£{v:,.1f}m)'.format(p=pct, v=val / 1000000)
    return my_autopct

# In[44]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = services_df.groupby('Contract awarded by')['Awarded value'].sum().sort_values().plot.pie(
    ax=ax,
    autopct=make_autopct(services_df['Awarded value'].sum()),
    title='Who Gave Out How Much? [Services]'
)

# In[45]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = services_df.groupby('Company name')['Awarded value'].sum().sort_values().plot.pie(
    ax=ax,
    autopct=make_autopct(services_df['Awarded value'].sum()),
    title='Who Got How Much? [Services]'
)

# ## Graphing: Construction
# Same as before, but on the Construction dataset

# In[46]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = construction_df.groupby('Contract awarded by')['Awarded value'].sum().sort_values().plot.pie(
    ax=ax,
    autopct=make_autopct(construction_df['Awarded value'].sum()),
    title='Who Gave Out How Much? [Construction]'
)

# In[47]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = construction_df.groupby('Company name')['Awarded value'].sum().sort_values().plot.pie(
    ax=ax,
    autopct=make_autopct(construction_df['Awarded value'].sum()),
    title='Who Got How Much? [Construction]'
)

# # Combining Datasets for 50% profit
# There are always risks in combining datasets:
# * Inner/Outer joining
# * Column mismatches
# * Duplicate values
#
# Since we've already walked through each dataset, the columns *should* make sense and be common across both, so we can concatenate the datasets together
#
# _We need to be careful that we end up with the same number of records in total_

# In[48]:

[s[0] for s in [services_df.shape,
                construction_df.shape,
                pd.concat([services_df, construction_df]).shape]]

# In[49]:

df = pd.concat([services_df, construction_df])
df.head()

# ## Once more with feeling
# Plotting everything together

# In[50]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = df.groupby('Contract awarded by')['Awarded value'].sum().sort_values().plot.pie(
    ax=ax,
    autopct=make_autopct(df['Awarded value'].sum()),
    title='Who Gave Out How Much? [Combined]'
)
# Note NIPS was the biggest Construction buyer

# In[51]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = df.groupby('Company name')['Awarded value'].sum().sort_values().plot.pie(
    ax=ax,
    autopct=make_autopct(df['Awarded value'].sum()),
    title='Who Got How Much? [Combined]'
)
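# As an aside, `pd.concat` could also tag the origin of each record for us via
# a MultiIndex `keys` argument, instead of the hand-rolled `Division` column;
# a sketch of that alternative (not what the rest of the notebook uses):

# In[ ]:

# keys= labels each source frame; names= labels the resulting index levels
combined = pd.concat([services_df, construction_df],
                     keys=['services', 'construction'],
                     names=['Division', None])
combined.index.get_level_values('Division').value_counts()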
# In[52]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = df.groupby('Division')['Awarded value'].sum().sort_values().plot.pie(
    ax=ax,
    autopct=make_autopct(df['Awarded value'].sum()),
    title='What division got what? [Combined]'
)

# # Timeseries for fun and transparency
# AKA `pandas` is magic

# In[53]:

df.dtypes

# In[54]:

for c in ['Date of Award', 'Contract end date']:
    df[c] = pd.to_datetime(df[c])

# In[55]:

f, ax = plt.subplots(figsize=graph_figsize)
ax.yaxis.set_major_formatter(tick.FuncFormatter(lambda x, y: '£{:,d}M'.format(int(x / 10**6))))
# pd.Grouper supersedes the deprecated pd.TimeGrouper (removed in pandas 0.25)
_ = df.groupby(pd.Grouper(key='Date of Award', freq='M'))['Awarded value'].sum().plot(ax=ax)

# In[56]:

def _x_month_formatter(x, y):
    return '{year}-{month}'.format(year=x.year, month=x.month)

_df_month = df.groupby([pd.Grouper(key='Date of Award', freq='M'), 'Division'])['Awarded value'].sum().unstack()
_df_month

# In[57]:

f, ax = plt.subplots(figsize=graph_figsize)
_df_month.plot.bar(ax=ax, stacked=True, cmap=plt.get_cmap('tab20'))
ax.yaxis.set_major_formatter(tick.FuncFormatter(lambda x, y: '£{:,d}M'.format(int(x / 10**6))))
ax.xaxis.set_major_formatter(tick.FixedFormatter(_df_month.index.map(lambda d: '{}-{}'.format(d.year, d.month))))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=60)
_ = ax.legend(loc='upper center', ncol=1, fancybox=True, shadow=True)

# In[58]:

_df_month = df.groupby([pd.Grouper(key='Date of Award', freq='M'), 'Contract awarded by'])['Awarded value'].sum().unstack()
f, ax = plt.subplots(figsize=graph_figsize)
_df_month.plot.bar(ax=ax, stacked=True, cmap=plt.get_cmap('tab20'))
ax.yaxis.set_major_formatter(tick.FuncFormatter(lambda x, y: '£{:,d}M'.format(int(x / 10**6))))
ax.xaxis.set_major_formatter(tick.FixedFormatter(_df_month.index.map(lambda d: '{}-{}'.format(d.year, d.month))))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=60)
_ = ax.legend(loc='upper center', ncol=2, fancybox=True, shadow=True)

# ## What a total mess!
#
# Too many tiny, varying department names make graphs ugly as hell
#
# Create a temporary mapping for the bottom 30%ile

# In[59]:

_contract_sums = df.groupby('Contract awarded by')['Awarded value'].sum().sort_values(ascending=False)
_contract_sums.head()

# In[60]:

_contract_sums.cumsum().head()

# In[82]:

display(_contract_sums.cumsum().quantile(0.3))
# Departments whose running total falls beyond the 30th percentile of the cumulative sum are the long tail
in_bottom_tertile = _contract_sums.cumsum() > _contract_sums.cumsum().quantile(0.3)
in_bottom_tertile

# In[62]:

df_tertile = df.copy()
df_tertile['Contract awarded by'] = df_tertile['Contract awarded by'].apply(
    lambda x: "other" if in_bottom_tertile.loc[x] else x
)

# In[63]:

_df_month = df_tertile.groupby([pd.Grouper(key='Date of Award', freq='M'), 'Contract awarded by'])['Awarded value'].sum().unstack()
f, ax = plt.subplots(figsize=graph_figsize)
_df_month.plot.bar(ax=ax, stacked=True, cmap=plt.get_cmap('tab20'))
ax.yaxis.set_major_formatter(tick.FuncFormatter(lambda x, y: '£{:,d}M'.format(int(x / 10**6))))
ax.xaxis.set_major_formatter(tick.FixedFormatter(_df_month.index.map(lambda d: '{}-{}'.format(d.year, d.month))))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=60)
_ = ax.legend(loc='upper center', ncol=2, fancybox=True, shadow=True)

# ## BONUS ROUND: `cufflinks`
# Basically Magic that binds a new `iplot` method to `pandas` objects to call the [plot.ly](https://plot.ly) interface.
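# By default `iplot` publishes figures to plot.ly's servers (hence the
# `filename`/`world_readable` arguments below); if you'd rather keep things
# local, cufflinks also supports an offline mode — a sketch:

# In[ ]:

import cufflinks as cf
# Render plotly figures in-notebook instead of uploading them;
# skip this cell if you want the published versions below
cf.go_offline()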
# In[83]:

df_tertile.groupby([pd.Grouper(key='Date of Award', freq='M'), 'Contract awarded by'])['Awarded value']\
    .sum().unstack().iplot(
        kind='bar', barmode='stack',
        filename='blug-stackedbar-aug-17', world_readable=True
    )

# In[84]:

df_tertile.groupby([pd.Grouper(key='Date of Award', freq='M'), 'Contract awarded by'])['Awarded value']\
    .sum().unstack().iplot(
        kind='heatmap', colorscale='spectral',
        filename='blug-heatmap-aug-17', world_readable=True
    )

# # `seaborn`
# > Seaborn is a Python visualization library based on matplotlib. It provides a high-level interface for drawing attractive statistical graphics.
#
# And it has far more power than I would ever have time to explore

# In[66]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = seaborn.stripplot(data=df_tertile, ax=ax, x="Contract awarded by", y="Awarded value")

# In[67]:

f, ax = plt.subplots(figsize=graph_figsize)
_ = seaborn.swarmplot(data=df_tertile, ax=ax, x="Contract awarded by", y="Awarded value")

# In[68]:

f, ax = plt.subplots(figsize=graph_figsize)
seaborn.swarmplot(data=df_tertile, ax=ax, x="Contract awarded by", y="Awarded value")
ax.set_yscale('log')

# In[69]:

f, ax = plt.subplots(figsize=graph_figsize)
seaborn.boxplot(data=df_tertile, ax=ax, x="Contract awarded by", y="Awarded value", hue="Division")
ax.set_yscale('log')

# In[70]:

f, ax = plt.subplots(figsize=graph_figsize)
seaborn.violinplot(data=df_tertile, ax=ax, x="Contract awarded by", y="Awarded value",
                   hue="Division", split=True, inner="stick")

# In[71]:

df_tertile['Month of Award'] = df_tertile['Date of Award'].dt.month

# In[72]:

f, ax = plt.subplots(figsize=graph_figsize)
seaborn.barplot(data=df_tertile, ax=ax, y="Awarded value", x="Month of Award", hue="Division")

# In[73]:

f, ax = plt.subplots(figsize=graph_figsize)
seaborn.countplot(data=df_tertile, ax=ax, x="Month of Award", hue="Division")

# In[74]:

f, ax = plt.subplots(figsize=graph_figsize)
df_tertile['Contract duration'] = (df_tertile['Contract end date'] - df_tertile['Date of Award']).dt.days
seaborn.regplot(data=df_tertile, x='Awarded value', y='Contract duration', ax=ax)

# In[75]:

g = seaborn.jointplot(
    data=df_tertile.groupby('Contract awarded by')['Awarded value'].agg([np.mean, np.sum, len]).reset_index(),
    x='mean', y='len', kind='kde'
)
g.plot_joint(plt.scatter, c="w", s=30, linewidth=1, marker="+")

# In[76]:

for c in ['Latitude', 'Longitude']:
    df[c] = df[c].astype(float)
df_with_geo = df[df[['Latitude', 'Longitude']].abs().sum(axis=1) > 0]

centre = df_with_geo[['Latitude', 'Longitude']].mean()
mins = df_with_geo[['Latitude', 'Longitude']].min()
maxs = df_with_geo[['Latitude', 'Longitude']].max()

def get_marker_colour(logmon):
    """Colour by order of magnitude of the award: green < £100k, yellow < £1m, red otherwise."""
    if logmon < 5.0:
        return 'go'
    elif logmon < 6.0:
        return 'yo'
    else:
        return 'ro'

def _plot(r):
    x, y = m(r['Longitude'], r['Latitude'])
    if pd.isnull(r['Awarded value']):  # After the float conversion the missing values are NaN, not None
        m_size = 1
    else:
        m_size = np.log10(r['Awarded value'])
    m.plot(x, y, get_marker_colour(m_size), markersize=m_size)

# In[77]:

f, ax = plt.subplots(figsize=graph_figsize)
x = np.log10(df_with_geo['Awarded value'].fillna(1).values)
hist, bins = np.histogram(x, bins=50)
width = 0.7 * (bins[1] - bins[0])
center = (bins[:-1] + bins[1:]) / 2
ax.bar(center, hist, align='center', width=width)

# In[78]:

f, ax = plt.subplots()
m = Basemap(projection='merc',
            lon_0=centre['Longitude'], lat_0=centre['Latitude'],
            llcrnrlon=mins['Longitude'], llcrnrlat=mins['Latitude'],
            urcrnrlon=maxs['Longitude'], urcrnrlat=maxs['Latitude'])
m.drawcoastlines()
m.drawcountries()
m.fillcontinents(color='coral')
m.drawmapboundary()
_ = df_with_geo.apply(_plot, axis=1)
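# `folium` was imported at the top but never made it into the talk; a minimal
# sketch of the same geocoded awards on an interactive Leaflet map (the marker
# styling is my own, hypothetical choice, reusing the Basemap log scaling):

# In[ ]:

fmap = folium.Map(location=[centre['Latitude'], centre['Longitude']], zoom_start=8)
for _, r in df_with_geo.dropna(subset=['Awarded value']).iterrows():
    folium.CircleMarker(
        location=[r['Latitude'], r['Longitude']],
        radius=np.log10(r['Awarded value']),  # Same log scaling as the Basemap markers
        popup='{} (£{:,.0f})'.format(r['Company name'], r['Awarded value']),
    ).add_to(fmap)
fmap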
# In[79]:

# Trim the outlier coordinates so the map zooms in on the bulk of the awards
mins = df_with_geo[['Latitude', 'Longitude']].quantile(0.009)
maxs = df_with_geo[['Latitude', 'Longitude']].quantile(0.991)

# In[80]:

f, ax = plt.subplots()
m = Basemap(projection='merc',
            lon_0=centre['Longitude'], lat_0=centre['Latitude'],
            llcrnrlon=mins['Longitude'], llcrnrlat=mins['Latitude'],
            urcrnrlon=maxs['Longitude'], urcrnrlat=maxs['Latitude'])
m.drawcoastlines()
m.drawcountries()
m.fillcontinents()
m.drawmapboundary()
_ = df_with_geo.apply(_plot, axis=1)

# In[81]:

f, ax = plt.subplots()
m = Basemap(projection='merc',
            lon_0=centre['Longitude'], lat_0=centre['Latitude'],
            llcrnrlon=mins['Longitude'], llcrnrlat=mins['Latitude'],
            urcrnrlon=maxs['Longitude'], urcrnrlat=maxs['Latitude'])
m.shadedrelief()
x, y = m(df_with_geo['Longitude'].values, df_with_geo['Latitude'].values)
_ = m.hexbin(x, y, gridsize=[900, 400], bins='log', mincnt=1, cmap='jet_r')

# There's so much more in `seaborn`, but really not enough time to show it all, so hopefully this was an interesting start

# # Questions / Challenges / Ideas?
# I was Andrew Bolster
# * [bolster.online](https://bolster.online)
# * Tweets [@bolster](https://twitter.bolster.online)
# * GitHubs at [andrewbolster](https://github.com/andrewbolster)
# * Works at [Sensum Co](https://sensum.co) **We're hiring DS/DevOps**
# * Plays at [Farset Labs](https://www.farsetlabs.org.uk)
# * **[THIS NOTEBOOK IS AT presentgh.bolster.online](http://presentgh.bolster.online)**

# In[ ]: