In [1]:

import pandas as pd
from ebmdatalab import bq
import numpy as np

In [2]:

from ebmdatalab import bq

# save table in BQ as follows
'''SELECT pct, practice, chemical, bnf_code, bnf_name, SUM(items) AS items, SUM(actual_cost) AS actual_cost, SUM(quantity) AS quantity, CAST(month AS DATE) AS month 
FROM `ebmdatalab.hscic.normalised_prescribing_standard` p

LEFT JOIN `hscic.bnf` b ON p.bnf_code = b.presentation_code

WHERE SUBSTR(bnf_code,1,9) IN (
    '0212000AA', --Rosuvastatin Calcium 
    '0212000AC', --Simvastatin & Ezetimibe
    '0212000B0', --Atorvastatin
    '0212000C0', --Cerivastatin 
    '0212000M0', --Fluvastatin Sodium
    '0212000X0', --Pravastatin Sodium 
    '0212000Y0') --Simvastatin

GROUP BY 
 pct, practice, chemical, bnf_code, bnf_name, month
 
ORDER BY
month, bnf_code'''

# for practice- and chemical-level use following
'''SELECT pct, practice, chemical, SUM(items) AS items, SUM(actual_cost) AS actual_cost, SUM(quantity) AS quantity, CAST(month AS DATE) AS month ,
SUM(CASE WHEN concat(SUBSTR(bnf_code,1,9),SUBSTR(bnf_code,-2,2)) NOT IN (
    '0212000AAAA', --Rosuvastatin Calc_Tab 10mg (brand, generic) 
    '0212000AAAB', --Rosuvastatin Calc_Tab 20mg (brand, generic) 
    '0212000AAAC', --Rosuvastatin Calc_Tab 40mg (brand, generic) 
    '0212000B0AB', --Atorvastatin_Tab 20mg (brand, generic)
    '0212000B0AC', --Atorvastatin_Tab 40mg (brand, generic) 
    '0212000B0AD', --Atorvastatin_Tab 80mg (brand, generic)
    '0212000B0AN', --Atorvastatin_Tab 30mg (brand, generic)
    '0212000B0AP', --Atorvastatin_Tab 60mg (brand, generic)
    '0212000Y0AH') --Simvastatin_Tab 80mg (brand, generic)
    THEN items ELSE 0 END) AS low_med_dose_items

FROM `ebmdatalab.helen.prescribing_statins_all_and_lowdose` p


WHERE SUBSTR(bnf_code,1,9) IN (
    '0212000AA', --Rosuvastatin Calcium 
    '0212000AC', --Simvastatin & Ezetimibe
    '0212000B0', --Atorvastatin
    '0212000C0', --Cerivastatin 
    '0212000M0', --Fluvastatin Sodium
    '0212000X0', --Pravastatin Sodium 
    '0212000Y0') --Simvastatin

GROUP BY 
 pct, practice, chemical, month
 
ORDER BY
month'''


# Import practice-level data

sql = '''-- practice data by chemical

SELECT p.month, p.practice, chemical, 
SUM(items) AS items, SUM(actual_cost) AS actual_cost, SUM(quantity) AS quantity
FROM `ebmdatalab.helen.prescribing_section_2_12` p

INNER JOIN ebmdatalab.hscic.practices prac
ON p.practice = prac.code
  AND prac.setting = 4
  
GROUP BY
month, practice, chemical'''


#df = bq.cached_read(sql, csv_path='statins_practice_level.zip')  # add `use_cache=False` to override
df = pd.read_csv('helen_prescribing_statins_all_and_lowdose_chem.csv')
df.head()

Out[2]:

	pct	practice	chemical	items	actual_cost	quantity	month	low_med_dose_items
0	06A	M92011	Fluvastatin Sodium	3	72.74	168	2010-08-01	3
1	08A	G83680	Rosuvastatin Calcium	2	49.83	84	2010-08-01	1
2	10X	Y02838	Simvastatin	49	81.24	1652	2010-08-01	49
3	08W	F86001	Simvastatin & Ezetimibe	2	123.08	112	2010-08-01	2
4	08G	E86632	Simvastatin	155	332.77	7070	2010-08-01	154

In [3]:

prac = df.copy()
prac["month"]= pd.to_datetime(prac.month, utc=True)
prac["proportion_lm"] = prac["low_med_dose_items"]/prac["items"]
prac.head()

Out[3]:

	pct	practice	chemical	items	actual_cost	quantity	month	low_med_dose_items	proportion_lm
0	06A	M92011	Fluvastatin Sodium	3	72.74	168	2010-08-01 00:00:00+00:00	3	1.000000
1	08A	G83680	Rosuvastatin Calcium	2	49.83	84	2010-08-01 00:00:00+00:00	1	0.500000
2	10X	Y02838	Simvastatin	49	81.24	1652	2010-08-01 00:00:00+00:00	49	1.000000
3	08W	F86001	Simvastatin & Ezetimibe	2	123.08	112	2010-08-01 00:00:00+00:00	2	1.000000
4	08G	E86632	Simvastatin	155	332.77	7070	2010-08-01 00:00:00+00:00	154	0.993548

Practice population data¶

note running this cell takes some time

In [4]:

sql_prac_p = '''
SELECT practice, month, SUM(total_list_size) AS total_list_size
--, SUM( female_75_plus+female_65_74+female_55_64+female_45_54+male_75_plus+male_65_74+male_55_64+male_45_54) AS list_size_over_45 
FROM ebmdatalab.hscic.practice_statistics_all_years stat
GROUP BY practice, month'''

prac_p = bq.cached_read(sql_prac_p, csv_path='prac_p.csv', use_cache=False)  # add `use_cache=False` to override


prac_p["total_list_size"] = prac_p["total_list_size"].astype("float")
#prac_p["list_size_over_45"] = prac_p["list_size_over_45"].astype("float")
prac_p["month"]= pd.to_datetime(prac_p.month, utc=True)
prac_p.info()

C:\Users\hcurtis\Anaconda3\lib\site-packages\pandas\io\gbq.py:149: FutureWarning: verbose is deprecated and will be removed in a future version. Set logging level in order to vary verbosity
  credentials=credentials, verbose=verbose, private_key=private_key)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 899154 entries, 0 to 99153
Data columns (total 3 columns):
practice           899154 non-null object
month              899154 non-null datetime64[ns, UTC]
total_list_size    899154 non-null float64
dtypes: datetime64[ns, UTC](1), float64(1), object(1)
memory usage: 27.4+ MB

National trends¶

In [5]:

national = prac.copy().groupby(["month"]).sum().reset_index()

prac_n = prac_p.groupby("month").sum().reset_index()
national = national.merge(prac_n, on=["month"])

national["items_per_thou"] = 1000*national["items"]/national["total_list_size"]
national["lmdose_items_per_thou"] = 1000*national["low_med_dose_items"]/national["total_list_size"]
national["proportion_lm"] = national["low_med_dose_items"]/national["items"]

national.head()

Out[5]:

	month	items	actual_cost	quantity	low_med_dose_items	proportion_lm	total_list_size	items_per_thou	lmdose_items_per_thou
0	2010-10-01 00:00:00+00:00	4489263	3.315708e+07	158659485	3623327	0.807110	55218374.0	81.300166	65.618140
1	2010-11-01 00:00:00+00:00	4649310	3.412504e+07	163886937	3754116	0.807457	55218374.0	84.198604	67.986718
2	2010-12-01 00:00:00+00:00	4976000	3.655069e+07	175528148	4016546	0.807184	55218374.0	90.114932	72.739302
3	2011-01-01 00:00:00+00:00	4446690	3.228819e+07	156205627	3591797	0.807746	55256645.0	80.473398	65.002083
4	2011-02-01 00:00:00+00:00	4287507	3.101696e+07	149868554	3461122	0.807257	55256645.0	77.592604	62.637209

In [6]:

import importlib
from ebmdatalab import charts
importlib.reload(charts)
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt

data = national.copy()

s = [(0,'items_per_thou',0,0,'(a) All statins per 1000 patients'), 
     (1,'proportion_lm',0,1,'(b) Proportion low & medium intensity'),
     (2,'lmdose_items_per_thou',1,0,'(c) Low & medium intensity prescribing rate')]

fig = plt.figure(figsize=(16,16)) 

gs = gridspec.GridSpec(3,2)  # grid layout for subplots

for i in s:
    ax = plt.subplot(gs[i[2], i[3]])
    data.plot('month', i[1], title=i[4], legend=False, ax=ax)
    
#data.plot('month', 'lmdose_items_per_thou')

plt.show()
#plot(x, y, color='green', marker='o', linestyle='dashed',
#...      linewidth=2, markersize=12)

C:\Users\hcurtis\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:1172: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.
  "will drop timezone information.", UserWarning)

Deciles for practices¶

Rearrange data to give items per 1000 for each chemical as a column¶

Also select top chemicals, grouping the rest into "other"

In [7]:

prac_deciles = prac.copy()

# group up chemicals to top 5 and "other"
conditions = [
    (prac_deciles["chemical"] == "Pravastatin Sodium"),
    (prac_deciles["chemical"] == "Rosuvastatin Calcium"),
    (prac_deciles["chemical"] == "Simvastatin"),
    (prac_deciles["chemical"] == "Atorvastatin"),
    (prac_deciles["chemical"] == "Fluvastatin Sodium")]
choices = ["Pravastatin Sodium","Rosuvastatin Calcium","Simvastatin","Atorvastatin","Fluvastatin"]
prac_deciles['chem2'] = np.select(conditions, choices, default='other statins')

prac_deciles = prac_deciles.groupby(["month","practice","chem2"],as_index=False).sum()


prac_deciles = prac_deciles.set_index(["month","practice","chem2"]).unstack()
prac_deciles["items","total"] = prac_deciles["items"].sum(axis=1)
prac_deciles["low_med_dose_items","total"] = prac_deciles["low_med_dose_items"].sum(axis=1)


prac_deciles.sort_index(axis=1,inplace=True)
prac_deciles = prac_deciles.stack().reset_index()

# join prac population data and calc items & cost per 1000
prac_deciles = prac_deciles.merge(prac_p, on=["practice","month"])
prac_deciles["items_per_thou"] = 1000*prac_deciles["items"]/prac_deciles["total_list_size"]
prac_deciles["lmdose_items_per_thou"] = 1000*prac_deciles["low_med_dose_items"]/prac_deciles["total_list_size"]
prac_deciles["proportion_lm"] = prac_deciles["low_med_dose_items"]/prac_deciles["items"]


# reduce to required columns and set index
prac_deciles = prac_deciles[["month","practice","chem2","items_per_thou","lmdose_items_per_thou", "proportion_lm"]].set_index(["month","practice","chem2"]).unstack().reset_index().replace([np.inf, -np.inf], np.nan).fillna(0)


prac_deciles.columns = ['_'.join(tup).rstrip('_') for tup in prac_deciles.columns.values]

prac_deciles.head(-20)

Out[7]:

	month	practice	items_per_thou_Atorvastatin	items_per_thou_Fluvastatin	items_per_thou_Pravastatin Sodium	items_per_thou_Rosuvastatin Calcium	items_per_thou_Simvastatin	items_per_thou_other statins	items_per_thou_total	lmdose_items_per_thou_Atorvastatin	...	lmdose_items_per_thou_Simvastatin	lmdose_items_per_thou_other statins	lmdose_items_per_thou_total	proportion_lm_Atorvastatin	proportion_lm_Fluvastatin	proportion_lm_Pravastatin Sodium	proportion_lm_Rosuvastatin Calcium	proportion_lm_Simvastatin	proportion_lm_other statins	proportion_lm_total
0	2010-10-01 00:00:00+00:00	A81001	20.042443	1.178967	2.593728	0.943174	50.931384	0.000000	75.689696	1.414761	...	50.931384	0.000000	56.354633	0.070588	1.0	1.0	0.250000	1.000000	0.0	0.744548
1	2010-10-01 00:00:00+00:00	A81002	21.426832	0.097395	1.899196	5.843682	59.215973	0.000000	88.483078	3.749696	...	59.215973	0.000000	65.254444	0.175000	1.0	1.0	0.050000	1.000000	0.0	0.737479
2	2010-10-01 00:00:00+00:00	A81003	15.380464	0.000000	4.587156	1.079331	110.091743	0.000000	131.138694	5.666487	...	110.091743	0.000000	120.345386	0.368421	0.0	1.0	0.000000	1.000000	0.0	0.917695
3	2010-10-01 00:00:00+00:00	A81004	21.862648	0.000000	3.458498	1.482213	47.060277	0.000000	73.863636	4.323123	...	46.813241	0.000000	54.718379	0.197740	0.0	1.0	0.083333	0.994751	0.0	0.740803
4	2010-10-01 00:00:00+00:00	A81005	28.481807	0.125471	5.771644	1.882058	100.376412	0.000000	136.637390	7.151819	...	96.612296	0.000000	109.661230	0.251101	1.0	1.0	0.000000	0.962500	0.0	0.802571
5	2010-10-01 00:00:00+00:00	A81006	28.759444	0.162483	2.599724	2.599724	73.523438	0.568690	108.213502	7.067999	...	73.279714	0.568690	83.841092	0.245763	1.0	1.0	0.062500	0.996685	1.0	0.774775
6	2010-10-01 00:00:00+00:00	A81007	4.516990	0.307977	3.901037	0.205318	70.937275	0.307977	80.176573	1.026589	...	70.731958	0.307977	76.275536	0.227273	1.0	1.0	0.000000	0.997106	1.0	0.951344
7	2010-10-01 00:00:00+00:00	A81008	27.495379	0.231054	0.462107	9.011091	175.831793	2.310536	215.341959	1.617375	...	175.600739	2.310536	180.221811	0.058824	1.0	1.0	0.000000	0.998686	1.0	0.836910
8	2010-10-01 00:00:00+00:00	A81009	21.709203	0.635391	3.282855	1.164884	84.295245	0.105899	111.193477	3.918246	...	83.977550	0.105899	91.919941	0.180488	1.0	1.0	0.000000	0.996231	1.0	0.826667
9	2010-10-01 00:00:00+00:00	A81011	15.090350	0.000000	1.633987	0.672818	73.913879	0.000000	91.311034	1.826221	...	73.817762	0.000000	77.277970	0.121019	0.0	1.0	0.000000	0.998700	0.0	0.846316
10	2010-10-01 00:00:00+00:00	A81012	28.684471	0.000000	2.373887	4.154303	105.637982	1.582591	142.433234	7.517310	...	103.264095	1.582591	115.924827	0.262069	0.0	1.0	0.285714	0.977528	1.0	0.813889
11	2010-10-01 00:00:00+00:00	A81013	13.392857	0.000000	8.392857	3.035714	65.535714	0.000000	90.357143	3.571429	...	65.357143	0.000000	77.500000	0.266667	0.0	1.0	0.058824	0.997275	0.0	0.857708
12	2010-10-01 00:00:00+00:00	A81014	39.980378	0.000000	8.339465	6.867795	138.337013	0.981114	194.505764	12.018641	...	132.450331	0.981114	153.789551	0.300613	0.0	1.0	0.000000	0.957447	1.0	0.790668
13	2010-10-01 00:00:00+00:00	A81015	25.632911	0.000000	8.544304	0.949367	76.582278	0.000000	111.708861	3.164557	...	75.000000	0.000000	87.025316	0.123457	0.0	1.0	0.333333	0.979339	0.0	0.779037
14	2010-10-01 00:00:00+00:00	A81016	26.355100	0.000000	3.414426	1.813914	72.876654	0.213402	104.673496	1.387111	...	72.449851	0.213402	77.464789	0.052632	0.0	1.0	0.000000	0.994143	1.0	0.740061
15	2010-10-01 00:00:00+00:00	A81017	14.942584	0.000000	1.345313	1.537501	65.824244	0.144141	83.793783	2.306251	...	65.055494	0.144141	69.091433	0.154341	0.0	1.0	0.156250	0.988321	1.0	0.824541
16	2010-10-01 00:00:00+00:00	A81018	18.404908	0.000000	7.064510	21.379439	107.826734	0.000000	154.675590	5.949061	...	107.826734	0.000000	122.513478	0.323232	0.0	1.0	0.078261	1.000000	0.0	0.792067
17	2010-10-01 00:00:00+00:00	A81019	18.795888	0.097895	3.915810	1.272638	61.771904	0.097895	85.952031	2.545277	...	60.890847	0.097895	67.743514	0.135417	1.0	1.0	0.153846	0.985737	1.0	0.788155
18	2010-10-01 00:00:00+00:00	A81020	12.526889	0.506137	1.391877	6.326711	77.565481	0.126534	98.443629	1.898013	...	74.149057	0.126534	78.198153	0.151515	1.0	1.0	0.020000	0.955954	1.0	0.794344
19	2010-10-01 00:00:00+00:00	A81021	22.851503	0.379593	2.277558	4.099605	81.840267	0.455512	111.904039	2.808989	...	81.384756	0.455512	87.382326	0.122924	1.0	1.0	0.018519	0.994434	1.0	0.780868
20	2010-10-01 00:00:00+00:00	A81022	24.494446	0.094940	1.329156	0.664578	78.610083	0.284819	105.478021	4.841925	...	77.091047	0.284819	83.641887	0.197674	1.0	1.0	0.000000	0.980676	1.0	0.792979
21	2010-10-01 00:00:00+00:00	A81023	9.921455	0.275596	0.964586	2.618162	35.414083	0.000000	49.193882	1.102384	...	33.760507	0.000000	36.240871	0.111111	1.0	1.0	0.052632	0.953307	0.0	0.736695
22	2010-10-01 00:00:00+00:00	A81025	7.656613	0.464037	1.160093	4.408353	77.030162	0.928074	91.647332	1.856148	...	76.566125	0.928074	80.974478	0.242424	1.0	1.0	0.000000	0.993976	1.0	0.883544
23	2010-10-01 00:00:00+00:00	A81026	16.113534	0.147831	3.178358	1.034814	64.232390	0.295661	85.002587	2.069628	...	59.871387	0.295661	65.784611	0.128440	1.0	1.0	0.214286	0.932106	1.0	0.773913
24	2010-10-01 00:00:00+00:00	A81027	9.009009	0.080438	1.447876	0.643501	52.927928	0.000000	64.108752	1.850064	...	52.203990	0.000000	55.743243	0.205357	1.0	1.0	0.250000	0.986322	0.0	0.869511
25	2010-10-01 00:00:00+00:00	A81029	17.589083	0.151630	3.335861	0.454890	59.135709	0.000000	80.667172	1.819560	...	57.922669	0.000000	63.532980	0.103448	1.0	1.0	0.666667	0.979487	0.0	0.787594
26	2010-10-01 00:00:00+00:00	A81030	10.251716	0.091533	1.464531	7.231121	47.871854	0.000000	66.910755	3.569794	...	47.871854	0.000000	53.455378	0.348214	1.0	1.0	0.063291	1.000000	0.0	0.798906
27	2010-10-01 00:00:00+00:00	A81031	17.908814	0.000000	1.720497	1.173066	67.255807	0.000000	88.058184	4.301243	...	66.004536	0.000000	72.104481	0.240175	0.0	1.0	0.066667	0.981395	0.0	0.818828
28	2010-10-01 00:00:00+00:00	A81032	15.627978	0.000000	4.097580	1.810558	85.763293	0.095293	107.394702	3.335239	...	85.382123	0.095293	93.196112	0.213415	0.0	1.0	0.157895	0.995556	1.0	0.867791
29	2010-10-01 00:00:00+00:00	A81033	19.157088	0.000000	4.597701	7.407407	85.312899	0.000000	116.475096	1.787995	...	84.546616	0.000000	90.932312	0.093333	0.0	1.0	0.000000	0.991018	0.0	0.780702
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
791213	2019-02-01 00:00:00+00:00	Y03661	81.809658	0.000000	4.922644	6.797937	31.645570	0.000000	125.175809	26.254102	...	31.411158	0.000000	66.338490	0.320917	0.0	1.0	0.551724	0.992593	0.0	0.529963
791214	2019-02-01 00:00:00+00:00	Y03663	46.869535	0.000000	0.699545	8.977498	27.632039	0.233182	84.411799	8.860907	...	27.632039	0.233182	40.806809	0.189055	0.0	1.0	0.376623	1.000000	1.0	0.483425
791215	2019-02-01 00:00:00+00:00	Y03664	42.567125	0.000000	0.982318	2.401222	9.823183	0.000000	55.773849	4.693298	...	9.823183	0.000000	16.481118	0.110256	0.0	1.0	0.409091	1.000000	0.0	0.295499
791216	2019-02-01 00:00:00+00:00	Y03671	76.957197	0.000000	1.330672	4.213795	16.189843	0.000000	98.691506	12.419605	...	15.968064	0.000000	33.045021	0.161383	0.0	1.0	0.789474	0.986301	0.0	0.334831
791217	2019-02-01 00:00:00+00:00	Y03755	30.334511	0.000000	2.597133	2.077706	9.557449	0.000000	44.566798	4.155412	...	9.453563	0.000000	17.660503	0.136986	0.0	1.0	0.700000	0.989130	0.0	0.396270
791218	2019-02-01 00:00:00+00:00	Y04225	271.206691	0.000000	7.168459	7.168459	102.747909	0.000000	388.291517	22.700119	...	99.163680	0.000000	129.032258	0.083700	0.0	1.0	0.000000	0.965116	0.0	0.332308
791219	2019-02-01 00:00:00+00:00	Y04266	67.859806	0.248571	4.225702	4.722844	41.759881	0.497141	119.313945	9.694258	...	41.511310	0.497141	58.911260	0.142857	1.0	1.0	0.578947	0.994048	1.0	0.493750
791220	2019-02-01 00:00:00+00:00	Y04273	13.861786	0.000000	0.874773	0.807483	4.441155	0.000000	19.985196	1.345804	...	4.306574	0.000000	6.796312	0.097087	0.0	1.0	0.333333	0.969697	0.0	0.340067
791221	2019-02-01 00:00:00+00:00	Y04662	17.159547	0.000000	0.000000	0.365097	4.746258	0.000000	22.270902	1.460387	...	4.746258	0.000000	6.206645	0.085106	0.0	0.0	0.000000	1.000000	0.0	0.278689
791222	2019-02-01 00:00:00+00:00	Y04664	69.655172	0.000000	5.057471	3.218391	48.735632	0.000000	126.666667	7.356322	...	48.505747	0.000000	62.068966	0.105611	0.0	1.0	0.357143	0.995283	0.0	0.490018
791223	2019-02-01 00:00:00+00:00	Y04809	66.888243	0.000000	2.908184	0.415455	31.574574	0.415455	102.201911	12.879103	...	30.743664	0.415455	46.946406	0.192547	0.0	1.0	0.000000	0.973684	1.0	0.459350
791224	2019-02-01 00:00:00+00:00	Y04882	62.456627	1.040944	6.939625	3.816794	65.579459	0.000000	139.833449	17.002082	...	65.232477	0.000000	91.256072	0.272222	1.0	1.0	0.272727	0.994709	0.0	0.652605
791225	2019-02-01 00:00:00+00:00	Y04883	95.619048	0.000000	2.666667	4.190476	63.238095	0.000000	165.714286	25.904762	...	63.238095	0.000000	93.714286	0.270916	0.0	1.0	0.454545	1.000000	0.0	0.565517
791226	2019-02-01 00:00:00+00:00	Y04884	57.535784	1.403312	3.367948	6.174572	45.186640	0.000000	113.668257	9.823183	...	44.905978	0.000000	60.903733	0.170732	1.0	1.0	0.227273	0.993789	0.0	0.535802
791227	2019-02-01 00:00:00+00:00	Y04925	33.228470	0.000000	3.539127	1.966182	11.010617	0.000000	49.744396	9.241054	...	11.010617	0.000000	24.380653	0.278107	0.0	1.0	0.300000	1.000000	0.0	0.490119
791228	2019-02-01 00:00:00+00:00	Y04942	0.658390	0.000000	0.000000	0.000000	0.094056	0.000000	0.752445	0.094056	...	0.094056	0.000000	0.188111	0.142857	0.0	0.0	0.000000	1.000000	0.0	0.250000
791229	2019-02-01 00:00:00+00:00	Y04947	36.363636	0.000000	1.154401	1.731602	19.047619	0.000000	58.297258	8.080808	...	19.047619	0.000000	28.571429	0.222222	0.0	1.0	0.166667	1.000000	0.0	0.490099
791230	2019-02-01 00:00:00+00:00	Y04951	17.674927	0.000000	0.182216	2.186589	4.008746	0.000000	24.052478	1.457726	...	4.008746	0.000000	6.195335	0.082474	0.0	1.0	0.250000	1.000000	0.0	0.257576
791231	2019-02-01 00:00:00+00:00	Y04952	58.928118	0.508001	0.762002	3.048006	23.622047	0.000000	86.868174	9.398019	...	23.622047	0.000000	36.322073	0.159483	1.0	1.0	0.666667	1.000000	0.0	0.418129
791232	2019-02-01 00:00:00+00:00	Y04957	49.536407	0.064838	2.788044	3.241911	33.164754	0.000000	88.795954	11.087337	...	33.067497	0.000000	47.785774	0.223822	1.0	1.0	0.240000	0.997067	0.0	0.538153
791233	2019-02-01 00:00:00+00:00	Y04965	28.541775	0.000000	1.556824	1.297353	10.897769	0.000000	42.293721	1.556824	...	10.897769	0.000000	14.270887	0.054545	0.0	1.0	0.200000	1.000000	0.0	0.337423
791234	2019-02-01 00:00:00+00:00	Y04968	15.682175	0.174246	0.696986	0.000000	12.545740	0.000000	29.099146	1.219725	...	12.197247	0.000000	14.288204	0.077778	1.0	1.0	0.000000	0.972222	0.0	0.491018
791235	2019-02-01 00:00:00+00:00	Y04969	34.561164	0.000000	1.591633	1.591633	19.099591	0.000000	56.844020	2.955889	...	19.099591	0.000000	24.556617	0.085526	0.0	1.0	0.571429	1.000000	0.0	0.432000
791236	2019-02-01 00:00:00+00:00	Y04977	82.074316	0.000000	7.145774	2.449980	85.136791	0.000000	176.806860	10.208248	...	85.136791	0.000000	103.715802	0.124378	0.0	1.0	0.500000	1.000000	0.0	0.586605
791237	2019-02-01 00:00:00+00:00	Y04995	73.530869	0.445942	4.954910	1.536022	56.733723	0.000000	137.201467	14.319691	...	56.485978	0.000000	76.553364	0.194744	1.0	1.0	0.225806	0.995633	0.0	0.557963
791238	2019-02-01 00:00:00+00:00	Y05023	75.184017	0.000000	8.937960	5.783386	33.385910	0.000000	123.291272	10.778128	...	32.860147	0.000000	55.205047	0.143357	0.0	1.0	0.454545	0.984252	0.0	0.447761
791239	2019-02-01 00:00:00+00:00	Y05080	183.534137	0.000000	1.606426	0.401606	8.433735	0.000000	193.975904	4.417671	...	8.433735	0.000000	14.457831	0.024070	0.0	1.0	0.000000	1.000000	0.0	0.074534
791240	2019-02-01 00:00:00+00:00	Y05167	33.156084	0.078198	0.547388	1.407570	8.992806	0.000000	44.182046	4.379105	...	8.836409	0.000000	14.779481	0.132075	1.0	1.0	0.666667	0.982609	0.0	0.334513
791241	2019-02-01 00:00:00+00:00	Y05212	18.845701	0.000000	1.177856	1.177856	10.895171	0.000000	32.096584	3.533569	...	10.895171	0.000000	15.901060	0.187500	0.0	1.0	0.250000	1.000000	0.0	0.495413
791242	2019-02-01 00:00:00+00:00	Y05248	101.910828	0.000000	1.005699	5.698961	64.699966	0.000000	173.315454	13.074086	...	64.364733	0.000000	81.461616	0.128289	0.0	1.0	0.529412	0.994819	0.0	0.470019

791243 rows × 23 columns

Plot practice deciles for statin prescribing by chemical per 1000 patients¶

In [8]:

import importlib
from ebmdatalab import charts
importlib.reload(charts)
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt

data = prac_deciles

# set sort order of drugs manually, and add grid refs to position each subplot:
s = [(0,'total',0,0,'(a) Total statins'), 
     (1,'Atorvastatin',0,1,'(b) Atorvastatin items'),
     (2,'Simvastatin',1,0,'(c) Simvastatin items'),
     (3,'Rosuvastatin Calcium',1,1,'(d) Rosuvastatin items'),
     (4,'Pravastatin Sodium',2,0,'(e) Pravastatin items'),
     (5,'Fluvastatin',2,1,'(f) Fluvastatin items'),
     (6,'other statins',3,0,'(g) Other statins')]


fig = plt.figure(figsize=(16,16)) 


gs = gridspec.GridSpec(4,2)  # grid layout for subplots

# Plot each subplot using a loop
for i in s:
    ax = plt.subplot(gs[i[2], i[3]])  # position of subplot in grid using coordinates listed in s
    if i[3]==2:
        legend = True
    else:
        legend = False
    
    plt = charts.deciles_chart(
        data,
        period_column='month',
        column="items_per_thou_"+i[1],
        title=i[4],
        ylabel="Items per 1000 patients",
        show_outer_percentiles=True,
        show_legend=legend,
        ax=ax)
    if i[0]==0:
        ax.set_ylim([0, 250])
    else:
        ax.set_ylim([0, 180])
# Now add a single line against the deciles
'''df_subject = pd.DataFrame(np.random.rand(12, 1), columns=['val']) * 100
df_subject['month'] = months
df_subject.set_index('month')

plt.plot(df_subject['month'], df_subject['val'], 'r--')'''
fig.autofmt_xdate()
plt.show()

Plot practice deciles for low/medium intensity items per 1000 patients¶

In [9]:

import importlib
from ebmdatalab import charts
importlib.reload(charts)
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt

data = prac_deciles

# set sort order of drugs manually, and add grid refs to position each subplot:
s = [(0,'total',0,0,'(a) All statins of low/medium intensity'), 
     (1,'Atorvastatin',0,1,'(b) Atorvastatin items of low/medium intensity'),
     (2,'Simvastatin',1,0,'(c) Simvastatin items of low/medium intensity'),
     (3,'Rosuvastatin Calcium',1,1,'(d) Rosuvastatin items of low/medium intensity'),
     (4,'Pravastatin Sodium',2,0,'(e) Pravastatin items of low/medium intensity')#,
     #(5,'Fluvastatin',2,1,'(f) Fluvastatin items of low/medium intensity'),
     #(6,'other statins',3,0,'(g) Other statins of low/medium intensity')
    ]


fig = plt.figure(figsize=(16,16)) 


gs = gridspec.GridSpec(3,2)  # grid layout for subplots

# Plot each subplot using a loop
for i in s:
    ax = plt.subplot(gs[i[2], i[3]])  # position of subplot in grid using coordinates listed in s
    if i[3]==2:
        legend = True
    else:
        legend = False
    
    plt = charts.deciles_chart(
        data,
        period_column='month',
        column="lmdose_items_per_thou_"+i[1],
        title=i[4],
        ylabel="Items per 1000 patients",
        show_outer_percentiles=True,
        show_legend=legend,
        ax=ax)
    if i[0]==0:
        ax.set_ylim([0, 200])
    else:
        ax.set_ylim([0, 180])
# Now add a single line against the deciles
'''df_subject = pd.DataFrame(np.random.rand(12, 1), columns=['val']) * 100
df_subject['month'] = months
df_subject.set_index('month')

plt.plot(df_subject['month'], df_subject['val'], 'r--')'''
fig.autofmt_xdate()
plt.show()

Plot practice deciles for low/medium intensity items as proportion of each chemical¶

In [25]:

import importlib
from ebmdatalab import charts
importlib.reload(charts)
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt

data = prac_deciles

# set sort order of drugs manually, and add grid refs to position each subplot:
s = [(0,'total',0,0,'(a) Proportion of all statins low/medium intensity'), 
     (1,'Atorvastatin',0,1,'(b) Atorvastatin items low/medium intensity'),
     (2,'Simvastatin',1,0,'(c) Simvastatin items low/medium intensity'),
     (3,'Rosuvastatin Calcium',1,1,'(d) Rosuvastatin items low/medium intensity')]


fig = plt.figure(figsize=(16,10)) 


gs = gridspec.GridSpec(2,2)  # grid layout for subplots

# Plot each subplot using a loop
for i in s:
    ax = plt.subplot(gs[i[2], i[3]])  # position of subplot in grid using coordinates listed in s
    if i[3]==2:
        legend = True
    else:
        legend = False
    
    plt = charts.deciles_chart(
        data,
        period_column='month',
        column="proportion_lm_"+i[1],
        title=i[4],
        ylabel="Proportion of "+i[1],
        show_outer_percentiles=True,
        show_legend=legend,
        ax=ax)
    if i[0]==0:
        ax.set_ylim([0, 1])
    else:
        ax.set_ylim([0, 1])
# Now add a single line against the deciles
'''df_subject = pd.DataFrame(np.random.rand(12, 1), columns=['val']) * 100
df_subject['month'] = months
df_subject.set_index('month')

plt.plot(df_subject['month'], df_subject['val'], 'r--')'''
fig.autofmt_xdate()
plt.show()

Regression analysis¶

In [11]:

import datetime as dt

regr = prac.copy()
regr = regr.loc[pd.DatetimeIndex(regr['month']).year==2018]
regr["year"] = pd.DatetimeIndex(regr['month']).year
regr = regr.groupby(["practice","year"])["items","low_med_dose_items"].sum()

regr["proportion_lm_dose"] = regr["low_med_dose_items"]/regr["items"] 

regr.reset_index().to_csv("statins_proportion_for_regression.csv")
regr.head()

Out[11]:

		items	low_med_dose_items	proportion_lm_dose
practice	year
A81001	2018	5261	2266	0.430717
A81002	2018	30575	12199	0.398986
A81004	2018	13974	5234	0.374553
A81005	2018	15779	7366	0.466823
A81006	2018	26783	10885	0.406415

boxplot¶

In [53]:

import numpy as np

prac_p_2018 = prac_p.loc[pd.DatetimeIndex(prac_p['month']).year==2018].drop("month",axis=1).groupby("practice").mean()

prac_2018 = regr.reset_index().merge(prac_p_2018, on="practice")
prac_2018["items_per_thou"] = 1000*prac_2018["items"]/prac_2018["total_list_size"]

# filter out those with very small 
prac_2018 = prac_2018.loc[prac_2018["total_list_size"]>500]

prac_2018["prescribing_decile"] = np.ceil(prac_2018["items_per_thou"].rank(pct=True)*10)
prac_2018.sort_values(by="items_per_thou",ascending=False).head()

Out[53]:

	practice	year	items	low_med_dose_items	proportion_lm_dose	total_list_size	items_per_thou	prescribing_decile
7067	Y02625	2018	4185	2660	0.635603	1072.583333	3901.794732	10.0
1221	C83064	2018	54549	28766	0.527342	14040.916667	3885.002760	10.0
1194	C83036	2018	26712	13375	0.500711	6970.500000	3832.149774	10.0
359	A88010	2018	12067	7372	0.610922	3160.500000	3818.066762	10.0
202	A83075	2018	17698	10691	0.604080	4673.250000	3787.086075	10.0

In [55]:

prac_2018.boxplot(column="proportion_lm_dose", by="prescribing_decile") #, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds)[source]¶

plt.
plt.show()

CCG data¶

In [12]:

sql = '''-- CCG data by chemical
SELECT p.month, p.pct, chemical, 
SUM(items) AS items, 
SUM(actual_cost) AS actual_cost, 
SUM(quantity) AS quantity 

FROM `ebmdatalab.helen.prescribing_section_2_12` p

INNER JOIN ebmdatalab.hscic.ccgs c
ON p.pct = c.code
  AND c. org_type = "CCG"

GROUP BY
month, pct, chemical'''


ccg = bq.cached_read(sql, csv_path='statins_ccg_level.csv')  # add `use_cache=False` to override
ccg["month"]= pd.to_datetime(ccg.month)
ccg.head()

Out[12]:

	month	pct	chemical	items	actual_cost	quantity
0	2014-03-01	08Q	Rosuvastatin Calcium	373	10756.01	15889
1	2014-03-01	01C	Simvastatin & Ezetimibe	1	38.09	28
2	2014-03-01	99P	Acipimox	3	125.76	264
3	2014-03-01	06K	Atorvastatin	20220	34625.87	581145
4	2014-03-01	10K	Ciprofibrate	27	3626.37	1344

CCG population data¶

In [13]:

sql_ccg_p = '''
SELECT pct_id AS pct, month, SUM(total_list_size) AS total_list_size,
SUM( female_75_plus+female_65_74+female_55_64+male_75_plus+male_65_74+male_55_64) AS list_size_over_55 FROM ebmdatalab.hscic.practice_statistics_all_years stat
GROUP BY pct, month'''

ccg_p = bq.cached_read(sql_ccg_p, csv_path='ccg_p.csv', use_cache=False)  # add `use_cache=False` to override


ccg_p["total_list_size"] = ccg_p["total_list_size"].astype("float")
ccg_p["list_size_over_55"] = ccg_p["list_size_over_55"].astype("float")
ccg_p["month"]= pd.to_datetime(ccg_p.month)
ccg_p.info()

C:\Users\hcurtis\Anaconda3\lib\site-packages\pandas\io\gbq.py:149: FutureWarning: verbose is deprecated and will be removed in a future version. Set logging level in order to vary verbosity
  credentials=credentials, verbose=verbose, private_key=private_key)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30992 entries, 0 to 30991
Data columns (total 4 columns):
pct                  30957 non-null object
month                30992 non-null datetime64[ns, UTC]
total_list_size      30992 non-null float64
list_size_over_55    30992 non-null float64
dtypes: datetime64[ns, UTC](1), float64(2), object(1)
memory usage: 968.6+ KB

Maps¶

In [14]:

import numpy as np
import pandas as pd
from ebmdatalab import maps

# total items per thousand over last year

import datetime as dt
ccg_last_year =  ccg.copy()
ccg_last_year =  ccg_last_year.loc[pd.DatetimeIndex(ccg_last_year['month']).year==2018]
# join on list sizes:
ccg_last_year["month"] = pd.to_datetime(ccg_last_year["month"], utc = True)
ccg_p["month"] = pd.to_datetime(ccg_p["month"], utc = True)
ccg_last_year = ccg_last_year.merge(ccg_p, on=["pct","month"])

ccg_last_year = ccg_last_year.groupby(['pct']).agg({"items":"sum","total_list_size":"mean","list_size_over_55":"mean"}).reset_index()
ccg_last_year["items_per_thou"] = 1000*ccg_last_year["items"]/ccg_last_year["total_list_size"]
ccg_last_year["items_per_55+"] = 1000*ccg_last_year["items"]/ccg_last_year["list_size_over_55"]
#ccg_last_year["cost_per_55+"] = 1000*ccg_last_year["actual_cost"]/ccg_last_year["list_size_over_55"]
ccg_last_year.sort_values(by="items_per_thou").describe()

Out[14]:

	items	total_list_size	list_size_over_55	items_per_thou	items_per_55+
count	1.950000e+02	1.950000e+02	195.000000	195.000000	195.000000
mean	3.743369e+05	3.035664e+05	88108.096572	1287.204058	4405.056640
std	2.088908e+05	1.813134e+05	53587.147747	362.288657	1207.256880
min	9.340500e+04	7.897006e+04	17678.780347	514.729698	2486.538978
25%	2.200335e+05	1.879890e+05	56159.071835	992.637298	3597.829360
50%	3.234200e+05	2.597903e+05	72473.450000	1287.561976	4218.239763
75%	4.685255e+05	3.350260e+05	101118.173857	1496.764103	5079.479771
max	1.251592e+06	1.314049e+06	331025.500000	2318.566931	11268.964903

In [59]:

import matplotlib.pyplot as plt 
import importlib
from ebmdatalab import maps
importlib.reload(maps)

plt.figure(figsize=(14,9))
data = ccg_last_year
#df.columns = ['month', 'pct', 'ccg_name', 'total_list_size']  # The CCG column must be named 'pct'
plt = maps.ccg_map(data, title="statins per 1000", column='items_per_thou', separate_london=True)
plt.show()

In [60]:

import matplotlib.pyplot as plt 
import importlib
from ebmdatalab import maps
importlib.reload(maps)

plt.figure(figsize=(14,9))
data = ccg_last_year
#df.columns = ['month', 'pct', 'ccg_name', 'total_list_size']  # The CCG column must be named 'pct'
plt = maps.ccg_map(data, title="statins per 1000 aged 55+", column='items_per_55+', separate_london=True)
plt.show()

In [17]:

# Maps for measure 

sql = '''-- statin intensity measure

SELECT * FROM measures.ccg_data_statinintensity2
WHERE EXTRACT(year from month) = 2018

'''

intensity = bq.cached_read(sql, csv_path='ccg_statin_intensity.csv')  # add `use_cache=False` to override
intensity.head()

Out[17]:

	pct_id	stp_id	regional_team_id	month	numerator	denominator	calc_value	percentile
0	02N	E54000005	Y54	2018-07-01	7098	17138	0.414167	0.103093
1	02N	E54000005	Y54	2018-01-01	7751	17646	0.439250	0.103093
2	02N	E54000005	Y54	2018-02-01	6614	15246	0.433819	0.097938
3	02N	E54000005	Y54	2018-05-01	7528	17712	0.425023	0.103093
4	02N	E54000005	Y54	2018-04-01	7290	16963	0.429759	0.108247

In [61]:

import matplotlib.pyplot as plt 
import importlib
from ebmdatalab import maps
importlib.reload(maps)

plt.figure(figsize=(14,9))
data = intensity.groupby("pct_id")["numerator","denominator"].sum().reset_index().rename(columns={"pct_id":"pct"})
data["calc_value"] = data["numerator"]/data["denominator"]

#df.columns = ['month', 'pct', 'ccg_name', 'total_list_size']  # The CCG column must be named 'pct'
plt = maps.ccg_map(data, title="statin intensity", column='calc_value', separate_london=True)
plt.show()

individual formulations¶

In [19]:

# tablets/capsules only - exclude other formulations such as liquids and powder sachets.

sql = '''SELECT p.practice, p.month,
SUM(CASE WHEN concat(SUBSTR(bnf_code,1,9),SUBSTR(bnf_code,-2,2)) IN ('0212000B0AA','0212000B0AL') THEN quantity ELSE 0 END)/SUM(quantity) AS atorvastatin_10mg_quant, 
SUM(CASE WHEN concat(SUBSTR(bnf_code,1,9),SUBSTR(bnf_code,-2,2)) IN ('0212000B0AB', '0212000B0AE', '0212000B0AM') THEN quantity ELSE 0 END)/SUM(quantity) AS atorvastatin_20mg_quant,
SUM(CASE WHEN bnf_code LIKE '0212000B0%AC' THEN quantity ELSE 0 END)/SUM(quantity) AS atorvastatin_40mg_quant,
SUM(CASE WHEN bnf_code LIKE '0212000B0%AD' THEN quantity ELSE 0 END)/SUM(quantity) AS atorvastatin_80mg_quant, 
SUM(CASE WHEN concat(SUBSTR(bnf_code,1,9),SUBSTR(bnf_code,-2,2)) IN ('0212000Y0AA','0212000Y0AC') THEN quantity ELSE 0 END)/SUM(quantity) AS simvastatin_10mg_quant,
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AB' OR bnf_code LIKE '0212000Y0%AE' THEN quantity ELSE 0 END)/SUM(quantity) AS simvastatin_20mg_quant, 
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AD' THEN quantity ELSE 0 END)/SUM(quantity) AS simvastatin_40mg_quant, 
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AH' THEN quantity ELSE 0 END)/SUM(quantity) AS simvastatin_80mg_quant, 

SUM(CASE WHEN bnf_code LIKE '0212000B0%AB' THEN items ELSE 0 END)/SUM(items) AS atorvastatin_20mg_items, 
SUM(CASE WHEN bnf_code LIKE '0212000B0%AD' THEN items ELSE 0 END)/SUM(items) AS atorvastatin_80mg_items, 
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AB' OR bnf_code LIKE '0212000Y0%AE' THEN items ELSE 0 END)/SUM(items) AS simvastatin_20mg_items, 
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AD' THEN items ELSE 0 END)/SUM(items) AS simvastatin_40mg_items, 
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AH' THEN items ELSE 0 END)/SUM(items) AS simvastatin_80mg_items, 

SUM(items) as total_items,
SUM(quantity) as total_quantity

FROM `ebmdatalab.helen.prescribing_section_2_12` p
INNER JOIN (
select distinct chemical, presentation, presentation_code 
from `hscic.bnf` 
where SUBSTR(presentation_code,1,9) IN (
'0212000AA', --Rosuvastatin Calcium 
'0212000AC', --Simvastatin & Ezetimibe
'0212000B0', --Atorvastatin
'0212000C0', --Cerivastatin 
'0212000M0', --Fluvastatin Sodium
'0212000X0', --Pravastatin Sodium 
'0212000Y0') --Simvastatin)
AND (presentation LIKE "%Tab%" OR presentation LIKE "%Cap%")
) b ON p.bnf_code = b.presentation_code

INNER JOIN ebmdatalab.hscic.practices prac
ON p.practice = prac.code
  AND prac.setting = 4
  
GROUP BY
practice, month'''

dose = bq.cached_read(sql, csv_path='statins_dose_practice_level.zip')  # add `use_cache=False` to override
dose["month"] = pd.to_datetime(dose.month)

dose.head()

Out[19]:

	practice	month	atorvastatin_10mg_quant	atorvastatin_20mg_quant	atorvastatin_40mg_quant	atorvastatin_80mg_quant	simvastatin_10mg_quant	simvastatin_20mg_quant	simvastatin_40mg_quant	simvastatin_80mg_quant	atorvastatin_20mg_items	atorvastatin_80mg_items	simvastatin_20mg_items	simvastatin_40mg_items	simvastatin_80mg_items	total_items	total_quantity
0	F81025	2014-03-01	0.084950	0.076560	0.085999	0.049292	0.009439	0.120084	0.414263	0.004195	0.080292	0.048175	0.124088	0.392701	0.004380	685	26698
1	K81018	2014-03-01	0.059587	0.100913	0.108602	0.024988	0.092263	0.254685	0.229217	0.009611	0.106017	0.027221	0.239255	0.239255	0.008596	698	29134
2	L84613	2014-03-01	0.021702	0.013317	0.137613	0.047351	0.017756	0.085541	0.470054	0.000000	0.019523	0.047722	0.084599	0.433839	0.000000	461	14192
3	E84030	2014-03-01	0.038280	0.140322	0.104253	0.004887	0.082669	0.386468	0.207284	0.000000	0.137421	0.004228	0.376321	0.200846	0.000000	473	17189
4	A86004	2014-03-01	0.045586	0.053824	0.181656	0.059431	0.017438	0.142249	0.436130	0.000641	0.052409	0.064243	0.138631	0.425190	0.000845	1183	43698

In [20]:

# join prac population data and calc items & quantity per 1000
dose["month"] = pd.to_datetime(dose.month, utc=True)# join prac population data and calc items & quantity per 1000

dose2 = dose.merge(prac_p, on=["practice","month"])
dose2["items_per_thou"] = 1000*dose2["total_items"]/dose2["total_list_size"]
dose2["quantity_per_thou"] = 1000*dose2["total_quantity"]/dose2["total_list_size"]

dose2.head()

Out[20]:

	practice	month	atorvastatin_10mg_quant	atorvastatin_20mg_quant	atorvastatin_40mg_quant	atorvastatin_80mg_quant	simvastatin_10mg_quant	simvastatin_20mg_quant	simvastatin_40mg_quant	simvastatin_80mg_quant	atorvastatin_20mg_items	atorvastatin_80mg_items	simvastatin_20mg_items	simvastatin_40mg_items	simvastatin_80mg_items	total_items	total_quantity	total_list_size	items_per_thou	quantity_per_thou
0	F81025	2014-03-01 00:00:00+00:00	0.084950	0.076560	0.085999	0.049292	0.009439	0.120084	0.414263	0.004195	0.080292	0.048175	0.124088	0.392701	0.004380	685	26698	8602.0	79.632644	3103.696815
1	K81018	2014-03-01 00:00:00+00:00	0.059587	0.100913	0.108602	0.024988	0.092263	0.254685	0.229217	0.009611	0.106017	0.027221	0.239255	0.239255	0.008596	698	29134	9718.0	71.825478	2997.941963
2	L84613	2014-03-01 00:00:00+00:00	0.021702	0.013317	0.137613	0.047351	0.017756	0.085541	0.470054	0.000000	0.019523	0.047722	0.084599	0.433839	0.000000	461	14192	2709.0	170.173496	5238.833518
3	E84030	2014-03-01 00:00:00+00:00	0.038280	0.140322	0.104253	0.004887	0.082669	0.386468	0.207284	0.000000	0.137421	0.004228	0.376321	0.200846	0.000000	473	17189	6090.0	77.668309	2822.495895
4	A86004	2014-03-01 00:00:00+00:00	0.045586	0.053824	0.181656	0.059431	0.017438	0.142249	0.436130	0.000641	0.052409	0.064243	0.138631	0.425190	0.000845	1183	43698	15210.0	77.777778	2872.978304

In [23]:

import importlib
from ebmdatalab import charts
importlib.reload(charts)
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt

data = dose2.copy()

# set sort order of drugs manually, and add grid refs to position each subplot:
s = [(0,'atorvastatin_10mg_quant',0,0,'(a) Atorvastatin 10 mg'), 
     (1,'atorvastatin_20mg_quant',0,1,'(b) Atorvastatin 20 mg (HI)'), 
     (2,'atorvastatin_40mg_quant',1,0,'(c) Atorvastatin 40 mg (HI)'), 
     (3,'atorvastatin_80mg_quant',1,1,'(d) Atorvastatin 80 mg (HI)'), 
     (4,'simvastatin_10mg_quant',2,0,'(e) Simvastatin 10 mg'),
     (5,'simvastatin_20mg_quant',2,1,'(f) Simvastatin 20 mg'),
     (6,'simvastatin_40mg_quant',3,0,'(g) Simvastatin 40 mg'),
     (7,'simvastatin_80mg_quant',3,1,'(h) Simvastatin 80 mg (HI)'),
     (8,'total_quantity',4,0,'(i) Total quantity statin tablets/capsules')]


fig = plt.figure(figsize=(16,22)) 


gs = gridspec.GridSpec(5,2)  # grid layout for subplots

# Plot each subplot using a loop
for i in s:
    ax = plt.subplot(gs[i[2], i[3]])  # position of subplot in grid using coordinates listed in s
    if i[0]==8: # set positions at which to display the legend
        legend = True
        ylabel="statin quantity"
    else:
        legend = False
        ylabel="proportion of statin quantity"
    
    plt = charts.deciles_chart(
        data,
        period_column='month',
        column=i[1],
        title=i[4],
        ylabel=ylabel,
        show_outer_percentiles=True,
        show_legend=legend,
        ax=ax)
    if i[0]==8:
        ax.set_ylim([0, 100000])
    else:
        ax.set_ylim([0, 0.8])

# Now add a single line against the deciles
'''df_subject = pd.DataFrame(np.random.rand(12, 1), columns=['val']) * 100
df_subject['month'] = months
df_subject.set_index('month')

plt.plot(df_subject['month'], df_subject['val'], 'r--')'''
fig.autofmt_xdate()
plt.show()

In [ ]: