import pandas as pd
from ebmdatalab import bq
import numpy as np
from ebmdatalab import bq
# save table in BQ as follows
'''SELECT pct, practice, chemical, bnf_code, bnf_name, SUM(items) AS items, SUM(actual_cost) AS actual_cost, SUM(quantity) AS quantity, CAST(month AS DATE) AS month
FROM `ebmdatalab.hscic.normalised_prescribing_standard` p
LEFT JOIN `hscic.bnf` b ON p.bnf_code = b.presentation_code
WHERE SUBSTR(bnf_code,1,9) IN (
'0212000AA', --Rosuvastatin Calcium
'0212000AC', --Simvastatin & Ezetimibe
'0212000B0', --Atorvastatin
'0212000C0', --Cerivastatin
'0212000M0', --Fluvastatin Sodium
'0212000X0', --Pravastatin Sodium
'0212000Y0') --Simvastatin
GROUP BY
pct, practice, chemical, bnf_code, bnf_name, month
ORDER BY
month, bnf_code'''
# for practice- and chemical-level use following
'''SELECT pct, practice, chemical, SUM(items) AS items, SUM(actual_cost) AS actual_cost, SUM(quantity) AS quantity, CAST(month AS DATE) AS month ,
SUM(CASE WHEN concat(SUBSTR(bnf_code,1,9),SUBSTR(bnf_code,-2,2)) NOT IN (
'0212000AAAA', --Rosuvastatin Calc_Tab 10mg (brand, generic)
'0212000AAAB', --Rosuvastatin Calc_Tab 20mg (brand, generic)
'0212000AAAC', --Rosuvastatin Calc_Tab 40mg (brand, generic)
'0212000B0AB', --Atorvastatin_Tab 20mg (brand, generic)
'0212000B0AC', --Atorvastatin_Tab 40mg (brand, generic)
'0212000B0AD', --Atorvastatin_Tab 80mg (brand, generic)
'0212000B0AN', --Atorvastatin_Tab 30mg (brand, generic)
'0212000B0AP', --Atorvastatin_Tab 60mg (brand, generic)
'0212000Y0AH') --Simvastatin_Tab 80mg (brand, generic)
THEN items ELSE 0 END) AS low_med_dose_items
FROM `ebmdatalab.helen.prescribing_statins_all_and_lowdose` p
WHERE SUBSTR(bnf_code,1,9) IN (
'0212000AA', --Rosuvastatin Calcium
'0212000AC', --Simvastatin & Ezetimibe
'0212000B0', --Atorvastatin
'0212000C0', --Cerivastatin
'0212000M0', --Fluvastatin Sodium
'0212000X0', --Pravastatin Sodium
'0212000Y0') --Simvastatin
GROUP BY
pct, practice, chemical, month
ORDER BY
month'''
# Import practice-level data
sql = '''-- practice data by chemical
SELECT p.month, p.practice, chemical,
SUM(items) AS items, SUM(actual_cost) AS actual_cost, SUM(quantity) AS quantity
FROM `ebmdatalab.helen.prescribing_section_2_12` p
INNER JOIN ebmdatalab.hscic.practices prac
ON p.practice = prac.code
AND prac.setting = 4
GROUP BY
month, practice, chemical'''
#df = bq.cached_read(sql, csv_path='statins_practice_level.zip') # add `use_cache=False` to override
df = pd.read_csv('helen_prescribing_statins_all_and_lowdose_chem.csv')
df.head()
pct | practice | chemical | items | actual_cost | quantity | month | low_med_dose_items | |
---|---|---|---|---|---|---|---|---|
0 | 06A | M92011 | Fluvastatin Sodium | 3 | 72.74 | 168 | 2010-08-01 | 3 |
1 | 08A | G83680 | Rosuvastatin Calcium | 2 | 49.83 | 84 | 2010-08-01 | 1 |
2 | 10X | Y02838 | Simvastatin | 49 | 81.24 | 1652 | 2010-08-01 | 49 |
3 | 08W | F86001 | Simvastatin & Ezetimibe | 2 | 123.08 | 112 | 2010-08-01 | 2 |
4 | 08G | E86632 | Simvastatin | 155 | 332.77 | 7070 | 2010-08-01 | 154 |
prac = df.copy()
prac["month"]= pd.to_datetime(prac.month, utc=True)
prac["proportion_lm"] = prac["low_med_dose_items"]/prac["items"]
prac.head()
pct | practice | chemical | items | actual_cost | quantity | month | low_med_dose_items | proportion_lm | |
---|---|---|---|---|---|---|---|---|---|
0 | 06A | M92011 | Fluvastatin Sodium | 3 | 72.74 | 168 | 2010-08-01 00:00:00+00:00 | 3 | 1.000000 |
1 | 08A | G83680 | Rosuvastatin Calcium | 2 | 49.83 | 84 | 2010-08-01 00:00:00+00:00 | 1 | 0.500000 |
2 | 10X | Y02838 | Simvastatin | 49 | 81.24 | 1652 | 2010-08-01 00:00:00+00:00 | 49 | 1.000000 |
3 | 08W | F86001 | Simvastatin & Ezetimibe | 2 | 123.08 | 112 | 2010-08-01 00:00:00+00:00 | 2 | 1.000000 |
4 | 08G | E86632 | Simvastatin | 155 | 332.77 | 7070 | 2010-08-01 00:00:00+00:00 | 154 | 0.993548 |
note running this cell takes some time
sql_prac_p = '''
SELECT practice, month, SUM(total_list_size) AS total_list_size
--, SUM( female_75_plus+female_65_74+female_55_64+female_45_54+male_75_plus+male_65_74+male_55_64+male_45_54) AS list_size_over_45
FROM ebmdatalab.hscic.practice_statistics_all_years stat
GROUP BY practice, month'''
prac_p = bq.cached_read(sql_prac_p, csv_path='prac_p.csv', use_cache=False) # add `use_cache=False` to override
prac_p["total_list_size"] = prac_p["total_list_size"].astype("float")
#prac_p["list_size_over_45"] = prac_p["list_size_over_45"].astype("float")
prac_p["month"]= pd.to_datetime(prac_p.month, utc=True)
prac_p.info()
C:\Users\hcurtis\Anaconda3\lib\site-packages\pandas\io\gbq.py:149: FutureWarning: verbose is deprecated and will be removed in a future version. Set logging level in order to vary verbosity credentials=credentials, verbose=verbose, private_key=private_key)
<class 'pandas.core.frame.DataFrame'> Int64Index: 899154 entries, 0 to 99153 Data columns (total 3 columns): practice 899154 non-null object month 899154 non-null datetime64[ns, UTC] total_list_size 899154 non-null float64 dtypes: datetime64[ns, UTC](1), float64(1), object(1) memory usage: 27.4+ MB
national = prac.copy().groupby(["month"]).sum().reset_index()
prac_n = prac_p.groupby("month").sum().reset_index()
national = national.merge(prac_n, on=["month"])
national["items_per_thou"] = 1000*national["items"]/national["total_list_size"]
national["lmdose_items_per_thou"] = 1000*national["low_med_dose_items"]/national["total_list_size"]
national["proportion_lm"] = national["low_med_dose_items"]/national["items"]
national.head()
month | items | actual_cost | quantity | low_med_dose_items | proportion_lm | total_list_size | items_per_thou | lmdose_items_per_thou | |
---|---|---|---|---|---|---|---|---|---|
0 | 2010-10-01 00:00:00+00:00 | 4489263 | 3.315708e+07 | 158659485 | 3623327 | 0.807110 | 55218374.0 | 81.300166 | 65.618140 |
1 | 2010-11-01 00:00:00+00:00 | 4649310 | 3.412504e+07 | 163886937 | 3754116 | 0.807457 | 55218374.0 | 84.198604 | 67.986718 |
2 | 2010-12-01 00:00:00+00:00 | 4976000 | 3.655069e+07 | 175528148 | 4016546 | 0.807184 | 55218374.0 | 90.114932 | 72.739302 |
3 | 2011-01-01 00:00:00+00:00 | 4446690 | 3.228819e+07 | 156205627 | 3591797 | 0.807746 | 55256645.0 | 80.473398 | 65.002083 |
4 | 2011-02-01 00:00:00+00:00 | 4287507 | 3.101696e+07 | 149868554 | 3461122 | 0.807257 | 55256645.0 | 77.592604 | 62.637209 |
import importlib
from ebmdatalab import charts
importlib.reload(charts)
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
data = national.copy()
s = [(0,'items_per_thou',0,0,'(a) All statins per 1000 patients'),
(1,'proportion_lm',0,1,'(b) Proportion low & medium intensity'),
(2,'lmdose_items_per_thou',1,0,'(c) Low & medium intensity prescribing rate')]
fig = plt.figure(figsize=(16,16))
gs = gridspec.GridSpec(3,2) # grid layout for subplots
for i in s:
ax = plt.subplot(gs[i[2], i[3]])
data.plot('month', i[1], title=i[4], legend=False, ax=ax)
#data.plot('month', 'lmdose_items_per_thou')
plt.show()
#plot(x, y, color='green', marker='o', linestyle='dashed',
#... linewidth=2, markersize=12)
C:\Users\hcurtis\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:1172: UserWarning: Converting to PeriodArray/Index representation will drop timezone information. "will drop timezone information.", UserWarning)
Also select top chemicals, grouping the rest into "other"
prac_deciles = prac.copy()
# group up chemicals to top 5 and "other"
conditions = [
(prac_deciles["chemical"] == "Pravastatin Sodium"),
(prac_deciles["chemical"] == "Rosuvastatin Calcium"),
(prac_deciles["chemical"] == "Simvastatin"),
(prac_deciles["chemical"] == "Atorvastatin"),
(prac_deciles["chemical"] == "Fluvastatin Sodium")]
choices = ["Pravastatin Sodium","Rosuvastatin Calcium","Simvastatin","Atorvastatin","Fluvastatin"]
prac_deciles['chem2'] = np.select(conditions, choices, default='other statins')
prac_deciles = prac_deciles.groupby(["month","practice","chem2"],as_index=False).sum()
prac_deciles = prac_deciles.set_index(["month","practice","chem2"]).unstack()
prac_deciles["items","total"] = prac_deciles["items"].sum(axis=1)
prac_deciles["low_med_dose_items","total"] = prac_deciles["low_med_dose_items"].sum(axis=1)
prac_deciles.sort_index(axis=1,inplace=True)
prac_deciles = prac_deciles.stack().reset_index()
# join prac population data and calc items & cost per 1000
prac_deciles = prac_deciles.merge(prac_p, on=["practice","month"])
prac_deciles["items_per_thou"] = 1000*prac_deciles["items"]/prac_deciles["total_list_size"]
prac_deciles["lmdose_items_per_thou"] = 1000*prac_deciles["low_med_dose_items"]/prac_deciles["total_list_size"]
prac_deciles["proportion_lm"] = prac_deciles["low_med_dose_items"]/prac_deciles["items"]
# reduce to required columns and set index
prac_deciles = prac_deciles[["month","practice","chem2","items_per_thou","lmdose_items_per_thou", "proportion_lm"]].set_index(["month","practice","chem2"]).unstack().reset_index().replace([np.inf, -np.inf], np.nan).fillna(0)
prac_deciles.columns = ['_'.join(tup).rstrip('_') for tup in prac_deciles.columns.values]
prac_deciles.head(-20)
month | practice | items_per_thou_Atorvastatin | items_per_thou_Fluvastatin | items_per_thou_Pravastatin Sodium | items_per_thou_Rosuvastatin Calcium | items_per_thou_Simvastatin | items_per_thou_other statins | items_per_thou_total | lmdose_items_per_thou_Atorvastatin | ... | lmdose_items_per_thou_Simvastatin | lmdose_items_per_thou_other statins | lmdose_items_per_thou_total | proportion_lm_Atorvastatin | proportion_lm_Fluvastatin | proportion_lm_Pravastatin Sodium | proportion_lm_Rosuvastatin Calcium | proportion_lm_Simvastatin | proportion_lm_other statins | proportion_lm_total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2010-10-01 00:00:00+00:00 | A81001 | 20.042443 | 1.178967 | 2.593728 | 0.943174 | 50.931384 | 0.000000 | 75.689696 | 1.414761 | ... | 50.931384 | 0.000000 | 56.354633 | 0.070588 | 1.0 | 1.0 | 0.250000 | 1.000000 | 0.0 | 0.744548 |
1 | 2010-10-01 00:00:00+00:00 | A81002 | 21.426832 | 0.097395 | 1.899196 | 5.843682 | 59.215973 | 0.000000 | 88.483078 | 3.749696 | ... | 59.215973 | 0.000000 | 65.254444 | 0.175000 | 1.0 | 1.0 | 0.050000 | 1.000000 | 0.0 | 0.737479 |
2 | 2010-10-01 00:00:00+00:00 | A81003 | 15.380464 | 0.000000 | 4.587156 | 1.079331 | 110.091743 | 0.000000 | 131.138694 | 5.666487 | ... | 110.091743 | 0.000000 | 120.345386 | 0.368421 | 0.0 | 1.0 | 0.000000 | 1.000000 | 0.0 | 0.917695 |
3 | 2010-10-01 00:00:00+00:00 | A81004 | 21.862648 | 0.000000 | 3.458498 | 1.482213 | 47.060277 | 0.000000 | 73.863636 | 4.323123 | ... | 46.813241 | 0.000000 | 54.718379 | 0.197740 | 0.0 | 1.0 | 0.083333 | 0.994751 | 0.0 | 0.740803 |
4 | 2010-10-01 00:00:00+00:00 | A81005 | 28.481807 | 0.125471 | 5.771644 | 1.882058 | 100.376412 | 0.000000 | 136.637390 | 7.151819 | ... | 96.612296 | 0.000000 | 109.661230 | 0.251101 | 1.0 | 1.0 | 0.000000 | 0.962500 | 0.0 | 0.802571 |
5 | 2010-10-01 00:00:00+00:00 | A81006 | 28.759444 | 0.162483 | 2.599724 | 2.599724 | 73.523438 | 0.568690 | 108.213502 | 7.067999 | ... | 73.279714 | 0.568690 | 83.841092 | 0.245763 | 1.0 | 1.0 | 0.062500 | 0.996685 | 1.0 | 0.774775 |
6 | 2010-10-01 00:00:00+00:00 | A81007 | 4.516990 | 0.307977 | 3.901037 | 0.205318 | 70.937275 | 0.307977 | 80.176573 | 1.026589 | ... | 70.731958 | 0.307977 | 76.275536 | 0.227273 | 1.0 | 1.0 | 0.000000 | 0.997106 | 1.0 | 0.951344 |
7 | 2010-10-01 00:00:00+00:00 | A81008 | 27.495379 | 0.231054 | 0.462107 | 9.011091 | 175.831793 | 2.310536 | 215.341959 | 1.617375 | ... | 175.600739 | 2.310536 | 180.221811 | 0.058824 | 1.0 | 1.0 | 0.000000 | 0.998686 | 1.0 | 0.836910 |
8 | 2010-10-01 00:00:00+00:00 | A81009 | 21.709203 | 0.635391 | 3.282855 | 1.164884 | 84.295245 | 0.105899 | 111.193477 | 3.918246 | ... | 83.977550 | 0.105899 | 91.919941 | 0.180488 | 1.0 | 1.0 | 0.000000 | 0.996231 | 1.0 | 0.826667 |
9 | 2010-10-01 00:00:00+00:00 | A81011 | 15.090350 | 0.000000 | 1.633987 | 0.672818 | 73.913879 | 0.000000 | 91.311034 | 1.826221 | ... | 73.817762 | 0.000000 | 77.277970 | 0.121019 | 0.0 | 1.0 | 0.000000 | 0.998700 | 0.0 | 0.846316 |
10 | 2010-10-01 00:00:00+00:00 | A81012 | 28.684471 | 0.000000 | 2.373887 | 4.154303 | 105.637982 | 1.582591 | 142.433234 | 7.517310 | ... | 103.264095 | 1.582591 | 115.924827 | 0.262069 | 0.0 | 1.0 | 0.285714 | 0.977528 | 1.0 | 0.813889 |
11 | 2010-10-01 00:00:00+00:00 | A81013 | 13.392857 | 0.000000 | 8.392857 | 3.035714 | 65.535714 | 0.000000 | 90.357143 | 3.571429 | ... | 65.357143 | 0.000000 | 77.500000 | 0.266667 | 0.0 | 1.0 | 0.058824 | 0.997275 | 0.0 | 0.857708 |
12 | 2010-10-01 00:00:00+00:00 | A81014 | 39.980378 | 0.000000 | 8.339465 | 6.867795 | 138.337013 | 0.981114 | 194.505764 | 12.018641 | ... | 132.450331 | 0.981114 | 153.789551 | 0.300613 | 0.0 | 1.0 | 0.000000 | 0.957447 | 1.0 | 0.790668 |
13 | 2010-10-01 00:00:00+00:00 | A81015 | 25.632911 | 0.000000 | 8.544304 | 0.949367 | 76.582278 | 0.000000 | 111.708861 | 3.164557 | ... | 75.000000 | 0.000000 | 87.025316 | 0.123457 | 0.0 | 1.0 | 0.333333 | 0.979339 | 0.0 | 0.779037 |
14 | 2010-10-01 00:00:00+00:00 | A81016 | 26.355100 | 0.000000 | 3.414426 | 1.813914 | 72.876654 | 0.213402 | 104.673496 | 1.387111 | ... | 72.449851 | 0.213402 | 77.464789 | 0.052632 | 0.0 | 1.0 | 0.000000 | 0.994143 | 1.0 | 0.740061 |
15 | 2010-10-01 00:00:00+00:00 | A81017 | 14.942584 | 0.000000 | 1.345313 | 1.537501 | 65.824244 | 0.144141 | 83.793783 | 2.306251 | ... | 65.055494 | 0.144141 | 69.091433 | 0.154341 | 0.0 | 1.0 | 0.156250 | 0.988321 | 1.0 | 0.824541 |
16 | 2010-10-01 00:00:00+00:00 | A81018 | 18.404908 | 0.000000 | 7.064510 | 21.379439 | 107.826734 | 0.000000 | 154.675590 | 5.949061 | ... | 107.826734 | 0.000000 | 122.513478 | 0.323232 | 0.0 | 1.0 | 0.078261 | 1.000000 | 0.0 | 0.792067 |
17 | 2010-10-01 00:00:00+00:00 | A81019 | 18.795888 | 0.097895 | 3.915810 | 1.272638 | 61.771904 | 0.097895 | 85.952031 | 2.545277 | ... | 60.890847 | 0.097895 | 67.743514 | 0.135417 | 1.0 | 1.0 | 0.153846 | 0.985737 | 1.0 | 0.788155 |
18 | 2010-10-01 00:00:00+00:00 | A81020 | 12.526889 | 0.506137 | 1.391877 | 6.326711 | 77.565481 | 0.126534 | 98.443629 | 1.898013 | ... | 74.149057 | 0.126534 | 78.198153 | 0.151515 | 1.0 | 1.0 | 0.020000 | 0.955954 | 1.0 | 0.794344 |
19 | 2010-10-01 00:00:00+00:00 | A81021 | 22.851503 | 0.379593 | 2.277558 | 4.099605 | 81.840267 | 0.455512 | 111.904039 | 2.808989 | ... | 81.384756 | 0.455512 | 87.382326 | 0.122924 | 1.0 | 1.0 | 0.018519 | 0.994434 | 1.0 | 0.780868 |
20 | 2010-10-01 00:00:00+00:00 | A81022 | 24.494446 | 0.094940 | 1.329156 | 0.664578 | 78.610083 | 0.284819 | 105.478021 | 4.841925 | ... | 77.091047 | 0.284819 | 83.641887 | 0.197674 | 1.0 | 1.0 | 0.000000 | 0.980676 | 1.0 | 0.792979 |
21 | 2010-10-01 00:00:00+00:00 | A81023 | 9.921455 | 0.275596 | 0.964586 | 2.618162 | 35.414083 | 0.000000 | 49.193882 | 1.102384 | ... | 33.760507 | 0.000000 | 36.240871 | 0.111111 | 1.0 | 1.0 | 0.052632 | 0.953307 | 0.0 | 0.736695 |
22 | 2010-10-01 00:00:00+00:00 | A81025 | 7.656613 | 0.464037 | 1.160093 | 4.408353 | 77.030162 | 0.928074 | 91.647332 | 1.856148 | ... | 76.566125 | 0.928074 | 80.974478 | 0.242424 | 1.0 | 1.0 | 0.000000 | 0.993976 | 1.0 | 0.883544 |
23 | 2010-10-01 00:00:00+00:00 | A81026 | 16.113534 | 0.147831 | 3.178358 | 1.034814 | 64.232390 | 0.295661 | 85.002587 | 2.069628 | ... | 59.871387 | 0.295661 | 65.784611 | 0.128440 | 1.0 | 1.0 | 0.214286 | 0.932106 | 1.0 | 0.773913 |
24 | 2010-10-01 00:00:00+00:00 | A81027 | 9.009009 | 0.080438 | 1.447876 | 0.643501 | 52.927928 | 0.000000 | 64.108752 | 1.850064 | ... | 52.203990 | 0.000000 | 55.743243 | 0.205357 | 1.0 | 1.0 | 0.250000 | 0.986322 | 0.0 | 0.869511 |
25 | 2010-10-01 00:00:00+00:00 | A81029 | 17.589083 | 0.151630 | 3.335861 | 0.454890 | 59.135709 | 0.000000 | 80.667172 | 1.819560 | ... | 57.922669 | 0.000000 | 63.532980 | 0.103448 | 1.0 | 1.0 | 0.666667 | 0.979487 | 0.0 | 0.787594 |
26 | 2010-10-01 00:00:00+00:00 | A81030 | 10.251716 | 0.091533 | 1.464531 | 7.231121 | 47.871854 | 0.000000 | 66.910755 | 3.569794 | ... | 47.871854 | 0.000000 | 53.455378 | 0.348214 | 1.0 | 1.0 | 0.063291 | 1.000000 | 0.0 | 0.798906 |
27 | 2010-10-01 00:00:00+00:00 | A81031 | 17.908814 | 0.000000 | 1.720497 | 1.173066 | 67.255807 | 0.000000 | 88.058184 | 4.301243 | ... | 66.004536 | 0.000000 | 72.104481 | 0.240175 | 0.0 | 1.0 | 0.066667 | 0.981395 | 0.0 | 0.818828 |
28 | 2010-10-01 00:00:00+00:00 | A81032 | 15.627978 | 0.000000 | 4.097580 | 1.810558 | 85.763293 | 0.095293 | 107.394702 | 3.335239 | ... | 85.382123 | 0.095293 | 93.196112 | 0.213415 | 0.0 | 1.0 | 0.157895 | 0.995556 | 1.0 | 0.867791 |
29 | 2010-10-01 00:00:00+00:00 | A81033 | 19.157088 | 0.000000 | 4.597701 | 7.407407 | 85.312899 | 0.000000 | 116.475096 | 1.787995 | ... | 84.546616 | 0.000000 | 90.932312 | 0.093333 | 0.0 | 1.0 | 0.000000 | 0.991018 | 0.0 | 0.780702 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
791213 | 2019-02-01 00:00:00+00:00 | Y03661 | 81.809658 | 0.000000 | 4.922644 | 6.797937 | 31.645570 | 0.000000 | 125.175809 | 26.254102 | ... | 31.411158 | 0.000000 | 66.338490 | 0.320917 | 0.0 | 1.0 | 0.551724 | 0.992593 | 0.0 | 0.529963 |
791214 | 2019-02-01 00:00:00+00:00 | Y03663 | 46.869535 | 0.000000 | 0.699545 | 8.977498 | 27.632039 | 0.233182 | 84.411799 | 8.860907 | ... | 27.632039 | 0.233182 | 40.806809 | 0.189055 | 0.0 | 1.0 | 0.376623 | 1.000000 | 1.0 | 0.483425 |
791215 | 2019-02-01 00:00:00+00:00 | Y03664 | 42.567125 | 0.000000 | 0.982318 | 2.401222 | 9.823183 | 0.000000 | 55.773849 | 4.693298 | ... | 9.823183 | 0.000000 | 16.481118 | 0.110256 | 0.0 | 1.0 | 0.409091 | 1.000000 | 0.0 | 0.295499 |
791216 | 2019-02-01 00:00:00+00:00 | Y03671 | 76.957197 | 0.000000 | 1.330672 | 4.213795 | 16.189843 | 0.000000 | 98.691506 | 12.419605 | ... | 15.968064 | 0.000000 | 33.045021 | 0.161383 | 0.0 | 1.0 | 0.789474 | 0.986301 | 0.0 | 0.334831 |
791217 | 2019-02-01 00:00:00+00:00 | Y03755 | 30.334511 | 0.000000 | 2.597133 | 2.077706 | 9.557449 | 0.000000 | 44.566798 | 4.155412 | ... | 9.453563 | 0.000000 | 17.660503 | 0.136986 | 0.0 | 1.0 | 0.700000 | 0.989130 | 0.0 | 0.396270 |
791218 | 2019-02-01 00:00:00+00:00 | Y04225 | 271.206691 | 0.000000 | 7.168459 | 7.168459 | 102.747909 | 0.000000 | 388.291517 | 22.700119 | ... | 99.163680 | 0.000000 | 129.032258 | 0.083700 | 0.0 | 1.0 | 0.000000 | 0.965116 | 0.0 | 0.332308 |
791219 | 2019-02-01 00:00:00+00:00 | Y04266 | 67.859806 | 0.248571 | 4.225702 | 4.722844 | 41.759881 | 0.497141 | 119.313945 | 9.694258 | ... | 41.511310 | 0.497141 | 58.911260 | 0.142857 | 1.0 | 1.0 | 0.578947 | 0.994048 | 1.0 | 0.493750 |
791220 | 2019-02-01 00:00:00+00:00 | Y04273 | 13.861786 | 0.000000 | 0.874773 | 0.807483 | 4.441155 | 0.000000 | 19.985196 | 1.345804 | ... | 4.306574 | 0.000000 | 6.796312 | 0.097087 | 0.0 | 1.0 | 0.333333 | 0.969697 | 0.0 | 0.340067 |
791221 | 2019-02-01 00:00:00+00:00 | Y04662 | 17.159547 | 0.000000 | 0.000000 | 0.365097 | 4.746258 | 0.000000 | 22.270902 | 1.460387 | ... | 4.746258 | 0.000000 | 6.206645 | 0.085106 | 0.0 | 0.0 | 0.000000 | 1.000000 | 0.0 | 0.278689 |
791222 | 2019-02-01 00:00:00+00:00 | Y04664 | 69.655172 | 0.000000 | 5.057471 | 3.218391 | 48.735632 | 0.000000 | 126.666667 | 7.356322 | ... | 48.505747 | 0.000000 | 62.068966 | 0.105611 | 0.0 | 1.0 | 0.357143 | 0.995283 | 0.0 | 0.490018 |
791223 | 2019-02-01 00:00:00+00:00 | Y04809 | 66.888243 | 0.000000 | 2.908184 | 0.415455 | 31.574574 | 0.415455 | 102.201911 | 12.879103 | ... | 30.743664 | 0.415455 | 46.946406 | 0.192547 | 0.0 | 1.0 | 0.000000 | 0.973684 | 1.0 | 0.459350 |
791224 | 2019-02-01 00:00:00+00:00 | Y04882 | 62.456627 | 1.040944 | 6.939625 | 3.816794 | 65.579459 | 0.000000 | 139.833449 | 17.002082 | ... | 65.232477 | 0.000000 | 91.256072 | 0.272222 | 1.0 | 1.0 | 0.272727 | 0.994709 | 0.0 | 0.652605 |
791225 | 2019-02-01 00:00:00+00:00 | Y04883 | 95.619048 | 0.000000 | 2.666667 | 4.190476 | 63.238095 | 0.000000 | 165.714286 | 25.904762 | ... | 63.238095 | 0.000000 | 93.714286 | 0.270916 | 0.0 | 1.0 | 0.454545 | 1.000000 | 0.0 | 0.565517 |
791226 | 2019-02-01 00:00:00+00:00 | Y04884 | 57.535784 | 1.403312 | 3.367948 | 6.174572 | 45.186640 | 0.000000 | 113.668257 | 9.823183 | ... | 44.905978 | 0.000000 | 60.903733 | 0.170732 | 1.0 | 1.0 | 0.227273 | 0.993789 | 0.0 | 0.535802 |
791227 | 2019-02-01 00:00:00+00:00 | Y04925 | 33.228470 | 0.000000 | 3.539127 | 1.966182 | 11.010617 | 0.000000 | 49.744396 | 9.241054 | ... | 11.010617 | 0.000000 | 24.380653 | 0.278107 | 0.0 | 1.0 | 0.300000 | 1.000000 | 0.0 | 0.490119 |
791228 | 2019-02-01 00:00:00+00:00 | Y04942 | 0.658390 | 0.000000 | 0.000000 | 0.000000 | 0.094056 | 0.000000 | 0.752445 | 0.094056 | ... | 0.094056 | 0.000000 | 0.188111 | 0.142857 | 0.0 | 0.0 | 0.000000 | 1.000000 | 0.0 | 0.250000 |
791229 | 2019-02-01 00:00:00+00:00 | Y04947 | 36.363636 | 0.000000 | 1.154401 | 1.731602 | 19.047619 | 0.000000 | 58.297258 | 8.080808 | ... | 19.047619 | 0.000000 | 28.571429 | 0.222222 | 0.0 | 1.0 | 0.166667 | 1.000000 | 0.0 | 0.490099 |
791230 | 2019-02-01 00:00:00+00:00 | Y04951 | 17.674927 | 0.000000 | 0.182216 | 2.186589 | 4.008746 | 0.000000 | 24.052478 | 1.457726 | ... | 4.008746 | 0.000000 | 6.195335 | 0.082474 | 0.0 | 1.0 | 0.250000 | 1.000000 | 0.0 | 0.257576 |
791231 | 2019-02-01 00:00:00+00:00 | Y04952 | 58.928118 | 0.508001 | 0.762002 | 3.048006 | 23.622047 | 0.000000 | 86.868174 | 9.398019 | ... | 23.622047 | 0.000000 | 36.322073 | 0.159483 | 1.0 | 1.0 | 0.666667 | 1.000000 | 0.0 | 0.418129 |
791232 | 2019-02-01 00:00:00+00:00 | Y04957 | 49.536407 | 0.064838 | 2.788044 | 3.241911 | 33.164754 | 0.000000 | 88.795954 | 11.087337 | ... | 33.067497 | 0.000000 | 47.785774 | 0.223822 | 1.0 | 1.0 | 0.240000 | 0.997067 | 0.0 | 0.538153 |
791233 | 2019-02-01 00:00:00+00:00 | Y04965 | 28.541775 | 0.000000 | 1.556824 | 1.297353 | 10.897769 | 0.000000 | 42.293721 | 1.556824 | ... | 10.897769 | 0.000000 | 14.270887 | 0.054545 | 0.0 | 1.0 | 0.200000 | 1.000000 | 0.0 | 0.337423 |
791234 | 2019-02-01 00:00:00+00:00 | Y04968 | 15.682175 | 0.174246 | 0.696986 | 0.000000 | 12.545740 | 0.000000 | 29.099146 | 1.219725 | ... | 12.197247 | 0.000000 | 14.288204 | 0.077778 | 1.0 | 1.0 | 0.000000 | 0.972222 | 0.0 | 0.491018 |
791235 | 2019-02-01 00:00:00+00:00 | Y04969 | 34.561164 | 0.000000 | 1.591633 | 1.591633 | 19.099591 | 0.000000 | 56.844020 | 2.955889 | ... | 19.099591 | 0.000000 | 24.556617 | 0.085526 | 0.0 | 1.0 | 0.571429 | 1.000000 | 0.0 | 0.432000 |
791236 | 2019-02-01 00:00:00+00:00 | Y04977 | 82.074316 | 0.000000 | 7.145774 | 2.449980 | 85.136791 | 0.000000 | 176.806860 | 10.208248 | ... | 85.136791 | 0.000000 | 103.715802 | 0.124378 | 0.0 | 1.0 | 0.500000 | 1.000000 | 0.0 | 0.586605 |
791237 | 2019-02-01 00:00:00+00:00 | Y04995 | 73.530869 | 0.445942 | 4.954910 | 1.536022 | 56.733723 | 0.000000 | 137.201467 | 14.319691 | ... | 56.485978 | 0.000000 | 76.553364 | 0.194744 | 1.0 | 1.0 | 0.225806 | 0.995633 | 0.0 | 0.557963 |
791238 | 2019-02-01 00:00:00+00:00 | Y05023 | 75.184017 | 0.000000 | 8.937960 | 5.783386 | 33.385910 | 0.000000 | 123.291272 | 10.778128 | ... | 32.860147 | 0.000000 | 55.205047 | 0.143357 | 0.0 | 1.0 | 0.454545 | 0.984252 | 0.0 | 0.447761 |
791239 | 2019-02-01 00:00:00+00:00 | Y05080 | 183.534137 | 0.000000 | 1.606426 | 0.401606 | 8.433735 | 0.000000 | 193.975904 | 4.417671 | ... | 8.433735 | 0.000000 | 14.457831 | 0.024070 | 0.0 | 1.0 | 0.000000 | 1.000000 | 0.0 | 0.074534 |
791240 | 2019-02-01 00:00:00+00:00 | Y05167 | 33.156084 | 0.078198 | 0.547388 | 1.407570 | 8.992806 | 0.000000 | 44.182046 | 4.379105 | ... | 8.836409 | 0.000000 | 14.779481 | 0.132075 | 1.0 | 1.0 | 0.666667 | 0.982609 | 0.0 | 0.334513 |
791241 | 2019-02-01 00:00:00+00:00 | Y05212 | 18.845701 | 0.000000 | 1.177856 | 1.177856 | 10.895171 | 0.000000 | 32.096584 | 3.533569 | ... | 10.895171 | 0.000000 | 15.901060 | 0.187500 | 0.0 | 1.0 | 0.250000 | 1.000000 | 0.0 | 0.495413 |
791242 | 2019-02-01 00:00:00+00:00 | Y05248 | 101.910828 | 0.000000 | 1.005699 | 5.698961 | 64.699966 | 0.000000 | 173.315454 | 13.074086 | ... | 64.364733 | 0.000000 | 81.461616 | 0.128289 | 0.0 | 1.0 | 0.529412 | 0.994819 | 0.0 | 0.470019 |
791243 rows × 23 columns
import importlib
from ebmdatalab import charts
importlib.reload(charts)
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
data = prac_deciles
# set sort order of drugs manually, and add grid refs to position each subplot:
s = [(0,'total',0,0,'(a) Total statins'),
(1,'Atorvastatin',0,1,'(b) Atorvastatin items'),
(2,'Simvastatin',1,0,'(c) Simvastatin items'),
(3,'Rosuvastatin Calcium',1,1,'(d) Rosuvastatin items'),
(4,'Pravastatin Sodium',2,0,'(e) Pravastatin items'),
(5,'Fluvastatin',2,1,'(f) Fluvastatin items'),
(6,'other statins',3,0,'(g) Other statins')]
fig = plt.figure(figsize=(16,16))
gs = gridspec.GridSpec(4,2) # grid layout for subplots
# Plot each subplot using a loop
for i in s:
ax = plt.subplot(gs[i[2], i[3]]) # position of subplot in grid using coordinates listed in s
if i[3]==2:
legend = True
else:
legend = False
plt = charts.deciles_chart(
data,
period_column='month',
column="items_per_thou_"+i[1],
title=i[4],
ylabel="Items per 1000 patients",
show_outer_percentiles=True,
show_legend=legend,
ax=ax)
if i[0]==0:
ax.set_ylim([0, 250])
else:
ax.set_ylim([0, 180])
# Now add a single line against the deciles
'''df_subject = pd.DataFrame(np.random.rand(12, 1), columns=['val']) * 100
df_subject['month'] = months
df_subject.set_index('month')
plt.plot(df_subject['month'], df_subject['val'], 'r--')'''
fig.autofmt_xdate()
plt.show()
import importlib
from ebmdatalab import charts
importlib.reload(charts)
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
data = prac_deciles
# set sort order of drugs manually, and add grid refs to position each subplot:
s = [(0,'total',0,0,'(a) All statins of low/medium intensity'),
(1,'Atorvastatin',0,1,'(b) Atorvastatin items of low/medium intensity'),
(2,'Simvastatin',1,0,'(c) Simvastatin items of low/medium intensity'),
(3,'Rosuvastatin Calcium',1,1,'(d) Rosuvastatin items of low/medium intensity'),
(4,'Pravastatin Sodium',2,0,'(e) Pravastatin items of low/medium intensity')#,
#(5,'Fluvastatin',2,1,'(f) Fluvastatin items of low/medium intensity'),
#(6,'other statins',3,0,'(g) Other statins of low/medium intensity')
]
fig = plt.figure(figsize=(16,16))
gs = gridspec.GridSpec(3,2) # grid layout for subplots
# Plot each subplot using a loop
for i in s:
ax = plt.subplot(gs[i[2], i[3]]) # position of subplot in grid using coordinates listed in s
if i[3]==2:
legend = True
else:
legend = False
plt = charts.deciles_chart(
data,
period_column='month',
column="lmdose_items_per_thou_"+i[1],
title=i[4],
ylabel="Items per 1000 patients",
show_outer_percentiles=True,
show_legend=legend,
ax=ax)
if i[0]==0:
ax.set_ylim([0, 200])
else:
ax.set_ylim([0, 180])
# Now add a single line against the deciles
'''df_subject = pd.DataFrame(np.random.rand(12, 1), columns=['val']) * 100
df_subject['month'] = months
df_subject.set_index('month')
plt.plot(df_subject['month'], df_subject['val'], 'r--')'''
fig.autofmt_xdate()
plt.show()
import importlib
from ebmdatalab import charts
importlib.reload(charts)
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
data = prac_deciles
# set sort order of drugs manually, and add grid refs to position each subplot:
s = [(0,'total',0,0,'(a) Proportion of all statins low/medium intensity'),
(1,'Atorvastatin',0,1,'(b) Atorvastatin items low/medium intensity'),
(2,'Simvastatin',1,0,'(c) Simvastatin items low/medium intensity'),
(3,'Rosuvastatin Calcium',1,1,'(d) Rosuvastatin items low/medium intensity')]
fig = plt.figure(figsize=(16,10))
gs = gridspec.GridSpec(2,2) # grid layout for subplots
# Plot each subplot using a loop
for i in s:
ax = plt.subplot(gs[i[2], i[3]]) # position of subplot in grid using coordinates listed in s
if i[3]==2:
legend = True
else:
legend = False
plt = charts.deciles_chart(
data,
period_column='month',
column="proportion_lm_"+i[1],
title=i[4],
ylabel="Proportion of "+i[1],
show_outer_percentiles=True,
show_legend=legend,
ax=ax)
if i[0]==0:
ax.set_ylim([0, 1])
else:
ax.set_ylim([0, 1])
# Now add a single line against the deciles
'''df_subject = pd.DataFrame(np.random.rand(12, 1), columns=['val']) * 100
df_subject['month'] = months
df_subject.set_index('month')
plt.plot(df_subject['month'], df_subject['val'], 'r--')'''
fig.autofmt_xdate()
plt.show()
import datetime as dt
regr = prac.copy()
regr = regr.loc[pd.DatetimeIndex(regr['month']).year==2018]
regr["year"] = pd.DatetimeIndex(regr['month']).year
regr = regr.groupby(["practice","year"])["items","low_med_dose_items"].sum()
regr["proportion_lm_dose"] = regr["low_med_dose_items"]/regr["items"]
regr.reset_index().to_csv("statins_proportion_for_regression.csv")
regr.head()
items | low_med_dose_items | proportion_lm_dose | ||
---|---|---|---|---|
practice | year | |||
A81001 | 2018 | 5261 | 2266 | 0.430717 |
A81002 | 2018 | 30575 | 12199 | 0.398986 |
A81004 | 2018 | 13974 | 5234 | 0.374553 |
A81005 | 2018 | 15779 | 7366 | 0.466823 |
A81006 | 2018 | 26783 | 10885 | 0.406415 |
import numpy as np
prac_p_2018 = prac_p.loc[pd.DatetimeIndex(prac_p['month']).year==2018].drop("month",axis=1).groupby("practice").mean()
prac_2018 = regr.reset_index().merge(prac_p_2018, on="practice")
prac_2018["items_per_thou"] = 1000*prac_2018["items"]/prac_2018["total_list_size"]
# filter out those with very small
prac_2018 = prac_2018.loc[prac_2018["total_list_size"]>500]
prac_2018["prescribing_decile"] = np.ceil(prac_2018["items_per_thou"].rank(pct=True)*10)
prac_2018.sort_values(by="items_per_thou",ascending=False).head()
practice | year | items | low_med_dose_items | proportion_lm_dose | total_list_size | items_per_thou | prescribing_decile | |
---|---|---|---|---|---|---|---|---|
7067 | Y02625 | 2018 | 4185 | 2660 | 0.635603 | 1072.583333 | 3901.794732 | 10.0 |
1221 | C83064 | 2018 | 54549 | 28766 | 0.527342 | 14040.916667 | 3885.002760 | 10.0 |
1194 | C83036 | 2018 | 26712 | 13375 | 0.500711 | 6970.500000 | 3832.149774 | 10.0 |
359 | A88010 | 2018 | 12067 | 7372 | 0.610922 | 3160.500000 | 3818.066762 | 10.0 |
202 | A83075 | 2018 | 17698 | 10691 | 0.604080 | 4673.250000 | 3787.086075 | 10.0 |
prac_2018.boxplot(column="proportion_lm_dose", by="prescribing_decile") #, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds)[source]¶
plt.
plt.show()
sql = '''-- CCG data by chemical
SELECT p.month, p.pct, chemical,
SUM(items) AS items,
SUM(actual_cost) AS actual_cost,
SUM(quantity) AS quantity
FROM `ebmdatalab.helen.prescribing_section_2_12` p
INNER JOIN ebmdatalab.hscic.ccgs c
ON p.pct = c.code
AND c. org_type = "CCG"
GROUP BY
month, pct, chemical'''
ccg = bq.cached_read(sql, csv_path='statins_ccg_level.csv') # add `use_cache=False` to override
ccg["month"]= pd.to_datetime(ccg.month)
ccg.head()
month | pct | chemical | items | actual_cost | quantity | |
---|---|---|---|---|---|---|
0 | 2014-03-01 | 08Q | Rosuvastatin Calcium | 373 | 10756.01 | 15889 |
1 | 2014-03-01 | 01C | Simvastatin & Ezetimibe | 1 | 38.09 | 28 |
2 | 2014-03-01 | 99P | Acipimox | 3 | 125.76 | 264 |
3 | 2014-03-01 | 06K | Atorvastatin | 20220 | 34625.87 | 581145 |
4 | 2014-03-01 | 10K | Ciprofibrate | 27 | 3626.37 | 1344 |
sql_ccg_p = '''
SELECT pct_id AS pct, month, SUM(total_list_size) AS total_list_size,
SUM( female_75_plus+female_65_74+female_55_64+male_75_plus+male_65_74+male_55_64) AS list_size_over_55 FROM ebmdatalab.hscic.practice_statistics_all_years stat
GROUP BY pct, month'''
ccg_p = bq.cached_read(sql_ccg_p, csv_path='ccg_p.csv', use_cache=False) # add `use_cache=False` to override
ccg_p["total_list_size"] = ccg_p["total_list_size"].astype("float")
ccg_p["list_size_over_55"] = ccg_p["list_size_over_55"].astype("float")
ccg_p["month"]= pd.to_datetime(ccg_p.month)
ccg_p.info()
C:\Users\hcurtis\Anaconda3\lib\site-packages\pandas\io\gbq.py:149: FutureWarning: verbose is deprecated and will be removed in a future version. Set logging level in order to vary verbosity credentials=credentials, verbose=verbose, private_key=private_key)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 30992 entries, 0 to 30991 Data columns (total 4 columns): pct 30957 non-null object month 30992 non-null datetime64[ns, UTC] total_list_size 30992 non-null float64 list_size_over_55 30992 non-null float64 dtypes: datetime64[ns, UTC](1), float64(2), object(1) memory usage: 968.6+ KB
import numpy as np
import pandas as pd
from ebmdatalab import maps
# total items per thousand over last year
import datetime as dt
ccg_last_year = ccg.copy()
ccg_last_year = ccg_last_year.loc[pd.DatetimeIndex(ccg_last_year['month']).year==2018]
# join on list sizes:
ccg_last_year["month"] = pd.to_datetime(ccg_last_year["month"], utc = True)
ccg_p["month"] = pd.to_datetime(ccg_p["month"], utc = True)
ccg_last_year = ccg_last_year.merge(ccg_p, on=["pct","month"])
ccg_last_year = ccg_last_year.groupby(['pct']).agg({"items":"sum","total_list_size":"mean","list_size_over_55":"mean"}).reset_index()
ccg_last_year["items_per_thou"] = 1000*ccg_last_year["items"]/ccg_last_year["total_list_size"]
ccg_last_year["items_per_55+"] = 1000*ccg_last_year["items"]/ccg_last_year["list_size_over_55"]
#ccg_last_year["cost_per_55+"] = 1000*ccg_last_year["actual_cost"]/ccg_last_year["list_size_over_55"]
ccg_last_year.sort_values(by="items_per_thou").describe()
items | total_list_size | list_size_over_55 | items_per_thou | items_per_55+ | |
---|---|---|---|---|---|
count | 1.950000e+02 | 1.950000e+02 | 195.000000 | 195.000000 | 195.000000 |
mean | 3.743369e+05 | 3.035664e+05 | 88108.096572 | 1287.204058 | 4405.056640 |
std | 2.088908e+05 | 1.813134e+05 | 53587.147747 | 362.288657 | 1207.256880 |
min | 9.340500e+04 | 7.897006e+04 | 17678.780347 | 514.729698 | 2486.538978 |
25% | 2.200335e+05 | 1.879890e+05 | 56159.071835 | 992.637298 | 3597.829360 |
50% | 3.234200e+05 | 2.597903e+05 | 72473.450000 | 1287.561976 | 4218.239763 |
75% | 4.685255e+05 | 3.350260e+05 | 101118.173857 | 1496.764103 | 5079.479771 |
max | 1.251592e+06 | 1.314049e+06 | 331025.500000 | 2318.566931 | 11268.964903 |
import matplotlib.pyplot as plt
import importlib
from ebmdatalab import maps
importlib.reload(maps)
plt.figure(figsize=(14,9))
data = ccg_last_year
#df.columns = ['month', 'pct', 'ccg_name', 'total_list_size'] # The CCG column must be named 'pct'
plt = maps.ccg_map(data, title="statins per 1000", column='items_per_thou', separate_london=True)
plt.show()
import matplotlib.pyplot as plt
import importlib
from ebmdatalab import maps
importlib.reload(maps)
plt.figure(figsize=(14,9))
data = ccg_last_year
#df.columns = ['month', 'pct', 'ccg_name', 'total_list_size'] # The CCG column must be named 'pct'
plt = maps.ccg_map(data, title="statins per 1000 aged 55+", column='items_per_55+', separate_london=True)
plt.show()
# Maps for measure
sql = '''-- statin intensity measure
SELECT * FROM measures.ccg_data_statinintensity2
WHERE EXTRACT(year from month) = 2018
'''
intensity = bq.cached_read(sql, csv_path='ccg_statin_intensity.csv') # add `use_cache=False` to override
intensity.head()
pct_id | stp_id | regional_team_id | month | numerator | denominator | calc_value | percentile | |
---|---|---|---|---|---|---|---|---|
0 | 02N | E54000005 | Y54 | 2018-07-01 | 7098 | 17138 | 0.414167 | 0.103093 |
1 | 02N | E54000005 | Y54 | 2018-01-01 | 7751 | 17646 | 0.439250 | 0.103093 |
2 | 02N | E54000005 | Y54 | 2018-02-01 | 6614 | 15246 | 0.433819 | 0.097938 |
3 | 02N | E54000005 | Y54 | 2018-05-01 | 7528 | 17712 | 0.425023 | 0.103093 |
4 | 02N | E54000005 | Y54 | 2018-04-01 | 7290 | 16963 | 0.429759 | 0.108247 |
import matplotlib.pyplot as plt
import importlib
from ebmdatalab import maps
importlib.reload(maps)
plt.figure(figsize=(14,9))
data = intensity.groupby("pct_id")["numerator","denominator"].sum().reset_index().rename(columns={"pct_id":"pct"})
data["calc_value"] = data["numerator"]/data["denominator"]
#df.columns = ['month', 'pct', 'ccg_name', 'total_list_size'] # The CCG column must be named 'pct'
plt = maps.ccg_map(data, title="statin intensity", column='calc_value', separate_london=True)
plt.show()
# tablets/capsules only - exclude other formulations such as liquids and powder sachets.
sql = '''SELECT p.practice, p.month,
SUM(CASE WHEN concat(SUBSTR(bnf_code,1,9),SUBSTR(bnf_code,-2,2)) IN ('0212000B0AA','0212000B0AL') THEN quantity ELSE 0 END)/SUM(quantity) AS atorvastatin_10mg_quant,
SUM(CASE WHEN concat(SUBSTR(bnf_code,1,9),SUBSTR(bnf_code,-2,2)) IN ('0212000B0AB', '0212000B0AE', '0212000B0AM') THEN quantity ELSE 0 END)/SUM(quantity) AS atorvastatin_20mg_quant,
SUM(CASE WHEN bnf_code LIKE '0212000B0%AC' THEN quantity ELSE 0 END)/SUM(quantity) AS atorvastatin_40mg_quant,
SUM(CASE WHEN bnf_code LIKE '0212000B0%AD' THEN quantity ELSE 0 END)/SUM(quantity) AS atorvastatin_80mg_quant,
SUM(CASE WHEN concat(SUBSTR(bnf_code,1,9),SUBSTR(bnf_code,-2,2)) IN ('0212000Y0AA','0212000Y0AC') THEN quantity ELSE 0 END)/SUM(quantity) AS simvastatin_10mg_quant,
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AB' OR bnf_code LIKE '0212000Y0%AE' THEN quantity ELSE 0 END)/SUM(quantity) AS simvastatin_20mg_quant,
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AD' THEN quantity ELSE 0 END)/SUM(quantity) AS simvastatin_40mg_quant,
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AH' THEN quantity ELSE 0 END)/SUM(quantity) AS simvastatin_80mg_quant,
SUM(CASE WHEN bnf_code LIKE '0212000B0%AB' THEN items ELSE 0 END)/SUM(items) AS atorvastatin_20mg_items,
SUM(CASE WHEN bnf_code LIKE '0212000B0%AD' THEN items ELSE 0 END)/SUM(items) AS atorvastatin_80mg_items,
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AB' OR bnf_code LIKE '0212000Y0%AE' THEN items ELSE 0 END)/SUM(items) AS simvastatin_20mg_items,
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AD' THEN items ELSE 0 END)/SUM(items) AS simvastatin_40mg_items,
SUM(CASE WHEN bnf_code LIKE '0212000Y0%AH' THEN items ELSE 0 END)/SUM(items) AS simvastatin_80mg_items,
SUM(items) as total_items,
SUM(quantity) as total_quantity
FROM `ebmdatalab.helen.prescribing_section_2_12` p
INNER JOIN (
select distinct chemical, presentation, presentation_code
from `hscic.bnf`
where SUBSTR(presentation_code,1,9) IN (
'0212000AA', --Rosuvastatin Calcium
'0212000AC', --Simvastatin & Ezetimibe
'0212000B0', --Atorvastatin
'0212000C0', --Cerivastatin
'0212000M0', --Fluvastatin Sodium
'0212000X0', --Pravastatin Sodium
'0212000Y0') --Simvastatin)
AND (presentation LIKE "%Tab%" OR presentation LIKE "%Cap%")
) b ON p.bnf_code = b.presentation_code
INNER JOIN ebmdatalab.hscic.practices prac
ON p.practice = prac.code
AND prac.setting = 4
GROUP BY
practice, month'''
dose = bq.cached_read(sql, csv_path='statins_dose_practice_level.zip') # add `use_cache=False` to override
dose["month"] = pd.to_datetime(dose.month)
dose.head()
practice | month | atorvastatin_10mg_quant | atorvastatin_20mg_quant | atorvastatin_40mg_quant | atorvastatin_80mg_quant | simvastatin_10mg_quant | simvastatin_20mg_quant | simvastatin_40mg_quant | simvastatin_80mg_quant | atorvastatin_20mg_items | atorvastatin_80mg_items | simvastatin_20mg_items | simvastatin_40mg_items | simvastatin_80mg_items | total_items | total_quantity | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | F81025 | 2014-03-01 | 0.084950 | 0.076560 | 0.085999 | 0.049292 | 0.009439 | 0.120084 | 0.414263 | 0.004195 | 0.080292 | 0.048175 | 0.124088 | 0.392701 | 0.004380 | 685 | 26698 |
1 | K81018 | 2014-03-01 | 0.059587 | 0.100913 | 0.108602 | 0.024988 | 0.092263 | 0.254685 | 0.229217 | 0.009611 | 0.106017 | 0.027221 | 0.239255 | 0.239255 | 0.008596 | 698 | 29134 |
2 | L84613 | 2014-03-01 | 0.021702 | 0.013317 | 0.137613 | 0.047351 | 0.017756 | 0.085541 | 0.470054 | 0.000000 | 0.019523 | 0.047722 | 0.084599 | 0.433839 | 0.000000 | 461 | 14192 |
3 | E84030 | 2014-03-01 | 0.038280 | 0.140322 | 0.104253 | 0.004887 | 0.082669 | 0.386468 | 0.207284 | 0.000000 | 0.137421 | 0.004228 | 0.376321 | 0.200846 | 0.000000 | 473 | 17189 |
4 | A86004 | 2014-03-01 | 0.045586 | 0.053824 | 0.181656 | 0.059431 | 0.017438 | 0.142249 | 0.436130 | 0.000641 | 0.052409 | 0.064243 | 0.138631 | 0.425190 | 0.000845 | 1183 | 43698 |
# join prac population data and calc items & quantity per 1000
dose["month"] = pd.to_datetime(dose.month, utc=True)# join prac population data and calc items & quantity per 1000
dose2 = dose.merge(prac_p, on=["practice","month"])
dose2["items_per_thou"] = 1000*dose2["total_items"]/dose2["total_list_size"]
dose2["quantity_per_thou"] = 1000*dose2["total_quantity"]/dose2["total_list_size"]
dose2.head()
practice | month | atorvastatin_10mg_quant | atorvastatin_20mg_quant | atorvastatin_40mg_quant | atorvastatin_80mg_quant | simvastatin_10mg_quant | simvastatin_20mg_quant | simvastatin_40mg_quant | simvastatin_80mg_quant | atorvastatin_20mg_items | atorvastatin_80mg_items | simvastatin_20mg_items | simvastatin_40mg_items | simvastatin_80mg_items | total_items | total_quantity | total_list_size | items_per_thou | quantity_per_thou | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | F81025 | 2014-03-01 00:00:00+00:00 | 0.084950 | 0.076560 | 0.085999 | 0.049292 | 0.009439 | 0.120084 | 0.414263 | 0.004195 | 0.080292 | 0.048175 | 0.124088 | 0.392701 | 0.004380 | 685 | 26698 | 8602.0 | 79.632644 | 3103.696815 |
1 | K81018 | 2014-03-01 00:00:00+00:00 | 0.059587 | 0.100913 | 0.108602 | 0.024988 | 0.092263 | 0.254685 | 0.229217 | 0.009611 | 0.106017 | 0.027221 | 0.239255 | 0.239255 | 0.008596 | 698 | 29134 | 9718.0 | 71.825478 | 2997.941963 |
2 | L84613 | 2014-03-01 00:00:00+00:00 | 0.021702 | 0.013317 | 0.137613 | 0.047351 | 0.017756 | 0.085541 | 0.470054 | 0.000000 | 0.019523 | 0.047722 | 0.084599 | 0.433839 | 0.000000 | 461 | 14192 | 2709.0 | 170.173496 | 5238.833518 |
3 | E84030 | 2014-03-01 00:00:00+00:00 | 0.038280 | 0.140322 | 0.104253 | 0.004887 | 0.082669 | 0.386468 | 0.207284 | 0.000000 | 0.137421 | 0.004228 | 0.376321 | 0.200846 | 0.000000 | 473 | 17189 | 6090.0 | 77.668309 | 2822.495895 |
4 | A86004 | 2014-03-01 00:00:00+00:00 | 0.045586 | 0.053824 | 0.181656 | 0.059431 | 0.017438 | 0.142249 | 0.436130 | 0.000641 | 0.052409 | 0.064243 | 0.138631 | 0.425190 | 0.000845 | 1183 | 43698 | 15210.0 | 77.777778 | 2872.978304 |
import importlib
from ebmdatalab import charts
importlib.reload(charts)
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
data = dose2.copy()
# set sort order of drugs manually, and add grid refs to position each subplot:
s = [(0,'atorvastatin_10mg_quant',0,0,'(a) Atorvastatin 10 mg'),
(1,'atorvastatin_20mg_quant',0,1,'(b) Atorvastatin 20 mg (HI)'),
(2,'atorvastatin_40mg_quant',1,0,'(c) Atorvastatin 40 mg (HI)'),
(3,'atorvastatin_80mg_quant',1,1,'(d) Atorvastatin 80 mg (HI)'),
(4,'simvastatin_10mg_quant',2,0,'(e) Simvastatin 10 mg'),
(5,'simvastatin_20mg_quant',2,1,'(f) Simvastatin 20 mg'),
(6,'simvastatin_40mg_quant',3,0,'(g) Simvastatin 40 mg'),
(7,'simvastatin_80mg_quant',3,1,'(h) Simvastatin 80 mg (HI)'),
(8,'total_quantity',4,0,'(i) Total quantity statin tablets/capsules')]
fig = plt.figure(figsize=(16,22))
gs = gridspec.GridSpec(5,2) # grid layout for subplots
# Plot each subplot using a loop
for i in s:
ax = plt.subplot(gs[i[2], i[3]]) # position of subplot in grid using coordinates listed in s
if i[0]==8: # set positions at which to display the legend
legend = True
ylabel="statin quantity"
else:
legend = False
ylabel="proportion of statin quantity"
plt = charts.deciles_chart(
data,
period_column='month',
column=i[1],
title=i[4],
ylabel=ylabel,
show_outer_percentiles=True,
show_legend=legend,
ax=ax)
if i[0]==8:
ax.set_ylim([0, 100000])
else:
ax.set_ylim([0, 0.8])
# Now add a single line against the deciles
'''df_subject = pd.DataFrame(np.random.rand(12, 1), columns=['val']) * 100
df_subject['month'] = months
df_subject.set_index('month')
plt.plot(df_subject['month'], df_subject['val'], 'r--')'''
fig.autofmt_xdate()
plt.show()