Opiate prescribing in England

Data sources: http://www.hscic.gov.uk/gpprescribingdata - all the presentation prescribing data from here http://www.hscic.gov.uk/media/10686/Download-glossary-of-terms-for-GP-prescribing---presentation-level/pdf/GP_Prescribing_Presentation_Level_Glossary_of_Terms.pdf http://www.connectingforhealth.nhs.uk/systemsandservices/data/ods/ccginterim/interimpcmem_v5.zip - gp to ccg mapping here http://www.erpho.org.uk/viewResource.aspx?id=22125 - old ccg to new ccg code mapping from here http://www.ons.gov.uk/ons/publications/re-reference-tables.html?edition=tcm%3A77-325526 - ccg population data from here (2012) https://geoportal.statistics.gov.uk/Docs/Boundaries/Clinical_commissioning_groups_(Eng)_Apr_2013_Boundaries_(Full_Extent).zip - maps Notes: Prescribing data is GP level and there are two types of CCG code (old and new) so this requires 1. obtaining CCG population data 2. obtaining mapping of old to new CCG codes 3. obtaining GP practice to CCG mapping NB: There does not appear to be an authorative list of GP practices. There are other data quality issues that should be written up another time.
In [1]:
import os, datetime
import pandas as pd
import pickle
import folium
from pandas import Series, DataFrame, Panel
from datetime import datetime, date, time

Prepare our prescribing data

In [2]:
#cuts gp prescribing data for rows containing patterns of interest as dataframes

def pattern_timeseries(pattern):
    pathtodata = '/media/mydisk/prescribing_data/bnf/'#set to the directory with bnf CSVs in
    pathtopatterns = '/media/mydisk/prescribing_data/patterns/' 
    pathtopickles = '/media/mydisk/prescribing_data/pickles/'
    files = !ls T2*
    global pattern_df
    clean_filenames = []

    for f in files.l:
        clean_filenames.append(f[:7]) #clean up filenames so that grep will work
    for i, item in enumerate(clean_filenames):
        date = clean_filenames[i].replace('T', '')
        date = pd.to_datetime(date, format="%Y%m") #make pandas know the date is a date
        name = "bnf_%s_%.10s.csv" % (pattern, date) #create a name for new csv of grep for pattern using date from file name
        print "writing %s" % name
        !fgrep $pattern {clean_filenames[i]}* > $pathtopatterns/$name #grep for pattern in csv files and write to file 
    pattern_files = !ls $pathtopatterns*$pattern*
    cols = ['SHA', 'PCT', 'PRACTICE', 'BNF_CODE', 'BNF_NAME', 'ITEMS', 'NIC', 'ACT_COST', 'QUANTITY', 'DateTime', 'Index']
    practices_est = 10000 #estimated number of practices
    df_list = [pd.read_csv(file, names=cols) for file in pattern_files] 
    pattern_df = pd.concat(df_list)
    pattern_df['DateTime'] = pattern_df['DateTime'].astype('|S6') 
    pattern_df['DateTime'] = pd.to_datetime(pattern_df['DateTime'], format="%Y%m")

    pattern_df.to_pickle('%s.pkl' % pattern)
In [3]:
#define our opiates
Opiates = ['Codeine', 'Dihydrocodeine', 'Tramadol', 'Tapentadol', 'Buprenorphine', 'Fentanyl', 'Methadone', 'Morph', 'Oxycodone']

#commented because have run to Feb 2014
#for opiate in Opiates:
#   pattern_timeseries(opiate)
In [4]:
#load prev outputs from above
pathtopickles = '/media/mydisk/prescribing_data/pickles/'

#load dataframes
Codeine = pd.read_pickle('Codeine.pkl')
Dihydrocodeine = pd.read_pickle('Dihydrocodeine.pkl')
Tramadol = pd.read_pickle('Tramadol.pkl')
Tapentadol = pd.read_pickle('Tapentadol.pkl')
Buprenorphine = pd.read_pickle('Buprenorphine.pkl')
Fentanyl = pd.read_pickle('Fentanyl.pkl')
Methadone = pd.read_pickle('Methadone.pkl')
Morph = pd.read_pickle('Morph.pkl')
Oxycodone = pd.read_pickle('Oxycodone.pkl')

#make a list of dataframes
df_list = [Codeine, Dihydrocodeine, Tramadol, Tapentadol, Buprenorphine, Fentanyl, Methadone, Morph, Oxycodone]
opiate_lookup = {0:'Codeine', 1:'Dihydrocodeine', 2:'Tramadol', 3:'Tapentadol', 4:'Buprenorphine', 5:'Fentanyl', 6:'Methadone', 7:'Morphine', 8:'Oxycodone'}
In [5]:
#prescribing data don't contain a quantity that's a quantity
#work out quantity in mg from prep information and quantity.. 

pathtocsv = '/media/mydisk/prescribing_data/xls/csv'

Codeine['BNF_NAME'] = Codeine['BNF_NAME'].map(str.strip) #get rid of white space
codeine_preps = Codeine['BNF_NAME'].unique().tolist() #get a list of preparation names
codeine_doses = [3, 3, 0.6, 60, 5, 15, 30, 60, 3.2, 12.8, 8, 1.35, 12.8, 2, 8, 6, 1, 0.6, 10, 5, 30, 15, 30, 12] #codeine in mg for each prep
codeine_dose_lookup = dict(zip(codeine_preps, codeine_doses)) #make dict of prep names and codeine doses
Codeine['QUANTITY_IN_MG'] = Codeine['BNF_NAME'].map(lambda x: codeine_dose_lookup[x]) * Codeine['QUANTITY']

#work out quantity in mg from prep information and quantity
df = pd.read_csv('Dihydrocodeine.csv') #lookup prepared by colleague
Dihydrocodeine['BNF_NAME'] = Dihydrocodeine['BNF_NAME'].map(str.strip) #get rid of white space
dihydrocodeine_preps = Dihydrocodeine['BNF_NAME'].unique().tolist() #get a list of preparation names
dihydrocodeine_doses = df['mg of drug per unit of preparation'].str.replace('mg', '').astype(float).tolist() #remove the mg my collaborator added
dihydrocodeine_dose_lookup = dict(zip(dihydrocodeine_preps, dihydrocodeine_doses)) #make dict of prep names and codeine doses
Dihydrocodeine['QUANTITY_IN_MG'] = Dihydrocodeine['BNF_NAME'].map(lambda x: dihydrocodeine_dose_lookup[x]) * Dihydrocodeine['QUANTITY']

#work out quantity in mg from prep information and quantity
df = pd.read_csv('Tramadol.csv') #lookup prepared by colleague
Tramadol['BNF_NAME'] = Tramadol['BNF_NAME'].map(str.strip) #get rid of white space
tramadol_preps = Tramadol['BNF_NAME'].unique().tolist() #get a list of preparation names
tramadol_doses = df['mg of drug per unit of preparation'].tolist() #remove the mg my collaborator added
tramadol_dose_lookup = dict(zip(tramadol_preps, tramadol_doses)) #make dict of prep names and codeine doses
Tramadol['QUANTITY_IN_MG'] = Tramadol['BNF_NAME'].map(lambda x: tramadol_dose_lookup[x]) * Tramadol['QUANTITY']

#work out quantity in mg from prep information and quantity
df = pd.read_csv('Tapentadol.csv') #lookup prepared by colleague
Tapentadol['BNF_NAME'] = Tapentadol['BNF_NAME'].map(str.strip) #get rid of white space
tapentadol_preps = Tapentadol['BNF_NAME'].unique().tolist() #get a list of preparation names
tapentadol_doses = df['mg of drug per unit of preparation'].tolist() #remove the mg my collaborator added
tapentadol_dose_lookup = dict(zip(tapentadol_preps, tapentadol_doses)) #make dict of prep names and codeine doses
Tapentadol['QUANTITY_IN_MG'] = Tapentadol['BNF_NAME'].map(lambda x: tapentadol_dose_lookup[x]) * Tapentadol['QUANTITY']

#work out quantity in mg from prep information and quantity
df = pd.read_csv('Buprenorphine.csv') #lookup prepared by colleague
Buprenorphine['BNF_NAME'] = Buprenorphine['BNF_NAME'].map(str.strip) #get rid of white space
buprenorphine_preps = Buprenorphine['BNF_NAME'].unique().tolist() #get a list of preparation names
buprenorphine_doses = df['mg of drug per unit of preparation'].str.replace('mg', '').astype(float).tolist() #remove the mg my collaborator added
buprenorphine_dose_lookup = dict(zip(buprenorphine_preps, buprenorphine_doses)) #make dict of prep names and codeine doses
Buprenorphine['QUANTITY_IN_MG'] = Buprenorphine['BNF_NAME'].map(lambda x: buprenorphine_dose_lookup[x]) * Buprenorphine['QUANTITY']

#work out quantity in mg from prep information and quantity
df = pd.read_csv('Fentanyl.csv') #lookup prepared by colleague
Fentanyl['BNF_NAME'] = Fentanyl['BNF_NAME'].map(str.strip) #get rid of white space
fentanyl_preps = Fentanyl['BNF_NAME'].unique().tolist() #get a list of preparation names
fentanyl_doses = df['mg of drug per unit of preparation'].tolist() #remove the mg my collaborator added
fentanyl_dose_lookup = dict(zip(fentanyl_preps, fentanyl_doses)) #make dict of prep names and codeine doses
Fentanyl['QUANTITY_IN_MG'] = Fentanyl['BNF_NAME'].map(lambda x: fentanyl_dose_lookup[x]) * Fentanyl['QUANTITY']

#work out quantity in mg from prep information and quantity
df = pd.read_csv('Methadone.csv') #lookup prepared by colleague
Methadone['BNF_NAME'] = Methadone['BNF_NAME'].map(str.strip) #get rid of white space
methadone_preps = Methadone['BNF_NAME'].unique().tolist() #get a list of preparation names
methadone_doses = df['mg of drug per unit of preparation'].str.replace('tbc', '0')#remove the mg my collaborator added
methadone_doses = methadone_doses.astype(float).tolist()
methadone_dose_lookup = dict(zip(methadone_preps, methadone_doses)) #make dict of prep names and codeine doses
Methadone['QUANTITY_IN_MG'] = Methadone['BNF_NAME'].map(lambda x: methadone_dose_lookup[x]) * Methadone['QUANTITY']

#work out quantity in mg from prep information and quantity
df = pd.read_csv('Morphine.csv') #lookup prepared by colleague
Morph['BNF_NAME'] = Morph['BNF_NAME'].map(str.strip) #get rid of white space
morphine_preps = Morph['BNF_NAME'].unique().tolist() #get a list of preparation names
morphine_doses = df['mg of drug per unit of preparation'].str.replace('tbc', '0')
morphine_doses = morphine_doses.astype(float).tolist() #remove the mg my collaborator added
morphine_dose_lookup = dict(zip(morphine_preps, morphine_doses)) #make dict of prep names and codeine doses
Morph['QUANTITY_IN_MG'] = Morph['BNF_NAME'].map(lambda x: morphine_dose_lookup[x]) * Morph['QUANTITY']

#work out quantity in mg from prep information and quantity
df = pd.read_csv('Oxycodone.csv') #lookup prepared by colleague
Oxycodone['BNF_NAME'] = Oxycodone['BNF_NAME'].map(str.strip) #get rid of white space
oxycodone_preps = Oxycodone['BNF_NAME'].unique().tolist() #get a list of preparation names
oxycodone_doses = df['mg of drug per unit of preparation'].tolist()
oxycodone_dose_lookup = dict(zip(oxycodone_preps, oxycodone_doses)) #make dict of prep names and codeine doses
Oxycodone['QUANTITY_IN_MG'] = Oxycodone['BNF_NAME'].map(lambda x: oxycodone_dose_lookup[x]) * Oxycodone['QUANTITY']

Prepare our CCG population data

In [6]:
#ccg mid-year population estimates from ONS for 2012
#from http://www.ons.gov.uk/ons/publications/re-reference-tables.html?edition=tcm%3A77-325526
#has CCG code in new format

pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'

df = pd.read_csv('SAPE7DT1-Mid-2012-ccg-syoa-file.csv') 
df.icol(3).dropna().nunique() #column 3 has our 211 ccgs
df1 = df[~df.icol(3).isnull()]  #throw away rows that don't relate to our ccgs
df2 = df1.icol([0,3,4]) #just use all ages and throw away the rest
df2.columns = ['CCG13CD', 'CCG_Name', 'Population'] #name the columns sensibly
#csv of old CCG codes matched onto new ones
#from http://www.erpho.org.uk/viewResource.aspx?id=22125
df3 = pd.read_csv('ccgcodemap.csv')
#merge our dataframes to make a frame with CCG names and old and new codes and population sizes
df4 = pd.merge(df2, df3, on='CCG13CD')

#gp to ccg mapping 1
#wget http://www.connectingforhealth.nhs.uk/systemsandservices/data/ods/ccginterim/interimpcmem_v5.zip
#has CCG code in old format and contains too many CCGs (>211)
df5 = pd.read_csv('interimpcmem_v5.csv') 
df5.rename(columns = {'PRACTICECODE': 'PRACTICE'}, inplace=True)
df5 = df5[['PRACTICE', 'CCGCODE']] #throw away columns we don't care about

pathtopickles = '/media/mydisk/prescribing_data/pickles/'
In [7]:
CCGPopData = pd.read_pickle('CCGPopData.pkl')
GPCCGMap = pd.read_pickle('GPCCGMap.pkl')

Combine prescribing and CCG population data

In [8]:
CCGPopData['Population'] = CCGPopData['Population'].str.replace(',','')
CCGPopData['Population'] = CCGPopData['Population'].astype(float)

for i, item in enumerate(df_list):
    df_list[i] = df_list[i][['DateTime', 'PRACTICE', 'QUANTITY_IN_MG']]
    df_list[i] = pd.merge(df_list[i], GPCCGMap, on='PRACTICE')
    df_list[i] = df_list[i].groupby(['CCGCODE', 'DateTime']).QUANTITY_IN_MG.sum() #group by ccg and date time, sum the mg of codeine
    df_list[i] = DataFrame(df_list[i]).reset_index()
    df_list[i] = pd.merge(df_list[i], CCGPopData, on='CCGCODE')
    df_list[i]['QUANTITY_IN_MG_PER_PERSON'] = df_list[i]['QUANTITY_IN_MG'] / df_list[i]['Population'] 
    df_list[i]['Drug'] = opiate_lookup[i]
    df_list[i] = df_list[i][['DateTime', 'CCG_Name', 'CCG13CD', 'Drug', 'QUANTITY_IN_MG', 'Population', 'QUANTITY_IN_MG_PER_PERSON']]
In [9]:
df = pd.concat(df_list) 

Analysing the data

In [11]:
df.index = df.DateTime
df['CCG_Name'] = df['CCG_Name'].str.strip()
In [31]:
print df.hist()
print df.describe()
count     77816.000000   77816.000000               77816.000000
mean    2558732.521669  254540.299424                  10.445597
std     5847471.794484  135385.424098                  21.758098
min           1.740000   63073.000000                   0.000015
25%       13431.910000  167782.000000                   0.057381
50%      290580.000000  222811.000000                   1.253180
75%     1686307.500000  302739.000000                   7.870421
max    63228150.000000  869408.000000                 178.623148
In [39]:
<matplotlib.axes.AxesSubplot at 0x17b989d0>
In [29]:
for drug in df['2013'].Drug.unique():
    print drug
    df['2013'][df['2013']['Drug'] == drug]['QUANTITY_IN_MG_PER_PERSON'].hist(bins=100)
In [12]: