Data sources: http://www.hscic.gov.uk/gpprescribingdata - all the presentation prescribing data from here http://www.hscic.gov.uk/media/10686/Download-glossary-of-terms-for-GP-prescribing---presentation-level/pdf/GP_Prescribing_Presentation_Level_Glossary_of_Terms.pdf http://www.connectingforhealth.nhs.uk/systemsandservices/data/ods/ccginterim/interimpcmem_v5.zip - gp to ccg mapping here http://www.erpho.org.uk/viewResource.aspx?id=22125 - old ccg to new ccg code mapping from here http://www.ons.gov.uk/ons/publications/re-reference-tables.html?edition=tcm%3A77-325526 - ccg population data from here (2012) https://geoportal.statistics.gov.uk/Docs/Boundaries/Clinical_commissioning_groups_(Eng)_Apr_2013_Boundaries_(Full_Extent).zip - maps Notes: Prescribing data is GP level and there are two types of CCG code (old and new) so this requires 1. obtaining CCG population data 2. obtaining mapping of old to new CCG codes 3. obtaining GP practice to CCG mapping NB: There does not appear to be an authorative list of GP practices. There are other data quality issues that should be written up another time. import os, datetime import pandas as pd import pickle import folium from pandas import Series, DataFrame, Panel from datetime import datetime, date, time #cuts gp prescribing data for rows containing patterns of interest as dataframes def pattern_timeseries(pattern): pathtodata = '/media/mydisk/prescribing_data/bnf/'#set to the directory with bnf CSVs in pathtopatterns = '/media/mydisk/prescribing_data/patterns/' pathtopickles = '/media/mydisk/prescribing_data/pickles/' os.chdir(pathtodata) files = !ls T2* global pattern_df clean_filenames = [] for f in files.l: clean_filenames.append(f[:7]) #clean up filenames so that grep will work for i, item in enumerate(clean_filenames): date = clean_filenames[i].replace('T', '') date = pd.to_datetime(date, format="%Y%m") #make pandas know the date is a date name = "bnf_%s_%.10s.csv" % (pattern, date) #create a name for new csv of grep for pattern using date from file name print "writing %s" % name !fgrep $pattern {clean_filenames[i]}* > $pathtopatterns/$name #grep for pattern in csv files and write to file pattern_files = !ls $pathtopatterns*$pattern* cols = ['SHA', 'PCT', 'PRACTICE', 'BNF_CODE', 'BNF_NAME', 'ITEMS', 'NIC', 'ACT_COST', 'QUANTITY', 'DateTime', 'Index'] practices_est = 10000 #estimated number of practices df_list = [pd.read_csv(file, names=cols) for file in pattern_files] pattern_df = pd.concat(df_list) pattern_df['DateTime'] = pattern_df['DateTime'].astype('|S6') pattern_df['DateTime'] = pd.to_datetime(pattern_df['DateTime'], format="%Y%m") os.chdir(pathtopickles) pattern_df.to_pickle('%s.pkl' % pattern) return(pattern_df) #define our opiates Opiates = ['Codeine', 'Dihydrocodeine', 'Tramadol', 'Tapentadol', 'Buprenorphine', 'Fentanyl', 'Methadone', 'Morph', 'Oxycodone'] #commented because have run to Feb 2014 #for opiate in Opiates: # pattern_timeseries(opiate) #load prev outputs from above pathtopickles = '/media/mydisk/prescribing_data/pickles/' os.chdir(pathtopickles) #load dataframes Codeine = pd.read_pickle('Codeine.pkl') Dihydrocodeine = pd.read_pickle('Dihydrocodeine.pkl') Tramadol = pd.read_pickle('Tramadol.pkl') Tapentadol = pd.read_pickle('Tapentadol.pkl') Buprenorphine = pd.read_pickle('Buprenorphine.pkl') Fentanyl = pd.read_pickle('Fentanyl.pkl') Methadone = pd.read_pickle('Methadone.pkl') Morph = pd.read_pickle('Morph.pkl') Oxycodone = pd.read_pickle('Oxycodone.pkl') #make a list of dataframes df_list = [Codeine, Dihydrocodeine, Tramadol, Tapentadol, Buprenorphine, Fentanyl, Methadone, Morph, Oxycodone] opiate_lookup = {0:'Codeine', 1:'Dihydrocodeine', 2:'Tramadol', 3:'Tapentadol', 4:'Buprenorphine', 5:'Fentanyl', 6:'Methadone', 7:'Morphine', 8:'Oxycodone'} #prescribing data don't contain a quantity that's a quantity #work out quantity in mg from prep information and quantity.. pathtocsv = '/media/mydisk/prescribing_data/xls/csv' os.chdir(pathtocsv) Codeine['BNF_NAME'] = Codeine['BNF_NAME'].map(str.strip) #get rid of white space codeine_preps = Codeine['BNF_NAME'].unique().tolist() #get a list of preparation names codeine_doses = [3, 3, 0.6, 60, 5, 15, 30, 60, 3.2, 12.8, 8, 1.35, 12.8, 2, 8, 6, 1, 0.6, 10, 5, 30, 15, 30, 12] #codeine in mg for each prep codeine_dose_lookup = dict(zip(codeine_preps, codeine_doses)) #make dict of prep names and codeine doses Codeine['QUANTITY_IN_MG'] = Codeine['BNF_NAME'].map(lambda x: codeine_dose_lookup[x]) * Codeine['QUANTITY'] #Dihydrocodeine #work out quantity in mg from prep information and quantity df = pd.read_csv('Dihydrocodeine.csv') #lookup prepared by colleague Dihydrocodeine['BNF_NAME'] = Dihydrocodeine['BNF_NAME'].map(str.strip) #get rid of white space dihydrocodeine_preps = Dihydrocodeine['BNF_NAME'].unique().tolist() #get a list of preparation names dihydrocodeine_doses = df['mg of drug per unit of preparation'].str.replace('mg', '').astype(float).tolist() #remove the mg my collaborator added dihydrocodeine_dose_lookup = dict(zip(dihydrocodeine_preps, dihydrocodeine_doses)) #make dict of prep names and codeine doses Dihydrocodeine['QUANTITY_IN_MG'] = Dihydrocodeine['BNF_NAME'].map(lambda x: dihydrocodeine_dose_lookup[x]) * Dihydrocodeine['QUANTITY'] #Tramadol #work out quantity in mg from prep information and quantity df = pd.read_csv('Tramadol.csv') #lookup prepared by colleague Tramadol['BNF_NAME'] = Tramadol['BNF_NAME'].map(str.strip) #get rid of white space tramadol_preps = Tramadol['BNF_NAME'].unique().tolist() #get a list of preparation names tramadol_doses = df['mg of drug per unit of preparation'].tolist() #remove the mg my collaborator added tramadol_dose_lookup = dict(zip(tramadol_preps, tramadol_doses)) #make dict of prep names and codeine doses Tramadol['QUANTITY_IN_MG'] = Tramadol['BNF_NAME'].map(lambda x: tramadol_dose_lookup[x]) * Tramadol['QUANTITY'] #Tapentadol #work out quantity in mg from prep information and quantity df = pd.read_csv('Tapentadol.csv') #lookup prepared by colleague Tapentadol['BNF_NAME'] = Tapentadol['BNF_NAME'].map(str.strip) #get rid of white space tapentadol_preps = Tapentadol['BNF_NAME'].unique().tolist() #get a list of preparation names tapentadol_doses = df['mg of drug per unit of preparation'].tolist() #remove the mg my collaborator added tapentadol_dose_lookup = dict(zip(tapentadol_preps, tapentadol_doses)) #make dict of prep names and codeine doses Tapentadol['QUANTITY_IN_MG'] = Tapentadol['BNF_NAME'].map(lambda x: tapentadol_dose_lookup[x]) * Tapentadol['QUANTITY'] #Buprenorphine #work out quantity in mg from prep information and quantity df = pd.read_csv('Buprenorphine.csv') #lookup prepared by colleague Buprenorphine['BNF_NAME'] = Buprenorphine['BNF_NAME'].map(str.strip) #get rid of white space buprenorphine_preps = Buprenorphine['BNF_NAME'].unique().tolist() #get a list of preparation names buprenorphine_doses = df['mg of drug per unit of preparation'].str.replace('mg', '').astype(float).tolist() #remove the mg my collaborator added buprenorphine_dose_lookup = dict(zip(buprenorphine_preps, buprenorphine_doses)) #make dict of prep names and codeine doses Buprenorphine['QUANTITY_IN_MG'] = Buprenorphine['BNF_NAME'].map(lambda x: buprenorphine_dose_lookup[x]) * Buprenorphine['QUANTITY'] #Fentanyl #work out quantity in mg from prep information and quantity df = pd.read_csv('Fentanyl.csv') #lookup prepared by colleague Fentanyl['BNF_NAME'] = Fentanyl['BNF_NAME'].map(str.strip) #get rid of white space fentanyl_preps = Fentanyl['BNF_NAME'].unique().tolist() #get a list of preparation names fentanyl_doses = df['mg of drug per unit of preparation'].tolist() #remove the mg my collaborator added fentanyl_dose_lookup = dict(zip(fentanyl_preps, fentanyl_doses)) #make dict of prep names and codeine doses Fentanyl['QUANTITY_IN_MG'] = Fentanyl['BNF_NAME'].map(lambda x: fentanyl_dose_lookup[x]) * Fentanyl['QUANTITY'] #Methadone #work out quantity in mg from prep information and quantity df = pd.read_csv('Methadone.csv') #lookup prepared by colleague Methadone['BNF_NAME'] = Methadone['BNF_NAME'].map(str.strip) #get rid of white space methadone_preps = Methadone['BNF_NAME'].unique().tolist() #get a list of preparation names methadone_doses = df['mg of drug per unit of preparation'].str.replace('tbc', '0')#remove the mg my collaborator added methadone_doses = methadone_doses.astype(float).tolist() methadone_dose_lookup = dict(zip(methadone_preps, methadone_doses)) #make dict of prep names and codeine doses Methadone['QUANTITY_IN_MG'] = Methadone['BNF_NAME'].map(lambda x: methadone_dose_lookup[x]) * Methadone['QUANTITY'] #Morphine #work out quantity in mg from prep information and quantity df = pd.read_csv('Morphine.csv') #lookup prepared by colleague Morph['BNF_NAME'] = Morph['BNF_NAME'].map(str.strip) #get rid of white space morphine_preps = Morph['BNF_NAME'].unique().tolist() #get a list of preparation names morphine_doses = df['mg of drug per unit of preparation'].str.replace('tbc', '0') morphine_doses = morphine_doses.astype(float).tolist() #remove the mg my collaborator added morphine_dose_lookup = dict(zip(morphine_preps, morphine_doses)) #make dict of prep names and codeine doses Morph['QUANTITY_IN_MG'] = Morph['BNF_NAME'].map(lambda x: morphine_dose_lookup[x]) * Morph['QUANTITY'] #Oxycodone #work out quantity in mg from prep information and quantity df = pd.read_csv('Oxycodone.csv') #lookup prepared by colleague Oxycodone['BNF_NAME'] = Oxycodone['BNF_NAME'].map(str.strip) #get rid of white space oxycodone_preps = Oxycodone['BNF_NAME'].unique().tolist() #get a list of preparation names oxycodone_doses = df['mg of drug per unit of preparation'].tolist() oxycodone_dose_lookup = dict(zip(oxycodone_preps, oxycodone_doses)) #make dict of prep names and codeine doses Oxycodone['QUANTITY_IN_MG'] = Oxycodone['BNF_NAME'].map(lambda x: oxycodone_dose_lookup[x]) * Oxycodone['QUANTITY'] #ccg mid-year population estimates from ONS for 2012 #from http://www.ons.gov.uk/ons/publications/re-reference-tables.html?edition=tcm%3A77-325526 #has CCG code in new format pathtogpdata = '/home/sam/Documents/OpenDataAbstract/' os.chdir(pathtogpdata) df = pd.read_csv('SAPE7DT1-Mid-2012-ccg-syoa-file.csv') df.icol(3).dropna().nunique() #column 3 has our 211 ccgs df1 = df[~df.icol(3).isnull()] #throw away rows that don't relate to our ccgs df2 = df1.icol([0,3,4]) #just use all ages and throw away the rest df2.columns = ['CCG13CD', 'CCG_Name', 'Population'] #name the columns sensibly #csv of old CCG codes matched onto new ones #from http://www.erpho.org.uk/viewResource.aspx?id=22125 df3 = pd.read_csv('ccgcodemap.csv') #merge our dataframes to make a frame with CCG names and old and new codes and population sizes df4 = pd.merge(df2, df3, on='CCG13CD') #gp to ccg mapping 1 #wget http://www.connectingforhealth.nhs.uk/systemsandservices/data/ods/ccginterim/interimpcmem_v5.zip #has CCG code in old format and contains too many CCGs (>211) df5 = pd.read_csv('interimpcmem_v5.csv') df5.rename(columns = {'PRACTICECODE': 'PRACTICE'}, inplace=True) df5 = df5[['PRACTICE', 'CCGCODE']] #throw away columns we don't care about pathtopickles = '/media/mydisk/prescribing_data/pickles/' os.chdir(pathtopickles) df4.to_pickle('CCGPopData.pkl') df5.to_pickle('GPCCGMap.pkl') CCGPopData = pd.read_pickle('CCGPopData.pkl') GPCCGMap = pd.read_pickle('GPCCGMap.pkl') CCGPopData['Population'] = CCGPopData['Population'].str.replace(',','') CCGPopData['Population'] = CCGPopData['Population'].astype(float) for i, item in enumerate(df_list): df_list[i] = df_list[i][['DateTime', 'PRACTICE', 'QUANTITY_IN_MG']] df_list[i] = pd.merge(df_list[i], GPCCGMap, on='PRACTICE') df_list[i] = df_list[i].groupby(['CCGCODE', 'DateTime']).QUANTITY_IN_MG.sum() #group by ccg and date time, sum the mg of codeine df_list[i] = DataFrame(df_list[i]).reset_index() df_list[i] = pd.merge(df_list[i], CCGPopData, on='CCGCODE') df_list[i]['QUANTITY_IN_MG_PER_PERSON'] = df_list[i]['QUANTITY_IN_MG'] / df_list[i]['Population'] df_list[i]['Drug'] = opiate_lookup[i] df_list[i] = df_list[i][['DateTime', 'CCG_Name', 'CCG13CD', 'Drug', 'QUANTITY_IN_MG', 'Population', 'QUANTITY_IN_MG_PER_PERSON']] df = pd.concat(df_list) df.index = df.DateTime df['CCG_Name'] = df['CCG_Name'].str.strip() print df.hist() print df.describe() df.groupby('DateTime')['QUANTITY_IN_MG_PER_PERSON'].sum().plot(ylim=(0,22000)) for drug in df['2013'].Drug.unique(): print drug df['2013'][df['2013']['Drug'] == drug]['QUANTITY_IN_MG_PER_PERSON'].hist(bins=100) plt.show()