import requests
import pandas as pd


class ONS_CONFIG:
    """Holder for the URL and query parameters of one ONS data API request.

    ``url`` starts as the API base URL and ``params`` starts with the
    ``jsontype`` and ``apikey`` parameters. The ``_ONS_Set*`` methods point
    ``url`` at a specific endpoint and/or add entries to ``params``; the
    module-level ``_ONS_get*`` functions then perform the HTTP request.
    """

    # Base URL that every endpoint path is appended to.
    ONS_STUB = 'http://data.ons.gov.uk/ons/api/data/'
    DEFAULT_CONTEXT = 'Census'

    # The API key is read once, when the class body is executed.
    with open('onspwd.txt', 'r') as f:
        # Strip whitespace so a trailing newline in the key file is not sent
        # to the server as part of the key.
        APIKEY = f.read().strip()

    def __init__(self):
        # Class attributes must be reached through ``self``: bare class-body
        # names are not visible inside method bodies.
        self.url = self.ONS_STUB
        self.params = {'jsontype': 'json-stat', 'apikey': self.APIKEY}

    def _ONS_SetParam(self, param, value):
        """Set the query parameter *param* to *value*."""
        self.params[param] = value

    def _ONS_SetContext(self, context):
        """Set the ``context`` query parameter."""
        self.params['context'] = context

    def _ONS_Census(self):
        """Set the ``context`` query parameter to ``'Census'``."""
        self._ONS_SetContext('Census')

    def _ONS_SetConcept(self, concept):
        """Set the ``concept`` query parameter."""
        self.params['concept'] = concept

    def _ONS_SetGeog(self, geog):
        """Set the ``geog`` query parameter."""
        self.params['geog'] = geog

    def _ONS_SetConcepts(self, context=DEFAULT_CONTEXT):
        """Target the ``concepts.json`` endpoint for *context*."""
        self.url = self.ONS_STUB + 'concepts.json'
        # context is required?
        self._ONS_SetContext(context)

    def _ONS_SetCollections(self):
        """Target the ``collections.json`` endpoint."""
        self.url = self.ONS_STUB + 'collections.json'

    def _ONS_SetCollectionDetails(self, collId):
        """Target ``collectiondetails/<collId>.json``."""
        self.url = self.ONS_STUB + 'collectiondetails/' + collId + '.json'

    def _ONS_SetDatasetDetails(self, datasetId, geog, context=DEFAULT_CONTEXT):
        """Target ``datasetdetails/<datasetId>.json`` for *geog* and *context*."""
        self.url = self.ONS_STUB + 'datasetdetails/' + datasetId + '.json'
        # context is required?
        self._ONS_SetContext(context)
        # geog is required?
        self._ONS_SetGeog(geog)

    def _ONS_SetGeohierarchies(self, geohier):
        """Target ``hierarchies/hierarchy/<geohier>.json``."""
        self.url = self.ONS_STUB + 'hierarchies/hierarchy/' + geohier + '.json'

    def _ONS_SetDataset(self, dataset, geohier, context, dims):
        """Target ``dataset/<dataset>.json`` and apply its parameters.

        *geohier* is stored as the ``geog`` parameter; every key/value pair
        in the *dims* mapping is copied into the query parameters.
        """
        self.url = self.ONS_STUB + 'dataset/' + dataset + '.json'
        self._ONS_SetContext(context)
        self._ONS_SetGeog(geohier)
        for param in dims:
            self._ONS_SetParam(param, dims[param])


def _as_list(value):
    """Return *value* if it is already a list, else wrap it in a one-item list.

    Several parsers in this file guard against the response holding a single
    object where a list is otherwise expected; this centralises that guard.
    """
    return value if isinstance(value, list) else [value]


def _ONS_getConcepts(d, context='Census', lang='en'):
    """Fetch the concept list and return ``{concept id: name in *lang*}``.

    *d* is an ``ONS_CONFIG``; it is re-pointed at the concepts endpoint.
    Raises ``requests.HTTPError`` on a non-2xx response.
    """
    d._ONS_SetConcepts(context)  # Context MUST be defined
    r = requests.get(d.url, params=d.params, timeout=60)
    r.raise_for_status()
    items = {}
    for _concept in _as_list(r.json()['ons']['conceptList']['concept']):
        for concept in _as_list(_concept['names']['name']):
            if concept['@xml.lang'] == lang:
                items[_concept['id']] = concept['$']
    return items


concepts = _ONS_getConcepts(ONS_CONFIG())
df = pd.DataFrame.from_dict(concepts, orient='index')
df[:5]


def _getLangItemFromList(d, attr, lang='en'):
    """Return the ``'$'`` text of the first entry of ``d[attr]`` tagged *lang*.

    Entries are dicts with ``'@xml.lang'`` and ``'$'`` keys. Returns ``None``
    when no entry matches.
    """
    for n in _as_list(d[attr]):
        if n['@xml.lang'] == lang:
            return n['$']
    return None


def _getName(d, lang='en'):
    """Return the *lang* entry of ``d['names']['name']`` (``None`` if absent)."""
    return _getLangItemFromList(d['names'], 'name', lang)


def _getGeographicalTypes(d, lang='en'):
    """Collect the *lang* geographical type label(s) of every hierarchy in *d*."""
    geotypes = []
    hierarchies = _as_list(d['geographicalHierarchies']['geographicalHierarchy'])
    for geohier in hierarchies:
        for geotype in _as_list(geohier['geographicalType']):
            if geotype['@xml.lang'] == lang:
                geotypes.append(geotype['$'])
    return geotypes


# Example URL:
# http://data.ons.gov.uk/ons/api/data/collections.json?apikey=<APIKEY>&context=Census
def _ONS_getCollections(d, concept='', context='Census', lang='en'):
    """Fetch the collection list and return one summary dict per collection.

    Each dict has the keys ``description``, ``name``, ``id`` and
    ``geographicalTypes``; name and geographical types are taken in *lang*.
    Raises ``requests.HTTPError`` on a non-2xx response.
    """
    d._ONS_SetCollections()
    d._ONS_SetContext(context)
    d._ONS_SetConcept(concept)
    r = requests.get(d.url, params=d.params, timeout=60)
    r.raise_for_status()
    collections = _as_list(r.json()['ons']['collectionList']['collection'])
    return [
        {
            'description': data_el['description'],
            'name': _getName(data_el, lang),
            'id': data_el['id'],
            'geographicalTypes': _getGeographicalTypes(data_el, lang),
        }
        for data_el in collections
    ]


# List all collections
data = _ONS_getCollections(ONS_CONFIG())
df = pd.DataFrame(data)
df[:5]

# Display a particular collection
_ONS_getCollections(ONS_CONFIG(), 60)


# Example endpoint: collectiondetails/QS501EW.json?context=Census&apikey=**
def _ONS_fetch_json(d, timeout=60):
    """GET ``d.url`` with ``d.params`` and return the decoded JSON body.

    Raises ``requests.HTTPError`` on a non-2xx response so that an API error
    is reported as such rather than as a later ``KeyError`` while parsing.
    """
    r = requests.get(url=d.url, params=d.params, timeout=timeout)
    r.raise_for_status()
    return r.json()


def _ONS_parse_dimensions(d, lang='en'):
    """Return ``[{'id': ..., 'title': ...}, ...]`` for the dimensions in *d*.

    The title is the *lang* entry of each dimension's ``dimensionTitles``.
    """
    dims = d['dimensions']['dimension']
    if not isinstance(dims, list):
        dims = [dims]
    return [
        {
            'id': dimension['dimensionId'],
            'title': _getLangItemFromList(
                dimension['dimensionTitles'], 'dimensionTitle', lang),
        }
        for dimension in dims
    ]


def _ONS_parse_areas(data, lang='en'):
    """Summarise the geographical hierarchies listed in *data*.

    Returns one dict per hierarchy with ``area`` (a list of area-type dicts),
    ``name`` and, when present in the input, ``year`` and ``id``. The input
    is not modified.
    """
    hierarchies = data['geographicalHierarchies']['geographicalHierarchy']
    if not isinstance(hierarchies, list):
        hierarchies = [hierarchies]

    metaitems = []
    for metaitem in hierarchies:
        items = {'area': []}
        for el in ['year', 'id']:
            if el in metaitem:
                items[el] = metaitem[el]
        items['name'] = _getLangItemFromList(
            metaitem['types'], 'geographicalType', lang)

        # Normalise into a local rather than writing back into the input.
        area_types = metaitem['areaTypes']['areaType']
        if not isinstance(area_types, list):
            area_types = [area_types]
        for item in area_types:
            items['area'].append({
                'areaTypeCodeName': item['codename'],
                'areaTypeCodeAbbrev': item['abbreviation'],
                'areaTypeLevel': item['level'],
            })
        metaitems.append(items)
    return metaitems


def _datasetDescription(d, lang='en'):
    """Return the *lang* description from ``d['refMetadata']['refMetadataItem']``."""
    return _getLangItemFromList(
        d['refMetadata']['refMetadataItem']['descriptions'], 'description', lang)


def _ONS_parse_detail(data, lang='en'):
    """Build the summary dict shared by collection and dataset detail lookups."""
    return {
        'description': _datasetDescription(data, lang),
        'dimensions': _ONS_parse_dimensions(data, lang),
        'id': data['id'],
        'publicationDate': data['publicationDate'],
        'areas': _ONS_parse_areas(data, lang),
    }


def _ONS_getCollectionDetails(d, collId, context='Census', lang='en'):
    """Fetch and summarise the details of collection *collId*.

    Returns a dict with the keys ``description``, ``dimensions``, ``id``,
    ``publicationDate`` and ``areas``.
    """
    d._ONS_SetCollectionDetails(collId)
    # Context required?
    d._ONS_SetContext(context)
    data = _ONS_fetch_json(d)['ons']['collectionDetail']
    return _ONS_parse_detail(data, lang)


_ONS_getCollectionDetails(ONS_CONFIG(), 'QS501EW')


def _ONS_getDatasetDetails(d, datasetId, geog, context='Census', lang='en'):
    """Fetch and summarise the details of dataset *datasetId* for *geog*.

    Returns a dict with the same keys as ``_ONS_getCollectionDetails``.
    """
    d._ONS_SetDatasetDetails(datasetId, geog, context)
    data = _ONS_fetch_json(d)['ons']['datasetDetail']
    return _ONS_parse_detail(data, lang)


_ONS_getDatasetDetails(ONS_CONFIG(), 'QS501EW', '2011PCONH')


def _ONS_getHierarchies(d, geohier, context='', lang='en'):
    """Fetch geography hierarchy *geohier* as a ``pandas.DataFrame``.

    One row per item with the columns ``label``, ``itemCode``,
    ``areaTypeCodeName``, ``areaTypeCodeAbbrev``, ``areaTypeLevel`` and
    ``parentCode`` (empty string when the item has no parent code). The
    ``context`` parameter is only sent when a non-empty value is given.
    """
    d._ONS_SetGeohierarchies(geohier)
    if context != '':
        d._ONS_SetContext(context)
    data = _ONS_fetch_json(d)['ons']['geographyList']
    items = []
    for item in data['items']['item']:
        items.append({
            'label': _getLangItemFromList(item['labels'], 'label', lang),
            'itemCode': item['itemCode'],
            'areaTypeCodeName': item['areaType']['codename'],
            'areaTypeCodeAbbrev': item['areaType']['abbreviation'],
            'areaTypeLevel': item['areaType']['level'],
            'parentCode': item.get('parentCode', ''),
        })
    return pd.DataFrame(items)


_ONS_getHierarchies(ONS_CONFIG(), '2011PCONH')[:5]  # ,'Census')
# 2011WARDH, 2011PCONH


def _ONS_SetDataset(d, dataset, geohier, context, dims, lang='en'):
    """Fetch *dataset* as JSON-stat and flatten it to labels and values.

    Adapted from
    http://digitalpublishing.ons.gov.uk/2014/08/07/ons-api-just-the-numbers/

    Returns ``{'vals': {label: value, ...}, 'keys': [{'label': ..., 'id': ...}, ...]}``
    where ``vals`` is built from the categories of the first dimension listed
    in the response and ``keys`` from the categories of the second.
    *lang* is accepted for signature parity with the other helpers and is
    not used here.
    """
    d._ONS_SetDataset(dataset, geohier, context, dims)
    obj = _ONS_fetch_json(d)

    # The response is indexed by the requested dataset id.
    dimension = obj[dataset]['dimension']
    dim_ids = dimension['id']

    # The actual observation values from the JSON-stat response.
    values = obj[dataset]['value']

    # Position index and labels of the first dimension's categories.
    # TH: Original uses index 0 - but this doesn't respond correctly for
    # multiple areas?
    index = dimension[dim_ids[0]]['category']['index']
    labels = dimension[dim_ids[0]]['category']['label']

    # What's the measure?
    key = dimension[dim_ids[1]]['category']['label']
    ix = dimension[dim_ids[1]]['category']['index']
    print(key, ix)

    datax = {'vals': {}}
    datax['keys'] = [{'label': key[ixl], 'id': ixl} for ixl in ix]

    for l in labels:
        num = index[l]  # position of this label's observation
        # JSON-stat allows ``value`` to be an array or an object keyed by the
        # position as a string; handle both.
        if isinstance(values, list):
            count = values[num]
        else:
            count = values[str(num)]
        datax['vals'][labels[l]] = count
    return datax


_ONS_SetDataset(ONS_CONFIG(), 'QS501EW', '2011WARDH', 'Census',
                {'dm/2011WARDH': 'E05008481,E05003606'})

# The notebook installed the dependency here with the IPython shell escape
# ``!pip3 install pyjstat``; that is not valid Python syntax, so install
# pyjstat from a shell before running this section.
from pyjstat import pyjstat

# Use example from ONS blog post
d = ONS_CONFIG()
d._ONS_SetDataset('QS104EW', '2011WARDH', 'Census',
                  {'dm/2011WARDH': 'K04000001', 'totals': 'false'})
r = requests.get(url=d.url, params=d.params, timeout=60)
r.raise_for_status()
data = r.json()
data
results = pyjstat.from_json_stat(data)