"""Notebook-style exploration of the IATI registry: codelists, publishers,
organisation metadata and dataset search."""
import json

import requests
import pandas as pd
from bs4 import BeautifulSoup

# IATI Codelists
iati_country = 'http://iatistandard.org/codelists/downloads/clv1/codelist/Country.csv'
iati_region = 'http://iatistandard.org/codelists/downloads/clv1/codelist/Region.csv'
iati_sector = 'http://iatistandard.org/codelists/downloads/clv1/codelist/Sector.csv'
iati_category = 'http://iatistandard.org/codelists/downloads/clv1/codelist/SectorCategory.csv'
iati_orgtype = 'http://iatistandard.org/codelists/downloads/clv1/codelist/OrganisationType.csv'


def codesgetter(url):
    """Load an IATI codelist CSV from *url* and return it indexed by its 'code' column.

    Empty columns are dropped; codes are coerced to strings because the
    codelists mix numeric and alphabetic codes.
    """
    df = pd.read_csv(url).dropna(axis=1, how='all')
    # Column assignment (not attribute assignment) is the safe pandas idiom.
    df['code'] = df['code'].astype('str')
    df.set_index(keys='code', inplace=True)
    return df


countries = codesgetter(iati_country)
countries[:3]
regions = codesgetter(iati_region)
regions[:3]
# I fell foul of looking up countries that were regions - generate a combined
# set and call them areas
areas = pd.concat([countries, regions])
sectors = codesgetter(iati_sector)
sectors[:3]
categories = codesgetter(iati_category)
categories[:3]
orgtypes = codesgetter(iati_orgtype)
orgtypes[:3]

# Grab the full list of organisation-file packages from the registry API.
url = 'http://iatiregistry.org/api/3/action/package_search?q=extras_filetype:organisation&rows=1000'
data = json.loads(requests.get(url).text)
len(data['result']['results'])

# Scrape the publisher table from the registry's HTML publisher page.
url = 'http://iatiregistry.org/publisher'
response = requests.get(url)
# Name the parser explicitly so behaviour doesn't depend on which parsers
# happen to be installed (and to silence BeautifulSoup's warning).
soup = BeautifulSoup(response.content, 'html.parser')
table = soup.find('tbody')
res = []
for row in table.findAll('tr'):
    # The publisher id is the tail of the link href; the cells hold the rest.
    tmp = [row.find('a').attrs['href'].replace('/publisher/', '')]
    for cell in row.findAll('td'):
        tmp.append(cell.text)
    res.append(tmp)
publishers = pd.DataFrame(res, columns=['ID', 'Name', 'OrgType', 'Country', 'Datasets'])
publishers[:3]


# Look up org details by code/id on IATI Register
def iati_org_info(k):
    """Fetch and flatten publisher metadata for registry group id *k*.

    Returns a dict with title/display_name/id plus any publisher_* extras,
    annotated with human-readable org type and country names where possible.
    """
    r = json.loads(requests.get('http://iatiregistry.org/api/1/rest/group/' + k).text)
    d = {}
    for x in ['title', 'display_name']:
        d[x] = r[x]
    d['id'] = r['name']
    # In the v1 REST API, 'extras' is a mapping of key -> value.
    for e in r['extras']:
        if e in ['publisher_organization_type', 'publisher_country', 'publisher_iati_id']:
            d[e] = r['extras'][e]
    if 'publisher_organization_type' in d:
        d['orgtype'] = orgtypes.loc[d['publisher_organization_type']]['name']
    # Guard against empty country codes, which would KeyError in areas.loc.
    if 'publisher_country' in d and d['publisher_country'] != '':
        d['pub_country'] = areas.loc[d['publisher_country']]['name']
    return d


iati_org_info('aauk')


# Trying this...
# It makes a few hundred requests to the API, so if we run this we may as well
# store the result. We can then use the stored data in any future investigations.
def scrapeIATIorgData():
    """Fetch metadata for every registry group and cache it to iati_orgdata.csv."""
    dx = []
    # Get a list of groups
    groups = json.loads(requests.get('http://iatiregistry.org/api/1/rest/group').text)
    for group in groups:
        dx.append(iati_org_info(group))
    admindata = pd.DataFrame(dx)
    admindata.to_csv('iati_orgdata.csv', index=False)


# scrapeIATIorgData()
admindata = pd.read_csv('iati_orgdata.csv')
admindata[:3]


def lookupOrg(name):
    """Return cached org rows whose title contains *name* (case-insensitive)."""
    return admindata[admindata['title'].str.contains(name, case=False)]


lookupOrg('oxfam')


# These routines parse results of searches on the API
def iati_org_annotate(d, lang='en'):
    """Add human-readable org-type and country names to a flattened org dict."""
    if 'publisher_organization_type' in d:
        d['orgtype'] = orgtypes.loc[d['publisher_organization_type']]['name']
    # Match iati_org_info: skip empty country codes rather than KeyError-ing.
    if 'publisher_country' in d and d['publisher_country'] != '':
        d['pub_country'] = areas.loc[d['publisher_country']]['name']
    return d


def iati_org_core(r):
    """Flatten a v3 package_search result into a dict of core org fields."""
    d = {}
    d['item'] = r['title']
    d['title'] = r['organization']['title']
    d['id'] = r['organization']['name']
    # In the v3 API, 'extras' is a list of {'key': ..., 'value': ...} dicts.
    for e in r['extras']:
        if e['key'] in ['publisher_organization_type', 'publisher_country', 'publisher_iati_id']:
            d[e['key']] = e['value']
    return d


def iati_org(r):
    """Flatten and annotate a single package_search result."""
    return iati_org_annotate(iati_org_core(r))


iati_org(data['result']['results'][0])


# Really scruffy search
def searchIATIdatasets(q):
    """Search the registry for activity files matching *q*; return a DataFrame.

    Each row carries the flattened org fields plus the first resource URL.
    """
    url = 'http://iatiregistry.org/api/3/action/package_search'
    params = {'rows': 100, 'q': ' '.join([q, 'extras_filetype:activity'])}
    response = requests.get(url, params=params)
    data = json.loads(response.text)
    results = []
    for result in data['result']['results']:
        tmp = iati_org(result)
        tmp['url'] = result['resources'][0]['url']
        results.append(tmp)
    return pd.DataFrame(results)


resp = searchIATIdatasets('bangladesh')
resp[:4]

# Try to load in a data file identified from the previous search
# Pass in the index val from the previous query
from io import StringIO


def grabIATIdata(df, ix):
    """Download and parse the activity CSV behind row *ix* of a search-result frame.

    df: DataFrame as returned by searchIATIdatasets (needs 'item' and 'url' columns).
    ix: index label of the row whose dataset should be fetched.
    Returns the parsed activity data as a new DataFrame.
    """
    print("Data for:", df.loc[ix]['item'])
    # aidinfolabs converts the registry's activity XML into a simple CSV.
    url = 'http://tools.aidinfolabs.org/csv/direct_from_registry/'
    params = {
        'xml': df.loc[ix]['url'],
        'download': 'true',
        'id': 'true',
        'format': 'simple',
    }
    response = requests.get(url, params=params)
    # Use a fresh name rather than clobbering the *df* argument.
    activities = pd.read_csv(StringIO(response.text))  # , skipfooter=2, engine='python'
    return activities


# This will grab the fourth result (index 3) - CAFOD Activity File for
# Bangladesh - and display the data
ex = grabIATIdata(resp, 3)
ex[:3]
# Here's a peek at the full range of columns available in the dataframe
ex.columns