#szekelyland workbook - extrapolating from romanian values using mortality statistics from INSSE
import pandas as pd, numpy as np
df=pd.read_csv('exportPivot_POP206C.csv')
de=pd.read_csv('exportPivot_POP206E.csv')
de.columns
df=df.drop(u' UM: Numar persoane',axis=1).set_index([u' Ani',u'Clasificarea internationala a maladiilor - Revizia a X a 1994',u' Macroregiuni regiuni de dezvoltare si judete'])
de=de.drop(u' UM: Numar persoane',axis=1).set_index([u' Ani',u'Sexe',u' Grupe de varsta ',u' Macroregiuni regiuni de dezvoltare si judete'])
de.head()
for i in df.index.levels[0].unique():
print i
for i in df.index.levels[1].unique():
print i
d={
"Alte cauze":1094,
"Boli ale aparatului circulator":1064,
"Boli ale aparatului digestiv":1078,
"Boli ale aparatului genito-urinar":1084,
"Boli ale aparatului respirator":1072,
"Boli ale sistemului nervos boli ale ochiului si anexele sale boli ale urechii si apofizei mastoide":9,
"Boli endocrine de nutritie si metabolism":1051,
"Boli infectioase si parazitare":1001,
"Leziuni traumatice otraviri si alte consecinte ale cauzelor externe":1095,
"Malformatii congenitale deformatii si anomalii cromozomiale":1093,
"Sarcina nastere si lauzie":1087,
"Total":1000,
"Tulburari mentale si de comportament":1055,
"Tumori":1026,
"Unele afectiuni a caror origine se situeaza in perioada perinatala":1092,
"din care: Boala ischemica a inimii":0,
"din care: Boli cerebro-vasculare":0,
"din care: Diabet zaharat":0,
"din care: Tuberculoza":0}
n0=[1048,1082,1083]
n9=[1058,1062,1063]
#calculate county share in romania total
pop={}
for j in df.index.levels[0].unique():
if j[6:] not in pop:pop[j[6:]]={}
for k in df.index.levels[1].unique():
if d[k]!=0:
if d[k] not in pop[j[6:]]:pop[j[6:]][str(d[k])]={}
pop[j[6:]][str(d[k])]["ro"]=df.loc[j].loc[k].loc[' TOTAL'][0]
try: a=df.loc[j].loc[k].loc[' Covasna'][0]
except: a=0
try: b=df.loc[j].loc[k].loc[' Harghita'][0]
except: b=0
try: c=df.loc[j].loc[k].loc[' Mures'][0]
except: c=0
pop[j[6:]][str(d[k])]["szf"]=(a+b+c)*1.0/df.loc[j].loc[k].loc[' TOTAL'][0]
for y in pop:
for c in n9:
if str(c) not in pop[y]:pop[y][str(c)]={}
for m in ["szf","ro"]:
pop[y][str(c)][m]=pop[y]['9'][m]/3.0
pop[y].pop('9');
for c in n0:
if str(c) not in pop[y]:pop[y][str(c)]={}
for m in ["szf","ro"]:
pop[y][str(c)][m]=pop[y]['1000'][m]
#calculate age deviation from national average
e={"Feminin":"f","Masculin":"m","Total":"s"}
pop2={}
pop3={}
for j in de.index.levels[0].unique():
if j[6:] not in pop2:pop2[j[6:]]={}
if j[6:] not in pop3:pop3[j[6:]]={}
for k in de.index.levels[1].unique():
if e[k] not in pop2[j[6:]]:pop2[j[6:]][e[k]]={}
if e[k] not in pop3[j[6:]]:pop3[j[6:]][e[k]]={}
for l in de.index.levels[2].unique():
age=l[:3].strip().strip('-')
try: a=de.loc[j].loc[k].loc[l].loc[' Covasna'][0]
except: a=0
try: b=de.loc[j].loc[k].loc[l].loc[' Harghita'][0]
except: b=0
try: c=de.loc[j].loc[k].loc[l].loc[' Mures'][0]
except: c=0
try: d=de.loc[j].loc[k].loc[l].loc[' TOTAL'][0]
except: d=0
if age!='To':
if age!='0':
if age!='85':
pop2[j[6:]][e[k]][age]=(a+b+c)*1.0
pop3[j[6:]][e[k]][age]=(d)*1.0
else:
pop2[j[6:]][e[k]]['90']=(a+b+c)*3.0/10
pop2[j[6:]][e[k]]['95']=(a+b+c)*1.0/10
pop2[j[6:]][e[k]]['85']=(a+b+c)*6.0/10
pop3[j[6:]][e[k]]['90']=(d)*3.0/10
pop3[j[6:]][e[k]]['95']=(d)*1.0/10
pop3[j[6:]][e[k]]['85']=(d)*6.0/10
else:
pop2[j[6:]][e[k]]['0']=(a+b+c)*1.0/5
pop2[j[6:]][e[k]]['1']=(a+b+c)*1.0/5
pop2[j[6:]][e[k]]['2']=(a+b+c)*1.0/5
pop2[j[6:]][e[k]]['3']=(a+b+c)*1.0/5
pop2[j[6:]][e[k]]['4']=(a+b+c)*1.0/5
pop3[j[6:]][e[k]]['0']=(d)*1.0/5
pop3[j[6:]][e[k]]['1']=(d)*1.0/5
pop3[j[6:]][e[k]]['2']=(d)*1.0/5
pop3[j[6:]][e[k]]['3']=(d)*1.0/5
pop3[j[6:]][e[k]]['4']=(d)*1.0/5
for i in pop2:
for j in ["f","m"]:
for a in pop2[i][j]:
pop2[i][j][a]=pop2[i][j][a]/sum(pop2[i]["s"].values())
pop3[i][j][a]=pop3[i][j][a]/sum(pop3[i]["s"].values())
import zipfile,json
#read RO data
z = zipfile.ZipFile('db2/642.zip')
ro = json.loads(z.open('data.json').read())
h=json.loads(file('hierarchy2.json').read())
szf=[]
for i in ro:
szf.append({"a":i["a"],
"c":i["c"],
"g":i["g"],
"t":i["t"],
"s":i["s"]*float(pop[str(i["t"])][h[i['c']]["group"]]["szf"])})
szf2=[]
for i in ro:
szf2.append({"a":i["a"],
"c":i["c"],
"g":i["g"],
"t":i["t"],
"s":i["s"]*float(pop[str(i["t"])][h[i['c']]["group"]]["szf"])
*float(pop2[str(i["t"])][i["g"]][str(i["a"])])
/float(pop3[str(i["t"])][i["g"]][str(i["a"])])
})
#save files
try:
import zlib
compression = zipfile.ZIP_DEFLATED
except:
compression = zipfile.ZIP_STORED
file('db2/data.json','w').write(json.dumps(szf))
zf = zipfile.ZipFile('db2/9999.zip', mode='w')
zf.write('db2/data.json','data.json',compress_type=compression)
zf.close()
file('db2/data.json','w').write(json.dumps(szf2))
zf = zipfile.ZipFile('db2/9998.zip', mode='w')
zf.write('db2/data.json','data.json',compress_type=compression)
zf.close()
#update dictionaries
#run only once
c=json.loads(file('countries.json').read())
c=[u'9998']+c
file('countries.json','w').write(json.dumps(c))
c=json.loads(file('cnames.json').read())
c[u'9998']=u'Székelyland'
file('cnames.json','w').write(json.dumps(c))
c=json.loads(file('hnames.json').read())
c[u'Székelyland']=u'Székelyföld'
file('hnames.json','w').write(json.dumps(c))
#update population
p=json.loads(file('pop.json').read())
x=pd.read_csv('exportPivot_POP107A.csv')
x=x.drop([u' Medii de rezidenta',u' UM: Numar persoane'],axis=1).set_index([u'Varste si grupe de varsta',u' Sexe',u' Ani',u' Macroregiuni regiuni de dezvoltare si judete'])
x=x.unstack(u' Macroregiuni regiuni de dezvoltare si judete')
x=pd.DataFrame(x.T.sum())
sorted(x.index.levels[0].unique())
indice=[str(i)+' ani' for i in range(5)]+[str(i*5)+'-'+str((i+1)*5-1)+' ani' for i in range(1,17)]
indice[5]='5- 9 ani'
p['9999']={}
p['9998']={}
gg={"f":u" Feminin","m":u" Masculin"}
for y in range(1999,2013):
if str(y) not in p['9999']:p['9999'][str(y)]={}
for g in ["f","m"]:
if g not in p['9999'][str(y)]:p['9999'][str(y)][g]={}
for i in indice:
p['9999'][str(y)][g][str(i[:2].strip().strip('-'))]=str(float(x.loc[i].loc[gg[g]].loc[' Anul '+str(y)][0]))
p['9999'][str(y)][g][str(85)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*6.0/10)
p['9999'][str(y)][g][str(90)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*3.0/10)
p['9999'][str(y)][g][str(95)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*1.0/10)
p['9998']=p['9999']
#save updated population file
file('pop.json','w').write(json.dumps(p))
p['9998']['2005']['m']
p['642']['2005']['f']