import pandas as pd, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Load the CPI table: first table parsed from the file, first row used as header.
cpi = pd.read_html('IPC_8_5_2015.xls', header=0)[0]
# Relabel the six columns as 'Year', 0, 1, 2, 3, 4.
cpi.columns = ['Year'] + list(range(5))
# Keep only 'Year' and column 0, skip the first two rows, index by 'Year'.
cpi = cpi.drop(list(range(1, 5)), axis=1)[2:].set_index('Year')
cpi.head()
0 | |
---|---|
Year | |
Anul 1991 | 14701010 |
Anul 1992 | 4735590 |
Anul 1993 | 1329834 |
Anul 1994 | 561690 |
Anul 1995 | 424681 |
# Load the first labor export; its columns are relabelled 2001-2008 below.
df = pd.read_csv('exportPivot_FOM103A.csv')
df = df.reset_index(level=5)
df = df.reset_index(level=4)
df = df.drop('level_4', axis=1)
# Swap the two outermost index levels, then pivot the innermost level into columns.
df.index = df.index.swaplevel(0, 1)
df = df.unstack()
df.columns = df.columns.droplevel()
# Remove the ' Ani' column and label the remaining columns by year.
df = df.drop(u' Ani', axis=1)
df.columns = range(2001, 2009)
df.head()
2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | |||
---|---|---|---|---|---|---|---|---|---|---|
Feminin | A Agricultura vanatoare si silvicultura | Alba | 35.6 | 31.3 | 30 | 27.5 | 28 | 26.8 | 26.2 | 25.8 |
Arad | 30.1 | 27.5 | 25.8 | 24.8 | 25.4 | 23.7 | 23.1 | 23 | ||
Arges | 51.5 | 45.3 | 43.4 | 40.1 | 40.5 | 38.6 | 37.4 | 36.7 | ||
Bacau | 48.6 | 43.2 | 40.9 | 37.6 | 37.9 | 36.2 | 35.2 | 34.7 | ||
Bihor | 63.6 | 56.3 | 53.7 | 49.7 | 49.6 | 47.2 | 46 | 45 |
# Load the second labor export; its columns are relabelled 1992-2000 below.
dg = pd.read_csv('exportPivot_FOM103A(1).csv')
dg = dg.reset_index(level=5)
dg = dg.reset_index(level=4)
dg = dg.drop('level_4', axis=1)
# Swap the two outermost index levels, then pivot the innermost level into columns.
dg.index = dg.index.swaplevel(0, 1)
dg = dg.unstack()
dg.columns = dg.columns.droplevel()
# Remove the ' Ani' column and label the remaining columns by year.
dg = dg.drop(u' Ani', axis=1)
dg.columns = range(1992, 2001)
dg.head()
1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | |||
---|---|---|---|---|---|---|---|---|---|---|---|
Feminin | A Agricultura vanatoare si silvicultura | Alba | 42.4 | 43.6 | 38.4 | 31.8 | 34.2 | 34.1 | 34.1 | 35.4 | 36.4 |
Arad | 39.6 | 33.9 | 36.2 | 32.7 | 31.9 | 30.1 | 29.7 | 30.2 | 31.5 | ||
Arges | 60.4 | 62.4 | 52.5 | 44.1 | 48.5 | 48.4 | 49 | 50.7 | 52.6 | ||
Bacau | 59.7 | 61.3 | 48.4 | 42.5 | 46.1 | 46.2 | 46.3 | 48.2 | 50 | ||
Bihor | 62.3 | 61.3 | 64.2 | 54.5 | 59.8 | 60.3 | 60.4 | 62.7 | 65.5 |
# Merge dataframes: copy the 2001-2008 columns of df into dg (aligned on the index),
# so dg ends up with one column per year from 1992 to 2008.
dg[list(range(2001, 2009))] = df.loc[:, list(range(2001, 2009))]
dg.head()
1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | |||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Feminin | A Agricultura vanatoare si silvicultura | Alba | 42.4 | 43.6 | 38.4 | 31.8 | 34.2 | 34.1 | 34.1 | 35.4 | 36.4 | 35.6 | 31.3 | 30 | 27.5 | 28 | 26.8 | 26.2 | 25.8 |
Arad | 39.6 | 33.9 | 36.2 | 32.7 | 31.9 | 30.1 | 29.7 | 30.2 | 31.5 | 30.1 | 27.5 | 25.8 | 24.8 | 25.4 | 23.7 | 23.1 | 23 | ||
Arges | 60.4 | 62.4 | 52.5 | 44.1 | 48.5 | 48.4 | 49 | 50.7 | 52.6 | 51.5 | 45.3 | 43.4 | 40.1 | 40.5 | 38.6 | 37.4 | 36.7 | ||
Bacau | 59.7 | 61.3 | 48.4 | 42.5 | 46.1 | 46.2 | 46.3 | 48.2 | 50 | 48.6 | 43.2 | 40.9 | 37.6 | 37.9 | 36.2 | 35.2 | 34.7 | ||
Bihor | 62.3 | 61.3 | 64.2 | 54.5 | 59.8 | 60.3 | 60.4 | 62.7 | 65.5 | 63.6 | 56.3 | 53.7 | 49.7 | 49.6 | 47.2 | 46 | 45 |
# Load the first salary export; its columns are relabelled 1990-2008 below.
dh = pd.read_csv('exportPivot_FOM106A.csv')
dh = dh.reset_index(level=5)
dh = dh.reset_index(level=4)
dh = dh.drop('level_4', axis=1)
# Swap the two outermost index levels, then pivot the innermost level into columns.
dh.index = dh.index.swaplevel(0, 1)
dh = dh.unstack()
dh.columns = dh.columns.droplevel()
# Remove the ' Ani' column, discard the first row, and label the columns by year.
dh = dh.drop(u' Ani', axis=1)
dh = dh[1:]
dh.columns = range(1990, 2009)
dh.head()
1990 | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | |||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Total | Agricultura vanatoare | Alba | NaN | 6657 | 16198 | 51156 | 104888 | 172409 | 265364 | 433591 | 678615 | 1244730 | 1616274 | 1979834 | 2553320 | 3502950 | 4200240 | 479 | 649 | 836 | 936 |
Arad | NaN | 6772 | 16198 | 46126 | 115894 | 145146 | 236365 | 470299 | 728322 | 920535 | 1481668 | 2188808 | 2922893 | 3691041 | 4894790 | 465 | 638 | 805 | 870 | ||
Arges | NaN | 6388 | 16139 | 45937 | 94921 | 165966 | 250108 | 485230 | 737100 | 1158867 | 1385852 | 2071189 | 2954218 | 3407402 | 4675222 | 512 | 604 | 746 | 962 | ||
Bacau | NaN | 6395 | 16837 | 45838 | 116178 | 156111 | 236731 | 444497 | 663362 | 1226652 | 1755922 | 2865649 | 3200276 | 3391211 | 4701315 | 585 | 608 | 752 | 888 | ||
Bihor | NaN | 6952 | 17101 | 49267 | 108049 | 168891 | 241080 | 494484 | 695764 | 1171666 | 1340431 | 2098684 | 2669157 | 3968259 | 5331557 | 584 | 563 | 621 | 661 |
# Load the CAEN2 labor export; its columns are relabelled 2009-2014 below.
di = pd.read_csv('exportPivot_FOM103D.csv')
di = di.reset_index(level=5)
di = di.reset_index(level=4)
di = di.drop('level_4', axis=1)
# Swap the two outermost index levels, then pivot the innermost level into columns.
di.index = di.index.swaplevel(0, 1)
di = di.unstack()
di.columns = di.columns.droplevel()
# Remove the ' Ani' column and label the remaining columns by year.
di = di.drop(u' Ani', axis=1)
di.columns = range(2009, 2015)
di.head()
2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |||
---|---|---|---|---|---|---|---|---|
Feminin | A AGRICULTURA SILVICULTURA SI PESCUIT | Alba | 25.8 | 26.6 | 26 | 26.2 | 26.5 | 25 |
Arad | 22.9 | 23.1 | 23.1 | 23.7 | 24 | 22.6 | ||
Arges | 36.3 | 37.2 | 37.5 | 38.9 | 39.6 | 37.2 | ||
Bacau | 34.4 | 35.3 | 35.3 | 36.7 | 37.5 | 35.2 | ||
Bihor | 45 | 46 | 45.9 | 47.9 | 48.6 | 45.8 |
# Load the CAEN2 salary export; its columns are relabelled 2009-2014 below.
dj = pd.read_csv('exportPivot_FOM106E.csv')
dj = dj.reset_index(level=5)
dj = dj.reset_index(level=4)
dj = dj.drop('level_4', axis=1)
# Swap the two outermost index levels, then pivot the innermost level into columns.
dj.index = dj.index.swaplevel(0, 1)
dj = dj.unstack()
dj.columns = dj.columns.droplevel()
# Remove the ' Ani' column and label the remaining columns by year.
dj = dj.drop(u' Ani', axis=1)
dj.columns = range(2009, 2015)
dj.head()
2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |||
---|---|---|---|---|---|---|---|---|
Feminin | A AGRICULTURA SILVICULTURA SI PESCUIT | Alba | NaN | NaN | NaN | 1318 | 926 | 1354 |
Arad | NaN | NaN | NaN | 958 | 1284 | 1053 | ||
Arges | NaN | NaN | NaN | 1018 | 1043 | 1030 | ||
Bacau | NaN | NaN | NaN | 979 | 1029 | 1031 | ||
Bihor | NaN | NaN | NaN | 807 | 882 | 1038 |
# Load the third CAEN2 salary export (used for global normalization);
# its columns are relabelled 2000-2013 below.
dk = pd.read_csv('exportPivot_FOM106F.csv')
dk = dk.reset_index(level=5)
dk = dk.reset_index(level=4)
dk = dk.drop('level_4', axis=1)
# Two successive level swaps (0<->1, then 1<->2) reorder the index before
# the innermost level is pivoted into columns.
dk.index = dk.index.swaplevel(0, 1)
dk.index = dk.index.swaplevel(1, 2)
dk = dk.unstack()
dk.columns = dk.columns.droplevel()
# Remove the ' Ani' column and label the remaining columns by year.
dk = dk.drop(u' Ani', axis=1)
dk.columns = range(2000, 2014)
dk.head()
2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | |||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Feminin | A AGRICULTURA SILVICULTURA SI PESCUIT | Total | NaN | NaN | NaN | 356 | 475 | 503 | 602 | 733 | 897 | 986 | 1010 | 1012 | 1055 | 1141 |
B INDUSTRIA EXTRACTIVA | Total | NaN | NaN | NaN | 721 | 873 | 1172 | 1408 | 1712 | 2315 | 2294 | 2480 | 2705 | 2946 | 3113 | |
C INDUSTRIA PRELUCRATOARE | Total | NaN | NaN | NaN | 377 | 479 | 567 | 627 | 749 | 915 | 1000 | 1085 | 1153 | 1212 | 1283 | |
D PRODUCTIA SI FURNIZAREA DE ENERGIE ELECTRICA SI TERMICA GAZE APA CALDA SI AER CONDITIONAT | Total | NaN | NaN | NaN | 753 | 916 | 1264 | 1466 | 1748 | 2273 | 2425 | 2576 | 2690 | 2836 | 2816 | |
E DISTRIBUTIA APEI; SALUBRITATE GESTIONAREA DESEURILOR ACTIVITATI DE DECONTAMINARE | Total | NaN | NaN | NaN | 434 | 532 | 648 | 757 | 928 | 1122 | 1209 | 1260 | 1337 | 1386 | 1548 |
#renormalize CAENs
# cc maps every activity label used in the exports onto one common set of labels.
cc = dict(
    #CAEN 2 labels: each one is registered twice, bare and with a leading space
    (prefix + caen2_label, common_label)
    for prefix in ('', ' ')
    for caen2_label, common_label in [
        ('B INDUSTRIA EXTRACTIVA', 'C Industria extractiva'),
        ('N ACTIVITATI DE SERVICII ADMINISTRATIVE SI ACTIVITATI DE SERVICII SUPORT', 'L Administratie publica si aparare'),
        ('M ACTIVITATI PROFESIONALE STIINTIFICE SI TEHNICE', 'M Invatamant'),
        ('J INFORMATII SI COMUNICATII', 'Posta si telecomunicatii'),
        ('S ALTE ACTIVITATI DE SERVICII', 'Celelalte activitati ale economiei nationale'),
        ('R ACTIVITATI DE SPECTACOLE CULTURALE SI RECREATIVE', 'Celelalte activitati ale economiei nationale'),
        ('INDUSTRIE', 'Industrie'),
        ('A AGRICULTURA SILVICULTURA SI PESCUIT', 'A Agricultura vanatoare si silvicultura'),
        ('O ADMINISTRATIE PUBLICA SI APARARE; ASIGURARI SOCIALE DIN SISTEMUL PUBLIC', 'L Administratie publica si aparare'),
        ('P INVATAMANT', 'M Invatamant'),
        ('F CONSTRUCTII', 'F Constructii'),
        ('K INTERMEDIERI FINANCIARE SI ASIGURARI', 'J Intermedieri financiare'),
        ('C INDUSTRIA PRELUCRATOARE', 'D Industria prelucratoare'),
        ('TOTAL', 'Total'),
        ('D PRODUCTIA SI FURNIZAREA DE ENERGIE ELECTRICA SI TERMICA GAZE APA CALDA SI AER CONDITIONAT', 'E Energie electrica si termica gaze si apa'),
        ('Q SANATATE SI ASISTENTA SOCIALA', 'N Sanatate si asistenta sociala'),
        ('L TRANZACTII IMOBILIARE', 'K Tranzactii imobiliare si alte servicii'),
        ('G COMERT CU RIDICATA SI CU AMANUNTUL; REPARAREA AUTOVEHICULELOR SI MOTOCICLETELOR', 'G Comert'),
        ('CAEN Rev.2 (activitati ale economiei nationale)', 'CAEN Rev.1 (activitati ale economiei nationale - sectiuni)'),
        ('E DISTRIBUTIA APEI; SALUBRITATE GESTIONAREA DESEURILOR ACTIVITATI DE DECONTAMINARE', 'E Energie electrica si termica gaze si apa'),
        ('I HOTELURI SI RESTAURANTE', 'H Hoteluri si restaurante'),
        ('H TRANSPORT SI DEPOZITARE', 'Transport si depozitare'),
    ]
)
# Labels that map to themselves.
cc.update(
    (label, label)
    for label in [
        'Posta si telecomunicatii',
        'L Administratie publica si aparare',
        'Industrie',
        'Transport si depozitare',
        'C Industria extractiva',
        'Celelalte activitati ale economiei nationale',
        'A Agricultura vanatoare si silvicultura',
        'F Constructii',
        'H Hoteluri si restaurante',
        'K Tranzactii imobiliare si alte servicii',
        'N Sanatate si asistenta sociala',
        'G Comert',
        'CAEN Rev.1 (activitati ale economiei nationale - sectiuni)',
        'D Industria prelucratoare',
        'M Invatamant',
        'Total',
        'J Intermedieri financiare',
        'E Energie electrica si termica gaze si apa',
    ]
)
# Remaining labels that are folded into one of the labels above.
cc.update({
    'Silvicultura exploatarea forestiera': 'A Agricultura vanatoare si silvicultura',
    'I Transport depozitare si comunicatii': 'Transport si depozitare',
    'B Pescuit si piscicultura': 'A Agricultura vanatoare si silvicultura',
    'Agricultura vanatoare': 'A Agricultura vanatoare si silvicultura',
})
# rr maps the region labels kept from the exports to short codes.
rr = {
    u' Harghita': 'HR',
    u' Covasna': 'CV',
    u' Mures': 'MS',
}
# Both spellings of the overall total share the code 'RO'.
rr.update(dict.fromkeys([u' TOTAL', u'Total'], 'RO'))
# The three development regions share the code 'TR'.
rr.update(dict.fromkeys(
    [u' Regiunea CENTRU', u' Regiunea VEST', u' Regiunea NORD-VEST'], 'TR'))
def interpolate(d,years,gfit=1,depth=2,polyorder=1,override=True):
    """Estimate values for the entries of ``years`` that are missing from ``d``.

    Consecutive missing years form one gap. Each gap is filled from the known
    points of ``d`` lying within a window around the gap; the window is
    clipped to ``[min(years), max(years)]``.

    Parameters
    ----------
    d : dict
        Known points, ``{year: value}``. It is not modified.
    years : sequence of int
        Years for which a value is wanted.
    gfit : int
        0 - mean of the known points within 3 years of the gap;
        1 - polynomial fit of degree ``polyorder``;
        2 - exponential fit (straight line through the logarithms).
    depth : number
        For ``gfit`` 1 and 2 the window reaches ``int(depth * len(gap))``
        years to the left and right of the gap.
    polyorder : int
        Degree of the polynomial used when ``gfit`` is 1.
    override : bool
        If False, gaps lying entirely before the first or after the last
        known year are always filled with the mean (as for ``gfit=0``).

    Returns
    -------
    dict
        ``{year: estimate}`` for the missing years only. Fitted estimates are
        clipped at 0 from below. An empty dict is returned when ``d`` is
        empty or nothing is missing. ``None`` is returned (after printing a
        message) when ``gfit`` is greater than 2.
    """
    if gfit > 2:
        print('interpolate takes only 0 (mean), 1 (polynomial) or 2 (exponential) as 3rd argument [default=1]')
        return

    # Group consecutive missing years; a known year closes the current group.
    gaps = [[]]
    for year in years:
        if year in d:
            gaps.append([])
        else:
            gaps[-1].append(year)
    gaps = [gap for gap in gaps if gap]

    # Nothing to fill, or nothing to fill from.
    if not gaps or not d:
        return {}

    first_known = min(d)
    last_known = max(d)
    first_year = min(years)
    last_year = max(years)

    filled = {}
    for gap in gaps:
        fit = gfit
        # Gaps outside the known range are extrapolations; without override
        # they fall back to the plain mean.
        outside_known = gap[-1] < first_known or gap[0] > last_known
        if outside_known and not override:
            fit = 0

        reach = 3 if fit == 0 else int(depth * len(gap))
        window = range(max(first_year, min(gap) - reach),
                       min(last_year, max(gap) + reach) + 1)
        # Materialize sorted lists: dict views cannot be handed to numpy on
        # Python 3, and a fixed order keeps the fit reproducible.
        xs = sorted(set(d).intersection(window))
        ys = [d[x] for x in xs]
        targets = np.array(gap)

        if fit == 0:
            mean_value = np.mean(ys)
            for year in gap:
                filled[year] = mean_value
            continue

        if fit == 1:
            # polyval evaluates any degree, not just 1 and 2.
            coefficients = np.polyfit(xs, ys, polyorder)
            estimates = np.polyval(coefficients, targets)
        else:
            slope, intercept = np.polyfit(xs, np.log(ys), 1)
            # Add the exponents before exponentiating: exp(intercept) and
            # exp(slope*year) taken separately underflow to 0 and overflow to
            # inf for year-sized arguments, and their product is NaN.
            estimates = np.exp(intercept + slope * targets)

        for year, estimate in zip(gap, estimates):
            filled[year] = max(0, estimate)
    #return interpolated points
    return filled
years=range(1990,2015)
#Labor data
#CAEN 1
# Pass 1: gather one float series per sex / region code / normalized activity from dg.
# Rows whose normalized activity coincides are summed.
data = {}
for row_key, row_values in dg.T.iteritems():
    sex, activity, region = row_key[0], row_key[1], row_key[2]
    if region not in {u' Harghita',u' Covasna',u' Mures',u' TOTAL',u' Regiunea CENTRU',u' Regiunea VEST',u' Regiunea NORD-VEST'}:
        continue
    region_series = data.setdefault(sex, {}).setdefault(rr[region], {})
    if activity in {'Agricultura vanatoare', 'Industrie', 'INDUSTRIE', ' INDUSTRIE'}: #eliminate doublecounting
        continue
    sector = cc[activity]
    if sector in region_series:
        region_series[sector] = region_series[sector].add(row_values.astype(float), fill_value=0)
    else:
        region_series[sector] = row_values.astype(float)

# Pass 2: keep the non-NaN yearly values as plain dicts and fill the missing
# years of `years` with interpolate().
mydata = {}
for sex in data:
    sex_out = mydata.setdefault(sex, {})
    for region_code in data[sex]:
        region_out = sex_out.setdefault(region_code, {})
        for sector in data[sex][region_code]:
            known = region_out.setdefault(sector, {})
            series = data[sex][region_code][sector]
            for year in series.index:
                value = series.loc[year]
                if not np.isnan(value):
                    known[year] = value
            known.update(interpolate(known, years))
#CAEN 2
# Pass 1: gather one float series per sex / region code / normalized activity from di.
# Rows whose normalized activity coincides are summed.
data = {}
for row_key, row_values in di.T.iteritems():
    sex, activity, region = row_key[0], row_key[1], row_key[2]
    if region not in {u' Harghita',u' Covasna',u' Mures',u' TOTAL',u' Regiunea CENTRU',u' Regiunea VEST',u' Regiunea NORD-VEST'}:
        continue
    region_series = data.setdefault(sex, {}).setdefault(rr[region], {})
    if activity in {'Agricultura vanatoare','Industrie', 'INDUSTRIE', ' INDUSTRIE'}: #eliminate doublecounting
        continue
    sector = cc[activity]
    if sector in region_series:
        region_series[sector] = region_series[sector].add(row_values.astype(float), fill_value=0)
    else:
        region_series[sector] = row_values.astype(float)

# Pass 2: merge the non-NaN yearly values into the existing mydata and
# re-run interpolate() over `years`.
for sex in data:
    sex_out = mydata.setdefault(sex, {})
    for region_code in data[sex]:
        region_out = sex_out.setdefault(region_code, {})
        for sector in data[sex][region_code]:
            known = region_out.setdefault(sector, {})
            series = data[sex][region_code][sector]
            for year in series.index:
                value = series.loc[year]
                if np.isnan(value):
                    continue
                known[year] = value
                # For this sector the values after 2008 are discarded again, so
                # they are re-estimated by interpolate() below.
                # NOTE(review): the original indentation was lost; this check is
                # assumed to sit under the not-NaN branch - confirm.
                if sector == 'K Tranzactii imobiliare si alte servicii' and year > 2008:
                    known.pop(year)
            known.update(interpolate(known, years))
#calculate male
# Male figure = total minus female, for every region code, sector and year
# present in the female data.
mydata[' Masculin'] = {}
for region_code in mydata[' Feminin']:
    mydata[' Masculin'][region_code] = {}
    for sector in mydata[' Feminin'][region_code]:
        mydata[' Masculin'][region_code][sector] = dict(
            (year,
             mydata[' Total'][region_code][sector][year]
             - mydata[' Feminin'][region_code][sector][year])
            for year in mydata[' Feminin'][region_code][sector]
        )
#Salary data
#CAEN 1
# Pass 1: for every sex / region code / normalized activity collect, per year,
# the list of all values from dh that fall into that cell.
data = {}
for row_key, row_values in dh.T.iteritems():
    sex, activity, region = row_key[0], row_key[1], row_key[2]
    if region not in {u' Harghita',u' Covasna',u' Mures',u' TOTAL',u' Regiunea CENTRU',u' Regiunea VEST',u' Regiunea NORD-VEST'}:
        continue
    region_samples = data.setdefault(sex, {}).setdefault(rr[region], {})
    if activity in {'Agricultura vanatoare','Industrie', 'INDUSTRIE', ' INDUSTRIE'}: #eliminate doublecounting
        continue
    samples = region_samples.setdefault(cc[activity], {})
    for year in row_values.index:
        samples.setdefault(year, []).append(float(row_values.loc[year]))

# Pass 2: average each list while ignoring NaNs; years whose average is NaN
# are left out. No gap filling is applied at this stage.
mydata2 = {}
for sex in data:
    sex_out = mydata2.setdefault(sex, {})
    for region_code in data[sex]:
        region_out = sex_out.setdefault(region_code, {})
        for sector in data[sex][region_code]:
            averages = region_out.setdefault(sector, {})
            for year in data[sex][region_code][sector]:
                average = np.nanmean(data[sex][region_code][sector][year])
                if not np.isnan(average):
                    averages[year] = average
#CAEN 2
# Pass 1: for every sex / region code / normalized activity collect, per year,
# the list of all values from dj that fall into that cell.
data = {}
for row_key, row_values in dj.T.iteritems():
    sex, activity, region = row_key[0], row_key[1], row_key[2]
    if region not in {u' Harghita',u' Covasna',u' Mures',u' TOTAL',u' Regiunea CENTRU',u' Regiunea VEST',u' Regiunea NORD-VEST'}:
        continue
    region_samples = data.setdefault(sex, {}).setdefault(rr[region], {})
    if activity in {'Agricultura vanatoare'}: #eliminate doublecounting
        continue
    samples = region_samples.setdefault(cc[activity], {})
    for year in row_values.index:
        samples.setdefault(year, []).append(float(row_values.loc[year]))

# Pass 2: merge the NaN-ignoring yearly averages into the existing mydata2;
# years whose average is NaN are left out. No gap filling is applied here.
for sex in data:
    sex_out = mydata2.setdefault(sex, {})
    for region_code in data[sex]:
        region_out = sex_out.setdefault(region_code, {})
        for sector in data[sex][region_code]:
            averages = region_out.setdefault(sector, {})
            for year in data[sex][region_code][sector]:
                average = np.nanmean(data[sex][region_code][sector][year])
                if not np.isnan(average):
                    averages[year] = average
C:\Anaconda\lib\site-packages\numpy\lib\nanfunctions.py:598: RuntimeWarning: Mean of empty slice warnings.warn("Mean of empty slice", RuntimeWarning)
#National salary data
#CAEN 2
# Pass 1: keep only the u'Total' rows of dk and collect, per sex / 'RO' /
# normalized activity / year, the list of values falling into that cell.
data = {}
for row_key, row_values in dk.T.iteritems():
    sex, activity, region = row_key[0], row_key[1], row_key[2]
    if region not in {u'Total'}:
        continue
    region_samples = data.setdefault(sex, {}).setdefault(rr[region], {})
    if activity in {'Agricultura vanatoare','Industrie', 'INDUSTRIE', ' INDUSTRIE'}: #eliminate doublecounting
        continue
    samples = region_samples.setdefault(cc[activity], {})
    for year in row_values.index:
        samples.setdefault(year, []).append(float(row_values.loc[year]))

# Pass 2: average each list while ignoring NaNs; years whose average is NaN
# are left out. No gap filling is applied at this stage.
mydata3 = {}
for sex in data:
    sex_out = mydata3.setdefault(sex, {})
    for region_code in data[sex]:
        region_out = sex_out.setdefault(region_code, {})
        for sector in data[sex][region_code]:
            averages = region_out.setdefault(sector, {})
            for year in data[sex][region_code][sector]:
                average = np.nanmean(data[sex][region_code][sector][year])
                if not np.isnan(average):
                    averages[year] = average
mydata3[' Total']['RO']['A Agricultura vanatoare si silvicultura']
{2000: 163.0, 2001: 227.0, 2002: 284.0, 2003: 377.0, 2004: 480.0, 2005: 527.0, 2006: 617.0, 2007: 743.0, 2008: 914.0, 2009: 1007.0, 2010: 1024.0, 2011: 1044.0, 2012: 1093.0, 2013: 1179.0}
#calculate gender inequality
# For each sector and year of the national male data, store the male and the
# female salary as a fraction of the national total salary, then fill the
# missing years of `years` with interpolate().
gii = {" Masculin": {}, " Feminin": {}}
for sector in mydata3[' Masculin']['RO']:
    gii[" Masculin"][sector] = {}
    gii[" Feminin"][sector] = {}
    for year in mydata3[' Masculin']['RO'][sector]:
        for sex in (" Masculin", " Feminin"):
            gii[sex][sector][year] = (mydata3[sex]['RO'][sector][year]
                                      / mydata3[' Total']['RO'][sector][year])
    for sex in (" Masculin", " Feminin"):
        gii[sex][sector].update(interpolate(gii[sex][sector], years))
#readjust to country-level earnings
# Scale the regional total salary (mydata2[' Total']) by the national
# sex-to-total ratio (gii) to estimate a salary per sex, region code, sector
# and year. Missing years are then filled with an exponential fit (gfit=2, depth=1.5).
GII = {}
for sex in gii: #sex
    sex_out = GII.setdefault(sex, {})
    for region_code in mydata2[sex]: #judet
        region_out = sex_out.setdefault(region_code, {})
        for sector in gii[sex]: #indicator
            estimates = region_out.setdefault(sector, {})
            for year in gii[sex][sector]: #year
                # Only a missing regional total is an expected failure; every
                # other error is allowed to surface instead of being swallowed.
                try:
                    regional_total = mydata2[' Total'][region_code][sector][year]
                except KeyError:
                    continue
                estimate = gii[sex][sector][year] * regional_total
                if np.isnan(estimate):
                    # NaN products are treated as missing.
                    estimates.pop(year, None)
                else:
                    estimates[year] = estimate
            estimates.update(interpolate(estimates, years, 2, 1.5))
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: 
RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, 
RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) 
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: 
RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, 
RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) 
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: 
RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning) C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned warnings.warn(msg, RankWarning)
#build the CPI lookup {year: index value}; 2013 is seeded as the base year (=100)
cp={2013:100}
for label,row in cpi.T.iteritems():
    # row labels look like 'Anul 1991', so the year starts at offset 5;
    # the table stores the index multiplied by 100 (14701010 -> 147010.1)
    cp[int(label[5:])]=int(row)/100.0
# interpolate() and years are defined earlier in the notebook; whatever
# year -> value pairs it returns are merged into cp
cp.update(interpolate(cp,years))
cp
{1990: 246664.29999992251, 1991: 147010.1, 1992: 47355.9, 1993: 13298.34, 1994: 5616.9, 1995: 4246.81, 1996: 3059.46, 1997: 1200.9, 1998: 754.83, 1999: 517.7, 2000: 355.4, 2001: 264.3, 2002: 215.69, 2003: 187.11, 2004: 167.25, 2005: 153.42, 2006: 143.97, 2007: 137.33, 2008: 127.34, 2009: 120.6, 2010: 113.67, 2011: 107.45, 2012: 103.98, 2013: 100, 2014: 96.019999999999527}
#assemble the final dataset as DATA[sex][county][sector][year-as-string]
# Every leaf is a dict with three entries:
#   'munka'    - value copied from mydata for the same keys
#   'fizu'     - value copied unchanged from GII (the salary)
#   'realfizu' - salary scaled by the CPI factor cp[year]/100
DATA={}
# sectors whose 'realfizu' is reset to the unscaled 'fizu' value
# NOTE(review): the reason for this exception is not visible here - confirm
_unscaled_sectors={'Celelalte activitati ale economiei nationale','N Sanatate si asistenta sociala'}
for sex in GII:
    per_county=DATA.setdefault(sex,{})
    for county in GII[sex]:
        per_sector=per_county.setdefault(county,{})
        for sector in GII[sex][county]:
            per_year=per_sector.setdefault(sector,{})
            for y in GII[sex][county][sector]:
                jobs=mydata[sex][county][sector][y]
                nominal=GII[sex][county][sector][y]
                # pre-2005 salaries are divided by 10000 before CPI scaling
                # (presumably the leu redenomination - TODO confirm)
                rescaled=nominal/10000.0 if y<2005 else nominal
                entry={'munka':jobs,
                       'fizu':nominal,
                       'realfizu':rescaled*cp[y]/100.0}
                if sector in _unscaled_sectors:
                    entry['realfizu']=entry['fizu']
                # years are stored under their repr() string, not the int
                per_year[repr(y)]=entry
# Display labels: Romanian activity name (as it appears in the data) -> Hungarian
# label written to the JSON output. English glosses are given in the comments.
# NOTE(review): 'Asványkincsek' and 'Mezőgasdaság' look like misspellings of
# 'Ásványkincsek' / 'Mezőgazdaság'; left untouched because the strings are
# emitted as-is and downstream consumers may depend on them - confirm.
ff = {
    'Total': u'Összesen',                                          # total
    'H Hoteluri si restaurante': u'Vendéglátóipar',                # hospitality
    'C Industria extractiva': u'Asványkincsek, bányászat',         # minerals, mining
    'Posta si telecomunicatii': u'Távközlés',                      # telecommunications
    'L Administratie publica si aparare': u'Közügy, hatóság',      # public administration
    'Celelalte activitati ale economiei nationale': u'Más',        # other
    'K Tranzactii imobiliare si alte servicii': u'Ingatlan',       # real estate
    'N Sanatate si asistenta sociala': u'Egészségügy',             # health care
    'F Constructii': u'Építőipar',                                 # construction
    'D Industria prelucratoare': u'Feldolgozóipar',                # manufacturing
    'M Invatamant': u'Oktatás',                                    # education
    'Industrie': u'Összes ipar',                                   # all industry
    'G Comert': u'Kereskedelem',                                   # trade
    'J Intermedieri financiare': u'Pénzügy',                       # finance
    'E Energie electrica si termica gaze si apa': u'Energiaipar',  # energy
    'A Agricultura vanatoare si silvicultura': u'Mezőgasdaság',    # agriculture
    'Transport si depozitare': u'Szállítás',                       # transport
}
#save data
import json

# Writes one '<county>nations.json' file per county, holding a list with one
# entry per sector. The entry keys do not describe their contents (they look
# inherited from a chart template - TODO confirm); what is actually stored,
# as [year, value] pairs for 1990..2014:
#   income         - women's share of 'munka':  F / (F + M)
#   lifeExpectancy - women's 'realfizu' divided by men's 'realfizu'
#   avgwage        - men's 'realfizu'
#   population     - (F + M) 'munka' multiplied by 1000
for county in DATA[' Feminin']:
    DATA2=[]
    for i in DATA[' Feminin'][county]:
        # per-year records for this county/sector, one series per sex
        women=DATA[' Feminin'][county][i]
        men=DATA[' Masculin'][county][i]
        helper={}
        helper["name"]=ff[i]
        helper["region"]=ff[i]
        helper["income"]=[]
        helper["lifeExpectancy"]=[]
        helper["population"]=[]
        helper["avgwage"]=[]
        for syear in range(1990,2015):
            # DATA stores years under their string form
            year=repr(syear)
            fem=women[year]
            masc=men[year]
            helper["income"].append([syear,fem['munka']/(fem['munka']+masc['munka'])])
            helper["lifeExpectancy"].append([syear,fem['realfizu']/masc['realfizu']])
            helper["avgwage"].append([syear,masc['realfizu']])
            helper["population"].append([syear,(fem['munka']+masc['munka'])*1000])
        DATA2.append(helper)
    # open() inside a with-block flushes and closes the file deterministically;
    # the previous file(...).write(...) left that to garbage collection and
    # relied on a builtin that exists only in Python 2.
    # json.dumps escapes non-ASCII by default, so the payload is plain ASCII.
    with open(county+'nations.json','w') as out:
        out.write(json.dumps(DATA2))
#szekelyfold
# Combined figures for the three counties 'HR', 'CV' and 'MS', written to
# 'SZFnations.json' in the same layout as the per-county files. Values are
# [year, value] pairs for 1990..2014:
#   income         - women's share of 'munka' summed over the three counties
#   lifeExpectancy - sum of women's 'realfizu' / sum of men's 'realfizu'
#   avgwage        - unweighted mean of men's 'realfizu' over the three counties
#   population     - total (F + M) 'munka' multiplied by 1000
# The sector list is taken from 'HR'; the other two counties must carry the
# same sectors or the lookups below raise KeyError.
DATA2=[]
for i in DATA[' Feminin']['HR']:
    helper={}
    helper["name"]=ff[i]
    helper["region"]=ff[i]
    helper["income"]=[]
    helper["lifeExpectancy"]=[]
    helper["population"]=[]
    helper["avgwage"]=[]
    for syear in range(1990,2015):
        # DATA stores years under their string form
        year=repr(syear)
        f_hr=DATA[' Feminin']['HR'][i][year]
        f_cv=DATA[' Feminin']['CV'][i][year]
        f_ms=DATA[' Feminin']['MS'][i][year]
        m_hr=DATA[' Masculin']['HR'][i][year]
        m_cv=DATA[' Masculin']['CV'][i][year]
        m_ms=DATA[' Masculin']['MS'][i][year]

        f_jobs=f_hr['munka']+f_cv['munka']+f_ms['munka']
        m_jobs=m_hr['munka']+m_cv['munka']+m_ms['munka']
        all_jobs=(f_hr['munka']+m_hr['munka']+
                  f_cv['munka']+m_cv['munka']+
                  f_ms['munka']+m_ms['munka'])
        f_pay=f_hr['realfizu']+f_cv['realfizu']+f_ms['realfizu']
        m_pay=m_hr['realfizu']+m_cv['realfizu']+m_ms['realfizu']

        helper["income"].append([syear,f_jobs/all_jobs])
        helper["lifeExpectancy"].append([syear,f_pay/m_pay])
        helper["avgwage"].append([syear,m_pay/3.0])
        helper["population"].append([syear,(f_jobs+m_jobs)*1000])
    DATA2.append(helper)
# open() inside a with-block flushes and closes the file deterministically;
# the previous file(...).write(...) left that to garbage collection and
# relied on a builtin that exists only in Python 2.
with open('SZFnations.json','w') as out:
    out.write(json.dumps(DATA2))
#load the FOM106F export and reshape it to one column per year (2003-2013)
# NOTE(review): the displayed rows are per-activity figures split by sex;
# presumably average wages - confirm against the export's source.
raw=pd.read_csv('exportPivot_FOM106F(1).csv')
# move index levels 5 and 4 back into columns, then discard 'level_4'
raw=raw.reset_index(level=5)
raw=raw.reset_index(level=4)
raw=raw.drop('level_4',axis=1)
raw.index=raw.index.swaplevel(0,1)
# pivot the innermost index level into columns and flatten the column header
wide=raw.unstack()
wide.columns=wide.columns.droplevel()
wide=wide.drop(u' Ani',axis=1)
# relabel the remaining columns as years, then keep 2003 onwards only
wide.columns=range(2000,2014)
wide=wide.drop(range(2000,2003),axis=1)
df=wide.astype(float)
df.head()
2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | |||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Feminin | Total | 01 Agricultura vanatoare si servicii anexe | 339 | 461 | 486 | 595 | 721 | 866 | 982 | 994 | 981 | 1014 | 1097 |
05 Extractia carbunelui superior si inferior | 755 | 824 | 1109 | 1295 | 1577 | 1849 | 1838 | 1866 | 1842 | 1955 | 2104 | ||
06 Extractia petrolului brut si a gazelor naturale | 817 | 1018 | 1394 | 1678 | 2040 | 3048 | 3057 | 3233 | 3726 | 4213 | 4517 | ||
07 Extractia minereurilor metalifere | 553 | 717 | 819 | 945 | 1207 | 1512 | 1669 | 1879 | 2091 | 2193 | 2141 | ||
08 Alte activitati extractive | 610 | 717 | 825 | 938 | 1066 | 1387 | 1363 | 1500 | 1685 | 1629 | 1623 |
# female figure as a percentage of the male figure, per activity and year,
# using the 'Total' rows of each sex
women_total=df.loc[' Feminin'].loc[u'Total']
men_total=df.loc[' Masculin'].loc[u'Total']
# astype(int) truncates the percentage toward zero rather than rounding
dm=(women_total/men_total*100).astype(int)
dm.index.name=u'Agazat'
dm.to_csv('data.csv')
dm.head()
2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | |
---|---|---|---|---|---|---|---|---|---|---|---|
name | |||||||||||
01 Agricultura vanatoare si servicii anexe | 98 | 106 | 99 | 101 | 102 | 101 | 100 | 102 | 99 | 97 | 97 |
05 Extractia carbunelui superior si inferior | 73 | 70 | 79 | 80 | 83 | 78 | 75 | 75 | 74 | 73 | 79 |
06 Extractia petrolului brut si a gazelor naturale | 97 | 97 | 99 | 93 | 96 | 108 | 105 | 116 | 121 | 119 | 118 |
07 Extractia minereurilor metalifere | 77 | 89 | 83 | 84 | 81 | 90 | 97 | 93 | 98 | 101 | 100 |
08 Alte activitati extractive | 110 | 106 | 110 | 111 | 108 | 119 | 112 | 115 | 121 | 123 | 119 |