import numpy as np, pandas as pd
from pygeocoder import Geocoder
import matplotlib.pyplot as plt
%matplotlib inline
# Load the census spreadsheet. Its first column is labelled with the sheet
# title ("resident population by religion - counties, municipalities, towns,
# communes") and holds the row names.
df = pd.read_excel('http://www.recensamantromania.ro/wp-content/uploads/2013/07/sR_TAB_13.xls')
name_col = u'13. POPULATIA STABILA DUPA RELIGIE - JUDETE, MUNICIPII, ORASE, COMUNE'

# megye collects [row label, first-column value] for every row in the
# hard-coded range 6..3433 whose neighbours directly above and below are both
# empty in the first column.
# NOTE(review): presumably these are the county header rows ("megye" is
# Hungarian for county) - confirm against the sheet layout.
megye = []
for i in df.index[6:3434]:
    try:
        above = df.loc[int(i) - 1, name_col]
        below = df.loc[int(i) + 1, name_col]
    except KeyError:
        # No neighbouring row with that label (edge of the sheet): skip.
        continue
    # pd.isnull is False for text cells, so no exception handling is needed
    # to tell "neighbour holds a name" apart from "neighbour is empty".
    if pd.isnull(above) and pd.isnull(below):
        megye.append([i, df.loc[i, name_col]])
# Run once only! This rebinds df, so a second run would operate on the
# already-trimmed frame.
df = df.drop(labels=[u'Unnamed: 1', u'Unnamed: 24'], axis=1)
# Replace the spreadsheet headers with short (Hungarian) labels: the first is
# the name column, the last is a placeholder for a column dropped just below.
df.columns = [
    u'Falu',
    u'Ortodox',
    u'Katolikus',
    u'Református',
    u'Pünkösdista',
    u'Görög katolikus',
    u'Baptista',
    u'Adventista',
    u'Muzulmán',
    u'Unitárius',
    u'Jehova tanúja',
    u'Lutheránus evangélikus',
    u'Ókatolikus',
    u'Lutheránus',
    u'Szerb ortodox',
    u'Evangélikus',
    u'Kálvinista',
    u'Zsidó',
    u'Örmény',
    u'Más',
    u'Nem vallásos',
    u'Ateista',
    u'N/A',
]
df = df.drop(labels=u'N/A', axis=1)
# Run once only! Discards the first seven rows of the frame.
df = df.loc[df.index[7:], :]
# Result containers filled by the loop below. Keys are row labels with their
# leading characters sliced off ([2:] or [3:], exactly as in the lookups).
#   data  -> {megye label: {row label: {column: value}}}
#   data2 -> {row label: {column: value}} for rows equal to a megye entry
#   data3 -> {row label: {column: value}} for all other rows, flat
data = {}
data2 = {}
data3 = {}

# Row labels that are section sub-headings rather than places.
section_rows = [u' A. MUNICIPII SI ORASE', u' B. COMUNE', 'NaN']
# Cell contents that carry no value.
blank_cells = [u'*', u'-', 'NaN']

# ez is a cursor into megye: it advances once the current row label has
# passed megye[ez][0]; megye[ez - 1] is then used as the enclosing entry.
ez = 0
for i in df.index:
    try:
        row = int(i)
        if megye[ez][0] < i:
            ez += 1
        name = df.loc[row, u'Falu']
        # Missing values are tested with pd.isnull: a list-membership test
        # against NaN only matches the very same object, never an equal one.
        is_place = not pd.isnull(name) and name not in section_rows
        if not (megye[ez][1] == name):
            if is_place:
                county = megye[ez - 1][1][2:]
                if county not in data:
                    data[county] = {}
                data[county][name[3:]] = {}
                data3[name[2:]] = {}
                for j in df.columns[1:]:
                    value = df.loc[row, j]
                    if not pd.isnull(value) and value not in blank_cells:
                        data[county][name[3:]][j] = value
                        data3[name[2:]][j] = value
        else:
            # The row text equals the pending megye entry: store it in data2.
            if is_place:
                data2[name[2:]] = {}
                for j in df.columns[1:]:
                    value = df.loc[row, j]
                    if not pd.isnull(value) and value not in blank_cells:
                        data2[name[2:]][j] = value
    except (IndexError, KeyError, TypeError):
        # Best-effort parse: a row that cannot be resolved is skipped.
        # NOTE(review): once ez has moved past the last megye entry,
        # megye[ez] raises IndexError, so every later row is skipped; and
        # while ez == 0, megye[ez - 1] is the LAST entry. Confirm both are
        # intended before changing this.
        continue
# Add Bucharest to the main dataset: it becomes its own top-level entry whose
# single member is the record already stored under the same name in data2.
bucharest = 'MUNICIPIUL BUCURESTI'
data[bucharest] = {}
data[bucharest][bucharest] = data2[bucharest]
# Save religion data: one JSON file per result dictionary.
import json

for filename, payload in (('data.json', data),
                          ('data2.json', data2),
                          ('data3.json', data3)):
    # open() inside a with-block closes (and flushes) each file even if
    # serialisation fails; file() exists only on Python 2 and left the handle
    # to be closed by the garbage collector.
    with open(filename, 'w') as out:
        out.write(json.dumps(payload))
# County name converter: for every row of ro.csv, map a space followed by the
# upper-cased text of column 11 to the upper-cased text of column 9. The first
# row seen for a key wins.
cc = {}
for _, record in pd.read_csv('ro.csv').T.iteritems():
    key = ' ' + record[11].upper()
    if key not in cc:
        cc[key] = record[9].upper()

# Fix db: hand-written entries for keys the file above does not resolve.
manual_fixes = (
    ('Bicazu ', 'BACAU'),
    ('Municipiul Brasov', 'BRASOV'),
    ('Oras intorsura ', 'COVASNA'),
    ('Sanmihaiu de ', 'MURES'),
    ('Municipiul Resita CS', 'CARAS-SEVERIN'),
)
for raw_key, county_name in manual_fixes:
    cc[raw_key] = county_name
# Hungarian settlement names, where applicable.
# hun3 -> {county (via cc): {upper-cased commune name: value of column 2}}
# Columns of magyar.csv are read by position:
#   0: a name, compared upper-cased with its last character removed
#   1: "<commune>,<county key>" - split at the first comma
#   2: the value stored (presumably the Hungarian name - confirm)
hun3 = {}
dh = pd.read_csv('magyar.csv', sep='|').dropna(axis=0)
for _, rec in dh.T.iteritems():
    try:
        place = rec[1]
        comma = str.find(place, ',')
        county = cc[place[comma + 1:]]  # county; unknown keys raise KeyError
        if county not in hun3:
            hun3[county] = {}
        commune = place[:comma].upper()  # comune
        hungarian = rec[2]
        if hungarian:
            base = rec[0].upper()[:-1]
            if base == commune:  # village
                hun3[county][commune] = hungarian
            if 'MUNICIPIUL ' + base == commune:  # city
                hun3[county][commune] = hungarian
            if 'ORAS ' + base == commune:  # town
                # NOTE(review): only this branch stores repr(...) instead of
                # the raw value - kept as-is, confirm whether it is intended.
                hun3[county][commune] = repr(hungarian)
    except (KeyError, TypeError, AttributeError):
        # Best-effort: skip rows whose county key is not in cc or whose
        # fields are not text.
        continue

# Write via a with-block so the file is closed deterministically (file() is
# Python 2-only and never closed the handle explicitly).
with open('hun2.json', 'w') as out:
    out.write(json.dumps(hun3))
# Parse the country file for settlement coordinates.
# coords -> {upper-cased column 9: {upper-cased name: [column 0, column 1]}}
# Columns of ro.csv are read by position; rows whose column 5 equals 'V' are
# not given coordinates (presumably a settlement-type code - confirm).
coords = {}
de = pd.read_csv('ro.csv')
for _, rec in de.T.iteritems():
    county = rec[9].upper()
    if county not in coords:
        coords[county] = {}
    if rec[5] != 'V':
        # Both name columns (8 and 2) point at the same coordinate pair.
        coords[county][rec[8].upper()] = [rec[0], rec[1]]
        coords[county][rec[2].upper()] = [rec[0], rec[1]]
    # Also register the column-11 text (here without a leading space) in the
    # county name converter, unless it is already known.
    if rec[11].upper() not in cc:
        cc[rec[11].upper()] = county

# Fix db: fill in entries by geocoding. Each query is sent once and its result
# reused; the stored pair is the geocoder's coordinates in reversed order.
sangeorgiu = Geocoder.geocode('SANGEORGIU DE PADURE').coordinates
coords['MURES']['ORAS SANGEORGIU DE PADURE'] = [sangeorgiu[1], sangeorgiu[0]]
riciu = Geocoder.geocode('RICIU, MURES, ROMANIA').coordinates
coords['MURES']['RICIU'] = [riciu[1], riciu[0]]
coords['MUNICIPIUL BUCURESTI'] = {'MUNICIPIUL BUCURESTI': coords['BUCURESTI']['MUNICIPIUL BUCURESTI']}
# NOTE(review): the HARGHITA entry reuses the MURES query result, exactly as
# the original did - confirm this is intended.
coords['HARGHITA']['RICIU'] = [riciu[1], riciu[0]]

import json

# with-block closes the file deterministically (file() is Python 2-only and
# left the handle open).
with open('coords2.json', 'w') as out:
    out.write(json.dumps(coords))