import json
import os, sys
import pandas as pd
curdir = os.path.abspath('./..')
from localfinance.spiders.localfinance_spider import uniformize_code, convert_dom_code, convert_city
data_dir = os.path.join(curdir, 'scraped_data')
insee_filepath = os.path.join(curdir, 'data/france2013.txt')
quantiles = pd.np.arange(0.01, 1, 0.01)
def plot_quantiles(series, figsize=(12,10)):
pd.Series(quantiles, index=[series.quantile(q) for q in quantiles]).plot(figsize=figsize)
from localfinance.account_parsing import city_account
# Load insee code of cities
insee_df = pd.io.parsers.read_csv(insee_filepath, '\t')
# Load data on 2012
df = pd.DataFrame(json.load(open(os.path.join(data_dir, 'cities_2012.json'))))
print "Les informations suivantes ont été récupérées pour chacune des communes: \n %s"%df.columns.tolist()
Les informations suivantes ont été récupérées pour chacune des communes: [u'additionnal_land_property_tax_basis', u'additionnal_land_property_tax_cuts_on_deliberation', u'additionnal_land_property_tax_rate', u'additionnal_land_property_tax_value', u'allocation', u'business_network_tax_value', u'business_profit_contribution_value', u'business_property_contribution_basis', u'business_property_contribution_cuts_on_deliberation', u'business_property_contribution_rate', u'business_property_contribution_value', u'contigents', u'costs_to_allocate', u'debt_annual_costs', u'debt_at_end_year', u'debt_repayment_capacity', u'debt_repayments', u'facilities_expenses', u'fctva', u'financial_costs', u'financing_capacity', u'fixed_assets', u'global_profit', u'home_tax_basis', u'home_tax_cuts_on_deliberation', u'home_tax_rate', u'home_tax_value', u'insee_code', u'investment_ressources', u'investments_usage', u'land_property_tax_basis', u'land_property_tax_cuts_on_deliberation', u'land_property_tax_rate', u'land_property_tax_value', u'loans', u'localtax', u'name', u'net_profit', u'operating_costs', u'operating_revenues', u'other_tax', u'paid_subsidies', u'population', u'property_tax_basis', u'property_tax_cuts_on_deliberation', u'property_tax_rate', u'property_tax_value', u'purchases_and_external_costs', u'received_subsidies', u'residual_financing_capacity', u'retail_land_tax_value', u'returned_properties', u'self_financing_capacity', u'staff_costs', u'surplus', u'thirdparty_balance', u'working_capital', u'year', u'zone_type']
names_mapping = pd.DataFrame([dict([(k, v.get('name', '')) for k, v in city_account.nodes.items()])])
names_mapping.transpose()
0 | |
---|---|
additionnal_land_property_tax | Taxe additionnelle à la taxe foncière sur les ... |
advances_from_treasury | Avances du Trésor au 31/12/N |
allocation | Dotation globale de fonctionnement |
allocation_tax_revenues | Les produits des impôts de répartition |
business_network_tax | Impositions forfaitaires sur les entreprises d... |
business_profit_contribution | Cotisation sur la valeur ajoutée des entreprises |
business_property_contribution | Cotisation foncière des entreprises |
business_tax | [Taxe professionnelle (hors produits écrêtés),... |
compensation_2010 | Compensation-Relais 2010 |
contigents | Contingents |
costs_to_allocate | Charges à répartir |
debt_annual_costs | Annuité de la dette |
debt_at_end_year | Encours total de la dette au 31/12/N |
debt_repayment_capacity | CAF nette du remboursement en capital des empr... |
debt_repayments | Remboursement d'emprunts et dettes assimilées |
facilities_expenses | Dépenses d'équipement |
fctva | FCTVA |
financial_costs | Charges financières |
fixed_assets | Immobilisations affectées, concédées, ... |
home_tax | [Taxe d'habitation (y compris THLV), Produits ... |
investment_ressources | TOTAL DES RESSOURCES D'INVESTISSEMENT = C |
investments | OPERATIONS D'INVESTISSEMENT |
investments_usage | TOTAL DES EMPLOIS D'INVESTISSEMENT = D |
land_property_tax | [Taxe foncière sur les propriétés non bâties, ... |
liabilities | ENDETTEMENT |
loans | Emprunts bancaires et dettes assimilées |
localtax | Impôts Locaux |
net_profit | RESULTAT COMPTABLE = A - B = R |
operating_costs | TOTAL DES CHARGES DE FONCTIONNEMENT = B |
operating_revenues | TOTAL DES PRODUITS DE FONCTIONNEMENT = A |
operatings_operations | OPERATIONS DE FONCTIONNEMENT |
other_tax | Autres impôts et taxes |
paid_subsidies | Subventions versées |
property_tax | [Taxe foncière sur les propriétés bâties, Prod... |
purchases_and_external_costs | Achats et charges externes |
received_subsidies | Subventions reçues |
retail_land_tax | Taxe sur les surfaces commerciales |
returned_properties | Retour de biens affectés, concédés, ... |
root | |
self_financing | AUTOFINANCEMENT |
self_financing_capacity | Capacité d'autofinancement = CAF |
staff_costs | Charges de personnel |
surplus | Excédent brut de fonctionnement |
taxation | ELEMENTS DE FISCALITE DIRECTE LOCALE |
thirdparty_balance | Solde des opérations pour le compte de tiers |
# Sanity checks
print u"Nombre de communes dupliquées: %s"%(df['insee_code'].count() - df['insee_code'].unique().size)
print u"Numbre of communes non dupliquées: %s"%df['insee_code'].unique().size
df['dep'] = df['insee_code'].apply(lambda r: r[:3])
gp_by_dep = df.groupby('dep')
print u"Nombre de départements: %s"%gp_by_dep.dep.size().size
Nombre de communes dupliquées: 0 Numbre of communes non dupliquées: 36663 Nombre de départements: 100
# Take only current cities (Cf. insee doc http://www.insee.fr/fr/methodes/default.asp?page=nomenclatures/cog/doc_ffrancee.htm)
insee_df['DEP'] = uniformize_code(insee_df, 'DEP')
insee_df['COM'] = uniformize_code(insee_df, 'COM')
insee_df['DEP'] = convert_dom_code(insee_df)
insee_df['COM'] = insee_df.apply(convert_city, axis=1)
insee_df['DEPCOM'] = insee_df['DEP'] + insee_df['COM']
# Remove MAYOTTE department,
current_insee_df = insee_df[(insee_df['ACTUAL'] == 1) & (insee_df['DEP'] <> '976')]
print "Nombre de commune sans le département de Mayotte au 2013/01/01 selon l'insee: %s"%current_insee_df['DEPCOM'].unique().size
Nombre de commune sans le département de Mayotte au 2013/01/01 selon l'insee: 36664
Nous allons donc regarder de plus près si certains budgets de communes seraient absents du site collectivites-locales.gouv.fr
Certaines communes ne présentent aucune données financières sur 2012 sur le site des collectivités locales.
Quelques exemples de page où il n'y a effectivement aucune donnée sur 2012:
On notera que les données sont bien présentes sur les années précédentes.
Ci-dessous les 20 communes sans donnée financières.
(moyenne, écart-type, min, quantile 25%, 50%, 75% et max)
On notera que les charges de personnels (staff_costs) peuvent être négatives!
df[['operating_revenues', 'operating_costs', 'staff_costs', 'net_profit', 'investment_ressources',
'financing_capacity', 'investments_usage', 'debt_at_end_year', 'debt_annual_costs']].describe()[1:]
operating_revenues | operating_costs | staff_costs | net_profit | investment_ressources | financing_capacity | investments_usage | debt_at_end_year | debt_annual_costs | |
---|---|---|---|---|---|---|---|---|---|
mean | 2.200283e+06 | 1.922053e+06 | 9.267096e+05 | 2.782336e+05 | 9.567364e+05 | -24645.473638 | 9.324207e+05 | 1.671028e+06 | 2.119362e+05 |
std | 3.027319e+07 | 2.792053e+07 | 1.165512e+07 | 2.435761e+06 | 1.402173e+07 | 781739.813490 | 1.367754e+07 | 2.179044e+07 | 2.386009e+06 |
min | 9.000000e+03 | 5.000000e+03 | -1.300000e+04 | -2.968000e+06 | 0.000000e+00 | -60924000.000000 | 0.000000e+00 | -2.800000e+04 | 0.000000e+00 |
25% | 1.410000e+05 | 1.030000e+05 | 2.600000e+04 | 2.800000e+04 | 4.500000e+04 | -53000.000000 | 4.800000e+04 | 3.900000e+04 | 7.000000e+03 |
50% | 3.080000e+05 | 2.360000e+05 | 8.100000e+04 | 6.600000e+04 | 1.350000e+05 | 1000.000000 | 1.360000e+05 | 1.710000e+05 | 2.500000e+04 |
75% | 8.110000e+05 | 6.320000e+05 | 2.560000e+05 | 1.720000e+05 | 4.270000e+05 | 47000.000000 | 4.150000e+05 | 6.025000e+05 | 8.300000e+04 |
max | 5.218325e+09 | 4.820358e+09 | 1.878621e+09 | 3.979670e+08 | 2.450566e+09 | 18801000.000000 | 2.389281e+09 | 3.260027e+09 | 2.806090e+08 |
, 'property_tax_rate', 'land_property_tax_rate',
'additionnal_land_property_tax_rate', 'business_property_contribution_rate']].describe()[1:]
home_tax_rate | property_tax_rate | land_property_tax_rate | additionnal_land_property_tax_rate | business_property_contribution_rate | |
---|---|---|---|---|---|
mean | 0.131094 | 0.139850 | 0.447765 | 0.221133 | 0.085261 |
std | 0.050662 | 0.066394 | 0.282152 | 0.274089 | 0.097964 |
min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
25% | 0.094700 | 0.093000 | 0.260500 | 0.000000 | 0.000000 |
50% | 0.127400 | 0.132500 | 0.391900 | 0.000000 | 0.000000 |
75% | 0.162900 | 0.177600 | 0.562400 | 0.376000 | 0.171500 |
max | 0.610300 | 0.589400 | 3.744500 | 1.369600 | 0.505700 |
On s'intéresse dans cette partie à la qualité de quelques informations financières: on commence par des informations relativement bien connues du grand public, et donc simple à appréhender.
codes = set(current_insee_df['DEPCOM']).symmetric_difference(df['insee_code']).intersection(current_insee_df['DEPCOM'])
com_nodata = current_insee_df[['DEPCOM', 'NCC', 'DEP']][current_insee_df['DEPCOM'].apply(lambda r: r in codes)]
com_nodata.head(n=30)
DEPCOM | NCC | DEP | |
---|---|---|---|
3291 | 006121 | SAINT-JEAN-CAP-FERRAT | 006 |
5001 | 008105 | CHARLEVILLE-MEZIERES | 008 |
5330 | 008199 | GRANDVILLE | 008 |
5929 | 028285 | OINVILLE-SOUS-AUNEAU | 028 |
6974 | 025226 | ETRAPPE | 025 |
9066 | 023096 | GUERET | 023 |
9908 | 027226 | ETREPAGNY | 027 |
10251 | 027284 | GISORS | 027 |
10267 | 027304 | GUERNY | 027 |
13372 | 038254 | MONTEYNARD | 038 |
13383 | 038269 | MURE | 038 |
16155 | 051535 | SEZANNE | 051 |
25492 | 074273 | SIXT-FER-A-CHEVAL | 074 |
25494 | 074276 | TANINGES | 074 |
25507 | 074294 | VERCHAIX | 074 |
32808 | 089460 | VILLENEUVE-LA-GUYARD | 089 |
33750 | 006159 | VILLEFRANCHE-SUR-MER | 006 |
35506 | 080188 | CHAUSSOY-EPAGNY | 080 |
36311 | 080387 | GRATTEPANCHE | 080 |
37682 | 101117 | MOULE | 101 |
# Prepare data for analysis
df['property_tax_value_per_person'] = df['property_tax_value']/df['population']
df['home_tax_value_per_person'] = df['home_tax_value']/df['population']
df['debt_ratio'] = df['debt_annual_costs']/df['operating_revenues']
df['staff_costs_ratio'] = df['staff_costs']/df['operating_revenues']
df['staff_costs_per_person'] = df['staff_costs']/df['population']
On note que la taxe foncière et la taxe d'habitation sont en moyenne très proche, de l'ordre de 13%, avec un écart-type un peu plus important sur la taxe foncière.
df[['property_tax_rate', 'home_tax_rate']].describe()
property_tax_rate | home_tax_rate | |
---|---|---|
count | 36663.000000 | 36663.000000 |
mean | 0.139850 | 0.131094 |
std | 0.066394 | 0.050662 |
min | 0.000000 | 0.000000 |
25% | 0.093000 | 0.094700 |
50% | 0.132500 | 0.127400 |
75% | 0.177600 | 0.162900 |
max | 0.589400 | 0.610300 |
Une disparité entre les communes qui peut être très importante: on passe de 0% à 59% pour la taxe foncière, de 0% à 61% pour la taxe d'habitation.
plt.figure(figsize=(12,12));
df[['property_tax_rate', 'home_tax_rate']].boxplot()
df[['property_tax_rate', 'home_tax_rate', 'name', 'insee_code']].head(n=20)
property_tax_rate | home_tax_rate | name | insee_code | |
---|---|---|---|---|
0 | 0.2198 | 0.1382 | ROQUEFORT-SUR-SOULZON | 012203 |
1 | 0.1910 | 0.1273 | ROQUE-SAINTE-MARGUERITE (LA) | 012204 |
2 | 0.0998 | 0.1154 | ROUSSENNAC | 012206 |
3 | 0.0579 | 0.0715 | RULHAC-SAINT-CIRQ | 012207 |
4 | 0.2670 | 0.1145 | SAINT-AFFRIQUE | 012208 |
5 | 0.0622 | 0.0308 | SAINT-AMANS-DES-COTS | 012209 |
6 | 0.2100 | 0.1678 | SAINT-ANDRE-DE-NAJAC | 012210 |
7 | 0.0568 | 0.0451 | SAINT-BEAULIZE | 012212 |
8 | 0.1277 | 0.0970 | SAINT-ANDRE-DE-VEZINES | 012211 |
9 | 0.1917 | 0.1265 | SAINT-CHELY-D'AUBRAC | 012214 |
10 | 0.1582 | 0.1394 | SAINT-BEAUZELY | 012213 |
11 | 0.1425 | 0.0766 | SAINT-CHRISTOPHE-VALLON | 012215 |
12 | 0.1236 | 0.1395 | SAINT-COME-D'OLT | 012216 |
13 | 0.1339 | 0.0825 | SAINTE-CROIX | 012217 |
14 | 0.1550 | 0.0880 | SAINT-CYPRIEN-SUR-DOURDOU | 012218 |
15 | 0.0564 | 0.0637 | SAINTE-EULALIE-D'OLT | 012219 |
16 | 0.1437 | 0.1000 | SAINT-FELIX-DE-LUNEL | 012221 |
17 | 0.0914 | 0.1063 | SAINTE-GENEVIEVE-SUR-ARGENCE | 012223 |
18 | 0.1104 | 0.0635 | SAINT-FELIX-DE-SORGUES | 012222 |
19 | 0.1062 | 0.0628 | SAINTE-EULALIE-DE-CERNON | 012220 |
# Biggest property tax rate
_df = df.sort(columns='property_tax_rate', ascending=False)
_df[['property_tax_rate', 'property_tax_value', 'name', 'insee_code']].head(n=20)
property_tax_rate | property_tax_value | name | insee_code | |
---|---|---|---|---|
26903 | 0.5894 | 18219000 | BUSSY-SAINT-GEORGES | 077058 |
3823 | 0.5791 | 27000 | FONTANES-DE-SAULT | 011147 |
16143 | 0.5658 | 42000 | CHAMBORD | 041034 |
32240 | 0.5645 | 236000 | MIZOEN | 038237 |
35052 | 0.5496 | 14305000 | SAINT-LOUIS | 104414 |
3874 | 0.5284 | 127000 | MAILHAC | 011212 |
27699 | 0.5130 | 219000 | ETOILE (L' ) | 080296 |
19839 | 0.5028 | 2190000 | CONDE-SUR-ESCAUT | 059153 |
28283 | 0.5000 | 1190000 | SAINT-COLOMBAN-DES-VILLARDS | 073230 |
36017 | 0.4991 | 10723000 | WATTRELOS | 059650 |
21116 | 0.4989 | 1710000 | SAINS-EN-GOHELLE | 062737 |
29417 | 0.4936 | 3728000 | COULOUNIEIX-CHAMIERS | 024138 |
14261 | 0.4891 | 1127000 | SAINT-SEURIN-SUR-L'ISLE | 033478 |
3512 | 0.4824 | 485000 | CAUNES-MINERVOIS | 011081 |
33686 | 0.4788 | 17000 | REMECOURT | 060529 |
33918 | 0.4710 | 13209000 | SAVIGNY-LE-TEMPLE | 077445 |
20355 | 0.4677 | 9316000 | LIEVIN | 062510 |
24497 | 0.4673 | 796000 | ARQUES-LA-BATAILLE | 076026 |
20291 | 0.4668 | 11623000 | HENIN-BEAUMONT | 062427 |
4162 | 0.4650 | 758000 | PENNAUTIER | 011279 |
# Lowest property tax
_df = df.sort(columns='property_tax_rate', ascending=True)
_df[['property_tax_rate', 'name', 'insee_code']].head(n=20)
property_tax_rate | name | insee_code | |
---|---|---|---|
1871 | 0 | SUZAN | 009304 |
35405 | 0 | SAINT-CIRICE | 082158 |
6353 | 0 | ILE-DE-SEIN | 029083 |
1840 | 0 | MONTJUSTIN | 004129 |
20165 | 0 | BLANZEE | 055055 |
25300 | 0 | SISTELS | 082181 |
36324 | 0 | BRULLEMAIL | 061064 |
17365 | 0 | BOISSEI-LA-LANDE | 061049 |
36223 | 0 | SIVRY-LA-PERCHE | 055489 |
36596 | 0 | CHATEAU-D'ALMENECHES (LE ) | 061101 |
20926 | 0 | HAUMONT-PRES-SAMOGNEUX | 055239 |
36362 | 0 | TILLY-SUR-MEUSE | 055512 |
20965 | 0 | LOISON | 055299 |
15525 | 0 | AVRECOURT | 052033 |
11843 | 0 | FERRIERES | 050179 |
35772 | 0 | LOUVEMONT-COTE-DU-POIVRE | 055307 |
21740 | 0 | SENONCOURT-LES-MAUJOUY | 055482 |
24962 | 0 | CASTELSAGRAT | 082032 |
20160 | 0 | BETHELAINVILLE | 055047 |
20162 | 0 | BEZONVAUX | 055050 |
# Biggest home tax rate
_df = df.sort(columns='home_tax_rate', ascending=False)
_df[['home_tax_rate', 'home_tax_value', 'name', 'insee_code']].head(n=20)
home_tax_rate | home_tax_value | name | insee_code | |
---|---|---|---|---|
7851 | 0.6103 | 32000 | VERDESE | 02B344 |
3874 | 0.5130 | 197000 | MAILHAC | 011212 |
31870 | 0.4891 | 365000 | SANTA-MARIA-POGGIO | 02B311 |
32571 | 0.4664 | 478000 | ROURA | 102310 |
19839 | 0.4506 | 2113000 | CONDE-SUR-ESCAUT | 059153 |
7132 | 0.4336 | 62000 | TASSO | 02A322 |
35052 | 0.4311 | 9504000 | SAINT-LOUIS | 104414 |
30164 | 0.4305 | 18000 | BREMONDANS | 025089 |
26903 | 0.4227 | 12506000 | BUSSY-SAINT-GEORGES | 077058 |
7452 | 0.4160 | 94000 | CORSCIA | 02B095 |
18321 | 0.4081 | 65000 | GAVARNIE | 065188 |
30022 | 0.4074 | 327000 | BOUZY | 051079 |
20589 | 0.4059 | 9930000 | LAMBERSART | 059328 |
35014 | 0.3937 | 386000 | TERRE DE HAUT | 101131 |
21030 | 0.3921 | 3303000 | SAINT-ANDRE-LEZ-LILLE | 059527 |
21741 | 0.3828 | 12000 | SEPTSARGES | 055484 |
7783 | 0.3777 | 50000 | PIEDICROCE | 02B219 |
20625 | 0.3762 | 1013000 | MARCHIENNES | 059375 |
20233 | 0.3740 | 1447000 | FRESNES-SUR-ESCAUT | 059253 |
20996 | 0.3689 | 1076000 | QUESNOY (LE ) | 059481 |
#Biggest home tax value
_df = df.sort(columns='home_tax_value', ascending=False).dropna()
_df[['home_tax_rate', 'home_tax_value', 'home_tax_value_per_person', 'name', 'insee_code']].head(n=20)
home_tax_rate | home_tax_value | home_tax_value_per_person | name | insee_code | |
---|---|---|---|---|---|
24238 | 0.1338 | 700350000 | 310.166472 | PARIS | 075056 |
384 | 0.2723 | 247219000 | 287.831441 | MARSEILLE | 013055 |
35041 | 0.2130 | 158478000 | 324.764641 | LYON | 069123 |
31238 | 0.2133 | 126427000 | 367.029554 | NICE | 006088 |
29635 | 0.1587 | 88933000 | 198.779158 | TOULOUSE | 031555 |
12929 | 0.2404 | 88282000 | 304.284286 | NANTES | 044109 |
32542 | 0.2298 | 84684000 | 352.084217 | BORDEAUX | 033063 |
14784 | 0.2249 | 73973000 | 286.310892 | MONTPELLIER | 034172 |
20608 | 0.3355 | 70197000 | 300.998225 | LILLE | 059350 |
16037 | 0.2199 | 64701000 | 304.864085 | RENNES | 035238 |
2789 | 0.2788 | 64130000 | 861.441333 | CANNES | 006029 |
35972 | 0.2406 | 63592000 | 230.292320 | STRASBOURG | 067482 |
8540 | 0.2933 | 47963000 | 333.268481 | NIMES | 030189 |
12397 | 0.2152 | 47158000 | 298.051460 | GRENOBLE | 038185 |
15025 | 0.2075 | 45741000 | 247.270034 | REIMS | 051454 |
11332 | 0.2016 | 45307000 | 258.597170 | SAINT-ETIENNE | 042218 |
31727 | 0.2183 | 43129000 | 565.722681 | SAINT-MAUR-DES-FOSSES | 094068 |
35045 | 0.1935 | 40998000 | 244.307652 | TOULON | 083137 |
5435 | 0.2222 | 40075000 | 256.672196 | DIJON | 021231 |
27818 | 0.1837 | 38470000 | 271.740282 | LIMOGES | 087085 |
#Biggest home tax value per person
_df = df.sort(columns='home_tax_value_per_person', ascending=False).dropna()
_df[['home_tax_rate', 'home_tax_value', 'home_tax_value_per_person', 'name', 'insee_code']].head(n=20)
home_tax_rate | home_tax_value | home_tax_value_per_person | name | insee_code | |
---|---|---|---|---|---|
35541 | 0.0000 | 0 | inf | FLEURY-DEVANT-DOUAUMONT | 055189 |
20155 | 0.0000 | 0 | inf | BEAUMONT-EN-VERDUNOIS | 055039 |
35772 | 0.0000 | 0 | inf | LOUVEMONT-COTE-DU-POIVRE | 055307 |
20547 | 0.0000 | 0 | inf | CUMIERES-LE-MORT-HOMME | 055139 |
20926 | 0.0000 | 0 | inf | HAUMONT-PRES-SAMOGNEUX | 055239 |
20162 | 0.0000 | 0 | inf | BEZONVAUX | 055050 |
19151 | 0.2762 | 2284000 | 3993.006993 | ANGLES (LES) | 066004 |
34189 | 0.2261 | 790000 | 3062.015504 | VILLAREMBERT | 073318 |
12402 | 0.2355 | 3598000 | 2628.195763 | HUEZ | 038191 |
31252 | 0.2402 | 4149000 | 2622.629583 | THEOULE-SUR-MER | 006138 |
19585 | 0.2614 | 197000 | 2592.105263 | PUYVALADOR | 066154 |
19206 | 0.3066 | 308000 | 2444.444444 | EYNE | 066075 |
35678 | 0.2012 | 1655000 | 2398.550725 | RAYOL-CANADEL-SUR-MER | 083152 |
19164 | 0.2382 | 1943000 | 2386.977887 | BOLQUERE | 066020 |
28906 | 0.2963 | 2824000 | 2271.922767 | CHATEL | 074063 |
28241 | 0.2008 | 159000 | 2271.428571 | MONTGELLAFREY | 073167 |
28984 | 0.2340 | 9178000 | 2251.717370 | MEGEVE | 074173 |
18760 | 0.3291 | 2165000 | 2209.183673 | SAINT-LARY-SOULAN | 065388 |
27875 | 0.1778 | 3986000 | 2063.146998 | ALLUES (LES) | 073015 |
28600 | 0.1741 | 3177000 | 1933.657943 | VAL-D'ISERE | 073304 |
# lowest home tax rate
_df = df.sort(columns='home_tax_value', ascending=True)
_df[['home_tax_rate', 'home_tax_value', 'name', 'insee_code']].dropna().head(n=20)
home_tax_rate | home_tax_value | name | insee_code | |
---|---|---|---|---|
35541 | 0.0000 | 0 | FLEURY-DEVANT-DOUAUMONT | 055189 |
20972 | 0.0331 | 0 | MAIZERAY | 055311 |
2701 | 0.0001 | 0 | SAINT-VULBAS | 001390 |
35998 | 0.0703 | 0 | ORNES | 055394 |
17681 | 0.0684 | 0 | MOLRING | 057470 |
3132 | 0.0000 | 0 | SOULAINES-DHUYS | 010372 |
3164 | 0.0000 | 0 | VILLE-AUX-BOIS (LA) | 010411 |
28283 | 0.0001 | 0 | SAINT-COLOMBAN-DES-VILLARDS | 073230 |
5925 | 0.0000 | 0 | BOUVERANS | 025085 |
3436 | 0.0001 | 0 | CRUAS | 007076 |
8450 | 0.0000 | 0 | POMMEROL | 026245 |
17394 | 0.0102 | 0 | CHAMP-HAUT | 061088 |
17365 | 0.0000 | 0 | BOISSEI-LA-LANDE | 061049 |
17337 | 0.0097 | 0 | AUTHIEUX-DU-PUITS (LES) | 061017 |
36596 | 0.0000 | 0 | CHATEAU-D'ALMENECHES (LE ) | 061101 |
20162 | 0.0000 | 0 | BEZONVAUX | 055050 |
10708 | 0.0000 | 0 | SAULXURES2 | 052465 |
20155 | 0.0000 | 0 | BEAUMONT-EN-VERDUNOIS | 055039 |
27021 | 0.0000 | 0 | ARCAY | 086008 |
32574 | 0.0462 | 0 | OUANARY | 102314 |
df[['debt_ratio']].describe()
debt_ratio | |
---|---|
count | 36663.000000 |
mean | 0.099361 |
std | 0.134267 |
min | 0.000000 |
25% | 0.039039 |
50% | 0.080000 |
75% | 0.125519 |
max | 7.406250 |
# biggest debt ratio
plt.figure(figsize=(12,12));
df[['debt_ratio']].boxplot()
_df = df.sort(columns='debt_ratio', ascending=False)
_df[['debt_ratio', 'name', 'insee_code']].head(n=20)
debt_ratio | name | insee_code | |
---|---|---|---|
15510 | 7.406250 | AMBONVILLE | 052007 |
9472 | 4.550000 | BOUZIN | 031086 |
10707 | 3.718310 | SAUDRON | 052463 |
6236 | 3.602812 | CHAFFOIS | 025110 |
32363 | 3.092784 | BLANZAC-PORCHERESSE | 016046 |
29299 | 3.036364 | FREDIERE (LA ) | 017169 |
30338 | 2.989583 | BALIGNICOURT | 010027 |
10585 | 2.943333 | BEROU-LA-MULOTIERE | 028037 |
33800 | 2.904762 | PAILHAC | 065354 |
1913 | 2.882979 | ASSENCIERES | 010014 |
7157 | 2.666667 | SALEIGNES | 017416 |
8806 | 2.487805 | VILLEFRANCHE-LE-CHATEAU | 026375 |
10705 | 2.416357 | SARREY | 052461 |
14244 | 2.191358 | HEILTZ-L'EVEQUE | 051290 |
17956 | 2.162162 | BOUILH-DEVANT | 065102 |
7830 | 2.144330 | SANTA-LUCIA-DI-MERCURIO | 02B306 |
12089 | 2.044643 | DOMEYRAT | 043086 |
2242 | 2.009524 | EAUX-PUISEAUX | 010133 |
13189 | 1.993243 | SAINT-MURY-MONTEYMOND | 038430 |
13848 | 1.986248 | CHAUSSEE-SUR-MARNE (LA ) | 051141 |
# lowest debt ratio
_df = df.sort(columns='debt_ratio', ascending=True)
_df[['debt_ratio', 'name', 'insee_code']].head(n=20)
debt_ratio | name | insee_code | |
---|---|---|---|
6520 | 0 | SAINT-PIERRE-EN-VAUX | 021566 |
25748 | 0 | DONZY-LE-PERTUIS | 071181 |
3065 | 0 | MAS (LE ) | 006081 |
14705 | 0 | CELLES | 034072 |
14696 | 0 | CAUSSINIOJOULS | 034062 |
25755 | 0 | EPERTULLY | 071188 |
14693 | 0 | QUEUDES | 051451 |
25758 | 0 | ESSERTENNE | 071191 |
29043 | 0 | ROUY-LE-GRAND | 080683 |
14687 | 0 | PONTHION | 051441 |
14685 | 0 | POIX | 051438 |
14683 | 0 | POCANCY | 051435 |
25762 | 0 | FARGES-LES-MACON | 071195 |
3076 | 0 | PIERLAS | 006096 |
25769 | 0 | FRETTE (LA ) | 071206 |
9067 | 0 | BERNIENVILLE | 027057 |
5355 | 0 | SAVARTHES | 031537 |
14674 | 0 | PASSAVANT-EN-ARGONNE | 051424 |
14628 | 0 | MATOUGUES | 051357 |
14630 | 0 | MECRINGES | 051359 |
df[['staff_costs_ratio']].describe()
staff_costs_ratio | |
---|---|
count | 36663.000000 |
mean | 0.271105 |
std | 0.118803 |
min | -0.163265 |
25% | 0.183580 |
50% | 0.272727 |
75% | 0.354829 |
max | 1.667286 |
plt.figure(figsize=(12,12));
df[['staff_costs_ratio']].boxplot()
_df = df.sort(columns='staff_costs_ratio', ascending=False)
_df[['staff_costs_ratio', 'name', 'insee_code']].head(n=20)
staff_costs_ratio | name | insee_code | |
---|---|---|---|
32562 | 1.667286 | REGINA | 102301 |
32571 | 0.744156 | ROURA | 102310 |
32597 | 0.735596 | SAINT-PHILIPPE | 104417 |
32575 | 0.724289 | SAUL | 102352 |
29228 | 0.702703 | CHAMPOULET | 045070 |
35052 | 0.697193 | SAINT-LOUIS | 104414 |
32333 | 0.695884 | FONDS-SAINT-DENIS | 103208 |
32567 | 0.690004 | MANA | 102306 |
32547 | 0.689758 | MORNE-ROUGE (LE ) | 103218 |
32330 | 0.672104 | CARBET (LE ) | 103204 |
32582 | 0.670516 | APATOU | 102360 |
32600 | 0.664639 | SAINTE-MARIE | 104418 |
32334 | 0.660211 | GRAND-RIVIERE | 103211 |
32569 | 0.659062 | SAINT-GEORGES | 102308 |
32335 | 0.658672 | FORT-DE-FRANCE | 103209 |
32560 | 0.657873 | MORNE-VERT (LE ) | 103233 |
32594 | 0.653504 | SAINT-BENOIT | 104410 |
36660 | 0.653467 | VAUCLIN (LE ) | 103232 |
32324 | 0.650403 | TERRE DE BAS | 101130 |
32319 | 0.649391 | POINTE NOIRE | 101121 |
print df[['home_tax_rate', 'property_tax_rate', 'staff_costs_ratio', 'debt_ratio']].corr()
plot(df['property_tax_rate'], df['staff_costs_ratio'], 'o')
home_tax_rate property_tax_rate staff_costs_ratio debt_ratio home_tax_rate 1.000000 0.449842 0.179622 0.024401 property_tax_rate 0.449842 1.000000 0.400445 0.029337 staff_costs_ratio 0.179622 0.400445 1.000000 0.000091 debt_ratio 0.024401 0.029337 0.000091 1.000000
[<matplotlib.lines.Line2D at 0x111e58b10>]
df[['name', 'insee_code', 'staff_costs']].ix[df['staff_costs_ratio'].argmax()]
name REGINA insee_code 102301 staff_costs 897000 Name: 32562, dtype: object