import os

import matplotlib.pyplot as plt
import pandas as pd

# Data files are looked up relative to the parent of the working directory.
curdir = os.path.abspath('./..')

df = pd.read_csv(os.path.join(curdir, 'scraped_data', 'region_all.csv'))
df[['name', 'year', 'net_profit', 'tipp', 'staff_costs', 'financial_costs',
    'debt_repayments', 'allocation']].head(n=20)

# Normalise a few budget lines by the population of each row.
df['staff_costs_per_person'] = df['staff_costs'] / df['population']
df['allocation_per_person'] = df['allocation'] / df['population']
df['operating_costs_per_person'] = df['operating_costs'] / df['population']
df[['name', 'year', 'staff_costs_per_person', 'allocation_per_person',
    'operating_costs_per_person']].head(n=26)

# One row per (insee_code, year); used for the per-region curves.
dfgp = df.groupby(['insee_code', 'year']).first()
# numeric_only=True keeps text columns such as 'name' out of the aggregation
# (recent pandas versions raise instead of silently dropping them).
dfmean = df.groupby('year').mean(numeric_only=True)
total_by_year = df.groupby('year').sum(numeric_only=True)
population_by_year = df.groupby('year')['population'].sum()


def plot_value(value, by_person=True):
    """Plot column ``value`` over the years for the first five regions.

    One curve is drawn per region (the first five distinct level-0 keys of
    ``dfgp``), followed by a curve labelled ``'mean'``.

    Parameters
    ----------
    value : str
        Name of the column of ``dfgp`` / ``dfmean`` to plot.
    by_person : bool, optional
        If True (default), each region's values are divided by that region's
        population, and the reference curve is the yearly total of ``value``
        divided by the yearly total population.  If False, raw values are
        plotted and the reference curve is the plain yearly mean.

    Returns
    -------
    None
        The figure is created through ``plt.subplots`` as a side effect.
    """
    fig, axes = plt.subplots(figsize=(12, 6))
    if by_person:
        axes.set_title('%s by person from 2008 to 2012' % value)
    else:
        axes.set_title('%s from 2008 to 2012' % value)

    regions = dfgp.index.get_level_values(0).unique()[:5]
    for reg in regions:
        data = dfgp[value][reg]
        if by_person:
            data = data / dfgp['population'][reg]
        # Plot against the region's own year index so a region with a
        # missing year does not cause an x/y length mismatch.
        axes.plot(data.index, data.tolist(),
                  label=dfgp['name'][reg].iloc[0])

    if by_person:
        # Total over total: dividing the *mean* across regions by the summed
        # population would understate the curve by the number of regions.
        mean = total_by_year[value] / population_by_year
    else:
        mean = dfmean[value]
    axes.plot(mean.index, mean.tolist(), label='mean')
    axes.legend(loc='best')


plot_value('tipp')
plot_value('debt_annual_costs')
plot_value('staff_costs')
plot_value('staff_costs', by_person=False)

df = pd.read_csv(os.path.join(curdir, 'nosdonnees', 'region_all.csv'))
df.columns.tolist()