import os
import os.path
import datetime

import pandas as pd

#  Download CSV files
def download_csv(filenames):
    for filename in filenames:
        print("download {filename}...".format(filename=filename))
        os.system("curl http://www.city.sendai.jp/kikaku/seisaku/toukei/jinkou/kakusai/{filename} -o data/{filename}".format(filename=filename))
    print("Done!")

# download_filenames = ["h2004.xls", "h2104.xls", "h2204.xls" , "h2304.xls","h2404.xls", "h2504.xls", "h2604.xls"]
download_filenames = ["h2004.xls"]
download_csv(download_filenames)

def generate_df(filepath):
    print("{datetime} {filepath}".format(datetime=datetime.datetime.now(), filepath=filepath))
        
    filename = os.path.basename(filepath)
    xls = pd.ExcelFile(filepath)

    # 全ての人口統計を入れるデータフレーム
    df_all = pd.DataFrame(columns=['year', 'month', 'ward', 'town', 'gender', 'age', 'head_count'])

    print "    Sheet -> ", 

    for sheet_index_num, sheet_name in enumerate(xls.sheet_names):
        #  『合計』のシートは除く
        if u'合計' in sheet_name or u'計' in sheet_name or u'総数' in sheet_name:
            continue

        print sheet_name,

        df_xls= xls.parse(xls.sheet_names[sheet_index_num], skiprows=3)

        heisei_year = filename[1:3]
        df_xls['year'] = int(heisei_year) + 1988
        df_xls['month'] =  int(filename[3:5])

        # ward と town をクレンジングして追加
        df_xls['ward'] = df_xls.iloc[:,0].fillna(method='ffill').apply(lambda x : x.strip())
        df_xls['town'] = df_xls.iloc[:,1].fillna(method='ffill').apply(lambda x : x.strip())

        # 『町名』列にある不要な『合計』行を削除
        df_xls = df_xls[(df_xls['town'] != u'合計') & (df_xls['town'] != u'計') & (df_xls['town'] != u'総数') ]

        # 性別を振り分け
        gender = None
        if u'男' in sheet_name:
            gender = '男'
        if u'女' in sheet_name:
            gender = '男'
        df_xls['gender'] = gender
        df_xls.head()

        # 年齢毎に処理
        for age in range(0, 101):
            df_xls['age'] = age
            df_xls['head_count'] = df_xls.iloc[:, age + 3].apply(lambda x: x if isinstance(x, int) else None).fillna(method='ffill')
            df_all= pd.concat([df_all, df_xls.iloc[:, 104:]])
    
    print("Done!")
    return df_all

def get_df_all_demographic(filenames, dirpath = 'data/'):
    df = pd.DataFrame()

    dirpath = 'data/'
    print("Convert {num} files...".format(num=len(filenames)))
    for filename in filenames:
        filepath = dirpath + filename
        df = pd.concat([df,  generate_df(filepath)])

    return df

# filenames = ["h2004.xls", "h2104.xls", "h2204.xls" , "h2304.xls","h2404.xls", "h2504.xls", "h2604.xls"]
# NOTE: h20~h23のcsvのデータ形式が異なるので除外
filenames = ["h2404.xls", "h2504.xls", "h2604.xls"]

# データ読み込み
df_demographic = get_df_all_demographic(filenames)
df_demographic.info()
df_demographic

# 読み込んだデータをCSVに書き出し
df_demographic.to_csv('data/all_demographic.csv', index=False, encoding='UTF-8')
%ls -l data/all_demographic.csv

# 2014年4月の仙台の総人口
df_demographic[df_demographic.year == 2014].head_count.sum()

df_work = pd.DataFrame(columns=['year', 'group', 'ward', 'head_count'], index=range(len(df_demographic.ward.unique())))
df_work

latest_year = int(df_demographic.year.unique().max())

df_work['ward'] = df_demographic[df_demographic.year == latest_year].groupby(['ward'])['head_count'].sum().index
df_work['group'] = u'中学生'
df_work

# 最新年から10年間
df = pd.DataFrame()
for i in range(10):
    df_work.year = latest_year + i
    df_work.head_count = df_demographic[(df_demographic.year == latest_year) & (12 - i <= df_demographic.age) & (df_demographic.age <= 12 + 2 - i)].groupby(['ward'])['head_count'].sum().values
    df = pd.concat([df, df_work])

df

df_pivot = pd.pivot_table(df, values='head_count', index='year', columns='ward', aggfunc=sum, fill_value=0)
df_pivot

df_pivot.plot()
prop_mac_jp = {'fname' : r'/Library/Fonts/Osaka.ttf'}
plt.legend(prop=prop_mac_jp, loc='lower left')