from pathlib import Path import pandas as pd import numpy as np import re import matplotlib as mpl mpl.use('agg') %pylab inline chtfont2 = mpl.font_manager.FontProperties(fname='/Library/Fonts/儷黑 Pro.ttf') chtfont = mpl.font_manager.FontProperties(fname='/Users/liang/Library/Fonts/DFHeiStd-W5.otf') rcParams['figure.figsize'] = (10, 4) rcParams['savefig.format'] = 'svg' final_dir = Path('../../1_政策宣傳執情形/總整理') all_xls = [p for p in final_dir.iterdir() if p.suffix == '.xlsx' and '勞動' not in str(p)] df_by_disp = { p.stem[:3]: pd.read_excel(str(p), 0) for p in all_xls } all_disp = set(df_by_disp.keys()) all_disp RESULT_DIR = final_dir.parent / "輸出圖表" RESULT_DIR #報紙、雜誌、電視、廣播、行動、網路、電影院、燈箱、其他 media_type = '報紙、雜誌、電視、廣播、行動、網路、電影院、燈箱、其他'.split('、') def split_media_share(media_share): splitted_media_share = media_share.copy() for media, share in media_share.items(): if media not in media_type: mix_media = [ (m if m in media_type else '其他') for m in media.replace(' ', '').split('、') ] total_share = sum(media_share[m] for m in mix_media) adj_weight = [share * media_share[m] / total_share for m in mix_media] for m, s in zip(mix_media, adj_weight): splitted_media_share[m] += s del splitted_media_share[media] return {k: round(v) for k, v in splitted_media_share.items()} dfs_media_share = [] for disp, df in df_by_disp.items(): media_share = df.groupby('媒體').sum().loc[:, '金額'].to_dict() dfs_media_share.append(pd.DataFrame([split_media_share(media_share)], index=[disp])) df_final = pd.concat(dfs_media_share) df_final = df_final.fillna(value=0)[media_type] df_final['總和'] = df_final.sum(axis=1) timespan_span = '''\ - 農委會:24個月(100-102) - 教育部:24個月(100-102) - 陸委會:24個月(100-102) - 外交部:24個月 - 國防部:10個月 - 原民會:13個月(100-102) - 法務部:14個月(100-102) - 原能會:23個月(100-102) ''' parse_time = re.compile('-\s+(.{3}):(\d+)個月', re.UNICODE).search df_time_span = pd.DataFrame( [parse_time(m).groups() for m in timespan_span.splitlines()], ) df_time_span.index = df_time_span[0].values df_time_span = pd.DataFrame(df_time_span[1]) df_time_span.columns = ['資料時間'] df_final = pd.concat([df_final, df_time_span], axis=1) df_final['每年平均預算支出'] = df_final['總和'] / df_final['資料時間'].astype(np.int) * 12 df_final.to_excel(str(RESULT_DIR/'所有部會按媒體類型預算金額.xlsx')) df_final.to_csv(str(RESULT_DIR/'所有部會按媒體類型預算金額.csv')) disp_total_ad = df_final.sort(columns=['每年平均預算支出']) y_pos = np.arange(len(disp_total_ad)) plt.barh( y_pos, disp_total_ad['每年平均預算支出'], align='center', alpha=0.7 ) plt.yticks(y_pos, disp_total_ad.index, font_properties=chtfont) plt.title('行政部會平均每年廣告總支出排行', font_properties=chtfont, fontsize=12) plt.savefig(str(RESULT_DIR/'廣告總支出排行.svg')) plt.show() import brewer2mpl color_set = brewer2mpl.get_map('Set3', 'Qualitative', 9) color_set.hex_colors colors = pd.Series(color_set.hex_colors) colors.index = media_type colors arr = df_final.ix[0, :'其他'] arr = arr[arr != 0] arr.sort() plt.axes(aspect=1) patches, texts, autotexts = plt.pie( arr, labels=arr.index.values, colors=colors[arr.index], autopct='%1.1f%%', labeldistance=1.2, startangle=90 ) plt.setp(autotexts, color='w') plt.setp(texts, font_properties=chtfont) plt.title('勞動部', font_properties=chtfont, fontsize=16) legend_font = chtfont.copy() legend_font.set_size(14) plt.legend(loc='center right', prop=legend_font, bbox_to_anchor=(-0.2, 0.5)) plt.show() fig, axs = plt.subplots(3,3) fig.set_size_inches(15, 15) for (disp, row), ax in zip(df_final.iterrows(), axs.ravel()): arr = row.loc[:'其他'] arr = arr[arr != 0] arr.sort() ax.set_aspect('equal') patches, texts, autotexts = ax.pie( arr, labels=arr.index.values, colors=colors[arr.index], autopct='%1.1f%%', labeldistance=1.1, startangle=90, radius=1 ) plt.setp(autotexts, color='w') plt.setp(texts, font_properties=chtfont, size=16) ax.set_title(disp, font_properties=chtfont, fontsize=20) legend_font = chtfont.copy() legend_font.set_size(16) axs[0,1].legend(loc='center left', prop=legend_font, bbox_to_anchor=(1.25, -1.8)) axs[-1, -1].axis('off') fig.subplots_adjust(wspace=0.1, hspace=0.2) fig.savefig(str(RESULT_DIR/'各部會媒體類型圓餅圖.svg'))