from pathlib import Path
import pandas as pd
import numpy as np
import re
import matplotlib as mpl
mpl.use('agg')
%pylab inline
chtfont2 = mpl.font_manager.FontProperties(fname='/Library/Fonts/儷黑 Pro.ttf')
chtfont = mpl.font_manager.FontProperties(fname='/Users/liang/Library/Fonts/DFHeiStd-W5.otf')
Populating the interactive namespace from numpy and matplotlib
rcParams['figure.figsize'] = (10, 4)
rcParams['savefig.format'] = 'svg'
final_dir = Path('../../1_政策宣傳執情形/總整理')
all_xls = [p for p in final_dir.iterdir() if p.suffix == '.xlsx' and '勞動' not in str(p)]
df_by_disp = {
p.stem[:3]: pd.read_excel(str(p), 0)
for p in all_xls
}
all_disp = set(df_by_disp.keys())
all_disp
{'原民會', '原能會', '國防部', '外交部', '教育部', '法務部', '農委會', '陸委會'}
RESULT_DIR = final_dir.parent / "輸出圖表"
RESULT_DIR
PosixPath('../../1_政策宣傳執情形/輸出圖表')
#報紙、雜誌、電視、廣播、行動、網路、電影院、燈箱、其他
media_type = '報紙、雜誌、電視、廣播、行動、網路、電影院、燈箱、其他'.split('、')
def split_media_share(media_share):
splitted_media_share = media_share.copy()
for media, share in media_share.items():
if media not in media_type:
mix_media = [
(m if m in media_type else '其他')
for m in media.replace(' ', '').split('、')
]
total_share = sum(media_share[m] for m in mix_media)
adj_weight = [share * media_share[m] / total_share for m in mix_media]
for m, s in zip(mix_media, adj_weight):
splitted_media_share[m] += s
del splitted_media_share[media]
return {k: round(v) for k, v in splitted_media_share.items()}
dfs_media_share = []
for disp, df in df_by_disp.items():
media_share = df.groupby('媒體').sum().loc[:, '金額'].to_dict()
dfs_media_share.append(pd.DataFrame([split_media_share(media_share)], index=[disp]))
df_final = pd.concat(dfs_media_share)
df_final = df_final.fillna(value=0)[media_type]
df_final['總和'] = df_final.sum(axis=1)
考慮到各部會收集的總時間:
timespan_span = '''\
- 農委會:24個月(100-102)
- 教育部:24個月(100-102)
- 陸委會:24個月(100-102)
- 外交部:24個月
- 國防部:10個月
- 原民會:13個月(100-102)
- 法務部:14個月(100-102)
- 原能會:23個月(100-102)
'''
parse_time = re.compile('-\s+(.{3}):(\d+)個月', re.UNICODE).search
df_time_span = pd.DataFrame(
[parse_time(m).groups() for m in timespan_span.splitlines()],
)
df_time_span.index = df_time_span[0].values
df_time_span = pd.DataFrame(df_time_span[1])
df_time_span.columns = ['資料時間']
df_final = pd.concat([df_final, df_time_span], axis=1)
df_final['每年平均預算支出'] = df_final['總和'] / df_final['資料時間'].astype(np.int) * 12
df_final.to_excel(str(RESULT_DIR/'所有部會按媒體類型預算金額.xlsx'))
df_final.to_csv(str(RESULT_DIR/'所有部會按媒體類型預算金額.csv'))
disp_total_ad = df_final.sort(columns=['每年平均預算支出'])
y_pos = np.arange(len(disp_total_ad))
plt.barh(
y_pos,
disp_total_ad['每年平均預算支出'],
align='center', alpha=0.7
)
plt.yticks(y_pos, disp_total_ad.index, font_properties=chtfont)
plt.title('行政部會平均每年廣告總支出排行', font_properties=chtfont, fontsize=12)
plt.savefig(str(RESULT_DIR/'廣告總支出排行.svg'))
plt.show()
import brewer2mpl
color_set = brewer2mpl.get_map('Set3', 'Qualitative', 9)
color_set.hex_colors
['#8DD3C7', '#FFFFB3', '#BEBADA', '#FB8072', '#80B1D3', '#FDB462', '#B3DE69', '#FCCDE5', '#D9D9D9']
colors = pd.Series(color_set.hex_colors)
colors.index = media_type
colors
報紙 #8DD3C7 雜誌 #FFFFB3 電視 #BEBADA 廣播 #FB8072 行動 #80B1D3 網路 #FDB462 電影院 #B3DE69 燈箱 #FCCDE5 其他 #D9D9D9 dtype: object
arr = df_final.ix[0, :'其他']
arr = arr[arr != 0]
arr.sort()
plt.axes(aspect=1)
patches, texts, autotexts = plt.pie(
arr,
labels=arr.index.values,
colors=colors[arr.index],
autopct='%1.1f%%',
labeldistance=1.2,
startangle=90
)
plt.setp(autotexts, color='w')
plt.setp(texts, font_properties=chtfont)
plt.title('勞動部', font_properties=chtfont, fontsize=16)
legend_font = chtfont.copy()
legend_font.set_size(14)
plt.legend(loc='center right', prop=legend_font, bbox_to_anchor=(-0.2, 0.5))
plt.show()
fig, axs = plt.subplots(3,3)
fig.set_size_inches(15, 15)
for (disp, row), ax in zip(df_final.iterrows(), axs.ravel()):
arr = row.loc[:'其他']
arr = arr[arr != 0]
arr.sort()
ax.set_aspect('equal')
patches, texts, autotexts = ax.pie(
arr,
labels=arr.index.values,
colors=colors[arr.index],
autopct='%1.1f%%',
labeldistance=1.1,
startangle=90,
radius=1
)
plt.setp(autotexts, color='w')
plt.setp(texts, font_properties=chtfont, size=16)
ax.set_title(disp, font_properties=chtfont, fontsize=20)
legend_font = chtfont.copy()
legend_font.set_size(16)
axs[0,1].legend(loc='center left', prop=legend_font, bbox_to_anchor=(1.25, -1.8))
axs[-1, -1].axis('off')
fig.subplots_adjust(wspace=0.1, hspace=0.2)
fig.savefig(str(RESULT_DIR/'各部會媒體類型圓餅圖.svg'))