"""Exploratory analysis of yearly baby-name files (``yob<year>.txt``).

Each input file is read as a header-less CSV with the columns
``name``, ``sex`` and ``births``.  The files for 1880 through 2011 are
stacked into one frame (``names``) and summarised in several ways:

* total births per year and sex, plus the female/male ratio;
* the ten names with the most births overall and their yearly trend;
* a (sex, name) x year table of births.

NOTE(review): the file layout looks like the US SSA baby-names data set;
confirm against the actual download.

Bare expressions such as ``names.describe()`` are kept from the original
interactive session; they only display something in a REPL/notebook.
Likewise the ``.plot()`` calls need an interactive matplotlib backend to
show a figure.
"""
import pandas as pd
import numpy as np

# Directory holding the ``yob<year>.txt`` files and the column names that
# are assigned to them (the files carry no header row).
DATA_DIR = '/Users/lpd/Downloads/names'
COLUMNS = ['name', 'sex', 'births']

# --- Single-year sanity check -------------------------------------------
name1880 = pd.read_csv('%s/yob1880.txt' % DATA_DIR, names=COLUMNS)
name1880.groupby('sex').births.sum()

# --- Load every year into one long frame --------------------------------
years = range(1880, 2012)  # 1880..2011 inclusive
frames = []
for year in years:
    frame = pd.read_csv('%s/yob%d.txt' % (DATA_DIR, year), names=COLUMNS)
    frame['year'] = year  # the files do not store the year themselves
    frames.append(frame)

# ignore_index=True: discard the per-file row numbers so the combined
# frame gets one continuous index.
names = pd.concat(frames, ignore_index=True)
names.describe()
names.year

# --- Total births per year, split by sex --------------------------------
# ``index=``/``columns=`` replace the removed ``rows=``/``cols=`` keywords.
total_births = names.pivot_table(
    'births', index='year', columns='sex', aggfunc='sum')
total_births.tail()
total_births.plot()
names

# Despite the column name, 'diff' holds the female-to-male births ratio.
total_births['diff'] = total_births['F'] / total_births['M']
total_births.tail()
total_births['diff'].plot()
total_births['diff']

# --- Ten names with the most births over all years ----------------------
# groupby(...).sum() yields a Series regardless of pandas version, whereas
# pivot_table with a scalar ``values`` returns a DataFrame in newer pandas.
names_pivot1 = names.groupby('name')['births'].sum()
# sort_values() replaces the removed Series.order(); ascending order, so
# the last ten entries are the ten largest totals.
top_names = names_pivot1.sort_values()[-10:]
top_names
top_names_list = list(top_names.index.values)
top_names_list

# --- Yearly trend of the top names --------------------------------------
names_by_year = names.pivot_table(
    'births', index='year', columns='name', aggfunc='sum')
names_by_year[top_names_list].plot()

# The list is sorted ascending, so the slice [5:] keeps the five names
# with the highest totals.
top5_names = top_names_list[5:]
names_by_year[top5_names].plot(subplots=True)

# --- Births per (sex, name) and year ------------------------------------
# No aggfunc is given, so pivot_table applies its default (mean).
same_names = names.pivot_table(
    'births', index=['sex', 'name'], columns=['year'])