Since the data is split into files by year, concatenate everything into a single DataFrame
%matplotlib inline
import numpy as np
import pandas as pd
"""
Please, write your code here!
"""
# Read one file per year (1880 through 2013 inclusive) and stack them into a
# single DataFrame. `names=` is passed without a `header` argument, so the
# first line of every file is read as data, not as a header row.
frames = []
for year in range(1880, 2014):  # `range` works on Python 2 and 3; `xrange` is Python 2 only
    frame = pd.read_csv("../../data/usnames/yob%s.txt" % year, names=['name', 'sex', 'births'])
    # The year is only encoded in the file name, so record it as a column.
    frame['year'] = year
    frames.append(frame)
# ignore_index=True discards the per-file row labels and renumbers rows 0..N-1.
name_frame = pd.concat(frames, ignore_index=True)
Calculate and plot the total number of births grouped by sex and year
# Total births for every (year, sex) pair, with the 'sex' index level pivoted
# into columns so that plotting draws one line per sex against the year index.
birth_year_sex = (
    name_frame
    .groupby(['year', 'sex'])['births']
    .sum()
    .unstack(level='sex')
)
birth_year_sex.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1043cfdd0>
Find the most popular female and male names in your birth year
# All male name records for 1979, ordered from most to fewest births.
# `sort_values` replaces the deprecated `sort_index(by=...)` spelling, which
# is no longer accepted by current pandas.
is_boy_1979 = (name_frame['year'] == 1979) & (name_frame['sex'] == 'M')
popular_boy_names_1979 = name_frame[is_boy_1979].sort_values(by='births', ascending=False)
# The first row after the descending sort is the most popular name.
popular_boy_names_1979.head(1)
name | sex | births | year | |
---|---|---|---|---|
856638 | Michael | M | 67725 | 1979 |
# All female name records for 1979, ordered from most to fewest births.
# `sort_values` replaces the deprecated `sort_index(by=...)` spelling, which
# is no longer accepted by current pandas.
is_girl_1979 = (name_frame['year'] == 1979) & (name_frame['sex'] == 'F')
popular_girl_names_1979 = name_frame[is_girl_1979].sort_values(by='births', ascending=False)
# The first row after the descending sort is the most popular name.
popular_girl_names_1979.head(1)
name | sex | births | year | |
---|---|---|---|---|
844681 | Jennifer | F | 56716 | 1979 |
Calculate the number of people born in your birth year carrying your name
# Select the row(s) of the 1979 male table whose name is exactly 'Manuel'.
named_manuel = popular_boy_names_1979['name'] == 'Manuel'
popular_boy_names_1979.loc[named_manuel]
name | sex | births | year | |
---|---|---|---|---|
856777 | Manuel | M | 1908 | 1979 |
Plot the number of people carrying your name over time
# Births of males named 'Manuel', indexed by year so the plot uses the year
# as its x-axis.
is_male_manuel = (name_frame['sex'] == 'M') & (name_frame['name'] == 'Manuel')
male_manuel_per_year = name_frame.loc[is_male_manuel, ['year', 'births']].set_index('year')
male_manuel_per_year.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1108e1990>
# Births per year for every record named 'Manuel', regardless of sex, with
# one column per sex so the plot draws a separate line for each.
manuel_rows = name_frame.loc[name_frame['name'] == 'Manuel']
manuel_per_year = manuel_rows.groupby(['year', 'sex'])['births'].sum().unstack()
manuel_per_year.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1108a0c90>