last_year = 2013 # change this when the Social Security database is updated
save_path = "user_singletons" # directory where files created by this notebook are saved

import time
import os
if not os.path.isdir(save_path): # create the output directory (and any missing parents) if it does not exist yet
    os.makedirs(save_path)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import seaborn # comment out if you don't have it, but it makes good-looking charts
%run download_and_process.py

# used to round limit of y axis up to second-most-significant digit
def determine_y_limit(x):
    """Round *x* up to the next step of its second-most-significant digit.

    Intended for choosing the upper limit of a chart's y axis.  The result is
    always strictly greater than *x*: a value already sitting on a step is
    still moved up by one step (123 -> 130, 100 -> 110, 95 -> 96).

    Args:
        x: A positive number (typically the largest value to be plotted).

    Returns:
        The rounded-up limit.  For values below 10 the step is fractional, so
        the result is a float.

    Raises:
        ValueError: If *x* is zero or negative (log10 is undefined there).
    """
    # floor/log10 are not imported anywhere in the visible file, so take them
    # explicitly from the standard library instead of relying on the namespace.
    import math

    if x <= 0:
        raise ValueError(
            "determine_y_limit requires a positive value, got %r" % (x,))

    # Exponent of the leading digit, e.g. 2 for 123.
    significance = int(math.floor(math.log10(x)))
    # Size of one unit of the second-most-significant digit, e.g. 10 for 123.
    step = 10 ** (significance - 1)
    return (math.floor(x / step) + 1) * step

# Rows of `names` whose first and last years coincide (presumably names that
# were recorded in a single year only).  .copy() makes this an independent
# frame, so the column changes below never write into a slice of `names`.
df_oneyear = names.loc[names.year_min == names.year_max,
                       ['name', 'sex', 'year_min', 'pct_max']].copy()
df_oneyear.columns = ['name', 'sex', 'year', 'pct']

oneyearnames = list(df_oneyear.name.unique())
yobcopy = yob.copy()
yobcopy = yobcopy[yobcopy.name.isin(oneyearnames)]

# Attach the birth count of each (name, sex) pair with one keyed join instead
# of a row-by-row loop that assigned through chained indexing (which rescans
# yobcopy for every row and does not write back under pandas copy-on-write).
# drop_duplicates keeps the first occurrence, matching the previous
# "first matching row" lookup; join(on=...) preserves df_oneyear's index.
first_births = (yobcopy.drop_duplicates(subset=['name', 'sex'])
                .set_index(['name', 'sex'])['births'])
df_oneyear = df_oneyear.join(first_births, on=['name', 'sex'])

# A pair missing from yob used to raise inside the loop; keep failing loudly
# rather than silently carrying NaN birth counts forward.
if df_oneyear['births'].isnull().any():
    raise ValueError('some (name, sex) pairs in df_oneyear have no row in yob')

df_oneyear.to_pickle(save_path+'/df_oneyear.pickle')

# Reload the frame saved above so this cell can be run without recomputing it.
df_oneyear = pd.read_pickle(save_path+'/df_oneyear.pickle')

# Largest birth counts first.  DataFrame.sort() was removed from pandas;
# sort_values() is its replacement.
df_oneyear = df_oneyear.sort_values('births', ascending=False)
df_oneyear.head(50)

%matplotlib inline
dictionary = {0:1000, 1:20, 2:15, 3:0, 4:5}
xmax = df_oneyear.year.max()
plt.figure() # <- makes a new figure and sets it active (add this)
plt.hist(list(df_oneyear.year),xmax) # <- finds the current active axes/figure and plots to it
plt.title('Histogram of names appearing only once in the Social Security database') 
plt.xlabel('Year')
plt.ylabel('Number of names')
plt.show()

# Groundwork for the share of births per year that came from single-appearance
# names: sum births by year for all rows (t), for 'M' rows (m) and for 'F'
# rows (f), then give every frame a float 'pct' column starting at 0.0.
male_rows = df_oneyear[df_oneyear['sex'] == 'M']
female_rows = df_oneyear[df_oneyear['sex'] == 'F']

df_oygrt = pd.DataFrame(df_oneyear.groupby('year')['births'].sum())
df_oygrm = pd.DataFrame(male_rows.groupby('year')['births'].sum())
df_oygrf = pd.DataFrame(female_rows.groupby('year')['births'].sum())

for yearly_births in (df_oygrt, df_oygrm, df_oygrf):
    yearly_births['pct'] = 0.0
# Fill df_oygrt.pct one row at a time, starting from that row's births value.
# NOTE(review): the expression below is cut off right after the division
# operator, so the denominator is not visible here -- TODO restore it.
for i in range(len(df_oygrt)):
    df_oygrt.pct.iloc[i] = df_oygrt.births.iloc[i] * 1.0 / 

# NOTE(review): `df_oygr` is not defined in the visible code (only df_oygrt,
# df_oygrm and df_oygrf are) -- confirm which frame was meant.
df_oygr.head()