import pandas as pd
Name statistics from SCB (Statistics Sweden): http://www.statistikdatabasen.scb.se/pxweb/sv/ssd/START__BE__BE0001/BE0001T04BAr
Inspiration: http://www.randalolson.com/2014/12/06/top-25-most-gender-neutral-names-in-the-u-s/
# Load the two name-count tables. Cells containing '..' are parsed as NaN and
# the first CSV column becomes the row index.
# NOTE(review): the column labels are presumably years (the code below selects
# the label '2014') and the row labels first names -- confirm against the CSVs.
girls_names = pd.read_csv('girls_names_2014.csv', na_values=('..',), index_col=0)
boys_names = pd.read_csv('boys_names_2014.csv', na_values=('..',), index_col=0)

# Drop rows that contain any NaN, then unstack each table into one long column
# indexed by (column label, row label). The inner merge on that index keeps
# only the entries present in both tables.
names = pd.merge(girls_names.dropna().unstack().to_frame('girls'),
                 boys_names.dropna().unstack().to_frame('boys'),
                 how='inner', left_index=True, right_index=True)

# 'sum' is the combined count; 'cmp' is the absolute girls/boys difference
# (0 means the name was given equally often to both).
names['sum'] = names[['girls', 'boys']].sum(axis=1)
names['cmp'] = (names['girls'] - names['boys']).abs()

# Rank by smallest difference first and, within equal differences, by largest
# combined count. A single multi-key sort_values is used because
# DataFrame.sort no longer exists in current pandas; the multi-key sort is
# stable, so fully tied rows keep their original relative order.
top_neutral = (
    names.sort_values(['cmp', 'sum'], ascending=[True, False])[['girls', 'boys']]
    .xs('2014', level=0)  # keep only the '2014' entries; drops that index level
    .head(n=25)
)
top_neutral
name | girls | boys |
---|---|---|
Mayar | 4 | 4 |
Mino | 4 | 4 |
Adama | 2 | 2 |
Alexi | 2 | 2 |
Amine | 2 | 2 |
Bille | 2 | 2 |
Bon | 2 | 2 |
Casey | 2 | 2 |
Mischa | 2 | 2 |
Sunny | 2 | 2 |
Tin | 2 | 2 |
Ilon | 4 | 5 |
Elham | 2 | 3 |
Eris | 2 | 3 |
Frankie | 2 | 3 |
Freddie | 3 | 2 |
Ira | 3 | 2 |
Nicky | 2 | 3 |
Nicola | 3 | 2 |
Sindre | 2 | 3 |
Teddie | 2 | 3 |
Ward | 2 | 3 |
Arya | 6 | 4 |
Lee | 6 | 4 |
Nima | 4 | 6 |