In [1]:
import pandas as pd
In [3]:
# Load the 2019 records. The file carries no header row, so the column
# labels are supplied here.
df2019 = pd.read_csv(
    'names/yob2019.txt',
    header=None,
    names=['Name', 'Sex', 'Cnt'],
)
df2019
Out[3]:
Name Sex Cnt
0 Olivia F 18451
1 Emma F 17102
2 Ava F 14440
3 Sophia F 13714
4 Isabella F 13306
... ... ... ...
31949 Zyheem M 5
31950 Zykel M 5
31951 Zyking M 5
31952 Zyn M 5
31953 Zyran M 5

31954 rows × 3 columns

In [4]:
# Per-sex maximum of every column. Each column is maximised independently,
# so the Name shown is the greatest name in string ordering for that sex,
# not the name that holds the maximum Cnt.
df2019.groupby(by='Sex').agg('max')
Out[4]:
Name Cnt
Sex
F Zyva 18451
M Zyrus 20502
In [5]:
# Ten highest-count male names of 2019: keep the 'M' rows, then order by
# Cnt from largest to smallest.
(
    df2019
    .loc[df2019['Sex'] == 'M']
    .sort_values(by='Cnt', ascending=False)
    .head(10)
)
Out[5]:
Name Sex Cnt
17905 Liam M 20502
17906 Noah M 19048
17907 Oliver M 13891
17908 William M 13542
17909 Elijah M 13300
17910 James M 13087
17911 Benjamin M 12942
17912 Lucas M 12412
17913 Mason M 11408
17914 Ethan M 11241
In [6]:
# Ten highest-count female names of 2019: keep the 'F' rows, then order by
# Cnt from largest to smallest.
(
    df2019
    .loc[df2019['Sex'] == 'F']
    .sort_values(by='Cnt', ascending=False)
    .head(10)
)
Out[6]:
Name Sex Cnt
0 Olivia F 18451
1 Emma F 17102
2 Ava F 14440
3 Sophia F 13714
4 Isabella F 13306
5 Charlotte F 13138
6 Amelia F 12862
7 Mia F 12414
8 Harper F 10442
9 Evelyn F 10392
In [8]:
# Most frequent name for each sex: once rows are ordered by Cnt descending,
# keeping only the first row encountered per Sex leaves the top entry of each.
(
    df2019
    .sort_values(by='Cnt', ascending=False)
    .drop_duplicates(subset='Sex', keep='first')
)
Out[8]:
Name Sex Cnt
17905 Liam M 20502
0 Olivia F 18451
In [9]:
# Read every yearly file (1880 through 2019), tag its rows with the year,
# and stack everything into one frame.
#
# The per-year frames are collected in a list and concatenated once at the
# end. Calling pd.concat inside the loop would re-copy all rows accumulated
# so far on every iteration, which grows quadratically with the number of files.
yearly_frames = []
for year in range(1880, 2020):
    tmp = pd.read_csv(f'names/yob{year}.txt', names=['Name', 'Sex', 'Cnt'])
    tmp['Year'] = year
    yearly_frames.append(tmp)

# No ignore_index: each file keeps its own row labels, so the index restarts
# at 0 for every year.
result = pd.concat(yearly_frames)

result
Out[9]:
Name Sex Cnt Year
0 Mary F 7065 1880
1 Anna F 2604 1880
2 Emma F 2003 1880
3 Elizabeth F 1939 1880
4 Minnie F 1746 1880
... ... ... ... ...
31949 Zyheem M 5 2019
31950 Zykel M 5 2019
31951 Zyking M 5 2019
31952 Zyn M 5 2019
31953 Zyran M 5 2019

1989401 rows × 4 columns

In [10]:
# Persist the combined frame. The row index is left out of the file
# (index=False), so only the data columns are written.
result.to_csv(path_or_buf='result.csv', index=False)
In [ ]: