In [1]:
import pandas as pd
In [2]:
# Load the city table from a ';'-separated CSV file and display it.
df = pd.read_csv('city.csv', delimiter=';')
df
Out[2]:
ID Name CountryCode District Population
0 1 Kabul AFG Kabol 1780000
1 2 Qandahar AFG Qandahar 237500
2 3 Herat AFG Herat 186800
3 4 Mazar-e-Sharif AFG Balkh 127800
4 5 Amsterdam NLD Noord-Holland 731200
... ... ... ... ... ...
4074 4075 Khan Yunis PSE Khan Yunis 123175
4075 4076 Hebron PSE Hebron 119401
4076 4077 Jabaliya PSE North Gaza 113901
4077 4078 Nablus PSE Nablus 100231
4078 4079 Rafah PSE Rafah 92020

4079 rows × 5 columns

In [4]:
# Cities ordered from the largest Population value to the smallest.
# sort_values returns a new frame, and the original row labels are kept.
df.sort_values('Population', ascending=False)
Out[4]:
ID Name CountryCode District Population
1023 1024 Mumbai (Bombay) IND Maharashtra 10500000
2330 2331 Seoul KOR Seoul 9981619
205 206 São Paulo BRA São Paulo 9968485
1889 1890 Shanghai CHN Shanghai 9696300
938 939 Jakarta IDN Jakarta Raya 9604900
... ... ... ... ... ...
2315 2316 Bantam CCK Home Island 503
3537 3538 Città del Vaticano VAT 455
3332 3333 Fakaofo TKL Fakaofo 300
2316 2317 West Island CCK West Island 167
2911 2912 Adamstown PCN 42

4079 rows × 5 columns

In [5]:
# Load the country table from a ';'-separated CSV file and display it.
df2 = pd.read_csv('country.csv', delimiter=';')
df2
Out[5]:
Code Name Continent Region SurfaceArea IndepYear Population LifeExpectancy GNP GNPOld LocalName GovernmentForm HeadOfState Capital Code2
0 ABW Aruba North America Caribbean 193.0 NaN 103000 78.4 828.0 793.0 Aruba Nonmetropolitan Territory of The Netherlands Beatrix 129.0 AW
1 AFG Afghanistan Asia Southern and Central Asia 652090.0 1919.0 22720000 45.9 5976.0 NaN Afganistan/Afqanestan Islamic Emirate Mohammad Omar 1.0 AF
2 AGO Angola Africa Central Africa 1246700.0 1975.0 12878000 38.3 6648.0 7984.0 Angola Republic José Eduardo dos Santos 56.0 AO
3 AIA Anguilla North America Caribbean 96.0 NaN 8000 76.1 63.2 NaN Anguilla Dependent Territory of the UK Elisabeth II 62.0 AI
4 ALB Albania Europe Southern Europe 28748.0 1912.0 3401200 71.6 3205.0 2500.0 Shqipëria Republic Rexhep Mejdani 34.0 AL
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
234 YEM Yemen Asia Middle East 527968.0 1918.0 18112000 59.8 6041.0 5729.0 Al-Yaman Republic Ali Abdallah Salih 1780.0 YE
235 YUG Yugoslavia Europe Southern Europe 102173.0 1918.0 10640000 72.4 17000.0 NaN Jugoslavija Federal Republic Vojislav Koštunica 1792.0 YU
236 ZAF South Africa Africa Southern Africa 1221037.0 1910.0 40377000 51.1 116729.0 129092.0 South Africa Republic Thabo Mbeki 716.0 ZA
237 ZMB Zambia Africa Eastern Africa 752618.0 1964.0 9169000 37.2 3377.0 3922.0 Zambia Republic Frederick Chiluba 3162.0 ZM
238 ZWE Zimbabwe Africa Eastern Africa 390757.0 1980.0 11669000 37.8 5951.0 8670.0 Zimbabwe Republic Robert G. Mugabe 4068.0 ZW

239 rows × 15 columns

In [8]:
# Countries ordered by the GNPOld column, highest value first.
# na_position='first' lists the rows with a missing GNPOld before all others.
df2.sort_values('GNPOld', ascending=False, na_position='first')
Out[8]:
Code Name Continent Region SurfaceArea IndepYear Population LifeExpectancy GNP GNPOld LocalName GovernmentForm HeadOfState Capital Code2
1 AFG Afghanistan Asia Southern and Central Asia 652090.0 1919.0 22720000 45.9 5976.0 NaN Afganistan/Afqanestan Islamic Emirate Mohammad Omar 1.0 AF
3 AIA Anguilla North America Caribbean 96.0 NaN 8000 76.1 63.2 NaN Anguilla Dependent Territory of the UK Elisabeth II 62.0 AI
5 AND Andorra Europe Southern Europe 468.0 1278.0 78000 83.5 1630.0 NaN Andorra Parliamentary Coprincipality NaN 55.0 AD
6 ANT Netherlands Antilles North America Caribbean 800.0 NaN 217000 74.7 1941.0 NaN Nederlandse Antillen Nonmetropolitan Territory of The Netherlands Beatrix 33.0 AN
10 ASM American Samoa Oceania Polynesia 199.0 NaN 68000 75.1 334.0 NaN Amerika Samoa US Territory George W. Bush 54.0 AS
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
231 VUT Vanuatu Oceania Melanesia 12189.0 1980.0 190000 60.6 261.0 246.0 Vanuatu Republic John Bani 3537.0 VU
58 DMA Dominica North America Caribbean 751.0 1978.0 71000 73.4 256.0 243.0 Dominica Republic Vernon Shaw 586.0 DM
190 SLB Solomon Islands Oceania Melanesia 28896.0 1978.0 444000 71.3 182.0 220.0 Solomon Islands Constitutional Monarchy Elisabeth II 3161.0 SB
212 TON Tonga Oceania Polynesia 650.0 1970.0 99000 67.9 146.0 170.0 Tonga Monarchy Taufa'ahau Tupou IV 3334.0 TO
233 WSM Samoa Oceania Polynesia 2831.0 1962.0 180000 69.2 141.0 157.0 Samoa Parlementary Monarchy Malietoa Tanumafili II 3169.0 WS

239 rows × 15 columns

In [9]:
# Re-display the city frame: the sort_values calls above returned new frames
# and were not assigned back, so df still has its original row order.
df
Out[9]:
ID Name CountryCode District Population
0 1 Kabul AFG Kabol 1780000
1 2 Qandahar AFG Qandahar 237500
2 3 Herat AFG Herat 186800
3 4 Mazar-e-Sharif AFG Balkh 127800
4 5 Amsterdam NLD Noord-Holland 731200
... ... ... ... ... ...
4074 4075 Khan Yunis PSE Khan Yunis 123175
4075 4076 Hebron PSE Hebron 119401
4076 4077 Jabaliya PSE North Gaza 113901
4077 4078 Nablus PSE Nablus 100231
4078 4079 Rafah PSE Rafah 92020

4079 rows × 5 columns

In [18]:
# Population ranking with the row labels renumbered from 0:
# reset_index(drop=True) discards the old labels instead of keeping them
# as a column. Passing ignore_index=True to sort_values is the one-call
# alternative for the same relabelling.
(
    df
    .sort_values('Population', ascending=False)
    .reset_index(drop=True)
)
Out[18]:
ID Name CountryCode District Population
0 1024 Mumbai (Bombay) IND Maharashtra 10500000
1 2331 Seoul KOR Seoul 9981619
2 206 São Paulo BRA São Paulo 9968485
3 1890 Shanghai CHN Shanghai 9696300
4 939 Jakarta IDN Jakarta Raya 9604900
... ... ... ... ... ...
4074 2316 Bantam CCK Home Island 503
4075 3538 Città del Vaticano VAT 455
4076 3333 Fakaofo TKL Fakaofo 300
4077 2317 West Island CCK West Island 167
4078 2912 Adamstown PCN 42

4079 rows × 5 columns

In [23]:
# First 50 rows when sorting by CountryCode and then by Name within each
# country, both keys in descending order.
(
    df
    .sort_values(['CountryCode', 'Name'], ascending=[False, False])
    .head(50)
)
Out[23]:
ID Name CountryCode District Population
4071 4072 Mutare ZWE Manicaland 131367
4070 4071 Mount Darwin ZWE Harare 164362
4067 4068 Harare ZWE Harare 1410000
4072 4073 Gweru ZWE Midlands 128037
4069 4070 Chitungwiza ZWE Harare 274912
4068 4069 Bulawayo ZWE Bulawayo 621742
3162 3163 Ndola ZMB Copperbelt 329200
3166 3167 Mufulira ZMB Copperbelt 123900
3161 3162 Lusaka ZMB Lusaka 1317000
3167 3168 Luanshya ZMB Copperbelt 118100
3163 3164 Kitwe ZMB Copperbelt 288600
3164 3165 Kabwe ZMB Central 154300
3165 3166 Chingola ZMB Copperbelt 142400
728 729 Wonderboom ZAF Gauteng 283289
743 744 Witbank ZAF Mpumalanga 167183
747 748 Westonaria ZAF Gauteng 159632
735 736 Welkom ZAF Free State 203296
727 728 Vereeniging ZAF Gauteng 328535
718 719 Vanderbijlpark ZAF Gauteng 468931
725 726 Umlazi ZAF KwaZulu-Natal 339233
737 738 Uitenhage ZAF Eastern Cape 192120
746 747 Springs ZAF Gauteng 162072
712 713 Soweto ZAF Gauteng 904165
732 733 Soshanguve ZAF Gauteng 242727
751 752 Rustenburg ZAF North West 97008
729 730 Roodepoort ZAF Gauteng 279340
748 749 Randfontein ZAF Gauteng 120838
724 725 Randburg ZAF Gauteng 341288
715 716 Pretoria ZAF Gauteng 658630
750 751 Potchefstroom ZAF North West 101817
714 715 Port Elizabeth ZAF Eastern Cape 752319
721 722 Pinetown ZAF KwaZulu-Natal 378810
722 723 Pietermaritzburg ZAF KwaZulu-Natal 370190
749 750 Paarl ZAF Western Cape 105768
744 745 Oberholzer ZAF Gauteng 164367
752 753 Nigel ZAF Gauteng 96734
733 734 Newcastle ZAF KwaZulu-Natal 222993
739 740 Mdantsane ZAF Eastern Cape 182639
754 755 Ladysmith ZAF KwaZulu-Natal 89292
740 741 Krugersdorp ZAF Gauteng 181503
731 732 Klerksdorp ZAF North West 261911
736 737 Kimberley ZAF Northern Cape 197254
719 720 Kempton Park ZAF Gauteng 442633
713 714 Johannesburg ZAF Gauteng 756653
716 717 Inanda ZAF KwaZulu-Natal 634065
745 746 Germiston ZAF Gauteng 164252
753 754 George ZAF Western Cape 93818
734 735 East London ZAF Eastern Cape 221047
717 718 Durban ZAF KwaZulu-Natal 566120
738 739 Chatsworth ZAF KwaZulu-Natal 189885
In [25]:
# First 50 rows when sorting by CountryCode and then by Population within
# each country; both keys ascend, which is the sort_values default.
(
    df
    .sort_values(['CountryCode', 'Population'])
    .head(50)
)
Out[25]:
ID Name CountryCode District Population
128 129 Oranjestad ABW 29034
3 4 Mazar-e-Sharif AFG Balkh 127800
2 3 Herat AFG Herat 186800
1 2 Qandahar AFG Qandahar 237500
0 1 Kabul AFG Kabol 1780000
59 60 Namibe AGO Namibe 118200
58 59 Benguela AGO Benguela 128300
57 58 Lobito AGO Benguela 130000
56 57 Huambo AGO Huambo 163100
55 56 Luanda AGO Luanda 2022000
61 62 The Valley AIA 595
60 61 South Hill AIA 961
33 34 Tirana ALB Tirana 270000
54 55 Andorra la Vella AND Andorra la Vella 21189
32 33 Willemstad ANT Curaçao 2345
67 68 Ajman ARE Ajman 114395
66 67 al-Ayn ARE Abu Dhabi 225970
65 66 Sharja ARE Sharja 320095
64 65 Abu Dhabi ARE Abu Dhabi 398695
63 64 Dubai ARE Dubai 669181
124 125 Tandil ARG Buenos Aires 91101
123 124 San Rafael ARG Mendoza 94651
122 123 Ezeiza ARG Buenos Aires 99578
121 122 San Luis ARG San Luis 110136
120 121 Pilar ARG Buenos Aires 113428
119 120 Concordia ARG Entre Rios 116485
118 119 Escobar ARG Buenos Aires 116675
117 118 San Juan ARG San Juan 119152
116 117 San Nicolás de los Arroyos ARG Buenos Aires 119302
115 116 Mendoza ARG Mendoza 123027
114 115 Comodoro Rivadavia ARG Chubut 124104
113 114 Río Cuarto ARG Córdoba 134355
112 113 San Fernando del Valle de Cata ARG Catamarca 134935
111 112 La Rioja ARG La Rioja 138117
110 111 Las Heras ARG Mendoza 145823
109 110 Formosa ARG Formosa 147636
108 109 San Fernando ARG Buenos Aires 153036
107 108 Ituzaingó ARG Buenos Aires 158197
106 107 Neuquén ARG Neuquén 167296
105 106 Hurlingham ARG Buenos Aires 170028
104 105 San Salvador de Jujuy ARG Jujuy 178748
103 104 Santiago del Estero ARG Santiago del Estero 189947
102 103 Guaymallén ARG Mendoza 200595
101 102 Posadas ARG Misiones 201273
100 101 Godoy Cruz ARG Mendoza 206998
99 100 Paraná ARG Entre Rios 207041
98 99 José C. Paz ARG Buenos Aires 221754
97 98 Resistencia ARG Chaco 229212
96 97 Esteban Echeverría ARG Buenos Aires 235760
95 96 Bahía Blanca ARG Buenos Aires 239810
In [34]:
# For each CountryCode take the largest city Population, then show the ten
# countries with the highest such value.
# Selecting with double brackets keeps the result a one-column DataFrame,
# so no Series -> DataFrame conversion is needed before sorting.
(
    df
    .groupby('CountryCode')[['Population']]
    .max()
    .sort_values('Population', ascending=False)
    .head(10)
)
Out[34]:
Population
CountryCode
IND 10500000
KOR 9981619
BRA 9968485
CHN 9696300
IDN 9604900
PAK 9269265
TUR 8787958
MEX 8591309
RUS 8389200
USA 8008278
In [ ]: