In [1]:
import pandas as pd
In [2]:
cities = pd.read_csv('city.csv', sep=';')
cities
Out[2]:
ID Name CountryCode District Population
0 1 Kabul AFG Kabol 1780000
1 2 Qandahar AFG Qandahar 237500
2 3 Herat AFG Herat 186800
3 4 Mazar-e-Sharif AFG Balkh 127800
4 5 Amsterdam NLD Noord-Holland 731200
... ... ... ... ... ...
4074 4075 Khan Yunis PSE Khan Yunis 123175
4075 4076 Hebron PSE Hebron 119401
4076 4077 Jabaliya PSE North Gaza 113901
4077 4078 Nablus PSE Nablus 100231
4078 4079 Rafah PSE Rafah 92020

4079 rows × 5 columns

In [8]:
cities.pivot_table(index='CountryCode', values='Population', aggfunc='sum')
Out[8]:
Population
CountryCode
ABW 29034
AFG 2332100
AGO 2561600
AIA 1556
ALB 270000
... ...
YEM 1743700
YUG 2189507
ZAF 15196370
ZMB 2473500
ZWE 2730420

232 rows × 1 columns

In [9]:
df = pd.read_csv('ratings.csv')
In [10]:
df
Out[10]:
userId movieId rating timestamp
0 1 1 4.0 964982703
1 1 3 4.0 964981247
2 1 6 4.0 964982224
3 1 47 5.0 964983815
4 1 50 5.0 964982931
... ... ... ... ...
100831 610 166534 4.0 1493848402
100832 610 168248 5.0 1493850091
100833 610 168250 5.0 1494273047
100834 610 168252 5.0 1493846352
100835 610 170875 3.0 1493846415

100836 rows × 4 columns

In [16]:
df[ df.userId == 1 ].rating.value_counts()
Out[16]:
5.0    124
4.0     76
3.0     26
2.0      5
1.0      1
Name: rating, dtype: int64
In [18]:
df.groupby('userId')['rating'].value_counts().head(20)
Out[18]:
userId  rating
1       5.0       124
        4.0        76
        3.0        26
        2.0         5
        1.0         1
2       4.0         9
        5.0         6
        3.0         4
        3.5         4
        4.5         4
        2.0         1
        2.5         1
3       0.5        20
        5.0        10
        4.5         5
        2.0         1
        3.0         1
        3.5         1
        4.0         1
4       4.0        64
Name: rating, dtype: int64
In [20]:
df.pivot_table(values='movieId', index='userId', columns='rating', aggfunc='count')
Out[20]:
rating 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
userId
1 NaN 1.0 NaN 5.0 NaN 26.0 NaN 76.0 NaN 124.0
2 NaN NaN NaN 1.0 1.0 4.0 4.0 9.0 4.0 6.0
3 20.0 NaN NaN 1.0 NaN 1.0 1.0 1.0 5.0 10.0
4 NaN 23.0 NaN 26.0 NaN 39.0 NaN 64.0 NaN 64.0
5 NaN 1.0 NaN 3.0 NaN 17.0 NaN 13.0 NaN 10.0
... ... ... ... ... ... ... ... ... ... ...
606 4.0 6.0 13.0 22.0 79.0 137.0 241.0 441.0 139.0 33.0
607 NaN 3.0 NaN 10.0 NaN 63.0 NaN 59.0 NaN 52.0
608 33.0 29.0 30.0 69.0 95.0 169.0 126.0 162.0 95.0 23.0
609 NaN NaN NaN NaN NaN 27.0 NaN 10.0 NaN NaN
610 3.0 13.0 11.0 42.0 74.0 230.0 315.0 286.0 148.0 180.0

610 rows × 10 columns

In [21]:
df.pivot_table(values='movieId', index='userId', columns='rating', aggfunc='count', fill_value=0)
Out[21]:
rating 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
userId
1 0 1 0 5 0 26 0 76 0 124
2 0 0 0 1 1 4 4 9 4 6
3 20 0 0 1 0 1 1 1 5 10
4 0 23 0 26 0 39 0 64 0 64
5 0 1 0 3 0 17 0 13 0 10
... ... ... ... ... ... ... ... ... ... ...
606 4 6 13 22 79 137 241 441 139 33
607 0 3 0 10 0 63 0 59 0 52
608 33 29 30 69 95 169 126 162 95 23
609 0 0 0 0 0 27 0 10 0 0
610 3 13 11 42 74 230 315 286 148 180

610 rows × 10 columns

In [22]:
df.pivot_table(values='movieId', index='userId', columns='rating', aggfunc='count', fill_value=0, margins=True)
Out[22]:
rating 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 All
userId
1 0 1 0 5 0 26 0 76 0 124 232
2 0 0 0 1 1 4 4 9 4 6 29
3 20 0 0 1 0 1 1 1 5 10 39
4 0 23 0 26 0 39 0 64 0 64 216
5 0 1 0 3 0 17 0 13 0 10 44
... ... ... ... ... ... ... ... ... ... ... ...
607 0 3 0 10 0 63 0 59 0 52 187
608 33 29 30 69 95 169 126 162 95 23 831
609 0 0 0 0 0 27 0 10 0 0 37
610 3 13 11 42 74 230 315 286 148 180 1302
All 1370 2811 1791 7551 5550 20047 13136 26818 8551 13211 100836

611 rows × 11 columns

In [23]:
data = df.pivot_table(values='movieId', index='userId', columns='rating', aggfunc='count', fill_value=0, margins=True)
In [27]:
data.sort_values(by=5, ascending=False)
Out[27]:
rating 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 All
userId
All 1370 2811 1791 7551 5550 20047 13136 26818 8551 13211 100836
380 2 16 0 94 13 399 25 391 4 274 1218
414 1 40 20 398 122 658 232 903 76 248 2698
275 0 10 0 31 0 62 0 111 0 189 403
305 4 3 3 14 43 103 89 161 75 182 677
... ... ... ... ... ... ... ... ... ... ... ...
132 1 2 12 55 30 114 64 55 14 0 347
311 5 3 2 1 3 8 2 3 1 0 28
478 1 0 2 5 3 2 4 3 2 0 22
293 0 0 0 11 0 7 0 3 0 0 21
133 0 1 0 5 0 22 0 7 0 0 35

611 rows × 11 columns

In [29]:
data.sort_values(by=0.5, ascending=False)
Out[29]:
rating 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 All
userId
All 1370 2811 1791 7551 5550 20047 13136 26818 8551 13211 100836
298 144 41 57 123 163 239 87 76 6 3 939
567 56 38 54 48 53 54 41 16 11 14 385
307 56 57 85 158 136 184 112 126 45 16 975
517 54 59 33 45 42 60 28 41 16 22 400
... ... ... ... ... ... ... ... ... ... ... ...
235 0 1 0 5 0 21 0 27 0 11 65
234 0 7 0 29 0 61 0 65 0 40 202
233 0 2 3 9 14 38 42 33 5 4 150
231 0 0 0 3 0 3 1 11 0 6 24
306 0 0 0 11 9 36 22 27 2 5 112

611 rows × 11 columns

In [ ]:
 
In [30]:
sales = pd.read_excel('sales-funnel.xlsx')
sales
Out[30]:
Account Name Rep Manager Product Quantity Price Status
0 714466 Trantow-Barrows Craig Booker Debra Henley CPU 1 30000 presented
1 714466 Trantow-Barrows Craig Booker Debra Henley Software 1 10000 presented
2 714466 Trantow-Barrows Craig Booker Debra Henley Maintenance 2 5000 pending
3 737550 Fritsch, Russel and Anderson Craig Booker Debra Henley CPU 1 35000 declined
4 146832 Kiehn-Spinka Daniel Hilton Debra Henley CPU 2 65000 won
5 218895 Kulas Inc Daniel Hilton Debra Henley CPU 2 40000 pending
6 218895 Kulas Inc Daniel Hilton Debra Henley Software 1 10000 presented
7 412290 Jerde-Hilpert John Smith Debra Henley Maintenance 2 5000 pending
8 740150 Barton LLC John Smith Debra Henley CPU 1 35000 declined
9 141962 Herman LLC Cedric Moss Fred Anderson CPU 2 65000 won
10 163416 Purdy-Kunde Cedric Moss Fred Anderson CPU 1 30000 presented
11 239344 Stokes LLC Cedric Moss Fred Anderson Maintenance 1 5000 pending
12 239344 Stokes LLC Cedric Moss Fred Anderson Software 1 10000 presented
13 307599 Kassulke, Ondricka and Metz Wendy Yule Fred Anderson Maintenance 3 7000 won
14 688981 Keeling LLC Wendy Yule Fred Anderson CPU 5 100000 won
15 729833 Koepp Ltd Wendy Yule Fred Anderson CPU 2 65000 declined
16 729833 Koepp Ltd Wendy Yule Fred Anderson Monitor 2 5000 presented
In [31]:
sales.pivot_table(index='Name')
Out[31]:
Account Price Quantity
Name
Barton LLC 740150 35000 1.000000
Fritsch, Russel and Anderson 737550 35000 1.000000
Herman LLC 141962 65000 2.000000
Jerde-Hilpert 412290 5000 2.000000
Kassulke, Ondricka and Metz 307599 7000 3.000000
Keeling LLC 688981 100000 5.000000
Kiehn-Spinka 146832 65000 2.000000
Koepp Ltd 729833 35000 2.000000
Kulas Inc 218895 25000 1.500000
Purdy-Kunde 163416 30000 1.000000
Stokes LLC 239344 7500 1.000000
Trantow-Barrows 714466 15000 1.333333
In [32]:
sales.pivot_table(index=['Manager', 'Name'])
Out[32]:
Account Price Quantity
Manager Name
Debra Henley Barton LLC 740150 35000 1.000000
Fritsch, Russel and Anderson 737550 35000 1.000000
Jerde-Hilpert 412290 5000 2.000000
Kiehn-Spinka 146832 65000 2.000000
Kulas Inc 218895 25000 1.500000
Trantow-Barrows 714466 15000 1.333333
Fred Anderson Herman LLC 141962 65000 2.000000
Kassulke, Ondricka and Metz 307599 7000 3.000000
Keeling LLC 688981 100000 5.000000
Koepp Ltd 729833 35000 2.000000
Purdy-Kunde 163416 30000 1.000000
Stokes LLC 239344 7500 1.000000
In [33]:
sales.pivot_table(index=['Manager'], aggfunc='sum')
Out[33]:
Account Price Quantity
Manager
Debra Henley 4618010 235000 13
Fred Anderson 3240312 287000 17
In [34]:
sales.groupby('Manager').sum()
Out[34]:
Account Quantity Price
Manager
Debra Henley 4618010 13 235000
Fred Anderson 3240312 17 287000
In [ ]:
 
In [37]:
sales.pivot_table(index='Manager', columns='Name', values='Price', aggfunc='sum', fill_value=0, margins=True)
Out[37]:
Name Barton LLC Fritsch, Russel and Anderson Herman LLC Jerde-Hilpert Kassulke, Ondricka and Metz Keeling LLC Kiehn-Spinka Koepp Ltd Kulas Inc Purdy-Kunde Stokes LLC Trantow-Barrows All
Manager
Debra Henley 35000 35000 0 5000 0 0 65000 0 50000 0 0 45000 235000
Fred Anderson 0 0 65000 0 7000 100000 0 70000 0 30000 15000 0 287000
All 35000 35000 65000 5000 7000 100000 65000 70000 50000 30000 15000 45000 522000
In [38]:
sales.pivot_table(index='Name', columns='Manager', values='Price', aggfunc='sum', fill_value=0, margins=True)
Out[38]:
Manager Debra Henley Fred Anderson All
Name
Barton LLC 35000 0 35000
Fritsch, Russel and Anderson 35000 0 35000
Herman LLC 0 65000 65000
Jerde-Hilpert 5000 0 5000
Kassulke, Ondricka and Metz 0 7000 7000
Keeling LLC 0 100000 100000
Kiehn-Spinka 65000 0 65000
Koepp Ltd 0 70000 70000
Kulas Inc 50000 0 50000
Purdy-Kunde 0 30000 30000
Stokes LLC 0 15000 15000
Trantow-Barrows 45000 0 45000
All 235000 287000 522000
In [39]:
sales.pivot_table(index='Name', columns=['Manager', 'Quantity'], values='Price', aggfunc='sum', fill_value=0, margins=True)
Out[39]:
Manager Debra Henley Fred Anderson All
Quantity 1 2 1 2 3 5
Name
Barton LLC 35000 0 0 0 0 0 35000
Fritsch, Russel and Anderson 35000 0 0 0 0 0 35000
Herman LLC 0 0 0 65000 0 0 65000
Jerde-Hilpert 0 5000 0 0 0 0 5000
Kassulke, Ondricka and Metz 0 0 0 0 7000 0 7000
Keeling LLC 0 0 0 0 0 100000 100000
Kiehn-Spinka 0 65000 0 0 0 0 65000
Koepp Ltd 0 0 0 70000 0 0 70000
Kulas Inc 10000 40000 0 0 0 0 50000
Purdy-Kunde 0 0 30000 0 0 0 30000
Stokes LLC 0 0 15000 0 0 0 15000
Trantow-Barrows 40000 5000 0 0 0 0 45000
All 120000 115000 45000 135000 7000 100000 522000
In [41]:
sales.pivot_table(index='Name', columns='Manager', values='Price', aggfunc=['sum', 'count'], fill_value=0, margins=True)
Out[41]:
sum count
Manager Debra Henley Fred Anderson All Debra Henley Fred Anderson All
Name
Barton LLC 35000 0 35000 1 0 1
Fritsch, Russel and Anderson 35000 0 35000 1 0 1
Herman LLC 0 65000 65000 0 1 1
Jerde-Hilpert 5000 0 5000 1 0 1
Kassulke, Ondricka and Metz 0 7000 7000 0 1 1
Keeling LLC 0 100000 100000 0 1 1
Kiehn-Spinka 65000 0 65000 1 0 1
Koepp Ltd 0 70000 70000 0 2 2
Kulas Inc 50000 0 50000 2 0 2
Purdy-Kunde 0 30000 30000 0 1 1
Stokes LLC 0 15000 15000 0 2 2
Trantow-Barrows 45000 0 45000 3 0 3
All 235000 287000 522000 9 8 17
In [ ]: