In [1]:
# Built-in str method: returns a copy of the literal with every character upper-cased.
'string'.upper()
Out[1]:
'STRING'
In [2]:
# Built-in str method: returns a copy of the literal with every character lower-cased.
'STRING'.lower()
Out[2]:
'string'
In [3]:
import pandas as pd
In [4]:
# Read the semicolon-delimited city table into `df` and show it as the cell result.
df = pd.read_csv('city.csv', delimiter=';')
df
Out[4]:
ID Name CountryCode District Population
0 1 Kabul AFG Kabol 1780000
1 2 Qandahar AFG Qandahar 237500
2 3 Herat AFG Herat 186800
3 4 Mazar-e-Sharif AFG Balkh 127800
4 5 Amsterdam NLD Noord-Holland 731200
... ... ... ... ... ...
4074 4075 Khan Yunis PSE Khan Yunis 123175
4075 4076 Hebron PSE Hebron 119401
4076 4077 Jabaliya PSE North Gaza 113901
4077 4078 Nablus PSE Nablus 100231
4078 4079 Rafah PSE Rafah 92020

4079 rows × 5 columns

In [8]:
# Rows whose Name contains "ka" anywhere, ignoring letter case.
df.loc[df['Name'].str.contains('ka', case=False)]
Out[8]:
ID Name CountryCode District Population
0 1 Kabul AFG Kabol 1780000
149 150 Dhaka BGD Dhaka 3612850
201 202 Banja Luka BIH Republika Srpska 143079
622 623 Kafr al-Dawwar EGY al-Buhayra 231978
629 630 Shibin al-Kawm EGY al-Minufiya 159909
... ... ... ... ... ...
3732 3733 Solikamsk RUS Perm 106000
3827 3828 Kansas City USA Missouri 441545
3888 3889 Spokane USA Washington 195629
3935 3936 Kansas City USA Kansas 146866
3971 3972 Topeka USA Kansas 122377

211 rows × 5 columns

In [11]:
# Rows whose Name begins with "ka" (the ^ anchors the regex at the start), ignoring letter case.
df.loc[df['Name'].str.contains('^ka', case=False, regex=True)]
Out[11]:
ID Name CountryCode District Population
0 1 Kabul AFG Kabol 1780000
622 623 Kafr al-Dawwar EGY al-Buhayra 231978
633 634 Kafr al-Shaykh EGY Kafr al-Shaykh 124819
766 767 Kalookan PHL National Capital Reg 1177604
823 824 Kabankalan PHL Western Visayas 149769
... ... ... ... ... ...
3674 3675 Kamensk-Uralski RUS Sverdlovsk 190600
3710 3711 Kamyšin RUS Volgograd 124600
3730 3731 Kansk RUS Krasnojarsk 107400
3827 3828 Kansas City USA Missouri 441545
3935 3936 Kansas City USA Kansas 146866

89 rows × 5 columns

In [17]:
import re

# Rows whose whole Name is "ka" followed by one or more characters from
# [a-zš]; case-insensitivity is requested through the regex flag rather
# than the `case` keyword.
df.loc[df['Name'].str.contains('^ka[a-zš]+$', flags=re.IGNORECASE, regex=True)]
Out[17]:
ID Name CountryCode District Population
0 1 Kabul AFG Kabol 1780000
766 767 Kalookan PHL National Capital Reg 1177604
823 824 Kabankalan PHL Western Visayas 149769
980 981 Karawang IDN West Java 145000
1030 1031 Kanpur IND Uttar Pradesh 1874409
... ... ... ... ... ...
3586 3587 Kazan RUS Tatarstan 1101000
3622 3623 Kaliningrad RUS Kaliningrad 424400
3633 3634 Kaluga RUS Kaluga 339300
3710 3711 Kamyšin RUS Volgograd 124600
3730 3731 Kansk RUS Krasnojarsk 107400

79 rows × 5 columns

In [21]:
# Rows whose whole Name starts with "kab" or "kar" and continues with one or
# more characters from [a-zš], ignoring letter case.
# The alternation is wrapped in a non-capturing group (?:...): str.contains
# only returns a boolean mask, and pandas emits a UserWarning when the pattern
# contains capturing groups. The set of matched rows is unchanged.
df[df.Name.str.contains('^(?:kab|kar)[a-zš]+$', case=False, regex=True)]
Out[21]:
ID Name CountryCode District Population
0 1 Kabul AFG Kabol 1780000
823 824 Kabankalan PHL Western Visayas 149769
980 981 Karawang IDN West Java 145000
1178 1179 Karnal IND Haryana 173751
1209 1210 Karimnagar IND Andhra Pradesh 148583
1371 1372 Karbala IRQ Karbala 296705
1384 1385 Karaj IRN Teheran 940968
1696 1697 Kariya JPN Aichi 127969
2821 2822 Karachi PAK Sindh 9269265
3088 3089 Karlsruhe DEU Baden-Württemberg 277204
3164 3165 Kabwe ZMB Central 154300
3407 3408 Karaman TUR Karaman 104200
3415 3416 Kars TUR Kars 93000
3507 3508 Karsi UZB Qashqadaryo 194100
In [20]:
import warnings
warnings.filterwarnings('ignore')
In [ ]:
 
In [ ]:
 
In [25]:
# Read the semicolon-delimited price list into `df2` and show it as the cell result.
df2 = pd.read_csv('price.csv', delimiter=';')
df2
Out[25]:
id title price qty
0 NaN NaN NaN NaN
1 1.0 Ноутбук Acer Aspire 5 A515-54G-502N (NX.HVGEU.006) Pure Silver 10.0 5.0
2 2.0 Ноутбук Asus ROG Strix G15 G512LI-HN057 (90NR0381-M01640) Black NaN NaN
3 3.0 Ноут HP Pavilion Gaming 15-bc504ur (7DT87EA) Black Суперцена!!! 7.0 NaN
4 4.0 Ноутбук HP Pavilion Notebook 15-cw1011ua (8RW14EA) Mineral Silver 25.0 2.0
5 5.0 Ноутбук Acer Aspire 7 A715-41G-R7MZ (NH.Q8LEU.004) Charcoal Black 35.0 3.0
6 6.0 Ноутбук Dell Inspiron 3582 (I3582C54H5NIL-BK) Black 5.0 NaN
7 7.0 Ноутбук Apple MacBook Air 13" 256GB 2020 Space Gray (MWTJ2) 11.0 1.0
8 8.0 Ноут Asus ROG Strix G15 G512LI-HN094 (90NR0381-M01620) Black 16.0 10.0
9 9.0 Ноутбук HP Pavilion Notebook 15-cw1002ua (7KE54EA) Mineral Silver Суперцена!!! 15.0 1.0
10 10.0 Ноутбук HP Pavilion Notebook 15-cw1005ua (7ZF75EA) Mineral Silver Суперцена!!! NaN NaN
11 NaN NaN NaN NaN
12 12.0 Ноутбук Lenovo IdeaPad L340-15IRH Gaming (81LK01HCRA) Granite Black 10.0 1.0
In [23]:
# Let text columns render up to 100 characters before being truncated.
# The fully qualified option key is used: the bare 'max_colwidth' shorthand
# is resolved by pattern matching and can break if pandas adds another option
# with a similar name.
pd.set_option('display.max_colwidth', 100)
In [28]:
# Rows whose title begins with the word "ноут" followed by a space, ignoring
# letter case; rows with a missing title are treated as non-matching (na=False).
df2.loc[df2['title'].str.contains('^ноут ', case=False, na=False)]
Out[28]:
id title price qty
3 3.0 Ноут HP Pavilion Gaming 15-bc504ur (7DT87EA) Black Суперцена!!! 7.0 NaN
8 8.0 Ноут Asus ROG Strix G15 G512LI-HN094 (90NR0381-M01620) Black 16.0 10.0
In [52]:
# Overwrite the title column with its capitalized form (first character
# upper-case, the rest lower-case). Other case helpers on the .str accessor
# that can be swapped in here: lower(), upper(), title(), swapcase(), casefold().
df2['title'] = df2['title'].str.capitalize()
In [53]:
# Display df2 again to inspect the title column after the case conversion.
df2
Out[53]:
id title price qty
0 NaN NaN NaN NaN
1 1.0 Ноутбук acer aspire 5 a515-54g-502n (nx.hvgeu.006) pure silver 10.0 5.0
2 2.0 Ноутбук asus rog strix g15 g512li-hn057 (90nr0381-m01640) black NaN NaN
3 3.0 Ноут hp pavilion gaming 15-bc504ur (7dt87ea) black суперцена!!! 7.0 NaN
4 4.0 Ноутбук hp pavilion notebook 15-cw1011ua (8rw14ea) mineral silver 25.0 2.0
5 5.0 Ноутбук acer aspire 7 a715-41g-r7mz (nh.q8leu.004) charcoal black 35.0 3.0
6 6.0 Ноутбук dell inspiron 3582 (i3582c54h5nil-bk) black 5.0 NaN
7 7.0 Ноутбук apple macbook air 13" 256gb 2020 space gray (mwtj2) 11.0 1.0
8 8.0 Ноут asus rog strix g15 g512li-hn094 (90nr0381-m01620) black 16.0 10.0
9 9.0 Ноутбук hp pavilion notebook 15-cw1002ua (7ke54ea) mineral silver суперцена!!! 15.0 1.0
10 10.0 Ноутбук hp pavilion notebook 15-cw1005ua (7zf75ea) mineral silver суперцена!!! NaN NaN
11 NaN NaN NaN NaN
12 12.0 Ноутбук lenovo ideapad l340-15irh gaming (81lk01hcra) granite black 10.0 1.0
In [55]:
# Per-row count of the Cyrillic letter "н" in the title, matching both cases;
# rows with a missing title yield NaN.
df2['title'].str.count('н', flags=re.IGNORECASE)
Out[55]:
0     NaN
1     1.0
2     1.0
3     2.0
4     1.0
5     1.0
6     1.0
7     1.0
8     1.0
9     2.0
10    2.0
11    NaN
12    1.0
Name: title, dtype: float64
In [56]:
# Rows whose title contains the Cyrillic letter "н" more than once (either case).
# NaN counts compare as False, so rows without a title are dropped.
df2.loc[df2['title'].str.count('н', flags=re.IGNORECASE).gt(1)]
Out[56]:
id title price qty
3 3.0 Ноут hp pavilion gaming 15-bc504ur (7dt87ea) black суперцена!!! 7.0 NaN
9 9.0 Ноутбук hp pavilion notebook 15-cw1002ua (7ke54ea) mineral silver суперцена!!! 15.0 1.0
10 10.0 Ноутбук hp pavilion notebook 15-cw1005ua (7zf75ea) mineral silver суперцена!!! NaN NaN
In [58]:
# Rows whose title contains the lower-case promo tag anywhere; missing titles
# count as non-matching (na=False).
df2.loc[df2['title'].str.contains('суперцена!!!', na=False)]
Out[58]:
id title price qty
3 3.0 Ноут hp pavilion gaming 15-bc504ur (7dt87ea) black суперцена!!! 7.0 NaN
9 9.0 Ноутбук hp pavilion notebook 15-cw1002ua (7ke54ea) mineral silver суперцена!!! 15.0 1.0
10 10.0 Ноутбук hp pavilion notebook 15-cw1005ua (7zf75ea) mineral silver суперцена!!! NaN NaN
In [59]:
# Rows whose title ends with the lower-case promo tag; missing titles count
# as non-matching (na=False).
df2.loc[df2['title'].str.endswith('суперцена!!!', na=False)]
Out[59]:
id title price qty
3 3.0 Ноут hp pavilion gaming 15-bc504ur (7dt87ea) black суперцена!!! 7.0 NaN
9 9.0 Ноутбук hp pavilion notebook 15-cw1002ua (7ke54ea) mineral silver суперцена!!! 15.0 1.0
10 10.0 Ноутбук hp pavilion notebook 15-cw1005ua (7zf75ea) mineral silver суперцена!!! NaN NaN
In [60]:
# Preview of the titles with every space swapped for an underscore; the result
# is only displayed, df2 itself is not modified.
df2['title'].str.replace(' ', '_')
Out[60]:
0                                                                                NaN
1                     Ноутбук_acer_aspire_5_a515-54g-502n_(nx.hvgeu.006)_pure_silver
2                    Ноутбук_asus_rog_strix_g15_g512li-hn057_(90nr0381-m01640)_black
3                    Ноут_hp_pavilion_gaming_15-bc504ur_(7dt87ea)_black_суперцена!!!
4                  Ноутбук_hp_pavilion_notebook_15-cw1011ua_(8rw14ea)_mineral_silver
5                  Ноутбук_acer_aspire_7_a715-41g-r7mz_(nh.q8leu.004)_charcoal_black
6                                Ноутбук_dell_inspiron_3582_(i3582c54h5nil-bk)_black
7                        Ноутбук_apple_macbook_air_13"_256gb_2020_space_gray_(mwtj2)
8                       Ноут_asus_rog_strix_g15_g512li-hn094_(90nr0381-m01620)_black
9     Ноутбук_hp_pavilion_notebook_15-cw1002ua_(7ke54ea)_mineral_silver_суперцена!!!
10    Ноутбук_hp_pavilion_notebook_15-cw1005ua_(7zf75ea)_mineral_silver_суперцена!!!
11                                                                               NaN
12               Ноутбук_lenovo_ideapad_l340-15irh_gaming_(81lk01hcra)_granite_black
Name: title, dtype: object
In [65]:
# Read the semicolon-delimited price list into `df3` and show it as the cell result.
# NOTE(review): the recorded output of this cell has a header "Название товара",
# while the earlier load of the same path showed "title" -- the file on disk
# presumably changed between the two runs; confirm which version is intended.
df3 = pd.read_csv('price.csv', delimiter=';')
df3
Out[65]:
id Название товара price qty
0 NaN NaN NaN NaN
1 1.0 Ноутбук Acer Aspire 5 A515-54G-502N (NX.HVGEU.006) Pure Silver 10.0 5.0
2 2.0 Ноутбук Asus ROG Strix G15 G512LI-HN057 (90NR0381-M01640) Black NaN NaN
3 3.0 Ноут HP Pavilion Gaming 15-bc504ur (7DT87EA) Black Суперцена!!! 7.0 NaN
4 4.0 Ноутбук HP Pavilion Notebook 15-cw1011ua (8RW14EA) Mineral Silver 25.0 2.0
5 5.0 Ноутбук Acer Aspire 7 A715-41G-R7MZ (NH.Q8LEU.004) Charcoal Black 35.0 3.0
6 6.0 Ноутбук Dell Inspiron 3582 (I3582C54H5NIL-BK) Black 5.0 NaN
7 7.0 Ноутбук Apple MacBook Air 13" 256GB 2020 Space Gray (MWTJ2) 11.0 1.0
8 8.0 Ноут Asus ROG Strix G15 G512LI-HN094 (90NR0381-M01620) Black 16.0 10.0
9 9.0 Ноутбук HP Pavilion Notebook 15-cw1002ua (7KE54EA) Mineral Silver Суперцена!!! 15.0 1.0
10 10.0 Ноутбук HP Pavilion Notebook 15-cw1005ua (7ZF75EA) Mineral Silver Суперцена!!! NaN NaN
11 NaN NaN NaN NaN
12 12.0 Ноутбук Lenovo IdeaPad L340-15IRH Gaming (81LK01HCRA) Granite Black 10.0 1.0
In [66]:
# Normalise the header labels in place: lower-case them and turn spaces into
# underscores (the two steps do not interact, so their order is irrelevant).
df3.columns = df3.columns.str.lower().str.replace(' ', '_')
In [67]:
# Display df3 to check the renamed column labels.
df3
Out[67]:
id название_товара price qty
0 NaN NaN NaN NaN
1 1.0 Ноутбук Acer Aspire 5 A515-54G-502N (NX.HVGEU.006) Pure Silver 10.0 5.0
2 2.0 Ноутбук Asus ROG Strix G15 G512LI-HN057 (90NR0381-M01640) Black NaN NaN
3 3.0 Ноут HP Pavilion Gaming 15-bc504ur (7DT87EA) Black Суперцена!!! 7.0 NaN
4 4.0 Ноутбук HP Pavilion Notebook 15-cw1011ua (8RW14EA) Mineral Silver 25.0 2.0
5 5.0 Ноутбук Acer Aspire 7 A715-41G-R7MZ (NH.Q8LEU.004) Charcoal Black 35.0 3.0
6 6.0 Ноутбук Dell Inspiron 3582 (I3582C54H5NIL-BK) Black 5.0 NaN
7 7.0 Ноутбук Apple MacBook Air 13" 256GB 2020 Space Gray (MWTJ2) 11.0 1.0
8 8.0 Ноут Asus ROG Strix G15 G512LI-HN094 (90NR0381-M01620) Black 16.0 10.0
9 9.0 Ноутбук HP Pavilion Notebook 15-cw1002ua (7KE54EA) Mineral Silver Суперцена!!! 15.0 1.0
10 10.0 Ноутбук HP Pavilion Notebook 15-cw1005ua (7ZF75EA) Mineral Silver Суперцена!!! NaN NaN
11 NaN NaN NaN NaN
12 12.0 Ноутбук Lenovo IdeaPad L340-15IRH Gaming (81LK01HCRA) Granite Black 10.0 1.0
In [ ]: