import pandas as pd
# Load the sales records from result.csv into a DataFrame using pandas'
# default parsing options (no explicit dtypes, so types are inferred).
df = pd.read_csv('result.csv')
# Bare expression: as the last statement of a notebook cell it renders the
# frame as the cell output.
df
| | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
---|---|---|---|---|---|---|
0 | 176558 | USB-C Charging Cable | 2 | 11.95 | 04/19/19 08:46 | 917 1st St, Dallas, TX 75001 |
1 | 176559 | Bose SoundSport Headphones | 1 | 99.99 | 04/07/19 22:30 | 682 Chestnut St, Boston, MA 02215 |
2 | 176560 | Google Phone | 1 | 600 | 04/12/19 14:38 | 669 Spruce St, Los Angeles, CA 90001 |
3 | 176560 | Wired Headphones | 1 | 11.99 | 04/12/19 14:38 | 669 Spruce St, Los Angeles, CA 90001 |
4 | 176561 | Wired Headphones | 1 | 11.99 | 04/30/19 09:27 | 333 8th St, Los Angeles, CA 90001 |
... | ... | ... | ... | ... | ... | ... |
186300 | 259353 | AAA Batteries (4-pack) | 3 | 2.99 | 09/17/19 20:56 | 840 Highland St, Los Angeles, CA 90001 |
186301 | 259354 | iPhone | 1 | 700 | 09/01/19 16:00 | 216 Dogwood St, San Francisco, CA 94016 |
186302 | 259355 | iPhone | 1 | 700 | 09/23/19 07:39 | 220 12th St, San Francisco, CA 94016 |
186303 | 259356 | 34in Ultrawide Monitor | 1 | 379.99 | 09/19/19 17:30 | 511 Forest St, San Francisco, CA 94016 |
186304 | 259357 | USB-C Charging Cable | 1 | 11.95 | 09/30/19 00:18 | 250 Meadow St, San Francisco, CA 94016 |
186305 rows × 6 columns
# Summarise columns, non-null counts and dtypes; memory_usage='deep' asks
# pandas to measure the real size of object (string) columns instead of
# estimating it.
df.info(memory_usage='deep')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 186305 entries, 0 to 186304
Data columns (total 6 columns):
 #   Column            Non-Null Count   Dtype
---  ------            --------------   -----
 0   Order ID          186305 non-null  object
 1   Product           186305 non-null  object
 2   Quantity Ordered  186305 non-null  object
 3   Price Each        186305 non-null  object
 4   Order Date        186305 non-null  object
 5   Purchase Address  186305 non-null  object
dtypes: object(6)
memory usage: 75.5 MB
# Map the original spaced column labels to shorter, identifier-friendly
# names so the columns can be reached with attribute access (df.Quantity).
column_aliases = {
    'Order ID': 'Order_ID',
    'Quantity Ordered': 'Quantity',
    'Price Each': 'Price',
    'Order Date': 'Order_Date',
    'Purchase Address': 'Address',
}
# Relabel in place; columns missing from the mapping ('Product') are kept.
df.rename(columns=column_aliases, inplace=True)
df
| | Order_ID | Product | Quantity | Price | Order_Date | Address |
---|---|---|---|---|---|---|
0 | 176558 | USB-C Charging Cable | 2 | 11.95 | 04/19/19 08:46 | 917 1st St, Dallas, TX 75001 |
1 | 176559 | Bose SoundSport Headphones | 1 | 99.99 | 04/07/19 22:30 | 682 Chestnut St, Boston, MA 02215 |
2 | 176560 | Google Phone | 1 | 600 | 04/12/19 14:38 | 669 Spruce St, Los Angeles, CA 90001 |
3 | 176560 | Wired Headphones | 1 | 11.99 | 04/12/19 14:38 | 669 Spruce St, Los Angeles, CA 90001 |
4 | 176561 | Wired Headphones | 1 | 11.99 | 04/30/19 09:27 | 333 8th St, Los Angeles, CA 90001 |
... | ... | ... | ... | ... | ... | ... |
186300 | 259353 | AAA Batteries (4-pack) | 3 | 2.99 | 09/17/19 20:56 | 840 Highland St, Los Angeles, CA 90001 |
186301 | 259354 | iPhone | 1 | 700 | 09/01/19 16:00 | 216 Dogwood St, San Francisco, CA 94016 |
186302 | 259355 | iPhone | 1 | 700 | 09/23/19 07:39 | 220 12th St, San Francisco, CA 94016 |
186303 | 259356 | 34in Ultrawide Monitor | 1 | 379.99 | 09/19/19 17:30 | 511 Forest St, San Francisco, CA 94016 |
186304 | 259357 | USB-C Charging Cable | 1 | 11.95 | 09/30/19 00:18 | 250 Meadow St, San Francisco, CA 94016 |
186305 rows × 6 columns
# Quantity cannot be cast to int directly: some rows are repeated copies of
# the CSV header, so the column contains the literal text 'Quantity Ordered'.
# Display those rows before removing them.
#   regex=False -> match the text literally rather than as a regex pattern.
#   na=False    -> missing values count as "no match", keeping the mask
#                  strictly boolean (boolean indexing rejects NaN in a mask).
df[df.Quantity.str.contains('Quantity Ordered', regex=False, na=False)]
| | Order_ID | Product | Quantity | Price | Order_Date | Address |
---|---|---|---|---|---|---|
517 | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
1146 | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
1152 | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
2869 | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
2884 | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
... | ... | ... | ... | ... | ... | ... |
184624 | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
185009 | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
186019 | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
186088 | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
186194 | Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address |
355 rows × 6 columns
# Remove the repeated header rows (rows whose Quantity holds the literal
# header text) so the numeric columns can be converted afterwards.
#   regex=False -> literal substring match, not a regex pattern.
#   na=False    -> missing values are treated as non-matching, so the mask
#                  is always a clean boolean Series.
header_row_mask = df.Quantity.str.contains(
    'Quantity Ordered', regex=False, na=False
)
# Drop by index label, in place. The surviving rows keep their original
# labels (the index is not reset).
df.drop(index=df.index[header_row_mask], inplace=True)
df
| | Order_ID | Product | Quantity | Price | Order_Date | Address |
---|---|---|---|---|---|---|
0 | 176558 | USB-C Charging Cable | 2 | 11.95 | 04/19/19 08:46 | 917 1st St, Dallas, TX 75001 |
1 | 176559 | Bose SoundSport Headphones | 1 | 99.99 | 04/07/19 22:30 | 682 Chestnut St, Boston, MA 02215 |
2 | 176560 | Google Phone | 1 | 600 | 04/12/19 14:38 | 669 Spruce St, Los Angeles, CA 90001 |
3 | 176560 | Wired Headphones | 1 | 11.99 | 04/12/19 14:38 | 669 Spruce St, Los Angeles, CA 90001 |
4 | 176561 | Wired Headphones | 1 | 11.99 | 04/30/19 09:27 | 333 8th St, Los Angeles, CA 90001 |
... | ... | ... | ... | ... | ... | ... |
186300 | 259353 | AAA Batteries (4-pack) | 3 | 2.99 | 09/17/19 20:56 | 840 Highland St, Los Angeles, CA 90001 |
186301 | 259354 | iPhone | 1 | 700 | 09/01/19 16:00 | 216 Dogwood St, San Francisco, CA 94016 |
186302 | 259355 | iPhone | 1 | 700 | 09/23/19 07:39 | 220 12th St, San Francisco, CA 94016 |
186303 | 259356 | 34in Ultrawide Monitor | 1 | 379.99 | 09/19/19 17:30 | 511 Forest St, San Francisco, CA 94016 |
186304 | 259357 | USB-C Charging Cable | 1 | 11.95 | 09/30/19 00:18 | 250 Meadow St, San Francisco, CA 94016 |
185950 rows × 6 columns
# Convert the text columns that hold numbers to numeric dtypes, one column
# at a time, now that the embedded header rows are gone.
numeric_dtypes = {'Quantity': 'int', 'Price': 'float'}
for column_name, target_dtype in numeric_dtypes.items():
    df[column_name] = df[column_name].astype(target_dtype)

# Re-inspect dtypes and the deep memory footprint after the conversion.
df.info(memory_usage='deep')
<class 'pandas.core.frame.DataFrame'>
Int64Index: 185950 entries, 0 to 186304
Data columns (total 6 columns):
 #   Column      Non-Null Count   Dtype
---  ------      --------------   -----
 0   Order_ID    185950 non-null  object
 1   Product     185950 non-null  object
 2   Quantity    185950 non-null  int32
 3   Price       185950 non-null  float64
 4   Order_Date  185950 non-null  object
 5   Address     185950 non-null  object
dtypes: float64(1), int32(1), object(4)
memory usage: 57.0 MB
# Write the cleaned frame back to disk; index=False leaves the row labels
# out of the file.
# NOTE(review): this is the same 'result.csv' path the frame was loaded
# from, so the raw input is overwritten. Re-running the cells above on the
# rewritten file would likely fail, because Quantity would presumably be
# parsed as numeric and the .str accessor only works on string data.
# Consider a separate output filename; confirm nothing downstream depends
# on this path first.
df.to_csv('result.csv', index=False)