In [1]:
import pandas as pd
In [2]:
# Load the order export from the working directory and display it.
# No dtypes are specified here; the df.info() cell further down reports every
# column as object, so the numeric columns are converted later in the notebook.
df = pd.read_csv('result.csv')
df
Out[2]:
Order ID Product Quantity Ordered Price Each Order Date Purchase Address
0 176558 USB-C Charging Cable 2 11.95 04/19/19 08:46 917 1st St, Dallas, TX 75001
1 176559 Bose SoundSport Headphones 1 99.99 04/07/19 22:30 682 Chestnut St, Boston, MA 02215
2 176560 Google Phone 1 600 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001
3 176560 Wired Headphones 1 11.99 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001
4 176561 Wired Headphones 1 11.99 04/30/19 09:27 333 8th St, Los Angeles, CA 90001
... ... ... ... ... ... ...
186300 259353 AAA Batteries (4-pack) 3 2.99 09/17/19 20:56 840 Highland St, Los Angeles, CA 90001
186301 259354 iPhone 1 700 09/01/19 16:00 216 Dogwood St, San Francisco, CA 94016
186302 259355 iPhone 1 700 09/23/19 07:39 220 12th St, San Francisco, CA 94016
186303 259356 34in Ultrawide Monitor 1 379.99 09/19/19 17:30 511 Forest St, San Francisco, CA 94016
186304 259357 USB-C Charging Cable 1 11.95 09/30/19 00:18 250 Meadow St, San Francisco, CA 94016

186305 rows × 6 columns

In [3]:
# Column dtypes and non-null counts for the freshly loaded frame;
# memory_usage='deep' also measures the Python string objects, not just the pointers.
df.info(memory_usage='deep')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 186305 entries, 0 to 186304
Data columns (total 6 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   Order ID          186305 non-null  object
 1   Product           186305 non-null  object
 2   Quantity Ordered  186305 non-null  object
 3   Price Each        186305 non-null  object
 4   Order Date        186305 non-null  object
 5   Purchase Address  186305 non-null  object
dtypes: object(6)
memory usage: 75.5 MB
In [4]:
# Replace the space-separated export headers with short identifier-style labels
# so the columns can be reached via attribute access (df.Quantity, df.Price, ...).
short_labels = dict(zip(
    ['Order ID', 'Quantity Ordered', 'Price Each', 'Order Date', 'Purchase Address'],
    ['Order_ID', 'Quantity', 'Price', 'Order_Date', 'Address'],
))
df.rename(columns=short_labels, inplace=True)  # 'Product' is left as it is
df
Out[4]:
Order_ID Product Quantity Price Order_Date Address
0 176558 USB-C Charging Cable 2 11.95 04/19/19 08:46 917 1st St, Dallas, TX 75001
1 176559 Bose SoundSport Headphones 1 99.99 04/07/19 22:30 682 Chestnut St, Boston, MA 02215
2 176560 Google Phone 1 600 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001
3 176560 Wired Headphones 1 11.99 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001
4 176561 Wired Headphones 1 11.99 04/30/19 09:27 333 8th St, Los Angeles, CA 90001
... ... ... ... ... ... ...
186300 259353 AAA Batteries (4-pack) 3 2.99 09/17/19 20:56 840 Highland St, Los Angeles, CA 90001
186301 259354 iPhone 1 700 09/01/19 16:00 216 Dogwood St, San Francisco, CA 94016
186302 259355 iPhone 1 700 09/23/19 07:39 220 12th St, San Francisco, CA 94016
186303 259356 34in Ultrawide Monitor 1 379.99 09/19/19 17:30 511 Forest St, San Francisco, CA 94016
186304 259357 USB-C Charging Cable 1 11.95 09/30/19 00:18 250 Meadow St, San Francisco, CA 94016

186305 rows × 6 columns

In [6]:
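# Disabled on purpose: converting now cannot succeed, because some rows still hold
# the repeated header text 'Quantity Ordered' instead of a number (filtered out below).
# df.Quantity.astype('int')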
In [7]:
# Show the rows that are really repeated CSV header lines embedded in the data
# (their Quantity cell holds the literal text 'Quantity Ordered').
# The column is cast to str first: this notebook saves its cleaned, typed result
# back to 'result.csv', so on a later run Quantity may already be numeric and the
# .str accessor would raise AttributeError. regex=False makes it a plain substring test.
df[df['Quantity'].astype(str).str.contains('Quantity Ordered', regex=False)]
Out[7]:
Order_ID Product Quantity Price Order_Date Address
517 Order ID Product Quantity Ordered Price Each Order Date Purchase Address
1146 Order ID Product Quantity Ordered Price Each Order Date Purchase Address
1152 Order ID Product Quantity Ordered Price Each Order Date Purchase Address
2869 Order ID Product Quantity Ordered Price Each Order Date Purchase Address
2884 Order ID Product Quantity Ordered Price Each Order Date Purchase Address
... ... ... ... ... ... ...
184624 Order ID Product Quantity Ordered Price Each Order Date Purchase Address
185009 Order ID Product Quantity Ordered Price Each Order Date Purchase Address
186019 Order ID Product Quantity Ordered Price Each Order Date Purchase Address
186088 Order ID Product Quantity Ordered Price Each Order Date Purchase Address
186194 Order ID Product Quantity Ordered Price Each Order Date Purchase Address

355 rows × 6 columns

In [9]:
# Remove the repeated header lines that are embedded as data rows
# (Quantity contains the literal text 'Quantity Ordered').
# Casting to str keeps this cell runnable when Quantity is already numeric, e.g. when
# the notebook is re-run on the cleaned 'result.csv' it writes at the end; in that
# case the mask is all False and nothing is dropped.
is_header_row = df['Quantity'].astype(str).str.contains('Quantity Ordered', regex=False)
# Drop in place so the frame object used by the following cells stays the same.
df.drop(index=df[is_header_row].index, inplace=True)
In [10]:
# Display the frame again to confirm the row count after the header rows were dropped.
df
Out[10]:
Order_ID Product Quantity Price Order_Date Address
0 176558 USB-C Charging Cable 2 11.95 04/19/19 08:46 917 1st St, Dallas, TX 75001
1 176559 Bose SoundSport Headphones 1 99.99 04/07/19 22:30 682 Chestnut St, Boston, MA 02215
2 176560 Google Phone 1 600 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001
3 176560 Wired Headphones 1 11.99 04/12/19 14:38 669 Spruce St, Los Angeles, CA 90001
4 176561 Wired Headphones 1 11.99 04/30/19 09:27 333 8th St, Los Angeles, CA 90001
... ... ... ... ... ... ...
186300 259353 AAA Batteries (4-pack) 3 2.99 09/17/19 20:56 840 Highland St, Los Angeles, CA 90001
186301 259354 iPhone 1 700 09/01/19 16:00 216 Dogwood St, San Francisco, CA 94016
186302 259355 iPhone 1 700 09/23/19 07:39 220 12th St, San Francisco, CA 94016
186303 259356 34in Ultrawide Monitor 1 379.99 09/19/19 17:30 511 Forest St, San Francisco, CA 94016
186304 259357 USB-C Charging Cable 1 11.95 09/30/19 00:18 250 Meadow St, San Francisco, CA 94016

185950 rows × 6 columns

In [12]:
# Quantity was loaded as text; store it as the platform default integer type.
df['Quantity'] = df['Quantity'].astype(int)
In [14]:
# Price was loaded as text; store it as a floating-point number.
df['Price'] = df['Price'].astype(float)
In [15]:
# Re-check dtypes and deep memory usage after the Quantity/Price conversions.
df.info(memory_usage='deep')
<class 'pandas.core.frame.DataFrame'>
Int64Index: 185950 entries, 0 to 186304
Data columns (total 6 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   Order_ID    185950 non-null  object 
 1   Product     185950 non-null  object 
 2   Quantity    185950 non-null  int32  
 3   Price       185950 non-null  float64
 4   Order_Date  185950 non-null  object 
 5   Address     185950 non-null  object 
dtypes: float64(1), int32(1), object(4)
memory usage: 57.0 MB
In [16]:
# Persist the cleaned frame without writing the index as an extra column.
# NOTE(review): this is the same path the notebook loads at the top, so the raw
# export is overwritten. On a later Restart & Run All the file no longer contains
# the repeated header rows and Quantity will presumably be parsed as numeric -
# consider writing to a separate output file; confirm what downstream code reads.
df.to_csv('result.csv', index=False)
In [ ]: