In [1]:
import pandas as pd
In [3]:
df = pd.DataFrame({
    'date': ['1.01.2008',
             '2.02.2009',
             '3.03.2010',
             '4.04.2011',
             '5.05.2012',
             '6.06.2013',
             '7.07.2014',
             '8.08.2015',
             '9.09.2016',
             '10.10.2017',
             '11.11.2018',
             '12.12.2019',
             '10.11.2020',
             '30.11.2020']
})
In [4]:
pd.to_datetime('30.11.2020')
Out[4]:
Timestamp('2020-11-30 00:00:00')
In [5]:
pd.to_datetime('12.11.2020')
Out[5]:
Timestamp('2020-12-11 00:00:00')
In [6]:
pd.to_datetime('12.11.2020', dayfirst=True)
Out[6]:
Timestamp('2020-11-12 00:00:00')
In [13]:
pd.to_datetime('12-11-2020', format='%d-%m-%Y').year
Out[13]:
2020
In [14]:
df
Out[14]:
date
0 1.01.2008
1 2.02.2009
2 3.03.2010
3 4.04.2011
4 5.05.2012
5 6.06.2013
6 7.07.2014
7 8.08.2015
8 9.09.2016
9 10.10.2017
10 11.11.2018
11 12.12.2019
12 10.11.2020
13 30.11.2020
In [15]:
df.dtypes
Out[15]:
date    object
dtype: object
In [18]:
df['dates'] = pd.to_datetime(df.date, format='%d.%m.%Y')
In [19]:
df
Out[19]:
date dates
0 1.01.2008 2008-01-01
1 2.02.2009 2009-02-02
2 3.03.2010 2010-03-03
3 4.04.2011 2011-04-04
4 5.05.2012 2012-05-05
5 6.06.2013 2013-06-06
6 7.07.2014 2014-07-07
7 8.08.2015 2015-08-08
8 9.09.2016 2016-09-09
9 10.10.2017 2017-10-10
10 11.11.2018 2018-11-11
11 12.12.2019 2019-12-12
12 10.11.2020 2020-11-10
13 30.11.2020 2020-11-30
In [20]:
df.dtypes
Out[20]:
date             object
dates    datetime64[ns]
dtype: object
In [22]:
df.dates.dt.day_name()
Out[22]:
0       Tuesday
1        Monday
2     Wednesday
3        Monday
4      Saturday
5      Thursday
6        Monday
7      Saturday
8        Friday
9       Tuesday
10       Sunday
11     Thursday
12      Tuesday
13       Monday
Name: dates, dtype: object
In [25]:
df.dates.dt.month_name()
Out[25]:
0       January
1      February
2         March
3         April
4           May
5          June
6          July
7        August
8     September
9       October
10     November
11     December
12     November
13     November
Name: dates, dtype: object
In [27]:
df[~(df.dates.dt.day > 10)]
Out[27]:
date dates
0 1.01.2008 2008-01-01
1 2.02.2009 2009-02-02
2 3.03.2010 2010-03-03
3 4.04.2011 2011-04-04
4 5.05.2012 2012-05-05
5 6.06.2013 2013-06-06
6 7.07.2014 2014-07-07
7 8.08.2015 2015-08-08
8 9.09.2016 2016-09-09
9 10.10.2017 2017-10-10
12 10.11.2020 2020-11-10
In [28]:
df.dates.max()
Out[28]:
Timestamp('2020-11-30 00:00:00')
In [29]:
df.dates.min()
Out[29]:
Timestamp('2008-01-01 00:00:00')
In [30]:
df.dates.max() - df.dates.min()
Out[30]:
Timedelta('4717 days 00:00:00')
In [32]:
df[ df.dates > pd.to_datetime('2014-07-07') ]
Out[32]:
date dates
7 8.08.2015 2015-08-08
8 9.09.2016 2016-09-09
9 10.10.2017 2017-10-10
10 11.11.2018 2018-11-11
11 12.12.2019 2019-12-12
12 10.11.2020 2020-11-10
13 30.11.2020 2020-11-30
In [33]:
df.dates.nunique()
Out[33]:
14
In [34]:
df.set_index('dates', inplace=True)
In [35]:
df
Out[35]:
date
dates
2008-01-01 1.01.2008
2009-02-02 2.02.2009
2010-03-03 3.03.2010
2011-04-04 4.04.2011
2012-05-05 5.05.2012
2013-06-06 6.06.2013
2014-07-07 7.07.2014
2015-08-08 8.08.2015
2016-09-09 9.09.2016
2017-10-10 10.10.2017
2018-11-11 11.11.2018
2019-12-12 12.12.2019
2020-11-10 10.11.2020
2020-11-30 30.11.2020
In [36]:
df['2020']
Out[36]:
date
dates
2020-11-10 10.11.2020
2020-11-30 30.11.2020
In [37]:
df['2017':'2020']
Out[37]:
date
dates
2017-10-10 10.10.2017
2018-11-11 11.11.2018
2019-12-12 12.12.2019
2020-11-10 10.11.2020
2020-11-30 30.11.2020
In [40]:
df['2017-11':'2020']
Out[40]:
date
dates
2018-11-11 11.11.2018
2019-12-12 12.12.2019
2020-11-10 10.11.2020
2020-11-30 30.11.2020
In [41]:
df2 = pd.read_csv('ratings.csv')
df2
Out[41]:
userId movieId rating timestamp
0 1 1 4.0 964982703
1 1 3 4.0 964981247
2 1 6 4.0 964982224
3 1 47 5.0 964983815
4 1 50 5.0 964982931
... ... ... ... ...
100831 610 166534 4.0 1493848402
100832 610 168248 5.0 1493850091
100833 610 168250 5.0 1494273047
100834 610 168252 5.0 1493846352
100835 610 170875 3.0 1493846415

100836 rows × 4 columns

In [44]:
df2['dates'] = pd.to_datetime(df2.timestamp, unit='s')
In [45]:
df2
Out[45]:
userId movieId rating timestamp dates
0 1 1 4.0 964982703 2000-07-30 18:45:03
1 1 3 4.0 964981247 2000-07-30 18:20:47
2 1 6 4.0 964982224 2000-07-30 18:37:04
3 1 47 5.0 964983815 2000-07-30 19:03:35
4 1 50 5.0 964982931 2000-07-30 18:48:51
... ... ... ... ... ...
100831 610 166534 4.0 1493848402 2017-05-03 21:53:22
100832 610 168248 5.0 1493850091 2017-05-03 22:21:31
100833 610 168250 5.0 1494273047 2017-05-08 19:50:47
100834 610 168252 5.0 1493846352 2017-05-03 21:19:12
100835 610 170875 3.0 1493846415 2017-05-03 21:20:15

100836 rows × 5 columns

In [46]:
df2['year'] = df2.dates.dt.year
In [47]:
df2
Out[47]:
userId movieId rating timestamp dates year
0 1 1 4.0 964982703 2000-07-30 18:45:03 2000
1 1 3 4.0 964981247 2000-07-30 18:20:47 2000
2 1 6 4.0 964982224 2000-07-30 18:37:04 2000
3 1 47 5.0 964983815 2000-07-30 19:03:35 2000
4 1 50 5.0 964982931 2000-07-30 18:48:51 2000
... ... ... ... ... ... ...
100831 610 166534 4.0 1493848402 2017-05-03 21:53:22 2017
100832 610 168248 5.0 1493850091 2017-05-03 22:21:31 2017
100833 610 168250 5.0 1494273047 2017-05-08 19:50:47 2017
100834 610 168252 5.0 1493846352 2017-05-03 21:19:12 2017
100835 610 170875 3.0 1493846415 2017-05-03 21:20:15 2017

100836 rows × 6 columns

In [ ]: