Import necessary dependencies and settings

In [1]:
import datetime
import numpy as np
import pandas as pd
from dateutil.parser import parse
import pytz

Load and process sample temporal data

In [2]:
# Sample timestamps kept as raw strings; each one carries its own UTC offset.
time_stamps = [
    '2015-03-08 10:30:00.360000+00:00',
    '2017-07-13 15:45:05.755000-07:00',
    '2012-01-20 22:30:00.254000+05:30',
    '2016-12-25 00:30:00.000000+10:00',
]
# Single-column frame that the rest of the notebook extends with derived features.
df = pd.DataFrame({'Time': time_stamps})
df
Out[2]:
Time
0 2015-03-08 10:30:00.360000+00:00
1 2017-07-13 15:45:05.755000-07:00
2 2012-01-20 22:30:00.254000+05:30
3 2016-12-25 00:30:00.000000+10:00
In [3]:
# Parse every raw string into a tz-aware pandas Timestamp. The rows carry
# different UTC offsets, so the parsed values are kept as individual objects.
ts_objs = np.array(list(map(pd.Timestamp, df['Time'])))
df['TS_obj'] = ts_objs
ts_objs
Out[3]:
array([Timestamp('2015-03-08 10:30:00.360000+0000', tz='UTC'),
       Timestamp('2017-07-13 15:45:05.755000-0700', tz='pytz.FixedOffset(-420)'),
       Timestamp('2012-01-20 22:30:00.254000+0530', tz='pytz.FixedOffset(330)'),
       Timestamp('2016-12-25 00:30:00+1000', tz='pytz.FixedOffset(600)')], dtype=object)

Date based features

In [4]:
def _day_name(ts):
    """Return the weekday name (e.g. 'Sunday') of a pandas Timestamp.

    ``Timestamp.weekday_name`` was removed in pandas 1.0; ``day_name()`` is
    its replacement. The method is preferred, and the old attribute is only
    used as a fallback on pandas releases that predate ``day_name()``.
    """
    name_method = getattr(ts, 'day_name', None)
    return name_method() if name_method is not None else ts.weekday_name


# Calendar components read from each Timestamp in TS_obj.
df['Year'] = df['TS_obj'].apply(lambda d: d.year)
df['Month'] = df['TS_obj'].apply(lambda d: d.month)
df['Day'] = df['TS_obj'].apply(lambda d: d.day)
df['DayOfWeek'] = df['TS_obj'].apply(lambda d: d.dayofweek)
# NOTE(review): the saved output pairs DayOfWeek 6 with both 'Sunday' and
# 'Saturday'; the last row's DayName may differ after a re-run -- confirm.
df['DayName'] = df['TS_obj'].apply(_day_name)
df['DayOfYear'] = df['TS_obj'].apply(lambda d: d.dayofyear)
df['WeekOfYear'] = df['TS_obj'].apply(lambda d: d.weekofyear)
df['Quarter'] = df['TS_obj'].apply(lambda d: d.quarter)

df[['Time', 'Year', 'Month', 'Day', 'Quarter', 
    'DayOfWeek', 'DayName', 'DayOfYear', 'WeekOfYear']]
Out[4]:
Time Year Month Day Quarter DayOfWeek DayName DayOfYear WeekOfYear
0 2015-03-08 10:30:00.360000+00:00 2015 3 8 1 6 Sunday 67 10
1 2017-07-13 15:45:05.755000-07:00 2017 7 13 3 3 Thursday 194 28
2 2012-01-20 22:30:00.254000+05:30 2012 1 20 1 4 Friday 20 3
3 2016-12-25 00:30:00.000000+10:00 2016 12 25 4 6 Saturday 360 51

Time based features

In [5]:
# Clock components plus the UTC offset, one new column per (name, extractor)
# pair, added in the order listed.
time_parts = (
    ('Hour', lambda ts: ts.hour),
    ('Minute', lambda ts: ts.minute),
    ('Second', lambda ts: ts.second),
    ('MUsecond', lambda ts: ts.microsecond),
    ('UTC_offset', lambda ts: ts.utcoffset()),
)
for column, extract in time_parts:
    df[column] = df['TS_obj'].apply(extract)

df[['Time'] + [column for column, _ in time_parts]]
Out[5]:
Time Hour Minute Second MUsecond UTC_offset
0 2015-03-08 10:30:00.360000+00:00 10 30 0 360000 00:00:00
1 2017-07-13 15:45:05.755000-07:00 15 45 5 755000 -1 days +17:00:00
2 2012-01-20 22:30:00.254000+05:30 22 30 0 254000 05:30:00
3 2016-12-25 00:30:00.000000+10:00 0 30 0 0 10:00:00
In [6]:
# Each part of the day is listed with the last hour it includes. pd.cut is
# called with its default right-closed intervals, so a leading edge of -1 is
# needed for hour 0 to land in the first bin.
day_parts = [('Late Night', 5), ('Morning', 11), ('Afternoon', 16),
             ('Evening', 21), ('Night', 23)]
bin_names = [label for label, _ in day_parts]
hour_bins = [-1] + [last_hour for _, last_hour in day_parts]

df['TimeOfDayBin'] = pd.cut(df['Hour'], bins=hour_bins, labels=bin_names)
df[['Time', 'Hour', 'TimeOfDayBin']]
Out[6]:
Time Hour TimeOfDayBin
0 2015-03-08 10:30:00.360000+00:00 10 Morning
1 2017-07-13 15:45:05.755000-07:00 15 Afternoon
2 2012-01-20 22:30:00.254000+05:30 22 Night
3 2016-12-25 00:30:00.000000+10:00 0 Late Night
In [7]:
# Build the pytz zone objects once instead of once per row.
_candidate_zones = [pytz.timezone(zone_name) for zone_name in pytz.all_timezones_set]


def _zone_names_at_offset(ts):
    """Return the sorted zone abbreviations whose offset matches ``ts``.

    Every pytz zone is checked at the instant ``ts``: the timestamp is
    converted to the zone and the zone's abbreviation is kept when its UTC
    offset equals the timestamp's own. Duplicates are collapsed, and the
    result is sorted so the column is identical on every run (iterating a
    set of strings directly yields an arbitrary order).
    """
    target_offset = ts.utcoffset()
    names = set()
    for zone in _candidate_zones:
        localized = ts.astimezone(zone)  # convert once, reuse for both checks
        if localized.utcoffset() == target_offset:
            names.add(localized.tzname())
    return sorted(names)


df['TZ_info'] = df['TS_obj'].apply(lambda d: d.tzinfo)
df['TimeZones'] = df['TS_obj'].apply(_zone_names_at_offset)

df[['Time', 'UTC_offset', 'TZ_info', 'TimeZones']]
Out[7]:
Time UTC_offset TZ_info TimeZones
0 2015-03-08 10:30:00.360000+00:00 00:00:00 UTC [WET, UTC, UCT, GMT]
1 2017-07-13 15:45:05.755000-07:00 -1 days +17:00:00 pytz.FixedOffset(-420) [MST, GMT+7, PDT]
2 2012-01-20 22:30:00.254000+05:30 05:30:00 pytz.FixedOffset(330) [IST]
3 2016-12-25 00:30:00.000000+10:00 10:00:00 pytz.FixedOffset(600) [VLAT, ChST, AEST, PGT, DDUT, GMT-10, CHUT]
In [8]:
def _to_utc(ts):
    """Convert a tz-aware Timestamp to UTC."""
    return ts.tz_convert(pytz.utc)


# Put every timestamp on the common UTC timeline, then derive two numeric
# encodings from it: the value of timestamp() and the value of toordinal().
df['TimeUTC'] = df['TS_obj'].apply(_to_utc)
utc_times = df['TimeUTC']
df['Epoch'] = utc_times.apply(lambda ts: ts.timestamp())
df['GregOrdinal'] = utc_times.apply(lambda ts: ts.toordinal())

df[['Time', 'TimeUTC', 'Epoch', 'GregOrdinal']]
Out[8]:
Time TimeUTC Epoch GregOrdinal
0 2015-03-08 10:30:00.360000+00:00 2015-03-08 10:30:00.360000+00:00 1.425811e+09 735665
1 2017-07-13 15:45:05.755000-07:00 2017-07-13 22:45:05.755000+00:00 1.499986e+09 736523
2 2012-01-20 22:30:00.254000+05:30 2012-01-20 17:00:00.254000+00:00 1.327079e+09 734522
3 2016-12-25 00:30:00.000000+10:00 2016-12-24 14:30:00+00:00 1.482590e+09 736322
In [9]:
# Reference instant is "now" in UTC, so these two columns change on every run.
curr_ts = datetime.datetime.now(pytz.utc)
now_epoch = curr_ts.timestamp()
now_ordinal = curr_ts.toordinal()
seconds_per_day = 3600 * 24

# Days elapsed from each timestamp up to now: a fractional count from the
# epoch seconds, and a whole-day count from the ordinal day numbers.
df['DaysElapsedEpoch'] = (now_epoch - df['Epoch']) / seconds_per_day
df['DaysElapsedOrdinal'] = now_ordinal - df['GregOrdinal']

df[['Time', 'TimeUTC', 'DaysElapsedEpoch', 'DaysElapsedOrdinal']]
Out[9]:
Time TimeUTC DaysElapsedEpoch DaysElapsedOrdinal
0 2015-03-08 10:30:00.360000+00:00 2015-03-08 10:30:00.360000+00:00 860.207396 860
1 2017-07-13 15:45:05.755000-07:00 2017-07-13 22:45:05.755000+00:00 1.696917 2
2 2012-01-20 22:30:00.254000+05:30 2012-01-20 17:00:00.254000+00:00 2002.936564 2003
3 2016-12-25 00:30:00.000000+10:00 2016-12-24 14:30:00+00:00 203.040734 203