Time series

Met Éireann data

In [1]:
# Pandas - for data frames.
import pandas as pd
In [2]:
# Load the first 1000 hourly observations from the Met Eireann CSV.
# The 23 lines ahead of the column-header row are skipped, and
# low_memory=False has pandas process the file in a single pass.
df = pd.read_csv(
    "http://cli.met.ie/cli/climate_data/webdata/hly4935.csv",
    low_memory=False,
    nrows=1000,
    skiprows=23,
)
In [3]:
# Change the date column to a Pythonic datetime.
# The raw values look like "10-apr-1996 14:00", so spell the format out:
# every row is parsed the same way (no per-row guessing) and a value that
# does not match raises instead of being silently misread.
df['datetime'] = pd.to_datetime(df['date'], format='%d-%b-%Y %H:%M')
In [4]:
# Look at the data.
# A bare expression on the last line of a cell is rendered as the cell output;
# pandas abbreviates the long frame, eliding the middle rows and columns.
df
Out[4]:
date ind rain ind.1 temp ind.2 wetb dewpt vappr rhum ... wdsp ind.4 wddir ww w sun vis clht clamt datetime
0 10-apr-1996 14:00 0 0.0 0 11.5 0 8.1 3.9 0.0 0 ... 0 0 0 25 81 0.0 35000 32 5 1996-04-10 14:00:00
1 31-jul-1996 08:00 0 0.0 0 11.5 0 11.1 10.7 0.0 0 ... 0 0 0 25 82 0.0 40000 45 5 1996-07-31 08:00:00
2 31-jul-1996 09:00 0 0.0 0 11.6 0 10.7 9.8 0.0 0 ... 0 0 0 80 81 0.0 8000 32 7 1996-07-31 09:00:00
3 31-jul-1996 10:00 0 0.0 0 12.9 0 11.3 9.8 0.0 0 ... 0 0 0 25 82 0.0 28000 35 6 1996-07-31 10:00:00
4 31-jul-1996 11:00 0 0.0 0 14.5 0 10.8 7.0 0.0 0 ... 0 0 0 2 11 0.0 40000 40 6 1996-07-31 11:00:00
5 31-jul-1996 12:00 0 0.0 0 14.4 0 10.9 7.3 0.0 0 ... 0 0 0 1 82 0.0 40000 43 6 1996-07-31 12:00:00
6 31-jul-1996 13:00 0 0.0 0 14.9 0 10.9 6.7 0.0 0 ... 0 0 0 2 11 0.0 40000 43 5 1996-07-31 13:00:00
7 31-jul-1996 14:00 0 0.0 0 13.9 0 10.4 6.7 0.0 0 ... 0 0 0 2 11 0.0 50000 45 6 1996-07-31 14:00:00
8 31-jul-1996 15:00 0 0.0 0 13.2 0 10.9 8.6 0.0 0 ... 0 0 0 25 82 0.0 8000 45 7 1996-07-31 15:00:00
9 31-jul-1996 16:00 0 0.0 0 15.1 0 11.3 7.5 0.0 0 ... 0 0 0 25 82 0.0 35000 40 7 1996-07-31 16:00:00
10 31-jul-1996 17:00 0 0.0 0 11.6 0 10.6 9.6 0.0 0 ... 0 0 0 2 11 0.0 35000 40 7 1996-07-31 17:00:00
11 31-jul-1996 18:00 0 0.0 0 13.3 0 10.9 8.5 0.0 0 ... 0 0 0 2 82 0.0 35000 20 7 1996-07-31 18:00:00
12 31-jul-1996 19:00 0 0.0 0 12.7 0 11.0 9.4 0.0 0 ... 0 0 0 2 11 0.0 35000 40 6 1996-07-31 19:00:00
13 31-jul-1996 20:00 0 0.0 0 11.5 0 11.1 10.7 0.0 0 ... 0 0 0 25 82 0.0 35000 40 6 1996-07-31 20:00:00
14 31-jul-1996 21:00 0 0.0 0 10.9 0 10.2 9.5 0.0 0 ... 0 0 0 2 82 0.0 35000 40 6 1996-07-31 21:00:00
15 31-jul-1996 22:00 0 0.0 0 10.8 0 10.1 9.4 0.0 0 ... 0 0 0 2 11 0.0 35000 40 6 1996-07-31 22:00:00
16 31-jul-1996 23:00 0 0.0 0 9.6 0 8.8 7.9 0.0 0 ... 0 0 0 2 11 0.0 35000 0 3 1996-07-31 23:00:00
17 01-aug-1996 00:00 0 0.0 0 9.7 0 9.0 8.3 0.0 0 ... 0 0 0 1 81 0.0 35000 999 3 1996-08-01 00:00:00
18 01-aug-1996 01:00 0 0.0 0 9.1 0 8.5 7.9 10.6 92 ... 6 2 230 2 11 0.0 35000 999 4 1996-08-01 01:00:00
19 01-aug-1996 02:00 0 0.0 0 10.0 0 8.6 7.1 10.0 82 ... 8 2 240 2 11 0.0 35000 50 5 1996-08-01 02:00:00
20 01-aug-1996 03:00 0 0.0 0 10.3 0 9.3 8.3 10.9 87 ... 9 2 250 2 11 0.0 35000 44 5 1996-08-01 03:00:00
21 01-aug-1996 04:00 0 0.0 0 10.3 0 8.8 7.2 10.1 81 ... 8 2 240 2 11 0.0 30000 46 6 1996-08-01 04:00:00
22 01-aug-1996 05:00 0 0.0 0 10.0 0 9.1 8.2 10.8 88 ... 8 2 250 2 11 0.0 30000 40 7 1996-08-01 05:00:00
23 01-aug-1996 06:00 0 0.0 0 10.2 0 9.2 8.1 10.8 87 ... 8 2 250 2 11 0.0 30000 43 7 1996-08-01 06:00:00
24 01-aug-1996 07:00 0 0.0 0 10.7 0 9.5 8.3 10.9 85 ... 7 2 240 2 11 0.0 25000 45 7 1996-08-01 07:00:00
25 01-aug-1996 08:00 0 0.0 0 10.6 0 9.7 8.8 11.3 89 ... 11 2 250 2 11 0.0 30000 42 7 1996-08-01 08:00:00
26 01-aug-1996 09:00 2 0.0 0 12.0 0 10.6 9.2 11.7 83 ... 11 2 250 21 62 0.0 30000 40 7 1996-08-01 09:00:00
27 01-aug-1996 10:00 0 0.0 0 12.7 0 10.8 8.9 11.4 78 ... 13 2 260 2 11 0.0 35000 37 7 1996-08-01 10:00:00
28 01-aug-1996 11:00 2 0.0 0 12.5 0 10.7 8.9 11.4 79 ... 13 2 260 80 22 0.0 22000 33 7 1996-08-01 11:00:00
29 01-aug-1996 12:00 0 0.0 0 13.6 0 11.3 9.1 11.5 74 ... 12 2 270 1 86 0.6 40000 44 6 1996-08-01 12:00:00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
970 09-sep-1996 17:00 0 0.0 0 13.7 0 11.6 9.6 12.0 76 ... 16 2 330 2 11 1.0 50000 999 2 1996-09-09 17:00:00
971 09-sep-1996 18:00 0 0.0 0 12.6 0 11.0 9.5 11.8 81 ... 14 2 330 2 11 0.8 50000 250 5 1996-09-09 18:00:00
972 09-sep-1996 19:00 0 0.0 0 11.6 0 10.6 9.6 12.0 88 ... 8 2 340 2 11 0.0 50000 999 2 1996-09-09 19:00:00
973 09-sep-1996 20:00 0 0.0 0 10.3 0 9.8 9.3 11.7 94 ... 7 2 340 2 11 0.0 40000 999 2 1996-09-09 20:00:00
974 09-sep-1996 21:00 0 0.0 0 10.4 0 9.7 9.0 11.5 91 ... 9 2 360 2 11 0.0 40000 999 1 1996-09-09 21:00:00
975 09-sep-1996 22:00 0 0.0 0 10.4 0 9.6 8.8 11.3 90 ... 10 2 360 2 11 0.0 40000 999 1 1996-09-09 22:00:00
976 09-sep-1996 23:00 0 0.0 0 10.0 0 9.4 8.8 11.3 92 ... 10 2 360 2 11 0.0 40000 999 1 1996-09-09 23:00:00
977 10-sep-1996 00:00 0 0.0 0 10.3 0 9.7 9.1 11.5 92 ... 8 2 360 2 11 0.0 30000 22 5 1996-09-10 00:00:00
978 10-sep-1996 01:00 0 0.0 0 11.0 0 10.4 9.8 12.1 92 ... 8 2 360 2 11 0.0 30000 22 6 1996-09-10 01:00:00
979 10-sep-1996 02:00 0 0.0 0 11.3 0 10.4 9.5 11.9 89 ... 9 2 360 2 11 0.0 30000 22 7 1996-09-10 02:00:00
980 10-sep-1996 03:00 0 0.0 0 11.0 0 10.1 9.2 11.6 89 ... 7 2 360 2 11 0.0 30000 22 7 1996-09-10 03:00:00
981 10-sep-1996 04:00 0 0.0 0 10.4 0 9.8 9.2 11.6 92 ... 5 2 350 2 11 0.0 30000 20 6 1996-09-10 04:00:00
982 10-sep-1996 05:00 0 0.0 0 10.3 0 9.8 9.3 11.7 94 ... 4 2 350 2 11 0.0 30000 20 6 1996-09-10 05:00:00
983 10-sep-1996 06:00 0 0.0 0 10.0 0 9.7 9.4 11.8 96 ... 4 2 340 2 11 0.0 30000 22 6 1996-09-10 06:00:00
984 10-sep-1996 07:00 0 0.0 0 10.6 0 10.0 9.4 11.8 92 ... 6 2 360 2 11 0.2 30000 22 5 1996-09-10 07:00:00
985 10-sep-1996 08:00 0 0.0 0 11.2 0 10.5 9.8 12.1 91 ... 7 2 350 2 11 0.5 25000 999 4 1996-09-10 08:00:00
986 10-sep-1996 09:00 0 0.0 0 12.8 0 11.2 9.7 12.0 81 ... 8 2 350 2 11 0.1 30000 98 7 1996-09-10 09:00:00
987 10-sep-1996 10:00 0 0.0 0 14.2 0 12.2 10.4 12.6 78 ... 8 2 360 2 11 0.5 30000 72 6 1996-09-10 10:00:00
988 10-sep-1996 11:00 0 0.0 0 14.0 0 12.2 10.6 12.8 80 ... 8 2 350 2 11 0.0 25000 19 8 1996-09-10 11:00:00
989 10-sep-1996 12:00 2 0.0 0 12.8 0 12.3 11.9 13.9 94 ... 8 2 350 20 51 0.0 16000 55 8 1996-09-10 12:00:00
990 10-sep-1996 13:00 0 0.0 0 14.0 0 12.7 11.6 13.6 85 ... 9 2 330 2 11 0.0 20000 63 8 1996-09-10 13:00:00
991 10-sep-1996 14:00 0 0.0 0 14.5 0 12.9 11.5 13.6 82 ... 9 2 340 2 11 0.0 30000 62 7 1996-09-10 14:00:00
992 10-sep-1996 15:00 0 0.0 0 15.0 0 12.9 11.1 13.2 77 ... 9 2 340 2 11 0.1 30000 64 7 1996-09-10 15:00:00
993 10-sep-1996 16:00 0 0.0 0 14.8 0 12.7 10.9 13.0 77 ... 10 2 340 2 11 0.1 30000 20 7 1996-09-10 16:00:00
994 10-sep-1996 17:00 0 0.0 0 14.7 0 12.5 10.5 12.7 76 ... 10 2 340 2 11 1.0 30000 999 4 1996-09-10 17:00:00
995 10-sep-1996 18:00 0 0.0 0 13.7 0 12.3 11.1 13.2 84 ... 10 2 340 2 11 0.1 30000 59 7 1996-09-10 18:00:00
996 10-sep-1996 19:00 0 0.0 0 12.8 0 12.0 11.3 13.4 91 ... 9 2 340 2 11 0.0 24000 55 7 1996-09-10 19:00:00
997 10-sep-1996 20:00 0 0.0 0 12.4 0 11.7 11.1 13.2 92 ... 7 2 340 2 11 0.0 24000 34 7 1996-09-10 20:00:00
998 10-sep-1996 21:00 0 0.0 0 12.2 0 11.6 11.1 13.2 93 ... 6 2 340 2 11 0.0 24000 34 7 1996-09-10 21:00:00
999 10-sep-1996 22:00 0 0.0 0 12.0 0 11.4 10.9 13.0 93 ... 7 2 330 2 11 0.0 24000 34 7 1996-09-10 22:00:00

1000 rows × 22 columns

In [5]:
%matplotlib inline

# Plot the temperature over time.

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

sns.set(style="darkgrid")

fig, ax = plt.subplots()

sns.lineplot(x="datetime", y="temp", data=df[1:], ax=ax)

# Makes the x axis display well.
# See: https://stackoverflow.com/a/31262531/340324
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%y'))

plt.show()

Creating time series

In [6]:
# Create a datetime index for a data frame.

# Adapted from: https://pandas.pydata.org/pandas-docs/stable/timeseries.html

# 72 hours starting with midnight Jan 1st, 2011
# - ISO 8601 date: '1/1/2011' can be read as day-first or month-first.
# - Lowercase 'h': the uppercase 'H' hourly alias is deprecated in pandas >= 2.2.
rng = pd.date_range('2011-01-01', periods=72, freq='h')
In [7]:
# Show the index: 72 hourly timestamps from 2011-01-01 00:00 to 2011-01-03 23:00.
rng
Out[7]:
DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
               '2011-01-01 02:00:00', '2011-01-01 03:00:00',
               '2011-01-01 04:00:00', '2011-01-01 05:00:00',
               '2011-01-01 06:00:00', '2011-01-01 07:00:00',
               '2011-01-01 08:00:00', '2011-01-01 09:00:00',
               '2011-01-01 10:00:00', '2011-01-01 11:00:00',
               '2011-01-01 12:00:00', '2011-01-01 13:00:00',
               '2011-01-01 14:00:00', '2011-01-01 15:00:00',
               '2011-01-01 16:00:00', '2011-01-01 17:00:00',
               '2011-01-01 18:00:00', '2011-01-01 19:00:00',
               '2011-01-01 20:00:00', '2011-01-01 21:00:00',
               '2011-01-01 22:00:00', '2011-01-01 23:00:00',
               '2011-01-02 00:00:00', '2011-01-02 01:00:00',
               '2011-01-02 02:00:00', '2011-01-02 03:00:00',
               '2011-01-02 04:00:00', '2011-01-02 05:00:00',
               '2011-01-02 06:00:00', '2011-01-02 07:00:00',
               '2011-01-02 08:00:00', '2011-01-02 09:00:00',
               '2011-01-02 10:00:00', '2011-01-02 11:00:00',
               '2011-01-02 12:00:00', '2011-01-02 13:00:00',
               '2011-01-02 14:00:00', '2011-01-02 15:00:00',
               '2011-01-02 16:00:00', '2011-01-02 17:00:00',
               '2011-01-02 18:00:00', '2011-01-02 19:00:00',
               '2011-01-02 20:00:00', '2011-01-02 21:00:00',
               '2011-01-02 22:00:00', '2011-01-02 23:00:00',
               '2011-01-03 00:00:00', '2011-01-03 01:00:00',
               '2011-01-03 02:00:00', '2011-01-03 03:00:00',
               '2011-01-03 04:00:00', '2011-01-03 05:00:00',
               '2011-01-03 06:00:00', '2011-01-03 07:00:00',
               '2011-01-03 08:00:00', '2011-01-03 09:00:00',
               '2011-01-03 10:00:00', '2011-01-03 11:00:00',
               '2011-01-03 12:00:00', '2011-01-03 13:00:00',
               '2011-01-03 14:00:00', '2011-01-03 15:00:00',
               '2011-01-03 16:00:00', '2011-01-03 17:00:00',
               '2011-01-03 18:00:00', '2011-01-03 19:00:00',
               '2011-01-03 20:00:00', '2011-01-03 21:00:00',
               '2011-01-03 22:00:00', '2011-01-03 23:00:00'],
              dtype='datetime64[ns]', freq='H')
In [8]:
# Simulate some hospital admissions data using numpy, using our datetime index.

import numpy as np

# Seed a dedicated generator so a fresh "Restart & Run All" reproduces the same
# simulated counts. It is not called `rng`: that name is the datetime index.
admissions_rng = np.random.default_rng(42)

# One Poisson draw (mean 10) per timestamp in the index.
ts = pd.DataFrame(admissions_rng.poisson(10, len(rng)), index=rng, columns=["admissions"])
In [9]:
# Show the simulated frame: a single "admissions" column indexed by the hourly timestamps.
ts
Out[9]:
admissions
2011-01-01 00:00:00 8
2011-01-01 01:00:00 10
2011-01-01 02:00:00 11
2011-01-01 03:00:00 12
2011-01-01 04:00:00 9
2011-01-01 05:00:00 14
2011-01-01 06:00:00 10
2011-01-01 07:00:00 11
2011-01-01 08:00:00 9
2011-01-01 09:00:00 15
2011-01-01 10:00:00 14
2011-01-01 11:00:00 7
2011-01-01 12:00:00 12
2011-01-01 13:00:00 14
2011-01-01 14:00:00 10
2011-01-01 15:00:00 8
2011-01-01 16:00:00 4
2011-01-01 17:00:00 13
2011-01-01 18:00:00 7
2011-01-01 19:00:00 8
2011-01-01 20:00:00 12
2011-01-01 21:00:00 10
2011-01-01 22:00:00 9
2011-01-01 23:00:00 6
2011-01-02 00:00:00 12
2011-01-02 01:00:00 12
2011-01-02 02:00:00 11
2011-01-02 03:00:00 10
2011-01-02 04:00:00 8
2011-01-02 05:00:00 4
... ...
2011-01-02 18:00:00 14
2011-01-02 19:00:00 6
2011-01-02 20:00:00 11
2011-01-02 21:00:00 8
2011-01-02 22:00:00 9
2011-01-02 23:00:00 11
2011-01-03 00:00:00 17
2011-01-03 01:00:00 11
2011-01-03 02:00:00 10
2011-01-03 03:00:00 13
2011-01-03 04:00:00 11
2011-01-03 05:00:00 11
2011-01-03 06:00:00 15
2011-01-03 07:00:00 10
2011-01-03 08:00:00 7
2011-01-03 09:00:00 7
2011-01-03 10:00:00 6
2011-01-03 11:00:00 11
2011-01-03 12:00:00 11
2011-01-03 13:00:00 10
2011-01-03 14:00:00 10
2011-01-03 15:00:00 10
2011-01-03 16:00:00 5
2011-01-03 17:00:00 12
2011-01-03 18:00:00 11
2011-01-03 19:00:00 14
2011-01-03 20:00:00 10
2011-01-03 21:00:00 10
2011-01-03 22:00:00 7
2011-01-03 23:00:00 16

72 rows × 1 columns

Accessing and grouping

In [10]:
# Calculate the mean number of hospital admissions per hour.
# DataFrame.mean() averages each column over all rows, so this is the mean of
# the 72 hourly "admissions" counts.
ts.mean()
Out[10]:
admissions    10.25
dtype: float64
In [11]:
# Pull out every row stamped 2 January 2011. A date-only string passed to
# .loc on a datetime index selects that whole day.
ts.loc['2011-01-02']
Out[11]:
admissions
2011-01-02 00:00:00 12
2011-01-02 01:00:00 12
2011-01-02 02:00:00 11
2011-01-02 03:00:00 10
2011-01-02 04:00:00 8
2011-01-02 05:00:00 4
2011-01-02 06:00:00 7
2011-01-02 07:00:00 11
2011-01-02 08:00:00 8
2011-01-02 09:00:00 14
2011-01-02 10:00:00 12
2011-01-02 11:00:00 8
2011-01-02 12:00:00 11
2011-01-02 13:00:00 11
2011-01-02 14:00:00 13
2011-01-02 15:00:00 9
2011-01-02 16:00:00 9
2011-01-02 17:00:00 11
2011-01-02 18:00:00 14
2011-01-02 19:00:00 6
2011-01-02 20:00:00 11
2011-01-02 21:00:00 8
2011-01-02 22:00:00 9
2011-01-02 23:00:00 11
In [12]:
# Average the admissions over the rows stamped 2 January 2011.
ts.loc['2011-01-02'].mean()
Out[12]:
admissions    10.0
dtype: float64
In [13]:
# Use re-sampling to calculate the mean of all days.
# resample('D') groups the hourly rows into calendar-day bins; .mean() then
# averages the "admissions" counts within each bin.
ts.resample('D').mean()
Out[13]:
admissions
2011-01-01 10.125
2011-01-02 10.000
2011-01-03 10.625

End