%pylab inline
%load_ext rmagic
%R library(forecast)
Populating the interactive namespace from numpy and matplotlib
This is forecast 4.8
Reading TS
%%R
## Load the example time series used throughout this notebook.
## Kings' ages (42 annual observations, no seasonality)
kings <- scan("http://robjhyndman.com/tsdldata/misc/kings.dat",skip=3)
kings <- ts(kings)
# NY city monthly birth data, starting January 1946
births <- scan("http://robjhyndman.com/tsdldata/data/nybirths.dat")
births <- ts(births, frequency = 12, start = c(1946, 1))
## Monthly sales for a souvenir shop:
## trend + seasonality, non-additive,
## but its LOG transform can be modelled as additive trend + seasonality
souvenir <- scan("http://robjhyndman.com/tsdldata/data/fancy.dat")
souvenir <- ts(souvenir, frequency = 12, start = c(1987, 1))
## London rainfall in inches from 1813 to 1912:
## constant level, no seasonality, additive
rain <- scan("http://robjhyndman.com/tsdldata/hurst/precip1.dat",skip=1)
rain <- ts(rain, start = c(1813))
## Women's skirt diameter at the hem from 1866 to 1911:
## linear trend, no seasonality, additive
skirts <- scan("http://robjhyndman.com/tsdldata/roberts/skirts.dat",skip=5)
skirts <- ts(skirts, start = c(1866))
## Volcanic Dust Veil Index in the Northern Hemisphere,
## annual, 1500 to 1969
dusts <- scan("http://robjhyndman.com/tsdldata/annual/dvi.dat", skip=1)
dusts <- ts(dusts, start = c(1500))
Read 42 items Read 168 items Read 84 items Read 100 items Read 46 items Read 470 items
Plotting TS
## The plot suggests an additive model, as the random fluctuations
## in the data are roughly constant in size over time.
%R plot.ts(kings)
## We can see from this time series that there seems to be seasonal variation in the number of births per month:
## there is a peak every summer, and a trough every winter. Again, it seems that this time series could probably be
## described using an additive model, as the seasonal fluctuations are roughly constant in size over time and do not
## seem to depend on the level of the time series, and the random fluctuations also seem to be roughly constant in
## size over time.
%R plot.ts(births)
%%R
## In this case, an additive model is not appropriate for describing this time series, since the size of
## the seasonal fluctuations and random fluctuations seems to increase with the level of the time series. Thus, we may
## need to transform the time series in order to get a transformed series that can be described using an additive
## model. For example, we can transform the time series by taking the natural log of the original data.
par(mfrow = c(2, 1))
plot.ts(souvenir)
## Here we can see that the size of the seasonal fluctuations and random fluctuations in the log-transformed time
## series seems to be roughly constant over time, and does not depend on the level of the time series. Thus, the
## log-transformed time series can probably be described using an additive model.
plot.ts(log(souvenir))
Decomposing TS
(1) First make sure that the series can be described by an additive model, by checking that its fluctuations are roughly constant in size; otherwise apply a transformation first. (2) Then check that the data has a frequency > 1.
Decomposing a time series means separating it into its constituent components, which are usually a trend component and an irregular component, and if it is a seasonal time series, a seasonal component.
The seasonal component is better described as a weakly stationary series (constant mean function, and auto-covariance functions that depend only on the lag distance).
To estimate the trend component of a non-seasonal time series that can be described using an additive model, it is common to use a smoothing method (especially for non-seasonal series), such as calculating the simple moving average of the time series. The smoothing step filters out high-frequency noise and is useful for finding trend and seasonal components.
%%R
## To expose the trend of the kings' ages, overlay centred moving
## averages of two window widths on the raw series. The smoothing
## order is chosen by trial and error; the trend appears to rise
## first and then fall.
plot.ts(kings, col = 1)
windows <- c(3, 9)
for (i in seq_along(windows)) {
  k <- windows[i]
  lines(filter(kings, sides = 2,
               method = "convolution",
               filter = rep(1 / k, k)),
        col = i + 1)
}
legend("topleft",
       c("original", "3-moving average", "9-moving average"),
       col = 1:3, lty = 1)
%%R
## Classical decomposition of the monthly birth data into
## trend + seasonal + irregular components.
## (Note: despite the ".stl" suffix, this uses decompose(), not stl().)
births.stl <- decompose(births)
plot(births.stl)
%%R
## Use the autocorrelation function to check the quality of the
## decomposition: the extracted components should leave little
## unexplained structure.
## BUG FIX: lag.max = 24 was previously passed to na.omit() (where it
## was silently swallowed by "...") instead of to Acf(), so the
## intended lag range never applied to the last three plots.
par(mfrow = c(2, 2))
Acf(births.stl$x, lag.max = 24)
Acf(na.omit(births.stl$trend), lag.max = 24)
Acf(na.omit(births.stl$seasonal), lag.max = 24)
Acf(na.omit(births.stl$random), lag.max = 24)
Forecasts by Exponential Smoothing
Mostly used to make short-term forecasts for time series
Simple Exponential Smoothing (constant-level, non-seasonality, additive) : If you have a time series that can be described using an additive model with constant level and no seasonality, you can use simple exponential smoothing to make short-term forecasts. The SES provides a way of estimating the constant level at the current time point. Control parameter $\alpha$ (between 0 and 1) controls the weight placed on the most recent observations when making forecasts of future values. $\alpha$ close to 1 means most of the weight is placed on the most recent observations, while small $\alpha$ means the forecast depends on both recent and less recent observations.
Simple Exponential Smoothing is implemented as HoltWinters() smoothing in R. set beta = F
and gamma = F
in HoltWinters to choose the "SIMPLE" smoothing. The forecasts made by HoltWinters() are stored in a named element called "fitted"
Holt's Exponential Smoothing (increasing/decreasing trend, non-seasonality, additive): it estimates the level and scope. Smoothing is controlled by two parameters, $\alpha$, for the estimate of the level at the current time point, and $\beta$ for the estimate of the slope b of the trend component at the current time point. As with simple exponential smoothing, the paramters $\alpha$ and $\beta$ have values between 0 and 1, and values that are close to 0 mean that little weight is placed on the most recent observations when making forecasts of future values
Holt Winters Exponential Smoothing (trend, seasonality, additive): Holt-Winters exponential smoothing estimates the level, slope and seasonal component at the current time point. Smoothing is controlled by three parameters: $\alpha$, $\beta$, and $\gamma$, for the estimates of the level, slope b of the trend component, and the seasonal component, respectively, at the current time point. The parameters alpha, beta and gamma all have values between 0 and 1, and values that are close to 0 mean that relatively little weight is placed on the most recent observations when making forecasts of future values.
%%R
## The rain data:
## 1. it is roughly at a constant level - the mean stays near 25 inches
## 2. the random fluctuations seem to be roughly constant in size over
##    time, so an additive model is appropriate
## 3. no obvious seasonality (frequency = 1)
## THUS we use simple exponential smoothing (SES) for short-term forecasts
plot.ts(rain)
## beta = FALSE and gamma = FALSE disable the trend and seasonal
## components, reducing HoltWinters() to simple exponential smoothing.
## (TRUE/FALSE used instead of the reassignable T/F shorthands.)
rain.hw <- HoltWinters(rain, beta = FALSE, gamma = FALSE)
print(rain.hw)
## alpha = 0.024 (close to zero) means forecasts are based on
## both recent and less recent observations
plot(rain.hw)
## measure of fitting errors
print(paste("SSE", rain.hw$SSE))
## Forecast 8 steps ahead. The generic forecast() dispatches to the
## HoltWinters method; calling forecast.HoltWinters() directly is
## deprecated and inconsistent with the rest of this notebook.
rain.future <- forecast(rain.hw, h = 8)
plot(rain.future)
## if there is still correlation between forecast errors
## for successive predictions, the model can still be
## improved on - usually it means that simple SES
## should be replaced by a more sophisticated model
Acf(residuals(rain.hw))
## it is also a good idea to check whether the residuals follow a
## zero-mean, constant-variance Gaussian, to see if the
## prediction can be further improved
plot(density(residuals(rain.hw)))
Holt-Winters exponential smoothing without trend and without seasonal component. Call: HoltWinters(x = rain, beta = F, gamma = F) Smoothing parameters: alpha: 0.02412151 beta : FALSE gamma: FALSE Coefficients: [,1] a 24.67819 [1] "SSE 1828.8548918638"
%%R
## Linear trend, no seasonality, additive fluctuations:
## forecast by Holt's exponential smoothing.
plot.ts(skirts)
## gamma = FALSE disables the seasonal component, giving Holt's
## (double) exponential smoothing with level and slope only.
## (FALSE used instead of the reassignable F shorthand.)
## High values of alpha and beta mean the forecast relies heavily
## on the most recent values - intuitive here, since both the level
## and the slope change quite a lot over time.
skirts.holt <- HoltWinters(skirts, gamma = FALSE)
print(skirts.holt)
print(skirts.holt$SSE)
## plot the in-sample fit
plot(skirts.holt)
## forecast 10 steps into the future
plot(forecast(skirts.holt, h = 10))
## check residuals: roughly zero-mean Gaussian?
plot(density(residuals(skirts.holt)))
## lag 5 exceeds the significance bounds, but with 95% bounds one
## spurious exceedance in 20 lags is expected by chance
Acf(residuals(skirts.holt))
Holt-Winters exponential smoothing with trend and without seasonal component. Call: HoltWinters(x = skirts, gamma = F) Smoothing parameters: alpha: 0.8383481 beta : 1 gamma: FALSE Coefficients: [,1] a 529.308585 b 5.690464 [1] 16954.18
%%R
## Log of souvenir sales: trend + seasonality, additive on the log scale.
## (log.souvenir keeps its name because it is deparsed into the
## printed HoltWinters call.)
log.souvenir <- log(souvenir)
## seasonal, and roughly constant fluctuation after the transform
plot.ts(log.souvenir)
## Holt-Winters fit:
## a relatively small alpha means the level estimate draws on both
## recent and further-past values; a large gamma means the seasonal
## estimate is based on very recent values; beta = 0 means the trend
## (slope) estimate is not updated along the series.
hw_fit <- HoltWinters(log.souvenir)
print(hw_fit)
print(hw_fit$SSE)
## plot the in-sample fit
plot(hw_fit)
## forecast 24 months ahead on the log scale
plot(forecast(hw_fit, h = 24))
## inspect the residuals
plot(density(residuals(hw_fit)))
Acf(residuals(hw_fit))
## back-transform a 50-step forecast onto the original scale
future_sales <- exp(forecast(hw_fit, h = 50)$mean)
par(mfrow = c(1, 2))
plot(souvenir, col = 1)
plot(future_sales, col = 2)
Holt-Winters exponential smoothing with trend and additive seasonal component. Call: HoltWinters(x = log.souvenir) Smoothing parameters: alpha: 0.413418 beta : 0 gamma: 0.9561275 Coefficients: [,1] a 10.37661961 b 0.02996319 s1 -0.80952063 s2 -0.60576477 s3 0.01103238 s4 -0.24160551 s5 -0.35933517 s6 -0.18076683 s7 0.07788605 s8 0.10147055 s9 0.09649353 s10 0.05197826 s11 0.41793637 s12 1.18088423 [1] 2.011491
ARIMA Models
Exponential Smoothing
: Exponential smoothing methods are useful for making forecasts, and *make no assumptions about the correlations between successive values of the time series.* However, if you want to make prediction intervals for forecasts made using exponential smoothing methods, the prediction intervals require that the forecast errors are uncorrelated and are normally distributed with mean zero and constant variance.
Exponential smoothing does not take these correlations into account. Autoregressive Integrated Moving Average (ARIMA) models include an explicit statistical model for the irregular component of a time series, which allows for non-zero autocorrelations in the irregular component.
If you need to difference a time series d times to obtain a stationary series, then you have an ARIMA(p, d, q) model, where d is the order of differencing used. Use diff() in R to difference a time series.
Once the series has been made stationary by diff(), the next step is to find the parameters p and q of the ARIMA model. To do this, you usually need to examine the correlogram and partial correlogram of the stationary time series. To plot them, use acf() and pacf() in R, or Acf() and Pacf() in forecast; to get the value estimates instead of plots, pass plot = F to these functions.
d is the order of differences needed to make a series stationary (constant mean and roughly constant autocorrelation).
p and q determine the type of the fitted model (two main types: autoregression and moving average): a nonzero p indicates an autoregression model, the order of which is determined by the partial correlogram (Pacf); a nonzero q indicates a moving average model, the order of which is determined by the correlogram (Acf).
The forecast package provides the function auto.arima, which can estimate p, d, q automatically from the data (e.g. giving p = 2, q = 1).
%%R
## The skirts series is not stationary (its mean changes over time);
## after second differencing it appears to be stationary.
par(mfrow = c(3, 1))
plot(skirts)
plot(diff(skirts, differences = 1))
plot(diff(skirts, differences = 2))
## The second differences look stationary in mean and variance:
## the level of the series stays roughly constant over time,
## and so does its spread. Hence d = 2 in the ARIMA model.
par(mfrow = c(2, 1))
Acf(diff(skirts, differences = 2))  ## q ~ 0, allowing for the 5% odds
Pacf(diff(skirts, differences = 2)) ## p ~ 1
## This points to an ARIMA(p = 1, d = 2, q = 0) model.
fit <- auto.arima(skirts)
print(fit)
## auto.arima agrees: ARIMA(1, 2, 0), an autoregressive model.
## Overlay the in-sample fit on the original series.
plot.ts(skirts, col = 1)
lines(skirts - fit$residuals, col = 2)
## Forecast 10 steps into the future.
plot(forecast(fit, h = 10))
## Diagnostics: residuals should be uncorrelated (Acf) and
## roughly Normal with zero mean.
par(mfrow = c(2, 1))
Acf(fit$residuals)
plot(density(fit$residuals))
abline(v = 0, col = 2)
## Looks like a very good fit - so the ARIMA assumptions hold up.
Series: skirts ARIMA(1,2,0) Coefficients: ar1 -0.2997 s.e. 0.1424 sigma^2 estimated as 388.7: log likelihood=-193.66 AIC=391.33 AICc=391.62 BIC=394.9
%%R
## ARIMA identification for the kings' ages series.
par(mfrow = c(3, 1))
plot(kings)
plot(diff(kings, differences = 1))
plot(diff(kings, differences = 2))
## First differencing is enough, so d = 1.
## Estimate p and q from the correlogram and partial correlogram.
par(mfrow = c(1, 1))
Acf(diff(kings), lag.max = 20) # suggests q = 1
#print(Acf(diff(kings), lag.max = 20, plot = F))
Pacf(diff(kings), lag.max = 20) # suggests p = 3
#print(Pacf(diff(kings), lag.max = 20, plot = F))
## Candidate models: (p = 3, d = 1, q = 0), (p = 0, d = 1, q = 1),
## or a mixed (p = x, d = 1, q = x);
## the simplest of these is (p = 0, d = 1, q = 1).
fit <- auto.arima(kings)
print(fit) # picks (p = 0, d = 1, q = 1): a moving-average model
## Residual diagnostics: uncorrelated, roughly zero-mean Normal.
par(mfrow = c(2, 1))
Acf(fit$residuals)
plot(density(fit$residuals))
abline(v = 0, col = 2)
## Looks good enough; forecast the next 5 values.
plot(forecast(fit, h = 5))
Series: kings ARIMA(0,1,1) with drift Coefficients: ma1 drift -0.7463 0.3882 s.e. 0.1294 0.6603 sigma^2 estimated as 233.9: log likelihood=-165.76 AIC=337.53 AICc=338.17 BIC=342.67