-
-
Save bmweiner/d66e0b19ef5d97aa35d1da5cabadeb6a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Time Series Forecasting in R | |
## Example Datasets | |
# R Datasets package (e.g. AirPassengers, austres) | |
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html | |
# Census Time Series | |
# https://www.census.gov/econ/currentdata/dbsearch | |
# Data used for this demo: | |
# https://www.census.gov/econ/currentdata/dbsearch?program=MRTS&startYear=1992&endYear=2017&categories=441&dataType=SM&geoLevel=US¬Adjusted=1&submit=GET+DATA&releaseScheduleId= | |
## References | |
# https://people.duke.edu/~rnau/411home.htm | |
# http://pkg.robjhyndman.com/forecast/reference/index.html | |
# http://www.stat.pitt.edu/stoffer/tsa4/ | |
## Other Usefule packages | |
# library(lubridate) | |
# library(seasonal) | |
# library(factoextra) | |
library(forecast) | |
library(tseries) | |
# Read and explore data | |
setwd(dir="C:\\Users\\bweiner\\Desktop\\example") | |
df <- read.csv("auto_m.csv", skip = 6, nrows = 300, stringsAsFactors = FALSE) | |
summary(df) | |
plot(df$Value, xlab="Month ", ylab="Motor Vehicle and Parts Dealers (1M USD)", type='l') | |
# Roll-Up / Agglomerate if needed | |
# data is monthly | |
# Convert to Time Series class | |
y <- ts(df$Value, start=1992, frequency=12) # 25 years, 300 observations 1992 - 2016 | |
plot(y) | |
# Removing outliers | |
tsoutliers(y) | |
tsclean(y) | |
# Explore Time Series | |
tsdisplay(y) | |
# Check if series is autocorrelated (serial correlation exists) | |
Box.test(y) # h0: y is independent, e.g. rejected p < 0.05 | |
# check stationarity | |
kpss.test(y) #h0: y is stationary | |
# adf.test(y) | |
# Decompose Time Series | |
plot(stl(y, 'periodic')) | |
# Transforming the series | |
# BoxCox(y, BoxCox.lambda(y)) | |
# Fit Model | |
fit <- auto.arima(y) # takes care of transform, picks model with lowest AIC | |
# fit <- ets(y) | |
# fit <- nnetar(y) | |
fit | |
# Forecast with model | |
fcast <- forecast(fit, h=12) | |
# fcast <- stlm(y) | |
# fcast <- meanf(y) | |
plot(fcast) | |
#lines(residuals(fcast), col="brown") | |
residuals(fcast) | |
# View Forecast accuracy | |
accuracy(fc) | |
# validate residuals | |
tsdisplay(fit$residuals) | |
mean(fit$residuals, na.rm = 1) | |
Box.test(fit$residuals) | |
qqnorm(residuals(fit)) | |
qqline(residuals(fit)) | |
# split time series - test / train | |
split.ts <- function(x, test.size = 0.25){ | |
if (test.size > 0 & test.size < 1){ | |
train.obs = round(length(time(x)) * (1 - test.size)) | |
} else { | |
train.obs = length(time(x)) - test.size | |
} | |
train <- window(x, end=tsp(x)[1] + (train.obs - 1) / tsp(x)[3]) | |
test <- window(x, start=tsp(x)[1] + train.obs / tsp(x)[3]) | |
return(list(train=train, test=test)) | |
} | |
z <- split.ts(y, 12) | |
fit <- auto.arima(z$train) | |
fcast <- forecast(fit, 12) | |
accuracy(fcast, x = z$test) | |
lines(z$test) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment