bmweiner · August 16, 2017 22:20
diff --git a/ts.r b/ts.r
 # Time Series Forecasting in R

 ## Example Datasets
 # R Datasets package (e.g. AirPassengers, austres) 
 # https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html

 # Census Time Series
 # https://www.census.gov/econ/currentdata/dbsearch

 # Data used for this demo:
 # https://www.census.gov/econ/currentdata/dbsearch?program=MRTS&startYear=1992&endYear=2017&categories=441&dataType=SM&geoLevel=US&notAdjusted=1&submit=GET+DATA&releaseScheduleId=

 ## References
 # https://people.duke.edu/~rnau/411home.htm
 # http://pkg.robjhyndman.com/forecast/reference/index.html
 # http://www.stat.pitt.edu/stoffer/tsa4/

 ## Other Usefule packages
 # library(lubridate)
 # library(seasonal)
 # library(factoextra)


 library(forecast)
 library(tseries)

 # Read and explore data
 setwd(dir="C:\\Users\\bweiner\\Desktop\\example")
 df <- read.csv("auto_m.csv", skip = 6, nrows = 300, stringsAsFactors = FALSE)

 summary(df)
 plot(df$Value, xlab="Month ", ylab="Motor Vehicle and Parts Dealers (1M USD)", type='l')

 # Roll-Up / Agglomerate if needed
 # data is monthly

 # Convert to Time Series class
 y <- ts(df$Value, start=1992, frequency=12)  # 25 years, 300 observations 1992 - 2016
 plot(y)

 # Removing outliers
 tsoutliers(y)
 tsclean(y)

 # Explore Time Series
 tsdisplay(y)

 # Check if series is autocorrelated (serial correlation exists) 
 Box.test(y)  # h0: y is independent, e.g. rejected p < 0.05

 # check stationarity
 kpss.test(y)  #h0: y is stationary
 # adf.test(y)

 # Decompose Time Series
 plot(stl(y, 'periodic'))

 # Transforming the series
 # BoxCox(y, BoxCox.lambda(y))


 # Fit Model
 fit <- auto.arima(y)  # takes care of transform, picks model with lowest AIC
 # fit <- ets(y)
 # fit <- nnetar(y)

 fit

 # Forecast with model
 fcast <- forecast(fit, h=12)
 # fcast <- stlm(y)
 # fcast <- meanf(y)

 plot(fcast)
 #lines(residuals(fcast), col="brown")

 residuals(fcast)

 # View Forecast accuracy
 accuracy(fc)

 # validate residuals
 tsdisplay(fit$residuals)
 mean(fit$residuals, na.rm = 1)
 Box.test(fit$residuals)
 qqnorm(residuals(fit))
 qqline(residuals(fit))


 # split time series - test / train
 split.ts <- function(x, test.size = 0.25){
  if (test.size > 0 & test.size < 1){
    train.obs = round(length(time(x)) * (1 - test.size))
  } else {
    train.obs = length(time(x)) - test.size
  }
  train <- window(x, end=tsp(x)[1] + (train.obs - 1) / tsp(x)[3])
  test <- window(x, start=tsp(x)[1] + train.obs / tsp(x)[3])
  return(list(train=train, test=test))
 }

 z <- split.ts(y, 12)

 fit <- auto.arima(z$train)
 fcast <- forecast(fit, 12)
 accuracy(fcast, x = z$test)
 lines(z$test)
	# Time Series Forecasting in R

	## Example Datasets
	# R Datasets package (e.g. AirPassengers, austres)
	# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html

	# Census Time Series
	# https://www.census.gov/econ/currentdata/dbsearch

	# Data used for this demo:
	# https://www.census.gov/econ/currentdata/dbsearch?program=MRTS&startYear=1992&endYear=2017&categories=441&dataType=SM&geoLevel=US&notAdjusted=1&submit=GET+DATA&releaseScheduleId=

	## References
	# https://people.duke.edu/~rnau/411home.htm
	# http://pkg.robjhyndman.com/forecast/reference/index.html
	# http://www.stat.pitt.edu/stoffer/tsa4/

	## Other Usefule packages
	# library(lubridate)
	# library(seasonal)
	# library(factoextra)


	library(forecast)
	library(tseries)

	# Read and explore data
	setwd(dir="C:\\Users\\bweiner\\Desktop\\example")
	df <- read.csv("auto_m.csv", skip = 6, nrows = 300, stringsAsFactors = FALSE)

	summary(df)
	plot(df$Value, xlab="Month ", ylab="Motor Vehicle and Parts Dealers (1M USD)", type='l')

	# Roll-Up / Agglomerate if needed
	# data is monthly

	# Convert to Time Series class
	y <- ts(df$Value, start=1992, frequency=12) # 25 years, 300 observations 1992 - 2016
	plot(y)

	# Removing outliers
	tsoutliers(y)
	tsclean(y)

	# Explore Time Series
	tsdisplay(y)

	# Check if series is autocorrelated (serial correlation exists)
	Box.test(y) # h0: y is independent, e.g. rejected p < 0.05

	# check stationarity
	kpss.test(y) #h0: y is stationary
	# adf.test(y)

	# Decompose Time Series
	plot(stl(y, 'periodic'))

	# Transforming the series
	# BoxCox(y, BoxCox.lambda(y))


	# Fit Model
	fit <- auto.arima(y) # takes care of transform, picks model with lowest AIC
	# fit <- ets(y)
	# fit <- nnetar(y)

	fit

	# Forecast with model
	fcast <- forecast(fit, h=12)
	# fcast <- stlm(y)
	# fcast <- meanf(y)

	plot(fcast)
	#lines(residuals(fcast), col="brown")

	residuals(fcast)

	# View Forecast accuracy
	accuracy(fc)

	# validate residuals
	tsdisplay(fit$residuals)
	mean(fit$residuals, na.rm = 1)
	Box.test(fit$residuals)
	qqnorm(residuals(fit))
	qqline(residuals(fit))


	# split time series - test / train
	split.ts <- function(x, test.size = 0.25){
	if (test.size > 0 & test.size < 1){
	train.obs = round(length(time(x)) * (1 - test.size))
	} else {
	train.obs = length(time(x)) - test.size
	}
	train <- window(x, end=tsp(x)[1] + (train.obs - 1) / tsp(x)[3])
	test <- window(x, start=tsp(x)[1] + train.obs / tsp(x)[3])
	return(list(train=train, test=test))
	}

	z <- split.ts(y, 12)

	fit <- auto.arima(z$train)
	fcast <- forecast(fit, 12)
	accuracy(fcast, x = z$test)
	lines(z$test)