empirical_results
This page provides code for obtaining some empirical results of TSPred regarding the task of predicting the fifth gap of missing observations of the CATS dataset.
Different approaches were adopted, combining data preprocessing and data modeling with either the machine learning model SVM or the deep learning model CNN. Since the time series in CATS are mostly nonstationary, a data preprocessing step was also adopted, based on the splitting-based nonstationary time series transform EMD. The linear ARIMA model is selected as the baseline, and the results of each approach are ranked by their MSE prediction errors.
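As an optional illustration of the nonstationarity that motivates the preprocessing step, a stationarity test can be applied to the target series before running the workflow. The sketch below is not part of the original TSPred workflow and assumes the tseries package is installed; it applies the KPSS test to the fifth CATS series.
#Optional sketch (not part of the workflow below): checking nonstationarity of the 5th CATS series
#Assumes the 'tseries' package is installed
library(TSPred)
library(tseries)
data(CATS)
kpss.test(CATS[[5]]) #a small p-value is evidence against (level) stationarity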
library(TSPred)
#Installing the required tensorflow version
if(tensorflow::tf_version() < "2.0") tensorflow::install_tensorflow(version="2.0.0")
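#Composing the target data: the 5th CATS series plus its 20-observation continuation, with the last test_len observations reserved for testing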
data(CATS,CATS.cont)
data <- rbind(CATS[5],CATS.cont[5])
test_len <- nrow(CATS.cont[5])
data_subsetting <- subsetting(test_len=test_len)
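#Data preprocessing methods: Box-Cox mapping, splitting-based transforms (EMD, wavelet), normalization (min-max, adaptive), and sliding windows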
mapping_based <- list(BCT=BoxCoxT(lambda=NULL))
splitting_based <- list(EMD=EMD(),
WT=WT())
normalization <- list(MM=MinMax(),
AN=AN())
sliding_window <- SW(window_len=5)
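#Modeling methods: the ARIMA baseline plus CNN and SVM over min-max normalized sliding windows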
ARIMA_model <- ARIMA()
CNN_model <- Tensor_CNN(sw=sliding_window,proc=list(norm=normalization[["MM"]]))
SVM_model <- SVM(sw=sliding_window,proc=list(norm=normalization[["MM"]]))
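#Evaluation metric: MSE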
mse_eval <- MSE_eval()
#Baseline
tspred_arima <- tspred(subsetting=data_subsetting,
modeling=ARIMA_model,
evaluating=list(MSE=mse_eval))
tspred_arima_run <- workflow(tspred_arima,data=data,prep_test=TRUE,onestep=TRUE,eval_fitness=FALSE)
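#CNN without nonstationarity treatment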
tspred_cnn <- tspred(subsetting=data_subsetting,
processing=NULL,
modeling=CNN_model,
evaluating=list(MSE=mse_eval))
tspred_cnn_run <- workflow(tspred_cnn,data=data,prep_test=TRUE,onestep=TRUE,eval_fitness=FALSE)
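#CNN combined with the EMD transform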
tspred_cnn_proc <- tspred(subsetting=data_subsetting,
processing=list(EMD=splitting_based[["EMD"]]),
modeling=CNN_model,
evaluating=list(MSE=mse_eval))
tspred_cnn_proc_run <- workflow(tspred_cnn_proc,data=data,prep_test=TRUE,onestep=TRUE,eval_fitness=FALSE)
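#SVM without nonstationarity treatment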
tspred_svm <- tspred(subsetting=data_subsetting,
processing=NULL,
modeling=SVM_model,
evaluating=list(MSE=mse_eval))
tspred_svm_run <- workflow(tspred_svm,data=data,prep_test=TRUE,onestep=TRUE,eval_fitness=FALSE)
#=========== SVM+EMD (beat the baseline) =============
tspred_svm_proc <- tspred(subsetting=data_subsetting,
processing=list(EMD=splitting_based[["EMD"]]),
modeling=SVM_model,
evaluating=list(MSE=mse_eval))
tspred_svm_proc_run <- workflow(tspred_svm_proc,data=data,prep_test=TRUE,onestep=TRUE,eval_fitness=FALSE)
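#SVM combined with the Box-Cox transform and EMD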
tspred_svm_proc_2 <- tspred(subsetting=data_subsetting,
processing=list(BCT=mapping_based[["BCT"]],
EMD=splitting_based[["EMD"]]),
modeling=SVM_model,
evaluating=list(MSE=mse_eval))
tspred_svm_proc_2_run <- workflow(tspred_svm_proc_2,data=data,prep_test=TRUE,onestep=TRUE,eval_fitness=FALSE)
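#Benchmarking the machine learning approaches against the ARIMA baseline, ranked by MSE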
MLM_tests <- list(tspred_cnn_run,tspred_cnn_proc_run,tspred_svm_run,tspred_svm_proc_run)
benchmarking <- benchmark(tspred_arima_run,MLM_tests,rank.by=c("MSE"))
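The ranked_tspred_objs element of the benchmark result, which is also used in the plotting code further below, lists the benchmarked approaches in ranked order; printing its names gives a quick view of that ranking.
#Names of the benchmarked approaches in ranked order
names(benchmarking$ranked_tspred_objs)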
- TSPred prediction results are competitive when compared with the errors produced by the CATS competitors.
- The results highlight the importance of adopting a suitable baseline model.
- On their own, the CNN and SVM models could not outperform the baseline.
- Introducing nonstationarity treatment resulted in smaller prediction errors.
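#Gathering the predictions of each benchmarked approach into a data frame for plotting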
predictions <- data.frame()
models <- stringr::str_remove(names(benchmarking$ranked_tspred_objs), "MinMax-")
models <- stringr::str_remove(models, "Tensor_")
models <- stringr::str_replace(models, "-","+")
for(m in 1:length(benchmarking$ranked_tspred_objs)){
model <- names(benchmarking$ranked_tspred_objs)[m]
obj <- benchmarking$ranked_tspred_objs[[model]]
if(!is.null(obj$pred$postp)) pred <- obj$pred$postp[[1]]
else if(!is.null(obj$pred$raw)) pred <- obj$pred$raw[[1]]
predictions <- rbind(predictions,cbind(time=981:1000,pred=pred,model=models[m]))
}
predictions$time <- as.numeric(predictions$time)
predictions$pred <- as.numeric(predictions$pred)
predictions$model <- factor(predictions$model,levels = models)
library(ggplot2)
library(RColorBrewer)
colors <- brewer.pal(5,"Set1")
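#Predictions of ARIMA, SVM and EMD+SVM against the actual continuation of the series (dashed line)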
ggplot(predictions[predictions$model %in% c("ARIMA","SVM","EMD+SVM"),], aes(x = time, y = pred, col=model)) +
geom_line(data = CATS.cont[5], aes(x = 981:1000, y = V5), size=1.1, col="gray45", linetype = "dashed") +
geom_line(size=1) +
geom_point(size=2) +
labs(x ="Observation", y = "", col="Model", shape="Model") +
scale_colour_manual(values=colors[1:3])+
theme_bw()
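#Predictions of ARIMA, CNN and EMD+CNN against the actual continuation of the series (dashed line)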
ggplot(predictions[predictions$model %in% c("ARIMA","CNN","EMD+CNN"),], aes(x = time, y = pred, col=model)) +
geom_line(data = CATS.cont[5], aes(x = 981:1000, y = V5), size=1.1, col="gray45", linetype = "dashed") +
geom_line(size=1) +
geom_point(size=2) +
labs(x ="Observation", y = "", col="Model") +
scale_colour_manual(values=colors[c(2,4:5)])+
theme_bw()