# ForeCA calculations need the data as a matrix, not as a data.frame.
# The commented calls below show how the matrices can be built from CSV files:
# trainData <- as.matrix(read.csv("D:\\trainData.csv", header=TRUE, sep=";",
#   na.strings=c(".", "NA", "", "?"), strip.white=TRUE, encoding="UTF-8",
#   check.names=TRUE))
# trainDataFT <- as.matrix(read.csv("D:\\fronttestData.csv", header=TRUE,
#   sep=";", na.strings=c(".", "NA", "", "?"), strip.white=TRUE,
#   encoding="UTF-8", check.names=TRUE))
load("D:\\trainData.RData")

library(ForeCA)

# Both matrices are used below but nothing in this script creates them
# explicitly, so fail early with a clear message if they are missing.
# NOTE(review): presumably the .RData file provides both objects - confirm.
if (!exists("trainData") || !exists("trainDataFT")) {
  stop("Both 'trainData' and 'trainDataFT' must exist after loading the data",
       call. = FALSE)
}

# Turn regression output into 0/1 class labels.
#
# regr: numeric vector of regression predictions.
# Returns the values rounded to the nearest integer and clamped to [0, 1];
# names and NA values of the input are kept.
RegressionToClasses01 <- function(regr) {
  pmin(pmax(round(regr), 0), 1)
}

if (nrow(trainData) <= ncol(trainData) - 1) {
  stop("Number of rows should be more than number of predictors")
}

# The last column is the target. It must be numerical (not a factor) because
# the linear model in this example is used for regression.
targetName <- colnames(trainData)[ncol(trainData)]
predictorColnames <- colnames(trainData)[-ncol(trainData)]

# Whiten the predictors. The column means and the whitening matrix are kept
# so that the fronttest data can be transformed in exactly the same way.
trainDataWhitenedObj <- whiten(trainData[, predictorColnames, drop = FALSE])
trainDataColMeans <- colMeans(trainData[, predictorColnames, drop = FALSE])
trainDataWhitened <- sweep(
  trainData[, predictorColnames, drop = FALSE],
  2, trainDataColMeans, FUN = "-"
) %*% trainDataWhitenedObj$whitening
# trainDataWhitened <- trainDataWhitenedObj$U
# (you can use this too, it is already pre-calculated by the model)

# Amount of created ForeCA components
ForeCA_components <- length(predictorColnames)
forecaObj <- foreca(series = trainDataWhitened, n.comp = ForeCA_components,
                    plot = TRUE)
summary(forecaObj)
plot(forecaObj)

#########################
#Train ForeCA + lm model#
#########################

# Center column-wise with sweep(): a plain `matrix - vector` recycles the
# vector down the rows, which does not subtract one value per column.
# Assumes forecaObj$center holds one value per column - TODO confirm.
scoreMatrix <- sweep(trainDataWhitened, 2, forecaObj$center, FUN = "-") %*%
  forecaObj$loadings
# scoreMatrix <- forecaObj$scores
# (you can use this too, it is already pre-calculated by the model)
componentNames <- colnames(scoreMatrix)
scoreMatrix <- as.data.frame(scoreMatrix)
scoreMatrix[, targetName] <- trainData[, targetName]

lmModel <- lm(reformulate(componentNames, response = targetName),
              data = scoreMatrix)
predictResultsForTrain <- predict(
  object = lmModel,
  newdata = scoreMatrix[, -ncol(scoreMatrix), drop = FALSE]
)
predictResultsForTrain <- RegressionToClasses01(predictResultsForTrain)

# The fronttest data goes through the same steps as the training data:
# subtract the training column means, apply the training whitening matrix,
# and only then project onto the ForeCA loadings.
fronttestDataWhitened <- sweep(
  trainDataFT[, predictorColnames, drop = FALSE],
  2, trainDataColMeans, FUN = "-"
) %*% trainDataWhitenedObj$whitening
scoreMatrixFronttest <- sweep(fronttestDataWhitened, 2, forecaObj$center,
                              FUN = "-") %*% forecaObj$loadings
scoreMatrixFronttest <- as.data.frame(scoreMatrixFronttest)
predictResultsForFronttest <- predict(object = lmModel,
                                      newdata = scoreMatrixFronttest)
predictResultsForFronttest <- RegressionToClasses01(predictResultsForFronttest)

# print() is explicit so the results also show up when the script is run
# through source(), where top-level values are not auto-printed.
print("Accuracy on train data using ForeCA + lm")
print(mean(predictResultsForTrain == trainData[, targetName]))
print("Accuracy on fronttest data using ForeCA + lm")
print(mean(predictResultsForFronttest == trainDataFT[, targetName]))

################
#Train lm model#
################
rm(lmModel, predictResultsForTrain, predictResultsForFronttest,
   fronttestDataWhitened, scoreMatrixFronttest)

# Baseline: the same linear model fitted directly on the raw predictors.
lmModel <- lm(reformulate(predictorColnames, response = targetName),
              data = data.frame(trainData))
predictResultsForTrain <- predict(
  object = lmModel,
  newdata = data.frame(trainData[, predictorColnames, drop = FALSE])
)
predictResultsForTrain <- RegressionToClasses01(predictResultsForTrain)

predictResultsForFronttest <- predict(
  object = lmModel,
  newdata = data.frame(trainDataFT[, predictorColnames, drop = FALSE])
)
predictResultsForFronttest <- RegressionToClasses01(predictResultsForFronttest)

print("Accuracy on train data using only lm")
print(mean(predictResultsForTrain == trainData[, targetName]))
print("Accuracy on fronttest data using only lm")
print(mean(predictResultsForFronttest == trainDataFT[, targetName]))

# Train ForeCA + lm model, compare results with different number of components
#
# It is possible to generate all components once and take fewer columns from
# the scores and loadings matrices
# (number of columns = number of required components).
# Or just execute foreca() again with a different component count in n.comp.
# This section reuses the single fitted model and takes the first i columns
# of its loadings matrix for every component count i.

rm(lmModel, predictResultsForTrain, predictResultsForFronttest)
dev.new()

# One accuracy value per component count, preallocated instead of grown.
trainingAccuracy <- numeric(length(predictorColnames))
fronttestAccuracy <- numeric(length(predictorColnames))

# Loop-invariant preparation. The fronttest data is transformed exactly like
# the training data: subtract the training column means, apply the training
# whitening matrix, then center column-wise. sweep() is used for centering
# because a plain `matrix - vector` recycles the vector down the rows.
# Assumes forecaObj$center holds one value per column - TODO confirm.
fronttestDataWhitened <- sweep(
  trainDataFT[, predictorColnames, drop = FALSE],
  2, trainDataColMeans, FUN = "-"
) %*% trainDataWhitenedObj$whitening
trainDataCentered <- sweep(trainDataWhitened, 2, forecaObj$center, FUN = "-")
fronttestDataCentered <- sweep(fronttestDataWhitened, 2, forecaObj$center,
                               FUN = "-")

for (i in seq_along(predictorColnames)) {
  # drop = FALSE keeps a one-column matrix / data.frame when i == 1, so the
  # single-component model is fitted and evaluated like all the others.
  loadingsSubset <- forecaObj$loadings[, seq_len(i), drop = FALSE]

  scoreMatrix <- trainDataCentered %*% loadingsSubset
  colnames(scoreMatrix) <- colnames(forecaObj$loadings)[seq_len(i)]
  componentNames <- colnames(scoreMatrix)
  scoreMatrix <- as.data.frame(scoreMatrix)
  scoreMatrix[, targetName] <- trainData[, targetName]

  lmModel <- lm(reformulate(componentNames, response = targetName),
                data = scoreMatrix)
  predictResultsForTrain <- predict(
    object = lmModel,
    newdata = scoreMatrix[, -ncol(scoreMatrix), drop = FALSE]
  )
  predictResultsForTrain <- RegressionToClasses01(predictResultsForTrain)

  scoreMatrixFronttest <- fronttestDataCentered %*% loadingsSubset
  colnames(scoreMatrixFronttest) <- colnames(forecaObj$loadings)[seq_len(i)]
  scoreMatrixFronttest <- as.data.frame(scoreMatrixFronttest)
  predictResultsForFronttest <- predict(object = lmModel,
                                        newdata = scoreMatrixFronttest)
  predictResultsForFronttest <- RegressionToClasses01(predictResultsForFronttest)

  trainingAccuracy[i] <- mean(predictResultsForTrain == trainData[, targetName])
  fronttestAccuracy[i] <- mean(
    predictResultsForFronttest == trainDataFT[, targetName]
  )
}

# Draw the comparison once, after every component count has been evaluated.
plot(trainingAccuracy, type = "l", ylab = "Accuracy",
     xlab = "Number of components",
     main = "Accuracy of prediction based on components count",
     col = "blue", ylim = c(0, 1))
lines(fronttestAccuracy, type = "l", col = "red")
legend(x = "bottomright", legend = c("training", "fronttest"),
       fill = c("blue", "red"))
grid()