library(ForeCA)

# ForeCA + linear model pipeline ----
#
# Expects a data frame `trainData` to exist already: every column except the
# last is a predictor and the last column is the target. The rows are split
# 50%/50% in their existing order (first half = training, second half =
# validation), forecastable components are fitted on the training half only,
# and a linear model on those components is scored on both halves.

# Number of forecastable components to extract.
ForeCA_components <- 2

# The last column is the target; everything before it is a predictor.
targetName <- tail(colnames(trainData), 1)
predictorColnames <- colnames(trainData)[-ncol(trainData)]

# Split the rows into two halves for training / validation.
# seq_len() stays empty for an empty table, where 1:0 would yield c(1, 0).
rowSampleTrain <- seq_len(round(nrow(trainData) / 2))
rowSampleValidate <- setdiff(seq_len(nrow(trainData)), rowSampleTrain)

# The series matrix handed to foreca() must have full column rank, so drop
# linearly dependent predictors first. qr() pivots the columns it finds
# (near-)collinear to the end, so the first `rank` pivot entries identify a
# set of linearly independent predictors. Positions of small eigenvalues must
# NOT be used as column indices: eigenvalues are not tied to individual
# columns. The loop re-checks the reduced set and shrinks strictly on every
# pass, so it always terminates.
repeat {
  if (length(predictorColnames) == 0) {
    stop("No linearly independent predictors left in trainData.",
         call. = FALSE)
  }
  covMatrix <- cov(
    as.matrix(trainData[rowSampleTrain, predictorColnames, drop = FALSE])
  )
  covQr <- qr(covMatrix)
  covMatrixRank <- covQr$rank
  if (covMatrixRank == ncol(covMatrix)) {
    break
  }
  # sort() keeps the surviving predictors in their original column order.
  independentCols <- sort(covQr$pivot[seq_len(covMatrixRank)])
  predictorColnames <- predictorColnames[independentCols]
}

# Fit the forecastable components on the training half only.
trainPredictors <- as.matrix(
  trainData[rowSampleTrain, predictorColnames, drop = FALSE]
)
forecaObj <- foreca(series = trainPredictors,
                    n.comp = ForeCA_components,
                    plot = TRUE)
summary(forecaObj)
dev.new()
plot(forecaObj)

# Project the training data onto the components: (X - center) %*% loadings.
# The centre is one value per column, so it has to be swept out along the
# column margin; a plain `X - center` recycles the vector down the rows and
# silently produces wrong scores.
# forecaObj$scores should hold the same training scores (presumably; verify
# against the installed ForeCA version).
scoreMatrix <- sweep(trainPredictors, 2, forecaObj$center, "-") %*%
  as.matrix(forecaObj$loadings)
componentNames <- colnames(scoreMatrix)
scoreMatrix <- as.data.frame(scoreMatrix)
scoreMatrix[, targetName] <- trainData[rowSampleTrain, targetName]

# Linear model: target ~ ForeC_1 + ... + ForeC_k.
lmModel <- lm(
  as.formula(
    paste(targetName, paste(componentNames, collapse = ' + '), sep = ' ~ ')
  ),
  data = scoreMatrix
)

# Select the component columns by name with drop = FALSE so `newdata` stays a
# data frame even when only one component was requested.
predictResultsForTrain <- predict(
  object = lmModel,
  newdata = scoreMatrix[, componentNames, drop = FALSE]
)
# Predictions are rounded to the nearest integer so they can be compared with
# the target for exact equality below.
predictResultsForTrain <- round(predictResultsForTrain)

# Project the validation half with the centre and loadings learned on the
# training half.
validatePredictors <- as.matrix(
  trainData[rowSampleValidate, predictorColnames, drop = FALSE]
)
scoreMatrixValidate <- sweep(validatePredictors, 2, forecaObj$center, "-") %*%
  as.matrix(forecaObj$loadings)
scoreMatrixValidate <- as.data.frame(scoreMatrixValidate)
predictResultsForValidation <- predict(object = lmModel,
                                       newdata = scoreMatrixValidate)
predictResultsForValidation <- round(predictResultsForValidation)

# Explicit print() so the results also appear when the script is run through
# source(), where top-level values are not auto-printed.
print("Accuracy on train data")
print(mean(predictResultsForTrain == trainData[rowSampleTrain, targetName]))
print("Accuracy on validation data")
print(
  mean(predictResultsForValidation == trainData[rowSampleValidate, targetName])
)