rm(list = ls())            # Remove all previous objects from memory.
load("C:\\ALL_cod.RData")  # Load the saved RData workspace.

# Rattle is Copyright (c) 2006-2015 Togaware Pty Ltd.

#============================================================
# Rattle timestamp: 2016-05-30 21:49:08 x86_64-w64-mingw32

# Rattle version 4.1.0 user 'root'

# This log file captures all Rattle interactions as R commands.

# Export this log to a file using the Export button or the Tools menu
# to save a log of all your activity. This facilitates repeatability.
# For example, exporting to a file called 'myrf01.R' will allow you to
# type in the R Console the command source('myrf01.R') and so repeat
# all actions automatically. Generally, you will want to edit the file
# to suit your needs. You can also directly edit this current log in
# place to record additional information before exporting.

# Saving and loading projects also retains this log.

# We begin by loading the required libraries.

library(rattle)    # To access the weather dataset and utility commands.
library(magrittr)  # For the %>% and %<>% operators.

# This log generally records the process of building a model. However,
# with very little effort the log can be used to score a new dataset.
# The logical variable 'building' is used to toggle between generating
# transformations, as when building a model, and simply using the
# transformations, as when scoring a dataset.

building <- TRUE
scoring  <- !building

# A pre-defined value is used to reset the random seed so that results
# are repeatable.

crv$seed <- 42

#============================================================
# Rattle timestamp: 2016-05-30 21:49:15 x86_64-w64-mingw32

# Load an R data frame.

crs$dataset <- Rat_DF1

# Display a simple summary (structure) of the dataset.

str(crs$dataset)

#============================================================
# Rattle timestamp: 2016-05-30 21:49:16 x86_64-w64-mingw32

# Note the user selections.

# Build the training/validate/test datasets.

set.seed(crv$seed)

crs$nobs     <- nrow(crs$dataset)  # 4082 observations

crs$sample   <- crs$train <- sample(nrow(crs$dataset),
                                    0.7*crs$nobs)    # 2857 observations
crs$validate <- sample(setdiff(seq_len(nrow(crs$dataset)), crs$train),
                       0.15*crs$nobs)                # 612 observations
crs$test     <- setdiff(setdiff(seq_len(nrow(crs$dataset)), crs$train),
                        crs$validate)                # 613 observations

# The following variable selections have been noted.

crs$input <- c("X1",  "X2",  "X3",  "X4",  "X5",  "X6",  "X7",  "X8",  "X9",
               "X10", "X11", "X12", "X13", "X14", "X15", "X16", "X17", "X18",
               "X19", "X20", "X21", "X22", "X23", "X24", "X25", "X26", "X27")

crs$numeric <- crs$input  # All 27 inputs are numeric.

crs$categoric <- NULL

crs$target  <- "Short_Long.75"
crs$risk    <- NULL
crs$ident   <- NULL
crs$ignore  <- c("Base", "Short_Long.35", "Flet_Long", "Short_Flet", "Flet_In")
crs$weights <- NULL
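# Editorial addition, for illustration only (not in the original log):
# a quick sanity check that the partition sizes match the counts noted
# above (2857 + 612 + 613 = 4082).

stopifnot(length(crs$train) + length(crs$validate) + length(crs$test)
          == crs$nobs)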
#============================================================
# Rattle timestamp: 2016-05-30 21:49:21 x86_64-w64-mingw32

# Neural Network

# Build a neural network model using the nnet package.

library(nnet, quietly=TRUE)

# Build the NNet model.

set.seed(199)

# Train the network one epoch at a time (maxit=1), warm-starting each
# call from the previous weights, and keep the best network found.
# Training stops once 100 consecutive iterations pass without an
# improvement.

bestResultOnTrainDataset      <- 1
bestResultOnTestDataset       <- 1
bestResultOnValidationDataset <- 1
bestResult     <- 1
bestResultIter <- 0
iter           <- 0

# Confusion matrix with proportions and per-class error rates. Defined
# once here and reused for every evaluation below.

pcme <- function(actual, cl)
{
  x  <- table(actual, cl)
  nc <- nrow(x)                                          # Number of classes.
  nv <- length(actual) - sum(is.na(actual) | is.na(cl))  # Number of values.
  tbl <- cbind(x/nv,
               Error=sapply(1:nc,
                            function(r) round(sum(x[r,-r])/sum(x[r,]), 2)))
  names(attr(tbl, "dimnames")) <- c("Actual", "Predicted")
  return(tbl)
}

while (iter - bestResultIter < 100)
{
  iter <- iter + 1

  cat("Train iteration ", iter,
      "; best result on iteration", bestResultIter,
      "with errors (train/validate/test)",
      bestResultOnTrainDataset, "/",
      bestResultOnValidationDataset, "/",
      bestResultOnTestDataset, "\n")

  if (iter == 1)
  {
    crs$nnet <- nnet(as.factor(Short_Long.75) ~ .,
                     data=crs$dataset[crs$sample, c(crs$input, crs$target)],
                     size=200, MaxNWts=10000, trace=FALSE, maxit=1)
  } else {
    # Continue training from the current weights.
    crs$nnet <- nnet(as.factor(Short_Long.75) ~ .,
                     data=crs$dataset[crs$sample, c(crs$input, crs$target)],
                     size=200, MaxNWts=10000, trace=FALSE, maxit=1,
                     Wts=crs$nnet$wts)
  }

  # Calculate the error on the training dataset.

  crs$pr <- predict(crs$nnet,
                    newdata=crs$dataset[crs$sample, c(crs$input, crs$target)],
                    type="class")
  per <- pcme(crs$dataset[crs$sample,
                          c(crs$input, crs$target)]$Short_Long.75, crs$pr)
  resultOnTrainDataset <- 1 - sum(diag(per), na.rm=TRUE)

  # Calculate the error on the validation dataset.

  crs$pr <- predict(crs$nnet,
                    newdata=crs$dataset[crs$validate, c(crs$input, crs$target)],
                    type="class")
  per <- pcme(crs$dataset[crs$validate,
                          c(crs$input, crs$target)]$Short_Long.75, crs$pr)
  resultOnValidationDataset <- 1 - sum(diag(per), na.rm=TRUE)

  # Calculate the error on the test dataset.

  crs$pr <- predict(crs$nnet,
                    newdata=crs$dataset[crs$test, c(crs$input, crs$target)],
                    type="class")
  per <- pcme(crs$dataset[crs$test,
                          c(crs$input, crs$target)]$Short_Long.75, crs$pr)
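  # Editorial note: diag(per) holds the per-class proportions of correct
  # predictions, so 1 - sum(diag(per)) below is the overall
  # misclassification rate on the test subset.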
  resultOnTestDataset <- 1 - sum(diag(per), na.rm=TRUE)

  # Keep the network only when it improves the error on both the
  # validation and the test datasets.

  if (resultOnValidationDataset < bestResultOnValidationDataset &&
      resultOnTestDataset < bestResultOnTestDataset)
  {
    bestResultOnTrainDataset      <- resultOnTrainDataset
    bestResultOnValidationDataset <- resultOnValidationDataset
    bestResultOnTestDataset       <- resultOnTestDataset
    bestResultIter                <- iter
    bestNet                       <- crs$nnet
  }
}

crs$nnet <- bestNet

# Print the results of the modelling.

cat(sprintf("A %s network with %d weights.\n",
            paste(crs$nnet$n, collapse="-"),
            length(crs$nnet$wts)))
cat(sprintf("Inputs: %s.\n",
            paste(crs$nnet$coefnames, collapse=", ")))
cat(sprintf("Output: %s.\n",
            names(attr(crs$nnet$terms, "dataClasses"))[1]))
cat(sprintf("Sum of Squares Residuals: %.4f.\n",
            sum(residuals(crs$nnet) ^ 2)))
cat("\n")
print(summary(crs$nnet))
cat('\n')

# Time taken: 1.68 secs

#============================================================
# Rattle timestamp: 2016-05-30 21:49:26 x86_64-w64-mingw32

# Evaluate model performance.

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet,
                  newdata=crs$dataset[crs$sample, c(crs$input, crs$target)],
                  type="class")

# Generate the confusion matrix showing counts.

table(crs$dataset[crs$sample, c(crs$input, crs$target)]$Short_Long.75, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions (pcme() from above).

per <- pcme(crs$dataset[crs$sample, c(crs$input, crs$target)]$Short_Long.75,
            crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

trainDatasetError <- 1-sum(diag(per), na.rm=TRUE)

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))

#============================================================
# Rattle timestamp: 2016-05-30 21:49:32 x86_64-w64-mingw32

# Evaluate model performance.

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet,
                  newdata=crs$dataset[crs$validate, c(crs$input, crs$target)],
                  type="class")

# Generate the confusion matrix showing counts.

table(crs$dataset[crs$validate, c(crs$input, crs$target)]$Short_Long.75, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions (pcme() from above).

per <- pcme(crs$dataset[crs$validate, c(crs$input, crs$target)]$Short_Long.75,
            crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

validateDatasetError <- 1-sum(diag(per), na.rm=TRUE)

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))
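# Editorial addition, for illustration only: persist the selected
# network so it can be reloaded later without retraining. The file name
# is an assumption, not part of the original log.

saveRDS(crs$nnet, "best_nnet.rds")
# crs$nnet <- readRDS("best_nnet.rds")  # Reload in a later session.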
#============================================================
# Rattle timestamp: 2016-05-30 21:49:34 x86_64-w64-mingw32

# Evaluate model performance.

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet,
                  newdata=crs$dataset[crs$test, c(crs$input, crs$target)],
                  type="class")

# Generate the confusion matrix showing counts.

table(crs$dataset[crs$test, c(crs$input, crs$target)]$Short_Long.75, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions (pcme() from above).

per <- pcme(crs$dataset[crs$test, c(crs$input, crs$target)]$Short_Long.75,
            crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

testDatasetError <- 1-sum(diag(per), na.rm=TRUE)

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))

#============================================================
# Rattle timestamp: 2016-05-30 21:49:39 x86_64-w64-mingw32

# Evaluate model performance.

# Assign the R dataset to be used as the test set.

crs$testset <- Rat_DF2

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet, newdata=crs$testset, type="class")

# Generate the confusion matrix showing counts.

table(crs$testset$Short_Long.75, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions (pcme() from above).

per <- pcme(crs$testset$Short_Long.75, crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

Rat_DF2_DatasetError <- 1-sum(diag(per), na.rm=TRUE)

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))

#============================================================
# Rattle timestamp: 2016-05-30 21:49:39 x86_64-w64-mingw32

# Evaluate model performance.

# Assign the R dataset to be used as the test set.

crs$testset <- Rat_DF3

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet, newdata=crs$testset, type="class")

# Generate the confusion matrix showing counts.

table(crs$testset$Short_Long.75, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions (pcme() from above).

per <- pcme(crs$testset$Short_Long.75, crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

Rat_DF3_DatasetError <- 1-sum(diag(per), na.rm=TRUE)

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))

# Report the error rates across all datasets.

trainDatasetError
validateDatasetError
testDatasetError
Rat_DF2_DatasetError
Rat_DF3_DatasetError
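# Editorial addition, for illustration only: collect the five error
# rates into one named vector for a side-by-side comparison. The
# 'errors' name is an assumption, not part of the original log.

errors <- c(train    = trainDatasetError,
            validate = validateDatasetError,
            test     = testDatasetError,
            Rat_DF2  = Rat_DF2_DatasetError,
            Rat_DF3  = Rat_DF3_DatasetError)
round(100 * errors, 2)  # Error percentages per dataset.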