file1 <- "file:///C:/dummy_set_features.csv"
file2 <- "file:///C:/dummy_set_validation.csv"

# Rattle is Copyright (c) 2006-2015 Togaware Pty Ltd.

#============================================================
# Rattle timestamp: 2016-05-29 18:05:12 x86_64-w64-mingw32

# Rattle version 4.1.0 user 'root'

# This log file captures all Rattle interactions as R commands. Export this
# log to a file using the Export button or the Tools menu to save a record
# of all your activity. This facilitates repeatability. For example,
# exporting to a file called 'myrf01.R' will allow you to type
# source('myrf01.R') in the R Console and so repeat all actions
# automatically. Generally, you will want to edit the file to suit your
# needs. You can also directly edit this current log in place to record
# additional information before exporting.

# Saving and loading projects also retains this log.

# We begin by loading the required libraries.

library(rattle)   # To access the weather dataset and utility commands.
library(magrittr) # For the %>% and %<>% operators.

# This log generally records the process of building a model. However, with
# very little effort the log can be used to score a new dataset. The logical
# variable 'building' is used to toggle between generating transformations,
# as when building a model, and simply using the transformations, as when
# scoring a dataset.

building <- TRUE
scoring  <- ! building

# A pre-defined value is used to reset the random seed so that results are
# repeatable.

crv$seed <- 42

#============================================================
# Rattle timestamp: 2016-05-29 18:05:38 x86_64-w64-mingw32

# Load the data.

crs$dataset <- read.csv(file1,
                        na.strings=c(".", "NA", "", "?"),
                        strip.white=TRUE, encoding="UTF-8")

#============================================================
# Rattle timestamp: 2016-05-29 18:05:39 x86_64-w64-mingw32

# Note the user selections.

# Build the training/validate/test datasets.

set.seed(crv$seed)

crs$nobs     <- nrow(crs$dataset)  # 7328 observations

crs$sample   <- crs$train <-
  sample(nrow(crs$dataset), 0.7*crs$nobs)  # 5129 observations

crs$validate <-
  sample(setdiff(seq_len(nrow(crs$dataset)), crs$train),
         0.15*crs$nobs)  # 1099 observations

crs$test     <-
  setdiff(setdiff(seq_len(nrow(crs$dataset)), crs$train),
          crs$validate)  # 1100 observations

# The following variable selections have been noted.

crs$input <- c("input_1",  "input_2",  "input_3",  "input_4",  "input_5",
               "input_6",  "input_7",  "input_8",  "input_9",  "input_10",
               "input_11", "input_12", "input_13", "input_14", "input_15",
               "input_16", "input_17", "input_18", "input_19", "input_20")

crs$numeric <- c("input_1",  "input_2",  "input_3",  "input_4",  "input_5",
                 "input_6",  "input_7",  "input_8",  "input_9",  "input_10",
                 "input_11", "input_12", "input_13", "input_14", "input_15",
                 "input_16", "input_17", "input_18", "input_19", "input_20")

crs$categoric <- NULL
crs$target    <- "output"
crs$risk      <- NULL
crs$ident     <- NULL
crs$ignore    <- NULL
crs$weights   <- NULL

#============================================================
# Rattle timestamp: 2016-05-29 18:06:07 x86_64-w64-mingw32

# Neural Network

# Build a neural network model using the nnet package.

library(nnet, quietly=TRUE)

# Build the NNet model.

set.seed(199)
crs$nnet <- nnet(as.factor(output) ~ .,
                 data=crs$dataset[crs$sample, c(crs$input, crs$target)],
                 size=20, MaxNWts=10000, trace=FALSE, maxit=1000000)
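# Editorial addition (not part of the original Rattle log): a minimal
# sketch of persisting the fitted model so that a later scoring run can
# reload it instead of retraining, following the 'building'/'scoring'
# toggle defined above. The file name 'nnet_model.rds' is illustrative.

if (building) saveRDS(crs$nnet, "nnet_model.rds")   # save after training
if (scoring)  crs$nnet <- readRDS("nnet_model.rds") # reload for scoring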
cat(sprintf("A %s network with %d weights.\n", paste(crs$nnet$n, collapse="-"), length(crs$nnet$wts))) cat(sprintf("Inputs: %s.\n", paste(crs$nnet$coefnames, collapse=", "))) cat(sprintf("Output: %s.\n", names(attr(crs$nnet$terms, "dataClasses"))[1])) cat(sprintf("Sum of Squares Residuals: %.4f.\n", sum(residuals(crs$nnet) ^ 2))) cat("\n") print(summary(crs$nnet)) cat('\n') # Time taken: 1.71 secs #============================================================ # Rattle timestamp: 2016-05-29 18:06:17 x86_64-w64-mingw32 # Evaluate model performance. # Generate an Error Matrix for the Neural Net model. # Obtain the response from the Neural Net model. crs$pr <- predict(crs$nnet, newdata=crs$dataset[crs$sample, c(crs$input, crs$target)], type="class") # Generate the confusion matrix showing counts. table(crs$dataset[crs$sample, c(crs$input, crs$target)]$output, crs$pr, useNA="ifany", dnn=c("Actual", "Predicted")) # Generate the confusion matrix showing proportions. pcme <- function(actual, cl) { x <- table(actual, cl) nc <- nrow(x) # Number of classes. nv <- length(actual) - sum(is.na(actual) | is.na(cl)) # Number of values. tbl <- cbind(x/nv, Error=sapply(1:nc, function(r) round(sum(x[r,-r])/sum(x[r,]), 2))) names(attr(tbl, "dimnames")) <- c("Actual", "Predicted") return(tbl) } per <- pcme(crs$dataset[crs$sample, c(crs$input, crs$target)]$output, crs$pr) round(per, 2) # Calculate the overall error percentage. cat(100*round(1-sum(diag(per), na.rm=TRUE), 2)) # Calculate the averaged class error percentage. cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2)) #============================================================ # Rattle timestamp: 2016-05-29 18:06:24 x86_64-w64-mingw32 # Evaluate model performance. # Generate an Error Matrix for the Neural Net model. # Obtain the response from the Neural Net model. crs$pr <- predict(crs$nnet, newdata=crs$dataset[crs$validate, c(crs$input, crs$target)], type="class") # Generate the confusion matrix showing counts. table(crs$dataset[crs$validate, c(crs$input, crs$target)]$output, crs$pr, useNA="ifany", dnn=c("Actual", "Predicted")) # Generate the confusion matrix showing proportions. pcme <- function(actual, cl) { x <- table(actual, cl) nc <- nrow(x) # Number of classes. nv <- length(actual) - sum(is.na(actual) | is.na(cl)) # Number of values. tbl <- cbind(x/nv, Error=sapply(1:nc, function(r) round(sum(x[r,-r])/sum(x[r,]), 2))) names(attr(tbl, "dimnames")) <- c("Actual", "Predicted") return(tbl) } per <- pcme(crs$dataset[crs$validate, c(crs$input, crs$target)]$output, crs$pr) round(per, 2) # Calculate the overall error percentage. cat(100*round(1-sum(diag(per), na.rm=TRUE), 2)) # Calculate the averaged class error percentage. cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2)) #============================================================ # Rattle timestamp: 2016-05-29 18:06:28 x86_64-w64-mingw32 # Evaluate model performance. # Generate an Error Matrix for the Neural Net model. # Obtain the response from the Neural Net model. crs$pr <- predict(crs$nnet, newdata=crs$dataset[crs$test, c(crs$input, crs$target)], type="class") # Generate the confusion matrix showing counts. table(crs$dataset[crs$test, c(crs$input, crs$target)]$output, crs$pr, useNA="ifany", dnn=c("Actual", "Predicted")) # Generate the confusion matrix showing proportions. pcme <- function(actual, cl) { x <- table(actual, cl) nc <- nrow(x) # Number of classes. nv <- length(actual) - sum(is.na(actual) | is.na(cl)) # Number of values. 
#============================================================
# Rattle timestamp: 2016-05-29 18:06:28 x86_64-w64-mingw32

# Evaluate model performance.

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet,
                  newdata=crs$dataset[crs$test, c(crs$input, crs$target)],
                  type="class")

# Generate the confusion matrix showing counts.

table(crs$dataset[crs$test, c(crs$input, crs$target)]$output, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions.

pcme <- function(actual, cl)
{
  x   <- table(actual, cl)
  nc  <- nrow(x)                                         # Number of classes.
  nv  <- length(actual) - sum(is.na(actual) | is.na(cl)) # Number of values.
  tbl <- cbind(x/nv,
               Error=sapply(1:nc,
                            function(r) round(sum(x[r, -r])/sum(x[r, ]), 2)))
  names(attr(tbl, "dimnames")) <- c("Actual", "Predicted")
  return(tbl)
}
per <- pcme(crs$dataset[crs$test, c(crs$input, crs$target)]$output, crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))

#============================================================
# Rattle timestamp: 2016-05-29 18:06:44 x86_64-w64-mingw32

# Evaluate model performance.

# Read a dataset from file for testing the model.

crs$testset <- read.csv(file2,
                        na.strings=c(".", "NA", "", "?"),
                        header=TRUE, sep=",", encoding="UTF-8",
                        strip.white=TRUE)

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet,
                  newdata=crs$testset[, c(crs$input, crs$target)],
                  type="class")

# Generate the confusion matrix showing counts.

table(crs$testset[, c(crs$input, crs$target)]$output, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions.

pcme <- function(actual, cl)
{
  x   <- table(actual, cl)
  nc  <- nrow(x)                                         # Number of classes.
  nv  <- length(actual) - sum(is.na(actual) | is.na(cl)) # Number of values.
  tbl <- cbind(x/nv,
               Error=sapply(1:nc,
                            function(r) round(sum(x[r, -r])/sum(x[r, ]), 2)))
  names(attr(tbl, "dimnames")) <- c("Actual", "Predicted")
  return(tbl)
}
per <- pcme(crs$testset[, c(crs$input, crs$target)]$output, crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))
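# Editorial addition (not part of the original log): the evaluate-and-report
# steps above repeat verbatim for each data split, so a small helper can
# score any split in one call. 'evaluate_split' is a hypothetical name; it
# reuses pcme() as defined above.

evaluate_split <- function(model, data)
{
  pr  <- predict(model, newdata=data, type="class")
  per <- pcme(data$output, pr)
  list(confusion       = round(per, 2),
       overall.error   = 100*round(1-sum(diag(per), na.rm=TRUE), 2),
       avg.class.error = 100*round(mean(per[,"Error"], na.rm=TRUE), 2))
}

# For example, to re-score the external validation file loaded above:
#
#   evaluate_split(crs$nnet, crs$testset[, c(crs$input, crs$target)])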