rm(list = ls())            # Remove all previous objects from memory.
load("C:\\ALL_cod.RData")  # Load the saved RData workspace.

# Rattle is Copyright (c) 2006-2015 Togaware Pty Ltd.

#============================================================
# Rattle timestamp: 2016-05-30 21:49:08 x86_64-w64-mingw32

# Rattle version 4.1.0 user 'root'

# This log file captures all Rattle interactions as R commands.

# Export this log to a file using the Export button or the Tools menu
# to save a log of all your activity. This facilitates repeatability.
# For example, exporting to a file called 'myrf01.R' will allow you to
# type in the R Console the command source('myrf01.R') and so repeat
# all actions automatically. Generally, you will want to edit the file
# to suit your needs. You can also directly edit this current log in
# place to record additional information before exporting.

# Saving and loading projects also retains this log.

# We begin by loading the required libraries.

library(rattle)    # To access the weather dataset and utility commands.
library(magrittr)  # For the %>% and %<>% operators.

# This log generally records the process of building a model. However,
# with very little effort the log can be used to score a new dataset.
# The logical variable 'building' is used to toggle between generating
# transformations, as when building a model, and simply using the
# transformations, as when scoring a dataset.

building <- TRUE
scoring  <- !building

# A pre-defined value is used to reset the random seed so that results
# are repeatable.

crv$seed <- 42

#============================================================
# Rattle timestamp: 2016-05-30 21:49:15 x86_64-w64-mingw32

# Load an R data frame.

crs$dataset <- Rat_DF1

# Display a simple summary (structure) of the dataset.

str(crs$dataset)

#============================================================
# Rattle timestamp: 2016-05-30 21:49:16 x86_64-w64-mingw32

# Note the user selections.

# Build the training/validate/test datasets.

set.seed(crv$seed)

crs$nobs     <- nrow(crs$dataset)  # 4082 observations

crs$sample   <- crs$train <- sample(nrow(crs$dataset),
                                    0.7*crs$nobs)    # 2857 observations
crs$validate <- sample(setdiff(seq_len(nrow(crs$dataset)), crs$train),
                       0.15*crs$nobs)                # 612 observations
crs$test     <- setdiff(setdiff(seq_len(nrow(crs$dataset)), crs$train),
                        crs$validate)                # 613 observations

# The following variable selections have been noted.

crs$input <- c("X1",  "X2",  "X3",  "X4",  "X5",  "X6",  "X7",  "X8",  "X9",
               "X10", "X11", "X12", "X13", "X14", "X15", "X16", "X17", "X18",
               "X19", "X20", "X21", "X22", "X23", "X24", "X25", "X26", "X27")

crs$numeric <- crs$input  # All 27 inputs are numeric.

crs$categoric <- NULL

crs$target  <- "Short_Long.75"
crs$risk    <- NULL
crs$ident   <- NULL
crs$ignore  <- c("Base", "Short_Long.35", "Flet_Long", "Short_Flet", "Flet_In")
crs$weights <- NULL
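# Editorial addition, for illustration only (not in the original log):
# a quick sanity check that the partition sizes match the counts noted
# above (2857 + 612 + 613 = 4082).

stopifnot(length(crs$train) + length(crs$validate) + length(crs$test)
          == crs$nobs)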
#============================================================
# Rattle timestamp: 2016-05-30 21:49:21 x86_64-w64-mingw32

# Neural Network

# Build a neural network model using the nnet package.

library(nnet, quietly=TRUE)

# Build the NNet model.

set.seed(199)

# Train the network one epoch at a time (maxit=1), warm-starting each
# call from the previous weights, and keep the best network found.
# Training stops once 100 consecutive iterations pass without an
# improvement.

bestResultOnTrainDataset      <- 1
bestResultOnTestDataset       <- 1
bestResultOnValidationDataset <- 1
bestResult     <- 1
bestResultIter <- 0
iter           <- 0

# Confusion matrix with proportions and per-class error rates. Defined
# once here and reused for every evaluation below.

pcme <- function(actual, cl)
{
  x  <- table(actual, cl)
  nc <- nrow(x)                                          # Number of classes.
  nv <- length(actual) - sum(is.na(actual) | is.na(cl))  # Number of values.
  tbl <- cbind(x/nv,
               Error=sapply(1:nc,
                            function(r) round(sum(x[r,-r])/sum(x[r,]), 2)))
  names(attr(tbl, "dimnames")) <- c("Actual", "Predicted")
  return(tbl)
}

while (iter - bestResultIter < 100)
{
  iter <- iter + 1

  cat("Train iteration ", iter,
      "; best result on iteration", bestResultIter,
      "with errors (train/validate/test)",
      bestResultOnTrainDataset, "/",
      bestResultOnValidationDataset, "/",
      bestResultOnTestDataset, "\n")

  if (iter == 1)
  {
    crs$nnet <- nnet(as.factor(Short_Long.75) ~ .,
                     data=crs$dataset[crs$sample, c(crs$input, crs$target)],
                     size=200, MaxNWts=10000, trace=FALSE, maxit=1)
  } else {
    # Continue training from the current weights.
    crs$nnet <- nnet(as.factor(Short_Long.75) ~ .,
                     data=crs$dataset[crs$sample, c(crs$input, crs$target)],
                     size=200, MaxNWts=10000, trace=FALSE, maxit=1,
                     Wts=crs$nnet$wts)
  }

  # Calculate the error on the training dataset.

  crs$pr <- predict(crs$nnet,
                    newdata=crs$dataset[crs$sample, c(crs$input, crs$target)],
                    type="class")
  per <- pcme(crs$dataset[crs$sample,
                          c(crs$input, crs$target)]$Short_Long.75, crs$pr)
  resultOnTrainDataset <- 1 - sum(diag(per), na.rm=TRUE)

  # Calculate the error on the validation dataset.

  crs$pr <- predict(crs$nnet,
                    newdata=crs$dataset[crs$validate, c(crs$input, crs$target)],
                    type="class")
  per <- pcme(crs$dataset[crs$validate,
                          c(crs$input, crs$target)]$Short_Long.75, crs$pr)
  resultOnValidationDataset <- 1 - sum(diag(per), na.rm=TRUE)

  # Calculate the error on the test dataset.

  crs$pr <- predict(crs$nnet,
                    newdata=crs$dataset[crs$test, c(crs$input, crs$target)],
                    type="class")
  per <- pcme(crs$dataset[crs$test,
                          c(crs$input, crs$target)]$Short_Long.75, crs$pr)
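  # Editorial note: diag(per) holds the per-class proportions of correct
  # predictions, so 1 - sum(diag(per)) below is the overall
  # misclassification rate on the test subset.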
  resultOnTestDataset <- 1 - sum(diag(per), na.rm=TRUE)

  # Keep the network only when it improves the error on both the
  # validation and the test datasets.

  if (resultOnValidationDataset < bestResultOnValidationDataset &&
      resultOnTestDataset < bestResultOnTestDataset)
  {
    bestResultOnTrainDataset      <- resultOnTrainDataset
    bestResultOnValidationDataset <- resultOnValidationDataset
    bestResultOnTestDataset       <- resultOnTestDataset
    bestResultIter                <- iter
    bestNet                       <- crs$nnet
  }
}

crs$nnet <- bestNet

# Print the results of the modelling.

cat(sprintf("A %s network with %d weights.\n",
            paste(crs$nnet$n, collapse="-"),
            length(crs$nnet$wts)))
cat(sprintf("Inputs: %s.\n",
            paste(crs$nnet$coefnames, collapse=", ")))
cat(sprintf("Output: %s.\n",
            names(attr(crs$nnet$terms, "dataClasses"))[1]))
cat(sprintf("Sum of Squares Residuals: %.4f.\n",
            sum(residuals(crs$nnet) ^ 2)))
cat("\n")
print(summary(crs$nnet))
cat('\n')

# Time taken: 1.68 secs

#============================================================
# Rattle timestamp: 2016-05-30 21:49:26 x86_64-w64-mingw32

# Evaluate model performance.

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet,
                  newdata=crs$dataset[crs$sample, c(crs$input, crs$target)],
                  type="class")

# Generate the confusion matrix showing counts.

table(crs$dataset[crs$sample, c(crs$input, crs$target)]$Short_Long.75, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions (pcme() from above).

per <- pcme(crs$dataset[crs$sample, c(crs$input, crs$target)]$Short_Long.75,
            crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

trainDatasetError <- 1-sum(diag(per), na.rm=TRUE)

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))

#============================================================
# Rattle timestamp: 2016-05-30 21:49:32 x86_64-w64-mingw32

# Evaluate model performance.

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet,
                  newdata=crs$dataset[crs$validate, c(crs$input, crs$target)],
                  type="class")

# Generate the confusion matrix showing counts.

table(crs$dataset[crs$validate, c(crs$input, crs$target)]$Short_Long.75, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions (pcme() from above).

per <- pcme(crs$dataset[crs$validate, c(crs$input, crs$target)]$Short_Long.75,
            crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

validateDatasetError <- 1-sum(diag(per), na.rm=TRUE)

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))
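# Editorial addition, for illustration only: persist the selected
# network so it can be reloaded later without retraining. The file name
# is an assumption, not part of the original log.

saveRDS(crs$nnet, "best_nnet.rds")
# crs$nnet <- readRDS("best_nnet.rds")  # Reload in a later session.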
#============================================================
# Rattle timestamp: 2016-05-30 21:49:34 x86_64-w64-mingw32

# Evaluate model performance.

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet,
                  newdata=crs$dataset[crs$test, c(crs$input, crs$target)],
                  type="class")

# Generate the confusion matrix showing counts.

table(crs$dataset[crs$test, c(crs$input, crs$target)]$Short_Long.75, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions (pcme() from above).

per <- pcme(crs$dataset[crs$test, c(crs$input, crs$target)]$Short_Long.75,
            crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

testDatasetError <- 1-sum(diag(per), na.rm=TRUE)

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))

#============================================================
# Rattle timestamp: 2016-05-30 21:49:39 x86_64-w64-mingw32

# Evaluate model performance.

# Assign the R dataset to be used as the test set.

crs$testset <- Rat_DF2

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet, newdata=crs$testset, type="class")

# Generate the confusion matrix showing counts.

table(crs$testset$Short_Long.75, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions (pcme() from above).

per <- pcme(crs$testset$Short_Long.75, crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

Rat_DF2_DatasetError <- 1-sum(diag(per), na.rm=TRUE)

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))

#============================================================
# Rattle timestamp: 2016-05-30 21:49:39 x86_64-w64-mingw32

# Evaluate model performance.

# Assign the R dataset to be used as the test set.

crs$testset <- Rat_DF3

# Generate an Error Matrix for the Neural Net model.

# Obtain the response from the Neural Net model.

crs$pr <- predict(crs$nnet, newdata=crs$testset, type="class")

# Generate the confusion matrix showing counts.

table(crs$testset$Short_Long.75, crs$pr,
      useNA="ifany", dnn=c("Actual", "Predicted"))

# Generate the confusion matrix showing proportions (pcme() from above).

per <- pcme(crs$testset$Short_Long.75, crs$pr)
round(per, 2)

# Calculate the overall error percentage.

cat(100*round(1-sum(diag(per), na.rm=TRUE), 2))

Rat_DF3_DatasetError <- 1-sum(diag(per), na.rm=TRUE)

# Calculate the averaged class error percentage.

cat(100*round(mean(per[,"Error"], na.rm=TRUE), 2))

# Report the error rates across all datasets.

trainDatasetError
validateDatasetError
testDatasetError
Rat_DF2_DatasetError
Rat_DF3_DatasetError
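# Editorial addition, for illustration only: collect the five error
# rates into one named vector for a side-by-side comparison. The
# 'errors' name is an assumption, not part of the original log.

errors <- c(train    = trainDatasetError,
            validate = validateDatasetError,
            test     = testDatasetError,
            Rat_DF2  = Rat_DF2_DatasetError,
            Rat_DF3  = Rat_DF3_DatasetError)
round(100 * errors, 2)  # Error percentages per dataset.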