Skip to content

Latest commit

 

History

History
1555 lines (1400 loc) · 54.5 KB

HDP.md

File metadata and controls

1555 lines (1400 loc) · 54.5 KB
title author date output
Large-scale structure-activity relationship of host defense peptides
Saw Simeon, Hao Li, Theeraphon Piacham, Virapong Prachayasittikul and Chanin Nantasenamat
October 13, 2015
html_document

### HDPs having their respective activity were merged with the negative data set to build separate models

## $cancer
##           Training set Internal validation External set
## ACC_Mean       99.2613             97.5583      97.6046
## ACC_SD          0.1433              0.3102       0.6682
## Sens_Mean      99.3600             98.3807      98.3492
## Sens_SD         0.1644              0.2511       0.6887
## Spec_Mean      98.8989             94.5317      94.9041
## Spec_SD         0.5444              0.9232       2.1142
## MCC_Mean        0.9780              0.9274       0.9287
## MCC_SD          0.0043              0.0092       0.0201
## 
## $fungus
##           Training set Internal validation External set
## ACC_Mean       99.1317             96.9126      97.1210
## ACC_SD          0.1578              0.2887       0.5901
## Sens_Mean      99.3237             96.8298      96.9850
## Sens_SD         0.1842              0.3971       0.9161
## Spec_Mean      98.9841             96.9786      97.2420
## Spec_SD         0.2723              0.3417       0.8210
## MCC_Mean        0.9824              0.9373       0.9416
## MCC_SD          0.0032              0.0059       0.0120
## 
## $bacteria
##           Training set Internal validation External set
## ACC_Mean       99.1115             96.9422      97.0605
## ACC_SD          0.1624              0.2981       0.6091
## Sens_Mean      99.3654             96.9297      97.1218
## Sens_SD         0.1644              0.4589       1.0143
## Spec_Mean      98.9161             96.9535      97.0302
## Spec_SD         0.2763              0.2843       0.8232
## MCC_Mean        0.9820              0.9379       0.9404
## MCC_SD          0.0033              0.0061       0.0124
## 
## $virus
##           Training set Internal validation External set
## ACC_Mean       99.1646             96.0039      95.9189
## ACC_SD          0.1988              0.4324       0.8257
## Sens_Mean      98.4089             91.5197      91.3986
## Sens_SD         0.4843              1.0566       2.3605
## Spec_Mean      99.3926             97.3520      97.2890
## Spec_SD         0.2455              0.3853       0.9133
## MCC_Mean        0.9765              0.8876       0.8848
## MCC_SD          0.0056              0.0123       0.0238

### HDPs with anti-bacterial, anti-cancer, anti-fungal and anti-viral activity were combined into one set to represent HDPs, while the negative set was used as non-HDPs ###

##           Training set Internal validation External set
## ACC_Mean       99.3873             98.1257      98.1922
## ACC_SD          0.0763              0.1237       0.2867
## Sens_Mean      99.4387             98.5458      98.5944
## Sens_SD         0.0878              0.1001       0.2640
## Spec_Mean      99.1291             95.9758      96.1536
## Spec_SD         0.1609              0.5045       1.1747
## MCC_Mean        0.9781              0.9325       0.9350
## MCC_SD          0.0027              0.0045       0.0104

###HDPs having their respective activity were combined together to build a union predictive model###

Results for training

##           Bacteria  Cancer  Fungus   Virus   Overall
## ACC_Mean   96.0307 96.0505 96.0500 96.0401 96.042825
## ACC_SD      0.2174  0.1845  0.2185  0.2011  0.205375
## Sens_Mean  96.1914 96.2048 96.1826 96.1909 96.192425
## Sens_SD     0.2364  0.2113  0.2100  0.2265  0.221050
## Spec_Mean  93.3186 93.4566 93.7839 93.5015 93.515150
## Spec_SD     1.5234  1.5337  1.5985  1.5726  1.557050
## MCC_Mean    0.7225  0.7241  0.7239  0.7234  0.723475
## MCC_SD      0.0169  0.0145  0.0170  0.0158  0.016050

Results for CV

##           Bacteria  Cancer  Fungus   Virus   Overall
## ACC_Mean   92.9778 93.0262 93.0751 93.0796 93.039675
## ACC_SD      0.2731  0.2657  0.2426  0.2499  0.257825
## Sens_Mean  97.9807 98.0357 98.0257 98.0608 98.025725
## Sens_SD     0.2337  0.2109  0.2006  0.2191  0.216075
## Spec_Mean  42.9199 42.8544 43.5194 43.2379 43.132900
## Spec_SD     1.8937  2.0311  1.8305  1.9767  1.933000
## MCC_Mean    0.5057  0.5078  0.5130  0.5124  0.509725
## MCC_SD      0.0201  0.0202  0.0181  0.0189  0.019325

Results for Testing

##           Bacteria  Cancer  Fungus   Virus   Overall
## ACC_Mean   93.3841 93.3183 93.3104 93.2103 93.305775
## ACC_SD      0.5439  0.5368  0.5752  0.5747  0.557650
## Sens_Mean  94.6636 94.7143 94.5986 94.5303 94.626700
## Sens_SD     0.4644  0.3942  0.3781  0.4320  0.417175
## Spec_Mean  71.5641 70.7074 71.3498 70.4909 71.028050
## Spec_SD     5.1636  6.2726  6.3225  6.0771  5.958950
## MCC_Mean    0.5263  0.5260  0.5206  0.5114  0.521075
## MCC_SD      0.0452  0.0397  0.0418  0.0449  0.042900

### Functions and Scripts for Analysis ### ##### HDPs having their respective activity were merged with the negative data set to build separate models #####

### read FASTA
# One FASTA file per activity class plus the negative (non-HDP) set.
library(protr)
cancer <- readFASTA("cancer.fasta")
fungus <- readFASTA("fungus.fasta")
bacteria <- readFASTA("bacteria.fasta")
virus <- readFASTA("virus.fasta")
negative <- readFASTA("negative_Protein.fasta")

### remove weird proteins
# Keep only the sequences for which protr::protcheck() returns TRUE.
cancer <- cancer[(sapply(cancer, protcheck))]
fungus <- fungus[(sapply(fungus, protcheck))]
bacteria <- bacteria[(sapply(bacteria, protcheck))]
virus <- virus[(sapply(virus, protcheck))]
negative <- negative[(sapply(negative, protcheck))]


### Descriptors generation
# extractAAC() is applied to every sequence; t() puts one sequence per row.
cancer_des <- t(sapply(cancer, extractAAC))
fungus_des <- t(sapply(fungus, extractAAC))
bacteria_des <- t(sapply(bacteria, extractAAC))
virus_des <- t(sapply(virus, extractAAC))
negative_des <- t(sapply(negative, extractAAC))
#### attach the class labels
cancer_des <- as.data.frame(cancer_des)
cancer_des$Label <- "Cancer"
fungus_des <- as.data.frame(fungus_des)
fungus_des$Label <- "Fungus"
bacteria_des <- as.data.frame(bacteria_des)
bacteria_des$Label <- "Bacteria"
virus_des <- as.data.frame(virus_des)
virus_des$Label <- "Virus"
negative_des <- as.data.frame(negative_des)
negative_des$Label <- "Negative"

# Pair each activity class with the negative set to get four two-class
# data sets; Label becomes a factor for the classifier.
cancer <- rbind(cancer_des, negative_des)
cancer$Label <- as.factor(cancer$Label)
fungus <- rbind(fungus_des, negative_des)
fungus$Label <- as.factor(fungus$Label)
# Fixed: this data set used to be built from fungus_des, which made the
# "bacteria" model a second copy of the fungus model.
bacteria <- rbind(bacteria_des, negative_des)
bacteria$Label <- as.factor(bacteria$Label)
virus <- rbind(virus_des, negative_des)
virus$Label <- as.factor(virus$Label)



# Named list consumed by the parallel model-building step below.
input <- list(cancer = cancer, fungus = fungus, bacteria = bacteria, 
              virus = virus)



#### training results using J48
J48_training <- function(x) {
  # Repeats 100 times: draw an 80% partition of x on Label, fit a J48 tree on
  # it, and keep the confusion matrix reported by summary() of the fitted
  # model, flattened to a numeric vector.
  # x: data frame with a factor column `Label` plus the predictor columns.
  # Returns a list of 100 numeric vectors.
  lapply(seq_len(100), function(run) {
    picked <- caret::createDataPartition(x$Label, p = 0.80, list = FALSE)
    fit_rows <- x[picked, ]
    tree <- RWeka::J48(Label~., data = fit_rows)
    as.numeric(summary(tree)$confusionMatrix)
  })
}

mean_and_sd <- function(x) {
  # Mean and standard deviation of x with missing values ignored, both
  # rounded to four decimal places. Returns c(mean, sd).
  moments <- c(mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
  round(moments, digits = 4)
}

J48_train <- function(x) {
  # Summarises the training-set performance of J48 over the repeated splits
  # produced by J48_training(x).
  # Returns a one-column data frame whose rows are the mean and SD of
  # accuracy, sensitivity, specificity (all in percent) and MCC.
  #
  # Each run contributes four confusion-matrix cells (rows 1-4 below).
  # NOTE(review): the formulas treat cell 4 as the true-positive count and
  # cell 1 as the true-negative count; confirm this matches the class order
  # and orientation of the confusion matrix.
  cells <- as.matrix(data.frame(J48_training(x)))
  c1 <- cells[1, ]
  c2 <- cells[2, ]
  c3 <- cells[3, ]
  c4 <- cells[4, ]

  # All four metrics are computed for every run at once.
  ACC <- (c1 + c4) / (c1 + c2 + c3 + c4) * 100
  SENS <- (c4) / (c3 + c4) * 100
  SPEC <- (c1 / (c1 + c2)) * 100
  mcc_top <- (c1 * c4) - (c2 * c3)
  mcc_bottom <- sqrt((c4 + c2) * (c4 + c3)) * sqrt((c1 + c2) * (c1 + c3))
  MCC <- mcc_top / mcc_bottom

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(c(results_ACC, results_SENS, results_SPEC, results_MCC))
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}

#### 10-fold results using J48
J48_10fold <- function(x) {
  # Repeats 100 times: draw an 80% partition of x on Label, fit a J48 tree on
  # it, and evaluate the tree with RWeka's 10-fold evaluation (numFolds = 10,
  # seed = 1). The resulting confusion matrix is flattened to a numeric
  # vector.
  # Returns a list of 100 numeric vectors.
  lapply(seq_len(100), function(run) {
    picked <- caret::createDataPartition(x$Label, p = 0.80, list = FALSE)
    fit_rows <- x[picked, ]
    tree <- RWeka::J48(Label~., data = fit_rows)
    cv <- RWeka::evaluate_Weka_classifier(tree, numFolds = 10, complexity = FALSE, seed = 1, class = TRUE)
    as.numeric(cv$confusionMatrix)
  })
}

J48_cross_validation <- function(x) {
  # Summarises the 10-fold cross-validation performance of J48 over the
  # repeated splits produced by J48_10fold(x).
  # Returns a one-column data frame whose rows are the mean and SD of
  # accuracy, sensitivity, specificity (all in percent) and MCC.
  #
  # NOTE(review): the formulas treat cell 4 as the true-positive count and
  # cell 1 as the true-negative count; confirm this matches the class order
  # and orientation of the confusion matrix.
  folds <- as.matrix(data.frame(J48_10fold(x)))
  k1 <- folds[1, ]
  k2 <- folds[2, ]
  k3 <- folds[3, ]
  k4 <- folds[4, ]

  ACC <- (k1 + k4) / (k1 + k2 + k3 + k4) * 100
  SENS <- (k4) / (k3 + k4) * 100
  SPEC <- (k1 / (k1 + k2)) * 100
  numerator <- (k1 * k4) - (k2 * k3)
  denominator <- sqrt((k4 + k2) * (k4 + k3)) * sqrt((k1 + k2) * (k1 + k3))
  MCC <- numerator / denominator

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(c(results_ACC, results_SENS, results_SPEC, results_MCC))
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}

### testing results using J48
J48_testing <- function(x) {
  # Repeats 100 times: draw an 80% partition of x on Label, fit a J48 tree on
  # it, and evaluate the tree on the remaining rows (passed as newdata). The
  # resulting confusion matrix is flattened to a numeric vector.
  # Returns a list of 100 numeric vectors.
  lapply(seq_len(100), function(run) {
    picked <- caret::createDataPartition(x$Label, p = 0.80, list = FALSE)
    fit_rows <- x[picked, ]
    held_out <- x[-picked, ]
    tree <- RWeka::J48(Label~., data = fit_rows)
    ext <- RWeka::evaluate_Weka_classifier(tree, newdata = held_out, numFolds = 0, complexity = FALSE, seed = 1, class = TRUE)
    as.numeric(ext$confusionMatrix)
  })
}


J48_external <- function(x) {
  # Summarises the held-out (external) performance of J48 over the repeated
  # splits produced by J48_testing(x).
  # Returns a one-column data frame whose rows are the mean and SD of
  # accuracy, sensitivity, specificity (all in percent) and MCC.
  #
  # NOTE(review): the formulas treat cell 4 as the true-positive count and
  # cell 1 as the true-negative count; confirm this matches the class order
  # and orientation of the confusion matrix.
  held <- as.matrix(data.frame(J48_testing(x)))
  e1 <- held[1, ]
  e2 <- held[2, ]
  e3 <- held[3, ]
  e4 <- held[4, ]

  ACC <- (e1 + e4) / (e1 + e2 + e3 + e4) * 100
  SENS <- (e4) / (e3 + e4) * 100
  SPEC <- (e1 / (e1 + e2)) * 100
  cross_diff <- (e1 * e4) - (e2 * e3)
  scale <- sqrt((e4 + e2) * (e4 + e3)) * sqrt((e1 + e2) * (e1 + e3))
  MCC <- cross_diff / scale

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(c(results_ACC, results_SENS, results_SPEC, results_MCC))
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}

results_J48 <- function(x) {
  # Runs the three J48 evaluations (training, 10-fold CV, external set) on x
  # in that order and puts their summaries side by side.
  # Returns a data frame with one column per evaluation.
  summary_table <- data.frame(
    Training = J48_train(x),
    Cross_Validation = J48_cross_validation(x),
    Testing = J48_external(x)
  )
  names(summary_table) <- c("Training set", "Internal validation", "External set")
  summary_table
}

# Build the four per-activity models in parallel, one list element of `input`
# per task, on an 8-worker cluster.
suppressPackageStartupMessages(library(parallel))
suppressPackageStartupMessages(library(doSNOW))
cl <- makeCluster(8)
registerDoSNOW(cl)

# tryCatch(finally = ) shuts the workers down even when exporting or model
# building fails; previously an error left the 8 worker processes running.
result_J48_performance <- tryCatch({
  # Workers start empty: ship every object defined so far (functions + data).
  clusterExport(cl = cl, ls())
  parLapply(cl = cl, input, function(x) {
    suppressWarnings(results_J48(x))
  })
}, finally = stopCluster(cl))
print(result_J48_performance)

##### HDPs with anti-bacterial, anti-cancer, anti-fungal and anti-viral activity were combined into one set to represent HDPs, while the negative set was used as non-HDPs #####

### read FASTA
library(protr)
cancer <- readFASTA("cancer.fasta")
fungus <- readFASTA("fungus.fasta")
bacteria <- readFASTA("bacteria.fasta")
virus <- readFASTA("virus.fasta")
negative <- protr::readFASTA("negative_Protein.fasta")

### keep only the sequences that pass protcheck()
cancer <- cancer[sapply(cancer, protcheck)]
fungus <- fungus[sapply(fungus, protcheck)]
bacteria <- bacteria[sapply(bacteria, protcheck)]
virus <- virus[sapply(virus, protcheck)]
negative <- negative[sapply(negative, protcheck)]


### Descriptors generation: extractAAC() per sequence, one sequence per row
cancer_des <- as.data.frame(t(sapply(cancer, extractAAC)))
fungus_des <- as.data.frame(t(sapply(fungus, extractAAC)))
bacteria_des <- as.data.frame(t(sapply(bacteria, extractAAC)))
virus_des <- as.data.frame(t(sapply(virus, extractAAC)))
negative_des <- as.data.frame(t(sapply(negative, extractAAC)))

#### pool the four activity classes into a single "Positive" class
hdp <- as.data.frame(rbind(cancer_des, fungus_des, bacteria_des, virus_des))
hdp$Label <- "Positive"
negative_des$Label <- "Negative"

#### two-class data set: pooled HDPs vs. the negative set
data <- rbind(hdp, negative_des)
data$Label <- as.factor(data$Label)


results_J48 <- function(x) {
  # Evaluates J48 on x three ways, in this order: on the training partition,
  # by 10-fold cross-validation, and on the held-out partition.
  # Returns a data frame with one summary column per evaluation.
  combined <- data.frame(
    Training = J48_train(x),
    Cross_Validation = J48_cross_validation(x),
    Testing = J48_external(x)
  )
  header <- c("Training set", "Internal validation", "External set")
  names(combined) <- header
  combined
}

# Evaluate J48 on the pooled Positive-vs-Negative data set (messages
# suppressed) and print the resulting summary table.
result_J48_performance <- suppressMessages(results_J48(data))
print(result_J48_performance)

#####HDPs having their respective activity were combined together to build a union predictive model#####

library(protr)

### read each FASTA file, then keep only the sequences that pass protcheck()
cancer <- readFASTA("cancer.fasta")
fungus <- readFASTA("fungus.fasta")
bacteria <- readFASTA("bacteria.fasta")
virus <- readFASTA("virus.fasta")
negative <- protr::readFASTA("negative_Protein.fasta")

cancer <- cancer[sapply(cancer, protcheck)]
fungus <- fungus[sapply(fungus, protcheck)]
bacteria <- bacteria[sapply(bacteria, protcheck)]
virus <- virus[sapply(virus, protcheck)]
negative <- negative[sapply(negative, protcheck)]

### function for amino acid composition
composition <- function(x) {
  # Attaches protr, then returns extractAAC(x) passed through c().
  library(protr)
  aac <- extractAAC(x)
  c(aac)
}
### generate the descriptors: extractAAC() per sequence, one sequence per row
cancer_des <- as.data.frame(t(sapply(cancer, extractAAC)))
fungus_des <- as.data.frame(t(sapply(fungus, extractAAC)))
bacteria_des <- as.data.frame(t(sapply(bacteria, extractAAC)))
virus_des <- as.data.frame(t(sapply(virus, extractAAC)))
negative_des <- t(sapply(negative, extractAAC))

#### attach the class labels
cancer_des$Label <- "Cancer"
fungus_des$Label <- "Fungus"
bacteria_des$Label <- "Bacteria"
virus_des$Label <- "Virus"

#### four-class data set (the negative set is not part of it)
combine_data <- rbind(cancer_des, fungus_des,
                      bacteria_des, virus_des)
combine_data$Label <- as.factor(combine_data$Label)

#### training results using J48
J48_training <- function(x, Label) {
  # One-vs-rest training-set confusion counts for one class of a multi-class
  # J48 model, over 100 random 80% partitions run on an 8-worker cluster.
  #
  # x:     data frame with a factor column `Label` plus the predictor columns.
  # Label: a single class name; must be one of levels(x$Label)
  #        (e.g. "Bacteria", "Cancer", "Fungus", "Virus").
  #
  # Returns a list of 100 unnamed 1 x 4 numeric matrices. With the table laid
  # out as prediction (rows) by actual (columns) and k the index of `Label`:
  #   [1] cell (k, k)                      -- class k predicted as class k
  #   [2] rest of row k                    -- other classes predicted as k
  #   [3] rest of column k                 -- class k predicted as another class
  #   [4] sum of the other diagonal cells  -- other classes predicted correctly
  # NOTE(review): [4] keeps the original definition; a conventional
  # one-vs-rest true-negative count would include every cell outside row k
  # and column k. Confirm which one is intended.
  #
  # Fixes relative to the previous version:
  #  * every branch ended on the `Virus <- cbind(...)` expression, so the
  #    foreach loop returned the Virus counts whatever `Label` was;
  #  * stopCluster() was unreachable, leaking 8 workers on every call;
  #  * the parallel packages were only loaded in the "Bacteria" branch;
  #  * an unknown `Label` silently returned NULL.
  suppressPackageStartupMessages(library(parallel))
  suppressPackageStartupMessages(library(doSNOW))

  class_levels <- levels(x$Label)
  k <- match(Label, class_levels)
  if (length(Label) != 1L || is.na(k)) {
    stop("Label must be one of: ", paste(class_levels, collapse = ", "),
         call. = FALSE)
  }
  n_class <- length(class_levels)

  cl <- makeCluster(8)
  # Registered right after creation so the workers are released on any exit.
  on.exit(stopCluster(cl), add = TRUE)
  registerDoSNOW(cl)

  # The value of each iteration is its last expression, so the selected
  # counts must be the final statement of the loop body.
  foreach(i = 1:100) %dopar% {
    in_train <- caret::createDataPartition(x$Label, p = 0.80, list = FALSE)
    train <- x[in_train, ]
    model_train <- RWeka::J48(Label~., data = train)
    actual <- train$Label
    prediction <- predict(model_train, train)
    counts <- matrix(as.numeric(table(prediction, actual)), nrow = n_class)
    hit <- counts[k, k]
    matrix(c(hit,
             sum(counts[k, ]) - hit,
             sum(counts[, k]) - hit,
             sum(diag(counts)) - hit),
           nrow = 1L)
  }
}

mean_and_sd <- function(x) {
  # Returns c(mean, sd) of x, ignoring missing values and rounding both
  # numbers to four decimal places.
  centre <- mean(x, na.rm = TRUE)
  spread <- sd(x, na.rm = TRUE)
  round(c(centre, spread), digits = 4)
}


results_training_Bacteria <- function(x) {
  # Summarises the per-run counts returned by J48_training(x, "Bacteria").
  # Returns a one-column data frame whose rows are the mean and SD of
  # accuracy, sensitivity, specificity (all in percent) and MCC.
  per_run <- data.frame(J48_training(x, Label = "Bacteria"))

  # per_run has one row and four consecutive columns per run (100 runs);
  # reshape to 4 x 100 so that each column holds the counts of one run.
  cells <- matrix(unlist(per_run[seq_len(400)], use.names = FALSE), nrow = 4)
  c1 <- cells[1, ]
  c2 <- cells[2, ]
  c3 <- cells[3, ]
  c4 <- cells[4, ]

  # NOTE(review): the formulas read count 4 as the positive hits and count 1
  # as the negative hits; confirm against the order J48_training() emits.
  ACC <- (c1 + c4) / (c1 + c2 + c3 + c4) * 100
  SENS <- (c4) / (c3 + c4) * 100
  SPEC <- (c1 / (c1 + c2)) * 100
  mcc_top <- (c1 * c4) - (c2 * c3)
  mcc_bottom <- sqrt((c4 + c2) * (c4 + c3)) * sqrt((c1 + c2) * (c1 + c3))
  MCC <- mcc_top / mcc_bottom

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(c(results_ACC, results_SENS, results_SPEC, results_MCC))
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}

results_training_Cancer <- function(x) {
  # Summarises the per-run counts returned by J48_training(x, "Cancer").
  # Returns a one-column data frame whose rows are the mean and SD of
  # accuracy, sensitivity, specificity (all in percent) and MCC.
  per_run <- data.frame(J48_training(x, Label = "Cancer"))

  # per_run has one row and four consecutive columns per run (100 runs);
  # reshape to 4 x 100 so that each column holds the counts of one run.
  cells <- matrix(unlist(per_run[seq_len(400)], use.names = FALSE), nrow = 4)
  c1 <- cells[1, ]
  c2 <- cells[2, ]
  c3 <- cells[3, ]
  c4 <- cells[4, ]

  # NOTE(review): the formulas read count 4 as the positive hits and count 1
  # as the negative hits; confirm against the order J48_training() emits.
  ACC <- (c1 + c4) / (c1 + c2 + c3 + c4) * 100
  SENS <- (c4) / (c3 + c4) * 100
  SPEC <- (c1 / (c1 + c2)) * 100
  mcc_top <- (c1 * c4) - (c2 * c3)
  mcc_bottom <- sqrt((c4 + c2) * (c4 + c3)) * sqrt((c1 + c2) * (c1 + c3))
  MCC <- mcc_top / mcc_bottom

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(c(results_ACC, results_SENS, results_SPEC, results_MCC))
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}



results_training_Fungus <- function(x) {
  # Summarises the per-run counts returned by J48_training(x, "Fungus").
  # Returns a one-column data frame whose rows are the mean and SD of
  # accuracy, sensitivity, specificity (all in percent) and MCC.
  per_run <- data.frame(J48_training(x, Label = "Fungus"))

  # per_run has one row and four consecutive columns per run (100 runs);
  # reshape to 4 x 100 so that each column holds the counts of one run.
  cells <- matrix(unlist(per_run[seq_len(400)], use.names = FALSE), nrow = 4)
  c1 <- cells[1, ]
  c2 <- cells[2, ]
  c3 <- cells[3, ]
  c4 <- cells[4, ]

  # NOTE(review): the formulas read count 4 as the positive hits and count 1
  # as the negative hits; confirm against the order J48_training() emits.
  ACC <- (c1 + c4) / (c1 + c2 + c3 + c4) * 100
  SENS <- (c4) / (c3 + c4) * 100
  SPEC <- (c1 / (c1 + c2)) * 100
  mcc_top <- (c1 * c4) - (c2 * c3)
  mcc_bottom <- sqrt((c4 + c2) * (c4 + c3)) * sqrt((c1 + c2) * (c1 + c3))
  MCC <- mcc_top / mcc_bottom

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(c(results_ACC, results_SENS, results_SPEC, results_MCC))
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}


results_training_Virus <- function(x) {
  # Summarises the per-run counts returned by J48_training(x, "Virus").
  # Returns a one-column data frame whose rows are the mean and SD of
  # accuracy, sensitivity, specificity (all in percent) and MCC.
  per_run <- data.frame(J48_training(x, Label = "Virus"))

  # per_run has one row and four consecutive columns per run (100 runs);
  # reshape to 4 x 100 so that each column holds the counts of one run.
  cells <- matrix(unlist(per_run[seq_len(400)], use.names = FALSE), nrow = 4)
  c1 <- cells[1, ]
  c2 <- cells[2, ]
  c3 <- cells[3, ]
  c4 <- cells[4, ]

  # NOTE(review): the formulas read count 4 as the positive hits and count 1
  # as the negative hits; confirm against the order J48_training() emits.
  ACC <- (c1 + c4) / (c1 + c2 + c3 + c4) * 100
  SENS <- (c4) / (c3 + c4) * 100
  SPEC <- (c1 / (c1 + c2)) * 100
  mcc_top <- (c1 * c4) - (c2 * c3)
  mcc_bottom <- sqrt((c4 + c2) * (c4 + c3)) * sqrt((c1 + c2) * (c1 + c3))
  MCC <- mcc_top / mcc_bottom

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(c(results_ACC, results_SENS, results_SPEC, results_MCC))
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}


J48_training_all <- function(x) {
  # Collects the training-set summaries of the four classes (evaluated in the
  # order Bacteria, Cancer, Fungus, Virus) and appends the row-wise mean of
  # the four columns as "Overall".
  # Returns a data frame with one row per statistic and five columns.
  per_class <- cbind(
    results_training_Bacteria(x),
    results_training_Cancer(x),
    results_training_Fungus(x),
    results_training_Virus(x)
  )
  overall <- apply(per_class, 1, mean)
  summary_table <- cbind(per_class, overall)
  colnames(summary_table) <- c("Bacteria", "Cancer", "Fungus", "Virus", "Overall")
  summary_table
}


### 10 fold Cross Validations

J48_10_CV <- function(x, Label){
  if (Label == "Bacteria") {
    suppressPackageStartupMessages(library(parallel))
    suppressPackageStartupMessages(library(doSNOW))
    cl <- makeCluster(8)
    registerDoSNOW(cl)
    
    ok <- list(100)
    ok <- foreach(i = 1:100) %dopar% { 
      in_train <- caret::createDataPartition(x$Label, p = 0.80, list = FALSE)
      train <- x[in_train, ]
      test <- x[-in_train, ]
      rm(in_train)
      rm(test)
      model_train <- RWeka::J48(Label~., data = train)
      results <- RWeka::evaluate_Weka_classifier(model_train, newata = NULL, numFolds = 10, complexity = FALSE)
      rm(train)
      confusionMatrix <- results$confusionMatrix
      rm(model_train)
      results <- as.numeric(confusionMatrix)
      rm(confusionMatrix)
      ok[[i]] <- cbind(results[[1]], (results[[5]] + results[[9]] + results[[13]]),
                       (results[[2]] + results[[3]] + results[[4]]), (results[[6]] + results[[11]] + results[[16]]))
      Cancer <- cbind(results[[6]], (results[[2]] + results[[10]] + results[[14]]), 
                      (results[[5]] + results[[7]] + results[[8]]), (results[[1]] + results[[11]] + results[[16]]))
      Fungus <- cbind(results[[11]], (results[[3]] + results[[7]] + results[[15]]),
                      (results[[9]] + results[[10]] + results[[12]]), (results[[1]] + results[[6]] + results[[16]]))
      Virus <- cbind(results[[16]], (results[[4]] + results[[8]] + results[[12]]), 
                     (results[[13]] + results[[14]] + results[[15]]), (results[[1]] + results[[6]] + results[[11]]))
    }
  }  else if (Label == "Cancer") {
    cl <- makeCluster(8)
    registerDoSNOW(cl)
    
    ok <- list(100)
    ok <- foreach(i = 1:100) %dopar% { 
      in_train <- caret::createDataPartition(x$Label, p = 0.80, list = FALSE)
      train <- x[in_train, ]
      test <- x[-in_train, ]
      rm(in_train)
      rm(test)
      model_train <- RWeka::J48(Label~., data = train)
      results <- RWeka::evaluate_Weka_classifier(model_train, newata = NULL, numFolds = 10, complexity = FALSE)
      rm(train)
      confusionMatrix <- results$confusionMatrix
      rm(model_train)
      results <- as.numeric(confusionMatrix)
      rm(confusionMatrix)
      Bacteria <- cbind(results[[1]], (results[[5]] + results[[9]] + results[[13]]),
                        (results[[2]] + results[[3]] + results[[4]]), (results[[6]] + results[[11]] + results[[16]]))
      ok[[i]] <- cbind(results[[6]], (results[[2]] + results[[10]] + results[[14]]), 
                       (results[[5]] + results[[7]] + results[[8]]), (results[[1]] + results[[11]] + results[[16]]))
      Fungus <- cbind(results[[11]], (results[[3]] + results[[7]] + results[[15]]),
                      (results[[9]] + results[[10]] + results[[12]]), (results[[1]] + results[[6]] + results[[16]]))
      Virus <- cbind(results[[16]], (results[[4]] + results[[8]] + results[[12]]), 
                     (results[[13]] + results[[14]] + results[[15]]), (results[[1]] + results[[6]] + results[[11]]))    
    } 
  }  else if (Label == "Fungus") {
    cl <- makeCluster(8)
    registerDoSNOW(cl)
    
    ok <- list(100)
    ok <- foreach(i = 1:100) %dopar% { 
      in_train <- caret::createDataPartition(x$Label, p = 0.80, list = FALSE)
      train <- x[in_train, ]
      test <- x[-in_train, ]
      rm(in_train)
      rm(test)
      model_train <- RWeka::J48(Label~., data = train)
      results <- RWeka::evaluate_Weka_classifier(model_train, newata = NULL, numFolds = 10, complexity = FALSE)
      rm(train)
      rm(model_train)
      confusionMatrix <- results$confusionMatrix
      rm(results)
      results <- as.numeric(confusionMatrix)
      rm(confusionMatrix)
      Bacteria <- cbind(results[[1]], (results[[5]] + results[[9]] + results[[13]]),
                        (results[[2]] + results[[3]] + results[[4]]), (results[[6]] + results[[11]] + results[[16]]))
      Cancer <- cbind(results[[6]], (results[[2]] + results[[10]] + results[[14]]), 
                      (results[[5]] + results[[7]] + results[[8]]), (results[[1]] + results[[11]] + results[[16]]))
      ok[[i]] <- cbind(results[[11]], (results[[3]] + results[[7]] + results[[15]]),
                       (results[[9]] + results[[10]] + results[[12]]), (results[[1]] + results[[6]] + results[[16]]))
      Virus <- cbind(results[[16]], (results[[4]] + results[[8]] + results[[12]]), 
                     (results[[13]] + results[[14]] + results[[15]]), (results[[1]] + results[[6]] + results[[11]]))    
    } 
  }  else if (Label == "Virus") {
    cl <- makeCluster(8)
    registerDoSNOW(cl)
    
    ok <- list(100)
    ok <- foreach(i = 1:100) %dopar% { 
      in_train <- caret::createDataPartition(x$Label, p = 0.80, list = FALSE)
      train <- x[in_train, ]
      test <- x[-in_train, ]
      rm(in_train)
      rm(test)
      model_train <- RWeka::J48(Label~., data = train)
      results <- RWeka::evaluate_Weka_classifier(model_train, newata = NULL, numFolds = 10, complexity = FALSE)
      rm(model_train)
      rm(train)
      confusionMatrix <- results$confusionMatrix
      results <- as.numeric(confusionMatrix)
      rm(confusionMatrix)
      Bacteria <- cbind(results[[1]], (results[[5]] + results[[9]] + results[[13]]),
                        (results[[2]] + results[[3]] + results[[4]]), (results[[6]] + results[[11]] + results[[16]]))
      Cancer <- cbind(results[[6]], (results[[2]] + results[[10]] + results[[14]]), 
                      (results[[5]] + results[[7]] + results[[8]]), (results[[1]] + results[[11]] + results[[16]]))
      Fungus <- cbind(results[[11]], (results[[3]] + results[[7]] + results[[15]]),
                      (results[[9]] + results[[10]] + results[[12]]), (results[[1]] + results[[6]] + results[[16]]))
      ok[[i]] <- cbind(results[[16]], (results[[4]] + results[[8]] + results[[12]]), 
                       (results[[13]] + results[[14]] + results[[15]]), (results[[1]] + results[[6]] + results[[11]]))    
    }
    return(ok)
    stopCluster(cl)
  } }

# Summarise a numeric sample as c(mean, standard deviation). Missing values
# (NA/NaN) are ignored and both statistics are rounded to four decimals.
mean_and_sd <- function(x) {
  stats <- c(mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
  round(stats, digits = 4)
}


# Summarise the repeated 10-fold cross-validation runs of the J48 model for
# the "Bacteria" class, treated as the positive class (one-vs-rest).
#
# Args:
#   x: data set, passed unchanged to J48_10_CV().
#
# Returns:
#   A one-column data.frame with rows ACC_Mean, ACC_SD, Sens_Mean, Sens_SD,
#   Spec_Mean, Spec_SD, MCC_Mean and MCC_SD. Accuracy, sensitivity and
#   specificity are percentages; each mean/SD is taken across the runs and
#   rounded by mean_and_sd().
results_CV_Bacteria <- function(x) {
  runs <- J48_10_CV(x, Label = "Bacteria")
  if (length(runs) == 0) {
    stop("J48_10_CV() returned no runs for label \"Bacteria\"", call. = FALSE)
  }
  # One row per run, columns read as TP, FN, FP, TN. Stacking the runs keeps
  # the summary independent of how many repetitions were performed.
  counts <- do.call(rbind, runs)
  TP <- counts[, 1]
  FN <- counts[, 2]
  FP <- counts[, 3]
  TN <- counts[, 4]

  ACC <- (TP + TN) / (TP + FN + FP + TN) * 100
  # Sensitivity is the recall of the positive class, specificity the recall
  # of the negatives; the two formulas must not be interchanged.
  SENS <- TP / (TP + FN) * 100
  SPEC <- TN / (TN + FP) * 100
  # A run with an empty margin yields NaN here; mean_and_sd() drops it.
  MCC <- (TP * TN - FN * FP) /
    (sqrt((TN + FN) * (TN + FP)) * sqrt((TP + FN) * (TP + FP)))

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(
    c(results_ACC, results_SENS, results_SPEC, results_MCC)
  )
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}

# Summarise the repeated 10-fold cross-validation runs of the J48 model for
# the "Cancer" class, treated as the positive class (one-vs-rest).
#
# Args:
#   x: data set, passed unchanged to J48_10_CV().
#
# Returns:
#   A one-column data.frame with rows ACC_Mean, ACC_SD, Sens_Mean, Sens_SD,
#   Spec_Mean, Spec_SD, MCC_Mean and MCC_SD. Accuracy, sensitivity and
#   specificity are percentages; each mean/SD is taken across the runs and
#   rounded by mean_and_sd().
results_CV_Cancer <- function(x) {
  runs <- J48_10_CV(x, Label = "Cancer")
  if (length(runs) == 0) {
    stop("J48_10_CV() returned no runs for label \"Cancer\"", call. = FALSE)
  }
  # One row per run, columns read as TP, FN, FP, TN. Stacking the runs keeps
  # the summary independent of how many repetitions were performed.
  counts <- do.call(rbind, runs)
  TP <- counts[, 1]
  FN <- counts[, 2]
  FP <- counts[, 3]
  TN <- counts[, 4]

  ACC <- (TP + TN) / (TP + FN + FP + TN) * 100
  # Sensitivity is the recall of the positive class, specificity the recall
  # of the negatives; the two formulas must not be interchanged.
  SENS <- TP / (TP + FN) * 100
  SPEC <- TN / (TN + FP) * 100
  # A run with an empty margin yields NaN here; mean_and_sd() drops it.
  MCC <- (TP * TN - FN * FP) /
    (sqrt((TN + FN) * (TN + FP)) * sqrt((TP + FN) * (TP + FP)))

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(
    c(results_ACC, results_SENS, results_SPEC, results_MCC)
  )
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}


# Summarise the repeated 10-fold cross-validation runs of the J48 model for
# the "Fungus" class, treated as the positive class (one-vs-rest).
#
# Args:
#   x: data set, passed unchanged to J48_10_CV().
#
# Returns:
#   A one-column data.frame with rows ACC_Mean, ACC_SD, Sens_Mean, Sens_SD,
#   Spec_Mean, Spec_SD, MCC_Mean and MCC_SD. Accuracy, sensitivity and
#   specificity are percentages; each mean/SD is taken across the runs and
#   rounded by mean_and_sd().
results_CV_Fungus <- function(x) {
  runs <- J48_10_CV(x, Label = "Fungus")
  if (length(runs) == 0) {
    stop("J48_10_CV() returned no runs for label \"Fungus\"", call. = FALSE)
  }
  # One row per run, columns read as TP, FN, FP, TN. Stacking the runs keeps
  # the summary independent of how many repetitions were performed.
  counts <- do.call(rbind, runs)
  TP <- counts[, 1]
  FN <- counts[, 2]
  FP <- counts[, 3]
  TN <- counts[, 4]

  ACC <- (TP + TN) / (TP + FN + FP + TN) * 100
  # Sensitivity is the recall of the positive class, specificity the recall
  # of the negatives; the two formulas must not be interchanged.
  SENS <- TP / (TP + FN) * 100
  SPEC <- TN / (TN + FP) * 100
  # A run with an empty margin yields NaN here; mean_and_sd() drops it.
  MCC <- (TP * TN - FN * FP) /
    (sqrt((TN + FN) * (TN + FP)) * sqrt((TP + FN) * (TP + FP)))

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(
    c(results_ACC, results_SENS, results_SPEC, results_MCC)
  )
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}

# Summarise the repeated 10-fold cross-validation runs of the J48 model for
# the "Virus" class, treated as the positive class (one-vs-rest).
#
# Args:
#   x: data set, passed unchanged to J48_10_CV().
#
# Returns:
#   A one-column data.frame with rows ACC_Mean, ACC_SD, Sens_Mean, Sens_SD,
#   Spec_Mean, Spec_SD, MCC_Mean and MCC_SD. Accuracy, sensitivity and
#   specificity are percentages; each mean/SD is taken across the runs and
#   rounded by mean_and_sd().
results_CV_Virus <- function(x) {
  runs <- J48_10_CV(x, Label = "Virus")
  if (length(runs) == 0) {
    stop("J48_10_CV() returned no runs for label \"Virus\"", call. = FALSE)
  }
  # One row per run, columns read as TP, FN, FP, TN. Stacking the runs keeps
  # the summary independent of how many repetitions were performed.
  counts <- do.call(rbind, runs)
  TP <- counts[, 1]
  FN <- counts[, 2]
  FP <- counts[, 3]
  TN <- counts[, 4]

  ACC <- (TP + TN) / (TP + FN + FP + TN) * 100
  # Sensitivity is the recall of the positive class, specificity the recall
  # of the negatives; the two formulas must not be interchanged.
  SENS <- TP / (TP + FN) * 100
  SPEC <- TN / (TN + FP) * 100
  # A run with an empty margin yields NaN here; mean_and_sd() drops it.
  MCC <- (TP * TN - FN * FP) /
    (sqrt((TN + FN) * (TN + FP)) * sqrt((TP + FN) * (TP + FP)))

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(
    c(results_ACC, results_SENS, results_SPEC, results_MCC)
  )
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}

# Build the cross-validation summary table for all four classes.
#
# Runs the per-class summaries in the order Bacteria, Cancer, Fungus, Virus,
# binds them side by side and appends an "Overall" column holding the
# row-wise mean of the four class columns.
#
# Args:
#   x: data set forwarded to each results_CV_*() function.
#
# Returns:
#   A data.frame with columns Bacteria, Cancer, Fungus, Virus and Overall;
#   its rows are those returned by the per-class summaries.
J48_CV_all <- function(x) {
  summarisers <- list(results_CV_Bacteria, results_CV_Cancer,
                      results_CV_Fungus, results_CV_Virus)
  per_class <- do.call(cbind, lapply(summarisers, function(run) run(x)))
  overall <- apply(per_class, 1, mean)
  summary_table <- cbind(per_class, overall)
  colnames(summary_table) <- c("Bacteria", "Cancer", "Fungus", "Virus", "Overall")
  summary_table
}

#### Functions for testing

# Repeated hold-out evaluation of a J48 decision tree, scored one-vs-rest for
# a single class.
#
# For each of 100 repetitions the data are split 80/20 with
# caret::createDataPartition(), a J48 tree is trained on the 80% part and
# used to predict the held-out 20%. The repetitions run on an 8-worker
# cluster, which is shut down when the function exits.
#
# Args:
#   x: data.frame with a class column `Label`; all other columns are used as
#      predictors. Class counts are located by position, so the levels of
#      `Label` are assumed to be ordered Bacteria, Cancer, Fungus, Virus.
#   Label: class to score: "Bacteria", "Cancer", "Fungus" or "Virus".
#
# Returns:
#   A list with one 1 x 4 numeric matrix per repetition holding, in this
#   order, TP, FN, FP and TN for the requested class.
#
# NOTE(review): as in the cross-validation code, "TN" counts only the
# correctly classified samples of the other classes (the remaining diagonal),
# not every sample that is neither actually nor predicted `Label` -- confirm
# that this definition is intended.
J48_testing <- function(x, Label) {
  classes <- c("Bacteria", "Cancer", "Fungus", "Virus")
  if (length(Label) != 1 || !(Label %in% classes)) {
    stop("Label must be one of: ", paste(classes, collapse = ", "),
         call. = FALSE)
  }
  k <- match(Label, classes)

  suppressPackageStartupMessages(library(parallel))
  suppressPackageStartupMessages(library(doSNOW))
  cl <- makeCluster(8)
  # Release the workers on every exit path, including errors.
  on.exit(stopCluster(cl), add = TRUE)
  registerDoSNOW(cl)

  # foreach() collects the value of the LAST expression of the body for each
  # repetition, so the counts of the requested class must be that expression.
  foreach(i = 1:100) %dopar% {
    in_train <- caret::createDataPartition(x$Label, p = 0.80, list = FALSE)
    train <- x[in_train, ]
    test <- x[-in_train, ]
    model_train <- RWeka::J48(Label ~ ., data = train)
    prediction <- predict(model_train, test)
    actual <- test$Label
    # Rows are predicted classes, columns are actual classes.
    confusion <- table(prediction, actual)
    confusion <- matrix(as.numeric(confusion), nrow = nrow(confusion))
    tp <- confusion[k, k]
    fn <- sum(confusion[-k, k])       # actual `Label`, predicted as another
    fp <- sum(confusion[k, -k])       # predicted `Label`, actually another
    tn <- sum(diag(confusion)[-k])    # other classes predicted correctly
    cbind(tp, fn, fp, tn, deparse.level = 0)
  }
}

# Return c(mean, standard deviation) of `x`, skipping missing values and
# rounding each statistic to four decimal places.
mean_and_sd <- function(x) {
  centre <- mean(x, na.rm = TRUE)
  spread <- sd(x, na.rm = TRUE)
  c(round(centre, digits = 4), round(spread, digits = 4))
}


# Summarise the repeated hold-out (external test set) runs of the J48 model
# for the "Bacteria" class, treated as the positive class (one-vs-rest).
#
# Args:
#   x: data set, passed unchanged to J48_testing().
#
# Returns:
#   A one-column data.frame with rows ACC_Mean, ACC_SD, Sens_Mean, Sens_SD,
#   Spec_Mean, Spec_SD, MCC_Mean and MCC_SD. Accuracy, sensitivity and
#   specificity are percentages; each mean/SD is taken across the runs and
#   rounded by mean_and_sd().
results_testing_Bacteria <- function(x) {
  runs <- J48_testing(x, Label = "Bacteria")
  if (length(runs) == 0) {
    stop("J48_testing() returned no runs for label \"Bacteria\"", call. = FALSE)
  }
  # One row per run, columns read as TP, FN, FP, TN. Stacking the runs keeps
  # the summary independent of how many repetitions were performed.
  counts <- do.call(rbind, runs)
  TP <- counts[, 1]
  FN <- counts[, 2]
  FP <- counts[, 3]
  TN <- counts[, 4]

  ACC <- (TP + TN) / (TP + FN + FP + TN) * 100
  # Sensitivity is the recall of the positive class, specificity the recall
  # of the negatives; the two formulas must not be interchanged.
  SENS <- TP / (TP + FN) * 100
  SPEC <- TN / (TN + FP) * 100
  # A run with an empty margin yields NaN here; mean_and_sd() drops it.
  MCC <- (TP * TN - FN * FP) /
    (sqrt((TN + FN) * (TN + FP)) * sqrt((TP + FN) * (TP + FP)))

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(
    c(results_ACC, results_SENS, results_SPEC, results_MCC)
  )
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}

# Summarise the repeated hold-out (external test set) runs of the J48 model
# for the "Cancer" class, treated as the positive class (one-vs-rest).
#
# Args:
#   x: data set, passed unchanged to J48_testing().
#
# Returns:
#   A one-column data.frame with rows ACC_Mean, ACC_SD, Sens_Mean, Sens_SD,
#   Spec_Mean, Spec_SD, MCC_Mean and MCC_SD. Accuracy, sensitivity and
#   specificity are percentages; each mean/SD is taken across the runs and
#   rounded by mean_and_sd().
results_testing_Cancer <- function(x) {
  runs <- J48_testing(x, Label = "Cancer")
  if (length(runs) == 0) {
    stop("J48_testing() returned no runs for label \"Cancer\"", call. = FALSE)
  }
  # One row per run, columns read as TP, FN, FP, TN. Stacking the runs keeps
  # the summary independent of how many repetitions were performed.
  counts <- do.call(rbind, runs)
  TP <- counts[, 1]
  FN <- counts[, 2]
  FP <- counts[, 3]
  TN <- counts[, 4]

  ACC <- (TP + TN) / (TP + FN + FP + TN) * 100
  # Sensitivity is the recall of the positive class, specificity the recall
  # of the negatives; the two formulas must not be interchanged.
  SENS <- TP / (TP + FN) * 100
  SPEC <- TN / (TN + FP) * 100
  # A run with an empty margin yields NaN here; mean_and_sd() drops it.
  MCC <- (TP * TN - FN * FP) /
    (sqrt((TN + FN) * (TN + FP)) * sqrt((TP + FN) * (TP + FP)))

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(
    c(results_ACC, results_SENS, results_SPEC, results_MCC)
  )
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}


# Summarise the repeated hold-out (external test set) runs of the J48 model
# for the "Fungus" class, treated as the positive class (one-vs-rest).
#
# Args:
#   x: data set, passed unchanged to J48_testing().
#
# Returns:
#   A one-column data.frame with rows ACC_Mean, ACC_SD, Sens_Mean, Sens_SD,
#   Spec_Mean, Spec_SD, MCC_Mean and MCC_SD. Accuracy, sensitivity and
#   specificity are percentages; each mean/SD is taken across the runs and
#   rounded by mean_and_sd().
results_testing_Fungus <- function(x) {
  runs <- J48_testing(x, Label = "Fungus")
  if (length(runs) == 0) {
    stop("J48_testing() returned no runs for label \"Fungus\"", call. = FALSE)
  }
  # One row per run, columns read as TP, FN, FP, TN. Stacking the runs keeps
  # the summary independent of how many repetitions were performed.
  counts <- do.call(rbind, runs)
  TP <- counts[, 1]
  FN <- counts[, 2]
  FP <- counts[, 3]
  TN <- counts[, 4]

  ACC <- (TP + TN) / (TP + FN + FP + TN) * 100
  # Sensitivity is the recall of the positive class, specificity the recall
  # of the negatives; the two formulas must not be interchanged.
  SENS <- TP / (TP + FN) * 100
  SPEC <- TN / (TN + FP) * 100
  # A run with an empty margin yields NaN here; mean_and_sd() drops it.
  MCC <- (TP * TN - FN * FP) /
    (sqrt((TN + FN) * (TN + FP)) * sqrt((TP + FN) * (TP + FP)))

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(
    c(results_ACC, results_SENS, results_SPEC, results_MCC)
  )
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}

# Summarise the repeated hold-out (external test set) runs of the J48 model
# for the "Virus" class, treated as the positive class (one-vs-rest).
#
# Args:
#   x: data set, passed unchanged to J48_testing().
#
# Returns:
#   A one-column data.frame with rows ACC_Mean, ACC_SD, Sens_Mean, Sens_SD,
#   Spec_Mean, Spec_SD, MCC_Mean and MCC_SD. Accuracy, sensitivity and
#   specificity are percentages; each mean/SD is taken across the runs and
#   rounded by mean_and_sd().
results_testing_Virus <- function(x) {
  runs <- J48_testing(x, Label = "Virus")
  if (length(runs) == 0) {
    stop("J48_testing() returned no runs for label \"Virus\"", call. = FALSE)
  }
  # One row per run, columns read as TP, FN, FP, TN. Stacking the runs keeps
  # the summary independent of how many repetitions were performed.
  counts <- do.call(rbind, runs)
  TP <- counts[, 1]
  FN <- counts[, 2]
  FP <- counts[, 3]
  TN <- counts[, 4]

  ACC <- (TP + TN) / (TP + FN + FP + TN) * 100
  # Sensitivity is the recall of the positive class, specificity the recall
  # of the negatives; the two formulas must not be interchanged.
  SENS <- TP / (TP + FN) * 100
  SPEC <- TN / (TN + FP) * 100
  # A run with an empty margin yields NaN here; mean_and_sd() drops it.
  MCC <- (TP * TN - FN * FP) /
    (sqrt((TN + FN) * (TN + FP)) * sqrt((TP + FN) * (TP + FP)))

  results_ACC <- mean_and_sd(ACC)
  results_SENS <- mean_and_sd(SENS)
  results_SPEC <- mean_and_sd(SPEC)
  results_MCC <- mean_and_sd(MCC)
  results_all <- data.frame(
    c(results_ACC, results_SENS, results_SPEC, results_MCC)
  )
  rownames(results_all) <- c("ACC_Mean", "ACC_SD", "Sens_Mean", "Sens_SD",
                             "Spec_Mean", "Spec_SD", "MCC_Mean", "MCC_SD")
  results_all
}

# Build the external-test summary table for all four classes.
#
# Runs the per-class summaries in the order Bacteria, Cancer, Fungus, Virus,
# binds them side by side and appends an "Overall" column holding the
# row-wise mean of the four class columns.
#
# Args:
#   x: data set forwarded to each results_testing_*() function.
#
# Returns:
#   A data.frame with columns Bacteria, Cancer, Fungus, Virus and Overall;
#   its rows are those returned by the per-class summaries.
J48_testing_all <- function(x) {
  summarisers <- list(results_testing_Bacteria, results_testing_Cancer,
                      results_testing_Fungus, results_testing_Virus)
  per_class <- do.call(cbind, lapply(summarisers, function(run) run(x)))
  overall <- apply(per_class, 1, mean)
  summary_table <- cbind(per_class, overall)
  colnames(summary_table) <- c("Bacteria", "Cancer", "Fungus", "Virus", "Overall")
  summary_table
}
###Results for training 

# Compute the summary table returned by J48_training_all() for combine_data
# and print it; warnings raised during the call are suppressed.
training_results <- suppressWarnings(J48_training_all(combine_data))
print(training_results)

####Results for CV

# Compute the cross-validation summary (one column per class plus "Overall")
# for combine_data and print it; warnings raised during the call are
# suppressed.
CV_results <- suppressWarnings(J48_CV_all(combine_data))
print(CV_results)

####Results for Testing
{r, echo=FALSE, cache = TRUE}
# Compute the hold-out test summary (one column per class plus "Overall")
# for combine_data and print it; warnings raised during the call are
# suppressed.
testing_results <- suppressWarnings(J48_testing_all(combine_data))
print(testing_results)