-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassifiers_RF_SVM.R
174 lines (110 loc) · 4.51 KB
/
classifiers_RF_SVM.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
library(tidyverse)
library(randomForest)
library(caret)
library(caTools)
# Data preparation ----
# Import the expression table (tab-delimited; one row per gene, with the gene
# identifier in the `Gene` column).
tear <- read.delim("C:/Users/Lucas Ribeiro/Dropbox/MBA/tear.txt")

# Use the gene identifier as the row name of each line
tear1 <- tear %>%
  remove_rownames() %>%
  column_to_rownames(var = "Gene")

# Transpose so that each row is a sample and each column is a gene
df_tear <- as.data.frame(t(tear1))

# Label every sample with its group: rows 28 to 54 are "PD", all other rows
# are "CT". Assigning to row indices beyond the end of a data frame silently
# appends NA-filled rows, so fail early if the table is shorter than expected.
stopifnot(nrow(df_tear) >= 54L)
df_tear$groups <- "CT"
df_tear[28:54, "groups"] <- "PD"

# Convert the label to a factor once, before splitting, so the training and
# test sets are guaranteed to share the same levels in the same order.
df_tear$groups <- factor(df_tear$groups, levels = c("CT", "PD"))

# Create the training (70%) and test (30%) sets.
# The partition is random, so seed the RNG first to make it reproducible.
set.seed(20230102)
split <- createDataPartition(df_tear$groups, p = 0.7, list = FALSE)
train <- df_tear[split, ] # training set
test <- df_tear[-split, ] # test set
# Random forest ----
# Fit a random forest classifier on the training data.
set.seed(20230102)

# Use only the gene columns as predictors. Passing the whole data frame as `x`
# would put the `groups` label itself among the features, letting the forest
# read the answer directly and inflating every accuracy figure below.
gene_cols <- setdiff(names(train), "groups")
train_x <- train[, gene_cols, drop = FALSE]
test_x <- test[, gene_cols, drop = FALSE]

classifier_RF <- randomForest(
  x = train_x,
  y = train$groups,
  mtry = 75,
  ntree = 3000
)
classifier_RF

# Confusion matrix on the training set (the same samples the forest was fit on)
pred_train <- predict(classifier_RF, train_x)
confusionMatrix(pred_train, train$groups)

# Confusion matrix on the held-out test set
pred_test <- predict(classifier_RF, test_x)
confusionMatrix(pred_test, test$groups)

# Search for a better `mtry`. The response must be the class label; the
# previous call used column 5 of the data frame (a gene) as the response.
# The result keeps its original name `t`; as a non-function object it does not
# prevent later calls to base::t().
t <- tuneRF(
  train_x, train$groups,
  stepFactor = 0.5,
  plot = TRUE,
  ntreeTry = 180,
  trace = TRUE,
  improve = 0.05
)

# Classifier refit with a different `mtry`
# (hard-coded; presumably taken from the tuneRF output above - confirm).
tuned_classifier_RF <- randomForest(
  x = train_x,
  y = train$groups,
  mtry = 90,
  ntree = 3000
)
tuned_classifier_RF

# Predicting training set results after tuning
pred_train_tuned <- predict(tuned_classifier_RF, train_x)
confusionMatrix(pred_train_tuned, train$groups)

# Predicting test set results after tuning
pred_test_tuned <- predict(tuned_classifier_RF, test_x)
confusionMatrix(pred_test_tuned, test$groups)
pred_test_tuned

# Plot the error rate of the tuned classifier against the number of trees
error_RF <- plot(tuned_classifier_RF)

# Histogram of the number of nodes per tree in the tuned forest
Node_tree <- hist(
  treesize(tuned_classifier_RF),
  main = "No. of Nodes for the Trees",
  col = "green"
)

# Variable importance table of the tuned classifier
tbl_importante_RF <- importance(tuned_classifier_RF)
tbl_importante_RF
head(tbl_importante_RF)

# Partial dependence of the "PD" class on the gene SPB3
partialPlot(
  tuned_classifier_RF,
  pred.data = train_x,
  x.var = "SPB3",
  which.class = "PD"
)

# Plot with the most important variables
#varImpPlot(classifier_RF)
######################################################################
####################################################################
# SVM ----
library(caret)
library(tidyverse)

# The cross-validation folds are drawn at random; seed the RNG so the
# resampling results can be reproduced.
set.seed(20230102)

# Resampling control: 5-fold cross-validation, with class probabilities
# enabled so that twoClassSummary can be used as the summary function.
SVMtrain_control <- trainControl(
  method = "cv",
  number = 5,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# Train a linear-kernel SVM on the training data. Predictors are centered and
# scaled, and the model is selected on the "ROC" metric.
# NOTE(review): with the default grid search, caret's "svmLinear" appears to
# hold C fixed at 1 regardless of `tuneLength` - confirm, and pass an explicit
# `tuneGrid` if C should really be tuned.
classifier_SVM <- train(
  groups ~ .,
  data = train,
  method = "svmLinear",
  trControl = SVMtrain_control,
  preProcess = c("center", "scale"),
  metric = "ROC",
  tuneLength = 50
)
classifier_SVM
classifier_SVM$results

# Confusion matrix computed from the cross-validation resamples
confusionMatrix(classifier_SVM)

# Use the trained SVM classifier to predict the test data
pred_SVM <- predict(classifier_SVM, test)
pred_SVM

# Confusion matrix for the test set
results_SVM <- confusionMatrix(data = pred_SVM, test$groups)
results_SVM

# Variable importance for the SVM model (unscaled)
tbl_importance_SVM <- varImp(classifier_SVM, scale = FALSE)
tbl_importance_SVM
#########
# ROC curves ----
library(pROC)

# Both curves are computed on the same held-out test samples and from
# predicted class probabilities, so the two models can be compared directly.
# (Previously the RF curve used out-of-bag votes on the training set, while the
# SVM curve used hard class labels, which yields a single-threshold "curve".)
# `levels` and `direction` are stated explicitly: "CT" is the control class,
# "PD" the case class, and a higher P(PD) should indicate a case.

# ROC curve for the RF model
rf_prob_pd <- predict(classifier_RF, test, type = "prob")[, "PD"]
rf.roc <- roc(
  response = test$groups,
  predictor = rf_prob_pd,
  levels = c("CT", "PD"),
  direction = "<"
)
plot(rf.roc, print.auc = TRUE, col = "blue")

# ROC curve for the SVM model
# (class probabilities are available because the model was trained with
# classProbs enabled in its trainControl)
svm_prob_pd <- predict(classifier_SVM, test, type = "prob")[, "PD"]
roc.svm <- roc(
  response = test$groups,
  predictor = svm_prob_pd,
  levels = c("CT", "PD"),
  direction = "<"
)
plot(
  roc.svm,
  add = TRUE,
  col = "red",
  print.auc = TRUE,
  print.auc.x = 0.5,
  print.auc.y = 0.3
)

legend(
  "bottomright",
  legend = c("Random Forest", "Support Vector Machine"),
  col = c("blue", "red"),
  lty = 1
)