-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript.R
128 lines (125 loc) · 5.36 KB
/
script.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# Read the data set from the working directory and preview its first rows.
p <- read.csv(file = "tabella.csv")
head(p)

# MASS provides lda() and qda(). mr_cmroc.r is a local helper script that
# presumably defines mconfmat(), mroc(), mroc.plot(), mroc.lines() and mauc()
# -- TODO confirm, its contents are not visible from this file.
library("MASS")
source(file = "mr_cmroc.r")
# 1. Logistic regression (generalized linear models)
# diagnosis_result is modelled on every other column of p.
p.glm <- glm(
  formula = diagnosis_result ~ .,
  family = binomial,
  data = p
)
summary(p.glm)

# Fitted response-scale values for the rows the model was trained on.
p.glm.pre <- predict(p.glm, type = "response")

# Accuracy: share of rows where the fitted value and the observed response
# fall on the same side of 0.5.
sum((p.glm.pre > 0.5) == (p$diagnosis_result > 0.5)) / nrow(p)

# Confusion matrix (helper from the sourced script).
mconfmat(p$diagnosis_result, p.glm.pre)

# ROC object, its plot and the value reported by mauc() (used as the AUC in
# the comparison legend further down).
p.glm.roc <- mroc(p$diagnosis_result, p.glm.pre)
mroc.plot(p.glm.roc)
mauc(p.glm.roc)
# 2. Linear discriminant analysis
# CV is spelled FALSE rather than F: F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved constant.
p.lda <- lda(diagnosis_result ~ ., data = p, CV = FALSE)
p.lda.pre <- predict(p.lda)

# Second column of the posterior matrix: posterior probability of class 1.
p.lda.post <- p.lda.pre$posterior[, 2]

# Accuracy: share of rows where the posterior and the observed response fall
# on the same side of 0.5.
sum((p.lda.post > 0.5) == (p$diagnosis_result > 0.5)) / length(p$diagnosis_result)

# Confusion matrix, ROC object, ROC plot and AUC via the sourced helpers.
mconfmat(p$diagnosis_result, p.lda.post)
p.lda.roc <- mroc(p$diagnosis_result, p.lda.post)
mroc.plot(p.lda.roc)
mauc(p.lda.roc)
# 3. Quadratic discriminant analysis
# CV is spelled FALSE rather than F: F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved constant.
p.qda <- qda(diagnosis_result ~ ., data = p, CV = FALSE)
p.qda.pre <- predict(p.qda)

# Second column of the posterior matrix, thresholded at 0.5 below.
p.qda.post <- p.qda.pre$posterior[, 2]

# Accuracy: share of rows where the posterior and the observed response fall
# on the same side of 0.5.
sum((p.qda.post > 0.5) == (p$diagnosis_result > 0.5)) / length(p$diagnosis_result)

mconfmat(p$diagnosis_result, p.qda.post)
p.qda.roc <- mroc(p$diagnosis_result, p.qda.post)

# Drawn in orange because the method comparison that follows adds the other
# two ROC curves to this same plot.
mroc.plot(p.qda.roc, col = "orange")
mauc(p.qda.roc)
# Method comparison
# Add the LDA and logistic-regression ROC curves to the QDA plot drawn just
# before this section.
mroc.lines(p.lda.roc, col = "green3")
mroc.lines(p.glm.roc, col = "blue")

# The AUC shown in the legend is computed from the ROC objects instead of
# being typed in by hand, so the labels cannot drift from the actual results
# when the data or the models change.
legend(
  "bottomright",
  legend = sprintf(
    "%s, AUC=%.2f",
    c("glm", "lda", "qda"),
    c(mauc(p.glm.roc), mauc(p.lda.roc), mauc(p.qda.roc))
  ),
  col = c("blue", "green3", "orange"),
  pch = 19,
  bg = "white",
  cex = 0.7
)
# 4. Model reduction
# Starting from the full logistic model, predictors are removed cumulatively,
# one per step, in the order listed here. Accuracy and AUC are recorded at
# every step and plotted at the end.
dropped <- c(
  "perimeter", "radius", "symmetry", "smoothness",
  "texture", "compactness", "fractal_dimension"
)
n_steps <- length(dropped) + 1L

# NA placeholders: a step that never fills its slot stays visibly missing
# instead of being plotted as if it were a real score.
acc <- rep(NA_real_, n_steps)
auc <- rep(NA_real_, n_steps)

# Step 1 is the full model fitted in section 1 (p.glm.pre / p.glm.roc).
acc[1] <- sum((p.glm.pre > 0.5) == (p$diagnosis_result > 0.5)) /
  length(p$diagnosis_result)
auc[1] <- mauc(p.glm.roc)

# Correlation plot of all columns.
library(corrplot)
corrplot(cor(p), method = "square")

# Auto-printing only applies to top-level expressions, not to expressions
# inside a loop. This helper reproduces it: evaluate `expr` in the caller,
# print the value only if it came back visible, and return it invisibly.
print_if_visible <- function(expr) {
  out <- withVisible(eval(substitute(expr), parent.frame()))
  if (out$visible) {
    print(out$value)
  }
  invisible(out$value)
}

for (k in seq_along(dropped)) {
  # Builds e.g. diagnosis_result ~ . - perimeter - radius for k = 2.
  step_formula <- as.formula(
    paste(
      "diagnosis_result ~ .",
      paste("-", dropped[seq_len(k)], collapse = " ")
    )
  )

  # bquote() splices the formula object into the call so that summary()
  # still shows the actual formula rather than a variable name.
  p.glm <- eval(bquote(glm(.(step_formula), family = binomial, data = p)))
  print(summary(p.glm))
  p.glm.pre <- predict(p.glm, type = "response")

  # Accuracy is computed once, stored, and then shown.
  acc[k + 1L] <- sum((p.glm.pre > 0.5) == (p$diagnosis_result > 0.5)) /
    length(p$diagnosis_result)
  print(acc[k + 1L])

  print_if_visible(mconfmat(p$diagnosis_result, p.glm.pre))
  p.glm.roc <- mroc(p$diagnosis_result, p.glm.pre)
  print_if_visible(mroc.plot(p.glm.roc))
  auc[k + 1L] <- print_if_visible(mauc(p.glm.roc))
}

# Accuracy and AUC across the reduction steps (step 1 = full model).
plot(acc,
  pch = 19, type = "b", col = "red", ylim = range(acc),
  xlab = "Passi della riduzione del modello", ylab = "accuratezza"
)
plot(auc,
  pch = 19, type = "b", col = "red", ylim = range(auc),
  xlab = "Passi della riduzione del modello", ylab = "AUC"
)