-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstep2_Missing value imputation_R code.txt
83 lines (69 loc) · 1.9 KB
/
step2_Missing value imputation_R code.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#install.packages('pacman')
library(pacman)
pacman::p_load(
mice,
naniar,
lattice,
MASS,
nnet,
VIM,
dplyr,
shipunov,
gridExtra)
# 0. Preparation: load the merged table and derive the covariates used later.
df <- read.csv("step1_merge_data.csv")
# Per-variable missingness summary of the raw table (kept for inspection).
res <- miss_var_summary(df)
# Recode gender as numeric: "Female" -> 0, every other non-missing value -> 1.
is_female <- df$PTGENDER == "Female"
df$PTGENDER <- ifelse(is_female, 0, 1)
# Add the elapsed time in years (Month / 12, rounded to one decimal) to AGE.
# NOTE(review): presumably AGE is the baseline age and Month the months since
# baseline -- confirm against the step-1 merge.
years_elapsed <- round(df$Month / 12, digits = 1)
df$AGE <- df$AGE + years_elapsed
# 1. Create data: the feature table is taken from df by column position.
# NOTE(review): positions 8 to 32 are hard-coded; they depend on the column
# order of step1_merge_data.csv.
feature_cols <- 8:32
features <- df[feature_cols]
# Show the selected column names and their types for a quick sanity check.
names(features)
str(features)
# Per-variable missingness summary restricted to the selected features.
features_res <- miss_var_summary(features)
# 2. Check missing values
# Image-style map of the missing cells in the feature table.
shipunov::Missing.map(features)
# Missing-data patterns across the feature columns.
mice::md.pattern(features)
# Pairwise observed/missing counts for every pair of feature columns.
mice::md.pairs(features)
# 3. Visualizing missing values
# TRUE is spelled out: T is an ordinary variable that can be reassigned,
# whereas TRUE is a reserved constant.
VIM::aggr(features, plot = TRUE, sortVars = TRUE, only.miss = TRUE)
VIM::matrixplot(features)
# Margin plot for the 17th and 19th feature columns.
# NOTE(review): the two columns are selected by hard-coded position -- confirm
# they are still the intended pair if the feature set changes.
VIM::marginplot(features[c(17, 19)])
# 4. Multiple imputation method
# Drop ICV from the table that is imputed. select() is namespace-qualified
# because MASS (also attached by this script) exports its own select(); the
# unqualified call only works while dplyr happens to be attached after MASS.
features <- dplyr::select(features, -ICV)
# List the mice.impute.* functions, i.e. the imputation methods on offer.
methods(mice)
# Five imputed data sets, 50 iterations each, method "rf", with a fixed seed
# so the run is reproducible.
imputed_Data <- mice(features, m = 5, maxit = 50, method = "rf", seed = 500)
# 5. View filling results
# Overall summary of the imputation object.
summary(imputed_Data)
# Imputation method recorded for each column.
imputed_Data[["method"]]
# Imputed values stored in the object, per variable.
imputed_Data[["imp"]]
# 6. Visualize filling results
stripplot(imputed_Data)
# Inspect how the values were filled in across the 5 imputed data sets:
# Hippocampus against Entorhinal, one panel per imputation number (.imp).
xyplot(imputed_Data, Hippocampus ~ Entorhinal | .imp)
densityplot(imputed_Data)
# 7. Analyze imputation results and optimize models
# Column names of the data stored inside the imputation object.
names(imputed_Data[["data"]])
# Evaluate the same linear model on each imputed data set.
fit <- with(
  imputed_Data,
  lm(Hippocampus ~ AGE + PTGENDER)
)
summary(fit)
# 8. Model assessment
# Pool the per-imputation model fits into one set of estimates.
pooled <- pool(fit)
pooled
summary(pooled)
# Pooled R-squared over the repeated fits.
pool.r.squared(fit)
# 9. Final filling result (choose the best fitting result)
# NOTE(review): the imputation index is hard-coded to 5 (the fifth imputed
# data set); nothing in this script selects it programmatically -- confirm
# that it is the intended choice.
# complete() is namespace-qualified so that a later-attached package with its
# own complete() (e.g. tidyr) cannot mask the mice version.
complete_Data <- mice::complete(imputed_Data, 5)
complete_Data
# TRUE is spelled out because T is a reassignable variable, not a constant.
write.csv(complete_Data, "step2_complete_Data.csv", row.names = TRUE)
# 10. Output result
#write.csv(complete_Data, 'mci_complete_Data.csv', row.names = F)
# Re-read the merged table to pick up the identifier, label and ICV columns,
# which are then placed next to the imputed features.
df <- read.csv("step1_merge_data.csv")
# The columns are bound by position, so both tables must have the same number
# of rows; fail early instead of relying on cbind() (vector recycling could
# otherwise hide a mismatch).
stopifnot(nrow(df) == nrow(complete_Data))
ICV <- df$ICV
subject_id <- df$RID
time <- df$Month
label <- df$DX
# Same numeric gender coding as in the preparation step: "Female" -> 0, else 1.
df$PTGENDER <- ifelse(df$PTGENDER == "Female", 0, 1)
gender <- df$PTGENDER
imageid <- df$IMAGEUID
DX_bl <- df$DX_bl
# The variable names above become the column names of the output table.
df4 <- cbind(
  subject_id, time, imageid, DX_bl, label, gender,
  complete_Data, ICV
)
# TRUE is spelled out because T is a reassignable variable, not a constant.
write.csv(df4, "step2_merge_data_original.csv", row.names = TRUE)