-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathRTEST.R
30 lines (26 loc) · 1.26 KB
/
RTEST.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
library(LKT)
# Fix the RNG seed so the shuffles below are reproducible.
set.seed(41)

# Work on the `largerawsample` data set (presumably shipped with LKT -- confirm)
# under a shorter name, converted to a data.table.
val <- largerawsample
val <- setDT(val)

# Make student-stratified folds (for cross-validation on unseen students):
# shuffle the distinct student ids, deal them round-robin into `n_folds`
# groups, then give every row the fold of its student.
n_folds <- 5L
unq <- sample(unique(val$Anon.Student.Id))
sfold <- rep(seq_len(n_folds), length.out = length(unq))
# `match()` finds each row's student in the shuffled id vector in one pass.
# This replaces a per-fold loop and an initialisation that used
# `length(val[, 1])`, which is 1 (the column count) for a data.table rather
# than the number of rows.
val$fold <- sfold[match(val$Anon.Student.Id, unq)]
# Clean-up: use the problem name as the knowledge-component label.
val$KC..Default. <- val$Problem.Name

# Trial timestamps as seconds since 1970. No `tz` is passed, so the strings
# are interpreted in the session's current time zone.
trial_time <- as.POSIXct(as.character(val$Time), format = "%Y-%m-%d %H:%M:%S")
val$CF..Time. <- as.numeric(trial_time)

# Sort by student, then by time within student (the order later code expects).
val <- val[order(val$Anon.Student.Id, val$CF..Time.), ]

# Binary response to predict: 1 = "correct", 0 = "incorrect", -1 = anything
# else (compared case-insensitively). Only rows scored 0 or 1 are kept.
outcome_lc <- tolower(val$Outcome)
val$CF..ansbin. <- ifelse(
  outcome_lc == "correct",
  1,
  ifelse(outcome_lc == "incorrect", 0, -1)
)
val <- val[val$CF..ansbin. %in% c(0, 1), ]

# Trial duration: the two latencies plus a constant 500, divided by 1000.
# NOTE(review): presumably latencies are in milliseconds and the result is
# in seconds -- confirm against the data dictionary.
val$Duration..sec. <-
  (val$CF..End.Latency. + val$CF..Review.Latency. + 500) / 1000
# Narrow the table to the columns of interest, then to duration and outcome.
val_actual <- val[, c(
  "Anon.Student.Id", "Duration..sec.", "Outcome", "KC..Default."
)]
data <- val_actual[, c("Duration..sec.", "Outcome")]

# Numeric outcome: 1 for "correct" (case-insensitive), 0 otherwise.
data$NOutcome <- ifelse(tolower(data$Outcome) == "correct", 1, 0)
data_a <- data[, c("Duration..sec.", "NOutcome")]

# Plot a random subset of at most 1000 rows. Capping the size at the row
# count keeps `sample()` (which draws without replacement) from erroring when
# fewer than 1000 rows are available.
n_plot <- min(1000L, nrow(data_a))
data_a <- as.matrix(data_a[sample(nrow(data_a), n_plot), ])

# NOTE(review): heatmap() scales each row by default (scale = "row"); with
# only two columns on different scales, scale = "column" may be what is
# intended -- left unchanged here, confirm.
heatmap(data_a)