# study26_osu6003_analysis.R
# Analysing the effect of adding/removing data points
# -----------------------------------------------------------------------------
# Extends the algorithm paper by quantifying the loss of accuracy/precision
# when the number of sample times is reduced, aiming to show that fewer
# time points are needed.
# Data are from study OSU6003; see the papers below:
# Blum W, Klisovic RB, Becker H, et al. Dose Escalation of Lenalidomide in
# Relapsed or Refractory Acute Leukemias. J Clin Oncol. 2010;28(33):4919-4925.
# Maddocks K, Ruppert AS, Browning R, et al. A dose escalation feasibility study
# of lenalidomide for treatment of symptomatic, relapsed chronic lymphocytic
# leukemia. Leuk Res. 2014;38(9):1025-1029.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Load packages
library(GA)   # genetic algorithm (ga)
library(plyr) # iterative functions (ddply, llply)
# Load data visualisation package (set theme, define palette)
library(ggplot2) # plots (ggsave etc.)
theme_bw2 <- theme_set(theme_bw(base_size = 14))
theme_update(plot.title = element_text(hjust = 0.5))
cbPalette <- data.frame(
  grey = "#999999",
  orange = "#E69F00",
  skyblue = "#56B4E9",
  green = "#009E73",
  yellow = "#F0E442",
  blue = "#0072B2",
  red = "#D55E00",
  pink = "#CC79A7",
  stringsAsFactors = FALSE
)
# Source algorithm functions
source("fn_diag/fix_functions.R")
# Define additional functions
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Last observation carried forward
# Finds an NA and carries forward the previous value
locf <- function(x) {
  good <- !is.na(x)
  positions <- seq_along(x)
  good.positions <- good * positions  # 0 where x is NA
  last.good.position <- cummax(good.positions)  # index of last non-NA value
  last.good.position[last.good.position == 0] <- NA  # leading NAs stay NA
  x[last.good.position]
}
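# Quick sanity check of locf on illustrative values (not study data);
# leading NAs are kept, later NAs take the last observed value:
locf(c(NA, 1, NA, NA, 4, NA)) # expected: NA 1 1 1 4 4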
# Determine AUC (regular)
auc.regular <- function(times, C, log = FALSE) {
  auc <- numeric(length(C) - 1)
  for (i in 2:length(C)) {
    h <- times[i] - times[i - 1]
    dC <- C[i - 1] - C[i]
    if (log && dC > 0) {
      # log-trapezoidal rule for declining concentrations
      auc[i - 1] <- dC*h/log(C[i - 1]/C[i])
    } else {
      # linear trapezoidal rule
      auc[i - 1] <- (C[i - 1] + C[i])*h/2
    }
  }
  sum(auc)
}
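# Illustration on toy mono-exponential data (values arbitrary, not study
# data): the linear-up/log-down rule is exact here, matching the analytical
# AUC0-8 of 20*(1 - exp(-4)) ~ 19.6, while the purely linear rule
# overestimates on a log-convex decline:
toy.t <- c(0, 1, 2, 4, 8)
toy.c <- 10*exp(-0.5*toy.t)
auc.regular(toy.t, toy.c)             # linear trapezoidal (~21.0)
auc.regular(toy.t, toy.c, log = TRUE) # linear-up/log-down (~19.6)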
auc.regular.lam <- function(t, dv) {
  # pred.lambdaz (sourced) fits the terminal log-linear phase; its second
  # element is the slope, negated here to give lambda.z
  lambz <- try(-pred.lambdaz(dv, t)[2])
  if (inherits(lambz, "try-error")) return(0)
  tail(dv, 1)/lambz
}
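# Note: this extrapolates AUC beyond the last observation as Clast/lambda.z,
# which assumes a log-linear terminal phase; returning 0 on failure means a
# dataset where lambda.z cannot be estimated contributes no extrapolated area.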
# Two-compartment disposition with first-order absorption as a sum of
# exponentials; the absorption coefficient is constrained so that C(0) = 0
pred.d2a <- function(x, p) {
  exp(p[1]*x + p[4]) + exp(p[2]*x + p[5]) -
    exp(p[3]*x + log(exp(p[4]) + exp(p[5])))
}
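# Sanity check on an illustrative parameter vector (values arbitrary):
# at x = 0 the absorption exponential cancels the disposition terms
# exactly, so this returns 0
pred.d2a(0, c(-2, -0.2, -1.5, 1, 0.5))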
# Load/Tidy/Prepare the Data
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Source lenalidomide data
nmprep <- read.csv(
  "C:/Users/Jim Hughes/Documents/GitRepos/LEN_PK/Data/data.csv",
  stringsAsFactors = FALSE
)
# Subset the data to OSU6003, removing MDV records (except at time = 0)
# and excluding the weekly troughs taken after the first 48 hours
clindata <- nmprep[
  nmprep$STUDY == 6003 &
    (nmprep$MDV == 0 | nmprep$TIME == 0) &
    nmprep$TIME <= 48,
]
# Check the times in the dataset
table(clindata$TIME)
# Times are not uniform and must be binned to create mean pooled data
# Define nominal sample times
samp.times <- c(0, 0.25, 0.5, 0.75, 1, 1.5, 2, 2.5, 3, 4, 6, 8, 24)
# Create breaks to use with the cut function
# First find the mean of each pair of adjacent sample times to place a
# break halfway between them
inner.breaks <- rowMeans(cbind(head(samp.times, -1), tail(samp.times, -1)))
# Then create the first and last break points by extending the first and
# final sample times outward by the first and final inner break values
# (the breaks run from -0.125 to 40 hours here)
cut.breaks <- c(
  head(samp.times, 1) - head(inner.breaks, 1),
  inner.breaks,
  tail(samp.times, 1) + tail(inner.breaks, 1)
)
# Now use cut to bin the observed times
bin.groups <- cut(clindata$TIME, cut.breaks)
# Use these bins to define the nominal time after dose,
# converting from factor back to numeric
levels(bin.groups) <- samp.times
clindata$TADNOM <- as.numeric(as.character(bin.groups))
# Check nominal times in the dataset
table(clindata$TADNOM)
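# Quick sanity check: any observation falling outside the outermost
# breaks would have been binned to NA
sum(is.na(clindata$TADNOM)) # expect 0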
# Normalise DV for dose size
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Check the AMT column
table(clindata$AMT)
# AMT is only populated on dosing records, so fill DOSE forward with LOCF
clindata$AMT[clindata$AMT == 0] <- NA
clindata$DOSE <- locf(clindata$AMT)
# Check the DOSE column
table(clindata$DOSE)
head(clindata[clindata$TADNOM == 2, ])
# LOCF appears to have worked
# Normalise concentrations to a 25 mg dose (assumes dose-linear PK)
clindata$DOSENORM <- with(clindata, 25*DV/DOSE)
# Create mean pooled data
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Use ddply to mean the normalised DV for each of the bins
meanpool <- ddply(clindata, .(TADNOM), function(df) {
  data.frame(MEAN = mean(df$DOSENORM))
})
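# Cross-check the pooling with base R; this should reproduce the same
# per-bin means as the ddply call above
aggregate(DOSENORM ~ TADNOM, data = clindata, FUN = mean)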
# Visualisation of mean pooled data
p1 <- ggplot()
p1 <- p1 + geom_line(aes(x = TADNOM, y = MEAN), data = meanpool,
  col = cbPalette$red, size = 0.8)
p1 <- p1 + geom_point(aes(x = TIME, y = DOSENORM), data = clindata,
  col = cbPalette$blue, size = 1, shape = 1, alpha = 0.7)
p1 # looks pretty decent
# Fit sum of exponentials to mean pooled data
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Optimise sum-of-exponential models, then choose the one with the best AIC
sumexp.res <- best.sumexp.aic(optim.sumexp.new(meanpool, oral = TRUE, nexp = 2))
opt.sumexp <- sumexp.res$sumexp
# Visualisation of sum of exponentials
# Create dataset
plot.times <- seq(0, 24, 0.1)
sumexpdata <- data.frame(
  TIME = plot.times,
  PRED = pred.sumexp(opt.sumexp, plot.times)
)
# Plot dataset against mean pooled
p2 <- p1 + geom_line(aes(x = TIME, y = PRED), data = sumexpdata,
  col = cbPalette$blue, size = 0.8, alpha = 0.8)
p2 # looks good!
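# Save the fitted-curve plot if a hard copy is wanted (file name illustrative)
# ggsave("osu6003_meanpool_sumexp.png", p2, width = 8, height = 6)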
# Determine optimal sample times and compare AUC
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Optimise time intervals for various numbers of times (6-13)
# Output is list due to variable lengths of time vectors
interv.res <- llply(6:13, function(i) {
  optim.interv.dtmax(opt.sumexp, seq(0, 24, length.out = i))$times
})
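# Label the candidate designs by their number of samples for readability;
# the numeric indexing with interv.res[[k]] below is unaffected
names(interv.res) <- paste0("t", 6:13)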
# Collect each metric (AUC0-24, AUCinf, Cmax, tmax) for the observed data,
# the fitted sum of exponentials, and each reduced sampling design
out <- list(
  auc024 = data.frame(
    data = with(meanpool, auc.regular(TADNOM, MEAN)),
    sumexp = integrate(pred.d2a, 0, 24, p = opt.sumexp)$value,
    t6 = auc.interv.sumexp(interv.res[[1]], opt.sumexp),
    t7 = auc.interv.sumexp(interv.res[[2]], opt.sumexp),
    t8 = auc.interv.sumexp(interv.res[[3]], opt.sumexp),
    t9 = auc.interv.sumexp(interv.res[[4]], opt.sumexp),
    t10 = auc.interv.sumexp(interv.res[[5]], opt.sumexp),
    t11 = auc.interv.sumexp(interv.res[[6]], opt.sumexp),
    t12 = auc.interv.sumexp(interv.res[[7]], opt.sumexp),
    t13 = auc.interv.sumexp(interv.res[[8]], opt.sumexp)
  ),
  aucinf = data.frame(
    data = with(meanpool,
      auc.regular(TADNOM, MEAN) + auc.regular.lam(TADNOM, MEAN)
    ),
    # 168 h (7 days) used as an effectively infinite upper limit
    sumexp = integrate(pred.d2a, 0, 168, p = opt.sumexp)$value,
    t6 = auc.interv.sumexp(interv.res[[1]], opt.sumexp) +
      auc.interv.lam(opt.sumexp, interv.res[[1]]),
    t7 = auc.interv.sumexp(interv.res[[2]], opt.sumexp) +
      auc.interv.lam(opt.sumexp, interv.res[[2]]),
    t8 = auc.interv.sumexp(interv.res[[3]], opt.sumexp) +
      auc.interv.lam(opt.sumexp, interv.res[[3]]),
    t9 = auc.interv.sumexp(interv.res[[4]], opt.sumexp) +
      auc.interv.lam(opt.sumexp, interv.res[[4]]),
    t10 = auc.interv.sumexp(interv.res[[5]], opt.sumexp) +
      auc.interv.lam(opt.sumexp, interv.res[[5]]),
    t11 = auc.interv.sumexp(interv.res[[6]], opt.sumexp) +
      auc.interv.lam(opt.sumexp, interv.res[[6]]),
    t12 = auc.interv.sumexp(interv.res[[7]], opt.sumexp) +
      auc.interv.lam(opt.sumexp, interv.res[[7]]),
    t13 = auc.interv.sumexp(interv.res[[8]], opt.sumexp) +
      auc.interv.lam(opt.sumexp, interv.res[[8]])
  ),
  cmax = data.frame(
    data = with(meanpool, max(MEAN)),
    sumexp = pred.sumexp(opt.sumexp, tmax.sumexp(opt.sumexp)),
    t6 = max(pred.sumexp(opt.sumexp, interv.res[[1]])),
    t7 = max(pred.sumexp(opt.sumexp, interv.res[[2]])),
    t8 = max(pred.sumexp(opt.sumexp, interv.res[[3]])),
    t9 = max(pred.sumexp(opt.sumexp, interv.res[[4]])),
    t10 = max(pred.sumexp(opt.sumexp, interv.res[[5]])),
    t11 = max(pred.sumexp(opt.sumexp, interv.res[[6]])),
    t12 = max(pred.sumexp(opt.sumexp, interv.res[[7]])),
    t13 = max(pred.sumexp(opt.sumexp, interv.res[[8]]))
  ),
  # which.max returns the first maximiser, avoiding multi-row output on ties
  tmax = data.frame(
    data = with(meanpool, TADNOM[which.max(MEAN)]),
    sumexp = tmax.sumexp(opt.sumexp),
    t6 = interv.res[[1]][which.max(pred.sumexp(opt.sumexp, interv.res[[1]]))],
    t7 = interv.res[[2]][which.max(pred.sumexp(opt.sumexp, interv.res[[2]]))],
    t8 = interv.res[[3]][which.max(pred.sumexp(opt.sumexp, interv.res[[3]]))],
    t9 = interv.res[[4]][which.max(pred.sumexp(opt.sumexp, interv.res[[4]]))],
    t10 = interv.res[[5]][which.max(pred.sumexp(opt.sumexp, interv.res[[5]]))],
    t11 = interv.res[[6]][which.max(pred.sumexp(opt.sumexp, interv.res[[6]]))],
    t12 = interv.res[[7]][which.max(pred.sumexp(opt.sumexp, interv.res[[7]]))],
    t13 = interv.res[[8]][which.max(pred.sumexp(opt.sumexp, interv.res[[8]]))]
  )
)
# For each metric, show the raw values with a second row giving the ratio
# to the sum-of-exponentials reference
llply(out, function(x) {
  rbind(x, x/x$sumexp)
})
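# A convenience view of the same comparison: percentage deviation of each
# design from the sum-of-exponentials reference value
llply(out, function(x) round(100*(x/x$sumexp - 1), 2))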