-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMaking CR_BM Variables.Rmd
298 lines (234 loc) · 9.92 KB
/
Making CR_BM Variables.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
---
title: "CRBM Variables"
output: html_document
date: "2024-10-28"
---
# Cognitive Reserve and Brain Maintenance Variables
This document provides instructions and code for calculating cognitive reserve and brain maintenance variables based on the annual changes in a composite episodic memory variable and hippocampal volume. While this document is focused on hippocampal volume, the code can be adapted to incorporate other measures (e.g., cingulate cortex thickness or a composite measure). It is important that any new measure captures longitudinal changes in brain morphology to ensure it accurately represents brain maintenance.
## Recreating Cognitive Reserve and Brain Maintenance Variables
To recreate these variables using episodic memory and hippocampal volume, follow the provided code below. If you are using different measures, please modify the code accordingly to create an updated version of the cognitive reserve and brain maintenance variables.
### Data Requirements
- At least two time points of data
- Episodic memory variable/s
- Brain structure variable (e.g., hippocampal volume)
- Participant ages
```{r setup, include=FALSE}
if (!require("dplyr")) {
install.packages("dplyr")
require("dplyr")
}
```
### Step 1: Load Your Data
Begin by importing your dataset. Ensure the data contains either:
1. Episodic memory variables, hippocampal volume, timepoints, and age
2. Previously calculated episodic memory composite (scaled by baseline) and episodic memory slopes plus hippocampal slopes
It is best to read in your data as long format if using the first variables and in wide format if using the second variables.
```{r warning=FALSE}
data <- read.csv("your data.csv")
```
### Step 2a: Calculate Slopes (if needed)
If your data are in long format and you have not yet calculated a composite episodic memory variable or each variable's slopes, use the following code `{r calculate slopes}` to generate these. Customize your variable names and specify your number of timepoints accordingly.
```{r calculate slopes}
# Calculate Slopes: Insert variable names and timepoint number
# Define your memory variables
memory_vars <- c("memory variable 1", "memory variable 2") # Add more as needed
# Specify the number of timepoints
n_timepoints <- 6
# Rename your variables directly
data <- data %>%
rename(
hippocampus_volume = "your hippocampus volume variable",
timepoint = "your timepoint variable", # This variable needs to be numeric (e.g. timepoint = 1 not timepoint = R01)
id = "your id variable",
age = "your age variable"
)
# Optionally, uncomment to filter to only include the needed variables for this analysis
# data <- data %>% select(id, age, timepoint, hippocampus_volume, all_of(memory_vars))
# Main processing function
process_data <- function(data, n_timepoints, mem_vars) {
# Convert data to wide format
data_wide <- data
for (i in 1:n_timepoints) {
suffix <- paste0("_", i)
timepoint_data <- data %>%
filter(timepoint == i) %>%
rename_with(~ paste0(.x, suffix), -c(id))
if (i == 1) {
data_wide <- timepoint_data
} else {
data_wide <- full_join(data_wide, timepoint_data, by = "id")
}
}
# Remove timepoint column
data_wide <- data_wide %>% select(-starts_with("timepoint_"))
# Function to find the first non-NA value
get_first_non_na <- function(x) {
non_na_values <- x[!is.na(x)]
if (length(non_na_values) > 0) {
return(non_na_values[1])
} else {
return(NA)
}
}
# Generate baseline variables
for (j in seq_along(mem_vars)) {
mem_var <- mem_vars[j]
baseline_name <- paste0(mem_var, "_baseline")
data_wide[[baseline_name]] <- apply(
data_wide[, paste0(mem_var, "_", 1:n_timepoints), drop = FALSE],
1,
get_first_non_na
)
}
# Scale memory variables by baseline (timepoint 1)
for (i in 1:n_timepoints) {
for (j in seq_along(mem_vars)) {
mem_var <- mem_vars[j]
base_col_name <- paste0(mem_var, "_baseline")
# Current timepoint memory variable column
col_name <- paste0(mem_var, "_", i)
# Check if column exists
if (col_name %in% names(data_wide)) {
# Calculate baseline mean and SD
baseline_mean <- mean(data_wide[[base_col_name]], na.rm = TRUE)
baseline_sd <- sd(data_wide[[base_col_name]], na.rm = TRUE)
# Scale the variable using baseline mean and SD
scaled_values <- (data_wide[[col_name]] - baseline_mean) / baseline_sd
data_wide[[paste0("sc_", col_name)]] <- as.numeric(scaled_values) # Convert to numeric
} else {
cat("Column not found:", col_name, "\n")
}
}
}
# Calculate memory composites
for (i in 1:n_timepoints) {
mem_cols <- paste0("sc_", mem_vars, "_", i)
data_wide <- data_wide %>%
mutate(!!paste0("memory_", i) := rowMeans(
select(., any_of(mem_cols)),
na.rm = TRUE
))
}
# Calculate slopes
for (i in 1:nrow(data_wide)) {
em_data <- data_wide[i, paste0("memory_", 1:n_timepoints)]
hc_data <- data_wide[i, paste0("hippocampus_volume_", 1:n_timepoints)]
age <- data_wide[i, paste0("age_", 1:n_timepoints)]
if (sum(!is.na(em_data)) > 1 && sum(!is.na(hc_data)) > 1) {
data_wide[i, "memory_slopes"] <- lm(em_data[!is.na(em_data)] ~ age[!is.na(em_data)])$coeff[2]
data_wide[i, "hippocampus_slopes"] <- lm(hc_data[!is.na(hc_data)] ~ age[!is.na(hc_data)])$coeff[2]
}
}
data_wide
}
# Execute the process on your data
data <- process_data(data, n_timepoints, memory_vars)
```
### Step 2b: Define Relevant Variables (if needed)
For those with pre-calculated hippocampus and memory slopes, make sure your data aligns with the naming conventions in this script. Your data should already be in wide format.
```{r define variables}
# Define your variables
data <- data %>%
rename(
hippocampus_slopes = "hippocampus slopes variable",
memory_slopes = "memory slopes variable"
)
```
### Step 3: Further Processing
With your standard variable names and required calculations complete, you can run the rest of this code without needed to make any more modifications.
This block of code calculated the residuals of a regression between memory change and baseline age and another between hippocampal change and baseline age to be used in the calculation of the brain maintenance and cognitive reserve variables.
```{r residuals}
# Remove NAs
data <- data %>%
drop_na(hippocampus_slopes, memory_slopes)
# Identify the follow-up age column
age_cols <- grep("^age_\\d+$", names(data), value = TRUE)
# For each row, pick the maximum age among those columns
data <- data %>%
rowwise() %>%
mutate(
age_followup = max(c_across(all_of(age_cols)), na.rm = TRUE)
) %>%
ungroup()
# Now use age_followup to generate the residuals
mod_memory <- lm(memory_slopes ~ age_followup, data = data)
mod_hippocampus <- lm(hippocampus_slopes ~ age_followup, data = data)
# Attach residuals back to the data frame
data <- data %>%
mutate(
residuals_memory_age = resid(mod_memory),
residuals_hippocampus_age = resid(mod_hippocampus)
)
```
This creates a scaled version of the hippocampus and memory annual change variables.
```{r scaling}
data <- data %>%
mutate(
hippocampus_slopes_scaled = residuals_hippocampus_age / max(
abs(min(residuals_hippocampus_age, na.rm = TRUE)),
abs(max(residuals_hippocampus_age, na.rm = TRUE))
),
memory_slopes_scaled = residuals_memory_age / max(
abs(min(residuals_memory_age, na.rm = TRUE)),
abs(max(residuals_memory_age, na.rm = TRUE))
)
)
```
Then this creates the brain maintenance (BM) variable.
```{r brain maintenance}
# Define a function to calculate the distance from a given data point to the point (1,1)
distance_to_bm_point <- function(y, x) {
# Calculate the shortest distance from the data point to (1,1)
distance <- sqrt((x - 1)^2 + (y - 1)^2)
return(distance)
}
# Define a function to calculate the distance from a given data point to the point (1,1)
distance_to_bm_line <- function(y, x) {
# Calculate the intersection point (projection of the point onto the line x = y)
intersection_x <- (x + y) / 2
intersection_y <- intersection_x
# Calculate the distance from the point (x, y) to the projection point
distance <- sqrt((intersection_x - x)^2 + (intersection_y - y)^2)
return(distance)
}
# Calculate BM variable
data <- data %>%
mutate(
dist_to_bm_point = distance_to_bm_point(memory_slopes_scaled, hippocampus_slopes_scaled),
dist_to_bm_line = distance_to_bm_line(memory_slopes_scaled, hippocampus_slopes_scaled),
brain_maintenance = 3.414214 - (dist_to_bm_point + dist_to_bm_line)
)
```
Next, this creates the cognitive reserve (CR) variable.
```{r cognitive reserve}
# Define a function to calculate the distance from the data point to the point (1,1)
distance_to_cr_point <- function(y, x) {
# Calculate the shortest distance from the data point to (1,1)
distance <- sqrt((x + 1)^2 + (y - 1)^2)
return(distance)
}
# Define a function to calculate the distance from to the line x = -y
distance_to_cr_line <- function(y, x) {
# Calculate the intersection point (projection of the point onto the line x = -y)
intersection_x <- (x - y) / 2
intersection_y <- -(intersection_x)
# Calculate the distance from the point (x, y) to the intersection point
distance <- sqrt((intersection_x - x)^2 + (intersection_y - y)^2)
return(distance)
}
# Calculate CR variable
data <- data %>%
mutate(
dist_to_cr_point = distance_to_cr_point(memory_slopes_scaled, hippocampus_slopes_scaled),
dist_to_cr_line = distance_to_cr_line(memory_slopes_scaled, hippocampus_slopes_scaled),
cognitive_reserve = 3.414214 - (dist_to_cr_point + dist_to_cr_line)
)
```
Lastly, this calculates whether someone is aging more brain maintenance or cognitive reserve-like.
```{r pathway}
# Classify as on the BM or CR aging pathway
data <- data %>%
mutate(
pathway = ifelse(dist_to_cr_line > dist_to_bm_line, "Brain Maintenance Pathway", "Cognitive Reserve Pathway")
)
```