-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlegacyFingerprint.R
219 lines (163 loc) · 7.46 KB
/
legacyFingerprint.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
###################################################
# ~~~ FramebyFrame package ~~~
# Legacy fingerprint
# calculate old version of fingerprint from a .mat file obtained from SCRAP.m
# Francois Kroll 2022
# francois@kroll.be
###################################################
library(dplyr)
library(tibble)
# note, I confirmed with exp PSEN2 210907_13 that below generates exactly same fingerprint as code I received as example Clustering2.m
# function getFromMat(...) ------------------------------------------------
# small function to help get the data from the ludicrous data structure we get from the .mat file
# parameter = parameter name to extract
# meanstd = mean or std
# will return 4 values: night1, day1, night2, day2
# this matches order of fingerprints in drugDb.csv
getFromMat <- function(mat,
parameter,
nights,
days,
meanstd,
grpNum) {
c(mat[,,1]$summarytable[,,1][[meanstd]][,,1][[parameter]][,,1]$night[[grpNum]][[1]][nights[1]],
mat[,,1]$summarytable[,,1][[meanstd]][,,1][[parameter]][,,1]$day[[grpNum]][[1]][days[1]],
mat[,,1]$summarytable[,,1][[meanstd]][,,1][[parameter]][,,1]$night[[grpNum]][[1]][nights[2]],
mat[,,1]$summarytable[,,1][[meanstd]][,,1][[parameter]][,,1]$day[[grpNum]][[1]][days[2]])
}
# function legacyFingerprint(...) -----------------------------------------
# e.g. legacyFingerprint(
# matPath='~/Dropbox/ZFAD/220531_SORL1/legacyMiddur/220531_15/220531_15.mat',
# conGrp='scr',
# treGrp='sorl1',
# days=c(2,3),
# nights=c(2,3))
legacyFingerprint <- function(matPath,
conGrp,
treGrp,
nights=c(2,3),
days=c(2,3)) {
### read .mat file ###
mat <- R.matlab::readMat(matPath)$geno
# means and standard deviations are already calculated by SCRAP.m
# first, build a dataframe control group mean / control group std / treatment group mean / treatment group std
# with each row being one parameter
# will make it simple to calculate Z-scores
### extract genotype names ##
genonms <- unlist(mat[,,1]$name)
# check genotypes given by user are in there
if(!all(c(conGrp, treGrp) %in% genonms))
stop('\t \t \t \t >>> Could not find control and/or treatment group in this dataset \n')
# find the number of each group
conNum <- as.numeric(which(genonms==conGrp))
treNum <- as.numeric(which(genonms==treGrp))
### preallocate dataframe ###
legparams <- c('sleep',
'sleepBout',
'sleepLength',
'sleepLatency',
'averageActivity',
'averageWaking')
legpr <- rep(rep(legparams, rep(4, length(legparams)))) # this repeats 4 times each parameter, so sleep sleep sleep sleep; sleepBout ...
dns <- as.vector(replicate(length(legparams), c('night1', 'day1', 'night2', 'day2'))) # writes all the days/nights
# prepare data.frame:
# fd for fingerprint dataframe
fd <- data.frame(win=dns, parameter=legpr)
# add uparam column
# e.g. day1_sleepLength
fd <- fd %>%
mutate(uparam=paste(win, parameter, sep='_'), .before=1)
### fill in dataframe ####
# accessing the data in the mat file below is really trial and error and checking in MATLAB I am accessing the right values
# seems like code added a 4th night even though it does not exist, parameters are usually 0 for this night
# but sometimes a small value (e.g. 0.0213) which I suppose is an artefact from somewhere...
# here, could code a check that day/night given by user are full (~ 10 or 14 hours) by looking at middur data
# sapply below to loop through parameters
fd <- fd %>%
add_column(meanCon= as.vector(sapply(legparams, function(lp) {
getFromMat(mat=mat,
parameter=lp,
nights=nights,
days=days,
meanstd='mean',
grpNum=conNum)
}))) %>%
add_column(stdCon= as.vector(sapply(legparams, function(lp) {
getFromMat(mat=mat,
parameter=lp,
nights=nights,
days=days,
meanstd='std',
grpNum=conNum)
}))) %>%
add_column(meanTre= as.vector(sapply(legparams, function(lp) {
getFromMat(mat=mat,
parameter=lp,
nights=nights,
days=days,
meanstd='mean',
grpNum=treNum)
}))) %>%
add_column(stdTre= as.vector(sapply(legparams, function(lp) {
getFromMat(mat=mat,
parameter=lp,
nights=nights,
days=days,
meanstd='std',
grpNum=treNum)
})))
# add last two parameters manually
fd <- fd %>%
add_row(uparam='daymean_averageWaking',
win='daymean',
parameter='averageWaking',
meanCon=as.numeric(mat[,,1]$summarytable[,,1]$mean[,,1]$averageWaking[,,1]$daymean[[conNum]][[1]]),
stdCon=as.numeric(mat[,,1]$summarytable[,,1]$std[,,1]$averageWaking[,,1]$daymean[[conNum]][[1]]),
meanTre=as.numeric(mat[,,1]$summarytable[,,1]$mean[,,1]$averageWaking[,,1]$daymean[[treNum]][[1]]),
stdTre=as.numeric(mat[,,1]$summarytable[,,1]$std[,,1]$averageWaking[,,1]$daymean[[treNum]][[1]])) %>%
add_row(uparam='nightmean_averageWaking',
win='nightmean',
parameter='averageWaking',
meanCon=as.numeric(mat[,,1]$summarytable[,,1]$mean[,,1]$averageWaking[,,1]$nightmean[[conNum]][[1]]),
stdCon=as.numeric(mat[,,1]$summarytable[,,1]$std[,,1]$averageWaking[,,1]$nightmean[[conNum]][[1]]),
meanTre=as.numeric(mat[,,1]$summarytable[,,1]$mean[,,1]$averageWaking[,,1]$nightmean[[treNum]][[1]]),
stdTre=as.numeric(mat[,,1]$summarytable[,,1]$std[,,1]$averageWaking[,,1]$nightmean[[treNum]][[1]]))
### calculate fingerprint ####
# i.e. calculate Z-score from mean and std we extracted from the mat file
fd <- fd %>%
mutate(zsco=(meanTre-meanCon)/stdCon)
### add grp column ###
# maybe useful if we end up appending multiple fingerprints
# fd <- fd %>%
# add_column(grp=treGrp, .after='parameter')
### return fingerprint table ###
return(fd)
}
# legacyFingerprintMEAN(...) ----------------------------------------------
# takes multiple .mat files as input and returns mean fingerprint
# note, each fingerprint is first calculated separately, then the Z-scores are averaged
legacyFingerprintMEAN <- function(matPaths,
conGrp,
treGrp,
nights=c(2,3),
days=c(2,3)) {
# calculate legacy fingerprint for each .mat
fdl <- lapply(matPaths, function(pth) {
legacyFingerprint(matPath=pth,
conGrp=conGrp,
treGrp=treGrp,
nights=c(2,3),
days=c(2,3))
})
# average the Z-scores
# cbind every element of the list
fdm <- do.call(cbind, fdl)
# first 4 columns should always be uparam, win, parameter, grp
# keep those + all columns called zsco
fdm <- fdm[, c(1, 2, 3, which(colnames(fdm)=='zsco'))]
colnames(fdm)[which(startsWith(colnames(fdm), 'zsco'))] <- sprintf('exp%i', 1:length(matPaths))
# now calculate mean of Z-scores
fdm$zavg <- apply(fdm[,which(startsWith(colnames(fdm), 'exp'))], 1, mean)
# ready to return
return(fdm)
}