-
Notifications
You must be signed in to change notification settings - Fork 6
/
CBPP_parcelwise.m
151 lines (135 loc) · 6.7 KB
/
CBPP_parcelwise.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
function CBPP_parcelwise(fc, y, conf, cv_ind, out_dir, options)
% CBPP_parcelwise(fc, y, conf, cv_ind, out_dir, options)
%
% This function runs Connectivity-based Psychometric Prediction (CBPP) using parcel-wise connectivity matrix (fc) to
% predict psychometric variables (y)
%
% Inputs:
%       - fc     :
%                  DxN matrix containing the D functional connectivity values (i.e. between the chosen parcel/voxel and
%                  D other parcels/voxels) from N subjects
%       - y      :
%                  NxP matrix containing P psychometric variables from N subjects
%       - conf   :
%                  NxC matrix containing C confounding variables from N subjects
%       - cv_ind :
%                  NxM matrix containing cross-validation fold indices for M repeats on N subjects. The indices should
%                  range from 1 to K, for a K-fold cross-validation scheme
%       - out_dir:
%                  Absolute path to output directory. The directory is created if it does not exist yet
%       - options:
%                  (Optional) see below for available settings
%
% Options:
%       - method  :
%                   Regression method to use. Available options: 'MLR' (multiple linear regression), 'SVR' (Support
%                   Vector Regression), 'EN' (Elastic Nets), 'RR' (ridge regression)
%                   The regression itself is delegated to the function named '<method>_one_fold'.
%                   Default: 'SVR'
%       - prefix  :
%                   Prefix for output filename. If all settings are default, the output file will be named with the
%                   prefix 'pwCBPP_SVR_standard_test'
%                   Default: 'test'
%       - isnull  :
%                   Set this to 1 to perform permutation testing by shuffling y. Note that the number of repeats is
%                   still dependent on matrix size of cv_ind input, i.e. to run 1000 permutations, cv_ind input should
%                   be of size Nx1000.
%                   Default: 0
%       - conf_opt:
%                   Confound controlling approach. Available options:
%                   'standard' ('standard' approach): regress out confounding variables from training subjects and apply
%                              to test subjects
%                   'str_conf' ('sex + brain size confounds' approach): similar to 'standard', but noting that the
%                              confounding variables passed in are only those correlated with strength (i.e. gender,
%                              brain size and ICV).
%                   'no_conf'  ('no confound' approach): don't use confounds
%                   Any other value triggers a warning and no confound regression is done by this function.
%                   Default: 'standard'
%       - in_seed :
%                   Seed for inner-loop cross-validation indices generation. Can be set to 'shuffle' or any integer.
%                   Only required for ridge regression
%                   Default: 'shuffle'
%
% Output:
%       One .mat file will be saved to out_dir, containing performance in training set (variable 'r_train' and
%       'nrmsd_train') and validation set (variable 'r_test' and 'nrmsd_test'). Each variable is of size MxKxP
%       (repeats x folds x targets). The file name is prefixed with 'null_' when options.isnull is non-zero.
%
% Errors:
%       CBPP:sizeMismatch - the number of subjects in fc, cv_ind or (when used) conf does not match y
%       CBPP:outDir       - out_dir does not exist and cannot be created
%
% Jianxiao Wu, last edited on 21-Oct-2020

% usage
if nargin < 5
    disp('Usage: CBPP_parcelwise(fc, y, conf, cv_ind, out_dir, [options])');
    return
end

% add utility functions to path
my_path = fileparts(mfilename('fullpath'));
addpath(fullfile(my_path, 'utilities'));

% set default settings
if nargin < 6; options = []; end
if ~isfield(options, 'isnull'); options.isnull = 0; end
if ~isfield(options, 'method'); options.method = 'SVR'; end
if ~isfield(options, 'prefix'); options.prefix = 'test'; end
if ~isfield(options, 'conf_opt'); options.conf_opt = 'standard'; end
if ~isfield(options, 'in_seed'); options.in_seed = 'shuffle'; end

% set-up
yd = size(y, 2); % dimensionality of targets y == P
n = size(y, 1); % number of subjects == N
n_fold = max(cv_ind(:)); % number of folds for CV == K
n_repeat = size(cv_ind, 2); % number of repeats for CV == M
x = fc'; % subjects along rows (NxD), which is how x is handed to the regression function

% settings that do not change across repeats/folds are resolved once here
is_en = strcmp(options.method, 'EN');
is_rr = strcmp(options.method, 'RR');
% confounds are regressed from y at this level for every method except RR
% NOTE(review): the original comments state that RR does confound regression in its inner loop, yet neither conf
% nor options.in_seed is forwarded by the 4-argument regression call below - confirm against RR_one_fold
regress_conf = ~is_rr && any(strcmp(options.conf_opt, {'standard', 'str_conf'}));
if ~any(strcmp(options.conf_opt, {'standard', 'str_conf', 'no_conf'}))
    % keep running as before, but do not silently drop the confound regression (e.g. on a case typo)
    warning('CBPP:unknownConfOpt', ...
        'Unrecognised conf_opt ''%s'': no confound regression is performed by CBPP_parcelwise', options.conf_opt);
end

% fail fast on mismatched subject counts, which would otherwise index the wrong rows or error mid-run
if size(fc, 2) ~= n
    error('CBPP:sizeMismatch', 'fc has %d columns (subjects) but y has %d rows', size(fc, 2), n);
end
if size(cv_ind, 1) ~= n
    error('CBPP:sizeMismatch', 'cv_ind has %d rows (subjects) but y has %d rows', size(cv_ind, 1), n);
end
if regress_conf && size(conf, 1) ~= n
    error('CBPP:sizeMismatch', 'conf has %d rows (subjects) but y has %d rows', size(conf, 1), n);
end

% make sure results can be saved before spending time on the cross-validation
if ~isempty(out_dir) && ~exist(out_dir, 'dir')
    [dir_ok, dir_msg] = mkdir(out_dir);
    if ~dir_ok
        error('CBPP:outDir', 'Cannot create output directory %s: %s', out_dir, dir_msg);
    end
end

% regression function is selected by name, e.g. 'SVR_one_fold'
reg_func = str2func([options.method '_one_fold']);

% run cross-validation
r_train = zeros(n_repeat, n_fold, yd);
r_test = zeros(n_repeat, n_fold, yd);
nrmsd_train = zeros(n_repeat, n_fold, yd);
nrmsd_test = zeros(n_repeat, n_fold, yd);
fprintf('Running repeat-fold 0001-01');
for repeat = 1:n_repeat
    cv_ind_curr = cv_ind(:, repeat);

    for fold = 1:n_fold
        % the 7 backspaces erase the previous 'rrrr-ff' counter so the progress is updated in place
        fprintf('\b\b\b\b\b\b\b%04d-%02d', repeat, fold);

        % remove confounds for 'standard' and 'str_conf' approaches (not for RR, see note above)
        y_curr = y;
        if regress_conf
            test_mask = cv_ind_curr == fold;
            if is_en
                % EN: the next fold (wrapping around to 1) is held out as validation set
                fold_inner = mod(fold, n_fold) + 1;
                val_mask = cv_ind_curr == fold_inner;
                train_mask = ~test_mask & ~val_mask;
            else
                % other methods: split into training and test set only
                train_mask = ~test_mask;
            end

            % regression weights are estimated on training subjects only and then applied to held-out subjects
            [y_curr(train_mask, :), reg_y] = regress_confounds_y(y_curr(train_mask, :), conf(train_mask, :));
            y_curr(test_mask, :) = regress_confounds_y(y_curr(test_mask, :), conf(test_mask, :), reg_y);
            if is_en
                y_curr(val_mask, :) = regress_confounds_y(y_curr(val_mask, :), conf(val_mask, :), reg_y);
            end
        end

        for target_ind = 1:yd
            % shuffle target labels for permutation testing if specified
            if options.isnull ~= 0
                y_curr_score = y_curr(randperm(n), target_ind);
            else
                y_curr_score = y_curr(:, target_ind);
            end

            % run regression; the fold split is redone inside reg_func from cv_ind_curr and fold
            perf = reg_func(x, y_curr_score, cv_ind_curr, fold);

            % collect results
            r_train(repeat, fold, target_ind) = perf.r_train;
            r_test(repeat, fold, target_ind) = perf.r_test;
            nrmsd_train(repeat, fold, target_ind) = perf.nrmsd_train;
            nrmsd_test(repeat, fold, target_ind) = perf.nrmsd_test;
        end
    end
end
fprintf('\n');

% save performance results
output_name = ['pwCBPP_' options.method '_' options.conf_opt '_' options.prefix ];
if options.isnull ~= 0; output_name = ['null_' output_name]; end
save(fullfile(out_dir, [output_name '.mat']), 'r_train', 'r_test', 'nrmsd_train', 'nrmsd_test');