-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrss_net.m
174 lines (131 loc) · 5.93 KB
/
rss_net.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
function [logw,alpha,mu,s,param] = rss_net(data,hyper,options)
% USAGE: fit RSS-NET model for a given set of hyperparameters
% INPUT:
% data: paths of input data files for RSS-NET, structure
% hyper: hyper-parameter values for RSS-NET, structure
% options: user-specified behaviour of RSS-NET, structure
% OUTPUT:
% logw: variational lower bound (up to a constant), num_hyper by 1
% alpha: variational posterior inclusion probabilities, num_snp_fitted by num_hyper
% mu: variational posterior means of additive effects, num_snp_fitted by num_hyper
% s: variational posterior variances of additive effects, num_snp_fitted by num_hyper
% param: grid of hyper-parameters [theta0 theta sigma0 sigma], num_hyper by 4
% If the input variable `options` is not provided, then use default
% in make_{netmat,expr,logodds}.m and rss_varbvsr_bigmem_squarem.m.
if ~exist('options','var')
options = [];
disp('Default options for RSS-NET are used here ...');
end
% Load input data.
if ~exist('data','var')
error('Input data `data` must be provided ...');
end
% Load hyper-parameters.
if ~exist('hyper','var')
error('Hyper-parameters `hyper` must be provided ...');
end
% Specify the file of GWAS summary statistics and LD estimates.
sumstats = data.sumstats_file;
% Determine whether whole-genome data are fed to RSS-NET.
% If yes, RSS-NET output can be directly used for further inferences.
% If no, RSS-NET output should be adjusted before further inferences.
% For more details on this post-analysis adjustment, please see:
% 1. https://doi.org/10.1038/s41467-018-06805-x;
% 2. https://doi.org/10.1371/journal.pgen.1003770.
% If the GWAS summary data mat file contains a variable named `snps`,
% then RSS-NET assumes this mat file does not contain the whole-genome data.
% To avoid error please do not include `snps` in any whole-genome data.
% The variable `data.num_snp_fitted` denotes the number of SNPs fitted in RSS-NET.
% The variable `data.num_snp` denotes the total number of whole-genome SNPs.
% These two variables are the same if and only if `whole_genome == true`.
% Finally I applied RSS-NET only to whole-genome summary data to generate
% simulation and data analysis results for RSS-NET manuscript, thanks to the
% high-performance computing facilities at University of Chicago (https://rcc.uchicago.edu/)
% and Stanford University (https://srcc.stanford.edu/).
sumstats_info = who('-file',sumstats);
sumstats_data = matfile(sumstats);
if ~isfield(data,'num_snp_fitted')
data.num_snp_fitted = data.num_snp;
end
% Scenario 1: RSS-NET is applied to genome-wide SNPs.
whole_genome = ~ismember('snps',sumstats_info);
whole_genome = whole_genome && (data.num_snp_fitted == data.num_snp);
if whole_genome
fprintf('This RSS-NET analysis uses %d genome-wide SNPs ...\n',data.num_snp_fitted);
end
% Scenario 2: RSS-NET is applied to near-network SNPs.
subset_genome = ismember('snps',sumstats_info);
subset_genome = subset_genome && (data.num_snp_fitted ~= data.num_snp);
if subset_genome
snps = sumstats_data.snps;
if (length(sumstats_data.snps) ~= data.num_snp_fitted)
error('Inconsistent number of near-network SNPs ...\n');
end
fprintf('This RSS-NET analysis uses %d near-network SNPs ...\n',data.num_snp_fitted);
end
% Stop RSS-NET if neither Scenario 1 nor 2 is correctly specified.
if (~whole_genome) && (~subset_genome)
error('Invalid SNP specification of RSS-NET ...\n');
end
clear sumstats_data sumstats_info whole_genome;
% Create whole-genome SNP-gene network information matrix.
net_mat = make_netmat(data,options);
% Subset network matrix if whole-genome data are not used.
if subset_genome
net_mat = net_mat(snps, :);
clear snps;
end
% Obtain normalized context-specific expression values.
expr_nval = make_expr(data,options);
% Compute SNP-gene scaling factors.
gene_var = full((net_mat.^2) * expr_nval);
clear net_mat expr_nval;
% Create a 4-column matrix for all sets of hyper-parameters.
% a) param == [theta0 theta eta rho] if induced_flag == true
% b) param == [theta0 theta sigma0 sigma] if induced_flag == false
[param,induced_flag] = make_hpgrid(hyper);
clear hyper;
% Preallocate variational output.
num_hyper = size(param,1);
num_snp_fitted = data.num_snp_fitted;
logw = zeros(num_hyper,1);
alpha = zeros(num_snp_fitted,num_hyper);
mu = zeros(num_snp_fitted,num_hyper);
s = zeros(num_snp_fitted,num_hyper);
% Run variational approximation for each set of hyper-parameters.
for k=1:num_hyper
% Specify hyper-parameters related to log odds.
theta0 = param(k,1);
theta = param(k,2);
logodds = make_logodds(data.snp2net_file,theta0,theta,options);
% Specify hyper-parameters related to effect sizes.
% Note that one can either directly specify prior for (sigma0, sigma),
% or induce this prior from the prior specified for (eta, rho).
% However, inducing prior for (sigma0, sigma) from (eta, rho) is valid
% only if the whole-genome data are fed to RSS-NET.
if induced_flag && subset_genome
error('Induced prior can only be applied to whole-genome data ...');
end
if induced_flag
eta = param(k,3);
rho = param(k,4);
[sigma0,sigma] = make_sigma(data,gene_var,logodds,eta,rho);
param(k,3) = sigma0;
param(k,4) = sigma;
else
sigma0 = param(k,3);
sigma = param(k,4);
end
% Compute prior SD for each SNP.
sigbeta = sqrt(sigma0^2 + sigma^2*gene_var);
% Display the current status.
fprintf('(%03d) theta0=%0.2f theta=%0.2f sigma0=%0.3f sigma=%0.3f\n',...
k,theta0,theta,sigma0,sigma);
% Run mean-field variational approximation.
[logw(k),alpha(:,k),mu(:,k),s(:,k)] =...
rss_varbvsr_bigmem_squarem(sumstats,sigbeta,logodds,options);
clear sigbeta logodds theta0 theta sigma0 sigma;
fprintf('\n');
end
clear data options gene_var;
end