-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathsc_cellscore_admdl.m
109 lines (94 loc) · 3.69 KB
/
sc_cellscore_admdl.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
function [score] = sc_cellscore_admdl(X, genelist, tgsPos, tgsNeg, nbin, ctrl)
% Compute cell scores from a list of feature genes
%
% tgsPos - positive features (negative target marker genes)
% tgsNeg - negative features (negative target marker genes)
%
% see also: PKG.E_CELLSCORES, SC_CELLSCORE_UCELL
%
% ref: AddModuleScore - https://github.com/satijalab/seurat/blob/master/R/utilities.R
% ref: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8271111/
% disp("Seurat provides a computationally efficient gene signature scoring function,
% named AddModuleScore, originally proposed by Tirosh et al. [5]. ")
% The score is the average expression of a set of genes subtracted with
% the average expression of a reference set of genes. The reference set is randomly sampled from the gene_pool for each binned expression value.
% This reproduces the approach in Seurat [Satija15] and has been implemented for Scanpy by Davide Cittaro.
if nargin < 6, ctrl = 5; end
if nargin < 5, nbin = 25; end
if nargin < 4
tgsNeg = [];
end
if nargin < 3 || isempty(tgsPos)
error('USAGE: >>[score]=sc_cellscore_admdl(X,genelist,tgsPos);')
% tgsPos=["CD44","LY6C","KLRG1","CTLA","ICOS","LAG3"];
end
if ~any(matches(genelist, tgsPos, 'IgnoreCase', true))
score = NaN(size(X, 2), 1);
warning('No feature genes found in GENELIST. NaN scores returned');
return;
end
%genelist=upper(genelist);
%tgsPos=upper(tgsPos);
%tgsNeg=upper(tgsNeg);
try
if issparse(X), X = full(X); end
catch
warning('Keep using sparse X.');
end
X = sc_norm(X);
%disp('Library-size normalization...done.')
X = log1p(X);
%disp('Log(x+1) transformation...done.')
%idx=matches(genelist, tgsPos, 'IgnoreCase',true);
% if ~any(idx)
% score=NaN(size(X,2),1);
% warning('No feature genes found in GENELIST.');
% return;
% end
[score] = i_calculate_score(X, genelist, tgsPos, 1, nbin, ctrl);
if ~isempty(tgsNeg) && any(strlength(tgsNeg) > 0)
[s] = i_calculate_score(X, genelist, tgsNeg, -1, nbin, ctrl);
score = score + s; % add scores from negative markers (greater values for non-target cells)
end
end
function [score] = i_calculate_score(X, genelist, tgs, directtag, nbin, ctrl)
if nargin < 6, ctrl = 5; end
if nargin < 5, nbin = 25; end
if nargin < 4, directtag = 1; end
%rng default
% Initial stats
cluster_lenght = size(X, 1);
data_avg = mean(X, 2);
[~, I] = sort(data_avg);
% Sorting data
data_avg = data_avg(I);
gsorted = genelist(I);
Xsorted = X(I, :);
% Assigning bins by expression
assigned_bin = diag(zeros(cluster_lenght));
bin_size = cluster_lenght / nbin;
for i = 1:nbin
bin_match = data_avg <= data_avg(round(bin_size*i));
pos_avail = (assigned_bin == 0);
assigned_bin(pos_avail & bin_match) = i;
end
% Selecting bins of same expression
idx = matches(gsorted, tgs, 'IgnoreCase', true);
selected_bins = unique(assigned_bin(idx));
samebin_genes = gsorted(ismember(assigned_bin, selected_bins));
ctrl_use = [];
for i = 1:length(tgs)
ctrl_use = [ctrl_use; ...
randsample(samebin_genes, ctrl)];
end
ctrl_use = unique(ctrl_use);
% Averaging expression
ctrl_score = mean(Xsorted(matches(gsorted, ctrl_use, 'IgnoreCase', true), :), 1);
features_score = mean(Xsorted(idx, :), 1);
% Scoring
if directtag > 0
score = transpose(features_score-ctrl_score);
else
score = transpose(ctrl_score-features_score);
end
end