-
Notifications
You must be signed in to change notification settings - Fork 0
/
predictPhenotype.m
116 lines (88 loc) · 2.92 KB
/
predictPhenotype.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
function predictPhenotype(subchallenge,dataset,modality,can_override)
% PREDICTPHENOTYPE Predict phenotypes (subchallenges) using a precomputed classifier MAT-file
%
%   predictPhenotype(subchallenge,dataset,modality,can_override)
%
% Inputs (all optional):
%   subchallenge  - one of 'on_off' (default), 'dyskinesia', 'tremor'.
%   dataset       - one of 'CIS-PD' (default), 'REAL-PD'.
%   modality      - '' (default) or one of '-smartphone_accelerometer',
%                   '-smartwatch_accelerometer', '-smartwatch_gyroscope'
%                   (note the leading dash, which is stripped when building
%                   the testing-data sub-directory).
%   can_override  - when true (default) and the predictions MAT-file already
%                   exists, the user is asked interactively whether to
%                   rebuild the time series from the CSV files. When false,
%                   an existing predictions file is always reused.
%
% Side effects:
%   Reads CSVs from ./data/<dataset>/testing_data/[<modality>/], calls
%   TS_predict with ./classifiers/<prefix>classifier.mat, and writes a table
%   with columns measurement_id, prediction and accuracy to
%   ./submission/<prefix>predictions.csv.
%
% Errors:
%   Raises an error when an argument is not one of the allowed values, or
%   when the time series have to be loaded and no CSV file is found.

    %% Configure
    parallelize = false;
    subchallenges = {'on_off','dyskinesia','tremor'};
    datasets = {'CIS-PD','REAL-PD'};
    modalities = {'',...
                  '-smartphone_accelerometer',...
                  '-smartwatch_accelerometer',...
                  '-smartwatch_gyroscope' };

    % Defaults for trailing arguments that were not supplied.
    if nargin < 1
        subchallenge = 'on_off';
    end
    if nargin < 2
        dataset = 'CIS-PD';
    end
    if nargin < 3
        modality = '';
    end
    if nargin < 4
        can_override = true;
    end

    % Exact membership tests: a substring test would accept partial names
    % such as 'on' or 'PD', and would accept any modality at all because
    % every string contains the empty string.
    assert(any(strcmp(subchallenge,subchallenges)),...
        'Unknown subchallenge; expected one of: %s.',strjoin(subchallenges,', '));
    assert(any(strcmp(dataset,datasets)),...
        'Unknown dataset; expected one of: %s.',strjoin(datasets,', '));
    assert(any(strcmp(modality,modalities)),...
        'Unknown modality; expected '''' or one of: %s.',strjoin(modalities(2:end),', '));

    % Maximum number of samples kept per recording (centred window).
    N = 10000; % Should probably grab this from a sample TS but oh well..

    %% Setup
    if ~exist('TS_compute','file')
        fprintf('Run startup.m from HCTSA directory.\n');
        return;
    end

    prefix = [dataset modality '-' subchallenge '_'];
    classifier_file = ['./classifiers/' prefix 'classifier.mat'];
    predictions_file = ['./data/hctsa/' dataset '/' prefix 'predictions.mat'];
    test_database = ['./data/' dataset '/'];
    ts_dir = [test_database 'testing_data/'];
    if ~isempty(modality)
        % Drop the leading dash to obtain the sub-directory name.
        ts_dir = [ts_dir modality(2:end) '/'];
    end
    ts_csvs = dir([ts_dir '*.csv']);
    output_csv = ['./submission/' prefix 'predictions.csv'];
    S = length(ts_csvs);

    %% Predict labels
    ts_data = cell(S,1);
    measurement_ids = cell(S,1);

    % Decide whether to (re)load the raw CSVs or reuse the stored time series.
    out = 'y';
    if exist(predictions_file,'file')
        if can_override
            out = input(sprintf('File %s already exists - override? [y/n] ',predictions_file),'s');
        else
            out = 'n';
        end
    end
    % Accept 'y'/'yes' in any case; an element-wise == comparison would
    % silently treat 'Y' or 'yes' as a refusal.
    reload_from_csv = any(strcmpi(strtrim(out),{'y','yes'}));

    if reload_from_csv
        if S == 0
            error('predictPhenotype:noData','No CSV files found in %s.',ts_dir);
        end
        fprintf('Loading time-series data...\n');
        for s = 1:S
            ccsv = ts_csvs(s).name;
            ctab = readtable([ts_dir ccsv]);
            % Axis columns are named either X/Y/Z or x/y/z.
            if ismember('X',ctab.Properties.VariableNames)
                xyz = [ctab.X, ctab.Y, ctab.Z];
            else
                xyz = [ctab.x, ctab.y, ctab.z];
            end
            % Reduce the three axes to the first principal-component score.
            [~,scores] = pca(xyz);
            T = size(scores,1);
            if N > 0 && N < T
                % Keep the N samples centred in the recording.
                sid = ceil(T/2-N/2);
                seq = sid:sid+N-1;
            else
                seq = 1:T;
            end
            % Measurement id is the file name without its '.csv' extension.
            measurement_ids{s} = ccsv(1:end-4);
            ts_data{s} = scores(seq,1);
            fprintf('[%d/%d] %s (%i) loaded.\n', s, S, measurement_ids{s}, length(ts_data{s}));
        end
    else
        % Reuse the time series stored in the existing predictions file.
        % NOTE(review): assumes the MAT-file holds a TimeSeries variable with
        % Data and Name fields - confirm against what TS_predict saves.
        x = load(predictions_file);
        ts_data = x.TimeSeries.Data;
        measurement_ids = x.TimeSeries.Name;
    end

    [tab,acc] = TS_predict(ts_data,measurement_ids,classifier_file,...
        'predictionFilename',predictions_file,...
        'classifierType','topFeature',...
        'isParallel',parallelize);

    % NOTE(review): predictGroups is shifted by one, presumably from 1-based
    % group indices to 0-based labels - confirm against TS_predict.
    output_tab = table(tab.labels,tab.predictGroups-1,acc.*ones(height(tab),1),...
        'VariableNames',{'measurement_id','prediction','accuracy'});

    % writetable does not create missing folders, so make sure it exists.
    output_dir = fileparts(output_csv);
    if ~exist(output_dir,'dir')
        mkdir(output_dir);
    end
    fprintf('Saving predictions to %s...\n', output_csv);
    writetable(output_tab,output_csv);
    fprintf('Done.\n');