-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMainScript.m
94 lines (82 loc) · 3.57 KB
/
MainScript.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
%% MainScript.m
%%
%% Takes the name of a dataset and loads that dataset into memory. It then
%% randomly selects a 'test_percent' fraction of the population of each class
%% to use for training the classifier (despite its name, 'test_percent' sizes
%% the training set). Test_Classifier.m is then called to verify the
%% classification rate of the trained SO_Classifier.
%%
%% Possible values for
%% - datafile_loc:
%% 'iris_data.csv',
%% 'letter_recognition.csv'
%% - test_percent: 0.0 < x < 1.0
%%
%% Note: every dataset must store the class/output type in its last column,
%% and all columns must be numeric; textual or symbolic values are not
%% supported for the time being.
%%
function[] = MainScript ( datafile_loc, test_percent )
    % MainScript  Load a CSV dataset, partition it per class, train and test.
    %
    %   datafile_loc - name of a CSV file inside the 'data' folder of the
    %                  current directory; its last column holds the class.
    %   test_percent - fraction (0.0 < x < 1.0) of each class's rows used to
    %                  compute the per-class take count handed to
    %                  Take_Partition and Train_Spemann_Organizer.
    %
    %   Returns nothing; progress and errors are reported through disp.

    % Verify parameters.
    if test_percent <= 0.0 || test_percent >= 1.0
        disp(['ERROR: Invalid testing percentage : ' num2str(test_percent)])
        return
    end

    % All other data has been formatted to be the same:
    % Classification type is stored on the last column.
    disp( ['Using: ' datafile_loc] )
    data = csvread( fullfile( pwd, 'data', datafile_loc ) );

    % Nothing to partition or train on; bail out in the same style as the
    % parameter check instead of failing on an out-of-range index below.
    if isempty( data )
        disp(['ERROR: No data found in : ' datafile_loc])
        return
    end

    % Get the size of our dataset.
    [row, column] = size(data);

    % Count the number of different classes.
    num_of_outputs = numel( unique( data( :, column ) ) );
    disp([ num2str(column-1) ' Factors, ' num2str(num_of_outputs) ' Classes'])

    % Create an index table, one column per class:
    %   first row is the output type
    %   second row is counter
    %   third and above is the index (zero-padded for smaller classes)
    array_output_type = zeros(3, num_of_outputs);
    fill = 0;   % number of columns that have been assigned a class so far
    for i = 1 : row
        element = data(i, column);

        % Search only the columns that already hold a class. Looking at the
        % still-unassigned columns would make a class label of 0 match the
        % zero placeholders there and corrupt (or crash on) the table.
        ind = find( array_output_type(1, 1:fill) == element );

        if isempty(ind)
            % new output goes to its own column
            fill = fill + 1;
            array_output_type(1,fill) = element;
            array_output_type(2,fill) = 1;
            array_output_type(3,fill) = i;
        else
            % old output goes under its output type
            array_output_type(2,ind) = array_output_type(2,ind) + 1;
            array_output_type(array_output_type(2,ind)+2,ind) = i;
        end
    end

    % Get certain percentage of each possible category.
    take_counts = ceil( array_output_type( 2, : ) .* test_percent );
    class_names = array_output_type(1,:);

    % If our data is very misbalanced, we may bias it against smaller
    % classifications. Taking the same (smallest) count from every class
    % turns that off.
    % if not( allow_bias ) % We assume False is being passed in.
    take_counts(:) = min( take_counts );
    % end

    % Randomly take percentage of each column and split into two lists of
    % indices.
    [trainIdxs, testIdxs] = Take_Partition( take_counts, ...
                                            array_output_type( 3:end, : ));

    % Build the training and testing sets by selecting the listed rows in one
    % indexing step. The index lists are walked along their second dimension,
    % as before; an empty list now yields an empty (0-row) set rather than an
    % undefined variable.
    TrainingData = data( trainIdxs( 1 : size( trainIdxs, 2 ) ), : );
    TestingData  = data( testIdxs( 1 : size( testIdxs, 2 ) ), : );

    % Train the Spemann Organizer, will get back the Weighting system for each
    % factor and the K-Factor Gradient Matrix for each possible class.
    disp(['Training with ' num2str(size(TrainingData,1)) ' entries']);
    [weights, rank, factorGradient] = ...
        Train_Spemann_Organizer( TrainingData, num_of_outputs, take_counts );

    % Test the remaining data using the classifier. Will print percentages of
    % misclassified and correctly classified individuals from the test data.
    Test_Classifier( TestingData, class_names, weights, rank, factorGradient );
end