-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess.xml
155 lines (155 loc) · 9.82 KB
/
process.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
<?xml version="1.0" encoding="UTF-8"?><process version="9.2.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="9.2.001" expanded="true" height="68" name="Retrieve dataset" width="90" x="380" y="34">
<parameter key="repository_entry" value="//Local Repository/dataset"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="9.2.001" expanded="true" height="82" name="Select Attributes" width="90" x="514" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value="Description|ProcessActivity|Project|Project Tags|Stack Overflow|Task Tags|Title|Category"/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="648" y="34">
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="true"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<parameter key="select_attributes_and_weights" value="false"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="112" y="34">
<parameter key="transform_to" value="lower case"/>
</operator>
<operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="246" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:filter_stopwords_dictionary" compatibility="8.1.000" expanded="true" height="82" name="Filter Stopwords (Dictionary)" width="90" x="380" y="34">
<parameter key="file" value="stopwords.txt"/>
<parameter key="case_sensitive" value="false"/>
<parameter key="encoding" value="x-MacRomania"/>
</operator>
<operator activated="true" class="text:stem_snowball" compatibility="8.1.000" expanded="true" height="68" name="Stem (Snowball)" width="90" x="514" y="34">
<parameter key="language" value="Portuguese"/>
</operator>
<connect from_port="document" to_op="Transform Cases" to_port="document"/>
<connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
<connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (Dictionary)" to_port="document"/>
<connect from_op="Filter Stopwords (Dictionary)" from_port="document" to_op="Stem (Snowball)" to_port="document"/>
<connect from_op="Stem (Snowball)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="set_role" compatibility="9.2.001" expanded="true" height="82" name="Set Role" width="90" x="782" y="34">
<parameter key="attribute_name" value="Stack Overflow"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles">
<parameter key="Stack Overflow" value="id"/>
</list>
</operator>
<operator activated="true" class="data_to_similarity_data" compatibility="9.2.001" expanded="true" height="68" name="Data to Similarity Data" width="90" x="1050" y="34">
<parameter key="measure_types" value="NominalMeasures"/>
<parameter key="mixed_measure" value="MixedEuclideanDistance"/>
<parameter key="nominal_measure" value="JaccardSimilarity"/>
<parameter key="numerical_measure" value="CosineSimilarity"/>
<parameter key="divergence" value="GeneralizedIDivergence"/>
<parameter key="kernel_type" value="radial"/>
<parameter key="kernel_gamma" value="1.0"/>
<parameter key="kernel_sigma1" value="1.0"/>
<parameter key="kernel_sigma2" value="0.0"/>
<parameter key="kernel_sigma3" value="2.0"/>
<parameter key="kernel_degree" value="3.0"/>
<parameter key="kernel_shift" value="1.0"/>
<parameter key="kernel_a" value="1.0"/>
<parameter key="kernel_b" value="0.0"/>
</operator>
<operator activated="true" class="set_role" compatibility="9.2.001" expanded="true" height="82" name="Set Role (2)" width="90" x="1184" y="34">
<parameter key="attribute_name" value="FIRST_ID"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles">
<parameter key="FIRST_ID" value="label"/>
<parameter key="SECOND_ID" value="prediction"/>
</list>
</operator>
<operator activated="true" class="filter_examples" compatibility="9.2.001" expanded="true" height="103" name="Filter Examples" width="90" x="1318" y="34">
<parameter key="parameter_expression" value=""/>
<parameter key="condition_class" value="custom_filters"/>
<parameter key="invert_filter" value="false"/>
<list key="filters_list">
<parameter key="filters_entry_key" value="SIMILARITY.ge.0\.5"/>
</list>
<parameter key="filters_logic_and" value="false"/>
<parameter key="filters_check_metadata" value="false"/>
</operator>
<operator activated="true" class="performance_classification" compatibility="9.2.001" expanded="true" height="82" name="Performance (2)" width="90" x="1452" y="34">
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="true"/>
<parameter key="kappa" value="false"/>
<parameter key="weighted_mean_recall" value="true"/>
<parameter key="weighted_mean_precision" value="true"/>
<parameter key="spearman_rho" value="false"/>
<parameter key="kendall_tau" value="false"/>
<parameter key="absolute_error" value="false"/>
<parameter key="relative_error" value="false"/>
<parameter key="relative_error_lenient" value="false"/>
<parameter key="relative_error_strict" value="false"/>
<parameter key="normalized_absolute_error" value="false"/>
<parameter key="root_mean_squared_error" value="false"/>
<parameter key="root_relative_squared_error" value="false"/>
<parameter key="squared_error" value="false"/>
<parameter key="correlation" value="true"/>
<parameter key="squared_correlation" value="false"/>
<parameter key="cross-entropy" value="true"/>
<parameter key="margin" value="false"/>
<parameter key="soft_margin_loss" value="false"/>
<parameter key="logistic_loss" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
<list key="class_weights"/>
</operator>
<connect from_op="Retrieve dataset" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="example set" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Data to Similarity Data" to_port="example set"/>
<connect from_op="Data to Similarity Data" from_port="similarity example set" to_op="Set Role (2)" to_port="example set input"/>
<connect from_op="Set Role (2)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Performance (2)" to_port="labelled data"/>
<connect from_op="Performance (2)" from_port="performance" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>