-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathexample.yaml
182 lines (182 loc) · 7.59 KB
/
example.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
{
#
# Training Data
# =============
#
# Size of the training sample to use for feature extraction and
# discretization; this defines the `training data` referred to below:
bn_abstr_train_size: 20000,
#
# Proportion of the training data (above) to use for feature
# extraction (defaults to 1, i.e. use all of it).
# feat_extr_train_size: .5,
#
# Feature Extraction (for each layer)
# ===================================
#
# Note that `decomp: "PCA"` is optional as it is the default.
#
# PCA: Use 3 components with the randomized SVD algorithm (Note:
# usually quite efficient, yet may impair the generation of test
# cases via the LP-based algorithm):
# feats: { decomp: 'pca', n_components: 3, svd_solver: 'randomized' },
#
# PCA: Use 5 components with the exact algorithm (potentially slower,
# but yields an exact inverse transformation---needed for LP-based
# test case generation):
feats: { decomp: 'pca', n_components: 5, svd_solver: 'full' },
#
# PCA: Use 5 components and guess a good algorithm based on the size
# of the training data:
# feats: 5,
#
# PCA: Use as many components as needed to capture at least 50% of
# variance. High values for shallow layers (close to input) are
# discouraged, as this usually leads to a large number of
# components (which in turn leads to a BN that does not fit in
# memory):
# feats: 0.5,
#
# ICA: Use 3 components, with up to 5000 iterations and a tolerance
# of 0.01:
# feats: { decomp: 'ica', n_components: 3, max_iter: 5000, tol: 0.01 },
#
# Custom (per-layer) Specifications
# ---------------------------------
#
# To define the feature extraction strategy on a per-layer basis,
# one can give a lambda function that takes the layer index as
# argument, and returns any of the specifications shown above
# (a number or a dictionary):
# feats: '(lambda li: 3 if li <= 2 else 4)',
#
# As above, returning minimum captured variance instead:
# feats: '(lambda li: 0.9 if li > 5 else 0.7 if li > 4 else 0.5 if li > 2 else 0.1)',
#
# Hybrid: Use ICA(3) for the three first layers, PCA(4) elsewhere:
# feats: '(lambda li: { "decomp": "ica", "n_components": 3 } if li <= 2 else { "n_components": 4 })',
#
# Discretization Strategy (for each extracted feature)
# ====================================================
#
# Binarize feature around 0.0 (this is the default, if `discr` is
# not given):
# discr: 'bin',
#
# Partition each feature into 2 intervals (with quantile strategy,
# that computes interval boundaries so that the projected training
# data is evenly spread among each interval---the default for
# non-binarization):
# discr: 2,
#
# Perform a 3-clustering of the projected training data, and use 3
# adjacent intervals that each span exactly one cluster:
# discr: { n_bins: 3, strategy: "kmeans" },
#
# Use 4 intervals of identical width that span all training data:
# discr: { n_bins: 4, strategy: "uniform" },
#
# Kernel Density Estimate (KDE):
# ------------------------------
#
# Compute a kernel density estimation based on training data and
# find split locations based on dips (local minima of the
# estimated density function) and plateaux (large-enough intervals
# where the density approaches zero): each dip is associated with
# an interval boundary, whereas each plateau defines a whole
# interval. Roughly, dips are selected using a prominence
# criterion that is defined as a ratio w.r.t. the maximum of the
# density function (`kde_dip_prominence_prop`, defaulting to
# 1/10). Plateaux, in turn, are intervals where the density
# function is lower than a threshold that is also defined as a
# ratio w.r.t. the maximum of the density function
# (`kde_baseline_density_prop`, defaulting to 1/20):
discr: { strategy: 'kde' },
# The setting above is thus equivalent to:
# discr: { strategy: 'kde', kde_dip_prominence_prop: 0.1, kde_baseline_density_prop: 0.05 },
#
# The additional key `kde_plot_spaces` enables the output of plots
# of density estimates and resulting intervals, along with scattered
# points that each represent one element of the training data.
# When defined, it takes one of the values `dens` or `logl`, to
# plot the density estimation or the log-likelihood, respectively.
# Additional arguments can also be provided to control whether to
# include some markers on dips in the plots, or the number of
# training points to include in the plots:
# discr: { strategy: 'kde', kde_plot_spaces: 'dens', kde_plot_dip_markers: True, kde_plot_training_samples: 500 },
# discr: { strategy: 'kde', kde_plot_spaces: 'dens', kde_plot_dip_markers: False, kde_plot_training_samples: 0 },
#
# Extended Variants
# -----------------
#
# Extended discretization strategies (with `extended: True`) add
# two extra intervals (one left-open, one right-open) that contain
# no projected training data.
#
# Use 3 extended bins (so, 5 in total) to discretize based on
# 3-clustered training data:
# discr: { n_bins: 3, extended: True, strategy: "kmeans" },
#
# Use 3 extended bins (so, 5 in total) to discretize with 3
# intervals of identical width:
# discr: { n_bins: 3, extended: True, strategy: "uniform" },
#
# Kernel Density Estimates with additional open intervals (which
# may contain a limited amount of projected training data, partly
# depending on `kde_baseline_density_prop`):
# discr: { strategy: 'kde', extended: True },
#
# Custom (per-layer) Specifications
# ---------------------------------
#
# To define the discretization strategy on a per-layer basis, one
# can also use a lambda function that takes the layer index as
# argument, and returns any of the specification dictionaries as
# above (or None for the default, i.e. binarization):
# discr: '(lambda li: { "n_bins": 4, "strategy": "uniform" } if li in (1,) else None)',
#
# Dictionaries may also be built with `dict(...)`, and mixed with
# plain values such as "bin":
# discr: '(lambda li: "bin" if li == 3 else dict(n_bins=3, strategy="uniform"))',
#
# Reporting
# =========
#
# Current reporting requires `matplotlib` to be installed.
#
# If True, plot graphs that show how some of the training data is
# spread w.r.t. each extracted feature for each layer:
report_on_feature_extractions: False,
#
# Size of test sample (also extracted from training data) to use
# for scoring and reporting:
bn_abstr_test_size: 200,
#
# Dump the BN fit with the (portion of the) training dataset that
# was used to extract and discretize features in a file called
# 'bn4trained.yml':
dump_bn_with_trained_dataset_distribution: False,
#
# Dump the BN fit with the initial and generated dataset in a file
# called 'bn4tests.yml' upon termination of the generation
# process:
dump_bn_with_final_dataset_distribution: False,
#
#
# Process Customization
# =====================
#
# The `*_n_jobs` values below default to `1` (unless some `joblib`
# context is set up). Use `-1` to use all processors (see
# https://scikit-learn.org/stable/glossary.html#term-n-jobs).
#
# Number of jobs used to bake and fit the BN abstraction:
bn_abstr_n_jobs: 3,
#
# Number of jobs used to discretize feature components/spaces (see
# https://scikit-learn.org/stable/glossary.html#term-n-jobs for
# details on its semantics):
discr_n_jobs: -1,
#
# Verbosity level (only affects console output):
verbose: 2,
}