Skip to content

Commit

Permalink
refactor: [sparse_weights] get for predict (#4651)
Browse files Browse the repository at this point in the history
* refactor: [sparse_weights] use std::map and get for predict

* small fixes

* clang

* add feature on learn path

* update -q:: for sparse tests

* add get to array_parameters

* brackets for interactions update

* fix interaction tests

* comments

* revert std::map

* clang

* cpp11 standard

* downgrade gcc

* cpp11

* just remove latest for ubuntu

* add include

* revert settings

* fix sparse random and test

* lint

* revert vendored change

* tests
  • Loading branch information
bassmang authored Nov 17, 2023
1 parent 26b74fd commit 2849b3b
Show file tree
Hide file tree
Showing 17 changed files with 150 additions and 23 deletions.
32 changes: 30 additions & 2 deletions test/core.vwtest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6021,7 +6021,7 @@
{
"id": 465,
"desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and saving model",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_model.vw",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_model.vw -q::",
"diff_files": {
"stderr": "train-sets/ref/sparse_save_check.stderr",
"stdout": "train-sets/ref/sparse_save_check.stdout"
Expand All @@ -6033,7 +6033,7 @@
{
"id": 466,
"desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and loading model",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_model.vw",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_model.vw -q::",
"diff_files": {
"stderr": "train-sets/ref/sparse_load_check.stderr",
"stdout": "train-sets/ref/sparse_load_check.stdout"
Expand All @@ -6045,5 +6045,33 @@
"depends_on": [
465
]
},
{
"id": 467,
"desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and saving model with random_weights",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_random_model.vw -q:: --random_weights",
"diff_files": {
"stderr": "train-sets/ref/sparse_save_check_random.stderr",
"stdout": "train-sets/ref/sparse_save_check_random.stdout"
},
"input_files": [
"train-sets/cb_test.ldf"
]
},
{
"id": 468,
"desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and loading model with random_weights",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_random_model.vw -q:: --random_weights",
"diff_files": {
"stderr": "train-sets/ref/sparse_load_check_random.stderr",
"stdout": "train-sets/ref/sparse_load_check_random.stdout"
},
"input_files": [
"train-sets/cb_test.ldf",
"standard_sparse_random_model.vw"
],
"depends_on": [
467
]
}
]
9 changes: 5 additions & 4 deletions test/train-sets/ref/sparse_load_check.stderr
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
creating quadratic features for pairs: ::
using no cache
Reading datafile = train-sets/cb_test.ldf
num sources = 1
Expand All @@ -6,17 +7,17 @@ learning rate = 0.5
initial_t = 3
power_t = 0.5
cb_type = mtr
Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.066667 0.066667 1 1.0 0:1:0.5 1:0.48 15
0.033333 0.000000 2 2.0 1:0:0.5 1:0.95 6
0.066667 0.066667 1 1.0 0:1:0.5 1:0.48 60
0.033333 0.000000 2 2.0 1:0:0.5 1:0.95 18

finished run
number of examples = 3
weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.033333
total feature number = 27
total feature number = 96
3 changes: 3 additions & 0 deletions test/train-sets/ref/sparse_load_check.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[warning] model file has set of {-q, --cubic, --interactions} settings stored, but they'll be OVERRIDDEN by set of {-q, --cubic, --interactions} settings from command line.
[warning] Any duplicate namespace interactions will be removed
You can use --leave_duplicate_interactions to disable this behaviour.
23 changes: 23 additions & 0 deletions test/train-sets/ref/sparse_load_check_random.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
creating quadratic features for pairs: ::
using no cache
Reading datafile = train-sets/cb_test.ldf
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 3
power_t = 0.5
cb_type = mtr
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.066667 0.066667 1 1.0 0:1:0.5 1:0.93 60
0.033333 0.000000 2 2.0 1:0:0.5 0:0.95 18

finished run
number of examples = 3
weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.033333
total feature number = 96
3 changes: 3 additions & 0 deletions test/train-sets/ref/sparse_load_check_random.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[warning] model file has set of {-q, --cubic, --interactions} settings stored, but they'll be OVERRIDDEN by set of {-q, --cubic, --interactions} settings from command line.
[warning] Any duplicate namespace interactions will be removed
You can use --leave_duplicate_interactions to disable this behaviour.
9 changes: 5 additions & 4 deletions test/train-sets/ref/sparse_save_check.stderr
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
creating quadratic features for pairs: ::
final_regressor = standard_sparse_model.vw
using no cache
Reading datafile = train-sets/cb_test.ldf
Expand All @@ -7,17 +8,17 @@ learning rate = 0.5
initial_t = 0
power_t = 0.5
cb_type = mtr
Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.666667 0.666667 1 1.0 0:1:0.5 0:0.33 15
0.333333 0.000000 2 2.0 1:0:0.5 1:0.95 6
0.666667 0.666667 1 1.0 0:1:0.5 0:0.33 60
0.333333 0.000000 2 2.0 1:0:0.5 1:0.95 18

finished run
number of examples = 3
weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.333333
total feature number = 27
total feature number = 96
2 changes: 2 additions & 0 deletions test/train-sets/ref/sparse_save_check.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[warning] Any duplicate namespace interactions will be removed
You can use --leave_duplicate_interactions to disable this behaviour.
24 changes: 24 additions & 0 deletions test/train-sets/ref/sparse_save_check_random.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
creating quadratic features for pairs: ::
final_regressor = standard_sparse_random_model.vw
using no cache
Reading datafile = train-sets/cb_test.ldf
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
cb_type = mtr
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.066667 0.066667 1 1.0 0:1:0.5 1:0.93 60
0.033333 0.000000 2 2.0 1:0:0.5 0:0.95 18

finished run
number of examples = 3
weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.033333
total feature number = 96
2 changes: 2 additions & 0 deletions test/train-sets/ref/sparse_save_check_random.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[warning] Any duplicate namespace interactions will be removed
You can use --leave_duplicate_interactions to disable this behaviour.
6 changes: 6 additions & 0 deletions vowpalwabbit/core/include/vw/core/array_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ class parameters
else { return dense_weights[i]; }
}

// Returns a mutable reference to the weight at index i by forwarding to get() on whichever backing store is
// active: sparse_weights when the sparse flag is set, dense_weights otherwise.
inline VW::weight& get(size_t i)
{
  return sparse ? sparse_weights.get(i) : dense_weights.get(i);
}

template <typename Lambda>
void set_default(Lambda&& default_func)
{
Expand Down
4 changes: 4 additions & 0 deletions vowpalwabbit/core/include/vw/core/array_parameters_dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ class dense_parameters
inline const VW::weight& operator[](size_t i) const { return _begin.get()[i & _weight_mask]; }
inline VW::weight& operator[](size_t i) { return _begin.get()[i & _weight_mask]; }

// get() exists so dense and sparse weights expose the same interface; for dense_weights it simply forwards to
// operator[] and therefore applies the same _weight_mask wrapping.
inline const VW::weight& get(size_t i) const { return (*this)[i]; }
inline VW::weight& get(size_t i) { return (*this)[i]; }

VW_ATTR(nodiscard) static dense_parameters shallow_copy(const dense_parameters& input);
VW_ATTR(nodiscard) static dense_parameters deep_copy(const dense_parameters& input);

Expand Down
7 changes: 6 additions & 1 deletion vowpalwabbit/core/include/vw/core/array_parameters_sparse.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,14 @@ class sparse_parameters
const_iterator cbegin() const { return const_iterator(_map.begin()); }
const_iterator cend() const { return const_iterator(_map.end()); }

// operator[] looks up the weight for index i in _map. If no entry is found, a default-valued entry is inserted
// first and then returned, so calling it may alter _map (this holds for the const overload too).
inline VW::weight& operator[](size_t i) { return *(get_or_default_and_get(i)); }

inline const VW::weight& operator[](size_t i) const { return *(get_or_default_and_get(i)); }

// get() looks up the weight for index i in _map via get_impl().
//  - Entry found: it is returned.
//  - Entry missing and no _default_func set: a shared default buffer is returned and _map is NOT altered.
//  - Entry missing and _default_func set: the entry is created, initialized by _default_func and stored in _map
//    before being returned, so in this case _map IS altered.
// NOTE(review): the default buffer is shared between lookups, so callers should treat the result of a miss as
// read-only even when going through the non-const overload.
inline VW::weight& get(size_t i) { return *(get_impl(i)); }
inline const VW::weight& get(size_t i) const { return *(get_impl(i)); }

inline VW::weight& strided_index(size_t index) { return operator[](index << _stride_shift); }
inline const VW::weight& strided_index(size_t index) const { return operator[](index << _stride_shift); }

Expand Down Expand Up @@ -119,6 +123,7 @@ class sparse_parameters
// It is marked const so it can be used from both const and non const operator[]
// The map itself is mutable to facilitate this
VW::weight* get_or_default_and_get(size_t i) const;
VW::weight* get_impl(size_t i) const;
};
} // namespace VW
using sparse_parameters VW_DEPRECATED("sparse_parameters moved into VW namespace") = VW::sparse_parameters;
4 changes: 2 additions & 2 deletions vowpalwabbit/core/include/vw/core/gd_predict.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& da
{
for (const auto& f : fs)
{
VW::weight& w = weights[(f.index() + offset)];
VW::weight& w = weights[f.index() + offset];
FuncT(dat, mult * f.value(), w);
}
}
Expand All @@ -46,7 +46,7 @@ template <class DataT, void (*FuncT)(DataT&, float, float), class WeightsT>
inline void foreach_feature(
const WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.)
{
for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights[static_cast<size_t>(f.index() + offset)]); }
for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights.get(static_cast<size_t>(f.index() + offset))); }
}

template <class DataT, class WeightOrIndexT, void (*FuncT)(DataT&, float, WeightOrIndexT),
Expand Down
2 changes: 1 addition & 1 deletion vowpalwabbit/core/include/vw/core/interactions_predict.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ inline void call_func_t(DataT& dat, WeightsT& weights, const float ft_value, con
template <class DataT, void (*FuncT)(DataT&, const float, float), class WeightsT>
inline void call_func_t(DataT& dat, const WeightsT& weights, const float ft_value, const uint64_t ft_idx)
{
FuncT(dat, ft_value, weights[static_cast<size_t>(ft_idx)]);
FuncT(dat, ft_value, weights.get(static_cast<size_t>(ft_idx)));
}

template <class DataT, void (*FuncT)(DataT&, float, uint64_t), class WeightsT>
Expand Down
26 changes: 26 additions & 0 deletions vowpalwabbit/core/src/array_parameters_sparse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,32 @@ VW::weight* VW::sparse_parameters::get_or_default_and_get(size_t i) const
return iter->second.get();
}

// Backing implementation of get(): resolves index i (wrapped with _weight_mask) to a pointer to its weights.
//
// Three outcomes are possible:
//  1. The entry already exists in _map: a pointer to it is returned.
//  2. The entry is missing and no _default_func is set: a pointer to a shared default buffer is returned and
//     _map is left untouched.
//  3. The entry is missing and _default_func is set: a new entry is allocated, initialized through
//     _default_func(entry, index), stored in _map and returned.
//
// The function is const so both get() overloads can use it; it can still insert because _map is mutable.
VW::weight* VW::sparse_parameters::get_impl(size_t i) const
{
  // Fallback handed out for missing entries when no initializer is configured.
  // NOTE(review): being a function-local static, this buffer is allocated once using the stride() of whichever
  // instance calls first and is then shared by every sparse_parameters instance. Callers must not write through
  // it; confirm that all instances use the same stride.
  static auto default_value =
      std::shared_ptr<VW::weight>(VW::details::calloc_mergable_or_throw<VW::weight>(stride()), free);

  const uint64_t index = i & _weight_mask;
  const auto found = _map.find(index);

  // Get entry if it exists in the map
  if (found != _map.end()) { return found->second.get(); }

  // Return default value if _default_func is not defined
  if (_default_func == nullptr) { return default_value.get(); }

  // Add entry to map since _default_func is defined.
  // memory allocated by calloc should be freed by C free()
  // insert() hands back an iterator to the new element, so no second lookup is needed.
  const auto inserted =
      _map.insert(std::make_pair(
                      index, std::shared_ptr<VW::weight>(VW::details::calloc_mergable_or_throw<VW::weight>(stride()), free)))
          .first;
  _default_func(inserted->second.get(), index);
  return inserted->second.get();
}

VW::sparse_parameters::sparse_parameters(size_t length, uint32_t stride_shift)
: _weight_mask((length << stride_shift) - 1), _stride_shift(stride_shift), _default_func(nullptr)
{
Expand Down
15 changes: 6 additions & 9 deletions vowpalwabbit/core/src/parse_regressor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,23 +94,20 @@ void initialize_regressor(VW::workspace& all, T& weights)
}
else if (all.initial_weights_config.initial_weight != 0.)
{
auto initial_weight = all.initial_weights_config.initial_weight;
auto initial_value_weight_initializer = [initial_weight](VW::weight* weights, uint64_t /*index*/)
{ weights[0] = initial_weight; };
auto initial_value_weight_initializer = [&all](VW::weight* weights, uint64_t /*index*/)
{ weights[0] = all.initial_weights_config.initial_weight; };
weights.set_default(initial_value_weight_initializer);
}
else if (all.initial_weights_config.random_positive_weights)
{
auto rand_state = *all.get_random_state();
auto random_positive = [&rand_state](VW::weight* weights, uint64_t)
{ weights[0] = 0.1f * rand_state.get_and_update_random(); };
auto random_positive = [&all](VW::weight* weights, uint64_t)
{ weights[0] = 0.1f * all.get_random_state()->get_and_update_random(); };
weights.set_default(random_positive);
}
else if (all.initial_weights_config.random_weights)
{
auto rand_state = *all.get_random_state();
auto random_neg_pos = [&rand_state](VW::weight* weights, uint64_t)
{ weights[0] = rand_state.get_and_update_random() - 0.5f; };
auto random_neg_pos = [&all](VW::weight* weights, uint64_t)
{ weights[0] = all.get_random_state()->get_and_update_random() - 0.5f; };
weights.set_default(random_neg_pos);
}
else if (all.initial_weights_config.normal_weights) { weights.set_default(&initialize_weights_as_polar_normal); }
Expand Down
2 changes: 2 additions & 0 deletions vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ class lazy_gaussian
{
public:
inline float operator[](uint64_t index) const { return VW::details::merand48_boxmuller(index); }
// get() is only needed for sparse_weights, same as operator[] for lazy_gaussian
inline float get(uint64_t index) const { return operator[](index); }
};

inline void vec_add_with_norm(std::pair<float, float>& p, float fx, float fw)
Expand Down

0 comments on commit 2849b3b

Please sign in to comment.