diff --git a/test/core.vwtest.json b/test/core.vwtest.json index cd98a5da492..ef6857518f3 100644 --- a/test/core.vwtest.json +++ b/test/core.vwtest.json @@ -6021,7 +6021,7 @@ { "id": 465, "desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and saving model", - "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_model.vw", + "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_model.vw -q::", "diff_files": { "stderr": "train-sets/ref/sparse_save_check.stderr", "stdout": "train-sets/ref/sparse_save_check.stdout" @@ -6033,7 +6033,7 @@ { "id": 466, "desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and loading model", - "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_model.vw", + "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_model.vw -q::", "diff_files": { "stderr": "train-sets/ref/sparse_load_check.stderr", "stdout": "train-sets/ref/sparse_load_check.stdout" @@ -6045,5 +6045,33 @@ "depends_on": [ 465 ] + }, + { + "id": 467, + "desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and saving model with random_weights", + "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_random_model.vw -q:: --random_weights", + "diff_files": { + "stderr": "train-sets/ref/sparse_save_check_random.stderr", + "stdout": "train-sets/ref/sparse_save_check_random.stdout" + }, + "input_files": [ + "train-sets/cb_test.ldf" + ] + }, + { + "id": 468, + "desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and loading model with random_weights", + "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant 
--sparse_weights -i standard_sparse_random_model.vw -q:: --random_weights", + "diff_files": { + "stderr": "train-sets/ref/sparse_load_check_random.stderr", + "stdout": "train-sets/ref/sparse_load_check_random.stdout" + }, + "input_files": [ + "train-sets/cb_test.ldf", + "standard_sparse_random_model.vw" + ], + "depends_on": [ + 467 + ] } ] \ No newline at end of file diff --git a/test/train-sets/ref/sparse_load_check.stderr b/test/train-sets/ref/sparse_load_check.stderr index 8b27fe4af70..b4a3ce19f3b 100644 --- a/test/train-sets/ref/sparse_load_check.stderr +++ b/test/train-sets/ref/sparse_load_check.stderr @@ -1,3 +1,4 @@ +creating quadratic features for pairs: :: using no cache Reading datafile = train-sets/cb_test.ldf num sources = 1 @@ -6,17 +7,17 @@ learning rate = 0.5 initial_t = 3 power_t = 0.5 cb_type = mtr -Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger +Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger Input label = CB Output pred = ACTION_PROBS average since example example current current current loss last counter weight label predict features -0.066667 0.066667 1 1.0 0:1:0.5 1:0.48 15 -0.033333 0.000000 2 2.0 1:0:0.5 1:0.95 6 +0.066667 0.066667 1 1.0 0:1:0.5 1:0.48 60 +0.033333 0.000000 2 2.0 1:0:0.5 1:0.95 18 finished run number of examples = 3 weighted example sum = 3.000000 weighted label sum = 0.000000 average loss = 0.033333 -total feature number = 27 +total feature number = 96 diff --git a/test/train-sets/ref/sparse_load_check.stdout b/test/train-sets/ref/sparse_load_check.stdout index e69de29bb2d..8e3a1737902 100644 --- a/test/train-sets/ref/sparse_load_check.stdout +++ b/test/train-sets/ref/sparse_load_check.stdout @@ -0,0 +1,3 @@ +[warning] model file has set of {-q, --cubic, --interactions} settings stored, but they'll be OVERRIDDEN by set of {-q, --cubic, --interactions} settings from command line. 
+[warning] Any duplicate namespace interactions will be removed +You can use --leave_duplicate_interactions to disable this behaviour. diff --git a/test/train-sets/ref/sparse_load_check_random.stderr b/test/train-sets/ref/sparse_load_check_random.stderr new file mode 100644 index 00000000000..21b25ba2d31 --- /dev/null +++ b/test/train-sets/ref/sparse_load_check_random.stderr @@ -0,0 +1,23 @@ +creating quadratic features for pairs: :: +using no cache +Reading datafile = train-sets/cb_test.ldf +num sources = 1 +Num weight bits = 18 +learning rate = 0.5 +initial_t = 3 +power_t = 0.5 +cb_type = mtr +Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger +Input label = CB +Output pred = ACTION_PROBS +average since example example current current current +loss last counter weight label predict features +0.066667 0.066667 1 1.0 0:1:0.5 1:0.93 60 +0.033333 0.000000 2 2.0 1:0:0.5 0:0.95 18 + +finished run +number of examples = 3 +weighted example sum = 3.000000 +weighted label sum = 0.000000 +average loss = 0.033333 +total feature number = 96 diff --git a/test/train-sets/ref/sparse_load_check_random.stdout b/test/train-sets/ref/sparse_load_check_random.stdout new file mode 100644 index 00000000000..8e3a1737902 --- /dev/null +++ b/test/train-sets/ref/sparse_load_check_random.stdout @@ -0,0 +1,3 @@ +[warning] model file has set of {-q, --cubic, --interactions} settings stored, but they'll be OVERRIDDEN by set of {-q, --cubic, --interactions} settings from command line. +[warning] Any duplicate namespace interactions will be removed +You can use --leave_duplicate_interactions to disable this behaviour. 
diff --git a/test/train-sets/ref/sparse_save_check.stderr b/test/train-sets/ref/sparse_save_check.stderr index b5febcde188..a2b8fcb9e9d 100644 --- a/test/train-sets/ref/sparse_save_check.stderr +++ b/test/train-sets/ref/sparse_save_check.stderr @@ -1,3 +1,4 @@ +creating quadratic features for pairs: :: final_regressor = standard_sparse_model.vw using no cache Reading datafile = train-sets/cb_test.ldf @@ -7,17 +8,17 @@ learning rate = 0.5 initial_t = 0 power_t = 0.5 cb_type = mtr -Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger +Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger Input label = CB Output pred = ACTION_PROBS average since example example current current current loss last counter weight label predict features -0.666667 0.666667 1 1.0 0:1:0.5 0:0.33 15 -0.333333 0.000000 2 2.0 1:0:0.5 1:0.95 6 +0.666667 0.666667 1 1.0 0:1:0.5 0:0.33 60 +0.333333 0.000000 2 2.0 1:0:0.5 1:0.95 18 finished run number of examples = 3 weighted example sum = 3.000000 weighted label sum = 0.000000 average loss = 0.333333 -total feature number = 27 +total feature number = 96 diff --git a/test/train-sets/ref/sparse_save_check.stdout b/test/train-sets/ref/sparse_save_check.stdout index e69de29bb2d..fe315c40652 100644 --- a/test/train-sets/ref/sparse_save_check.stdout +++ b/test/train-sets/ref/sparse_save_check.stdout @@ -0,0 +1,2 @@ +[warning] Any duplicate namespace interactions will be removed +You can use --leave_duplicate_interactions to disable this behaviour. 
diff --git a/test/train-sets/ref/sparse_save_check_random.stderr b/test/train-sets/ref/sparse_save_check_random.stderr new file mode 100644 index 00000000000..23ce84d1670 --- /dev/null +++ b/test/train-sets/ref/sparse_save_check_random.stderr @@ -0,0 +1,24 @@ +creating quadratic features for pairs: :: +final_regressor = standard_sparse_random_model.vw +using no cache +Reading datafile = train-sets/cb_test.ldf +num sources = 1 +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +cb_type = mtr +Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger +Input label = CB +Output pred = ACTION_PROBS +average since example example current current current +loss last counter weight label predict features +0.066667 0.066667 1 1.0 0:1:0.5 1:0.93 60 +0.033333 0.000000 2 2.0 1:0:0.5 0:0.95 18 + +finished run +number of examples = 3 +weighted example sum = 3.000000 +weighted label sum = 0.000000 +average loss = 0.033333 +total feature number = 96 diff --git a/test/train-sets/ref/sparse_save_check_random.stdout b/test/train-sets/ref/sparse_save_check_random.stdout new file mode 100644 index 00000000000..fe315c40652 --- /dev/null +++ b/test/train-sets/ref/sparse_save_check_random.stdout @@ -0,0 +1,2 @@ +[warning] Any duplicate namespace interactions will be removed +You can use --leave_duplicate_interactions to disable this behaviour. 
diff --git a/vowpalwabbit/core/include/vw/core/array_parameters.h b/vowpalwabbit/core/include/vw/core/array_parameters.h
index 095ae45b456..fe7a3b056cd 100644
--- a/vowpalwabbit/core/include/vw/core/array_parameters.h
+++ b/vowpalwabbit/core/include/vw/core/array_parameters.h
@@ -25,6 +25,12 @@ class parameters
     else { return dense_weights[i]; }
   }
 
+  inline VW::weight& get(size_t i)
+  {
+    if (sparse) { return sparse_weights.get(i); }
+    else { return dense_weights.get(i); }
+  }
+
   template <typename Lambda>
   void set_default(Lambda&& default_func)
   {
diff --git a/vowpalwabbit/core/include/vw/core/array_parameters_dense.h b/vowpalwabbit/core/include/vw/core/array_parameters_dense.h
index f215f35cf61..755a4084ac8 100644
--- a/vowpalwabbit/core/include/vw/core/array_parameters_dense.h
+++ b/vowpalwabbit/core/include/vw/core/array_parameters_dense.h
@@ -122,6 +122,10 @@ class dense_parameters
   inline const VW::weight& operator[](size_t i) const { return _begin.get()[i & _weight_mask]; }
   inline VW::weight& operator[](size_t i) { return _begin.get()[i & _weight_mask]; }
 
+  // get() is only needed for sparse_weights, same as operator[] for dense_weights
+  inline const VW::weight& get(size_t i) const { return operator[](i); }
+  inline VW::weight& get(size_t i) { return operator[](i); }
+
   VW_ATTR(nodiscard) static dense_parameters shallow_copy(const dense_parameters& input);
   VW_ATTR(nodiscard) static dense_parameters deep_copy(const dense_parameters& input);
 
diff --git a/vowpalwabbit/core/include/vw/core/array_parameters_sparse.h b/vowpalwabbit/core/include/vw/core/array_parameters_sparse.h
index dbebe75ed68..d5d1a611826 100644
--- a/vowpalwabbit/core/include/vw/core/array_parameters_sparse.h
+++ b/vowpalwabbit/core/include/vw/core/array_parameters_sparse.h
@@ -81,10 +81,14 @@ class sparse_parameters
   const_iterator cbegin() const { return const_iterator(_map.begin()); }
   const_iterator cend() const { return const_iterator(_map.end()); }
 
+  // operator[] will find weight in _map and return and insert a default value if not found. Does alter _map.
   inline VW::weight& operator[](size_t i) { return *(get_or_default_and_get(i)); }
-  inline const VW::weight& operator[](size_t i) const { return *(get_or_default_and_get(i)); }
 
+  // get() will find weight in _map and return a default value if not found. Does not alter _map.
+  inline VW::weight& get(size_t i) { return *(get_impl(i)); };
+  inline const VW::weight& get(size_t i) const { return *(get_impl(i)); };
+
   inline VW::weight& strided_index(size_t index) { return operator[](index << _stride_shift); }
   inline const VW::weight& strided_index(size_t index) const { return operator[](index << _stride_shift); }
 
@@ -119,6 +123,7 @@
   // It is marked const so it can be used from both const and non const operator[]
   // The map itself is mutable to facilitate this
   VW::weight* get_or_default_and_get(size_t i) const;
+  VW::weight* get_impl(size_t i) const;
 };
 }  // namespace VW
 using sparse_parameters VW_DEPRECATED("sparse_parameters moved into VW namespace") = VW::sparse_parameters;
diff --git a/vowpalwabbit/core/include/vw/core/gd_predict.h b/vowpalwabbit/core/include/vw/core/gd_predict.h
index 868756250f8..3858d1d0eb2 100644
--- a/vowpalwabbit/core/include/vw/core/gd_predict.h
+++ b/vowpalwabbit/core/include/vw/core/gd_predict.h
@@ -36,7 +36,7 @@ inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& da
 {
   for (const auto& f : fs)
   {
-    VW::weight& w = weights[(f.index() + offset)];
+    VW::weight& w = weights[f.index() + offset];
     FuncT(dat, mult * f.value(), w);
   }
 }
@@ -46,7 +46,7 @@ template <class DataT, void (*FuncT)(DataT&, float, float&), class WeightsT>
 inline void foreach_feature(
     const WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.)
 {
-  for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights[static_cast<size_t>(f.index() + offset)]); }
+  for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights.get(static_cast<size_t>(f.index() + offset))); }
 }
 
 template <class DataT, class WeightsT, void (*FuncT)(DataT&, float, float)>
 inline void call_func_t(DataT& dat, const WeightsT& weights, const float ft_value, const uint64_t ft_idx)
 {
-  FuncT(dat, ft_value, weights[static_cast<size_t>(ft_idx)]);
+  FuncT(dat, ft_value, weights.get(static_cast<size_t>(ft_idx)));
 }
 
 template
diff --git a/vowpalwabbit/core/src/array_parameters_sparse.cc b/vowpalwabbit/core/src/array_parameters_sparse.cc
index 611f66ee11c..88d150e9e92 100644
--- a/vowpalwabbit/core/src/array_parameters_sparse.cc
+++ b/vowpalwabbit/core/src/array_parameters_sparse.cc
@@ -26,6 +26,32 @@ VW::weight* VW::sparse_parameters::get_or_default_and_get(size_t i) const
   return iter->second.get();
 }
 
+VW::weight* VW::sparse_parameters::get_impl(size_t i) const
+{
+  static auto default_value =
+      std::shared_ptr<VW::weight>(VW::details::calloc_mergable_or_throw<VW::weight>(stride()), free);
+  uint64_t index = i & _weight_mask;
+  auto iter = _map.find(index);
+  if (iter == _map.end())
+  {
+    // Add entry to map if _default_func is defined
+    if (_default_func != nullptr)
+    {
+      // memory allocated by calloc should be freed by C free()
+      _map.insert(std::make_pair(
+          index, std::shared_ptr<VW::weight>(VW::details::calloc_mergable_or_throw<VW::weight>(stride()), free)));
+      iter = _map.find(index);
+      _default_func(iter->second.get(), index);
+      return iter->second.get();
+    }
+    // Return default value if _default_func is not defined
+    return default_value.get();
+  }
+
+  // Get entry if it exists in the map
+  return iter->second.get();
+}
+
 VW::sparse_parameters::sparse_parameters(size_t length, uint32_t stride_shift)
     : _weight_mask((length << stride_shift) - 1), _stride_shift(stride_shift), _default_func(nullptr)
 {
diff --git a/vowpalwabbit/core/src/parse_regressor.cc b/vowpalwabbit/core/src/parse_regressor.cc
index 6465389e5b6..b6de703b24a 100644
--- a/vowpalwabbit/core/src/parse_regressor.cc
+++ b/vowpalwabbit/core/src/parse_regressor.cc
@@ -94,23 +94,20 @@ void initialize_regressor(VW::workspace& all, T& weights)
   }
   else if (all.initial_weights_config.initial_weight != 0.)
   {
-    auto initial_weight = all.initial_weights_config.initial_weight;
-    auto initial_value_weight_initializer = [initial_weight](VW::weight* weights, uint64_t /*index*/)
-    { weights[0] = initial_weight; };
+    auto initial_value_weight_initializer = [&all](VW::weight* weights, uint64_t /*index*/)
+    { weights[0] = all.initial_weights_config.initial_weight; };
     weights.set_default(initial_value_weight_initializer);
   }
   else if (all.initial_weights_config.random_positive_weights)
   {
-    auto rand_state = *all.get_random_state();
-    auto random_positive = [&rand_state](VW::weight* weights, uint64_t)
-    { weights[0] = 0.1f * rand_state.get_and_update_random(); };
+    auto random_positive = [&all](VW::weight* weights, uint64_t)
+    { weights[0] = 0.1f * all.get_random_state()->get_and_update_random(); };
     weights.set_default(random_positive);
   }
   else if (all.initial_weights_config.random_weights)
   {
-    auto rand_state = *all.get_random_state();
-    auto random_neg_pos = [&rand_state](VW::weight* weights, uint64_t)
-    { weights[0] = rand_state.get_and_update_random() - 0.5f; };
+    auto random_neg_pos = [&all](VW::weight* weights, uint64_t)
+    { weights[0] = all.get_random_state()->get_and_update_random() - 0.5f; };
     weights.set_default(random_neg_pos);
   }
   else if (all.initial_weights_config.normal_weights) { weights.set_default(&initialize_weights_as_polar_normal); }
diff --git a/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc b/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc
index 47265bd48b3..110cf815b25 100644
--- a/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc
+++ b/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc
@@ -145,6 +145,8 @@ class lazy_gaussian
 {
 public:
   inline float operator[](uint64_t index) const { return VW::details::merand48_boxmuller(index); }
+  // get() is only needed for sparse_weights, same as operator[] for lazy_gaussian
+  inline float get(uint64_t index) const { return operator[](index); }
 };
 
 inline void vec_add_with_norm(std::pair<float, float>& p, float fx, float fw)