diff --git a/test/core.vwtest.json b/test/core.vwtest.json index cd98a5da492..ef6857518f3 100644 --- a/test/core.vwtest.json +++ b/test/core.vwtest.json @@ -6021,7 +6021,7 @@ { "id": 465, "desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and saving model", - "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_model.vw", + "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_model.vw -q::", "diff_files": { "stderr": "train-sets/ref/sparse_save_check.stderr", "stdout": "train-sets/ref/sparse_save_check.stdout" @@ -6033,7 +6033,7 @@ { "id": 466, "desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and loading model", - "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_model.vw", + "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_model.vw -q::", "diff_files": { "stderr": "train-sets/ref/sparse_load_check.stderr", "stdout": "train-sets/ref/sparse_load_check.stdout" @@ -6045,5 +6045,33 @@ "depends_on": [ 465 ] + }, + { + "id": 467, + "desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and saving model with random_weights", + "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_random_model.vw -q:: --random_weights", + "diff_files": { + "stderr": "train-sets/ref/sparse_save_check_random.stderr", + "stdout": "train-sets/ref/sparse_save_check_random.stdout" + }, + "input_files": [ + "train-sets/cb_test.ldf" + ] + }, + { + "id": 468, + "desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and loading model with random_weights", + "vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant 
--sparse_weights -i standard_sparse_random_model.vw -q:: --random_weights", + "diff_files": { + "stderr": "train-sets/ref/sparse_load_check_random.stderr", + "stdout": "train-sets/ref/sparse_load_check_random.stdout" + }, + "input_files": [ + "train-sets/cb_test.ldf", + "standard_sparse_random_model.vw" + ], + "depends_on": [ + 467 + ] } ] \ No newline at end of file diff --git a/test/train-sets/ref/sparse_load_check.stderr b/test/train-sets/ref/sparse_load_check.stderr index 8b27fe4af70..b4a3ce19f3b 100644 --- a/test/train-sets/ref/sparse_load_check.stderr +++ b/test/train-sets/ref/sparse_load_check.stderr @@ -1,3 +1,4 @@ +creating quadratic features for pairs: :: using no cache Reading datafile = train-sets/cb_test.ldf num sources = 1 @@ -6,17 +7,17 @@ learning rate = 0.5 initial_t = 3 power_t = 0.5 cb_type = mtr -Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger +Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger Input label = CB Output pred = ACTION_PROBS average since example example current current current loss last counter weight label predict features -0.066667 0.066667 1 1.0 0:1:0.5 1:0.48 15 -0.033333 0.000000 2 2.0 1:0:0.5 1:0.95 6 +0.066667 0.066667 1 1.0 0:1:0.5 1:0.48 60 +0.033333 0.000000 2 2.0 1:0:0.5 1:0.95 18 finished run number of examples = 3 weighted example sum = 3.000000 weighted label sum = 0.000000 average loss = 0.033333 -total feature number = 27 +total feature number = 96 diff --git a/test/train-sets/ref/sparse_load_check.stdout b/test/train-sets/ref/sparse_load_check.stdout index e69de29bb2d..8e3a1737902 100644 --- a/test/train-sets/ref/sparse_load_check.stdout +++ b/test/train-sets/ref/sparse_load_check.stdout @@ -0,0 +1,3 @@ +[warning] model file has set of {-q, --cubic, --interactions} settings stored, but they'll be OVERRIDDEN by set of {-q, --cubic, --interactions} settings from command line. 
+[warning] Any duplicate namespace interactions will be removed +You can use --leave_duplicate_interactions to disable this behaviour. diff --git a/test/train-sets/ref/sparse_load_check_random.stderr b/test/train-sets/ref/sparse_load_check_random.stderr new file mode 100644 index 00000000000..21b25ba2d31 --- /dev/null +++ b/test/train-sets/ref/sparse_load_check_random.stderr @@ -0,0 +1,23 @@ +creating quadratic features for pairs: :: +using no cache +Reading datafile = train-sets/cb_test.ldf +num sources = 1 +Num weight bits = 18 +learning rate = 0.5 +initial_t = 3 +power_t = 0.5 +cb_type = mtr +Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger +Input label = CB +Output pred = ACTION_PROBS +average since example example current current current +loss last counter weight label predict features +0.066667 0.066667 1 1.0 0:1:0.5 1:0.93 60 +0.033333 0.000000 2 2.0 1:0:0.5 0:0.95 18 + +finished run +number of examples = 3 +weighted example sum = 3.000000 +weighted label sum = 0.000000 +average loss = 0.033333 +total feature number = 96 diff --git a/test/train-sets/ref/sparse_load_check_random.stdout b/test/train-sets/ref/sparse_load_check_random.stdout new file mode 100644 index 00000000000..8e3a1737902 --- /dev/null +++ b/test/train-sets/ref/sparse_load_check_random.stdout @@ -0,0 +1,3 @@ +[warning] model file has set of {-q, --cubic, --interactions} settings stored, but they'll be OVERRIDDEN by set of {-q, --cubic, --interactions} settings from command line. +[warning] Any duplicate namespace interactions will be removed +You can use --leave_duplicate_interactions to disable this behaviour. 
diff --git a/test/train-sets/ref/sparse_save_check.stderr b/test/train-sets/ref/sparse_save_check.stderr index b5febcde188..a2b8fcb9e9d 100644 --- a/test/train-sets/ref/sparse_save_check.stderr +++ b/test/train-sets/ref/sparse_save_check.stderr @@ -1,3 +1,4 @@ +creating quadratic features for pairs: :: final_regressor = standard_sparse_model.vw using no cache Reading datafile = train-sets/cb_test.ldf @@ -7,17 +8,17 @@ learning rate = 0.5 initial_t = 0 power_t = 0.5 cb_type = mtr -Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger +Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger Input label = CB Output pred = ACTION_PROBS average since example example current current current loss last counter weight label predict features -0.666667 0.666667 1 1.0 0:1:0.5 0:0.33 15 -0.333333 0.000000 2 2.0 1:0:0.5 1:0.95 6 +0.666667 0.666667 1 1.0 0:1:0.5 0:0.33 60 +0.333333 0.000000 2 2.0 1:0:0.5 1:0.95 18 finished run number of examples = 3 weighted example sum = 3.000000 weighted label sum = 0.000000 average loss = 0.333333 -total feature number = 27 +total feature number = 96 diff --git a/test/train-sets/ref/sparse_save_check.stdout b/test/train-sets/ref/sparse_save_check.stdout index e69de29bb2d..fe315c40652 100644 --- a/test/train-sets/ref/sparse_save_check.stdout +++ b/test/train-sets/ref/sparse_save_check.stdout @@ -0,0 +1,2 @@ +[warning] Any duplicate namespace interactions will be removed +You can use --leave_duplicate_interactions to disable this behaviour. 
diff --git a/test/train-sets/ref/sparse_save_check_random.stderr b/test/train-sets/ref/sparse_save_check_random.stderr new file mode 100644 index 00000000000..23ce84d1670 --- /dev/null +++ b/test/train-sets/ref/sparse_save_check_random.stderr @@ -0,0 +1,24 @@ +creating quadratic features for pairs: :: +final_regressor = standard_sparse_random_model.vw +using no cache +Reading datafile = train-sets/cb_test.ldf +num sources = 1 +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +cb_type = mtr +Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger +Input label = CB +Output pred = ACTION_PROBS +average since example example current current current +loss last counter weight label predict features +0.066667 0.066667 1 1.0 0:1:0.5 1:0.93 60 +0.033333 0.000000 2 2.0 1:0:0.5 0:0.95 18 + +finished run +number of examples = 3 +weighted example sum = 3.000000 +weighted label sum = 0.000000 +average loss = 0.033333 +total feature number = 96 diff --git a/test/train-sets/ref/sparse_save_check_random.stdout b/test/train-sets/ref/sparse_save_check_random.stdout new file mode 100644 index 00000000000..fe315c40652 --- /dev/null +++ b/test/train-sets/ref/sparse_save_check_random.stdout @@ -0,0 +1,2 @@ +[warning] Any duplicate namespace interactions will be removed +You can use --leave_duplicate_interactions to disable this behaviour. 
diff --git a/vowpalwabbit/core/include/vw/core/array_parameters.h b/vowpalwabbit/core/include/vw/core/array_parameters.h
index 095ae45b456..fe7a3b056cd 100644
--- a/vowpalwabbit/core/include/vw/core/array_parameters.h
+++ b/vowpalwabbit/core/include/vw/core/array_parameters.h
@@ -25,6 +25,12 @@ class parameters
     else { return dense_weights[i]; }
   }
 
+  inline VW::weight& get(size_t i)
+  {
+    if (sparse) { return sparse_weights.get(i); }
+    else { return dense_weights.get(i); }
+  }
+
   template <typename Lambda>
   void set_default(Lambda&& default_func)
   {
diff --git a/vowpalwabbit/core/include/vw/core/array_parameters_dense.h b/vowpalwabbit/core/include/vw/core/array_parameters_dense.h
index f215f35cf61..755a4084ac8 100644
--- a/vowpalwabbit/core/include/vw/core/array_parameters_dense.h
+++ b/vowpalwabbit/core/include/vw/core/array_parameters_dense.h
@@ -122,6 +122,10 @@ class dense_parameters
   inline const VW::weight& operator[](size_t i) const { return _begin.get()[i & _weight_mask]; }
   inline VW::weight& operator[](size_t i) { return _begin.get()[i & _weight_mask]; }
 
+  // get() is only needed for sparse_weights, same as operator[] for dense_weights
+  inline const VW::weight& get(size_t i) const { return operator[](i); }
+  inline VW::weight& get(size_t i) { return operator[](i); }
+
   VW_ATTR(nodiscard) static dense_parameters shallow_copy(const dense_parameters& input);
   VW_ATTR(nodiscard) static dense_parameters deep_copy(const dense_parameters& input);
 
diff --git a/vowpalwabbit/core/include/vw/core/array_parameters_sparse.h b/vowpalwabbit/core/include/vw/core/array_parameters_sparse.h
index dbebe75ed68..d5d1a611826 100644
--- a/vowpalwabbit/core/include/vw/core/array_parameters_sparse.h
+++ b/vowpalwabbit/core/include/vw/core/array_parameters_sparse.h
@@ -81,10 +81,14 @@ class sparse_parameters
   const_iterator cbegin() const { return const_iterator(_map.begin()); }
   const_iterator cend() const { return const_iterator(_map.end()); }
 
+  // operator[] will find weight in _map and return and insert a default value if not found. Does alter _map.
   inline VW::weight& operator[](size_t i) { return *(get_or_default_and_get(i)); }
-  inline const VW::weight& operator[](size_t i) const { return *(get_or_default_and_get(i)); }
 
+  // get() will find weight in _map and return a default value if not found. Does not alter _map.
+  inline VW::weight& get(size_t i) { return *(get_impl(i)); };
+  inline const VW::weight& get(size_t i) const { return *(get_impl(i)); };
+
   inline VW::weight& strided_index(size_t index) { return operator[](index << _stride_shift); }
   inline const VW::weight& strided_index(size_t index) const { return operator[](index << _stride_shift); }
 
@@ -119,6 +123,7 @@
   // It is marked const so it can be used from both const and non const operator[]
   // The map itself is mutable to facilitate this
   VW::weight* get_or_default_and_get(size_t i) const;
+  VW::weight* get_impl(size_t i) const;
 };
 }  // namespace VW
 using sparse_parameters VW_DEPRECATED("sparse_parameters moved into VW namespace") = VW::sparse_parameters;
diff --git a/vowpalwabbit/core/include/vw/core/gd_predict.h b/vowpalwabbit/core/include/vw/core/gd_predict.h
index 868756250f8..3858d1d0eb2 100644
--- a/vowpalwabbit/core/include/vw/core/gd_predict.h
+++ b/vowpalwabbit/core/include/vw/core/gd_predict.h
@@ -36,7 +36,7 @@ inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& da
 {
   for (const auto& f : fs)
   {
-    VW::weight& w = weights[(f.index() + offset)];
+    VW::weight& w = weights[f.index() + offset];
     FuncT(dat, mult * f.value(), w);
   }
 }
@@ -46,7 +46,7 @@ template <class DataT, void (*FuncT)(DataT&, float, float&), class WeightsT>
 inline void foreach_feature(
     const WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.)
 {
-  for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights[static_cast<size_t>(f.index() + offset)]); }
+  for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights.get(static_cast<size_t>(f.index() + offset))); }
 }
 
 template <class DataT, class WeightsT, void (*FuncT)(DataT&, float, float)>
 inline void call_func_t(DataT& dat, const WeightsT& weights, const float ft_value, const uint64_t ft_idx)
 {
-  FuncT(dat, ft_value, weights[static_cast<size_t>(ft_idx)]);
+  FuncT(dat, ft_value, weights.get(static_cast<size_t>(ft_idx)));
 }
 
 template
diff --git a/vowpalwabbit/core/src/array_parameters_sparse.cc b/vowpalwabbit/core/src/array_parameters_sparse.cc
index 611f66ee11c..88d150e9e92 100644
--- a/vowpalwabbit/core/src/array_parameters_sparse.cc
+++ b/vowpalwabbit/core/src/array_parameters_sparse.cc
@@ -26,6 +26,32 @@ VW::weight* VW::sparse_parameters::get_or_default_and_get(size_t i) const
   return iter->second.get();
 }
 
+VW::weight* VW::sparse_parameters::get_impl(size_t i) const
+{
+  static auto default_value =
+      std::shared_ptr<VW::weight>(VW::details::calloc_mergable_or_throw<VW::weight>(stride()), free);
+  uint64_t index = i & _weight_mask;
+  auto iter = _map.find(index);
+  if (iter == _map.end())
+  {
+    // Add entry to map if _default_func is defined
+    if (_default_func != nullptr)
+    {
+      // memory allocated by calloc should be freed by C free()
+      _map.insert(std::make_pair(
+          index, std::shared_ptr<VW::weight>(VW::details::calloc_mergable_or_throw<VW::weight>(stride()), free)));
+      iter = _map.find(index);
+      _default_func(iter->second.get(), index);
+      return iter->second.get();
+    }
+    // Return default value if _default_func is not defined
+    return default_value.get();
+  }
+
+  // Get entry if it exists in the map
+  return iter->second.get();
+}
+
 VW::sparse_parameters::sparse_parameters(size_t length, uint32_t stride_shift)
     : _weight_mask((length << stride_shift) - 1), _stride_shift(stride_shift), _default_func(nullptr)
 {
diff --git a/vowpalwabbit/core/src/parse_regressor.cc b/vowpalwabbit/core/src/parse_regressor.cc
index 6465389e5b6..b6de703b24a 100644
--- a/vowpalwabbit/core/src/parse_regressor.cc
+++ b/vowpalwabbit/core/src/parse_regressor.cc
@@ -94,23 +94,20 @@ void initialize_regressor(VW::workspace& all, T& weights)
   }
   else if (all.initial_weights_config.initial_weight != 0.)
   {
-    auto initial_weight = all.initial_weights_config.initial_weight;
-    auto initial_value_weight_initializer = [initial_weight](VW::weight* weights, uint64_t /*index*/)
-    { weights[0] = initial_weight; };
+    auto initial_value_weight_initializer = [&all](VW::weight* weights, uint64_t /*index*/)
+    { weights[0] = all.initial_weights_config.initial_weight; };
     weights.set_default(initial_value_weight_initializer);
   }
   else if (all.initial_weights_config.random_positive_weights)
   {
-    auto rand_state = *all.get_random_state();
-    auto random_positive = [&rand_state](VW::weight* weights, uint64_t)
-    { weights[0] = 0.1f * rand_state.get_and_update_random(); };
+    auto random_positive = [&all](VW::weight* weights, uint64_t)
+    { weights[0] = 0.1f * all.get_random_state()->get_and_update_random(); };
     weights.set_default(random_positive);
   }
   else if (all.initial_weights_config.random_weights)
   {
-    auto rand_state = *all.get_random_state();
-    auto random_neg_pos = [&rand_state](VW::weight* weights, uint64_t)
-    { weights[0] = rand_state.get_and_update_random() - 0.5f; };
+    auto random_neg_pos = [&all](VW::weight* weights, uint64_t)
+    { weights[0] = all.get_random_state()->get_and_update_random() - 0.5f; };
     weights.set_default(random_neg_pos);
   }
   else if (all.initial_weights_config.normal_weights) { weights.set_default(&initialize_weights_as_polar_normal); }
diff --git a/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc b/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc
index 47265bd48b3..110cf815b25 100644
--- a/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc
+++ b/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc
@@ -145,6 +145,8 @@ class lazy_gaussian
 {
 public:
   inline float operator[](uint64_t index) const { return VW::details::merand48_boxmuller(index); }
+  // get() is only needed for sparse_weights, same as operator[] for lazy_gaussian
+  inline float get(uint64_t index) const { return operator[](index); }
 };
 
 inline void vec_add_with_norm(std::pair<float, float>& p, float fx, float fw)