diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 4e1813ed..e5a88a91 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,7 +7,7 @@
 Changelog
 =========
 
-3.1.0 - unreleased
+3.1.0 - 2024-11-11
 ------------------
 
 **New features:**
diff --git a/src/glum/_distribution.py b/src/glum/_distribution.py
index 2b6104df..a2f0eb43 100644
--- a/src/glum/_distribution.py
+++ b/src/glum/_distribution.py
@@ -1510,6 +1510,9 @@ def guess_intercept(
     If the distribution and corresponding link are something else, we use the
     Tweedie or normal solution, depending on the link function.
     """
+    if (not isinstance(link, IdentityLink)) and (len(np.unique(y)) == 1):
+        raise ValueError("No variation in `y`. Coefficients can't be estimated.")
+
     avg_y = np.average(y, weights=sample_weight)
 
     if isinstance(link, IdentityLink):
diff --git a/src/glum/_glm.py b/src/glum/_glm.py
index 6e706213..2a9dde21 100644
--- a/src/glum/_glm.py
+++ b/src/glum/_glm.py
@@ -452,7 +452,7 @@ def _one_over_var_inf_to_val(arr: np.ndarray, val: float) -> np.ndarray:
 
     If values are zeros, return val.
     """
-    zeros = np.where(np.abs(arr) < np.sqrt(np.finfo(arr.dtype).eps))
+    zeros = np.where(np.abs(arr) < 10 * np.sqrt(np.finfo(arr.dtype).eps))
     with np.errstate(divide="ignore"):
         one_over = 1 / arr
     one_over[zeros] = val
@@ -1104,7 +1104,7 @@ def _solve(
                 family=self._family_instance,
                 link=self._link_instance,
                 max_iter=max_iter,
-                max_inner_iter=self.max_inner_iter,
+                max_inner_iter=getattr(self, "max_inner_iter", 100_000),
                 gradient_tol=self._gradient_tol,
                 step_size_tol=self.step_size_tol,
                 fixed_inner_tol=fixed_inner_tol,
@@ -2544,12 +2544,14 @@ def _set_up_and_check_fit_args(
         # This will prevent accidental upcasting later and slow operations on
         # mixed-precision numbers
         y = np.asarray(y, dtype=X.dtype)
+
         sample_weight = _check_weights(
             sample_weight,
             y.shape[0],  # type: ignore
             X.dtype,
             force_all_finite=force_all_finite,
         )
+
         offset = _check_offset(offset, y.shape[0], X.dtype)  # type: ignore
 
         # IMPORTANT NOTE: Since we want to minimize
@@ -2559,9 +2561,8 @@ def _set_up_and_check_fit_args(
         # 1/2*deviance + L1 + L2 with deviance=sum(weights * unit_deviance)
         weights_sum: float = np.sum(sample_weight)  # type: ignore
         sample_weight = sample_weight / weights_sum
-        #######################################################################
-        # 2b. convert to wrapper matrix types
-        #######################################################################
+
+        # Convert to wrapper matrix types
         X = tm.as_tabmat(X)
 
         self.feature_names_ = X.get_names(type="column", missing_prefix="_col_")  # type: ignore
diff --git a/tests/glm/test_glm.py b/tests/glm/test_glm.py
index 511505a9..00ff28aa 100644
--- a/tests/glm/test_glm.py
+++ b/tests/glm/test_glm.py
@@ -56,11 +56,11 @@ def get_small_x_y(
     estimator: Union[GeneralizedLinearRegressor, GeneralizedLinearRegressorCV],
 ) -> tuple[np.ndarray, np.ndarray]:
     if isinstance(estimator, GeneralizedLinearRegressor):
-        n_rows = 1
+        n_rows = 2
     else:
         n_rows = 10
     x = np.ones((n_rows, 1), dtype=int)
-    y = np.ones(n_rows) * 0.5
+    y = np.array([0, 1] * (n_rows // 2)) * 0.5
     return x, y
 
 
@@ -222,7 +222,7 @@ def test_glm_family_argument_invalid_input(estimator):
 def test_glm_family_argument_as_exponential_dispersion_model(estimator, kwargs, family):
     X, y = get_small_x_y(estimator)
     glm = estimator(family=family(), **kwargs)
-    glm.fit(X, y)
+    glm.fit(X, np.where(y > family().lower_bound, y, y.max() / 2))
 
 
 @pytest.mark.parametrize(