From 2d714525cc52fecde545d247d7ed15479f3b36d0 Mon Sep 17 00:00:00 2001
From: Bilyana Indzheva <bilyana2002@gmail.com>
Date: Mon, 23 Dec 2024 16:31:03 +0200
Subject: [PATCH 01/10] fix

---
 src/glum/_distribution.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glum/_distribution.py b/src/glum/_distribution.py
index 2b6104df..6a9fdcc6 100644
--- a/src/glum/_distribution.py
+++ b/src/glum/_distribution.py
@@ -672,7 +672,7 @@ def _rowwise_gradient_hessian(
         elif 1 < self.power < 2 and isinstance(link, LogLink):
             f = partial(tweedie_log_rowwise_gradient_hessian, p=self.power)
         elif self.power == 3:
-            f = partial(inv_gaussian_log_rowwise_gradient_hessian, p=self.power)
+            f = inv_gaussian_log_rowwise_gradient_hessian
 
         if f is not None:
             return f(y, sample_weight, eta, mu, gradient_rows, hessian_rows)

From b0ab7e4bd6c709194f86abaefe4061b10c10f7d0 Mon Sep 17 00:00:00 2001
From: Bilyana Indzheva <bilyana2002@gmail.com>
Date: Mon, 23 Dec 2024 16:37:11 +0200
Subject: [PATCH 02/10] Add changelog

---
 CHANGELOG.rst | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 6e4702cd..6503dda9 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,7 +7,15 @@
 Changelog
 =========
 
-3.1.0 - unreleased
+3.1.1 - unreleased
+------------------
+
+**Bug fix:
+
+- Fixed a bug where `TweedieDistribution._rowwise_gradient_hessian` would pass `p` paramter to `inv_gaussian_log_rowwise_gradient_hessian`, even though `p` is not defined in its function signature.
+
+
+3.1.0
 ------------------
 
 **New features:**

From 644cc169fad4947ab9dc8169ae3e9cea778ba387 Mon Sep 17 00:00:00 2001
From: Bilyana Indzheva <bilyana2002@gmail.com>
Date: Tue, 24 Dec 2024 12:28:18 +0200
Subject: [PATCH 03/10] fix

---
 src/glum/_distribution.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glum/_distribution.py b/src/glum/_distribution.py
index 6a9fdcc6..58ac99a5 100644
--- a/src/glum/_distribution.py
+++ b/src/glum/_distribution.py
@@ -703,7 +703,7 @@ def _eta_mu_deviance(
         elif 1 < self.power < 2 and isinstance(link, LogLink):
             f = partial(tweedie_log_eta_mu_deviance, p=self.power)
         elif self.power == 3 and isinstance(link, LogLink):
-            f = partial(inv_gaussian_log_eta_mu_deviance, p=self.power)
+            f = inv_gaussian_log_eta_mu_deviance
 
         if f is not None:
             return f(cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor)

From 36a59166817415fb199929f658aa2bbee4b8ab0b Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 8 Jan 2025 16:47:45 +0100
Subject: [PATCH 04/10] Add tests demonstrating the issue

---
 tests/glm/test_distribution.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/glm/test_distribution.py b/tests/glm/test_distribution.py
index e58fd4c0..c43bd32b 100644
--- a/tests/glm/test_distribution.py
+++ b/tests/glm/test_distribution.py
@@ -170,6 +170,7 @@ def test_deviance_zero(family, chk_values):
         (InverseGaussianDistribution(), LogLink()),
         (TweedieDistribution(power=1.5), LogLink()),
         (TweedieDistribution(power=2.5), LogLink()),
+        (TweedieDistribution(power=3), LogLink()),
         (BinomialDistribution(), LogitLink()),
         (TweedieDistribution(power=1.5), TweedieLink(1.5)),
         (TweedieDistribution(power=2.5), TweedieLink(2.5)),
@@ -226,8 +227,9 @@ def f(coef2):
         (NormalDistribution(), IdentityLink(), False),
         (PoissonDistribution(), LogLink(), False),
         (GammaDistribution(), LogLink(), True),
-        (InverseGaussianDistribution(), LogLink(), False),
+        (InverseGaussianDistribution(), LogLink(), True),
         (TweedieDistribution(power=1.5), LogLink(), True),
+        (TweedieDistribution(power=3), LogLink(), True),
         (TweedieDistribution(power=4.5), LogLink(), False),
         (NegativeBinomialDistribution(theta=1.0), LogLink(), False),
     ],

From a2dde9edad16e3b507801dd74e222c54b48052d5 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 8 Jan 2025 16:50:01 +0100
Subject: [PATCH 05/10] Use inv_gaussian_log_* functions when appropriate

---
 src/glum/_distribution.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/glum/_distribution.py b/src/glum/_distribution.py
index 0627d55b..8d794009 100644
--- a/src/glum/_distribution.py
+++ b/src/glum/_distribution.py
@@ -671,7 +671,7 @@ def _rowwise_gradient_hessian(
             f = gamma_log_rowwise_gradient_hessian
         elif 1 < self.power < 2 and isinstance(link, LogLink):
             f = partial(tweedie_log_rowwise_gradient_hessian, p=self.power)
-        elif self.power == 3:
+        elif self.power == 3 and isinstance(link, LogLink):
             f = inv_gaussian_log_rowwise_gradient_hessian
 
         if f is not None:
@@ -1153,6 +1153,11 @@ def unit_deviance(self, y, mu):  # noqa D
     def _rowwise_gradient_hessian(
         self, link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
     ):
+        if isinstance(link, LogLink):
+            return inv_gaussian_log_rowwise_gradient_hessian(
+                y, sample_weight, eta, mu, gradient_rows, hessian_rows
+            )
+
         return super()._rowwise_gradient_hessian(
             link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
         )
@@ -1169,8 +1174,8 @@ def _eta_mu_deviance(
         mu_out,
     ):
         if isinstance(link, LogLink):
-            return tweedie_log_eta_mu_deviance(
-                cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor, p=3.0
+            return inv_gaussian_log_eta_mu_deviance(
+                cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor
             )
 
         return super()._eta_mu_deviance(

From 797f0b190b3b8d3d51902c9289bc391df2b034ac Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 8 Jan 2025 17:01:08 +0100
Subject: [PATCH 06/10] Fix inverse gaussian derivatives

---
 src/glum/_functions.pyx        | 4 ++--
 tests/glm/test_distribution.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/glum/_functions.pyx b/src/glum/_functions.pyx
index 6c19a2b0..ed86e039 100644
--- a/src/glum/_functions.pyx
+++ b/src/glum/_functions.pyx
@@ -276,8 +276,8 @@ def inv_gaussian_log_rowwise_gradient_hessian(
         inv_mu = 1 / mu[i]
         inv_mu2 = inv_mu ** 2
 
-        gradient_rows_out[i] = 2 * weights[i] * (inv_mu - y[i] * inv_mu2)
-        hessian_rows_out[i] = 2 * weights[i] * (inv_mu - 2 * y[i] * inv_mu2)
+        gradient_rows_out[i] = weights[i] * (y[i] * inv_mu2 - inv_mu)
+        hessian_rows_out[i] = weights[i] * inv_mu
 
 def inv_gaussian_log_likelihood(
     const_floating1d y,
diff --git a/tests/glm/test_distribution.py b/tests/glm/test_distribution.py
index c43bd32b..5ce4b9d2 100644
--- a/tests/glm/test_distribution.py
+++ b/tests/glm/test_distribution.py
@@ -227,9 +227,9 @@ def f(coef2):
         (NormalDistribution(), IdentityLink(), False),
         (PoissonDistribution(), LogLink(), False),
         (GammaDistribution(), LogLink(), True),
-        (InverseGaussianDistribution(), LogLink(), True),
+        (InverseGaussianDistribution(), LogLink(), False),
         (TweedieDistribution(power=1.5), LogLink(), True),
-        (TweedieDistribution(power=3), LogLink(), True),
+        (TweedieDistribution(power=3), LogLink(), False),
         (TweedieDistribution(power=4.5), LogLink(), False),
         (NegativeBinomialDistribution(theta=1.0), LogLink(), False),
     ],

From 27b123d08f5aee157b0bc1cf3357a82cba3cc512 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Wed, 8 Jan 2025 20:58:37 +0100
Subject: [PATCH 07/10] Update changelog

---
 CHANGELOG.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 734da0a8..d3c5c325 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -12,7 +12,8 @@ UNRELEASED
 
 **Bug fix:
 
-- Fixed a bug where `TweedieDistribution._rowwise_gradient_hessian` would pass `p` paramter to `inv_gaussian_log_rowwise_gradient_hessian`, even though `p` is not defined in its function signature.
+- Fixed a bug where :meth:`~glum.TweedieDistribution._rowwise_gradient_hessian` and :meth:`~glum.TweedieDistribution._eta_mu_deviance` would call functions with wrong arguments in the p = 3 case.
+- Fixed :class:`glum.InverseGaussianDistribution` not using the optimized gradient, hessian and deviance implementations, as well as those gradients and hessians being incorrect.
 
 **Other changes:**
 

From dd2a6c3217cd0ab64762b406df2a10ed177bd643 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Thu, 9 Jan 2025 11:24:10 +0100
Subject: [PATCH 08/10] Update CHANGELOG.rst

Co-authored-by: Luca Bittarello <15511539+lbittarello@users.noreply.github.com>
---
 CHANGELOG.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d3c5c325..118587a1 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -12,8 +12,8 @@ UNRELEASED
 
 **Bug fix:
 
-- Fixed a bug where :meth:`~glum.TweedieDistribution._rowwise_gradient_hessian` and :meth:`~glum.TweedieDistribution._eta_mu_deviance` would call functions with wrong arguments in the p = 3 case.
-- Fixed :class:`glum.InverseGaussianDistribution` not using the optimized gradient, hessian and deviance implementations, as well as those gradients and hessians being incorrect.
+- Fixed a bug where :meth:`~glum.TweedieDistribution._rowwise_gradient_hessian` and :meth:`~glum.TweedieDistribution._eta_mu_deviance` would call functions with wrong arguments in the ``p = 3`` case.
+- Fixed :class:`glum.InverseGaussianDistribution` not using the optimized gradient, Hessian and deviance implementations, as well as those derivatives having the wrong sign.
 
 **Other changes:**
 

From 6f18433990bebae63af8b706ded803861819215d Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Thu, 9 Jan 2025 11:29:47 +0100
Subject: [PATCH 09/10] Clarify why we are using the FIM

---
 src/glum/_functions.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glum/_functions.pyx b/src/glum/_functions.pyx
index ed86e039..f19911fc 100644
--- a/src/glum/_functions.pyx
+++ b/src/glum/_functions.pyx
@@ -277,6 +277,8 @@ def inv_gaussian_log_rowwise_gradient_hessian(
         inv_mu2 = inv_mu ** 2
 
         gradient_rows_out[i] = weights[i] * (y[i] * inv_mu2 - inv_mu)
+        # Use the FIM instead of the true Hessian, as the latter is not
+        # necessarily positive definite.
         hessian_rows_out[i] = weights[i] * inv_mu
 
 def inv_gaussian_log_likelihood(

From b67feb429892d81c3048a09bc9257fcea2de3894 Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Thu, 9 Jan 2025 13:09:58 +0100
Subject: [PATCH 10/10] Add test against glmnet

---
 tests/glm/test_glm.py | 74 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/tests/glm/test_glm.py b/tests/glm/test_glm.py
index ce43a34c..e37d1401 100644
--- a/tests/glm/test_glm.py
+++ b/tests/glm/test_glm.py
@@ -1270,6 +1270,80 @@ def test_binomial_cloglog_unregularized(solver):
     assert_allclose(glum_glm.coef_, sm_fit.params[1:], rtol=2e-5)
 
 
+def test_inv_gaussian_log_enet():
+    """Test elastic net regression with inverse gaussian family and log link.
+
+    Compare to R's glmnet
+    """
+    # library(glmnet)
+    # options(digits=10)
+    # df <- data.frame(a=c(1,2,3,4,5,6), b=c(0,0,0,0,1, 1), y=cy=c(.2,.5,.8,.3,.9,.9))
+    # x <- data.matrix(df[,c("a", "b")])
+    # y <- df$y
+    # fit <- glmnet(
+    #     x=x, y=y, lambda=1, alpha=0.5, intercept=TRUE,
+    #     family=inv.gaussian(link=log),standardize=FALSE, thresh=1e-10
+    # )
+    # coef(fit)
+    #                       s0
+    # (Intercept) -1.028655076
+    # a            0.123000467
+    # b            .
+    glmnet_intercept = -1.028655076
+    glmnet_coef = [0.123000467, 0.0]
+    X = np.array([[1, 2, 3, 4, 5, 6], [0, 0, 0, 0, 1, 1]], dtype="float").T
+    y = np.array([0.2, 0.5, 0.8, 0.3, 0.9, 0.9])
+    rng = np.random.RandomState(42)
+    glm = GeneralizedLinearRegressor(
+        alpha=1,
+        l1_ratio=0.5,
+        family="inverse.gaussian",
+        link="log",
+        solver="irls-cd",
+        gradient_tol=1e-8,
+        selection="random",
+        random_state=rng,
+    )
+    glm.fit(X, y)
+    assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
+    assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)
+
+    # same for start_params='zero' and selection='cyclic'
+    # with reduced precision
+    glm = GeneralizedLinearRegressor(
+        alpha=1,
+        l1_ratio=0.5,
+        family="inverse.gaussian",
+        link="log",
+        solver="irls-cd",
+        gradient_tol=1e-8,
+        selection="cyclic",
+    )
+    glm.fit(X, y)
+    assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
+    assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)
+
+    # check warm_start, therefore start with different alpha
+    glm = GeneralizedLinearRegressor(
+        alpha=0.005,
+        l1_ratio=0.5,
+        family="inverse.gaussian",
+        max_iter=300,
+        link="log",
+        solver="irls-cd",
+        gradient_tol=1e-8,
+        selection="cyclic",
+    )
+    glm.fit(X, y)
+    # warm start with original alpha and use of sparse matrices
+    glm.warm_start = True
+    glm.alpha = 1
+    X = sparse.csr_matrix(X)
+    glm.fit(X, y)
+    assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
+    assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)
+
+
 @pytest.mark.parametrize("alpha", [0.01, 0.1, 1, 10])
 def test_binomial_enet(alpha):
     """Test elastic net regression with binomial family and LogitLink.