From debf9650c90a3ffbae3fd836bd0e4965e4f3d719 Mon Sep 17 00:00:00 2001 From: Arthur Date: Sun, 14 Sep 2025 18:51:22 +0200 Subject: [PATCH 1/5] minimal changes --- sklearn/tree/_criterion.pyx | 44 +++++++++++++++++++-------------- sklearn/tree/tests/test_tree.py | 8 +++--- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 0e4b3358e5f89..681c11b2c1c82 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -1192,8 +1192,9 @@ cdef void precompute_absolute_errors( intp_t k, intp_t start, intp_t end, + float64_t q, float64_t[::1] abs_errors, - float64_t[::1] medians + float64_t[::1] quantiles ) noexcept nogil: """ Fill `abs_errors` and `medians`. @@ -1222,6 +1223,8 @@ cdef void precompute_absolute_errors( Start index in `sample_indices` end : intp_t End index (exclusive) in `sample_indices` + q : float64_t + Quantile abs_errors : float64_t[::1] array to store (increment) the computed absolute errors. Shape: (n,) with n := end - start @@ -1257,8 +1260,8 @@ cdef void precompute_absolute_errors( cdef float64_t y cdef float64_t w = 1.0 cdef float64_t top_val, top_weight - cdef float64_t median = 0.0 - cdef float64_t half_weight + cdef float64_t quantile = 0.0 + cdef float64_t split_weight p = start for _ in range(n): @@ -1275,29 +1278,29 @@ cdef void precompute_absolute_errors( else: below.push(y, w) - half_weight = (above.total_weight + below.total_weight) / 2.0 + split_weight = (above.total_weight + below.total_weight) * q # Rebalance heaps - while above.total_weight < half_weight and not below.is_empty(): + while above.total_weight < split_weight and not below.is_empty(): below.pop(&top_val, &top_weight) above.push(top_val, top_weight) while ( not above.is_empty() - and (above.total_weight - above.top_weight()) >= half_weight + and (above.total_weight - above.top_weight()) >= split_weight ): above.pop(&top_val, &top_weight) below.push(top_val, top_weight) # Current median - if above.total_weight > half_weight + 1e-5 * fabs(half_weight): - median = above.top() - else: # above and below weight are almost exactly equals - median = (above.top() + below.top()) / 2. - medians[j] = median + if above.total_weight > split_weight + 1e-5 * fabs(split_weight): + quantile = above.top() + else: # above and below heaps are almost exactly balanced + quantile = q * above.top() + (1 - q) * below.top() + # FIXME: check if it should be (1 - q) * ... + q * ... + quantiles[j] = quantile abs_errors[j] += ( - (below.total_weight - above.total_weight) * median - - below.weighted_sum - + above.weighted_sum + q * (above.weighted_sum - above.total_weight * quantile) + + (1 - q) * (below.total_weight * quantile - below.weighted_sum) ) p += step j += step @@ -1307,7 +1310,8 @@ def _py_precompute_absolute_errors( const float64_t[:, ::1] ys, const float64_t[:] sample_weight, const intp_t[:] sample_indices, - bint suffix=False + bint suffix=False, + float64_t q=0.5 ): """ Used for testing precompute_absolute_errors. @@ -1334,7 +1338,7 @@ def _py_precompute_absolute_errors( precompute_absolute_errors( ys, sample_weight, sample_indices, above, below, - k, start, end, abs_errors, medians + k, start, end, q, abs_errors, medians ) return np.asarray(abs_errors) @@ -1469,7 +1473,7 @@ cdef class MAE(Criterion): # which are allowed only with n_outputs=1. 
precompute_absolute_errors( self.y, self.sample_weight, self.sample_indices, - self.above, self.below, k, self.start, self.end, + self.above, self.below, k, self.start, self.end, 0.5, # left_abs_errors is incremented, left_medians is overwritten self.left_abs_errors, self.left_medians ) @@ -1477,13 +1481,17 @@ cdef class MAE(Criterion): # i.e., reversed, and abs error & median are filled in reverse order to. precompute_absolute_errors( self.y, self.sample_weight, self.sample_indices, - self.above, self.below, k, self.end - 1, self.start - 1, + self.above, self.below, k, self.end - 1, self.start - 1, 0.5, # right_abs_errors is incremented, right_medians is overwritten self.right_abs_errors, self.right_medians ) # Store the median for the current node self.node_medians[k] = self.right_medians[0] + for p in range(self.start, self.end): + self.left_abs_errors[p] *= 2 + self.right_abs_errors[p] *= 2 + return 0 cdef int reverse_reset(self) except -1 nogil: diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index f86e41a2723ca..f0202552ba87c 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -2915,11 +2915,11 @@ def compute_abs_error(y: np.ndarray, w: np.ndarray): y = np.random.uniform(size=(n, 1)) w = np.random.rand(n) indices = np.arange(n) - abs_errors = _py_precompute_absolute_errors(y, w, indices) + abs_errors = 2 * _py_precompute_absolute_errors(y, w, indices) expected = compute_prefix_abs_errors_naive(y, w) assert np.allclose(abs_errors, expected) - abs_errors = _py_precompute_absolute_errors(y, w, indices, suffix=True) + abs_errors = 2 * _py_precompute_absolute_errors(y, w, indices, suffix=True) expected = compute_prefix_abs_errors_naive(y[::-1], w[::-1])[::-1] assert np.allclose(abs_errors, expected) @@ -2929,10 +2929,10 @@ def compute_abs_error(y: np.ndarray, w: np.ndarray): y_sorted = y[indices] w_sorted = w[indices] - abs_errors = _py_precompute_absolute_errors(y, w, indices) + abs_errors = 2 * _py_precompute_absolute_errors(y, w, indices) expected = compute_prefix_abs_errors_naive(y_sorted, w_sorted) assert np.allclose(abs_errors, expected) - abs_errors = _py_precompute_absolute_errors(y, w, indices, suffix=True) + abs_errors = 2 * _py_precompute_absolute_errors(y, w, indices, suffix=True) expected = compute_prefix_abs_errors_naive(y_sorted[::-1], w_sorted[::-1])[::-1] assert np.allclose(abs_errors, expected) From 6e267d5ba5e26c61f71028a411dfd996b7e7a3bd Mon Sep 17 00:00:00 2001 From: Arthur Date: Sun, 14 Sep 2025 21:24:51 +0200 Subject: [PATCH 2/5] AE to pinball loss --- sklearn/tree/_criterion.pyx | 48 ++++++++++++++++--------------- sklearn/tree/tests/test_tree.py | 51 +++++++++++++++++++++------------ 2 files changed, 57 insertions(+), 42 deletions(-) diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 681c11b2c1c82..709ee5f2ce362 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -1183,7 +1183,7 @@ cdef class MSE(RegressionCriterion): # Helper for MAE criterion: -cdef void precompute_absolute_errors( +cdef void precompute_pinball_losses( const float64_t[:, ::1] ys, const float64_t[:] sample_weight, const intp_t[:] sample_indices, @@ -1193,18 +1193,18 @@ cdef void precompute_absolute_errors( intp_t start, intp_t end, float64_t q, - float64_t[::1] abs_errors, + float64_t[::1] losses, float64_t[::1] quantiles ) noexcept nogil: """ - Fill `abs_errors` and `medians`. + Fill `losses` and `quantiles`. 
If start < end: - Computes the "prefix" AEs/medians, i.e the AEs for each set of indices + Computes the "prefix" losses/quantiles, i.e. the losses for each set of indices sample_indices[start:start + i] with i in {1, ..., n} where n = end - start Else: - Computes the "suffix" AEs/medians, i.e the AEs for each set of indices + Computes the "suffix" losses/quantiles, i.e. the losses for each set of indices sample_indices[i:] with i in {0, ..., n-1} Parameters @@ -1224,25 +1224,26 @@ cdef void precompute_absolute_errors( end : intp_t End index (exclusive) in `sample_indices` q : float64_t - Quantile - abs_errors : float64_t[::1] + Probability for the quantile / alpha for the pinball loss + losses : float64_t[::1] array to store (increment) the computed pinball losses. Shape: (n,) with n := end - start - medians : float64_t[::1] - array to store (overwrite) the computed medians. Shape: (n,) + quantiles : float64_t[::1] + array to store (overwrite) the computed quantiles. Shape: (n,) Complexity: O(n log n) This algorithm is an adaptation of the two heaps solution of the "find median from a data stream" problem See for instance: https://www.geeksforgeeks.org/dsa/median-of-stream-of-integers-running-integers/ - But here, it's the weighted median and we also need to compute the AE, so: + But here, it's the weighted quantile and we also need to compute the pinball loss, so: - instead of balancing the heaps based on their number of elements, rebalance them based on the summed weights of their elements - - rewrite the AE computation by splitting the sum between elements - above and below the median, which allow to express it as a simple + - rewrite the pinball loss computation by splitting the sum between + elements above and below the quantile, which allows expressing it as a simple O(1) computation. - See the maths in the PR desc: + See the maths in the PR description: TODO + Also see the PR for the initial implementation (weighted median and absolute error): https://github.com/scikit-learn/scikit-learn/pull/32100 """ cdef intp_t j, p, i, step, n @@ -1278,7 +1279,7 @@ cdef void precompute_pinball_losses( else: below.push(y, w) - split_weight = (above.total_weight + below.total_weight) * q + split_weight = (above.total_weight + below.total_weight) * (1 - q) # Rebalance heaps while above.total_weight < split_weight and not below.is_empty(): @@ -1298,7 +1299,7 @@ cdef void precompute_pinball_losses( quantile = q * above.top() + (1 - q) * below.top() # FIXME: check if it should be (1 - q) * ... + q * ...
quantiles[j] = quantile - abs_errors[j] += ( + losses[j] += ( q * (above.weighted_sum - above.total_weight * quantile) + (1 - q) * (below.total_weight * quantile - below.weighted_sum) ) @@ -1306,7 +1307,7 @@ cdef void precompute_absolute_errors( j += step -def _py_precompute_absolute_errors( +def _py_precompute_pinball_losses( const float64_t[:, ::1] ys, const float64_t[:] sample_weight, const intp_t[:] sample_indices, @@ -1329,18 +1330,18 @@ def _py_precompute_absolute_errors( intp_t k = 0 intp_t start = 0 intp_t end = n - float64_t[::1] abs_errors = np.zeros(n, dtype=np.float64) - float64_t[::1] medians = np.zeros(n, dtype=np.float64) + float64_t[::1] losses = np.zeros(n, dtype=np.float64) + float64_t[::1] quantiles = np.zeros(n, dtype=np.float64) if suffix: start = n - 1 end = -1 - precompute_absolute_errors( + precompute_pinball_losses( ys, sample_weight, sample_indices, above, below, - k, start, end, q, abs_errors, medians + k, start, end, q, losses, quantiles ) - return np.asarray(abs_errors) + return np.asarray(losses) cdef class MAE(Criterion): @@ -1471,7 +1472,7 @@ cdef class MAE(Criterion): # Note that at each iteration of this loop, we overwrite `self.left_medians` # and `self.right_medians`. They are used to check for monoticity constraints, # which are allowed only with n_outputs=1. - precompute_absolute_errors( + precompute_pinball_losses( self.y, self.sample_weight, self.sample_indices, self.above, self.below, k, self.start, self.end, 0.5, # left_abs_errors is incremented, left_medians is overwritten @@ -1479,7 +1480,7 @@ cdef class MAE(Criterion): ) # For the right child, we consider samples from end-1 to start-1 # i.e., reversed, and abs error & median are filled in reverse order to. - precompute_absolute_errors( + precompute_pinball_losses( self.y, self.sample_weight, self.sample_indices, self.above, self.below, k, self.end - 1, self.start - 1, 0.5, # right_abs_errors is incremented, right_medians is overwritten @@ -1488,6 +1489,7 @@ cdef class MAE(Criterion): # Store the median for the current node self.node_medians[k] = self.right_medians[0] + # pinball loss with q=0.5 is half of the AE for p in range(self.start, self.end): self.left_abs_errors[p] *= 2 self.right_abs_errors[p] *= 2 diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index f0202552ba87c..0e5c19afcde41 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -20,7 +20,12 @@ from sklearn.dummy import DummyRegressor from sklearn.exceptions import NotFittedError from sklearn.impute import SimpleImputer -from sklearn.metrics import accuracy_score, mean_poisson_deviance, mean_squared_error +from sklearn.metrics import ( + accuracy_score, + mean_pinball_loss, + mean_poisson_deviance, + mean_squared_error, +) from sklearn.model_selection import cross_val_score, train_test_split from sklearn.pipeline import make_pipeline from sklearn.random_projection import _sparse_random_matrix @@ -36,7 +41,7 @@ DENSE_SPLITTERS, SPARSE_SPLITTERS, ) -from sklearn.tree._criterion import _py_precompute_absolute_errors +from sklearn.tree._criterion import _py_precompute_pinball_losses from sklearn.tree._partitioner import _py_sort from sklearn.tree._tree import ( NODE_DTYPE, @@ -2885,7 +2890,8 @@ def test_sort_log2_build(): assert_array_equal(samples, expected_samples) -def test_absolute_errors_precomputation_function(): +@pytest.mark.parametrize("q", [0.5, 0.2, 0.9]) +def test_absolute_errors_precomputation_function(q): """ Test the main bit of logic of the 
MAE(Criterion) class (used by DecisionTreeRegressor(criterion="absolute_error")). @@ -2896,31 +2902,38 @@ it can be safely removed. """ - def compute_prefix_abs_errors_naive(y: np.ndarray, w: np.ndarray): + def compute_prefix_losses_naive(y: np.ndarray, w: np.ndarray): + """ + Computes the pinball loss for all (y[:i], w[:i]) + Naive: O(n^2 log n) + """ y = y.ravel() - return np.array([compute_abs_error(y[:i], w[:i]) for i in range(1, y.size + 1)]) + return np.array( + [compute_pinball_loss(y[:i], w[:i]) for i in range(1, y.size + 1)] + ) - def compute_abs_error(y: np.ndarray, w: np.ndarray): + def compute_pinball_loss(y: np.ndarray, w: np.ndarray): # 1) compute the weighted median # i.e. once ordered by y, search for i such that: - # sum(w[:i]) <= 1/2 and sum(w[i+1:]) <= 1/2 + # sum(w[:i]) <= q*sum(w) and sum(w[i+1:]) >= q*sum(w) sorter = np.argsort(y) wc = np.cumsum(w[sorter]) - idx = np.searchsorted(wc, wc[-1] / 2) - median = y[sorter[idx]] - # 2) compute the AE - return (np.abs(y - median) * w).sum() + idx = np.searchsorted(wc, wc[-1] * q) + quantile = y[sorter[idx]] + y_pred = np.full(y.size, quantile) + # 2) compute the pinball loss + return mean_pinball_loss(y, y_pred, sample_weight=w, alpha=q) * w.sum() for n in [3, 5, 10, 20, 50, 100]: y = np.random.uniform(size=(n, 1)) w = np.random.rand(n) indices = np.arange(n) - abs_errors = 2 * _py_precompute_absolute_errors(y, w, indices) - expected = compute_prefix_abs_errors_naive(y, w) + abs_errors = _py_precompute_pinball_losses(y, w, indices, q=q) + expected = compute_prefix_losses_naive(y, w) assert np.allclose(abs_errors, expected) - abs_errors = 2 * _py_precompute_absolute_errors(y, w, indices, suffix=True) - expected = compute_prefix_abs_errors_naive(y[::-1], w[::-1])[::-1] + abs_errors = _py_precompute_pinball_losses(y, w, indices, suffix=True, q=q) + expected = compute_prefix_losses_naive(y[::-1], w[::-1])[::-1] assert np.allclose(abs_errors, expected) x = np.random.rand(n) @@ -2929,10 +2942,10 @@ def compute_abs_error(y: np.ndarray, w: np.ndarray): y_sorted = y[indices] w_sorted = w[indices] - abs_errors = 2 * _py_precompute_absolute_errors(y, w, indices) - expected = compute_prefix_abs_errors_naive(y_sorted, w_sorted) + abs_errors = _py_precompute_pinball_losses(y, w, indices, q=q) + expected = compute_prefix_losses_naive(y_sorted, w_sorted) assert np.allclose(abs_errors, expected) - abs_errors = 2 * _py_precompute_absolute_errors(y, w, indices, suffix=True) - expected = compute_prefix_abs_errors_naive(y_sorted[::-1], w_sorted[::-1])[::-1] + abs_errors = _py_precompute_pinball_losses(y, w, indices, suffix=True, q=q) + expected = compute_prefix_losses_naive(y_sorted[::-1], w_sorted[::-1])[::-1] assert np.allclose(abs_errors, expected) From 592e74a85f7b1914da3a6856ccddf3b409501681 Mon Sep 17 00:00:00 2001 From: Arthur Date: Wed, 17 Sep 2025 20:21:15 +0200 Subject: [PATCH 3/5] Pass down pinball_alpha --- sklearn/tree/_classes.py | 25 ++++++++- sklearn/tree/_criterion.pyx | 96 +++++++++++++++++++-------------- sklearn/tree/tests/test_tree.py | 4 +- 3 files changed, 81 insertions(+), 44 deletions(-) diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index 6b0c46190f849..6f7c8b15dd5eb 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -77,6 +77,7 @@ "friedman_mse": _criterion.FriedmanMSE, "absolute_error": _criterion.MAE, "poisson": _criterion.Poisson, + "pinball": _criterion.Pinball, } DENSE_SPLITTERS = {"best": _splitter.BestSplitter,
"random": _splitter.RandomSplitter} @@ -383,7 +384,14 @@ def _fit( self.n_outputs_, self.n_classes_ ) else: - criterion = CRITERIA_REG[self.criterion](self.n_outputs_, n_samples) + args = (self.n_outputs_, n_samples) + if self.criterion == "pinball": + args = (*args, self.pinball_alpha) + if self.criterion == "absolute_error": + # FIXME: this is coupled with code at a much lower level + # because of some inheritance shenanigans with __cinit__ + args = (*args, 0.5) + criterion = CRITERIA_REG[self.criterion](*args) else: # Make a deepcopy in case the criterion has mutable attributes that # might be shared and modified concurrently during parallel fitting @@ -1338,9 +1346,18 @@ class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree): _parameter_constraints: dict = { **BaseDecisionTree._parameter_constraints, "criterion": [ - StrOptions({"squared_error", "friedman_mse", "absolute_error", "poisson"}), + StrOptions( + { + "squared_error", + "friedman_mse", + "absolute_error", + "poisson", + "pinball", + } + ), Hidden(Criterion), ], + "pinball_alpha": [Interval(RealNotInt, 0.0, 1.0, closed="neither")], } def __init__( @@ -1358,6 +1375,7 @@ def __init__( min_impurity_decrease=0.0, ccp_alpha=0.0, monotonic_cst=None, + pinball_alpha=0.5, ): super().__init__( criterion=criterion, @@ -1373,6 +1391,7 @@ def __init__( ccp_alpha=ccp_alpha, monotonic_cst=monotonic_cst, ) + self.pinball_alpha = pinball_alpha @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None, check_input=True): @@ -1971,6 +1990,7 @@ def __init__( max_leaf_nodes=None, ccp_alpha=0.0, monotonic_cst=None, + pinball_alpha=0.5, ): super().__init__( criterion=criterion, @@ -1985,6 +2005,7 @@ def __init__( random_state=random_state, ccp_alpha=ccp_alpha, monotonic_cst=monotonic_cst, + pinball_alpha=pinball_alpha, ) def __sklearn_tags__(self): diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 709ee5f2ce362..859d8e83d22cb 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -1292,7 +1292,7 @@ cdef void precompute_pinball_losses( above.pop(&top_val, &top_weight) below.push(top_val, top_weight) - # Current median + # Current quantile if above.total_weight > split_weight + 1e-5 * fabs(split_weight): quantile = above.top() else: # above and below heaps are almost exactly balanced @@ -1344,7 +1344,7 @@ def _py_precompute_pinball_losses( return np.asarray(losses) -cdef class MAE(Criterion): +cdef class Pinball(Criterion): r"""Mean absolute error impurity criterion. MAE = (1 / n)*(\sum_i |y_i - f_i|), where y_i is the true @@ -1353,15 +1353,16 @@ cdef class MAE(Criterion): It has almost nothing in common with other regression criterions so it doesn't inherit from RegressionCriterion """ - cdef float64_t[::1] node_medians - cdef float64_t[::1] left_abs_errors - cdef float64_t[::1] right_abs_errors - cdef float64_t[::1] left_medians - cdef float64_t[::1] right_medians + cdef float64_t alpha + cdef float64_t[::1] node_quantiles + cdef float64_t[::1] left_pinball_losses + cdef float64_t[::1] right_pinball_losses + cdef float64_t[::1] left_quantiles + cdef float64_t[::1] right_quantiles cdef WeightedHeap above cdef WeightedHeap below - def __cinit__(self, intp_t n_outputs, intp_t n_samples): + def __cinit__(self, intp_t n_outputs, intp_t n_samples, float64_t alpha): """Initialize parameters for this criterion. 
Parameters ---------- n_outputs : intp_t The number of targets to be predicted n_samples : intp_t The total number of samples to fit on + alpha : float64_t + The quantile level of the pinball loss, in (0, 1) """ + self.alpha = alpha + # Default values self.start = 0 self.pos = 0 @@ -1384,14 +1387,14 @@ cdef class MAE(Criterion): self.weighted_n_left = 0.0 self.weighted_n_right = 0.0 - self.node_medians = np.zeros(n_outputs, dtype=np.float64) + self.node_quantiles = np.zeros(n_outputs, dtype=np.float64) # Note: this criterion has an important memory footprint, which is # fine as it's instantiated only once to build an entire tree - self.left_abs_errors = np.empty(n_samples, dtype=np.float64) - self.right_abs_errors = np.empty(n_samples, dtype=np.float64) - self.left_medians = np.empty(n_samples, dtype=np.float64) - self.right_medians = np.empty(n_samples, dtype=np.float64) + self.left_pinball_losses = np.empty(n_samples, dtype=np.float64) + self.right_pinball_losses = np.empty(n_samples, dtype=np.float64) + self.left_quantiles = np.empty(n_samples, dtype=np.float64) + self.right_quantiles = np.empty(n_samples, dtype=np.float64) self.above = WeightedHeap(n_samples, True) # min-heap self.below = WeightedHeap(n_samples, False) # max-heap @@ -1457,42 +1460,37 @@ cdef class MAE(Criterion): self.pos = self.start n_bytes = self.n_node_samples * sizeof(float64_t) - memset(&self.left_abs_errors[0], 0, n_bytes) - memset(&self.right_abs_errors[0], 0, n_bytes) + memset(&self.left_pinball_losses[0], 0, n_bytes) + memset(&self.right_pinball_losses[0], 0, n_bytes) # Precompute absolute errors (summed over each output) - # and medians (used only when n_outputs=1) + # and quantiles (used only when n_outputs=1) # of the right and left child of all possible splits # for the current ordering of `sample_indices` # Precomputation is needed here and can't be done step-by-step in the update method # like for other criteria. Indeed, we don't have efficient ways to update right child - # statistics when removing samples from it. So we compute right child AEs/medians by + # statistics when removing samples from it. So we compute right child losses/quantiles by # traversing from right to left (and hence only adding samples). for k in range(self.n_outputs): - # Note that at each iteration of this loop, we overwrite `self.left_medians` - # and `self.right_medians`. They are used to check for monoticity constraints, + # Note that at each iteration of this loop, we overwrite `self.left_quantiles` + # and `self.right_quantiles`. They are used to check for monotonicity constraints, # which are allowed only with n_outputs=1. precompute_pinball_losses( self.y, self.sample_weight, self.sample_indices, - self.above, self.below, k, self.start, self.end, 0.5, - # left_abs_errors is incremented, left_medians is overwritten - self.left_abs_errors, self.left_medians + self.above, self.below, k, self.start, self.end, self.alpha, + # left_pinball_losses is incremented, left_quantiles is overwritten + self.left_pinball_losses, self.left_quantiles ) # For the right child, we consider samples from end-1 to start-1 - # i.e., reversed, and abs error & median are filled in reverse order to. + # i.e., reversed, and loss & quantile are filled in reverse order too.
precompute_pinball_losses( self.y, self.sample_weight, self.sample_indices, - self.above, self.below, k, self.end - 1, self.start - 1, 0.5, - # right_abs_errors is incremented, right_medians is overwritten - self.right_abs_errors, self.right_medians + self.above, self.below, k, self.end - 1, self.start - 1, self.alpha, + # right_pinball_losses is incremented, right_quantiles is overwritten + self.right_pinball_losses, self.right_quantiles ) - # Store the median for the current node - self.node_medians[k] = self.right_medians[0] - - # pinball loss with q=0.5 is half of the AE - for p in range(self.start, self.end): - self.left_abs_errors[p] *= 2 - self.right_abs_errors[p] *= 2 + # Store the quantile for the current node + self.node_quantiles[k] = self.right_quantiles[0] return 0 @@ -1529,7 +1527,7 @@ cdef class MAE(Criterion): """Computes the node value of sample_indices[start:end] into dest.""" cdef intp_t k for k in range(self.n_outputs): - dest[k] = self.node_medians[k] + dest[k] = self.node_quantiles[k] cdef inline float64_t middle_value(self) noexcept nogil: """Compute the middle value of a split for monotonicity constraints as the simple average @@ -1540,8 +1538,8 @@ cdef class MAE(Criterion): """ cdef intp_t j = self.pos - self.start return ( - self.left_medians[j - 1] - + self.right_medians[j] + self.left_quantiles[j - 1] + + self.right_quantiles[j] ) / 2 cdef inline bint check_monotonicity( @@ -1555,7 +1553,7 @@ cdef class MAE(Criterion): return self._check_monotonicity( monotonic_cst, lower_bound, upper_bound, - self.left_medians[j - 1], self.right_medians[j]) + self.left_quantiles[j - 1], self.right_quantiles[j]) cdef float64_t node_impurity(self) noexcept nogil: """Evaluate the impurity of the current node. @@ -1567,7 +1565,7 @@ cdef class MAE(Criterion): Time complexity: O(1) (precomputed in `.reset()`) """ return ( - self.right_abs_errors[0] + self.right_pinball_losses[0] / (self.weighted_n_node_samples * self.n_outputs) ) @@ -1586,13 +1584,13 @@ cdef class MAE(Criterion): # if pos == start, left child is empty, hence impurity is 0 if self.pos > self.start: - impurity_left += self.left_abs_errors[j - 1] + impurity_left += self.left_pinball_losses[j - 1] p_impurity_left[0] = impurity_left / (self.weighted_n_left * self.n_outputs) # if pos == end, right child is empty, hence impurity is 0 if self.pos < self.end: - impurity_right += self.right_abs_errors[j] + impurity_right += self.right_pinball_losses[j] p_impurity_right[0] = impurity_right / (self.weighted_n_right * self.n_outputs) @@ -1608,6 +1606,24 @@ cdef class MAE(Criterion): dest[0] = upper_bound +cdef class MAE(Pinball): + """ + The median is just the quantile with alpha=0.5, + and the absolute error is twice the pinball loss (with alpha=0.5). + """ + + # FIXME/XXX: Trust the instantiator to pass alpha=0.5 to the __cinit__... + + cdef float64_t node_impurity(self) noexcept nogil: + return 2 * Pinball.node_impurity(self) + + cdef void children_impurity(self, float64_t* p_impurity_left, + float64_t* p_impurity_right) noexcept nogil: + Pinball.children_impurity(self, p_impurity_left, p_impurity_right) + p_impurity_left[0] *= 2 + p_impurity_right[0] *= 2 + + cdef class FriedmanMSE(MSE): """Mean squared error impurity criterion with improvement score by Friedman.
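Sanity check for the split used in `precompute_pinball_losses`: for any quantile value m at level q, the weighted pinball loss decomposes exactly as q * (above.weighted_sum - m * above.total_weight) + (1 - q) * (m * below.total_weight - below.weighted_sum), where "above" holds the samples >= m and "below" the rest; this is what makes each update O(1) once the heaps are rebalanced. A minimal NumPy sketch of this identity and of the factor-of-2 relation exploited by `MAE(Pinball)` (illustrative only, not part of the diff; the weighted `np.quantile` call assumes NumPy >= 2.0):

    import numpy as np
    from sklearn.metrics import mean_pinball_loss

    rng = np.random.default_rng(0)
    y = rng.uniform(size=50)
    w = rng.uniform(size=50)
    q = 0.2

    # Weighted q-quantile, playing the role of above.top() in the criterion.
    m = np.quantile(y, q, weights=w, method="inverted_cdf")

    # Per-sample definition of the weighted pinball loss at m.
    direct = np.sum(w * np.where(y >= m, q * (y - m), (1 - q) * (m - y)))

    # Split form used by the criterion: "above" holds samples >= m, "below" the rest.
    above = y >= m
    below = ~above
    split = q * ((w[above] * y[above]).sum() - m * w[above].sum()) + (1 - q) * (
        m * w[below].sum() - (w[below] * y[below]).sum()
    )
    assert np.isclose(direct, split)

    # With alpha=0.5 the pinball loss is half the weighted absolute error, which
    # is what the MAE subclass compensates for by doubling both impurities.
    med = np.quantile(y, 0.5, weights=w, method="inverted_cdf")
    pinball_half = mean_pinball_loss(
        y, np.full(y.size, med), sample_weight=w, alpha=0.5
    ) * w.sum()
    assert np.isclose(2 * pinball_half, np.sum(w * np.abs(y - med)))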
diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 0e5c19afcde41..c8318ffbdfbc0 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -1827,8 +1827,8 @@ def _pickle_copy(obj): assert n_outputs == n_outputs_ assert_array_equal(n_classes, n_classes_) - for _, typename in CRITERIA_REG.items(): - criteria = typename(n_outputs, n_samples) + for name, typename in CRITERIA_REG.items(): + criteria = DecisionTreeRegressor(criterion=name).criterion result = copy_func(criteria).__reduce__() typename_, (n_outputs_, n_samples_), _ = result assert typename == typename_ From ecd2f15f17f62963d4f216051a22279c40ec46f5 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 19 Sep 2025 12:51:29 +0200 Subject: [PATCH 4/5] small changes --- sklearn/tree/_criterion.pyx | 11 ++++++----- sklearn/tree/tests/test_tree.py | 9 ++------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 6ee1a955b26cf..4db9d3f0cd33b 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -1289,12 +1289,13 @@ cdef void precompute_pinball_losses( if above.total_weight > split_weight + 1e-5 * fabs(split_weight): quantile = above.top() else: # above and below heaps are almost exactly balanced - quantile = q * above.top() + (1 - q) * below.top() - # FIXME: check if it should be (1 - q) * ... + q * ... + # Any value between below.top() and above.top() is valid here. + # We choose the midpoint for determinism. + quantile = 0.5 * (above.top() + below.top()) quantiles[j] = quantile losses[j] += ( - q * (above.weighted_sum - above.total_weight * quantile) - + (1 - q) * (below.total_weight * quantile - below.weighted_sum) + q * (above.weighted_sum - quantile * above.total_weight) + + (1 - q) * (quantile * below.total_weight - below.weighted_sum) ) p += step j += step @@ -1321,7 +1322,7 @@ def _py_precompute_pinball_losses( ys, sample_weight, sample_indices, above, below, k, start, end, q, losses, quantiles ) - return np.asarray(losses) + return np.asarray(losses), np.asarray(quantiles) cdef class Pinball(Criterion): diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 8acd216ae4c08..faf0356a3aac1 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -2913,13 +2913,8 @@ def compute_prefix_losses_naive(y: np.ndarray, w: np.ndarray): ) def compute_pinball_loss(y: np.ndarray, w: np.ndarray): - # 1) compute the weighted median - # i.e. 
once ordered by y, search for i such that: - # sum(w[:i]) <= q*sum(w) and sum(w[i+1:]) >= q*sum(w) - sorter = np.argsort(y) - wc = np.cumsum(w[sorter]) - idx = np.searchsorted(wc, wc[-1] * q) - quantile = y[sorter[idx]] + # 1) compute the weighted quantile + quantile = np.quantile(y, q, weights=w, method="inverted_cdf") y_pred = np.full(y.size, quantile) # 2) compute the pinball loss return mean_pinball_loss(y, y_pred, sample_weight=w, alpha=q) * w.sum() From 075243c4f8b70e86002363671e75714a9778c1aa Mon Sep 17 00:00:00 2001 From: Arthur Date: Tue, 23 Sep 2025 13:16:10 +0200 Subject: [PATCH 5/5] fixes --- sklearn/tree/_classes.py | 2 +- sklearn/tree/_criterion.pyx | 6 +++--- sklearn/tree/tests/test_tree.py | 26 ++++++++++++++------------ 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index 6f7c8b15dd5eb..d6ee4193a2373 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -389,7 +389,7 @@ def _fit( args = (*args, self.pinball_alpha) if self.criterion == "absolute_error": # FIXME: this is coupled with code at a much lower level - # because of some inheritance shenanigans with __cinit__ + # because of the inheritance behavior of __cinit__ args = (*args, 0.5) criterion = CRITERIA_REG[self.criterion](*args) else: diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 4db9d3f0cd33b..8ff9b3f9a87ce 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -1322,7 +1322,7 @@ def _py_precompute_pinball_losses( ys, sample_weight, sample_indices, above, below, k, start, end, q, losses, quantiles ) - return np.asarray(losses), np.asarray(quantiles) + return np.asarray(losses) cdef class Pinball(Criterion): @@ -1575,10 +1575,10 @@ cdef class Pinball(Criterion): p_impurity_right[0] = impurity_right / (self.weighted_n_right * self.n_outputs) - # those 2 methods are copied from the RegressionCriterion abstract class: def __reduce__(self): - return (type(self), (self.n_outputs, self.n_samples), self.__getstate__()) + return (type(self), (self.n_outputs, self.n_samples, self.alpha), self.__getstate__()) + # this method is copied from the RegressionCriterion abstract class: cdef inline void clip_node_value(self, float64_t* dest, float64_t lower_bound, float64_t upper_bound) noexcept nogil: """Clip the value in dest between lower_bound and upper_bound for monotonic constraints.""" if dest[0] < lower_bound: diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index faf0356a3aac1..83a2b4eae4893 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -1828,12 +1828,14 @@ def _pickle_copy(obj): assert_array_equal(n_classes, n_classes_) for name, typename in CRITERIA_REG.items(): - criteria = DecisionTreeRegressor(criterion=name).criterion + args = (n_outputs, n_samples) + if name == "absolute_error" or name == "pinball": + args = (*args, 0.5) + criteria = typename(*args) result = copy_func(criteria).__reduce__() - typename_, (n_outputs_, n_samples_), _ = result + typename_, args_, _ = result assert typename == typename_ - assert n_outputs == n_outputs_ - assert n_samples == n_samples_ + assert args == args_ @pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS) @@ -2925,13 +2927,13 @@ def compute_pinball_loss(y: np.ndarray, w: np.ndarray): y = rng.uniform(size=(n, 1)) w = rng.rand(n) indices = np.arange(n) - abs_errors = _py_precompute_pinball_losses(y, w, indices, 0, n, q=q) + pb_losses = 
_py_precompute_pinball_losses(y, w, indices, 0, n, q=q) expected = compute_prefix_losses_naive(y, w) - assert np.allclose(abs_errors, expected) + assert np.allclose(pb_losses, expected) - abs_errors = _py_precompute_pinball_losses(y, w, indices, n - 1, -1, q=q) + pb_losses = _py_precompute_pinball_losses(y, w, indices, n - 1, -1, q=q) expected = compute_prefix_losses_naive(y[::-1], w[::-1])[::-1] - assert np.allclose(abs_errors, expected) + assert np.allclose(pb_losses, expected) x = rng.rand(n) indices = np.argsort(x) @@ -2939,10 +2941,10 @@ def compute_pinball_loss(y: np.ndarray, w: np.ndarray): y_sorted = y[indices] w_sorted = w[indices] - abs_errors = _py_precompute_pinball_losses(y, w, indices, 0, n, q=q) + pb_losses = _py_precompute_pinball_losses(y, w, indices, 0, n, q=q) expected = compute_prefix_losses_naive(y_sorted, w_sorted) - assert np.allclose(abs_errors, expected) + assert np.allclose(pb_losses, expected) - abs_errors = _py_precompute_pinball_losses(y, w, indices, n - 1, -1, q=q) + pb_losses = _py_precompute_pinball_losses(y, w, indices, n - 1, -1, q=q) expected = compute_prefix_losses_naive(y_sorted[::-1], w_sorted[::-1])[::-1] - assert np.allclose(abs_errors, expected) + assert np.allclose(pb_losses, expected)
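End-to-end, the series exposes the new criterion as `DecisionTreeRegressor(criterion="pinball", pinball_alpha=...)`, keeping `"absolute_error"` as the alpha=0.5 special case. A usage sketch (illustrative only; assumes a scikit-learn build that includes this branch):

    import numpy as np
    from sklearn.tree import DecisionTreeRegressor

    rng = np.random.RandomState(0)
    X = rng.uniform(size=(500, 1))
    # Heteroscedastic noise so the conditional quantiles actually differ.
    y = X.ravel() + (1 + X.ravel()) * rng.exponential(size=500)

    # pinball_alpha defaults to 0.5, i.e. each leaf predicts a weighted median,
    # matching criterion="absolute_error" up to the factor of 2 in the impurity.
    median_tree = DecisionTreeRegressor(criterion="pinball", max_depth=3).fit(X, y)

    # With pinball_alpha=0.9 each leaf predicts (approximately) the weighted
    # 0.9-quantile of the training samples it contains.
    upper_tree = DecisionTreeRegressor(
        criterion="pinball", pinball_alpha=0.9, max_depth=3
    ).fit(X, y)

    # The 0.9-quantile tree should sit above the median tree on average.
    assert upper_tree.predict(X).mean() > median_tree.predict(X).mean()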