ENH Consistent loss name for absolute error #19733

Merged: 13 commits, merged May 10, 2021

Changes from 10 commits
2 changes: 1 addition & 1 deletion doc/modules/ensemble.rst
@@ -954,7 +954,7 @@ controls the number of iterations of the boosting process::
0.8965

Available losses for regression are 'squared_error',
-'least_absolute_deviation', which is less sensitive to outliers, and
+'absolute_error', which is less sensitive to outliers, and
'poisson', which is well suited to model counts and frequencies. For
classification, 'binary_crossentropy' is used for binary classification and
'categorical_crossentropy' is used for multiclass classification. By default
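As a usage note (not part of this diff): a minimal sketch of the documented option, assuming scikit-learn >= 1.0, where 'absolute_error' is accepted.

```python
# Minimal sketch: fit with the renamed robust loss.
import numpy as np
from sklearn.ensemble import HistGradientBoostingRegressor

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 3))
y = rng.normal(size=200)

# 'absolute_error' (formerly 'least_absolute_deviation') is less sensitive
# to outliers than the default 'squared_error'.
reg = HistGradientBoostingRegressor(loss="absolute_error", random_state=0)
reg.fit(X, y)
print(reg.score(X, y))
```

In scikit-learn versions before 1.0 the extra import `from sklearn.experimental import enable_hist_gradient_boosting` is also required.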
33 changes: 31 additions & 2 deletions doc/whats_new/v1.0.rst
@@ -76,6 +76,35 @@ Changelog
- For :class:`tree.ExtraTreeRegressor`, `criterion="mse"` is deprecated,
use `"squared_error"` instead which is now the default.

- |API| The option for using the absolute error via the ``loss`` and
``criterion`` parameters was made more consistent. The preferred way is to
set the value to `"absolute_error"`. The old option names remain valid and
produce the same models, but they are deprecated and will be removed in
version 1.2.
:pr:`19733` by :user:`Christian Lorentzen <lorentzenchr>`.

- For :class:`ensemble.ExtraTreesRegressor`, `criterion="mae"` is deprecated,
use `"absolute_error"` instead.

- For :class:`ensemble.GradientBoostingRegressor`, `loss="lad"` is deprecated,
use `"absolute_error"` instead.

- For :class:`ensemble.RandomForestRegressor`, `criterion="mae"` is deprecated,
use `"absolute_error"` instead.

- For :class:`ensemble.HistGradientBoostingRegressor`,
`loss="least_absolute_deviation"` is deprecated, use `"absolute_error"`
instead.

- For :class:`linear_model.RANSACRegressor`, `loss="absolute_loss"` is
deprecated, use `"absolute_error"` instead which is now the default.

- For :class:`tree.DecisionTreeRegressor`, `criterion="mae"` is deprecated,
use `"absolute_error"` instead.

- For :class:`tree.ExtraTreeRegressor`, `criterion="mae"` is deprecated,
use `"absolute_error"` instead.

:mod:`sklearn.cluster`
......................

@@ -271,10 +300,10 @@ Changelog
:mod:`sklearn.utils`
....................

- |Enhancement| Deprecated the default value of the `random_state=0` in
:func:`~sklearn.utils.extmath.randomized_svd`. Starting in 1.2,
the default value of `random_state` will be set to `None`.
:pr:`19459` by :user:`Cindy Bezuidenhout <cinbez>` and
:user:`Clifford Akai-Nettey<cliffordEmmanuel>`.

:mod:`sklearn.calibration`
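As a migration sketch for the entries above (illustrative, not part of this PR's diff): during the deprecation period the old spellings still fit the same model, they only emit a FutureWarning. Assuming scikit-learn 1.0:

```python
# Sketch: old and new criterion names fit identical forests; the old one warns.
import warnings
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=100, n_features=4, random_state=0)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", FutureWarning)
    old = RandomForestRegressor(criterion="mae", n_estimators=5,
                                random_state=0).fit(X, y)
# The warning message points to the new name.
assert any("absolute_error" in str(w.message) for w in caught)

new = RandomForestRegressor(criterion="absolute_error", n_estimators=5,
                            random_state=0).fit(X, y)
# Same model, identical predictions.
np.testing.assert_allclose(old.predict(X), new.predict(X))
```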
12 changes: 6 additions & 6 deletions sklearn/ensemble/_base.py
@@ -153,13 +153,13 @@ def _make_estimator(self, append=True, random_state=None):
for p in self.estimator_params})

# TODO: Remove in v1.2
# criterion "mse" would cause warnings in every call to
# criterion "mse" and "mae" would cause warnings in every call to
# DecisionTreeRegressor.fit(..)
-if (
-    isinstance(estimator, (DecisionTreeRegressor, ExtraTreeRegressor))
-    and getattr(estimator, "criterion", None) == "mse"
-):
-    estimator.set_params(criterion="squared_error")
+if isinstance(estimator, (DecisionTreeRegressor, ExtraTreeRegressor)):
+    if getattr(estimator, "criterion", None) == "mse":
+        estimator.set_params(criterion="squared_error")
+    elif getattr(estimator, "criterion", None) == "mae":
+        estimator.set_params(criterion="absolute_error")

if random_state is not None:
_set_random_states(estimator, random_state)
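The nested if/elif above could equally be expressed as a small lookup; the helper below is only an illustrative sketch of the same remapping (the name `_remap_deprecated_criterion` is hypothetical, not code from this PR):

```python
# Illustrative sketch of what BaseEnsemble._make_estimator does: deprecated
# tree criteria are translated once on the sub-estimator so that its fit()
# does not re-emit the FutureWarning on every call.
# TODO(1.2): drop together with the deprecated names.
_DEPRECATED_TREE_CRITERIA = {"mse": "squared_error", "mae": "absolute_error"}


def _remap_deprecated_criterion(estimator):
    criterion = getattr(estimator, "criterion", None)
    if criterion in _DEPRECATED_TREE_CRITERIA:
        estimator.set_params(criterion=_DEPRECATED_TREE_CRITERIA[criterion])
    return estimator
```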
47 changes: 31 additions & 16 deletions sklearn/ensemble/_forest.py
@@ -346,16 +346,21 @@ def fit(self, X, y, sample_weight=None):
# Check parameters
self._validate_estimator()
# TODO: Remove in v1.2
-if (
-    isinstance(self, (RandomForestRegressor, ExtraTreesRegressor))
-    and self.criterion == "mse"
-):
-    warn(
-        "Criterion 'mse' was deprecated in v1.0 and will be "
-        "removed in version 1.2. Use `criterion='squared_error'` "
-        "which is equivalent.",
-        FutureWarning
-    )
+if isinstance(self, (RandomForestRegressor, ExtraTreesRegressor)):
+    if self.criterion == "mse":
+        warn(
+            "Criterion 'mse' was deprecated in v1.0 and will be "
+            "removed in version 1.2. Use `criterion='squared_error'` "
+            "which is equivalent.",
+            FutureWarning
+        )
+    elif self.criterion == "mae":
+        warn(
+            "Criterion 'mae' was deprecated in v1.0 and will be "
+            "removed in version 1.2. Use `criterion='absolute_error'` "
+            "which is equivalent.",
+            FutureWarning
+        )

if not self.bootstrap and self.oob_score:
raise ValueError("Out of bag estimation only available"
@@ -1321,11 +1326,12 @@ class RandomForestRegressor(ForestRegressor):
The default value of ``n_estimators`` changed from 10 to 100
in 0.22.

criterion : {"squared_error", "mse", "mae"}, default="squared_error"
criterion : {"squared_error", "mse", "absolute_error", "mae"}, \
default="squared_error"
The function to measure the quality of a split. Supported criteria
are "squared_error" for the mean squared error, which is equal to
-variance reduction as feature selection criterion, and "mae" for the
-mean absolute error.
+variance reduction as feature selection criterion, and "absolute_error"
+for the mean absolute error.

.. versionadded:: 0.18
Mean Absolute Error (MAE) criterion.
@@ -1334,6 +1340,10 @@ class RandomForestRegressor(ForestRegressor):
Criterion "mse" was deprecated in v1.0 and will be removed in
version 1.2. Use `criterion="squared_error"` which is equivalent.

.. deprecated:: 1.0
Criterion "mae" was deprecated in v1.0 and will be removed in
version 1.2. Use `criterion="absolute_error"` which is equivalent.

max_depth : int, default=None
The maximum depth of the tree. If None, then nodes are expanded until
all leaves are pure or until all leaves contain less than
@@ -1936,10 +1946,11 @@ class ExtraTreesRegressor(ForestRegressor):
The default value of ``n_estimators`` changed from 10 to 100
in 0.22.

criterion : {"squared_error", "mse", "mae"}, default="squared_error"
criterion : {"squared_error", "mse", "absolute_error", "mae"}, \
default="squared_error"
The function to measure the quality of a split. Supported criteria
are "squared_error" and "mse" for the mean squared error, which is
equal to variance reduction as feature selection criterion, and "mae"
are "squared_error" for the mean squared error, which is equal to
variance reduction as feature selection criterion, and "absolute_error"
for the mean absolute error.

.. versionadded:: 0.18
@@ -1949,6 +1960,10 @@ class ExtraTreesRegressor(ForestRegressor):
Criterion "mse" was deprecated in v1.0 and will be removed in
version 1.2. Use `criterion="squared_error"` which is equivalent.

.. deprecated:: 1.0
Criterion "mae" was deprecated in v1.0 and will be removed in
version 1.2. Use `criterion="absolute_error"` which is equivalent.

max_depth : int, default=None
The maximum depth of the tree. If None, then nodes are expanded until
all leaves are pure or until all leaves contain less than
32 changes: 21 additions & 11 deletions sklearn/ensemble/_gb.py
@@ -238,11 +238,17 @@ def _check_params(self):
or self.loss not in _gb_losses.LOSS_FUNCTIONS):
raise ValueError("Loss '{0:s}' not supported. ".format(self.loss))

# TODO: Remove in v1.2
if self.loss == "ls":
warnings.warn("The loss 'ls' was deprecated in v1.0 and "
"will be removed in version 1.2. Use 'squared_error'"
" which is equivalent.",
FutureWarning)
elif self.loss == "lad":
warnings.warn("The loss 'lad' was deprecated in v1.0 and "
"will be removed in version 1.2. Use "
"'absolute_error' which is equivalent.",
FutureWarning)

if self.loss == 'deviance':
loss_class = (_gb_losses.MultinomialDeviance
@@ -403,7 +409,7 @@ def fit(self, X, y, sample_weight=None, monitor=None):
-------
self : object
"""
-if self.criterion == 'mae':
+if self.criterion in ('absolute_error', 'mae'):
# TODO: This should raise an error from 1.1
self._warn_mae_for_criterion()

@@ -1340,19 +1346,22 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):

Parameters
----------
-loss : {'squared_error', 'ls', 'lad', 'huber', 'quantile'}, \
-        default='squared_error'
+loss : {'squared_error', 'ls', 'absolute_error', 'lad', 'huber', \
+        'quantile'}, default='squared_error'
Loss function to be optimized. 'squared_error' refers to the squared
-error for regression.
-'lad' (least absolute deviation) is a highly robust
-loss function solely based on order information of the input
-variables. 'huber' is a combination of the two. 'quantile'
-allows quantile regression (use `alpha` to specify the quantile).
+error for regression. 'absolute_error' refers to the absolute error of
+regression and is a robust loss function. 'huber' is a
+combination of the two. 'quantile' allows quantile regression (use
+`alpha` to specify the quantile).

.. deprecated:: 1.0
The loss 'ls' was deprecated in v1.0 and will be removed in
version 1.2. Use `loss='squared_error'` which is equivalent.

.. deprecated:: 1.0
The loss 'lad' was deprecated in v1.0 and will be removed in
version 1.2. Use `loss='absolute_error'` which is equivalent.

learning_rate : float, default=0.1
Learning rate shrinks the contribution of each tree by `learning_rate`.
There is a trade-off between learning_rate and n_estimators.
@@ -1383,7 +1392,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
.. deprecated:: 0.24
`criterion='mae'` is deprecated and will be removed in version
1.1 (renaming of 0.26). The correct way of minimizing the absolute
-error is to use `loss='lad'` instead.
+error is to use `loss='absolute_error'` instead.

.. deprecated:: 1.0
Criterion 'mse' was deprecated in v1.0 and will be removed in
@@ -1644,7 +1653,8 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
"""

# TODO: remove "ls" in verion 1.2
-_SUPPORTED_LOSS = ("squared_error", 'ls', 'lad', 'huber', 'quantile')
+_SUPPORTED_LOSS = ("squared_error", 'ls', "absolute_error", 'lad', 'huber',
+                   'quantile')

@_deprecate_positional_args
def __init__(self, *, loss="squared_error", learning_rate=0.1,
@@ -1681,7 +1691,7 @@ def _warn_mae_for_criterion(self):
warnings.warn("criterion='mae' was deprecated in version 0.24 and "
"will be removed in version 1.1 (renaming of 0.26). The "
"correct way of minimizing the absolute error is to use "
" loss='lad' instead.", FutureWarning)
" loss='absolute_error' instead.", FutureWarning)

def predict(self, X):
"""Predict regression target for X.
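A quick sketch of the warning path touched above (assuming scikit-learn 1.0): minimizing the absolute error via ``criterion`` is still only deprecated, and the message now recommends `loss='absolute_error'` instead of the old `loss='lad'`:

```python
# Sketch: criterion='mae' still fits, but the deprecation warning now points
# at loss='absolute_error'.
import pytest
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor

X, y = make_regression(n_samples=50, n_features=4, random_state=0)
gbr = GradientBoostingRegressor(criterion="mae", n_estimators=5,
                                random_state=0)
with pytest.warns(FutureWarning, match="loss='absolute_error'"):
    gbr.fit(X, y)
```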
3 changes: 2 additions & 1 deletion sklearn/ensemble/_gb_losses.py
@@ -856,10 +856,11 @@ def get_init_raw_predictions(self, X, estimator):
return raw_predictions.reshape(-1, 1).astype(np.float64)


-# TODO: Remove entry 'ls' in version 1.2.
+# TODO: Remove entry 'ls' and 'lad' in version 1.2.
LOSS_FUNCTIONS = {
"squared_error": LeastSquaresError,
'ls': LeastSquaresError,
"absolute_error": LeastAbsoluteError,
'lad': LeastAbsoluteError,
'huber': HuberLossFunction,
'quantile': QuantileLossFunction,
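Because the deprecated and new keys deliberately point to the same loss classes, either spelling yields an identical model; a quick check, as a sketch (note that `_gb_losses` is a private module, shown here for illustration only):

```python
# Sketch: both spellings resolve to the same loss implementation.
from sklearn.ensemble._gb_losses import LOSS_FUNCTIONS

assert LOSS_FUNCTIONS["lad"] is LOSS_FUNCTIONS["absolute_error"]
assert LOSS_FUNCTIONS["ls"] is LOSS_FUNCTIONS["squared_error"]
```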
19 changes: 16 additions & 3 deletions sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -904,8 +904,8 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):

Parameters
----------
-loss : {'squared_error', 'least_squares', 'least_absolute_deviation', \
-        'poisson'}, default='squared_error'
+loss : {'squared_error', 'least_squares', 'absolute_error', \
+        'least_absolute_deviation', 'poisson'}, default='squared_error'
The loss function to use in the boosting process. Note that the
"least squares" and "poisson" losses actually implement
"half least squares loss" and "half poisson deviance" to simplify the
@@ -919,6 +919,11 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):
The loss 'least_squares' was deprecated in v1.0 and will be removed
in version 1.2. Use `loss='squared_error'` which is equivalent.

.. deprecated:: 1.0
The loss 'least_absolute_deviation' was deprecated in v1.0 and will
be removed in version 1.2. Use `loss='absolute_error'` which is
equivalent.

learning_rate : float, default=0.1
The learning rate, also known as *shrinkage*. This is used as a
multiplicative factor for the leaves values. Use ``1`` for no
@@ -1050,7 +1055,7 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):
0.92...
"""

-_VALID_LOSSES = ('squared_error', 'least_squares',
+_VALID_LOSSES = ('squared_error', 'least_squares', 'absolute_error',
'least_absolute_deviation', 'poisson')

@_deprecate_positional_args
@@ -1126,13 +1131,21 @@ def _encode_y(self, y):
return y

def _get_loss(self, sample_weight):
# TODO: Remove in v1.2
if self.loss == "least_squares":
warnings.warn(
"The loss 'least_squares' was deprecated in v1.0 and will be "
"removed in version 1.2. Use 'squared_error' which is "
"equivalent.",
FutureWarning)
return _LOSSES["squared_error"](sample_weight=sample_weight)
elif self.loss == "least_absolute_deviation":
warnings.warn(
"The loss 'least_absolute_deviation' was deprecated in v1.0 "
" and will be removed in version 1.2. Use 'absolute_error' "
"which is equivalent.",
FutureWarning)
return _LOSSES["absolute_error"](sample_weight=sample_weight)

return _LOSSES[self.loss](sample_weight=sample_weight)

2 changes: 1 addition & 1 deletion sklearn/ensemble/_hist_gradient_boosting/loss.py
@@ -420,7 +420,7 @@ def predict_proba(self, raw_predictions):

_LOSSES = {
'squared_error': LeastSquares,
-'least_absolute_deviation': LeastAbsoluteDeviation,
+'absolute_error': LeastAbsoluteDeviation,
'binary_crossentropy': BinaryCrossEntropy,
'categorical_crossentropy': CategoricalCrossEntropy,
'poisson': Poisson,
@@ -36,7 +36,7 @@ def test_same_predictions_regression(seed, min_samples_leaf, n_samples,
# and max_leaf_nodes is low enough.
# - To ignore discrepancies caused by small differences the binning
# strategy, data is pre-binned if n_samples > 255.
-# - We don't check the least_absolute_deviation loss here. This is because
+# - We don't check the absolute_error loss here. This is because
# LightGBM's computation of the median (used for the initial value of
# raw_prediction) is a bit off (they'll e.g. return midpoints when there
# is no need to.). Since these tests only run 1 iteration, the
@@ -194,26 +194,26 @@ def test_should_stop(scores, n_iter_no_change, tol, stopping):
assert gbdt._should_stop(scores) == stopping


-def test_least_absolute_deviation():
+def test_absolute_error():

Member

For deprecations, we normally keep the original code and then configure the test to ignore the FutureWarnings. This is to make sure that we still support the original behavior during the deprecation period.

# TODO: Remove absolute_error in 1.2 when it is removed
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("loss", ["least_absolute_deviation", "absolute_error"])
def test_absolute_error(loss):

For this specific case, there is an argument for just replacing the name, since it is a simple name change and adding another loss that does exactly the same thing would increase the runtime of tests.

I am +0.25 for replacing the name directly in the tests, as you have already done in this PR.

Member Author

Good to know for future deprecations, although it's not my intention to become a deprecation master :smirk:

Equivalence of the new and old models' predict is always tested in test_loss_deprecated. So what is your rounding policy (and where is the other 0.75 going)?

Thanks for your support on this little project!

Member

> So what is your rounding policy (and where is the other 0.75 going)?

It's more of an "absence of 0.75". Thinking about this again, I am now at +0.75 🤣.

# For coverage only.
X, y = make_regression(n_samples=500, random_state=0)
-gbdt = HistGradientBoostingRegressor(loss='least_absolute_deviation',
+gbdt = HistGradientBoostingRegressor(loss='absolute_error',
random_state=0)
gbdt.fit(X, y)
assert gbdt.score(X, y) > .9


-def test_least_absolute_deviation_sample_weight():
+def test_absolute_error_sample_weight():
# non regression test for issue #19400
# make sure no error is thrown during fit of
-# HistGradientBoostingRegressor with least_absolute_deviation loss function
+# HistGradientBoostingRegressor with absolute_error loss function
# and passing sample_weight
rng = np.random.RandomState(0)
n_samples = 100
X = rng.uniform(-1, 1, size=(n_samples, 2))
y = rng.uniform(-1, 1, size=n_samples)
sample_weight = rng.uniform(0, 1, size=n_samples)
-gbdt = HistGradientBoostingRegressor(loss='least_absolute_deviation')
+gbdt = HistGradientBoostingRegressor(loss='absolute_error')
gbdt.fit(X, y, sample_weight=sample_weight)


@@ -652,8 +652,7 @@ def test_sample_weight_effect(problem, duplication):
est_dup._raw_predict(X_dup))


-@pytest.mark.parametrize('loss_name', ('squared_error',
-                         'least_absolute_deviation'))
+@pytest.mark.parametrize('loss_name', ('squared_error', 'absolute_error'))
def test_sum_hessians_are_sample_weight(loss_name):
# For losses with constant hessians, the sum_hessians field of the
# histograms must be equal to the sum of the sample weight of samples at
@@ -995,14 +994,17 @@ def test_uint8_predict(Est):


# TODO: Remove in v1.2
-def test_loss_least_squares_deprecated():
+@pytest.mark.parametrize("loss", ["least_squares", "least_absolute_deviation"])
+def test_loss_deprecated(loss):
X, y = make_regression(n_samples=50, random_state=0)
est1 = HistGradientBoostingRegressor(loss="least_squares", random_state=0)
est1 = HistGradientBoostingRegressor(loss=loss, random_state=0)

with pytest.warns(FutureWarning,
match="The loss 'least_squares' was deprecated"):
match="The loss '" + loss + "' was deprecated"):
est1.fit(X, y)

-est2 = HistGradientBoostingRegressor(loss="squared_error", random_state=0)
+d = {"least_squares": "squared_error",
+     "least_absolute_deviation": "absolute_error"}
+est2 = HistGradientBoostingRegressor(loss=d[loss], random_state=0)
est2.fit(X, y)
assert_allclose(est1.predict(X), est2.predict(X))