Source code for mapie.risk_control.binary_classification

from __future__ import annotations

import warnings
from typing import Any, Callable, List, Literal, Optional, Tuple, Union

import numpy as np
from numpy.typing import ArrayLike, NDArray

from mapie.utils import check_valid_ltt_params_index

from .methods import ltt_procedure
from .risks import (
    BinaryClassificationRisk,
    abstention_rate,
    accuracy,
    false_positive_rate,
    precision,
    negative_predictive_value,
    positive_predictive_value,
    predicted_positive_fraction,
    recall,
)

Risk_str = Literal[
    "precision",
    "recall",
    "accuracy",
    "fpr",
    "predicted_positive_fraction",
    "positive_predictive_value",
    "negative_predictive_value",
    "abstention_rate",
]
Risk = Union[
    BinaryClassificationRisk,
    Risk_str,
    List[BinaryClassificationRisk],
    List[Risk_str],
    List[Union[BinaryClassificationRisk, Risk_str]],
]



[docs]
class BinaryClassificationController:
    """
    Controls the risk or performance of a binary classifier.

    BinaryClassificationController finds the decision thresholds of a binary classifier
    that statistically guarantee a risk to be below a target level
    (the risk is "controlled").
    It can be used to control a performance metric as well, such as the precision.
    In that case, the thresholds guarantee that the performance is above a target level.

    Usage:

    1. Instantiate a BinaryClassificationController, providing the predict_proba method
       of your binary classifier
    2. Call the calibrate method to find the thresholds
    3. Use the predict method to predict using the best threshold

    Note: for a given model, calibration dataset, target level, and confidence level,
    there may not be any threshold controlling the risk.

    Parameters
    ----------
    predict_function : Callable[[ArrayLike], NDArray]
        predict_proba method of a fitted binary classifier.
        Its output signature must be of shape (len(X), 2).

        Or, in the general case of multi-dimensional parameters (thresholds),
        a function that takes (X, \\*params) and outputs 0 or 1. This can be useful to e.g.,
        ensemble multiple binary classifiers with different thresholds for each classifier.
        In that case, `predict_params` must be provided.

    risk : Union[BinaryClassificationRisk, str, List[BinaryClassificationRisk, str]]
        The risk or performance metric to control.
        Valid options:

        - An existing risk defined in `mapie.risk_control` accessible through
          its string equivalent: "precision", "recall", "accuracy",
          "fpr" for false positive rate, or "predicted_positive_fraction".
        - A custom instance of BinaryClassificationRisk object

        Can be a list of risks in the case of multi risk control.

    target_level : Union[float, List[float]]
        The maximum risk level (or minimum performance level). Must be between 0 and 1.
        Can be a list of target levels in the case of multi risk control (length should
        match the length of the risks list).

    confidence_level : float, default=0.9
        The confidence level with which the risk (or performance) is controlled.
        Must be between 0 and 1. See the documentation for detailed explanations.

    best_predict_param_choice : Union["auto", BinaryClassificationRisk, str],
        default="auto"
        How to select the best threshold from the valid thresholds that control the risk
        (or performance). The BinaryClassificationController will try to minimize
        (or maximize) a secondary objective.
        Valid options:

        - "auto" (default). For mono risk defined in mapie.risk_control, an automatic choice is made.
          For multi risk, we use the first risk in the list.
        - An existing risk defined in `mapie.risk_control` accessible through

          its string equivalent: "precision", "recall", "accuracy",
          "fpr" for false positive rate, or "predicted_positive_fraction".
        - A custom instance of BinaryClassificationRisk object

    list_predict_params : NDArray, default=np.linspace(0, 0.99, 100)
        The set of parameters (noted λ in [1]) to consider for controlling the risk (or performance).
        When `predict_function` is a `predict_proba` method, the shape is (n_params,)
        and the parameter values are used to threshold the probabilities.
        When `predict_function` is a general function with multi-dimensional parameters (λ) that outputs 0 or 1,
        the shape is (n_params, params_dim).
        Note that performance is degraded when `len(predict_params)` is large as it is used by the Bonferroni correction [1].

    Attributes
    ----------
    valid_predict_params : NDArray
        The valid thresholds that control the risk (or performance).
        Use the calibrate method to compute these.

    best_predict_param : Optional[Union[float, Tuple[float, ...]]]
        The best threshold that control the risk (or performance). It is a tuple if multi-dimensional
        parameters are used.
        Use the calibrate method to compute it.

    p_values : NDArray
        P-values associated with each tested parameter in `list_predict_params`.
        In the multi-risk setting, the value corresponds to the maximum over the tested risks.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.model_selection import train_test_split
    >>> from mapie.risk_control import BinaryClassificationController, precision

    >>> X, y = make_classification(
    ...     n_features=2,
    ...     n_redundant=0,
    ...     n_informative=2,
    ...     n_clusters_per_class=1,
    ...     n_classes=2,
    ...     random_state=42,
    ...     class_sep=2.0
    ... )
    >>> X_train, X_temp, y_train, y_temp = train_test_split(
    ...     X, y, test_size=0.4, random_state=42
    ... )
    >>> X_calib, X_test, y_calib, y_test = train_test_split(
    ...     X_temp, y_temp, test_size=0.1, random_state=42
    ... )

    >>> clf = LogisticRegression().fit(X_train, y_train)

    >>> controller = BinaryClassificationController(
    ...     predict_function=clf.predict_proba,
    ...     risk=precision,
    ...     target_level=0.6
    ... )

    >>> predictions = controller.calibrate(X_calib, y_calib).predict(X_test)

    References
    ----------
    [1] Angelopoulos, Anastasios N., Stephen, Bates, Emmanuel J. Candès, et al.
    "Learn Then Test: Calibrating Predictive Algorithms to Achieve Risk Control." (2022)
    """

    _best_predict_param_choice_map = {
        precision: recall,
        recall: precision,
        accuracy: accuracy,
        false_positive_rate: recall,
    }

    risk_choice_map = {
        "precision": precision,
        "recall": recall,
        "accuracy": accuracy,
        "fpr": false_positive_rate,
        "predicted_positive_fraction": predicted_positive_fraction,
        "positive_predictive_value": positive_predictive_value,
        "negative_predictive_value": negative_predictive_value,
        "abstention_rate": abstention_rate,
    }


[docs]
    def __init__(
        self,
        predict_function: Callable[[ArrayLike], NDArray],
        risk: Risk,
        target_level: Union[float, List[float]],
        confidence_level: float = 0.9,
        best_predict_param_choice: Union[
            Literal["auto"], Risk_str, BinaryClassificationRisk
        ] = "auto",
        list_predict_params: NDArray = np.linspace(0, 0.99, 100),
    ):
        self.is_multi_risk = self._check_if_multi_risk_control(risk, target_level)
        self._predict_function = predict_function
        risk_list = risk if isinstance(risk, list) else [risk]
        try:
            self._risk = [
                BinaryClassificationController.risk_choice_map[risk]
                if isinstance(risk, str)
                else risk
                for risk in risk_list
            ]
        except KeyError as e:
            raise ValueError(
                "When risk is provided as a string, it must be one of: "
                f"{list(BinaryClassificationController.risk_choice_map.keys())}"
            ) from e
        target_level_list = (
            target_level if isinstance(target_level, list) else [target_level]
        )
        self._alpha = self._convert_target_level_to_alpha(target_level_list)
        self._delta = 1 - confidence_level

        self._best_predict_param_choice = self._set_best_predict_param_choice(
            best_predict_param_choice
        )

        self._predict_params = list_predict_params
        self.is_multi_dimensional_param = self._check_if_multi_dimensional_param(
            self._predict_params
        )

        self.valid_predict_params: NDArray = np.array([])
        self.best_predict_param: Optional[Union[float, Tuple[float, ...]]] = None
        self.p_values: Optional[NDArray] = None


    # All subfunctions are unit-tested. To avoid having to write
    # tests just to make sure those subfunctions are called,
    # we don't include .calibrate in the coverage report

[docs]
    def calibrate(  # pragma: no cover
        self, X_calibrate: ArrayLike, y_calibrate: ArrayLike
    ) -> BinaryClassificationController:
        """
        Calibrate the BinaryClassificationController.
        Sets attributes valid_predict_params and best_predict_param (if the risk
        or performance can be controlled at the target level).

        Parameters
        ----------
        X_calibrate : ArrayLike
            Features of the calibration set.

        y_calibrate : ArrayLike
            Binary labels of the calibration set.

        Returns
        -------
        BinaryClassificationController
            The calibrated controller instance.
        """
        y_calibrate_ = np.asarray(y_calibrate, dtype=int)

        predictions_per_param = self._get_predictions_per_param(
            X_calibrate, self._predict_params, is_calibration_step=True
        )

        risk_values, eff_sample_sizes = self._get_risk_values_and_eff_sample_sizes(
            y_calibrate_, predictions_per_param, self._risk
        )
        (valid_index, p_values) = ltt_procedure(
            risk_values,
            np.expand_dims(self._alpha, axis=1),
            self._delta,
            eff_sample_sizes,
            True,
        )
        valid_params_index = valid_index[0]

        self.valid_predict_params = self._predict_params[valid_params_index]

        check_valid_ltt_params_index(
            predict_params=self._predict_params, valid_index=self.valid_predict_params
        )

        if len(self.valid_predict_params) == 0:
            self.best_predict_param = None
        else:
            self._set_best_predict_param(
                y_calibrate_,
                predictions_per_param,
                valid_params_index,
            )

        self.p_values = p_values

        return self



[docs]
    def predict(self, X_test: ArrayLike) -> NDArray:
        """
        Predict using predict_function at the best threshold.

        Parameters
        ----------
        X_test : ArrayLike
            Features

        Returns
        -------
        NDArray
            NDArray of shape (n_samples,)

        Raises
        ------
        ValueError
            If the method .calibrate was not called,
            or if no valid thresholds were found during calibration.
        """
        if self.best_predict_param is None:
            raise ValueError(
                "Cannot predict. "
                "Either you forgot to calibrate the controller first, "
                "or calibration was not successful."
            )
        return self._get_predictions_per_param(
            X_test,
            np.array([self.best_predict_param]),
        )[0]


    def _set_best_predict_param_choice(
        self,
        best_predict_param_choice: Union[
            Literal["auto"], Risk_str, BinaryClassificationRisk
        ] = "auto",
    ) -> BinaryClassificationRisk:
        if best_predict_param_choice == "auto":
            if self.is_multi_risk:
                # when multi risk, we minimize the first risk in the list
                return self._risk[0]
            else:
                try:
                    return self._best_predict_param_choice_map[self._risk[0]]
                except KeyError:
                    raise ValueError(
                        "When best_predict_param_choice is 'auto', "
                        "risk must be one of the risks defined in mapie.risk_control"
                        "(e.g. precision, accuracy, false_positive_rate)."
                    )
        if isinstance(best_predict_param_choice, str):
            return BinaryClassificationController.risk_choice_map[
                best_predict_param_choice
            ]
        if isinstance(best_predict_param_choice, BinaryClassificationRisk):
            return best_predict_param_choice

        raise TypeError(
            f"Got object of type {type(best_predict_param_choice)}. "
            "best_predict_param_choice must be either 'auto', "
            "a BinaryClassificationRisk instance, "
            "or a risk name (str) among those defined in mapie.risk_control "
            "(e.g. 'precision', 'accuracy', 'false_positive_rate')."
        )

    def _set_best_predict_param(
        self,
        y_calibrate_: NDArray,
        predictions_per_param: NDArray,
        valid_params_index: List[Any],
    ):
        secondary_risks_per_param, _ = self._get_risk_values_and_eff_sample_sizes(
            y_calibrate_,
            predictions_per_param[valid_params_index],
            [self._best_predict_param_choice],
        )

        self.best_predict_param = self.valid_predict_params[
            np.argmin(secondary_risks_per_param)
        ]
        if isinstance(self.best_predict_param, np.ndarray):
            self.best_predict_param = tuple(self.best_predict_param.tolist())

    @staticmethod
    def _get_risk_values_and_eff_sample_sizes(
        y_true: NDArray,
        predictions_per_param: NDArray,
        risks: List[BinaryClassificationRisk],
    ) -> Tuple[NDArray, NDArray]:
        """
        Compute the values of risks and effective sample sizes for multiple risks
        and for multiple parameter values.
        Returns arrays with shape (n_risks, n_params).
        """

        risks_values_and_eff_sizes = np.array(
            [
                [
                    risk.get_value_and_effective_sample_size(y_true, predictions)
                    for predictions in predictions_per_param
                ]
                for risk in risks
            ]
        )

        risk_values = risks_values_and_eff_sizes[:, :, 0]
        effective_sample_sizes = risks_values_and_eff_sizes[:, :, 1]

        return risk_values, effective_sample_sizes

    def _get_predictions_per_param(
        self, X: ArrayLike, params: NDArray, is_calibration_step=False
    ) -> NDArray:
        """Returns y_pred of shape (n_params, n_samples)"""
        n_params = len(params)
        n_samples = len(np.asarray(X))
        if self.is_multi_dimensional_param:
            y_pred: NDArray[np.float_] = np.empty((n_params, n_samples), dtype=float)
            for i in range(n_params):
                y_pred[i] = self._predict_function(X, *params[i])
            if is_calibration_step:
                self._check_predictions(y_pred)
        else:
            try:
                predictions_proba = self._predict_function(X)[:, 1]
            except TypeError as e:
                if "object is not callable" in str(e):
                    raise TypeError(
                        "Error when calling the predict_function. "
                        "Maybe you provided a binary classifier to the "
                        "predict_function parameter of the BinaryClassificationController. "
                        "You should provide your classifier's predict_proba method instead."
                    ) from e
                else:
                    raise
            except IndexError as e:
                if "array is 1-dimensional, but 2 were indexed" in str(e):
                    raise IndexError(
                        "Error when calling the predict_function. "
                        "Maybe the predict function you provided returns only the "
                        "probability of the positive class. "
                        "You should provide a predict function that returns the "
                        "probabilities of both classes, like scikit-learn estimators."
                    ) from e
                else:
                    raise
            if is_calibration_step:
                self._check_predictions(predictions_proba)
            y_pred = (predictions_proba[:, np.newaxis] >= params).T.astype(int)
        return y_pred

    def _convert_target_level_to_alpha(self, target_level: List[float]) -> NDArray:
        alpha = []
        for risk, target in zip(self._risk, target_level):
            if risk.higher_is_better:
                alpha.append(1 - target)
            else:
                alpha.append(target)
        return np.array(alpha)

    @staticmethod
    def _check_if_multi_risk_control(
        risk: Risk,
        target_level: Union[float, List[float]],
    ) -> bool:
        """
        Check if we are in a multi risk setting and if inputs types are correct.
        """
        if (
            isinstance(risk, list)
            and isinstance(target_level, list)
            and len(risk) == len(target_level)
            and len(risk) > 0
        ):
            if len(risk) == 1:
                return False
            else:
                return True
        elif (
            isinstance(risk, BinaryClassificationRisk) or isinstance(risk, str)
        ) and isinstance(target_level, float):
            return False
        else:
            raise ValueError(
                "If you provide a list of risks, you must provide "
                "a list of target levels of the same length and vice versa. "
                "If you provide a single BinaryClassificationRisk risk, "
                "you must provide a single float target level."
            )

    @staticmethod
    def _check_if_multi_dimensional_param(
        predict_params: NDArray,
    ) -> bool:
        """
        Check if the the parameters (the λ) are multi-dimensional.
        """
        if predict_params.ndim == 1:
            return False
        elif predict_params.ndim == 2:
            return True
        else:
            raise ValueError(
                "predict_params must be a 1D array of shape (n_params,) for one-dimensional parameters, "
                "or a 2D array of shape (n_params, params_dim) for multi-dimensional parameters "
                "(params_dim=1 is allowed for the case when a one-dimensional parameter is not used as a threshold)."
            )

    def _check_predictions(self, predictions_per_param: NDArray) -> None:
        """
        Checks if predictions are probabilities for one-dimensional parameters,
        or binary predictions for multi-dimensional parameters.
        """
        if (
            not self.is_multi_dimensional_param
            and np.logical_or(
                predictions_per_param == 0, predictions_per_param == 1
            ).all()
        ):
            warnings.warn(
                "All predictions are either 0 or 1 while the parameters are one-dimensional. "
                "Make sure that the provided predict_function is a "
                "predict_proba method or a function that outputs probabilities.",
            )

        if (
            self.is_multi_dimensional_param
            and not np.logical_or.reduce(
                (
                    predictions_per_param == 0,
                    predictions_per_param == 1,
                    np.isnan(predictions_per_param),
                )
            ).all()
        ):
            raise ValueError(
                "The provided predict_function with multi-dimensional "
                "parameters must return binary predictions (0, 1, np.nan)."
            )