# Source code for mapie.conformity_scores.sets.naive

from typing import Tuple, Union, Optional

import numpy as np

from mapie.conformity_scores.classification import BaseClassificationScore
from mapie.conformity_scores.sets.utils import (
    get_last_index_included
)
from sklearn.model_selection import BaseCrossValidator

from mapie._machine_precision import EPSILON
from numpy.typing import NDArray


class NaiveConformityScore(BaseClassificationScore):
    """
    Naive classification non-conformity score method that is based on the
    cumulative sum of probabilities until the 1-alpha threshold.

    In this class the conformity scores are never actually computed
    (``get_conformity_scores`` returns an uninitialized placeholder) and
    the quantiles are simply ``1 - alpha``.

    Attributes
    ----------
    classes: Optional[ArrayLike]
        Names of the classes.

    random_state: Optional[Union[int, RandomState]]
        Pseudo random number generator state.

    quantiles_: ArrayLike of shape (n_alpha)
        The quantiles estimated from ``get_sets`` method. They are read by
        ``get_prediction_sets`` as the cumulative-probability thresholds.
    """

    def __init__(self) -> None:
        super().__init__()

    def get_conformity_scores(
        self,
        y: NDArray,
        y_pred: NDArray,
        **kwargs
    ) -> NDArray:
        """
        Get the conformity score.

        The returned array is only a placeholder: it is allocated with
        ``np.empty`` and therefore holds arbitrary, uninitialized values.
        No other method of this class reads those values.

        Parameters
        ----------
        y: NDArray of shape (n_samples,)
            Observed target values (not used here).

        y_pred: NDArray
            Predicted target values. Only its shape is used.

        Returns
        -------
        NDArray
            Float array with the same shape as ``y_pred`` whose content is
            uninitialized.
        """
        # Placeholder only: the values are deliberately left uninitialized
        # because nothing in this class consumes them.
        conformity_scores = np.empty(y_pred.shape, dtype="float")
        return conformity_scores

    def get_predictions(
        self,
        X: NDArray,
        alpha_np: NDArray,
        y_pred_proba: NDArray,
        cv: Optional[Union[int, str, BaseCrossValidator]],
        **kwargs
    ) -> NDArray:
        """
        Just processes the passed y_pred_proba.

        The probabilities are duplicated along a new trailing axis so that
        there is one copy per value of ``alpha_np``.

        Parameters
        -----------
        X: NDArray of shape (n_samples, n_features)
            Observed feature values (not used since predictions are passed).

        alpha_np: NDArray of shape (n_alpha,)
            NDArray of floats between ``0`` and ``1``, represents the
            uncertainty of the confidence interval.

        y_pred_proba: NDArray of shape (n_samples, n_classes)
            Predicted probabilities from the estimator.

        cv: Optional[Union[int, str, BaseCrossValidator]]
            Cross-validation strategy used by the estimator (not used here).

        Returns
        --------
        NDArray of shape (n_samples, n_classes, n_alpha)
            The predicted probabilities repeated ``len(alpha_np)`` times
            along the last axis.
        """
        y_pred_proba = np.repeat(
            y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2
        )
        return y_pred_proba

    def get_conformity_score_quantiles(
        self,
        conformity_scores: NDArray,
        alpha_np: NDArray,
        cv: Optional[Union[int, str, BaseCrossValidator]],
        **kwargs
    ) -> NDArray:
        """
        Get the quantiles of the conformity scores for each uncertainty level.

        For the naive method the quantiles do not depend on the conformity
        scores: they are directly ``1 - alpha_np``.

        Parameters
        -----------
        conformity_scores: NDArray of shape (n_samples,)
            Conformity scores for each sample (not used here).

        alpha_np: NDArray of shape (n_alpha,)
            NDArray of floats between 0 and 1, representing the uncertainty
            of the confidence interval.

        cv: Optional[Union[int, str, BaseCrossValidator]]
            Cross-validation strategy used by the estimator (not used here).

        Returns
        --------
        NDArray
            Array of quantiles with respect to alpha_np, i.e.
            ``1 - alpha_np``.
        """
        quantiles_ = 1 - alpha_np
        return quantiles_

    def _add_regularization(
        self,
        y_pred_proba_sorted_cumsum: NDArray,
        **kwargs
    ) -> NDArray:
        """
        Add regularization to the sorted cumulative sum of predicted
        probabilities.

        The naive method applies no regularization, so the input is
        returned unchanged.

        Parameters
        ----------
        y_pred_proba_sorted_cumsum: NDArray of shape (n_samples, n_classes)
            The sorted cumulative sum of predicted probabilities.

        **kwargs: dict, optional
            Additional keyword arguments that might be used.
            The current implementation does not use any.

        Returns
        -------
        NDArray
            The adjusted cumulative sum of predicted probabilities after
            applying the regularization technique (here, the very same
            array that was passed in).
        """
        return y_pred_proba_sorted_cumsum

    def _get_last_included_proba(
        self,
        y_pred_proba: NDArray,
        thresholds: NDArray,
        include_last_label: Union[bool, str, None],
        **kwargs
    ) -> Tuple[NDArray, NDArray, NDArray]:
        """
        Function that returns the smallest score
        among those which are included in the prediction set.

        Parameters
        ----------
        y_pred_proba: NDArray of shape (n_samples, n_classes)
            Predictions of the model.
            NOTE(review): the return shapes documented below suggest the
            array is actually passed with a trailing ``n_alphas`` axis
            (as produced by ``get_predictions``) -- confirm with callers.

        thresholds: NDArray of shape (n_alphas, )
            Quantiles that have been computed from the conformity scores.

        include_last_label: Union[bool, str, None]
            Whether to include or not the label whose score exceeds threshold.

        **kwargs: dict, optional
            Forwarded as-is to ``_add_regularization``.

        Returns
        -------
        Tuple[ArrayLike, ArrayLike, ArrayLike]
            Arrays of shape (n_samples, n_classes, n_alphas),
            (n_samples, 1, n_alphas) and (n_samples, 1, n_alphas).
            They are respectively the cumsumed scores in the original
            order which can be different according to the value of alpha
            with the RAPS method, the index of the last included score
            and the value of the last included score.
        """
        # class indices ordered by decreasing probability
        index_sorted = np.flip(
            np.argsort(y_pred_proba, axis=1), axis=1
        )
        # sort probabilities by decreasing order
        y_pred_proba_sorted = np.take_along_axis(
            y_pred_proba, index_sorted, axis=1
        )
        # get sorted cumulated score
        y_pred_proba_sorted_cumsum = np.cumsum(y_pred_proba_sorted, axis=1)
        y_pred_proba_sorted_cumsum = self._add_regularization(
            y_pred_proba_sorted_cumsum, **kwargs
        )  # Do nothing as no regularization for the naive method

        # get cumulated score at their original position: argsort of the
        # sorting permutation is its inverse permutation
        y_pred_proba_cumsum = np.take_along_axis(
            y_pred_proba_sorted_cumsum,
            np.argsort(index_sorted, axis=1),
            axis=1
        )
        # get index of the last included label
        y_pred_index_last = get_last_index_included(
            y_pred_proba_cumsum,
            thresholds,
            include_last_label
        )
        # get the probability of the last included label
        y_pred_proba_last = np.take_along_axis(
            y_pred_proba,
            y_pred_index_last,
            axis=1
        )

        zeros_scores_proba_last = (y_pred_proba_last <= EPSILON)

        # If the last included proba is zero, change it to the
        # smallest non-zero value to avoid including them in the
        # prediction sets.
        if zeros_scores_proba_last.any():
            # Per-sample minimum over the probabilities that are not below
            # EPSILON; masked entries are filled with +inf so that they can
            # never be selected as the minimum.
            y_pred_proba_last[zeros_scores_proba_last] = np.expand_dims(
                np.min(
                    np.ma.masked_less(
                        y_pred_proba,
                        EPSILON
                    ).filled(fill_value=np.inf),
                    axis=1
                ), axis=1
            )[zeros_scores_proba_last]

        return y_pred_proba_cumsum, y_pred_index_last, y_pred_proba_last

    def get_prediction_sets(
        self,
        y_pred_proba: NDArray,
        conformity_scores: NDArray,
        alpha_np: NDArray,
        cv: Optional[Union[int, str, BaseCrossValidator]],
        **kwargs
    ) -> NDArray:
        """
        Generate prediction sets based on the probability predictions,
        the conformity scores and the uncertainty level.

        The thresholds are read from ``self.quantiles_``, which must
        therefore be set before this method is called.

        Parameters
        -----------
        y_pred_proba: NDArray of shape (n_samples, n_classes)
            Target prediction.
            NOTE(review): presumably already expanded by
            ``get_predictions`` to (n_samples, n_classes, n_alpha) --
            confirm with callers.

        conformity_scores: NDArray of shape (n_samples,)
            Conformity scores for each sample (not used here).

        alpha_np: NDArray of shape (n_alpha,)
            NDArray of floats between 0 and 1, representing the uncertainty
            of the confidence interval (not used here).

        cv: Optional[Union[int, str, BaseCrossValidator]]
            Cross-validation strategy used by the estimator (not used here).

        Returns
        --------
        NDArray
            Boolean array, ``True`` where the class probability is greater
            than or equal to the probability of the last included label
            (up to an ``EPSILON`` tolerance), i.e. where the class belongs
            to the prediction set.
        """
        # probability of the last label included in each prediction set;
        # the label crossing the threshold is always included
        _, _, y_pred_proba_last = (
            self._get_last_included_proba(
                y_pred_proba,
                thresholds=self.quantiles_,
                include_last_label=True
            )
        )
        # get the prediction set by taking all probabilities above the last one
        prediction_sets = np.greater_equal(
            y_pred_proba - y_pred_proba_last, -EPSILON
        )

        return prediction_sets