# Source code for mapie.conformity_scores.sets.naive
from typing import Tuple, Union, Optional
import numpy as np
from mapie.conformity_scores.classification import BaseClassificationScore
from mapie.conformity_scores.sets.utils import (
get_last_index_included
)
from sklearn.model_selection import BaseCrossValidator
from mapie._machine_precision import EPSILON
from numpy.typing import NDArray
class NaiveConformityScore(BaseClassificationScore):
    """
    Naive classification non-conformity score method that is based on the
    cumulative sum of probabilities until the 1-alpha threshold.

    Attributes
    ----------
    classes: Optional[ArrayLike]
        Names of the classes.

    random_state: Optional[Union[int, RandomState]]
        Pseudo random number generator state.

    quantiles_: ArrayLike of shape (n_alpha)
        The quantiles estimated from ``get_sets`` method.
    """

    def get_conformity_scores(
        self,
        y: NDArray,
        y_pred: NDArray,
        **kwargs
    ) -> NDArray:
        """
        Get the conformity score.

        The returned array is only a placeholder: it is allocated with
        ``np.empty`` and therefore holds uninitialized values. In this
        class the thresholds returned by
        ``get_conformity_score_quantiles`` are computed from ``alpha_np``
        alone, without reading the conformity scores.

        Parameters
        ----------
        y: NDArray
            Observed target values (not used here).

        y_pred: NDArray
            Predicted target values. Only its shape is used.

        Returns
        -------
        NDArray
            Float array with the same shape as ``y_pred`` whose content
            is uninitialized.
        """
        conformity_scores = np.empty(y_pred.shape, dtype="float")
        return conformity_scores

    def get_predictions(
        self,
        X: NDArray,
        alpha_np: NDArray,
        y_pred_proba: NDArray,
        cv: Optional[Union[int, str, BaseCrossValidator]],
        **kwargs
    ) -> NDArray:
        """
        Just processes the passed y_pred_proba.

        A new trailing axis is appended to ``y_pred_proba`` and the
        probabilities are repeated along it once per value of
        ``alpha_np``.

        Parameters
        -----------
        X: NDArray of shape (n_samples, n_features)
            Observed feature values (not used since predictions are passed).

        alpha_np: NDArray of shape (n_alpha,)
            NDArray of floats between ``0`` and ``1``, represents the
            uncertainty of the confidence interval.

        y_pred_proba: NDArray of shape (n_samples, n_classes)
            Predicted probabilities from the estimator.

        cv: Optional[Union[int, str, BaseCrossValidator]]
            Cross-validation strategy used by the estimator (not used here).

        Returns
        --------
        NDArray of shape (n_samples, n_classes, n_alpha)
            Predicted probabilities duplicated for each alpha.
        """
        y_pred_proba = np.repeat(
            y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2
        )
        return y_pred_proba

    def get_conformity_score_quantiles(
        self,
        conformity_scores: NDArray,
        alpha_np: NDArray,
        cv: Optional[Union[int, str, BaseCrossValidator]],
        **kwargs
    ) -> NDArray:
        """
        Get the quantiles of the conformity scores for each uncertainty level.

        For the naive method the threshold is simply ``1 - alpha``; the
        conformity scores themselves are not read.

        Parameters
        -----------
        conformity_scores: NDArray of shape (n_samples,)
            Conformity scores for each sample (not used here).

        alpha_np: NDArray of shape (n_alpha,)
            NDArray of floats between 0 and 1, representing the uncertainty
            of the confidence interval.

        cv: Optional[Union[int, str, BaseCrossValidator]]
            Cross-validation strategy used by the estimator (not used here).

        Returns
        --------
        NDArray of shape (n_alpha,)
            Array of quantiles with respect to alpha_np, equal to
            ``1 - alpha_np``.
        """
        quantiles_ = 1 - alpha_np
        return quantiles_

    def _add_regularization(
        self,
        y_pred_proba_sorted_cumsum: NDArray,
        **kwargs
    ) -> NDArray:
        """
        Add regularization to the sorted cumulative sum of predicted
        probabilities.

        The naive method applies no regularization, so the input is
        returned unchanged.

        Parameters
        ----------
        y_pred_proba_sorted_cumsum: NDArray of shape (n_samples, n_classes)
            The sorted cumulative sum of predicted probabilities.

        **kwargs: dict, optional
            Additional keyword arguments that might be used.
            The current implementation does not use any.

        Returns
        -------
        NDArray
            The adjusted cumulative sum of predicted probabilities after
            applying the regularization technique (here, the very same
            array that was passed in).
        """
        return y_pred_proba_sorted_cumsum

    def _get_last_included_proba(
        self,
        y_pred_proba: NDArray,
        thresholds: NDArray,
        include_last_label: Union[bool, str, None],
        **kwargs
    ) -> Tuple[NDArray, NDArray, NDArray]:
        """
        Function that returns the smallest score
        among those which are included in the prediction set.

        Parameters
        ----------
        y_pred_proba: NDArray
            Predictions of the model, with classes along axis 1.
            Presumably carries a trailing alpha axis, as produced by
            ``get_predictions``, since the returned cumulated scores
            have the same shape as this input.

        thresholds: NDArray of shape (n_alphas, )
            Quantiles that have been computed from the conformity scores.

        include_last_label: Union[bool, str, None]
            Whether to include or not the label whose score exceeds
            threshold. Forwarded as is to ``get_last_index_included``.

        **kwargs: dict, optional
            Forwarded to ``_add_regularization``.

        Returns
        -------
        Tuple[NDArray, NDArray, NDArray]
            Arrays of shape (n_samples, n_classes, n_alphas),
            (n_samples, 1, n_alphas) and (n_samples, 1, n_alphas).
            They are respectively the cumsumed scores in the original
            order which can be different according to the value of alpha
            with the RAPS method, the index of the last included score
            and the value of the last included score.
        """
        # class indices ordered by decreasing probability
        index_sorted = np.flip(
            np.argsort(y_pred_proba, axis=1), axis=1
        )
        # sort probabilities by decreasing order
        y_pred_proba_sorted = np.take_along_axis(
            y_pred_proba, index_sorted, axis=1
        )
        # get sorted cumulated score
        y_pred_proba_sorted_cumsum = np.cumsum(y_pred_proba_sorted, axis=1)
        y_pred_proba_sorted_cumsum = self._add_regularization(
            y_pred_proba_sorted_cumsum, **kwargs
        )  # Do nothing as no regularization for the naive method
        # get cumulated score at their original position: the argsort of
        # the sorting permutation is its inverse permutation
        y_pred_proba_cumsum = np.take_along_axis(
            y_pred_proba_sorted_cumsum,
            np.argsort(index_sorted, axis=1),
            axis=1
        )
        # get index of the last included label
        y_pred_index_last = get_last_index_included(
            y_pred_proba_cumsum,
            thresholds,
            include_last_label
        )
        # get the probability of the last included label
        y_pred_proba_last = np.take_along_axis(
            y_pred_proba,
            y_pred_index_last,
            axis=1
        )
        zeros_scores_proba_last = (y_pred_proba_last <= EPSILON)
        # If the last included proba is zero, change it to the
        # smallest non-zero value to avoid including them in the
        # prediction sets.
        if np.any(zeros_scores_proba_last):
            # Probabilities below EPSILON are replaced by +inf so that
            # they are ignored by the minimum taken over the class axis.
            y_pred_proba_last[zeros_scores_proba_last] = np.expand_dims(
                np.min(
                    np.ma.masked_less(
                        y_pred_proba,
                        EPSILON
                    ).filled(fill_value=np.inf),
                    axis=1
                ), axis=1
            )[zeros_scores_proba_last]
        return y_pred_proba_cumsum, y_pred_index_last, y_pred_proba_last

    def get_prediction_sets(
        self,
        y_pred_proba: NDArray,
        conformity_scores: NDArray,
        alpha_np: NDArray,
        cv: Optional[Union[int, str, BaseCrossValidator]],
        **kwargs
    ) -> NDArray:
        """
        Generate prediction sets based on the probability predictions,
        the conformity scores and the uncertainty level.

        The thresholds are read from ``self.quantiles_``, which is not
        assigned anywhere in this class and must therefore have been set
        before this method is called.

        Parameters
        -----------
        y_pred_proba: NDArray
            Target prediction, with classes along axis 1. Presumably
            the output of ``get_predictions`` (trailing alpha axis).

        conformity_scores: NDArray of shape (n_samples,)
            Conformity scores for each sample (not used here).

        alpha_np: NDArray of shape (n_alpha,)
            NDArray of floats between 0 and 1, representing the uncertainty
            of the confidence interval (not used here).

        cv: Optional[Union[int, str, BaseCrossValidator]]
            Cross-validation strategy used by the estimator (not used here).

        Returns
        --------
        NDArray
            Boolean array of prediction sets: an entry is ``True`` when
            the corresponding probability is at least the probability of
            the last included label, up to a tolerance of ``EPSILON``.
        """
        # probability of the last label included in each prediction set
        _, _, y_pred_proba_last = (
            self._get_last_included_proba(
                y_pred_proba,
                thresholds=self.quantiles_,
                include_last_label=True
            )
        )
        # get the prediction set by taking all probabilities above the last one
        prediction_sets = np.greater_equal(
            y_pred_proba - y_pred_proba_last, -EPSILON
        )
        return prediction_sets