.. DO NOT EDIT. .. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. .. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: .. "examples_regression/2-advanced-analysis/plot_coverage_validity.py" .. LINE NUMBERS ARE GIVEN BELOW. .. only:: html .. note:: :class: sphx-glr-download-link-note Click :ref:`here ` to download the full example code .. rst-class:: sphx-glr-example-title .. _sphx_glr_examples_regression_2-advanced-analysis_plot_coverage_validity.py: =============================================================================== Coverage validity of MAPIE for regression tasks =============================================================================== This example verifies that conformal claims are valid in the MAPIE package when using the CP prefit/split methods. This notebook is inspired from the notebook used for episode "Uncertainty Quantification: Avoid these Missteps in Validating Your Conformal Claims!" (link to the [orginal notebook](https://github.com/mtorabirad/MLBoost)). For more details on theoretical guarantees: [1] Vovk, Vladimir, Alexander Gammerman, and Glenn Shafer. "Algorithmic Learning in a Random World." Springer Nature, 2022. [2] Angelopoulos, Anastasios N., and Stephen Bates. "Conformal prediction: A gentle introduction." Foundations and TrendsĀ® in Machine Learning 16.4 (2023): 494-591. .. GENERATED FROM PYTHON SOURCE LINES 23-43 .. code-block:: default import numpy as np import matplotlib.pyplot as plt from sklearn.tree import DecisionTreeRegressor from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split from mapie.regression import SplitConformalRegressor from mapie.conformity_scores import AbsoluteConformityScore from mapie.metrics.regression import regression_coverage_score from joblib import Parallel, delayed import warnings warnings.filterwarnings("ignore") warnings.simplefilter("ignore", RuntimeWarning) warnings.simplefilter("ignore", UserWarning) .. GENERATED FROM PYTHON SOURCE LINES 44-51 Section 1: Comparison with the split conformalizer method (light version) ----------------------------------------------------------------------------------------------- We propose here to implement a lighter version of split CP by calculating the quantile with a small correction according to [1]. We prepare the fit/conformalize/test routine in order to calculate the average coverage over several simulations. .. GENERATED FROM PYTHON SOURCE LINES 51-174 .. code-block:: default # Conformalizer Class class StandardConformalizer(): def __init__( self, pre_trained_model, non_conformity_func, confidence_level ): # Initialize the conformalizer with required parameters self.estimator = pre_trained_model self.non_conformity_func = non_conformity_func self.confidence_level = confidence_level def _calculate_quantile(self, scores_conformalize): # Calculate the quantile value based on delta and non-conformity scores self.delta_cor = np.ceil( self.confidence_level * (self.n_conformalize + 1)) / self.n_conformalize return np.quantile(scores_conformalize, self.delta_cor, method='lower') def _conformalize(self, X_conformalize, y_conformalize): # Calibrate the conformalizer to calculate q_hat y_conformalize_pred = self.estimator.predict(X_conformalize) scores_conformalize = self.non_conformity_func( y_conformalize_pred, y_conformalize) self.q_hat = self._calculate_quantile(scores_conformalize) def fit(self, X, y): # Fit the conformalizer to the data and calculate q_hat self.n_conformalize = X.shape[0] self._conformalize(X, y) return self def predict(self, X): # Returns the predicted interval y_pred = self.estimator.predict(X) y_lower, y_upper = y_pred - self.q_hat, y_pred + self.q_hat y_pis = np.expand_dims(np.stack([y_lower, y_upper], axis=1), axis=-1) return y_lower, y_pis def non_conformity_func(y, y_hat): return np.abs(y - y_hat) def get_coverage_prefit( conformalizer, data, target, n_conformalize, random_state=None ): """ Calculate the fraction of test samples within the predicted intervals. This function splits the data into a conformalize set and a test set, and fits the regressor to the conformalize set. It then predicts intervals for the test set and calculates the fraction of test samples within these intervals. Parameters: ----------- conformalizer: object A mapie regressor object. data: array-like of shape (n_samples, n_features) The data to be split into a train set and a test set. target: array-like of shape (n_samples,) The target values for the data. n_conformalize: int The length of the conformalize set. random_state: int Random state for the data splits. Returns: -------- coverage: float The coverage within the predicted intervals. """ # Split data step X_conformalize, X_test, y_conformalize, y_test = train_test_split( data, target, train_size=n_conformalize, random_state=random_state ) if isinstance(conformalizer, SplitConformalRegressor): # Calibration step conformalizer.conformalize(X_conformalize, y_conformalize) # Prediction step _, y_pis = conformalizer.predict_interval(X_test) # Coverage step coverage = regression_coverage_score(y_test, y_pis) else: # Calibration step conformalizer.fit(X_conformalize, y_conformalize) # Prediction step _, y_pis = conformalizer.predict(X_test) # Coverage step coverage = regression_coverage_score(y_test, y_pis) return coverage def cumulative_average(arr): """ Calculate the cumulative average of a list of numbers. This function computes the cumulative average of a list of numbers by calculating the cumulative sum of the numbers and dividing it by the index of the current number. Parameters: ----------- arr: List[float] The input list of numbers. Returns: -------- cumulative_avg: List[float] The cumulative average of the input list. """ cumsum = np.cumsum(arr) indices = np.arange(1, len(arr) + 1) cumulative_avg = cumsum / indices return cumulative_avg .. GENERATED FROM PYTHON SOURCE LINES 175-181 Experiment 1: Coverage Validity for given confidence_level and n_conformalize -------------------------------------------------------------------------------- To begin, we propose to use ``confidence_level=0.8`` and ``n_conformalize=6`` and compare the coverage validity claim of the MAPIE class and the referenced class. .. GENERATED FROM PYTHON SOURCE LINES 181-268 .. code-block:: default RANDOM_STATE = 1 # Parameters of the modelisation confidence_level = 0.8 n_conformalize = 6 n_train = 1000 n_test = 1000 num_splits = 1000 # Load toy Data n_all = n_train + n_conformalize + n_test data, target = make_regression(n_all, random_state=RANDOM_STATE) # Split dataset into train, conformalize_validation sets X_train, X_conformalize_test, y_train, y_conformalize_test = train_test_split( data, target, train_size=n_train, random_state=RANDOM_STATE ) # Create a regression model and fit it to the train data model = DecisionTreeRegressor(random_state=RANDOM_STATE) model.fit(X_train, y_train) # Compute theorical bounds and exact coverage to attempt lower_bound = confidence_level upper_bound = (confidence_level + 1 / (n_conformalize + 1)) exact_cov = (np.ceil((n_conformalize + 1) * confidence_level)) / (n_conformalize + 1) # Run the experiment empirical_coverages_ref = [] empirical_coverages_mapie = [] for random_state in range(1, num_splits): # Compute empirical coverage for each trial with StandardConformalizer conformalizer = StandardConformalizer( pre_trained_model=model, non_conformity_func=non_conformity_func, confidence_level=confidence_level ) coverage = get_coverage_prefit( conformalizer=conformalizer, data=X_conformalize_test, target=y_conformalize_test, n_conformalize=n_conformalize, random_state=random_state ) empirical_coverages_ref.append(coverage) # Compute empirical coverage for each trial with SplitConformalRegressor conformalizer = SplitConformalRegressor( estimator=model, confidence_level=confidence_level, prefit=True ) coverage = get_coverage_prefit( conformalizer=conformalizer, data=X_conformalize_test, target=y_conformalize_test, n_conformalize=n_conformalize, random_state=random_state ) empirical_coverages_mapie.append(coverage) cumulative_averages_ref = cumulative_average(arr=empirical_coverages_ref) cumulative_averages_mapie = cumulative_average(arr=empirical_coverages_mapie) # Plot the results fig, ax = plt.subplots() plt.plot(cumulative_averages_ref, alpha=0.5, label='SplitCP', color='r') plt.plot(cumulative_averages_mapie, alpha=0.5, label='MAPIE', color='g') plt.hlines(exact_cov, 0, num_splits, color='r', ls='--', label='Exact Cov.') plt.hlines(lower_bound, 0, num_splits, color='k', label='Lower Bound') plt.hlines(upper_bound, 0, num_splits, color='b', label='Upper Bound') plt.xlabel(r'Split Number') plt.ylabel(r'$\overline{\mathbb{C}}$') plt.title( r'$|D_{cal}| = $' + str(n_conformalize) + r' and $\delta = $' + str(confidence_level) ) plt.legend(loc="upper right", ncol=2) plt.ylim(0.7, 1) plt.tight_layout() plt.show() .. image-sg:: /examples_regression/2-advanced-analysis/images/sphx_glr_plot_coverage_validity_001.png :alt: $|D_{cal}| = $6 and $\delta = $0.8 :srcset: /examples_regression/2-advanced-analysis/images/sphx_glr_plot_coverage_validity_001.png :class: sphx-glr-single-img .. GENERATED FROM PYTHON SOURCE LINES 269-273 It can be seen that the two curves overlap, proving that both methods produce the same results. Their effective coverage stabilizes between the theoretical limits, always above the target coverage and converges towards the exact coverage (i.e. expected according to the theory). .. GENERATED FROM PYTHON SOURCE LINES 276-282 Experiment 2: Coverage validity with different random states ----------------------------------------------------------------------------- We just propose to reproduce the previous experiment without fixing the random_state. The methods therefore follow different trajectories but always achieve the expected coverage. .. GENERATED FROM PYTHON SOURCE LINES 282-341 .. code-block:: default # Run the experiment empirical_coverages_ref = [] empirical_coverages_mapie = [] for random_state in range(1, num_splits): # Compute empirical coverage for each trial with StandardConformalizer conformalizer = StandardConformalizer( pre_trained_model=model, non_conformity_func=non_conformity_func, confidence_level=confidence_level ) coverage = get_coverage_prefit( conformalizer=conformalizer, data=X_conformalize_test, target=y_conformalize_test, n_conformalize=n_conformalize, random_state=random_state ) empirical_coverages_ref.append(coverage) # Compute empirical coverage for each trial conformalizer = SplitConformalRegressor( estimator=model, confidence_level=confidence_level, prefit=True ) coverage = get_coverage_prefit( conformalizer=conformalizer, data=X_conformalize_test, target=y_conformalize_test, n_conformalize=n_conformalize, random_state=num_splits + random_state ) empirical_coverages_mapie.append(coverage) cumulative_averages_ref = cumulative_average(empirical_coverages_ref) cumulative_averages_mapie = cumulative_average(empirical_coverages_mapie) # Plot the results fig, ax = plt.subplots() plt.plot(cumulative_averages_ref, alpha=0.5, label='SplitCP', color='r') plt.plot(cumulative_averages_mapie, alpha=0.5, label='MAPIE', color='g') plt.hlines(exact_cov, 0, num_splits, color='r', ls='--', label='Exact Cov.') plt.hlines(lower_bound, 0, num_splits, color='k', label='Lower Bound') plt.hlines(upper_bound, 0, num_splits, color='b', label='Upper Bound') plt.xlabel(r'Split Number') plt.ylabel(r'$\overline{\mathbb{C}}$') plt.title( r'$|D_{cal}| = $' + str(n_conformalize) + r' and $\delta = $' + str(confidence_level) ) plt.legend(loc="upper right", ncol=2) plt.ylim(0.7, 1) plt.tight_layout() plt.show() .. image-sg:: /examples_regression/2-advanced-analysis/images/sphx_glr_plot_coverage_validity_002.png :alt: $|D_{cal}| = $6 and $\delta = $0.8 :srcset: /examples_regression/2-advanced-analysis/images/sphx_glr_plot_coverage_validity_002.png :class: sphx-glr-single-img .. GENERATED FROM PYTHON SOURCE LINES 342-348 Section 2: Comparison with different MAPIE CP methods ----------------------------------------------------------------------------- We propose to reproduce the previous experience with different methods of the MAPIE package (prefit, prefit with asymmetrical non-conformity scores and split). .. GENERATED FROM PYTHON SOURCE LINES 348-394 .. code-block:: default def run_get_coverage_prefit( model, method, params, n_conformalize, data, target, confidence_level, random_state, num_splits ): if method == "reference": ref_reg = StandardConformalizer( pre_trained_model=model, non_conformity_func=non_conformity_func, confidence_level=confidence_level ) coverage = get_coverage_prefit( conformalizer=ref_reg, data=data, target=target, n_conformalize=n_conformalize, random_state=random_state ) else: mapie_reg = SplitConformalRegressor( estimator=model, confidence_level=confidence_level, **params ) coverage = get_coverage_prefit( conformalizer=mapie_reg, data=data, target=target, n_conformalize=n_conformalize, random_state=num_splits + random_state ) return coverage STRATEGIES = { "reference": None, "prefit": dict( prefit=True, conformity_score=AbsoluteConformityScore(sym=True) ), "prefit_asym": dict( prefit=True, conformity_score=AbsoluteConformityScore(sym=False) ), } .. GENERATED FROM PYTHON SOURCE LINES 395-402 Experiment 3: Coverage with different MAPIE CP methods ----------------------------------------------------------------------------- The methods always follow different trajectories but always achieve the expected coverage. Since asymmetric scores can be used, the limits are not exactly the same. We should calculate them differently but that doesn't change our conclusion. .. GENERATED FROM PYTHON SOURCE LINES 402-444 .. code-block:: default # Parameters of the modelisation confidence_level = 0.8 n_conformalize = 12 # for asymmetric non-conformity scores num_splits = 1000 # Run the experiment cumulative_averages_dict = dict() for method, params in STRATEGIES.items(): coverages_list = [] run_params = model, method, params, n_conformalize, data, target, confidence_level coverages_list = Parallel(n_jobs=-1)( delayed(run_get_coverage_prefit)( *run_params, num_splits=num_splits, random_state=random_state ) for random_state in range(num_splits) ) cumulative_averages_dict[method] = cumulative_average(arr=coverages_list) # Plot the results fig, ax = plt.subplots() for method in STRATEGIES: plt.plot(cumulative_averages_dict[method], alpha=0.5, label=method) plt.hlines(exact_cov, 0, num_splits, color='r', ls='--', label='Exact Cov.') plt.hlines(lower_bound, 0, num_splits, color='k', label='Lower Bound') plt.hlines(upper_bound, 0, num_splits, color='b', label='Upper Bound') plt.xlabel(r'Split Number') plt.ylabel(r'$\overline{\mathbb{C}}$') plt.title( r'$|D_{cal}| = $' + str(n_conformalize) + r' and $\delta = $' + str(confidence_level) ) plt.legend(loc="upper right", ncol=2) plt.ylim(0.7, 1) plt.tight_layout() plt.show() .. image-sg:: /examples_regression/2-advanced-analysis/images/sphx_glr_plot_coverage_validity_003.png :alt: $|D_{cal}| = $12 and $\delta = $0.8 :srcset: /examples_regression/2-advanced-analysis/images/sphx_glr_plot_coverage_validity_003.png :class: sphx-glr-single-img .. GENERATED FROM PYTHON SOURCE LINES 445-456 Experiment 4: Extensive experimentation on different delta and n_calib -------------------------------------------------------------------------------------------- Here we propose to extend the experiment on different sizes of the calibration dataset and target coverage. We show the influence of size on effective coverage. In particular, we see that the expected coverage fluctuates between the limits with respect to the size of the calibration dataset but continues to converge towards the target coverage. It can be noted that all methods follow this trajectory and continue to achieve coverage validity. .. GENERATED FROM PYTHON SOURCE LINES 456-543 .. code-block:: default num_splits = 100 nc_min, nc_max = 10, 30 n_calib_array = np.arange(nc_min, nc_max+1, 2) confidence_level = 0.8 confidence_level_array = [confidence_level] final_coverage_dict = { method: {confidence_level: [] for confidence_level in confidence_level_array} for method in STRATEGIES } # Run experiment for method, params in STRATEGIES.items(): for n_conformalize in n_calib_array: coverages_list = [] run_params = ( model, method, params, n_conformalize, data, target, confidence_level ) coverages_list = Parallel(n_jobs=-1)( delayed(run_get_coverage_prefit)( *run_params, num_splits=num_splits, random_state=random_state) for random_state in range(num_splits) ) coverages_list = np.array(coverages_list) final_coverage = cumulative_average(coverages_list)[-1] final_coverage_dict[method][confidence_level].append(final_coverage) # Theorical bounds and exact coverage to attempt def lower_bound_fct(delta): return delta * np.ones_like(n_calib_array) def upper_bound_fct(delta): return delta + 1/(n_calib_array) def upper_bound_asym_fct(delta): return delta + 1/(n_calib_array//2) def exact_coverage_fct(delta): return np.ceil((n_calib_array + 1) * delta) / (n_calib_array + 1) def exact_coverage_asym_fct(delta): new_n = n_calib_array//2 - 1 return np.ceil((new_n + 1) * delta) / (new_n + 1) # Plot the results n_strat = len(final_coverage_dict) nrows, ncols = n_strat, 1 fig, ax = plt.subplots(nrows=nrows, ncols=ncols) for random_state, method in enumerate(final_coverage_dict): # Compute the different bounds, target cov = final_coverage_dict[method][confidence_level] ub = upper_bound_fct(confidence_level) lb = lower_bound_fct(confidence_level) exact_cov = exact_coverage_fct(confidence_level) if 'asym' in method: ub = upper_bound_asym_fct(confidence_level) exact_cov = exact_coverage_asym_fct(confidence_level) ub = np.clip(ub, a_min=0, a_max=1) lb = np.clip(lb, a_min=0, a_max=1) # Plot the results ax[random_state].plot(n_calib_array, cov, alpha=0.5, label=method, color='g') ax[random_state].plot(n_calib_array, lb, color='k', label='Lower Bound') ax[random_state].plot(n_calib_array, ub, color='b', label='Upper Bound') ax[random_state].plot( n_calib_array, exact_cov, color='g', ls='--', label='Exact Cov' ) ax[random_state].hlines( confidence_level, nc_min, nc_max, color='r', ls='--', label='Target Cov' ) ax[random_state].legend(loc="upper right", ncol=2) ax[random_state].set_ylim(np.min(lb) - 0.05, 1.0) ax[random_state].set_xlabel(r'$n_{calib}$') ax[random_state].set_ylabel(r'$\overline{\mathbb{C}}$') fig.suptitle(r'$\delta = $' + str(confidence_level)) plt.show() .. image-sg:: /examples_regression/2-advanced-analysis/images/sphx_glr_plot_coverage_validity_004.png :alt: $\delta = $0.8 :srcset: /examples_regression/2-advanced-analysis/images/sphx_glr_plot_coverage_validity_004.png :class: sphx-glr-single-img .. rst-class:: sphx-glr-timing **Total running time of the script:** ( 0 minutes 16.691 seconds) .. _sphx_glr_download_examples_regression_2-advanced-analysis_plot_coverage_validity.py: .. only :: html .. container:: sphx-glr-footer :class: sphx-glr-footer-example .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_coverage_validity.py ` .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: plot_coverage_validity.ipynb ` .. only:: html .. rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery `_