The symmetric_correction parameter of ConformalizedQuantileRegressor

An example plot of ConformalizedQuantileRegressor illustrating the impact of the symmetric_correction parameter.

import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor

from mapie.metrics.regression import regression_coverage_score
from mapie.regression import ConformalizedQuantileRegressor
from mapie.utils import train_conformalize_test_split

RANDOM_STATE = 1

We generate a synthetic dataset and split it into training, conformalization and test sets.

# Build a synthetic single-feature regression problem (1000 samples,
# noise level 20), seeded so the example is reproducible.
X, y = make_regression(
    n_samples=1000,
    n_features=1,
    noise=20,
    random_state=RANDOM_STATE,
)

# Carve the data into three parts: 60% to train the estimator,
# 20% to conformalize it, and 20% held out for evaluation.
(
    X_train,
    X_conformalize,
    X_test,
    y_train,
    y_conformalize,
    y_test,
) = train_conformalize_test_split(
    X,
    y,
    train_size=0.6,
    conformalize_size=0.2,
    test_size=0.2,
    random_state=RANDOM_STATE,
)


# Target coverage level requested for the prediction intervals.
confidence_level = 0.8

# Base learner: gradient boosting trained with the quantile loss.
gb_reg = GradientBoostingRegressor(
    loss="quantile",
    alpha=0.5,
    random_state=RANDOM_STATE,
)

# Wrap the base learner, fit it on the training split, then
# conformalize it on the dedicated conformalization split.
mapie_qr = ConformalizedQuantileRegressor(
    estimator=gb_reg,
    confidence_level=confidence_level,
)
mapie_qr.fit(X_train, y_train)
mapie_qr.conformalize(X_conformalize, y_conformalize)

# Predict intervals twice on the same test set, toggling only the
# symmetric_correction flag so the two variants can be compared.
y_pred_sym, y_pis_sym = mapie_qr.predict_interval(
    X_test, symmetric_correction=True
)
y_pred_asym, y_pis_asym = mapie_qr.predict_interval(
    X_test, symmetric_correction=False
)

# Raw predictions of the first two fitted quantile estimators
# (presumably the lower and upper quantiles -- this reads a private
# attribute of the regressor, so verify the ordering against MAPIE).
y_qlow, y_qup = (
    estimator.predict(X_test)
    for estimator in mapie_qr._mapie_quantile_regressor.estimators_[:2]
)

print(f"y.shape: {y.shape}")
print(f"y_pis_sym[:, 0].shape: {y_pis_sym[:, 0].shape}")
print(f"y_pis_sym[:, 1].shape: {y_pis_sym[:, 1].shape}")

# Effective coverage of each interval variant on the test set; the
# score is indexed with [0] to extract the single value used below.
coverage_score_sym = regression_coverage_score(y_test, y_pis_sym)[0]
coverage_score_asym = regression_coverage_score(y_test, y_pis_asym)[0]

# Reorder everything by increasing feature value so that the line
# plots are drawn left to right.
order = np.argsort(X_test[:, 0])
X_test_sorted = X_test[order]
y_pred_sym_sorted, y_pis_sym_sorted = y_pred_sym[order], y_pis_sym[order]
y_pred_asym_sorted, y_pis_asym_sorted = y_pred_asym[order], y_pis_asym[order]
y_qlow, y_qup = y_qlow[order], y_qup[order]

Out:

y.shape: (1000,)
y_pis_sym[:, 0].shape: (200, 1)
y_pis_sym[:, 1].shape: (200, 1)

We will plot the predictions and prediction intervals for both the symmetric and the asymmetric correction. In each panel, the solid lines represent the raw lower and upper quantile predictions of the fitted estimators, the dashed lines represent the bounds of the conformalized prediction intervals, and the shaded area represents the resulting prediction interval.

# Side-by-side comparison of the two interval variants on the test set.
plt.figure(figsize=(14, 7))

# Plot symmetric prediction intervals
plt.subplot(1, 2, 1)
plt.xlabel("x")
plt.ylabel("y")
plt.scatter(X_test, y_test, alpha=0.3)
# Solid lines: raw predictions of the fitted quantile estimators.
plt.plot(X_test_sorted, y_qlow, color="C1")
plt.plot(X_test_sorted, y_qup, color="C1")
# Dashed lines: interval bounds returned by predict_interval.
plt.plot(X_test_sorted, y_pis_sym_sorted[:, 0], color="C1", ls="--")
plt.plot(X_test_sorted, y_pis_sym_sorted[:, 1], color="C1", ls="--")
# fill_between needs 1-D inputs, hence the ravel() calls.
plt.fill_between(
    X_test_sorted.ravel(),
    y_pis_sym_sorted[:, 0].ravel(),
    y_pis_sym_sorted[:, 1].ravel(),
    alpha=0.2,
)
plt.title(
    f"Symmetric Intervals\n"
    f"Target and effective coverages for "
    f"confidence_level={confidence_level:.2f}; coverage={coverage_score_sym:.3f})"
)

# Plot asymmetric prediction intervals
plt.subplot(1, 2, 2)
plt.xlabel("x")
plt.ylabel("y")
plt.scatter(X_test, y_test, alpha=0.3)
# Solid lines: raw predictions of the fitted quantile estimators.
plt.plot(X_test_sorted, y_qlow, color="C2")
plt.plot(X_test_sorted, y_qup, color="C2")
# Dashed lines: interval bounds returned by predict_interval.
plt.plot(X_test_sorted, y_pis_asym_sorted[:, 0], color="C2", ls="--")
plt.plot(X_test_sorted, y_pis_asym_sorted[:, 1], color="C2", ls="--")
plt.fill_between(
    X_test_sorted.ravel(),
    y_pis_asym_sorted[:, 0].ravel(),
    y_pis_asym_sorted[:, 1].ravel(),
    alpha=0.2,
)
# Report the coverage measured on the asymmetric intervals; the symmetric
# score belongs to the left panel only.
plt.title(
    f"Asymmetric Intervals\n"
    f"Target and effective coverages for "
    f"confidence_level={confidence_level:.2f}; coverage={coverage_score_asym:.3f})"
)
plt.tight_layout()
plt.show()
Symmetric Intervals Target and effective coverages for confidence_level=0.80; coverage=0.795), Asymmetric Intervals Target and effective coverages for confidence_level=0.80; coverage=0.795)

The symmetric intervals (symmetric_correction=True) use a combined set of residuals for both bounds, while the asymmetric intervals (symmetric_correction=False) use distinct residuals for each bound, allowing for more flexible and accurate intervals that reflect the heteroscedastic nature of the data. The resulting effective coverages demonstrate the theoretical guarantee of the target coverage level confidence_level.

Total running time of the script: ( 0 minutes 0.411 seconds)

Gallery generated by Sphinx-Gallery