From fbfd434e8c1d3bae2fbfb91fb02a6211198e668a Mon Sep 17 00:00:00 2001 From: jawadhussein462 <41950044+jawadhussein462@users.noreply.github.com> Date: Tue, 17 Dec 2024 18:47:33 +0100 Subject: [PATCH 1/5] FIX: correct ConformalizedQuantileRegressor integration tests warnings (#571) --- .../tests/test_regression.py | 149 ++++++++---------- 1 file changed, 62 insertions(+), 87 deletions(-) diff --git a/mapie_v1/integration_tests/tests/test_regression.py b/mapie_v1/integration_tests/tests/test_regression.py index e97fd94d8..966414024 100644 --- a/mapie_v1/integration_tests/tests/test_regression.py +++ b/mapie_v1/integration_tests/tests/test_regression.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Optional, Union, Dict, Tuple, Type +from typing import Optional, Union, Dict, Type import numpy as np import pytest @@ -9,7 +9,7 @@ from sklearn.linear_model import LinearRegression from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import QuantileRegressor -from lightgbm import LGBMRegressor +from sklearn.ensemble import GradientBoostingRegressor from mapie.subsample import Subsample from mapie._typing import ArrayLike @@ -109,16 +109,17 @@ def test_intervals_and_predictions_exact_equality_split( "random_state": RANDOM_STATE, } - v0, v1 = select_models_by_strategy(cv) - compare_model_predictions_and_intervals(model_v0=v0, - model_v1=v1, - X=X_split, - y=y_split, - v0_params=v0_params, - v1_params=v1_params, - test_size=test_size, - random_state=RANDOM_STATE, - prefit=prefit) + compare_model_predictions_and_intervals( + model_v0=MapieRegressorV0, + model_v1=SplitConformalRegressor, + X=X_split, + y=y_split, + v0_params=v0_params, + v1_params=v1_params, + test_size=test_size, + prefit=prefit, + random_state=RANDOM_STATE, + ) params_test_cases_cross = [ @@ -185,11 +186,16 @@ def test_intervals_and_predictions_exact_equality_split( @pytest.mark.parametrize("params_cross", params_test_cases_cross) def test_intervals_and_predictions_exact_equality_cross(params_cross): - v0_params = params_cross["v0"] - v1_params = params_cross["v1"] - v0, v1 = select_models_by_strategy("cross") - compare_model_predictions_and_intervals(v0, v1, X, y, v0_params, v1_params) + compare_model_predictions_and_intervals( + model_v0=MapieRegressorV0, + model_v1=CrossConformalRegressor, + X=X, + y=y, + v0_params=params_cross["v0"], + v1_params=params_cross["v1"], + random_state=RANDOM_STATE, + ) params_test_cases_jackknife = [ @@ -268,28 +274,37 @@ def test_intervals_and_predictions_exact_equality_cross(params_cross): ] +@pytest.mark.parametrize("params_jackknife", params_test_cases_jackknife) +def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife): + + compare_model_predictions_and_intervals( + model_v0=MapieRegressorV0, + model_v1=JackknifeAfterBootstrapRegressor, + X=X, + y=y, + v0_params=params_jackknife["v0"], + v1_params=params_jackknife["v1"], + random_state=RANDOM_STATE, + ) + + split_model = QuantileRegressor( solver="highs-ds", alpha=0.0, ) -lgbm_models = [] -lgbm_alpha = 0.1 -for alpha_ in [lgbm_alpha / 2, (1 - (lgbm_alpha / 2)), 0.5]: - estimator_ = LGBMRegressor( - objective='quantile', +gbr_models = [] +gbr_alpha = 0.1 + +for alpha_ in [gbr_alpha / 2, (1 - (gbr_alpha / 2)), 0.5]: + estimator_ = GradientBoostingRegressor( + loss='quantile', alpha=alpha_, + n_estimators=100, + learning_rate=0.1, + max_depth=3 ) - lgbm_models.append(estimator_) - - -@pytest.mark.parametrize("params_jackknife", params_test_cases_jackknife) -def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife): - v0_params = params_jackknife["v0"] - v1_params = params_jackknife["v1"] - - v0, v1 = select_models_by_strategy("jackknife") - compare_model_predictions_and_intervals(v0, v1, X, y, v0_params, v1_params) + gbr_models.append(estimator_) params_test_cases_quantile = [ @@ -312,8 +327,7 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife): }, { "v0": { - "estimator": lgbm_models, - "alpha": lgbm_alpha, + "estimator": gbr_models, "cv": "prefit", "method": "quantile", "calib_size": 0.3, @@ -322,8 +336,7 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife): "random_state": RANDOM_STATE, }, "v1": { - "estimator": lgbm_models, - "confidence_level": 1-lgbm_alpha, + "estimator": gbr_models, "prefit": True, "test_size": 0.3, "fit_params": {"sample_weight": sample_weight}, @@ -378,58 +391,17 @@ def test_intervals_and_predictions_exact_equality_quantile(params_quantile): test_size = v1_params["test_size"] if "test_size" in v1_params else None prefit = ("prefit" in v1_params) and v1_params["prefit"] - v0, v1 = select_models_by_strategy("quantile") - compare_model_predictions_and_intervals(model_v0=v0, - model_v1=v1, - X=X, - y=y, - v0_params=v0_params, - v1_params=v1_params, - test_size=test_size, - prefit=prefit, - random_state=RANDOM_STATE) - - -def select_models_by_strategy( - strategy_key: str -) -> Tuple[ - Type[Union[MapieRegressorV0, MapieQuantileRegressorV0]], - Type[Union[ - SplitConformalRegressor, - CrossConformalRegressor, - JackknifeAfterBootstrapRegressor, - ConformalizedQuantileRegressor - ]] -]: - - model_v0: Type[Union[MapieRegressorV0, MapieQuantileRegressorV0]] - model_v1: Type[Union[ - SplitConformalRegressor, - CrossConformalRegressor, - JackknifeAfterBootstrapRegressor, - ConformalizedQuantileRegressor - ]] - - if strategy_key in ["split", "prefit"]: - model_v1 = SplitConformalRegressor - model_v0 = MapieRegressorV0 - - elif strategy_key == "cross": - model_v1 = CrossConformalRegressor - model_v0 = MapieRegressorV0 - - elif strategy_key == "jackknife": - model_v1 = JackknifeAfterBootstrapRegressor - model_v0 = MapieRegressorV0 - - elif strategy_key == "quantile": - model_v1 = ConformalizedQuantileRegressor - model_v0 = MapieQuantileRegressorV0 - - else: - raise ValueError(f"Unknown strategy key: {strategy_key}") - - return model_v0, model_v1 + compare_model_predictions_and_intervals( + model_v0=MapieQuantileRegressorV0, + model_v1=ConformalizedQuantileRegressor, + X=X, + y=y, + v0_params=v0_params, + v1_params=v1_params, + test_size=test_size, + prefit=prefit, + random_state=RANDOM_STATE, + ) def compare_model_predictions_and_intervals( @@ -486,6 +458,9 @@ def compare_model_predictions_and_intervals( v1.conformalize(X_conf, y_conf, **v1_conformalize_params) v0_predict_params = filter_params(v0.predict, v0_params) + if 'alpha' in v0_init_params: + v0_predict_params.pop('alpha') + v1_predict_params = filter_params(v1.predict, v1_params) v1_predict_set_params = filter_params(v1.predict_set, v1_params) From e52c10a3311deecf8dfde07f937e126778ce0046 Mon Sep 17 00:00:00 2001 From: Valentin Laurent Date: Tue, 17 Dec 2024 19:24:52 +0100 Subject: [PATCH 2/5] REFACTO: refactor MapieQuantileRegressor internals to prepare for v1: reorganized and renamed functions, fixed 1 test --- mapie/regression/quantile_regression.py | 164 ++++++++++-------------- mapie/tests/test_quantile_regression.py | 10 +- 2 files changed, 75 insertions(+), 99 deletions(-) diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index d86b0de67..3d2e30c3e 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -346,13 +346,11 @@ def _check_cv( "Invalid cv method, only valid method is ``split``." ) - def _check_calib_set( + def _train_calib_split( self, X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None, - X_calib: Optional[ArrayLike] = None, - y_calib: Optional[ArrayLike] = None, calib_size: Optional[float] = 0.3, random_state: Optional[Union[int, np.random.RandomState, None]] = None, shuffle: Optional[bool] = True, @@ -360,61 +358,33 @@ def _check_calib_set( ) -> Tuple[ ArrayLike, ArrayLike, ArrayLike, ArrayLike, Optional[ArrayLike] ]: - """ - Check if a calibration set has already been defined, if not, then - we define one using the ``train_test_split`` method. - - Parameters - ---------- - Same definition of parameters as for the ``fit`` method. - - Returns - ------- - Tuple[ArrayLike, ArrayLike, ArrayLike, ArrayLike, ArrayLike] - - [0]: ArrayLike of shape (n_samples_*(1-calib_size), n_features) - X_train - - [1]: ArrayLike of shape (n_samples_*(1-calib_size),) - y_train - - [2]: ArrayLike of shape (n_samples_*calib_size, n_features) - X_calib - - [3]: ArrayLike of shape (n_samples_*calib_size,) - y_calib - - [4]: ArrayLike of shape (n_samples_,) - sample_weight_train - """ - if X_calib is None or y_calib is None: - if sample_weight is None: - X_train, X_calib, y_train, y_calib = train_test_split( - X, - y, - test_size=calib_size, - random_state=random_state, - shuffle=shuffle, - stratify=stratify - ) - sample_weight_train = sample_weight - else: - ( - X_train, - X_calib, - y_train, - y_calib, - sample_weight_train, - _, - ) = train_test_split( - X, - y, - sample_weight, - test_size=calib_size, - random_state=random_state, - shuffle=shuffle, - stratify=stratify - ) + if sample_weight is None: + X_train, X_calib, y_train, y_calib = train_test_split( + X, + y, + test_size=calib_size, + random_state=random_state, + shuffle=shuffle, + stratify=stratify + ) + sample_weight_train = sample_weight else: - X_train, y_train, sample_weight_train = X, y, sample_weight - X_train, X_calib = cast(ArrayLike, X_train), cast(ArrayLike, X_calib) - y_train, y_calib = cast(ArrayLike, y_train), cast(ArrayLike, y_calib) - sample_weight_train = cast(ArrayLike, sample_weight_train) + ( + X_train, + X_calib, + y_train, + y_calib, + sample_weight_train, + _, + ) = train_test_split( + X, + y, + sample_weight, + test_size=calib_size, + random_state=random_state, + shuffle=shuffle, + stratify=stratify + ) return X_train, y_train, X_calib, y_calib, sample_weight_train def _check_prefit_params( @@ -547,13 +517,12 @@ def fit( MapieQuantileRegressor The model itself. """ - - self.initialize_fit() + self._initialize_fit_conformalize() if self.cv == "prefit": - X_calib, y_calib = self.prefit_estimators(X, y) + X_calib, y_calib = X, y else: - X_calib, y_calib = self.fit_estimators( + X_calib, y_calib = self._fit_estimators( X=X, y=y, sample_weight=sample_weight, @@ -571,26 +540,18 @@ def fit( return self - def initialize_fit(self) -> None: + def _initialize_fit_conformalize(self) -> None: self.cv = self._check_cv(cast(str, self.cv)) self.alpha_np = self._check_alpha(self.alpha) self.estimators_: List[RegressorMixin] = [] - def prefit_estimators( - self, - X: ArrayLike, - y: ArrayLike - ) -> Tuple[ArrayLike, ArrayLike]: - + def _initialize_and_check_prefit_estimators(self) -> None: estimator = cast(List, self.estimator) self._check_prefit_params(estimator) self.estimators_ = list(estimator) self.single_estimator_ = self.estimators_[2] - X_calib, y_calib = indexable(X, y) - return X_calib, y_calib - - def fit_estimators( + def _fit_estimators( self, X: ArrayLike, y: ArrayLike, @@ -604,30 +565,39 @@ def fit_estimators( stratify: Optional[ArrayLike] = None, **fit_params, ) -> Tuple[ArrayLike, ArrayLike]: + """ + This method: + - Creates train and calib sets + - Checks adn casts params, including the train set + - Fit the 3 estimators + - Returns the calib set + """ self._check_parameters() checked_estimator = self._check_estimator(self.estimator) random_state = check_random_state(random_state) X, y = indexable(X, y) - results = self._check_calib_set( - X, - y, - sample_weight, - X_calib, - y_calib, - calib_size, - random_state, - shuffle, - stratify, - ) + if X_calib is None or y_calib is None: + ( + X_train, y_train, X_calib, y_calib, sample_weight_train + ) = self._train_calib_split( + X, + y, + sample_weight, + calib_size, + random_state, + shuffle, + stratify, + ) + else: + X_train, y_train, sample_weight_train = X, y, sample_weight - X_train, y_train, X_calib, y_calib, sample_weight_train = results + X_train, y_train = cast(ArrayLike, X_train), cast(ArrayLike, y_train) + sample_weight_train = cast(ArrayLike, sample_weight_train) X_train, y_train = indexable(X_train, y_train) - X_calib, y_calib = indexable(X_calib, y_calib) - y_train, y_calib = _check_y(y_train), _check_y(y_calib) - self.n_calib_samples = _num_samples(y_calib) - check_alpha_and_n_samples(self.alpha, self.n_calib_samples) + y_train = _check_y(y_train) + sample_weight_train, X_train, y_train = check_null_weight( sample_weight_train, X_train, @@ -660,9 +630,6 @@ def fit_estimators( ) self.single_estimator_ = self.estimators_[2] - X_calib = cast(ArrayLike, X_calib) - y_calib = cast(ArrayLike, y_calib) - return X_calib, y_calib def conformalize( @@ -674,8 +641,15 @@ def conformalize( groups: Optional[ArrayLike] = None, **kwargs: Any, ) -> MapieRegressor: + if self.cv == "prefit": + self._initialize_and_check_prefit_estimators() + + X_calib, y_calib = cast(ArrayLike, X), cast(ArrayLike, y) + X_calib, y_calib = indexable(X_calib, y_calib) + y_calib = _check_y(y_calib) - self.n_calib_samples = _num_samples(y) + self.n_calib_samples = _num_samples(y_calib) + check_alpha_and_n_samples(self.alpha, self.n_calib_samples) y_calib_preds = np.full( shape=(3, self.n_calib_samples), @@ -683,15 +657,15 @@ def conformalize( ) for i, est in enumerate(self.estimators_): - y_calib_preds[i] = est.predict(X, **kwargs).ravel() + y_calib_preds[i] = est.predict(X_calib, **kwargs).ravel() self.conformity_scores_ = np.full( shape=(3, self.n_calib_samples), fill_value=np.nan ) - self.conformity_scores_[0] = y_calib_preds[0] - y - self.conformity_scores_[1] = y - y_calib_preds[1] + self.conformity_scores_[0] = y_calib_preds[0] - y_calib + self.conformity_scores_[1] = y_calib - y_calib_preds[1] self.conformity_scores_[2] = np.max( [ self.conformity_scores_[0], diff --git a/mapie/tests/test_quantile_regression.py b/mapie/tests/test_quantile_regression.py index 0ca88651e..871d62ccd 100644 --- a/mapie/tests/test_quantile_regression.py +++ b/mapie/tests/test_quantile_regression.py @@ -470,11 +470,13 @@ def test_for_small_dataset() -> None: estimator=qt, alpha=0.1 ) + X_calib_toy_small = X_calib_toy[:2] + y_calib_toy_small = y_calib_toy[:2] mapie_reg.fit( - np.array([1, 2, 3]), - np.array([2, 2, 3]), - X_calib=np.array([3, 5]), - y_calib=np.array([2, 3]) + X_train_toy, + y_train_toy, + X_calib=X_calib_toy_small, + y_calib=y_calib_toy_small ) From b14242b945b7749a47f42c41822913ce5260baad Mon Sep 17 00:00:00 2001 From: jawadhussein462 Date: Wed, 18 Dec 2024 21:16:41 +0100 Subject: [PATCH 3/5] ENH: v1 implement CQR --- mapie/regression/quantile_regression.py | 85 +++++++++++-------- mapie_v1/_utils.py | 6 +- .../tests/test_regression.py | 24 ++++-- mapie_v1/regression.py | 71 ++++++++++++++-- 4 files changed, 133 insertions(+), 53 deletions(-) diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index 3d2e30c3e..c8e8c7534 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -522,7 +522,7 @@ def fit( if self.cv == "prefit": X_calib, y_calib = X, y else: - X_calib, y_calib = self._fit_estimators( + result = self._prepare_train_calib( X=X, y=y, sample_weight=sample_weight, @@ -533,7 +533,13 @@ def fit( random_state=random_state, shuffle=shuffle, stratify=stratify, - **fit_params, + ) + X_train, y_train, X_calib, y_calib, sample_weight = result + self._fit_estimators( + X_train=X_train, + y_train=y_train, + sample_weight_train=sample_weight, + **fit_params ) self.conformalize(X_calib, y_calib) @@ -551,7 +557,7 @@ def _initialize_and_check_prefit_estimators(self) -> None: self.estimators_ = list(estimator) self.single_estimator_ = self.estimators_[2] - def _fit_estimators( + def _prepare_train_calib( self, X: ArrayLike, y: ArrayLike, @@ -563,56 +569,60 @@ def _fit_estimators( random_state: Optional[Union[int, np.random.RandomState]] = None, shuffle: Optional[bool] = True, stratify: Optional[ArrayLike] = None, - **fit_params, - ) -> Tuple[ArrayLike, ArrayLike]: + ) -> Tuple[ArrayLike, ArrayLike, ArrayLike, ArrayLike, ArrayLike]: """ - This method: - - Creates train and calib sets - - Checks adn casts params, including the train set - - Fit the 3 estimators - - Returns the calib set + Handles the preparation of training and calibration datasets, + including validation and splitting. + Returns: X_train, y_train, X_calib, y_calib, sample_weight_train """ - self._check_parameters() - checked_estimator = self._check_estimator(self.estimator) random_state = check_random_state(random_state) X, y = indexable(X, y) if X_calib is None or y_calib is None: - ( - X_train, y_train, X_calib, y_calib, sample_weight_train - ) = self._train_calib_split( + return self._train_calib_split( X, y, sample_weight, calib_size, random_state, shuffle, - stratify, + stratify ) else: - X_train, y_train, sample_weight_train = X, y, sample_weight + return X, y, X_calib, y_calib, sample_weight - X_train, y_train = cast(ArrayLike, X_train), cast(ArrayLike, y_train) - sample_weight_train = cast(ArrayLike, sample_weight_train) - X_train, y_train = indexable(X_train, y_train) - y_train = _check_y(y_train) + # Second function: Handles estimator fitting + def _fit_estimators( + self, + X: ArrayLike, + y: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + **fit_params + ) -> None: + """ + Fits the estimators with provided training data + and stores them in self.estimators_. + """ + checked_estimator = self._check_estimator(self.estimator) + + X, y = indexable(X, y) + y = _check_y(y) - sample_weight_train, X_train, y_train = check_null_weight( - sample_weight_train, - X_train, - y_train + sample_weight, X, y = check_null_weight( + sample_weight, X, y ) - y_train = cast(NDArray, y_train) if isinstance(checked_estimator, Pipeline): estimator = checked_estimator[-1] else: estimator = checked_estimator + name_estimator = estimator.__class__.__name__ - alpha_name = self.quantile_estimator_params[ - name_estimator - ]["alpha_name"] + alpha_name = self.quantile_estimator_params[name_estimator][ + "alpha_name" + ] + for i, alpha_ in enumerate(self.alpha_np): cloned_estimator_ = clone(checked_estimator) params = {alpha_name: alpha_} @@ -620,17 +630,18 @@ def _fit_estimators( cloned_estimator_[-1].set_params(**params) else: cloned_estimator_.set_params(**params) - self.estimators_.append(fit_estimator( - cloned_estimator_, - X_train, - y_train, - sample_weight_train, - **fit_params, + + self.estimators_.append( + fit_estimator( + cloned_estimator_, + X, + y, + sample_weight, + **fit_params, ) ) - self.single_estimator_ = self.estimators_[2] - return X_calib, y_calib + self.single_estimator_ = self.estimators_[2] def conformalize( self, diff --git a/mapie_v1/_utils.py b/mapie_v1/_utils.py index 7b3ac2849..2bac773af 100644 --- a/mapie_v1/_utils.py +++ b/mapie_v1/_utils.py @@ -54,9 +54,11 @@ def check_if_X_y_different_from_fit( def make_intervals_single_if_single_alpha( intervals: NDArray, - alphas: List[float] + alphas: Union[float, List[float]] ) -> NDArray: - if len(alphas) == 1: + if isinstance(alphas, float): + return intervals[:, :, 0] + if isinstance(alphas, list) and len(alphas) == 1: return intervals[:, :, 0] return intervals diff --git a/mapie_v1/integration_tests/tests/test_regression.py b/mapie_v1/integration_tests/tests/test_regression.py index 966414024..7a0387d7d 100644 --- a/mapie_v1/integration_tests/tests/test_regression.py +++ b/mapie_v1/integration_tests/tests/test_regression.py @@ -10,6 +10,7 @@ from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import QuantileRegressor from sklearn.ensemble import GradientBoostingRegressor +from sklearn.model_selection import train_test_split from mapie.subsample import Subsample from mapie._typing import ArrayLike @@ -306,6 +307,13 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife): ) gbr_models.append(estimator_) +sample_weight_train = train_test_split( + X, + y, + sample_weight, + test_size=0.4, + random_state=RANDOM_STATE +)[-2] params_test_cases_quantile = [ { @@ -313,15 +321,15 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife): "alpha": 0.2, "cv": "split", "method": "quantile", - "calib_size": 0.3, + "calib_size": 0.4, "sample_weight": sample_weight, "random_state": RANDOM_STATE, }, "v1": { "confidence_level": 0.8, "prefit": False, - "test_size": 0.3, - "fit_params": {"sample_weight": sample_weight}, + "test_size": 0.4, + "fit_params": {"sample_weight": sample_weight_train}, "random_state": RANDOM_STATE, }, }, @@ -330,7 +338,7 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife): "estimator": gbr_models, "cv": "prefit", "method": "quantile", - "calib_size": 0.3, + "calib_size": 0.2, "sample_weight": sample_weight, "optimize_beta": True, "random_state": RANDOM_STATE, @@ -338,7 +346,7 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife): "v1": { "estimator": gbr_models, "prefit": True, - "test_size": 0.3, + "test_size": 0.2, "fit_params": {"sample_weight": sample_weight}, "minimize_interval_width": True, "random_state": RANDOM_STATE, @@ -418,12 +426,16 @@ def compare_model_predictions_and_intervals( v1_params: Dict = {}, prefit: bool = False, test_size: Optional[float] = None, + sample_weight: Optional[ArrayLike] = None, random_state: int = 42, ) -> None: if test_size is not None: X_train, X_conf, y_train, y_conf = train_test_split_shuffle( - X, y, test_size=test_size, random_state=random_state + X, + y, + test_size=test_size, + random_state=random_state, ) else: X_train, X_conf, y_train, y_conf = X, X, y, y diff --git a/mapie_v1/regression.py b/mapie_v1/regression.py index fefa2140b..870074fd7 100644 --- a/mapie_v1/regression.py +++ b/mapie_v1/regression.py @@ -3,14 +3,15 @@ from typing_extensions import Self import numpy as np -from sklearn.linear_model import LinearRegression, QuantileRegressor +from sklearn.linear_model import LinearRegression from sklearn.base import RegressorMixin, clone from sklearn.model_selection import BaseCrossValidator +from sklearn.pipeline import Pipeline from mapie.subsample import Subsample from mapie._typing import ArrayLike, NDArray from mapie.conformity_scores import BaseRegressionScore -from mapie.regression import MapieRegressor +from mapie.regression import MapieRegressor, MapieQuantileRegressor from mapie.utils import check_estimator_fit_predict from mapie_v1.conformity_scores._utils import ( check_and_select_regression_conformity_score, @@ -904,12 +905,29 @@ class ConformalizedQuantileRegressor: def __init__( self, - estimator: RegressorMixin = QuantileRegressor(), - confidence_level: Union[float, List[float]] = 0.9, - conformity_score: Union[str, BaseRegressionScore] = "absolute", + estimator: Optional[ + Union[ + RegressorMixin, + Pipeline, + List[Union[RegressorMixin, Pipeline]] + ] + ] = None, + confidence_level: float = 0.9, + prefit: bool = False, random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: - pass + + self._alpha = 1 - confidence_level + + cv: str = "prefit" if prefit else "split" + self._mapie_quantile_regressor = MapieQuantileRegressor( + estimator=estimator, + method="quantile", + cv=cv, + alpha=self._alpha, + ) + + self._sample_weight: Optional[NDArray] = None def fit( self, @@ -937,6 +955,21 @@ def fit( Self The fitted ConformalizedQuantileRegressor instance. """ + + if fit_params: + fit_params_ = copy.deepcopy(fit_params) + self._sample_weight = fit_params_.pop("sample_weight", None) + else: + fit_params_ = {} + + self._mapie_quantile_regressor._initialize_fit_conformalize() + self._mapie_quantile_regressor._fit_estimators( + X=X_train, + y=y_train, + sample_weight=self._sample_weight, + **fit_params_, + ) + return self def conformalize( @@ -969,6 +1002,16 @@ def conformalize( The ConformalizedQuantileRegressor instance with calibrated prediction intervals. """ + + if not predict_params: + predict_params = {} + + self._mapie_quantile_regressor.conformalize( + X_conf, + y_conf, + **predict_params + ) + return self def predict_set( @@ -1007,7 +1050,17 @@ def predict_set( Prediction intervals with shape `(n_samples, 2)`, with lower and upper bounds for each sample. """ - return np.ndarray(0) + _, intervals = self._mapie_quantile_regressor.predict( + X, + optimize_beta=minimize_interval_width, + allow_infinite_bounds=allow_infinite_bounds, + symmetry=symmetric_intervals + ) + + return make_intervals_single_if_single_alpha( + intervals, + self._alpha + ) def predict( self, @@ -1026,7 +1079,9 @@ def predict( NDArray Array of point predictions with shape `(n_samples,)`. """ - return np.ndarray(0) + estimator = self._mapie_quantile_regressor + predictions, _ = estimator.predict(X) + return predictions class GibbsConformalRegressor: From 948d50bf9b7f929a2a60e4f84a823fd70dd57147 Mon Sep 17 00:00:00 2001 From: jawadhussein462 Date: Wed, 18 Dec 2024 21:25:08 +0100 Subject: [PATCH 4/5] fix mypy --- mapie/regression/quantile_regression.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index c8e8c7534..d87f60408 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -569,7 +569,9 @@ def _prepare_train_calib( random_state: Optional[Union[int, np.random.RandomState]] = None, shuffle: Optional[bool] = True, stratify: Optional[ArrayLike] = None, - ) -> Tuple[ArrayLike, ArrayLike, ArrayLike, ArrayLike, ArrayLike]: + ) -> Tuple[ + ArrayLike, ArrayLike, ArrayLike, ArrayLike, Optional[ArrayLike] + ]: """ Handles the preparation of training and calibration datasets, including validation and splitting. From 34d9cdcd8e4cb0683c75777d573727473445394d Mon Sep 17 00:00:00 2001 From: jawadhussein462 Date: Wed, 18 Dec 2024 21:47:24 +0100 Subject: [PATCH 5/5] fix _fit_estimators --- mapie/regression/quantile_regression.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index d87f60408..327dea743 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -536,8 +536,8 @@ def fit( ) X_train, y_train, X_calib, y_calib, sample_weight = result self._fit_estimators( - X_train=X_train, - y_train=y_train, + X=X_train, + y=y_train, sample_weight_train=sample_weight, **fit_params )