pydantic 2 - migration (#279)
bertiqwerty authored Jan 15, 2024
1 parent 33a2053 commit 59bbd57
Showing 71 changed files with 672 additions and 640 deletions.
5 changes: 3 additions & 2 deletions bofire/benchmarks/multi.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pandas as pd
 import torch
-from pydantic import validator
+from pydantic import field_validator
 from pydantic.types import PositiveInt
 from scipy.integrate import solve_ivp
 from scipy.special import gamma
@@ -65,7 +65,8 @@ def __init__(self, dim: PositiveInt, num_objectives: PositiveInt = 2, **kwargs):
         }
         self._domain = domain
 
-    @validator("dim")
+    @field_validator("dim")
+    @classmethod
     def validate_dim(cls, dim, values):
         num_objectives = values["num_objectives"]
         if dim <= values["num_objectives"]:
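Migration note: this hunk shows the decorator half of a pattern that recurs throughout the commit. Pydantic v2 replaces `@validator` with `@field_validator` plus an explicit `@classmethod`, and values of already-validated fields are reached through `info.data` rather than v1's `values` dict. A minimal sketch of the full v2 idiom; the model below is hypothetical, not bofire code:

    from pydantic import BaseModel, ValidationInfo, field_validator

    class Benchmark(BaseModel):
        num_objectives: int  # declared first, so it is validated before dim
        dim: int

        @field_validator("dim")
        @classmethod
        def validate_dim(cls, dim: int, info: ValidationInfo) -> int:
            # v2 passes previously validated fields via info.data
            if dim <= info.data["num_objectives"]:
                raise ValueError("dim must be > num_objectives")
            return dim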
19 changes: 10 additions & 9 deletions bofire/data_models/base.py
@@ -1,16 +1,17 @@
 import pandas as pd
 from pydantic import BaseModel as PydanticBaseModel
-from pydantic import Extra
+from pydantic import ConfigDict
 
 
 class BaseModel(PydanticBaseModel):
-    class Config:
-        validate_assignment = True
-        arbitrary_types_allowed = False
-        copy_on_model_validation = "none"
-        extra = Extra.forbid
-
-        json_encoders = {
+    # json_encoders is deprecated.
+    # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-config for more information.
+    model_config = ConfigDict(
+        validate_assignment=True,
+        arbitrary_types_allowed=False,
+        extra="forbid",
+        json_encoders={
             pd.DataFrame: lambda x: x.to_dict(orient="list"),
             pd.Series: lambda x: x.to_list(),
-        }
+        },
+    )
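Migration note: v1's inner `class Config` becomes a `model_config = ConfigDict(...)` assignment in v2, and `copy_on_model_validation` is dropped because v2 removed that option outright. A small sketch of how the kept flags behave, using a hypothetical model:

    from pydantic import BaseModel, ConfigDict, ValidationError

    class Example(BaseModel):
        model_config = ConfigDict(
            validate_assignment=True,  # re-validate on attribute assignment
            extra="forbid",            # unknown fields raise instead of being dropped
        )
        x: int

    e = Example(x=1)
    e.x = "2"  # validate_assignment coerces and checks: e.x == 2
    try:
        Example(x=1, y=2)
    except ValidationError:
        print("extra fields are forbidden")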
4 changes: 2 additions & 2 deletions bofire/data_models/constraints/constraint.py
@@ -72,5 +72,5 @@ class ConstraintNotFulfilledError(ConstraintError):
     pass
 
 
-FeatureKeys = Annotated[List[str], Field(min_items=2)]
-Coefficients = Annotated[List[float], Field(min_items=2)]
+FeatureKeys = Annotated[List[str], Field(min_length=2)]
+Coefficients = Annotated[List[float], Field(min_length=2)]
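Migration note: pydantic v2 renames the collection-size constraints `min_items`/`max_items` to `min_length`/`max_length`. The annotated types work as before; a sketch with a hypothetical model:

    from typing import Annotated, List
    from pydantic import BaseModel, Field, ValidationError

    FeatureKeys = Annotated[List[str], Field(min_length=2)]

    class M(BaseModel):
        features: FeatureKeys

    try:
        M(features=["x1"])  # only one item
    except ValidationError as err:
        print(err)  # list should have at least 2 items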
2 changes: 1 addition & 1 deletion bofire/data_models/constraints/interpoint.py
@@ -28,7 +28,7 @@ class InterpointEqualityConstraint(InterpointConstraint):
 
     type: Literal["InterpointEqualityConstraint"] = "InterpointEqualityConstraint"
     feature: str
-    multiplicity: Optional[Annotated[int, Field(ge=2)]]
+    multiplicity: Optional[Annotated[int, Field(ge=2)]] = None
 
     def is_fulfilled(
         self, experiments: pd.DataFrame, tol: Optional[float] = 1e-6
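Migration note: in pydantic v1 an `Optional[...]` annotation implicitly defaulted to `None`; v2 drops that rule, so the explicit `= None` is needed to keep the field optional. Sketch with a hypothetical model:

    from typing import Optional
    from pydantic import BaseModel

    class V2Model(BaseModel):
        required_but_nullable: Optional[int]   # v2: must be passed, may be None
        truly_optional: Optional[int] = None   # may be omitted entirely

    V2Model(required_but_nullable=None)  # ok; omitting it would raise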
15 changes: 8 additions & 7 deletions bofire/data_models/constraints/linear.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import pandas as pd
-from pydantic import root_validator, validator
+from pydantic import field_validator, model_validator
 
 from bofire.data_models.constraints.constraint import (
     Coefficients,
@@ -26,21 +26,22 @@ class LinearConstraint(IntrapointConstraint):
     coefficients: Coefficients
     rhs: float
 
-    @validator("features")
+    @field_validator("features")
+    @classmethod
     def validate_features_unique(cls, features):
         """Validate that feature keys are unique."""
         if len(features) != len(set(features)):
             raise ValueError("features must be unique")
         return features
 
-    @root_validator(pre=False, skip_on_failure=True)
-    def validate_list_lengths(cls, values):
+    @model_validator(mode="after")
+    def validate_list_lengths(self):
         """Validate that length of the feature and coefficient lists have the same length."""
-        if len(values["features"]) != len(values["coefficients"]):
+        if len(self.features) != len(self.coefficients):
             raise ValueError(
-                f'must provide same number of features and coefficients, got {len(values["features"])} != {len(values["coefficients"])}'
+                f"must provide same number of features and coefficients, got {len(self.features)} != {len(self.coefficients)}"
             )
-        return values
+        return self
 
     def __call__(self, experiments: pd.DataFrame) -> pd.Series:
         return (
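Migration note: v1's `@root_validator(pre=False, skip_on_failure=True)` maps to `@model_validator(mode="after")`. An "after" validator runs on the fully constructed instance, so it takes `self` instead of a `values` dict and must return `self`. Minimal sketch (hypothetical model):

    from typing import List
    from pydantic import BaseModel, model_validator

    class Lengths(BaseModel):
        features: List[str]
        coefficients: List[float]

        @model_validator(mode="after")
        def validate_list_lengths(self):
            if len(self.features) != len(self.coefficients):
                raise ValueError("features and coefficients must have the same length")
            return self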
30 changes: 17 additions & 13 deletions bofire/data_models/constraints/nchoosek.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import pandas as pd
-from pydantic import root_validator, validator
+from pydantic import field_validator, model_validator
 
 from bofire.data_models.constraints.constraint import FeatureKeys, IntrapointConstraint
 
@@ -28,28 +28,26 @@ class NChooseKConstraint(IntrapointConstraint):
     max_count: int
     none_also_valid: bool
 
-    @validator("features")
+    @field_validator("features")
+    @classmethod
     def validate_features_unique(cls, features: List[str]):
         """Validates that provided feature keys are unique."""
         if len(features) != len(set(features)):
             raise ValueError("features must be unique")
         return features
 
-    @root_validator(pre=False, skip_on_failure=True)
-    def validate_counts(cls, values):
+    @model_validator(mode="after")
+    def validate_counts(self):
         """Validates if the minimum and maximum of allowed features are smaller than the overall number of features."""
-        features = values["features"]
-        min_count = values["min_count"]
-        max_count = values["max_count"]
 
-        if min_count > len(features):
+        if self.min_count > len(self.features):
             raise ValueError("min_count must be <= # of features")
-        if max_count > len(features):
+        if self.max_count > len(self.features):
             raise ValueError("max_count must be <= # of features")
-        if min_count > max_count:
+        if self.min_count > self.max_count:
             raise ValueError("min_values must be <= max_values")
 
-        return values
+        return self
 
     def __call__(self, experiments: pd.DataFrame) -> pd.Series:
         """Smooth relaxation of NChooseK constraint by countig the number of zeros in a candidate by a sum of
@@ -75,10 +73,16 @@ def relu(x):
         min_count_violation = np.zeros(experiments_tensor.shape[0])
 
         if self.max_count != len(self.features):
-            max_count_violation = relu(-1 * narrow_gaussian(x=experiments_tensor[..., indices]).sum(axis=-1) + (len(self.features) - self.max_count))  # type: ignore
+            max_count_violation = relu(
+                -1 * narrow_gaussian(x=experiments_tensor[..., indices]).sum(axis=-1)
+                + (len(self.features) - self.max_count)
+            )
 
         if self.min_count > 0:
-            min_count_violation = relu(narrow_gaussian(x=experiments_tensor[..., indices]).sum(axis=-1) - (len(self.features) - self.min_count))  # type: ignore
+            min_count_violation = relu(
+                narrow_gaussian(x=experiments_tensor[..., indices]).sum(axis=-1)
+                - (len(self.features) - self.min_count)
+            )
 
         return pd.Series(max_count_violation + min_count_violation)
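The reflowed expressions above implement a smooth relaxation: `narrow_gaussian` counts near-zero entries and `relu` turns a count shortfall into a positive violation. A standalone numeric sketch; `narrow_gaussian` is assumed here to be a tight Gaussian bump, since its actual definition sits elsewhere in this file:

    import numpy as np

    def narrow_gaussian(x, ell=1e-3):
        # assumed form: ~1 where x is (near) zero, ~0 elsewhere
        return np.exp(-0.5 * (x / ell) ** 2)

    def relu(x):
        return np.maximum(0.0, x)

    x = np.array([0.0, 0.0, 0.7, 0.2])  # one candidate with 4 features
    n_zeros = narrow_gaussian(x).sum()  # ~2.0: two near-zero entries
    n_features, max_count = 4, 3

    # mirrors the max_count branch above: positive iff fewer than
    # (n_features - max_count) entries are zero, i.e. too many are active
    print(relu(-n_zeros + (n_features - max_count)))  # ~0.0 -> satisfied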
19 changes: 10 additions & 9 deletions bofire/data_models/constraints/nonlinear.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 import pandas as pd
-from pydantic import validator
+from pydantic import Field, field_validator
 
 from bofire.data_models.constraints.constraint import FeatureKeys, IntrapointConstraint
 
@@ -19,10 +19,11 @@ class NonlinearConstraint(IntrapointConstraint):
 
     expression: str
     features: Optional[FeatureKeys] = None
-    jacobian_expression: Optional[str] = None
+    jacobian_expression: Optional[str] = Field(default=None, validate_default=True)
 
-    @validator("jacobian_expression", always=True)
-    def set_jacobian_expression(cls, jacobian_expression, values):
+    @field_validator("jacobian_expression")
+    @classmethod
+    def set_jacobian_expression(cls, jacobian_expression, info):
         try:
             import sympy  # type: ignore
         except ImportError as e:
@@ -32,16 +33,16 @@ def set_jacobian_expression(cls, jacobian_expression, values):
 
         if (
             jacobian_expression is None
-            and "features" in values
-            and "expression" in values
+            and "features" in info.data.keys()
+            and "expression" in info.data.keys()
         ):
-            if values["features"] is not None:
+            if info.data["features"] is not None:
                 return (
                     "["
                     + ", ".join(
                         [
-                            str(sympy.S(values["expression"]).diff(key))
-                            for key in values["features"]
+                            str(sympy.S(info.data["expression"]).diff(key))
+                            for key in info.data["features"]
                         ]
                     )
                     + "]"
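Stripped of the validator plumbing, the fallback computed above is plain sympy differentiation of the constraint expression with respect to each feature; for example:

    import sympy

    expression = "x1**2 + 2*x1*x2"
    features = ["x1", "x2"]

    jacobian = (
        "[" + ", ".join(str(sympy.S(expression).diff(key)) for key in features) + "]"
    )
    print(jacobian)  # [2*x1 + 2*x2, 2*x1]

Note also the replacement for v1's `always=True`: in v2 the field carries `Field(default=None, validate_default=True)` so the validator still fires when the default is used.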
78 changes: 32 additions & 46 deletions bofire/data_models/domain/domain.py
@@ -17,7 +17,7 @@
 
 import numpy as np
 import pandas as pd
-from pydantic import Field, validator
+from pydantic import Field, field_validator, model_validator
 
 from bofire.data_models.base import BaseModel
 from bofire.data_models.constraints.api import (
@@ -57,7 +57,6 @@ class Domain(BaseModel):
 
     inputs: Inputs = Field(default_factory=lambda: Inputs())
     outputs: Outputs = Field(default_factory=lambda: Outputs())
-
     constraints: Constraints = Field(default_factory=lambda: Constraints())
 
     """Representation of the optimization problem/domain
@@ -84,8 +83,9 @@ def from_lists(
             constraints=Constraints(constraints=constraints),
         )
 
-    @validator("inputs", always=True, pre=True)
-    def validate_inputs_list(cls, v, values):
+    @field_validator("inputs", mode="before")
+    @classmethod
+    def validate_inputs_list(cls, v):
         if isinstance(v, collections.abc.Sequence):
             v = Inputs(features=v)
             return v
@@ -94,26 +94,28 @@ def validate_inputs_list(cls, v, values):
         else:
             return v
 
-    @validator("outputs", always=True, pre=True)
-    def validate_outputs_list(cls, v, values):
+    @field_validator("outputs", mode="before")
+    @classmethod
+    def validate_outputs_list(cls, v):
         if isinstance(v, collections.abc.Sequence):
             return Outputs(features=v)
         if isinstance_or_union(v, AnyOutput):
             return Outputs(features=[v])
         else:
             return v
 
-    @validator("constraints", always=True, pre=True)
-    def validate_constraints_list(cls, v, values):
+    @field_validator("constraints", mode="before")
+    @classmethod
+    def validate_constraints_list(cls, v):
         if isinstance(v, list):
             return Constraints(constraints=v)
         if isinstance_or_union(v, AnyConstraint):
             return Constraints(constraints=[v])
         else:
             return v
 
-    @validator("outputs", always=True)
-    def validate_unique_feature_keys(cls, v: Outputs, values) -> Outputs:
+    @model_validator(mode="after")
+    def validate_unique_feature_keys(self):
         """Validates if provided input and output feature keys are unique
 
         Args:
@@ -126,16 +128,14 @@ def validate_unique_feature_keys(cls, v: Outputs, values) -> Outputs:
         Returns:
             Outputs: Keeps output features as given.
         """
-        if "inputs" not in values:
-            return v
-        features = v + values["inputs"]
-        keys = [f.key for f in features]
+
+        keys = self.outputs.get_keys() + self.inputs.get_keys()
         if len(set(keys)) != len(keys):
-            raise ValueError("feature keys are not unique")
-        return v
+            raise ValueError("Feature keys are not unique")
+        return self
 
-    @validator("constraints", always=True)
-    def validate_constraints(cls, v, values):
+    @model_validator(mode="after")
+    def validate_constraints(self):
         """Validate if all features included in the constraints are also defined as features for the domain.
 
         Args:
@@ -148,18 +148,17 @@ def validate_constraints(cls, v, values):
         Returns:
             List[Constraint]: List of constraints defined for the domain
         """
-        if "inputs" not in values:
-            return v
-        keys = [f.key for f in values["inputs"]]
-        for c in v:
+
+        keys = self.inputs.get_keys()
+        for c in self.constraints:
             if isinstance(c, LinearConstraint) or isinstance(c, NChooseKConstraint):
                 for f in c.features:
                     if f not in keys:
                         raise ValueError(f"feature {f} in constraint unknown ({keys})")
-        return v
+        return self
 
-    @validator("constraints", always=True)
-    def validate_linear_constraints(cls, v, values):
+    @model_validator(mode="after")
+    def validate_linear_constraints_and_nchoosek(self):
         """Validate if all features included in linear constraints are continuous ones.
 
         Args:
@@ -173,21 +172,13 @@
         Returns:
             List[Constraint]: List of constraints defined for the domain
         """
-        if "inputs" not in values:
-            return v
-
-        # gather continuous inputs in dictionary
-        continuous_inputs_dict = {}
-        for f in values["inputs"]:
-            if isinstance(f, ContinuousInput):
-                continuous_inputs_dict[f.key] = f
+        keys = self.inputs.get_keys(ContinuousInput)
 
         # check if non continuous input features appear in linear constraints
-        for c in v:
-            if isinstance(c, LinearConstraint):
-                for f in c.features:
-                    assert f in continuous_inputs_dict, f"{f} must be continuous."
-        return v
+        for c in self.constraints.get(includes=[LinearConstraint, NChooseKConstraint]):
+            for f in c.features:  # type: ignore
+                assert f in keys, f"{f} must be continuous."
+        return self
 
     def get_feature_reps_df(self) -> pd.DataFrame:
         """Returns a pandas dataframe describing the features contained in the optimization domain."""
@@ -617,11 +608,6 @@ def candidate_column_names(self):
             ]
         )
 
-    def _set_constraints_unvalidated(
-        self, constraints: Union[Sequence[AnyConstraint], Constraints]
-    ):
-        """Hack for reduce_domain"""
-        self.constraints = Constraints(constraints=[])
-        if isinstance(constraints, Constraints):
-            constraints = constraints.constraints
-        self.constraints.constraints = constraints
+
+if __name__ == "__main__":
+    pass
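Migration note: the `mode="before"` validators above replace v1's `pre=True`; they receive the raw input and may coerce it into the target container before standard validation runs. The `always=True` flag has no direct v2 equivalent — running a validator on a defaulted field now requires `Field(validate_default=True)`, as in nonlinear.py above. A sketch of the coercion pattern, with hypothetical stand-ins for bofire's `Inputs` container:

    from typing import List
    from pydantic import BaseModel, field_validator

    class Inputs(BaseModel):
        features: List[str] = []

    class Domain(BaseModel):
        inputs: Inputs = Inputs()

        @field_validator("inputs", mode="before")
        @classmethod
        def validate_inputs_list(cls, v):
            # raw lists are wrapped before standard validation runs
            if isinstance(v, list):
                return Inputs(features=v)
            return v

    d = Domain(inputs=["x1", "x2"])
    print(d.inputs.features)  # ['x1', 'x2']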