From 0a92250aa18661e7e5d6b2f7c9c39925f5849ebe Mon Sep 17 00:00:00 2001
From: Stefano Zamboni
Date: Thu, 24 Oct 2024 14:07:17 +0200
Subject: [PATCH 1/2] feat: added percentages dto to sdk

---
 .../apis/model_current_dataset.py             |  52 +++++++
 .../models/dataset_percentages.py             |  23 +++
 sdk/tests/apis/model_current_dataset_test.py  | 132 ++++++++++++++++++
 3 files changed, 207 insertions(+)
 create mode 100644 sdk/radicalbit_platform_sdk/models/dataset_percentages.py

diff --git a/sdk/radicalbit_platform_sdk/apis/model_current_dataset.py b/sdk/radicalbit_platform_sdk/apis/model_current_dataset.py
index 88dcbc7f..a39fc0ed 100644
--- a/sdk/radicalbit_platform_sdk/apis/model_current_dataset.py
+++ b/sdk/radicalbit_platform_sdk/apis/model_current_dataset.py
@@ -20,6 +20,7 @@
     ModelType,
     RegressionDataQuality,
 )
+from radicalbit_platform_sdk.models.dataset_percentages import Percentages
 
 
 class ModelCurrentDataset:
@@ -42,6 +43,7 @@ def __init__(
         self.__model_metrics = None
         self.__data_metrics = None
         self.__drift = None
+        self.__percentages = None
 
     def uuid(self) -> UUID:
         return self.__uuid
@@ -108,6 +110,56 @@ def __callback(
 
         return self.__statistics
 
+    def percentages(self) -> Optional[Percentages]:
+        """Get percentages about the actual dataset
+
+        :return: The `Percentages` if exists
+        """
+
+        def __callback(
+            response: requests.Response,
+        ) -> tuple[JobStatus, Optional[Percentages]]:
+            try:
+                response_json = response.json()
+                job_status = JobStatus(response_json['jobStatus'])
+                if 'percentages' in response_json:
+                    return (
+                        job_status,
+                        Percentages.model_validate(response_json['percentages']),
+                    )
+            except KeyError as e:
+                raise ClientError(f'Unable to parse response: {response.text}') from e
+            except ValidationError as e:
+                raise ClientError(f'Unable to parse response: {response.text}') from e
+            else:
+                return job_status, None
+
+        match self.__status:
+            case JobStatus.ERROR:
+                self.__percentages = None
+            case JobStatus.MISSING_CURRENT:
+                self.__percentages = None
+            case JobStatus.SUCCEEDED:
+                if self.__percentages is None:
+                    _, percentages = invoke(
+                        method='GET',
+                        url=f'{self.__base_url}/api/models/{str(self.__model_uuid)}/current/{str(self.__uuid)}/percentages',
+                        valid_response_code=200,
+                        func=__callback,
+                    )
+                    self.__percentages = percentages
+            case JobStatus.IMPORTING:
+                status, percentages = invoke(
+                    method='GET',
+                    url=f'{self.__base_url}/api/models/{str(self.__model_uuid)}/current/{str(self.__uuid)}/percentages',
+                    valid_response_code=200,
+                    func=__callback,
+                )
+                self.__status = status
+                self.__percentages = percentages
+
+        return self.__percentages
+
     def drift(self) -> Optional[Drift]:
         """Get drift about the actual dataset
 
diff --git a/sdk/radicalbit_platform_sdk/models/dataset_percentages.py b/sdk/radicalbit_platform_sdk/models/dataset_percentages.py
new file mode 100644
index 00000000..225553aa
--- /dev/null
+++ b/sdk/radicalbit_platform_sdk/models/dataset_percentages.py
@@ -0,0 +1,23 @@
+from typing import List, Optional
+
+from pydantic import BaseModel, ConfigDict
+from pydantic.alias_generators import to_camel
+
+
+class DetailPercentage(BaseModel):
+    feature_name: str
+    score: float
+
+
+class MetricPercentage(BaseModel):
+    value: float
+    details: Optional[List[Optional[DetailPercentage]]] = None
+
+
+class Percentages(BaseModel):
+    data_quality: MetricPercentage
+    model_quality: MetricPercentage
+    drift: MetricPercentage
+
+    model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
+
diff --git a/sdk/tests/apis/model_current_dataset_test.py b/sdk/tests/apis/model_current_dataset_test.py
index 4ef64a3f..47901291 100644
--- a/sdk/tests/apis/model_current_dataset_test.py
+++ b/sdk/tests/apis/model_current_dataset_test.py
@@ -19,6 +19,7 @@
     ModelType,
     RegressionDataQuality,
 )
+from radicalbit_platform_sdk.models.dataset_percentages import Percentages
 
 
 class ModelCurrentDatasetTest(unittest.TestCase):
@@ -139,6 +140,137 @@ def test_statistics_key_error(self):
         with pytest.raises(ClientError):
             model_current_dataset.statistics()
 
+    @responses.activate
+    def test_percentages_ok(self):
+        base_url = 'http://api:9000'
+        model_id = uuid.uuid4()
+        import_uuid = uuid.uuid4()
+        model_current_dataset = ModelCurrentDataset(
+            base_url,
+            model_id,
+            ModelType.BINARY,
+            CurrentFileUpload(
+                uuid=import_uuid,
+                path='s3://bucket/file.csv',
+                date='2014',
+                correlation_id_column='column',
+                status=JobStatus.IMPORTING,
+            ),
+        )
+
+        responses.add(
+            method=responses.GET,
+            url=f'{base_url}/api/models/{str(model_id)}/current/{str(import_uuid)}/percentages',
+            status=200,
+            body="""{
+                    "jobStatus": "SUCCEEDED",
+                    "percentages": {
+                        "data_quality": {
+                            "value": 0.9,
+                            "details": [
+                                {
+                                    "feature_name": "num1",
+                                    "score": 0.4
+                                },
+                                {
+                                    "feature_name": "num2",
+                                    "score": 0.0
+                                },
+                                {
+                                    "feature_name": "cat1",
+                                    "score": 0.0
+                                },
+                                {
+                                    "feature_name": "cat2",
+                                    "score": 0.0
+                                }
+                            ]
+                        },
+                        "model_quality": {
+                            "value": -1,
+                            "details": []
+                        },
+                        "drift": {
+                            "value": 0.75,
+                            "details": [
+                                {
+                                    "feature_name": "num1",
+                                    "score": 1.0
+                                }
+                            ]
+                        }
+                    }
+                }""",
+        )
+
+        percentages = model_current_dataset.percentages()
+
+        assert isinstance(percentages, Percentages)
+
+        assert percentages.data_quality.value == 0.9
+        assert len(percentages.data_quality.details) == 4
+        assert percentages.model_quality.value == -1
+        assert len(percentages.model_quality.details) == 0
+        assert percentages.drift.value == 0.75
+        assert len(percentages.drift.details) == 1
+        assert model_current_dataset.status() == JobStatus.SUCCEEDED
+
+    @responses.activate
+    def test_percentages_validation_error(self):
+        base_url = 'http://api:9000'
+        model_id = uuid.uuid4()
+        import_uuid = uuid.uuid4()
+        model_current_dataset = ModelCurrentDataset(
+            base_url,
+            model_id,
+            ModelType.BINARY,
+            CurrentFileUpload(
+                uuid=import_uuid,
+                path='s3://bucket/file.csv',
+                date='2014',
+                correlation_id_column='column',
+                status=JobStatus.IMPORTING,
+            ),
+        )
+
+        responses.add(
+            method=responses.GET,
+            url=f'{base_url}/api/models/{str(model_id)}/current/{str(import_uuid)}/percentages',
+            status=200,
+            body='{"jobStatus": "SUCCEEDED", "percentages": "wrong"}',
+        )
+
+        with pytest.raises(ClientError):
+            model_current_dataset.percentages()
+
+    @responses.activate
+    def test_percentages_key_error(self):
+        base_url = 'http://api:9000'
+        model_id = uuid.uuid4()
+        import_uuid = uuid.uuid4()
+        model_current_dataset = ModelCurrentDataset(
+            base_url,
+            model_id,
+            ModelType.BINARY,
+            CurrentFileUpload(
+                uuid=import_uuid,
+                path='s3://bucket/file.csv',
+                date='2014',
+                correlation_id_column='column',
+                status=JobStatus.IMPORTING,
+            ),
+        )
+
+        responses.add(
+            method=responses.GET,
+            url=f'{base_url}/api/models/{str(model_id)}/current/{str(import_uuid)}/percentages',
+            status=200,
+            body='{"wrong": "json"}',
+        )
+
+        with pytest.raises(ClientError):
+            model_current_dataset.percentages()
+
     @responses.activate
     def test_drift_ok(self):
         base_url = 'http://api:9000'

From 3db228e787ba8f32903edebf99b6e106f4ad79d5 Mon Sep 17 00:00:00 2001
From: Stefano Zamboni
Date: Mon, 28 Oct 2024 09:47:46 +0100
Subject: [PATCH 2/2] feat: added percentages to api for sdk

---
 api/app/routes/metrics_route.py               | 23 ++++++++
 api/app/services/metrics_service.py           | 52 +++++++++++++++++++
 .../models/dataset_percentages.py             |  1 -
 3 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/api/app/routes/metrics_route.py b/api/app/routes/metrics_route.py
index 3b8219ef..869d8302 100644
--- a/api/app/routes/metrics_route.py
+++ b/api/app/routes/metrics_route.py
@@ -7,6 +7,7 @@
 from app.models.metrics.data_quality_dto import DataQualityDTO
 from app.models.metrics.drift_dto import DriftDTO
 from app.models.metrics.model_quality_dto import ModelQualityDTO
+from app.models.metrics.percentages_dto import PercentagesDTO
 from app.models.metrics.statistics_dto import StatisticsDTO
 from app.services.metrics_service import MetricsService
 
@@ -128,4 +129,26 @@ def get_current_data_quality_by_model_by_uuid(
             model_uuid, current_uuid
         )
 
+    @router.get(
+        '/{model_uuid}/current/latest/percentages',
+        status_code=200,
+        response_model=PercentagesDTO,
+    )
+    def get_latest_current_percentages_by_model_by_uuid(model_uuid: UUID):
+        return metrics_service.get_current_percentages_by_model_by_uuid(
+            model_uuid, None
+        )
+
+    @router.get(
+        '/{model_uuid}/current/{current_uuid}/percentages',
+        status_code=200,
+        response_model=PercentagesDTO,
+    )
+    def get_current_percentages_by_model_by_uuid(
+        model_uuid: UUID, current_uuid: UUID
+    ):
+        return metrics_service.get_current_percentages_by_model_by_uuid(
+            model_uuid, current_uuid
+        )
+
     return router
diff --git a/api/app/services/metrics_service.py b/api/app/services/metrics_service.py
index 8a8ea6a7..d0c585ba 100644
--- a/api/app/services/metrics_service.py
+++ b/api/app/services/metrics_service.py
@@ -16,6 +16,7 @@
 from app.models.metrics.data_quality_dto import DataQualityDTO
 from app.models.metrics.drift_dto import DriftDTO
 from app.models.metrics.model_quality_dto import ModelQualityDTO
+from app.models.metrics.percentages_dto import PercentagesDTO
 from app.models.metrics.statistics_dto import StatisticsDTO
 from app.models.model_dto import ModelType
 from app.services.model_service import ModelService
@@ -104,6 +105,18 @@ def get_current_data_quality_by_model_by_uuid(
             missing_status=JobStatus.MISSING_CURRENT,
         )
 
+    def get_current_percentages_by_model_by_uuid(
+        self, model_uuid: UUID, current_uuid: Optional[UUID]
+    ) -> PercentagesDTO:
+        """Retrieve current percentages for a model by its UUID and an optional current dataset UUID."""
+        return self._get_percentages_by_model_uuid(
+            model_uuid=model_uuid,
+            dataset_and_metrics_getter=lambda uuid: self.check_and_get_current_dataset_and_metrics(
+                uuid, current_uuid
+            ),
+            missing_status=JobStatus.MISSING_CURRENT,
+        )
+
     def get_current_drift(
         self, model_uuid: UUID, current_uuid: Optional[UUID]
     ) -> DriftDTO:
@@ -222,6 +235,22 @@ def _get_data_quality_by_model_uuid(
             missing_status=missing_status,
         )
 
+    def _get_percentages_by_model_uuid(
+        self,
+        model_uuid: UUID,
+        dataset_and_metrics_getter,
+        missing_status,
+    ) -> PercentagesDTO:
+        """Retrieve percentages for a model by its UUID."""
+        model = self.model_service.get_model_by_uuid(model_uuid)
+        dataset, metrics = dataset_and_metrics_getter(model_uuid)
+        return self._create_percentages_dto(
+            model_type=model.model_type,
+            dataset=dataset,
+            metrics=metrics,
+            missing_status=missing_status,
+        )
+
     def _get_drift_by_model_uuid(
         self,
         model_uuid: UUID,
@@ -317,6 +346,29 @@ def _create_data_quality_dto(
             data_quality_data=metrics.data_quality,
         )
 
+    @staticmethod
+    def _create_percentages_dto(
+        model_type: ModelType,
+        dataset: Optional[ReferenceDataset | CurrentDataset],
+        metrics: Optional[ReferenceDatasetMetrics | CurrentDatasetMetrics],
+        missing_status,
+    ) -> PercentagesDTO:
+        """Create a PercentagesDTO from the provided dataset and metrics."""
+        if not dataset:
+            return PercentagesDTO.from_dict(
+                job_status=missing_status,
+                percentages_data=None,
+            )
+        if not metrics:
+            return PercentagesDTO.from_dict(
+                job_status=dataset.status,
+                percentages_data=None,
+            )
+        return PercentagesDTO.from_dict(
+            job_status=dataset.status,
+            percentages_data=metrics.percentages,
+        )
+
     @staticmethod
     def _create_drift_dto(
         dataset: Optional[ReferenceDataset | CurrentDataset],
diff --git a/sdk/radicalbit_platform_sdk/models/dataset_percentages.py b/sdk/radicalbit_platform_sdk/models/dataset_percentages.py
index 225553aa..fa4ecdbd 100644
--- a/sdk/radicalbit_platform_sdk/models/dataset_percentages.py
+++ b/sdk/radicalbit_platform_sdk/models/dataset_percentages.py
@@ -20,4 +20,3 @@ class Percentages(BaseModel):
     drift: MetricPercentage
 
     model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
-