Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added percentages dto to sdk #183

Merged
merged 2 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions api/app/routes/metrics_route.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from app.models.metrics.data_quality_dto import DataQualityDTO
from app.models.metrics.drift_dto import DriftDTO
from app.models.metrics.model_quality_dto import ModelQualityDTO
from app.models.metrics.percentages_dto import PercentagesDTO
from app.models.metrics.statistics_dto import StatisticsDTO
from app.services.metrics_service import MetricsService

Expand Down Expand Up @@ -128,4 +129,26 @@ def get_current_data_quality_by_model_by_uuid(
model_uuid, current_uuid
)

@router.get(
    '/{model_uuid}/current/latest/percentages',
    response_model=PercentagesDTO,
    status_code=200,
)
def get_latest_current_percentages_by_model_by_uuid(model_uuid: UUID):
    # "latest" variant of the percentages endpoint: no dataset UUID is part
    # of the path, so None is handed to the service in its place.
    # NOTE(review): presumably the service resolves None to the most recent
    # current dataset -- confirm in check_and_get_current_dataset_and_metrics.
    no_explicit_dataset = None
    latest_percentages = metrics_service.get_current_percentages_by_model_by_uuid(
        model_uuid, no_explicit_dataset
    )
    return latest_percentages

@router.get(
    '/{model_uuid}/current/{current_uuid}/percentages',
    response_model=PercentagesDTO,
    status_code=200,
)
def get_current_percentages_by_model_by_uuid(
    model_uuid: UUID, current_uuid: UUID
):
    # Both identifiers come straight from the path and are forwarded
    # unchanged; the service builds the PercentagesDTO that is returned.
    percentages_dto = metrics_service.get_current_percentages_by_model_by_uuid(
        model_uuid, current_uuid
    )
    return percentages_dto

return router
52 changes: 52 additions & 0 deletions api/app/services/metrics_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from app.models.metrics.data_quality_dto import DataQualityDTO
from app.models.metrics.drift_dto import DriftDTO
from app.models.metrics.model_quality_dto import ModelQualityDTO
from app.models.metrics.percentages_dto import PercentagesDTO
from app.models.metrics.statistics_dto import StatisticsDTO
from app.models.model_dto import ModelType
from app.services.model_service import ModelService
Expand Down Expand Up @@ -104,6 +105,18 @@ def get_current_data_quality_by_model_by_uuid(
missing_status=JobStatus.MISSING_CURRENT,
)

def get_current_percentages_by_model_by_uuid(
    self, model_uuid: UUID, current_uuid: Optional[UUID]
) -> PercentagesDTO:
    """Retrieve the percentages of a model's current dataset.

    The model is identified by `model_uuid`; `current_uuid` (which may be
    None) is forwarded as-is to the current dataset lookup. When no dataset
    is found the resulting DTO carries `JobStatus.MISSING_CURRENT`.
    """

    def fetch_current_dataset_and_metrics(target_model_uuid):
        # Binds the requested current dataset UUID so the shared helper only
        # has to supply the model UUID.
        return self.check_and_get_current_dataset_and_metrics(
            target_model_uuid, current_uuid
        )

    return self._get_percentages_by_model_uuid(
        model_uuid=model_uuid,
        dataset_and_metrics_getter=fetch_current_dataset_and_metrics,
        missing_status=JobStatus.MISSING_CURRENT,
    )

def get_current_drift(
self, model_uuid: UUID, current_uuid: Optional[UUID]
) -> DriftDTO:
Expand Down Expand Up @@ -222,6 +235,22 @@ def _get_data_quality_by_model_uuid(
missing_status=missing_status,
)

def _get_percentages_by_model_uuid(
    self,
    model_uuid: UUID,
    dataset_and_metrics_getter,
    missing_status,
) -> PercentagesDTO:
    """Retrieve percentages for a model by its UUID.

    `dataset_and_metrics_getter` is called with the model UUID and must
    return a `(dataset, metrics)` pair; `missing_status` is the job status
    reported when that pair has no dataset.
    """
    # The model lookup runs first, before the dataset getter is invoked.
    model = self.model_service.get_model_by_uuid(model_uuid)
    dataset_and_metrics = dataset_and_metrics_getter(model_uuid)
    found_dataset, found_metrics = dataset_and_metrics
    return self._create_percentages_dto(
        model_type=model.model_type,
        dataset=found_dataset,
        metrics=found_metrics,
        missing_status=missing_status,
    )

def _get_drift_by_model_uuid(
self,
model_uuid: UUID,
Expand Down Expand Up @@ -317,6 +346,29 @@ def _create_data_quality_dto(
data_quality_data=metrics.data_quality,
)

@staticmethod
def _create_percentages_dto(
    model_type: ModelType,
    dataset: Optional[ReferenceDataset | CurrentDataset],
    metrics: Optional[ReferenceDatasetMetrics | CurrentDatasetMetrics],
    missing_status,
) -> PercentagesDTO:
    """Build a PercentagesDTO from an optional dataset and optional metrics.

    - no dataset: status is `missing_status`, no percentages payload
    - dataset without metrics: status is the dataset's own status, no payload
    - dataset with metrics: dataset status plus `metrics.percentages`

    `model_type` is accepted for signature parity but is not read here.
    """
    if not dataset:
        job_status, percentages_data = missing_status, None
    elif not metrics:
        job_status, percentages_data = dataset.status, None
    else:
        job_status, percentages_data = dataset.status, metrics.percentages

    return PercentagesDTO.from_dict(
        job_status=job_status,
        percentages_data=percentages_data,
    )

@staticmethod
def _create_drift_dto(
dataset: Optional[ReferenceDataset | CurrentDataset],
Expand Down
52 changes: 52 additions & 0 deletions sdk/radicalbit_platform_sdk/apis/model_current_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
ModelType,
RegressionDataQuality,
)
from radicalbit_platform_sdk.models.dataset_percentages import Percentages


class ModelCurrentDataset:
Expand All @@ -42,6 +43,7 @@ def __init__(
self.__model_metrics = None
self.__data_metrics = None
self.__drift = None
self.__percentages = None

def uuid(self) -> UUID:
    """Return the UUID held by this instance."""
    return self.__uuid
Expand Down Expand Up @@ -108,6 +110,56 @@ def __callback(

return self.__statistics

def percentages(self) -> Optional[Percentages]:
"""Get percentages about the actual dataset

:return: The `Percentages` if exists
"""

def __callback(
response: requests.Response,
) -> tuple[JobStatus, Optional[Percentages]]:
try:
response_json = response.json()
job_status = JobStatus(response_json['jobStatus'])
if 'percentages' in response_json:
return (
job_status,
Percentages.model_validate(response_json['percentages']),
)
except KeyError as e:
raise ClientError(f'Unable to parse response: {response.text}') from e
except ValidationError as e:
raise ClientError(f'Unable to parse response: {response.text}') from e
else:
return job_status, None

match self.__status:
case JobStatus.ERROR:
self.__percentages = None
case JobStatus.MISSING_CURRENT:
self.__percentages = None
case JobStatus.SUCCEEDED:
if self.__percentages is None:
_, percentages = invoke(
method='GET',
url=f'{self.__base_url}/api/models/{str(self.__model_uuid)}/current/{str(self.__uuid)}/percentages',
valid_response_code=200,
func=__callback,
)
self.__percentages = percentages
case JobStatus.IMPORTING:
status, percentages = invoke(
method='GET',
url=f'{self.__base_url}/api/models/{str(self.__model_uuid)}/current/{str(self.__uuid)}/percentages',
valid_response_code=200,
func=__callback,
)
self.__status = status
self.__percentages = percentages

return self.__percentages

def drift(self) -> Optional[Drift]:
"""Get drift about the actual dataset

Expand Down
22 changes: 22 additions & 0 deletions sdk/radicalbit_platform_sdk/models/dataset_percentages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from typing import List, Optional

from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel


class DetailPercentage(BaseModel):
    # One entry of a metric's `details` list: a score attached to a feature.
    feature_name: str  # presumably the dataset feature/column name -- confirm
    score: float  # score for that feature; its scale is not defined in this file


class MetricPercentage(BaseModel):
    # Aggregated value of one metric family plus an optional per-feature
    # breakdown.
    value: float
    # The default is None, so the annotation must allow None as well;
    # otherwise an explicit `"details": null` in a payload is rejected even
    # though omitting the key yields exactly that value.
    details: Optional[List[Optional[DetailPercentage]]] = None


class Percentages(BaseModel):
    # Percentages of a dataset, one MetricPercentage per metric family.
    # Payload keys are accepted both as camelCase aliases and by field name.
    data_quality: MetricPercentage
    model_quality: MetricPercentage
    drift: MetricPercentage

    # `model_quality` starts with pydantic's protected `model_` prefix, which
    # makes pydantic 2.0-2.9 emit a UserWarning at class creation; clearing
    # the protected namespaces silences it without renaming the field.
    model_config = ConfigDict(
        populate_by_name=True,
        alias_generator=to_camel,
        protected_namespaces=(),
    )
132 changes: 132 additions & 0 deletions sdk/tests/apis/model_current_dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
ModelType,
RegressionDataQuality,
)
from radicalbit_platform_sdk.models.dataset_percentages import Percentages


class ModelCurrentDatasetTest(unittest.TestCase):
Expand Down Expand Up @@ -139,6 +140,137 @@ def test_statistics_key_error(self):
with pytest.raises(ClientError):
model_current_dataset.statistics()

@responses.activate
def test_percentages_ok(self):
    # Happy path: a dataset that starts in IMPORTING fetches its percentages
    # and picks up the SUCCEEDED status carried by the response.
    base_url = 'http://api:9000'
    model_id = uuid.uuid4()
    import_uuid = uuid.uuid4()
    model_current_dataset = ModelCurrentDataset(
        base_url,
        model_id,
        ModelType.BINARY,
        CurrentFileUpload(
            uuid=import_uuid,
            path='s3://bucket/file.csv',
            date='2014',
            correlation_id_column='column',
            status=JobStatus.IMPORTING,
        ),
    )

    # Mocked API answer: three metric families with 4, 0 and 1 detail rows.
    responses.add(
        method=responses.GET,
        url=f'{base_url}/api/models/{str(model_id)}/current/{str(import_uuid)}/percentages',
        status=200,
        body="""{
"jobStatus": "SUCCEEDED",
"percentages": {
"data_quality": {
"value": 0.9,
"details": [
{
"feature_name": "num1",
"score": 0.4
},
{
"feature_name": "num2",
"score": 0.0
},
{
"feature_name": "cat1",
"score": 0.0
},
{
"feature_name": "cat2",
"score": 0.0
}
]
},
"model_quality": {
"value": -1,
"details": []
},
"drift": {
"value": 0.75,
"details": [
{
"feature_name": "num1",
"score": 1.0
}
]
}
}
}""",
    )

    percentages = model_current_dataset.percentages()

    assert isinstance(percentages, Percentages)

    # Values and detail counts mirror the mocked payload above.
    assert percentages.data_quality.value == 0.9
    assert len(percentages.data_quality.details) == 4
    assert percentages.model_quality.value == -1
    assert len(percentages.model_quality.details) == 0
    assert percentages.drift.value == 0.75
    assert len(percentages.drift.details) == 1
    # The IMPORTING branch also refreshes the status from the response.
    assert model_current_dataset.status() == JobStatus.SUCCEEDED

@responses.activate
def test_percentages_validation_error(self):
    # A response with a valid `jobStatus` but a `percentages` payload that
    # cannot be validated as `Percentages` must surface as ClientError.
    base_url = 'http://api:9000'
    model_id = uuid.uuid4()
    import_uuid = uuid.uuid4()
    model_current_dataset = ModelCurrentDataset(
        base_url,
        model_id,
        ModelType.BINARY,
        CurrentFileUpload(
            uuid=import_uuid,
            path='s3://bucket/file.csv',
            date='2014',
            correlation_id_column='column',
            status=JobStatus.IMPORTING,
        ),
    )

    # `jobStatus` has to be present and valid: without it the client fails
    # on the missing key first and the validation path is never reached
    # (that case is covered by the key-error test).
    responses.add(
        method=responses.GET,
        url=f'{base_url}/api/models/{str(model_id)}/current/{str(import_uuid)}/percentages',
        status=200,
        body='{"jobStatus": "SUCCEEDED", "percentages": "wrong"}',
    )

    with pytest.raises(ClientError):
        model_current_dataset.percentages()

@responses.activate
def test_percentages_key_error(self):
    # A response without the `jobStatus` key must surface as ClientError.
    base_url = 'http://api:9000'
    model_id = uuid.uuid4()
    import_uuid = uuid.uuid4()

    importing_upload = CurrentFileUpload(
        uuid=import_uuid,
        path='s3://bucket/file.csv',
        date='2014',
        correlation_id_column='column',
        status=JobStatus.IMPORTING,
    )
    model_current_dataset = ModelCurrentDataset(
        base_url, model_id, ModelType.BINARY, importing_upload
    )

    percentages_url = f'{base_url}/api/models/{str(model_id)}/current/{str(import_uuid)}/percentages'
    responses.add(
        method=responses.GET,
        url=percentages_url,
        status=200,
        body='{"wrong": "json"}',
    )

    with pytest.raises(ClientError):
        model_current_dataset.percentages()

@responses.activate
def test_drift_ok(self):
base_url = 'http://api:9000'
Expand Down