Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add percentages dto and endpoint #177

Merged
merged 6 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions api/alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,12 @@ def run_migrations_offline() -> None:
include_name=include_name
)

# target_metadata.schema is None when the default schema is in use (public for Postgres), so fall back to 'public' explicitly for the SQL statements below.
target_schema = 'public' if target_metadata.schema is None else target_metadata.schema

with context.begin_transaction():
context.execute(f'create schema if not exists "{target_metadata.schema}";')
context.execute(f'set search_path to "{target_metadata.schema}"')
context.execute(f'create schema if not exists "{target_schema}";')
context.execute(f'set search_path to "{target_schema}"')
context.run_migrations()


Expand All @@ -99,9 +102,12 @@ def run_migrations_online() -> None:
include_name=include_name
)

# target_metadata.schema is None when the default schema is in use (public for Postgres), so fall back to 'public' explicitly for the SQL statements below.
target_schema = 'public' if target_metadata.schema is None else target_metadata.schema

with context.begin_transaction():
context.execute(f'create schema if not exists "{target_metadata.schema}";')
context.execute(f'set search_path to "{target_metadata.schema}"')
context.execute(f'create schema if not exists "{target_schema}";')
context.execute(f'set search_path to "{target_schema}"')
context.run_migrations()


Expand Down
30 changes: 30 additions & 0 deletions api/alembic/versions/dccb82489f4d_add_percentage_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""add percentage column

Revision ID: dccb82489f4d
Revises: 6edab3f23907
Create Date: 2024-10-17 09:03:48.063883

"""
from typing import Sequence, Union, Text

from alembic import op
import sqlalchemy as sa
from app.db.tables.commons.json_encoded_dict import JSONEncodedDict

# revision identifiers, used by Alembic.
# `revision` is this migration's own id; `down_revision` is the id of the
# migration it revises (the "Revises" entry in the module docstring).
revision: str = 'dccb82489f4d'
down_revision: Union[str, None] = '6edab3f23907'
# No branch labels and no cross-revision dependencies for this migration.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add the nullable ``PERCENTAGES`` column to ``current_dataset_metrics``."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        'current_dataset_metrics',
        sa.Column(
            'PERCENTAGES',
            # `Text` imported from `typing` is only an alias of `str`, so
            # `Text()` evaluates to ''. `astext_type` is meant to receive a
            # SQLAlchemy type instance, hence `sa.Text()`.
            JSONEncodedDict(astext_type=sa.Text()),
            nullable=True,
        ),
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop the ``PERCENTAGES`` column from ``current_dataset_metrics``."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column(
        table_name='current_dataset_metrics',
        column_name='PERCENTAGES',
    )
    # ### end Alembic commands ###
34 changes: 33 additions & 1 deletion api/app/db/dao/model_dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
from fastapi_pagination import Page, Params
from fastapi_pagination.ext.sqlalchemy import paginate
import sqlalchemy
from sqlalchemy import asc, desc
from sqlalchemy import asc, desc, func
from sqlalchemy.future import select as future_select

from app.db.database import Database
from app.db.tables.current_dataset_metrics_table import CurrentDatasetMetrics
from app.db.tables.current_dataset_table import CurrentDataset
from app.db.tables.model_table import Model
from app.models.model_order import OrderType

Expand Down Expand Up @@ -57,6 +59,36 @@ def get_all(
with self.db.begin_session() as session:
return session.query(Model).where(Model.deleted.is_(False))

def get_last_n_percentages(self, n_models: int):
    """Return the most recently updated models with their latest metrics.

    For each model, the current dataset with the greatest ``date`` is
    selected and joined with its ``CurrentDatasetMetrics`` row. All joins
    are inner joins, so a model without any current dataset, or whose latest
    current dataset has no metrics row, is not returned.

    :param n_models: maximum number of rows to return
    :return: list of ``(Model, CurrentDatasetMetrics)`` rows ordered by
        ``Model.updated_at`` descending
    """
    with self.db.begin_session() as session:
        # Latest current-dataset date for every model.
        latest_per_model = (
            session.query(
                CurrentDataset.model_uuid,
                func.max(CurrentDataset.date).label('maxdate'),
            )
            .group_by(CurrentDataset.model_uuid)
            .subquery()
        )
        return (
            session.query(Model, CurrentDatasetMetrics)
            .join(
                CurrentDataset,
                CurrentDataset.model_uuid == Model.uuid,
            )
            # Keep only the current dataset matching the per-model max date.
            .join(
                latest_per_model,
                (CurrentDataset.model_uuid == latest_per_model.c.model_uuid)
                & (CurrentDataset.date == latest_per_model.c.maxdate),
            )
            .join(
                CurrentDatasetMetrics,
                CurrentDatasetMetrics.current_uuid == CurrentDataset.uuid,
            )
            # Soft-deleted models must not be listed, consistent with get_all().
            .where(Model.deleted.is_(False))
            .order_by(Model.updated_at.desc())
            .limit(n_models)
            .all()
        )

def get_all_paginated(
self,
params: Params = Params(),
Expand Down
4 changes: 2 additions & 2 deletions api/app/db/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ class Reflected(DeferredReflection):

# https://github.com/sqlalchemy/alembic/discussions/1351
# If the schema is the default one, Alembic needs None; otherwise the migrations are messed up
schema_name = (
fixed_schema = (
None
if get_config().db_config.db_schema == 'public'
else get_config().db_config.db_schema
)
BaseTable = declarative_base(
metadata=MetaData(schema=schema_name, naming_convention=naming_convention)
metadata=MetaData(schema=fixed_schema, naming_convention=naming_convention)
)


Expand Down
1 change: 1 addition & 0 deletions api/app/db/tables/current_dataset_metrics_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ class CurrentDatasetMetrics(Reflected, BaseTable, BaseDAO):
data_quality = Column('DATA_QUALITY', JSONEncodedDict, nullable=True)
drift = Column('DRIFT', JSONEncodedDict, nullable=True)
statistics = Column('STATISTICS', JSONEncodedDict, nullable=True)
percentages = Column('PERCENTAGES', JSONEncodedDict, nullable=True)
61 changes: 61 additions & 0 deletions api/app/models/metrics/percentages_dto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from typing import Dict, List, Optional

from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel

from app.models.job_status import JobStatus


class DetailPercentage(BaseModel):
    """A float score attached to a single named feature."""

    # Name of the feature the score refers to.
    feature_name: str
    # Score for that feature (scale/range is not defined here — TODO confirm).
    score: float


class MetricPercentage(BaseModel):
    """An overall float value with an optional list of per-feature details."""

    # Overall value of the metric.
    value: float
    # The default is None, so the annotation must admit None as well;
    # individual entries may still be None, as before.
    details: Optional[List[Optional[DetailPercentage]]] = None


class Percentages(BaseModel):
    """Groups the data quality, model quality and drift ``MetricPercentage`` values."""

    data_quality: MetricPercentage
    model_quality: MetricPercentage
    drift: MetricPercentage

    # - populate_by_name: fields can be populated by their Python name as well
    #   as by their alias.
    # - alias_generator=to_camel: aliases are the camelCase form of the names.
    # - protected_namespaces=(): clears pydantic's protected `model_` prefix,
    #   which the `model_quality` field would otherwise conflict with.
    model_config = ConfigDict(
        populate_by_name=True, alias_generator=to_camel, protected_namespaces=()
    )


class PercentagesDTO(BaseModel):
    """Pairs a job status with an optional ``Percentages`` payload."""

    job_status: JobStatus
    # Required field, but its value may be None.
    percentages: Optional[Percentages]

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        populate_by_name=True,
        alias_generator=to_camel,
    )

    @staticmethod
    def from_dict(
        job_status: JobStatus,
        percentages_data: Optional[Dict],
    ) -> 'PercentagesDTO':
        """Create a PercentagesDTO from a dictionary of data.

        :param job_status: status stored on the resulting DTO
        :param percentages_data: raw percentages dictionary; ``None`` or an
            empty dict yields a DTO whose ``percentages`` is ``None``
        :return: the populated ``PercentagesDTO``
        """
        percentages = PercentagesDTO._create_percentages(
            percentages_data=percentages_data
        )

        # Instantiate the DTO itself. The previous code called the input
        # dictionary (`percentages_data(...)`), which always raised TypeError.
        return PercentagesDTO(
            job_status=job_status,
            percentages=percentages,
        )

    @staticmethod
    def _create_percentages(
        percentages_data: Optional[Dict],
    ) -> Optional[Percentages]:
        """Create a ``Percentages`` instance from a dictionary of data.

        Returns ``None`` when ``percentages_data`` is ``None`` or empty.
        """
        if not percentages_data:
            return None
        return Percentages(**percentages_data)
4 changes: 4 additions & 0 deletions api/app/models/model_dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from app.db.dao.reference_dataset_dao import ReferenceDataset
from app.models.inferred_schema_dto import FieldType, SupportedTypes
from app.models.job_status import JobStatus
from app.models.metrics.percentages_dto import Percentages
from app.models.utils import is_none, is_number, is_number_or_string, is_optional_float


Expand Down Expand Up @@ -216,6 +217,7 @@ class ModelOut(BaseModel):
latest_current_uuid: Optional[UUID]
latest_reference_job_status: JobStatus
latest_current_job_status: JobStatus
percentages: Optional[Percentages]

model_config = ConfigDict(
populate_by_name=True, alias_generator=to_camel, protected_namespaces=()
Expand All @@ -226,6 +228,7 @@ def from_model(
model: Model,
latest_reference_dataset: Optional[ReferenceDataset] = None,
latest_current_dataset: Optional[CurrentDataset] = None,
percentages: Optional[Percentages] = None,
):
latest_reference_uuid = (
latest_reference_dataset.uuid if latest_reference_dataset else None
Expand Down Expand Up @@ -264,4 +267,5 @@ def from_model(
latest_current_uuid=latest_current_uuid,
latest_reference_job_status=latest_reference_job_status,
latest_current_job_status=latest_current_job_status,
percentages=percentages,
)
4 changes: 4 additions & 0 deletions api/app/routes/model_route.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def get_all_models_paginated(
def get_all_models():
return model_service.get_all_models()

@router.get('/last_n', status_code=200, response_model=List[ModelOut])
def get_last_n_models(n_models: int):
    # `n_models` is not part of the path, so FastAPI reads it from the query
    # string; the lookup itself is delegated to the service layer.
    last_models = model_service.get_last_n_models_percentages(n_models)
    return last_models

@router.post('', status_code=201, response_model=ModelOut)
def create_model(model_in: ModelIn):
model = model_service.create_model(model_in)
Expand Down
16 changes: 16 additions & 0 deletions api/app/services/model_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,22 @@ def get_all_models(
model_out_list.append(model_out)
return model_out_list

def get_last_n_models_percentages(self, n_models) -> List[ModelOut]:
    """Build ``ModelOut`` entries, including percentages, for the last ``n_models`` models.

    Rows come from ``model_dao.get_last_n_percentages``; for each row the
    latest reference/current datasets of the model are looked up and passed
    to ``ModelOut.from_model`` together with the row's percentages.
    """

    def _to_model_out(model, metrics):
        # One ModelOut per (model, metrics) row returned by the DAO.
        reference_dataset, current_dataset = self.get_latest_datasets(model.uuid)
        return ModelOut.from_model(
            model=model,
            latest_reference_dataset=reference_dataset,
            latest_current_dataset=current_dataset,
            percentages=metrics.percentages,
        )

    rows = self.model_dao.get_last_n_percentages(n_models)
    return [_to_model_out(model, metrics) for model, metrics in rows]

def get_all_models_paginated(
self,
params: Params = Params(),
Expand Down
Loading