From 53f36b6f2cb8bf2d761cd5ea63c243156587cc80 Mon Sep 17 00:00:00 2001 From: turingcompl33t Date: Sun, 10 Sep 2023 12:47:11 -0400 Subject: [PATCH 1/4] refactor report; port to artifact protocol --- mlte/artifact/model.py | 7 +- mlte/artifact/type.py | 3 + mlte/model/shared.py | 225 ++++++ .../{negotiation_card.py => artifact.py} | 20 +- mlte/negotiation/model.py | 211 +---- mlte/report/__init__.py | 25 - mlte/report/artifact.py | 107 +++ mlte/report/html.py | 107 --- mlte/report/model.py | 95 +++ mlte/report/render.py | 47 -- mlte/report/report.py | 292 ------- .../schema/artifact/report/v0.0.1/schema.json | 749 ++++++++++++++++++ test/artifact/test_artifact.py | 2 +- test/fixture/artifact.py | 15 +- test/model/__init__.py | 0 test/model/test_shared.py | 292 +++++++ test/negotiation/test_artifact.py | 56 +- test/negotiation/test_model.py | 344 ++------ test/report/__init__.py | 0 test/report/test_artifact.py | 33 + test/report/test_model.py | 188 +++++ test/schema/test_report_schema.py | 51 +- tools/schema.py | 4 + 23 files changed, 1798 insertions(+), 1075 deletions(-) create mode 100644 mlte/model/shared.py rename mlte/negotiation/{negotiation_card.py => artifact.py} (84%) create mode 100644 mlte/report/artifact.py delete mode 100644 mlte/report/html.py create mode 100644 mlte/report/model.py delete mode 100644 mlte/report/render.py delete mode 100644 mlte/report/report.py create mode 100644 mlte/schema/artifact/report/v0.0.1/schema.json create mode 100644 test/model/__init__.py create mode 100644 test/model/test_shared.py create mode 100644 test/report/__init__.py create mode 100644 test/report/test_artifact.py create mode 100644 test/report/test_model.py diff --git a/mlte/artifact/model.py b/mlte/artifact/model.py index f69fc1ffb..b9525f10b 100644 --- a/mlte/artifact/model.py +++ b/mlte/artifact/model.py @@ -11,6 +11,7 @@ from mlte.artifact.type import ArtifactType from mlte.model import BaseModel from mlte.negotiation.model import NegotiationCardModel +from 
mlte.report.model import ReportModel from mlte.spec.model import SpecModel from mlte.validation.model import ValidatedSpecModel from mlte.value.model import ValueModel @@ -36,6 +37,10 @@ class ArtifactModel(BaseModel): """The artifact header.""" body: Union[ - NegotiationCardModel, ValueModel, SpecModel, ValidatedSpecModel + NegotiationCardModel, + ValueModel, + SpecModel, + ValidatedSpecModel, + ReportModel, ] = Field(..., discriminator="artifact_type") """The artifact body.""" diff --git a/mlte/artifact/type.py b/mlte/artifact/type.py index c0ee36e93..082991a3b 100644 --- a/mlte/artifact/type.py +++ b/mlte/artifact/type.py @@ -25,3 +25,6 @@ class ArtifactType(str, Enum): VALIDATED_SPEC = auto() """The validated specification artifact type.""" + + REPORT = auto() + """The report artifact type.""" diff --git a/mlte/model/shared.py b/mlte/model/shared.py new file mode 100644 index 000000000..3a2bfc337 --- /dev/null +++ b/mlte/model/shared.py @@ -0,0 +1,225 @@ +""" +mlte/model/shared.py + +Shared model implementation. 
+""" + +from enum import Enum +from typing import List, Optional + +from mlte.model.base_model import BaseModel + +# ----------------------------------------------------------------------------- +# ProblemType +# ----------------------------------------------------------------------------- + + +class ProblemType(Enum): + """An enumeration over machine learning problem types.""" + + CLASSIFICATION = "classification" + CLUSTERING = "clustering" + TREND = "trend" + ALERT = "alert" + FORECASTING = "forecasting" + CONTENT_GENERATION = "content_generation" + BENCHMARKING = "benchmarking" + GOALS = "goals" + DETECTION = "detection" + OTHER = "other" + + +# ----------------------------------------------------------------------------- +# GoalDescriptor (and sub-models) +# ----------------------------------------------------------------------------- + + +class MetricDescriptor(BaseModel): + """A description of a metric that supports a system goal.""" + + description: Optional[str] = None + """A description of the metric.""" + + baseline: Optional[str] = None + """A description of the metric baseline value.""" + + +class GoalDescriptor(BaseModel): + """A description of a system goal.""" + + description: Optional[str] = None + """A description of the goal.""" + + metrics: List[MetricDescriptor] = [] + """A collection of metrics related to the goal.""" + + +# ----------------------------------------------------------------------------- +# RiskDescriptor +# ----------------------------------------------------------------------------- + + +class RiskDescriptor(BaseModel): + """A description of system-level risks.""" + + fp: Optional[str] = None + """A description of risks associated with false-positives.""" + + fn: Optional[str] = None + """A description of risks associated with false-negatives.""" + + other: Optional[str] = None + """A description of risks associated with other failures.""" + + +# ----------------------------------------------------------------------------- +# 
ModelDescriptor (and sub-models) +# ----------------------------------------------------------------------------- + + +class ModelResourcesDescriptor(BaseModel): + """A descriptor for model resource requirements.""" + + cpu: Optional[str] = None + """A description of model CPU requirements.""" + + gpu: Optional[str] = None + """A description of model GPU requirements.""" + + memory: Optional[str] = None + """A description of model memory (RAM) requirements.""" + + storage: Optional[str] = None + """A description of model storage requirements.""" + + +class ModelInputDescriptor(BaseModel): + """A description of the model input specification.""" + + description: Optional[str] = None + """A textual description of the input specification.""" + + +class ModelOutputDescriptor(BaseModel): + """A description of the model output specification.""" + + description: Optional[str] = None + """A textual description of the output specification.""" + + +class ModelInterfaceDescriptor(BaseModel): + """A description of the model interface.""" + + input: ModelInputDescriptor = ModelInputDescriptor() + """The model input specification.""" + + output: ModelOutputDescriptor = ModelOutputDescriptor() + """The model output specification.""" + + +class ModelDevelopmentDescriptor(BaseModel): + """A descriptor for model development considerations.""" + + resources: ModelResourcesDescriptor = ModelResourcesDescriptor() + """A description of model development resource requirements.""" + + +class ModelProductionDescriptor(BaseModel): + """A descriptor for model production considerations.""" + + integration: Optional[str] = None + """A description of the manner in which the model is integrated with the system.""" + + interface: ModelInterfaceDescriptor = ModelInterfaceDescriptor() + """A description of the model interface.""" + + resources: ModelResourcesDescriptor = ModelResourcesDescriptor() + """A description of model production resource requirements.""" + + +class ModelDescriptor(BaseModel): 
+ """A descriptor for the model.""" + + development: ModelDevelopmentDescriptor = ModelDevelopmentDescriptor() + """A description of model development considerations.""" + + production: ModelProductionDescriptor = ModelProductionDescriptor() + """A description of model production considerations.""" + + +# ----------------------------------------------------------------------------- +# DataDescriptor (and sub-models) +# ----------------------------------------------------------------------------- + + +class DataClassification(Enum): + """An enumeration of data classification levels.""" + + UNCLASSIFIED = "unclassified" + CUI = "cui" + PII = "pii" + PHI = "phi" + OTHER = "other" + + +class LabelDescriptor(BaseModel): + """Describes a dataset label.""" + + description: Optional[str] = None + """A description of the label.""" + + percentage: Optional[float] = None + """The relative frequency with which the label occurs in the dataset.""" + + +class FieldDescriptor(BaseModel): + """Describes a dataset field.""" + + name: Optional[str] = None + """The name of the field.""" + + description: Optional[str] = None + """A description of the field.""" + + type: Optional[str] = None + """A description of the field type.""" + + expected_values: Optional[str] = None + """An example of expected values for the field.""" + + missing_values: Optional[str] = None + """An example of missing values for the field.""" + + special_values: Optional[str] = None + """An example of special values for the field.""" + + +class DataDescriptor(BaseModel): + """Describes a dataset used in model development.""" + + description: Optional[str] = None + """A description of the dataset.""" + + source: Optional[str] = None + """A description of the data source.""" + + classification: Optional[DataClassification] = None + """A description of the data classification level.""" + + access: Optional[str] = None + """A description of the manner in which this data is accessed.""" + + labels: 
List[LabelDescriptor] = [] + """A description of the labels that appear in the dataset.""" + + fields: List[FieldDescriptor] = [] + """A description of the dataset schema.""" + + rights: Optional[str] = None + """A description of the ways in which the data can / cannot be used.""" + + policies: Optional[str] = None + """A description of the policies that govern use of this data.""" + + identifiable_information: Optional[str] = None + """A description of personaly-identifiable information considerations for this dataset.""" diff --git a/mlte/negotiation/negotiation_card.py b/mlte/negotiation/artifact.py similarity index 84% rename from mlte/negotiation/negotiation_card.py rename to mlte/negotiation/artifact.py index e57d805bd..8110c57cb 100644 --- a/mlte/negotiation/negotiation_card.py +++ b/mlte/negotiation/artifact.py @@ -1,7 +1,7 @@ """ -mlte/negotiation/negotiation_card.py +mlte/negotiation/artifact.py -Negotiation card artifact implementation. +Artifact implementation for negotiation card. 
""" from __future__ import annotations @@ -9,17 +9,13 @@ import typing from typing import List -import deepdiff +from deepdiff import DeepDiff from mlte.artifact.artifact import Artifact from mlte.artifact.model import ArtifactModel from mlte.artifact.type import ArtifactType -from mlte.negotiation.model import ( - DataDescriptor, - ModelDescriptor, - NegotiationCardModel, - SystemDescriptor, -) +from mlte.model.shared import DataDescriptor, ModelDescriptor +from mlte.negotiation.model import NegotiationCardModel, SystemDescriptor DEFAULT_NEGOTIATION_CARD_ID = "default.negotiation_card" @@ -79,8 +75,4 @@ def get_default_id(cls) -> str: def __eq__(self, other: object) -> bool: if not isinstance(other, NegotiationCard): return False - return ( - self.system == other.system - and len(deepdiff.DeepDiff(self.data, other.data)) == 0 - and self.model == other.model - ) + return len(DeepDiff(self, other)) == 0 diff --git a/mlte/negotiation/model.py b/mlte/negotiation/model.py index e91ce5cf2..a453c8981 100644 --- a/mlte/negotiation/model.py +++ b/mlte/negotiation/model.py @@ -6,65 +6,23 @@ from __future__ import annotations -from enum import Enum from typing import List, Literal, Optional from mlte.artifact.type import ArtifactType -from mlte.model import BaseModel +from mlte.model.base_model import BaseModel +from mlte.model.shared import ( + DataDescriptor, + GoalDescriptor, + ModelDescriptor, + ProblemType, + RiskDescriptor, +) # ----------------------------------------------------------------------------- # System Subcomponents # ----------------------------------------------------------------------------- -class ProblemType(Enum): - """An enumeration over machine learning problem types.""" - - CLASSIFICATION = "classification" - CLUSTERING = "clustering" - TREND = "trend" - ALERT = "alert" - FORECASTING = "forecasting" - CONTENT_GENERATION = "content_generation" - BENCHMARKING = "benchmarking" - GOALS = "goals" - DETECTION = "detection" - OTHER = "other" - - -class 
MetricDescriptor(BaseModel): - """A description of a metric that supports a system goal.""" - - description: Optional[str] = None - """A description of the metric.""" - - baseline: Optional[str] = None - """A description of the metric baseline value.""" - - -class GoalDescriptor(BaseModel): - """A description of a system goal.""" - - description: Optional[str] = None - """A description of the goal.""" - - metrics: List[MetricDescriptor] = [] - """A collection of metrics related to the goal.""" - - -class RiskDescriptor(BaseModel): - """A description of system-level risks.""" - - fp: Optional[str] = None - """A description of risks associated with false-positives.""" - - fn: Optional[str] = None - """A description of risks associated with false-negatives.""" - - other: Optional[str] = None - """A description of risks associated with other failures.""" - - class SystemDescriptor(BaseModel): """A description of the system context.""" @@ -84,159 +42,6 @@ class SystemDescriptor(BaseModel): """A description of risks associated with system failures.""" -# ----------------------------------------------------------------------------- -# Data Subcomponents -# ----------------------------------------------------------------------------- - - -class DataClassification(Enum): - """An enumeration of data classification levels.""" - - UNCLASSIFIED = "unclassified" - CUI = "cui" - PII = "pii" - PHI = "phi" - OTHER = "other" - - -class LabelDescriptor(BaseModel): - """Describes a dataset label.""" - - description: Optional[str] = None - """A description of the label.""" - - percentage: Optional[float] = None - """The relative frequency with which the label occurs in the dataset.""" - - -class FieldDescriptor(BaseModel): - """Describes a dataset field.""" - - name: Optional[str] = None - """The name of the field.""" - - description: Optional[str] = None - """A description of the field.""" - - type: Optional[str] = None - """A description of the field type.""" - - expected_values: 
Optional[str] = None - """An example of expected values for the field.""" - - missing_values: Optional[str] = None - """An example of missing values for the field.""" - - special_values: Optional[str] = None - """An example of special values for the field.""" - - -class DataDescriptor(BaseModel): - """Describes a dataset used in model development.""" - - description: Optional[str] = None - """A description of the dataset.""" - - source: Optional[str] = None - """A description of the data source.""" - - classification: Optional[DataClassification] = None - """A description of the data classification level.""" - - access: Optional[str] = None - """A description of the manner in which this data is accessed.""" - - labels: List[LabelDescriptor] = [] - """A description of the labels that appear in the dataset.""" - - fields: List[FieldDescriptor] = [] - """A description of the dataset schema.""" - - rights: Optional[str] = None - """A description of the ways in which the data can / cannot be used.""" - - policies: Optional[str] = None - """A description of the policies that govern use of this data.""" - - identifiable_information: Optional[str] = None - """A description of personaly-identifiable information considerations for this dataset.""" - - -# ----------------------------------------------------------------------------- -# Model Subcomponents -# ----------------------------------------------------------------------------- - - -class ModelResourcesDescriptor(BaseModel): - """A descriptor for model resource requirements.""" - - cpu: Optional[str] = None - """A description of model CPU requirements.""" - - gpu: Optional[str] = None - """A description of model GPU requirements.""" - - memory: Optional[str] = None - """A description of model memory (RAM) requirements.""" - - storage: Optional[str] = None - """A description of model storage requirements.""" - - -class ModelInputDescriptor(BaseModel): - """A description of the model input specification.""" - - 
description: Optional[str] = None - """A textual description of the input specification.""" - - -class ModelOutputDescriptor(BaseModel): - """A description of the model output specification.""" - - description: Optional[str] = None - """A textual description of the output specification.""" - - -class ModelInterfaceDescriptor(BaseModel): - """A description of the model interface.""" - - input: ModelInputDescriptor = ModelInputDescriptor() - """The model input specification.""" - - output: ModelOutputDescriptor = ModelOutputDescriptor() - """The model output specification.""" - - -class ModelDevelopmentDescriptor(BaseModel): - """A descriptor for model development considerations.""" - - resources: ModelResourcesDescriptor = ModelResourcesDescriptor() - """A description of model development resource requirements.""" - - -class ModelProductionDescriptor(BaseModel): - """A descriptor for model production considerations.""" - - integration: Optional[str] = None - """A description of the manner in which the model is integrated with the system.""" - - interface: ModelInterfaceDescriptor = ModelInterfaceDescriptor() - """A description of the model interface.""" - - resources: ModelResourcesDescriptor = ModelResourcesDescriptor() - """A description of model production resource requirements.""" - - -class ModelDescriptor(BaseModel): - """A descriptor for the model.""" - - development: ModelDevelopmentDescriptor = ModelDevelopmentDescriptor() - """A description of model development considerations.""" - - production: ModelProductionDescriptor = ModelProductionDescriptor() - """A description of model production considerations.""" - - # ----------------------------------------------------------------------------- # NegotiationCardModel # ----------------------------------------------------------------------------- diff --git a/mlte/report/__init__.py b/mlte/report/__init__.py index 92e9f25cb..e69de29bb 100644 --- a/mlte/report/__init__.py +++ b/mlte/report/__init__.py @@ -1,25 
+0,0 @@ -from .render import render -from .report import ( - Considerations, - Dataset, - Limitation, - Metadata, - ModelDetails, - ModelSpecification, - Report, - UseCase, - User, -) - -__all__ = [ - "Dataset", - "User", - "UseCase", - "Limitation", - "Metadata", - "ModelDetails", - "ModelSpecification", - "Considerations", - "Report", - "render", -] diff --git a/mlte/report/artifact.py b/mlte/report/artifact.py new file mode 100644 index 000000000..3446ff194 --- /dev/null +++ b/mlte/report/artifact.py @@ -0,0 +1,107 @@ +""" +mlte/report/artifact.py + +Artifact implementation for MLTE report. +""" + +from __future__ import annotations + +import typing +from typing import List + +from deepdiff import DeepDiff + +from mlte.artifact.artifact import Artifact +from mlte.artifact.model import ArtifactModel +from mlte.artifact.type import ArtifactType +from mlte.model.shared import DataDescriptor, RiskDescriptor +from mlte.report.model import ( + CommentDescriptor, + IntendedUseDescriptor, + PerformanceDesciptor, + QuantitiveAnalysisDescriptor, + ReportModel, + SummaryDescriptor, +) + +DEFAULT_REPORT_ID = "default.report" + + +class Report(Artifact): + """The report artifact contains the results of MLTE model evaluation.""" + + def __init__( + self, + identifier: str = DEFAULT_REPORT_ID, + summary: SummaryDescriptor = SummaryDescriptor(), + performance: PerformanceDesciptor = PerformanceDesciptor(), + intended_use: IntendedUseDescriptor = IntendedUseDescriptor(), + risks: RiskDescriptor = RiskDescriptor(), + data: List[DataDescriptor] = [], + comments: List[CommentDescriptor] = [], + quantitative_analysis: QuantitiveAnalysisDescriptor = QuantitiveAnalysisDescriptor(), + ) -> None: + super().__init__(identifier, ArtifactType.REPORT) + + self.summary = summary + """A summary of the evaluation.""" + + self.performance = performance + """A summary of model performance evaluation.""" + + self.intended_use = intended_use + """The intended use of the model under evaluation.""" 
+ + self.risks = risks + """A description of the risks for the model.""" + + self.data = data + """A description of the data used during model evaluation.""" + + self.comments = comments + """A collection of comments for the report.""" + + self.quantitative_analysis = quantitative_analysis + """The quantitative analysis for the evaluation.""" + + def to_model(self) -> ArtifactModel: + """Convert a report artifact to its corresponding model.""" + return ArtifactModel( + header=self.build_artifact_header(), + body=ReportModel( + artifact_type=ArtifactType.REPORT, + summary=self.summary, + performance=self.performance, + intended_use=self.intended_use, + risks=self.risks, + data=self.data, + comments=self.comments, + quantitative_analysis=self.quantitative_analysis, + ), + ) + + @classmethod + def from_model(cls, model: ArtifactModel) -> Report: # type: ignore[override] + """Convert a report model to its corresponding artifact.""" + assert model.header.type == ArtifactType.REPORT, "Broken precondition." + body = typing.cast(ReportModel, model.body) + return Report( + identifier=model.header.identifier, + summary=body.summary, + performance=body.performance, + intended_use=body.intended_use, + risks=body.risks, + data=body.data, + comments=body.comments, + quantitative_analysis=body.quantitative_analysis, + ) + + @classmethod + def get_default_id(cls) -> str: + """Get the default identifier for the artifact.""" + return DEFAULT_REPORT_ID + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Report): + return False + return len(DeepDiff(self, other)) == 0 diff --git a/mlte/report/html.py b/mlte/report/html.py deleted file mode 100644 index 2baf18157..000000000 --- a/mlte/report/html.py +++ /dev/null @@ -1,107 +0,0 @@ -""" -mlte/report/html.py - -Utilities for HTML report generation. 
-""" - -import json -import socket -import tempfile -from typing import Any, Dict -from urllib import request - -# The endpoint for resolving endpoints for report generation -RESOLUTION_ENDPOINT = "https://raw.githubusercontent.com/mlte-team/mlte/master/assets/endpoints.txt" # noqa -# The local endpoint -LOCAL_ENDPOINT = "http://localhost:8000/html" - - -def _connected(host: str = "8.8.8.8", port: int = 53, timeout: int = 2) -> bool: - """ - Determine if internet connectivity is available. - - :param host: The host used to test connectivity - :type host: str - :param port: The port used to test connectivity - :type port: int - :param timeout: The connection timeout - :type timeout: int - - :return `True` if connected to the internet, `False` otherwise - :rtype: bool - """ - try: - socket.setdefaulttimeout(timeout) - socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect((host, port)) - return True - except socket.error: - return False - - -def _resolve_endpoint( - local: bool, meta_endpoint: str = RESOLUTION_ENDPOINT -) -> str: - """ - Resolve the endpoint for report generation. - - :param local: Indicates to resolve to local address - :type local: bool - :param meta_endpoint: The endpoint for resolution requests - :type meta_endpoint: str - - :return: A report generation endpoint - :rtype: str - - :raises RuntimeError: If unable to resolve endpoint - """ - if local: - return LOCAL_ENDPOINT - with tempfile.NamedTemporaryFile() as f: - path = f.name - try: - request.urlretrieve(meta_endpoint, path) - except Exception: - raise RuntimeError( - "Unable to resolve endpoint for report generation." - ) - - with open(path, "r") as endpoints: - # Return an arbitrary line from the list of endpoints - for line in endpoints: - return line.strip() - - raise RuntimeError("Unreachable") - - -def _generate_html(document: Dict[str, Any], local: bool) -> str: - """ - Generate the HTML representation of a Report. 
- - :param document: The JSON document representation of the report - :type document: Dict[str, Any] - :param local: Indicates that the HTML generation server runs locally - :type local: bool - - :return: The HTML representation of the report, as a string - :rtype: str - - :raises RuntimeError: If report generation fails - """ - assert _connected(), "Broken precondition." - - # Resolve the endpoint for report generation - endpoint = _resolve_endpoint(local) - - # Construct the request with the report document - req = request.Request( - endpoint, - method="POST", - data=json.dumps(document).encode("utf-8"), - ) - req.add_header("Content-Type", "application/json; charset=utf-8") - - with request.urlopen(req) as response: - # TODO(Kyle): Better error handling. - if response.status != 200: - raise RuntimeError("Request failed.") - return str(response.read().decode("utf-8")) diff --git a/mlte/report/model.py b/mlte/report/model.py new file mode 100644 index 000000000..62f231ba1 --- /dev/null +++ b/mlte/report/model.py @@ -0,0 +1,95 @@ +""" +mlte/report/model.py + +Model implementation for MLTE report. +""" + +from typing import List, Literal, Optional + +from mlte.artifact.type import ArtifactType +from mlte.model.base_model import BaseModel +from mlte.model.shared import ( + DataDescriptor, + GoalDescriptor, + ModelProductionDescriptor, + ProblemType, + RiskDescriptor, +) + + +class SummaryDescriptor(BaseModel): + """The model implementation for the report summary.""" + + problem_type: Optional[ProblemType] = None + """The ML problem type.""" + + task: Optional[str] = None + """The ML task.""" + + +class PerformanceDesciptor(BaseModel): + """The model implementation for the performance descriptor.""" + + goals: List[GoalDescriptor] = [] + """A list of the goals for the system.""" + + # TODO(Kyle): Implement this. 
+ findings: Optional[str] = None + """The findings from MLTE evaluation.""" + + +class IntendedUseDescriptor(BaseModel): + """The model implementation for intended use.""" + + usage_context: Optional[str] = None + """The intended useage context.""" + + production_requirements: ModelProductionDescriptor = ( + ModelProductionDescriptor() + ) + """The production requirements and considerations for the model.""" + + +class CommentDescriptor(BaseModel): + """The model implementation for a generic comment.""" + + content: str + """The comment content.""" + + +class QuantitiveAnalysisDescriptor(BaseModel): + """The model implementation for report quantitative analysis.""" + + # TODO(Kyle): This is not implemented. + content: Optional[str] = None + """The field content.""" + + +class ReportModel(BaseModel): + """The model implementation for the MLTE report artifact.""" + + artifact_type: Literal[ArtifactType.REPORT] + """Union discriminator.""" + + summary: SummaryDescriptor = SummaryDescriptor() + """A summary of the model under evaluation.""" + + performance: PerformanceDesciptor = PerformanceDesciptor() + """The results of MLTE model evaluation.""" + + intended_use: IntendedUseDescriptor = IntendedUseDescriptor() + """A description of the intended use of the model.""" + + risks: RiskDescriptor = RiskDescriptor() + """A description of the risks from the system.""" + + data: List[DataDescriptor] = [] + """A description of the data used during model training and evaluation.""" + + comments: List[CommentDescriptor] = [] + """Comments included in the report.""" + + quantitative_analysis: QuantitiveAnalysisDescriptor = ( + QuantitiveAnalysisDescriptor() + ) + """Quantitative analysis included in the report.""" diff --git a/mlte/report/render.py b/mlte/report/render.py deleted file mode 100644 index 057f93f1e..000000000 --- a/mlte/report/render.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -mlte/report/render.py - -Utilities for report rendering. 
-""" - -import os -import tempfile -import webbrowser -from typing import Optional, Union - -from .html import _connected -from .report import Report - - -def render(target: Union[Report, str]): - """ - Render a Report as a web page. - - If possible, the rendered report is automatically - opened in an available window in the default browser. - - The argument `target` may be specified as either the - Report instance itself, or the HTML content generated - via a call to Report.to_html(). Rendering reports from - the local filesystem is currently unsupported. - - :param target: The report content to render - :type target: Union[Report, str] - """ - # TODO(Kyle): Validate HTML input. - - if not _connected(): - raise RuntimeError( - "An internet connection is required to render a report." - ) - - html: Optional[str] = ( - target.to_html() if isinstance(target, Report) else target - ) - assert html is not None, "Broken invariant." - - with tempfile.TemporaryDirectory() as dirname: - path = os.path.join(dirname, "index.html") - with open(path, "w") as f: - f.write(html) - webbrowser.open(f"file://{os.path.realpath(path)}") diff --git a/mlte/report/report.py b/mlte/report/report.py deleted file mode 100644 index 29ed3f21d..000000000 --- a/mlte/report/report.py +++ /dev/null @@ -1,292 +0,0 @@ -""" -mlte/report/report.py - -A model test report. 
- -Acknowledgements: - The contents of a mlte model report, and the - implementation of model report generation - in this file is adapted from the work by the - TensorFlow team in the Model Card Toolkit: - https://github.com/tensorflow/model-card-toolkit -""" - -from __future__ import annotations - -import dataclasses -import json -import typing -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Union - -from mlte.validation.validated_spec import ValidatedSpec - -from .._private.text import cleantext -from .html import _connected, _generate_html - - -@dataclass -class ReportAttribute: - """The base class for report attributes.""" - - def to_json(self) -> Dict[str, Any]: - """ - Convert a ReportAttribute instance to a JSON document. - - :return: The converted document - :rtype: Dict[str, Any] - """ - document: Dict[str, Any] = dataclasses.asdict( - self, - dict_factory=lambda properties: {k: v for k, v in properties if v}, - ) - return document - - -# ----------------------------------------------------------------------------- -# Report Sub-Sections -# ----------------------------------------------------------------------------- - - -@dataclass -class Dataset(ReportAttribute): - """A description of a dataset used to train the model.""" - - name: Optional[str] = None - """An identifier for the dataset.""" - - link: Optional[str] = None - """A method to access the dataset.""" - - description: Optional[str] = None - """A description of the dataset.""" - - def __setattr__(self, name: Any, value: Any) -> None: - """Clean text when adding attributes.""" - value = cleantext(value) if isinstance(value, str) else value - super().__setattr__(name, value) - - -@dataclass -class User(ReportAttribute): - """A description of an intended user of the model.""" - - identifier: Optional[str] = None - """An identifier for the intender user.""" - - description: Optional[str] = None - """A description of the intended user.""" - - def 
__setattr__(self, name: Any, value: Any) -> None: - """Clean text when adding attributes.""" - value = cleantext(value) if isinstance(value, str) else value - super().__setattr__(name, value) - - -@dataclass -class UseCase(ReportAttribute): - """A description of an intended use case of the model.""" - - identifier: Optional[str] = None - """An identifier for the use case.""" - - description: Optional[str] = None - """A description of the use case.""" - - def __setattr__(self, name: Any, value: Any) -> None: - """Clean text when adding attributes.""" - value = cleantext(value) if isinstance(value, str) else value - super().__setattr__(name, value) - - -@dataclass -class Limitation(ReportAttribute): - """A description of a technical limitation of the model.""" - - identifier: Optional[str] = None - """An identifier for the limitation.""" - - description: Optional[str] = None - """A description of the limitation.""" - - def __setattr__(self, name: Any, value: Any) -> None: - """Clean text when adding attributes.""" - value = cleantext(value) if isinstance(value, str) else value - super().__setattr__(name, value) - - -# ----------------------------------------------------------------------------- -# Report Sections -# ----------------------------------------------------------------------------- - - -@dataclass -class Metadata(ReportAttribute): - """Metadata for the report.""" - - project_name: Optional[str] = None - """The name of the project.""" - - authors: List[str] = field(default_factory=list) - """The authors of the report.""" - - source_url: Optional[str] = None - """The URL for model source.""" - - artifact_url: Optional[str] = None - """The URL for model artifacts.""" - - timestamp: Optional[str] = None - """The time at which the report was generated.""" - - -@dataclass -class ModelDetails(ReportAttribute): - """The `model details` section of the report.""" - - name: Optional[str] = None - """The name of the model.""" - - overview: Optional[str] = None - """A 
brief overview of the model.""" - - documentation: Optional[str] = None - """A detailed description of the model.""" - - def __setattr__(self, name: Any, value: Any) -> None: - """Clean text when adding attributes.""" - value = cleantext(value) if isinstance(value, str) else value - super().__setattr__(name, value) - - -@dataclass -class ModelSpecification(ReportAttribute): - """The `model specification` section of the report.""" - - domain: Optional[str] = None - """The domain of the model.""" - - architecture: Optional[str] = None - """A description of the model architecture.""" - - input: Optional[str] = None - """A description of model inputs.""" - - output: Optional[str] = None - """A description of model outputs.""" - - data: List[Dataset] = field(default_factory=list) - """A description of the data used to train the model.""" - - -@dataclass -class Considerations(ReportAttribute): - """The `considerations` section of the report.""" - - users: List[User] = field(default_factory=list) - """A description of the intended users of the model.""" - - use_cases: List[UseCase] = field(default_factory=list) - """A description of the intended use cases for the model.""" - - limitations: List[Limitation] = field(default_factory=list) - """A description of the technical limitations of the model.""" - - -@dataclass -class Report(ReportAttribute): - """The top-level model test report.""" - - metadata: Metadata = field(default_factory=Metadata) - """The report metadata.""" - - model_details: ModelDetails = field(default_factory=ModelDetails) - """The model details.""" - - model_specification: ModelSpecification = field( - default_factory=ModelSpecification - ) - """The model specification.""" - - considerations: Considerations = field(default_factory=Considerations) - """Model considerations.""" - - spec: ValidatedSpec = field(default_factory=lambda: ValidatedSpec()) - """The model test spec report.""" - - def _finalize(self) -> Dict[str, Any]: - """ - Finalize construction 
of the Report instance. - - :return: Report document - :rtype: Dict[str, Any] - """ - # TODO(Kyle): We should perform validation here - document = dataclasses.asdict( - self, - dict_factory=lambda properties: {k: v for k, v in properties if v}, - ) - # Manually serialize the spec-level document - document["spec"] = "" # TODO: Change to model. self.spec.to_json() - # Manually insert the schema version - document["schema_version"] = "0.0.1" - return document - - @typing.no_type_check - def to_json( - self, path: Optional[str] = None - ) -> Union[Dict[str, Any], None]: - """ - Convert the Report to a JSON document. - - The converted document is saved to the provided `path` - if it is specified, otherwise it is returned as a Python - dictionary. - - :param path: The path to which document is saved - :type path: Optional[str] - - :return: Python-dict representation of JSON document, or None - :rtype: Union[Dict[str, Any], None] - """ - document = self._finalize() - if path is None: - return document - - with open(path, "w") as f: - json.dump(document, f, indent=2) - - return None - - def to_html( - self, path: Optional[str] = None, local: bool = False - ) -> Union[str, None]: - """ - Convert the Report to an HTML document. - - The converted document is saved to the provided `path` - if it is specified, otherwise it is returned as a string. - - :param path: The path to which document is saved - :type path: Optional[str] - :param local: Indicates that the HTML generation server runs locally - :type local: bool - - :return: String representation of HTML document, or None - :rtype: Union[str, None] - - :raises RuntimeError: If network connection is unavailable - """ - if not _connected(): - raise RuntimeError( - "HTML report generation requires a network connection." 
- ) - - # Generate the string representation of HTML document - html = _generate_html(self._finalize(), local) - - if path is not None: - with open(path, "w") as f: - f.write(html) - - return html if path is None else None diff --git a/mlte/schema/artifact/report/v0.0.1/schema.json b/mlte/schema/artifact/report/v0.0.1/schema.json new file mode 100644 index 000000000..a03efd0d2 --- /dev/null +++ b/mlte/schema/artifact/report/v0.0.1/schema.json @@ -0,0 +1,749 @@ +{ + "$defs": { + "CommentDescriptor": { + "description": "The model implementation for a generic comment.", + "properties": { + "content": { + "title": "Content", + "type": "string" + } + }, + "required": [ + "content" + ], + "title": "CommentDescriptor", + "type": "object" + }, + "DataClassification": { + "description": "An enumeration of data classification levels.", + "enum": [ + "unclassified", + "cui", + "pii", + "phi", + "other" + ], + "title": "DataClassification", + "type": "string" + }, + "DataDescriptor": { + "description": "Describes a dataset used in model development.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "source": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Source" + }, + "classification": { + "anyOf": [ + { + "$ref": "#/$defs/DataClassification" + }, + { + "type": "null" + } + ], + "default": null + }, + "access": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Access" + }, + "labels": { + "default": [], + "items": { + "$ref": "#/$defs/LabelDescriptor" + }, + "title": "Labels", + "type": "array" + }, + "fields": { + "default": [], + "items": { + "$ref": "#/$defs/FieldDescriptor" + }, + "title": "Fields", + "type": "array" + }, + "rights": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Rights" + }, + 
"policies": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Policies" + }, + "identifiable_information": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Identifiable Information" + } + }, + "title": "DataDescriptor", + "type": "object" + }, + "FieldDescriptor": { + "description": "Describes a dataset field.", + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Name" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Type" + }, + "expected_values": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Expected Values" + }, + "missing_values": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Missing Values" + }, + "special_values": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Special Values" + } + }, + "title": "FieldDescriptor", + "type": "object" + }, + "GoalDescriptor": { + "description": "A description of a system goal.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "metrics": { + "default": [], + "items": { + "$ref": "#/$defs/MetricDescriptor" + }, + "title": "Metrics", + "type": "array" + } + }, + "title": "GoalDescriptor", + "type": "object" + }, + "IntendedUseDescriptor": { + "description": "The model implementation for intended use.", + "properties": { + "usage_context": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Usage Context" + }, + 
"production_requirements": { + "allOf": [ + { + "$ref": "#/$defs/ModelProductionDescriptor" + } + ], + "default": { + "integration": null, + "interface": { + "input": { + "description": null + }, + "output": { + "description": null + } + }, + "resources": { + "cpu": null, + "gpu": null, + "memory": null, + "storage": null + } + } + } + }, + "title": "IntendedUseDescriptor", + "type": "object" + }, + "LabelDescriptor": { + "description": "Describes a dataset label.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "percentage": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Percentage" + } + }, + "title": "LabelDescriptor", + "type": "object" + }, + "MetricDescriptor": { + "description": "A description of a metric that supports a system goal.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "baseline": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Baseline" + } + }, + "title": "MetricDescriptor", + "type": "object" + }, + "ModelInputDescriptor": { + "description": "A description of the model input specification.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + } + }, + "title": "ModelInputDescriptor", + "type": "object" + }, + "ModelInterfaceDescriptor": { + "description": "A description of the model interface.", + "properties": { + "input": { + "allOf": [ + { + "$ref": "#/$defs/ModelInputDescriptor" + } + ], + "default": { + "description": null + } + }, + "output": { + "allOf": [ + { + "$ref": "#/$defs/ModelOutputDescriptor" + } + ], + "default": { + "description": null + } + } + }, + "title": "ModelInterfaceDescriptor", + 
"type": "object" + }, + "ModelOutputDescriptor": { + "description": "A description of the model output specification.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + } + }, + "title": "ModelOutputDescriptor", + "type": "object" + }, + "ModelProductionDescriptor": { + "description": "A descriptor for model production considerations.", + "properties": { + "integration": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Integration" + }, + "interface": { + "allOf": [ + { + "$ref": "#/$defs/ModelInterfaceDescriptor" + } + ], + "default": { + "input": { + "description": null + }, + "output": { + "description": null + } + } + }, + "resources": { + "allOf": [ + { + "$ref": "#/$defs/ModelResourcesDescriptor" + } + ], + "default": { + "cpu": null, + "gpu": null, + "memory": null, + "storage": null + } + } + }, + "title": "ModelProductionDescriptor", + "type": "object" + }, + "ModelResourcesDescriptor": { + "description": "A descriptor for model resource requirements.", + "properties": { + "cpu": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cpu" + }, + "gpu": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Gpu" + }, + "memory": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Memory" + }, + "storage": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Storage" + } + }, + "title": "ModelResourcesDescriptor", + "type": "object" + }, + "PerformanceDesciptor": { + "description": "The model implementation for the performance descriptor.", + "properties": { + "goals": { + "default": [], + "items": { + "$ref": "#/$defs/GoalDescriptor" + }, + "title": "Goals", + "type": "array" + }, + "findings": { + 
"anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Findings" + } + }, + "title": "PerformanceDesciptor", + "type": "object" + }, + "ProblemType": { + "description": "An enumeration over machine learning problem types.", + "enum": [ + "classification", + "clustering", + "trend", + "alert", + "forecasting", + "content_generation", + "benchmarking", + "goals", + "detection", + "other" + ], + "title": "ProblemType", + "type": "string" + }, + "QuantitiveAnalysisDescriptor": { + "description": "The model implementation for report quantitative analysis.", + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Content" + } + }, + "title": "QuantitiveAnalysisDescriptor", + "type": "object" + }, + "RiskDescriptor": { + "description": "A description of system-level risks.", + "properties": { + "fp": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Fp" + }, + "fn": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Fn" + }, + "other": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Other" + } + }, + "title": "RiskDescriptor", + "type": "object" + }, + "SummaryDescriptor": { + "description": "The model implementation for the report summary.", + "properties": { + "problem_type": { + "anyOf": [ + { + "$ref": "#/$defs/ProblemType" + }, + { + "type": "null" + } + ], + "default": null + }, + "task": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Task" + } + }, + "title": "SummaryDescriptor", + "type": "object" + } + }, + "description": "The model implementation for the MLTE report artifact.", + "properties": { + "artifact_type": { + "const": "5", + "title": "Artifact Type" + }, + "summary": { + "allOf": [ + { + "$ref": 
"#/$defs/SummaryDescriptor" + } + ], + "default": { + "problem_type": null, + "task": null + } + }, + "performance": { + "allOf": [ + { + "$ref": "#/$defs/PerformanceDesciptor" + } + ], + "default": { + "findings": null, + "goals": [] + } + }, + "intended_use": { + "allOf": [ + { + "$ref": "#/$defs/IntendedUseDescriptor" + } + ], + "default": { + "production_requirements": { + "integration": null, + "interface": { + "input": { + "description": null + }, + "output": { + "description": null + } + }, + "resources": { + "cpu": null, + "gpu": null, + "memory": null, + "storage": null + } + }, + "usage_context": null + } + }, + "risks": { + "allOf": [ + { + "$ref": "#/$defs/RiskDescriptor" + } + ], + "default": { + "fn": null, + "fp": null, + "other": null + } + }, + "data": { + "default": [], + "items": { + "$ref": "#/$defs/DataDescriptor" + }, + "title": "Data", + "type": "array" + }, + "comments": { + "default": [], + "items": { + "$ref": "#/$defs/CommentDescriptor" + }, + "title": "Comments", + "type": "array" + }, + "quantitative_analysis": { + "allOf": [ + { + "$ref": "#/$defs/QuantitiveAnalysisDescriptor" + } + ], + "default": { + "content": null + } + } + }, + "required": [ + "artifact_type" + ], + "title": "ReportModel", + "type": "object" +} \ No newline at end of file diff --git a/test/artifact/test_artifact.py b/test/artifact/test_artifact.py index 76b4f2b31..e2776c728 100644 --- a/test/artifact/test_artifact.py +++ b/test/artifact/test_artifact.py @@ -4,7 +4,7 @@ Unit tests for MLTE artifact protocol implementation. 
""" -from mlte.negotiation.negotiation_card import NegotiationCard +from mlte.negotiation.artifact import NegotiationCard from mlte.session.state import set_context, set_store from ..fixture.store import FX_MODEL_ID, FX_NAMESPACE_ID, FX_VERSION_ID diff --git a/test/fixture/artifact.py b/test/fixture/artifact.py index 14c80bf59..80c21d977 100644 --- a/test/fixture/artifact.py +++ b/test/fixture/artifact.py @@ -14,6 +14,7 @@ from mlte.artifact.type import ArtifactType from mlte.evidence.metadata import EvidenceMetadata, Identifier from mlte.negotiation.model import NegotiationCardModel +from mlte.report.model import ReportModel from mlte.spec.model import SpecModel from mlte.validation.model import ValidatedSpecModel from mlte.value.model import IntegerValueModel, ValueModel, ValueType @@ -66,7 +67,9 @@ def all_others(type: ArtifactType) -> List[ArtifactType]: def _make_body( type: ArtifactType, id: str -) -> Union[NegotiationCardModel, ValueModel, SpecModel, ValidatedSpecModel]: +) -> Union[ + NegotiationCardModel, ValueModel, SpecModel, ValidatedSpecModel, ReportModel +]: """ Make the body of the artifact for a given type. :param type: The artifact type @@ -81,6 +84,8 @@ def _make_body( return _make_spec() if type == ArtifactType.VALIDATED_SPEC: return _make_validated_spec() + if type == ArtifactType.REPORT: + return _make_report() assert False, f"Unkown artifact type provided when creating body: {type}." @@ -122,3 +127,11 @@ def _make_validated_spec() -> ValidatedSpecModel: :return: The artifact """ return ValidatedSpecModel(artifact_type=ArtifactType.VALIDATED_SPEC) + + +def _make_report() -> ReportModel: + """ + Make a minimal report. 
+ :return: The artifact + """ + return ReportModel(artifact_type=ArtifactType.REPORT) diff --git a/test/model/__init__.py b/test/model/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/model/test_shared.py b/test/model/test_shared.py new file mode 100644 index 000000000..97d475ddd --- /dev/null +++ b/test/model/test_shared.py @@ -0,0 +1,292 @@ +""" +test/model/test_shared.py + +Unit tests for shared model components. +""" + +from typing import Any, Dict + +from deepdiff import DeepDiff + +from mlte.model.shared import ( + DataClassification, + DataDescriptor, + FieldDescriptor, + GoalDescriptor, + LabelDescriptor, + MetricDescriptor, + ModelDescriptor, + ModelDevelopmentDescriptor, + ModelInputDescriptor, + ModelInterfaceDescriptor, + ModelOutputDescriptor, + ModelProductionDescriptor, + ModelResourcesDescriptor, + RiskDescriptor, +) + + +def test_metric_descriptor() -> None: + """A metric descriptor model can be serialized and deserialized.""" + m = MetricDescriptor(description="description", baseline="baseline") + expected = {"description": "description", "baseline": "baseline"} + assert deepequal(expected, m.to_json()) + + objects = [ + MetricDescriptor(description="description", baseline="baseline"), + MetricDescriptor(), + ] + + for object in objects: + s = object.to_json() + d = MetricDescriptor.from_json(s) + assert d == object + + +def test_goal_descriptor() -> None: + """A goal descriptor model can be serialized and deserialized.""" + m = GoalDescriptor( + description="description", + metrics=[ + MetricDescriptor(description="description", baseline="baseline") + ], + ) + expected = { + "description": "description", + "metrics": [{"description": "description", "baseline": "baseline"}], + } + assert deepequal(expected, m.to_json()) + + objects = [ + GoalDescriptor( + description="description", + metrics=[ + MetricDescriptor(description="description", baseline="baseline") + ], + ), + GoalDescriptor(), + ] + for object in objects: + 
s = object.to_json() + d = GoalDescriptor.from_json(s) + assert d == object + + +def test_risk_descriptor() -> None: + """A risk descriptor model can be serialized and deserialized successfully.""" + objects = [ + RiskDescriptor(fp="fp", fn="fn", other="other"), + RiskDescriptor(), + ] + + for object in objects: + s = object.to_json() + d = RiskDescriptor.from_json(s) + assert d == object + + +# ----------------------------------------------------------------------------- +# Data Subcomponents +# ----------------------------------------------------------------------------- + + +def test_data_label_descriptor() -> None: + """A data label descriptor model can be serialized and deserialized.""" + objects = [ + LabelDescriptor(description="description", percentage=95.0), + LabelDescriptor(), + ] + for object in objects: + s = object.to_json() + d = LabelDescriptor.from_json(s) + assert d == object + + +def test_data_field_descriptor() -> None: + """A data field descriptor model can be serialized and deserialized.""" + objects = [ + FieldDescriptor( + name="name", + description="description", + type="type", + expected_values="expected_values", + missing_values="missing_values", + special_values="special_values", + ), + FieldDescriptor(), + ] + for object in objects: + s = object.to_json() + d = FieldDescriptor.from_json(s) + assert d == object + + +def test_data_descriptor() -> None: + """A data descriptor model can be serialized and deserialized.""" + + objects = [ + DataDescriptor( + description="description", + classification=DataClassification.UNCLASSIFIED, + access="access", + fields=[ + FieldDescriptor( + name="name", + description="description", + type="type", + expected_values="expected_values", + missing_values="missing_values", + special_values="special_values", + ) + ], + labels=[ + LabelDescriptor(description="description", percentage=95.0) + ], + policies="policies", + rights="rights", + source="source", + 
identifiable_information="identifiable_information", + ), + DataDescriptor(), + ] + for object in objects: + s = object.to_json() + d = DataDescriptor.from_json(s) + assert d == object + + +# ----------------------------------------------------------------------------- +# Model Subcomponents +# ----------------------------------------------------------------------------- + + +def test_model_resources_descriptor() -> None: + """A model resources descriptor model can be serialized and deserialized.""" + objects = [ + ModelResourcesDescriptor( + cpu="cpu", gpu="gpu", memory="memory", storage="storage" + ), + ModelResourcesDescriptor(), + ] + + for object in objects: + s = object.to_json() + d = ModelResourcesDescriptor.from_json(s) + assert d == object + + +def test_model_input_descriptor() -> None: + """A model input descriptor model can be serialized and deserialized.""" + objects = [ + ModelInputDescriptor(description="description"), + ModelInputDescriptor(), + ] + + for object in objects: + s = object.to_json() + d = ModelInputDescriptor.from_json(s) + assert d == object + + +def test_model_output_descriptor() -> None: + """A model output descriptor model can be serialized and deserialized.""" + objects = [ + ModelOutputDescriptor(description="description"), + ModelOutputDescriptor(), + ] + + for object in objects: + s = object.to_json() + d = ModelOutputDescriptor.from_json(s) + assert d == object + + +def test_model_interface_descriptor() -> None: + """A model interface descriptor model can be serialized and deserialized.""" + objects = [ + ModelInterfaceDescriptor( + input=ModelInputDescriptor(description="description"), + output=ModelOutputDescriptor(description="description"), + ), + ModelInterfaceDescriptor(), + ] + for object in objects: + s = object.to_json() + d = ModelInterfaceDescriptor.from_json(s) + assert d == object + + +def test_model_development_descriptor() -> None: + """A model development descriptor model can be serialized and deserialized.""" 
+ objects = [ + ModelDevelopmentDescriptor( + resources=ModelResourcesDescriptor( + cpu="cpu", gpu="gpu", memory="memory", storage="storage" + ) + ), + ModelDevelopmentDescriptor(), + ] + for object in objects: + s = object.to_json() + d = ModelDevelopmentDescriptor.from_json(s) + assert d == object + + +def test_model_production_descriptor() -> None: + """A model production descriptor model can be serialized and deserialized.""" + objects = [ + ModelProductionDescriptor( + integration="integration", + interface=ModelInterfaceDescriptor( + input=ModelInputDescriptor(description="description"), + output=ModelOutputDescriptor(description="description"), + ), + resources=ModelResourcesDescriptor( + cpu="cpu", gpu="gpu", memory="memory", storage="storage" + ), + ), + ModelProductionDescriptor(), + ] + + for object in objects: + s = object.to_json() + d = ModelProductionDescriptor.from_json(s) + assert d == object + + +def test_model_descriptor() -> None: + """A model descriptor model can be serialized and deserialized.""" + objects = [ + ModelDescriptor( + development=ModelDevelopmentDescriptor( + resources=ModelResourcesDescriptor( + cpu="cpu", gpu="gpu", memory="memory", storage="storage" + ) + ), + production=ModelProductionDescriptor( + integration="integration", + interface=ModelInterfaceDescriptor( + input=ModelInputDescriptor(description="description"), + output=ModelOutputDescriptor(description="description"), + ), + resources=ModelResourcesDescriptor( + cpu="cpu", gpu="gpu", memory="memory", storage="storage" + ), + ), + ), + ModelDescriptor(), + ] + + for object in objects: + s = object.to_json() + d = ModelDescriptor.from_json(s) + assert d == object + + +# ----------------------------------------------------------------------------- +# Model Subcomponents +# ----------------------------------------------------------------------------- + + +def deepequal(a: Dict[str, Any], b: Dict[str, Any]) -> bool: + return len(DeepDiff(a, b)) == 0 diff --git 
a/test/negotiation/test_artifact.py b/test/negotiation/test_artifact.py index 0fc30578d..fe38e833d 100644 --- a/test/negotiation/test_artifact.py +++ b/test/negotiation/test_artifact.py @@ -12,40 +12,12 @@ import mlte.store.error as errors from mlte.context.context import Context -from mlte.context.model import ModelCreate, NamespaceCreate, VersionCreate -from mlte.negotiation.negotiation_card import NegotiationCard -from mlte.store.base import ManagedSession, Store -from mlte.store.factory import create_store +from mlte.negotiation.artifact import NegotiationCard +from mlte.store.base import Store -# The namespace identifier for default context -NAMESPACE_ID = "ns0" - -# The mode identifier for default context -MODEL_ID = "model0" - -# The version identifier for default context -VERSION_ID = "v0" - - -@pytest.fixture(scope="function") -def store() -> Store: - """Create an in-memory artifact store.""" - store = create_store("memory://") - return store - - -@pytest.fixture(scope="function") -def store_with_context() -> tuple[Store, Context]: - """Create an in-memory artifact store with initial context.""" - store = create_store("memory://") - with ManagedSession(store.session()) as handle: - _ = handle.create_namespace(NamespaceCreate(identifier=NAMESPACE_ID)) - _ = handle.create_model(NAMESPACE_ID, ModelCreate(identifier=MODEL_ID)) - _ = handle.create_version( - NAMESPACE_ID, MODEL_ID, VersionCreate(identifier=VERSION_ID) - ) - - return store, Context(NAMESPACE_ID, MODEL_ID, VERSION_ID) +from ..fixture.store import store # noqa +from ..fixture.store import store_with_context # noqa +from ..fixture.store import FX_MODEL_ID, FX_NAMESPACE_ID, FX_VERSION_ID # noqa def test_round_trip() -> None: @@ -57,9 +29,9 @@ def test_round_trip() -> None: _ = NegotiationCard.from_model(model) -def test_save_load(store_with_context: Tuple[Store, Context]) -> None: +def test_save_load(store_with_context: Tuple[Store, Context]) -> None: # noqa """Negotiation card can be saved to and 
loaded from artifact store.""" - store, ctx = store_with_context + store, ctx = store_with_context # noqa card = NegotiationCard("my-card") card.save_with(ctx, store) @@ -68,26 +40,28 @@ def test_save_load(store_with_context: Tuple[Store, Context]) -> None: assert loaded == card -def test_save_noparents(store: Store) -> None: +def test_save_noparents(store: Store) -> None: # noqa """Save fails when no parents are present.""" - ctx = Context(NAMESPACE_ID, MODEL_ID, VERSION_ID) + ctx = Context(FX_NAMESPACE_ID, FX_MODEL_ID, FX_VERSION_ID) card = NegotiationCard("my-card") with pytest.raises(errors.ErrorNotFound): card.save_with(ctx, store) -def test_save_parents(store: Store) -> None: +def test_save_parents(store: Store) -> None: # noqa """Save succeeds when parents are present.""" - ctx = Context(NAMESPACE_ID, MODEL_ID, VERSION_ID) + ctx = Context(FX_NAMESPACE_ID, FX_MODEL_ID, FX_VERSION_ID) card = NegotiationCard("my-card") card.save_with(ctx, store, parents=True) -def test_save_overwrite(store_with_context: Tuple[Store, Context]) -> None: +def test_save_overwrite( + store_with_context: Tuple[Store, Context] # noqa +) -> None: """Save succeeds when old artifact is overwritten.""" - store, ctx = store_with_context + store, ctx = store_with_context # noqa # Initial write succeeds card = NegotiationCard("my-card") diff --git a/test/negotiation/test_model.py b/test/negotiation/test_model.py index 724fa0890..8fcdda660 100644 --- a/test/negotiation/test_model.py +++ b/test/negotiation/test_model.py @@ -10,42 +10,59 @@ from deepdiff import DeepDiff -import mlte.negotiation.model as model from mlte.artifact.type import ArtifactType +from mlte.model.shared import ( + DataClassification, + DataDescriptor, + FieldDescriptor, + GoalDescriptor, + LabelDescriptor, + MetricDescriptor, + ModelDescriptor, + ModelDevelopmentDescriptor, + ModelInputDescriptor, + ModelInterfaceDescriptor, + ModelOutputDescriptor, + ModelProductionDescriptor, + ModelResourcesDescriptor, + ProblemType, + 
RiskDescriptor, +) +from mlte.negotiation.model import NegotiationCardModel, SystemDescriptor # ----------------------------------------------------------------------------- # NegotiationCardModel # ----------------------------------------------------------------------------- -def test_negotiation_card_body() -> None: +def test_negotiation_card() -> None: """A negotiation card model can be serialized and deserialized.""" objects = [ - model.NegotiationCardModel( + NegotiationCardModel( artifact_type=ArtifactType.NEGOTIATION_CARD, - system=model.SystemDescriptor( + system=SystemDescriptor( goals=[ - model.GoalDescriptor( + GoalDescriptor( description="description", metrics=[ - model.MetricDescriptor( + MetricDescriptor( description="description", baseline="baseline" ) ], ) ], - problem_type=model.ProblemType.CLASSIFICATION, + problem_type=ProblemType.CLASSIFICATION, task="task", usage_context="usage_context", - risks=model.RiskDescriptor(fp="fp", fn="fn", other="other"), + risks=RiskDescriptor(fp="fp", fn="fn", other="other"), ), data=[ - model.DataDescriptor( + DataDescriptor( description="description", - classification=model.DataClassification.UNCLASSIFIED, + classification=DataClassification.UNCLASSIFIED, access="access", fields=[ - model.FieldDescriptor( + FieldDescriptor( name="name", description="description", type="type", @@ -55,7 +72,7 @@ def test_negotiation_card_body() -> None: ) ], labels=[ - model.LabelDescriptor( + LabelDescriptor( description="description", percentage=95.0 ) ], @@ -65,23 +82,19 @@ def test_negotiation_card_body() -> None: identifiable_information="identifiable_information", ) ], - model=model.ModelDescriptor( - development=model.ModelDevelopmentDescriptor( - resources=model.ModelResourcesDescriptor( + model=ModelDescriptor( + development=ModelDevelopmentDescriptor( + resources=ModelResourcesDescriptor( cpu="cpu", gpu="gpu", memory="memory", storage="storage" ) ), - production=model.ModelProductionDescriptor( + 
production=ModelProductionDescriptor( integration="integration", - interface=model.ModelInterfaceDescriptor( - input=model.ModelInputDescriptor( - description="description" - ), - output=model.ModelOutputDescriptor( - description="description" - ), + interface=ModelInterfaceDescriptor( + input=ModelInputDescriptor(description="description"), + output=ModelOutputDescriptor(description="description"), ), - resources=model.ModelResourcesDescriptor( + resources=ModelResourcesDescriptor( cpu="cpu", gpu="gpu", memory="memory", @@ -90,12 +103,12 @@ def test_negotiation_card_body() -> None: ), ), ), - model.NegotiationCardModel(artifact_type=ArtifactType.NEGOTIATION_CARD), + NegotiationCardModel(artifact_type=ArtifactType.NEGOTIATION_CARD), ] for object in objects: s = object.to_json() - d = model.NegotiationCardModel.from_json(s) + d = NegotiationCardModel.from_json(s) assert d == object @@ -104,295 +117,30 @@ def test_negotiation_card_body() -> None: # ----------------------------------------------------------------------------- -def test_metric_descriptor() -> None: - """A metric descriptor model can be serialized and deserialized.""" - m = model.MetricDescriptor(description="description", baseline="baseline") - expected = {"description": "description", "baseline": "baseline"} - assert deepequal(expected, m.to_json()) - - objects = [ - model.MetricDescriptor(description="description", baseline="baseline"), - model.MetricDescriptor(), - ] - - for object in objects: - s = object.to_json() - d = model.MetricDescriptor.from_json(s) - assert d == object - - -def test_goal_descriptor() -> None: - """A goal descriptor model can be serialized and deserialized.""" - m = model.GoalDescriptor( - description="description", - metrics=[ - model.MetricDescriptor( - description="description", baseline="baseline" - ) - ], - ) - expected = { - "description": "description", - "metrics": [{"description": "description", "baseline": "baseline"}], - } - assert deepequal(expected, m.to_json()) 
- - objects = [ - model.GoalDescriptor( - description="description", - metrics=[ - model.MetricDescriptor( - description="description", baseline="baseline" - ) - ], - ), - model.GoalDescriptor(), - ] - for object in objects: - s = object.to_json() - d = model.GoalDescriptor.from_json(s) - assert d == object - - -def test_risk_descriptor() -> None: - """A risk descriptor model can be serialized and deserialized successfully.""" - objects = [ - model.RiskDescriptor(fp="fp", fn="fn", other="other"), - model.RiskDescriptor(), - ] - - for object in objects: - s = object.to_json() - d = model.RiskDescriptor.from_json(s) - assert d == object - - def test_system_descriptor() -> None: """A system descriptor model can be serialized and deserialized.""" objects = [ - model.SystemDescriptor( + SystemDescriptor( goals=[ - model.GoalDescriptor( + GoalDescriptor( description="description", metrics=[ - model.MetricDescriptor( + MetricDescriptor( description="description", baseline="baseline" ) ], ) ], - problem_type=model.ProblemType.CLASSIFICATION, + problem_type=ProblemType.CLASSIFICATION, task="task", usage_context="usage_context", - risks=model.RiskDescriptor(fp="fp", fn="fn", other="other"), - ), - model.SystemDescriptor(), - ] - for object in objects: - s = object.to_json() - d = model.SystemDescriptor.from_json(s) - assert d == object - - -# ----------------------------------------------------------------------------- -# Data Subcomponents -# ----------------------------------------------------------------------------- - - -def test_data_label_descriptor() -> None: - """A data label descriptor model can be serialized and deserialized.""" - objects = [ - model.LabelDescriptor(description="description", percentage=95.0), - model.LabelDescriptor(), - ] - for object in objects: - s = object.to_json() - d = model.LabelDescriptor.from_json(s) - assert d == object - - -def test_data_field_descriptor() -> None: - """A data field descriptor model can be serialized and 
deserialized.""" - objects = [ - model.FieldDescriptor( - name="name", - description="description", - type="type", - expected_values="expected_values", - missing_values="missing_values", - special_values="special_values", - ), - model.FieldDescriptor(), - ] - for object in objects: - s = object.to_json() - d = model.FieldDescriptor.from_json(s) - assert d == object - - -def test_data_descriptor() -> None: - """A data descriptor model can be serialized and deserialized.""" - - objects = [ - model.DataDescriptor( - description="description", - classification=model.DataClassification.UNCLASSIFIED, - access="access", - fields=[ - model.FieldDescriptor( - name="name", - description="description", - type="type", - expected_values="expected_values", - missing_values="missing_values", - special_values="special_values", - ) - ], - labels=[ - model.LabelDescriptor( - description="description", percentage=95.0 - ) - ], - policies="policies", - rights="rights", - source="source", - identifiable_information="identifiable_information", - ), - model.DataDescriptor(), - ] - for object in objects: - s = object.to_json() - d = model.DataDescriptor.from_json(s) - assert d == object - - -# ----------------------------------------------------------------------------- -# Model Subcomponents -# ----------------------------------------------------------------------------- - - -def test_model_resources_descriptor() -> None: - """A model resources descriptor model can be serialized and deserialized.""" - objects = [ - model.ModelResourcesDescriptor( - cpu="cpu", gpu="gpu", memory="memory", storage="storage" - ), - model.ModelResourcesDescriptor(), - ] - - for object in objects: - s = object.to_json() - d = model.ModelResourcesDescriptor.from_json(s) - assert d == object - - -def test_model_input_descriptor() -> None: - """A model input descriptor model can be serialized and deserialized.""" - objects = [ - model.ModelInputDescriptor(description="description"), - 
model.ModelInputDescriptor(), - ] - - for object in objects: - s = object.to_json() - d = model.ModelInputDescriptor.from_json(s) - assert d == object - - -def test_model_output_descriptor() -> None: - """A model output descriptor model can be serialized and deserialized.""" - objects = [ - model.ModelOutputDescriptor(description="description"), - model.ModelOutputDescriptor(), - ] - - for object in objects: - s = object.to_json() - d = model.ModelOutputDescriptor.from_json(s) - assert d == object - - -def test_model_interface_descriptor() -> None: - """A model interface descriptor model can be serialized and deserialized.""" - objects = [ - model.ModelInterfaceDescriptor( - input=model.ModelInputDescriptor(description="description"), - output=model.ModelOutputDescriptor(description="description"), - ), - model.ModelInterfaceDescriptor(), - ] - for object in objects: - s = object.to_json() - d = model.ModelInterfaceDescriptor.from_json(s) - assert d == object - - -def test_model_development_descriptor() -> None: - """A model development descriptor model can be serialized and deserialized.""" - objects = [ - model.ModelDevelopmentDescriptor( - resources=model.ModelResourcesDescriptor( - cpu="cpu", gpu="gpu", memory="memory", storage="storage" - ) - ), - model.ModelDevelopmentDescriptor(), - ] - for object in objects: - s = object.to_json() - d = model.ModelDevelopmentDescriptor.from_json(s) - assert d == object - - -def test_model_production_descriptor() -> None: - """A model production descriptor model can be serialized and deserialized.""" - objects = [ - model.ModelProductionDescriptor( - integration="integration", - interface=model.ModelInterfaceDescriptor( - input=model.ModelInputDescriptor(description="description"), - output=model.ModelOutputDescriptor(description="description"), - ), - resources=model.ModelResourcesDescriptor( - cpu="cpu", gpu="gpu", memory="memory", storage="storage" - ), + risks=RiskDescriptor(fp="fp", fn="fn", other="other"), ), - 
model.ModelProductionDescriptor(), + SystemDescriptor(), ] - - for object in objects: - s = object.to_json() - d = model.ModelProductionDescriptor.from_json(s) - assert d == object - - -def test_model_descriptor() -> None: - """A model descriptor model can be serialized and deserialized.""" - objects = [ - model.ModelDescriptor( - development=model.ModelDevelopmentDescriptor( - resources=model.ModelResourcesDescriptor( - cpu="cpu", gpu="gpu", memory="memory", storage="storage" - ) - ), - production=model.ModelProductionDescriptor( - integration="integration", - interface=model.ModelInterfaceDescriptor( - input=model.ModelInputDescriptor(description="description"), - output=model.ModelOutputDescriptor( - description="description" - ), - ), - resources=model.ModelResourcesDescriptor( - cpu="cpu", gpu="gpu", memory="memory", storage="storage" - ), - ), - ), - model.ModelDescriptor(), - ] - for object in objects: s = object.to_json() - d = model.ModelDescriptor.from_json(s) + d = SystemDescriptor.from_json(s) assert d == object diff --git a/test/report/__init__.py b/test/report/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/report/test_artifact.py b/test/report/test_artifact.py new file mode 100644 index 000000000..4a996256a --- /dev/null +++ b/test/report/test_artifact.py @@ -0,0 +1,33 @@ +""" +test/report/test_artifact.py + +Unit tests for report artifact type. 
+""" + +from typing import Tuple + +from mlte.context.context import Context +from mlte.report.artifact import Report +from mlte.store.base import Store + +from ..fixture.store import store_with_context # noqa + + +def test_round_trip() -> None: + """Report can be converted to model and back.""" + + card = Report("my-report") + + model = card.to_model() + _ = Report.from_model(model) + + +def test_save_load(store_with_context: Tuple[Store, Context]) -> None: # noqa + """Report can be saved to and loaded from artifact store.""" + store, ctx = store_with_context + + card = Report("my-report") + card.save_with(ctx, store) + + loaded = Report.load_with("my-report", context=ctx, store=store) + assert loaded == card diff --git a/test/report/test_model.py b/test/report/test_model.py new file mode 100644 index 000000000..9e6e9faae --- /dev/null +++ b/test/report/test_model.py @@ -0,0 +1,188 @@ +""" +test/report/test_model.py + +Unit tests for report model. +""" + +from mlte.artifact.type import ArtifactType +from mlte.model.shared import ( + DataClassification, + DataDescriptor, + FieldDescriptor, + GoalDescriptor, + LabelDescriptor, + MetricDescriptor, + ModelInputDescriptor, + ModelInterfaceDescriptor, + ModelOutputDescriptor, + ModelProductionDescriptor, + ModelResourcesDescriptor, + ProblemType, + RiskDescriptor, +) +from mlte.report.model import ( + CommentDescriptor, + IntendedUseDescriptor, + PerformanceDesciptor, + QuantitiveAnalysisDescriptor, + ReportModel, + SummaryDescriptor, +) + + +def test_report() -> None: + """A report model can be serialized and deserialized.""" + objects = [ + ReportModel( + artifact_type=ArtifactType.REPORT, + summary=SummaryDescriptor( + problem_type=ProblemType.CLASSIFICATION, task="task" + ), + performance=PerformanceDesciptor( + goals=[ + GoalDescriptor( + description="description", + metrics=[ + MetricDescriptor( + description="description", baseline="baseline" + ) + ], + ) + ] + ), + intended_use=IntendedUseDescriptor( + 
usage_context="context", + production_requirements=ModelProductionDescriptor( + integration="integration", + interface=ModelInterfaceDescriptor( + input=ModelInputDescriptor(description="description"), + output=ModelOutputDescriptor(description="output"), + ), + resources=ModelResourcesDescriptor( + cpu="cpu", gpu="gpu", memory="memory", storage="storage" + ), + ), + ), + risks=RiskDescriptor(fp="fp", fn="fn", other="other"), + data=[ + DataDescriptor( + description="description", + classification=DataClassification.UNCLASSIFIED, + access="access", + fields=[ + FieldDescriptor( + name="name", + description="description", + type="type", + expected_values="expected_values", + missing_values="missing_values", + special_values="special_values", + ) + ], + labels=[ + LabelDescriptor( + description="description", percentage=95.0 + ) + ], + policies="policies", + rights="rights", + source="source", + identifiable_information="identifiable_information", + ) + ], + comments=[CommentDescriptor(content="content")], + quantitative_analysis=QuantitiveAnalysisDescriptor( + content="content" + ), + ), + ReportModel(artifact_type=ArtifactType.REPORT), + ] + + for object in objects: + s = object.to_json() + d = ReportModel.from_json(s) + assert d == object + + +def test_summary() -> None: + """A summary descriptor can be serialized and deserialized.""" + objects = [ + SummaryDescriptor(problem_type=ProblemType.CLASSIFICATION, task="task"), + SummaryDescriptor(), + ] + for object in objects: + s = object.to_json() + d = SummaryDescriptor.from_json(s) + assert d == object + + +def test_performance() -> None: + """A performance descriptor can be serialized and deserialized.""" + objects = [ + PerformanceDesciptor( + goals=[ + GoalDescriptor( + description="description", + metrics=[ + MetricDescriptor( + description="description", baseline="baseline" + ) + ], + ) + ] + ), + PerformanceDesciptor(), + ] + for object in objects: + s = object.to_json() + d = 
PerformanceDesciptor.from_json(s) + assert d == object + + +def test_intended_use() -> None: + """An intended use descriptor can be serialized and deserialized.""" + objects = [ + IntendedUseDescriptor( + usage_context="context", + production_requirements=ModelProductionDescriptor( + integration="integration", + interface=ModelInterfaceDescriptor( + input=ModelInputDescriptor(description="description"), + output=ModelOutputDescriptor(description="output"), + ), + resources=ModelResourcesDescriptor( + cpu="cpu", gpu="gpu", memory="memory", storage="storage" + ), + ), + ), + IntendedUseDescriptor(), + ] + + for object in objects: + s = object.to_json() + d = IntendedUseDescriptor.from_json(s) + assert d == object + + +def test_comment() -> None: + """A comment can be serialized and deserialized.""" + objects = [ + CommentDescriptor(content="content"), + ] + + for object in objects: + s = object.to_json() + d = CommentDescriptor.from_json(s) + assert d == object + + +def test_quantitative_analysis() -> None: + """A quantitative analysis model can be serialized and deserialized.""" + objects = [ + QuantitiveAnalysisDescriptor(content="content"), + QuantitiveAnalysisDescriptor(), + ] + for object in objects: + s = object.to_json() + d = QuantitiveAnalysisDescriptor.from_json(s) + assert d == object diff --git a/test/schema/test_report_schema.py b/test/schema/test_report_schema.py index 99ae15f6e..a64be1fa2 100644 --- a/test/schema/test_report_schema.py +++ b/test/schema/test_report_schema.py @@ -4,65 +4,26 @@ Unit tests for report schema validation. """ -import time - import pytest from jsonschema import ValidationError -from mlte.report import Dataset, Limitation, Report, UseCase, User -from mlte.validation.result import Ignore -from mlte.validation.validated_spec import ValidatedSpec +from mlte.report.artifact import Report from . 
import util as util -@pytest.mark.skip("Pending artifact protocol implementation.") def test_empty_instance() -> None: # noqa report = Report() - util.validate_report_schema(report.to_json()) - - -@pytest.mark.skip("Pending artifact protocol implementation.") -def test_valid_instance() -> None: # noqa - report = Report() - report.metadata.project_name = "ProjectName" - report.metadata.authors = ["Foo", "Bar"] - report.metadata.source_url = "https://github.com/mlte-team" - report.metadata.artifact_url = "https://github.com/mlte-team" - report.metadata.timestamp = f"{int(time.time())}" - - report.model_details.name = "ModelName" - report.model_details.overview = "Model overview." - report.model_details.documentation = "Model documentation." - - report.model_specification.domain = "ModelDomain" - report.model_specification.architecture = "ModelArchitecture" - report.model_specification.input = "ModelInput" - report.model_specification.output = "ModelOutput" - report.model_specification.data = [ - Dataset("Dataset0", "https://github.com/mlte-team", "Description"), - Dataset("Dataset1", "https://github.com/mlte-team", "Description."), - ] - report.considerations.users = [ - User("User description 0."), - User("User description 1."), - ] - report.considerations.use_cases = [ - UseCase("Use case description 0."), - UseCase("Use case description 1."), - ] - report.considerations.limitations = [ - Limitation("Limitation description 0."), - Limitation("Limitation description 1."), - ] + doc = report.to_model().to_json() + util.validate_report_schema(doc["body"]) - report.spec = ValidatedSpec(results={"prop": {"test": Ignore("ignore")}}) - util.validate_report_schema(report.to_json()) +@pytest.mark.skip("Pending artifact protocol implementation.") +def test_valid_instance() -> None: + assert True -@pytest.mark.skip("Pending artifact protocol implementation.") def test_invalid_instance(): with pytest.raises(ValidationError): util.validate_report_schema({}) diff --git 
a/tools/schema.py b/tools/schema.py index 50293edda..60ed18703 100644 --- a/tools/schema.py +++ b/tools/schema.py @@ -84,6 +84,10 @@ def __str__(self) -> str: ), output_path="validated/v0.0.1", ), + Config( + model=ModelImport(path="mlte.report.model", model="ReportModel"), + output_path="report/v0.0.1", + ), ] # ----------------------------------------------------------------------------- From 39678eb4a9f5c7ad0ae6dae7095fc5244f4bbd47 Mon Sep 17 00:00:00 2001 From: turingcompl33t Date: Sun, 10 Sep 2023 13:24:12 -0400 Subject: [PATCH 2/4] implement ability to populate report from negotiation card --- mlte/report/artifact.py | 30 ++++++++++++++++++++++++++++++ test/report/test_artifact.py | 31 ++++++++++++++++++++++++++----- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/mlte/report/artifact.py b/mlte/report/artifact.py index 3446ff194..706a5bf5e 100644 --- a/mlte/report/artifact.py +++ b/mlte/report/artifact.py @@ -7,6 +7,7 @@ from __future__ import annotations import typing +from copy import deepcopy from typing import List from deepdiff import DeepDiff @@ -15,6 +16,7 @@ from mlte.artifact.model import ArtifactModel from mlte.artifact.type import ArtifactType from mlte.model.shared import DataDescriptor, RiskDescriptor +from mlte.negotiation.artifact import NegotiationCard from mlte.report.model import ( CommentDescriptor, IntendedUseDescriptor, @@ -96,6 +98,34 @@ def from_model(cls, model: ArtifactModel) -> Report: # type: ignore[override] quantitative_analysis=body.quantitative_analysis, ) + def populate_from(self, artifact: NegotiationCard) -> Report: + """ + Populate the contents of a report from a negotiation card. 
+ :param artifact: The artifact to populate from + :return: The new report artifact with fields populated + """ + summary = deepcopy(self.summary) + summary.problem_type = artifact.system.problem_type + summary.task = artifact.system.task + + performance = deepcopy(self.performance) + performance.goals = artifact.system.goals + + intended_use = deepcopy(self.intended_use) + intended_use.usage_context = artifact.system.usage_context + intended_use.production_requirements = artifact.model.production + + return Report( + identifier=self.identifier, + summary=summary, + performance=performance, + intended_use=intended_use, + risks=deepcopy(artifact.system.risks), + data=deepcopy(artifact.data), + comments=deepcopy(self.comments), + quantitative_analysis=deepcopy(self.quantitative_analysis), + ) + @classmethod def get_default_id(cls) -> str: """Get the default identifier for the artifact.""" diff --git a/test/report/test_artifact.py b/test/report/test_artifact.py index 4a996256a..ec590c0da 100644 --- a/test/report/test_artifact.py +++ b/test/report/test_artifact.py @@ -7,7 +7,10 @@ from typing import Tuple from mlte.context.context import Context +from mlte.model.shared import ProblemType +from mlte.negotiation.artifact import NegotiationCard from mlte.report.artifact import Report +from mlte.report.model import SummaryDescriptor from mlte.store.base import Store from ..fixture.store import store_with_context # noqa @@ -16,9 +19,9 @@ def test_round_trip() -> None: """Report can be converted to model and back.""" - card = Report("my-report") + report = Report("my-report") - model = card.to_model() + model = report.to_model() _ = Report.from_model(model) @@ -26,8 +29,26 @@ def test_save_load(store_with_context: Tuple[Store, Context]) -> None: # noqa """Report can be saved to and loaded from artifact store.""" store, ctx = store_with_context - card = Report("my-report") - card.save_with(ctx, store) + report = Report("my-report") + report.save_with(ctx, store) loaded = 
Report.load_with("my-report", context=ctx, store=store) - assert loaded == card + assert loaded == report + + +# TODO(Kyle): Exhaustive testing is required here. +def test_populate_from() -> None: + """The content of a report can be populated from negotiation card.""" + + report = Report( + "my-report", + summary=SummaryDescriptor(problem_type=ProblemType.BENCHMARKING), + ) + + card = NegotiationCard("my-card") + card.system.problem_type = ProblemType.CLASSIFICATION + + new = report.populate_from(card) + + assert new.summary.problem_type == ProblemType.CLASSIFICATION + assert report.summary.problem_type == ProblemType.BENCHMARKING From 1be452bd41fa033abff22093cbcdcd24d728ea08 Mon Sep 17 00:00:00 2001 From: turingcompl33t Date: Wed, 20 Sep 2023 10:55:19 -0400 Subject: [PATCH 3/4] execute schema generation with updated tool --- .../schema/artifact/report/v0.0.1/schema.json | 749 ++++++++++++++++++ 1 file changed, 749 insertions(+) create mode 100644 mlte/frontend/nuxt-app/assets/schema/artifact/report/v0.0.1/schema.json diff --git a/mlte/frontend/nuxt-app/assets/schema/artifact/report/v0.0.1/schema.json b/mlte/frontend/nuxt-app/assets/schema/artifact/report/v0.0.1/schema.json new file mode 100644 index 000000000..a03efd0d2 --- /dev/null +++ b/mlte/frontend/nuxt-app/assets/schema/artifact/report/v0.0.1/schema.json @@ -0,0 +1,749 @@ +{ + "$defs": { + "CommentDescriptor": { + "description": "The model implementation for a generic comment.", + "properties": { + "content": { + "title": "Content", + "type": "string" + } + }, + "required": [ + "content" + ], + "title": "CommentDescriptor", + "type": "object" + }, + "DataClassification": { + "description": "An enumeration of data classification levels.", + "enum": [ + "unclassified", + "cui", + "pii", + "phi", + "other" + ], + "title": "DataClassification", + "type": "string" + }, + "DataDescriptor": { + "description": "Describes a dataset used in model development.", + "properties": { + "description": { + "anyOf": [ + { + 
"type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "source": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Source" + }, + "classification": { + "anyOf": [ + { + "$ref": "#/$defs/DataClassification" + }, + { + "type": "null" + } + ], + "default": null + }, + "access": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Access" + }, + "labels": { + "default": [], + "items": { + "$ref": "#/$defs/LabelDescriptor" + }, + "title": "Labels", + "type": "array" + }, + "fields": { + "default": [], + "items": { + "$ref": "#/$defs/FieldDescriptor" + }, + "title": "Fields", + "type": "array" + }, + "rights": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Rights" + }, + "policies": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Policies" + }, + "identifiable_information": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Identifiable Information" + } + }, + "title": "DataDescriptor", + "type": "object" + }, + "FieldDescriptor": { + "description": "Describes a dataset field.", + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Name" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Type" + }, + "expected_values": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Expected Values" + }, + "missing_values": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Missing 
Values" + }, + "special_values": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Special Values" + } + }, + "title": "FieldDescriptor", + "type": "object" + }, + "GoalDescriptor": { + "description": "A description of a system goal.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "metrics": { + "default": [], + "items": { + "$ref": "#/$defs/MetricDescriptor" + }, + "title": "Metrics", + "type": "array" + } + }, + "title": "GoalDescriptor", + "type": "object" + }, + "IntendedUseDescriptor": { + "description": "The model implementation for intended use.", + "properties": { + "usage_context": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Usage Context" + }, + "production_requirements": { + "allOf": [ + { + "$ref": "#/$defs/ModelProductionDescriptor" + } + ], + "default": { + "integration": null, + "interface": { + "input": { + "description": null + }, + "output": { + "description": null + } + }, + "resources": { + "cpu": null, + "gpu": null, + "memory": null, + "storage": null + } + } + } + }, + "title": "IntendedUseDescriptor", + "type": "object" + }, + "LabelDescriptor": { + "description": "Describes a dataset label.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "percentage": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Percentage" + } + }, + "title": "LabelDescriptor", + "type": "object" + }, + "MetricDescriptor": { + "description": "A description of a metric that supports a system goal.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "baseline": { + "anyOf": [ 
+ { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Baseline" + } + }, + "title": "MetricDescriptor", + "type": "object" + }, + "ModelInputDescriptor": { + "description": "A description of the model input specification.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + } + }, + "title": "ModelInputDescriptor", + "type": "object" + }, + "ModelInterfaceDescriptor": { + "description": "A description of the model interface.", + "properties": { + "input": { + "allOf": [ + { + "$ref": "#/$defs/ModelInputDescriptor" + } + ], + "default": { + "description": null + } + }, + "output": { + "allOf": [ + { + "$ref": "#/$defs/ModelOutputDescriptor" + } + ], + "default": { + "description": null + } + } + }, + "title": "ModelInterfaceDescriptor", + "type": "object" + }, + "ModelOutputDescriptor": { + "description": "A description of the model output specification.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + } + }, + "title": "ModelOutputDescriptor", + "type": "object" + }, + "ModelProductionDescriptor": { + "description": "A descriptor for model production considerations.", + "properties": { + "integration": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Integration" + }, + "interface": { + "allOf": [ + { + "$ref": "#/$defs/ModelInterfaceDescriptor" + } + ], + "default": { + "input": { + "description": null + }, + "output": { + "description": null + } + } + }, + "resources": { + "allOf": [ + { + "$ref": "#/$defs/ModelResourcesDescriptor" + } + ], + "default": { + "cpu": null, + "gpu": null, + "memory": null, + "storage": null + } + } + }, + "title": "ModelProductionDescriptor", + "type": "object" + }, + "ModelResourcesDescriptor": { + "description": "A descriptor for model 
resource requirements.", + "properties": { + "cpu": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cpu" + }, + "gpu": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Gpu" + }, + "memory": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Memory" + }, + "storage": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Storage" + } + }, + "title": "ModelResourcesDescriptor", + "type": "object" + }, + "PerformanceDesciptor": { + "description": "The model implementation for the performance descriptor.", + "properties": { + "goals": { + "default": [], + "items": { + "$ref": "#/$defs/GoalDescriptor" + }, + "title": "Goals", + "type": "array" + }, + "findings": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Findings" + } + }, + "title": "PerformanceDesciptor", + "type": "object" + }, + "ProblemType": { + "description": "An enumeration over machine learning problem types.", + "enum": [ + "classification", + "clustering", + "trend", + "alert", + "forecasting", + "content_generation", + "benchmarking", + "goals", + "detection", + "other" + ], + "title": "ProblemType", + "type": "string" + }, + "QuantitiveAnalysisDescriptor": { + "description": "The model implementation for report quantitative analysis.", + "properties": { + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Content" + } + }, + "title": "QuantitiveAnalysisDescriptor", + "type": "object" + }, + "RiskDescriptor": { + "description": "A description of system-level risks.", + "properties": { + "fp": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Fp" + }, + "fn": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + 
} + ], + "default": null, + "title": "Fn" + }, + "other": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Other" + } + }, + "title": "RiskDescriptor", + "type": "object" + }, + "SummaryDescriptor": { + "description": "The model implementation for the report summary.", + "properties": { + "problem_type": { + "anyOf": [ + { + "$ref": "#/$defs/ProblemType" + }, + { + "type": "null" + } + ], + "default": null + }, + "task": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Task" + } + }, + "title": "SummaryDescriptor", + "type": "object" + } + }, + "description": "The model implementation for the MLTE report artifact.", + "properties": { + "artifact_type": { + "const": "5", + "title": "Artifact Type" + }, + "summary": { + "allOf": [ + { + "$ref": "#/$defs/SummaryDescriptor" + } + ], + "default": { + "problem_type": null, + "task": null + } + }, + "performance": { + "allOf": [ + { + "$ref": "#/$defs/PerformanceDesciptor" + } + ], + "default": { + "findings": null, + "goals": [] + } + }, + "intended_use": { + "allOf": [ + { + "$ref": "#/$defs/IntendedUseDescriptor" + } + ], + "default": { + "production_requirements": { + "integration": null, + "interface": { + "input": { + "description": null + }, + "output": { + "description": null + } + }, + "resources": { + "cpu": null, + "gpu": null, + "memory": null, + "storage": null + } + }, + "usage_context": null + } + }, + "risks": { + "allOf": [ + { + "$ref": "#/$defs/RiskDescriptor" + } + ], + "default": { + "fn": null, + "fp": null, + "other": null + } + }, + "data": { + "default": [], + "items": { + "$ref": "#/$defs/DataDescriptor" + }, + "title": "Data", + "type": "array" + }, + "comments": { + "default": [], + "items": { + "$ref": "#/$defs/CommentDescriptor" + }, + "title": "Comments", + "type": "array" + }, + "quantitative_analysis": { + "allOf": [ + { + "$ref": "#/$defs/QuantitiveAnalysisDescriptor" + } + ], + 
"default": { + "content": null + } + } + }, + "required": [ + "artifact_type" + ], + "title": "ReportModel", + "type": "object" +} \ No newline at end of file From 0a8355d8a9093713b7206aa0d8525f2781935abe Mon Sep 17 00:00:00 2001 From: turingcompl33t Date: Wed, 20 Sep 2023 11:27:28 -0400 Subject: [PATCH 4/4] add schema validation test for report --- mlte/model/shared.py | 4 +- test/schema/test_report_schema.py | 90 +++++++++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 5 deletions(-) diff --git a/mlte/model/shared.py b/mlte/model/shared.py index 3a2bfc337..65ab36faa 100644 --- a/mlte/model/shared.py +++ b/mlte/model/shared.py @@ -14,7 +14,7 @@ # ----------------------------------------------------------------------------- -class ProblemType(Enum): +class ProblemType(str, Enum): """An enumeration over machine learning problem types.""" CLASSIFICATION = "classification" @@ -152,7 +152,7 @@ class ModelDescriptor(BaseModel): # ----------------------------------------------------------------------------- -class DataClassification(Enum): +class DataClassification(str, Enum): """An enumeration of data classification levels.""" UNCLASSIFIED = "unclassified" diff --git a/test/schema/test_report_schema.py b/test/schema/test_report_schema.py index a64be1fa2..2c2593fef 100644 --- a/test/schema/test_report_schema.py +++ b/test/schema/test_report_schema.py @@ -7,23 +7,107 @@ import pytest from jsonschema import ValidationError +from mlte.model.shared import ( + DataClassification, + DataDescriptor, + FieldDescriptor, + GoalDescriptor, + LabelDescriptor, + MetricDescriptor, + ModelInputDescriptor, + ModelInterfaceDescriptor, + ModelOutputDescriptor, + ModelProductionDescriptor, + ModelResourcesDescriptor, + ProblemType, + RiskDescriptor, +) from mlte.report.artifact import Report +from mlte.report.model import ( + CommentDescriptor, + IntendedUseDescriptor, + PerformanceDesciptor, + QuantitiveAnalysisDescriptor, + SummaryDescriptor, +) from . 
import util as util -def test_empty_instance() -> None: # noqa +def test_empty_instance() -> None: + """An empty instance validates successfully.""" report = Report() doc = report.to_model().to_json() util.validate_report_schema(doc["body"]) -@pytest.mark.skip("Pending artifact protocol implementation.") def test_valid_instance() -> None: - assert True + """A complete instance validates successfully.""" + report = Report( + "my-report", + summary=SummaryDescriptor( + problem_type=ProblemType.CLASSIFICATION, task="task" + ), + performance=PerformanceDesciptor( + goals=[ + GoalDescriptor( + description="description", + metrics=[ + MetricDescriptor( + description="description", baseline="baseline" + ) + ], + ) + ] + ), + intended_use=IntendedUseDescriptor( + usage_context="context", + production_requirements=ModelProductionDescriptor( + integration="integration", + interface=ModelInterfaceDescriptor( + input=ModelInputDescriptor(description="description"), + output=ModelOutputDescriptor(description="output"), + ), + resources=ModelResourcesDescriptor( + cpu="cpu", gpu="gpu", memory="memory", storage="storage" + ), + ), + ), + risks=RiskDescriptor(fp="fp", fn="fn", other="other"), + data=[ + DataDescriptor( + description="description", + classification=DataClassification.UNCLASSIFIED, + access="access", + fields=[ + FieldDescriptor( + name="name", + description="description", + type="type", + expected_values="expected_values", + missing_values="missing_values", + special_values="special_values", + ) + ], + labels=[ + LabelDescriptor(description="description", percentage=95.0) + ], + policies="policies", + rights="rights", + source="source", + identifiable_information="identifiable_information", + ) + ], + comments=[CommentDescriptor(content="content")], + quantitative_analysis=QuantitiveAnalysisDescriptor(content="content"), + ) + doc = report.to_model().to_json() + util.validate_report_schema(doc["body"]) def test_invalid_instance(): + """An invalid instance fails 
validation.""" + with pytest.raises(ValidationError): util.validate_report_schema({})