From 84016bdef880653454d919e8c8e7438dbecd2796 Mon Sep 17 00:00:00 2001
From: VladYoSlav
Date: Sun, 27 Oct 2024 22:05:15 +0300
Subject: [PATCH] feat(primitives): cfd

---
 internal/domain/task/__init__.py              |  1 +
 internal/domain/task/entities/__init__.py     |  3 ++
 internal/domain/task/entities/cfd/__init__.py |  1 +
 internal/domain/task/entities/cfd/cfd_task.py | 54 +++++++++++++++++++
 .../domain/task/value_objects/__init__.py     |  3 ++
 .../domain/task/value_objects/cfd/__init__.py | 23 ++++++++
 .../task/value_objects/cfd/algo_config.py     | 38 +++++++++++++
 .../value_objects/cfd/algo_descriptions.py    |  8 +++
 .../task/value_objects/cfd/algo_name.py       |  5 ++
 .../task/value_objects/cfd/algo_result.py     | 18 +++++++
 .../task/value_objects/primitive_name.py      |  1 +
 11 files changed, 155 insertions(+)
 create mode 100644 internal/domain/task/entities/cfd/__init__.py
 create mode 100644 internal/domain/task/entities/cfd/cfd_task.py
 create mode 100644 internal/domain/task/value_objects/cfd/__init__.py
 create mode 100644 internal/domain/task/value_objects/cfd/algo_config.py
 create mode 100644 internal/domain/task/value_objects/cfd/algo_descriptions.py
 create mode 100644 internal/domain/task/value_objects/cfd/algo_name.py
 create mode 100644 internal/domain/task/value_objects/cfd/algo_result.py

diff --git a/internal/domain/task/__init__.py b/internal/domain/task/__init__.py
index e1b1be4..d9268af 100644
--- a/internal/domain/task/__init__.py
+++ b/internal/domain/task/__init__.py
@@ -4,3 +4,4 @@
 from internal.domain.task.entities import IndTask  # noqa: F401
 from internal.domain.task.entities import AindTask  # noqa: F401
 from internal.domain.task.entities import ArTask  # noqa: F401
+from internal.domain.task.entities import CfdTask  # noqa: F401
diff --git a/internal/domain/task/entities/__init__.py b/internal/domain/task/entities/__init__.py
index 28856b2..e960b5f 100644
--- a/internal/domain/task/entities/__init__.py
+++ b/internal/domain/task/entities/__init__.py
@@ -6,6 +6,7 @@
 from internal.domain.task.entities.ind import IndTask
 from internal.domain.task.entities.aind import AindTask
 from internal.domain.task.entities.ar import ArTask
+from internal.domain.task.entities.cfd import CfdTask
 from internal.domain.task.value_objects import PrimitiveName
 
 
@@ -35,4 +36,6 @@ def match_task_by_primitive_name(primitive_name: PrimitiveName):
             return AindTask()
         case PrimitiveName.ar:
             return ArTask()
+        case PrimitiveName.cfd:
+            return CfdTask()
     assert_never(primitive_name)
diff --git a/internal/domain/task/entities/cfd/__init__.py b/internal/domain/task/entities/cfd/__init__.py
new file mode 100644
index 0000000..adb483e
--- /dev/null
+++ b/internal/domain/task/entities/cfd/__init__.py
@@ -0,0 +1 @@
+from internal.domain.task.entities.cfd.cfd_task import CfdTask  # noqa: F401
diff --git a/internal/domain/task/entities/cfd/cfd_task.py b/internal/domain/task/entities/cfd/cfd_task.py
new file mode 100644
index 0000000..f26faee
--- /dev/null
+++ b/internal/domain/task/entities/cfd/cfd_task.py
@@ -0,0 +1,54 @@
+from desbordante.cfd import CfdAlgorithm
+from desbordante.cfd.algorithms import FDFirst
+from internal.domain.task.entities.task import Task
+from internal.domain.task.value_objects import PrimitiveName, IncorrectAlgorithmName
+from internal.domain.task.value_objects.cfd import CfdTaskConfig, CfdTaskResult
+from internal.domain.task.value_objects.cfd import (
+    CfdAlgoName,
+    CfdModel,
+    CfdAlgoResult,
+)
+
+
+class CfdTask(Task[CfdAlgorithm, CfdTaskConfig, CfdTaskResult]):
+    """
+    Task class for Conditional Functional Dependencies (CFD) mining.
+
+    This class handles the execution of different CFD algorithms and processes
+    the results into the appropriate format. It implements the abstract methods
+    defined in the Task base class.
+
+    Methods:
+    - _match_algo_by_name(algo_name: str) -> CfdAlgorithm:
+        Match CFD algorithm by its name.
+    - _collect_result(algo: CfdAlgorithm) -> CfdTaskResult:
+        Process the output of the CFD algorithm and return the result.
+    """
+
+    def _collect_result(self, algo: CfdAlgorithm) -> CfdTaskResult:
+        """
+        Collect and process the CFD result.
+
+        Args:
+            algo (CfdAlgorithm): CFD algorithm to process.
+        Returns:
+            CfdTaskResult: The processed result containing CFDs.
+        """
+        cfds = algo.get_cfds()
+        algo_result = CfdAlgoResult(cfds=[CfdModel.from_cfd(cfd) for cfd in cfds])
+        return CfdTaskResult(primitive_name=PrimitiveName.cfd, result=algo_result)
+
+    def _match_algo_by_name(self, algo_name: str) -> CfdAlgorithm:
+        """
+        Match the CFD algorithm by name.
+
+        Args:
+            algo_name (str): The name of the CFD algorithm (a CfdAlgoName value).
+        Returns:
+            CfdAlgorithm: The corresponding algorithm instance.
+        """
+        match algo_name:
+            case CfdAlgoName.FDFirst:
+                return FDFirst()
+            case _:
+                raise IncorrectAlgorithmName(algo_name, "CFD")
diff --git a/internal/domain/task/value_objects/__init__.py b/internal/domain/task/value_objects/__init__.py
index 640dfe5..5d03793 100644
--- a/internal/domain/task/value_objects/__init__.py
+++ b/internal/domain/task/value_objects/__init__.py
@@ -7,6 +7,7 @@
 from internal.domain.task.value_objects.ind import IndTaskConfig, IndTaskResult
 from internal.domain.task.value_objects.aind import AindTaskConfig, AindTaskResult
 from internal.domain.task.value_objects.ar import ArTaskConfig, ArTaskResult
+from internal.domain.task.value_objects.cfd import CfdTaskConfig, CfdTaskResult
 
 from internal.domain.task.value_objects.config import TaskConfig  # noqa: F401
 from internal.domain.task.value_objects.result import TaskResult  # noqa: F401
@@ -32,6 +33,7 @@
         IndTaskConfig,
         AindTaskConfig,
         ArTaskConfig,
+        CfdTaskConfig,
     ],
     Field(discriminator="primitive_name"),
 ]
@@ -44,6 +46,7 @@
         IndTaskResult,
         AindTaskResult,
         ArTaskResult,
+        CfdTaskResult,
     ],
     Field(discriminator="primitive_name"),
 ]
diff --git a/internal/domain/task/value_objects/cfd/__init__.py b/internal/domain/task/value_objects/cfd/__init__.py
new file mode 100644
index 0000000..3b2177b
--- /dev/null
+++ b/internal/domain/task/value_objects/cfd/__init__.py
@@ -0,0 +1,23 @@
+from typing import Literal
+
+from pydantic import BaseModel
+
+from internal.domain.task.value_objects.primitive_name import PrimitiveName
+from internal.domain.task.value_objects.cfd.algo_config import OneOfCfdAlgoConfig
+from internal.domain.task.value_objects.cfd.algo_result import (  # noqa: F401
+    CfdAlgoResult,
+    CfdModel,
+)
+from internal.domain.task.value_objects.cfd.algo_name import CfdAlgoName  # noqa: F401
+
+
+class BaseCfdTaskModel(BaseModel):
+    primitive_name: Literal[PrimitiveName.cfd]
+
+
+class CfdTaskConfig(BaseCfdTaskModel):
+    config: OneOfCfdAlgoConfig
+
+
+class CfdTaskResult(BaseCfdTaskModel):
+    result: CfdAlgoResult
diff --git a/internal/domain/task/value_objects/cfd/algo_config.py b/internal/domain/task/value_objects/cfd/algo_config.py
new file mode 100644
index 0000000..441cf47
--- /dev/null
+++ b/internal/domain/task/value_objects/cfd/algo_config.py
@@ -0,0 +1,38 @@
+from typing import Literal, Annotated
+from pydantic import Field
+from internal.domain.common import OptionalModel
+from internal.domain.task.value_objects.cfd.algo_name import CfdAlgoName
+from internal.domain.task.value_objects.cfd.algo_descriptions import descriptions
+
+
+class BaseCfdConfig(OptionalModel):
+    __non_optional_fields__ = {
+        "algo_name",
+    }
+
+
+class FDFirstConfig(BaseCfdConfig):
+    algo_name: Literal[CfdAlgoName.FDFirst]
+
+    columns_number: Annotated[
+        int, Field(ge=1, description=descriptions["columns_number"])
+    ]
+    cfd_minsup: Annotated[int, Field(ge=1, description=descriptions["cfd_minsup"])]
+    cfd_minconf: Annotated[
+        float, Field(ge=0, le=1, description=descriptions["cfd_minconf"])
+    ]
+    tuples_number: Annotated[
+        int, Field(ge=1, description=descriptions["tuples_number"])
+    ]
+    cfd_max_lhs: Annotated[int, Field(ge=1, description=descriptions["cfd_max_lhs"])]
+    # Literal must be the field type itself; as extra Annotated metadata it is ignored.
+    cfd_substrategy: Annotated[
+        Literal["dfs", "bfs"],
+        Field(description=descriptions["cfd_substrategy"]),
+    ]
+
+
+OneOfCfdAlgoConfig = Annotated[
+    FDFirstConfig,
+    Field(discriminator="algo_name"),
+]
diff --git a/internal/domain/task/value_objects/cfd/algo_descriptions.py b/internal/domain/task/value_objects/cfd/algo_descriptions.py
new file mode 100644
index 0000000..29877af
--- /dev/null
+++ b/internal/domain/task/value_objects/cfd/algo_descriptions.py
@@ -0,0 +1,8 @@
+descriptions = {
+    "columns_number": "Number of columns in the part of the dataset if you want to use the algorithm on a subset of columns",
+    "cfd_minsup": "Minimum support value (integer between 1 and the number of tuples in dataset)",
+    "cfd_minconf": "Minimum confidence value (between 0 and 1)",
+    "tuples_number": "Number of tuples in the dataset or its part",
+    "cfd_max_lhs": "Maximum size of the left-hand side of the CFD",
+    "cfd_substrategy": "Lattice traversal strategy for CFD mining",
+}
diff --git a/internal/domain/task/value_objects/cfd/algo_name.py b/internal/domain/task/value_objects/cfd/algo_name.py
new file mode 100644
index 0000000..f44bd1a
--- /dev/null
+++ b/internal/domain/task/value_objects/cfd/algo_name.py
@@ -0,0 +1,5 @@
+from enum import StrEnum, auto
+
+
+class CfdAlgoName(StrEnum):
+    FDFirst = auto()
diff --git a/internal/domain/task/value_objects/cfd/algo_result.py b/internal/domain/task/value_objects/cfd/algo_result.py
new file mode 100644
index 0000000..1026cc0
--- /dev/null
+++ b/internal/domain/task/value_objects/cfd/algo_result.py
@@ -0,0 +1,18 @@
+from pydantic import BaseModel
+from desbordante.cfd import CFD
+
+
+class CfdModel(BaseModel):
+    lhs_items: list[str | None]
+    rhs_item: str | None
+
+    @classmethod
+    def from_cfd(cls, cfd: CFD):
+        return cls(
+            lhs_items=[item.value for item in cfd.lhs_items],
+            rhs_item=cfd.rhs_item.value,
+        )
+
+
+class CfdAlgoResult(BaseModel):
+    cfds: list[CfdModel]
diff --git a/internal/domain/task/value_objects/primitive_name.py b/internal/domain/task/value_objects/primitive_name.py
index 10163f1..8e2709a 100644
--- a/internal/domain/task/value_objects/primitive_name.py
+++ b/internal/domain/task/value_objects/primitive_name.py
@@ -8,6 +8,7 @@ class PrimitiveName(StrEnum):
     ac = auto()
     ind = auto()
     aind = auto()
+    cfd = auto()
     # fd_verification = auto()
     # mfd_verification = auto()
     # statistics = auto()