-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add celery task for data profiling and update API
- Loading branch information
1 parent
0d276d7
commit d238f24
Showing
15 changed files
with
174 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,4 @@ | ||
POSTGRES_DIALECT_DRIVER=postgresql+psycopg | ||
|
||
POSTGRES_USER=admin | ||
POSTGRES_PASSWORD=admin | ||
POSTGRES_HOST=localhost | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,9 @@ | ||
from typing import Literal | ||
|
||
from fastapi import APIRouter | ||
|
||
router = APIRouter() | ||
|
||
|
||
@router.get("/ping") | ||
def ping() -> Literal["Pong"]: | ||
return "Pong" | ||
def ping() -> Literal["Pong!"]: | ||
return "Pong!" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,29 @@ | ||
from contextlib import contextmanager | ||
from typing import Generator | ||
from sqlalchemy.orm import Session, sessionmaker | ||
from sqlalchemy import create_engine | ||
|
||
from app.settings import settings | ||
from sqlalchemy.pool import NullPool | ||
|
||
default_engine = create_engine(url=settings.postgres_dsn.unicode_string()) | ||
engine_without_pool = create_engine( | ||
url=settings.postgres_dsn.unicode_string(), | ||
poolclass=NullPool, | ||
) | ||
|
||
SessionLocal = sessionmaker(bind=default_engine) | ||
SessionLocalWithoutPool = sessionmaker(bind=engine_without_pool) | ||
|
||
engine = create_engine(url=settings.postgres_dsn.unicode_string()) | ||
SessionLocal = sessionmaker(bind=engine, autoflush=False) | ||
|
||
@contextmanager | ||
def get_session(with_pool=True) -> Generator[Session, None, None]: | ||
""" | ||
Returns a generator that yields a session object for database operations. | ||
def get_session() -> Generator[Session, None, None]: | ||
with SessionLocal() as session: | ||
Parameters: | ||
with_pool (bool): A flag to determine if the session uses a connection pool. | ||
Set to False when used in a Celery task. Defaults to True. | ||
""" | ||
maker = SessionLocal if with_pool else SessionLocalWithoutPool | ||
with maker() as session: | ||
yield session |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,51 +1,53 @@ | ||
from pydantic import BaseModel, Field | ||
from pydantic import Field | ||
from typing import Annotated | ||
|
||
from app.domain.common.optional_model import OptionalModel | ||
|
||
class AidConfig(BaseModel): | ||
|
||
class AidConfig(OptionalModel): | ||
is_null_equal_null: bool | ||
|
||
|
||
class DFDConfig(BaseModel): | ||
class DFDConfig(OptionalModel): | ||
is_null_equal_null: bool | ||
threads: Annotated[int, Field(ge=1, le=8)] | ||
|
||
|
||
class DepminerConfig(BaseModel): | ||
class DepminerConfig(OptionalModel): | ||
is_null_equal_null: bool | ||
|
||
|
||
class FDepConfig(BaseModel): | ||
class FDepConfig(OptionalModel): | ||
is_null_equal_null: bool | ||
|
||
|
||
class FUNConfig(BaseModel): | ||
class FUNConfig(OptionalModel): | ||
is_null_equal_null: bool | ||
|
||
|
||
class FastFDsConfig(BaseModel): | ||
class FastFDsConfig(OptionalModel): | ||
is_null_equal_null: bool | ||
max_lhs: Annotated[int, Field(ge=1, le=10)] | ||
threads: Annotated[int, Field(ge=1, le=8)] | ||
|
||
|
||
class FdMineConfig(BaseModel): | ||
class FdMineConfig(OptionalModel): | ||
is_null_equal_null: bool | ||
|
||
|
||
class HyFDConfig(BaseModel): | ||
class HyFDConfig(OptionalModel): | ||
is_null_equal_null: bool | ||
|
||
|
||
class PyroConfig(BaseModel): | ||
class PyroConfig(OptionalModel): | ||
is_null_equal_null: bool | ||
error: Annotated[float, Field(ge=0, le=1)] | ||
max_lhs: Annotated[int, Field(ge=1, le=10)] | ||
threads: Annotated[int, Field(ge=1, le=8)] | ||
seed: int | ||
|
||
|
||
class TaneConfig(BaseModel): | ||
class TaneConfig(OptionalModel): | ||
is_null_equal_null: bool | ||
error: Annotated[float, Field(ge=0, le=1)] | ||
max_lhs: Annotated[int, Field(ge=1, le=10)] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .data_profiling_task import data_profiling_task as data_profiling_task |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import logging | ||
|
||
from app.db.session import get_session | ||
from app.worker import worker | ||
from app.domain.task.abstract_task import AnyConf, AnyRes | ||
from app.domain.task.primitive_factory import PrimitiveName, PrimitiveFactory | ||
from app.domain.task.task_factory import AnyAlgoName | ||
from app.domain.worker.task.resource_intensive_task import ResourceIntensiveTask | ||
from pydantic import UUID4 | ||
import pandas as pd | ||
from celery.signals import task_failure, task_prerun, task_postrun | ||
|
||
|
||
# Celery task: run one data-profiling algorithm over a dataset and return its result. | ||
# Registered on `worker` with ResourceIntensiveTask as base class, ignore_result=True | ||
# and max_retries=0. | ||
# NOTE(review): the function returns `result`, but ignore_result=True presumably means | ||
# the value is not stored in the result backend - confirm how callers obtain it. | ||
@worker.task(base=ResourceIntensiveTask, ignore_result=True, max_retries=0) | ||
def data_profiling_task( | ||
primitive_name: PrimitiveName, | ||
algo_name: AnyAlgoName, | ||
file_id: UUID4, | ||
config: AnyConf, | ||
) -> AnyRes: | ||
# Two-step lookup: primitive name -> factory, then algorithm name -> task class. | ||
task_factory = PrimitiveFactory.get_by_name(primitive_name) | ||
task_cls = task_factory.get_by_name(algo_name) | ||
|
||
# NOTE(review): `file_id` is not used yet; a fixed test dataset is always loaded. | ||
# The path is relative, so it depends on the worker's current working directory. | ||
df = pd.read_csv( | ||
"tests/datasets/university_fd.csv", sep=",", header=0 | ||
) # TODO: Replace with actual file (by file_id) in future | ||
|
||
# The task class is constructed with the DataFrame and executed with the given config. | ||
task = task_cls(df) | ||
result = task.execute(config) | ||
return result |
|
||
|
||
# Handler for Celery's task_prerun signal, connected only for data_profiling_task | ||
# (sender=data_profiling_task). Unused signal keyword arguments are absorbed by `**_`. | ||
@task_prerun.connect(sender=data_profiling_task) | ||
def task_prerun_notifier( | ||
sender, | ||
task_id, | ||
task, | ||
args, | ||
kwargs, | ||
**_, | ||
): | ||
# TODO: Create Task in database and set status to "running" or similar | ||
# Placeholder: a session is opened with with_pool=False, but the bare `session` | ||
# expression below does nothing with it yet. | ||
with get_session(with_pool=False) as session: | ||
session | ||
|
||
# NOTE(review): logged at CRITICAL although this is a routine lifecycle event, and the | ||
# message mentions add(), which is not defined in this module - confirm intent. | ||
logging.critical( | ||
f"From task_prerun_notifier ==> Running just before add() executes, {sender}" | ||
) |
|
||
|
||
# Handler for Celery's task_postrun signal, connected only for data_profiling_task | ||
# (sender=data_profiling_task). `retval` is accepted but not used here; other signal | ||
# keyword arguments are absorbed by `**_`. | ||
@task_postrun.connect(sender=data_profiling_task) | ||
def task_postrun_notifier( | ||
sender, | ||
task_id, | ||
task, | ||
args, | ||
kwargs, | ||
retval, | ||
**_, | ||
): | ||
# Placeholder: a session is opened with with_pool=False, but the bare `session` | ||
# expression below does nothing with it yet. | ||
with get_session(with_pool=False) as session: | ||
session | ||
|
||
# TODO: Update Task in database and set status to "completed" or similar | ||
# NOTE(review): logged at CRITICAL although this is a routine lifecycle event. | ||
logging.critical(f"From task_postrun_notifier ==> Ok, done!, {sender}") |
|
||
|
||
# Handler for Celery's task_failure signal, connected only for data_profiling_task | ||
# (sender=data_profiling_task). `exception`, `traceback` and `einfo` are accepted but | ||
# not used here; other signal keyword arguments are absorbed by `**_`. | ||
@task_failure.connect(sender=data_profiling_task) | ||
def task_failure_notifier( | ||
sender, | ||
task_id, | ||
exception, | ||
args, | ||
kwargs, | ||
traceback, | ||
einfo, | ||
**_, | ||
): | ||
# Placeholder: a session is opened with with_pool=False, but the bare `session` | ||
# expression below does nothing with it yet. | ||
with get_session(with_pool=False) as session: | ||
session | ||
# TODO: Update Task in database and set status to "failed" or similar | ||
|
||
# NOTE(review): the log line includes only the sender; the exception and traceback | ||
# passed to this handler are not logged. | ||
logging.critical( | ||
f"From task_failure_notifier ==> Task failed successfully! 😅, {sender}" | ||
) |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from celery import Task | ||
import resource | ||
from app.settings import settings | ||
|
||
|
||
# Celery Task base class that takes its time limits and memory limits from `settings` | ||
# and applies the memory limits in before_start(). | ||
class ResourceIntensiveTask(Task): | ||
# There are default Celery time limits, see: https://docs.celeryq.dev/en/stable/userguide/workers.html#time-limits | ||
time_limit = settings.worker_hard_time_limit_in_seconds | ||
soft_time_limit = settings.worker_soft_time_limit_in_seconds | ||
|
||
# There are custom memory limits using `resource` module | ||
# NOTE(review): values are passed straight to RLIMIT_AS, so they are presumably in | ||
# bytes - confirm against the settings definitions. | ||
hard_memory_limit = settings.worker_hard_memory_limit | ||
soft_memory_limit = settings.worker_soft_memory_limit | ||
|
||
# Sets the address-space limit (RLIMIT_AS) to (soft, hard) for the calling process, | ||
# then delegates to the parent before_start(). | ||
# NOTE(review): the limit is set on the process, not the task, so it presumably stays | ||
# in effect for later tasks run by the same worker process. `resource` is Unix-only. | ||
def before_start(self, task_id, args, kwargs) -> None: | ||
resource.setrlimit( | ||
resource.RLIMIT_AS, (self.soft_memory_limit, self.hard_memory_limit) | ||
) | ||
super().before_start(task_id, args, kwargs) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
from .settings import Settings | ||
from .settings import get_settings | ||
|
||
settings = Settings() | ||
settings = get_settings() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,6 @@ | ||
# Celery configuration values: broker retry behaviour and message serialization. | ||
broker_connection_retry_on_startup = True | ||
# NOTE(security): tasks and results are serialized with pickle, and pickle content is | ||
# accepted below. Unpickling can execute arbitrary code, so the broker and result | ||
# backend must only be reachable by trusted producers - confirm this holds in deployment. | ||
task_serializer = "pickle" | ||
result_serializer = "pickle" | ||
event_serializer = "json" | ||
# Both JSON and pickle ("application/x-python-serialize") payloads are accepted. | ||
accept_content = ["application/json", "application/x-python-serialize"] | ||
result_accept_content = ["application/json", "application/x-python-serialize"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters