Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: set correlation id column as optional #88

Merged
merged 2 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions api/alembic/versions/3ec04e609ae9_set_correlation_id_optional.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""set_correlation_id_optional

Revision ID: 3ec04e609ae9
Revises: 086f26392cc4
Create Date: 2024-07-08 10:28:35.068312

"""
from typing import Sequence, Union, Text

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = '3ec04e609ae9'
down_revision: Union[str, None] = '086f26392cc4'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.alter_column('current_dataset', 'CORRELATION_ID_COLUMN',
existing_type=sa.VARCHAR(),
nullable=True)
op.create_unique_constraint(None, 'current_dataset', ['UUID'])
op.create_unique_constraint(None, 'current_dataset_metrics', ['UUID'])
op.create_unique_constraint(None, 'reference_dataset', ['UUID'])
op.create_unique_constraint(None, 'reference_dataset_metrics', ['UUID'])
# ### end Alembic commands ###


def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(None, 'reference_dataset_metrics', type_='unique')
op.drop_constraint(None, 'reference_dataset', type_='unique')
op.drop_constraint(None, 'current_dataset_metrics', type_='unique')
op.drop_constraint(None, 'current_dataset', type_='unique')
op.alter_column('current_dataset', 'CORRELATION_ID_COLUMN',
existing_type=sa.VARCHAR(),
nullable=False)
# ### end Alembic commands ###
2 changes: 1 addition & 1 deletion api/app/db/tables/current_dataset_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ class CurrentDataset(Reflected, BaseTable, BaseDAO):
)
path = Column('PATH', VARCHAR, nullable=False)
date = Column('DATE', TIMESTAMP(timezone=True), nullable=False)
correlation_id_column = Column('CORRELATION_ID_COLUMN', VARCHAR, nullable=False)
correlation_id_column = Column('CORRELATION_ID_COLUMN', VARCHAR, nullable=True)
status = Column('STATUS', VARCHAR, nullable=False, default=JobStatus.IMPORTING)
2 changes: 1 addition & 1 deletion api/app/models/dataset_dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class CurrentDatasetDTO(BaseModel):
model_uuid: UUID
path: str
date: str
correlation_id_column: str
correlation_id_column: Optional[str]
status: str

model_config = ConfigDict(
Expand Down
2 changes: 1 addition & 1 deletion api/app/routes/upload_dataset_route.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def upload_current_file(
model_uuid: UUID,
csv_file: UploadFile = File(...),
sep: str = Form(','),
correlation_id_column: str = Form(''),
correlation_id_column: Optional[str] = Form(None),
) -> CurrentDatasetDTO:
return file_service.upload_current_file(
model_uuid, csv_file, correlation_id_column, sep
Expand Down
2 changes: 1 addition & 1 deletion api/app/services/file_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def upload_current_file(
self,
model_uuid: UUID,
csv_file: UploadFile,
correlation_id_column: str,
correlation_id_column: Optional[str] = None,
sep: str = ',',
columns=None,
) -> CurrentDatasetDTO:
Expand Down
42 changes: 42 additions & 0 deletions api/tests/routes/upload_dataset_route_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,48 @@ def test_bind_reference(self):
assert res.status_code == 200
assert jsonable_encoder(upload_file_result) == res.json()

def test_upload_current(self):
file = csv.get_correct_sample_csv_file()
model_uuid = uuid.uuid4()
upload_file_result = CurrentDatasetDTO(
uuid=uuid.uuid4(),
model_uuid=model_uuid,
path='test',
date=str(datetime.datetime.now(tz=datetime.UTC)),
status=JobStatus.IMPORTING,
correlation_id_column=None
)
self.file_service.upload_current_file = MagicMock(
return_value=upload_file_result
)
res = self.client.post(
f'{self.prefix}/{model_uuid}/current/upload',
files={'csv_file': (file.filename, file.file)},
)
assert res.status_code == 200
assert jsonable_encoder(upload_file_result) == res.json()

def test_bind_current(self):
file_ref = FileReference(file_url='/file')
model_uuid = uuid.uuid4()
upload_file_result = CurrentDatasetDTO(
uuid=uuid.uuid4(),
model_uuid=model_uuid,
path='test',
date=str(datetime.datetime.now(tz=datetime.UTC)),
status=JobStatus.IMPORTING,
correlation_id_column=None
)
self.file_service.bind_current_file = MagicMock(
return_value=upload_file_result
)
res = self.client.post(
f'{self.prefix}/{model_uuid}/current/bind',
json=jsonable_encoder(file_ref),
)
assert res.status_code == 200
assert jsonable_encoder(upload_file_result) == res.json()

def test_get_all_reference_datasets_by_model_uuid_paginated(self):
test_model_uuid = uuid.uuid4()
reference_upload_1 = db_mock.get_sample_reference_dataset(
Expand Down
5 changes: 2 additions & 3 deletions api/tests/services/file_service_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,12 @@ def test_upload_current_file_ok(self):
)
object_name = f'{str(model.uuid)}/current/{file.filename}'
path = f's3://bucket/{object_name}'
correlation_id_column = 'correlation_id'
inserted_file = CurrentDataset(
uuid=uuid4(),
model_uuid=model_uuid,
path=path,
date=datetime.datetime.now(tz=datetime.UTC),
correlation_id_column=correlation_id_column,
correlation_id_column=None,
status=JobStatus.IMPORTING,
)
reference_file = get_sample_reference_dataset(model_uuid=model_uuid)
Expand All @@ -221,7 +220,7 @@ def test_upload_current_file_ok(self):
self.spark_k8s_client.submit_app = MagicMock()

result = self.files_service.upload_current_file(
model.uuid, file, correlation_id_column
model.uuid, file,
)

self.model_svc.get_model_by_uuid.assert_called_once()
Expand Down
7 changes: 4 additions & 3 deletions sdk/radicalbit_platform_sdk/apis/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def load_current_dataset(
self,
file_name: str,
bucket: str,
correlation_id_column: str,
correlation_id_column: Optional[str] = None,
object_name: Optional[str] = None,
aws_credentials: Optional[AwsCredentials] = None,
separator: str = ',',
Expand All @@ -307,7 +307,8 @@ def load_current_dataset(
).columns.tolist()

required_headers = self.__required_headers()
required_headers.append(correlation_id_column)
if correlation_id_column:
required_headers.append(correlation_id_column)
required_headers.append(self.__timestamp.name)

if set(required_headers).issubset(file_headers):
Expand Down Expand Up @@ -465,7 +466,7 @@ def __bind_current_dataset(
self,
dataset_url: str,
separator: str,
correlation_id_column: str,
correlation_id_column: Optional[str] = None,
) -> ModelCurrentDataset:
def __callback(response: requests.Response) -> ModelCurrentDataset:
try:
Expand Down
2 changes: 1 addition & 1 deletion sdk/radicalbit_platform_sdk/apis/model_current_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def uuid(self) -> UUID:
def path(self) -> str:
return self.__path

def correlation_id_column(self) -> str:
def correlation_id_column(self) -> Optional[str]:
return self.__correlation_id_column

def date(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion sdk/radicalbit_platform_sdk/models/file_upload_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class ReferenceFileUpload(FileUploadResult):


class CurrentFileUpload(FileUploadResult):
correlation_id_column: str
correlation_id_column: Optional[str] = None

model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)

Expand Down