diff --git a/tests/integration/io/test_files_roundtrip_s3_minio.py b/tests/integration/io/test_files_roundtrip_s3_minio.py
new file mode 100644
index 0000000000..5963b2334c
--- /dev/null
+++ b/tests/integration/io/test_files_roundtrip_s3_minio.py
@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+import pytest
+
+import daft
+
+from .conftest import minio_create_bucket
+
+
+@pytest.mark.integration()
+def test_files_roundtrip_minio_native_downloader(minio_io_config):
+    """Upload bytes to a MinIO folder, download them back, and check the round trip."""
+    bucket_name = "my-bucket"
+    folder = f"s3://{bucket_name}/my-folder"
+    with minio_create_bucket(minio_io_config=minio_io_config, bucket_name=bucket_name):
+        bytes_data = [b"a", b"b", b"c"]
+        data = {"data": bytes_data}
+        df = daft.from_pydict(data)
+        df = df.with_column("file_paths", df["data"].bytes.upload_to_folder(folder, io_config=minio_io_config))
+        df.collect()
+
+        df = df.with_column("roundtrip_data", df["file_paths"].url.download(io_config=minio_io_config))
+        results = df.to_pydict()
+
+        assert results["data"] == results["roundtrip_data"] == bytes_data
+        # Every returned path must point inside the folder the upload targeted.
+        for path in results["file_paths"]:
+            assert path.startswith(folder)
diff --git a/tests/integration/io/test_url_download_s3_minio.py b/tests/integration/io/test_url_download_s3_minio.py
deleted file mode 100644
index 821e0ae40b..0000000000
--- a/tests/integration/io/test_url_download_s3_minio.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from __future__ import annotations
-
-import pytest
-
-import daft
-
-
-@pytest.mark.integration()
-def test_url_download_minio_native_downloader(minio_io_config, minio_image_data_fixture, image_data):
-    data = {"urls": minio_image_data_fixture}
-    df = daft.from_pydict(data)
-    df = df.with_column("data", df["urls"].url.download(io_config=minio_io_config))
-    assert df.to_pydict() == {**data, "data": [image_data for _ in range(len(minio_image_data_fixture))]}