Skip to content

Commit

Permalink
fix failing io integration tests
Browse files: browse the repository at this point in the history
  • Loading branch information
Jay Chia committed Oct 7, 2023
1 parent 42094c9 commit 66341ee
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 22 deletions.
11 changes: 6 additions & 5 deletions tests/integration/io/test_url_download_public_aws_s3.py
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,17 @@
from __future__ import annotations

import pytest
import s3fs

import daft


@pytest.mark.integration()
def test_url_download_aws_s3_public_bucket_custom_s3fs(small_images_s3_paths):
fs = s3fs.S3FileSystem(anon=True)
data = {"urls": small_images_s3_paths}
df = daft.from_pydict(data)
df = df.with_column("data", df["urls"].url.download(fs=fs))
df = df.with_column(
"data", df["urls"].url.download(io_config=daft.io.IOConfig(s3=daft.io.S3Config(anonymous=True)))
)

data = df.to_pydict()
assert len(data["data"]) == 6
Expand All @@ -21,10 +21,11 @@ def test_url_download_aws_s3_public_bucket_custom_s3fs(small_images_s3_paths):

@pytest.mark.integration()
def test_url_download_aws_s3_public_bucket_custom_s3fs_wrong_region(small_images_s3_paths):
fs = s3fs.S3FileSystem(anon=True)
data = {"urls": small_images_s3_paths}
df = daft.from_pydict(data)
df = df.with_column("data", df["urls"].url.download(fs=fs))
df = df.with_column(
"data", df["urls"].url.download(io_config=daft.io.IOConfig(s3=daft.io.S3Config(anonymous=True)))
)

data = df.to_pydict()
assert len(data["data"]) == 6
Expand Down
18 changes: 1 addition & 17 deletions tests/integration/io/test_url_download_s3_minio.py
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,13 @@
from __future__ import annotations

import pytest
import s3fs

import daft


@pytest.mark.integration()
def test_url_download_minio_custom_s3fs(minio_io_config, minio_image_data_fixture, image_data):
urls = minio_image_data_fixture
fs = s3fs.S3FileSystem(
key=minio_io_config.s3.key_id,
password=minio_io_config.s3.access_key,
client_kwargs={"endpoint_url": minio_io_config.s3.endpoint_url},
)
data = {"urls": urls}
df = daft.from_pydict(data)
df = df.with_column("data", df["urls"].url.download(fs=fs))

assert df.to_pydict() == {**data, "data": [image_data for _ in range(len(urls))]}


@pytest.mark.integration()
def test_url_download_minio_native_downloader(minio_io_config, minio_image_data_fixture, image_data):
data = {"urls": minio_image_data_fixture}
df = daft.from_pydict(data)
df = df.with_column("data", df["urls"].url.download(io_config=minio_io_config, use_native_downloader=True))
df = df.with_column("data", df["urls"].url.download(io_config=minio_io_config))
assert df.to_pydict() == {**data, "data": [image_data for _ in range(len(minio_image_data_fixture))]}

0 comments on commit 66341ee

Please sign in to comment.