Skip to content

Commit

Permalink
[BUG] add test cases for bulk minio reading (#1402)
Browse files Browse the repository at this point in the history
  • Loading branch information
samster25 authored Sep 22, 2023
1 parent 8c20f39 commit c4e083b
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 2 deletions.
4 changes: 2 additions & 2 deletions tests/integration/io/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ def retry_server_s3_config(request) -> daft.io.IOConfig:
def minio_create_bucket(
minio_io_config: daft.io.IOConfig, bucket_name: str = "my-minio-bucket"
) -> YieldFixture[list[str]]:
"""Mounts data in `folder` into files in minio
"""Creates a bucket in MinIO
Yields a list of S3 URLs
Yields a s3fs FileSystem
"""
fs = s3fs.S3FileSystem(
key=minio_io_config.s3.key_id,
Expand Down
28 changes: 28 additions & 0 deletions tests/integration/io/parquet/test_reads_s3_minio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import annotations

import pyarrow as pa
import pytest
from pyarrow import parquet as pq

import daft

from ..conftest import minio_create_bucket


@pytest.mark.integration()
def test_minio_parquet_bulk_readback(minio_io_config) -> None:
    """Round-trip Parquet files through MinIO with the bulk PyArrow reader.

    Writes the same small single-column table to two object keys under
    different prefixes of one bucket, reads both back in a single call to
    ``daft.table.read_parquet_into_pyarrow_bulk``, and checks that one table
    is returned per path and that each table matches the written data.

    Args:
        minio_io_config: IO configuration fixture used both to create the
            bucket and to read the files back.
    """
    bucket_name = "data-engineering-prod"
    with minio_create_bucket(minio_io_config, bucket_name=bucket_name) as fs:
        # Derive the object URLs from ``bucket_name`` so the bucket that is
        # created and the bucket that is written to/read from cannot drift.
        target_paths = [
            f"s3://{bucket_name}/Y/part-00000-51723f93-0ba2-42f1-a58f-154f0ed40f28.c000.snappy.parquet",
            f"s3://{bucket_name}/Z/part-00000-6d5c7cc6-3b4a-443e-a46a-ca9e080bda1b.c000.snappy.parquet",
        ]
        data = {"x": [1, 2, 3, 4]}
        pa_table = pa.Table.from_pydict(data)
        for path in target_paths:
            pq.write_table(pa_table, path, filesystem=fs)

        # Read back while still inside the context manager.
        # NOTE(review): presumably the bucket is cleaned up on exit — confirm
        # against minio_create_bucket in conftest.py.
        readback = daft.table.read_parquet_into_pyarrow_bulk(target_paths, io_config=minio_io_config)
        assert len(readback) == len(target_paths)
        for tab in readback:
            assert tab.to_pydict() == data

0 comments on commit c4e083b

Please sign in to comment.