Skip to content

Commit

Permalink
Fix unexpected reader_type kwarg error (#443)
Browse files Browse the repository at this point in the history
* Fix unexpected reader_type kwarg error

* Remove default argument when popping reader_type kwarg

---------

Co-authored-by: Kevin Yan <[email protected]>
  • Loading branch information
yankevn and Kevin Yan authored Jan 10, 2025
1 parent 886d1c8 commit 3d9ae37
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
Binary file added deltacat/tests/utils/data/test_file.parquet.gz
Binary file not shown.
23 changes: 23 additions & 0 deletions deltacat/tests/utils/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pyarrow as pa

PARQUET_FILE_PATH = "deltacat/tests/utils/data/test_file.parquet"
PARQUET_GZIP_COMPRESSED_FILE_PATH = "deltacat/tests/utils/data/test_file.parquet.gz"
EMPTY_UTSV_PATH = "deltacat/tests/utils/data/empty.csv"
NON_EMPTY_VALID_UTSV_PATH = "deltacat/tests/utils/data/non_empty_valid.csv"
OVERFLOWING_DECIMAL_PRECISION_UTSV_PATH = (
Expand Down Expand Up @@ -789,3 +790,25 @@ def test_s3_file_to_table_when_parquet_schema_overridden(self):
self.assertEqual(field.name, schema.field(index).name)

self.assertEqual(result.schema.field(1).type, "string")

def test_s3_file_to_table_when_parquet_gzip(self):
    """Read the gzip-compressed Parquet fixture with ``reader_type`` injected.

    The kwargs provider adds ``reader_type="pyarrow"`` to the read kwargs
    before they reach ``s3_file_to_table``. The test then checks that the
    resulting table has 6 rows, exactly one column, and an ``n_legs``
    field whose type compares equal to ``"int64"``.
    """

    def pa_kwargs_provider(content_type, kwargs):
        # reader_type goes first so any key already in kwargs overrides it.
        return {"reader_type": "pyarrow", **kwargs}

    result = s3_file_to_table(
        PARQUET_GZIP_COMPRESSED_FILE_PATH,
        ContentType.PARQUET.value,
        ContentEncoding.GZIP.value,
        ["n_legs", "animal"],
        ["n_legs"],
        pa_read_func_kwargs_provider=pa_kwargs_provider,
    )

    # Row count and column count of the returned table.
    self.assertEqual(len(result), 6)
    self.assertEqual(len(result.column_names), 1)

    # Look the field up by name rather than assuming its position.
    table_schema = result.schema
    n_legs_position = table_schema.get_field_index("n_legs")
    n_legs_field = table_schema.field(n_legs_position)
    self.assertEqual(n_legs_field.type, "int64")
4 changes: 2 additions & 2 deletions deltacat/utils/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,8 +569,8 @@ def s3_file_to_table(
**s3_client_kwargs,
)

if READER_TYPE_KWARG in kwargs:
kwargs.pop(READER_TYPE_KWARG)
if READER_TYPE_KWARG in kwargs:
kwargs.pop(READER_TYPE_KWARG)

filesystem = io
if s3_url.startswith("s3://"):
Expand Down

0 comments on commit 3d9ae37

Please sign in to comment.