diff --git a/daft/table/table.py b/daft/table/table.py index 054ba099f9..eb39859814 100644 --- a/daft/table/table.py +++ b/daft/table/table.py @@ -475,8 +475,6 @@ def read_parquet_into_pyarrow( multithreaded_io=multithreaded_io, coerce_int96_timestamp_unit=coerce_int96_timestamp_unit._timeunit, ) - if len(metadata) == 0: - metadata = None schema = pa.schema(fields, metadata=metadata) columns = [pa.chunked_array(c) for c in columns] # type: ignore return pa.table(columns, schema=schema) diff --git a/tests/integration/io/parquet/test_reads_public_data.py b/tests/integration/io/parquet/test_reads_public_data.py index 116e675b57..c7aa75ded7 100644 --- a/tests/integration/io/parquet/test_reads_public_data.py +++ b/tests/integration/io/parquet/test_reads_public_data.py @@ -232,7 +232,7 @@ def test_parquet_read_table_into_pyarrow(parquet_file, public_storage_io_config, ) pa_read = read_parquet_with_pyarrow(url) assert daft_native_read.schema == pa_read.schema - assert daft_native_read.schema.metadata == pa_read.schema.metadata + assert pa_read.schema.metadata is None or daft_native_read.schema.metadata == pa_read.schema.metadata pd.testing.assert_frame_equal(daft_native_read.to_pandas(), pa_read.to_pandas())