From e8d6ba1afb6febd3a50a97859216b614d2c6ad22 Mon Sep 17 00:00:00 2001 From: Jay Chia Date: Fri, 23 Feb 2024 23:07:27 -0800 Subject: [PATCH] Remove usage of MicroPartition.cast_to_schema in read function --- src/daft-micropartition/src/micropartition.rs | 6 ++---- src/daft-scan/src/lib.rs | 4 +++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/daft-micropartition/src/micropartition.rs b/src/daft-micropartition/src/micropartition.rs index db5415f376..fcec91427a 100644 --- a/src/daft-micropartition/src/micropartition.rs +++ b/src/daft-micropartition/src/micropartition.rs @@ -404,7 +404,7 @@ impl MicroPartition { .map(|cols| cols.iter().map(|s| s.as_str()).collect::>()); let row_groups = parquet_sources_to_row_groups(scan_task.sources.as_slice()); - let mp = read_parquet_into_micropartition( + read_parquet_into_micropartition( uris.as_slice(), columns.as_deref(), None, @@ -424,9 +424,7 @@ impl MicroPartition { Some(schema.clone()), field_id_mapping, ) - .context(DaftCoreComputeSnafu)?; - - mp.cast_to_schema(schema).context(DaftCoreComputeSnafu) + .context(DaftCoreComputeSnafu) } // CASE: Last resort fallback option diff --git a/src/daft-scan/src/lib.rs b/src/daft-scan/src/lib.rs index 825cc9c701..174f9a1037 100644 --- a/src/daft-scan/src/lib.rs +++ b/src/daft-scan/src/lib.rs @@ -253,7 +253,9 @@ impl DataFileSource { pub struct ScanTask { pub sources: Vec, - /// Schema to use when reading the DataFileSources. + /// Schema to use when reading the DataFileSources. This should always be passed in by the + /// ScanOperator implementation and should not have had any "pruning" applied. + /// /// Note that this is different than the schema of the data after pushdowns have been applied, /// which can be obtained with [`ScanTask::materialized_schema`] instead. pub schema: SchemaRef,