diff --git a/src/daft-micropartition/src/micropartition.rs b/src/daft-micropartition/src/micropartition.rs index db5415f376..fcec91427a 100644 --- a/src/daft-micropartition/src/micropartition.rs +++ b/src/daft-micropartition/src/micropartition.rs @@ -404,7 +404,7 @@ impl MicroPartition { .map(|cols| cols.iter().map(|s| s.as_str()).collect::<Vec<_>>()); let row_groups = parquet_sources_to_row_groups(scan_task.sources.as_slice()); - let mp = read_parquet_into_micropartition( + read_parquet_into_micropartition( uris.as_slice(), columns.as_deref(), None, @@ -424,9 +424,7 @@ impl MicroPartition { Some(schema.clone()), field_id_mapping, ) - .context(DaftCoreComputeSnafu)?; - - mp.cast_to_schema(schema).context(DaftCoreComputeSnafu) + .context(DaftCoreComputeSnafu) } // CASE: Last resort fallback option diff --git a/src/daft-scan/src/lib.rs b/src/daft-scan/src/lib.rs index 825cc9c701..174f9a1037 100644 --- a/src/daft-scan/src/lib.rs +++ b/src/daft-scan/src/lib.rs @@ -253,7 +253,9 @@ impl DataFileSource { pub struct ScanTask { pub sources: Vec<DataFileSource>, - /// Schema to use when reading the DataFileSources. + /// Schema to use when reading the DataFileSources. This should always be passed in by the + /// ScanOperator implementation and should not have had any "pruning" applied. + /// /// Note that this is different than the schema of the data after pushdowns have been applied, /// which can be obtained with [`ScanTask::materialized_schema`] instead. pub schema: SchemaRef,