Add todos

Jay Chia committed Feb 22, 2024
1 parent 47d0d61 commit 57d2451
Showing 4 changed files with 9 additions and 7 deletions.

src/daft-micropartition/src/python.rs: 4 changes (2 additions, 2 deletions)

@@ -543,7 +543,7 @@ impl PyMicroPartition {
 1,
 multithreaded_io.unwrap_or(true),
 &schema_infer_options,
-&None, // TODO: pass in field_id mapping
+&None,
 )
 })?;
 Ok(mp.into())

@@ -584,7 +584,7 @@ impl PyMicroPartition {
 num_parallel_tasks.unwrap_or(128) as usize,
 multithreaded_io.unwrap_or(true),
 &schema_infer_options,
-&None, // TODO: pass in field_id mapping
+&None,
 )
 })?;
 Ok(mp.into())
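
Note on the `&None` arguments touched above: they sit in the position of an optional field-ID mapping in the Parquet read path. As a rough, self-contained sketch (not Daft's actual signature; the function and parameter names below are hypothetical), an optional field-ID-to-name mapping can be threaded through a reader and simply passed as `&None` by callers that have no Iceberg-style field IDs:

use std::collections::BTreeMap;

// Hypothetical reader entry point: callers without field IDs pass `&None`.
fn resolve_column_names(
    file_columns: Vec<(i32, String)>, // (field_id, name as written in the file)
    field_id_mapping: &Option<BTreeMap<i32, String>>,
) -> Vec<String> {
    file_columns
        .into_iter()
        .map(|(field_id, file_name)| {
            // Use the mapped name when the mapping knows this field ID,
            // otherwise keep the name found in the file.
            field_id_mapping
                .as_ref()
                .and_then(|m| m.get(&field_id).cloned())
                .unwrap_or(file_name)
        })
        .collect()
}

fn main() {
    let cols = vec![(1, "a".to_string()), (2, "b".to_string())];

    // No mapping, analogous to the `&None` arguments in the diff above.
    assert_eq!(resolve_column_names(cols.clone(), &None), vec!["a", "b"]);

    // With a mapping that renames field ID 2.
    let mapping = BTreeMap::from([(2, "renamed_b".to_string())]);
    assert_eq!(
        resolve_column_names(cols, &Some(mapping)),
        vec!["a", "renamed_b"]
    );
}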

src/daft-parquet/src/file.rs: 6 changes (4 additions, 2 deletions)

@@ -250,6 +250,7 @@ impl ParquetReaderBuilder {
 }

 pub fn prune_columns<S: ToString + AsRef<str>>(mut self, columns: &[S]) -> super::Result<Self> {
+// TODO: perform pruning of columns on names AFTER applying the field_id_mapping
 let avail_names = self
 .parquet_schema()
 .fields()

@@ -439,8 +440,7 @@ impl ParquetFileReader {
 .fields
 .iter()
 .map(|pq_arrow_field| {
-// Retrieve the appropriate field name from the field_id mapping
-// TODO: We should do this recursively as well
+// Retrieve the intended target field name (might differ from the Parquet field name, depending on the field_id_mapping)
 let target_field_name = if let (Some(field_id_mapping), Some(field_id)) = (
 self.field_id_mapping.as_ref(),
 pq_arrow_field.metadata.get("field_id"),

@@ -564,6 +564,8 @@ impl ParquetFileReader {
 all_arrays
 .into_iter()
 .map(|a| {
+// TODO: Need to perform recursive renaming of Series here. Hopefully arrow array
+// has the correct metadata and that was correctly transferred to the Series...
 Series::try_from((
 cloned_target_field_name.as_str(),
 cast_array_for_daft_if_needed(a),
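
Both TODOs added in this file point at the same gap: the field_id_mapping-driven rename is currently applied only to top-level column names, while nested (struct) fields carry field IDs of their own, and column pruning by name should operate on the renamed names rather than the raw Parquet names. Below is a rough illustration of the recursive-renaming idea using toy schema types (deliberately not the arrow2/Daft types the reader actually manipulates; every name here is made up for the example):

use std::collections::BTreeMap;

// Toy stand-ins for an Arrow-like schema.
#[derive(Debug, PartialEq)]
enum ToyType {
    Int64,
    Struct(Vec<ToyField>),
}

#[derive(Debug, PartialEq)]
struct ToyField {
    name: String,
    field_id: Option<i32>,
    dtype: ToyType,
}

// Rename a field via the field-ID mapping, then recurse into struct children
// so that nested fields get renamed too (the "recursively" part of the TODO).
fn rename_recursively(field: ToyField, mapping: &BTreeMap<i32, String>) -> ToyField {
    let name = field
        .field_id
        .and_then(|id| mapping.get(&id).cloned())
        .unwrap_or(field.name);
    let dtype = match field.dtype {
        ToyType::Struct(children) => ToyType::Struct(
            children
                .into_iter()
                .map(|c| rename_recursively(c, mapping))
                .collect(),
        ),
        other => other,
    };
    ToyField { name, field_id: field.field_id, dtype }
}

fn main() {
    let mapping = BTreeMap::from([(1, "top".to_string()), (2, "inner".to_string())]);
    let field = ToyField {
        name: "col_a".to_string(),
        field_id: Some(1),
        dtype: ToyType::Struct(vec![ToyField {
            name: "col_b".to_string(),
            field_id: Some(2),
            dtype: ToyType::Int64,
        }]),
    };
    let renamed = rename_recursively(field, &mapping);
    assert_eq!(renamed.name, "top");
    match renamed.dtype {
        ToyType::Struct(children) => assert_eq!(children[0].name, "inner"),
        _ => unreachable!(),
    }
}

Pruning-after-renaming then reduces to filtering the renamed schema against the requested column names, which appears to be what the new TODO in prune_columns is about.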

src/daft-parquet/src/read.rs: 2 changes (1 addition, 1 deletion)

@@ -379,7 +379,7 @@ pub fn read_parquet(
 io_client,
 io_stats,
 schema_infer_options,
-None, // TODO: Add field_id_mapping
+None,
 )
 .await
 })

src/daft-scan/src/file_format.rs: 4 changes (2 additions, 2 deletions)

@@ -111,10 +111,10 @@ impl ParquetSourceConfig {
 ));
 if let Some(mapping) = &self.field_id_mapping {
 res.push(format!(
-"Field ID to column names = [{}]",
+"Field ID to Fields = {{{}}}",
 mapping
 .iter()
-.map(|(f, c)| format!("{f}: {c}"))
+.map(|(fid, f)| format!("{fid}: {f}"))
 .collect::<Vec<String>>()
 .join(",")
 ));
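
The display tweak above reflects that the mapping's values are schema fields rather than bare column names, and swaps the `[...]` delimiters for `{...}` (hence the escaped braces `{{{}}}` in the format string). A standalone sketch of the same iterate/format/join pattern, with a plain String standing in for the field type and assuming nothing beyond the code shown in the diff:

use std::collections::BTreeMap;

fn main() {
    // Field ID -> field description; the real mapping stores schema fields.
    let mapping: BTreeMap<i32, String> =
        BTreeMap::from([(1, "a (Int64)".to_string()), (2, "b (Utf8)".to_string())]);

    // `{{` and `}}` are how literal braces are written inside a format string,
    // so `{{{}}}` renders as `{` + the joined entries + `}`.
    let display = format!(
        "Field ID to Fields = {{{}}}",
        mapping
            .iter()
            .map(|(fid, f)| format!("{fid}: {f}"))
            .collect::<Vec<String>>()
            .join(",")
    );

    assert_eq!(display, "Field ID to Fields = {1: a (Int64),2: b (Utf8)}");
}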
