Skip to content

Commit

Permalink
add file size
Browse files Browse the repository at this point in the history
  • Loading branch information
samster25 committed Nov 14, 2023
1 parent 8221c1e commit 7bb374e
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 6 deletions.
1 change: 1 addition & 0 deletions daft/iceberg/iceberg_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def _make_scan_tasks(self) -> list[ScanTask]:
schema=self._schema._schema,
num_rows=record_count,
storage_config=storage_config,
size_bytes=file.file_size_in_bytes,
)
scan_tasks.append(st)
return scan_tasks
Expand Down
3 changes: 1 addition & 2 deletions src/daft-scan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::{

use common_error::{DaftError, DaftResult};
use daft_core::{datatypes::Field, schema::SchemaRef};
use daft_dsl::{Expr, ExprRef, optimization::get_required_columns};
use daft_dsl::{optimization::get_required_columns, Expr, ExprRef};
use daft_stats::{PartitionSpec, TableMetadata, TableStatistics};
use file_format::FileFormatConfig;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -242,7 +242,6 @@ impl Display for PartitionField {
}
}


pub trait ScanOperator: Send + Sync + Display + Debug {
fn schema(&self) -> SchemaRef;
fn partitioning_keys(&self) -> &[PartitionField];
Expand Down
16 changes: 12 additions & 4 deletions src/daft-scan/src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ pub mod pylib {

#[staticmethod]
pub fn from_python_abc(py_scan: PyObject) -> PyResult<Self> {
let scan_op =
ScanOperatorRef(Arc::new(PythonScanOperatorBridge::from_python_abc(py_scan)?));
let scan_op = ScanOperatorRef(Arc::new(PythonScanOperatorBridge::from_python_abc(
py_scan,
)?));
Ok(ScanOperatorHandle { scan_op })
}
}
Expand Down Expand Up @@ -212,6 +213,7 @@ pub mod pylib {
schema: PySchema,
num_rows: i64,
storage_config: PyStorageConfig,
size_bytes: Option<u64>,
columns: Option<Vec<String>>,
limit: Option<usize>,
) -> PyResult<Self> {
Expand All @@ -221,14 +223,20 @@ pub mod pylib {
let data_source = DataFileSource::CatalogDataFile {
path: file,
chunk_spec: None,
size_bytes: None,
size_bytes: size_bytes,
metadata: TableMetadata {
length: num_rows as usize,
},
partition_spec: empty_pspec,
statistics: None,
};
let scan_task = ScanTask::new(vec![data_source], file_format.into(), schema.schema, storage_config.into(), Pushdowns::default());
let scan_task = ScanTask::new(
vec![data_source],
file_format.into(),
schema.schema,
storage_config.into(),
Pushdowns::default(),
);
Ok(PyScanTask(scan_task.into()))
}

Expand Down

0 comments on commit 7bb374e

Please sign in to comment.