From 604dae4d66f03b9c963e08e836723814bcc34f0e Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Thu, 26 Oct 2023 13:17:49 -0700 Subject: [PATCH] changes to partition field and field creation --- daft/daft.pyi | 2 ++ daft/logical/schema.py | 5 +++++ src/daft-core/src/python/field.rs | 5 +++++ src/daft-scan/src/anonymous.rs | 4 ++-- src/daft-scan/src/lib.rs | 8 +++++++- 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/daft/daft.pyi b/daft/daft.pyi index 6a535b19ad..2d4f35b3c2 100644 --- a/daft/daft.pyi +++ b/daft/daft.pyi @@ -523,6 +523,8 @@ class PyDataType: class PyField: def name(self) -> str: ... + @staticmethod + def create(name: str, datatype: PyDataType) -> PyField: ... def dtype(self) -> PyDataType: ... def eq(self, other: PyField) -> bool: ... def __reduce__(self) -> tuple: ... diff --git a/daft/logical/schema.py b/daft/logical/schema.py index 7f33be83b9..89fad30b31 100644 --- a/daft/logical/schema.py +++ b/daft/logical/schema.py @@ -33,6 +33,11 @@ def _from_pyfield(field: _PyField) -> Field: f._field = field return f + @staticmethod + def create(name: str, dtype: DataType) -> Field: + pyfield = _PyField.create(name, dtype._dtype) + return Field._from_pyfield(pyfield) + @property def name(self): return self._field.name() diff --git a/src/daft-core/src/python/field.rs b/src/daft-core/src/python/field.rs index daae3e63b2..6529edd863 100644 --- a/src/daft-core/src/python/field.rs +++ b/src/daft-core/src/python/field.rs @@ -13,6 +13,11 @@ pub struct PyField { #[pymethods] impl PyField { + #[staticmethod] + pub fn create(name: &str, data_type: PyDataType) -> PyResult { + Ok(datatypes::Field::new(name, data_type.dtype).into()) + } + pub fn name(&self) -> PyResult { Ok(self.field.name.clone()) } diff --git a/src/daft-scan/src/anonymous.rs b/src/daft-scan/src/anonymous.rs index b92d37f2fb..4b1f31d55b 100644 --- a/src/daft-scan/src/anonymous.rs +++ b/src/daft-scan/src/anonymous.rs @@ -3,7 +3,7 @@ use std::fmt::Display; use common_error::DaftResult; use daft_core::schema::SchemaRef; -use crate::{DataFileSource, FileType, ScanOperator, ScanOperatorRef, ScanTask}; +use crate::{DataFileSource, FileType, PartitionField, ScanOperator, ScanOperatorRef, ScanTask}; #[derive(Debug)] pub struct AnonymousScanOperator { schema: SchemaRef, @@ -36,7 +36,7 @@ impl ScanOperator for AnonymousScanOperator { self.schema.clone() } - fn partitioning_keys(&self) -> &[daft_core::datatypes::Field] { + fn partitioning_keys(&self) -> &[PartitionField] { &[] } diff --git a/src/daft-scan/src/lib.rs b/src/daft-scan/src/lib.rs index 49201e60b9..018ba491af 100644 --- a/src/daft-scan/src/lib.rs +++ b/src/daft-scan/src/lib.rs @@ -70,10 +70,16 @@ pub struct ScanTask { columns: Option>, limit: Option, } +#[derive(Serialize, Deserialize)] +pub struct PartitionField { + field: Field, + source_field: Option, + transform: Option, +} pub trait ScanOperator: Send + Display { fn schema(&self) -> SchemaRef; - fn partitioning_keys(&self) -> &[Field]; + fn partitioning_keys(&self) -> &[PartitionField]; fn num_partitions(&self) -> DaftResult; // also returns a bool to indicate if the scan operator can "absorb" the predicate