diff --git a/src/daft-core/src/array/ops/cast.rs b/src/daft-core/src/array/ops/cast.rs index d326fffa74..ec5f27a72e 100644 --- a/src/daft-core/src/array/ops/cast.rs +++ b/src/daft-core/src/array/ops/cast.rs @@ -2091,7 +2091,7 @@ impl ListArray { } } } - DataType::Map{..} => Ok(MapArray::new( + DataType::Map { .. } => Ok(MapArray::new( Field::new(self.name(), dtype.clone()), self.clone(), ) diff --git a/src/daft-core/src/array/ops/from_arrow.rs b/src/daft-core/src/array/ops/from_arrow.rs index ec5b11dac1..3c0763e523 100644 --- a/src/daft-core/src/array/ops/from_arrow.rs +++ b/src/daft-core/src/array/ops/from_arrow.rs @@ -35,7 +35,7 @@ where // TODO: Consolidate Map to use the same .to_type conversion as other logical types // Currently, .to_type does not work for Map in Arrow2 because it requires physical types to be equivalent, // but the physical type of MapArray in Arrow2 is a MapArray, not a ListArray - DataType::Map{..} => arrow_arr, + DataType::Map { .. } => arrow_arr, _ => arrow_arr.to_type(data_array_field.dtype.to_arrow()?), }; let physical = ::ArrayType::from_arrow( @@ -98,7 +98,7 @@ impl FromArrow for ListArray { arrow_arr.validity().cloned(), )) } - (DataType::List(daft_child_dtype), arrow2::datatypes::DataType::Map(..)) => { + (DataType::List(daft_child_dtype), arrow2::datatypes::DataType::Map { .. }) => { let map_arr = arrow_arr .as_any() .downcast_ref::() diff --git a/src/daft-core/src/array/ops/map.rs b/src/daft-core/src/array/ops/map.rs index a40aea53bf..a1613ce19c 100644 --- a/src/daft-core/src/array/ops/map.rs +++ b/src/daft-core/src/array/ops/map.rs @@ -24,7 +24,10 @@ fn single_map_get(structs: &Series, key_to_get: &Series) -> DaftResult { impl MapArray { pub fn map_get(&self, key_to_get: &Series) -> DaftResult { - let DataType::Map { value: value_type, .. } = self.data_type() else { + let DataType::Map { + value: value_type, .. + } = self.data_type() + else { return Err(DaftError::TypeError(format!( "Expected input to be a map type, got {:?}", self.data_type() @@ -37,7 +40,7 @@ impl MapArray { for series in self.physical.into_iter() { match series { Some(s) if !s.is_empty() => result.push(single_map_get(&s, key_to_get)?), - _ => result.push(Series::full_null("value", &value_type, 1)), + _ => result.push(Series::full_null("value", value_type, 1)), } } Series::concat(&result.iter().collect::>()) @@ -47,7 +50,7 @@ impl MapArray { for (i, series) in self.physical.into_iter().enumerate() { match (series, key_to_get.slice(i, i + 1)?) { (Some(s), k) if !s.is_empty() => result.push(single_map_get(&s, &k)?), - _ => result.push(Series::full_null("value", &value_type, 1)), + _ => result.push(Series::full_null("value", value_type, 1)), } } Series::concat(&result.iter().collect::>()) diff --git a/src/daft-core/src/series/ops/map.rs b/src/daft-core/src/series/ops/map.rs index 571f7376e8..58f9a5b046 100644 --- a/src/daft-core/src/series/ops/map.rs +++ b/src/daft-core/src/series/ops/map.rs @@ -5,7 +5,7 @@ use crate::{datatypes::DataType, series::Series}; impl Series { pub fn map_get(&self, key: &Series) -> DaftResult { match self.data_type() { - DataType::Map{..} => self.map()?.map_get(key), + DataType::Map { .. } => self.map()?.map_get(key), dt => Err(DaftError::TypeError(format!( "map.get not implemented for {}", dt diff --git a/src/daft-core/src/series/serdes.rs b/src/daft-core/src/series/serdes.rs index 56f278ed49..3ce3b6f881 100644 --- a/src/daft-core/src/series/serdes.rs +++ b/src/daft-core/src/series/serdes.rs @@ -78,83 +78,83 @@ impl<'d> serde::Deserialize<'d> for Series { &field.dtype, map.next_value::()?, ) - .into_series()), + .into_series()), DataType::Boolean => Ok(BooleanArray::from(( field.name.as_str(), map.next_value::>>()?.as_slice(), )) - .into_series()), + .into_series()), DataType::Int8 => Ok(Int8Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::Int16 => Ok(Int16Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::Int32 => Ok(Int32Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::Int64 => Ok(Int64Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::Int128 => Ok(Int128Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::UInt8 => Ok(UInt8Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::UInt16 => Ok(UInt16Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::UInt32 => Ok(UInt32Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::UInt64 => Ok(UInt64Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::Float32 => Ok(Float32Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::Float64 => Ok(Float64Array::from_iter( field.name.as_str(), map.next_value::>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::Utf8 => Ok(Utf8Array::from_iter( field.name.as_str(), map.next_value::>>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::Binary => Ok(BinaryArray::from_iter( field.name.as_str(), map.next_value::>>>()?.into_iter(), ) - .into_series()), + .into_series()), DataType::FixedSizeBinary(size) => Ok(FixedSizeBinaryArray::from_iter( field.name.as_str(), map.next_value::>>>()?.into_iter(), *size, ) - .into_series()), + .into_series()), DataType::Extension(..) => { let physical = map.next_value::()?; let physical = physical.to_arrow(); @@ -169,7 +169,7 @@ impl<'d> serde::Deserialize<'d> for Series { Arc::new(field), physical.downcast::().unwrap().clone(), ) - .into_series()) + .into_series()) } DataType::Struct(..) => { let mut all_series = map.next_value::>>()?; @@ -198,7 +198,7 @@ impl<'d> serde::Deserialize<'d> for Series { let offsets = OffsetsBuffer::::try_from( offsets_array.as_arrow().values().clone(), ) - .unwrap(); + .unwrap(); let flat_child = all_series .pop() .ok_or_else(|| serde::de::Error::missing_field("flat_child"))? @@ -225,7 +225,7 @@ impl<'d> serde::Deserialize<'d> for Series { field, physical.downcast::().unwrap().clone(), ) - .into_series()) + .into_series()) } DataType::Timestamp(..) => { type PType = <::PhysicalType as DaftDataType>::ArrayType; @@ -234,7 +234,7 @@ impl<'d> serde::Deserialize<'d> for Series { field, physical.downcast::().unwrap().clone(), ) - .into_series()) + .into_series()) } DataType::Date => { type PType = <::PhysicalType as DaftDataType>::ArrayType; @@ -260,7 +260,7 @@ impl<'d> serde::Deserialize<'d> for Series { field, physical.downcast::().unwrap().clone(), ) - .into_series(), + .into_series(), ) } DataType::Embedding(..) => { @@ -270,7 +270,7 @@ impl<'d> serde::Deserialize<'d> for Series { field, physical.downcast::().unwrap().clone(), ) - .into_series()) + .into_series()) } DataType::Image(..) => { type PType = <::PhysicalType as DaftDataType>::ArrayType; @@ -287,7 +287,7 @@ impl<'d> serde::Deserialize<'d> for Series { field, physical.downcast::().unwrap().clone(), ) - .into_series()) + .into_series()) } DataType::FixedShapeTensor(..) => { type PType = <::PhysicalType as DaftDataType>::ArrayType; @@ -296,7 +296,7 @@ impl<'d> serde::Deserialize<'d> for Series { field, physical.downcast::().unwrap().clone(), ) - .into_series()) + .into_series()) } DataType::SparseTensor(..) => { type PType = <::PhysicalType as DaftDataType>::ArrayType; @@ -305,7 +305,7 @@ impl<'d> serde::Deserialize<'d> for Series { field, physical.downcast::().unwrap().clone(), ) - .into_series()) + .into_series()) } DataType::FixedShapeSparseTensor(..) => { type PType = <::PhysicalType as DaftDataType>::ArrayType; @@ -314,7 +314,7 @@ impl<'d> serde::Deserialize<'d> for Series { field, physical.downcast::().unwrap().clone(), ) - .into_series()) + .into_series()) } DataType::Tensor(..) => { type PType = <::PhysicalType as DaftDataType>::ArrayType; diff --git a/src/daft-dsl/src/functions/map/get.rs b/src/daft-dsl/src/functions/map/get.rs index caec845b19..bf5f9efdf0 100644 --- a/src/daft-dsl/src/functions/map/get.rs +++ b/src/daft-dsl/src/functions/map/get.rs @@ -13,10 +13,8 @@ impl FunctionEvaluator for GetEvaluator { fn to_field(&self, inputs: &[ExprRef], schema: &Schema, _: &FunctionExpr) -> DaftResult { match inputs { - // what is input and what is key // input is a map field - [input, key] => match (input.to_field(schema), key.to_field(schema)) { (Ok(input_field), Ok(_)) => match input_field.dtype { DataType::Map { value, .. } => { diff --git a/src/daft-schema/src/dtype.rs b/src/daft-schema/src/dtype.rs index b3d8cce10b..ca3691d21a 100644 --- a/src/daft-schema/src/dtype.rs +++ b/src/daft-schema/src/dtype.rs @@ -108,7 +108,10 @@ pub enum DataType { /// A nested [`DataType`] that is represented as List>. #[display("Map[{key}: {value}]")] - Map { key: Box, value: Box }, + Map { + key: Box, + value: Box, + }, /// Extension type. #[display("{_1}")] @@ -240,7 +243,8 @@ impl DataType { ]); // entries - let struct_field = arrow2::datatypes::Field::new("entries", struct_type.clone(), true); + let struct_field = + arrow2::datatypes::Field::new("entries", struct_type.clone(), true); let list_type = ArrowType::LargeList(Box::new(struct_field)); @@ -248,10 +252,7 @@ impl DataType { // todo: item? items? something else? let list_field = arrow2::datatypes::Field::new("item", list_type.clone(), true); - Ok(ArrowType::Map( - Box::new(list_field), - false, - )) + Ok(ArrowType::Map(Box::new(list_field), false)) } DataType::Struct(fields) => Ok({ let fields = fields @@ -454,7 +455,7 @@ impl DataType { #[inline] pub fn is_map(&self) -> bool { - matches!(self, DataType::Map{ .. }) + matches!(self, DataType::Map { .. }) } #[inline] @@ -660,7 +661,7 @@ impl From<&ArrowType> for DataType { let [key, value] = fields.as_slice() else { panic!("Map should have two fields") }; - + let key = &key.data_type; let value = &value.data_type; diff --git a/src/daft-schema/src/python/datatype.rs b/src/daft-schema/src/python/datatype.rs index d2bd62bbaa..32642fae58 100644 --- a/src/daft-schema/src/python/datatype.rs +++ b/src/daft-schema/src/python/datatype.rs @@ -209,7 +209,11 @@ impl PyDataType { #[staticmethod] pub fn map(key_type: Self, value_type: Self) -> PyResult { - Ok(DataType::Map { key: Box::new(key_type.dtype), value: Box::new(value_type.dtype) }.into()) + Ok(DataType::Map { + key: Box::new(key_type.dtype), + value: Box::new(value_type.dtype), + } + .into()) } #[staticmethod] @@ -220,7 +224,7 @@ impl PyDataType { .map(|(name, dtype)| Field::new(name, dtype.dtype)) .collect::>(), ) - .into() + .into() } #[staticmethod] @@ -234,7 +238,7 @@ impl PyDataType { Box::new(storage_data_type.dtype), metadata.map(|s| s.to_string()), ) - .into()) + .into()) } #[staticmethod] @@ -325,7 +329,7 @@ impl PyDataType { Self { dtype: *dtype.clone(), } - .to_arrow(py)?, + .to_arrow(py)?, pyo3::types::PyTuple::new_bound(py, shape.clone()), )) } else { diff --git a/src/daft-stats/src/column_stats/mod.rs b/src/daft-stats/src/column_stats/mod.rs index e8dc82f2f8..81f4dd5488 100644 --- a/src/daft-stats/src/column_stats/mod.rs +++ b/src/daft-stats/src/column_stats/mod.rs @@ -71,7 +71,7 @@ impl ColumnRangeStatistics { // UNSUPPORTED TYPES: // Types that don't support comparisons and can't be used as ColumnRangeStatistics - DataType::List(..) | DataType::FixedSizeList(..) | DataType::Image(..) | DataType::FixedShapeImage(..) | DataType::Tensor(..) | DataType::SparseTensor(..) | DataType::FixedShapeSparseTensor(..) | DataType::FixedShapeTensor(..) | DataType::Struct(..) | DataType::Map(..) | DataType::Extension(..) | DataType::Embedding(..) | DataType::Unknown => false, + DataType::List(..) | DataType::FixedSizeList(..) | DataType::Image(..) | DataType::FixedShapeImage(..) | DataType::Tensor(..) | DataType::SparseTensor(..) | DataType::FixedShapeSparseTensor(..) | DataType::FixedShapeTensor(..) | DataType::Struct(..) | DataType::Map { .. } | DataType::Extension(..) | DataType::Embedding(..) | DataType::Unknown => false, #[cfg(feature = "python")] DataType::Python => false, } diff --git a/src/daft-table/src/repr_html.rs b/src/daft-table/src/repr_html.rs index 79ecaf063a..0e46bb80b2 100644 --- a/src/daft-table/src/repr_html.rs +++ b/src/daft-table/src/repr_html.rs @@ -102,7 +102,7 @@ pub fn html_value(s: &Series, idx: usize) -> String { let arr = s.struct_().unwrap(); arr.html_value(idx) } - DataType::Map(_) => { + DataType::Map { .. } => { let arr = s.map().unwrap(); arr.html_value(idx) }