From 9cfc818fa2031ee5a070198b91ee8dccbb24a57e Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 20 Nov 2024 21:23:36 +0000 Subject: [PATCH 1/2] Fill Forward --- bench-vortex/src/bin/notimplemented.rs | 4 - .../src/array/bool/compute/boolean.rs | 48 --- vortex-array/src/array/bool/compute/mod.rs | 24 +- vortex-array/src/array/constant/compute.rs | 267 -------------- .../src/array/constant/compute/boolean.rs | 114 ++++++ .../src/array/constant/compute/mod.rs | 146 ++++++++ vortex-array/src/compute/boolean.rs | 335 +++++++++++------- vortex-array/src/compute/mod.rs | 41 +-- vortex-array/src/data/mod.rs | 7 + 9 files changed, 492 insertions(+), 494 deletions(-) delete mode 100644 vortex-array/src/array/bool/compute/boolean.rs delete mode 100644 vortex-array/src/array/constant/compute.rs create mode 100644 vortex-array/src/array/constant/compute/boolean.rs create mode 100644 vortex-array/src/array/constant/compute/mod.rs diff --git a/bench-vortex/src/bin/notimplemented.rs b/bench-vortex/src/bin/notimplemented.rs index 0f6cdb2713..e7d26f2685 100644 --- a/bench-vortex/src/bin/notimplemented.rs +++ b/bench-vortex/src/bin/notimplemented.rs @@ -188,8 +188,6 @@ fn compute_funcs(encodings: &[ArrayData]) { "search_sorted", "slice", "take", - "and", - "or", ] .into_iter() .map(Cell::new) @@ -207,8 +205,6 @@ fn compute_funcs(encodings: &[ArrayData]) { impls.push(bool_to_cell(arr.with_dyn(|a| a.search_sorted().is_some()))); impls.push(bool_to_cell(arr.encoding().slice_fn().is_some())); impls.push(bool_to_cell(arr.encoding().take_fn().is_some())); - impls.push(bool_to_cell(arr.with_dyn(|a| a.and().is_some()))); - impls.push(bool_to_cell(arr.with_dyn(|a| a.or().is_some()))); table.add_row(Row::new(impls)); } table.printstd(); diff --git a/vortex-array/src/array/bool/compute/boolean.rs b/vortex-array/src/array/bool/compute/boolean.rs deleted file mode 100644 index a8a6762b7b..0000000000 --- a/vortex-array/src/array/bool/compute/boolean.rs +++ /dev/null @@ -1,48 +0,0 @@ -use 
arrow_arith::boolean; -use arrow_array::cast::AsArray as _; -use arrow_array::{Array as _, BooleanArray}; -use arrow_schema::ArrowError; -use vortex_error::VortexResult; - -use crate::array::BoolArray; -use crate::arrow::FromArrowArray as _; -use crate::compute::{AndFn, OrFn}; -use crate::{ArrayData, IntoCanonical}; - -impl BoolArray { - /// Lift an Arrow binary boolean kernel function to Vortex arrays. - fn lift_arrow(&self, arrow_fun: F, other: &ArrayData) -> VortexResult - where - F: FnOnce(&BooleanArray, &BooleanArray) -> Result, - { - let lhs = self.clone().into_canonical()?.into_arrow()?; - let lhs = lhs.as_boolean(); - - let rhs = other.clone().into_canonical()?.into_arrow()?; - let rhs = rhs.as_boolean(); - - let array = arrow_fun(lhs, rhs)?; - - Ok(ArrayData::from_arrow(&array, array.is_nullable())) - } -} - -impl OrFn for BoolArray { - fn or(&self, array: &ArrayData) -> VortexResult { - self.lift_arrow(boolean::or, array) - } - - fn or_kleene(&self, array: &ArrayData) -> VortexResult { - self.lift_arrow(boolean::or_kleene, array) - } -} - -impl AndFn for BoolArray { - fn and(&self, array: &ArrayData) -> VortexResult { - self.lift_arrow(boolean::and, array) - } - - fn and_kleene(&self, array: &ArrayData) -> VortexResult { - self.lift_arrow(boolean::and_kleene, array) - } -} diff --git a/vortex-array/src/array/bool/compute/mod.rs b/vortex-array/src/array/bool/compute/mod.rs index 559d95857b..a6ba439809 100644 --- a/vortex-array/src/array/bool/compute/mod.rs +++ b/vortex-array/src/array/bool/compute/mod.rs @@ -1,10 +1,8 @@ use crate::array::{BoolArray, BoolEncoding}; use crate::compute::unary::{FillForwardFn, ScalarAtFn}; -use crate::compute::{AndFn, ArrayCompute, ComputeVTable, FilterFn, OrFn, SliceFn, TakeFn}; +use crate::compute::{ArrayCompute, BinaryBooleanFn, ComputeVTable, FilterFn, SliceFn, TakeFn}; use crate::ArrayData; -mod boolean; - mod fill; pub mod filter; mod flatten; @@ -16,17 +14,21 @@ impl ArrayCompute for BoolArray { fn fill_forward(&self) 
-> Option<&dyn FillForwardFn> { Some(self) } - - fn and(&self) -> Option<&dyn AndFn> { - Some(self) - } - - fn or(&self) -> Option<&dyn OrFn> { - Some(self) - } } impl ComputeVTable for BoolEncoding { + fn binary_boolean_fn( + &self, + _lhs: &ArrayData, + _rhs: &ArrayData, + ) -> Option<&dyn BinaryBooleanFn> { + // We only implement this when other is a constant value, otherwise we fall back to the + // default implementation that canonicalizes to Arrow. + // TODO(ngates): implement this for constants. + // other.is_constant().then_some(self) + None + } + fn filter_fn(&self) -> Option<&dyn FilterFn> { Some(self) } diff --git a/vortex-array/src/array/constant/compute.rs b/vortex-array/src/array/constant/compute.rs deleted file mode 100644 index 8d5cb31071..0000000000 --- a/vortex-array/src/array/constant/compute.rs +++ /dev/null @@ -1,267 +0,0 @@ -use std::cmp::Ordering; - -use vortex_dtype::Nullability; -use vortex_error::{vortex_bail, VortexResult}; -use vortex_scalar::Scalar; - -use crate::array::constant::ConstantArray; -use crate::array::ConstantEncoding; -use crate::compute::unary::{scalar_at, ScalarAtFn}; -use crate::compute::{ - scalar_cmp, AndFn, ArrayCompute, ComputeVTable, FilterFn, FilterMask, MaybeCompareFn, Operator, - OrFn, SearchResult, SearchSortedFn, SearchSortedSide, SliceFn, TakeFn, TakeOptions, -}; -use crate::stats::{ArrayStatistics, Stat}; -use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData}; - -impl ArrayCompute for ConstantArray { - fn compare(&self, other: &ArrayData, operator: Operator) -> Option> { - MaybeCompareFn::maybe_compare(self, other, operator) - } - - fn search_sorted(&self) -> Option<&dyn SearchSortedFn> { - Some(self) - } - - fn and(&self) -> Option<&dyn AndFn> { - Some(self) - } - - fn or(&self) -> Option<&dyn OrFn> { - Some(self) - } -} - -impl ComputeVTable for ConstantEncoding { - fn filter_fn(&self) -> Option<&dyn FilterFn> { - Some(self) - } - - fn scalar_at_fn(&self) -> Option<&dyn ScalarAtFn> { - Some(self) - } 
- fn slice_fn(&self) -> Option<&dyn SliceFn> { - Some(self) - } - - fn take_fn(&self) -> Option<&dyn TakeFn> { - Some(self) - } -} - -impl ScalarAtFn for ConstantEncoding { - fn scalar_at(&self, array: &ConstantArray, _index: usize) -> VortexResult { - Ok(array.owned_scalar()) - } -} - -impl TakeFn for ConstantEncoding { - fn take( - &self, - array: &ConstantArray, - indices: &ArrayData, - _options: TakeOptions, - ) -> VortexResult { - Ok(ConstantArray::new(array.owned_scalar(), indices.len()).into_array()) - } -} - -impl SliceFn for ConstantEncoding { - fn slice(&self, array: &ConstantArray, start: usize, stop: usize) -> VortexResult { - Ok(ConstantArray::new(array.owned_scalar(), stop - start).into_array()) - } -} - -impl FilterFn for ConstantEncoding { - fn filter(&self, array: &ConstantArray, mask: FilterMask) -> VortexResult { - Ok(ConstantArray::new(array.owned_scalar(), mask.true_count()).into_array()) - } -} - -impl SearchSortedFn for ConstantArray { - fn search_sorted(&self, value: &Scalar, side: SearchSortedSide) -> VortexResult { - match self - .scalar_value() - .partial_cmp(value.value()) - .unwrap_or(Ordering::Less) - { - Ordering::Greater => Ok(SearchResult::NotFound(0)), - Ordering::Less => Ok(SearchResult::NotFound(self.len())), - Ordering::Equal => match side { - SearchSortedSide::Left => Ok(SearchResult::Found(0)), - SearchSortedSide::Right => Ok(SearchResult::Found(self.len())), - }, - } - } -} - -impl MaybeCompareFn for ConstantArray { - fn maybe_compare( - &self, - other: &ArrayData, - operator: Operator, - ) -> Option> { - other.as_constant().map(|const_scalar| { - let lhs = self.owned_scalar(); - let scalar = scalar_cmp(&lhs, &const_scalar, operator); - Ok(ConstantArray::new(scalar, self.len()).into_array()) - }) - } -} - -fn and(left: Option, right: Option) -> Option { - left.zip(right).map(|(l, r)| l & r) -} - -fn kleene_and(left: Option, right: Option) -> Option { - match (left, right) { - (Some(false), _) => Some(false), - (_, 
Some(false)) => Some(false), - (None, _) => None, - (_, None) => None, - (Some(l), Some(r)) => Some(l & r), - } -} - -fn or(left: Option, right: Option) -> Option { - left.zip(right).map(|(l, r)| l | r) -} - -fn kleene_or(left: Option, right: Option) -> Option { - match (left, right) { - (Some(true), _) => Some(true), - (_, Some(true)) => Some(true), - (None, _) => None, - (_, None) => None, - (Some(l), Some(r)) => Some(l | r), - } -} - -impl AndFn for ConstantArray { - fn and(&self, array: &ArrayData) -> VortexResult { - constant_array_bool_impl(self, array, and, |other, this| { - other.with_dyn(|other| other.and().map(|other| other.and(this))) - }) - } - - fn and_kleene(&self, array: &ArrayData) -> VortexResult { - constant_array_bool_impl(self, array, kleene_and, |other, this| { - other.with_dyn(|other| other.and_kleene().map(|other| other.and_kleene(this))) - }) - } -} - -impl OrFn for ConstantArray { - fn or(&self, array: &ArrayData) -> VortexResult { - constant_array_bool_impl(self, array, or, |other, this| { - other.with_dyn(|other| other.or().map(|other| other.or(this))) - }) - } - - fn or_kleene(&self, array: &ArrayData) -> VortexResult { - constant_array_bool_impl(self, array, kleene_or, |other, this| { - other.with_dyn(|other| other.or_kleene().map(|other| other.or_kleene(this))) - }) - } -} - -fn constant_array_bool_impl( - constant_array: &ConstantArray, - other: &ArrayData, - bool_op: impl Fn(Option, Option) -> Option, - fallback_fn: impl Fn(&ArrayData, &ArrayData) -> Option>, -) -> VortexResult { - // If the right side is constant - if other.statistics().get_as::(Stat::IsConstant) == Some(true) { - let lhs = constant_array.scalar_value().as_bool()?; - let rhs = scalar_at(other, 0)?.value().as_bool()?; - - let scalar = match bool_op(lhs, rhs) { - Some(b) => Scalar::bool(b, Nullability::Nullable), - None => Scalar::null(constant_array.dtype().as_nullable()), - }; - - Ok(ConstantArray::new(scalar, constant_array.len()).into_array()) - } else { - // try 
and use a the rhs specialized implementation if it exists - match fallback_fn(other, constant_array.as_ref()) { - Some(r) => r, - None => vortex_bail!("Operation is not supported"), - } - } -} - -#[cfg(test)] -mod test { - use rstest::rstest; - - use crate::array::constant::ConstantArray; - use crate::array::BoolArray; - use crate::compute::unary::scalar_at; - use crate::compute::{and, or, search_sorted, SearchResult, SearchSortedSide}; - use crate::{ArrayData, IntoArrayData, IntoArrayVariant}; - - #[test] - pub fn search() { - let cst = ConstantArray::new(42, 5000).into_array(); - assert_eq!( - search_sorted(&cst, 33, SearchSortedSide::Left).unwrap(), - SearchResult::NotFound(0) - ); - assert_eq!( - search_sorted(&cst, 55, SearchSortedSide::Left).unwrap(), - SearchResult::NotFound(5000) - ); - } - - #[test] - pub fn search_equals() { - let cst = ConstantArray::new(42, 5000).into_array(); - assert_eq!( - search_sorted(&cst, 42, SearchSortedSide::Left).unwrap(), - SearchResult::Found(0) - ); - assert_eq!( - search_sorted(&cst, 42, SearchSortedSide::Right).unwrap(), - SearchResult::Found(5000) - ); - } - - #[rstest] - #[case(ConstantArray::new(true, 4).into_array(), BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array() - )] - #[case(BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array(), ConstantArray::new(true, 4).into_array() - )] - fn test_or(#[case] lhs: ArrayData, #[case] rhs: ArrayData) { - let r = or(&lhs, &rhs).unwrap().into_bool().unwrap().into_array(); - - let v0 = scalar_at(&r, 0).unwrap().value().as_bool().unwrap(); - let v1 = scalar_at(&r, 1).unwrap().value().as_bool().unwrap(); - let v2 = scalar_at(&r, 2).unwrap().value().as_bool().unwrap(); - let v3 = scalar_at(&r, 3).unwrap().value().as_bool().unwrap(); - - assert!(v0.unwrap()); - assert!(v1.unwrap()); - assert!(v2.unwrap()); - assert!(v3.unwrap()); - } - - #[rstest] - #[case(ConstantArray::new(true, 
4).into_array(), BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array() - )] - #[case(BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array(), - ConstantArray::new(true, 4).into_array())] - fn test_and(#[case] lhs: ArrayData, #[case] rhs: ArrayData) { - let r = and(&lhs, &rhs).unwrap().into_bool().unwrap().into_array(); - - let v0 = scalar_at(&r, 0).unwrap().value().as_bool().unwrap(); - let v1 = scalar_at(&r, 1).unwrap().value().as_bool().unwrap(); - let v2 = scalar_at(&r, 2).unwrap().value().as_bool().unwrap(); - let v3 = scalar_at(&r, 3).unwrap().value().as_bool().unwrap(); - - assert!(v0.unwrap()); - assert!(!v1.unwrap()); - assert!(v2.unwrap()); - assert!(!v3.unwrap()); - } -} diff --git a/vortex-array/src/array/constant/compute/boolean.rs b/vortex-array/src/array/constant/compute/boolean.rs new file mode 100644 index 0000000000..d445fcffc8 --- /dev/null +++ b/vortex-array/src/array/constant/compute/boolean.rs @@ -0,0 +1,114 @@ +use vortex_dtype::DType; +use vortex_error::{vortex_bail, VortexResult}; +use vortex_scalar::Scalar; + +use crate::array::{ConstantArray, ConstantEncoding}; +use crate::compute::{BinaryBooleanFn, BinaryOperator}; +use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData}; + +impl BinaryBooleanFn for ConstantEncoding { + fn binary_boolean( + &self, + lhs: &ConstantArray, + rhs: &ArrayData, + op: BinaryOperator, + ) -> VortexResult { + let length = lhs.len(); + let nullable = lhs.dtype().is_nullable() || rhs.dtype().is_nullable(); + let lhs = >::try_from(lhs.scalar_value())?; + let Some(rhs) = rhs.as_constant() else { + vortex_bail!("Binary boolean operation requires both sides to be constant"); + }; + let rhs = >::try_from(rhs.value())?; + + let result = match op { + BinaryOperator::And => and(lhs, rhs), + BinaryOperator::AndKleene => kleene_and(lhs, rhs), + BinaryOperator::Or => or(lhs, rhs), + BinaryOperator::OrKleene => kleene_or(lhs, rhs), + }; + 
+ let scalar = result + .map(|b| Scalar::bool(b, nullable.into())) + .unwrap_or_else(|| Scalar::null(DType::Bool(nullable.into()))); + + Ok(ConstantArray::new(scalar, length).into_array()) + } +} + +fn and(left: Option, right: Option) -> Option { + left.zip(right).map(|(l, r)| l & r) +} + +fn kleene_and(left: Option, right: Option) -> Option { + match (left, right) { + (Some(false), _) => Some(false), + (_, Some(false)) => Some(false), + (None, _) => None, + (_, None) => None, + (Some(l), Some(r)) => Some(l & r), + } +} + +fn or(left: Option, right: Option) -> Option { + left.zip(right).map(|(l, r)| l | r) +} + +fn kleene_or(left: Option, right: Option) -> Option { + match (left, right) { + (Some(true), _) => Some(true), + (_, Some(true)) => Some(true), + (None, _) => None, + (_, None) => None, + (Some(l), Some(r)) => Some(l | r), + } +} + +#[cfg(test)] +mod test { + use rstest::rstest; + + use crate::array::constant::ConstantArray; + use crate::array::BoolArray; + use crate::compute::unary::scalar_at; + use crate::compute::{and, or}; + use crate::{ArrayData, IntoArrayData, IntoArrayVariant}; + + #[rstest] + #[case(ConstantArray::new(true, 4).into_array(), BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array() + )] + #[case(BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array(), ConstantArray::new(true, 4).into_array() + )] + fn test_or(#[case] lhs: ArrayData, #[case] rhs: ArrayData) { + let r = or(&lhs, &rhs).unwrap().into_bool().unwrap().into_array(); + + let v0 = scalar_at(&r, 0).unwrap().value().as_bool().unwrap(); + let v1 = scalar_at(&r, 1).unwrap().value().as_bool().unwrap(); + let v2 = scalar_at(&r, 2).unwrap().value().as_bool().unwrap(); + let v3 = scalar_at(&r, 3).unwrap().value().as_bool().unwrap(); + + assert!(v0.unwrap()); + assert!(v1.unwrap()); + assert!(v2.unwrap()); + assert!(v3.unwrap()); + } + + #[rstest] + #[case(ConstantArray::new(true, 4).into_array(), 
BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array() + )] + #[case(BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array(), + ConstantArray::new(true, 4).into_array())] + fn test_and(#[case] lhs: ArrayData, #[case] rhs: ArrayData) { + let r = and(&lhs, &rhs).unwrap().into_bool().unwrap().into_array(); + + let v0 = scalar_at(&r, 0).unwrap().value().as_bool().unwrap(); + let v1 = scalar_at(&r, 1).unwrap().value().as_bool().unwrap(); + let v2 = scalar_at(&r, 2).unwrap().value().as_bool().unwrap(); + let v3 = scalar_at(&r, 3).unwrap().value().as_bool().unwrap(); + + assert!(v0.unwrap()); + assert!(!v1.unwrap()); + assert!(v2.unwrap()); + assert!(!v3.unwrap()); + } +} diff --git a/vortex-array/src/array/constant/compute/mod.rs b/vortex-array/src/array/constant/compute/mod.rs new file mode 100644 index 0000000000..2e5926479a --- /dev/null +++ b/vortex-array/src/array/constant/compute/mod.rs @@ -0,0 +1,146 @@ +mod boolean; + +use std::cmp::Ordering; + +use vortex_error::VortexResult; +use vortex_scalar::Scalar; + +use crate::array::constant::ConstantArray; +use crate::array::ConstantEncoding; +use crate::compute::unary::ScalarAtFn; +use crate::compute::{ + scalar_cmp, ArrayCompute, BinaryBooleanFn, ComputeVTable, FilterFn, FilterMask, MaybeCompareFn, + Operator, SearchResult, SearchSortedFn, SearchSortedSide, SliceFn, TakeFn, TakeOptions, +}; +use crate::{ArrayData, ArrayLen, IntoArrayData}; + +impl ArrayCompute for ConstantArray { + fn compare(&self, other: &ArrayData, operator: Operator) -> Option> { + MaybeCompareFn::maybe_compare(self, other, operator) + } + + fn search_sorted(&self) -> Option<&dyn SearchSortedFn> { + Some(self) + } +} + +impl ComputeVTable for ConstantEncoding { + fn binary_boolean_fn( + &self, + lhs: &ArrayData, + rhs: &ArrayData, + ) -> Option<&dyn BinaryBooleanFn> { + // We only need to deal with this if both sides are constant, otherwise other arrays + // will 
have handled the RHS being constant. + (lhs.is_constant() && rhs.is_constant()).then_some(self) + } + + fn filter_fn(&self) -> Option<&dyn FilterFn> { + Some(self) + } + + fn scalar_at_fn(&self) -> Option<&dyn ScalarAtFn> { + Some(self) + } + + fn slice_fn(&self) -> Option<&dyn SliceFn> { + Some(self) + } + + fn take_fn(&self) -> Option<&dyn TakeFn> { + Some(self) + } +} + +impl ScalarAtFn for ConstantEncoding { + fn scalar_at(&self, array: &ConstantArray, _index: usize) -> VortexResult { + Ok(array.owned_scalar()) + } +} + +impl TakeFn for ConstantEncoding { + fn take( + &self, + array: &ConstantArray, + indices: &ArrayData, + _options: TakeOptions, + ) -> VortexResult { + Ok(ConstantArray::new(array.owned_scalar(), indices.len()).into_array()) + } +} + +impl SliceFn for ConstantEncoding { + fn slice(&self, array: &ConstantArray, start: usize, stop: usize) -> VortexResult { + Ok(ConstantArray::new(array.owned_scalar(), stop - start).into_array()) + } +} + +impl FilterFn for ConstantEncoding { + fn filter(&self, array: &ConstantArray, mask: FilterMask) -> VortexResult { + Ok(ConstantArray::new(array.owned_scalar(), mask.true_count()).into_array()) + } +} + +impl SearchSortedFn for ConstantArray { + fn search_sorted(&self, value: &Scalar, side: SearchSortedSide) -> VortexResult { + match self + .scalar_value() + .partial_cmp(value.value()) + .unwrap_or(Ordering::Less) + { + Ordering::Greater => Ok(SearchResult::NotFound(0)), + Ordering::Less => Ok(SearchResult::NotFound(self.len())), + Ordering::Equal => match side { + SearchSortedSide::Left => Ok(SearchResult::Found(0)), + SearchSortedSide::Right => Ok(SearchResult::Found(self.len())), + }, + } + } +} + +impl MaybeCompareFn for ConstantArray { + fn maybe_compare( + &self, + other: &ArrayData, + operator: Operator, + ) -> Option> { + other.as_constant().map(|const_scalar| { + let lhs = self.owned_scalar(); + let scalar = scalar_cmp(&lhs, &const_scalar, operator); + Ok(ConstantArray::new(scalar, 
self.len()).into_array()) + }) + } +} + +#[cfg(test)] +mod test { + use crate::array::constant::ConstantArray; + use crate::compute::{search_sorted, SearchResult, SearchSortedSide}; + use crate::IntoArrayData; + + #[test] + pub fn search() { + let cst = ConstantArray::new(42, 5000).into_array(); + assert_eq!( + search_sorted(&cst, 33, SearchSortedSide::Left).unwrap(), + SearchResult::NotFound(0) + ); + assert_eq!( + search_sorted(&cst, 55, SearchSortedSide::Left).unwrap(), + SearchResult::NotFound(5000) + ); + } + + #[test] + pub fn search_equals() { + let cst = ConstantArray::new(42, 5000).into_array(); + assert_eq!( + search_sorted(&cst, 42, SearchSortedSide::Left).unwrap(), + SearchResult::Found(0) + ); + assert_eq!( + search_sorted(&cst, 42, SearchSortedSide::Right).unwrap(), + SearchResult::Found(5000) + ); + } +} diff --git a/vortex-array/src/compute/boolean.rs b/vortex-array/src/compute/boolean.rs index f22848e3c3..4c1f92a4b7 100644 --- a/vortex-array/src/compute/boolean.rs +++ b/vortex-array/src/compute/boolean.rs @@ -1,167 +1,232 @@ -use vortex_error::{vortex_bail, VortexResult}; - -use crate::array::BoolArray; -use crate::{ArrayDType, ArrayData, IntoArrayVariant}; -pub trait AndFn { - /// Point-wise logical _and_ between two Boolean arrays. - /// - /// This method uses Arrow-style null propagation rather than the Kleene logic semantics. 
- /// - /// # Examples - /// - /// ``` - /// use vortex_array::ArrayData; - /// use vortex_array::compute::and; - /// use vortex_array::IntoCanonical; - /// use vortex_array::accessor::ArrayAccessor; - /// let a = ArrayData::from_iter([Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]); - /// let b = ArrayData::from_iter([Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]); - /// let result = and(a, b)?.into_canonical()?.into_bool()?; - /// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::>())?; - /// assert_eq!(result_vec, vec![Some(true), None, Some(false), None, None, None, Some(false), None, Some(false)]); - /// # use vortex_error::VortexError; - /// # Ok::<(), VortexError>(()) - /// ``` - fn and(&self, array: &ArrayData) -> VortexResult; - - /// Point-wise Kleene logical _and_ between two Boolean arrays. - /// - /// # Examples - /// - /// ``` - /// use vortex_array::ArrayData; - /// use vortex_array::compute::and_kleene; - /// use vortex_array::IntoCanonical; - /// use vortex_array::accessor::ArrayAccessor; - /// let a = ArrayData::from_iter([Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]); - /// let b = ArrayData::from_iter([Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]); - /// let result = and_kleene(a, b)?.into_canonical()?.into_bool()?; - /// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::>())?; - /// assert_eq!(result_vec, vec![Some(true), None, Some(false), None, None, Some(false), Some(false), Some(false), Some(false)]); - /// # use vortex_error::VortexError; - /// # Ok::<(), VortexError>(()) - /// ``` - fn and_kleene(&self, array: &ArrayData) -> VortexResult; +use std::sync::Arc; + +use arrow_array::cast::AsArray; +use arrow_array::ArrayRef; +use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; + +use 
crate::arrow::FromArrowArray; +use crate::encoding::Encoding; +use crate::{ArrayDType, ArrayData, Canonical, IntoArrayVariant}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinaryOperator { + And, + AndKleene, + Or, + OrKleene, + // AndNot, + // AndNotKleene, + // Xor, } -pub trait OrFn { - /// Point-wise logical _or_ between two Boolean arrays. - /// - /// This method uses Arrow-style null propagation rather than the Kleene logic semantics. - /// - /// # Examples - /// - /// ``` - /// use vortex_array::ArrayData; - /// use vortex_array::compute::or; - /// use vortex_array::IntoCanonical; - /// use vortex_array::accessor::ArrayAccessor; - /// let a = ArrayData::from_iter([Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]); - /// let b = ArrayData::from_iter([Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]); - /// let result = or(a, b)?.into_canonical()?.into_bool()?; - /// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::>())?; - /// assert_eq!(result_vec, vec![Some(true), None, Some(true), None, None, None, Some(true), None, Some(false)]); - /// # use vortex_error::VortexError; - /// # Ok::<(), VortexError>(()) - /// ``` - fn or(&self, array: &ArrayData) -> VortexResult; - - /// Point-wise Kleene logical _or_ between two Boolean arrays. 
- /// - /// # Examples - /// - /// ``` - /// use vortex_array::ArrayData; - /// use vortex_array::compute::or_kleene; - /// use vortex_array::IntoCanonical; - /// use vortex_array::accessor::ArrayAccessor; - /// let a = ArrayData::from_iter([Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]); - /// let b = ArrayData::from_iter([Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]); - /// let result = or_kleene(a, b)?.into_canonical()?.into_bool()?; - /// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::>())?; - /// assert_eq!(result_vec, vec![Some(true), Some(true), Some(true), Some(true), None, None, Some(true), None, Some(false)]); - /// # use vortex_error::VortexError; - /// # Ok::<(), VortexError>(()) - /// ``` - fn or_kleene(&self, array: &ArrayData) -> VortexResult; +pub trait BinaryBooleanFn { + fn binary_boolean( + &self, + array: &Array, + other: &ArrayData, + op: BinaryOperator, + ) -> VortexResult; } -fn lift_boolean_operator( - lhs: impl AsRef, - rhs: impl AsRef, - trait_fun: F, - bool_array_fun: G, -) -> VortexResult +impl BinaryBooleanFn for E where - F: Fn(&ArrayData, &ArrayData) -> Option>, - G: FnOnce(BoolArray, &ArrayData) -> VortexResult, + E: BinaryBooleanFn, + for<'a> &'a E::Array: TryFrom<&'a ArrayData, Error = VortexError>, { - let lhs = lhs.as_ref(); - let rhs = rhs.as_ref(); - - if lhs.len() != rhs.len() { - vortex_bail!("Boolean operations aren't supported on arrays of different lengths") - } - - if !lhs.dtype().is_boolean() || !rhs.dtype().is_boolean() { - vortex_bail!("Boolean operations are only supported on boolean arrays") + fn binary_boolean( + &self, + lhs: &ArrayData, + rhs: &ArrayData, + op: BinaryOperator, + ) -> VortexResult { + let array_ref = <&E::Array>::try_from(lhs)?; + let encoding = lhs + .encoding() + .as_any() + .downcast_ref::() + .ok_or_else(|| vortex_err!("Mismatched encoding"))?; + 
BinaryBooleanFn::binary_boolean(encoding, array_ref, rhs, op) } +} - if let Some(selection) = trait_fun(lhs, rhs) { - return selection; - } +pub trait AndFn { + fn and(&self, array: Array, other: &ArrayData) -> VortexResult; +} - if let Some(selection) = trait_fun(rhs, lhs) { - return selection; - } +pub trait AndKleeneFn { + fn and_kleene(&self, array: &Array, other: &ArrayData) -> VortexResult; +} - // If neither side implements the trait, we try to expand the left-hand side into a `BoolArray`, - // which we know does implement it, and call into that implementation. - let lhs = lhs.clone().into_bool()?; +pub trait OrFn { + fn or(&self, array: &Array, other: &ArrayData) -> VortexResult; +} - bool_array_fun(lhs, rhs) +pub trait OrKleeneFn { + fn or_kleene(&self, array: &Array, other: &ArrayData) -> VortexResult; } +/// Point-wise logical _and_ between two Boolean arrays. +/// +/// This method uses Arrow-style null propagation rather than the Kleene logic semantics. +/// +/// # Examples +/// +/// ``` +/// use vortex_array::ArrayData; +/// use vortex_array::compute::and; +/// use vortex_array::IntoCanonical; +/// use vortex_array::accessor::ArrayAccessor; +/// let a = ArrayData::from_iter([Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]); +/// let b = ArrayData::from_iter([Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]); +/// let result = and(a, b)?.into_canonical()?.into_bool()?; +/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::>())?; +/// assert_eq!(result_vec, vec![Some(true), None, Some(false), None, None, None, Some(false), None, Some(false)]); +/// # use vortex_error::VortexError; +/// # Ok::<(), VortexError>(()) +/// ``` pub fn and(lhs: impl AsRef, rhs: impl AsRef) -> VortexResult { - lift_boolean_operator( - lhs, - rhs, - |lhs, rhs| lhs.with_dyn(|lhs| lhs.and().map(|lhs| lhs.and(rhs))), - |lhs, rhs| lhs.and(rhs), - ) + 
binary_boolean(lhs.as_ref(), rhs.as_ref(), BinaryOperator::And) } +/// Point-wise Kleene logical _and_ between two Boolean arrays. +/// +/// # Examples +/// +/// ``` +/// use vortex_array::ArrayData; +/// use vortex_array::compute::and_kleene; +/// use vortex_array::IntoCanonical; +/// use vortex_array::accessor::ArrayAccessor; +/// let a = ArrayData::from_iter([Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]); +/// let b = ArrayData::from_iter([Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]); +/// let result = and_kleene(a, b)?.into_canonical()?.into_bool()?; +/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::>())?; +/// assert_eq!(result_vec, vec![Some(true), None, Some(false), None, None, Some(false), Some(false), Some(false), Some(false)]); +/// # use vortex_error::VortexError; +/// # Ok::<(), VortexError>(()) +/// ``` pub fn and_kleene( lhs: impl AsRef, rhs: impl AsRef, ) -> VortexResult { - lift_boolean_operator( - lhs, - rhs, - |lhs, rhs| lhs.with_dyn(|lhs| lhs.and_kleene().map(|lhs| lhs.and_kleene(rhs))), - |lhs, rhs| lhs.and_kleene(rhs), - ) + binary_boolean(lhs.as_ref(), rhs.as_ref(), BinaryOperator::AndKleene) } +/// Point-wise logical _or_ between two Boolean arrays. +/// +/// This method uses Arrow-style null propagation rather than the Kleene logic semantics. 
+/// +/// # Examples +/// +/// ``` +/// use vortex_array::ArrayData; +/// use vortex_array::compute::or; +/// use vortex_array::IntoCanonical; +/// use vortex_array::accessor::ArrayAccessor; +/// let a = ArrayData::from_iter([Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]); +/// let b = ArrayData::from_iter([Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]); +/// let result = or(a, b)?.into_canonical()?.into_bool()?; +/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::>())?; +/// assert_eq!(result_vec, vec![Some(true), None, Some(true), None, None, None, Some(true), None, Some(false)]); +/// # use vortex_error::VortexError; +/// # Ok::<(), VortexError>(()) +/// ``` pub fn or(lhs: impl AsRef, rhs: impl AsRef) -> VortexResult { - lift_boolean_operator( - lhs, - rhs, - |lhs, rhs| lhs.with_dyn(|lhs| lhs.or().map(|lhs| lhs.or(rhs))), - |lhs, rhs| lhs.or(rhs), - ) + binary_boolean(lhs.as_ref(), rhs.as_ref(), BinaryOperator::Or) } +/// Point-wise Kleene logical _or_ between two Boolean arrays. 
+/// +/// # Examples +/// +/// ``` +/// use vortex_array::ArrayData; +/// use vortex_array::compute::or_kleene; +/// use vortex_array::IntoCanonical; +/// use vortex_array::accessor::ArrayAccessor; +/// let a = ArrayData::from_iter([Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]); +/// let b = ArrayData::from_iter([Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]); +/// let result = or_kleene(a, b)?.into_canonical()?.into_bool()?; +/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::>())?; +/// assert_eq!(result_vec, vec![Some(true), Some(true), Some(true), Some(true), None, None, Some(true), None, Some(false)]); +/// # use vortex_error::VortexError; +/// # Ok::<(), VortexError>(()) +/// ``` pub fn or_kleene( lhs: impl AsRef, rhs: impl AsRef, ) -> VortexResult { - lift_boolean_operator( - lhs, - rhs, - |lhs, rhs| lhs.with_dyn(|lhs| lhs.or_kleene().map(|lhs| lhs.or_kleene(rhs))), - |lhs, rhs| lhs.or_kleene(rhs), - ) + binary_boolean(lhs.as_ref(), rhs.as_ref(), BinaryOperator::OrKleene) +} + +fn binary_boolean(lhs: &ArrayData, rhs: &ArrayData, op: BinaryOperator) -> VortexResult { + if lhs.len() != rhs.len() { + vortex_bail!("Boolean operations aren't supported on arrays of different lengths") + } + if !lhs.dtype().is_boolean() || !rhs.dtype().is_boolean() { + vortex_bail!("Boolean operations are only supported on boolean arrays") + } + + // If LHS is constant, then we make sure it's on the RHS. + if lhs.is_constant() && !rhs.is_constant() { + return binary_boolean(rhs, lhs, op); + } + + // Check if either LHS or RHS supports the operation directly. 
+ if let Some(f) = lhs.encoding().binary_boolean_fn(lhs, rhs) { + return f.binary_boolean(lhs, rhs, op); + } else { + log::debug!( + "No boolean implementation found for LHS {}, RHS {}, and operator {:?}", + lhs.encoding().id(), + rhs.encoding().id(), + op, + ); + } + if let Some(f) = rhs.encoding().binary_boolean_fn(rhs, lhs) { + return f.binary_boolean(rhs, lhs, op); + } else { + log::debug!( + "No boolean implementation found for LHS {}, RHS {}, and operator {:?}", + rhs.encoding().id(), + lhs.encoding().id(), + op, + ); + } + + // If neither side implements the trait, we try to expand the left-hand side into a `BoolArray`, + // which we know does implement it, and call into that implementation. + arrow_boolean(lhs.clone(), rhs.clone(), op) +} + +/// Implementation of `BinaryBooleanFn` using the Arrow crate. +/// +/// Note that other encodings should handle a constant RHS value, so we can assume here that +/// the RHS is not constant and expand to a full array. +pub(crate) fn arrow_boolean( + lhs: ArrayData, + rhs: ArrayData, + operator: BinaryOperator, +) -> VortexResult<ArrayData> { + let nullable = lhs.dtype().is_nullable() || rhs.dtype().is_nullable(); + + let lhs = Canonical::Bool(lhs.into_bool()?) + .into_arrow()? + .as_boolean() + .clone(); + let rhs = Canonical::Bool(rhs.into_bool()?) + .into_arrow()? + .as_boolean() + .clone(); + + let array = match operator { + BinaryOperator::And => arrow_arith::boolean::and(&lhs, &rhs)?, + BinaryOperator::AndKleene => arrow_arith::boolean::and_kleene(&lhs, &rhs)?, + BinaryOperator::Or => arrow_arith::boolean::or(&lhs, &rhs)?, + BinaryOperator::OrKleene => arrow_arith::boolean::or_kleene(&lhs, &rhs)?, + }; + + Ok(ArrayData::from_arrow(Arc::new(array) as ArrayRef, nullable)) } #[cfg(test)] diff --git a/vortex-array/src/compute/mod.rs b/vortex-array/src/compute/mod.rs index 3072e530ad..76c5f20273 100644 --- a/vortex-array/src/compute/mod.rs +++ b/vortex-array/src/compute/mod.rs @@ -7,7 +7,7 @@ //!
implementations of these operators, else we will decode, and perform the equivalent operator //! from Arrow. -pub use boolean::{and, and_kleene, or, or_kleene, AndFn, OrFn}; +pub use boolean::*; pub(crate) use compare::arrow_compare; pub use compare::{compare, scalar_cmp, CompareFn, MaybeCompareFn, Operator}; pub use filter::*; @@ -30,6 +30,17 @@ pub mod unary; /// VTable for dispatching compute functions to Vortex encodings. pub trait ComputeVTable { + /// Implementation of binary boolean logic operations. + /// + /// See: [BinaryBooleanFn]. + fn binary_boolean_fn( + &self, + _lhs: &ArrayData, + _rhs: &ArrayData, + ) -> Option<&dyn BinaryBooleanFn<ArrayData>> { + None + } + /// Implemented for arrays that can be casted to different types. /// /// See: [CastFn]. @@ -96,32 +107,4 @@ pub trait ArrayCompute { fn search_sorted(&self) -> Option<&dyn SearchSortedFn> { None } - - /// Perform an Arrow-style boolean AND operation over two arrays - /// - /// See: [AndFn]. - fn and(&self) -> Option<&dyn AndFn> { - None - } - - /// Perform a Kleene-style boolean AND operation over two arrays - /// - /// See: [AndFn]. - fn and_kleene(&self) -> Option<&dyn AndFn> { - None - } - - /// Perform an Arrow-style boolean OR operation over two arrays - /// - /// See: [OrFn]. - fn or(&self) -> Option<&dyn OrFn> { - None - } - - /// Perform a Kleene-style boolean OR operation over two arrays - /// - /// See: [OrFn]. - fn or_kleene(&self) -> Option<&dyn OrFn> { - None - } } diff --git a/vortex-array/src/data/mod.rs b/vortex-array/src/data/mod.rs index 49ecb0bfdc..26a5d7b631 100644 --- a/vortex-array/src/data/mod.rs +++ b/vortex-array/src/data/mod.rs @@ -146,6 +146,13 @@ impl ArrayData { } } + /// Return whether the array is constant. + pub fn is_constant(&self) -> bool { + self.statistics() + .get_as::<bool>(Stat::IsConstant) + .unwrap_or(false) + } + /// Return scalar value of this array if the array is constant pub fn as_constant(&self) -> Option<Scalar> { (self.statistics().get_as::<bool>(Stat::IsConstant)?)
From a775f0d91a991607929272b4ee605caa41ce52e2 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 20 Nov 2024 22:18:16 +0000 Subject: [PATCH 2/2] Search sorted usize --- vortex-array/src/compute/boolean.rs | 19 +------------------ vortex-array/src/compute/mod.rs | 2 +- vortex-array/src/data/mod.rs | 2 +- 3 files changed, 3 insertions(+), 20 deletions(-) diff --git a/vortex-array/src/compute/boolean.rs b/vortex-array/src/compute/boolean.rs index 4c1f92a4b7..f432516bdc 100644 --- a/vortex-array/src/compute/boolean.rs +++ b/vortex-array/src/compute/boolean.rs @@ -49,22 +49,6 @@ where } } -pub trait AndFn<Array> { - fn and(&self, array: Array, other: &ArrayData) -> VortexResult<ArrayData>; -} - -pub trait AndKleeneFn<Array> { - fn and_kleene(&self, array: &Array, other: &ArrayData) -> VortexResult<ArrayData>; -} - -pub trait OrFn<Array> { - fn or(&self, array: &Array, other: &ArrayData) -> VortexResult<ArrayData>; -} - -pub trait OrKleeneFn<Array> { - fn or_kleene(&self, array: &Array, other: &ArrayData) -> VortexResult<ArrayData>; -} - /// Point-wise logical _and_ between two Boolean arrays. /// /// This method uses Arrow-style null propagation rather than the Kleene logic semantics. @@ -194,8 +178,7 @@ fn binary_boolean(lhs: &ArrayData, rhs: &ArrayData, op: BinaryOperator) -> Vorte ); } - // If neither side implements the trait, we try to expand the left-hand side into a `BoolArray`, - // which we know does implement it, and call into that implementation. + // If neither side implements the trait, then we delegate to Arrow compute. arrow_boolean(lhs.clone(), rhs.clone(), op) } diff --git a/vortex-array/src/compute/mod.rs b/vortex-array/src/compute/mod.rs index ce0a72bf25..6013efd055 100644 --- a/vortex-array/src/compute/mod.rs +++ b/vortex-array/src/compute/mod.rs @@ -7,7 +7,7 @@ //! implementations of these operators, else we will decode, and perform the equivalent operator //! from Arrow.
-pub use boolean::*; +pub use boolean::{and, and_kleene, or, or_kleene, BinaryBooleanFn, BinaryOperator}; pub(crate) use compare::arrow_compare; pub use compare::{compare, scalar_cmp, CompareFn, MaybeCompareFn, Operator}; pub use filter::*; diff --git a/vortex-array/src/data/mod.rs b/vortex-array/src/data/mod.rs index 26a5d7b631..a1ba2bdbbd 100644 --- a/vortex-array/src/data/mod.rs +++ b/vortex-array/src/data/mod.rs @@ -155,7 +155,7 @@ impl ArrayData { /// Return scalar value of this array if the array is constant pub fn as_constant(&self) -> Option<Scalar> { - (self.statistics().get_as::<bool>(Stat::IsConstant)?) + self.is_constant() // This is safe to unwrap as long as empty arrays aren't constant .then(|| scalar_at(self, 0).vortex_expect("expected a scalar value")) }