Skip to content

Commit

Permalink
fix: boolean and/or expressions with null (#3544)
Browse files Browse the repository at this point in the history
Resolves #3512

A fun truth table ($V_{x}$ is the validity $x$, $L$ is left, and $R$ is
right):

| $L$ | $V_{L}$ | $R$ | $V_{R}$ | $∧$ | $V_{∧}$ | $∨$ | $V_{∨}$ |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: |
:-----: |
| ⬜️ | ⬜️ | ⬜️ | ⬜️ |     | ⬜️ |     | ⬜️ |
| ⬜️ | ⬜️ | ⬜️ | ✅  | ⬜️ | ✅  |     | ⬜️ |
| ⬜️ | ⬜️ | ✅  | ⬜️ |     | ⬜️ |     | ⬜️ |
| ⬜️ | ⬜️ | ✅  | ✅  |     | ⬜️ | ✅  | ✅  |
| ⬜️ | ✅  | ⬜️ | ⬜️ | ⬜️ | ✅  |     | ⬜️ |
| ⬜️ | ✅  | ⬜️ | ✅  | ⬜️ | ✅  | ⬜️ | ✅  |
| ⬜️ | ✅  | ✅  | ⬜️ | ⬜️ | ✅  |     | ⬜️ |
| ⬜️ | ✅  | ✅  | ✅  | ⬜️ | ✅  | ✅  | ✅  |
| ✅  | ⬜️ | ⬜️ | ⬜️ |     | ⬜️ |     | ⬜️ |
| ✅  | ⬜️ | ⬜️ | ✅  | ⬜️ | ✅  |     | ⬜️ |
| ✅  | ⬜️ | ✅  | ⬜️ |     | ⬜️ |     | ⬜️ |
| ✅  | ⬜️ | ✅  | ✅  |     | ⬜️ | ✅  | ✅  |
| ✅  | ✅  | ⬜️ | ⬜️ |     | ⬜️ | ✅  | ✅  |
| ✅  | ✅  | ⬜️ | ✅  | ⬜️ | ✅  | ✅  | ✅  |
| ✅  | ✅  | ✅  | ⬜️ |     | ⬜️ | ✅  | ✅  |
| ✅  | ✅  | ✅  | ✅  | ✅  | ✅  | ✅  | ✅  |
  • Loading branch information
kevinzwang authored Dec 11, 2024
1 parent e5ea73f commit d103573
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 66 deletions.
140 changes: 104 additions & 36 deletions src/daft-core/src/array/ops/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,13 +601,47 @@ impl Not for &BooleanArray {
impl DaftLogical<&Self> for BooleanArray {
type Output = DaftResult<Self>;
fn and(&self, rhs: &Self) -> Self::Output {
// When performing a logical AND with a NULL value:
// - If the non-null value is false, the result is false (not null)
// - If the non-null value is true, the result is null
fn and_with_null(name: &str, arr: &BooleanArray) -> BooleanArray {
let values = arr.as_arrow().values();

let new_validity = match arr.as_arrow().validity() {
None => values.not(),
Some(validity) => arrow2::bitmap::and(&values.not(), validity),
};

BooleanArray::from((
name,
arrow2::array::BooleanArray::new(
arrow2::datatypes::DataType::Boolean,
values.clone(),
Some(new_validity),
),
))
}

match (self.len(), rhs.len()) {
(x, y) if x == y => {
let validity =
arrow_bitmap_and_helper(self.as_arrow().validity(), rhs.as_arrow().validity());

let result_bitmap =
arrow2::bitmap::and(self.as_arrow().values(), rhs.as_arrow().values());
let l_values = self.as_arrow().values();
let r_values = rhs.as_arrow().values();

// (false & NULL) should be false, compute validity to ensure that
let validity = match (self.as_arrow().validity(), rhs.as_arrow().validity()) {
(None, None) => None,
(None, Some(r_valid)) => Some(arrow2::bitmap::or(&l_values.not(), r_valid)),
(Some(l_valid), None) => Some(arrow2::bitmap::or(l_valid, &r_values.not())),
(Some(l_valid), Some(r_valid)) => Some(arrow2::bitmap::or(
&arrow2::bitmap::or(
&arrow2::bitmap::and(&l_values.not(), l_valid),
&arrow2::bitmap::and(&r_values.not(), r_valid),
),
&arrow2::bitmap::and(l_valid, r_valid),
)),
};

let result_bitmap = arrow2::bitmap::and(l_values, r_values);
Ok(Self::from((
self.name(),
arrow2::array::BooleanArray::new(
Expand All @@ -617,18 +651,18 @@ impl DaftLogical<&Self> for BooleanArray {
),
)))
}
(l_size, 1) => {
(_, 1) => {
if let Some(value) = rhs.get(0) {
self.and(value)
} else {
Ok(Self::full_null(self.name(), &DataType::Boolean, l_size))
Ok(and_with_null(self.name(), self))
}
}
(1, r_size) => {
(1, _) => {
if let Some(value) = self.get(0) {
rhs.and(value)
} else {
Ok(Self::full_null(self.name(), &DataType::Boolean, r_size))
Ok(and_with_null(self.name(), rhs))
}
}
(l, r) => Err(DaftError::ValueError(format!(
Expand All @@ -640,13 +674,47 @@ impl DaftLogical<&Self> for BooleanArray {
}

fn or(&self, rhs: &Self) -> Self::Output {
// When performing a logical OR with a NULL value:
// - If the non-null value is false, the result is null
// - If the non-null value is true, the result is true (not null)
fn or_with_null(name: &str, arr: &BooleanArray) -> BooleanArray {
let values = arr.as_arrow().values();

let new_validity = match arr.as_arrow().validity() {
None => values.clone(),
Some(validity) => arrow2::bitmap::and(values, validity),
};

BooleanArray::from((
name,
arrow2::array::BooleanArray::new(
arrow2::datatypes::DataType::Boolean,
values.clone(),
Some(new_validity),
),
))
}

match (self.len(), rhs.len()) {
(x, y) if x == y => {
let validity =
arrow_bitmap_and_helper(self.as_arrow().validity(), rhs.as_arrow().validity());

let result_bitmap =
arrow2::bitmap::or(self.as_arrow().values(), rhs.as_arrow().values());
let l_values = self.as_arrow().values();
let r_values = rhs.as_arrow().values();

// (true | NULL) should be true, compute validity to ensure that
let validity = match (self.as_arrow().validity(), rhs.as_arrow().validity()) {
(None, None) => None,
(None, Some(r_valid)) => Some(arrow2::bitmap::or(l_values, r_valid)),
(Some(l_valid), None) => Some(arrow2::bitmap::or(l_valid, r_values)),
(Some(l_valid), Some(r_valid)) => Some(arrow2::bitmap::or(
&arrow2::bitmap::or(
&arrow2::bitmap::and(l_values, l_valid),
&arrow2::bitmap::and(r_values, r_valid),
),
&arrow2::bitmap::and(l_valid, r_valid),
)),
};

let result_bitmap = arrow2::bitmap::or(l_values, r_values);
Ok(Self::from((
self.name(),
arrow2::array::BooleanArray::new(
Expand All @@ -656,18 +724,18 @@ impl DaftLogical<&Self> for BooleanArray {
),
)))
}
(l_size, 1) => {
(_, 1) => {
if let Some(value) = rhs.get(0) {
self.or(value)
} else {
Ok(Self::full_null(self.name(), &DataType::Boolean, l_size))
Ok(or_with_null(self.name(), self))
}
}
(1, r_size) => {
(1, _) => {
if let Some(value) = self.get(0) {
rhs.or(value)
} else {
Ok(Self::full_null(self.name(), &DataType::Boolean, r_size))
Ok(or_with_null(self.name(), rhs))
}
}
(l, r) => Err(DaftError::ValueError(format!(
Expand Down Expand Up @@ -756,33 +824,33 @@ impl DaftCompare<&Self> for NullArray {
impl DaftLogical<bool> for BooleanArray {
type Output = DaftResult<Self>;
fn and(&self, rhs: bool) -> Self::Output {
let validity = self.as_arrow().validity();
if rhs {
Ok(self.clone())
} else {
use arrow2::{array, bitmap::Bitmap, datatypes::DataType};
let arrow_array = array::BooleanArray::new(
DataType::Boolean,
Bitmap::new_zeroed(self.len()),
validity.cloned(),
);
Ok(Self::from((self.name(), arrow_array)))
Ok(Self::from((
self.name(),
arrow2::array::BooleanArray::new(
arrow2::datatypes::DataType::Boolean,
arrow2::bitmap::Bitmap::new_zeroed(self.len()),
None, // false & x is always valid false for any x
),
)))
}
}

fn or(&self, rhs: bool) -> Self::Output {
let validity = self.as_arrow().validity();
if rhs {
use arrow2::{array, bitmap::Bitmap, datatypes::DataType};
let arrow_array = array::BooleanArray::new(
DataType::Boolean,
Bitmap::new_zeroed(self.len()).not(),
validity.cloned(),
);
return Ok(Self::from((self.name(), arrow_array)));
Ok(Self::from((
self.name(),
arrow2::array::BooleanArray::new(
arrow2::datatypes::DataType::Boolean,
arrow2::bitmap::Bitmap::new_trued(self.len()),
None, // true | x is always valid true for any x
),
)))
} else {
Ok(self.clone())
}

Ok(self.clone())
}

fn xor(&self, rhs: bool) -> Self::Output {
Expand Down
Loading

0 comments on commit d103573

Please sign in to comment.