Skip to content

Commit

Permalink
[EXPRESSIONS] Implement Expression.float.is_inf (#2371)
Browse files Browse the repository at this point in the history
Resolves #2316.

Signed-off-by: Tai Le Manh <[email protected]>
  • Loading branch information
tlm365 authored Jun 13, 2024
1 parent 647ec43 commit 1b2973f
Show file tree
Hide file tree
Showing 15 changed files with 195 additions and 0 deletions.
2 changes: 2 additions & 0 deletions daft/daft.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,7 @@ class PyExpr:
def __hash__(self) -> int: ...
def __reduce__(self) -> tuple: ...
def is_nan(self) -> PyExpr: ...
def is_inf(self) -> PyExpr: ...
def dt_date(self) -> PyExpr: ...
def dt_day(self) -> PyExpr: ...
def dt_hour(self) -> PyExpr: ...
Expand Down Expand Up @@ -1207,6 +1208,7 @@ class PySeries:
def utf8_ilike(self, pattern: PySeries) -> PySeries: ...
def utf8_substr(self, start: PySeries, length: PySeries | None = None) -> PySeries: ...
def is_nan(self) -> PySeries: ...
def is_inf(self) -> PySeries: ...
def dt_date(self) -> PySeries: ...
def dt_day(self) -> PySeries: ...
def dt_hour(self) -> PySeries: ...
Expand Down
15 changes: 15 additions & 0 deletions daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,21 @@ def is_nan(self) -> Expression:
"""
return Expression._from_pyexpr(self._expr.is_nan())

def is_inf(self) -> Expression:
    """Flags which values of this float Expression are infinite.

    Both positive and negative infinity are reported as ``True``.

    .. NOTE::
        Nulls will be propagated! I.e. this operation will return a null for null values.

    Example:
        >>> # [-float("inf"), 0., float("inf"), None] -> [True, False, True, None]
        >>> col("x").float.is_inf()

    Returns:
        Expression: Boolean Expression that is True wherever the value is infinite.
    """
    inf_check = self._expr.is_inf()
    return Expression._from_pyexpr(inf_check)


class ExpressionDatetimeNamespace(ExpressionNamespace):
def date(self) -> Expression:
Expand Down
3 changes: 3 additions & 0 deletions daft/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,9 @@ class SeriesFloatNamespace(SeriesNamespace):
def is_nan(self) -> Series:
    """Delegates to the underlying series' ``is_nan`` and wraps the result as a Series."""
    nan_flags = self._series.is_nan()
    return Series._from_pyseries(nan_flags)

def is_inf(self) -> Series:
    """Delegates to the underlying series' ``is_inf`` and wraps the result as a Series."""
    inf_flags = self._series.is_inf()
    return Series._from_pyseries(inf_flags)


class SeriesStringNamespace(SeriesNamespace):
def endswith(self, suffix: Series) -> Series:
Expand Down
14 changes: 14 additions & 0 deletions docs/source/api_docs/expressions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,20 @@ The following methods are available under the ``expr.str`` attribute.
Expression.str.ilike
Expression.str.substr

.. _api-float-expression-operations:

Floats
#######

The following methods are available under the ``expr.float`` attribute.

.. autosummary::
:nosignatures:
:toctree: doc_gen/expression_methods
:template: autosummary/accessor_method.rst

Expression.float.is_inf

.. _api-expressions-temporal:

Temporal
Expand Down
30 changes: 30 additions & 0 deletions src/daft-core/src/array/ops/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::{
use common_error::DaftResult;
use num_traits::Float;

use super::DaftIsInf;
use super::DaftIsNan;

use super::as_arrow::AsArrow;
Expand Down Expand Up @@ -38,3 +39,32 @@ impl DaftIsNan for DataArray<NullType> {
)))
}
}

/// Element-wise infinity check for floating-point arrays.
///
/// Produces a boolean array carrying the input's name, where each slot reports
/// whether the corresponding value is infinite. The input's validity bitmap is
/// cloned onto the result, so null slots stay null.
impl<T> DaftIsInf for DataArray<T>
where
    T: DaftFloatType,
    <T as DaftNumericType>::Native: Float,
{
    type Output = DaftResult<DataArray<BooleanType>>;

    fn is_inf(&self) -> Self::Output {
        let source = self.as_arrow();
        // Every stored value is inspected, including those behind null slots;
        // the cloned validity bitmap is what masks them in the result.
        let flags = source.values_iter().map(|value| value.is_infinite());
        let validity = source.validity().cloned();
        let booleans = arrow2::array::BooleanArray::from_trusted_len_values_iter(flags)
            .with_validity(validity);
        Ok(BooleanArray::from((self.name(), booleans)))
    }
}

/// Infinity check for null-typed arrays.
///
/// The result is a boolean array with the same name and length as the input in
/// which every slot is null.
impl DaftIsInf for DataArray<NullType> {
    type Output = DaftResult<DataArray<BooleanType>>;

    fn is_inf(&self) -> Self::Output {
        // `new_null` creates zeroed value and validity bitmaps directly, which
        // avoids allocating and bit-packing two temporary `Vec<bool>` buffers.
        let all_null = arrow2::array::BooleanArray::new_null(
            arrow2::datatypes::DataType::Boolean,
            self.len(),
        );
        Ok(BooleanArray::from((self.name(), all_null)))
    }
}
5 changes: 5 additions & 0 deletions src/daft-core/src/array/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ pub trait DaftIsNan {
fn is_nan(&self) -> Self::Output;
}

/// Element-wise check for infinite values.
///
/// The result type is chosen by each implementor through `Output`.
pub trait DaftIsInf {
    /// Type returned by [`DaftIsInf::is_inf`].
    type Output;
    /// Reports which elements of `self` are infinite, without consuming `self`.
    fn is_inf(&self) -> Self::Output;
}

pub type VecIndices = Vec<u64>;
pub type GroupIndices = Vec<VecIndices>;
pub type GroupIndicesPair = (VecIndices, GroupIndices);
Expand Down
4 changes: 4 additions & 0 deletions src/daft-core/src/python/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,10 @@ impl PySeries {
Ok(self.series.is_nan()?.into())
}

/// Calls `is_inf` on the wrapped series and converts the result back into `Self`.
///
/// A failure from the underlying call is propagated to the caller through `?`.
pub fn is_inf(&self) -> PyResult<Self> {
    let flagged = self.series.is_inf()?;
    Ok(flagged.into())
}

/// Calls `dt_date` on the wrapped series and converts the result back into `Self`.
///
/// A failure from the underlying call is propagated to the caller through `?`.
pub fn dt_date(&self) -> PyResult<Self> {
    let dates = self.series.dt_date()?;
    Ok(dates.into())
}
Expand Down
7 changes: 7 additions & 0 deletions src/daft-core/src/series/ops/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,11 @@ impl Series {
Ok(DaftIsNan::is_nan(self.downcast::<<$T as DaftDataType>::ArrayType>()?)?.into_series())
})
}

/// Computes a series marking which values of this series are infinite.
///
/// Dispatch on the series' data type is delegated to
/// `with_match_float_and_null_daft_types!`; the matched array type's
/// `DaftIsInf` implementation does the actual work.
pub fn is_inf(&self) -> DaftResult<Series> {
    use crate::array::ops::DaftIsInf;
    with_match_float_and_null_daft_types!(self.data_type(), |$T| {
        let typed = self.downcast::<<$T as DaftDataType>::ArrayType>()?;
        let flags = DaftIsInf::is_inf(typed)?;
        Ok(flags.into_series())
    })
}
}
51 changes: 51 additions & 0 deletions src/daft-dsl/src/functions/float/is_inf.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
use daft_core::{
datatypes::{DataType, Field},
schema::Schema,
series::Series,
};

use crate::ExprRef;

use crate::functions::FunctionExpr;
use common_error::{DaftError, DaftResult};

use super::super::FunctionEvaluator;

/// Evaluator backing the `is_inf` float function expression.
pub(super) struct IsInfEvaluator {}

impl FunctionEvaluator for IsInfEvaluator {
    /// Name under which this function is reported.
    fn fn_name(&self) -> &'static str {
        "is_inf"
    }

    /// Resolves the output field: a Boolean field named after the single input.
    ///
    /// # Errors
    /// * `DaftError::SchemaMismatch` when the number of inputs is not exactly one.
    /// * Whatever error resolving the input expression against `schema` yields.
    /// * `DaftError::TypeError` when the input is neither `Float32` nor `Float64`.
    fn to_field(&self, inputs: &[ExprRef], schema: &Schema, _: &FunctionExpr) -> DaftResult<Field> {
        // Arity is validated before the input expression is resolved.
        let data = match inputs {
            [data] => data,
            _ => {
                return Err(DaftError::SchemaMismatch(format!(
                    "Expected 1 input args, got {}",
                    inputs.len()
                )))
            }
        };

        let data_field = data.to_field(schema)?;

        // Only Float32 and Float64 are accepted; Float16 is not handled here.
        if matches!(data_field.dtype, DataType::Float32 | DataType::Float64) {
            Ok(Field::new(data_field.name, DataType::Boolean))
        } else {
            Err(DaftError::TypeError(format!(
                "Expects input to is_inf to be float, but received {data_field}",
            )))
        }
    }

    /// Runs `is_inf` on the single input series.
    ///
    /// # Errors
    /// `DaftError::ValueError` when the number of inputs is not exactly one;
    /// otherwise any error returned by the series-level `is_inf`.
    fn evaluate(&self, inputs: &[Series], _: &FunctionExpr) -> DaftResult<Series> {
        if let [data] = inputs {
            return data.is_inf();
        }
        Err(DaftError::ValueError(format!(
            "Expected 1 input args, got {}",
            inputs.len()
        )))
    }
}
12 changes: 12 additions & 0 deletions src/daft-dsl/src/functions/float/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
mod is_inf;
mod is_nan;

use is_inf::IsInfEvaluator;
use is_nan::IsNanEvaluator;
use serde::{Deserialize, Serialize};

Expand All @@ -10,6 +12,7 @@ use super::FunctionEvaluator;
/// Float-specific function expressions.
///
/// Each variant selects one evaluator (`IsNan` -> `IsNanEvaluator`,
/// `IsInf` -> `IsInfEvaluator`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum FloatExpr {
    /// The `is_nan` function.
    IsNan,
    /// The `is_inf` function.
    IsInf,
}

impl FloatExpr {
Expand All @@ -18,6 +21,7 @@ impl FloatExpr {
use FloatExpr::*;
match self {
IsNan => &IsNanEvaluator {},
IsInf => &IsInfEvaluator {},
}
}
}
Expand All @@ -29,3 +33,11 @@ pub fn is_nan(data: ExprRef) -> ExprRef {
}
.into()
}

/// Builds the `is_inf` function expression with `data` as its only input.
pub fn is_inf(data: ExprRef) -> ExprRef {
    let func = super::FunctionExpr::Float(FloatExpr::IsInf);
    let inputs = vec![data];
    Expr::Function { func, inputs }.into()
}
5 changes: 5 additions & 0 deletions src/daft-dsl/src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,11 @@ impl PyExpr {
Ok(is_nan(self.into()).into())
}

/// Wraps this expression in the `is_inf` float function and returns the new expression.
pub fn is_inf(&self) -> PyResult<Self> {
    let wrapped = functions::float::is_inf(self.into());
    Ok(wrapped.into())
}

pub fn dt_date(&self) -> PyResult<Self> {
use functions::temporal::date;
Ok(date(self.into()).into())
Expand Down
7 changes: 7 additions & 0 deletions tests/expressions/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,13 @@ def test_float_is_nan() -> None:
assert output == "is_nan(col(a))"


def test_float_is_inf() -> None:
    """The repr of a float ``is_inf`` expression names the function and its column input."""
    expr = col("a").float.is_inf()
    assert repr(expr) == "is_inf(col(a))"


def test_date_lit_post_epoch() -> None:
d = lit(date(2022, 1, 1))
output = repr(d)
Expand Down
9 changes: 9 additions & 0 deletions tests/expressions/typing/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,12 @@ def test_float_is_nan(unary_data_fixture):
run_kernel=unary_data_fixture.float.is_nan,
resolvable=unary_data_fixture.datatype() in (DataType.float32(), DataType.float64()),
)


def test_float_is_inf(unary_data_fixture):
    """Type resolution of ``float.is_inf`` must agree with runtime behavior.

    The expression is expected to resolve only for float32 and float64 inputs.
    """
    arg = unary_data_fixture
    float_dtypes = (DataType.float32(), DataType.float64())
    assert_typing_resolve_vs_runtime_behavior(
        data=[arg],
        expr=col(arg.name()).float.is_inf(),
        run_kernel=arg.float.is_inf,
        resolvable=arg.datatype() in float_dtypes,
    )
24 changes: 24 additions & 0 deletions tests/series/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,27 @@ def test_float_is_nan_all_null() -> None:
s = Series.from_arrow(pa.array([None, None, None]))
result = s.float.is_nan()
assert result.to_pylist() == [None, None, None]


def test_float_is_inf() -> None:
    """Both negative and positive infinity are flagged; a finite value is not."""
    values = pa.array([-float("inf"), 0.0, np.inf])
    flags = Series.from_arrow(values).float.is_inf()
    assert flags.to_pylist() == [True, False, True]


def test_float_is_inf_with_nulls() -> None:
    """Null inputs come back as null rather than False."""
    values = pa.array([-np.inf, None, 1.0, None, float("inf")])
    flags = Series.from_arrow(values).float.is_inf()
    assert flags.to_pylist() == [True, None, False, None, True]


def test_float_is_inf_empty() -> None:
    """An empty float64 series yields an empty result."""
    empty = pa.array([], type=pa.float64())
    flags = Series.from_arrow(empty).float.is_inf()
    assert flags.to_pylist() == []


def test_float_is_inf_all_null() -> None:
    """A series built from an untyped all-None arrow array yields all nulls."""
    nulls = pa.array([None, None, None])
    flags = Series.from_arrow(nulls).float.is_inf()
    assert flags.to_pylist() == [None, None, None]
7 changes: 7 additions & 0 deletions tests/table/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,13 @@ def test_table_float_is_nan() -> None:
assert result_table.to_pydict() == {"a": [False, True, False, None, True]}


def test_table_float_is_inf() -> None:
    """``float.is_inf`` evaluated through a MicroPartition flags infinities and keeps nulls."""
    data = {"a": [-np.inf, 0.0, None, float("inf")]}
    partition = MicroPartition.from_pydict(data)
    evaluated = partition.eval_expression_list([col("a").float.is_inf()])
    # Note that null entries are propagated as nulls rather than reported as infinite.
    assert evaluated.to_pydict() == {"a": [True, False, None, True]}


def test_table_if_else() -> None:
table = MicroPartition.from_arrow(
pa.Table.from_pydict({"ones": [1, 1, 1], "zeros": [0, 0, 0], "pred": [True, False, None]})
Expand Down

0 comments on commit 1b2973f

Please sign in to comment.