Skip to content

Commit

Permalink
[DOCS] Naming consistency of length functions (#2942)
Browse files Browse the repository at this point in the history
Solves #2769 

- Added `length` function to Expression.list
- Added deprecation warning to `Expression.list.lengths`
  • Loading branch information
vicky1999 authored Oct 5, 2024
1 parent 53a84ea commit edeee9e
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 10 deletions.
16 changes: 16 additions & 0 deletions daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import math
import os
import warnings
from datetime import date, datetime, time
from decimal import Decimal
from typing import (
Expand Down Expand Up @@ -2936,6 +2937,21 @@ def count(self, mode: CountMode = CountMode.Valid) -> Expression:
def lengths(self) -> Expression:
"""Gets the length of each list
(DEPRECATED) Please use Expression.list.length instead
Returns:
Expression: a UInt64 expression which is the length of each list
"""
warnings.warn(
"This function will be deprecated from Daft version >= 0.3.5! Instead, please use 'Expression.list.length'",
category=DeprecationWarning,
)

return Expression._from_pyexpr(native.list_count(self._expr, CountMode.All))

def length(self) -> Expression:
"""Gets the length of each list
Returns:
Expression: a UInt64 expression which is the length of each list
"""
Expand Down
9 changes: 9 additions & 0 deletions daft/series.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import warnings
from typing import Any, Literal, TypeVar

from daft.arrow_utils import ensure_array, ensure_chunked_array
Expand Down Expand Up @@ -927,6 +928,14 @@ def iceberg_truncate(self, w: int) -> Series:

class SeriesListNamespace(SeriesNamespace):
def lengths(self) -> Series:
    """Gets the length of each list

    (DEPRECATED) Please use `length` instead

    Returns:
        Series: the result of counting the elements of each list with ``CountMode.All``
    """
    # stacklevel=2 attributes the warning to the caller's line instead of this
    # wrapper. Python's default filters only display DeprecationWarning when it
    # is attributed to user code, so without it the notice is usually invisible.
    warnings.warn(
        "This function will be deprecated from Daft version >= 0.3.5! Instead, please use 'length'",
        category=DeprecationWarning,
        stacklevel=2,
    )

    return Series._from_pyseries(self._series.list_count(CountMode.All))

def length(self) -> Series:
    """Gets the length of each list

    Returns:
        Series: the result of counting the elements of each list with ``CountMode.All``
    """
    counted = self._series.list_count(CountMode.All)
    return Series._from_pyseries(counted)

def get(self, idx: Series, default: Series) -> Series:
Expand Down
2 changes: 1 addition & 1 deletion docs/source/api_docs/expressions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ List
:template: autosummary/accessor_method.rst

Expression.list.join
Expression.list.lengths
Expression.list.length
Expression.list.get
Expression.list.slice
Expression.list.chunk
Expand Down
16 changes: 8 additions & 8 deletions tests/series/test_cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def test_cast_binary_to_fixed_size_binary():
assert casted.to_pylist() == [b"abc", b"def", None, b"bcd", None]


def test_cast_binary_to_fixed_size_binary_fails_with_variable_lengths():
def test_cast_binary_to_fixed_size_binary_fails_with_variable_length():
data = [b"abc", b"def", None, b"bcd", None, b"long"]

input = Series.from_pylist(data)
Expand Down Expand Up @@ -368,7 +368,7 @@ def test_series_cast_python_to_list(dtype) -> None:
assert t.datatype() == target_dtype
assert len(t) == len(data)

assert t.list.lengths().to_pylist() == [3, 3, 3, 3, 2, 2, None]
assert t.list.length().to_pylist() == [3, 3, 3, 3, 2, 2, None]

pydata = t.to_pylist()
assert pydata[-1] is None
Expand Down Expand Up @@ -397,7 +397,7 @@ def test_series_cast_python_to_fixed_size_list(dtype) -> None:
assert t.datatype() == target_dtype
assert len(t) == len(data)

assert t.list.lengths().to_pylist() == [3, 3, 3, 3, 3, 3, None]
assert t.list.length().to_pylist() == [3, 3, 3, 3, 3, 3, None]

pydata = t.to_pylist()
assert pydata[-1] is None
Expand Down Expand Up @@ -426,7 +426,7 @@ def test_series_cast_python_to_embedding(dtype) -> None:
assert t.datatype() == target_dtype
assert len(t) == len(data)

assert t.list.lengths().to_pylist() == [3, 3, 3, 3, 3, 3, None]
assert t.list.length().to_pylist() == [3, 3, 3, 3, 3, 3, None]

pydata = t.to_pylist()
assert pydata[-1] is None
Expand All @@ -448,7 +448,7 @@ def test_series_cast_list_to_embedding(dtype) -> None:
assert t.datatype() == target_dtype
assert len(t) == len(data)

assert t.list.lengths().to_pylist() == [3, 3, 3, None]
assert t.list.length().to_pylist() == [3, 3, 3, None]

pydata = t.to_pylist()
assert pydata[-1] is None
Expand All @@ -473,7 +473,7 @@ def test_series_cast_numpy_to_image() -> None:
assert t.datatype() == target_dtype
assert len(t) == len(data)

assert t.list.lengths().to_pylist() == [12, 27, None]
assert t.list.length().to_pylist() == [12, 27, None]

pydata = t.to_pylist()
assert pydata[-1] is None
Expand All @@ -495,7 +495,7 @@ def test_series_cast_numpy_to_image_infer_mode() -> None:
assert t.datatype() == target_dtype
assert len(t) == len(data)

assert t.list.lengths().to_pylist() == [4, 27, None]
assert t.list.length().to_pylist() == [4, 27, None]

pydata = t.to_arrow().to_pylist()
assert pydata[0] == {
Expand Down Expand Up @@ -536,7 +536,7 @@ def test_series_cast_python_to_fixed_shape_image() -> None:
assert t.datatype() == target_dtype
assert len(t) == len(data)

assert t.list.lengths().to_pylist() == [12, 12, None]
assert t.list.length().to_pylist() == [12, 12, None]

pydata = t.to_pylist()
assert pydata[-1] is None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,12 @@ def test_fixed_list_count(fixed_table):

result = fixed_table.eval_expression_list([col("col").list.count(CountMode.Null)])
assert result.to_pydict() == {"col": [0, 0, 1, 2, None]}


def test_list_length(fixed_table):
    """Checks that deprecated ``list.lengths`` warns and matches ``list.length``."""
    expected = {"col": [2, 2, 2, 2, None]}

    # The deprecated spelling must emit a DeprecationWarning when evaluated.
    with pytest.warns(DeprecationWarning):
        deprecated_out = fixed_table.eval_expression_list([col("col").list.lengths()])
    current_out = fixed_table.eval_expression_list([col("col").list.length()])

    assert deprecated_out.to_pydict() == expected
    assert current_out.to_pydict() == expected
2 changes: 1 addition & 1 deletion tutorials/delta_lake/2-distributed-batch-inference.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@
"\n",
"# Prune data\n",
"df = df.limit(NUM_ROWS)\n",
"df = df.where(df[\"object\"].list.lengths() == 1)"
"df = df.where(df[\"object\"].list.length() == 1)"
]
},
{
Expand Down

0 comments on commit edeee9e

Please sign in to comment.