From 40316376b1a2218370902fa8f5304bc39e59adc6 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Date: Wed, 27 Nov 2024 17:07:21 -0800 Subject: [PATCH] [CHORE] Remove CountMode and ResourceRequest from public API (#3429) CountMode is used in [`Expression.count`](https://www.getdaft.io/projects/docs/en/latest/api_docs/doc_gen/expression_methods/daft.Expression.count.html), but it's not user friendly to have users figure out how to use a CountMode object. Instead we get public methods to accept countmode strings instead. ResourceRequest is used in [DataFrame.with_column](https://www.getdaft.io/projects/docs/en/latest/api_docs/doc_gen/dataframe_methods/daft.DataFrame.with_column.html) and [DataFrame.with_columns](https://www.getdaft.io/projects/docs/en/latest/api_docs/doc_gen/dataframe_methods/daft.DataFrame.with_columns.html) but these are deprecated from public use for the same reasons as CountMode. We thus remove ResourceRequest from the dataframe methods. --- daft/dataframe/dataframe.py | 21 --------------------- daft/expressions/expressions.py | 19 +++++++++++++------ src/daft-core/src/count_mode.rs | 2 +- 3 files changed, 14 insertions(+), 28 deletions(-) diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py index 31148e6896..f971a078ad 100644 --- a/daft/dataframe/dataframe.py +++ b/daft/dataframe/dataframe.py @@ -52,7 +52,6 @@ import ray import torch - from daft.daft import ResourceRequest from daft.io import DataCatalogTable from daft.logical.schema import Schema @@ -1430,7 +1429,6 @@ def with_column( self, column_name: str, expr: Expression, - resource_request: Optional["ResourceRequest"] = None, ) -> "DataFrame": """Adds a column to the current DataFrame with an Expression, equivalent to a ``select`` with all current columns and the new one @@ -1461,22 +1459,12 @@ def with_column( Returns: DataFrame: DataFrame with new column. """ - if resource_request is not None: - raise ValueError( - "Specifying resource_request through `with_column` is deprecated from Daft version >= 0.3.0! " - "Instead, please use the APIs on UDFs directly for controlling the resource requests of your UDFs. " - "You can define resource requests directly on the `@udf(num_gpus=N, num_cpus=M, ...)` decorator. " - "Alternatively, you can override resource requests on UDFs like so: `my_udf.override_options(num_gpus=N)`. " - "Check the Daft documentation for more details." - ) - return self.with_columns({column_name: expr}) @DataframePublicAPI def with_columns( self, columns: Dict[str, Expression], - resource_request: Optional["ResourceRequest"] = None, ) -> "DataFrame": """Adds columns to the current DataFrame with Expressions, equivalent to a ``select`` with all current columns and the new ones @@ -1506,15 +1494,6 @@ def with_columns( Returns: DataFrame: DataFrame with new columns. """ - if resource_request is not None: - raise ValueError( - "Specifying resource_request through `with_columns` is deprecated from Daft version >= 0.3.0! " - "Instead, please use the APIs on UDFs directly for controlling the resource requests of your UDFs. " - "You can define resource requests directly on the `@udf(num_gpus=N, num_cpus=M, ...)` decorator. " - "Alternatively, you can override resource requests on UDFs like so: `my_udf.override_options(num_gpus=N)`. " - "Check the Daft documentation for more details." - ) - new_columns = [col.alias(name) for name, col in columns.items()] builder = self._builder.with_columns(new_columns) diff --git a/daft/expressions/expressions.py b/daft/expressions/expressions.py index 88eb885976..80506f4ca5 100644 --- a/daft/expressions/expressions.py +++ b/daft/expressions/expressions.py @@ -1,5 +1,6 @@ from __future__ import annotations +import builtins import math import os import warnings @@ -43,8 +44,6 @@ from daft.series import Series, item_to_series if TYPE_CHECKING: - import builtins - from daft.io import IOConfig from daft.udf import PartialStatefulUDF, PartialStatelessUDF # This allows Sphinx to correctly work against our "namespaced" accessor functions by overriding @property to @@ -782,12 +781,16 @@ def shift_right(self, other: Expression) -> Expression: expr = Expression._to_expression(other) return Expression._from_pyexpr(self._expr >> expr._expr) - def count(self, mode: CountMode = CountMode.Valid) -> Expression: + def count( + self, mode: Literal["all"] | Literal["valid"] | Literal["null"] | CountMode = CountMode.Valid + ) -> Expression: """Counts the number of values in the expression. Args: - mode: whether to count all values, non-null (valid) values, or null values. Defaults to CountMode.Valid. + mode: A string ("all", "valid", or "null") that represents whether to count all values, non-null (valid) values, or null values. Defaults to "valid". """ + if isinstance(mode, builtins.str): + mode = CountMode.from_count_mode_str(mode) expr = self._expr.count(mode) return Expression._from_pyexpr(expr) @@ -3010,15 +3013,19 @@ def value_counts(self) -> Expression: """ return Expression._from_pyexpr(native.list_value_counts(self._expr)) - def count(self, mode: CountMode = CountMode.Valid) -> Expression: + def count( + self, mode: Literal["all"] | Literal["valid"] | Literal["null"] | CountMode = CountMode.Valid + ) -> Expression: """Counts the number of elements in each list Args: - mode: The mode to use for counting. Defaults to CountMode.Valid + mode: A string ("all", "valid", or "null") that represents whether to count all values, non-null (valid) values, or null values. Defaults to "valid". Returns: Expression: a UInt64 expression which is the length of each list """ + if isinstance(mode, str): + mode = CountMode.from_count_mode_str(mode) return Expression._from_pyexpr(native.list_count(self._expr, mode)) def lengths(self) -> Expression: diff --git a/src/daft-core/src/count_mode.rs b/src/daft-core/src/count_mode.rs index 0b1ea12368..ff0d909239 100644 --- a/src/daft-core/src/count_mode.rs +++ b/src/daft-core/src/count_mode.rs @@ -50,7 +50,7 @@ impl FromStr for CountMode { type Err = DaftError; fn from_str(count_mode: &str) -> DaftResult { - match count_mode { + match count_mode.to_lowercase().as_str() { "all" => Ok(Self::All), "valid" => Ok(Self::Valid), "null" => Ok(Self::Null),