Skip to content

Commit

Permalink
SNOW-1818018: add functions.size, collect_list (#2677)
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-aalam authored Nov 27, 2024
1 parent 0f2f080 commit 8f080c8
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 0 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
- `keyType`: keys of the map
- `valueType`: values of the map
- Added support for `include_nulls` argument in `DataFrame.unpivot`.
- Added support for the following functions in `functions.py`:
- `size` to get size of array, object, or map columns.
- `collect_list` as an alias of `array_agg`.
- Added parameter `ast_enabled` to session for internal usage (default: `False`).

#### Improvements
Expand Down
2 changes: 2 additions & 0 deletions docs/source/snowpark/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ Functions
col
collate
collation
collect_list
collect_set
column
concat
Expand Down Expand Up @@ -257,6 +258,7 @@ Functions
sha2
sin
sinh
size
skew
snowflake_cortex_summarize
sort_array
Expand Down
39 changes: 39 additions & 0 deletions src/snowflake/snowpark/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6722,6 +6722,44 @@ def array_unique_agg(col: ColumnOrName, _emit_ast: bool = True) -> Column:
return _call_function("array_unique_agg", True, c, _emit_ast=_emit_ast)


@publicapi
def size(col: ColumnOrName, _emit_ast: bool = True) -> Column:
    """Computes the size of an ARRAY, OBJECT or MAP column.

    Arrays report their element count and objects report their number of
    keys. Input of any other type produces NULL.

    Args:
        col: A :class:`Column` object or column name that determines the values.

    Example::
        >>> df = session.create_dataframe([([1,2,3], {'a': 1, 'b': 2}, 3)], ['col1', 'col2', 'col3'])
        >>> df.select(size(df.col1), size(df.col2), size(df.col3)).show()
        ----------------------------------------------------------
        |"SIZE(""COL1"")"  |"SIZE(""COL2"")"  |"SIZE(""COL3"")"  |
        ----------------------------------------------------------
        |3                 |2                 |NULL              |
        ----------------------------------------------------------
        <BLANKLINE>
    """
    source_col = _to_col_if_str(col, "size")
    # The type predicates and size helpers below all operate on the VARIANT
    # form of the input.
    # NOTE(review): unlike every other helper call here, to_variant (and lit
    # further down) is invoked without _emit_ast=False -- confirm that is
    # intentional.
    as_variant = to_variant(source_col)

    # TODO: SNOW-1831923 build AST
    # NOTE(review): the _emit_ast parameter is accepted but not consulted yet.

    # Branch 1: arrays report their element count.
    array_check = is_array(as_variant, _emit_ast=False)
    array_length = array_size(as_variant, _emit_ast=False)
    case_expr = when(array_check, array_length, _emit_ast=False)

    # Branch 2: objects report how many keys they hold.
    object_check = is_object(as_variant, _emit_ast=False)
    key_count = array_size(
        object_keys(as_variant, _emit_ast=False), _emit_ast=False
    )
    case_expr = case_expr.when(object_check, key_count, _emit_ast=False)

    # Fallback: every other type yields NULL.
    null_value = lit(None)
    sized = case_expr.otherwise(null_value, _emit_ast=False)

    # Label the result SIZE(<column name>) rather than the CASE expression text.
    return sized.alias(f"SIZE({source_col.get_name()})", _emit_ast=False)


@publicapi
def object_agg(
key: ColumnOrName, value: ColumnOrName, _emit_ast: bool = True
Expand Down Expand Up @@ -9865,6 +9903,7 @@ def sproc(
# Aliases added to ease migration of user code: each name below is simply
# bound to the function object on the right-hand side.
call_builtin = call_function
collect_set = array_unique_agg
collect_list = array_agg
builtin = function
countDistinct = count_distinct
substr = substring
Expand Down
6 changes: 6 additions & 0 deletions tests/integ/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
regexp_replace,
reverse,
sequence,
size,
snowflake_cortex_summarize,
split,
sqrt,
Expand Down Expand Up @@ -1742,6 +1743,11 @@ def test_array_negative(session):
df.select(array_size([1])).collect()
assert "'ARRAY_SIZE' expected Column or str, got: <class 'list'>" in str(ex_info)

with pytest.raises(
TypeError, match="'SIZE' expected Column or str, got: <class 'list'>"
):
df.select(size([1])).collect()

with pytest.raises(TypeError) as ex_info:
df.select(array_slice([1], "col1", "col2")).collect()
assert "'ARRAY_SLICE' expected Column or str, got: <class 'list'>" in str(ex_info)
Expand Down

0 comments on commit 8f080c8

Please sign in to comment.