Skip to content

Commit

Permalink
feat(api): make struct() idempotent, add type option
Browse files Browse the repository at this point in the history
Part of ibis-project#8289

Depends on the array API getting updated
  • Loading branch information
NickCrews committed Jun 27, 2024
1 parent 33ec754 commit e2a91c9
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 43 deletions.
1 change: 1 addition & 0 deletions ibis/backends/exasol/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class ExasolCompiler(SQLGlotCompiler):
ops.StringSplit,
ops.StringToDate,
ops.StringToTimestamp,
ops.StructColumn,
ops.TimeDelta,
ops.TimestampAdd,
ops.TimestampBucket,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sqlite/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class SQLiteCompiler(SQLGlotCompiler):
ops.TimestampDiff,
ops.StringToDate,
ops.StringToTimestamp,
ops.StructColumn,
ops.TimeDelta,
ops.DateDelta,
ops.TimestampDelta,
Expand Down
77 changes: 62 additions & 15 deletions ibis/backends/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
import ibis.expr.datatypes as dt
from ibis import util
from ibis.backends.tests.errors import (
ClickHouseDatabaseError,
PolarsColumnNotFoundError,
PsycoPg2InternalError,
PsycoPg2SyntaxError,
Py4JJavaError,
PySparkAnalysisException,
)
from ibis.common.annotations import ValidationError
from ibis.common.exceptions import IbisError

pytestmark = [
Expand All @@ -28,6 +30,62 @@
pytest.mark.notimpl(["datafusion", "druid", "oracle", "exasol"]),
]

mark_notimpl_postgres_literals = pytest.mark.notimpl(
"postgres", reason="struct literals not implemented", raises=PsycoPg2SyntaxError
)


@pytest.mark.notimpl("risingwave")
@pytest.mark.broken("postgres", reason="JSON handling is buggy")
@pytest.mark.notimpl(
"flink",
raises=Py4JJavaError,
reason="Unexpected error in type inference logic of function 'COALESCE'",
)
def test_struct_factory(con):
s = ibis.struct({"a": 1, "b": 2})
assert con.execute(s) == {"a": 1, "b": 2}

s2 = ibis.struct(s)
assert con.execute(s2) == {"a": 1, "b": 2}

typed = ibis.struct({"a": 1, "b": 2}, type="struct<a: string, b: string>")
assert con.execute(typed) == {"a": "1", "b": "2"}

typed2 = ibis.struct(s, type="struct<a: string, b: string>")
assert con.execute(typed2) == {"a": "1", "b": "2"}

items = ibis.struct([("a", 1), ("b", 2)])
assert con.execute(items) == {"a": 1, "b": 2}


@pytest.mark.parametrize("type", ["struct<>", "struct<a: int64, b: int64>"])
@pytest.mark.parametrize("val", [{}, []])
def test_struct_factory_empty(val, type):
with pytest.raises(ValidationError):
ibis.struct(val, type=type)


@pytest.mark.notimpl("risingwave")
@mark_notimpl_postgres_literals
@pytest.mark.notyet(
"clickhouse", raises=ClickHouseDatabaseError, reason="nested types can't be NULL"
)
@pytest.mark.broken(
"polars",
reason=r"pl.lit(None, type='struct<a: int64>') gives {'a': None}: https://github.com/pola-rs/polars/issues/3462",
)
def test_struct_factory_null(con):
with pytest.raises(ValidationError):
ibis.struct(None)
none_typed = ibis.struct(None, type="struct<a: float64, b: float>")
assert none_typed.type() == dt.Struct(fields={"a": dt.float64, "b": dt.float64})
assert con.execute(none_typed) is None
# Execute a real value here, so the backends that don't support structs
# actually xfail as we expect them to.
# Otherwise would have to @mark.xfail every test in this file besides this one.
assert con.execute(ibis.struct({"a": 1, "b": 2})) == {"a": 1, "b": 2}


@pytest.mark.notimpl(["dask"])
@pytest.mark.parametrize(
Expand Down Expand Up @@ -78,6 +136,9 @@ def test_all_fields(struct, struct_df):

@pytest.mark.notimpl(["postgres", "risingwave"])
@pytest.mark.parametrize("field", ["a", "b", "c"])
@pytest.mark.notyet(
["flink"], reason="flink doesn't support creating struct columns from literals"
)
def test_literal(backend, con, field):
query = _STRUCT_LITERAL[field]
dtype = query.type().to_pandas()
Expand All @@ -87,7 +148,7 @@ def test_literal(backend, con, field):
backend.assert_series_equal(result, expected.astype(dtype))


@pytest.mark.notimpl(["postgres"])
@mark_notimpl_postgres_literals
@pytest.mark.parametrize("field", ["a", "b", "c"])
@pytest.mark.notyet(
["clickhouse"], reason="clickhouse doesn't support nullable nested types"
Expand Down Expand Up @@ -137,14 +198,6 @@ def test_collect_into_struct(alltypes):
assert len(val.loc[result.group == "1"].iat[0]["key"]) == 730


@pytest.mark.notimpl(
["postgres"], reason="struct literals not implemented", raises=PsycoPg2SyntaxError
)
@pytest.mark.notimpl(
["risingwave"],
reason="struct literals not implemented",
raises=PsycoPg2InternalError,
)
@pytest.mark.notimpl(["flink"], raises=Py4JJavaError, reason="not implemented in ibis")
def test_field_access_after_case(con):
s = ibis.struct({"a": 3})
Expand Down Expand Up @@ -240,12 +293,6 @@ def test_keyword_fields(con, nullable):
raises=PolarsColumnNotFoundError,
reason="doesn't seem to support IN-style subqueries on structs",
)
@pytest.mark.notimpl(
# https://github.com/pandas-dev/pandas/issues/58909
["pandas", "dask"],
raises=TypeError,
reason="unhashable type: 'dict'",
)
@pytest.mark.xfail_version(
pyspark=["pyspark<3.5"],
reason="requires pyspark 3.5",
Expand Down
75 changes: 52 additions & 23 deletions ibis/expr/types/structs.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,33 @@
from __future__ import annotations

import collections
from keyword import iskeyword
from typing import TYPE_CHECKING

from public import public

import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis.common.annotations import ValidationError
from ibis.common.deferred import deferrable
from ibis.common.exceptions import IbisError
from ibis.expr.types.generic import Column, Scalar, Value, literal
from ibis.expr.types.generic import Column, Scalar, Value

if TYPE_CHECKING:
from collections.abc import Iterable, Mapping, Sequence

import ibis.expr.datatypes as dt
import ibis.expr.types as ir
from ibis.expr.types.typing import V


@public
@deferrable
def struct(
value: Iterable[tuple[str, V]] | Mapping[str, V],
value: Iterable[tuple[str, V]]
| Mapping[str, V]
| StructValue
| ir.NullValue
| None,
*,
type: str | dt.DataType | None = None,
) -> StructValue:
"""Create a struct expression.
Expand All @@ -37,8 +42,7 @@ def struct(
`(str, Value)`.
type
An instance of `ibis.expr.datatypes.DataType` or a string indicating
the Ibis type of `value`. This is only used if all of the input values
are Python literals. eg `struct<a: float, b: string>`.
the Ibis type of `value`. eg `struct<a: float, b: string>`.
Returns
-------
Expand Down Expand Up @@ -66,26 +70,51 @@ def struct(
Create a struct column from a column and a scalar literal
>>> t = ibis.memtable({"a": [1, 2, 3]})
>>> ibis.struct([("a", t.a), ("b", "foo")])
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ StructColumn()
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ struct<a: int64, b: string> │
├─────────────────────────────┤
│ {'a': 1, 'b': 'foo'} │
│ {'a': 2, 'b': 'foo'} │
│ {'a': 3, 'b': 'foo'} │
└─────────────────────────────┘
>>> ibis.struct([("a", t.a), ("b", "foo")], type="struct<a: float, b: string>")
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Cast(StructColumn(), struct<a: float64, b: string>)
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
│ struct<a: float64, b: string>
├─────────────────────────────────────────────────────
│ {'a': 1.0, 'b': 'foo'}
│ {'a': 2.0, 'b': 'foo'}
│ {'a': 3.0, 'b': 'foo'}
└─────────────────────────────────────────────────────
"""
import ibis.expr.operations as ops

type = dt.dtype(type) if type is not None else None
if type is not None and not isinstance(type, dt.Struct):
raise ValidationError(f"type must be an struct, got {type}")

if isinstance(value, ir.Value):
if type is not None:
return value.cast(type)
elif isinstance(value, StructValue):
return value
else:
raise ValidationError(
f"If no type passed, value must be a struct, got {value.type()}"
)

if value is None:
if type is None:
raise ValidationError("If values is None/NULL, type must be provided")
return ir.null(type)

fields = dict(value)
if any(isinstance(value, Value) for value in fields.values()):
names = tuple(fields.keys())
values = tuple(fields.values())
return ops.StructColumn(names=names, values=values).to_expr()
else:
return literal(collections.OrderedDict(fields), type=type)
if not fields:
raise ValidationError("Struct must have at least one field")
names = fields.keys()
result = ops.StructColumn(names=names, values=fields.values()).to_expr()
if type is not None:
if not set(names).issuperset(type.names):
raise ValidationError(
f"The passed type requires fields {type.names}",
f" but only found fields {names}",
)
result = result.cast(type)
return result


@public
Expand Down
7 changes: 2 additions & 5 deletions ibis/tests/expr/test_literal.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import ibis
import ibis.expr.datatypes as dt
from ibis.common.collections import frozendict
from ibis.common.annotations import ValidationError
from ibis.expr.operations import Literal
from ibis.tests.util import assert_pickle_roundtrip

Expand Down Expand Up @@ -109,9 +109,6 @@ def test_normalized_underlying_value(userinput, literal_type, expected_type):
def test_struct_literal(value):
typestr = "struct<field1: string, field2: float64>"
a = ibis.struct(value, type=typestr)
assert a.op().value == frozendict(
field1=str(value["field1"]), field2=float(value["field2"])
)
assert a.type() == dt.dtype(typestr)


Expand All @@ -123,7 +120,7 @@ def test_struct_literal(value):
],
)
def test_struct_literal_non_castable(value):
with pytest.raises(TypeError, match="Unable to normalize"):
with pytest.raises(ValidationError):
ibis.struct(value, type="struct<field1: string, field2: float64>")


Expand Down

0 comments on commit e2a91c9

Please sign in to comment.