From 5e22b8b9569b54e9491d83860102ca727e5ac498 Mon Sep 17 00:00:00 2001 From: Jay Chia Date: Thu, 26 Sep 2024 03:08:41 -0700 Subject: [PATCH] First pass through basic_concepts --- docs/source/user_guide/basic_concepts.rst | 130 +++++++++++++++------- 1 file changed, 90 insertions(+), 40 deletions(-) diff --git a/docs/source/user_guide/basic_concepts.rst b/docs/source/user_guide/basic_concepts.rst index 224e7e48a4..64ef5ccd5f 100644 --- a/docs/source/user_guide/basic_concepts.rst +++ b/docs/source/user_guide/basic_concepts.rst @@ -670,9 +670,9 @@ JSON Expressions If you have a column of JSON strings, Daft provides the :meth:`.json.* ` method namespace to run `JQ-style filters `_ on them. For example, to extract a value from a JSON object: -.. tab-set:: +.. tabs:: - .. tab-item:: 🐍 Python + .. group-tab:: 🐍 Python .. code:: python @@ -685,7 +685,7 @@ If you have a column of JSON strings, Daft provides the :meth:`.json.* `, and can only take on the values True or False. -.. code:: python +.. tabs:: - df = daft.from_pydict({"C": [True, False, True]}) - df["C"] + .. group-tab:: 🐍 Python + + .. code:: python + + df = daft.from_pydict({"C": [True, False, True]}) Daft supports logical operations such as ``&`` (and) and ``|`` (or) between logical expressions. @@ -741,27 +744,49 @@ Many of the types in Daft support comparisons between expressions that returns a For example, here we can compare if each element in column "A" is equal to elements in column "B": -.. code:: python +.. tabs:: - df = daft.from_pydict({"A": [1, 2, 3], "B": [1, 2, 4]}) + .. group-tab:: 🐍 Python - df = df.with_column("A_eq_B", df["A"] == df["B"]) + .. code:: python - df.collect() + df = daft.from_pydict({"A": [1, 2, 3], "B": [1, 2, 4]}) + + df = df.with_column("A_eq_B", df["A"] == df["B"]) + + df.collect() + + .. group-tab:: ⚙️ SQL + + .. code:: python + + df = daft.from_pydict({"A": [1, 2, 3], "B": [1, 2, 4]}) + + df = daft.sql(""" + SELECT + A, + B, + A = B AS A_eq_B + FROM df + """) + + df.collect() .. code-block:: text :caption: Output - +---------+---------+-----------+ - | A | B | A_eq_B | - | Int64 | Int64 | Boolean | - +=========+=========+===========+ - | 1 | 1 | true | - +---------+---------+-----------+ - | 2 | 2 | true | - +---------+---------+-----------+ - | 3 | 4 | false | - +---------+---------+-----------+ + ╭───────┬───────┬─────────╮ + │ A ┆ B ┆ A_eq_B │ + │ --- ┆ --- ┆ --- │ + │ Int64 ┆ Int64 ┆ Boolean │ + ╞═══════╪═══════╪═════════╡ + │ 1 ┆ 1 ┆ true │ + ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ + │ 2 ┆ 2 ┆ true │ + ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤ + │ 3 ┆ 4 ┆ false │ + ╰───────┴───────┴─────────╯ + (Showing first 3 of 3 rows) Other useful comparisons can be found in the :ref:`Expressions API reference `. @@ -771,32 +796,57 @@ If Else Pattern The :meth:`.if_else() ` method is a useful expression to have up your sleeve for choosing values between two other expressions based on a logical expression: -.. code:: python +.. tabs:: + + .. group-tab:: 🐍 Python + + .. code:: python + + df = daft.from_pydict({"A": [1, 2, 3], "B": [0, 2, 4]}) + + # Pick values from column A if the value in column A is bigger + # than the value in column B. Otherwise, pick values from column B. + df = df.with_column( + "A_if_bigger_else_B", + (df["A"] > df["B"]).if_else(df["A"], df["B"]), + ) - df = daft.from_pydict({"A": [1, 2, 3], "B": [0, 2, 4]}) + df.collect() + + .. group-tab:: ⚙️ SQL + + .. code:: python + + df = daft.from_pydict({"A": [1, 2, 3], "B": [0, 2, 4]}) - # Pick values from column A if the value in column A is bigger - # than the value in column B. Otherwise, pick values from column B. - df = df.with_column( - "A_if_bigger_else_B", - (df["A"] > df["B"]).if_else(df["A"], df["B"]), - ) + df = daft.sql(""" + SELECT + A, + B, + CASE + WHEN A > B THEN A + ELSE B + END AS A_if_bigger_else_B + FROM df + """) - df.collect() + df.collect() .. code-block:: text :caption: Output - +---------+---------+----------------------+ - | A | B | A_if_bigger_else_B | - | Int64 | Int64 | Int64 | - +=========+=========+======================+ - | 1 | 0 | 1 | - +---------+---------+----------------------+ - | 2 | 2 | 2 | - +---------+---------+----------------------+ - | 3 | 4 | 4 | - +---------+---------+----------------------+ + ╭───────┬───────┬────────────────────╮ + │ A ┆ B ┆ A_if_bigger_else_B │ + │ --- ┆ --- ┆ --- │ + │ Int64 ┆ Int64 ┆ Int64 │ + ╞═══════╪═══════╪════════════════════╡ + │ 1 ┆ 0 ┆ 1 │ + ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 2 ┆ 2 ┆ 2 │ + ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 3 ┆ 4 ┆ 4 │ + ╰───────┴───────┴────────────────────╯ + (Showing first 3 of 3 rows) This is a useful expression for cleaning your data!