From e9591dbe4bf7bf9052920182d8bd4bf160c66168 Mon Sep 17 00:00:00 2001
From: Andrew Gazelka
Date: Wed, 20 Nov 2024 17:01:20 -0800
Subject: [PATCH] [TEST] connect: `df.dtypes`

---
 tests/connect/test_dtypes.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 tests/connect/test_dtypes.py

diff --git a/tests/connect/test_dtypes.py b/tests/connect/test_dtypes.py
new file mode 100644
index 0000000000..96729328a8
--- /dev/null
+++ b/tests/connect/test_dtypes.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+from pyspark.sql.functions import col
+
+
+def test_dtypes(spark_session):
+    """Check `df.dtypes` and the schema for a ranged frame plus a derived column."""
+    # Start from `range(10)` and append `group`, computed as `id % 3`.
+    frame = spark_session.range(10).withColumn("group", col("id") % 3)
+
+    # Both columns are expected to be reported as bigint, in this order.
+    column_names = ("id", "group")
+    assert frame.dtypes == [(name, "bigint") for name in column_names]
+
+    # The per-field view of the schema must agree with `dtypes`.
+    for name in column_names:
+        assert frame.schema[name].dataType.simpleString() == "bigint"