Skip to content

Commit

Permalink
add another test
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewgazelka committed Dec 4, 2024
1 parent 0e99387 commit 6f3d6aa
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 3 deletions.
21 changes: 18 additions & 3 deletions src/daft-connect/src/translation/logical_plan/local_relation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use daft_logical_plan::{
SourceInfo,
};
use daft_micropartition::{python::PyMicroPartition, MicroPartition};
use daft_schema::dtype::DaftDataType;
use daft_table::Table;
use eyre::{bail, ensure, WrapErr};
use itertools::Itertools;
Expand Down Expand Up @@ -56,13 +57,27 @@ pub fn local_relation(plan: spark_connect::LocalRelation) -> eyre::Result<Plan>
.map(|daft_field| daft_field.to_arrow())
.try_collect()?;

let mut dict_idx = 0;

let ipc_fields: Vec<_> = daft_fields
.iter()
.map(|_| {
.map(|field| {
let required_dictionary = field.dtype == DaftDataType::Utf8;

let dictionary_id = match required_dictionary {
true => {
let res = dict_idx;
dict_idx += 1;
debug!("using dictionary id {res}");
Some(res)
}
false => None,
};

// For integer columns, we don't need dictionary encoding
IpcField {
fields: vec![], // No nested fields for primitive types
dictionary_id: None, // No dictionary encoding
fields: vec![], // No nested fields for primitive types
dictionary_id,
}
})
.collect();
Expand Down
4 changes: 4 additions & 0 deletions tests/connect/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ def spark_session():
This fixture is available to all test files and creates a single
Spark session for the entire test suite run.
"""

from daft.daft import connect_start
from daft.logging import setup_debug_logger

setup_debug_logger()

# Start Daft Connect server
server = connect_start()
Expand Down
8 changes: 8 additions & 0 deletions tests/connect/test_create_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,11 @@ def test_create_df(spark_session):
assert len(df_two_pandas) == 3, "Two-column DataFrame should have 3 rows"
assert list(df_two_pandas["num1"]) == [1, 2, 3], "First number column should contain expected values"
assert list(df_two_pandas["num2"]) == [10, 20, 30], "Second number column should contain expected values"

# now do boolean
print("now testing boolean")
boolean_data = [(True,), (False,), (True,)]
df_boolean = spark_session.createDataFrame(boolean_data, ["value"])
df_boolean_pandas = df_boolean.toPandas()
assert len(df_boolean_pandas) == 3, "Boolean DataFrame should have 3 rows"
assert list(df_boolean_pandas["value"]) == [True, False, True], "Boolean DataFrame should contain expected values"

0 comments on commit 6f3d6aa

Please sign in to comment.