Skip to content

Commit

Permalink
Fix tests and style.
Browse files: browse the repository at this point in the history
  • Loading branch information
clarkzinzow committed Dec 1, 2023
1 parent 1d43860 commit 007425f
Show file tree
Hide file tree
Showing 14 changed files with 55 additions and 58 deletions.
3 changes: 1 addition & 2 deletions daft/logical/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
import sys
from typing import TYPE_CHECKING, Iterator

from daft.daft import CsvParseOptions
from daft.daft import JsonParseOptions
from daft.daft import CsvParseOptions, JsonParseOptions
from daft.daft import PyField as _PyField
from daft.daft import PySchema as _PySchema
from daft.daft import read_csv_schema as _read_csv_schema
Expand Down
2 changes: 1 addition & 1 deletion daft/table/table_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
CsvConvertOptions,
CsvParseOptions,
CsvReadOptions,
IOConfig,
JsonConvertOptions,
JsonParseOptions,
JsonReadOptions,
IOConfig,
NativeStorageConfig,
PythonStorageConfig,
StorageConfig,
Expand Down
2 changes: 1 addition & 1 deletion src/daft-json/test/iris_tiny.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@
{"sepalLength": 5.4, "sepalWidth": 3.9, "petalLength": 1.3, "petalWidth": 0.4, "species": "setosa"}
{"sepalLength": 5.1, "sepalWidth": 3.5, "petalLength": 1.4, "petalWidth": 0.3, "species": "setosa"}
{"sepalLength": 5.7, "sepalWidth": 3.8, "petalLength": 1.7, "petalWidth": 0.3, "species": "setosa"}
{"sepalLength": 5.1, "sepalWidth": 3.8, "petalLength": 1.5, "petalWidth": 0.3, "species": "setosa"}
{"sepalLength": 5.1, "sepalWidth": 3.8, "petalLength": 1.5, "petalWidth": 0.3, "species": "setosa"}
2 changes: 1 addition & 1 deletion src/daft-json/test/iris_tiny_all_null_column.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
{"sepalLength": 4.7, "sepalWidth": 3.2, "petalLength": null, "petalWidth": 0.2, "species": "setosa"}
{"sepalLength": 4.6, "sepalWidth": 3.1, "petalLength": null, "petalWidth": 0.2, "species": "setosa"}
{"sepalLength": 5.0, "sepalWidth": 3.6, "petalLength": null, "petalWidth": 0.2, "species": "setosa"}
{"sepalLength": 5.4, "sepalWidth": 3.9, "petalLength": null, "petalWidth": 0.4, "species": "setosa"}
{"sepalLength": 5.4, "sepalWidth": 3.9, "petalLength": null, "petalWidth": 0.4, "species": "setosa"}
2 changes: 1 addition & 1 deletion src/daft-json/test/iris_tiny_conflicting_dtypes.jsonl
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"sepalLength": 5.1, "sepalWidth": false, "petalLength": 3, "petalWidth": 3, "species": "setosa"}
{"sepalLength": "foo", "sepalWidth": 3.0, "petalLength": "bar", "petalWidth": 0.2, "species": false}
{"sepalLength": "foo", "sepalWidth": 3.0, "petalLength": "bar", "petalWidth": 0.2, "species": false}
2 changes: 1 addition & 1 deletion src/daft-json/test/iris_tiny_nulls.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
{"sepalLength": 4.7, "sepalWidth": 3.2, "petalLength": null, "petalWidth": 0.2, "species": "setosa"}
{"sepalLength": 4.6, "sepalWidth": 3.1, "petalLength": 1.5, "petalWidth": null, "species": "setosa"}
{"sepalLength": 5.0, "sepalWidth": 3.6, "petalLength": 1.4, "petalWidth": 0.2, "species": null}
{"sepalLength": null, "sepalWidth": null, "petalLength": null, "petalWidth": null, "species": null}
{"sepalLength": null, "sepalWidth": null, "petalLength": null, "petalWidth": null, "species": null}
2 changes: 1 addition & 1 deletion src/daft-micropartition/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ common-error = {path = "../common/error", default-features = false}
daft-core = {path = "../daft-core", default-features = false}
daft-csv = {path = "../daft-csv", default-features = false}
daft-dsl = {path = "../daft-dsl", default-features = false}
daft-json = {path = "../daft-json", default-features = false}
daft-io = {path = "../daft-io", default-features = false}
daft-json = {path = "../daft-json", default-features = false}
daft-parquet = {path = "../daft-parquet", default-features = false}
daft-scan = {path = "../daft-scan", default-features = false}
daft-stats = {path = "../daft-stats", default-features = false}
Expand Down
8 changes: 5 additions & 3 deletions src/daft-plan/src/optimization/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -515,14 +515,16 @@ mod tests {
],
OptimizerConfig::new(20),
);
let fields = vec![Field::new("a", DataType::Int64)];
let proj_exprs = vec![
col("a") + lit(1),
(col("a") + lit(2)).alias("b"),
(col("a") + lit(3)).alias("c"),
];
let plan = dummy_scan_node(vec![Field::new("a", DataType::Int64)])
let filter_predicate = col("a").lt(&lit(2));
let plan = dummy_scan_node(fields.clone())
.project(proj_exprs, Default::default())?
.filter(col("a").lt(&lit(2)))?
.filter(filter_predicate)?
.build();
let mut pass_count = 0;
let mut did_transform = false;
Expand All @@ -536,7 +538,7 @@ mod tests {
let expected = "\
Filter: [[[col(a) < lit(2)] | lit(false)] | lit(false)] & lit(true)\
\n Project: col(a) + lit(3) AS c, col(a) + lit(1), col(a) + lit(2) AS b\
\n Source: Json, File paths = [/foo], File schema = a (Int64), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64)";
assert_eq!(opt_plan.repr_indent(), expected);
Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion src/daft-plan/src/optimization/rules/drop_repartition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ mod tests {
.build();
let expected = "\
Repartition: Scheme = Hash, Number of partitions = 5, Partition by = col(a)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand Down
30 changes: 15 additions & 15 deletions src/daft-plan/src/optimization/rules/push_down_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ mod tests {
.build();
let expected = "\
Filter: [col(b) == lit(\"foo\")] & [col(a) < lit(2)]\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand All @@ -279,7 +279,7 @@ mod tests {
let expected = "\
Project: col(a)\
\n Filter: col(a) < lit(2)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand All @@ -297,7 +297,7 @@ mod tests {
let expected = "\
Project: col(a), col(b)\
\n Filter: [col(a) < lit(2)] & [col(b) == lit(\"foo\")]\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand All @@ -317,7 +317,7 @@ mod tests {
let expected = "\
Filter: col(a) < lit(2)\
\n Project: col(a) + lit(1)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand All @@ -338,7 +338,7 @@ mod tests {
let expected = "\
Project: col(a) + lit(1)\
\n Filter: [col(a) + lit(1)] < lit(2)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand All @@ -356,7 +356,7 @@ mod tests {
let expected = "\
Sort: Sort by = (col(a), descending)\
\n Filter: col(a) < lit(2)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
// TODO(Clark): For tests in which we only care about reordering of operators, maybe switch to a form that leverages the single-node display?
// let expected = format!("{sort}\n {filter}\n {source}");
assert_optimized_plan_eq(plan, expected)?;
Expand All @@ -376,7 +376,7 @@ mod tests {
let expected = "\
Repartition: Scheme = Hash, Number of partitions = 1, Partition by = col(a)\
\n Filter: col(a) < lit(2)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand All @@ -395,9 +395,9 @@ mod tests {
let expected = "\
Concat\
\n Filter: col(a) < lit(2)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)\
\n Filter: col(a) < lit(2)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand All @@ -423,8 +423,8 @@ mod tests {
let expected = "\
Join: Type = Inner, On = col(b), Output schema = a (Int64), b (Utf8), c (Float64)\
\n Filter: col(a) < lit(2)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)\
\n Source: Json, File paths = [/foo], File schema = b (Utf8), c (Float64), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = b (Utf8), c (Float64)";
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)\
\n Source: Json, File paths = [/foo], File schema = b (Utf8), c (Float64), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = b (Utf8), c (Float64)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand All @@ -449,9 +449,9 @@ mod tests {
.build();
let expected = "\
Join: Type = Inner, On = col(b), Output schema = a (Int64), b (Utf8), c (Float64)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Utf8), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Utf8)\
\n Filter: col(c) < lit(2.0)\
\n Source: Json, File paths = [/foo], File schema = b (Utf8), c (Float64), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = b (Utf8), c (Float64)";
\n Source: Json, File paths = [/foo], File schema = b (Utf8), c (Float64), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = b (Utf8), c (Float64)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand All @@ -475,9 +475,9 @@ mod tests {
let expected = "\
Join: Type = Inner, On = col(b), Output schema = a (Int64), b (Int64), c (Float64)\
\n Filter: col(b) < lit(2)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Int64), c (Float64), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Int64), c (Float64)\
\n Source: Json, File paths = [/foo], File schema = a (Int64), b (Int64), c (Float64), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = a (Int64), b (Int64), c (Float64)\
\n Filter: col(b) < lit(2)\
\n Source: Json, File paths = [/foo], File schema = b (Int64), Format-specific config = Json(JsonSourceConfig), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = b (Int64)";
\n Source: Json, File paths = [/foo], File schema = b (Int64), Format-specific config = Json(JsonSourceConfig { buffer_size: None, chunk_size: None }), Storage config = Native(NativeStorageConfig { io_config: None, multithreaded_io: true }), Output schema = b (Int64)";
assert_optimized_plan_eq(plan, expected)?;
Ok(())
}
Expand Down
Loading

0 comments on commit 007425f

Please sign in to comment.