From e50cd3fd8be5dac1ca990ef4db7efee06195bcb8 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Sat, 7 Sep 2024 18:17:51 +0100 Subject: [PATCH] remove arrow direct dependency, use just datafusion (#38) --- Cargo.toml | 7 +------ src/common.rs | 8 ++++---- src/common_macros.rs | 12 ++++++------ src/common_union.rs | 12 +++++++----- src/json_as_text.rs | 11 ++++++----- src/json_contains.rs | 8 ++++---- src/json_get.rs | 10 +++++----- src/json_get_bool.rs | 8 ++++---- src/json_get_float.rs | 8 ++++---- src/json_get_int.rs | 8 ++++---- src/json_get_json.rs | 8 ++++---- src/json_get_str.rs | 8 ++++---- src/json_length.rs | 8 ++++---- src/lib.rs | 7 ++++--- src/rewrite.rs | 18 +++++++++--------- tests/main.rs | 10 ++++++---- tests/utils/mod.rs | 13 ++++++------- 17 files changed, 82 insertions(+), 82 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a1f5e88..f6251fa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,11 +11,7 @@ repository = "https://github.com/datafusion-contrib/datafusion-functions-json/" rust-version = "1.76.0" [dependencies] -arrow = "52.2" -arrow-schema = "52.2" -datafusion-common = "41" -datafusion-expr = "41" -datafusion-execution = "41" +datafusion = "41" jiter = "0.5" paste = "1" log = "0.4" @@ -23,7 +19,6 @@ log = "0.4" [dev-dependencies] codspeed-criterion-compat = "2.3" criterion = "0.5.1" -datafusion = "41" clap = "4" tokio = { version = "1.37", features = ["full"] } diff --git a/src/common.rs b/src/common.rs index 9cbee92..e3d6d7c 100644 --- a/src/common.rs +++ b/src/common.rs @@ -1,9 +1,9 @@ use std::str::Utf8Error; -use arrow::array::{Array, ArrayRef, Int64Array, LargeStringArray, StringArray, UInt64Array}; -use arrow_schema::DataType; -use datafusion_common::{exec_err, plan_err, Result as DataFusionResult, ScalarValue}; -use datafusion_expr::ColumnarValue; +use datafusion::arrow::array::{Array, ArrayRef, Int64Array, LargeStringArray, StringArray, UInt64Array}; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::{exec_err, plan_err, Result as DataFusionResult, ScalarValue}; +use datafusion::logical_expr::ColumnarValue; use jiter::{Jiter, JiterError, Peek}; use crate::common_union::{is_json_union, json_from_union_scalar, nested_json_array}; diff --git a/src/common_macros.rs b/src/common_macros.rs index f3aa3b1..a2c6cd0 100644 --- a/src/common_macros.rs +++ b/src/common_macros.rs @@ -18,8 +18,8 @@ macro_rules! make_udf_function { ($udf_impl:ty, $expr_fn_name:ident, $($arg:ident)*, $doc:expr) => { paste::paste! { #[doc = $doc] - #[must_use] pub fn $expr_fn_name($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr { - datafusion_expr::Expr::ScalarFunction(datafusion_expr::expr::ScalarFunction::new_udf( + #[must_use] pub fn $expr_fn_name($($arg: datafusion::logical_expr::Expr),*) -> datafusion::logical_expr::Expr { + datafusion::logical_expr::Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf( [< $expr_fn_name _udf >](), vec![$($arg),*], )) @@ -27,16 +27,16 @@ macro_rules! make_udf_function { /// Singleton instance of [`$udf_impl`], ensures the UDF is only created once /// named for example `STATIC_JSON_OBJ_CONTAINS` - static [< STATIC_ $expr_fn_name:upper >]: std::sync::OnceLock> = + static [< STATIC_ $expr_fn_name:upper >]: std::sync::OnceLock> = std::sync::OnceLock::new(); /// ScalarFunction that returns a [`ScalarUDF`] for [`$udf_impl`] /// - /// [`ScalarUDF`]: datafusion_expr::ScalarUDF - pub fn [< $expr_fn_name _udf >]() -> std::sync::Arc { + /// [`ScalarUDF`]: datafusion::logical_expr::ScalarUDF + pub fn [< $expr_fn_name _udf >]() -> std::sync::Arc { [< STATIC_ $expr_fn_name:upper >] .get_or_init(|| { - std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl( + std::sync::Arc::new(datafusion::logical_expr::ScalarUDF::new_from_impl( <$udf_impl>::default(), )) }) diff --git a/src/common_union.rs b/src/common_union.rs index ae4433b..26aac57 100644 --- a/src/common_union.rs +++ b/src/common_union.rs @@ -1,9 +1,11 @@ use std::sync::{Arc, OnceLock}; -use arrow::array::{Array, ArrayRef, BooleanArray, Float64Array, Int64Array, NullArray, StringArray, UnionArray}; -use arrow::buffer::Buffer; -use arrow_schema::{DataType, Field, UnionFields, UnionMode}; -use datafusion_common::ScalarValue; +use datafusion::arrow::array::{ + Array, ArrayRef, BooleanArray, Float64Array, Int64Array, NullArray, StringArray, UnionArray, +}; +use datafusion::arrow::buffer::Buffer; +use datafusion::arrow::datatypes::{DataType, Field, UnionFields, UnionMode}; +use datafusion::common::ScalarValue; pub(crate) fn is_json_union(data_type: &DataType) -> bool { match data_type { @@ -112,7 +114,7 @@ impl FromIterator> for JsonUnion { } impl TryFrom for UnionArray { - type Error = arrow::error::ArrowError; + type Error = datafusion::arrow::error::ArrowError; fn try_from(value: JsonUnion) -> Result { let children: Vec> = vec![ diff --git a/src/json_as_text.rs b/src/json_as_text.rs index 9f02470..19bcb56 100644 --- a/src/json_as_text.rs +++ b/src/json_as_text.rs @@ -1,13 +1,14 @@ use std::any::Any; use std::sync::Arc; +use datafusion::arrow::array::{ArrayRef, StringArray}; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::{Result as DataFusionResult, ScalarValue}; +use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use jiter::Peek; + use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath}; use crate::common_macros::make_udf_function; -use arrow::array::{ArrayRef, StringArray}; -use arrow_schema::DataType; -use datafusion_common::{Result as DataFusionResult, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; -use jiter::Peek; make_udf_function!( JsonAsText, diff --git a/src/json_contains.rs b/src/json_contains.rs index 821c04a..c9b9f44 100644 --- a/src/json_contains.rs +++ b/src/json_contains.rs @@ -1,10 +1,10 @@ use std::any::Any; use std::sync::Arc; -use arrow_schema::DataType; -use datafusion_common::arrow::array::{ArrayRef, BooleanArray}; -use datafusion_common::{plan_err, Result, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::arrow::array::{ArrayRef, BooleanArray}; +use datafusion::common::{plan_err, Result, ScalarValue}; +use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use crate::common::{check_args, invoke, jiter_json_find, GetError, JsonPath}; use crate::common_macros::make_udf_function; diff --git a/src/json_get.rs b/src/json_get.rs index b1e4810..84d1593 100644 --- a/src/json_get.rs +++ b/src/json_get.rs @@ -1,11 +1,11 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::UnionArray; -use arrow_schema::DataType; -use datafusion_common::arrow::array::ArrayRef; -use datafusion_common::Result as DataFusionResult; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion::arrow::array::ArrayRef; +use datafusion::arrow::array::UnionArray; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::Result as DataFusionResult; +use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use jiter::{Jiter, NumberAny, NumberInt, Peek}; use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath}; diff --git a/src/json_get_bool.rs b/src/json_get_bool.rs index 92a4ac9..511d9b9 100644 --- a/src/json_get_bool.rs +++ b/src/json_get_bool.rs @@ -1,10 +1,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, BooleanArray}; -use arrow_schema::DataType; -use datafusion_common::{Result as DataFusionResult, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion::arrow::array::{ArrayRef, BooleanArray}; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::{Result as DataFusionResult, ScalarValue}; +use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use jiter::Peek; use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath}; diff --git a/src/json_get_float.rs b/src/json_get_float.rs index bed8c67..16fc4f9 100644 --- a/src/json_get_float.rs +++ b/src/json_get_float.rs @@ -1,10 +1,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, Float64Array}; -use arrow_schema::DataType; -use datafusion_common::{Result as DataFusionResult, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion::arrow::array::{ArrayRef, Float64Array}; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::{Result as DataFusionResult, ScalarValue}; +use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use jiter::{NumberAny, Peek}; use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath}; diff --git a/src/json_get_int.rs b/src/json_get_int.rs index 4f80256..e657d76 100644 --- a/src/json_get_int.rs +++ b/src/json_get_int.rs @@ -1,10 +1,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, Int64Array}; -use arrow_schema::DataType; -use datafusion_common::{Result as DataFusionResult, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion::arrow::array::{ArrayRef, Int64Array}; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::{Result as DataFusionResult, ScalarValue}; +use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use jiter::{NumberInt, Peek}; use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath}; diff --git a/src/json_get_json.rs b/src/json_get_json.rs index 002702b..66c1d3d 100644 --- a/src/json_get_json.rs +++ b/src/json_get_json.rs @@ -1,10 +1,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, StringArray}; -use arrow_schema::DataType; -use datafusion_common::{Result as DataFusionResult, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion::arrow::array::{ArrayRef, StringArray}; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::{Result as DataFusionResult, ScalarValue}; +use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath}; use crate::common_macros::make_udf_function; diff --git a/src/json_get_str.rs b/src/json_get_str.rs index a6f4ad5..ddd0e66 100644 --- a/src/json_get_str.rs +++ b/src/json_get_str.rs @@ -1,10 +1,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, StringArray}; -use arrow_schema::DataType; -use datafusion_common::{Result as DataFusionResult, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion::arrow::array::{ArrayRef, StringArray}; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::{Result as DataFusionResult, ScalarValue}; +use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use jiter::Peek; use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath}; diff --git a/src/json_length.rs b/src/json_length.rs index b1bb900..1f9ad09 100644 --- a/src/json_length.rs +++ b/src/json_length.rs @@ -1,10 +1,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, UInt64Array}; -use arrow_schema::DataType; -use datafusion_common::{Result as DataFusionResult, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion::arrow::array::{ArrayRef, UInt64Array}; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::{Result as DataFusionResult, ScalarValue}; +use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use jiter::Peek; use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath}; diff --git a/src/lib.rs b/src/lib.rs index c576794..75b18f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,10 @@ -use datafusion_common::Result; -use datafusion_execution::FunctionRegistry; -use datafusion_expr::ScalarUDF; use log::debug; use std::sync::Arc; +use datafusion::common::Result; +use datafusion::execution::FunctionRegistry; +use datafusion::logical_expr::ScalarUDF; + mod common; mod common_macros; mod common_union; diff --git a/src/rewrite.rs b/src/rewrite.rs index 60403c8..91e53f9 100644 --- a/src/rewrite.rs +++ b/src/rewrite.rs @@ -1,12 +1,12 @@ -use arrow::datatypes::DataType; -use datafusion_common::config::ConfigOptions; -use datafusion_common::tree_node::Transformed; -use datafusion_common::DFSchema; -use datafusion_common::Result; -use datafusion_expr::expr::{Alias, Cast, Expr, ScalarFunction}; -use datafusion_expr::expr_rewriter::FunctionRewrite; -use datafusion_expr::planner::{ExprPlanner, PlannerResult, RawBinaryExpr}; -use datafusion_expr::sqlparser::ast::BinaryOperator; +use datafusion::arrow::datatypes::DataType; +use datafusion::common::config::ConfigOptions; +use datafusion::common::tree_node::Transformed; +use datafusion::common::DFSchema; +use datafusion::common::Result; +use datafusion::logical_expr::expr::{Alias, Cast, Expr, ScalarFunction}; +use datafusion::logical_expr::expr_rewriter::FunctionRewrite; +use datafusion::logical_expr::planner::{ExprPlanner, PlannerResult, RawBinaryExpr}; +use datafusion::logical_expr::sqlparser::ast::BinaryOperator; pub(crate) struct JsonFunctionRewriter; diff --git a/tests/main.rs b/tests/main.rs index 60933d3..91df2c2 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -1,12 +1,13 @@ -use arrow_schema::DataType; +use datafusion::arrow::datatypes::DataType; use datafusion::assert_batches_eq; -use datafusion_common::ScalarValue; +use datafusion::common::ScalarValue; +use datafusion::logical_expr::ColumnarValue; -mod utils; -use datafusion_expr::ColumnarValue; use datafusion_functions_json::udfs::json_get_str_udf; use utils::{display_val, logical_plan, run_query, run_query_large, run_query_params}; +mod utils; + #[tokio::test] async fn test_json_contains() { let expected = [ @@ -1131,6 +1132,7 @@ async fn test_long_arrow_cast() { assert_batches_eq!(expected, &batches); } +#[tokio::test] async fn test_arrow_cast_numeric() { let sql = r#"select ('{"foo": 420}'->'foo')::numeric = 420"#; let batches = run_query(sql).await.unwrap(); diff --git a/tests/utils/mod.rs b/tests/utils/mod.rs index a5279f9..7dd91c3 100644 --- a/tests/utils/mod.rs +++ b/tests/utils/mod.rs @@ -1,15 +1,14 @@ #![allow(dead_code)] use std::sync::Arc; -use arrow::array::{ArrayRef, Int64Array}; -use arrow::datatypes::{DataType, Field, Schema}; -use arrow::util::display::{ArrayFormatter, FormatOptions}; -use arrow::{array::LargeStringArray, array::StringArray, record_batch::RecordBatch}; - +use datafusion::arrow::array::{ArrayRef, Int64Array}; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::arrow::util::display::{ArrayFormatter, FormatOptions}; +use datafusion::arrow::{array::LargeStringArray, array::StringArray, record_batch::RecordBatch}; +use datafusion::common::ParamValues; use datafusion::error::Result; use datafusion::execution::context::SessionContext; -use datafusion_common::ParamValues; -use datafusion_execution::config::SessionConfig; +use datafusion::prelude::SessionConfig; use datafusion_functions_json::register_all; async fn create_test_table(large_utf8: bool) -> Result {