Skip to content

Commit

Permalink
remove arrow direct dependency, use just datafusion (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin authored Sep 7, 2024
1 parent c74be46 commit e50cd3f
Show file tree
Hide file tree
Showing 17 changed files with 82 additions and 82 deletions.
7 changes: 1 addition & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,14 @@ repository = "https://github.com/datafusion-contrib/datafusion-functions-json/"
rust-version = "1.76.0"

[dependencies]
arrow = "52.2"
arrow-schema = "52.2"
datafusion-common = "41"
datafusion-expr = "41"
datafusion-execution = "41"
datafusion = "41"
jiter = "0.5"
paste = "1"
log = "0.4"

[dev-dependencies]
codspeed-criterion-compat = "2.3"
criterion = "0.5.1"
datafusion = "41"
clap = "4"
tokio = { version = "1.37", features = ["full"] }

Expand Down
8 changes: 4 additions & 4 deletions src/common.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::str::Utf8Error;

use arrow::array::{Array, ArrayRef, Int64Array, LargeStringArray, StringArray, UInt64Array};
use arrow_schema::DataType;
use datafusion_common::{exec_err, plan_err, Result as DataFusionResult, ScalarValue};
use datafusion_expr::ColumnarValue;
use datafusion::arrow::array::{Array, ArrayRef, Int64Array, LargeStringArray, StringArray, UInt64Array};
use datafusion::arrow::datatypes::DataType;
use datafusion::common::{exec_err, plan_err, Result as DataFusionResult, ScalarValue};
use datafusion::logical_expr::ColumnarValue;
use jiter::{Jiter, JiterError, Peek};

use crate::common_union::{is_json_union, json_from_union_scalar, nested_json_array};
Expand Down
12 changes: 6 additions & 6 deletions src/common_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,25 @@ macro_rules! make_udf_function {
($udf_impl:ty, $expr_fn_name:ident, $($arg:ident)*, $doc:expr) => {
paste::paste! {
#[doc = $doc]
#[must_use] pub fn $expr_fn_name($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
datafusion_expr::Expr::ScalarFunction(datafusion_expr::expr::ScalarFunction::new_udf(
#[must_use] pub fn $expr_fn_name($($arg: datafusion::logical_expr::Expr),*) -> datafusion::logical_expr::Expr {
datafusion::logical_expr::Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf(
[< $expr_fn_name _udf >](),
vec![$($arg),*],
))
}

/// Singleton instance of [`$udf_impl`]; ensures the UDF is only created once.
/// The static is named after the function, for example `STATIC_JSON_OBJ_CONTAINS`.
static [< STATIC_ $expr_fn_name:upper >]: std::sync::OnceLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
static [< STATIC_ $expr_fn_name:upper >]: std::sync::OnceLock<std::sync::Arc<datafusion::logical_expr::ScalarUDF>> =
std::sync::OnceLock::new();

/// ScalarFunction that returns a [`ScalarUDF`] for [`$udf_impl`]
///
/// [`ScalarUDF`]: datafusion_expr::ScalarUDF
pub fn [< $expr_fn_name _udf >]() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
/// [`ScalarUDF`]: datafusion::logical_expr::ScalarUDF
pub fn [< $expr_fn_name _udf >]() -> std::sync::Arc<datafusion::logical_expr::ScalarUDF> {
[< STATIC_ $expr_fn_name:upper >]
.get_or_init(|| {
std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
std::sync::Arc::new(datafusion::logical_expr::ScalarUDF::new_from_impl(
<$udf_impl>::default(),
))
})
Expand Down
12 changes: 7 additions & 5 deletions src/common_union.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use std::sync::{Arc, OnceLock};

use arrow::array::{Array, ArrayRef, BooleanArray, Float64Array, Int64Array, NullArray, StringArray, UnionArray};
use arrow::buffer::Buffer;
use arrow_schema::{DataType, Field, UnionFields, UnionMode};
use datafusion_common::ScalarValue;
use datafusion::arrow::array::{
Array, ArrayRef, BooleanArray, Float64Array, Int64Array, NullArray, StringArray, UnionArray,
};
use datafusion::arrow::buffer::Buffer;
use datafusion::arrow::datatypes::{DataType, Field, UnionFields, UnionMode};
use datafusion::common::ScalarValue;

pub(crate) fn is_json_union(data_type: &DataType) -> bool {
match data_type {
Expand Down Expand Up @@ -112,7 +114,7 @@ impl FromIterator<Option<JsonUnionField>> for JsonUnion {
}

impl TryFrom<JsonUnion> for UnionArray {
type Error = arrow::error::ArrowError;
type Error = datafusion::arrow::error::ArrowError;

fn try_from(value: JsonUnion) -> Result<Self, Self::Error> {
let children: Vec<Arc<dyn Array>> = vec![
Expand Down
11 changes: 6 additions & 5 deletions src/json_as_text.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
use std::any::Any;
use std::sync::Arc;

use datafusion::arrow::array::{ArrayRef, StringArray};
use datafusion::arrow::datatypes::DataType;
use datafusion::common::{Result as DataFusionResult, ScalarValue};
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use jiter::Peek;

use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath};
use crate::common_macros::make_udf_function;
use arrow::array::{ArrayRef, StringArray};
use arrow_schema::DataType;
use datafusion_common::{Result as DataFusionResult, ScalarValue};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use jiter::Peek;

make_udf_function!(
JsonAsText,
Expand Down
8 changes: 4 additions & 4 deletions src/json_contains.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::any::Any;
use std::sync::Arc;

use arrow_schema::DataType;
use datafusion_common::arrow::array::{ArrayRef, BooleanArray};
use datafusion_common::{plan_err, Result, ScalarValue};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion::arrow::datatypes::DataType;
use datafusion::common::arrow::array::{ArrayRef, BooleanArray};
use datafusion::common::{plan_err, Result, ScalarValue};
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};

use crate::common::{check_args, invoke, jiter_json_find, GetError, JsonPath};
use crate::common_macros::make_udf_function;
Expand Down
10 changes: 5 additions & 5 deletions src/json_get.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::UnionArray;
use arrow_schema::DataType;
use datafusion_common::arrow::array::ArrayRef;
use datafusion_common::Result as DataFusionResult;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion::arrow::array::ArrayRef;
use datafusion::arrow::array::UnionArray;
use datafusion::arrow::datatypes::DataType;
use datafusion::common::Result as DataFusionResult;
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use jiter::{Jiter, NumberAny, NumberInt, Peek};

use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath};
Expand Down
8 changes: 4 additions & 4 deletions src/json_get_bool.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, BooleanArray};
use arrow_schema::DataType;
use datafusion_common::{Result as DataFusionResult, ScalarValue};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion::arrow::array::{ArrayRef, BooleanArray};
use datafusion::arrow::datatypes::DataType;
use datafusion::common::{Result as DataFusionResult, ScalarValue};
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use jiter::Peek;

use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath};
Expand Down
8 changes: 4 additions & 4 deletions src/json_get_float.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, Float64Array};
use arrow_schema::DataType;
use datafusion_common::{Result as DataFusionResult, ScalarValue};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion::arrow::array::{ArrayRef, Float64Array};
use datafusion::arrow::datatypes::DataType;
use datafusion::common::{Result as DataFusionResult, ScalarValue};
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use jiter::{NumberAny, Peek};

use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath};
Expand Down
8 changes: 4 additions & 4 deletions src/json_get_int.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array};
use arrow_schema::DataType;
use datafusion_common::{Result as DataFusionResult, ScalarValue};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion::arrow::array::{ArrayRef, Int64Array};
use datafusion::arrow::datatypes::DataType;
use datafusion::common::{Result as DataFusionResult, ScalarValue};
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use jiter::{NumberInt, Peek};

use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath};
Expand Down
8 changes: 4 additions & 4 deletions src/json_get_json.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, StringArray};
use arrow_schema::DataType;
use datafusion_common::{Result as DataFusionResult, ScalarValue};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion::arrow::array::{ArrayRef, StringArray};
use datafusion::arrow::datatypes::DataType;
use datafusion::common::{Result as DataFusionResult, ScalarValue};
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};

use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath};
use crate::common_macros::make_udf_function;
Expand Down
8 changes: 4 additions & 4 deletions src/json_get_str.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, StringArray};
use arrow_schema::DataType;
use datafusion_common::{Result as DataFusionResult, ScalarValue};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion::arrow::array::{ArrayRef, StringArray};
use datafusion::arrow::datatypes::DataType;
use datafusion::common::{Result as DataFusionResult, ScalarValue};
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use jiter::Peek;

use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath};
Expand Down
8 changes: 4 additions & 4 deletions src/json_length.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, UInt64Array};
use arrow_schema::DataType;
use datafusion_common::{Result as DataFusionResult, ScalarValue};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion::arrow::array::{ArrayRef, UInt64Array};
use datafusion::arrow::datatypes::DataType;
use datafusion::common::{Result as DataFusionResult, ScalarValue};
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use jiter::Peek;

use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath};
Expand Down
7 changes: 4 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use datafusion_common::Result;
use datafusion_execution::FunctionRegistry;
use datafusion_expr::ScalarUDF;
use log::debug;
use std::sync::Arc;

use datafusion::common::Result;
use datafusion::execution::FunctionRegistry;
use datafusion::logical_expr::ScalarUDF;

mod common;
mod common_macros;
mod common_union;
Expand Down
18 changes: 9 additions & 9 deletions src/rewrite.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use arrow::datatypes::DataType;
use datafusion_common::config::ConfigOptions;
use datafusion_common::tree_node::Transformed;
use datafusion_common::DFSchema;
use datafusion_common::Result;
use datafusion_expr::expr::{Alias, Cast, Expr, ScalarFunction};
use datafusion_expr::expr_rewriter::FunctionRewrite;
use datafusion_expr::planner::{ExprPlanner, PlannerResult, RawBinaryExpr};
use datafusion_expr::sqlparser::ast::BinaryOperator;
use datafusion::arrow::datatypes::DataType;
use datafusion::common::config::ConfigOptions;
use datafusion::common::tree_node::Transformed;
use datafusion::common::DFSchema;
use datafusion::common::Result;
use datafusion::logical_expr::expr::{Alias, Cast, Expr, ScalarFunction};
use datafusion::logical_expr::expr_rewriter::FunctionRewrite;
use datafusion::logical_expr::planner::{ExprPlanner, PlannerResult, RawBinaryExpr};
use datafusion::logical_expr::sqlparser::ast::BinaryOperator;

pub(crate) struct JsonFunctionRewriter;

Expand Down
10 changes: 6 additions & 4 deletions tests/main.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use arrow_schema::DataType;
use datafusion::arrow::datatypes::DataType;
use datafusion::assert_batches_eq;
use datafusion_common::ScalarValue;
use datafusion::common::ScalarValue;
use datafusion::logical_expr::ColumnarValue;

mod utils;
use datafusion_expr::ColumnarValue;
use datafusion_functions_json::udfs::json_get_str_udf;
use utils::{display_val, logical_plan, run_query, run_query_large, run_query_params};

mod utils;

#[tokio::test]
async fn test_json_contains() {
let expected = [
Expand Down Expand Up @@ -1131,6 +1132,7 @@ async fn test_long_arrow_cast() {
assert_batches_eq!(expected, &batches);
}

#[tokio::test]
async fn test_arrow_cast_numeric() {
let sql = r#"select ('{"foo": 420}'->'foo')::numeric = 420"#;
let batches = run_query(sql).await.unwrap();
Expand Down
13 changes: 6 additions & 7 deletions tests/utils/mod.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
#![allow(dead_code)]
use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::util::display::{ArrayFormatter, FormatOptions};
use arrow::{array::LargeStringArray, array::StringArray, record_batch::RecordBatch};

use datafusion::arrow::array::{ArrayRef, Int64Array};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::util::display::{ArrayFormatter, FormatOptions};
use datafusion::arrow::{array::LargeStringArray, array::StringArray, record_batch::RecordBatch};
use datafusion::common::ParamValues;
use datafusion::error::Result;
use datafusion::execution::context::SessionContext;
use datafusion_common::ParamValues;
use datafusion_execution::config::SessionConfig;
use datafusion::prelude::SessionConfig;
use datafusion_functions_json::register_all;

async fn create_test_table(large_utf8: bool) -> Result<SessionContext> {
Expand Down

0 comments on commit e50cd3f

Please sign in to comment.