Skip to content

Commit

Permalink
move make_array array_append array_prepend array_concat function to datafusion-functions-array crate (apache#9504)
Browse files Browse the repository at this point in the history

* move array function

* fix rebase

Signed-off-by: jayzhan211 <[email protected]>

* cleanup to trigger rerun

Signed-off-by: jayzhan211 <[email protected]>

* split functions to different files

Signed-off-by: jayzhan211 <[email protected]>

* fix

Signed-off-by: jayzhan211 <[email protected]>

* fix conflict

Signed-off-by: jayzhan211 <[email protected]>

* clippy

Signed-off-by: jayzhan211 <[email protected]>

---------

Signed-off-by: jayzhan211 <[email protected]>
Co-authored-by: jayzhan211 <[email protected]>
  • Loading branch information
guojidan and jayzhan211 authored Mar 10, 2024
1 parent 31fcd72 commit 88187d4
Show file tree
Hide file tree
Showing 23 changed files with 961 additions and 663 deletions.
3 changes: 3 additions & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

109 changes: 0 additions & 109 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,12 @@

//! Built-in functions module contains all the built-in functions definitions.
use std::cmp::Ordering;
use std::collections::HashMap;
use std::fmt;
use std::str::FromStr;
use std::sync::{Arc, OnceLock};

use crate::signature::TIMEZONE_WILDCARD;
use crate::type_coercion::binary::get_wider_type;
use crate::type_coercion::functions::data_types;
use crate::{FuncMonotonicity, Signature, TypeSignature, Volatility};

Expand Down Expand Up @@ -112,12 +110,8 @@ pub enum BuiltinScalarFunction {
Cot,

// array functions
/// array_append
ArrayAppend,
/// array_sort
ArraySort,
/// array_concat
ArrayConcat,
/// array_pop_front
ArrayPopFront,
/// array_pop_back
Expand All @@ -130,8 +124,6 @@ pub enum BuiltinScalarFunction {
ArrayPosition,
/// array_positions
ArrayPositions,
/// array_prepend
ArrayPrepend,
/// array_remove
ArrayRemove,
/// array_remove_n
Expand All @@ -158,8 +150,6 @@ pub enum BuiltinScalarFunction {
ArrayExcept,
/// array_resize
ArrayResize,
/// construct an array from columns
MakeArray,

// struct functions
/// struct
Expand Down Expand Up @@ -345,17 +335,14 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Tan => Volatility::Immutable,
BuiltinScalarFunction::Tanh => Volatility::Immutable,
BuiltinScalarFunction::Trunc => Volatility::Immutable,
BuiltinScalarFunction::ArrayAppend => Volatility::Immutable,
BuiltinScalarFunction::ArraySort => Volatility::Immutable,
BuiltinScalarFunction::ArrayConcat => Volatility::Immutable,
BuiltinScalarFunction::ArrayDistinct => Volatility::Immutable,
BuiltinScalarFunction::ArrayElement => Volatility::Immutable,
BuiltinScalarFunction::ArrayExcept => Volatility::Immutable,
BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable,
BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable,
BuiltinScalarFunction::ArrayPosition => Volatility::Immutable,
BuiltinScalarFunction::ArrayPositions => Volatility::Immutable,
BuiltinScalarFunction::ArrayPrepend => Volatility::Immutable,
BuiltinScalarFunction::ArrayRepeat => Volatility::Immutable,
BuiltinScalarFunction::ArrayRemove => Volatility::Immutable,
BuiltinScalarFunction::ArrayRemoveN => Volatility::Immutable,
Expand All @@ -368,7 +355,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable,
BuiltinScalarFunction::ArrayUnion => Volatility::Immutable,
BuiltinScalarFunction::ArrayResize => Volatility::Immutable,
BuiltinScalarFunction::MakeArray => Volatility::Immutable,
BuiltinScalarFunction::Ascii => Volatility::Immutable,
BuiltinScalarFunction::BitLength => Volatility::Immutable,
BuiltinScalarFunction::Btrim => Volatility::Immutable,
Expand Down Expand Up @@ -426,25 +412,6 @@ impl BuiltinScalarFunction {
}
}

/// Computes how many dimensions `input_expr_type` has when it is viewed
/// as an N-dimensional array built from nested [`DataType::List`] values.
///
/// Counting starts at 1 for the innermost (non-list) type, and every
/// enclosing [`DataType::List`] layer adds one more dimension.
///
/// ## Examples:
///
/// * `Int64` has dimension 1
/// * `List(Int64)` has dimension 2
/// * `List(List(Int64))` has dimension 3
/// * etc.
fn return_dimension(self, input_expr_type: &DataType) -> u64 {
    // A non-list type already counts as one dimension.
    let mut depth: u64 = 1;
    let mut ty = input_expr_type;
    loop {
        match ty {
            // Peel off one `List` wrapper and continue with its element type.
            DataType::List(inner) => {
                ty = inner.data_type();
                depth += 1;
            }
            // Any other type ends the walk.
            _ => break depth,
        }
    }
}

/// Returns the output [`DataType`] of this function
///
/// This method should be invoked only after `input_expr_types` have been validated
Expand All @@ -463,38 +430,7 @@ impl BuiltinScalarFunction {
// the return type of the built in function.
// Some built-in functions' return type depends on the incoming type.
match self {
BuiltinScalarFunction::ArrayAppend => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArraySort => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayConcat => {
let mut expr_type = Null;
let mut max_dims = 0;
for input_expr_type in input_expr_types {
match input_expr_type {
List(field) => {
if !field.data_type().equals_datatype(&Null) {
let dims = self.return_dimension(input_expr_type);
expr_type = match max_dims.cmp(&dims) {
Ordering::Greater => expr_type,
Ordering::Equal => {
get_wider_type(&expr_type, input_expr_type)?
}
Ordering::Less => {
max_dims = dims;
input_expr_type.clone()
}
};
}
}
_ => {
return plan_err!(
"The {self} function can only accept list as the args."
);
}
}
}

Ok(expr_type)
}
BuiltinScalarFunction::ArrayDistinct => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] {
List(field)
Expand All @@ -510,7 +446,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayPositions => {
Ok(List(Arc::new(Field::new("item", UInt64, true))))
}
BuiltinScalarFunction::ArrayPrepend => Ok(input_expr_types[1].clone()),
BuiltinScalarFunction::ArrayRepeat => Ok(List(Arc::new(Field::new(
"item",
input_expr_types[0].clone(),
Expand Down Expand Up @@ -551,20 +486,6 @@ impl BuiltinScalarFunction {
(dt, _) => Ok(dt),
}
}
BuiltinScalarFunction::MakeArray => match input_expr_types.len() {
0 => Ok(List(Arc::new(Field::new("item", Null, true)))),
_ => {
let mut expr_type = Null;
for input_expr_type in input_expr_types {
if !input_expr_type.equals_datatype(&Null) {
expr_type = input_expr_type.clone();
break;
}
}

Ok(List(Arc::new(Field::new("item", expr_type, true))))
}
},
BuiltinScalarFunction::Ascii => Ok(Int32),
BuiltinScalarFunction::BitLength => {
utf8_to_int_type(&input_expr_types[0], "bit_length")
Expand Down Expand Up @@ -763,18 +684,8 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArraySort => {
Signature::variadic_any(self.volatility())
}
BuiltinScalarFunction::ArrayAppend => {
Signature::array_and_element(self.volatility())
}
BuiltinScalarFunction::MakeArray => {
// 0 or more arguments of arbitrary type
Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility())
}
BuiltinScalarFunction::ArrayPopFront => Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayPopBack => Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayConcat => {
Signature::variadic_any(self.volatility())
}
BuiltinScalarFunction::ArrayElement => {
Signature::array_and_index(self.volatility())
}
Expand All @@ -786,9 +697,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayPositions => {
Signature::array_and_element(self.volatility())
}
BuiltinScalarFunction::ArrayPrepend => {
Signature::element_and_array(self.volatility())
}
BuiltinScalarFunction::ArrayRepeat => Signature::any(2, self.volatility()),
BuiltinScalarFunction::ArrayRemove => {
Signature::array_and_element(self.volatility())
Expand Down Expand Up @@ -1213,17 +1121,7 @@ impl BuiltinScalarFunction {
// other functions
BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"],

// array functions
BuiltinScalarFunction::ArrayAppend => &[
"array_append",
"list_append",
"array_push_back",
"list_push_back",
],
BuiltinScalarFunction::ArraySort => &["array_sort", "list_sort"],
BuiltinScalarFunction::ArrayConcat => {
&["array_concat", "array_cat", "list_concat", "list_cat"]
}
BuiltinScalarFunction::ArrayDistinct => &["array_distinct", "list_distinct"],
BuiltinScalarFunction::ArrayElement => &[
"array_element",
Expand All @@ -1245,12 +1143,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayPositions => {
&["array_positions", "list_positions"]
}
BuiltinScalarFunction::ArrayPrepend => &[
"array_prepend",
"list_prepend",
"array_push_front",
"list_push_front",
],
BuiltinScalarFunction::ArrayRepeat => &["array_repeat", "list_repeat"],
BuiltinScalarFunction::ArrayRemove => &["array_remove", "list_remove"],
BuiltinScalarFunction::ArrayRemoveN => &["array_remove_n", "list_remove_n"],
Expand All @@ -1268,7 +1160,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"],
BuiltinScalarFunction::ArrayUnion => &["array_union", "list_union"],
BuiltinScalarFunction::ArrayResize => &["array_resize", "list_resize"],
BuiltinScalarFunction::MakeArray => &["make_array", "make_list"],
BuiltinScalarFunction::ArrayIntersect => {
&["array_intersect", "list_intersect"]
}
Expand Down
23 changes: 0 additions & 23 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -586,14 +586,6 @@ scalar_expr!(
scalar_expr!(Uuid, uuid, , "returns uuid v4 as a string value");
scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`");

// array functions
scalar_expr!(
ArrayAppend,
array_append,
array element,
"appends an element to the end of an array."
);

scalar_expr!(ArraySort, array_sort, array desc null_first, "returns sorted array.");

scalar_expr!(
Expand All @@ -610,7 +602,6 @@ scalar_expr!(
"returns the array without the first element."
);

nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays.");
scalar_expr!(
ArrayElement,
array_element,
Expand Down Expand Up @@ -641,12 +632,6 @@ scalar_expr!(
array element,
"searches for an element in the array, returns all occurrences."
);
scalar_expr!(
ArrayPrepend,
array_prepend,
array element,
"prepends an element to the beginning of an array."
);
scalar_expr!(
ArrayRepeat,
array_repeat,
Expand Down Expand Up @@ -710,11 +695,6 @@ scalar_expr!(
"returns an array with the specified size filled with the given value."
);

nary_scalar_expr!(
MakeArray,
array,
"returns an Arrow array using the specified input expressions."
);
scalar_expr!(
ArrayIntersect,
array_intersect,
Expand Down Expand Up @@ -1308,21 +1288,18 @@ mod test {

test_scalar_expr!(FromUnixtime, from_unixtime, unixtime);

test_scalar_expr!(ArrayAppend, array_append, array, element);
test_scalar_expr!(ArraySort, array_sort, array, desc, null_first);
test_scalar_expr!(ArrayPopFront, array_pop_front, array);
test_scalar_expr!(ArrayPopBack, array_pop_back, array);
test_scalar_expr!(ArrayPosition, array_position, array, element, index);
test_scalar_expr!(ArrayPositions, array_positions, array, element);
test_scalar_expr!(ArrayPrepend, array_prepend, array, element);
test_scalar_expr!(ArrayRepeat, array_repeat, element, count);
test_scalar_expr!(ArrayRemove, array_remove, array, element);
test_scalar_expr!(ArrayRemoveN, array_remove_n, array, element, max);
test_scalar_expr!(ArrayRemoveAll, array_remove_all, array, element);
test_scalar_expr!(ArrayReplace, array_replace, array, from, to);
test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to, max);
test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to);
test_nary_scalar_expr!(MakeArray, array, input);

test_unary_scalar_expr!(ArrowTypeof, arrow_typeof);
test_nary_scalar_expr!(OverLay, overlay, string, characters, position, len);
Expand Down
3 changes: 3 additions & 0 deletions datafusion/functions-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ path = "src/lib.rs"

[dependencies]
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-schema = { workspace = true }
datafusion-common = { workspace = true }
datafusion-execution = { workspace = true }
datafusion-expr = { workspace = true }
Expand Down
Loading

0 comments on commit 88187d4

Please sign in to comment.