Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,11 @@ impl SessionStateBuilder {
.get_or_insert_with(Vec::new)
.extend(SessionStateDefaults::default_expr_planners());

let analyzer = self.analyzer.get_or_insert_with(Analyzer::default);
for rewrite in SessionStateDefaults::default_function_rewrites() {
analyzer.add_function_rewrite(rewrite);
}

self.scalar_functions
.get_or_insert_with(Vec::new)
.extend(SessionStateDefaults::default_scalar_functions());
Expand Down
11 changes: 11 additions & 0 deletions datafusion/core/src/execution/session_state_defaults.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use datafusion_catalog::{MemoryCatalogProvider, MemorySchemaProvider};
use datafusion_execution::config::SessionConfig;
use datafusion_execution::object_store::ObjectStoreUrl;
use datafusion_execution::runtime_env::RuntimeEnv;
use datafusion_expr::expr_rewriter::FunctionRewrite;
use datafusion_expr::planner::ExprPlanner;
use datafusion_expr::registry::ExtensionTypeRegistrationRef;
use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF};
Expand Down Expand Up @@ -102,6 +103,16 @@ impl SessionStateDefaults {
expr_planners
}

/// returns the list of default [`FunctionRewrite`]s installed on the analyzer.
pub fn default_function_rewrites() -> Vec<Arc<dyn FunctionRewrite + Send + Sync>> {
let rewrites: Vec<Arc<dyn FunctionRewrite + Send + Sync>> = vec![
#[cfg(feature = "nested_expressions")]
Arc::new(functions_nested::concat_rewrite::ConcatArrayRewrite),
];

rewrites
}

/// returns the list of default [`ScalarUDF`]s
pub fn default_scalar_functions() -> Vec<Arc<ScalarUDF>> {
#[cfg_attr(not(feature = "nested_expressions"), expect(unused_mut))]
Expand Down
86 changes: 86 additions & 0 deletions datafusion/functions-nested/src/concat_rewrite.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! [`ConcatArrayRewrite`] rewrites `concat(array, ...)` to `array_concat(array, ...)`.

use std::any::Any;

use arrow::datatypes::DataType;
use datafusion_common::config::ConfigOptions;
use datafusion_common::tree_node::Transformed;
use datafusion_common::{DFSchema, Result, plan_err};
use datafusion_expr::Expr;
use datafusion_expr::ExprSchemable;
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::expr_rewriter::FunctionRewrite;
use datafusion_functions::string::concat::ConcatFunc;

use crate::concat::array_concat;

/// [`FunctionRewrite`] that turns `concat(array, ...)` into
/// `array_concat(array, ...)` at the analyzer phase.
///
/// `concat` calls with only non-array arguments are left unchanged.
/// Mixed array and non-array arguments are rejected with a plan error.
#[derive(Debug, Default)]
pub struct ConcatArrayRewrite;

impl FunctionRewrite for ConcatArrayRewrite {
Comment thread
hcrosse marked this conversation as resolved.
fn name(&self) -> &str {
"concat_array_rewrite"
}

fn rewrite(
&self,
expr: Expr,
schema: &DFSchema,
_config: &ConfigOptions,
) -> Result<Transformed<Expr>> {
let Expr::ScalarFunction(ScalarFunction { func, args }) = &expr else {
return Ok(Transformed::no(expr));
};
if !(func.inner().as_ref() as &dyn Any).is::<ConcatFunc>() {
return Ok(Transformed::no(expr));
}

let mut any_list = false;
let mut any_non_list = false;
for arg in args {
match arg.get_type(schema)? {
DataType::List(_)
| DataType::LargeList(_)
Comment thread
hcrosse marked this conversation as resolved.
Outdated
| DataType::FixedSizeList(_, _) => any_list = true,
DataType::Null => {}
_ => any_non_list = true,
}
}

if !any_list {
return Ok(Transformed::no(expr));
}
if any_non_list {
return plan_err!(
"Cannot mix array and non-array arguments in concat function"
);
}

let Expr::ScalarFunction(ScalarFunction { args, .. }) = expr else {
unreachable!("already matched above")
};
Ok(Transformed::yes(array_concat(args)))
}
}
5 changes: 5 additions & 0 deletions datafusion/functions-nested/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub mod array_has;
pub mod arrays_zip;
pub mod cardinality;
pub mod concat;
pub mod concat_rewrite;
pub mod dimension;
pub mod distance;
pub mod empty;
Expand Down Expand Up @@ -192,6 +193,10 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
Ok(()) as Result<()>
})?;

// Register the analyzer rewrite that turns `concat(array, ...)` into
// `array_concat(...)`.
registry.register_function_rewrite(Arc::new(concat_rewrite::ConcatArrayRewrite))?;

Ok(())
}

Expand Down
82 changes: 82 additions & 0 deletions datafusion/sqllogictest/test_files/array/array_concat.slt
Original file line number Diff line number Diff line change
Expand Up @@ -388,4 +388,86 @@ select array_concat(make_array(column3), column1, column2) from arrays_values_v2
[NULL]


## concat() delegates to array_concat when all arguments are arrays.
Comment thread
hcrosse marked this conversation as resolved.
## An analyzer-phase FunctionRewrite rewrites the call before execution, so
## the string concat path never sees array inputs.

query ?
select concat(make_array(1, 2, 3), make_array(4, 5));
----
[1, 2, 3, 4, 5]

query ?
select concat(make_array(1, 2), make_array(3, 4), make_array(5, 6));
----
[1, 2, 3, 4, 5, 6]

query ?
select concat(make_array(1, NULL, 3), make_array(4));
----
[1, NULL, 3, 4]

query ?
select concat(make_array('a', 'b'), make_array('c', 'd'));
----
[a, b, c, d]

query ?
select concat(NULL::integer[], make_array(1, 2));
----
[1, 2]

query ?
select concat(make_array(1, 2), NULL::integer[]);
----
[1, 2]

query ?
select concat(column1, column2) from arrays_values_v2;
----
[NULL, 2, 3, 4, 5, NULL]
[7, NULL, 8]
[9, NULL, 10]
[NULL, 1, NULL, 21]
[11, 12]
NULL

query ?
select concat(
arrow_cast(['1', '2'], 'LargeList(Utf8)'),
arrow_cast(['3'], 'LargeList(Utf8)')
);
----
[1, 2, 3]

query ?
select concat(
arrow_cast(['1', '2'], 'FixedSizeList(2, Utf8)'),
arrow_cast(['3'], 'FixedSizeList(1, Utf8)')
);
----
[1, 2, 3]

query ?
select concat(NULL::integer[], NULL::integer[]);
----
NULL

# Mixed list variants are coerced by array_concat's own coerce_types
# rules (same result as calling array_concat directly).
query ?T
select
concat(arrow_cast([1, 2], 'FixedSizeList(2, Int64)'), make_array(3, 4)) as v,
arrow_typeof(concat(arrow_cast([1, 2], 'FixedSizeList(2, Int64)'), make_array(3, 4))) as t;
----
[1, 2, 3, 4] List(Int64)

# Mixed array + non-array arguments are rejected at plan time.
statement error Cannot mix array and non-array arguments in concat function
select concat(make_array(1), 'x');

statement error Cannot mix array and non-array arguments in concat function
select concat('x', make_array(1));


include ./cleanup.slt.part
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/explain.slt
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ EXPLAIN VERBOSE SELECT a, b, c FROM simple_explain_test
initial_logical_plan
01)Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c
02)--TableScan: simple_explain_test
logical_plan after apply_function_rewrites SAME TEXT AS ABOVE
logical_plan after resolve_grouping_function SAME TEXT AS ABOVE
logical_plan after type_coercion SAME TEXT AS ABOVE
analyzed_logical_plan SAME TEXT AS ABOVE
Expand Down Expand Up @@ -548,6 +549,7 @@ EXPLAIN VERBOSE SELECT a, b, c FROM simple_explain_test
initial_logical_plan
01)Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c
02)--TableScan: simple_explain_test
logical_plan after apply_function_rewrites SAME TEXT AS ABOVE
logical_plan after resolve_grouping_function SAME TEXT AS ABOVE
logical_plan after type_coercion SAME TEXT AS ABOVE
analyzed_logical_plan SAME TEXT AS ABOVE
Expand Down
Loading