From 8f1a055ffea0872eb52e09a053f340b2ca5fa1bf Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Sun, 29 Mar 2026 14:28:33 +0800 Subject: [PATCH 1/4] =?UTF-8?q?doc:=20Add=20documentation=20explaining=20t?= =?UTF-8?q?he=20behavior=20of=20`null`=20values=20in=20s?= =?UTF-8?q?truct=20comparisons?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/source/user-guide/sql/struct_coercion.md | 20 +++++++++++++++++++ docs/source/user-guide/sql/subqueries.md | 9 +++++++++ 2 files changed, 29 insertions(+) diff --git a/docs/source/user-guide/sql/struct_coercion.md b/docs/source/user-guide/sql/struct_coercion.md index d2a32fcee2650..059f9fd009b00 100644 --- a/docs/source/user-guide/sql/struct_coercion.md +++ b/docs/source/user-guide/sql/struct_coercion.md @@ -208,6 +208,26 @@ SELECT [ ] FROM t_left JOIN t_right; ``` +## Comparison and Ordering + +DataFusion supports comparing `STRUCT` values with standard comparison operators +(`=`, `!=`, `<`, `<=`, `>`, `>=`). Ordering comparisons are lexicographical and +follow DataFusion's default ascending comparison behavior, where `NULL` sorts +before non-`NULL` values. + +### Examples + +```sql +SELECT {x: 1, y: 2} < {x: 1, y: 3}; +-- true + +SELECT {x: 1, y: NULL} < {x: 1, y: 2}; +-- true + +SELECT {x: 1, y: NULL} = {x: 1, y: NULL}; +-- true +``` + ## Migration Guide: From Positional to Name-Based Matching If you have existing code that relied on **positional** struct field matching, you may need to update it. 
diff --git a/docs/source/user-guide/sql/subqueries.md b/docs/source/user-guide/sql/subqueries.md index 692d1c4020d74..8293f801862c1 100644 --- a/docs/source/user-guide/sql/subqueries.md +++ b/docs/source/user-guide/sql/subqueries.md @@ -102,6 +102,15 @@ SELECT * FROM x WHERE column_1 NOT IN (1,3); +----------+----------+ ``` +#### `IN` with tuple-like values and `NULL` + +For tuple-like values, `IN` uses DataFusion's struct equality semantics: + +```sql +SELECT (7521, 30) IN ((7521, NULL)); +-- false +``` + ## SELECT clause subqueries `SELECT` clause subqueries use values returned from the inner query as part From 0bfa958e17fd69ad9118a6fd3aa27d34fd97fa3a Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Sun, 29 Mar 2026 17:36:16 +0800 Subject: [PATCH 2/4] Added test for in --- docs/source/user-guide/sql/subqueries.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/subqueries.md b/docs/source/user-guide/sql/subqueries.md index 8293f801862c1..7533bd49d553d 100644 --- a/docs/source/user-guide/sql/subqueries.md +++ b/docs/source/user-guide/sql/subqueries.md @@ -107,8 +107,11 @@ SELECT * FROM x WHERE column_1 NOT IN (1,3); For tuple-like values, `IN` uses DataFusion's struct equality semantics: ```sql -SELECT (7521, 30) IN ((7521, NULL)); +SELECT (1, 1) IN ((1, NULL)); -- false + +SELECT (1, NULL) IN ((1, NULL)); +-- true ``` ## SELECT clause subqueries From 12edaba53cf9024871b51fb98808d1cee767b87f Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Mon, 30 Mar 2026 23:24:09 +0800 Subject: [PATCH 3/4] Add link to struct function --- docs/source/user-guide/sql/scalar_functions.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 5a8ef4db3d4b2..de885a464e82c 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -4750,6 +4750,8 @@ _Alias of [struct](#struct)._ Returns 
an Arrow struct using the specified input expressions optionally named. Fields in the returned struct use the optional name or the `cN` naming convention. For example: `c0`, `c1`, `c2`, etc. +For information on comparing and ordering struct values (including `NULL` handling), +see [Comparison and Ordering](struct_coercion.md#comparison-and-ordering). ```sql struct(expression1[, ..., expression_n]) From 0b6a6831946192c7441dd84eb547b61b3909dd73 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Wed, 1 Apr 2026 07:00:57 +0800 Subject: [PATCH 4/4] fix format --- datafusion/functions/src/core/named_struct.rs | 4 +++- datafusion/functions/src/core/struct.rs | 4 +++- docs/source/user-guide/sql/scalar_functions.md | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index 6ae17f055e651..8973e836d33e7 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -27,7 +27,9 @@ use std::sync::Arc; #[user_doc( doc_section(label = "Struct Functions"), - description = "Returns an Arrow struct using the specified name and input expressions pairs.", + description = "Returns an Arrow struct using the specified name and input expressions pairs. 
+For information on comparing and ordering struct values (including `NULL` handling), +see [Comparison and Ordering](struct_coercion.md#comparison-and-ordering).", syntax_example = "named_struct(expression1_name, expression1_input[, ..., expression_n_name, expression_n_input])", sql_example = r#" For example, this query converts two columns `a` and `b` to a single column with diff --git a/datafusion/functions/src/core/struct.rs b/datafusion/functions/src/core/struct.rs index 476da39a5ab2d..2697cb46b09f0 100644 --- a/datafusion/functions/src/core/struct.rs +++ b/datafusion/functions/src/core/struct.rs @@ -27,7 +27,9 @@ use std::sync::Arc; doc_section(label = "Struct Functions"), description = "Returns an Arrow struct using the specified input expressions optionally named. Fields in the returned struct use the optional name or the `cN` naming convention. -For example: `c0`, `c1`, `c2`, etc.", +For example: `c0`, `c1`, `c2`, etc. +For information on comparing and ordering struct values (including `NULL` handling), +see [Comparison and Ordering](struct_coercion.md#comparison-and-ordering).", syntax_example = "struct(expression1[, ..., expression_n])", sql_example = r#"For example, this query converts two columns `a` and `b` to a single column with a struct type of fields `field_a` and `c1`: diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index de885a464e82c..022b0f9daec86 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -4709,6 +4709,8 @@ _Alias of [string_to_array](#string_to_array)._ ### `named_struct` Returns an Arrow struct using the specified name and input expressions pairs. +For information on comparing and ordering struct values (including `NULL` handling), +see [Comparison and Ordering](struct_coercion.md#comparison-and-ordering). ```sql named_struct(expression1_name, expression1_input[, ..., expression_n_name, expression_n_input])