diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index 6ae17f055e651..8973e836d33e7 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -27,7 +27,9 @@ use std::sync::Arc; #[user_doc( doc_section(label = "Struct Functions"), - description = "Returns an Arrow struct using the specified name and input expressions pairs.", + description = "Returns an Arrow struct using the specified name and input expressions pairs. +For information on comparing and ordering struct values (including `NULL` handling), +see [Comparison and Ordering](struct_coercion.md#comparison-and-ordering).", syntax_example = "named_struct(expression1_name, expression1_input[, ..., expression_n_name, expression_n_input])", sql_example = r#" For example, this query converts two columns `a` and `b` to a single column with diff --git a/datafusion/functions/src/core/struct.rs b/datafusion/functions/src/core/struct.rs index 476da39a5ab2d..2697cb46b09f0 100644 --- a/datafusion/functions/src/core/struct.rs +++ b/datafusion/functions/src/core/struct.rs @@ -27,7 +27,9 @@ use std::sync::Arc; doc_section(label = "Struct Functions"), description = "Returns an Arrow struct using the specified input expressions optionally named. Fields in the returned struct use the optional name or the `cN` naming convention. -For example: `c0`, `c1`, `c2`, etc.", +For example: `c0`, `c1`, `c2`, etc. 
+For information on comparing and ordering struct values (including `NULL` handling), +see [Comparison and Ordering](struct_coercion.md#comparison-and-ordering).", syntax_example = "struct(expression1[, ..., expression_n])", sql_example = r#"For example, this query converts two columns `a` and `b` to a single column with a struct type of fields `field_a` and `c1`: diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 5a8ef4db3d4b2..022b0f9daec86 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -4709,6 +4709,8 @@ _Alias of [string_to_array](#string_to_array)._ ### `named_struct` Returns an Arrow struct using the specified name and input expressions pairs. +For information on comparing and ordering struct values (including `NULL` handling), +see [Comparison and Ordering](struct_coercion.md#comparison-and-ordering). ```sql named_struct(expression1_name, expression1_input[, ..., expression_n_name, expression_n_input]) @@ -4750,6 +4752,8 @@ _Alias of [struct](#struct)._ Returns an Arrow struct using the specified input expressions optionally named. Fields in the returned struct use the optional name or the `cN` naming convention. For example: `c0`, `c1`, `c2`, etc. +For information on comparing and ordering struct values (including `NULL` handling), +see [Comparison and Ordering](struct_coercion.md#comparison-and-ordering). ```sql struct(expression1[, ..., expression_n]) diff --git a/docs/source/user-guide/sql/struct_coercion.md b/docs/source/user-guide/sql/struct_coercion.md index d2a32fcee2650..059f9fd009b00 100644 --- a/docs/source/user-guide/sql/struct_coercion.md +++ b/docs/source/user-guide/sql/struct_coercion.md @@ -208,6 +208,26 @@ SELECT [ ] FROM t_left JOIN t_right; ``` +## Comparison and Ordering + +DataFusion supports comparing `STRUCT` values with standard comparison operators +(`=`, `!=`, `<`, `<=`, `>`, `>=`). 
Ordering comparisons are lexicographical and +follow DataFusion's default ascending comparison behavior, where `NULL` sorts +before non-`NULL` values. + +### Examples + +```sql +SELECT {x: 1, y: 2} < {x: 1, y: 3}; +-- true + +SELECT {x: 1, y: NULL} < {x: 1, y: 2}; +-- true + +SELECT {x: 1, y: NULL} = {x: 1, y: NULL}; +-- true +``` + ## Migration Guide: From Positional to Name-Based Matching If you have existing code that relied on **positional** struct field matching, you may need to update it. diff --git a/docs/source/user-guide/sql/subqueries.md b/docs/source/user-guide/sql/subqueries.md index 692d1c4020d74..7533bd49d553d 100644 --- a/docs/source/user-guide/sql/subqueries.md +++ b/docs/source/user-guide/sql/subqueries.md @@ -102,6 +102,18 @@ SELECT * FROM x WHERE column_1 NOT IN (1,3); +----------+----------+ ``` +#### `IN` with tuple-like values and `NULL` + +For tuple-like values, `IN` uses DataFusion's struct equality semantics: + +```sql +SELECT (1, 1) IN ((1, NULL)); +-- false + +SELECT (1, NULL) IN ((1, NULL)); +-- true +``` + ## SELECT clause subqueries `SELECT` clause subqueries use values returned from the inner query as part