From 1203b508dc65753b8ff988374ef4c14f0e198ffe Mon Sep 17 00:00:00 2001 From: Samyak Sarnayak Date: Tue, 31 Mar 2026 00:21:57 +0530 Subject: [PATCH 1/4] feat: add a custom ExprPlanner to support colon operator Closes #26 - Add a custom `ExprPlanner` that handles `:` as an operator and re-writes it to `variant_get`. --- examples/cli.rs | 10 ++++++---- src/expr_planner.rs | 36 ++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 ++ 3 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 src/expr_planner.rs diff --git a/examples/cli.rs b/examples/cli.rs index 77e0566..e6e9562 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -1,12 +1,13 @@ use anyhow::{Context, Result}; use arrow::array::{ArrayRef, RecordBatch, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; +use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::ScalarUDF; use datafusion::prelude::*; use datafusion_variant::{ - CastToVariantUdf, IsVariantNullUdf, JsonToVariantUdf, VariantGetUdf, VariantListConstruct, - VariantListInsert, VariantObjectConstruct, VariantObjectInsert, VariantObjectKeys, - VariantPretty, VariantToJsonUdf, + CastToVariantUdf, IsVariantNullUdf, JsonToVariantUdf, VariantExprPlanner, VariantGetUdf, + VariantListConstruct, VariantListInsert, VariantObjectConstruct, VariantObjectInsert, + VariantObjectKeys, VariantPretty, VariantToJsonUdf, }; use flate2::read::GzDecoder; use rustyline::error::ReadlineError; @@ -97,7 +98,7 @@ async fn main() -> Result<()> { let ctx = { let setup_start = Instant::now(); - let ctx = SessionContext::new(); + let mut ctx = SessionContext::new(); let schema = Schema::new(vec![Field::new("json_data", DataType::Utf8, false)]); let string_array: ArrayRef = Arc::new(StringArray::from(json_strings)); let batch = RecordBatch::try_new(Arc::new(schema), vec![string_array])?; @@ -119,6 +120,7 @@ async fn main() -> Result<()> { ctx.register_udf(ScalarUDF::new_from_impl(VariantListInsert::default())); ctx.register_udf(ScalarUDF::new_from_impl(VariantObjectInsert::default())); ctx.register_udf(ScalarUDF::new_from_impl(VariantObjectKeys::default())); + ctx.register_expr_planner(Arc::new(VariantExprPlanner))?; let setup_duration = setup_start.elapsed(); println!( diff --git a/src/expr_planner.rs b/src/expr_planner.rs new file mode 100644 index 0000000..ebf19b8 --- /dev/null +++ b/src/expr_planner.rs @@ -0,0 +1,36 @@ +use std::sync::Arc; + +use datafusion::{ + common::DFSchema, + error::DataFusionError, + logical_expr::{ + ScalarUDF, + expr::ScalarFunction, + planner::{ExprPlanner, PlannerResult, RawBinaryExpr}, + }, + prelude::Expr, + sql::sqlparser::ast::BinaryOperator, +}; + +use crate::VariantGetUdf; + +#[derive(Debug)] +pub struct VariantExprPlanner; + +impl ExprPlanner for VariantExprPlanner { + fn plan_binary_op( + &self, + expr: RawBinaryExpr, + _schema: &DFSchema, + ) -> Result, DataFusionError> { + match &expr.op { + BinaryOperator::Custom(s) if s == ":" => Ok(PlannerResult::Planned( + Expr::ScalarFunction(ScalarFunction::new_udf( + Arc::new(ScalarUDF::new_from_impl(VariantGetUdf::default())), + vec![expr.left, expr.right], + )), + )), + _ => Ok(PlannerResult::Original(expr)), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index f944f84..9f798e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,7 @@ mod shared; mod cast_to_variant; +mod expr_planner; mod impl_variant_get; mod is_variant_null; mod json_to_variant; @@ -19,6 +20,7 @@ mod variant_pretty; mod variant_to_json; pub use cast_to_variant::*; +pub use expr_planner::*; pub use is_variant_null::*; pub use json_to_variant::*; pub use variant_get::*; From e715023c8850f856e6621da733db83143814c3d8 Mon Sep 17 00:00:00 2001 From: Samyak Sarnayak Date: Tue, 31 Mar 2026 13:28:09 +0530 Subject: [PATCH 2/4] doc: add docstring to explain VariantExprPlanner --- src/expr_planner.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/expr_planner.rs b/src/expr_planner.rs index ebf19b8..63e07c6 100644 --- a/src/expr_planner.rs +++ b/src/expr_planner.rs @@ -14,6 +14,10 @@ use datafusion::{ use crate::VariantGetUdf; +/// Custom [`ExprPlanner`] used to handle variant-specific syntax such as colon operator. +/// +/// Currently implements: +/// - Colon operator: short-hand syntax for `variant_get`. #[derive(Debug)] pub struct VariantExprPlanner; From b36452010ade8b131486a0cee831213b189ba293 Mon Sep 17 00:00:00 2001 From: Samyak Sarnayak Date: Tue, 31 Mar 2026 13:28:19 +0530 Subject: [PATCH 3/4] test: add sqllogictests for simple variant_get --- tests/sqllogictests.rs | 23 +++++++++------- tests/test_files/colon_operator.slt | 42 +++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 9 deletions(-) create mode 100644 tests/test_files/colon_operator.slt diff --git a/tests/sqllogictests.rs b/tests/sqllogictests.rs index f808d8c..5d2dfc8 100644 --- a/tests/sqllogictests.rs +++ b/tests/sqllogictests.rs @@ -1,14 +1,17 @@ +use datafusion::execution::FunctionRegistry; use datafusion::{logical_expr::ScalarUDF, prelude::*}; use datafusion_sqllogictest::{DataFusion, TestContext}; use datafusion_variant::{ - CastToVariantUdf, IsVariantNullUdf, JsonToVariantUdf, VariantGetBoolUdf, VariantGetFieldUdf, - VariantGetFloatUdf, VariantGetIntUdf, VariantGetJsonUdf, VariantGetStrUdf, VariantGetUdf, - VariantListConstruct, VariantListDelete, VariantListInsert, VariantObjectConstruct, - VariantObjectDelete, VariantObjectInsert, VariantObjectKeys, VariantPretty, VariantToJsonUdf, + CastToVariantUdf, IsVariantNullUdf, JsonToVariantUdf, VariantExprPlanner, VariantGetBoolUdf, + VariantGetFieldUdf, VariantGetFloatUdf, VariantGetIntUdf, VariantGetJsonUdf, VariantGetStrUdf, + VariantGetUdf, VariantListConstruct, VariantListDelete, VariantListInsert, + VariantObjectConstruct, VariantObjectDelete, VariantObjectInsert, VariantObjectKeys, + VariantPretty, VariantToJsonUdf, }; use indicatif::ProgressBar; use sqllogictest::strict_column_validator; use std::path::PathBuf; +use std::sync::Arc; #[tokio::test] async fn run_sqllogictests() -> Result<(), Box> { @@ -38,11 +41,12 @@ async fn run_sqllogictests() -> Result<(), Box> { .unwrap_or(&test_file) .to_path_buf(); - let ctx = if let Some(test_ctx) = TestContext::try_new_for_test_file(&relative_path).await { - test_ctx.session_ctx().clone() - } else { - SessionContext::new() - }; + let mut ctx = + if let Some(test_ctx) = TestContext::try_new_for_test_file(&relative_path).await { + test_ctx.session_ctx().clone() + } else { + SessionContext::new() + }; // register variant udfs ctx.register_udf(ScalarUDF::new_from_impl(VariantToJsonUdf::default())); @@ -64,6 +68,7 @@ async fn run_sqllogictests() -> Result<(), Box> { ctx.register_udf(ScalarUDF::new_from_impl(VariantObjectInsert::default())); ctx.register_udf(ScalarUDF::new_from_impl(VariantObjectDelete::default())); ctx.register_udf(ScalarUDF::new_from_impl(VariantObjectKeys::default())); + ctx.register_expr_planner(Arc::new(VariantExprPlanner))?; let pb = ProgressBar::new(24); diff --git a/tests/test_files/colon_operator.slt b/tests/test_files/colon_operator.slt new file mode 100644 index 0000000..f37352f --- /dev/null +++ b/tests/test_files/colon_operator.slt @@ -0,0 +1,42 @@ +statement ok +CREATE TABLE json_data (id INT, json_str TEXT) AS VALUES + (1, '{"name": "Alice", "age": 30}'), + (2, '{"name": "Bob", "age": 25}'), + (3, '{"items": [1, 2, 3], "count": 3}'), + (4, 'null'), + (5, '"simple string"'), + (6, '123'), + (7, 'true'); + +query T +select variant_pretty(json_to_variant(json_str):name) from json_data; +---- +ShortString(ShortString("Alice")) +ShortString(ShortString("Bob")) +NULL +NULL +NULL +NULL +NULL + +query T +select variant_pretty(json_to_variant(json_str):items[0]) from json_data; +---- +NULL +NULL +Int8(1) +NULL +NULL +NULL +NULL + +query T +select variant_pretty(json_to_variant(json_str):age[0]) from json_data; +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL From 233006b7d14b4140bd5c2cae92f452396ce2006f Mon Sep 17 00:00:00 2001 From: Samyak Sarnayak Date: Tue, 31 Mar 2026 13:47:40 +0530 Subject: [PATCH 4/4] test: more tests --- tests/test_files/colon_operator.slt | 41 ++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/tests/test_files/colon_operator.slt b/tests/test_files/colon_operator.slt index f37352f..903709d 100644 --- a/tests/test_files/colon_operator.slt +++ b/tests/test_files/colon_operator.slt @@ -3,11 +3,13 @@ CREATE TABLE json_data (id INT, json_str TEXT) AS VALUES (1, '{"name": "Alice", "age": 30}'), (2, '{"name": "Bob", "age": 25}'), (3, '{"items": [1, 2, 3], "count": 3}'), - (4, 'null'), - (5, '"simple string"'), - (6, '123'), - (7, 'true'); + (4, '{"items": [{"name": "Sam", "age": 33}, "a", 3], "count": 3}'), + (5, 'null'), + (6, '"simple string"'), + (7, '["looooooooong string", "hehe", "a"]'), + (8, 'true'); +# field access query T select variant_pretty(json_to_variant(json_str):name) from json_data; ---- @@ -18,18 +20,22 @@ NULL NULL NULL NULL +NULL +# field + array access query T select variant_pretty(json_to_variant(json_str):items[0]) from json_data; ---- NULL NULL Int8(1) +{"age": Int8(33), "name": ShortString(ShortString("Sam"))} NULL NULL NULL NULL +# array access with non-array query T select variant_pretty(json_to_variant(json_str):age[0]) from json_data; ---- @@ -40,3 +46,30 @@ NULL NULL NULL NULL +NULL + +# field -> array -> field access +query T +select variant_pretty(json_to_variant(json_str):items[0]["name"]) from json_data; +---- +NULL +NULL +NULL +ShortString(ShortString("Sam")) +NULL +NULL +NULL +NULL + +# field -> array -> field access but with single quotes +query T +select variant_pretty(json_to_variant(json_str):items[0]['name']) from json_data; +---- +NULL +NULL +NULL +ShortString(ShortString("Sam")) +NULL +NULL +NULL +NULL