Skip to content

Commit 115e29b

Browse files
committed
feat[datafusion]: support regex and LIKE coercion on Dict<Binary> types
This also covers other dictionary value types that need an extra coercion step before string coercion. Signed-off-by: Alfonso Subiotto Marques <alfonso.subiotto@polarsignals.com>
1 parent 9ed1915 commit 115e29b

2 files changed

Lines changed: 19 additions & 2 deletions

File tree

datafusion/expr-common/src/type_coercion/binary.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1766,7 +1766,7 @@ fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
17661766
pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
17671767
string_coercion(lhs_type, rhs_type)
17681768
.or_else(|| binary_to_string_coercion(lhs_type, rhs_type))
1769-
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false, string_coercion))
1769+
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false, like_coercion))
17701770
.or_else(|| ree_coercion(lhs_type, rhs_type, false, like_coercion))
17711771
.or_else(|| regex_null_coercion(lhs_type, rhs_type))
17721772
.or_else(|| null_coercion(lhs_type, rhs_type))
@@ -1787,7 +1787,7 @@ fn regex_null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataT
17871787
/// This is a union of string coercion rules, dictionary coercion rules, REE coercion rules, and regex null coercion rules.
17881788
pub fn regex_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
17891789
string_coercion(lhs_type, rhs_type)
1790-
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false, string_coercion))
1790+
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false, regex_coercion))
17911791
.or_else(|| ree_coercion(lhs_type, rhs_type, false, regex_coercion))
17921792
.or_else(|| regex_null_coercion(lhs_type, rhs_type))
17931793
}

datafusion/sqllogictest/test_files/regexp/regexp_like.slt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,23 @@ drop table strings
277277
statement ok
278278
drop table dict_table
279279

280+
# Dict value types that themselves need further coercion against the literal
281+
statement ok
282+
create table dict_inner as
283+
select arrow_cast(arrow_cast(c, 'Binary'), 'Dictionary(UInt32, Binary)') as bin_col,
284+
arrow_cast(arrow_cast(c, 'Dictionary(UInt32, Utf8)'),
285+
'Dictionary(Int32, Dictionary(UInt32, Utf8))') as nested_col
286+
from (values ('foo'), ('bar')) as t(c);
287+
288+
query BB
289+
select bin_col LIKE 'foo', nested_col ~ 'foo' from dict_inner;
290+
----
291+
true true
292+
false false
293+
294+
statement ok
295+
drop table dict_inner
296+
280297
# Ensure that regexp_like is rewritten to use the (more optimized) regex operators
281298
statement ok
282299
create table regexp_test as values

0 commit comments

Comments
 (0)