Skip to content

Commit 769e214

Browse files
Dandandan, claude, and alamb
authored
Support Date32/Date64 in unwrap_cast optimization (#21665)
## Which issue does this PR close?

N/A

## Rationale for this change

Filters like `WHERE EventDate >= '2013-07-01'` on a `UInt16` column exposed as `Date32` via a view produce `CAST(CAST(EventDate AS Int32) AS Date32) >= Date32("2013-07-01")` — 4 CAST operations per row. The existing `unwrap_cast_in_comparison` optimizer can eliminate these but didn't support `Date32`/`Date64`.

## What changes are included in this PR?

Add `Date32`/`Date64` to `is_supported_numeric_type` and the match arms in `try_cast_numeric_literal`. They are treated like `Int32`/`Int64` since that's their physical representation (days / ms since epoch). A guard prevents Date↔Timestamp unwrapping, which is lossy (drops time-of-day).

**ClickBench Q36-Q42 before:**

```
FilterExec: CAST(CAST(EventDate@0 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) <= 2013-07-31
```

**After:**

```
FilterExec: EventDate@0 >= 15887 AND EventDate@0 <= 15917
```

## Are these changes tested?

Existing `casts` tests pass. ClickBench sqllogictest expectations updated.

## Are there any user-facing changes?

No.

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent 8229509 commit 769e214

2 files changed

Lines changed: 92 additions & 32 deletions

File tree

datafusion/expr-common/src/casts.rs

Lines changed: 64 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,28 @@ pub fn is_supported_type(data_type: &DataType) -> bool {
5858
|| is_supported_binary_type(data_type)
5959
}
6060

61-
/// Returns true if unwrap_cast_in_comparison support this numeric type
61+
fn is_date_type(data_type: &DataType) -> bool {
62+
matches!(data_type, DataType::Date32 | DataType::Date64)
63+
}
64+
65+
/// Returns true when unwrapping a date/timestamp cast could change comparison
66+
/// semantics.
67+
///
68+
/// A `Date` stores only a calendar day, while a `Timestamp` stores a specific
69+
/// instant or wall-clock time. `Timestamp -> Date` is lossy because it drops the
70+
/// time-of-day. `Date -> Timestamp` is also lossy in this optimizer context
71+
/// because there is no unique inverse: converting a date to a timestamp has to
72+
/// invent a time component such as midnight.
73+
///
74+
/// For example, `CAST(ts AS DATE) = DATE '2024-01-01'` means "any timestamp
75+
/// during that day", but unwrapping it to `ts = TIMESTAMP '2024-01-01
76+
/// 00:00:00'` matches only midnight.
77+
fn is_lossy_temporal_cast(from_type: &DataType, to_type: &DataType) -> bool {
78+
(is_date_type(from_type) && to_type.is_temporal())
79+
|| (is_date_type(to_type) && from_type.is_temporal())
80+
}
81+
82+
/// Returns true if unwrap_cast_in_comparison supports this numeric type
6283
fn is_supported_numeric_type(data_type: &DataType) -> bool {
6384
matches!(
6485
data_type,
@@ -70,6 +91,8 @@ fn is_supported_numeric_type(data_type: &DataType) -> bool {
7091
| DataType::Int16
7192
| DataType::Int32
7293
| DataType::Int64
94+
| DataType::Date32
95+
| DataType::Date64
7396
| DataType::Decimal32(_, _)
7497
| DataType::Decimal64(_, _)
7598
| DataType::Decimal128(_, _)
@@ -107,6 +130,10 @@ fn try_cast_numeric_literal(
107130
return None;
108131
}
109132

133+
if is_lossy_temporal_cast(&lit_data_type, target_type) {
134+
return None;
135+
}
136+
110137
let mul = match target_type {
111138
DataType::UInt8
112139
| DataType::UInt16
@@ -115,7 +142,9 @@ fn try_cast_numeric_literal(
115142
| DataType::Int8
116143
| DataType::Int16
117144
| DataType::Int32
118-
| DataType::Int64 => 1_i128,
145+
| DataType::Int64
146+
| DataType::Date32
147+
| DataType::Date64 => 1_i128,
119148
DataType::Timestamp(_, _) => 1_i128,
120149
DataType::Decimal32(_, scale) => 10_i128.pow(*scale as u32),
121150
DataType::Decimal64(_, scale) => 10_i128.pow(*scale as u32),
@@ -129,8 +158,8 @@ fn try_cast_numeric_literal(
129158
DataType::UInt64 => (u64::MIN as i128, u64::MAX as i128),
130159
DataType::Int8 => (i8::MIN as i128, i8::MAX as i128),
131160
DataType::Int16 => (i16::MIN as i128, i16::MAX as i128),
132-
DataType::Int32 => (i32::MIN as i128, i32::MAX as i128),
133-
DataType::Int64 => (i64::MIN as i128, i64::MAX as i128),
161+
DataType::Int32 | DataType::Date32 => (i32::MIN as i128, i32::MAX as i128),
162+
DataType::Int64 | DataType::Date64 => (i64::MIN as i128, i64::MAX as i128),
134163
DataType::Timestamp(_, _) => (i64::MIN as i128, i64::MAX as i128),
135164
DataType::Decimal32(precision, _) => (
136165
// Different precision for decimal32 can store different range of value.
@@ -164,6 +193,8 @@ fn try_cast_numeric_literal(
164193
ScalarValue::UInt16(Some(v)) => (*v as i128).checked_mul(mul),
165194
ScalarValue::UInt32(Some(v)) => (*v as i128).checked_mul(mul),
166195
ScalarValue::UInt64(Some(v)) => (*v as i128).checked_mul(mul),
196+
ScalarValue::Date32(Some(v)) => (*v as i128).checked_mul(mul),
197+
ScalarValue::Date64(Some(v)) => (*v as i128).checked_mul(mul),
167198
ScalarValue::TimestampSecond(Some(v), _) => (*v as i128).checked_mul(mul),
168199
ScalarValue::TimestampMillisecond(Some(v), _) => (*v as i128).checked_mul(mul),
169200
ScalarValue::TimestampMicrosecond(Some(v), _) => (*v as i128).checked_mul(mul),
@@ -241,6 +272,8 @@ fn try_cast_numeric_literal(
241272
DataType::Int16 => ScalarValue::Int16(Some(value as i16)),
242273
DataType::Int32 => ScalarValue::Int32(Some(value as i32)),
243274
DataType::Int64 => ScalarValue::Int64(Some(value as i64)),
275+
DataType::Date32 => ScalarValue::Date32(Some(value as i32)),
276+
DataType::Date64 => ScalarValue::Date64(Some(value as i64)),
244277
DataType::UInt8 => ScalarValue::UInt8(Some(value as u8)),
245278
DataType::UInt16 => ScalarValue::UInt16(Some(value as u16)),
246279
DataType::UInt32 => ScalarValue::UInt32(Some(value as u32)),
@@ -700,6 +733,33 @@ mod tests {
700733
}
701734
}
702735

736+
#[test]
737+
fn test_try_cast_to_type_date_timestamp_lossy_not_allowed() {
738+
expect_cast(
739+
ScalarValue::Date32(Some(1)),
740+
DataType::Timestamp(TimeUnit::Second, None),
741+
ExpectedCast::NoValue,
742+
);
743+
744+
expect_cast(
745+
ScalarValue::Date64(Some(86_400_000)),
746+
DataType::Timestamp(TimeUnit::Millisecond, None),
747+
ExpectedCast::NoValue,
748+
);
749+
750+
expect_cast(
751+
ScalarValue::TimestampSecond(Some(86_400), None),
752+
DataType::Date32,
753+
ExpectedCast::NoValue,
754+
);
755+
756+
expect_cast(
757+
ScalarValue::TimestampMillisecond(Some(86_400_000), None),
758+
DataType::Date64,
759+
ExpectedCast::NoValue,
760+
);
761+
}
762+
703763
#[test]
704764
fn test_try_cast_to_type_unsupported() {
705765
// int64 to list

0 commit comments

Comments
 (0)