Skip to content

Commit 73bce15

Browse files
authored
fix: Fix scalar broadcast for to_timestamp() (#20224)
When `to_timestamp()` was invoked with a scalar Float64 and an array of strings, the previous implementation always converted the scalar into a one-element array (`args[0].to_array(1)`), so the return value was not broadcast to the number of rows in the batch. The Float16 and Float32 branches had the same problem. That is, a query like `SELECT to_timestamp(123.5, t.x) FROM t` would result in:

`Internal error: UDF to_timestamp returned a different number of rows than expected`

## Which issue does this PR close?

<!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. -->

- Closes #20223

## Rationale for this change

<!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. -->

## What changes are included in this PR?

<!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. -->

## Are these changes tested?

Yes, added SLT.

<!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? -->

## Are there any user-facing changes?

<!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. -->

<!-- If there are any breaking changes to public APIs, please add the `api change` label. -->
1 parent 0401a47 commit 73bce15

11 files changed

Lines changed: 85 additions & 30 deletions

File tree

datafusion/functions/src/datetime/to_timestamp.rs

Lines changed: 50 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -430,27 +430,56 @@ impl ScalarUDFImpl for ToTimestampFunc {
430430
.cast_to(&Timestamp(Second, None), None)?
431431
.cast_to(&Timestamp(Nanosecond, tz), None),
432432
Null | Timestamp(_, _) => args[0].cast_to(&Timestamp(Nanosecond, tz), None),
433-
Float16 => {
434-
let arr = args[0].to_array(1)?;
435-
let f16_arr = downcast_arg!(&arr, Float16Array);
436-
let result: TimestampNanosecondArray =
437-
f16_arr.unary(|x| (x.to_f64() * 1_000_000_000.0) as i64);
438-
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
439-
}
440-
Float32 => {
441-
let arr = args[0].to_array(1)?;
442-
let f32_arr = downcast_arg!(&arr, Float32Array);
443-
let result: TimestampNanosecondArray =
444-
f32_arr.unary(|x| (x as f64 * 1_000_000_000.0) as i64);
445-
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
446-
}
447-
Float64 => {
448-
let arr = args[0].to_array(1)?;
449-
let f64_arr = downcast_arg!(&arr, Float64Array);
450-
let result: TimestampNanosecondArray =
451-
f64_arr.unary(|x| (x * 1_000_000_000.0) as i64);
452-
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
453-
}
433+
Float16 => match &args[0] {
434+
ColumnarValue::Scalar(ScalarValue::Float16(value)) => {
435+
let timestamp_nanos =
436+
value.map(|v| (v.to_f64() * 1_000_000_000.0) as i64);
437+
Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
438+
timestamp_nanos,
439+
tz,
440+
)))
441+
}
442+
ColumnarValue::Array(arr) => {
443+
let f16_arr = downcast_arg!(arr, Float16Array);
444+
let result: TimestampNanosecondArray =
445+
f16_arr.unary(|x| (x.to_f64() * 1_000_000_000.0) as i64);
446+
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
447+
}
448+
_ => exec_err!("Invalid Float16 value for to_timestamp"),
449+
},
450+
Float32 => match &args[0] {
451+
ColumnarValue::Scalar(ScalarValue::Float32(value)) => {
452+
let timestamp_nanos =
453+
value.map(|v| (v as f64 * 1_000_000_000.0) as i64);
454+
Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
455+
timestamp_nanos,
456+
tz,
457+
)))
458+
}
459+
ColumnarValue::Array(arr) => {
460+
let f32_arr = downcast_arg!(arr, Float32Array);
461+
let result: TimestampNanosecondArray =
462+
f32_arr.unary(|x| (x as f64 * 1_000_000_000.0) as i64);
463+
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
464+
}
465+
_ => exec_err!("Invalid Float32 value for to_timestamp"),
466+
},
467+
Float64 => match &args[0] {
468+
ColumnarValue::Scalar(ScalarValue::Float64(value)) => {
469+
let timestamp_nanos = value.map(|v| (v * 1_000_000_000.0) as i64);
470+
Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
471+
timestamp_nanos,
472+
tz,
473+
)))
474+
}
475+
ColumnarValue::Array(arr) => {
476+
let f64_arr = downcast_arg!(arr, Float64Array);
477+
let result: TimestampNanosecondArray =
478+
f64_arr.unary(|x| (x * 1_000_000_000.0) as i64);
479+
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
480+
}
481+
_ => exec_err!("Invalid Float64 value for to_timestamp"),
482+
},
454483
Decimal32(_, _) | Decimal64(_, _) | Decimal256(_, _) => {
455484
let arg = args[0].cast_to(&Decimal128(38, 9), None)?;
456485
decimal128_to_timestamp_nanos(&arg, tz)

datafusion/sqllogictest/test_files/datetime/arith_date_time.slt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,4 +113,3 @@ SELECT '2001-09-28'::date / '03:00'::time
113113

114114
query error Invalid timestamp arithmetic operation
115115
SELECT '2001-09-28'::date % '03:00'::time
116-

datafusion/sqllogictest/test_files/datetime/arith_timestamp_duration.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,4 +144,4 @@ query error Invalid timestamp arithmetic operation
144144
SELECT '2001-09-28T01:00:00'::timestamp % arrow_cast(12345, 'Duration(Second)');
145145

146146
query error Invalid timestamp arithmetic operation
147-
SELECT '2001-09-28T01:00:00'::timestamp / arrow_cast(12345, 'Duration(Second)');
147+
SELECT '2001-09-28T01:00:00'::timestamp / arrow_cast(12345, 'Duration(Second)');

datafusion/sqllogictest/test_files/datetime/timestamps.slt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5328,3 +5328,33 @@ drop table ts_data_secs
53285328

53295329
statement ok
53305330
drop table ts_data_micros_kolkata
5331+
5332+
##########
5333+
## Test to_timestamp with scalar float inputs
5334+
##########
5335+
5336+
statement ok
5337+
create table test_to_timestamp_scalar(id int, name varchar) as values
5338+
(1, 'foo'),
5339+
(2, 'bar');
5340+
5341+
query P
5342+
SELECT to_timestamp(123.5, name) FROM test_to_timestamp_scalar ORDER BY id;
5343+
----
5344+
1970-01-01T00:02:03.500
5345+
1970-01-01T00:02:03.500
5346+
5347+
query P
5348+
SELECT to_timestamp(456.789::float, name) FROM test_to_timestamp_scalar ORDER BY id;
5349+
----
5350+
1970-01-01T00:07:36.789001464
5351+
1970-01-01T00:07:36.789001464
5352+
5353+
query P
5354+
SELECT to_timestamp(arrow_cast(100.5, 'Float16'), name) FROM test_to_timestamp_scalar ORDER BY id;
5355+
----
5356+
1970-01-01T00:01:40.500
5357+
1970-01-01T00:01:40.500
5358+
5359+
statement ok
5360+
drop table test_to_timestamp_scalar

datafusion/sqllogictest/test_files/limit.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -871,4 +871,4 @@ DROP TABLE test_limit_with_partitions;
871871

872872
# Tear down src_table table:
873873
statement ok
874-
DROP TABLE src_table;
874+
DROP TABLE src_table;

datafusion/sqllogictest/test_files/limit_single_row_batches.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ SELECT COUNT(*) FROM (SELECT i FROM filter_limit WHERE i <> 0 LIMIT 1);
1919
1
2020

2121
statement ok
22-
DROP TABLE filter_limit;
22+
DROP TABLE filter_limit;

datafusion/sqllogictest/test_files/spark/collection/size.slt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,4 +129,3 @@ SELECT size(column1) FROM VALUES (map(['a'], [1])), (map(['a','b'], [1,2])), (NU
129129
1
130130
2
131131
-1
132-

datafusion/sqllogictest/test_files/spark/datetime/time_trunc.slt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,4 +71,3 @@ NULL
7171
# incorrect format
7272
query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: The format argument of `TIME_TRUNC` must be one of: hour, minute, second, millisecond, microsecond
7373
SELECT time_trunc('test', '09:32:05.123456'::time);
74-

datafusion/sqllogictest/test_files/spark/datetime/trunc.slt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,3 @@ SELECT trunc('2009-02-12'::date, NULL::string);
9090
# incorrect format
9191
query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: The format argument of `TRUNC` must be one of: year, yy, yyyy, month, mm, mon, day, week, quarter.
9292
SELECT trunc('2009-02-12'::date, 'test'::string);
93-

datafusion/sqllogictest/test_files/struct.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1666,4 +1666,4 @@ order by id;
16661666
3 2 150
16671667

16681668
statement ok
1669-
drop table t_agg_window;
1669+
drop table t_agg_window;

0 commit comments

Comments
 (0)