diff --git a/datafusion/spark/src/function/datetime/dayname.rs b/datafusion/spark/src/function/datetime/dayname.rs
new file mode 100644
index 0000000000000..10aa551d399d1
--- /dev/null
+++ b/datafusion/spark/src/function/datetime/dayname.rs
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{Array, ArrayRef, AsArray, StringArray};
+use arrow::compute::{CastOptions, DatePart, cast_with_options, date_part};
+use arrow::datatypes::{DataType, Field, FieldRef, Int32Type};
+use arrow::util::display::FormatOptions;
+use datafusion::logical_expr::{
+    Coercion, ColumnarValue, Signature, TypeSignature, TypeSignatureClass, Volatility,
+};
+use datafusion_common::types::{logical_date, logical_string};
+use datafusion_common::{Result, internal_err, plan_err};
+use datafusion_expr::{ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl};
+use std::sync::Arc;
+
+/// Spark-compatible `dayname` function.
+///
+/// Returns the three-letter abbreviated English day name (`Mon` through `Sun`) of a
+/// single date, timestamp, or string argument. String arguments are cast to a date.
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkDayName {
+    signature: Signature,
+}
+
+impl Default for SparkDayName {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SparkDayName {
+    /// Creates the function with a signature that accepts exactly one timestamp,
+    /// date, or string argument.
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::one_of(
+                vec![
+                    TypeSignature::Coercible(vec![Coercion::new_exact(
+                        TypeSignatureClass::Timestamp,
+                    )]),
+                    TypeSignature::Coercible(vec![Coercion::new_exact(
+                        TypeSignatureClass::Native(logical_date()),
+                    )]),
+                    TypeSignature::Coercible(vec![Coercion::new_exact(
+                        TypeSignatureClass::Native(logical_string()),
+                    )]),
+                ],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for SparkDayName {
+    fn name(&self) -> &str {
+        "dayname"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Always errors: the return type is derived in `return_field_from_args`.
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        internal_err!("return_field_from_args should be used instead")
+    }
+
+    /// Returns a `Utf8` field named after the function.
+    ///
+    /// The field is nullable when the argument is nullable or is a string.
+    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
+        let arg = &args.arg_fields[0];
+        // With ANSI mode off, a string that does not parse as a date is cast to NULL
+        // (e.g. `dayname('')`), so a non-nullable string input can still yield NULL.
+        let nullable = arg.is_nullable()
+            || matches!(
+                arg.data_type(),
+                DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8
+            );
+        Ok(Arc::new(Field::new(self.name(), DataType::Utf8, nullable)))
+    }
+
+    /// Evaluates `dayname` for one argument and always returns an array result.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the argument count is not 1, the argument type is
+    /// unsupported, or a string fails to cast to a date while ANSI mode is enabled.
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        if args.args.len() != 1 {
+            return plan_err!("dayname expects exactly 1 argument");
+        }
+        // ANSI mode makes invalid date strings an error; otherwise they become NULL.
+        let cast_options = CastOptions {
+            safe: !args.config_options.execution.enable_ansi_mode,
+            format_options: FormatOptions::default(),
+        };
+        let result = match &args.args[0] {
+            ColumnarValue::Array(array) => spark_day_name(array, &cast_options)?,
+            ColumnarValue::Scalar(scalar) => {
+                // Scalars are expanded to a one-element array to share the array path.
+                let array = scalar.to_array()?;
+                spark_day_name(&array, &cast_options)?
+            }
+        };
+        Ok(ColumnarValue::Array(result))
+    }
+}
+
+/// Computes day names for `array`, casting string input to `Date32` first.
+///
+/// Returns an internal error for any type other than `Date32`, `Timestamp`, or a
+/// string type.
+fn spark_day_name(array: &ArrayRef, cast_options: &CastOptions) -> Result<ArrayRef> {
+    match array.data_type() {
+        DataType::Date32 | DataType::Timestamp(_, _) => spark_day_name_inner(array),
+        DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8 => {
+            let date_array = cast_with_options(array, &DataType::Date32, cast_options)?;
+            spark_day_name_inner(&date_array)
+        }
+        other => {
+            internal_err!("Unsupported arg {other:?} for Spark function `dayname`")
+        }
+    }
+}
+
+/// Extracts the Monday-based day of week from each value and maps it to its name,
+/// keeping NULL inputs as NULL.
+fn spark_day_name_inner(array: &ArrayRef) -> Result<ArrayRef> {
+    let result: StringArray = date_part(array, DatePart::DayOfWeekMonday0)?
+        .as_primitive::<Int32Type>()
+        .iter()
+        .map(|x| x.and_then(get_display_name))
+        .collect();
+    Ok(Arc::new(result))
+}
+
+/// Returns the abbreviated English name for a Monday-based day number (0 is `Mon`),
+/// or `None` when `day` is outside `0..=6`.
+///
+/// Only the English locale is supported, matching the behavior of Spark's `dayname`
+/// function, which returns English day names regardless of the system or session
+/// locale.
+fn get_display_name(day: i32) -> Option<String> {
+    // Indexed by the Monday-based day number produced by
+    // `DatePart::DayOfWeekMonday0`: 0 is Monday, 6 is Sunday.
+    const DAY_NAMES: [&str; 7] = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"];
+
+    // Negative values fail the `usize` conversion and values above 6 miss the
+    // table; both yield `None`, which the caller turns into a NULL slot.
+    usize::try_from(day)
+        .ok()
+        .and_then(|index| DAY_NAMES.get(index))
+        .map(|name| String::from(*name))
+}
diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs
index 3133ed7337f25..53127138600ff 100644
--- a/datafusion/spark/src/function/datetime/mod.rs
+++ b/datafusion/spark/src/function/datetime/mod.rs
@@ -21,6 +21,7 @@ pub mod date_diff;
 pub mod date_part;
 pub mod date_sub;
 pub mod date_trunc;
+pub mod dayname;
 pub mod extract;
 pub mod from_utc_timestamp;
 pub mod last_day;
@@ -72,6 +73,7 @@ make_udf_function!(
     unix_seconds,
     unix::SparkUnixTimestamp::seconds
 );
+make_udf_function!(dayname::SparkDayName, dayname);
 
 pub mod expr_fn {
     use datafusion_functions::export_functions;
@@ -179,6 +181,11 @@ pub mod expr_fn {
         "Returns the number of seconds since epoch (1970-01-01 00:00:00 UTC) for the given timestamp `ts`.",
         ts
     ));
+    export_functions!((
+        dayname,
+        "Returns the three-letter abbreviated day name from the given date.",
+        arg1
+    ));
 }
 
 pub fn functions() -> Vec<Arc<ScalarUDF>> {
@@ -204,5 +211,6 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
         unix_micros(),
         unix_millis(),
         unix_seconds(),
+        dayname(),
     ]
 }
diff --git a/datafusion/sqllogictest/test_files/spark/datetime/dayname.slt b/datafusion/sqllogictest/test_files/spark/datetime/dayname.slt
new file mode 100644
index 0000000000000..18ebb786747a2
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/spark/datetime/dayname.slt
@@ -0,0 +1,84 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+statement ok
+set datafusion.execution.enable_ansi_mode = false;
+
+query T
+SELECT dayname('2008-02-20'::DATE);
+----
+Wed
+
+query T
+SELECT dayname(NULL::DATE);
+----
+NULL
+
+query T
+SELECT dayname('2026-03-09'::DATE);
+----
+Mon
+
+query T
+SELECT dayname('2026-03-08'::DATE);
+----
+Sun
+
+query T
+SELECT dayname('1948-08-10'::DATE);
+----
+Tue
+
+query T
+SELECT dayname('1987-11-13'::DATE);
+----
+Fri
+
+query T
+SELECT dayname('2000-07-27'::DATE);
+----
+Thu
+
+query T
+SELECT dayname('2010-04-24'::DATE);
+----
+Sat
+
+query T
+SELECT dayname('2010-04-24'::STRING);
+----
+Sat
+
+query T
+SELECT dayname(NULL::STRING);
+----
+NULL
+
+query T
+SELECT dayname(''::STRING);
+----
+NULL
+
+query T
+SELECT dayname('2010-04-24'::TIMESTAMP);
+----
+Sat
+
+query T
+SELECT dayname(NULL::TIMESTAMP);
+----
+NULL