Skip to content

Commit cf8997e

Browse files
committed
fix: grouping separator for float and decimal in format_string
1 parent c17c87c commit cf8997e

1 file changed

Lines changed: 293 additions & 3 deletions

File tree

datafusion/spark/src/function/string/format_string.rs

Lines changed: 293 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1690,6 +1690,23 @@ impl ConversionSpecifier {
16901690
}
16911691

16921692
fn format_float(&self, writer: &mut String, value: f64) -> Result<()> {
1693+
// Java/Spark reject the grouping separator flag with scientific notation
1694+
if self.grouping_separator
1695+
&& matches!(
1696+
self.conversion_type,
1697+
ConversionType::SciFloatLower | ConversionType::SciFloatUpper
1698+
)
1699+
{
1700+
return exec_err!(
1701+
"Grouping separator ',' flag is not compatible with scientific notation conversion '{}'",
1702+
if self.conversion_type == ConversionType::SciFloatUpper {
1703+
'E'
1704+
} else {
1705+
'e'
1706+
}
1707+
);
1708+
}
1709+
16931710
let mut prefix = String::new();
16941711
let mut suffix = String::new();
16951712
let mut number = String::new();
@@ -1770,6 +1787,9 @@ impl ConversionSpecifier {
17701787
if strip_trailing_0s {
17711788
number = trim_trailing_0s(&number).to_owned();
17721789
}
1790+
if self.grouping_separator {
1791+
number = insert_thousands_separator(&number);
1792+
}
17731793
}
17741794
if self.alt_form && !number.contains('.') {
17751795
number += ".";
@@ -2000,6 +2020,23 @@ impl ConversionSpecifier {
20002020
}
20012021

20022022
fn format_decimal(&self, writer: &mut String, value: &str, scale: i64) -> Result<()> {
2023+
// Java/Spark reject the grouping separator flag with scientific notation
2024+
if self.grouping_separator
2025+
&& matches!(
2026+
self.conversion_type,
2027+
ConversionType::SciFloatLower | ConversionType::SciFloatUpper
2028+
)
2029+
{
2030+
return exec_err!(
2031+
"Grouping separator ',' flag is not compatible with scientific notation conversion '{}'",
2032+
if self.conversion_type == ConversionType::SciFloatUpper {
2033+
'E'
2034+
} else {
2035+
'e'
2036+
}
2037+
);
2038+
}
2039+
20032040
let mut prefix = String::new();
20042041
let upper = self.conversion_type.is_upper();
20052042

@@ -2033,7 +2070,15 @@ impl ConversionSpecifier {
20332070
let number = match self.conversion_type {
20342071
ConversionType::DecFloatLower => {
20352072
// Format as fixed-point decimal
2036-
self.format_decimal_fixed(&abs_decimal, precision, strip_trailing_0s)?
2073+
let mut n = self.format_decimal_fixed(
2074+
&abs_decimal,
2075+
precision,
2076+
strip_trailing_0s,
2077+
)?;
2078+
if self.grouping_separator {
2079+
n = insert_thousands_separator(&n);
2080+
}
2081+
n
20372082
}
20382083
ConversionType::SciFloatLower => self.format_decimal_scientific(
20392084
&abs_decimal,
@@ -2062,11 +2107,15 @@ impl ConversionSpecifier {
20622107
strip_trailing_0s,
20632108
)?
20642109
} else {
2065-
self.format_decimal_fixed(
2110+
let mut n = self.format_decimal_fixed(
20662111
&abs_decimal,
20672112
precision - 1 - log10_val.floor() as i32,
20682113
strip_trailing_0s,
2069-
)?
2114+
)?;
2115+
if self.grouping_separator {
2116+
n = insert_thousands_separator(&n);
2117+
}
2118+
n
20702119
}
20712120
}
20722121
_ => {
@@ -2332,6 +2381,24 @@ impl FloatBits for f64 {
23322381
}
23332382
}
23342383

2384+
/// Inserts thousands separators (`,`) into the integer part of a numeric string.
2385+
/// For example, `"1234567.89"` becomes `"1,234,567.89"`.
2386+
fn insert_thousands_separator(number: &str) -> String {
2387+
let (int_part, frac_part) = match number.find('.') {
2388+
Some(pos) => (&number[..pos], &number[pos..]),
2389+
None => (number, ""),
2390+
};
2391+
let mut result = String::with_capacity(number.len() + number.len() / 3);
2392+
for (i, c) in int_part.char_indices() {
2393+
if i > 0 && (int_part.len() - i) % 3 == 0 {
2394+
result.push(',');
2395+
}
2396+
result.push(c);
2397+
}
2398+
result.push_str(frac_part);
2399+
result
2400+
}
2401+
23352402
fn trim_trailing_0s(number: &str) -> &str {
23362403
if number.contains('.') {
23372404
for (i, c) in number.chars().rev().enumerate() {
@@ -2355,6 +2422,8 @@ fn trim_trailing_0s_hex(number: &str) -> &str {
23552422
#[cfg(test)]
23562423
mod tests {
23572424
use super::*;
2425+
use crate::function::utils::test::test_scalar_function;
2426+
use arrow::array::StringArray;
23582427
use arrow::datatypes::DataType::Utf8;
23592428

23602429
#[test]
@@ -2385,4 +2454,225 @@ mod tests {
23852454

23862455
Ok(())
23872456
}
2457+
2458+
#[test]
2459+
fn test_insert_thousands_separator() {
2460+
assert_eq!(insert_thousands_separator("1234567.89"), "1,234,567.89");
2461+
assert_eq!(insert_thousands_separator("123.45"), "123.45");
2462+
assert_eq!(insert_thousands_separator("1234"), "1,234");
2463+
assert_eq!(insert_thousands_separator("12"), "12");
2464+
assert_eq!(insert_thousands_separator("0.5"), "0.5");
2465+
assert_eq!(
2466+
insert_thousands_separator("1234567890.1234"),
2467+
"1,234,567,890.1234"
2468+
);
2469+
assert_eq!(insert_thousands_separator("1000"), "1,000");
2470+
assert_eq!(insert_thousands_separator("100"), "100");
2471+
}
2472+
2473+
#[test]
2474+
fn test_grouping_separator_float() -> Result<()> {
2475+
test_scalar_function!(
2476+
FormatStringFunc::new(),
2477+
vec![
2478+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.2f".to_string()))),
2479+
ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
2480+
],
2481+
Ok(Some("1,234,567.89")),
2482+
&str,
2483+
Utf8,
2484+
StringArray
2485+
);
2486+
Ok(())
2487+
}
2488+
2489+
#[test]
2490+
fn test_grouping_separator_decimal() -> Result<()> {
2491+
test_scalar_function!(
2492+
FormatStringFunc::new(),
2493+
vec![
2494+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.2f".to_string()))),
2495+
ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
2496+
],
2497+
Ok(Some("1,234,567.89")),
2498+
&str,
2499+
Utf8,
2500+
StringArray
2501+
);
2502+
Ok(())
2503+
}
2504+
2505+
#[test]
2506+
fn test_grouping_separator_scientific_float() -> Result<()> {
2507+
// %,e — Java/Spark reject grouping separator with scientific notation
2508+
test_scalar_function!(
2509+
FormatStringFunc::new(),
2510+
vec![
2511+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,e".to_string()))),
2512+
ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
2513+
],
2514+
Err(DataFusionError::Execution(
2515+
"Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
2516+
)),
2517+
&str,
2518+
Utf8,
2519+
StringArray
2520+
);
2521+
// %,E — uppercase scientific also rejected
2522+
test_scalar_function!(
2523+
FormatStringFunc::new(),
2524+
vec![
2525+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,E".to_string()))),
2526+
ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
2527+
],
2528+
Err(DataFusionError::Execution(
2529+
"Grouping separator ',' flag is not compatible with scientific notation conversion 'E'".to_string(),
2530+
)),
2531+
&str,
2532+
Utf8,
2533+
StringArray
2534+
);
2535+
// %,.0e — precision 0 scientific with grouping also rejected
2536+
test_scalar_function!(
2537+
FormatStringFunc::new(),
2538+
vec![
2539+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0e".to_string()))),
2540+
ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
2541+
],
2542+
Err(DataFusionError::Execution(
2543+
"Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
2544+
)),
2545+
&str,
2546+
Utf8,
2547+
StringArray
2548+
);
2549+
Ok(())
2550+
}
2551+
2552+
#[test]
2553+
fn test_grouping_separator_compact_float() -> Result<()> {
2554+
// %,g with large number — triggers scientific, no commas
2555+
test_scalar_function!(
2556+
FormatStringFunc::new(),
2557+
vec![
2558+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
2559+
ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
2560+
],
2561+
Ok(Some("1.23457e+06")),
2562+
&str,
2563+
Utf8,
2564+
StringArray
2565+
);
2566+
// %,g with small number — triggers fixed-point, commas in integer part
2567+
test_scalar_function!(
2568+
FormatStringFunc::new(),
2569+
vec![
2570+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
2571+
ColumnarValue::Scalar(ScalarValue::Float64(Some(12345.6))),
2572+
],
2573+
Ok(Some("12,345.6")),
2574+
&str,
2575+
Utf8,
2576+
StringArray
2577+
);
2578+
// %,.0g — precision 0 compact with grouping (large number, scientific)
2579+
test_scalar_function!(
2580+
FormatStringFunc::new(),
2581+
vec![
2582+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0g".to_string()))),
2583+
ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
2584+
],
2585+
Ok(Some("1e+06")),
2586+
&str,
2587+
Utf8,
2588+
StringArray
2589+
);
2590+
// %,G — uppercase compact
2591+
test_scalar_function!(
2592+
FormatStringFunc::new(),
2593+
vec![
2594+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,G".to_string()))),
2595+
ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
2596+
],
2597+
Ok(Some("1.23457E+06")),
2598+
&str,
2599+
Utf8,
2600+
StringArray
2601+
);
2602+
Ok(())
2603+
}
2604+
2605+
#[test]
2606+
fn test_grouping_separator_scientific_decimal() -> Result<()> {
2607+
// %,e on decimal — Java/Spark reject grouping separator with scientific notation
2608+
test_scalar_function!(
2609+
FormatStringFunc::new(),
2610+
vec![
2611+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,e".to_string()))),
2612+
ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
2613+
],
2614+
Err(DataFusionError::Execution(
2615+
"Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
2616+
)),
2617+
&str,
2618+
Utf8,
2619+
StringArray
2620+
);
2621+
// %,.0e on decimal — also rejected
2622+
test_scalar_function!(
2623+
FormatStringFunc::new(),
2624+
vec![
2625+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0e".to_string()))),
2626+
ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
2627+
],
2628+
Err(DataFusionError::Execution(
2629+
"Grouping separator ',' flag is not compatible with scientific notation conversion 'e'".to_string(),
2630+
)),
2631+
&str,
2632+
Utf8,
2633+
StringArray
2634+
);
2635+
Ok(())
2636+
}
2637+
2638+
#[test]
2639+
fn test_grouping_separator_compact_decimal() -> Result<()> {
2640+
// %,g on decimal — large number triggers scientific, no commas
2641+
test_scalar_function!(
2642+
FormatStringFunc::new(),
2643+
vec![
2644+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
2645+
ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
2646+
],
2647+
Ok(Some("1.23457e+06")),
2648+
&str,
2649+
Utf8,
2650+
StringArray
2651+
);
2652+
// %,g on decimal — small number triggers fixed-point, commas expected
2653+
test_scalar_function!(
2654+
FormatStringFunc::new(),
2655+
vec![
2656+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
2657+
ColumnarValue::Scalar(ScalarValue::Decimal128(Some(1234560), 10, 2)),
2658+
],
2659+
Ok(Some("12,345.6")),
2660+
&str,
2661+
Utf8,
2662+
StringArray
2663+
);
2664+
// %,.0g on decimal — precision 0 compact with grouping (scientific)
2665+
test_scalar_function!(
2666+
FormatStringFunc::new(),
2667+
vec![
2668+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0g".to_string()))),
2669+
ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 10, 2)),
2670+
],
2671+
Ok(Some("1e+06")),
2672+
&str,
2673+
Utf8,
2674+
StringArray
2675+
);
2676+
Ok(())
2677+
}
23882678
}

0 commit comments

Comments
 (0)