@@ -189,13 +189,19 @@ fn regexp_replace_func(args: &[ColumnarValue]) -> Result<ArrayRef> {
189189 }
190190}
191191
192- /// replace POSIX capture groups (like \1) with Rust Regex group (like ${1})
192+ /// replace POSIX capture groups (like \1 or \\1 ) with Rust Regex group (like ${1})
193193/// used by regexp_replace
194+ /// Handles both single backslash (\1) and double backslash (\\1) which can occur
195+ /// when SQL strings with escaped backslashes are passed through
196+ ///
197+ /// Note: \0 is converted to ${0}, which in Rust's regex replacement syntax
198+ /// substitutes the entire match. This is consistent with POSIX behavior where
199+ /// \0 (or &) refers to the entire matched string.
194200fn regex_replace_posix_groups ( replacement : & str ) -> String {
195201 static CAPTURE_GROUPS_RE_LOCK : LazyLock < Regex > =
196- LazyLock :: new ( || Regex :: new ( r"(\\) (\d* )" ) . unwrap ( ) ) ;
202+ LazyLock :: new ( || Regex :: new ( r"\\{1,2} (\d+ )" ) . unwrap ( ) ) ;
197203 CAPTURE_GROUPS_RE_LOCK
198- . replace_all ( replacement, "$${$2 }" )
204+ . replace_all ( replacement, "$${$1 }" )
199205 . into_owned ( )
200206}
201207
@@ -659,6 +665,42 @@ mod tests {
659665
660666 use super :: * ;
661667
668+ #[ test]
669+ fn test_regex_replace_posix_groups ( ) {
670+ // Test that \1, \2, etc. are replaced with ${1}, ${2}, etc.
671+ assert_eq ! ( regex_replace_posix_groups( r"\1" ) , "${1}" ) ;
672+ assert_eq ! ( regex_replace_posix_groups( r"\12" ) , "${12}" ) ;
673+ assert_eq ! ( regex_replace_posix_groups( r"X\1Y" ) , "X${1}Y" ) ;
674+ assert_eq ! ( regex_replace_posix_groups( r"\1\2" ) , "${1}${2}" ) ;
675+
676+ // Test double backslash (from SQL escaped strings like '\\1')
677+ assert_eq ! ( regex_replace_posix_groups( r"\\1" ) , "${1}" ) ;
678+ assert_eq ! ( regex_replace_posix_groups( r"X\\1Y" ) , "X${1}Y" ) ;
679+ assert_eq ! ( regex_replace_posix_groups( r"\\1\\2" ) , "${1}${2}" ) ;
680+
681+ // Test 3 or 4 backslashes before digits to document expected behavior
682+ assert_eq ! ( regex_replace_posix_groups( r"\\\1" ) , r"\${1}" ) ;
683+ assert_eq ! ( regex_replace_posix_groups( r"\\\\1" ) , r"\\${1}" ) ;
684+ assert_eq ! ( regex_replace_posix_groups( r"\\\1\\\\2" ) , r"\${1}\\${2}" ) ;
685+
686+ // Test that a lone backslash is NOT replaced (requires at least one digit)
687+ assert_eq ! ( regex_replace_posix_groups( r"\" ) , r"\" ) ;
688+ assert_eq ! ( regex_replace_posix_groups( r"foo\bar" ) , r"foo\bar" ) ;
689+
690+ // Test that backslash followed by non-digit is preserved
691+ assert_eq ! ( regex_replace_posix_groups( r"\n" ) , r"\n" ) ;
692+ assert_eq ! ( regex_replace_posix_groups( r"\t" ) , r"\t" ) ;
693+
694+ // Test \0 behavior: \0 is converted to ${0}, which in Rust's regex
695+ // replacement syntax substitutes the entire match. This is consistent
696+ // with POSIX behavior where \0 (or &) refers to the entire matched string.
697+ assert_eq ! ( regex_replace_posix_groups( r"\0" ) , "${0}" ) ;
698+ assert_eq ! (
699+ regex_replace_posix_groups( r"prefix\0suffix" ) ,
700+ "prefix${0}suffix"
701+ ) ;
702+ }
703+
662704 macro_rules! static_pattern_regexp_replace {
663705 ( $name: ident, $T: ty, $O: ty) => {
664706 #[ test]
0 commit comments