Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 32 additions & 3 deletions datafusion/functions/src/regex/regexpreplace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,13 +189,15 @@ fn regexp_replace_func(args: &[ColumnarValue]) -> Result<ArrayRef> {
}
}

/// Rewrites POSIX-style capture group references in a replacement string
/// (like `\1` or `\\1`) into the Rust `regex` crate form (like `${1}`).
/// Used by `regexp_replace`.
///
/// A reference is one or two backslashes followed by at least one digit.
/// The double-backslash form is accepted because SQL strings with escaped
/// backslashes can reach this function with both backslashes intact.
///
/// Behavior worth knowing:
/// - When more than two backslashes precede the digits, only the last two are
///   consumed; the leading ones are kept as-is (`\\\1` becomes `\${1}`).
/// - A backslash that is not followed by a digit (a lone `\`, `\n`, `\b`, ...)
///   is left untouched.
fn regex_replace_posix_groups(replacement: &str) -> String {
    // `\d+` (not `\d*`) so a bare backslash is never treated as a group reference.
    static CAPTURE_GROUPS_RE_LOCK: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"\\{1,2}(\d+)").unwrap());
    // In the replacement template `$$` is a literal `$` and `$1` is the captured
    // digit run, so the output for each match is `${<digits>}`.
    CAPTURE_GROUPS_RE_LOCK
        .replace_all(replacement, "$${$1}")
        .into_owned()
}

Expand Down Expand Up @@ -659,6 +661,33 @@ mod tests {

use super::*;

#[test]
fn test_regex_replace_posix_groups() {
    // Table of (input, expected) pairs; each row is checked in order.
    const CASES: &[(&str, &str)] = &[
        // \1, \2, etc. are replaced with ${1}, ${2}, etc.
        (r"\1", "${1}"),
        (r"\12", "${12}"),
        (r"X\1Y", "X${1}Y"),
        (r"\1\2", "${1}${2}"),
        // Double backslash (from SQL escaped strings like '\\1')
        (r"\\1", "${1}"),
        (r"X\\1Y", "X${1}Y"),
        (r"\\1\\2", "${1}${2}"),
        // 3 or 4 backslashes before digits: documents the expected behavior,
        // where only the last two backslashes are consumed
        (r"\\\1", r"\${1}"),
        (r"\\\\1", r"\\${1}"),
        (r"\\\1\\\\2", r"\${1}\\${2}"),
        // A lone backslash is NOT replaced (at least one digit is required)
        (r"\", r"\"),
        (r"foo\bar", r"foo\bar"),
        // A backslash followed by a non-digit is preserved
        (r"\n", r"\n"),
        (r"\t", r"\t"),
    ];

    for &(input, expected) in CASES {
        let rewritten = regex_replace_posix_groups(input);
        assert_eq!(rewritten, expected);
    }
}

macro_rules! static_pattern_regexp_replace {
($name:ident, $T:ty, $O:ty) => {
#[test]
Expand Down