Skip to content

Commit d14e24b

Browse files
Fix zero-width regex replace (#815)
As a drive-by this also puts all replace-all changes into a single undo group. Tested by: * Running Replace-All with `$` and "x": Every line gets a trailing "x", the final line gets a lone "x" and a new trailing newline is added. * Running Replace with `$` and "x" repeatedly: New "x"s are added to each line incrementally, and it wraps around the buffer. Closes #775 Co-authored-by: Leonard Hecker <leonard@hecker.io>
1 parent 2bb7c10 commit d14e24b

2 files changed

Lines changed: 115 additions & 46 deletions

File tree

crates/edit/src/buffer/mod.rs

Lines changed: 103 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,15 +1152,15 @@ impl TextBuffer {
11521152

11531153
// If the user moved the cursor since the last search, but the needle remained the same,
11541154
// we still need to move the start of the search to the new cursor position.
1155-
let next_search_offset = match self.selection {
1156-
Some(TextBufferSelection { beg, end }) => {
1157-
if self.selection_generation == search.selection_generation {
1158-
search.next_search_offset
1159-
} else {
1155+
let next_search_offset = if self.selection_generation == search.selection_generation {
1156+
search.next_search_offset
1157+
} else {
1158+
match self.selection {
1159+
Some(TextBufferSelection { beg, end }) => {
11601160
self.cursor_move_to_logical_internal(self.cursor, beg.min(end)).offset
11611161
}
1162+
_ => self.cursor.offset,
11621163
}
1163-
_ => self.cursor.offset,
11641164
};
11651165

11661166
self.find_select_next(search, next_search_offset, true);
@@ -1175,15 +1175,23 @@ impl TextBuffer {
11751175
replacement: &[u8],
11761176
) -> icu::Result<()> {
11771177
// Editors traditionally replace the previous search hit, not the next possible one.
1178-
if let (Some(search), Some(..)) = (&self.search, &self.selection) {
1178+
if let Some(search) = &self.search {
11791179
let search = unsafe { &mut *search.get() };
11801180
if search.selection_generation == self.selection_generation {
11811181
let scratch = scratch_arena(None);
1182+
let zero_width = self.selection.is_none();
11821183
let parsed_replacements =
11831184
Self::find_parse_replacement(&scratch, &mut *search, replacement);
11841185
let replacement =
11851186
self.find_fill_replacement(&mut *search, replacement, &parsed_replacements);
1186-
self.write(&replacement, self.cursor, true);
1187+
self.write_raw(&replacement);
1188+
1189+
// After replacing a zero-width match, advance past it so that find_and_select wraps to the
1190+
// next match rather than finding the same anchor (e.g. `$`) again at the same line end.
1191+
if zero_width {
1192+
search.next_search_offset =
1193+
self.find_advance_past_zero_width(self.active_edit_off).unwrap_or(0);
1194+
}
11871195
}
11881196
}
11891197

@@ -1197,26 +1205,48 @@ impl TextBuffer {
11971205
options: SearchOptions,
11981206
replacement: &[u8],
11991207
) -> icu::Result<()> {
1208+
self.edit_begin_grouping();
1209+
12001210
let scratch = scratch_arena(None);
12011211
let mut search = self.find_construct_search(pattern, options)?;
12021212
let mut offset = 0;
12031213
let parsed_replacements = Self::find_parse_replacement(&scratch, &mut search, replacement);
12041214

1205-
loop {
1206-
self.find_select_next(&mut search, offset, false);
1207-
if !self.has_selection() {
1208-
break;
1209-
}
1210-
1215+
while let Some(range) = self.find_select_next(&mut search, offset, false) {
12111216
let replacement =
12121217
self.find_fill_replacement(&mut search, replacement, &parsed_replacements);
1213-
self.write(&replacement, self.cursor, true);
1214-
offset = self.cursor.offset;
1218+
self.write_raw(&replacement);
1219+
1220+
// The `active_edit_off` points to the end of the last edit made by `write_raw()`.
1221+
// This differs from the self.cursor.offset, if `write_raw()` did an `insert_final_newline`.
1222+
offset = self.active_edit_off;
1223+
1224+
// Avoid infinite loops when hitting zero-length matches
1225+
// by advancing past the zero-length match location.
1226+
//
1227+
// This is technically not entirely correct. For instance imagine replacing
1228+
// "^|f" with "x" in "foo". It should technically produce "xxoo", but I
1229+
// found that other editors also do it wrong, so it can't matter too much.
1230+
if range.is_empty() {
1231+
offset = match self.find_advance_past_zero_width(offset) {
1232+
Some(next) => next,
1233+
None => break,
1234+
};
1235+
}
12151236
}
12161237

1238+
self.edit_end_grouping();
12171239
Ok(())
12181240
}
12191241

1242+
/// After replacing a zero-width match, compute the offset to resume
1243+
/// searching from. Returns `None` if we're at the end of the buffer.
1244+
fn find_advance_past_zero_width(&self, offset: usize) -> Option<usize> {
1245+
let cursor = self.cursor_move_to_offset_internal(self.cursor, offset);
1246+
let next = self.cursor_move_delta_internal(cursor, CursorMovement::Grapheme, 1);
1247+
(next.offset > offset).then_some(next.offset)
1248+
}
1249+
12201250
fn find_construct_search(
12211251
&self,
12221252
pattern: &str,
@@ -1277,7 +1307,12 @@ impl TextBuffer {
12771307
})
12781308
}
12791309

1280-
fn find_select_next(&mut self, search: &mut ActiveSearch, offset: usize, wrap: bool) {
1310+
fn find_select_next(
1311+
&mut self,
1312+
search: &mut ActiveSearch,
1313+
offset: usize,
1314+
wrap: bool,
1315+
) -> Option<Range<usize>> {
12811316
if search.buffer_generation != self.buffer.generation() {
12821317
unsafe { search.regex.set_text(&mut search.text, offset) };
12831318
search.buffer_generation = self.buffer.generation();
@@ -1297,7 +1332,7 @@ impl TextBuffer {
12971332
hit = search.regex.next();
12981333
}
12991334

1300-
search.selection_generation = if let Some(range) = hit {
1335+
search.selection_generation = if let Some(range) = &hit {
13011336
// Now the search offset is no more at the start of the buffer.
13021337
search.next_search_offset = range.end;
13031338

@@ -1316,6 +1351,8 @@ impl TextBuffer {
13161351
search.no_matches = true;
13171352
self.set_selection(None)
13181353
};
1354+
1355+
hit
13191356
}
13201357

13211358
fn find_parse_replacement<'a>(
@@ -3095,3 +3132,51 @@ fn detect_bom(bytes: &[u8]) -> Option<&'static str> {
30953132
}
30963133
None
30973134
}
3135+
3136+
#[cfg(test)]
3137+
mod tests {
3138+
use super::{SearchOptions, TextBuffer};
3139+
3140+
fn buffer_contents(buf: &mut TextBuffer) -> String {
3141+
let mut str = String::new();
3142+
buf.save_as_string(&mut str);
3143+
str
3144+
}
3145+
3146+
#[test]
3147+
fn replace_one_zero_width() {
3148+
let mut buf = TextBuffer::new(false).unwrap();
3149+
buf.set_crlf(false);
3150+
buf.set_insert_final_newline(true);
3151+
buf.write_raw(b"a\nb\n");
3152+
buf.cursor_move_to_logical(Default::default());
3153+
3154+
for _ in 0..6 {
3155+
buf.find_and_replace(
3156+
"$",
3157+
SearchOptions { use_regex: true, ..Default::default() },
3158+
b"x",
3159+
)
3160+
.unwrap();
3161+
}
3162+
3163+
assert_eq!(buffer_contents(&mut buf), "axx\nbxx\nx\n");
3164+
}
3165+
3166+
#[test]
3167+
fn replace_all_zero_width() {
3168+
let mut buf = TextBuffer::new(false).unwrap();
3169+
buf.set_crlf(false);
3170+
buf.set_insert_final_newline(true);
3171+
buf.write_raw(b"a\nb\n");
3172+
3173+
buf.find_and_replace_all(
3174+
"$",
3175+
SearchOptions { use_regex: true, ..Default::default() },
3176+
b"x",
3177+
)
3178+
.unwrap();
3179+
3180+
assert_eq!(buffer_contents(&mut buf), "ax\nbx\nx\n");
3181+
}
3182+
}

crates/edit/src/icu.rs

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use std::ffi::{CStr, c_char};
88
use std::mem::MaybeUninit;
99
use std::ops::Range;
1010
use std::ptr::{null, null_mut};
11+
use std::sync::OnceLock;
1112
use std::{fmt, mem};
1213

1314
use stdext::arena::{Arena, scratch_arena};
@@ -995,28 +996,18 @@ const LIBICUI18N_PROC_NAMES: [*const c_char; 12] = [
995996
proc_name!("uregex_end64"),
996997
];
997998

998-
enum LibraryFunctionsState {
999-
Uninitialized,
1000-
Failed,
1001-
Loaded(LibraryFunctions),
1002-
}
1003-
1004-
static mut LIBRARY_FUNCTIONS: LibraryFunctionsState = LibraryFunctionsState::Uninitialized;
999+
static LIBRARY_FUNCTIONS: OnceLock<Option<LibraryFunctions>> = OnceLock::new();
10051000

10061001
pub fn init() -> Result<()> {
10071002
init_if_needed()?;
10081003
Ok(())
10091004
}
10101005

1011-
#[allow(static_mut_refs)]
10121006
fn init_if_needed() -> Result<&'static LibraryFunctions> {
1013-
#[cold]
1014-
fn load() {
1007+
fn load() -> Option<LibraryFunctions> {
10151008
unsafe {
1016-
LIBRARY_FUNCTIONS = LibraryFunctionsState::Failed;
1017-
10181009
let Ok(icu) = sys::load_icu() else {
1019-
return;
1010+
return None;
10201011
};
10211012

10221013
type TransparentFunction = unsafe extern "C" fn() -> *const ();
@@ -1060,35 +1051,28 @@ fn init_if_needed() -> Result<&'static LibraryFunctions> {
10601051
"Failed to load ICU function: {:?}",
10611052
CStr::from_ptr(name)
10621053
);
1063-
return;
1054+
return None;
10641055
};
10651056

10661057
ptr.write(func);
10671058
ptr = ptr.add(1);
10681059
}
10691060
}
10701061

1071-
LIBRARY_FUNCTIONS = LibraryFunctionsState::Loaded(funcs.assume_init());
1072-
}
1073-
}
1074-
1075-
unsafe {
1076-
if matches!(&LIBRARY_FUNCTIONS, LibraryFunctionsState::Uninitialized) {
1077-
load();
1062+
Some(funcs.assume_init())
10781063
}
10791064
}
10801065

1081-
match unsafe { &LIBRARY_FUNCTIONS } {
1082-
LibraryFunctionsState::Loaded(f) => Ok(f),
1083-
_ => Err(ICU_MISSING_ERROR),
1066+
match LIBRARY_FUNCTIONS.get_or_init(load) {
1067+
Some(f) => Ok(f),
1068+
None => Err(ICU_MISSING_ERROR),
10841069
}
10851070
}
10861071

1087-
#[allow(static_mut_refs)]
10881072
fn assume_loaded() -> &'static LibraryFunctions {
1089-
match unsafe { &LIBRARY_FUNCTIONS } {
1090-
LibraryFunctionsState::Loaded(f) => f,
1091-
_ => unreachable!(),
1073+
match LIBRARY_FUNCTIONS.get() {
1074+
Some(Some(f)) => f,
1075+
_ => unsafe { std::hint::unreachable_unchecked() },
10921076
}
10931077
}
10941078

0 commit comments

Comments
 (0)