fix(tools): stop _strategy_exact emitting overlapping matches (#56211)
_strategy_exact advanced its scan cursor by pos+1 instead of pos+len(pattern), so self-overlapping patterns (e.g. "aa" in "aaaa") matched at overlapping offsets. _apply_replacements works in reverse order, so the second replacement operated on already-modified content using stale offsets — corrupting the file and reporting the wrong count under replace_all=True. Advancing by len(pattern) matches str.replace() semantics.
This commit is contained in:
parent
ea533e7f41
commit
d57a4c197c
2 changed files with 39 additions and 1 deletion
|
|
@@ -207,6 +207,39 @@ class TestReplaceAll:
|
|||
assert count == 2
|
||||
assert new == "ccc bbb ccc"
|
||||
|
||||
def test_self_overlapping_pattern_non_overlapping_matches(self):
    """Self-overlapping patterns must be matched at non-overlapping spans.

    Regression guard: _strategy_exact used to move its scan cursor forward
    by one character rather than by len(pattern), so searching "aaaa" for
    "aa" reported overlapping hits at offsets 0, 1, 2 instead of 0, 2.
    Because _apply_replacements works in reverse order, those stale
    offsets corrupted the result. The expected behaviour now lines up
    with str.replace().
    """
    # Each row: (content, pattern, expected text, expected match count),
    # all exercised with replace_all=True and the replacement "b".
    replace_all_cases = [
        # 2 non-overlapping matches, not 3 overlapping ones
        ("aaaa", "aa", "bb", 2),
        # a single-char pattern still counts every occurrence
        ("aaa", "a", "bbb", 3),
        # surrounding, non-matched content is preserved
        ("prefix aaaa suffix", "aa", "prefix bb suffix", 2),
    ]
    for content, pattern, expected_text, expected_count in replace_all_cases:
        new, count, _, err = fuzzy_find_and_replace(
            content, pattern, "b", replace_all=True
        )
        assert err is None
        assert count == expected_count
        assert new == expected_text

    # Without replace_all nothing is replaced, and the error reports the
    # non-overlapping match count (2, not 3).
    new, count, _, err = fuzzy_find_and_replace(
        "aaaa", "aa", "b", replace_all=False
    )
    assert count == 0
    assert "2 matches" in err
|
||||
|
||||
|
||||
class TestUnicodeNormalized:
|
||||
"""Tests for the unicode_normalized strategy (Bug 5)."""
|
||||
|
|
|
|||
|
|
@@ -349,7 +349,12 @@ def _strategy_exact(content: str, pattern: str) -> List[Tuple[int, int]]:
|
|||
if pos == -1:
|
||||
break
|
||||
matches.append((pos, pos + len(pattern)))
|
||||
start = pos + 1
|
||||
# Advance past the whole match, not just one char, so self-overlapping
|
||||
# patterns (e.g. "aa" in "aaaa") produce non-overlapping spans matching
|
||||
# str.replace() semantics. Advancing by 1 yielded overlapping matches
|
||||
# that corrupt the file under replace_all=True (reverse-order apply on
|
||||
# stale offsets).
|
||||
start = pos + len(pattern)
|
||||
return matches
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue