feat(agent): add fuzzy whitespace matching to edit_files tool (#22446)

Inspired by openai/codex's `apply_patch` implementation, this changes
the `edit_files` search-and-replace to use a cascading match strategy
when the exact search string isn't found:

1. **Exact substring match** (byte-for-byte) — existing behavior,
unchanged
2. **Line-by-line match ignoring trailing whitespace** — handles
trailing spaces/tabs the LLM omits
3. **Line-by-line match ignoring all leading/trailing whitespace** —
handles tabs-vs-spaces and wrong indentation depth

## Problem

When the chat agent uses `edit_files`, it generates a search string that
must match the file content exactly. LLMs frequently get whitespace
wrong:
- Emitting spaces when the file uses tabs (or vice versa)
- Getting the indentation depth wrong by one or more levels
- Omitting trailing whitespace that exists in the file

When this happens, the edit silently does nothing, and the agent falls
into a retry loop using `cat -A` to diagnose the exact whitespace
characters.

## Solution

Adopted the same cascading fuzzy match strategy that [openai/codex uses
in
`seek_sequence.rs`](https://github.com/openai/codex/blob/main/codex-rs/apply-patch/src/seek_sequence.rs):

- Pass 1: exact match (existing behavior)
- Pass 2: `TrimRight` each line before comparing (trailing whitespace
tolerance)
- Pass 3: `TrimSpace` each line before comparing (full indentation
tolerance)

When a fuzzy match is found, the matched lines in the original file are
replaced with the replacement text. This preserves surrounding content
exactly.

## Changes

- `agent/agentfiles/files.go`: Replaced `icholy/replace` streaming
transformer with in-memory `fuzzyReplace` + helper functions
(`seekLines`, `spliceLines`)
- `agent/agentfiles/files_test.go`: Added 6 new test cases covering
trailing whitespace, tabs-vs-spaces, different indent depths, exact
match preference, no-match behavior, and mixed whitespace multiline
edits
- Removed `icholy/replace` dependency from go.mod/go.sum

---------

Co-authored-by: Kyle Carberry <kylecarbs@users.noreply.github.com>
This commit is contained in:
Kyle Carberry
2026-02-28 17:02:57 -05:00
committed by GitHub
parent 22d4539a7a
commit 5945febf06
4 changed files with 206 additions and 15 deletions
+106 -7
View File
@@ -13,9 +13,7 @@ import (
"strings"
"syscall"
"github.com/icholy/replace"
"github.com/spf13/afero"
"golang.org/x/text/transform"
"golang.org/x/xerrors"
"cdr.dev/slog/v3"
@@ -422,9 +420,21 @@ func (api *API) editFile(ctx context.Context, path string, edits []workspacesdk.
return http.StatusBadRequest, xerrors.Errorf("open %s: not a file", path)
}
transforms := make([]transform.Transformer, len(edits))
for i, edit := range edits {
transforms[i] = replace.String(edit.Search, edit.Replace)
data, err := io.ReadAll(f)
if err != nil {
return http.StatusInternalServerError, xerrors.Errorf("read %s: %w", path, err)
}
content := string(data)
for _, edit := range edits {
var ok bool
content, ok = fuzzyReplace(content, edit.Search, edit.Replace)
if !ok {
api.logger.Warn(ctx, "edit search string not found, skipping",
slog.F("path", path),
slog.F("search_preview", truncate(edit.Search, 64)),
)
}
}
// Create an adjacent file to ensure it will be on the same device and can be
@@ -435,8 +445,7 @@ func (api *API) editFile(ctx context.Context, path string, edits []workspacesdk.
}
defer tmpfile.Close()
_, err = io.Copy(tmpfile, replace.Chain(f, transforms...))
if err != nil {
if _, err := tmpfile.Write([]byte(content)); err != nil {
if rerr := api.filesystem.Remove(tmpfile.Name()); rerr != nil {
api.logger.Warn(ctx, "unable to clean up temp file", slog.Error(rerr))
}
@@ -450,3 +459,93 @@ func (api *API) editFile(ctx context.Context, path string, edits []workspacesdk.
return 0, nil
}
// fuzzyReplace attempts to find `search` inside `content` and replace its first
// occurrence with `replace`. It uses a cascading match strategy inspired by
// openai/codex's apply_patch:
//
// 1. Exact substring match (byte-for-byte).
// 2. Line-by-line match ignoring trailing whitespace on each line.
// 3. Line-by-line match ignoring all leading/trailing whitespace (indentation-tolerant).
//
// When a fuzzy match is found (passes 2 or 3), the replacement is still applied
// at the byte offsets of the original content so that surrounding text (including
// indentation of untouched lines) is preserved.
//
// Returns the (possibly modified) content and a bool indicating whether a match
// was found.
func fuzzyReplace(content, search, replace string) (string, bool) {
// Pass 1 exact substring (replace all occurrences).
if strings.Contains(content, search) {
return strings.ReplaceAll(content, search, replace), true
}
// For line-level fuzzy matching we split both content and search into lines.
contentLines := strings.SplitAfter(content, "\n")
searchLines := strings.SplitAfter(search, "\n")
// A trailing newline in the search produces an empty final element from
// SplitAfter. Drop it so it doesn't interfere with line matching.
if len(searchLines) > 0 && searchLines[len(searchLines)-1] == "" {
searchLines = searchLines[:len(searchLines)-1]
}
// Pass 2 trim trailing whitespace on each line.
if start, end, ok := seekLines(contentLines, searchLines, func(a, b string) bool {
return strings.TrimRight(a, " \t\r\n") == strings.TrimRight(b, " \t\r\n")
}); ok {
return spliceLines(contentLines, start, end, replace), true
}
// Pass 3 trim all leading and trailing whitespace (indentation-tolerant).
if start, end, ok := seekLines(contentLines, searchLines, func(a, b string) bool {
return strings.TrimSpace(a) == strings.TrimSpace(b)
}); ok {
return spliceLines(contentLines, start, end, replace), true
}
return content, false
}
// seekLines scans contentLines looking for a contiguous subsequence that matches
// searchLines according to the provided `eq` function. It returns the start and
// end (exclusive) indices into contentLines of the match.
func seekLines(contentLines, searchLines []string, eq func(a, b string) bool) (start, end int, ok bool) {
if len(searchLines) == 0 {
return 0, 0, true
}
if len(searchLines) > len(contentLines) {
return 0, 0, false
}
outer:
for i := 0; i <= len(contentLines)-len(searchLines); i++ {
for j, sLine := range searchLines {
if !eq(contentLines[i+j], sLine) {
continue outer
}
}
return i, i + len(searchLines), true
}
return 0, 0, false
}
// spliceLines replaces contentLines[start:end] with replacement text, returning
// the full content as a single string.
func spliceLines(contentLines []string, start, end int, replacement string) string {
var b strings.Builder
for _, l := range contentLines[:start] {
_, _ = b.WriteString(l)
}
_, _ = b.WriteString(replacement)
for _, l := range contentLines[end:] {
_, _ = b.WriteString(l)
}
return b.String()
}
func truncate(s string, n int) string {
if len(s) <= n {
return s
}
return s[:n] + "..."
}
+100
View File
@@ -649,6 +649,106 @@ func TestEditFiles(t *testing.T) {
filepath.Join(tmpdir, "file3"): "edited3 3",
},
},
{
name: "TrailingWhitespace",
contents: map[string]string{filepath.Join(tmpdir, "trailing-ws"): "foo \nbar\t\t\nbaz"},
edits: []workspacesdk.FileEdits{
{
Path: filepath.Join(tmpdir, "trailing-ws"),
Edits: []workspacesdk.FileEdit{
{
Search: "foo\nbar\nbaz",
Replace: "replaced",
},
},
},
},
expected: map[string]string{filepath.Join(tmpdir, "trailing-ws"): "replaced"},
},
{
name: "TabsVsSpaces",
contents: map[string]string{filepath.Join(tmpdir, "tabs-vs-spaces"): "\tif true {\n\t\tfoo()\n\t}"},
edits: []workspacesdk.FileEdits{
{
Path: filepath.Join(tmpdir, "tabs-vs-spaces"),
Edits: []workspacesdk.FileEdit{
{
// Search uses spaces but file uses tabs.
Search: " if true {\n foo()\n }",
Replace: "\tif true {\n\t\tbar()\n\t}",
},
},
},
},
expected: map[string]string{filepath.Join(tmpdir, "tabs-vs-spaces"): "\tif true {\n\t\tbar()\n\t}"},
},
{
name: "DifferentIndentDepth",
contents: map[string]string{filepath.Join(tmpdir, "indent-depth"): "\t\t\tdeep()\n\t\t\tnested()"},
edits: []workspacesdk.FileEdits{
{
Path: filepath.Join(tmpdir, "indent-depth"),
Edits: []workspacesdk.FileEdit{
{
// Search has wrong indent depth (1 tab instead of 3).
Search: "\tdeep()\n\tnested()",
Replace: "\t\t\tdeep()\n\t\t\tchanged()",
},
},
},
},
expected: map[string]string{filepath.Join(tmpdir, "indent-depth"): "\t\t\tdeep()\n\t\t\tchanged()"},
},
{
name: "ExactMatchPreferred",
contents: map[string]string{filepath.Join(tmpdir, "exact-preferred"): "hello world"},
edits: []workspacesdk.FileEdits{
{
Path: filepath.Join(tmpdir, "exact-preferred"),
Edits: []workspacesdk.FileEdit{
{
Search: "hello world",
Replace: "goodbye world",
},
},
},
},
expected: map[string]string{filepath.Join(tmpdir, "exact-preferred"): "goodbye world"},
},
{
name: "NoMatchStillSucceeds",
contents: map[string]string{filepath.Join(tmpdir, "no-match"): "original content"},
edits: []workspacesdk.FileEdits{
{
Path: filepath.Join(tmpdir, "no-match"),
Edits: []workspacesdk.FileEdit{
{
Search: "this does not exist in the file",
Replace: "whatever",
},
},
},
},
// File should remain unchanged.
expected: map[string]string{filepath.Join(tmpdir, "no-match"): "original content"},
},
{
name: "MixedWhitespaceMultiline",
contents: map[string]string{filepath.Join(tmpdir, "mixed-ws"): "func main() {\n\tresult := compute()\n\tfmt.Println(result)\n}"},
edits: []workspacesdk.FileEdits{
{
Path: filepath.Join(tmpdir, "mixed-ws"),
Edits: []workspacesdk.FileEdit{
{
// Search uses spaces, file uses tabs.
Search: " result := compute()\n fmt.Println(result)\n",
Replace: "\tresult := compute()\n\tlog.Println(result)\n",
},
},
},
},
expected: map[string]string{filepath.Join(tmpdir, "mixed-ws"): "func main() {\n\tresult := compute()\n\tlog.Println(result)\n}"},
},
{
name: "MultiError",
contents: map[string]string{
-1
View File
@@ -490,7 +490,6 @@ require (
github.com/elazarl/goproxy v1.8.0
github.com/fsnotify/fsnotify v1.9.0
github.com/go-git/go-git/v5 v5.16.5
github.com/icholy/replace v0.6.0
github.com/mark3labs/mcp-go v0.38.0
gonum.org/v1/gonum v0.17.0
)
-7
View File
@@ -620,7 +620,6 @@ github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q=
github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
@@ -730,8 +729,6 @@ github.com/hugelgupf/vmtest v0.0.0-20240216064925-0561770280a1 h1:jWoR2Yqg8tzM0v
github.com/hugelgupf/vmtest v0.0.0-20240216064925-0561770280a1/go.mod h1:B63hDJMhTupLWCHwopAyEo7wRFowx9kOc8m8j1sfOqE=
github.com/iancoleman/orderedmap v0.3.0 h1:5cbR2grmZR/DiVt+VJopEhtVs9YGInGIxAoMJn+Ichc=
github.com/iancoleman/orderedmap v0.3.0/go.mod h1:XuLcCUkdL5owUCQeF2Ue9uuw1EptkJDkXXS7VoV7XGE=
github.com/icholy/replace v0.6.0 h1:EBiD2pGqZIOJAbEaf/5GVRaD/Pmbb4n+K3LrBdXd4dw=
github.com/icholy/replace v0.6.0/go.mod h1:zzi8pxElj2t/5wHHHYmH45D+KxytX/t4w3ClY5nlK+g=
github.com/illarion/gonotify v1.0.1 h1:F1d+0Fgbq/sDWjj/r66ekjDG+IDeecQKUFH4wNwsoio=
github.com/illarion/gonotify v1.0.1/go.mod h1:zt5pmDofZpU1f8aqlK0+95eQhoEAn/d4G4B/FjVW4jE=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -1053,7 +1050,6 @@ github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
@@ -1344,7 +1340,6 @@ golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
@@ -1448,7 +1443,6 @@ golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA=
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
@@ -1514,7 +1508,6 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=
gvisor.dev/gvisor v0.0.0-20240509041132-65b30f7869dc h1:DXLLFYv/k/xr0rWcwVEvWme1GR36Oc4kNMspg38JeiE=