From 5945febf063f2425854168c1dca75f951ff89f20 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sat, 28 Feb 2026 17:02:57 -0500 Subject: [PATCH] feat(agent): add fuzzy whitespace matching to edit_files tool (#22446) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inspired by openai/codex's `apply_patch` implementation, this changes the `edit_files` search-and-replace to use a cascading match strategy when the exact search string isn't found: 1. **Exact substring match** (byte-for-byte) — existing behavior, unchanged 2. **Line-by-line match ignoring trailing whitespace** — handles trailing spaces/tabs the LLM omits 3. **Line-by-line match ignoring all leading/trailing whitespace** — handles tabs-vs-spaces and wrong indentation depth ## Problem When the chat agent uses `edit_files`, it generates a search string that must match the file content exactly. LLMs frequently get whitespace wrong: - Emitting spaces when the file uses tabs (or vice versa) - Getting the indentation depth wrong by one or more levels - Omitting trailing whitespace that exists in the file When this happens, the edit silently does nothing, and the agent falls into a retry loop using `cat -A` to diagnose the exact whitespace characters. ## Solution Adopted the same cascading fuzzy match strategy that [openai/codex uses in `seek_sequence.rs`](https://github.com/openai/codex/blob/main/codex-rs/apply-patch/src/seek_sequence.rs): - Pass 1: exact match (existing behavior) - Pass 2: `TrimRight` each line before comparing (trailing whitespace tolerance) - Pass 3: `TrimSpace` each line before comparing (full indentation tolerance) When a fuzzy match is found, the matched lines in the original file are replaced with the replacement text. This preserves surrounding content exactly. 
## Changes - `agent/agentfiles/files.go`: Replaced `icholy/replace` streaming transformer with in-memory `fuzzyReplace` + helper functions (`seekLines`, `spliceLines`) - `agent/agentfiles/files_test.go`: Added 6 new test cases covering trailing whitespace, tabs-vs-spaces, different indent depths, exact match preference, no-match behavior, and mixed whitespace multiline edits - Removed `icholy/replace` dependency from go.mod/go.sum --------- Co-authored-by: Kyle Carberry --- agent/agentfiles/files.go | 113 +++++++++++++++++++++++++++++++-- agent/agentfiles/files_test.go | 100 +++++++++++++++++++++++++++++ go.mod | 1 - go.sum | 7 -- 4 files changed, 206 insertions(+), 15 deletions(-) diff --git a/agent/agentfiles/files.go b/agent/agentfiles/files.go index e8474330ad..3e1657db24 100644 --- a/agent/agentfiles/files.go +++ b/agent/agentfiles/files.go @@ -13,9 +13,7 @@ import ( "strings" "syscall" - "github.com/icholy/replace" "github.com/spf13/afero" - "golang.org/x/text/transform" "golang.org/x/xerrors" "cdr.dev/slog/v3" @@ -422,9 +420,21 @@ func (api *API) editFile(ctx context.Context, path string, edits []workspacesdk. return http.StatusBadRequest, xerrors.Errorf("open %s: not a file", path) } - transforms := make([]transform.Transformer, len(edits)) - for i, edit := range edits { - transforms[i] = replace.String(edit.Search, edit.Replace) + data, err := io.ReadAll(f) + if err != nil { + return http.StatusInternalServerError, xerrors.Errorf("read %s: %w", path, err) + } + content := string(data) + + for _, edit := range edits { + var ok bool + content, ok = fuzzyReplace(content, edit.Search, edit.Replace) + if !ok { + api.logger.Warn(ctx, "edit search string not found, skipping", + slog.F("path", path), + slog.F("search_preview", truncate(edit.Search, 64)), + ) + } } // Create an adjacent file to ensure it will be on the same device and can be @@ -435,8 +445,7 @@ func (api *API) editFile(ctx context.Context, path string, edits []workspacesdk. 
} defer tmpfile.Close() - _, err = io.Copy(tmpfile, replace.Chain(f, transforms...)) - if err != nil { + if _, err := tmpfile.Write([]byte(content)); err != nil { if rerr := api.filesystem.Remove(tmpfile.Name()); rerr != nil { api.logger.Warn(ctx, "unable to clean up temp file", slog.Error(rerr)) } @@ -450,3 +459,93 @@ func (api *API) editFile(ctx context.Context, path string, edits []workspacesdk. return 0, nil } + +// fuzzyReplace attempts to find `search` inside `content` and replace it with +// `replace` (every occurrence on an exact match, only the first match on a fuzzy one). It uses a cascading match strategy inspired by +// openai/codex's apply_patch: +// +// 1. Exact substring match (byte-for-byte). +// 2. Line-by-line match ignoring trailing whitespace on each line. +// 3. Line-by-line match ignoring all leading/trailing whitespace (indentation-tolerant). +// +// When a fuzzy match is found (passes 2 or 3), the replacement is still applied +// at the byte offsets of the original content so that surrounding text (including +// indentation of untouched lines) is preserved. +// +// Returns the (possibly modified) content and a bool indicating whether a match +// was found. +func fuzzyReplace(content, search, replace string) (string, bool) { + // Pass 1 – exact substring (replace all occurrences). + if strings.Contains(content, search) { + return strings.ReplaceAll(content, search, replace), true + } + + // For line-level fuzzy matching we split both content and search into lines. + contentLines := strings.SplitAfter(content, "\n") + searchLines := strings.SplitAfter(search, "\n") + + // A trailing newline in the search produces an empty final element from + // SplitAfter. Drop it so it doesn't interfere with line matching. + if len(searchLines) > 0 && searchLines[len(searchLines)-1] == "" { + searchLines = searchLines[:len(searchLines)-1] + } + + // Pass 2 – trim trailing whitespace on each line. 
+ if start, end, ok := seekLines(contentLines, searchLines, func(a, b string) bool { + return strings.TrimRight(a, " \t\r\n") == strings.TrimRight(b, " \t\r\n") + }); ok { + return spliceLines(contentLines, start, end, replace), true + } + + // Pass 3 – trim all leading and trailing whitespace (indentation-tolerant). + if start, end, ok := seekLines(contentLines, searchLines, func(a, b string) bool { + return strings.TrimSpace(a) == strings.TrimSpace(b) + }); ok { + return spliceLines(contentLines, start, end, replace), true + } + + return content, false +} + +// seekLines scans contentLines looking for a contiguous subsequence that matches +// searchLines according to the provided `eq` function. It returns the start and +// end (exclusive) indices into contentLines of the match. +func seekLines(contentLines, searchLines []string, eq func(a, b string) bool) (start, end int, ok bool) { + if len(searchLines) == 0 { + return 0, 0, true + } + if len(searchLines) > len(contentLines) { + return 0, 0, false + } +outer: + for i := 0; i <= len(contentLines)-len(searchLines); i++ { + for j, sLine := range searchLines { + if !eq(contentLines[i+j], sLine) { + continue outer + } + } + return i, i + len(searchLines), true + } + return 0, 0, false +} + +// spliceLines replaces contentLines[start:end] with replacement text, returning +// the full content as a single string. +func spliceLines(contentLines []string, start, end int, replacement string) string { + var b strings.Builder + for _, l := range contentLines[:start] { + _, _ = b.WriteString(l) + } + _, _ = b.WriteString(replacement) + for _, l := range contentLines[end:] { + _, _ = b.WriteString(l) + } + return b.String() +} + +func truncate(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "..." 
+} diff --git a/agent/agentfiles/files_test.go b/agent/agentfiles/files_test.go index bd179f69ba..85a9bcd32c 100644 --- a/agent/agentfiles/files_test.go +++ b/agent/agentfiles/files_test.go @@ -649,6 +649,106 @@ func TestEditFiles(t *testing.T) { filepath.Join(tmpdir, "file3"): "edited3 3", }, }, + { + name: "TrailingWhitespace", + contents: map[string]string{filepath.Join(tmpdir, "trailing-ws"): "foo \nbar\t\t\nbaz"}, + edits: []workspacesdk.FileEdits{ + { + Path: filepath.Join(tmpdir, "trailing-ws"), + Edits: []workspacesdk.FileEdit{ + { + Search: "foo\nbar\nbaz", + Replace: "replaced", + }, + }, + }, + }, + expected: map[string]string{filepath.Join(tmpdir, "trailing-ws"): "replaced"}, + }, + { + name: "TabsVsSpaces", + contents: map[string]string{filepath.Join(tmpdir, "tabs-vs-spaces"): "\tif true {\n\t\tfoo()\n\t}"}, + edits: []workspacesdk.FileEdits{ + { + Path: filepath.Join(tmpdir, "tabs-vs-spaces"), + Edits: []workspacesdk.FileEdit{ + { + // Search uses spaces but file uses tabs. + Search: " if true {\n foo()\n }", + Replace: "\tif true {\n\t\tbar()\n\t}", + }, + }, + }, + }, + expected: map[string]string{filepath.Join(tmpdir, "tabs-vs-spaces"): "\tif true {\n\t\tbar()\n\t}"}, + }, + { + name: "DifferentIndentDepth", + contents: map[string]string{filepath.Join(tmpdir, "indent-depth"): "\t\t\tdeep()\n\t\t\tnested()"}, + edits: []workspacesdk.FileEdits{ + { + Path: filepath.Join(tmpdir, "indent-depth"), + Edits: []workspacesdk.FileEdit{ + { + // Search has wrong indent depth (1 tab instead of 3). 
+ Search: "\tdeep()\n\tnested()", + Replace: "\t\t\tdeep()\n\t\t\tchanged()", + }, + }, + }, + }, + expected: map[string]string{filepath.Join(tmpdir, "indent-depth"): "\t\t\tdeep()\n\t\t\tchanged()"}, + }, + { + name: "ExactMatchPreferred", + contents: map[string]string{filepath.Join(tmpdir, "exact-preferred"): "hello world"}, + edits: []workspacesdk.FileEdits{ + { + Path: filepath.Join(tmpdir, "exact-preferred"), + Edits: []workspacesdk.FileEdit{ + { + Search: "hello world", + Replace: "goodbye world", + }, + }, + }, + }, + expected: map[string]string{filepath.Join(tmpdir, "exact-preferred"): "goodbye world"}, + }, + { + name: "NoMatchStillSucceeds", + contents: map[string]string{filepath.Join(tmpdir, "no-match"): "original content"}, + edits: []workspacesdk.FileEdits{ + { + Path: filepath.Join(tmpdir, "no-match"), + Edits: []workspacesdk.FileEdit{ + { + Search: "this does not exist in the file", + Replace: "whatever", + }, + }, + }, + }, + // File should remain unchanged. + expected: map[string]string{filepath.Join(tmpdir, "no-match"): "original content"}, + }, + { + name: "MixedWhitespaceMultiline", + contents: map[string]string{filepath.Join(tmpdir, "mixed-ws"): "func main() {\n\tresult := compute()\n\tfmt.Println(result)\n}"}, + edits: []workspacesdk.FileEdits{ + { + Path: filepath.Join(tmpdir, "mixed-ws"), + Edits: []workspacesdk.FileEdit{ + { + // Search uses spaces, file uses tabs. 
+ Search: " result := compute()\n fmt.Println(result)\n", + Replace: "\tresult := compute()\n\tlog.Println(result)\n", + }, + }, + }, + }, + expected: map[string]string{filepath.Join(tmpdir, "mixed-ws"): "func main() {\n\tresult := compute()\n\tlog.Println(result)\n}"}, + }, { name: "MultiError", contents: map[string]string{ diff --git a/go.mod b/go.mod index 02f63b01a6..62214036b0 100644 --- a/go.mod +++ b/go.mod @@ -490,7 +490,6 @@ require ( github.com/elazarl/goproxy v1.8.0 github.com/fsnotify/fsnotify v1.9.0 github.com/go-git/go-git/v5 v5.16.5 - github.com/icholy/replace v0.6.0 github.com/mark3labs/mcp-go v0.38.0 gonum.org/v1/gonum v0.17.0 ) diff --git a/go.sum b/go.sum index 2768342573..006a1eed97 100644 --- a/go.sum +++ b/go.sum @@ -620,7 +620,6 @@ github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= @@ -730,8 +729,6 @@ github.com/hugelgupf/vmtest v0.0.0-20240216064925-0561770280a1 h1:jWoR2Yqg8tzM0v github.com/hugelgupf/vmtest v0.0.0-20240216064925-0561770280a1/go.mod h1:B63hDJMhTupLWCHwopAyEo7wRFowx9kOc8m8j1sfOqE= github.com/iancoleman/orderedmap v0.3.0 h1:5cbR2grmZR/DiVt+VJopEhtVs9YGInGIxAoMJn+Ichc= github.com/iancoleman/orderedmap v0.3.0/go.mod h1:XuLcCUkdL5owUCQeF2Ue9uuw1EptkJDkXXS7VoV7XGE= -github.com/icholy/replace v0.6.0 h1:EBiD2pGqZIOJAbEaf/5GVRaD/Pmbb4n+K3LrBdXd4dw= -github.com/icholy/replace 
v0.6.0/go.mod h1:zzi8pxElj2t/5wHHHYmH45D+KxytX/t4w3ClY5nlK+g= github.com/illarion/gonotify v1.0.1 h1:F1d+0Fgbq/sDWjj/r66ekjDG+IDeecQKUFH4wNwsoio= github.com/illarion/gonotify v1.0.1/go.mod h1:zt5pmDofZpU1f8aqlK0+95eQhoEAn/d4G4B/FjVW4jE= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -1053,7 +1050,6 @@ github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY= github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo= github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= -github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= @@ -1344,7 +1340,6 @@ golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8= golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -1448,7 +1443,6 @@ golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod 
h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= @@ -1514,7 +1508,6 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= -gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA= gvisor.dev/gvisor v0.0.0-20240509041132-65b30f7869dc h1:DXLLFYv/k/xr0rWcwVEvWme1GR36Oc4kNMspg38JeiE=