mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat(agent): add fuzzy whitespace matching to edit_files tool (#22446)
Inspired by openai/codex's `apply_patch` implementation, this changes the `edit_files` search-and-replace to use a cascading match strategy when the exact search string isn't found: 1. **Exact substring match** (byte-for-byte) — existing behavior, unchanged 2. **Line-by-line match ignoring trailing whitespace** — handles trailing spaces/tabs the LLM omits 3. **Line-by-line match ignoring all leading/trailing whitespace** — handles tabs-vs-spaces and wrong indentation depth ## Problem When the chat agent uses `edit_files`, it generates a search string that must match the file content exactly. LLMs frequently get whitespace wrong: - Emitting spaces when the file uses tabs (or vice versa) - Getting the indentation depth wrong by one or more levels - Omitting trailing whitespace that exists in the file When this happens, the edit silently does nothing, and the agent falls into a retry loop using `cat -A` to diagnose the exact whitespace characters. ## Solution Adopted the same cascading fuzzy match strategy that [openai/codex uses in `seek_sequence.rs`](https://github.com/openai/codex/blob/main/codex-rs/apply-patch/src/seek_sequence.rs): - Pass 1: exact match (existing behavior) - Pass 2: `TrimRight` each line before comparing (trailing whitespace tolerance) - Pass 3: `TrimSpace` each line before comparing (full indentation tolerance) When a fuzzy match is found, the matched lines in the original file are replaced with the replacement text. This preserves surrounding content exactly. ## Changes - `agent/agentfiles/files.go`: Replaced `icholy/replace` streaming transformer with in-memory `fuzzyReplace` + helper functions (`seekLines`, `spliceLines`) - `agent/agentfiles/files_test.go`: Added 6 new test cases covering trailing whitespace, tabs-vs-spaces, different indent depths, exact match preference, no-match behavior, and mixed whitespace multiline edits - Removed `icholy/replace` dependency from go.mod/go.sum --------- Co-authored-by: Kyle Carberry <kylecarbs@users.noreply.github.com>
This commit is contained in:
+106
-7
@@ -13,9 +13,7 @@ import (
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/icholy/replace"
|
||||
"github.com/spf13/afero"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"cdr.dev/slog/v3"
|
||||
@@ -422,9 +420,21 @@ func (api *API) editFile(ctx context.Context, path string, edits []workspacesdk.
|
||||
return http.StatusBadRequest, xerrors.Errorf("open %s: not a file", path)
|
||||
}
|
||||
|
||||
transforms := make([]transform.Transformer, len(edits))
|
||||
for i, edit := range edits {
|
||||
transforms[i] = replace.String(edit.Search, edit.Replace)
|
||||
data, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
return http.StatusInternalServerError, xerrors.Errorf("read %s: %w", path, err)
|
||||
}
|
||||
content := string(data)
|
||||
|
||||
for _, edit := range edits {
|
||||
var ok bool
|
||||
content, ok = fuzzyReplace(content, edit.Search, edit.Replace)
|
||||
if !ok {
|
||||
api.logger.Warn(ctx, "edit search string not found, skipping",
|
||||
slog.F("path", path),
|
||||
slog.F("search_preview", truncate(edit.Search, 64)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Create an adjacent file to ensure it will be on the same device and can be
|
||||
@@ -435,8 +445,7 @@ func (api *API) editFile(ctx context.Context, path string, edits []workspacesdk.
|
||||
}
|
||||
defer tmpfile.Close()
|
||||
|
||||
_, err = io.Copy(tmpfile, replace.Chain(f, transforms...))
|
||||
if err != nil {
|
||||
if _, err := tmpfile.Write([]byte(content)); err != nil {
|
||||
if rerr := api.filesystem.Remove(tmpfile.Name()); rerr != nil {
|
||||
api.logger.Warn(ctx, "unable to clean up temp file", slog.Error(rerr))
|
||||
}
|
||||
@@ -450,3 +459,93 @@ func (api *API) editFile(ctx context.Context, path string, edits []workspacesdk.
|
||||
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// fuzzyReplace attempts to find `search` inside `content` and replace its first
|
||||
// occurrence with `replace`. It uses a cascading match strategy inspired by
|
||||
// openai/codex's apply_patch:
|
||||
//
|
||||
// 1. Exact substring match (byte-for-byte).
|
||||
// 2. Line-by-line match ignoring trailing whitespace on each line.
|
||||
// 3. Line-by-line match ignoring all leading/trailing whitespace (indentation-tolerant).
|
||||
//
|
||||
// When a fuzzy match is found (passes 2 or 3), the replacement is still applied
|
||||
// at the byte offsets of the original content so that surrounding text (including
|
||||
// indentation of untouched lines) is preserved.
|
||||
//
|
||||
// Returns the (possibly modified) content and a bool indicating whether a match
|
||||
// was found.
|
||||
func fuzzyReplace(content, search, replace string) (string, bool) {
|
||||
// Pass 1 – exact substring (replace all occurrences).
|
||||
if strings.Contains(content, search) {
|
||||
return strings.ReplaceAll(content, search, replace), true
|
||||
}
|
||||
|
||||
// For line-level fuzzy matching we split both content and search into lines.
|
||||
contentLines := strings.SplitAfter(content, "\n")
|
||||
searchLines := strings.SplitAfter(search, "\n")
|
||||
|
||||
// A trailing newline in the search produces an empty final element from
|
||||
// SplitAfter. Drop it so it doesn't interfere with line matching.
|
||||
if len(searchLines) > 0 && searchLines[len(searchLines)-1] == "" {
|
||||
searchLines = searchLines[:len(searchLines)-1]
|
||||
}
|
||||
|
||||
// Pass 2 – trim trailing whitespace on each line.
|
||||
if start, end, ok := seekLines(contentLines, searchLines, func(a, b string) bool {
|
||||
return strings.TrimRight(a, " \t\r\n") == strings.TrimRight(b, " \t\r\n")
|
||||
}); ok {
|
||||
return spliceLines(contentLines, start, end, replace), true
|
||||
}
|
||||
|
||||
// Pass 3 – trim all leading and trailing whitespace (indentation-tolerant).
|
||||
if start, end, ok := seekLines(contentLines, searchLines, func(a, b string) bool {
|
||||
return strings.TrimSpace(a) == strings.TrimSpace(b)
|
||||
}); ok {
|
||||
return spliceLines(contentLines, start, end, replace), true
|
||||
}
|
||||
|
||||
return content, false
|
||||
}
|
||||
|
||||
// seekLines scans contentLines looking for a contiguous subsequence that matches
|
||||
// searchLines according to the provided `eq` function. It returns the start and
|
||||
// end (exclusive) indices into contentLines of the match.
|
||||
func seekLines(contentLines, searchLines []string, eq func(a, b string) bool) (start, end int, ok bool) {
|
||||
if len(searchLines) == 0 {
|
||||
return 0, 0, true
|
||||
}
|
||||
if len(searchLines) > len(contentLines) {
|
||||
return 0, 0, false
|
||||
}
|
||||
outer:
|
||||
for i := 0; i <= len(contentLines)-len(searchLines); i++ {
|
||||
for j, sLine := range searchLines {
|
||||
if !eq(contentLines[i+j], sLine) {
|
||||
continue outer
|
||||
}
|
||||
}
|
||||
return i, i + len(searchLines), true
|
||||
}
|
||||
return 0, 0, false
|
||||
}
|
||||
|
||||
// spliceLines replaces contentLines[start:end] with replacement text, returning
|
||||
// the full content as a single string.
|
||||
func spliceLines(contentLines []string, start, end int, replacement string) string {
|
||||
var b strings.Builder
|
||||
for _, l := range contentLines[:start] {
|
||||
_, _ = b.WriteString(l)
|
||||
}
|
||||
_, _ = b.WriteString(replacement)
|
||||
for _, l := range contentLines[end:] {
|
||||
_, _ = b.WriteString(l)
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func truncate(s string, n int) string {
|
||||
if len(s) <= n {
|
||||
return s
|
||||
}
|
||||
return s[:n] + "..."
|
||||
}
|
||||
|
||||
@@ -649,6 +649,106 @@ func TestEditFiles(t *testing.T) {
|
||||
filepath.Join(tmpdir, "file3"): "edited3 3",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "TrailingWhitespace",
|
||||
contents: map[string]string{filepath.Join(tmpdir, "trailing-ws"): "foo \nbar\t\t\nbaz"},
|
||||
edits: []workspacesdk.FileEdits{
|
||||
{
|
||||
Path: filepath.Join(tmpdir, "trailing-ws"),
|
||||
Edits: []workspacesdk.FileEdit{
|
||||
{
|
||||
Search: "foo\nbar\nbaz",
|
||||
Replace: "replaced",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: map[string]string{filepath.Join(tmpdir, "trailing-ws"): "replaced"},
|
||||
},
|
||||
{
|
||||
name: "TabsVsSpaces",
|
||||
contents: map[string]string{filepath.Join(tmpdir, "tabs-vs-spaces"): "\tif true {\n\t\tfoo()\n\t}"},
|
||||
edits: []workspacesdk.FileEdits{
|
||||
{
|
||||
Path: filepath.Join(tmpdir, "tabs-vs-spaces"),
|
||||
Edits: []workspacesdk.FileEdit{
|
||||
{
|
||||
// Search uses spaces but file uses tabs.
|
||||
Search: " if true {\n foo()\n }",
|
||||
Replace: "\tif true {\n\t\tbar()\n\t}",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: map[string]string{filepath.Join(tmpdir, "tabs-vs-spaces"): "\tif true {\n\t\tbar()\n\t}"},
|
||||
},
|
||||
{
|
||||
name: "DifferentIndentDepth",
|
||||
contents: map[string]string{filepath.Join(tmpdir, "indent-depth"): "\t\t\tdeep()\n\t\t\tnested()"},
|
||||
edits: []workspacesdk.FileEdits{
|
||||
{
|
||||
Path: filepath.Join(tmpdir, "indent-depth"),
|
||||
Edits: []workspacesdk.FileEdit{
|
||||
{
|
||||
// Search has wrong indent depth (1 tab instead of 3).
|
||||
Search: "\tdeep()\n\tnested()",
|
||||
Replace: "\t\t\tdeep()\n\t\t\tchanged()",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: map[string]string{filepath.Join(tmpdir, "indent-depth"): "\t\t\tdeep()\n\t\t\tchanged()"},
|
||||
},
|
||||
{
|
||||
name: "ExactMatchPreferred",
|
||||
contents: map[string]string{filepath.Join(tmpdir, "exact-preferred"): "hello world"},
|
||||
edits: []workspacesdk.FileEdits{
|
||||
{
|
||||
Path: filepath.Join(tmpdir, "exact-preferred"),
|
||||
Edits: []workspacesdk.FileEdit{
|
||||
{
|
||||
Search: "hello world",
|
||||
Replace: "goodbye world",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: map[string]string{filepath.Join(tmpdir, "exact-preferred"): "goodbye world"},
|
||||
},
|
||||
{
|
||||
name: "NoMatchStillSucceeds",
|
||||
contents: map[string]string{filepath.Join(tmpdir, "no-match"): "original content"},
|
||||
edits: []workspacesdk.FileEdits{
|
||||
{
|
||||
Path: filepath.Join(tmpdir, "no-match"),
|
||||
Edits: []workspacesdk.FileEdit{
|
||||
{
|
||||
Search: "this does not exist in the file",
|
||||
Replace: "whatever",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
// File should remain unchanged.
|
||||
expected: map[string]string{filepath.Join(tmpdir, "no-match"): "original content"},
|
||||
},
|
||||
{
|
||||
name: "MixedWhitespaceMultiline",
|
||||
contents: map[string]string{filepath.Join(tmpdir, "mixed-ws"): "func main() {\n\tresult := compute()\n\tfmt.Println(result)\n}"},
|
||||
edits: []workspacesdk.FileEdits{
|
||||
{
|
||||
Path: filepath.Join(tmpdir, "mixed-ws"),
|
||||
Edits: []workspacesdk.FileEdit{
|
||||
{
|
||||
// Search uses spaces, file uses tabs.
|
||||
Search: " result := compute()\n fmt.Println(result)\n",
|
||||
Replace: "\tresult := compute()\n\tlog.Println(result)\n",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: map[string]string{filepath.Join(tmpdir, "mixed-ws"): "func main() {\n\tresult := compute()\n\tlog.Println(result)\n}"},
|
||||
},
|
||||
{
|
||||
name: "MultiError",
|
||||
contents: map[string]string{
|
||||
|
||||
@@ -490,7 +490,6 @@ require (
|
||||
github.com/elazarl/goproxy v1.8.0
|
||||
github.com/fsnotify/fsnotify v1.9.0
|
||||
github.com/go-git/go-git/v5 v5.16.5
|
||||
github.com/icholy/replace v0.6.0
|
||||
github.com/mark3labs/mcp-go v0.38.0
|
||||
gonum.org/v1/gonum v0.17.0
|
||||
)
|
||||
|
||||
@@ -620,7 +620,6 @@ github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
|
||||
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
|
||||
github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q=
|
||||
github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
@@ -730,8 +729,6 @@ github.com/hugelgupf/vmtest v0.0.0-20240216064925-0561770280a1 h1:jWoR2Yqg8tzM0v
|
||||
github.com/hugelgupf/vmtest v0.0.0-20240216064925-0561770280a1/go.mod h1:B63hDJMhTupLWCHwopAyEo7wRFowx9kOc8m8j1sfOqE=
|
||||
github.com/iancoleman/orderedmap v0.3.0 h1:5cbR2grmZR/DiVt+VJopEhtVs9YGInGIxAoMJn+Ichc=
|
||||
github.com/iancoleman/orderedmap v0.3.0/go.mod h1:XuLcCUkdL5owUCQeF2Ue9uuw1EptkJDkXXS7VoV7XGE=
|
||||
github.com/icholy/replace v0.6.0 h1:EBiD2pGqZIOJAbEaf/5GVRaD/Pmbb4n+K3LrBdXd4dw=
|
||||
github.com/icholy/replace v0.6.0/go.mod h1:zzi8pxElj2t/5wHHHYmH45D+KxytX/t4w3ClY5nlK+g=
|
||||
github.com/illarion/gonotify v1.0.1 h1:F1d+0Fgbq/sDWjj/r66ekjDG+IDeecQKUFH4wNwsoio=
|
||||
github.com/illarion/gonotify v1.0.1/go.mod h1:zt5pmDofZpU1f8aqlK0+95eQhoEAn/d4G4B/FjVW4jE=
|
||||
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
|
||||
@@ -1053,7 +1050,6 @@ github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
|
||||
github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
|
||||
github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
|
||||
github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
|
||||
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
||||
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
|
||||
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
@@ -1344,7 +1340,6 @@ golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
|
||||
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
@@ -1448,7 +1443,6 @@ golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA=
|
||||
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
|
||||
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
@@ -1514,7 +1508,6 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
|
||||
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
|
||||
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
|
||||
gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
|
||||
gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=
|
||||
gvisor.dev/gvisor v0.0.0-20240509041132-65b30f7869dc h1:DXLLFYv/k/xr0rWcwVEvWme1GR36Oc4kNMspg38JeiE=
|
||||
|
||||
Reference in New Issue
Block a user