diff --git a/agent/agentfiles/files.go b/agent/agentfiles/files.go index 79602dbc17..4f92a8f7c9 100644 --- a/agent/agentfiles/files.go +++ b/agent/agentfiles/files.go @@ -1196,9 +1196,242 @@ func fuzzyReplace(content string, edit workspacesdk.FileEdit) (string, error) { return result, err } - return "", xerrors.New("search string not found in file. Verify the search " + + msg := "search string not found in file. Verify the search " + "string matches the file content exactly, including whitespace " + - "and indentation") + "and indentation" + // miscount takes precedence: a near-match means the search is the + // model's typo'd new text, not a swapped field. Emitting both can + // trick an agent into following the inversion hint and corrupting + // an unrelated line where the replace string coincidentally + // occurs. + if hint := miscountHint(contentLines, searchLines); hint != "" { + msg += ". " + hint + } else if hint := inversionHint(content, contentLines, replace, replaceLines, trimRight, trimAll); hint != "" { + msg += ". " + hint + } + return "", xerrors.New(msg) +} + +// maxHintLines caps the number of line numbers (inversion) or +// candidate file lines (per miscount) listed in a single hint before +// truncation with " and N more". +const maxHintLines = 5 + +// inversionHint detects the case where the caller swapped `search` +// and `replace`: search did not match but replace appears in the file. +func inversionHint( + content string, + contentLines []string, + replace string, + replaceLines []string, + trimRight, trimAll func(a, b string) bool, +) string { + if len(replaceLines) == 0 { + return "" + } + + lines := substringMatchLines(content, replace) + if len(lines) == 0 { + lines = lineEquivalentMatchLines(contentLines, replaceLines, trimRight) + } + if len(lines) == 0 { + lines = lineEquivalentMatchLines(contentLines, replaceLines, trimAll) + } + if len(lines) == 0 { + return "" + } + return fmt.Sprintf( + "Did you swap %q and %q? Your replace string appears at line %s", + "search", "replace", formatLineList(lines), + ) +} + +// substringMatchLines returns the 1-based line numbers where needle +// occurs in content as a byte-for-byte substring, including +// overlapping starts. Repeat occurrences on the same line collapse +// to a single line number. +func substringMatchLines(content, needle string) []int { + if needle == "" { + return nil + } + var lines []int + seen := make(map[int]struct{}) + for offset := 0; ; { + rel := strings.Index(content[offset:], needle) + if rel < 0 { + break + } + idx := offset + rel + line := 1 + strings.Count(content[:idx], "\n") + if _, dup := seen[line]; !dup { + seen[line] = struct{}{} + lines = append(lines, line) + } + // Advance by one byte so self-overlapping needles (e.g. + // "A\nB\nA\n" inside "A\nB\nA\nB\nA\n") still report + // every distinct starting line. + offset = idx + 1 + if offset > len(content) { + break + } + } + return lines +} + +// lineEquivalentMatchLines returns the 1-based start line of every +// contiguous block of contentLines that matches needleLines under eq. +func lineEquivalentMatchLines(contentLines, needleLines []string, eq func(a, b string) bool) []int { + if len(needleLines) == 0 || len(needleLines) > len(contentLines) { + return nil + } + var starts []int +outer: + for i := 0; i <= len(contentLines)-len(needleLines); i++ { + for j, n := range needleLines { + if !eq(contentLines[i+j], n) { + continue outer + } + } + starts = append(starts, i+1) + } + return starts +} + +// formatLineList renders a sorted line list as "12, 47, 89", truncated +// to maxHintLines entries with " and N more" when more exist. +func formatLineList(lines []int) string { + var b strings.Builder + shown := min(len(lines), maxHintLines) + for i := 0; i < shown; i++ { + if i > 0 { + _, _ = b.WriteString(", ") + } + _, _ = fmt.Fprintf(&b, "%d", lines[i]) + } + if rest := len(lines) - shown; rest > 0 { + _, _ = fmt.Fprintf(&b, " and %d more", rest) + } + return b.String() +} + +// miscountHint detects search lines that match a file line except for +// the count of one repeated rune. Emits one hint per +// (search-line, disagreeing-rune) group, capped at maxMiscountHints +// total with " and N more" suffix. +func miscountHint(contentLines, searchLines []string) string { + const maxMiscountHints = 3 + var hints []string + extra := 0 + for _, sLine := range searchLines { + sContent, _ := splitEnding(sLine) + if strings.TrimSpace(sContent) == "" { + continue + } + // One search line can disagree on different runes against + // different file lines; group by rune so each hint names a + // single codepoint. + groups := make(map[rune][]candidate) + counts := make(map[rune]int) + order := []rune{} + for i, cLine := range contentLines { + cContent, _ := splitEnding(cLine) + r, sc, cc, ok := singleRuneCountMismatch(sContent, cContent) + if !ok { + continue + } + if _, seen := groups[r]; !seen { + order = append(order, r) + counts[r] = sc + } + groups[r] = append(groups[r], candidate{line: i + 1, cCount: cc}) + } + for _, r := range order { + if len(hints) >= maxMiscountHints { + extra++ + continue + } + hints = append(hints, formatMiscount(counts[r], r, groups[r])) + } + } + if extra > 0 { + hints = append(hints, fmt.Sprintf("and %d more", extra)) + } + return strings.Join(hints, ". ") +} + +// formatMiscount renders one miscount candidate group. +func formatMiscount(sCount int, r rune, cands []candidate) string { + var b strings.Builder + _, _ = fmt.Fprintf(&b, "Your search has %d %q (U+%04X); the file has ", sCount, string(r), r) + shown := min(len(cands), maxHintLines) + for i := 0; i < shown; i++ { + if i > 0 { + _, _ = b.WriteString(", ") + } + _, _ = fmt.Fprintf(&b, "%d at line %d", cands[i].cCount, cands[i].line) + } + if rest := len(cands) - shown; rest > 0 { + _, _ = fmt.Fprintf(&b, " and %d more", rest) + } + return b.String() +} + +// candidate records a file line where one rune's count disagrees with +// the search. +type candidate struct { + line int + cCount int +} + +// singleRuneCountMismatch reports whether s and c agree on every rune +// class except one, where the disagreeing rune appears at least twice +// on one side. +func singleRuneCountMismatch(s, c string) (r rune, sCount, cCount int, ok bool) { + if s == "" || c == "" { + return 0, 0, 0, false + } + sFreq := runeFrequency(s) + cFreq := runeFrequency(c) + var ( + diffRune rune + diffCount int + sc int + cc int + ) + for rr, scv := range sFreq { + ccv := cFreq[rr] + if scv != ccv { + diffCount++ + diffRune = rr + sc = scv + cc = ccv + } + } + for rr, ccv := range cFreq { + if _, present := sFreq[rr]; present { + continue + } + diffCount++ + diffRune = rr + sc = 0 + cc = ccv + } + if diffCount != 1 { + return 0, 0, 0, false + } + if sc < 2 && cc < 2 { + return 0, 0, 0, false + } + return diffRune, sc, cc, true +} + +// runeFrequency returns the count of each rune in s. +func runeFrequency(s string) map[rune]int { + freq := make(map[rune]int) + for _, r := range s { + freq[r]++ + } + return freq } // seekLines scans contentLines looking for a contiguous subsequence that matches diff --git a/agent/agentfiles/files_test.go b/agent/agentfiles/files_test.go index 19f3187d88..8f8572b74c 100644 --- a/agent/agentfiles/files_test.go +++ b/agent/agentfiles/files_test.go @@ -3183,3 +3183,389 @@ func TestFuzzyReplace_Expansion_PreservesFileIndent(t *testing.T) { require.NoError(t, err) require.Equal(t, expected, string(data)) } + +// baseFuzzyNotFoundMessage is the leading sentence the matcher +// returns when all three passes miss. It must remain the leading +// sentence even when diagnostic hints are appended, so existing log +// scrapers continue to match. +const baseFuzzyNotFoundMessage = "search string not found in file. " + + "Verify the search string matches the file content exactly, " + + "including whitespace and indentation" + +// TestFuzzyReplace_Hints exercises the post-fail diagnostic hints: +// inversion (search and replace swapped) and miscount (one repeated +// rune at the wrong count). Each detector lists every match it finds +// and truncates the output to five entries with " and N more". +func TestFuzzyReplace_Hints(t *testing.T) { + t.Parallel() + + tmpdir := os.TempDir() + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug) + + type edit struct { + search, replace string + } + tests := []struct { + name string + content string + edit edit + wantSubs []string + notWantSubs []string + }{ + { + name: "Inversion_HintIncludesSwapAndLine", + content: "package main\n" + + "\n" + + "func adder(a int, b int) int { return a + b }\n" + + "\n" + + "// trailing comment\n", + edit: edit{ + search: "func adder(a, b int) int {\n\treturn a + b\n}\n", + replace: "func adder(a int, b int) int { return a + b }\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Did you swap "search" and "replace"? Your replace string appears at line 3`, + }, + }, + { + name: "Inversion_ThreeAnchors_AllListed", + content: "a\n" + + "matching block body of substantial length\n" + + "b\n" + + "matching block body of substantial length\n" + + "c\n" + + "matching block body of substantial length\n" + + "d\n", + edit: edit{ + search: "this search text is absent from the file\n", + replace: "matching block body of substantial length\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Did you swap "search" and "replace"? Your replace string appears at line 2, 4, 6`, + }, + notWantSubs: []string{"more"}, + }, + { + name: "Inversion_SevenAnchors_TruncatedWithAndMore", + content: "matching block body of substantial length\n" + + "matching block body of substantial length\n" + + "matching block body of substantial length\n" + + "matching block body of substantial length\n" + + "matching block body of substantial length\n" + + "matching block body of substantial length\n" + + "matching block body of substantial length\n", + edit: edit{ + search: "this search text is absent from the file\n", + replace: "matching block body of substantial length\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Did you swap "search" and "replace"? Your replace string appears at line 1, 2, 3, 4, 5 and 2 more`, + }, + }, + { + name: "Inversion_ShortReplace_TruncatedWithAndMore", + // Short replace strings used to be silently suppressed by + // a length floor. Now the line-list cap signals "your + // replace is too generic" by showing five matches plus + // " and N more", which is more informative than no hint. + content: "alpha\nbeta\nbeta\nbeta\nbeta\nbeta\nbeta\nbeta\ngamma\n", + edit: edit{ + search: "missing line that does not occur anywhere\n", + replace: "beta\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Did you swap "search" and "replace"? Your replace string appears at line 2, 3, 4, 5, 6 and 2 more`, + }, + }, + { + name: "Miscount_BoxDrawingDashes_HintNamesCodepoint", + content: "
\n" + + "{/* SECTION HEADING " + strings.Repeat("\u2500", 37) + " */}\n" + + "\n", + edit: edit{ + search: "{/* SECTION HEADING " + strings.Repeat("\u2500", 32) + " */}\n", + replace: "{/* REPLACED */}\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + "Your search has 32 \"\u2500\" (U+2500); the file has 37 at line 2", + }, + }, + { + name: "Miscount_ASCIIEquals_HintWorks", + content: "title\n" + + "section =======\n" + + "body\n", + edit: edit{ + search: "section =====\n", + replace: "section *****\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Your search has 5 "=" (U+003D); the file has 7 at line 2`, + }, + }, + { + name: "Miscount_TwoCandidates_BothListed", + content: "section =======\n" + + "section ===\n", + edit: edit{ + search: "section =====\n", + replace: "section *****\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Your search has 5 "=" (U+003D); the file has 7 at line 1, 3 at line 2`, + }, + notWantSubs: []string{"more"}, + }, + { + name: "Miscount_SixCandidates_TruncatedWithAndMore", + content: "section ==\n" + + "section ===\n" + + "section ======\n" + + "section =======\n" + + "section ========\n" + + "section =========\n", + edit: edit{ + search: "section =====\n", + replace: "section *****\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Your search has 5 "=" (U+003D); the file has 2 at line 1, 3 at line 2, 6 at line 3, 7 at line 4, 8 at line 5 and 1 more`, + }, + }, + { + name: "Miscount_TwoDistinctChanges_NoHint", + content: "first\n" + + "a===b\n" + + "last\n", + edit: edit{ + search: "a=====b!\n", + replace: "unused\n", + }, + wantSubs: []string{baseFuzzyNotFoundMessage}, + notWantSubs: []string{"Your search has", "the file has"}, + }, + { + name: "Miscount_Unrelated_NoHint", + content: "package foo\n\nfunc bar() {}\n", + edit: edit{ + search: "this content is wholly different from the file\n", + replace: "unused\n", + }, + wantSubs: []string{baseFuzzyNotFoundMessage}, + notWantSubs: []string{"Your search has", "the file has"}, + }, + { + name: "Miscount_SuppressesInversion_WhenBothCouldFire", + content: "
\n" + + "{/* SECTION HEADING " + strings.Repeat("\u2500", 8) + " */}\n" + + "\n" + + "doSomethingWithLongName(ctx)\n" + + "\n", + edit: edit{ + // Search has 6 dashes (miscount target on line 2). + search: "{/* SECTION HEADING " + strings.Repeat("\u2500", 6) + " */}\n", + // Replace is unrelated text that happens to appear at + // line 4. Without miscount-takes-precedence, the + // inversion hint would direct an agent to swap and + // corrupt line 4. + replace: "doSomethingWithLongName(ctx)\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + "Your search has 6 \"\u2500\" (U+2500); the file has 8 at line 2", + }, + notWantSubs: []string{"swap", "appears at line"}, + }, + { + name: "Inversion_DedupRepeatsOnOneLine", + content: "prefix\n" + + "AAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAA\n" + + "suffix\n", + edit: edit{ + search: "absent search line not in file at all\n", + replace: "AAAAAAAAAAAAAAAAAAAA\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Did you swap "search" and "replace"? Your replace string appears at line 2`, + }, + // Line 2 must appear once, not 2, 2, 2. + notWantSubs: []string{"line 2, 2", "more"}, + }, + { + name: "Inversion_TrimRightFallback_TrailingSpaces", + // Content line has trailing spaces; replace omits them. + // Byte-substring misses; trimRight line-equivalent + // matches. + content: "preamble\n" + + "matching block body of substantial length \n" + + "trailer\n", + edit: edit{ + search: "absent search line not in file at all\n", + replace: "matching block body of substantial length\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Did you swap "search" and "replace"? Your replace string appears at line 2`, + }, + }, + { + name: "Inversion_TrimAllFallback_LeadingIndent", + // Content line has leading indentation that replace + // omits. Byte-substring misses; trim-right also misses + // (the leading whitespace is on a different side); + // trim-all matches. + content: "preamble\n" + + "\t\tmatching block body of substantial length\n" + + "trailer\n", + edit: edit{ + search: "absent search line not in file at all\n", + replace: "matching block body of substantial length\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Did you swap "search" and "replace"? Your replace string appears at line 2`, + }, + }, + { + name: "Miscount_SingleRuneDiff_Suppressed", + // Rune `b` differs (sc=1, cc=0). Both counts < 2, the + // suppression guard fires, no hint. + content: "first\nxa\nlast\n", + edit: edit{ + search: "xab\n", + replace: "unused\n", + }, + wantSubs: []string{baseFuzzyNotFoundMessage}, + notWantSubs: []string{"Your search has", "the file has"}, + }, + { + name: "Miscount_TotalHintsCapped", + // Four search lines, each matching a distinct file line + // via a distinct miscount rune. With maxMiscountHints=3, + // only 3 hint sentences appear plus " and 1 more". + content: "section ==\n" + + "divider ++\n" + + "line ##\n" + + "header @@\n", + edit: edit{ + search: "section ====\n" + + "divider ++++\n" + + "line ####\n" + + "header @@@@\n", + replace: "unused\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Your search has 4 "=" (U+003D)`, + `Your search has 4 "+" (U+002B)`, + `Your search has 4 "#" (U+0023)`, + "and 1 more", + }, + // The fourth hint (`@`) is suppressed by the cap. + notWantSubs: []string{`"@"`}, + }, + { + name: "Inversion_OverlappingMultilineMatch", + // Self-overlapping multi-line replace: "A\nB\nA\n" + // starts at line 1 and line 3 of the file. The old + // non-overlapping advancement missed line 3. + content: "AAAAAAAAAAAAAAAAAAAA\n" + + "BBBBBBBBBBBBBBBBBBBB\n" + + "AAAAAAAAAAAAAAAAAAAA\n" + + "BBBBBBBBBBBBBBBBBBBB\n" + + "AAAAAAAAAAAAAAAAAAAA\n", + edit: edit{ + search: "absent search line not in file at all\n", + replace: "AAAAAAAAAAAAAAAAAAAA\n" + + "BBBBBBBBBBBBBBBBBBBB\n" + + "AAAAAAAAAAAAAAAAAAAA\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Did you swap "search" and "replace"? Your replace string appears at line 1, 3`, + }, + notWantSubs: []string{"more"}, + }, + { + name: "Miscount_RuneOnlyInFile", + // Disagreeing rune `b` appears only in the file line. + // Exercises the second loop of singleRuneCountMismatch + // (runes in c but absent from s). + content: "section ==bb\n", + edit: edit{ + search: "section ==\n", + replace: "section --\n", + }, + wantSubs: []string{ + baseFuzzyNotFoundMessage, + `Your search has 0 "b" (U+0062); the file has 2 at line 1`, + }, + }, + { + name: "NoHints_BaseErrorOnly", + content: "package foo\n" + + "\n" + + "func bar() {}\n", + edit: edit{ + search: "func zzzz() {}\n", + replace: "new\n", + }, + wantSubs: []string{baseFuzzyNotFoundMessage}, + notWantSubs: []string{"swap", "Your search has", "appears at line"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + fs := afero.NewMemMapFs() + api := agentfiles.NewAPI(logger, fs, nil) + path := filepath.Join(tmpdir, "hint-"+tt.name) + require.NoError(t, afero.WriteFile(fs, path, []byte(tt.content), 0o644)) + + req := workspacesdk.FileEditRequest{ + Files: []workspacesdk.FileEdits{{ + Path: path, + Edits: []workspacesdk.FileEdit{{ + Search: tt.edit.search, + Replace: tt.edit.replace, + }}, + }}, + } + + ctx := testutil.Context(t, testutil.WaitShort) + buf := bytes.NewBuffer(nil) + enc := json.NewEncoder(buf) + enc.SetEscapeHTML(false) + require.NoError(t, enc.Encode(req)) + w := httptest.NewRecorder() + r := httptest.NewRequestWithContext(ctx, http.MethodPost, "/edit-files", buf) + api.Routes().ServeHTTP(w, r) + + require.Equal(t, http.StatusBadRequest, w.Code, "body: %s", w.Body.String()) + got := &codersdk.Error{} + require.NoError(t, json.NewDecoder(w.Body).Decode(got)) + msg := got.Message + for _, sub := range tt.wantSubs { + require.Contains(t, msg, sub, "want substring missing") + } + for _, sub := range tt.notWantSubs { + require.NotContains(t, msg, sub, "unwanted substring present") + } + + data, err := afero.ReadFile(fs, path) + require.NoError(t, err) + require.Equal(t, tt.content, string(data)) + }) + } +}