mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat: add line-based read_file tool with safety limits (#22400)
## Summary Adds a new line-based file reading endpoint to the workspace agent, replacing the unbounded byte-based approach for the `read_file` chat tool and `coder_workspace_read_file` MCP tool. **Problem**: The current `read_file` tool returns the entire file contents with no limits, which can blow up LLM context windows and cause OOM issues with large files. **Solution**: Inspired by [`coder/mux`](https://github.com/coder/mux) and [`openai/codex`](https://github.com/openai/codex), implement a line-based reader with safety limits. ## Changes ### Agent (`agent/agentfiles/`) - New `/read-file-lines` endpoint with `HandleReadFileLines` handler - Line-based `offset` (1-based line number, default: 1) and `limit` (line count, default: 2000) - Safety constants: | Constant | Value | Purpose | |---|---|---| | `MaxFileSize` | 1 MB | Reject files larger than this at stat | | `MaxLineBytes` | 1,024 | Per-line truncation with `... [truncated]` marker | | `MaxResponseLines` | 2,000 | Max lines per response | | `MaxResponseBytes` | 32 KB | Max total response size | | `DefaultLineLimit` | 2,000 | Default when no limit specified | - Line numbering format: `1\tcontent` (tab-separated) - Structured JSON response: `{ success, file_size, total_lines, lines_read, content, error }` - Hard errors when limits exceeded — tells the LLM to use `offset`/`limit` - Existing byte-based `/read-file` endpoint preserved (used by `instruction.go`) ### SDK (`codersdk/workspacesdk/`) - `ReadFileLinesResponse` type added - `ReadFileLines` method added to `AgentConn` interface - Mock regenerated ### Chat tool (`coderd/chatd/chattool/`) - `read_file` tool now uses `conn.ReadFileLines()` instead of `conn.ReadFile()` - Updated tool description to document line-based parameters - Response includes `file_size`, `total_lines`, `lines_read` metadata ### MCP tool (`codersdk/toolsdk/`) - `coder_workspace_read_file` updated to use line-based reading - Schema descriptions updated for line-based offset/limit - Removed `maxFileLimit` constant (agent handles limits now) ### Tests - 13 new test cases for `TestReadFileLines`: - Path validation (empty, relative, non-existent, directory, no permissions) - Empty file handling - Basic read, offset, limit, offset+limit combinations - Offset beyond file length - Long line truncation (>1024 bytes) - Large file rejection (>1MB) - All existing tests pass unchanged ## Design decisions | Decision | Rationale | |---|---| | Line-based, not byte-based | Both coder/mux and openai/codex use line-based — matches how LLMs reason about code | | Default limit of 2000 | Matches codex; prevents accidental full-file dumps while being generous | | 32 KB response cap | Compromise between mux (16 KB) and codex (no cap) | | 1024 byte/line truncation with marker | More generous than codex (500), marker helps LLM know data is missing | | Hard errors on overflow | Matches mux; forces LLM to paginate rather than getting partial data | | Preserve byte-based endpoint | `instruction.go` needs raw byte access for AGENTS.md |
This commit is contained in:
@@ -29,6 +29,7 @@ func (api *API) Routes() http.Handler {
|
||||
|
||||
r.Post("/list-directory", api.HandleLS)
|
||||
r.Get("/read-file", api.HandleReadFile)
|
||||
r.Get("/read-file-lines", api.HandleReadFileLines)
|
||||
r.Post("/write-file", api.HandleWriteFile)
|
||||
r.Post("/edit-files", api.HandleEditFiles)
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/icholy/replace"
|
||||
@@ -23,6 +24,22 @@ import (
|
||||
"github.com/coder/coder/v2/codersdk/workspacesdk"
|
||||
)
|
||||
|
||||
// ReadFileLinesResponse is the JSON response for the line-based file reader.
|
||||
type ReadFileLinesResponse struct {
|
||||
// Success indicates whether the read was successful.
|
||||
Success bool `json:"success"`
|
||||
// FileSize is the original file size in bytes.
|
||||
FileSize int64 `json:"file_size,omitempty"`
|
||||
// TotalLines is the total number of lines in the file.
|
||||
TotalLines int `json:"total_lines,omitempty"`
|
||||
// LinesRead is the count of lines returned in this response.
|
||||
LinesRead int `json:"lines_read,omitempty"`
|
||||
// Content is the line-numbered file content.
|
||||
Content string `json:"content,omitempty"`
|
||||
// Error is the error message when success is false.
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
type HTTPResponseCode = int
|
||||
|
||||
func (api *API) HandleReadFile(rw http.ResponseWriter, r *http.Request) {
|
||||
@@ -103,6 +120,166 @@ func (api *API) streamFile(ctx context.Context, rw http.ResponseWriter, path str
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (api *API) HandleReadFileLines(rw http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
|
||||
query := r.URL.Query()
|
||||
parser := httpapi.NewQueryParamParser().RequiredNotEmpty("path")
|
||||
path := parser.String(query, "", "path")
|
||||
offset := parser.PositiveInt64(query, 1, "offset")
|
||||
limit := parser.PositiveInt64(query, 0, "limit")
|
||||
maxFileSize := parser.PositiveInt64(query, workspacesdk.DefaultMaxFileSize, "max_file_size")
|
||||
maxLineBytes := parser.PositiveInt64(query, workspacesdk.DefaultMaxLineBytes, "max_line_bytes")
|
||||
maxResponseLines := parser.PositiveInt64(query, workspacesdk.DefaultMaxResponseLines, "max_response_lines")
|
||||
maxResponseBytes := parser.PositiveInt64(query, workspacesdk.DefaultMaxResponseBytes, "max_response_bytes")
|
||||
parser.ErrorExcessParams(query)
|
||||
if len(parser.Errors) > 0 {
|
||||
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
|
||||
Message: "Query parameters have invalid values.",
|
||||
Validations: parser.Errors,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
resp := api.readFileLines(ctx, path, offset, limit, workspacesdk.ReadFileLinesLimits{
|
||||
MaxFileSize: maxFileSize,
|
||||
MaxLineBytes: int(maxLineBytes),
|
||||
MaxResponseLines: int(maxResponseLines),
|
||||
MaxResponseBytes: int(maxResponseBytes),
|
||||
})
|
||||
httpapi.Write(ctx, rw, http.StatusOK, resp)
|
||||
}
|
||||
|
||||
func (api *API) readFileLines(_ context.Context, path string, offset, limit int64, limits workspacesdk.ReadFileLinesLimits) ReadFileLinesResponse {
|
||||
errResp := func(msg string) ReadFileLinesResponse {
|
||||
return ReadFileLinesResponse{Success: false, Error: msg}
|
||||
}
|
||||
|
||||
if !filepath.IsAbs(path) {
|
||||
return errResp(fmt.Sprintf("file path must be absolute: %q", path))
|
||||
}
|
||||
|
||||
f, err := api.filesystem.Open(path)
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return errResp(fmt.Sprintf("file does not exist: %s", path))
|
||||
}
|
||||
if errors.Is(err, os.ErrPermission) {
|
||||
return errResp(fmt.Sprintf("permission denied: %s", path))
|
||||
}
|
||||
return errResp(fmt.Sprintf("open file: %s", err))
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
stat, err := f.Stat()
|
||||
if err != nil {
|
||||
return errResp(fmt.Sprintf("stat file: %s", err))
|
||||
}
|
||||
|
||||
if stat.IsDir() {
|
||||
return errResp(fmt.Sprintf("not a file: %s", path))
|
||||
}
|
||||
|
||||
fileSize := stat.Size()
|
||||
if fileSize > limits.MaxFileSize {
|
||||
return errResp(fmt.Sprintf(
|
||||
"file is %d bytes which exceeds the maximum of %d bytes. Use grep, sed, or awk to extract the content you need, or use offset and limit to read a portion.",
|
||||
fileSize, limits.MaxFileSize,
|
||||
))
|
||||
}
|
||||
|
||||
// Read the entire file (up to MaxFileSize).
|
||||
data, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
return errResp(fmt.Sprintf("read file: %s", err))
|
||||
}
|
||||
|
||||
// Split into lines.
|
||||
content := string(data)
|
||||
// Handle empty file.
|
||||
if content == "" {
|
||||
return ReadFileLinesResponse{
|
||||
Success: true,
|
||||
FileSize: fileSize,
|
||||
TotalLines: 0,
|
||||
LinesRead: 0,
|
||||
Content: "",
|
||||
}
|
||||
}
|
||||
|
||||
lines := strings.Split(content, "\n")
|
||||
totalLines := len(lines)
|
||||
|
||||
// offset is 1-based line number.
|
||||
if offset < 1 {
|
||||
offset = 1
|
||||
}
|
||||
if offset > int64(totalLines) {
|
||||
return errResp(fmt.Sprintf(
|
||||
"offset %d is beyond the file length of %d lines",
|
||||
offset, totalLines,
|
||||
))
|
||||
}
|
||||
|
||||
// Default limit.
|
||||
if limit <= 0 {
|
||||
limit = int64(limits.MaxResponseLines)
|
||||
}
|
||||
|
||||
startIdx := int(offset - 1) // convert to 0-based
|
||||
endIdx := startIdx + int(limit)
|
||||
if endIdx > totalLines {
|
||||
endIdx = totalLines
|
||||
}
|
||||
|
||||
var numbered []string
|
||||
totalBytesAccumulated := 0
|
||||
|
||||
for i := startIdx; i < endIdx; i++ {
|
||||
line := lines[i]
|
||||
|
||||
// Per-line truncation.
|
||||
if len(line) > limits.MaxLineBytes {
|
||||
line = line[:limits.MaxLineBytes] + "... [truncated]"
|
||||
}
|
||||
|
||||
// Format with 1-based line number.
|
||||
numberedLine := fmt.Sprintf("%d\t%s", i+1, line)
|
||||
lineBytes := len(numberedLine)
|
||||
|
||||
// Check total byte budget.
|
||||
newTotal := totalBytesAccumulated + lineBytes
|
||||
if len(numbered) > 0 {
|
||||
newTotal++ // account for \n joiner
|
||||
}
|
||||
if newTotal > limits.MaxResponseBytes {
|
||||
return errResp(fmt.Sprintf(
|
||||
"output would exceed %d bytes. Read less at a time using offset and limit parameters.",
|
||||
limits.MaxResponseBytes,
|
||||
))
|
||||
}
|
||||
|
||||
// Check line count.
|
||||
if len(numbered) >= limits.MaxResponseLines {
|
||||
return errResp(fmt.Sprintf(
|
||||
"output would exceed %d lines. Read less at a time using offset and limit parameters.",
|
||||
limits.MaxResponseLines,
|
||||
))
|
||||
}
|
||||
|
||||
numbered = append(numbered, numberedLine)
|
||||
totalBytesAccumulated = newTotal
|
||||
}
|
||||
|
||||
return ReadFileLinesResponse{
|
||||
Success: true,
|
||||
FileSize: fileSize,
|
||||
TotalLines: totalLines,
|
||||
LinesRead: len(numbered),
|
||||
Content: strings.Join(numbered, "\n"),
|
||||
}
|
||||
}
|
||||
|
||||
func (api *API) HandleWriteFile(rw http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
|
||||
|
||||
@@ -737,3 +737,188 @@ func TestEditFiles(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadFileLines(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tmpdir := os.TempDir()
|
||||
noPermsFilePath := filepath.Join(tmpdir, "no-perms-lines")
|
||||
|
||||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
|
||||
fs := newTestFs(afero.NewMemMapFs(), func(call, file string) error {
|
||||
if file == noPermsFilePath {
|
||||
return os.ErrPermission
|
||||
}
|
||||
return nil
|
||||
})
|
||||
api := agentfiles.NewAPI(logger, fs)
|
||||
|
||||
dirPath := filepath.Join(tmpdir, "a-directory-lines")
|
||||
err := fs.MkdirAll(dirPath, 0o755)
|
||||
require.NoError(t, err)
|
||||
|
||||
emptyFilePath := filepath.Join(tmpdir, "empty-file")
|
||||
err = afero.WriteFile(fs, emptyFilePath, []byte(""), 0o644)
|
||||
require.NoError(t, err)
|
||||
|
||||
basicFilePath := filepath.Join(tmpdir, "basic-file")
|
||||
err = afero.WriteFile(fs, basicFilePath, []byte("line1\nline2\nline3"), 0o644)
|
||||
require.NoError(t, err)
|
||||
|
||||
longLine := string(bytes.Repeat([]byte("x"), 1025))
|
||||
longLineFilePath := filepath.Join(tmpdir, "long-line-file")
|
||||
err = afero.WriteFile(fs, longLineFilePath, []byte(longLine), 0o644)
|
||||
require.NoError(t, err)
|
||||
|
||||
largeFilePath := filepath.Join(tmpdir, "large-file")
|
||||
err = afero.WriteFile(fs, largeFilePath, bytes.Repeat([]byte("x"), 1<<20+1), 0o644)
|
||||
require.NoError(t, err)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
path string
|
||||
offset int64
|
||||
limit int64
|
||||
expSuccess bool
|
||||
expError string
|
||||
expContent string
|
||||
expTotal int
|
||||
expRead int
|
||||
expSize int64
|
||||
// useCodersdk is set for cases where the handler returns
|
||||
// codersdk.Response (query param validation) instead of ReadFileLinesResponse.
|
||||
useCodersdk bool
|
||||
}{
|
||||
{
|
||||
name: "NoPath",
|
||||
path: "",
|
||||
useCodersdk: true,
|
||||
expError: "is required",
|
||||
},
|
||||
{
|
||||
name: "RelativePath",
|
||||
path: "relative/path",
|
||||
expError: "file path must be absolute",
|
||||
},
|
||||
{
|
||||
name: "NonExistent",
|
||||
path: filepath.Join(tmpdir, "does-not-exist"),
|
||||
expError: "file does not exist",
|
||||
},
|
||||
{
|
||||
name: "IsDir",
|
||||
path: dirPath,
|
||||
expError: "not a file",
|
||||
},
|
||||
{
|
||||
name: "NoPermissions",
|
||||
path: noPermsFilePath,
|
||||
expError: "permission denied",
|
||||
},
|
||||
{
|
||||
name: "EmptyFile",
|
||||
path: emptyFilePath,
|
||||
expSuccess: true,
|
||||
expTotal: 0,
|
||||
expRead: 0,
|
||||
expSize: 0,
|
||||
},
|
||||
{
|
||||
name: "BasicRead",
|
||||
path: basicFilePath,
|
||||
expSuccess: true,
|
||||
expContent: "1\tline1\n2\tline2\n3\tline3",
|
||||
expTotal: 3,
|
||||
expRead: 3,
|
||||
expSize: int64(len("line1\nline2\nline3")),
|
||||
},
|
||||
{
|
||||
name: "Offset2",
|
||||
path: basicFilePath,
|
||||
offset: 2,
|
||||
expSuccess: true,
|
||||
expContent: "2\tline2\n3\tline3",
|
||||
expTotal: 3,
|
||||
expRead: 2,
|
||||
expSize: int64(len("line1\nline2\nline3")),
|
||||
},
|
||||
{
|
||||
name: "Limit1",
|
||||
path: basicFilePath,
|
||||
limit: 1,
|
||||
expSuccess: true,
|
||||
expContent: "1\tline1",
|
||||
expTotal: 3,
|
||||
expRead: 1,
|
||||
expSize: int64(len("line1\nline2\nline3")),
|
||||
},
|
||||
{
|
||||
name: "Offset2Limit1",
|
||||
path: basicFilePath,
|
||||
offset: 2,
|
||||
limit: 1,
|
||||
expSuccess: true,
|
||||
expContent: "2\tline2",
|
||||
expTotal: 3,
|
||||
expRead: 1,
|
||||
expSize: int64(len("line1\nline2\nline3")),
|
||||
},
|
||||
{
|
||||
name: "OffsetBeyondFile",
|
||||
path: basicFilePath,
|
||||
offset: 100,
|
||||
expError: "offset 100 is beyond the file length of 3 lines",
|
||||
},
|
||||
{
|
||||
name: "LongLineTruncation",
|
||||
path: longLineFilePath,
|
||||
expSuccess: true,
|
||||
expContent: "1\t" + string(bytes.Repeat([]byte("x"), 1024)) + "... [truncated]",
|
||||
expTotal: 1,
|
||||
expRead: 1,
|
||||
expSize: 1025,
|
||||
},
|
||||
{
|
||||
name: "LargeFile",
|
||||
path: largeFilePath,
|
||||
expError: "exceeds the maximum",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
|
||||
defer cancel()
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
r := httptest.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("/read-file-lines?path=%s&offset=%d&limit=%d", tt.path, tt.offset, tt.limit), nil)
|
||||
api.Routes().ServeHTTP(w, r)
|
||||
|
||||
if tt.useCodersdk {
|
||||
// Query param validation errors return codersdk.Response.
|
||||
require.Equal(t, http.StatusBadRequest, w.Code)
|
||||
require.Contains(t, w.Body.String(), tt.expError)
|
||||
return
|
||||
}
|
||||
|
||||
var resp agentfiles.ReadFileLinesResponse
|
||||
err := json.NewDecoder(w.Body).Decode(&resp)
|
||||
require.NoError(t, err)
|
||||
|
||||
if tt.expSuccess {
|
||||
require.Equal(t, http.StatusOK, w.Code)
|
||||
require.True(t, resp.Success)
|
||||
require.Equal(t, tt.expContent, resp.Content)
|
||||
require.Equal(t, tt.expTotal, resp.TotalLines)
|
||||
require.Equal(t, tt.expRead, resp.LinesRead)
|
||||
require.Equal(t, tt.expSize, resp.FileSize)
|
||||
} else {
|
||||
require.Equal(t, http.StatusOK, w.Code)
|
||||
require.False(t, resp.Success)
|
||||
require.Contains(t, resp.Error, tt.expError)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user