feat: add line-based read_file tool with safety limits (#22400)

## Summary

Adds a new line-based file reading endpoint to the workspace agent,
replacing the unbounded byte-based approach for the `read_file` chat
tool and `coder_workspace_read_file` MCP tool.

**Problem**: The current `read_file` tool returns the entire file
contents with no limits, which can blow up LLM context windows and cause
OOM issues with large files.

**Solution**: Inspired by [`coder/mux`](https://github.com/coder/mux)
and [`openai/codex`](https://github.com/openai/codex), implement a
line-based reader with safety limits.

## Changes

### Agent (`agent/agentfiles/`)
- New `/read-file-lines` endpoint with `HandleReadFileLines` handler
- Line-based `offset` (1-based line number, default: 1) and `limit`
(line count, default: 2000)
- Safety constants:
  | Constant | Value | Purpose |
  |---|---|---|
  | `MaxFileSize` | 1 MB | Reject files larger than this at stat |
| `MaxLineBytes` | 1,024 | Per-line truncation with `... [truncated]`
marker |
  | `MaxResponseLines` | 2,000 | Max lines per response |
  | `MaxResponseBytes` | 32 KB | Max total response size |
  | `DefaultLineLimit` | 2,000 | Default when no limit specified |
- Line numbering format: `1\tcontent` (tab-separated)
- Structured JSON response: `{ success, file_size, total_lines,
lines_read, content, error }`
- Hard errors when limits exceeded — tells the LLM to use
`offset`/`limit`
- Existing byte-based `/read-file` endpoint preserved (used by
`instruction.go`)

### SDK (`codersdk/workspacesdk/`)
- `ReadFileLinesResponse` type added
- `ReadFileLines` method added to `AgentConn` interface
- Mock regenerated

### Chat tool (`coderd/chatd/chattool/`)
- `read_file` tool now uses `conn.ReadFileLines()` instead of
`conn.ReadFile()`
- Updated tool description to document line-based parameters
- Response includes `file_size`, `total_lines`, `lines_read` metadata

### MCP tool (`codersdk/toolsdk/`)
- `coder_workspace_read_file` updated to use line-based reading
- Schema descriptions updated for line-based offset/limit
- Removed `maxFileLimit` constant (agent handles limits now)

### Tests
- 13 new test cases for `TestReadFileLines`:
- Path validation (empty, relative, non-existent, directory, no
permissions)
  - Empty file handling
  - Basic read, offset, limit, offset+limit combinations
  - Offset beyond file length
  - Long line truncation (>1024 bytes)
  - Large file rejection (>1MB)
- All existing tests pass unchanged

## Design decisions

| Decision | Rationale |
|---|---|
| Line-based, not byte-based | Both coder/mux and openai/codex use
line-based — matches how LLMs reason about code |
| Default limit of 2000 | Matches codex; prevents accidental full-file
dumps while being generous |
| 32 KB response cap | Compromise between mux (16 KB) and codex (no cap)
|
| 1024 byte/line truncation with marker | More generous than codex
(500), marker helps LLM know data is missing |
| Hard errors on overflow | Matches mux; forces LLM to paginate rather
than getting partial data |
| Preserve byte-based endpoint | `instruction.go` needs raw byte access
for AGENTS.md |
This commit is contained in:
Kyle Carberry
2026-02-27 15:12:56 -05:00
committed by GitHub
parent ff687aa780
commit b65c0766d2
6 changed files with 462 additions and 11 deletions
+1
View File
@@ -29,6 +29,7 @@ func (api *API) Routes() http.Handler {
r.Post("/list-directory", api.HandleLS)
r.Get("/read-file", api.HandleReadFile)
r.Get("/read-file-lines", api.HandleReadFileLines)
r.Post("/write-file", api.HandleWriteFile)
r.Post("/edit-files", api.HandleEditFiles)
+177
View File
@@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"github.com/icholy/replace"
@@ -23,6 +24,22 @@ import (
"github.com/coder/coder/v2/codersdk/workspacesdk"
)
// ReadFileLinesResponse is the JSON response for the line-based file reader.
type ReadFileLinesResponse struct {
// Success indicates whether the read was successful.
Success bool `json:"success"`
// FileSize is the original file size in bytes.
FileSize int64 `json:"file_size,omitempty"`
// TotalLines is the total number of lines in the file.
TotalLines int `json:"total_lines,omitempty"`
// LinesRead is the count of lines returned in this response.
LinesRead int `json:"lines_read,omitempty"`
// Content is the line-numbered file content.
Content string `json:"content,omitempty"`
// Error is the error message when success is false.
Error string `json:"error,omitempty"`
}
type HTTPResponseCode = int
func (api *API) HandleReadFile(rw http.ResponseWriter, r *http.Request) {
@@ -103,6 +120,166 @@ func (api *API) streamFile(ctx context.Context, rw http.ResponseWriter, path str
return 0, nil
}
func (api *API) HandleReadFileLines(rw http.ResponseWriter, r *http.Request) {
ctx := r.Context()
query := r.URL.Query()
parser := httpapi.NewQueryParamParser().RequiredNotEmpty("path")
path := parser.String(query, "", "path")
offset := parser.PositiveInt64(query, 1, "offset")
limit := parser.PositiveInt64(query, 0, "limit")
maxFileSize := parser.PositiveInt64(query, workspacesdk.DefaultMaxFileSize, "max_file_size")
maxLineBytes := parser.PositiveInt64(query, workspacesdk.DefaultMaxLineBytes, "max_line_bytes")
maxResponseLines := parser.PositiveInt64(query, workspacesdk.DefaultMaxResponseLines, "max_response_lines")
maxResponseBytes := parser.PositiveInt64(query, workspacesdk.DefaultMaxResponseBytes, "max_response_bytes")
parser.ErrorExcessParams(query)
if len(parser.Errors) > 0 {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Query parameters have invalid values.",
Validations: parser.Errors,
})
return
}
resp := api.readFileLines(ctx, path, offset, limit, workspacesdk.ReadFileLinesLimits{
MaxFileSize: maxFileSize,
MaxLineBytes: int(maxLineBytes),
MaxResponseLines: int(maxResponseLines),
MaxResponseBytes: int(maxResponseBytes),
})
httpapi.Write(ctx, rw, http.StatusOK, resp)
}
func (api *API) readFileLines(_ context.Context, path string, offset, limit int64, limits workspacesdk.ReadFileLinesLimits) ReadFileLinesResponse {
errResp := func(msg string) ReadFileLinesResponse {
return ReadFileLinesResponse{Success: false, Error: msg}
}
if !filepath.IsAbs(path) {
return errResp(fmt.Sprintf("file path must be absolute: %q", path))
}
f, err := api.filesystem.Open(path)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return errResp(fmt.Sprintf("file does not exist: %s", path))
}
if errors.Is(err, os.ErrPermission) {
return errResp(fmt.Sprintf("permission denied: %s", path))
}
return errResp(fmt.Sprintf("open file: %s", err))
}
defer f.Close()
stat, err := f.Stat()
if err != nil {
return errResp(fmt.Sprintf("stat file: %s", err))
}
if stat.IsDir() {
return errResp(fmt.Sprintf("not a file: %s", path))
}
fileSize := stat.Size()
if fileSize > limits.MaxFileSize {
return errResp(fmt.Sprintf(
"file is %d bytes which exceeds the maximum of %d bytes. Use grep, sed, or awk to extract the content you need, or use offset and limit to read a portion.",
fileSize, limits.MaxFileSize,
))
}
// Read the entire file (up to MaxFileSize).
data, err := io.ReadAll(f)
if err != nil {
return errResp(fmt.Sprintf("read file: %s", err))
}
// Split into lines.
content := string(data)
// Handle empty file.
if content == "" {
return ReadFileLinesResponse{
Success: true,
FileSize: fileSize,
TotalLines: 0,
LinesRead: 0,
Content: "",
}
}
lines := strings.Split(content, "\n")
totalLines := len(lines)
// offset is 1-based line number.
if offset < 1 {
offset = 1
}
if offset > int64(totalLines) {
return errResp(fmt.Sprintf(
"offset %d is beyond the file length of %d lines",
offset, totalLines,
))
}
// Default limit.
if limit <= 0 {
limit = int64(limits.MaxResponseLines)
}
startIdx := int(offset - 1) // convert to 0-based
endIdx := startIdx + int(limit)
if endIdx > totalLines {
endIdx = totalLines
}
var numbered []string
totalBytesAccumulated := 0
for i := startIdx; i < endIdx; i++ {
line := lines[i]
// Per-line truncation.
if len(line) > limits.MaxLineBytes {
line = line[:limits.MaxLineBytes] + "... [truncated]"
}
// Format with 1-based line number.
numberedLine := fmt.Sprintf("%d\t%s", i+1, line)
lineBytes := len(numberedLine)
// Check total byte budget.
newTotal := totalBytesAccumulated + lineBytes
if len(numbered) > 0 {
newTotal++ // account for \n joiner
}
if newTotal > limits.MaxResponseBytes {
return errResp(fmt.Sprintf(
"output would exceed %d bytes. Read less at a time using offset and limit parameters.",
limits.MaxResponseBytes,
))
}
// Check line count.
if len(numbered) >= limits.MaxResponseLines {
return errResp(fmt.Sprintf(
"output would exceed %d lines. Read less at a time using offset and limit parameters.",
limits.MaxResponseLines,
))
}
numbered = append(numbered, numberedLine)
totalBytesAccumulated = newTotal
}
return ReadFileLinesResponse{
Success: true,
FileSize: fileSize,
TotalLines: totalLines,
LinesRead: len(numbered),
Content: strings.Join(numbered, "\n"),
}
}
func (api *API) HandleWriteFile(rw http.ResponseWriter, r *http.Request) {
ctx := r.Context()
+185
View File
@@ -737,3 +737,188 @@ func TestEditFiles(t *testing.T) {
})
}
}
func TestReadFileLines(t *testing.T) {
t.Parallel()
tmpdir := os.TempDir()
noPermsFilePath := filepath.Join(tmpdir, "no-perms-lines")
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
fs := newTestFs(afero.NewMemMapFs(), func(call, file string) error {
if file == noPermsFilePath {
return os.ErrPermission
}
return nil
})
api := agentfiles.NewAPI(logger, fs)
dirPath := filepath.Join(tmpdir, "a-directory-lines")
err := fs.MkdirAll(dirPath, 0o755)
require.NoError(t, err)
emptyFilePath := filepath.Join(tmpdir, "empty-file")
err = afero.WriteFile(fs, emptyFilePath, []byte(""), 0o644)
require.NoError(t, err)
basicFilePath := filepath.Join(tmpdir, "basic-file")
err = afero.WriteFile(fs, basicFilePath, []byte("line1\nline2\nline3"), 0o644)
require.NoError(t, err)
longLine := string(bytes.Repeat([]byte("x"), 1025))
longLineFilePath := filepath.Join(tmpdir, "long-line-file")
err = afero.WriteFile(fs, longLineFilePath, []byte(longLine), 0o644)
require.NoError(t, err)
largeFilePath := filepath.Join(tmpdir, "large-file")
err = afero.WriteFile(fs, largeFilePath, bytes.Repeat([]byte("x"), 1<<20+1), 0o644)
require.NoError(t, err)
tests := []struct {
name string
path string
offset int64
limit int64
expSuccess bool
expError string
expContent string
expTotal int
expRead int
expSize int64
// useCodersdk is set for cases where the handler returns
// codersdk.Response (query param validation) instead of ReadFileLinesResponse.
useCodersdk bool
}{
{
name: "NoPath",
path: "",
useCodersdk: true,
expError: "is required",
},
{
name: "RelativePath",
path: "relative/path",
expError: "file path must be absolute",
},
{
name: "NonExistent",
path: filepath.Join(tmpdir, "does-not-exist"),
expError: "file does not exist",
},
{
name: "IsDir",
path: dirPath,
expError: "not a file",
},
{
name: "NoPermissions",
path: noPermsFilePath,
expError: "permission denied",
},
{
name: "EmptyFile",
path: emptyFilePath,
expSuccess: true,
expTotal: 0,
expRead: 0,
expSize: 0,
},
{
name: "BasicRead",
path: basicFilePath,
expSuccess: true,
expContent: "1\tline1\n2\tline2\n3\tline3",
expTotal: 3,
expRead: 3,
expSize: int64(len("line1\nline2\nline3")),
},
{
name: "Offset2",
path: basicFilePath,
offset: 2,
expSuccess: true,
expContent: "2\tline2\n3\tline3",
expTotal: 3,
expRead: 2,
expSize: int64(len("line1\nline2\nline3")),
},
{
name: "Limit1",
path: basicFilePath,
limit: 1,
expSuccess: true,
expContent: "1\tline1",
expTotal: 3,
expRead: 1,
expSize: int64(len("line1\nline2\nline3")),
},
{
name: "Offset2Limit1",
path: basicFilePath,
offset: 2,
limit: 1,
expSuccess: true,
expContent: "2\tline2",
expTotal: 3,
expRead: 1,
expSize: int64(len("line1\nline2\nline3")),
},
{
name: "OffsetBeyondFile",
path: basicFilePath,
offset: 100,
expError: "offset 100 is beyond the file length of 3 lines",
},
{
name: "LongLineTruncation",
path: longLineFilePath,
expSuccess: true,
expContent: "1\t" + string(bytes.Repeat([]byte("x"), 1024)) + "... [truncated]",
expTotal: 1,
expRead: 1,
expSize: 1025,
},
{
name: "LargeFile",
path: largeFilePath,
expError: "exceeds the maximum",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
w := httptest.NewRecorder()
r := httptest.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("/read-file-lines?path=%s&offset=%d&limit=%d", tt.path, tt.offset, tt.limit), nil)
api.Routes().ServeHTTP(w, r)
if tt.useCodersdk {
// Query param validation errors return codersdk.Response.
require.Equal(t, http.StatusBadRequest, w.Code)
require.Contains(t, w.Body.String(), tt.expError)
return
}
var resp agentfiles.ReadFileLinesResponse
err := json.NewDecoder(w.Body).Decode(&resp)
require.NoError(t, err)
if tt.expSuccess {
require.Equal(t, http.StatusOK, w.Code)
require.True(t, resp.Success)
require.Equal(t, tt.expContent, resp.Content)
require.Equal(t, tt.expTotal, resp.TotalLines)
require.Equal(t, tt.expRead, resp.LinesRead)
require.Equal(t, tt.expSize, resp.FileSize)
} else {
require.Equal(t, http.StatusOK, w.Code)
require.False(t, resp.Success)
require.Contains(t, resp.Error, tt.expError)
}
})
}
}