mirror of
https://github.com/coder/coder.git
synced 2026-06-03 13:08:25 +00:00
b65c0766d2
## Summary

Adds a new line-based file reading endpoint to the workspace agent, replacing the unbounded byte-based approach for the `read_file` chat tool and `coder_workspace_read_file` MCP tool.

**Problem**: The current `read_file` tool returns the entire file contents with no limits, which can blow up LLM context windows and cause OOM issues with large files.

**Solution**: Inspired by [`coder/mux`](https://github.com/coder/mux) and [`openai/codex`](https://github.com/openai/codex), implement a line-based reader with safety limits.

## Changes

### Agent (`agent/agentfiles/`)

- New `/read-file-lines` endpoint with `HandleReadFileLines` handler
- Line-based `offset` (1-based line number, default: 1) and `limit` (line count, default: 2000)
- Safety constants:

  | Constant | Value | Purpose |
  |---|---|---|
  | `MaxFileSize` | 1 MB | Reject files larger than this at stat |
  | `MaxLineBytes` | 1,024 | Per-line truncation with `... [truncated]` marker |
  | `MaxResponseLines` | 2,000 | Max lines per response |
  | `MaxResponseBytes` | 32 KB | Max total response size |
  | `DefaultLineLimit` | 2,000 | Default when no limit specified |

- Line numbering format: `1\tcontent` (tab-separated)
- Structured JSON response: `{ success, file_size, total_lines, lines_read, content, error }`
- Hard errors when limits exceeded — tells the LLM to use `offset`/`limit`
- Existing byte-based `/read-file` endpoint preserved (used by `instruction.go`)

### SDK (`codersdk/workspacesdk/`)

- `ReadFileLinesResponse` type added
- `ReadFileLines` method added to `AgentConn` interface
- Mock regenerated

### Chat tool (`coderd/chatd/chattool/`)

- `read_file` tool now uses `conn.ReadFileLines()` instead of `conn.ReadFile()`
- Updated tool description to document line-based parameters
- Response includes `file_size`, `total_lines`, `lines_read` metadata

### MCP tool (`codersdk/toolsdk/`)

- `coder_workspace_read_file` updated to use line-based reading
- Schema descriptions updated for line-based offset/limit
- Removed `maxFileLimit` constant (agent handles limits now)

### Tests

- 13 new test cases for `TestReadFileLines`:
  - Path validation (empty, relative, non-existent, directory, no permissions)
  - Empty file handling
  - Basic read, offset, limit, offset+limit combinations
  - Offset beyond file length
  - Long line truncation (>1024 bytes)
  - Large file rejection (>1MB)
- All existing tests pass unchanged

## Design decisions

| Decision | Rationale |
|---|---|
| Line-based, not byte-based | Both coder/mux and openai/codex use line-based — matches how LLMs reason about code |
| Default limit of 2000 | Matches codex; prevents accidental full-file dumps while being generous |
| 32 KB response cap | Compromise between mux (16 KB) and codex (no cap) |
| 1024 byte/line truncation with marker | More generous than codex (500), marker helps LLM know data is missing |
| Hard errors on overflow | Matches mux; forces LLM to paginate rather than getting partial data |
| Preserve byte-based endpoint | `instruction.go` needs raw byte access for AGENTS.md |
75 lines
2.0 KiB
Go
75 lines
2.0 KiB
Go
package chattool
|
|
|
|
import (
|
|
"context"
|
|
|
|
"charm.land/fantasy"
|
|
|
|
"github.com/coder/coder/v2/codersdk/workspacesdk"
|
|
)
|
|
|
|
type ReadFileOptions struct {
|
|
GetWorkspaceConn func(context.Context) (workspacesdk.AgentConn, error)
|
|
}
|
|
|
|
// ReadFileArgs is the JSON argument payload accepted by the read_file tool.
type ReadFileArgs struct {
	// Path is the file to read; the tool rejects an empty value.
	Path string `json:"path"`
	// Offset is the optional 1-based line number to start reading from.
	// It is omitted from the encoded JSON when nil.
	Offset *int64 `json:"offset,omitempty"`
	// Limit is the optional number of lines to return.
	// It is omitted from the encoded JSON when nil.
	Limit *int64 `json:"limit,omitempty"`
}
|
|
|
|
func ReadFile(options ReadFileOptions) fantasy.AgentTool {
|
|
return fantasy.NewAgentTool(
|
|
"read_file",
|
|
"Read a file from the workspace. Returns line-numbered content. "+
|
|
"The offset parameter is a 1-based line number (default: 1). "+
|
|
"The limit parameter is the number of lines to return (default: 2000). "+
|
|
"For large files, use offset and limit to paginate.",
|
|
func(ctx context.Context, args ReadFileArgs, _ fantasy.ToolCall) (fantasy.ToolResponse, error) {
|
|
if options.GetWorkspaceConn == nil {
|
|
return fantasy.NewTextErrorResponse("workspace connection resolver is not configured"), nil
|
|
}
|
|
conn, err := options.GetWorkspaceConn(ctx)
|
|
if err != nil {
|
|
return fantasy.NewTextErrorResponse(err.Error()), nil
|
|
}
|
|
return executeReadFileTool(ctx, conn, args)
|
|
},
|
|
)
|
|
}
|
|
|
|
func executeReadFileTool(
|
|
ctx context.Context,
|
|
conn workspacesdk.AgentConn,
|
|
args ReadFileArgs,
|
|
) (fantasy.ToolResponse, error) {
|
|
if args.Path == "" {
|
|
return fantasy.NewTextErrorResponse("path is required"), nil
|
|
}
|
|
|
|
offset := int64(1) // 1-based line number default
|
|
limit := int64(0) // 0 means use server default (2000)
|
|
if args.Offset != nil {
|
|
offset = *args.Offset
|
|
}
|
|
if args.Limit != nil {
|
|
limit = *args.Limit
|
|
}
|
|
|
|
resp, err := conn.ReadFileLines(ctx, args.Path, offset, limit, workspacesdk.DefaultReadFileLinesLimits())
|
|
if err != nil {
|
|
return fantasy.NewTextErrorResponse(err.Error()), nil
|
|
}
|
|
|
|
if !resp.Success {
|
|
return fantasy.NewTextErrorResponse(resp.Error), nil
|
|
}
|
|
|
|
return toolResponse(map[string]any{
|
|
"content": resp.Content,
|
|
"file_size": resp.FileSize,
|
|
"total_lines": resp.TotalLines,
|
|
"lines_read": resp.LinesRead,
|
|
}), nil
|
|
}
|