diff --git a/agent/agentfiles/api.go b/agent/agentfiles/api.go index b4535bfb11..51e27dfae6 100644 --- a/agent/agentfiles/api.go +++ b/agent/agentfiles/api.go @@ -29,6 +29,7 @@ func (api *API) Routes() http.Handler { r.Post("/list-directory", api.HandleLS) r.Get("/read-file", api.HandleReadFile) + r.Get("/read-file-lines", api.HandleReadFileLines) r.Post("/write-file", api.HandleWriteFile) r.Post("/edit-files", api.HandleEditFiles) diff --git a/agent/agentfiles/files.go b/agent/agentfiles/files.go index 86d073dfd1..e8474330ad 100644 --- a/agent/agentfiles/files.go +++ b/agent/agentfiles/files.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "strconv" + "strings" "syscall" "github.com/icholy/replace" @@ -23,6 +24,22 @@ import ( "github.com/coder/coder/v2/codersdk/workspacesdk" ) +// ReadFileLinesResponse is the JSON response for the line-based file reader. +type ReadFileLinesResponse struct { + // Success indicates whether the read was successful. + Success bool `json:"success"` + // FileSize is the original file size in bytes. + FileSize int64 `json:"file_size,omitempty"` + // TotalLines is the total number of lines in the file. + TotalLines int `json:"total_lines,omitempty"` + // LinesRead is the count of lines returned in this response. + LinesRead int `json:"lines_read,omitempty"` + // Content is the line-numbered file content. + Content string `json:"content,omitempty"` + // Error is the error message when success is false. + Error string `json:"error,omitempty"` +} + type HTTPResponseCode = int func (api *API) HandleReadFile(rw http.ResponseWriter, r *http.Request) { @@ -103,6 +120,166 @@ func (api *API) streamFile(ctx context.Context, rw http.ResponseWriter, path str return 0, nil } +func (api *API) HandleReadFileLines(rw http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + query := r.URL.Query() + parser := httpapi.NewQueryParamParser().RequiredNotEmpty("path") + path := parser.String(query, "", "path") + offset := parser.PositiveInt64(query, 1, "offset") + limit := parser.PositiveInt64(query, 0, "limit") + maxFileSize := parser.PositiveInt64(query, workspacesdk.DefaultMaxFileSize, "max_file_size") + maxLineBytes := parser.PositiveInt64(query, workspacesdk.DefaultMaxLineBytes, "max_line_bytes") + maxResponseLines := parser.PositiveInt64(query, workspacesdk.DefaultMaxResponseLines, "max_response_lines") + maxResponseBytes := parser.PositiveInt64(query, workspacesdk.DefaultMaxResponseBytes, "max_response_bytes") + parser.ErrorExcessParams(query) + if len(parser.Errors) > 0 { + httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ + Message: "Query parameters have invalid values.", + Validations: parser.Errors, + }) + return + } + + resp := api.readFileLines(ctx, path, offset, limit, workspacesdk.ReadFileLinesLimits{ + MaxFileSize: maxFileSize, + MaxLineBytes: int(maxLineBytes), + MaxResponseLines: int(maxResponseLines), + MaxResponseBytes: int(maxResponseBytes), + }) + httpapi.Write(ctx, rw, http.StatusOK, resp) +} + +func (api *API) readFileLines(_ context.Context, path string, offset, limit int64, limits workspacesdk.ReadFileLinesLimits) ReadFileLinesResponse { + errResp := func(msg string) ReadFileLinesResponse { + return ReadFileLinesResponse{Success: false, Error: msg} + } + + if !filepath.IsAbs(path) { + return errResp(fmt.Sprintf("file path must be absolute: %q", path)) + } + + f, err := api.filesystem.Open(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return errResp(fmt.Sprintf("file does not exist: %s", path)) + } + if errors.Is(err, os.ErrPermission) { + return errResp(fmt.Sprintf("permission denied: %s", path)) + } + return errResp(fmt.Sprintf("open file: %s", err)) + } + defer f.Close() + + stat, err := f.Stat() + if err != nil { + return errResp(fmt.Sprintf("stat file: %s", err)) + } + + if stat.IsDir() { + return errResp(fmt.Sprintf("not a file: %s", path)) + } + + fileSize := stat.Size() + if fileSize > limits.MaxFileSize { + return errResp(fmt.Sprintf( + "file is %d bytes which exceeds the maximum of %d bytes. Use grep, sed, or awk to extract the content you need, or use offset and limit to read a portion.", + fileSize, limits.MaxFileSize, + )) + } + + // Read the entire file (up to MaxFileSize). + data, err := io.ReadAll(f) + if err != nil { + return errResp(fmt.Sprintf("read file: %s", err)) + } + + // Split into lines. + content := string(data) + // Handle empty file. + if content == "" { + return ReadFileLinesResponse{ + Success: true, + FileSize: fileSize, + TotalLines: 0, + LinesRead: 0, + Content: "", + } + } + + lines := strings.Split(content, "\n") + totalLines := len(lines) + + // offset is 1-based line number. + if offset < 1 { + offset = 1 + } + if offset > int64(totalLines) { + return errResp(fmt.Sprintf( + "offset %d is beyond the file length of %d lines", + offset, totalLines, + )) + } + + // Default limit. + if limit <= 0 { + limit = int64(limits.MaxResponseLines) + } + + startIdx := int(offset - 1) // convert to 0-based + endIdx := startIdx + int(limit) + if endIdx > totalLines { + endIdx = totalLines + } + + var numbered []string + totalBytesAccumulated := 0 + + for i := startIdx; i < endIdx; i++ { + line := lines[i] + + // Per-line truncation. + if len(line) > limits.MaxLineBytes { + line = line[:limits.MaxLineBytes] + "... [truncated]" + } + + // Format with 1-based line number. + numberedLine := fmt.Sprintf("%d\t%s", i+1, line) + lineBytes := len(numberedLine) + + // Check total byte budget. + newTotal := totalBytesAccumulated + lineBytes + if len(numbered) > 0 { + newTotal++ // account for \n joiner + } + if newTotal > limits.MaxResponseBytes { + return errResp(fmt.Sprintf( + "output would exceed %d bytes. Read less at a time using offset and limit parameters.", + limits.MaxResponseBytes, + )) + } + + // Check line count. + if len(numbered) >= limits.MaxResponseLines { + return errResp(fmt.Sprintf( + "output would exceed %d lines. Read less at a time using offset and limit parameters.", + limits.MaxResponseLines, + )) + } + + numbered = append(numbered, numberedLine) + totalBytesAccumulated = newTotal + } + + return ReadFileLinesResponse{ + Success: true, + FileSize: fileSize, + TotalLines: totalLines, + LinesRead: len(numbered), + Content: strings.Join(numbered, "\n"), + } +} + func (api *API) HandleWriteFile(rw http.ResponseWriter, r *http.Request) { ctx := r.Context() diff --git a/agent/agentfiles/files_test.go b/agent/agentfiles/files_test.go index 0038795ad8..bd179f69ba 100644 --- a/agent/agentfiles/files_test.go +++ b/agent/agentfiles/files_test.go @@ -737,3 +737,188 @@ func TestEditFiles(t *testing.T) { }) } } + +func TestReadFileLines(t *testing.T) { + t.Parallel() + + tmpdir := os.TempDir() + noPermsFilePath := filepath.Join(tmpdir, "no-perms-lines") + + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug) + fs := newTestFs(afero.NewMemMapFs(), func(call, file string) error { + if file == noPermsFilePath { + return os.ErrPermission + } + return nil + }) + api := agentfiles.NewAPI(logger, fs) + + dirPath := filepath.Join(tmpdir, "a-directory-lines") + err := fs.MkdirAll(dirPath, 0o755) + require.NoError(t, err) + + emptyFilePath := filepath.Join(tmpdir, "empty-file") + err = afero.WriteFile(fs, emptyFilePath, []byte(""), 0o644) + require.NoError(t, err) + + basicFilePath := filepath.Join(tmpdir, "basic-file") + err = afero.WriteFile(fs, basicFilePath, []byte("line1\nline2\nline3"), 0o644) + require.NoError(t, err) + + longLine := string(bytes.Repeat([]byte("x"), 1025)) + longLineFilePath := filepath.Join(tmpdir, "long-line-file") + err = afero.WriteFile(fs, longLineFilePath, []byte(longLine), 0o644) + require.NoError(t, err) + + largeFilePath := filepath.Join(tmpdir, "large-file") + err = afero.WriteFile(fs, largeFilePath, bytes.Repeat([]byte("x"), 1<<20+1), 0o644) + require.NoError(t, err) + + tests := []struct { + name string + path string + offset int64 + limit int64 + expSuccess bool + expError string + expContent string + expTotal int + expRead int + expSize int64 + // useCodersdk is set for cases where the handler returns + // codersdk.Response (query param validation) instead of ReadFileLinesResponse. + useCodersdk bool + }{ + { + name: "NoPath", + path: "", + useCodersdk: true, + expError: "is required", + }, + { + name: "RelativePath", + path: "relative/path", + expError: "file path must be absolute", + }, + { + name: "NonExistent", + path: filepath.Join(tmpdir, "does-not-exist"), + expError: "file does not exist", + }, + { + name: "IsDir", + path: dirPath, + expError: "not a file", + }, + { + name: "NoPermissions", + path: noPermsFilePath, + expError: "permission denied", + }, + { + name: "EmptyFile", + path: emptyFilePath, + expSuccess: true, + expTotal: 0, + expRead: 0, + expSize: 0, + }, + { + name: "BasicRead", + path: basicFilePath, + expSuccess: true, + expContent: "1\tline1\n2\tline2\n3\tline3", + expTotal: 3, + expRead: 3, + expSize: int64(len("line1\nline2\nline3")), + }, + { + name: "Offset2", + path: basicFilePath, + offset: 2, + expSuccess: true, + expContent: "2\tline2\n3\tline3", + expTotal: 3, + expRead: 2, + expSize: int64(len("line1\nline2\nline3")), + }, + { + name: "Limit1", + path: basicFilePath, + limit: 1, + expSuccess: true, + expContent: "1\tline1", + expTotal: 3, + expRead: 1, + expSize: int64(len("line1\nline2\nline3")), + }, + { + name: "Offset2Limit1", + path: basicFilePath, + offset: 2, + limit: 1, + expSuccess: true, + expContent: "2\tline2", + expTotal: 3, + expRead: 1, + expSize: int64(len("line1\nline2\nline3")), + }, + { + name: "OffsetBeyondFile", + path: basicFilePath, + offset: 100, + expError: "offset 100 is beyond the file length of 3 lines", + }, + { + name: "LongLineTruncation", + path: longLineFilePath, + expSuccess: true, + expContent: "1\t" + string(bytes.Repeat([]byte("x"), 1024)) + "... [truncated]", + expTotal: 1, + expRead: 1, + expSize: 1025, + }, + { + name: "LargeFile", + path: largeFilePath, + expError: "exceeds the maximum", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + w := httptest.NewRecorder() + r := httptest.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("/read-file-lines?path=%s&offset=%d&limit=%d", tt.path, tt.offset, tt.limit), nil) + api.Routes().ServeHTTP(w, r) + + if tt.useCodersdk { + // Query param validation errors return codersdk.Response. + require.Equal(t, http.StatusBadRequest, w.Code) + require.Contains(t, w.Body.String(), tt.expError) + return + } + + var resp agentfiles.ReadFileLinesResponse + err := json.NewDecoder(w.Body).Decode(&resp) + require.NoError(t, err) + + if tt.expSuccess { + require.Equal(t, http.StatusOK, w.Code) + require.True(t, resp.Success) + require.Equal(t, tt.expContent, resp.Content) + require.Equal(t, tt.expTotal, resp.TotalLines) + require.Equal(t, tt.expRead, resp.LinesRead) + require.Equal(t, tt.expSize, resp.FileSize) + } else { + require.Equal(t, http.StatusOK, w.Code) + require.False(t, resp.Success) + require.Contains(t, resp.Error, tt.expError) + } + }) + } +} diff --git a/coderd/chatd/chattool/readfile.go b/coderd/chatd/chattool/readfile.go index 6144016921..2a70566879 100644 --- a/coderd/chatd/chattool/readfile.go +++ b/coderd/chatd/chattool/readfile.go @@ -2,7 +2,6 @@ package chattool import ( "context" - "io" "charm.land/fantasy" @@ -22,7 +21,10 @@ type ReadFileArgs struct { func ReadFile(options ReadFileOptions) fantasy.AgentTool { return fantasy.NewAgentTool( "read_file", - "Read a file from the workspace.", + "Read a file from the workspace. Returns line-numbered content. "+ + "The offset parameter is a 1-based line number (default: 1). "+ + "The limit parameter is the number of lines to return (default: 2000). "+ + "For large files, use offset and limit to paginate.", func(ctx context.Context, args ReadFileArgs, _ fantasy.ToolCall) (fantasy.ToolResponse, error) { if options.GetWorkspaceConn == nil { return fantasy.NewTextErrorResponse("workspace connection resolver is not configured"), nil @@ -45,8 +47,8 @@ func executeReadFileTool( return fantasy.NewTextErrorResponse("path is required"), nil } - offset := int64(0) - limit := int64(0) + offset := int64(1) // 1-based line number default + limit := int64(0) // 0 means use server default (2000) if args.Offset != nil { offset = *args.Offset } @@ -54,19 +56,19 @@ func executeReadFileTool( limit = *args.Limit } - reader, mimeType, err := conn.ReadFile(ctx, args.Path, offset, limit) + resp, err := conn.ReadFileLines(ctx, args.Path, offset, limit, workspacesdk.DefaultReadFileLinesLimits()) if err != nil { return fantasy.NewTextErrorResponse(err.Error()), nil } - defer reader.Close() - data, err := io.ReadAll(reader) - if err != nil { - return fantasy.NewTextErrorResponse(err.Error()), nil + if !resp.Success { + return fantasy.NewTextErrorResponse(resp.Error), nil } return toolResponse(map[string]any{ - "content": string(data), - "mime_type": mimeType, + "content": resp.Content, + "file_size": resp.FileSize, + "total_lines": resp.TotalLines, + "lines_read": resp.LinesRead, }), nil } diff --git a/codersdk/workspacesdk/agentconn.go b/codersdk/workspacesdk/agentconn.go index 01fc7b98e8..471c158747 100644 --- a/codersdk/workspacesdk/agentconn.go +++ b/codersdk/workspacesdk/agentconn.go @@ -63,6 +63,7 @@ type AgentConn interface { RecreateDevcontainer(ctx context.Context, devcontainerID string) (codersdk.Response, error) LS(ctx context.Context, path string, req LSRequest) (LSResponse, error) ReadFile(ctx context.Context, path string, offset, limit int64) (io.ReadCloser, string, error) + ReadFileLines(ctx context.Context, path string, offset, limit int64, limits ReadFileLinesLimits) (ReadFileLinesResponse, error) WriteFile(ctx context.Context, path string, reader io.Reader) error EditFiles(ctx context.Context, edits FileEditRequest) error SSH(ctx context.Context) (*gonet.TCPConn, error) @@ -551,6 +552,31 @@ func (c *agentConn) LS(ctx context.Context, path string, req LSRequest) (LSRespo return m, nil } +// ReadFileLines reads a file with line-based offset and limit, returning +// line-numbered content with safety limits. +func (c *agentConn) ReadFileLines(ctx context.Context, path string, offset, limit int64, limits ReadFileLinesLimits) (ReadFileLinesResponse, error) { + ctx, span := tracing.StartSpan(ctx) + defer span.End() + + res, err := c.apiRequest(ctx, http.MethodGet, fmt.Sprintf( + "/api/v0/read-file-lines?path=%s&offset=%d&limit=%d&max_file_size=%d&max_line_bytes=%d&max_response_lines=%d&max_response_bytes=%d", + path, offset, limit, limits.MaxFileSize, limits.MaxLineBytes, limits.MaxResponseLines, limits.MaxResponseBytes, + ), nil) + if err != nil { + return ReadFileLinesResponse{}, xerrors.Errorf("do request: %w", err) + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + return ReadFileLinesResponse{}, codersdk.ReadBodyAsError(res) + } + + var resp ReadFileLinesResponse + if err := json.NewDecoder(res.Body).Decode(&resp); err != nil { + return ReadFileLinesResponse{}, xerrors.Errorf("decode response: %w", err) + } + return resp, nil +} + // ReadFile reads from a file from the workspace, returning a file reader and // the mime type. func (c *agentConn) ReadFile(ctx context.Context, path string, offset, limit int64) (io.ReadCloser, string, error) { @@ -596,6 +622,51 @@ func (c *agentConn) WriteFile(ctx context.Context, path string, reader io.Reader return nil } +// ReadFileLinesResponse is the response from the line-based file reader. +type ReadFileLinesResponse struct { + Success bool `json:"success"` + FileSize int64 `json:"file_size,omitempty"` + TotalLines int `json:"total_lines,omitempty"` + LinesRead int `json:"lines_read,omitempty"` + Content string `json:"content,omitempty"` + Error string `json:"error,omitempty"` +} + +// ReadFileLinesLimits contains configurable safety limits for the line-based +// file reader. These are sent as query parameters so callers can tune them +// without requiring an agent redeployment. +type ReadFileLinesLimits struct { + // MaxFileSize is the maximum file size (in bytes) that will be opened. + MaxFileSize int64 + // MaxLineBytes is the per-line byte cap before truncation. + MaxLineBytes int + // MaxResponseLines is the maximum number of lines in a single response. + MaxResponseLines int + // MaxResponseBytes is the maximum total bytes of formatted output. + MaxResponseBytes int +} + +const ( + // DefaultMaxFileSize is the default maximum file size (1 MB). + DefaultMaxFileSize int64 = 1 << 20 + // DefaultMaxLineBytes is the default per-line truncation threshold. + DefaultMaxLineBytes int64 = 1024 + // DefaultMaxResponseLines is the default max lines per response. + DefaultMaxResponseLines int64 = 2000 + // DefaultMaxResponseBytes is the default max response size (32 KB). + DefaultMaxResponseBytes int64 = 32768 +) + +// DefaultReadFileLinesLimits returns the default limits. +func DefaultReadFileLinesLimits() ReadFileLinesLimits { + return ReadFileLinesLimits{ + MaxFileSize: DefaultMaxFileSize, + MaxLineBytes: int(DefaultMaxLineBytes), + MaxResponseLines: int(DefaultMaxResponseLines), + MaxResponseBytes: int(DefaultMaxResponseBytes), + } +} + type FileEdit struct { Search string `json:"search"` Replace string `json:"replace"` diff --git a/codersdk/workspacesdk/agentconnmock/agentconnmock.go b/codersdk/workspacesdk/agentconnmock/agentconnmock.go index 962522035f..50cb84ce76 100644 --- a/codersdk/workspacesdk/agentconnmock/agentconnmock.go +++ b/codersdk/workspacesdk/agentconnmock/agentconnmock.go @@ -291,6 +291,21 @@ func (mr *MockAgentConnMockRecorder) ReadFile(ctx, path, offset, limit any) *gom return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReadFile", reflect.TypeOf((*MockAgentConn)(nil).ReadFile), ctx, path, offset, limit) } +// ReadFileLines mocks base method. +func (m *MockAgentConn) ReadFileLines(ctx context.Context, path string, offset, limit int64, limits workspacesdk.ReadFileLinesLimits) (workspacesdk.ReadFileLinesResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ReadFileLines", ctx, path, offset, limit, limits) + ret0, _ := ret[0].(workspacesdk.ReadFileLinesResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ReadFileLines indicates an expected call of ReadFileLines. +func (mr *MockAgentConnMockRecorder) ReadFileLines(ctx, path, offset, limit, limits any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReadFileLines", reflect.TypeOf((*MockAgentConn)(nil).ReadFileLines), ctx, path, offset, limit, limits) +} + // ReconnectingPTY mocks base method. func (m *MockAgentConn) ReconnectingPTY(ctx context.Context, id uuid.UUID, height, width uint16, command string, initOpts ...workspacesdk.AgentReconnectingPTYInitOption) (net.Conn, error) { m.ctrl.T.Helper()