mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
cc4e04afde
Renders the durable file attachments introduced in #24280 in the chat interface. Without this, attachments were stored and served correctly but the UI showed raw file parts with no previews or download UX. Every attachment gets a download affordance, split into three rendering tiers: - **Images** — thumbnail with a hover/focus overlay containing a download link. `onFocusCapture`/`onBlurCapture` with `contains(relatedTarget)` keeps the overlay open while tabbing between the image and its download link. - **Text-like files** (`text/*`, `application/json`) — expandable preview button with loading + error-with-retry states and the same download overlay. Preview fetches throw a typed `FetchTextAttachmentError` with a `.status` field instead of a stringly-typed error. - **Everything else** — compact `FileCard` with extension badge, filename, and download link. User-side and assistant-side rendering now share `AttachmentBlocks.tsx` (`AttachmentPreviewFrame`, `TextAttachmentButton`, `ImageAttachmentButton`, `FileCard`, plus `getAttachmentHref`/`getAttachmentName`) instead of two near-duplicate implementations. The text-attachment overlay anchors to the preview surface so the download button stays pinned even when a loading/error status line widens the row below. `ComputerRenderer` detects when a screenshot was stored as a durable attachment (`attachment_file_id`) and suppresses the stale base64 rendering — the screenshot appears as a proper file part instead. `ToolLabel` shows the attached filename for `attach_file` tool calls. Storybook coverage in `ConversationTimeline.stories.tsx` was expanded to cover every tier (single/multiple images, inline + file-id text, JSON, download-only files, fetch-failure retry, mixed attachments + file references) with play-function assertions. 
<img width="811" height="150" alt="image" src="https://github.com/user-attachments/assets/27c71081-3502-4e80-92a7-d8adf1ff9323" /> ## Cleanup Per Mathias' post-merge suggestion on #24280, this PR also relocates `coderd/chatfiles` → `coderd/x/chatfiles` so the durable-attachment helpers live beside the rest of the `chatd` experimental surface. Closes CODAGT-91
259 lines
7.0 KiB
Go
259 lines
7.0 KiB
Go
package chatd
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"mime"
|
|
"mime/multipart"
|
|
|
|
"github.com/google/uuid"
|
|
|
|
"cdr.dev/slog/v3"
|
|
"github.com/coder/coder/v2/coderd/database"
|
|
"github.com/coder/coder/v2/coderd/database/dbauthz"
|
|
"github.com/coder/coder/v2/coderd/x/chatd/chattool"
|
|
"github.com/coder/coder/v2/coderd/x/chatfiles"
|
|
"github.com/coder/coder/v2/codersdk/workspacesdk"
|
|
)
|
|
|
|
// recordingResult carries the identifiers of the chat files written for
// a desktop recording. Both fields hold the string form of a stored
// file ID; a field is left empty when the matching artifact was not
// stored.
type recordingResult struct {
	recordingFileID, thumbnailFileID string
}
|
|
|
|
// stopAndStoreRecording stops the desktop recording, downloads the
|
|
// multipart response containing the MP4 and optional thumbnail, and
|
|
// stores them in chat_files. Only called when the subagent completed
|
|
// successfully. Returns file IDs on success, empty fields on any
|
|
// failure. All errors are logged but not propagated; recording is
|
|
// best-effort.
|
|
func (p *Server) stopAndStoreRecording(
|
|
ctx context.Context,
|
|
conn workspacesdk.AgentConn,
|
|
recordingID string,
|
|
parentChatID uuid.UUID,
|
|
ownerID uuid.UUID,
|
|
workspaceID uuid.NullUUID,
|
|
) recordingResult {
|
|
var result recordingResult
|
|
|
|
workspaceIDValue := ""
|
|
if workspaceID.Valid {
|
|
workspaceIDValue = workspaceID.UUID.String()
|
|
}
|
|
recordingWarnFields := []slog.Field{
|
|
slog.F("recording_id", recordingID),
|
|
slog.F("parent_chat_id", parentChatID.String()),
|
|
slog.F("workspace_id", workspaceIDValue),
|
|
}
|
|
warn := func(msg string, fields ...slog.Field) {
|
|
allFields := make([]slog.Field, 0, len(recordingWarnFields)+len(fields))
|
|
allFields = append(allFields, recordingWarnFields...)
|
|
allFields = append(allFields, fields...)
|
|
p.logger.Warn(ctx, msg, allFields...)
|
|
}
|
|
|
|
select {
|
|
case p.recordingSem <- struct{}{}:
|
|
defer func() { <-p.recordingSem }()
|
|
case <-ctx.Done():
|
|
warn("context canceled waiting for recording semaphore", slog.Error(ctx.Err()))
|
|
return result
|
|
}
|
|
|
|
resp, err := conn.StopDesktopRecording(ctx,
|
|
workspacesdk.StopDesktopRecordingRequest{RecordingID: recordingID})
|
|
if err != nil {
|
|
warn("failed to stop desktop recording",
|
|
slog.Error(err))
|
|
return result
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
_, params, err := mime.ParseMediaType(resp.ContentType)
|
|
if err != nil {
|
|
warn("failed to parse content type from recording response",
|
|
slog.F("content_type", resp.ContentType),
|
|
slog.Error(err))
|
|
return result
|
|
}
|
|
boundary := params["boundary"]
|
|
if boundary == "" {
|
|
warn("missing boundary in recording response content type",
|
|
slog.F("content_type", resp.ContentType))
|
|
return result
|
|
}
|
|
|
|
if !workspaceID.Valid {
|
|
warn("chat has no workspace, cannot store recording")
|
|
return result
|
|
}
|
|
|
|
// The chatd actor is used here because the recording is stored on
|
|
// behalf of the chat system, not a specific user request.
|
|
//nolint:gocritic // AsChatd is required to read the workspace for org lookup.
|
|
chatdCtx := dbauthz.AsChatd(ctx)
|
|
ws, err := p.db.GetWorkspaceByID(chatdCtx, workspaceID.UUID)
|
|
if err != nil {
|
|
warn("failed to resolve workspace for recording",
|
|
slog.Error(err))
|
|
return result
|
|
}
|
|
|
|
mr := multipart.NewReader(resp.Body, boundary)
|
|
// Context cancellation is checked between parts. Within a
|
|
// part read, cancellation relies on Go's HTTP transport closing
|
|
// the underlying connection when the context is done, which
|
|
// interrupts the blocked io.ReadAll.
|
|
// First pass: parse all multipart parts into memory.
|
|
// The agent sends at most two parts: one video/mp4 and one
|
|
// optional image/jpeg thumbnail. Cap the number of parts to
|
|
// prevent a malicious or broken agent from forcing the server
|
|
// into an unbounded parsing loop.
|
|
const maxParts = 2
|
|
var videoData, thumbnailData []byte
|
|
for range maxParts {
|
|
if ctx.Err() != nil {
|
|
warn("context canceled while reading recording parts", slog.Error(ctx.Err()))
|
|
break
|
|
}
|
|
|
|
part, err := mr.NextPart()
|
|
if errors.Is(err, io.EOF) {
|
|
break
|
|
}
|
|
if err != nil {
|
|
warn("error reading next multipart part", slog.Error(err))
|
|
break
|
|
}
|
|
|
|
contentType := part.Header.Get("Content-Type")
|
|
|
|
// Select the read limit based on content type so that
|
|
// thumbnails (image/jpeg) do not allocate up to
|
|
// MaxRecordingSize (100 MB) before the size check rejects
|
|
// them. Unknown types use a small default since they are
|
|
// discarded below.
|
|
maxSize := int64(1 << 20) // 1 MB default for unknown types
|
|
switch contentType {
|
|
case "video/mp4":
|
|
maxSize = int64(workspacesdk.MaxRecordingSize)
|
|
case "image/jpeg":
|
|
maxSize = int64(workspacesdk.MaxThumbnailSize)
|
|
}
|
|
|
|
data, err := io.ReadAll(io.LimitReader(part, maxSize+1))
|
|
if err != nil {
|
|
warn("failed to read recording part data",
|
|
slog.F("content_type", contentType),
|
|
slog.Error(err))
|
|
continue
|
|
}
|
|
if int64(len(data)) > maxSize {
|
|
warn("recording part exceeds maximum size, skipping",
|
|
slog.F("content_type", contentType),
|
|
slog.F("size", len(data)),
|
|
slog.F("max_size", maxSize))
|
|
continue
|
|
}
|
|
if len(data) == 0 {
|
|
warn("recording part is empty, skipping",
|
|
slog.F("content_type", contentType))
|
|
continue
|
|
}
|
|
|
|
switch contentType {
|
|
case "video/mp4":
|
|
if videoData != nil {
|
|
warn("duplicate video/mp4 part in recording response, skipping")
|
|
continue
|
|
}
|
|
videoData = data
|
|
case "image/jpeg":
|
|
if thumbnailData != nil {
|
|
warn("duplicate image/jpeg part in recording response, skipping")
|
|
continue
|
|
}
|
|
thumbnailData = data
|
|
default:
|
|
p.logger.Debug(ctx, "skipping unknown part content type",
|
|
slog.F("content_type", contentType))
|
|
}
|
|
}
|
|
|
|
// Second pass: store the collected data in the database.
|
|
if videoData != nil {
|
|
attachment, err := p.storeRecordingArtifact(
|
|
chatdCtx,
|
|
parentChatID,
|
|
ownerID,
|
|
ws.OrganizationID,
|
|
fmt.Sprintf("recording-%s.mp4", p.clock.Now().UTC().Format("2006-01-02T15-04-05Z")),
|
|
"video/mp4",
|
|
videoData,
|
|
)
|
|
if err != nil {
|
|
warn("failed to store recording in database",
|
|
slog.Error(err))
|
|
} else {
|
|
result.recordingFileID = attachment.FileID.String()
|
|
}
|
|
}
|
|
if thumbnailData != nil && result.recordingFileID != "" {
|
|
attachment, err := p.storeRecordingArtifact(
|
|
chatdCtx,
|
|
parentChatID,
|
|
ownerID,
|
|
ws.OrganizationID,
|
|
fmt.Sprintf("thumbnail-%s.jpg", p.clock.Now().UTC().Format("2006-01-02T15-04-05Z")),
|
|
"image/jpeg",
|
|
thumbnailData,
|
|
)
|
|
if err != nil {
|
|
warn("failed to store thumbnail in database",
|
|
slog.Error(err))
|
|
} else {
|
|
result.thumbnailFileID = attachment.FileID.String()
|
|
}
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
func (p *Server) storeRecordingArtifact(
|
|
ctx context.Context,
|
|
chatID uuid.UUID,
|
|
ownerID uuid.UUID,
|
|
organizationID uuid.UUID,
|
|
name string,
|
|
mediaType string,
|
|
data []byte,
|
|
) (chattool.AttachmentMetadata, error) {
|
|
storedName, verifiedMediaType, err := chatfiles.PrepareRecordingArtifact(name, mediaType, data)
|
|
if err != nil {
|
|
return chattool.AttachmentMetadata{}, err
|
|
}
|
|
|
|
var attachment chattool.AttachmentMetadata
|
|
err = p.db.InTx(func(tx database.Store) error {
|
|
var err error
|
|
attachment, err = storeLinkedChatFileTx(
|
|
ctx,
|
|
tx,
|
|
chatID,
|
|
ownerID,
|
|
organizationID,
|
|
storedName,
|
|
verifiedMediaType,
|
|
data,
|
|
)
|
|
return err
|
|
}, database.DefaultTXOptions().WithID("store_recording_artifact"))
|
|
if err != nil {
|
|
return chattool.AttachmentMetadata{}, err
|
|
}
|
|
return attachment, nil
|
|
}
|