feat: record created_at/completed_at on reasoning ChatMessageParts (#24789)

Records reasoning start and end times on persisted reasoning
`ChatMessagePart`s so reasoning duration can be computed for stored
chats. Backend-only: no SSE changes and no frontend rendering ship in
this PR.

The `created_at` field on `ChatMessagePart` is extended to also be
present on `reasoning` parts (it previously appeared only on `tool-call`
and `tool-result`), and a new `completed_at` field is added for
`reasoning` parts.

### How timestamps are recorded

- `StreamPartTypeReasoningStart`: stamp `startedAt = dbtime.Now()` on
the active reasoning state.
- `StreamPartTypeReasoningEnd`: stamp `completedAt = dbtime.Now()` and
append both into parallel `[]time.Time` slices on `stepResult`.
- Persistence reads the slices in occurrence order (reasoning has no
provider-side ID) and applies them to the matching `ChatMessagePart` via
`buildAssistantPartsForPersist`. The first reasoning block's stamps go
onto the first reasoning part, and so on.
- `flushActiveState` flushes partial reasoning interrupted before
`StreamPartTypeReasoningEnd` with `startedAt` from the active state and
`completedAt = dbtime.Now()` at the interruption.

### Why two fields, not one?

Tool calls and results are point events. The frontend computes their
duration by subtracting the call's `created_at` from the result's
`created_at`. Reasoning is one assistant part that brackets a span, so
we record both endpoints on the part itself.

### Why not stamp in `PartFromContent`?

Same rationale as #24101: `PartFromContent` is called during both SSE
publishing and persistence. Stamping there would yield incorrect
persistence-time timestamps for reasoning blocks that finished much
earlier in the step. Instead we capture in the chatloop and apply during
persistence.

<details><summary>Implementation plan</summary>

- `codersdk/chats.go`: extend `CreatedAt`'s `variants` to include
`reasoning?`; add `CompletedAt *time.Time` with `variants:"reasoning?"`.
- `coderd/x/chatd/chatloop/chatloop.go`: extend `reasoningState` with
`startedAt`; extend `stepResult` and `PersistedStep` with parallel
`[]time.Time` reasoning slices; stamp on
`ReasoningStart`/`ReasoningEnd`; thread the slices through all
`PersistStep` call sites including the interrupt-safe path; record
partial reasoning in `flushActiveState`.
- `coderd/x/chatd/attachments.go`: walk reasoning parts in occurrence
order and apply `step.ReasoningStartedAt[i]` to `part.CreatedAt` and
`step.ReasoningCompletedAt[i]` to `part.CompletedAt`.

### Tests

- `codersdk/chats_test.go` round-trips `created_at` + `completed_at` on
reasoning parts and verifies omission when absent and partial
interrupted parts.
- `coderd/x/chatd/chatprompt/chatprompt_test.go` asserts
`PartFromContent(ReasoningContent{})` does NOT stamp timestamps.
- `coderd/x/chatd/chatloop/chatloop_test.go`
`TestRun_ReasoningTimestamps` drives a stream with two reasoning blocks
and verifies parallel slices, monotonicity, ordering, non-zero values,
and content-block ordering.
`TestRun_InterruptedReasoningFlushesTimestamps` cancels mid-reasoning
and verifies `flushActiveState` records a non-zero pair.
- `coderd/x/chatd/attachments_test.go` covers
`buildAssistantPartsForPersist` for normal interleaved reasoning,
partial (zero `completed_at`), and missing slices.

</details>

> Generated by Coder Agents.

Co-authored-by: Coder Agent <agent@coder.com>
This commit is contained in:
Kyle Carberry
2026-05-18 12:30:30 -04:00
committed by GitHub
parent 46821525f7
commit 385146000b
12 changed files with 661 additions and 179 deletions
+18
View File
@@ -22,6 +22,11 @@ func buildAssistantPartsForPersist(
toolNameToConfigID map[string]uuid.UUID,
) []codersdk.ChatMessagePart {
parts := make([]codersdk.ChatMessagePart, 0, len(assistantBlocks)+len(toolResults))
// reasoningIdx walks reasoning blocks in occurrence order so we
// can apply the matching ReasoningStartedAt/ReasoningCompletedAt
// entry from step onto each reasoning part's CreatedAt and
// CompletedAt.
reasoningIdx := 0
for _, block := range assistantBlocks {
part := chatprompt.PartFromContentWithLogger(ctx, logger, block)
if part.ToolName != "" {
@@ -39,6 +44,19 @@ func buildAssistantPartsForPersist(
part.CreatedAt = &ts
}
}
if part.Type == codersdk.ChatMessagePartTypeReasoning {
if reasoningIdx < len(step.ReasoningStartedAt) {
if ts := step.ReasoningStartedAt[reasoningIdx]; !ts.IsZero() {
part.CreatedAt = &ts
}
}
if reasoningIdx < len(step.ReasoningCompletedAt) {
if ts := step.ReasoningCompletedAt[reasoningIdx]; !ts.IsZero() {
part.CompletedAt = &ts
}
}
reasoningIdx++
}
parts = append(parts, part)
}
for _, tr := range toolResults {
+111
View File
@@ -134,3 +134,114 @@ func TestBuildAssistantPartsForPersist_InvalidAttachmentMetadataSkipsOnlyBrokenR
require.Equal(t, "image/png", parts[1].MediaType)
require.Equal(t, "good.png", parts[1].Name)
}
func TestBuildAssistantPartsForPersist_AppliesReasoningTimestamps(t *testing.T) {
t.Parallel()
startedAt1 := time.Date(2026, time.April, 10, 12, 0, 0, 0, time.UTC)
completedAt1 := startedAt1.Add(500 * time.Millisecond)
startedAt2 := completedAt1.Add(time.Second)
completedAt2 := startedAt2.Add(750 * time.Millisecond)
// Interleave reasoning blocks with a text block to confirm the
// index walks reasoning content in occurrence order without
// being thrown off by non-reasoning entries.
parts := buildAssistantPartsForPersist(
context.Background(),
testutil.Logger(t),
[]fantasy.Content{
fantasy.ReasoningContent{Text: "first thought"},
fantasy.TextContent{Text: "intermission"},
fantasy.ReasoningContent{Text: "second thought"},
},
nil,
chatloop.PersistedStep{
ReasoningStartedAt: []time.Time{startedAt1, startedAt2},
ReasoningCompletedAt: []time.Time{completedAt1, completedAt2},
},
nil,
)
require.Len(t, parts, 3)
require.Equal(t, codersdk.ChatMessagePartTypeReasoning, parts[0].Type)
require.Equal(t, "first thought", parts[0].Text)
require.NotNil(t, parts[0].CreatedAt)
require.True(t, parts[0].CreatedAt.Equal(startedAt1),
"first reasoning part must use ReasoningStartedAt[0]")
require.NotNil(t, parts[0].CompletedAt)
require.True(t, parts[0].CompletedAt.Equal(completedAt1),
"first reasoning part must use ReasoningCompletedAt[0]")
require.Equal(t, codersdk.ChatMessagePartTypeText, parts[1].Type)
require.Nil(t, parts[1].CreatedAt,
"text part must not inherit reasoning timestamps")
require.Nil(t, parts[1].CompletedAt)
require.Equal(t, codersdk.ChatMessagePartTypeReasoning, parts[2].Type)
require.Equal(t, "second thought", parts[2].Text)
require.NotNil(t, parts[2].CreatedAt)
require.True(t, parts[2].CreatedAt.Equal(startedAt2),
"second reasoning part must use ReasoningStartedAt[1]")
require.NotNil(t, parts[2].CompletedAt)
require.True(t, parts[2].CompletedAt.Equal(completedAt2),
"second reasoning part must use ReasoningCompletedAt[1]")
}
func TestBuildAssistantPartsForPersist_PartialReasoningTimestamps(t *testing.T) {
t.Parallel()
startedAt := time.Date(2026, time.April, 10, 12, 0, 0, 0, time.UTC)
// Tests the persistence helper when the parallel CompletedAt
// slot is zero-valued, ensuring it leaves CompletedAt nil rather
// than setting it to the Go zero time. No production code path
// currently emits a zero CompletedAt alongside a non-zero
// StartedAt (flushActiveState always stamps both with
// dbtime.Now()), so this is a defensive boundary test for the
// `variants:"reasoning?"` contract.
parts := buildAssistantPartsForPersist(
context.Background(),
testutil.Logger(t),
[]fantasy.Content{
fantasy.ReasoningContent{Text: "incomplete thought"},
},
nil,
chatloop.PersistedStep{
ReasoningStartedAt: []time.Time{startedAt},
ReasoningCompletedAt: []time.Time{{}},
},
nil,
)
require.Len(t, parts, 1)
require.Equal(t, codersdk.ChatMessagePartTypeReasoning, parts[0].Type)
require.NotNil(t, parts[0].CreatedAt)
require.True(t, parts[0].CreatedAt.Equal(startedAt))
require.Nil(t, parts[0].CompletedAt,
"zero-valued ReasoningCompletedAt must not produce a stamp")
}
func TestBuildAssistantPartsForPersist_MissingReasoningTimestamps(t *testing.T) {
t.Parallel()
// Legacy persisted steps and steps that never observed a
// reasoning block carry empty timestamp slices. The helper must
// leave CreatedAt and CompletedAt nil instead of panicking on
// the out-of-range index.
parts := buildAssistantPartsForPersist(
context.Background(),
testutil.Logger(t),
[]fantasy.Content{
fantasy.ReasoningContent{Text: "no timestamps recorded"},
},
nil,
chatloop.PersistedStep{},
nil,
)
require.Len(t, parts, 1)
require.Equal(t, codersdk.ChatMessagePartTypeReasoning, parts[0].Type)
require.Nil(t, parts[0].CreatedAt)
require.Nil(t, parts[0].CompletedAt)
}
+50 -23
View File
@@ -97,6 +97,15 @@ type PersistedStep struct {
// Applied by the persistence layer to set CreatedAt
// on persisted tool-result ChatMessageParts.
ToolResultCreatedAt map[string]time.Time
// ReasoningStartedAt and ReasoningCompletedAt are parallel
// slices indexed by the occurrence order of reasoning
// content in Content. The persistence layer walks reasoning
// parts in order and applies these timestamps to the
// corresponding ChatMessageParts so the frontend can render
// reasoning duration. Reasoning parts have no provider-side
// stable ID, so order is the only correlation we have.
ReasoningStartedAt []time.Time
ReasoningCompletedAt []time.Time
}
// RunOptions configures a single streaming chat loop run.
@@ -223,14 +232,16 @@ type ProviderTool struct {
// step. Since we own the stream consumer, all content is tracked
// directly here, no shadow draft state needed.
type stepResult struct {
content []fantasy.Content
usage fantasy.Usage
providerMetadata fantasy.ProviderMetadata
finishReason fantasy.FinishReason
toolCalls []fantasy.ToolCallContent
shouldContinue bool
toolCallCreatedAt map[string]time.Time
toolResultCreatedAt map[string]time.Time
content []fantasy.Content
usage fantasy.Usage
providerMetadata fantasy.ProviderMetadata
finishReason fantasy.FinishReason
toolCalls []fantasy.ToolCallContent
shouldContinue bool
toolCallCreatedAt map[string]time.Time
toolResultCreatedAt map[string]time.Time
reasoningStartedAt []time.Time
reasoningCompletedAt []time.Time
}
// toResponseMessages converts step content into messages suitable
@@ -336,8 +347,9 @@ func (r stepResult) toResponseMessages() []fantasy.Message {
// reasoningState accumulates reasoning content and provider
// metadata while the stream is in flight.
type reasoningState struct {
text string
options fantasy.ProviderMetadata
text string
options fantasy.ProviderMetadata
startedAt time.Time
}
// Run executes the chat step-stream loop and delegates
@@ -575,13 +587,15 @@ func Run(ctx context.Context, opts RunOptions) error {
// check and here, fall back to the interrupt-safe
// path so partial content is not lost.
if err := opts.PersistStep(ctx, PersistedStep{
Content: result.content,
Usage: result.usage,
ContextLimit: contextLimit,
ProviderResponseID: chatopenai.ExtractResponseIDIfStored(opts.ProviderOptions, result.providerMetadata),
Runtime: time.Since(stepStart),
ToolCallCreatedAt: result.toolCallCreatedAt,
ToolResultCreatedAt: result.toolResultCreatedAt,
Content: result.content,
Usage: result.usage,
ContextLimit: contextLimit,
ProviderResponseID: chatopenai.ExtractResponseIDIfStored(opts.ProviderOptions, result.providerMetadata),
Runtime: time.Since(stepStart),
ToolCallCreatedAt: result.toolCallCreatedAt,
ToolResultCreatedAt: result.toolResultCreatedAt,
ReasoningStartedAt: result.reasoningStartedAt,
ReasoningCompletedAt: result.reasoningCompletedAt,
}); err != nil {
if errors.Is(err, ErrInterrupted) {
persistInterruptedStep(ctx, opts, &result)
@@ -915,8 +929,9 @@ func processStepStream(
case fantasy.StreamPartTypeReasoningStart:
activeReasoningContent[part.ID] = reasoningState{
text: part.Delta,
options: part.ProviderMetadata,
text: part.Delta,
options: part.ProviderMetadata,
startedAt: dbtime.Now(),
}
case fantasy.StreamPartTypeReasoningDelta:
@@ -939,6 +954,8 @@ func processStepStream(
ProviderMetadata: active.options,
}
result.content = append(result.content, content)
result.reasoningStartedAt = append(result.reasoningStartedAt, active.startedAt)
result.reasoningCompletedAt = append(result.reasoningCompletedAt, dbtime.Now())
delete(activeReasoningContent, part.ID)
}
case fantasy.StreamPartTypeToolInputStart:
@@ -1376,6 +1393,8 @@ func persistPendingDynamicStep(
ProviderResponseID: chatopenai.ExtractResponseIDIfStored(opts.ProviderOptions, result.providerMetadata),
Runtime: time.Since(stepStart),
PendingDynamicToolCalls: pending,
ReasoningStartedAt: result.reasoningStartedAt,
ReasoningCompletedAt: result.reasoningCompletedAt,
}); err != nil {
if errors.Is(err, ErrInterrupted) {
persistInterruptedStep(ctx, opts, result)
@@ -1605,7 +1624,11 @@ func flushActiveState(
}
}
// Flush partial reasoning content.
// Flush partial reasoning content. The matching
// completedAt is filled in here with the interruption
// time so partial reasoning shows the time spent before
// the interruption.
flushedAt := dbtime.Now()
for _, rs := range activeReasoning {
if rs.text == "" && !chatsanitize.HasAnthropicSignedReasoningOptions(fantasy.ProviderOptions(rs.options)) {
continue
@@ -1614,6 +1637,8 @@ func flushActiveState(
Text: rs.text,
ProviderMetadata: rs.options,
})
result.reasoningStartedAt = append(result.reasoningStartedAt, rs.startedAt)
result.reasoningCompletedAt = append(result.reasoningCompletedAt, flushedAt)
}
// Flush in-progress tool calls. These haven't received a
@@ -1727,9 +1752,11 @@ func persistInterruptedStep(
persistCtx := context.WithoutCancel(ctx)
if err := opts.PersistStep(persistCtx, PersistedStep{
Content: content,
ToolCallCreatedAt: toolCallCreatedAt,
ToolResultCreatedAt: toolResultCreatedAt,
Content: content,
ToolCallCreatedAt: toolCallCreatedAt,
ToolResultCreatedAt: toolResultCreatedAt,
ReasoningStartedAt: result.reasoningStartedAt,
ReasoningCompletedAt: result.reasoningCompletedAt,
}); err != nil {
if opts.OnInterruptedPersistError != nil {
opts.OnInterruptedPersistError(err)
+167
View File
@@ -4415,3 +4415,170 @@ func TestExecuteSingleTool_MediaBase64Encoding(t *testing.T) {
require.Contains(t, textOutput.Text, "world")
})
}
// TestRun_ReasoningTimestamps verifies that StreamPartTypeReasoningStart
// and StreamPartTypeReasoningEnd produce parallel ReasoningStartedAt /
// ReasoningCompletedAt slices on PersistedStep, in the same occurrence
// order as the reasoning content blocks. The frontend computes
// reasoning duration as completed_at - started_at.
func TestRun_ReasoningTimestamps(t *testing.T) {
t.Parallel()
model := &chattest.FakeModel{
ProviderName: "fake",
StreamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
return streamFromParts([]fantasy.StreamPart{
{Type: fantasy.StreamPartTypeReasoningStart, ID: "reason-1"},
{Type: fantasy.StreamPartTypeReasoningDelta, ID: "reason-1", Delta: "first thought"},
{Type: fantasy.StreamPartTypeReasoningEnd, ID: "reason-1"},
{Type: fantasy.StreamPartTypeReasoningStart, ID: "reason-2"},
{Type: fantasy.StreamPartTypeReasoningDelta, ID: "reason-2", Delta: "second thought"},
{Type: fantasy.StreamPartTypeReasoningEnd, ID: "reason-2"},
{Type: fantasy.StreamPartTypeTextStart, ID: "text-1"},
{Type: fantasy.StreamPartTypeTextDelta, ID: "text-1", Delta: "answer"},
{Type: fantasy.StreamPartTypeTextEnd, ID: "text-1"},
{Type: fantasy.StreamPartTypeFinish, FinishReason: fantasy.FinishReasonStop},
}), nil
},
}
var persistedSteps []PersistedStep
err := Run(context.Background(), RunOptions{
Model: model,
Messages: []fantasy.Message{
textMessage(fantasy.MessageRoleUser, "think"),
},
MaxSteps: 1,
PersistStep: func(_ context.Context, step PersistedStep) error {
persistedSteps = append(persistedSteps, step)
return nil
},
})
require.NoError(t, err)
require.Len(t, persistedSteps, 1)
step := persistedSteps[0]
// Both reasoning blocks must produce parallel timestamp entries.
require.Len(t, step.ReasoningStartedAt, 2,
"each StreamPartTypeReasoningEnd must record a started_at")
require.Len(t, step.ReasoningCompletedAt, 2,
"each StreamPartTypeReasoningEnd must record a completed_at")
// Timestamps must be monotonic per block (completed_at >= started_at),
// and both timestamps must be populated. Asserting only monotonicity
// is not enough: time.Time{} is year 0001, so completed_at.Before(zero)
// is trivially false and a regression that drops the started_at stamp
// would slip past the comparison.
for i := range step.ReasoningStartedAt {
require.False(t, step.ReasoningStartedAt[i].IsZero(),
"started_at[%d] must be non-zero", i)
require.False(t, step.ReasoningCompletedAt[i].IsZero(),
"completed_at[%d] must be non-zero", i)
require.False(t,
step.ReasoningCompletedAt[i].Before(step.ReasoningStartedAt[i]),
"completed_at[%d] must be >= started_at[%d]", i, i)
}
// Successive blocks must be ordered: reasoning-2 cannot start
// before reasoning-1 completes.
require.False(t,
step.ReasoningStartedAt[1].Before(step.ReasoningCompletedAt[0]),
"reasoning-2 started_at must be >= reasoning-1 completed_at")
// The reasoning content blocks must appear in the same order
// in step.Content so the persistence layer can correlate by
// occurrence order.
var reasoningOrder []string
for _, c := range step.Content {
if r, ok := fantasy.AsContentType[fantasy.ReasoningContent](c); ok {
reasoningOrder = append(reasoningOrder, r.Text)
}
}
require.Equal(t, []string{"first thought", "second thought"}, reasoningOrder)
}
func TestRun_InterruptedReasoningFlushesTimestamps(t *testing.T) {
t.Parallel()
started := make(chan struct{})
model := &chattest.FakeModel{
ProviderName: "fake",
StreamFn: func(ctx context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
return iter.Seq[fantasy.StreamPart](func(yield func(fantasy.StreamPart) bool) {
parts := []fantasy.StreamPart{
{Type: fantasy.StreamPartTypeReasoningStart, ID: "reason-1"},
{Type: fantasy.StreamPartTypeReasoningDelta, ID: "reason-1", Delta: "interrupted thought"},
}
for _, part := range parts {
if !yield(part) {
return
}
}
select {
case <-started:
default:
close(started)
}
<-ctx.Done()
_ = yield(fantasy.StreamPart{
Type: fantasy.StreamPartTypeError,
Error: ctx.Err(),
})
}), nil
},
}
ctx, cancel := context.WithCancelCause(context.Background())
defer cancel(nil)
go func() {
<-started
cancel(ErrInterrupted)
}()
var persistedStep PersistedStep
err := Run(ctx, RunOptions{
Model: model,
Messages: []fantasy.Message{
textMessage(fantasy.MessageRoleUser, "think"),
},
MaxSteps: 1,
PersistStep: func(_ context.Context, step PersistedStep) error {
persistedStep = step
return nil
},
})
require.ErrorIs(t, err, ErrInterrupted)
// flushActiveState must have appended exactly one entry to each
// parallel slice, matching the single in-progress reasoning block.
require.Len(t, persistedStep.ReasoningStartedAt, 1,
"interrupted reasoning must flush its started_at")
require.Len(t, persistedStep.ReasoningCompletedAt, 1,
"interrupted reasoning must flush a completed_at stamp")
// Both timestamps must be populated and the completed stamp
// must be at or after the started stamp.
require.False(t, persistedStep.ReasoningStartedAt[0].IsZero(),
"flushed reasoning started_at must be non-zero")
require.False(t, persistedStep.ReasoningCompletedAt[0].IsZero(),
"flushed reasoning completed_at must be non-zero")
require.False(t,
persistedStep.ReasoningCompletedAt[0].Before(persistedStep.ReasoningStartedAt[0]),
"flushed completed_at must be >= started_at")
// The flushed reasoning content must appear in step.Content so
// the persistence layer's occurrence-order correlation lines up
// with the timestamp slices.
var reasoningBlocks []fantasy.ReasoningContent
for _, c := range persistedStep.Content {
if r, ok := fantasy.AsContentType[fantasy.ReasoningContent](c); ok {
reasoningBlocks = append(reasoningBlocks, r)
}
}
require.Len(t, reasoningBlocks, 1)
require.Equal(t, "interrupted thought", reasoningBlocks[0].Text)
}
@@ -3008,6 +3008,22 @@ func TestPartFromContent_CreatedAtNotStamped(t *testing.T) {
part := chatprompt.PartFromContent(fantasy.TextContent{Text: "hello"})
assert.Nil(t, part.CreatedAt)
})
t.Run("ReasoningHasNilCreatedAndCompletedAt", func(t *testing.T) {
t.Parallel()
// Same rationale as ToolCall: the chatloop layer records
// reasoning timestamps separately and the persistence
// layer applies them. PartFromContent is called in
// multiple contexts so stamping here would yield
// incorrect durations.
part := chatprompt.PartFromContent(fantasy.ReasoningContent{Text: "thinking"})
assert.Nil(t, part.CreatedAt)
assert.Nil(t, part.CompletedAt)
partPtr := chatprompt.PartFromContent(&fantasy.ReasoningContent{Text: "thinking"})
assert.Nil(t, partPtr.CreatedAt)
assert.Nil(t, partPtr.CompletedAt)
})
}
func TestToolResultAntivenom(t *testing.T) {