Files
coder/coderd/x/chatd/chatloop/metrics_test.go
T
Cian Johnston d7439a9de0 feat: add Prometheus metrics for chatd subsystem (#24371)
Adds 7 Prometheus metrics to the chatd subsystem and introduces typed
`ActivityBumpReason` for deadline bump attribution.

| Metric | Type | Labels |
|--------|------|--------|
| `coderd_chatd_chats` | Gauge | `state` (streaming, waiting) |
| `coderd_chatd_message_count` | Histogram | `provider` |
| `coderd_chatd_prompt_size_bytes` | Histogram | `provider` |
| `coderd_chatd_tool_result_size_bytes` | Histogram | `provider`, `tool_name` |
| `coderd_chatd_ttft_seconds` | Histogram | `provider` |
| `coderd_chatd_compaction_total` | Counter | `provider`, `result` |
| `coderd_chatd_steps_total` | Counter | `provider` |

> 🤖
2026-04-15 19:53:10 +01:00

343 lines
9.0 KiB
Go

package chatloop_test
import (
"context"
"testing"
"charm.land/fantasy"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/coderd/x/chatd/chatloop"
"github.com/coder/coder/v2/coderd/x/chatd/chattest"
)
// TestNewMetrics_RegistersAllMetrics checks that every chatd metric family
// created by chatloop.NewMetrics is present on the supplied registry and is
// reported with the expected Prometheus metric type.
func TestNewMetrics_RegistersAllMetrics(t *testing.T) {
	t.Parallel()

	registry := prometheus.NewRegistry()
	metrics := chatloop.NewMetrics(registry)

	// Touch one label combination per vector so that each family shows
	// up in the Gather output below.
	const provider = "anthropic"
	metrics.Chats.WithLabelValues(chatloop.StateStreaming)
	metrics.CompactionTotal.WithLabelValues(provider, chatloop.CompactionResultSuccess)
	metrics.ToolResultSizeBytes.WithLabelValues(provider, "test")
	metrics.MessageCount.WithLabelValues(provider)
	metrics.PromptSizeBytes.WithLabelValues(provider)
	metrics.TTFTSeconds.WithLabelValues(provider)
	metrics.StepsTotal.WithLabelValues(provider)

	gathered, err := registry.Gather()
	require.NoError(t, err)

	// Index the gathered families by name for direct lookup.
	typesByName := make(map[string]dto.MetricType)
	for _, family := range gathered {
		typesByName[family.GetName()] = family.GetType()
	}

	want := map[string]dto.MetricType{
		"coderd_chatd_chats":                  dto.MetricType_GAUGE,
		"coderd_chatd_message_count":          dto.MetricType_HISTOGRAM,
		"coderd_chatd_prompt_size_bytes":      dto.MetricType_HISTOGRAM,
		"coderd_chatd_tool_result_size_bytes": dto.MetricType_HISTOGRAM,
		"coderd_chatd_ttft_seconds":           dto.MetricType_HISTOGRAM,
		"coderd_chatd_compaction_total":       dto.MetricType_COUNTER,
		"coderd_chatd_steps_total":            dto.MetricType_COUNTER,
	}
	for name, wantType := range want {
		gotType, registered := typesByName[name]
		if !assert.True(t, registered, "metric %q not registered", name) {
			// Nothing to compare when the family is missing.
			continue
		}
		assert.Equal(t, wantType, gotType, "metric %q has wrong type", name)
	}
}
// TestNopMetrics_DoesNotPanic drives every metric exposed by
// chatloop.NopMetrics to confirm that none of them is backed by a nil
// collector. The test passes as long as no call panics.
func TestNopMetrics_DoesNotPanic(t *testing.T) {
	t.Parallel()

	nop := chatloop.NopMetrics()

	// Gauge: increment and decrement each chat state.
	for _, state := range []string{"streaming", "waiting"} {
		nop.Chats.WithLabelValues(state).Inc()
		nop.Chats.WithLabelValues(state).Dec()
	}

	// Histograms.
	nop.MessageCount.WithLabelValues("anthropic").Observe(10)
	nop.PromptSizeBytes.WithLabelValues("openai").Observe(4096)
	nop.ToolResultSizeBytes.WithLabelValues("anthropic", "execute").Observe(512)
	nop.TTFTSeconds.WithLabelValues("anthropic").Observe(0.5)

	// Counters.
	compactions := []struct{ provider, result string }{
		{"anthropic", "success"},
		{"openai", "error"},
		{"google", "timeout"},
	}
	for _, c := range compactions {
		nop.CompactionTotal.WithLabelValues(c.provider, c.result).Inc()
	}
	nop.StepsTotal.WithLabelValues("anthropic").Inc()
}
// TestEstimatePromptSize feeds chatloop.EstimatePromptSize a conversation
// containing one of each part kind used here (text, reasoning, file, tool
// call, tool result) and expects the summed byte length of their payloads.
func TestEstimatePromptSize(t *testing.T) {
	t.Parallel()

	systemMsg := fantasy.Message{
		Role: fantasy.MessageRoleSystem,
		Content: []fantasy.MessagePart{
			fantasy.TextPart{Text: "You are a helpful assistant."}, // 28 bytes
		},
	}
	userMsg := fantasy.Message{
		Role: fantasy.MessageRoleUser,
		Content: []fantasy.MessagePart{
			fantasy.TextPart{Text: "Hello world"},      // 11 bytes
			fantasy.ReasoningPart{Text: "thinking..."}, // 11 bytes
			fantasy.FilePart{Data: []byte("filedata")}, // 8 bytes
		},
	}
	assistantMsg := fantasy.Message{
		Role: fantasy.MessageRoleAssistant,
		Content: []fantasy.MessagePart{
			fantasy.TextPart{Text: "Hi there!"},               // 9 bytes
			fantasy.ToolCallPart{Input: `{"file":"main.go"}`}, // 18 bytes
		},
	}
	toolMsg := fantasy.Message{
		Role: fantasy.MessageRoleTool,
		Content: []fantasy.MessagePart{
			fantasy.ToolResultPart{
				Output: fantasy.ToolResultOutputContentText{Text: "result"}, // 6 bytes
			},
		},
	}

	got := chatloop.EstimatePromptSize([]fantasy.Message{
		systemMsg,
		userMsg,
		assistantMsg,
		toolMsg,
	})

	// 28 + 11 + 11 + 8 + 9 + 18 + 6 = 91
	const want = 91
	assert.Equal(t, want, got)
}
// TestToolResultSize is a table-driven check of chatloop.ToolResultSize
// across the tool result output variants exercised here: text, error,
// media, an unset result, and an error output whose Error field is nil.
func TestToolResultSize(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name  string
		input fantasy.ToolResultContent
		want  int
	}

	cases := []testCase{
		{
			name: "text",
			input: fantasy.ToolResultContent{
				Result: fantasy.ToolResultOutputContentText{Text: "hello"},
			},
			want: 5,
		},
		{
			name: "error",
			input: fantasy.ToolResultContent{
				Result: fantasy.ToolResultOutputContentError{
					Error: assert.AnError,
				},
			},
			// Size of an error output is the length of its message.
			want: len(assert.AnError.Error()),
		},
		{
			name: "media",
			input: fantasy.ToolResultContent{
				Result: fantasy.ToolResultOutputContentMedia{Data: "base64data"},
			},
			want: 10,
		},
		{
			name:  "nil_result",
			input: fantasy.ToolResultContent{},
			want:  0,
		},
		{
			name: "error_nil_error",
			input: fantasy.ToolResultContent{
				Result: fantasy.ToolResultOutputContentError{Error: nil},
			},
			want: 0,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			got := chatloop.ToolResultSize(tc.input)
			assert.Equal(t, tc.want, got)
		})
	}
}
// TestRecordCompaction verifies how Metrics.RecordCompaction maps a
// compaction outcome onto the coderd_chatd_compaction_total counter.
//
// Covered cases:
//   - a nil *Metrics receiver must not panic;
//   - compacted with no error increments the "success" result;
//   - a generic error increments the "error" result;
//   - context.DeadlineExceeded increments the "timeout" result;
//   - not compacted with no error, and context.Canceled, record nothing.
func TestRecordCompaction(t *testing.T) {
	t.Parallel()

	t.Run("nil metrics does not panic", func(t *testing.T) {
		t.Parallel()
		var m *chatloop.Metrics
		m.RecordCompaction("anthropic", true, nil)
	})

	tests := []struct {
		name      string
		compacted bool
		err       error
		wantLabel string
		wantCount int
	}{
		{
			name:      "success",
			compacted: true,
			err:       nil,
			wantLabel: chatloop.CompactionResultSuccess,
			wantCount: 1,
		},
		{
			name:      "error",
			compacted: false,
			err:       assert.AnError,
			wantLabel: chatloop.CompactionResultError,
			wantCount: 1,
		},
		{
			name:      "timeout",
			compacted: false,
			err:       context.DeadlineExceeded,
			wantLabel: chatloop.CompactionResultTimeout,
			wantCount: 1,
		},
		{
			name:      "threshold_not_reached",
			compacted: false,
			err:       nil,
			wantLabel: "",
			wantCount: 0,
		},
		{
			name:      "canceled",
			compacted: false,
			err:       context.Canceled,
			wantLabel: "",
			wantCount: 0,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			// A fresh registry per subtest keeps the counters isolated.
			reg := prometheus.NewRegistry()
			m := chatloop.NewMetrics(reg)
			m.RecordCompaction("test", tt.compacted, tt.err)

			families, err := reg.Gather()
			require.NoError(t, err)

			if tt.wantCount == 0 {
				// Nothing should have been recorded, so the counter
				// family must be absent from the gathered output.
				for _, f := range families {
					assert.NotEqual(t, "coderd_chatd_compaction_total", f.GetName(),
						"compaction_total should not be recorded")
				}
				return
			}

			var found bool
			for _, f := range families {
				if f.GetName() != "coderd_chatd_compaction_total" {
					continue
				}
				found = true
				require.Len(t, f.GetMetric(), 1)
				metric := f.GetMetric()[0]
				assert.Equal(t, float64(tt.wantCount), metric.GetCounter().GetValue())

				// Check the result label. Track whether it was seen at
				// all so a missing label fails the test instead of
				// silently skipping the value assertion.
				var resultLabelSeen bool
				for _, lp := range metric.GetLabel() {
					if lp.GetName() != "result" {
						continue
					}
					resultLabelSeen = true
					assert.Equal(t, tt.wantLabel, lp.GetValue())
				}
				assert.True(t, resultLabelSeen,
					"compaction_total metric is missing the %q label", "result")
			}
			assert.True(t, found, "compaction_total metric not found")
		})
	}
}
// TestRun_RecordsMetrics runs chatloop.Run for a single step against a fake
// model that streams one short text block, then checks that the step
// counter and the message-count, prompt-size and TTFT histograms each
// recorded exactly one sample.
func TestRun_RecordsMetrics(t *testing.T) {
	t.Parallel()

	registry := prometheus.NewRegistry()

	// The fake model emits a single text block followed by a stop finish.
	streamParts := []fantasy.StreamPart{
		{Type: fantasy.StreamPartTypeTextStart, ID: "t1"},
		{Type: fantasy.StreamPartTypeTextDelta, ID: "t1", Delta: "hello"},
		{Type: fantasy.StreamPartTypeTextEnd, ID: "t1"},
		{Type: fantasy.StreamPartTypeFinish, FinishReason: fantasy.FinishReasonStop},
	}
	fakeModel := &chattest.FakeModel{
		ProviderName: "test-provider",
		StreamFn: func(_ context.Context, _ fantasy.Call) (fantasy.StreamResponse, error) {
			return func(yield func(fantasy.StreamPart) bool) {
				for _, part := range streamParts {
					if !yield(part) {
						return
					}
				}
			}, nil
		},
	}

	opts := chatloop.RunOptions{
		Model: fakeModel,
		Messages: []fantasy.Message{
			{
				Role: fantasy.MessageRoleUser,
				Content: []fantasy.MessagePart{
					fantasy.TextPart{Text: "hello"},
				},
			},
		},
		MaxSteps: 1,
		PersistStep: func(_ context.Context, _ chatloop.PersistedStep) error {
			return nil
		},
		Metrics: chatloop.NewMetrics(registry),
	}
	runErr := chatloop.Run(context.Background(), opts)
	require.NoError(t, runErr)

	gathered, err := registry.Gather()
	require.NoError(t, err)

	// Histogram families that must hold exactly one observation, mapped
	// to the failure message used when they do not.
	histogramMsgs := map[string]string{
		"coderd_chatd_message_count":     "message_count should have 1 observation",
		"coderd_chatd_prompt_size_bytes": "prompt_size_bytes should have 1 observation",
		"coderd_chatd_ttft_seconds":      "ttft_seconds should have 1 observation",
	}

	seen := make(map[string]bool)
	for _, family := range gathered {
		name := family.GetName()
		seen[name] = true

		if name == "coderd_chatd_steps_total" {
			require.Len(t, family.GetMetric(), 1)
			assert.Equal(t, float64(1), family.GetMetric()[0].GetCounter().GetValue(),
				"steps_total should be 1 after one step")
			continue
		}

		if msg, isHistogram := histogramMsgs[name]; isHistogram {
			require.Len(t, family.GetMetric(), 1)
			assert.Equal(t, uint64(1), family.GetMetric()[0].GetHistogram().GetSampleCount(),
				msg)
		}
	}

	// Every family checked above must also have been present at all.
	required := []struct{ name, msg string }{
		{"coderd_chatd_steps_total", "steps_total not recorded"},
		{"coderd_chatd_message_count", "message_count not recorded"},
		{"coderd_chatd_prompt_size_bytes", "prompt_size_bytes not recorded"},
		{"coderd_chatd_ttft_seconds", "ttft_seconds not recorded"},
	}
	for _, r := range required {
		assert.True(t, seen[r.name], r.msg)
	}
}