Files
coder/scaletest/chat/metrics.go
T
Ethan 4f1043a50a feat(scaletest): add chat scaletest command (#25553)
Adds `coder exp scaletest chat`, a harness for creating Coder Agents
chat load.
Start the mock LLM separately, prepare the scaletest workspaces you want
to target, then run the chat scaletest against the existing
`scaletest-*` fleet selected by the shared workspace targeting flags:

```sh
coder exp scaletest llm-mock --address 127.0.0.1:18080

coder exp scaletest chat --llm-mock-url http://127.0.0.1:18080/v1 --chats-per-workspace 10 --turns 1
coder exp scaletest chat --llm-mock-url http://127.0.0.1:18080/v1 --template docker --target-workspaces 0:10 --chats-per-workspace 1 --turns 10 --turn-start-delay 30s
```

This is the same pattern used by the `workspace-traffic` load generator.

Keeping the fake LLM as a separate process is intentional so it can be
scaled independently from the Coder deployment, which will likely be
necessary as we scale up and up.

This PR is the starting point: it provides the command, mock
provider/model bootstrap, existing workspace selection, chat streaming,
follow-up turns, metrics, and cleanup. Follow-up PRs will add multi-step
turns via tool calls. I'm still a bit iffy on the mechanism I have for
that. It'll likely involve having the runner send some magic strings
that the mock will recognise.


Relates to CODAGT-307
Relates to GRU-48
Relates to https://github.com/coder/scaletest/issues/124

Generated by Mux, but reviewed by a human
2026-05-26 14:19:36 +10:00

138 lines
5.3 KiB
Go

package chat
import "github.com/prometheus/client_golang/prometheus"
// Label names attached to the chat scaletest metric vectors.
const (
	metricLabelPhase  = "phase"
	metricLabelStatus = "status"
	metricLabelStage  = "stage"
)

// Values for the "phase" label: the first turn of a chat versus any
// follow-up turn.
const (
	phaseInitial  = "initial"
	phaseFollowUp = "follow_up"
)

// Values for the "stage" label, identifying the step at which a chat run
// failed.
const (
	failureStageCreateChat       = "create_chat"
	failureStageCreateMessage    = "create_message"
	failureStageStreamOpen       = "stream_open"
	failureStageStreamEndedEarly = "stream_ended_early"
	failureStageStatusError      = "status_error"
)
// chatRequestLatencyBuckets is the histogram bucket layout for request-style
// latencies (chat creation and message submission): 18 exponentially spaced
// upper bounds ranging from 0.05 to 120 seconds.
var chatRequestLatencyBuckets = prometheus.ExponentialBucketsRange(0.05, 120, 18)

// chatProcessingLatencyBuckets is the histogram bucket layout for the longer
// processing-style durations (conversation duration and the time-to-*
// metrics): 18 exponentially spaced upper bounds ranging from 0.1 to 300
// seconds.
var chatProcessingLatencyBuckets = prometheus.ExponentialBucketsRange(0.1, 300, 18)
// Metrics is the set of Prometheus collectors the chat scaletest reports
// into. Construct it with NewMetrics so that every collector is created and
// registered.
type Metrics struct {
	// Request latencies.

	// ChatCreateLatencySeconds observes the time taken to create a chat and
	// enqueue the initial turn.
	ChatCreateLatencySeconds prometheus.Histogram
	// ChatMessageLatencySeconds observes the time taken to add a message to
	// an existing chat, labelled by phase.
	ChatMessageLatencySeconds *prometheus.HistogramVec

	// Processing durations.

	// ChatConversationDurationSeconds observes the time from the start of
	// chat creation until the conversation finishes or errors.
	ChatConversationDurationSeconds prometheus.Histogram
	// ChatTimeToRunningSeconds observes the time from the start of a turn
	// until the chat enters running status, labelled by phase.
	ChatTimeToRunningSeconds *prometheus.HistogramVec
	// ChatTimeToFirstOutputSeconds observes the time from the start of a
	// turn until the first output is received, labelled by phase.
	ChatTimeToFirstOutputSeconds *prometheus.HistogramVec
	// ChatTimeToTerminalStatusSeconds observes the time from the start of a
	// turn until a terminal status is received, labelled by phase.
	ChatTimeToTerminalStatusSeconds *prometheus.HistogramVec

	// Outcome counters.

	// ChatStageFailuresTotal counts terminal runner failures, labelled by
	// the stage at which they occurred.
	ChatStageFailuresTotal *prometheus.CounterVec
	// ChatTerminalStatusTotal counts observed terminal chat statuses,
	// labelled by status.
	ChatTerminalStatusTotal *prometheus.CounterVec
	// ChatTurnsCompletedTotal counts chat turns that completed successfully.
	ChatTurnsCompletedTotal prometheus.Counter
	// ChatRetryEventsTotal counts observed chat retry events.
	ChatRetryEventsTotal prometheus.Counter

	// ActiveChatStreams tracks the current number of active chat streams.
	ActiveChatStreams prometheus.Gauge
}
// NewMetrics builds the full set of chat scaletest collectors and registers
// each of them with reg. When reg is nil, prometheus.DefaultRegisterer is
// used instead.
//
// Registration goes through MustRegister, so NewMetrics panics if any
// collector cannot be registered (for example, if the same metrics were
// already registered with reg).
func NewMetrics(reg prometheus.Registerer) *Metrics {
	if reg == nil {
		reg = prometheus.DefaultRegisterer
	}

	// Every metric lives under the same namespace and subsystem.
	const (
		namespace = "coderd"
		subsystem = "scaletest"
	)
	histogramOpts := func(name, help string, buckets []float64) prometheus.HistogramOpts {
		return prometheus.HistogramOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      name,
			Help:      help,
			Buckets:   buckets,
		}
	}
	counterOpts := func(name, help string) prometheus.CounterOpts {
		return prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      name,
			Help:      help,
		}
	}

	// Label name sets, built once and shared by the vectors that use them.
	byPhase := []string{metricLabelPhase}
	byStatus := []string{metricLabelStatus}
	byStage := []string{metricLabelStage}

	metrics := &Metrics{}

	// Request latencies.
	metrics.ChatCreateLatencySeconds = prometheus.NewHistogram(histogramOpts(
		"chat_create_latency_seconds",
		"Time in seconds to create a chat and enqueue the initial turn.",
		chatRequestLatencyBuckets,
	))
	metrics.ChatMessageLatencySeconds = prometheus.NewHistogramVec(histogramOpts(
		"chat_message_latency_seconds",
		"Time in seconds to add a follow-up message to an existing chat.",
		chatRequestLatencyBuckets,
	), byPhase)

	// Processing durations.
	metrics.ChatConversationDurationSeconds = prometheus.NewHistogram(histogramOpts(
		"chat_conversation_duration_seconds",
		"Time in seconds from chat creation start until the conversation finishes or errors.",
		chatProcessingLatencyBuckets,
	))
	metrics.ChatTimeToRunningSeconds = prometheus.NewHistogramVec(histogramOpts(
		"chat_time_to_running_seconds",
		"Time in seconds from the start of a chat turn until the chat enters running status.",
		chatProcessingLatencyBuckets,
	), byPhase)
	metrics.ChatTimeToFirstOutputSeconds = prometheus.NewHistogramVec(histogramOpts(
		"chat_time_to_first_output_seconds",
		"Time in seconds from the start of a chat turn until the first output is received.",
		chatProcessingLatencyBuckets,
	), byPhase)
	metrics.ChatTimeToTerminalStatusSeconds = prometheus.NewHistogramVec(histogramOpts(
		"chat_time_to_terminal_status_seconds",
		"Time in seconds from the start of a chat turn until a terminal status is received.",
		chatProcessingLatencyBuckets,
	), byPhase)

	// Outcome counters.
	metrics.ChatStageFailuresTotal = prometheus.NewCounterVec(counterOpts(
		"chat_stage_failures_total",
		"Total number of terminal stage-specific chat runner failures.",
	), byStage)
	metrics.ChatTerminalStatusTotal = prometheus.NewCounterVec(counterOpts(
		"chat_terminal_status_total",
		"Total number of terminal chat statuses observed.",
	), byStatus)
	metrics.ChatTurnsCompletedTotal = prometheus.NewCounter(counterOpts(
		"chat_turns_completed_total",
		"Total number of chat turns completed successfully.",
	))
	metrics.ChatRetryEventsTotal = prometheus.NewCounter(counterOpts(
		"chat_retry_events_total",
		"Total number of chat retry events observed.",
	))

	// Live stream gauge.
	metrics.ActiveChatStreams = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "active_chat_streams",
		Help:      "Current number of active chat streams.",
	})

	// Register in the same order the collectors were created; MustRegister
	// panics on the first collector that fails to register.
	reg.MustRegister(
		metrics.ChatCreateLatencySeconds,
		metrics.ChatMessageLatencySeconds,
		metrics.ChatConversationDurationSeconds,
		metrics.ChatTimeToRunningSeconds,
		metrics.ChatTimeToFirstOutputSeconds,
		metrics.ChatTimeToTerminalStatusSeconds,
		metrics.ChatStageFailuresTotal,
		metrics.ChatTerminalStatusTotal,
		metrics.ChatTurnsCompletedTotal,
		metrics.ChatRetryEventsTotal,
		metrics.ActiveChatStreams,
	)
	return metrics
}