Files
coder/coderd/x/chatd/chatdebug/service.go
T

776 lines
25 KiB
Go

package chatdebug
import (
"bytes"
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/google/uuid"
"github.com/sqlc-dev/pqtype"
"golang.org/x/xerrors"
"cdr.dev/slog/v3"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/pubsub"
"github.com/coder/quartz"
)
// DefaultStaleThreshold is the stale timeout applied to debug rows
// whenever no positive caller-provided threshold is in effect.
const DefaultStaleThreshold time.Duration = 5 * time.Minute
// Service persists chat debug rows and fans out lightweight change events.
// NewService fills in the default clock, stale threshold, and
// threshold-change channel before applying any options.
type Service struct {
// db is the store used for every debug-row read and write.
db database.Store
// log receives warnings for non-fatal failures such as settings
// lookups, JSON marshaling, and event publishing.
log slog.Logger
// pubsub carries change events. When it is nil, publishEvent logs at
// debug level and skips the event.
pubsub pubsub.Pubsub
// clock supplies every timestamp the service writes. NewService
// defaults it to quartz.NewReal(); WithClock overrides it.
clock quartz.Clock
// alwaysEnable makes IsEnabled return true without consulting the
// database.
alwaysEnable bool
// staleAfterNanos stores the stale threshold as nanoseconds in an
// atomic.Int64 so SetStaleAfter and FinalizeStale can be called
// from concurrent goroutines without a data race.
staleAfterNanos atomic.Int64
// thresholdMu protects thresholdChanged.
thresholdMu sync.Mutex
// thresholdChanged is closed by SetStaleAfter to wake heartbeat
// goroutines so they can re-read the (possibly shorter) interval
// immediately instead of waiting for the old ticker to fire.
thresholdChanged chan struct{}
}
// ServiceOption configures optional Service behavior. NewService applies
// options in the order given, after the defaults have been set, so a
// later option wins over an earlier one that touches the same field.
type ServiceOption func(*Service)
// WithStaleThreshold replaces the default stale-row finalization
// threshold with d. Zero or negative durations are ignored, leaving
// the previously configured threshold in place. Callers that already
// have a configurable in-flight chat timeout (e.g. chatd's
// InFlightChatStaleAfter) should pass it here so the two sweeps stay
// in sync.
func WithStaleThreshold(d time.Duration) ServiceOption {
	return func(svc *Service) {
		if d <= 0 {
			// Nothing usable was supplied; keep the current value.
			return
		}
		svc.staleAfterNanos.Store(d.Nanoseconds())
	}
}
// WithAlwaysEnable sets the flag that makes IsEnabled report true for
// every chat, bypassing the runtime admin and per-user opt-in
// settings. It backs the deployment-level serpent flag.
func WithAlwaysEnable(always bool) ServiceOption {
	apply := func(svc *Service) {
		svc.alwaysEnable = always
	}
	return apply
}
// WithClock swaps the default real clock for c. A nil clock is ignored
// so the service always has a usable time source. Tests inject
// quartz.NewMock(t) to control time-dependent behavior such as
// heartbeat tickers and FinalizeStale timestamps.
func WithClock(c quartz.Clock) ServiceOption {
	return func(svc *Service) {
		if c == nil {
			return
		}
		svc.clock = c
	}
}
// CreateRunParams contains friendly inputs for creating a debug run.
// CreateRun converts zero values of the optional fields into SQL NULL
// before inserting the row.
type CreateRunParams struct {
// ChatID is the chat the run belongs to; it is stored as given.
ChatID uuid.UUID
// RootChatID, ParentChatID, and ModelConfigID are optional; uuid.Nil
// is stored as NULL.
RootChatID uuid.UUID
ParentChatID uuid.UUID
ModelConfigID uuid.UUID
// TriggerMessageID and HistoryTipMessageID are optional; 0 is stored
// as NULL.
TriggerMessageID int64
HistoryTipMessageID int64
// Kind and Status are stored as their string forms.
Kind RunKind
Status Status
// Provider and Model are optional; the empty string is stored as NULL.
Provider string
Model string
// Summary is marshaled to JSON. A nil value, a value that marshals to
// JSON null, or a value that fails to marshal is stored as NULL.
Summary any
}
// UpdateRunParams contains inputs for updating a debug run.
// Zero-valued fields are treated as "keep the existing value" by the
// COALESCE-based SQL query. Once a field is set it cannot be cleared
// back to NULL; this is intentional for the write-once-finalize
// lifecycle of debug rows.
type UpdateRunParams struct {
// ID and ChatID are both passed to the update query to select the run.
ID uuid.UUID
ChatID uuid.UUID
// Status is sent as NULL when empty. A terminal status combined with
// a zero FinishedAt makes UpdateRun fill FinishedAt from the service
// clock.
Status Status
// Summary is marshaled to JSON; nil is sent as NULL.
Summary any
// FinishedAt is sent as NULL when zero.
FinishedAt time.Time
}
// CreateStepParams contains friendly inputs for creating a debug step.
type CreateStepParams struct {
// RunID and ChatID identify the parent run; CreateStep passes both to
// the insert query and uses them to classify a no-rows result.
RunID uuid.UUID
ChatID uuid.UUID
// StepNumber is the requested step number. When the insert hits the
// (run, step) unique index, CreateStep retries with a number greater
// than every step already recorded for the run.
StepNumber int32
// Operation and Status are stored as their string forms.
Operation Operation
Status Status
// HistoryTipMessageID is optional; 0 is stored as NULL.
HistoryTipMessageID int64
// NormalizedRequest is marshaled to JSON; nil is stored as NULL.
NormalizedRequest any
}
// UpdateStepParams contains optional inputs for updating a debug step.
// Most payload fields are typed as any and serialized through nullJSON
// because their shape varies by provider. The Attempts field uses a
// concrete slice for compile-time safety where the schema is stable.
// Zero-valued fields are treated as "keep the existing value" by the
// COALESCE-based SQL query. Once set, fields cannot be cleared back
// to NULL. This is intentional for the write-once-finalize lifecycle
// of debug rows.
type UpdateStepParams struct {
// ID and ChatID are both passed to the update query to select the step.
ID uuid.UUID
ChatID uuid.UUID
// Status is sent as NULL when empty. A terminal status combined with
// a zero FinishedAt makes UpdateStep fill FinishedAt from the service
// clock.
Status Status
// AssistantMessageID is optional; 0 is sent as NULL.
AssistantMessageID int64
// NormalizedResponse, Usage, Attempts, Error, and Metadata are each
// serialized through nullJSON; nil (including a nil Attempts slice)
// is sent as NULL.
NormalizedResponse any
Usage any
Attempts []Attempt
Error any
Metadata any
// FinishedAt is sent as NULL when zero.
FinishedAt time.Time
}
// NewService builds a chat debug persistence service backed by db.
// It panics when db is nil. The service starts with the real clock,
// DefaultStaleThreshold, and a fresh threshold-change channel; opts
// are then applied in order and may override those defaults. ps may
// be nil, in which case change events are skipped.
func NewService(db database.Store, log slog.Logger, ps pubsub.Pubsub, opts ...ServiceOption) *Service {
	if db == nil {
		panic("chatdebug: nil database.Store")
	}

	svc := new(Service)
	svc.db = db
	svc.log = log
	svc.pubsub = ps
	svc.clock = quartz.NewReal()
	svc.thresholdChanged = make(chan struct{})
	svc.staleAfterNanos.Store(DefaultStaleThreshold.Nanoseconds())

	for i := range opts {
		opts[i](svc)
	}
	return svc
}
// SetStaleAfter overrides the in-flight stale threshold used when
// finalizing abandoned debug rows. Zero or negative durations are
// ignored, leaving the current threshold (initial or previously
// overridden) unchanged; a nil receiver is a no-op. Active heartbeat
// goroutines are woken so they can re-read the (possibly shorter)
// interval immediately.
//
// The method is safe on a Service that was not built by NewService:
// when no notification channel exists yet there is nothing to close,
// so a fresh channel is simply installed for future waiters.
func (s *Service) SetStaleAfter(staleAfter time.Duration) {
	if s == nil || staleAfter <= 0 {
		return
	}
	// Publish the new value before waking anyone so every woken
	// goroutine observes it.
	s.staleAfterNanos.Store(staleAfter.Nanoseconds())

	// Wake all heartbeat goroutines by closing the current channel
	// and replacing it with a fresh one for the next update.
	s.thresholdMu.Lock()
	defer s.thresholdMu.Unlock()
	if s.thresholdChanged != nil {
		// Closing a nil channel panics, so only close a channel
		// that was actually created.
		close(s.thresholdChanged)
	}
	s.thresholdChanged = make(chan struct{})
}
// thresholdChan hands out the notification channel that is current at
// the time of the call. Heartbeat goroutines select on it to learn
// about runtime stale-threshold updates; the read is taken under
// thresholdMu because SetStaleAfter replaces the channel.
func (s *Service) thresholdChan() <-chan struct{} {
	s.thresholdMu.Lock()
	current := s.thresholdChanged
	s.thresholdMu.Unlock()
	return current
}
// staleThreshold reports the stale timeout currently in effect. A
// stored value that is zero or negative falls back to
// DefaultStaleThreshold.
func (s *Service) staleThreshold() time.Duration {
	if configured := time.Duration(s.staleAfterNanos.Load()); configured > 0 {
		return configured
	}
	return DefaultStaleThreshold
}
// heartbeatInterval computes the ticker interval for stream
// heartbeats: half the stale threshold, so at least one touch lands
// before the stale sweep considers the row abandoned. The result never
// drops below 1 ms, which keeps time.NewTicker from panicking on a
// zero interval when the threshold is pathologically small while still
// staying well below any practical stale timeout.
func (s *Service) heartbeatInterval() time.Duration {
	half := s.staleThreshold() / 2
	if half < time.Millisecond {
		return time.Millisecond
	}
	return half
}
// chatdContext returns ctx wrapped by dbauthz.AsChatd. Every database
// call made by Service in this file passes its context through this
// helper first.
func chatdContext(ctx context.Context) context.Context {
//nolint:gocritic // AsChatd provides narrowly-scoped daemon access for
// chat debug persistence reads and writes.
return dbauthz.AsChatd(ctx)
}
// IsEnabled reports whether debug logging is enabled for the given
// chat. The decision is made in this order:
//
//   - a nil service is never enabled;
//   - alwaysEnable turns logging on unconditionally;
//   - a service without a database is never enabled;
//   - the runtime admin setting must allow users to opt in;
//   - the chat owner (ownerID, which must not be uuid.Nil) must have
//     opted in.
//
// A missing settings row (sql.ErrNoRows) counts as disabled. Any other
// lookup failure is logged as a warning and also counts as disabled.
func (s *Service) IsEnabled(
	ctx context.Context,
	chatID uuid.UUID,
	ownerID uuid.UUID,
) bool {
	switch {
	case s == nil:
		return false
	case s.alwaysEnable:
		return true
	case s.db == nil:
		return false
	}

	daemonCtx := chatdContext(ctx)

	usersAllowed, err := s.db.GetChatDebugLoggingAllowUsers(daemonCtx)
	switch {
	case err == nil:
		// Setting loaded; it is evaluated just below.
	case errors.Is(err, sql.ErrNoRows):
		return false
	default:
		s.log.Warn(ctx, "failed to load runtime admin chat debug logging setting",
			slog.Error(err),
		)
		return false
	}
	if !usersAllowed {
		return false
	}

	if ownerID == uuid.Nil {
		s.log.Warn(ctx, "missing chat owner for debug logging enablement check",
			slog.F("chat_id", chatID),
		)
		return false
	}

	userEnabled, err := s.db.GetUserChatDebugLoggingEnabled(daemonCtx, ownerID)
	switch {
	case err == nil:
		return userEnabled
	case errors.Is(err, sql.ErrNoRows):
		return false
	}
	s.log.Warn(ctx, "failed to load user chat debug logging setting",
		slog.Error(err),
		slog.F("chat_id", chatID),
		slog.F("owner_id", ownerID),
	)
	return false
}
// CreateRun inserts a new debug run and, on success, emits a run
// update event for the run's chat. started_at and updated_at are both
// set to the same clock reading; finished_at is left NULL. Optional
// fields in params that hold their zero value are stored as NULL.
// A database error is returned unchanged and no event is published.
func (s *Service) CreateRun(
	ctx context.Context,
	params CreateRunParams,
) (database.ChatDebugRun, error) {
	createdAt := sql.NullTime{Time: s.clock.Now(), Valid: true}
	row := database.InsertChatDebugRunParams{
		ChatID:              params.ChatID,
		RootChatID:          nullUUID(params.RootChatID),
		ParentChatID:        nullUUID(params.ParentChatID),
		ModelConfigID:       nullUUID(params.ModelConfigID),
		TriggerMessageID:    nullInt64(params.TriggerMessageID),
		HistoryTipMessageID: nullInt64(params.HistoryTipMessageID),
		Kind:                string(params.Kind),
		Status:              string(params.Status),
		Provider:            nullString(params.Provider),
		Model:               nullString(params.Model),
		Summary:             s.nullJSON(ctx, params.Summary),
		StartedAt:           createdAt,
		UpdatedAt:           createdAt,
		FinishedAt:          sql.NullTime{},
	}

	run, err := s.db.InsertChatDebugRun(chatdContext(ctx), row)
	if err != nil {
		return database.ChatDebugRun{}, err
	}

	s.publishEvent(ctx, run.ChatID, EventKindRunUpdate, run.ID, uuid.Nil)
	return run, nil
}
// UpdateRun updates an existing debug run and emits a run update
// event. When params carries a terminal status but no FinishedAt, the
// timestamp is filled from the service clock so the row immediately
// becomes visible to the InsertChatDebugStep atomic guard
// (finished_at IS NULL). UpdateChatDebugRun itself enforces
// finished_at as write-once: once the column is populated, repeated
// auto-fills or explicit refreshes never overwrite the original
// completion timestamp, so calling this more than once on an
// already-finalized run is idempotent.
//
// Only status, summary, and finished_at can be changed through this
// method; every other column is sent as NULL. A database error is
// returned unchanged and no event is published.
func (s *Service) UpdateRun(
	ctx context.Context,
	params UpdateRunParams,
) (database.ChatDebugRun, error) {
	finishedAt := params.FinishedAt
	if finishedAt.IsZero() && params.Status.IsTerminal() {
		finishedAt = s.clock.Now()
	}

	update := database.UpdateChatDebugRunParams{
		RootChatID:          uuid.NullUUID{},
		ParentChatID:        uuid.NullUUID{},
		ModelConfigID:       uuid.NullUUID{},
		TriggerMessageID:    sql.NullInt64{},
		HistoryTipMessageID: sql.NullInt64{},
		Status:              nullString(string(params.Status)),
		Provider:            sql.NullString{},
		Model:               sql.NullString{},
		Summary:             s.nullJSON(ctx, params.Summary),
		FinishedAt:          nullTime(finishedAt),
		Now:                 s.clock.Now(),
		ID:                  params.ID,
		ChatID:              params.ChatID,
	}

	run, err := s.db.UpdateChatDebugRun(chatdContext(ctx), update)
	if err != nil {
		return database.ChatDebugRun{}, err
	}

	s.publishEvent(ctx, run.ChatID, EventKindRunUpdate, run.ID, uuid.Nil)
	return run, nil
}
// Sentinel errors reported by CreateStep when the step insert finds no
// usable parent run.
var (
	// errRunFinalized means the parent run has already reached a
	// terminal state (finished_at IS NOT NULL). It stops delayed
	// retries from appending in-progress steps to runs that
	// FinalizeStale already marked as interrupted.
	errRunFinalized = xerrors.New("parent run is already finalized")

	// errRunNotFound means the parent run cannot be located (missing
	// run_id or chat_id mismatch). It surfaces caller-side data bugs
	// instead of conflating them with the legitimate "already
	// finalized" terminal case.
	errRunNotFound = xerrors.New("parent run not found")
)
// CreateStep inserts a new debug step and emits a step update event.
// It returns errRunFinalized when the parent run has already finished
// and errRunNotFound when the run_id/chat_id pair does not match an
// existing run. The finalization guard is enforced atomically by the
// INSERT's CTE, which issues an UPDATE on the parent run (taking a row
// lock); this keeps a concurrent FinalizeStale from setting
// finished_at between the check and the INSERT.
//
// When the requested step number collides with an existing step, the
// insert is retried with a number above every step already recorded
// for the run. The context is checked before each attempt, and the
// method gives up with an error after a bounded number of attempts.
func (s *Service) CreateStep(
	ctx context.Context,
	params CreateStepParams,
) (database.ChatDebugStep, error) {
	// Cap retry attempts to prevent infinite loops under
	// pathological concurrency. Each iteration performs two DB
	// round-trips (insert + list), so 10 retries is generous.
	const maxCreateStepRetries = 10

	var noStep database.ChatDebugStep

	startedAt := sql.NullTime{Time: s.clock.Now(), Valid: true}
	row := database.InsertChatDebugStepParams{
		RunID:               params.RunID,
		StepNumber:          params.StepNumber,
		Operation:           string(params.Operation),
		Status:              string(params.Status),
		HistoryTipMessageID: nullInt64(params.HistoryTipMessageID),
		AssistantMessageID:  sql.NullInt64{},
		NormalizedRequest:   s.nullJSON(ctx, params.NormalizedRequest),
		NormalizedResponse:  pqtype.NullRawMessage{},
		Usage:               pqtype.NullRawMessage{},
		Attempts:            pqtype.NullRawMessage{},
		Error:               pqtype.NullRawMessage{},
		Metadata:            pqtype.NullRawMessage{},
		StartedAt:           startedAt,
		UpdatedAt:           startedAt,
		FinishedAt:          sql.NullTime{},
		ChatID:              params.ChatID,
	}

	for attempt := 0; attempt < maxCreateStepRetries; attempt++ {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return noStep, ctxErr
		}

		step, err := s.db.InsertChatDebugStep(chatdContext(ctx), row)
		switch {
		case err == nil:
			// The INSERT CTE atomically bumps the parent run's
			// updated_at, so no separate touch call is needed.
			s.publishEvent(ctx, step.ChatID, EventKindStepUpdate, step.RunID, step.ID)
			return step, nil
		case errors.Is(err, sql.ErrNoRows):
			// The INSERT's locked_run CTE filters on id, chat_id,
			// and finished_at IS NULL, so no rows can mean "run not
			// found", "chat_id mismatch", or "already finalized."
			// Look the run up to tell these apart rather than
			// conflating caller-side data bugs with the legitimate
			// terminal case.
			return noStep, s.classifyMissingRun(ctx, params)
		case !database.IsUniqueViolation(err, database.UniqueIndexChatDebugStepsRunStep):
			return noStep, err
		}

		// Step-number collision: move past every recorded step.
		recorded, listErr := s.db.GetChatDebugStepsByRunID(chatdContext(ctx), params.RunID)
		if listErr != nil {
			return noStep, listErr
		}
		candidate := row.StepNumber + 1
		for i := range recorded {
			if taken := recorded[i].StepNumber; taken >= candidate {
				candidate = taken + 1
			}
		}
		row.StepNumber = candidate
	}

	return noStep, xerrors.Errorf(
		"chatdebug: failed to create step after %d retries (run %s)",
		maxCreateStepRetries, params.RunID,
	)
}
// classifyMissingRun explains a sql.ErrNoRows result from
// InsertChatDebugStep's locked_run CTE. The CTE filters on id,
// chat_id, and finished_at IS NULL, so an empty RETURNING set can mean
// the run is absent, belongs to a different chat, or has already been
// finalized. GetChatDebugRunByID is keyed only by id, which is enough
// to tell these cases apart.
//
// It returns errRunNotFound for a missing run or a chat mismatch,
// errRunFinalized for a finished run, a wrapped error when the lookup
// itself fails, and a descriptive error when the run still looks
// active.
func (s *Service) classifyMissingRun(
	ctx context.Context,
	params CreateStepParams,
) error {
	run, err := s.db.GetChatDebugRunByID(chatdContext(ctx), params.RunID)
	switch {
	case errors.Is(err, sql.ErrNoRows):
		return errRunNotFound
	case err != nil:
		return xerrors.Errorf("look up parent run after failed step insert: %w", err)
	case run.ChatID != params.ChatID:
		return errRunNotFound
	case run.FinishedAt.Valid:
		return errRunFinalized
	}

	// The run matches the caller's (run_id, chat_id) and is still
	// open, yet the INSERT returned no rows. That is unexpected under
	// write-once-finalize semantics and likely points at a concurrent
	// delete or an unrelated defect; report it rather than silently
	// masking it as a terminal case.
	return xerrors.Errorf(
		"InsertChatDebugStep returned no rows but run is still active (run_id=%s)",
		params.RunID,
	)
}
// UpdateStep updates an existing debug step and emits a step update
// event. When params carries a terminal status but no FinishedAt, the
// timestamp is filled from the service clock so the stale sweep does
// not leave terminal rows with finished_at = NULL.
//
// The history tip and normalized request are not changeable here and
// are always sent as NULL. A database error is returned unchanged and
// no event is published.
func (s *Service) UpdateStep(
	ctx context.Context,
	params UpdateStepParams,
) (database.ChatDebugStep, error) {
	finishedAt := params.FinishedAt
	if finishedAt.IsZero() && params.Status.IsTerminal() {
		finishedAt = s.clock.Now()
	}

	update := database.UpdateChatDebugStepParams{
		Status:              nullString(string(params.Status)),
		HistoryTipMessageID: sql.NullInt64{},
		AssistantMessageID:  nullInt64(params.AssistantMessageID),
		NormalizedRequest:   pqtype.NullRawMessage{},
		NormalizedResponse:  s.nullJSON(ctx, params.NormalizedResponse),
		Usage:               s.nullJSON(ctx, params.Usage),
		Attempts:            s.nullJSON(ctx, params.Attempts),
		Error:               s.nullJSON(ctx, params.Error),
		Metadata:            s.nullJSON(ctx, params.Metadata),
		FinishedAt:          nullTime(finishedAt),
		Now:                 s.clock.Now(),
		ID:                  params.ID,
		ChatID:              params.ChatID,
	}

	step, err := s.db.UpdateChatDebugStep(chatdContext(ctx), update)
	if err != nil {
		return database.ChatDebugStep{}, err
	}

	s.publishEvent(ctx, step.ChatID, EventKindStepUpdate, step.RunID, step.ID)
	return step, nil
}
// TouchStep refreshes the updated_at timestamps of a step and its
// parent run, leaving every other field alone. Long-running operations
// (e.g. streaming) call it so FinalizeStale does not sweep them
// prematurely: the sweep first marks runs stale by
// chat_debug_runs.updated_at and then cascades to steps whose run_id
// was just finalized. No event is published.
func (s *Service) TouchStep(
	ctx context.Context,
	stepID uuid.UUID,
	runID uuid.UUID,
	chatID uuid.UUID,
) error {
	// Both rows are bumped by a single query so FinalizeStale cannot
	// interleave between the two touches.
	touch := database.TouchChatDebugStepAndRunParams{
		Now:    s.clock.Now(),
		StepID: stepID,
		RunID:  runID,
		ChatID: chatID,
	}
	return s.db.TouchChatDebugStepAndRun(chatdContext(ctx), touch)
}
// DeleteByChatID removes debug data for a chat, emits a delete event,
// and returns the count reported by the delete query. The
// startedBefore bound limits deletion to runs created before that
// instant, so a retried cleanup does not remove runs created by a
// replacement turn that raced ahead of the retry window (for example,
// an unarchive that fires between the initial archive-cleanup attempt
// and its retry). On a database error it returns 0 and the error, and
// no event is published.
func (s *Service) DeleteByChatID(
	ctx context.Context,
	chatID uuid.UUID,
	startedBefore time.Time,
) (int64, error) {
	scope := database.DeleteChatDebugDataByChatIDParams{
		ChatID:        chatID,
		StartedBefore: startedBefore,
	}
	removed, err := s.db.DeleteChatDebugDataByChatID(chatdContext(ctx), scope)
	if err != nil {
		return 0, err
	}

	s.publishEvent(ctx, chatID, EventKindDelete, uuid.Nil, uuid.Nil)
	return removed, nil
}
// DeleteAfterMessageID removes debug data newer than the given
// message, emits a delete event, and returns the count reported by the
// delete query. The startedBefore bound limits deletion to runs
// created before that instant, so a retried cleanup does not remove
// runs created by a replacement turn that raced ahead of the retry
// window. On a database error it returns 0 and the error, and no event
// is published.
func (s *Service) DeleteAfterMessageID(
	ctx context.Context,
	chatID uuid.UUID,
	messageID int64,
	startedBefore time.Time,
) (int64, error) {
	scope := database.DeleteChatDebugDataAfterMessageIDParams{
		ChatID:        chatID,
		MessageID:     messageID,
		StartedBefore: startedBefore,
	}
	removed, err := s.db.DeleteChatDebugDataAfterMessageID(chatdContext(ctx), scope)
	if err != nil {
		return 0, err
	}

	s.publishEvent(ctx, chatID, EventKindDelete, uuid.Nil, uuid.Nil)
	return removed, nil
}
// FinalizeStale finalizes in-flight debug rows whose last update is
// older than the current stale threshold. When at least one run or
// step was finalized it emits a finalize event addressed to uuid.Nil
// rather than to a specific chat. A database error is returned with a
// zero result and no event is published.
func (s *Service) FinalizeStale(
	ctx context.Context,
) (database.FinalizeStaleChatDebugRowsRow, error) {
	sweepAt := s.clock.Now()
	sweep := database.FinalizeStaleChatDebugRowsParams{
		Now:           sweepAt,
		UpdatedBefore: sweepAt.Add(-s.staleThreshold()),
	}

	result, err := s.db.FinalizeStaleChatDebugRows(chatdContext(ctx), sweep)
	if err != nil {
		return database.FinalizeStaleChatDebugRowsRow{}, err
	}

	if changed := result.RunsFinalized > 0 || result.StepsFinalized > 0; changed {
		s.publishEvent(ctx, uuid.Nil, EventKindFinalize, uuid.Nil, uuid.Nil)
	}
	return result, nil
}
// FinalizeRunParams bundles the arguments for FinalizeRun.
type FinalizeRunParams struct {
// RunID and ChatID identify the run to finalize.
RunID uuid.UUID
ChatID uuid.UUID
// Status is the status written to the run.
Status Status
// SeedSummary is handed to AggregateRunSummary and is also the summary
// that gets written when aggregation fails.
SeedSummary map[string]any
// Timeout for the aggregate + update calls. Zero defaults to 5s.
Timeout time.Duration
}
// FinalizeRun aggregates the run summary, writes the final status and
// finish time to the run, and cleans up the step counter. The work
// runs on a context detached from the parent's cancellation and
// bounded by p.Timeout (5s when not positive), so finalization
// succeeds even when the request context is already done.
//
// An aggregation failure is logged and the seed summary is written
// instead. The step counter is cleaned up whether or not the update
// succeeds. Errors are returned but are always safe to ignore;
// callers that treat debug instrumentation as best-effort can discard
// them.
func (s *Service) FinalizeRun(ctx context.Context, p FinalizeRunParams) error {
	budget := p.Timeout
	if budget <= 0 {
		budget = 5 * time.Second
	}
	detached, cancel := context.WithTimeout(context.WithoutCancel(ctx), budget)
	defer cancel()

	summary := p.SeedSummary
	aggregated, aggErr := s.AggregateRunSummary(detached, p.RunID, p.SeedSummary)
	if aggErr == nil {
		summary = aggregated
	} else {
		// Non-fatal: proceed with the seed summary.
		s.log.Warn(ctx, "failed to aggregate debug run summary",
			slog.F("chat_id", p.ChatID),
			slog.F("run_id", p.RunID),
			slog.Error(aggErr),
		)
	}

	_, updateErr := s.UpdateRun(detached, UpdateRunParams{
		ID:         p.RunID,
		ChatID:     p.ChatID,
		Status:     p.Status,
		Summary:    summary,
		FinishedAt: s.clock.Now(),
	})
	// The counter is dropped on both outcomes of the update.
	CleanupStepCounter(p.RunID)
	if updateErr != nil {
		return xerrors.Errorf("update debug run: %w", updateErr)
	}
	return nil
}
// ClassifyError translates a run error into a debug status: nil
// becomes StatusCompleted, an error matching context.Canceled becomes
// StatusInterrupted, and anything else becomes StatusError. Callers
// with additional classification rules (e.g. ErrInterrupted,
// ErrDynamicToolCall) should apply those first and use this helper as
// the fallback.
func ClassifyError(err error) Status {
	if err == nil {
		return StatusCompleted
	}
	if errors.Is(err, context.Canceled) {
		return StatusInterrupted
	}
	return StatusError
}
// nullUUID converts id to a uuid.NullUUID, treating uuid.Nil as the
// absent (invalid) value.
func nullUUID(id uuid.UUID) uuid.NullUUID {
	if id == uuid.Nil {
		return uuid.NullUUID{}
	}
	return uuid.NullUUID{UUID: id, Valid: true}
}
// nullInt64 converts v to a sql.NullInt64, treating 0 as the absent
// (invalid) value.
func nullInt64(v int64) sql.NullInt64 {
	if v == 0 {
		return sql.NullInt64{}
	}
	return sql.NullInt64{Int64: v, Valid: true}
}
// nullString converts value to a sql.NullString, treating the empty
// string as the absent (invalid) value.
func nullString(value string) sql.NullString {
	if value == "" {
		return sql.NullString{}
	}
	return sql.NullString{String: value, Valid: true}
}
// nullTime converts value to a sql.NullTime that always carries value
// in its Time field and is marked valid only when value is not the
// zero time.
func nullTime(value time.Time) sql.NullTime {
	out := sql.NullTime{Time: value}
	out.Valid = !value.IsZero()
	return out
}
// jsonClear is a sentinel value that tells nullJSON to emit a valid
// JSON null (JSONB 'null') instead of SQL NULL. COALESCE treats SQL
// NULL as "keep existing" but replaces with a non-NULL JSONB value,
// so passing jsonClear explicitly overwrites a previously set field.
// nullJSON recognizes the sentinel by a type assertion on the value
// type, so it must be passed as jsonClear{} rather than as a pointer.
type jsonClear struct{}
// nullJSON marshals value into a pqtype.NullRawMessage.
//
// The result is {Valid: false} when value is nil, when it marshals to
// JSON "null", or when marshaling fails (the failure is logged as a
// warning). Typed nils such as `var p *T = nil` are not caught by the
// nil case because their interface representation still carries a
// type, but json.Marshal turns them into "null", which the byte
// comparison catches identically.
//
// This is intentional for the write-once-finalize pattern: combined
// with the COALESCE-based UPDATE queries, passing nil (typed or
// untyped) preserves the existing column value. Fields accumulate
// monotonically (request -> response -> usage -> error) and never need
// to be cleared during normal operation. The jsonClear sentinel exists
// for the sole exception (error retry clearing) and yields a valid
// JSONB null.
func (s *Service) nullJSON(ctx context.Context, value any) pqtype.NullRawMessage {
	var unset pqtype.NullRawMessage

	switch value.(type) {
	case nil:
		return unset
	case jsonClear:
		// Sentinel: emit a valid JSONB null so COALESCE replaces
		// any previously stored value.
		return pqtype.NullRawMessage{
			RawMessage: json.RawMessage("null"),
			Valid:      true,
		}
	}

	encoded, err := json.Marshal(value)
	if err != nil {
		s.log.Warn(ctx, "failed to marshal chat debug JSON",
			slog.Error(err),
			slog.F("value_type", fmt.Sprintf("%T", value)),
		)
		return unset
	}
	if bytes.Equal(encoded, []byte("null")) {
		return unset
	}
	return pqtype.NullRawMessage{RawMessage: encoded, Valid: true}
}
// publishEvent sends a DebugEvent describing a change to the channel
// returned by PubsubChannel(chatID). Publishing is best-effort: when
// no pubsub is configured the event is skipped with a debug log, and
// marshal or publish failures are logged as warnings. Nothing is
// returned to the caller.
func (s *Service) publishEvent(
	ctx context.Context,
	chatID uuid.UUID,
	kind EventKind,
	runID uuid.UUID,
	stepID uuid.UUID,
) {
	if s.pubsub == nil {
		s.log.Debug(ctx,
			"chat debug pubsub unavailable; skipping event",
			slog.F("kind", kind),
			slog.F("chat_id", chatID),
		)
		return
	}

	payload, err := json.Marshal(DebugEvent{
		Kind:   kind,
		ChatID: chatID,
		RunID:  runID,
		StepID: stepID,
	})
	if err != nil {
		s.log.Warn(ctx, "failed to marshal chat debug event",
			slog.Error(err),
			slog.F("kind", kind),
			slog.F("chat_id", chatID),
		)
		return
	}

	channel := PubsubChannel(chatID)
	publishErr := s.pubsub.Publish(channel, payload)
	if publishErr == nil {
		return
	}
	s.log.Warn(ctx, "failed to publish chat debug event",
		slog.Error(publishErr),
		slog.F("channel", channel),
		slog.F("kind", kind),
		slog.F("chat_id", chatID),
	)
}