chore: add debug logging and recovery to agent api requests (#20785)

This is to debug context timeouts on API requests to the agent.

Because rbac and database cannot be imported in slim, split the logger
middleware into slim and non-slim versions and break out the recovery
middleware.
This commit is contained in:
Asher
2025-11-25 14:59:20 -09:00
committed by GitHub
parent b0e8384b82
commit c266bb830c
9 changed files with 162 additions and 81 deletions
+8
View File
@@ -7,12 +7,20 @@ import (
"github.com/google/uuid"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/coderd/httpmw/loggermw"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/coder/v2/httpmw"
)
func (a *agent) apiHandler() http.Handler {
r := chi.NewRouter()
r.Use(
httpmw.Recover(a.logger),
tracing.StatusWriterMiddleware,
loggermw.Logger(a.logger),
)
r.Get("/", func(rw http.ResponseWriter, r *http.Request) {
httpapi.Write(r.Context(), rw, http.StatusOK, codersdk.Response{
Message: "Hello from the agent!",
+2 -1
View File
@@ -99,6 +99,7 @@ import (
"github.com/coder/coder/v2/coderd/workspacestats"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/healthsdk"
sharedhttpmw "github.com/coder/coder/v2/httpmw"
"github.com/coder/coder/v2/provisionersdk"
"github.com/coder/coder/v2/site"
"github.com/coder/coder/v2/tailnet"
@@ -861,7 +862,7 @@ func New(options *Options) *API {
prometheusMW := httpmw.Prometheus(options.PrometheusRegistry)
r.Use(
httpmw.Recover(api.Logger),
sharedhttpmw.Recover(api.Logger),
httpmw.WithProfilingLabels,
tracing.StatusWriterMiddleware,
tracing.Middleware(api.TracerProvider),
+9 -78
View File
@@ -7,19 +7,17 @@ import (
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/go-chi/chi/v5"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/coderd/tracing"
)
var (
safeParams = []string{"page", "limit", "offset"}
safeParams = []string{"page", "limit", "offset", "path"}
countParams = []string{"ids", "template_ids"}
)
@@ -124,85 +122,18 @@ func Logger(log slog.Logger) func(next http.Handler) http.Handler {
}
}
type RequestLogger interface {
WithFields(fields ...slog.Field)
WriteLog(ctx context.Context, status int)
WithAuthContext(actor rbac.Subject)
}
type SlogRequestLogger struct {
log slog.Logger
written bool
message string
start time.Time
// Protects actors map for concurrent writes.
mu sync.RWMutex
actors map[rbac.SubjectType]rbac.Subject
}
var _ RequestLogger = &SlogRequestLogger{}
func NewRequestLogger(log slog.Logger, message string, start time.Time) RequestLogger {
return &SlogRequestLogger{
log: log,
written: false,
message: message,
start: start,
actors: make(map[rbac.SubjectType]rbac.Subject),
}
log slog.Logger
written bool
message string
start time.Time
addFields func()
}
func (c *SlogRequestLogger) WithFields(fields ...slog.Field) {
c.log = c.log.With(fields...)
}
func (c *SlogRequestLogger) WithAuthContext(actor rbac.Subject) {
c.mu.Lock()
defer c.mu.Unlock()
c.actors[actor.Type] = actor
}
func (c *SlogRequestLogger) addAuthContextFields() {
c.mu.RLock()
defer c.mu.RUnlock()
usr, ok := c.actors[rbac.SubjectTypeUser]
if ok {
c.log = c.log.With(
slog.F("requestor_id", usr.ID),
slog.F("requestor_name", usr.FriendlyName),
slog.F("requestor_email", usr.Email),
)
} else {
// If there is no user, we log the requestor name for the first
// actor in a defined order.
for _, v := range actorLogOrder {
subj, ok := c.actors[v]
if !ok {
continue
}
c.log = c.log.With(
slog.F("requestor_name", subj.FriendlyName),
)
break
}
}
}
var actorLogOrder = []rbac.SubjectType{
rbac.SubjectTypeAutostart,
rbac.SubjectTypeCryptoKeyReader,
rbac.SubjectTypeCryptoKeyRotator,
rbac.SubjectTypeJobReaper,
rbac.SubjectTypeNotifier,
rbac.SubjectTypePrebuildsOrchestrator,
rbac.SubjectTypeSubAgentAPI,
rbac.SubjectTypeProvisionerd,
rbac.SubjectTypeResourceMonitor,
rbac.SubjectTypeSystemReadProvisionerDaemons,
rbac.SubjectTypeSystemRestricted,
}
func (c *SlogRequestLogger) WriteLog(ctx context.Context, status int) {
if c.written {
return
@@ -210,9 +141,9 @@ func (c *SlogRequestLogger) WriteLog(ctx context.Context, status int) {
c.written = true
end := time.Now()
// Right before we write the log, we try to find the user in the actors
// and add the fields to the log.
c.addAuthContextFields()
if c.addFields != nil {
c.addFields()
}
logger := c.log.With(
slog.F("took", end.Sub(c.start)),
+88
View File
@@ -0,0 +1,88 @@
//go:build !slim
package loggermw
import (
"context"
"sync"
"time"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/rbac"
)
type RequestLogger interface {
WithFields(fields ...slog.Field)
WriteLog(ctx context.Context, status int)
WithAuthContext(actor rbac.Subject)
}
type RbacSlogRequestLogger struct {
SlogRequestLogger
// Protects actors map for concurrent writes.
mu sync.RWMutex
actors map[rbac.SubjectType]rbac.Subject
}
var _ RequestLogger = &RbacSlogRequestLogger{}
func NewRequestLogger(log slog.Logger, message string, start time.Time) RequestLogger {
rlogger := &RbacSlogRequestLogger{
SlogRequestLogger: SlogRequestLogger{
log: log,
written: false,
message: message,
start: start,
},
actors: make(map[rbac.SubjectType]rbac.Subject),
}
rlogger.addFields = rlogger.addAuthContextFields
return rlogger
}
func (c *RbacSlogRequestLogger) WithAuthContext(actor rbac.Subject) {
c.mu.Lock()
defer c.mu.Unlock()
c.actors[actor.Type] = actor
}
var actorLogOrder = []rbac.SubjectType{
rbac.SubjectTypeAutostart,
rbac.SubjectTypeCryptoKeyReader,
rbac.SubjectTypeCryptoKeyRotator,
rbac.SubjectTypeJobReaper,
rbac.SubjectTypeNotifier,
rbac.SubjectTypePrebuildsOrchestrator,
rbac.SubjectTypeSubAgentAPI,
rbac.SubjectTypeProvisionerd,
rbac.SubjectTypeResourceMonitor,
rbac.SubjectTypeSystemReadProvisionerDaemons,
rbac.SubjectTypeSystemRestricted,
}
func (c *RbacSlogRequestLogger) addAuthContextFields() {
c.mu.RLock()
defer c.mu.RUnlock()
usr, ok := c.actors[rbac.SubjectTypeUser]
if ok {
c.log = c.log.With(
slog.F("requestor_id", usr.ID),
slog.F("requestor_name", usr.FriendlyName),
slog.F("requestor_email", usr.Email),
)
} else {
// If there is no user, we log the requestor name for the first
// actor in a defined order.
for _, v := range actorLogOrder {
subj, ok := c.actors[v]
if !ok {
continue
}
c.log = c.log.With(
slog.F("requestor_name", subj.FriendlyName),
)
break
}
}
}
@@ -16,6 +16,7 @@ import (
"github.com/stretchr/testify/require"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/testutil"
"github.com/coder/websocket"
@@ -363,6 +364,31 @@ func TestSafeQueryParams(t *testing.T) {
}
}
func TestRequestLogger_AuthContext(t *testing.T) {
t.Parallel()
ctx := context.Background()
sink := &fakeSink{}
logger := slog.Make(sink)
logger = logger.Leveled(slog.LevelDebug)
logCtx := NewRequestLogger(logger, "GET", time.Now())
logCtx.WithAuthContext(rbac.Subject{
ID: "test-user-id",
FriendlyName: "test name",
Email: "test@coder.com",
Type: rbac.SubjectTypeUser,
})
logCtx.WriteLog(ctx, http.StatusOK)
require.Len(t, sink.entries, 1, "log was written twice")
require.Equal(t, sink.entries[0].Message, "GET")
require.Equal(t, sink.entries[0].Fields[0].Value, "test-user-id")
require.Equal(t, sink.entries[0].Fields[1].Value, "test name")
require.Equal(t, sink.entries[0].Fields[2].Value, "test@coder.com")
}
type fakeSink struct {
entries []slog.SinkEntry
newEntries chan slog.SinkEntry
+26
View File
@@ -0,0 +1,26 @@
//go:build slim
package loggermw
import (
"context"
"time"
"cdr.dev/slog"
)
type RequestLogger interface {
WithFields(fields ...slog.Field)
WriteLog(ctx context.Context, status int)
}
var _ RequestLogger = &SlogRequestLogger{}
func NewRequestLogger(log slog.Logger, message string, start time.Time) RequestLogger {
return &SlogRequestLogger{
log: log,
written: false,
message: message,
start: start,
}
}
+2 -1
View File
@@ -39,6 +39,7 @@ import (
"github.com/coder/coder/v2/enterprise/derpmesh"
"github.com/coder/coder/v2/enterprise/replicasync"
"github.com/coder/coder/v2/enterprise/wsproxy/wsproxysdk"
sharedhttpmw "github.com/coder/coder/v2/httpmw"
"github.com/coder/coder/v2/site"
"github.com/coder/coder/v2/tailnet"
)
@@ -328,7 +329,7 @@ func New(ctx context.Context, opts *Options) (*Server, error) {
// Persistent middlewares to all routes
r.Use(
// TODO: @emyrk Should we standardize these in some other package?
httpmw.Recover(s.Logger),
sharedhttpmw.Recover(s.Logger),
httpmw.WithProfilingLabels,
tracing.StatusWriterMiddleware,
tracing.Middleware(s.TracerProvider),
@@ -7,8 +7,8 @@ import (
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/coderd/httpmw"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/httpmw"
"github.com/coder/coder/v2/testutil"
)