chore: add debug logging and recovery to agent api requests (#20785)

This is to debug context timeouts on API requests to the agent.

Because rbac and database cannot be imported in slim, split the logger
middleware into slim and non-slim versions and break out the recovery
middleware.
This commit is contained in:
Asher
2025-11-25 14:59:20 -09:00
committed by GitHub
parent b0e8384b82
commit c266bb830c
9 changed files with 162 additions and 81 deletions
+9 -78
View File
@@ -7,19 +7,17 @@ import (
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/go-chi/chi/v5"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/coderd/tracing"
)
var (
safeParams = []string{"page", "limit", "offset"}
safeParams = []string{"page", "limit", "offset", "path"}
countParams = []string{"ids", "template_ids"}
)
@@ -124,85 +122,18 @@ func Logger(log slog.Logger) func(next http.Handler) http.Handler {
}
}
type RequestLogger interface {
WithFields(fields ...slog.Field)
WriteLog(ctx context.Context, status int)
WithAuthContext(actor rbac.Subject)
}
type SlogRequestLogger struct {
log slog.Logger
written bool
message string
start time.Time
// Protects actors map for concurrent writes.
mu sync.RWMutex
actors map[rbac.SubjectType]rbac.Subject
}
var _ RequestLogger = &SlogRequestLogger{}
func NewRequestLogger(log slog.Logger, message string, start time.Time) RequestLogger {
return &SlogRequestLogger{
log: log,
written: false,
message: message,
start: start,
actors: make(map[rbac.SubjectType]rbac.Subject),
}
log slog.Logger
written bool
message string
start time.Time
addFields func()
}
func (c *SlogRequestLogger) WithFields(fields ...slog.Field) {
c.log = c.log.With(fields...)
}
func (c *SlogRequestLogger) WithAuthContext(actor rbac.Subject) {
c.mu.Lock()
defer c.mu.Unlock()
c.actors[actor.Type] = actor
}
func (c *SlogRequestLogger) addAuthContextFields() {
c.mu.RLock()
defer c.mu.RUnlock()
usr, ok := c.actors[rbac.SubjectTypeUser]
if ok {
c.log = c.log.With(
slog.F("requestor_id", usr.ID),
slog.F("requestor_name", usr.FriendlyName),
slog.F("requestor_email", usr.Email),
)
} else {
// If there is no user, we log the requestor name for the first
// actor in a defined order.
for _, v := range actorLogOrder {
subj, ok := c.actors[v]
if !ok {
continue
}
c.log = c.log.With(
slog.F("requestor_name", subj.FriendlyName),
)
break
}
}
}
var actorLogOrder = []rbac.SubjectType{
rbac.SubjectTypeAutostart,
rbac.SubjectTypeCryptoKeyReader,
rbac.SubjectTypeCryptoKeyRotator,
rbac.SubjectTypeJobReaper,
rbac.SubjectTypeNotifier,
rbac.SubjectTypePrebuildsOrchestrator,
rbac.SubjectTypeSubAgentAPI,
rbac.SubjectTypeProvisionerd,
rbac.SubjectTypeResourceMonitor,
rbac.SubjectTypeSystemReadProvisionerDaemons,
rbac.SubjectTypeSystemRestricted,
}
func (c *SlogRequestLogger) WriteLog(ctx context.Context, status int) {
if c.written {
return
@@ -210,9 +141,9 @@ func (c *SlogRequestLogger) WriteLog(ctx context.Context, status int) {
c.written = true
end := time.Now()
// Right before we write the log, we try to find the user in the actors
// and add the fields to the log.
c.addAuthContextFields()
if c.addFields != nil {
c.addFields()
}
logger := c.log.With(
slog.F("took", end.Sub(c.start)),
+88
View File
@@ -0,0 +1,88 @@
//go:build !slim
package loggermw
import (
"context"
"sync"
"time"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/rbac"
)
type RequestLogger interface {
WithFields(fields ...slog.Field)
WriteLog(ctx context.Context, status int)
WithAuthContext(actor rbac.Subject)
}
type RbacSlogRequestLogger struct {
SlogRequestLogger
// Protects actors map for concurrent writes.
mu sync.RWMutex
actors map[rbac.SubjectType]rbac.Subject
}
var _ RequestLogger = &RbacSlogRequestLogger{}
func NewRequestLogger(log slog.Logger, message string, start time.Time) RequestLogger {
rlogger := &RbacSlogRequestLogger{
SlogRequestLogger: SlogRequestLogger{
log: log,
written: false,
message: message,
start: start,
},
actors: make(map[rbac.SubjectType]rbac.Subject),
}
rlogger.addFields = rlogger.addAuthContextFields
return rlogger
}
func (c *RbacSlogRequestLogger) WithAuthContext(actor rbac.Subject) {
c.mu.Lock()
defer c.mu.Unlock()
c.actors[actor.Type] = actor
}
var actorLogOrder = []rbac.SubjectType{
rbac.SubjectTypeAutostart,
rbac.SubjectTypeCryptoKeyReader,
rbac.SubjectTypeCryptoKeyRotator,
rbac.SubjectTypeJobReaper,
rbac.SubjectTypeNotifier,
rbac.SubjectTypePrebuildsOrchestrator,
rbac.SubjectTypeSubAgentAPI,
rbac.SubjectTypeProvisionerd,
rbac.SubjectTypeResourceMonitor,
rbac.SubjectTypeSystemReadProvisionerDaemons,
rbac.SubjectTypeSystemRestricted,
}
func (c *RbacSlogRequestLogger) addAuthContextFields() {
c.mu.RLock()
defer c.mu.RUnlock()
usr, ok := c.actors[rbac.SubjectTypeUser]
if ok {
c.log = c.log.With(
slog.F("requestor_id", usr.ID),
slog.F("requestor_name", usr.FriendlyName),
slog.F("requestor_email", usr.Email),
)
} else {
// If there is no user, we log the requestor name for the first
// actor in a defined order.
for _, v := range actorLogOrder {
subj, ok := c.actors[v]
if !ok {
continue
}
c.log = c.log.With(
slog.F("requestor_name", subj.FriendlyName),
)
break
}
}
}
@@ -16,6 +16,7 @@ import (
"github.com/stretchr/testify/require"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/testutil"
"github.com/coder/websocket"
@@ -363,6 +364,31 @@ func TestSafeQueryParams(t *testing.T) {
}
}
func TestRequestLogger_AuthContext(t *testing.T) {
t.Parallel()
ctx := context.Background()
sink := &fakeSink{}
logger := slog.Make(sink)
logger = logger.Leveled(slog.LevelDebug)
logCtx := NewRequestLogger(logger, "GET", time.Now())
logCtx.WithAuthContext(rbac.Subject{
ID: "test-user-id",
FriendlyName: "test name",
Email: "test@coder.com",
Type: rbac.SubjectTypeUser,
})
logCtx.WriteLog(ctx, http.StatusOK)
require.Len(t, sink.entries, 1, "log was written twice")
require.Equal(t, sink.entries[0].Message, "GET")
require.Equal(t, sink.entries[0].Fields[0].Value, "test-user-id")
require.Equal(t, sink.entries[0].Fields[1].Value, "test name")
require.Equal(t, sink.entries[0].Fields[2].Value, "test@coder.com")
}
type fakeSink struct {
entries []slog.SinkEntry
newEntries chan slog.SinkEntry
+26
View File
@@ -0,0 +1,26 @@
//go:build slim
package loggermw
import (
"context"
"time"
"cdr.dev/slog"
)
type RequestLogger interface {
WithFields(fields ...slog.Field)
WriteLog(ctx context.Context, status int)
}
var _ RequestLogger = &SlogRequestLogger{}
func NewRequestLogger(log slog.Logger, message string, start time.Time) RequestLogger {
return &SlogRequestLogger{
log: log,
written: false,
message: message,
start: start,
}
}
-47
View File
@@ -1,47 +0,0 @@
package httpmw
import (
"context"
"net/http"
"runtime/debug"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/coderd/tracing"
)
func Recover(log slog.Logger) func(h http.Handler) http.Handler {
return func(h http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
defer func() {
r := recover()
// Reverse proxying (among other things) may panic with
// http.ErrAbortHandler when the request is aborted. It's not a
// real panic so we shouldn't log them.
//
//nolint:errorlint // this is how the stdlib does the check
if r != nil && r != http.ErrAbortHandler {
log.Warn(context.Background(),
"panic serving http request (recovered)",
slog.F("panic", r),
slog.F("stack", string(debug.Stack())),
)
var hijacked bool
if sw, ok := w.(*tracing.StatusWriter); ok {
hijacked = sw.Hijacked
}
// Only try to write errors on
// non-hijacked responses.
if !hijacked {
httpapi.InternalServerError(w, nil)
}
}
}()
h.ServeHTTP(w, r)
})
}
}
-72
View File
@@ -1,72 +0,0 @@
package httpmw_test
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/coderd/httpmw"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/testutil"
)
func TestRecover(t *testing.T) {
t.Parallel()
handler := func(isPanic, _ bool) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if isPanic {
panic("Oh no!")
}
w.WriteHeader(http.StatusOK)
})
}
cases := []struct {
Name string
Code int
Panic bool
Hijack bool
}{
{
Name: "OK",
Code: http.StatusOK,
Panic: false,
Hijack: false,
},
{
Name: "Panic",
Code: http.StatusInternalServerError,
Panic: true,
Hijack: false,
},
{
Name: "Hijack",
Code: 0,
Panic: true,
Hijack: true,
},
}
for _, c := range cases {
t.Run(c.Name, func(t *testing.T) {
t.Parallel()
var (
log = testutil.Logger(t)
r = httptest.NewRequest("GET", "/", nil)
w = &tracing.StatusWriter{
ResponseWriter: httptest.NewRecorder(),
Hijacked: c.Hijack,
}
)
httpmw.Recover(log)(handler(c.Panic, c.Hijack)).ServeHTTP(w, r)
require.Equal(t, c.Code, w.Status)
})
}
}