mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
feat(coderd): add consolidated /debug/profile endpoint for pprof collection (#22892)
## Summary Adds a new `POST /api/v2/debug/profile` endpoint that collects multiple pprof profiles in a single request and returns them as a tar.gz archive. This allows collecting profiles (including block and mutex) without requiring `CODER_PPROF_ENABLE` to be set, and without restarting `coderd`. Closes #21679 ## What it does The endpoint: - Temporarily enables block and mutex profiling (normally disabled at runtime) - Runs CPU profile and/or trace for a configurable duration (default 10s, max 60s) - Collects snapshot profiles (heap, allocs, block, mutex, goroutine, threadcreate) - Returns a tar.gz archive containing all requested `.prof` files - Uses an atomic bool to prevent concurrent collections (returns 409 Conflict) - Is protected by the existing debug endpoint RBAC (owner-only) **Supported profile types:** cpu, heap, allocs, block, mutex, goroutine, threadcreate, trace **Query parameters:** - `duration`: How long to run timed profiles (default: `10s`, max: `60s`) - `profiles`: Comma-separated list of profile types (default: `cpu,heap,allocs,block,mutex,goroutine`) ## Additional changes - **SDK client method** (`codersdk.Client.DebugCollectProfile`) for easy programmatic access - **`coder support bundle --pprof` integration**: tries the consolidated endpoint first, falls back to individual `/debug/pprof/*` endpoints for older servers - **8 new tests** covering defaults, custom profiles, trace+CPU, validation errors, authorization, and conflict detection
This commit is contained in:
Generated
+22
@@ -869,6 +869,28 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"/debug/profile": {
|
||||
"post": {
|
||||
"security": [
|
||||
{
|
||||
"CoderSessionToken": []
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"Debug"
|
||||
],
|
||||
"summary": "Collect debug profiles",
|
||||
"operationId": "collect-debug-profiles",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
}
|
||||
},
|
||||
"x-apidocgen": {
|
||||
"skip": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/debug/tailnet": {
|
||||
"get": {
|
||||
"security": [
|
||||
|
||||
Generated
+20
@@ -752,6 +752,26 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"/debug/profile": {
|
||||
"post": {
|
||||
"security": [
|
||||
{
|
||||
"CoderSessionToken": []
|
||||
}
|
||||
],
|
||||
"tags": ["Debug"],
|
||||
"summary": "Collect debug profiles",
|
||||
"operationId": "collect-debug-profiles",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
}
|
||||
},
|
||||
"x-apidocgen": {
|
||||
"skip": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/debug/tailnet": {
|
||||
"get": {
|
||||
"security": [
|
||||
|
||||
+13
-1
@@ -627,7 +627,8 @@ func New(options *Options) *API {
|
||||
options.Database,
|
||||
options.Pubsub,
|
||||
),
|
||||
dbRolluper: options.DatabaseRolluper,
|
||||
dbRolluper: options.DatabaseRolluper,
|
||||
ProfileCollector: defaultProfileCollector{},
|
||||
}
|
||||
api.WorkspaceAppsProvider = workspaceapps.NewDBTokenProvider(
|
||||
ctx,
|
||||
@@ -1732,6 +1733,8 @@ func New(options *Options) *API {
|
||||
}
|
||||
r.Method("GET", "/expvar", expvar.Handler()) // contains DERP metrics as well as cmdline and memstats
|
||||
|
||||
r.Post("/profile", api.debugCollectProfile)
|
||||
|
||||
r.Route("/pprof", func(r chi.Router) {
|
||||
r.Use(func(next http.Handler) http.Handler {
|
||||
// Some of the pprof handlers strip the `/debug/pprof`
|
||||
@@ -2019,6 +2022,15 @@ type API struct {
|
||||
// gitSyncWorker refreshes stale chat diff statuses in the
|
||||
// background.
|
||||
gitSyncWorker *gitsync.Worker
|
||||
|
||||
// ProfileCollector abstracts the runtime/pprof and runtime/trace
|
||||
// calls used by the /debug/profile endpoint. Tests override this
|
||||
// with a stub to avoid process-global side-effects.
|
||||
ProfileCollector ProfileCollector
|
||||
// ProfileCollecting is used as a concurrency guard so that only one
|
||||
// profile collection (via /debug/profile) can run at a time. The CPU
|
||||
// profiler is process-global, so concurrent collections would fail.
|
||||
ProfileCollecting atomic.Bool
|
||||
}
|
||||
|
||||
// Close waits for all WebSocket connections to drain before returning.
|
||||
|
||||
+299
@@ -1,13 +1,20 @@
|
||||
package coderd
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"runtime"
|
||||
"runtime/pprof"
|
||||
"runtime/trace"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -330,6 +337,298 @@ func loadDismissedHealthchecks(ctx context.Context, db database.Store, logger sl
|
||||
return dismissedHealthchecks
|
||||
}
|
||||
|
||||
// ProfileCollector is the seam between the /debug/profile handler and the
// Go runtime's profiling facilities. defaultProfileCollector is the
// production implementation; tests may plug in a stub so that they do not
// touch process-global profiler state.
type ProfileCollector interface {
	// StartCPUProfile starts a CPU profile whose output goes to dst. On
	// success the returned stop function must be invoked to end the
	// profile.
	StartCPUProfile(dst io.Writer) (stop func(), err error)

	// StartTrace starts an execution trace whose output goes to dst. On
	// success the returned stop function must be invoked to end the
	// trace.
	StartTrace(dst io.Writer) (stop func(), err error)

	// LookupProfile writes the snapshot profile called name to dst.
	LookupProfile(name string, dst io.Writer) error

	// SetBlockProfileRate turns block profiling on or off.
	SetBlockProfileRate(rate int)

	// SetMutexProfileFraction turns mutex profiling on or off and reports
	// the fraction that was in effect before the call.
	SetMutexProfileFraction(rate int) int
}
|
||||
|
||||
// defaultProfileCollector is the production ProfileCollector. Every method
// forwards to the matching function in runtime, runtime/pprof, or
// runtime/trace.
type defaultProfileCollector struct{}

// StartCPUProfile starts the runtime CPU profiler writing to w and returns
// pprof.StopCPUProfile as the stop function. The error from
// pprof.StartCPUProfile is returned unchanged.
func (defaultProfileCollector) StartCPUProfile(w io.Writer) (func(), error) {
	if err := pprof.StartCPUProfile(w); err != nil {
		return nil, err
	}
	return pprof.StopCPUProfile, nil
}

// StartTrace starts the runtime execution tracer writing to w and returns
// trace.Stop as the stop function. The error from trace.Start is returned
// unchanged.
func (defaultProfileCollector) StartTrace(w io.Writer) (func(), error) {
	if err := trace.Start(w); err != nil {
		return nil, err
	}
	return trace.Stop, nil
}

// LookupProfile writes the named snapshot profile to w in the binary
// (debug=0) format. It returns an error when the runtime has no profile
// with that name, so callers never mistake "nothing written" for a
// successfully collected (but empty) profile.
func (defaultProfileCollector) LookupProfile(name string, w io.Writer) error {
	p := pprof.Lookup(name)
	if p == nil {
		return fmt.Errorf("unknown profile %q", name)
	}
	return p.WriteTo(w, 0)
}

// SetBlockProfileRate forwards to runtime.SetBlockProfileRate.
func (defaultProfileCollector) SetBlockProfileRate(rate int) {
	runtime.SetBlockProfileRate(rate)
}

// SetMutexProfileFraction forwards to runtime.SetMutexProfileFraction and
// returns the value it reports.
func (defaultProfileCollector) SetMutexProfileFraction(rate int) int {
	return runtime.SetMutexProfileFraction(rate)
}
|
||||
|
||||
var (
	// defaultProfiles lists the profiles collected when the request does
	// not name any.
	defaultProfiles = []string{"cpu", "heap", "allocs", "block", "mutex", "goroutine"}

	// allValidProfiles is the set of profile names the endpoint accepts.
	allValidProfiles = map[string]bool{
		"allocs":       true,
		"block":        true,
		"cpu":          true,
		"goroutine":    true,
		"heap":         true,
		"mutex":        true,
		"threadcreate": true,
		"trace":        true,
	}
)

const (
	// profileDurationDefault applies when the request carries no
	// ?duration parameter.
	profileDurationDefault = 10 * time.Second

	// profileDurationMax caps ?duration so a caller cannot tie up the
	// runtime-global CPU profiler for an arbitrarily long collection.
	profileDurationMax = 60 * time.Second
)
|
||||
|
||||
// @Summary Collect debug profiles
|
||||
// @ID collect-debug-profiles
|
||||
// @Security CoderSessionToken
|
||||
// @Tags Debug
|
||||
// @Success 200
|
||||
// @Router /debug/profile [post]
|
||||
// @x-apidocgen {"skip": true}
|
||||
func (api *API) debugCollectProfile(rw http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
|
||||
// Parse duration.
|
||||
duration := profileDurationDefault
|
||||
if v := r.URL.Query().Get("duration"); v != "" {
|
||||
d, err := time.ParseDuration(v)
|
||||
if err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
|
||||
Message: "Invalid duration parameter.",
|
||||
Detail: err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
if d <= 0 {
|
||||
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
|
||||
Message: "Duration must be positive.",
|
||||
})
|
||||
return
|
||||
}
|
||||
if d > profileDurationMax {
|
||||
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
|
||||
Message: fmt.Sprintf("Duration cannot exceed %s.", profileDurationMax),
|
||||
})
|
||||
return
|
||||
}
|
||||
duration = d
|
||||
}
|
||||
|
||||
// Parse requested profiles.
|
||||
profiles := defaultProfiles
|
||||
if v := r.URL.Query().Get("profiles"); v != "" {
|
||||
profiles = strings.Split(v, ",")
|
||||
for _, p := range profiles {
|
||||
if !allValidProfiles[p] {
|
||||
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
|
||||
Message: fmt.Sprintf("Unknown profile type: %q.", p),
|
||||
Detail: "Valid types: cpu, heap, allocs, block, mutex, goroutine, threadcreate, trace",
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Only one profile collection can run at a time because the CPU
|
||||
// profiler is process-global.
|
||||
if !api.ProfileCollecting.CompareAndSwap(false, true) {
|
||||
httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{
|
||||
Message: "A profile collection is already in progress. Try again later.",
|
||||
})
|
||||
return
|
||||
}
|
||||
defer api.ProfileCollecting.Store(false)
|
||||
|
||||
// Temporarily enable block and mutex profiling so those profiles are
|
||||
// actually populated. Restore previous values when we are done.
|
||||
// SetBlockProfileRate does not return the previous value, so we
|
||||
// simply disable it again after collection (the default is 0).
|
||||
pc := api.ProfileCollector
|
||||
pc.SetBlockProfileRate(1)
|
||||
prevMutexFraction := pc.SetMutexProfileFraction(1)
|
||||
defer pc.SetBlockProfileRate(0)
|
||||
defer pc.SetMutexProfileFraction(prevMutexFraction)
|
||||
|
||||
// Determine which profiles need the timed collection (cpu, trace) vs
|
||||
// instant snapshots.
|
||||
wantCPU := false
|
||||
wantTrace := false
|
||||
for _, p := range profiles {
|
||||
switch p {
|
||||
case "cpu":
|
||||
wantCPU = true
|
||||
case "trace":
|
||||
wantTrace = true
|
||||
}
|
||||
}
|
||||
|
||||
// Collect timed profiles (cpu and/or trace) for the requested
|
||||
// duration. StartCPUProfile and StartTrace each return a stop
|
||||
// function that must be called to finish collection.
|
||||
var cpuBuf, traceBuf bytes.Buffer
|
||||
var stopCPU, stopTrace func()
|
||||
if wantCPU {
|
||||
var err error
|
||||
stopCPU, err = pc.StartCPUProfile(&cpuBuf)
|
||||
if err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Failed to start CPU profile.",
|
||||
Detail: err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
if wantTrace {
|
||||
var err error
|
||||
stopTrace, err = pc.StartTrace(&traceBuf)
|
||||
if err != nil {
|
||||
if stopCPU != nil {
|
||||
stopCPU()
|
||||
}
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Failed to start trace.",
|
||||
Detail: err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if wantCPU || wantTrace {
|
||||
timer := api.Clock.NewTimer(duration, "debugCollectProfile")
|
||||
defer timer.Stop()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
if stopCPU != nil {
|
||||
stopCPU()
|
||||
}
|
||||
if stopTrace != nil {
|
||||
stopTrace()
|
||||
}
|
||||
// Client disconnected; nothing to write.
|
||||
return
|
||||
case <-timer.C:
|
||||
}
|
||||
if stopCPU != nil {
|
||||
stopCPU()
|
||||
}
|
||||
if stopTrace != nil {
|
||||
stopTrace()
|
||||
}
|
||||
}
|
||||
|
||||
// Build the tar.gz archive.
|
||||
var archive bytes.Buffer
|
||||
gzw := gzip.NewWriter(&archive)
|
||||
tw := tar.NewWriter(gzw)
|
||||
|
||||
addFile := func(name string, data []byte) error {
|
||||
hdr := &tar.Header{
|
||||
Name: name,
|
||||
Mode: 0o644,
|
||||
Size: int64(len(data)),
|
||||
}
|
||||
if err := tw.WriteHeader(hdr); err != nil {
|
||||
return xerrors.Errorf("write tar header for %s: %w", name, err)
|
||||
}
|
||||
if _, err := tw.Write(data); err != nil {
|
||||
return xerrors.Errorf("write tar data for %s: %w", name, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, p := range profiles {
|
||||
switch p {
|
||||
case "cpu":
|
||||
if err := addFile("cpu.prof", cpuBuf.Bytes()); err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Failed to write CPU profile to archive.",
|
||||
Detail: err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
case "trace":
|
||||
if err := addFile("trace.out", traceBuf.Bytes()); err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Failed to write trace to archive.",
|
||||
Detail: err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
default:
|
||||
// Snapshot profiles: heap, allocs, block, mutex, goroutine,
|
||||
// threadcreate.
|
||||
var buf bytes.Buffer
|
||||
if err := pc.LookupProfile(p, &buf); err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: fmt.Sprintf("Failed to collect %s profile.", p),
|
||||
Detail: err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
if err := addFile(p+".prof", buf.Bytes()); err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: fmt.Sprintf("Failed to write %s profile to archive.", p),
|
||||
Detail: err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := tw.Close(); err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Failed to finalize tar archive.",
|
||||
Detail: err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
if err := gzw.Close(); err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Failed to finalize gzip archive.",
|
||||
Detail: err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
filename := fmt.Sprintf("coderd-profile-%d.tar.gz", time.Now().Unix())
|
||||
rw.Header().Set("Content-Type", "application/gzip")
|
||||
rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filename))
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
_, _ = rw.Write(archive.Bytes())
|
||||
}
|
||||
|
||||
// @Summary Debug pprof index
|
||||
// @ID debug-pprof-index
|
||||
// @Security CoderSessionToken
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package coderd_test
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
@@ -13,8 +16,11 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"cdr.dev/slog/v3/sloggers/slogtest"
|
||||
"github.com/coder/coder/v2/coderd"
|
||||
"github.com/coder/coder/v2/coderd/coderdtest"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck"
|
||||
"github.com/coder/coder/v2/coderd/rbac"
|
||||
"github.com/coder/coder/v2/coderd/rbac/policy"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/codersdk/healthsdk"
|
||||
"github.com/coder/coder/v2/testutil"
|
||||
@@ -370,3 +376,252 @@ func TestDebugWebsocket(t *testing.T) {
|
||||
t.Parallel()
|
||||
})
|
||||
}
|
||||
|
||||
// noopProfileCollector is a do-nothing collector. None of its methods call
// into the process-global CPU profiler or tracer, which keeps tests that
// use it safe to run in parallel.
type noopProfileCollector struct{}

// StartCPUProfile starts nothing and hands back a stop function that does
// nothing.
func (noopProfileCollector) StartCPUProfile(_ io.Writer) (func(), error) {
	stop := func() {}
	return stop, nil
}

// StartTrace starts nothing and hands back a stop function that does
// nothing.
func (noopProfileCollector) StartTrace(_ io.Writer) (func(), error) {
	stop := func() {}
	return stop, nil
}

// LookupProfile writes nothing and always succeeds.
func (noopProfileCollector) LookupProfile(_ string, _ io.Writer) error {
	return nil
}

// SetBlockProfileRate ignores the requested rate.
func (noopProfileCollector) SetBlockProfileRate(_ int) {
}

// SetMutexProfileFraction ignores the requested rate and reports a previous
// fraction of zero.
func (noopProfileCollector) SetMutexProfileFraction(_ int) int {
	return 0
}
|
||||
|
||||
// Compile-time check.
|
||||
var _ coderd.ProfileCollector = noopProfileCollector{}
|
||||
|
||||
// blockingProfileCollector blocks in StartCPUProfile until unblocked,
|
||||
// allowing deterministic testing of the concurrency guard.
|
||||
type blockingProfileCollector struct {
|
||||
noopProfileCollector
|
||||
started chan struct{} // closed when StartCPUProfile is entered
|
||||
block chan struct{} // StartCPUProfile blocks until this is closed
|
||||
}
|
||||
|
||||
func (b *blockingProfileCollector) StartCPUProfile(io.Writer) (func(), error) {
|
||||
close(b.started)
|
||||
<-b.block
|
||||
return func() {}, nil
|
||||
}
|
||||
|
||||
func newTestAPI(t *testing.T) (*codersdk.Client, io.Closer, *coderd.API) {
|
||||
t.Helper()
|
||||
client, closer, api := coderdtest.NewWithAPI(t, nil)
|
||||
api.ProfileCollector = noopProfileCollector{}
|
||||
return client, closer, api
|
||||
}
|
||||
|
||||
func TestDebugCollectProfile(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("Defaults", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitLong)
|
||||
|
||||
client, closer, api := newTestAPI(t)
|
||||
defer closer.Close()
|
||||
_ = coderdtest.CreateFirstUser(t, client)
|
||||
|
||||
asserter := coderdtest.AssertRBAC(t, api, client)
|
||||
|
||||
body, err := client.DebugCollectProfile(ctx, codersdk.DebugProfileOptions{
|
||||
// Use a very short duration so the test finishes quickly.
|
||||
// The noop collector means no real profiling occurs.
|
||||
Duration: 100 * time.Millisecond,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer body.Close()
|
||||
|
||||
data, err := io.ReadAll(body)
|
||||
require.NoError(t, err)
|
||||
require.NotEmpty(t, data, "archive should not be empty")
|
||||
|
||||
// Verify that the response is a valid tar.gz archive containing
|
||||
// the expected profile files.
|
||||
files := extractTarGzFiles(t, data)
|
||||
require.Contains(t, files, "cpu.prof")
|
||||
require.Contains(t, files, "heap.prof")
|
||||
require.Contains(t, files, "allocs.prof")
|
||||
require.Contains(t, files, "block.prof")
|
||||
require.Contains(t, files, "mutex.prof")
|
||||
require.Contains(t, files, "goroutine.prof")
|
||||
|
||||
// Verify the endpoint checks the correct RBAC permission.
|
||||
asserter.AssertChecked(t, policy.ActionRead, rbac.ResourceDebugInfo)
|
||||
})
|
||||
|
||||
t.Run("CustomProfiles", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitLong)
|
||||
|
||||
client, closer, _ := newTestAPI(t)
|
||||
defer closer.Close()
|
||||
_ = coderdtest.CreateFirstUser(t, client)
|
||||
|
||||
body, err := client.DebugCollectProfile(ctx, codersdk.DebugProfileOptions{
|
||||
Duration: 100 * time.Millisecond,
|
||||
Profiles: []string{"heap", "goroutine"},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer body.Close()
|
||||
|
||||
data, err := io.ReadAll(body)
|
||||
require.NoError(t, err)
|
||||
|
||||
files := extractTarGzFiles(t, data)
|
||||
require.Contains(t, files, "heap.prof")
|
||||
require.Contains(t, files, "goroutine.prof")
|
||||
// Should NOT contain profiles we didn't ask for.
|
||||
require.NotContains(t, files, "cpu.prof")
|
||||
require.NotContains(t, files, "allocs.prof")
|
||||
})
|
||||
|
||||
t.Run("WithTraceAndCPU", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitLong)
|
||||
|
||||
client, closer, _ := newTestAPI(t)
|
||||
defer closer.Close()
|
||||
_ = coderdtest.CreateFirstUser(t, client)
|
||||
|
||||
body, err := client.DebugCollectProfile(ctx, codersdk.DebugProfileOptions{
|
||||
Duration: 100 * time.Millisecond,
|
||||
Profiles: []string{"cpu", "trace"},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer body.Close()
|
||||
|
||||
data, err := io.ReadAll(body)
|
||||
require.NoError(t, err)
|
||||
|
||||
files := extractTarGzFiles(t, data)
|
||||
require.Contains(t, files, "cpu.prof")
|
||||
require.Contains(t, files, "trace.out")
|
||||
})
|
||||
|
||||
t.Run("DurationTooLong", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
|
||||
client := coderdtest.New(t, nil)
|
||||
_ = coderdtest.CreateFirstUser(t, client)
|
||||
|
||||
res, err := client.Request(ctx, "POST", "/api/v2/debug/profile?duration=5m", nil)
|
||||
require.NoError(t, err)
|
||||
defer res.Body.Close()
|
||||
require.Equal(t, http.StatusBadRequest, res.StatusCode)
|
||||
})
|
||||
|
||||
t.Run("InvalidDuration", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
|
||||
client := coderdtest.New(t, nil)
|
||||
_ = coderdtest.CreateFirstUser(t, client)
|
||||
|
||||
res, err := client.Request(ctx, "POST", "/api/v2/debug/profile?duration=notaduration", nil)
|
||||
require.NoError(t, err)
|
||||
defer res.Body.Close()
|
||||
require.Equal(t, http.StatusBadRequest, res.StatusCode)
|
||||
})
|
||||
|
||||
t.Run("InvalidProfile", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
|
||||
client := coderdtest.New(t, nil)
|
||||
_ = coderdtest.CreateFirstUser(t, client)
|
||||
|
||||
res, err := client.Request(ctx, "POST", "/api/v2/debug/profile?profiles=nonexistent", nil)
|
||||
require.NoError(t, err)
|
||||
defer res.Body.Close()
|
||||
require.Equal(t, http.StatusBadRequest, res.StatusCode)
|
||||
})
|
||||
|
||||
t.Run("Unauthorized", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitShort)
|
||||
|
||||
client := coderdtest.New(t, nil)
|
||||
firstUser := coderdtest.CreateFirstUser(t, client)
|
||||
|
||||
// Create a non-admin user.
|
||||
memberClient, _ := coderdtest.CreateAnotherUser(t, client, firstUser.OrganizationID)
|
||||
|
||||
res, err := memberClient.Request(ctx, "POST", "/api/v2/debug/profile", nil)
|
||||
require.NoError(t, err)
|
||||
defer res.Body.Close()
|
||||
require.Equal(t, http.StatusForbidden, res.StatusCode)
|
||||
})
|
||||
|
||||
t.Run("Conflict", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitLong)
|
||||
|
||||
blocker := &blockingProfileCollector{
|
||||
started: make(chan struct{}),
|
||||
block: make(chan struct{}),
|
||||
}
|
||||
|
||||
client, closer, api := coderdtest.NewWithAPI(t, nil)
|
||||
defer closer.Close()
|
||||
api.ProfileCollector = blocker
|
||||
_ = coderdtest.CreateFirstUser(t, client)
|
||||
|
||||
// Start a profile collection that will block inside
|
||||
// StartCPUProfile until we explicitly unblock it.
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
defer close(done)
|
||||
body, err := client.DebugCollectProfile(ctx, codersdk.DebugProfileOptions{
|
||||
Duration: 1 * time.Second,
|
||||
})
|
||||
if err == nil {
|
||||
body.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait deterministically for the first request to enter the
|
||||
// collector — no time.Sleep needed.
|
||||
testutil.TryReceive(ctx, t, blocker.started)
|
||||
|
||||
// The second request should get 409 Conflict.
|
||||
res, err := client.Request(ctx, "POST", "/api/v2/debug/profile?duration=1s", nil)
|
||||
require.NoError(t, err)
|
||||
defer res.Body.Close()
|
||||
require.Equal(t, http.StatusConflict, res.StatusCode)
|
||||
|
||||
// Unblock the first request and wait for it to finish.
|
||||
close(blocker.block)
|
||||
testutil.TryReceive(ctx, t, done)
|
||||
})
|
||||
}
|
||||
|
||||
// extractTarGzFiles extracts file names from a tar.gz archive.
|
||||
func extractTarGzFiles(t *testing.T, data []byte) map[string]bool {
|
||||
t.Helper()
|
||||
|
||||
gr, err := gzip.NewReader(bytes.NewReader(data))
|
||||
require.NoError(t, err)
|
||||
defer gr.Close()
|
||||
|
||||
tr := tar.NewReader(gr)
|
||||
files := make(map[string]bool)
|
||||
for {
|
||||
hdr, err := tr.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
require.NoError(t, err)
|
||||
files[hdr.Name] = true
|
||||
}
|
||||
return files
|
||||
}
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
package codersdk
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// DebugProfileDurationMax is the longest profile collection the server
// accepts. Callers should give their context a deadline longer than this,
// otherwise the request may be canceled before collection finishes.
const DebugProfileDurationMax = time.Minute
|
||||
|
||||
// DebugProfileOptions configures a profile collection made through the
// consolidated /debug/profile endpoint.
type DebugProfileOptions struct {
	// Duration is how long the time-based profiles (cpu, trace) run. Leave
	// it at zero to use the server default (10s).
	Duration time.Duration
	// Profiles names the profile types to collect. Leave it nil or empty
	// to use the server default (cpu, heap, allocs, block, mutex,
	// goroutine).
	Profiles []string
}
|
||||
|
||||
// DebugCollectProfile fetches a tar.gz archive of pprof profiles from the
|
||||
// server. The caller is responsible for closing the returned ReadCloser.
|
||||
func (c *Client) DebugCollectProfile(ctx context.Context, opts DebugProfileOptions) (io.ReadCloser, error) {
|
||||
qp := url.Values{}
|
||||
if opts.Duration > 0 {
|
||||
qp.Set("duration", opts.Duration.String())
|
||||
}
|
||||
if len(opts.Profiles) > 0 {
|
||||
qp.Set("profiles", strings.Join(opts.Profiles, ","))
|
||||
}
|
||||
|
||||
reqPath := "/api/v2/debug/profile"
|
||||
if len(qp) > 0 {
|
||||
reqPath += "?" + qp.Encode()
|
||||
}
|
||||
|
||||
resp, err := c.Request(ctx, http.MethodPost, reqPath, nil)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("request debug profile: %w", err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
defer resp.Body.Close()
|
||||
return nil, ReadBodyAsError(resp)
|
||||
}
|
||||
|
||||
return resp.Body, nil
|
||||
}
|
||||
Generated
+18
@@ -2369,6 +2369,24 @@ export interface DatabaseReport extends BaseReport {
|
||||
readonly threshold_ms: number;
|
||||
}
|
||||
|
||||
// From codersdk/debug.go
|
||||
/**
|
||||
* DebugProfileOptions are options for collecting debug profiles from the
|
||||
* server via the consolidated /debug/profile endpoint.
|
||||
*/
|
||||
export interface DebugProfileOptions {
|
||||
/**
|
||||
* Duration controls how long time-based profiles (cpu, trace) run.
|
||||
* Zero uses the server default (10s).
|
||||
*/
|
||||
readonly Duration: number;
|
||||
/**
|
||||
* Profiles is the list of profile types to collect. Nil or empty uses
|
||||
* the server default (cpu, heap, allocs, block, mutex, goroutine).
|
||||
*/
|
||||
readonly Profiles: readonly string[];
|
||||
}
|
||||
|
||||
// From codersdk/externalauth.go
|
||||
export interface DeleteExternalAuthByIDResponse {
|
||||
/**
|
||||
|
||||
+100
-1
@@ -1,15 +1,18 @@
|
||||
package support
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -772,6 +775,93 @@ func compressData(data []byte) []byte {
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// PprofInfoFromArchive uses the consolidated /api/v2/debug/profile endpoint
|
||||
// to collect pprof data in a single request. The server temporarily enables
|
||||
// block/mutex profiling, runs time-based profiles for the given duration,
|
||||
// takes snapshots, and returns a tar.gz archive.
|
||||
func PprofInfoFromArchive(ctx context.Context, client *codersdk.Client, log slog.Logger, duration time.Duration) (*PprofCollection, error) {
|
||||
if client == nil {
|
||||
return nil, xerrors.New("client is nil")
|
||||
}
|
||||
|
||||
body, err := client.DebugCollectProfile(ctx, codersdk.DebugProfileOptions{
|
||||
Duration: duration,
|
||||
// Use the server defaults plus trace.
|
||||
Profiles: []string{"cpu", "heap", "allocs", "block", "mutex", "goroutine", "threadcreate", "trace"},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("fetch consolidated profile: %w", err)
|
||||
}
|
||||
defer body.Close()
|
||||
|
||||
data, err := io.ReadAll(body)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("read profile archive: %w", err)
|
||||
}
|
||||
|
||||
var p PprofCollection
|
||||
if client.URL != nil {
|
||||
if u, err := client.URL.Parse("/api/v2/debug/profile"); err == nil {
|
||||
p.EndpointURL = u.String()
|
||||
}
|
||||
}
|
||||
if p.EndpointURL == "" {
|
||||
p.EndpointURL = "/api/v2/debug/profile"
|
||||
}
|
||||
p.CollectedAt = time.Now()
|
||||
|
||||
// Parse the tar.gz archive and populate the PprofCollection.
|
||||
gr, err := gzip.NewReader(bytes.NewReader(data))
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("open gzip reader: %w", err)
|
||||
}
|
||||
defer gr.Close()
|
||||
|
||||
tr := tar.NewReader(gr)
|
||||
for {
|
||||
hdr, err := tr.Next()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("read tar entry %q: %w", hdr.Name, err)
|
||||
}
|
||||
|
||||
content, err := io.ReadAll(tr)
|
||||
if err != nil {
|
||||
log.Warn(ctx, "failed to read tar entry", slog.F("name", hdr.Name), slog.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
// Files in the archive are named like "cpu.prof", "heap.prof",
|
||||
// "trace.out", etc. Compress binary profile data for storage in
|
||||
// the bundle, matching what PprofInfo() does.
|
||||
base := path.Base(hdr.Name)
|
||||
switch base {
|
||||
case "cpu.prof":
|
||||
p.Profile = compressData(content)
|
||||
case "heap.prof":
|
||||
p.Heap = compressData(content)
|
||||
case "allocs.prof":
|
||||
p.Allocs = compressData(content)
|
||||
case "block.prof":
|
||||
p.Block = compressData(content)
|
||||
case "mutex.prof":
|
||||
p.Mutex = compressData(content)
|
||||
case "goroutine.prof":
|
||||
p.Goroutine = compressData(content)
|
||||
case "threadcreate.prof":
|
||||
p.Threadcreate = compressData(content)
|
||||
case "trace.out":
|
||||
p.Trace = compressData(content)
|
||||
default:
|
||||
log.Debug(ctx, "unknown profile in archive", slog.F("name", hdr.Name))
|
||||
}
|
||||
}
|
||||
|
||||
return &p, nil
|
||||
}
|
||||
|
||||
func PprofInfoFromAgent(ctx context.Context, conn workspacesdk.AgentConn, log slog.Logger) *PprofCollection {
|
||||
if conn == nil {
|
||||
return nil
|
||||
@@ -1049,7 +1139,16 @@ func collectPprof(ctx context.Context, d *Deps, b *Bundle) Pprof {
|
||||
return pprof
|
||||
}
|
||||
|
||||
serverPprof := PprofInfo(ctx, d.Client, d.Log)
|
||||
// Try the consolidated /debug/profile endpoint first. It
|
||||
// temporarily enables block/mutex profiling on the server and
|
||||
// returns a single tar.gz archive.
|
||||
serverPprof, err := PprofInfoFromArchive(ctx, d.Client, d.Log, 30*time.Second)
|
||||
if err != nil {
|
||||
d.Log.Warn(ctx, "consolidated profile endpoint unavailable, falling back to individual endpoints",
|
||||
slog.Error(err))
|
||||
// Fall back to the legacy per-profile endpoint approach.
|
||||
serverPprof = PprofInfo(ctx, d.Client, d.Log)
|
||||
}
|
||||
if serverPprof != nil {
|
||||
pprof.Server = serverPprof
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user