feat(coderd): add consolidated /debug/profile endpoint for pprof collection (#22892)

## Summary

Adds a new `POST /api/v2/debug/profile` endpoint that collects multiple
pprof profiles in a single request and returns them as a tar.gz archive.
This allows collecting profiles (including block and mutex) without
requiring `CODER_PPROF_ENABLE` to be set, and without restarting
`coderd`.

Closes #21679

## What it does

The endpoint:
- Temporarily enables block and mutex profiling (normally disabled at
runtime)
- Runs CPU profile and/or trace for a configurable duration (default
10s, max 60s)
- Collects snapshot profiles (heap, allocs, block, mutex, goroutine,
threadcreate)
- Returns a tar.gz archive containing one file per requested profile (`<name>.prof`, or `trace.out` for the execution trace)
- Uses an atomic bool to prevent concurrent collections (returns 409
Conflict)
- Is protected by the existing debug endpoint RBAC (owner-only)

**Supported profile types:** cpu, heap, allocs, block, mutex, goroutine,
threadcreate, trace

**Query parameters:**
- `duration`: How long to run timed profiles (default: `10s`, max:
`60s`)
- `profiles`: Comma-separated list of profile types (default:
`cpu,heap,allocs,block,mutex,goroutine`)

## Additional changes

- **SDK client method** (`codersdk.Client.DebugCollectProfile`) for easy
programmatic access
- **`coder support bundle --pprof` integration**: tries the consolidated
endpoint first, falls back to individual `/debug/pprof/*` endpoints for
older servers
- **8 new tests** covering defaults, custom profiles, trace+CPU,
validation errors, authorization, and conflict detection
This commit is contained in:
Kacper Sawicki
2026-03-13 15:09:39 +01:00
committed by GitHub
parent cc6716c730
commit df2360f56a
8 changed files with 783 additions and 2 deletions
+22
View File
@@ -869,6 +869,28 @@ const docTemplate = `{
} }
} }
}, },
"/debug/profile": {
"post": {
"security": [
{
"CoderSessionToken": []
}
],
"tags": [
"Debug"
],
"summary": "Collect debug profiles",
"operationId": "collect-debug-profiles",
"responses": {
"200": {
"description": "OK"
}
},
"x-apidocgen": {
"skip": true
}
}
},
"/debug/tailnet": { "/debug/tailnet": {
"get": { "get": {
"security": [ "security": [
+20
View File
@@ -752,6 +752,26 @@
} }
} }
}, },
"/debug/profile": {
"post": {
"security": [
{
"CoderSessionToken": []
}
],
"tags": ["Debug"],
"summary": "Collect debug profiles",
"operationId": "collect-debug-profiles",
"responses": {
"200": {
"description": "OK"
}
},
"x-apidocgen": {
"skip": true
}
}
},
"/debug/tailnet": { "/debug/tailnet": {
"get": { "get": {
"security": [ "security": [
+13 -1
View File
@@ -627,7 +627,8 @@ func New(options *Options) *API {
options.Database, options.Database,
options.Pubsub, options.Pubsub,
), ),
dbRolluper: options.DatabaseRolluper, dbRolluper: options.DatabaseRolluper,
ProfileCollector: defaultProfileCollector{},
} }
api.WorkspaceAppsProvider = workspaceapps.NewDBTokenProvider( api.WorkspaceAppsProvider = workspaceapps.NewDBTokenProvider(
ctx, ctx,
@@ -1732,6 +1733,8 @@ func New(options *Options) *API {
} }
r.Method("GET", "/expvar", expvar.Handler()) // contains DERP metrics as well as cmdline and memstats r.Method("GET", "/expvar", expvar.Handler()) // contains DERP metrics as well as cmdline and memstats
r.Post("/profile", api.debugCollectProfile)
r.Route("/pprof", func(r chi.Router) { r.Route("/pprof", func(r chi.Router) {
r.Use(func(next http.Handler) http.Handler { r.Use(func(next http.Handler) http.Handler {
// Some of the pprof handlers strip the `/debug/pprof` // Some of the pprof handlers strip the `/debug/pprof`
@@ -2019,6 +2022,15 @@ type API struct {
// gitSyncWorker refreshes stale chat diff statuses in the // gitSyncWorker refreshes stale chat diff statuses in the
// background. // background.
gitSyncWorker *gitsync.Worker gitSyncWorker *gitsync.Worker
// ProfileCollector abstracts the runtime/pprof and runtime/trace
// calls used by the /debug/profile endpoint. Tests override this
// with a stub to avoid process-global side-effects.
ProfileCollector ProfileCollector
// ProfileCollecting is used as a concurrency guard so that only one
// profile collection (via /debug/profile) can run at a time. The CPU
// profiler is process-global, so concurrent collections would fail.
ProfileCollecting atomic.Bool
} }
// Close waits for all WebSocket connections to drain before returning. // Close waits for all WebSocket connections to drain before returning.
+299
View File
@@ -1,13 +1,20 @@
package coderd package coderd
import ( import (
"archive/tar"
"bytes" "bytes"
"compress/gzip"
"context" "context"
"database/sql" "database/sql"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"net/http" "net/http"
"runtime"
"runtime/pprof"
"runtime/trace"
"slices" "slices"
"strings"
"time" "time"
"github.com/google/uuid" "github.com/google/uuid"
@@ -330,6 +337,298 @@ func loadDismissedHealthchecks(ctx context.Context, db database.Store, logger sl
return dismissedHealthchecks return dismissedHealthchecks
} }
// ProfileCollector is the seam between the /debug/profile handler and
// the Go runtime's pprof/trace facilities. defaultProfileCollector is
// the production implementation; tests can install a stub so that
// process-global profiler state is left untouched.
type ProfileCollector interface {
	// SetBlockProfileRate turns block profiling on or off by setting
	// its rate.
	SetBlockProfileRate(rate int)

	// SetMutexProfileFraction turns mutex profiling on or off by
	// setting its fraction. The fraction that was in effect before the
	// call is returned.
	SetMutexProfileFraction(rate int) int

	// StartCPUProfile starts a CPU profile whose output goes to w. The
	// returned stop function must be called to finish the profile.
	StartCPUProfile(w io.Writer) (stop func(), err error)

	// StartTrace starts an execution trace whose output goes to w. The
	// returned stop function must be called to finish the trace.
	StartTrace(w io.Writer) (stop func(), err error)

	// LookupProfile writes the snapshot profile identified by name to w.
	LookupProfile(name string, w io.Writer) error
}
// defaultProfileCollector is the production ProfileCollector. Each
// method is a thin wrapper around the runtime, runtime/pprof or
// runtime/trace package.
type defaultProfileCollector struct{}

// StartCPUProfile starts the CPU profiler writing to w. On success the
// returned function stops the profiler; on failure it is nil.
func (defaultProfileCollector) StartCPUProfile(w io.Writer) (func(), error) {
	if err := pprof.StartCPUProfile(w); err != nil {
		return nil, err
	}
	return pprof.StopCPUProfile, nil
}

// StartTrace starts the execution tracer writing to w. On success the
// returned function stops the tracer; on failure it is nil.
func (defaultProfileCollector) StartTrace(w io.Writer) (func(), error) {
	if err := trace.Start(w); err != nil {
		return nil, err
	}
	return trace.Stop, nil
}

// LookupProfile writes the named runtime/pprof profile to w in the
// binary (debug=0) format. An unknown name is reported as an error
// rather than silently writing nothing, so a caller can never mistake a
// missing profile for an empty one.
func (defaultProfileCollector) LookupProfile(name string, w io.Writer) error {
	p := pprof.Lookup(name)
	if p == nil {
		return fmt.Errorf("unknown profile %q", name)
	}
	return p.WriteTo(w, 0)
}

// SetBlockProfileRate forwards to runtime.SetBlockProfileRate.
func (defaultProfileCollector) SetBlockProfileRate(rate int) { runtime.SetBlockProfileRate(rate) }

// SetMutexProfileFraction forwards to runtime.SetMutexProfileFraction
// and returns the value it reports.
func (defaultProfileCollector) SetMutexProfileFraction(rate int) int {
	return runtime.SetMutexProfileFraction(rate)
}
const (
	// profileDurationDefault applies when the request carries no
	// ?duration parameter.
	profileDurationDefault = 10 * time.Second

	// profileDurationMax caps ?duration so that a caller cannot tie up
	// the runtime-global CPU profiler for an arbitrarily long time.
	profileDurationMax = 60 * time.Second
)

var (
	// defaultProfiles lists the profiles collected when the request
	// does not name any.
	defaultProfiles = []string{"cpu", "heap", "allocs", "block", "mutex", "goroutine"}

	// allValidProfiles holds every profile name the endpoint accepts.
	allValidProfiles = map[string]bool{
		"cpu":          true,
		"heap":         true,
		"allocs":       true,
		"block":        true,
		"mutex":        true,
		"goroutine":    true,
		"threadcreate": true,
		"trace":        true,
	}
)
// debugCollectProfile collects the requested pprof/trace profiles and
// responds with a single tar.gz archive holding one file per profile
// ("<name>.prof", or "trace.out" for the execution trace).
//
// Query parameters:
//   - duration: how long the timed profiles (cpu, trace) run. Defaults
//     to profileDurationDefault; must be positive and no larger than
//     profileDurationMax.
//   - profiles: comma-separated profile names. Defaults to
//     defaultProfiles. Whitespace around a name is ignored and a name
//     that is repeated is collected only once.
//
// It responds with 400 for invalid parameters, 409 while another
// collection is running, 500 when a profile cannot be started,
// collected or archived, and 200 with the archive otherwise. Nothing is
// written if the request context ends while timed profiles are running.
//
// @Summary Collect debug profiles
// @ID collect-debug-profiles
// @Security CoderSessionToken
// @Tags Debug
// @Success 200
// @Router /debug/profile [post]
// @x-apidocgen {"skip": true}
func (api *API) debugCollectProfile(rw http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	// Parse duration.
	duration := profileDurationDefault
	if v := r.URL.Query().Get("duration"); v != "" {
		d, err := time.ParseDuration(v)
		if err != nil {
			httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
				Message: "Invalid duration parameter.",
				Detail:  err.Error(),
			})
			return
		}
		if d <= 0 {
			httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
				Message: "Duration must be positive.",
			})
			return
		}
		if d > profileDurationMax {
			httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
				Message: fmt.Sprintf("Duration cannot exceed %s.", profileDurationMax),
			})
			return
		}
		duration = d
	}

	// Parse requested profiles. Names are trimmed and de-duplicated
	// (keeping first-seen order) so that a request such as
	// "heap, heap" neither fails on the space nor emits two archive
	// entries with the same file name.
	profiles := defaultProfiles
	if v := r.URL.Query().Get("profiles"); v != "" {
		requested := strings.Split(v, ",")
		profiles = make([]string, 0, len(requested))
		seen := make(map[string]struct{}, len(requested))
		for _, p := range requested {
			p = strings.TrimSpace(p)
			if !allValidProfiles[p] {
				httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
					Message: fmt.Sprintf("Unknown profile type: %q.", p),
					Detail:  "Valid types: cpu, heap, allocs, block, mutex, goroutine, threadcreate, trace",
				})
				return
			}
			if _, dup := seen[p]; dup {
				continue
			}
			seen[p] = struct{}{}
			profiles = append(profiles, p)
		}
	}

	// Only one profile collection can run at a time because the CPU
	// profiler is process-global.
	if !api.ProfileCollecting.CompareAndSwap(false, true) {
		httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{
			Message: "A profile collection is already in progress. Try again later.",
		})
		return
	}
	defer api.ProfileCollecting.Store(false)

	// Temporarily enable block and mutex profiling so those profiles are
	// actually populated. Restore previous values when we are done.
	// SetBlockProfileRate does not return the previous value, so we
	// simply disable it again after collection (the default is 0).
	pc := api.ProfileCollector
	pc.SetBlockProfileRate(1)
	prevMutexFraction := pc.SetMutexProfileFraction(1)
	defer pc.SetBlockProfileRate(0)
	defer pc.SetMutexProfileFraction(prevMutexFraction)

	// Determine which profiles need the timed collection (cpu, trace) vs
	// instant snapshots.
	wantCPU := false
	wantTrace := false
	for _, p := range profiles {
		switch p {
		case "cpu":
			wantCPU = true
		case "trace":
			wantTrace = true
		}
	}

	// Collect timed profiles (cpu and/or trace) for the requested
	// duration. StartCPUProfile and StartTrace each return a stop
	// function that must be called to finish collection.
	var cpuBuf, traceBuf bytes.Buffer
	var stopCPU, stopTrace func()
	// stopTimed finishes whichever timed profiles were started.
	stopTimed := func() {
		if stopCPU != nil {
			stopCPU()
		}
		if stopTrace != nil {
			stopTrace()
		}
	}

	if wantCPU {
		var err error
		stopCPU, err = pc.StartCPUProfile(&cpuBuf)
		if err != nil {
			httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
				Message: "Failed to start CPU profile.",
				Detail:  err.Error(),
			})
			return
		}
	}
	if wantTrace {
		var err error
		stopTrace, err = pc.StartTrace(&traceBuf)
		if err != nil {
			// The trace never started, so only the CPU profile (if
			// any) has to be stopped before bailing out.
			if stopCPU != nil {
				stopCPU()
			}
			httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
				Message: "Failed to start trace.",
				Detail:  err.Error(),
			})
			return
		}
	}

	if wantCPU || wantTrace {
		timer := api.Clock.NewTimer(duration, "debugCollectProfile")
		defer timer.Stop()

		select {
		case <-ctx.Done():
			stopTimed()
			// Client disconnected; nothing to write.
			return
		case <-timer.C:
		}

		stopTimed()
	}

	// Build the tar.gz archive. It is assembled fully in memory so that
	// a failure part-way through can still be reported as an error
	// response.
	var archive bytes.Buffer
	gzw := gzip.NewWriter(&archive)
	tw := tar.NewWriter(gzw)

	addFile := func(name string, data []byte) error {
		hdr := &tar.Header{
			Name: name,
			Mode: 0o644,
			Size: int64(len(data)),
		}
		if err := tw.WriteHeader(hdr); err != nil {
			return xerrors.Errorf("write tar header for %s: %w", name, err)
		}
		if _, err := tw.Write(data); err != nil {
			return xerrors.Errorf("write tar data for %s: %w", name, err)
		}
		return nil
	}

	for _, p := range profiles {
		switch p {
		case "cpu":
			if err := addFile("cpu.prof", cpuBuf.Bytes()); err != nil {
				httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
					Message: "Failed to write CPU profile to archive.",
					Detail:  err.Error(),
				})
				return
			}
		case "trace":
			if err := addFile("trace.out", traceBuf.Bytes()); err != nil {
				httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
					Message: "Failed to write trace to archive.",
					Detail:  err.Error(),
				})
				return
			}
		default:
			// Snapshot profiles: heap, allocs, block, mutex, goroutine,
			// threadcreate.
			var buf bytes.Buffer
			if err := pc.LookupProfile(p, &buf); err != nil {
				httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
					Message: fmt.Sprintf("Failed to collect %s profile.", p),
					Detail:  err.Error(),
				})
				return
			}
			if err := addFile(p+".prof", buf.Bytes()); err != nil {
				httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
					Message: fmt.Sprintf("Failed to write %s profile to archive.", p),
					Detail:  err.Error(),
				})
				return
			}
		}
	}

	if err := tw.Close(); err != nil {
		httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
			Message: "Failed to finalize tar archive.",
			Detail:  err.Error(),
		})
		return
	}
	if err := gzw.Close(); err != nil {
		httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
			Message: "Failed to finalize gzip archive.",
			Detail:  err.Error(),
		})
		return
	}

	filename := fmt.Sprintf("coderd-profile-%d.tar.gz", time.Now().Unix())
	rw.Header().Set("Content-Type", "application/gzip")
	rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filename))
	rw.WriteHeader(http.StatusOK)
	// The response is already committed; a write error here can only
	// mean the client went away, so there is nothing left to report.
	_, _ = rw.Write(archive.Bytes())
}
// @Summary Debug pprof index // @Summary Debug pprof index
// @ID debug-pprof-index // @ID debug-pprof-index
// @Security CoderSessionToken // @Security CoderSessionToken
+255
View File
@@ -1,6 +1,9 @@
package coderd_test package coderd_test
import ( import (
"archive/tar"
"bytes"
"compress/gzip"
"context" "context"
"encoding/json" "encoding/json"
"io" "io"
@@ -13,8 +16,11 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"cdr.dev/slog/v3/sloggers/slogtest" "cdr.dev/slog/v3/sloggers/slogtest"
"github.com/coder/coder/v2/coderd"
"github.com/coder/coder/v2/coderd/coderdtest" "github.com/coder/coder/v2/coderd/coderdtest"
"github.com/coder/coder/v2/coderd/healthcheck" "github.com/coder/coder/v2/coderd/healthcheck"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/coderd/rbac/policy"
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/healthsdk" "github.com/coder/coder/v2/codersdk/healthsdk"
"github.com/coder/coder/v2/testutil" "github.com/coder/coder/v2/testutil"
@@ -370,3 +376,252 @@ func TestDebugWebsocket(t *testing.T) {
t.Parallel() t.Parallel()
}) })
} }
// noopProfileCollector is a collector stub whose methods do nothing and
// always succeed. Because it never touches the process-global CPU
// profiler or tracer, tests using it can safely run in parallel.
type noopProfileCollector struct{}

// StartCPUProfile reports success and hands back a stop function that
// does nothing.
func (noopProfileCollector) StartCPUProfile(io.Writer) (func(), error) {
	stop := func() {}
	return stop, nil
}

// StartTrace reports success and hands back a stop function that does
// nothing.
func (noopProfileCollector) StartTrace(io.Writer) (func(), error) {
	stop := func() {}
	return stop, nil
}

// LookupProfile writes nothing and reports success.
func (noopProfileCollector) LookupProfile(string, io.Writer) error {
	return nil
}

// SetBlockProfileRate ignores the requested rate.
func (noopProfileCollector) SetBlockProfileRate(int) {
}

// SetMutexProfileFraction ignores the requested fraction and always
// reports a previous fraction of 0.
func (noopProfileCollector) SetMutexProfileFraction(int) int {
	return 0
}
// Compile-time check.
var _ coderd.ProfileCollector = noopProfileCollector{}
// blockingProfileCollector parks every StartCPUProfile call until the
// test releases it, which makes the endpoint's concurrency guard
// testable without relying on timing. All other methods come from the
// embedded noopProfileCollector.
type blockingProfileCollector struct {
	noopProfileCollector

	// started is closed as soon as StartCPUProfile is entered.
	started chan struct{}
	// block holds StartCPUProfile back until it is closed.
	block chan struct{}
}

// StartCPUProfile signals that it has been entered by closing started,
// waits for block to be closed, and then succeeds with a stop function
// that does nothing.
func (c *blockingProfileCollector) StartCPUProfile(io.Writer) (func(), error) {
	close(c.started)
	<-c.block
	stop := func() {}
	return stop, nil
}
// newTestAPI starts a test coderd through coderdtest.NewWithAPI and
// replaces its ProfileCollector with noopProfileCollector, so that
// requests to /debug/profile do not touch the real runtime profilers.
func newTestAPI(t *testing.T) (*codersdk.Client, io.Closer, *coderd.API) {
	t.Helper()

	sdkClient, srvCloser, srvAPI := coderdtest.NewWithAPI(t, nil)
	srvAPI.ProfileCollector = noopProfileCollector{}

	return sdkClient, srvCloser, srvAPI
}
// TestDebugCollectProfile exercises POST /api/v2/debug/profile: the
// default and custom profile sets, combined cpu+trace collection,
// parameter validation (400), authorization (403) and the
// one-collection-at-a-time guard (409).
func TestDebugCollectProfile(t *testing.T) {
	t.Parallel()

	t.Run("Defaults", func(t *testing.T) {
		t.Parallel()
		ctx := testutil.Context(t, testutil.WaitLong)
		client, closer, api := newTestAPI(t)
		defer closer.Close()
		_ = coderdtest.CreateFirstUser(t, client)

		asserter := coderdtest.AssertRBAC(t, api, client)

		body, err := client.DebugCollectProfile(ctx, codersdk.DebugProfileOptions{
			// Use a very short duration so the test finishes quickly.
			// The noop collector means no real profiling occurs.
			Duration: 100 * time.Millisecond,
		})
		require.NoError(t, err)
		defer body.Close()

		data, err := io.ReadAll(body)
		require.NoError(t, err)
		require.NotEmpty(t, data, "archive should not be empty")

		// Verify that the response is a valid tar.gz archive containing
		// the expected profile files.
		files := extractTarGzFiles(t, data)
		require.Contains(t, files, "cpu.prof")
		require.Contains(t, files, "heap.prof")
		require.Contains(t, files, "allocs.prof")
		require.Contains(t, files, "block.prof")
		require.Contains(t, files, "mutex.prof")
		require.Contains(t, files, "goroutine.prof")

		// Verify the endpoint checks the correct RBAC permission.
		asserter.AssertChecked(t, policy.ActionRead, rbac.ResourceDebugInfo)
	})

	t.Run("CustomProfiles", func(t *testing.T) {
		t.Parallel()
		ctx := testutil.Context(t, testutil.WaitLong)
		client, closer, _ := newTestAPI(t)
		defer closer.Close()
		_ = coderdtest.CreateFirstUser(t, client)

		body, err := client.DebugCollectProfile(ctx, codersdk.DebugProfileOptions{
			Duration: 100 * time.Millisecond,
			Profiles: []string{"heap", "goroutine"},
		})
		require.NoError(t, err)
		defer body.Close()

		data, err := io.ReadAll(body)
		require.NoError(t, err)

		files := extractTarGzFiles(t, data)
		require.Contains(t, files, "heap.prof")
		require.Contains(t, files, "goroutine.prof")
		// Should NOT contain profiles we didn't ask for.
		require.NotContains(t, files, "cpu.prof")
		require.NotContains(t, files, "allocs.prof")
	})

	t.Run("WithTraceAndCPU", func(t *testing.T) {
		t.Parallel()
		ctx := testutil.Context(t, testutil.WaitLong)
		client, closer, _ := newTestAPI(t)
		defer closer.Close()
		_ = coderdtest.CreateFirstUser(t, client)

		body, err := client.DebugCollectProfile(ctx, codersdk.DebugProfileOptions{
			Duration: 100 * time.Millisecond,
			Profiles: []string{"cpu", "trace"},
		})
		require.NoError(t, err)
		defer body.Close()

		data, err := io.ReadAll(body)
		require.NoError(t, err)

		files := extractTarGzFiles(t, data)
		require.Contains(t, files, "cpu.prof")
		require.Contains(t, files, "trace.out")
	})

	t.Run("DurationTooLong", func(t *testing.T) {
		t.Parallel()
		ctx := testutil.Context(t, testutil.WaitShort)
		client := coderdtest.New(t, nil)
		_ = coderdtest.CreateFirstUser(t, client)

		res, err := client.Request(ctx, "POST", "/api/v2/debug/profile?duration=5m", nil)
		require.NoError(t, err)
		defer res.Body.Close()
		require.Equal(t, http.StatusBadRequest, res.StatusCode)
	})

	t.Run("InvalidDuration", func(t *testing.T) {
		t.Parallel()
		ctx := testutil.Context(t, testutil.WaitShort)
		client := coderdtest.New(t, nil)
		_ = coderdtest.CreateFirstUser(t, client)

		res, err := client.Request(ctx, "POST", "/api/v2/debug/profile?duration=notaduration", nil)
		require.NoError(t, err)
		defer res.Body.Close()
		require.Equal(t, http.StatusBadRequest, res.StatusCode)
	})

	t.Run("InvalidProfile", func(t *testing.T) {
		t.Parallel()
		ctx := testutil.Context(t, testutil.WaitShort)
		client := coderdtest.New(t, nil)
		_ = coderdtest.CreateFirstUser(t, client)

		res, err := client.Request(ctx, "POST", "/api/v2/debug/profile?profiles=nonexistent", nil)
		require.NoError(t, err)
		defer res.Body.Close()
		require.Equal(t, http.StatusBadRequest, res.StatusCode)
	})

	t.Run("Unauthorized", func(t *testing.T) {
		t.Parallel()
		ctx := testutil.Context(t, testutil.WaitShort)
		client := coderdtest.New(t, nil)
		firstUser := coderdtest.CreateFirstUser(t, client)

		// Create a non-admin user.
		memberClient, _ := coderdtest.CreateAnotherUser(t, client, firstUser.OrganizationID)

		res, err := memberClient.Request(ctx, "POST", "/api/v2/debug/profile", nil)
		require.NoError(t, err)
		defer res.Body.Close()
		require.Equal(t, http.StatusForbidden, res.StatusCode)
	})

	t.Run("Conflict", func(t *testing.T) {
		t.Parallel()
		ctx := testutil.Context(t, testutil.WaitLong)

		blocker := &blockingProfileCollector{
			started: make(chan struct{}),
			block:   make(chan struct{}),
		}

		client, closer, api := coderdtest.NewWithAPI(t, nil)
		defer closer.Close()
		api.ProfileCollector = blocker
		_ = coderdtest.CreateFirstUser(t, client)

		// Release the blocked handler exactly once, no matter how the
		// test exits. Without this, an assertion failing before the
		// explicit unblock below would leave the first request parked
		// inside StartCPUProfile for good. unblock is only ever called
		// from this goroutine, so a plain bool is enough. The defer is
		// registered after closer.Close so that it runs first.
		unblocked := false
		unblock := func() {
			if unblocked {
				return
			}
			unblocked = true
			close(blocker.block)
		}
		defer unblock()

		// Start a profile collection that will block inside
		// StartCPUProfile until we explicitly unblock it.
		done := make(chan struct{})
		go func() {
			defer close(done)
			body, err := client.DebugCollectProfile(ctx, codersdk.DebugProfileOptions{
				Duration: 1 * time.Second,
			})
			if err == nil {
				body.Close()
			}
		}()

		// Wait deterministically for the first request to enter the
		// collector — no time.Sleep needed.
		testutil.TryReceive(ctx, t, blocker.started)

		// The second request should get 409 Conflict.
		res, err := client.Request(ctx, "POST", "/api/v2/debug/profile?duration=1s", nil)
		require.NoError(t, err)
		defer res.Body.Close()
		require.Equal(t, http.StatusConflict, res.StatusCode)

		// Unblock the first request and wait for it to finish.
		unblock()
		testutil.TryReceive(ctx, t, done)
	})
}
// extractTarGzFiles decodes data as a gzip-compressed tar archive and
// returns the set of entry names found in it. Any decoding error fails
// the test immediately.
func extractTarGzFiles(t *testing.T, data []byte) map[string]bool {
	t.Helper()

	gzReader, err := gzip.NewReader(bytes.NewReader(data))
	require.NoError(t, err)
	defer gzReader.Close()

	names := make(map[string]bool)
	tarReader := tar.NewReader(gzReader)
	for {
		entry, err := tarReader.Next()
		if err == io.EOF {
			// End of archive: every entry has been recorded.
			return names
		}
		require.NoError(t, err)
		names[entry.Name] = true
	}
}
+56
View File
@@ -0,0 +1,56 @@
package codersdk
import (
"context"
"io"
"net/http"
"net/url"
"strings"
"time"
"golang.org/x/xerrors"
)
// DebugProfileOptions are options for collecting debug profiles from the
// server via the consolidated /debug/profile endpoint.
type DebugProfileOptions struct {
	// Duration controls how long time-based profiles (cpu, trace) run.
	// Zero uses the server default (10s).
	Duration time.Duration
	// Profiles is the list of profile types to collect. Nil or empty uses
	// the server default (cpu, heap, allocs, block, mutex, goroutine).
	Profiles []string
}

// DebugProfileDurationMax is the longest collection duration the server
// accepts. Give the request context a deadline beyond this value so the
// request is not canceled before the collection finishes.
const DebugProfileDurationMax = time.Minute
// DebugCollectProfile asks the server's /api/v2/debug/profile endpoint
// for a tar.gz archive of pprof profiles. Only options that are set
// (a positive Duration, a non-empty Profiles list) are sent as query
// parameters. On success the response body is returned unread and the
// caller must close it; any non-200 response is converted into an error
// and its body is closed here.
func (c *Client) DebugCollectProfile(ctx context.Context, opts DebugProfileOptions) (io.ReadCloser, error) {
	query := url.Values{}
	if len(opts.Profiles) > 0 {
		query.Set("profiles", strings.Join(opts.Profiles, ","))
	}
	if opts.Duration > 0 {
		query.Set("duration", opts.Duration.String())
	}

	endpoint := "/api/v2/debug/profile"
	if encoded := query.Encode(); encoded != "" {
		endpoint += "?" + encoded
	}

	resp, err := c.Request(ctx, http.MethodPost, endpoint, nil)
	if err != nil {
		return nil, xerrors.Errorf("request debug profile: %w", err)
	}
	if resp.StatusCode == http.StatusOK {
		return resp.Body, nil
	}

	defer resp.Body.Close()
	return nil, ReadBodyAsError(resp)
}
+18
View File
@@ -2369,6 +2369,24 @@ export interface DatabaseReport extends BaseReport {
readonly threshold_ms: number; readonly threshold_ms: number;
} }
// From codersdk/debug.go
/**
* DebugProfileOptions are options for collecting debug profiles from the
* server via the consolidated /debug/profile endpoint.
*/
export interface DebugProfileOptions {
/**
* Duration controls how long time-based profiles (cpu, trace) run.
* Zero uses the server default (10s).
*/
readonly Duration: number;
/**
* Profiles is the list of profile types to collect. Nil or empty uses
* the server default (cpu, heap, allocs, block, mutex, goroutine).
*/
readonly Profiles: readonly string[];
}
// From codersdk/externalauth.go // From codersdk/externalauth.go
export interface DeleteExternalAuthByIDResponse { export interface DeleteExternalAuthByIDResponse {
/** /**
+100 -1
View File
@@ -1,15 +1,18 @@
package support package support
import ( import (
"archive/tar"
"bytes" "bytes"
"compress/gzip" "compress/gzip"
"context" "context"
"encoding/base64" "encoding/base64"
"encoding/json" "encoding/json"
"errors"
"io" "io"
"net" "net"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"path"
"strings" "strings"
"time" "time"
@@ -772,6 +775,93 @@ func compressData(data []byte) []byte {
return buf.Bytes() return buf.Bytes()
} }
// PprofInfoFromArchive uses the consolidated /api/v2/debug/profile endpoint
// to collect pprof data in a single request. The server temporarily enables
// block/mutex profiling, runs time-based profiles for the given duration,
// takes snapshots, and returns a tar.gz archive.
//
// Every recognized archive entry is compressed with compressData and
// stored in the matching PprofCollection field; unrecognized entries are
// logged at debug level and skipped. An error is returned when client is
// nil, when the request fails, or when the archive cannot be read as
// gzip/tar. An entry whose content cannot be read is logged as a warning
// and skipped.
func PprofInfoFromArchive(ctx context.Context, client *codersdk.Client, log slog.Logger, duration time.Duration) (*PprofCollection, error) {
	if client == nil {
		return nil, xerrors.New("client is nil")
	}

	body, err := client.DebugCollectProfile(ctx, codersdk.DebugProfileOptions{
		Duration: duration,
		// Use the server defaults plus threadcreate and trace.
		Profiles: []string{"cpu", "heap", "allocs", "block", "mutex", "goroutine", "threadcreate", "trace"},
	})
	if err != nil {
		return nil, xerrors.Errorf("fetch consolidated profile: %w", err)
	}
	defer body.Close()

	data, err := io.ReadAll(body)
	if err != nil {
		return nil, xerrors.Errorf("read profile archive: %w", err)
	}

	var p PprofCollection
	if client.URL != nil {
		if u, err := client.URL.Parse("/api/v2/debug/profile"); err == nil {
			p.EndpointURL = u.String()
		}
	}
	// Fall back to the bare path when no absolute URL could be built.
	if p.EndpointURL == "" {
		p.EndpointURL = "/api/v2/debug/profile"
	}
	p.CollectedAt = time.Now()

	// Parse the tar.gz archive and populate the PprofCollection.
	gr, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return nil, xerrors.Errorf("open gzip reader: %w", err)
	}
	defer gr.Close()

	tr := tar.NewReader(gr)
	for {
		hdr, err := tr.Next()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			// hdr is nil whenever Next fails, so it must not be
			// dereferenced while building the error.
			return nil, xerrors.Errorf("read tar entry: %w", err)
		}

		content, err := io.ReadAll(tr)
		if err != nil {
			log.Warn(ctx, "failed to read tar entry", slog.F("name", hdr.Name), slog.Error(err))
			continue
		}

		// Files in the archive are named like "cpu.prof", "heap.prof",
		// "trace.out", etc. Compress binary profile data for storage in
		// the bundle, matching what PprofInfo() does.
		base := path.Base(hdr.Name)
		switch base {
		case "cpu.prof":
			p.Profile = compressData(content)
		case "heap.prof":
			p.Heap = compressData(content)
		case "allocs.prof":
			p.Allocs = compressData(content)
		case "block.prof":
			p.Block = compressData(content)
		case "mutex.prof":
			p.Mutex = compressData(content)
		case "goroutine.prof":
			p.Goroutine = compressData(content)
		case "threadcreate.prof":
			p.Threadcreate = compressData(content)
		case "trace.out":
			p.Trace = compressData(content)
		default:
			log.Debug(ctx, "unknown profile in archive", slog.F("name", hdr.Name))
		}
	}

	return &p, nil
}
func PprofInfoFromAgent(ctx context.Context, conn workspacesdk.AgentConn, log slog.Logger) *PprofCollection { func PprofInfoFromAgent(ctx context.Context, conn workspacesdk.AgentConn, log slog.Logger) *PprofCollection {
if conn == nil { if conn == nil {
return nil return nil
@@ -1049,7 +1139,16 @@ func collectPprof(ctx context.Context, d *Deps, b *Bundle) Pprof {
return pprof return pprof
} }
serverPprof := PprofInfo(ctx, d.Client, d.Log) // Try the consolidated /debug/profile endpoint first. It
// temporarily enables block/mutex profiling on the server and
// returns a single tar.gz archive.
serverPprof, err := PprofInfoFromArchive(ctx, d.Client, d.Log, 30*time.Second)
if err != nil {
d.Log.Warn(ctx, "consolidated profile endpoint unavailable, falling back to individual endpoints",
slog.Error(err))
// Fall back to the legacy per-profile endpoint approach.
serverPprof = PprofInfo(ctx, d.Client, d.Log)
}
if serverPprof != nil { if serverPprof != nil {
pprof.Server = serverPprof pprof.Server = serverPprof
} }