package coderd import ( "archive/tar" "bytes" "compress/gzip" "context" "database/sql" "encoding/json" "fmt" "io" "net/http" "runtime" "runtime/pprof" "runtime/trace" "slices" "strings" "time" "github.com/google/uuid" "golang.org/x/xerrors" "cdr.dev/slog/v3" "github.com/coder/coder/v2/coderd/audit" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/httpapi" "github.com/coder/coder/v2/coderd/httpmw" "github.com/coder/coder/v2/coderd/rbac" "github.com/coder/coder/v2/coderd/rbac/policy" "github.com/coder/coder/v2/coderd/util/slice" "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk/healthsdk" ) // @Summary Debug Info Wireguard Coordinator // @ID debug-info-wireguard-coordinator // @Security CoderSessionToken // @Produce text/html // @Tags Debug // @Success 200 // @Router /debug/coordinator [get] func (api *API) debugCoordinator(rw http.ResponseWriter, r *http.Request) { (*api.TailnetCoordinator.Load()).ServeHTTPDebug(rw, r) } // @Summary Debug Info Tailnet // @ID debug-info-tailnet // @Security CoderSessionToken // @Produce text/html // @Tags Debug // @Success 200 // @Router /debug/tailnet [get] func (api *API) debugTailnet(rw http.ResponseWriter, r *http.Request) { api.agentProvider.ServeHTTPDebug(rw, r) } // @Summary Debug Info Deployment Health // @ID debug-info-deployment-health // @Security CoderSessionToken // @Produce json // @Tags Debug // @Success 200 {object} healthsdk.HealthcheckReport // @Router /debug/health [get] // @Param force query boolean false "Force a healthcheck to run" func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) { apiKey := httpmw.APITokenFromRequest(r) ctx, cancel := context.WithTimeout(r.Context(), api.Options.HealthcheckTimeout) defer cancel() // Load sections previously marked as dismissed. // We hydrate this here as we cache the healthcheck and hydrating in the // healthcheck function itself can lead to stale results. dismissed := loadDismissedHealthchecks(ctx, api.Database, api.Logger) // Check if the forced query parameter is set. forced := r.URL.Query().Get("force") == "true" // Get cached report if it exists and the requester did not force a refresh. if !forced { if report := api.healthCheckCache.Load(); report != nil { if time.Since(report.Time) < api.Options.HealthcheckRefresh { formatHealthcheck(ctx, rw, r, *report, dismissed...) return } } } resChan := api.healthCheckGroup.DoChan("", func() (*healthsdk.HealthcheckReport, error) { // Create a new context not tied to the request. ctx, cancel := context.WithTimeout(context.Background(), api.Options.HealthcheckTimeout) defer cancel() // Create and store progress tracker for timeout diagnostics. report := api.HealthcheckFunc(ctx, apiKey, &api.healthCheckProgress) if report != nil { // Only store non-nil reports. api.healthCheckCache.Store(report) } api.healthCheckProgress.Reset() return report, nil }) select { case <-ctx.Done(): summary := api.healthCheckProgress.Summary() httpapi.Write(ctx, rw, http.StatusServiceUnavailable, codersdk.Response{ Message: "Healthcheck timed out.", Detail: summary, }) return case res := <-resChan: report := res.Val if report == nil { httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: "There was an unknown error completing the healthcheck.", Detail: "nil report from healthcheck result channel", }) return } formatHealthcheck(ctx, rw, r, *report, dismissed...) return } } func formatHealthcheck(ctx context.Context, rw http.ResponseWriter, r *http.Request, hc healthsdk.HealthcheckReport, dismissed ...healthsdk.HealthSection) { // Mark any sections previously marked as dismissed. for _, d := range dismissed { switch d { case healthsdk.HealthSectionAccessURL: hc.AccessURL.Dismissed = true case healthsdk.HealthSectionDERP: hc.DERP.Dismissed = true case healthsdk.HealthSectionDatabase: hc.Database.Dismissed = true case healthsdk.HealthSectionWebsocket: hc.Websocket.Dismissed = true case healthsdk.HealthSectionWorkspaceProxy: hc.WorkspaceProxy.Dismissed = true } } format := r.URL.Query().Get("format") switch format { case "text": rw.Header().Set("Content-Type", "text/plain; charset=utf-8") rw.WriteHeader(http.StatusOK) _, _ = fmt.Fprintln(rw, "time:", hc.Time.Format(time.RFC3339)) _, _ = fmt.Fprintln(rw, "healthy:", hc.Healthy) _, _ = fmt.Fprintln(rw, "derp:", hc.DERP.Healthy) _, _ = fmt.Fprintln(rw, "access_url:", hc.AccessURL.Healthy) _, _ = fmt.Fprintln(rw, "websocket:", hc.Websocket.Healthy) _, _ = fmt.Fprintln(rw, "database:", hc.Database.Healthy) case "", "json": httpapi.WriteIndent(ctx, rw, http.StatusOK, hc) default: httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ Message: fmt.Sprintf("Invalid format option %q.", format), Detail: "Allowed values are: \"json\", \"simple\".", }) } } // @Summary Get health settings // @ID get-health-settings // @Security CoderSessionToken // @Produce json // @Tags Debug // @Success 200 {object} healthsdk.HealthSettings // @Router /debug/health/settings [get] func (api *API) deploymentHealthSettings(rw http.ResponseWriter, r *http.Request) { settingsJSON, err := api.Database.GetHealthSettings(r.Context()) if err != nil { httpapi.Write(r.Context(), rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to fetch health settings.", Detail: err.Error(), }) return } var settings healthsdk.HealthSettings err = json.Unmarshal([]byte(settingsJSON), &settings) if err != nil { httpapi.Write(r.Context(), rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to unmarshal health settings.", Detail: err.Error(), }) return } if len(settings.DismissedHealthchecks) == 0 { settings.DismissedHealthchecks = []healthsdk.HealthSection{} } httpapi.Write(r.Context(), rw, http.StatusOK, settings) } // @Summary Update health settings // @ID update-health-settings // @Security CoderSessionToken // @Accept json // @Produce json // @Tags Debug // @Param request body healthsdk.UpdateHealthSettings true "Update health settings" // @Success 200 {object} healthsdk.UpdateHealthSettings // @Router /debug/health/settings [put] func (api *API) putDeploymentHealthSettings(rw http.ResponseWriter, r *http.Request) { ctx := r.Context() if !api.Authorize(r, policy.ActionUpdate, rbac.ResourceDeploymentConfig) { httpapi.Write(ctx, rw, http.StatusForbidden, codersdk.Response{ Message: "Insufficient permissions to update health settings.", }) return } var settings healthsdk.HealthSettings if !httpapi.Read(ctx, rw, r, &settings) { return } err := validateHealthSettings(settings) if err != nil { httpapi.Write(r.Context(), rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to validate health settings.", Detail: err.Error(), }) return } settingsJSON, err := json.Marshal(&settings) if err != nil { httpapi.Write(r.Context(), rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to marshal health settings.", Detail: err.Error(), }) return } currentSettingsJSON, err := api.Database.GetHealthSettings(r.Context()) if err != nil { httpapi.Write(r.Context(), rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to fetch current health settings.", Detail: err.Error(), }) return } if bytes.Equal(settingsJSON, []byte(currentSettingsJSON)) { // See: https://www.rfc-editor.org/rfc/rfc7231#section-6.3.5 rw.WriteHeader(http.StatusNoContent) return } auditor := api.Auditor.Load() aReq, commitAudit := audit.InitRequest[database.HealthSettings](rw, &audit.RequestParams{ Audit: *auditor, Log: api.Logger, Request: r, Action: database.AuditActionWrite, }) defer commitAudit() aReq.New = database.HealthSettings{ ID: uuid.New(), DismissedHealthchecks: slice.ToStrings(settings.DismissedHealthchecks), } err = api.Database.UpsertHealthSettings(ctx, string(settingsJSON)) if err != nil { httpapi.Write(r.Context(), rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to update health settings.", Detail: err.Error(), }) return } httpapi.Write(r.Context(), rw, http.StatusOK, settings) } func validateHealthSettings(settings healthsdk.HealthSettings) error { for _, dismissed := range settings.DismissedHealthchecks { ok := slices.Contains(healthsdk.HealthSections, dismissed) if !ok { return xerrors.Errorf("unknown healthcheck section: %s", dismissed) } } return nil } // For some reason the swagger docs need to be attached to a function. // @Summary Debug Info Websocket Test // @ID debug-info-websocket-test // @Security CoderSessionToken // @Produce json // @Tags Debug // @Success 201 {object} codersdk.Response // @Router /debug/ws [get] // @x-apidocgen {"skip": true} func _debugws(http.ResponseWriter, *http.Request) {} //nolint:unused // @Summary Debug DERP traffic // @ID debug-derp-traffic // @Security CoderSessionToken // @Produce json // @Success 200 {array} derp.BytesSentRecv // @Tags Debug // @Router /debug/derp/traffic [get] // @x-apidocgen {"skip": true} func _debugDERPTraffic(http.ResponseWriter, *http.Request) {} //nolint:unused // @Summary Debug expvar // @ID debug-expvar // @Security CoderSessionToken // @Produce json // @Tags Debug // @Success 200 {object} map[string]any // @Router /debug/expvar [get] // @x-apidocgen {"skip": true} func _debugExpVar(http.ResponseWriter, *http.Request) {} //nolint:unused func loadDismissedHealthchecks(ctx context.Context, db database.Store, logger slog.Logger) []healthsdk.HealthSection { dismissedHealthchecks := []healthsdk.HealthSection{} settingsJSON, err := db.GetHealthSettings(ctx) if err == nil { var settings healthsdk.HealthSettings err = json.Unmarshal([]byte(settingsJSON), &settings) if len(settings.DismissedHealthchecks) > 0 { dismissedHealthchecks = settings.DismissedHealthchecks } } if err != nil && !xerrors.Is(err, sql.ErrNoRows) { logger.Error(ctx, "unable to fetch health settings", slog.Error(err)) } return dismissedHealthchecks } // ProfileCollector abstracts the mechanics of collecting pprof/trace // data from the Go runtime. Production code uses defaultProfileCollector; // tests can substitute a stub to avoid process-global side-effects. type ProfileCollector interface { // StartCPUProfile begins CPU profiling, writing to w. It returns // a stop function that must be called to finish profiling. StartCPUProfile(w io.Writer) (stop func(), err error) // StartTrace begins execution tracing, writing to w. It returns // a stop function that must be called to finish tracing. StartTrace(w io.Writer) (stop func(), err error) // LookupProfile writes the named snapshot profile to w. LookupProfile(name string, w io.Writer) error // SetBlockProfileRate enables/disables block profiling. SetBlockProfileRate(rate int) // SetMutexProfileFraction enables/disables mutex profiling. // Returns the previous fraction. SetMutexProfileFraction(rate int) int } // defaultProfileCollector delegates to the real runtime/pprof and // runtime/trace packages. type defaultProfileCollector struct{} func (defaultProfileCollector) StartCPUProfile(w io.Writer) (func(), error) { if err := pprof.StartCPUProfile(w); err != nil { return nil, err } return pprof.StopCPUProfile, nil } func (defaultProfileCollector) StartTrace(w io.Writer) (func(), error) { if err := trace.Start(w); err != nil { return nil, err } return trace.Stop, nil } func (defaultProfileCollector) LookupProfile(name string, w io.Writer) error { p := pprof.Lookup(name) if p == nil { return nil } return p.WriteTo(w, 0) } func (defaultProfileCollector) SetBlockProfileRate(rate int) { runtime.SetBlockProfileRate(rate) } func (defaultProfileCollector) SetMutexProfileFraction(rate int) int { return runtime.SetMutexProfileFraction(rate) } // defaultProfiles is the set of profiles collected when none are specified. var defaultProfiles = []string{"cpu", "heap", "allocs", "block", "mutex", "goroutine"} // allValidProfiles enumerates every profile name accepted by the endpoint. var allValidProfiles = map[string]bool{ "cpu": true, "heap": true, "allocs": true, "block": true, "mutex": true, "goroutine": true, "threadcreate": true, "trace": true, } const ( // profileDurationDefault is used when no ?duration is supplied. profileDurationDefault = 10 * time.Second // profileDurationMax prevents callers from asking for arbitrarily long // collections that tie up the runtime-global CPU profiler. profileDurationMax = 60 * time.Second ) // @Summary Collect debug profiles // @ID collect-debug-profiles // @Security CoderSessionToken // @Tags Debug // @Success 200 // @Router /debug/profile [post] // @x-apidocgen {"skip": true} func (api *API) debugCollectProfile(rw http.ResponseWriter, r *http.Request) { ctx := r.Context() // Parse duration. duration := profileDurationDefault if v := r.URL.Query().Get("duration"); v != "" { d, err := time.ParseDuration(v) if err != nil { httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ Message: "Invalid duration parameter.", Detail: err.Error(), }) return } if d <= 0 { httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ Message: "Duration must be positive.", }) return } if d > profileDurationMax { httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ Message: fmt.Sprintf("Duration cannot exceed %s.", profileDurationMax), }) return } duration = d } // Parse requested profiles. profiles := defaultProfiles if v := r.URL.Query().Get("profiles"); v != "" { profiles = strings.Split(v, ",") for _, p := range profiles { if !allValidProfiles[p] { httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{ Message: fmt.Sprintf("Unknown profile type: %q.", p), Detail: "Valid types: cpu, heap, allocs, block, mutex, goroutine, threadcreate, trace", }) return } } } // Only one profile collection can run at a time because the CPU // profiler is process-global. if !api.ProfileCollecting.CompareAndSwap(false, true) { httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{ Message: "A profile collection is already in progress. Try again later.", }) return } defer api.ProfileCollecting.Store(false) // Temporarily enable block and mutex profiling so those profiles are // actually populated. Restore previous values when we are done. // SetBlockProfileRate does not return the previous value, so we // simply disable it again after collection (the default is 0). pc := api.ProfileCollector pc.SetBlockProfileRate(1) prevMutexFraction := pc.SetMutexProfileFraction(1) defer pc.SetBlockProfileRate(0) defer pc.SetMutexProfileFraction(prevMutexFraction) // Determine which profiles need the timed collection (cpu, trace) vs // instant snapshots. wantCPU := false wantTrace := false for _, p := range profiles { switch p { case "cpu": wantCPU = true case "trace": wantTrace = true } } // Collect timed profiles (cpu and/or trace) for the requested // duration. StartCPUProfile and StartTrace each return a stop // function that must be called to finish collection. var cpuBuf, traceBuf bytes.Buffer var stopCPU, stopTrace func() if wantCPU { var err error stopCPU, err = pc.StartCPUProfile(&cpuBuf) if err != nil { httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to start CPU profile.", Detail: err.Error(), }) return } } if wantTrace { var err error stopTrace, err = pc.StartTrace(&traceBuf) if err != nil { if stopCPU != nil { stopCPU() } httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to start trace.", Detail: err.Error(), }) return } } if wantCPU || wantTrace { timer := api.Clock.NewTimer(duration, "debugCollectProfile") defer timer.Stop() select { case <-ctx.Done(): if stopCPU != nil { stopCPU() } if stopTrace != nil { stopTrace() } // Client disconnected; nothing to write. return case <-timer.C: } if stopCPU != nil { stopCPU() } if stopTrace != nil { stopTrace() } } // Build the tar.gz archive. var archive bytes.Buffer gzw := gzip.NewWriter(&archive) tw := tar.NewWriter(gzw) addFile := func(name string, data []byte) error { hdr := &tar.Header{ Name: name, Mode: 0o644, Size: int64(len(data)), } if err := tw.WriteHeader(hdr); err != nil { return xerrors.Errorf("write tar header for %s: %w", name, err) } if _, err := tw.Write(data); err != nil { return xerrors.Errorf("write tar data for %s: %w", name, err) } return nil } for _, p := range profiles { switch p { case "cpu": if err := addFile("cpu.prof", cpuBuf.Bytes()); err != nil { httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to write CPU profile to archive.", Detail: err.Error(), }) return } case "trace": if err := addFile("trace.out", traceBuf.Bytes()); err != nil { httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to write trace to archive.", Detail: err.Error(), }) return } default: // Snapshot profiles: heap, allocs, block, mutex, goroutine, // threadcreate. var buf bytes.Buffer if err := pc.LookupProfile(p, &buf); err != nil { httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: fmt.Sprintf("Failed to collect %s profile.", p), Detail: err.Error(), }) return } if err := addFile(p+".prof", buf.Bytes()); err != nil { httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: fmt.Sprintf("Failed to write %s profile to archive.", p), Detail: err.Error(), }) return } } } if err := tw.Close(); err != nil { httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to finalize tar archive.", Detail: err.Error(), }) return } if err := gzw.Close(); err != nil { httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{ Message: "Failed to finalize gzip archive.", Detail: err.Error(), }) return } filename := fmt.Sprintf("coderd-profile-%d.tar.gz", time.Now().Unix()) rw.Header().Set("Content-Type", "application/gzip") rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filename)) rw.WriteHeader(http.StatusOK) _, _ = rw.Write(archive.Bytes()) } // @Summary Debug pprof index // @ID debug-pprof-index // @Security CoderSessionToken // @Success 200 // @Tags Debug // @Router /debug/pprof [get] // @x-apidocgen {"skip": true} func _debugPprofIndex(http.ResponseWriter, *http.Request) {} //nolint:unused // @Summary Debug pprof cmdline // @ID debug-pprof-cmdline // @Security CoderSessionToken // @Success 200 // @Tags Debug // @Router /debug/pprof/cmdline [get] // @x-apidocgen {"skip": true} func _debugPprofCmdline(http.ResponseWriter, *http.Request) {} //nolint:unused // @Summary Debug pprof profile // @ID debug-pprof-profile // @Security CoderSessionToken // @Success 200 // @Tags Debug // @Router /debug/pprof/profile [get] // @x-apidocgen {"skip": true} func _debugPprofProfile(http.ResponseWriter, *http.Request) {} //nolint:unused // @Summary Debug pprof symbol // @ID debug-pprof-symbol // @Security CoderSessionToken // @Success 200 // @Tags Debug // @Router /debug/pprof/symbol [get] // @x-apidocgen {"skip": true} func _debugPprofSymbol(http.ResponseWriter, *http.Request) {} //nolint:unused // @Summary Debug pprof trace // @ID debug-pprof-trace // @Security CoderSessionToken // @Success 200 // @Tags Debug // @Router /debug/pprof/trace [get] // @x-apidocgen {"skip": true} func _debugPprofTrace(http.ResponseWriter, *http.Request) {} //nolint:unused // @Summary Debug metrics // @ID debug-metrics // @Security CoderSessionToken // @Success 200 // @Tags Debug // @Router /debug/metrics [get] // @x-apidocgen {"skip": true} func _debugMetrics(http.ResponseWriter, *http.Request) {} //nolint:unused