chore: improve healthcheck timeout message (#21520)

Relates to https://github.com/coder/internal/issues/272

This flake has persisted for a while, and unfortunately the timeout error
gives no detail on which particular healthcheck is holding things up.

This PR adds a concurrency-safe `healthcheck.Progress` and wires it
through `healthcheck.Run`. If the healthcheck times out, the timeout
response now reports which healthchecks have completed and which are still
running, along with how long each one took or has been running so far.

🤖 Claude Opus 4.5 completed the first round of this implementation,
which I then refactored.
This commit is contained in:
Cian Johnston
2026-01-15 16:37:05 +00:00
committed by GitHub
parent 7fc84ecf0b
commit 3a62a8e70e
6 changed files with 216 additions and 24 deletions
+6 -2
View File
@@ -83,17 +83,21 @@ func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) {
ctx, cancel := context.WithTimeout(context.Background(), api.Options.HealthcheckTimeout)
defer cancel()
report := api.HealthcheckFunc(ctx, apiKey)
// Create and store progress tracker for timeout diagnostics.
report := api.HealthcheckFunc(ctx, apiKey, &api.healthCheckProgress)
if report != nil { // Only store non-nil reports.
api.healthCheckCache.Store(report)
}
api.healthCheckProgress.Reset()
return report, nil
})
select {
case <-ctx.Done():
summary := api.healthCheckProgress.Summary()
httpapi.Write(ctx, rw, http.StatusServiceUnavailable, codersdk.Response{
Message: "Healthcheck is in progress and did not complete in time. Try again in a few seconds.",
Message: "Healthcheck timed out.",
Detail: summary,
})
return
case res := <-resChan: