mirror of
https://github.com/coder/coder.git
synced 2026-06-02 20:48:20 +00:00
chore: improve healthcheck timeout message (#21520)
Relates to https://github.com/coder/internal/issues/272. This flake has been persisting for a while, and unfortunately there's no detail on which healthcheck in particular is holding things up. This PR adds a concurrency-safe `healthcheck.Progress` and wires it through `healthcheck.Run`. If the healthcheck times out, the timeout message now reports which healthchecks have completed and which are still running, along with how long each one took or has been running so far. 🤖 Claude Opus 4.5 completed the first round of this implementation, which I then refactored.
This commit is contained in:
+20
-17
@@ -14,6 +14,8 @@ import (
|
||||
|
||||
"cdr.dev/slog/v3/sloggers/slogtest"
|
||||
"github.com/coder/coder/v2/coderd/coderdtest"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/codersdk/healthsdk"
|
||||
"github.com/coder/coder/v2/testutil"
|
||||
)
|
||||
@@ -28,7 +30,7 @@ func TestDebugHealth(t *testing.T) {
|
||||
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
sessionToken string
|
||||
client = coderdtest.New(t, &coderdtest.Options{
|
||||
HealthcheckFunc: func(_ context.Context, apiKey string) *healthsdk.HealthcheckReport {
|
||||
HealthcheckFunc: func(_ context.Context, apiKey string, _ *healthcheck.Progress) *healthsdk.HealthcheckReport {
|
||||
calls.Add(1)
|
||||
assert.Equal(t, sessionToken, apiKey)
|
||||
return &healthsdk.HealthcheckReport{
|
||||
@@ -61,7 +63,7 @@ func TestDebugHealth(t *testing.T) {
|
||||
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
sessionToken string
|
||||
client = coderdtest.New(t, &coderdtest.Options{
|
||||
HealthcheckFunc: func(_ context.Context, apiKey string) *healthsdk.HealthcheckReport {
|
||||
HealthcheckFunc: func(_ context.Context, apiKey string, _ *healthcheck.Progress) *healthsdk.HealthcheckReport {
|
||||
calls.Add(1)
|
||||
assert.Equal(t, sessionToken, apiKey)
|
||||
return &healthsdk.HealthcheckReport{
|
||||
@@ -93,19 +95,14 @@ func TestDebugHealth(t *testing.T) {
|
||||
// Need to ignore errors due to ctx timeout
|
||||
logger = slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
|
||||
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
done = make(chan struct{})
|
||||
client = coderdtest.New(t, &coderdtest.Options{
|
||||
Logger: &logger,
|
||||
HealthcheckTimeout: time.Microsecond,
|
||||
HealthcheckFunc: func(context.Context, string) *healthsdk.HealthcheckReport {
|
||||
t := time.NewTimer(time.Second)
|
||||
defer t.Stop()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return &healthsdk.HealthcheckReport{}
|
||||
case <-t.C:
|
||||
return &healthsdk.HealthcheckReport{}
|
||||
}
|
||||
HealthcheckTimeout: time.Second,
|
||||
HealthcheckFunc: func(_ context.Context, _ string, progress *healthcheck.Progress) *healthsdk.HealthcheckReport {
|
||||
progress.Start("test")
|
||||
<-done
|
||||
return &healthsdk.HealthcheckReport{}
|
||||
},
|
||||
})
|
||||
_ = coderdtest.CreateFirstUser(t, client)
|
||||
@@ -115,8 +112,14 @@ func TestDebugHealth(t *testing.T) {
|
||||
res, err := client.Request(ctx, "GET", "/api/v2/debug/health", nil)
|
||||
require.NoError(t, err)
|
||||
defer res.Body.Close()
|
||||
_, _ = io.ReadAll(res.Body)
|
||||
close(done)
|
||||
bs, err := io.ReadAll(res.Body)
|
||||
require.NoError(t, err, "reading body")
|
||||
require.Equal(t, http.StatusServiceUnavailable, res.StatusCode)
|
||||
var sdkResp codersdk.Response
|
||||
require.NoError(t, json.Unmarshal(bs, &sdkResp), "unmarshaling sdk response")
|
||||
require.Equal(t, "Healthcheck timed out.", sdkResp.Message)
|
||||
require.Contains(t, sdkResp.Detail, "Still running: test (elapsed:")
|
||||
})
|
||||
|
||||
t.Run("Refresh", func(t *testing.T) {
|
||||
@@ -128,7 +131,7 @@ func TestDebugHealth(t *testing.T) {
|
||||
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
client = coderdtest.New(t, &coderdtest.Options{
|
||||
HealthcheckRefresh: time.Microsecond,
|
||||
HealthcheckFunc: func(context.Context, string) *healthsdk.HealthcheckReport {
|
||||
HealthcheckFunc: func(context.Context, string, *healthcheck.Progress) *healthsdk.HealthcheckReport {
|
||||
calls <- struct{}{}
|
||||
return &healthsdk.HealthcheckReport{}
|
||||
},
|
||||
@@ -173,7 +176,7 @@ func TestDebugHealth(t *testing.T) {
|
||||
client = coderdtest.New(t, &coderdtest.Options{
|
||||
HealthcheckRefresh: time.Hour,
|
||||
HealthcheckTimeout: time.Hour,
|
||||
HealthcheckFunc: func(context.Context, string) *healthsdk.HealthcheckReport {
|
||||
HealthcheckFunc: func(context.Context, string, *healthcheck.Progress) *healthsdk.HealthcheckReport {
|
||||
calls++
|
||||
return &healthsdk.HealthcheckReport{
|
||||
Time: time.Now(),
|
||||
@@ -207,7 +210,7 @@ func TestDebugHealth(t *testing.T) {
|
||||
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
sessionToken string
|
||||
client = coderdtest.New(t, &coderdtest.Options{
|
||||
HealthcheckFunc: func(_ context.Context, apiKey string) *healthsdk.HealthcheckReport {
|
||||
HealthcheckFunc: func(_ context.Context, apiKey string, _ *healthcheck.Progress) *healthsdk.HealthcheckReport {
|
||||
assert.Equal(t, sessionToken, apiKey)
|
||||
return &healthsdk.HealthcheckReport{
|
||||
Time: time.Now(),
|
||||
|
||||
Reference in New Issue
Block a user