From 7861fcf1f6d7420b08b5ac75f603d0e236bce2cd Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Wed, 1 Apr 2026 08:08:13 -0400 Subject: [PATCH] perf(coderd): stop inline-resolving diff status on every GetChat call (#23901) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem Every `GET /api/experimental/chats/{chatID}` call was blocking for 200-800ms because the `getChat` handler called `resolveChatDiffStatus`, which unconditionally hit the git provider API (e.g. GitHub's `GET /repos/{owner}/{repo}/pulls?head=...`) via `ResolveBranchPullRequest` — even when the cached diff status was fresh. This made every chat page load at `/agents/{id}` noticeably slow. ## Root cause The call chain was: 1. `getChat` → `resolveChatDiffStatus` 2. `resolveChatDiffStatus` → `resolveChatDiffReference` → `gp.ResolveBranchPullRequest(...)` **(external HTTP call)** 3. Only **after** the external call: `chatDiffStatusIsStale(status, now)` check The staleness check happened after the expensive work, so every request paid the cost regardless of cache freshness. ## Fix `getChat` now returns the cached `chat_diff_statuses` row directly from the database. The background `gitsync` worker already keeps these rows fresh (every `DiffStatusTTL = 120s`), so inline resolution was redundant. The `resolveChatDiffContents` endpoint (which fetches actual diff content) still uses the full resolution path since it needs to make provider API calls by design. ## Changes - `getChat` reads cached diff status from DB instead of calling `resolveChatDiffStatus` - Remove `resolveChatDiffStatus` (dead code — no production callers) - Remove `chatDiffStatusIsStale` and `chatDiffStatusTTL` (dead code) - Remove `RefreshesStaleStatusWithExternalAuth` test (tested the removed inline refresh path)
Decision log - **Why not just add a staleness gate?** The background worker already handles refreshes on the same schedule. Adding an early-return-if-fresh would work but leaves dead code for the stale path that's never exercised in production (the worker gets there first). Removing the inline path entirely is simpler and eliminates the external API dependency from the read path. - **Why keep `resolveChatDiffContents` unchanged?** That endpoint's job is to fetch the actual diff content from the provider, so external API calls are inherent to its purpose.
--- coderd/exp_chats.go | 86 ++++---------------------- coderd/exp_chats_test.go | 129 --------------------------------------- 2 files changed, 12 insertions(+), 203 deletions(-) diff --git a/coderd/exp_chats.go b/coderd/exp_chats.go index f94e04a034..2532fb1155 100644 --- a/coderd/exp_chats.go +++ b/coderd/exp_chats.go @@ -56,7 +56,6 @@ import ( ) const ( - chatDiffStatusTTL = gitsync.DiffStatusTTL chatStreamBatchSize = 256 chatContextLimitModelConfigKey = "context_limit" @@ -1243,10 +1242,18 @@ func (api *API) getChat(rw http.ResponseWriter, r *http.Request) { ctx := r.Context() chat := httpmw.ChatParam(r) - diffStatus, err := api.resolveChatDiffStatus(ctx, chat) - if err != nil { - // Log but don't fail - diff status is supplementary. - api.Logger.Error(ctx, "failed to resolve chat diff status", + // Use the cached diff status from the database rather than + // resolving it inline. Inline resolution calls out to the + // git provider API (e.g. GitHub) on every request which + // blocks the response for 200-800ms. The background gitsync + // worker keeps the cached status fresh. + var diffStatus *database.ChatDiffStatus + status, err := api.Database.GetChatDiffStatusByChatID(ctx, chat.ID) + switch { + case err == nil: + diffStatus = &status + case !xerrors.Is(err, sql.ErrNoRows): + api.Logger.Error(ctx, "failed to get cached chat diff status", slog.F("chat_id", chat.ID), slog.Error(err), ) @@ -2358,68 +2365,6 @@ func chatWorkspaceAuditStatus(err error) int { return http.StatusInternalServerError } -func (api *API) resolveChatDiffStatus( - ctx context.Context, - chat database.Chat, -) (*database.ChatDiffStatus, error) { - status, found, err := api.getCachedChatDiffStatus(ctx, chat.ID) - if err != nil { - return nil, err - } - - now := time.Now().UTC() - - reference, err := api.resolveChatDiffReference(ctx, chat, found, status) - if err != nil { - return nil, err - } - if reference.PullRequestURL != "" { - if !found || !strings.EqualFold(strings.TrimSpace(status.Url.String), reference.PullRequestURL) { - status, err = api.upsertChatDiffStatusReference(ctx, chat.ID, reference.PullRequestURL, now.Add(-time.Second)) - if err != nil { - return nil, err - } - found = true - } - } - - if !found { - return nil, nil //nolint:nilnil // Callers handle nil status explicitly. - } - if !chatDiffStatusIsStale(status, now) { - return &status, nil - } - - // Use the same refresh pipeline as the background worker - // so both paths share identical provider/token resolution. - refreshed, err := api.gitSyncWorker.RefreshChat( - ctx, status, chat.OwnerID, - ) - if err == nil && refreshed != nil { - return refreshed, nil - } - if err == nil { - // No PR exists yet; return what we have. - return &status, nil - } - - api.Logger.Warn(ctx, "failed to refresh chat diff status", - slog.F("chat_id", chat.ID), - slog.Error(err), - ) - - backoffStatus, backoffErr := api.upsertChatDiffStatusReference(ctx, chat.ID, reference.PullRequestURL, now.Add(chatDiffStatusTTL)) - if backoffErr != nil { - api.Logger.Warn(ctx, "failed to extend chat diff status stale timestamp", - slog.F("chat_id", chat.ID), - slog.Error(backoffErr), - ) - return &status, nil - } - - return &backoffStatus, nil -} - func (api *API) resolveChatDiffContents( ctx context.Context, chat database.Chat, @@ -2686,13 +2631,6 @@ func (api *API) resolveGitProvider(origin string) gitprovider.Provider { return gp } -func chatDiffStatusIsStale(status database.ChatDiffStatus, now time.Time) bool { - if !status.RefreshedAt.Valid { - return true - } - return !status.StaleAt.After(now) -} - func (api *API) resolveChatGitAccessToken( ctx context.Context, userID uuid.UUID, diff --git a/coderd/exp_chats_test.go b/coderd/exp_chats_test.go index b43407b65e..e1e7621a7a 100644 --- a/coderd/exp_chats_test.go +++ b/coderd/exp_chats_test.go @@ -9,7 +9,6 @@ import ( "fmt" "mime" "net/http" - "net/http/httptest" "regexp" "strings" "sync/atomic" @@ -22,7 +21,6 @@ import ( "golang.org/x/xerrors" "github.com/coder/coder/v2/coderd/coderdtest" - "github.com/coder/coder/v2/coderd/coderdtest/oidctest" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/db2sdk" "github.com/coder/coder/v2/coderd/database/dbauthz" @@ -4100,133 +4098,6 @@ func TestGetChatDiffStatus(t *testing.T) { _, err = otherClient.GetChat(ctx, createdChat.ID) requireSDKError(t, err, http.StatusNotFound) }) - - // Integration test: exercises the full GetChat handler refresh - // path with a real DB, dbauthz, a mock GitHub API, and an - // external-auth-linked user. Verifies that a stale chat diff - // status is refreshed end-to-end via the gitsync worker's - // Refresh pipeline (provider resolution, token acquisition - // through external auth, and PR status fetch). - t.Run("RefreshesStaleStatusWithExternalAuth", func(t *testing.T) { - t.Parallel() - - ctx := testutil.Context(t, testutil.WaitLong) - - // Mock GitHub API over TLS so the git provider's URL patterns - // (which require https://) match our PR URLs. - ghAPI := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - switch { - // PR status: GET /repos/{owner}/{repo}/pulls/{number} - case r.URL.Path == "/repos/testorg/testrepo/pulls/42" && r.URL.Query().Get("per_page") == "": - _, _ = w.Write([]byte(`{ - "state": "open", - "merged": false, - "draft": false, - "additions": 25, - "deletions": 7, - "changed_files": 4, - "head": {"sha": "abc123"} - }`)) - // PR reviews: GET /repos/{owner}/{repo}/pulls/{number}/reviews - case strings.HasSuffix(r.URL.Path, "/reviews"): - _, _ = w.Write([]byte(`[]`)) - default: - http.NotFound(w, r) - } - })) - t.Cleanup(ghAPI.Close) - - // The git provider derives webBaseURL from apiBaseURL. - // For a TLS server at https://127.0.0.1:PORT, webBaseURL - // is the same, and PR URL patterns match - // https://127.0.0.1:PORT/{owner}/{repo}/pull/{number}. - ghWebHost := strings.TrimPrefix(ghAPI.URL, "https://") - prURL := fmt.Sprintf("https://%s/testorg/testrepo/pull/42", ghWebHost) - remoteOrigin := fmt.Sprintf("https://%s/testorg/testrepo.git", ghWebHost) - - // Set up a fake OIDC IDP for external auth login. - const providerID = "test-github" - fake := oidctest.NewFakeIDP(t, oidctest.WithServing()) - - rawClient, _, api := coderdtest.NewWithAPI(t, &coderdtest.Options{ - DeploymentValues: chatDeploymentValues(t), - ExternalAuthConfigs: []*externalauth.Config{ - fake.ExternalAuthConfig(t, providerID, nil, func(cfg *externalauth.Config) { - cfg.Type = codersdk.EnhancedExternalAuthProviderGitHub.String() - // Point the git provider at our mock API server. - cfg.APIBaseURL = ghAPI.URL - // Match the remote origin (127.0.0.1 host). - cfg.Regex = regexp.MustCompile(regexp.QuoteMeta(ghWebHost)) - }), - }, - }) - client := codersdk.NewExperimentalClient(rawClient) - db := api.Database - - // Use the TLS mock server's HTTP client (which trusts its - // self-signed cert) for git provider API calls. - api.HTTPClient = ghAPI.Client() - user := coderdtest.CreateFirstUser(t, client.Client) - modelConfig := createChatModelConfig(t, client) - - // Log in to the external auth provider so the user has an - // ExternalAuthLink row in the DB. This is what - // resolveChatGitAccessToken reads via GetExternalAuthLink. - fake.ExternalLogin(t, client.Client) - - // Insert a chat owned by the user. - chat, err := db.InsertChat(dbauthz.AsSystemRestricted(ctx), database.InsertChatParams{ - OwnerID: user.UserID, - LastModelConfigID: modelConfig.ID, - Title: "rbac integration test", - }) - require.NoError(t, err) - - // Store a pre-resolved PR URL so the refresh path uses - // ParsePullRequestURL directly (skipping branch-to-PR - // resolution, which isn't what we're testing). The status - // is stale (stale_at in the past) so the handler triggers - // a full refresh through RefreshChat. - _, err = db.UpsertChatDiffStatusReference( - dbauthz.AsSystemRestricted(ctx), - database.UpsertChatDiffStatusReferenceParams{ - ChatID: chat.ID, - Url: sql.NullString{String: prURL, Valid: true}, - GitBranch: "feature/rbac-fix", - GitRemoteOrigin: remoteOrigin, - StaleAt: time.Now().Add(-time.Minute), - }, - ) - require.NoError(t, err) - - // Call GetChat which now resolves diff status inline. - // This exercises the full code path: - // resolveChatDiffStatus -> RefreshChat (with - // AsSystemRestricted) -> Refresher.Refresh -> - // resolveChatGitAccessToken (GetExternalAuthLink with - // AsSystemRestricted) -> FetchPullRequestStatus (mock). - // - // Without the AsSystemRestricted fix, GetExternalAuthLink - // would fail under the chatd RBAC context (missing - // ActionReadPersonal), causing ErrNoTokenAvailable and a - // refresh failure that silently returns stale data. - result, err := client.GetChat(ctx, chat.ID) - require.NoError(t, err) - require.NotNil(t, result.DiffStatus) - status := result.DiffStatus - - // The mock GitHub API returned PR #42 with 25 additions, - // 7 deletions, 4 changed files, state "open". - require.NotNil(t, status.RefreshedAt, "status should have been refreshed") - require.NotNil(t, status.PullRequestState) - require.Equal(t, "open", *status.PullRequestState) - require.EqualValues(t, 25, status.Additions) - require.EqualValues(t, 7, status.Deletions) - require.EqualValues(t, 4, status.ChangedFiles) - require.NotNil(t, status.URL) - require.Contains(t, *status.URL, "pull/42") - }) } func TestGetChatDiffContents(t *testing.T) {