fix(coderd/externalauth): detect concurrent refresh race to prevent cache poisoning (#24228) (#24938)

Cherry-pick of https://github.com/coder/coder/pull/24228

Original PR: #24228 — fix(coderd/externalauth): detect concurrent
refresh race to prevent cache poisoning
Merge commit: da6e708bd2
Requested by: @f0ssel

Co-authored-by: Jason Barnett <J@sonBarnett.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Garrett Delfosse <garrett@coder.com>
This commit is contained in:
github-actions[bot]
2026-05-04 14:03:39 -04:00
committed by GitHub
parent eabb68d89e
commit e67d027786
2 changed files with 72 additions and 1 deletions
+18
View File
@@ -200,6 +200,24 @@ func (c *Config) RefreshToken(ctx context.Context, db database.Store, externalAu
//
// The error message is saved for debugging purposes.
if isFailedRefresh(existingToken, err) {
// Before caching the failure, re-read the external auth link
// from the database. A concurrent request may have already
// refreshed the token successfully, consuming the single-use
// refresh token (e.g., GitHub App tokens). In that case our
// "bad_refresh_token" error is a false positive from losing
// the race, and we should use the winner's updated token
// instead of poisoning the database with a cached failure.
currentLink, readErr := db.GetExternalAuthLink(ctx, database.GetExternalAuthLinkParams{
ProviderID: externalAuthLink.ProviderID,
UserID: externalAuthLink.UserID,
})
if readErr == nil && currentLink.OAuthRefreshToken != externalAuthLink.OAuthRefreshToken {
// Another caller won the refresh race and stored a new
// refresh token. Return their updated link instead of
// caching a failure.
return currentLink, nil
}
reason := err.Error()
if len(reason) > failureReasonLimit {
// Limit the length of the error message to prevent
+54 -1
View File
@@ -203,7 +203,9 @@ func TestRefreshToken(t *testing.T) {
}
// Try again with a bad refresh token error. This will invalidate the
// refresh token, and not retry again. Expect DB call to remove the refresh token
// refresh token, and not retry again. Expect DB calls to check for
// concurrent refresh (GetExternalAuthLink) and then remove the refresh token.
mDB.EXPECT().GetExternalAuthLink(gomock.Any(), gomock.Any()).Return(link, nil).Times(1)
mDB.EXPECT().UpdateExternalAuthLinkRefreshToken(gomock.Any(), gomock.Any()).Return(nil).Times(1)
refreshErr = &oauth2.RetrieveError{ // github error
Response: &http.Response{
@@ -225,6 +227,57 @@ func TestRefreshToken(t *testing.T) {
require.Equal(t, refreshCount, totalRefreshes)
})
// ConcurrentRefreshRace tests that when multiple concurrent requests
// race to refresh the same token, the loser does not poison the
// database with a cached "bad_refresh_token" failure. This
// reproduces the issue described in coder/coder#17069 where
// providers with single-use refresh tokens (e.g., GitHub Apps)
// reject the second refresh attempt, and the resulting error was
// incorrectly cached.
t.Run("ConcurrentRefreshRace", func(t *testing.T) {
t.Parallel()
ctrl := gomock.NewController(t)
mDB := dbmock.NewMockStore(ctrl)
fake, config, link := setupOauth2Test(t, testConfig{
FakeIDPOpts: []oidctest.FakeIDPOpt{
oidctest.WithRefresh(func(_ string) error {
return &oauth2.RetrieveError{
Response: &http.Response{
StatusCode: http.StatusOK,
},
ErrorCode: "bad_refresh_token",
}
}),
},
ExternalAuthOpt: func(cfg *externalauth.Config) {},
})
ctx := oidc.ClientContext(context.Background(), fake.HTTPClient(nil))
link.OAuthExpiry = time.Now().Add(time.Hour * -1)
// Simulate a concurrent winner: when the loser re-reads the
// DB, the refresh token has changed (the winner stored a new
// one). The loser should return the updated link instead of
// caching the failure.
winnerLink := link
winnerLink.OAuthRefreshToken = "winner-refresh-token"
winnerLink.OAuthAccessToken = "winner-access-token"
mDB.EXPECT().GetExternalAuthLink(gomock.Any(), database.GetExternalAuthLinkParams{
ProviderID: link.ProviderID,
UserID: link.UserID,
}).Return(winnerLink, nil).Times(1)
// UpdateExternalAuthLinkRefreshToken should NOT be called
// because the re-read detected the concurrent refresh.
result, err := config.RefreshToken(ctx, mDB, link)
require.NoError(t, err, "loser should succeed using the winner's token")
require.Equal(t, "winner-access-token", result.OAuthAccessToken)
require.Equal(t, "winner-refresh-token", result.OAuthRefreshToken)
})
// ValidateFailure tests if the token is no longer valid with a 401 response.
t.Run("ValidateFailure", func(t *testing.T) {
t.Parallel()