Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/locales/en/main.json
Original file line number Diff line number Diff line change
Expand Up @@ -3664,7 +3664,13 @@
"accessDenied": "You do not have access to this workspace",
"workspaceNotFound": "Workspace not found",
"tokenExchangeFailed": "Failed to authenticate with workspace: {error}"
}
},
"refreshRetrying": "Reconnecting...",
"refreshRetryingDetail": "Attempt {attempt}, retrying in {delay}s",
"refreshDegraded": "Connection issue",
"refreshDegradedDetail": "Using cached session. Will retry automatically.",
"sessionExpired": "Session expired",
"sessionExpiredDetail": "Please sign in again to continue."
},
"nightly": {
"badge": {
Expand Down
87 changes: 42 additions & 45 deletions src/platform/workspace/stores/useWorkspaceAuth.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ vi.mock('@/i18n', () => ({
t: (key: string) => key
}))

// Stub the toast store so the auth store can call `add` without a real toast UI.
// Every `useToastStore()` call hands back a brand-new `add` mock.
vi.mock('@/platform/updates/common/toastStore', () => {
  return {
    useToastStore: () => {
      return { add: vi.fn() }
    }
  }
})

const mockTeamWorkspacesEnabled = vi.hoisted(() => ({ value: true }))

vi.mock('@/composables/useFeatureFlags', () => ({
Expand Down Expand Up @@ -596,11 +602,13 @@ describe('useWorkspaceAuthStore', () => {

mockGetIdToken.mockResolvedValue(undefined)
const refreshPromise = store.refreshToken()
await vi.advanceTimersByTimeAsync(7_000)
// Advance enough for retries with jitter.
await vi.advanceTimersByTimeAsync(10_000)
await refreshPromise

expect(currentWorkspace.value).toEqual(mockWorkspaceWithRole)
expect(workspaceToken.value).toBe('workspace-token-abc')
// Only 1 fetch (initial switchWorkspace) - retries fail at getIdToken before fetch.
expect(mockFetch).toHaveBeenCalledTimes(1)

mockGetIdToken.mockResolvedValue('firebase-token-xyz')
Expand Down Expand Up @@ -639,7 +647,8 @@ describe('useWorkspaceAuthStore', () => {
mockGetIdToken.mockResolvedValue(undefined)

const refreshPromise = store.refreshToken()
await vi.advanceTimersByTimeAsync(7_000)
// Advance enough for retries with jitter (~1500 + 2500 + 4500 = 8500ms worst case).
await vi.advanceTimersByTimeAsync(10_000)
await refreshPromise

expect(currentWorkspace.value).toBeNull()
Expand Down Expand Up @@ -736,14 +745,10 @@ describe('useWorkspaceAuthStore', () => {
})

describe('refreshToken retry/race paths', () => {
// NOTE: This test documents the CURRENT behavior — exhausted refresh
// retries clear the workspace context unconditionally, even when the
// existing workspace token is still within its expiry window. That is a
// UX gap (transient backend outage manifests as forced logout) and the
// store should preserve a still-valid token across transient
// TOKEN_EXCHANGE_FAILED errors. Update the assertion alongside any source
// change that tracks token expiry to skip the context clear.
it('retries up to 3 times with exponential backoff on TOKEN_EXCHANGE_FAILED, then clears context', async () => {
// When refresh exhausts retries but the token is still valid, the store
// preserves context in a "degraded" state and schedules a later retry.
// This prevents transient backend outages from forcing logout.
it('retries up to 3 times with exponential backoff on TOKEN_EXCHANGE_FAILED, keeps context when token still valid', async () => {
mockGetIdToken.mockResolvedValue('firebase-token-xyz')

// Initial successful switchWorkspace establishes context.
Expand All @@ -754,7 +759,7 @@ describe('useWorkspaceAuthStore', () => {
vi.stubGlobal('fetch', mockFetch)

const store = useWorkspaceAuthStore()
const { currentWorkspace } = storeToRefs(store)
const { currentWorkspace, workspaceToken } = storeToRefs(store)

await store.switchWorkspace('workspace-123')
expect(currentWorkspace.value).not.toBeNull()
Expand All @@ -767,46 +772,33 @@ describe('useWorkspaceAuthStore', () => {
json: () => Promise.resolve({ message: 'Server error' })
})

const consoleErrorSpy = vi
.spyOn(console, 'error')
.mockImplementation(() => {})
const consoleWarnSpy = vi
.spyOn(console, 'warn')
.mockImplementation(() => {})

const refreshPromise = store.refreshToken()

// Drain the four attempts (initial + 3 retries) and their backoff delays.
await vi.runAllTimersAsync()
// Advance past the worst-case cumulative backoff. Each retry delay carries up
// to 500ms of jitter: ~1000-1500ms + ~2000-2500ms + ~4000-4500ms = 8500ms max.
// Advancing only 8000ms leaves the last retry timer pending whenever the total
// jitter exceeds 1000ms, so use the same 10_000ms window as the other refresh tests.
await vi.advanceTimersByTimeAsync(10_000)
await refreshPromise

// 1 initial switchWorkspace + 4 refresh attempts = 5 total fetch calls.
expect(mockFetch).toHaveBeenCalledTimes(5)
// Backoff: 1s + 2s + 4s = 7s of cumulative warn-logged delays.
expect(
consoleWarnSpy.mock.calls.some((c) =>
/retrying in 1000ms/.test(String(c[0]))
)
).toBe(true)
expect(
consoleWarnSpy.mock.calls.some((c) =>
/retrying in 2000ms/.test(String(c[0]))
)
).toBe(true)
expect(
consoleWarnSpy.mock.calls.some((c) =>
/retrying in 4000ms/.test(String(c[0]))
)
).toBe(true)
// Backoff with jitter logged.
const retryLogCalls = consoleWarnSpy.mock.calls.filter((c) =>
/retrying in \d+ms/.test(String(c[0]))
)
expect(retryLogCalls.length).toBe(3)

// After the final failure the context is cleared.
expect(currentWorkspace.value).toBeNull()
// Token still valid, so context is preserved (degraded state).
expect(currentWorkspace.value).not.toBeNull()
expect(workspaceToken.value).toBe('workspace-token-abc')

consoleErrorSpy.mockRestore()
consoleWarnSpy.mockRestore()
})

it('clears context immediately on INVALID_FIREBASE_TOKEN without retrying', async () => {
it('retries on INVALID_FIREBASE_TOKEN and keeps context when token still valid', async () => {
mockGetIdToken.mockResolvedValue('firebase-token-xyz')
const mockFetch = vi.fn().mockResolvedValueOnce({
ok: true,
Expand All @@ -815,30 +807,35 @@ describe('useWorkspaceAuthStore', () => {
vi.stubGlobal('fetch', mockFetch)

const store = useWorkspaceAuthStore()
const { currentWorkspace } = storeToRefs(store)
const { currentWorkspace, workspaceToken } = storeToRefs(store)

await store.switchWorkspace('workspace-123')
expect(currentWorkspace.value).not.toBeNull()

// Permanent error: 401 → INVALID_FIREBASE_TOKEN.
// INVALID_FIREBASE_TOKEN is retryable (Firebase token may just need refresh).
mockFetch.mockResolvedValue({
ok: false,
status: 401,
statusText: 'Unauthorized',
json: () => Promise.resolve({ message: 'Invalid token' })
})

const consoleErrorSpy = vi
.spyOn(console, 'error')
const consoleWarnSpy = vi
.spyOn(console, 'warn')
.mockImplementation(() => {})

await store.refreshToken()
const refreshPromise = store.refreshToken()
// Advance past the worst-case cumulative backoff (1500 + 2500 + 4500 = 8500ms
// when every retry draws maximum jitter). 8000ms is not always enough to fire
// the final retry timer, which would leave refreshPromise unresolved.
await vi.advanceTimersByTimeAsync(10_000)
await refreshPromise

// Initial + exactly one refresh attempt; no retries on permanent errors.
expect(mockFetch).toHaveBeenCalledTimes(2)
expect(currentWorkspace.value).toBeNull()
// 1 initial + 4 refresh attempts (initial + 3 retries) = 5 total.
expect(mockFetch).toHaveBeenCalledTimes(5)
// Token still valid, so context preserved (degraded state).
expect(currentWorkspace.value).not.toBeNull()
expect(workspaceToken.value).toBe('workspace-token-abc')

consoleErrorSpy.mockRestore()
consoleWarnSpy.mockRestore()
})

// KNOWN BUG (.fails): when an in-flight refresh's switchWorkspace call is
Expand Down
60 changes: 56 additions & 4 deletions src/platform/workspace/stores/workspaceAuthStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { useAuthStore } from '@/stores/authStore'
import type { AuthHeader } from '@/types/authTypes'
import type { WorkspaceWithRole } from '@/platform/workspace/workspaceTypes'
import { useFeatureFlags } from '@/composables/useFeatureFlags'
import { useToastStore } from '@/platform/updates/common/toastStore'

const WorkspaceWithRoleSchema = z.object({
id: z.string(),
Expand Down Expand Up @@ -56,6 +57,9 @@ export const useWorkspaceAuthStore = defineStore('workspaceAuth', () => {
// Timer state
let refreshTimerId: ReturnType<typeof setTimeout> | null = null

// AbortController for cancelling in-flight refresh operations
let currentRefreshAbort: AbortController | null = null

// Request ID to prevent stale refresh operations from overwriting newer workspace contexts
let refreshRequestId = 0

Expand All @@ -72,6 +76,13 @@ export const useWorkspaceAuthStore = defineStore('workspaceAuth', () => {
}
}

/**
 * Aborts the tracked refresh AbortController, if there is one, and forgets it.
 * Does nothing when no controller is currently stored.
 */
function abortCurrentRefresh(): void {
  const inFlight = currentRefreshAbort
  if (!inFlight) return

  inFlight.abort()
  currentRefreshAbort = null
}

function scheduleTokenRefresh(expiresAt: number): void {
stopRefreshTimer()
const now = Date.now()
Expand Down Expand Up @@ -100,10 +111,12 @@ export const useWorkspaceAuthStore = defineStore('workspaceAuth', () => {
}

const remainingMs = workspaceTokenExpiresAt.value - Date.now()
// Add jitter to prevent thundering herd across browser tabs
const jitter = Math.random() * 5000
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: 5s of additive jitter is large relative to small remainingMs windows. When remainingMs is just above the 10s short-circuit (say ~11s), the non-short-circuit branch gives floor(11_000/2) + up to 5_000 = up to 10.5s, leaving as little as ~500ms before the workspace token actually expires. Was 5_000ms picked deliberately, or would something proportional to remainingMs (or a smaller fixed jitter) be safer? Same question applies to the retry-loop jitter at line 378 (500ms there feels reasonable, just want to confirm the asymmetry is intentional).

const retryDelay =
remainingMs <= 10_000
? remainingMs
: Math.min(60_000, Math.floor(remainingMs / 2))
: Math.min(60_000, Math.floor(remainingMs / 2)) + jitter
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue: jitter is added outside the Math.min(60_000, …) cap, so retryDelay can be up to 60_000 + jitter (~65s) once remainingMs / 2 >= 60_000. The intent of the cap ("don't sleep longer than 60s before re-attempting") is defeated by the additive jitter.

Could we move the jitter inside the min so the cap is honored, e.g.:

const retryDelay =
  remainingMs <= 10_000
    ? remainingMs
    : Math.min(60_000, Math.floor(remainingMs / 2) + jitter)


refreshTimerId = setTimeout(() => {
void refreshToken()
Expand Down Expand Up @@ -147,6 +160,7 @@ export const useWorkspaceAuthStore = defineStore('workspaceAuth', () => {

/**
 * Tears down background refresh activity: cancels the pending refresh timer
 * and aborts the tracked refresh AbortController.
 *
 * NOTE(review): unlike clearWorkspaceContext, this does not bump
 * refreshRequestId, so a refresh whose network call is already in flight may
 * still complete afterwards — confirm whether that is intended.
 */
function destroy(): void {
stopRefreshTimer()
abortCurrentRefresh()
}

function initializeFromSession(): boolean {
Expand Down Expand Up @@ -310,10 +324,19 @@ export const useWorkspaceAuthStore = defineStore('workspaceAuth', () => {
const capturedRequestId = refreshRequestId
const maxRetries = 3
const baseDelayMs = 1000
const toastStore = useToastStore()

// Create AbortController for this refresh operation
abortCurrentRefresh()
const abortController = new AbortController()
currentRefreshAbort = abortController
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: the AbortController is allocated and stored, but abortController.signal is never passed to the fetch inside switchWorkspace. So abort():

  • does not cancel the in-flight /auth/token request, and
  • does not prevent a late-resolving successful switchWorkspace from setting currentWorkspace/workspaceToken and calling scheduleTokenRefresh(expiresAt) after destroy() or clearWorkspaceContext() has run.

The PR description says "Proper cleanup when context is cleared or component unmounts." For clearWorkspaceContext, refreshRequestId++ already covers the stale-write case before mutating state, but destroy() does not increment refreshRequestId, so a successful in-flight refresh after destroy can re-arm the timer and resurrect state. Is the intent to thread abortController.signal into the fetch (and treat AbortError as a no-op in the catch), or are you deliberately keeping abort as a loop-only guard? If the latter, the new abortController.signal.aborted check in the loop overlaps almost entirely with the existing capturedRequestId !== refreshRequestId check called from clearWorkspaceContext, so it might be worth either wiring the signal end-to-end or dropping the AbortController and bumping refreshRequestId inside destroy() instead.


for (let attempt = 0; attempt <= maxRetries; attempt++) {
// Check if workspace context changed since refresh started (user switched workspaces)
if (capturedRequestId !== refreshRequestId) {
if (
capturedRequestId !== refreshRequestId ||
abortController.signal.aborted
) {
console.warn(
'Aborting stale token refresh: workspace context changed during refresh'
)
Expand All @@ -334,6 +357,11 @@ export const useWorkspaceAuthStore = defineStore('workspaceAuth', () => {
if (capturedRequestId === refreshRequestId) {
console.error('Workspace access revoked or auth invalid:', err)
clearWorkspaceContext()
toastStore.add({
severity: 'error',
summary: t('workspaceAuth.errors.accessDenied'),
life: 10000
})
}
return
}
Expand All @@ -346,11 +374,22 @@ export const useWorkspaceAuthStore = defineStore('workspaceAuth', () => {
err.code === 'NOT_AUTHENTICATED')

if (shouldRetryImmediately) {
const delay = baseDelayMs * Math.pow(2, attempt)
// Add jitter to prevent thundering herd
const jitter = Math.random() * 500
const delay = baseDelayMs * Math.pow(2, attempt) + jitter
console.warn(
`Token refresh failed (attempt ${attempt + 1}/${maxRetries + 1}), retrying in ${delay}ms:`,
`Token refresh failed (attempt ${attempt + 1}/${maxRetries + 1}), retrying in ${Math.round(delay)}ms:`,
err
)
toastStore.add({
severity: 'warn',
summary: t('workspaceAuth.refreshRetrying'),
detail: t('workspaceAuth.refreshRetryingDetail', {
attempt: attempt + 1,
delay: Math.round(delay / 1000)
}),
life: delay + 2000
})
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: every failed attempt adds a fresh "Reconnecting…" toast (3 per cycle, plus a final degraded/expired toast). Because scheduleRefreshRetry() will eventually trigger another refreshToken() (which restarts the whole 4-attempt cycle), a backend that's down for a few minutes can produce a long stream of warn toasts.

Would it make sense to either gate the retry toast to attempt === 0 (one "Reconnecting…" per refresh cycle) or reuse a single sticky toast that updates? Right now even a recoverable hiccup pops 3 stacked warn toasts before the success, which is noisier than the "toast notifications during retries" intent in the PR description suggests.

await new Promise((resolve) => setTimeout(resolve, delay))
continue
}
Expand All @@ -363,12 +402,24 @@ export const useWorkspaceAuthStore = defineStore('workspaceAuth', () => {
'Workspace token refresh failed, keeping current token until expiry:',
err
)
toastStore.add({
severity: 'warn',
summary: t('workspaceAuth.refreshDegraded'),
detail: t('workspaceAuth.refreshDegradedDetail'),
life: 10000
})
scheduleRefreshRetry()
return
}

if (capturedRequestId === refreshRequestId) {
console.error('Failed to refresh workspace token after retries:', err)
toastStore.add({
severity: 'error',
summary: t('workspaceAuth.sessionExpired'),
detail: t('workspaceAuth.sessionExpiredDetail'),
life: 10000
})
clearWorkspaceContext()
}
return
Expand All @@ -394,6 +445,7 @@ export const useWorkspaceAuthStore = defineStore('workspaceAuth', () => {
function clearWorkspaceContext(): void {
// Increment request ID to invalidate any in-flight stale refresh operations
refreshRequestId++
abortCurrentRefresh()
stopRefreshTimer()
currentWorkspace.value = null
workspaceToken.value = null
Expand Down
Loading