From 8b2ae957e5034f8583ed9e37353e4082bf6e1672 Mon Sep 17 00:00:00 2001 From: Youngbin Kim <64558592+youngbinkim0@users.noreply.github.com> Date: Thu, 12 Feb 2026 17:13:34 -0500 Subject: [PATCH] feat(runtime-fallback): generalize provider auto-retry signal detection Refactor retry signal detection to be provider-agnostic: - Replace hardcoded Copilot/OpenAI checks with generic pattern matching - Detect any provider message containing limit/quota keywords + [retrying in X] - Add OpenAI pattern: 'usage limit has been reached [retrying in X]' - Update logging to use generic 'provider' instead of specific names - Add 'usage limit has been reached' to RETRYABLE_ERROR_PATTERNS This enables fallback escalation for any provider that signals automatic retries due to quota/rate limits, not just Copilot and OpenAI. Closes PR discussion: generalize retry pattern detection --- src/hooks/runtime-fallback/constants.ts | 1 + src/hooks/runtime-fallback/index.test.ts | 115 ++++++++++++++++++++++- src/hooks/runtime-fallback/index.ts | 41 ++++++-- 3 files changed, 148 insertions(+), 9 deletions(-) diff --git a/src/hooks/runtime-fallback/constants.ts b/src/hooks/runtime-fallback/constants.ts index a49831fb..b8001b96 100644 --- a/src/hooks/runtime-fallback/constants.ts +++ b/src/hooks/runtime-fallback/constants.ts @@ -26,6 +26,7 @@ export const RETRYABLE_ERROR_PATTERNS = [ /rate.?limit/i, /too.?many.?requests/i, /quota.?exceeded/i, + /usage\s+limit\s+has\s+been\s+reached/i, /service.?unavailable/i, /overloaded/i, /temporarily.?unavailable/i, diff --git a/src/hooks/runtime-fallback/index.test.ts b/src/hooks/runtime-fallback/index.test.ts index 38e57d51..82895086 100644 --- a/src/hooks/runtime-fallback/index.test.ts +++ b/src/hooks/runtime-fallback/index.test.ts @@ -341,7 +341,7 @@ describe("runtime-fallback", () => { }, }) - const signalLog = logCalls.find((c) => c.msg.includes("Detected Copilot auto-retry signal")) + const signalLog = logCalls.find((c) => c.msg.includes("Detected provider 
auto-retry signal")) expect(signalLog).toBeDefined() const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) @@ -349,6 +349,44 @@ describe("runtime-fallback", () => { expect(fallbackLog?.data).toMatchObject({ from: "github-copilot/claude-opus-4.6", to: "openai/gpt-5.2" }) }) + test("should trigger fallback on OpenAI auto-retry signal in message.updated", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["anthropic/claude-opus-4-6"]), + }) + + const sessionID = "test-session-openai-auto-retry" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "openai/gpt-5.3-codex" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "openai/gpt-5.3-codex", + status: "The usage limit has been reached [retrying in 27s attempt #6]", + }, + }, + }, + }) + + const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal")) + expect(signalLog).toBeDefined() + + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ from: "openai/gpt-5.3-codex", to: "anthropic/claude-opus-4-6" }) + }) + test("should log when no fallback models configured", async () => { const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig(), @@ -1243,6 +1281,81 @@ describe("runtime-fallback", () => { expect(retriedModels).toContain("openai/gpt-5.3-codex") }) + test("should not clear fallback timeout on assistant non-error update with OpenAI retry signal", async () => { + const retriedModels: string[] = [] + + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: 
{ + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "test" }] }], + }), + promptAsync: async (args: unknown) => { + const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model + if (model?.providerID && model?.modelID) { + retriedModels.push(`${model.providerID}/${model.modelID}`) + } + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }), + pluginConfig: createMockPluginConfigWithCategoryFallback([ + "openai/gpt-5.3-codex", + "anthropic/claude-opus-4-6", + ]), + session_timeout_ms: 20, + } + ) + + const sessionID = "test-session-openai-retry-signal-no-error" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } }, + }, + }) + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "ProviderAuthError", + data: { + providerID: "google", + message: + "Google Generative AI API key is missing. 
Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.", + }, + }, + }, + }, + }) + + expect(retriedModels).toEqual(["openai/gpt-5.3-codex"]) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + status: "The usage limit has been reached [retrying in 27s attempt #6]", + }, + }, + }, + }) + + await new Promise((resolve) => setTimeout(resolve, 60)) + + expect(retriedModels).toContain("anthropic/claude-opus-4-6") + }) + test("should not clear fallback timeout on assistant non-error update without user-visible content", async () => { const retriedModels: string[] = [] diff --git a/src/hooks/runtime-fallback/index.ts b/src/hooks/runtime-fallback/index.ts index 29fac0e2..abefddf0 100644 --- a/src/hooks/runtime-fallback/index.ts +++ b/src/hooks/runtime-fallback/index.ts @@ -111,7 +111,29 @@ function classifyErrorType(error: unknown): string | undefined { return undefined } -function extractCopilotAutoRetrySignal(info: Record<string, unknown> | undefined): string | undefined { +interface AutoRetrySignal { + signal: string +} + +/** + * Detects provider auto-retry signals - when a provider hits a quota/limit + * and indicates it will automatically retry after a delay. + * + * Pattern: mentions limit/quota/rate limit AND indicates [retrying in X] + * Examples: + * - "Too Many Requests: quota exceeded [retrying in ~2 weeks attempt #1]" + * - "The usage limit has been reached [retrying in 27s attempt #6]" + * - "Rate limit exceeded. 
[retrying in 30s]" + */ +const AUTO_RETRY_PATTERNS: Array<(combined: string) => boolean> = [ + // Must have retry indicator + (combined) => /retrying\s+in/i.test(combined), + // And mention some kind of limit/quota + (combined) => + /(?:too\s+many\s+requests|quota\s*exceeded|usage\s+limit|rate\s+limit|limit\s+reached)/i.test(combined), +] + +function extractAutoRetrySignal(info: Record<string, unknown> | undefined): AutoRetrySignal | undefined { if (!info) return undefined const candidates: string[] = [] @@ -131,8 +153,10 @@ function extractCopilotAutoRetrySignal(info: Record<string, unknown> | undefined const combined = candidates.join("\n") if (!combined) return undefined - if (/too.?many.?requests/i.test(combined) && /quota.?exceeded/i.test(combined) && /retrying\s+in/i.test(combined)) { - return combined + // All patterns must match to be considered an auto-retry signal + const isAutoRetry = AUTO_RETRY_PATTERNS.every((test) => test(combined)) + if (isAutoRetry) { + return { signal: combined } } return undefined @@ -592,7 +616,7 @@ export function createRuntimeFallbackHook( .join("\n") if (!textFromParts) return false - if (extractCopilotAutoRetrySignal({ message: textFromParts })) return false + if (extractAutoRetrySignal({ message: textFromParts })) return false return true } catch { @@ -779,7 +803,8 @@ export function createRuntimeFallbackHook( if (event.type === "message.updated") { const info = props?.info as Record<string, unknown> | undefined const sessionID = info?.sessionID as string | undefined - const retrySignal = extractCopilotAutoRetrySignal(info) + const retrySignalResult = extractAutoRetrySignal(info) + const retrySignal = retrySignalResult?.signal const error = info?.error ?? (retrySignal ? 
{ name: "ProviderRateLimitError", message: retrySignal } : undefined) const role = info?.role as string | undefined const model = info?.model as string | undefined @@ -816,7 +841,7 @@ export function createRuntimeFallbackHook( } if (retrySignal && sessionRetryInFlight.has(sessionID)) { - log(`[${HOOK_NAME}] Overriding in-flight retry due to Copilot auto-retry signal`, { + log(`[${HOOK_NAME}] Overriding in-flight retry due to provider auto-retry signal`, { sessionID, model, }) @@ -825,7 +850,7 @@ export function createRuntimeFallbackHook( } if (retrySignal) { - log(`[${HOOK_NAME}] Detected Copilot auto-retry signal`, { sessionID, model }) + log(`[${HOOK_NAME}] Detected provider auto-retry signal`, { sessionID, model }) } if (!retrySignal) { @@ -894,7 +919,7 @@ export function createRuntimeFallbackHook( if (state.pendingFallbackModel) { if (retrySignal) { - log(`[${HOOK_NAME}] Clearing pending fallback due to Copilot auto-retry signal`, { + log(`[${HOOK_NAME}] Clearing pending fallback due to provider auto-retry signal`, { sessionID, pendingFallbackModel: state.pendingFallbackModel, })