From 8b2ae957e5034f8583ed9e37353e4082bf6e1672 Mon Sep 17 00:00:00 2001
From: Youngbin Kim <64558592+youngbinkim0@users.noreply.github.com>
Date: Thu, 12 Feb 2026 17:13:34 -0500
Subject: [PATCH] feat(runtime-fallback): generalize provider auto-retry signal
 detection

Refactor retry signal detection to be provider-agnostic:
- Replace the hardcoded Copilot-specific check with generic pattern matching
- Detect any provider message that pairs a limit/quota keyword with a
  "retrying in X" marker
- Add OpenAI pattern: 'usage limit has been reached [retrying in X]'
- Update logging to use generic 'provider' instead of specific names
- Add 'usage limit has been reached' to RETRYABLE_ERROR_PATTERNS

This enables fallback escalation for any provider that signals automatic
retries due to quota/rate limits, not just Copilot and OpenAI.

Addresses the PR review discussion on generalizing retry pattern detection.
---
 src/hooks/runtime-fallback/constants.ts  |   1 +
 src/hooks/runtime-fallback/index.test.ts | 115 ++++++++++++++++++++++-
 src/hooks/runtime-fallback/index.ts      |  41 ++++++--
 3 files changed, 148 insertions(+), 9 deletions(-)

diff --git a/src/hooks/runtime-fallback/constants.ts b/src/hooks/runtime-fallback/constants.ts
index a49831fb..b8001b96 100644
--- a/src/hooks/runtime-fallback/constants.ts
+++ b/src/hooks/runtime-fallback/constants.ts
@@ -26,6 +26,7 @@ export const RETRYABLE_ERROR_PATTERNS = [
   /rate.?limit/i,
   /too.?many.?requests/i,
   /quota.?exceeded/i,
+  /usage\s+limit\s+has\s+been\s+reached/i,
   /service.?unavailable/i,
   /overloaded/i,
   /temporarily.?unavailable/i,
diff --git a/src/hooks/runtime-fallback/index.test.ts b/src/hooks/runtime-fallback/index.test.ts
index 38e57d51..82895086 100644
--- a/src/hooks/runtime-fallback/index.test.ts
+++ b/src/hooks/runtime-fallback/index.test.ts
@@ -341,7 +341,7 @@ describe("runtime-fallback", () => {
         },
       })
 
-      const signalLog = logCalls.find((c) => c.msg.includes("Detected Copilot auto-retry signal"))
+      const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal"))
       expect(signalLog).toBeDefined()
 
       const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback"))
@@ -349,6 +349,44 @@ describe("runtime-fallback", () => {
       expect(fallbackLog?.data).toMatchObject({ from: "github-copilot/claude-opus-4.6", to: "openai/gpt-5.2" })
     })
 
+    test("should trigger fallback on OpenAI auto-retry signal in message.updated", async () => {
+      const hook = createRuntimeFallbackHook(createMockPluginInput(), {
+        config: createMockConfig({ notify_on_fallback: false }),
+        pluginConfig: createMockPluginConfigWithCategoryFallback(["anthropic/claude-opus-4-6"]),
+      })
+
+      const sessionID = "test-session-openai-auto-retry"
+      SessionCategoryRegistry.register(sessionID, "test")
+      // The session starts on an OpenAI model; the only configured fallback is an Anthropic model.
+      await hook.event({
+        event: {
+          type: "session.created",
+          properties: { info: { id: sessionID, model: "openai/gpt-5.3-codex" } },
+        },
+      })
+      // Assistant update with no error field: the status text pairs a usage-limit phrase with "retrying in".
+      await hook.event({
+        event: {
+          type: "message.updated",
+          properties: {
+            info: {
+              sessionID,
+              role: "assistant",
+              model: "openai/gpt-5.3-codex",
+              status: "The usage limit has been reached [retrying in 27s attempt #6]",
+            },
+          },
+        },
+      })
+      // The status text alone must be logged as a provider auto-retry signal...
+      const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal"))
+      expect(signalLog).toBeDefined()
+      // ...and must start a fallback from the OpenAI model to the configured Anthropic model.
+      const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback"))
+      expect(fallbackLog).toBeDefined()
+      expect(fallbackLog?.data).toMatchObject({ from: "openai/gpt-5.3-codex", to: "anthropic/claude-opus-4-6" })
+    })
+
     test("should log when no fallback models configured", async () => {
       const hook = createRuntimeFallbackHook(createMockPluginInput(), {
         config: createMockConfig(),
@@ -1243,6 +1281,81 @@ describe("runtime-fallback", () => {
       expect(retriedModels).toContain("openai/gpt-5.3-codex")
     })
 
+    test("should not clear fallback timeout on assistant non-error update with OpenAI retry signal", async () => {
+      const retriedModels: string[] = []
+
+      const hook = createRuntimeFallbackHook(
+        createMockPluginInput({
+          session: {
+            messages: async () => ({
+              data: [{ info: { role: "user" }, parts: [{ type: "text", text: "test" }] }],
+            }),
+            promptAsync: async (args: unknown) => {
+              const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model
+              if (model?.providerID && model?.modelID) {
+                retriedModels.push(`${model.providerID}/${model.modelID}`)
+              }
+              return {}
+            },
+          },
+        }),
+        {
+          config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }),
+          pluginConfig: createMockPluginConfigWithCategoryFallback([
+            "openai/gpt-5.3-codex",
+            "anthropic/claude-opus-4-6",
+          ]),
+          session_timeout_ms: 20,
+        }
+      )
+
+      const sessionID = "test-session-openai-retry-signal-no-error"
+      SessionCategoryRegistry.register(sessionID, "test")
+      // Session starts on a Google model; the fallback list is OpenAI first, then Anthropic.
+      await hook.event({
+        event: {
+          type: "session.created",
+          properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } },
+        },
+      })
+      // A provider auth error on the Google model should move the session to the first fallback.
+      await hook.event({
+        event: {
+          type: "session.error",
+          properties: {
+            sessionID,
+            error: {
+              name: "ProviderAuthError",
+              data: {
+                providerID: "google",
+                message:
+                  "Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.",
+              },
+            },
+          },
+        },
+      })
+
+      expect(retriedModels).toEqual(["openai/gpt-5.3-codex"])
+      // The assistant update below has no error and no model, only a status carrying a retry signal.
+      await hook.event({
+        event: {
+          type: "message.updated",
+          properties: {
+            info: {
+              sessionID,
+              role: "assistant",
+              status: "The usage limit has been reached [retrying in 27s attempt #6]",
+            },
+          },
+        },
+      })
+      // Wait 60ms (> session_timeout_ms: 20) - presumably long enough for a still-armed fallback timeout to fire.
+      await new Promise((resolve) => setTimeout(resolve, 60))
+      // The second fallback model must have been prompted by now.
+      expect(retriedModels).toContain("anthropic/claude-opus-4-6")
+    })
+
     test("should not clear fallback timeout on assistant non-error update without user-visible content", async () => {
       const retriedModels: string[] = []
 
diff --git a/src/hooks/runtime-fallback/index.ts b/src/hooks/runtime-fallback/index.ts
index 29fac0e2..abefddf0 100644
--- a/src/hooks/runtime-fallback/index.ts
+++ b/src/hooks/runtime-fallback/index.ts
@@ -111,7 +111,29 @@ function classifyErrorType(error: unknown): string | undefined {
   return undefined
 }
 
-function extractCopilotAutoRetrySignal(info: Record<string, unknown> | undefined): string | undefined {
+interface AutoRetrySignal { // returned by extractAutoRetrySignal when the inspected text is an auto-retry signal
+  signal: string // the newline-joined candidate text that satisfied every AUTO_RETRY_PATTERNS predicate
+}
+
+/**
+ * Predicates that must ALL match (see extractAutoRetrySignal) for text to count
+ * as a provider auto-retry signal: a quota/limit was hit and a retry is pending.
+ * NOTE(review): separators here are \s-based, stricter than the `.?` used in
+ * RETRYABLE_ERROR_PATTERNS (e.g. "quota_exceeded" will not match) - confirm.
+ * Matching examples:
+ * - "Too Many Requests: quota exceeded [retrying in ~2 weeks attempt #1]"
+ * - "The usage limit has been reached [retrying in 27s attempt #6]"
+ * - "Rate limit exceeded. [retrying in 30s]"
+ */
+const AUTO_RETRY_PATTERNS: Array<(combined: string) => boolean> = [
+  // Retry indicator: the phrase "retrying in" (case-insensitive); surrounding brackets are not required
+  (combined) => /retrying\s+in/i.test(combined),
+  // Limit indicator: any one of the quota/limit phrases below (case-insensitive)
+  (combined) =>
+    /(?:too\s+many\s+requests|quota\s*exceeded|usage\s+limit|rate\s+limit|limit\s+reached)/i.test(combined),
+]
+
+function extractAutoRetrySignal(info: Record<string, unknown> | undefined): AutoRetrySignal | undefined {
   if (!info) return undefined
 
   const candidates: string[] = []
@@ -131,8 +153,10 @@ function extractCopilotAutoRetrySignal(info: Record<string, unknown> | undefined
   const combined = candidates.join("\n")
   if (!combined) return undefined
 
-  if (/too.?many.?requests/i.test(combined) && /quota.?exceeded/i.test(combined) && /retrying\s+in/i.test(combined)) {
-    return combined
+  // All patterns must match to be considered an auto-retry signal
+  const isAutoRetry = AUTO_RETRY_PATTERNS.every((test) => test(combined))
+  if (isAutoRetry) {
+    return { signal: combined }
   }
 
   return undefined
@@ -592,7 +616,7 @@ export function createRuntimeFallbackHook(
         .join("\n")
 
       if (!textFromParts) return false
-      if (extractCopilotAutoRetrySignal({ message: textFromParts })) return false
+      if (extractAutoRetrySignal({ message: textFromParts })) return false
 
       return true
     } catch {
@@ -779,7 +803,8 @@ export function createRuntimeFallbackHook(
     if (event.type === "message.updated") {
       const info = props?.info as Record<string, unknown> | undefined
       const sessionID = info?.sessionID as string | undefined
-      const retrySignal = extractCopilotAutoRetrySignal(info)
+      const retrySignalResult = extractAutoRetrySignal(info)
+      const retrySignal = retrySignalResult?.signal
       const error = info?.error ?? (retrySignal ? { name: "ProviderRateLimitError", message: retrySignal } : undefined)
       const role = info?.role as string | undefined
       const model = info?.model as string | undefined
@@ -816,7 +841,7 @@ export function createRuntimeFallbackHook(
         }
 
         if (retrySignal && sessionRetryInFlight.has(sessionID)) {
-          log(`[${HOOK_NAME}] Overriding in-flight retry due to Copilot auto-retry signal`, {
+          log(`[${HOOK_NAME}] Overriding in-flight retry due to provider auto-retry signal`, {
             sessionID,
             model,
           })
@@ -825,7 +850,7 @@ export function createRuntimeFallbackHook(
         }
 
         if (retrySignal) {
-          log(`[${HOOK_NAME}] Detected Copilot auto-retry signal`, { sessionID, model })
+          log(`[${HOOK_NAME}] Detected provider auto-retry signal`, { sessionID, model })
         }
 
         if (!retrySignal) {
@@ -894,7 +919,7 @@ export function createRuntimeFallbackHook(
 
           if (state.pendingFallbackModel) {
             if (retrySignal) {
-              log(`[${HOOK_NAME}] Clearing pending fallback due to Copilot auto-retry signal`, {
+              log(`[${HOOK_NAME}] Clearing pending fallback due to provider auto-retry signal`, {
                 sessionID,
                 pendingFallbackModel: state.pendingFallbackModel,
               })