feat(runtime-fallback): generalize provider auto-retry signal detection
Refactor retry signal detection to be provider-agnostic:

- Replace hardcoded Copilot/OpenAI checks with generic pattern matching.
- Detect any provider message that contains a limit/quota keyword together with `[retrying in X]`.
- Add the OpenAI pattern: 'usage limit has been reached [retrying in X]'.
- Update logging to say 'provider' instead of naming a specific provider.
- Add 'usage limit has been reached' to RETRYABLE_ERROR_PATTERNS.

This enables fallback escalation for any provider that signals an automatic retry because of a quota or rate limit, not just Copilot and OpenAI.

Closes PR discussion: generalize retry pattern detection
This commit is contained in:
parent
31f61078b1
commit
8b2ae957e5
@ -26,6 +26,7 @@ export const RETRYABLE_ERROR_PATTERNS = [
|
|||||||
/rate.?limit/i,
|
/rate.?limit/i,
|
||||||
/too.?many.?requests/i,
|
/too.?many.?requests/i,
|
||||||
/quota.?exceeded/i,
|
/quota.?exceeded/i,
|
||||||
|
/usage\s+limit\s+has\s+been\s+reached/i,
|
||||||
/service.?unavailable/i,
|
/service.?unavailable/i,
|
||||||
/overloaded/i,
|
/overloaded/i,
|
||||||
/temporarily.?unavailable/i,
|
/temporarily.?unavailable/i,
|
||||||
|
|||||||
@ -341,7 +341,7 @@ describe("runtime-fallback", () => {
|
|||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
const signalLog = logCalls.find((c) => c.msg.includes("Detected Copilot auto-retry signal"))
|
const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal"))
|
||||||
expect(signalLog).toBeDefined()
|
expect(signalLog).toBeDefined()
|
||||||
|
|
||||||
const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback"))
|
const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback"))
|
||||||
@ -349,6 +349,44 @@ describe("runtime-fallback", () => {
|
|||||||
expect(fallbackLog?.data).toMatchObject({ from: "github-copilot/claude-opus-4.6", to: "openai/gpt-5.2" })
|
expect(fallbackLog?.data).toMatchObject({ from: "github-copilot/claude-opus-4.6", to: "openai/gpt-5.2" })
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("should trigger fallback on OpenAI auto-retry signal in message.updated", async () => {
|
||||||
|
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
|
||||||
|
config: createMockConfig({ notify_on_fallback: false }),
|
||||||
|
pluginConfig: createMockPluginConfigWithCategoryFallback(["anthropic/claude-opus-4-6"]),
|
||||||
|
})
|
||||||
|
|
||||||
|
const sessionID = "test-session-openai-auto-retry"
|
||||||
|
SessionCategoryRegistry.register(sessionID, "test")
|
||||||
|
|
||||||
|
await hook.event({
|
||||||
|
event: {
|
||||||
|
type: "session.created",
|
||||||
|
properties: { info: { id: sessionID, model: "openai/gpt-5.3-codex" } },
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await hook.event({
|
||||||
|
event: {
|
||||||
|
type: "message.updated",
|
||||||
|
properties: {
|
||||||
|
info: {
|
||||||
|
sessionID,
|
||||||
|
role: "assistant",
|
||||||
|
model: "openai/gpt-5.3-codex",
|
||||||
|
status: "The usage limit has been reached [retrying in 27s attempt #6]",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal"))
|
||||||
|
expect(signalLog).toBeDefined()
|
||||||
|
|
||||||
|
const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback"))
|
||||||
|
expect(fallbackLog).toBeDefined()
|
||||||
|
expect(fallbackLog?.data).toMatchObject({ from: "openai/gpt-5.3-codex", to: "anthropic/claude-opus-4-6" })
|
||||||
|
})
|
||||||
|
|
||||||
test("should log when no fallback models configured", async () => {
|
test("should log when no fallback models configured", async () => {
|
||||||
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
|
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
|
||||||
config: createMockConfig(),
|
config: createMockConfig(),
|
||||||
@ -1243,6 +1281,81 @@ describe("runtime-fallback", () => {
|
|||||||
expect(retriedModels).toContain("openai/gpt-5.3-codex")
|
expect(retriedModels).toContain("openai/gpt-5.3-codex")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("should not clear fallback timeout on assistant non-error update with OpenAI retry signal", async () => {
|
||||||
|
const retriedModels: string[] = []
|
||||||
|
|
||||||
|
const hook = createRuntimeFallbackHook(
|
||||||
|
createMockPluginInput({
|
||||||
|
session: {
|
||||||
|
messages: async () => ({
|
||||||
|
data: [{ info: { role: "user" }, parts: [{ type: "text", text: "test" }] }],
|
||||||
|
}),
|
||||||
|
promptAsync: async (args: unknown) => {
|
||||||
|
const model = (args as { body?: { model?: { providerID?: string; modelID?: string } } })?.body?.model
|
||||||
|
if (model?.providerID && model?.modelID) {
|
||||||
|
retriedModels.push(`${model.providerID}/${model.modelID}`)
|
||||||
|
}
|
||||||
|
return {}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
{
|
||||||
|
config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 30 }),
|
||||||
|
pluginConfig: createMockPluginConfigWithCategoryFallback([
|
||||||
|
"openai/gpt-5.3-codex",
|
||||||
|
"anthropic/claude-opus-4-6",
|
||||||
|
]),
|
||||||
|
session_timeout_ms: 20,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
const sessionID = "test-session-openai-retry-signal-no-error"
|
||||||
|
SessionCategoryRegistry.register(sessionID, "test")
|
||||||
|
|
||||||
|
await hook.event({
|
||||||
|
event: {
|
||||||
|
type: "session.created",
|
||||||
|
properties: { info: { id: sessionID, model: "google/gemini-2.5-pro" } },
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await hook.event({
|
||||||
|
event: {
|
||||||
|
type: "session.error",
|
||||||
|
properties: {
|
||||||
|
sessionID,
|
||||||
|
error: {
|
||||||
|
name: "ProviderAuthError",
|
||||||
|
data: {
|
||||||
|
providerID: "google",
|
||||||
|
message:
|
||||||
|
"Google Generative AI API key is missing. Pass it using the 'apiKey' parameter or the GOOGLE_GENERATIVE_AI_API_KEY environment variable.",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(retriedModels).toEqual(["openai/gpt-5.3-codex"])
|
||||||
|
|
||||||
|
await hook.event({
|
||||||
|
event: {
|
||||||
|
type: "message.updated",
|
||||||
|
properties: {
|
||||||
|
info: {
|
||||||
|
sessionID,
|
||||||
|
role: "assistant",
|
||||||
|
status: "The usage limit has been reached [retrying in 27s attempt #6]",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 60))
|
||||||
|
|
||||||
|
expect(retriedModels).toContain("anthropic/claude-opus-4-6")
|
||||||
|
})
|
||||||
|
|
||||||
test("should not clear fallback timeout on assistant non-error update without user-visible content", async () => {
|
test("should not clear fallback timeout on assistant non-error update without user-visible content", async () => {
|
||||||
const retriedModels: string[] = []
|
const retriedModels: string[] = []
|
||||||
|
|
||||||
|
|||||||
@ -111,7 +111,29 @@ function classifyErrorType(error: unknown): string | undefined {
|
|||||||
return undefined
|
return undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractCopilotAutoRetrySignal(info: Record<string, unknown> | undefined): string | undefined {
|
interface AutoRetrySignal {
|
||||||
|
signal: string
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Word sequences that indicate a provider-side limit or quota condition.
 * Each inner array is one phrase; its words are joined with a tolerant
 * separator when the matcher below is compiled.
 */
const AUTO_RETRY_LIMIT_PHRASES: ReadonlyArray<readonly string[]> = [
  ["too", "many", "requests"],
  ["quota", "exceeded"],
  ["usage", "limit"],
  ["rate", "limit"],
  ["limit", "reached"],
]

/**
 * Separator allowed between the words of a limit phrase: nothing, a run of
 * whitespace, or any single character. This accepts "Too Many Requests",
 * "TooManyRequests", "quota_exceeded" and "rate-limit" alike, matching the
 * `.?` style used by the previous Copilot-specific check while still
 * tolerating multiple spaces.
 */
const AUTO_RETRY_WORD_SEPARATOR = "(?:\\s+|.)?"

/** Case-insensitive matcher for any of the limit/quota phrases above. */
const AUTO_RETRY_LIMIT_RE = new RegExp(
  AUTO_RETRY_LIMIT_PHRASES.map((words) => words.join(AUTO_RETRY_WORD_SEPARATOR)).join("|"),
  "i",
)

/**
 * Detects provider auto-retry signals - when a provider hits a quota/limit
 * and indicates it will automatically retry after a delay.
 *
 * Every predicate in this list must return true for the combined text to be
 * treated as an auto-retry signal.
 *
 * Pattern: mentions limit/quota/rate limit AND indicates [retrying in X]
 * Examples:
 * - "Too Many Requests: quota exceeded [retrying in ~2 weeks attempt #1]"
 * - "The usage limit has been reached [retrying in 27s attempt #6]"
 * - "Rate limit exceeded. [retrying in 30s]"
 * - "TooManyRequests: quota_exceeded [retrying in 5s]"
 */
const AUTO_RETRY_PATTERNS: Array<(combined: string) => boolean> = [
  // Must have retry indicator
  (combined) => /retrying\s+in/i.test(combined),
  // And mention some kind of limit/quota
  (combined) => AUTO_RETRY_LIMIT_RE.test(combined),
]
|
||||||
|
|
||||||
|
function extractAutoRetrySignal(info: Record<string, unknown> | undefined): AutoRetrySignal | undefined {
|
||||||
if (!info) return undefined
|
if (!info) return undefined
|
||||||
|
|
||||||
const candidates: string[] = []
|
const candidates: string[] = []
|
||||||
@ -131,8 +153,10 @@ function extractCopilotAutoRetrySignal(info: Record<string, unknown> | undefined
|
|||||||
const combined = candidates.join("\n")
|
const combined = candidates.join("\n")
|
||||||
if (!combined) return undefined
|
if (!combined) return undefined
|
||||||
|
|
||||||
if (/too.?many.?requests/i.test(combined) && /quota.?exceeded/i.test(combined) && /retrying\s+in/i.test(combined)) {
|
// All patterns must match to be considered an auto-retry signal
|
||||||
return combined
|
const isAutoRetry = AUTO_RETRY_PATTERNS.every((test) => test(combined))
|
||||||
|
if (isAutoRetry) {
|
||||||
|
return { signal: combined }
|
||||||
}
|
}
|
||||||
|
|
||||||
return undefined
|
return undefined
|
||||||
@ -592,7 +616,7 @@ export function createRuntimeFallbackHook(
|
|||||||
.join("\n")
|
.join("\n")
|
||||||
|
|
||||||
if (!textFromParts) return false
|
if (!textFromParts) return false
|
||||||
if (extractCopilotAutoRetrySignal({ message: textFromParts })) return false
|
if (extractAutoRetrySignal({ message: textFromParts })) return false
|
||||||
|
|
||||||
return true
|
return true
|
||||||
} catch {
|
} catch {
|
||||||
@ -779,7 +803,8 @@ export function createRuntimeFallbackHook(
|
|||||||
if (event.type === "message.updated") {
|
if (event.type === "message.updated") {
|
||||||
const info = props?.info as Record<string, unknown> | undefined
|
const info = props?.info as Record<string, unknown> | undefined
|
||||||
const sessionID = info?.sessionID as string | undefined
|
const sessionID = info?.sessionID as string | undefined
|
||||||
const retrySignal = extractCopilotAutoRetrySignal(info)
|
const retrySignalResult = extractAutoRetrySignal(info)
|
||||||
|
const retrySignal = retrySignalResult?.signal
|
||||||
const error = info?.error ?? (retrySignal ? { name: "ProviderRateLimitError", message: retrySignal } : undefined)
|
const error = info?.error ?? (retrySignal ? { name: "ProviderRateLimitError", message: retrySignal } : undefined)
|
||||||
const role = info?.role as string | undefined
|
const role = info?.role as string | undefined
|
||||||
const model = info?.model as string | undefined
|
const model = info?.model as string | undefined
|
||||||
@ -816,7 +841,7 @@ export function createRuntimeFallbackHook(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (retrySignal && sessionRetryInFlight.has(sessionID)) {
|
if (retrySignal && sessionRetryInFlight.has(sessionID)) {
|
||||||
log(`[${HOOK_NAME}] Overriding in-flight retry due to Copilot auto-retry signal`, {
|
log(`[${HOOK_NAME}] Overriding in-flight retry due to provider auto-retry signal`, {
|
||||||
sessionID,
|
sessionID,
|
||||||
model,
|
model,
|
||||||
})
|
})
|
||||||
@ -825,7 +850,7 @@ export function createRuntimeFallbackHook(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (retrySignal) {
|
if (retrySignal) {
|
||||||
log(`[${HOOK_NAME}] Detected Copilot auto-retry signal`, { sessionID, model })
|
log(`[${HOOK_NAME}] Detected provider auto-retry signal`, { sessionID, model })
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!retrySignal) {
|
if (!retrySignal) {
|
||||||
@ -894,7 +919,7 @@ export function createRuntimeFallbackHook(
|
|||||||
|
|
||||||
if (state.pendingFallbackModel) {
|
if (state.pendingFallbackModel) {
|
||||||
if (retrySignal) {
|
if (retrySignal) {
|
||||||
log(`[${HOOK_NAME}] Clearing pending fallback due to Copilot auto-retry signal`, {
|
log(`[${HOOK_NAME}] Clearing pending fallback due to provider auto-retry signal`, {
|
||||||
sessionID,
|
sessionID,
|
||||||
pendingFallbackModel: state.pendingFallbackModel,
|
pendingFallbackModel: state.pendingFallbackModel,
|
||||||
})
|
})
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user