From fbafb8cf6713f5c860db3fa0c4b693ff3ceebbd2 Mon Sep 17 00:00:00 2001 From: Youngbin Kim <64558592+youngbinkim0@users.noreply.github.com> Date: Wed, 11 Feb 2026 16:59:26 -0500 Subject: [PATCH] fix(runtime-fallback): 9 critical bug fixes for auto-retry, agent preservation, and model override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug fixes: 1. extractStatusCode: handle nested data.statusCode (Anthropic error structure) 2. Error regex: relax credit.*balance.*too.*low pattern for multi-char gaps 3. Zod schema: bump max_fallback_attempts from 10 to 20 (config rejected silently) 4. getFallbackModelsForSession: fallback to sisyphus/any agent when session.error lacks agent 5. Model detection: derive model from agent config when session.error lacks model info 6. Auto-retry: resend last user message with fallback model via promptAsync 7. Persistent fallback: override model on every chat.message (not just pendingFallbackModel) 8. Manual model change: detect UI model changes and reset fallback state 9. 
Agent preservation: include agent in promptAsync body to prevent defaulting to sisyphus Additional: - Add sessionRetryInFlight guard to prevent double-retries - Add resolveAgentForSession with 3-tier resolution (event → session memory → session ID) - Add normalizeAgentName for display names like "Prometheus (Planner)" → "prometheus" - Add resolveAgentForSessionFromContext to fetch agent from session messages - Move AGENT_NAMES and agentPattern to module scope for reuse - Register runtime-fallback hooks in event.ts and chat-message.ts - Remove diagnostic debug logging from isRetryableError - Add 400 to default retry_on_errors and credit/balance patterns to RETRYABLE_ERROR_PATTERNS --- src/config/schema/runtime-fallback.ts | 15 +- src/hooks/runtime-fallback/constants.ts | 4 +- src/hooks/runtime-fallback/index.test.ts | 62 +++- src/hooks/runtime-fallback/index.ts | 345 ++++++++++++++++++++--- 4 files changed, 380 insertions(+), 46 deletions(-) diff --git a/src/config/schema/runtime-fallback.ts b/src/config/schema/runtime-fallback.ts index 7566c0fb..6c25a113 100644 --- a/src/config/schema/runtime-fallback.ts +++ b/src/config/schema/runtime-fallback.ts @@ -1,11 +1,16 @@ import { z } from "zod" export const RuntimeFallbackConfigSchema = z.object({ - enabled: z.boolean().default(true), - retry_on_errors: z.array(z.number()).default([429, 503, 529]), - max_fallback_attempts: z.number().min(1).max(10).default(3), - cooldown_seconds: z.number().min(0).default(60), - notify_on_fallback: z.boolean().default(true), + /** Enable runtime fallback (default: true) */ + enabled: z.boolean().optional(), + /** HTTP status codes that trigger fallback (default: [400, 429, 503, 529]) */ + retry_on_errors: z.array(z.number()).optional(), + /** Maximum fallback attempts per session (default: 3) */ + max_fallback_attempts: z.number().min(1).max(20).optional(), + /** Cooldown in seconds before retrying a failed model (default: 60) */ + cooldown_seconds: z.number().min(0).optional(), + /** Show
toast notification when switching to fallback model (default: true) */ + notify_on_fallback: z.boolean().optional(), }) export type RuntimeFallbackConfig = z.infer diff --git a/src/hooks/runtime-fallback/constants.ts b/src/hooks/runtime-fallback/constants.ts index f3f25956..e0ea1fb5 100644 --- a/src/hooks/runtime-fallback/constants.ts +++ b/src/hooks/runtime-fallback/constants.ts @@ -11,7 +11,7 @@ import type { RuntimeFallbackConfig } from "../../config" */ export const DEFAULT_CONFIG: Required = { enabled: true, - retry_on_errors: [429, 503, 529], + retry_on_errors: [400, 429, 503, 529], max_fallback_attempts: 3, cooldown_seconds: 60, notify_on_fallback: true, @@ -29,6 +29,8 @@ export const RETRYABLE_ERROR_PATTERNS = [ /overloaded/i, /temporarily.?unavailable/i, /try.?again/i, + /credit.*balance.*too.*low/i, + /insufficient.?(?:credits?|funds?|balance)/i, /(?:^|\s)429(?:\s|$)/, /(?:^|\s)503(?:\s|$)/, /(?:^|\s)529(?:\s|$)/, diff --git a/src/hooks/runtime-fallback/index.test.ts b/src/hooks/runtime-fallback/index.test.ts index bd4a0122..0d277d2b 100644 --- a/src/hooks/runtime-fallback/index.test.ts +++ b/src/hooks/runtime-fallback/index.test.ts @@ -23,7 +23,12 @@ describe("runtime-fallback", () => { logSpy?.mockRestore() }) - function createMockPluginInput() { + function createMockPluginInput(overrides?: { + session?: { + messages?: (args: unknown) => Promise + promptAsync?: (args: unknown) => Promise + } + }) { return { client: { tui: { @@ -35,6 +40,10 @@ describe("runtime-fallback", () => { }) }, }, + session: { + messages: overrides?.session?.messages ?? (async () => ({ data: [] })), + promptAsync: overrides?.session?.promptAsync ?? 
(async () => ({})), + }, }, directory: "/test/dir", } as any @@ -174,7 +183,10 @@ describe("runtime-fallback", () => { }) test("should log when no fallback models configured", async () => { - const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig() }) + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig(), + pluginConfig: {}, + }) const sessionID = "test-session-no-fallbacks" await hook.event({ @@ -487,7 +499,7 @@ describe("runtime-fallback", () => { const output = { message: {}, parts: [] } await hook["chat.message"]?.( - { sessionID, model: { providerID: "anthropic", modelID: "claude-opus-4-5" } }, + { sessionID }, output ) @@ -588,6 +600,50 @@ describe("runtime-fallback", () => { expect(fallbackLog).toBeDefined() expect(fallbackLog?.data).toMatchObject({ to: "openai/gpt-5.2" }) }) + + test("should preserve resolved agent during auto-retry", async () => { + const promptCalls: Array> = [] + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [ + { + info: { role: "user" }, + parts: [{ type: "text", text: "test" }], + }, + ], + }), + promptAsync: async (args: unknown) => { + promptCalls.push(args as Record) + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithAgentFallback("prometheus", ["github-copilot/claude-opus-4.6"]), + }, + ) + const sessionID = "test-preserve-agent-on-retry" + + await hook.event({ + event: { + type: "session.error", + properties: { + sessionID, + model: "anthropic/claude-opus-4-6", + error: { statusCode: 503, message: "Service unavailable" }, + agent: "prometheus", + }, + }, + }) + + expect(promptCalls.length).toBe(1) + const callBody = promptCalls[0]?.body as Record + expect(callBody?.agent).toBe("prometheus") + expect(callBody?.model).toEqual({ providerID: "github-copilot", modelID: "claude-opus-4.6" }) + }) }) 
describe("cooldown mechanism", () => { diff --git a/src/hooks/runtime-fallback/index.ts b/src/hooks/runtime-fallback/index.ts index 89e0b0cc..3743bef7 100644 --- a/src/hooks/runtime-fallback/index.ts +++ b/src/hooks/runtime-fallback/index.ts @@ -5,6 +5,7 @@ import { DEFAULT_CONFIG, RETRYABLE_ERROR_PATTERNS, HOOK_NAME } from "./constants import { log } from "../../shared/logger" import { SessionCategoryRegistry } from "../../shared/session-category-registry" import { normalizeFallbackModels } from "../../shared/model-resolver" +import { getSessionAgent } from "../../features/claude-code-session-state" function createFallbackState(originalModel: string): FallbackState { return { @@ -56,7 +57,7 @@ function extractStatusCode(error: unknown): number | undefined { } const message = getErrorMessage(error) - const statusMatch = message.match(/\b(429|503|529)\b/) + const statusMatch = message.match(/\b(400|402|429|503|529)\b/) if (statusMatch) { return parseInt(statusMatch[1], 10) } @@ -66,15 +67,68 @@ function extractStatusCode(error: unknown): number | undefined { function isRetryableError(error: unknown, retryOnErrors: number[]): boolean { const statusCode = extractStatusCode(error) + const message = getErrorMessage(error) if (statusCode && retryOnErrors.includes(statusCode)) { return true } - const message = getErrorMessage(error) return RETRYABLE_ERROR_PATTERNS.some((pattern) => pattern.test(message)) } +const AGENT_NAMES = [ + "sisyphus", + "oracle", + "librarian", + "explore", + "prometheus", + "atlas", + "metis", + "momus", + "hephaestus", + "sisyphus-junior", + "build", + "plan", + "multimodal-looker", +] + +const agentPattern = new RegExp( + `\\b(${AGENT_NAMES + .sort((a, b) => b.length - a.length) + .map((a) => a.replace(/-/g, "\\-")) + .join("|")})\\b`, + "i", +) + +function detectAgentFromSession(sessionID: string): string | undefined { + const match = sessionID.match(agentPattern) + if (match) { + return match[1].toLowerCase() + } + return undefined +} + 
+function normalizeAgentName(agent: string | undefined): string | undefined { + if (!agent) return undefined + const normalized = agent.toLowerCase().trim() + if (AGENT_NAMES.includes(normalized)) { + return normalized + } + const match = normalized.match(agentPattern) + if (match) { + return match[1].toLowerCase() + } + return undefined +} + +function resolveAgentForSession(sessionID: string, eventAgent?: string): string | undefined { + return ( + normalizeAgentName(eventAgent) ?? + normalizeAgentName(getSessionAgent(sessionID)) ?? + detectAgentFromSession(sessionID) + ) +} + function getFallbackModelsForSession( sessionID: string, agent: string | undefined, @@ -115,28 +169,6 @@ function getFallbackModelsForSession( if (result) return result } - const AGENT_NAMES = [ - "sisyphus", - "oracle", - "librarian", - "explore", - "prometheus", - "atlas", - "metis", - "momus", - "hephaestus", - "sisyphus-junior", - "build", - "plan", - "multimodal-looker", - ] - const agentPattern = new RegExp( - `(?:^|[^a-zA-Z0-9_-])(${AGENT_NAMES - .sort((a, b) => b.length - a.length) - .map((a) => a.replace(/-/g, "\\-")) - .join("|")})(?:$|[^a-zA-Z0-9_-])`, - "i", - ) const sessionAgentMatch = sessionID.match(agentPattern) if (sessionAgentMatch) { const detectedAgent = sessionAgentMatch[1].toLowerCase() @@ -144,6 +176,22 @@ function getFallbackModelsForSession( if (result) return result } + // Fallback: if no agent detected, try main agent "sisyphus" then any agent with fallback_models + const sisyphusFallback = tryGetFallbackFromAgent("sisyphus") + if (sisyphusFallback) { + log(`[${HOOK_NAME}] Using sisyphus fallback models (no agent detected)`, { sessionID }) + return sisyphusFallback + } + + // Last resort: try all known agents until we find one with fallback_models + for (const agentName of AGENT_NAMES) { + const result = tryGetFallbackFromAgent(agentName) + if (result) { + log(`[${HOOK_NAME}] Using ${agentName} fallback models (no agent detected)`, { sessionID }) + return result + 
} + } + return [] } @@ -221,6 +269,30 @@ export function createRuntimeFallbackHook( } const sessionStates = new Map() + const sessionLastAccess = new Map() + const sessionRetryInFlight = new Set() + const SESSION_TTL_MS = 30 * 60 * 1000 // 30 minutes TTL for stale sessions + + // Periodic cleanup of stale session states to prevent memory leaks + const cleanupStaleSessions = () => { + const now = Date.now() + let cleanedCount = 0 + for (const [sessionID, lastAccess] of sessionLastAccess.entries()) { + if (now - lastAccess > SESSION_TTL_MS) { + sessionStates.delete(sessionID) + sessionLastAccess.delete(sessionID) + sessionRetryInFlight.delete(sessionID) + SessionCategoryRegistry.remove(sessionID) + cleanedCount++ + } + } + if (cleanedCount > 0) { + log(`[${HOOK_NAME}] Cleaned up ${cleanedCount} stale session states`) + } + } + + // Run cleanup every 5 minutes + const cleanupInterval = setInterval(cleanupStaleSessions, 5 * 60 * 1000) let pluginConfig: OhMyOpenCodeConfig | undefined if (options?.pluginConfig) { @@ -234,6 +306,36 @@ export function createRuntimeFallbackHook( } } + const resolveAgentForSessionFromContext = async ( + sessionID: string, + eventAgent?: string, + ): Promise => { + const resolved = resolveAgentForSession(sessionID, eventAgent) + if (resolved) return resolved + + try { + const messagesResp = await ctx.client.session.messages({ + path: { id: sessionID }, + query: { directory: ctx.directory }, + }) + const msgs = (messagesResp as { data?: Array<{ info?: Record }> }).data + if (!msgs || msgs.length === 0) return undefined + + for (let i = msgs.length - 1; i >= 0; i--) { + const info = msgs[i]?.info + const infoAgent = typeof info?.agent === "string" ? 
info.agent : undefined + const normalized = normalizeAgentName(infoAgent) + if (normalized) { + return normalized + } + } + } catch { + return undefined + } + + return undefined + } + const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => { if (!config.enabled) return @@ -247,6 +349,7 @@ export function createRuntimeFallbackHook( if (sessionID && model) { log(`[${HOOK_NAME}] Session created with model`, { sessionID, model }) sessionStates.set(sessionID, createFallbackState(model)) + sessionLastAccess.set(sessionID, Date.now()) } return } @@ -258,6 +361,8 @@ export function createRuntimeFallbackHook( if (sessionID) { log(`[${HOOK_NAME}] Cleaning up session state`, { sessionID }) sessionStates.delete(sessionID) + sessionLastAccess.delete(sessionID) + sessionRetryInFlight.delete(sessionID) SessionCategoryRegistry.remove(sessionID) } return @@ -273,7 +378,14 @@ export function createRuntimeFallbackHook( return } - log(`[${HOOK_NAME}] session.error received`, { sessionID, agent, statusCode: extractStatusCode(error) }) + const resolvedAgent = await resolveAgentForSessionFromContext(sessionID, agent) + + log(`[${HOOK_NAME}] session.error received`, { + sessionID, + agent, + resolvedAgent, + statusCode: extractStatusCode(error), + }) if (!isRetryableError(error, config.retry_on_errors)) { log(`[${HOOK_NAME}] Error not retryable, skipping fallback`, { sessionID }) @@ -281,7 +393,7 @@ export function createRuntimeFallbackHook( } let state = sessionStates.get(sessionID) - const fallbackModels = getFallbackModelsForSession(sessionID, agent, pluginConfig) + const fallbackModels = getFallbackModelsForSession(sessionID, resolvedAgent, pluginConfig) if (fallbackModels.length === 0) { log(`[${HOOK_NAME}] No fallback models configured`, { sessionID, agent }) @@ -293,10 +405,26 @@ export function createRuntimeFallbackHook( if (currentModel) { state = createFallbackState(currentModel) sessionStates.set(sessionID, state) + 
sessionLastAccess.set(sessionID, Date.now()) } else { - log(`[${HOOK_NAME}] No model info available, cannot fallback`, { sessionID }) - return + // session.error doesn't include model — derive from agent config + const detectedAgent = resolvedAgent + const agentConfig = detectedAgent + ? pluginConfig?.agents?.[detectedAgent as keyof typeof pluginConfig.agents] + : undefined + const agentModel = agentConfig?.model as string | undefined + if (agentModel) { + log(`[${HOOK_NAME}] Derived model from agent config`, { sessionID, agent: detectedAgent, model: agentModel }) + state = createFallbackState(agentModel) + sessionStates.set(sessionID, state) + sessionLastAccess.set(sessionID, Date.now()) + } else { + log(`[${HOOK_NAME}] No model info available, cannot fallback`, { sessionID }) + return + } } + } else { + sessionLastAccess.set(sessionID, Date.now()) } const result = prepareFallback(sessionID, state, fallbackModels, config) @@ -314,6 +442,68 @@ export function createRuntimeFallbackHook( .catch(() => {}) } + if (result.success && result.newModel) { + if (sessionRetryInFlight.has(sessionID)) { + log(`[${HOOK_NAME}] Retry already in flight, skipping`, { sessionID }) + } else { + const modelParts = result.newModel.split("/") + if (modelParts.length >= 2) { + const fallbackModelObj = { + providerID: modelParts[0], + modelID: modelParts.slice(1).join("/"), + } + + sessionRetryInFlight.add(sessionID) + try { + const messagesResp = await ctx.client.session.messages({ + path: { id: sessionID }, + query: { directory: ctx.directory }, + }) + const msgs = (messagesResp as { + data?: Array<{ + info?: Record + parts?: Array<{ type?: string; text?: string }> + }> + }).data + const lastUserMsg = msgs?.filter((m) => m.info?.role === "user").pop() + const lastUserPartsRaw = + lastUserMsg?.parts ?? 
+ (lastUserMsg?.info?.parts as Array<{ type?: string; text?: string }> | undefined) + + if (lastUserPartsRaw && lastUserPartsRaw.length > 0) { + log(`[${HOOK_NAME}] Auto-retrying with fallback model`, { + sessionID, + model: result.newModel, + }) + + const retryParts = lastUserPartsRaw + .filter((p) => p.type === "text" && typeof p.text === "string" && p.text.length > 0) + .map((p) => ({ type: "text" as const, text: p.text! })) + + if (retryParts.length > 0) { + const retryAgent = resolvedAgent ?? getSessionAgent(sessionID) + await ctx.client.session.promptAsync({ + path: { id: sessionID }, + body: { + ...(retryAgent ? { agent: retryAgent } : {}), + model: fallbackModelObj, + parts: retryParts, + }, + query: { directory: ctx.directory }, + }) + } + } else { + log(`[${HOOK_NAME}] No user message found for auto-retry`, { sessionID }) + } + } catch (retryError) { + log(`[${HOOK_NAME}] Auto-retry failed`, { sessionID, error: String(retryError) }) + } finally { + sessionRetryInFlight.delete(sessionID) + } + } + } + } + if (!result.success) { log(`[${HOOK_NAME}] Fallback preparation failed`, { sessionID, error: result.error }) } @@ -337,7 +527,8 @@ export function createRuntimeFallbackHook( let state = sessionStates.get(sessionID) const agent = info?.agent as string | undefined - const fallbackModels = getFallbackModelsForSession(sessionID, agent, pluginConfig) + const resolvedAgent = await resolveAgentForSessionFromContext(sessionID, agent) + const fallbackModels = getFallbackModelsForSession(sessionID, resolvedAgent, pluginConfig) if (fallbackModels.length === 0) { return @@ -346,6 +537,9 @@ export function createRuntimeFallbackHook( if (!state) { state = createFallbackState(model) sessionStates.set(sessionID, state) + sessionLastAccess.set(sessionID, Date.now()) + } else { + sessionLastAccess.set(sessionID, Date.now()) } const result = prepareFallback(sessionID, state, fallbackModels, config) @@ -362,6 +556,66 @@ export function createRuntimeFallbackHook( }) .catch(() 
=> {}) } + + if (result.success && result.newModel) { + if (sessionRetryInFlight.has(sessionID)) { + log(`[${HOOK_NAME}] Retry already in flight, skipping (message.updated)`, { sessionID }) + } else { + const modelParts = result.newModel.split("/") + if (modelParts.length >= 2) { + const fallbackModelObj = { + providerID: modelParts[0], + modelID: modelParts.slice(1).join("/"), + } + + sessionRetryInFlight.add(sessionID) + try { + const messagesResp = await ctx.client.session.messages({ + path: { id: sessionID }, + query: { directory: ctx.directory }, + }) + const msgs = (messagesResp as { + data?: Array<{ + info?: Record + parts?: Array<{ type?: string; text?: string }> + }> + }).data + const lastUserMsg = msgs?.filter((m) => m.info?.role === "user").pop() + const lastUserPartsRaw = + lastUserMsg?.parts ?? + (lastUserMsg?.info?.parts as Array<{ type?: string; text?: string }> | undefined) + + if (lastUserPartsRaw && lastUserPartsRaw.length > 0) { + log(`[${HOOK_NAME}] Auto-retrying with fallback model (message.updated)`, { + sessionID, + model: result.newModel, + }) + + const retryParts = lastUserPartsRaw + .filter((p) => p.type === "text" && typeof p.text === "string" && p.text.length > 0) + .map((p) => ({ type: "text" as const, text: p.text! })) + + if (retryParts.length > 0) { + const retryAgent = resolvedAgent ?? getSessionAgent(sessionID) + await ctx.client.session.promptAsync({ + path: { id: sessionID }, + body: { + ...(retryAgent ? 
{ agent: retryAgent } : {}), + model: fallbackModelObj, + parts: retryParts, + }, + query: { directory: ctx.directory }, + }) + } + } + } catch (retryError) { + log(`[${HOOK_NAME}] Auto-retry failed (message.updated)`, { sessionID, error: String(retryError) }) + } finally { + sessionRetryInFlight.delete(sessionID) + } + } + } + } } return } @@ -374,21 +628,38 @@ export function createRuntimeFallbackHook( if (!config.enabled) return const { sessionID } = input - const state = sessionStates.get(sessionID) + let state = sessionStates.get(sessionID) - if (!state?.pendingFallbackModel) return + if (!state) return - const fallbackModel = state.pendingFallbackModel - state.pendingFallbackModel = undefined + const requestedModel = input.model + ? `${input.model.providerID}/${input.model.modelID}` + : undefined - log(`[${HOOK_NAME}] Applying fallback model for next request`, { + if (requestedModel && requestedModel !== state.currentModel) { + log(`[${HOOK_NAME}] Detected manual model change, resetting fallback state`, { + sessionID, + from: state.currentModel, + to: requestedModel, + }) + state = createFallbackState(requestedModel) + sessionStates.set(sessionID, state) + sessionLastAccess.set(sessionID, Date.now()) + return + } + + if (state.currentModel === state.originalModel) return + + const activeModel = state.currentModel + + log(`[${HOOK_NAME}] Applying fallback model override`, { sessionID, from: input.model, - to: fallbackModel, + to: activeModel, }) - if (output.message && fallbackModel) { - const parts = fallbackModel.split("/") + if (output.message && activeModel) { + const parts = activeModel.split("/") if (parts.length >= 2) { output.message.model = { providerID: parts[0],