diff --git a/src/config/index.ts b/src/config/index.ts index 5f881831..213c78d5 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -11,6 +11,8 @@ export { RalphLoopConfigSchema, TmuxConfigSchema, TmuxLayoutSchema, + RuntimeFallbackConfigSchema, + FallbackModelsSchema, } from "./schema" export type { @@ -29,4 +31,5 @@ export type { TmuxLayout, SisyphusConfig, SisyphusTasksConfig, + RuntimeFallbackConfig, } from "./schema" diff --git a/src/config/schema.ts b/src/config/schema.ts index e4c55c6f..0d2c590b 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -9,11 +9,13 @@ export * from "./schema/comment-checker" export * from "./schema/commands" export * from "./schema/dynamic-context-pruning" export * from "./schema/experimental" +export * from "./schema/fallback-models" export * from "./schema/git-master" export * from "./schema/hooks" export * from "./schema/notification" export * from "./schema/oh-my-opencode-config" export * from "./schema/ralph-loop" +export * from "./schema/runtime-fallback" export * from "./schema/skills" export * from "./schema/sisyphus" export * from "./schema/sisyphus-agent" diff --git a/src/config/schema/agent-overrides.ts b/src/config/schema/agent-overrides.ts index 59bb360e..0b142cb3 100644 --- a/src/config/schema/agent-overrides.ts +++ b/src/config/schema/agent-overrides.ts @@ -1,9 +1,11 @@ import { z } from "zod" +import { FallbackModelsSchema } from "./fallback-models" import { AgentPermissionSchema } from "./internal/permission" export const AgentOverrideConfigSchema = z.object({ /** @deprecated Use `category` instead. Model is inherited from category defaults. 
/**
 * Accepted shapes for a `fallback_models` setting: either a single model id
 * or an ordered list of model ids.
 */
export const FallbackModelsSchema = z.union([z.string(), z.array(z.string())])

/** Parsed value of a `fallback_models` setting (`string | string[]`). */
export type FallbackModels = z.infer<typeof FallbackModelsSchema>
/**
 * Settings for the runtime model-fallback hook.
 *
 * Every field has a default, so an empty object parses to a fully populated
 * configuration.
 */
export const RuntimeFallbackConfigSchema = z.object({
  /** Master switch for the hook. */
  enabled: z.boolean().default(true),
  /** HTTP status codes that trigger a fallback; status codes are whole numbers. */
  retry_on_errors: z.array(z.number().int()).default([429, 503, 529]),
  /** Upper bound on fallback switches per session; a count, so it must be an integer. */
  max_fallback_attempts: z.number().int().min(1).max(10).default(3),
  /** Seconds a failed model is skipped before it may be selected again. */
  cooldown_seconds: z.number().min(0).default(60),
  /** Whether to show a toast when a fallback model is selected. */
  notify_on_fallback: z.boolean().default(true),
})

/** Parsed runtime-fallback configuration. */
export type RuntimeFallbackConfig = z.infer<typeof RuntimeFallbackConfigSchema>
/**
 * Runtime Fallback Hook - Constants
 *
 * Default values and configuration constants for the runtime fallback feature.
 */

/**
 * Default configuration values for runtime fallback.
 * Mirrors the defaults declared on the runtime-fallback config schema.
 */
export const DEFAULT_CONFIG: Required<RuntimeFallbackConfig> = {
  enabled: true,
  retry_on_errors: [429, 503, 529],
  max_fallback_attempts: 3,
  cooldown_seconds: 60,
  notify_on_fallback: true,
}

/**
 * Error-message patterns that indicate rate limiting or temporary failures.
 * These are checked in addition to HTTP status codes.
 *
 * The numeric patterns are anchored on word boundaries so that a status code
 * is only recognised as a standalone token ("HTTP 429", "status=503") and not
 * as a substring of an unrelated number such as a request id ("14290").
 */
export const RETRYABLE_ERROR_PATTERNS = [
  /rate.?limit/i,
  /too.?many.?requests/i,
  /quota.?exceeded/i,
  /service.?unavailable/i,
  /overloaded/i,
  /temporarily.?unavailable/i,
  /try.?again/i,
  /\b429\b/,
  /\b503\b/,
  /\b529\b/,
]

/**
 * Hook name for identification and logging.
 */
export const HOOK_NAME = "runtime-fallback"
/**
 * Behavioural tests for the runtime-fallback hook.
 *
 * The hook is observed through its log output: `log` from the shared module is
 * replaced with a spy that records every call. Tests that claim an error is
 * retryable assert both that the error was received AND that it was not
 * classified as "not retryable" - the "received" log alone is written before
 * retryability is evaluated and therefore proves nothing about detection.
 */
describe("runtime-fallback", () => {
  const DEFAULT_MODEL = "anthropic/claude-opus-4-5"

  let logCalls: Array<{ msg: string; data?: unknown }>
  let logSpy: ReturnType<typeof spyOn>
  let toastCalls: Array<{ title: string; message: string; variant: string }>

  beforeEach(() => {
    logCalls = []
    toastCalls = []
    logSpy = spyOn(sharedModule, "log").mockImplementation((msg: string, data?: unknown) => {
      logCalls.push({ msg, data })
    })
  })

  afterEach(() => {
    logSpy?.mockRestore()
  })

  function createMockPluginInput() {
    return {
      client: {
        tui: {
          showToast: async (opts: { body: { title: string; message: string; variant: string; duration: number } }) => {
            toastCalls.push({
              title: opts.body.title,
              message: opts.body.message,
              variant: opts.body.variant,
            })
          },
        },
      },
      directory: "/test/dir",
    } as any
  }

  function createMockConfig(overrides?: Partial<RuntimeFallbackConfig>): RuntimeFallbackConfig {
    return {
      enabled: true,
      retry_on_errors: [429, 503, 529],
      max_fallback_attempts: 3,
      cooldown_seconds: 60,
      notify_on_fallback: true,
      ...overrides,
    }
  }

  /** Builds a hook wired to the mock plugin input and a (possibly overridden) config. */
  function createHook(overrides?: Partial<RuntimeFallbackConfig>): RuntimeFallbackHook {
    return createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig(overrides) })
  }

  /** Sends a `session.created` event so the hook tracks `sessionID`. */
  function createSession(hook: RuntimeFallbackHook, sessionID: string, model: string = DEFAULT_MODEL) {
    return hook.event({
      event: { type: "session.created", properties: { info: { id: sessionID, model } } },
    })
  }

  /** Sends a `session.error` event with the given properties. */
  function failSession(hook: RuntimeFallbackHook, properties: Record<string, unknown>) {
    return hook.event({ event: { type: "session.error", properties } })
  }

  /** Sends a `message.updated` event whose `info` payload is `info`. */
  function updateMessage(hook: RuntimeFallbackHook, info: Record<string, unknown>) {
    return hook.event({ event: { type: "message.updated", properties: { info } } })
  }

  /** First recorded log call whose message contains `fragment`. */
  function findLog(fragment: string) {
    return logCalls.find((c) => c.msg.includes(fragment))
  }

  /** Asserts the error reached the hook and passed the retryability check. */
  function expectClassifiedRetryable() {
    expect(findLog("session.error received")).toBeDefined()
    expect(findLog("Error not retryable")).toBeUndefined()
  }

  describe("session.error handling", () => {
    const statusCases = [
      { statusCode: 429, sessionID: "test-session-123", model: "anthropic/claude-opus-4-5", message: "Rate limit exceeded" },
      { statusCode: 503, sessionID: "test-session-503", model: "openai/gpt-5.2", message: "Service unavailable" },
      { statusCode: 529, sessionID: "test-session-529", model: "google/gemini-3-pro", message: "Overloaded" },
    ]

    for (const { statusCode, sessionID, model, message } of statusCases) {
      test(`should detect retryable error with status code ${statusCode}`, async () => {
        const hook = createHook()

        await createSession(hook, sessionID, model)
        await failSession(hook, { sessionID, error: { statusCode, message } })

        expectClassifiedRetryable()
        expect(findLog("session.error received")?.data).toMatchObject({ sessionID, statusCode })
      })
    }

    test("should skip non-retryable errors", async () => {
      const hook = createHook()
      const sessionID = "test-session-400"

      await createSession(hook, sessionID)
      await failSession(hook, { sessionID, error: { statusCode: 400, message: "Bad request" } })

      expect(findLog("Error not retryable")).toBeDefined()
    })

    test("should detect retryable error from message pattern 'rate limit'", async () => {
      const hook = createHook()
      const sessionID = "test-session-pattern"

      await createSession(hook, sessionID)
      await failSession(hook, { sessionID, error: { message: "You have hit the rate limit" } })

      expectClassifiedRetryable()
    })

    test("should log when no fallback models configured", async () => {
      const hook = createHook()
      const sessionID = "test-session-no-fallbacks"

      await createSession(hook, sessionID)
      await failSession(hook, { sessionID, error: { statusCode: 429, message: "Rate limit" } })

      expect(findLog("No fallback models configured")).toBeDefined()
    })
  })

  describe("disabled hook", () => {
    test("should not process events when disabled", async () => {
      const hook = createHook({ enabled: false })
      const sessionID = "test-session-disabled"

      await failSession(hook, { sessionID, error: { statusCode: 429 } })

      expect(findLog("session.error received")).toBeUndefined()
    })
  })

  describe("session lifecycle", () => {
    test("should create state on session.created", async () => {
      const hook = createHook()
      const sessionID = "test-session-create"
      const model = "anthropic/claude-opus-4-5"

      await createSession(hook, sessionID, model)

      const createLog = findLog("Session created with model")
      expect(createLog).toBeDefined()
      expect(createLog?.data).toMatchObject({ sessionID, model })
    })

    test("should cleanup state on session.deleted", async () => {
      const hook = createHook()
      const sessionID = "test-session-delete"

      await createSession(hook, sessionID)
      await hook.event({
        event: { type: "session.deleted", properties: { info: { id: sessionID } } },
      })

      const deleteLog = findLog("Cleaning up session state")
      expect(deleteLog).toBeDefined()
      expect(deleteLog?.data).toMatchObject({ sessionID })
    })

    test("should handle session.error without prior session.created", async () => {
      const hook = createHook()
      const sessionID = "test-session-no-create"

      await failSession(hook, {
        sessionID,
        error: { statusCode: 429 },
        model: "anthropic/claude-opus-4-5",
      })

      expectClassifiedRetryable()
    })
  })

  describe("error code extraction", () => {
    test("should extract status code from error object", async () => {
      const hook = createHook()
      const sessionID = "test-extract-status"

      await createSession(hook, sessionID, "test-model")
      await failSession(hook, { sessionID, error: { statusCode: 429, message: "Rate limit" } })

      const statusLog = logCalls.find((c) => c.data && typeof c.data === "object" && "statusCode" in c.data)
      expect(statusLog?.data).toMatchObject({ statusCode: 429 })
    })

    test("should extract status code from nested error.data", async () => {
      const hook = createHook()
      const sessionID = "test-nested-status"

      await createSession(hook, sessionID, "test-model")
      await failSession(hook, {
        sessionID,
        error: { data: { statusCode: 503, message: "Service unavailable" } },
      })

      expectClassifiedRetryable()
      expect(findLog("session.error received")?.data).toMatchObject({ statusCode: 503 })
    })
  })

  describe("custom error codes", () => {
    test("should support custom retry_on_errors configuration", async () => {
      const hook = createHook({ retry_on_errors: [500, 502] })
      const sessionID = "test-session-custom"

      await createSession(hook, sessionID, "test-model")
      await failSession(hook, { sessionID, error: { statusCode: 500 } })

      expectClassifiedRetryable()
    })
  })

  describe("message.updated handling", () => {
    test("should handle assistant message errors", async () => {
      const hook = createHook()
      const sessionID = "test-message-updated"

      await updateMessage(hook, {
        sessionID,
        role: "assistant",
        error: { statusCode: 429, message: "Rate limit" },
        model: "anthropic/claude-opus-4-5",
      })

      expect(findLog("message.updated with assistant error")).toBeDefined()
    })

    test("should skip non-assistant message errors", async () => {
      const hook = createHook()
      const sessionID = "test-message-user"

      await updateMessage(hook, {
        sessionID,
        role: "user",
        error: { statusCode: 429 },
        model: "anthropic/claude-opus-4-5",
      })

      expect(findLog("message.updated with assistant error")).toBeUndefined()
    })
  })

  describe("edge cases", () => {
    test("should handle session.error without sessionID", async () => {
      const hook = createHook()

      await failSession(hook, { error: { statusCode: 429 } })

      expect(findLog("session.error without sessionID")).toBeDefined()
    })

    test("should handle error as string", async () => {
      const hook = createHook()
      const sessionID = "test-error-string"

      await createSession(hook, sessionID, "test-model")
      await failSession(hook, { sessionID, error: "rate limit exceeded" })

      expectClassifiedRetryable()
    })

    test("should handle null error", async () => {
      const hook = createHook()
      const sessionID = "test-error-null"

      await createSession(hook, sessionID, "test-model")
      await failSession(hook, { sessionID, error: null })

      expect(findLog("Error not retryable")).toBeDefined()
    })
  })
})
/** Shape shared by the hook's `chat.message` input and output model fields. */
type ModelRef = { providerID: string; modelID: string }

/** Builds the initial per-session state: no fallback attempted, nothing pending. */
function createFallbackState(originalModel: string): FallbackState {
  return {
    originalModel,
    currentModel: originalModel,
    fallbackIndex: -1,
    lastFallbackTime: 0,
    failedModels: new Set<string>(),
    attemptCount: 0,
    pendingFallbackModel: undefined,
  }
}

/** Returns `value` as a property bag when it is a non-null object, otherwise `undefined`. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  return typeof value === "object" && value !== null ? (value as Record<string, unknown>) : undefined
}

/** Returns `value` when it is a non-empty string, otherwise `undefined`. */
function asNonEmptyString(value: unknown): string | undefined {
  return typeof value === "string" && value.length > 0 ? value : undefined
}

/**
 * Extracts a lower-cased, human-readable message from an arbitrary error value.
 *
 * @param error - A string, an error-like object, or anything else.
 * @returns The first non-empty `message` found (probing `error.data`,
 *   `error.error`, `error` itself, then `error.data.error`, in that order),
 *   else the JSON serialisation of `error`, else `""`. Always lower-case.
 */
function getErrorMessage(error: unknown): string {
  if (!error) return ""
  if (typeof error === "string") return error.toLowerCase()

  const errorObj = asRecord(error)
  if (errorObj) {
    const candidates = [errorObj.data, errorObj.error, errorObj, asRecord(errorObj.data)?.error]
    for (const candidate of candidates) {
      const message = asRecord(candidate)?.message
      if (typeof message === "string" && message.length > 0) {
        return message.toLowerCase()
      }
    }
  }

  try {
    // JSON.stringify yields undefined for functions/symbols and throws on cycles.
    const serialized: string | undefined = JSON.stringify(error)
    return serialized ? serialized.toLowerCase() : ""
  } catch {
    return ""
  }
}

/**
 * Determines the HTTP status code carried by an error.
 *
 * @param error - The error value reported by the session or message event.
 * @param candidateCodes - Status codes to look for inside the error message
 *   when no numeric field is present. Defaults to the built-in retry list so
 *   existing single-argument callers behave as before.
 * @returns The first numeric value among `statusCode`, `status`,
 *   `data.statusCode`, `data.status`; otherwise the first candidate code that
 *   appears as a standalone token in the message; otherwise `undefined`.
 */
function extractStatusCode(
  error: unknown,
  candidateCodes: readonly number[] = DEFAULT_CONFIG.retry_on_errors
): number | undefined {
  if (!error) return undefined

  const errorObj = asRecord(error)
  const data = asRecord(errorObj?.data)
  // Take the first field that is actually numeric; a non-numeric `status`
  // (e.g. "error") must not hide a numeric code nested under `data`.
  const explicit = [errorObj?.statusCode, errorObj?.status, data?.statusCode, data?.status].find(
    (value): value is number => typeof value === "number"
  )
  if (explicit !== undefined) return explicit

  const codes = candidateCodes.filter((code) => Number.isInteger(code))
  if (codes.length === 0) return undefined

  const token = getErrorMessage(error).match(new RegExp(`\\b(${codes.join("|")})\\b`))?.[1]
  return token === undefined ? undefined : Number.parseInt(token, 10)
}

/**
 * Decides whether an error should trigger a model fallback.
 *
 * @param error - The error value reported by the event.
 * @param retryOnErrors - Configured status codes that count as retryable.
 * @returns `true` when the status code is in `retryOnErrors`, or when the
 *   message matches one of `RETRYABLE_ERROR_PATTERNS`.
 */
function isRetryableError(error: unknown, retryOnErrors: number[]): boolean {
  const statusCode = extractStatusCode(error, retryOnErrors)
  if (statusCode !== undefined && retryOnErrors.includes(statusCode)) {
    return true
  }

  const message = getErrorMessage(error)
  return RETRYABLE_ERROR_PATTERNS.some((pattern) => pattern.test(message))
}

/** Normalises a `fallback_models` setting to a list of non-empty model ids. */
function normalizeFallbackModels(models: string | string[] | undefined): string[] {
  if (!models) return []
  const list = Array.isArray(models) ? models : [models]
  return list.filter((m): m is string => typeof m === "string" && m.length > 0)
}

/** Reads and normalises `agents[agentName].fallback_models`; `undefined` when unset or empty-string. */
function readAgentFallbackModels(pluginConfig: OhMyOpenCodeConfig, agentName: string): string[] | undefined {
  const agents = pluginConfig.agents as
    | Record<string, { fallback_models?: string | string[] } | undefined>
    | undefined
  const configured = agents?.[agentName]?.fallback_models
  return configured ? normalizeFallbackModels(configured) : undefined
}

/**
 * Resolves the ordered fallback model list for a session.
 *
 * Looks up the explicitly reported agent first; failing that, tries to detect
 * a known agent name embedded in the session id.
 *
 * NOTE(review): only agent-level `fallback_models` is consulted here; a
 * category-level `fallback_models` setting is not read by this function.
 *
 * @returns The configured models, or `[]` when nothing is configured.
 */
function getFallbackModelsForSession(
  sessionID: string,
  agent: string | undefined,
  pluginConfig: OhMyOpenCodeConfig | undefined
): string[] {
  if (!pluginConfig) return []

  if (agent) {
    const explicit = readAgentFallbackModels(pluginConfig, agent)
    if (explicit) return explicit
  }

  const detected = sessionID.match(/\b(sisyphus|oracle|librarian|explore|prometheus|atlas|metis|momus)\b/i)?.[1]
  if (detected) {
    const inferred = readAgentFallbackModels(pluginConfig, detected.toLowerCase())
    if (inferred) return inferred
  }

  return []
}

/**
 * Reports whether `model` previously failed and the cooldown has not elapsed.
 *
 * NOTE(review): the cooldown is measured from the session's most recent
 * fallback (`state.lastFallbackTime`), not from the moment this particular
 * model failed.
 */
function isModelInCooldown(model: string, state: FallbackState, cooldownSeconds: number): boolean {
  if (!state.failedModels.has(model)) return false

  const elapsedMs = Date.now() - state.lastFallbackTime
  return elapsedMs < cooldownSeconds * 1000
}

/**
 * Finds the next usable entry after the session's current position in the list.
 *
 * @returns The model together with its position, so the caller can advance the
 *   cursor to exactly that entry even when the list contains duplicates.
 */
function findNextAvailableFallback(
  state: FallbackState,
  fallbackModels: string[],
  cooldownSeconds: number
): { model: string; index: number } | undefined {
  for (let index = state.fallbackIndex + 1; index < fallbackModels.length; index++) {
    const model = fallbackModels[index]
    if (model === undefined) continue
    if (!isModelInCooldown(model, state, cooldownSeconds)) {
      return { model, index }
    }
    log(`[${HOOK_NAME}] Skipping fallback model in cooldown`, { model, index })
  }
  return undefined
}

/**
 * Advances a session to its next fallback model and marks it pending.
 *
 * Mutates `state`: records the current model as failed, moves the cursor,
 * bumps the attempt counter and stores the chosen model in
 * `pendingFallbackModel` for the next `chat.message`.
 *
 * @returns `success: true` with `newModel`, or `success: false` with a reason.
 */
function prepareFallback(
  sessionID: string,
  state: FallbackState,
  fallbackModels: string[],
  config: Required<RuntimeFallbackConfig>
): FallbackResult {
  if (state.attemptCount >= config.max_fallback_attempts) {
    log(`[${HOOK_NAME}] Max fallback attempts reached`, { sessionID, attempts: state.attemptCount })
    return { success: false, error: "Max fallback attempts reached", maxAttemptsReached: true }
  }

  const next = findNextAvailableFallback(state, fallbackModels, config.cooldown_seconds)
  if (!next) {
    log(`[${HOOK_NAME}] No available fallback models`, { sessionID })
    return { success: false, error: "No available fallback models (all in cooldown or exhausted)" }
  }

  log(`[${HOOK_NAME}] Preparing fallback`, {
    sessionID,
    from: state.currentModel,
    to: next.model,
    attempt: state.attemptCount + 1,
  })

  // Use the position the search actually stopped at; indexOf() would rewind
  // the cursor to the first occurrence when the list contains duplicates.
  state.fallbackIndex = next.index
  state.failedModels.add(state.currentModel)
  state.lastFallbackTime = Date.now()
  state.attemptCount++
  state.currentModel = next.model
  state.pendingFallbackModel = next.model

  return { success: true, newModel: next.model }
}

/** Splits `"provider/model"` at the first slash; `undefined` when either part is missing. */
function parseModelId(model: string): ModelRef | undefined {
  const separator = model.indexOf("/")
  if (separator <= 0 || separator === model.length - 1) return undefined
  return { providerID: model.slice(0, separator), modelID: model.slice(separator + 1) }
}

export type { RuntimeFallbackHook, RuntimeFallbackOptions } from "./types"

/**
 * Creates the runtime-fallback hook.
 *
 * On a retryable `session.error` or assistant `message.updated` error, the
 * hook picks the next configured fallback model for the session and, on the
 * following `chat.message`, rewrites the outgoing message's model to it.
 *
 * NOTE(review): the fallback is applied to the next `chat.message` only; later
 * messages are left untouched until another error arrives.
 *
 * @param ctx - Plugin input; `ctx.client.tui.showToast` is used for
 *   notifications and `ctx.directory` for loading the plugin config.
 * @param options.config - Runtime-fallback settings; missing fields fall back
 *   to `DEFAULT_CONFIG`.
 * @param options.pluginConfig - Optional already-loaded plugin config. When
 *   omitted, the config is loaded from disk as before.
 * @returns An object exposing `event` and `chat.message` handlers.
 */
export function createRuntimeFallbackHook(
  ctx: PluginInput,
  options?: { config?: RuntimeFallbackConfig; pluginConfig?: OhMyOpenCodeConfig }
): RuntimeFallbackHook {
  const config: Required<RuntimeFallbackConfig> = {
    enabled: options?.config?.enabled ?? DEFAULT_CONFIG.enabled,
    retry_on_errors: options?.config?.retry_on_errors ?? DEFAULT_CONFIG.retry_on_errors,
    max_fallback_attempts: options?.config?.max_fallback_attempts ?? DEFAULT_CONFIG.max_fallback_attempts,
    cooldown_seconds: options?.config?.cooldown_seconds ?? DEFAULT_CONFIG.cooldown_seconds,
    notify_on_fallback: options?.config?.notify_on_fallback ?? DEFAULT_CONFIG.notify_on_fallback,
  }

  const sessionStates = new Map<string, FallbackState>()

  let pluginConfig: OhMyOpenCodeConfig | undefined = options?.pluginConfig
  if (!pluginConfig) {
    try {
      // Resolved lazily at call time (kept from the original implementation).
      const { loadPluginConfig } = require("../../plugin-config")
      pluginConfig = loadPluginConfig(ctx.directory, ctx)
    } catch {
      log(`[${HOOK_NAME}] Plugin config not available`)
    }
  }

  /** Best-effort toast announcing the model switch; failures are logged, never thrown. */
  const notifyFallback = async (newModel: string | undefined): Promise<void> => {
    try {
      await ctx.client.tui.showToast({
        body: {
          title: "Model Fallback",
          message: `Switching to ${newModel?.split("/").pop() || newModel} for next request`,
          variant: "warning",
          duration: 5000,
        },
      })
    } catch (toastError) {
      log(`[${HOOK_NAME}] Failed to show fallback toast`, { error: getErrorMessage(toastError) })
    }
  }

  /** Shared tail of both error paths: resolve models, ensure state, advance once, notify. */
  const attemptFallback = async (
    sessionID: string,
    agent: string | undefined,
    model: string | undefined
  ): Promise<void> => {
    const fallbackModels = getFallbackModelsForSession(sessionID, agent, pluginConfig)
    if (fallbackModels.length === 0) {
      log(`[${HOOK_NAME}] No fallback models configured`, { sessionID, agent })
      return
    }

    let state = sessionStates.get(sessionID)
    if (!state) {
      if (!model) {
        log(`[${HOOK_NAME}] No model info available, cannot fallback`, { sessionID })
        return
      }
      state = createFallbackState(model)
      sessionStates.set(sessionID, state)
    }

    // The same failure can surface as both `session.error` and one or more
    // `message.updated` events. Until the pending model has been consumed by
    // `chat.message`, further errors must not burn additional fallbacks.
    if (state.pendingFallbackModel) {
      log(`[${HOOK_NAME}] Fallback already pending, ignoring duplicate error event`, {
        sessionID,
        pending: state.pendingFallbackModel,
      })
      return
    }

    const result = prepareFallback(sessionID, state, fallbackModels, config)
    if (!result.success) {
      log(`[${HOOK_NAME}] Fallback preparation failed`, { sessionID, error: result.error })
      return
    }

    if (config.notify_on_fallback) {
      await notifyFallback(result.newModel)
    }
  }

  const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => {
    if (!config.enabled) return

    const props = asRecord(event.properties)

    if (event.type === "session.created") {
      const info = asRecord(props?.info)
      const sessionID = asNonEmptyString(info?.id)
      const model = asNonEmptyString(info?.model)

      if (sessionID && model) {
        log(`[${HOOK_NAME}] Session created with model`, { sessionID, model })
        sessionStates.set(sessionID, createFallbackState(model))
      }
      return
    }

    if (event.type === "session.deleted") {
      const sessionID = asNonEmptyString(asRecord(props?.info)?.id)

      if (sessionID) {
        log(`[${HOOK_NAME}] Cleaning up session state`, { sessionID })
        sessionStates.delete(sessionID)
      }
      return
    }

    if (event.type === "session.error") {
      const sessionID = asNonEmptyString(props?.sessionID)
      const error = props?.error
      const agent = asNonEmptyString(props?.agent)

      if (!sessionID) {
        log(`[${HOOK_NAME}] session.error without sessionID, skipping`)
        return
      }

      log(`[${HOOK_NAME}] session.error received`, {
        sessionID,
        agent,
        statusCode: extractStatusCode(error, config.retry_on_errors),
      })

      if (!isRetryableError(error, config.retry_on_errors)) {
        log(`[${HOOK_NAME}] Error not retryable, skipping fallback`, { sessionID })
        return
      }

      await attemptFallback(sessionID, agent, asNonEmptyString(props?.model))
      return
    }

    if (event.type === "message.updated") {
      const info = asRecord(props?.info)
      const sessionID = asNonEmptyString(info?.sessionID)
      const model = asNonEmptyString(info?.model)
      const error = info?.error

      if (!sessionID || info?.role !== "assistant" || !error || !model) return

      log(`[${HOOK_NAME}] message.updated with assistant error`, { sessionID, model })

      if (!isRetryableError(error, config.retry_on_errors)) return

      await attemptFallback(sessionID, asNonEmptyString(info?.agent), model)
    }
  }

  const chatMessageHandler = async (
    input: { sessionID: string; agent?: string; model?: ModelRef },
    output: { message: { model?: ModelRef }; parts?: Array<{ type: string; text?: string }> }
  ): Promise<void> => {
    if (!config.enabled) return

    const state = sessionStates.get(input.sessionID)
    const fallbackModel = state?.pendingFallbackModel
    if (!state || !fallbackModel) return

    // Consume the pending model whether or not it can be applied, so a
    // malformed entry cannot block later fallbacks.
    state.pendingFallbackModel = undefined

    const target = parseModelId(fallbackModel)
    if (!target || !output.message) {
      log(`[${HOOK_NAME}] Cannot apply fallback model (expected "provider/model")`, {
        sessionID: input.sessionID,
        model: fallbackModel,
      })
      return
    }

    log(`[${HOOK_NAME}] Applying fallback model for next request`, {
      sessionID: input.sessionID,
      from: input.model,
      to: fallbackModel,
    })
    output.message.model = target
  }

  const hook: RuntimeFallbackHook = {
    event: eventHandler,
    "chat.message": chatMessageHandler,
  }
  return hook
}
/**
 * Runtime Fallback Hook - Type Definitions
 *
 * Types for managing runtime model fallback when API errors occur.
 */

/**
 * Tracks the state of fallback attempts for a session.
 */
export interface FallbackState {
  /** Model the session was created with */
  originalModel: string
  /** Model most recently selected for the session */
  currentModel: string
  /** Position of the current model in the fallback list; -1 before any fallback */
  fallbackIndex: number
  /** Epoch milliseconds of the most recent fallback; 0 before any fallback */
  lastFallbackTime: number
  /** Model ids that were active when a retryable error occurred */
  failedModels: Set<string>
  /** Number of fallbacks performed so far */
  attemptCount: number
  /** Model to apply to the next outgoing chat message, if any */
  pendingFallbackModel?: string
}

/**
 * Error information extracted from session.error event
 */
export interface SessionErrorInfo {
  /** Session ID that encountered the error */
  sessionID: string
  /** The error object */
  error: unknown
  /** Error message (extracted) */
  message: string
  /** HTTP status code if available */
  statusCode?: number
  /** Current model when error occurred */
  currentModel?: string
  /** Agent name if available */
  agent?: string
}

/**
 * Result of a fallback attempt
 */
export interface FallbackResult {
  /** Whether the fallback was successful */
  success: boolean
  /** The model switched to (if successful) */
  newModel?: string
  /** Error message (if failed) */
  error?: string
  /** Whether max attempts were reached */
  maxAttemptsReached?: boolean
}

/**
 * Options for creating the runtime fallback hook
 */
export interface RuntimeFallbackOptions {
  /** Runtime fallback configuration */
  config?: RuntimeFallbackConfig
}

/**
 * Handlers exposed by the runtime fallback hook.
 */
export interface RuntimeFallbackHook {
  /** Receives plugin events (session lifecycle and error events) */
  event: (input: { event: { type: string; properties?: unknown } }) => Promise<void>
  /** Invoked for an outgoing chat message; may rewrite `output.message.model` */
  "chat.message"?: (
    input: { sessionID: string; agent?: string; model?: { providerID: string; modelID: string } },
    output: {
      message: { model?: { providerID: string; modelID: string } }
      parts?: Array<{ type: string; text?: string }>
    }
  ) => Promise<void>
}
hooks.runtimeFallback?.["chat.message"]?.(input, output) await hooks.keywordDetector?.["chat.message"]?.(input, output) await hooks.claudeCodeHooks?.["chat.message"]?.(input, output) await hooks.autoSlashCommand?.["chat.message"]?.(input, output) diff --git a/src/plugin/event.ts b/src/plugin/event.ts index ce5f5af3..56baa987 100644 --- a/src/plugin/event.ts +++ b/src/plugin/event.ts @@ -42,6 +42,7 @@ export function createEventHandler(args: { await Promise.resolve(hooks.rulesInjector?.event?.(input)) await Promise.resolve(hooks.thinkMode?.event?.(input)) await Promise.resolve(hooks.anthropicContextWindowLimitRecovery?.event?.(input)) + await Promise.resolve(hooks.runtimeFallback?.event?.(input)) await Promise.resolve(hooks.agentUsageReminder?.event?.(input)) await Promise.resolve(hooks.categorySkillReminder?.event?.(input)) await Promise.resolve(hooks.interactiveBashSession?.event?.(input)) diff --git a/src/plugin/hooks/create-session-hooks.ts b/src/plugin/hooks/create-session-hooks.ts index e2596011..95356534 100644 --- a/src/plugin/hooks/create-session-hooks.ts +++ b/src/plugin/hooks/create-session-hooks.ts @@ -24,6 +24,7 @@ import { createNoHephaestusNonGptHook, createQuestionLabelTruncatorHook, createPreemptiveCompactionHook, + createRuntimeFallbackHook, } from "../../hooks" import { createAnthropicEffortHook } from "../../hooks/anthropic-effort" import { @@ -57,6 +58,7 @@ export type SessionHooks = { questionLabelTruncator: ReturnType taskResumeInfo: ReturnType anthropicEffort: ReturnType | null + runtimeFallback: ReturnType | null } export function createSessionHooks(args: { @@ -175,6 +177,11 @@ export function createSessionHooks(args: { ? safeHook("anthropic-effort", () => createAnthropicEffortHook()) : null + const runtimeFallback = isHookEnabled("runtime-fallback") + ? 
safeHook("runtime-fallback", () => + createRuntimeFallbackHook(ctx, { config: pluginConfig.runtime_fallback })) + : null + return { contextWindowMonitor, preemptiveCompaction, @@ -198,5 +205,6 @@ export function createSessionHooks(args: { questionLabelTruncator, taskResumeInfo, anthropicEffort, + runtimeFallback, } }