fix: add post-max-failure recovery window for todo continuation

This commit is contained in:
YeonGyu-Kim 2026-02-16 15:27:00 +09:00
parent c2f22cd6e5
commit 2a4009e692
4 changed files with 59 additions and 3 deletions

View File

@ -19,3 +19,4 @@ export const COUNTDOWN_GRACE_PERIOD_MS = 500
// 3 seconds. NOTE(review): presumably the window after an abort during which
// continuation is suppressed — usage is not visible here; confirm against callers.
export const ABORT_WINDOW_MS = 3000
// 30 seconds. NOTE(review): presumably the base cooldown between continuation
// injections — usage is not visible here; confirm against callers.
export const CONTINUATION_COOLDOWN_MS = 30_000
// Once a session's consecutive failure count reaches this value, the idle
// handler skips further continuation attempts for that session.
export const MAX_CONSECUTIVE_FAILURES = 5
// 5 minutes. When the failure cap has been reached and at least this much time
// has passed since the last injection attempt, the idle handler resets the
// failure count to 0 so retries can resume.
export const FAILURE_RESET_WINDOW_MS = 5 * 60 * 1000

View File

@ -148,7 +148,7 @@ ${todoList}`
if (injectionState) {
injectionState.inFlight = false
injectionState.lastInjectedAt = Date.now()
injectionState.consecutiveFailures += 1
injectionState.consecutiveFailures = (injectionState.consecutiveFailures ?? 0) + 1
}
}
}

View File

@ -8,6 +8,7 @@ import {
ABORT_WINDOW_MS,
CONTINUATION_COOLDOWN_MS,
DEFAULT_SKIP_AGENTS,
FAILURE_RESET_WINDOW_MS,
HOOK_NAME,
MAX_CONSECUTIVE_FAILURES,
} from "./constants"
@ -100,6 +101,18 @@ export async function handleSessionIdle(args: {
return
}
if (
state.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES
&& state.lastInjectedAt
&& Date.now() - state.lastInjectedAt >= FAILURE_RESET_WINDOW_MS
) {
state.consecutiveFailures = 0
log(`[${HOOK_NAME}] Reset consecutive failures after recovery window`, {
sessionID,
failureResetWindowMs: FAILURE_RESET_WINDOW_MS,
})
}
if (state.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
log(`[${HOOK_NAME}] Skipped: max consecutive failures reached`, {
sessionID,

View File

@ -4,7 +4,11 @@ import { afterEach, beforeEach, describe, expect, test } from "bun:test"
import type { BackgroundManager } from "../../features/background-agent"
import { setMainSession, subagentSessions, _resetForTesting } from "../../features/claude-code-session-state"
import { createTodoContinuationEnforcer } from "."
import { CONTINUATION_COOLDOWN_MS, MAX_CONSECUTIVE_FAILURES } from "./constants"
import {
CONTINUATION_COOLDOWN_MS,
FAILURE_RESET_WINDOW_MS,
MAX_CONSECUTIVE_FAILURES,
} from "./constants"
// Callback signature that accepts any argument list and returns nothing.
// NOTE(review): presumably the shape of callbacks scheduled through the fake
// timer helpers in this test file — confirm against usages.
type TimerCallback = (...args: any[]) => void
@ -606,7 +610,9 @@ describe("todo-continuation-enforcer", () => {
for (let index = 0; index < MAX_CONSECUTIVE_FAILURES; index++) {
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
await fakeTimers.advanceBy(2500, true)
await fakeTimers.advanceClockBy(1_000_000)
if (index < MAX_CONSECUTIVE_FAILURES - 1) {
await fakeTimers.advanceClockBy(1_000_000)
}
}
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
await fakeTimers.advanceBy(2500, true)
@ -615,6 +621,42 @@ describe("todo-continuation-enforcer", () => {
expect(promptCalls).toHaveLength(MAX_CONSECUTIVE_FAILURES)
}, { timeout: 30000 })
test("should resume retries after reset window when max failures reached", async () => {
  //#given
  // A main session whose prompt call is recorded and then always rejects.
  const sessionID = "main-recovery-after-max-failures"
  setMainSession(sessionID)

  const mockInput = createMockPluginInput()
  mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
    const { path, body } = opts
    promptCalls.push({
      sessionID: path.id,
      agent: body.agent,
      model: body.model,
      text: body.parts[0].text,
    })
    throw new Error("simulated auth failure")
  }

  const hook = createTodoContinuationEnforcer(mockInput, {})

  // Fires one idle event for the session, then advances the fake timers by 2500ms.
  const idleAndSettle = async (): Promise<void> => {
    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
    await fakeTimers.advanceBy(2500, true)
  }

  //#when
  // Accumulate MAX_CONSECUTIVE_FAILURES failed attempts; the clock is advanced
  // between attempts but NOT after the final one.
  let attempt = 0
  while (attempt < MAX_CONSECUTIVE_FAILURES) {
    await idleAndSettle()
    attempt += 1
    if (attempt < MAX_CONSECUTIVE_FAILURES) {
      await fakeTimers.advanceClockBy(1_000_000)
    }
  }

  // One more idle event immediately after the final failure.
  await idleAndSettle()

  // Move the clock forward by the full reset window, then go idle again.
  await fakeTimers.advanceClockBy(FAILURE_RESET_WINDOW_MS)
  await idleAndSettle()

  //#then
  // Exactly one additional prompt attempt beyond the failure cap is expected.
  expect(promptCalls).toHaveLength(MAX_CONSECUTIVE_FAILURES + 1)
}, { timeout: 30000 })
test("should increase cooldown exponentially after consecutive failures", async () => {
//#given
const sessionID = "main-exponential-backoff"