Merge pull request #1553 from code-yeongyu/fix/1355-atlas-continuation-guard
fix(atlas): stop continuation retry loop on repeated prompt failures
This commit is contained in:
commit
e9a3d579b3
@ -624,6 +624,11 @@ describe("atlas hook", () => {
|
|||||||
describe("session.idle handler (boulder continuation)", () => {
|
describe("session.idle handler (boulder continuation)", () => {
|
||||||
const MAIN_SESSION_ID = "main-session-123"
|
const MAIN_SESSION_ID = "main-session-123"
|
||||||
|
|
||||||
|
/**
 * Yields to the microtask queue twice so that promise callbacks already
 * queued (and one follow-up link chained from them) run before the test
 * continues.
 */
async function flushMicrotasks(): Promise<void> {
  await Promise.resolve()
  await Promise.resolve()
}
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
mock.module("../../features/claude-code-session-state", () => ({
|
mock.module("../../features/claude-code-session-state", () => ({
|
||||||
getMainSessionID: () => MAIN_SESSION_ID,
|
getMainSessionID: () => MAIN_SESSION_ID,
|
||||||
@ -965,6 +970,135 @@ describe("atlas hook", () => {
|
|||||||
expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
|
expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("should stop continuation after 2 consecutive prompt failures (issue #1355)", async () => {
  //#given - boulder state with incomplete plan and prompt always fails
  const planPath = join(TEST_DIR, "test-plan.md")
  writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

  const state: BoulderState = {
    active_plan: planPath,
    started_at: "2026-01-02T10:00:00Z",
    session_ids: [MAIN_SESSION_ID],
    plan_name: "test-plan",
  }
  writeBoulderState(TEST_DIR, state)

  const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
  const mockInput = createMockPluginInput({ promptMock })
  const hook = createAtlasHook(mockInput)

  // Deliver one session.idle event for the main session and let the
  // resulting promise callbacks settle.
  const fireIdle = async (): Promise<void> => {
    await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
    await flushMicrotasks()
  }

  // Pin the clock so every idle can be moved 6000ms forward deterministically;
  // the original Date.now is restored in `finally` even if an assertion throws.
  const originalDateNow = Date.now
  let now = 0
  Date.now = () => now

  try {
    //#when - idle fires repeatedly, past cooldown each time
    await fireIdle()
    now += 6000

    await fireIdle()
    now += 6000

    await fireIdle()

    //#then - should attempt only twice, then disable continuation
    expect(promptMock).toHaveBeenCalledTimes(2)
  } finally {
    Date.now = originalDateNow
  }
})
|
||||||
|
|
||||||
|
test("should reset prompt failure counter on success and only stop after 2 consecutive failures", async () => {
  //#given - boulder state with incomplete plan
  const planPath = join(TEST_DIR, "test-plan.md")
  writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

  const state: BoulderState = {
    active_plan: planPath,
    started_at: "2026-01-02T10:00:00Z",
    session_ids: [MAIN_SESSION_ID],
    plan_name: "test-plan",
  }
  writeBoulderState(TEST_DIR, state)

  // Scripted outcomes for the first four prompt calls: fail, succeed, fail, fail.
  // Any further call would fall through to the default resolving implementation.
  const promptMock = mock(() => Promise.resolve())
  promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
  promptMock.mockImplementationOnce(() => Promise.resolve())
  promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
  promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))

  const mockInput = createMockPluginInput({ promptMock })
  const hook = createAtlasHook(mockInput)

  // Deliver one session.idle event for the main session and let the
  // resulting promise callbacks settle.
  const fireIdle = async (): Promise<void> => {
    await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
    await flushMicrotasks()
  }

  // Pin the clock so every idle can be moved 6000ms forward deterministically;
  // the original Date.now is restored in `finally` even if an assertion throws.
  const originalDateNow = Date.now
  let now = 0
  Date.now = () => now

  try {
    //#when - fail, succeed (reset), then fail twice (disable), then attempt again
    for (let i = 0; i < 5; i++) {
      await fireIdle()
      now += 6000
    }

    //#then - 4 prompt attempts; 5th idle is skipped after 2 consecutive failures
    expect(promptMock).toHaveBeenCalledTimes(4)
  } finally {
    Date.now = originalDateNow
  }
})
|
||||||
|
|
||||||
|
test("should reset continuation failure state on session.compacted event", async () => {
  //#given - boulder state with incomplete plan and prompt always fails
  const planPath = join(TEST_DIR, "test-plan.md")
  writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

  const state: BoulderState = {
    active_plan: planPath,
    started_at: "2026-01-02T10:00:00Z",
    session_ids: [MAIN_SESSION_ID],
    plan_name: "test-plan",
  }
  writeBoulderState(TEST_DIR, state)

  const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
  const mockInput = createMockPluginInput({ promptMock })
  const hook = createAtlasHook(mockInput)

  // Deliver one session.idle event for the main session and let the
  // resulting promise callbacks settle.
  const fireIdle = async (): Promise<void> => {
    await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
    await flushMicrotasks()
  }

  // Pin the clock so every idle can be moved 6000ms forward deterministically;
  // the original Date.now is restored in `finally` even if an assertion throws.
  const originalDateNow = Date.now
  let now = 0
  Date.now = () => now

  try {
    //#when - two failures disables continuation, then compaction resets it
    await fireIdle()
    now += 6000

    await fireIdle()
    now += 6000

    // Third idle is expected to be skipped: two consecutive failures already recorded.
    await fireIdle()

    await hook.handler({ event: { type: "session.compacted", properties: { sessionID: MAIN_SESSION_ID } } })
    now += 6000

    await fireIdle()

    //#then - 2 attempts + 1 after compaction (3 total)
    expect(promptMock).toHaveBeenCalledTimes(3)
  } finally {
    Date.now = originalDateNow
  }
})
|
||||||
|
|
||||||
test("should cleanup on session.deleted", async () => {
|
test("should cleanup on session.deleted", async () => {
|
||||||
// given - boulder state
|
// given - boulder state
|
||||||
const planPath = join(TEST_DIR, "test-plan.md")
|
const planPath = join(TEST_DIR, "test-plan.md")
|
||||||
|
|||||||
@ -391,6 +391,7 @@ interface ToolExecuteAfterOutput {
|
|||||||
/** Per-session bookkeeping kept by the atlas hook. */
interface SessionState {
  // NOTE(review): written and read outside the portion of the file shown here.
  lastEventWasAbortError?: boolean
  // NOTE(review): presumably a Date.now() timestamp checked against
  // CONTINUATION_COOLDOWN_MS - confirm against the idle handler.
  lastContinuationInjectedAt?: number
  /**
   * Consecutive failed continuation prompts. Incremented when the prompt call
   * throws, reset to 0 when it succeeds; the idle handler skips continuation
   * once this reaches 2.
   */
  promptFailureCount: number
}
|
||||||
|
|
||||||
// Cooldown, in milliseconds, for boulder continuation.
// NOTE(review): presumably the minimum gap between two injections - the code that reads it is not shown here.
const CONTINUATION_COOLDOWN_MS = 5000
|
||||||
@ -432,13 +433,14 @@ export function createAtlasHook(
|
|||||||
/**
 * Returns the mutable state object tracked for `sessionID`.
 *
 * On first access a new entry is created with `promptFailureCount` set to 0
 * and stored in `sessions`, so later calls return the same object.
 */
function getState(sessionID: string): SessionState {
  let state = sessions.get(sessionID)
  if (!state) {
    state = { promptFailureCount: 0 }
    sessions.set(sessionID, state)
  }
  return state
}
|
||||||
|
|
||||||
async function injectContinuation(sessionID: string, planName: string, remaining: number, total: number, agent?: string): Promise<void> {
|
async function injectContinuation(sessionID: string, planName: string, remaining: number, total: number, agent?: string): Promise<void> {
|
||||||
|
const state = getState(sessionID)
|
||||||
const hasRunningBgTasks = backgroundManager
|
const hasRunningBgTasks = backgroundManager
|
||||||
? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
|
? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
|
||||||
: false
|
: false
|
||||||
@ -481,21 +483,28 @@ export function createAtlasHook(
|
|||||||
: undefined
|
: undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
await ctx.client.session.prompt({
|
await ctx.client.session.prompt({
|
||||||
path: { id: sessionID },
|
path: { id: sessionID },
|
||||||
body: {
|
body: {
|
||||||
agent: agent ?? "atlas",
|
agent: agent ?? "atlas",
|
||||||
...(model !== undefined ? { model } : {}),
|
...(model !== undefined ? { model } : {}),
|
||||||
parts: [{ type: "text", text: prompt }],
|
parts: [{ type: "text", text: prompt }],
|
||||||
},
|
},
|
||||||
query: { directory: ctx.directory },
|
query: { directory: ctx.directory },
|
||||||
})
|
})
|
||||||
|
|
||||||
log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
|
state.promptFailureCount = 0
|
||||||
} catch (err) {
|
|
||||||
log(`[${HOOK_NAME}] Boulder continuation failed`, { sessionID, error: String(err) })
|
log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
|
||||||
}
|
} catch (err) {
|
||||||
}
|
state.promptFailureCount += 1
|
||||||
|
log(`[${HOOK_NAME}] Boulder continuation failed`, {
|
||||||
|
sessionID,
|
||||||
|
error: String(err),
|
||||||
|
promptFailureCount: state.promptFailureCount,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
handler: async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => {
|
handler: async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => {
|
||||||
@ -541,6 +550,14 @@ export function createAtlasHook(
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (state.promptFailureCount >= 2) {
|
||||||
|
log(`[${HOOK_NAME}] Skipped: continuation disabled after repeated prompt failures`, {
|
||||||
|
sessionID,
|
||||||
|
promptFailureCount: state.promptFailureCount,
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
const hasRunningBgTasks = backgroundManager
|
const hasRunningBgTasks = backgroundManager
|
||||||
? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
|
? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
|
||||||
: false
|
: false
|
||||||
@ -631,6 +648,17 @@ export function createAtlasHook(
|
|||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (event.type === "session.compacted") {
|
||||||
|
const sessionID = (props?.sessionID ?? (props?.info as { id?: string } | undefined)?.id) as
|
||||||
|
| string
|
||||||
|
| undefined
|
||||||
|
if (sessionID) {
|
||||||
|
sessions.delete(sessionID)
|
||||||
|
log(`[${HOOK_NAME}] Session compacted: cleaned up`, { sessionID })
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"tool.execute.before": async (
|
"tool.execute.before": async (
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user