fix: harden run completion checks and graceful timeout

This commit is contained in:
YeonGyu-Kim 2026-02-17 14:43:27 +09:00
parent 5f5b476f12
commit 47a8c3e4a9
5 changed files with 124 additions and 13 deletions

View File

@ -143,6 +143,25 @@ describe("checkCompletionConditions", () => {
expect(result).toBe(false)
})
it("returns false when child status is missing", async () => {
// given
spyOn(console, "log").mockImplementation(() => {})
const ctx = createMockContext({
childrenBySession: {
"test-session": [{ id: "child-1" }],
"child-1": [],
},
statuses: {},
})
const { checkCompletionConditions } = await import("./completion")
// when
const result = await checkCompletionConditions(ctx)
// then
expect(result).toBe(false)
})
it("returns true when all descendants idle (recursive)", async () => {
// given
spyOn(console, "log").mockImplementation(() => {})

View File

@ -65,7 +65,14 @@ async function areAllDescendantsIdle(
for (const child of children) {
const status = allStatuses[child.id]
if (status && status.type !== "idle") {
if (!status) {
console.log(
pc.dim(` Waiting: session ${child.id.slice(0, 8)}... status unknown`)
)
return false
}
if (status.type !== "idle") {
console.log(
pc.dim(` Waiting: session ${child.id.slice(0, 8)}... is ${status.type}`)
)

View File

@ -310,7 +310,7 @@ describe("pollForCompletion", () => {
//#then - returns 1 (not 130/timeout), error message printed
expect(result).toBe(1)
const errorCalls = (console.error as ReturnType<typeof mock>).mock.calls
expect(errorCalls.some((call) => call[0]?.includes("Session ended with error"))).toBe(true)
expect(errorCalls.some((call: unknown[]) => String(call[0] ?? "").includes("Session ended with error"))).toBe(true)
})
it("returns 1 when session errors while tool is active (error not masked by tool gate)", async () => {
@ -335,4 +335,69 @@ describe("pollForCompletion", () => {
//#then - returns 1
expect(result).toBe(1)
})
it("returns 130 after graceful timeout window expires", async () => {
//#given
spyOn(console, "log").mockImplementation(() => {})
spyOn(console, "error").mockImplementation(() => {})
const ctx = createMockContext({
statuses: {
"test-session": { type: "busy" },
},
})
const eventState = createEventState()
eventState.mainSessionIdle = false
eventState.hasReceivedMeaningfulWork = true
const abortController = new AbortController()
//#when
const result = await pollForCompletion(ctx, eventState, abortController, {
pollIntervalMs: 10,
requiredConsecutive: 1,
minStabilizationMs: 0,
timeoutMs: 30,
timeoutGraceMs: 40,
})
//#then
expect(result).toBe(130)
expect(abortController.signal.aborted).toBe(true)
})
it("allows completion during graceful timeout window", async () => {
//#given
spyOn(console, "log").mockImplementation(() => {})
spyOn(console, "error").mockImplementation(() => {})
const ctx = createMockContext()
const eventState = createEventState()
eventState.mainSessionIdle = true
eventState.hasReceivedMeaningfulWork = true
const abortController = new AbortController()
let todoCalls = 0
;(ctx.client.session as unknown as {
todo: ReturnType<typeof mock>
children: ReturnType<typeof mock>
status: ReturnType<typeof mock>
}).todo = mock(async () => {
todoCalls++
if (todoCalls === 1) {
return { data: [{ id: "1", content: "wip", status: "in_progress", priority: "high" }] }
}
return { data: [] }
})
//#when
const result = await pollForCompletion(ctx, eventState, abortController, {
pollIntervalMs: 10,
requiredConsecutive: 1,
minStabilizationMs: 0,
timeoutMs: 20,
timeoutGraceMs: 80,
})
//#then
expect(result).toBe(0)
expect(abortController.signal.aborted).toBe(false)
})
})

View File

@ -8,11 +8,14 @@ const DEFAULT_POLL_INTERVAL_MS = 500
const DEFAULT_REQUIRED_CONSECUTIVE = 3
const ERROR_GRACE_CYCLES = 3
const MIN_STABILIZATION_MS = 10_000
const DEFAULT_TIMEOUT_GRACE_MS = 15_000
export interface PollOptions {
pollIntervalMs?: number
requiredConsecutive?: number
minStabilizationMs?: number
timeoutMs?: number
timeoutGraceMs?: number
}
export async function pollForCompletion(
@ -26,14 +29,35 @@ export async function pollForCompletion(
options.requiredConsecutive ?? DEFAULT_REQUIRED_CONSECUTIVE
const minStabilizationMs =
options.minStabilizationMs ?? MIN_STABILIZATION_MS
const timeoutMs = options.timeoutMs ?? 0
const timeoutGraceMs = options.timeoutGraceMs ?? DEFAULT_TIMEOUT_GRACE_MS
let consecutiveCompleteChecks = 0
let errorCycleCount = 0
let firstWorkTimestamp: number | null = null
const pollStartTimestamp = Date.now()
let timeoutNoticePrinted = false
while (!abortController.signal.aborted) {
await new Promise((resolve) => setTimeout(resolve, pollIntervalMs))
if (abortController.signal.aborted) {
return 130
}
if (timeoutMs > 0) {
const elapsedMs = Date.now() - pollStartTimestamp
if (elapsedMs >= timeoutMs && !timeoutNoticePrinted) {
console.log(pc.yellow("\nTimeout reached. Entering graceful shutdown window..."))
timeoutNoticePrinted = true
}
if (elapsedMs >= timeoutMs + timeoutGraceMs) {
console.log(pc.yellow("Grace period expired. Aborting..."))
abortController.abort()
return 130
}
}
// ERROR CHECK FIRST — errors must not be masked by other gates
if (eventState.mainSessionError) {
errorCycleCount++
@ -91,6 +115,10 @@ export async function pollForCompletion(
const shouldExit = await checkCompletionConditions(ctx)
if (shouldExit) {
if (abortController.signal.aborted) {
return 130
}
consecutiveCompleteChecks++
if (consecutiveCompleteChecks >= requiredConsecutive) {
console.log(pc.green("\n\nAll tasks completed."))

View File

@ -48,14 +48,6 @@ export async function run(options: RunOptions): Promise<number> {
const pluginConfig = loadPluginConfig(directory, { command: "run" })
const resolvedAgent = resolveRunAgent(options, pluginConfig)
const abortController = new AbortController()
let timeoutId: ReturnType<typeof setTimeout> | null = null
if (timeout > 0) {
timeoutId = setTimeout(() => {
console.log(pc.yellow("\nTimeout reached. Aborting..."))
abortController.abort()
}, timeout)
}
try {
const { client, cleanup: serverCleanup } = await createServerConnection({
@ -65,7 +57,6 @@ export async function run(options: RunOptions): Promise<number> {
})
const cleanup = () => {
if (timeoutId) clearTimeout(timeoutId)
serverCleanup()
}
@ -108,7 +99,9 @@ export async function run(options: RunOptions): Promise<number> {
})
console.log(pc.dim("Waiting for completion...\n"))
const exitCode = await pollForCompletion(ctx, eventState, abortController)
const exitCode = await pollForCompletion(ctx, eventState, abortController, {
timeoutMs: timeout,
})
// Abort the event stream to stop the processor
abortController.abort()
@ -144,7 +137,6 @@ export async function run(options: RunOptions): Promise<number> {
throw err
}
} catch (err) {
if (timeoutId) clearTimeout(timeoutId)
if (jsonManager) jsonManager.restore()
if (err instanceof Error && err.name === "AbortError") {
return 130