feat(hooks): add resume session_id to verification reminders for orchestrator subagent work

When subagent work fails verification, show exact sisyphus_task(resume="...")
command with session_id for immediate retry. Consolidates verification workflow
across boulder and standalone modes.
This commit is contained in:
YeonGyu-Kim 2026-01-06 14:01:58 +09:00
parent 9eafe6b5f9
commit 3b89a35a13
2 changed files with 172 additions and 47 deletions

View File

@ -77,7 +77,7 @@ describe("sisyphus-orchestrator hook", () => {
// #when // #when
await hook["tool.execute.after"]( await hook["tool.execute.after"](
{ tool: "other_tool", sessionID: "session-123", agent: "orchestrator-sisyphus" }, { tool: "other_tool", sessionID: "session-123" },
output output
) )
@ -140,8 +140,8 @@ describe("sisyphus-orchestrator hook", () => {
// #then - standalone verification reminder appended // #then - standalone verification reminder appended
expect(output.output).toContain("Task completed successfully") expect(output.output).toContain("Task completed successfully")
expect(output.output).toContain("VERIFICATION REQUIRED") expect(output.output).toContain("MANDATORY VERIFICATION")
expect(output.output).toContain("SUBAGENTS LIE") expect(output.output).toContain("sisyphus_task(resume=")
cleanupMessageStorage(sessionID) cleanupMessageStorage(sessionID)
}) })
@ -180,6 +180,7 @@ describe("sisyphus-orchestrator hook", () => {
expect(output.output).toContain("SUBAGENT WORK COMPLETED") expect(output.output).toContain("SUBAGENT WORK COMPLETED")
expect(output.output).toContain("test-plan") expect(output.output).toContain("test-plan")
expect(output.output).toContain("SUBAGENTS LIE") expect(output.output).toContain("SUBAGENTS LIE")
expect(output.output).toContain("sisyphus_task(resume=")
cleanupMessageStorage(sessionID) cleanupMessageStorage(sessionID)
}) })
@ -323,9 +324,8 @@ describe("sisyphus-orchestrator hook", () => {
output output
) )
// #then - output should contain boulder.json path and notepad path format // #then - output should contain plan name and progress
expect(output.output).toContain(".sisyphus/boulder.json") expect(output.output).toContain("my-feature")
expect(output.output).toContain(".sisyphus/notepads/my-feature/{category}.md")
expect(output.output).toContain("1/3 done") expect(output.output).toContain("1/3 done")
expect(output.output).toContain("2 left") expect(output.output).toContain("2 left")
@ -361,14 +361,152 @@ describe("sisyphus-orchestrator hook", () => {
output output
) )
// #then - should include resume and checkbox instructions // #then - should include resume instructions and verification
expect(output.output).toContain("sisyphus_task(resume=") expect(output.output).toContain("sisyphus_task(resume=")
expect(output.output).toContain("- [ ]") expect(output.output).toContain("[x]")
expect(output.output).toContain("- [x]") expect(output.output).toContain("MANDATORY VERIFICATION")
expect(output.output).toContain("VERIFY")
cleanupMessageStorage(sessionID) cleanupMessageStorage(sessionID)
}) })
describe("Write/Edit tool direct work reminder", () => {
const ORCHESTRATOR_SESSION = "orchestrator-write-test"
beforeEach(() => {
setupMessageStorage(ORCHESTRATOR_SESSION, "orchestrator-sisyphus")
})
afterEach(() => {
cleanupMessageStorage(ORCHESTRATOR_SESSION)
})
test("should append delegation reminder when orchestrator writes outside .sisyphus/", async () => {
// #given
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const output = {
title: "Write",
output: "File written successfully",
metadata: { filePath: "/path/to/code.ts" },
}
// #when
await hook["tool.execute.after"](
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
output
)
// #then
expect(output.output).toContain("DELEGATION REQUIRED")
expect(output.output).toContain("ORCHESTRATOR, not an IMPLEMENTER")
expect(output.output).toContain("sisyphus_task")
})
test("should append delegation reminder when orchestrator edits outside .sisyphus/", async () => {
// #given
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const output = {
title: "Edit",
output: "File edited successfully",
metadata: { filePath: "/src/components/button.tsx" },
}
// #when
await hook["tool.execute.after"](
{ tool: "Edit", sessionID: ORCHESTRATOR_SESSION },
output
)
// #then
expect(output.output).toContain("DELEGATION REQUIRED")
})
test("should NOT append reminder when orchestrator writes inside .sisyphus/", async () => {
// #given
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const originalOutput = "File written successfully"
const output = {
title: "Write",
output: originalOutput,
metadata: { filePath: "/project/.sisyphus/plans/work-plan.md" },
}
// #when
await hook["tool.execute.after"](
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
output
)
// #then
expect(output.output).toBe(originalOutput)
expect(output.output).not.toContain("DELEGATION REQUIRED")
})
test("should NOT append reminder when non-orchestrator writes outside .sisyphus/", async () => {
// #given
const nonOrchestratorSession = "non-orchestrator-session"
setupMessageStorage(nonOrchestratorSession, "Sisyphus-Junior")
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const originalOutput = "File written successfully"
const output = {
title: "Write",
output: originalOutput,
metadata: { filePath: "/path/to/code.ts" },
}
// #when
await hook["tool.execute.after"](
{ tool: "Write", sessionID: nonOrchestratorSession },
output
)
// #then
expect(output.output).toBe(originalOutput)
expect(output.output).not.toContain("DELEGATION REQUIRED")
cleanupMessageStorage(nonOrchestratorSession)
})
test("should NOT append reminder for read-only tools", async () => {
// #given
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const originalOutput = "File content"
const output = {
title: "Read",
output: originalOutput,
metadata: { filePath: "/path/to/code.ts" },
}
// #when
await hook["tool.execute.after"](
{ tool: "Read", sessionID: ORCHESTRATOR_SESSION },
output
)
// #then
expect(output.output).toBe(originalOutput)
})
test("should handle missing filePath gracefully", async () => {
// #given
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const originalOutput = "File written successfully"
const output = {
title: "Write",
output: originalOutput,
metadata: {},
}
// #when
await hook["tool.execute.after"](
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
output
)
// #then
expect(output.output).toBe(originalOutput)
})
})
}) })
describe("session.idle handler (boulder continuation)", () => { describe("session.idle handler (boulder continuation)", () => {

View File

@ -70,10 +70,8 @@ Subagents FREQUENTLY claim completion when:
2. Run tests yourself - Must PASS (not "agent said it passed") 2. Run tests yourself - Must PASS (not "agent said it passed")
3. Read the actual code - Must match requirements 3. Read the actual code - Must match requirements
4. Check build/typecheck - Must succeed 4. Check build/typecheck - Must succeed
5. Verify notepad was updated - Must have substantive content
DO NOT TRUST THE AGENT'S SELF-REPORT. DO NOT TRUST THE AGENT'S SELF-REPORT.
They are non-deterministic and not exceptional - they CANNOT distinguish between completed and incomplete states.
VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS. VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.
**HANDS-ON QA REQUIRED (after ALL tasks complete):** **HANDS-ON QA REQUIRED (after ALL tasks complete):**
@ -87,45 +85,41 @@ VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.
Static analysis CANNOT catch: visual bugs, animation issues, user flow breakages, integration problems. Static analysis CANNOT catch: visual bugs, animation issues, user flow breakages, integration problems.
**FAILURE TO DO HANDS-ON QA = INCOMPLETE WORK.**` **FAILURE TO DO HANDS-ON QA = INCOMPLETE WORK.**`
function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }): string { function buildVerificationReminder(sessionId: string): string {
return `${VERIFICATION_REMINDER}
---
**If ANY verification fails, use this immediately:**
\`\`\`
sisyphus_task(resume="${sessionId}", prompt="fix: [describe the specific failure]")
\`\`\``
}
function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }, sessionId: string): string {
const remaining = progress.total - progress.completed const remaining = progress.total - progress.completed
return ` return `
--- ---
**State:** \`.sisyphus/boulder.json\` | Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left **State:** Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left
**Notepad:** \`.sisyphus/notepads/${planName}/{category}.md\`
--- ---
${VERIFICATION_REMINDER} ${buildVerificationReminder(sessionId)}
**COMMIT FREQUENTLY:** ALL pass? commit atomic unit, mark \`[x]\`, next task.`
- Commit after each verified task unit - one logical change per commit
- Do NOT accumulate multiple tasks into one big commit
- Atomic commits make rollback and review easier
- If verification passes, commit immediately before moving on
**THEN:**
- Broken? \`sisyphus_task(resume="<session_id>", prompt="fix: ...")\`
- Verified? Commit atomic unit, mark \`- [ ]\` to \`- [x]\`, next task`
} }
function buildStandaloneVerificationReminder(): string { function buildStandaloneVerificationReminder(sessionId: string): string {
return ` return `
--- ---
## SISYPHUS_TASK COMPLETED - VERIFICATION REQUIRED ${buildVerificationReminder(sessionId)}`
}
${VERIFICATION_REMINDER} function extractSessionIdFromOutput(output: string): string {
const match = output.match(/Session ID:\s*(ses_[a-zA-Z0-9]+)/)
**VERIFICATION CHECKLIST:** return match?.[1] ?? "<session_id>"
- [ ] lsp_diagnostics on changed files - Run it yourself
- [ ] Tests pass - Run the test command yourself
- [ ] Code correct - Read the files yourself
- [ ] No regressions - Check related functionality
**REMEMBER:** Agent's "done" does NOT mean actually done.`
} }
interface GitFileStat { interface GitFileStat {
@ -233,12 +227,6 @@ function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string {
return lines.join("\n") return lines.join("\n")
} }
interface ToolExecuteInput {
tool: string
sessionID?: string
agent?: string
}
interface ToolExecuteAfterInput { interface ToolExecuteAfterInput {
tool: string tool: string
sessionID?: string sessionID?: string
@ -251,8 +239,6 @@ interface ToolExecuteAfterOutput {
metadata: Record<string, unknown> metadata: Record<string, unknown>
} }
type ToolExecuteOutput = ToolExecuteAfterOutput
function getMessageDir(sessionID: string): string | null { function getMessageDir(sessionID: string): string | null {
if (!existsSync(MESSAGE_STORAGE)) return null if (!existsSync(MESSAGE_STORAGE)) return null
@ -500,6 +486,7 @@ export function createSisyphusOrchestratorHook(
if (output.output && typeof output.output === "string") { if (output.output && typeof output.output === "string") {
const gitStats = getGitDiffStats(ctx.directory) const gitStats = getGitDiffStats(ctx.directory)
const fileChanges = formatFileChanges(gitStats) const fileChanges = formatFileChanges(gitStats)
const subagentSessionId = extractSessionIdFromOutput(output.output)
const boulderState = readBoulderState(ctx.directory) const boulderState = readBoulderState(ctx.directory)
@ -518,7 +505,7 @@ export function createSisyphusOrchestratorHook(
## SUBAGENT WORK COMPLETED ## SUBAGENT WORK COMPLETED
${fileChanges} ${fileChanges}
${buildOrchestratorReminder(boulderState.plan_name, progress)}` ${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId)}`
log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, { log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, {
plan: boulderState.plan_name, plan: boulderState.plan_name,
@ -526,7 +513,7 @@ ${buildOrchestratorReminder(boulderState.plan_name, progress)}`
fileCount: gitStats.length, fileCount: gitStats.length,
}) })
} else { } else {
output.output += `\n${buildStandaloneVerificationReminder()}` output.output += `\n${buildStandaloneVerificationReminder(subagentSessionId)}`
log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, { log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, {
sessionID: input.sessionID, sessionID: input.sessionID,