feat(hooks): add resume session_id to verification reminders for orchestrator subagent work
When subagent work fails verification, show exact sisyphus_task(resume="...") command with session_id for immediate retry. Consolidates verification workflow across boulder and standalone modes.
This commit is contained in:
parent
9eafe6b5f9
commit
3b89a35a13
@ -77,7 +77,7 @@ describe("sisyphus-orchestrator hook", () => {
|
|||||||
|
|
||||||
// #when
|
// #when
|
||||||
await hook["tool.execute.after"](
|
await hook["tool.execute.after"](
|
||||||
{ tool: "other_tool", sessionID: "session-123", agent: "orchestrator-sisyphus" },
|
{ tool: "other_tool", sessionID: "session-123" },
|
||||||
output
|
output
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -140,8 +140,8 @@ describe("sisyphus-orchestrator hook", () => {
|
|||||||
|
|
||||||
// #then - standalone verification reminder appended
|
// #then - standalone verification reminder appended
|
||||||
expect(output.output).toContain("Task completed successfully")
|
expect(output.output).toContain("Task completed successfully")
|
||||||
expect(output.output).toContain("VERIFICATION REQUIRED")
|
expect(output.output).toContain("MANDATORY VERIFICATION")
|
||||||
expect(output.output).toContain("SUBAGENTS LIE")
|
expect(output.output).toContain("sisyphus_task(resume=")
|
||||||
|
|
||||||
cleanupMessageStorage(sessionID)
|
cleanupMessageStorage(sessionID)
|
||||||
})
|
})
|
||||||
@ -180,6 +180,7 @@ describe("sisyphus-orchestrator hook", () => {
|
|||||||
expect(output.output).toContain("SUBAGENT WORK COMPLETED")
|
expect(output.output).toContain("SUBAGENT WORK COMPLETED")
|
||||||
expect(output.output).toContain("test-plan")
|
expect(output.output).toContain("test-plan")
|
||||||
expect(output.output).toContain("SUBAGENTS LIE")
|
expect(output.output).toContain("SUBAGENTS LIE")
|
||||||
|
expect(output.output).toContain("sisyphus_task(resume=")
|
||||||
|
|
||||||
cleanupMessageStorage(sessionID)
|
cleanupMessageStorage(sessionID)
|
||||||
})
|
})
|
||||||
@ -323,9 +324,8 @@ describe("sisyphus-orchestrator hook", () => {
|
|||||||
output
|
output
|
||||||
)
|
)
|
||||||
|
|
||||||
// #then - output should contain boulder.json path and notepad path format
|
// #then - output should contain plan name and progress
|
||||||
expect(output.output).toContain(".sisyphus/boulder.json")
|
expect(output.output).toContain("my-feature")
|
||||||
expect(output.output).toContain(".sisyphus/notepads/my-feature/{category}.md")
|
|
||||||
expect(output.output).toContain("1/3 done")
|
expect(output.output).toContain("1/3 done")
|
||||||
expect(output.output).toContain("2 left")
|
expect(output.output).toContain("2 left")
|
||||||
|
|
||||||
@ -361,14 +361,152 @@ describe("sisyphus-orchestrator hook", () => {
|
|||||||
output
|
output
|
||||||
)
|
)
|
||||||
|
|
||||||
// #then - should include resume and checkbox instructions
|
// #then - should include resume instructions and verification
|
||||||
expect(output.output).toContain("sisyphus_task(resume=")
|
expect(output.output).toContain("sisyphus_task(resume=")
|
||||||
expect(output.output).toContain("- [ ]")
|
expect(output.output).toContain("[x]")
|
||||||
expect(output.output).toContain("- [x]")
|
expect(output.output).toContain("MANDATORY VERIFICATION")
|
||||||
expect(output.output).toContain("VERIFY")
|
|
||||||
|
|
||||||
cleanupMessageStorage(sessionID)
|
cleanupMessageStorage(sessionID)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe("Write/Edit tool direct work reminder", () => {
|
||||||
|
const ORCHESTRATOR_SESSION = "orchestrator-write-test"
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
setupMessageStorage(ORCHESTRATOR_SESSION, "orchestrator-sisyphus")
|
||||||
|
})
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
cleanupMessageStorage(ORCHESTRATOR_SESSION)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should append delegation reminder when orchestrator writes outside .sisyphus/", async () => {
|
||||||
|
// #given
|
||||||
|
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||||
|
const output = {
|
||||||
|
title: "Write",
|
||||||
|
output: "File written successfully",
|
||||||
|
metadata: { filePath: "/path/to/code.ts" },
|
||||||
|
}
|
||||||
|
|
||||||
|
// #when
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
|
||||||
|
output
|
||||||
|
)
|
||||||
|
|
||||||
|
// #then
|
||||||
|
expect(output.output).toContain("DELEGATION REQUIRED")
|
||||||
|
expect(output.output).toContain("ORCHESTRATOR, not an IMPLEMENTER")
|
||||||
|
expect(output.output).toContain("sisyphus_task")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should append delegation reminder when orchestrator edits outside .sisyphus/", async () => {
|
||||||
|
// #given
|
||||||
|
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||||
|
const output = {
|
||||||
|
title: "Edit",
|
||||||
|
output: "File edited successfully",
|
||||||
|
metadata: { filePath: "/src/components/button.tsx" },
|
||||||
|
}
|
||||||
|
|
||||||
|
// #when
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "Edit", sessionID: ORCHESTRATOR_SESSION },
|
||||||
|
output
|
||||||
|
)
|
||||||
|
|
||||||
|
// #then
|
||||||
|
expect(output.output).toContain("DELEGATION REQUIRED")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should NOT append reminder when orchestrator writes inside .sisyphus/", async () => {
|
||||||
|
// #given
|
||||||
|
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||||
|
const originalOutput = "File written successfully"
|
||||||
|
const output = {
|
||||||
|
title: "Write",
|
||||||
|
output: originalOutput,
|
||||||
|
metadata: { filePath: "/project/.sisyphus/plans/work-plan.md" },
|
||||||
|
}
|
||||||
|
|
||||||
|
// #when
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
|
||||||
|
output
|
||||||
|
)
|
||||||
|
|
||||||
|
// #then
|
||||||
|
expect(output.output).toBe(originalOutput)
|
||||||
|
expect(output.output).not.toContain("DELEGATION REQUIRED")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should NOT append reminder when non-orchestrator writes outside .sisyphus/", async () => {
|
||||||
|
// #given
|
||||||
|
const nonOrchestratorSession = "non-orchestrator-session"
|
||||||
|
setupMessageStorage(nonOrchestratorSession, "Sisyphus-Junior")
|
||||||
|
|
||||||
|
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||||
|
const originalOutput = "File written successfully"
|
||||||
|
const output = {
|
||||||
|
title: "Write",
|
||||||
|
output: originalOutput,
|
||||||
|
metadata: { filePath: "/path/to/code.ts" },
|
||||||
|
}
|
||||||
|
|
||||||
|
// #when
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "Write", sessionID: nonOrchestratorSession },
|
||||||
|
output
|
||||||
|
)
|
||||||
|
|
||||||
|
// #then
|
||||||
|
expect(output.output).toBe(originalOutput)
|
||||||
|
expect(output.output).not.toContain("DELEGATION REQUIRED")
|
||||||
|
|
||||||
|
cleanupMessageStorage(nonOrchestratorSession)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should NOT append reminder for read-only tools", async () => {
|
||||||
|
// #given
|
||||||
|
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||||
|
const originalOutput = "File content"
|
||||||
|
const output = {
|
||||||
|
title: "Read",
|
||||||
|
output: originalOutput,
|
||||||
|
metadata: { filePath: "/path/to/code.ts" },
|
||||||
|
}
|
||||||
|
|
||||||
|
// #when
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "Read", sessionID: ORCHESTRATOR_SESSION },
|
||||||
|
output
|
||||||
|
)
|
||||||
|
|
||||||
|
// #then
|
||||||
|
expect(output.output).toBe(originalOutput)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should handle missing filePath gracefully", async () => {
|
||||||
|
// #given
|
||||||
|
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||||
|
const originalOutput = "File written successfully"
|
||||||
|
const output = {
|
||||||
|
title: "Write",
|
||||||
|
output: originalOutput,
|
||||||
|
metadata: {},
|
||||||
|
}
|
||||||
|
|
||||||
|
// #when
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
|
||||||
|
output
|
||||||
|
)
|
||||||
|
|
||||||
|
// #then
|
||||||
|
expect(output.output).toBe(originalOutput)
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe("session.idle handler (boulder continuation)", () => {
|
describe("session.idle handler (boulder continuation)", () => {
|
||||||
|
|||||||
@ -70,10 +70,8 @@ Subagents FREQUENTLY claim completion when:
|
|||||||
2. Run tests yourself - Must PASS (not "agent said it passed")
|
2. Run tests yourself - Must PASS (not "agent said it passed")
|
||||||
3. Read the actual code - Must match requirements
|
3. Read the actual code - Must match requirements
|
||||||
4. Check build/typecheck - Must succeed
|
4. Check build/typecheck - Must succeed
|
||||||
5. Verify notepad was updated - Must have substantive content
|
|
||||||
|
|
||||||
DO NOT TRUST THE AGENT'S SELF-REPORT.
|
DO NOT TRUST THE AGENT'S SELF-REPORT.
|
||||||
They are non-deterministic and not exceptional - they CANNOT distinguish between completed and incomplete states.
|
|
||||||
VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.
|
VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.
|
||||||
|
|
||||||
**HANDS-ON QA REQUIRED (after ALL tasks complete):**
|
**HANDS-ON QA REQUIRED (after ALL tasks complete):**
|
||||||
@ -87,45 +85,41 @@ VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.
|
|||||||
Static analysis CANNOT catch: visual bugs, animation issues, user flow breakages, integration problems.
|
Static analysis CANNOT catch: visual bugs, animation issues, user flow breakages, integration problems.
|
||||||
**FAILURE TO DO HANDS-ON QA = INCOMPLETE WORK.**`
|
**FAILURE TO DO HANDS-ON QA = INCOMPLETE WORK.**`
|
||||||
|
|
||||||
function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }): string {
|
function buildVerificationReminder(sessionId: string): string {
|
||||||
|
return `${VERIFICATION_REMINDER}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**If ANY verification fails, use this immediately:**
|
||||||
|
\`\`\`
|
||||||
|
sisyphus_task(resume="${sessionId}", prompt="fix: [describe the specific failure]")
|
||||||
|
\`\`\``
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }, sessionId: string): string {
|
||||||
const remaining = progress.total - progress.completed
|
const remaining = progress.total - progress.completed
|
||||||
return `
|
return `
|
||||||
---
|
---
|
||||||
|
|
||||||
**State:** \`.sisyphus/boulder.json\` | Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left
|
**State:** Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left
|
||||||
|
|
||||||
**Notepad:** \`.sisyphus/notepads/${planName}/{category}.md\`
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
${VERIFICATION_REMINDER}
|
${buildVerificationReminder(sessionId)}
|
||||||
|
|
||||||
**COMMIT FREQUENTLY:**
|
ALL pass? → commit atomic unit, mark \`[x]\`, next task.`
|
||||||
- Commit after each verified task unit - one logical change per commit
|
|
||||||
- Do NOT accumulate multiple tasks into one big commit
|
|
||||||
- Atomic commits make rollback and review easier
|
|
||||||
- If verification passes, commit immediately before moving on
|
|
||||||
|
|
||||||
**THEN:**
|
|
||||||
- Broken? \`sisyphus_task(resume="<session_id>", prompt="fix: ...")\`
|
|
||||||
- Verified? Commit atomic unit, mark \`- [ ]\` to \`- [x]\`, next task`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildStandaloneVerificationReminder(): string {
|
function buildStandaloneVerificationReminder(sessionId: string): string {
|
||||||
return `
|
return `
|
||||||
---
|
---
|
||||||
|
|
||||||
## SISYPHUS_TASK COMPLETED - VERIFICATION REQUIRED
|
${buildVerificationReminder(sessionId)}`
|
||||||
|
}
|
||||||
|
|
||||||
${VERIFICATION_REMINDER}
|
function extractSessionIdFromOutput(output: string): string {
|
||||||
|
const match = output.match(/Session ID:\s*(ses_[a-zA-Z0-9]+)/)
|
||||||
**VERIFICATION CHECKLIST:**
|
return match?.[1] ?? "<session_id>"
|
||||||
- [ ] lsp_diagnostics on changed files - Run it yourself
|
|
||||||
- [ ] Tests pass - Run the test command yourself
|
|
||||||
- [ ] Code correct - Read the files yourself
|
|
||||||
- [ ] No regressions - Check related functionality
|
|
||||||
|
|
||||||
**REMEMBER:** Agent's "done" does NOT mean actually done.`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
interface GitFileStat {
|
interface GitFileStat {
|
||||||
@ -233,12 +227,6 @@ function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string {
|
|||||||
return lines.join("\n")
|
return lines.join("\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ToolExecuteInput {
|
|
||||||
tool: string
|
|
||||||
sessionID?: string
|
|
||||||
agent?: string
|
|
||||||
}
|
|
||||||
|
|
||||||
interface ToolExecuteAfterInput {
|
interface ToolExecuteAfterInput {
|
||||||
tool: string
|
tool: string
|
||||||
sessionID?: string
|
sessionID?: string
|
||||||
@ -251,8 +239,6 @@ interface ToolExecuteAfterOutput {
|
|||||||
metadata: Record<string, unknown>
|
metadata: Record<string, unknown>
|
||||||
}
|
}
|
||||||
|
|
||||||
type ToolExecuteOutput = ToolExecuteAfterOutput
|
|
||||||
|
|
||||||
function getMessageDir(sessionID: string): string | null {
|
function getMessageDir(sessionID: string): string | null {
|
||||||
if (!existsSync(MESSAGE_STORAGE)) return null
|
if (!existsSync(MESSAGE_STORAGE)) return null
|
||||||
|
|
||||||
@ -500,6 +486,7 @@ export function createSisyphusOrchestratorHook(
|
|||||||
if (output.output && typeof output.output === "string") {
|
if (output.output && typeof output.output === "string") {
|
||||||
const gitStats = getGitDiffStats(ctx.directory)
|
const gitStats = getGitDiffStats(ctx.directory)
|
||||||
const fileChanges = formatFileChanges(gitStats)
|
const fileChanges = formatFileChanges(gitStats)
|
||||||
|
const subagentSessionId = extractSessionIdFromOutput(output.output)
|
||||||
|
|
||||||
const boulderState = readBoulderState(ctx.directory)
|
const boulderState = readBoulderState(ctx.directory)
|
||||||
|
|
||||||
@ -518,7 +505,7 @@ export function createSisyphusOrchestratorHook(
|
|||||||
## SUBAGENT WORK COMPLETED
|
## SUBAGENT WORK COMPLETED
|
||||||
|
|
||||||
${fileChanges}
|
${fileChanges}
|
||||||
${buildOrchestratorReminder(boulderState.plan_name, progress)}`
|
${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId)}`
|
||||||
|
|
||||||
log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, {
|
log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, {
|
||||||
plan: boulderState.plan_name,
|
plan: boulderState.plan_name,
|
||||||
@ -526,7 +513,7 @@ ${buildOrchestratorReminder(boulderState.plan_name, progress)}`
|
|||||||
fileCount: gitStats.length,
|
fileCount: gitStats.length,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
output.output += `\n${buildStandaloneVerificationReminder()}`
|
output.output += `\n${buildStandaloneVerificationReminder(subagentSessionId)}`
|
||||||
|
|
||||||
log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, {
|
log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, {
|
||||||
sessionID: input.sessionID,
|
sessionID: input.sessionID,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user