diff --git a/src/hooks/sisyphus-orchestrator/index.test.ts b/src/hooks/sisyphus-orchestrator/index.test.ts index 7baee1ea..39ba4257 100644 --- a/src/hooks/sisyphus-orchestrator/index.test.ts +++ b/src/hooks/sisyphus-orchestrator/index.test.ts @@ -77,7 +77,7 @@ describe("sisyphus-orchestrator hook", () => { // #when await hook["tool.execute.after"]( - { tool: "other_tool", sessionID: "session-123", agent: "orchestrator-sisyphus" }, + { tool: "other_tool", sessionID: "session-123" }, output ) @@ -140,8 +140,8 @@ describe("sisyphus-orchestrator hook", () => { // #then - standalone verification reminder appended expect(output.output).toContain("Task completed successfully") - expect(output.output).toContain("VERIFICATION REQUIRED") - expect(output.output).toContain("SUBAGENTS LIE") + expect(output.output).toContain("MANDATORY VERIFICATION") + expect(output.output).toContain("sisyphus_task(resume=") cleanupMessageStorage(sessionID) }) @@ -180,6 +180,7 @@ describe("sisyphus-orchestrator hook", () => { expect(output.output).toContain("SUBAGENT WORK COMPLETED") expect(output.output).toContain("test-plan") expect(output.output).toContain("SUBAGENTS LIE") + expect(output.output).toContain("sisyphus_task(resume=") cleanupMessageStorage(sessionID) }) @@ -323,9 +324,8 @@ describe("sisyphus-orchestrator hook", () => { output ) - // #then - output should contain boulder.json path and notepad path format - expect(output.output).toContain(".sisyphus/boulder.json") - expect(output.output).toContain(".sisyphus/notepads/my-feature/{category}.md") + // #then - output should contain plan name and progress + expect(output.output).toContain("my-feature") expect(output.output).toContain("1/3 done") expect(output.output).toContain("2 left") @@ -361,14 +361,152 @@ describe("sisyphus-orchestrator hook", () => { output ) - // #then - should include resume and checkbox instructions + // #then - should include resume instructions and verification expect(output.output).toContain("sisyphus_task(resume=") - expect(output.output).toContain("- [ ]") - expect(output.output).toContain("- [x]") - expect(output.output).toContain("VERIFY") + expect(output.output).toContain("[x]") + expect(output.output).toContain("MANDATORY VERIFICATION") cleanupMessageStorage(sessionID) }) + + describe("Write/Edit tool direct work reminder", () => { + const ORCHESTRATOR_SESSION = "orchestrator-write-test" + + beforeEach(() => { + setupMessageStorage(ORCHESTRATOR_SESSION, "orchestrator-sisyphus") + }) + + afterEach(() => { + cleanupMessageStorage(ORCHESTRATOR_SESSION) + }) + + test("should append delegation reminder when orchestrator writes outside .sisyphus/", async () => { + // #given + const hook = createSisyphusOrchestratorHook(createMockPluginInput()) + const output = { + title: "Write", + output: "File written successfully", + metadata: { filePath: "/path/to/code.ts" }, + } + + // #when + await hook["tool.execute.after"]( + { tool: "Write", sessionID: ORCHESTRATOR_SESSION }, + output + ) + + // #then + expect(output.output).toContain("DELEGATION REQUIRED") + expect(output.output).toContain("ORCHESTRATOR, not an IMPLEMENTER") + expect(output.output).toContain("sisyphus_task") + }) + + test("should append delegation reminder when orchestrator edits outside .sisyphus/", async () => { + // #given + const hook = createSisyphusOrchestratorHook(createMockPluginInput()) + const output = { + title: "Edit", + output: "File edited successfully", + metadata: { filePath: "/src/components/button.tsx" }, + } + + // #when + await hook["tool.execute.after"]( + { tool: "Edit", sessionID: ORCHESTRATOR_SESSION }, + output + ) + + // #then + expect(output.output).toContain("DELEGATION REQUIRED") + }) + + test("should NOT append reminder when orchestrator writes inside .sisyphus/", async () => { + // #given + const hook = createSisyphusOrchestratorHook(createMockPluginInput()) + const originalOutput = "File written successfully" + const output = { + title: "Write", + output: originalOutput, + metadata: { filePath: "/project/.sisyphus/plans/work-plan.md" }, + } + + // #when + await hook["tool.execute.after"]( + { tool: "Write", sessionID: ORCHESTRATOR_SESSION }, + output + ) + + // #then + expect(output.output).toBe(originalOutput) + expect(output.output).not.toContain("DELEGATION REQUIRED") + }) + + test("should NOT append reminder when non-orchestrator writes outside .sisyphus/", async () => { + // #given + const nonOrchestratorSession = "non-orchestrator-session" + setupMessageStorage(nonOrchestratorSession, "Sisyphus-Junior") + + const hook = createSisyphusOrchestratorHook(createMockPluginInput()) + const originalOutput = "File written successfully" + const output = { + title: "Write", + output: originalOutput, + metadata: { filePath: "/path/to/code.ts" }, + } + + // #when + await hook["tool.execute.after"]( + { tool: "Write", sessionID: nonOrchestratorSession }, + output + ) + + // #then + expect(output.output).toBe(originalOutput) + expect(output.output).not.toContain("DELEGATION REQUIRED") + + cleanupMessageStorage(nonOrchestratorSession) + }) + + test("should NOT append reminder for read-only tools", async () => { + // #given + const hook = createSisyphusOrchestratorHook(createMockPluginInput()) + const originalOutput = "File content" + const output = { + title: "Read", + output: originalOutput, + metadata: { filePath: "/path/to/code.ts" }, + } + + // #when + await hook["tool.execute.after"]( + { tool: "Read", sessionID: ORCHESTRATOR_SESSION }, + output + ) + + // #then + expect(output.output).toBe(originalOutput) + }) + + test("should handle missing filePath gracefully", async () => { + // #given + const hook = createSisyphusOrchestratorHook(createMockPluginInput()) + const originalOutput = "File written successfully" + const output = { + title: "Write", + output: originalOutput, + metadata: {}, + } + + // #when + await hook["tool.execute.after"]( + { tool: "Write", sessionID: ORCHESTRATOR_SESSION }, + output + ) + + // #then + expect(output.output).toBe(originalOutput) + }) + }) }) describe("session.idle handler (boulder continuation)", () => { diff --git a/src/hooks/sisyphus-orchestrator/index.ts b/src/hooks/sisyphus-orchestrator/index.ts index 5be7b037..8cfecfe2 100644 --- a/src/hooks/sisyphus-orchestrator/index.ts +++ b/src/hooks/sisyphus-orchestrator/index.ts @@ -70,10 +70,8 @@ Subagents FREQUENTLY claim completion when: 2. Run tests yourself - Must PASS (not "agent said it passed") 3. Read the actual code - Must match requirements 4. Check build/typecheck - Must succeed -5. Verify notepad was updated - Must have substantive content DO NOT TRUST THE AGENT'S SELF-REPORT. -They are non-deterministic and not exceptional - they CANNOT distinguish between completed and incomplete states. VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS. **HANDS-ON QA REQUIRED (after ALL tasks complete):** @@ -87,45 +85,41 @@ VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS. Static analysis CANNOT catch: visual bugs, animation issues, user flow breakages, integration problems. **FAILURE TO DO HANDS-ON QA = INCOMPLETE WORK.**` -function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }): string { +function buildVerificationReminder(sessionId: string): string { + return `${VERIFICATION_REMINDER} + +--- + +**If ANY verification fails, use this immediately:** +\`\`\` +sisyphus_task(resume="${sessionId}", prompt="fix: [describe the specific failure]") +\`\`\`` +} + +function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }, sessionId: string): string { const remaining = progress.total - progress.completed return ` --- -**State:** \`.sisyphus/boulder.json\` | Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left - -**Notepad:** \`.sisyphus/notepads/${planName}/{category}.md\` +**State:** Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left --- -${VERIFICATION_REMINDER} +${buildVerificationReminder(sessionId)} -**COMMIT FREQUENTLY:** -- Commit after each verified task unit - one logical change per commit -- Do NOT accumulate multiple tasks into one big commit -- Atomic commits make rollback and review easier -- If verification passes, commit immediately before moving on - -**THEN:** -- Broken? \`sisyphus_task(resume="", prompt="fix: ...")\` -- Verified? Commit atomic unit, mark \`- [ ]\` to \`- [x]\`, next task` +ALL pass? → commit atomic unit, mark \`[x]\`, next task.` } -function buildStandaloneVerificationReminder(): string { +function buildStandaloneVerificationReminder(sessionId: string): string { return ` --- -## SISYPHUS_TASK COMPLETED - VERIFICATION REQUIRED +${buildVerificationReminder(sessionId)}` +} -${VERIFICATION_REMINDER} - -**VERIFICATION CHECKLIST:** -- [ ] lsp_diagnostics on changed files - Run it yourself -- [ ] Tests pass - Run the test command yourself -- [ ] Code correct - Read the files yourself -- [ ] No regressions - Check related functionality - -**REMEMBER:** Agent's "done" does NOT mean actually done.` +function extractSessionIdFromOutput(output: string): string { + const match = output.match(/Session ID:\s*(ses_[a-zA-Z0-9]+)/) + return match?.[1] ?? "" } interface GitFileStat { @@ -233,12 +227,6 @@ function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string { return lines.join("\n") } -interface ToolExecuteInput { - tool: string - sessionID?: string - agent?: string -} - interface ToolExecuteAfterInput { tool: string sessionID?: string @@ -251,8 +239,6 @@ interface ToolExecuteAfterOutput { metadata: Record } -type ToolExecuteOutput = ToolExecuteAfterOutput - function getMessageDir(sessionID: string): string | null { if (!existsSync(MESSAGE_STORAGE)) return null @@ -500,6 +486,7 @@ export function createSisyphusOrchestratorHook( if (output.output && typeof output.output === "string") { const gitStats = getGitDiffStats(ctx.directory) const fileChanges = formatFileChanges(gitStats) + const subagentSessionId = extractSessionIdFromOutput(output.output) const boulderState = readBoulderState(ctx.directory) @@ -518,7 +505,7 @@ export function createSisyphusOrchestratorHook( ## SUBAGENT WORK COMPLETED ${fileChanges} -${buildOrchestratorReminder(boulderState.plan_name, progress)}` +${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId)}` log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, { plan: boulderState.plan_name, @@ -526,7 +513,7 @@ ${buildOrchestratorReminder(boulderState.plan_name, progress)}` fileCount: gitStats.length, }) } else { - output.output += `\n${buildStandaloneVerificationReminder()}` + output.output += `\n${buildStandaloneVerificationReminder(subagentSessionId)}` log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, { sessionID: input.sessionID,