From 39e92b1900147b2b70e0ae660e0faa03d54a991e Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 6 Jan 2026 13:16:51 +0900 Subject: [PATCH] feat(hooks): add mandatory hands-on verification enforcement for orchestrated tasks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - sisyphus-orchestrator: Add verification reminder with tool matrix (playwright/interactive_bash/curl) - start-work: Inject detailed verification workflow with deliverable-specific guidance 🤖 Generated with [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode) assistance --- src/hooks/sisyphus-orchestrator/index.ts | 14 +++++- src/hooks/start-work/index.ts | 54 +++++++++++++++++++++++- 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/src/hooks/sisyphus-orchestrator/index.ts b/src/hooks/sisyphus-orchestrator/index.ts index 909732f6..ec12d120 100644 --- a/src/hooks/sisyphus-orchestrator/index.ts +++ b/src/hooks/sisyphus-orchestrator/index.ts @@ -42,7 +42,19 @@ Subagents FREQUENTLY claim completion when: 5. Verify notepad was updated - Must have substantive content DO NOT TRUST THE AGENT'S SELF-REPORT. -VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.` +They are non-deterministic and not exceptional - they CANNOT distinguish between completed and incomplete states. +VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS. + +**HANDS-ON QA REQUIRED (after ALL tasks complete):** + +| Deliverable Type | Verification Tool | Action | +|------------------|-------------------|--------| +| **Frontend/UI** | \`/playwright\` skill | Navigate, interact, screenshot evidence | +| **TUI/CLI** | \`interactive_bash\` (tmux) | Run interactively, verify output | +| **API/Backend** | \`bash\` with curl | Send requests, verify responses | + +Static analysis CANNOT catch: visual bugs, animation issues, user flow breakages, integration problems. +**FAILURE TO DO HANDS-ON QA = INCOMPLETE WORK.**` function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }): string { const remaining = progress.total - progress.completed diff --git a/src/hooks/start-work/index.ts b/src/hooks/start-work/index.ts index d58c2455..822d1ce4 100644 --- a/src/hooks/start-work/index.ts +++ b/src/hooks/start-work/index.ts @@ -126,13 +126,65 @@ Which plan would you like to work on? Reply with the number or plan name.` } } + const verificationEnforcement = ` + +--- + +## MANDATORY VERIFICATION ENFORCEMENT (NON-NEGOTIABLE) + +**CRITICAL: You MUST perform hands-on verification after completing ALL tasks. Static analysis alone is NOT sufficient.** + +### Verification by Deliverable Type + +| Type | Tool | How to Verify | +|------|------|---------------| +| **Frontend/UI** | \`/playwright\` skill | Navigate, click, verify visual state, take screenshots | +| **TUI/CLI** | \`interactive_bash\` (tmux) | Run commands interactively, verify output | +| **API/Backend** | \`bash\` with curl/httpie | Send requests, verify responses | +| **Library/Module** | REPL via \`interactive_bash\` | Import, call functions, verify results | + +### Verification Workflow + +1. **After ALL tasks complete** (not after each task): + - Start dev server if needed: \`bun run dev\` / \`npm run dev\` + - Wait for server to be ready + +2. **For Frontend changes**: + \`\`\` + Load /playwright skill → Navigate to page → Interact with UI → Verify expected behavior → Screenshot evidence + \`\`\` + +3. **For TUI/CLI changes**: + \`\`\` + interactive_bash(tmux_command="new-session -d -s qa") → send-keys with commands → capture-pane output → verify + \`\`\` + +4. **Evidence required**: + - Screenshots for visual changes (saved to \`.sisyphus/evidence/\`) + - Terminal output for CLI changes + - Response bodies for API changes + +### What Static Analysis CANNOT Catch + +- Visual rendering issues (wrong colors, broken layouts) +- Animation/transition bugs +- Race conditions in UI interactions +- User flow breakages +- Integration issues between components + +### FAILURE TO VERIFY = INCOMPLETE WORK + +**Do NOT mark tasks complete or report "done" without hands-on verification.** +If you skip this step, the user will find bugs you could have caught. +` + const idx = output.parts.findIndex((p) => p.type === "text" && p.text) if (idx >= 0 && output.parts[idx].text) { output.parts[idx].text = output.parts[idx].text .replace(/\$SESSION_ID/g, sessionId) .replace(/\$TIMESTAMP/g, timestamp) - output.parts[idx].text += `\n\n---\n${contextInfo}` + output.parts[idx].text += `\n\n---\n${contextInfo}${verificationEnforcement}` } log(`[${HOOK_NAME}] Context injected`, {