diff --git a/hooks/hooks.json b/hooks/hooks.json index cdd2584d..22799c08 100644 --- a/hooks/hooks.json +++ b/hooks/hooks.json @@ -126,6 +126,30 @@ ], "description": "Check MCP server health before MCP tool execution and block unhealthy MCP calls", "id": "pre:mcp-health-check" + }, + { + "matcher": "Edit|Write|MultiEdit", + "hooks": [ + { + "type": "command", + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/hooks/run-with-flags.js\" \"pre:edit-write:gateguard-fact-force\" \"scripts/hooks/gateguard-fact-force.js\" \"standard,strict\"", + "timeout": 5 + } + ], + "description": "Fact-forcing gate: block first Edit/Write/MultiEdit per file and demand investigation (importers, data schemas, user instruction) before allowing", + "id": "pre:edit-write:gateguard-fact-force" + }, + { + "matcher": "Bash", + "hooks": [ + { + "type": "command", + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/hooks/run-with-flags.js\" \"pre:bash:gateguard-fact-force\" \"scripts/hooks/gateguard-fact-force.js\" \"standard,strict\"", + "timeout": 5 + } + ], + "description": "Fact-forcing gate: block destructive Bash commands and demand rollback plan; quote user instruction on first Bash per session", + "id": "pre:bash:gateguard-fact-force" } ], "PreCompact": [ diff --git a/scripts/hooks/gateguard-fact-force.js b/scripts/hooks/gateguard-fact-force.js new file mode 100644 index 00000000..b8a0fcc8 --- /dev/null +++ b/scripts/hooks/gateguard-fact-force.js @@ -0,0 +1,265 @@ +#!/usr/bin/env node +/** + * PreToolUse Hook: GateGuard Fact-Forcing Gate + * + * Forces Claude to investigate before editing files or running commands. + * Instead of asking "are you sure?" (which LLMs always answer "yes"), + * this hook demands concrete facts: importers, public API, data schemas. + * + * The act of investigation creates awareness that self-evaluation never did. 
/**
 * PreToolUse Hook: GateGuard Fact-Forcing Gate
 *
 * Denies the first attempt at an edit or shell command and asks the model
 * for concrete facts (importers, affected API, data schemas, the user's
 * instruction) instead of a yes/no self-check. An identical retry is then
 * allowed.
 *
 * Gates:
 * - Edit/Write/MultiEdit: first attempt per file path
 * - Bash (destructive): first attempt per distinct command text
 * - Bash (routine): first routine command per session
 *
 * Exposes module.exports.run() for run-with-flags.js.
 *
 * Full package with config support: pip install gateguard-ai
 * Repo: https://github.com/zunoworks/gateguard
 */

'use strict';

const crypto = require('crypto');
const fs = require('fs');
const path = require('path');

// Session state is kept in one JSON file per session so that separate sessions
// do not share gate progress. The session id comes from CLAUDE_SESSION_ID or
// ECC_SESSION_ID when present, otherwise from the parent (or own) PID.
const STATE_DIR = process.env.GATEGUARD_STATE_DIR || path.join(process.env.HOME || process.env.USERPROFILE || '/tmp', '.gateguard');
const SESSION_ID = process.env.CLAUDE_SESSION_ID || process.env.ECC_SESSION_ID || `pid-${process.ppid || process.pid}`;
const STATE_FILE = path.join(STATE_DIR, `state-${SESSION_ID.replace(/[^a-zA-Z0-9_-]/g, '_')}.json`);

// State expires after 30 minutes of inactivity
const SESSION_TIMEOUT_MS = 30 * 60 * 1000;

// Maximum checked entries to prevent unbounded growth
const MAX_CHECKED_ENTRIES = 500;
const MAX_SESSION_KEYS = 50;
const ROUTINE_BASH_SESSION_KEY = '__bash_session__';

// Each alternative carries its own end condition. A single trailing \b after
// the whole group cannot match after `--` or `=`, which made
// `git checkout -- file` and `dd if=/dev/...` slip through unnoticed.
const DESTRUCTIVE_PATTERNS = [
  /rm\s+-(?:[a-z]*r[a-z]*f|[a-z]*f[a-z]*r)[a-z]*\b/, // rm -rf, rm -fr, rm -rfv, ...
  /git\s+reset\s+--hard\b/,
  /git\s+checkout\s+--(?=\s|$)/,                     // the bare `--` pathspec separator
  /git\s+clean\s+-[a-z]*f/,                          // -f, -fd, -xdf, ...
  /drop\s+table\b/,
  /delete\s+from\b/,
  /truncate\b/,
  /git\s+push\s+--force\b/,                          // also --force-with-lease
  /dd\s+if=/
];
const DESTRUCTIVE_BASH = new RegExp(`\\b(?:${DESTRUCTIVE_PATTERNS.map(p => p.source).join('|')})`, 'i');

// Case-insensitive tool name normalization. A Map avoids accidental hits on
// Object.prototype members such as "constructor".
const TOOL_MAP = new Map([
  ['edit', 'Edit'],
  ['write', 'Write'],
  ['multiedit', 'MultiEdit'],
  ['bash', 'Bash']
]);

// --- State management (per-session, atomic writes, bounded) ---

/**
 * Build an empty session state stamped with the current time.
 * @returns {{checked: string[], last_active: number}}
 */
function freshState() {
  return { checked: [], last_active: Date.now() };
}

/**
 * Read the session state file.
 *
 * Returns a fresh state when the file is missing, unreadable, not a JSON
 * object, or idle for longer than SESSION_TIMEOUT_MS (in which case the file
 * is also removed). `checked` is always returned as an array of strings so
 * callers can use `.includes()` and `.startsWith()` without guarding.
 *
 * @returns {{checked: string[], last_active: number}}
 */
function loadState() {
  try {
    if (fs.existsSync(STATE_FILE)) {
      const state = JSON.parse(fs.readFileSync(STATE_FILE, 'utf8'));
      if (state === null || typeof state !== 'object' || Array.isArray(state)) {
        return freshState();
      }
      const lastActive = Number(state.last_active) || 0;
      if (Date.now() - lastActive > SESSION_TIMEOUT_MS) {
        try { fs.unlinkSync(STATE_FILE); } catch (_) { /* ignore */ }
        return freshState();
      }
      // A hand-edited or truncated file may lack `checked` or hold non-strings.
      state.checked = Array.isArray(state.checked)
        ? state.checked.filter(k => typeof k === 'string')
        : [];
      return state;
    }
  } catch (_) { /* ignore: unreadable state is treated as no state */ }
  return freshState();
}

/**
 * Return the last `count` items, or an empty array when `count` is not positive.
 * Plain `slice(-count)` would return the WHOLE array for a count of 0.
 * @param {string[]} items
 * @param {number} count
 * @returns {string[]}
 */
function takeLast(items, count) {
  return count > 0 ? items.slice(-count) : [];
}

/**
 * Cap the checked list at MAX_CHECKED_ENTRIES.
 *
 * The routine-bash marker is always kept. Other `__`-prefixed keys are capped
 * so that, together with the marker, at most MAX_SESSION_KEYS remain. File keys
 * fill whatever room is left. The most recently added entries win.
 *
 * @param {string[]} checked
 * @returns {string[]} the same array when already within the cap, otherwise a new one
 */
function pruneCheckedEntries(checked) {
  if (checked.length <= MAX_CHECKED_ENTRIES) {
    return checked;
  }

  const preserved = checked.includes(ROUTINE_BASH_SESSION_KEY) ? [ROUTINE_BASH_SESSION_KEY] : [];
  const sessionKeys = checked.filter(k => k.startsWith('__') && k !== ROUTINE_BASH_SESSION_KEY);
  const fileKeys = checked.filter(k => !k.startsWith('__'));
  const cappedSession = takeLast(sessionKeys, MAX_SESSION_KEYS - preserved.length);
  const cappedFiles = takeLast(fileKeys, MAX_CHECKED_ENTRIES - preserved.length - cappedSession.length);
  return [...preserved, ...cappedSession, ...cappedFiles];
}

/**
 * Persist the state, refreshing `last_active` and pruning `checked`.
 * Best effort: failures are swallowed so the hook never blocks on disk errors.
 * @param {{checked: string[], last_active: number}} state mutated in place
 */
function saveState(state) {
  const tmpFile = `${STATE_FILE}.tmp.${process.pid}`;
  try {
    state.last_active = Date.now();
    state.checked = pruneCheckedEntries(state.checked);
    fs.mkdirSync(STATE_DIR, { recursive: true });
    // Atomic write: temp file + rename prevents partial reads
    fs.writeFileSync(tmpFile, JSON.stringify(state, null, 2), 'utf8');
    fs.renameSync(tmpFile, STATE_FILE);
  } catch (_) {
    // Do not leave the temp file behind; the stale-file pruner only looks at *.json.
    try { fs.unlinkSync(tmpFile); } catch (_e) { /* ignore */ }
  }
}

/**
 * Record that the gate for `key` has fired once.
 * @param {string} key file path or `__`-prefixed session key
 */
function markChecked(key) {
  const state = loadState();
  if (!state.checked.includes(key)) {
    state.checked.push(key);
    saveState(state);
  }
}

/**
 * Report whether the gate for `key` has already fired.
 * Also re-saves the state so that reads refresh `last_active`.
 * @param {string} key
 * @returns {boolean}
 */
function isChecked(key) {
  const state = loadState();
  const found = state.checked.includes(key);
  saveState(state);
  return found;
}

// Prune stale session files older than 1 hour (twice the session timeout).
(function pruneStaleFiles() {
  let files;
  try {
    files = fs.readdirSync(STATE_DIR);
  } catch (_) {
    return; // directory missing or unreadable: nothing to prune
  }
  const now = Date.now();
  for (const f of files) {
    if (!f.startsWith('state-') || !f.endsWith('.json')) continue;
    // Per-file guard: one vanished or locked file must not stop the sweep.
    try {
      const fp = path.join(STATE_DIR, f);
      const stat = fs.statSync(fp);
      if (now - stat.mtimeMs > SESSION_TIMEOUT_MS * 2) {
        fs.unlinkSync(fp);
      }
    } catch (_) { /* ignore */ }
  }
})();

// --- Sanitize file path against injection ---

/**
 * Make a file path safe to embed in a gate message.
 * Control characters (including NUL and newlines) and bidi override marks are
 * replaced with spaces, then the result is trimmed and cut to 500 characters.
 * @param {string} filePath
 * @returns {string}
 */
function sanitizePath(filePath) {
  return String(filePath).replace(/[\x00-\x1f\x7f\u200e\u200f\u202a-\u202e\u2066-\u2069]/g, ' ').trim().slice(0, 500);
}

// --- Gate messages ---

/**
 * Gate message for the first edit of an existing file.
 * @param {string} filePath
 * @returns {string}
 */
function editGateMsg(filePath) {
  const safe = sanitizePath(filePath);
  return [
    '[Fact-Forcing Gate]',
    '',
    `Before editing ${safe}, present these facts:`,
    '',
    '1. List ALL files that import/require this file (use Grep)',
    '2. List the public functions/classes affected by this change',
    '3. If this file reads/writes data files, show field names, structure, and date format (use redacted or synthetic values, not raw production data)',
    '4. Quote the user\'s current instruction verbatim',
    '',
    'Present the facts, then retry the same operation.'
  ].join('\n');
}

/**
 * Gate message for the first Write to a file path.
 * @param {string} filePath
 * @returns {string}
 */
function writeGateMsg(filePath) {
  const safe = sanitizePath(filePath);
  return [
    '[Fact-Forcing Gate]',
    '',
    `Before creating ${safe}, present these facts:`,
    '',
    '1. Name the file(s) and line(s) that will call this new file',
    '2. Confirm no existing file serves the same purpose (use Glob)',
    '3. If this file reads/writes data files, show field names, structure, and date format (use redacted or synthetic values, not raw production data)',
    '4. Quote the user\'s current instruction verbatim',
    '',
    'Present the facts, then retry the same operation.'
  ].join('\n');
}

/**
 * Gate message for a command matching DESTRUCTIVE_BASH.
 * @returns {string}
 */
function destructiveBashMsg() {
  return [
    '[Fact-Forcing Gate]',
    '',
    'Destructive command detected. Before running, present:',
    '',
    '1. List all files/data this command will modify or delete',
    '2. Write a one-line rollback procedure',
    '3. Quote the user\'s current instruction verbatim',
    '',
    'Present the facts, then retry the same operation.'
  ].join('\n');
}

/**
 * Gate message for the first non-destructive Bash command of a session.
 * @returns {string}
 */
function routineBashMsg() {
  return [
    '[Fact-Forcing Gate]',
    '',
    'Quote the user\'s current instruction verbatim.',
    'Then retry the same operation.'
  ].join('\n');
}

// --- Deny helper ---

/**
 * Wrap a reason in the PreToolUse "deny" payload.
 * @param {string} reason
 * @returns {{stdout: string, exitCode: number}}
 */
function denyResult(reason) {
  return {
    stdout: JSON.stringify({
      hookSpecificOutput: {
        hookEventName: 'PreToolUse',
        permissionDecision: 'deny',
        permissionDecisionReason: reason
      }
    }),
    exitCode: 0
  };
}

/**
 * Collect the file paths a MultiEdit call touches.
 *
 * The top-level `tool_input.file_path` is checked first. Per-edit `file_path`
 * values are still honored so inputs shaped that way keep being gated.
 *
 * @param {object} toolInput
 * @returns {string[]} unique, non-empty paths in first-seen order
 */
function collectMultiEditPaths(toolInput) {
  const paths = [];
  const add = candidate => {
    if (typeof candidate === 'string' && candidate && !paths.includes(candidate)) {
      paths.push(candidate);
    }
  };

  add(toolInput.file_path);
  const edits = Array.isArray(toolInput.edits) ? toolInput.edits : [];
  for (const edit of edits) {
    add(edit && edit.file_path);
  }
  return paths;
}

// --- Core logic (exported for run-with-flags.js) ---

/**
 * Decide whether a tool call passes the gate.
 *
 * @param {string|object} rawInput hook payload, as JSON text or an already-parsed object
 * @returns {string|object|{stdout: string, exitCode: number}} `rawInput` unchanged
 *   to allow the call, or a deny result carrying the fact-forcing message
 */
function run(rawInput) {
  let data;
  try {
    data = typeof rawInput === 'string' ? JSON.parse(rawInput) : rawInput;
  } catch (_) {
    return rawInput; // allow on parse error
  }
  if (data === null || typeof data !== 'object') {
    return rawInput; // allow: nothing to inspect (e.g. the JSON literal `null`)
  }

  const rawToolName = typeof data.tool_name === 'string' ? data.tool_name : '';
  const toolInput = data.tool_input !== null && typeof data.tool_input === 'object' ? data.tool_input : {};
  const toolName = TOOL_MAP.get(rawToolName.toLowerCase()) || rawToolName;

  if (toolName === 'Edit' || toolName === 'Write') {
    const filePath = typeof toolInput.file_path === 'string' ? toolInput.file_path : '';
    if (!filePath) {
      return rawInput; // allow
    }

    if (!isChecked(filePath)) {
      markChecked(filePath);
      return denyResult(toolName === 'Edit' ? editGateMsg(filePath) : writeGateMsg(filePath));
    }

    return rawInput; // allow
  }

  if (toolName === 'MultiEdit') {
    // Deny on the first path that has not been gated yet; later paths get
    // their turn on subsequent retries.
    for (const filePath of collectMultiEditPaths(toolInput)) {
      if (!isChecked(filePath)) {
        markChecked(filePath);
        return denyResult(editGateMsg(filePath));
      }
    }
    return rawInput; // allow
  }

  if (toolName === 'Bash') {
    const command = typeof toolInput.command === 'string' ? toolInput.command : '';

    if (DESTRUCTIVE_BASH.test(command)) {
      // Keyed by a hash of the exact command text: the first attempt is denied,
      // an identical retry (after the facts were presented) is allowed.
      const key = '__destructive__' + crypto.createHash('sha256').update(command).digest('hex').slice(0, 16);
      if (!isChecked(key)) {
        markChecked(key);
        return denyResult(destructiveBashMsg());
      }
      return rawInput; // allow retry after facts presented
    }

    if (!isChecked(ROUTINE_BASH_SESSION_KEY)) {
      markChecked(ROUTINE_BASH_SESSION_KEY);
      return denyResult(routineBashMsg());
    }

    return rawInput; // allow
  }

  return rawInput; // allow
}

module.exports = { run };
(including MultiEdit) and demands concrete investigation (importers, data schemas, user instruction) before allowing the action. Measurably improves output quality by +2.25 points vs ungated agents. +origin: community +--- + +# GateGuard — Fact-Forcing Pre-Action Gate + +A PreToolUse hook that forces Claude to investigate before editing. Instead of self-evaluation ("are you sure?"), it demands concrete facts. The act of investigation creates awareness that self-evaluation never did. + +## When to Activate + +- Working on any codebase where file edits affect multiple modules +- Projects with data files that have specific schemas or date formats +- Teams where AI-generated code must match existing patterns +- Any workflow where Claude tends to guess instead of investigating + +## Core Concept + +LLM self-evaluation doesn't work. Ask "did you violate any policies?" and the answer is always "no." This is verified experimentally. + +But asking "list every file that imports this module" forces the LLM to run Grep and Read. The investigation itself creates context that changes the output. + +**Three-stage gate:** + +``` +1. DENY — block the first Edit/Write/Bash attempt +2. FORCE — tell the model exactly which facts to gather +3. ALLOW — permit retry after facts are presented +``` + +No competitor does all three. Most stop at deny. + +## Evidence + +Two independent A/B tests, identical agents, same task: + +| Task | Gated | Ungated | Gap | +| --- | --- | --- | --- | +| Analytics module | 8.0/10 | 6.5/10 | +1.5 | +| Webhook validator | 10.0/10 | 7.0/10 | +3.0 | +| **Average** | **9.0** | **6.75** | **+2.25** | + +Both agents produce code that runs and passes tests. The difference is design depth. + +## Gate Types + +### Edit / MultiEdit Gate (first edit per file) + +MultiEdit is handled identically — each file in the batch is gated individually. + +``` +Before editing {file_path}, present these facts: + +1. List ALL files that import/require this file (use Grep) +2. 
List the public functions/classes affected by this change +3. If this file reads/writes data files, show field names, structure, + and date format (use redacted or synthetic values, not raw production data) +4. Quote the user's current instruction verbatim +``` + +### Write Gate (first new file creation) + +``` +Before creating {file_path}, present these facts: + +1. Name the file(s) and line(s) that will call this new file +2. Confirm no existing file serves the same purpose (use Glob) +3. If this file reads/writes data files, show field names, structure, + and date format (use redacted or synthetic values, not raw production data) +4. Quote the user's current instruction verbatim +``` + +### Destructive Bash Gate (first attempt of each distinct destructive command) + +Triggers on: `rm -rf`, `git reset --hard`, `git push --force`, `drop table`, etc. + +``` +1. List all files/data this command will modify or delete +2. Write a one-line rollback procedure +3. Quote the user's current instruction verbatim +``` + +### Routine Bash Gate (once per session) + +``` +Quote the user's current instruction verbatim. +``` + +## Quick Start + +### Option A: Use the ECC hook (zero install) + +The hook at `scripts/hooks/gateguard-fact-force.js` is included in this plugin. Enable it via hooks.json. + +### Option B: Full package with config + +```bash +pip install gateguard-ai +gateguard init +``` + +This adds `.gateguard.yml` for per-project configuration (custom messages, ignore paths, gate toggles). + +## Anti-Patterns + +- **Don't use self-evaluation instead.** "Are you sure?" always gets "yes." This is experimentally verified. +- **Don't skip the data schema check.** Both A/B test agents assumed ISO-8601 dates when real data used `%Y/%m/%d %H:%M`. Checking data structure (with redacted values) prevents this entire class of bugs. +- **Don't gate every single Bash command.** Routine bash gates once per session. Destructive bash gates the first attempt of each distinct command; an identical retry is allowed once the facts have been presented. This balance avoids slowdown while catching real risks. 
/**
 * Tests for scripts/hooks/gateguard-fact-force.js via run-with-flags.js
 *
 * Each case spawns the hook runner as a child process and inspects the JSON it
 * prints. State is shared between the test process and the children through a
 * fixed session id and a dedicated state directory.
 */

const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { spawnSync } = require('child_process');

const runner = path.join(__dirname, '..', '..', 'scripts', 'hooks', 'run-with-flags.js');
const externalStateDir = process.env.GATEGUARD_STATE_DIR;
const tmpRoot = process.env.TMPDIR || process.env.TEMP || process.env.TMP || '/tmp';
const stateDir = externalStateDir || fs.mkdtempSync(path.join(tmpRoot, 'gateguard-test-'));
// Use a fixed session ID so test process and spawned hook process share the same state file
const TEST_SESSION_ID = 'gateguard-test-session';
const stateFile = path.join(stateDir, `state-${TEST_SESSION_ID}.json`);

const EDIT_HOOK_ID = 'pre:edit-write:gateguard-fact-force';
const BASH_HOOK_ID = 'pre:bash:gateguard-fact-force';

/**
 * Run one test case and print its outcome.
 * @param {string} name
 * @param {() => void} fn throws on failure
 * @returns {boolean} true when the case passed
 */
function test(name, fn) {
  try {
    fn();
    console.log(`  ✓ ${name}`);
    return true;
  } catch (error) {
    console.log(`  ✗ ${name}`);
    console.log(`    Error: ${error.message}`);
    return false;
  }
}

/** Remove the shared state file so the next case starts ungated. */
function clearState() {
  try {
    if (fs.existsSync(stateFile)) {
      fs.unlinkSync(stateFile);
    }
  } catch (err) {
    console.error(`  [clearState] failed to remove ${stateFile}: ${err.message}`);
  }
}

/**
 * Seed the shared state file.
 * Throws on failure so a broken fixture fails the case instead of passing silently.
 * @param {object} state
 */
function writeState(state) {
  fs.mkdirSync(stateDir, { recursive: true });
  fs.writeFileSync(stateFile, JSON.stringify(state), 'utf8');
}

/** Seed a state whose last activity is older than the hook's 30-minute timeout. */
function writeExpiredState() {
  writeState({
    checked: ['some_file.js', '__bash_session__'],
    last_active: Date.now() - (31 * 60 * 1000) // 31 minutes ago
  });
}

/**
 * Spawn run-with-flags.js for the given hook id and feed it `input` on stdin.
 * @param {string} hookId
 * @param {string|object} input raw JSON text or an object to serialize
 * @param {object} [env] extra environment variables for the child
 * @returns {{code: number, stdout: string, stderr: string}}
 */
function spawnHook(hookId, input, env = {}) {
  const rawInput = typeof input === 'string' ? input : JSON.stringify(input);
  // process.execPath: run the children with the same Node binary as the tests,
  // independent of whatever `node` happens to be on PATH.
  const result = spawnSync(process.execPath, [
    runner,
    hookId,
    'scripts/hooks/gateguard-fact-force.js',
    'standard,strict'
  ], {
    input: rawInput,
    encoding: 'utf8',
    env: {
      ...process.env,
      ECC_HOOK_PROFILE: 'standard',
      GATEGUARD_STATE_DIR: stateDir,
      CLAUDE_SESSION_ID: TEST_SESSION_ID,
      ...env
    },
    timeout: 15000,
    stdio: ['pipe', 'pipe', 'pipe']
  });

  return {
    code: Number.isInteger(result.status) ? result.status : 1,
    stdout: result.stdout || '',
    stderr: result.stderr || ''
  };
}

/** Invoke the hook under the Edit/Write/MultiEdit hook id. */
function runHook(input, env = {}) {
  return spawnHook(EDIT_HOOK_ID, input, env);
}

/** Invoke the hook under the Bash hook id. */
function runBashHook(input, env = {}) {
  return spawnHook(BASH_HOOK_ID, input, env);
}

/**
 * Parse hook stdout as JSON.
 * @param {string} stdout
 * @returns {object|null} null when stdout is not valid JSON
 */
function parseOutput(stdout) {
  try {
    return JSON.parse(stdout);
  } catch (_) {
    return null;
  }
}

/**
 * Assert that a parsed hook output does not deny the call.
 * When the output is a pass-through of the input (no hookSpecificOutput) and
 * `expectedToolName` is given, also assert the input was preserved.
 * @param {object} output
 * @param {string} message
 * @param {string} [expectedToolName]
 */
function assertNotDenied(output, message, expectedToolName) {
  if (output.hookSpecificOutput) {
    assert.notStrictEqual(output.hookSpecificOutput.permissionDecision, 'deny', message);
  } else if (expectedToolName !== undefined) {
    assert.strictEqual(output.tool_name, expectedToolName, 'pass-through should preserve input');
  }
}

function runTests() {
  console.log('\n=== Testing gateguard-fact-force ===\n');

  let passed = 0;
  let failed = 0;
  const record = (name, fn) => {
    if (test(name, fn)) passed++; else failed++;
  };

  // --- Test 1: denies first Edit per file ---
  clearState();
  record('denies first Edit per file with fact-forcing message', () => {
    const input = {
      tool_name: 'Edit',
      tool_input: { file_path: '/src/app.js', old_string: 'foo', new_string: 'bar' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce JSON output');
    assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny');
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('Fact-Forcing Gate'));
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('import/require'));
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('/src/app.js'));
  });

  // --- Test 2: allows second Edit on same file (relies on state left by test 1) ---
  record('allows second Edit on same file (gate already passed)', () => {
    const input = {
      tool_name: 'Edit',
      tool_input: { file_path: '/src/app.js', old_string: 'foo', new_string: 'bar' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce valid JSON output');
    assertNotDenied(output, 'should not deny second edit on same file', 'Edit');
  });

  // --- Test 3: denies first Write per file ---
  clearState();
  record('denies first Write per file with fact-forcing message', () => {
    const input = {
      tool_name: 'Write',
      tool_input: { file_path: '/src/new-file.js', content: 'console.log("hello")' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce JSON output');
    assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny');
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('creating'));
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('call this new file'));
  });

  // --- Test 4: denies destructive Bash, allows retry ---
  clearState();
  record('denies destructive Bash commands, allows retry after facts presented', () => {
    const input = {
      tool_name: 'Bash',
      tool_input: { command: 'rm -rf /important/data' }
    };

    // First call: should deny
    const result1 = runBashHook(input);
    assert.strictEqual(result1.code, 0, 'first call exit code should be 0');
    const output1 = parseOutput(result1.stdout);
    assert.ok(output1, 'first call should produce JSON output');
    assert.strictEqual(output1.hookSpecificOutput.permissionDecision, 'deny');
    assert.ok(output1.hookSpecificOutput.permissionDecisionReason.includes('Destructive'));
    assert.ok(output1.hookSpecificOutput.permissionDecisionReason.includes('rollback'));

    // Second call (retry after facts presented): should allow
    const result2 = runBashHook(input);
    assert.strictEqual(result2.code, 0, 'second call exit code should be 0');
    const output2 = parseOutput(result2.stdout);
    assert.ok(output2, 'second call should produce valid JSON output');
    assertNotDenied(output2, 'should not deny destructive bash retry after facts presented', 'Bash');
  });

  // --- Test 5: denies first routine Bash, allows second ---
  clearState();
  record('denies first routine Bash, allows second', () => {
    const input = {
      tool_name: 'Bash',
      tool_input: { command: 'ls -la' }
    };

    // First call: should deny
    const result1 = runBashHook(input);
    assert.strictEqual(result1.code, 0, 'first call exit code should be 0');
    const output1 = parseOutput(result1.stdout);
    assert.ok(output1, 'first call should produce JSON output');
    assert.strictEqual(output1.hookSpecificOutput.permissionDecision, 'deny');

    // Second call: should allow
    const result2 = runBashHook(input);
    assert.strictEqual(result2.code, 0, 'second call exit code should be 0');
    const output2 = parseOutput(result2.stdout);
    assert.ok(output2, 'second call should produce valid JSON output');
    assertNotDenied(output2, 'should not deny second routine bash', 'Bash');
  });

  // --- Test 6: session state resets after timeout ---
  record('session state resets after 30-minute timeout', () => {
    writeExpiredState();
    const input = {
      tool_name: 'Edit',
      tool_input: { file_path: 'some_file.js', old_string: 'a', new_string: 'b' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce JSON output after expired state');
    assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny',
      'should deny again after session timeout (state was reset)');
  });

  // --- Test 7: allows unknown tool names ---
  clearState();
  record('allows unknown tool names through', () => {
    const input = {
      tool_name: 'Read',
      tool_input: { file_path: '/src/app.js' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce valid JSON output');
    assertNotDenied(output, 'should not deny unknown tool', 'Read');
  });

  // --- Test 8: sanitizes file paths with newlines ---
  clearState();
  record('sanitizes file paths containing newlines', () => {
    const input = {
      tool_name: 'Edit',
      tool_input: { file_path: '/src/app.js\ninjected content', old_string: 'a', new_string: 'b' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce JSON output');
    assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny');
    const reason = output.hookSpecificOutput.permissionDecisionReason;
    // The file path portion of the reason must not contain any raw newlines
    // (sanitizePath replaces \n and \r with spaces)
    const pathLine = reason.split('\n').find(l => l.includes('/src/app.js'));
    assert.ok(pathLine, 'reason should mention the file path');
    assert.ok(!pathLine.includes('\n'), 'file path line must not contain raw newlines');
    assert.ok(!reason.includes('/src/app.js\n'), 'newline after file path should be sanitized');
    assert.ok(!reason.includes('\ninjected'), 'injected content must not appear on its own line');
  });

  // --- Test 9: respects ECC_DISABLED_HOOKS ---
  clearState();
  record('respects ECC_DISABLED_HOOKS (skips when disabled)', () => {
    const input = {
      tool_name: 'Edit',
      tool_input: { file_path: '/src/disabled.js', old_string: 'a', new_string: 'b' }
    };
    const result = runHook(input, {
      ECC_DISABLED_HOOKS: 'pre:edit-write:gateguard-fact-force'
    });

    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce valid JSON output');
    // When disabled, hook passes through raw input
    assertNotDenied(output, 'should not deny when hook is disabled', 'Edit');
  });

  // --- Test 10: MultiEdit gates first unchecked file ---
  clearState();
  record('denies first MultiEdit with unchecked file', () => {
    const input = {
      tool_name: 'MultiEdit',
      tool_input: {
        edits: [
          { file_path: '/src/multi-a.js', old_string: 'a', new_string: 'b' },
          { file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }
        ]
      }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce JSON output');
    assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny');
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('Fact-Forcing Gate'));
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('/src/multi-a.js'));
  });

  // --- Test 11: MultiEdit allows after all files gated (relies on state left by test 10) ---
  record('allows MultiEdit after all files gated', () => {
    // multi-a.js was gated in test 10; gate multi-b.js
    const input2 = {
      tool_name: 'MultiEdit',
      tool_input: { edits: [{ file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }] }
    };
    runHook(input2); // gates multi-b.js

    // Now both files are gated — retry should allow
    const input3 = {
      tool_name: 'MultiEdit',
      tool_input: {
        edits: [
          { file_path: '/src/multi-a.js', old_string: 'a', new_string: 'b' },
          { file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }
        ]
      }
    };
    const result3 = runHook(input3);
    const output3 = parseOutput(result3.stdout);
    assert.ok(output3, 'should produce valid JSON');
    assertNotDenied(output3, 'should allow MultiEdit after all files gated');
  });

  // --- Test 12: reads refresh active session state ---
  clearState();
  record('touches last_active on read so active sessions do not age out', () => {
    const staleButActive = Date.now() - (29 * 60 * 1000);
    writeState({
      checked: ['/src/keep-alive.js'],
      last_active: staleButActive
    });

    const before = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
    assert.strictEqual(before.last_active, staleButActive, 'seed state should use the expected timestamp');

    const result = runHook({
      tool_name: 'Edit',
      tool_input: { file_path: '/src/keep-alive.js', old_string: 'a', new_string: 'b' }
    });
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce valid JSON output');
    assertNotDenied(output, 'already-checked file should still be allowed');

    const after = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
    assert.ok(after.last_active > staleButActive, 'successful reads should refresh last_active');
  });

  // --- Test 13: pruning preserves routine bash gate marker ---
  clearState();
  record('preserves __bash_session__ when pruning oversized state', () => {
    const checked = ['__bash_session__'];
    for (let i = 0; i < 80; i++) checked.push(`__destructive__${i}`);
    for (let i = 0; i < 700; i++) checked.push(`/src/file-${i}.js`);
    writeState({ checked, last_active: Date.now() });

    // Any hook call re-saves the state, which triggers pruning.
    runHook({
      tool_name: 'Edit',
      tool_input: { file_path: '/src/newly-gated.js', old_string: 'a', new_string: 'b' }
    });

    const result = runBashHook({
      tool_name: 'Bash',
      tool_input: { command: 'pwd' }
    });
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce valid JSON output');
    assertNotDenied(output, 'routine bash marker should survive pruning');

    const persisted = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
    assert.ok(persisted.checked.includes('__bash_session__'), 'pruned state should retain __bash_session__');
    assert.ok(persisted.checked.length <= 500, 'pruned state should still honor the checked-entry cap');
  });

  // Cleanup only the temp directory created by this test file.
  if (!externalStateDir) {
    try {
      if (fs.existsSync(stateDir)) {
        fs.rmSync(stateDir, { recursive: true, force: true });
      }
    } catch (err) {
      console.error(`  [cleanup] failed to remove ${stateDir}: ${err.message}`);
    }
  }

  console.log(`\n  ${passed} passed, ${failed} failed\n`);
  process.exit(failed > 0 ? 1 : 0);
}

runTests();