fix: 5 bugs + 2 tests from 3-agent deep bughunt

Bugs fixed:
- B1: JS gate messages still said "cat one real record" -> redacted/synthetic
- B2: Destructive bash key used 200-char truncation (collision bypass) -> SHA256 hash
- B3: sanitizePath only stripped \n\r -> now strips null bytes, bidi overrides, all control chars
- B4: Tool name matching was case-sensitive (latent bypass) -> lookup map normalization
- B5: SKILL.md Gate Types missing MultiEdit -> added with explanation

Tests added:
- T1: MultiEdit gate denies first unchecked file (CRITICAL - was untested)
- T2: MultiEdit allows after all files gated

11/11 tests pass.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
seto 2026-04-13 16:32:46 +09:00
parent 8cd6378c81
commit dd2962ee92
3 changed files with 62 additions and 6 deletions

View File

@ -22,6 +22,7 @@
'use strict';
const crypto = require('crypto');
const fs = require('fs');
const path = require('path');
@ -109,7 +110,8 @@ function isChecked(key) {
// --- Sanitize file path against injection ---
function sanitizePath(filePath) {
return filePath.replace(/[\n\r]/g, ' ').trim().slice(0, 500);
// Strip control chars (including null), bidi overrides, and newlines
return filePath.replace(/[\x00-\x1f\x7f\u200e\u200f\u202a-\u202e\u2066-\u2069]/g, ' ').trim().slice(0, 500);
}
// --- Gate messages ---
@ -123,7 +125,7 @@ function editGateMsg(filePath) {
'',
'1. List ALL files that import/require this file (use Grep)',
'2. List the public functions/classes affected by this change',
'3. If this file reads/writes data files, cat one real record and show actual field names, structure, and date format',
'3. If this file reads/writes data files, show field names, structure, and date format (use redacted or synthetic values, not raw production data)',
'4. Quote the user\'s current instruction verbatim',
'',
'Present the facts, then retry the same operation.'
@ -139,7 +141,7 @@ function writeGateMsg(filePath) {
'',
'1. Name the file(s) and line(s) that will call this new file',
'2. Confirm no existing file serves the same purpose (use Glob)',
'3. If this file reads/writes data files, cat one real record and show actual field names, structure, and date format',
'3. If this file reads/writes data files, show field names, structure, and date format (use redacted or synthetic values, not raw production data)',
'4. Quote the user\'s current instruction verbatim',
'',
'Present the facts, then retry the same operation.'
@ -194,8 +196,11 @@ function run(rawInput) {
return rawInput; // allow on parse error
}
const toolName = data.tool_name || '';
const rawToolName = data.tool_name || '';
const toolInput = data.tool_input || {};
// Normalize: case-insensitive matching via lookup map
const TOOL_MAP = { 'edit': 'Edit', 'write': 'Write', 'multiedit': 'MultiEdit', 'bash': 'Bash' };
const toolName = TOOL_MAP[rawToolName.toLowerCase()] || rawToolName;
if (toolName === 'Edit' || toolName === 'Write') {
const filePath = toolInput.file_path || '';
@ -228,7 +233,7 @@ function run(rawInput) {
if (DESTRUCTIVE_BASH.test(command)) {
// Gate destructive commands on first attempt; allow retry after facts presented
const key = '__destructive__' + command.slice(0, 200);
const key = '__destructive__' + crypto.createHash('sha256').update(command).digest('hex').slice(0, 16);
if (!isChecked(key)) {
markChecked(key);
return denyResult(destructiveBashMsg());

View File

@ -45,7 +45,9 @@ Both agents produce code that runs and passes tests. The difference is design de
## Gate Types
### Edit Gate (first edit per file)
### Edit / MultiEdit Gate (first edit per file)
MultiEdit is handled identically — each file in the batch is gated individually.
```
Before editing {file_path}, present these facts:

View File

@ -307,6 +307,55 @@ function runTests() {
}
})) passed++; else failed++;
// --- Test 10: MultiEdit gates first unchecked file ---
clearState();
if (test('denies first MultiEdit with unchecked file', () => {
const input = {
tool_name: 'MultiEdit',
tool_input: {
edits: [
{ file_path: '/src/multi-a.js', old_string: 'a', new_string: 'b' },
{ file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }
]
}
};
const result = runHook(input);
assert.strictEqual(result.code, 0, 'exit code should be 0');
const output = parseOutput(result.stdout);
assert.ok(output, 'should produce JSON output');
assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny');
assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('Fact-Forcing Gate'));
assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('/src/multi-a.js'));
})) passed++; else failed++;
// --- Test 11: MultiEdit allows after all files gated ---
if (test('allows MultiEdit after all files gated', () => {
// multi-a.js was gated in test 10; gate multi-b.js
const input2 = {
tool_name: 'MultiEdit',
tool_input: { edits: [{ file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }] }
};
runHook(input2); // gates multi-b.js
// Now both files are gated — retry should allow
const input3 = {
tool_name: 'MultiEdit',
tool_input: {
edits: [
{ file_path: '/src/multi-a.js', old_string: 'a', new_string: 'b' },
{ file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }
]
}
};
const result3 = runHook(input3);
const output3 = parseOutput(result3.stdout);
assert.ok(output3, 'should produce valid JSON');
if (output3.hookSpecificOutput) {
assert.notStrictEqual(output3.hookSpecificOutput.permissionDecision, 'deny',
'should allow MultiEdit after all files gated');
}
})) passed++; else failed++;
// Cleanup: remove test-isolated state directory
try {
if (fs.existsSync(stateDir)) {