fix(security): gateguard classifier bypasses (GHSA-4v57) + Windows CI + claw ReDoS

- gateguard (GHSA-4v57-ph3x-gf55): add a quote-aware detection pass that dequotes command words and splits on UNQUOTED separators, including newlines, so that newline-separated commands, quoted command words ('rm'/"rm"), quoted find -exec, and sh/bash -c wrappers are all classified as destructive. Additive — the existing 133 cases still pass; +7 bypass regression tests + a false-positive guard (rm inside a quoted echo arg stays allowed). 140/140.
- Windows CI: format-code.ts emitted backslash paths via path.normalize, breaking forward-slash assertions on all Windows matrix cells — force forward slashes.
- claw.js (CodeQL #1 js/polynomial-redos): bound parseTurns input so the lazy [\s\S]*? body can't drive O(n^2) scanning on adversarial history files.

Full suite 2852/2852; lint green.
2026-06-19 19:30:29 +08:00 · 2026-06-18 20:02:30 -04:00 · 2026-06-18 20:02:30 -04:00 · bd9083ca1e
commit bd9083ca1e
parent 5994d3fac1
4 changed files with 1981 additions and 1359 deletions
--- a/.opencode/tools/format-code.ts
+++ b/.opencode/tools/format-code.ts
@ -104,9 +104,11 @@ function detectFormatter(cwd: string, ext: string): Formatter | null {
 }

 function buildFormatterCommand(formatter: Formatter, filePath: string, cwd?: string): string {
-  // Normalize path for cross-platform compatibility
-  const normalizedPath = path.normalize(filePath)
-  
+  // Normalize to forward slashes so the emitted command is identical on every
+  // platform. `path.normalize` yields backslashes on Windows, which broke the
+  // command string (and Windows CI); all formatter CLIs accept `/` on Windows.
+  const normalizedPath = path.normalize(filePath).split(path.sep).join("/")
+
  // Build command based on formatter and platform
  const commands: Record<Formatter, string> = {
    biome: `npx @biomejs/biome format --write ${normalizedPath}`,
--- a/scripts/claw.js
+++ b/scripts/claw.js
@ -32,7 +32,8 @@ function getSessionPath(name) {
 function listSessions(dir) {
  const clawDir = dir || getClawDir();
  if (!fs.existsSync(clawDir)) return [];
-  return fs.readdirSync(clawDir)
+  return fs
+    .readdirSync(clawDir)
    .filter(f => f.endsWith('.md'))
    .map(f => f.replace(/\.md$/, ''));
 }
@ -55,7 +56,10 @@ function appendTurn(filePath, role, content, timestamp) {
 function normalizeSkillList(raw) {
  if (!raw) return [];
  if (Array.isArray(raw)) return raw.map(s => String(s).trim()).filter(Boolean);
-  return String(raw).split(',').map(s => s.trim()).filter(Boolean);
+  return String(raw)
+    .split(',')
+    .map(s => s.trim())
+    .filter(Boolean);
 }

 function loadECCContext(skillList) {
@ -104,7 +108,7 @@ function askClaude(systemPrompt, history, userMessage, model) {
    stdio: ['pipe', 'pipe', 'pipe'],
    env: { ...process.env, CLAUDECODE: '' },
    timeout: 300000,
-    shell: process.platform === 'win32',
+    shell: process.platform === 'win32'
  });

  if (result.error) {
@ -120,9 +124,14 @@ function askClaude(systemPrompt, history, userMessage, model) {

 function parseTurns(history) {
  const turns = [];
+  // Bound the input: the lazy `[\s\S]*?` body re-scans toward EOF from each
+  // `### [` start, so a very large/adversarial history file can drive O(n^2)
+  // scanning (ReDoS). Session histories are far below this cap.
+  const text = String(history || '');
+  const safe = text.length > 5_000_000 ? text.slice(0, 5_000_000) : text;
  const regex = /### \[([^\]]+)\] ([^\n]+)\n([\s\S]*?)\n---\n/g;
  let match;
-  while ((match = regex.exec(history)) !== null) {
+  while ((match = regex.exec(safe)) !== null) {
    turns.push({ timestamp: match[1], role: match[2], content: match[3] });
  }
  return turns;
@ -145,12 +154,14 @@ function getSessionMetrics(filePath) {
    userTurns,
    assistantTurns,
    charCount,
-    tokenEstimate,
+    tokenEstimate
  };
 }

 function searchSessions(query, dir) {
-  const q = String(query || '').toLowerCase().trim();
+  const q = String(query || '')
+    .toLowerCase()
+    .trim();
  if (!q) return [];

  const sessionDir = dir || getClawDir();
@ -297,7 +308,7 @@ function main() {
    sessionName: initialSessionName,
    sessionPath: getSessionPath(initialSessionName),
    model: DEFAULT_MODEL,
-    skills: normalizeSkillList(process.env.CLAW_SKILLS || ''),
+    skills: normalizeSkillList(process.env.CLAW_SKILLS || '')
  };

  let eccContext = loadECCContext(state.skills);
@ -314,7 +325,7 @@ function main() {
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });

  const prompt = () => {
-    rl.question('claw> ', (input) => {
+    rl.question('claw> ', input => {
      const line = input.trim();
      if (!line) return prompt();

@ -469,7 +480,7 @@ module.exports = {
  compactSession,
  exportSession,
  branchSession,
-  main,
+  main
 };

 if (require.main === module) {
--- a/scripts/hooks/gateguard-fact-force.js
+++ b/scripts/hooks/gateguard-fact-force.js
@ -220,6 +220,106 @@ function tokenizeAllowlistedShellWords(input) {
  return tokens;
 }

+const SHELL_SEGMENT_SEPARATORS = new Set([';', '|', '&', '\n', '\r']);
+
+/**
+ * Quote-aware split of a command line into segments, with quotes removed from
+ * the resulting words. Splits only on UNQUOTED `;`, `|`, `&`, and newlines so:
+ *  - a quoted command word (`'rm'`, `"rm"`) normalizes to `rm` (the shell
+ *    treats quotes around a command name as transparent), and
+ *  - a newline behaves as a command separator (the shell runs each line),
+ * neither of which `stripQuotedStrings` + naive splitting handles — both were
+ * destructive-classifier bypasses (GHSA-4v57-ph3x-gf55).
+ *
+ * @param {string} input
+ * @returns {string[][]} array of dequoted token arrays, one per segment
+ */
+function quoteAwareSegments(input) {
+  const segments = [];
+  let words = [];
+  let current = '';
+  let hasWord = false;
+  let quote = null;
+  let escaped = false;
+
+  const flushWord = () => {
+    if (hasWord) words.push(current);
+    current = '';
+    hasWord = false;
+  };
+  const flushSegment = () => {
+    flushWord();
+    if (words.length) segments.push(words);
+    words = [];
+  };
+
+  for (const ch of String(input || '')) {
+    if (escaped) {
+      current += ch;
+      hasWord = true;
+      escaped = false;
+      continue;
+    }
+    if (ch === '\\') {
+      escaped = true;
+      hasWord = true;
+      continue;
+    }
+    if (quote) {
+      if (ch === quote) quote = null;
+      else current += ch;
+      hasWord = true;
+      continue;
+    }
+    if (ch === '"' || ch === "'") {
+      quote = ch;
+      hasWord = true; // entering a quote starts a word, even if its content is empty
+      continue;
+    }
+    if (SHELL_SEGMENT_SEPARATORS.has(ch)) {
+      flushSegment();
+      continue;
+    }
+    if (/\s/.test(ch)) {
+      flushWord();
+      continue;
+    }
+    current += ch;
+    hasWord = true;
+  }
+  flushSegment();
+  return segments;
+}
+
+const SHELL_WRAPPERS = new Set(['sh', 'bash', 'zsh', 'dash', 'ksh']);
+
+/**
+ * Quote-aware destructive check: catches quoted command words, newline
+ * separators, quoted `find -exec`, and `sh -c`/`bash -c` wrappers that evade
+ * the quote-stripping path (GHSA-4v57-ph3x-gf55).
+ *
+ * @param {string} raw
+ * @param {number} [depth] recursion guard for shell -c wrappers
+ * @returns {boolean}
+ */
+function isDestructiveQuoteAware(raw, depth = 0) {
+  if (depth > 4) return false;
+  for (const tokens of quoteAwareSegments(raw)) {
+    if (tokens.length === 0) continue;
+    if (isDestructiveRm(tokens)) return true;
+    if (isDestructiveGit(tokens)) return true;
+    if (isDestructiveFindExec(tokens.join(' '))) return true;
+    const base = commandBasename(tokens[0]);
+    if (SHELL_WRAPPERS.has(base)) {
+      const ci = tokens.indexOf('-c');
+      if (ci !== -1 && tokens[ci + 1] && isDestructiveQuoteAware(tokens[ci + 1], depth + 1)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 /**
 * Strip a leading path and trailing `.exe` from a command token so
 * `/usr/bin/git`, `git.exe`, and `GIT` all normalize to `git`.
@ -566,6 +666,11 @@ function isDestructiveBash(command) {
    if (isDestructiveRm(tokens)) return true;
    if (isDestructiveGit(tokens)) return true;
  }
+
+  // Quote-aware pass: closes the quoted-command-word, newline-separator,
+  // quoted-find-exec, and sh/bash -c bypasses (GHSA-4v57-ph3x-gf55).
+  if (isDestructiveQuoteAware(raw)) return true;
+
  return false;
 }

--- a/tests/hooks/gateguard-fact-force.test.js
+++ b/tests/hooks/gateguard-fact-force.test.js