fix(security): gateguard classifier bypasses (GHSA-4v57) + Windows CI + claw ReDoS

- gateguard (GHSA-4v57-ph3x-gf55): add a quote-aware detection pass that
  dequotes command words and splits on UNQUOTED separators incl. newlines, so
  newline-separated commands, quoted command words ('rm'/"rm"), quoted
  find -exec, and sh/bash -c wrappers are all classified destructive. Additive —
  existing 133 cases still pass; +7 bypass regressions + a false-positive guard
  (rm inside a quoted echo arg stays allowed). 140/140.
- Windows CI: format-code.ts emitted backslash paths via path.normalize, breaking
  forward-slash assertions on all Windows matrix cells — force forward slashes.
- claw.js (CodeQL #1 js/polynomial-redos): bound parseTurns input so the lazy
  [\s\S]*? body can't drive O(n^2) scanning on adversarial history files.

Full suite 2852/2852; lint green.
This commit is contained in:
Affaan Mustafa 2026-06-18 20:02:30 -04:00
parent 5994d3fac1
commit bd9083ca1e
4 changed files with 1981 additions and 1359 deletions

View File

@ -104,9 +104,11 @@ function detectFormatter(cwd: string, ext: string): Formatter | null {
}
function buildFormatterCommand(formatter: Formatter, filePath: string, cwd?: string): string {
// Normalize path for cross-platform compatibility
const normalizedPath = path.normalize(filePath)
// Normalize to forward slashes so the emitted command is identical on every
// platform. `path.normalize` yields backslashes on Windows, which broke the
// command string (and Windows CI); all formatter CLIs accept `/` on Windows.
const normalizedPath = path.normalize(filePath).split(path.sep).join("/")
// Build command based on formatter and platform
const commands: Record<Formatter, string> = {
biome: `npx @biomejs/biome format --write ${normalizedPath}`,

View File

@ -32,7 +32,8 @@ function getSessionPath(name) {
/**
 * List the session names stored in a claw directory.
 *
 * A session is any entry whose file name ends in `.md`; the returned names
 * have that suffix removed.
 *
 * @param {string} [dir] directory to scan; falls back to `getClawDir()`
 * @returns {string[]} session names, or an empty array when the directory
 *   does not exist
 */
function listSessions(dir) {
  const clawDir = dir || getClawDir();
  if (!fs.existsSync(clawDir)) return [];
  // Single return: a bare `return fs.readdirSync(clawDir)` ahead of this chain
  // would end the function early and skip the `.md` filter and suffix strip.
  return fs
    .readdirSync(clawDir)
    .filter(f => f.endsWith('.md'))
    .map(f => f.replace(/\.md$/, ''));
}
@ -55,7 +56,10 @@ function appendTurn(filePath, role, content, timestamp) {
/**
 * Turn a skill specification into a clean list of skill names.
 *
 * Accepts either an array of values or a comma-separated string. Every entry
 * is stringified and trimmed, and entries that end up empty are dropped.
 *
 * @param {unknown} raw array of skills, comma-separated string, or a falsy value
 * @returns {string[]} trimmed, non-empty skill names (empty for falsy input)
 */
function normalizeSkillList(raw) {
  if (!raw) return [];
  // Both input shapes share one cleanup path: an array is used as-is, anything
  // else is stringified and split on commas.
  const entries = Array.isArray(raw) ? raw : String(raw).split(',');
  return entries.map(entry => String(entry).trim()).filter(name => name.length > 0);
}
function loadECCContext(skillList) {
@ -104,7 +108,7 @@ function askClaude(systemPrompt, history, userMessage, model) {
stdio: ['pipe', 'pipe', 'pipe'],
env: { ...process.env, CLAUDECODE: '' },
timeout: 300000,
shell: process.platform === 'win32',
shell: process.platform === 'win32'
});
if (result.error) {
@ -120,9 +124,14 @@ function askClaude(systemPrompt, history, userMessage, model) {
/**
 * Parse a session history document into its turns.
 *
 * A turn has the shape
 *
 *     ### [<timestamp>] <role>\n<content>\n---\n
 *
 * where `<content>` runs up to the first `\n---\n` that follows the header
 * line. Text that does not form a complete turn is ignored.
 *
 * @param {string} history raw history text (null/undefined is treated as empty)
 * @returns {{timestamp: string, role: string, content: string}[]} turns in
 *   document order
 */
function parseTurns(history) {
  const turns = [];
  const text = String(history || '');
  // Defensive cap on the amount of text scanned; anything past it is ignored.
  const safe = text.length > 5_000_000 ? text.slice(0, 5_000_000) : text;

  // Match only the header with a regex and locate the terminator with
  // `indexOf`. A single pattern with a lazy `[\s\S]*?` body re-scans toward
  // EOF from every header that lacks a terminator, which is O(n^2) on
  // adversarial input even when the input length is capped.
  const header = /### \[([^\]]+)\] ([^\n]+)\n/g;
  const terminator = '\n---\n';

  let match;
  while ((match = header.exec(safe)) !== null) {
    const bodyStart = header.lastIndex;
    const bodyEnd = safe.indexOf(terminator, bodyStart);
    // No terminator after this header means none after any later header
    // either, so no further turn can be complete.
    if (bodyEnd === -1) break;
    turns.push({
      timestamp: match[1],
      role: match[2],
      content: safe.slice(bodyStart, bodyEnd)
    });
    // Resume right after the terminator, where the combined pattern would.
    header.lastIndex = bodyEnd + terminator.length;
  }
  return turns;
}
@ -145,12 +154,14 @@ function getSessionMetrics(filePath) {
userTurns,
assistantTurns,
charCount,
tokenEstimate,
tokenEstimate
};
}
function searchSessions(query, dir) {
const q = String(query || '').toLowerCase().trim();
const q = String(query || '')
.toLowerCase()
.trim();
if (!q) return [];
const sessionDir = dir || getClawDir();
@ -297,7 +308,7 @@ function main() {
sessionName: initialSessionName,
sessionPath: getSessionPath(initialSessionName),
model: DEFAULT_MODEL,
skills: normalizeSkillList(process.env.CLAW_SKILLS || ''),
skills: normalizeSkillList(process.env.CLAW_SKILLS || '')
};
let eccContext = loadECCContext(state.skills);
@ -314,7 +325,7 @@ function main() {
const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
const prompt = () => {
rl.question('claw> ', (input) => {
rl.question('claw> ', input => {
const line = input.trim();
if (!line) return prompt();
@ -469,7 +480,7 @@ module.exports = {
compactSession,
exportSession,
branchSession,
main,
main
};
if (require.main === module) {

View File

@ -220,6 +220,106 @@ function tokenizeAllowlistedShellWords(input) {
return tokens;
}
const SHELL_SEGMENT_SEPARATORS = new Set([';', '|', '&', '\n', '\r']);
/**
 * Quote-aware split of a command line into segments, with quotes removed from
 * the resulting words. Splits only on UNQUOTED `;`, `|`, `&`, and newlines so:
 * - a quoted command word (`'rm'`, `"rm"`) normalizes to `rm` (the shell
 *   treats quotes around a command name as transparent), and
 * - a newline behaves as a command separator (the shell runs each line).
 *
 * `stripQuotedStrings` + naive splitting handles neither case; both were
 * destructive-classifier bypasses (GHSA-4v57-ph3x-gf55).
 *
 * Backslash handling follows the shell's quoting rules where they affect
 * segmentation:
 * - inside single quotes a backslash is an ordinary character, so
 *   `'x\'` is a complete quoted word and whatever follows is unquoted;
 * - outside single quotes a backslash makes the next character literal;
 * - backslash-newline is a line continuation and is removed entirely, so a
 *   command name split across lines is reassembled.
 * Inside double quotes every backslash is treated as an escape, which is
 * broader than the shell's rule; the backslash is dropped from the word.
 *
 * @param {string} input
 * @returns {string[][]} array of dequoted token arrays, one per segment
 */
function quoteAwareSegments(input) {
  const segments = [];
  let words = [];
  let current = '';
  let hasWord = false;
  let quote = null;
  let escaped = false;

  const flushWord = () => {
    if (hasWord) words.push(current);
    current = '';
    hasWord = false;
  };
  const flushSegment = () => {
    flushWord();
    if (words.length) segments.push(words);
    words = [];
  };

  for (const ch of String(input || '')) {
    if (escaped) {
      escaped = false;
      // Line continuation: the shell deletes both the backslash and the newline.
      if (ch === '\n') continue;
      current += ch;
      hasWord = true;
      continue;
    }
    // A backslash has no special meaning inside single quotes.
    if (ch === '\\' && quote !== "'") {
      escaped = true;
      continue;
    }
    if (quote) {
      if (ch === quote) quote = null;
      else current += ch;
      hasWord = true;
      continue;
    }
    if (ch === '"' || ch === "'") {
      quote = ch;
      hasWord = true; // entering a quote starts a word, even if its content is empty
      continue;
    }
    if (SHELL_SEGMENT_SEPARATORS.has(ch)) {
      flushSegment();
      continue;
    }
    if (/\s/.test(ch)) {
      flushWord();
      continue;
    }
    current += ch;
    hasWord = true;
  }
  flushSegment();
  return segments;
}
const SHELL_WRAPPERS = new Set(['sh', 'bash', 'zsh', 'dash', 'ksh']);
/**
 * Quote-aware destructive check: catches quoted command words, newline
 * separators, quoted `find -exec`, and `sh -c`/`bash -c` wrappers that evade
 * the quote-stripping path (GHSA-4v57-ph3x-gf55).
 *
 * Each segment from `quoteAwareSegments` is run through the rm, git and
 * find-exec checks. When the segment's command is a shell wrapper invoked
 * with a command-string flag, the arguments after that flag are classified
 * recursively.
 *
 * Wrapper handling is deliberately conservative:
 * - the flag is recognised inside a short-option cluster (`-c`, `-lc`, `-ec`),
 *   not only as an exact `-c` token;
 * - every non-option argument after the flag is inspected, because the
 *   command string is the first non-option argument and may be preceded by
 *   further options (`bash -c -e '...'`);
 * - nesting deeper than the recursion limit is reported as destructive
 *   rather than allowed, so extra wrapper layers cannot hide a command.
 *
 * @param {string} raw
 * @param {number} [depth] recursion guard for shell -c wrappers
 * @returns {boolean}
 */
function isDestructiveQuoteAware(raw, depth = 0) {
  // Fail closed: content nested too deeply to inspect is not trusted.
  if (depth > 4) return true;
  // A single-dash cluster of letters containing `c`; excludes `--long` options.
  const commandFlag = /^-[A-Za-z]*c[A-Za-z]*$/;
  for (const tokens of quoteAwareSegments(raw)) {
    if (tokens.length === 0) continue;
    if (isDestructiveRm(tokens)) return true;
    if (isDestructiveGit(tokens)) return true;
    if (isDestructiveFindExec(tokens.join(' '))) return true;

    const base = commandBasename(tokens[0]);
    if (!SHELL_WRAPPERS.has(base)) continue;

    const flagIndex = tokens.findIndex((token, index) => index > 0 && commandFlag.test(token));
    if (flagIndex === -1) continue;

    for (const candidate of tokens.slice(flagIndex + 1)) {
      // Option tokens are not command strings; empty strings contain no command.
      if (!candidate || candidate.startsWith('-')) continue;
      if (isDestructiveQuoteAware(candidate, depth + 1)) return true;
    }
  }
  return false;
}
/**
* Strip a leading path and trailing `.exe` from a command token so
* `/usr/bin/git`, `git.exe`, and `GIT` all normalize to `git`.
@ -566,6 +666,11 @@ function isDestructiveBash(command) {
if (isDestructiveRm(tokens)) return true;
if (isDestructiveGit(tokens)) return true;
}
// Quote-aware pass: closes the quoted-command-word, newline-separator,
// quoted-find-exec, and sh/bash -c bypasses (GHSA-4v57-ph3x-gf55).
if (isDestructiveQuoteAware(raw)) return true;
return false;
}

File diff suppressed because it is too large Load Diff