export const meta = { name: 'orch-review', description: 'ECC Review phase as a native Claude Code workflow: multi-dimension review (quality + language + conditional security) then adversarial verification of every CRITICAL/HIGH finding. Returns blocking + advisory findings for Gate 2.', phases: [ { title: 'Review', detail: 'one reviewer agent per dimension, in parallel' }, { title: 'Verify', detail: 'adversarially refute each CRITICAL/HIGH finding' } ] }; // --------------------------------------------------------------------------- // Pilot port of orch-pipeline Phase 5 (Review). The gated outer loop stays in // the main conversation; this script owns only the autonomous, fan-out-heavy // review+verify segment between the two human gates. // // Caller contract — pass `args` (the main loop computes the diff and language): // { // diff: string, // unified `git diff` text to review (required) // language?: string, // e.g. "typescript" — selects a language reviewer // changedFiles?: string[], // paths touched, used for the security trigger // } // Invalid input (missing/empty diff, bad JSON, non-array changedFiles) throws — // the gate fails closed rather than silently approving an unreviewed payload. // // Returns: // { verdict: 'APPROVE' | 'CHANGES_REQUESTED', // CHANGES_REQUESTED if any blocker OR a dimension failed // incomplete: boolean, // true when one or more review dimensions failed to run // failedDimensions: { dimension, error }[], // blocking: Finding[], // confirmed CRITICAL/HIGH + unverifiable ones — must clear before Gate 2 // advisory: Finding[], // MEDIUM/LOW + refuted findings, informational // stats: { dimensions, failed, raw, unique, confirmed, unverified, refuted } } // --------------------------------------------------------------------------- // Language → ECC reviewer agent. Mirrors the agents present in agents/. const LANGUAGE_REVIEWER = { typescript: 'ecc:typescript-reviewer', javascript: 'ecc:typescript-reviewer', python: 'ecc:python-reviewer', go: 'ecc:go-reviewer', rust: 'ecc:rust-reviewer', java: 'ecc:java-reviewer', kotlin: 'ecc:kotlin-reviewer', swift: 'ecc:swift-reviewer', php: 'ecc:php-reviewer', csharp: 'ecc:csharp-reviewer', fsharp: 'ecc:fsharp-reviewer', react: 'ecc:react-reviewer', vue: 'ecc:vue-reviewer', flutter: 'ecc:flutter-reviewer', dart: 'ecc:flutter-reviewer', django: 'ecc:django-reviewer', fastapi: 'ecc:fastapi-reviewer', cpp: 'ecc:cpp-reviewer' }; // orch-pipeline security trigger: auth/authz, user input, db queries, fs paths, // external calls, crypto, secrets. Matched against the diff text + file paths. const SECURITY_TRIGGER = /\b(auth|login|password|passwd|token|secret|credential|api[_-]?key|session|jwt|oauth|cookie|sql|query|exec|eval|crypto|cipher|hash|hmac|sign|fs\.|readFile|writeFile|fetch|axios|request|subprocess|os\.system)\b/i; // A reviewer agent must emit findings in this shape — validated at the tool layer. const FINDINGS_SCHEMA = { type: 'object', additionalProperties: false, required: ['verdict', 'findings'], properties: { verdict: { type: 'string', enum: ['APPROVE', 'CHANGES_REQUESTED'] }, findings: { type: 'array', items: { type: 'object', additionalProperties: false, required: ['title', 'severity', 'file', 'evidence'], properties: { title: { type: 'string' }, severity: { type: 'string', enum: ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW'] }, file: { type: 'string' }, line: { type: ['integer', 'null'] }, evidence: { type: 'string', minLength: 1, description: 'the offending snippet or exact location' }, proof: { type: 'string', description: 'why it is a real problem (required for HIGH/CRITICAL)' }, fix: { type: 'string', description: 'concrete suggested remediation' } } } } } }; // Independent skeptic verdict for one finding. const VERDICT_SCHEMA = { type: 'object', additionalProperties: false, required: ['isReal', 'confidence', 'reasoning'], properties: { isReal: { type: 'boolean', description: 'true only if the finding genuinely holds against the diff' }, confidence: { type: 'number', minimum: 0, maximum: 1 }, reasoning: { type: 'string' } } }; const SEVERITY_RANK = { LOW: 0, MEDIUM: 1, HIGH: 2, CRITICAL: 3 }; const isBlocking = f => f.severity === 'CRITICAL' || f.severity === 'HIGH'; const normalize = s => (s || '').replace(/\s+/g, ' ').trim().toLowerCase(); function reviewPrompt(dimensionLabel, diff) { return [ `You are reviewing a unified diff along the "${dimensionLabel}" dimension.`, 'Apply your standard checklist. Only report issues you are >80% sure are real problems.', 'For any CRITICAL or HIGH finding you MUST supply concrete `evidence` and a `proof` of impact; if you cannot, demote it or drop it.', 'Returning zero findings with verdict APPROVE is an acceptable and expected outcome for clean diffs.', '', 'DIFF:', diff ].join('\n'); } function verifyPrompt(finding, diff) { return [ 'You are an independent skeptic. Try to REFUTE the finding below by checking it against the diff text provided here — and ONLY that text.', 'The diff may be unapplied (a proposed PR), so the referenced file may not exist on disk yet. Do NOT refute a finding merely because the file is absent from the working tree; judge solely from the diff content.', 'Default to isReal=false when you are uncertain or cannot locate supporting evidence in the diff text.', '', `Finding (${finding.severity}) in ${finding.file}: ${finding.title}`, `Claimed evidence: ${finding.evidence}`, finding.proof ? `Claimed proof: ${finding.proof}` : '', '', 'DIFF:', diff ].join('\n'); } // --- main ----------------------------------------------------------------- // `args` arrives verbatim. Accept a JSON-encoded string too, so the workflow // works whether the caller passes an object or a stringified payload. // Fail CLOSED on invalid input: a review gate must never silently APPROVE a // payload it could not actually review. let input; try { input = typeof args === 'string' ? JSON.parse(args) : (args ?? {}); } catch { throw new Error('orch-review: args must be an object or valid JSON'); } if (typeof input !== 'object' || input === null) { throw new Error('orch-review: args must be an object'); } if (typeof input.diff !== 'string' || input.diff.trim() === '') { throw new Error('orch-review: args.diff must be a non-empty unified diff'); } if (input.changedFiles != null && !Array.isArray(input.changedFiles)) { throw new Error('orch-review: args.changedFiles must be an array of paths'); } const diff = input.diff; const haystack = `${diff}\n${(input.changedFiles || []).join('\n')}`; // Build the review dimensions. Quality always runs; language + security are conditional. const dimensions = [{ key: 'quality', label: 'correctness & quality', agentType: 'ecc:code-reviewer' }]; const langReviewer = input.language && LANGUAGE_REVIEWER[String(input.language).toLowerCase()]; if (langReviewer) { dimensions.push({ key: `lang:${input.language}`, label: `${input.language} idioms & pitfalls`, agentType: langReviewer }); } if (SECURITY_TRIGGER.test(haystack)) { dimensions.push({ key: 'security', label: 'security (OWASP, secrets, injection)', agentType: 'ecc:security-reviewer' }); log('Security trigger matched — adding security-reviewer dimension.'); } log(`Reviewing across ${dimensions.length} dimension(s): ${dimensions.map(d => d.key).join(', ')}`); // Stage 1 — every dimension reviews in parallel. This is a deliberate BARRIER: // independent reviewers routinely flag the same line, so we need the full set // before we can dedup. Verifying first and deduping later would waste verifier // calls on duplicates (e.g. one SQL-injection bug reported by all 3 dimensions). // A reviewer can fail two ways: agent() returns null on a terminal error/skip, // or the thunk rejects. Capture both per-dimension so a lost dimension is never // silently dropped — an unreviewed security dimension must not pass as APPROVE. const reviews = await parallel( dimensions.map( d => () => agent(reviewPrompt(d.label, diff), { agentType: d.agentType, phase: 'Review', label: `review:${d.key}`, schema: FINDINGS_SCHEMA }) .then(r => (r === null ? { dim: d.key, ok: false, error: 'agent returned null (terminal failure or skip)', findings: [] } : { dim: d.key, ok: true, findings: r.findings || [] })) // Log the raw error for operators; never return provider/runtime internals to the caller. .catch(err => { log(`Review dimension ${d.key} failed: ${String((err && err.message) || err)}`); return { dim: d.key, ok: false, error: 'review agent failed', findings: [] }; }) ) ); const failedDimensions = reviews.filter(r => r && !r.ok).map(r => ({ dimension: r.dim, error: r.error })); if (failedDimensions.length > 0) { log(`WARNING: ${failedDimensions.length} review dimension(s) failed: ${failedDimensions.map(f => f.dimension).join(', ')}. Verdict will fail closed.`); } // Dedup across dimensions. The evidence snippet (the offending code) is the most // stable key — titles are phrased differently and line numbers drift per reviewer. const tagged = reviews.filter(r => r && r.ok).flatMap(r => r.findings.map(f => ({ ...f, dimension: r.dim }))); const byKey = new Map(); for (const f of tagged) { // Prefer the evidence snippet; fall back to title+line so empty-evidence // findings in the same file don't all collapse onto one `${file}::` key. const evidenceKey = normalize(f.evidence); const key = evidenceKey ? `${f.file}::${evidenceKey}` : `${f.file}::${normalize(f.title)}::${f.line ?? 'na'}`; const prev = byKey.get(key); if (!prev) { byKey.set(key, { ...f, dimensions: [f.dimension] }); } else { if (!prev.dimensions.includes(f.dimension)) prev.dimensions.push(f.dimension); if (SEVERITY_RANK[f.severity] > SEVERITY_RANK[prev.severity]) prev.severity = f.severity; // keep the strictest } } const unique = [...byKey.values()]; log(`Reviews returned ${tagged.length} findings → ${unique.length} unique after dedup.`); // Stage 2 — adversarially verify each unique CRITICAL/HIGH. MEDIUM/LOW are advisory. const advisory = unique.filter(f => !isBlocking(f)); const verified = await parallel( unique.filter(isBlocking).map( f => () => agent(verifyPrompt(f, diff), { phase: 'Verify', label: `verify:${f.file}:${normalize(f.evidence).slice(0, 40)}`, schema: VERDICT_SCHEMA }) // A null return (terminal failure/skip) or a rejection means we could NOT // verify the finding. Mark it `unverified` rather than refuted so it stays // blocking (fail closed) — an unverifiable CRITICAL must never be demoted // to advisory just because the verifier did not run. .then(v => (v ? { ...f, verdict: v } : { ...f, unverified: true, verdict: { isReal: false, confidence: 0, reasoning: 'verifier returned null (terminal failure or skip)' } })) .catch(err => { log(`Verifier failed for ${f.file}: ${String((err && err.message) || err)}`); return { ...f, unverified: true, verdict: { isReal: false, confidence: 0, reasoning: 'verifier error' } }; }) ) ); const verifiedClean = verified.filter(Boolean); const confirmed = verifiedClean.filter(f => !f.unverified && f.verdict && f.verdict.isReal); const unverified = verifiedClean.filter(f => f.unverified); const refuted = verifiedClean.filter(f => !f.unverified && !(f.verdict && f.verdict.isReal)); // Unverifiable blockers stay in `blocking` (fail closed), tagged so the human // at Gate 2 knows they were not independently confirmed. const blocking = [...confirmed, ...unverified.map(f => ({ ...f, note: 'could not be verified — kept as blocking' }))]; log(`Done: ${confirmed.length} confirmed, ${unverified.length} unverified (kept blocking), ${refuted.length} refuted, ${advisory.length} advisory.`); // Fail closed: APPROVE only when every dimension ran AND nothing blocks. const incomplete = failedDimensions.length > 0; return { verdict: blocking.length > 0 || incomplete ? 'CHANGES_REQUESTED' : 'APPROVE', incomplete, failedDimensions, blocking, advisory: [...advisory, ...refuted.map(f => ({ ...f, note: 'refuted by adversarial verifier' }))], stats: { dimensions: dimensions.length, failed: failedDimensions.length, raw: tagged.length, unique: unique.length, confirmed: confirmed.length, unverified: unverified.length, refuted: refuted.length } };