From f27e93bcc8572135a7e276cd3fc7398bb5699afc Mon Sep 17 00:00:00 2001 From: Ivan Marshall Widjaja <60992624+imarshallwidjaja@users.noreply.github.com> Date: Sun, 11 Jan 2026 20:30:29 +1100 Subject: [PATCH] fix(agents): relax Momus input validation and tighten Prometheus Momus calls to avoid false rejections (#659) --- src/agents/momus.test.ts | 57 +++++++++++++++++++++++++ src/agents/momus.ts | 62 ++++++++++++++-------------- src/agents/prometheus-prompt.test.ts | 22 ++++++++++ src/agents/prometheus-prompt.ts | 6 +++ 4 files changed, 116 insertions(+), 31 deletions(-) create mode 100644 src/agents/momus.test.ts create mode 100644 src/agents/prometheus-prompt.test.ts diff --git a/src/agents/momus.test.ts b/src/agents/momus.test.ts new file mode 100644 index 00000000..e6ddcb09 --- /dev/null +++ b/src/agents/momus.test.ts @@ -0,0 +1,57 @@ +import { describe, test, expect } from "bun:test" +import { MOMUS_SYSTEM_PROMPT } from "./momus" + +function escapeRegExp(value: string) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") +} + +describe("MOMUS_SYSTEM_PROMPT policy requirements", () => { + test("should treat SYSTEM DIRECTIVE as ignorable/stripped", () => { + // #given + const prompt = MOMUS_SYSTEM_PROMPT + + // #when / #then + expect(prompt).toContain("[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]") + // Should explicitly mention stripping or ignoring these + expect(prompt.toLowerCase()).toMatch(/ignore|strip|system directive/) + }) + + test("should extract paths containing .sisyphus/plans/ and ending in .md", () => { + // #given + const prompt = MOMUS_SYSTEM_PROMPT + + // #when / #then + expect(prompt).toContain(".sisyphus/plans/") + expect(prompt).toContain(".md") + // New extraction policy should be mentioned + expect(prompt.toLowerCase()).toMatch(/extract|search|find path/) + }) + + test("should NOT teach that 'Please review' is INVALID (conversational wrapper allowed)", () => { + // #given + const prompt = MOMUS_SYSTEM_PROMPT + + // #when / #then + // In RED phase, this will FAIL because current prompt explicitly lists this as INVALID + const invalidExample = "Please review .sisyphus/plans/plan.md" + const rejectionTeaching = new RegExp( + `reject.*${escapeRegExp(invalidExample)}`, + "i", + ) + + // We want the prompt to NOT reject this anymore. + // If it's still in the "INVALID" list, this test should fail. + expect(prompt).not.toMatch(rejectionTeaching) + }) + + test("should handle ambiguity (2+ paths) and 'no path found' rejection", () => { + // #given + const prompt = MOMUS_SYSTEM_PROMPT + + // #when / #then + // Should mention what happens when multiple paths are found + expect(prompt.toLowerCase()).toMatch(/multiple|ambiguous|2\+|two/) + // Should mention rejection if no path found + expect(prompt.toLowerCase()).toMatch(/no.*path.*found|reject.*no.*path/) + }) +}) diff --git a/src/agents/momus.ts b/src/agents/momus.ts index 16dfaecc..df41a125 100644 --- a/src/agents/momus.ts +++ b/src/agents/momus.ts @@ -22,10 +22,7 @@ const DEFAULT_MODEL = "openai/gpt-5.2" export const MOMUS_SYSTEM_PROMPT = `You are a work plan review expert. You review the provided work plan (.sisyphus/plans/{name}.md in the current working project directory) according to **unified, consistent criteria** that ensure clarity, verifiability, and completeness. **CRITICAL FIRST RULE**: -When you receive ONLY a file path like \`.sisyphus/plans/plan.md\` with NO other text, this is VALID input. -When you got yaml plan file, this is not a plan that you can review- REJECT IT. -DO NOT REJECT IT. PROCEED TO READ AND EVALUATE THE FILE. -Only reject if there are ADDITIONAL words or sentences beyond the file path. +Extract a single plan path from anywhere in the input, ignoring system directives and wrappers. If exactly one \`.sisyphus/plans/*.md\` path exists, this is VALID input and you must read it. If no plan path exists or multiple plan paths exist, reject per Step 0. If the path points to a YAML plan file (\`.yml\` or \`.yaml\`), reject it as non-reviewable. **WHY YOU'VE BEEN SUMMONED - THE CONTEXT**: @@ -121,61 +118,64 @@ You will be provided with the path to the work plan file (typically \`.sisyphus/ **BEFORE you read any files**, you MUST first validate the format of the input prompt you received from the user. **VALID INPUT EXAMPLES (ACCEPT THESE)**: -- \`.sisyphus/plans/my-plan.md\` [O] ACCEPT - just a file path -- \`/path/to/project/.sisyphus/plans/my-plan.md\` [O] ACCEPT - just a file path -- \`todolist.md\` [O] ACCEPT - just a file path -- \`../other-project/.sisyphus/plans/plan.md\` [O] ACCEPT - just a file path -- \`...\n.sisyphus/plans/plan.md\` [O] ACCEPT - system directives + file path -- \`[analyze-mode]\\n...context...\\n.sisyphus/plans/plan.md\` [O] ACCEPT - bracket-style directives + file path -- \`[SYSTEM DIRECTIVE...]\\n.sisyphus/plans/plan.md\` [O] ACCEPT - system directive blocks + file path +- \`.sisyphus/plans/my-plan.md\` [O] ACCEPT - file path anywhere in input +- \`/path/to/project/.sisyphus/plans/my-plan.md\` [O] ACCEPT - absolute plan path +- \`Please review .sisyphus/plans/plan.md\` [O] ACCEPT - conversational wrapper allowed +- \`...\\n.sisyphus/plans/plan.md\` [O] ACCEPT - system directives + plan path +- \`[analyze-mode]\\n...context...\\n.sisyphus/plans/plan.md\` [O] ACCEPT - bracket-style directives + plan path +- \`[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]\\n---\\n- injected planning metadata\\n---\\nPlease review .sisyphus/plans/plan.md\` [O] ACCEPT - ignore the entire directive block -**SYSTEM DIRECTIVES ARE ALWAYS ALLOWED**: +**SYSTEM DIRECTIVES ARE ALWAYS IGNORED**: System directives are automatically injected by the system and should be IGNORED during input validation: - XML-style tags: \`\`, \`\`, \`\`, etc. - Bracket-style blocks: \`[analyze-mode]\`, \`[search-mode]\`, \`[SYSTEM DIRECTIVE...]\`, \`[SYSTEM REMINDER...]\`, etc. +- \`[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]\` blocks (appended by Prometheus task tools; treat the entire block, including \`---\` separators and bullet lines, as ignorable system text) - These are NOT user-provided text - These contain system context (timestamps, environment info, mode hints, etc.) - STRIP these from your input validation check - After stripping system directives, validate the remaining content +**EXTRACTION ALGORITHM (FOLLOW EXACTLY)**: +1. Ignore injected system directive blocks, especially \`[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]\` (remove the whole block, including \`---\` separators and bullet lines). +2. Strip other system directive wrappers (bracket-style blocks and XML-style \`...\` tags). +3. Strip markdown wrappers around paths (code fences and inline backticks). +4. Extract plan paths by finding all substrings containing \`.sisyphus/plans/\` and ending in \`.md\`. +5. If exactly 1 match → ACCEPT and proceed to Step 1 using that path. +6. If 0 matches → REJECT with: "no plan path found" (no path found). +7. If 2+ matches → REJECT with: "ambiguous: multiple plan paths". + **INVALID INPUT EXAMPLES (REJECT ONLY THESE)**: -- \`Please review .sisyphus/plans/plan.md\` [X] REJECT - contains extra USER words "Please review" -- \`I have updated the plan: .sisyphus/plans/plan.md\` [X] REJECT - contains USER sentence before path -- \`.sisyphus/plans/plan.md - I fixed all issues\` [X] REJECT - contains USER text after path -- \`This is the 5th revision .sisyphus/plans/plan.md\` [X] REJECT - contains USER text before path -- Any input with USER sentences or explanations [X] REJECT +- \`No plan path provided here\` [X] REJECT - no \`.sisyphus/plans/*.md\` path +- \`Compare .sisyphus/plans/first.md and .sisyphus/plans/second.md\` [X] REJECT - multiple plan paths -**DECISION RULE**: -1. First, STRIP all system directive blocks (XML tags, bracket-style blocks like \`[mode-name]...\`) -2. Then check: If remaining = ONLY a file path (no other words) → **ACCEPT and continue to Step 1** -3. If remaining = file path + ANY other USER text → **REJECT with format error message** - -**IMPORTANT**: A standalone file path like \`.sisyphus/plans/plan.md\` is VALID. Do NOT reject it! -System directives + file path is also VALID. Do NOT reject it! - -**When rejecting for input format (ONLY when there's extra USER text), respond EXACTLY**: +**When rejecting for input format, respond EXACTLY**: \`\`\` I REJECT (Input Format Validation) +Reason: no plan path found -You must provide ONLY the work plan file path with no additional text. +You must provide a single plan path that includes \`.sisyphus/plans/\` and ends in \`.md\`. Valid format: .sisyphus/plans/plan.md -Invalid format: Any user text before/after the path (system directives are allowed) +Invalid format: No plan path or multiple plan paths NOTE: This rejection is based solely on the input format, not the file contents. The file itself has not been evaluated yet. \`\`\` +Use this alternate Reason line if multiple paths are present: +- Reason: multiple plan paths found + **ULTRA-CRITICAL REMINDER**: -If the user provides EXACTLY \`.sisyphus/plans/plan.md\` or any other file path (with or without system directives) WITH NO ADDITIONAL USER TEXT: +If the input contains exactly one \`.sisyphus/plans/*.md\` path (with or without system directives or conversational wrappers): → THIS IS VALID INPUT → DO NOT REJECT IT → IMMEDIATELY PROCEED TO READ THE FILE → START EVALUATING THE FILE CONTENTS -Never reject a standalone file path! +Never reject a single plan path embedded in the input. Never reject system directives (XML or bracket-style) - they are automatically injected and should be ignored! + **IMPORTANT - Response Language**: Your evaluation output MUST match the language used in the work plan content: - Match the language of the plan in your evaluation output - If the plan is written in English → Write your entire evaluation in English @@ -262,7 +262,7 @@ The plan should enable a developer to: ## Review Process ### Step 0: Validate Input Format (MANDATORY FIRST STEP) -Check if input is ONLY a file path. If yes, ACCEPT and continue. If extra text, REJECT. +Extract the plan path from anywhere in the input. If exactly one \`.sisyphus/plans/*.md\` path is found, ACCEPT and continue. If none are found, REJECT with "no plan path found". If multiple are found, REJECT with "ambiguous: multiple plan paths". ### Step 1: Read the Work Plan - Load the file from the path provided diff --git a/src/agents/prometheus-prompt.test.ts b/src/agents/prometheus-prompt.test.ts new file mode 100644 index 00000000..635715fd --- /dev/null +++ b/src/agents/prometheus-prompt.test.ts @@ -0,0 +1,22 @@ +import { describe, test, expect } from "bun:test" +import { PROMETHEUS_SYSTEM_PROMPT } from "./prometheus-prompt" + +describe("PROMETHEUS_SYSTEM_PROMPT Momus invocation policy", () => { + test("should direct providing ONLY the file path string when invoking Momus", () => { + // #given + const prompt = PROMETHEUS_SYSTEM_PROMPT + + // #when / #then + // Should mention Momus and providing only the path + expect(prompt.toLowerCase()).toMatch(/momus.*only.*path|path.*only.*momus/) + }) + + test("should forbid wrapping Momus invocation in explanations or markdown", () => { + // #given + const prompt = PROMETHEUS_SYSTEM_PROMPT + + // #when / #then + // Should mention not wrapping or using markdown for the path + expect(prompt.toLowerCase()).toMatch(/not.*wrap|no.*explanation|no.*markdown/) + }) +}) diff --git a/src/agents/prometheus-prompt.ts b/src/agents/prometheus-prompt.ts index c9268600..4e6d88ca 100644 --- a/src/agents/prometheus-prompt.ts +++ b/src/agents/prometheus-prompt.ts @@ -651,6 +651,12 @@ while (true) { - Momus is the gatekeeper - Your job is to satisfy Momus, not to argue with it +5. **MOMUS INVOCATION RULE (CRITICAL)**: + When invoking Momus, provide ONLY the file path string as the prompt. + - Do NOT wrap in explanations, markdown, or conversational text. + - System hooks may append system directives, but that is expected and handled by Momus. + - Example invocation: \`prompt=".sisyphus/plans/{name}.md"\` + ### What "OKAY" Means Momus only says "OKAY" when: