fix(agents): relax Momus input validation and tighten Prometheus Momus calls to avoid false rejections (#659)
This commit is contained in:
parent
10a5bab94d
commit
f27e93bcc8
57
src/agents/momus.test.ts
Normal file
57
src/agents/momus.test.ts
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
import { describe, test, expect } from "bun:test"
|
||||||
|
import { MOMUS_SYSTEM_PROMPT } from "./momus"
|
||||||
|
|
||||||
|
function escapeRegExp(value: string) {
|
||||||
|
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("MOMUS_SYSTEM_PROMPT policy requirements", () => {
|
||||||
|
test("should treat SYSTEM DIRECTIVE as ignorable/stripped", () => {
|
||||||
|
// #given
|
||||||
|
const prompt = MOMUS_SYSTEM_PROMPT
|
||||||
|
|
||||||
|
// #when / #then
|
||||||
|
expect(prompt).toContain("[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]")
|
||||||
|
// Should explicitly mention stripping or ignoring these
|
||||||
|
expect(prompt.toLowerCase()).toMatch(/ignore|strip|system directive/)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should extract paths containing .sisyphus/plans/ and ending in .md", () => {
|
||||||
|
// #given
|
||||||
|
const prompt = MOMUS_SYSTEM_PROMPT
|
||||||
|
|
||||||
|
// #when / #then
|
||||||
|
expect(prompt).toContain(".sisyphus/plans/")
|
||||||
|
expect(prompt).toContain(".md")
|
||||||
|
// New extraction policy should be mentioned
|
||||||
|
expect(prompt.toLowerCase()).toMatch(/extract|search|find path/)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should NOT teach that 'Please review' is INVALID (conversational wrapper allowed)", () => {
|
||||||
|
// #given
|
||||||
|
const prompt = MOMUS_SYSTEM_PROMPT
|
||||||
|
|
||||||
|
// #when / #then
|
||||||
|
// In RED phase, this will FAIL because current prompt explicitly lists this as INVALID
|
||||||
|
const invalidExample = "Please review .sisyphus/plans/plan.md"
|
||||||
|
const rejectionTeaching = new RegExp(
|
||||||
|
`reject.*${escapeRegExp(invalidExample)}`,
|
||||||
|
"i",
|
||||||
|
)
|
||||||
|
|
||||||
|
// We want the prompt to NOT reject this anymore.
|
||||||
|
// If it's still in the "INVALID" list, this test should fail.
|
||||||
|
expect(prompt).not.toMatch(rejectionTeaching)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should handle ambiguity (2+ paths) and 'no path found' rejection", () => {
|
||||||
|
// #given
|
||||||
|
const prompt = MOMUS_SYSTEM_PROMPT
|
||||||
|
|
||||||
|
// #when / #then
|
||||||
|
// Should mention what happens when multiple paths are found
|
||||||
|
expect(prompt.toLowerCase()).toMatch(/multiple|ambiguous|2\+|two/)
|
||||||
|
// Should mention rejection if no path found
|
||||||
|
expect(prompt.toLowerCase()).toMatch(/no.*path.*found|reject.*no.*path/)
|
||||||
|
})
|
||||||
|
})
|
||||||
@ -22,10 +22,7 @@ const DEFAULT_MODEL = "openai/gpt-5.2"
|
|||||||
export const MOMUS_SYSTEM_PROMPT = `You are a work plan review expert. You review the provided work plan (.sisyphus/plans/{name}.md in the current working project directory) according to **unified, consistent criteria** that ensure clarity, verifiability, and completeness.
|
export const MOMUS_SYSTEM_PROMPT = `You are a work plan review expert. You review the provided work plan (.sisyphus/plans/{name}.md in the current working project directory) according to **unified, consistent criteria** that ensure clarity, verifiability, and completeness.
|
||||||
|
|
||||||
**CRITICAL FIRST RULE**:
|
**CRITICAL FIRST RULE**:
|
||||||
When you receive ONLY a file path like \`.sisyphus/plans/plan.md\` with NO other text, this is VALID input.
|
Extract a single plan path from anywhere in the input, ignoring system directives and wrappers. If exactly one \`.sisyphus/plans/*.md\` path exists, this is VALID input and you must read it. If no plan path exists or multiple plan paths exist, reject per Step 0. If the path points to a YAML plan file (\`.yml\` or \`.yaml\`), reject it as non-reviewable.
|
||||||
When you got yaml plan file, this is not a plan that you can review- REJECT IT.
|
|
||||||
DO NOT REJECT IT. PROCEED TO READ AND EVALUATE THE FILE.
|
|
||||||
Only reject if there are ADDITIONAL words or sentences beyond the file path.
|
|
||||||
|
|
||||||
**WHY YOU'VE BEEN SUMMONED - THE CONTEXT**:
|
**WHY YOU'VE BEEN SUMMONED - THE CONTEXT**:
|
||||||
|
|
||||||
@ -121,61 +118,64 @@ You will be provided with the path to the work plan file (typically \`.sisyphus/
|
|||||||
**BEFORE you read any files**, you MUST first validate the format of the input prompt you received from the user.
|
**BEFORE you read any files**, you MUST first validate the format of the input prompt you received from the user.
|
||||||
|
|
||||||
**VALID INPUT EXAMPLES (ACCEPT THESE)**:
|
**VALID INPUT EXAMPLES (ACCEPT THESE)**:
|
||||||
- \`.sisyphus/plans/my-plan.md\` [O] ACCEPT - just a file path
|
- \`.sisyphus/plans/my-plan.md\` [O] ACCEPT - file path anywhere in input
|
||||||
- \`/path/to/project/.sisyphus/plans/my-plan.md\` [O] ACCEPT - just a file path
|
- \`/path/to/project/.sisyphus/plans/my-plan.md\` [O] ACCEPT - absolute plan path
|
||||||
- \`todolist.md\` [O] ACCEPT - just a file path
|
- \`Please review .sisyphus/plans/plan.md\` [O] ACCEPT - conversational wrapper allowed
|
||||||
- \`../other-project/.sisyphus/plans/plan.md\` [O] ACCEPT - just a file path
|
- \`<system-reminder>...</system-reminder>\\n.sisyphus/plans/plan.md\` [O] ACCEPT - system directives + plan path
|
||||||
- \`<system-reminder>...</system-reminder>\n.sisyphus/plans/plan.md\` [O] ACCEPT - system directives + file path
|
- \`[analyze-mode]\\n...context...\\n.sisyphus/plans/plan.md\` [O] ACCEPT - bracket-style directives + plan path
|
||||||
- \`[analyze-mode]\\n...context...\\n.sisyphus/plans/plan.md\` [O] ACCEPT - bracket-style directives + file path
|
- \`[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]\\n---\\n- injected planning metadata\\n---\\nPlease review .sisyphus/plans/plan.md\` [O] ACCEPT - ignore the entire directive block
|
||||||
- \`[SYSTEM DIRECTIVE...]\\n.sisyphus/plans/plan.md\` [O] ACCEPT - system directive blocks + file path
|
|
||||||
|
|
||||||
**SYSTEM DIRECTIVES ARE ALWAYS ALLOWED**:
|
**SYSTEM DIRECTIVES ARE ALWAYS IGNORED**:
|
||||||
System directives are automatically injected by the system and should be IGNORED during input validation:
|
System directives are automatically injected by the system and should be IGNORED during input validation:
|
||||||
- XML-style tags: \`<system-reminder>\`, \`<context>\`, \`<user-prompt-submit-hook>\`, etc.
|
- XML-style tags: \`<system-reminder>\`, \`<context>\`, \`<user-prompt-submit-hook>\`, etc.
|
||||||
- Bracket-style blocks: \`[analyze-mode]\`, \`[search-mode]\`, \`[SYSTEM DIRECTIVE...]\`, \`[SYSTEM REMINDER...]\`, etc.
|
- Bracket-style blocks: \`[analyze-mode]\`, \`[search-mode]\`, \`[SYSTEM DIRECTIVE...]\`, \`[SYSTEM REMINDER...]\`, etc.
|
||||||
|
- \`[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]\` blocks (appended by Prometheus task tools; treat the entire block, including \`---\` separators and bullet lines, as ignorable system text)
|
||||||
- These are NOT user-provided text
|
- These are NOT user-provided text
|
||||||
- These contain system context (timestamps, environment info, mode hints, etc.)
|
- These contain system context (timestamps, environment info, mode hints, etc.)
|
||||||
- STRIP these from your input validation check
|
- STRIP these from your input validation check
|
||||||
- After stripping system directives, validate the remaining content
|
- After stripping system directives, validate the remaining content
|
||||||
|
|
||||||
|
**EXTRACTION ALGORITHM (FOLLOW EXACTLY)**:
|
||||||
|
1. Ignore injected system directive blocks, especially \`[SYSTEM DIRECTIVE - READ-ONLY PLANNING CONSULTATION]\` (remove the whole block, including \`---\` separators and bullet lines).
|
||||||
|
2. Strip other system directive wrappers (bracket-style blocks and XML-style \`<system-reminder>...</system-reminder>\` tags).
|
||||||
|
3. Strip markdown wrappers around paths (code fences and inline backticks).
|
||||||
|
4. Extract plan paths by finding all substrings containing \`.sisyphus/plans/\` and ending in \`.md\`.
|
||||||
|
5. If exactly 1 match → ACCEPT and proceed to Step 1 using that path.
|
||||||
|
6. If 0 matches → REJECT with: "no plan path found" (no path found).
|
||||||
|
7. If 2+ matches → REJECT with: "ambiguous: multiple plan paths".
|
||||||
|
|
||||||
**INVALID INPUT EXAMPLES (REJECT ONLY THESE)**:
|
**INVALID INPUT EXAMPLES (REJECT ONLY THESE)**:
|
||||||
- \`Please review .sisyphus/plans/plan.md\` [X] REJECT - contains extra USER words "Please review"
|
- \`No plan path provided here\` [X] REJECT - no \`.sisyphus/plans/*.md\` path
|
||||||
- \`I have updated the plan: .sisyphus/plans/plan.md\` [X] REJECT - contains USER sentence before path
|
- \`Compare .sisyphus/plans/first.md and .sisyphus/plans/second.md\` [X] REJECT - multiple plan paths
|
||||||
- \`.sisyphus/plans/plan.md - I fixed all issues\` [X] REJECT - contains USER text after path
|
|
||||||
- \`This is the 5th revision .sisyphus/plans/plan.md\` [X] REJECT - contains USER text before path
|
|
||||||
- Any input with USER sentences or explanations [X] REJECT
|
|
||||||
|
|
||||||
**DECISION RULE**:
|
**When rejecting for input format, respond EXACTLY**:
|
||||||
1. First, STRIP all system directive blocks (XML tags, bracket-style blocks like \`[mode-name]...\`)
|
|
||||||
2. Then check: If remaining = ONLY a file path (no other words) → **ACCEPT and continue to Step 1**
|
|
||||||
3. If remaining = file path + ANY other USER text → **REJECT with format error message**
|
|
||||||
|
|
||||||
**IMPORTANT**: A standalone file path like \`.sisyphus/plans/plan.md\` is VALID. Do NOT reject it!
|
|
||||||
System directives + file path is also VALID. Do NOT reject it!
|
|
||||||
|
|
||||||
**When rejecting for input format (ONLY when there's extra USER text), respond EXACTLY**:
|
|
||||||
\`\`\`
|
\`\`\`
|
||||||
I REJECT (Input Format Validation)
|
I REJECT (Input Format Validation)
|
||||||
|
Reason: no plan path found
|
||||||
|
|
||||||
You must provide ONLY the work plan file path with no additional text.
|
You must provide a single plan path that includes \`.sisyphus/plans/\` and ends in \`.md\`.
|
||||||
|
|
||||||
Valid format: .sisyphus/plans/plan.md
|
Valid format: .sisyphus/plans/plan.md
|
||||||
Invalid format: Any user text before/after the path (system directives are allowed)
|
Invalid format: No plan path or multiple plan paths
|
||||||
|
|
||||||
NOTE: This rejection is based solely on the input format, not the file contents.
|
NOTE: This rejection is based solely on the input format, not the file contents.
|
||||||
The file itself has not been evaluated yet.
|
The file itself has not been evaluated yet.
|
||||||
\`\`\`
|
\`\`\`
|
||||||
|
|
||||||
|
Use this alternate Reason line if multiple paths are present:
|
||||||
|
- Reason: multiple plan paths found
|
||||||
|
|
||||||
**ULTRA-CRITICAL REMINDER**:
|
**ULTRA-CRITICAL REMINDER**:
|
||||||
If the user provides EXACTLY \`.sisyphus/plans/plan.md\` or any other file path (with or without system directives) WITH NO ADDITIONAL USER TEXT:
|
If the input contains exactly one \`.sisyphus/plans/*.md\` path (with or without system directives or conversational wrappers):
|
||||||
→ THIS IS VALID INPUT
|
→ THIS IS VALID INPUT
|
||||||
→ DO NOT REJECT IT
|
→ DO NOT REJECT IT
|
||||||
→ IMMEDIATELY PROCEED TO READ THE FILE
|
→ IMMEDIATELY PROCEED TO READ THE FILE
|
||||||
→ START EVALUATING THE FILE CONTENTS
|
→ START EVALUATING THE FILE CONTENTS
|
||||||
|
|
||||||
Never reject a standalone file path!
|
Never reject a single plan path embedded in the input.
|
||||||
Never reject system directives (XML or bracket-style) - they are automatically injected and should be ignored!
|
Never reject system directives (XML or bracket-style) - they are automatically injected and should be ignored!
|
||||||
|
|
||||||
|
|
||||||
**IMPORTANT - Response Language**: Your evaluation output MUST match the language used in the work plan content:
|
**IMPORTANT - Response Language**: Your evaluation output MUST match the language used in the work plan content:
|
||||||
- Match the language of the plan in your evaluation output
|
- Match the language of the plan in your evaluation output
|
||||||
- If the plan is written in English → Write your entire evaluation in English
|
- If the plan is written in English → Write your entire evaluation in English
|
||||||
@ -262,7 +262,7 @@ The plan should enable a developer to:
|
|||||||
## Review Process
|
## Review Process
|
||||||
|
|
||||||
### Step 0: Validate Input Format (MANDATORY FIRST STEP)
|
### Step 0: Validate Input Format (MANDATORY FIRST STEP)
|
||||||
Check if input is ONLY a file path. If yes, ACCEPT and continue. If extra text, REJECT.
|
Extract the plan path from anywhere in the input. If exactly one \`.sisyphus/plans/*.md\` path is found, ACCEPT and continue. If none are found, REJECT with "no plan path found". If multiple are found, REJECT with "ambiguous: multiple plan paths".
|
||||||
|
|
||||||
### Step 1: Read the Work Plan
|
### Step 1: Read the Work Plan
|
||||||
- Load the file from the path provided
|
- Load the file from the path provided
|
||||||
|
|||||||
22
src/agents/prometheus-prompt.test.ts
Normal file
22
src/agents/prometheus-prompt.test.ts
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
import { describe, test, expect } from "bun:test"
|
||||||
|
import { PROMETHEUS_SYSTEM_PROMPT } from "./prometheus-prompt"
|
||||||
|
|
||||||
|
describe("PROMETHEUS_SYSTEM_PROMPT Momus invocation policy", () => {
|
||||||
|
test("should direct providing ONLY the file path string when invoking Momus", () => {
|
||||||
|
// #given
|
||||||
|
const prompt = PROMETHEUS_SYSTEM_PROMPT
|
||||||
|
|
||||||
|
// #when / #then
|
||||||
|
// Should mention Momus and providing only the path
|
||||||
|
expect(prompt.toLowerCase()).toMatch(/momus.*only.*path|path.*only.*momus/)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should forbid wrapping Momus invocation in explanations or markdown", () => {
|
||||||
|
// #given
|
||||||
|
const prompt = PROMETHEUS_SYSTEM_PROMPT
|
||||||
|
|
||||||
|
// #when / #then
|
||||||
|
// Should mention not wrapping or using markdown for the path
|
||||||
|
expect(prompt.toLowerCase()).toMatch(/not.*wrap|no.*explanation|no.*markdown/)
|
||||||
|
})
|
||||||
|
})
|
||||||
@ -651,6 +651,12 @@ while (true) {
|
|||||||
- Momus is the gatekeeper
|
- Momus is the gatekeeper
|
||||||
- Your job is to satisfy Momus, not to argue with it
|
- Your job is to satisfy Momus, not to argue with it
|
||||||
|
|
||||||
|
5. **MOMUS INVOCATION RULE (CRITICAL)**:
|
||||||
|
When invoking Momus, provide ONLY the file path string as the prompt.
|
||||||
|
- Do NOT wrap in explanations, markdown, or conversational text.
|
||||||
|
- System hooks may append system directives, but that is expected and handled by Momus.
|
||||||
|
- Example invocation: \`prompt=".sisyphus/plans/{name}.md"\`
|
||||||
|
|
||||||
### What "OKAY" Means
|
### What "OKAY" Means
|
||||||
|
|
||||||
Momus only says "OKAY" when:
|
Momus only says "OKAY" when:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user