diff --git a/src/agents/athena/council-orchestrator.test.ts b/src/agents/athena/council-orchestrator.test.ts index db6f9a8f..4e32ce18 100644 --- a/src/agents/athena/council-orchestrator.test.ts +++ b/src/agents/athena/council-orchestrator.test.ts @@ -3,16 +3,6 @@ import { buildCouncilPrompt } from "./council-prompt" import { executeCouncil } from "./council-orchestrator" import type { CouncilConfig } from "./types" -type MockTaskStatus = "completed" | "error" | "cancelled" | "interrupt" - -interface MockTask { - id: string - status: MockTaskStatus - result?: string - error?: string - completedAt?: Date -} - interface MockLaunchInput { description: string prompt: string @@ -25,20 +15,15 @@ interface MockLaunchInput { permission?: Record } -function createMockTask(task: MockTask, launch: MockLaunchInput): MockTask & { - parentSessionID: string - parentMessageID: string - description: string - prompt: string - agent: string -} { +function createMockTask(id: string, launch: MockLaunchInput) { return { + id, + status: "pending" as const, parentSessionID: launch.parentSessionID, parentMessageID: launch.parentMessageID, description: launch.description, prompt: launch.prompt, agent: launch.agent, - ...task, } } @@ -51,15 +36,7 @@ describe("executeCouncil", () => { const launcher = { launch: async (input: MockLaunchInput) => { launches.push(input) - return createMockTask( - { - id: `task-${launches.length}`, - status: "completed", - result: `response-${launches.length}`, - completedAt: new Date(), - }, - input - ) + return createMockTask(`task-${launches.length}`, input) }, } @@ -84,8 +61,9 @@ describe("executeCouncil", () => { const expectedPrompt = buildCouncilPrompt(question) expect(launches).toHaveLength(3) - expect(result.completedCount).toBe(3) - expect(result.failedCount).toBe(0) + expect(result.launched).toHaveLength(3) + expect(result.failures).toHaveLength(0) + expect(result.totalMembers).toBe(3) for (const launch of launches) { expect(launch.prompt).toBe(expectedPrompt) @@ -98,33 +76,16 @@ describe("executeCouncil", () => { expect(launches[2]?.model).toEqual({ providerID: "google", modelID: "gemini-3-pro" }) }) - //#given a council with 3 members where 1 member fails + //#given a council with 3 members where 1 launch throws //#when executeCouncil is called - //#then partial failures are tolerated and preserved in responses - test("returns successful result for partial failures", async () => { + //#then launch failures are captured separately from successful launches + test("captures launch failures separately from successful launches", async () => { const launcher = { launch: async (input: MockLaunchInput) => { if (input.model?.providerID === "anthropic") { - return createMockTask( - { - id: "task-failed", - status: "error", - error: "Token limit exceeded", - completedAt: new Date(), - }, - input - ) + throw new Error("Provider unavailable") } - - return createMockTask( - { - id: `task-${input.model?.providerID}`, - status: "completed", - result: `ok-${input.model?.providerID}`, - completedAt: new Date(), - }, - input - ) + return createMockTask(`task-${input.model?.providerID}`, input) }, } @@ -142,28 +103,21 @@ describe("executeCouncil", () => { parentMessageID: "message-1", }) - expect(result.completedCount).toBe(2) - expect(result.failedCount).toBe(1) - expect(result.responses).toHaveLength(3) - expect(result.responses.filter((response) => response.status === "completed")).toHaveLength(2) - expect(result.responses.filter((response) => response.status === "error")).toHaveLength(1) + expect(result.launched).toHaveLength(2) + expect(result.failures).toHaveLength(1) + expect(result.totalMembers).toBe(3) + expect(result.failures[0]?.member.model).toBe("anthropic/claude-sonnet-4-5") + expect(result.failures[0]?.error).toContain("Launch failed") }) - //#given a council where all members fail + //#given a council where all launches throw //#when executeCouncil is called - //#then it returns structured error result with zero completions - test("returns all failures when every member fails", async () => { + //#then all members appear as failures with zero launched + test("returns all failures when every launch throws", async () => { const launcher = { - launch: async (input: MockLaunchInput) => - createMockTask( - { - id: `task-${input.model?.providerID}`, - status: "error", - error: "Model unavailable", - completedAt: new Date(), - }, - input - ), + launch: async () => { + throw new Error("Model unavailable") + }, } const result = await executeCouncil({ @@ -179,29 +133,21 @@ describe("executeCouncil", () => { parentMessageID: "message-1", }) - expect(result.completedCount).toBe(0) - expect(result.failedCount).toBe(2) - expect(result.responses).toHaveLength(2) - expect(result.responses.every((response) => response.status === "error")).toBe(true) + expect(result.launched).toHaveLength(0) + expect(result.failures).toHaveLength(2) + expect(result.totalMembers).toBe(2) + expect(result.failures.every((f) => f.error.includes("Launch failed"))).toBe(true) }) //#given a council with one invalid model string //#when executeCouncil is called - //#then invalid member becomes an error response while others still execute + //#then invalid member becomes a failure while others still launch test("handles invalid model strings without crashing council execution", async () => { const launches: MockLaunchInput[] = [] const launcher = { launch: async (input: MockLaunchInput) => { launches.push(input) - return createMockTask( - { - id: `task-${launches.length}`, - status: "completed", - result: "valid-member-response", - completedAt: new Date(), - }, - input - ) + return createMockTask(`task-${launches.length}`, input) }, } @@ -219,10 +165,9 @@ describe("executeCouncil", () => { }) expect(launches).toHaveLength(1) - expect(result.completedCount).toBe(1) - expect(result.failedCount).toBe(1) - expect(result.responses).toHaveLength(2) - expect(result.responses.find((response) => response.member.model === "invalid-model")?.status).toBe("error") + expect(result.launched).toHaveLength(1) + expect(result.failures).toHaveLength(1) + expect(result.failures.find((f) => f.member.model === "invalid-model")?.error).toContain("Launch failed") }) //#given members with per-member temperature and variant @@ -233,15 +178,7 @@ describe("executeCouncil", () => { const launcher = { launch: async (input: MockLaunchInput) => { launches.push(input) - return createMockTask( - { - id: `task-${launches.length}`, - status: "completed", - result: "ok", - completedAt: new Date(), - }, - input - ) + return createMockTask(`task-${launches.length}`, input) }, } @@ -264,4 +201,33 @@ describe("executeCouncil", () => { expect(launches[1]?.temperature).toBe(0.3) expect(launches[1]?.model?.variant).toBeUndefined() }) + + //#given launched members + //#when executeCouncil returns + //#then each launched member has a taskId for background_output retrieval + test("returns task IDs for background_output retrieval", async () => { + const launcher = { + launch: async (input: MockLaunchInput) => + createMockTask(`bg_${input.model?.providerID}`, input), + } + + const result = await executeCouncil({ + question: "Review error handling", + council: { + members: [ + { model: "openai/gpt-5.3-codex", name: "OpenAI" }, + { model: "google/gemini-3-pro", name: "Gemini" }, + ], + }, + launcher, + parentSessionID: "session-1", + parentMessageID: "message-1", + }) + + expect(result.launched).toHaveLength(2) + expect(result.launched[0]?.taskId).toBe("bg_openai") + expect(result.launched[0]?.member.name).toBe("OpenAI") + expect(result.launched[1]?.taskId).toBe("bg_google") + expect(result.launched[1]?.member.name).toBe("Gemini") + }) }) diff --git a/src/agents/athena/council-orchestrator.ts b/src/agents/athena/council-orchestrator.ts index 231f62a2..31f81fa0 100644 --- a/src/agents/athena/council-orchestrator.ts +++ b/src/agents/athena/council-orchestrator.ts @@ -1,9 +1,8 @@ import type { LaunchInput, BackgroundTask } from "../../features/background-agent/types" import { createAgentToolRestrictions } from "../../shared/permission-compat" import { buildCouncilPrompt } from "./council-prompt" -import { collectCouncilResults } from "./council-result-collector" import { parseModelString } from "./model-parser" -import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types" +import type { CouncilConfig, CouncilLaunchFailure, CouncilLaunchedMember, CouncilLaunchResult, CouncilMemberConfig } from "./types" export type CouncilLaunchInput = LaunchInput @@ -20,57 +19,43 @@ export interface CouncilExecutionInput { parentAgent?: string } -export async function executeCouncil(input: CouncilExecutionInput): Promise { +/** + * Launches all council members in parallel and returns launch outcomes. + * Does NOT wait for task completion — actual results are collected by the + * agent via background_output calls after this returns. + */ +export async function executeCouncil(input: CouncilExecutionInput): Promise { const { question, council, launcher, parentSessionID, parentMessageID, parentAgent } = input const prompt = buildCouncilPrompt(question) - const startTimes = new Map() const launchResults = await Promise.allSettled( council.members.map((member) => - launchMember( - member, - prompt, - launcher, - parentSessionID, - parentMessageID, - parentAgent, - startTimes - ) + launchMember(member, prompt, launcher, parentSessionID, parentMessageID, parentAgent) ) ) - const launchedTasks: BackgroundTask[] = [] - const launchedMembers: CouncilMemberConfig[] = [] - const launchFailures: CouncilMemberResponse[] = [] + const launched: CouncilLaunchedMember[] = [] + const failures: CouncilLaunchFailure[] = [] launchResults.forEach((result, index) => { const member = council.members[index] if (result.status === "fulfilled") { - launchedTasks.push(result.value) - launchedMembers.push(member) + launched.push({ member, taskId: result.value.id }) return } - launchFailures.push({ + failures.push({ member, - status: "error", error: `Launch failed: ${String(result.reason)}`, - taskId: "", - durationMs: 0, }) }) - const collected = collectCouncilResults(launchedTasks, launchedMembers, startTimes) - const responses = [...collected, ...launchFailures] - const completedCount = responses.filter((response) => response.status === "completed").length - return { question, - responses, + launched, + failures, totalMembers: council.members.length, - completedCount, - failedCount: council.members.length - completedCount, } } @@ -80,8 +65,7 @@ async function launchMember( launcher: CouncilLauncher, parentSessionID: string, parentMessageID: string, - parentAgent: string | undefined, - startTimes: Map + parentAgent: string | undefined ): Promise { const parsedModel = parseModelString(member.model) if (!parsedModel) { @@ -90,7 +74,7 @@ async function launchMember( const restrictions = createAgentToolRestrictions(["write", "edit", "task"]) const memberName = member.name ?? member.model - const task = await launcher.launch({ + return launcher.launch({ description: `Council member: ${memberName}`, prompt, agent: "athena", @@ -105,7 +89,4 @@ async function launchMember( ...(member.temperature !== undefined ? { temperature: member.temperature } : {}), permission: restrictions.permission, }) - - startTimes.set(task.id, Date.now()) - return task } diff --git a/src/agents/athena/council-result-collector.ts b/src/agents/athena/council-result-collector.ts deleted file mode 100644 index 31faac9c..00000000 --- a/src/agents/athena/council-result-collector.ts +++ /dev/null @@ -1,36 +0,0 @@ -import type { BackgroundTask, BackgroundTaskStatus } from "../../features/background-agent/types" -import type { CouncilMemberConfig, CouncilMemberResponse, CouncilMemberStatus } from "./types" - -export function collectCouncilResults( - tasks: BackgroundTask[], - members: CouncilMemberConfig[], - startTimes: Map -): CouncilMemberResponse[] { - return tasks.map((task, index) => { - const member = members[index] - const status = mapTaskStatus(task.status) - const startTime = startTimes.get(task.id) ?? Date.now() - const finishedAt = task.completedAt?.getTime() ?? Date.now() - - return { - member, - status, - response: status === "completed" ? task.result : undefined, - error: status === "completed" ? undefined : (task.error ?? `Task status: ${task.status}`), - taskId: task.id, - durationMs: Math.max(0, finishedAt - startTime), - } - }) -} - -function mapTaskStatus(taskStatus: BackgroundTaskStatus): CouncilMemberStatus { - if (taskStatus === "completed") { - return "completed" - } - - if (taskStatus === "cancelled" || taskStatus === "interrupt") { - return "timeout" - } - - return "error" -} diff --git a/src/agents/athena/delegation-prompts.test.ts b/src/agents/athena/delegation-prompts.test.ts deleted file mode 100644 index 3d8e850d..00000000 --- a/src/agents/athena/delegation-prompts.test.ts +++ /dev/null @@ -1,125 +0,0 @@ -import { describe, expect, test } from "bun:test" -import type { SynthesizedFinding } from "./synthesis-types" -import { buildAtlasDelegationPrompt, buildPrometheusDelegationPrompt } from "./delegation-prompts" - -function createConfirmedFindings(): SynthesizedFinding[] { - return [ - { - summary: "Guard missing council config in startup", - details: "Athena path can proceed with undefined council members in some flows.", - agreementLevel: "unanimous", - reportedBy: ["OpenAI", "Claude", "Gemini"], - assessment: { - agrees: true, - rationale: "Directly observed from startup and config fallback paths.", - }, - isFalsePositiveRisk: false, - }, - { - summary: "Potential retry thrash in background runner", - details: "Repeated failures can cascade retry windows under high load.", - agreementLevel: "minority", - reportedBy: ["Claude"], - assessment: { - agrees: true, - rationale: "Worth addressing to lower operational risk.", - }, - isFalsePositiveRisk: false, - }, - ] -} - -describe("buildAtlasDelegationPrompt", () => { - //#given confirmed findings and an original question - //#when the Atlas delegation prompt is built - //#then it includes both findings and the original question context - test("includes confirmed findings summaries and original question", () => { - const findings = createConfirmedFindings() - const question = "Which issues should we fix first in Athena integration?" - - const prompt = buildAtlasDelegationPrompt(findings, question) - - expect(prompt).toContain("Original question") - expect(prompt).toContain(question) - expect(prompt).toContain("Guard missing council config in startup") - expect(prompt).toContain("Potential retry thrash in background runner") - }) - - //#given confirmed findings - //#when Atlas prompt is generated - //#then it explicitly asks Atlas to fix those specific issues - test("instructs Atlas to implement direct fixes", () => { - const prompt = buildAtlasDelegationPrompt(createConfirmedFindings(), "Fix Athena reliability issues") - - expect(prompt).toContain("Fix these confirmed issues directly") - expect(prompt).toContain("Implement code changes") - expect(prompt).toContain("prioritize by agreement level") - }) - - //#given a single confirmed finding - //#when Atlas prompt is generated - //#then prompt still renders correctly for edge case input - test("handles a single finding edge case", () => { - const [singleFinding] = createConfirmedFindings() - - const prompt = buildAtlasDelegationPrompt([singleFinding], "Fix this one issue") - - expect(prompt).toContain("1. Guard missing council config in startup") - expect(prompt).toContain("Agreement level: unanimous") - }) -}) - -describe("buildPrometheusDelegationPrompt", () => { - //#given confirmed findings and an original question - //#when the Prometheus delegation prompt is built - //#then it includes both findings and the original question context - test("includes confirmed findings summaries and original question", () => { - const findings = createConfirmedFindings() - const question = "How should we sequence Athena integration hardening work?" - - const prompt = buildPrometheusDelegationPrompt(findings, question) - - expect(prompt).toContain("Original question") - expect(prompt).toContain(question) - expect(prompt).toContain("Guard missing council config in startup") - expect(prompt).toContain("Potential retry thrash in background runner") - }) - - //#given confirmed findings - //#when Prometheus prompt is generated - //#then it explicitly asks for phased planning and prioritization - test("instructs Prometheus to create an execution plan", () => { - const prompt = buildPrometheusDelegationPrompt(createConfirmedFindings(), "Plan Athena stabilization") - - expect(prompt).toContain("Create an execution plan") - expect(prompt).toContain("phased implementation plan") - expect(prompt).toContain("prioritize by agreement level and impact") - }) - - //#given a single confirmed finding - //#when Prometheus prompt is generated - //#then prompt still renders correctly for edge case input - test("handles a single finding edge case", () => { - const [singleFinding] = createConfirmedFindings() - - const prompt = buildPrometheusDelegationPrompt([singleFinding], "Plan this one issue") - - expect(prompt).toContain("1. Guard missing council config in startup") - expect(prompt).toContain("Agreement level: unanimous") - }) - - //#given findings at multiple agreement levels - //#when either delegation prompt is generated - //#then each finding includes agreement level context - test("includes agreement level context for each finding in both prompts", () => { - const findings = createConfirmedFindings() - - const atlasPrompt = buildAtlasDelegationPrompt(findings, "Atlas context") - const prometheusPrompt = buildPrometheusDelegationPrompt(findings, "Prometheus context") - - expect(atlasPrompt).toContain("Agreement level: unanimous") - expect(atlasPrompt).toContain("Agreement level: minority") - expect(prometheusPrompt).toContain("Agreement level: unanimous") - expect(prometheusPrompt).toContain("Agreement level: minority") - }) -}) diff --git a/src/agents/athena/delegation-prompts.ts b/src/agents/athena/delegation-prompts.ts deleted file mode 100644 index 81cd302b..00000000 --- a/src/agents/athena/delegation-prompts.ts +++ /dev/null @@ -1,55 +0,0 @@ -import type { SynthesizedFinding } from "./synthesis-types" - -function formatFindingBlock(finding: SynthesizedFinding, index: number): string { - const assessment = finding.assessment.agrees ? "Agrees" : "Disagrees" - - return [ - `${index + 1}. ${finding.summary}`, - ` Details: ${finding.details}`, - ` Agreement level: ${finding.agreementLevel}`, - ` Athena assessment: ${assessment}`, - ` Rationale: ${finding.assessment.rationale}`, - ].join("\n") -} - -function formatConfirmedFindings(confirmedFindings: SynthesizedFinding[]): string { - return confirmedFindings.map((finding, index) => formatFindingBlock(finding, index)).join("\n\n") -} - -export function buildAtlasDelegationPrompt(confirmedFindings: SynthesizedFinding[], question: string): string { - return [ - "# Atlas Delegation Brief", - "Original question:", - question, - "", - "Task:", - "Fix these confirmed issues directly.", - "", - "Confirmed findings:", - formatConfirmedFindings(confirmedFindings), - "", - "Execution instructions:", - "- Implement code changes to resolve each confirmed issue.", - "- prioritize by agreement level, addressing unanimous findings first.", - "- Validate fixes with relevant tests and type safety checks.", - ].join("\n") -} - -export function buildPrometheusDelegationPrompt(confirmedFindings: SynthesizedFinding[], question: string): string { - return [ - "# Prometheus Delegation Brief", - "Original question:", - question, - "", - "Task:", - "Create an execution plan for these confirmed issues.", - "", - "Confirmed findings:", - formatConfirmedFindings(confirmedFindings), - "", - "Planning instructions:", - "- Produce a phased implementation plan with clear task boundaries.", - "- prioritize by agreement level and impact.", - "- Include verification checkpoints for each phase.", - ].join("\n") -} diff --git a/src/agents/athena/findings-presenter.test.ts b/src/agents/athena/findings-presenter.test.ts deleted file mode 100644 index 7c45f253..00000000 --- a/src/agents/athena/findings-presenter.test.ts +++ /dev/null @@ -1,167 +0,0 @@ -import { describe, expect, test } from "bun:test" -import type { SynthesisResult } from "./synthesis-types" -import { formatFindingsForUser } from "./findings-presenter" - -function createSynthesisResult(overrides?: Partial): SynthesisResult { - return { - question: "Review the Athena council outputs for actionable risks", - findings: [ - { - summary: "Validate configuration before execution", - details: "Missing guard clauses can allow invalid member configs.", - agreementLevel: "majority", - reportedBy: ["OpenAI", "Claude"], - assessment: { - agrees: true, - rationale: "This aligns with repeated failures observed in setup paths.", - }, - isFalsePositiveRisk: false, - }, - { - summary: "Retry strategy lacks upper bounds", - details: "Unbounded retries may cause runaway background tasks.", - agreementLevel: "solo", - reportedBy: ["Gemini"], - assessment: { - agrees: false, - rationale: "Current retry count is already constrained in most flows.", - }, - isFalsePositiveRisk: true, - }, - { - summary: "Preserve partial successes", - details: "Do not fail entire council run when one member errors.", - agreementLevel: "unanimous", - reportedBy: ["OpenAI", "Claude", "Gemini"], - assessment: { - agrees: true, - rationale: "This is required for resilient multi-model orchestration.", - }, - isFalsePositiveRisk: false, - }, - { - summary: "Reduce prompt token duplication", - details: "Duplicate context blocks increase cost without improving quality.", - agreementLevel: "minority", - reportedBy: ["Claude"], - assessment: { - agrees: true, - rationale: "Consolidation should lower cost while preserving intent.", - }, - isFalsePositiveRisk: false, - }, - ], - memberProvenance: [], - totalFindings: 4, - consensusCount: 2, - outlierCount: 1, - ...overrides, - } -} - -describe("formatFindingsForUser", () => { - //#given findings across all agreement levels - //#when formatFindingsForUser is called - //#then groups appear in deterministic order: unanimous, majority, minority, solo - test("groups findings by agreement level in required order", () => { - const result = createSynthesisResult() - - const output = formatFindingsForUser(result) - - const unanimousIndex = output.indexOf("## Unanimous Findings") - const majorityIndex = output.indexOf("## Majority Findings") - const minorityIndex = output.indexOf("## Minority Findings") - const soloIndex = output.indexOf("## Solo Findings") - - expect(unanimousIndex).toBeGreaterThan(-1) - expect(majorityIndex).toBeGreaterThan(unanimousIndex) - expect(minorityIndex).toBeGreaterThan(majorityIndex) - expect(soloIndex).toBeGreaterThan(minorityIndex) - }) - - //#given a finding with assessment details - //#when formatting is generated - //#then each finding includes summary, details, reported-by, and Athena rationale - test("renders finding body and Athena assessment rationale", () => { - const result = createSynthesisResult() - - const output = formatFindingsForUser(result) - - expect(output).toContain("Validate configuration before execution") - expect(output).toContain("Missing guard clauses can allow invalid member configs.") - expect(output).toContain("Reported by: OpenAI, Claude") - expect(output).toContain("Athena assessment: Agrees") - expect(output).toContain("Rationale: This aligns with repeated failures observed in setup paths.") - }) - - //#given a solo finding flagged as false-positive risk - //#when formatting is generated - //#then a visible warning marker is included - test("shows false-positive warning for risky solo findings", () => { - const result = createSynthesisResult() - - const output = formatFindingsForUser(result) - - expect(output).toContain("[False Positive Risk]") - expect(output).toContain("Retry strategy lacks upper bounds") - }) - - //#given no findings - //#when formatFindingsForUser is called - //#then output includes a graceful no-findings message - test("handles empty findings with a no-findings message", () => { - const result = createSynthesisResult({ findings: [], totalFindings: 0, consensusCount: 0, outlierCount: 0 }) - - const output = formatFindingsForUser(result) - - expect(output).toContain("No synthesized findings are available") - }) - - //#given multiple majority findings with different reporter counts - //#when formatting is generated - //#then group header shows the agreement level label without a misleading single count - test("shows agreement level label in group header without single-finding count", () => { - const result = createSynthesisResult({ - findings: [ - { - summary: "Finding A", - details: "Reported by 3 members", - agreementLevel: "majority", - reportedBy: ["OpenAI", "Claude", "Gemini"], - assessment: { agrees: true, rationale: "Valid" }, - isFalsePositiveRisk: false, - }, - { - summary: "Finding B", - details: "Reported by 2 members", - agreementLevel: "majority", - reportedBy: ["OpenAI", "Claude"], - assessment: { agrees: true, rationale: "Also valid" }, - isFalsePositiveRisk: false, - }, - ], - }) - - const output = formatFindingsForUser(result) - - // The header should show the level label without a misleading single-finding count - // It should NOT use the first finding's count as the group header - expect(output).not.toContain("## Majority Findings (3 members report this (majority))") - expect(output).toContain("## Majority Findings") - // Each individual finding still shows its own agreement context - expect(output).toContain("Agreement context: 3 members report this (majority)") - expect(output).toContain("Agreement context: 2 members report this (majority)") - }) - - //#given a non-empty findings result - //#when formatting is generated - //#then output ends with an action recommendation section - test("includes a final action recommendation section", () => { - const result = createSynthesisResult() - - const output = formatFindingsForUser(result) - - expect(output.trimEnd()).toMatch(/## Action Recommendation[\s\S]*$/) - expect(output).toContain("Prioritize unanimous and majority findings") - }) -}) diff --git a/src/agents/athena/findings-presenter.ts b/src/agents/athena/findings-presenter.ts deleted file mode 100644 index b8470815..00000000 --- a/src/agents/athena/findings-presenter.ts +++ /dev/null @@ -1,81 +0,0 @@ -import type { SynthesisResult, SynthesizedFinding } from "./synthesis-types" -import type { AgreementLevel } from "./types" - -const AGREEMENT_ORDER: AgreementLevel[] = ["unanimous", "majority", "minority", "solo"] - -function toTitle(level: AgreementLevel): string { - return `${level.charAt(0).toUpperCase()}${level.slice(1)}` -} - -function formatAgreementLine(level: AgreementLevel, finding: SynthesizedFinding): string { - const memberCount = finding.reportedBy.length - - switch (level) { - case "unanimous": - return `${memberCount}/${memberCount} members agree` - case "majority": - return `${memberCount} members report this (majority)` - case "minority": - return `${memberCount} members report this (minority)` - case "solo": - return `${memberCount} member reported this` - } -} - -function formatFinding(level: AgreementLevel, finding: SynthesizedFinding): string { - const assessment = finding.assessment.agrees ? "Agrees" : "Disagrees" - const warning = level === "solo" && finding.isFalsePositiveRisk ? " [False Positive Risk]" : "" - - return [ - `### ${finding.summary}${warning}`, - `Details: ${finding.details}`, - `Reported by: ${finding.reportedBy.join(", ")}`, - `Agreement context: ${formatAgreementLine(level, finding)}`, - `Athena assessment: ${assessment}`, - `Rationale: ${finding.assessment.rationale}`, - ].join("\n") -} - -function formatActionRecommendation(result: SynthesisResult, groupedFindings: Map): string { - const counts = AGREEMENT_ORDER.map((level) => `${toTitle(level)}: ${groupedFindings.get(level)?.length ?? 0}`).join(" | ") - - return [ - "## Action Recommendation", - `Findings by agreement level: ${counts}`, - "Prioritize unanimous and majority findings for immediate execution,", - "then review minority findings, and manually validate solo findings before delegating changes.", - `Question context: ${result.question}`, - ].join("\n") -} - -export function formatFindingsForUser(result: SynthesisResult): string { - if (result.findings.length === 0) { - return [ - "# Synthesized Findings", - "No synthesized findings are available.", - "## Action Recommendation", - "Gather additional council responses or re-run synthesis before delegation.", - `Question context: ${result.question}`, - ].join("\n\n") - } - - const groupedFindings = new Map( - AGREEMENT_ORDER.map((level) => [ - level, - result.findings.filter((finding) => finding.agreementLevel === level), - ]), - ) - - const sections = AGREEMENT_ORDER.flatMap((level) => { - const findings = groupedFindings.get(level) ?? [] - if (findings.length === 0) { - return [] - } - - const header = `## ${toTitle(level)} Findings (${findings.length})` - const entries = findings.map((finding) => formatFinding(level, finding)).join("\n\n") - return [`${header}\n\n${entries}`] - }) - - return ["# Synthesized Findings", ...sections, formatActionRecommendation(result, groupedFindings)].join("\n\n") -} diff --git a/src/agents/athena/index.ts b/src/agents/athena/index.ts index 694c0449..1d67f30d 100644 --- a/src/agents/athena/index.ts +++ b/src/agents/athena/index.ts @@ -3,10 +3,4 @@ export * from "./agent" export * from "./model-parser" export * from "./council-prompt" export * from "./council-orchestrator" -export * from "./council-result-collector" -export * from "./synthesis-types" -export * from "./synthesis-prompt" -export * from "./synthesis-formatter" -export * from "./findings-presenter" -export * from "./delegation-prompts" export * from "../../config/schema/athena" diff --git a/src/agents/athena/synthesis-formatter.test.ts b/src/agents/athena/synthesis-formatter.test.ts deleted file mode 100644 index c1a6e16d..00000000 --- a/src/agents/athena/synthesis-formatter.test.ts +++ /dev/null @@ -1,157 +0,0 @@ -import { describe, expect, test } from "bun:test" -import { formatCouncilResultsForSynthesis } from "./synthesis-formatter" -import type { CouncilExecutionResult } from "./types" - -function createResult(overrides?: Partial): CouncilExecutionResult { - const responses: CouncilExecutionResult["responses"] = [ - { - member: { model: "openai/gpt-5.3-codex", name: "OpenAI" }, - status: "completed", - response: "Finding A from OpenAI", - taskId: "task-1", - durationMs: 120, - }, - { - member: { model: "anthropic/claude-sonnet-4-5", name: "Claude" }, - status: "completed", - response: "Finding B from Claude", - taskId: "task-2", - durationMs: 240, - }, - { - member: { model: "google/gemini-3-pro", name: "Gemini" }, - status: "completed", - response: "Finding C from Gemini", - taskId: "task-3", - durationMs: 360, - }, - ] - - return { - question: "What reliability risks exist?", - responses, - totalMembers: 3, - completedCount: 3, - failedCount: 0, - ...overrides, - } -} - -describe("formatCouncilResultsForSynthesis", () => { - //#given a CouncilExecutionResult with 3 completed members - //#when formatCouncilResultsForSynthesis is called - //#then output contains each member's model name as a header - //#then output contains each member's raw response text - //#then output contains member status and duration - test("formats all completed members with provenance and response text", () => { - const result = createResult() - - const output = formatCouncilResultsForSynthesis(result) - - expect(output).toContain("openai/gpt-5.3-codex") - expect(output).toContain("anthropic/claude-sonnet-4-5") - expect(output).toContain("google/gemini-3-pro") - - expect(output).toContain("Finding A from OpenAI") - expect(output).toContain("Finding B from Claude") - expect(output).toContain("Finding C from Gemini") - - expect(output).toContain("Status: completed") - expect(output).toContain("Duration: 120ms") - expect(output).toContain("Duration: 240ms") - expect(output).toContain("Duration: 360ms") - }) - - //#given a CouncilExecutionResult with 1 completed and 1 failed member - //#when formatCouncilResultsForSynthesis is called - //#then completed member's response is included - //#then failed member shows error status and error message - //#then failed member does NOT have a response section - test("includes completed response and failed error without response section", () => { - const result = createResult({ - responses: [ - { - member: { model: "openai/gpt-5.3-codex" }, - status: "completed", - response: "Primary finding", - taskId: "task-1", - durationMs: 80, - }, - { - member: { model: "xai/grok-code-fast-1" }, - status: "error", - error: "Timeout from provider", - taskId: "task-2", - durationMs: 500, - }, - ], - totalMembers: 2, - completedCount: 1, - failedCount: 1, - }) - - const output = formatCouncilResultsForSynthesis(result) - - expect(output).toContain("Primary finding") - expect(output).toContain("xai/grok-code-fast-1") - expect(output).toContain("Status: error") - expect(output).toContain("Error: Timeout from provider") - expect(output).not.toContain("Response:\nTimeout from provider") - }) - - //#given a CouncilExecutionResult with 0 completed members - //#when formatCouncilResultsForSynthesis is called - //#then output contains a "no successful responses" message - test("shows no successful responses message when all members fail", () => { - const result = createResult({ - responses: [ - { - member: { model: "openai/gpt-5.3-codex" }, - status: "error", - error: "No output", - taskId: "task-1", - durationMs: 200, - }, - ], - totalMembers: 1, - completedCount: 0, - failedCount: 1, - }) - - const output = formatCouncilResultsForSynthesis(result) - - expect(output).toContain("No successful responses") - }) - - //#given members with custom names - //#when formatCouncilResultsForSynthesis is called - //#then output uses member.name if provided, falls back to member.model - test("prefers custom member name and falls back to model", () => { - const result = createResult({ - responses: [ - { - member: { model: "openai/gpt-5.3-codex", name: "Council Alpha" }, - status: "completed", - response: "Custom member response", - taskId: "task-1", - durationMs: 10, - }, - { - member: { model: "google/gemini-3-pro" }, - status: "completed", - response: "Default member response", - taskId: "task-2", - durationMs: 11, - }, - ], - totalMembers: 2, - completedCount: 2, - failedCount: 0, - }) - - const output = formatCouncilResultsForSynthesis(result) - - expect(output).toContain("Council Alpha") - expect(output).toContain("google/gemini-3-pro") - }) -}) diff --git a/src/agents/athena/synthesis-formatter.ts b/src/agents/athena/synthesis-formatter.ts deleted file mode 100644 index cff06e60..00000000 --- a/src/agents/athena/synthesis-formatter.ts +++ /dev/null @@ -1,48 +0,0 @@ -import type { CouncilExecutionResult } from "./types" - -export function formatCouncilResultsForSynthesis(result: CouncilExecutionResult): string { - const completedResponses = result.responses.filter((response) => response.status === "completed") - - if (completedResponses.length === 0) { - return [ - "# Council Responses for Synthesis", - `Question: ${result.question}`, - "No successful responses from council members.", - "Review failed member details below for provenance.", - ...result.responses.map((response) => { - const memberName = response.member.name ?? response.member.model - return [ - `## Member: ${memberName} (${response.status})`, - `Model: ${response.member.model}`, - `Status: ${response.status}`, - `Duration: ${response.durationMs}ms`, - `Error: ${response.error ?? "No error message provided"}`, - ].join("\n") - }), - ].join("\n\n") - } - - const sections = result.responses.map((response) => { - const memberName = response.member.name ?? response.member.model - const header = [ - `## Member: ${memberName} (${response.status})`, - `Model: ${response.member.model}`, - `Status: ${response.status}`, - `Duration: ${response.durationMs}ms`, - ] - - if (response.status === "completed") { - const responseBody = response.response?.trim() ? response.response : "No response content provided" - return [...header, "Response:", responseBody].join("\n") - } - - return [...header, `Error: ${response.error ?? "No error message provided"}`].join("\n") - }) - - return [ - "# Council Responses for Synthesis", - `Question: ${result.question}`, - `Completed responses: ${result.completedCount}/${result.totalMembers}`, - ...sections, - ].join("\n\n") -} diff --git a/src/agents/athena/synthesis-prompt.ts b/src/agents/athena/synthesis-prompt.ts deleted file mode 100644 index 0470955c..00000000 --- a/src/agents/athena/synthesis-prompt.ts +++ /dev/null @@ -1,44 +0,0 @@ -export function buildSynthesisPrompt(formattedResponses: string, question: string, completedCount: number): string { - return `You are Athena, the synthesis lead for a multi-model council. Your job is to merge independent model outputs into a single, evidence-grounded synthesis. - -## Original Question -${question} - -## Council Responses -${formattedResponses} - -## Your Responsibilities -1. Identify distinct findings across all completed member responses. -2. Group findings that refer to the same underlying issue (semantic similarity, not exact wording). -3. Classify agreementLevel for each finding using ${completedCount} completed member(s): - - unanimous: all completed members reported the finding - - majority: more than 50% of completed members reported the finding - - minority: 2 or more members reported it, but not a majority - - solo: only 1 member reported it -4. Add AthenaAssessment for each finding: - - agrees: whether you agree with the finding - - rationale: concise reason for agreement or disagreement -5. Set isFalsePositiveRisk: - - true for solo findings (likely false positives unless strongly supported) - - false for findings reported by multiple members - -## Output Contract -Return JSON only with this shape: -{ - "findings": [ - { - "summary": "string", - "details": "string", - "agreementLevel": "unanimous | majority | minority | solo", - "reportedBy": ["model/name"], - "assessment": { - "agrees": true, - "rationale": "string" - }, - "isFalsePositiveRisk": false - } - ] -} - -The finding object must match the SynthesizedFinding type exactly. Keep findings concise, concrete, and tied to source responses.` -} diff --git a/src/agents/athena/synthesis-types.ts b/src/agents/athena/synthesis-types.ts deleted file mode 100644 index 4efb8235..00000000 --- a/src/agents/athena/synthesis-types.ts +++ /dev/null @@ -1,31 +0,0 @@ -import type { AgreementLevel, CouncilMemberConfig, CouncilMemberStatus } from "./types" - -export interface AthenaAssessment { - agrees: boolean - rationale: string -} - -export interface SynthesizedFinding { - summary: string - details: string - agreementLevel: AgreementLevel - reportedBy: string[] - assessment: AthenaAssessment - isFalsePositiveRisk: boolean -} - -export interface MemberProvenance { - member: CouncilMemberConfig - status: CouncilMemberStatus - rawResponse?: string - durationMs: number -} - -export interface SynthesisResult { - question: string - findings: SynthesizedFinding[] - memberProvenance: MemberProvenance[] - totalFindings: number - consensusCount: number - outlierCount: number -} diff --git a/src/agents/athena/types.ts b/src/agents/athena/types.ts index a79e2f67..cb881b9b 100644 --- a/src/agents/athena/types.ts +++ b/src/agents/athena/types.ts @@ -14,23 +14,20 @@ export interface AthenaConfig { council: CouncilConfig } -export type CouncilMemberStatus = "completed" | "timeout" | "error" - -export type AgreementLevel = "unanimous" | "majority" | "minority" | "solo" - -export interface CouncilMemberResponse { +export interface CouncilLaunchFailure { member: CouncilMemberConfig - status: CouncilMemberStatus - response?: string - error?: string - taskId: string - durationMs: number + error: string } -export interface CouncilExecutionResult { - question: string - responses: CouncilMemberResponse[] - totalMembers: number - completedCount: number - failedCount: number +export interface CouncilLaunchedMember { + member: CouncilMemberConfig + taskId: string +} + +/** Return type of executeCouncil — only tracks launch outcomes, not task completion */ +export interface CouncilLaunchResult { + question: string + launched: CouncilLaunchedMember[] + failures: CouncilLaunchFailure[] + totalMembers: number } diff --git a/src/tools/athena-council/tools.ts b/src/tools/athena-council/tools.ts index 11c75eda..b4a6c218 100644 --- a/src/tools/athena-council/tools.ts +++ b/src/tools/athena-council/tools.ts @@ -113,22 +113,18 @@ export function createAthenaCouncilTool(args: { }) const launchResult: AthenaCouncilLaunchResult = { - launched: execution.responses.filter((response) => response.taskId.length > 0).length, - members: execution.responses - .filter((response) => response.taskId.length > 0) - .map((response) => ({ - task_id: response.taskId, - name: response.member.name ?? response.member.model, - model: response.member.model, - status: "running", - })), - failed: execution.responses - .filter((response) => response.taskId.length === 0) - .map((response) => ({ - name: response.member.name ?? response.member.model, - model: response.member.model, - error: response.error ?? "Launch failed", - })), + launched: execution.launched.length, + members: execution.launched.map((entry) => ({ + task_id: entry.taskId, + name: entry.member.name ?? entry.member.model, + model: entry.member.model, + status: "running", + })), + failed: execution.failures.map((entry) => ({ + name: entry.member.name ?? entry.member.model, + model: entry.member.model, + error: entry.error, + })), } markCouncilDone(toolContext.sessionID)