refactor(athena): remove dead code from phases 2, 3, 5 pipeline

Remove 9 files (913 lines) from the code-driven synthesis pipeline that
was superseded by the agent-driven approach in phases 6-8.

Phases 3/5 built: collectCouncilResults → formatForSynthesis →
buildSynthesisPrompt → formatFindingsForUser → buildDelegationPrompt.

Phases 6-8 replaced it with: launch → background_output → Athena
synthesizes in conversation → switch_agent. The old pipeline was
never wired into runtime and all consumers were other dead code.

Also simplifies executeCouncil to return CouncilLaunchResult (task IDs
+ failures) instead of reading stale task status via collectCouncilResults.

Deleted: council-result-collector, synthesis-types, synthesis-prompt,
synthesis-formatter, findings-presenter, delegation-prompts (+ 4 tests).
Cleaned: CouncilMemberStatus, AgreementLevel, CouncilMemberResponse,
CouncilExecutionResult types from types.ts.
This commit is contained in:
ismeth 2026-02-13 17:05:23 +01:00 committed by YeonGyu-Kim
parent d8ba9b1f0c
commit 4d675bac89
14 changed files with 103 additions and 913 deletions

View File

@ -3,16 +3,6 @@ import { buildCouncilPrompt } from "./council-prompt"
import { executeCouncil } from "./council-orchestrator"
import type { CouncilConfig } from "./types"
type MockTaskStatus = "completed" | "error" | "cancelled" | "interrupt"
interface MockTask {
id: string
status: MockTaskStatus
result?: string
error?: string
completedAt?: Date
}
interface MockLaunchInput {
description: string
prompt: string
@ -25,20 +15,15 @@ interface MockLaunchInput {
permission?: Record<string, "ask" | "allow" | "deny">
}
function createMockTask(task: MockTask, launch: MockLaunchInput): MockTask & {
parentSessionID: string
parentMessageID: string
description: string
prompt: string
agent: string
} {
function createMockTask(id: string, launch: MockLaunchInput) {
return {
id,
status: "pending" as const,
parentSessionID: launch.parentSessionID,
parentMessageID: launch.parentMessageID,
description: launch.description,
prompt: launch.prompt,
agent: launch.agent,
...task,
}
}
@ -51,15 +36,7 @@ describe("executeCouncil", () => {
const launcher = {
launch: async (input: MockLaunchInput) => {
launches.push(input)
return createMockTask(
{
id: `task-${launches.length}`,
status: "completed",
result: `response-${launches.length}`,
completedAt: new Date(),
},
input
)
return createMockTask(`task-${launches.length}`, input)
},
}
@ -84,8 +61,9 @@ describe("executeCouncil", () => {
const expectedPrompt = buildCouncilPrompt(question)
expect(launches).toHaveLength(3)
expect(result.completedCount).toBe(3)
expect(result.failedCount).toBe(0)
expect(result.launched).toHaveLength(3)
expect(result.failures).toHaveLength(0)
expect(result.totalMembers).toBe(3)
for (const launch of launches) {
expect(launch.prompt).toBe(expectedPrompt)
@ -98,33 +76,16 @@ describe("executeCouncil", () => {
expect(launches[2]?.model).toEqual({ providerID: "google", modelID: "gemini-3-pro" })
})
//#given a council with 3 members where 1 member fails
//#given a council with 3 members where 1 launch throws
//#when executeCouncil is called
//#then partial failures are tolerated and preserved in responses
test("returns successful result for partial failures", async () => {
//#then launch failures are captured separately from successful launches
test("captures launch failures separately from successful launches", async () => {
const launcher = {
launch: async (input: MockLaunchInput) => {
if (input.model?.providerID === "anthropic") {
return createMockTask(
{
id: "task-failed",
status: "error",
error: "Token limit exceeded",
completedAt: new Date(),
},
input
)
throw new Error("Provider unavailable")
}
return createMockTask(
{
id: `task-${input.model?.providerID}`,
status: "completed",
result: `ok-${input.model?.providerID}`,
completedAt: new Date(),
},
input
)
return createMockTask(`task-${input.model?.providerID}`, input)
},
}
@ -142,28 +103,21 @@ describe("executeCouncil", () => {
parentMessageID: "message-1",
})
expect(result.completedCount).toBe(2)
expect(result.failedCount).toBe(1)
expect(result.responses).toHaveLength(3)
expect(result.responses.filter((response) => response.status === "completed")).toHaveLength(2)
expect(result.responses.filter((response) => response.status === "error")).toHaveLength(1)
expect(result.launched).toHaveLength(2)
expect(result.failures).toHaveLength(1)
expect(result.totalMembers).toBe(3)
expect(result.failures[0]?.member.model).toBe("anthropic/claude-sonnet-4-5")
expect(result.failures[0]?.error).toContain("Launch failed")
})
//#given a council where all members fail
//#given a council where all launches throw
//#when executeCouncil is called
//#then it returns structured error result with zero completions
test("returns all failures when every member fails", async () => {
//#then all members appear as failures with zero launched
test("returns all failures when every launch throws", async () => {
const launcher = {
launch: async (input: MockLaunchInput) =>
createMockTask(
{
id: `task-${input.model?.providerID}`,
status: "error",
error: "Model unavailable",
completedAt: new Date(),
},
input
),
launch: async () => {
throw new Error("Model unavailable")
},
}
const result = await executeCouncil({
@ -179,29 +133,21 @@ describe("executeCouncil", () => {
parentMessageID: "message-1",
})
expect(result.completedCount).toBe(0)
expect(result.failedCount).toBe(2)
expect(result.responses).toHaveLength(2)
expect(result.responses.every((response) => response.status === "error")).toBe(true)
expect(result.launched).toHaveLength(0)
expect(result.failures).toHaveLength(2)
expect(result.totalMembers).toBe(2)
expect(result.failures.every((f) => f.error.includes("Launch failed"))).toBe(true)
})
//#given a council with one invalid model string
//#when executeCouncil is called
//#then invalid member becomes an error response while others still execute
//#then invalid member becomes a failure while others still launch
test("handles invalid model strings without crashing council execution", async () => {
const launches: MockLaunchInput[] = []
const launcher = {
launch: async (input: MockLaunchInput) => {
launches.push(input)
return createMockTask(
{
id: `task-${launches.length}`,
status: "completed",
result: "valid-member-response",
completedAt: new Date(),
},
input
)
return createMockTask(`task-${launches.length}`, input)
},
}
@ -219,10 +165,9 @@ describe("executeCouncil", () => {
})
expect(launches).toHaveLength(1)
expect(result.completedCount).toBe(1)
expect(result.failedCount).toBe(1)
expect(result.responses).toHaveLength(2)
expect(result.responses.find((response) => response.member.model === "invalid-model")?.status).toBe("error")
expect(result.launched).toHaveLength(1)
expect(result.failures).toHaveLength(1)
expect(result.failures.find((f) => f.member.model === "invalid-model")?.error).toContain("Launch failed")
})
//#given members with per-member temperature and variant
@ -233,15 +178,7 @@ describe("executeCouncil", () => {
const launcher = {
launch: async (input: MockLaunchInput) => {
launches.push(input)
return createMockTask(
{
id: `task-${launches.length}`,
status: "completed",
result: "ok",
completedAt: new Date(),
},
input
)
return createMockTask(`task-${launches.length}`, input)
},
}
@ -264,4 +201,33 @@ describe("executeCouncil", () => {
expect(launches[1]?.temperature).toBe(0.3)
expect(launches[1]?.model?.variant).toBeUndefined()
})
//#given launched members
//#when executeCouncil returns
//#then each launched member has a taskId for background_output retrieval
test("returns task IDs for background_output retrieval", async () => {
const launcher = {
launch: async (input: MockLaunchInput) =>
createMockTask(`bg_${input.model?.providerID}`, input),
}
const result = await executeCouncil({
question: "Review error handling",
council: {
members: [
{ model: "openai/gpt-5.3-codex", name: "OpenAI" },
{ model: "google/gemini-3-pro", name: "Gemini" },
],
},
launcher,
parentSessionID: "session-1",
parentMessageID: "message-1",
})
expect(result.launched).toHaveLength(2)
expect(result.launched[0]?.taskId).toBe("bg_openai")
expect(result.launched[0]?.member.name).toBe("OpenAI")
expect(result.launched[1]?.taskId).toBe("bg_google")
expect(result.launched[1]?.member.name).toBe("Gemini")
})
})

View File

@ -1,9 +1,8 @@
import type { LaunchInput, BackgroundTask } from "../../features/background-agent/types"
import { createAgentToolRestrictions } from "../../shared/permission-compat"
import { buildCouncilPrompt } from "./council-prompt"
import { collectCouncilResults } from "./council-result-collector"
import { parseModelString } from "./model-parser"
import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types"
import type { CouncilConfig, CouncilLaunchFailure, CouncilLaunchedMember, CouncilLaunchResult, CouncilMemberConfig } from "./types"
export type CouncilLaunchInput = LaunchInput
@ -20,57 +19,43 @@ export interface CouncilExecutionInput {
parentAgent?: string
}
export async function executeCouncil(input: CouncilExecutionInput): Promise<CouncilExecutionResult> {
/**
* Launches all council members in parallel and returns launch outcomes.
* Does NOT wait for task completion — actual results are collected by the
* agent via background_output calls after this returns.
*/
export async function executeCouncil(input: CouncilExecutionInput): Promise<CouncilLaunchResult> {
const { question, council, launcher, parentSessionID, parentMessageID, parentAgent } = input
const prompt = buildCouncilPrompt(question)
const startTimes = new Map<string, number>()
const launchResults = await Promise.allSettled(
council.members.map((member) =>
launchMember(
member,
prompt,
launcher,
parentSessionID,
parentMessageID,
parentAgent,
startTimes
)
launchMember(member, prompt, launcher, parentSessionID, parentMessageID, parentAgent)
)
)
const launchedTasks: BackgroundTask[] = []
const launchedMembers: CouncilMemberConfig[] = []
const launchFailures: CouncilMemberResponse[] = []
const launched: CouncilLaunchedMember[] = []
const failures: CouncilLaunchFailure[] = []
launchResults.forEach((result, index) => {
const member = council.members[index]
if (result.status === "fulfilled") {
launchedTasks.push(result.value)
launchedMembers.push(member)
launched.push({ member, taskId: result.value.id })
return
}
launchFailures.push({
failures.push({
member,
status: "error",
error: `Launch failed: ${String(result.reason)}`,
taskId: "",
durationMs: 0,
})
})
const collected = collectCouncilResults(launchedTasks, launchedMembers, startTimes)
const responses = [...collected, ...launchFailures]
const completedCount = responses.filter((response) => response.status === "completed").length
return {
question,
responses,
launched,
failures,
totalMembers: council.members.length,
completedCount,
failedCount: council.members.length - completedCount,
}
}
@ -80,8 +65,7 @@ async function launchMember(
launcher: CouncilLauncher,
parentSessionID: string,
parentMessageID: string,
parentAgent: string | undefined,
startTimes: Map<string, number>
parentAgent: string | undefined
): Promise<BackgroundTask> {
const parsedModel = parseModelString(member.model)
if (!parsedModel) {
@ -90,7 +74,7 @@ async function launchMember(
const restrictions = createAgentToolRestrictions(["write", "edit", "task"])
const memberName = member.name ?? member.model
const task = await launcher.launch({
return launcher.launch({
description: `Council member: ${memberName}`,
prompt,
agent: "athena",
@ -105,7 +89,4 @@ async function launchMember(
...(member.temperature !== undefined ? { temperature: member.temperature } : {}),
permission: restrictions.permission,
})
startTimes.set(task.id, Date.now())
return task
}

View File

@ -1,36 +0,0 @@
import type { BackgroundTask, BackgroundTaskStatus } from "../../features/background-agent/types"
import type { CouncilMemberConfig, CouncilMemberResponse, CouncilMemberStatus } from "./types"
/**
 * Builds one council response per launched background task.
 *
 * `tasks` and `members` are treated as parallel arrays: the task at index `i`
 * is paired with the member at index `i`.
 *
 * @param tasks - background tasks to summarize
 * @param members - member configs, positionally aligned with `tasks`
 * @param startTimes - launch timestamps (ms) keyed by task id
 * @returns one response per task, in task order
 */
export function collectCouncilResults(
  tasks: BackgroundTask[],
  members: CouncilMemberConfig[],
  startTimes: Map<string, number>
): CouncilMemberResponse[] {
  const responses: CouncilMemberResponse[] = []

  tasks.forEach((task, position) => {
    const status = mapTaskStatus(task.status)
    const succeeded = status === "completed"

    // Missing timestamps fall back to "now"; the clamp below keeps the duration non-negative.
    const startedAt = startTimes.get(task.id) ?? Date.now()
    const endedAt = task.completedAt?.getTime() ?? Date.now()

    responses.push({
      member: members[position],
      status,
      response: succeeded ? task.result : undefined,
      error: succeeded ? undefined : (task.error ?? `Task status: ${task.status}`),
      taskId: task.id,
      durationMs: Math.max(0, endedAt - startedAt),
    })
  })

  return responses
}
/**
 * Translates a background task status into a council member status.
 *
 * - "completed" stays "completed"
 * - "cancelled" and "interrupt" are reported as "timeout"
 * - every other status is reported as "error"
 */
function mapTaskStatus(taskStatus: BackgroundTaskStatus): CouncilMemberStatus {
  switch (taskStatus) {
    case "completed":
      return "completed"
    case "cancelled":
    case "interrupt":
      return "timeout"
    default:
      return "error"
  }
}

View File

@ -1,125 +0,0 @@
import { describe, expect, test } from "bun:test"
import type { SynthesizedFinding } from "./synthesis-types"
import { buildAtlasDelegationPrompt, buildPrometheusDelegationPrompt } from "./delegation-prompts"
/**
 * Fixture: two confirmed findings, one unanimous and one minority.
 * Fresh objects are created on every call so tests cannot leak mutations into each other.
 */
function createConfirmedFindings(): SynthesizedFinding[] {
  const missingConfigGuard: SynthesizedFinding = {
    summary: "Guard missing council config in startup",
    details: "Athena path can proceed with undefined council members in some flows.",
    agreementLevel: "unanimous",
    reportedBy: ["OpenAI", "Claude", "Gemini"],
    assessment: {
      agrees: true,
      rationale: "Directly observed from startup and config fallback paths.",
    },
    isFalsePositiveRisk: false,
  }

  const retryThrash: SynthesizedFinding = {
    summary: "Potential retry thrash in background runner",
    details: "Repeated failures can cascade retry windows under high load.",
    agreementLevel: "minority",
    reportedBy: ["Claude"],
    assessment: {
      agrees: true,
      rationale: "Worth addressing to lower operational risk.",
    },
    isFalsePositiveRisk: false,
  }

  return [missingConfigGuard, retryThrash]
}
describe("buildAtlasDelegationPrompt", () => {
  //#given confirmed findings together with the original question
  //#when the Atlas delegation prompt is built
  //#then the prompt carries the question and every finding summary
  test("includes confirmed findings summaries and original question", () => {
    const originalQuestion = "Which issues should we fix first in Athena integration?"

    const atlasPrompt = buildAtlasDelegationPrompt(createConfirmedFindings(), originalQuestion)

    const expectedFragments = [
      "Original question",
      originalQuestion,
      "Guard missing council config in startup",
      "Potential retry thrash in background runner",
    ]
    for (const fragment of expectedFragments) {
      expect(atlasPrompt).toContain(fragment)
    }
  })

  //#given confirmed findings
  //#when the Atlas prompt is generated
  //#then it tells Atlas to fix those specific issues directly
  test("instructs Atlas to implement direct fixes", () => {
    const atlasPrompt = buildAtlasDelegationPrompt(createConfirmedFindings(), "Fix Athena reliability issues")

    const expectedInstructions = [
      "Fix these confirmed issues directly",
      "Implement code changes",
      "prioritize by agreement level",
    ]
    for (const instruction of expectedInstructions) {
      expect(atlasPrompt).toContain(instruction)
    }
  })

  //#given exactly one confirmed finding
  //#when the Atlas prompt is generated
  //#then the single finding is still numbered and labelled correctly
  test("handles a single finding edge case", () => {
    const onlyFirstFinding = createConfirmedFindings().slice(0, 1)

    const atlasPrompt = buildAtlasDelegationPrompt(onlyFirstFinding, "Fix this one issue")

    expect(atlasPrompt).toContain("1. Guard missing council config in startup")
    expect(atlasPrompt).toContain("Agreement level: unanimous")
  })
})
describe("buildPrometheusDelegationPrompt", () => {
  //#given confirmed findings together with the original question
  //#when the Prometheus delegation prompt is built
  //#then the prompt carries the question and every finding summary
  test("includes confirmed findings summaries and original question", () => {
    const originalQuestion = "How should we sequence Athena integration hardening work?"

    const planningPrompt = buildPrometheusDelegationPrompt(createConfirmedFindings(), originalQuestion)

    const expectedFragments = [
      "Original question",
      originalQuestion,
      "Guard missing council config in startup",
      "Potential retry thrash in background runner",
    ]
    for (const fragment of expectedFragments) {
      expect(planningPrompt).toContain(fragment)
    }
  })

  //#given confirmed findings
  //#when the Prometheus prompt is generated
  //#then it asks for phased planning and prioritization
  test("instructs Prometheus to create an execution plan", () => {
    const planningPrompt = buildPrometheusDelegationPrompt(createConfirmedFindings(), "Plan Athena stabilization")

    const expectedInstructions = [
      "Create an execution plan",
      "phased implementation plan",
      "prioritize by agreement level and impact",
    ]
    for (const instruction of expectedInstructions) {
      expect(planningPrompt).toContain(instruction)
    }
  })

  //#given exactly one confirmed finding
  //#when the Prometheus prompt is generated
  //#then the single finding is still numbered and labelled correctly
  test("handles a single finding edge case", () => {
    const onlyFirstFinding = createConfirmedFindings().slice(0, 1)

    const planningPrompt = buildPrometheusDelegationPrompt(onlyFirstFinding, "Plan this one issue")

    expect(planningPrompt).toContain("1. Guard missing council config in startup")
    expect(planningPrompt).toContain("Agreement level: unanimous")
  })

  //#given findings at more than one agreement level
  //#when either delegation prompt is generated
  //#then every finding keeps its agreement level label
  test("includes agreement level context for each finding in both prompts", () => {
    const confirmed = createConfirmedFindings()
    const atlasPrompt = buildAtlasDelegationPrompt(confirmed, "Atlas context")
    const prometheusPrompt = buildPrometheusDelegationPrompt(confirmed, "Prometheus context")

    for (const renderedPrompt of [atlasPrompt, prometheusPrompt]) {
      expect(renderedPrompt).toContain("Agreement level: unanimous")
      expect(renderedPrompt).toContain("Agreement level: minority")
    }
  })
})

View File

@ -1,55 +0,0 @@
import type { SynthesizedFinding } from "./synthesis-types"
/**
 * Renders one finding as a numbered, multi-line text block.
 *
 * @param finding - the finding to render
 * @param index - zero-based position; the rendered number is `index + 1`
 */
function formatFindingBlock(finding: SynthesizedFinding, index: number): string {
  const verdict = finding.assessment.agrees ? "Agrees" : "Disagrees"
  const ordinal = index + 1

  const lines: string[] = []
  lines.push(`${ordinal}. ${finding.summary}`)
  lines.push(` Details: ${finding.details}`)
  lines.push(` Agreement level: ${finding.agreementLevel}`)
  lines.push(` Athena assessment: ${verdict}`)
  lines.push(` Rationale: ${finding.assessment.rationale}`)
  return lines.join("\n")
}
/** Renders every confirmed finding as a numbered block, separated by a blank line. */
function formatConfirmedFindings(confirmedFindings: SynthesizedFinding[]): string {
  const blocks: string[] = []
  confirmedFindings.forEach((finding, position) => {
    blocks.push(formatFindingBlock(finding, position))
  })
  return blocks.join("\n\n")
}
/**
 * Builds the brief handed to Atlas: the original question, the confirmed
 * findings, and instructions to fix them directly.
 *
 * @param confirmedFindings - findings to include, rendered in the given order
 * @param question - the original question, embedded verbatim
 */
export function buildAtlasDelegationPrompt(confirmedFindings: SynthesizedFinding[], question: string): string {
  // Each section is a run of single-newline lines; sections are separated by a blank line.
  const heading = ["# Atlas Delegation Brief", "Original question:", question]
  const task = ["Task:", "Fix these confirmed issues directly."]
  const findings = ["Confirmed findings:", formatConfirmedFindings(confirmedFindings)]
  const instructions = [
    "Execution instructions:",
    "- Implement code changes to resolve each confirmed issue.",
    "- prioritize by agreement level, addressing unanimous findings first.",
    "- Validate fixes with relevant tests and type safety checks.",
  ]

  return [heading, task, findings, instructions].map((section) => section.join("\n")).join("\n\n")
}
/**
 * Builds the brief handed to Prometheus: the original question, the confirmed
 * findings, and instructions to produce an execution plan.
 *
 * @param confirmedFindings - findings to include, rendered in the given order
 * @param question - the original question, embedded verbatim
 */
export function buildPrometheusDelegationPrompt(confirmedFindings: SynthesizedFinding[], question: string): string {
  // Each section is a run of single-newline lines; sections are separated by a blank line.
  const heading = ["# Prometheus Delegation Brief", "Original question:", question]
  const task = ["Task:", "Create an execution plan for these confirmed issues."]
  const findings = ["Confirmed findings:", formatConfirmedFindings(confirmedFindings)]
  const instructions = [
    "Planning instructions:",
    "- Produce a phased implementation plan with clear task boundaries.",
    "- prioritize by agreement level and impact.",
    "- Include verification checkpoints for each phase.",
  ]

  return [heading, task, findings, instructions].map((section) => section.join("\n")).join("\n\n")
}

View File

@ -1,167 +0,0 @@
import { describe, expect, test } from "bun:test"
import type { SynthesisResult } from "./synthesis-types"
import { formatFindingsForUser } from "./findings-presenter"
/**
 * Fixture: a synthesis result with one finding per agreement level
 * (majority, solo, unanimous, minority — deliberately not in display order).
 *
 * @param overrides - fields that replace the defaults; applied last
 */
function createSynthesisResult(overrides?: Partial<SynthesisResult>): SynthesisResult {
  const baseline: SynthesisResult = {
    question: "Review the Athena council outputs for actionable risks",
    findings: [
      {
        summary: "Validate configuration before execution",
        details: "Missing guard clauses can allow invalid member configs.",
        agreementLevel: "majority",
        reportedBy: ["OpenAI", "Claude"],
        assessment: {
          agrees: true,
          rationale: "This aligns with repeated failures observed in setup paths.",
        },
        isFalsePositiveRisk: false,
      },
      {
        summary: "Retry strategy lacks upper bounds",
        details: "Unbounded retries may cause runaway background tasks.",
        agreementLevel: "solo",
        reportedBy: ["Gemini"],
        assessment: {
          agrees: false,
          rationale: "Current retry count is already constrained in most flows.",
        },
        isFalsePositiveRisk: true,
      },
      {
        summary: "Preserve partial successes",
        details: "Do not fail entire council run when one member errors.",
        agreementLevel: "unanimous",
        reportedBy: ["OpenAI", "Claude", "Gemini"],
        assessment: {
          agrees: true,
          rationale: "This is required for resilient multi-model orchestration.",
        },
        isFalsePositiveRisk: false,
      },
      {
        summary: "Reduce prompt token duplication",
        details: "Duplicate context blocks increase cost without improving quality.",
        agreementLevel: "minority",
        reportedBy: ["Claude"],
        assessment: {
          agrees: true,
          rationale: "Consolidation should lower cost while preserving intent.",
        },
        isFalsePositiveRisk: false,
      },
    ],
    memberProvenance: [],
    totalFindings: 4,
    consensusCount: 2,
    outlierCount: 1,
  }

  return { ...baseline, ...overrides }
}
describe("formatFindingsForUser", () => {
  //#given findings across all agreement levels
  //#when formatFindingsForUser is called
  //#then section headings appear in the order unanimous, majority, minority, solo
  test("groups findings by agreement level in required order", () => {
    const rendered = formatFindingsForUser(createSynthesisResult())

    const headingsInOrder = [
      "## Unanimous Findings",
      "## Majority Findings",
      "## Minority Findings",
      "## Solo Findings",
    ]

    // Each heading must exist and sit strictly after the previous one.
    let previousIndex = -1
    for (const heading of headingsInOrder) {
      const headingIndex = rendered.indexOf(heading)
      expect(headingIndex).toBeGreaterThan(previousIndex)
      previousIndex = headingIndex
    }
  })

  //#given a finding with assessment details
  //#when formatting is generated
  //#then the finding shows summary, details, reporters, and Athena's rationale
  test("renders finding body and Athena assessment rationale", () => {
    const rendered = formatFindingsForUser(createSynthesisResult())

    const expectedFragments = [
      "Validate configuration before execution",
      "Missing guard clauses can allow invalid member configs.",
      "Reported by: OpenAI, Claude",
      "Athena assessment: Agrees",
      "Rationale: This aligns with repeated failures observed in setup paths.",
    ]
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment)
    }
  })

  //#given a solo finding flagged as a false-positive risk
  //#when formatting is generated
  //#then a visible warning marker is included
  test("shows false-positive warning for risky solo findings", () => {
    const rendered = formatFindingsForUser(createSynthesisResult())

    expect(rendered).toContain("[False Positive Risk]")
    expect(rendered).toContain("Retry strategy lacks upper bounds")
  })

  //#given no findings
  //#when formatFindingsForUser is called
  //#then the output contains a graceful no-findings message
  test("handles empty findings with a no-findings message", () => {
    const emptyResult = createSynthesisResult({ findings: [], totalFindings: 0, consensusCount: 0, outlierCount: 0 })

    const rendered = formatFindingsForUser(emptyResult)

    expect(rendered).toContain("No synthesized findings are available")
  })

  //#given multiple majority findings with different reporter counts
  //#when formatting is generated
  //#then the group header shows the level label without a misleading single count
  test("shows agreement level label in group header without single-finding count", () => {
    const twoMajorityFindings = createSynthesisResult({
      findings: [
        {
          summary: "Finding A",
          details: "Reported by 3 members",
          agreementLevel: "majority",
          reportedBy: ["OpenAI", "Claude", "Gemini"],
          assessment: { agrees: true, rationale: "Valid" },
          isFalsePositiveRisk: false,
        },
        {
          summary: "Finding B",
          details: "Reported by 2 members",
          agreementLevel: "majority",
          reportedBy: ["OpenAI", "Claude"],
          assessment: { agrees: true, rationale: "Also valid" },
          isFalsePositiveRisk: false,
        },
      ],
    })

    const rendered = formatFindingsForUser(twoMajorityFindings)

    // The group header must not borrow the first finding's reporter count.
    expect(rendered).not.toContain("## Majority Findings (3 members report this (majority))")
    expect(rendered).toContain("## Majority Findings")

    // Each finding still reports its own agreement context.
    expect(rendered).toContain("Agreement context: 3 members report this (majority)")
    expect(rendered).toContain("Agreement context: 2 members report this (majority)")
  })

  //#given a non-empty findings result
  //#when formatting is generated
  //#then the output ends with an action recommendation section
  test("includes a final action recommendation section", () => {
    const rendered = formatFindingsForUser(createSynthesisResult())

    expect(rendered.trimEnd()).toMatch(/## Action Recommendation[\s\S]*$/)
    expect(rendered).toContain("Prioritize unanimous and majority findings")
  })
})

View File

@ -1,81 +0,0 @@
import type { SynthesisResult, SynthesizedFinding } from "./synthesis-types"
import type { AgreementLevel } from "./types"
// Agreement levels in the order this module iterates them when grouping findings
// and when rendering per-level sections and counts.
const AGREEMENT_ORDER: AgreementLevel[] = ["unanimous", "majority", "minority", "solo"]
/** Upper-cases the first character of an agreement level, e.g. "solo" becomes "Solo". */
function toTitle(level: AgreementLevel): string {
  const head = level.charAt(0)
  const tail = level.slice(1)
  return head.toUpperCase() + tail
}
/**
 * Describes how many members reported a finding, phrased for its agreement level.
 *
 * The count is the length of `finding.reportedBy`. For "majority" and
 * "minority" the wording is singular when exactly one member reported the
 * finding ("1 member reports this") and plural otherwise
 * ("3 members report this").
 *
 * @param level - agreement level that selects the phrasing
 * @param finding - finding whose `reportedBy` list supplies the count
 */
function formatAgreementLine(level: AgreementLevel, finding: SynthesizedFinding): string {
  const memberCount = finding.reportedBy.length
  // A minority finding can have a single reporter; avoid rendering "1 members report".
  const reporters = memberCount === 1 ? "member reports" : "members report"

  switch (level) {
    case "unanimous":
      return `${memberCount}/${memberCount} members agree`
    case "majority":
      return `${memberCount} ${reporters} this (majority)`
    case "minority":
      return `${memberCount} ${reporters} this (minority)`
    case "solo":
      return `${memberCount} member reported this`
  }
}
/**
 * Renders one finding as a "###" section: heading, details, reporters,
 * agreement context, and Athena's assessment with its rationale.
 *
 * The heading gets a " [False Positive Risk]" suffix only for solo findings
 * that are flagged as a false-positive risk.
 */
function formatFinding(level: AgreementLevel, finding: SynthesizedFinding): string {
  const verdict = finding.assessment.agrees ? "Agrees" : "Disagrees"

  let heading = `### ${finding.summary}`
  if (level === "solo" && finding.isFalsePositiveRisk) {
    heading += " [False Positive Risk]"
  }

  const lines: string[] = [heading]
  lines.push(`Details: ${finding.details}`)
  lines.push(`Reported by: ${finding.reportedBy.join(", ")}`)
  lines.push(`Agreement context: ${formatAgreementLine(level, finding)}`)
  lines.push(`Athena assessment: ${verdict}`)
  lines.push(`Rationale: ${finding.assessment.rationale}`)
  return lines.join("\n")
}
/**
 * Renders the closing "Action Recommendation" section: per-level finding
 * counts, fixed prioritization guidance, and the original question.
 *
 * @param result - supplies the question shown as context
 * @param groupedFindings - findings bucketed by level; a missing bucket counts as 0
 */
function formatActionRecommendation(result: SynthesisResult, groupedFindings: Map<AgreementLevel, SynthesizedFinding[]>): string {
  const tallies: string[] = []
  for (const level of AGREEMENT_ORDER) {
    const bucket = groupedFindings.get(level)
    tallies.push(`${toTitle(level)}: ${bucket?.length ?? 0}`)
  }
  const counts = tallies.join(" | ")

  const lines = [
    "## Action Recommendation",
    `Findings by agreement level: ${counts}`,
    "Prioritize unanimous and majority findings for immediate execution,",
    "then review minority findings, and manually validate solo findings before delegating changes.",
    `Question context: ${result.question}`,
  ]
  return lines.join("\n")
}
/**
 * Renders a synthesis result as a user-facing report.
 *
 * With no findings, returns a short report saying so plus a recommendation to
 * gather more responses. Otherwise emits one "## <Level> Findings (n)" section
 * per non-empty agreement level, in AGREEMENT_ORDER, followed by the action
 * recommendation section.
 */
export function formatFindingsForUser(result: SynthesisResult): string {
  const title = "# Synthesized Findings"

  if (result.findings.length === 0) {
    const emptyReport = [
      title,
      "No synthesized findings are available.",
      "## Action Recommendation",
      "Gather additional council responses or re-run synthesis before delegation.",
      `Question context: ${result.question}`,
    ]
    return emptyReport.join("\n\n")
  }

  // Bucket findings per level; every level gets an entry, even when empty.
  const groupedFindings = new Map<AgreementLevel, SynthesizedFinding[]>()
  for (const level of AGREEMENT_ORDER) {
    const matching = result.findings.filter((candidate) => candidate.agreementLevel === level)
    groupedFindings.set(level, matching)
  }

  const sections: string[] = []
  for (const level of AGREEMENT_ORDER) {
    const bucket = groupedFindings.get(level) ?? []
    if (bucket.length === 0) {
      continue
    }

    const header = `## ${toTitle(level)} Findings (${bucket.length})`
    const entries = bucket.map((entry) => formatFinding(level, entry)).join("\n\n")
    sections.push(`${header}\n\n${entries}`)
  }

  const closing = formatActionRecommendation(result, groupedFindings)
  return [title, ...sections, closing].join("\n\n")
}

View File

@ -3,10 +3,4 @@ export * from "./agent"
export * from "./model-parser"
export * from "./council-prompt"
export * from "./council-orchestrator"
export * from "./council-result-collector"
export * from "./synthesis-types"
export * from "./synthesis-prompt"
export * from "./synthesis-formatter"
export * from "./findings-presenter"
export * from "./delegation-prompts"
export * from "../../config/schema/athena"

View File

@ -1,157 +0,0 @@
import { describe, expect, test } from "bun:test"
import { formatCouncilResultsForSynthesis } from "./synthesis-formatter"
import type { CouncilExecutionResult } from "./types"
/**
 * Fixture: a council execution result in which all three members completed.
 *
 * @param overrides - fields that replace the defaults; applied last
 */
function createResult(overrides?: Partial<CouncilExecutionResult>): CouncilExecutionResult {
  const defaults: CouncilExecutionResult = {
    question: "What reliability risks exist?",
    responses: [
      {
        member: { model: "openai/gpt-5.3-codex", name: "OpenAI" },
        status: "completed",
        response: "Finding A from OpenAI",
        taskId: "task-1",
        durationMs: 120,
      },
      {
        member: { model: "anthropic/claude-sonnet-4-5", name: "Claude" },
        status: "completed",
        response: "Finding B from Claude",
        taskId: "task-2",
        durationMs: 240,
      },
      {
        member: { model: "google/gemini-3-pro", name: "Gemini" },
        status: "completed",
        response: "Finding C from Gemini",
        taskId: "task-3",
        durationMs: 360,
      },
    ],
    totalMembers: 3,
    completedCount: 3,
    failedCount: 0,
  }

  return { ...defaults, ...overrides }
}
describe("formatCouncilResultsForSynthesis", () => {
//#given a CouncilExecutionResult with 3 completed members
//#when formatCouncilResultsForSynthesis is called
//#then output contains each member's model name as a header
//#then output contains each member's raw response text
//#then output contains member status and duration
test("formats all completed members with provenance and response text", () => {
const result = createResult()
const output = formatCouncilResultsForSynthesis(result)
expect(output).toContain("openai/gpt-5.3-codex")
expect(output).toContain("anthropic/claude-sonnet-4-5")
expect(output).toContain("google/gemini-3-pro")
expect(output).toContain("Finding A from OpenAI")
expect(output).toContain("Finding B from Claude")
expect(output).toContain("Finding C from Gemini")
expect(output).toContain("Status: completed")
expect(output).toContain("Duration: 120ms")
expect(output).toContain("Duration: 240ms")
expect(output).toContain("Duration: 360ms")
})
//#given a CouncilExecutionResult with 1 completed and 1 failed member
//#when formatCouncilResultsForSynthesis is called
//#then completed member's response is included
//#then failed member shows error status and error message
//#then failed member does NOT have a response section
test("includes completed response and failed error without response section", () => {
const result = createResult({
responses: [
{
member: { model: "openai/gpt-5.3-codex" },
status: "completed",
response: "Primary finding",
taskId: "task-1",
durationMs: 80,
},
{
member: { model: "xai/grok-code-fast-1" },
status: "error",
error: "Timeout from provider",
taskId: "task-2",
durationMs: 500,
},
],
totalMembers: 2,
completedCount: 1,
failedCount: 1,
})
const output = formatCouncilResultsForSynthesis(result)
expect(output).toContain("Primary finding")
expect(output).toContain("xai/grok-code-fast-1")
expect(output).toContain("Status: error")
expect(output).toContain("Error: Timeout from provider")
expect(output).not.toContain("Response:\nTimeout from provider")
})
//#given a CouncilExecutionResult with 0 completed members
//#when formatCouncilResultsForSynthesis is called
//#then output contains a "no successful responses" message
test("shows no successful responses message when all members fail", () => {
const result = createResult({
responses: [
{
member: { model: "openai/gpt-5.3-codex" },
status: "error",
error: "No output",
taskId: "task-1",
durationMs: 200,
},
],
totalMembers: 1,
completedCount: 0,
failedCount: 1,
})
const output = formatCouncilResultsForSynthesis(result)
expect(output).toContain("No successful responses")
})
//#given one member with a custom name and one member without
//#when formatCouncilResultsForSynthesis is called
//#then the custom name is shown for the named member
//#then the model identifier is shown for the unnamed member
test("prefers custom member name and falls back to model", () => {
  const councilResult = createResult({
    responses: [
      {
        member: { model: "openai/gpt-5.3-codex", name: "Council Alpha" },
        status: "completed",
        response: "Custom member response",
        taskId: "task-1",
        durationMs: 10,
      },
      {
        member: { model: "google/gemini-3-pro" },
        status: "completed",
        response: "Default member response",
        taskId: "task-2",
        durationMs: 11,
      },
    ],
    totalMembers: 2,
    completedCount: 2,
    failedCount: 0,
  })

  const formatted = formatCouncilResultsForSynthesis(councilResult)

  for (const expectedLabel of ["Council Alpha", "google/gemini-3-pro"]) {
    expect(formatted).toContain(expectedLabel)
  }
})
})

View File

@ -1,48 +0,0 @@
import type { CouncilExecutionResult } from "./types"
/**
 * Renders a council execution result as a plain-text document for the synthesis step.
 *
 * The document is a title, the question, a short summary block and one section per
 * member, all separated by blank lines. Completed members contribute a "Response:"
 * body (with a placeholder when the response is missing or whitespace-only); every
 * other member contributes an "Error:" line (with a placeholder when no error text
 * exists). When no member completed, the summary block is replaced by a
 * "No successful responses" notice.
 *
 * @param result - Council execution result to render.
 * @returns The formatted text.
 */
export function formatCouncilResultsForSynthesis(result: CouncilExecutionResult): string {
  const anyCompleted = result.responses.some((entry) => entry.status === "completed")

  const memberSections = result.responses.map((entry) => {
    const displayName = entry.member.name ?? entry.member.model
    const lines = [
      `## Member: ${displayName} (${entry.status})`,
      `Model: ${entry.member.model}`,
      `Status: ${entry.status}`,
      `Duration: ${entry.durationMs}ms`,
    ]

    if (entry.status !== "completed") {
      lines.push(`Error: ${entry.error ?? "No error message provided"}`)
      return lines.join("\n")
    }

    // Whitespace-only responses count as empty, but a real response is emitted untrimmed.
    const body = entry.response?.trim() ? entry.response : "No response content provided"
    lines.push("Response:", body)
    return lines.join("\n")
  })

  const summary = anyCompleted
    ? [`Completed responses: ${result.completedCount}/${result.totalMembers}`]
    : [
        "No successful responses from council members.",
        "Review failed member details below for provenance.",
      ]

  return [
    "# Council Responses for Synthesis",
    `Question: ${result.question}`,
    ...summary,
    ...memberSections,
  ].join("\n\n")
}

View File

@ -1,44 +0,0 @@
/**
 * Builds the prompt that asks Athena to merge council member responses into findings.
 *
 * The prompt is a newline-separated document: a role statement, the original question,
 * the pre-formatted council responses, the list of responsibilities (which embeds the
 * completed-member count), and the JSON output contract.
 *
 * @param formattedResponses - Council responses already rendered as text.
 * @param question - The original question, inserted verbatim.
 * @param completedCount - Number of completed members, interpolated into the
 *   agreement-level instruction.
 * @returns The full prompt text.
 */
export function buildSynthesisPrompt(formattedResponses: string, question: string, completedCount: number): string {
  const roleStatement =
    "You are Athena, the synthesis lead for a multi-model council. Your job is to merge independent model outputs into a single, evidence-grounded synthesis."

  const contextLines = [
    "## Original Question",
    `${question}`,
    "## Council Responses",
    `${formattedResponses}`,
  ]

  const responsibilityLines = [
    "## Your Responsibilities",
    "1. Identify distinct findings across all completed member responses.",
    "2. Group findings that refer to the same underlying issue (semantic similarity, not exact wording).",
    `3. Classify agreementLevel for each finding using ${completedCount} completed member(s):`,
    "- unanimous: all completed members reported the finding",
    "- majority: more than 50% of completed members reported the finding",
    "- minority: 2 or more members reported it, but not a majority",
    "- solo: only 1 member reported it",
    "4. Add AthenaAssessment for each finding:",
    "- agrees: whether you agree with the finding",
    "- rationale: concise reason for agreement or disagreement",
    "5. Set isFalsePositiveRisk:",
    "- true for solo findings (likely false positives unless strongly supported)",
    "- false for findings reported by multiple members",
  ]

  const outputContractLines = [
    "## Output Contract",
    "Return JSON only with this shape:",
    "{",
    '"findings": [',
    "{",
    '"summary": "string",',
    '"details": "string",',
    '"agreementLevel": "unanimous | majority | minority | solo",',
    '"reportedBy": ["model/name"],',
    '"assessment": {',
    '"agrees": true,',
    '"rationale": "string"',
    "},",
    '"isFalsePositiveRisk": false',
    "}",
    "]",
    "}",
  ]

  const closingNote =
    "The finding object must match the SynthesizedFinding type exactly. Keep findings concise, concrete, and tied to source responses."

  return [roleStatement, ...contextLines, ...responsibilityLines, ...outputContractLines, closingNote].join("\n")
}

View File

@ -1,31 +0,0 @@
import type { AgreementLevel, CouncilMemberConfig, CouncilMemberStatus } from "./types"
/** Athena's own verdict attached to a single synthesized finding. */
export interface AthenaAssessment {
/** Whether Athena agrees with the finding. */
agrees: boolean
/** Concise reason for the agreement or disagreement. */
rationale: string
}
/**
 * One finding produced by synthesis. The synthesis prompt's output contract asks the
 * model to return objects of exactly this shape.
 */
export interface SynthesizedFinding {
/** Short statement of the finding. */
summary: string
/** Longer description of the finding. */
details: string
/** How many of the completed members reported this finding, as an agreement bucket. */
agreementLevel: AgreementLevel
/** Identifiers of the members that reported the finding (the prompt example uses "model/name"). */
reportedBy: string[]
/** Athena's agreement flag and rationale for this finding. */
assessment: AthenaAssessment
/** Per the synthesis prompt: true for solo findings, false when multiple members reported it. */
isFalsePositiveRisk: boolean
}
/** Per-member record kept alongside a synthesis result. */
export interface MemberProvenance {
/** Configuration of the council member this record describes. */
member: CouncilMemberConfig
/** Outcome status recorded for the member. */
status: CouncilMemberStatus
/** Presumably the member's unprocessed response text; optional — TODO confirm when it is omitted. */
rawResponse?: string
/** Duration in milliseconds — NOTE(review): measurement start/end points are not visible here. */
durationMs: number
}
/** Aggregate output of a synthesis pass: the findings plus per-member provenance and counters. */
export interface SynthesisResult {
/** The question the synthesis relates to. */
question: string
/** Synthesized findings. */
findings: SynthesizedFinding[]
/** One provenance record per council member — presumably including failed members; verify against producer. */
memberProvenance: MemberProvenance[]
/** Presumably equals findings.length — TODO confirm. */
totalFindings: number
/** NOTE(review): which agreement levels count as "consensus" is not defined in this file — confirm. */
consensusCount: number
/** NOTE(review): which agreement levels count as "outlier" is not defined in this file — confirm. */
outlierCount: number
}

View File

@ -14,23 +14,20 @@ export interface AthenaConfig {
council: CouncilConfig
}
/** Outcome recorded for a council member's response: completed, timed out, or errored. */
export type CouncilMemberStatus = "completed" | "timeout" | "error"
/**
 * How widely a finding was reported across completed council members. The synthesis
 * prompt defines the buckets as: unanimous (all completed members), majority (more
 * than 50%), minority (2 or more but not a majority), solo (only 1 member).
 */
export type AgreementLevel = "unanimous" | "majority" | "minority" | "solo"
export interface CouncilMemberResponse {
export interface CouncilLaunchFailure {
member: CouncilMemberConfig
status: CouncilMemberStatus
response?: string
error?: string
taskId: string
durationMs: number
error: string
}
export interface CouncilExecutionResult {
question: string
responses: CouncilMemberResponse[]
totalMembers: number
completedCount: number
failedCount: number
/** A council member paired with the ID of the task launched for it. */
export interface CouncilLaunchedMember {
/** Configuration of the launched member. */
member: CouncilMemberConfig
/** ID of the task started for this member. */
taskId: string
}
/**
 * Return type of executeCouncil — only tracks launch outcomes, not task completion.
 * Members appear either in `launched` (with a task ID) or in `failures` (with an error).
 */
export interface CouncilLaunchResult {
/** The question associated with this council run. */
question: string
/** Members paired with the ID of the task launched for each. */
launched: CouncilLaunchedMember[]
/** Members paired with an error message instead of a task ID. */
failures: CouncilLaunchFailure[]
/** Presumably launched.length + failures.length — TODO confirm against executeCouncil. */
totalMembers: number
}

View File

@ -113,22 +113,18 @@ export function createAthenaCouncilTool(args: {
})
const launchResult: AthenaCouncilLaunchResult = {
launched: execution.responses.filter((response) => response.taskId.length > 0).length,
members: execution.responses
.filter((response) => response.taskId.length > 0)
.map((response) => ({
task_id: response.taskId,
name: response.member.name ?? response.member.model,
model: response.member.model,
status: "running",
})),
failed: execution.responses
.filter((response) => response.taskId.length === 0)
.map((response) => ({
name: response.member.name ?? response.member.model,
model: response.member.model,
error: response.error ?? "Launch failed",
})),
launched: execution.launched.length,
members: execution.launched.map((entry) => ({
task_id: entry.taskId,
name: entry.member.name ?? entry.member.model,
model: entry.member.model,
status: "running",
})),
failed: execution.failures.map((entry) => ({
name: entry.member.name ?? entry.member.model,
model: entry.member.model,
error: entry.error,
})),
}
markCouncilDone(toolContext.sessionID)