refactor(athena): remove dead code from phases 2, 3, 5 pipeline

Remove 9 files (913 lines) from the code-driven synthesis pipeline that was superseded by the agent-driven approach in phases 6-8. Phases 3/5 built: collectCouncilResults → formatForSynthesis → buildSynthesisPrompt → formatFindingsForUser → buildDelegationPrompt. Phases 6-8 replaced it with: launch → background_output → Athena synthesizes in conversation → switch_agent. The old pipeline was never wired into runtime and all consumers were other dead code. Also simplifies executeCouncil to return CouncilLaunchResult (task IDs + failures) instead of reading stale task status via collectCouncilResults. Deleted: council-result-collector, synthesis-types, synthesis-prompt, synthesis-formatter, findings-presenter, delegation-prompts (+ 4 tests). Cleaned: CouncilMemberStatus, AgreementLevel, CouncilMemberResponse, CouncilExecutionResult types from types.ts.
2026-02-13 17:05:23 +01:00 · 2026-02-13 17:05:23 +01:00 · 4d675bac89
commit 4d675bac89
parent d8ba9b1f0c
14 changed files with 103 additions and 913 deletions
--- a/src/agents/athena/council-orchestrator.test.ts
+++ b/src/agents/athena/council-orchestrator.test.ts
@ -3,16 +3,6 @@ import { buildCouncilPrompt } from "./council-prompt"
 import { executeCouncil } from "./council-orchestrator"
 import type { CouncilConfig } from "./types"
 type MockTaskStatus = "completed" | "error" | "cancelled" | "interrupt"
 interface MockTask {
  id: string
  status: MockTaskStatus
  result?: string
  error?: string
  completedAt?: Date
 }
 interface MockLaunchInput {
  description: string
  prompt: string
@ -25,20 +15,15 @@ interface MockLaunchInput {
  permission?: Record<string, "ask" | "allow" | "deny">
 }
-function createMockTask(task: MockTask, launch: MockLaunchInput): MockTask & {
+function createMockTask(id: string, launch: MockLaunchInput) {
  parentSessionID: string
  parentMessageID: string
  description: string
  prompt: string
  agent: string
 } {
  return {
    id,
    status: "pending" as const,
    parentSessionID: launch.parentSessionID,
    parentMessageID: launch.parentMessageID,
    description: launch.description,
    prompt: launch.prompt,
    agent: launch.agent,
    ...task,
  }
 }
@ -51,15 +36,7 @@ describe("executeCouncil", () => {
    const launcher = {
      launch: async (input: MockLaunchInput) => {
        launches.push(input)
-        return createMockTask(
+        return createMockTask(`task-${launches.length}`, input)
          {
            id: `task-${launches.length}`,
            status: "completed",
            result: `response-${launches.length}`,
            completedAt: new Date(),
          },
          input
        )
      },
    }
@ -84,8 +61,9 @@ describe("executeCouncil", () => {
    const expectedPrompt = buildCouncilPrompt(question)
    expect(launches).toHaveLength(3)
-    expect(result.completedCount).toBe(3)
+    expect(result.launched).toHaveLength(3)
-    expect(result.failedCount).toBe(0)
+    expect(result.failures).toHaveLength(0)
    expect(result.totalMembers).toBe(3)
    for (const launch of launches) {
      expect(launch.prompt).toBe(expectedPrompt)
@ -98,33 +76,16 @@ describe("executeCouncil", () => {
    expect(launches[2]?.model).toEqual({ providerID: "google", modelID: "gemini-3-pro" })
  })
-  //#given a council with 3 members where 1 member fails
+  //#given a council with 3 members where 1 launch throws
  //#when executeCouncil is called
-  //#then partial failures are tolerated and preserved in responses
+  //#then launch failures are captured separately from successful launches
-  test("returns successful result for partial failures", async () => {
+  test("captures launch failures separately from successful launches", async () => {
    const launcher = {
      launch: async (input: MockLaunchInput) => {
        if (input.model?.providerID === "anthropic") {
-          return createMockTask(
+          throw new Error("Provider unavailable")
            {
              id: "task-failed",
              status: "error",
              error: "Token limit exceeded",
              completedAt: new Date(),
            },
            input
          )
        }
-
+        return createMockTask(`task-${input.model?.providerID}`, input)
        return createMockTask(
          {
            id: `task-${input.model?.providerID}`,
            status: "completed",
            result: `ok-${input.model?.providerID}`,
            completedAt: new Date(),
          },
          input
        )
      },
    }
@ -142,28 +103,21 @@ describe("executeCouncil", () => {
      parentMessageID: "message-1",
    })
-    expect(result.completedCount).toBe(2)
+    expect(result.launched).toHaveLength(2)
-    expect(result.failedCount).toBe(1)
+    expect(result.failures).toHaveLength(1)
-    expect(result.responses).toHaveLength(3)
+    expect(result.totalMembers).toBe(3)
-    expect(result.responses.filter((response) => response.status === "completed")).toHaveLength(2)
+    expect(result.failures[0]?.member.model).toBe("anthropic/claude-sonnet-4-5")
-    expect(result.responses.filter((response) => response.status === "error")).toHaveLength(1)
+    expect(result.failures[0]?.error).toContain("Launch failed")
  })
-  //#given a council where all members fail
+  //#given a council where all launches throw
  //#when executeCouncil is called
-  //#then it returns structured error result with zero completions
+  //#then all members appear as failures with zero launched
-  test("returns all failures when every member fails", async () => {
+  test("returns all failures when every launch throws", async () => {
    const launcher = {
-      launch: async (input: MockLaunchInput) =>
+      launch: async () => {
-        createMockTask(
+        throw new Error("Model unavailable")
          {
            id: `task-${input.model?.providerID}`,
            status: "error",
            error: "Model unavailable",
            completedAt: new Date(),
      },
          input
        ),
    }
    const result = await executeCouncil({
@ -179,29 +133,21 @@ describe("executeCouncil", () => {
      parentMessageID: "message-1",
    })
-    expect(result.completedCount).toBe(0)
+    expect(result.launched).toHaveLength(0)
-    expect(result.failedCount).toBe(2)
+    expect(result.failures).toHaveLength(2)
-    expect(result.responses).toHaveLength(2)
+    expect(result.totalMembers).toBe(2)
-    expect(result.responses.every((response) => response.status === "error")).toBe(true)
+    expect(result.failures.every((f) => f.error.includes("Launch failed"))).toBe(true)
  })
  //#given a council with one invalid model string
  //#when executeCouncil is called
-  //#then invalid member becomes an error response while others still execute
+  //#then invalid member becomes a failure while others still launch
  test("handles invalid model strings without crashing council execution", async () => {
    const launches: MockLaunchInput[] = []
    const launcher = {
      launch: async (input: MockLaunchInput) => {
        launches.push(input)
-        return createMockTask(
+        return createMockTask(`task-${launches.length}`, input)
          {
            id: `task-${launches.length}`,
            status: "completed",
            result: "valid-member-response",
            completedAt: new Date(),
          },
          input
        )
      },
    }
@ -219,10 +165,9 @@ describe("executeCouncil", () => {
    })
    expect(launches).toHaveLength(1)
-    expect(result.completedCount).toBe(1)
+    expect(result.launched).toHaveLength(1)
-    expect(result.failedCount).toBe(1)
+    expect(result.failures).toHaveLength(1)
-    expect(result.responses).toHaveLength(2)
+    expect(result.failures.find((f) => f.member.model === "invalid-model")?.error).toContain("Launch failed")
    expect(result.responses.find((response) => response.member.model === "invalid-model")?.status).toBe("error")
  })
  //#given members with per-member temperature and variant
@ -233,15 +178,7 @@ describe("executeCouncil", () => {
    const launcher = {
      launch: async (input: MockLaunchInput) => {
        launches.push(input)
-        return createMockTask(
+        return createMockTask(`task-${launches.length}`, input)
          {
            id: `task-${launches.length}`,
            status: "completed",
            result: "ok",
            completedAt: new Date(),
          },
          input
        )
      },
    }
@ -264,4 +201,33 @@ describe("executeCouncil", () => {
    expect(launches[1]?.temperature).toBe(0.3)
    expect(launches[1]?.model?.variant).toBeUndefined()
  })
  //#given launched members
  //#when executeCouncil returns
  //#then each launched member has a taskId for background_output retrieval
  test("returns task IDs for background_output retrieval", async () => {
    const launcher = {
      launch: async (input: MockLaunchInput) =>
        createMockTask(`bg_${input.model?.providerID}`, input),
    }
    const result = await executeCouncil({
      question: "Review error handling",
      council: {
        members: [
          { model: "openai/gpt-5.3-codex", name: "OpenAI" },
          { model: "google/gemini-3-pro", name: "Gemini" },
        ],
      },
      launcher,
      parentSessionID: "session-1",
      parentMessageID: "message-1",
    })
    expect(result.launched).toHaveLength(2)
    expect(result.launched[0]?.taskId).toBe("bg_openai")
    expect(result.launched[0]?.member.name).toBe("OpenAI")
    expect(result.launched[1]?.taskId).toBe("bg_google")
    expect(result.launched[1]?.member.name).toBe("Gemini")
  })
 })
--- a/src/agents/athena/council-orchestrator.ts
+++ b/src/agents/athena/council-orchestrator.ts
@ -1,9 +1,8 @@
 import type { LaunchInput, BackgroundTask } from "../../features/background-agent/types"
 import { createAgentToolRestrictions } from "../../shared/permission-compat"
 import { buildCouncilPrompt } from "./council-prompt"
 import { collectCouncilResults } from "./council-result-collector"
 import { parseModelString } from "./model-parser"
-import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types"
+import type { CouncilConfig, CouncilLaunchFailure, CouncilLaunchedMember, CouncilLaunchResult, CouncilMemberConfig } from "./types"
 export type CouncilLaunchInput = LaunchInput
@ -20,57 +19,43 @@ export interface CouncilExecutionInput {
  parentAgent?: string
 }
-export async function executeCouncil(input: CouncilExecutionInput): Promise<CouncilExecutionResult> {
+/**
 * Launches all council members in parallel and returns launch outcomes.
 * Does NOT wait for task completion — actual results are collected by the
 * agent via background_output calls after this returns.
 */
 export async function executeCouncil(input: CouncilExecutionInput): Promise<CouncilLaunchResult> {
  const { question, council, launcher, parentSessionID, parentMessageID, parentAgent } = input
  const prompt = buildCouncilPrompt(question)
  const startTimes = new Map<string, number>()
  const launchResults = await Promise.allSettled(
    council.members.map((member) =>
-      launchMember(
+      launchMember(member, prompt, launcher, parentSessionID, parentMessageID, parentAgent)
        member,
        prompt,
        launcher,
        parentSessionID,
        parentMessageID,
        parentAgent,
        startTimes
      )
    )
  )
-  const launchedTasks: BackgroundTask[] = []
+  const launched: CouncilLaunchedMember[] = []
-  const launchedMembers: CouncilMemberConfig[] = []
+  const failures: CouncilLaunchFailure[] = []
  const launchFailures: CouncilMemberResponse[] = []
  launchResults.forEach((result, index) => {
    const member = council.members[index]
    if (result.status === "fulfilled") {
-      launchedTasks.push(result.value)
+      launched.push({ member, taskId: result.value.id })
      launchedMembers.push(member)
      return
    }
-    launchFailures.push({
+    failures.push({
      member,
      status: "error",
      error: `Launch failed: ${String(result.reason)}`,
      taskId: "",
      durationMs: 0,
    })
  })
  const collected = collectCouncilResults(launchedTasks, launchedMembers, startTimes)
  const responses = [...collected, ...launchFailures]
  const completedCount = responses.filter((response) => response.status === "completed").length
  return {
    question,
-    responses,
+    launched,
    failures,
    totalMembers: council.members.length,
    completedCount,
    failedCount: council.members.length - completedCount,
  }
 }
@ -80,8 +65,7 @@ async function launchMember(
  launcher: CouncilLauncher,
  parentSessionID: string,
  parentMessageID: string,
-  parentAgent: string | undefined,
+  parentAgent: string | undefined
  startTimes: Map<string, number>
 ): Promise<BackgroundTask> {
  const parsedModel = parseModelString(member.model)
  if (!parsedModel) {
@ -90,7 +74,7 @@ async function launchMember(
  const restrictions = createAgentToolRestrictions(["write", "edit", "task"])
  const memberName = member.name ?? member.model
-  const task = await launcher.launch({
+  return launcher.launch({
    description: `Council member: ${memberName}`,
    prompt,
    agent: "athena",
@ -105,7 +89,4 @@ async function launchMember(
    ...(member.temperature !== undefined ? { temperature: member.temperature } : {}),
    permission: restrictions.permission,
  })
  startTimes.set(task.id, Date.now())
  return task
 }
--- a/src/agents/athena/council-result-collector.ts
+++ b/src/agents/athena/council-result-collector.ts
@ -1,36 +0,0 @@
 import type { BackgroundTask, BackgroundTaskStatus } from "../../features/background-agent/types"
 import type { CouncilMemberConfig, CouncilMemberResponse, CouncilMemberStatus } from "./types"
 /**
  * Turns launched background tasks into one council response per task.
  *
  * Each task is paired with the member at the same index in `members`. The
  * duration is measured from the recorded start time (or "now" when none was
  * recorded) to the task's `completedAt` (or "now" when absent), clamped at 0.
  *
  * @param tasks - Background tasks to convert.
  * @param members - Member configs, index-aligned with `tasks`.
  * @param startTimes - Start timestamps in milliseconds keyed by task id.
  * @returns One response per task, in task order.
  */
 export function collectCouncilResults(
  tasks: BackgroundTask[],
  members: CouncilMemberConfig[],
  startTimes: Map<string, number>
 ): CouncilMemberResponse[] {
  const toResponse = (task: BackgroundTask, position: number): CouncilMemberResponse => {
    const status = mapTaskStatus(task.status)
    const succeeded = status === "completed"
    const startedAt = startTimes.get(task.id) ?? Date.now()
    const endedAt = task.completedAt?.getTime() ?? Date.now()

    return {
      member: members[position],
      status,
      // Only completed tasks carry a response; everything else carries an error.
      response: succeeded ? task.result : undefined,
      error: succeeded ? undefined : (task.error ?? `Task status: ${task.status}`),
      taskId: task.id,
      durationMs: Math.max(0, endedAt - startedAt),
    }
  }

  return tasks.map(toResponse)
 }
 /**
  * Maps a background task status onto a council member status.
  *
  * "completed" stays "completed"; "cancelled" and "interrupt" are reported as
  * "timeout"; every other status is reported as "error".
  */
 function mapTaskStatus(taskStatus: BackgroundTaskStatus): CouncilMemberStatus {
  switch (taskStatus) {
    case "completed":
      return "completed"
    case "cancelled":
    case "interrupt":
      return "timeout"
    default:
      return "error"
  }
 }
--- a/src/agents/athena/delegation-prompts.test.ts
+++ b/src/agents/athena/delegation-prompts.test.ts
@ -1,125 +0,0 @@
 import { describe, expect, test } from "bun:test"
 import type { SynthesizedFinding } from "./synthesis-types"
 import { buildAtlasDelegationPrompt, buildPrometheusDelegationPrompt } from "./delegation-prompts"
 /**
  * Test fixture: returns two fresh confirmed findings, one unanimous (three
  * reporters) and one minority (single reporter), both with Athena agreeing.
  */
 function createConfirmedFindings(): SynthesizedFinding[] {
  const startupGuard: SynthesizedFinding = {
    summary: "Guard missing council config in startup",
    details: "Athena path can proceed with undefined council members in some flows.",
    agreementLevel: "unanimous",
    reportedBy: ["OpenAI", "Claude", "Gemini"],
    assessment: {
      agrees: true,
      rationale: "Directly observed from startup and config fallback paths.",
    },
    isFalsePositiveRisk: false,
  }

  const retryThrash: SynthesizedFinding = {
    summary: "Potential retry thrash in background runner",
    details: "Repeated failures can cascade retry windows under high load.",
    agreementLevel: "minority",
    reportedBy: ["Claude"],
    assessment: {
      agrees: true,
      rationale: "Worth addressing to lower operational risk.",
    },
    isFalsePositiveRisk: false,
  }

  return [startupGuard, retryThrash]
 }
 // Suite for buildAtlasDelegationPrompt: verifies the prompt carries the original
 // question, every finding summary, the direct-fix instructions, and the
 // numbered / agreement-level rendering for a single finding.
 describe("buildAtlasDelegationPrompt", () => {
  //#given confirmed findings and an original question
  //#when the Atlas delegation prompt is built
  //#then it includes both findings and the original question context
  test("includes confirmed findings summaries and original question", () => {
    const findings = createConfirmedFindings()
    const question = "Which issues should we fix first in Athena integration?"
    const prompt = buildAtlasDelegationPrompt(findings, question)
    expect(prompt).toContain("Original question")
    expect(prompt).toContain(question)
    expect(prompt).toContain("Guard missing council config in startup")
    expect(prompt).toContain("Potential retry thrash in background runner")
  })
  //#given confirmed findings
  //#when Atlas prompt is generated
  //#then it explicitly asks Atlas to fix those specific issues
  test("instructs Atlas to implement direct fixes", () => {
    const prompt = buildAtlasDelegationPrompt(createConfirmedFindings(), "Fix Athena reliability issues")
    expect(prompt).toContain("Fix these confirmed issues directly")
    expect(prompt).toContain("Implement code changes")
    expect(prompt).toContain("prioritize by agreement level")
  })
  //#given a single confirmed finding
  //#when Atlas prompt is generated
  //#then prompt still renders correctly for edge case input
  test("handles a single finding edge case", () => {
    const [singleFinding] = createConfirmedFindings()
    const prompt = buildAtlasDelegationPrompt([singleFinding], "Fix this one issue")
    expect(prompt).toContain("1. Guard missing council config in startup")
    expect(prompt).toContain("Agreement level: unanimous")
  })
 })
 // Suite for buildPrometheusDelegationPrompt: verifies the prompt carries the
 // original question, every finding summary, and the planning instructions.
 // The final test also exercises buildAtlasDelegationPrompt to check that both
 // prompts render an agreement level for each finding.
 describe("buildPrometheusDelegationPrompt", () => {
  //#given confirmed findings and an original question
  //#when the Prometheus delegation prompt is built
  //#then it includes both findings and the original question context
  test("includes confirmed findings summaries and original question", () => {
    const findings = createConfirmedFindings()
    const question = "How should we sequence Athena integration hardening work?"
    const prompt = buildPrometheusDelegationPrompt(findings, question)
    expect(prompt).toContain("Original question")
    expect(prompt).toContain(question)
    expect(prompt).toContain("Guard missing council config in startup")
    expect(prompt).toContain("Potential retry thrash in background runner")
  })
  //#given confirmed findings
  //#when Prometheus prompt is generated
  //#then it explicitly asks for phased planning and prioritization
  test("instructs Prometheus to create an execution plan", () => {
    const prompt = buildPrometheusDelegationPrompt(createConfirmedFindings(), "Plan Athena stabilization")
    expect(prompt).toContain("Create an execution plan")
    expect(prompt).toContain("phased implementation plan")
    expect(prompt).toContain("prioritize by agreement level and impact")
  })
  //#given a single confirmed finding
  //#when Prometheus prompt is generated
  //#then prompt still renders correctly for edge case input
  test("handles a single finding edge case", () => {
    const [singleFinding] = createConfirmedFindings()
    const prompt = buildPrometheusDelegationPrompt([singleFinding], "Plan this one issue")
    expect(prompt).toContain("1. Guard missing council config in startup")
    expect(prompt).toContain("Agreement level: unanimous")
  })
  //#given findings at multiple agreement levels
  //#when either delegation prompt is generated
  //#then each finding includes agreement level context
  test("includes agreement level context for each finding in both prompts", () => {
    const findings = createConfirmedFindings()
    const atlasPrompt = buildAtlasDelegationPrompt(findings, "Atlas context")
    const prometheusPrompt = buildPrometheusDelegationPrompt(findings, "Prometheus context")
    expect(atlasPrompt).toContain("Agreement level: unanimous")
    expect(atlasPrompt).toContain("Agreement level: minority")
    expect(prometheusPrompt).toContain("Agreement level: unanimous")
    expect(prometheusPrompt).toContain("Agreement level: minority")
  })
 })
--- a/src/agents/athena/delegation-prompts.ts
+++ b/src/agents/athena/delegation-prompts.ts
@ -1,55 +0,0 @@
 import type { SynthesizedFinding } from "./synthesis-types"
 /**
  * Renders one finding as a numbered, five-line text block.
  *
  * @param finding - Finding to render.
  * @param index - Zero-based position; the rendered number is `index + 1`.
  * @returns Summary line followed by indented details, agreement level,
  *          Athena's verdict ("Agrees"/"Disagrees") and rationale.
  */
 function formatFindingBlock(finding: SynthesizedFinding, index: number): string {
  const verdict = finding.assessment.agrees ? "Agrees" : "Disagrees"
  const ordinal = index + 1

  const heading = `${ordinal}. ${finding.summary}`
  const detailLines = [
    `   Details: ${finding.details}`,
    `   Agreement level: ${finding.agreementLevel}`,
    `   Athena assessment: ${verdict}`,
    `   Rationale: ${finding.assessment.rationale}`,
  ]

  return [heading, ...detailLines].join("\n")
 }
 /**
  * Renders every confirmed finding as a numbered block and joins the blocks
  * with a blank line between them. An empty list yields an empty string.
  */
 function formatConfirmedFindings(confirmedFindings: SynthesizedFinding[]): string {
  const blocks: string[] = confirmedFindings.map((entry, position) => formatFindingBlock(entry, position))
  return blocks.join("\n\n")
 }
 /**
  * Builds the delegation brief asking Atlas to fix the confirmed findings directly.
  *
  * @param confirmedFindings - Findings to list in the brief.
  * @param question - Original question, echoed verbatim under its own heading.
  * @returns Newline-joined brief: title, question, task, findings, instructions.
  */
 export function buildAtlasDelegationPrompt(confirmedFindings: SynthesizedFinding[], question: string): string {
  const preamble = ["# Atlas Delegation Brief", "Original question:", question, ""]
  const task = ["Task:", "Fix these confirmed issues directly.", ""]
  const findings = ["Confirmed findings:", formatConfirmedFindings(confirmedFindings), ""]
  const instructions = [
    "Execution instructions:",
    "- Implement code changes to resolve each confirmed issue.",
    "- prioritize by agreement level, addressing unanimous findings first.",
    "- Validate fixes with relevant tests and type safety checks.",
  ]

  return [...preamble, ...task, ...findings, ...instructions].join("\n")
 }
 /**
  * Builds the delegation brief asking Prometheus for an execution plan covering
  * the confirmed findings.
  *
  * @param confirmedFindings - Findings to list in the brief.
  * @param question - Original question, echoed verbatim under its own heading.
  * @returns Newline-joined brief: title, question, task, findings, instructions.
  */
 export function buildPrometheusDelegationPrompt(confirmedFindings: SynthesizedFinding[], question: string): string {
  const preamble = ["# Prometheus Delegation Brief", "Original question:", question, ""]
  const task = ["Task:", "Create an execution plan for these confirmed issues.", ""]
  const findings = ["Confirmed findings:", formatConfirmedFindings(confirmedFindings), ""]
  const instructions = [
    "Planning instructions:",
    "- Produce a phased implementation plan with clear task boundaries.",
    "- prioritize by agreement level and impact.",
    "- Include verification checkpoints for each phase.",
  ]

  return [...preamble, ...task, ...findings, ...instructions].join("\n")
 }
--- a/src/agents/athena/findings-presenter.test.ts
+++ b/src/agents/athena/findings-presenter.test.ts
@ -1,167 +0,0 @@
 import { describe, expect, test } from "bun:test"
 import type { SynthesisResult } from "./synthesis-types"
 import { formatFindingsForUser } from "./findings-presenter"
 /**
  * Test fixture: builds a synthesis result holding one finding per agreement
  * level (majority, solo, unanimous, minority — in that order), then applies
  * any `overrides` on top of the defaults.
  */
 function createSynthesisResult(overrides?: Partial<SynthesisResult>): SynthesisResult {
  const findings: SynthesisResult["findings"] = [
    {
      summary: "Validate configuration before execution",
      details: "Missing guard clauses can allow invalid member configs.",
      agreementLevel: "majority",
      reportedBy: ["OpenAI", "Claude"],
      assessment: {
        agrees: true,
        rationale: "This aligns with repeated failures observed in setup paths.",
      },
      isFalsePositiveRisk: false,
    },
    {
      summary: "Retry strategy lacks upper bounds",
      details: "Unbounded retries may cause runaway background tasks.",
      agreementLevel: "solo",
      reportedBy: ["Gemini"],
      assessment: {
        agrees: false,
        rationale: "Current retry count is already constrained in most flows.",
      },
      isFalsePositiveRisk: true,
    },
    {
      summary: "Preserve partial successes",
      details: "Do not fail entire council run when one member errors.",
      agreementLevel: "unanimous",
      reportedBy: ["OpenAI", "Claude", "Gemini"],
      assessment: {
        agrees: true,
        rationale: "This is required for resilient multi-model orchestration.",
      },
      isFalsePositiveRisk: false,
    },
    {
      summary: "Reduce prompt token duplication",
      details: "Duplicate context blocks increase cost without improving quality.",
      agreementLevel: "minority",
      reportedBy: ["Claude"],
      assessment: {
        agrees: true,
        rationale: "Consolidation should lower cost while preserving intent.",
      },
      isFalsePositiveRisk: false,
    },
  ]

  const defaults: SynthesisResult = {
    question: "Review the Athena council outputs for actionable risks",
    findings,
    memberProvenance: [],
    totalFindings: 4,
    consensusCount: 2,
    outlierCount: 1,
  }

  // Caller-supplied fields win over the defaults.
  return { ...defaults, ...overrides }
 }
 // Suite for formatFindingsForUser: checks section ordering by agreement level,
 // the per-finding body lines, the false-positive marker on risky solo findings,
 // the empty-findings message, group header wording, and the trailing
 // "Action Recommendation" section.
 describe("formatFindingsForUser", () => {
  //#given findings across all agreement levels
  //#when formatFindingsForUser is called
  //#then groups appear in deterministic order: unanimous, majority, minority, solo
  test("groups findings by agreement level in required order", () => {
    const result = createSynthesisResult()
    const output = formatFindingsForUser(result)
    const unanimousIndex = output.indexOf("## Unanimous Findings")
    const majorityIndex = output.indexOf("## Majority Findings")
    const minorityIndex = output.indexOf("## Minority Findings")
    const soloIndex = output.indexOf("## Solo Findings")
    expect(unanimousIndex).toBeGreaterThan(-1)
    expect(majorityIndex).toBeGreaterThan(unanimousIndex)
    expect(minorityIndex).toBeGreaterThan(majorityIndex)
    expect(soloIndex).toBeGreaterThan(minorityIndex)
  })
  //#given a finding with assessment details
  //#when formatting is generated
  //#then each finding includes summary, details, reported-by, and Athena rationale
  test("renders finding body and Athena assessment rationale", () => {
    const result = createSynthesisResult()
    const output = formatFindingsForUser(result)
    expect(output).toContain("Validate configuration before execution")
    expect(output).toContain("Missing guard clauses can allow invalid member configs.")
    expect(output).toContain("Reported by: OpenAI, Claude")
    expect(output).toContain("Athena assessment: Agrees")
    expect(output).toContain("Rationale: This aligns with repeated failures observed in setup paths.")
  })
  //#given a solo finding flagged as false-positive risk
  //#when formatting is generated
  //#then a visible warning marker is included
  test("shows false-positive warning for risky solo findings", () => {
    const result = createSynthesisResult()
    const output = formatFindingsForUser(result)
    expect(output).toContain("[False Positive Risk]")
    expect(output).toContain("Retry strategy lacks upper bounds")
  })
  //#given no findings
  //#when formatFindingsForUser is called
  //#then output includes a graceful no-findings message
  test("handles empty findings with a no-findings message", () => {
    const result = createSynthesisResult({ findings: [], totalFindings: 0, consensusCount: 0, outlierCount: 0 })
    const output = formatFindingsForUser(result)
    expect(output).toContain("No synthesized findings are available")
  })
  //#given multiple majority findings with different reporter counts
  //#when formatting is generated
  //#then group header shows the agreement level label without a misleading single count
  test("shows agreement level label in group header without single-finding count", () => {
    const result = createSynthesisResult({
      findings: [
        {
          summary: "Finding A",
          details: "Reported by 3 members",
          agreementLevel: "majority",
          reportedBy: ["OpenAI", "Claude", "Gemini"],
          assessment: { agrees: true, rationale: "Valid" },
          isFalsePositiveRisk: false,
        },
        {
          summary: "Finding B",
          details: "Reported by 2 members",
          agreementLevel: "majority",
          reportedBy: ["OpenAI", "Claude"],
          assessment: { agrees: true, rationale: "Also valid" },
          isFalsePositiveRisk: false,
        },
      ],
    })
    const output = formatFindingsForUser(result)
    // The header should show the level label without a misleading single-finding count
    // It should NOT use the first finding's count as the group header
    expect(output).not.toContain("## Majority Findings (3 members report this (majority))")
    expect(output).toContain("## Majority Findings")
    // Each individual finding still shows its own agreement context
    expect(output).toContain("Agreement context: 3 members report this (majority)")
    expect(output).toContain("Agreement context: 2 members report this (majority)")
  })
  //#given a non-empty findings result
  //#when formatting is generated
  //#then output ends with an action recommendation section
  test("includes a final action recommendation section", () => {
    const result = createSynthesisResult()
    const output = formatFindingsForUser(result)
    expect(output.trimEnd()).toMatch(/## Action Recommendation[\s\S]*$/)
    expect(output).toContain("Prioritize unanimous and majority findings")
  })
 })
--- a/src/agents/athena/findings-presenter.ts
+++ b/src/agents/athena/findings-presenter.ts
@ -1,81 +0,0 @@
 import type { SynthesisResult, SynthesizedFinding } from "./synthesis-types"
 import type { AgreementLevel } from "./types"
 // Fixed order in which agreement levels are grouped and rendered:
 // unanimous, majority, minority, solo.
 const AGREEMENT_ORDER: AgreementLevel[] = ["unanimous", "majority", "minority", "solo"]
 /**
  * Capitalizes the first character of an agreement level for use in headings
  * (for example "majority" becomes "Majority"). The rest is left untouched.
  */
 function toTitle(level: AgreementLevel): string {
  const head = level.substring(0, 1).toUpperCase()
  const tail = level.substring(1)
  return head + tail
 }
 /**
  * Builds the human-readable "agreement context" phrase for one finding.
  *
  * The member count is the length of `finding.reportedBy`. The majority and
  * minority phrases switch to singular wording when exactly one member
  * reported the finding ("1 member reports this"), so the sentence stays
  * grammatical; all other counts keep the "N members report this" wording.
  *
  * @param level - Agreement level that selects the phrasing.
  * @param finding - Finding whose `reportedBy` list supplies the member count.
  * @returns The phrase for the given level.
  */
 function formatAgreementLine(level: AgreementLevel, finding: SynthesizedFinding): string {
  const memberCount = finding.reportedBy.length
  // Subject and verb must agree with the count: "1 member reports" vs "N members report".
  const reporters = memberCount === 1 ? "member reports" : "members report"
  switch (level) {
    case "unanimous":
      // Both sides of the ratio come from the reporter count.
      return `${memberCount}/${memberCount} members agree`
    case "majority":
      return `${memberCount} ${reporters} this (majority)`
    case "minority":
      return `${memberCount} ${reporters} this (minority)`
    case "solo":
      return `${memberCount} member reported this`
  }
 }
 /**
  * Renders a single finding as a markdown sub-section.
  *
  * The heading gets a " [False Positive Risk]" suffix only when the finding is
  * rendered under the "solo" level and is flagged `isFalsePositiveRisk`.
  *
  * @param level - Agreement level the finding is being rendered under.
  * @param finding - Finding to render.
  * @returns Heading plus details, reporters, agreement context, verdict and rationale lines.
  */
 function formatFinding(level: AgreementLevel, finding: SynthesizedFinding): string {
  const verdict = finding.assessment.agrees ? "Agrees" : "Disagrees"

  let heading = `### ${finding.summary}`
  if (level === "solo" && finding.isFalsePositiveRisk) {
    heading += " [False Positive Risk]"
  }

  const body: string[] = [heading]
  body.push(`Details: ${finding.details}`)
  body.push(`Reported by: ${finding.reportedBy.join(", ")}`)
  body.push(`Agreement context: ${formatAgreementLine(level, finding)}`)
  body.push(`Athena assessment: ${verdict}`)
  body.push(`Rationale: ${finding.assessment.rationale}`)
  return body.join("\n")
 }
 /**
  * Renders the closing "Action Recommendation" section.
  *
  * @param result - Synthesis result; only its `question` is echoed here.
  * @param groupedFindings - Findings bucketed by agreement level; a missing
  *        bucket is counted as 0.
  * @returns Section text with per-level counts, fixed guidance, and the question.
  */
 function formatActionRecommendation(result: SynthesisResult, groupedFindings: Map<AgreementLevel, SynthesizedFinding[]>): string {
  const tallies: string[] = []
  for (const level of AGREEMENT_ORDER) {
    const total = groupedFindings.get(level)?.length ?? 0
    tallies.push(`${toTitle(level)}: ${total}`)
  }

  const lines = [
    "## Action Recommendation",
    `Findings by agreement level: ${tallies.join(" | ")}`,
    "Prioritize unanimous and majority findings for immediate execution,",
    "then review minority findings, and manually validate solo findings before delegating changes.",
    `Question context: ${result.question}`,
  ]
  return lines.join("\n")
 }
 /**
  * Formats a synthesis result as a markdown report for the user.
  *
  * With no findings, returns a short report saying none are available plus a
  * recommendation to gather more responses. Otherwise emits one section per
  * non-empty agreement level (in AGREEMENT_ORDER), each headed by the level
  * name and its finding count, followed by the action recommendation.
  *
  * @param result - Synthesis result to present.
  * @returns Report text with sections separated by blank lines.
  */
 export function formatFindingsForUser(result: SynthesisResult): string {
  const title = "# Synthesized Findings"

  if (result.findings.length === 0) {
    const emptyReport = [
      title,
      "No synthesized findings are available.",
      "## Action Recommendation",
      "Gather additional council responses or re-run synthesis before delegation.",
      `Question context: ${result.question}`,
    ]
    return emptyReport.join("\n\n")
  }

  // Bucket findings per level up front; the recommendation section reuses the buckets.
  const buckets = new Map<AgreementLevel, SynthesizedFinding[]>()
  for (const level of AGREEMENT_ORDER) {
    buckets.set(
      level,
      result.findings.filter((candidate) => candidate.agreementLevel === level),
    )
  }

  const parts: string[] = [title]
  for (const level of AGREEMENT_ORDER) {
    const bucket = buckets.get(level) ?? []
    if (bucket.length === 0) {
      continue
    }
    const heading = `## ${toTitle(level)} Findings (${bucket.length})`
    const rendered = bucket.map((entry) => formatFinding(level, entry)).join("\n\n")
    parts.push(`${heading}\n\n${rendered}`)
  }
  parts.push(formatActionRecommendation(result, buckets))

  return parts.join("\n\n")
 }
--- a/src/agents/athena/index.ts
+++ b/src/agents/athena/index.ts
@ -3,10 +3,4 @@ export * from "./agent"
 export * from "./model-parser"
 export * from "./council-prompt"
 export * from "./council-orchestrator"
 export * from "./council-result-collector"
 export * from "./synthesis-types"
 export * from "./synthesis-prompt"
 export * from "./synthesis-formatter"
 export * from "./findings-presenter"
 export * from "./delegation-prompts"
 export * from "../../config/schema/athena"
--- a/src/agents/athena/synthesis-formatter.test.ts
+++ b/src/agents/athena/synthesis-formatter.test.ts
@ -1,157 +0,0 @@
 import { describe, expect, test } from "bun:test"
 import { formatCouncilResultsForSynthesis } from "./synthesis-formatter"
 import type { CouncilExecutionResult } from "./types"
 /**
  * Test fixture: a council execution result with three completed members.
  *
  * @param overrides - Optional fields that replace the defaults via object spread.
  * @returns A fresh result object on every call.
  */
 function createResult(overrides?: Partial<CouncilExecutionResult>): CouncilExecutionResult {
  // Small builder so each default member is declared on a single line.
  const completed = (model: string, name: string, response: string, taskId: string, durationMs: number) => ({
    member: { model, name },
    status: "completed" as const,
    response,
    taskId,
    durationMs,
  })
  const responses: CouncilExecutionResult["responses"] = [
    completed("openai/gpt-5.3-codex", "OpenAI", "Finding A from OpenAI", "task-1", 120),
    completed("anthropic/claude-sonnet-4-5", "Claude", "Finding B from Claude", "task-2", 240),
    completed("google/gemini-3-pro", "Gemini", "Finding C from Gemini", "task-3", 360),
  ]
  const defaults = {
    question: "What reliability risks exist?",
    responses,
    totalMembers: 3,
    completedCount: 3,
    failedCount: 0,
  }
  return { ...defaults, ...overrides }
 }
 describe("formatCouncilResultsForSynthesis", () => {
  //#given an execution result in which all 3 council members completed
  //#when the result is formatted for synthesis
  //#then every member's model identifier appears in the output
  //#then every member's raw response text appears in the output
  //#then the completed status and each member's duration appear in the output
  test("formats all completed members with provenance and response text", () => {
    const rendered = formatCouncilResultsForSynthesis(createResult())
    const expectedFragments = [
      "openai/gpt-5.3-codex",
      "anthropic/claude-sonnet-4-5",
      "google/gemini-3-pro",
      "Finding A from OpenAI",
      "Finding B from Claude",
      "Finding C from Gemini",
      "Status: completed",
      "Duration: 120ms",
      "Duration: 240ms",
      "Duration: 360ms",
    ]
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment)
    }
  })
  //#given an execution result with one completed member and one failed member
  //#when the result is formatted for synthesis
  //#then the completed member's response text is present
  //#then the failed member is listed with its error status and error message
  //#then the failed member's error is NOT rendered as a response section
  test("includes completed response and failed error without response section", () => {
    const responses: CouncilExecutionResult["responses"] = [
      {
        member: { model: "openai/gpt-5.3-codex" },
        status: "completed",
        response: "Primary finding",
        taskId: "task-1",
        durationMs: 80,
      },
      {
        member: { model: "xai/grok-code-fast-1" },
        status: "error",
        error: "Timeout from provider",
        taskId: "task-2",
        durationMs: 500,
      },
    ]
    const rendered = formatCouncilResultsForSynthesis(
      createResult({ responses, totalMembers: 2, completedCount: 1, failedCount: 1 }),
    )
    const expectedFragments = [
      "Primary finding",
      "xai/grok-code-fast-1",
      "Status: error",
      "Error: Timeout from provider",
    ]
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment)
    }
    expect(rendered).not.toContain("Response:\nTimeout from provider")
  })
  //#given an execution result in which no member completed
  //#when the result is formatted for synthesis
  //#then the output carries the "no successful responses" notice
  test("shows no successful responses message when all members fail", () => {
    const responses: CouncilExecutionResult["responses"] = [
      {
        member: { model: "openai/gpt-5.3-codex" },
        status: "error",
        error: "No output",
        taskId: "task-1",
        durationMs: 200,
      },
    ]
    const rendered = formatCouncilResultsForSynthesis(
      createResult({ responses, totalMembers: 1, completedCount: 0, failedCount: 1 }),
    )
    expect(rendered).toContain("No successful responses")
  })
  //#given one member with a custom name and one member without a name
  //#when the result is formatted for synthesis
  //#then the custom name is shown when present and the model is shown otherwise
  test("prefers custom member name and falls back to model", () => {
    const responses: CouncilExecutionResult["responses"] = [
      {
        member: { model: "openai/gpt-5.3-codex", name: "Council Alpha" },
        status: "completed",
        response: "Custom member response",
        taskId: "task-1",
        durationMs: 10,
      },
      {
        member: { model: "google/gemini-3-pro" },
        status: "completed",
        response: "Default member response",
        taskId: "task-2",
        durationMs: 11,
      },
    ]
    const rendered = formatCouncilResultsForSynthesis(
      createResult({ responses, totalMembers: 2, completedCount: 2, failedCount: 0 }),
    )
    expect(rendered).toContain("Council Alpha")
    expect(rendered).toContain("google/gemini-3-pro")
  })
 })
--- a/src/agents/athena/synthesis-formatter.ts
+++ b/src/agents/athena/synthesis-formatter.ts
@ -1,48 +0,0 @@
 import type { CouncilExecutionResult } from "./types"
 /**
  * Renders a council execution result as markdown text for the synthesis step.
  *
  * The document opens with a title and the question. When at least one response has status
  * "completed" it reports `completedCount/totalMembers`; otherwise it states that no member
  * succeeded. One section per response follows in input order: completed members show their
  * response text (or a placeholder when it is blank), all other members show their error
  * message (or a placeholder when none was recorded).
  *
  * @param result - Council execution outcome holding the question, per-member responses, and counts.
  * @returns The document, with top-level parts separated by blank lines.
  */
 export function formatCouncilResultsForSynthesis(result: CouncilExecutionResult): string {
  const memberSections = result.responses.map((entry) => {
    const lines = [
      `## Member: ${entry.member.name ?? entry.member.model} (${entry.status})`,
      `Model: ${entry.member.model}`,
      `Status: ${entry.status}`,
      `Duration: ${entry.durationMs}ms`,
    ]
    if (entry.status === "completed") {
      // Whitespace-only text counts as missing; real text is emitted untrimmed.
      const body = entry.response?.trim() ? entry.response : "No response content provided"
      lines.push("Response:", body)
    } else {
      lines.push(`Error: ${entry.error ?? "No error message provided"}`)
    }
    return lines.join("\n")
  })
  const hasCompleted = result.responses.some((entry) => entry.status === "completed")
  const summaryLines = hasCompleted
    ? [`Completed responses: ${result.completedCount}/${result.totalMembers}`]
    : ["No successful responses from council members.", "Review failed member details below for provenance."]
  const parts = [
    "# Council Responses for Synthesis",
    `Question: ${result.question}`,
    ...summaryLines,
    ...memberSections,
  ]
  return parts.join("\n\n")
 }
--- a/src/agents/athena/synthesis-prompt.ts
+++ b/src/agents/athena/synthesis-prompt.ts
@ -1,44 +0,0 @@
 /**
  * Builds the prompt that asks Athena to merge council member outputs into structured findings.
  *
  * The prompt embeds the question and the formatted responses, lists the synthesis
  * responsibilities (grouping findings, classifying agreement level, adding an assessment,
  * flagging false-positive risk), and ends with a JSON-only output contract.
  *
  * @param formattedResponses - Text inserted verbatim under the "Council Responses" heading.
  * @param question - Text inserted verbatim under the "Original Question" heading.
  * @param completedCount - Number interpolated into the agreement-level classification step.
  * @returns The complete prompt string.
  */
 export function buildSynthesisPrompt(formattedResponses: string, question: string, completedCount: number): string {
  return `You are Athena, the synthesis lead for a multi-model council. Your job is to merge independent model outputs into a single, evidence-grounded synthesis.
 ## Original Question
 ${question}
 ## Council Responses
 ${formattedResponses}
 ## Your Responsibilities
 1. Identify distinct findings across all completed member responses.
 2. Group findings that refer to the same underlying issue (semantic similarity, not exact wording).
 3. Classify agreementLevel for each finding using ${completedCount} completed member(s):
   - unanimous: all completed members reported the finding
   - majority: more than 50% of completed members reported the finding
   - minority: 2 or more members reported it, but not a majority
   - solo: only 1 member reported it
 4. Add AthenaAssessment for each finding:
   - agrees: whether you agree with the finding
   - rationale: concise reason for agreement or disagreement
 5. Set isFalsePositiveRisk:
   - true for solo findings (likely false positives unless strongly supported)
   - false for findings reported by multiple members
 ## Output Contract
 Return JSON only with this shape:
 {
  "findings": [
    {
      "summary": "string",
      "details": "string",
      "agreementLevel": "unanimous | majority | minority | solo",
      "reportedBy": ["model/name"],
      "assessment": {
        "agrees": true,
        "rationale": "string"
      },
      "isFalsePositiveRisk": false
    }
  ]
 }
 The finding object must match the SynthesizedFinding type exactly. Keep findings concise, concrete, and tied to source responses.`
 }
--- a/src/agents/athena/synthesis-types.ts
+++ b/src/agents/athena/synthesis-types.ts
@ -1,31 +0,0 @@
 import type { AgreementLevel, CouncilMemberConfig, CouncilMemberStatus } from "./types"
 /** Athena's own verdict on a single synthesized finding. */
 export interface AthenaAssessment {
  /** Whether Athena agrees with the finding. */
  agrees: boolean
  /** Reason given for the agreement or disagreement. */
  rationale: string
 }
 /** One finding produced by synthesis, together with how widely the council reported it. */
 export interface SynthesizedFinding {
  /** Summary text of the finding. */
  summary: string
  /** Detail text of the finding. */
  details: string
  /** How many of the council members reported this finding, expressed as an agreement level. */
  agreementLevel: AgreementLevel
  /** Identifiers of the members that reported the finding. */
  reportedBy: string[]
  /** Athena's agreement verdict and rationale for this finding. */
  assessment: AthenaAssessment
  /** Flag marking the finding as a possible false positive. */
  isFalsePositiveRisk: boolean
 }
 /** Per-member record attached to a synthesis result. */
 export interface MemberProvenance {
  /** Configuration of the council member this record describes. */
  member: CouncilMemberConfig
  /** Outcome status recorded for the member. */
  status: CouncilMemberStatus
  /** Optional raw response text. NOTE(review): presumably absent when the member produced no output; confirm. */
  rawResponse?: string
  /** NOTE(review): presumably elapsed time in milliseconds, per the field name; confirm. */
  durationMs: number
 }
 /** Outcome of a synthesis run: the question, the findings, and per-member provenance. */
 export interface SynthesisResult {
  /** The question the findings relate to. */
  question: string
  /** Synthesized findings; each carries its own agreement level. */
  findings: SynthesizedFinding[]
  /** One provenance record per council member. */
  memberProvenance: MemberProvenance[]
  /** NOTE(review): presumably equals findings.length; confirm against the producer. */
  totalFindings: number
  /** NOTE(review): which agreement levels count as "consensus" is not defined here; confirm. */
  consensusCount: number
  /** NOTE(review): which agreement levels count as "outlier" is not defined here; confirm. */
  outlierCount: number
 }
--- a/src/agents/athena/types.ts
+++ b/src/agents/athena/types.ts
@ -14,23 +14,20 @@ export interface AthenaConfig {
  council: CouncilConfig
 }
-export type CouncilMemberStatus = "completed" | "timeout" | "error"
+export interface CouncilLaunchFailure {
 export type AgreementLevel = "unanimous" | "majority" | "minority" | "solo"
 export interface CouncilMemberResponse {
  member: CouncilMemberConfig
-  status: CouncilMemberStatus
+  error: string
  response?: string
  error?: string
  taskId: string
  durationMs: number
 }
-export interface CouncilExecutionResult {
+export interface CouncilLaunchedMember {
-  question: string
+  member: CouncilMemberConfig
-  responses: CouncilMemberResponse[]
+  taskId: string
-  totalMembers: number
+}
-  completedCount: number
+
-  failedCount: number
+/** Return type of executeCouncil — only tracks launch outcomes, not task completion */
 export interface CouncilLaunchResult {
  /** NOTE(review): presumably the question passed to the council launch; confirm against executeCouncil. */
  question: string
  /** Members that were launched, each paired with its task id. */
  launched: CouncilLaunchedMember[]
  /** Members that failed to launch, each paired with an error message. */
  failures: CouncilLaunchFailure[]
  /** NOTE(review): presumably launched.length + failures.length; confirm against executeCouncil. */
  totalMembers: number
 }
--- a/src/tools/athena-council/tools.ts
+++ b/src/tools/athena-council/tools.ts
@ -113,21 +113,17 @@ export function createAthenaCouncilTool(args: {
        })
        const launchResult: AthenaCouncilLaunchResult = {
-          launched: execution.responses.filter((response) => response.taskId.length > 0).length,
+          launched: execution.launched.length,
-          members: execution.responses
+          members: execution.launched.map((entry) => ({
-            .filter((response) => response.taskId.length > 0)
+            task_id: entry.taskId,
-            .map((response) => ({
+            name: entry.member.name ?? entry.member.model,
-              task_id: response.taskId,
+            model: entry.member.model,
              name: response.member.name ?? response.member.model,
              model: response.member.model,
            status: "running",
          })),
-          failed: execution.responses
+          failed: execution.failures.map((entry) => ({
-            .filter((response) => response.taskId.length === 0)
+            name: entry.member.name ?? entry.member.model,
-            .map((response) => ({
+            model: entry.member.model,
-              name: response.member.name ?? response.member.model,
+            error: entry.error,
              model: response.member.model,
              error: response.error ?? "Launch failed",
          })),
        }