refactor(athena): remove dead code from phases 2, 3, 5 pipeline

Remove 9 files (913 lines) from the code-driven synthesis pipeline that was superseded by the agent-driven approach in phases 6-8. Phases 3/5 built: collectCouncilResults → formatForSynthesis → buildSynthesisPrompt → formatFindingsForUser → buildDelegationPrompt. Phases 6-8 replaced it with: launch → background_output → Athena synthesizes in conversation → switch_agent. The old pipeline was never wired into the runtime, and all of its consumers were other dead code. Also simplify executeCouncil to return CouncilLaunchResult (task IDs + failures) instead of reading stale task status via collectCouncilResults. Deleted: council-result-collector, synthesis-types, synthesis-prompt, synthesis-formatter, findings-presenter, delegation-prompts (+ 4 tests). Cleaned: CouncilMemberStatus, AgreementLevel, CouncilMemberResponse, CouncilExecutionResult types from types.ts.
2026-02-13 17:05:23 +01:00 · 2026-02-13 17:05:23 +01:00 · 4d675bac89
commit 4d675bac89
parent d8ba9b1f0c
14 changed files with 103 additions and 913 deletions
--- a/src/agents/athena/council-orchestrator.test.ts
+++ b/src/agents/athena/council-orchestrator.test.ts
@ -3,16 +3,6 @@ import { buildCouncilPrompt } from "./council-prompt"
 import { executeCouncil } from "./council-orchestrator"
 import type { CouncilConfig } from "./types"

-type MockTaskStatus = "completed" | "error" | "cancelled" | "interrupt"
-
-interface MockTask {
-  id: string
-  status: MockTaskStatus
-  result?: string
-  error?: string
-  completedAt?: Date
-}
-
 interface MockLaunchInput {
  description: string
  prompt: string
@ -25,20 +15,15 @@ interface MockLaunchInput {
  permission?: Record<string, "ask" | "allow" | "deny">
 }

-function createMockTask(task: MockTask, launch: MockLaunchInput): MockTask & {
-  parentSessionID: string
-  parentMessageID: string
-  description: string
-  prompt: string
-  agent: string
-} {
+function createMockTask(id: string, launch: MockLaunchInput) {
  return {
+    id,
+    status: "pending" as const,
    parentSessionID: launch.parentSessionID,
    parentMessageID: launch.parentMessageID,
    description: launch.description,
    prompt: launch.prompt,
    agent: launch.agent,
-    ...task,
  }
 }

@ -51,15 +36,7 @@ describe("executeCouncil", () => {
    const launcher = {
      launch: async (input: MockLaunchInput) => {
        launches.push(input)
-        return createMockTask(
-          {
-            id: `task-${launches.length}`,
-            status: "completed",
-            result: `response-${launches.length}`,
-            completedAt: new Date(),
-          },
-          input
-        )
+        return createMockTask(`task-${launches.length}`, input)
      },
    }

@ -84,8 +61,9 @@ describe("executeCouncil", () => {
    const expectedPrompt = buildCouncilPrompt(question)

    expect(launches).toHaveLength(3)
-    expect(result.completedCount).toBe(3)
-    expect(result.failedCount).toBe(0)
+    expect(result.launched).toHaveLength(3)
+    expect(result.failures).toHaveLength(0)
+    expect(result.totalMembers).toBe(3)

    for (const launch of launches) {
      expect(launch.prompt).toBe(expectedPrompt)
@ -98,33 +76,16 @@ describe("executeCouncil", () => {
    expect(launches[2]?.model).toEqual({ providerID: "google", modelID: "gemini-3-pro" })
  })

-  //#given a council with 3 members where 1 member fails
+  //#given a council with 3 members where 1 launch throws
  //#when executeCouncil is called
-  //#then partial failures are tolerated and preserved in responses
-  test("returns successful result for partial failures", async () => {
+  //#then launch failures are captured separately from successful launches
+  test("captures launch failures separately from successful launches", async () => {
    const launcher = {
      launch: async (input: MockLaunchInput) => {
        if (input.model?.providerID === "anthropic") {
-          return createMockTask(
-            {
-              id: "task-failed",
-              status: "error",
-              error: "Token limit exceeded",
-              completedAt: new Date(),
-            },
-            input
-          )
+          throw new Error("Provider unavailable")
        }
-
-        return createMockTask(
-          {
-            id: `task-${input.model?.providerID}`,
-            status: "completed",
-            result: `ok-${input.model?.providerID}`,
-            completedAt: new Date(),
-          },
-          input
-        )
+        return createMockTask(`task-${input.model?.providerID}`, input)
      },
    }

@ -142,28 +103,21 @@ describe("executeCouncil", () => {
      parentMessageID: "message-1",
    })

-    expect(result.completedCount).toBe(2)
-    expect(result.failedCount).toBe(1)
-    expect(result.responses).toHaveLength(3)
-    expect(result.responses.filter((response) => response.status === "completed")).toHaveLength(2)
-    expect(result.responses.filter((response) => response.status === "error")).toHaveLength(1)
+    expect(result.launched).toHaveLength(2)
+    expect(result.failures).toHaveLength(1)
+    expect(result.totalMembers).toBe(3)
+    expect(result.failures[0]?.member.model).toBe("anthropic/claude-sonnet-4-5")
+    expect(result.failures[0]?.error).toContain("Launch failed")
  })

-  //#given a council where all members fail
+  //#given a council where all launches throw
  //#when executeCouncil is called
-  //#then it returns structured error result with zero completions
-  test("returns all failures when every member fails", async () => {
+  //#then all members appear as failures with zero launched
+  test("returns all failures when every launch throws", async () => {
    const launcher = {
-      launch: async (input: MockLaunchInput) =>
-        createMockTask(
-          {
-            id: `task-${input.model?.providerID}`,
-            status: "error",
-            error: "Model unavailable",
-            completedAt: new Date(),
-          },
-          input
-        ),
+      launch: async () => {
+        throw new Error("Model unavailable")
+      },
    }

    const result = await executeCouncil({
@ -179,29 +133,21 @@ describe("executeCouncil", () => {
      parentMessageID: "message-1",
    })

-    expect(result.completedCount).toBe(0)
-    expect(result.failedCount).toBe(2)
-    expect(result.responses).toHaveLength(2)
-    expect(result.responses.every((response) => response.status === "error")).toBe(true)
+    expect(result.launched).toHaveLength(0)
+    expect(result.failures).toHaveLength(2)
+    expect(result.totalMembers).toBe(2)
+    expect(result.failures.every((f) => f.error.includes("Launch failed"))).toBe(true)
  })

  //#given a council with one invalid model string
  //#when executeCouncil is called
-  //#then invalid member becomes an error response while others still execute
+  //#then invalid member becomes a failure while others still launch
  test("handles invalid model strings without crashing council execution", async () => {
    const launches: MockLaunchInput[] = []
    const launcher = {
      launch: async (input: MockLaunchInput) => {
        launches.push(input)
-        return createMockTask(
-          {
-            id: `task-${launches.length}`,
-            status: "completed",
-            result: "valid-member-response",
-            completedAt: new Date(),
-          },
-          input
-        )
+        return createMockTask(`task-${launches.length}`, input)
      },
    }

@ -219,10 +165,9 @@ describe("executeCouncil", () => {
    })

    expect(launches).toHaveLength(1)
-    expect(result.completedCount).toBe(1)
-    expect(result.failedCount).toBe(1)
-    expect(result.responses).toHaveLength(2)
-    expect(result.responses.find((response) => response.member.model === "invalid-model")?.status).toBe("error")
+    expect(result.launched).toHaveLength(1)
+    expect(result.failures).toHaveLength(1)
+    expect(result.failures.find((f) => f.member.model === "invalid-model")?.error).toContain("Launch failed")
  })

  //#given members with per-member temperature and variant
@ -233,15 +178,7 @@ describe("executeCouncil", () => {
    const launcher = {
      launch: async (input: MockLaunchInput) => {
        launches.push(input)
-        return createMockTask(
-          {
-            id: `task-${launches.length}`,
-            status: "completed",
-            result: "ok",
-            completedAt: new Date(),
-          },
-          input
-        )
+        return createMockTask(`task-${launches.length}`, input)
      },
    }

@ -264,4 +201,33 @@ describe("executeCouncil", () => {
    expect(launches[1]?.temperature).toBe(0.3)
    expect(launches[1]?.model?.variant).toBeUndefined()
  })
+
+  //#given launched members
+  //#when executeCouncil returns
+  //#then each launched member has a taskId for background_output retrieval
+  test("returns task IDs for background_output retrieval", async () => {
+    const launcher = {
+      launch: async (input: MockLaunchInput) =>
+        createMockTask(`bg_${input.model?.providerID}`, input),
+    }
+
+    const result = await executeCouncil({
+      question: "Review error handling",
+      council: {
+        members: [
+          { model: "openai/gpt-5.3-codex", name: "OpenAI" },
+          { model: "google/gemini-3-pro", name: "Gemini" },
+        ],
+      },
+      launcher,
+      parentSessionID: "session-1",
+      parentMessageID: "message-1",
+    })
+
+    expect(result.launched).toHaveLength(2)
+    expect(result.launched[0]?.taskId).toBe("bg_openai")
+    expect(result.launched[0]?.member.name).toBe("OpenAI")
+    expect(result.launched[1]?.taskId).toBe("bg_google")
+    expect(result.launched[1]?.member.name).toBe("Gemini")
+  })
 })
--- a/src/agents/athena/council-orchestrator.ts
+++ b/src/agents/athena/council-orchestrator.ts
@ -1,9 +1,8 @@
 import type { LaunchInput, BackgroundTask } from "../../features/background-agent/types"
 import { createAgentToolRestrictions } from "../../shared/permission-compat"
 import { buildCouncilPrompt } from "./council-prompt"
-import { collectCouncilResults } from "./council-result-collector"
 import { parseModelString } from "./model-parser"
-import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types"
+import type { CouncilConfig, CouncilLaunchFailure, CouncilLaunchedMember, CouncilLaunchResult, CouncilMemberConfig } from "./types"

 export type CouncilLaunchInput = LaunchInput

@ -20,57 +19,43 @@ export interface CouncilExecutionInput {
  parentAgent?: string
 }

-export async function executeCouncil(input: CouncilExecutionInput): Promise<CouncilExecutionResult> {
+/**
+ * Launches all council members in parallel and returns launch outcomes.
+ * Does NOT wait for task completion — actual results are collected by the
+ * agent via background_output calls after this returns.
+ */
+export async function executeCouncil(input: CouncilExecutionInput): Promise<CouncilLaunchResult> {
  const { question, council, launcher, parentSessionID, parentMessageID, parentAgent } = input
  const prompt = buildCouncilPrompt(question)
-  const startTimes = new Map<string, number>()

  const launchResults = await Promise.allSettled(
    council.members.map((member) =>
-      launchMember(
-        member,
-        prompt,
-        launcher,
-        parentSessionID,
-        parentMessageID,
-        parentAgent,
-        startTimes
-      )
+      launchMember(member, prompt, launcher, parentSessionID, parentMessageID, parentAgent)
    )
  )

-  const launchedTasks: BackgroundTask[] = []
-  const launchedMembers: CouncilMemberConfig[] = []
-  const launchFailures: CouncilMemberResponse[] = []
+  const launched: CouncilLaunchedMember[] = []
+  const failures: CouncilLaunchFailure[] = []

  launchResults.forEach((result, index) => {
    const member = council.members[index]

    if (result.status === "fulfilled") {
-      launchedTasks.push(result.value)
-      launchedMembers.push(member)
+      launched.push({ member, taskId: result.value.id })
      return
    }

-    launchFailures.push({
+    failures.push({
      member,
-      status: "error",
      error: `Launch failed: ${String(result.reason)}`,
-      taskId: "",
-      durationMs: 0,
    })
  })

-  const collected = collectCouncilResults(launchedTasks, launchedMembers, startTimes)
-  const responses = [...collected, ...launchFailures]
-  const completedCount = responses.filter((response) => response.status === "completed").length
-
  return {
    question,
-    responses,
+    launched,
+    failures,
    totalMembers: council.members.length,
-    completedCount,
-    failedCount: council.members.length - completedCount,
  }
 }

@ -80,8 +65,7 @@ async function launchMember(
  launcher: CouncilLauncher,
  parentSessionID: string,
  parentMessageID: string,
-  parentAgent: string | undefined,
-  startTimes: Map<string, number>
+  parentAgent: string | undefined
 ): Promise<BackgroundTask> {
  const parsedModel = parseModelString(member.model)
  if (!parsedModel) {
@ -90,7 +74,7 @@ async function launchMember(

  const restrictions = createAgentToolRestrictions(["write", "edit", "task"])
  const memberName = member.name ?? member.model
-  const task = await launcher.launch({
+  return launcher.launch({
    description: `Council member: ${memberName}`,
    prompt,
    agent: "athena",
@ -105,7 +89,4 @@ async function launchMember(
    ...(member.temperature !== undefined ? { temperature: member.temperature } : {}),
    permission: restrictions.permission,
  })
-
-  startTimes.set(task.id, Date.now())
-  return task
 }
--- a/src/agents/athena/council-result-collector.ts
+++ b/src/agents/athena/council-result-collector.ts
@ -1,36 +0,0 @@
-import type { BackgroundTask, BackgroundTaskStatus } from "../../features/background-agent/types"
-import type { CouncilMemberConfig, CouncilMemberResponse, CouncilMemberStatus } from "./types"
-
-export function collectCouncilResults(
-  tasks: BackgroundTask[],
-  members: CouncilMemberConfig[],
-  startTimes: Map<string, number>
-): CouncilMemberResponse[] {
-  return tasks.map((task, index) => {
-    const member = members[index]
-    const status = mapTaskStatus(task.status)
-    const startTime = startTimes.get(task.id) ?? Date.now()
-    const finishedAt = task.completedAt?.getTime() ?? Date.now()
-
-    return {
-      member,
-      status,
-      response: status === "completed" ? task.result : undefined,
-      error: status === "completed" ? undefined : (task.error ?? `Task status: ${task.status}`),
-      taskId: task.id,
-      durationMs: Math.max(0, finishedAt - startTime),
-    }
-  })
-}
-
-function mapTaskStatus(taskStatus: BackgroundTaskStatus): CouncilMemberStatus {
-  if (taskStatus === "completed") {
-    return "completed"
-  }
-
-  if (taskStatus === "cancelled" || taskStatus === "interrupt") {
-    return "timeout"
-  }
-
-  return "error"
-}
--- a/src/agents/athena/delegation-prompts.test.ts
+++ b/src/agents/athena/delegation-prompts.test.ts
@ -1,125 +0,0 @@
-import { describe, expect, test } from "bun:test"
-import type { SynthesizedFinding } from "./synthesis-types"
-import { buildAtlasDelegationPrompt, buildPrometheusDelegationPrompt } from "./delegation-prompts"
-
-function createConfirmedFindings(): SynthesizedFinding[] {
-  return [
-    {
-      summary: "Guard missing council config in startup",
-      details: "Athena path can proceed with undefined council members in some flows.",
-      agreementLevel: "unanimous",
-      reportedBy: ["OpenAI", "Claude", "Gemini"],
-      assessment: {
-        agrees: true,
-        rationale: "Directly observed from startup and config fallback paths.",
-      },
-      isFalsePositiveRisk: false,
-    },
-    {
-      summary: "Potential retry thrash in background runner",
-      details: "Repeated failures can cascade retry windows under high load.",
-      agreementLevel: "minority",
-      reportedBy: ["Claude"],
-      assessment: {
-        agrees: true,
-        rationale: "Worth addressing to lower operational risk.",
-      },
-      isFalsePositiveRisk: false,
-    },
-  ]
-}
-
-describe("buildAtlasDelegationPrompt", () => {
-  //#given confirmed findings and an original question
-  //#when the Atlas delegation prompt is built
-  //#then it includes both findings and the original question context
-  test("includes confirmed findings summaries and original question", () => {
-    const findings = createConfirmedFindings()
-    const question = "Which issues should we fix first in Athena integration?"
-
-    const prompt = buildAtlasDelegationPrompt(findings, question)
-
-    expect(prompt).toContain("Original question")
-    expect(prompt).toContain(question)
-    expect(prompt).toContain("Guard missing council config in startup")
-    expect(prompt).toContain("Potential retry thrash in background runner")
-  })
-
-  //#given confirmed findings
-  //#when Atlas prompt is generated
-  //#then it explicitly asks Atlas to fix those specific issues
-  test("instructs Atlas to implement direct fixes", () => {
-    const prompt = buildAtlasDelegationPrompt(createConfirmedFindings(), "Fix Athena reliability issues")
-
-    expect(prompt).toContain("Fix these confirmed issues directly")
-    expect(prompt).toContain("Implement code changes")
-    expect(prompt).toContain("prioritize by agreement level")
-  })
-
-  //#given a single confirmed finding
-  //#when Atlas prompt is generated
-  //#then prompt still renders correctly for edge case input
-  test("handles a single finding edge case", () => {
-    const [singleFinding] = createConfirmedFindings()
-
-    const prompt = buildAtlasDelegationPrompt([singleFinding], "Fix this one issue")
-
-    expect(prompt).toContain("1. Guard missing council config in startup")
-    expect(prompt).toContain("Agreement level: unanimous")
-  })
-})
-
-describe("buildPrometheusDelegationPrompt", () => {
-  //#given confirmed findings and an original question
-  //#when the Prometheus delegation prompt is built
-  //#then it includes both findings and the original question context
-  test("includes confirmed findings summaries and original question", () => {
-    const findings = createConfirmedFindings()
-    const question = "How should we sequence Athena integration hardening work?"
-
-    const prompt = buildPrometheusDelegationPrompt(findings, question)
-
-    expect(prompt).toContain("Original question")
-    expect(prompt).toContain(question)
-    expect(prompt).toContain("Guard missing council config in startup")
-    expect(prompt).toContain("Potential retry thrash in background runner")
-  })
-
-  //#given confirmed findings
-  //#when Prometheus prompt is generated
-  //#then it explicitly asks for phased planning and prioritization
-  test("instructs Prometheus to create an execution plan", () => {
-    const prompt = buildPrometheusDelegationPrompt(createConfirmedFindings(), "Plan Athena stabilization")
-
-    expect(prompt).toContain("Create an execution plan")
-    expect(prompt).toContain("phased implementation plan")
-    expect(prompt).toContain("prioritize by agreement level and impact")
-  })
-
-  //#given a single confirmed finding
-  //#when Prometheus prompt is generated
-  //#then prompt still renders correctly for edge case input
-  test("handles a single finding edge case", () => {
-    const [singleFinding] = createConfirmedFindings()
-
-    const prompt = buildPrometheusDelegationPrompt([singleFinding], "Plan this one issue")
-
-    expect(prompt).toContain("1. Guard missing council config in startup")
-    expect(prompt).toContain("Agreement level: unanimous")
-  })
-
-  //#given findings at multiple agreement levels
-  //#when either delegation prompt is generated
-  //#then each finding includes agreement level context
-  test("includes agreement level context for each finding in both prompts", () => {
-    const findings = createConfirmedFindings()
-
-    const atlasPrompt = buildAtlasDelegationPrompt(findings, "Atlas context")
-    const prometheusPrompt = buildPrometheusDelegationPrompt(findings, "Prometheus context")
-
-    expect(atlasPrompt).toContain("Agreement level: unanimous")
-    expect(atlasPrompt).toContain("Agreement level: minority")
-    expect(prometheusPrompt).toContain("Agreement level: unanimous")
-    expect(prometheusPrompt).toContain("Agreement level: minority")
-  })
-})
--- a/src/agents/athena/delegation-prompts.ts
+++ b/src/agents/athena/delegation-prompts.ts
@ -1,55 +0,0 @@
-import type { SynthesizedFinding } from "./synthesis-types"
-
-function formatFindingBlock(finding: SynthesizedFinding, index: number): string {
-  const assessment = finding.assessment.agrees ? "Agrees" : "Disagrees"
-
-  return [
-    `${index + 1}. ${finding.summary}`,
-    `   Details: ${finding.details}`,
-    `   Agreement level: ${finding.agreementLevel}`,
-    `   Athena assessment: ${assessment}`,
-    `   Rationale: ${finding.assessment.rationale}`,
-  ].join("\n")
-}
-
-function formatConfirmedFindings(confirmedFindings: SynthesizedFinding[]): string {
-  return confirmedFindings.map((finding, index) => formatFindingBlock(finding, index)).join("\n\n")
-}
-
-export function buildAtlasDelegationPrompt(confirmedFindings: SynthesizedFinding[], question: string): string {
-  return [
-    "# Atlas Delegation Brief",
-    "Original question:",
-    question,
-    "",
-    "Task:",
-    "Fix these confirmed issues directly.",
-    "",
-    "Confirmed findings:",
-    formatConfirmedFindings(confirmedFindings),
-    "",
-    "Execution instructions:",
-    "- Implement code changes to resolve each confirmed issue.",
-    "- prioritize by agreement level, addressing unanimous findings first.",
-    "- Validate fixes with relevant tests and type safety checks.",
-  ].join("\n")
-}
-
-export function buildPrometheusDelegationPrompt(confirmedFindings: SynthesizedFinding[], question: string): string {
-  return [
-    "# Prometheus Delegation Brief",
-    "Original question:",
-    question,
-    "",
-    "Task:",
-    "Create an execution plan for these confirmed issues.",
-    "",
-    "Confirmed findings:",
-    formatConfirmedFindings(confirmedFindings),
-    "",
-    "Planning instructions:",
-    "- Produce a phased implementation plan with clear task boundaries.",
-    "- prioritize by agreement level and impact.",
-    "- Include verification checkpoints for each phase.",
-  ].join("\n")
-}
--- a/src/agents/athena/findings-presenter.test.ts
+++ b/src/agents/athena/findings-presenter.test.ts
@ -1,167 +0,0 @@
-import { describe, expect, test } from "bun:test"
-import type { SynthesisResult } from "./synthesis-types"
-import { formatFindingsForUser } from "./findings-presenter"
-
-function createSynthesisResult(overrides?: Partial<SynthesisResult>): SynthesisResult {
-  return {
-    question: "Review the Athena council outputs for actionable risks",
-    findings: [
-      {
-        summary: "Validate configuration before execution",
-        details: "Missing guard clauses can allow invalid member configs.",
-        agreementLevel: "majority",
-        reportedBy: ["OpenAI", "Claude"],
-        assessment: {
-          agrees: true,
-          rationale: "This aligns with repeated failures observed in setup paths.",
-        },
-        isFalsePositiveRisk: false,
-      },
-      {
-        summary: "Retry strategy lacks upper bounds",
-        details: "Unbounded retries may cause runaway background tasks.",
-        agreementLevel: "solo",
-        reportedBy: ["Gemini"],
-        assessment: {
-          agrees: false,
-          rationale: "Current retry count is already constrained in most flows.",
-        },
-        isFalsePositiveRisk: true,
-      },
-      {
-        summary: "Preserve partial successes",
-        details: "Do not fail entire council run when one member errors.",
-        agreementLevel: "unanimous",
-        reportedBy: ["OpenAI", "Claude", "Gemini"],
-        assessment: {
-          agrees: true,
-          rationale: "This is required for resilient multi-model orchestration.",
-        },
-        isFalsePositiveRisk: false,
-      },
-      {
-        summary: "Reduce prompt token duplication",
-        details: "Duplicate context blocks increase cost without improving quality.",
-        agreementLevel: "minority",
-        reportedBy: ["Claude"],
-        assessment: {
-          agrees: true,
-          rationale: "Consolidation should lower cost while preserving intent.",
-        },
-        isFalsePositiveRisk: false,
-      },
-    ],
-    memberProvenance: [],
-    totalFindings: 4,
-    consensusCount: 2,
-    outlierCount: 1,
-    ...overrides,
-  }
-}
-
-describe("formatFindingsForUser", () => {
-  //#given findings across all agreement levels
-  //#when formatFindingsForUser is called
-  //#then groups appear in deterministic order: unanimous, majority, minority, solo
-  test("groups findings by agreement level in required order", () => {
-    const result = createSynthesisResult()
-
-    const output = formatFindingsForUser(result)
-
-    const unanimousIndex = output.indexOf("## Unanimous Findings")
-    const majorityIndex = output.indexOf("## Majority Findings")
-    const minorityIndex = output.indexOf("## Minority Findings")
-    const soloIndex = output.indexOf("## Solo Findings")
-
-    expect(unanimousIndex).toBeGreaterThan(-1)
-    expect(majorityIndex).toBeGreaterThan(unanimousIndex)
-    expect(minorityIndex).toBeGreaterThan(majorityIndex)
-    expect(soloIndex).toBeGreaterThan(minorityIndex)
-  })
-
-  //#given a finding with assessment details
-  //#when formatting is generated
-  //#then each finding includes summary, details, reported-by, and Athena rationale
-  test("renders finding body and Athena assessment rationale", () => {
-    const result = createSynthesisResult()
-
-    const output = formatFindingsForUser(result)
-
-    expect(output).toContain("Validate configuration before execution")
-    expect(output).toContain("Missing guard clauses can allow invalid member configs.")
-    expect(output).toContain("Reported by: OpenAI, Claude")
-    expect(output).toContain("Athena assessment: Agrees")
-    expect(output).toContain("Rationale: This aligns with repeated failures observed in setup paths.")
-  })
-
-  //#given a solo finding flagged as false-positive risk
-  //#when formatting is generated
-  //#then a visible warning marker is included
-  test("shows false-positive warning for risky solo findings", () => {
-    const result = createSynthesisResult()
-
-    const output = formatFindingsForUser(result)
-
-    expect(output).toContain("[False Positive Risk]")
-    expect(output).toContain("Retry strategy lacks upper bounds")
-  })
-
-  //#given no findings
-  //#when formatFindingsForUser is called
-  //#then output includes a graceful no-findings message
-  test("handles empty findings with a no-findings message", () => {
-    const result = createSynthesisResult({ findings: [], totalFindings: 0, consensusCount: 0, outlierCount: 0 })
-
-    const output = formatFindingsForUser(result)
-
-    expect(output).toContain("No synthesized findings are available")
-  })
-
-  //#given multiple majority findings with different reporter counts
-  //#when formatting is generated
-  //#then group header shows the agreement level label without a misleading single count
-  test("shows agreement level label in group header without single-finding count", () => {
-    const result = createSynthesisResult({
-      findings: [
-        {
-          summary: "Finding A",
-          details: "Reported by 3 members",
-          agreementLevel: "majority",
-          reportedBy: ["OpenAI", "Claude", "Gemini"],
-          assessment: { agrees: true, rationale: "Valid" },
-          isFalsePositiveRisk: false,
-        },
-        {
-          summary: "Finding B",
-          details: "Reported by 2 members",
-          agreementLevel: "majority",
-          reportedBy: ["OpenAI", "Claude"],
-          assessment: { agrees: true, rationale: "Also valid" },
-          isFalsePositiveRisk: false,
-        },
-      ],
-    })
-
-    const output = formatFindingsForUser(result)
-
-    // The header should show the level label without a misleading single-finding count
-    // It should NOT use the first finding's count as the group header
-    expect(output).not.toContain("## Majority Findings (3 members report this (majority))")
-    expect(output).toContain("## Majority Findings")
-    // Each individual finding still shows its own agreement context
-    expect(output).toContain("Agreement context: 3 members report this (majority)")
-    expect(output).toContain("Agreement context: 2 members report this (majority)")
-  })
-
-  //#given a non-empty findings result
-  //#when formatting is generated
-  //#then output ends with an action recommendation section
-  test("includes a final action recommendation section", () => {
-    const result = createSynthesisResult()
-
-    const output = formatFindingsForUser(result)
-
-    expect(output.trimEnd()).toMatch(/## Action Recommendation[\s\S]*$/)
-    expect(output).toContain("Prioritize unanimous and majority findings")
-  })
-})
--- a/src/agents/athena/findings-presenter.ts
+++ b/src/agents/athena/findings-presenter.ts
@ -1,81 +0,0 @@
-import type { SynthesisResult, SynthesizedFinding } from "./synthesis-types"
-import type { AgreementLevel } from "./types"
-
-const AGREEMENT_ORDER: AgreementLevel[] = ["unanimous", "majority", "minority", "solo"]
-
-function toTitle(level: AgreementLevel): string {
-  return `${level.charAt(0).toUpperCase()}${level.slice(1)}`
-}
-
-function formatAgreementLine(level: AgreementLevel, finding: SynthesizedFinding): string {
-  const memberCount = finding.reportedBy.length
-
-  switch (level) {
-    case "unanimous":
-      return `${memberCount}/${memberCount} members agree`
-    case "majority":
-      return `${memberCount} members report this (majority)`
-    case "minority":
-      return `${memberCount} members report this (minority)`
-    case "solo":
-      return `${memberCount} member reported this`
-  }
-}
-
-function formatFinding(level: AgreementLevel, finding: SynthesizedFinding): string {
-  const assessment = finding.assessment.agrees ? "Agrees" : "Disagrees"
-  const warning = level === "solo" && finding.isFalsePositiveRisk ? " [False Positive Risk]" : ""
-
-  return [
-    `### ${finding.summary}${warning}`,
-    `Details: ${finding.details}`,
-    `Reported by: ${finding.reportedBy.join(", ")}`,
-    `Agreement context: ${formatAgreementLine(level, finding)}`,
-    `Athena assessment: ${assessment}`,
-    `Rationale: ${finding.assessment.rationale}`,
-  ].join("\n")
-}
-
-function formatActionRecommendation(result: SynthesisResult, groupedFindings: Map<AgreementLevel, SynthesizedFinding[]>): string {
-  const counts = AGREEMENT_ORDER.map((level) => `${toTitle(level)}: ${groupedFindings.get(level)?.length ?? 0}`).join(" | ")
-
-  return [
-    "## Action Recommendation",
-    `Findings by agreement level: ${counts}`,
-    "Prioritize unanimous and majority findings for immediate execution,",
-    "then review minority findings, and manually validate solo findings before delegating changes.",
-    `Question context: ${result.question}`,
-  ].join("\n")
-}
-
-export function formatFindingsForUser(result: SynthesisResult): string {
-  if (result.findings.length === 0) {
-    return [
-      "# Synthesized Findings",
-      "No synthesized findings are available.",
-      "## Action Recommendation",
-      "Gather additional council responses or re-run synthesis before delegation.",
-      `Question context: ${result.question}`,
-    ].join("\n\n")
-  }
-
-  const groupedFindings = new Map<AgreementLevel, SynthesizedFinding[]>(
-    AGREEMENT_ORDER.map((level) => [
-      level,
-      result.findings.filter((finding) => finding.agreementLevel === level),
-    ]),
-  )
-
-  const sections = AGREEMENT_ORDER.flatMap((level) => {
-    const findings = groupedFindings.get(level) ?? []
-    if (findings.length === 0) {
-      return []
-    }
-
-    const header = `## ${toTitle(level)} Findings (${findings.length})`
-    const entries = findings.map((finding) => formatFinding(level, finding)).join("\n\n")
-    return [`${header}\n\n${entries}`]
-  })
-
-  return ["# Synthesized Findings", ...sections, formatActionRecommendation(result, groupedFindings)].join("\n\n")
-}
--- a/src/agents/athena/index.ts
+++ b/src/agents/athena/index.ts
@ -3,10 +3,4 @@ export * from "./agent"
 export * from "./model-parser"
 export * from "./council-prompt"
 export * from "./council-orchestrator"
-export * from "./council-result-collector"
-export * from "./synthesis-types"
-export * from "./synthesis-prompt"
-export * from "./synthesis-formatter"
-export * from "./findings-presenter"
-export * from "./delegation-prompts"
 export * from "../../config/schema/athena"
--- a/src/agents/athena/synthesis-formatter.test.ts
+++ b/src/agents/athena/synthesis-formatter.test.ts
@ -1,157 +0,0 @@
-import { describe, expect, test } from "bun:test"
-import { formatCouncilResultsForSynthesis } from "./synthesis-formatter"
-import type { CouncilExecutionResult } from "./types"
-
-function createResult(overrides?: Partial<CouncilExecutionResult>): CouncilExecutionResult {
-  const responses: CouncilExecutionResult["responses"] = [
-    {
-      member: { model: "openai/gpt-5.3-codex", name: "OpenAI" },
-      status: "completed",
-      response: "Finding A from OpenAI",
-      taskId: "task-1",
-      durationMs: 120,
-    },
-    {
-      member: { model: "anthropic/claude-sonnet-4-5", name: "Claude" },
-      status: "completed",
-      response: "Finding B from Claude",
-      taskId: "task-2",
-      durationMs: 240,
-    },
-    {
-      member: { model: "google/gemini-3-pro", name: "Gemini" },
-      status: "completed",
-      response: "Finding C from Gemini",
-      taskId: "task-3",
-      durationMs: 360,
-    },
-  ]
-
-  return {
-    question: "What reliability risks exist?",
-    responses,
-    totalMembers: 3,
-    completedCount: 3,
-    failedCount: 0,
-    ...overrides,
-  }
-}
-
-describe("formatCouncilResultsForSynthesis", () => {
-  //#given a CouncilExecutionResult with 3 completed members
-  //#when formatCouncilResultsForSynthesis is called
-  //#then output contains each member's model name as a header
-  //#then output contains each member's raw response text
-  //#then output contains member status and duration
-  test("formats all completed members with provenance and response text", () => {
-    const result = createResult()
-
-    const output = formatCouncilResultsForSynthesis(result)
-
-    expect(output).toContain("openai/gpt-5.3-codex")
-    expect(output).toContain("anthropic/claude-sonnet-4-5")
-    expect(output).toContain("google/gemini-3-pro")
-
-    expect(output).toContain("Finding A from OpenAI")
-    expect(output).toContain("Finding B from Claude")
-    expect(output).toContain("Finding C from Gemini")
-
-    expect(output).toContain("Status: completed")
-    expect(output).toContain("Duration: 120ms")
-    expect(output).toContain("Duration: 240ms")
-    expect(output).toContain("Duration: 360ms")
-  })
-
-  //#given a CouncilExecutionResult with 1 completed and 1 failed member
-  //#when formatCouncilResultsForSynthesis is called
-  //#then completed member's response is included
-  //#then failed member shows error status and error message
-  //#then failed member does NOT have a response section
-  test("includes completed response and failed error without response section", () => {
-    const result = createResult({
-      responses: [
-        {
-          member: { model: "openai/gpt-5.3-codex" },
-          status: "completed",
-          response: "Primary finding",
-          taskId: "task-1",
-          durationMs: 80,
-        },
-        {
-          member: { model: "xai/grok-code-fast-1" },
-          status: "error",
-          error: "Timeout from provider",
-          taskId: "task-2",
-          durationMs: 500,
-        },
-      ],
-      totalMembers: 2,
-      completedCount: 1,
-      failedCount: 1,
-    })
-
-    const output = formatCouncilResultsForSynthesis(result)
-
-    expect(output).toContain("Primary finding")
-    expect(output).toContain("xai/grok-code-fast-1")
-    expect(output).toContain("Status: error")
-    expect(output).toContain("Error: Timeout from provider")
-    expect(output).not.toContain("Response:\nTimeout from provider")
-  })
-
-  //#given a CouncilExecutionResult with 0 completed members
-  //#when formatCouncilResultsForSynthesis is called
-  //#then output contains a "no successful responses" message
-  test("shows no successful responses message when all members fail", () => {
-    const result = createResult({
-      responses: [
-        {
-          member: { model: "openai/gpt-5.3-codex" },
-          status: "error",
-          error: "No output",
-          taskId: "task-1",
-          durationMs: 200,
-        },
-      ],
-      totalMembers: 1,
-      completedCount: 0,
-      failedCount: 1,
-    })
-
-    const output = formatCouncilResultsForSynthesis(result)
-
-    expect(output).toContain("No successful responses")
-  })
-
-  //#given members with custom names
-  //#when formatCouncilResultsForSynthesis is called
-  //#then output uses member.name if provided, falls back to member.model
-  test("prefers custom member name and falls back to model", () => {
-    const result = createResult({
-      responses: [
-        {
-          member: { model: "openai/gpt-5.3-codex", name: "Council Alpha" },
-          status: "completed",
-          response: "Custom member response",
-          taskId: "task-1",
-          durationMs: 10,
-        },
-        {
-          member: { model: "google/gemini-3-pro" },
-          status: "completed",
-          response: "Default member response",
-          taskId: "task-2",
-          durationMs: 11,
-        },
-      ],
-      totalMembers: 2,
-      completedCount: 2,
-      failedCount: 0,
-    })
-
-    const output = formatCouncilResultsForSynthesis(result)
-
-    expect(output).toContain("Council Alpha")
-    expect(output).toContain("google/gemini-3-pro")
-  })
-})
--- a/src/agents/athena/synthesis-formatter.ts
+++ b/src/agents/athena/synthesis-formatter.ts
@ -1,48 +0,0 @@
-import type { CouncilExecutionResult } from "./types"
-
-export function formatCouncilResultsForSynthesis(result: CouncilExecutionResult): string {
-  const completedResponses = result.responses.filter((response) => response.status === "completed")
-
-  if (completedResponses.length === 0) {
-    return [
-      "# Council Responses for Synthesis",
-      `Question: ${result.question}`,
-      "No successful responses from council members.",
-      "Review failed member details below for provenance.",
-      ...result.responses.map((response) => {
-        const memberName = response.member.name ?? response.member.model
-        return [
-          `## Member: ${memberName} (${response.status})`,
-          `Model: ${response.member.model}`,
-          `Status: ${response.status}`,
-          `Duration: ${response.durationMs}ms`,
-          `Error: ${response.error ?? "No error message provided"}`,
-        ].join("\n")
-      }),
-    ].join("\n\n")
-  }
-
-  const sections = result.responses.map((response) => {
-    const memberName = response.member.name ?? response.member.model
-    const header = [
-      `## Member: ${memberName} (${response.status})`,
-      `Model: ${response.member.model}`,
-      `Status: ${response.status}`,
-      `Duration: ${response.durationMs}ms`,
-    ]
-
-    if (response.status === "completed") {
-      const responseBody = response.response?.trim() ? response.response : "No response content provided"
-      return [...header, "Response:", responseBody].join("\n")
-    }
-
-    return [...header, `Error: ${response.error ?? "No error message provided"}`].join("\n")
-  })
-
-  return [
-    "# Council Responses for Synthesis",
-    `Question: ${result.question}`,
-    `Completed responses: ${result.completedCount}/${result.totalMembers}`,
-    ...sections,
-  ].join("\n\n")
-}
--- a/src/agents/athena/synthesis-prompt.ts
+++ b/src/agents/athena/synthesis-prompt.ts
@ -1,44 +0,0 @@
-export function buildSynthesisPrompt(formattedResponses: string, question: string, completedCount: number): string {
-  return `You are Athena, the synthesis lead for a multi-model council. Your job is to merge independent model outputs into a single, evidence-grounded synthesis.
-
-## Original Question
-${question}
-
-## Council Responses
-${formattedResponses}
-
-## Your Responsibilities
-1. Identify distinct findings across all completed member responses.
-2. Group findings that refer to the same underlying issue (semantic similarity, not exact wording).
-3. Classify agreementLevel for each finding using ${completedCount} completed member(s):
-   - unanimous: all completed members reported the finding
-   - majority: more than 50% of completed members reported the finding
-   - minority: 2 or more members reported it, but not a majority
-   - solo: only 1 member reported it
-4. Add AthenaAssessment for each finding:
-   - agrees: whether you agree with the finding
-   - rationale: concise reason for agreement or disagreement
-5. Set isFalsePositiveRisk:
-   - true for solo findings (likely false positives unless strongly supported)
-   - false for findings reported by multiple members
-
-## Output Contract
-Return JSON only with this shape:
-{
-  "findings": [
-    {
-      "summary": "string",
-      "details": "string",
-      "agreementLevel": "unanimous | majority | minority | solo",
-      "reportedBy": ["model/name"],
-      "assessment": {
-        "agrees": true,
-        "rationale": "string"
-      },
-      "isFalsePositiveRisk": false
-    }
-  ]
-}
-
-The finding object must match the SynthesizedFinding type exactly. Keep findings concise, concrete, and tied to source responses.`
-}
--- a/src/agents/athena/synthesis-types.ts
+++ b/src/agents/athena/synthesis-types.ts
@ -1,31 +0,0 @@
-import type { AgreementLevel, CouncilMemberConfig, CouncilMemberStatus } from "./types"
-
-export interface AthenaAssessment {
-  agrees: boolean
-  rationale: string
-}
-
-export interface SynthesizedFinding {
-  summary: string
-  details: string
-  agreementLevel: AgreementLevel
-  reportedBy: string[]
-  assessment: AthenaAssessment
-  isFalsePositiveRisk: boolean
-}
-
-export interface MemberProvenance {
-  member: CouncilMemberConfig
-  status: CouncilMemberStatus
-  rawResponse?: string
-  durationMs: number
-}
-
-export interface SynthesisResult {
-  question: string
-  findings: SynthesizedFinding[]
-  memberProvenance: MemberProvenance[]
-  totalFindings: number
-  consensusCount: number
-  outlierCount: number
-}
--- a/src/agents/athena/types.ts
+++ b/src/agents/athena/types.ts
@ -14,23 +14,20 @@ export interface AthenaConfig {
  council: CouncilConfig
 }

-export type CouncilMemberStatus = "completed" | "timeout" | "error"
-
-export type AgreementLevel = "unanimous" | "majority" | "minority" | "solo"
-
-export interface CouncilMemberResponse {
+export interface CouncilLaunchFailure {
  member: CouncilMemberConfig
-  status: CouncilMemberStatus
-  response?: string
-  error?: string
-  taskId: string
-  durationMs: number
+  error: string
 }

-export interface CouncilExecutionResult {
-  question: string
-  responses: CouncilMemberResponse[]
-  totalMembers: number
-  completedCount: number
-  failedCount: number
+export interface CouncilLaunchedMember {
+  member: CouncilMemberConfig
+  taskId: string
+}
+
+/** Return type of executeCouncil — only tracks launch outcomes, not task completion */
+export interface CouncilLaunchResult {
+  question: string
+  launched: CouncilLaunchedMember[]
+  failures: CouncilLaunchFailure[]
+  totalMembers: number
 }
--- a/src/tools/athena-council/tools.ts
+++ b/src/tools/athena-council/tools.ts
@ -113,22 +113,18 @@ export function createAthenaCouncilTool(args: {
        })

        const launchResult: AthenaCouncilLaunchResult = {
-          launched: execution.responses.filter((response) => response.taskId.length > 0).length,
-          members: execution.responses
-            .filter((response) => response.taskId.length > 0)
-            .map((response) => ({
-              task_id: response.taskId,
-              name: response.member.name ?? response.member.model,
-              model: response.member.model,
-              status: "running",
-            })),
-          failed: execution.responses
-            .filter((response) => response.taskId.length === 0)
-            .map((response) => ({
-              name: response.member.name ?? response.member.model,
-              model: response.member.model,
-              error: response.error ?? "Launch failed",
-            })),
+          launched: execution.launched.length,
+          members: execution.launched.map((entry) => ({
+            task_id: entry.taskId,
+            name: entry.member.name ?? entry.member.model,
+            model: entry.member.model,
+            status: "running",
+          })),
+          failed: execution.failures.map((entry) => ({
+            name: entry.member.name ?? entry.member.model,
+            model: entry.member.model,
+            error: entry.error,
+          })),
        }

        markCouncilDone(toolContext.sessionID)