fix(athena): address 6 council review findings — launcher, schema, filtering, presentation

- Forward temperature and permission through council-launcher to background manager - Add LaunchInput.temperature and LaunchInput.permission to background-agent types - Extract session guard with 5-minute timeout to prevent stale council locks - Make council optional in AthenaOverrideConfigSchema for partial user overrides - Support member lookup by both name and model ID in filterCouncilMembers - Add provider/model-id format validation to CouncilMemberSchema - Fix findings-presenter group header to show finding count instead of first finding's reporter count
2026-02-13 16:00:04 +01:00 · 2026-02-13 16:00:04 +01:00 · d8ba9b1f0c
commit d8ba9b1f0c
parent 7cfdc68100
14 changed files with 327 additions and 24 deletions
--- a/src/agents/athena/council-orchestrator.ts
+++ b/src/agents/athena/council-orchestrator.ts
@ -5,10 +5,7 @@ import { collectCouncilResults } from "./council-result-collector"
 import { parseModelString } from "./model-parser"
 import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types"
-export interface CouncilLaunchInput extends LaunchInput {
+export type CouncilLaunchInput = LaunchInput
  temperature?: number
  permission?: Record<string, "ask" | "allow" | "deny">
 }
 export interface CouncilLauncher {
  launch(input: CouncilLaunchInput): Promise<BackgroundTask>
--- a/src/agents/athena/findings-presenter.test.ts
+++ b/src/agents/athena/findings-presenter.test.ts
@ -117,6 +117,42 @@ describe("formatFindingsForUser", () => {
    expect(output).toContain("No synthesized findings are available")
  })
  //#given multiple majority findings with different reporter counts
  //#when formatting is generated
  //#then group header shows the agreement level label without a misleading single count
  test("shows agreement level label in group header without single-finding count", () => {
    const result = createSynthesisResult({
      findings: [
        {
          summary: "Finding A",
          details: "Reported by 3 members",
          agreementLevel: "majority",
          reportedBy: ["OpenAI", "Claude", "Gemini"],
          assessment: { agrees: true, rationale: "Valid" },
          isFalsePositiveRisk: false,
        },
        {
          summary: "Finding B",
          details: "Reported by 2 members",
          agreementLevel: "majority",
          reportedBy: ["OpenAI", "Claude"],
          assessment: { agrees: true, rationale: "Also valid" },
          isFalsePositiveRisk: false,
        },
      ],
    })
    const output = formatFindingsForUser(result)
    // The header should show the level label without a misleading single-finding count
    // It should NOT use the first finding's count as the group header
    expect(output).not.toContain("## Majority Findings (3 members report this (majority))")
    expect(output).toContain("## Majority Findings")
    // Each individual finding still shows its own agreement context
    expect(output).toContain("Agreement context: 3 members report this (majority)")
    expect(output).toContain("Agreement context: 2 members report this (majority)")
  })
  //#given a non-empty findings result
  //#when formatting is generated
  //#then output ends with an action recommendation section
--- a/src/agents/athena/findings-presenter.ts
+++ b/src/agents/athena/findings-presenter.ts
@ -72,8 +72,7 @@ export function formatFindingsForUser(result: SynthesisResult): string {
      return []
    }
-    const firstFinding = findings[0]
+    const header = `## ${toTitle(level)} Findings (${findings.length})`
    const header = `## ${toTitle(level)} Findings (${formatAgreementLine(level, firstFinding)})`
    const entries = findings.map((finding) => formatFinding(level, finding)).join("\n\n")
    return [`${header}\n\n${entries}`]
  })
--- a/src/config/schema.test.ts
+++ b/src/config/schema.test.ts
@ -561,7 +561,7 @@ describe("Athena agent override", () => {
      expect(result.data.agents?.athena?.model).toBe("openai/gpt-5.3-codex")
      expect(result.data.agents?.athena?.temperature).toBe(0.2)
      expect(result.data.agents?.athena?.prompt_append).toBe("Use consensus-first synthesis.")
-      expect(result.data.agents?.athena?.council.members).toHaveLength(3)
+      expect(result.data.agents?.athena?.council?.members).toHaveLength(3)
    }
  })
@ -584,7 +584,7 @@ describe("Athena agent override", () => {
    expect(result.success).toBe(false)
  })
-  test("rejects athena override when council is missing", () => {
+  test("accepts athena override without council (temperature-only override)", () => {
    // given
    const config = {
      agents: {
@ -598,7 +598,7 @@ describe("Athena agent override", () => {
    const result = OhMyOpenCodeConfigSchema.safeParse(config)
    // then
-    expect(result.success).toBe(false)
+    expect(result.success).toBe(true)
  })
 })
--- a/src/config/schema/agent-overrides.ts
+++ b/src/config/schema/agent-overrides.ts
@ -58,7 +58,7 @@ export const AgentOverrideConfigSchema = z.object({
 export const AthenaOverrideConfigSchema = AgentOverrideConfigSchema.merge(
  z.object({
-    council: AthenaConfigSchema.shape.council,
+    council: AthenaConfigSchema.shape.council.optional(),
  })
 )
--- a/src/config/schema/athena.test.ts
+++ b/src/config/schema/athena.test.ts
@ -41,6 +41,50 @@ describe("CouncilMemberSchema", () => {
    expect(result.success).toBe(false)
  })
  test("rejects model string without provider/model separator", () => {
    //#given
    const config = { model: "invalid-model" }
    //#when
    const result = CouncilMemberSchema.safeParse(config)
    //#then
    expect(result.success).toBe(false)
  })
  test("rejects model string with empty provider", () => {
    //#given
    const config = { model: "/gpt-5.3-codex" }
    //#when
    const result = CouncilMemberSchema.safeParse(config)
    //#then
    expect(result.success).toBe(false)
  })
  test("rejects model string with empty model ID", () => {
    //#given
    const config = { model: "openai/" }
    //#when
    const result = CouncilMemberSchema.safeParse(config)
    //#then
    expect(result.success).toBe(false)
  })
  test("rejects empty model string", () => {
    //#given
    const config = { model: "" }
    //#when
    const result = CouncilMemberSchema.safeParse(config)
    //#then
    expect(result.success).toBe(false)
  })
  test("rejects temperature below 0", () => {
    //#given
    const config = { model: "openai/gpt-5.3-codex", temperature: -0.1 }
--- a/src/config/schema/athena.ts
+++ b/src/config/schema/athena.ts
@ -1,7 +1,19 @@
 import { z } from "zod"
 /** Validates model string format: "provider/model-id" (e.g., "openai/gpt-5.3-codex"). */
 const ModelStringSchema = z
  .string()
  .min(1)
  .refine(
    (model) => {
      const slashIndex = model.indexOf("/")
      return slashIndex > 0 && slashIndex < model.length - 1
    },
    { message: 'Model must be in "provider/model-id" format (e.g., "openai/gpt-5.3-codex")' }
  )
 export const CouncilMemberSchema = z.object({
-  model: z.string(),
+  model: ModelStringSchema,
  temperature: z.number().min(0).max(2).optional(),
  variant: z.string().optional(),
  name: z.string().optional(),
--- a/src/features/background-agent/types.ts
+++ b/src/features/background-agent/types.ts
@ -72,6 +72,10 @@ export interface LaunchInput {
  skills?: string[]
  skillContent?: string
  category?: string
  /** Per-task temperature override for council members or custom launches */
  temperature?: number
  /** Tool permission overrides (e.g., { write: "deny", edit: "deny" }) */
  permission?: Record<string, "ask" | "allow" | "deny">
 }
 export interface ResumeInput {
--- a/src/tools/athena-council/council-launcher.test.ts
+++ b/src/tools/athena-council/council-launcher.test.ts
@ -0,0 +1,77 @@
 import { describe, expect, test } from "bun:test"
 import type { BackgroundManager } from "../../features/background-agent"
 import type { BackgroundTask, LaunchInput } from "../../features/background-agent/types"
 import { createCouncilLauncher } from "./council-launcher"
 function createMockTask(id: string): BackgroundTask {
  return {
    id,
    parentSessionID: "session-1",
    parentMessageID: "message-1",
    description: "test",
    prompt: "test",
    agent: "athena",
    status: "running",
  }
 }
 describe("createCouncilLauncher", () => {
  //#given a council launch input with temperature and permission
  //#when launch is called
  //#then temperature and permission are forwarded to the background manager
  test("forwards temperature and permission to background manager", async () => {
    const capturedInputs: LaunchInput[] = []
    const mockManager = {
      launch: async (input: LaunchInput) => {
        capturedInputs.push(input)
        return createMockTask("bg-1")
      },
      getTask: () => undefined,
    } as unknown as BackgroundManager
    const launcher = createCouncilLauncher(mockManager)
    await launcher.launch({
      description: "Council member: test",
      prompt: "Analyze this",
      agent: "athena",
      parentSessionID: "session-1",
      parentMessageID: "message-1",
      model: { providerID: "openai", modelID: "gpt-5.3-codex" },
      temperature: 0.3,
      permission: { write: "deny", edit: "deny", task: "deny" },
    })
    expect(capturedInputs).toHaveLength(1)
    expect(capturedInputs[0]?.temperature).toBe(0.3)
    expect(capturedInputs[0]?.permission).toEqual({ write: "deny", edit: "deny", task: "deny" })
  })
  //#given a council launch input without temperature and permission
  //#when launch is called
  //#then undefined temperature and permission are forwarded (not dropped)
  test("forwards undefined temperature and permission without error", async () => {
    const capturedInputs: LaunchInput[] = []
    const mockManager = {
      launch: async (input: LaunchInput) => {
        capturedInputs.push(input)
        return createMockTask("bg-2")
      },
      getTask: () => undefined,
    } as unknown as BackgroundManager
    const launcher = createCouncilLauncher(mockManager)
    await launcher.launch({
      description: "Council member: test",
      prompt: "Analyze this",
      agent: "athena",
      parentSessionID: "session-1",
      parentMessageID: "message-1",
    })
    expect(capturedInputs).toHaveLength(1)
    expect(capturedInputs[0]?.temperature).toBeUndefined()
    expect(capturedInputs[0]?.permission).toBeUndefined()
  })
 })
--- a/src/tools/athena-council/council-launcher.ts
+++ b/src/tools/athena-council/council-launcher.ts
@ -12,6 +12,8 @@ export function createCouncilLauncher(manager: BackgroundManager): CouncilLaunch
        parentMessageID: input.parentMessageID,
        parentAgent: input.parentAgent,
        model: input.model,
        temperature: input.temperature,
        permission: input.permission,
      })
    },
  }
--- a/src/tools/athena-council/session-guard.test.ts
+++ b/src/tools/athena-council/session-guard.test.ts
@ -0,0 +1,72 @@
 import { afterEach, describe, expect, test } from "bun:test"
 import {
  isCouncilRunning,
  markCouncilDone,
  markCouncilRunning,
  _resetForTesting,
  _setTimestampForTesting,
 } from "./session-guard"
 afterEach(() => {
  _resetForTesting()
 })
 describe("session-guard", () => {
  //#given no active sessions
  //#when isCouncilRunning is checked
  //#then returns false
  test("returns false for unknown session", () => {
    expect(isCouncilRunning("session-1")).toBe(false)
  })
  //#given a session is marked as running
  //#when isCouncilRunning is checked
  //#then returns true
  test("returns true after markCouncilRunning", () => {
    markCouncilRunning("session-1")
    expect(isCouncilRunning("session-1")).toBe(true)
  })
  //#given a session was marked running then done
  //#when isCouncilRunning is checked
  //#then returns false
  test("returns false after markCouncilDone", () => {
    markCouncilRunning("session-1")
    markCouncilDone("session-1")
    expect(isCouncilRunning("session-1")).toBe(false)
  })
  //#given a session was marked running 6 minutes ago (past 5-minute timeout)
  //#when isCouncilRunning is checked
  //#then stale entry is purged and returns false
  test("purges stale entries older than 5 minutes", () => {
    const sixMinutesAgo = Date.now() - 6 * 60 * 1000
    _setTimestampForTesting("stale-session", sixMinutesAgo)
    expect(isCouncilRunning("stale-session")).toBe(false)
  })
  //#given a session was marked running 4 minutes ago (within 5-minute timeout)
  //#when isCouncilRunning is checked
  //#then entry is kept and returns true
  test("keeps entries within timeout window", () => {
    const fourMinutesAgo = Date.now() - 4 * 60 * 1000
    _setTimestampForTesting("recent-session", fourMinutesAgo)
    expect(isCouncilRunning("recent-session")).toBe(true)
  })
  //#given multiple sessions where one is stale and one is fresh
  //#when isCouncilRunning is checked for the fresh one
  //#then stale entry is purged but fresh entry remains
  test("purges only stale entries while keeping fresh ones", () => {
    const sixMinutesAgo = Date.now() - 6 * 60 * 1000
    _setTimestampForTesting("stale-session", sixMinutesAgo)
    markCouncilRunning("fresh-session")
    expect(isCouncilRunning("stale-session")).toBe(false)
    expect(isCouncilRunning("fresh-session")).toBe(true)
  })
 })
--- a/src/tools/athena-council/session-guard.ts
+++ b/src/tools/athena-council/session-guard.ts
@ -0,0 +1,37 @@
 /** Timeout in ms after which a stale council session lock is automatically released. */
 const COUNCIL_SESSION_TIMEOUT_MS = 5 * 60 * 1000
 /** Tracks active council executions per session with timestamps for stale entry cleanup. */
 const activeCouncilSessions = new Map<string, number>()
 function purgeStaleEntries(): void {
  const now = Date.now()
  for (const [sessionId, startedAt] of activeCouncilSessions) {
    if (now - startedAt > COUNCIL_SESSION_TIMEOUT_MS) {
      activeCouncilSessions.delete(sessionId)
    }
  }
 }
 export function isCouncilRunning(sessionId: string): boolean {
  purgeStaleEntries()
  return activeCouncilSessions.has(sessionId)
 }
 export function markCouncilRunning(sessionId: string): void {
  activeCouncilSessions.set(sessionId, Date.now())
 }
 export function markCouncilDone(sessionId: string): void {
  activeCouncilSessions.delete(sessionId)
 }
 /** Visible for testing only. */
 export function _resetForTesting(): void {
  activeCouncilSessions.clear()
 }
 /** Visible for testing only. */
 export function _setTimestampForTesting(sessionId: string, timestamp: number): void {
  activeCouncilSessions.set(sessionId, timestamp)
 }
--- a/src/tools/athena-council/tools.test.ts
+++ b/src/tools/athena-council/tools.test.ts
@ -97,6 +97,30 @@ describe("filterCouncilMembers", () => {
    )
  })
  test("selects named member by model ID when name differs from model", () => {
    // #given - "Claude" has name "Claude" but model "anthropic/claude-sonnet-4-5"
    const selectedMembers = ["anthropic/claude-sonnet-4-5"]
    // #when
    const result = filterCouncilMembers(configuredMembers, selectedMembers)
    // #then - should find the member by model ID even though it has a custom name
    expect(result.members).toEqual([configuredMembers[0]])
    expect(result.error).toBeUndefined()
  })
  test("deduplicates when same member is selected by both name and model", () => {
    // #given
    const selectedMembers = ["Claude", "anthropic/claude-sonnet-4-5"]
    // #when
    const result = filterCouncilMembers(configuredMembers, selectedMembers)
    // #then - should return only one copy
    expect(result.members).toEqual([configuredMembers[0]])
    expect(result.error).toBeUndefined()
  })
  test("returns error listing only unmatched names when partially matched", () => {
    // #given
    const selectedMembers = ["claude", "non-existent"]
--- a/src/tools/athena-council/tools.ts
+++ b/src/tools/athena-council/tools.ts
@ -4,11 +4,9 @@ import type { CouncilConfig, CouncilMemberConfig } from "../../agents/athena/typ
 import type { BackgroundManager } from "../../features/background-agent"
 import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants"
 import { createCouncilLauncher } from "./council-launcher"
 import { isCouncilRunning, markCouncilDone, markCouncilRunning } from "./session-guard"
 import type { AthenaCouncilLaunchResult, AthenaCouncilToolArgs } from "./types"
 /** Tracks active council executions per session to prevent duplicate launches. */
 const activeCouncilSessions = new Set<string>()
 function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig {
  return Boolean(councilConfig && councilConfig.members.length > 0)
 }
@ -28,13 +26,15 @@ export function filterCouncilMembers(
  const memberLookup = new Map<string, CouncilMemberConfig>()
  members.forEach((member) => {
-    const key = (member.name ?? member.model).toLowerCase()
+    memberLookup.set(member.model.toLowerCase(), member)
-    memberLookup.set(key, member)
+    if (member.name) {
      memberLookup.set(member.name.toLowerCase(), member)
    }
  })
  const unresolved: string[] = []
  const filteredMembers: CouncilMemberConfig[] = []
-  const includedMemberKeys = new Set<string>()
+  const includedMembers = new Set<CouncilMemberConfig>()
  selectedNames.forEach((selectedName) => {
    const selectedKey = selectedName.toLowerCase()
@ -44,12 +44,11 @@ export function filterCouncilMembers(
      return
    }
-    const memberKey = matchedMember.model
+    if (includedMembers.has(matchedMember)) {
    if (includedMemberKeys.has(memberKey)) {
      return
    }
-    includedMemberKeys.add(memberKey)
+    includedMembers.add(matchedMember)
    filteredMembers.push(matchedMember)
  })
@ -98,11 +97,11 @@ export function createAthenaCouncilTool(args: {
        return filteredMembers.error
      }
-      if (activeCouncilSessions.has(toolContext.sessionID)) {
+      if (isCouncilRunning(toolContext.sessionID)) {
        return "Council is already running for this session. Wait for the current council execution to complete."
      }
-      activeCouncilSessions.add(toolContext.sessionID)
+      markCouncilRunning(toolContext.sessionID)
      try {
        const execution = await executeCouncil({
          question: toolArgs.question,
@ -132,10 +131,10 @@ export function createAthenaCouncilTool(args: {
            })),
        }
-        activeCouncilSessions.delete(toolContext.sessionID)
+        markCouncilDone(toolContext.sessionID)
        return JSON.stringify(launchResult)
      } catch (error) {
-        activeCouncilSessions.delete(toolContext.sessionID)
+        markCouncilDone(toolContext.sessionID)
        throw error
      }
    },