fix(athena): address 6 council review findings — launcher, schema, filtering, presentation

- Forward temperature and permission through council-launcher to background manager
- Add LaunchInput.temperature and LaunchInput.permission to background-agent types
- Extract session guard with 5-minute timeout to prevent stale council locks
- Make council optional in AthenaOverrideConfigSchema for partial user overrides
- Support member lookup by both name and model ID in filterCouncilMembers
- Add provider/model-id format validation to CouncilMemberSchema
- Fix findings-presenter group header to show finding count instead of first finding's reporter count
This commit is contained in:
ismeth 2026-02-13 16:00:04 +01:00 committed by YeonGyu-Kim
parent 7cfdc68100
commit d8ba9b1f0c
14 changed files with 327 additions and 24 deletions

View File

@ -5,10 +5,7 @@ import { collectCouncilResults } from "./council-result-collector"
import { parseModelString } from "./model-parser" import { parseModelString } from "./model-parser"
import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types" import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types"
export interface CouncilLaunchInput extends LaunchInput { export type CouncilLaunchInput = LaunchInput
temperature?: number
permission?: Record<string, "ask" | "allow" | "deny">
}
export interface CouncilLauncher { export interface CouncilLauncher {
launch(input: CouncilLaunchInput): Promise<BackgroundTask> launch(input: CouncilLaunchInput): Promise<BackgroundTask>

View File

@ -117,6 +117,42 @@ describe("formatFindingsForUser", () => {
expect(output).toContain("No synthesized findings are available") expect(output).toContain("No synthesized findings are available")
}) })
//#given multiple majority findings with different reporter counts
//#when formatting is generated
//#then group header shows the agreement level label without a misleading single count
test("shows agreement level label in group header without single-finding count", () => {
const result = createSynthesisResult({
findings: [
{
summary: "Finding A",
details: "Reported by 3 members",
agreementLevel: "majority",
reportedBy: ["OpenAI", "Claude", "Gemini"],
assessment: { agrees: true, rationale: "Valid" },
isFalsePositiveRisk: false,
},
{
summary: "Finding B",
details: "Reported by 2 members",
agreementLevel: "majority",
reportedBy: ["OpenAI", "Claude"],
assessment: { agrees: true, rationale: "Also valid" },
isFalsePositiveRisk: false,
},
],
})
const output = formatFindingsForUser(result)
// The header should show the level label without a misleading single-finding count
// It should NOT use the first finding's count as the group header
expect(output).not.toContain("## Majority Findings (3 members report this (majority))")
expect(output).toContain("## Majority Findings")
// Each individual finding still shows its own agreement context
expect(output).toContain("Agreement context: 3 members report this (majority)")
expect(output).toContain("Agreement context: 2 members report this (majority)")
})
//#given a non-empty findings result //#given a non-empty findings result
//#when formatting is generated //#when formatting is generated
//#then output ends with an action recommendation section //#then output ends with an action recommendation section

View File

@ -72,8 +72,7 @@ export function formatFindingsForUser(result: SynthesisResult): string {
return [] return []
} }
const firstFinding = findings[0] const header = `## ${toTitle(level)} Findings (${findings.length})`
const header = `## ${toTitle(level)} Findings (${formatAgreementLine(level, firstFinding)})`
const entries = findings.map((finding) => formatFinding(level, finding)).join("\n\n") const entries = findings.map((finding) => formatFinding(level, finding)).join("\n\n")
return [`${header}\n\n${entries}`] return [`${header}\n\n${entries}`]
}) })

View File

@ -561,7 +561,7 @@ describe("Athena agent override", () => {
expect(result.data.agents?.athena?.model).toBe("openai/gpt-5.3-codex") expect(result.data.agents?.athena?.model).toBe("openai/gpt-5.3-codex")
expect(result.data.agents?.athena?.temperature).toBe(0.2) expect(result.data.agents?.athena?.temperature).toBe(0.2)
expect(result.data.agents?.athena?.prompt_append).toBe("Use consensus-first synthesis.") expect(result.data.agents?.athena?.prompt_append).toBe("Use consensus-first synthesis.")
expect(result.data.agents?.athena?.council.members).toHaveLength(3) expect(result.data.agents?.athena?.council?.members).toHaveLength(3)
} }
}) })
@ -584,7 +584,7 @@ describe("Athena agent override", () => {
expect(result.success).toBe(false) expect(result.success).toBe(false)
}) })
test("rejects athena override when council is missing", () => { test("accepts athena override without council (temperature-only override)", () => {
// given // given
const config = { const config = {
agents: { agents: {
@ -598,7 +598,7 @@ describe("Athena agent override", () => {
const result = OhMyOpenCodeConfigSchema.safeParse(config) const result = OhMyOpenCodeConfigSchema.safeParse(config)
// then // then
expect(result.success).toBe(false) expect(result.success).toBe(true)
}) })
}) })

View File

@ -58,7 +58,7 @@ export const AgentOverrideConfigSchema = z.object({
export const AthenaOverrideConfigSchema = AgentOverrideConfigSchema.merge( export const AthenaOverrideConfigSchema = AgentOverrideConfigSchema.merge(
z.object({ z.object({
council: AthenaConfigSchema.shape.council, council: AthenaConfigSchema.shape.council.optional(),
}) })
) )

View File

@ -41,6 +41,50 @@ describe("CouncilMemberSchema", () => {
expect(result.success).toBe(false) expect(result.success).toBe(false)
}) })
test("rejects model string without provider/model separator", () => {
//#given
const config = { model: "invalid-model" }
//#when
const result = CouncilMemberSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
test("rejects model string with empty provider", () => {
//#given
const config = { model: "/gpt-5.3-codex" }
//#when
const result = CouncilMemberSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
test("rejects model string with empty model ID", () => {
//#given
const config = { model: "openai/" }
//#when
const result = CouncilMemberSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
test("rejects empty model string", () => {
//#given
const config = { model: "" }
//#when
const result = CouncilMemberSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
test("rejects temperature below 0", () => { test("rejects temperature below 0", () => {
//#given //#given
const config = { model: "openai/gpt-5.3-codex", temperature: -0.1 } const config = { model: "openai/gpt-5.3-codex", temperature: -0.1 }

View File

@ -1,7 +1,19 @@
import { z } from "zod" import { z } from "zod"
/** Validates model string format: "provider/model-id" (e.g., "openai/gpt-5.3-codex"). */
const ModelStringSchema = z
.string()
.min(1)
.refine(
(model) => {
const slashIndex = model.indexOf("/")
return slashIndex > 0 && slashIndex < model.length - 1
},
{ message: 'Model must be in "provider/model-id" format (e.g., "openai/gpt-5.3-codex")' }
)
export const CouncilMemberSchema = z.object({ export const CouncilMemberSchema = z.object({
model: z.string(), model: ModelStringSchema,
temperature: z.number().min(0).max(2).optional(), temperature: z.number().min(0).max(2).optional(),
variant: z.string().optional(), variant: z.string().optional(),
name: z.string().optional(), name: z.string().optional(),

View File

@ -72,6 +72,10 @@ export interface LaunchInput {
skills?: string[] skills?: string[]
skillContent?: string skillContent?: string
category?: string category?: string
/** Per-task temperature override for council members or custom launches */
temperature?: number
/** Tool permission overrides (e.g., { write: "deny", edit: "deny" }) */
permission?: Record<string, "ask" | "allow" | "deny">
} }
export interface ResumeInput { export interface ResumeInput {

View File

@ -0,0 +1,77 @@
import { describe, expect, test } from "bun:test"
import type { BackgroundManager } from "../../features/background-agent"
import type { BackgroundTask, LaunchInput } from "../../features/background-agent/types"
import { createCouncilLauncher } from "./council-launcher"
function createMockTask(id: string): BackgroundTask {
return {
id,
parentSessionID: "session-1",
parentMessageID: "message-1",
description: "test",
prompt: "test",
agent: "athena",
status: "running",
}
}
describe("createCouncilLauncher", () => {
//#given a council launch input with temperature and permission
//#when launch is called
//#then temperature and permission are forwarded to the background manager
test("forwards temperature and permission to background manager", async () => {
const capturedInputs: LaunchInput[] = []
const mockManager = {
launch: async (input: LaunchInput) => {
capturedInputs.push(input)
return createMockTask("bg-1")
},
getTask: () => undefined,
} as unknown as BackgroundManager
const launcher = createCouncilLauncher(mockManager)
await launcher.launch({
description: "Council member: test",
prompt: "Analyze this",
agent: "athena",
parentSessionID: "session-1",
parentMessageID: "message-1",
model: { providerID: "openai", modelID: "gpt-5.3-codex" },
temperature: 0.3,
permission: { write: "deny", edit: "deny", task: "deny" },
})
expect(capturedInputs).toHaveLength(1)
expect(capturedInputs[0]?.temperature).toBe(0.3)
expect(capturedInputs[0]?.permission).toEqual({ write: "deny", edit: "deny", task: "deny" })
})
//#given a council launch input without temperature and permission
//#when launch is called
//#then undefined temperature and permission are forwarded (not dropped)
test("forwards undefined temperature and permission without error", async () => {
const capturedInputs: LaunchInput[] = []
const mockManager = {
launch: async (input: LaunchInput) => {
capturedInputs.push(input)
return createMockTask("bg-2")
},
getTask: () => undefined,
} as unknown as BackgroundManager
const launcher = createCouncilLauncher(mockManager)
await launcher.launch({
description: "Council member: test",
prompt: "Analyze this",
agent: "athena",
parentSessionID: "session-1",
parentMessageID: "message-1",
})
expect(capturedInputs).toHaveLength(1)
expect(capturedInputs[0]?.temperature).toBeUndefined()
expect(capturedInputs[0]?.permission).toBeUndefined()
})
})

View File

@ -12,6 +12,8 @@ export function createCouncilLauncher(manager: BackgroundManager): CouncilLaunch
parentMessageID: input.parentMessageID, parentMessageID: input.parentMessageID,
parentAgent: input.parentAgent, parentAgent: input.parentAgent,
model: input.model, model: input.model,
temperature: input.temperature,
permission: input.permission,
}) })
}, },
} }

View File

@ -0,0 +1,72 @@
import { afterEach, describe, expect, test } from "bun:test"
import {
isCouncilRunning,
markCouncilDone,
markCouncilRunning,
_resetForTesting,
_setTimestampForTesting,
} from "./session-guard"
afterEach(() => {
_resetForTesting()
})
describe("session-guard", () => {
//#given no active sessions
//#when isCouncilRunning is checked
//#then returns false
test("returns false for unknown session", () => {
expect(isCouncilRunning("session-1")).toBe(false)
})
//#given a session is marked as running
//#when isCouncilRunning is checked
//#then returns true
test("returns true after markCouncilRunning", () => {
markCouncilRunning("session-1")
expect(isCouncilRunning("session-1")).toBe(true)
})
//#given a session was marked running then done
//#when isCouncilRunning is checked
//#then returns false
test("returns false after markCouncilDone", () => {
markCouncilRunning("session-1")
markCouncilDone("session-1")
expect(isCouncilRunning("session-1")).toBe(false)
})
//#given a session was marked running 6 minutes ago (past 5-minute timeout)
//#when isCouncilRunning is checked
//#then stale entry is purged and returns false
test("purges stale entries older than 5 minutes", () => {
const sixMinutesAgo = Date.now() - 6 * 60 * 1000
_setTimestampForTesting("stale-session", sixMinutesAgo)
expect(isCouncilRunning("stale-session")).toBe(false)
})
//#given a session was marked running 4 minutes ago (within 5-minute timeout)
//#when isCouncilRunning is checked
//#then entry is kept and returns true
test("keeps entries within timeout window", () => {
const fourMinutesAgo = Date.now() - 4 * 60 * 1000
_setTimestampForTesting("recent-session", fourMinutesAgo)
expect(isCouncilRunning("recent-session")).toBe(true)
})
//#given multiple sessions where one is stale and one is fresh
//#when isCouncilRunning is checked for the fresh one
//#then stale entry is purged but fresh entry remains
test("purges only stale entries while keeping fresh ones", () => {
const sixMinutesAgo = Date.now() - 6 * 60 * 1000
_setTimestampForTesting("stale-session", sixMinutesAgo)
markCouncilRunning("fresh-session")
expect(isCouncilRunning("stale-session")).toBe(false)
expect(isCouncilRunning("fresh-session")).toBe(true)
})
})

View File

@ -0,0 +1,37 @@
/** Timeout in ms after which a stale council session lock is automatically released. */
const COUNCIL_SESSION_TIMEOUT_MS = 5 * 60 * 1000
/** Tracks active council executions per session with timestamps for stale entry cleanup. */
const activeCouncilSessions = new Map<string, number>()
function purgeStaleEntries(): void {
const now = Date.now()
for (const [sessionId, startedAt] of activeCouncilSessions) {
if (now - startedAt > COUNCIL_SESSION_TIMEOUT_MS) {
activeCouncilSessions.delete(sessionId)
}
}
}
export function isCouncilRunning(sessionId: string): boolean {
purgeStaleEntries()
return activeCouncilSessions.has(sessionId)
}
export function markCouncilRunning(sessionId: string): void {
activeCouncilSessions.set(sessionId, Date.now())
}
export function markCouncilDone(sessionId: string): void {
activeCouncilSessions.delete(sessionId)
}
/** Visible for testing only. */
export function _resetForTesting(): void {
activeCouncilSessions.clear()
}
/** Visible for testing only. */
export function _setTimestampForTesting(sessionId: string, timestamp: number): void {
activeCouncilSessions.set(sessionId, timestamp)
}

View File

@ -97,6 +97,30 @@ describe("filterCouncilMembers", () => {
) )
}) })
test("selects named member by model ID when name differs from model", () => {
// #given - "Claude" has name "Claude" but model "anthropic/claude-sonnet-4-5"
const selectedMembers = ["anthropic/claude-sonnet-4-5"]
// #when
const result = filterCouncilMembers(configuredMembers, selectedMembers)
// #then - should find the member by model ID even though it has a custom name
expect(result.members).toEqual([configuredMembers[0]])
expect(result.error).toBeUndefined()
})
test("deduplicates when same member is selected by both name and model", () => {
// #given
const selectedMembers = ["Claude", "anthropic/claude-sonnet-4-5"]
// #when
const result = filterCouncilMembers(configuredMembers, selectedMembers)
// #then - should return only one copy
expect(result.members).toEqual([configuredMembers[0]])
expect(result.error).toBeUndefined()
})
test("returns error listing only unmatched names when partially matched", () => { test("returns error listing only unmatched names when partially matched", () => {
// #given // #given
const selectedMembers = ["claude", "non-existent"] const selectedMembers = ["claude", "non-existent"]

View File

@ -4,11 +4,9 @@ import type { CouncilConfig, CouncilMemberConfig } from "../../agents/athena/typ
import type { BackgroundManager } from "../../features/background-agent" import type { BackgroundManager } from "../../features/background-agent"
import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants" import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants"
import { createCouncilLauncher } from "./council-launcher" import { createCouncilLauncher } from "./council-launcher"
import { isCouncilRunning, markCouncilDone, markCouncilRunning } from "./session-guard"
import type { AthenaCouncilLaunchResult, AthenaCouncilToolArgs } from "./types" import type { AthenaCouncilLaunchResult, AthenaCouncilToolArgs } from "./types"
/** Tracks active council executions per session to prevent duplicate launches. */
const activeCouncilSessions = new Set<string>()
function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig { function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig {
return Boolean(councilConfig && councilConfig.members.length > 0) return Boolean(councilConfig && councilConfig.members.length > 0)
} }
@ -28,13 +26,15 @@ export function filterCouncilMembers(
const memberLookup = new Map<string, CouncilMemberConfig>() const memberLookup = new Map<string, CouncilMemberConfig>()
members.forEach((member) => { members.forEach((member) => {
const key = (member.name ?? member.model).toLowerCase() memberLookup.set(member.model.toLowerCase(), member)
memberLookup.set(key, member) if (member.name) {
memberLookup.set(member.name.toLowerCase(), member)
}
}) })
const unresolved: string[] = [] const unresolved: string[] = []
const filteredMembers: CouncilMemberConfig[] = [] const filteredMembers: CouncilMemberConfig[] = []
const includedMemberKeys = new Set<string>() const includedMembers = new Set<CouncilMemberConfig>()
selectedNames.forEach((selectedName) => { selectedNames.forEach((selectedName) => {
const selectedKey = selectedName.toLowerCase() const selectedKey = selectedName.toLowerCase()
@ -44,12 +44,11 @@ export function filterCouncilMembers(
return return
} }
const memberKey = matchedMember.model if (includedMembers.has(matchedMember)) {
if (includedMemberKeys.has(memberKey)) {
return return
} }
includedMemberKeys.add(memberKey) includedMembers.add(matchedMember)
filteredMembers.push(matchedMember) filteredMembers.push(matchedMember)
}) })
@ -98,11 +97,11 @@ export function createAthenaCouncilTool(args: {
return filteredMembers.error return filteredMembers.error
} }
if (activeCouncilSessions.has(toolContext.sessionID)) { if (isCouncilRunning(toolContext.sessionID)) {
return "Council is already running for this session. Wait for the current council execution to complete." return "Council is already running for this session. Wait for the current council execution to complete."
} }
activeCouncilSessions.add(toolContext.sessionID) markCouncilRunning(toolContext.sessionID)
try { try {
const execution = await executeCouncil({ const execution = await executeCouncil({
question: toolArgs.question, question: toolArgs.question,
@ -132,10 +131,10 @@ export function createAthenaCouncilTool(args: {
})), })),
} }
activeCouncilSessions.delete(toolContext.sessionID) markCouncilDone(toolContext.sessionID)
return JSON.stringify(launchResult) return JSON.stringify(launchResult)
} catch (error) { } catch (error) {
activeCouncilSessions.delete(toolContext.sessionID) markCouncilDone(toolContext.sessionID)
throw error throw error
} }
}, },