diff --git a/src/agents/athena/agent.ts b/src/agents/athena/agent.ts index 06fa7fdf..47d10763 100644 --- a/src/agents/athena/agent.ts +++ b/src/agents/athena/agent.ts @@ -51,8 +51,8 @@ Question({ }) **Shortcut — skip the Question tool if:** -- The user already specified models in their message (e.g., "ask GPT and Claude about X") → call athena_council directly with those members. -- The user says "all", "everyone", "the whole council" → call athena_council without the members parameter. +- The user already specified models in their message (e.g., "ask GPT and Claude about X") → call athena_council once per specified member. +- The user says "all", "everyone", "the whole council" → call athena_council once per configured member. DO NOT: - Read files yourself @@ -65,16 +65,32 @@ You are an ORCHESTRATOR, not an analyst. Your council members do the analysis. Y ## Workflow -Step 1: Present the Question tool multi-select for council member selection (see above). Once the user responds, call athena_council with the user's question. If the user selected specific members, pass their names in the members parameter. If the user selected "All Members", omit the members parameter. +Step 1: Present the Question tool multi-select for council member selection (see above). -Step 2: Call athena_council with the question and selected members. The tool launches all council members in parallel, waits for them to complete, and returns ALL of their responses in a single result. This may take a few minutes — that is expected. +Step 2: Resolve the selected member list: +- If user selected "All Members", resolve to every configured member listed in the athena_council tool description. +- Otherwise resolve to the explicitly selected member labels. 
-Step 3: Synthesize the findings returned by athena_council: +Step 3: Call athena_council ONCE PER MEMBER (one member per tool call): +- For each selected member, call athena_council with: + - question: the user's original question + - members: ["<member name>"] // single-item array only +- Launch all selected members first (one athena_council call per member) so they run in parallel. +- Track every returned task_id and member mapping. + +Step 4: Collect all member outputs after launch: +- For each tracked task_id, call background_output with block=true. +- Gather each member's final output/status. +- Do not proceed until every launched member has reached a terminal status (completed, error, cancelled, timeout). +- Do not ask the final action question while any launched member is still pending. +- Do not present interim synthesis from partial results. Wait for all members first. + +Step 5: Synthesize the findings returned by all collected member outputs: - Group findings by agreement level: unanimous, majority, minority, solo - Solo findings are potential false positives — flag the risk explicitly - Add your own assessment and rationale to each finding -Step 4: Present synthesized findings to the user grouped by agreement level (unanimous first, then majority, minority, solo). Then use the Question tool to ask which action to take: +Step 6: Present synthesized findings to the user grouped by agreement level (unanimous first, then majority, minority, solo). Then use the Question tool to ask which action to take: Question({ questions: [{ @@ -89,7 +105,7 @@ Question({ }] }) -Step 5: After the user selects an action: +Step 7: After the user selects an action: - **"Fix now (Atlas)"** → Call switch_agent with agent="atlas" and context containing the confirmed findings summary, the original question, and instruction to implement the fixes. 
- **"Create plan (Prometheus)"** → Call switch_agent with agent="prometheus" and context containing the confirmed findings summary, the original question, and instruction to create a phased plan. - **"No action"** → Acknowledge and end. Do not delegate. @@ -99,7 +115,10 @@ The switch_agent tool switches the active agent. After you call it, end your res ## Constraints - Use the Question tool for member selection BEFORE calling athena_council (unless user pre-specified). - Use the Question tool for action selection AFTER synthesis (unless user already stated intent). -- Do NOT use background_output — athena_council returns all member responses directly. +- Use background_output (block=true) to collect member outputs after launch. +- Do NOT call athena_council with multiple members in one call. +- Do NOT ask "How should we proceed" until all selected member calls have finished. +- Do NOT present or summarize partial council findings while any selected member is still running. - Do NOT write or edit files directly. - Do NOT delegate without explicit user confirmation via Question tool. - Do NOT ignore solo finding false-positive warnings. 
diff --git a/src/plugin-interface.ts b/src/plugin-interface.ts index 634b08d2..57994f38 100644 --- a/src/plugin-interface.ts +++ b/src/plugin-interface.ts @@ -65,6 +65,7 @@ export function createPluginInterface(args: { "tool.execute.before": createToolExecuteBeforeHandler({ ctx, hooks, + backgroundManager: managers.backgroundManager, }), "tool.execute.after": createToolExecuteAfterHandler({ diff --git a/src/plugin/tool-execute-before.test.ts b/src/plugin/tool-execute-before.test.ts index b3cd3f7f..1bc8bdb4 100644 --- a/src/plugin/tool-execute-before.test.ts +++ b/src/plugin/tool-execute-before.test.ts @@ -2,6 +2,109 @@ const { describe, expect, test } = require("bun:test") const { createToolExecuteBeforeHandler } = require("./tool-execute-before") describe("createToolExecuteBeforeHandler", () => { + test("blocks Athena question tool while council members are still running", async () => { + //#given + const ctx = { + client: { + session: { + messages: async () => ({ + data: [{ info: { role: "assistant", agent: "Athena (Council)" } }], + }), + }, + }, + } + + const backgroundManager = { + getTasksByParentSession: () => [ + { agent: "council-member", status: "running" }, + ], + } + + const handler = createToolExecuteBeforeHandler({ + ctx, + hooks: {}, + backgroundManager, + }) + + //#when + const run = handler( + { tool: "question", sessionID: "ses_athena", callID: "call_1" }, + { args: { questions: [] } } + ) + + //#then + await expect(run).rejects.toThrow("Council members are still running") + }) + + test("blocks Athena switch_agent while council members are still running", async () => { + //#given + const ctx = { + client: { + session: { + messages: async () => ({ + data: [{ info: { role: "assistant", agent: "Athena (Council)" } }], + }), + }, + }, + } + + const backgroundManager = { + getTasksByParentSession: () => [ + { agent: "council-member", status: "pending" }, + ], + } + + const handler = createToolExecuteBeforeHandler({ + ctx, + hooks: {}, + 
backgroundManager, + }) + + //#when + const run = handler( + { tool: "switch_agent", sessionID: "ses_athena", callID: "call_1" }, + { args: { agent: "atlas", context: "ctx" } } + ) + + //#then + await expect(run).rejects.toThrow("Council members are still running") + }) + + test("allows Athena question tool when no council members are pending", async () => { + //#given + const ctx = { + client: { + session: { + messages: async () => ({ + data: [{ info: { role: "assistant", agent: "Athena (Council)" } }], + }), + }, + }, + } + + const backgroundManager = { + getTasksByParentSession: () => [ + { agent: "council-member", status: "completed" }, + { agent: "council-member", status: "cancelled" }, + ], + } + + const handler = createToolExecuteBeforeHandler({ + ctx, + hooks: {}, + backgroundManager, + }) + + //#when + const run = handler( + { tool: "question", sessionID: "ses_athena", callID: "call_1" }, + { args: { questions: [] } } + ) + + //#then + await expect(run).resolves.toBeUndefined() + }) + test("does not execute subagent question blocker hook for question tool", async () => { //#given const ctx = { diff --git a/src/plugin/tool-execute-before.ts b/src/plugin/tool-execute-before.ts index 65ebaed6..718d1865 100644 --- a/src/plugin/tool-execute-before.ts +++ b/src/plugin/tool-execute-before.ts @@ -1,23 +1,51 @@ import type { PluginContext } from "./types" +import type { BackgroundManager } from "../features/background-agent" import { getMainSessionID } from "../features/claude-code-session-state" import { clearBoulderState } from "../features/boulder-state" import { log } from "../shared" import { resolveSessionAgent } from "./session-agent-resolver" import { parseRalphLoopArguments } from "../hooks/ralph-loop/command-arguments" +import { getAgentConfigKey } from "../shared/agent-display-names" import type { CreatedHooks } from "../create-hooks" export function createToolExecuteBeforeHandler(args: { ctx: PluginContext hooks: CreatedHooks + backgroundManager?: Pick<BackgroundManager, "getTasksByParentSession"> 
}): ( input: { tool: string; sessionID: string; callID: string }, output: { args: Record<string, unknown> }, ) => Promise<void> { - const { ctx, hooks } = args + const { ctx, hooks, backgroundManager } = args + + function hasPendingCouncilMembers(sessionID: string): boolean { + if (!backgroundManager) { + return false + } + + const tasks = backgroundManager.getTasksByParentSession(sessionID) + return tasks.some((task) => + task.agent === "council-member" && + (task.status === "pending" || task.status === "running") + ) + } return async (input, output): Promise<void> => { + const toolNameLower = input.tool?.toLowerCase() + + if (toolNameLower === "question" || toolNameLower === "askuserquestion" || toolNameLower === "ask_user_question" || toolNameLower === "switch_agent") { + const sessionAgent = await resolveSessionAgent(ctx.client, input.sessionID) + const sessionAgentKey = sessionAgent ? getAgentConfigKey(sessionAgent) : undefined + + if (sessionAgentKey === "athena" && hasPendingCouncilMembers(input.sessionID)) { + throw new Error( + "Council members are still running. Wait for all launched members to finish and collect their outputs before asking next-step questions or switching agents." + ) + } + } + await hooks.writeExistingFileGuard?.["tool.execute.before"]?.(input, output) await hooks.questionLabelTruncator?.["tool.execute.before"]?.(input, output) await hooks.claudeCodeHooks?.["tool.execute.before"]?.(input, output) diff --git a/src/tools/athena-council/constants.ts b/src/tools/athena-council/constants.ts index af11f7e0..e3dc5b77 100644 --- a/src/tools/athena-council/constants.ts +++ b/src/tools/athena-council/constants.ts @@ -1,9 +1,10 @@ -export const ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE = `Execute Athena's multi-model council. Launches council members as background tasks and returns their task IDs immediately. +export const ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE = `Execute Athena's multi-model council for exactly ONE member per call. 
-Optionally pass a members array of member names or model IDs to consult only specific council members. If omitted, all configured members are consulted. +Pass members as a single-item array containing one member name or model ID. Athena should call this tool once per selected member. + +This tool launches the selected member as a background task and returns task/session metadata immediately. +Use background_output(task_id=..., block=true) to collect each member result. {members} -Use background_output(task_id=...) to retrieve each member's response. The system will notify you when tasks complete. - IMPORTANT: This tool is designed for Athena agent use only. It requires council configuration to be present.` diff --git a/src/tools/athena-council/result-collector.ts b/src/tools/athena-council/result-collector.ts deleted file mode 100644 index bcfe25ef..00000000 --- a/src/tools/athena-council/result-collector.ts +++ /dev/null @@ -1,116 +0,0 @@ -import type { BackgroundManager } from "../../features/background-agent" -import type { CouncilLaunchedMember } from "../../agents/athena/types" -import type { BackgroundOutputClient, BackgroundOutputMessagesResult } from "../background-task/clients" -import { extractMessages, getErrorMessage } from "../background-task/session-messages" - -const POLL_INTERVAL_MS = 2_000 -const DEFAULT_TIMEOUT_MS = 5 * 60 * 1_000 - -export interface CollectedMemberResult { - name: string - model: string - taskId: string - status: "completed" | "error" | "cancelled" | "timeout" - content: string -} - -export interface CollectedCouncilResults { - results: CollectedMemberResult[] - allCompleted: boolean -} - -/** - * Waits for all launched council members to complete, then fetches their - * session messages and returns extracted text content. - * - * This replaces the previous flow where Athena had to manually poll - * background_output for each member, which created excessive UI noise. 
- */ -export async function collectCouncilResults( - launched: CouncilLaunchedMember[], - manager: BackgroundManager, - client: BackgroundOutputClient, - abort?: AbortSignal, - timeoutMs = DEFAULT_TIMEOUT_MS -): Promise { - const pendingIds = new Set(launched.map((m) => m.taskId)) - const completedMap = new Map() - const deadline = Date.now() + timeoutMs - - while (pendingIds.size > 0 && Date.now() < deadline) { - if (abort?.aborted) break - - for (const taskId of pendingIds) { - const task = manager.getTask(taskId) - if (!task) { - completedMap.set(taskId, "error") - pendingIds.delete(taskId) - continue - } - if (task.status === "completed") { - completedMap.set(taskId, "completed") - pendingIds.delete(taskId) - } else if (task.status === "error" || task.status === "cancelled" || task.status === "interrupt") { - completedMap.set(taskId, task.status === "interrupt" ? "cancelled" : task.status) - pendingIds.delete(taskId) - } - } - - if (pendingIds.size > 0) { - await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) - } - } - - const results: CollectedMemberResult[] = [] - - for (const entry of launched) { - const memberName = entry.member.name ?? entry.member.model - const status = completedMap.get(entry.taskId) ?? 
"timeout" - - if (status !== "completed") { - results.push({ name: memberName, model: entry.member.model, taskId: entry.taskId, status, content: "" }) - continue - } - - const content = await fetchMemberContent(entry.taskId, manager, client) - results.push({ name: memberName, model: entry.member.model, taskId: entry.taskId, status, content }) - } - - return { - results, - allCompleted: pendingIds.size === 0, - } -} - -async function fetchMemberContent( - taskId: string, - manager: BackgroundManager, - client: BackgroundOutputClient -): Promise { - const task = manager.getTask(taskId) - if (!task?.sessionID) return "(No session available)" - - const messagesResult: BackgroundOutputMessagesResult = await client.session.messages({ - path: { id: task.sessionID }, - }) - - const errorMsg = getErrorMessage(messagesResult) - if (errorMsg) return `(Error fetching results: ${errorMsg})` - - const messages = extractMessages(messagesResult) - if (!Array.isArray(messages) || messages.length === 0) return "(No messages found)" - - const assistantMessages = messages.filter((m) => m.info?.role === "assistant") - if (assistantMessages.length === 0) return "(No assistant response found)" - - const textParts: string[] = [] - for (const message of assistantMessages) { - for (const part of message.parts ?? 
[]) { - if ((part.type === "text" || part.type === "reasoning") && part.text) { - textParts.push(part.text) - } - } - } - - return textParts.join("\n\n") || "(No text content)" -} diff --git a/src/tools/athena-council/tools.test.ts b/src/tools/athena-council/tools.test.ts index 1d889022..8cfb957c 100644 --- a/src/tools/athena-council/tools.test.ts +++ b/src/tools/athena-council/tools.test.ts @@ -3,21 +3,8 @@ import { describe, expect, test } from "bun:test" import type { BackgroundManager } from "../../features/background-agent" import type { BackgroundTask } from "../../features/background-agent/types" -import type { BackgroundOutputClient } from "../background-task/clients" import { createAthenaCouncilTool, filterCouncilMembers } from "./tools" -const mockClient = { - session: { - messages: async () => ({ - data: [{ - id: "msg-1", - info: { role: "assistant" }, - parts: [{ type: "text", text: "Test analysis result" }], - }], - }), - }, -} as unknown as BackgroundOutputClient - const mockManager = { getTask: () => undefined, launch: async () => { @@ -38,7 +25,7 @@ const configuredMembers = [ { model: "google/gemini-3-pro" }, ] -function createCompletedTask(id: string): BackgroundTask { +function createRunningTask(id: string, sessionID = `ses-${id}`): BackgroundTask { return { id, parentSessionID: "session-1", @@ -46,288 +33,168 @@ function createCompletedTask(id: string): BackgroundTask { description: `Council member task ${id}`, prompt: "prompt", agent: "council-member", - status: "completed", - sessionID: `ses-${id}`, + status: "running", + sessionID, } } describe("filterCouncilMembers", () => { test("returns all members when selection is undefined", () => { - // #given - const selectedMembers = undefined - - // #when - const result = filterCouncilMembers(configuredMembers, selectedMembers) - - // #then + const result = filterCouncilMembers(configuredMembers, undefined) expect(result.members).toEqual(configuredMembers) expect(result.error).toBeUndefined() }) 
test("returns all members when selection is empty", () => { - // #given - const selectedMembers: string[] = [] - - // #when - const result = filterCouncilMembers(configuredMembers, selectedMembers) - - // #then + const result = filterCouncilMembers(configuredMembers, []) expect(result.members).toEqual(configuredMembers) expect(result.error).toBeUndefined() }) test("filters members using case-insensitive name and model matching", () => { - // #given - const selectedMembers = ["gpt", "GOOGLE/GEMINI-3-PRO"] - - // #when - const result = filterCouncilMembers(configuredMembers, selectedMembers) - - // #then + const result = filterCouncilMembers(configuredMembers, ["gpt", "GOOGLE/GEMINI-3-PRO"]) expect(result.members).toEqual([configuredMembers[1], configuredMembers[2]]) expect(result.error).toBeUndefined() }) test("returns helpful error when selected members are not configured", () => { - // #given - const selectedMembers = ["mistral", "xai/grok-3"] - - // #when - const result = filterCouncilMembers(configuredMembers, selectedMembers) - - // #then + const result = filterCouncilMembers(configuredMembers, ["mistral", "xai/grok-3"]) expect(result.members).toEqual([]) expect(result.error).toBe( "Unknown council members: mistral, xai/grok-3. Available members: Claude, GPT, google/gemini-3-pro." 
) }) - test("selects named member by model ID when name differs from model", () => { - // #given - "Claude" has name "Claude" but model "anthropic/claude-sonnet-4-5" - const selectedMembers = ["anthropic/claude-sonnet-4-5"] - - // #when - const result = filterCouncilMembers(configuredMembers, selectedMembers) - - // #then - should find the member by model ID even though it has a custom name - expect(result.members).toEqual([configuredMembers[0]]) - expect(result.error).toBeUndefined() - }) - test("deduplicates when same member is selected by both name and model", () => { - // #given - const selectedMembers = ["Claude", "anthropic/claude-sonnet-4-5"] - - // #when - const result = filterCouncilMembers(configuredMembers, selectedMembers) - - // #then - should return only one copy + const result = filterCouncilMembers(configuredMembers, ["Claude", "anthropic/claude-sonnet-4-5"]) expect(result.members).toEqual([configuredMembers[0]]) expect(result.error).toBeUndefined() }) - - test("returns error listing only unmatched names when partially matched", () => { - // #given - const selectedMembers = ["claude", "non-existent"] - - // #when - const result = filterCouncilMembers(configuredMembers, selectedMembers) - - // #then - expect(result.members).toEqual([]) - expect(result.error).toBe( - "Unknown council members: non-existent. Available members: Claude, GPT, google/gemini-3-pro." - ) - }) }) describe("createAthenaCouncilTool", () => { test("returns error when councilConfig is undefined", async () => { - // #given const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: mockManager, councilConfig: undefined, - client: mockClient, }) - // #when const result = await athenaCouncilTool.execute({ question: "How should we proceed?" }, mockToolContext) - // #then expect(result).toBe("Athena council not configured. 
Add agents.athena.council.members to your config.") }) test("returns error when councilConfig has empty members", async () => { - // #given const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: mockManager, councilConfig: { members: [] }, - client: mockClient, }) - // #when const result = await athenaCouncilTool.execute({ question: "Any concerns?" }, mockToolContext) - // #then expect(result).toBe("Athena council not configured. Add agents.athena.council.members to your config.") }) - test("uses expected description and question arg schema", () => { - // #given - const athenaCouncilTool = createAthenaCouncilTool({ - backgroundManager: mockManager, - councilConfig: { members: [{ model: "openai/gpt-5.3-codex" }] }, - client: mockClient, - }) - - // #then - description should be dynamic and include the member model - expect(athenaCouncilTool.description).toContain("openai/gpt-5.3-codex") - expect(athenaCouncilTool.description).toContain("Available council members:") - expect((athenaCouncilTool as { args: Record }).args.question).toBeDefined() - expect((athenaCouncilTool as { args: Record }).args.members).toBeDefined() - }) - test("returns helpful error when members contains invalid names", async () => { - // #given const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: mockManager, councilConfig: { members: configuredMembers }, - client: mockClient, - }) - const toolArgs = { - question: "Who should investigate this?", - members: ["unknown-model"], - } - - // #when - const result = await athenaCouncilTool.execute(toolArgs, mockToolContext) - - // #then - expect(result).toBe("Unknown council members: unknown-model. 
Available members: Claude, GPT, google/gemini-3-pro.") - }) - - test("returns collected markdown results for all configured council members", async () => { - // #given - let launchCount = 0 - const taskStore = new Map() - const launchManager = { - launch: async () => { - launchCount += 1 - const task = createCompletedTask(`bg-${launchCount}`) - taskStore.set(task.id, task) - return task - }, - getTask: (id: string) => taskStore.get(id), - } as unknown as BackgroundManager - const athenaCouncilTool = createAthenaCouncilTool({ - backgroundManager: launchManager, - councilConfig: { members: configuredMembers }, - client: mockClient, }) - // #when - const result = await athenaCouncilTool.execute({ question: "How should we proceed?" }, mockToolContext) - - // #then - returns markdown with council results, one section per member - expect(result).toContain("## Council Results") - expect(result).toContain("How should we proceed?") - expect(result).toContain("### Claude (anthropic/claude-sonnet-4-5)") - expect(result).toContain("### GPT (openai/gpt-5.3-codex)") - expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)") - expect(result).toContain("Test analysis result") - }) - - test("returns collected results only for selected members", async () => { - // #given - let launchCount = 0 - const taskStore = new Map() - const launchManager = { - launch: async () => { - launchCount += 1 - const task = createCompletedTask(`bg-${launchCount}`) - taskStore.set(task.id, task) - return task - }, - getTask: (id: string) => taskStore.get(id), - } as unknown as BackgroundManager - const athenaCouncilTool = createAthenaCouncilTool({ - backgroundManager: launchManager, - councilConfig: { members: configuredMembers }, - client: mockClient, - }) - - // #when const result = await athenaCouncilTool.execute( - { - question: "Who should investigate this?", - members: ["GPT", "google/gemini-3-pro"], - }, + { question: "Who should investigate this?", members: ["unknown-model"] }, 
mockToolContext ) - // #then - only selected members appear in output - expect(result).toContain("### GPT (openai/gpt-5.3-codex)") - expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)") - expect(result).not.toContain("### Claude") - expect(launchCount).toBe(2) + expect(result).toBe("Unknown council members: unknown-model. Available members: Claude, GPT, google/gemini-3-pro.") }) - test("includes launch failures alongside successful member results", async () => { - // #given + test("returns selection error when members are omitted", async () => { + const athenaCouncilTool = createAthenaCouncilTool({ + backgroundManager: mockManager, + councilConfig: { members: configuredMembers }, + }) + + const result = await athenaCouncilTool.execute({ question: "How should we proceed?" }, mockToolContext) + + expect(result).toBe( + "athena_council runs one member per call. Pass exactly one member in members (single-item array). Available members: Claude, GPT, google/gemini-3-pro." + ) + }) + + test("returns selection error when multiple members are provided", async () => { + const athenaCouncilTool = createAthenaCouncilTool({ + backgroundManager: mockManager, + councilConfig: { members: configuredMembers }, + }) + + const result = await athenaCouncilTool.execute( + { question: "How should we proceed?", members: ["Claude", "GPT"] }, + mockToolContext + ) + + expect(result).toBe( + "athena_council runs one member per call. Pass exactly one member in members (single-item array). Available members: Claude, GPT, google/gemini-3-pro." 
+ ) + }) + + test("launches selected member and returns background task metadata", async () => { let launchCount = 0 const taskStore = new Map() const launchManager = { launch: async () => { launchCount += 1 - if (launchCount === 2) { - throw new Error("provider outage") - } - const task = createCompletedTask(`bg-${launchCount}`) + const task = createRunningTask(`bg-${launchCount}`) taskStore.set(task.id, task) return task }, getTask: (id: string) => taskStore.get(id), } as unknown as BackgroundManager + const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: launchManager, councilConfig: { members: configuredMembers }, - client: mockClient, }) - // #when - const result = await athenaCouncilTool.execute({ question: "Any concerns?" }, mockToolContext) + const result = await athenaCouncilTool.execute( + { question: "Who should investigate this?", members: ["GPT"] }, + mockToolContext + ) - // #then - successful members have results, failed member listed in failures section - expect(result).toContain("### Claude (anthropic/claude-sonnet-4-5)") - expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)") + expect(launchCount).toBe(1) + expect(result).toContain("Council member launched in background.") + expect(result).toContain("Task ID: bg-1") + expect(result).toContain("Session ID: ses-bg-1") + expect(result).toContain("Member: GPT") + expect(result).toContain("Model: openai/gpt-5.3-codex") + expect(result).toContain("Status: running") + expect(result).toContain("background_output") + expect(result).toContain("task_id=\"bg-1\"") + expect(result).toContain("<task_metadata>") + expect(result).toContain("session_id: ses-bg-1") + }) + + test("returns launch failure details when selected member fails", async () => { + const launchManager = { + launch: async () => { + throw new Error("provider outage") + }, + getTask: () => undefined, + } as unknown as BackgroundManager + + const athenaCouncilTool = createAthenaCouncilTool({ + backgroundManager: 
launchManager, + councilConfig: { members: configuredMembers }, + }) + + const result = await athenaCouncilTool.execute( + { question: "Any concerns?", members: ["GPT"] }, + mockToolContext + ) + + expect(result).toContain("Failed to launch council member.") expect(result).toContain("### Launch Failures") expect(result).toContain("**GPT**") expect(result).toContain("provider outage") }) - - test("returns dedup error when council is already running in same session", async () => { - // #given - use a never-resolving launch to keep the first execution in-flight - const pendingLaunch = new Promise(() => {}) - const launchManager = { - launch: async () => pendingLaunch, - getTask: () => undefined, - } as unknown as BackgroundManager - const athenaCouncilTool = createAthenaCouncilTool({ - backgroundManager: launchManager, - councilConfig: { members: [{ model: "openai/gpt-5.3-codex" }] }, - client: mockClient, - }) - - // #when - first call starts but never resolves (stuck in launch) - // second call should be rejected by session guard - const _firstExecution = athenaCouncilTool.execute({ question: "First run" }, mockToolContext) - - // Allow microtask queue to process so markCouncilRunning is called - await new Promise((resolve) => setTimeout(resolve, 0)) - - const secondExecution = await athenaCouncilTool.execute({ question: "Second run" }, mockToolContext) - - // #then - expect(secondExecution).toBe("Council is already running for this session. 
Wait for the current council execution to complete.") - }) }) diff --git a/src/tools/athena-council/tools.ts b/src/tools/athena-council/tools.ts index ed74b6c8..770754e1 100644 --- a/src/tools/athena-council/tools.ts +++ b/src/tools/athena-council/tools.ts @@ -2,13 +2,11 @@ import { tool, type ToolDefinition } from "@opencode-ai/plugin" import { executeCouncil } from "../../agents/athena/council-orchestrator" import type { CouncilConfig, CouncilMemberConfig } from "../../agents/athena/types" import type { BackgroundManager } from "../../features/background-agent" -import type { BackgroundOutputClient } from "../background-task/clients" import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants" import { createCouncilLauncher } from "./council-launcher" -import { isCouncilRunning, markCouncilDone, markCouncilRunning } from "./session-guard" import { waitForCouncilSessions } from "./session-waiter" -import { collectCouncilResults } from "./result-collector" import type { AthenaCouncilToolArgs } from "./types" +import { storeToolMetadata } from "../../features/tool-metadata-store" function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig { return Boolean(councilConfig && councilConfig.members.length > 0) @@ -19,6 +17,11 @@ interface FilterCouncilMembersResult { error?: string } +function buildSingleMemberSelectionError(members: CouncilMemberConfig[]): string { + const availableNames = members.map((member) => member.name ?? member.model).join(", ") + return `athena_council runs one member per call. Pass exactly one member in members (single-item array). 
Available members: ${availableNames}.` +} + export function filterCouncilMembers( members: CouncilMemberConfig[], selectedNames: string[] | undefined @@ -77,9 +80,8 @@ function buildToolDescription(councilConfig: CouncilConfig | undefined): string export function createAthenaCouncilTool(args: { backgroundManager: BackgroundManager councilConfig: CouncilConfig | undefined - client: BackgroundOutputClient }): ToolDefinition { - const { backgroundManager, councilConfig, client } = args + const { backgroundManager, councilConfig } = args const description = buildToolDescription(councilConfig) return tool({ @@ -89,7 +91,7 @@ export function createAthenaCouncilTool(args: { members: tool.schema .array(tool.schema.string()) .optional() - .describe("Optional list of council member names or models to consult. Defaults to all configured members."), + .describe("Single-item list containing exactly one council member name or model ID."), }, async execute(toolArgs: AthenaCouncilToolArgs, toolContext) { if (!isCouncilConfigured(councilConfig)) { @@ -100,83 +102,82 @@ export function createAthenaCouncilTool(args: { if (filteredMembers.error) { return filteredMembers.error } - - if (isCouncilRunning(toolContext.sessionID)) { - return "Council is already running for this session. Wait for the current council execution to complete." 
+ if (filteredMembers.members.length !== 1) { + return buildSingleMemberSelectionError(councilConfig.members) } - markCouncilRunning(toolContext.sessionID) - try { - const execution = await executeCouncil({ - question: toolArgs.question, - council: { members: filteredMembers.members }, - launcher: createCouncilLauncher(backgroundManager), - parentSessionID: toolContext.sessionID, - parentMessageID: toolContext.messageID, - parentAgent: toolContext.agent, - }) + const execution = await executeCouncil({ + question: toolArgs.question, + council: { members: filteredMembers.members }, + launcher: createCouncilLauncher(backgroundManager), + parentSessionID: toolContext.sessionID, + parentMessageID: toolContext.messageID, + parentAgent: toolContext.agent, + }) - // Register metadata for UI visibility (makes sessions clickable in TUI). - const metadataFn = (toolContext as Record).metadata as - | ((input: { title?: string; metadata?: Record }) => Promise) - | undefined - if (metadataFn && execution.launched.length > 0) { - const sessions = await waitForCouncilSessions( - execution.launched, backgroundManager, toolContext.abort - ) - for (const session of sessions) { - await metadataFn({ - title: `Council: ${session.memberName}`, - metadata: { - sessionId: session.sessionId, - agent: "council-member", - model: session.model, - description: `Council member: ${session.memberName}`, - }, - }) - } + if (execution.launched.length === 0) { + return formatCouncilLaunchFailure(execution.failures) + } + + const launched = execution.launched[0] + const launchedMemberName = launched?.member.name ?? launched?.member.model + const launchedMemberModel = launched?.member.model ?? "unknown" + const launchedTaskId = launched?.taskId ?? 
"unknown" + + const sessionInfos = await waitForCouncilSessions(execution.launched, backgroundManager, toolContext.abort) + const launchedSession = sessionInfos.find((session) => session.taskId === launchedTaskId) + const sessionId = launchedSession?.sessionId ?? "pending" + + const metadataFn = (toolContext as Record).metadata as + | ((input: { title?: string; metadata?: Record }) => Promise) + | undefined + if (metadataFn) { + const memberMetadata = { + title: `Council: ${launchedMemberName}`, + metadata: { + sessionId, + agent: "council-member", + model: launchedMemberModel, + description: `Council member: ${launchedMemberName}`, + }, } + await metadataFn(memberMetadata) - // Wait for all members to complete and collect their actual results. - // This eliminates the need for Athena to poll background_output repeatedly. - const collected = await collectCouncilResults( - execution.launched, backgroundManager, client, toolContext.abort - ) - - return formatCouncilOutput(toolArgs.question, collected.results, execution.failures) - } catch (error) { - throw error - } finally { - markCouncilDone(toolContext.sessionID) + const callID = (toolContext as Record).callID + if (typeof callID === "string") { + storeToolMetadata(toolContext.sessionID, callID, memberMetadata) + } } + + return `Council member launched in background. + +Task ID: ${launchedTaskId} +Session ID: ${sessionId} +Member: ${launchedMemberName} +Model: ${launchedMemberModel} +Status: running + +Use \`background_output\` with task_id="${launchedTaskId}" to collect this member's result. 
+- block=true: Wait for completion and return the result +- full_session=true: Include full session messages when needed + + +session_id: ${sessionId} +` }, }) } -function formatCouncilOutput( - question: string, - results: Array<{ name: string; model: string; taskId: string; status: string; content: string }>, +function formatCouncilLaunchFailure( failures: Array<{ member: { name?: string; model: string }; error: string }> ): string { - const sections: string[] = [] - - sections.push(`## Council Results\n\n**Question:** ${question}\n`) - - for (const result of results) { - const header = `### ${result.name} (${result.model})` - if (result.status !== "completed") { - sections.push(`${header}\n\n*Status: ${result.status}*`) - continue - } - sections.push(`${header}\n\n${result.content}`) + if (failures.length === 0) { + return "Failed to launch council member." } - if (failures.length > 0) { - const failureLines = failures - .map((f) => `- **${f.member.name ?? f.member.model}**: ${f.error}`) - .join("\n") - sections.push(`### Launch Failures\n\n${failureLines}`) - } + const failureLines = failures + .map((failure) => `- **${failure.member.name ?? failure.member.model}**: ${failure.error}`) + .join("\n") - return sections.join("\n\n---\n\n") + return `Failed to launch council member.\n\n### Launch Failures\n\n${failureLines}` }