refactor(athena): remove dead code from phases 2, 3, 5 pipeline
Remove 9 files (913 lines) belonging to the code-driven synthesis pipeline, which was superseded by the agent-driven approach in phases 6-8. Phases 3/5 built: collectCouncilResults → formatForSynthesis → buildSynthesisPrompt → formatFindingsForUser → buildDelegationPrompt. Phases 6-8 replaced it with: launch → background_output → Athena synthesizes in conversation → switch_agent. The old pipeline was never wired into the runtime, and all of its consumers were themselves dead code. Also simplify executeCouncil to return CouncilLaunchResult (task IDs + failures) instead of reading stale task status via collectCouncilResults. Deleted: council-result-collector, synthesis-types, synthesis-prompt, synthesis-formatter, findings-presenter, delegation-prompts (+ 4 tests). Removed from types.ts: the CouncilMemberStatus, AgreementLevel, CouncilMemberResponse, and CouncilExecutionResult types.
This commit is contained in:
parent
d8ba9b1f0c
commit
4d675bac89
@ -3,16 +3,6 @@ import { buildCouncilPrompt } from "./council-prompt"
|
|||||||
import { executeCouncil } from "./council-orchestrator"
|
import { executeCouncil } from "./council-orchestrator"
|
||||||
import type { CouncilConfig } from "./types"
|
import type { CouncilConfig } from "./types"
|
||||||
|
|
||||||
type MockTaskStatus = "completed" | "error" | "cancelled" | "interrupt"
|
|
||||||
|
|
||||||
interface MockTask {
|
|
||||||
id: string
|
|
||||||
status: MockTaskStatus
|
|
||||||
result?: string
|
|
||||||
error?: string
|
|
||||||
completedAt?: Date
|
|
||||||
}
|
|
||||||
|
|
||||||
interface MockLaunchInput {
|
interface MockLaunchInput {
|
||||||
description: string
|
description: string
|
||||||
prompt: string
|
prompt: string
|
||||||
@ -25,20 +15,15 @@ interface MockLaunchInput {
|
|||||||
permission?: Record<string, "ask" | "allow" | "deny">
|
permission?: Record<string, "ask" | "allow" | "deny">
|
||||||
}
|
}
|
||||||
|
|
||||||
function createMockTask(task: MockTask, launch: MockLaunchInput): MockTask & {
|
function createMockTask(id: string, launch: MockLaunchInput) {
|
||||||
parentSessionID: string
|
|
||||||
parentMessageID: string
|
|
||||||
description: string
|
|
||||||
prompt: string
|
|
||||||
agent: string
|
|
||||||
} {
|
|
||||||
return {
|
return {
|
||||||
|
id,
|
||||||
|
status: "pending" as const,
|
||||||
parentSessionID: launch.parentSessionID,
|
parentSessionID: launch.parentSessionID,
|
||||||
parentMessageID: launch.parentMessageID,
|
parentMessageID: launch.parentMessageID,
|
||||||
description: launch.description,
|
description: launch.description,
|
||||||
prompt: launch.prompt,
|
prompt: launch.prompt,
|
||||||
agent: launch.agent,
|
agent: launch.agent,
|
||||||
...task,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -51,15 +36,7 @@ describe("executeCouncil", () => {
|
|||||||
const launcher = {
|
const launcher = {
|
||||||
launch: async (input: MockLaunchInput) => {
|
launch: async (input: MockLaunchInput) => {
|
||||||
launches.push(input)
|
launches.push(input)
|
||||||
return createMockTask(
|
return createMockTask(`task-${launches.length}`, input)
|
||||||
{
|
|
||||||
id: `task-${launches.length}`,
|
|
||||||
status: "completed",
|
|
||||||
result: `response-${launches.length}`,
|
|
||||||
completedAt: new Date(),
|
|
||||||
},
|
|
||||||
input
|
|
||||||
)
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,8 +61,9 @@ describe("executeCouncil", () => {
|
|||||||
const expectedPrompt = buildCouncilPrompt(question)
|
const expectedPrompt = buildCouncilPrompt(question)
|
||||||
|
|
||||||
expect(launches).toHaveLength(3)
|
expect(launches).toHaveLength(3)
|
||||||
expect(result.completedCount).toBe(3)
|
expect(result.launched).toHaveLength(3)
|
||||||
expect(result.failedCount).toBe(0)
|
expect(result.failures).toHaveLength(0)
|
||||||
|
expect(result.totalMembers).toBe(3)
|
||||||
|
|
||||||
for (const launch of launches) {
|
for (const launch of launches) {
|
||||||
expect(launch.prompt).toBe(expectedPrompt)
|
expect(launch.prompt).toBe(expectedPrompt)
|
||||||
@ -98,33 +76,16 @@ describe("executeCouncil", () => {
|
|||||||
expect(launches[2]?.model).toEqual({ providerID: "google", modelID: "gemini-3-pro" })
|
expect(launches[2]?.model).toEqual({ providerID: "google", modelID: "gemini-3-pro" })
|
||||||
})
|
})
|
||||||
|
|
||||||
//#given a council with 3 members where 1 member fails
|
//#given a council with 3 members where 1 launch throws
|
||||||
//#when executeCouncil is called
|
//#when executeCouncil is called
|
||||||
//#then partial failures are tolerated and preserved in responses
|
//#then launch failures are captured separately from successful launches
|
||||||
test("returns successful result for partial failures", async () => {
|
test("captures launch failures separately from successful launches", async () => {
|
||||||
const launcher = {
|
const launcher = {
|
||||||
launch: async (input: MockLaunchInput) => {
|
launch: async (input: MockLaunchInput) => {
|
||||||
if (input.model?.providerID === "anthropic") {
|
if (input.model?.providerID === "anthropic") {
|
||||||
return createMockTask(
|
throw new Error("Provider unavailable")
|
||||||
{
|
|
||||||
id: "task-failed",
|
|
||||||
status: "error",
|
|
||||||
error: "Token limit exceeded",
|
|
||||||
completedAt: new Date(),
|
|
||||||
},
|
|
||||||
input
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
return createMockTask(`task-${input.model?.providerID}`, input)
|
||||||
return createMockTask(
|
|
||||||
{
|
|
||||||
id: `task-${input.model?.providerID}`,
|
|
||||||
status: "completed",
|
|
||||||
result: `ok-${input.model?.providerID}`,
|
|
||||||
completedAt: new Date(),
|
|
||||||
},
|
|
||||||
input
|
|
||||||
)
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -142,28 +103,21 @@ describe("executeCouncil", () => {
|
|||||||
parentMessageID: "message-1",
|
parentMessageID: "message-1",
|
||||||
})
|
})
|
||||||
|
|
||||||
expect(result.completedCount).toBe(2)
|
expect(result.launched).toHaveLength(2)
|
||||||
expect(result.failedCount).toBe(1)
|
expect(result.failures).toHaveLength(1)
|
||||||
expect(result.responses).toHaveLength(3)
|
expect(result.totalMembers).toBe(3)
|
||||||
expect(result.responses.filter((response) => response.status === "completed")).toHaveLength(2)
|
expect(result.failures[0]?.member.model).toBe("anthropic/claude-sonnet-4-5")
|
||||||
expect(result.responses.filter((response) => response.status === "error")).toHaveLength(1)
|
expect(result.failures[0]?.error).toContain("Launch failed")
|
||||||
})
|
})
|
||||||
|
|
||||||
//#given a council where all members fail
|
//#given a council where all launches throw
|
||||||
//#when executeCouncil is called
|
//#when executeCouncil is called
|
||||||
//#then it returns structured error result with zero completions
|
//#then all members appear as failures with zero launched
|
||||||
test("returns all failures when every member fails", async () => {
|
test("returns all failures when every launch throws", async () => {
|
||||||
const launcher = {
|
const launcher = {
|
||||||
launch: async (input: MockLaunchInput) =>
|
launch: async () => {
|
||||||
createMockTask(
|
throw new Error("Model unavailable")
|
||||||
{
|
|
||||||
id: `task-${input.model?.providerID}`,
|
|
||||||
status: "error",
|
|
||||||
error: "Model unavailable",
|
|
||||||
completedAt: new Date(),
|
|
||||||
},
|
},
|
||||||
input
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await executeCouncil({
|
const result = await executeCouncil({
|
||||||
@ -179,29 +133,21 @@ describe("executeCouncil", () => {
|
|||||||
parentMessageID: "message-1",
|
parentMessageID: "message-1",
|
||||||
})
|
})
|
||||||
|
|
||||||
expect(result.completedCount).toBe(0)
|
expect(result.launched).toHaveLength(0)
|
||||||
expect(result.failedCount).toBe(2)
|
expect(result.failures).toHaveLength(2)
|
||||||
expect(result.responses).toHaveLength(2)
|
expect(result.totalMembers).toBe(2)
|
||||||
expect(result.responses.every((response) => response.status === "error")).toBe(true)
|
expect(result.failures.every((f) => f.error.includes("Launch failed"))).toBe(true)
|
||||||
})
|
})
|
||||||
|
|
||||||
//#given a council with one invalid model string
|
//#given a council with one invalid model string
|
||||||
//#when executeCouncil is called
|
//#when executeCouncil is called
|
||||||
//#then invalid member becomes an error response while others still execute
|
//#then invalid member becomes a failure while others still launch
|
||||||
test("handles invalid model strings without crashing council execution", async () => {
|
test("handles invalid model strings without crashing council execution", async () => {
|
||||||
const launches: MockLaunchInput[] = []
|
const launches: MockLaunchInput[] = []
|
||||||
const launcher = {
|
const launcher = {
|
||||||
launch: async (input: MockLaunchInput) => {
|
launch: async (input: MockLaunchInput) => {
|
||||||
launches.push(input)
|
launches.push(input)
|
||||||
return createMockTask(
|
return createMockTask(`task-${launches.length}`, input)
|
||||||
{
|
|
||||||
id: `task-${launches.length}`,
|
|
||||||
status: "completed",
|
|
||||||
result: "valid-member-response",
|
|
||||||
completedAt: new Date(),
|
|
||||||
},
|
|
||||||
input
|
|
||||||
)
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -219,10 +165,9 @@ describe("executeCouncil", () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
expect(launches).toHaveLength(1)
|
expect(launches).toHaveLength(1)
|
||||||
expect(result.completedCount).toBe(1)
|
expect(result.launched).toHaveLength(1)
|
||||||
expect(result.failedCount).toBe(1)
|
expect(result.failures).toHaveLength(1)
|
||||||
expect(result.responses).toHaveLength(2)
|
expect(result.failures.find((f) => f.member.model === "invalid-model")?.error).toContain("Launch failed")
|
||||||
expect(result.responses.find((response) => response.member.model === "invalid-model")?.status).toBe("error")
|
|
||||||
})
|
})
|
||||||
|
|
||||||
//#given members with per-member temperature and variant
|
//#given members with per-member temperature and variant
|
||||||
@ -233,15 +178,7 @@ describe("executeCouncil", () => {
|
|||||||
const launcher = {
|
const launcher = {
|
||||||
launch: async (input: MockLaunchInput) => {
|
launch: async (input: MockLaunchInput) => {
|
||||||
launches.push(input)
|
launches.push(input)
|
||||||
return createMockTask(
|
return createMockTask(`task-${launches.length}`, input)
|
||||||
{
|
|
||||||
id: `task-${launches.length}`,
|
|
||||||
status: "completed",
|
|
||||||
result: "ok",
|
|
||||||
completedAt: new Date(),
|
|
||||||
},
|
|
||||||
input
|
|
||||||
)
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -264,4 +201,33 @@ describe("executeCouncil", () => {
|
|||||||
expect(launches[1]?.temperature).toBe(0.3)
|
expect(launches[1]?.temperature).toBe(0.3)
|
||||||
expect(launches[1]?.model?.variant).toBeUndefined()
|
expect(launches[1]?.model?.variant).toBeUndefined()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
//#given launched members
|
||||||
|
//#when executeCouncil returns
|
||||||
|
//#then each launched member has a taskId for background_output retrieval
|
||||||
|
test("returns task IDs for background_output retrieval", async () => {
|
||||||
|
const launcher = {
|
||||||
|
launch: async (input: MockLaunchInput) =>
|
||||||
|
createMockTask(`bg_${input.model?.providerID}`, input),
|
||||||
|
}
|
||||||
|
|
||||||
|
const result = await executeCouncil({
|
||||||
|
question: "Review error handling",
|
||||||
|
council: {
|
||||||
|
members: [
|
||||||
|
{ model: "openai/gpt-5.3-codex", name: "OpenAI" },
|
||||||
|
{ model: "google/gemini-3-pro", name: "Gemini" },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
launcher,
|
||||||
|
parentSessionID: "session-1",
|
||||||
|
parentMessageID: "message-1",
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(result.launched).toHaveLength(2)
|
||||||
|
expect(result.launched[0]?.taskId).toBe("bg_openai")
|
||||||
|
expect(result.launched[0]?.member.name).toBe("OpenAI")
|
||||||
|
expect(result.launched[1]?.taskId).toBe("bg_google")
|
||||||
|
expect(result.launched[1]?.member.name).toBe("Gemini")
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@ -1,9 +1,8 @@
|
|||||||
import type { LaunchInput, BackgroundTask } from "../../features/background-agent/types"
|
import type { LaunchInput, BackgroundTask } from "../../features/background-agent/types"
|
||||||
import { createAgentToolRestrictions } from "../../shared/permission-compat"
|
import { createAgentToolRestrictions } from "../../shared/permission-compat"
|
||||||
import { buildCouncilPrompt } from "./council-prompt"
|
import { buildCouncilPrompt } from "./council-prompt"
|
||||||
import { collectCouncilResults } from "./council-result-collector"
|
|
||||||
import { parseModelString } from "./model-parser"
|
import { parseModelString } from "./model-parser"
|
||||||
import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types"
|
import type { CouncilConfig, CouncilLaunchFailure, CouncilLaunchedMember, CouncilLaunchResult, CouncilMemberConfig } from "./types"
|
||||||
|
|
||||||
export type CouncilLaunchInput = LaunchInput
|
export type CouncilLaunchInput = LaunchInput
|
||||||
|
|
||||||
@ -20,57 +19,43 @@ export interface CouncilExecutionInput {
|
|||||||
parentAgent?: string
|
parentAgent?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function executeCouncil(input: CouncilExecutionInput): Promise<CouncilExecutionResult> {
|
/**
|
||||||
|
* Launches all council members in parallel and returns launch outcomes.
|
||||||
|
* Does NOT wait for task completion — actual results are collected by the
|
||||||
|
* agent via background_output calls after this returns.
|
||||||
|
*/
|
||||||
|
export async function executeCouncil(input: CouncilExecutionInput): Promise<CouncilLaunchResult> {
|
||||||
const { question, council, launcher, parentSessionID, parentMessageID, parentAgent } = input
|
const { question, council, launcher, parentSessionID, parentMessageID, parentAgent } = input
|
||||||
const prompt = buildCouncilPrompt(question)
|
const prompt = buildCouncilPrompt(question)
|
||||||
const startTimes = new Map<string, number>()
|
|
||||||
|
|
||||||
const launchResults = await Promise.allSettled(
|
const launchResults = await Promise.allSettled(
|
||||||
council.members.map((member) =>
|
council.members.map((member) =>
|
||||||
launchMember(
|
launchMember(member, prompt, launcher, parentSessionID, parentMessageID, parentAgent)
|
||||||
member,
|
|
||||||
prompt,
|
|
||||||
launcher,
|
|
||||||
parentSessionID,
|
|
||||||
parentMessageID,
|
|
||||||
parentAgent,
|
|
||||||
startTimes
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
const launchedTasks: BackgroundTask[] = []
|
const launched: CouncilLaunchedMember[] = []
|
||||||
const launchedMembers: CouncilMemberConfig[] = []
|
const failures: CouncilLaunchFailure[] = []
|
||||||
const launchFailures: CouncilMemberResponse[] = []
|
|
||||||
|
|
||||||
launchResults.forEach((result, index) => {
|
launchResults.forEach((result, index) => {
|
||||||
const member = council.members[index]
|
const member = council.members[index]
|
||||||
|
|
||||||
if (result.status === "fulfilled") {
|
if (result.status === "fulfilled") {
|
||||||
launchedTasks.push(result.value)
|
launched.push({ member, taskId: result.value.id })
|
||||||
launchedMembers.push(member)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
launchFailures.push({
|
failures.push({
|
||||||
member,
|
member,
|
||||||
status: "error",
|
|
||||||
error: `Launch failed: ${String(result.reason)}`,
|
error: `Launch failed: ${String(result.reason)}`,
|
||||||
taskId: "",
|
|
||||||
durationMs: 0,
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
const collected = collectCouncilResults(launchedTasks, launchedMembers, startTimes)
|
|
||||||
const responses = [...collected, ...launchFailures]
|
|
||||||
const completedCount = responses.filter((response) => response.status === "completed").length
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
question,
|
question,
|
||||||
responses,
|
launched,
|
||||||
|
failures,
|
||||||
totalMembers: council.members.length,
|
totalMembers: council.members.length,
|
||||||
completedCount,
|
|
||||||
failedCount: council.members.length - completedCount,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,8 +65,7 @@ async function launchMember(
|
|||||||
launcher: CouncilLauncher,
|
launcher: CouncilLauncher,
|
||||||
parentSessionID: string,
|
parentSessionID: string,
|
||||||
parentMessageID: string,
|
parentMessageID: string,
|
||||||
parentAgent: string | undefined,
|
parentAgent: string | undefined
|
||||||
startTimes: Map<string, number>
|
|
||||||
): Promise<BackgroundTask> {
|
): Promise<BackgroundTask> {
|
||||||
const parsedModel = parseModelString(member.model)
|
const parsedModel = parseModelString(member.model)
|
||||||
if (!parsedModel) {
|
if (!parsedModel) {
|
||||||
@ -90,7 +74,7 @@ async function launchMember(
|
|||||||
|
|
||||||
const restrictions = createAgentToolRestrictions(["write", "edit", "task"])
|
const restrictions = createAgentToolRestrictions(["write", "edit", "task"])
|
||||||
const memberName = member.name ?? member.model
|
const memberName = member.name ?? member.model
|
||||||
const task = await launcher.launch({
|
return launcher.launch({
|
||||||
description: `Council member: ${memberName}`,
|
description: `Council member: ${memberName}`,
|
||||||
prompt,
|
prompt,
|
||||||
agent: "athena",
|
agent: "athena",
|
||||||
@ -105,7 +89,4 @@ async function launchMember(
|
|||||||
...(member.temperature !== undefined ? { temperature: member.temperature } : {}),
|
...(member.temperature !== undefined ? { temperature: member.temperature } : {}),
|
||||||
permission: restrictions.permission,
|
permission: restrictions.permission,
|
||||||
})
|
})
|
||||||
|
|
||||||
startTimes.set(task.id, Date.now())
|
|
||||||
return task
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,36 +0,0 @@
|
|||||||
import type { BackgroundTask, BackgroundTaskStatus } from "../../features/background-agent/types"
|
|
||||||
import type { CouncilMemberConfig, CouncilMemberResponse, CouncilMemberStatus } from "./types"
|
|
||||||
|
|
||||||
export function collectCouncilResults(
|
|
||||||
tasks: BackgroundTask[],
|
|
||||||
members: CouncilMemberConfig[],
|
|
||||||
startTimes: Map<string, number>
|
|
||||||
): CouncilMemberResponse[] {
|
|
||||||
return tasks.map((task, index) => {
|
|
||||||
const member = members[index]
|
|
||||||
const status = mapTaskStatus(task.status)
|
|
||||||
const startTime = startTimes.get(task.id) ?? Date.now()
|
|
||||||
const finishedAt = task.completedAt?.getTime() ?? Date.now()
|
|
||||||
|
|
||||||
return {
|
|
||||||
member,
|
|
||||||
status,
|
|
||||||
response: status === "completed" ? task.result : undefined,
|
|
||||||
error: status === "completed" ? undefined : (task.error ?? `Task status: ${task.status}`),
|
|
||||||
taskId: task.id,
|
|
||||||
durationMs: Math.max(0, finishedAt - startTime),
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
function mapTaskStatus(taskStatus: BackgroundTaskStatus): CouncilMemberStatus {
|
|
||||||
if (taskStatus === "completed") {
|
|
||||||
return "completed"
|
|
||||||
}
|
|
||||||
|
|
||||||
if (taskStatus === "cancelled" || taskStatus === "interrupt") {
|
|
||||||
return "timeout"
|
|
||||||
}
|
|
||||||
|
|
||||||
return "error"
|
|
||||||
}
|
|
||||||
@ -1,125 +0,0 @@
|
|||||||
import { describe, expect, test } from "bun:test"
|
|
||||||
import type { SynthesizedFinding } from "./synthesis-types"
|
|
||||||
import { buildAtlasDelegationPrompt, buildPrometheusDelegationPrompt } from "./delegation-prompts"
|
|
||||||
|
|
||||||
function createConfirmedFindings(): SynthesizedFinding[] {
|
|
||||||
return [
|
|
||||||
{
|
|
||||||
summary: "Guard missing council config in startup",
|
|
||||||
details: "Athena path can proceed with undefined council members in some flows.",
|
|
||||||
agreementLevel: "unanimous",
|
|
||||||
reportedBy: ["OpenAI", "Claude", "Gemini"],
|
|
||||||
assessment: {
|
|
||||||
agrees: true,
|
|
||||||
rationale: "Directly observed from startup and config fallback paths.",
|
|
||||||
},
|
|
||||||
isFalsePositiveRisk: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
summary: "Potential retry thrash in background runner",
|
|
||||||
details: "Repeated failures can cascade retry windows under high load.",
|
|
||||||
agreementLevel: "minority",
|
|
||||||
reportedBy: ["Claude"],
|
|
||||||
assessment: {
|
|
||||||
agrees: true,
|
|
||||||
rationale: "Worth addressing to lower operational risk.",
|
|
||||||
},
|
|
||||||
isFalsePositiveRisk: false,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
describe("buildAtlasDelegationPrompt", () => {
|
|
||||||
//#given confirmed findings and an original question
|
|
||||||
//#when the Atlas delegation prompt is built
|
|
||||||
//#then it includes both findings and the original question context
|
|
||||||
test("includes confirmed findings summaries and original question", () => {
|
|
||||||
const findings = createConfirmedFindings()
|
|
||||||
const question = "Which issues should we fix first in Athena integration?"
|
|
||||||
|
|
||||||
const prompt = buildAtlasDelegationPrompt(findings, question)
|
|
||||||
|
|
||||||
expect(prompt).toContain("Original question")
|
|
||||||
expect(prompt).toContain(question)
|
|
||||||
expect(prompt).toContain("Guard missing council config in startup")
|
|
||||||
expect(prompt).toContain("Potential retry thrash in background runner")
|
|
||||||
})
|
|
||||||
|
|
||||||
//#given confirmed findings
|
|
||||||
//#when Atlas prompt is generated
|
|
||||||
//#then it explicitly asks Atlas to fix those specific issues
|
|
||||||
test("instructs Atlas to implement direct fixes", () => {
|
|
||||||
const prompt = buildAtlasDelegationPrompt(createConfirmedFindings(), "Fix Athena reliability issues")
|
|
||||||
|
|
||||||
expect(prompt).toContain("Fix these confirmed issues directly")
|
|
||||||
expect(prompt).toContain("Implement code changes")
|
|
||||||
expect(prompt).toContain("prioritize by agreement level")
|
|
||||||
})
|
|
||||||
|
|
||||||
//#given a single confirmed finding
|
|
||||||
//#when Atlas prompt is generated
|
|
||||||
//#then prompt still renders correctly for edge case input
|
|
||||||
test("handles a single finding edge case", () => {
|
|
||||||
const [singleFinding] = createConfirmedFindings()
|
|
||||||
|
|
||||||
const prompt = buildAtlasDelegationPrompt([singleFinding], "Fix this one issue")
|
|
||||||
|
|
||||||
expect(prompt).toContain("1. Guard missing council config in startup")
|
|
||||||
expect(prompt).toContain("Agreement level: unanimous")
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
describe("buildPrometheusDelegationPrompt", () => {
|
|
||||||
//#given confirmed findings and an original question
|
|
||||||
//#when the Prometheus delegation prompt is built
|
|
||||||
//#then it includes both findings and the original question context
|
|
||||||
test("includes confirmed findings summaries and original question", () => {
|
|
||||||
const findings = createConfirmedFindings()
|
|
||||||
const question = "How should we sequence Athena integration hardening work?"
|
|
||||||
|
|
||||||
const prompt = buildPrometheusDelegationPrompt(findings, question)
|
|
||||||
|
|
||||||
expect(prompt).toContain("Original question")
|
|
||||||
expect(prompt).toContain(question)
|
|
||||||
expect(prompt).toContain("Guard missing council config in startup")
|
|
||||||
expect(prompt).toContain("Potential retry thrash in background runner")
|
|
||||||
})
|
|
||||||
|
|
||||||
//#given confirmed findings
|
|
||||||
//#when Prometheus prompt is generated
|
|
||||||
//#then it explicitly asks for phased planning and prioritization
|
|
||||||
test("instructs Prometheus to create an execution plan", () => {
|
|
||||||
const prompt = buildPrometheusDelegationPrompt(createConfirmedFindings(), "Plan Athena stabilization")
|
|
||||||
|
|
||||||
expect(prompt).toContain("Create an execution plan")
|
|
||||||
expect(prompt).toContain("phased implementation plan")
|
|
||||||
expect(prompt).toContain("prioritize by agreement level and impact")
|
|
||||||
})
|
|
||||||
|
|
||||||
//#given a single confirmed finding
|
|
||||||
//#when Prometheus prompt is generated
|
|
||||||
//#then prompt still renders correctly for edge case input
|
|
||||||
test("handles a single finding edge case", () => {
|
|
||||||
const [singleFinding] = createConfirmedFindings()
|
|
||||||
|
|
||||||
const prompt = buildPrometheusDelegationPrompt([singleFinding], "Plan this one issue")
|
|
||||||
|
|
||||||
expect(prompt).toContain("1. Guard missing council config in startup")
|
|
||||||
expect(prompt).toContain("Agreement level: unanimous")
|
|
||||||
})
|
|
||||||
|
|
||||||
//#given findings at multiple agreement levels
|
|
||||||
//#when either delegation prompt is generated
|
|
||||||
//#then each finding includes agreement level context
|
|
||||||
test("includes agreement level context for each finding in both prompts", () => {
|
|
||||||
const findings = createConfirmedFindings()
|
|
||||||
|
|
||||||
const atlasPrompt = buildAtlasDelegationPrompt(findings, "Atlas context")
|
|
||||||
const prometheusPrompt = buildPrometheusDelegationPrompt(findings, "Prometheus context")
|
|
||||||
|
|
||||||
expect(atlasPrompt).toContain("Agreement level: unanimous")
|
|
||||||
expect(atlasPrompt).toContain("Agreement level: minority")
|
|
||||||
expect(prometheusPrompt).toContain("Agreement level: unanimous")
|
|
||||||
expect(prometheusPrompt).toContain("Agreement level: minority")
|
|
||||||
})
|
|
||||||
})
|
|
||||||
@ -1,55 +0,0 @@
|
|||||||
import type { SynthesizedFinding } from "./synthesis-types"
|
|
||||||
|
|
||||||
function formatFindingBlock(finding: SynthesizedFinding, index: number): string {
|
|
||||||
const assessment = finding.assessment.agrees ? "Agrees" : "Disagrees"
|
|
||||||
|
|
||||||
return [
|
|
||||||
`${index + 1}. ${finding.summary}`,
|
|
||||||
` Details: ${finding.details}`,
|
|
||||||
` Agreement level: ${finding.agreementLevel}`,
|
|
||||||
` Athena assessment: ${assessment}`,
|
|
||||||
` Rationale: ${finding.assessment.rationale}`,
|
|
||||||
].join("\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
function formatConfirmedFindings(confirmedFindings: SynthesizedFinding[]): string {
|
|
||||||
return confirmedFindings.map((finding, index) => formatFindingBlock(finding, index)).join("\n\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
export function buildAtlasDelegationPrompt(confirmedFindings: SynthesizedFinding[], question: string): string {
|
|
||||||
return [
|
|
||||||
"# Atlas Delegation Brief",
|
|
||||||
"Original question:",
|
|
||||||
question,
|
|
||||||
"",
|
|
||||||
"Task:",
|
|
||||||
"Fix these confirmed issues directly.",
|
|
||||||
"",
|
|
||||||
"Confirmed findings:",
|
|
||||||
formatConfirmedFindings(confirmedFindings),
|
|
||||||
"",
|
|
||||||
"Execution instructions:",
|
|
||||||
"- Implement code changes to resolve each confirmed issue.",
|
|
||||||
"- prioritize by agreement level, addressing unanimous findings first.",
|
|
||||||
"- Validate fixes with relevant tests and type safety checks.",
|
|
||||||
].join("\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
export function buildPrometheusDelegationPrompt(confirmedFindings: SynthesizedFinding[], question: string): string {
|
|
||||||
return [
|
|
||||||
"# Prometheus Delegation Brief",
|
|
||||||
"Original question:",
|
|
||||||
question,
|
|
||||||
"",
|
|
||||||
"Task:",
|
|
||||||
"Create an execution plan for these confirmed issues.",
|
|
||||||
"",
|
|
||||||
"Confirmed findings:",
|
|
||||||
formatConfirmedFindings(confirmedFindings),
|
|
||||||
"",
|
|
||||||
"Planning instructions:",
|
|
||||||
"- Produce a phased implementation plan with clear task boundaries.",
|
|
||||||
"- prioritize by agreement level and impact.",
|
|
||||||
"- Include verification checkpoints for each phase.",
|
|
||||||
].join("\n")
|
|
||||||
}
|
|
||||||
@ -1,167 +0,0 @@
|
|||||||
import { describe, expect, test } from "bun:test"
|
|
||||||
import type { SynthesisResult } from "./synthesis-types"
|
|
||||||
import { formatFindingsForUser } from "./findings-presenter"
|
|
||||||
|
|
||||||
function createSynthesisResult(overrides?: Partial<SynthesisResult>): SynthesisResult {
|
|
||||||
return {
|
|
||||||
question: "Review the Athena council outputs for actionable risks",
|
|
||||||
findings: [
|
|
||||||
{
|
|
||||||
summary: "Validate configuration before execution",
|
|
||||||
details: "Missing guard clauses can allow invalid member configs.",
|
|
||||||
agreementLevel: "majority",
|
|
||||||
reportedBy: ["OpenAI", "Claude"],
|
|
||||||
assessment: {
|
|
||||||
agrees: true,
|
|
||||||
rationale: "This aligns with repeated failures observed in setup paths.",
|
|
||||||
},
|
|
||||||
isFalsePositiveRisk: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
summary: "Retry strategy lacks upper bounds",
|
|
||||||
details: "Unbounded retries may cause runaway background tasks.",
|
|
||||||
agreementLevel: "solo",
|
|
||||||
reportedBy: ["Gemini"],
|
|
||||||
assessment: {
|
|
||||||
agrees: false,
|
|
||||||
rationale: "Current retry count is already constrained in most flows.",
|
|
||||||
},
|
|
||||||
isFalsePositiveRisk: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
summary: "Preserve partial successes",
|
|
||||||
details: "Do not fail entire council run when one member errors.",
|
|
||||||
agreementLevel: "unanimous",
|
|
||||||
reportedBy: ["OpenAI", "Claude", "Gemini"],
|
|
||||||
assessment: {
|
|
||||||
agrees: true,
|
|
||||||
rationale: "This is required for resilient multi-model orchestration.",
|
|
||||||
},
|
|
||||||
isFalsePositiveRisk: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
summary: "Reduce prompt token duplication",
|
|
||||||
details: "Duplicate context blocks increase cost without improving quality.",
|
|
||||||
agreementLevel: "minority",
|
|
||||||
reportedBy: ["Claude"],
|
|
||||||
assessment: {
|
|
||||||
agrees: true,
|
|
||||||
rationale: "Consolidation should lower cost while preserving intent.",
|
|
||||||
},
|
|
||||||
isFalsePositiveRisk: false,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
memberProvenance: [],
|
|
||||||
totalFindings: 4,
|
|
||||||
consensusCount: 2,
|
|
||||||
outlierCount: 1,
|
|
||||||
...overrides,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
describe("formatFindingsForUser", () => {
  //#given a synthesis result that contains every agreement level
  //#when the findings are rendered for the user
  //#then section headers appear in the order unanimous, majority, minority, solo
  test("groups findings by agreement level in required order", () => {
    const rendered = formatFindingsForUser(createSynthesisResult())

    const unanimousAt = rendered.indexOf("## Unanimous Findings")
    const majorityAt = rendered.indexOf("## Majority Findings")
    const minorityAt = rendered.indexOf("## Minority Findings")
    const soloAt = rendered.indexOf("## Solo Findings")

    expect(unanimousAt).toBeGreaterThan(-1)
    expect(majorityAt).toBeGreaterThan(unanimousAt)
    expect(minorityAt).toBeGreaterThan(majorityAt)
    expect(soloAt).toBeGreaterThan(minorityAt)
  })

  //#given a finding that carries an Athena assessment
  //#when the findings are rendered for the user
  //#then summary, details, reporters and the assessment rationale are all present
  test("renders finding body and Athena assessment rationale", () => {
    const rendered = formatFindingsForUser(createSynthesisResult())

    const expectedFragments = [
      "Validate configuration before execution",
      "Missing guard clauses can allow invalid member configs.",
      "Reported by: OpenAI, Claude",
      "Athena assessment: Agrees",
      "Rationale: This aligns with repeated failures observed in setup paths.",
    ]
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment)
    }
  })

  //#given a solo finding marked as a false-positive risk
  //#when the findings are rendered for the user
  //#then the warning marker is shown next to that finding
  test("shows false-positive warning for risky solo findings", () => {
    const rendered = formatFindingsForUser(createSynthesisResult())

    expect(rendered).toContain("[False Positive Risk]")
    expect(rendered).toContain("Retry strategy lacks upper bounds")
  })

  //#given a synthesis result without any findings
  //#when the findings are rendered for the user
  //#then a no-findings message is emitted instead of empty sections
  test("handles empty findings with a no-findings message", () => {
    const emptySynthesis = createSynthesisResult({
      findings: [],
      totalFindings: 0,
      consensusCount: 0,
      outlierCount: 0,
    })

    const rendered = formatFindingsForUser(emptySynthesis)

    expect(rendered).toContain("No synthesized findings are available")
  })

  //#given two majority findings with different numbers of reporters
  //#when the findings are rendered for the user
  //#then the group header carries only the level label, never one finding's reporter count
  test("shows agreement level label in group header without single-finding count", () => {
    const synthesis = createSynthesisResult({
      findings: [
        {
          summary: "Finding A",
          details: "Reported by 3 members",
          agreementLevel: "majority",
          reportedBy: ["OpenAI", "Claude", "Gemini"],
          assessment: { agrees: true, rationale: "Valid" },
          isFalsePositiveRisk: false,
        },
        {
          summary: "Finding B",
          details: "Reported by 2 members",
          agreementLevel: "majority",
          reportedBy: ["OpenAI", "Claude"],
          assessment: { agrees: true, rationale: "Also valid" },
          isFalsePositiveRisk: false,
        },
      ],
    })

    const rendered = formatFindingsForUser(synthesis)

    // The group header must not borrow the first finding's agreement line.
    expect(rendered).not.toContain("## Majority Findings (3 members report this (majority))")
    expect(rendered).toContain("## Majority Findings")
    // Per-finding agreement context is still rendered for each entry.
    expect(rendered).toContain("Agreement context: 3 members report this (majority)")
    expect(rendered).toContain("Agreement context: 2 members report this (majority)")
  })

  //#given a synthesis result with at least one finding
  //#when the findings are rendered for the user
  //#then the output closes with the action recommendation section
  test("includes a final action recommendation section", () => {
    const rendered = formatFindingsForUser(createSynthesisResult())

    expect(rendered.trimEnd()).toMatch(/## Action Recommendation[\s\S]*$/)
    expect(rendered).toContain("Prioritize unanimous and majority findings")
  })
})
|
|
||||||
@ -1,81 +0,0 @@
|
|||||||
import type { SynthesisResult, SynthesizedFinding } from "./synthesis-types"
|
|
||||||
import type { AgreementLevel } from "./types"
|
|
||||||
|
|
||||||
/**
 * Agreement levels in the order their sections are rendered.
 * Declared readonly so the shared ordering cannot be mutated by a consumer.
 */
const AGREEMENT_ORDER: readonly AgreementLevel[] = ["unanimous", "majority", "minority", "solo"]
|
|
||||||
|
|
||||||
/** Capitalizes the first character of an agreement level for use in headings. */
function toTitle(level: AgreementLevel): string {
  const head = level.slice(0, 1).toUpperCase()
  const tail = level.slice(1)
  return head + tail
}
|
|
||||||
|
|
||||||
/**
 * Builds the "agreement context" phrase for one finding.
 *
 * @param level - Agreement level that selects the wording.
 * @param finding - Finding whose `reportedBy` length supplies the member count.
 * @returns The phrase for the given level.
 */
function formatAgreementLine(level: AgreementLevel, finding: SynthesizedFinding): string {
  const reporters = finding.reportedBy.length

  // One phrase per level; the lookup replaces a branch per case.
  const phrasing: Record<AgreementLevel, string> = {
    unanimous: `${reporters}/${reporters} members agree`,
    majority: `${reporters} members report this (majority)`,
    minority: `${reporters} members report this (minority)`,
    solo: `${reporters} member reported this`,
  }

  return phrasing[level]
}
|
|
||||||
|
|
||||||
/**
 * Renders a single finding as a markdown subsection.
 *
 * The false-positive marker is appended to the heading only when the finding
 * is rendered under the "solo" level and is flagged as a false-positive risk.
 *
 * @param level - Agreement level of the group the finding is rendered under.
 * @param finding - Finding to render.
 * @returns Six lines joined by newlines: heading, details, reporters,
 *   agreement context, Athena's verdict and its rationale.
 */
function formatFinding(level: AgreementLevel, finding: SynthesizedFinding): string {
  let heading = `### ${finding.summary}`
  if (level === "solo" && finding.isFalsePositiveRisk) {
    heading += " [False Positive Risk]"
  }

  const verdict = finding.assessment.agrees ? "Agrees" : "Disagrees"
  const reporters = finding.reportedBy.join(", ")
  const agreementContext = formatAgreementLine(level, finding)

  const lines = [
    heading,
    `Details: ${finding.details}`,
    `Reported by: ${reporters}`,
    `Agreement context: ${agreementContext}`,
    `Athena assessment: ${verdict}`,
    `Rationale: ${finding.assessment.rationale}`,
  ]
  return lines.join("\n")
}
|
|
||||||
|
|
||||||
/**
 * Builds the closing "Action Recommendation" section.
 *
 * @param result - Synthesis result; only its `question` is echoed back.
 * @param groupedFindings - Findings keyed by agreement level; a missing key counts as zero.
 * @returns The section as newline-joined lines, including a per-level tally.
 */
function formatActionRecommendation(result: SynthesisResult, groupedFindings: Map<AgreementLevel, SynthesizedFinding[]>): string {
  const tallies: string[] = []
  for (const level of AGREEMENT_ORDER) {
    const total = groupedFindings.get(level)?.length ?? 0
    tallies.push(`${toTitle(level)}: ${total}`)
  }

  const lines = [
    "## Action Recommendation",
    `Findings by agreement level: ${tallies.join(" | ")}`,
    "Prioritize unanimous and majority findings for immediate execution,",
    "then review minority findings, and manually validate solo findings before delegating changes.",
    `Question context: ${result.question}`,
  ]
  return lines.join("\n")
}
|
|
||||||
|
|
||||||
/**
 * Renders a synthesis result as a markdown report for the user.
 *
 * Findings are grouped by agreement level and emitted in `AGREEMENT_ORDER`;
 * levels without findings produce no section. The report always ends with an
 * action recommendation. When there are no findings at all, a short
 * no-findings report is returned instead.
 *
 * @param result - Synthesis result to present.
 * @returns The markdown report, with blocks separated by blank lines.
 */
export function formatFindingsForUser(result: SynthesisResult): string {
  const title = "# Synthesized Findings"

  if (result.findings.length === 0) {
    const emptyReport = [
      title,
      "No synthesized findings are available.",
      "## Action Recommendation",
      "Gather additional council responses or re-run synthesis before delegation.",
      `Question context: ${result.question}`,
    ]
    return emptyReport.join("\n\n")
  }

  // Seed every level first so the map has all keys in display order.
  const byLevel = new Map<AgreementLevel, SynthesizedFinding[]>()
  for (const level of AGREEMENT_ORDER) {
    byLevel.set(level, [])
  }
  for (const finding of result.findings) {
    // A level outside AGREEMENT_ORDER has no bucket and is left out.
    byLevel.get(finding.agreementLevel)?.push(finding)
  }

  const blocks: string[] = [title]
  for (const level of AGREEMENT_ORDER) {
    const members = byLevel.get(level) ?? []
    if (members.length === 0) {
      continue
    }

    const renderedEntries = members.map((entry) => formatFinding(level, entry)).join("\n\n")
    blocks.push(`## ${toTitle(level)} Findings (${members.length})\n\n${renderedEntries}`)
  }
  blocks.push(formatActionRecommendation(result, byLevel))

  return blocks.join("\n\n")
}
|
|
||||||
@ -3,10 +3,4 @@ export * from "./agent"
|
|||||||
export * from "./model-parser"
|
export * from "./model-parser"
|
||||||
export * from "./council-prompt"
|
export * from "./council-prompt"
|
||||||
export * from "./council-orchestrator"
|
export * from "./council-orchestrator"
|
||||||
export * from "./council-result-collector"
|
|
||||||
export * from "./synthesis-types"
|
|
||||||
export * from "./synthesis-prompt"
|
|
||||||
export * from "./synthesis-formatter"
|
|
||||||
export * from "./findings-presenter"
|
|
||||||
export * from "./delegation-prompts"
|
|
||||||
export * from "../../config/schema/athena"
|
export * from "../../config/schema/athena"
|
||||||
|
|||||||
@ -1,157 +0,0 @@
|
|||||||
import { describe, expect, test } from "bun:test"
|
|
||||||
import { formatCouncilResultsForSynthesis } from "./synthesis-formatter"
|
|
||||||
import type { CouncilExecutionResult } from "./types"
|
|
||||||
|
|
||||||
/**
 * Test fixture: a council execution result with three completed members.
 *
 * @param overrides - Fields that replace the fixture defaults (shallow merge).
 * @returns The default result with `overrides` applied on top.
 */
function createResult(overrides?: Partial<CouncilExecutionResult>): CouncilExecutionResult {
  const seeds = [
    { model: "openai/gpt-5.3-codex", name: "OpenAI", response: "Finding A from OpenAI", durationMs: 120 },
    { model: "anthropic/claude-sonnet-4-5", name: "Claude", response: "Finding B from Claude", durationMs: 240 },
    { model: "google/gemini-3-pro", name: "Gemini", response: "Finding C from Gemini", durationMs: 360 },
  ]

  // Task ids are numbered from 1 in seed order.
  const responses: CouncilExecutionResult["responses"] = seeds.map((seed, index) => ({
    member: { model: seed.model, name: seed.name },
    status: "completed" as const,
    response: seed.response,
    taskId: `task-${index + 1}`,
    durationMs: seed.durationMs,
  }))

  const defaults: CouncilExecutionResult = {
    question: "What reliability risks exist?",
    responses,
    totalMembers: 3,
    completedCount: 3,
    failedCount: 0,
  }

  return { ...defaults, ...overrides }
}
|
|
||||||
|
|
||||||
describe("formatCouncilResultsForSynthesis", () => {
  //#given an execution result where all three members completed
  //#when the result is formatted for synthesis
  //#then every member's model, raw response text, status and duration are present
  test("formats all completed members with provenance and response text", () => {
    const formatted = formatCouncilResultsForSynthesis(createResult())

    const expectedFragments = [
      "openai/gpt-5.3-codex",
      "anthropic/claude-sonnet-4-5",
      "google/gemini-3-pro",
      "Finding A from OpenAI",
      "Finding B from Claude",
      "Finding C from Gemini",
      "Status: completed",
      "Duration: 120ms",
      "Duration: 240ms",
      "Duration: 360ms",
    ]
    for (const fragment of expectedFragments) {
      expect(formatted).toContain(fragment)
    }
  })

  //#given an execution result with one completed and one failed member
  //#when the result is formatted for synthesis
  //#then the completed response is included
  //#then the failed member shows its error status and message, without a response section
  test("includes completed response and failed error without response section", () => {
    const mixedOutcome = createResult({
      responses: [
        {
          member: { model: "openai/gpt-5.3-codex" },
          status: "completed",
          response: "Primary finding",
          taskId: "task-1",
          durationMs: 80,
        },
        {
          member: { model: "xai/grok-code-fast-1" },
          status: "error",
          error: "Timeout from provider",
          taskId: "task-2",
          durationMs: 500,
        },
      ],
      totalMembers: 2,
      completedCount: 1,
      failedCount: 1,
    })

    const formatted = formatCouncilResultsForSynthesis(mixedOutcome)

    expect(formatted).toContain("Primary finding")
    expect(formatted).toContain("xai/grok-code-fast-1")
    expect(formatted).toContain("Status: error")
    expect(formatted).toContain("Error: Timeout from provider")
    expect(formatted).not.toContain("Response:\nTimeout from provider")
  })

  //#given an execution result in which no member completed
  //#when the result is formatted for synthesis
  //#then the output states that there were no successful responses
  test("shows no successful responses message when all members fail", () => {
    const allFailed = createResult({
      responses: [
        {
          member: { model: "openai/gpt-5.3-codex" },
          status: "error",
          error: "No output",
          taskId: "task-1",
          durationMs: 200,
        },
      ],
      totalMembers: 1,
      completedCount: 0,
      failedCount: 1,
    })

    const formatted = formatCouncilResultsForSynthesis(allFailed)

    expect(formatted).toContain("No successful responses")
  })

  //#given one member with a custom name and one without
  //#when the result is formatted for synthesis
  //#then the custom name is used when present, otherwise the model id
  test("prefers custom member name and falls back to model", () => {
    const namedAndUnnamed = createResult({
      responses: [
        {
          member: { model: "openai/gpt-5.3-codex", name: "Council Alpha" },
          status: "completed",
          response: "Custom member response",
          taskId: "task-1",
          durationMs: 10,
        },
        {
          member: { model: "google/gemini-3-pro" },
          status: "completed",
          response: "Default member response",
          taskId: "task-2",
          durationMs: 11,
        },
      ],
      totalMembers: 2,
      completedCount: 2,
      failedCount: 0,
    })

    const formatted = formatCouncilResultsForSynthesis(namedAndUnnamed)

    expect(formatted).toContain("Council Alpha")
    expect(formatted).toContain("google/gemini-3-pro")
  })
})
|
|
||||||
@ -1,48 +0,0 @@
|
|||||||
import type { CouncilExecutionResult } from "./types"
|
|
||||||
|
|
||||||
/**
 * Renders one member's section: provenance header lines followed by either
 * the response body (completed members) or the error line (all other statuses).
 */
function formatMemberSection(response: CouncilExecutionResult["responses"][number]): string {
  // Fall back to the model id when the member has no custom name.
  const memberName = response.member.name ?? response.member.model
  const lines = [
    `## Member: ${memberName} (${response.status})`,
    `Model: ${response.member.model}`,
    `Status: ${response.status}`,
    `Duration: ${response.durationMs}ms`,
  ]

  if (response.status === "completed") {
    // A missing or whitespace-only response is replaced by a placeholder.
    const responseBody = response.response?.trim() ? response.response : "No response content provided"
    lines.push("Response:", responseBody)
  } else {
    lines.push(`Error: ${response.error ?? "No error message provided"}`)
  }

  return lines.join("\n")
}

/**
 * Formats council member results as a markdown document for synthesis.
 *
 * Every member gets its own section regardless of status. The preamble
 * differs: when at least one member completed it reports
 * `completedCount/totalMembers` as given on `result`; otherwise it states
 * that no member responded successfully.
 *
 * @param result - Council execution result to format.
 * @returns The document, with blocks separated by blank lines.
 */
export function formatCouncilResultsForSynthesis(result: CouncilExecutionResult): string {
  const hasCompleted = result.responses.some((response) => response.status === "completed")

  const preamble = hasCompleted
    ? [`Completed responses: ${result.completedCount}/${result.totalMembers}`]
    : ["No successful responses from council members.", "Review failed member details below for provenance."]

  return [
    "# Council Responses for Synthesis",
    `Question: ${result.question}`,
    ...preamble,
    ...result.responses.map((response) => formatMemberSection(response)),
  ].join("\n\n")
}
|
|
||||||
@ -1,44 +0,0 @@
|
|||||||
/**
 * Builds the synthesis prompt given to Athena.
 *
 * @param formattedResponses - Pre-formatted council responses, inserted verbatim.
 * @param question - The original question, inserted verbatim.
 * @param completedCount - Number of completed members, quoted in the classification step.
 * @returns The prompt text: role statement, question, responses,
 *   responsibilities and the JSON output contract.
 */
export function buildSynthesisPrompt(formattedResponses: string, question: string, completedCount: number): string {
  const intro = [
    "You are Athena, the synthesis lead for a multi-model council. Your job is to merge independent model outputs into a single, evidence-grounded synthesis.",
    "",
    "## Original Question",
    `${question}`,
    "",
    "## Council Responses",
    `${formattedResponses}`,
    "",
  ]

  const responsibilities = [
    "## Your Responsibilities",
    "1. Identify distinct findings across all completed member responses.",
    "2. Group findings that refer to the same underlying issue (semantic similarity, not exact wording).",
    `3. Classify agreementLevel for each finding using ${completedCount} completed member(s):`,
    "- unanimous: all completed members reported the finding",
    "- majority: more than 50% of completed members reported the finding",
    "- minority: 2 or more members reported it, but not a majority",
    "- solo: only 1 member reported it",
    "4. Add AthenaAssessment for each finding:",
    "- agrees: whether you agree with the finding",
    "- rationale: concise reason for agreement or disagreement",
    "5. Set isFalsePositiveRisk:",
    "- true for solo findings (likely false positives unless strongly supported)",
    "- false for findings reported by multiple members",
    "",
  ]

  // Lines containing double quotes are single-quoted to avoid escaping.
  const outputContract = [
    "## Output Contract",
    "Return JSON only with this shape:",
    "{",
    '"findings": [',
    "{",
    '"summary": "string",',
    '"details": "string",',
    '"agreementLevel": "unanimous | majority | minority | solo",',
    '"reportedBy": ["model/name"],',
    '"assessment": {',
    '"agrees": true,',
    '"rationale": "string"',
    "},",
    '"isFalsePositiveRisk": false',
    "}",
    "]",
    "}",
    "",
    "The finding object must match the SynthesizedFinding type exactly. Keep findings concise, concrete, and tied to source responses.",
  ]

  return [...intro, ...responsibilities, ...outputContract].join("\n")
}
|
|
||||||
@ -1,31 +0,0 @@
|
|||||||
import type { AgreementLevel, CouncilMemberConfig, CouncilMemberStatus } from "./types"
|
|
||||||
|
|
||||||
/** Athena's own verdict on a synthesized finding. */
export interface AthenaAssessment {
  /** Whether Athena agrees with the finding. */
  agrees: boolean
  /** Reason given for the agreement or disagreement. */
  rationale: string
}
|
|
||||||
|
|
||||||
/** One finding merged from the council members' responses. */
export interface SynthesizedFinding {
  /** Short headline for the finding. */
  summary: string
  /** Longer explanation of the finding. */
  details: string
  /** How broadly the council reported this finding. */
  agreementLevel: AgreementLevel
  /** Labels of the members that reported the finding. */
  reportedBy: string[]
  /** Athena's verdict on the finding. */
  assessment: AthenaAssessment
  /** Marks the finding as a likely false positive. */
  isFalsePositiveRisk: boolean
}
|
|
||||||
|
|
||||||
/** Per-member record attached to a synthesis result. */
export interface MemberProvenance {
  /** Configuration of the council member. */
  member: CouncilMemberConfig
  /** Status recorded for the member. */
  status: CouncilMemberStatus
  /** The member's raw response text, when one exists. */
  rawResponse?: string
  /** Duration recorded for the member; presumably milliseconds, per the name. */
  durationMs: number
}
|
|
||||||
|
|
||||||
/** Complete output of a synthesis run over council responses. */
export interface SynthesisResult {
  /** The question that was put to the council. */
  question: string
  /** Synthesized findings. */
  findings: SynthesizedFinding[]
  /** Per-member provenance records. */
  memberProvenance: MemberProvenance[]
  /** Number of findings; NOTE(review): presumably findings.length, not enforced by this type. */
  totalFindings: number
  /** Count of consensus findings; NOTE(review): exact definition is not visible here. */
  consensusCount: number
  /** Count of outlier findings; NOTE(review): exact definition is not visible here. */
  outlierCount: number
}
|
|
||||||
@ -14,23 +14,20 @@ export interface AthenaConfig {
|
|||||||
council: CouncilConfig
|
council: CouncilConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
export type CouncilMemberStatus = "completed" | "timeout" | "error"
|
export interface CouncilLaunchFailure {
|
||||||
|
|
||||||
export type AgreementLevel = "unanimous" | "majority" | "minority" | "solo"
|
|
||||||
|
|
||||||
export interface CouncilMemberResponse {
|
|
||||||
member: CouncilMemberConfig
|
member: CouncilMemberConfig
|
||||||
status: CouncilMemberStatus
|
error: string
|
||||||
response?: string
|
|
||||||
error?: string
|
|
||||||
taskId: string
|
|
||||||
durationMs: number
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface CouncilExecutionResult {
|
export interface CouncilLaunchedMember {
|
||||||
question: string
|
member: CouncilMemberConfig
|
||||||
responses: CouncilMemberResponse[]
|
taskId: string
|
||||||
totalMembers: number
|
}
|
||||||
completedCount: number
|
|
||||||
failedCount: number
|
/** Return type of executeCouncil — only tracks launch outcomes, not task completion */
|
||||||
|
export interface CouncilLaunchResult {
|
||||||
|
question: string
|
||||||
|
launched: CouncilLaunchedMember[]
|
||||||
|
failures: CouncilLaunchFailure[]
|
||||||
|
totalMembers: number
|
||||||
}
|
}
|
||||||
|
|||||||
@ -113,21 +113,17 @@ export function createAthenaCouncilTool(args: {
|
|||||||
})
|
})
|
||||||
|
|
||||||
const launchResult: AthenaCouncilLaunchResult = {
|
const launchResult: AthenaCouncilLaunchResult = {
|
||||||
launched: execution.responses.filter((response) => response.taskId.length > 0).length,
|
launched: execution.launched.length,
|
||||||
members: execution.responses
|
members: execution.launched.map((entry) => ({
|
||||||
.filter((response) => response.taskId.length > 0)
|
task_id: entry.taskId,
|
||||||
.map((response) => ({
|
name: entry.member.name ?? entry.member.model,
|
||||||
task_id: response.taskId,
|
model: entry.member.model,
|
||||||
name: response.member.name ?? response.member.model,
|
|
||||||
model: response.member.model,
|
|
||||||
status: "running",
|
status: "running",
|
||||||
})),
|
})),
|
||||||
failed: execution.responses
|
failed: execution.failures.map((entry) => ({
|
||||||
.filter((response) => response.taskId.length === 0)
|
name: entry.member.name ?? entry.member.model,
|
||||||
.map((response) => ({
|
model: entry.member.model,
|
||||||
name: response.member.name ?? response.member.model,
|
error: entry.error,
|
||||||
model: response.member.model,
|
|
||||||
error: response.error ?? "Launch failed",
|
|
||||||
})),
|
})),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user