diff --git a/package.json b/package.json index 07ee25b3..48452347 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode", - "version": "3.3.1", + "version": "3.3.2", "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -74,13 +74,13 @@ "typescript": "^5.7.3" }, "optionalDependencies": { - "oh-my-opencode-darwin-arm64": "3.3.1", - "oh-my-opencode-darwin-x64": "3.3.1", - "oh-my-opencode-linux-arm64": "3.3.1", - "oh-my-opencode-linux-arm64-musl": "3.3.1", - "oh-my-opencode-linux-x64": "3.3.1", - "oh-my-opencode-linux-x64-musl": "3.3.1", - "oh-my-opencode-windows-x64": "3.3.1" + "oh-my-opencode-darwin-arm64": "3.3.2", + "oh-my-opencode-darwin-x64": "3.3.2", + "oh-my-opencode-linux-arm64": "3.3.2", + "oh-my-opencode-linux-arm64-musl": "3.3.2", + "oh-my-opencode-linux-x64": "3.3.2", + "oh-my-opencode-linux-x64-musl": "3.3.2", + "oh-my-opencode-windows-x64": "3.3.2" }, "trustedDependencies": [ "@ast-grep/cli", diff --git a/packages/darwin-arm64/package.json b/packages/darwin-arm64/package.json index 2fed414c..5b57136a 100644 --- a/packages/darwin-arm64/package.json +++ b/packages/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-darwin-arm64", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (darwin-arm64)", "license": "MIT", "repository": { diff --git a/packages/darwin-x64/package.json b/packages/darwin-x64/package.json index 090606c5..8de8e3e3 100644 --- a/packages/darwin-x64/package.json +++ b/packages/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-darwin-x64", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (darwin-x64)", "license": "MIT", "repository": { diff --git a/packages/linux-arm64-musl/package.json b/packages/linux-arm64-musl/package.json index d81b1608..97db05f7 100644 --- a/packages/linux-arm64-musl/package.json +++ b/packages/linux-arm64-musl/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-arm64-musl", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)", "license": "MIT", "repository": { diff --git a/packages/linux-arm64/package.json b/packages/linux-arm64/package.json index c3a6d1c2..51af31d1 100644 --- a/packages/linux-arm64/package.json +++ b/packages/linux-arm64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-arm64", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (linux-arm64)", "license": "MIT", "repository": { diff --git a/packages/linux-x64-musl/package.json b/packages/linux-x64-musl/package.json index 7618b168..042d7180 100644 --- a/packages/linux-x64-musl/package.json +++ b/packages/linux-x64-musl/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-x64-musl", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)", "license": "MIT", "repository": { diff --git a/packages/linux-x64/package.json b/packages/linux-x64/package.json index 0f5b0d53..4310493e 100644 --- a/packages/linux-x64/package.json +++ b/packages/linux-x64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-x64", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (linux-x64)", "license": "MIT", "repository": { diff --git a/packages/windows-x64/package.json b/packages/windows-x64/package.json index dba03742..84932c29 100644 --- a/packages/windows-x64/package.json +++ b/packages/windows-x64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-windows-x64", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (windows-x64)", "license": "MIT", "repository": { diff --git a/src/agents/atlas/default.ts b/src/agents/atlas/default.ts index dfe5cf5f..2568d511 100644 --- a/src/agents/atlas/default.ts +++ b/src/agents/atlas/default.ts @@ -274,13 +274,13 @@ ACCUMULATED WISDOM: **For exploration (explore/librarian)**: ALWAYS background \`\`\`typescript -task(subagent_type="explore", run_in_background=true, ...) -task(subagent_type="librarian", run_in_background=true, ...) +task(subagent_type="explore", load_skills=[], run_in_background=true, ...) +task(subagent_type="librarian", load_skills=[], run_in_background=true, ...) \`\`\` **For task execution**: NEVER background \`\`\`typescript -task(category="...", run_in_background=false, ...) +task(category="...", load_skills=[...], run_in_background=false, ...) \`\`\` **Parallel task groups**: Invoke multiple in ONE message diff --git a/src/agents/atlas/gpt.ts b/src/agents/atlas/gpt.ts index d7d20fd7..d81620e6 100644 --- a/src/agents/atlas/gpt.ts +++ b/src/agents/atlas/gpt.ts @@ -231,12 +231,12 @@ ACCUMULATED WISDOM: [from notepad] **Exploration (explore/librarian)**: ALWAYS background \`\`\`typescript -task(subagent_type="explore", run_in_background=true, ...) +task(subagent_type="explore", load_skills=[], run_in_background=true, ...) \`\`\` **Task execution**: NEVER background \`\`\`typescript -task(category="...", run_in_background=false, ...) +task(category="...", load_skills=[...], run_in_background=false, ...) \`\`\` **Parallel task groups**: Invoke multiple in ONE message diff --git a/src/agents/builtin-agents.ts b/src/agents/builtin-agents.ts index 8e74ca7a..0a0a3ffb 100644 --- a/src/agents/builtin-agents.ts +++ b/src/agents/builtin-agents.ts @@ -20,6 +20,7 @@ import { collectPendingBuiltinAgents } from "./builtin-agents/general-agents" import { maybeCreateSisyphusConfig } from "./builtin-agents/sisyphus-agent" import { maybeCreateHephaestusConfig } from "./builtin-agents/hephaestus-agent" import { maybeCreateAtlasConfig } from "./builtin-agents/atlas-agent" +import { buildCustomAgentMetadata, parseRegisteredAgentSummaries } from "./custom-agent-summaries" type AgentSource = AgentFactory | AgentConfig @@ -59,15 +60,13 @@ export async function createBuiltinAgents( categories?: CategoriesConfig, gitMasterConfig?: GitMasterConfig, discoveredSkills: LoadedSkill[] = [], - client?: any, + customAgentSummaries?: unknown, browserProvider?: BrowserAutomationProvider, uiSelectedModel?: string, disabledSkills?: Set ): Promise> { - void client - const connectedProviders = readConnectedProvidersCache() - // IMPORTANT: Do NOT pass client to fetchAvailableModels during plugin initialization. + // IMPORTANT: Do NOT call OpenCode client APIs during plugin initialization. // This function is called from config handler, and calling client API causes deadlock. // See: https://github.com/code-yeongyu/oh-my-opencode/issues/1301 const availableModels = await fetchAvailableModels(undefined, { @@ -105,6 +104,23 @@ export async function createBuiltinAgents( disabledSkills, }) + const registeredAgents = parseRegisteredAgentSummaries(customAgentSummaries) + const builtinAgentNames = new Set(Object.keys(agentSources).map((name) => name.toLowerCase())) + const disabledAgentNames = new Set(disabledAgents.map((name) => name.toLowerCase())) + + for (const agent of registeredAgents) { + const lowerName = agent.name.toLowerCase() + if (builtinAgentNames.has(lowerName)) continue + if (disabledAgentNames.has(lowerName)) continue + if (availableAgents.some((availableAgent) => availableAgent.name.toLowerCase() === lowerName)) continue + + availableAgents.push({ + name: agent.name, + description: agent.description, + metadata: buildCustomAgentMetadata(agent.name, agent.description), + }) + } + const sisyphusConfig = maybeCreateSisyphusConfig({ disabledAgents, agentOverrides, diff --git a/src/agents/custom-agent-summaries.ts b/src/agents/custom-agent-summaries.ts new file mode 100644 index 00000000..bdcb7b68 --- /dev/null +++ b/src/agents/custom-agent-summaries.ts @@ -0,0 +1,61 @@ +import type { AgentPromptMetadata } from "./types" +import { truncateDescription } from "../shared/truncate-description" + +type RegisteredAgentSummary = { + name: string + description: string +} + +function sanitizeMarkdownTableCell(value: string): string { + return value + .replace(/\r?\n/g, " ") + .replace(/\|/g, "\\|") + .replace(/\s+/g, " ") + .trim() +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null +} + +export function parseRegisteredAgentSummaries(input: unknown): RegisteredAgentSummary[] { + if (!Array.isArray(input)) return [] + + const result: RegisteredAgentSummary[] = [] + for (const item of input) { + if (!isRecord(item)) continue + + const name = typeof item.name === "string" ? item.name : undefined + if (!name) continue + + const hidden = item.hidden + if (hidden === true) continue + + const disabled = item.disabled + if (disabled === true) continue + + const enabled = item.enabled + if (enabled === false) continue + + const description = typeof item.description === "string" ? item.description : "" + result.push({ name, description: sanitizeMarkdownTableCell(description) }) + } + + return result +} + +export function buildCustomAgentMetadata(agentName: string, description: string): AgentPromptMetadata { + const shortDescription = sanitizeMarkdownTableCell(truncateDescription(description)) + const safeAgentName = sanitizeMarkdownTableCell(agentName) + + return { + category: "specialist", + cost: "CHEAP", + triggers: [ + { + domain: `Custom agent: ${safeAgentName}`, + trigger: shortDescription || "Use when this agent's description matches the task", + }, + ], + } +} diff --git a/src/agents/dynamic-agent-prompt-builder.ts b/src/agents/dynamic-agent-prompt-builder.ts index c70da062..defaeecb 100644 --- a/src/agents/dynamic-agent-prompt-builder.ts +++ b/src/agents/dynamic-agent-prompt-builder.ts @@ -1,8 +1,8 @@ -import type { AgentPromptMetadata, BuiltinAgentName } from "./types" +import type { AgentPromptMetadata } from "./types" import { truncateDescription } from "../shared/truncate-description" export interface AvailableAgent { - name: BuiltinAgentName + name: string description: string metadata: AgentPromptMetadata } diff --git a/src/agents/prometheus/high-accuracy-mode.ts b/src/agents/prometheus/high-accuracy-mode.ts index d6ecc821..5eca99a8 100644 --- a/src/agents/prometheus/high-accuracy-mode.ts +++ b/src/agents/prometheus/high-accuracy-mode.ts @@ -17,6 +17,7 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION while (true) { const result = task( subagent_type="momus", + load_skills=[], prompt=".sisyphus/plans/{name}.md", run_in_background=false ) diff --git a/src/agents/prometheus/interview-mode.ts b/src/agents/prometheus/interview-mode.ts index 8692fd2f..5d445f0c 100644 --- a/src/agents/prometheus/interview-mode.ts +++ b/src/agents/prometheus/interview-mode.ts @@ -66,8 +66,8 @@ Or should I just note down this single fix?" **Research First:** \`\`\`typescript // Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find) -task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true) -task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true) \`\`\` **Interview Focus:** @@ -91,9 +91,9 @@ task(subagent_type="explore", prompt="I'm about to modify [affected code] and ne \`\`\`typescript // Launch BEFORE asking user questions // Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST -task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true) -task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true) \`\`\` **Interview Focus** (AFTER research): @@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js Run this check: \`\`\`typescript -task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true) \`\`\` #### Step 2: Ask the Test Question (MANDATORY) @@ -230,13 +230,13 @@ Add to draft immediately: **Research First:** \`\`\`typescript -task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true) \`\`\` **Oracle Consultation** (recommend when stakes are high): \`\`\`typescript -task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false) +task(subagent_type="oracle", load_skills=[], prompt="Architecture consultation needed: [context]...", run_in_background=false) \`\`\` **Interview Focus:** @@ -253,9 +253,9 @@ task(subagent_type="oracle", prompt="Architecture consultation needed: [context] **Parallel Investigation:** \`\`\`typescript -task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true) \`\`\` **Interview Focus:** @@ -281,17 +281,17 @@ task(subagent_type="librarian", prompt="I'm looking for battle-tested implementa **For Understanding Codebase:** \`\`\`typescript -task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true) \`\`\` **For External Knowledge:** \`\`\`typescript -task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true) \`\`\` **For Implementation Examples:** \`\`\`typescript -task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true) \`\`\` ## Interview Mode Anti-Patterns diff --git a/src/agents/prometheus/plan-generation.ts b/src/agents/prometheus/plan-generation.ts index 3443d688..f5c1270e 100644 --- a/src/agents/prometheus/plan-generation.ts +++ b/src/agents/prometheus/plan-generation.ts @@ -61,6 +61,7 @@ todoWrite([ \`\`\`typescript task( subagent_type="metis", + load_skills=[], prompt=\`Review this planning session before I generate the work plan: **User's Goal**: {summarize what user wants} diff --git a/src/agents/utils.test.ts b/src/agents/utils.test.ts index 2a0908fa..ad3a7ba1 100644 --- a/src/agents/utils.test.ts +++ b/src/agents/utils.test.ts @@ -251,6 +251,222 @@ describe("createBuiltinAgents with model overrides", () => { expect(agents.sisyphus.prompt).toContain("frontend-ui-ux") expect(agents.sisyphus.prompt).toContain("git-master") }) + + test("includes custom agents in orchestrator prompts when provided via config", async () => { + // #given + const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( + new Set([ + "anthropic/claude-opus-4-6", + "kimi-for-coding/k2p5", + "opencode/kimi-k2.5-free", + "zai-coding-plan/glm-4.7", + "opencode/glm-4.7-free", + "openai/gpt-5.2", + ]) + ) + + const customAgentSummaries = [ + { + name: "researcher", + description: "Research agent for deep analysis", + hidden: false, + }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + expect(agents.sisyphus.prompt).toContain("researcher") + expect(agents.hephaestus.prompt).toContain("researcher") + expect(agents.atlas.prompt).toContain("researcher") + } finally { + fetchSpy.mockRestore() + } + }) + + test("excludes hidden custom agents from orchestrator prompts", async () => { + // #given + const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( + new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"]) + ) + + const customAgentSummaries = [ + { + name: "hidden-agent", + description: "Should never show", + hidden: true, + }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + expect(agents.sisyphus.prompt).not.toContain("hidden-agent") + expect(agents.hephaestus.prompt).not.toContain("hidden-agent") + expect(agents.atlas.prompt).not.toContain("hidden-agent") + } finally { + fetchSpy.mockRestore() + } + }) + + test("excludes disabled custom agents from orchestrator prompts", async () => { + // #given + const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( + new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"]) + ) + + const customAgentSummaries = [ + { + name: "disabled-agent", + description: "Should never show", + disabled: true, + }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + expect(agents.sisyphus.prompt).not.toContain("disabled-agent") + expect(agents.hephaestus.prompt).not.toContain("disabled-agent") + expect(agents.atlas.prompt).not.toContain("disabled-agent") + } finally { + fetchSpy.mockRestore() + } + }) + + test("excludes custom agents when disabledAgents contains their name (case-insensitive)", async () => { + // #given + const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( + new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"]) + ) + + const disabledAgents = ["ReSeArChEr"] + const customAgentSummaries = [ + { + name: "researcher", + description: "Should never show", + }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + disabledAgents, + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + expect(agents.sisyphus.prompt).not.toContain("researcher") + expect(agents.hephaestus.prompt).not.toContain("researcher") + expect(agents.atlas.prompt).not.toContain("researcher") + } finally { + fetchSpy.mockRestore() + } + }) + + test("deduplicates custom agents case-insensitively", async () => { + // #given + const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( + new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"]) + ) + + const customAgentSummaries = [ + { name: "Researcher", description: "First" }, + { name: "researcher", description: "Second" }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + const matches = agents.sisyphus.prompt.match(/Custom agent: researcher/gi) ?? [] + expect(matches.length).toBe(1) + } finally { + fetchSpy.mockRestore() + } + }) + + test("sanitizes custom agent strings for markdown tables", async () => { + // #given + const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( + new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"]) + ) + + const customAgentSummaries = [ + { + name: "table-agent", + description: "Line1\nAlpha | Beta", + }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + expect(agents.sisyphus.prompt).toContain("Line1 Alpha \\| Beta") + } finally { + fetchSpy.mockRestore() + } + }) }) describe("createBuiltinAgents without systemDefaultModel", () => { diff --git a/src/config/schema/hooks.ts b/src/config/schema/hooks.ts index bb5f6bdb..f4b5a987 100644 --- a/src/config/schema/hooks.ts +++ b/src/config/schema/hooks.ts @@ -2,6 +2,7 @@ import { z } from "zod" export const HookNameSchema = z.enum([ "todo-continuation-enforcer", + "task-continuation-enforcer", "context-window-monitor", "session-recovery", "session-notification", diff --git a/src/features/background-agent/manager.test.ts b/src/features/background-agent/manager.test.ts index d81698cb..c4db9056 100644 --- a/src/features/background-agent/manager.test.ts +++ b/src/features/background-agent/manager.test.ts @@ -1123,6 +1123,99 @@ describe("BackgroundManager.tryCompleteTask", () => { expect(task.status).toBe("completed") expect(getPendingByParent(manager).get(task.parentSessionID)).toBeUndefined() }) + + test("should avoid overlapping promptAsync calls when tasks complete concurrently", async () => { + // given + type PromptAsyncBody = Record & { noReply?: boolean } + + let resolveMessages: ((value: { data: unknown[] }) => void) | undefined + const messagesBarrier = new Promise<{ data: unknown[] }>((resolve) => { + resolveMessages = resolve + }) + + const promptBodies: PromptAsyncBody[] = [] + let promptInFlight = false + let rejectedCount = 0 + let promptCallCount = 0 + + let releaseFirstPrompt: (() => void) | undefined + let resolveFirstStarted: (() => void) | undefined + const firstStarted = new Promise((resolve) => { + resolveFirstStarted = resolve + }) + + const client = { + session: { + prompt: async () => ({}), + abort: async () => ({}), + messages: async () => messagesBarrier, + promptAsync: async (args: { path: { id: string }; body: PromptAsyncBody }) => { + promptBodies.push(args.body) + + if (!promptInFlight) { + promptCallCount += 1 + if (promptCallCount === 1) { + promptInFlight = true + resolveFirstStarted?.() + return await new Promise((resolve) => { + releaseFirstPrompt = () => { + promptInFlight = false + resolve({}) + } + }) + } + + return {} + } + + rejectedCount += 1 + throw new Error("BUSY") + }, + }, + } + + manager.shutdown() + manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput) + + const parentSessionID = "parent-session" + const taskA = createMockTask({ + id: "task-a", + sessionID: "session-a", + parentSessionID, + }) + const taskB = createMockTask({ + id: "task-b", + sessionID: "session-b", + parentSessionID, + }) + + getTaskMap(manager).set(taskA.id, taskA) + getTaskMap(manager).set(taskB.id, taskB) + getPendingByParent(manager).set(parentSessionID, new Set([taskA.id, taskB.id])) + + // when + const completionA = tryCompleteTaskForTest(manager, taskA) + const completionB = tryCompleteTaskForTest(manager, taskB) + resolveMessages?.({ data: [] }) + + await firstStarted + + // Give the second completion a chance to attempt promptAsync while the first is in-flight. + // In the buggy implementation, this triggers an overlap and increments rejectedCount. + for (let i = 0; i < 20; i++) { + await Promise.resolve() + if (rejectedCount > 0) break + if (promptBodies.length >= 2) break + } + + releaseFirstPrompt?.() + await Promise.all([completionA, completionB]) + + // then + expect(rejectedCount).toBe(0) + expect(promptBodies.length).toBe(2) + expect(promptBodies.some((b) => b.noReply === false)).toBe(true) + }) }) describe("BackgroundManager.trackTask", () => { diff --git a/src/features/background-agent/manager.ts b/src/features/background-agent/manager.ts index 808221af..320a93f9 100644 --- a/src/features/background-agent/manager.ts +++ b/src/features/background-agent/manager.ts @@ -41,6 +41,7 @@ export class BackgroundManager { private processingKeys = new Set() private completionTimers = new Map>() private idleDeferralTimers = new Map>() + private notificationQueueByParent = new Map>() private client: PluginInput["client"] private directory: string @@ -72,7 +73,7 @@ export class BackgroundManager { } async resume(input: ResumeInput): Promise { - return resumeBackgroundTask({ input, findBySession: (id) => this.findBySession(id), client: this.client, concurrencyManager: this.concurrencyManager, pendingByParent: this.pendingByParent, startPolling: () => this.startPolling(), markForNotification: (task) => this.markForNotification(task), cleanupPendingByParent: (task) => this.cleanupPendingByParent(task), notifyParentSession: (task) => this.notifyParentSession(task) }) + return resumeBackgroundTask({ input, findBySession: (id) => this.findBySession(id), client: this.client, concurrencyManager: this.concurrencyManager, pendingByParent: this.pendingByParent, startPolling: () => this.startPolling(), markForNotification: (task) => this.markForNotification(task), cleanupPendingByParent: (task) => this.cleanupPendingByParent(task), notifyParentSession: (task) => this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)) }) } getTask(id: string): BackgroundTask | undefined { return this.tasks.get(id) } @@ -94,7 +95,7 @@ export class BackgroundManager { } async cancelTask(taskId: string, options?: { source?: string; reason?: string; abortSession?: boolean; skipNotification?: boolean }): Promise { - return cancelBackgroundTask({ taskId, options, tasks: this.tasks, queuesByKey: this.queuesByKey, completionTimers: this.completionTimers, idleDeferralTimers: this.idleDeferralTimers, concurrencyManager: this.concurrencyManager, client: this.client, cleanupPendingByParent: (task) => this.cleanupPendingByParent(task), markForNotification: (task) => this.markForNotification(task), notifyParentSession: (task) => this.notifyParentSession(task) }) + return cancelBackgroundTask({ taskId, options, tasks: this.tasks, queuesByKey: this.queuesByKey, completionTimers: this.completionTimers, idleDeferralTimers: this.idleDeferralTimers, concurrencyManager: this.concurrencyManager, client: this.client, cleanupPendingByParent: (task) => this.cleanupPendingByParent(task), markForNotification: (task) => this.markForNotification(task), notifyParentSession: (task) => this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)) }) } handleEvent(event: { type: string; properties?: Record }): void { @@ -102,13 +103,14 @@ export class BackgroundManager { } shutdown(): void { + this.notificationQueueByParent.clear() shutdownBackgroundManager({ shutdownTriggered: this.shutdownTriggered, stopPolling: () => this.stopPolling(), tasks: this.tasks, client: this.client, onShutdown: this.onShutdown, concurrencyManager: this.concurrencyManager, completionTimers: this.completionTimers, idleDeferralTimers: this.idleDeferralTimers, notifications: this.notifications, pendingByParent: this.pendingByParent, queuesByKey: this.queuesByKey, processingKeys: this.processingKeys, unregisterProcessCleanup: () => this.unregisterProcessCleanup() }) } private getConcurrencyKeyFromInput(input: LaunchInput): string { return input.model ? `${input.model.providerID}/${input.model.modelID}` : input.agent } private async processKey(key: string): Promise { await processConcurrencyKeyQueue({ key, queuesByKey: this.queuesByKey, processingKeys: this.processingKeys, concurrencyManager: this.concurrencyManager, startTask: (item) => this.startTask(item) }) } private async startTask(item: QueueItem): Promise { - await startQueuedTask({ item, client: this.client, defaultDirectory: this.directory, tmuxEnabled: this.tmuxEnabled, onSubagentSessionCreated: this.onSubagentSessionCreated, startPolling: () => this.startPolling(), getConcurrencyKeyFromInput: (i) => this.getConcurrencyKeyFromInput(i), concurrencyManager: this.concurrencyManager, findBySession: (id) => this.findBySession(id), markForNotification: (task) => this.markForNotification(task), cleanupPendingByParent: (task) => this.cleanupPendingByParent(task), notifyParentSession: (task) => this.notifyParentSession(task) }) + await startQueuedTask({ item, client: this.client, defaultDirectory: this.directory, tmuxEnabled: this.tmuxEnabled, onSubagentSessionCreated: this.onSubagentSessionCreated, startPolling: () => this.startPolling(), getConcurrencyKeyFromInput: (i) => this.getConcurrencyKeyFromInput(i), concurrencyManager: this.concurrencyManager, findBySession: (id) => this.findBySession(id), markForNotification: (task) => this.markForNotification(task), cleanupPendingByParent: (task) => this.cleanupPendingByParent(task), notifyParentSession: (task) => this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)) }) } private startPolling(): void { @@ -126,17 +128,38 @@ export class BackgroundManager { pruneStaleState({ tasks: this.tasks, notifications: this.notifications, concurrencyManager: this.concurrencyManager, cleanupPendingByParent: (task) => this.cleanupPendingByParent(task), clearNotificationsForTask: (id) => this.clearNotificationsForTask(id) }) } private async checkAndInterruptStaleTasks(): Promise { - await checkAndInterruptStaleTasks({ tasks: this.tasks.values(), client: this.client, config: this.config, concurrencyManager: this.concurrencyManager, notifyParentSession: (task) => this.notifyParentSession(task) }) + await checkAndInterruptStaleTasks({ tasks: this.tasks.values(), client: this.client, config: this.config, concurrencyManager: this.concurrencyManager, notifyParentSession: (task) => this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)) }) } private hasRunningTasks(): boolean { return hasRunningTasks(this.tasks.values()) } private async tryCompleteTask(task: BackgroundTask, source: string): Promise { - return tryCompleteBackgroundTask({ task, source, concurrencyManager: this.concurrencyManager, idleDeferralTimers: this.idleDeferralTimers, client: this.client, markForNotification: (t) => this.markForNotification(t), cleanupPendingByParent: (t) => this.cleanupPendingByParent(t), notifyParentSession: (t) => this.notifyParentSession(t) }) + return tryCompleteBackgroundTask({ task, source, concurrencyManager: this.concurrencyManager, idleDeferralTimers: this.idleDeferralTimers, client: this.client, markForNotification: (t) => this.markForNotification(t), cleanupPendingByParent: (t) => this.cleanupPendingByParent(t), notifyParentSession: (t) => this.enqueueNotificationForParent(t.parentSessionID, () => this.notifyParentSession(t)) }) } private async notifyParentSession(task: BackgroundTask): Promise { await notifyParentSessionInternal({ task, tasks: this.tasks, pendingByParent: this.pendingByParent, completionTimers: this.completionTimers, clearNotificationsForTask: (id) => this.clearNotificationsForTask(id), client: this.client }) } + private enqueueNotificationForParent(parentSessionID: string | undefined, operation: () => Promise): Promise { + if (!parentSessionID) return operation() + + const previous = this.notificationQueueByParent.get(parentSessionID) ?? Promise.resolve() + const current = previous + .catch(() => {}) + .then(operation) + + this.notificationQueueByParent.set(parentSessionID, current) + + void current + .finally(() => { + if (this.notificationQueueByParent.get(parentSessionID) === current) { + this.notificationQueueByParent.delete(parentSessionID) + } + }) + .catch(() => {}) + + return current + } + private async validateSessionHasOutput(sessionID: string): Promise { return validateSessionHasOutput(this.client, sessionID) } private async checkSessionTodos(sessionID: string): Promise { return checkSessionTodos(this.client, sessionID) } private clearNotificationsForTask(taskId: string): void { clearNotificationsForTask(this.notifications, taskId) } diff --git a/src/features/builtin-commands/commands.test.ts b/src/features/builtin-commands/commands.test.ts new file mode 100644 index 00000000..c6927bc7 --- /dev/null +++ b/src/features/builtin-commands/commands.test.ts @@ -0,0 +1,138 @@ +import { describe, test, expect } from "bun:test" +import { loadBuiltinCommands } from "./commands" +import { HANDOFF_TEMPLATE } from "./templates/handoff" +import type { BuiltinCommandName } from "./types" + +describe("loadBuiltinCommands", () => { + test("should include handoff command in loaded commands", () => { + //#given + const disabledCommands: BuiltinCommandName[] = [] + + //#when + const commands = loadBuiltinCommands(disabledCommands) + + //#then + expect(commands.handoff).toBeDefined() + expect(commands.handoff.name).toBe("handoff") + }) + + test("should exclude handoff when disabled", () => { + //#given + const disabledCommands: BuiltinCommandName[] = ["handoff"] + + //#when + const commands = loadBuiltinCommands(disabledCommands) + + //#then + expect(commands.handoff).toBeUndefined() + }) + + test("should include handoff template content in command template", () => { + //#given - no disabled commands + + //#when + const commands = loadBuiltinCommands() + + //#then + expect(commands.handoff.template).toContain(HANDOFF_TEMPLATE) + }) + + test("should include session context variables in handoff template", () => { + //#given - no disabled commands + + //#when + const commands = loadBuiltinCommands() + + //#then + expect(commands.handoff.template).toContain("$SESSION_ID") + expect(commands.handoff.template).toContain("$TIMESTAMP") + expect(commands.handoff.template).toContain("$ARGUMENTS") + }) + + test("should have correct description for handoff", () => { + //#given - no disabled commands + + //#when + const commands = loadBuiltinCommands() + + //#then + expect(commands.handoff.description).toContain("context summary") + }) +}) + +describe("HANDOFF_TEMPLATE", () => { + test("should include session reading instruction", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("session_read") + }) + + test("should include compaction-style sections in output format", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("USER REQUESTS (AS-IS)") + expect(HANDOFF_TEMPLATE).toContain("EXPLICIT CONSTRAINTS") + }) + + test("should include programmatic context gathering instructions", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("todoread") + expect(HANDOFF_TEMPLATE).toContain("git diff") + expect(HANDOFF_TEMPLATE).toContain("git status") + }) + + test("should include context extraction format", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("WORK COMPLETED") + expect(HANDOFF_TEMPLATE).toContain("CURRENT STATE") + expect(HANDOFF_TEMPLATE).toContain("PENDING TASKS") + expect(HANDOFF_TEMPLATE).toContain("KEY FILES") + expect(HANDOFF_TEMPLATE).toContain("IMPORTANT DECISIONS") + expect(HANDOFF_TEMPLATE).toContain("CONTEXT FOR CONTINUATION") + expect(HANDOFF_TEMPLATE).toContain("GOAL") + }) + + test("should enforce first person perspective", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("first person perspective") + }) + + test("should limit key files to 10", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("Maximum 10 files") + }) + + test("should instruct plain text format without markdown", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("Plain text with bullets") + expect(HANDOFF_TEMPLATE).toContain("No markdown headers") + }) + + test("should include user instructions for new session", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("new session") + expect(HANDOFF_TEMPLATE).toContain("opencode") + }) + + test("should not contain emojis", () => { + //#given - the template string + + //#when / #then + const emojiRegex = /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}\u{2702}-\u{27B0}\u{24C2}-\u{1F251}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/u + expect(emojiRegex.test(HANDOFF_TEMPLATE)).toBe(false) + }) +}) diff --git a/src/features/builtin-commands/commands.ts b/src/features/builtin-commands/commands.ts index 998ce253..aee5dc28 100644 --- a/src/features/builtin-commands/commands.ts +++ b/src/features/builtin-commands/commands.ts @@ -5,6 +5,7 @@ import { RALPH_LOOP_TEMPLATE, CANCEL_RALPH_TEMPLATE } from "./templates/ralph-lo import { STOP_CONTINUATION_TEMPLATE } from "./templates/stop-continuation" import { REFACTOR_TEMPLATE } from "./templates/refactor" import { START_WORK_TEMPLATE } from "./templates/start-work" +import { HANDOFF_TEMPLATE } from "./templates/handoff" const BUILTIN_COMMAND_DEFINITIONS: Record> = { "init-deep": { @@ -77,6 +78,22 @@ $ARGUMENTS ${STOP_CONTINUATION_TEMPLATE} `, }, + handoff: { + description: "(builtin) Create a detailed context summary for continuing work in a new session", + template: ` +${HANDOFF_TEMPLATE} + + + +Session ID: $SESSION_ID +Timestamp: $TIMESTAMP + + + +$ARGUMENTS +`, + argumentHint: "[goal]", + }, } export function loadBuiltinCommands( diff --git a/src/features/builtin-commands/templates/handoff.ts b/src/features/builtin-commands/templates/handoff.ts new file mode 100644 index 00000000..d8010994 --- /dev/null +++ b/src/features/builtin-commands/templates/handoff.ts @@ -0,0 +1,177 @@ +export const HANDOFF_TEMPLATE = `# Handoff Command + +## Purpose + +Use /handoff when: +- The current session context is getting too long and quality is degrading +- You want to start fresh while preserving essential context from this session +- The context window is approaching capacity + +This creates a detailed context summary that can be used to continue work in a new session. + +--- + +# PHASE 0: VALIDATE REQUEST + +Before proceeding, confirm: +- [ ] There is meaningful work or context in this session to preserve +- [ ] The user wants to create a handoff summary (not just asking about it) + +If the session is nearly empty or has no meaningful context, inform the user there is nothing substantial to hand off. + +--- + +# PHASE 1: GATHER PROGRAMMATIC CONTEXT + +Execute these tools to gather concrete data: + +1. session_read({ session_id: "$SESSION_ID" }) — full session history +2. todoread() — current task progress +3. Bash({ command: "git diff --stat HEAD~10..HEAD" }) — recent file changes +4. Bash({ command: "git status --porcelain" }) — uncommitted changes + +Suggested execution order: + +\`\`\` +session_read({ session_id: "$SESSION_ID" }) +todoread() +Bash({ command: "git diff --stat HEAD~10..HEAD" }) +Bash({ command: "git status --porcelain" }) +\`\`\` + +Analyze the gathered outputs to understand: +- What the user asked for (exact wording) +- What work was completed +- What tasks remain incomplete (include todo state) +- What decisions were made +- What files were modified or discussed (include git diff/stat + status) +- What patterns, constraints, or preferences were established + +--- + +# PHASE 2: EXTRACT CONTEXT + +Write the context summary from first person perspective ("I did...", "I told you..."). + +Focus on: +- Capabilities and behavior, not file-by-file implementation details +- What matters for continuing the work +- Avoiding excessive implementation details (variable names, storage keys, constants) unless critical +- USER REQUESTS (AS-IS) must be verbatim (do not paraphrase) +- EXPLICIT CONSTRAINTS must be verbatim only (do not invent) + +Questions to consider when extracting: +- What did I just do or implement? +- What instructions did I already give which are still relevant (e.g. follow patterns in the codebase)? +- What files did I tell you are important or that I am working on? +- Did I provide a plan or spec that should be included? +- What did I already tell you that is important (libraries, patterns, constraints, preferences)? +- What important technical details did I discover (APIs, methods, patterns)? +- What caveats, limitations, or open questions did I find? + +--- + +# PHASE 3: FORMAT OUTPUT + +Generate a handoff summary using this exact format: + +\`\`\` +HANDOFF CONTEXT +=============== + +USER REQUESTS (AS-IS) +--------------------- +- [Exact verbatim user requests - NOT paraphrased] + +GOAL +---- +[One sentence describing what should be done next] + +WORK COMPLETED +-------------- +- [First person bullet points of what was done] +- [Include specific file paths when relevant] +- [Note key implementation decisions] + +CURRENT STATE +------------- +- [Current state of the codebase or task] +- [Build/test status if applicable] +- [Any environment or configuration state] + +PENDING TASKS +------------- +- [Tasks that were planned but not completed] +- [Next logical steps to take] +- [Any blockers or issues encountered] +- [Include current todo state from todoread()] + +KEY FILES +--------- +- [path/to/file1] - [brief role description] +- [path/to/file2] - [brief role description] +(Maximum 10 files, prioritized by importance) +- (Include files from git diff/stat and git status) + +IMPORTANT DECISIONS +------------------- +- [Technical decisions that were made and why] +- [Trade-offs that were considered] +- [Patterns or conventions established] + +EXPLICIT CONSTRAINTS +-------------------- +- [Verbatim constraints only - from user or existing AGENTS.md] +- If none, write: None + +CONTEXT FOR CONTINUATION +------------------------ +- [What the next session needs to know to continue] +- [Warnings or gotchas to be aware of] +- [References to documentation if relevant] +\`\`\` + +Rules for the summary: +- Plain text with bullets +- No markdown headers with # (use the format above with dashes) +- No bold, italic, or code fences within content +- Use workspace-relative paths for files +- Keep it focused - only include what matters for continuation +- Pick an appropriate length based on complexity +- USER REQUESTS (AS-IS) and EXPLICIT CONSTRAINTS must be verbatim only + +--- + +# PHASE 4: PROVIDE INSTRUCTIONS + +After generating the summary, instruct the user: + +\`\`\` +--- + +TO CONTINUE IN A NEW SESSION: + +1. Press 'n' in OpenCode TUI to open a new session, or run 'opencode' in a new terminal +2. Paste the HANDOFF CONTEXT above as your first message +3. Add your request: "Continue from the handoff context above. [Your next task]" + +The new session will have all context needed to continue seamlessly. +\`\`\` + +--- + +# IMPORTANT CONSTRAINTS + +- DO NOT attempt to programmatically create new sessions (no API available to agents) +- DO provide a self-contained summary that works without access to this session +- DO include workspace-relative file paths +- DO NOT include sensitive information (API keys, credentials, secrets) +- DO NOT exceed 10 files in the KEY FILES section +- DO keep the GOAL section to a single sentence or short paragraph + +--- + +# EXECUTE NOW + +Begin by gathering programmatic context, then synthesize the handoff summary. +` diff --git a/src/features/builtin-commands/types.ts b/src/features/builtin-commands/types.ts index 1b148774..0c2624f1 100644 --- a/src/features/builtin-commands/types.ts +++ b/src/features/builtin-commands/types.ts @@ -1,6 +1,6 @@ import type { CommandDefinition } from "../claude-code-command-loader" -export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "stop-continuation" +export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "stop-continuation" | "handoff" export interface BuiltinCommandConfig { disabled_commands?: BuiltinCommandName[] diff --git a/src/features/claude-code-mcp-loader/loader.test.ts b/src/features/claude-code-mcp-loader/loader.test.ts index 33c5a0ae..848c7aed 100644 --- a/src/features/claude-code-mcp-loader/loader.test.ts +++ b/src/features/claude-code-mcp-loader/loader.test.ts @@ -20,6 +20,7 @@ describe("getSystemMcpServerNames", () => { }) afterEach(() => { + mock.restore() rmSync(TEST_DIR, { recursive: true, force: true }) }) diff --git a/src/hooks/atlas/git-diff-stats.ts b/src/hooks/atlas/git-diff-stats.ts deleted file mode 100644 index d154c50a..00000000 --- a/src/hooks/atlas/git-diff-stats.ts +++ /dev/null @@ -1,133 +0,0 @@ -import { execSync } from "node:child_process" - -interface GitFileStat { - path: string - added: number - removed: number - status: "modified" | "added" | "deleted" -} - -export function getGitDiffStats(directory: string): GitFileStat[] { - try { - const statusOutput = execSync("git status --porcelain", { - cwd: directory, - encoding: "utf-8", - timeout: 5000, - stdio: ["pipe", "pipe", "pipe"], - }).trim() - - if (!statusOutput) return [] - - const statusMap = new Map() - const untrackedFiles: string[] = [] - for (const line of statusOutput.split("\n")) { - if (!line) continue - const status = line.substring(0, 2).trim() - const filePath = line.substring(3) - if (status === "??") { - statusMap.set(filePath, "added") - untrackedFiles.push(filePath) - } else if (status === "A") { - statusMap.set(filePath, "added") - } else if (status === "D") { - statusMap.set(filePath, "deleted") - } else { - statusMap.set(filePath, "modified") - } - } - - const output = execSync("git diff --numstat HEAD", { - cwd: directory, - encoding: "utf-8", - timeout: 5000, - stdio: ["pipe", "pipe", "pipe"], - }).trim() - - const stats: GitFileStat[] = [] - const trackedPaths = new Set() - - if (output) { - for (const line of output.split("\n")) { - const parts = line.split("\t") - if (parts.length < 3) continue - - const [addedStr, removedStr, path] = parts - const added = addedStr === "-" ? 0 : parseInt(addedStr, 10) - const removed = removedStr === "-" ? 0 : parseInt(removedStr, 10) - trackedPaths.add(path) - - stats.push({ - path, - added, - removed, - status: statusMap.get(path) ?? "modified", - }) - } - } - - for (const filePath of untrackedFiles) { - if (trackedPaths.has(filePath)) continue - try { - const content = execSync(`wc -l < "${filePath}"`, { - cwd: directory, - encoding: "utf-8", - timeout: 3000, - stdio: ["pipe", "pipe", "pipe"], - }).trim() - const lineCount = parseInt(content, 10) || 0 - stats.push({ path: filePath, added: lineCount, removed: 0, status: "added" }) - } catch { - stats.push({ path: filePath, added: 0, removed: 0, status: "added" }) - } - } - - return stats - } catch { - return [] - } -} - -export function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string { - if (stats.length === 0) return "[FILE CHANGES SUMMARY]\nNo file changes detected.\n" - - const modified = stats.filter((s) => s.status === "modified") - const added = stats.filter((s) => s.status === "added") - const deleted = stats.filter((s) => s.status === "deleted") - - const lines: string[] = ["[FILE CHANGES SUMMARY]"] - - if (modified.length > 0) { - lines.push("Modified files:") - for (const f of modified) { - lines.push(` ${f.path} (+${f.added}, -${f.removed})`) - } - lines.push("") - } - - if (added.length > 0) { - lines.push("Created files:") - for (const f of added) { - lines.push(` ${f.path} (+${f.added})`) - } - lines.push("") - } - - if (deleted.length > 0) { - lines.push("Deleted files:") - for (const f of deleted) { - lines.push(` ${f.path} (-${f.removed})`) - } - lines.push("") - } - - if (notepadPath) { - const notepadStat = stats.find((s) => s.path.includes("notepad") || s.path.includes(".sisyphus")) - if (notepadStat) { - lines.push("[NOTEPAD UPDATED]") - lines.push(` ${notepadStat.path} (+${notepadStat.added})`) - lines.push("") - } - } - - return lines.join("\n") -} diff --git a/src/hooks/atlas/tool-execute-after.ts b/src/hooks/atlas/tool-execute-after.ts index ef21ac33..a922e422 100644 --- a/src/hooks/atlas/tool-execute-after.ts +++ b/src/hooks/atlas/tool-execute-after.ts @@ -2,9 +2,9 @@ import type { PluginInput } from "@opencode-ai/plugin" import { appendSessionId, getPlanProgress, readBoulderState } from "../../features/boulder-state" import { log } from "../../shared/logger" import { isCallerOrchestrator } from "../../shared/session-utils" +import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree" import { HOOK_NAME } from "./hook-name" import { DIRECT_WORK_REMINDER } from "./system-reminder-templates" -import { formatFileChanges, getGitDiffStats } from "./git-diff-stats" import { isSisyphusPath } from "./sisyphus-path" import { extractSessionIdFromOutput } from "./subagent-session-id" import { buildOrchestratorReminder, buildStandaloneVerificationReminder } from "./verification-reminders" @@ -57,7 +57,7 @@ export function createToolExecuteAfterHandler(input: { } if (toolOutput.output && typeof toolOutput.output === "string") { - const gitStats = getGitDiffStats(ctx.directory) + const gitStats = collectGitDiffStats(ctx.directory) const fileChanges = formatFileChanges(gitStats) const subagentSessionId = extractSessionIdFromOutput(toolOutput.output) diff --git a/src/hooks/atlas/types.ts b/src/hooks/atlas/types.ts index e08c1f47..e1919cd2 100644 --- a/src/hooks/atlas/types.ts +++ b/src/hooks/atlas/types.ts @@ -1,3 +1,4 @@ +import type { AgentOverrides } from "../../config" import type { BackgroundManager } from "../../features/background-agent" export type ModelInfo = { providerID: string; modelID: string } @@ -6,6 +7,7 @@ export interface AtlasHookOptions { directory: string backgroundManager?: BackgroundManager isContinuationStopped?: (sessionID: string) => boolean + agentOverrides?: AgentOverrides } export interface ToolExecuteAfterInput { diff --git a/src/hooks/index.ts b/src/hooks/index.ts index 95447527..becdc216 100644 --- a/src/hooks/index.ts +++ b/src/hooks/index.ts @@ -1,4 +1,5 @@ export { createTodoContinuationEnforcer, type TodoContinuationEnforcer } from "./todo-continuation-enforcer"; +export { createTaskContinuationEnforcer, type TaskContinuationEnforcer } from "./task-continuation-enforcer"; export { createContextWindowMonitorHook } from "./context-window-monitor"; export { createSessionNotification } from "./session-notification"; export { sendSessionNotification, playSessionNotificationSound, detectPlatform, getDefaultSoundPath } from "./session-notification-sender"; diff --git a/src/hooks/keyword-detector/ultrawork/default.ts b/src/hooks/keyword-detector/ultrawork/default.ts index fb7fce31..93ddc648 100644 --- a/src/hooks/keyword-detector/ultrawork/default.ts +++ b/src/hooks/keyword-detector/ultrawork/default.ts @@ -104,7 +104,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST. | Architecture decision needed | MUST call plan agent | \`\`\` -task(subagent_type="plan", prompt="") +task(subagent_type="plan", load_skills=[], prompt="") \`\`\` **WHY PLAN AGENT IS MANDATORY:** @@ -119,9 +119,9 @@ task(subagent_type="plan", prompt="") | Scenario | Action | |----------|--------| -| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", prompt="")\` | -| Need to refine the plan | \`task(session_id="{returned_session_id}", prompt="Please adjust: ")\` | -| Plan needs more detail | \`task(session_id="{returned_session_id}", prompt="Add more detail to Task N")\` | +| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", load_skills=[], prompt="")\` | +| Need to refine the plan | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Please adjust: ")\` | +| Plan needs more detail | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Add more detail to Task N")\` | **WHY SESSION_ID IS CRITICAL:** - Plan agent retains FULL conversation context @@ -131,10 +131,10 @@ task(subagent_type="plan", prompt="") \`\`\` // WRONG: Starting fresh loses all context -task(subagent_type="plan", prompt="Here's more info...") +task(subagent_type="plan", load_skills=[], prompt="Here's more info...") // CORRECT: Resume preserves everything -task(session_id="ses_abc123", prompt="Here's my answer to your question: ...") +task(session_id="ses_abc123", load_skills=[], prompt="Here's my answer to your question: ...") \`\`\` **FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.** @@ -147,10 +147,10 @@ task(session_id="ses_abc123", prompt="Here's my answer to your question: ...") | Task Type | Action | Why | |-----------|--------|-----| -| Codebase exploration | task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient | -| Documentation lookup | task(subagent_type="librarian", run_in_background=true) | Specialized knowledge | -| Planning | task(subagent_type="plan") | Parallel task graph + structured TODO list | -| Hard problem (conventional) | task(subagent_type="oracle") | Architecture, debugging, complex logic | +| Codebase exploration | task(subagent_type="explore", load_skills=[], run_in_background=true) | Parallel, context-efficient | +| Documentation lookup | task(subagent_type="librarian", load_skills=[], run_in_background=true) | Specialized knowledge | +| Planning | task(subagent_type="plan", load_skills=[]) | Parallel task graph + structured TODO list | +| Hard problem (conventional) | task(subagent_type="oracle", load_skills=[]) | Architecture, debugging, complex logic | | Hard problem (non-conventional) | task(category="artistry", load_skills=[...]) | Different approach needed | | Implementation | task(category="...", load_skills=[...]) | Domain-optimized models | diff --git a/src/hooks/keyword-detector/ultrawork/gpt5.2.ts b/src/hooks/keyword-detector/ultrawork/gpt5.2.ts index 9309f429..a9258e0d 100644 --- a/src/hooks/keyword-detector/ultrawork/gpt5.2.ts +++ b/src/hooks/keyword-detector/ultrawork/gpt5.2.ts @@ -73,10 +73,10 @@ Use these when they provide clear value based on the decision framework above: | Resource | When to Use | How to Use | |----------|-------------|------------| -| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", run_in_background=true, ...)\` | -| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", run_in_background=true, ...)\` | -| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", ...)\` | -| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", ...)\` | +| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", load_skills=[], run_in_background=true, ...)\` | +| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", load_skills=[], run_in_background=true, ...)\` | +| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", load_skills=[], ...)\` | +| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", load_skills=[], ...)\` | | task category | Specialized work matching a category | \`task(category="...", load_skills=[...])\` | diff --git a/src/hooks/keyword-detector/ultrawork/planner.ts b/src/hooks/keyword-detector/ultrawork/planner.ts index 426926f4..e152221f 100644 --- a/src/hooks/keyword-detector/ultrawork/planner.ts +++ b/src/hooks/keyword-detector/ultrawork/planner.ts @@ -38,9 +38,9 @@ You ARE the planner. Your job: create bulletproof work plans. ### Research Protocol 1. **Fire parallel background agents** for comprehensive context: \`\`\` - task(agent="explore", prompt="Find existing patterns for [topic] in codebase", background=true) - task(agent="explore", prompt="Find test infrastructure and conventions", background=true) - task(agent="librarian", prompt="Find official docs and best practices for [technology]", background=true) + task(subagent_type="explore", load_skills=[], prompt="Find existing patterns for [topic] in codebase", run_in_background=true) + task(subagent_type="explore", load_skills=[], prompt="Find test infrastructure and conventions", run_in_background=true) + task(subagent_type="librarian", load_skills=[], prompt="Find official docs and best practices for [technology]", run_in_background=true) \`\`\` 2. **Wait for results** before planning - rushed plans fail 3. **Synthesize findings** into informed requirements diff --git a/src/hooks/ralph-loop/completion-promise-detector.ts b/src/hooks/ralph-loop/completion-promise-detector.ts index 8fa1f914..5f60d208 100644 --- a/src/hooks/ralph-loop/completion-promise-detector.ts +++ b/src/hooks/ralph-loop/completion-promise-detector.ts @@ -2,6 +2,7 @@ import type { PluginInput } from "@opencode-ai/plugin" import { existsSync, readFileSync } from "node:fs" import { log } from "../../shared/logger" import { HOOK_NAME } from "./constants" +import { withTimeout } from "./with-timeout" interface OpenCodeSessionMessage { info?: { role?: string } @@ -54,37 +55,43 @@ export async function detectCompletionInSessionMessages( }, ): Promise { try { - const response = await Promise.race([ + const response = await withTimeout( ctx.client.session.messages({ path: { id: options.sessionID }, query: { directory: options.directory }, }), - new Promise((_, reject) => - setTimeout(() => reject(new Error("API timeout")), options.apiTimeoutMs), - ), - ]) + options.apiTimeoutMs, + ) const messages = (response as { data?: unknown[] }).data ?? [] if (!Array.isArray(messages)) return false - const assistantMessages = (messages as OpenCodeSessionMessage[]).filter( - (msg) => msg.info?.role === "assistant", - ) - const lastAssistant = assistantMessages[assistantMessages.length - 1] - if (!lastAssistant?.parts) return false + const assistantMessages = (messages as OpenCodeSessionMessage[]).filter((msg) => msg.info?.role === "assistant") + if (assistantMessages.length === 0) return false const pattern = buildPromisePattern(options.promise) - const responseText = lastAssistant.parts - .filter((p) => p.type === "text") - .map((p) => p.text ?? "") - .join("\n") + const recentAssistants = assistantMessages.slice(-3) + for (const assistant of recentAssistants) { + if (!assistant.parts) continue - return pattern.test(responseText) + const responseText = assistant.parts + .filter((p) => p.type === "text" || p.type === "reasoning") + .map((p) => p.text ?? "") + .join("\n") + + if (pattern.test(responseText)) { + return true + } + } + + return false } catch (err) { - log(`[${HOOK_NAME}] Session messages check failed`, { - sessionID: options.sessionID, - error: String(err), - }) + setTimeout(() => { + log(`[${HOOK_NAME}] Session messages check failed`, { + sessionID: options.sessionID, + error: String(err), + }) + }, 0) return false } } diff --git a/src/hooks/ralph-loop/continuation-prompt-injector.ts b/src/hooks/ralph-loop/continuation-prompt-injector.ts index 45e6dba5..84af442f 100644 --- a/src/hooks/ralph-loop/continuation-prompt-injector.ts +++ b/src/hooks/ralph-loop/continuation-prompt-injector.ts @@ -2,6 +2,7 @@ import type { PluginInput } from "@opencode-ai/plugin" import { log } from "../../shared/logger" import { findNearestMessageWithFields } from "../../features/hook-message-injector" import { getMessageDir } from "./message-storage-directory" +import { withTimeout } from "./with-timeout" type MessageInfo = { agent?: string @@ -12,15 +13,18 @@ type MessageInfo = { export async function injectContinuationPrompt( ctx: PluginInput, - options: { sessionID: string; prompt: string; directory: string }, + options: { sessionID: string; prompt: string; directory: string; apiTimeoutMs: number }, ): Promise { let agent: string | undefined let model: { providerID: string; modelID: string } | undefined try { - const messagesResp = await ctx.client.session.messages({ - path: { id: options.sessionID }, - }) + const messagesResp = await withTimeout( + ctx.client.session.messages({ + path: { id: options.sessionID }, + }), + options.apiTimeoutMs, + ) const messages = (messagesResp.data ?? []) as Array<{ info?: MessageInfo }> for (let i = messages.length - 1; i >= 0; i--) { const info = messages[i]?.info diff --git a/src/hooks/ralph-loop/index.test.ts b/src/hooks/ralph-loop/index.test.ts index 9c7ce4f1..851e0ce1 100644 --- a/src/hooks/ralph-loop/index.test.ts +++ b/src/hooks/ralph-loop/index.test.ts @@ -511,6 +511,38 @@ describe("ralph-loop", () => { expect(messagesCalls[0].sessionID).toBe("session-123") }) + test("should detect completion promise in reasoning part via session messages API", async () => { + //#given - active loop with assistant reasoning containing completion promise + mockSessionMessages = [ + { info: { role: "user" }, parts: [{ type: "text", text: "Build something" }] }, + { + info: { role: "assistant" }, + parts: [ + { type: "reasoning", text: "I am done now. REASONING_DONE" }, + ], + }, + ] + const hook = createRalphLoopHook(createMockPluginInput(), { + getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"), + }) + hook.startLoop("session-123", "Build something", { + completionPromise: "REASONING_DONE", + }) + + //#when - session goes idle + await hook.event({ + event: { + type: "session.idle", + properties: { sessionID: "session-123" }, + }, + }) + + //#then - loop completed via API detection, no continuation + expect(promptCalls.length).toBe(0) + expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true) + expect(hook.getState()).toBeNull() + }) + test("should handle multiple iterations correctly", async () => { // given - active loop const hook = createRalphLoopHook(createMockPluginInput()) @@ -596,13 +628,14 @@ describe("ralph-loop", () => { expect(promptCalls.length).toBe(1) }) - test("should only check LAST assistant message for completion", async () => { - // given - multiple assistant messages, only first has completion promise + test("should check last 3 assistant messages for completion", async () => { + // given - multiple assistant messages, promise in recent (not last) assistant message mockSessionMessages = [ { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "I'll work on it. DONE" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Working on it." }] }, { info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "Working on more features..." }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Nearly there... DONE" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "(extra output after promise)" }] }, ] const hook = createRalphLoopHook(createMockPluginInput(), { getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"), @@ -614,35 +647,36 @@ describe("ralph-loop", () => { event: { type: "session.idle", properties: { sessionID: "session-123" } }, }) - // then - loop should continue (last message has no completion promise) - expect(promptCalls.length).toBe(1) - expect(hook.getState()?.iteration).toBe(2) - }) - - test("should detect completion only in LAST assistant message", async () => { - // given - last assistant message has completion promise - mockSessionMessages = [ - { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "Starting work..." }] }, - { info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "Task complete! DONE" }] }, - ] - const hook = createRalphLoopHook(createMockPluginInput(), { - getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"), - }) - hook.startLoop("session-123", "Build something", { completionPromise: "DONE" }) - - // when - session goes idle - await hook.event({ - event: { type: "session.idle", properties: { sessionID: "session-123" } }, - }) - - // then - loop should complete (last message has completion promise) + // then - loop should complete (promise found within last 3 assistant messages) expect(promptCalls.length).toBe(0) expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true) expect(hook.getState()).toBeNull() }) + test("should NOT detect completion if promise is older than last 3 assistant messages", async () => { + // given - promise appears in an assistant message older than last 3 + mockSessionMessages = [ + { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early DONE" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "More work 1" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "More work 2" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "More work 3" }] }, + ] + const hook = createRalphLoopHook(createMockPluginInput(), { + getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"), + }) + hook.startLoop("session-123", "Build something", { completionPromise: "DONE" }) + + // when - session goes idle + await hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-123" } }, + }) + + // then - loop should continue (promise is older than last 3 assistant messages) + expect(promptCalls.length).toBe(1) + expect(hook.getState()?.iteration).toBe(2) + }) + test("should allow starting new loop while previous loop is active (different session)", async () => { // given - active loop in session A const hook = createRalphLoopHook(createMockPluginInput()) @@ -928,7 +962,7 @@ Original task: Build something` const elapsed = Date.now() - startTime // then - should complete quickly (not hang for 10s) - expect(elapsed).toBeLessThan(2000) + expect(elapsed).toBeLessThan(6000) // then - loop should continue (API error = no completion detected) expect(promptCalls.length).toBe(1) expect(apiCallCount).toBeGreaterThan(0) diff --git a/src/hooks/ralph-loop/ralph-loop-event-handler.ts b/src/hooks/ralph-loop/ralph-loop-event-handler.ts index 5ba52b87..3c3d6e3e 100644 --- a/src/hooks/ralph-loop/ralph-loop-event-handler.ts +++ b/src/hooks/ralph-loop/ralph-loop-event-handler.ts @@ -132,6 +132,7 @@ export function createRalphLoopEventHandler( sessionID, prompt: buildContinuationPrompt(newState), directory: options.directory, + apiTimeoutMs: options.apiTimeoutMs, }) } catch (err) { log(`[${HOOK_NAME}] Failed to inject continuation`, { diff --git a/src/hooks/ralph-loop/ralph-loop-hook.ts b/src/hooks/ralph-loop/ralph-loop-hook.ts index d55a1882..5180f030 100644 --- a/src/hooks/ralph-loop/ralph-loop-hook.ts +++ b/src/hooks/ralph-loop/ralph-loop-hook.ts @@ -16,7 +16,7 @@ export interface RalphLoopHook { getState: () => RalphLoopState | null } -const DEFAULT_API_TIMEOUT = 3000 as const +const DEFAULT_API_TIMEOUT = 5000 as const export function createRalphLoopHook( ctx: PluginInput, diff --git a/src/hooks/ralph-loop/with-timeout.ts b/src/hooks/ralph-loop/with-timeout.ts new file mode 100644 index 00000000..70312443 --- /dev/null +++ b/src/hooks/ralph-loop/with-timeout.ts @@ -0,0 +1,20 @@ +export async function withTimeout( + promise: Promise, + timeoutMs: number, +): Promise { + let timeoutId: ReturnType | undefined + + const timeoutPromise = new Promise((_, reject) => { + timeoutId = setTimeout(() => { + reject(new Error("API timeout")) + }, timeoutMs) + }) + + try { + return await Promise.race([promise, timeoutPromise]) + } finally { + if (timeoutId !== undefined) { + clearTimeout(timeoutId) + } + } +} diff --git a/src/hooks/session-recovery/detect-error-type.ts b/src/hooks/session-recovery/detect-error-type.ts index 763370d1..f51c0d0d 100644 --- a/src/hooks/session-recovery/detect-error-type.ts +++ b/src/hooks/session-recovery/detect-error-type.ts @@ -2,6 +2,7 @@ export type RecoveryErrorType = | "tool_result_missing" | "thinking_block_order" | "thinking_disabled_violation" + | "assistant_prefill_unsupported" | null function getErrorMessage(error: unknown): string { @@ -41,6 +42,13 @@ export function extractMessageIndex(error: unknown): number | null { export function detectErrorType(error: unknown): RecoveryErrorType { const message = getErrorMessage(error) + if ( + message.includes("assistant message prefill") || + message.includes("conversation must end with a user message") + ) { + return "assistant_prefill_unsupported" + } + if ( message.includes("thinking") && (message.includes("first block") || diff --git a/src/hooks/session-recovery/hook.ts b/src/hooks/session-recovery/hook.ts index 55a16662..324a2731 100644 --- a/src/hooks/session-recovery/hook.ts +++ b/src/hooks/session-recovery/hook.ts @@ -81,11 +81,13 @@ export function createSessionRecoveryHook(ctx: PluginInput, options?: SessionRec tool_result_missing: "Tool Crash Recovery", thinking_block_order: "Thinking Block Recovery", thinking_disabled_violation: "Thinking Strip Recovery", + "assistant_prefill_unsupported": "Prefill Unsupported", } const toastMessages: Record = { tool_result_missing: "Injecting cancelled tool results...", thinking_block_order: "Fixing message structure...", thinking_disabled_violation: "Stripping thinking blocks...", + "assistant_prefill_unsupported": "Prefill not supported; continuing without recovery.", } await ctx.client.tui @@ -117,6 +119,8 @@ export function createSessionRecoveryHook(ctx: PluginInput, options?: SessionRec const resumeConfig = extractResumeConfig(lastUser, sessionID) await resumeSession(ctx.client, resumeConfig) } + } else if (errorType === "assistant_prefill_unsupported") { + success = true } return success diff --git a/src/hooks/session-recovery/index.test.ts b/src/hooks/session-recovery/index.test.ts index 93d7990a..257fe05a 100644 --- a/src/hooks/session-recovery/index.test.ts +++ b/src/hooks/session-recovery/index.test.ts @@ -129,6 +129,63 @@ describe("detectErrorType", () => { }) }) + describe("assistant_prefill_unsupported errors", () => { + it("should detect assistant message prefill error from direct message", () => { + //#given an error about assistant message prefill not being supported + const error = { + message: "This model does not support assistant message prefill. The conversation must end with a user message.", + } + + //#when detectErrorType is called + const result = detectErrorType(error) + + //#then should return assistant_prefill_unsupported + expect(result).toBe("assistant_prefill_unsupported") + }) + + it("should detect assistant message prefill error from nested error object", () => { + //#given an Anthropic API error with nested structure matching the real error format + const error = { + error: { + type: "invalid_request_error", + message: "This model does not support assistant message prefill. The conversation must end with a user message.", + }, + } + + //#when detectErrorType is called + const result = detectErrorType(error) + + //#then should return assistant_prefill_unsupported + expect(result).toBe("assistant_prefill_unsupported") + }) + + it("should detect error with only 'conversation must end with a user message' fragment", () => { + //#given an error containing only the user message requirement + const error = { + message: "The conversation must end with a user message.", + } + + //#when detectErrorType is called + const result = detectErrorType(error) + + //#then should return assistant_prefill_unsupported + expect(result).toBe("assistant_prefill_unsupported") + }) + + it("should detect error with only 'assistant message prefill' fragment", () => { + //#given an error containing only the prefill mention + const error = { + message: "This model does not support assistant message prefill.", + } + + //#when detectErrorType is called + const result = detectErrorType(error) + + //#then should return assistant_prefill_unsupported + expect(result).toBe("assistant_prefill_unsupported") + }) + }) + describe("unrecognized errors", () => { it("should return null for unrecognized error patterns", () => { // given an unrelated error diff --git a/src/hooks/task-continuation-enforcer.test.ts b/src/hooks/task-continuation-enforcer.test.ts new file mode 100644 index 00000000..1a0cbc75 --- /dev/null +++ b/src/hooks/task-continuation-enforcer.test.ts @@ -0,0 +1,763 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test" + +import { mkdtempSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { BackgroundManager } from "../features/background-agent" +import { setMainSession, subagentSessions, _resetForTesting } from "../features/claude-code-session-state" +import type { OhMyOpenCodeConfig } from "../config/schema" +import { TaskObjectSchema } from "../tools/task/types" +import type { TaskObject } from "../tools/task/types" +import { createTaskContinuationEnforcer } from "./task-continuation-enforcer" + +type TimerCallback = (...args: any[]) => void + +interface FakeTimers { + advanceBy: (ms: number, advanceClock?: boolean) => Promise + restore: () => void +} + +function createFakeTimers(): FakeTimers { + const originalNow = Date.now() + let clockNow = originalNow + let timerNow = 0 + let nextId = 1 + const timers = new Map() + const cleared = new Set() + + const original = { + setTimeout: globalThis.setTimeout, + clearTimeout: globalThis.clearTimeout, + setInterval: globalThis.setInterval, + clearInterval: globalThis.clearInterval, + dateNow: Date.now, + } + + const normalizeDelay = (delay?: number) => { + if (typeof delay !== "number" || !Number.isFinite(delay)) return 0 + return delay < 0 ? 0 : delay + } + + const schedule = (callback: TimerCallback, delay: number | undefined, interval: number | null, args: any[]) => { + const id = nextId++ + timers.set(id, { + id, + time: timerNow + normalizeDelay(delay), + interval, + callback, + args, + }) + return id + } + + const clear = (id: number | undefined) => { + if (typeof id !== "number") return + cleared.add(id) + timers.delete(id) + } + + globalThis.setTimeout = ((callback: TimerCallback, delay?: number, ...args: any[]) => { + return schedule(callback, delay, null, args) as unknown as ReturnType + }) as typeof setTimeout + + globalThis.setInterval = ((callback: TimerCallback, delay?: number, ...args: any[]) => { + const interval = normalizeDelay(delay) + return schedule(callback, delay, interval, args) as unknown as ReturnType + }) as typeof setInterval + + globalThis.clearTimeout = ((id?: number) => { + clear(id) + }) as typeof clearTimeout + + globalThis.clearInterval = ((id?: number) => { + clear(id) + }) as typeof clearInterval + + Date.now = () => clockNow + + const advanceBy = async (ms: number, advanceClock: boolean = false) => { + const clamped = Math.max(0, ms) + const target = timerNow + clamped + if (advanceClock) { + clockNow += clamped + } + while (true) { + let next: { id: number; time: number; interval: number | null; callback: TimerCallback; args: any[] } | undefined + for (const timer of timers.values()) { + if (timer.time <= target && (!next || timer.time < next.time)) { + next = timer + } + } + if (!next) break + + timerNow = next.time + timers.delete(next.id) + next.callback(...next.args) + + if (next.interval !== null && !cleared.has(next.id)) { + timers.set(next.id, { + id: next.id, + time: timerNow + next.interval, + interval: next.interval, + callback: next.callback, + args: next.args, + }) + } else { + cleared.delete(next.id) + } + + await Promise.resolve() + } + timerNow = target + await Promise.resolve() + } + + const restore = () => { + globalThis.setTimeout = original.setTimeout + globalThis.clearTimeout = original.clearTimeout + globalThis.setInterval = original.setInterval + globalThis.clearInterval = original.clearInterval + Date.now = original.dateNow + } + + return { advanceBy, restore } +} + +const wait = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)) + +describe("task-continuation-enforcer", () => { + let promptCalls: Array<{ sessionID: string; agent?: string; model?: { providerID?: string; modelID?: string }; text: string }> + let toastCalls: Array<{ title: string; message: string }> + let fakeTimers: FakeTimers + let taskDir: string + + interface MockMessage { + info: { + id: string + role: "user" | "assistant" + error?: { name: string; data?: { message: string } } + } + } + + let mockMessages: MockMessage[] = [] + + function createMockPluginInput() { + return { + client: { + session: { + messages: async () => ({ data: mockMessages }), + prompt: async (opts: any) => { + promptCalls.push({ + sessionID: opts.path.id, + agent: opts.body.agent, + model: opts.body.model, + text: opts.body.parts[0].text, + }) + return {} + }, + }, + tui: { + showToast: async (opts: any) => { + toastCalls.push({ + title: opts.body.title, + message: opts.body.message, + }) + return {} + }, + }, + }, + directory: "/tmp/test", + } as any + } + + function createTempTaskDir(): string { + return mkdtempSync(join(tmpdir(), "omo-task-continuation-")) + } + + function writeTaskFile(dir: string, task: TaskObject): void { + const parsed = TaskObjectSchema.safeParse(task) + expect(parsed.success).toBe(true) + if (!parsed.success) return + writeFileSync(join(dir, `${parsed.data.id}.json`), JSON.stringify(parsed.data), "utf-8") + } + + function writeCorruptedTaskFile(dir: string, taskId: string): void { + writeFileSync(join(dir, `${taskId}.json`), "{ this is not valid json", "utf-8") + } + + function createConfig(dir: string): Partial { + return { + sisyphus: { + tasks: { + claude_code_compat: true, + storage_path: dir, + }, + }, + } + } + + function createMockBackgroundManager(runningTasks: boolean = false): BackgroundManager { + return { + getTasksByParentSession: () => (runningTasks ? [{ status: "running" }] : []), + } as any + } + + beforeEach(() => { + fakeTimers = createFakeTimers() + _resetForTesting() + promptCalls = [] + toastCalls = [] + mockMessages = [] + taskDir = createTempTaskDir() + }) + + afterEach(() => { + fakeTimers.restore() + _resetForTesting() + rmSync(taskDir, { recursive: true, force: true }) + }) + + test("should inject continuation when idle with incomplete tasks on disk", async () => { + fakeTimers.restore() + // given - main session with incomplete tasks + const sessionID = "main-123" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + writeTaskFile(taskDir, { + id: "T-2", + subject: "Task 2", + description: "", + status: "completed", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), { + backgroundManager: new BackgroundManager(createMockPluginInput()), + }) + + // when - session goes idle + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + + // then - countdown toast shown + await wait(50) + expect(toastCalls.length).toBeGreaterThanOrEqual(1) + expect(toastCalls[0].title).toBe("Task Continuation") + + // then - after countdown, continuation injected + await wait(2500) + expect(promptCalls.length).toBe(1) + expect(promptCalls[0].text).toContain("TASK CONTINUATION") + }, { timeout: 15000 }) + + test("should NOT inject when all tasks are completed", async () => { + // given - session with all tasks completed + const sessionID = "main-456" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "completed", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when - session goes idle + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then - no continuation injected + expect(promptCalls).toHaveLength(0) + }) + + test("should NOT inject when all tasks are deleted", async () => { + // given - session with all tasks deleted + const sessionID = "main-deleted" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "deleted", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should NOT inject when no task files exist", async () => { + // given - empty task directory + const sessionID = "main-none" + setMainSession(sessionID) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should NOT inject when background tasks are running", async () => { + // given - session with incomplete tasks and running background tasks + const sessionID = "main-bg-running" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), { + backgroundManager: createMockBackgroundManager(true), + }) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should NOT inject for non-main session", async () => { + // given - main session set, different session goes idle + setMainSession("main-session") + const otherSession = "other-session" + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID: otherSession } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should inject for background task session (subagent)", async () => { + fakeTimers.restore() + // given - main session set, background task session registered + setMainSession("main-session") + const bgTaskSession = "bg-task-session" + subagentSessions.add(bgTaskSession) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID: bgTaskSession } } }) + + // then + await wait(2500) + expect(promptCalls.length).toBe(1) + expect(promptCalls[0].sessionID).toBe(bgTaskSession) + }, { timeout: 15000 }) + + test("should cancel countdown on user message after grace period", async () => { + // given + const sessionID = "main-cancel" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when - session goes idle + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + + // when - wait past grace period (500ms), then user sends message + await fakeTimers.advanceBy(600, true) + await hook.handler({ + event: { + type: "message.updated", + properties: { info: { sessionID, role: "user" } }, + }, + }) + + // then + await fakeTimers.advanceBy(2500) + expect(promptCalls).toHaveLength(0) + }) + + test("should ignore user message within grace period", async () => { + fakeTimers.restore() + // given + const sessionID = "main-grace" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await hook.handler({ + event: { + type: "message.updated", + properties: { info: { sessionID, role: "user" } }, + }, + }) + + // then - countdown should continue + await wait(2500) + expect(promptCalls).toHaveLength(1) + }, { timeout: 15000 }) + + test("should cancel countdown on assistant activity", async () => { + // given + const sessionID = "main-assistant" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(500) + await hook.handler({ + event: { + type: "message.part.updated", + properties: { info: { sessionID, role: "assistant" } }, + }, + }) + + // then + await fakeTimers.advanceBy(3000) + expect(promptCalls).toHaveLength(0) + }) + + test("should cancel countdown on tool execution", async () => { + // given + const sessionID = "main-tool" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(500) + await hook.handler({ event: { type: "tool.execute.before", properties: { sessionID } } }) + + // then + await fakeTimers.advanceBy(3000) + expect(promptCalls).toHaveLength(0) + }) + + test("should skip injection during recovery mode", async () => { + // given + const sessionID = "main-recovery" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + hook.markRecovering(sessionID) + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should inject after recovery complete", async () => { + fakeTimers.restore() + // given + const sessionID = "main-recovery-done" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + hook.markRecovering(sessionID) + hook.markRecoveryComplete(sessionID) + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + + // then + await wait(3000) + expect(promptCalls.length).toBe(1) + }, { timeout: 15000 }) + + test("should cleanup on session deleted", async () => { + // given + const sessionID = "main-delete" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(500) + await hook.handler({ event: { type: "session.deleted", properties: { info: { id: sessionID } } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should skip when last assistant message was aborted (API fallback)", async () => { + // given + const sessionID = "main-api-abort" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + mockMessages = [ + { info: { id: "msg-1", role: "user" } }, + { info: { id: "msg-2", role: "assistant", error: { name: "MessageAbortedError", data: { message: "aborted" } } } }, + ] + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should skip when abort detected via session.error event", async () => { + // given + const sessionID = "main-event-abort" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + mockMessages = [ + { info: { id: "msg-1", role: "user" } }, + { info: { id: "msg-2", role: "assistant" } }, + ] + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when - abort error event fires + await hook.handler({ + event: { + type: "session.error", + properties: { sessionID, error: { name: "MessageAbortedError" } }, + }, + }) + + // when - session goes idle immediately after + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should handle corrupted task files gracefully (readJsonSafe returns null)", async () => { + fakeTimers.restore() + // given + const sessionID = "main-corrupt" + setMainSession(sessionID) + + writeCorruptedTaskFile(taskDir, "T-corrupt") + writeTaskFile(taskDir, { + id: "T-ok", + subject: "Task OK", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await wait(2500) + + // then + expect(promptCalls).toHaveLength(1) + }, { timeout: 15000 }) + + test("should NOT inject when isContinuationStopped returns true", async () => { + // given + const sessionID = "main-stopped" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), { + isContinuationStopped: (id) => id === sessionID, + }) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should cancel all countdowns via cancelAllCountdowns", async () => { + // given + const sessionID = "main-cancel-all" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(500) + hook.cancelAllCountdowns() + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) +}) diff --git a/src/hooks/task-continuation-enforcer.ts b/src/hooks/task-continuation-enforcer.ts new file mode 100644 index 00000000..f3b7f9c5 --- /dev/null +++ b/src/hooks/task-continuation-enforcer.ts @@ -0,0 +1,530 @@ +import type { PluginInput } from "@opencode-ai/plugin" +import { existsSync, readdirSync } from "node:fs" +import { join } from "node:path" + +import type { BackgroundManager } from "../features/background-agent" +import { getMainSessionID, subagentSessions } from "../features/claude-code-session-state" +import { + findNearestMessageWithFields, + MESSAGE_STORAGE, + type ToolPermission, +} from "../features/hook-message-injector" +import { listTaskFiles, readJsonSafe, getTaskDir } from "../features/claude-tasks/storage" +import type { OhMyOpenCodeConfig } from "../config/schema" +import { TaskObjectSchema } from "../tools/task/types" +import type { TaskObject } from "../tools/task/types" +import { log } from "../shared/logger" +import { createSystemDirective, SystemDirectiveTypes } from "../shared/system-directive" + +const HOOK_NAME = "task-continuation-enforcer" + +const DEFAULT_SKIP_AGENTS = ["prometheus", "compaction"] + +export interface TaskContinuationEnforcerOptions { + backgroundManager?: BackgroundManager + skipAgents?: string[] + isContinuationStopped?: (sessionID: string) => boolean +} + +export interface TaskContinuationEnforcer { + handler: (input: { event: { type: string; properties?: unknown } }) => Promise + markRecovering: (sessionID: string) => void + markRecoveryComplete: (sessionID: string) => void + cancelAllCountdowns: () => void +} + +interface SessionState { + countdownTimer?: ReturnType + countdownInterval?: ReturnType + isRecovering?: boolean + countdownStartedAt?: number + abortDetectedAt?: number +} + +const CONTINUATION_PROMPT = `${createSystemDirective(SystemDirectiveTypes.TASK_CONTINUATION)} + +Incomplete tasks remain in your task list. Continue working on the next pending task. + +- Proceed without asking for permission +- Mark each task complete when finished +- Do not stop until all tasks are done` + +const COUNTDOWN_SECONDS = 2 +const TOAST_DURATION_MS = 900 +const COUNTDOWN_GRACE_PERIOD_MS = 500 + +function getMessageDir(sessionID: string): string | null { + if (!existsSync(MESSAGE_STORAGE)) return null + + const directPath = join(MESSAGE_STORAGE, sessionID) + if (existsSync(directPath)) return directPath + + for (const dir of readdirSync(MESSAGE_STORAGE)) { + const sessionPath = join(MESSAGE_STORAGE, dir, sessionID) + if (existsSync(sessionPath)) return sessionPath + } + + return null +} + +function getIncompleteCount(tasks: TaskObject[]): number { + return tasks.filter(t => t.status !== "completed" && t.status !== "deleted").length +} + +interface MessageInfo { + id?: string + role?: string + error?: { name?: string; data?: unknown } +} + +function isLastAssistantMessageAborted(messages: Array<{ info?: MessageInfo }>): boolean { + if (!messages || messages.length === 0) return false + + const assistantMessages = messages.filter(m => m.info?.role === "assistant") + if (assistantMessages.length === 0) return false + + const lastAssistant = assistantMessages[assistantMessages.length - 1] + const errorName = lastAssistant.info?.error?.name + + if (!errorName) return false + + return errorName === "MessageAbortedError" || errorName === "AbortError" +} + +function loadTasksFromDisk(config: Partial): TaskObject[] { + const taskIds = listTaskFiles(config) + const taskDirectory = getTaskDir(config) + const tasks: TaskObject[] = [] + + for (const id of taskIds) { + const task = readJsonSafe(join(taskDirectory, `${id}.json`), TaskObjectSchema) + if (task) tasks.push(task) + } + + return tasks +} + +export function createTaskContinuationEnforcer( + ctx: PluginInput, + config: Partial, + options: TaskContinuationEnforcerOptions = {} +): TaskContinuationEnforcer { + const { backgroundManager, skipAgents = DEFAULT_SKIP_AGENTS, isContinuationStopped } = options + const sessions = new Map() + + function getState(sessionID: string): SessionState { + let state = sessions.get(sessionID) + if (!state) { + state = {} + sessions.set(sessionID, state) + } + return state + } + + function cancelCountdown(sessionID: string): void { + const state = sessions.get(sessionID) + if (!state) return + + if (state.countdownTimer) { + clearTimeout(state.countdownTimer) + state.countdownTimer = undefined + } + if (state.countdownInterval) { + clearInterval(state.countdownInterval) + state.countdownInterval = undefined + } + state.countdownStartedAt = undefined + } + + function cleanup(sessionID: string): void { + cancelCountdown(sessionID) + sessions.delete(sessionID) + } + + const markRecovering = (sessionID: string): void => { + const state = getState(sessionID) + state.isRecovering = true + cancelCountdown(sessionID) + log(`[${HOOK_NAME}] Session marked as recovering`, { sessionID }) + } + + const markRecoveryComplete = (sessionID: string): void => { + const state = sessions.get(sessionID) + if (state) { + state.isRecovering = false + log(`[${HOOK_NAME}] Session recovery complete`, { sessionID }) + } + } + + async function showCountdownToast(seconds: number, incompleteCount: number): Promise { + await ctx.client.tui + .showToast({ + body: { + title: "Task Continuation", + message: `Resuming in ${seconds}s... (${incompleteCount} tasks remaining)`, + variant: "warning" as const, + duration: TOAST_DURATION_MS, + }, + }) + .catch(() => {}) + } + + interface ResolvedMessageInfo { + agent?: string + model?: { providerID: string; modelID: string } + tools?: Record + } + + async function injectContinuation( + sessionID: string, + incompleteCount: number, + total: number, + resolvedInfo?: ResolvedMessageInfo + ): Promise { + const state = sessions.get(sessionID) + + if (state?.isRecovering) { + log(`[${HOOK_NAME}] Skipped injection: in recovery`, { sessionID }) + return + } + + const hasRunningBgTasks = backgroundManager + ? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running") + : false + + if (hasRunningBgTasks) { + log(`[${HOOK_NAME}] Skipped injection: background tasks running`, { sessionID }) + return + } + + const tasks = loadTasksFromDisk(config) + const freshIncompleteCount = getIncompleteCount(tasks) + if (freshIncompleteCount === 0) { + log(`[${HOOK_NAME}] Skipped injection: no incomplete tasks`, { sessionID }) + return + } + + let agentName = resolvedInfo?.agent + let model = resolvedInfo?.model + let tools = resolvedInfo?.tools + + if (!agentName || !model) { + const messageDir = getMessageDir(sessionID) + const prevMessage = messageDir ? findNearestMessageWithFields(messageDir) : null + agentName = agentName ?? prevMessage?.agent + model = + model ?? + (prevMessage?.model?.providerID && prevMessage?.model?.modelID + ? { + providerID: prevMessage.model.providerID, + modelID: prevMessage.model.modelID, + ...(prevMessage.model.variant ? { variant: prevMessage.model.variant } : {}), + } + : undefined) + tools = tools ?? prevMessage?.tools + } + + if (agentName && skipAgents.includes(agentName)) { + log(`[${HOOK_NAME}] Skipped: agent in skipAgents list`, { sessionID, agent: agentName }) + return + } + + const editPermission = tools?.edit + const writePermission = tools?.write + const hasWritePermission = + !tools || + (editPermission !== false && editPermission !== "deny" && writePermission !== false && writePermission !== "deny") + if (!hasWritePermission) { + log(`[${HOOK_NAME}] Skipped: agent lacks write permission`, { sessionID, agent: agentName }) + return + } + + const incompleteTasks = tasks.filter(t => t.status !== "completed" && t.status !== "deleted") + const taskList = incompleteTasks.map(t => `- [${t.status}] ${t.subject}`).join("\n") + const prompt = `${CONTINUATION_PROMPT} + +[Status: ${tasks.length - freshIncompleteCount}/${tasks.length} completed, ${freshIncompleteCount} remaining] + +Remaining tasks: +${taskList}` + + try { + log(`[${HOOK_NAME}] Injecting continuation`, { + sessionID, + agent: agentName, + model, + incompleteCount: freshIncompleteCount, + }) + + await ctx.client.session.prompt({ + path: { id: sessionID }, + body: { + agent: agentName, + ...(model !== undefined ? { model } : {}), + parts: [{ type: "text", text: prompt }], + }, + query: { directory: ctx.directory }, + }) + + log(`[${HOOK_NAME}] Injection successful`, { sessionID }) + } catch (err) { + log(`[${HOOK_NAME}] Injection failed`, { sessionID, error: String(err) }) + } + } + + function startCountdown( + sessionID: string, + incompleteCount: number, + total: number, + resolvedInfo?: ResolvedMessageInfo + ): void { + const state = getState(sessionID) + cancelCountdown(sessionID) + + let secondsRemaining = COUNTDOWN_SECONDS + showCountdownToast(secondsRemaining, incompleteCount) + state.countdownStartedAt = Date.now() + + state.countdownInterval = setInterval(() => { + secondsRemaining-- + if (secondsRemaining > 0) { + showCountdownToast(secondsRemaining, incompleteCount) + } + }, 1000) + + state.countdownTimer = setTimeout(() => { + cancelCountdown(sessionID) + injectContinuation(sessionID, incompleteCount, total, resolvedInfo) + }, COUNTDOWN_SECONDS * 1000) + + log(`[${HOOK_NAME}] Countdown started`, { sessionID, seconds: COUNTDOWN_SECONDS, incompleteCount }) + } + + const handler = async ({ event }: { event: { type: string; properties?: unknown } }): Promise => { + const props = event.properties as Record | undefined + + if (event.type === "session.error") { + const sessionID = props?.sessionID as string | undefined + if (!sessionID) return + + const error = props?.error as { name?: string } | undefined + if (error?.name === "MessageAbortedError" || error?.name === "AbortError") { + const state = getState(sessionID) + state.abortDetectedAt = Date.now() + log(`[${HOOK_NAME}] Abort detected via session.error`, { sessionID, errorName: error.name }) + } + + cancelCountdown(sessionID) + log(`[${HOOK_NAME}] session.error`, { sessionID }) + return + } + + if (event.type === "session.idle") { + const sessionID = props?.sessionID as string | undefined + if (!sessionID) return + + log(`[${HOOK_NAME}] session.idle`, { sessionID }) + + const mainSessionID = getMainSessionID() + const isMainSession = sessionID === mainSessionID + const isBackgroundTaskSession = subagentSessions.has(sessionID) + + if (mainSessionID && !isMainSession && !isBackgroundTaskSession) { + log(`[${HOOK_NAME}] Skipped: not main or background task session`, { sessionID }) + return + } + + const state = getState(sessionID) + + if (state.isRecovering) { + log(`[${HOOK_NAME}] Skipped: in recovery`, { sessionID }) + return + } + + // Check 1: Event-based abort detection (primary, most reliable) + if (state.abortDetectedAt) { + const timeSinceAbort = Date.now() - state.abortDetectedAt + const ABORT_WINDOW_MS = 3000 + if (timeSinceAbort < ABORT_WINDOW_MS) { + log(`[${HOOK_NAME}] Skipped: abort detected via event ${timeSinceAbort}ms ago`, { sessionID }) + state.abortDetectedAt = undefined + return + } + state.abortDetectedAt = undefined + } + + const hasRunningBgTasks = backgroundManager + ? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running") + : false + + if (hasRunningBgTasks) { + log(`[${HOOK_NAME}] Skipped: background tasks running`, { sessionID }) + return + } + + // Check 2: API-based abort detection (fallback, for cases where event was missed) + try { + const messagesResp = await ctx.client.session.messages({ + path: { id: sessionID }, + query: { directory: ctx.directory }, + }) + const messages = (messagesResp as { data?: Array<{ info?: MessageInfo }> }).data ?? [] + + if (isLastAssistantMessageAborted(messages)) { + log(`[${HOOK_NAME}] Skipped: last assistant message was aborted (API fallback)`, { sessionID }) + return + } + } catch (err) { + log(`[${HOOK_NAME}] Messages fetch failed, continuing`, { sessionID, error: String(err) }) + } + + const tasks = loadTasksFromDisk(config) + + if (!tasks || tasks.length === 0) { + log(`[${HOOK_NAME}] No tasks`, { sessionID }) + return + } + + const incompleteCount = getIncompleteCount(tasks) + if (incompleteCount === 0) { + log(`[${HOOK_NAME}] All tasks complete`, { sessionID, total: tasks.length }) + return + } + + let resolvedInfo: ResolvedMessageInfo | undefined + let hasCompactionMessage = false + try { + const messagesResp = await ctx.client.session.messages({ + path: { id: sessionID }, + }) + const messages = (messagesResp.data ?? []) as Array<{ + info?: { + agent?: string + model?: { providerID: string; modelID: string } + modelID?: string + providerID?: string + tools?: Record + } + }> + for (let i = messages.length - 1; i >= 0; i--) { + const info = messages[i].info + if (info?.agent === "compaction") { + hasCompactionMessage = true + continue + } + if (info?.agent || info?.model || (info?.modelID && info?.providerID)) { + resolvedInfo = { + agent: info.agent, + model: + info.model ?? + (info.providerID && info.modelID + ? { providerID: info.providerID, modelID: info.modelID } + : undefined), + tools: info.tools, + } + break + } + } + } catch (err) { + log(`[${HOOK_NAME}] Failed to fetch messages for agent check`, { sessionID, error: String(err) }) + } + + log(`[${HOOK_NAME}] Agent check`, { + sessionID, + agentName: resolvedInfo?.agent, + skipAgents, + hasCompactionMessage, + }) + if (resolvedInfo?.agent && skipAgents.includes(resolvedInfo.agent)) { + log(`[${HOOK_NAME}] Skipped: agent in skipAgents list`, { sessionID, agent: resolvedInfo.agent }) + return + } + if (hasCompactionMessage && !resolvedInfo?.agent) { + log(`[${HOOK_NAME}] Skipped: compaction occurred but no agent info resolved`, { sessionID }) + return + } + + if (isContinuationStopped?.(sessionID)) { + log(`[${HOOK_NAME}] Skipped: continuation stopped for session`, { sessionID }) + return + } + + startCountdown(sessionID, incompleteCount, tasks.length, resolvedInfo) + return + } + + if (event.type === "message.updated") { + const info = props?.info as Record | undefined + const sessionID = info?.sessionID as string | undefined + const role = info?.role as string | undefined + + if (!sessionID) return + + if (role === "user") { + const state = sessions.get(sessionID) + if (state?.countdownStartedAt) { + const elapsed = Date.now() - state.countdownStartedAt + if (elapsed < COUNTDOWN_GRACE_PERIOD_MS) { + log(`[${HOOK_NAME}] Ignoring user message in grace period`, { sessionID, elapsed }) + return + } + } + if (state) state.abortDetectedAt = undefined + cancelCountdown(sessionID) + } + + if (role === "assistant") { + const state = sessions.get(sessionID) + if (state) state.abortDetectedAt = undefined + cancelCountdown(sessionID) + } + return + } + + if (event.type === "message.part.updated") { + const info = props?.info as Record | undefined + const sessionID = info?.sessionID as string | undefined + const role = info?.role as string | undefined + + if (sessionID && role === "assistant") { + const state = sessions.get(sessionID) + if (state) state.abortDetectedAt = undefined + cancelCountdown(sessionID) + } + return + } + + if (event.type === "tool.execute.before" || event.type === "tool.execute.after") { + const sessionID = props?.sessionID as string | undefined + if (sessionID) { + const state = sessions.get(sessionID) + if (state) state.abortDetectedAt = undefined + cancelCountdown(sessionID) + } + return + } + + if (event.type === "session.deleted") { + const sessionInfo = props?.info as { id?: string } | undefined + if (sessionInfo?.id) { + cleanup(sessionInfo.id) + log(`[${HOOK_NAME}] Session deleted: cleaned up`, { sessionID: sessionInfo.id }) + } + return + } + } + + const cancelAllCountdowns = (): void => { + for (const sessionID of sessions.keys()) { + cancelCountdown(sessionID) + } + log(`[${HOOK_NAME}] All countdowns cancelled`) + } + + return { + handler, + markRecovering, + markRecoveryComplete, + cancelAllCountdowns, + } +} diff --git a/src/hooks/unstable-agent-babysitter/index.test.ts b/src/hooks/unstable-agent-babysitter/index.test.ts index 9fc309ec..355072b5 100644 --- a/src/hooks/unstable-agent-babysitter/index.test.ts +++ b/src/hooks/unstable-agent-babysitter/index.test.ts @@ -1,8 +1,9 @@ +import { afterEach, describe, expect, test } from "bun:test" import { _resetForTesting, setMainSession } from "../../features/claude-code-session-state" import type { BackgroundTask } from "../../features/background-agent" import { createUnstableAgentBabysitterHook } from "./index" -const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode" +const projectDir = process.cwd() type BabysitterContext = Parameters[0] diff --git a/src/mcp/websearch.test.ts b/src/mcp/websearch.test.ts index f29bf663..2b09e395 100644 --- a/src/mcp/websearch.test.ts +++ b/src/mcp/websearch.test.ts @@ -1,16 +1,30 @@ -import { describe, expect, test, beforeEach, afterEach } from "bun:test" +import { afterEach, beforeEach, describe, expect, test } from "bun:test" import { createWebsearchConfig } from "./websearch" describe("websearch MCP provider configuration", () => { - const originalEnv = { ...process.env } + let originalExaApiKey: string | undefined + let originalTavilyApiKey: string | undefined beforeEach(() => { + originalExaApiKey = process.env.EXA_API_KEY + originalTavilyApiKey = process.env.TAVILY_API_KEY + delete process.env.EXA_API_KEY delete process.env.TAVILY_API_KEY }) afterEach(() => { - process.env = { ...originalEnv } + if (originalExaApiKey === undefined) { + delete process.env.EXA_API_KEY + } else { + process.env.EXA_API_KEY = originalExaApiKey + } + + if (originalTavilyApiKey === undefined) { + delete process.env.TAVILY_API_KEY + } else { + process.env.TAVILY_API_KEY = originalTavilyApiKey + } }) test("returns Exa config when no config provided", () => { @@ -21,6 +35,7 @@ describe("websearch MCP provider configuration", () => { //#then expect(result.url).toContain("mcp.exa.ai") + expect(result.url).toContain("tools=web_search_exa") expect(result.type).toBe("remote") expect(result.enabled).toBe(true) }) @@ -34,10 +49,11 @@ describe("websearch MCP provider configuration", () => { //#then expect(result.url).toContain("mcp.exa.ai") + expect(result.url).toContain("tools=web_search_exa") expect(result.type).toBe("remote") }) - test("includes x-api-key header when EXA_API_KEY is set", () => { + test("appends exaApiKey query param when EXA_API_KEY is set", () => { //#given const apiKey = "test-exa-key-12345" process.env.EXA_API_KEY = apiKey @@ -46,7 +62,30 @@ describe("websearch MCP provider configuration", () => { const result = createWebsearchConfig() //#then - expect(result.headers).toEqual({ "x-api-key": apiKey }) + expect(result.url).toContain(`exaApiKey=${encodeURIComponent(apiKey)}`) + }) + + test("does not set x-api-key header when EXA_API_KEY is set", () => { + //#given + process.env.EXA_API_KEY = "test-exa-key-12345" + + //#when + const result = createWebsearchConfig() + + //#then + expect(result.headers).toBeUndefined() + }) + + test("URL-encodes EXA_API_KEY when it contains special characters", () => { + //#given an EXA_API_KEY with special characters (+ & =) + const apiKey = "a+b&c=d" + process.env.EXA_API_KEY = apiKey + + //#when createWebsearchConfig is called + const result = createWebsearchConfig() + + //#then the URL contains the properly encoded key via encodeURIComponent + expect(result.url).toContain(`exaApiKey=${encodeURIComponent(apiKey)}`) }) test("returns Tavily config when provider is 'tavily' and TAVILY_API_KEY set", () => { @@ -77,7 +116,8 @@ describe("websearch MCP provider configuration", () => { test("returns Exa when both keys present but no explicit provider", () => { //#given - process.env.EXA_API_KEY = "test-exa-key" + const exaKey = "test-exa-key" + process.env.EXA_API_KEY = exaKey process.env.TAVILY_API_KEY = "test-tavily-key" //#when @@ -85,7 +125,8 @@ describe("websearch MCP provider configuration", () => { //#then expect(result.url).toContain("mcp.exa.ai") - expect(result.headers).toEqual({ "x-api-key": "test-exa-key" }) + expect(result.url).toContain(`exaApiKey=${encodeURIComponent(exaKey)}`) + expect(result.headers).toBeUndefined() }) test("Tavily config uses Authorization Bearer header format", () => { @@ -111,6 +152,8 @@ describe("websearch MCP provider configuration", () => { //#then expect(result.url).toContain("mcp.exa.ai") + expect(result.url).toContain("tools=web_search_exa") + expect(result.url).not.toContain("exaApiKey=") expect(result.headers).toBeUndefined() }) }) diff --git a/src/mcp/websearch.ts b/src/mcp/websearch.ts index 91eddccc..a306ac49 100644 --- a/src/mcp/websearch.ts +++ b/src/mcp/websearch.ts @@ -31,11 +31,10 @@ export function createWebsearchConfig(config?: WebsearchConfig): RemoteMcpConfig // Default to Exa return { type: "remote" as const, - url: "https://mcp.exa.ai/mcp?tools=web_search_exa", + url: process.env.EXA_API_KEY + ? `https://mcp.exa.ai/mcp?tools=web_search_exa&exaApiKey=${encodeURIComponent(process.env.EXA_API_KEY)}` + : "https://mcp.exa.ai/mcp?tools=web_search_exa", enabled: true, - headers: process.env.EXA_API_KEY - ? { "x-api-key": process.env.EXA_API_KEY } - : undefined, oauth: false as const, } } diff --git a/src/plugin-handlers/agent-config-handler.ts b/src/plugin-handlers/agent-config-handler.ts index 249216a6..2c2beee4 100644 --- a/src/plugin-handlers/agent-config-handler.ts +++ b/src/plugin-handlers/agent-config-handler.ts @@ -13,6 +13,7 @@ import { loadProjectAgents, loadUserAgents } from "../features/claude-code-agent import type { PluginComponents } from "./plugin-components-loader"; import { reorderAgentsByPriority } from "./agent-priority-order"; import { buildPrometheusAgentConfig } from "./prometheus-agent-config-builder"; +import { buildPlanDemoteConfig } from "./plan-model-inheritance"; type AgentConfigRecord = Record | undefined> & { build?: Record; @@ -152,7 +153,12 @@ export async function applyAgentConfig(params: { ? migrateAgentConfig(configAgent.build as Record) : {}; - const planDemoteConfig = shouldDemotePlan ? { mode: "subagent" as const } : undefined; + const planDemoteConfig = shouldDemotePlan + ? buildPlanDemoteConfig( + agentConfig["prometheus"] as Record | undefined, + params.pluginConfig.agents?.plan as Record | undefined, + ) + : undefined; params.config.agent = { ...agentConfig, diff --git a/src/plugin-handlers/config-handler.test.ts b/src/plugin-handlers/config-handler.test.ts index d33f3718..08c58f6f 100644 --- a/src/plugin-handlers/config-handler.test.ts +++ b/src/plugin-handlers/config-handler.test.ts @@ -600,6 +600,187 @@ describe("Prometheus direct override priority over category", () => { }) }) +describe("Plan agent model inheritance from prometheus", () => { + test("plan agent inherits all model-related settings from resolved prometheus config", async () => { + //#given - prometheus resolves to claude-opus-4-6 with model settings + spyOn(shared, "resolveModelPipeline" as any).mockReturnValue({ + model: "anthropic/claude-opus-4-6", + provenance: "provider-fallback", + variant: "max", + }) + const pluginConfig: OhMyOpenCodeConfig = { + sisyphus_agent: { + planner_enabled: true, + replace_plan: true, + }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: { + plan: { + name: "plan", + mode: "primary", + prompt: "original plan prompt", + }, + }, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then - plan inherits model and variant from prometheus, but NOT prompt + const agents = config.agent as Record + expect(agents.plan).toBeDefined() + expect(agents.plan.mode).toBe("subagent") + expect(agents.plan.model).toBe("anthropic/claude-opus-4-6") + expect(agents.plan.variant).toBe("max") + expect(agents.plan.prompt).toBeUndefined() + }) + + test("plan agent inherits temperature, reasoningEffort, and other model settings from prometheus", async () => { + //#given - prometheus configured with category that has temperature and reasoningEffort + spyOn(shared, "resolveModelPipeline" as any).mockReturnValue({ + model: "openai/gpt-5.2", + provenance: "override", + variant: "high", + }) + const pluginConfig: OhMyOpenCodeConfig = { + sisyphus_agent: { + planner_enabled: true, + replace_plan: true, + }, + agents: { + prometheus: { + model: "openai/gpt-5.2", + variant: "high", + temperature: 0.3, + top_p: 0.9, + maxTokens: 16000, + reasoningEffort: "high", + textVerbosity: "medium", + thinking: { type: "enabled", budgetTokens: 8000 }, + }, + }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then - plan inherits ALL model-related settings from resolved prometheus + const agents = config.agent as Record> + expect(agents.plan).toBeDefined() + expect(agents.plan.mode).toBe("subagent") + expect(agents.plan.model).toBe("openai/gpt-5.2") + expect(agents.plan.variant).toBe("high") + expect(agents.plan.temperature).toBe(0.3) + expect(agents.plan.top_p).toBe(0.9) + expect(agents.plan.maxTokens).toBe(16000) + expect(agents.plan.reasoningEffort).toBe("high") + expect(agents.plan.textVerbosity).toBe("medium") + expect(agents.plan.thinking).toEqual({ type: "enabled", budgetTokens: 8000 }) + }) + + test("plan agent user override takes priority over prometheus inherited settings", async () => { + //#given - prometheus resolves to opus, but user has plan override for gpt-5.2 + spyOn(shared, "resolveModelPipeline" as any).mockReturnValue({ + model: "anthropic/claude-opus-4-6", + provenance: "provider-fallback", + variant: "max", + }) + const pluginConfig: OhMyOpenCodeConfig = { + sisyphus_agent: { + planner_enabled: true, + replace_plan: true, + }, + agents: { + plan: { + model: "openai/gpt-5.2", + variant: "high", + temperature: 0.5, + }, + }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then - plan uses its own override, not prometheus settings + const agents = config.agent as Record> + expect(agents.plan.model).toBe("openai/gpt-5.2") + expect(agents.plan.variant).toBe("high") + expect(agents.plan.temperature).toBe(0.5) + }) + + test("plan agent does NOT inherit prompt, description, or color from prometheus", async () => { + //#given + spyOn(shared, "resolveModelPipeline" as any).mockReturnValue({ + model: "anthropic/claude-opus-4-6", + provenance: "provider-fallback", + variant: "max", + }) + const pluginConfig: OhMyOpenCodeConfig = { + sisyphus_agent: { + planner_enabled: true, + replace_plan: true, + }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then - plan has model settings but NOT prompt/description/color + const agents = config.agent as Record> + expect(agents.plan.model).toBe("anthropic/claude-opus-4-6") + expect(agents.plan.prompt).toBeUndefined() + expect(agents.plan.description).toBeUndefined() + expect(agents.plan.color).toBeUndefined() + }) +}) + describe("Deadlock prevention - fetchAvailableModels must not receive client", () => { test("fetchAvailableModels should be called with undefined client to prevent deadlock during plugin init", async () => { // given - This test ensures we don't regress on issue #1301 @@ -762,3 +943,108 @@ describe("config-handler plugin loading error boundary (#1559)", () => { expect(commands["test-cmd"]).toBeDefined() }) }) + +describe("per-agent todowrite/todoread deny when task_system enabled", () => { + const PRIMARY_AGENTS = ["sisyphus", "hephaestus", "atlas", "prometheus", "sisyphus-junior"] + + test("denies todowrite and todoread for primary agents when task_system is enabled", async () => { + //#given + spyOn(agents, "createBuiltinAgents" as any).mockResolvedValue({ + sisyphus: { name: "sisyphus", prompt: "test", mode: "primary" }, + hephaestus: { name: "hephaestus", prompt: "test", mode: "primary" }, + atlas: { name: "atlas", prompt: "test", mode: "primary" }, + prometheus: { name: "prometheus", prompt: "test", mode: "primary" }, + "sisyphus-junior": { name: "sisyphus-junior", prompt: "test", mode: "subagent" }, + oracle: { name: "oracle", prompt: "test", mode: "subagent" }, + }) + + const pluginConfig: OhMyOpenCodeConfig = { + experimental: { task_system: true }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then + const agentResult = config.agent as Record }> + for (const agentName of PRIMARY_AGENTS) { + expect(agentResult[agentName]?.permission?.todowrite).toBe("deny") + expect(agentResult[agentName]?.permission?.todoread).toBe("deny") + } + }) + + test("does not deny todowrite/todoread when task_system is disabled", async () => { + //#given + spyOn(agents, "createBuiltinAgents" as any).mockResolvedValue({ + sisyphus: { name: "sisyphus", prompt: "test", mode: "primary" }, + hephaestus: { name: "hephaestus", prompt: "test", mode: "primary" }, + }) + + const pluginConfig: OhMyOpenCodeConfig = { + experimental: { task_system: false }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then + const agentResult = config.agent as Record }> + expect(agentResult.sisyphus?.permission?.todowrite).toBeUndefined() + expect(agentResult.sisyphus?.permission?.todoread).toBeUndefined() + expect(agentResult.hephaestus?.permission?.todowrite).toBeUndefined() + expect(agentResult.hephaestus?.permission?.todoread).toBeUndefined() + }) + + test("does not deny todowrite/todoread when task_system is undefined", async () => { + //#given + spyOn(agents, "createBuiltinAgents" as any).mockResolvedValue({ + sisyphus: { name: "sisyphus", prompt: "test", mode: "primary" }, + }) + + const pluginConfig: OhMyOpenCodeConfig = {} + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then + const agentResult = config.agent as Record }> + expect(agentResult.sisyphus?.permission?.todowrite).toBeUndefined() + expect(agentResult.sisyphus?.permission?.todoread).toBeUndefined() + }) +}) diff --git a/src/plugin-handlers/plan-model-inheritance.test.ts b/src/plugin-handlers/plan-model-inheritance.test.ts new file mode 100644 index 00000000..3b68f0a1 --- /dev/null +++ b/src/plugin-handlers/plan-model-inheritance.test.ts @@ -0,0 +1,118 @@ +import { describe, test, expect } from "bun:test" +import { buildPlanDemoteConfig } from "./plan-model-inheritance" + +describe("buildPlanDemoteConfig", () => { + test("returns only mode when prometheus and plan override are both undefined", () => { + //#given + const prometheusConfig = undefined + const planOverride = undefined + + //#when + const result = buildPlanDemoteConfig(prometheusConfig, planOverride) + + //#then + expect(result).toEqual({ mode: "subagent" }) + }) + + test("extracts all model settings from prometheus config", () => { + //#given + const prometheusConfig = { + name: "prometheus", + model: "anthropic/claude-opus-4-6", + variant: "max", + mode: "all", + prompt: "You are Prometheus...", + permission: { edit: "allow" }, + description: "Plan agent (Prometheus)", + color: "#FF5722", + temperature: 0.1, + top_p: 0.95, + maxTokens: 32000, + thinking: { type: "enabled", budgetTokens: 10000 }, + reasoningEffort: "high", + textVerbosity: "medium", + providerOptions: { key: "value" }, + } + + //#when + const result = buildPlanDemoteConfig(prometheusConfig, undefined) + + //#then - picks model settings, NOT prompt/permission/description/color/name/mode + expect(result.mode).toBe("subagent") + expect(result.model).toBe("anthropic/claude-opus-4-6") + expect(result.variant).toBe("max") + expect(result.temperature).toBe(0.1) + expect(result.top_p).toBe(0.95) + expect(result.maxTokens).toBe(32000) + expect(result.thinking).toEqual({ type: "enabled", budgetTokens: 10000 }) + expect(result.reasoningEffort).toBe("high") + expect(result.textVerbosity).toBe("medium") + expect(result.providerOptions).toEqual({ key: "value" }) + expect(result.prompt).toBeUndefined() + expect(result.permission).toBeUndefined() + expect(result.description).toBeUndefined() + expect(result.color).toBeUndefined() + expect(result.name).toBeUndefined() + }) + + test("plan override takes priority over prometheus for all model settings", () => { + //#given + const prometheusConfig = { + model: "anthropic/claude-opus-4-6", + variant: "max", + temperature: 0.1, + reasoningEffort: "high", + } + const planOverride = { + model: "openai/gpt-5.2", + variant: "high", + temperature: 0.5, + reasoningEffort: "low", + } + + //#when + const result = buildPlanDemoteConfig(prometheusConfig, planOverride) + + //#then + expect(result.model).toBe("openai/gpt-5.2") + expect(result.variant).toBe("high") + expect(result.temperature).toBe(0.5) + expect(result.reasoningEffort).toBe("low") + }) + + test("falls back to prometheus when plan override has partial settings", () => { + //#given + const prometheusConfig = { + model: "anthropic/claude-opus-4-6", + variant: "max", + temperature: 0.1, + reasoningEffort: "high", + } + const planOverride = { + model: "openai/gpt-5.2", + } + + //#when + const result = buildPlanDemoteConfig(prometheusConfig, planOverride) + + //#then - plan model wins, rest inherits from prometheus + expect(result.model).toBe("openai/gpt-5.2") + expect(result.variant).toBe("max") + expect(result.temperature).toBe(0.1) + expect(result.reasoningEffort).toBe("high") + }) + + test("skips undefined values from both sources", () => { + //#given + const prometheusConfig = { + model: "anthropic/claude-opus-4-6", + } + + //#when + const result = buildPlanDemoteConfig(prometheusConfig, undefined) + + //#then + expect(result).toEqual({ mode: "subagent", model: "anthropic/claude-opus-4-6" }) + expect(Object.keys(result)).toEqual(["mode", "model"]) + }) +}) diff --git a/src/plugin-handlers/plan-model-inheritance.ts b/src/plugin-handlers/plan-model-inheritance.ts new file mode 100644 index 00000000..bb32483c --- /dev/null +++ b/src/plugin-handlers/plan-model-inheritance.ts @@ -0,0 +1,27 @@ +const MODEL_SETTINGS_KEYS = [ + "model", + "variant", + "temperature", + "top_p", + "maxTokens", + "thinking", + "reasoningEffort", + "textVerbosity", + "providerOptions", +] as const + +export function buildPlanDemoteConfig( + prometheusConfig: Record | undefined, + planOverride: Record | undefined, +): Record { + const modelSettings: Record = {} + + for (const key of MODEL_SETTINGS_KEYS) { + const value = planOverride?.[key] ?? prometheusConfig?.[key] + if (value !== undefined) { + modelSettings[key] = value + } + } + + return { mode: "subagent" as const, ...modelSettings } +} diff --git a/src/plugin-handlers/tool-config-handler.ts b/src/plugin-handlers/tool-config-handler.ts index f55044bd..d587bc97 100644 --- a/src/plugin-handlers/tool-config-handler.ts +++ b/src/plugin-handlers/tool-config-handler.ts @@ -7,6 +7,10 @@ export function applyToolConfig(params: { pluginConfig: OhMyOpenCodeConfig; agentResult: Record; }): void { + const denyTodoTools = params.pluginConfig.experimental?.task_system + ? { todowrite: "deny", todoread: "deny" } + : {} + params.config.tools = { ...(params.config.tools as Record), "grep_app_*": false, @@ -39,6 +43,7 @@ export function applyToolConfig(params: { call_omo_agent: "deny", "task_*": "allow", teammate: "allow", + ...denyTodoTools, }; } if (params.agentResult.sisyphus) { @@ -50,6 +55,7 @@ export function applyToolConfig(params: { question: questionPermission, "task_*": "allow", teammate: "allow", + ...denyTodoTools, }; } if (params.agentResult.hephaestus) { @@ -59,6 +65,7 @@ export function applyToolConfig(params: { call_omo_agent: "deny", task: "allow", question: questionPermission, + ...denyTodoTools, }; } if (params.agentResult["prometheus"]) { @@ -70,6 +77,7 @@ export function applyToolConfig(params: { question: questionPermission, "task_*": "allow", teammate: "allow", + ...denyTodoTools, }; } if (params.agentResult["sisyphus-junior"]) { @@ -79,6 +87,7 @@ export function applyToolConfig(params: { task: "allow", "task_*": "allow", teammate: "allow", + ...denyTodoTools, }; } diff --git a/src/plugin/event.ts b/src/plugin/event.ts index bd05bce0..21f458f1 100644 --- a/src/plugin/event.ts +++ b/src/plugin/event.ts @@ -28,25 +28,26 @@ export function createEventHandler(args: { const { ctx, firstMessageVariantGate, managers, hooks } = args return async (input): Promise => { - await hooks.autoUpdateChecker?.event?.(input) - await hooks.claudeCodeHooks?.event?.(input) - await hooks.backgroundNotificationHook?.event?.(input) - await hooks.sessionNotification?.(input) - await hooks.todoContinuationEnforcer?.handler?.(input) - await hooks.unstableAgentBabysitter?.event?.(input) - await hooks.contextWindowMonitor?.event?.(input) - await hooks.directoryAgentsInjector?.event?.(input) - await hooks.directoryReadmeInjector?.event?.(input) - await hooks.rulesInjector?.event?.(input) - await hooks.thinkMode?.event?.(input) - await hooks.anthropicContextWindowLimitRecovery?.event?.(input) - await hooks.agentUsageReminder?.event?.(input) - await hooks.categorySkillReminder?.event?.(input) - await hooks.interactiveBashSession?.event?.(input) - await hooks.ralphLoop?.event?.(input) - await hooks.stopContinuationGuard?.event?.(input) - await hooks.compactionTodoPreserver?.event?.(input) - await hooks.atlasHook?.handler?.(input) + await Promise.resolve(hooks.autoUpdateChecker?.event?.(input)) + await Promise.resolve(hooks.claudeCodeHooks?.event?.(input)) + await Promise.resolve(hooks.backgroundNotificationHook?.event?.(input)) + await Promise.resolve(hooks.sessionNotification?.(input)) + await Promise.resolve(hooks.todoContinuationEnforcer?.handler?.(input)) + await Promise.resolve(hooks.taskContinuationEnforcer?.handler?.(input)) + await Promise.resolve(hooks.unstableAgentBabysitter?.event?.(input)) + await Promise.resolve(hooks.contextWindowMonitor?.event?.(input)) + await Promise.resolve(hooks.directoryAgentsInjector?.event?.(input)) + await Promise.resolve(hooks.directoryReadmeInjector?.event?.(input)) + await Promise.resolve(hooks.rulesInjector?.event?.(input)) + await Promise.resolve(hooks.thinkMode?.event?.(input)) + await Promise.resolve(hooks.anthropicContextWindowLimitRecovery?.event?.(input)) + await Promise.resolve(hooks.agentUsageReminder?.event?.(input)) + await Promise.resolve(hooks.categorySkillReminder?.event?.(input)) + await Promise.resolve(hooks.interactiveBashSession?.event?.(input)) + await Promise.resolve(hooks.ralphLoop?.event?.(input)) + await Promise.resolve(hooks.stopContinuationGuard?.event?.(input)) + await Promise.resolve(hooks.compactionTodoPreserver?.event?.(input)) + await Promise.resolve(hooks.atlasHook?.handler?.(input)) const { event } = input const props = event.properties as Record | undefined diff --git a/src/plugin/hooks/create-continuation-hooks.ts b/src/plugin/hooks/create-continuation-hooks.ts index 90d17eeb..4db6d33b 100644 --- a/src/plugin/hooks/create-continuation-hooks.ts +++ b/src/plugin/hooks/create-continuation-hooks.ts @@ -4,6 +4,7 @@ import type { PluginContext } from "../types" import { createTodoContinuationEnforcer, + createTaskContinuationEnforcer, createBackgroundNotificationHook, createStopContinuationGuardHook, createCompactionContextInjector, @@ -18,6 +19,7 @@ export type ContinuationHooks = { compactionContextInjector: ReturnType | null compactionTodoPreserver: ReturnType | null todoContinuationEnforcer: ReturnType | null + taskContinuationEnforcer: ReturnType | null unstableAgentBabysitter: ReturnType | null backgroundNotificationHook: ReturnType | null atlasHook: ReturnType | null @@ -68,14 +70,43 @@ export function createContinuationHooks(args: { })) : null + const taskContinuationEnforcer = isHookEnabled("task-continuation-enforcer") + ? safeHook("task-continuation-enforcer", () => + createTaskContinuationEnforcer(ctx, pluginConfig, { + backgroundManager, + isContinuationStopped: stopContinuationGuard?.isStopped, + })) + : null + const unstableAgentBabysitter = isHookEnabled("unstable-agent-babysitter") ? safeHook("unstable-agent-babysitter", () => createUnstableAgentBabysitter({ ctx, backgroundManager, pluginConfig })) : null - if (sessionRecovery && todoContinuationEnforcer) { - sessionRecovery.setOnAbortCallback(todoContinuationEnforcer.markRecovering) - sessionRecovery.setOnRecoveryCompleteCallback(todoContinuationEnforcer.markRecoveryComplete) + if (sessionRecovery) { + const onAbortCallbacks: Array<(sessionID: string) => void> = [] + const onRecoveryCompleteCallbacks: Array<(sessionID: string) => void> = [] + + if (todoContinuationEnforcer) { + onAbortCallbacks.push(todoContinuationEnforcer.markRecovering) + onRecoveryCompleteCallbacks.push(todoContinuationEnforcer.markRecoveryComplete) + } + if (taskContinuationEnforcer) { + onAbortCallbacks.push(taskContinuationEnforcer.markRecovering) + onRecoveryCompleteCallbacks.push(taskContinuationEnforcer.markRecoveryComplete) + } + + if (onAbortCallbacks.length > 0) { + sessionRecovery.setOnAbortCallback((sessionID: string) => { + for (const callback of onAbortCallbacks) callback(sessionID) + }) + } + + if (onRecoveryCompleteCallbacks.length > 0) { + sessionRecovery.setOnRecoveryCompleteCallback((sessionID: string) => { + for (const callback of onRecoveryCompleteCallbacks) callback(sessionID) + }) + } } const backgroundNotificationHook = isHookEnabled("background-notification") @@ -89,6 +120,7 @@ export function createContinuationHooks(args: { backgroundManager, isContinuationStopped: (sessionID: string) => stopContinuationGuard?.isStopped(sessionID) ?? false, + agentOverrides: pluginConfig.agents, })) : null @@ -97,6 +129,7 @@ export function createContinuationHooks(args: { compactionContextInjector, compactionTodoPreserver, todoContinuationEnforcer, + taskContinuationEnforcer, unstableAgentBabysitter, backgroundNotificationHook, atlasHook, diff --git a/src/plugin/tool-execute-before.ts b/src/plugin/tool-execute-before.ts index c7fefb0a..0af9605a 100644 --- a/src/plugin/tool-execute-before.ts +++ b/src/plugin/tool-execute-before.ts @@ -88,6 +88,7 @@ export function createToolExecuteBeforeHandler(args: { if (command === "stop-continuation" && sessionID) { hooks.stopContinuationGuard?.stop(sessionID) hooks.todoContinuationEnforcer?.cancelAllCountdowns() + hooks.taskContinuationEnforcer?.cancelAllCountdowns() hooks.ralphLoop?.cancelLoop(sessionID) clearBoulderState(ctx.directory) log("[stop-continuation] All continuation mechanisms stopped", { diff --git a/src/shared/git-worktree/collect-git-diff-stats.ts b/src/shared/git-worktree/collect-git-diff-stats.ts new file mode 100644 index 00000000..158a09d8 --- /dev/null +++ b/src/shared/git-worktree/collect-git-diff-stats.ts @@ -0,0 +1,29 @@ +import { execSync } from "node:child_process" +import { parseGitStatusPorcelain } from "./parse-status-porcelain" +import { parseGitDiffNumstat } from "./parse-diff-numstat" +import type { GitFileStat } from "./types" + +export function collectGitDiffStats(directory: string): GitFileStat[] { + try { + const diffOutput = execSync("git diff --numstat HEAD", { + cwd: directory, + encoding: "utf-8", + timeout: 5000, + stdio: ["pipe", "pipe", "pipe"], + }).trim() + + if (!diffOutput) return [] + + const statusOutput = execSync("git status --porcelain", { + cwd: directory, + encoding: "utf-8", + timeout: 5000, + stdio: ["pipe", "pipe", "pipe"], + }).trim() + + const statusMap = parseGitStatusPorcelain(statusOutput) + return parseGitDiffNumstat(diffOutput, statusMap) + } catch { + return [] + } +} diff --git a/src/shared/git-worktree/format-file-changes.ts b/src/shared/git-worktree/format-file-changes.ts new file mode 100644 index 00000000..5afb58b8 --- /dev/null +++ b/src/shared/git-worktree/format-file-changes.ts @@ -0,0 +1,46 @@ +import type { GitFileStat } from "./types" + +export function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string { + if (stats.length === 0) return "[FILE CHANGES SUMMARY]\nNo file changes detected.\n" + + const modified = stats.filter((s) => s.status === "modified") + const added = stats.filter((s) => s.status === "added") + const deleted = stats.filter((s) => s.status === "deleted") + + const lines: string[] = ["[FILE CHANGES SUMMARY]"] + + if (modified.length > 0) { + lines.push("Modified files:") + for (const f of modified) { + lines.push(` ${f.path} (+${f.added}, -${f.removed})`) + } + lines.push("") + } + + if (added.length > 0) { + lines.push("Created files:") + for (const f of added) { + lines.push(` ${f.path} (+${f.added})`) + } + lines.push("") + } + + if (deleted.length > 0) { + lines.push("Deleted files:") + for (const f of deleted) { + lines.push(` ${f.path} (-${f.removed})`) + } + lines.push("") + } + + if (notepadPath) { + const notepadStat = stats.find((s) => s.path.includes("notepad") || s.path.includes(".sisyphus")) + if (notepadStat) { + lines.push("[NOTEPAD UPDATED]") + lines.push(` ${notepadStat.path} (+${notepadStat.added})`) + lines.push("") + } + } + + return lines.join("\n") +} diff --git a/src/shared/git-worktree/git-worktree.test.ts b/src/shared/git-worktree/git-worktree.test.ts new file mode 100644 index 00000000..27183018 --- /dev/null +++ b/src/shared/git-worktree/git-worktree.test.ts @@ -0,0 +1,51 @@ +/// + +import { describe, expect, test } from "bun:test" +import { formatFileChanges, parseGitDiffNumstat, parseGitStatusPorcelain } from "./index" + +describe("git-worktree", () => { + test("#given status porcelain output #when parsing #then maps paths to statuses", () => { + const porcelain = [ + " M src/a.ts", + "A src/b.ts", + "?? src/c.ts", + "D src/d.ts", + ].join("\n") + + const map = parseGitStatusPorcelain(porcelain) + expect(map.get("src/a.ts")).toBe("modified") + expect(map.get("src/b.ts")).toBe("added") + expect(map.get("src/c.ts")).toBe("added") + expect(map.get("src/d.ts")).toBe("deleted") + }) + + test("#given diff numstat and status map #when parsing #then returns typed stats", () => { + const porcelain = [" M src/a.ts", "A src/b.ts"].join("\n") + const statusMap = parseGitStatusPorcelain(porcelain) + + const numstat = ["1\t2\tsrc/a.ts", "3\t0\tsrc/b.ts", "-\t-\tbin.dat"].join("\n") + const stats = parseGitDiffNumstat(numstat, statusMap) + + expect(stats).toEqual([ + { path: "src/a.ts", added: 1, removed: 2, status: "modified" }, + { path: "src/b.ts", added: 3, removed: 0, status: "added" }, + { path: "bin.dat", added: 0, removed: 0, status: "modified" }, + ]) + }) + + test("#given git file stats #when formatting #then produces grouped summary", () => { + const summary = formatFileChanges([ + { path: "src/a.ts", added: 1, removed: 2, status: "modified" }, + { path: "src/b.ts", added: 3, removed: 0, status: "added" }, + { path: "src/c.ts", added: 0, removed: 4, status: "deleted" }, + ]) + + expect(summary).toContain("[FILE CHANGES SUMMARY]") + expect(summary).toContain("Modified files:") + expect(summary).toContain("Created files:") + expect(summary).toContain("Deleted files:") + expect(summary).toContain("src/a.ts") + expect(summary).toContain("src/b.ts") + expect(summary).toContain("src/c.ts") + }) +}) diff --git a/src/shared/git-worktree/index.ts b/src/shared/git-worktree/index.ts new file mode 100644 index 00000000..0bc363d0 --- /dev/null +++ b/src/shared/git-worktree/index.ts @@ -0,0 +1,5 @@ +export type { GitFileStatus, GitFileStat } from "./types" +export { parseGitStatusPorcelain } from "./parse-status-porcelain" +export { parseGitDiffNumstat } from "./parse-diff-numstat" +export { collectGitDiffStats } from "./collect-git-diff-stats" +export { formatFileChanges } from "./format-file-changes" diff --git a/src/shared/git-worktree/parse-diff-numstat.ts b/src/shared/git-worktree/parse-diff-numstat.ts new file mode 100644 index 00000000..3ea2b0f6 --- /dev/null +++ b/src/shared/git-worktree/parse-diff-numstat.ts @@ -0,0 +1,27 @@ +import type { GitFileStat, GitFileStatus } from "./types" + +export function parseGitDiffNumstat( + output: string, + statusMap: Map +): GitFileStat[] { + if (!output) return [] + + const stats: GitFileStat[] = [] + for (const line of output.split("\n")) { + const parts = line.split("\t") + if (parts.length < 3) continue + + const [addedStr, removedStr, path] = parts + const added = addedStr === "-" ? 0 : parseInt(addedStr, 10) + const removed = removedStr === "-" ? 0 : parseInt(removedStr, 10) + + stats.push({ + path, + added, + removed, + status: statusMap.get(path) ?? "modified", + }) + } + + return stats +} diff --git a/src/shared/git-worktree/parse-status-porcelain.ts b/src/shared/git-worktree/parse-status-porcelain.ts new file mode 100644 index 00000000..0623de5d --- /dev/null +++ b/src/shared/git-worktree/parse-status-porcelain.ts @@ -0,0 +1,25 @@ +import type { GitFileStatus } from "./types" + +export function parseGitStatusPorcelain(output: string): Map { + const map = new Map() + if (!output) return map + + for (const line of output.split("\n")) { + if (!line) continue + + const status = line.substring(0, 2).trim() + const filePath = line.substring(3) + + if (!filePath) continue + + if (status === "A" || status === "??") { + map.set(filePath, "added") + } else if (status === "D") { + map.set(filePath, "deleted") + } else { + map.set(filePath, "modified") + } + } + + return map +} diff --git a/src/shared/git-worktree/types.ts b/src/shared/git-worktree/types.ts new file mode 100644 index 00000000..eb423699 --- /dev/null +++ b/src/shared/git-worktree/types.ts @@ -0,0 +1,8 @@ +export type GitFileStatus = "modified" | "added" | "deleted" + +export interface GitFileStat { + path: string + added: number + removed: number + status: GitFileStatus +} diff --git a/src/shared/index.ts b/src/shared/index.ts index e3262161..07d8bd86 100644 --- a/src/shared/index.ts +++ b/src/shared/index.ts @@ -46,5 +46,6 @@ export * from "./tmux" export * from "./model-suggestion-retry" export * from "./opencode-server-auth" export * from "./port-utils" +export * from "./git-worktree" export * from "./safe-create-hook" export * from "./truncate-description" diff --git a/src/shared/system-directive.ts b/src/shared/system-directive.ts index f2ae8c60..0b8ba4f9 100644 --- a/src/shared/system-directive.ts +++ b/src/shared/system-directive.ts @@ -48,6 +48,7 @@ export function removeSystemReminders(text: string): string { export const SystemDirectiveTypes = { TODO_CONTINUATION: "TODO CONTINUATION", + TASK_CONTINUATION: "TASK CONTINUATION", RALPH_LOOP: "RALPH LOOP", BOULDER_CONTINUATION: "BOULDER CONTINUATION", DELEGATION_REQUIRED: "DELEGATION REQUIRED", diff --git a/src/tools/delegate-task/constants.ts b/src/tools/delegate-task/constants.ts index 2bc8f95f..9aa9552a 100644 --- a/src/tools/delegate-task/constants.ts +++ b/src/tools/delegate-task/constants.ts @@ -535,18 +535,35 @@ export function buildPlanAgentSystemPrepend( } /** - * List of agent names that should be treated as plan agents. + * List of agent names that should be treated as plan agents (receive plan system prompt). * Case-insensitive matching is used. */ -export const PLAN_AGENT_NAMES = ["plan", "prometheus", "planner"] +export const PLAN_AGENT_NAMES = ["plan"] /** - * Check if the given agent name is a plan agent. - * @param agentName - The agent name to check - * @returns true if the agent is a plan agent + * Check if the given agent name is a plan agent (receives plan system prompt). */ export function isPlanAgent(agentName: string | undefined): boolean { if (!agentName) return false const lowerName = agentName.toLowerCase().trim() return PLAN_AGENT_NAMES.some(name => lowerName === name || lowerName.includes(name)) } + +/** + * Plan family: plan + prometheus. Shares mutual delegation blocking and task tool permission. + * Does NOT share system prompt (only isPlanAgent controls that). + */ +export const PLAN_FAMILY_NAMES = ["plan", "prometheus"] + +/** + * Check if the given agent belongs to the plan family (blocking + task permission). + */ +export function isPlanFamily(category: string): boolean +export function isPlanFamily(category: string | undefined): boolean +export function isPlanFamily(category: string | undefined): boolean { + if (!category) return false + const lowerCategory = category.toLowerCase().trim() + return PLAN_FAMILY_NAMES.some( + (name) => lowerCategory === name || lowerCategory.includes(name) + ) +} diff --git a/src/tools/delegate-task/executor-types.ts b/src/tools/delegate-task/executor-types.ts index 4a671613..534ebb68 100644 --- a/src/tools/delegate-task/executor-types.ts +++ b/src/tools/delegate-task/executor-types.ts @@ -1,5 +1,5 @@ import type { BackgroundManager } from "../../features/background-agent" -import type { CategoriesConfig, GitMasterConfig, BrowserAutomationProvider } from "../../config/schema" +import type { CategoriesConfig, GitMasterConfig, BrowserAutomationProvider, AgentOverrides } from "../../config/schema" import type { OpencodeClient } from "./types" export interface ExecutorContext { @@ -10,6 +10,7 @@ export interface ExecutorContext { gitMasterConfig?: GitMasterConfig sisyphusJuniorModel?: string browserProvider?: BrowserAutomationProvider + agentOverrides?: AgentOverrides onSyncSessionCreated?: (event: { sessionID: string; parentID: string; title: string }) => Promise } @@ -25,9 +26,10 @@ export interface SessionMessage { role?: string time?: { created?: number } agent?: string - model?: { providerID: string; modelID: string } + model?: { providerID: string; modelID: string; variant?: string } modelID?: string providerID?: string + variant?: string } parts?: Array<{ type?: string; text?: string }> } diff --git a/src/tools/delegate-task/subagent-resolver.ts b/src/tools/delegate-task/subagent-resolver.ts index ebfaa448..2ee6af35 100644 --- a/src/tools/delegate-task/subagent-resolver.ts +++ b/src/tools/delegate-task/subagent-resolver.ts @@ -1,15 +1,20 @@ import type { DelegateTaskArgs } from "./types" import type { ExecutorContext } from "./executor-types" -import { isPlanAgent } from "./constants" +import { isPlanFamily } from "./constants" import { SISYPHUS_JUNIOR_AGENT } from "./sisyphus-junior-agent" +import { parseModelString } from "./model-string-parser" +import { resolveModelPipeline } from "../../shared" +import { fetchAvailableModels } from "../../shared/model-availability" +import { readConnectedProvidersCache } from "../../shared/connected-providers-cache" +import { AGENT_MODEL_REQUIREMENTS } from "../../shared/model-requirements" export async function resolveSubagentExecution( args: DelegateTaskArgs, executorCtx: ExecutorContext, parentAgent: string | undefined, categoryExamples: string -): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string } | undefined; error?: string }> { - const { client } = executorCtx +): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string; variant?: string } | undefined; error?: string }> { + const { client, agentOverrides } = executorCtx if (!args.subagent_type?.trim()) { return { agentToUse: "", categoryModel: undefined, error: `Agent name cannot be empty.` } @@ -27,18 +32,18 @@ Sisyphus-Junior is spawned automatically when you specify a category. Pick the a } } - if (isPlanAgent(agentName) && isPlanAgent(parentAgent)) { + if (isPlanFamily(agentName) && isPlanFamily(parentAgent)) { return { agentToUse: "", categoryModel: undefined, - error: `You are prometheus. You cannot delegate to prometheus via task. + error: `You are a plan-family agent (plan/prometheus). You cannot delegate to other plan-family agents via task. Create the work plan directly - that's your job as the planning agent.`, } } let agentToUse = agentName - let categoryModel: { providerID: string; modelID: string } | undefined + let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined try { const agentsResult = await client.app.agents() @@ -76,7 +81,41 @@ Create the work plan directly - that's your job as the planning agent.`, agentToUse = matchedAgent.name - if (matchedAgent.model) { + const agentNameLower = agentToUse.toLowerCase() + const agentOverride = agentOverrides?.[agentNameLower as keyof typeof agentOverrides] + ?? (agentOverrides ? Object.entries(agentOverrides).find(([key]) => key.toLowerCase() === agentNameLower)?.[1] : undefined) + const agentRequirement = AGENT_MODEL_REQUIREMENTS[agentNameLower] + + if (agentOverride?.model || agentRequirement) { + const connectedProviders = readConnectedProvidersCache() + const availableModels = await fetchAvailableModels(client, { + connectedProviders: connectedProviders ?? undefined, + }) + + const matchedAgentModelStr = matchedAgent.model + ? `${matchedAgent.model.providerID}/${matchedAgent.model.modelID}` + : undefined + + const resolution = resolveModelPipeline({ + intent: { + userModel: agentOverride?.model, + categoryDefaultModel: matchedAgentModelStr, + }, + constraints: { availableModels }, + policy: { + fallbackChain: agentRequirement?.fallbackChain, + systemDefaultModel: undefined, + }, + }) + + if (resolution) { + const parsed = parseModelString(resolution.model) + if (parsed) { + const variantToUse = agentOverride?.variant ?? resolution.variant + categoryModel = variantToUse ? { ...parsed, variant: variantToUse } : parsed + } + } + } else if (matchedAgent.model) { categoryModel = matchedAgent.model } } catch { diff --git a/src/tools/delegate-task/sync-continuation.ts b/src/tools/delegate-task/sync-continuation.ts index 8b772bc7..653df39e 100644 --- a/src/tools/delegate-task/sync-continuation.ts +++ b/src/tools/delegate-task/sync-continuation.ts @@ -1,9 +1,9 @@ import type { DelegateTaskArgs, ToolContextWithMetadata } from "./types" import type { ExecutorContext, SessionMessage } from "./executor-types" -import { getTimingConfig } from "./timing" +import { isPlanFamily } from "./constants" import { storeToolMetadata } from "../../features/tool-metadata-store" import { getTaskToastManager } from "../../features/task-toast-manager" -import { getAgentToolRestrictions, getMessageDir } from "../../shared" +import { getAgentToolRestrictions, getMessageDir, promptSyncWithModelSuggestionRetry } from "../../shared" import { findNearestMessageWithFields } from "../../features/hook-message-injector" import { formatDuration } from "./time-formatter" @@ -46,6 +46,7 @@ export async function executeSyncContinuation( try { let resumeAgent: string | undefined let resumeModel: { providerID: string; modelID: string } | undefined + let resumeVariant: string | undefined try { const messagesResp = await client.session.messages({ path: { id: args.session_id! } }) @@ -55,6 +56,7 @@ export async function executeSyncContinuation( if (info?.agent || info?.model || (info?.modelID && info?.providerID)) { resumeAgent = info.agent resumeModel = info.model ?? (info.providerID && info.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined) + resumeVariant = info.variant break } } @@ -65,22 +67,26 @@ export async function executeSyncContinuation( resumeModel = resumeMessage?.model?.providerID && resumeMessage?.model?.modelID ? { providerID: resumeMessage.model.providerID, modelID: resumeMessage.model.modelID } : undefined + resumeVariant = resumeMessage?.model?.variant } - await (client.session as any).promptAsync({ - path: { id: args.session_id! }, - body: { - ...(resumeAgent !== undefined ? { agent: resumeAgent } : {}), - ...(resumeModel !== undefined ? { model: resumeModel } : {}), - tools: { - ...(resumeAgent ? getAgentToolRestrictions(resumeAgent) : {}), - task: false, - call_omo_agent: true, - question: false, - }, - parts: [{ type: "text", text: args.prompt }], - }, - }) + const allowTask = isPlanFamily(resumeAgent) + + await promptSyncWithModelSuggestionRetry(client, { + path: { id: args.session_id! }, + body: { + ...(resumeAgent !== undefined ? { agent: resumeAgent } : {}), + ...(resumeModel !== undefined ? { model: resumeModel } : {}), + ...(resumeVariant !== undefined ? { variant: resumeVariant } : {}), + tools: { + ...(resumeAgent ? getAgentToolRestrictions(resumeAgent) : {}), + task: allowTask, + call_omo_agent: true, + question: false, + }, + parts: [{ type: "text", text: args.prompt }], + }, + }) } catch (promptError) { if (toastManager) { toastManager.removeTask(taskId) @@ -89,30 +95,6 @@ export async function executeSyncContinuation( return `Failed to send continuation prompt: ${errorMessage}\n\nSession ID: ${args.session_id}` } - const timing = getTimingConfig() - const pollStart = Date.now() - let lastMsgCount = 0 - let stablePolls = 0 - - while (Date.now() - pollStart < 60000) { - await new Promise(resolve => setTimeout(resolve, timing.POLL_INTERVAL_MS)) - - const elapsed = Date.now() - pollStart - if (elapsed < timing.SESSION_CONTINUATION_STABILITY_MS) continue - - const messagesCheck = await client.session.messages({ path: { id: args.session_id! } }) - const msgs = ((messagesCheck as { data?: unknown }).data ?? messagesCheck) as Array - const currentMsgCount = msgs.length - - if (currentMsgCount > 0 && currentMsgCount === lastMsgCount) { - stablePolls++ - if (stablePolls >= timing.STABILITY_POLLS_REQUIRED) break - } else { - stablePolls = 0 - lastMsgCount = currentMsgCount - } - } - const messagesResult = await client.session.messages({ path: { id: args.session_id! }, }) diff --git a/src/tools/delegate-task/sync-prompt-sender.ts b/src/tools/delegate-task/sync-prompt-sender.ts index d3a9f8e6..d0829878 100644 --- a/src/tools/delegate-task/sync-prompt-sender.ts +++ b/src/tools/delegate-task/sync-prompt-sender.ts @@ -1,6 +1,6 @@ import type { DelegateTaskArgs, OpencodeClient } from "./types" -import { isPlanAgent } from "./constants" -import { promptWithModelSuggestionRetry } from "../../shared" +import { isPlanFamily } from "./constants" +import { promptSyncWithModelSuggestionRetry } from "../../shared" import { formatDetailedError } from "./error-formatting" export async function sendSyncPrompt( @@ -16,8 +16,8 @@ export async function sendSyncPrompt( } ): Promise { try { - const allowTask = isPlanAgent(input.agentToUse) - await promptWithModelSuggestionRetry(client, { + const allowTask = isPlanFamily(input.agentToUse) + await promptSyncWithModelSuggestionRetry(client, { path: { id: input.sessionID }, body: { agent: input.agentToUse, diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 63a42297..4f32addf 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -1,6 +1,6 @@ declare const require: (name: string) => any -const { describe, test, expect, beforeEach, afterEach, spyOn } = require("bun:test") -import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, isPlanAgent, PLAN_AGENT_NAMES } from "./constants" +const { describe, test, expect, beforeEach, afterEach, spyOn, mock } = require("bun:test") +import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, isPlanAgent, PLAN_AGENT_NAMES, isPlanFamily, PLAN_FAMILY_NAMES } from "./constants" import { resolveCategoryConfig } from "./tools" import type { CategoryConfig } from "../../config/schema" import { __resetModelCache } from "../../shared/model-availability" @@ -135,19 +135,19 @@ describe("sisyphus-task", () => { expect(result).toBe(true) }) - test("returns true for 'prometheus'", () => { - // given / #when + test("returns false for 'prometheus' (decoupled from plan)", () => { + //#given / #when const result = isPlanAgent("prometheus") - // then - expect(result).toBe(true) + //#then - prometheus is NOT a plan agent + expect(result).toBe(false) }) - test("returns true for 'planner'", () => { - // given / #when + test("returns true for 'planner' (matches via includes('plan'))", () => { + //#given / #when const result = isPlanAgent("planner") - // then + //#then - "planner" contains "plan" so it matches via includes expect(result).toBe(true) }) @@ -159,12 +159,12 @@ describe("sisyphus-task", () => { expect(result).toBe(true) }) - test("returns true for case-insensitive match 'Prometheus'", () => { - // given / #when + test("returns false for case-insensitive match 'Prometheus' (decoupled from plan)", () => { + //#given / #when const result = isPlanAgent("Prometheus") - // then - expect(result).toBe(true) + //#then - Prometheus is NOT a plan agent + expect(result).toBe(false) }) test("returns false for 'oracle'", () => { @@ -199,11 +199,44 @@ describe("sisyphus-task", () => { expect(result).toBe(false) }) - test("PLAN_AGENT_NAMES contains expected values", () => { - // given / #when / #then - expect(PLAN_AGENT_NAMES).toContain("plan") - expect(PLAN_AGENT_NAMES).toContain("prometheus") - expect(PLAN_AGENT_NAMES).toContain("planner") + test("PLAN_AGENT_NAMES contains only plan", () => { + //#given / #when / #then + expect(PLAN_AGENT_NAMES).toEqual(["plan"]) + }) + }) + + describe("isPlanFamily", () => { + test("returns true for 'plan'", () => { + //#given / #when + const result = isPlanFamily("plan") + //#then + expect(result).toBe(true) + }) + + test("returns true for 'prometheus'", () => { + //#given / #when + const result = isPlanFamily("prometheus") + //#then + expect(result).toBe(true) + }) + + test("returns false for 'oracle'", () => { + //#given / #when + const result = isPlanFamily("oracle") + //#then + expect(result).toBe(false) + }) + + test("returns false for undefined", () => { + //#given / #when + const result = isPlanFamily(undefined) + //#then + expect(result).toBe(false) + }) + + test("PLAN_FAMILY_NAMES contains plan and prometheus", () => { + //#given / #when / #then + expect(PLAN_FAMILY_NAMES).toEqual(["plan", "prometheus"]) }) }) @@ -852,42 +885,42 @@ describe("sisyphus-task", () => { test("skills parameter is required - throws error when not provided", async () => { // given const { createDelegateTask } = require("./tools") - - const mockManager = { launch: async () => ({}) } - const mockClient = { - app: { agents: async () => ({ data: [] }) }, - config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - session: { - create: async () => ({ data: { id: "test-session" } }), - prompt: async () => ({ data: {} }), - promptAsync: async () => ({ data: {} }), - messages: async () => ({ data: [] }), - }, - } - - const tool = createDelegateTask({ - manager: mockManager, - client: mockClient, - }) - - const toolContext = { - sessionID: "parent-session", - messageID: "parent-message", - agent: "sisyphus", - abort: new AbortController().signal, - } - - // when - skills not provided (undefined) - // then - should throw error about missing skills - await expect(tool.execute( - { - description: "Test task", - prompt: "Do something", - category: "ultrabrain", - run_in_background: false, - }, - toolContext - )).rejects.toThrow("IT IS HIGHLY RECOMMENDED") + + const mockManager = { launch: async () => ({}) } + const mockClient = { + app: { agents: async () => ({ data: [] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + create: async () => ({ data: { id: "test-session" } }), + prompt: async () => ({ data: {} }), + promptAsync: async () => ({ data: {} }), + messages: async () => ({ data: [] }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - skills not provided (undefined) + // then - should throw error about missing skills + await expect(tool.execute( + { + description: "Test task", + prompt: "Do something", + category: "ultrabrain", + run_in_background: false, + }, + toolContext + )).rejects.toThrow("IT IS HIGHLY RECOMMENDED") }) test("null skills throws error", async () => { @@ -1055,6 +1088,75 @@ describe("sisyphus-task", () => { expect(result).not.toContain("Background task continued") }, { timeout: 10000 }) + test("sync continuation preserves variant from previous session message", async () => { + //#given a session with a previous message that has variant "max" + const { createDelegateTask } = require("./tools") + + const promptMock = mock(async (input: any) => { + return { data: {} } + }) + + const mockClient = { + session: { + prompt: promptMock, + promptAsync: async () => ({ data: {} }), + messages: async () => ({ + data: [ + { + info: { + role: "user", + agent: "sisyphus-junior", + model: { providerID: "anthropic", modelID: "claude-opus-4-6" }, + variant: "max", + time: { created: Date.now() }, + }, + parts: [{ type: "text", text: "previous message" }], + }, + { + info: { role: "assistant", time: { created: Date.now() + 1 } }, + parts: [{ type: "text", text: "Completed." }], + }, + ], + }), + }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + app: { + agents: async () => ({ data: [] }), + }, + } + + const tool = createDelegateTask({ + manager: { resume: async () => ({ id: "task-var", sessionID: "ses_var_test", description: "Variant test", agent: "sisyphus-junior", status: "running" }) }, + client: mockClient, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + //#when continuing the session + await tool.execute( + { + description: "Continue with variant", + prompt: "Continue the task", + session_id: "ses_var_test", + run_in_background: false, + load_skills: [], + }, + toolContext + ) + + //#then prompt should include variant from previous message + expect(promptMock).toHaveBeenCalled() + const callArgs = promptMock.mock.calls[0][0] + expect(callArgs.body.variant).toBe("max") + expect(callArgs.body.agent).toBe("sisyphus-junior") + expect(callArgs.body.model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-6" }) + }, { timeout: 10000 }) + test("session_id with background=true should return immediately without waiting", async () => { // given const { createDelegateTask } = require("./tools") @@ -2058,6 +2160,127 @@ describe("sisyphus-task", () => { expect(launchInput.model.providerID).toBe("openai") expect(launchInput.model.modelID).toBe("gpt-5.3-codex") }) + + test("sisyphus-junior model override works with quick category (#1295)", async () => { + // given - user configures agents.sisyphus-junior.model but uses quick category + const { createDelegateTask } = require("./tools") + let launchInput: any + + const mockManager = { + launch: async (input: any) => { + launchInput = input + return { + id: "task-1295-quick", + sessionID: "ses_1295_quick", + description: "Issue 1295 regression", + agent: "sisyphus-junior", + status: "running", + } + }, + } + + const mockClient = { + app: { agents: async () => ({ data: [] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + model: { list: async () => [] }, + session: { + create: async () => ({ data: { id: "test-session" } }), + prompt: async () => ({ data: {} }), + messages: async () => ({ data: [] }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + sisyphusJuniorModel: "anthropic/claude-sonnet-4-5", + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - using quick category (default: anthropic/claude-haiku-4-5) + await tool.execute( + { + description: "Issue 1295 quick category test", + prompt: "Quick task", + category: "quick", + run_in_background: true, + load_skills: [], + }, + toolContext + ) + + // then - sisyphus-junior override model should be used, not category default + expect(launchInput.model.providerID).toBe("anthropic") + expect(launchInput.model.modelID).toBe("claude-sonnet-4-5") + }) + + test("sisyphus-junior model override works with user-defined category (#1295)", async () => { + // given - user has a custom category with no model requirement + const { createDelegateTask } = require("./tools") + let launchInput: any + + const mockManager = { + launch: async (input: any) => { + launchInput = input + return { + id: "task-1295-custom", + sessionID: "ses_1295_custom", + description: "Issue 1295 custom category", + agent: "sisyphus-junior", + status: "running", + } + }, + } + + const mockClient = { + app: { agents: async () => ({ data: [] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + model: { list: async () => [] }, + session: { + create: async () => ({ data: { id: "test-session" } }), + prompt: async () => ({ data: {} }), + messages: async () => ({ data: [] }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + sisyphusJuniorModel: "openai/gpt-5.2", + userCategories: { + "my-custom": { temperature: 0.5 }, + }, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - using custom category with no explicit model + await tool.execute( + { + description: "Custom category with agent model", + prompt: "Do something custom", + category: "my-custom", + run_in_background: true, + load_skills: [], + }, + toolContext + ) + + // then - sisyphus-junior override model should be used as fallback + expect(launchInput.model.providerID).toBe("openai") + expect(launchInput.model.modelID).toBe("gpt-5.2") + }) }) describe("browserProvider propagation", () => { @@ -2258,68 +2481,36 @@ describe("sisyphus-task", () => { expect(result).toBe(buildPlanAgentSystemPrepend(availableCategories, availableSkills)) }) - test("prepends plan agent system prompt when agentName is 'prometheus'", () => { - // given + test("does not prepend plan agent prompt for prometheus agent", () => { + //#given - prometheus is NOT a plan agent (decoupled) const { buildSystemContent } = require("./tools") - const { buildPlanAgentSystemPrepend } = require("./constants") + const skillContent = "You are a strategic planner" - const availableCategories = [ - { - name: "ultrabrain", - description: "Complex architecture, deep logical reasoning", - model: "openai/gpt-5.3-codex", - }, - ] - const availableSkills = [ - { - name: "git-master", - description: "Atomic commits, git operations.", - location: "plugin", - }, - ] - - // when + //#when const result = buildSystemContent({ + skillContent, agentName: "prometheus", - availableCategories, - availableSkills, }) - // then - expect(result).toContain("") - expect(result).toBe(buildPlanAgentSystemPrepend(availableCategories, availableSkills)) + //#then - prometheus should NOT get plan agent system prepend + expect(result).toBe(skillContent) + expect(result).not.toContain("MANDATORY CONTEXT GATHERING PROTOCOL") }) - test("prepends plan agent system prompt when agentName is 'Prometheus' (case insensitive)", () => { - // given + test("does not prepend plan agent prompt for Prometheus (case insensitive)", () => { + //#given - Prometheus (capitalized) is NOT a plan agent const { buildSystemContent } = require("./tools") - const { buildPlanAgentSystemPrepend } = require("./constants") + const skillContent = "You are a strategic planner" - const availableCategories = [ - { - name: "quick", - description: "Trivial tasks", - model: "anthropic/claude-haiku-4-5", - }, - ] - const availableSkills = [ - { - name: "dev-browser", - description: "Persistent browser state automation.", - location: "plugin", - }, - ] - - // when + //#when const result = buildSystemContent({ + skillContent, agentName: "Prometheus", - availableCategories, - availableSkills, }) - // then - expect(result).toContain("") - expect(result).toBe(buildPlanAgentSystemPrepend(availableCategories, availableSkills)) + //#then + expect(result).toBe(skillContent) + expect(result).not.toContain("MANDATORY CONTEXT GATHERING PROTOCOL") }) test("combines plan agent prepend with skill content", () => { @@ -2565,149 +2756,95 @@ describe("sisyphus-task", () => { }) }) - describe("prometheus self-delegation block", () => { - test("prometheus cannot delegate to prometheus - returns error with guidance", async () => { - // given - current agent is prometheus + describe("plan family mutual delegation block", () => { + test("plan cannot delegate to plan (self-delegation)", async () => { + //#given const { createDelegateTask } = require("./tools") + const mockClient = { + app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { get: async () => ({ data: { directory: "/project" } }), create: async () => ({ data: { id: "s" } }), prompt: async () => ({ data: {} }), promptAsync: async () => ({ data: {} }), messages: async () => ({ data: [] }), status: async () => ({ data: {} }) }, + } + const tool = createDelegateTask({ manager: { launch: async () => ({}) }, client: mockClient }) - const mockManager = { launch: async () => ({}) } + //#when + const result = await tool.execute( + { description: "test", prompt: "Create a plan", subagent_type: "plan", run_in_background: false, load_skills: [] }, + { sessionID: "p", messageID: "m", agent: "plan", abort: new AbortController().signal } + ) + + //#then + expect(result).toContain("plan-family") + expect(result).toContain("directly") + }) + + test("prometheus cannot delegate to plan (cross-blocking)", async () => { + //#given + const { createDelegateTask } = require("./tools") + const mockClient = { + app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { get: async () => ({ data: { directory: "/project" } }), create: async () => ({ data: { id: "s" } }), prompt: async () => ({ data: {} }), promptAsync: async () => ({ data: {} }), messages: async () => ({ data: [] }), status: async () => ({ data: {} }) }, + } + const tool = createDelegateTask({ manager: { launch: async () => ({}) }, client: mockClient }) + + //#when + const result = await tool.execute( + { description: "test", prompt: "Create a plan", subagent_type: "plan", run_in_background: false, load_skills: [] }, + { sessionID: "p", messageID: "m", agent: "prometheus", abort: new AbortController().signal } + ) + + //#then + expect(result).toContain("plan-family") + }) + + test("plan cannot delegate to prometheus (cross-blocking)", async () => { + //#given + const { createDelegateTask } = require("./tools") const mockClient = { app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - session: { - get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "test-session" } }), - prompt: async () => ({ data: {} }), - promptAsync: async () => ({ data: {} }), - messages: async () => ({ data: [] }), - status: async () => ({ data: {} }), - }, + session: { get: async () => ({ data: { directory: "/project" } }), create: async () => ({ data: { id: "s" } }), prompt: async () => ({ data: {} }), promptAsync: async () => ({ data: {} }), messages: async () => ({ data: [] }), status: async () => ({ data: {} }) }, } - - const tool = createDelegateTask({ - manager: mockManager, - client: mockClient, - }) - - const toolContext = { - sessionID: "parent-session", - messageID: "parent-message", - agent: "prometheus", - abort: new AbortController().signal, - } + const tool = createDelegateTask({ manager: { launch: async () => ({}) }, client: mockClient }) - // when - prometheus tries to delegate to prometheus + //#when const result = await tool.execute( - { - description: "Test self-delegation block", - prompt: "Create a plan", - subagent_type: "prometheus", - run_in_background: false, - load_skills: [], - }, - toolContext + { description: "test", prompt: "Execute", subagent_type: "prometheus", run_in_background: false, load_skills: [] }, + { sessionID: "p", messageID: "m", agent: "plan", abort: new AbortController().signal } ) - // then - should return error telling prometheus to create plan directly - expect(result).toContain("prometheus") - expect(result).toContain("directly") + //#then + expect(result).toContain("plan-family") }) - test("non-prometheus agent CAN delegate to prometheus - proceeds normally", async () => { - // given - current agent is sisyphus + test("sisyphus CAN delegate to plan (not in plan family)", async () => { + //#given const { createDelegateTask } = require("./tools") - - const mockManager = { launch: async () => ({}) } - const mockClient = { - app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, + const mockClient = { + app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, session: { get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "ses_prometheus_allowed" } }), + create: async () => ({ data: { id: "ses_ok" } }), prompt: async () => ({ data: {} }), promptAsync: async () => ({ data: {} }), - messages: async () => ({ - data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created successfully" }] }] - }), - status: async () => ({ data: { "ses_prometheus_allowed": { type: "idle" } } }), + messages: async () => ({ data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created" }] }] }), + status: async () => ({ data: { "ses_ok": { type: "idle" } } }), }, } - - const tool = createDelegateTask({ - manager: mockManager, - client: mockClient, - }) + const tool = createDelegateTask({ manager: { launch: async () => ({}) }, client: mockClient }) - const toolContext = { - sessionID: "parent-session", - messageID: "parent-message", - agent: "sisyphus", - abort: new AbortController().signal, - } - - // when - sisyphus delegates to prometheus + //#when const result = await tool.execute( - { - description: "Test prometheus delegation from non-prometheus agent", - prompt: "Create a plan", - subagent_type: "prometheus", - run_in_background: false, - load_skills: [], - }, - toolContext + { description: "test", prompt: "Create a plan", subagent_type: "plan", run_in_background: false, load_skills: [] }, + { sessionID: "p", messageID: "m", agent: "sisyphus", abort: new AbortController().signal } ) - // then - should proceed normally - expect(result).not.toContain("Cannot delegate") - expect(result).toContain("Plan created successfully") + //#then + expect(result).not.toContain("plan-family") + expect(result).toContain("Plan created") }, { timeout: 20000 }) - - test("case-insensitive: Prometheus (capitalized) cannot delegate to prometheus", async () => { - // given - current agent is Prometheus (capitalized) - const { createDelegateTask } = require("./tools") - - const mockManager = { launch: async () => ({}) } - const mockClient = { - app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, - config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - session: { - get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "test-session" } }), - prompt: async () => ({ data: {} }), - promptAsync: async () => ({ data: {} }), - messages: async () => ({ data: [] }), - status: async () => ({ data: {} }), - }, - } - - const tool = createDelegateTask({ - manager: mockManager, - client: mockClient, - }) - - const toolContext = { - sessionID: "parent-session", - messageID: "parent-message", - agent: "Prometheus", - abort: new AbortController().signal, - } - - // when - Prometheus tries to delegate to prometheus - const result = await tool.execute( - { - description: "Test case-insensitive block", - prompt: "Create a plan", - subagent_type: "prometheus", - run_in_background: false, - load_skills: [], - }, - toolContext - ) - - // then - should still return error - expect(result).toContain("prometheus") - expect(result).toContain("directly") - }) }) describe("subagent_type model extraction (issue #1225)", () => { @@ -2841,8 +2978,8 @@ describe("sisyphus-task", () => { }) }, { timeout: 20000 }) - test("agent without model does not override categoryModel", async () => { - // given - agent registered without model field + test("agent without model resolves via fallback chain", async () => { + // given - agent registered without model field, fallback chain should resolve const { createDelegateTask } = require("./tools") let promptBody: any @@ -2857,7 +2994,7 @@ describe("sisyphus-task", () => { app: { agents: async () => ({ data: [ - { name: "explore", mode: "subagent" }, // no model field + { name: "explore", mode: "subagent" }, ], }), }, @@ -2898,14 +3035,211 @@ describe("sisyphus-task", () => { toolContext ) - // then - no model should be passed to session.prompt - expect(promptBody.model).toBeUndefined() + // then - model should be resolved via AGENT_MODEL_REQUIREMENTS fallback chain + expect(promptBody.model).toBeDefined() + }, { timeout: 20000 }) + + test("agentOverrides model takes priority over matchedAgent.model (#1357)", async () => { + // given - user configured oracle to use a specific model in oh-my-opencode.json + const { createDelegateTask } = require("./tools") + let promptBody: any + + const mockManager = { launch: async () => ({}) } + + const promptMock = async (input: any) => { + promptBody = input.body + return { data: {} } + } + + const mockClient = { + app: { + agents: async () => ({ + data: [ + { name: "oracle", mode: "subagent", model: { providerID: "openai", modelID: "gpt-5.2" } }, + ], + }), + }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + get: async () => ({ data: { directory: "/project" } }), + create: async () => ({ data: { id: "ses_override_model" } }), + prompt: promptMock, + promptAsync: promptMock, + messages: async () => ({ + data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }], + }), + status: async () => ({ data: { "ses_override_model": { type: "idle" } } }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + agentOverrides: { + oracle: { model: "anthropic/claude-opus-4-6" }, + }, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - delegating to oracle via subagent_type with user override + await tool.execute( + { + description: "Consult oracle with override", + prompt: "Review architecture", + subagent_type: "oracle", + run_in_background: false, + load_skills: [], + }, + toolContext + ) + + // then - user-configured model should take priority over matchedAgent.model + expect(promptBody.model).toEqual({ + providerID: "anthropic", + modelID: "claude-opus-4-6", + }) + }, { timeout: 20000 }) + + test("agentOverrides variant is applied when model is overridden (#1357)", async () => { + // given - user configured oracle with model and variant + const { createDelegateTask } = require("./tools") + let promptBody: any + + const mockManager = { launch: async () => ({}) } + + const promptMock = async (input: any) => { + promptBody = input.body + return { data: {} } + } + + const mockClient = { + app: { + agents: async () => ({ + data: [ + { name: "oracle", mode: "subagent", model: { providerID: "openai", modelID: "gpt-5.2" } }, + ], + }), + }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + get: async () => ({ data: { directory: "/project" } }), + create: async () => ({ data: { id: "ses_variant_test" } }), + prompt: promptMock, + promptAsync: promptMock, + messages: async () => ({ + data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }], + }), + status: async () => ({ data: { "ses_variant_test": { type: "idle" } } }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + agentOverrides: { + oracle: { model: "anthropic/claude-opus-4-6", variant: "max" }, + }, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - delegating to oracle via subagent_type with variant override + await tool.execute( + { + description: "Consult oracle with variant", + prompt: "Review architecture", + subagent_type: "oracle", + run_in_background: false, + load_skills: [], + }, + toolContext + ) + + // then - user-configured variant should be applied + expect(promptBody.variant).toBe("max") + }, { timeout: 20000 }) + + test("fallback chain resolves model when no override and no matchedAgent.model (#1357)", async () => { + // given - agent registered without model, no override, but AGENT_MODEL_REQUIREMENTS has fallback + const { createDelegateTask } = require("./tools") + let promptBody: any + + const mockManager = { launch: async () => ({}) } + + const promptMock = async (input: any) => { + promptBody = input.body + return { data: {} } + } + + const mockClient = { + app: { + agents: async () => ({ + data: [ + { name: "oracle", mode: "subagent" }, // no model field + ], + }), + }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + get: async () => ({ data: { directory: "/project" } }), + create: async () => ({ data: { id: "ses_fallback_test" } }), + prompt: promptMock, + promptAsync: promptMock, + messages: async () => ({ + data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }], + }), + status: async () => ({ data: { "ses_fallback_test": { type: "idle" } } }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + // no agentOverrides + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - delegating to oracle with no override and no matchedAgent model + await tool.execute( + { + description: "Consult oracle with fallback", + prompt: "Review architecture", + subagent_type: "oracle", + run_in_background: false, + load_skills: [], + }, + toolContext + ) + + // then - should resolve via AGENT_MODEL_REQUIREMENTS fallback chain for oracle + // oracle fallback chain: gpt-5.2 (openai) > gemini-3-pro (google) > claude-opus-4-6 (anthropic) + // Since openai is in connectedProviders, should resolve to openai/gpt-5.2 + expect(promptBody.model).toBeDefined() + expect(promptBody.model.providerID).toBe("openai") + expect(promptBody.model.modelID).toContain("gpt-5.2") }, { timeout: 20000 }) }) - describe("prometheus subagent task permission", () => { - test("prometheus subagent should have task permission enabled", async () => { - // given - sisyphus delegates to prometheus + describe("subagent task permission", () => { + test("plan subagent should have task permission enabled", async () => { + //#given - sisyphus delegates to plan agent const { createDelegateTask } = require("./tools") let promptBody: any @@ -2917,17 +3251,17 @@ describe("sisyphus-task", () => { } const mockClient = { - app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, + app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, session: { get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "ses_prometheus_delegate" } }), + create: async () => ({ data: { id: "ses_plan_delegate" } }), prompt: promptMock, promptAsync: promptMock, messages: async () => ({ data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created" }] }] }), - status: async () => ({ data: { "ses_prometheus_delegate": { type: "idle" } } }), + status: async () => ({ data: { "ses_plan_delegate": { type: "idle" } } }), }, } @@ -2943,24 +3277,53 @@ describe("sisyphus-task", () => { abort: new AbortController().signal, } - // when - sisyphus delegates to prometheus + //#when - sisyphus delegates to plan await tool.execute( { - description: "Test prometheus task permission", + description: "Test plan task permission", prompt: "Create a plan", - subagent_type: "prometheus", + subagent_type: "plan", run_in_background: false, load_skills: [], }, toolContext ) - // then - prometheus should have task permission + //#then - plan agent should have task permission expect(promptBody.tools.task).toBe(true) }, { timeout: 20000 }) - test("non-prometheus subagent should NOT have task permission", async () => { - // given - sisyphus delegates to oracle (non-prometheus) + test("prometheus subagent should have task permission (plan family)", async () => { + //#given + const { createDelegateTask } = require("./tools") + let promptBody: any + const promptMock = async (input: any) => { promptBody = input.body; return { data: {} } } + const mockClient = { + app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + get: async () => ({ data: { directory: "/project" } }), + create: async () => ({ data: { id: "ses_prometheus_task" } }), + prompt: promptMock, + promptAsync: promptMock, + messages: async () => ({ data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created" }] }] }), + status: async () => ({ data: { "ses_prometheus_task": { type: "idle" } } }), + }, + } + const tool = createDelegateTask({ manager: { launch: async () => ({}) }, client: mockClient }) + + //#when + await tool.execute( + { description: "Test prometheus task permission", prompt: "Create a plan", subagent_type: "prometheus", run_in_background: false, load_skills: [] }, + { sessionID: "p", messageID: "m", agent: "sisyphus", abort: new AbortController().signal } + ) + + //#then + expect(promptBody.tools.task).toBe(true) + }, { timeout: 20000 }) + + test("non-plan subagent should NOT have task permission", async () => { + //#given - sisyphus delegates to oracle (non-plan) const { createDelegateTask } = require("./tools") let promptBody: any diff --git a/src/tools/delegate-task/types.ts b/src/tools/delegate-task/types.ts index 1fb4b4a6..1646b1fe 100644 --- a/src/tools/delegate-task/types.ts +++ b/src/tools/delegate-task/types.ts @@ -1,6 +1,6 @@ import type { PluginInput } from "@opencode-ai/plugin" import type { BackgroundManager } from "../../features/background-agent" -import type { CategoriesConfig, GitMasterConfig, BrowserAutomationProvider } from "../../config/schema" +import type { CategoriesConfig, GitMasterConfig, BrowserAutomationProvider, AgentOverrides } from "../../config/schema" import type { AvailableCategory, AvailableSkill, @@ -53,6 +53,7 @@ export interface DelegateTaskToolOptions { disabledSkills?: Set availableCategories?: AvailableCategory[] availableSkills?: AvailableSkill[] + agentOverrides?: AgentOverrides onSyncSessionCreated?: (event: SyncSessionCreatedEvent) => Promise } diff --git a/src/tools/lsp/client.test.ts b/src/tools/lsp/client.test.ts index 3ca6ee3b..8c805d14 100644 --- a/src/tools/lsp/client.test.ts +++ b/src/tools/lsp/client.test.ts @@ -2,7 +2,7 @@ import { mkdtempSync, rmSync, writeFileSync } from "node:fs" import { join } from "node:path" import { tmpdir } from "node:os" -import { describe, it, expect, spyOn, mock } from "bun:test" +import { describe, it, expect, spyOn, mock, beforeEach, afterEach } from "bun:test" mock.module("vscode-jsonrpc/node", () => ({ createMessageConnection: () => { @@ -12,10 +12,18 @@ mock.module("vscode-jsonrpc/node", () => ({ StreamMessageWriter: function StreamMessageWriter() {}, })) -import { LSPClient, validateCwd } from "./client" +import { LSPClient, lspManager, validateCwd } from "./client" import type { ResolvedServer } from "./types" describe("LSPClient", () => { + beforeEach(async () => { + await lspManager.stopAll() + }) + + afterEach(async () => { + await lspManager.stopAll() + }) + describe("openFile", () => { it("sends didChange when a previously opened file changes on disk", async () => { // #given @@ -61,6 +69,108 @@ describe("LSPClient", () => { }) }) + describe("LSPServerManager", () => { + it("recreates client after init failure instead of staying permanently blocked", async () => { + //#given + const dir = mkdtempSync(join(tmpdir(), "lsp-manager-test-")) + + const server: ResolvedServer = { + id: "typescript", + command: ["typescript-language-server", "--stdio"], + extensions: [".ts"], + priority: 0, + } + + const startSpy = spyOn(LSPClient.prototype, "start") + const initializeSpy = spyOn(LSPClient.prototype, "initialize") + const isAliveSpy = spyOn(LSPClient.prototype, "isAlive") + const stopSpy = spyOn(LSPClient.prototype, "stop") + + startSpy.mockImplementationOnce(async () => { + throw new Error("boom") + }) + startSpy.mockImplementation(async () => {}) + initializeSpy.mockImplementation(async () => {}) + isAliveSpy.mockImplementation(() => true) + stopSpy.mockImplementation(async () => {}) + + try { + //#when + await expect(lspManager.getClient(dir, server)).rejects.toThrow("boom") + + const client = await lspManager.getClient(dir, server) + + //#then + expect(client).toBeInstanceOf(LSPClient) + expect(startSpy).toHaveBeenCalledTimes(2) + expect(stopSpy).toHaveBeenCalled() + } finally { + startSpy.mockRestore() + initializeSpy.mockRestore() + isAliveSpy.mockRestore() + stopSpy.mockRestore() + rmSync(dir, { recursive: true, force: true }) + } + }) + + it("resets stale initializing entry so a hung init does not permanently block future clients", async () => { + //#given + const dir = mkdtempSync(join(tmpdir(), "lsp-manager-stale-test-")) + + const server: ResolvedServer = { + id: "typescript", + command: ["typescript-language-server", "--stdio"], + extensions: [".ts"], + priority: 0, + } + + const dateNowSpy = spyOn(Date, "now") + + const startSpy = spyOn(LSPClient.prototype, "start") + const initializeSpy = spyOn(LSPClient.prototype, "initialize") + const isAliveSpy = spyOn(LSPClient.prototype, "isAlive") + const stopSpy = spyOn(LSPClient.prototype, "stop") + + // First client init hangs forever. + const never = new Promise(() => {}) + startSpy.mockImplementationOnce(async () => { + await never + }) + + // Second attempt should be allowed after stale reset. + startSpy.mockImplementationOnce(async () => {}) + startSpy.mockImplementation(async () => {}) + initializeSpy.mockImplementation(async () => {}) + isAliveSpy.mockImplementation(() => true) + stopSpy.mockImplementation(async () => {}) + + try { + //#when + dateNowSpy.mockReturnValueOnce(0) + lspManager.warmupClient(dir, server) + + dateNowSpy.mockReturnValueOnce(60_000) + + const client = await Promise.race([ + lspManager.getClient(dir, server), + new Promise((_, reject) => setTimeout(() => reject(new Error("test-timeout")), 50)), + ]) + + //#then + expect(client).toBeInstanceOf(LSPClient) + expect(startSpy).toHaveBeenCalledTimes(2) + expect(stopSpy).toHaveBeenCalled() + } finally { + dateNowSpy.mockRestore() + startSpy.mockRestore() + initializeSpy.mockRestore() + isAliveSpy.mockRestore() + stopSpy.mockRestore() + rmSync(dir, { recursive: true, force: true }) + } + }) + }) + describe("validateCwd", () => { it("returns valid for existing directory", () => { // #given diff --git a/src/tools/lsp/lsp-manager-process-cleanup.ts b/src/tools/lsp/lsp-manager-process-cleanup.ts new file mode 100644 index 00000000..9f769570 --- /dev/null +++ b/src/tools/lsp/lsp-manager-process-cleanup.ts @@ -0,0 +1,45 @@ +type ManagedClientForCleanup = { + client: { + stop: () => Promise + } +} + +type ProcessCleanupOptions = { + getClients: () => IterableIterator<[string, ManagedClientForCleanup]> + clearClients: () => void + clearCleanupInterval: () => void +} + +export function registerLspManagerProcessCleanup(options: ProcessCleanupOptions): void { + // Synchronous cleanup for 'exit' event (cannot await) + const syncCleanup = () => { + for (const [, managed] of options.getClients()) { + try { + // Fire-and-forget during sync exit - process is terminating + void managed.client.stop().catch(() => {}) + } catch {} + } + options.clearClients() + options.clearCleanupInterval() + } + + // Async cleanup for signal handlers - properly await all stops + const asyncCleanup = async () => { + const stopPromises: Promise[] = [] + for (const [, managed] of options.getClients()) { + stopPromises.push(managed.client.stop().catch(() => {})) + } + await Promise.allSettled(stopPromises) + options.clearClients() + options.clearCleanupInterval() + } + + process.on("exit", syncCleanup) + + // Don't call process.exit() here; other handlers (background-agent manager) handle final exit. + process.on("SIGINT", () => void asyncCleanup().catch(() => {})) + process.on("SIGTERM", () => void asyncCleanup().catch(() => {})) + if (process.platform === "win32") { + process.on("SIGBREAK", () => void asyncCleanup().catch(() => {})) + } +} diff --git a/src/tools/lsp/lsp-manager-temp-directory-cleanup.ts b/src/tools/lsp/lsp-manager-temp-directory-cleanup.ts new file mode 100644 index 00000000..5ce5aa97 --- /dev/null +++ b/src/tools/lsp/lsp-manager-temp-directory-cleanup.ts @@ -0,0 +1,29 @@ +type ManagedClientForTempDirectoryCleanup = { + refCount: number + client: { + stop: () => Promise + } +} + +export async function cleanupTempDirectoryLspClients( + clients: Map +): Promise { + const keysToRemove: string[] = [] + for (const [key, managed] of clients.entries()) { + const isTempDir = key.startsWith("/tmp/") || key.startsWith("/var/folders/") + const isIdle = managed.refCount === 0 + if (isTempDir && isIdle) { + keysToRemove.push(key) + } + } + + for (const key of keysToRemove) { + const managed = clients.get(key) + if (managed) { + clients.delete(key) + try { + await managed.client.stop() + } catch {} + } + } +} diff --git a/src/tools/lsp/lsp-server.ts b/src/tools/lsp/lsp-server.ts index 6d4ee4b6..9cbba484 100644 --- a/src/tools/lsp/lsp-server.ts +++ b/src/tools/lsp/lsp-server.ts @@ -1,4 +1,6 @@ import type { ResolvedServer } from "./types" +import { registerLspManagerProcessCleanup } from "./lsp-manager-process-cleanup" +import { cleanupTempDirectoryLspClients } from "./lsp-manager-temp-directory-cleanup" import { LSPClient } from "./lsp-client" interface ManagedClient { client: LSPClient @@ -6,52 +8,31 @@ interface ManagedClient { refCount: number initPromise?: Promise isInitializing: boolean + initializingSince?: number } class LSPServerManager { private static instance: LSPServerManager private clients = new Map() private cleanupInterval: ReturnType | null = null private readonly IDLE_TIMEOUT = 5 * 60 * 1000 + private readonly INIT_TIMEOUT = 60 * 1000 private constructor() { this.startCleanupTimer() this.registerProcessCleanup() } private registerProcessCleanup(): void { - // Synchronous cleanup for 'exit' event (cannot await) - const syncCleanup = () => { - for (const [, managed] of this.clients) { - try { - // Fire-and-forget during sync exit - process is terminating - void managed.client.stop().catch(() => {}) - } catch {} - } - this.clients.clear() - if (this.cleanupInterval) { - clearInterval(this.cleanupInterval) - this.cleanupInterval = null - } - } - // Async cleanup for signal handlers - properly await all stops - const asyncCleanup = async () => { - const stopPromises: Promise[] = [] - for (const [, managed] of this.clients) { - stopPromises.push(managed.client.stop().catch(() => {})) - } - await Promise.allSettled(stopPromises) - this.clients.clear() - if (this.cleanupInterval) { - clearInterval(this.cleanupInterval) - this.cleanupInterval = null - } - } - process.on("exit", syncCleanup) - - // Don't call process.exit() here; other handlers (background-agent manager) handle final exit. - process.on("SIGINT", () => void asyncCleanup().catch(() => {})) - process.on("SIGTERM", () => void asyncCleanup().catch(() => {})) - if (process.platform === "win32") { - process.on("SIGBREAK", () => void asyncCleanup().catch(() => {})) - } + registerLspManagerProcessCleanup({ + getClients: () => this.clients.entries(), + clearClients: () => { + this.clients.clear() + }, + clearCleanupInterval: () => { + if (this.cleanupInterval) { + clearInterval(this.cleanupInterval) + this.cleanupInterval = null + } + }, + }) } static getInstance(): LSPServerManager { @@ -85,17 +66,46 @@ class LSPServerManager { async getClient(root: string, server: ResolvedServer): Promise { const key = this.getKey(root, server.id) let managed = this.clients.get(key) + if (managed) { + const now = Date.now() + if ( + managed.isInitializing && + managed.initializingSince !== undefined && + now - managed.initializingSince >= this.INIT_TIMEOUT + ) { + // Stale init can permanently block subsequent calls (e.g., LSP process hang) + try { + await managed.client.stop() + } catch {} + this.clients.delete(key) + managed = undefined + } + } if (managed) { if (managed.initPromise) { - await managed.initPromise + try { + await managed.initPromise + } catch { + // Failed init should not keep the key blocked forever. + try { + await managed.client.stop() + } catch {} + this.clients.delete(key) + managed = undefined + } } - if (managed.client.isAlive()) { - managed.refCount++ - managed.lastUsedAt = Date.now() - return managed.client + + if (managed) { + if (managed.client.isAlive()) { + managed.refCount++ + managed.lastUsedAt = Date.now() + return managed.client + } + try { + await managed.client.stop() + } catch {} + this.clients.delete(key) } - await managed.client.stop() - this.clients.delete(key) } const client = new LSPClient(root, server) @@ -103,19 +113,30 @@ class LSPServerManager { await client.start() await client.initialize() })() + const initStartedAt = Date.now() this.clients.set(key, { client, - lastUsedAt: Date.now(), + lastUsedAt: initStartedAt, refCount: 1, initPromise, isInitializing: true, + initializingSince: initStartedAt, }) - await initPromise + try { + await initPromise + } catch (error) { + this.clients.delete(key) + try { + await client.stop() + } catch {} + throw error + } const m = this.clients.get(key) if (m) { m.initPromise = undefined m.isInitializing = false + m.initializingSince = undefined } return client @@ -130,21 +151,30 @@ class LSPServerManager { await client.initialize() })() + const initStartedAt = Date.now() this.clients.set(key, { client, - lastUsedAt: Date.now(), + lastUsedAt: initStartedAt, refCount: 0, initPromise, isInitializing: true, + initializingSince: initStartedAt, }) - initPromise.then(() => { - const m = this.clients.get(key) - if (m) { - m.initPromise = undefined - m.isInitializing = false - } - }) + initPromise + .then(() => { + const m = this.clients.get(key) + if (m) { + m.initPromise = undefined + m.isInitializing = false + m.initializingSince = undefined + } + }) + .catch(() => { + // Warmup failures must not permanently block future initialization. + this.clients.delete(key) + void client.stop().catch(() => {}) + }) } releaseClient(root: string, serverId: string): void { @@ -174,23 +204,7 @@ class LSPServerManager { } async cleanupTempDirectoryClients(): Promise { - const keysToRemove: string[] = [] - for (const [key, managed] of this.clients.entries()) { - const isTempDir = key.startsWith("/tmp/") || key.startsWith("/var/folders/") - const isIdle = managed.refCount === 0 - if (isTempDir && isIdle) { - keysToRemove.push(key) - } - } - for (const key of keysToRemove) { - const managed = this.clients.get(key) - if (managed) { - this.clients.delete(key) - try { - await managed.client.stop() - } catch {} - } - } + await cleanupTempDirectoryLspClients(this.clients) } }