From fecc4888483b6b25cff26c5d7caab0ceb55f102f Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Mon, 9 Feb 2026 23:42:14 +0900 Subject: [PATCH 1/9] fix(sisyphus-junior): disambiguate blocked delegation tool from allowed task management tools When task_system is enabled, the prompt said 'task tool: BLOCKED' which LLMs interpreted as blocking task_create/task_update/task_list/task_get too. Now the constraints section explicitly separates 'task (agent delegation tool): BLOCKED' from 'task_create, task_update, ...: ALLOWED' so Junior no longer refuses to use task management tools. --- src/agents/sisyphus-junior/default.ts | 32 +++++++++++---- src/agents/sisyphus-junior/gpt.ts | 51 ++++++++++++++++++------ src/agents/sisyphus-junior/index.test.ts | 42 +++++++++++++++++++ 3 files changed, 105 insertions(+), 20 deletions(-) diff --git a/src/agents/sisyphus-junior/default.ts b/src/agents/sisyphus-junior/default.ts index 95cce607..fea9e715 100644 --- a/src/agents/sisyphus-junior/default.ts +++ b/src/agents/sisyphus-junior/default.ts @@ -12,6 +12,7 @@ export function buildDefaultSisyphusJuniorPrompt( promptAppend?: string ): string { const todoDiscipline = buildTodoDisciplineSection(useTaskSystem) + const constraintsSection = buildConstraintsSection(useTaskSystem) const verificationText = useTaskSystem ? "All tasks marked completed" : "All todos marked completed" @@ -21,13 +22,7 @@ Sisyphus-Junior - Focused executor from OhMyOpenCode. Execute tasks directly. NEVER delegate or spawn other agents. - -BLOCKED ACTIONS (will fail if attempted): -- task tool: BLOCKED - -ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research. -You work ALONE for implementation. No delegation of implementation tasks. - +${constraintsSection} ${todoDiscipline} @@ -48,6 +43,29 @@ Task NOT complete without: return prompt + "\n\n" + promptAppend } +function buildConstraintsSection(useTaskSystem: boolean): string { + if (useTaskSystem) { + return ` +BLOCKED ACTIONS (will fail if attempted): +- task (agent delegation tool): BLOCKED — you cannot delegate work to other agents + +ALLOWED tools: +- call_omo_agent: You CAN spawn explore/librarian agents for research +- task_create, task_update, task_list, task_get: ALLOWED — use these for tracking your work + +You work ALONE for implementation. No delegation of implementation tasks. +` + } + + return ` +BLOCKED ACTIONS (will fail if attempted): +- task (agent delegation tool): BLOCKED — you cannot delegate work to other agents + +ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research. +You work ALONE for implementation. No delegation of implementation tasks. +` +} + function buildTodoDisciplineSection(useTaskSystem: boolean): string { if (useTaskSystem) { return ` diff --git a/src/agents/sisyphus-junior/gpt.ts b/src/agents/sisyphus-junior/gpt.ts index 03653ba0..e4a84965 100644 --- a/src/agents/sisyphus-junior/gpt.ts +++ b/src/agents/sisyphus-junior/gpt.ts @@ -21,6 +21,7 @@ export function buildGptSisyphusJuniorPrompt( promptAppend?: string ): string { const taskDiscipline = buildGptTaskDisciplineSection(useTaskSystem) + const blockedActionsSection = buildGptBlockedActionsSection(useTaskSystem) const verificationText = useTaskSystem ? "All tasks marked completed" : "All todos marked completed" @@ -45,19 +46,7 @@ Role: Execute tasks directly. You work ALONE. - Do NOT expand task boundaries beyond what's written. - -BLOCKED (will fail if attempted): -| Tool | Status | -|------|--------| -| task | BLOCKED | - -ALLOWED: -| Tool | Usage | -|------|-------| -| call_omo_agent | Spawn explore/librarian for research ONLY | - -You work ALONE for implementation. No delegation. - +${blockedActionsSection} - If a task is ambiguous or underspecified: @@ -99,6 +88,42 @@ Task NOT complete without evidence: return prompt + "\n\n" + promptAppend } +function buildGptBlockedActionsSection(useTaskSystem: boolean): string { + if (useTaskSystem) { + return ` +BLOCKED (will fail if attempted): +| Tool | Status | Description | +|------|--------|-------------| +| task | BLOCKED | Agent delegation tool — you cannot spawn other agents | + +ALLOWED: +| Tool | Usage | +|------|-------| +| call_omo_agent | Spawn explore/librarian for research ONLY | +| task_create | Create tasks to track your work | +| task_update | Update task status (in_progress, completed) | +| task_list | List active tasks | +| task_get | Get task details by ID | + +You work ALONE for implementation. No delegation. +` + } + + return ` +BLOCKED (will fail if attempted): +| Tool | Status | Description | +|------|--------|-------------| +| task | BLOCKED | Agent delegation tool — you cannot spawn other agents | + +ALLOWED: +| Tool | Usage | +|------|-------| +| call_omo_agent | Spawn explore/librarian for research ONLY | + +You work ALONE for implementation. No delegation. +` +} + function buildGptTaskDisciplineSection(useTaskSystem: boolean): string { if (useTaskSystem) { return ` diff --git a/src/agents/sisyphus-junior/index.test.ts b/src/agents/sisyphus-junior/index.test.ts index d574f57c..748d8924 100644 --- a/src/agents/sisyphus-junior/index.test.ts +++ b/src/agents/sisyphus-junior/index.test.ts @@ -238,6 +238,48 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { expect(result.prompt).toContain("todowrite") expect(result.prompt).not.toContain("TaskCreate") }) + + test("useTaskSystem=true explicitly lists task management tools as ALLOWED for Claude", () => { + //#given + const override = { model: "anthropic/claude-sonnet-4-5" } + + //#when + const result = createSisyphusJuniorAgentWithOverrides(override, undefined, true) + + //#then - prompt must disambiguate: delegation tool blocked, management tools allowed + expect(result.prompt).toContain("task_create") + expect(result.prompt).toContain("task_update") + expect(result.prompt).toContain("task_list") + expect(result.prompt).toContain("task_get") + expect(result.prompt).toContain("agent delegation tool") + }) + + test("useTaskSystem=true explicitly lists task management tools as ALLOWED for GPT", () => { + //#given + const override = { model: "openai/gpt-5.2" } + + //#when + const result = createSisyphusJuniorAgentWithOverrides(override, undefined, true) + + //#then - prompt must disambiguate: delegation tool blocked, management tools allowed + expect(result.prompt).toContain("task_create") + expect(result.prompt).toContain("task_update") + expect(result.prompt).toContain("task_list") + expect(result.prompt).toContain("task_get") + expect(result.prompt).toContain("Agent delegation tool") + }) + + test("useTaskSystem=false does NOT list task management tools in constraints", () => { + //#given - Claude model without task system + const override = { model: "anthropic/claude-sonnet-4-5" } + + //#when + const result = createSisyphusJuniorAgentWithOverrides(override, undefined, false) + + //#then - no task management tool references in constraints section + expect(result.prompt).not.toContain("task_create") + expect(result.prompt).not.toContain("task_update") + }) }) describe("prompt composition", () => { From 7255fec8b3a8321ed99ca5b40b602b1a9e546de3 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 10 Feb 2026 11:16:05 +0900 Subject: [PATCH 2/9] test(git-worktree): fix test pollution from incomplete fs mock Replace mock.module with spyOn + mockRestore to prevent fs module pollution across test files. mock.module replaces the entire module and caused 69 test failures in other files that depend on fs. --- .../collect-git-diff-stats.test.ts | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/src/shared/git-worktree/collect-git-diff-stats.test.ts b/src/shared/git-worktree/collect-git-diff-stats.test.ts index 334d15f4..659c4234 100644 --- a/src/shared/git-worktree/collect-git-diff-stats.test.ts +++ b/src/shared/git-worktree/collect-git-diff-stats.test.ts @@ -1,79 +1,79 @@ /// -import { describe, expect, mock, test } from "bun:test" - -const execSyncMock = mock(() => { - throw new Error("execSync should not be called") -}) - -const execFileSyncMock = mock((file: string, args: string[], _opts: { cwd?: string }) => { - if (file !== "git") throw new Error(`unexpected file: ${file}`) - const subcommand = args[0] - - if (subcommand === "diff") { - return "1\t2\tfile.ts\n" - } - - if (subcommand === "status") { - return " M file.ts\n?? new-file.ts\n" - } - - if (subcommand === "ls-files") { - return "new-file.ts\n" - } - - throw new Error(`unexpected args: ${args.join(" ")}`) -}) - -const readFileSyncMock = mock((_path: string, _encoding: string) => { - return "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\n" -}) - -mock.module("node:child_process", () => ({ - execSync: execSyncMock, - execFileSync: execFileSyncMock, -})) - -mock.module("node:fs", () => ({ - readFileSync: readFileSyncMock, -})) - -const { collectGitDiffStats } = await import("./collect-git-diff-stats") +import { describe, expect, test, spyOn, beforeEach, afterEach } from "bun:test" +import * as childProcess from "node:child_process" +import * as fs from "node:fs" describe("collectGitDiffStats", () => { - test("uses execFileSync with arg arrays (no shell injection)", () => { + let execFileSyncSpy: ReturnType + let execSyncSpy: ReturnType + let readFileSyncSpy: ReturnType + + beforeEach(() => { + execSyncSpy = spyOn(childProcess, "execSync").mockImplementation(() => { + throw new Error("execSync should not be called") + }) + + execFileSyncSpy = spyOn(childProcess, "execFileSync").mockImplementation( + ((file: string, args: string[], _opts: { cwd?: string }) => { + if (file !== "git") throw new Error(`unexpected file: ${file}`) + const subcommand = args[0] + + if (subcommand === "diff") return "1\t2\tfile.ts\n" + if (subcommand === "status") return " M file.ts\n?? new-file.ts\n" + if (subcommand === "ls-files") return "new-file.ts\n" + + throw new Error(`unexpected args: ${args.join(" ")}`) + }) as typeof childProcess.execFileSync + ) + + readFileSyncSpy = spyOn(fs, "readFileSync").mockImplementation( + ((_path: unknown, _encoding: unknown) => { + return "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\n" + }) as typeof fs.readFileSync + ) + }) + + afterEach(() => { + execSyncSpy.mockRestore() + execFileSyncSpy.mockRestore() + readFileSyncSpy.mockRestore() + }) + + test("uses execFileSync with arg arrays (no shell injection)", async () => { //#given + const { collectGitDiffStats } = await import("./collect-git-diff-stats") const directory = "/tmp/safe-repo;touch /tmp/pwn" //#when const result = collectGitDiffStats(directory) //#then - expect(execSyncMock).not.toHaveBeenCalled() - expect(execFileSyncMock).toHaveBeenCalledTimes(3) + expect(execSyncSpy).not.toHaveBeenCalled() + expect(execFileSyncSpy).toHaveBeenCalledTimes(3) - const [firstCallFile, firstCallArgs, firstCallOpts] = execFileSyncMock.mock + const [firstCallFile, firstCallArgs, firstCallOpts] = execFileSyncSpy.mock .calls[0]! as unknown as [string, string[], { cwd?: string }] expect(firstCallFile).toBe("git") expect(firstCallArgs).toEqual(["diff", "--numstat", "HEAD"]) expect(firstCallOpts.cwd).toBe(directory) expect(firstCallArgs.join(" ")).not.toContain(directory) - const [secondCallFile, secondCallArgs, secondCallOpts] = execFileSyncMock.mock + const [secondCallFile, secondCallArgs, secondCallOpts] = execFileSyncSpy.mock .calls[1]! as unknown as [string, string[], { cwd?: string }] expect(secondCallFile).toBe("git") expect(secondCallArgs).toEqual(["status", "--porcelain"]) expect(secondCallOpts.cwd).toBe(directory) expect(secondCallArgs.join(" ")).not.toContain(directory) - const [thirdCallFile, thirdCallArgs, thirdCallOpts] = execFileSyncMock.mock + const [thirdCallFile, thirdCallArgs, thirdCallOpts] = execFileSyncSpy.mock .calls[2]! as unknown as [string, string[], { cwd?: string }] expect(thirdCallFile).toBe("git") expect(thirdCallArgs).toEqual(["ls-files", "--others", "--exclude-standard"]) expect(thirdCallOpts.cwd).toBe(directory) expect(thirdCallArgs.join(" ")).not.toContain(directory) - expect(readFileSyncMock).toHaveBeenCalled() + expect(readFileSyncSpy).toHaveBeenCalled() expect(result).toEqual([ { From 44675fb57f77d5ae50527123e0ebe13413779b17 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 10 Feb 2026 11:16:19 +0900 Subject: [PATCH 3/9] fix(atlas): allow boulder continuation for Sisyphus sessions When boulderState.agent is not explicitly set (defaults to 'atlas'), allow continuation for sessions where the last agent is 'sisyphus'. This fixes the issue where boulder continuation was skipped when Sisyphus took over the conversation after boulder creation. --- src/hooks/atlas/event-handler.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/hooks/atlas/event-handler.ts b/src/hooks/atlas/event-handler.ts index a9d26a32..f9024776 100644 --- a/src/hooks/atlas/event-handler.ts +++ b/src/hooks/atlas/event-handler.ts @@ -89,13 +89,20 @@ export function createAtlasEventHandler(input: { return } - const requiredAgent = (boulderState.agent ?? "atlas").toLowerCase() const lastAgent = getLastAgentFromSession(sessionID) - if (!lastAgent || lastAgent !== requiredAgent) { + const requiredAgent = (boulderState.agent ?? "atlas").toLowerCase() + const lastAgentMatchesRequired = lastAgent === requiredAgent + const boulderAgentWasNotExplicitlySet = boulderState.agent === undefined + const boulderAgentDefaultsToAtlas = requiredAgent === "atlas" + const lastAgentIsSisyphus = lastAgent === "sisyphus" + const allowSisyphusWhenDefaultAtlas = boulderAgentWasNotExplicitlySet && boulderAgentDefaultsToAtlas && lastAgentIsSisyphus + const agentMatches = lastAgentMatchesRequired || allowSisyphusWhenDefaultAtlas + if (!agentMatches) { log(`[${HOOK_NAME}] Skipped: last agent does not match boulder agent`, { sessionID, lastAgent: lastAgent ?? "unknown", requiredAgent, + boulderAgentExplicitlySet: boulderState.agent !== undefined, }) return } From 1717050f73765aa720a130f6f5369e8e81fb8546 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 10 Feb 2026 11:16:44 +0900 Subject: [PATCH 4/9] feat(event): normalize session.status to session.idle Add session-status-normalizer to handle session.status events and convert idle status to synthetic session.idle events. Includes deduplication logic to prevent duplicate idle events within 500ms. --- src/plugin/event.ts | 28 ++++- src/plugin/session-status-normalizer.test.ts | 119 +++++++++++++++++++ src/plugin/session-status-normalizer.ts | 22 ++++ 3 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 src/plugin/session-status-normalizer.test.ts create mode 100644 src/plugin/session-status-normalizer.ts diff --git a/src/plugin/event.ts b/src/plugin/event.ts index 4c068503..3755a88c 100644 --- a/src/plugin/event.ts +++ b/src/plugin/event.ts @@ -12,6 +12,7 @@ import { lspManager } from "../tools" import type { CreatedHooks } from "../create-hooks" import type { Managers } from "../create-managers" +import { normalizeSessionStatusToIdle } from "./session-status-normalizer" type FirstMessageVariantGate = { markSessionCreated: (sessionInfo: { id?: string; title?: string; parentID?: string } | undefined) => void @@ -27,7 +28,7 @@ export function createEventHandler(args: { }): (input: { event: { type: string; properties?: Record } }) => Promise { const { ctx, firstMessageVariantGate, managers, hooks } = args - return async (input): Promise => { + const dispatchToHooks = async (input: { event: { type: string; properties?: Record } }): Promise => { await Promise.resolve(hooks.autoUpdateChecker?.event?.(input)) await Promise.resolve(hooks.claudeCodeHooks?.event?.(input)) await Promise.resolve(hooks.backgroundNotificationHook?.event?.(input)) @@ -47,6 +48,31 @@ export function createEventHandler(args: { await Promise.resolve(hooks.stopContinuationGuard?.event?.(input)) await Promise.resolve(hooks.compactionTodoPreserver?.event?.(input)) await Promise.resolve(hooks.atlasHook?.handler?.(input)) + } + + const recentSyntheticIdles = new Map() + const DEDUP_WINDOW_MS = 500 + + return async (input): Promise => { + if (input.event.type === "session.idle") { + const sessionID = (input.event.properties as Record | undefined)?.sessionID as string | undefined + if (sessionID) { + const emittedAt = recentSyntheticIdles.get(sessionID) + if (emittedAt && Date.now() - emittedAt < DEDUP_WINDOW_MS) { + recentSyntheticIdles.delete(sessionID) + return + } + } + } + + await dispatchToHooks(input) + + const syntheticIdle = normalizeSessionStatusToIdle(input) + if (syntheticIdle) { + const sessionID = (syntheticIdle.event.properties as Record)?.sessionID as string + recentSyntheticIdles.set(sessionID, Date.now()) + await dispatchToHooks(syntheticIdle) + } const { event } = input const props = event.properties as Record | undefined diff --git a/src/plugin/session-status-normalizer.test.ts b/src/plugin/session-status-normalizer.test.ts new file mode 100644 index 00000000..cfb99ec6 --- /dev/null +++ b/src/plugin/session-status-normalizer.test.ts @@ -0,0 +1,119 @@ +import { describe, it, expect } from "bun:test" +import { normalizeSessionStatusToIdle } from "./session-status-normalizer" + +type EventInput = { event: { type: string; properties?: Record } } + +describe("normalizeSessionStatusToIdle", () => { + it("converts session.status with idle type to synthetic session.idle event", () => { + //#given - a session.status event with type=idle + const input: EventInput = { + event: { + type: "session.status", + properties: { + sessionID: "ses_abc123", + status: { type: "idle" }, + }, + }, + } + + //#when - normalizeSessionStatusToIdle is called + const result = normalizeSessionStatusToIdle(input) + + //#then - returns a synthetic session.idle event + expect(result).toEqual({ + event: { + type: "session.idle", + properties: { + sessionID: "ses_abc123", + }, + }, + }) + }) + + it("returns null for session.status with busy type", () => { + //#given - a session.status event with type=busy + const input: EventInput = { + event: { + type: "session.status", + properties: { + sessionID: "ses_abc123", + status: { type: "busy" }, + }, + }, + } + + //#when - normalizeSessionStatusToIdle is called + const result = normalizeSessionStatusToIdle(input) + + //#then - returns null (no synthetic idle event) + expect(result).toBeNull() + }) + + it("returns null for session.status with retry type", () => { + //#given - a session.status event with type=retry + const input: EventInput = { + event: { + type: "session.status", + properties: { + sessionID: "ses_abc123", + status: { type: "retry", attempt: 1, message: "retrying", next: 5000 }, + }, + }, + } + + //#when - normalizeSessionStatusToIdle is called + const result = normalizeSessionStatusToIdle(input) + + //#then - returns null + expect(result).toBeNull() + }) + + it("returns null for non-session.status events", () => { + //#given - a message.updated event + const input: EventInput = { + event: { + type: "message.updated", + properties: { info: { sessionID: "ses_abc123" } }, + }, + } + + //#when - normalizeSessionStatusToIdle is called + const result = normalizeSessionStatusToIdle(input) + + //#then - returns null + expect(result).toBeNull() + }) + + it("returns null when session.status has no properties", () => { + //#given - a session.status event with no properties + const input: EventInput = { + event: { + type: "session.status", + }, + } + + //#when - normalizeSessionStatusToIdle is called + const result = normalizeSessionStatusToIdle(input) + + //#then - returns null + expect(result).toBeNull() + }) + + it("returns null when session.status has no status object", () => { + //#given - a session.status event with sessionID but no status + const input: EventInput = { + event: { + type: "session.status", + properties: { + sessionID: "ses_abc123", + }, + }, + } + + //#when - normalizeSessionStatusToIdle is called + const result = normalizeSessionStatusToIdle(input) + + //#then - returns null + expect(result).toBeNull() + }) +}) diff --git a/src/plugin/session-status-normalizer.ts b/src/plugin/session-status-normalizer.ts new file mode 100644 index 00000000..e02377d3 --- /dev/null +++ b/src/plugin/session-status-normalizer.ts @@ -0,0 +1,22 @@ +type EventInput = { event: { type: string; properties?: Record } } +type SessionStatus = { type: string } + +export function normalizeSessionStatusToIdle(input: EventInput): EventInput | null { + if (input.event.type !== "session.status") return null + + const props = input.event.properties + if (!props) return null + + const status = props.status as SessionStatus | undefined + if (!status || status.type !== "idle") return null + + const sessionID = props.sessionID as string | undefined + if (!sessionID) return null + + return { + event: { + type: "session.idle", + properties: { sessionID }, + }, + } +} From 2fd847d88dffed9f5504006c18516e90bd4002ee Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 10 Feb 2026 11:16:55 +0900 Subject: [PATCH 5/9] refactor: fix import path and update test fixtures - Fix import path in opencode-skill-loader/loader.ts - Update executor.test.ts fixtures --- src/features/opencode-skill-loader/loader.ts | 2 +- .../executor.test.ts | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/features/opencode-skill-loader/loader.ts b/src/features/opencode-skill-loader/loader.ts index 6caf4e73..2eebc007 100644 --- a/src/features/opencode-skill-loader/loader.ts +++ b/src/features/opencode-skill-loader/loader.ts @@ -1,5 +1,5 @@ import { join } from "path" -import { getClaudeConfigDir } from "../../shared" +import { getClaudeConfigDir } from "../../shared/claude-config-dir" import { getOpenCodeConfigDir } from "../../shared/opencode-config-dir" import type { CommandDefinition } from "../claude-code-command-loader/types" import type { LoadedSkill } from "./types" diff --git a/src/hooks/anthropic-context-window-limit-recovery/executor.test.ts b/src/hooks/anthropic-context-window-limit-recovery/executor.test.ts index ca6d383c..aa1fea43 100644 --- a/src/hooks/anthropic-context-window-limit-recovery/executor.test.ts +++ b/src/hooks/anthropic-context-window-limit-recovery/executor.test.ts @@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, mock, spyOn, test } from "bun: import { executeCompact } from "./executor" import type { AutoCompactState } from "./types" import * as storage from "./storage" +import * as messagesReader from "../session-recovery/storage/messages-reader" type TimerCallback = (...args: any[]) => void @@ -168,7 +169,8 @@ describe("executeCompact lock management", () => { }) test("clears lock when fixEmptyMessages path executes", async () => { - // given: Empty content error scenario + //#given - Empty content error scenario with no messages in storage + const readMessagesSpy = spyOn(messagesReader, "readMessages").mockReturnValue([]) autoCompactState.errorDataBySession.set(sessionID, { errorType: "non-empty content required", messageIndex: 0, @@ -176,16 +178,17 @@ describe("executeCompact lock management", () => { maxTokens: 200000, }) - // when: Execute compaction (fixEmptyMessages will be called) + //#when - Execute compaction (fixEmptyMessages will be called) await executeCompact(sessionID, msg, autoCompactState, mockClient, directory) - // then: Lock should be cleared + //#then - Lock should be cleared expect(autoCompactState.compactionInProgress.has(sessionID)).toBe(false) + readMessagesSpy.mockRestore() }) test("clears lock when truncation is sufficient", async () => { - // given: Aggressive truncation scenario with sufficient truncation - // This test verifies the early return path in aggressive truncation + //#given - Aggressive truncation scenario with no messages in storage + const readMessagesSpy = spyOn(messagesReader, "readMessages").mockReturnValue([]) autoCompactState.errorDataBySession.set(sessionID, { errorType: "token_limit", currentTokens: 250000, @@ -197,7 +200,7 @@ describe("executeCompact lock management", () => { aggressive_truncation: true, } - // when: Execute compaction with experimental flag + //#when - Execute compaction with experimental flag await executeCompact( sessionID, msg, @@ -207,8 +210,9 @@ describe("executeCompact lock management", () => { experimental, ) - // then: Lock should be cleared even on early return + //#then - Lock should be cleared even on early return expect(autoCompactState.compactionInProgress.has(sessionID)).toBe(false) + readMessagesSpy.mockRestore() }) test("prevents concurrent compaction attempts", async () => { From 19a4324b3e70fc36f51644714dd3c68b421fce92 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 10 Feb 2026 13:25:49 +0900 Subject: [PATCH 6/9] fix(provider-cache): extract models from provider.list().all response OpenCode SDK does not expose client.model.list API. This caused the provider-models cache to always be empty (models: {}), which in turn caused delegate-task categories with requiresModel (e.g., 'deep', 'artistry') to fail with misleading 'Unknown category' errors. Changes: - connected-providers-cache.ts: Extract models from provider.list() response's .all array instead of calling non-existent client.model.list - category-resolver.ts: Distinguish between 'unknown category' and 'model not available' errors with clearer error messages - Add comprehensive tests for both fixes Bug chain: client.model?.list is undefined -> empty cache -> isModelAvailable returns false for requiresModel categories -> null returned from resolveCategoryConfig -> 'Unknown category' error (wrong message) --- src/shared/connected-providers-cache.test.ts | 133 ++++++++++++++++++ src/shared/connected-providers-cache.ts | 42 +++--- .../delegate-task/category-resolver.test.ts | 62 ++++++++ src/tools/delegate-task/category-resolver.ts | 28 +++- 4 files changed, 239 insertions(+), 26 deletions(-) create mode 100644 src/shared/connected-providers-cache.test.ts create mode 100644 src/tools/delegate-task/category-resolver.test.ts diff --git a/src/shared/connected-providers-cache.test.ts b/src/shared/connected-providers-cache.test.ts new file mode 100644 index 00000000..2200a9b1 --- /dev/null +++ b/src/shared/connected-providers-cache.test.ts @@ -0,0 +1,133 @@ +import { describe, test, expect, beforeEach, afterEach, spyOn } from "bun:test" +import { existsSync, mkdirSync, rmSync } from "fs" +import { join } from "path" +import * as dataPath from "./data-path" +import { updateConnectedProvidersCache, readProviderModelsCache } from "./connected-providers-cache" + +const TEST_CACHE_DIR = join(import.meta.dir, "__test-cache__") + +describe("updateConnectedProvidersCache", () => { + let cacheDirSpy: ReturnType + + beforeEach(() => { + cacheDirSpy = spyOn(dataPath, "getOmoOpenCodeCacheDir").mockReturnValue(TEST_CACHE_DIR) + if (existsSync(TEST_CACHE_DIR)) { + rmSync(TEST_CACHE_DIR, { recursive: true }) + } + mkdirSync(TEST_CACHE_DIR, { recursive: true }) + }) + + afterEach(() => { + cacheDirSpy.mockRestore() + if (existsSync(TEST_CACHE_DIR)) { + rmSync(TEST_CACHE_DIR, { recursive: true }) + } + }) + + test("extracts models from provider.list().all response", async () => { + //#given + const mockClient = { + provider: { + list: async () => ({ + data: { + connected: ["openai", "anthropic"], + all: [ + { + id: "openai", + name: "OpenAI", + env: [], + models: { + "gpt-5.3-codex": { id: "gpt-5.3-codex", name: "GPT-5.3 Codex" }, + "gpt-5.2": { id: "gpt-5.2", name: "GPT-5.2" }, + }, + }, + { + id: "anthropic", + name: "Anthropic", + env: [], + models: { + "claude-opus-4-6": { id: "claude-opus-4-6", name: "Claude Opus 4.6" }, + "claude-sonnet-4-5": { id: "claude-sonnet-4-5", name: "Claude Sonnet 4.5" }, + }, + }, + ], + }, + }), + }, + } + + //#when + await updateConnectedProvidersCache(mockClient) + + //#then + const cache = readProviderModelsCache() + expect(cache).not.toBeNull() + expect(cache!.connected).toEqual(["openai", "anthropic"]) + expect(cache!.models).toEqual({ + openai: ["gpt-5.3-codex", "gpt-5.2"], + anthropic: ["claude-opus-4-6", "claude-sonnet-4-5"], + }) + }) + + test("writes empty models when provider has no models", async () => { + //#given + const mockClient = { + provider: { + list: async () => ({ + data: { + connected: ["empty-provider"], + all: [ + { + id: "empty-provider", + name: "Empty", + env: [], + models: {}, + }, + ], + }, + }), + }, + } + + //#when + await updateConnectedProvidersCache(mockClient) + + //#then + const cache = readProviderModelsCache() + expect(cache).not.toBeNull() + expect(cache!.models).toEqual({}) + }) + + test("writes empty models when all field is missing", async () => { + //#given + const mockClient = { + provider: { + list: async () => ({ + data: { + connected: ["openai"], + }, + }), + }, + } + + //#when + await updateConnectedProvidersCache(mockClient) + + //#then + const cache = readProviderModelsCache() + expect(cache).not.toBeNull() + expect(cache!.models).toEqual({}) + }) + + test("does nothing when client.provider.list is not available", async () => { + //#given + const mockClient = {} + + //#when + await updateConnectedProvidersCache(mockClient) + + //#then + const cache = readProviderModelsCache() + expect(cache).toBeNull() + }) +}) diff --git a/src/shared/connected-providers-cache.ts b/src/shared/connected-providers-cache.ts index 9c1ff5e1..e9a1ec36 100644 --- a/src/shared/connected-providers-cache.ts +++ b/src/shared/connected-providers-cache.ts @@ -149,10 +149,12 @@ export function writeProviderModelsCache(data: { models: Record Promise<{ data?: { connected?: string[] } }> - } - model?: { - list?: () => Promise<{ data?: Array<{ id: string; provider: string }> }> + list?: () => Promise<{ + data?: { + connected?: string[] + all?: Array<{ id: string; models?: Record }> + } + }> } }): Promise { if (!client?.provider?.list) { @@ -167,31 +169,23 @@ export async function updateConnectedProvidersCache(client: { writeConnectedProvidersCache(connected) - // Always update provider-models cache (overwrite with fresh data) - let modelsByProvider: Record = {} - if (client.model?.list) { - try { - const modelsResult = await client.model.list() - const models = modelsResult.data ?? [] + const modelsByProvider: Record = {} + const allProviders = result.data?.all ?? [] - for (const model of models) { - if (!modelsByProvider[model.provider]) { - modelsByProvider[model.provider] = [] - } - modelsByProvider[model.provider].push(model.id) + for (const provider of allProviders) { + if (provider.models) { + const modelIds = Object.keys(provider.models) + if (modelIds.length > 0) { + modelsByProvider[provider.id] = modelIds } - - log("[connected-providers-cache] Fetched models from API", { - providerCount: Object.keys(modelsByProvider).length, - totalModels: models.length, - }) - } catch (modelErr) { - log("[connected-providers-cache] Error fetching models, writing empty cache", { error: String(modelErr) }) } - } else { - log("[connected-providers-cache] client.model.list not available, writing empty cache") } + log("[connected-providers-cache] Extracted models from provider list", { + providerCount: Object.keys(modelsByProvider).length, + totalModels: Object.values(modelsByProvider).reduce((sum, ids) => sum + ids.length, 0), + }) + writeProviderModelsCache({ models: modelsByProvider, connected, diff --git a/src/tools/delegate-task/category-resolver.test.ts b/src/tools/delegate-task/category-resolver.test.ts new file mode 100644 index 00000000..febfe3f1 --- /dev/null +++ b/src/tools/delegate-task/category-resolver.test.ts @@ -0,0 +1,62 @@ +import { describe, test, expect } from "bun:test" +import { resolveCategoryExecution } from "./category-resolver" +import type { ExecutorContext } from "./executor-types" + +describe("resolveCategoryExecution", () => { + const createMockExecutorContext = (): ExecutorContext => ({ + client: {} as any, + manager: {} as any, + directory: "/tmp/test", + userCategories: {}, + sisyphusJuniorModel: undefined, + }) + + test("returns clear error when category exists but required model is not available", async () => { + //#given + const args = { + category: "deep", + prompt: "test prompt", + description: "Test task", + run_in_background: false, + load_skills: [], + blockedBy: undefined, + enableSkillTools: false, + } + const executorCtx = createMockExecutorContext() + const inheritedModel = undefined + const systemDefaultModel = "anthropic/claude-sonnet-4-5" + + //#when + const result = await resolveCategoryExecution(args, executorCtx, inheritedModel, systemDefaultModel) + + //#then + expect(result.error).toBeDefined() + expect(result.error).toContain("deep") + expect(result.error).toMatch(/model.*not.*available|requires.*model/i) + expect(result.error).not.toContain("Unknown category") + }) + + test("returns 'unknown category' error for truly unknown categories", async () => { + //#given + const args = { + category: "definitely-not-a-real-category-xyz123", + prompt: "test prompt", + description: "Test task", + run_in_background: false, + load_skills: [], + blockedBy: undefined, + enableSkillTools: false, + } + const executorCtx = createMockExecutorContext() + const inheritedModel = undefined + const systemDefaultModel = "anthropic/claude-sonnet-4-5" + + //#when + const result = await resolveCategoryExecution(args, executorCtx, inheritedModel, systemDefaultModel) + + //#then + expect(result.error).toBeDefined() + expect(result.error).toContain("Unknown category") + expect(result.error).toContain("definitely-not-a-real-category-xyz123") + }) +}) diff --git a/src/tools/delegate-task/category-resolver.ts b/src/tools/delegate-task/category-resolver.ts index 84df498a..57d427e4 100644 --- a/src/tools/delegate-task/category-resolver.ts +++ b/src/tools/delegate-task/category-resolver.ts @@ -29,7 +29,10 @@ export async function resolveCategoryExecution( const availableModels = await getAvailableModelsForDelegateTask(client) - const resolved = resolveCategoryConfig(args.category!, { + const categoryName = args.category! + const categoryExists = DEFAULT_CATEGORIES[categoryName] !== undefined || userCategories?.[categoryName] !== undefined + + const resolved = resolveCategoryConfig(categoryName, { userCategories, inheritedModel, systemDefaultModel, @@ -37,6 +40,27 @@ export async function resolveCategoryExecution( }) if (!resolved) { + const requirement = CATEGORY_MODEL_REQUIREMENTS[categoryName] + const allCategoryNames = Object.keys({ ...DEFAULT_CATEGORIES, ...userCategories }).join(", ") + + if (categoryExists && requirement?.requiresModel) { + return { + agentToUse: "", + categoryModel: undefined, + categoryPromptAppend: undefined, + modelInfo: undefined, + actualModel: undefined, + isUnstableAgent: false, + error: `Category "${categoryName}" requires model "${requirement.requiresModel}" which is not available. + +To use this category: +1. Connect a provider with this model: ${requirement.requiresModel} +2. Or configure an alternative model in your oh-my-opencode.json for this category + +Available categories: ${allCategoryNames}`, + } + } + return { agentToUse: "", categoryModel: undefined, @@ -44,7 +68,7 @@ export async function resolveCategoryExecution( modelInfo: undefined, actualModel: undefined, isUnstableAgent: false, - error: `Unknown category: "${args.category}". Available: ${Object.keys({ ...DEFAULT_CATEGORIES, ...userCategories }).join(", ")}`, + error: `Unknown category: "${categoryName}". Available: ${allCategoryNames}`, } } From 61531ca26c4f9df23faceaa52dcc903c7ee288aa Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 10 Feb 2026 13:58:34 +0900 Subject: [PATCH 7/9] feat(comment-checker): run checks for apply_patch edits --- src/hooks/comment-checker/cli-runner.ts | 39 +++++++++ .../comment-checker/hook.apply-patch.test.ts | 83 ++++++++++++++++++ src/hooks/comment-checker/hook.ts | 86 ++++++++++++++++--- 3 files changed, 195 insertions(+), 13 deletions(-) create mode 100644 src/hooks/comment-checker/hook.apply-patch.test.ts diff --git a/src/hooks/comment-checker/cli-runner.ts b/src/hooks/comment-checker/cli-runner.ts index 8721836e..b546125f 100644 --- a/src/hooks/comment-checker/cli-runner.ts +++ b/src/hooks/comment-checker/cli-runner.ts @@ -58,6 +58,45 @@ export async function processWithCli( } } +export interface ApplyPatchEdit { + filePath: string + before: string + after: string +} + +export async function processApplyPatchEditsWithCli( + sessionID: string, + edits: ApplyPatchEdit[], + output: { output: string }, + cliPath: string, + customPrompt: string | undefined, + debugLog: (...args: unknown[]) => void, +): Promise { + debugLog("processing apply_patch edits:", edits.length) + + for (const edit of edits) { + const hookInput: HookInput = { + session_id: sessionID, + tool_name: "Edit", + transcript_path: "", + cwd: process.cwd(), + hook_event_name: "PostToolUse", + tool_input: { + file_path: edit.filePath, + old_string: edit.before, + new_string: edit.after, + }, + } + + const result = await runCommentChecker(hookInput, cliPath, customPrompt) + + if (result.hasComments && result.message) { + debugLog("CLI detected comments for apply_patch file:", edit.filePath) + output.output += `\n\n${result.message}` + } + } +} + export function isCliPathUsable(cliPath: string | null): cliPath is string { return Boolean(cliPath && existsSync(cliPath)) } diff --git a/src/hooks/comment-checker/hook.apply-patch.test.ts b/src/hooks/comment-checker/hook.apply-patch.test.ts new file mode 100644 index 00000000..ec1b4cd8 --- /dev/null +++ b/src/hooks/comment-checker/hook.apply-patch.test.ts @@ -0,0 +1,83 @@ +import { describe, it, expect, mock, beforeEach } from "bun:test" + +const processApplyPatchEditsWithCli = mock(async () => {}) + +mock.module("./cli-runner", () => ({ + initializeCommentCheckerCli: () => {}, + getCommentCheckerCliPathPromise: () => Promise.resolve("/tmp/fake-comment-checker"), + isCliPathUsable: () => true, + processWithCli: async () => {}, + processApplyPatchEditsWithCli, +})) + +const { createCommentCheckerHooks } = await import("./hook") + +describe("comment-checker apply_patch integration", () => { + beforeEach(() => { + processApplyPatchEditsWithCli.mockClear() + }) + + it("runs comment checker using apply_patch metadata.files", async () => { + // given + const hooks = createCommentCheckerHooks() + + const input = { tool: "apply_patch", sessionID: "ses_test", callID: "call_test" } + const output = { + title: "ok", + output: "Success. Updated the following files:\nM src/a.ts", + metadata: { + files: [ + { + filePath: "/repo/src/a.ts", + before: "const a = 1\n", + after: "// comment\nconst a = 1\n", + type: "update", + }, + { + filePath: "/repo/src/old.ts", + movePath: "/repo/src/new.ts", + before: "const b = 1\n", + after: "// moved comment\nconst b = 1\n", + type: "move", + }, + { + filePath: "/repo/src/delete.ts", + before: "// deleted\n", + after: "", + type: "delete", + }, + ], + }, + } + + // when + await hooks["tool.execute.after"](input, output) + + // then + expect(processApplyPatchEditsWithCli).toHaveBeenCalledTimes(1) + expect(processApplyPatchEditsWithCli).toHaveBeenCalledWith( + "ses_test", + [ + { filePath: "/repo/src/a.ts", before: "const a = 1\n", after: "// comment\nconst a = 1\n" }, + { filePath: "/repo/src/new.ts", before: "const b = 1\n", after: "// moved comment\nconst b = 1\n" }, + ], + expect.any(Object), + "/tmp/fake-comment-checker", + undefined, + expect.any(Function), + ) + }) + + it("skips when apply_patch metadata.files is missing", async () => { + // given + const hooks = createCommentCheckerHooks() + const input = { tool: "apply_patch", sessionID: "ses_test", callID: "call_test" } + const output = { title: "ok", output: "ok", metadata: {} } + + // when + await hooks["tool.execute.after"](input, output) + + // then + expect(processApplyPatchEditsWithCli).toHaveBeenCalledTimes(0) + }) +}) diff --git a/src/hooks/comment-checker/hook.ts b/src/hooks/comment-checker/hook.ts index 79d147b3..89893764 100644 --- a/src/hooks/comment-checker/hook.ts +++ b/src/hooks/comment-checker/hook.ts @@ -1,7 +1,15 @@ import type { PendingCall } from "./types" import type { CommentCheckerConfig } from "../../config/schema" -import { initializeCommentCheckerCli, getCommentCheckerCliPathPromise, isCliPathUsable, processWithCli } from "./cli-runner" +import z from "zod" + +import { + initializeCommentCheckerCli, + getCommentCheckerCliPathPromise, + isCliPathUsable, + processWithCli, + processApplyPatchEditsWithCli, +} from "./cli-runner" import { registerPendingCall, startPendingCallCleanup, takePendingCall } from "./pending-calls" import * as fs from "fs" @@ -81,13 +89,7 @@ export function createCommentCheckerHooks(config?: CommentCheckerConfig) { ): Promise => { debugLog("tool.execute.after:", { tool: input.tool, callID: input.callID }) - const pendingCall = takePendingCall(input.callID) - if (!pendingCall) { - debugLog("no pendingCall found for:", input.callID) - return - } - - debugLog("processing pendingCall:", pendingCall) + const toolLower = input.tool.toLowerCase() // Only skip if the output indicates a tool execution failure const outputLower = output.output.toLowerCase() @@ -102,17 +104,75 @@ export function createCommentCheckerHooks(config?: CommentCheckerConfig) { return } - try { - // Wait for CLI path resolution - const cliPath = await getCommentCheckerCliPathPromise() + const ApplyPatchMetadataSchema = z.object({ + files: z.array( + z.object({ + filePath: z.string(), + movePath: z.string().optional(), + before: z.string(), + after: z.string(), + type: z.string().optional(), + }), + ), + }) + if (toolLower === "apply_patch") { + const parsed = ApplyPatchMetadataSchema.safeParse(output.metadata) + if (!parsed.success) { + debugLog("apply_patch metadata schema mismatch, skipping") + return + } + + const edits = parsed.data.files + .filter((f) => f.type !== "delete") + .map((f) => ({ + filePath: f.movePath ?? f.filePath, + before: f.before, + after: f.after, + })) + + if (edits.length === 0) { + debugLog("apply_patch had no editable files, skipping") + return + } + + try { + const cliPath = await getCommentCheckerCliPathPromise() + if (!isCliPathUsable(cliPath)) { + debugLog("CLI not available, skipping comment check") + return + } + + debugLog("using CLI for apply_patch:", cliPath) + await processApplyPatchEditsWithCli( + input.sessionID, + edits, + output, + cliPath, + config?.custom_prompt, + debugLog, + ) + } catch (err) { + debugLog("apply_patch comment check failed:", err) + } + return + } + + const pendingCall = takePendingCall(input.callID) + if (!pendingCall) { + debugLog("no pendingCall found for:", input.callID) + return + } + + debugLog("processing pendingCall:", pendingCall) + + try { + const cliPath = await getCommentCheckerCliPathPromise() if (!isCliPathUsable(cliPath)) { - // CLI not available - silently skip comment checking debugLog("CLI not available, skipping comment check") return } - // CLI mode only debugLog("using CLI:", cliPath) await processWithCli(input, pendingCall, output, cliPath, config?.custom_prompt, debugLog) } catch (err) { From 97b7215848e5c9d36808b9f2c85a6f5c02396bb1 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 10 Feb 2026 14:08:02 +0900 Subject: [PATCH 8/9] fix(event): prune synthetic idle dedup map --- src/plugin/event.ts | 7 +++++++ src/plugin/recent-synthetic-idles.test.ts | 24 +++++++++++++++++++++++ src/plugin/recent-synthetic-idles.ts | 13 ++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 src/plugin/recent-synthetic-idles.test.ts create mode 100644 src/plugin/recent-synthetic-idles.ts diff --git a/src/plugin/event.ts b/src/plugin/event.ts index 3755a88c..fb88bcd6 100644 --- a/src/plugin/event.ts +++ b/src/plugin/event.ts @@ -13,6 +13,7 @@ import { lspManager } from "../tools" import type { CreatedHooks } from "../create-hooks" import type { Managers } from "../create-managers" import { normalizeSessionStatusToIdle } from "./session-status-normalizer" +import { pruneRecentSyntheticIdles } from "./recent-synthetic-idles" type FirstMessageVariantGate = { markSessionCreated: (sessionInfo: { id?: string; title?: string; parentID?: string } | undefined) => void @@ -54,6 +55,12 @@ export function createEventHandler(args: { const DEDUP_WINDOW_MS = 500 return async (input): Promise => { + pruneRecentSyntheticIdles({ + recentSyntheticIdles, + now: Date.now(), + dedupWindowMs: DEDUP_WINDOW_MS, + }) + if (input.event.type === "session.idle") { const sessionID = (input.event.properties as Record | undefined)?.sessionID as string | undefined if (sessionID) { diff --git a/src/plugin/recent-synthetic-idles.test.ts b/src/plugin/recent-synthetic-idles.test.ts new file mode 100644 index 00000000..468e6e30 --- /dev/null +++ b/src/plugin/recent-synthetic-idles.test.ts @@ -0,0 +1,24 @@ +import { describe, it, expect } from "bun:test" + +import { pruneRecentSyntheticIdles } from "./recent-synthetic-idles" + +describe("pruneRecentSyntheticIdles", () => { + it("removes entries older than dedup window", () => { + // given + const recentSyntheticIdles = new Map([ + ["ses_old", 1000], + ["ses_new", 1600], + ]) + + // when + pruneRecentSyntheticIdles({ + recentSyntheticIdles, + now: 2000, + dedupWindowMs: 500, + }) + + // then + expect(recentSyntheticIdles.has("ses_old")).toBe(false) + expect(recentSyntheticIdles.has("ses_new")).toBe(true) + }) +}) diff --git a/src/plugin/recent-synthetic-idles.ts b/src/plugin/recent-synthetic-idles.ts new file mode 100644 index 00000000..971ff5e0 --- /dev/null +++ b/src/plugin/recent-synthetic-idles.ts @@ -0,0 +1,13 @@ +export function pruneRecentSyntheticIdles(args: { + recentSyntheticIdles: Map + now: number + dedupWindowMs: number +}): void { + const { recentSyntheticIdles, now, dedupWindowMs } = args + + for (const [sessionID, emittedAt] of recentSyntheticIdles) { + if (now - emittedAt >= dedupWindowMs) { + recentSyntheticIdles.delete(sessionID) + } + } +} From f3f5b98c687e7c53f85f8e8c7a8a63b9d06806ca Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 10 Feb 2026 14:13:28 +0900 Subject: [PATCH 9/9] test: use BDD markers in pruneRecentSyntheticIdles test --- src/plugin/recent-synthetic-idles.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/plugin/recent-synthetic-idles.test.ts b/src/plugin/recent-synthetic-idles.test.ts index 468e6e30..4a91145a 100644 --- a/src/plugin/recent-synthetic-idles.test.ts +++ b/src/plugin/recent-synthetic-idles.test.ts @@ -4,20 +4,20 @@ import { pruneRecentSyntheticIdles } from "./recent-synthetic-idles" describe("pruneRecentSyntheticIdles", () => { it("removes entries older than dedup window", () => { - // given + //#given const recentSyntheticIdles = new Map([ ["ses_old", 1000], ["ses_new", 1600], ]) - // when + //#when pruneRecentSyntheticIdles({ recentSyntheticIdles, now: 2000, dedupWindowMs: 500, }) - // then + //#then expect(recentSyntheticIdles.has("ses_old")).toBe(false) expect(recentSyntheticIdles.has("ses_new")).toBe(true) })