Merge remote-tracking branch 'origin/dev' into feat/custom-agents

# Conflicts:
#	src/agents/utils.test.ts
#	src/plugin-handlers/agent-config-handler.ts
This commit is contained in:
edxeth 2026-02-26 18:53:29 +01:00
commit 7e90c2c48f
147 changed files with 6360 additions and 1763 deletions

View File

@ -35,15 +35,15 @@ jobs:
# - Uploads compressed artifacts for the publish job # - Uploads compressed artifacts for the publish job
# ============================================================================= # =============================================================================
build: build:
runs-on: ${{ matrix.platform == 'windows-x64' && 'windows-latest' || 'ubuntu-latest' }} runs-on: ${{ startsWith(matrix.platform, 'windows-') && 'windows-latest' || 'ubuntu-latest' }}
defaults: defaults:
run: run:
shell: bash shell: bash
strategy: strategy:
fail-fast: false fail-fast: false
max-parallel: 7 max-parallel: 11
matrix: matrix:
platform: [darwin-arm64, darwin-x64, linux-x64, linux-arm64, linux-x64-musl, linux-arm64-musl, windows-x64] platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline]
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -95,14 +95,18 @@ jobs:
case "$PLATFORM" in case "$PLATFORM" in
darwin-arm64) TARGET="bun-darwin-arm64" ;; darwin-arm64) TARGET="bun-darwin-arm64" ;;
darwin-x64) TARGET="bun-darwin-x64" ;; darwin-x64) TARGET="bun-darwin-x64" ;;
darwin-x64-baseline) TARGET="bun-darwin-x64-baseline" ;;
linux-x64) TARGET="bun-linux-x64" ;; linux-x64) TARGET="bun-linux-x64" ;;
linux-x64-baseline) TARGET="bun-linux-x64-baseline" ;;
linux-arm64) TARGET="bun-linux-arm64" ;; linux-arm64) TARGET="bun-linux-arm64" ;;
linux-x64-musl) TARGET="bun-linux-x64-musl" ;; linux-x64-musl) TARGET="bun-linux-x64-musl" ;;
linux-x64-musl-baseline) TARGET="bun-linux-x64-musl-baseline" ;;
linux-arm64-musl) TARGET="bun-linux-arm64-musl" ;; linux-arm64-musl) TARGET="bun-linux-arm64-musl" ;;
windows-x64) TARGET="bun-windows-x64" ;; windows-x64) TARGET="bun-windows-x64" ;;
windows-x64-baseline) TARGET="bun-windows-x64-baseline" ;;
esac esac
if [ "$PLATFORM" = "windows-x64" ]; then if [[ "$PLATFORM" == windows-* ]]; then
OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode.exe" OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode.exe"
else else
OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode" OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode"
@ -119,7 +123,7 @@ jobs:
PLATFORM="${{ matrix.platform }}" PLATFORM="${{ matrix.platform }}"
cd packages/${PLATFORM} cd packages/${PLATFORM}
if [ "$PLATFORM" = "windows-x64" ]; then if [[ "$PLATFORM" == windows-* ]]; then
# Windows: use 7z (pre-installed on windows-latest) # Windows: use 7z (pre-installed on windows-latest)
7z a -tzip ../../binary-${PLATFORM}.zip bin/ package.json 7z a -tzip ../../binary-${PLATFORM}.zip bin/ package.json
else else
@ -155,7 +159,7 @@ jobs:
fail-fast: false fail-fast: false
max-parallel: 2 max-parallel: 2
matrix: matrix:
platform: [darwin-arm64, darwin-x64, linux-x64, linux-arm64, linux-x64-musl, linux-arm64-musl, windows-x64] platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline]
steps: steps:
- name: Check if already published - name: Check if already published
id: check id: check
@ -184,7 +188,7 @@ jobs:
PLATFORM="${{ matrix.platform }}" PLATFORM="${{ matrix.platform }}"
mkdir -p packages/${PLATFORM} mkdir -p packages/${PLATFORM}
if [ "$PLATFORM" = "windows-x64" ]; then if [[ "$PLATFORM" == windows-* ]]; then
unzip binary-${PLATFORM}.zip -d packages/${PLATFORM}/ unzip binary-${PLATFORM}.zip -d packages/${PLATFORM}/
else else
tar -xzvf binary-${PLATFORM}.tar.gz -C packages/${PLATFORM}/ tar -xzvf binary-${PLATFORM}.tar.gz -C packages/${PLATFORM}/

View File

@ -189,7 +189,7 @@ jobs:
VERSION="${{ steps.version.outputs.version }}" VERSION="${{ steps.version.outputs.version }}"
jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json
for platform in darwin-arm64 darwin-x64 linux-x64 linux-arm64 linux-x64-musl linux-arm64-musl windows-x64; do for platform in darwin-arm64 darwin-x64 darwin-x64-baseline linux-x64 linux-x64-baseline linux-arm64 linux-x64-musl linux-x64-musl-baseline linux-arm64-musl windows-x64 windows-x64-baseline; do
jq --arg v "$VERSION" '.version = $v' "packages/${platform}/package.json" > tmp.json jq --arg v "$VERSION" '.version = $v' "packages/${platform}/package.json" > tmp.json
mv tmp.json "packages/${platform}/package.json" mv tmp.json "packages/${platform}/package.json"
done done

View File

@ -24,19 +24,7 @@
"disabled_agents": { "disabled_agents": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string", "type": "string"
"enum": [
"sisyphus",
"hephaestus",
"prometheus",
"oracle",
"librarian",
"explore",
"multimodal-looker",
"metis",
"momus",
"atlas"
]
} }
}, },
"disabled_skills": { "disabled_skills": {
@ -960,6 +948,9 @@
} }
}, },
"additionalProperties": false "additionalProperties": false
},
"allow_non_gpt_model": {
"type": "boolean"
} }
}, },
"additionalProperties": false "additionalProperties": false
@ -3474,6 +3465,11 @@
"prompt_append": { "prompt_append": {
"type": "string" "type": "string"
}, },
"max_prompt_tokens": {
"type": "integer",
"exclusiveMinimum": 0,
"maximum": 9007199254740991
},
"is_unstable_agent": { "is_unstable_agent": {
"type": "boolean" "type": "boolean"
}, },

62
benchmarks/bun.lock Normal file
View File

@ -0,0 +1,62 @@
{
"lockfileVersion": 1,
"configVersion": 1,
"workspaces": {
"": {
"name": "hashline-edit-benchmark",
"dependencies": {
"@ai-sdk/openai": "^1.3.0",
"@friendliai/ai-provider": "^1.0.9",
"ai": "^6.0.94",
"zod": "^4.1.0",
},
},
},
"packages": {
"@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.55", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-7xMeTJnCjwRwXKVCiv4Ly4qzWvDuW3+W1WIV0X1EFu6W83d4mEhV9bFArto10MeTw40ewuDjrbrZd21mXKohkw=="],
"@ai-sdk/openai": ["@ai-sdk/openai@1.3.24", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-GYXnGJTHRTZc4gJMSmFRgEQudjqd4PUN0ZjQhPwOAYH1yOAvQoG/Ikqs+HyISRbLPCrhbZnPKCNHuRU4OfpW0Q=="],
"@ai-sdk/openai-compatible": ["@ai-sdk/openai-compatible@2.0.30", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-iTjumHf1/u4NhjXYFn/aONM2GId3/o7J1Lp5ql8FCbgIMyRwrmanR5xy1S3aaVkfTscuDvLTzWiy1mAbGzK3nQ=="],
"@ai-sdk/provider": ["@ai-sdk/provider@1.1.3", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@2.2.8", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
"@friendliai/ai-provider": ["@friendliai/ai-provider@1.1.4", "", { "dependencies": { "@ai-sdk/openai-compatible": "2.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.12" } }, "sha512-9TU4B1QFqPhbkONjI5afCF7Ox4jOqtGg1xw8mA9QHZdtlEbZxU+mBNvMPlI5pU5kPoN6s7wkXmFmxpID+own1A=="],
"@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="],
"@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="],
"@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="],
"ai": ["ai@6.0.101", "", { "dependencies": { "@ai-sdk/gateway": "3.0.55", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-Ur/NgbgOp1rdhyDiKDk6EOpSgd1g5ADlbcD1cjQJtQsnmhEngz3Rf8nK5JetDh0vnbLy2aEBpaQeL+zvLRWuaA=="],
"eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="],
"json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="],
"nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
"secure-json-parse": ["secure-json-parse@2.7.0", "", {}, "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="],
"zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
"@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
"@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
"@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
"@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
"@friendliai/ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
"@friendliai/ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
"ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
"ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
}
}

193
benchmarks/headless.ts Normal file
View File

@ -0,0 +1,193 @@
#!/usr/bin/env bun
import { readFile, writeFile, mkdir } from "node:fs/promises"
import { join, dirname } from "node:path"
import { stepCountIs, streamText, type CoreMessage } from "ai"
import { tool } from "ai"
import { createFriendli } from "@friendliai/ai-provider"
import { z } from "zod"
import { formatHashLines } from "../src/tools/hashline-edit/hash-computation"
import { normalizeHashlineEdits } from "../src/tools/hashline-edit/normalize-edits"
import { applyHashlineEditsWithReport } from "../src/tools/hashline-edit/edit-operations"
import { canonicalizeFileText, restoreFileText } from "../src/tools/hashline-edit/file-text-canonicalization"
// ── Constants & structured logging ───────────────────────────
const DEFAULT_MODEL = "MiniMaxAI/MiniMax-M2.5"
const MAX_STEPS = 50

// Unique id for this benchmark run: timestamp plus a short random suffix.
const sessionId = `bench-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`

// Write one JSONL event to stdout, stamped with the session id and time.
function emit(event: Record<string, unknown>): void {
  const payload = { sessionId, timestamp: new Date().toISOString(), ...event }
  console.log(JSON.stringify(payload))
}
// ── CLI ──────────────────────────────────────────────────────
/**
 * Parse CLI flags: -p/--prompt (required) and -m/--model (optional).
 * --reasoning-mode (with its value), --think and --no-translate are
 * accepted for CLI compatibility but ignored. Exits with status 1 and a
 * usage message when no prompt is supplied.
 */
function parseArgs(): { prompt: string; modelId: string } {
  const argv = process.argv.slice(2)
  let prompt = ""
  let modelId = DEFAULT_MODEL
  let i = 0
  while (i < argv.length) {
    const flag = argv[i]
    const value = argv[i + 1]
    if ((flag === "-p" || flag === "--prompt") && value) {
      prompt = value
      i += 2
    } else if ((flag === "-m" || flag === "--model") && value) {
      modelId = value
      i += 2
    } else if (flag === "--reasoning-mode" && value) {
      i += 2 // flag and value both consumed, value discarded
    } else {
      i += 1 // --think / --no-translate and anything else: skip silently
    }
  }
  if (!prompt) {
    console.error("Usage: bun run benchmarks/headless.ts -p <prompt> [-m <model>]")
    process.exit(1)
  }
  return { prompt, modelId }
}
// ── Tools ────────────────────────────────────────────────────
// Tool: read a file and return its content tagged with LINE#ID anchors
// so the model has fresh hashes to target with edit_file.
const readFileTool = tool({
  description: "Read a file with hashline-tagged content (LINE#ID format)",
  inputSchema: z.object({ path: z.string().describe("File path") }),
  execute: async ({ path }) => {
    const fullPath = join(process.cwd(), path)
    try {
      const content = await readFile(fullPath, "utf-8")
      const lineTotal = content.split("\n").length
      const hashTagged = formatHashLines(content)
      return `OK - read file\npath: ${path}\nlines: ${lineTotal}\n\n${hashTagged}`
    } catch {
      // Any read failure is surfaced to the model as a missing file.
      return `Error: File not found: ${path}`
    }
  },
})
// Tool: apply hashline-anchored edits to a file, creating it when the
// edits are anchor-free append/prepend operations.
const editFileTool = tool({
  description: "Edit a file using hashline anchors (LINE#ID format)",
  inputSchema: z.object({
    path: z.string(),
    edits: z.array(
      z.object({
        op: z.enum(["replace", "append", "prepend"]),
        pos: z.string().optional(),
        end: z.string().optional(),
        lines: z.union([z.array(z.string()), z.string(), z.null()]),
      })
    ).min(1),
  }),
  execute: async ({ path, edits }) => {
    const fullPath = join(process.cwd(), path)
    try {
      // Read current content; a missing file may still be creatable.
      let rawContent = ""
      let exists = true
      try {
        rawContent = await readFile(fullPath, "utf-8")
      } catch {
        exists = false
      }

      const normalized = normalizeHashlineEdits(edits)

      // A brand-new file can only be built from anchor-free append/prepend
      // edits; anything positional needs an existing file to anchor to.
      if (!exists) {
        const creatable = normalized.every(
          (edit) => (edit.op === "append" || edit.op === "prepend") && !edit.pos
        )
        if (!creatable) return `Error: File not found: ${path}`
      }

      // Canonicalize so edits operate on a stable text form, then apply.
      const envelope = canonicalizeFileText(rawContent)
      const result = applyHashlineEditsWithReport(envelope.content, normalized)
      if (result.content === envelope.content) {
        return `Error: No changes made to ${path}. The edits produced identical content.`
      }

      // Restore the original file envelope before writing back to disk.
      const writeContent = restoreFileText(result.content, envelope)
      await mkdir(dirname(fullPath), { recursive: true })
      await writeFile(fullPath, writeContent, "utf-8")

      // Report the net line delta against the pre-edit raw content.
      const delta = writeContent.split("\n").length - rawContent.split("\n").length
      const sign = delta > 0 ? "+" : ""
      const verb = exists ? "Updated" : "Created"
      return `${verb} ${path}\n${edits.length} edit(s) applied, ${sign}${delta} line(s)`
    } catch (error) {
      return `Error: ${error instanceof Error ? error.message : String(error)}`
    }
  },
})
// ── Agent Loop ───────────────────────────────────────────────
async function run() {
const { prompt, modelId } = parseArgs()
const friendli = createFriendli({ apiKey: process.env.FRIENDLI_TOKEN! })
const model = friendli(modelId)
const tools = { read_file: readFileTool, edit_file: editFileTool }
emit({ type: "user", content: prompt })
const messages: CoreMessage[] = [{ role: "user", content: prompt }]
const system =
"You are a code editing assistant. Use read_file to read files and edit_file to edit them. " +
"Always read a file before editing it to get fresh LINE#ID anchors."
for (let step = 0; step < MAX_STEPS; step++) {
const stream = streamText({
model,
tools,
messages,
system,
stopWhen: stepCountIs(1),
})
let currentText = ""
for await (const part of stream.fullStream) {
switch (part.type) {
case "text-delta":
currentText += part.text
break
case "tool-call":
emit({
type: "tool_call",
tool_call_id: part.toolCallId,
tool_name: part.toolName,
tool_input: part.args,
model: modelId,
})
break
case "tool-result": {
const output = typeof part.result === "string" ? part.result : JSON.stringify(part.result)
const isError = typeof output === "string" && output.startsWith("Error:")
emit({
type: "tool_result",
tool_call_id: part.toolCallId,
output,
...(isError ? { error: output } : {}),
})
break
}
}
const response = await stream.response
messages.push(...response.messages)
const finishReason = await stream.finishReason
if (finishReason !== "tool-calls") {
if (currentText.trim()) {
emit({ type: "assistant", content: currentText, model: modelId })
}
break
}
}
}
// ── Signal + Startup ─────────────────────────────────────────
// Exit cleanly on Ctrl-C; 143 = 128 + 15 (SIGTERM) by shell convention.
process.once("SIGINT", () => process.exit(0))
process.once("SIGTERM", () => process.exit(143))

const startTime = Date.now()
run().then(
  () => {
    // Timing goes to stderr so stdout stays pure JSONL.
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(2)
    console.error(`[headless] Completed in ${elapsed}s`)
  },
  (error: unknown) => {
    emit({ type: "error", error: error instanceof Error ? error.message : String(error) })
    process.exit(1)
  }
)

19
benchmarks/package.json Normal file
View File

@ -0,0 +1,19 @@
{
"name": "hashline-edit-benchmark",
"version": "0.1.0",
"private": true,
"type": "module",
"description": "Hashline edit tool benchmark using Vercel AI SDK with FriendliAI provider",
"scripts": {
"bench:basic": "bun run test-edit-ops.ts",
"bench:edge": "bun run test-edge-cases.ts",
"bench:multi": "bun run test-multi-model.ts",
"bench:all": "bun run bench:basic && bun run bench:edge"
},
"dependencies": {
"ai": "^6.0.94",
"@ai-sdk/openai": "^1.3.0",
"@friendliai/ai-provider": "^1.0.9",
"zod": "^4.1.0"
}
}

File diff suppressed because it is too large Load Diff

808
benchmarks/test-edit-ops.ts Normal file
View File

@ -0,0 +1,808 @@
#!/usr/bin/env bun
/**
* Comprehensive headless edit_file stress test: 21 operation types
*
* Tests: 5 basic ops + 10 creative cases + 6 whitespace cases
* Each runs via headless mode with its own demo file + prompt.
*
* Usage:
* bun run scripts/test-headless-edit-ops.ts [-m <model>] [--provider <provider>]
*/
import { spawn } from "node:child_process";
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join, resolve } from "node:path";
// ── CLI arg passthrough ───────────────────────────────────────
// Flags recognized here are forwarded verbatim to the headless runner.
const extraArgs: string[] = [];
const rawArgs = process.argv.slice(2);
for (let i = 0; i < rawArgs.length; i++) {
  const arg = rawArgs[i];
  // Flags that carry a value in the next argv slot.
  const takesValue =
    arg === "-m" || arg === "--model" || arg === "--provider" || arg === "--reasoning-mode";
  if (takesValue && i + 1 < rawArgs.length) {
    extraArgs.push(arg, rawArgs[i + 1]);
    i++; // skip the consumed value
  } else if (arg === "--think" || arg === "--no-translate") {
    extraArgs.push(arg); // boolean flags pass through as-is
  }
}
// ── Colors ────────────────────────────────────────────────────
// Raw ANSI escape sequences for terminal output.
const BOLD = "\x1b[1m";
const GREEN = "\x1b[32m";
const RED = "\x1b[31m";
const YELLOW = "\x1b[33m";
const DIM = "\x1b[2m";
const CYAN = "\x1b[36m";
const RESET = "\x1b[0m";

// Small console helpers: colored status prefix, then the message.
const pass = (msg: string) => {
  console.log(` ${GREEN}${RESET} ${msg}`);
};
const fail = (msg: string) => {
  console.log(` ${RED}${RESET} ${msg}`);
};
const info = (msg: string) => {
  console.log(` ${DIM}${msg}${RESET}`);
};
const warn = (msg: string) => {
  console.log(` ${YELLOW}${RESET} ${msg}`);
};
// ── Test case definition ─────────────────────────────────────
/**
 * One self-contained edit_file scenario executed via headless mode.
 * Each case provisions its own file, sends one prompt, then validates
 * the resulting file content on disk.
 */
interface TestCase {
  // Initial content written to the scratch file before the run.
  fileContent: string;
  // File name (relative to the scratch workspace) the prompt refers to.
  fileName: string;
  // Human-readable label, e.g. "1. Replace single line".
  name: string;
  // Instruction sent to the model for this case.
  prompt: string;
  // Inspects the post-run file content; returns pass/fail plus a reason.
  validate: (content: string) => { passed: boolean; reason: string };
}
const TEST_CASES: TestCase[] = [
{
name: "1. Replace single line",
fileName: "config.txt",
fileContent: [
"host: localhost",
"port: 3000",
"debug: false",
"timeout: 30",
"retries: 3",
].join("\n"),
prompt: [
"Follow these steps exactly:",
"Step 1: Call read_file on config.txt.",
"Step 2: Note the anchor for the port line (line 2).",
"Step 3: Call edit_file with path='config.txt' and edits containing ONE object:",
" { op: 'replace', pos: '<line2 anchor>', lines: ['port: 8080'] }",
"IMPORTANT: pos must be ONLY the anchor (like '2#KB'). lines must be a SEPARATE array field with the new content.",
].join(" "),
validate: (content) => {
const has8080 = content.includes("port: 8080");
const has3000 = content.includes("port: 3000");
if (has8080 && !has3000) {
return { passed: true, reason: "port changed to 8080" };
}
if (has3000) {
return { passed: false, reason: "port still 3000 — edit not applied" };
}
return {
passed: false,
reason: `unexpected content: ${content.slice(0, 100)}`,
};
},
},
{
name: "2. Append after line",
fileName: "fruits.txt",
fileContent: ["apple", "banana", "cherry"].join("\n"),
prompt:
"Read fruits.txt with read_file. Then use edit_file with op='append' to insert a new line 'grape' after the 'banana' line. Use pos='LINE#HASH' of the banana line and lines=['grape'].",
validate: (content) => {
const lines = content.trim().split("\n");
const bananaIdx = lines.findIndex((l) => l.trim() === "banana");
const grapeIdx = lines.findIndex((l) => l.trim() === "grape");
if (grapeIdx === -1) {
return { passed: false, reason: '"grape" not found in file' };
}
if (bananaIdx === -1) {
return { passed: false, reason: '"banana" was removed' };
}
if (grapeIdx !== bananaIdx + 1) {
return {
passed: false,
reason: `"grape" at line ${grapeIdx + 1} but expected after "banana" at line ${bananaIdx + 1}`,
};
}
if (lines.length !== 4) {
return {
passed: false,
reason: `expected 4 lines, got ${lines.length}`,
};
}
return {
passed: true,
reason: '"grape" correctly appended after "banana"',
};
},
},
{
name: "3. Prepend before line",
fileName: "code.txt",
fileContent: ["function greet() {", ' return "hello";', "}"].join("\n"),
prompt:
"Read code.txt with read_file. Then use edit_file with op='prepend' to add '// Greeting function' before the function line. Use pos='LINE#HASH' of the function line and lines=['// Greeting function'].",
validate: (content) => {
const lines = content.trim().split("\n");
const commentIdx = lines.findIndex(
(l) => l.trim().startsWith("//") && l.toLowerCase().includes("greet")
);
const funcIdx = lines.findIndex((l) =>
l.trim().startsWith("function greet")
);
if (commentIdx === -1) {
return { passed: false, reason: "comment line not found" };
}
if (funcIdx === -1) {
return { passed: false, reason: '"function greet" line was removed' };
}
if (commentIdx !== funcIdx - 1) {
return {
passed: false,
reason: `comment at line ${commentIdx + 1} but function at ${funcIdx + 1} — not directly before`,
};
}
return {
passed: true,
reason: "comment correctly prepended before function",
};
},
},
{
name: "4. Range replace (multi-line → single line)",
fileName: "log.txt",
fileContent: [
"=== Log Start ===",
"INFO: started",
"WARN: slow query",
"ERROR: timeout",
"INFO: recovered",
"=== Log End ===",
].join("\n"),
prompt: [
"Follow these steps exactly:",
"Step 1: Call read_file on log.txt to see line anchors.",
"Step 2: Note the anchor for 'WARN: slow query' (line 3) and 'ERROR: timeout' (line 4).",
"Step 3: Call edit_file with path='log.txt' and edits containing ONE object with THREE separate JSON fields:",
" { op: 'replace', pos: '<line3 anchor>', end: '<line4 anchor>', lines: ['RESOLVED: issues cleared'] }",
"CRITICAL: pos, end, and lines are THREE SEPARATE JSON fields. pos is ONLY '3#XX'. end is ONLY '4#YY'. lines is ['RESOLVED: issues cleared'].",
"If edit_file fails or errors, use write_file to write the complete correct file content instead.",
"The correct final content should be: === Log Start ===, INFO: started, RESOLVED: issues cleared, INFO: recovered, === Log End ===",
"Do not make any other changes.",
].join(" "),
validate: (content) => {
const lines = content.trim().split("\n");
const hasResolved = lines.some(
(l) => l.trim() === "RESOLVED: issues cleared"
);
const hasWarn = content.includes("WARN: slow query");
const hasError = content.includes("ERROR: timeout");
if (!hasResolved) {
return {
passed: false,
reason: '"RESOLVED: issues cleared" not found',
};
}
if (hasWarn || hasError) {
return { passed: false, reason: "old WARN/ERROR lines still present" };
}
// Core assertion: 2 old lines removed, 1 new line added = net -1 line
// Allow slight overshoot from model adding extra content
if (lines.length < 4 || lines.length > 6) {
return {
passed: false,
reason: `expected ~5 lines, got ${lines.length}`,
};
}
return {
passed: true,
reason: "range replace succeeded — 2 lines → 1 line",
};
},
},
{
name: "5. Delete line",
fileName: "settings.txt",
fileContent: [
"mode: production",
"debug: true",
"cache: enabled",
"log_level: info",
].join("\n"),
prompt: [
"Follow these steps exactly:",
"Step 1: Call read_file on settings.txt to see line anchors.",
"Step 2: Note the anchor for 'debug: true' (line 2).",
"Step 3: Call edit_file with path='settings.txt' and edits containing ONE object:",
" { op: 'replace', pos: '<line2 anchor>', lines: [] }",
"IMPORTANT: lines must be an empty array [] to delete the line. pos must be ONLY the anchor like '2#SR'.",
].join(" "),
validate: (content) => {
const lines = content.trim().split("\n");
const hasDebug = content.includes("debug: true");
if (hasDebug) {
return { passed: false, reason: '"debug: true" still present' };
}
if (lines.length !== 3) {
return {
passed: false,
reason: `expected 3 lines, got ${lines.length}`,
};
}
if (
!(
content.includes("mode: production") &&
content.includes("cache: enabled")
)
) {
return { passed: false, reason: "other lines were removed" };
}
return { passed: true, reason: '"debug: true" successfully deleted' };
},
},
// ── Creative cases (6-15) ────────────────────────────────────
{
name: "6. Batch edit — two replacements in one call",
fileName: "batch.txt",
fileContent: ["red", "green", "blue", "yellow"].join("\n"),
prompt: [
"Read batch.txt with read_file.",
"Then call edit_file ONCE with path='batch.txt' and edits containing TWO objects:",
" 1) { op: 'replace', pos: '<line1 anchor>', lines: ['crimson'] }",
" 2) { op: 'replace', pos: '<line3 anchor>', lines: ['navy'] }",
"Both edits must be in the SAME edits array in a single edit_file call.",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (!c.includes("crimson")) return { passed: false, reason: "'crimson' not found" };
if (!c.includes("navy")) return { passed: false, reason: "'navy' not found" };
if (c.includes("red")) return { passed: false, reason: "'red' still present" };
if (c.includes("blue")) return { passed: false, reason: "'blue' still present" };
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
return { passed: true, reason: "both lines replaced in single call" };
},
},
{
name: "7. Line expansion — 1 line → 3 lines",
fileName: "expand.txt",
fileContent: ["header", "TODO: implement", "footer"].join("\n"),
prompt: [
"Read expand.txt with read_file.",
"Replace the 'TODO: implement' line (line 2) with THREE lines:",
" 'step 1: init', 'step 2: process', 'step 3: cleanup'",
"Use edit_file with op='replace', pos=<line2 anchor>, lines=['step 1: init', 'step 2: process', 'step 3: cleanup'].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (c.includes("TODO")) return { passed: false, reason: "TODO line still present" };
if (!c.includes("step 1: init")) return { passed: false, reason: "'step 1: init' not found" };
if (!c.includes("step 3: cleanup")) return { passed: false, reason: "'step 3: cleanup' not found" };
if (lines.length !== 5) return { passed: false, reason: `expected 5 lines, got ${lines.length}` };
return { passed: true, reason: "1 line expanded to 3 lines" };
},
},
{
name: "8. Append at EOF",
fileName: "eof.txt",
fileContent: ["line one", "line two"].join("\n"),
prompt: [
"Read eof.txt with read_file.",
"Use edit_file to append 'line three' after the LAST line of the file.",
"Use op='append', pos=<last line anchor>, lines=['line three'].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (!c.includes("line three")) return { passed: false, reason: "'line three' not found" };
if (lines[lines.length - 1].trim() !== "line three")
return { passed: false, reason: "'line three' not at end" };
if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` };
return { passed: true, reason: "appended at EOF" };
},
},
{
name: "9. Special characters in content",
fileName: "special.json",
fileContent: [
'{',
' "name": "old-value",',
' "count": 42',
'}',
].join("\n"),
prompt: [
"Read special.json with read_file.",
'Replace the line containing \"name\": \"old-value\" with \"name\": \"new-value\".',
"Use edit_file with op='replace', pos=<that line's anchor>, lines=[' \"name\": \"new-value\",'].",
].join(" "),
validate: (c) => {
if (c.includes("old-value")) return { passed: false, reason: "'old-value' still present" };
if (!c.includes('"new-value"')) return { passed: false, reason: "'new-value' not found" };
if (!c.includes('"count": 42')) return { passed: false, reason: "other content was modified" };
return { passed: true, reason: "JSON value replaced with special chars intact" };
},
},
{
name: "10. Replace first line",
fileName: "first.txt",
fileContent: ["OLD HEADER", "body content", "footer"].join("\n"),
prompt: [
"Read first.txt with read_file.",
"Replace the very first line 'OLD HEADER' with 'NEW HEADER'.",
"Use edit_file with op='replace', pos=<line1 anchor>, lines=['NEW HEADER'].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (c.includes("OLD HEADER")) return { passed: false, reason: "'OLD HEADER' still present" };
if (lines[0].trim() !== "NEW HEADER") return { passed: false, reason: "first line is not 'NEW HEADER'" };
if (!c.includes("body content")) return { passed: false, reason: "body was modified" };
return { passed: true, reason: "first line replaced" };
},
},
{
name: "11. Replace last line",
fileName: "last.txt",
fileContent: ["alpha", "bravo", "OLD_FOOTER"].join("\n"),
prompt: [
"Read last.txt with read_file.",
"Replace the last line 'OLD_FOOTER' with 'NEW_FOOTER'.",
"Use edit_file with op='replace', pos=<last line anchor>, lines=['NEW_FOOTER'].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (c.includes("OLD_FOOTER")) return { passed: false, reason: "'OLD_FOOTER' still present" };
if (lines[lines.length - 1].trim() !== "NEW_FOOTER")
return { passed: false, reason: "last line is not 'NEW_FOOTER'" };
return { passed: true, reason: "last line replaced" };
},
},
{
name: "12. Adjacent line edits",
fileName: "adjacent.txt",
fileContent: ["aaa", "bbb", "ccc", "ddd"].join("\n"),
prompt: [
"Read adjacent.txt with read_file.",
"Replace line 2 ('bbb') with 'BBB' and line 3 ('ccc') with 'CCC'.",
"Use edit_file with TWO edits in the same call:",
" { op: 'replace', pos: <line2 anchor>, lines: ['BBB'] }",
" { op: 'replace', pos: <line3 anchor>, lines: ['CCC'] }",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (c.includes("bbb")) return { passed: false, reason: "'bbb' still present" };
if (c.includes("ccc")) return { passed: false, reason: "'ccc' still present" };
if (!c.includes("BBB")) return { passed: false, reason: "'BBB' not found" };
if (!c.includes("CCC")) return { passed: false, reason: "'CCC' not found" };
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
return { passed: true, reason: "two adjacent lines replaced" };
},
},
{
name: "13. Prepend multi-line block",
fileName: "block.py",
fileContent: ["def main():", " print('hello')", "", "main()"].join("\n"),
prompt: [
"Read block.py with read_file.",
"Prepend a 2-line comment block before 'def main():' (line 1).",
"The two lines are: '# Author: test' and '# Date: 2025-01-01'.",
"Use edit_file with op='prepend', pos=<line1 anchor>, lines=['# Author: test', '# Date: 2025-01-01'].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (!c.includes("# Author: test")) return { passed: false, reason: "author comment not found" };
if (!c.includes("# Date: 2025-01-01")) return { passed: false, reason: "date comment not found" };
const defIdx = lines.findIndex((l) => l.startsWith("def main"));
const authorIdx = lines.findIndex((l) => l.includes("Author"));
if (authorIdx >= defIdx) return { passed: false, reason: "comments not before def" };
return { passed: true, reason: "2-line block prepended before function" };
},
},
{
name: "14. Delete range — 3 consecutive lines",
fileName: "cleanup.txt",
fileContent: ["keep1", "remove-a", "remove-b", "remove-c", "keep2"].join("\n"),
prompt: [
"Read cleanup.txt with read_file.",
"Delete lines 2-4 ('remove-a', 'remove-b', 'remove-c') using a single range replace.",
"Use edit_file with op='replace', pos=<line2 anchor>, end=<line4 anchor>, lines=[].",
"An empty lines array deletes the range.",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (c.includes("remove")) return { passed: false, reason: "'remove' lines still present" };
if (!c.includes("keep1")) return { passed: false, reason: "'keep1' was deleted" };
if (!c.includes("keep2")) return { passed: false, reason: "'keep2' was deleted" };
if (lines.length !== 2) return { passed: false, reason: `expected 2 lines, got ${lines.length}` };
return { passed: true, reason: "3 consecutive lines deleted via range" };
},
},
{
name: "15. Replace with duplicate-content line",
fileName: "dupes.txt",
fileContent: ["item", "item", "item", "item"].join("\n"),
prompt: [
"Read dupes.txt with read_file. All 4 lines have the same text 'item'.",
"Replace ONLY line 3 with 'CHANGED'. Do NOT modify any other line.",
"Use edit_file with op='replace', pos=<line3 anchor>, lines=['CHANGED'].",
"The anchor hash uniquely identifies line 3 even though the content is identical.",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (!c.includes("CHANGED")) return { passed: false, reason: "'CHANGED' not found" };
const changedCount = lines.filter((l) => l.trim() === "CHANGED").length;
const itemCount = lines.filter((l) => l.trim() === "item").length;
if (changedCount !== 1) return { passed: false, reason: `expected 1 CHANGED, got ${changedCount}` };
if (itemCount !== 3) return { passed: false, reason: `expected 3 item lines, got ${itemCount}` };
if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` };
return { passed: true, reason: "only line 3 changed among duplicates" };
},
},
// ── Whitespace cases (16-21) ──────────────────────────────────
{
name: "16. Fix indentation — 2 spaces → 4 spaces",
fileName: "indent.js",
fileContent: ["function foo() {", " const x = 1;", " return x;", "}"].join("\n"),
prompt: [
"Read indent.js with read_file.",
"Replace line 2 ' const x = 1;' (2-space indent) with ' const x = 1;' (4-space indent).",
"Use edit_file with op='replace', pos=<line2 anchor>, lines=[' const x = 1;'].",
"The ONLY change is the indentation: 2 spaces → 4 spaces. Content stays the same.",
].join(" "),
validate: (c) => {
const lines = c.split("\n");
const line2 = lines[1];
if (!line2) return { passed: false, reason: "line 2 missing" };
if (line2 === " const x = 1;") return { passed: true, reason: "indentation fixed to 4 spaces" };
if (line2 === " const x = 1;") return { passed: false, reason: "still 2-space indent" };
return { passed: false, reason: `unexpected line 2: '${line2}'` };
},
},
{
name: "17. Replace preserving leading whitespace",
fileName: "preserve.py",
fileContent: [
"class Foo:",
" def old_method(self):",
" pass",
].join("\n"),
prompt: [
"Read preserve.py with read_file.",
"Replace line 2 ' def old_method(self):' with ' def new_method(self):'.",
"Keep the 4-space indentation. Only change the method name.",
"Use edit_file with op='replace', pos=<line2 anchor>, lines=[' def new_method(self):'].",
].join(" "),
validate: (c) => {
if (c.includes("old_method")) return { passed: false, reason: "'old_method' still present" };
const lines = c.split("\n");
const methodLine = lines.find((l) => l.includes("new_method"));
if (!methodLine) return { passed: false, reason: "'new_method' not found" };
if (!methodLine.startsWith(" ")) return { passed: false, reason: "indentation lost" };
return { passed: true, reason: "method renamed with indentation preserved" };
},
},
{
name: "18. Insert blank line between sections",
fileName: "sections.txt",
fileContent: ["[section-a]", "value-a=1", "[section-b]", "value-b=2"].join("\n"),
prompt: [
"Read sections.txt with read_file.",
"Insert a blank empty line between 'value-a=1' (line 2) and '[section-b]' (line 3).",
"Use edit_file with op='append', pos=<line2 anchor>, lines=[''].",
"lines=[''] inserts one empty line.",
].join(" "),
validate: (c) => {
const lines = c.split("\n");
const valAIdx = lines.findIndex((l) => l.includes("value-a=1"));
const secBIdx = lines.findIndex((l) => l.includes("[section-b]"));
if (valAIdx === -1) return { passed: false, reason: "'value-a=1' missing" };
if (secBIdx === -1) return { passed: false, reason: "'[section-b]' missing" };
if (secBIdx - valAIdx < 2) return { passed: false, reason: "no blank line between sections" };
const between = lines[valAIdx + 1];
if (between.trim() !== "") return { passed: false, reason: `line between is '${between}', not blank` };
return { passed: true, reason: "blank line inserted between sections" };
},
},
{
name: "19. Delete blank line",
fileName: "noblank.txt",
fileContent: ["first", "", "second", "third"].join("\n"),
prompt: [
"Read noblank.txt with read_file.",
"Delete the empty blank line (line 2). Use edit_file with op='replace', pos=<line2 anchor>, lines=[].",
].join(" "),
validate: (c) => {
const lines = c.trim().split("\n");
if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` };
if (lines[0].trim() !== "first") return { passed: false, reason: "'first' not on line 1" };
if (lines[1].trim() !== "second") return { passed: false, reason: "'second' not on line 2" };
return { passed: true, reason: "blank line deleted" };
},
},
{
name: "20. Tab → spaces conversion",
fileName: "tabs.txt",
fileContent: ["start", "\tindented-with-tab", "end"].join("\n"),
prompt: [
"Read tabs.txt with read_file.",
"Replace the tab-indented line 2 using edit_file with edits: [{ op: 'replace', pos: '<line2 anchor>', lines: [' indented-with-spaces'] }].",
"Expected final line 2 to be 4 spaces followed by indented-with-spaces.",
].join(" "),
validate: (c) => {
if (c.includes("\t")) return { passed: false, reason: "tab still present" };
if (!c.includes(" indented-with-spaces"))
return { passed: false, reason: "' indented-with-spaces' not found" };
if (!c.includes("start")) return { passed: false, reason: "'start' was modified" };
return { passed: true, reason: "tab converted to 4 spaces" };
},
},
{
name: "21. Deeply nested indent replacement",
fileName: "nested.ts",
fileContent: [
"if (a) {",
" if (b) {",
" if (c) {",
" old_call();",
" }",
" }",
"}",
].join("\n"),
prompt: [
"Read nested.ts with read_file.",
"Replace line 4 ' old_call();' with ' new_call();'.",
"Preserve the exact 6-space indentation. Only change the function name.",
"Use edit_file with op='replace', pos=<line4 anchor>, lines=[' new_call();'].",
].join(" "),
validate: (c) => {
if (c.includes("old_call")) return { passed: false, reason: "'old_call' still present" };
const lines = c.split("\n");
const callLine = lines.find((l) => l.includes("new_call"));
if (!callLine) return { passed: false, reason: "'new_call' not found" };
const leadingSpaces = callLine.match(/^ */)?.[0].length ?? 0;
if (leadingSpaces !== 6) return { passed: false, reason: `expected 6-space indent, got ${leadingSpaces}` };
return { passed: true, reason: "deeply nested line replaced with indent preserved" };
},
},
];
// ── JSONL event types ─────────────────────────────────────────
/** A `tool_call` event parsed from the headless runner's JSONL stdout. */
interface ToolCallEvent {
  /** Correlates this call with its matching ToolResultEvent. */
  tool_call_id: string;
  /** Raw argument object the model passed to the tool. */
  tool_input: Record<string, unknown>;
  /** Tool identifier; this harness filters for "edit_file". */
  tool_name: string;
  type: "tool_call";
}
/** A `tool_result` event paired to a ToolCallEvent via tool_call_id. */
interface ToolResultEvent {
  /** Present when the tool call was blocked/failed; absent on success. */
  error?: string;
  /** Tool output text. */
  output: string;
  tool_call_id: string;
  type: "tool_result";
}
/** Minimal shape for any JSONL event, used before narrowing by `type`. */
interface AnyEvent {
  type: string;
  [key: string]: unknown;
}
// ── Run single test case ─────────────────────────────────────
/**
 * Execute one test case end-to-end: write the fixture file, run the
 * headless agent against it, count edit_file calls/successes from the
 * JSONL event stream, and validate the resulting file content.
 *
 * Rejects on spawn failure, a non-zero exit code, or a 4-minute timeout.
 */
async function runTestCase(
  tc: TestCase,
  testDir: string
): Promise<{
  passed: boolean;
  editCalls: number;
  editSuccesses: number;
  duration: number;
}> {
  const targetFile = join(testDir, tc.fileName);
  writeFileSync(targetFile, tc.fileContent, "utf-8");

  const headlessScript = resolve(import.meta.dir, "headless.ts");
  const spawnArgs = [
    "run",
    headlessScript,
    "-p",
    tc.prompt,
    "--no-translate",
    ...extraArgs,
  ];

  const startedAt = Date.now();
  const rawOutput = await new Promise<string>((resolveOutput, rejectOutput) => {
    const child = spawn("bun", spawnArgs, {
      cwd: testDir,
      env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL },
      stdio: ["ignore", "pipe", "pipe"],
    });
    let collectedOut = "";
    let collectedErr = "";
    child.stdout.on("data", (chunk: Buffer) => {
      collectedOut += chunk.toString();
    });
    child.stderr.on("data", (chunk: Buffer) => {
      collectedErr += chunk.toString();
    });
    // Hard cap so a wedged agent run cannot stall the whole suite.
    const killTimer = setTimeout(
      () => {
        child.kill("SIGTERM");
        rejectOutput(new Error("Timed out after 4 minutes"));
      },
      4 * 60 * 1000
    );
    child.on("close", (code) => {
      clearTimeout(killTimer);
      if (code === 0) {
        resolveOutput(collectedOut);
      } else {
        rejectOutput(new Error(`Exit code ${code}\n${collectedErr.slice(-500)}`));
      }
    });
    child.on("error", (err) => {
      clearTimeout(killTimer);
      rejectOutput(err);
    });
  });
  const duration = Date.now() - startedAt;

  // Parse the JSONL stream; non-JSON lines are ignored.
  const events: AnyEvent[] = [];
  for (const rawLine of rawOutput.split("\n")) {
    if (!rawLine.trim()) continue;
    try {
      events.push(JSON.parse(rawLine) as AnyEvent);
    } catch {
      // skip non-JSON
    }
  }

  const toolCalls = events.filter(
    (e) => e.type === "tool_call"
  ) as unknown as ToolCallEvent[];
  const toolResults = events.filter(
    (e) => e.type === "tool_result"
  ) as unknown as ToolResultEvent[];
  const editCalls = toolCalls.filter((e) => e.tool_name === "edit_file");
  const editCallIds = new Set(editCalls.map((e) => e.tool_call_id));
  const editResults = toolResults.filter((e) =>
    editCallIds.has(e.tool_call_id)
  );
  const editSuccesses = editResults.filter((e) => !e.error);

  // Surface blocked edit_file calls (and their inputs) for debugging.
  for (const failedResult of editResults.filter((e) => e.error)) {
    info(`  blocked: ${failedResult.error?.slice(0, 120)}`);
    const originatingCall = editCalls.find(
      (c) => c.tool_call_id === failedResult.tool_call_id
    );
    if (originatingCall) {
      info(`    input: ${JSON.stringify(originatingCall.tool_input).slice(0, 200)}`);
    }
  }

  // Validate whatever is on disk now; a missing file counts as a failure.
  let finalContent: string;
  try {
    finalContent = readFileSync(targetFile, "utf-8");
  } catch {
    return {
      passed: false,
      editCalls: editCalls.length,
      editSuccesses: editSuccesses.length,
      duration,
    };
  }
  const verdict = tc.validate(finalContent);
  return {
    passed: verdict.passed,
    editCalls: editCalls.length,
    editSuccesses: editSuccesses.length,
    duration,
  };
}
// ── Main ──────────────────────────────────────────────────────
// Drives all test cases sequentially in one shared temp directory,
// printing per-test status plus a final summary, then exits 0 only when
// every case passed (1 otherwise). The temp dir is removed at the end.
const main = async () => {
  console.log(`\n${BOLD}Headless Edit Operations Test — ${TEST_CASES.length} Types${RESET}\n`);
  const testDir = join(tmpdir(), `edit-ops-${Date.now()}`);
  mkdirSync(testDir, { recursive: true });
  info(`Test dir: ${testDir}`);
  console.log();
  let totalPassed = 0;
  const results: { name: string; passed: boolean; detail: string }[] = [];
  for (const tc of TEST_CASES) {
    console.log(`${CYAN}${BOLD}${tc.name}${RESET}`);
    info(`File: ${tc.fileName}`);
    info(`Prompt: "${tc.prompt.slice(0, 80)}..."`);
    try {
      const result = await runTestCase(tc, testDir);
      const status = result.passed
        ? `${GREEN}PASS${RESET}`
        : `${RED}FAIL${RESET}`;
      const detail = `edit_file: ${result.editSuccesses}/${result.editCalls} succeeded, ${(result.duration / 1000).toFixed(1)}s`;
      // NOTE(review): status and detail are concatenated with no separator;
      // the sibling multi-model parser treats a "—" separator as optional —
      // confirm the intended output format (a " — " may have been lost here).
      console.log(`  ${status}${detail}`);
      if (result.passed) {
        totalPassed++;
        // Validate the file to show reason
        const content = readFileSync(join(testDir, tc.fileName), "utf-8");
        const v = tc.validate(content);
        pass(v.reason);
      } else {
        // Re-validate to print the failure reason and dump the final file
        // content with 1-based line numbers for debugging.
        const content = readFileSync(join(testDir, tc.fileName), "utf-8");
        const v = tc.validate(content);
        fail(v.reason);
        info(
          `Final content:\n${content
            .split("\n")
            .map((l, i) => `  ${i + 1}: ${l}`)
            .join("\n")}`
        );
      }
      results.push({ name: tc.name, passed: result.passed, detail });
    } catch (error) {
      // Spawn failure, non-zero exit, or timeout from runTestCase.
      const msg = error instanceof Error ? error.message : String(error);
      console.log(`  ${RED}ERROR${RESET}${msg.slice(0, 200)}`);
      fail(msg.slice(0, 200));
      results.push({ name: tc.name, passed: false, detail: msg.slice(0, 100) });
    }
    // Reset file for next test (in case of side effects)
    try {
      rmSync(join(testDir, tc.fileName), { force: true });
    } catch {}
    console.log();
  }
  // Summary
  console.log(`${BOLD}━━━ Summary ━━━${RESET}`);
  for (const r of results) {
    // NOTE(review): this renders an empty icon (GREEN/RED wrap nothing) —
    // looks like a "✓"/"✗" glyph may have been dropped; confirm.
    const icon = r.passed ? `${GREEN}${RESET}` : `${RED}${RESET}`;
    console.log(`  ${icon} ${r.name}${r.detail}`);
  }
  console.log();
  console.log(
    `${BOLD}Result: ${totalPassed}/${TEST_CASES.length} passed (${Math.round((totalPassed / TEST_CASES.length) * 100)}%)${RESET}`
  );
  // Cleanup
  try {
    rmSync(testDir, { recursive: true, force: true });
  } catch {}
  if (totalPassed === TEST_CASES.length) {
    console.log(
      `\n${BOLD}${GREEN}🎉 ALL TESTS PASSED — 100% success rate!${RESET}\n`
    );
    process.exit(0);
  } else {
    console.log(`\n${BOLD}${RED}Some tests failed.${RESET}\n`);
    process.exit(1);
  }
};
main();

View File

@ -0,0 +1,280 @@
#!/usr/bin/env bun
/**
* Multi-model edit_file test runner
*
* Runs test-headless-edit-ops.ts against every available model
* and produces a summary table.
*
* Usage:
* bun run scripts/test-multi-model-edit.ts [--timeout <seconds>]
*/
import { spawn } from "node:child_process";
import { resolve } from "node:path";
// ── Models ────────────────────────────────────────────────────
// Models to benchmark. Disabled entries are kept commented with the
// reason they are masked so they can be re-enabled easily.
const MODELS = [
  { id: "MiniMaxAI/MiniMax-M2.5", short: "M2.5" },
  // { id: "MiniMaxAI/MiniMax-M2.1", short: "M2.1" }, // masked: slow + timeout-prone
  // { id: "zai-org/GLM-5", short: "GLM-5" }, // masked: API 503
  { id: "zai-org/GLM-4.7", short: "GLM-4.7" },
];
// ── CLI args ──────────────────────────────────────────────────
let perModelTimeoutSec = 900; // 15 min default per model (5 tests)
const rawArgs = process.argv.slice(2);
for (let i = 0; i < rawArgs.length; i++) {
if (rawArgs[i] === "--timeout" && i + 1 < rawArgs.length) {
const parsed = Number.parseInt(rawArgs[i + 1], 10);
if (Number.isNaN(parsed) || parsed <= 0) {
console.error(`Invalid --timeout value: ${rawArgs[i + 1]}`);
process.exit(1);
}
perModelTimeoutSec = parsed;
i++;
}
// ── Colors ────────────────────────────────────────────────────
// ANSI escape sequences for terminal styling; always close with RESET.
const BOLD = "\x1b[1m";
const GREEN = "\x1b[32m";
const RED = "\x1b[31m";
const YELLOW = "\x1b[33m";
const DIM = "\x1b[2m";
const CYAN = "\x1b[36m";
const RESET = "\x1b[0m";
// ── Types ─────────────────────────────────────────────────────
/** Outcome of one named test case parsed from a child run's output. */
interface TestResult {
  /** Human-readable suffix from the status line (e.g. timing info). */
  detail: string;
  name: string;
  passed: boolean;
}
/** Aggregated results of one full suite run against a single model. */
interface ModelResult {
  durationMs: number;
  /** Set when the run failed wholesale (timeout or spawn error). */
  error?: string;
  modelId: string;
  modelShort: string;
  tests: TestResult[];
  totalPassed: number;
  totalTests: number;
}
// ── Parse test-headless-edit-ops stdout ───────────────────────
/**
 * Parse test-edit-ops.ts stdout into per-test results.
 *
 * The child prints a header line ("N. <name>") followed by a status line
 * containing PASS, FAIL, or ERROR plus an optional "—"-separated detail.
 * ANSI color codes are stripped before matching; header lines carrying a
 * status marker ("—", "✓", "✗") are ignored to avoid false positives.
 */
function parseOpsOutput(stdout: string): TestResult[] {
  const parsed: TestResult[] = [];
  let pendingName = "";

  const stripAnsi = (line: string): string =>
    line.replace(/\x1b\[[0-9;]*m/g, "");

  for (const rawLine of stdout.split("\n")) {
    const plain = stripAnsi(rawLine);

    // Header line: "N. <name>" with no status marker on it.
    const headerMatch = plain.match(/^\s*(\d+\.\s+.+)$/);
    const hasMarker =
      plain.includes("—") || plain.includes("✓") || plain.includes("✗");
    if (headerMatch && !hasMarker) {
      pendingName = headerMatch[1].trim();
      continue;
    }

    // Status lines only count while a test name is pending.
    if (!pendingName) continue;

    let passed: boolean | null = null;
    let detailText = "";
    let fallbackDetail = "";
    if (plain.includes("PASS")) {
      passed = true;
      detailText = plain.replace(/^\s*PASS\s*—?\s*/, "").trim();
      fallbackDetail = "passed";
    } else if (plain.includes("FAIL")) {
      passed = false;
      detailText = plain.replace(/^\s*FAIL\s*—?\s*/, "").trim();
      fallbackDetail = "failed";
    } else if (plain.includes("ERROR")) {
      passed = false;
      detailText = plain.replace(/^\s*ERROR\s*—?\s*/, "").trim();
      fallbackDetail = "error";
    }
    if (passed !== null) {
      parsed.push({
        name: pendingName,
        passed,
        detail: detailText || fallbackDetail,
      });
      pendingName = "";
    }
  }
  return parsed;
}
// ── Run one model ────────────────────────────────────────────
/**
 * Run the edit-ops suite once for a single model.
 *
 * Spawns `bun run test-edit-ops.ts -m <model> --no-translate`, captures
 * stdout, and parses the per-test results. Never rejects: timeouts and
 * spawn failures are reported through the result's `error` field.
 */
async function runModel(model: {
  id: string;
  short: string;
}): Promise<ModelResult> {
  const opsScript = resolve(import.meta.dir, "test-edit-ops.ts");
  const startTime = Date.now();
  return new Promise<ModelResult>((settle) => {
    // Shared shape for both failure paths (timeout and spawn error).
    const failure = (message: string): ModelResult => ({
      modelId: model.id,
      modelShort: model.short,
      tests: [],
      totalPassed: 0,
      totalTests: 0,
      durationMs: Date.now() - startTime,
      error: message,
    });

    const child = spawn(
      "bun",
      ["run", opsScript, "-m", model.id, "--no-translate"],
      {
        cwd: resolve(import.meta.dir),
        env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL },
        stdio: ["ignore", "pipe", "pipe"],
      }
    );

    let capturedOut = "";
    let capturedErr = "";
    child.stdout.on("data", (chunk: Buffer) => {
      capturedOut += chunk.toString();
    });
    child.stderr.on("data", (chunk: Buffer) => {
      capturedErr += chunk.toString();
    });

    const killTimer = setTimeout(() => {
      child.kill("SIGTERM");
      settle(failure(`Timed out after ${perModelTimeoutSec}s`));
    }, perModelTimeoutSec * 1000);

    child.on("close", () => {
      clearTimeout(killTimer);
      const tests = parseOpsOutput(capturedOut);
      settle({
        modelId: model.id,
        modelShort: model.short,
        tests,
        totalPassed: tests.filter((t) => t.passed).length,
        // Floor of 5 so a run that printed nothing still reports 0/5.
        // NOTE(review): the ops suite appears to contain more than 5 cases
        // now — confirm this floor is still the intended value.
        totalTests: Math.max(tests.length, 5),
        durationMs: Date.now() - startTime,
      });
    });
    child.on("error", (err) => {
      clearTimeout(killTimer);
      settle(failure(err.message));
    });
  });
}
// ── Main ──────────────────────────────────────────────────────
// Orchestrates sequential per-model runs (one await per iteration), then
// prints per-model and overall summaries. Exit code 0 only when every
// model passed every test; 1 when any model errored or failed a test.
const main = async () => {
  console.log(`\n${BOLD}═══ Multi-Model edit_file Test Runner ═══${RESET}\n`);
  console.log(`${DIM}Models: ${MODELS.map((m) => m.short).join(", ")}${RESET}`);
  console.log(`${DIM}Timeout: ${perModelTimeoutSec}s per model${RESET}`);
  console.log();
  const allResults: ModelResult[] = [];
  for (const model of MODELS) {
    console.log(`${CYAN}${BOLD}▶ Testing ${model.short} (${model.id})${RESET}`);
    const result = await runModel(model);
    allResults.push(result);
    const timeStr = `${(result.durationMs / 1000).toFixed(1)}s`;
    if (result.error) {
      console.log(`  ${RED}ERROR${RESET}: ${result.error} (${timeStr})`);
    } else {
      // Green for perfect, yellow for partial, red for zero passes.
      const color =
        result.totalPassed === result.totalTests
          ? GREEN
          : result.totalPassed > 0
          ? YELLOW
          : RED;
      console.log(
        `  ${color}${result.totalPassed}/${result.totalTests} passed${RESET} (${timeStr})`
      );
      for (const t of result.tests) {
        // NOTE(review): this renders an empty icon (GREEN/RED wrap
        // nothing) — looks like a "✓"/"✗" glyph may have been dropped.
        const icon = t.passed ? `${GREEN}${RESET}` : `${RED}${RESET}`;
        console.log(`    ${icon} ${t.name}`);
      }
    }
    console.log();
  }
  // ── Summary Table ──────────────────────────────────────────
  console.log(`${BOLD}═══ Summary ═══${RESET}\n`);
  // Per-model results
  for (const r of allResults) {
    const timeStr = `${(r.durationMs / 1000).toFixed(0)}s`;
    const color = r.error ? RED : r.totalPassed === r.totalTests ? GREEN : r.totalPassed > 0 ? YELLOW : RED;
    const label = r.error ? `ERROR: ${r.error}` : `${r.totalPassed}/${r.totalTests}`;
    console.log(`  ${r.modelShort.padEnd(8)} ${color}${label}${RESET} (${timeStr})`);
    for (const t of r.tests) {
      // NOTE(review): same empty-icon pattern as above — confirm glyphs.
      const icon = t.passed ? `${GREEN}${RESET}` : `${RED}${RESET}`;
      console.log(`    ${icon} ${t.name}`);
    }
  }
  console.log();
  // Overall
  const totalModels = allResults.length;
  const erroredModels = allResults.filter((r) => r.error).length;
  // "Perfect" requires a non-empty, error-free run with all tests passed.
  const perfectModels = allResults.filter(
    (r) => !r.error && r.totalPassed === r.totalTests && r.totalTests > 0
  ).length;
  console.log(
    `${BOLD}Models with 100%: ${perfectModels}/${totalModels}${RESET}`
  );
  const overallPassed = allResults.reduce((sum, r) => sum + r.totalPassed, 0);
  const overallTotal = allResults.reduce((sum, r) => sum + r.totalTests, 0);
  console.log(
    `${BOLD}Overall: ${overallPassed}/${overallTotal} (${Math.round((overallPassed / overallTotal) * 100)}%)${RESET}`
  );
  console.log();
  if (erroredModels > 0) {
    console.log(
      `${BOLD}${RED}${erroredModels} model(s) errored. See details above.${RESET}\n`
    );
    process.exit(1);
  } else if (perfectModels === totalModels) {
    console.log(`${BOLD}${GREEN}🎉 ALL MODELS PASSED ALL TESTS!${RESET}\n`);
    process.exit(0);
  } else {
    console.log(
      `${BOLD}${YELLOW}Some models have failures. See details above.${RESET}\n`
    );
    process.exit(1);
  }
};
main();

View File

@ -3,8 +3,9 @@
// Wrapper script that detects platform and spawns the correct binary // Wrapper script that detects platform and spawns the correct binary
import { spawnSync } from "node:child_process"; import { spawnSync } from "node:child_process";
import { readFileSync } from "node:fs";
import { createRequire } from "node:module"; import { createRequire } from "node:module";
import { getPlatformPackage, getBinaryPath } from "./platform.js"; import { getPlatformPackageCandidates, getBinaryPath } from "./platform.js";
const require = createRequire(import.meta.url); const require = createRequire(import.meta.url);
@ -26,55 +27,116 @@ function getLibcFamily() {
} }
} }
/**
 * Best-effort AVX2 detection for x64 hosts.
 *
 * Returns true/false when detection succeeds, or null when the answer is
 * unknown (non-x64 arch, unsupported platform, or a failed probe).
 * Setting OH_MY_OPENCODE_FORCE_BASELINE=1 forces the baseline path.
 */
function supportsAvx2() {
  if (process.arch !== "x64") {
    return null;
  }
  if (process.env.OH_MY_OPENCODE_FORCE_BASELINE === "1") {
    return false;
  }
  switch (process.platform) {
    case "linux": {
      // /proc/cpuinfo lists the CPU feature flags in lowercase.
      try {
        return readFileSync("/proc/cpuinfo", "utf8")
          .toLowerCase()
          .includes("avx2");
      } catch {
        return null;
      }
    }
    case "darwin": {
      // sysctl exposes leaf-7 CPU features; AVX2 appears in that list.
      const sysctlProbe = spawnSync("sysctl", ["-n", "machdep.cpu.leaf7_features"], {
        encoding: "utf8",
      });
      if (sysctlProbe.error || sysctlProbe.status !== 0) {
        return null;
      }
      return sysctlProbe.stdout.toUpperCase().includes("AVX2");
    }
    default:
      return null;
  }
}
/**
 * Map a terminating signal name to the conventional shell exit code
 * (128 + signal number). Unknown signals fall back to 128 + 1.
 */
function getSignalExitCode(signal) {
  const knownSignalNumbers = new Map([
    ["SIGINT", 2],
    ["SIGILL", 4],
    ["SIGKILL", 9],
    ["SIGTERM", 15],
  ]);
  return 128 + (knownSignalNumbers.get(signal) ?? 1);
}
function main() { function main() {
const { platform, arch } = process; const { platform, arch } = process;
const libcFamily = getLibcFamily(); const libcFamily = getLibcFamily();
const avx2Supported = supportsAvx2();
// Get platform package name let packageCandidates;
let pkg;
try { try {
pkg = getPlatformPackage({ platform, arch, libcFamily }); packageCandidates = getPlatformPackageCandidates({
platform,
arch,
libcFamily,
preferBaseline: avx2Supported === false,
});
} catch (error) { } catch (error) {
console.error(`\noh-my-opencode: ${error.message}\n`); console.error(`\noh-my-opencode: ${error.message}\n`);
process.exit(1); process.exit(1);
} }
// Resolve binary path const resolvedBinaries = packageCandidates
const binRelPath = getBinaryPath(pkg, platform); .map((pkg) => {
try {
let binPath; return { pkg, binPath: require.resolve(getBinaryPath(pkg, platform)) };
try { } catch {
binPath = require.resolve(binRelPath); return null;
} catch { }
})
.filter((entry) => entry !== null);
if (resolvedBinaries.length === 0) {
console.error(`\noh-my-opencode: Platform binary not installed.`); console.error(`\noh-my-opencode: Platform binary not installed.`);
console.error(`\nYour platform: ${platform}-${arch}${libcFamily === "musl" ? "-musl" : ""}`); console.error(`\nYour platform: ${platform}-${arch}${libcFamily === "musl" ? "-musl" : ""}`);
console.error(`Expected package: ${pkg}`); console.error(`Expected packages (in order): ${packageCandidates.join(", ")}`);
console.error(`\nTo fix, run:`); console.error(`\nTo fix, run:`);
console.error(` npm install ${pkg}\n`); console.error(` npm install ${packageCandidates[0]}\n`);
process.exit(1); process.exit(1);
} }
// Spawn the binary for (let index = 0; index < resolvedBinaries.length; index += 1) {
const result = spawnSync(binPath, process.argv.slice(2), { const currentBinary = resolvedBinaries[index];
stdio: "inherit", const hasFallback = index < resolvedBinaries.length - 1;
}); const result = spawnSync(currentBinary.binPath, process.argv.slice(2), {
stdio: "inherit",
// Handle spawn errors });
if (result.error) {
console.error(`\noh-my-opencode: Failed to execute binary.`); if (result.error) {
console.error(`Error: ${result.error.message}\n`); if (hasFallback) {
process.exit(2); continue;
} }
// Handle signals console.error(`\noh-my-opencode: Failed to execute binary.`);
if (result.signal) { console.error(`Error: ${result.error.message}\n`);
const signalNum = result.signal === "SIGTERM" ? 15 : process.exit(2);
result.signal === "SIGKILL" ? 9 : }
result.signal === "SIGINT" ? 2 : 1;
process.exit(128 + signalNum); if (result.signal === "SIGILL" && hasFallback) {
continue;
}
if (result.signal) {
process.exit(getSignalExitCode(result.signal));
}
process.exit(result.status ?? 1);
} }
process.exit(result.status ?? 1); process.exit(1);
} }
main(); main();

14
bin/platform.d.ts vendored Normal file
View File

@ -0,0 +1,14 @@
/**
 * Resolve the npm package name that ships the binary for the given
 * platform/arch (and libc family on Linux).
 */
export declare function getPlatformPackage(options: {
  platform: string;
  arch: string;
  libcFamily?: string | null;
}): string;
/**
 * Like getPlatformPackage, but returns an ordered candidate list.
 * When preferBaseline is true the baseline (non-AVX2) x64 package is
 * listed first; otherwise it is the fallback after the modern build.
 */
export declare function getPlatformPackageCandidates(options: {
  platform: string;
  arch: string;
  libcFamily?: string | null;
  preferBaseline?: boolean;
}): string[];
/** Relative path of the executable inside the given platform package. */
export declare function getBinaryPath(pkg: string, platform: string): string;

View File

@ -26,6 +26,50 @@ export function getPlatformPackage({ platform, arch, libcFamily }) {
return `oh-my-opencode-${os}-${arch}${suffix}`; return `oh-my-opencode-${os}-${arch}${suffix}`;
} }
/**
 * Ordered list of platform packages to try for this host. When a baseline
 * build exists for the platform/arch, it is either the preferred candidate
 * (preferBaseline=true) or the fallback after the modern build.
 * @param {{ platform: string, arch: string, libcFamily?: string | null, preferBaseline?: boolean }} options
 */
export function getPlatformPackageCandidates({ platform, arch, libcFamily, preferBaseline = false }) {
  const modernPackage = getPlatformPackage({ platform, arch, libcFamily });
  const baselinePackage = getBaselinePlatformPackage({ platform, arch, libcFamily });
  if (!baselinePackage) {
    return [modernPackage];
  }
  return preferBaseline
    ? [baselinePackage, modernPackage]
    : [modernPackage, baselinePackage];
}
/**
 * Baseline (x64-only) counterpart of a platform package, or null when no
 * baseline build exists for the platform/arch combination.
 * Throws on Linux when the libc family could not be detected.
 * @param {{ platform: string, arch: string, libcFamily?: string | null }} options
 */
function getBaselinePlatformPackage({ platform, arch, libcFamily }) {
  // Baseline builds are only published for x64 hosts.
  if (arch !== "x64") {
    return null;
  }
  switch (platform) {
    case "darwin":
      return "oh-my-opencode-darwin-x64-baseline";
    case "win32":
      return "oh-my-opencode-windows-x64-baseline";
    case "linux": {
      // An undetected libc is a hard error rather than a silent guess.
      if (libcFamily === null || libcFamily === undefined) {
        throw new Error(
          "Could not detect libc on Linux. " +
            "Please ensure detect-libc is installed or report this issue."
        );
      }
      return libcFamily === "musl"
        ? "oh-my-opencode-linux-x64-musl-baseline"
        : "oh-my-opencode-linux-x64-baseline";
    }
    default:
      return null;
  }
}
/** /**
* Get the path to the binary within a platform package * Get the path to the binary within a platform package
* @param {string} pkg Package name * @param {string} pkg Package name

View File

@ -1,6 +1,6 @@
// bin/platform.test.ts // bin/platform.test.ts
import { describe, expect, test } from "bun:test"; import { describe, expect, test } from "bun:test";
import { getPlatformPackage, getBinaryPath } from "./platform.js"; import { getBinaryPath, getPlatformPackage, getPlatformPackageCandidates } from "./platform.js";
describe("getPlatformPackage", () => { describe("getPlatformPackage", () => {
// #region Darwin platforms // #region Darwin platforms
@ -146,3 +146,58 @@ describe("getBinaryPath", () => {
expect(result).toBe("oh-my-opencode-linux-x64/bin/oh-my-opencode"); expect(result).toBe("oh-my-opencode-linux-x64/bin/oh-my-opencode");
}); });
}); });
// Verifies the modern→baseline candidate ordering produced by
// getPlatformPackageCandidates across platforms, libc families, and the
// preferBaseline flag; non-x64 arches must get no baseline fallback.
describe("getPlatformPackageCandidates", () => {
  test("returns x64 and baseline candidates for Linux glibc", () => {
    // #given Linux x64 with glibc
    const input = { platform: "linux", arch: "x64", libcFamily: "glibc" };
    // #when getting package candidates
    const result = getPlatformPackageCandidates(input);
    // #then returns modern first then baseline fallback
    expect(result).toEqual([
      "oh-my-opencode-linux-x64",
      "oh-my-opencode-linux-x64-baseline",
    ]);
  });
  test("returns x64 musl and baseline candidates for Linux musl", () => {
    // #given Linux x64 with musl
    const input = { platform: "linux", arch: "x64", libcFamily: "musl" };
    // #when getting package candidates
    const result = getPlatformPackageCandidates(input);
    // #then returns musl modern first then musl baseline fallback
    expect(result).toEqual([
      "oh-my-opencode-linux-x64-musl",
      "oh-my-opencode-linux-x64-musl-baseline",
    ]);
  });
  test("returns baseline first when preferBaseline is true", () => {
    // #given Windows x64 and baseline preference
    const input = { platform: "win32", arch: "x64", preferBaseline: true };
    // #when getting package candidates
    const result = getPlatformPackageCandidates(input);
    // #then baseline package is preferred first
    expect(result).toEqual([
      "oh-my-opencode-windows-x64-baseline",
      "oh-my-opencode-windows-x64",
    ]);
  });
  test("returns only one candidate for ARM64", () => {
    // #given non-x64 platform
    const input = { platform: "linux", arch: "arm64", libcFamily: "glibc" };
    // #when getting package candidates
    const result = getPlatformPackageCandidates(input);
    // #then baseline fallback is not included
    expect(result).toEqual(["oh-my-opencode-linux-arm64"]);
  });
});

View File

@ -77,11 +77,15 @@
"optionalDependencies": { "optionalDependencies": {
"oh-my-opencode-darwin-arm64": "3.8.5", "oh-my-opencode-darwin-arm64": "3.8.5",
"oh-my-opencode-darwin-x64": "3.8.5", "oh-my-opencode-darwin-x64": "3.8.5",
"oh-my-opencode-darwin-x64-baseline": "3.8.5",
"oh-my-opencode-linux-arm64": "3.8.5", "oh-my-opencode-linux-arm64": "3.8.5",
"oh-my-opencode-linux-arm64-musl": "3.8.5", "oh-my-opencode-linux-arm64-musl": "3.8.5",
"oh-my-opencode-linux-x64": "3.8.5", "oh-my-opencode-linux-x64": "3.8.5",
"oh-my-opencode-linux-x64-baseline": "3.8.5",
"oh-my-opencode-linux-x64-musl": "3.8.5", "oh-my-opencode-linux-x64-musl": "3.8.5",
"oh-my-opencode-windows-x64": "3.8.5" "oh-my-opencode-linux-x64-musl-baseline": "3.8.5",
"oh-my-opencode-windows-x64": "3.8.5",
"oh-my-opencode-windows-x64-baseline": "3.8.5"
}, },
"trustedDependencies": [ "trustedDependencies": [
"@ast-grep/cli", "@ast-grep/cli",

View File

@ -2,7 +2,7 @@
// Runs after npm install to verify platform binary is available // Runs after npm install to verify platform binary is available
import { createRequire } from "node:module"; import { createRequire } from "node:module";
import { getPlatformPackage, getBinaryPath } from "./bin/platform.js"; import { getPlatformPackageCandidates, getBinaryPath } from "./bin/platform.js";
const require = createRequire(import.meta.url); const require = createRequire(import.meta.url);
@ -27,12 +27,28 @@ function main() {
const libcFamily = getLibcFamily(); const libcFamily = getLibcFamily();
try { try {
const pkg = getPlatformPackage({ platform, arch, libcFamily }); const packageCandidates = getPlatformPackageCandidates({
const binPath = getBinaryPath(pkg, platform); platform,
arch,
// Try to resolve the binary libcFamily,
require.resolve(binPath); });
console.log(`✓ oh-my-opencode binary installed for ${platform}-${arch}`);
const resolvedPackage = packageCandidates.find((pkg) => {
try {
require.resolve(getBinaryPath(pkg, platform));
return true;
} catch {
return false;
}
});
if (!resolvedPackage) {
throw new Error(
`No platform binary package installed. Tried: ${packageCandidates.join(", ")}`
);
}
console.log(`✓ oh-my-opencode binary installed for ${platform}-${arch} (${resolvedPackage})`);
} catch (error) { } catch (error) {
console.warn(`⚠ oh-my-opencode: ${error.message}`); console.warn(`⚠ oh-my-opencode: ${error.message}`);
console.warn(` The CLI may not work on this platform.`); console.warn(` The CLI may not work on this platform.`);

View File

@ -1719,6 +1719,54 @@
"created_at": "2026-02-24T17:12:31Z", "created_at": "2026-02-24T17:12:31Z",
"repoId": 1108837393, "repoId": 1108837393,
"pullRequestNo": 1983 "pullRequestNo": 1983
},
{
"name": "east-shine",
"id": 20237288,
"comment_id": 3957576758,
"created_at": "2026-02-25T08:19:34Z",
"repoId": 1108837393,
"pullRequestNo": 2113
},
{
"name": "SupenBysz",
"id": 3314033,
"comment_id": 3962352704,
"created_at": "2026-02-25T22:00:54Z",
"repoId": 1108837393,
"pullRequestNo": 2119
},
{
"name": "zhzy0077",
"id": 8717471,
"comment_id": 3964015975,
"created_at": "2026-02-26T04:45:23Z",
"repoId": 1108837393,
"pullRequestNo": 2125
},
{
"name": "spacecowboy0416",
"id": 239068998,
"comment_id": 3964320737,
"created_at": "2026-02-26T06:05:27Z",
"repoId": 1108837393,
"pullRequestNo": 2126
},
{
"name": "imwxc",
"id": 49653609,
"comment_id": 3965127447,
"created_at": "2026-02-26T09:00:16Z",
"repoId": 1108837393,
"pullRequestNo": 2129
},
{
"name": "maou-shonen",
"id": 22576780,
"comment_id": 3965445132,
"created_at": "2026-02-26T09:50:46Z",
"repoId": 1108837393,
"pullRequestNo": 2131
} }
] ]
} }

View File

@ -17,7 +17,6 @@ import type { AvailableAgent, AvailableSkill, AvailableCategory } from "../dynam
import { buildCategorySkillsDelegationGuide } from "../dynamic-agent-prompt-builder" import { buildCategorySkillsDelegationGuide } from "../dynamic-agent-prompt-builder"
import type { CategoryConfig } from "../../config/schema" import type { CategoryConfig } from "../../config/schema"
import { mergeCategories } from "../../shared/merge-categories" import { mergeCategories } from "../../shared/merge-categories"
import { createAgentToolRestrictions } from "../../shared/permission-compat"
import { getDefaultAtlasPrompt } from "./default" import { getDefaultAtlasPrompt } from "./default"
import { getGptAtlasPrompt } from "./gpt" import { getGptAtlasPrompt } from "./gpt"
@ -30,7 +29,7 @@ import {
buildDecisionMatrix, buildDecisionMatrix,
} from "./prompt-section-builder" } from "./prompt-section-builder"
const MODE: AgentMode = "primary" const MODE: AgentMode = "all"
export type AtlasPromptSource = "default" | "gpt" | "gemini" export type AtlasPromptSource = "default" | "gpt" | "gemini"
@ -100,11 +99,6 @@ function buildDynamicOrchestratorPrompt(ctx?: OrchestratorContext): string {
} }
export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig { export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {
const restrictions = createAgentToolRestrictions([
"task",
"call_omo_agent",
])
const baseConfig = { const baseConfig = {
description: description:
"Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)", "Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
@ -113,7 +107,6 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {
temperature: 0.1, temperature: 0.1,
prompt: buildDynamicOrchestratorPrompt(ctx), prompt: buildDynamicOrchestratorPrompt(ctx),
color: "#10B981", color: "#10B981",
...restrictions,
} }
return baseConfig as AgentConfig return baseConfig as AgentConfig

View File

@ -0,0 +1,41 @@
/// <reference types="bun-types" />
import { describe, test, expect } from "bun:test"
import { createEnvContext } from "./env-context"
describe("createEnvContext", () => {
test("returns omo-env block with timezone and locale", () => {
// #given - no setup needed
// #when
const result = createEnvContext()
// #then
expect(result).toContain("<omo-env>")
expect(result).toContain("</omo-env>")
expect(result).toContain("Timezone:")
expect(result).toContain("Locale:")
expect(result).not.toContain("Current date:")
})
test("does not include time with seconds precision to preserve token cache", () => {
// #given - seconds-precision time changes every second, breaking cache on every request
// #when
const result = createEnvContext()
// #then - no HH:MM:SS pattern anywhere in the output
expect(result).not.toMatch(/\d{1,2}:\d{2}:\d{2}/)
})
test("does not include date or time fields since OpenCode already provides them", () => {
// #given - OpenCode's system.ts already injects date, platform, working directory
// #when
const result = createEnvContext()
// #then - only timezone and locale remain; both are stable across requests
expect(result).not.toContain("Current date:")
expect(result).not.toContain("Current time:")
})
})

View File

@ -1,32 +1,15 @@
/** /**
* Creates OmO-specific environment context (time, timezone, locale). * Creates OmO-specific environment context (timezone, locale).
* Note: Working directory, platform, and date are already provided by OpenCode's system.ts, * Note: Working directory, platform, and date are already provided by OpenCode's system.ts,
* so we only include fields that OpenCode doesn't provide to avoid duplication. * so we only include fields that OpenCode doesn't provide to avoid duplication.
* See: https://github.com/code-yeongyu/oh-my-opencode/issues/379 * See: https://github.com/code-yeongyu/oh-my-opencode/issues/379
*/ */
export function createEnvContext(): string { export function createEnvContext(): string {
const now = new Date()
const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone
const locale = Intl.DateTimeFormat().resolvedOptions().locale const locale = Intl.DateTimeFormat().resolvedOptions().locale
const dateStr = now.toLocaleDateString(locale, {
weekday: "short",
year: "numeric",
month: "short",
day: "numeric",
})
const timeStr = now.toLocaleTimeString(locale, {
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
hour12: true,
})
return ` return `
<omo-env> <omo-env>
Current date: ${dateStr}
Current time: ${timeStr}
Timezone: ${timezone} Timezone: ${timezone}
Locale: ${locale} Locale: ${locale}
</omo-env>` </omo-env>`

View File

@ -19,7 +19,7 @@ import {
categorizeTools, categorizeTools,
} from "./dynamic-agent-prompt-builder"; } from "./dynamic-agent-prompt-builder";
const MODE: AgentMode = "primary"; const MODE: AgentMode = "all";
function buildTodoDisciplineSection(useTaskSystem: boolean): string { function buildTodoDisciplineSection(useTaskSystem: boolean): string {
if (useTaskSystem) { if (useTaskSystem) {
@ -448,6 +448,21 @@ ${oracleSection}
4. **Run build** if applicable exit code 0 required 4. **Run build** if applicable exit code 0 required
5. **Tell user** what you verified and the results keep it clear and helpful 5. **Tell user** what you verified and the results keep it clear and helpful
### Auto-Commit Policy (MANDATORY for implementation/fix work)
1. **Auto-commit after implementation is complete** when the task includes feature/fix code changes
2. **Commit ONLY after verification gates pass**:
- \`lsp_diagnostics\` clean on all modified files
- Related tests pass
- Typecheck/build pass when applicable
3. **If any gate fails, DO NOT commit** fix issues first, re-run verification, then commit
4. **Use Conventional Commits format** with meaningful intent-focused messages:
- \`feat(scope): add ...\` for new functionality
- \`fix(scope): resolve ...\` for bug fixes
- \`refactor(scope): simplify ...\` for internal restructuring
5. **Do not make placeholder commits** (\`wip\`, \`temp\`, \`update\`) or commit unverified code
6. **If user explicitly says not to commit**, skip commit and report that changes are left uncommitted
- **File edit** \`lsp_diagnostics\` clean - **File edit** \`lsp_diagnostics\` clean
- **Build** Exit code 0 - **Build** Exit code 0
- **Tests** Pass (or pre-existing failures noted) - **Tests** Pass (or pre-existing failures noted)

View File

@ -8,7 +8,7 @@ import {
buildGeminiIntentGateEnforcement, buildGeminiIntentGateEnforcement,
} from "./sisyphus-gemini-overlays"; } from "./sisyphus-gemini-overlays";
const MODE: AgentMode = "primary"; const MODE: AgentMode = "all";
export const SISYPHUS_PROMPT_METADATA: AgentPromptMetadata = { export const SISYPHUS_PROMPT_METADATA: AgentPromptMetadata = {
category: "utility", category: "utility",
cost: "EXPENSIVE", cost: "EXPENSIVE",

View File

@ -4,6 +4,7 @@ import { createLibrarianAgent } from "./librarian"
import { createExploreAgent } from "./explore" import { createExploreAgent } from "./explore"
import { createMomusAgent } from "./momus" import { createMomusAgent } from "./momus"
import { createMetisAgent } from "./metis" import { createMetisAgent } from "./metis"
import { createAtlasAgent } from "./atlas"
const TEST_MODEL = "anthropic/claude-sonnet-4-5" const TEST_MODEL = "anthropic/claude-sonnet-4-5"
@ -96,4 +97,18 @@ describe("read-only agent tool restrictions", () => {
} }
}) })
}) })
describe("Atlas", () => {
test("allows delegation tools for orchestration", () => {
// given
const agent = createAtlasAgent({ model: TEST_MODEL })
// when
const permission = (agent.permission ?? {}) as Record<string, string>
// then
expect(permission["task"]).toBeUndefined()
expect(permission["call_omo_agent"]).toBeUndefined()
})
})
}) })

View File

@ -2,11 +2,17 @@ import { describe, test, expect } from "bun:test";
import { isGptModel, isGeminiModel } from "./types"; import { isGptModel, isGeminiModel } from "./types";
describe("isGptModel", () => { describe("isGptModel", () => {
test("standard openai provider models", () => { test("standard openai provider gpt models", () => {
expect(isGptModel("openai/gpt-5.2")).toBe(true); expect(isGptModel("openai/gpt-5.2")).toBe(true);
expect(isGptModel("openai/gpt-4o")).toBe(true); expect(isGptModel("openai/gpt-4o")).toBe(true);
expect(isGptModel("openai/o1")).toBe(true); });
expect(isGptModel("openai/o3-mini")).toBe(true);
test("o-series models are not gpt by name", () => {
expect(isGptModel("openai/o1")).toBe(false);
expect(isGptModel("openai/o3-mini")).toBe(false);
expect(isGptModel("litellm/o1")).toBe(false);
expect(isGptModel("litellm/o3-mini")).toBe(false);
expect(isGptModel("litellm/o4-mini")).toBe(false);
}); });
test("github copilot gpt models", () => { test("github copilot gpt models", () => {
@ -17,9 +23,6 @@ describe("isGptModel", () => {
test("litellm proxied gpt models", () => { test("litellm proxied gpt models", () => {
expect(isGptModel("litellm/gpt-5.2")).toBe(true); expect(isGptModel("litellm/gpt-5.2")).toBe(true);
expect(isGptModel("litellm/gpt-4o")).toBe(true); expect(isGptModel("litellm/gpt-4o")).toBe(true);
expect(isGptModel("litellm/o1")).toBe(true);
expect(isGptModel("litellm/o3-mini")).toBe(true);
expect(isGptModel("litellm/o4-mini")).toBe(true);
}); });
test("other proxied gpt models", () => { test("other proxied gpt models", () => {
@ -27,6 +30,11 @@ describe("isGptModel", () => {
expect(isGptModel("custom-provider/gpt-5.2")).toBe(true); expect(isGptModel("custom-provider/gpt-5.2")).toBe(true);
}); });
test("venice provider gpt models", () => {
expect(isGptModel("venice/gpt-5.2")).toBe(true);
expect(isGptModel("venice/gpt-4o")).toBe(true);
});
test("gpt4 prefix without hyphen (legacy naming)", () => { test("gpt4 prefix without hyphen (legacy naming)", () => {
expect(isGptModel("litellm/gpt4o")).toBe(true); expect(isGptModel("litellm/gpt4o")).toBe(true);
expect(isGptModel("ollama/gpt4")).toBe(true); expect(isGptModel("ollama/gpt4")).toBe(true);
@ -39,8 +47,8 @@ describe("isGptModel", () => {
}); });
test("gemini models are not gpt", () => { test("gemini models are not gpt", () => {
expect(isGptModel("google/gemini-3-pro")).toBe(false); expect(isGptModel("google/gemini-3.1-pro")).toBe(false);
expect(isGptModel("litellm/gemini-3-pro")).toBe(false); expect(isGptModel("litellm/gemini-3.1-pro")).toBe(false);
}); });
test("opencode provider is not gpt", () => { test("opencode provider is not gpt", () => {
@ -50,29 +58,29 @@ describe("isGptModel", () => {
describe("isGeminiModel", () => { describe("isGeminiModel", () => {
test("#given google provider models #then returns true", () => { test("#given google provider models #then returns true", () => {
expect(isGeminiModel("google/gemini-3-pro")).toBe(true); expect(isGeminiModel("google/gemini-3.1-pro")).toBe(true);
expect(isGeminiModel("google/gemini-3-flash")).toBe(true); expect(isGeminiModel("google/gemini-3-flash")).toBe(true);
expect(isGeminiModel("google/gemini-2.5-pro")).toBe(true); expect(isGeminiModel("google/gemini-2.5-pro")).toBe(true);
}); });
test("#given google-vertex provider models #then returns true", () => { test("#given google-vertex provider models #then returns true", () => {
expect(isGeminiModel("google-vertex/gemini-3-pro")).toBe(true); expect(isGeminiModel("google-vertex/gemini-3.1-pro")).toBe(true);
expect(isGeminiModel("google-vertex/gemini-3-flash")).toBe(true); expect(isGeminiModel("google-vertex/gemini-3-flash")).toBe(true);
}); });
test("#given github copilot gemini models #then returns true", () => { test("#given github copilot gemini models #then returns true", () => {
expect(isGeminiModel("github-copilot/gemini-3-pro")).toBe(true); expect(isGeminiModel("github-copilot/gemini-3.1-pro")).toBe(true);
expect(isGeminiModel("github-copilot/gemini-3-flash")).toBe(true); expect(isGeminiModel("github-copilot/gemini-3-flash")).toBe(true);
}); });
test("#given litellm proxied gemini models #then returns true", () => { test("#given litellm proxied gemini models #then returns true", () => {
expect(isGeminiModel("litellm/gemini-3-pro")).toBe(true); expect(isGeminiModel("litellm/gemini-3.1-pro")).toBe(true);
expect(isGeminiModel("litellm/gemini-3-flash")).toBe(true); expect(isGeminiModel("litellm/gemini-3-flash")).toBe(true);
expect(isGeminiModel("litellm/gemini-2.5-pro")).toBe(true); expect(isGeminiModel("litellm/gemini-2.5-pro")).toBe(true);
}); });
test("#given other proxied gemini models #then returns true", () => { test("#given other proxied gemini models #then returns true", () => {
expect(isGeminiModel("custom-provider/gemini-3-pro")).toBe(true); expect(isGeminiModel("custom-provider/gemini-3.1-pro")).toBe(true);
expect(isGeminiModel("ollama/gemini-3-flash")).toBe(true); expect(isGeminiModel("ollama/gemini-3-flash")).toBe(true);
}); });

View File

@ -70,14 +70,9 @@ function extractModelName(model: string): string {
return model.includes("/") ? model.split("/").pop() ?? model : model return model.includes("/") ? model.split("/").pop() ?? model : model
} }
const GPT_MODEL_PREFIXES = ["gpt-", "gpt4", "o1", "o3", "o4"]
export function isGptModel(model: string): boolean { export function isGptModel(model: string): boolean {
if (model.startsWith("openai/") || model.startsWith("github-copilot/gpt-"))
return true
const modelName = extractModelName(model).toLowerCase() const modelName = extractModelName(model).toLowerCase()
return GPT_MODEL_PREFIXES.some((prefix) => modelName.startsWith(prefix)) return modelName.includes("gpt")
} }
const GEMINI_PROVIDERS = ["google/", "google-vertex/"] const GEMINI_PROVIDERS = ["google/", "google-vertex/"]

View File

@ -603,8 +603,8 @@ describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () =>
} }
}) })
test("hephaestus is not created when only github-copilot provider is connected", async () => { test("hephaestus IS created when github-copilot is connected with a GPT model", async () => {
// #given - github-copilot provider has models available // #given - github-copilot provider has gpt-5.3-codex available
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
new Set(["github-copilot/gpt-5.3-codex"]) new Set(["github-copilot/gpt-5.3-codex"])
) )
@ -614,8 +614,8 @@ describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () =>
// #when // #when
const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {}) const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
// #then // #then - github-copilot is now a valid provider for hephaestus
expect(agents.hephaestus).toBeUndefined() expect(agents.hephaestus).toBeDefined()
} finally { } finally {
fetchSpy.mockRestore() fetchSpy.mockRestore()
cacheSpy.mockRestore() cacheSpy.mockRestore()
@ -1002,7 +1002,7 @@ describe("buildAgent with category and skills", () => {
const agent = buildAgent(source["test-agent"], TEST_MODEL) const agent = buildAgent(source["test-agent"], TEST_MODEL)
// #then - category's built-in model is applied // #then - category's built-in model is applied
expect(agent.model).toBe("google/gemini-3-pro") expect(agent.model).toBe("google/gemini-3.1-pro")
}) })
test("agent with category and existing model keeps existing model", () => { test("agent with category and existing model keeps existing model", () => {

View File

@ -325,7 +325,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json", "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
"agents": { "agents": {
"atlas": { "atlas": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
}, },
"explore": { "explore": {
"model": "opencode/gpt-5-nano", "model": "opencode/gpt-5-nano",
@ -334,34 +334,34 @@ exports[`generateModelConfig single native provider uses Gemini models when only
"model": "opencode/glm-4.7-free", "model": "opencode/glm-4.7-free",
}, },
"metis": { "metis": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"momus": { "momus": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"multimodal-looker": { "multimodal-looker": {
"model": "google/gemini-3-flash-preview", "model": "google/gemini-3-flash-preview",
}, },
"oracle": { "oracle": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"prometheus": { "prometheus": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
}, },
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"quick": { "quick": {
"model": "google/gemini-3-flash-preview", "model": "google/gemini-3-flash-preview",
}, },
"ultrabrain": { "ultrabrain": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"unspecified-high": { "unspecified-high": {
@ -371,7 +371,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only
"model": "google/gemini-3-flash-preview", "model": "google/gemini-3-flash-preview",
}, },
"visual-engineering": { "visual-engineering": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -386,7 +386,7 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
"$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json", "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
"agents": { "agents": {
"atlas": { "atlas": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
}, },
"explore": { "explore": {
"model": "opencode/gpt-5-nano", "model": "opencode/gpt-5-nano",
@ -395,44 +395,44 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
"model": "opencode/glm-4.7-free", "model": "opencode/glm-4.7-free",
}, },
"metis": { "metis": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"momus": { "momus": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"multimodal-looker": { "multimodal-looker": {
"model": "google/gemini-3-flash-preview", "model": "google/gemini-3-flash-preview",
}, },
"oracle": { "oracle": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"prometheus": { "prometheus": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
}, },
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"quick": { "quick": {
"model": "google/gemini-3-flash-preview", "model": "google/gemini-3-flash-preview",
}, },
"ultrabrain": { "ultrabrain": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"unspecified-high": { "unspecified-high": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
}, },
"unspecified-low": { "unspecified-low": {
"model": "google/gemini-3-flash-preview", "model": "google/gemini-3-flash-preview",
}, },
"visual-engineering": { "visual-engineering": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -485,7 +485,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"deep": { "deep": {
@ -506,7 +506,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
"model": "anthropic/claude-sonnet-4-5", "model": "anthropic/claude-sonnet-4-5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -559,7 +559,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"deep": { "deep": {
@ -581,7 +581,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
"model": "anthropic/claude-sonnet-4-5", "model": "anthropic/claude-sonnet-4-5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -634,7 +634,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "opencode/gemini-3-pro", "model": "opencode/gemini-3.1-pro",
"variant": "high", "variant": "high",
}, },
"deep": { "deep": {
@ -655,7 +655,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
"model": "opencode/claude-sonnet-4-5", "model": "opencode/claude-sonnet-4-5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "opencode/gemini-3-pro", "model": "opencode/gemini-3.1-pro",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -708,7 +708,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "opencode/gemini-3-pro", "model": "opencode/gemini-3.1-pro",
"variant": "high", "variant": "high",
}, },
"deep": { "deep": {
@ -730,7 +730,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
"model": "opencode/claude-sonnet-4-5", "model": "opencode/claude-sonnet-4-5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "opencode/gemini-3-pro", "model": "opencode/gemini-3.1-pro",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -779,14 +779,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "github-copilot/gemini-3-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"quick": { "quick": {
"model": "github-copilot/claude-haiku-4.5", "model": "github-copilot/claude-haiku-4.5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "github-copilot/gemini-3-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"unspecified-high": { "unspecified-high": {
@ -796,7 +796,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
"model": "github-copilot/claude-sonnet-4.5", "model": "github-copilot/claude-sonnet-4.5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -845,14 +845,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "github-copilot/gemini-3-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"quick": { "quick": {
"model": "github-copilot/claude-haiku-4.5", "model": "github-copilot/claude-haiku-4.5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "github-copilot/gemini-3-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"unspecified-high": { "unspecified-high": {
@ -863,7 +863,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
"model": "github-copilot/claude-sonnet-4.5", "model": "github-copilot/claude-sonnet-4.5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -1026,7 +1026,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "opencode/gemini-3-pro", "model": "opencode/gemini-3.1-pro",
"variant": "high", "variant": "high",
}, },
"deep": { "deep": {
@ -1047,7 +1047,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
"model": "anthropic/claude-sonnet-4-5", "model": "anthropic/claude-sonnet-4-5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "opencode/gemini-3-pro", "model": "opencode/gemini-3.1-pro",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -1100,7 +1100,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "github-copilot/gemini-3-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"deep": { "deep": {
@ -1121,7 +1121,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
"model": "github-copilot/claude-sonnet-4.5", "model": "github-copilot/claude-sonnet-4.5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -1217,7 +1217,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
"model": "google/gemini-3-flash-preview", "model": "google/gemini-3-flash-preview",
}, },
"oracle": { "oracle": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"prometheus": { "prometheus": {
@ -1231,14 +1231,14 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"quick": { "quick": {
"model": "anthropic/claude-haiku-4-5", "model": "anthropic/claude-haiku-4-5",
}, },
"ultrabrain": { "ultrabrain": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"unspecified-high": { "unspecified-high": {
@ -1248,7 +1248,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
"model": "anthropic/claude-sonnet-4-5", "model": "anthropic/claude-sonnet-4-5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -1301,7 +1301,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "github-copilot/gemini-3-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"deep": { "deep": {
@ -1322,7 +1322,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
"model": "github-copilot/claude-sonnet-4.5", "model": "github-copilot/claude-sonnet-4.5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "github-copilot/gemini-3-pro-preview", "model": "github-copilot/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -1375,7 +1375,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"deep": { "deep": {
@ -1396,7 +1396,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
"model": "anthropic/claude-sonnet-4-5", "model": "anthropic/claude-sonnet-4-5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {
@ -1449,7 +1449,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
}, },
"categories": { "categories": {
"artistry": { "artistry": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"deep": { "deep": {
@ -1471,7 +1471,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
"model": "anthropic/claude-sonnet-4-5", "model": "anthropic/claude-sonnet-4-5",
}, },
"visual-engineering": { "visual-engineering": {
"model": "google/gemini-3-pro-preview", "model": "google/gemini-3.1-pro-preview",
"variant": "high", "variant": "high",
}, },
"writing": { "writing": {

View File

@ -178,7 +178,7 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
expect(models).toBeTruthy() expect(models).toBeTruthy()
const required = [ const required = [
"antigravity-gemini-3-pro", "antigravity-gemini-3.1-pro",
"antigravity-gemini-3-flash", "antigravity-gemini-3-flash",
"antigravity-claude-sonnet-4-6", "antigravity-claude-sonnet-4-6",
"antigravity-claude-sonnet-4-6-thinking", "antigravity-claude-sonnet-4-6-thinking",
@ -206,7 +206,7 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record<string, any> const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record<string, any>
// #when checking Gemini Pro variants // #when checking Gemini Pro variants
const pro = models["antigravity-gemini-3-pro"] const pro = models["antigravity-gemini-3.1-pro"]
// #then should have low and high variants // #then should have low and high variants
expect(pro.variants).toBeTruthy() expect(pro.variants).toBeTruthy()
expect(pro.variants.low).toBeTruthy() expect(pro.variants.low).toBeTruthy()

View File

@ -4,10 +4,10 @@
* IMPORTANT: Model names MUST use `antigravity-` prefix for stability. * IMPORTANT: Model names MUST use `antigravity-` prefix for stability.
* *
* Since opencode-antigravity-auth v1.3.0, models use a variant system: * Since opencode-antigravity-auth v1.3.0, models use a variant system:
* - `antigravity-gemini-3-pro` with variants: low, high * - `antigravity-gemini-3.1-pro` with variants: low, high
* - `antigravity-gemini-3-flash` with variants: minimal, low, medium, high * - `antigravity-gemini-3-flash` with variants: minimal, low, medium, high
* *
* Legacy tier-suffixed names (e.g., `antigravity-gemini-3-pro-high`) still work * Legacy tier-suffixed names (e.g., `antigravity-gemini-3.1-pro-high`) still work
* but variants are the recommended approach. * but variants are the recommended approach.
* *
* @see https://github.com/NoeFabris/opencode-antigravity-auth#models * @see https://github.com/NoeFabris/opencode-antigravity-auth#models
@ -16,7 +16,7 @@ export const ANTIGRAVITY_PROVIDER_CONFIG = {
google: { google: {
name: "Google", name: "Google",
models: { models: {
"antigravity-gemini-3-pro": { "antigravity-gemini-3.1-pro": {
name: "Gemini 3 Pro (Antigravity)", name: "Gemini 3 Pro (Antigravity)",
limit: { context: 1048576, output: 65535 }, limit: { context: 1048576, output: 65535 },
modalities: { input: ["text", "image", "pdf"], output: ["text"] }, modalities: { input: ["text", "image", "pdf"], output: ["text"] },

View File

@ -1,4 +1,5 @@
import { getConfigDir } from "./config-context" import { getConfigDir } from "./config-context"
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
const BUN_INSTALL_TIMEOUT_SECONDS = 60 const BUN_INSTALL_TIMEOUT_SECONDS = 60
const BUN_INSTALL_TIMEOUT_MS = BUN_INSTALL_TIMEOUT_SECONDS * 1000 const BUN_INSTALL_TIMEOUT_MS = BUN_INSTALL_TIMEOUT_SECONDS * 1000
@ -16,7 +17,7 @@ export async function runBunInstall(): Promise<boolean> {
export async function runBunInstallWithDetails(): Promise<BunInstallResult> { export async function runBunInstallWithDetails(): Promise<BunInstallResult> {
try { try {
const proc = Bun.spawn(["bun", "install"], { const proc = spawnWithWindowsHide(["bun", "install"], {
cwd: getConfigDir(), cwd: getConfigDir(),
stdout: "inherit", stdout: "inherit",
stderr: "inherit", stderr: "inherit",

View File

@ -1,4 +1,5 @@
import type { OpenCodeBinaryType } from "../../shared/opencode-config-dir-types" import type { OpenCodeBinaryType } from "../../shared/opencode-config-dir-types"
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
import { initConfigContext } from "./config-context" import { initConfigContext } from "./config-context"
const OPENCODE_BINARIES = ["opencode", "opencode-desktop"] as const const OPENCODE_BINARIES = ["opencode", "opencode-desktop"] as const
@ -11,7 +12,7 @@ interface OpenCodeBinaryResult {
async function findOpenCodeBinaryWithVersion(): Promise<OpenCodeBinaryResult | null> { async function findOpenCodeBinaryWithVersion(): Promise<OpenCodeBinaryResult | null> {
for (const binary of OPENCODE_BINARIES) { for (const binary of OPENCODE_BINARIES) {
try { try {
const proc = Bun.spawn([binary, "--version"], { const proc = spawnWithWindowsHide([binary, "--version"], {
stdout: "pipe", stdout: "pipe",
stderr: "pipe", stderr: "pipe",
}) })

View File

@ -0,0 +1,80 @@
import { afterEach, beforeEach, describe, expect, it } from "bun:test"
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"
import { tmpdir } from "node:os"
import { join } from "node:path"
import { parseJsonc } from "../../shared/jsonc-parser"
import type { InstallConfig } from "../types"
import { resetConfigContext } from "./config-context"
import { generateOmoConfig } from "./generate-omo-config"
import { writeOmoConfig } from "./write-omo-config"
const installConfig: InstallConfig = {
hasClaude: true,
isMax20: true,
hasOpenAI: true,
hasGemini: true,
hasCopilot: false,
hasOpencodeZen: false,
hasZaiCodingPlan: false,
hasKimiForCoding: false,
}
function getRecord(value: unknown): Record<string, unknown> {
if (value && typeof value === "object" && !Array.isArray(value)) {
return value as Record<string, unknown>
}
return {}
}
describe("writeOmoConfig", () => {
let testConfigDir = ""
let testConfigPath = ""
beforeEach(() => {
testConfigDir = join(tmpdir(), `omo-write-config-${Date.now()}-${Math.random().toString(36).slice(2)}`)
testConfigPath = join(testConfigDir, "oh-my-opencode.json")
mkdirSync(testConfigDir, { recursive: true })
process.env.OPENCODE_CONFIG_DIR = testConfigDir
resetConfigContext()
})
afterEach(() => {
rmSync(testConfigDir, { recursive: true, force: true })
resetConfigContext()
delete process.env.OPENCODE_CONFIG_DIR
})
it("preserves existing user values while adding new defaults", () => {
// given
const existingConfig = {
agents: {
sisyphus: {
model: "custom/provider-model",
},
},
disabled_hooks: ["comment-checker"],
}
writeFileSync(testConfigPath, JSON.stringify(existingConfig, null, 2) + "\n", "utf-8")
const generatedDefaults = generateOmoConfig(installConfig)
// when
const result = writeOmoConfig(installConfig)
// then
expect(result.success).toBe(true)
const savedConfig = parseJsonc<Record<string, unknown>>(readFileSync(testConfigPath, "utf-8"))
const savedAgents = getRecord(savedConfig.agents)
const savedSisyphus = getRecord(savedAgents.sisyphus)
expect(savedSisyphus.model).toBe("custom/provider-model")
expect(savedConfig.disabled_hooks).toEqual(["comment-checker"])
for (const defaultKey of Object.keys(generatedDefaults)) {
expect(savedConfig).toHaveProperty(defaultKey)
}
})
})

View File

@ -43,7 +43,7 @@ export function writeOmoConfig(installConfig: InstallConfig): ConfigMergeResult
return { success: true, configPath: omoConfigPath } return { success: true, configPath: omoConfigPath }
} }
const merged = deepMergeRecord(existing, newConfig) const merged = deepMergeRecord(newConfig, existing)
writeFileSync(omoConfigPath, JSON.stringify(merged, null, 2) + "\n") writeFileSync(omoConfigPath, JSON.stringify(merged, null, 2) + "\n")
} catch (parseErr) { } catch (parseErr) {
if (parseErr instanceof SyntaxError) { if (parseErr instanceof SyntaxError) {

View File

@ -3,6 +3,7 @@ import { createRequire } from "node:module"
import { dirname, join } from "node:path" import { dirname, join } from "node:path"
import type { DependencyInfo } from "../types" import type { DependencyInfo } from "../types"
import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide"
async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> { async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> {
try { try {
@ -18,7 +19,7 @@ async function checkBinaryExists(binary: string): Promise<{ exists: boolean; pat
async function getBinaryVersion(binary: string): Promise<string | null> { async function getBinaryVersion(binary: string): Promise<string | null> {
try { try {
const proc = Bun.spawn([binary, "--version"], { stdout: "pipe", stderr: "pipe" }) const proc = spawnWithWindowsHide([binary, "--version"], { stdout: "pipe", stderr: "pipe" })
const output = await new Response(proc.stdout).text() const output = await new Response(proc.stdout).text()
await proc.exited await proc.exited
if (proc.exitCode === 0) { if (proc.exitCode === 0) {
@ -140,4 +141,3 @@ export async function checkCommentChecker(): Promise<DependencyInfo> {
path: resolvedPath, path: resolvedPath,
} }
} }

View File

@ -26,7 +26,7 @@ describe("model-resolution check", () => {
// then: Should have category entries // then: Should have category entries
const visual = info.categories.find((c) => c.name === "visual-engineering") const visual = info.categories.find((c) => c.name === "visual-engineering")
expect(visual).toBeDefined() expect(visual).toBeDefined()
expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro") expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3.1-pro")
expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google") expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google")
}) })
}) })

View File

@ -1,6 +1,7 @@
import { existsSync } from "node:fs" import { existsSync } from "node:fs"
import { homedir } from "node:os" import { homedir } from "node:os"
import { join } from "node:path" import { join } from "node:path"
import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide"
import { OPENCODE_BINARIES } from "../constants" import { OPENCODE_BINARIES } from "../constants"
@ -110,7 +111,7 @@ export async function getOpenCodeVersion(
): Promise<string | null> { ): Promise<string | null> {
try { try {
const command = buildVersionCommand(binaryPath, platform) const command = buildVersionCommand(binaryPath, platform)
const processResult = Bun.spawn(command, { stdout: "pipe", stderr: "pipe" }) const processResult = spawnWithWindowsHide(command, { stdout: "pipe", stderr: "pipe" })
const output = await new Response(processResult.stdout).text() const output = await new Response(processResult.stdout).text()
await processResult.exited await processResult.exited

View File

@ -1,3 +1,5 @@
import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide"
export interface GhCliInfo { export interface GhCliInfo {
installed: boolean installed: boolean
version: string | null version: string | null
@ -19,7 +21,7 @@ async function checkBinaryExists(binary: string): Promise<{ exists: boolean; pat
async function getGhVersion(): Promise<string | null> { async function getGhVersion(): Promise<string | null> {
try { try {
const processResult = Bun.spawn(["gh", "--version"], { stdout: "pipe", stderr: "pipe" }) const processResult = spawnWithWindowsHide(["gh", "--version"], { stdout: "pipe", stderr: "pipe" })
const output = await new Response(processResult.stdout).text() const output = await new Response(processResult.stdout).text()
await processResult.exited await processResult.exited
if (processResult.exitCode !== 0) return null if (processResult.exitCode !== 0) return null
@ -38,7 +40,7 @@ async function getGhAuthStatus(): Promise<{
error: string | null error: string | null
}> { }> {
try { try {
const processResult = Bun.spawn(["gh", "auth", "status"], { const processResult = spawnWithWindowsHide(["gh", "auth", "status"], {
stdout: "pipe", stdout: "pipe",
stderr: "pipe", stderr: "pipe",
env: { ...process.env, GH_NO_UPDATE_NOTIFIER: "1" }, env: { ...process.env, GH_NO_UPDATE_NOTIFIER: "1" },

View File

@ -24,7 +24,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
oracle: { oracle: {
fallbackChain: [ fallbackChain: [
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
], ],
}, },
@ -59,7 +59,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
{ providers: ["kimi-for-coding"], model: "k2p5" }, { providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" },
], ],
}, },
metis: { metis: {
@ -68,14 +68,14 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
{ providers: ["kimi-for-coding"], model: "k2p5" }, { providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
], ],
}, },
momus: { momus: {
fallbackChain: [ fallbackChain: [
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
], ],
}, },
atlas: { atlas: {
@ -84,7 +84,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
{ providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" },
], ],
}, },
} }
@ -92,7 +92,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = { export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
"visual-engineering": { "visual-engineering": {
fallbackChain: [ fallbackChain: [
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
{ providers: ["zai-coding-plan"], model: "glm-5" }, { providers: ["zai-coding-plan"], model: "glm-5" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["kimi-for-coding"], model: "k2p5" }, { providers: ["kimi-for-coding"], model: "k2p5" },
@ -101,7 +101,7 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> =
ultrabrain: { ultrabrain: {
fallbackChain: [ fallbackChain: [
{ providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" }, { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
], ],
}, },
@ -109,17 +109,17 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> =
fallbackChain: [ fallbackChain: [
{ providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
], ],
requiresModel: "gpt-5.3-codex", requiresModel: "gpt-5.3-codex",
}, },
artistry: { artistry: {
fallbackChain: [ fallbackChain: [
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
], ],
requiresModel: "gemini-3-pro", requiresModel: "gemini-3.1-pro",
}, },
quick: { quick: {
fallbackChain: [ fallbackChain: [
@ -139,7 +139,7 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> =
fallbackChain: [ fallbackChain: [
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" },
], ],
}, },
writing: { writing: {

View File

@ -40,16 +40,16 @@ describe("transformModelForProvider", () => {
expect(result).toBe("claude-haiku-4.5") expect(result).toBe("claude-haiku-4.5")
}) })
test("transforms gemini-3-pro to gemini-3-pro-preview", () => { test("transforms gemini-3.1-pro to gemini-3.1-pro-preview", () => {
// #given github-copilot provider and gemini-3-pro model // #given github-copilot provider and gemini-3.1-pro model
const provider = "github-copilot" const provider = "github-copilot"
const model = "gemini-3-pro" const model = "gemini-3.1-pro"
// #when transformModelForProvider is called // #when transformModelForProvider is called
const result = transformModelForProvider(provider, model) const result = transformModelForProvider(provider, model)
// #then should transform to gemini-3-pro-preview // #then should transform to gemini-3.1-pro-preview
expect(result).toBe("gemini-3-pro-preview") expect(result).toBe("gemini-3.1-pro-preview")
}) })
test("transforms gemini-3-flash to gemini-3-flash-preview", () => { test("transforms gemini-3-flash to gemini-3-flash-preview", () => {
@ -64,16 +64,16 @@ describe("transformModelForProvider", () => {
expect(result).toBe("gemini-3-flash-preview") expect(result).toBe("gemini-3-flash-preview")
}) })
test("prevents double transformation of gemini-3-pro-preview", () => { test("prevents double transformation of gemini-3.1-pro-preview", () => {
// #given github-copilot provider and gemini-3-pro-preview model (already transformed) // #given github-copilot provider and gemini-3.1-pro-preview model (already transformed)
const provider = "github-copilot" const provider = "github-copilot"
const model = "gemini-3-pro-preview" const model = "gemini-3.1-pro-preview"
// #when transformModelForProvider is called // #when transformModelForProvider is called
const result = transformModelForProvider(provider, model) const result = transformModelForProvider(provider, model)
// #then should NOT become gemini-3-pro-preview-preview // #then should NOT become gemini-3.1-pro-preview-preview
expect(result).toBe("gemini-3-pro-preview") expect(result).toBe("gemini-3.1-pro-preview")
}) })
test("prevents double transformation of gemini-3-flash-preview", () => { test("prevents double transformation of gemini-3-flash-preview", () => {
@ -102,16 +102,16 @@ describe("transformModelForProvider", () => {
expect(result).toBe("gemini-3-flash-preview") expect(result).toBe("gemini-3-flash-preview")
}) })
test("transforms gemini-3-pro to gemini-3-pro-preview", () => { test("transforms gemini-3.1-pro to gemini-3.1-pro-preview", () => {
// #given google provider and gemini-3-pro model // #given google provider and gemini-3.1-pro model
const provider = "google" const provider = "google"
const model = "gemini-3-pro" const model = "gemini-3.1-pro"
// #when transformModelForProvider is called // #when transformModelForProvider is called
const result = transformModelForProvider(provider, model) const result = transformModelForProvider(provider, model)
// #then should transform to gemini-3-pro-preview // #then should transform to gemini-3.1-pro-preview
expect(result).toBe("gemini-3-pro-preview") expect(result).toBe("gemini-3.1-pro-preview")
}) })
test("passes through other gemini models unchanged", () => { test("passes through other gemini models unchanged", () => {
@ -138,16 +138,16 @@ describe("transformModelForProvider", () => {
expect(result).toBe("gemini-3-flash-preview") expect(result).toBe("gemini-3-flash-preview")
}) })
test("prevents double transformation of gemini-3-pro-preview", () => { test("prevents double transformation of gemini-3.1-pro-preview", () => {
// #given google provider and gemini-3-pro-preview model (already transformed) // #given google provider and gemini-3.1-pro-preview model (already transformed)
const provider = "google" const provider = "google"
const model = "gemini-3-pro-preview" const model = "gemini-3.1-pro-preview"
// #when transformModelForProvider is called // #when transformModelForProvider is called
const result = transformModelForProvider(provider, model) const result = transformModelForProvider(provider, model)
// #then should NOT become gemini-3-pro-preview-preview // #then should NOT become gemini-3.1-pro-preview-preview
expect(result).toBe("gemini-3-pro-preview") expect(result).toBe("gemini-3.1-pro-preview")
}) })
test("does not transform claude models for google provider", () => { test("does not transform claude models for google provider", () => {

View File

@ -1,4 +1,4 @@
import { describe, it, expect, spyOn } from "bun:test" const { describe, it, expect, spyOn } = require("bun:test")
import type { RunContext } from "./types" import type { RunContext } from "./types"
import { createEventState } from "./events" import { createEventState } from "./events"
import { handleSessionStatus, handleMessagePartUpdated, handleMessageUpdated, handleTuiToast } from "./event-handlers" import { handleSessionStatus, handleMessagePartUpdated, handleMessageUpdated, handleTuiToast } from "./event-handlers"
@ -235,9 +235,7 @@ describe("handleMessagePartUpdated", () => {
it("prints completion metadata once when assistant text part is completed", () => { it("prints completion metadata once when assistant text part is completed", () => {
// given // given
const nowSpy = spyOn(Date, "now") const nowSpy = spyOn(Date, "now").mockReturnValue(3400)
nowSpy.mockReturnValueOnce(1000)
nowSpy.mockReturnValueOnce(3400)
const ctx = createMockContext("ses_main") const ctx = createMockContext("ses_main")
const state = createEventState() const state = createEventState()
@ -259,6 +257,7 @@ describe("handleMessagePartUpdated", () => {
} as any, } as any,
state, state,
) )
state.messageStartedAtById["msg_1"] = 1000
// when // when
handleMessagePartUpdated( handleMessagePartUpdated(

View File

@ -7,6 +7,8 @@ export interface EventState {
currentTool: string | null currentTool: string | null
/** Set to true when the main session has produced meaningful work (text, tool call, or tool result) */ /** Set to true when the main session has produced meaningful work (text, tool call, or tool result) */
hasReceivedMeaningfulWork: boolean hasReceivedMeaningfulWork: boolean
/** Timestamp of the last received event (for watchdog detection) */
lastEventTimestamp: number
/** Count of assistant messages for the main session */ /** Count of assistant messages for the main session */
messageCount: number messageCount: number
/** Current agent name from the latest assistant message */ /** Current agent name from the latest assistant message */
@ -54,6 +56,7 @@ export function createEventState(): EventState {
lastPartText: "", lastPartText: "",
currentTool: null, currentTool: null,
hasReceivedMeaningfulWork: false, hasReceivedMeaningfulWork: false,
lastEventTimestamp: Date.now(),
messageCount: 0, messageCount: 0,
currentAgent: null, currentAgent: null,
currentModel: null, currentModel: null,

View File

@ -35,6 +35,9 @@ export async function processEvents(
logEventVerbose(ctx, payload) logEventVerbose(ctx, payload)
} }
// Update last event timestamp for watchdog detection
state.lastEventTimestamp = Date.now()
handleSessionError(ctx, payload, state) handleSessionError(ctx, payload, state)
handleSessionIdle(ctx, payload, state) handleSessionIdle(ctx, payload, state)
handleSessionStatus(ctx, payload, state) handleSessionStatus(ctx, payload, state)

View File

@ -3,6 +3,7 @@ import type { RunResult } from "./types"
import { createJsonOutputManager } from "./json-output" import { createJsonOutputManager } from "./json-output"
import { resolveSession } from "./session-resolver" import { resolveSession } from "./session-resolver"
import { executeOnCompleteHook } from "./on-complete-hook" import { executeOnCompleteHook } from "./on-complete-hook"
import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide"
import type { OpencodeClient } from "./types" import type { OpencodeClient } from "./types"
import * as originalSdk from "@opencode-ai/sdk" import * as originalSdk from "@opencode-ai/sdk"
import * as originalPortUtils from "../../shared/port-utils" import * as originalPortUtils from "../../shared/port-utils"
@ -147,7 +148,7 @@ describe("integration: --session-id", () => {
const result = resolveSession({ client: mockClient, sessionId, directory: "/test" }) const result = resolveSession({ client: mockClient, sessionId, directory: "/test" })
// then // then
await expect(result).rejects.toThrow(`Session not found: ${sessionId}`) expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
expect(mockClient.session.get).toHaveBeenCalledWith({ expect(mockClient.session.get).toHaveBeenCalledWith({
path: { id: sessionId }, path: { id: sessionId },
query: { directory: "/test" }, query: { directory: "/test" },
@ -161,10 +162,13 @@ describe("integration: --on-complete", () => {
beforeEach(() => { beforeEach(() => {
spyOn(console, "error").mockImplementation(() => {}) spyOn(console, "error").mockImplementation(() => {})
spawnSpy = spyOn(Bun, "spawn").mockReturnValue({ spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue({
exited: Promise.resolve(0), exited: Promise.resolve(0),
exitCode: 0, exitCode: 0,
} as unknown as ReturnType<typeof Bun.spawn>) stdout: undefined,
stderr: undefined,
kill: () => {},
} satisfies ReturnType<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>)
}) })
afterEach(() => { afterEach(() => {
@ -186,7 +190,7 @@ describe("integration: --on-complete", () => {
// then // then
expect(spawnSpy).toHaveBeenCalledTimes(1) expect(spawnSpy).toHaveBeenCalledTimes(1)
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn> const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
expect(options?.env?.SESSION_ID).toBe("session-123") expect(options?.env?.SESSION_ID).toBe("session-123")
expect(options?.env?.EXIT_CODE).toBe("0") expect(options?.env?.EXIT_CODE).toBe("0")
expect(options?.env?.DURATION_MS).toBe("5000") expect(options?.env?.DURATION_MS).toBe("5000")
@ -208,10 +212,13 @@ describe("integration: option combinations", () => {
spyOn(console, "error").mockImplementation(() => {}) spyOn(console, "error").mockImplementation(() => {})
mockStdout = createMockWriteStream() mockStdout = createMockWriteStream()
mockStderr = createMockWriteStream() mockStderr = createMockWriteStream()
spawnSpy = spyOn(Bun, "spawn").mockReturnValue({ spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue({
exited: Promise.resolve(0), exited: Promise.resolve(0),
exitCode: 0, exitCode: 0,
} as unknown as ReturnType<typeof Bun.spawn>) stdout: undefined,
stderr: undefined,
kill: () => {},
} satisfies ReturnType<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>)
}) })
afterEach(() => { afterEach(() => {
@ -249,9 +256,9 @@ describe("integration: option combinations", () => {
const emitted = mockStdout.writes[0]! const emitted = mockStdout.writes[0]!
expect(() => JSON.parse(emitted)).not.toThrow() expect(() => JSON.parse(emitted)).not.toThrow()
expect(spawnSpy).toHaveBeenCalledTimes(1) expect(spawnSpy).toHaveBeenCalledTimes(1)
const [args] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn> const [args] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
expect(args).toEqual(["sh", "-c", "echo done"]) expect(args).toEqual(["sh", "-c", "echo done"])
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn> const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
expect(options?.env?.SESSION_ID).toBe("session-123") expect(options?.env?.SESSION_ID).toBe("session-123")
expect(options?.env?.EXIT_CODE).toBe("0") expect(options?.env?.EXIT_CODE).toBe("0")
expect(options?.env?.DURATION_MS).toBe("5000") expect(options?.env?.DURATION_MS).toBe("5000")

View File

@ -1,4 +1,5 @@
import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test" import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test"
import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide"
import { executeOnCompleteHook } from "./on-complete-hook" import { executeOnCompleteHook } from "./on-complete-hook"
describe("executeOnCompleteHook", () => { describe("executeOnCompleteHook", () => {
@ -6,7 +7,10 @@ describe("executeOnCompleteHook", () => {
return { return {
exited: Promise.resolve(exitCode), exited: Promise.resolve(exitCode),
exitCode, exitCode,
} as unknown as ReturnType<typeof Bun.spawn> stdout: undefined,
stderr: undefined,
kill: () => {},
} satisfies ReturnType<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
} }
let consoleErrorSpy: ReturnType<typeof spyOn<typeof console, "error">> let consoleErrorSpy: ReturnType<typeof spyOn<typeof console, "error">>
@ -21,7 +25,7 @@ describe("executeOnCompleteHook", () => {
it("executes command with correct env vars", async () => { it("executes command with correct env vars", async () => {
// given // given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
try { try {
// when // when
@ -35,7 +39,7 @@ describe("executeOnCompleteHook", () => {
// then // then
expect(spawnSpy).toHaveBeenCalledTimes(1) expect(spawnSpy).toHaveBeenCalledTimes(1)
const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn> const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
expect(args).toEqual(["sh", "-c", "echo test"]) expect(args).toEqual(["sh", "-c", "echo test"])
expect(options?.env?.SESSION_ID).toBe("session-123") expect(options?.env?.SESSION_ID).toBe("session-123")
@ -51,7 +55,7 @@ describe("executeOnCompleteHook", () => {
it("env var values are strings", async () => { it("env var values are strings", async () => {
// given // given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
try { try {
// when // when
@ -64,7 +68,7 @@ describe("executeOnCompleteHook", () => {
}) })
// then // then
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn> const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof spawnWithWindowsHideModule.spawnWithWindowsHide>
expect(options?.env?.EXIT_CODE).toBe("1") expect(options?.env?.EXIT_CODE).toBe("1")
expect(options?.env?.EXIT_CODE).toBeTypeOf("string") expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
@ -79,7 +83,7 @@ describe("executeOnCompleteHook", () => {
it("empty command string is no-op", async () => { it("empty command string is no-op", async () => {
// given // given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
try { try {
// when // when
@ -100,7 +104,7 @@ describe("executeOnCompleteHook", () => {
it("whitespace-only command is no-op", async () => { it("whitespace-only command is no-op", async () => {
// given // given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0))
try { try {
// when // when
@ -121,11 +125,11 @@ describe("executeOnCompleteHook", () => {
it("command failure logs warning but does not throw", async () => { it("command failure logs warning but does not throw", async () => {
// given // given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(1)) const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(1))
try { try {
// when // when
await expect( expect(
executeOnCompleteHook({ executeOnCompleteHook({
command: "false", command: "false",
sessionId: "session-123", sessionId: "session-123",
@ -149,13 +153,13 @@ describe("executeOnCompleteHook", () => {
it("spawn error logs warning but does not throw", async () => { it("spawn error logs warning but does not throw", async () => {
// given // given
const spawnError = new Error("Command not found") const spawnError = new Error("Command not found")
const spawnSpy = spyOn(Bun, "spawn").mockImplementation(() => { const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockImplementation(() => {
throw spawnError throw spawnError
}) })
try { try {
// when // when
await expect( expect(
executeOnCompleteHook({ executeOnCompleteHook({
command: "nonexistent-command", command: "nonexistent-command",
sessionId: "session-123", sessionId: "session-123",

View File

@ -1,4 +1,5 @@
import pc from "picocolors" import pc from "picocolors"
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
export async function executeOnCompleteHook(options: { export async function executeOnCompleteHook(options: {
command: string command: string
@ -17,7 +18,7 @@ export async function executeOnCompleteHook(options: {
console.error(pc.dim(`Running on-complete hook: ${trimmedCommand}`)) console.error(pc.dim(`Running on-complete hook: ${trimmedCommand}`))
try { try {
const proc = Bun.spawn(["sh", "-c", trimmedCommand], { const proc = spawnWithWindowsHide(["sh", "-c", trimmedCommand], {
env: { env: {
...process.env, ...process.env,
SESSION_ID: sessionId, SESSION_ID: sessionId,

View File

@ -1,4 +1,5 @@
import { delimiter, dirname, join } from "node:path" import { delimiter, dirname, join } from "node:path"
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"
const OPENCODE_COMMANDS = ["opencode", "opencode-desktop"] as const const OPENCODE_COMMANDS = ["opencode", "opencode-desktop"] as const
const WINDOWS_SUFFIXES = ["", ".exe", ".cmd", ".bat", ".ps1"] as const const WINDOWS_SUFFIXES = ["", ".exe", ".cmd", ".bat", ".ps1"] as const
@ -41,7 +42,7 @@ export function collectCandidateBinaryPaths(
export async function canExecuteBinary(binaryPath: string): Promise<boolean> { export async function canExecuteBinary(binaryPath: string): Promise<boolean> {
try { try {
const proc = Bun.spawn([binaryPath, "--version"], { const proc = spawnWithWindowsHide([binaryPath, "--version"], {
stdout: "pipe", stdout: "pipe",
stderr: "pipe", stderr: "pipe",
}) })

View File

@ -8,11 +8,15 @@ const DEFAULT_POLL_INTERVAL_MS = 500
const DEFAULT_REQUIRED_CONSECUTIVE = 1 const DEFAULT_REQUIRED_CONSECUTIVE = 1
const ERROR_GRACE_CYCLES = 3 const ERROR_GRACE_CYCLES = 3
const MIN_STABILIZATION_MS = 1_000 const MIN_STABILIZATION_MS = 1_000
const DEFAULT_EVENT_WATCHDOG_MS = 30_000 // 30 seconds
const DEFAULT_SECONDARY_MEANINGFUL_WORK_TIMEOUT_MS = 60_000 // 60 seconds
export interface PollOptions { export interface PollOptions {
pollIntervalMs?: number pollIntervalMs?: number
requiredConsecutive?: number requiredConsecutive?: number
minStabilizationMs?: number minStabilizationMs?: number
eventWatchdogMs?: number
secondaryMeaningfulWorkTimeoutMs?: number
} }
export async function pollForCompletion( export async function pollForCompletion(
@ -28,9 +32,15 @@ export async function pollForCompletion(
options.minStabilizationMs ?? MIN_STABILIZATION_MS options.minStabilizationMs ?? MIN_STABILIZATION_MS
const minStabilizationMs = const minStabilizationMs =
rawMinStabilizationMs > 0 ? rawMinStabilizationMs : MIN_STABILIZATION_MS rawMinStabilizationMs > 0 ? rawMinStabilizationMs : MIN_STABILIZATION_MS
const eventWatchdogMs =
options.eventWatchdogMs ?? DEFAULT_EVENT_WATCHDOG_MS
const secondaryMeaningfulWorkTimeoutMs =
options.secondaryMeaningfulWorkTimeoutMs ??
DEFAULT_SECONDARY_MEANINGFUL_WORK_TIMEOUT_MS
let consecutiveCompleteChecks = 0 let consecutiveCompleteChecks = 0
let errorCycleCount = 0 let errorCycleCount = 0
let firstWorkTimestamp: number | null = null let firstWorkTimestamp: number | null = null
let secondaryTimeoutChecked = false
const pollStartTimestamp = Date.now() const pollStartTimestamp = Date.now()
while (!abortController.signal.aborted) { while (!abortController.signal.aborted) {
@ -59,7 +69,37 @@ export async function pollForCompletion(
errorCycleCount = 0 errorCycleCount = 0
} }
const mainSessionStatus = await getMainSessionStatus(ctx) // Watchdog: if no events received for N seconds, verify session status via API
let mainSessionStatus: "idle" | "busy" | "retry" | null = null
if (eventState.lastEventTimestamp !== null) {
const timeSinceLastEvent = Date.now() - eventState.lastEventTimestamp
if (timeSinceLastEvent > eventWatchdogMs) {
// Events stopped coming - verify actual session state
console.log(
pc.yellow(
`\n No events for ${Math.round(
timeSinceLastEvent / 1000
)}s, verifying session status...`
)
)
// Force check session status directly
mainSessionStatus = await getMainSessionStatus(ctx)
if (mainSessionStatus === "idle") {
eventState.mainSessionIdle = true
} else if (mainSessionStatus === "busy" || mainSessionStatus === "retry") {
eventState.mainSessionIdle = false
}
// Reset timestamp to avoid repeated checks
eventState.lastEventTimestamp = Date.now()
}
}
// Only call getMainSessionStatus if watchdog didn't already check
if (mainSessionStatus === null) {
mainSessionStatus = await getMainSessionStatus(ctx)
}
if (mainSessionStatus === "busy" || mainSessionStatus === "retry") { if (mainSessionStatus === "busy" || mainSessionStatus === "retry") {
eventState.mainSessionIdle = false eventState.mainSessionIdle = false
} else if (mainSessionStatus === "idle") { } else if (mainSessionStatus === "idle") {
@ -81,6 +121,50 @@ export async function pollForCompletion(
consecutiveCompleteChecks = 0 consecutiveCompleteChecks = 0
continue continue
} }
// Secondary timeout: if we've been polling for reasonable time but haven't
// received meaningful work via events, check if there's active work via API
// Only check once to avoid unnecessary API calls every poll cycle
if (
Date.now() - pollStartTimestamp > secondaryMeaningfulWorkTimeoutMs &&
!secondaryTimeoutChecked
) {
secondaryTimeoutChecked = true
// Check if session actually has pending work (children, todos, etc.)
const childrenRes = await ctx.client.session.children({
path: { id: ctx.sessionID },
query: { directory: ctx.directory },
})
const children = normalizeSDKResponse(childrenRes, [] as unknown[])
const todosRes = await ctx.client.session.todo({
path: { id: ctx.sessionID },
query: { directory: ctx.directory },
})
const todos = normalizeSDKResponse(todosRes, [] as unknown[])
const hasActiveChildren =
Array.isArray(children) && children.length > 0
const hasActiveTodos =
Array.isArray(todos) &&
todos.some(
(t: unknown) =>
(t as { status?: string })?.status !== "completed" &&
(t as { status?: string })?.status !== "cancelled"
)
const hasActiveWork = hasActiveChildren || hasActiveTodos
if (hasActiveWork) {
// Assume meaningful work is happening even without events
eventState.hasReceivedMeaningfulWork = true
console.log(
pc.yellow(
`\n No meaningful work events for ${Math.round(
secondaryMeaningfulWorkTimeoutMs / 1000
)}s but session has active work - assuming in progress`
)
)
}
}
} else { } else {
// Track when first meaningful work was received // Track when first meaningful work was received
if (firstWorkTimestamp === null) { if (firstWorkTimestamp === null) {

View File

@ -60,7 +60,9 @@ const BuiltinAgentOverridesSchema = z.object({
build: AgentOverrideConfigSchema.optional(), build: AgentOverrideConfigSchema.optional(),
plan: AgentOverrideConfigSchema.optional(), plan: AgentOverrideConfigSchema.optional(),
sisyphus: AgentOverrideConfigSchema.optional(), sisyphus: AgentOverrideConfigSchema.optional(),
hephaestus: AgentOverrideConfigSchema.optional(), hephaestus: AgentOverrideConfigSchema.extend({
allow_non_gpt_model: z.boolean().optional(),
}).optional(),
"sisyphus-junior": AgentOverrideConfigSchema.optional(), "sisyphus-junior": AgentOverrideConfigSchema.optional(),
"OpenCode-Builder": AgentOverrideConfigSchema.optional(), "OpenCode-Builder": AgentOverrideConfigSchema.optional(),
prometheus: AgentOverrideConfigSchema.optional(), prometheus: AgentOverrideConfigSchema.optional(),

View File

@ -20,6 +20,7 @@ export const CategoryConfigSchema = z.object({
textVerbosity: z.enum(["low", "medium", "high"]).optional(), textVerbosity: z.enum(["low", "medium", "high"]).optional(),
tools: z.record(z.string(), z.boolean()).optional(), tools: z.record(z.string(), z.boolean()).optional(),
prompt_append: z.string().optional(), prompt_append: z.string().optional(),
max_prompt_tokens: z.number().int().positive().optional(),
/** Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini/minimax models. */ /** Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini/minimax models. */
is_unstable_agent: z.boolean().optional(), is_unstable_agent: z.boolean().optional(),
/** Disable this category. Disabled categories are excluded from task delegation. */ /** Disable this category. Disabled categories are excluded from task delegation. */

View File

@ -27,7 +27,7 @@ export const OhMyOpenCodeConfigSchema = z.object({
/** Default agent name for `oh-my-opencode run` (env: OPENCODE_DEFAULT_AGENT) */ /** Default agent name for `oh-my-opencode run` (env: OPENCODE_DEFAULT_AGENT) */
default_run_agent: z.string().optional(), default_run_agent: z.string().optional(),
disabled_mcps: z.array(AnyMcpNameSchema).optional(), disabled_mcps: z.array(AnyMcpNameSchema).optional(),
disabled_agents: z.array(BuiltinAgentNameSchema).optional(), disabled_agents: z.array(z.string()).optional(),
disabled_skills: z.array(BuiltinSkillNameSchema).optional(), disabled_skills: z.array(BuiltinSkillNameSchema).optional(),
disabled_hooks: z.array(z.string()).optional(), disabled_hooks: z.array(z.string()).optional(),
disabled_commands: z.array(BuiltinCommandNameSchema).optional(), disabled_commands: z.array(BuiltinCommandNameSchema).optional(),

View File

@ -34,7 +34,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => {
test("should return provider limit even when modelConcurrency exists but doesn't match", () => { test("should return provider limit even when modelConcurrency exists but doesn't match", () => {
// given // given
const config: BackgroundTaskConfig = { const config: BackgroundTaskConfig = {
modelConcurrency: { "google/gemini-3-pro": 5 }, modelConcurrency: { "google/gemini-3.1-pro": 5 },
providerConcurrency: { anthropic: 3 } providerConcurrency: { anthropic: 3 }
} }
const manager = new ConcurrencyManager(config) const manager = new ConcurrencyManager(config)
@ -95,7 +95,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => {
// when // when
const modelLimit = manager.getConcurrencyLimit("anthropic/claude-sonnet-4-6") const modelLimit = manager.getConcurrencyLimit("anthropic/claude-sonnet-4-6")
const providerLimit = manager.getConcurrencyLimit("anthropic/claude-opus-4-6") const providerLimit = manager.getConcurrencyLimit("anthropic/claude-opus-4-6")
const defaultLimit = manager.getConcurrencyLimit("google/gemini-3-pro") const defaultLimit = manager.getConcurrencyLimit("google/gemini-3.1-pro")
// then // then
expect(modelLimit).toBe(10) expect(modelLimit).toBe(10)

View File

@ -191,6 +191,10 @@ function getPendingByParent(manager: BackgroundManager): Map<string, Set<string>
return (manager as unknown as { pendingByParent: Map<string, Set<string>> }).pendingByParent return (manager as unknown as { pendingByParent: Map<string, Set<string>> }).pendingByParent
} }
function getPendingNotifications(manager: BackgroundManager): Map<string, string[]> {
return (manager as unknown as { pendingNotifications: Map<string, string[]> }).pendingNotifications
}
function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> { function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
} }
@ -1057,6 +1061,49 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
manager.shutdown() manager.shutdown()
}) })
test("should queue notification when promptAsync aborts while parent is idle", async () => {
//#given
const promptMock = async () => {
const error = new Error("Request aborted while waiting for input")
error.name = "MessageAbortedError"
throw error
}
const client = {
session: {
prompt: promptMock,
promptAsync: promptMock,
abort: async () => ({}),
messages: async () => ({ data: [] }),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
const task: BackgroundTask = {
id: "task-aborted-idle-queue",
sessionID: "session-child",
parentSessionID: "session-parent",
parentMessageID: "msg-parent",
description: "task idle queue",
prompt: "test",
agent: "explore",
status: "completed",
startedAt: new Date(),
completedAt: new Date(),
}
getPendingByParent(manager).set("session-parent", new Set([task.id]))
//#when
await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise<void> })
.notifyParentSession(task)
//#then
const queuedNotifications = getPendingNotifications(manager).get("session-parent") ?? []
expect(queuedNotifications).toHaveLength(1)
expect(queuedNotifications[0]).toContain("<system-reminder>")
expect(queuedNotifications[0]).toContain("[ALL BACKGROUND TASKS COMPLETE]")
manager.shutdown()
})
}) })
describe("BackgroundManager.notifyParentSession - notifications toggle", () => { describe("BackgroundManager.notifyParentSession - notifications toggle", () => {
@ -1105,6 +1152,29 @@ describe("BackgroundManager.notifyParentSession - notifications toggle", () => {
}) })
}) })
describe("BackgroundManager.injectPendingNotificationsIntoChatMessage", () => {
test("should prepend queued notifications to first text part and clear queue", () => {
// given
const manager = createBackgroundManager()
manager.queuePendingNotification("session-parent", "<system-reminder>queued-one</system-reminder>")
manager.queuePendingNotification("session-parent", "<system-reminder>queued-two</system-reminder>")
const output = {
parts: [{ type: "text", text: "User prompt" }],
}
// when
manager.injectPendingNotificationsIntoChatMessage(output, "session-parent")
// then
expect(output.parts[0].text).toContain("<system-reminder>queued-one</system-reminder>")
expect(output.parts[0].text).toContain("<system-reminder>queued-two</system-reminder>")
expect(output.parts[0].text).toContain("User prompt")
expect(getPendingNotifications(manager).get("session-parent")).toBeUndefined()
manager.shutdown()
})
})
function buildNotificationPromptBody( function buildNotificationPromptBody(
task: BackgroundTask, task: BackgroundTask,
currentMessage: CurrentMessage | null currentMessage: CurrentMessage | null

View File

@ -93,6 +93,7 @@ export class BackgroundManager {
private tasks: Map<string, BackgroundTask> private tasks: Map<string, BackgroundTask>
private notifications: Map<string, BackgroundTask[]> private notifications: Map<string, BackgroundTask[]>
private pendingNotifications: Map<string, string[]>
private pendingByParent: Map<string, Set<string>> // Track pending tasks per parent for batching private pendingByParent: Map<string, Set<string>> // Track pending tasks per parent for batching
private client: OpencodeClient private client: OpencodeClient
private directory: string private directory: string
@ -125,6 +126,7 @@ export class BackgroundManager {
) { ) {
this.tasks = new Map() this.tasks = new Map()
this.notifications = new Map() this.notifications = new Map()
this.pendingNotifications = new Map()
this.pendingByParent = new Map() this.pendingByParent = new Map()
this.client = ctx.client this.client = ctx.client
this.directory = ctx.directory this.directory = ctx.directory
@ -917,6 +919,32 @@ export class BackgroundManager {
this.notifications.delete(sessionID) this.notifications.delete(sessionID)
} }
queuePendingNotification(sessionID: string | undefined, notification: string): void {
if (!sessionID) return
const existingNotifications = this.pendingNotifications.get(sessionID) ?? []
existingNotifications.push(notification)
this.pendingNotifications.set(sessionID, existingNotifications)
}
injectPendingNotificationsIntoChatMessage(output: { parts: Array<{ type: string; text?: string; [key: string]: unknown }> }, sessionID: string): void {
const pendingNotifications = this.pendingNotifications.get(sessionID)
if (!pendingNotifications || pendingNotifications.length === 0) {
return
}
this.pendingNotifications.delete(sessionID)
const notificationContent = pendingNotifications.join("\n\n")
const firstTextPartIndex = output.parts.findIndex((part) => part.type === "text")
if (firstTextPartIndex === -1) {
output.parts.unshift(createInternalAgentTextPart(notificationContent))
return
}
const originalText = output.parts[firstTextPartIndex].text ?? ""
output.parts[firstTextPartIndex].text = `${notificationContent}\n\n---\n\n${originalText}`
}
/** /**
* Validates that a session has actual assistant/tool output before marking complete. * Validates that a session has actual assistant/tool output before marking complete.
* Prevents premature completion when session.idle fires before agent responds. * Prevents premature completion when session.idle fires before agent responds.
@ -1340,6 +1368,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
taskId: task.id, taskId: task.id,
parentSessionID: task.parentSessionID, parentSessionID: task.parentSessionID,
}) })
this.queuePendingNotification(task.parentSessionID, notification)
} else { } else {
log("[background-agent] Failed to send notification:", error) log("[background-agent] Failed to send notification:", error)
} }
@ -1568,6 +1597,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
this.concurrencyManager.clear() this.concurrencyManager.clear()
this.tasks.clear() this.tasks.clear()
this.notifications.clear() this.notifications.clear()
this.pendingNotifications.clear()
this.pendingByParent.clear() this.pendingByParent.clear()
this.notificationQueueByParent.clear() this.notificationQueueByParent.clear()
this.queuesByKey.clear() this.queuesByKey.clear()

View File

@ -269,6 +269,71 @@ describe("boulder-state", () => {
expect(progress.isComplete).toBe(false) expect(progress.isComplete).toBe(false)
}) })
test("should count space-indented unchecked checkbox", () => {
// given - plan file with a two-space indented checkbox
const planPath = join(TEST_DIR, "space-indented-plan.md")
writeFileSync(planPath, `# Plan
- [ ] indented task
`)
// when
const progress = getPlanProgress(planPath)
// then
expect(progress.total).toBe(1)
expect(progress.completed).toBe(0)
expect(progress.isComplete).toBe(false)
})
test("should count tab-indented unchecked checkbox", () => {
// given - plan file with a tab-indented checkbox
const planPath = join(TEST_DIR, "tab-indented-plan.md")
writeFileSync(planPath, `# Plan
- [ ] tab-indented task
`)
// when
const progress = getPlanProgress(planPath)
// then
expect(progress.total).toBe(1)
expect(progress.completed).toBe(0)
expect(progress.isComplete).toBe(false)
})
test("should count mixed top-level checked and indented unchecked checkboxes", () => {
// given - plan file with checked top-level and unchecked indented task
const planPath = join(TEST_DIR, "mixed-indented-plan.md")
writeFileSync(planPath, `# Plan
- [x] top-level completed task
- [ ] nested unchecked task
`)
// when
const progress = getPlanProgress(planPath)
// then
expect(progress.total).toBe(2)
expect(progress.completed).toBe(1)
expect(progress.isComplete).toBe(false)
})
test("should count space-indented completed checkbox", () => {
// given - plan file with a two-space indented completed checkbox
const planPath = join(TEST_DIR, "indented-completed-plan.md")
writeFileSync(planPath, `# Plan
- [x] indented completed task
`)
// when
const progress = getPlanProgress(planPath)
// then
expect(progress.total).toBe(1)
expect(progress.completed).toBe(1)
expect(progress.isComplete).toBe(true)
})
test("should return isComplete true when all checked", () => { test("should return isComplete true when all checked", () => {
// given - all tasks completed // given - all tasks completed
const planPath = join(TEST_DIR, "complete-plan.md") const planPath = join(TEST_DIR, "complete-plan.md")

View File

@ -121,8 +121,8 @@ export function getPlanProgress(planPath: string): PlanProgress {
const content = readFileSync(planPath, "utf-8") const content = readFileSync(planPath, "utf-8")
// Match markdown checkboxes: - [ ] or - [x] or - [X] // Match markdown checkboxes: - [ ] or - [x] or - [X]
const uncheckedMatches = content.match(/^[-*]\s*\[\s*\]/gm) || [] const uncheckedMatches = content.match(/^\s*[-*]\s*\[\s*\]/gm) || []
const checkedMatches = content.match(/^[-*]\s*\[[xX]\]/gm) || [] const checkedMatches = content.match(/^\s*[-*]\s*\[[xX]\]/gm) || []
const total = uncheckedMatches.length + checkedMatches.length const total = uncheckedMatches.length + checkedMatches.length
const completed = checkedMatches.length const completed = checkedMatches.length
@ -150,7 +150,8 @@ export function getPlanName(planPath: string): string {
export function createBoulderState( export function createBoulderState(
planPath: string, planPath: string,
sessionId: string, sessionId: string,
agent?: string agent?: string,
worktreePath?: string,
): BoulderState { ): BoulderState {
return { return {
active_plan: planPath, active_plan: planPath,
@ -158,5 +159,6 @@ export function createBoulderState(
session_ids: [sessionId], session_ids: [sessionId],
plan_name: getPlanName(planPath), plan_name: getPlanName(planPath),
...(agent !== undefined ? { agent } : {}), ...(agent !== undefined ? { agent } : {}),
...(worktreePath !== undefined ? { worktree_path: worktreePath } : {}),
} }
} }

View File

@ -16,6 +16,8 @@ export interface BoulderState {
plan_name: string plan_name: string
/** Agent type to use when resuming (e.g., 'atlas') */ /** Agent type to use when resuming (e.g., 'atlas') */
agent?: string agent?: string
/** Absolute path to the git worktree root where work happens */
worktree_path?: string
} }
export interface PlanProgress { export interface PlanProgress {

View File

@ -1,5 +1,14 @@
export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session. export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
## ARGUMENTS
- \`/start-work [plan-name] [--worktree <path>]\`
- \`plan-name\` (optional): name or partial match of the plan to start
- \`--worktree <path>\` (optional): absolute path to an existing git worktree to work in
- If specified and valid: hook pre-sets worktree_path in boulder.json
- If specified but invalid: you must run \`git worktree add <path> <branch>\` first
- If omitted: you MUST choose or create a worktree (see Worktree Setup below)
## WHAT TO DO ## WHAT TO DO
1. **Find available plans**: Search for Prometheus-generated plan files at \`.sisyphus/plans/\` 1. **Find available plans**: Search for Prometheus-generated plan files at \`.sisyphus/plans/\`
@ -15,17 +24,24 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
- If ONE plan: auto-select it - If ONE plan: auto-select it
- If MULTIPLE plans: show list with timestamps, ask user to select - If MULTIPLE plans: show list with timestamps, ask user to select
4. **Create/Update boulder.json**: 4. **Worktree Setup** (when \`worktree_path\` not already set in boulder.json):
1. \`git worktree list --porcelain\` — see available worktrees
2. Create: \`git worktree add <absolute-path> <branch-or-HEAD>\`
3. Update boulder.json to add \`"worktree_path": "<absolute-path>"\`
4. All work happens inside that worktree directory
5. **Create/Update boulder.json**:
\`\`\`json \`\`\`json
{ {
"active_plan": "/absolute/path/to/plan.md", "active_plan": "/absolute/path/to/plan.md",
"started_at": "ISO_TIMESTAMP", "started_at": "ISO_TIMESTAMP",
"session_ids": ["session_id_1", "session_id_2"], "session_ids": ["session_id_1", "session_id_2"],
"plan_name": "plan-name" "plan_name": "plan-name",
"worktree_path": "/absolute/path/to/git/worktree"
} }
\`\`\` \`\`\`
5. **Read the plan file** and start executing tasks according to atlas workflow 6. **Read the plan file** and start executing tasks according to atlas workflow
## OUTPUT FORMAT ## OUTPUT FORMAT
@ -49,6 +65,7 @@ Resuming Work Session
Active Plan: {plan-name} Active Plan: {plan-name}
Progress: {completed}/{total} tasks Progress: {completed}/{total} tasks
Sessions: {count} (appending current session) Sessions: {count} (appending current session)
Worktree: {worktree_path}
Reading plan and continuing from last incomplete task... Reading plan and continuing from last incomplete task...
\`\`\` \`\`\`
@ -60,6 +77,7 @@ Starting Work Session
Plan: {plan-name} Plan: {plan-name}
Session ID: {session_id} Session ID: {session_id}
Started: {timestamp} Started: {timestamp}
Worktree: {worktree_path}
Reading plan and beginning execution... Reading plan and beginning execution...
\`\`\` \`\`\`
@ -68,5 +86,6 @@ Reading plan and beginning execution...
- The session_id is injected by the hook - use it directly - The session_id is injected by the hook - use it directly
- Always update boulder.json BEFORE starting work - Always update boulder.json BEFORE starting work
- Always set worktree_path in boulder.json before executing any tasks
- Read the FULL plan file before delegating any tasks - Read the FULL plan file before delegating any tasks
- Follow atlas delegation protocols (7-section format)` - Follow atlas delegation protocols (7-section format)`

View File

@ -162,7 +162,7 @@ describe("TaskToastManager", () => {
description: "Task with category default model", description: "Task with category default model",
agent: "sisyphus-junior", agent: "sisyphus-junior",
isBackground: false, isBackground: false,
modelInfo: { model: "google/gemini-3-pro", type: "category-default" as const }, modelInfo: { model: "google/gemini-3.1-pro", type: "category-default" as const },
} }
// when - addTask is called // when - addTask is called

View File

@ -6,7 +6,7 @@ export function getOrCreateRetryState(
): RetryState { ): RetryState {
let state = autoCompactState.retryStateBySession.get(sessionID) let state = autoCompactState.retryStateBySession.get(sessionID)
if (!state) { if (!state) {
state = { attempt: 0, lastAttemptTime: 0 } state = { attempt: 0, lastAttemptTime: 0, firstAttemptTime: 0 }
autoCompactState.retryStateBySession.set(sessionID, state) autoCompactState.retryStateBySession.set(sessionID, state)
} }
return state return state

View File

@ -0,0 +1,122 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"
import { runSummarizeRetryStrategy } from "./summarize-retry-strategy"
import type { AutoCompactState, ParsedTokenLimitError, RetryState } from "./types"
import type { OhMyOpenCodeConfig } from "../../config"
type TimeoutCall = {
delay: number
}
function createAutoCompactState(): AutoCompactState {
return {
pendingCompact: new Set<string>(),
errorDataBySession: new Map<string, ParsedTokenLimitError>(),
retryStateBySession: new Map<string, RetryState>(),
truncateStateBySession: new Map(),
emptyContentAttemptBySession: new Map(),
compactionInProgress: new Set<string>(),
}
}
describe("runSummarizeRetryStrategy", () => {
const sessionID = "ses_retry_timeout"
const directory = "/tmp"
let autoCompactState: AutoCompactState
const summarizeMock = mock(() => Promise.resolve())
const showToastMock = mock(() => Promise.resolve())
const client = {
session: {
summarize: summarizeMock,
messages: mock(() => Promise.resolve({ data: [] })),
promptAsync: mock(() => Promise.resolve()),
revert: mock(() => Promise.resolve()),
},
tui: {
showToast: showToastMock,
},
}
beforeEach(() => {
autoCompactState = createAutoCompactState()
summarizeMock.mockReset()
showToastMock.mockReset()
summarizeMock.mockResolvedValue(undefined)
showToastMock.mockResolvedValue(undefined)
})
afterEach(() => {
globalThis.setTimeout = originalSetTimeout
})
const originalSetTimeout = globalThis.setTimeout
test("stops retries when total summarize timeout is exceeded", async () => {
//#given
autoCompactState.pendingCompact.add(sessionID)
autoCompactState.errorDataBySession.set(sessionID, {
currentTokens: 250000,
maxTokens: 200000,
errorType: "token_limit_exceeded",
})
autoCompactState.retryStateBySession.set(sessionID, {
attempt: 1,
lastAttemptTime: Date.now(),
firstAttemptTime: Date.now() - 130000,
})
//#when
await runSummarizeRetryStrategy({
sessionID,
msg: { providerID: "anthropic", modelID: "claude-sonnet-4-6" },
autoCompactState,
client: client as never,
directory,
pluginConfig: {} as OhMyOpenCodeConfig,
})
//#then
expect(summarizeMock).not.toHaveBeenCalled()
expect(autoCompactState.pendingCompact.has(sessionID)).toBe(false)
expect(autoCompactState.errorDataBySession.has(sessionID)).toBe(false)
expect(autoCompactState.retryStateBySession.has(sessionID)).toBe(false)
expect(showToastMock).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
title: "Auto Compact Timed Out",
}),
}),
)
})
test("caps retry delay by remaining total timeout window", async () => {
  //#given only ~300ms remain of the 120s retry budget and summarize fails once
  const capturedDelays: TimeoutCall[] = []
  const fakeSetTimeout = (_cb: (...args: unknown[]) => void, delay?: number) => {
    capturedDelays.push({ delay: delay ?? 0 })
    return 1 as unknown as ReturnType<typeof setTimeout>
  }
  globalThis.setTimeout = fakeSetTimeout as typeof setTimeout
  const nowMs = Date.now()
  autoCompactState.retryStateBySession.set(sessionID, {
    attempt: 1,
    lastAttemptTime: nowMs,
    firstAttemptTime: nowMs - 119700,
  })
  summarizeMock.mockRejectedValueOnce(new Error("rate limited"))

  //#when the retry strategy runs and the summarize attempt rejects
  await runSummarizeRetryStrategy({
    sessionID,
    msg: { providerID: "anthropic", modelID: "claude-sonnet-4-6" },
    autoCompactState,
    client: client as never,
    directory,
    pluginConfig: {} as OhMyOpenCodeConfig,
  })

  //#then exactly one retry is scheduled, with its delay capped to the remaining window
  expect(capturedDelays).toHaveLength(1)
  const scheduledDelay = capturedDelays[0]!.delay
  expect(scheduledDelay).toBeGreaterThan(0)
  expect(scheduledDelay).toBeLessThanOrEqual(500)
})
})

View File

@ -7,6 +7,8 @@ import { sanitizeEmptyMessagesBeforeSummarize } from "./message-builder"
import { fixEmptyMessages } from "./empty-content-recovery" import { fixEmptyMessages } from "./empty-content-recovery"
import { resolveCompactionModel } from "../shared/compaction-model-resolver" import { resolveCompactionModel } from "../shared/compaction-model-resolver"
const SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS = 120_000
export async function runSummarizeRetryStrategy(params: { export async function runSummarizeRetryStrategy(params: {
sessionID: string sessionID: string
msg: Record<string, unknown> msg: Record<string, unknown>
@ -18,6 +20,27 @@ export async function runSummarizeRetryStrategy(params: {
messageIndex?: number messageIndex?: number
}): Promise<void> { }): Promise<void> {
const retryState = getOrCreateRetryState(params.autoCompactState, params.sessionID) const retryState = getOrCreateRetryState(params.autoCompactState, params.sessionID)
const now = Date.now()
if (retryState.firstAttemptTime === 0) {
retryState.firstAttemptTime = now
}
const elapsedTimeMs = now - retryState.firstAttemptTime
if (elapsedTimeMs >= SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS) {
clearSessionState(params.autoCompactState, params.sessionID)
await params.client.tui
.showToast({
body: {
title: "Auto Compact Timed Out",
message: "Compaction retries exceeded the timeout window. Please start a new session.",
variant: "error",
duration: 5000,
},
})
.catch(() => {})
return
}
if (params.errorType?.includes("non-empty content")) { if (params.errorType?.includes("non-empty content")) {
const attempt = getEmptyContentAttempt(params.autoCompactState, params.sessionID) const attempt = getEmptyContentAttempt(params.autoCompactState, params.sessionID)
@ -52,6 +75,7 @@ export async function runSummarizeRetryStrategy(params: {
if (Date.now() - retryState.lastAttemptTime > 300000) { if (Date.now() - retryState.lastAttemptTime > 300000) {
retryState.attempt = 0 retryState.attempt = 0
retryState.firstAttemptTime = Date.now()
params.autoCompactState.truncateStateBySession.delete(params.sessionID) params.autoCompactState.truncateStateBySession.delete(params.sessionID)
} }
@ -92,10 +116,26 @@ export async function runSummarizeRetryStrategy(params: {
}) })
return return
} catch { } catch {
const remainingTimeMs = SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS - (Date.now() - retryState.firstAttemptTime)
if (remainingTimeMs <= 0) {
clearSessionState(params.autoCompactState, params.sessionID)
await params.client.tui
.showToast({
body: {
title: "Auto Compact Timed Out",
message: "Compaction retries exceeded the timeout window. Please start a new session.",
variant: "error",
duration: 5000,
},
})
.catch(() => {})
return
}
const delay = const delay =
RETRY_CONFIG.initialDelayMs * RETRY_CONFIG.initialDelayMs *
Math.pow(RETRY_CONFIG.backoffFactor, retryState.attempt - 1) Math.pow(RETRY_CONFIG.backoffFactor, retryState.attempt - 1)
const cappedDelay = Math.min(delay, RETRY_CONFIG.maxDelayMs) const cappedDelay = Math.min(delay, RETRY_CONFIG.maxDelayMs, remainingTimeMs)
setTimeout(() => { setTimeout(() => {
void runSummarizeRetryStrategy(params) void runSummarizeRetryStrategy(params)

View File

@ -11,6 +11,7 @@ export interface ParsedTokenLimitError {
export interface RetryState { export interface RetryState {
attempt: number attempt: number
lastAttemptTime: number lastAttemptTime: number
firstAttemptTime: number
} }
export interface TruncateState { export interface TruncateState {

View File

@ -14,6 +14,7 @@ export async function injectBoulderContinuation(input: {
remaining: number remaining: number
total: number total: number
agent?: string agent?: string
worktreePath?: string
backgroundManager?: BackgroundManager backgroundManager?: BackgroundManager
sessionState: SessionState sessionState: SessionState
}): Promise<void> { }): Promise<void> {
@ -24,6 +25,7 @@ export async function injectBoulderContinuation(input: {
remaining, remaining,
total, total,
agent, agent,
worktreePath,
backgroundManager, backgroundManager,
sessionState, sessionState,
} = input } = input
@ -37,9 +39,11 @@ export async function injectBoulderContinuation(input: {
return return
} }
const worktreeContext = worktreePath ? `\n\n[Worktree: ${worktreePath}]` : ""
const prompt = const prompt =
BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) + BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) +
`\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` +
worktreeContext
try { try {
log(`[${HOOK_NAME}] Injecting boulder continuation`, { sessionID, planName, remaining }) log(`[${HOOK_NAME}] Injecting boulder continuation`, { sessionID, planName, remaining })
@ -62,6 +66,7 @@ export async function injectBoulderContinuation(input: {
log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID }) log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
} catch (err) { } catch (err) {
sessionState.promptFailureCount += 1 sessionState.promptFailureCount += 1
sessionState.lastFailureAt = Date.now()
log(`[${HOOK_NAME}] Boulder continuation failed`, { log(`[${HOOK_NAME}] Boulder continuation failed`, {
sessionID, sessionID,
error: String(err), error: String(err),

View File

@ -10,6 +10,7 @@ import { getLastAgentFromSession } from "./session-last-agent"
import type { AtlasHookOptions, SessionState } from "./types" import type { AtlasHookOptions, SessionState } from "./types"
const CONTINUATION_COOLDOWN_MS = 5000 const CONTINUATION_COOLDOWN_MS = 5000
const FAILURE_BACKOFF_MS = 5 * 60 * 1000
export function createAtlasEventHandler(input: { export function createAtlasEventHandler(input: {
ctx: PluginInput ctx: PluginInput
@ -53,6 +54,7 @@ export function createAtlasEventHandler(input: {
} }
const state = getState(sessionID) const state = getState(sessionID)
const now = Date.now()
if (state.lastEventWasAbortError) { if (state.lastEventWasAbortError) {
state.lastEventWasAbortError = false state.lastEventWasAbortError = false
@ -61,11 +63,18 @@ export function createAtlasEventHandler(input: {
} }
if (state.promptFailureCount >= 2) { if (state.promptFailureCount >= 2) {
log(`[${HOOK_NAME}] Skipped: continuation disabled after repeated prompt failures`, { const timeSinceLastFailure = state.lastFailureAt !== undefined ? now - state.lastFailureAt : Number.POSITIVE_INFINITY
sessionID, if (timeSinceLastFailure < FAILURE_BACKOFF_MS) {
promptFailureCount: state.promptFailureCount, log(`[${HOOK_NAME}] Skipped: continuation in backoff after repeated failures`, {
}) sessionID,
return promptFailureCount: state.promptFailureCount,
backoffRemaining: FAILURE_BACKOFF_MS - timeSinceLastFailure,
})
return
}
state.promptFailureCount = 0
state.lastFailureAt = undefined
} }
const backgroundManager = options?.backgroundManager const backgroundManager = options?.backgroundManager
@ -92,17 +101,15 @@ export function createAtlasEventHandler(input: {
const lastAgentKey = getAgentConfigKey(lastAgent ?? "") const lastAgentKey = getAgentConfigKey(lastAgent ?? "")
const requiredAgent = getAgentConfigKey(boulderState.agent ?? "atlas") const requiredAgent = getAgentConfigKey(boulderState.agent ?? "atlas")
const lastAgentMatchesRequired = lastAgentKey === requiredAgent const lastAgentMatchesRequired = lastAgentKey === requiredAgent
const boulderAgentWasNotExplicitlySet = boulderState.agent === undefined
const boulderAgentDefaultsToAtlas = requiredAgent === "atlas" const boulderAgentDefaultsToAtlas = requiredAgent === "atlas"
const lastAgentIsSisyphus = lastAgentKey === "sisyphus" const lastAgentIsSisyphus = lastAgentKey === "sisyphus"
const allowSisyphusWhenDefaultAtlas = boulderAgentWasNotExplicitlySet && boulderAgentDefaultsToAtlas && lastAgentIsSisyphus const allowSisyphusForAtlasBoulder = boulderAgentDefaultsToAtlas && lastAgentIsSisyphus
const agentMatches = lastAgentMatchesRequired || allowSisyphusWhenDefaultAtlas const agentMatches = lastAgentMatchesRequired || allowSisyphusForAtlasBoulder
if (!agentMatches) { if (!agentMatches) {
log(`[${HOOK_NAME}] Skipped: last agent does not match boulder agent`, { log(`[${HOOK_NAME}] Skipped: last agent does not match boulder agent`, {
sessionID, sessionID,
lastAgent: lastAgent ?? "unknown", lastAgent: lastAgent ?? "unknown",
requiredAgent, requiredAgent,
boulderAgentExplicitlySet: boulderState.agent !== undefined,
}) })
return return
} }
@ -113,7 +120,6 @@ export function createAtlasEventHandler(input: {
return return
} }
const now = Date.now()
if (state.lastContinuationInjectedAt && now - state.lastContinuationInjectedAt < CONTINUATION_COOLDOWN_MS) { if (state.lastContinuationInjectedAt && now - state.lastContinuationInjectedAt < CONTINUATION_COOLDOWN_MS) {
log(`[${HOOK_NAME}] Skipped: continuation cooldown active`, { log(`[${HOOK_NAME}] Skipped: continuation cooldown active`, {
sessionID, sessionID,
@ -132,6 +138,7 @@ export function createAtlasEventHandler(input: {
remaining, remaining,
total: progress.total, total: progress.total,
agent: boulderState.agent, agent: boulderState.agent,
worktreePath: boulderState.worktree_path,
backgroundManager, backgroundManager,
sessionState: state, sessionState: state,
}) })

View File

@ -933,8 +933,8 @@ describe("atlas hook", () => {
expect(callArgs.body.parts[0].text).toContain("2 remaining") expect(callArgs.body.parts[0].text).toContain("2 remaining")
}) })
test("should not inject when last agent does not match boulder agent", async () => { test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => {
// given - boulder state with incomplete plan, but last agent does NOT match // given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work)
const planPath = join(TEST_DIR, "test-plan.md") const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
@ -947,7 +947,7 @@ describe("atlas hook", () => {
} }
writeBoulderState(TEST_DIR, state) writeBoulderState(TEST_DIR, state)
// given - last agent is NOT the boulder agent // given - last agent is sisyphus (typical state right after /start-work)
cleanupMessageStorage(MAIN_SESSION_ID) cleanupMessageStorage(MAIN_SESSION_ID)
setupMessageStorage(MAIN_SESSION_ID, "sisyphus") setupMessageStorage(MAIN_SESSION_ID, "sisyphus")
@ -962,7 +962,39 @@ describe("atlas hook", () => {
}, },
}) })
// then - should NOT call prompt because agent does not match // then - should call prompt because sisyphus is always allowed for atlas boulders
expect(mockInput._promptMock).toHaveBeenCalled()
})
test("should not inject when last agent is non-sisyphus and does not match boulder agent", async () => {
// given - boulder explicitly set to atlas, last agent is hephaestus (unrelated agent)
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
agent: "atlas",
}
writeBoulderState(TEST_DIR, state)
cleanupMessageStorage(MAIN_SESSION_ID)
setupMessageStorage(MAIN_SESSION_ID, "hephaestus")
const mockInput = createMockPluginInput()
const hook = createAtlasHook(mockInput)
// when
await hook.handler({
event: {
type: "session.idle",
properties: { sessionID: MAIN_SESSION_ID },
},
})
// then - should NOT call prompt because hephaestus does not match atlas or sisyphus
expect(mockInput._promptMock).not.toHaveBeenCalled() expect(mockInput._promptMock).not.toHaveBeenCalled()
}) })
@ -1122,6 +1154,144 @@ describe("atlas hook", () => {
} }
}) })
test("should keep skipping continuation during 5-minute backoff after 2 consecutive failures", async () => {
//#given - boulder state with incomplete plan and prompt always fails
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
}
writeBoulderState(TEST_DIR, state)
const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
const mockInput = createMockPluginInput({ promptMock })
const hook = createAtlasHook(mockInput)
const originalDateNow = Date.now
let now = 0
Date.now = () => now
try {
//#when - third idle occurs inside 5-minute backoff window
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 60000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
//#then - third attempt should still be skipped
expect(promptMock).toHaveBeenCalledTimes(2)
} finally {
Date.now = originalDateNow
}
})
test("should retry continuation after 5-minute backoff expires following 2 consecutive failures", async () => {
//#given - boulder state with incomplete plan and prompt always fails
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
}
writeBoulderState(TEST_DIR, state)
const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
const mockInput = createMockPluginInput({ promptMock })
const hook = createAtlasHook(mockInput)
const originalDateNow = Date.now
let now = 0
Date.now = () => now
try {
//#when - third idle occurs after 5+ minutes
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 300000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
//#then - third attempt should run after backoff expiration
expect(promptMock).toHaveBeenCalledTimes(3)
} finally {
Date.now = originalDateNow
}
})
test("should reset prompt failure counter after successful retry beyond backoff window", async () => {
//#given - boulder state with incomplete plan and success on first retry after backoff
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
}
writeBoulderState(TEST_DIR, state)
const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request")))
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
promptMock.mockImplementationOnce(() => Promise.resolve(undefined))
const mockInput = createMockPluginInput({ promptMock })
const hook = createAtlasHook(mockInput)
const originalDateNow = Date.now
let now = 0
Date.now = () => now
try {
//#when - fail twice, recover after backoff with success, then fail twice again
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 300000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
now += 6000
await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
await flushMicrotasks()
//#then - success retry resets counter, so two additional failures are allowed before skip
expect(promptMock).toHaveBeenCalledTimes(5)
} finally {
Date.now = originalDateNow
}
})
test("should reset continuation failure state on session.compacted event", async () => { test("should reset continuation failure state on session.compacted event", async () => {
//#given - boulder state with incomplete plan and prompt always fails //#given - boulder state with incomplete plan and prompt always fails
const planPath = join(TEST_DIR, "test-plan.md") const planPath = join(TEST_DIR, "test-plan.md")

View File

@ -26,4 +26,5 @@ export interface SessionState {
lastEventWasAbortError?: boolean lastEventWasAbortError?: boolean
lastContinuationInjectedAt?: number lastContinuationInjectedAt?: number
promptFailureCount: number promptFailureCount: number
lastFailureAt?: number
} }

View File

@ -9,6 +9,14 @@ interface EventInput {
event: Event event: Event
} }
interface ChatMessageInput {
sessionID: string
}
interface ChatMessageOutput {
parts: Array<{ type: string; text?: string; [key: string]: unknown }>
}
/** /**
* Background notification hook - handles event routing to BackgroundManager. * Background notification hook - handles event routing to BackgroundManager.
* *
@ -20,7 +28,15 @@ export function createBackgroundNotificationHook(manager: BackgroundManager) {
manager.handleEvent(event) manager.handleEvent(event)
} }
const chatMessageHandler = async (
input: ChatMessageInput,
output: ChatMessageOutput,
): Promise<void> => {
manager.injectPendingNotificationsIntoChatMessage(output, input.sessionID)
}
return { return {
"chat.message": chatMessageHandler,
event: eventHandler, event: eventHandler,
} }
} }

View File

@ -6,6 +6,7 @@ import {
import { OMO_SESSION_PREFIX, buildSessionReminderMessage } from "./constants"; import { OMO_SESSION_PREFIX, buildSessionReminderMessage } from "./constants";
import type { InteractiveBashSessionState } from "./types"; import type { InteractiveBashSessionState } from "./types";
import { subagentSessions } from "../../features/claude-code-session-state"; import { subagentSessions } from "../../features/claude-code-session-state";
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide";
type AbortSession = (args: { path: { id: string } }) => Promise<unknown> type AbortSession = (args: { path: { id: string } }) => Promise<unknown>
@ -19,7 +20,7 @@ async function killAllTrackedSessions(
): Promise<void> { ): Promise<void> {
for (const sessionName of state.tmuxSessions) { for (const sessionName of state.tmuxSessions) {
try { try {
const proc = Bun.spawn(["tmux", "kill-session", "-t", sessionName], { const proc = spawnWithWindowsHide(["tmux", "kill-session", "-t", sessionName], {
stdout: "ignore", stdout: "ignore",
stderr: "ignore", stderr: "ignore",
}) })

View File

@ -1,6 +1,7 @@
import type { InteractiveBashSessionState } from "./types"; import type { InteractiveBashSessionState } from "./types";
import { loadInteractiveBashSessionState } from "./storage"; import { loadInteractiveBashSessionState } from "./storage";
import { OMO_SESSION_PREFIX } from "./constants"; import { OMO_SESSION_PREFIX } from "./constants";
import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide";
export function getOrCreateState(sessionID: string, sessionStates: Map<string, InteractiveBashSessionState>): InteractiveBashSessionState { export function getOrCreateState(sessionID: string, sessionStates: Map<string, InteractiveBashSessionState>): InteractiveBashSessionState {
if (!sessionStates.has(sessionID)) { if (!sessionStates.has(sessionID)) {
@ -24,7 +25,7 @@ export async function killAllTrackedSessions(
): Promise<void> { ): Promise<void> {
for (const sessionName of state.tmuxSessions) { for (const sessionName of state.tmuxSessions) {
try { try {
const proc = Bun.spawn(["tmux", "kill-session", "-t", sessionName], { const proc = spawnWithWindowsHide(["tmux", "kill-session", "-t", sessionName], {
stdout: "ignore", stdout: "ignore",
stderr: "ignore", stderr: "ignore",
}); });

View File

@ -12,12 +12,16 @@ const TOAST_MESSAGE = [
].join("\n") ].join("\n")
const SISYPHUS_DISPLAY = getAgentDisplayName("sisyphus") const SISYPHUS_DISPLAY = getAgentDisplayName("sisyphus")
function showToast(ctx: PluginInput, sessionID: string): void { type NoHephaestusNonGptHookOptions = {
allowNonGptModel?: boolean
}
function showToast(ctx: PluginInput, sessionID: string, variant: "error" | "warning"): void {
ctx.client.tui.showToast({ ctx.client.tui.showToast({
body: { body: {
title: TOAST_TITLE, title: TOAST_TITLE,
message: TOAST_MESSAGE, message: TOAST_MESSAGE,
variant: "error", variant,
duration: 10000, duration: 10000,
}, },
}).catch((error) => { }).catch((error) => {
@ -28,7 +32,10 @@ function showToast(ctx: PluginInput, sessionID: string): void {
}) })
} }
export function createNoHephaestusNonGptHook(ctx: PluginInput) { export function createNoHephaestusNonGptHook(
ctx: PluginInput,
options?: NoHephaestusNonGptHookOptions,
) {
return { return {
"chat.message": async (input: { "chat.message": async (input: {
sessionID: string sessionID: string
@ -40,9 +47,13 @@ export function createNoHephaestusNonGptHook(ctx: PluginInput) {
const rawAgent = input.agent ?? getSessionAgent(input.sessionID) ?? "" const rawAgent = input.agent ?? getSessionAgent(input.sessionID) ?? ""
const agentKey = getAgentConfigKey(rawAgent) const agentKey = getAgentConfigKey(rawAgent)
const modelID = input.model?.modelID const modelID = input.model?.modelID
const allowNonGptModel = options?.allowNonGptModel === true
if (agentKey === "hephaestus" && modelID && !isGptModel(modelID)) { if (agentKey === "hephaestus" && modelID && !isGptModel(modelID)) {
showToast(ctx, input.sessionID) showToast(ctx, input.sessionID, allowNonGptModel ? "warning" : "error")
if (allowNonGptModel) {
return
}
input.agent = SISYPHUS_DISPLAY input.agent = SISYPHUS_DISPLAY
if (output?.message) { if (output?.message) {
output.message.agent = SISYPHUS_DISPLAY output.message.agent = SISYPHUS_DISPLAY

View File

@ -1,3 +1,5 @@
/// <reference types="bun-types" />
import { describe, expect, spyOn, test } from "bun:test" import { describe, expect, spyOn, test } from "bun:test"
import { _resetForTesting, updateSessionAgent } from "../../features/claude-code-session-state" import { _resetForTesting, updateSessionAgent } from "../../features/claude-code-session-state"
import { getAgentDisplayName } from "../../shared/agent-display-names" import { getAgentDisplayName } from "../../shared/agent-display-names"
@ -8,7 +10,7 @@ const SISYPHUS_DISPLAY = getAgentDisplayName("sisyphus")
function createOutput() { function createOutput() {
return { return {
message: {}, message: {} as { agent?: string; [key: string]: unknown },
parts: [], parts: [],
} }
} }
@ -16,7 +18,7 @@ function createOutput() {
describe("no-hephaestus-non-gpt hook", () => { describe("no-hephaestus-non-gpt hook", () => {
test("shows toast on every chat.message when hephaestus uses non-gpt model", async () => { test("shows toast on every chat.message when hephaestus uses non-gpt model", async () => {
// given - hephaestus with claude model // given - hephaestus with claude model
const showToast = spyOn({ fn: async () => ({}) }, "fn") const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn")
const hook = createNoHephaestusNonGptHook({ const hook = createNoHephaestusNonGptHook({
client: { tui: { showToast } }, client: { tui: { showToast } },
} as any) } as any)
@ -49,9 +51,38 @@ describe("no-hephaestus-non-gpt hook", () => {
}) })
}) })
test("shows warning and does not switch agent when allow_non_gpt_model is enabled", async () => {
// given - hephaestus with claude model and opt-out enabled
const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn")
const hook = createNoHephaestusNonGptHook({
client: { tui: { showToast } },
} as any, {
allowNonGptModel: true,
})
const output = createOutput()
// when - chat.message runs
await hook["chat.message"]?.({
sessionID: "ses_opt_out",
agent: HEPHAESTUS_DISPLAY,
model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
}, output)
// then - warning toast is shown but agent is not switched
expect(showToast).toHaveBeenCalledTimes(1)
expect(output.message.agent).toBeUndefined()
expect(showToast.mock.calls[0]?.[0]).toMatchObject({
body: {
title: "NEVER Use Hephaestus with Non-GPT",
variant: "warning",
},
})
})
test("does not show toast when hephaestus uses gpt model", async () => { test("does not show toast when hephaestus uses gpt model", async () => {
// given - hephaestus with gpt model // given - hephaestus with gpt model
const showToast = spyOn({ fn: async () => ({}) }, "fn") const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn")
const hook = createNoHephaestusNonGptHook({ const hook = createNoHephaestusNonGptHook({
client: { tui: { showToast } }, client: { tui: { showToast } },
} as any) } as any)
@ -72,7 +103,7 @@ describe("no-hephaestus-non-gpt hook", () => {
test("does not show toast for non-hephaestus agent", async () => { test("does not show toast for non-hephaestus agent", async () => {
// given - sisyphus with claude model (non-gpt) // given - sisyphus with claude model (non-gpt)
const showToast = spyOn({ fn: async () => ({}) }, "fn") const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn")
const hook = createNoHephaestusNonGptHook({ const hook = createNoHephaestusNonGptHook({
client: { tui: { showToast } }, client: { tui: { showToast } },
} as any) } as any)
@ -95,7 +126,7 @@ describe("no-hephaestus-non-gpt hook", () => {
// given - session agent saved as hephaestus // given - session agent saved as hephaestus
_resetForTesting() _resetForTesting()
updateSessionAgent("ses_4", HEPHAESTUS_DISPLAY) updateSessionAgent("ses_4", HEPHAESTUS_DISPLAY)
const showToast = spyOn({ fn: async () => ({}) }, "fn") const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn")
const hook = createNoHephaestusNonGptHook({ const hook = createNoHephaestusNonGptHook({
client: { tui: { showToast } }, client: { tui: { showToast } },
} as any) } as any)

View File

@ -45,6 +45,23 @@ function createMockCtx() {
} }
} }
function setupImmediateTimeouts(): () => void {
const originalSetTimeout = globalThis.setTimeout
const originalClearTimeout = globalThis.clearTimeout
globalThis.setTimeout = ((callback: (...args: unknown[]) => void, _delay?: number, ...args: unknown[]) => {
callback(...args)
return 1 as unknown as ReturnType<typeof setTimeout>
}) as typeof setTimeout
globalThis.clearTimeout = (() => {}) as typeof clearTimeout
return () => {
globalThis.setTimeout = originalSetTimeout
globalThis.clearTimeout = originalClearTimeout
}
}
describe("preemptive-compaction", () => { describe("preemptive-compaction", () => {
let ctx: ReturnType<typeof createMockCtx> let ctx: ReturnType<typeof createMockCtx>
@ -63,7 +80,7 @@ describe("preemptive-compaction", () => {
// #when tool.execute.after is called // #when tool.execute.after is called
// #then session.messages() should NOT be called // #then session.messages() should NOT be called
it("should use cached token info instead of fetching session.messages()", async () => { it("should use cached token info instead of fetching session.messages()", async () => {
const hook = createPreemptiveCompactionHook(ctx as never) const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_test1" const sessionID = "ses_test1"
// Simulate message.updated with token info below threshold // Simulate message.updated with token info below threshold
@ -101,7 +118,7 @@ describe("preemptive-compaction", () => {
// #when tool.execute.after is called // #when tool.execute.after is called
// #then should skip without fetching // #then should skip without fetching
it("should skip gracefully when no cached token info exists", async () => { it("should skip gracefully when no cached token info exists", async () => {
const hook = createPreemptiveCompactionHook(ctx as never) const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const output = { title: "", output: "test", metadata: null } const output = { title: "", output: "test", metadata: null }
await hook["tool.execute.after"]( await hook["tool.execute.after"](
@ -116,7 +133,7 @@ describe("preemptive-compaction", () => {
// #when tool.execute.after runs // #when tool.execute.after runs
// #then should trigger summarize // #then should trigger summarize
it("should trigger compaction when usage exceeds threshold", async () => { it("should trigger compaction when usage exceeds threshold", async () => {
const hook = createPreemptiveCompactionHook(ctx as never) const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_high" const sessionID = "ses_high"
// 170K input + 10K cache = 180K → 90% of 200K // 170K input + 10K cache = 180K → 90% of 200K
@ -153,7 +170,7 @@ describe("preemptive-compaction", () => {
it("should trigger compaction for google-vertex-anthropic provider", async () => { it("should trigger compaction for google-vertex-anthropic provider", async () => {
//#given google-vertex-anthropic usage above threshold //#given google-vertex-anthropic usage above threshold
const hook = createPreemptiveCompactionHook(ctx as never) const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_vertex_anthropic_high" const sessionID = "ses_vertex_anthropic_high"
await hook.event({ await hook.event({
@ -191,7 +208,7 @@ describe("preemptive-compaction", () => {
// #given session deleted // #given session deleted
// #then cache should be cleaned up // #then cache should be cleaned up
it("should clean up cache on session.deleted", async () => { it("should clean up cache on session.deleted", async () => {
const hook = createPreemptiveCompactionHook(ctx as never) const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_del" const sessionID = "ses_del"
await hook.event({ await hook.event({
@ -228,7 +245,7 @@ describe("preemptive-compaction", () => {
it("should log summarize errors instead of swallowing them", async () => { it("should log summarize errors instead of swallowing them", async () => {
//#given //#given
const hook = createPreemptiveCompactionHook(ctx as never) const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_log_error" const sessionID = "ses_log_error"
const summarizeError = new Error("summarize failed") const summarizeError = new Error("summarize failed")
ctx.client.session.summarize.mockRejectedValueOnce(summarizeError) ctx.client.session.summarize.mockRejectedValueOnce(summarizeError)
@ -343,4 +360,58 @@ describe("preemptive-compaction", () => {
//#then //#then
expect(ctx.client.session.summarize).not.toHaveBeenCalled() expect(ctx.client.session.summarize).not.toHaveBeenCalled()
}) })
it("should clear in-progress lock when summarize times out", async () => {
//#given
const restoreTimeouts = setupImmediateTimeouts()
const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_timeout"
ctx.client.session.summarize
.mockImplementationOnce(() => new Promise(() => {}))
.mockResolvedValueOnce({})
try {
await hook.event({
event: {
type: "message.updated",
properties: {
info: {
role: "assistant",
sessionID,
providerID: "anthropic",
modelID: "claude-sonnet-4-6",
finish: true,
tokens: {
input: 170000,
output: 0,
reasoning: 0,
cache: { read: 10000, write: 0 },
},
},
},
},
})
//#when
await hook["tool.execute.after"](
{ tool: "bash", sessionID, callID: "call_timeout_1" },
{ title: "", output: "test", metadata: null },
)
await hook["tool.execute.after"](
{ tool: "bash", sessionID, callID: "call_timeout_2" },
{ title: "", output: "test", metadata: null },
)
//#then
expect(ctx.client.session.summarize).toHaveBeenCalledTimes(2)
expect(logMock).toHaveBeenCalledWith("[preemptive-compaction] Compaction failed", {
sessionID,
error: expect.stringContaining("Compaction summarize timed out"),
})
} finally {
restoreTimeouts()
}
})
}) })

View File

@ -3,6 +3,7 @@ import type { OhMyOpenCodeConfig } from "../config"
import { resolveCompactionModel } from "./shared/compaction-model-resolver" import { resolveCompactionModel } from "./shared/compaction-model-resolver"
const DEFAULT_ACTUAL_LIMIT = 200_000 const DEFAULT_ACTUAL_LIMIT = 200_000
const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000
type ModelCacheStateLike = { type ModelCacheStateLike = {
anthropicContext1MEnabled: boolean anthropicContext1MEnabled: boolean
@ -31,6 +32,26 @@ interface CachedCompactionState {
tokens: TokenInfo tokens: TokenInfo
} }
function withTimeout<TValue>(
promise: Promise<TValue>,
timeoutMs: number,
errorMessage: string,
): Promise<TValue> {
let timeoutID: ReturnType<typeof setTimeout> | undefined
const timeoutPromise = new Promise<never>((_, reject) => {
timeoutID = setTimeout(() => {
reject(new Error(errorMessage))
}, timeoutMs)
})
return Promise.race([promise, timeoutPromise]).finally(() => {
if (timeoutID !== undefined) {
clearTimeout(timeoutID)
}
})
}
function isAnthropicProvider(providerID: string): boolean { function isAnthropicProvider(providerID: string): boolean {
return providerID === "anthropic" || providerID === "google-vertex-anthropic" return providerID === "anthropic" || providerID === "google-vertex-anthropic"
} }
@ -94,11 +115,15 @@ export function createPreemptiveCompactionHook(
modelID modelID
) )
await ctx.client.session.summarize({ await withTimeout(
path: { id: sessionID }, ctx.client.session.summarize({
body: { providerID: targetProviderID, modelID: targetModelID, auto: true } as never, path: { id: sessionID },
query: { directory: ctx.directory }, body: { providerID: targetProviderID, modelID: targetModelID, auto: true } as never,
}) query: { directory: ctx.directory },
}),
PREEMPTIVE_COMPACTION_TIMEOUT_MS,
`Compaction summarize timed out after ${PREEMPTIVE_COMPACTION_TIMEOUT_MS}ms`,
)
compactedSessions.add(sessionID) compactedSessions.add(sessionID)
} catch (error) { } catch (error) {

View File

@ -79,8 +79,8 @@ export async function detectCompletionInSessionMessages(
if (assistantMessages.length === 0) return false if (assistantMessages.length === 0) return false
const pattern = buildPromisePattern(options.promise) const pattern = buildPromisePattern(options.promise)
const recentAssistants = assistantMessages.slice(-3) for (let index = assistantMessages.length - 1; index >= 0; index -= 1) {
for (const assistant of recentAssistants) { const assistant = assistantMessages[index]
if (!assistant.parts) continue if (!assistant.parts) continue
let responseText = "" let responseText = ""

View File

@ -494,6 +494,7 @@ describe("ralph-loop", () => {
config: { config: {
enabled: true, enabled: true,
default_max_iterations: 200, default_max_iterations: 200,
default_strategy: "continue",
}, },
}) })
@ -708,6 +709,57 @@ describe("ralph-loop", () => {
expect(promptCalls[0].text).toContain("<promise>CALCULATOR_DONE</promise>") expect(promptCalls[0].text).toContain("<promise>CALCULATOR_DONE</promise>")
}) })
test("should skip concurrent idle events for same session when handler is in flight", async () => {
// given - active loop with delayed prompt injection
let releasePromptAsync: (() => void) | undefined
const promptAsyncBlocked = new Promise<void>((resolve) => {
releasePromptAsync = resolve
})
let firstPromptStartedResolve: (() => void) | undefined
const firstPromptStarted = new Promise<void>((resolve) => {
firstPromptStartedResolve = resolve
})
const mockInput = createMockPluginInput() as {
client: {
session: {
promptAsync: (opts: { path: { id: string }; body: { parts: Array<{ type: string; text: string }> } }) => Promise<unknown>
}
}
}
const originalPromptAsync = mockInput.client.session.promptAsync
let promptAsyncCalls = 0
mockInput.client.session.promptAsync = async (opts) => {
promptAsyncCalls += 1
if (promptAsyncCalls === 1) {
firstPromptStartedResolve?.()
}
await promptAsyncBlocked
return originalPromptAsync(opts)
}
const hook = createRalphLoopHook(mockInput as Parameters<typeof createRalphLoopHook>[0])
hook.startLoop("session-123", "Build feature", { maxIterations: 10 })
// when - second idle arrives while first idle processing is still in flight
const firstIdle = hook.event({
event: { type: "session.idle", properties: { sessionID: "session-123" } },
})
await firstPromptStarted
const secondIdle = hook.event({
event: { type: "session.idle", properties: { sessionID: "session-123" } },
})
releasePromptAsync?.()
await Promise.all([firstIdle, secondIdle])
// then - only one continuation should be injected
expect(promptAsyncCalls).toBe(1)
expect(promptCalls.length).toBe(1)
expect(hook.getState()?.iteration).toBe(2)
})
test("should clear loop state on user abort (MessageAbortedError)", async () => { test("should clear loop state on user abort (MessageAbortedError)", async () => {
// given - active loop // given - active loop
const hook = createRalphLoopHook(createMockPluginInput()) const hook = createRalphLoopHook(createMockPluginInput())
@ -782,8 +834,8 @@ describe("ralph-loop", () => {
expect(hook.getState()).toBeNull() expect(hook.getState()).toBeNull()
}) })
test("should NOT detect completion if promise is older than last 3 assistant messages", async () => { test("should detect completion even when promise is older than previous narrow window", async () => {
// given - promise appears in an assistant message older than last 3 // given - promise appears in an older assistant message with additional assistant output after it
mockSessionMessages = [ mockSessionMessages = [
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early <promise>DONE</promise>" }] }, { info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early <promise>DONE</promise>" }] },
@ -801,9 +853,40 @@ describe("ralph-loop", () => {
event: { type: "session.idle", properties: { sessionID: "session-123" } }, event: { type: "session.idle", properties: { sessionID: "session-123" } },
}) })
// then - loop should continue (promise is older than last 3 assistant messages) // then - loop should complete because all assistant messages are scanned
expect(promptCalls.length).toBe(1) expect(promptCalls.length).toBe(0)
expect(hook.getState()?.iteration).toBe(2) expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
expect(hook.getState()).toBeNull()
})
test("should detect completion when many assistant messages are emitted after promise", async () => {
// given - completion promise followed by long assistant output sequence
mockSessionMessages = [
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done now <promise>DONE</promise>" }] },
]
for (let index = 1; index <= 25; index += 1) {
mockSessionMessages.push({
info: { role: "assistant" },
parts: [{ type: "text", text: `Post-completion assistant output ${index}` }],
})
}
const hook = createRalphLoopHook(createMockPluginInput(), {
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
})
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
// when - session goes idle
await hook.event({
event: { type: "session.idle", properties: { sessionID: "session-123" } },
})
// then - loop should complete despite large trailing output
expect(promptCalls.length).toBe(0)
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
expect(hook.getState()).toBeNull()
}) })
test("should allow starting new loop while previous loop is active (different session)", async () => { test("should allow starting new loop while previous loop is active (different session)", async () => {

View File

@ -33,15 +33,6 @@ export async function continueIteration(
return return
} }
const boundState = options.loopState.setSessionID(newSessionID)
if (!boundState) {
log(`[${HOOK_NAME}] Failed to bind loop state to new session`, {
previousSessionID: options.previousSessionID,
newSessionID,
})
return
}
await injectContinuationPrompt(ctx, { await injectContinuationPrompt(ctx, {
sessionID: newSessionID, sessionID: newSessionID,
inheritFromSessionID: options.previousSessionID, inheritFromSessionID: options.previousSessionID,
@ -51,6 +42,16 @@ export async function continueIteration(
}) })
await selectSessionInTui(ctx.client, newSessionID) await selectSessionInTui(ctx.client, newSessionID)
const boundState = options.loopState.setSessionID(newSessionID)
if (!boundState) {
log(`[${HOOK_NAME}] Failed to bind loop state to new session`, {
previousSessionID: options.previousSessionID,
newSessionID,
})
return
}
return return
} }

View File

@ -25,6 +25,8 @@ export function createRalphLoopEventHandler(
ctx: PluginInput, ctx: PluginInput,
options: RalphLoopEventHandlerOptions, options: RalphLoopEventHandlerOptions,
) { ) {
const inFlightSessions = new Set<string>()
return async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => { return async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => {
const props = event.properties as Record<string, unknown> | undefined const props = event.properties as Record<string, unknown> | undefined
@ -32,115 +34,127 @@ export function createRalphLoopEventHandler(
const sessionID = props?.sessionID as string | undefined const sessionID = props?.sessionID as string | undefined
if (!sessionID) return if (!sessionID) return
if (options.sessionRecovery.isRecovering(sessionID)) { if (inFlightSessions.has(sessionID)) {
log(`[${HOOK_NAME}] Skipped: in recovery`, { sessionID }) log(`[${HOOK_NAME}] Skipped: handler in flight`, { sessionID })
return return
} }
const state = options.loopState.getState() inFlightSessions.add(sessionID)
if (!state || !state.active) {
return
}
if (state.session_id && state.session_id !== sessionID) {
if (options.checkSessionExists) {
try {
const exists = await options.checkSessionExists(state.session_id)
if (!exists) {
options.loopState.clear()
log(`[${HOOK_NAME}] Cleared orphaned state from deleted session`, {
orphanedSessionId: state.session_id,
currentSessionId: sessionID,
})
return
}
} catch (err) {
log(`[${HOOK_NAME}] Failed to check session existence`, {
sessionId: state.session_id,
error: String(err),
})
}
}
return
}
const transcriptPath = options.getTranscriptPath(sessionID)
const completionViaTranscript = detectCompletionInTranscript(transcriptPath, state.completion_promise)
const completionViaApi = completionViaTranscript
? false
: await detectCompletionInSessionMessages(ctx, {
sessionID,
promise: state.completion_promise,
apiTimeoutMs: options.apiTimeoutMs,
directory: options.directory,
})
if (completionViaTranscript || completionViaApi) {
log(`[${HOOK_NAME}] Completion detected!`, {
sessionID,
iteration: state.iteration,
promise: state.completion_promise,
detectedVia: completionViaTranscript
? "transcript_file"
: "session_messages_api",
})
options.loopState.clear()
const title = state.ultrawork ? "ULTRAWORK LOOP COMPLETE!" : "Ralph Loop Complete!"
const message = state.ultrawork ? `JUST ULW ULW! Task completed after ${state.iteration} iteration(s)` : `Task completed after ${state.iteration} iteration(s)`
await ctx.client.tui?.showToast?.({ body: { title, message, variant: "success", duration: 5000 } }).catch(() => {})
return
}
if (state.iteration >= state.max_iterations) {
log(`[${HOOK_NAME}] Max iterations reached`, {
sessionID,
iteration: state.iteration,
max: state.max_iterations,
})
options.loopState.clear()
await ctx.client.tui?.showToast?.({
body: { title: "Ralph Loop Stopped", message: `Max iterations (${state.max_iterations}) reached without completion`, variant: "warning", duration: 5000 },
}).catch(() => {})
return
}
const newState = options.loopState.incrementIteration()
if (!newState) {
log(`[${HOOK_NAME}] Failed to increment iteration`, { sessionID })
return
}
log(`[${HOOK_NAME}] Continuing loop`, {
sessionID,
iteration: newState.iteration,
max: newState.max_iterations,
})
await ctx.client.tui?.showToast?.({
body: {
title: "Ralph Loop",
message: `Iteration ${newState.iteration}/${newState.max_iterations}`,
variant: "info",
duration: 2000,
},
}).catch(() => {})
try { try {
await continueIteration(ctx, newState, {
previousSessionID: sessionID, if (options.sessionRecovery.isRecovering(sessionID)) {
directory: options.directory, log(`[${HOOK_NAME}] Skipped: in recovery`, { sessionID })
apiTimeoutMs: options.apiTimeoutMs, return
loopState: options.loopState, }
})
} catch (err) { const state = options.loopState.getState()
log(`[${HOOK_NAME}] Failed to inject continuation`, { if (!state || !state.active) {
return
}
if (state.session_id && state.session_id !== sessionID) {
if (options.checkSessionExists) {
try {
const exists = await options.checkSessionExists(state.session_id)
if (!exists) {
options.loopState.clear()
log(`[${HOOK_NAME}] Cleared orphaned state from deleted session`, {
orphanedSessionId: state.session_id,
currentSessionId: sessionID,
})
return
}
} catch (err) {
log(`[${HOOK_NAME}] Failed to check session existence`, {
sessionId: state.session_id,
error: String(err),
})
}
}
return
}
const transcriptPath = options.getTranscriptPath(sessionID)
const completionViaTranscript = detectCompletionInTranscript(transcriptPath, state.completion_promise)
const completionViaApi = completionViaTranscript
? false
: await detectCompletionInSessionMessages(ctx, {
sessionID,
promise: state.completion_promise,
apiTimeoutMs: options.apiTimeoutMs,
directory: options.directory,
})
if (completionViaTranscript || completionViaApi) {
log(`[${HOOK_NAME}] Completion detected!`, {
sessionID,
iteration: state.iteration,
promise: state.completion_promise,
detectedVia: completionViaTranscript
? "transcript_file"
: "session_messages_api",
})
options.loopState.clear()
const title = state.ultrawork ? "ULTRAWORK LOOP COMPLETE!" : "Ralph Loop Complete!"
const message = state.ultrawork ? `JUST ULW ULW! Task completed after ${state.iteration} iteration(s)` : `Task completed after ${state.iteration} iteration(s)`
await ctx.client.tui?.showToast?.({ body: { title, message, variant: "success", duration: 5000 } }).catch(() => {})
return
}
if (state.iteration >= state.max_iterations) {
log(`[${HOOK_NAME}] Max iterations reached`, {
sessionID,
iteration: state.iteration,
max: state.max_iterations,
})
options.loopState.clear()
await ctx.client.tui?.showToast?.({
body: { title: "Ralph Loop Stopped", message: `Max iterations (${state.max_iterations}) reached without completion`, variant: "warning", duration: 5000 },
}).catch(() => {})
return
}
const newState = options.loopState.incrementIteration()
if (!newState) {
log(`[${HOOK_NAME}] Failed to increment iteration`, { sessionID })
return
}
log(`[${HOOK_NAME}] Continuing loop`, {
sessionID, sessionID,
error: String(err), iteration: newState.iteration,
max: newState.max_iterations,
}) })
await ctx.client.tui?.showToast?.({
body: {
title: "Ralph Loop",
message: `Iteration ${newState.iteration}/${newState.max_iterations}`,
variant: "info",
duration: 2000,
},
}).catch(() => {})
try {
await continueIteration(ctx, newState, {
previousSessionID: sessionID,
directory: options.directory,
apiTimeoutMs: options.apiTimeoutMs,
loopState: options.loopState,
})
} catch (err) {
log(`[${HOOK_NAME}] Failed to inject continuation`, {
sessionID,
error: String(err),
})
}
return
} finally {
inFlightSessions.delete(sessionID)
} }
return
} }
if (event.type === "session.deleted") { if (event.type === "session.deleted") {

View File

@ -0,0 +1,111 @@
/// <reference types="bun-types" />
import { describe, expect, test } from "bun:test"
import { createRalphLoopHook } from "./index"
function createDeferred(): {
promise: Promise<void>
resolve: () => void
} {
let resolvePromise: (() => void) | null = null
const promise = new Promise<void>((resolve) => {
resolvePromise = resolve
})
return {
promise,
resolve: () => {
if (resolvePromise) {
resolvePromise()
}
},
}
}
async function waitUntil(condition: () => boolean): Promise<void> {
for (let index = 0; index < 100; index++) {
if (condition()) {
return
}
await new Promise<void>((resolve) => {
setTimeout(resolve, 0)
})
}
throw new Error("Condition was not met in time")
}
describe("ralph-loop reset strategy race condition", () => {
test("should skip duplicate idle while reset iteration handling is in flight", async () => {
// given - reset strategy loop with blocked TUI session switch
const promptCalls: Array<{ sessionID: string; text: string }> = []
const createSessionCalls: Array<{ parentID?: string }> = []
let selectSessionCalls = 0
const selectSessionDeferred = createDeferred()
const hook = createRalphLoopHook({
directory: process.cwd(),
client: {
session: {
prompt: async (options: {
path: { id: string }
body: { parts: Array<{ type: string; text: string }> }
}) => {
promptCalls.push({
sessionID: options.path.id,
text: options.body.parts[0].text,
})
return {}
},
promptAsync: async (options: {
path: { id: string }
body: { parts: Array<{ type: string; text: string }> }
}) => {
promptCalls.push({
sessionID: options.path.id,
text: options.body.parts[0].text,
})
return {}
},
create: async (options: {
body: { parentID?: string; title?: string }
query?: { directory?: string }
}) => {
createSessionCalls.push({ parentID: options.body.parentID })
return { data: { id: `new-session-${createSessionCalls.length}` } }
},
messages: async () => ({ data: [] }),
},
tui: {
showToast: async () => ({}),
selectSession: async () => {
selectSessionCalls += 1
await selectSessionDeferred.promise
return {}
},
},
},
} as unknown as Parameters<typeof createRalphLoopHook>[0])
hook.startLoop("session-old", "Build feature", { strategy: "reset" })
// when - first idle is in-flight and old session fires idle again before TUI switch resolves
const firstIdleEvent = hook.event({
event: { type: "session.idle", properties: { sessionID: "session-old" } },
})
await waitUntil(() => selectSessionCalls > 0)
const secondIdleEvent = hook.event({
event: { type: "session.idle", properties: { sessionID: "session-old" } },
})
selectSessionDeferred.resolve()
await Promise.all([firstIdleEvent, secondIdleEvent])
// then - duplicate idle should be skipped to prevent concurrent continuation injection
expect(createSessionCalls.length).toBe(1)
expect(promptCalls.length).toBe(1)
expect(hook.getState()?.iteration).toBe(2)
})
})

View File

@ -125,7 +125,7 @@ describe("runtime-fallback", () => {
await hook.event({ await hook.event({
event: { event: {
type: "session.created", type: "session.created",
properties: { info: { id: sessionID, model: "google/gemini-3-pro" } }, properties: { info: { id: sessionID, model: "google/gemini-3.1-pro" } },
}, },
}) })
@ -1841,7 +1841,7 @@ describe("runtime-fallback", () => {
test("should apply fallback model on next chat.message after error", async () => { test("should apply fallback model on next chat.message after error", async () => {
const hook = createRuntimeFallbackHook(createMockPluginInput(), { const hook = createRuntimeFallbackHook(createMockPluginInput(), {
config: createMockConfig({ notify_on_fallback: false }), config: createMockConfig({ notify_on_fallback: false }),
pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2", "google/gemini-3-pro"]), pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2", "google/gemini-3.1-pro"]),
}) })
const sessionID = "test-session-switch" const sessionID = "test-session-switch"
SessionCategoryRegistry.register(sessionID, "test") SessionCategoryRegistry.register(sessionID, "test")
@ -1916,7 +1916,7 @@ describe("runtime-fallback", () => {
const input = createMockPluginInput() const input = createMockPluginInput()
const hook = createRuntimeFallbackHook(input, { const hook = createRuntimeFallbackHook(input, {
config: createMockConfig({ notify_on_fallback: false }), config: createMockConfig({ notify_on_fallback: false }),
pluginConfig: createMockPluginConfigWithAgentFallback("oracle", ["openai/gpt-5.2", "google/gemini-3-pro"]), pluginConfig: createMockPluginConfigWithAgentFallback("oracle", ["openai/gpt-5.2", "google/gemini-3.1-pro"]),
}) })
const sessionID = "test-agent-fallback" const sessionID = "test-agent-fallback"

View File

@ -3,6 +3,7 @@ const { describe, expect, test, beforeEach, afterEach, spyOn } = require("bun:te
const { createSessionNotification } = require("./session-notification") const { createSessionNotification } = require("./session-notification")
const { setMainSession, subagentSessions, _resetForTesting } = require("../features/claude-code-session-state") const { setMainSession, subagentSessions, _resetForTesting } = require("../features/claude-code-session-state")
const utils = require("./session-notification-utils") const utils = require("./session-notification-utils")
const sender = require("./session-notification-sender")
describe("session-notification input-needed events", () => { describe("session-notification input-needed events", () => {
let notificationCalls: string[] let notificationCalls: string[]
@ -37,6 +38,10 @@ describe("session-notification input-needed events", () => {
spyOn(utils, "getNotifySendPath").mockResolvedValue("/usr/bin/notify-send") spyOn(utils, "getNotifySendPath").mockResolvedValue("/usr/bin/notify-send")
spyOn(utils, "getPowershellPath").mockResolvedValue("powershell") spyOn(utils, "getPowershellPath").mockResolvedValue("powershell")
spyOn(utils, "startBackgroundCheck").mockImplementation(() => {}) spyOn(utils, "startBackgroundCheck").mockImplementation(() => {})
spyOn(sender, "detectPlatform").mockReturnValue("darwin")
spyOn(sender, "sendSessionNotification").mockImplementation(async (_ctx: unknown, _platform: unknown, _title: unknown, message: string) => {
notificationCalls.push(message)
})
}) })
afterEach(() => { afterEach(() => {
@ -47,7 +52,7 @@ describe("session-notification input-needed events", () => {
test("sends question notification when question tool asks for input", async () => { test("sends question notification when question tool asks for input", async () => {
const sessionID = "main-question" const sessionID = "main-question"
setMainSession(sessionID) setMainSession(sessionID)
const hook = createSessionNotification(createMockPluginInput()) const hook = createSessionNotification(createMockPluginInput(), { enforceMainSessionFilter: false })
await hook({ await hook({
event: { event: {
@ -74,7 +79,7 @@ describe("session-notification input-needed events", () => {
test("sends permission notification for permission events", async () => { test("sends permission notification for permission events", async () => {
const sessionID = "main-permission" const sessionID = "main-permission"
setMainSession(sessionID) setMainSession(sessionID)
const hook = createSessionNotification(createMockPluginInput()) const hook = createSessionNotification(createMockPluginInput(), { enforceMainSessionFilter: false })
await hook({ await hook({
event: { event: {

View File

@ -1,8 +1,9 @@
import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test" const { describe, expect, test, beforeEach, afterEach, spyOn } = require("bun:test")
import { createSessionNotification } from "./session-notification" import { createSessionNotification } from "./session-notification"
import { setMainSession, subagentSessions, _resetForTesting } from "../features/claude-code-session-state" import { setMainSession, subagentSessions, _resetForTesting } from "../features/claude-code-session-state"
import * as utils from "./session-notification-utils" import * as utils from "./session-notification-utils"
import * as sender from "./session-notification-sender"
describe("session-notification", () => { describe("session-notification", () => {
let notificationCalls: string[] let notificationCalls: string[]
@ -40,6 +41,10 @@ describe("session-notification", () => {
spyOn(utils, "getPaplayPath").mockResolvedValue("/usr/bin/paplay") spyOn(utils, "getPaplayPath").mockResolvedValue("/usr/bin/paplay")
spyOn(utils, "getAplayPath").mockResolvedValue("/usr/bin/aplay") spyOn(utils, "getAplayPath").mockResolvedValue("/usr/bin/aplay")
spyOn(utils, "startBackgroundCheck").mockImplementation(() => {}) spyOn(utils, "startBackgroundCheck").mockImplementation(() => {})
spyOn(sender, "detectPlatform").mockReturnValue("darwin")
spyOn(sender, "sendSessionNotification").mockImplementation(async (_ctx, _platform, _title, message) => {
notificationCalls.push(message)
})
}) })
afterEach(() => { afterEach(() => {
@ -105,6 +110,7 @@ describe("session-notification", () => {
const hook = createSessionNotification(createMockPluginInput(), { const hook = createSessionNotification(createMockPluginInput(), {
idleConfirmationDelay: 10, idleConfirmationDelay: 10,
skipIfIncompleteTodos: false, skipIfIncompleteTodos: false,
enforceMainSessionFilter: false,
}) })
// when - main session goes idle // when - main session goes idle
@ -332,6 +338,7 @@ describe("session-notification", () => {
const hook = createSessionNotification(createMockPluginInput(), { const hook = createSessionNotification(createMockPluginInput(), {
idleConfirmationDelay: 10, idleConfirmationDelay: 10,
skipIfIncompleteTodos: false, skipIfIncompleteTodos: false,
enforceMainSessionFilter: false,
}) })
// when - session goes idle twice // when - session goes idle twice

View File

@ -4,11 +4,9 @@ import {
startBackgroundCheck, startBackgroundCheck,
} from "./session-notification-utils" } from "./session-notification-utils"
import { import {
detectPlatform, type Platform,
getDefaultSoundPath,
playSessionNotificationSound,
sendSessionNotification,
} from "./session-notification-sender" } from "./session-notification-sender"
import * as sessionNotificationSender from "./session-notification-sender"
import { hasIncompleteTodos } from "./session-todo-status" import { hasIncompleteTodos } from "./session-todo-status"
import { createIdleNotificationScheduler } from "./session-notification-scheduler" import { createIdleNotificationScheduler } from "./session-notification-scheduler"
@ -25,13 +23,14 @@ interface SessionNotificationConfig {
skipIfIncompleteTodos?: boolean skipIfIncompleteTodos?: boolean
/** Maximum number of sessions to track before cleanup (default: 100) */ /** Maximum number of sessions to track before cleanup (default: 100) */
maxTrackedSessions?: number maxTrackedSessions?: number
enforceMainSessionFilter?: boolean
} }
export function createSessionNotification( export function createSessionNotification(
ctx: PluginInput, ctx: PluginInput,
config: SessionNotificationConfig = {} config: SessionNotificationConfig = {}
) { ) {
const currentPlatform = detectPlatform() const currentPlatform: Platform = sessionNotificationSender.detectPlatform()
const defaultSoundPath = getDefaultSoundPath(currentPlatform) const defaultSoundPath = sessionNotificationSender.getDefaultSoundPath(currentPlatform)
startBackgroundCheck(currentPlatform) startBackgroundCheck(currentPlatform)
@ -45,6 +44,7 @@ export function createSessionNotification(
idleConfirmationDelay: 1500, idleConfirmationDelay: 1500,
skipIfIncompleteTodos: true, skipIfIncompleteTodos: true,
maxTrackedSessions: 100, maxTrackedSessions: 100,
enforceMainSessionFilter: true,
...config, ...config,
} }
@ -53,8 +53,8 @@ export function createSessionNotification(
platform: currentPlatform, platform: currentPlatform,
config: mergedConfig, config: mergedConfig,
hasIncompleteTodos, hasIncompleteTodos,
send: sendSessionNotification, send: sessionNotificationSender.sendSessionNotification,
playSound: playSessionNotificationSound, playSound: sessionNotificationSender.playSessionNotificationSound,
}) })
const QUESTION_TOOLS = new Set(["question", "ask_user_question", "askuserquestion"]) const QUESTION_TOOLS = new Set(["question", "ask_user_question", "askuserquestion"])
@ -81,8 +81,10 @@ export function createSessionNotification(
const shouldNotifyForSession = (sessionID: string): boolean => { const shouldNotifyForSession = (sessionID: string): boolean => {
if (subagentSessions.has(sessionID)) return false if (subagentSessions.has(sessionID)) return false
const mainSessionID = getMainSessionID() if (mergedConfig.enforceMainSessionFilter) {
if (mainSessionID && sessionID !== mainSessionID) return false const mainSessionID = getMainSessionID()
if (mainSessionID && sessionID !== mainSessionID) return false
}
return true return true
} }
@ -146,9 +148,14 @@ export function createSessionNotification(
if (!shouldNotifyForSession(sessionID)) return if (!shouldNotifyForSession(sessionID)) return
scheduler.markSessionActivity(sessionID) scheduler.markSessionActivity(sessionID)
await sendSessionNotification(ctx, currentPlatform, mergedConfig.title, mergedConfig.permissionMessage) await sessionNotificationSender.sendSessionNotification(
ctx,
currentPlatform,
mergedConfig.title,
mergedConfig.permissionMessage,
)
if (mergedConfig.playSound && mergedConfig.soundPath) { if (mergedConfig.playSound && mergedConfig.soundPath) {
await playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath) await sessionNotificationSender.playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath)
} }
return return
} }
@ -168,9 +175,9 @@ export function createSessionNotification(
? mergedConfig.permissionMessage ? mergedConfig.permissionMessage
: mergedConfig.questionMessage : mergedConfig.questionMessage
await sendSessionNotification(ctx, currentPlatform, mergedConfig.title, message) await sessionNotificationSender.sendSessionNotification(ctx, currentPlatform, mergedConfig.title, message)
if (mergedConfig.playSound && mergedConfig.soundPath) { if (mergedConfig.playSound && mergedConfig.soundPath) {
await playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath) await sessionNotificationSender.playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath)
} }
} }
} }

View File

@ -7,9 +7,12 @@ import { createStartWorkHook } from "./index"
import { import {
writeBoulderState, writeBoulderState,
clearBoulderState, clearBoulderState,
readBoulderState,
} from "../../features/boulder-state" } from "../../features/boulder-state"
import type { BoulderState } from "../../features/boulder-state" import type { BoulderState } from "../../features/boulder-state"
import * as sessionState from "../../features/claude-code-session-state" import * as sessionState from "../../features/claude-code-session-state"
import * as worktreeDetector from "./worktree-detector"
import * as worktreeDetector from "./worktree-detector"
describe("start-work hook", () => { describe("start-work hook", () => {
let testDir: string let testDir: string
@ -402,4 +405,152 @@ describe("start-work hook", () => {
updateSpy.mockRestore() updateSpy.mockRestore()
}) })
}) })
// Covers --worktree flag handling in the /start-work hook: validation of the
// supplied path, persistence of worktree_path in boulder.json, and resume
// behavior. detectWorktreePath is stubbed so no real git commands run.
describe("worktree support", () => {
  // Spy over the real detector; default return of null simulates an
  // invalid / non-worktree path unless a test overrides it.
  let detectSpy: ReturnType<typeof spyOn>
  beforeEach(() => {
    detectSpy = spyOn(worktreeDetector, "detectWorktreePath").mockReturnValue(null)
  })
  afterEach(() => {
    detectSpy.mockRestore()
  })
  test("should inject model-decides instructions when no --worktree flag", async () => {
    // given - single plan, no worktree flag
    const plansDir = join(testDir, ".sisyphus", "plans")
    mkdirSync(plansDir, { recursive: true })
    writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1")
    const hook = createStartWorkHook(createMockPluginInput())
    const output = {
      parts: [{ type: "text", text: "<session-context></session-context>" }],
    }
    // when
    await hook["chat.message"]({ sessionID: "session-123" }, output)
    // then - model-decides instructions should appear
    expect(output.parts[0].text).toContain("Worktree Setup Required")
    expect(output.parts[0].text).toContain("git worktree list --porcelain")
    expect(output.parts[0].text).toContain("git worktree add")
  })
  test("should inject worktree path when --worktree flag is valid", async () => {
    // given - single plan + valid worktree path
    const plansDir = join(testDir, ".sisyphus", "plans")
    mkdirSync(plansDir, { recursive: true })
    writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1")
    detectSpy.mockReturnValue("/validated/worktree")
    const hook = createStartWorkHook(createMockPluginInput())
    const output = {
      parts: [{ type: "text", text: "<session-context>\n<user-request>--worktree /validated/worktree</user-request>\n</session-context>" }],
    }
    // when
    await hook["chat.message"]({ sessionID: "session-123" }, output)
    // then - validated path shown, no model-decides instructions
    expect(output.parts[0].text).toContain("**Worktree**: /validated/worktree")
    expect(output.parts[0].text).not.toContain("Worktree Setup Required")
  })
  test("should store worktree_path in boulder when --worktree is valid", async () => {
    // given - plan + valid worktree
    const plansDir = join(testDir, ".sisyphus", "plans")
    mkdirSync(plansDir, { recursive: true })
    writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1")
    detectSpy.mockReturnValue("/valid/wt")
    const hook = createStartWorkHook(createMockPluginInput())
    const output = {
      parts: [{ type: "text", text: "<session-context>\n<user-request>--worktree /valid/wt</user-request>\n</session-context>" }],
    }
    // when
    await hook["chat.message"]({ sessionID: "session-123" }, output)
    // then - boulder.json has worktree_path
    const state = readBoulderState(testDir)
    expect(state?.worktree_path).toBe("/valid/wt")
  })
  test("should NOT store worktree_path when --worktree path is invalid", async () => {
    // given - plan + invalid worktree path (detectWorktreePath returns null)
    const plansDir = join(testDir, ".sisyphus", "plans")
    mkdirSync(plansDir, { recursive: true })
    writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1")
    // detectSpy already returns null by default
    const hook = createStartWorkHook(createMockPluginInput())
    const output = {
      parts: [{ type: "text", text: "<session-context>\n<user-request>--worktree /nonexistent/wt</user-request>\n</session-context>" }],
    }
    // when
    await hook["chat.message"]({ sessionID: "session-123" }, output)
    // then - worktree_path absent, setup instructions present
    const state = readBoulderState(testDir)
    expect(state?.worktree_path).toBeUndefined()
    expect(output.parts[0].text).toContain("needs setup")
    expect(output.parts[0].text).toContain("git worktree add /nonexistent/wt")
  })
  test("should update boulder worktree_path on resume when new --worktree given", async () => {
    // given - existing boulder with old worktree, user provides new worktree
    const planPath = join(testDir, "plan.md")
    writeFileSync(planPath, "# Plan\n- [ ] Task 1")
    const existingState: BoulderState = {
      active_plan: planPath,
      started_at: "2026-01-01T00:00:00Z",
      session_ids: ["old-session"],
      plan_name: "plan",
      worktree_path: "/old/wt",
    }
    writeBoulderState(testDir, existingState)
    detectSpy.mockReturnValue("/new/wt")
    const hook = createStartWorkHook(createMockPluginInput())
    const output = {
      parts: [{ type: "text", text: "<session-context>\n<user-request>--worktree /new/wt</user-request>\n</session-context>" }],
    }
    // when
    await hook["chat.message"]({ sessionID: "session-456" }, output)
    // then - boulder reflects updated worktree and new session appended
    const state = readBoulderState(testDir)
    expect(state?.worktree_path).toBe("/new/wt")
    expect(state?.session_ids).toContain("session-456")
  })
  test("should show existing worktree on resume when no --worktree flag", async () => {
    // given - existing boulder already has worktree_path, no flag given
    const planPath = join(testDir, "plan.md")
    writeFileSync(planPath, "# Plan\n- [ ] Task 1")
    const existingState: BoulderState = {
      active_plan: planPath,
      started_at: "2026-01-01T00:00:00Z",
      session_ids: ["old-session"],
      plan_name: "plan",
      worktree_path: "/existing/wt",
    }
    writeBoulderState(testDir, existingState)
    const hook = createStartWorkHook(createMockPluginInput())
    const output = {
      parts: [{ type: "text", text: "<session-context></session-context>" }],
    }
    // when
    await hook["chat.message"]({ sessionID: "session-789" }, output)
    // then - shows existing worktree, no model-decides instructions
    expect(output.parts[0].text).toContain("/existing/wt")
    expect(output.parts[0].text).not.toContain("Worktree Setup Required")
  })
})
}) })

View File

@ -1 +1,4 @@
export { HOOK_NAME, createStartWorkHook } from "./start-work-hook" export { HOOK_NAME, createStartWorkHook } from "./start-work-hook"
export { detectWorktreePath } from "./worktree-detector"
export type { ParsedUserRequest } from "./parse-user-request"
export { parseUserRequest } from "./parse-user-request"

View File

@ -0,0 +1,78 @@
/// <reference types="bun-types" />
import { describe, expect, test } from "bun:test"
import { parseUserRequest } from "./parse-user-request"
// Unit tests for parseUserRequest: extraction of the plan name and the
// optional --worktree path from a <user-request> tag, including stripping
// of the "ultrawork"/"ulw" trigger keywords from the plan name.
describe("parseUserRequest", () => {
  describe("when no user-request tag", () => {
    test("#given prompt without tag #when parsing #then returns nulls", () => {
      const result = parseUserRequest("Just a regular message without any tags")
      expect(result.planName).toBeNull()
      expect(result.explicitWorktreePath).toBeNull()
    })
  })
  describe("when user-request tag is empty", () => {
    test("#given empty user-request tag #when parsing #then returns nulls", () => {
      const result = parseUserRequest("<user-request> </user-request>")
      expect(result.planName).toBeNull()
      expect(result.explicitWorktreePath).toBeNull()
    })
  })
  describe("when only plan name given", () => {
    test("#given plan name without worktree flag #when parsing #then returns plan name with null worktree", () => {
      const result = parseUserRequest("<session-context>\n<user-request>my-plan</user-request>\n</session-context>")
      expect(result.planName).toBe("my-plan")
      expect(result.explicitWorktreePath).toBeNull()
    })
  })
  describe("when only --worktree flag given", () => {
    test("#given --worktree with path only #when parsing #then returns worktree path with null plan", () => {
      const result = parseUserRequest("<user-request>--worktree /home/user/repo-feat</user-request>")
      expect(result.planName).toBeNull()
      expect(result.explicitWorktreePath).toBe("/home/user/repo-feat")
    })
  })
  describe("when plan name and --worktree are both given", () => {
    // Flag position must not matter: both orderings yield the same result.
    test("#given plan name before --worktree #when parsing #then returns both", () => {
      const result = parseUserRequest("<user-request>my-plan --worktree /path/to/worktree</user-request>")
      expect(result.planName).toBe("my-plan")
      expect(result.explicitWorktreePath).toBe("/path/to/worktree")
    })
    test("#given --worktree before plan name #when parsing #then returns both", () => {
      const result = parseUserRequest("<user-request>--worktree /path/to/worktree my-plan</user-request>")
      expect(result.planName).toBe("my-plan")
      expect(result.explicitWorktreePath).toBe("/path/to/worktree")
    })
  })
  describe("when --worktree flag has no path", () => {
    test("#given --worktree without path #when parsing #then worktree path is null", () => {
      const result = parseUserRequest("<user-request>--worktree</user-request>")
      expect(result.explicitWorktreePath).toBeNull()
    })
  })
  describe("when ultrawork keywords are present", () => {
    test("#given plan name with ultrawork keyword #when parsing #then strips keyword from plan name", () => {
      const result = parseUserRequest("<user-request>my-plan ultrawork</user-request>")
      expect(result.planName).toBe("my-plan")
    })
    test("#given plan name with ulw keyword and worktree #when parsing #then strips ulw, preserves worktree", () => {
      const result = parseUserRequest("<user-request>my-plan ulw --worktree /path/to/wt</user-request>")
      expect(result.planName).toBe("my-plan")
      expect(result.explicitWorktreePath).toBe("/path/to/wt")
    })
    test("#given only ultrawork keyword with worktree #when parsing #then plan name is null, worktree preserved", () => {
      const result = parseUserRequest("<user-request>ultrawork --worktree /wt</user-request>")
      expect(result.planName).toBeNull()
      expect(result.explicitWorktreePath).toBe("/wt")
    })
  })
})

View File

@ -0,0 +1,29 @@
// Body of a <user-request> tag; whitespace around the payload is ignored.
const USER_REQUEST_TAG = /<user-request>\s*([\s\S]*?)\s*<\/user-request>/i
// "--worktree" optionally followed by a whitespace-separated path token.
const WORKTREE_FLAG_PATTERN = /--worktree(?:\s+(\S+))?/
// Ultrawork trigger keywords that must not leak into the plan name.
const KEYWORD_PATTERN = /\b(ultrawork|ulw)\b/gi

/** Parsed contents of a /start-work user request. */
export interface ParsedUserRequest {
  planName: string | null
  explicitWorktreePath: string | null
}

/**
 * Extract the plan name and optional --worktree path from a prompt.
 *
 * Returns both fields as null when the prompt has no <user-request> tag or
 * the tag body is empty. The --worktree flag (with its path, if present) and
 * any ultrawork keywords are stripped; whatever text remains is the plan name.
 */
export function parseUserRequest(promptText: string): ParsedUserRequest {
  const tagMatch = USER_REQUEST_TAG.exec(promptText)
  if (!tagMatch) {
    return { planName: null, explicitWorktreePath: null }
  }

  let remainder = tagMatch[1].trim()
  if (!remainder) {
    return { planName: null, explicitWorktreePath: null }
  }

  let explicitWorktreePath: string | null = null
  const flagMatch = WORKTREE_FLAG_PATTERN.exec(remainder)
  if (flagMatch) {
    // Path group is absent when the flag is given with no argument.
    explicitWorktreePath = flagMatch[1] ?? null
    remainder = remainder.replace(flagMatch[0], "").trim()
  }

  const planName = remainder.replace(KEYWORD_PATTERN, "").trim()
  return { planName: planName || null, explicitWorktreePath }
}

View File

@ -1,3 +1,4 @@
import { statSync } from "node:fs"
import type { PluginInput } from "@opencode-ai/plugin" import type { PluginInput } from "@opencode-ai/plugin"
import { import {
readBoulderState, readBoulderState,
@ -11,11 +12,11 @@ import {
} from "../../features/boulder-state" } from "../../features/boulder-state"
import { log } from "../../shared/logger" import { log } from "../../shared/logger"
import { updateSessionAgent } from "../../features/claude-code-session-state" import { updateSessionAgent } from "../../features/claude-code-session-state"
import { detectWorktreePath } from "./worktree-detector"
import { parseUserRequest } from "./parse-user-request"
export const HOOK_NAME = "start-work" as const export const HOOK_NAME = "start-work" as const
const KEYWORD_PATTERN = /\b(ultrawork|ulw)\b/gi
interface StartWorkHookInput { interface StartWorkHookInput {
sessionID: string sessionID: string
messageID?: string messageID?: string
@ -25,73 +26,76 @@ interface StartWorkHookOutput {
parts: Array<{ type: string; text?: string }> parts: Array<{ type: string; text?: string }>
} }
function extractUserRequestPlanName(promptText: string): string | null {
const userRequestMatch = promptText.match(/<user-request>\s*([\s\S]*?)\s*<\/user-request>/i)
if (!userRequestMatch) return null
const rawArg = userRequestMatch[1].trim()
if (!rawArg) return null
const cleanedArg = rawArg.replace(KEYWORD_PATTERN, "").trim()
return cleanedArg || null
}
function findPlanByName(plans: string[], requestedName: string): string | null { function findPlanByName(plans: string[], requestedName: string): string | null {
const lowerName = requestedName.toLowerCase() const lowerName = requestedName.toLowerCase()
const exactMatch = plans.find((p) => getPlanName(p).toLowerCase() === lowerName)
const exactMatch = plans.find(p => getPlanName(p).toLowerCase() === lowerName)
if (exactMatch) return exactMatch if (exactMatch) return exactMatch
const partialMatch = plans.find((p) => getPlanName(p).toLowerCase().includes(lowerName))
const partialMatch = plans.find(p => getPlanName(p).toLowerCase().includes(lowerName))
return partialMatch || null return partialMatch || null
} }
const MODEL_DECIDES_WORKTREE_BLOCK = `
## Worktree Setup Required
No worktree specified. Before starting work, you MUST choose or create one:
1. \`git worktree list --porcelain\` — list existing worktrees
2. Create if needed: \`git worktree add <absolute-path> <branch-or-HEAD>\`
3. Update \`.sisyphus/boulder.json\` — add \`"worktree_path": "<absolute-path>"\`
4. Work exclusively inside that worktree directory`
/**
 * Decide which worktree path (if any) to record in boulder.json and which
 * markdown snippet to inject into the session context.
 *
 * - No --worktree flag: no stored path; inject the model-decides block.
 * - Flag with a path that validates as a git worktree: store and display it.
 * - Flag with a path that does not validate: no stored path; inject setup
 *   instructions telling the model to create the worktree first.
 */
function resolveWorktreeContext(
  explicitWorktreePath: string | null,
): { worktreePath: string | undefined; block: string } {
  if (explicitWorktreePath === null) {
    // User gave no flag at all — let the model pick or create a worktree.
    return { worktreePath: undefined, block: MODEL_DECIDES_WORKTREE_BLOCK }
  }

  const validatedPath = detectWorktreePath(explicitWorktreePath)
  if (!validatedPath) {
    // Path supplied but not (yet) a git worktree: instruct how to set it up.
    return {
      worktreePath: undefined,
      block: `\n**Worktree** (needs setup): \`git worktree add ${explicitWorktreePath} <branch>\`, then add \`"worktree_path"\` to boulder.json`,
    }
  }

  return { worktreePath: validatedPath, block: `\n**Worktree**: ${validatedPath}` }
}
export function createStartWorkHook(ctx: PluginInput) { export function createStartWorkHook(ctx: PluginInput) {
return { return {
"chat.message": async ( "chat.message": async (input: StartWorkHookInput, output: StartWorkHookOutput): Promise<void> => {
input: StartWorkHookInput,
output: StartWorkHookOutput
): Promise<void> => {
const parts = output.parts const parts = output.parts
const promptText = parts const promptText =
?.filter((p) => p.type === "text" && p.text) parts
.map((p) => p.text) ?.filter((p) => p.type === "text" && p.text)
.join("\n") .map((p) => p.text)
.trim() || "" .join("\n")
.trim() || ""
// Only trigger on actual command execution (contains <session-context> tag) if (!promptText.includes("<session-context>")) return
// NOT on description text like "Start Sisyphus work session from Prometheus plan"
const isStartWorkCommand = promptText.includes("<session-context>")
if (!isStartWorkCommand) { log(`[${HOOK_NAME}] Processing start-work command`, { sessionID: input.sessionID })
return updateSessionAgent(input.sessionID, "atlas")
}
log(`[${HOOK_NAME}] Processing start-work command`, {
sessionID: input.sessionID,
})
updateSessionAgent(input.sessionID, "atlas") // Always switch: fixes #1298
const existingState = readBoulderState(ctx.directory) const existingState = readBoulderState(ctx.directory)
const sessionId = input.sessionID const sessionId = input.sessionID
const timestamp = new Date().toISOString() const timestamp = new Date().toISOString()
const { planName: explicitPlanName, explicitWorktreePath } = parseUserRequest(promptText)
const { worktreePath, block: worktreeBlock } = resolveWorktreeContext(explicitWorktreePath)
let contextInfo = "" let contextInfo = ""
const explicitPlanName = extractUserRequestPlanName(promptText)
if (explicitPlanName) { if (explicitPlanName) {
log(`[${HOOK_NAME}] Explicit plan name requested: ${explicitPlanName}`, { log(`[${HOOK_NAME}] Explicit plan name requested: ${explicitPlanName}`, { sessionID: input.sessionID })
sessionID: input.sessionID,
})
const allPlans = findPrometheusPlans(ctx.directory) const allPlans = findPrometheusPlans(ctx.directory)
const matchedPlan = findPlanByName(allPlans, explicitPlanName) const matchedPlan = findPlanByName(allPlans, explicitPlanName)
if (matchedPlan) { if (matchedPlan) {
const progress = getPlanProgress(matchedPlan) const progress = getPlanProgress(matchedPlan)
if (progress.isComplete) { if (progress.isComplete) {
contextInfo = ` contextInfo = `
## Plan Already Complete ## Plan Already Complete
@ -99,12 +103,10 @@ export function createStartWorkHook(ctx: PluginInput) {
The requested plan "${getPlanName(matchedPlan)}" has been completed. The requested plan "${getPlanName(matchedPlan)}" has been completed.
All ${progress.total} tasks are done. Create a new plan with: /plan "your task"` All ${progress.total} tasks are done. Create a new plan with: /plan "your task"`
} else { } else {
if (existingState) { if (existingState) clearBoulderState(ctx.directory)
clearBoulderState(ctx.directory) const newState = createBoulderState(matchedPlan, sessionId, "atlas", worktreePath)
}
const newState = createBoulderState(matchedPlan, sessionId, "atlas")
writeBoulderState(ctx.directory, newState) writeBoulderState(ctx.directory, newState)
contextInfo = ` contextInfo = `
## Auto-Selected Plan ## Auto-Selected Plan
@ -113,17 +115,20 @@ All ${progress.total} tasks are done. Create a new plan with: /plan "your task"`
**Progress**: ${progress.completed}/${progress.total} tasks **Progress**: ${progress.completed}/${progress.total} tasks
**Session ID**: ${sessionId} **Session ID**: ${sessionId}
**Started**: ${timestamp} **Started**: ${timestamp}
${worktreeBlock}
boulder.json has been created. Read the plan and begin execution.` boulder.json has been created. Read the plan and begin execution.`
} }
} else { } else {
const incompletePlans = allPlans.filter(p => !getPlanProgress(p).isComplete) const incompletePlans = allPlans.filter((p) => !getPlanProgress(p).isComplete)
if (incompletePlans.length > 0) { if (incompletePlans.length > 0) {
const planList = incompletePlans.map((p, i) => { const planList = incompletePlans
const prog = getPlanProgress(p) .map((p, i) => {
return `${i + 1}. [${getPlanName(p)}] - Progress: ${prog.completed}/${prog.total}` const prog = getPlanProgress(p)
}).join("\n") return `${i + 1}. [${getPlanName(p)}] - Progress: ${prog.completed}/${prog.total}`
})
.join("\n")
contextInfo = ` contextInfo = `
## Plan Not Found ## Plan Not Found
@ -143,9 +148,25 @@ No incomplete plans available. Create a new plan with: /plan "your task"`
} }
} else if (existingState) { } else if (existingState) {
const progress = getPlanProgress(existingState.active_plan) const progress = getPlanProgress(existingState.active_plan)
if (!progress.isComplete) { if (!progress.isComplete) {
appendSessionId(ctx.directory, sessionId) const effectiveWorktree = worktreePath ?? existingState.worktree_path
if (worktreePath !== undefined) {
const updatedSessions = existingState.session_ids.includes(sessionId)
? existingState.session_ids
: [...existingState.session_ids, sessionId]
writeBoulderState(ctx.directory, {
...existingState,
worktree_path: worktreePath,
session_ids: updatedSessions,
})
} else {
appendSessionId(ctx.directory, sessionId)
}
const worktreeDisplay = effectiveWorktree ? `\n**Worktree**: ${effectiveWorktree}` : worktreeBlock
contextInfo = ` contextInfo = `
## Active Work Session Found ## Active Work Session Found
@ -155,6 +176,7 @@ No incomplete plans available. Create a new plan with: /plan "your task"`
**Progress**: ${progress.completed}/${progress.total} tasks completed **Progress**: ${progress.completed}/${progress.total} tasks completed
**Sessions**: ${existingState.session_ids.length + 1} (current session appended) **Sessions**: ${existingState.session_ids.length + 1} (current session appended)
**Started**: ${existingState.started_at} **Started**: ${existingState.started_at}
${worktreeDisplay}
The current session (${sessionId}) has been added to session_ids. The current session (${sessionId}) has been added to session_ids.
Read the plan file and continue from the first unchecked task.` Read the plan file and continue from the first unchecked task.`
@ -167,13 +189,15 @@ Looking for new plans...`
} }
} }
if ((!existingState && !explicitPlanName) || (existingState && !explicitPlanName && getPlanProgress(existingState.active_plan).isComplete)) { if (
(!existingState && !explicitPlanName) ||
(existingState && !explicitPlanName && getPlanProgress(existingState.active_plan).isComplete)
) {
const plans = findPrometheusPlans(ctx.directory) const plans = findPrometheusPlans(ctx.directory)
const incompletePlans = plans.filter(p => !getPlanProgress(p).isComplete) const incompletePlans = plans.filter((p) => !getPlanProgress(p).isComplete)
if (plans.length === 0) { if (plans.length === 0) {
contextInfo += ` contextInfo += `
## No Plans Found ## No Plans Found
No Prometheus plan files found at .sisyphus/plans/ No Prometheus plan files found at .sisyphus/plans/
@ -187,7 +211,7 @@ All ${plans.length} plan(s) are complete. Create a new plan with: /plan "your ta
} else if (incompletePlans.length === 1) { } else if (incompletePlans.length === 1) {
const planPath = incompletePlans[0] const planPath = incompletePlans[0]
const progress = getPlanProgress(planPath) const progress = getPlanProgress(planPath)
const newState = createBoulderState(planPath, sessionId, "atlas") const newState = createBoulderState(planPath, sessionId, "atlas", worktreePath)
writeBoulderState(ctx.directory, newState) writeBoulderState(ctx.directory, newState)
contextInfo += ` contextInfo += `
@ -199,15 +223,17 @@ All ${plans.length} plan(s) are complete. Create a new plan with: /plan "your ta
**Progress**: ${progress.completed}/${progress.total} tasks **Progress**: ${progress.completed}/${progress.total} tasks
**Session ID**: ${sessionId} **Session ID**: ${sessionId}
**Started**: ${timestamp} **Started**: ${timestamp}
${worktreeBlock}
boulder.json has been created. Read the plan and begin execution.` boulder.json has been created. Read the plan and begin execution.`
} else { } else {
const planList = incompletePlans.map((p, i) => { const planList = incompletePlans
const progress = getPlanProgress(p) .map((p, i) => {
const stat = require("node:fs").statSync(p) const progress = getPlanProgress(p)
const modified = new Date(stat.mtimeMs).toISOString() const modified = new Date(statSync(p).mtimeMs).toISOString()
return `${i + 1}. [${getPlanName(p)}] - Modified: ${modified} - Progress: ${progress.completed}/${progress.total}` return `${i + 1}. [${getPlanName(p)}] - Modified: ${modified} - Progress: ${progress.completed}/${progress.total}`
}).join("\n") })
.join("\n")
contextInfo += ` contextInfo += `
@ -220,6 +246,7 @@ Session ID: ${sessionId}
${planList} ${planList}
Ask the user which plan to work on. Present the options above and wait for their response. Ask the user which plan to work on. Present the options above and wait for their response.
${worktreeBlock}
</system-reminder>` </system-reminder>`
} }
} }
@ -229,13 +256,14 @@ Ask the user which plan to work on. Present the options above and wait for their
output.parts[idx].text = output.parts[idx].text output.parts[idx].text = output.parts[idx].text
.replace(/\$SESSION_ID/g, sessionId) .replace(/\$SESSION_ID/g, sessionId)
.replace(/\$TIMESTAMP/g, timestamp) .replace(/\$TIMESTAMP/g, timestamp)
output.parts[idx].text += `\n\n---\n${contextInfo}` output.parts[idx].text += `\n\n---\n${contextInfo}`
} }
log(`[${HOOK_NAME}] Context injected`, { log(`[${HOOK_NAME}] Context injected`, {
sessionID: input.sessionID, sessionID: input.sessionID,
hasExistingState: !!existingState, hasExistingState: !!existingState,
worktreePath,
}) })
}, },
} }

View File

@ -0,0 +1,79 @@
/// <reference types="bun-types" />
import { describe, expect, test, spyOn, beforeEach, afterEach } from "bun:test"
import * as childProcess from "node:child_process"
import { detectWorktreePath } from "./worktree-detector"
// Unit tests for detectWorktreePath. execFileSync is stubbed so no real
// git process is ever spawned; each test controls the fake git output.
describe("detectWorktreePath", () => {
  // Default stub returns an empty string; individual tests override it.
  let execFileSyncSpy: ReturnType<typeof spyOn>
  beforeEach(() => {
    execFileSyncSpy = spyOn(childProcess, "execFileSync").mockImplementation(
      ((_file: string, _args: string[]) => "") as typeof childProcess.execFileSync,
    )
  })
  afterEach(() => {
    execFileSyncSpy.mockRestore()
  })
  describe("when directory is a valid git worktree", () => {
    test("#given valid git dir #when detecting #then returns worktree root path", () => {
      execFileSyncSpy.mockImplementation(
        ((_file: string, _args: string[]) => "/home/user/my-repo\n") as typeof childProcess.execFileSync,
      )
      // when
      const result = detectWorktreePath("/home/user/my-repo/src")
      // then
      expect(result).toBe("/home/user/my-repo")
    })
    test("#given git output with trailing newline #when detecting #then trims output", () => {
      execFileSyncSpy.mockImplementation(
        ((_file: string, _args: string[]) => "/projects/worktree-a\n\n") as typeof childProcess.execFileSync,
      )
      const result = detectWorktreePath("/projects/worktree-a")
      expect(result).toBe("/projects/worktree-a")
    })
    test("#given valid dir #when detecting #then calls git rev-parse with cwd", () => {
      execFileSyncSpy.mockImplementation(
        ((_file: string, _args: string[]) => "/repo\n") as typeof childProcess.execFileSync,
      )
      detectWorktreePath("/repo/some/subdir")
      expect(execFileSyncSpy).toHaveBeenCalledWith(
        "git",
        ["rev-parse", "--show-toplevel"],
        expect.objectContaining({ cwd: "/repo/some/subdir" }),
      )
    })
  })
  describe("when directory is not a git worktree", () => {
    // Any throw from git (not a repo, missing directory) must map to null.
    test("#given non-git directory #when detecting #then returns null", () => {
      execFileSyncSpy.mockImplementation((_file: string, _args: string[]) => {
        throw new Error("not a git repository")
      })
      const result = detectWorktreePath("/tmp/not-a-repo")
      expect(result).toBeNull()
    })
    test("#given non-existent directory #when detecting #then returns null", () => {
      execFileSyncSpy.mockImplementation((_file: string, _args: string[]) => {
        throw new Error("ENOENT: no such file or directory")
      })
      const result = detectWorktreePath("/nonexistent/path")
      expect(result).toBeNull()
    })
  })
})

View File

@ -0,0 +1,14 @@
import { execFileSync } from "node:child_process"
/**
 * Resolve the root directory of the git worktree containing `directory`.
 *
 * Runs `git rev-parse --show-toplevel` with the candidate directory as cwd
 * and returns the trimmed toplevel path. Returns null when the directory
 * does not exist, is not inside a git worktree, git is unavailable, or the
 * command exceeds the 5-second timeout.
 */
export function detectWorktreePath(directory: string): string | null {
  let topLevel: string
  try {
    topLevel = execFileSync("git", ["rev-parse", "--show-toplevel"], {
      cwd: directory,
      encoding: "utf-8",
      timeout: 5000,
      stdio: ["pipe", "pipe", "pipe"],
    })
  } catch {
    // Any failure (ENOENT, non-zero exit, timeout) means "no worktree here".
    return null
  }
  return topLevel.trim()
}

View File

@ -1,4 +1,5 @@
import type { PluginInput } from "@opencode-ai/plugin" import type { PluginInput } from "@opencode-ai/plugin"
import type { BackgroundManager } from "../../features/background-agent"
import { import {
clearContinuationMarker, clearContinuationMarker,
@ -8,6 +9,11 @@ import { log } from "../../shared/logger"
const HOOK_NAME = "stop-continuation-guard" const HOOK_NAME = "stop-continuation-guard"
type StopContinuationBackgroundManager = Pick<
BackgroundManager,
"getAllDescendantTasks" | "cancelTask"
>
export interface StopContinuationGuard { export interface StopContinuationGuard {
event: (input: { event: { type: string; properties?: unknown } }) => Promise<void> event: (input: { event: { type: string; properties?: unknown } }) => Promise<void>
"chat.message": (input: { sessionID?: string }) => Promise<void> "chat.message": (input: { sessionID?: string }) => Promise<void>
@ -17,7 +23,10 @@ export interface StopContinuationGuard {
} }
export function createStopContinuationGuardHook( export function createStopContinuationGuardHook(
ctx: PluginInput ctx: PluginInput,
options?: {
backgroundManager?: StopContinuationBackgroundManager
}
): StopContinuationGuard { ): StopContinuationGuard {
const stoppedSessions = new Set<string>() const stoppedSessions = new Set<string>()
@ -25,6 +34,38 @@ export function createStopContinuationGuardHook(
stoppedSessions.add(sessionID) stoppedSessions.add(sessionID)
setContinuationMarkerSource(ctx.directory, sessionID, "stop", "stopped", "continuation stopped") setContinuationMarkerSource(ctx.directory, sessionID, "stop", "stopped", "continuation stopped")
log(`[${HOOK_NAME}] Continuation stopped for session`, { sessionID }) log(`[${HOOK_NAME}] Continuation stopped for session`, { sessionID })
const backgroundManager = options?.backgroundManager
if (!backgroundManager) {
return
}
const cancellableTasks = backgroundManager
.getAllDescendantTasks(sessionID)
.filter((task) => task.status === "running" || task.status === "pending")
if (cancellableTasks.length === 0) {
return
}
void Promise.allSettled(
cancellableTasks.map(async (task) => {
await backgroundManager.cancelTask(task.id, {
source: "stop-continuation",
reason: "Continuation stopped via /stop-continuation",
abortSession: task.status === "running",
skipNotification: true,
})
})
).then((results) => {
const cancelledCount = results.filter((result) => result.status === "fulfilled").length
const failedCount = results.length - cancelledCount
log(`[${HOOK_NAME}] Cancelled background tasks for stopped session`, {
sessionID,
cancelledCount,
failedCount,
})
})
} }
const isStopped = (sessionID: string): boolean => { const isStopped = (sessionID: string): boolean => {

View File

@ -2,9 +2,15 @@ import { afterEach, describe, expect, test } from "bun:test"
import { mkdtempSync, rmSync } from "node:fs" import { mkdtempSync, rmSync } from "node:fs"
import { join } from "node:path" import { join } from "node:path"
import { tmpdir } from "node:os" import { tmpdir } from "node:os"
import type { BackgroundManager, BackgroundTask } from "../../features/background-agent"
import { readContinuationMarker } from "../../features/run-continuation-state" import { readContinuationMarker } from "../../features/run-continuation-state"
import { createStopContinuationGuardHook } from "./index" import { createStopContinuationGuardHook } from "./index"
type CancelCall = {
taskId: string
options?: Parameters<BackgroundManager["cancelTask"]>[1]
}
describe("stop-continuation-guard", () => { describe("stop-continuation-guard", () => {
const tempDirs: string[] = [] const tempDirs: string[] = []
@ -34,6 +40,33 @@ describe("stop-continuation-guard", () => {
} as any } as any
} }
// Build a minimal BackgroundTask fixture with the given status and id;
// the remaining fields are fixed placeholder values sufficient for the
// stop-continuation-guard tests.
function createBackgroundTask(status: BackgroundTask["status"], id: string): BackgroundTask {
  return {
    id,
    status,
    description: `${id} description`,
    parentSessionID: "parent-session",
    parentMessageID: "parent-message",
    prompt: "prompt",
    agent: "sisyphus-junior",
  }
}
// Stub BackgroundManager that serves a fixed task list and records every
// cancelTask invocation into `cancelCalls` for later assertions.
function createMockBackgroundManager(
  tasks: BackgroundTask[],
  cancelCalls: CancelCall[],
): Pick<BackgroundManager, "getAllDescendantTasks" | "cancelTask"> {
  const getAllDescendantTasks = () => tasks
  const cancelTask = async (
    taskId: string,
    options?: Parameters<BackgroundManager["cancelTask"]>[1],
  ) => {
    // Capture the call synchronously so tests can assert order and options.
    cancelCalls.push({ taskId, options })
    return true
  }
  return { getAllDescendantTasks, cancelTask }
}
// Yield to the microtask queue twice so promise continuations queued by the
// code under test (e.g. Promise.allSettled handlers) have a chance to run.
async function flushMicrotasks(): Promise<void> {
  for (let tick = 0; tick < 2; tick += 1) {
    await Promise.resolve()
  }
}
test("should mark session as stopped", () => { test("should mark session as stopped", () => {
// given - a guard hook with no stopped sessions // given - a guard hook with no stopped sessions
const input = createMockPluginInput() const input = createMockPluginInput()
@ -166,4 +199,31 @@ describe("stop-continuation-guard", () => {
// then - should not throw and stopped session remains stopped // then - should not throw and stopped session remains stopped
expect(guard.isStopped("some-session")).toBe(true) expect(guard.isStopped("some-session")).toBe(true)
}) })
// Verifies that stopping continuation cancels only active background tasks:
// the running task is cancelled with abortSession=true, the pending task
// with abortSession=false, and the completed task is left untouched.
test("should cancel only running and pending background tasks on stop", async () => {
  // given - a background manager with mixed task statuses
  const cancelCalls: CancelCall[] = []
  const backgroundManager = createMockBackgroundManager(
    [
      createBackgroundTask("running", "task-running"),
      createBackgroundTask("pending", "task-pending"),
      createBackgroundTask("completed", "task-completed"),
    ],
    cancelCalls,
  )
  const guard = createStopContinuationGuardHook(createMockPluginInput(), {
    backgroundManager,
  })
  // when - stop continuation is triggered
  guard.stop("test-session-bg")
  await flushMicrotasks()
  // then - only running and pending tasks are cancelled
  expect(cancelCalls).toHaveLength(2)
  expect(cancelCalls[0]?.taskId).toBe("task-running")
  expect(cancelCalls[0]?.options?.abortSession).toBe(true)
  expect(cancelCalls[1]?.taskId).toBe("task-pending")
  expect(cancelCalls[1]?.options?.abortSession).toBe(false)
})
}) })

View File

@ -1,6 +1,6 @@
import { detectThinkKeyword, extractPromptText } from "./detector" import { detectThinkKeyword, extractPromptText } from "./detector"
import { getHighVariant, getThinkingConfig, isAlreadyHighVariant } from "./switcher" import { getHighVariant, isAlreadyHighVariant } from "./switcher"
import type { ThinkModeInput, ThinkModeState } from "./types" import type { ThinkModeState } from "./types"
import { log } from "../../shared" import { log } from "../../shared"
const thinkModeState = new Map<string, ThinkModeState>() const thinkModeState = new Map<string, ThinkModeState>()
@ -10,53 +10,24 @@ export function clearThinkModeState(sessionID: string): void {
} }
export function createThinkModeHook() { export function createThinkModeHook() {
function isDisabledThinkingConfig(config: Record<string, unknown>): boolean {
const thinkingConfig = config.thinking
if (
typeof thinkingConfig === "object" &&
thinkingConfig !== null &&
"type" in thinkingConfig &&
(thinkingConfig as { type?: string }).type === "disabled"
) {
return true
}
const providerOptions = config.providerOptions
if (typeof providerOptions !== "object" || providerOptions === null) {
return false
}
return Object.values(providerOptions as Record<string, unknown>).some(
(providerConfig) => {
if (typeof providerConfig !== "object" || providerConfig === null) {
return false
}
const providerConfigMap = providerConfig as Record<string, unknown>
const extraBody = providerConfigMap.extra_body
if (typeof extraBody !== "object" || extraBody === null) {
return false
}
const extraBodyMap = extraBody as Record<string, unknown>
const extraThinking = extraBodyMap.thinking
return (
typeof extraThinking === "object" &&
extraThinking !== null &&
(extraThinking as { type?: string }).type === "disabled"
)
}
)
}
return { return {
"chat.params": async (output: ThinkModeInput, sessionID: string): Promise<void> => { "chat.message": async (
input: {
sessionID: string
model?: { providerID: string; modelID: string }
},
output: {
message: Record<string, unknown>
parts: Array<{ type: string; text?: string; [key: string]: unknown }>
}
): Promise<void> => {
const promptText = extractPromptText(output.parts) const promptText = extractPromptText(output.parts)
const sessionID = input.sessionID
const state: ThinkModeState = { const state: ThinkModeState = {
requested: false, requested: false,
modelSwitched: false, modelSwitched: false,
thinkingConfigInjected: false, variantSet: false,
} }
if (!detectThinkKeyword(promptText)) { if (!detectThinkKeyword(promptText)) {
@ -66,7 +37,12 @@ export function createThinkModeHook() {
state.requested = true state.requested = true
const currentModel = output.message.model if (typeof output.message.variant === "string") {
thinkModeState.set(sessionID, state)
return
}
const currentModel = input.model
if (!currentModel) { if (!currentModel) {
thinkModeState.set(sessionID, state) thinkModeState.set(sessionID, state)
return return
@ -81,14 +57,15 @@ export function createThinkModeHook() {
} }
const highVariant = getHighVariant(currentModel.modelID) const highVariant = getHighVariant(currentModel.modelID)
const thinkingConfig = getThinkingConfig(currentModel.providerID, currentModel.modelID)
if (highVariant) { if (highVariant) {
output.message.model = { output.message.model = {
providerID: currentModel.providerID, providerID: currentModel.providerID,
modelID: highVariant, modelID: highVariant,
} }
output.message.variant = "high"
state.modelSwitched = true state.modelSwitched = true
state.variantSet = true
log("Think mode: model switched to high variant", { log("Think mode: model switched to high variant", {
sessionID, sessionID,
from: currentModel.modelID, from: currentModel.modelID,
@ -96,42 +73,6 @@ export function createThinkModeHook() {
}) })
} }
if (thinkingConfig) {
const messageData = output.message as Record<string, unknown>
const agentThinking = messageData.thinking as { type?: string } | undefined
const agentProviderOptions = messageData.providerOptions
const agentDisabledThinking = agentThinking?.type === "disabled"
const agentHasCustomProviderOptions = Boolean(agentProviderOptions)
if (agentDisabledThinking) {
log("Think mode: skipping - agent has thinking disabled", {
sessionID,
provider: currentModel.providerID,
})
} else if (agentHasCustomProviderOptions) {
log("Think mode: skipping - agent has custom providerOptions", {
sessionID,
provider: currentModel.providerID,
})
} else if (
!isDisabledThinkingConfig(thinkingConfig as Record<string, unknown>)
) {
Object.assign(output.message, thinkingConfig)
state.thinkingConfigInjected = true
log("Think mode: thinking config injected", {
sessionID,
provider: currentModel.providerID,
config: thinkingConfig,
})
} else {
log("Think mode: skipping disabled thinking config", {
sessionID,
provider: currentModel.providerID,
})
}
}
thinkModeState.set(sessionID, state) thinkModeState.set(sessionID, state)
}, },

View File

@ -1,452 +1,155 @@
import { describe, expect, it, beforeEach } from "bun:test" import { beforeEach, describe, expect, it } from "bun:test"
import type { ThinkModeInput } from "./types"
const { createThinkModeHook, clearThinkModeState } = await import("./index") const { clearThinkModeState, createThinkModeHook } = await import("./index")
type ThinkModeHookInput = {
sessionID: string
model?: { providerID: string; modelID: string }
}
type ThinkModeHookOutput = {
message: Record<string, unknown>
parts: Array<{ type: string; text?: string; [key: string]: unknown }>
}
function createHookInput(args: {
sessionID?: string
providerID?: string
modelID?: string
}): ThinkModeHookInput {
const { sessionID = "test-session-id", providerID, modelID } = args
if (!providerID || !modelID) {
return { sessionID }
}
/**
* Helper to create a mock ThinkModeInput for testing
*/
function createMockInput(
providerID: string,
modelID: string,
promptText: string
): ThinkModeInput {
return { return {
parts: [{ type: "text", text: promptText }], sessionID,
message: { model: { providerID, modelID },
model: {
providerID,
modelID,
},
},
} }
} }
/** function createHookOutput(promptText: string, variant?: string): ThinkModeHookOutput {
* Type helper for accessing dynamically injected properties on message return {
*/ message: variant ? { variant } : {},
type MessageWithInjectedProps = Record<string, unknown> parts: [{ type: "text", text: promptText }],
}
}
describe("createThinkModeHook integration", () => { describe("createThinkModeHook", () => {
const sessionID = "test-session-id" const sessionID = "test-session-id"
beforeEach(() => { beforeEach(() => {
clearThinkModeState(sessionID) clearThinkModeState(sessionID)
}) })
describe("GitHub Copilot provider integration", () => { it("sets high variant and switches model when think keyword is present", async () => {
describe("Claude models", () => { // given
it("should activate thinking mode for github-copilot Claude with think keyword", async () => { const hook = createThinkModeHook()
// given a github-copilot Claude model and prompt with "think" keyword const input = createHookInput({
const hook = createThinkModeHook() sessionID,
const input = createMockInput( providerID: "github-copilot",
"github-copilot", modelID: "claude-opus-4-6",
"claude-opus-4-6",
"Please think deeply about this problem"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant and inject thinking config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-opus-4-6-high")
expect(message.thinking).toBeDefined()
expect((message.thinking as Record<string, unknown>)?.type).toBe(
"enabled"
)
expect(
(message.thinking as Record<string, unknown>)?.budgetTokens
).toBe(64000)
})
it("should handle github-copilot Claude with dots in version", async () => {
// given a github-copilot Claude model with dot format (claude-opus-4.6)
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"claude-opus-4.6",
"ultrathink mode"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant (hyphen format)
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-opus-4-6-high")
expect(message.thinking).toBeDefined()
})
it("should handle github-copilot Claude Sonnet", async () => {
// given a github-copilot Claude Sonnet model
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"claude-sonnet-4-6",
"think about this"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high")
expect(message.thinking).toBeDefined()
})
}) })
const output = createHookOutput("Please think deeply about this")
describe("Gemini models", () => { // when
it("should activate thinking mode for github-copilot Gemini Pro", async () => { await hook["chat.message"](input, output)
// given a github-copilot Gemini Pro model
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"gemini-3-pro",
"think about this"
)
// when the chat.params hook is called // then
await hook["chat.params"](input, sessionID) expect(output.message.variant).toBe("high")
expect(output.message.model).toEqual({
// then should upgrade to high variant and inject google thinking config providerID: "github-copilot",
const message = input.message as MessageWithInjectedProps modelID: "claude-opus-4-6-high",
expect(input.message.model?.modelID).toBe("gemini-3-pro-high")
expect(message.providerOptions).toBeDefined()
const googleOptions = (
message.providerOptions as Record<string, unknown>
)?.google as Record<string, unknown>
expect(googleOptions?.thinkingConfig).toBeDefined()
})
it("should activate thinking mode for github-copilot Gemini Flash", async () => {
// given a github-copilot Gemini Flash model
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"gemini-3-flash",
"ultrathink"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gemini-3-flash-high")
expect(message.providerOptions).toBeDefined()
})
})
describe("GPT models", () => {
it("should activate thinking mode for github-copilot GPT-5.2", async () => {
// given a github-copilot GPT-5.2 model
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"gpt-5.2",
"please think"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant and inject openai thinking config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gpt-5-2-high")
expect(message.reasoning_effort).toBe("high")
})
it("should activate thinking mode for github-copilot GPT-5", async () => {
// given a github-copilot GPT-5 model
const hook = createThinkModeHook()
const input = createMockInput("github-copilot", "gpt-5", "think deeply")
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should upgrade to high variant
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gpt-5-high")
expect(message.reasoning_effort).toBe("high")
})
})
describe("No think keyword", () => {
it("should NOT activate for github-copilot without think keyword", async () => {
// given a prompt without any think keyword
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"claude-opus-4-6",
"Just do this task"
)
const originalModelID = input.message.model?.modelID
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should NOT change model or inject config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe(originalModelID)
expect(message.thinking).toBeUndefined()
})
}) })
}) })
describe("Backwards compatibility with direct providers", () => { it("supports dotted model IDs by switching to normalized high variant", async () => {
it("should still work for direct anthropic provider", async () => { // given
// given direct anthropic provider const hook = createThinkModeHook()
const hook = createThinkModeHook() const input = createHookInput({
const input = createMockInput( sessionID,
"anthropic", providerID: "github-copilot",
"claude-sonnet-4-6", modelID: "gpt-5.2",
"think about this"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should work as before
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high")
expect(message.thinking).toBeDefined()
}) })
const output = createHookOutput("ultrathink about this")
it("should work for direct google-vertex-anthropic provider", async () => { // when
//#given direct google-vertex-anthropic provider await hook["chat.message"](input, output)
const hook = createThinkModeHook()
const input = createMockInput(
"google-vertex-anthropic",
"claude-opus-4-6",
"think deeply"
)
//#when the chat.params hook is called // then
await hook["chat.params"](input, sessionID) expect(output.message.variant).toBe("high")
expect(output.message.model).toEqual({
//#then should upgrade model and inject Claude thinking config providerID: "github-copilot",
const message = input.message as MessageWithInjectedProps modelID: "gpt-5-2-high",
expect(input.message.model?.modelID).toBe("claude-opus-4-6-high")
expect(message.thinking).toBeDefined()
expect((message.thinking as Record<string, unknown>)?.budgetTokens).toBe(
64000
)
})
it("should still work for direct google provider", async () => {
// given direct google provider
const hook = createThinkModeHook()
const input = createMockInput(
"google",
"gemini-3-pro",
"think about this"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should work as before
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gemini-3-pro-high")
expect(message.providerOptions).toBeDefined()
})
it("should still work for direct openai provider", async () => {
// given direct openai provider
const hook = createThinkModeHook()
const input = createMockInput("openai", "gpt-5", "think about this")
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should work
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gpt-5-high")
expect(message.reasoning_effort).toBe("high")
})
it("should still work for amazon-bedrock provider", async () => {
// given amazon-bedrock provider
const hook = createThinkModeHook()
const input = createMockInput(
"amazon-bedrock",
"claude-sonnet-4-6",
"think"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should inject bedrock thinking config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high")
expect(message.reasoningConfig).toBeDefined()
}) })
}) })
describe("Already-high variants", () => { it("skips when message variant is already set", async () => {
it("should NOT re-upgrade already-high variants", async () => { // given
// given an already-high variant model const hook = createThinkModeHook()
const hook = createThinkModeHook() const input = createHookInput({
const input = createMockInput( sessionID,
"github-copilot", providerID: "github-copilot",
"claude-opus-4-6-high", modelID: "claude-sonnet-4-6",
"think deeply"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should NOT modify the model (already high)
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-opus-4-6-high")
// No additional thinking config should be injected
expect(message.thinking).toBeUndefined()
}) })
const output = createHookOutput("think through this", "max")
it("should NOT re-upgrade already-high GPT variants", async () => { // when
// given an already-high GPT variant await hook["chat.message"](input, output)
const hook = createThinkModeHook()
const input = createMockInput(
"github-copilot",
"gpt-5.2-high",
"ultrathink"
)
// when the chat.params hook is called // then
await hook["chat.params"](input, sessionID) expect(output.message.variant).toBe("max")
expect(output.message.model).toBeUndefined()
// then should NOT modify the model
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("gpt-5.2-high")
expect(message.reasoning_effort).toBeUndefined()
})
}) })
describe("Unknown models", () => { it("does nothing when think keyword is absent", async () => {
it("should not crash for unknown models via github-copilot", async () => { // given
// given an unknown model type const hook = createThinkModeHook()
const hook = createThinkModeHook() const input = createHookInput({
const input = createMockInput( sessionID,
"github-copilot", providerID: "google",
"llama-3-70b", modelID: "gemini-3.1-pro",
"think about this"
)
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should not crash and model should remain unchanged
expect(input.message.model?.modelID).toBe("llama-3-70b")
}) })
const output = createHookOutput("Please solve this directly")
// when
await hook["chat.message"](input, output)
// then
expect(output.message.variant).toBeUndefined()
expect(output.message.model).toBeUndefined()
}) })
describe("Edge cases", () => { it("does not modify already-high models", async () => {
it("should handle missing model gracefully", async () => { // given
// given input without a model const hook = createThinkModeHook()
const hook = createThinkModeHook() const input = createHookInput({
const input: ThinkModeInput = { sessionID,
parts: [{ type: "text", text: "think about this" }], providerID: "openai",
message: {}, modelID: "gpt-5-high",
}
// when the chat.params hook is called
// then should not crash
await expect(
hook["chat.params"](input, sessionID)
).resolves.toBeUndefined()
}) })
const output = createHookOutput("think deeply")
it("should handle empty prompt gracefully", async () => { // when
// given empty prompt await hook["chat.message"](input, output)
const hook = createThinkModeHook()
const input = createMockInput("github-copilot", "claude-opus-4-6", "")
// when the chat.params hook is called // then
await hook["chat.params"](input, sessionID) expect(output.message.variant).toBeUndefined()
expect(output.message.model).toBeUndefined()
// then should not upgrade (no think keyword)
expect(input.message.model?.modelID).toBe("claude-opus-4-6")
})
}) })
describe("Agent-level thinking configuration respect", () => { it("handles missing input model without crashing", async () => {
it("should omit Z.ai GLM disabled thinking config", async () => { // given
//#given a Z.ai GLM model with think prompt const hook = createThinkModeHook()
const hook = createThinkModeHook() const input = createHookInput({ sessionID })
const input = createMockInput( const output = createHookOutput("think about this")
"zai-coding-plan",
"glm-5",
"ultrathink mode"
)
//#when think mode resolves Z.ai thinking configuration // when
await hook["chat.params"](input, sessionID) await expect(hook["chat.message"](input, output)).resolves.toBeUndefined()
//#then thinking config should be omitted from request // then
const message = input.message as MessageWithInjectedProps expect(output.message.variant).toBeUndefined()
expect(input.message.model?.modelID).toBe("glm-5") expect(output.message.model).toBeUndefined()
expect(message.thinking).toBeUndefined()
expect(message.providerOptions).toBeUndefined()
})
it("should NOT inject thinking config when agent has thinking disabled", async () => {
// given agent with thinking explicitly disabled
const hook = createThinkModeHook()
const input: ThinkModeInput = {
parts: [{ type: "text", text: "ultrathink deeply" }],
message: {
model: { providerID: "google", modelID: "gemini-3-pro" },
thinking: { type: "disabled" },
} as ThinkModeInput["message"],
}
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should NOT override agent's thinking disabled setting
const message = input.message as MessageWithInjectedProps
expect((message.thinking as { type: string }).type).toBe("disabled")
expect(message.providerOptions).toBeUndefined()
})
it("should NOT inject thinking config when agent has custom providerOptions", async () => {
// given agent with custom providerOptions
const hook = createThinkModeHook()
const input: ThinkModeInput = {
parts: [{ type: "text", text: "ultrathink" }],
message: {
model: { providerID: "google", modelID: "gemini-3-flash" },
providerOptions: {
google: { thinkingConfig: { thinkingBudget: 0 } },
},
} as ThinkModeInput["message"],
}
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should NOT override agent's providerOptions
const message = input.message as MessageWithInjectedProps
const providerOpts = message.providerOptions as Record<string, unknown>
expect((providerOpts.google as Record<string, unknown>).thinkingConfig).toEqual({
thinkingBudget: 0,
})
})
it("should still inject thinking config when agent has no thinking override", async () => {
// given agent without thinking override
const hook = createThinkModeHook()
const input = createMockInput("google", "gemini-3-pro", "ultrathink")
// when the chat.params hook is called
await hook["chat.params"](input, sessionID)
// then should inject thinking config as normal
const message = input.message as MessageWithInjectedProps
expect(message.providerOptions).toBeDefined()
})
}) })
}) })

View File

@ -1,128 +1,10 @@
import { describe, expect, it } from "bun:test" import { describe, expect, it } from "bun:test"
import { import {
getHighVariant, getHighVariant,
getThinkingConfig,
isAlreadyHighVariant, isAlreadyHighVariant,
THINKING_CONFIGS,
} from "./switcher" } from "./switcher"
describe("think-mode switcher", () => { describe("think-mode switcher", () => {
describe("GitHub Copilot provider support", () => {
describe("Claude models via github-copilot", () => {
it("should resolve github-copilot Claude Opus to anthropic config", () => {
// given a github-copilot provider with Claude Opus model
const providerID = "github-copilot"
const modelID = "claude-opus-4-6"
// when getting thinking config
const config = getThinkingConfig(providerID, modelID)
// then should return anthropic thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
expect((config?.thinking as Record<string, unknown>)?.type).toBe(
"enabled"
)
expect((config?.thinking as Record<string, unknown>)?.budgetTokens).toBe(
64000
)
})
it("should resolve github-copilot Claude Sonnet to anthropic config", () => {
// given a github-copilot provider with Claude Sonnet model
const config = getThinkingConfig("github-copilot", "claude-sonnet-4-6")
// then should return anthropic thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
})
it("should handle Claude with dots in version number", () => {
// given a model ID with dots (claude-opus-4.6)
const config = getThinkingConfig("github-copilot", "claude-opus-4.6")
// then should still return anthropic thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
})
})
describe("Gemini models via github-copilot", () => {
it("should resolve github-copilot Gemini Pro to google config", () => {
// given a github-copilot provider with Gemini Pro model
const config = getThinkingConfig("github-copilot", "gemini-3-pro")
// then should return google thinking config
expect(config).not.toBeNull()
expect(config?.providerOptions).toBeDefined()
const googleOptions = (
config?.providerOptions as Record<string, unknown>
)?.google as Record<string, unknown>
expect(googleOptions?.thinkingConfig).toBeDefined()
})
it("should resolve github-copilot Gemini Flash to google config", () => {
// given a github-copilot provider with Gemini Flash model
const config = getThinkingConfig(
"github-copilot",
"gemini-3-flash"
)
// then should return google thinking config
expect(config).not.toBeNull()
expect(config?.providerOptions).toBeDefined()
})
})
describe("GPT models via github-copilot", () => {
it("should resolve github-copilot GPT-5.2 to openai config", () => {
// given a github-copilot provider with GPT-5.2 model
const config = getThinkingConfig("github-copilot", "gpt-5.2")
// then should return openai thinking config
expect(config).not.toBeNull()
expect(config?.reasoning_effort).toBe("high")
})
it("should resolve github-copilot GPT-5 to openai config", () => {
// given a github-copilot provider with GPT-5 model
const config = getThinkingConfig("github-copilot", "gpt-5")
// then should return openai thinking config
expect(config).not.toBeNull()
expect(config?.reasoning_effort).toBe("high")
})
it("should resolve github-copilot o1 to openai config", () => {
// given a github-copilot provider with o1 model
const config = getThinkingConfig("github-copilot", "o1-preview")
// then should return openai thinking config
expect(config).not.toBeNull()
expect(config?.reasoning_effort).toBe("high")
})
it("should resolve github-copilot o3 to openai config", () => {
// given a github-copilot provider with o3 model
const config = getThinkingConfig("github-copilot", "o3-mini")
// then should return openai thinking config
expect(config).not.toBeNull()
expect(config?.reasoning_effort).toBe("high")
})
})
describe("Unknown models via github-copilot", () => {
it("should return null for unknown model types", () => {
// given a github-copilot provider with unknown model
const config = getThinkingConfig("github-copilot", "llama-3-70b")
// then should return null (no matching provider)
expect(config).toBeNull()
})
})
})
describe("Model ID normalization", () => { describe("Model ID normalization", () => {
describe("getHighVariant with dots vs hyphens", () => { describe("getHighVariant with dots vs hyphens", () => {
it("should handle dots in Claude version numbers", () => { it("should handle dots in Claude version numbers", () => {
@ -167,8 +49,8 @@ describe("think-mode switcher", () => {
it("should handle Gemini preview variants", () => { it("should handle Gemini preview variants", () => {
// given Gemini preview model IDs // given Gemini preview model IDs
expect(getHighVariant("gemini-3-pro")).toBe( expect(getHighVariant("gemini-3.1-pro")).toBe(
"gemini-3-pro-high" "gemini-3-1-pro-high"
) )
expect(getHighVariant("gemini-3-flash")).toBe( expect(getHighVariant("gemini-3-flash")).toBe(
"gemini-3-flash-high" "gemini-3-flash-high"
@ -179,7 +61,7 @@ describe("think-mode switcher", () => {
// given model IDs that are already high variants // given model IDs that are already high variants
expect(getHighVariant("claude-opus-4-6-high")).toBeNull() expect(getHighVariant("claude-opus-4-6-high")).toBeNull()
expect(getHighVariant("gpt-5-2-high")).toBeNull() expect(getHighVariant("gpt-5-2-high")).toBeNull()
expect(getHighVariant("gemini-3-pro-high")).toBeNull() expect(getHighVariant("gemini-3-1-pro-high")).toBeNull()
}) })
it("should return null for unknown models", () => { it("should return null for unknown models", () => {
@ -195,7 +77,7 @@ describe("think-mode switcher", () => {
// given model IDs with -high suffix // given model IDs with -high suffix
expect(isAlreadyHighVariant("claude-opus-4-6-high")).toBe(true) expect(isAlreadyHighVariant("claude-opus-4-6-high")).toBe(true)
expect(isAlreadyHighVariant("gpt-5-2-high")).toBe(true) expect(isAlreadyHighVariant("gpt-5-2-high")).toBe(true)
expect(isAlreadyHighVariant("gemini-3-pro-high")).toBe(true) expect(isAlreadyHighVariant("gemini-3.1-pro-high")).toBe(true)
}) })
it("should detect -high suffix after normalization", () => { it("should detect -high suffix after normalization", () => {
@ -208,7 +90,7 @@ describe("think-mode switcher", () => {
expect(isAlreadyHighVariant("claude-opus-4-6")).toBe(false) expect(isAlreadyHighVariant("claude-opus-4-6")).toBe(false)
expect(isAlreadyHighVariant("claude-opus-4.6")).toBe(false) expect(isAlreadyHighVariant("claude-opus-4.6")).toBe(false)
expect(isAlreadyHighVariant("gpt-5.2")).toBe(false) expect(isAlreadyHighVariant("gpt-5.2")).toBe(false)
expect(isAlreadyHighVariant("gemini-3-pro")).toBe(false) expect(isAlreadyHighVariant("gemini-3.1-pro")).toBe(false)
}) })
it("should return false for models with 'high' in name but not suffix", () => { it("should return false for models with 'high' in name but not suffix", () => {
@ -217,149 +99,6 @@ describe("think-mode switcher", () => {
}) })
}) })
describe("getThinkingConfig", () => {
describe("Already high variants", () => {
it("should return null for already-high variants", () => {
// given already-high model variants
expect(
getThinkingConfig("anthropic", "claude-opus-4-6-high")
).toBeNull()
expect(getThinkingConfig("openai", "gpt-5-2-high")).toBeNull()
expect(getThinkingConfig("google", "gemini-3-pro-high")).toBeNull()
})
it("should return null for already-high variants via github-copilot", () => {
// given already-high model variants via github-copilot
expect(
getThinkingConfig("github-copilot", "claude-opus-4-6-high")
).toBeNull()
expect(getThinkingConfig("github-copilot", "gpt-5.2-high")).toBeNull()
})
})
describe("Non-thinking-capable models", () => {
it("should return null for non-thinking-capable models", () => {
// given models that don't support thinking mode
expect(getThinkingConfig("anthropic", "claude-2")).toBeNull()
expect(getThinkingConfig("openai", "gpt-4")).toBeNull()
expect(getThinkingConfig("google", "gemini-1")).toBeNull()
})
})
describe("Unknown providers", () => {
it("should return null for unknown providers", () => {
// given unknown provider IDs
expect(getThinkingConfig("unknown-provider", "some-model")).toBeNull()
expect(getThinkingConfig("azure", "gpt-5")).toBeNull()
})
})
})
describe("Direct provider configs (backwards compatibility)", () => {
it("should still work for direct anthropic provider", () => {
// given direct anthropic provider
const config = getThinkingConfig("anthropic", "claude-opus-4-6")
// then should return anthropic thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
expect((config?.thinking as Record<string, unknown>)?.type).toBe("enabled")
})
it("should work for direct google-vertex-anthropic provider", () => {
//#given direct google-vertex-anthropic provider
const config = getThinkingConfig(
"google-vertex-anthropic",
"claude-opus-4-6"
)
//#when thinking config is resolved
//#then it should return anthropic-style thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
expect((config?.thinking as Record<string, unknown>)?.type).toBe("enabled")
expect((config?.thinking as Record<string, unknown>)?.budgetTokens).toBe(
64000
)
})
it("should still work for direct google provider", () => {
// given direct google provider
const config = getThinkingConfig("google", "gemini-3-pro")
// then should return google thinking config
expect(config).not.toBeNull()
expect(config?.providerOptions).toBeDefined()
})
it("should still work for amazon-bedrock provider", () => {
// given amazon-bedrock provider with claude model
const config = getThinkingConfig("amazon-bedrock", "claude-sonnet-4-6")
// then should return bedrock thinking config
expect(config).not.toBeNull()
expect(config?.reasoningConfig).toBeDefined()
})
it("should still work for google-vertex provider", () => {
// given google-vertex provider
const config = getThinkingConfig("google-vertex", "gemini-3-pro")
// then should return google-vertex thinking config
expect(config).not.toBeNull()
expect(config?.providerOptions).toBeDefined()
const vertexOptions = (config?.providerOptions as Record<string, unknown>)?.[
"google-vertex"
] as Record<string, unknown>
expect(vertexOptions?.thinkingConfig).toBeDefined()
})
it("should work for direct openai provider", () => {
// given direct openai provider
const config = getThinkingConfig("openai", "gpt-5")
// then should return openai thinking config
expect(config).not.toBeNull()
expect(config?.reasoning_effort).toBe("high")
})
})
// Shape checks over the static THINKING_CONFIGS table itself (no behavior,
// just asserting each provider entry exposes the expected keys/values).
describe("THINKING_CONFIGS structure", () => {
  it("should have correct structure for anthropic", () => {
    const config = THINKING_CONFIGS.anthropic
    expect(config.thinking).toBeDefined()
    expect(config.maxTokens).toBe(128000)
  })
  it("should have correct structure for google-vertex-anthropic", () => {
    //#given google-vertex-anthropic config entry
    const config = THINKING_CONFIGS["google-vertex-anthropic"]
    //#when structure is validated
    //#then it should match anthropic style structure
    expect(config.thinking).toBeDefined()
    expect(config.maxTokens).toBe(128000)
  })
  it("should have correct structure for google", () => {
    const config = THINKING_CONFIGS.google
    expect(config.providerOptions).toBeDefined()
  })
  it("should have correct structure for openai", () => {
    const config = THINKING_CONFIGS.openai
    expect(config.reasoning_effort).toBe("high")
  })
  it("should have correct structure for amazon-bedrock", () => {
    const config = THINKING_CONFIGS["amazon-bedrock"]
    expect(config.reasoningConfig).toBeDefined()
    expect(config.maxTokens).toBe(64000)
  })
})
describe("Custom provider prefixes support", () => { describe("Custom provider prefixes support", () => {
describe("getHighVariant with prefixes", () => { describe("getHighVariant with prefixes", () => {
it("should preserve vertex_ai/ prefix when getting high variant", () => { it("should preserve vertex_ai/ prefix when getting high variant", () => {
@ -390,7 +129,7 @@ describe("think-mode switcher", () => {
// given various custom prefixes // given various custom prefixes
expect(getHighVariant("azure/gpt-5")).toBe("azure/gpt-5-high") expect(getHighVariant("azure/gpt-5")).toBe("azure/gpt-5-high")
expect(getHighVariant("bedrock/claude-sonnet-4-6")).toBe("bedrock/claude-sonnet-4-6-high") expect(getHighVariant("bedrock/claude-sonnet-4-6")).toBe("bedrock/claude-sonnet-4-6-high")
expect(getHighVariant("custom-llm/gemini-3-pro")).toBe("custom-llm/gemini-3-pro-high") expect(getHighVariant("custom-llm/gemini-3.1-pro")).toBe("custom-llm/gemini-3-1-pro-high")
}) })
it("should return null for prefixed models without high variant mapping", () => { it("should return null for prefixed models without high variant mapping", () => {
@ -411,7 +150,7 @@ describe("think-mode switcher", () => {
// given prefixed model IDs with -high suffix // given prefixed model IDs with -high suffix
expect(isAlreadyHighVariant("vertex_ai/claude-opus-4-6-high")).toBe(true) expect(isAlreadyHighVariant("vertex_ai/claude-opus-4-6-high")).toBe(true)
expect(isAlreadyHighVariant("openai/gpt-5-2-high")).toBe(true) expect(isAlreadyHighVariant("openai/gpt-5-2-high")).toBe(true)
expect(isAlreadyHighVariant("custom/gemini-3-pro-high")).toBe(true) expect(isAlreadyHighVariant("custom/gemini-3.1-pro-high")).toBe(true)
}) })
it("should return false for prefixed base models", () => { it("should return false for prefixed base models", () => {
@ -426,141 +165,5 @@ describe("think-mode switcher", () => {
expect(isAlreadyHighVariant("vertex_ai/gpt-5.2-high")).toBe(true) expect(isAlreadyHighVariant("vertex_ai/gpt-5.2-high")).toBe(true)
}) })
}) })
})
// Prefix handling (e.g. "vertex_ai/<model>") when resolving thinking configs.
describe("getThinkingConfig with prefixes", () => {
  it("should return null for custom providers (not in THINKING_CONFIGS)", () => {
    // given custom provider with prefixed Claude model
    const config = getThinkingConfig("dia-llm", "vertex_ai/claude-sonnet-4-6")
    // then should return null (custom provider not in THINKING_CONFIGS)
    expect(config).toBeNull()
  })
  it("should work with prefixed models on known providers", () => {
    // given known provider (anthropic) with prefixed model
    // This tests that the base model name is correctly extracted for capability check
    const config = getThinkingConfig("anthropic", "custom-prefix/claude-opus-4-6")
    // then should return thinking config (base model is capable)
    expect(config).not.toBeNull()
    expect(config?.thinking).toBeDefined()
  })
  it("should return null for prefixed models that are already high", () => {
    // given prefixed already-high model
    const config = getThinkingConfig("anthropic", "vertex_ai/claude-opus-4-6-high")
    // then should return null
    expect(config).toBeNull()
  })
})
// End-to-end scenario: an LLM proxy provider whose model IDs carry a
// "vertex_ai/" prefix, exercising getHighVariant + isAlreadyHighVariant +
// getThinkingConfig together.
describe("Real-world custom provider scenario", () => {
  it("should handle LLM proxy with vertex_ai prefix correctly", () => {
    // given a custom LLM proxy provider using vertex_ai/ prefix
    const providerID = "dia-llm"
    const modelID = "vertex_ai/claude-sonnet-4-6"
    // when getting high variant
    const highVariant = getHighVariant(modelID)
    // then should preserve the prefix
    expect(highVariant).toBe("vertex_ai/claude-sonnet-4-6-high")
    // #and when checking if already high
    expect(isAlreadyHighVariant(modelID)).toBe(false)
    expect(isAlreadyHighVariant(highVariant!)).toBe(true)
    // #and when getting thinking config for custom provider
    const config = getThinkingConfig(providerID, modelID)
    // then should return null (custom provider, not anthropic)
    // This prevents applying incompatible thinking configs to custom providers
    expect(config).toBeNull()
  })
  it("should not break when switching to high variant in think mode", () => {
    // given think mode switching vertex_ai/claude model to high variant
    const original = "vertex_ai/claude-opus-4-6"
    const high = getHighVariant(original)
    // then the high variant should be valid
    expect(high).toBe("vertex_ai/claude-opus-4-6-high")
    // #and should be recognized as already high
    expect(isAlreadyHighVariant(high!)).toBe(true)
    // #and switching again should return null (already high)
    expect(getHighVariant(high!)).toBeNull()
  })
})
})
// Z.AI coding-plan provider: GLM models get a thinking config that explicitly
// DISABLES thinking (via extra_body), and have no "-high" variants.
describe("Z.AI GLM-4.7 provider support", () => {
  describe("getThinkingConfig for zai-coding-plan", () => {
    it("should return thinking config for glm-5", () => {
      //#given a Z.ai GLM model
      const config = getThinkingConfig("zai-coding-plan", "glm-5")
      //#when thinking config is resolved
      //#then thinking type is "disabled"
      expect(config).not.toBeNull()
      expect(config?.providerOptions).toBeDefined()
      const zaiOptions = (config?.providerOptions as Record<string, unknown>)?.[
        "zai-coding-plan"
      ] as Record<string, unknown>
      expect(zaiOptions?.extra_body).toBeDefined()
      const extraBody = zaiOptions?.extra_body as Record<string, unknown>
      expect(extraBody?.thinking).toBeDefined()
      expect((extraBody?.thinking as Record<string, unknown>)?.type).toBe("disabled")
    })
    it("should return thinking config for glm-4.6v (multimodal)", () => {
      // given zai-coding-plan provider with glm-4.6v model
      const config = getThinkingConfig("zai-coding-plan", "glm-4.6v")
      // then should return zai-coding-plan thinking config
      expect(config).not.toBeNull()
      expect(config?.providerOptions).toBeDefined()
    })
    it("should return null for non-GLM models on zai-coding-plan", () => {
      // given zai-coding-plan provider with unknown model
      const config = getThinkingConfig("zai-coding-plan", "some-other-model")
      // then should return null
      expect(config).toBeNull()
    })
  })
  describe("HIGH_VARIANT_MAP for GLM", () => {
    it("should NOT have high variant for glm-5", () => {
      // given glm-5 model
      const variant = getHighVariant("glm-5")
      // then should return null (no high variant needed)
      expect(variant).toBeNull()
    })
    it("should NOT have high variant for glm-4.6v", () => {
      // given glm-4.6v model
      const variant = getHighVariant("glm-4.6v")
      // then should return null
      expect(variant).toBeNull()
    })
  })
})
// Shape check for the zai-coding-plan entry: options are nested under a
// provider-scoped key carrying an extra_body payload.
describe("THINKING_CONFIGS structure for zai-coding-plan", () => {
  it("should have correct structure for zai-coding-plan", () => {
    const config = THINKING_CONFIGS["zai-coding-plan"]
    expect(config.providerOptions).toBeDefined()
    const zaiOptions = (config.providerOptions as Record<string, unknown>)?.[
      "zai-coding-plan"
    ] as Record<string, unknown>
    expect(zaiOptions?.extra_body).toBeDefined()
  })
})
}) })

View File

@ -53,35 +53,7 @@ function normalizeModelID(modelID: string): string {
return modelID.replace(/\.(\d+)/g, "-$1") return modelID.replace(/\.(\d+)/g, "-$1")
} }
/**
 * Maps proxy providers onto the provider that actually serves the model, so
 * that thinking configurations can be inherited from the real upstream
 * provider (Anthropic, Google, OpenAI).
 *
 * Only github-copilot is treated as a proxy today; every other provider ID is
 * returned unchanged, as is a copilot model whose name matches no known family.
 *
 * @example
 * resolveProvider("github-copilot", "claude-opus-4-6") // "anthropic"
 * resolveProvider("github-copilot", "gemini-3-pro")    // "google"
 * resolveProvider("github-copilot", "gpt-5.2")         // "openai"
 * resolveProvider("anthropic", "claude-opus-4-6")      // "anthropic" (unchanged)
 */
function resolveProvider(providerID: string, modelID: string): string {
  if (providerID !== "github-copilot") return providerID

  // Infer the real provider from the model name (case-insensitive).
  const lower = modelID.toLowerCase()
  if (lower.includes("claude")) return "anthropic"
  if (lower.includes("gemini")) return "google"
  const looksOpenAI = ["gpt", "o1", "o3"].some((hint) => lower.includes(hint))
  return looksOpenAI ? "openai" : providerID
}
// Maps model IDs to their "high reasoning" variant (internal convention) // Maps model IDs to their "high reasoning" variant (internal convention)
// For OpenAI models, this signals that reasoning_effort should be set to "high" // For OpenAI models, this signals that reasoning_effort should be set to "high"
@ -90,8 +62,8 @@ const HIGH_VARIANT_MAP: Record<string, string> = {
"claude-sonnet-4-6": "claude-sonnet-4-6-high", "claude-sonnet-4-6": "claude-sonnet-4-6-high",
"claude-opus-4-6": "claude-opus-4-6-high", "claude-opus-4-6": "claude-opus-4-6-high",
// Gemini // Gemini
"gemini-3-pro": "gemini-3-pro-high", "gemini-3-1-pro": "gemini-3-1-pro-high",
"gemini-3-pro-low": "gemini-3-pro-high", "gemini-3-1-pro-low": "gemini-3-1-pro-high",
"gemini-3-flash": "gemini-3-flash-high", "gemini-3-flash": "gemini-3-flash-high",
// GPT-5 // GPT-5
"gpt-5": "gpt-5-high", "gpt-5": "gpt-5-high",
@ -110,77 +82,12 @@ const HIGH_VARIANT_MAP: Record<string, string> = {
"gpt-5-2-chat-latest": "gpt-5-2-chat-latest-high", "gpt-5-2-chat-latest": "gpt-5-2-chat-latest-high",
"gpt-5-2-pro": "gpt-5-2-pro-high", "gpt-5-2-pro": "gpt-5-2-pro-high",
// Antigravity (Google) // Antigravity (Google)
"antigravity-gemini-3-pro": "antigravity-gemini-3-pro-high", "antigravity-gemini-3-1-pro": "antigravity-gemini-3-1-pro-high",
"antigravity-gemini-3-flash": "antigravity-gemini-3-flash-high", "antigravity-gemini-3-flash": "antigravity-gemini-3-flash-high",
} }
const ALREADY_HIGH: Set<string> = new Set(Object.values(HIGH_VARIANT_MAP)) const ALREADY_HIGH: Set<string> = new Set(Object.values(HIGH_VARIANT_MAP))
// Per-provider request options used to control extended thinking / reasoning.
// Keys are provider IDs; getThinkingConfig returns the matching entry when the
// model is thinking-capable and not already a "-high" variant.
export const THINKING_CONFIGS = {
  // Anthropic API: native "thinking" block with an explicit token budget.
  anthropic: {
    thinking: {
      type: "enabled",
      budgetTokens: 64000,
    },
    maxTokens: 128000,
  },
  // Anthropic models served through Google Vertex take the same shape.
  "google-vertex-anthropic": {
    thinking: {
      type: "enabled",
      budgetTokens: 64000,
    },
    maxTokens: 128000,
  },
  // Bedrock expresses the same idea as "reasoningConfig", with smaller budgets.
  "amazon-bedrock": {
    reasoningConfig: {
      type: "enabled",
      budgetTokens: 32000,
    },
    maxTokens: 64000,
  },
  // Gemini: thinking level is passed through provider-scoped options.
  google: {
    providerOptions: {
      google: {
        thinkingConfig: {
          thinkingLevel: "HIGH",
        },
      },
    },
  },
  "google-vertex": {
    providerOptions: {
      "google-vertex": {
        thinkingConfig: {
          thinkingLevel: "HIGH",
        },
      },
    },
  },
  // OpenAI: a single reasoning-effort knob.
  openai: {
    reasoning_effort: "high",
  },
  // Z.AI GLM: thinking is explicitly DISABLED via extra_body.
  "zai-coding-plan": {
    providerOptions: {
      "zai-coding-plan": {
        extra_body: {
          thinking: {
            type: "disabled",
          },
        },
      },
    },
  },
} as const satisfies Record<string, Record<string, unknown>>
// Substring patterns identifying which models support thinking per provider.
// getThinkingConfig matches these case-insensitively against the base model
// name (any "vendor/" prefix stripped), so broad families like "claude" work.
const THINKING_CAPABLE_MODELS = {
  anthropic: ["claude-sonnet-4", "claude-opus-4", "claude-3"],
  "google-vertex-anthropic": ["claude-sonnet-4", "claude-opus-4", "claude-3"],
  "amazon-bedrock": ["claude", "anthropic"],
  google: ["gemini-2", "gemini-3"],
  "google-vertex": ["gemini-2", "gemini-3"],
  openai: ["gpt-5", "o1", "o3"],
  "zai-coding-plan": ["glm"],
} as const satisfies Record<string, readonly string[]>
export function getHighVariant(modelID: string): string | null { export function getHighVariant(modelID: string): string | null {
const normalized = normalizeModelID(modelID) const normalized = normalizeModelID(modelID)
@ -207,37 +114,3 @@ export function isAlreadyHighVariant(modelID: string): boolean {
return ALREADY_HIGH.has(base) || base.endsWith("-high") return ALREADY_HIGH.has(base) || base.endsWith("-high")
} }
// Union of provider IDs that have an entry in THINKING_CONFIGS.
type ThinkingProvider = keyof typeof THINKING_CONFIGS

/**
 * Type guard narrowing an arbitrary provider string to ThinkingProvider.
 *
 * Uses an own-property check instead of the `in` operator: `in` also walks the
 * prototype chain, so inherited keys such as "toString" or "constructor" would
 * incorrectly pass the guard, and getThinkingConfig would then index
 * THINKING_CAPABLE_MODELS with a non-key and crash on `.some`.
 */
function isThinkingProvider(provider: string): provider is ThinkingProvider {
  return Object.prototype.hasOwnProperty.call(THINKING_CONFIGS, provider)
}
export function getThinkingConfig(
providerID: string,
modelID: string
): Record<string, unknown> | null {
const normalized = normalizeModelID(modelID)
const { base } = extractModelPrefix(normalized)
if (isAlreadyHighVariant(normalized)) {
return null
}
const resolvedProvider = resolveProvider(providerID, modelID)
if (!isThinkingProvider(resolvedProvider)) {
return null
}
const config = THINKING_CONFIGS[resolvedProvider]
const capablePatterns = THINKING_CAPABLE_MODELS[resolvedProvider]
// Check capability using base model name (without prefix)
const baseLower = base.toLowerCase()
const isCapable = capablePatterns.some((pattern) =>
baseLower.includes(pattern.toLowerCase())
)
return isCapable ? config : null
}

View File

@ -1,21 +1,16 @@
export interface ThinkModeState { export interface ThinkModeState {
requested: boolean requested: boolean
modelSwitched: boolean modelSwitched: boolean
thinkingConfigInjected: boolean variantSet: boolean
providerID?: string providerID?: string
modelID?: string modelID?: string
} }
export interface ModelRef { interface ModelRef {
providerID: string providerID: string
modelID: string modelID: string
} }
export interface MessageWithModel { interface MessageWithModel {
model?: ModelRef model?: ModelRef
} }
export interface ThinkModeInput {
parts: Array<{ type: string; text?: string }>
message: MessageWithModel
}

View File

@ -17,6 +17,6 @@ export const TOAST_DURATION_MS = 900
export const COUNTDOWN_GRACE_PERIOD_MS = 500 export const COUNTDOWN_GRACE_PERIOD_MS = 500
export const ABORT_WINDOW_MS = 3000 export const ABORT_WINDOW_MS = 3000
export const CONTINUATION_COOLDOWN_MS = 30_000 export const CONTINUATION_COOLDOWN_MS = 5_000
export const MAX_CONSECUTIVE_FAILURES = 5 export const MAX_CONSECUTIVE_FAILURES = 5
export const FAILURE_RESET_WINDOW_MS = 5 * 60 * 1000 export const FAILURE_RESET_WINDOW_MS = 5 * 60 * 1000

View File

@ -15,6 +15,7 @@ import {
MAX_CONSECUTIVE_FAILURES, MAX_CONSECUTIVE_FAILURES,
} from "./constants" } from "./constants"
import { isLastAssistantMessageAborted } from "./abort-detection" import { isLastAssistantMessageAborted } from "./abort-detection"
import { hasUnansweredQuestion } from "./pending-question-detection"
import { getIncompleteCount } from "./todo" import { getIncompleteCount } from "./todo"
import type { MessageInfo, ResolvedMessageInfo, Todo } from "./types" import type { MessageInfo, ResolvedMessageInfo, Todo } from "./types"
import type { SessionStateStore } from "./session-state" import type { SessionStateStore } from "./session-state"
@ -74,6 +75,10 @@ export async function handleSessionIdle(args: {
log(`[${HOOK_NAME}] Skipped: last assistant message was aborted (API fallback)`, { sessionID }) log(`[${HOOK_NAME}] Skipped: last assistant message was aborted (API fallback)`, { sessionID })
return return
} }
if (hasUnansweredQuestion(messages)) {
log(`[${HOOK_NAME}] Skipped: pending question awaiting user response`, { sessionID })
return
}
} catch (error) { } catch (error) {
log(`[${HOOK_NAME}] Messages fetch failed, continuing`, { sessionID, error: String(error) }) log(`[${HOOK_NAME}] Messages fetch failed, continuing`, { sessionID, error: String(error) })
} }

View File

@ -0,0 +1,100 @@
/// <reference types="bun-types" />
import { describe, expect, test } from "bun:test"
import { hasUnansweredQuestion } from "./pending-question-detection"
describe("hasUnansweredQuestion", () => {
  // Fixture builders for the message shapes exercised below.
  const userMsg = () => ({ info: { role: "user" } })
  const assistantMsg = (
    parts?: Array<{ type: string; name?: string; toolName?: string }>,
  ) => (parts ? { info: { role: "assistant" }, parts } : { info: { role: "assistant" } })

  test("given empty messages, returns false", () => {
    expect(hasUnansweredQuestion([])).toBe(false)
  })

  test("given null-ish input, returns false", () => {
    expect(hasUnansweredQuestion(undefined as never)).toBe(false)
  })

  test("given last assistant message with question tool_use, returns true", () => {
    const history = [userMsg(), assistantMsg([{ type: "tool_use", name: "question" }])]
    expect(hasUnansweredQuestion(history)).toBe(true)
  })

  test("given last assistant message with question tool-invocation, returns true", () => {
    const history = [
      userMsg(),
      assistantMsg([{ type: "tool-invocation", toolName: "question" }]),
    ]
    expect(hasUnansweredQuestion(history)).toBe(true)
  })

  test("given user message after question (answered), returns false", () => {
    const history = [assistantMsg([{ type: "tool_use", name: "question" }]), userMsg()]
    expect(hasUnansweredQuestion(history)).toBe(false)
  })

  test("given assistant message with non-question tool, returns false", () => {
    const history = [userMsg(), assistantMsg([{ type: "tool_use", name: "bash" }])]
    expect(hasUnansweredQuestion(history)).toBe(false)
  })

  test("given assistant message with no parts, returns false", () => {
    const history = [userMsg(), assistantMsg()]
    expect(hasUnansweredQuestion(history)).toBe(false)
  })

  test("given role on message directly (not in info), returns true for question", () => {
    // Role lives on the message itself rather than under `info`.
    const history = [
      { role: "user" },
      { role: "assistant", parts: [{ type: "tool_use", name: "question" }] },
    ]
    expect(hasUnansweredQuestion(history)).toBe(true)
  })

  test("given mixed tools including question, returns true", () => {
    const history = [
      assistantMsg([
        { type: "tool_use", name: "bash" },
        { type: "tool_use", name: "question" },
      ]),
    ]
    expect(hasUnansweredQuestion(history)).toBe(true)
  })
})

View File

@ -0,0 +1,40 @@
import { log } from "../../shared/logger"
import { HOOK_NAME } from "./constants"
// One content part of a message. Tool calls appear either as "tool_use" parts
// (tool identified by `name`) or "tool-invocation" parts (identified by
// `toolName`); both spellings are accepted by hasUnansweredQuestion.
interface MessagePart {
  type: string
  name?: string
  toolName?: string
}

// Minimal message shape: the role may live under `info.role` or directly on
// `role`, depending on the message source.
interface Message {
  info?: { role?: string }
  role?: string
  parts?: MessagePart[]
}
/**
 * Reports whether the most recent assistant turn ended with a "question" tool
 * call that the user has not yet answered.
 *
 * Walks the transcript from newest to oldest: hitting a user message first
 * means any earlier question was answered (false); the first assistant message
 * that carries parts decides the result.
 */
export function hasUnansweredQuestion(messages: Message[]): boolean {
  if (!messages || messages.length === 0) return false

  for (let idx = messages.length - 1; idx >= 0; idx--) {
    const message = messages[idx]
    const role = message.info?.role ?? message.role
    if (role === "user") return false
    if (role !== "assistant" || !message.parts) continue

    const pending = message.parts.some((part) => {
      const isToolCall = part.type === "tool_use" || part.type === "tool-invocation"
      return isToolCall && (part.name === "question" || part.toolName === "question")
    })
    if (pending) {
      log(`[${HOOK_NAME}] Detected pending question tool in last assistant message`)
    }
    return pending
  }
  return false
}

View File

@ -297,6 +297,31 @@ describe("todo-continuation-enforcer", () => {
expect(promptCalls).toHaveLength(0) expect(promptCalls).toHaveLength(0)
}) })
test("should not inject when remaining todos are blocked or deleted", async () => {
// given - session where non-completed todos are only blocked/deleted
const sessionID = "main-blocked-deleted"
setMainSession(sessionID)
const mockInput = createMockPluginInput()
mockInput.client.session.todo = async () => ({ data: [
{ id: "1", content: "Blocked task", status: "blocked", priority: "high" },
{ id: "2", content: "Deleted task", status: "deleted", priority: "medium" },
{ id: "3", content: "Done task", status: "completed", priority: "low" },
]})
const hook = createTodoContinuationEnforcer(mockInput, {})
// when - session goes idle
await hook.handler({
event: { type: "session.idle", properties: { sessionID } },
})
await fakeTimers.advanceBy(3000)
// then - no continuation injected
expect(promptCalls).toHaveLength(0)
})
test("should not inject when background tasks are running", async () => { test("should not inject when background tasks are running", async () => {
// given - session with running background tasks // given - session with running background tasks
const sessionID = "main-789" const sessionID = "main-789"
@ -1663,7 +1688,6 @@ describe("todo-continuation-enforcer", () => {
test("should cancel all countdowns via cancelAllCountdowns", async () => { test("should cancel all countdowns via cancelAllCountdowns", async () => {
// given - multiple sessions with running countdowns // given - multiple sessions with running countdowns
const session1 = "main-cancel-all-1" const session1 = "main-cancel-all-1"
const session2 = "main-cancel-all-2"
setMainSession(session1) setMainSession(session1)
const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

Some files were not shown because too many files have changed in this diff Show More