refactor(delegate-task): restructure category system for unbiased model selection

- Remove temperature from all categories
- Consolidate CATEGORY_MODEL_CATALOG into DEFAULT_CATEGORIES
- Replace 'general' with 'unspecified-low' and 'most-capable' with 'unspecified-high'
- Add a <Selection_Gate> section to the unspecified-low and unspecified-high prompt appends to force deliberate selection
- Set the quick category's default model to anthropic/claude-haiku-4-5
- Update all references and tests across codebase
This commit is contained in:
justsisyphus 2026-01-20 16:22:53 +09:00
parent 2c3f1bfd80
commit 8cc995891e
9 changed files with 82 additions and 135 deletions

View File

@ -92,7 +92,7 @@ ${skillRows.join("\n")}
**Usage:** **Usage:**
\`\`\`typescript \`\`\`typescript
delegate_task(category="visual-engineering", skills=["frontend-ui-ux"], prompt="...") delegate_task(category="visual-engineering", skills=["frontend-ui-ux"], prompt="...")
delegate_task(category="general", skills=["playwright"], prompt="...") // Browser testing delegate_task(category="unspecified-low", skills=["playwright"], prompt="...") // Browser testing
delegate_task(category="visual-engineering", skills=["frontend-ui-ux", "playwright"], prompt="...") // UI with browser testing delegate_task(category="visual-engineering", skills=["frontend-ui-ux", "playwright"], prompt="...") // UI with browser testing
\`\`\` \`\`\`

View File

@ -360,7 +360,7 @@ describe("CategoryConfigSchema", () => {
describe("BuiltinCategoryNameSchema", () => { describe("BuiltinCategoryNameSchema", () => {
test("accepts all builtin category names", () => { test("accepts all builtin category names", () => {
// #given // #given
const categories = ["visual-engineering", "ultrabrain", "artistry", "quick", "most-capable", "writing", "general"] const categories = ["visual-engineering", "ultrabrain", "artistry", "quick", "unspecified-low", "unspecified-high", "writing"]
// #when / #then // #when / #then
for (const cat of categories) { for (const cat of categories) {

View File

@ -174,9 +174,9 @@ export const BuiltinCategoryNameSchema = z.enum([
"ultrabrain", "ultrabrain",
"artistry", "artistry",
"quick", "quick",
"most-capable", "unspecified-low",
"unspecified-high",
"writing", "writing",
"general",
]) ])
export const CategoriesConfigSchema = z.record(z.string(), CategoryConfigSchema) export const CategoriesConfigSchema = z.record(z.string(), CategoryConfigSchema)

View File

@ -108,7 +108,7 @@ Example of CORRECT call:
delegate_task( delegate_task(
description="Task description", description="Task description",
prompt="Detailed prompt...", prompt="Detailed prompt...",
category="general", // OR subagent_type="explore" category="unspecified-low", // OR subagent_type="explore"
run_in_background=false, run_in_background=false,
skills=[] skills=[]
) )

View File

@ -325,7 +325,7 @@ describe("migrateAgentConfigToCategory", () => {
{ model: "anthropic/claude-sonnet-4-5" }, { model: "anthropic/claude-sonnet-4-5" },
] ]
const expectedCategories = ["visual-engineering", "ultrabrain", "quick", "most-capable", "general"] const expectedCategories = ["visual-engineering", "ultrabrain", "quick", "unspecified-high", "unspecified-low"]
// #when: Migrate each config // #when: Migrate each config
const results = configs.map(migrateAgentConfigToCategory) const results = configs.map(migrateAgentConfigToCategory)
@ -385,10 +385,9 @@ describe("shouldDeleteAgentConfig", () => {
test("returns true when all fields match category defaults", () => { test("returns true when all fields match category defaults", () => {
// #given: Config with fields matching category defaults // #given: Config with fields matching category defaults
// Note: DEFAULT_CATEGORIES only has temperature, not model
const config = { const config = {
category: "visual-engineering", category: "visual-engineering",
temperature: 0.7, model: "google/gemini-3-pro-preview",
} }
// #when: Check if config should be deleted // #when: Check if config should be deleted
@ -399,10 +398,10 @@ describe("shouldDeleteAgentConfig", () => {
}) })
test("returns false when fields differ from category defaults", () => { test("returns false when fields differ from category defaults", () => {
// #given: Config with custom temperature override // #given: Config with custom model override
const config = { const config = {
category: "visual-engineering", category: "visual-engineering",
temperature: 0.9, // Different from default (0.7) model: "anthropic/claude-opus-4-5",
} }
// #when: Check if config should be deleted // #when: Check if config should be deleted
@ -415,10 +414,10 @@ describe("shouldDeleteAgentConfig", () => {
test("handles different categories with their defaults", () => { test("handles different categories with their defaults", () => {
// #given: Configs for different categories // #given: Configs for different categories
const configs = [ const configs = [
{ category: "ultrabrain", temperature: 0.1 }, { category: "ultrabrain" },
{ category: "quick", temperature: 0.3 }, { category: "quick" },
{ category: "most-capable", temperature: 0.1 }, { category: "unspecified-high" },
{ category: "general", temperature: 0.3 }, { category: "unspecified-low" },
] ]
// #when: Check each config // #when: Check each config

View File

@ -52,7 +52,7 @@ export const HOOK_NAME_MAP: Record<string, string> = {
* from explicit model configs to category-based configs. * from explicit model configs to category-based configs.
* *
* DO NOT add new entries here. New agents should use: * DO NOT add new entries here. New agents should use:
* - Category-based config (preferred): { category: "most-capable" } * - Category-based config (preferred): { category: "unspecified-high" }
* - Or inherit from OpenCode's config.model * - Or inherit from OpenCode's config.model
* *
* This map will be removed in a future major version once migration period ends. * This map will be removed in a future major version once migration period ends.
@ -61,8 +61,8 @@ export const MODEL_TO_CATEGORY_MAP: Record<string, string> = {
"google/gemini-3-pro-preview": "visual-engineering", "google/gemini-3-pro-preview": "visual-engineering",
"openai/gpt-5.2": "ultrabrain", "openai/gpt-5.2": "ultrabrain",
"anthropic/claude-haiku-4-5": "quick", "anthropic/claude-haiku-4-5": "quick",
"anthropic/claude-opus-4-5": "most-capable", "anthropic/claude-opus-4-5": "unspecified-high",
"anthropic/claude-sonnet-4-5": "general", "anthropic/claude-sonnet-4-5": "unspecified-low",
} }
export function migrateAgentNames(agents: Record<string, unknown>): { migrated: Record<string, unknown>; changed: boolean } { export function migrateAgentNames(agents: Record<string, unknown>): { migrated: Record<string, unknown>; changed: boolean } {

View File

@ -99,20 +99,42 @@ EXPECTED OUTPUT:
If your prompt lacks this structure, REWRITE IT before delegating. If your prompt lacks this structure, REWRITE IT before delegating.
</Caller_Warning>` </Caller_Warning>`
export const MOST_CAPABLE_CATEGORY_PROMPT_APPEND = `<Category_Context> export const UNSPECIFIED_LOW_CATEGORY_PROMPT_APPEND = `<Category_Context>
You are working on COMPLEX / MOST-CAPABLE tasks. You are working on tasks that don't fit specific categories but require moderate effort.
Maximum capability mindset: <Selection_Gate>
- Bring full reasoning power to bear BEFORE selecting this category, VERIFY ALL conditions:
- Consider all edge cases and implications 1. Task does NOT fit: quick (trivial), visual-engineering (UI), ultrabrain (deep logic), artistry (creative), writing (docs)
- Deep analysis before action 2. Task requires more than trivial effort but is NOT system-wide
- Quality over speed 3. Scope is contained within a few files/modules
Approach: If task fits ANY other category, DO NOT select unspecified-low.
- Thorough understanding first This is NOT a default choice - it's for genuinely unclassifiable moderate-effort work.
- Comprehensive solution design </Selection_Gate>
- Meticulous execution </Category_Context>
- This is for the most challenging problems
<Caller_Warning>
THIS CATEGORY USES A MID-TIER MODEL (claude-sonnet-4-5).
**PROVIDE CLEAR STRUCTURE:**
1. MUST DO: Enumerate required actions explicitly
2. MUST NOT DO: State forbidden actions to prevent scope creep
3. EXPECTED OUTPUT: Define concrete success criteria
</Caller_Warning>`
export const UNSPECIFIED_HIGH_CATEGORY_PROMPT_APPEND = `<Category_Context>
You are working on tasks that don't fit specific categories but require substantial effort.
<Selection_Gate>
BEFORE selecting this category, VERIFY ALL conditions:
1. Task does NOT fit: quick (trivial), visual-engineering (UI), ultrabrain (deep logic), artistry (creative), writing (docs)
2. Task requires substantial effort across multiple systems/modules
3. Changes have broad impact or require careful coordination
4. NOT just "complex" - must be genuinely unclassifiable AND high-effort
If task fits ANY other category, DO NOT select unspecified-high.
If task is unclassifiable but moderate-effort, use unspecified-low instead.
</Selection_Gate>
</Category_Context>` </Category_Context>`
export const WRITING_CATEGORY_PROMPT_APPEND = `<Category_Context> export const WRITING_CATEGORY_PROMPT_APPEND = `<Category_Context>
@ -131,88 +153,16 @@ Approach:
- Documentation, READMEs, articles, technical writing - Documentation, READMEs, articles, technical writing
</Category_Context>` </Category_Context>`
export const GENERAL_CATEGORY_PROMPT_APPEND = `<Category_Context>
You are working on GENERAL tasks.
Balanced execution mindset:
- Practical, straightforward approach
- Good enough is good enough
- Focus on getting things done
Approach:
- Standard best practices
- Reasonable trade-offs
- Efficient completion
</Category_Context>
<Caller_Warning>
THIS CATEGORY USES A MID-TIER MODEL (claude-sonnet-4-5).
While capable, this model benefits significantly from EXPLICIT instructions.
**PROVIDE CLEAR STRUCTURE:**
1. MUST DO: Enumerate required actions explicitly - don't assume inference
2. MUST NOT DO: State forbidden actions to prevent scope creep or wrong approaches
3. EXPECTED OUTPUT: Define concrete success criteria and deliverables
**COMMON PITFALLS WITHOUT EXPLICIT INSTRUCTIONS:**
- Model may take shortcuts that miss edge cases
- Implicit requirements get overlooked
- Output format may not match expectations
- Scope may expand beyond intended boundaries
**RECOMMENDED PROMPT PATTERN:**
\`\`\`
TASK: [Clear, single-purpose goal]
CONTEXT: [Relevant background the model needs]
MUST DO:
- [Explicit requirement 1]
- [Explicit requirement 2]
MUST NOT DO:
- [Boundary/constraint 1]
- [Boundary/constraint 2]
EXPECTED OUTPUT:
- [What success looks like]
- [How to verify completion]
\`\`\`
The more explicit your prompt, the better the results.
</Caller_Warning>`
export const DEFAULT_CATEGORIES: Record<string, CategoryConfig> = { export const DEFAULT_CATEGORIES: Record<string, CategoryConfig> = {
"visual-engineering": { "visual-engineering": { model: "google/gemini-3-pro-preview" },
temperature: 0.7,
},
ultrabrain: {
temperature: 0.1,
},
artistry: {
temperature: 0.9,
},
quick: {
temperature: 0.3,
},
"most-capable": {
temperature: 0.1,
},
writing: {
temperature: 0.5,
},
general: {
temperature: 0.3,
},
}
export const CATEGORY_MODEL_CATALOG: Record<string, { model: string; variant?: string }> = {
ultrabrain: { model: "openai/gpt-5.2-codex", variant: "xhigh" }, ultrabrain: { model: "openai/gpt-5.2-codex", variant: "xhigh" },
artistry: { model: "google/gemini-3-pro-preview", variant: "max" }, artistry: { model: "google/gemini-3-pro-preview", variant: "max" },
"most-capable": { model: "anthropic/claude-opus-4-5", variant: "max" }, quick: { model: "anthropic/claude-haiku-4-5" },
"unspecified-low": { model: "anthropic/claude-sonnet-4-5" },
"unspecified-high": { model: "anthropic/claude-opus-4-5", variant: "max" },
writing: { model: "google/gemini-3-flash-preview" }, writing: { model: "google/gemini-3-flash-preview" },
general: { model: "anthropic/claude-sonnet-4-5" },
} }
export const CATEGORY_PROMPT_APPENDS: Record<string, string> = { export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
@ -220,19 +170,19 @@ export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
ultrabrain: STRATEGIC_CATEGORY_PROMPT_APPEND, ultrabrain: STRATEGIC_CATEGORY_PROMPT_APPEND,
artistry: ARTISTRY_CATEGORY_PROMPT_APPEND, artistry: ARTISTRY_CATEGORY_PROMPT_APPEND,
quick: QUICK_CATEGORY_PROMPT_APPEND, quick: QUICK_CATEGORY_PROMPT_APPEND,
"most-capable": MOST_CAPABLE_CATEGORY_PROMPT_APPEND, "unspecified-low": UNSPECIFIED_LOW_CATEGORY_PROMPT_APPEND,
"unspecified-high": UNSPECIFIED_HIGH_CATEGORY_PROMPT_APPEND,
writing: WRITING_CATEGORY_PROMPT_APPEND, writing: WRITING_CATEGORY_PROMPT_APPEND,
general: GENERAL_CATEGORY_PROMPT_APPEND,
} }
export const CATEGORY_DESCRIPTIONS: Record<string, string> = { export const CATEGORY_DESCRIPTIONS: Record<string, string> = {
"visual-engineering": "Frontend, UI/UX, design, styling, animation", "visual-engineering": "Frontend, UI/UX, design, styling, animation",
ultrabrain: "Strict architecture design, very complex business logic", ultrabrain: "Deep logical reasoning, complex architecture decisions requiring extensive analysis",
artistry: "Highly creative/artistic tasks, novel ideas", artistry: "Highly creative/artistic tasks, novel ideas",
quick: "Cheap & fast - small tasks with minimal overhead, budget-friendly", quick: "Trivial tasks - single file changes, typo fixes, simple modifications",
"most-capable": "Complex tasks requiring maximum capability", "unspecified-low": "Tasks that don't fit other categories, low effort required",
"unspecified-high": "Tasks that don't fit other categories, high effort required",
writing: "Documentation, prose, technical writing", writing: "Documentation, prose, technical writing",
general: "General purpose tasks",
} }
const BUILTIN_CATEGORIES = Object.keys(DEFAULT_CATEGORIES).join(", ") const BUILTIN_CATEGORIES = Object.keys(DEFAULT_CATEGORIES).join(", ")

View File

@ -8,24 +8,23 @@ const SYSTEM_DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
describe("sisyphus-task", () => { describe("sisyphus-task", () => {
describe("DEFAULT_CATEGORIES", () => { describe("DEFAULT_CATEGORIES", () => {
test("visual-engineering category has temperature config only (model removed)", () => { test("visual-engineering category has model config", () => {
// #given // #given
const category = DEFAULT_CATEGORIES["visual-engineering"] const category = DEFAULT_CATEGORIES["visual-engineering"]
// #when / #then // #when / #then
expect(category).toBeDefined() expect(category).toBeDefined()
expect(category.model).toBeUndefined() expect(category.model).toBe("google/gemini-3-pro-preview")
expect(category.temperature).toBe(0.7)
}) })
test("ultrabrain category has temperature config only (model removed)", () => { test("ultrabrain category has model and variant config", () => {
// #given // #given
const category = DEFAULT_CATEGORIES["ultrabrain"] const category = DEFAULT_CATEGORIES["ultrabrain"]
// #when / #then // #when / #then
expect(category).toBeDefined() expect(category).toBeDefined()
expect(category.model).toBeUndefined() expect(category.model).toBe("openai/gpt-5.2-codex")
expect(category.temperature).toBe(0.1) expect(category.variant).toBe("xhigh")
}) })
}) })
@ -61,13 +60,13 @@ describe("sisyphus-task", () => {
} }
}) })
test("most-capable category exists and has description", () => { test("unspecified-high category exists and has description", () => {
// #given / #when // #given / #when
const description = CATEGORY_DESCRIPTIONS["most-capable"] const description = CATEGORY_DESCRIPTIONS["unspecified-high"]
// #then // #then
expect(description).toBeDefined() expect(description).toBeDefined()
expect(description).toContain("Complex") expect(description).toContain("high effort")
}) })
}) })
@ -141,16 +140,16 @@ describe("sisyphus-task", () => {
expect(result).toBeNull() expect(result).toBeNull()
}) })
test("returns systemDefaultModel for builtin category (categories no longer have default models)", () => { test("returns default model from DEFAULT_CATEGORIES for builtin category", () => {
// #given // #given
const categoryName = "visual-engineering" const categoryName = "visual-engineering"
// #when // #when
const result = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL }) const result = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL })
// #then - model comes from systemDefaultModel since categories no longer have model defaults // #then
expect(result).not.toBeNull() expect(result).not.toBeNull()
expect(result!.config.model).toBe(SYSTEM_DEFAULT_MODEL) expect(result!.config.model).toBe("google/gemini-3-pro-preview")
expect(result!.promptAppend).toContain("VISUAL/UI") expect(result!.promptAppend).toContain("VISUAL/UI")
}) })
@ -270,7 +269,7 @@ describe("sisyphus-task", () => {
expect(result!.config.model).toBe("my-provider/my-model") expect(result!.config.model).toBe("my-provider/my-model")
}) })
test("systemDefaultModel is used when no user model and no inheritedModel", () => { test("default model from category config is used when no user model and no inheritedModel", () => {
// #given // #given
const categoryName = "visual-engineering" const categoryName = "visual-engineering"
@ -279,7 +278,7 @@ describe("sisyphus-task", () => {
// #then // #then
expect(result).not.toBeNull() expect(result).not.toBeNull()
expect(result!.config.model).toBe(SYSTEM_DEFAULT_MODEL) expect(result!.config.model).toBe("google/gemini-3-pro-preview")
}) })
}) })
@ -907,16 +906,16 @@ describe("sisyphus-task", () => {
expect(resolved!.config.variant).toBe("xhigh") expect(resolved!.config.variant).toBe("xhigh")
}) })
test("systemDefaultModel is used for category without catalog entry", () => { test("default model is used for category with default entry", () => {
// #given - general has no catalog entry // #given - unspecified-low has default model
const categoryName = "general" const categoryName = "unspecified-low"
// #when // #when
const resolved = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL }) const resolved = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL })
// #then - systemDefaultModel is used // #then - default model from DEFAULT_CATEGORIES is used
expect(resolved).not.toBeNull() expect(resolved).not.toBeNull()
expect(resolved!.config.model).toBe(SYSTEM_DEFAULT_MODEL) expect(resolved!.config.model).toBe("anthropic/claude-sonnet-4-5")
}) })
test("inheritedModel takes precedence over systemDefaultModel for builtin category", () => { test("inheritedModel takes precedence over systemDefaultModel for builtin category", () => {

View File

@ -4,7 +4,7 @@ import { join } from "node:path"
import type { BackgroundManager } from "../../features/background-agent" import type { BackgroundManager } from "../../features/background-agent"
import type { DelegateTaskArgs } from "./types" import type { DelegateTaskArgs } from "./types"
import type { CategoryConfig, CategoriesConfig, GitMasterConfig } from "../../config/schema" import type { CategoryConfig, CategoriesConfig, GitMasterConfig } from "../../config/schema"
import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_MODEL_CATALOG } from "./constants" import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS } from "./constants"
import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAGE } from "../../features/hook-message-injector" import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAGE } from "../../features/hook-message-injector"
import { resolveMultipleSkillsAsync } from "../../features/opencode-skill-loader/skill-content" import { resolveMultipleSkillsAsync } from "../../features/opencode-skill-loader/skill-content"
import { discoverSkills } from "../../features/opencode-skill-loader" import { discoverSkills } from "../../features/opencode-skill-loader"
@ -118,24 +118,23 @@ export function resolveCategoryConfig(
const { userCategories, inheritedModel, systemDefaultModel } = options const { userCategories, inheritedModel, systemDefaultModel } = options
const defaultConfig = DEFAULT_CATEGORIES[categoryName] const defaultConfig = DEFAULT_CATEGORIES[categoryName]
const userConfig = userCategories?.[categoryName] const userConfig = userCategories?.[categoryName]
const catalogEntry = CATEGORY_MODEL_CATALOG[categoryName]
const defaultPromptAppend = CATEGORY_PROMPT_APPENDS[categoryName] ?? "" const defaultPromptAppend = CATEGORY_PROMPT_APPENDS[categoryName] ?? ""
if (!defaultConfig && !userConfig) { if (!defaultConfig && !userConfig) {
return null return null
} }
// Model priority: user override > inherited from parent > catalog default > system default // Model priority: user override > inherited from parent > default config > system default
const model = resolveModel({ const model = resolveModel({
userModel: userConfig?.model, userModel: userConfig?.model,
inheritedModel, inheritedModel,
systemDefault: catalogEntry?.model ?? systemDefaultModel, systemDefault: defaultConfig?.model ?? systemDefaultModel,
}) })
const config: CategoryConfig = { const config: CategoryConfig = {
...defaultConfig, ...defaultConfig,
...userConfig, ...userConfig,
model, model,
variant: userConfig?.variant ?? catalogEntry?.variant, variant: userConfig?.variant ?? defaultConfig?.variant,
} }
let promptAppend = defaultPromptAppend let promptAppend = defaultPromptAppend