refactor(delegate-task): restructure category system for unbiased model selection

- Remove temperature from all categories
- Consolidate CATEGORY_MODEL_CATALOG into DEFAULT_CATEGORIES
- Replace 'general' and 'most-capable' with 'unspecified-low' and 'unspecified-high'
- Add Selection_Gate to unspecified categories to force deliberate selection
- Update quick category to use claude-haiku-4-5
- Update all references and tests across codebase
2026-01-20 16:22:53 +09:00 · 2026-01-20 16:22:53 +09:00 · 8cc995891e
commit 8cc995891e
parent 2c3f1bfd80
9 changed files with 82 additions and 135 deletions
--- a/src/agents/atlas.ts
+++ b/src/agents/atlas.ts
@ -92,7 +92,7 @@ ${skillRows.join("\n")}
 **Usage:**
 \`\`\`typescript
 delegate_task(category="visual-engineering", skills=["frontend-ui-ux"], prompt="...")
-delegate_task(category="general", skills=["playwright"], prompt="...")  // Browser testing
+delegate_task(category="unspecified-low", skills=["playwright"], prompt="...")  // Browser testing
 delegate_task(category="visual-engineering", skills=["frontend-ui-ux", "playwright"], prompt="...")  // UI with browser testing
 \`\`\`
--- a/src/config/schema.test.ts
+++ b/src/config/schema.test.ts
@ -360,7 +360,7 @@ describe("CategoryConfigSchema", () => {
 describe("BuiltinCategoryNameSchema", () => {
  test("accepts all builtin category names", () => {
    // #given
-    const categories = ["visual-engineering", "ultrabrain", "artistry", "quick", "most-capable", "writing", "general"]
+    const categories = ["visual-engineering", "ultrabrain", "artistry", "quick", "unspecified-low", "unspecified-high", "writing"]
    // #when / #then
    for (const cat of categories) {
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@ -174,9 +174,9 @@ export const BuiltinCategoryNameSchema = z.enum([
  "ultrabrain",
  "artistry",
  "quick",
-  "most-capable",
+  "unspecified-low",
+  "unspecified-high",
  "writing",
-  "general",
 ])
 export const CategoriesConfigSchema = z.record(z.string(), CategoryConfigSchema)
--- a/src/hooks/delegate-task-retry/index.ts
+++ b/src/hooks/delegate-task-retry/index.ts
@ -108,7 +108,7 @@ Example of CORRECT call:
 delegate_task(
  description="Task description",
  prompt="Detailed prompt...",
-  category="general",  // OR subagent_type="explore"
+  category="unspecified-low",  // OR subagent_type="explore"
  run_in_background=false,
  skills=[]
 )
--- a/src/shared/migration.test.ts
+++ b/src/shared/migration.test.ts
@ -325,7 +325,7 @@ describe("migrateAgentConfigToCategory", () => {
      { model: "anthropic/claude-sonnet-4-5" },
    ]
-    const expectedCategories = ["visual-engineering", "ultrabrain", "quick", "most-capable", "general"]
+    const expectedCategories = ["visual-engineering", "ultrabrain", "quick", "unspecified-high", "unspecified-low"]
    // #when: Migrate each config
    const results = configs.map(migrateAgentConfigToCategory)
@ -385,10 +385,9 @@ describe("shouldDeleteAgentConfig", () => {
  test("returns true when all fields match category defaults", () => {
    // #given: Config with fields matching category defaults
-    // Note: DEFAULT_CATEGORIES only has temperature, not model
    const config = {
      category: "visual-engineering",
-      temperature: 0.7,
+      model: "google/gemini-3-pro-preview",
    }
    // #when: Check if config should be deleted
@ -399,10 +398,10 @@ describe("shouldDeleteAgentConfig", () => {
  })
  test("returns false when fields differ from category defaults", () => {
-    // #given: Config with custom temperature override
+    // #given: Config with custom model override
    const config = {
      category: "visual-engineering",
-      temperature: 0.9, // Different from default (0.7)
+      model: "anthropic/claude-opus-4-5",
    }
    // #when: Check if config should be deleted
@ -415,10 +414,10 @@ describe("shouldDeleteAgentConfig", () => {
  test("handles different categories with their defaults", () => {
    // #given: Configs for different categories
    const configs = [
-      { category: "ultrabrain", temperature: 0.1 },
+      { category: "ultrabrain" },
-      { category: "quick", temperature: 0.3 },
+      { category: "quick" },
-      { category: "most-capable", temperature: 0.1 },
+      { category: "unspecified-high" },
-      { category: "general", temperature: 0.3 },
+      { category: "unspecified-low" },
    ]
    // #when: Check each config
--- a/src/shared/migration.ts
+++ b/src/shared/migration.ts
@ -52,7 +52,7 @@ export const HOOK_NAME_MAP: Record<string, string> = {
 * from explicit model configs to category-based configs.
 * 
 * DO NOT add new entries here. New agents should use:
- * - Category-based config (preferred): { category: "most-capable" }
+ * - Category-based config (preferred): { category: "unspecified-high" }
 * - Or inherit from OpenCode's config.model
 * 
 * This map will be removed in a future major version once migration period ends.
@ -61,8 +61,8 @@ export const MODEL_TO_CATEGORY_MAP: Record<string, string> = {
  "google/gemini-3-pro-preview": "visual-engineering",
  "openai/gpt-5.2": "ultrabrain",
  "anthropic/claude-haiku-4-5": "quick",
-  "anthropic/claude-opus-4-5": "most-capable",
+  "anthropic/claude-opus-4-5": "unspecified-high",
-  "anthropic/claude-sonnet-4-5": "general",
+  "anthropic/claude-sonnet-4-5": "unspecified-low",
 }
 export function migrateAgentNames(agents: Record<string, unknown>): { migrated: Record<string, unknown>; changed: boolean } {
--- a/src/tools/delegate-task/constants.ts
+++ b/src/tools/delegate-task/constants.ts
@ -99,20 +99,42 @@ EXPECTED OUTPUT:
 If your prompt lacks this structure, REWRITE IT before delegating.
 </Caller_Warning>`
-export const MOST_CAPABLE_CATEGORY_PROMPT_APPEND = `<Category_Context>
+export const UNSPECIFIED_LOW_CATEGORY_PROMPT_APPEND = `<Category_Context>
-You are working on COMPLEX / MOST-CAPABLE tasks.
+You are working on tasks that don't fit specific categories but require moderate effort.
-Maximum capability mindset:
+<Selection_Gate>
- Bring full reasoning power to bear
+BEFORE selecting this category, VERIFY ALL conditions:
- Consider all edge cases and implications
+1. Task does NOT fit: quick (trivial), visual-engineering (UI), ultrabrain (deep logic), artistry (creative), writing (docs)
- Deep analysis before action
+2. Task requires more than trivial effort but is NOT system-wide
- Quality over speed
+3. Scope is contained within a few files/modules
-Approach:
+If task fits ANY other category, DO NOT select unspecified-low.
- Thorough understanding first
+This is NOT a default choice - it's for genuinely unclassifiable moderate-effort work.
- Comprehensive solution design
+</Selection_Gate>
- Meticulous execution
+</Category_Context>
- This is for the most challenging problems
+
 <Caller_Warning>
 THIS CATEGORY USES A MID-TIER MODEL (claude-sonnet-4-5).
 **PROVIDE CLEAR STRUCTURE:**
 1. MUST DO: Enumerate required actions explicitly
 2. MUST NOT DO: State forbidden actions to prevent scope creep
 3. EXPECTED OUTPUT: Define concrete success criteria
 </Caller_Warning>`
 export const UNSPECIFIED_HIGH_CATEGORY_PROMPT_APPEND = `<Category_Context>
 You are working on tasks that don't fit specific categories but require substantial effort.
 <Selection_Gate>
 BEFORE selecting this category, VERIFY ALL conditions:
 1. Task does NOT fit: quick (trivial), visual-engineering (UI), ultrabrain (deep logic), artistry (creative), writing (docs)
 2. Task requires substantial effort across multiple systems/modules
 3. Changes have broad impact or require careful coordination
 4. NOT just "complex" - must be genuinely unclassifiable AND high-effort
 If task fits ANY other category, DO NOT select unspecified-high.
 If task is unclassifiable but moderate-effort, use unspecified-low instead.
 </Selection_Gate>
 </Category_Context>`
 export const WRITING_CATEGORY_PROMPT_APPEND = `<Category_Context>
@ -131,88 +153,16 @@ Approach:
 - Documentation, READMEs, articles, technical writing
 </Category_Context>`
 export const GENERAL_CATEGORY_PROMPT_APPEND = `<Category_Context>
 You are working on GENERAL tasks.
 Balanced execution mindset:
 - Practical, straightforward approach
 - Good enough is good enough
 - Focus on getting things done
 Approach:
 - Standard best practices
 - Reasonable trade-offs
 - Efficient completion
 </Category_Context>
 <Caller_Warning>
 THIS CATEGORY USES A MID-TIER MODEL (claude-sonnet-4-5).
 While capable, this model benefits significantly from EXPLICIT instructions.
 **PROVIDE CLEAR STRUCTURE:**
 1. MUST DO: Enumerate required actions explicitly - don't assume inference
 2. MUST NOT DO: State forbidden actions to prevent scope creep or wrong approaches
 3. EXPECTED OUTPUT: Define concrete success criteria and deliverables
 **COMMON PITFALLS WITHOUT EXPLICIT INSTRUCTIONS:**
 - Model may take shortcuts that miss edge cases
 - Implicit requirements get overlooked
 - Output format may not match expectations
 - Scope may expand beyond intended boundaries
 **RECOMMENDED PROMPT PATTERN:**
 \`\`\`
 TASK: [Clear, single-purpose goal]
 CONTEXT: [Relevant background the model needs]
 MUST DO:
 - [Explicit requirement 1]
 - [Explicit requirement 2]
 MUST NOT DO:
 - [Boundary/constraint 1]
 - [Boundary/constraint 2]
 EXPECTED OUTPUT:
 - [What success looks like]
 - [How to verify completion]
 \`\`\`
 The more explicit your prompt, the better the results.
 </Caller_Warning>`
 export const DEFAULT_CATEGORIES: Record<string, CategoryConfig> = {
-  "visual-engineering": {
+  "visual-engineering": { model: "google/gemini-3-pro-preview" },
    temperature: 0.7,
  },
  ultrabrain: {
    temperature: 0.1,
  },
  artistry: {
    temperature: 0.9,
  },
  quick: {
    temperature: 0.3,
  },
  "most-capable": {
    temperature: 0.1,
  },
  writing: {
    temperature: 0.5,
  },
  general: {
    temperature: 0.3,
  },
 }
 export const CATEGORY_MODEL_CATALOG: Record<string, { model: string; variant?: string }> = {
  ultrabrain: { model: "openai/gpt-5.2-codex", variant: "xhigh" },
  artistry: { model: "google/gemini-3-pro-preview", variant: "max" },
-  "most-capable": { model: "anthropic/claude-opus-4-5", variant: "max" },
+  quick: { model: "anthropic/claude-haiku-4-5" },
  "unspecified-low": { model: "anthropic/claude-sonnet-4-5" },
  "unspecified-high": { model: "anthropic/claude-opus-4-5", variant: "max" },
  writing: { model: "google/gemini-3-flash-preview" },
  general: { model: "anthropic/claude-sonnet-4-5" },
 }
 export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
@ -220,19 +170,19 @@ export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
  ultrabrain: STRATEGIC_CATEGORY_PROMPT_APPEND,
  artistry: ARTISTRY_CATEGORY_PROMPT_APPEND,
  quick: QUICK_CATEGORY_PROMPT_APPEND,
-  "most-capable": MOST_CAPABLE_CATEGORY_PROMPT_APPEND,
+  "unspecified-low": UNSPECIFIED_LOW_CATEGORY_PROMPT_APPEND,
+  "unspecified-high": UNSPECIFIED_HIGH_CATEGORY_PROMPT_APPEND,
  writing: WRITING_CATEGORY_PROMPT_APPEND,
-  general: GENERAL_CATEGORY_PROMPT_APPEND,
 }
 export const CATEGORY_DESCRIPTIONS: Record<string, string> = {
  "visual-engineering": "Frontend, UI/UX, design, styling, animation",
-  ultrabrain: "Strict architecture design, very complex business logic",
+  ultrabrain: "Deep logical reasoning, complex architecture decisions requiring extensive analysis",
  artistry: "Highly creative/artistic tasks, novel ideas",
-  quick: "Cheap & fast - small tasks with minimal overhead, budget-friendly",
+  quick: "Trivial tasks - single file changes, typo fixes, simple modifications",
-  "most-capable": "Complex tasks requiring maximum capability",
+  "unspecified-low": "Tasks that don't fit other categories, low effort required",
+  "unspecified-high": "Tasks that don't fit other categories, high effort required",
  writing: "Documentation, prose, technical writing",
-  general: "General purpose tasks",
 }
 const BUILTIN_CATEGORIES = Object.keys(DEFAULT_CATEGORIES).join(", ")
--- a/src/tools/delegate-task/tools.test.ts
+++ b/src/tools/delegate-task/tools.test.ts
@ -8,24 +8,23 @@ const SYSTEM_DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
 describe("sisyphus-task", () => {
  describe("DEFAULT_CATEGORIES", () => {
-    test("visual-engineering category has temperature config only (model removed)", () => {
+    test("visual-engineering category has model config", () => {
      // #given
      const category = DEFAULT_CATEGORIES["visual-engineering"]
      // #when / #then
      expect(category).toBeDefined()
-      expect(category.model).toBeUndefined()
+      expect(category.model).toBe("google/gemini-3-pro-preview")
-      expect(category.temperature).toBe(0.7)
    })
-    test("ultrabrain category has temperature config only (model removed)", () => {
+    test("ultrabrain category has model and variant config", () => {
      // #given
      const category = DEFAULT_CATEGORIES["ultrabrain"]
      // #when / #then
      expect(category).toBeDefined()
-      expect(category.model).toBeUndefined()
+      expect(category.model).toBe("openai/gpt-5.2-codex")
-      expect(category.temperature).toBe(0.1)
+      expect(category.variant).toBe("xhigh")
    })
  })
@ -61,13 +60,13 @@ describe("sisyphus-task", () => {
      }
    })
-    test("most-capable category exists and has description", () => {
+    test("unspecified-high category exists and has description", () => {
      // #given / #when
-      const description = CATEGORY_DESCRIPTIONS["most-capable"]
+      const description = CATEGORY_DESCRIPTIONS["unspecified-high"]
      // #then
      expect(description).toBeDefined()
-      expect(description).toContain("Complex")
+      expect(description).toContain("high effort")
    })
  })
@ -141,16 +140,16 @@ describe("sisyphus-task", () => {
      expect(result).toBeNull()
    })
-    test("returns systemDefaultModel for builtin category (categories no longer have default models)", () => {
+    test("returns default model from DEFAULT_CATEGORIES for builtin category", () => {
      // #given
      const categoryName = "visual-engineering"
      // #when
      const result = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL })
-      // #then - model comes from systemDefaultModel since categories no longer have model defaults
+      // #then
      expect(result).not.toBeNull()
-      expect(result!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
+      expect(result!.config.model).toBe("google/gemini-3-pro-preview")
      expect(result!.promptAppend).toContain("VISUAL/UI")
    })
@ -270,7 +269,7 @@ describe("sisyphus-task", () => {
      expect(result!.config.model).toBe("my-provider/my-model")
    })
-    test("systemDefaultModel is used when no user model and no inheritedModel", () => {
+    test("default model from category config is used when no user model and no inheritedModel", () => {
      // #given
      const categoryName = "visual-engineering"
@ -279,7 +278,7 @@ describe("sisyphus-task", () => {
      // #then
      expect(result).not.toBeNull()
-      expect(result!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
+      expect(result!.config.model).toBe("google/gemini-3-pro-preview")
    })
  })
@ -907,16 +906,16 @@ describe("sisyphus-task", () => {
      expect(resolved!.config.variant).toBe("xhigh")
    })
-    test("systemDefaultModel is used for category without catalog entry", () => {
+    test("default model is used for category with default entry", () => {
-      // #given - general has no catalog entry
+      // #given - unspecified-low has default model
-      const categoryName = "general"
+      const categoryName = "unspecified-low"
      // #when
      const resolved = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL })
-      // #then - systemDefaultModel is used
+      // #then - default model from DEFAULT_CATEGORIES is used
      expect(resolved).not.toBeNull()
-      expect(resolved!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
+      expect(resolved!.config.model).toBe("anthropic/claude-sonnet-4-5")
    })
    test("inheritedModel takes precedence over systemDefaultModel for builtin category", () => {
--- a/src/tools/delegate-task/tools.ts
+++ b/src/tools/delegate-task/tools.ts
@ -4,7 +4,7 @@ import { join } from "node:path"
 import type { BackgroundManager } from "../../features/background-agent"
 import type { DelegateTaskArgs } from "./types"
 import type { CategoryConfig, CategoriesConfig, GitMasterConfig } from "../../config/schema"
-import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_MODEL_CATALOG } from "./constants"
+import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS } from "./constants"
 import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAGE } from "../../features/hook-message-injector"
 import { resolveMultipleSkillsAsync } from "../../features/opencode-skill-loader/skill-content"
 import { discoverSkills } from "../../features/opencode-skill-loader"
@ -118,24 +118,23 @@ export function resolveCategoryConfig(
  const { userCategories, inheritedModel, systemDefaultModel } = options
  const defaultConfig = DEFAULT_CATEGORIES[categoryName]
  const userConfig = userCategories?.[categoryName]
-  const catalogEntry = CATEGORY_MODEL_CATALOG[categoryName]
  const defaultPromptAppend = CATEGORY_PROMPT_APPENDS[categoryName] ?? ""
  if (!defaultConfig && !userConfig) {
    return null
  }
-  // Model priority: user override > inherited from parent > catalog default > system default
+  // Model priority: user override > inherited from parent > default config > system default
  const model = resolveModel({
    userModel: userConfig?.model,
    inheritedModel,
-    systemDefault: catalogEntry?.model ?? systemDefaultModel,
+    systemDefault: defaultConfig?.model ?? systemDefaultModel,
  })
  const config: CategoryConfig = {
    ...defaultConfig,
    ...userConfig,
    model,
-    variant: userConfig?.variant ?? catalogEntry?.variant,
+    variant: userConfig?.variant ?? defaultConfig?.variant,
  }
  let promptAppend = defaultPromptAppend