fix: tune machine learning workflow routing

2026-05-14 02:10:07 +08:00 · 2026-05-11 17:58:50 -04:00 · 2026-05-11 17:58:50 -04:00 · ab6e998383
commit ab6e998383
parent 240d52d27f
6 changed files with 120 additions and 6 deletions
--- a/.agents/skills/mle-workflow/SKILL.md
+++ b/.agents/skills/mle-workflow/SKILL.md
@ -37,6 +37,8 @@ Use only the lanes that fit the system in front of you. This skill is useful for
 Do not treat MLE as separate from software engineering. Most ECC SWE workflows apply directly to ML systems, often with stricter failure modes:
 The recommended `minimal --with capability:machine-learning` install keeps the core agent surface available alongside this skill. For skill-only or agent-limited harnesses, pair `skill:mle-workflow` with `agent:mle-reviewer` where the target supports agents.
 | SWE surface | MLE use |
 |-------------|---------|
 | `product-capability` / `architecture-decision-records` | Turn model work into explicit product contracts and record irreversible data, model, and rollout choices |
--- a/manifests/install-modules.json
+++ b/manifests/install-modules.json
@ -587,7 +587,9 @@
        "antigravity",
        "codex",
        "opencode",
-        "codebuddy"
+        "codebuddy",
        "joycode",
        "qwen"
      ],
      "dependencies": [
        "framework-language",
--- a/scripts/consult.js
+++ b/scripts/consult.js
@ -11,6 +11,21 @@ const DEFAULT_TARGET = 'claude';
 const DEFAULT_LIMIT = 5;
 const MAX_LIMIT = 20;
 const SCHEMA_VERSION = 'ecc.consult.v1';
 // Query tokens that scoreAgainstQuery excludes from its substring-based
 // (includes) fuzzy comparison against corpus tokens.
 const FUZZY_EXCLUDED_TOKENS = new Set(['review']);
 // Query tokens that signal a machine-learning context. preferredComponentBonus
 // checks whether any query token is in this set: when one is, agent:mle-reviewer
 // earns a bonus and a bare "review" token no longer earns capability:security
 // its bonus.
 const MACHINE_LEARNING_CONTEXT_TOKENS = new Set([
  'data-science',
  'evals',
  'evaluation',
  'inference',
  'ml',
  'mle',
  'mlops',
  'model',
  'models',
  'pytorch',
  'serving',
  'training',
 ]);
 const STOP_WORDS = new Set([
  'a',
@ -74,6 +89,7 @@ const COMPONENT_ALIASES = Object.freeze({
    'mlops',
    'model',
    'models',
    'pytorch',
    'training',
    'inference',
    'serving',
@ -252,6 +268,7 @@ function scoreAgainstQuery(queryTokens, corpusTokens, options = {}) {
    if (
      token.length >= 4
      && !FUZZY_EXCLUDED_TOKENS.has(token)
      && [...corpus].some(corpusToken => (
        corpusToken.length >= 4
        && (corpusToken.includes(token) || token.includes(corpusToken))
@ -272,6 +289,7 @@ function scoreAgainstQuery(queryTokens, corpusTokens, options = {}) {
 function preferredComponentBonus(component, queryTokens) {
  let bonus = 0;
  const suffix = component.id.split(':')[1];
  const hasMachineLearningContext = queryTokens.some(token => MACHINE_LEARNING_CONTEXT_TOKENS.has(token));
  if (queryTokens[0] === suffix) {
    bonus += 5;
@ -281,7 +299,17 @@ function preferredComponentBonus(component, queryTokens) {
    bonus += 3;
  }
-  if (component.id === 'capability:security' && queryTokens.some(token => ['audit', 'review', 'security'].includes(token))) {
+  if (component.id === 'agent:mle-reviewer' && hasMachineLearningContext) {
    bonus += 2;
  }
  if (
    component.id === 'capability:security'
    && (
      queryTokens.some(token => ['audit', 'security', 'threat', 'vulnerability'].includes(token))
      || (!hasMachineLearningContext && queryTokens.includes('review'))
    )
  ) {
    bonus += 4;
  }
--- a/skills/mle-workflow/SKILL.md
+++ b/skills/mle-workflow/SKILL.md
@ -37,6 +37,8 @@ Use only the lanes that fit the system in front of you. This skill is useful for
 Do not treat MLE as separate from software engineering. Most ECC SWE workflows apply directly to ML systems, often with stricter failure modes:
 The recommended `minimal --with capability:machine-learning` install keeps the core agent surface available alongside this skill. For skill-only or agent-limited harnesses, pair `skill:mle-workflow` with `agent:mle-reviewer` where the target supports agents.
 | SWE surface | MLE use |
 |-------------|---------|
 | `product-capability` / `architecture-decision-records` | Turn model work into explicit product contracts and record irreversible data, model, and rollout choices |
--- a/tests/lib/install-manifests.test.js
+++ b/tests/lib/install-manifests.test.js
@ -301,6 +301,43 @@ function runTests() {
    )), 'Should install the MLE workflow skill');
  })) passed++; else failed++;
  if (test('resolves machine-learning component on JoyCode and Qwen targets', () => {
    for (const target of ['joycode', 'qwen']) {
      const plan = resolveInstallPlan({
        includeComponentIds: ['capability:machine-learning'],
        target,
        projectRoot: '/workspace/ml-app',
        homeDir: '/Users/example',
      });
      assert.ok(plan.selectedModuleIds.includes('machine-learning'),
        `Should include machine-learning module for ${target}`);
      assert.ok(!plan.skippedModuleIds.includes('machine-learning'),
        `Should not skip machine-learning module for ${target}`);
      assert.ok(plan.operations.some(operation => (
        operation.sourceRelativePath === 'skills/mle-workflow'
      )), `Should install the MLE workflow skill for ${target}`);
    }
  })) passed++; else failed++;
  if (test('minimal machine-learning install includes MLE reviewer agent surface', () => {
    const plan = resolveInstallPlan({
      profileId: 'minimal',
      includeComponentIds: ['capability:machine-learning'],
      target: 'claude',
      projectRoot: '/workspace/ml-app',
    });
    assert.ok(plan.selectedModuleIds.includes('agents-core'),
      'Minimal install should keep the agent surface available');
    assert.ok(plan.operations.some(operation => (
      operation.sourceRelativePath === 'agents'
    )), 'Should install the agent directory that contains mle-reviewer.md');
    assert.ok(plan.operations.some(operation => (
      operation.sourceRelativePath === 'skills/mle-workflow'
    )), 'Should install the MLE workflow skill');
  })) passed++; else failed++;
  if (test('resolves explicit modules with dependency expansion', () => {
    const plan = resolveInstallPlan({ moduleIds: ['security'] });
    assert.ok(plan.selectedModuleIds.includes('security'), 'Should include requested module');
--- a/tests/scripts/consult.test.js
+++ b/tests/scripts/consult.test.js
@ -22,6 +22,14 @@ function parseJson(stdout) {
  return JSON.parse(stdout.trim());
 }
 /**
  * Return the first entry of `payload.matches` whose `componentId` equals
  * the requested id.
  *
  * @param {{ matches: Array<{ componentId: string }> }} payload - Parsed consult output.
  * @param {string} componentId - Component id to look for.
  * @returns {object|undefined} The matching entry, or `undefined` when none matches.
  */
 function findMatch(payload, componentId) {
  const hasRequestedId = entry => entry.componentId === componentId;
  return payload.matches.find(hasRequestedId);
 }
 /**
  * Return the position within `payload.matches` of the first entry whose
  * `componentId` equals the requested id.
  *
  * @param {{ matches: Array<{ componentId: string }> }} payload - Parsed consult output.
  * @param {string} componentId - Component id to look for.
  * @returns {number} Zero-based index of the entry, or -1 when none matches.
  */
 function findMatchIndex(payload, componentId) {
  const { matches } = payload;
  return matches.findIndex(entry => entry.componentId === componentId);
 }
 function test(name, fn) {
  try {
    fn();
@ -88,9 +96,13 @@ function runTests() {
    assert.strictEqual(result.status, 0, result.stderr);
    const payload = parseJson(result.stdout);
-    assert.strictEqual(payload.matches[0].componentId, 'capability:machine-learning');
+    const capabilityIndex = findMatchIndex(payload, 'capability:machine-learning');
-    assert.ok(payload.matches[0].installCommand.includes('--with capability:machine-learning'));
+    const reviewerIndex = findMatchIndex(payload, 'agent:mle-reviewer');
-    assert.ok(payload.matches.some(match => match.componentId === 'agent:mle-reviewer'));
+    assert.ok(capabilityIndex >= 0, 'Should include capability:machine-learning');
    assert.ok(reviewerIndex >= 0, 'Should include agent:mle-reviewer');
    assert.ok(capabilityIndex < reviewerIndex,
      'The workflow capability should rank ahead of the reviewer agent for broad MLE setup queries');
    assert.ok(findMatch(payload, 'capability:machine-learning').installCommand.includes('--with capability:machine-learning'));
    assert.ok(!payload.profiles.some(profile => profile.id === 'mle'));
  })) passed++; else failed++;
@ -99,10 +111,41 @@ function runTests() {
    assert.strictEqual(result.status, 0, result.stderr);
    const payload = parseJson(result.stdout);
-    const reviewer = payload.matches.find(match => match.componentId === 'agent:mle-reviewer');
+    const capabilityIndex = findMatchIndex(payload, 'capability:machine-learning');
    const securityIndex = findMatchIndex(payload, 'capability:security');
    const reviewerIndex = findMatchIndex(payload, 'agent:mle-reviewer');
    const codeReviewerIndex = findMatchIndex(payload, 'agent:code-reviewer');
    const reviewer = findMatch(payload, 'agent:mle-reviewer');
    assert.ok(reviewer, 'Should include agent:mle-reviewer');
    assert.ok(reviewer.reasons.includes('matched "model"'));
    assert.ok(!reviewer.reasons.includes('matched "review"'));
    assert.ok(!reviewer.reasons.includes('fuzzy matched "review"'));
    assert.ok(capabilityIndex >= 0, 'Should include capability:machine-learning');
    assert.ok(securityIndex < 0 || capabilityIndex < securityIndex,
      'Model review queries should prefer the MLE capability over generic security review');
    assert.ok(codeReviewerIndex < 0 || reviewerIndex < codeReviewerIndex,
      'Model review queries should prefer the MLE reviewer over generic code review');
  })) passed++; else failed++;
  if (test('surfaces MLE reviewer for PyTorch model review queries', () => {
    const result = run(['pytorch', 'model', 'review', '--json']);
    assert.strictEqual(result.status, 0, result.stderr);
    const payload = parseJson(result.stdout);
    const reviewer = findMatch(payload, 'agent:mle-reviewer');
    assert.ok(findMatch(payload, 'capability:machine-learning'), 'Should include capability:machine-learning');
    assert.ok(reviewer, 'Should include agent:mle-reviewer');
    assert.ok(reviewer.reasons.includes('matched "pytorch"'));
  })) passed++; else failed++;
  if (test('does not route generic review queries to MLE components', () => {
    const result = run(['review', '--json']);
    assert.strictEqual(result.status, 0, result.stderr);
    const payload = parseJson(result.stdout);
    assert.ok(!findMatch(payload, 'capability:machine-learning'));
    assert.ok(!findMatch(payload, 'agent:mle-reviewer'));
    assert.ok(!payload.profiles.some(profile => profile.id === 'mle'));
  })) passed++; else failed++;
  if (test('works from outside the ECC repository', () => {