// everything-claude-code/tests/hooks/observer-loop-archive.test.js

/**
 * Tests for observer-loop archive-on-failure fix (#2370)
 *
 * Bug: analyze_observations() in observer-loop.sh moved the live
 * observations.jsonl into observations.archive/ unconditionally, even when
 * the Claude analysis step failed (timeout, non-zero exit, rate limit).
 * Because the analyzer only ever reads the live file, a failed batch could
 * never be re-analyzed and its instincts were silently lost.
 *
 * Fix: archive only after a successful analysis; on failure log and return,
 * retaining observations for the next cycle to retry.
 *
 * Strategy: source observer-loop.sh (a BASH_SOURCE guard stops the main
 * loop from running when sourced) and drive analyze_observations directly
 * with a stub `claude` (exit code controlled per case) and the real sibling
 * session-guardian.sh, whose gates are disabled or isolated through
 * environment variables. Assert symmetric outcomes for failure vs success.
 *
 * Run with: node tests/hooks/observer-loop-archive.test.js
 */

const assert = require('assert');
const path = require('path');
const fs = require('fs');
const os = require('os');
const { spawnSync } = require('child_process');

// Running tallies of test outcomes; both are updated only by test().
let passed = 0;
let failed = 0;

/**
 * Run one synchronous test case and record its outcome.
 *
 * The case passes when `fn` returns without throwing. Anything thrown is
 * caught, its message is printed beneath the failure marker, and the case is
 * counted as failed.
 *
 * @param {string} name - Label printed next to the pass/fail marker.
 * @param {Function} fn - Test body; called with no arguments.
 */
function test(name, fn) {
  const recordPass = () => {
    console.log(`  ✓ ${name}`);
    passed += 1;
  };

  const recordFailure = (err) => {
    console.log(`  ✗ ${name}`);
    console.log(`    Error: ${err.message}`);
    failed += 1;
  };

  try {
    fn();
    recordPass();
  } catch (err) {
    recordFailure(err);
  }
}

/**
 * Create a fresh, uniquely named scratch directory under the OS temp dir.
 *
 * @returns {string} Path of the new directory; its name starts with
 *   `ecc-observer-archive-`.
 */
function createTempDir() {
  const prefix = path.join(os.tmpdir(), 'ecc-observer-archive-');
  return fs.mkdtempSync(prefix);
}

/**
 * Best-effort recursive removal of a directory tree.
 *
 * Removal errors are deliberately swallowed so that a cleanup problem never
 * masks the outcome of the test that used the directory.
 *
 * @param {string} dir - Directory to delete; a missing path is not an error.
 */
function cleanupDir(dir) {
  const removalOptions = { force: true, recursive: true };
  try {
    fs.rmSync(dir, removalOptions);
  } catch {
    // Intentionally ignored: cleanup is best-effort.
  }
}

// Repository root: two directories above this file's directory.
const repoRoot = path.resolve(__dirname, '..', '..');

// Location of the shell script under test, inside the repository.
const agentsDir = path.join(repoRoot, 'skills', 'continuous-learning-v2', 'agents');
const observerLoopPath = path.join(agentsDir, 'observer-loop.sh');

/**
 * Run analyze_observations once with the given stub claude exit code.
 *
 * Builds a throwaway sandbox holding a stub `claude` executable, a project
 * directory seeded with three observations, and a bash driver that sources
 * observer-loop.sh and then calls analyze_observations. The sandbox is removed
 * before returning, so everything the caller needs is captured in the result.
 *
 * @param {number} claudeExitCode - Exit status the stub `claude` returns.
 * @returns {{ liveExists: boolean, archivedCount: number, log: string }}
 *   liveExists: whether observations.jsonl is still in place afterwards;
 *   archivedCount: number of processed-*.jsonl files in observations.archive/;
 *   log: contents of the observer log file ('' when it cannot be read).
 * @throws {assert.AssertionError} When the bash driver does not exit with 0.
 */
function runAnalyzeOnce(claudeExitCode) {
  // POSIX single-quote quoting: nothing is expanded inside '...', and an
  // embedded single quote is written as '\''. JSON.stringify is not a shell
  // quoter: inside bash double quotes `$` and backticks would still expand.
  const shellQuote = (value) => `'${String(value).replace(/'/g, `'\\''`)}'`;

  const sandbox = createTempDir();
  try {
    const binDir = path.join(sandbox, 'bin');
    const projectDir = path.join(sandbox, 'project');
    fs.mkdirSync(binDir, { recursive: true });
    fs.mkdirSync(projectDir, { recursive: true });

    // Stub claude: exit with the requested code, ignoring all args.
    const claudeStub = path.join(binDir, 'claude');
    fs.writeFileSync(claudeStub, '#!/usr/bin/env bash\nexit ${CLAUDE_STUB_EXIT:-0}\n');
    fs.chmodSync(claudeStub, 0o755);

    // analyze_observations resolves the real session-guardian.sh via its own
    // ${BASH_SOURCE[0]}-derived SCRIPT_DIR, so we drive the real guardian with
    // all of its gates disabled/isolated (see env below) rather than stubbing it.

    // Driver sources observer-loop.sh (guard stops the main loop) then runs
    // the single function under test.
    const driver = path.join(sandbox, 'driver.sh');
    fs.writeFileSync(
      driver,
      `#!/usr/bin/env bash\nsource ${shellQuote(observerLoopPath)}\nanalyze_observations\n`
    );
    fs.chmodSync(driver, 0o755);

    const observationsFile = path.join(projectDir, 'observations.jsonl');
    fs.writeFileSync(observationsFile, '{"a":1}\n{"a":2}\n{"a":3}\n');

    // Defensive: never leak CLAUDE_PLUGIN_ROOT into the ECC test shell (it
    // contaminates this project's hook-root resolution).
    const childEnv = Object.assign({}, process.env);
    delete childEnv.CLAUDE_PLUGIN_ROOT;
    // Put the stub first on PATH; skip an unset/empty inherited PATH so it is
    // not turned into a literal "undefined" entry.
    childEnv.PATH = [binDir, process.env.PATH].filter(Boolean).join(path.delimiter);
    childEnv.CLAUDE_STUB_EXIT = String(claudeExitCode);
    childEnv.OBSERVATIONS_FILE = observationsFile;
    childEnv.MIN_OBSERVATIONS = '1';
    childEnv.PROJECT_DIR = projectDir;
    childEnv.LOG_FILE = path.join(projectDir, 'observer.log');
    childEnv.PROJECT_NAME = 'test-project';
    childEnv.PROJECT_ID = 'test-project';
    childEnv.INSTINCTS_DIR = path.join(projectDir, 'instincts');
    childEnv.CONFIG_DIR = projectDir;
    childEnv.CLV2_IS_WINDOWS = 'false';
    childEnv.ECC_OBSERVER_TIMEOUT_SECONDS = '2';
    // Make the real session-guardian.sh deterministically proceed (exit 0):
    // disable the active-hours and idle gates, isolate the cooldown log, and
    // zero the cooldown interval so a fresh project always passes.
    childEnv.OBSERVER_ACTIVE_HOURS_START = '0';
    childEnv.OBSERVER_ACTIVE_HOURS_END = '0';
    childEnv.OBSERVER_MAX_IDLE_SECONDS = '0';
    childEnv.OBSERVER_INTERVAL_SECONDS = '0';
    childEnv.OBSERVER_LAST_RUN_LOG = path.join(projectDir, 'observer-last-run.log');

    const result = spawnSync('bash', [driver], {
      encoding: 'utf8',
      timeout: 15000,
      env: childEnv
    });

    // When bash cannot be spawned or the run times out, status is null and the
    // real cause is carried by result.error / result.signal; surface both.
    const signalNote = result.signal ? ` (signal: ${result.signal})` : '';
    const spawnNote = result.error ? ` (spawn error: ${result.error.message})` : '';
    assert.strictEqual(
      result.status, 0,
      `driver should exit 0, got ${result.status}${signalNote}${spawnNote}; stderr: ${result.stderr}`
    );

    const archiveDir = path.join(projectDir, 'observations.archive');
    let archivedCount = 0;
    if (fs.existsSync(archiveDir)) {
      archivedCount = fs.readdirSync(archiveDir)
        .filter(f => /^processed-.*\.jsonl$/.test(f)).length;
    }
    // The log file may legitimately be absent; treat an unreadable log as empty.
    let log = '';
    try { log = fs.readFileSync(childEnv.LOG_FILE, 'utf8'); } catch { /* none */ }

    // Capture the live-file state before the finally block deletes the sandbox.
    return { liveExists: fs.existsSync(observationsFile), archivedCount, log };
  } finally {
    cleanupDir(sandbox);
  }
}

console.log('\n=== Observer-loop Archive-on-Failure Tests (#2370) ===\n');

console.log('--- behavioral ---');

// Failure path: a non-zero exit from the stub claude must leave the live
// observations file in place, archive nothing, and log the retention.
test('failed analysis retains observations and archives nothing', () => {
  // Shell-driven behavioral check; skipped on Windows where the bash driver's
  // $0 path handling differs (matches observer-memory.test.js convention).
  if (process.platform !== 'win32') {
    const outcome = runAnalyzeOnce(1);
    assert.ok(outcome.liveExists, 'live observations.jsonl must be retained when analysis fails');
    assert.strictEqual(outcome.archivedCount, 0, 'nothing should be archived when analysis fails');
    const mentionsRetention = /retaining observations for retry/.test(outcome.log);
    assert.ok(
      mentionsRetention,
      `failure log should note retention; got: ${outcome.log}`
    );
  }
});

// Success path: a zero exit from the stub claude must still move the batch
// into the archive, exactly as before the fix.
test('successful analysis archives the batch (happy path preserved)', () => {
  // Shell-driven behavioral check; skipped on Windows for the same reason.
  if (process.platform !== 'win32') {
    const outcome = runAnalyzeOnce(0);
    assert.ok(!outcome.liveExists, 'live observations.jsonl should be moved after a successful analysis');
    assert.strictEqual(outcome.archivedCount, 1, 'exactly one processed-*.jsonl should be archived on success');
  }
});

console.log('--- static guards ---');

// Textual check on observer-loop.sh: after the non-zero exit_code test there
// must be a `return` located before the first mention of the archive dir.
test('analyze_observations returns on failure before the archive mv', () => {
  const script = fs.readFileSync(observerLoopPath, 'utf8');
  // Searches run over the full file content with explicit anchors rather than
  // a lazy function-body extraction (which could truncate on a future inner
  // "\n}" and pass vacuously).
  // NOTE(review): this relies on the anchor tokens sitting inside
  // analyze_observations — confirm against observer-loop.sh.
  const failurePattern = /exit_code"?\s+-ne\s+0/;
  const failIdx = script.search(failurePattern);
  assert.ok(failIdx !== -1, 'should find the non-zero exit_code check');

  const archiveIdx = script.indexOf('observations.archive');
  assert.ok(archiveIdx !== -1, 'should find the archive block');

  // First `return` at or after the failure check.
  const returnIdx = script.indexOf('return', failIdx);
  assert.ok(returnIdx !== -1, 'failure branch should contain a return');

  assert.ok(returnIdx < archiveIdx,
    'failure branch must return before reaching the archive block');
});

// The behavioral tests source the script, so it must contain a guard that
// stops the main loop when it is sourced instead of executed.
test('observer-loop.sh has a source-guard so it can be sourced in tests', () => {
  const script = fs.readFileSync(observerLoopPath, 'utf8');
  const guardTokens = ['BASH_SOURCE[0]', 'return 0 2>/dev/null'];
  const hasGuard = guardTokens.every((token) => script.includes(token));
  assert.ok(
    hasGuard,
    'observer-loop.sh should short-circuit when sourced rather than executed'
  );
});

// Print the run summary, one line per entry.
const summaryLines = [
  '\n=== Test Results ===',
  `Passed: ${passed}`,
  `Failed: ${failed}`,
  `Total:  ${passed + failed}\n`
];
for (const line of summaryLines) {
  console.log(line);
}

// Exit non-zero when any test failed so callers of this script can detect it.
const exitCode = failed > 0 ? 1 : 0;
process.exit(exitCode);