diff --git a/skills/continuous-learning-v2/agents/observer-loop.sh b/skills/continuous-learning-v2/agents/observer-loop.sh index 9bab37c5..1ac1c56c 100755 --- a/skills/continuous-learning-v2/agents/observer-loop.sh +++ b/skills/continuous-learning-v2/agents/observer-loop.sh @@ -19,6 +19,12 @@ IDLE_TIMEOUT_SECONDS="${ECC_OBSERVER_IDLE_TIMEOUT_SECONDS:-1800}" SESSION_LEASE_DIR="${PROJECT_DIR}/.observer-sessions" ACTIVITY_FILE="${PROJECT_DIR}/.observer-last-activity" +# Resolve this script's own directory so sibling scripts (session-guardian.sh) +# and relative helpers (../scripts/instinct-cli.py) resolve correctly whether +# this file is executed or sourced. $0 is the *caller* when sourced, so prefer +# ${BASH_SOURCE[0]}, which always points at this file (#2370). +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + cleanup() { [ -n "$SLEEP_PID" ] && kill "$SLEEP_PID" 2>/dev/null if [ -f "$PID_FILE" ] && [ "$(cat "$PID_FILE" 2>/dev/null)" = "$$" ]; then @@ -129,7 +135,7 @@ analyze_observations() { fi # session-guardian: gate observer cycle (active hours, cooldown, idle detection) - if ! bash "$(dirname "$0")/session-guardian.sh"; then + if ! bash "${SCRIPT_DIR}/session-guardian.sh"; then echo "[$(date)] Observer cycle skipped by session-guardian" >> "$LOG_FILE" return fi @@ -259,9 +265,14 @@ PROMPT rm -f "$analysis_file" if [ "$exit_code" -ne 0 ]; then - echo "[$(date)] Claude analysis failed (exit $exit_code)" >> "$LOG_FILE" + echo "[$(date)] Claude analysis failed (exit $exit_code); retaining observations for retry" >> "$LOG_FILE" + return fi + # Archive observations only after a successful analysis. A transient + # failure (timeout, non-zero exit, rate limit) must not discard the batch + # before it has been turned into instincts, since the analyzer only ever + # reads the live observations file (#2370). if [ -f "$OBSERVATIONS_FILE" ]; then archive_dir="${PROJECT_DIR}/observations.archive" mkdir -p "$archive_dir" @@ -298,11 +309,20 @@ on_usr1() { } trap on_usr1 USR1 +# When this file is sourced (e.g. by tests/hooks/observer-loop-archive.test.js) +# rather than executed, stop here so callers can invoke individual functions +# such as analyze_observations without starting the observer loop. The only +# production caller (start-observer.sh) executes the script, so $0 equals +# BASH_SOURCE[0] there and this guard is a no-op (#2370). +if [ "${BASH_SOURCE[0]}" != "${0}" ]; then + return 0 2>/dev/null || true +fi + echo "$$" > "$PID_FILE" echo "[$(date)] Observer started for ${PROJECT_NAME} (PID: $$)" >> "$LOG_FILE" -# Prune expired pending instincts before analysis -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# Prune expired pending instincts before analysis (SCRIPT_DIR resolved at top +# via ${BASH_SOURCE[0]} so it is correct under both execution and sourcing). "${CLV2_PYTHON_CMD:-python3}" "${SCRIPT_DIR}/../scripts/instinct-cli.py" prune --quiet >> "$LOG_FILE" 2>&1 || echo "[$(date)] Warning: instinct prune failed (non-fatal)" >> "$LOG_FILE" while true; do diff --git a/tests/hooks/observer-loop-archive.test.js b/tests/hooks/observer-loop-archive.test.js new file mode 100644 index 00000000..56676e76 --- /dev/null +++ b/tests/hooks/observer-loop-archive.test.js @@ -0,0 +1,202 @@ +/** + * Tests for observer-loop archive-on-failure fix (#2370) + * + * Bug: analyze_observations() in observer-loop.sh moved the live + * observations.jsonl into observations.archive/ unconditionally, even when + * the Claude analysis step failed (timeout, non-zero exit, rate limit). + * Because the analyzer only ever reads the live file, a failed batch could + * never be re-analyzed and its instincts were silently lost. + * + * Fix: archive only after a successful analysis; on failure log and return, + * retaining observations for the next cycle to retry. + * + * Strategy: source observer-loop.sh (a BASH_SOURCE guard stops the main + * loop from running when sourced) and drive analyze_observations directly + * with a stub `claude` (exit code controlled per case) and a stub sibling + * session-guardian.sh. Assert symmetric outcomes for failure vs success. + * + * Run with: node tests/hooks/observer-loop-archive.test.js + */ + +const assert = require('assert'); +const path = require('path'); +const fs = require('fs'); +const os = require('os'); +const { spawnSync } = require('child_process'); + +let passed = 0; +let failed = 0; + +function test(name, fn) { + try { + fn(); + console.log(` ✓ ${name}`); + passed++; + } catch (err) { + console.log(` ✗ ${name}`); + console.log(` Error: ${err.message}`); + failed++; + } +} + +function createTempDir() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'ecc-observer-archive-')); +} + +function cleanupDir(dir) { + try { + fs.rmSync(dir, { recursive: true, force: true }); + } catch { + // ignore cleanup errors + } +} + +const repoRoot = path.resolve(__dirname, '..', '..'); +const observerLoopPath = path.join( + repoRoot, 'skills', 'continuous-learning-v2', 'agents', 'observer-loop.sh' +); + +/** + * Run analyze_observations once with the given stub claude exit code. + * Returns { liveExists, archivedCount, log } describing the resulting state. + */ +function runAnalyzeOnce(claudeExitCode) { + const sandbox = createTempDir(); + try { + const binDir = path.join(sandbox, 'bin'); + const projectDir = path.join(sandbox, 'project'); + fs.mkdirSync(binDir, { recursive: true }); + fs.mkdirSync(projectDir, { recursive: true }); + + // Stub claude: exit with the requested code, ignoring all args. + const claudeStub = path.join(binDir, 'claude'); + fs.writeFileSync(claudeStub, '#!/usr/bin/env bash\nexit ${CLAUDE_STUB_EXIT:-0}\n'); + fs.chmodSync(claudeStub, 0o755); + + // analyze_observations resolves the real session-guardian.sh via its own + // ${BASH_SOURCE[0]}-derived SCRIPT_DIR, so we drive the real guardian with + // all of its gates disabled/isolated (see env below) rather than stubbing it. + + // Driver sources observer-loop.sh (guard stops the main loop) then runs + // the single function under test. + const driver = path.join(sandbox, 'driver.sh'); + fs.writeFileSync( + driver, + `#!/usr/bin/env bash\nsource ${JSON.stringify(observerLoopPath)}\nanalyze_observations\n` + ); + fs.chmodSync(driver, 0o755); + + const observationsFile = path.join(projectDir, 'observations.jsonl'); + fs.writeFileSync(observationsFile, '{"a":1}\n{"a":2}\n{"a":3}\n'); + + // Defensive: never leak CLAUDE_PLUGIN_ROOT into the ECC test shell (it + // contaminates this project's hook-root resolution). + const childEnv = Object.assign({}, process.env); + delete childEnv.CLAUDE_PLUGIN_ROOT; + childEnv.PATH = binDir + path.delimiter + process.env.PATH; + childEnv.CLAUDE_STUB_EXIT = String(claudeExitCode); + childEnv.OBSERVATIONS_FILE = observationsFile; + childEnv.MIN_OBSERVATIONS = '1'; + childEnv.PROJECT_DIR = projectDir; + childEnv.LOG_FILE = path.join(projectDir, 'observer.log'); + childEnv.PROJECT_NAME = 'test-project'; + childEnv.PROJECT_ID = 'test-project'; + childEnv.INSTINCTS_DIR = path.join(projectDir, 'instincts'); + childEnv.CONFIG_DIR = projectDir; + childEnv.CLV2_IS_WINDOWS = 'false'; + childEnv.ECC_OBSERVER_TIMEOUT_SECONDS = '2'; + // Make the real session-guardian.sh deterministically proceed (exit 0): + // disable the active-hours and idle gates, isolate the cooldown log, and + // zero the cooldown interval so a fresh project always passes. + childEnv.OBSERVER_ACTIVE_HOURS_START = '0'; + childEnv.OBSERVER_ACTIVE_HOURS_END = '0'; + childEnv.OBSERVER_MAX_IDLE_SECONDS = '0'; + childEnv.OBSERVER_INTERVAL_SECONDS = '0'; + childEnv.OBSERVER_LAST_RUN_LOG = path.join(projectDir, 'observer-last-run.log'); + + const result = spawnSync('bash', [driver], { + encoding: 'utf8', + timeout: 15000, + env: childEnv + }); + + assert.strictEqual( + result.status, 0, + `driver should exit 0, got ${result.status}; stderr: ${result.stderr}` + ); + + const archiveDir = path.join(projectDir, 'observations.archive'); + let archivedCount = 0; + if (fs.existsSync(archiveDir)) { + archivedCount = fs.readdirSync(archiveDir) + .filter(f => /^processed-.*\.jsonl$/.test(f)).length; + } + let log = ''; + try { log = fs.readFileSync(childEnv.LOG_FILE, 'utf8'); } catch { /* none */ } + + return { liveExists: fs.existsSync(observationsFile), archivedCount, log }; + } finally { + cleanupDir(sandbox); + } +} + +console.log('\n=== Observer-loop Archive-on-Failure Tests (#2370) ===\n'); + +console.log('--- behavioral ---'); + +test('failed analysis retains observations and archives nothing', () => { + // Shell-driven behavioral check; skip on Windows where the bash driver's + // $0 path handling differs (matches observer-memory.test.js convention). + if (process.platform === 'win32') { + return; + } + const { liveExists, archivedCount, log } = runAnalyzeOnce(1); + assert.ok(liveExists, 'live observations.jsonl must be retained when analysis fails'); + assert.strictEqual(archivedCount, 0, 'nothing should be archived when analysis fails'); + assert.ok( + /retaining observations for retry/.test(log), + `failure log should note retention; got: ${log}` + ); +}); + +test('successful analysis archives the batch (happy path preserved)', () => { + // Shell-driven behavioral check; skip on Windows (see note above). + if (process.platform === 'win32') { + return; + } + const { liveExists, archivedCount } = runAnalyzeOnce(0); + assert.ok(!liveExists, 'live observations.jsonl should be moved after a successful analysis'); + assert.strictEqual(archivedCount, 1, 'exactly one processed-*.jsonl should be archived on success'); +}); + +console.log('--- static guards ---'); + +test('analyze_observations returns on failure before the archive mv', () => { + const content = fs.readFileSync(observerLoopPath, 'utf8'); + // Operate on full file content with explicit anchors rather than a lazy + // function-body extraction (which could truncate on a future inner "\n}" + // and pass vacuously). These tokens each occur once, inside the function. + const failIdx = content.search(/exit_code"?\s+-ne\s+0/); + const returnIdx = content.indexOf('return', failIdx); + const archiveIdx = content.indexOf('observations.archive'); + assert.ok(failIdx !== -1, 'should find the non-zero exit_code check'); + assert.ok(archiveIdx !== -1, 'should find the archive block'); + assert.ok(returnIdx !== -1, 'failure branch should contain a return'); + assert.ok(returnIdx < archiveIdx, + 'failure branch must return before reaching the archive block'); +}); + +test('observer-loop.sh has a source-guard so it can be sourced in tests', () => { + const content = fs.readFileSync(observerLoopPath, 'utf8'); + assert.ok( + content.includes('BASH_SOURCE[0]') && content.includes('return 0 2>/dev/null'), + 'observer-loop.sh should short-circuit when sourced rather than executed' + ); +}); + +console.log('\n=== Test Results ==='); +console.log(`Passed: ${passed}`); +console.log(`Failed: ${failed}`); +console.log(`Total: ${passed + failed}\n`); + +process.exit(failed > 0 ? 1 : 0);