From e3f467989a2c446cd8d0137893215e1249689c77 Mon Sep 17 00:00:00 2001
From: Gaurav Dubey
Date: Fri, 26 Jun 2026 05:17:32 +0530
Subject: [PATCH] fix(clv2): escape $HOME before pgrep -f in migrate-homunculus.sh (#2339)

* fix(clv2): escape $HOME before pgrep -f in migrate-homunculus.sh

pgrep -f treats its argument as an extended regular expression, but the
running-observer guard interpolated $HOME unescaped. Paths containing
regex metacharacters (e.g. /home/user.name, /home/c++dev,
/home/user (work)) made the match over-broad or invalid, causing either
a false negative (live observer missed, migration proceeds and risks
registry corruption) or a false positive (migration blocked
unnecessarily).

Escape the ERE metacharacters in $HOME via sed before building the
pattern so the home prefix is matched literally while the trailing
.*observer-loop\.sh regex is preserved. Portable across BSD and GNU sed.

Fixes #2301

* test(clv2): add regression test for migrate-homunculus.sh $HOME escaping

Guards the #2301 fix: extracts the script's sed escaping command and
asserts the resulting pgrep -f pattern matches the literal home path
while no longer over-matching a regex-expanded decoy
(HOME=/home/user.name must not match /home/userXname).

Also pins that the guard uses escaped_home rather than $HOME directly.
Follows the existing clv2 shell-test convention in
tests/hooks/observe-entrypoint-allowlist.test.js.

Refs #2301

* test(clv2): skip migrate-homunculus escaping test on Windows

The test relies on POSIX bash/sed/grep -E semantics, which differ on the
Windows CI runners. Guard with the same process.platform === 'win32'
early exit used by tests/hooks/observe-subdirectory-detection.test.js so
the bash-dependent assertions only run on POSIX platforms.

Refs #2301
---
 .../scripts/migrate-homunculus.sh             |   8 +-
 .../migrate-homunculus-home-escape.test.js    | 175 ++++++++++++++++++
 2 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 tests/hooks/migrate-homunculus-home-escape.test.js

diff --git a/skills/continuous-learning-v2/scripts/migrate-homunculus.sh b/skills/continuous-learning-v2/scripts/migrate-homunculus.sh
index 9358fc7b..b6c19cac 100755
--- a/skills/continuous-learning-v2/scripts/migrate-homunculus.sh
+++ b/skills/continuous-learning-v2/scripts/migrate-homunculus.sh
@@ -20,7 +20,13 @@ if [ ! -d "$OLD" ]; then
 fi
 
 if command -v pgrep >/dev/null 2>&1; then
-  if pgrep -f "${HOME}.*observer-loop\\.sh" >/dev/null 2>&1; then
+  # pgrep -f treats its argument as an extended regular expression, so $HOME
+  # must be escaped before interpolation. Without this, regex metacharacters in
+  # the path (e.g. /home/user.name, /home/c++dev, /home/user (work)) would make
+  # the match over-broad or invalid, causing false negatives (observer missed,
+  # migration proceeds unsafely) or false positives (migration blocked).
+  escaped_home="$(printf '%s' "$HOME" | sed 's/[]\.[(){}+*?|^$]/\\&/g')"
+  if pgrep -f "${escaped_home}.*observer-loop\\.sh" >/dev/null 2>&1; then
     echo "Refusing to migrate: observer-loop.sh is running." >&2
     echo "Exit all Claude Code sessions, then re-run." >&2
     exit 1
diff --git a/tests/hooks/migrate-homunculus-home-escape.test.js b/tests/hooks/migrate-homunculus-home-escape.test.js
new file mode 100644
index 00000000..b28e8a84
--- /dev/null
+++ b/tests/hooks/migrate-homunculus-home-escape.test.js
@@ -0,0 +1,175 @@
+/**
+ * Regression test for migrate-homunculus.sh $HOME escaping (#2301).
+ *
+ * The running-observer guard in migrate-homunculus.sh builds a `pgrep -f`
+ * pattern from $HOME. `pgrep -f` treats its argument as an extended regular
+ * expression, so an unescaped $HOME containing regex metacharacters (e.g.
+ * /home/user.name, /home/c++dev, /home/user (work)) made the match over-broad
+ * or invalid. That caused either a false negative (a live observer-loop.sh is
+ * missed and the migration proceeds unsafely) or a false positive (an unrelated
+ * process matches and the migration is blocked).
+ *
+ * The fix escapes the ERE metacharacters in $HOME before interpolation. This
+ * test pins that behavior by extracting the exact `sed` escaping command from
+ * the script (so it tests the real implementation, not a copy), then asserting
+ * that, for HOME values containing metacharacters:
+ *   (a) the escaped pattern matches the literal home path, and
+ *   (b) the escaped pattern does NOT over-match a decoy path that the
+ *       unescaped (regex-expanded) form would have matched.
+ *
+ * Run with: node tests/hooks/migrate-homunculus-home-escape.test.js
+ */
+
+'use strict';
+
+// migrate-homunculus.sh and this test's assertions rely on POSIX bash, sed, and
+// grep -E semantics. Skip on Windows, matching the repo convention for
+// bash-dependent clv2 tests (see tests/hooks/observe-subdirectory-detection.test.js).
+if (process.platform === 'win32') {
+  console.log('Skipping bash-dependent migrate-homunculus tests on Windows');
+  process.exit(0);
+}
+
+const assert = require('assert');
+const fs = require('fs');
+const path = require('path');
+const { spawnSync } = require('child_process');
+
+const repoRoot = path.resolve(__dirname, '..', '..');
+const scriptPath = path.join(
+  repoRoot,
+  'skills',
+  'continuous-learning-v2',
+  'scripts',
+  'migrate-homunculus.sh'
+);
+
+let passed = 0;
+let failed = 0;
+
+/**
+ * Run one named test case, print a pass/fail line, and update the counters.
+ *
+ * @param {string} name - Description printed next to the result marker.
+ * @param {Function} fn - Test body; a thrown error marks the test as failed.
+ */
+function test(name, fn) {
+  try {
+    fn();
+    console.log(` ✓ ${name}`);
+    passed++;
+  } catch (err) {
+    console.log(` ✗ ${name}`);
+    console.log(` Error: ${err.message}`);
+    failed++;
+  }
+}
+
+const scriptSource = fs.readFileSync(scriptPath, 'utf8');
+
+// Extract the exact sed escaping command from the script so this test verifies
+// the real implementation. Expected form:
+// escaped_home="$(printf '%s' "$HOME" | sed 's/.../\\&/g')"
+const sedMatch = scriptSource.match(
+  /escaped_home="\$\(printf '%s' "\$HOME" \| (sed '[^']*')\)"/
+);
+
+/**
+ * Build the pgrep pattern exactly as the script does: the sed-escaped home
+ * directory followed by the literal observer-loop.sh regex tail.
+ *
+ * @param {string} home - HOME value to escape.
+ * @returns {string} The ERE pattern the script would pass to `pgrep -f`.
+ * @throws {assert.AssertionError} If the sed command was not found in the
+ *   script, or the bash/sed pipeline did not exit with status 0.
+ */
+function buildPattern(home) {
+  assert.ok(
+    sedMatch,
+    'could not locate the escaped_home sed command in migrate-homunculus.sh; ' +
+      'the fix for #2301 must escape $HOME before pgrep -f'
+  );
+  const sedCmd = sedMatch[1];
+  const res = spawnSync(
+    'bash',
+    ['-c', `printf '%s' "$1" | ${sedCmd}`, 'bash', home],
+    { encoding: 'utf8' }
+  );
+  assert.strictEqual(
+    res.status,
+    0,
+    `sed escaping failed for HOME=${home}: ${res.stderr}`
+  );
+  return `${res.stdout}.*observer-loop\\.sh`;
+}
+
+/**
+ * Report whether an ERE pattern matches a command line.
+ *
+ * Uses `grep -E`, which accepts the same extended regular expression
+ * syntax as `pgrep -f`. grep exits 0 on a match and 1 on no match; any
+ * other outcome (spawn failure, exit status greater than 1, death by
+ * signal) is a harness error and is thrown instead of being reported as
+ * "no match", so negative assertions cannot pass vacuously.
+ *
+ * @param {string} pattern - Extended regular expression to evaluate.
+ * @param {string} cmdline - Text to match, fed to grep on stdin.
+ * @returns {boolean} True on a match, false on no match.
+ * @throws {Error} If grep could not be run or reported an error.
+ */
+function ereMatches(pattern, cmdline) {
+  const res = spawnSync('grep', ['-E', pattern], {
+    input: cmdline,
+    encoding: 'utf8',
+  });
+  if (res.error) {
+    throw res.error;
+  }
+  if (res.status !== 0 && res.status !== 1) {
+    throw new Error(
+      `grep -E failed (status ${res.status}, signal ${res.signal}): ${res.stderr}`
+    );
+  }
+  return res.status === 0;
+}
+
+console.log('\n=== migrate-homunculus.sh $HOME escaping (#2301) ===\n');
+
+test('the running-observer guard no longer interpolates $HOME unescaped', () => {
+  assert.ok(
+    !/pgrep -f "\$\{HOME\}/.test(scriptSource),
+    'pgrep -f must not use ${HOME} directly; it must use the escaped value'
+  );
+  assert.ok(
+    /pgrep -f "\$\{escaped_home\}/.test(scriptSource),
+    'pgrep -f must use the escaped_home value built from $HOME'
+  );
+});
+
+const problemHomes = ['/home/user.name', '/home/c++dev', '/home/user (work)', '/tmp/h[x]'];
+
+for (const home of problemHomes) {
+  test(`escaped pattern matches the literal home ${home}`, () => {
+    const pattern = buildPattern(home);
+    const cmdline = `/bin/bash ${home}/.local/share/ecc-homunculus/observer-loop.sh`;
+    assert.ok(
+      ereMatches(pattern, cmdline),
+      `expected escaped pattern to match the literal observer cmdline for HOME=${home}`
+    );
+  });
+}
+
+test('escaped "." does not over-match a different path (#2301 false positive)', () => {
+  const pattern = buildPattern('/home/user.name');
+  // The unescaped form ("." as any-char) would match /home/userXname; the
+  // escaped form must not.
+  const decoy = '/bin/bash /home/userXname/observer-loop.sh';
+  assert.ok(
+    !ereMatches(pattern, decoy),
+    'escaped pattern must not over-match /home/userXname when HOME=/home/user.name'
+  );
+});
+
+console.log(`\nPassed: ${passed}`);
+console.log(`Failed: ${failed}`);
+process.exit(failed > 0 ? 1 : 0);