From f12b106c3ca692220a86da74ec898ba86cd90be2 Mon Sep 17 00:00:00 2001 From: Gaurav Dubey Date: Tue, 30 Jun 2026 07:13:19 +0530 Subject: [PATCH] fix(clv2): align Python _update_registry schema with shell counterpart (#2369) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(clv2): align Python _update_registry schema with shell counterpart The Python `_update_registry` in instinct-cli.py wrote registry entries without the `id` and `created_at` fields, while the shell counterpart in detect-project.sh writes both. A projects.json entry could therefore have a different shape depending on which path (Python CLI or shell hook) last touched it. Emit the same field set and order as the shell version: id, name, root, remote, created_at (preserved from any existing entry), last_seen. Add regression tests asserting field parity and created_at preservation. Fixes #2299 * fix(clv2): guard _update_registry against a non-dict registry entry A malformed projects.json (a non-dict value for the current project id, e.g. null) would make existing.get("created_at", ...) raise and crash the update, losing the old code's ability to self-heal a corrupt per-entry value. Normalize existing to {} when it is not a dict so the entry is healed by the rewrite. Add a regression test for the malformed-entry path. * test(clv2): assert the first-write created_at == last_seen contract The new _update_registry tests only checked both timestamps were truthy. On the initial write both derive from the same `now`, so created_at must equal last_seen; assert that explicitly so a later refactor that breaks the contract is caught. Split the compound assertions into single-expression checks. * fix(clv2): heal a non-dict top-level registry in _update_registry A projects.json that is valid JSON but not a mapping (e.g. `[]` or a string) previously crashed _update_registry on registry.get(), before the per-entry guard could run, so the corrupt file could not be healed. Guard the top-level shape right after the load and fall back to {} so the rewrite repairs the file — matching the per-entry healing already in place. Resolves the remaining CodeRabbit finding on #2299. Co-Authored-By: Claude Opus 4.8 --------- Co-authored-by: Claude Opus 4.8 --- .../scripts/instinct-cli.py | 22 ++++++- .../scripts/test_parse_instinct.py | 64 +++++++++++++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/skills/continuous-learning-v2/scripts/instinct-cli.py b/skills/continuous-learning-v2/scripts/instinct-cli.py index 13bc467b..2274a852 100755 --- a/skills/continuous-learning-v2/scripts/instinct-cli.py +++ b/skills/continuous-learning-v2/scripts/instinct-cli.py @@ -430,12 +430,32 @@ def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None: registry = json.load(f) except (FileNotFoundError, json.JSONDecodeError): registry = {} + # A registry that is valid JSON but not a mapping (e.g. a list from a + # corrupt projects.json) must not crash the update before the per-entry + # guard below: fall back to an empty dict so the whole file is healed. + if not isinstance(registry, dict): + registry = {} + # Mirror the shell counterpart in detect-project.sh: the entry carries + # "id" and "created_at" alongside the other fields so a projects.json + # record has the same shape regardless of which path (Python CLI or + # shell hook) last wrote it. "created_at" is preserved from any + # existing entry; only "last_seen" advances on update. + now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + existing = registry.get(pid, {}) + # A malformed registry (e.g. a non-dict value for this id) must not + # crash the update: fall back to an empty dict so the corrupt entry is + # healed by the rewrite, matching the old unconditional-overwrite + # behavior. + if not isinstance(existing, dict): + existing = {} registry[pid] = { + "id": pid, "name": pname, "root": proot, "remote": premote, - "last_seen": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "created_at": existing.get("created_at", now), + "last_seen": now, } tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}" diff --git a/skills/continuous-learning-v2/scripts/test_parse_instinct.py b/skills/continuous-learning-v2/scripts/test_parse_instinct.py index 225dcb05..a7979986 100644 --- a/skills/continuous-learning-v2/scripts/test_parse_instinct.py +++ b/skills/continuous-learning-v2/scripts/test_parse_instinct.py @@ -1047,6 +1047,70 @@ def test_update_registry_atomic_replaces_file(patch_globals): assert leftovers == [] +def test_update_registry_matches_shell_schema(patch_globals): + # Issue #2299: the Python writer must emit the same field set as the shell + # counterpart in detect-project.sh (id, name, root, remote, created_at, + # last_seen) so a projects.json entry has a consistent shape regardless of + # which path wrote it. + tree = patch_globals + _update_registry("abc123", "demo", "/repo", "https://example.com/repo.git") + entry = json.loads(tree["registry_file"].read_text())["abc123"] + assert set(entry) == {"id", "name", "root", "remote", "created_at", "last_seen"} + assert entry["id"] == "abc123" + assert entry["name"] == "demo" + assert entry["root"] == "/repo" + assert entry["remote"] == "https://example.com/repo.git" + # On the initial write both timestamps come from the same `now`, so the + # first-write contract is created_at == last_seen. + assert entry["created_at"] + assert entry["created_at"] == entry["last_seen"] + + +def test_update_registry_preserves_created_at(patch_globals): + # created_at is stamped on first write and preserved on subsequent updates, + # while last_seen advances — matching entry.get("created_at", now) in the + # shell counterpart. + tree = patch_globals + _update_registry("abc123", "demo", "/repo", "https://example.com/repo.git") + first = json.loads(tree["registry_file"].read_text())["abc123"] + + _update_registry("abc123", "demo-renamed", "/repo", "https://example.com/repo.git") + second = json.loads(tree["registry_file"].read_text())["abc123"] + + assert second["created_at"] == first["created_at"] + assert second["name"] == "demo-renamed" + assert second["last_seen"] >= first["last_seen"] + + +def test_update_registry_heals_malformed_entry(patch_globals): + # Issue #2299 follow-up: a non-dict value for the project id (e.g. a + # corrupt registry) must not crash _update_registry. The entry is healed by + # the rewrite, preserving the old unconditional-overwrite behavior. + tree = patch_globals + tree["registry_file"].write_text(json.dumps({"abc123": None}), encoding="utf-8") + _update_registry("abc123", "demo", "/repo", "https://example.com/repo.git") + entry = json.loads(tree["registry_file"].read_text())["abc123"] + assert isinstance(entry, dict) + assert entry["id"] == "abc123" + assert entry["created_at"] + assert entry["created_at"] == entry["last_seen"] + + +def test_update_registry_heals_non_dict_registry(patch_globals): + # Issue #2299 follow-up: a top-level registry that is valid JSON but not a + # mapping (e.g. a list or string from a corrupt projects.json) must not + # crash _update_registry before the per-entry guard runs. The whole file is + # healed by the rewrite, preserving the old unconditional-overwrite behavior. + tree = patch_globals + tree["registry_file"].write_text(json.dumps(["oops"]), encoding="utf-8") + _update_registry("abc123", "demo", "/repo", "https://example.com/repo.git") + registry = json.loads(tree["registry_file"].read_text()) + assert isinstance(registry, dict) + entry = registry["abc123"] + assert entry["id"] == "abc123" + assert entry["created_at"] == entry["last_seen"] + + def test_write_registry_atomic_no_tmp_leftovers(patch_globals): # Issue #2294: _write_registry now holds the registry lock like # _update_registry. It must still write atomically with no stray tmp files.