fix(clv2): align Python _update_registry schema with shell counterpart (#2369)

* fix(clv2): align Python _update_registry schema with shell counterpart

The Python `_update_registry` in instinct-cli.py wrote registry entries
without the `id` and `created_at` fields, while the shell counterpart in
detect-project.sh writes both. A projects.json entry could therefore have a
different shape depending on which path (Python CLI or shell hook) last
touched it.

Emit the same field set and order as the shell version: id, name, root,
remote, created_at (preserved from any existing entry), last_seen. Add
regression tests asserting field parity and created_at preservation.

Fixes #2299

* fix(clv2): guard _update_registry against a non-dict registry entry

A malformed projects.json (a non-dict value for the current project id, e.g.
null) would make existing.get("created_at", ...) raise and crash the update,
losing the old code's ability to self-heal a corrupt per-entry value. Normalize
existing to {} when it is not a dict so the entry is healed by the rewrite. Add
a regression test for the malformed-entry path.

* test(clv2): assert the first-write created_at == last_seen contract

The new _update_registry tests only checked both timestamps were truthy. On the
initial write both derive from the same `now`, so created_at must equal
last_seen; assert that explicitly so a later refactor that breaks the contract
is caught. Split the compound assertions into single-expression checks.

* fix(clv2): heal a non-dict top-level registry in _update_registry

A projects.json that is valid JSON but not a mapping (e.g. `[]` or a
string) previously crashed _update_registry on registry.get(), before
the per-entry guard could run, so the corrupt file could not be healed.
Guard the top-level shape right after the load and fall back to {} so the
rewrite repairs the file — matching the per-entry healing already in place.

Resolves the remaining CodeRabbit finding on #2299.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Gaurav Dubey 2026-06-30 07:13:19 +05:30 committed by GitHub
parent be91f21837
commit f12b106c3c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 85 additions and 1 deletions

View File

@ -430,12 +430,32 @@ def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None:
registry = json.load(f) registry = json.load(f)
except (FileNotFoundError, json.JSONDecodeError): except (FileNotFoundError, json.JSONDecodeError):
registry = {} registry = {}
# A registry that is valid JSON but not a mapping (e.g. a list from a
# corrupt projects.json) must not crash the update before the per-entry
# guard below: fall back to an empty dict so the whole file is healed.
if not isinstance(registry, dict):
registry = {}
# Mirror the shell counterpart in detect-project.sh: the entry carries
# "id" and "created_at" alongside the other fields so a projects.json
# record has the same shape regardless of which path (Python CLI or
# shell hook) last wrote it. "created_at" is preserved from any
# existing entry; only "last_seen" advances on update.
now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
existing = registry.get(pid, {})
# A malformed registry (e.g. a non-dict value for this id) must not
# crash the update: fall back to an empty dict so the corrupt entry is
# healed by the rewrite, matching the old unconditional-overwrite
# behavior.
if not isinstance(existing, dict):
existing = {}
registry[pid] = { registry[pid] = {
"id": pid,
"name": pname, "name": pname,
"root": proot, "root": proot,
"remote": premote, "remote": premote,
"last_seen": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), "created_at": existing.get("created_at", now),
"last_seen": now,
} }
tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}" tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}"

View File

@ -1047,6 +1047,70 @@ def test_update_registry_atomic_replaces_file(patch_globals):
assert leftovers == [] assert leftovers == []
def test_update_registry_matches_shell_schema(patch_globals):
# Issue #2299: the Python writer must emit the same field set as the shell
# counterpart in detect-project.sh (id, name, root, remote, created_at,
# last_seen) so a projects.json entry has a consistent shape regardless of
# which path wrote it.
tree = patch_globals
_update_registry("abc123", "demo", "/repo", "https://example.com/repo.git")
entry = json.loads(tree["registry_file"].read_text())["abc123"]
assert set(entry) == {"id", "name", "root", "remote", "created_at", "last_seen"}
assert entry["id"] == "abc123"
assert entry["name"] == "demo"
assert entry["root"] == "/repo"
assert entry["remote"] == "https://example.com/repo.git"
# On the initial write both timestamps come from the same `now`, so the
# first-write contract is created_at == last_seen.
assert entry["created_at"]
assert entry["created_at"] == entry["last_seen"]
def test_update_registry_preserves_created_at(patch_globals):
# created_at is stamped on first write and preserved on subsequent updates,
# while last_seen advances — matching entry.get("created_at", now) in the
# shell counterpart.
tree = patch_globals
_update_registry("abc123", "demo", "/repo", "https://example.com/repo.git")
first = json.loads(tree["registry_file"].read_text())["abc123"]
_update_registry("abc123", "demo-renamed", "/repo", "https://example.com/repo.git")
second = json.loads(tree["registry_file"].read_text())["abc123"]
assert second["created_at"] == first["created_at"]
assert second["name"] == "demo-renamed"
assert second["last_seen"] >= first["last_seen"]
def test_update_registry_heals_malformed_entry(patch_globals):
# Issue #2299 follow-up: a non-dict value for the project id (e.g. a
# corrupt registry) must not crash _update_registry. The entry is healed by
# the rewrite, preserving the old unconditional-overwrite behavior.
tree = patch_globals
tree["registry_file"].write_text(json.dumps({"abc123": None}), encoding="utf-8")
_update_registry("abc123", "demo", "/repo", "https://example.com/repo.git")
entry = json.loads(tree["registry_file"].read_text())["abc123"]
assert isinstance(entry, dict)
assert entry["id"] == "abc123"
assert entry["created_at"]
assert entry["created_at"] == entry["last_seen"]
def test_update_registry_heals_non_dict_registry(patch_globals):
# Issue #2299 follow-up: a top-level registry that is valid JSON but not a
# mapping (e.g. a list or string from a corrupt projects.json) must not
# crash _update_registry before the per-entry guard runs. The whole file is
# healed by the rewrite, preserving the old unconditional-overwrite behavior.
tree = patch_globals
tree["registry_file"].write_text(json.dumps(["oops"]), encoding="utf-8")
_update_registry("abc123", "demo", "/repo", "https://example.com/repo.git")
registry = json.loads(tree["registry_file"].read_text())
assert isinstance(registry, dict)
entry = registry["abc123"]
assert entry["id"] == "abc123"
assert entry["created_at"] == entry["last_seen"]
def test_write_registry_atomic_no_tmp_leftovers(patch_globals): def test_write_registry_atomic_no_tmp_leftovers(patch_globals):
# Issue #2294: _write_registry now holds the registry lock like # Issue #2294: _write_registry now holds the registry lock like
# _update_registry. It must still write atomically with no stray tmp files. # _update_registry. It must still write atomically with no stray tmp files.