fix(clv2): harden registry writes and project deletion (#2294, #2297) (#2323)

Two security-priority fixes in continuous-learning-v2/scripts/instinct-cli.py:

- #2294: _write_registry wrote projects.json without the advisory lock that
  _update_registry holds, so concurrent 'projects delete/gc/merge' could race an
  observe-time update and corrupt the registry. Extract the lock into a shared
  _registry_lock() context manager and use it in both writers.

- #2297: _remove_project_storage called shutil.rmtree on PROJECTS_DIR/project_id
  with no containment check. Add defense-in-depth: resolve the path and refuse to
  delete anything that is not strictly inside PROJECTS_DIR (or is the root
  itself), so a relaxed validator or future caller can never cause an
  arbitrary-directory delete.

Adds 5 pytest regression tests (atomic write under lock, contained delete,
missing-dir no-op, traversal refused, root refused). Node integration suite
(tests/scripts/instinct-cli-projects.test.js) green 9/9.
This commit is contained in:
Affaan Mustafa 2026-06-25 16:47:35 -07:00 committed by GitHub
parent e3f467989a
commit 2bc924faf2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 86 additions and 24 deletions

View File

@ -27,6 +27,7 @@ import ipaddress
import socket import socket
import urllib.parse import urllib.parse
import urllib.request import urllib.request
from contextlib import contextmanager
from pathlib import Path from pathlib import Path
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from collections import defaultdict from collections import defaultdict
@ -394,22 +395,36 @@ def detect_project() -> dict:
} }
@contextmanager
def _registry_lock():
"""Serialize registry read-modify-write across concurrent sessions.
Acquires the same advisory lock for every registry writer (``_update_registry``
and ``_write_registry``) so ``projects delete/gc/merge`` cannot interleave with
a concurrent observe-time update and corrupt ``projects.json``. No-op on
platforms without ``fcntl`` (Windows).
"""
REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
lock_path = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.lock"
lock_fd = None
try:
if _HAS_FCNTL:
lock_fd = open(lock_path, "w")
fcntl.flock(lock_fd, fcntl.LOCK_EX)
yield
finally:
if lock_fd is not None:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None: def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None:
"""Update the projects.json registry. """Update the projects.json registry.
Uses file locking (where available) to prevent concurrent sessions from Uses file locking (where available) to prevent concurrent sessions from
overwriting each other's updates. overwriting each other's updates.
""" """
REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True) with _registry_lock():
lock_path = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.lock"
lock_fd = None
try:
# Acquire advisory lock to serialize read-modify-write
if _HAS_FCNTL:
lock_fd = open(lock_path, "w")
fcntl.flock(lock_fd, fcntl.LOCK_EX)
try: try:
with open(REGISTRY_FILE, encoding="utf-8") as f: with open(REGISTRY_FILE, encoding="utf-8") as f:
registry = json.load(f) registry = json.load(f)
@ -429,10 +444,6 @@ def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None:
f.flush() f.flush()
os.fsync(f.fileno()) os.fsync(f.fileno())
os.replace(tmp_file, REGISTRY_FILE) os.replace(tmp_file, REGISTRY_FILE)
finally:
if lock_fd is not None:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
lock_fd.close()
def load_registry() -> dict: def load_registry() -> dict:
@ -445,8 +456,12 @@ def load_registry() -> dict:
def _write_registry(registry: dict) -> None: def _write_registry(registry: dict) -> None:
"""Write the project registry atomically.""" """Write the project registry atomically.
REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
Holds the same advisory lock as ``_update_registry`` so concurrent
``projects delete/gc/merge`` and observe-time updates cannot corrupt the file.
"""
with _registry_lock():
tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}" tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}"
with open(tmp_file, "w", encoding="utf-8") as f: with open(tmp_file, "w", encoding="utf-8") as f:
json.dump(registry, f, indent=2) json.dump(registry, f, indent=2)
@ -573,7 +588,14 @@ def _project_counts(project_id: str) -> dict:
def _remove_project_storage(project_id: str) -> None: def _remove_project_storage(project_id: str) -> None:
project_dir = PROJECTS_DIR / project_id # Defense-in-depth: resolve and confirm the target is contained within
# PROJECTS_DIR before recursively deleting, even though callers validate the
# project id. A relaxed validator or a future caller must never be able to
# turn this into an arbitrary-directory delete.
projects_root = PROJECTS_DIR.resolve()
project_dir = (PROJECTS_DIR / project_id).resolve()
if project_dir == projects_root or projects_root not in project_dir.parents:
raise ValueError(f"refusing to remove {project_dir}: escapes {projects_root}")
if project_dir.exists(): if project_dir.exists():
shutil.rmtree(project_dir) shutil.rmtree(project_dir)

View File

@ -46,6 +46,8 @@ load_registry = _mod.load_registry
_validate_instinct_id = _mod._validate_instinct_id _validate_instinct_id = _mod._validate_instinct_id
_validate_import_url = _mod._validate_import_url _validate_import_url = _mod._validate_import_url
_update_registry = _mod._update_registry _update_registry = _mod._update_registry
_write_registry = _mod._write_registry
_remove_project_storage = _mod._remove_project_storage
_confidence_bar = _mod._confidence_bar _confidence_bar = _mod._confidence_bar
@ -1043,3 +1045,41 @@ def test_update_registry_atomic_replaces_file(patch_globals):
assert "abc123" in data assert "abc123" in data
leftovers = list(tree["registry_file"].parent.glob(".projects.json.tmp.*")) leftovers = list(tree["registry_file"].parent.glob(".projects.json.tmp.*"))
assert leftovers == [] assert leftovers == []
def test_write_registry_atomic_no_tmp_leftovers(patch_globals):
# Issue #2294: _write_registry now holds the registry lock like
# _update_registry. It must still write atomically with no stray tmp files.
tree = patch_globals
_write_registry({"keep": {"name": "demo", "root": "/repo", "remote": ""}})
data = json.loads(tree["registry_file"].read_text())
assert data == {"keep": {"name": "demo", "root": "/repo", "remote": ""}}
leftovers = list(tree["registry_file"].parent.glob(".projects.json.tmp.*"))
assert leftovers == []
def test_remove_project_storage_deletes_contained_dir(patch_globals):
tree = patch_globals
target = tree["projects_dir"] / "proj-1"
(target / "instincts").mkdir(parents=True)
(target / "instincts" / "x.md").write_text("hi", encoding="utf-8")
_remove_project_storage("proj-1")
assert not target.exists()
def test_remove_project_storage_missing_dir_is_noop(patch_globals):
# No raise when the contained dir simply does not exist.
_remove_project_storage("never-created")
def test_remove_project_storage_blocks_traversal(patch_globals):
# Issue #2297: defense-in-depth — a traversal id must be refused even when a
# caller skips _validate_project_id, so this can never delete outside
# PROJECTS_DIR.
with pytest.raises(ValueError):
_remove_project_storage("../../etc")
def test_remove_project_storage_blocks_root_itself(patch_globals):
with pytest.raises(ValueError):
_remove_project_storage(".")