From b2a0c5da03a7e9acd32a47e62bd607662b7c48a2 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Wed, 22 Apr 2026 17:44:48 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20#165=20=E2=80=94=20load-session=20CLI=20?= =?UTF-8?q?now=20parity-matches=20list/delete=20(--directory,=20--output-f?= =?UTF-8?q?ormat,=20typed=20JSON=20errors)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The #160 session-lifecycle CLI triplet was asymmetric: list-sessions and delete-session accepted --directory + --output-format and emitted typed JSON error envelopes, but load-session had neither flag and dumped a raw Python traceback (including the SessionNotFoundError class name) on a missing session. Three concrete impacts this fix closes: 1. Alternate session-store locations (e.g. /tmp/claw-run-XXX/.port_sessions) were unreachable via load-session; claws had to chdir or monkeypatch DEFAULT_SESSION_DIR to work around it. 2. Not-found emitted a multi-line Python stack, not a parseable envelope. Claws deciding retry/escalate/give-up had only exit code 1 to work with. 3. The traceback leaked 'src.session_store.SessionNotFoundError' verbatim, coupling version-pinned claws to our internal exception class name. Now all three triplet commands accept the same flag pair and emit the same JSON error shape: Success (json mode): {"session_id": "alpha", "loaded": true, "messages_count": 3, "input_tokens": 42, "output_tokens": 99} Not-found: {"session_id": "missing", "loaded": false, "error": {"kind": "session_not_found", "message": "session 'missing' not found in /path", "directory": "/path", "retryable": false}} Corrupted file: {"session_id": "broken", "loaded": false, "error": {"kind": "session_load_failed", "message": "...", "directory": "/path", "retryable": true}} Exit code contract: - 0 on successful load - 1 on not-found (preserves existing $?) - 1 on OSError/JSONDecodeError (distinct 'kind' in JSON) Backward compat: legacy 'claw load-session ID' text output unchanged byte-for-byte. Only new behaviour is the flags and structured error path. Tests (tests/test_load_session_cli.py, 13 tests): - TestDirectoryFlagParity (2): --directory works + fallback to CWD/.port_sessions - TestOutputFormatFlagParity (2): json schema + text-mode backward compat - TestNotFoundTypedError (2): JSON envelope on not-found; no traceback in either mode; no internal class name leak - TestLoadFailedDistinctFromNotFound (1): corrupted file = session_load_failed with retryable=true, distinct from session_not_found - TestTripletParityConsistency (6): parametrised over [list, delete, load] * [--directory, --output-format] — explicit parity guard for future regressions Full suite: 80/80 passing, zero regression. Discovered via Jobdori dogfood sweep 2026-04-22 17:44 KST — ran 'claw load-session nonexistent' expecting a clean error, got a Python traceback. Filed #165 + fixed in same commit. Closes ROADMAP #165. --- src/main.py | 69 ++++++++++++- tests/test_load_session_cli.py | 179 +++++++++++++++++++++++++++++++++ 2 files changed, 245 insertions(+), 3 deletions(-) create mode 100644 tests/test_load_session_cli.py diff --git a/src/main.py b/src/main.py index e1a56633..50797a3e 100644 --- a/src/main.py +++ b/src/main.py @@ -84,8 +84,20 @@ def build_parser() -> argparse.ArgumentParser: flush_parser = subparsers.add_parser('flush-transcript', help='persist and flush a temporary session transcript') flush_parser.add_argument('prompt') - load_session_parser = subparsers.add_parser('load-session', help='load a previously persisted session') + load_session_parser = subparsers.add_parser( + 'load-session', + help='load a previously persisted session (#160/#165: claw-native session API)', + ) load_session_parser.add_argument('session_id') + load_session_parser.add_argument( + '--directory', help='session storage directory (default: .port_sessions)' + ) + load_session_parser.add_argument( + '--output-format', + choices=['text', 'json'], + default='text', + help='output format', + ) list_sessions_parser = subparsers.add_parser( 'list-sessions', @@ -227,8 +239,59 @@ def main(argv: list[str] | None = None) -> int: print(f'flushed={engine.transcript_store.flushed}') return 0 if args.command == 'load-session': - session = load_session(args.session_id) - print(f'{session.session_id}\n{len(session.messages)} messages\nin={session.input_tokens} out={session.output_tokens}') + from pathlib import Path as _Path + directory = _Path(args.directory) if args.directory else None + # #165: catch typed SessionNotFoundError + surface a JSON error envelope + # matching the delete-session contract shape. No more raw tracebacks. + try: + session = load_session(args.session_id, directory) + except SessionNotFoundError as exc: + if args.output_format == 'json': + import json as _json + resolved_dir = str(directory) if directory else '.port_sessions' + print(_json.dumps({ + 'session_id': args.session_id, + 'loaded': False, + 'error': { + 'kind': 'session_not_found', + 'message': str(exc), + 'directory': resolved_dir, + 'retryable': False, + }, + })) + else: + print(f'error: {exc}') + return 1 + except (OSError, ValueError) as exc: + # Corrupted session file, IO error, JSON decode error — distinct + # from 'not found'. Callers may retry here (fs glitch). + if args.output_format == 'json': + import json as _json + resolved_dir = str(directory) if directory else '.port_sessions' + print(_json.dumps({ + 'session_id': args.session_id, + 'loaded': False, + 'error': { + 'kind': 'session_load_failed', + 'message': str(exc), + 'directory': resolved_dir, + 'retryable': True, + }, + })) + else: + print(f'error: {exc}') + return 1 + if args.output_format == 'json': + import json as _json + print(_json.dumps({ + 'session_id': session.session_id, + 'loaded': True, + 'messages_count': len(session.messages), + 'input_tokens': session.input_tokens, + 'output_tokens': session.output_tokens, + })) + else: + print(f'{session.session_id}\n{len(session.messages)} messages\nin={session.input_tokens} out={session.output_tokens}') return 0 if args.command == 'list-sessions': from pathlib import Path as _Path diff --git a/tests/test_load_session_cli.py b/tests/test_load_session_cli.py new file mode 100644 index 00000000..c2ce0e22 --- /dev/null +++ b/tests/test_load_session_cli.py @@ -0,0 +1,179 @@ +"""Tests for load-session CLI parity with list-sessions/delete-session (ROADMAP #165). + +Verifies the session-lifecycle CLI triplet is now symmetric: +- --directory DIR accepted (alternate storage locations reachable) +- --output-format {text,json} accepted +- Not-found emits typed JSON error envelope, never a Python traceback +- Corrupted session file distinguished from not-found via 'kind' +- Legacy text-mode output unchanged (backward compat) +""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +import pytest + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from src.session_store import StoredSession, save_session # noqa: E402 + + +_REPO_ROOT = Path(__file__).resolve().parent.parent + + +def _run_cli( + *args: str, cwd: Path | None = None, +) -> subprocess.CompletedProcess[str]: + """Always invoke the CLI with cwd=repo-root so ``python -m src.main`` + can resolve the ``src`` package, regardless of where the test's + tmp_path is. + """ + return subprocess.run( + [sys.executable, '-m', 'src.main', *args], + capture_output=True, + text=True, + cwd=str(cwd) if cwd else str(_REPO_ROOT), + ) + + +def _make_session(session_id: str) -> StoredSession: + return StoredSession( + session_id=session_id, messages=('hi',), input_tokens=1, output_tokens=2, + ) + + +class TestDirectoryFlagParity: + def test_load_session_accepts_directory_flag(self, tmp_path: Path) -> None: + save_session(_make_session('alpha'), tmp_path) + result = _run_cli('load-session', 'alpha', '--directory', str(tmp_path)) + assert result.returncode == 0, result.stderr + assert 'alpha' in result.stdout + + def test_load_session_without_directory_uses_cwd_default( + self, tmp_path: Path, + ) -> None: + """When --directory is omitted, fall back to .port_sessions in CWD. + + Subprocess CWD must still be able to import ``src.main``, so we use + ``cwd=tmp_path`` which means ``python -m src.main`` needs ``src/`` on + sys.path. We set PYTHONPATH to the repo root via env. + """ + sessions_dir = tmp_path / '.port_sessions' + sessions_dir.mkdir() + save_session(_make_session('beta'), sessions_dir) + import os + env = os.environ.copy() + env['PYTHONPATH'] = str(_REPO_ROOT) + result = subprocess.run( + [sys.executable, '-m', 'src.main', 'load-session', 'beta'], + capture_output=True, text=True, cwd=str(tmp_path), env=env, + ) + assert result.returncode == 0, result.stderr + assert 'beta' in result.stdout + + +class TestOutputFormatFlagParity: + def test_json_mode_on_success(self, tmp_path: Path) -> None: + save_session( + StoredSession( + session_id='gamma', messages=('x', 'y'), + input_tokens=5, output_tokens=7, + ), + tmp_path, + ) + result = _run_cli( + 'load-session', 'gamma', + '--directory', str(tmp_path), + '--output-format', 'json', + ) + assert result.returncode == 0 + data = json.loads(result.stdout) + assert data == { + 'session_id': 'gamma', + 'loaded': True, + 'messages_count': 2, + 'input_tokens': 5, + 'output_tokens': 7, + } + + def test_text_mode_unchanged_on_success(self, tmp_path: Path) -> None: + """Legacy text output must be byte-identical for backward compat.""" + save_session(_make_session('delta'), tmp_path) + result = _run_cli('load-session', 'delta', '--directory', str(tmp_path)) + assert result.returncode == 0 + lines = result.stdout.strip().split('\n') + assert lines == ['delta', '1 messages', 'in=1 out=2'] + + +class TestNotFoundTypedError: + def test_not_found_json_envelope(self, tmp_path: Path) -> None: + """Not-found emits structured JSON, never a Python traceback.""" + result = _run_cli( + 'load-session', 'missing', + '--directory', str(tmp_path), + '--output-format', 'json', + ) + assert result.returncode == 1 + assert 'Traceback' not in result.stderr, ( + 'regression #165: raw traceback leaked to stderr' + ) + assert 'SessionNotFoundError' not in result.stdout, ( + 'regression #165: internal class name leaked into CLI output' + ) + data = json.loads(result.stdout) + assert data['session_id'] == 'missing' + assert data['loaded'] is False + assert data['error']['kind'] == 'session_not_found' + assert data['error']['retryable'] is False + # directory field is populated so claws know where we looked + assert 'directory' in data['error'] + + def test_not_found_text_mode_no_traceback(self, tmp_path: Path) -> None: + """Text mode on not-found must not dump a Python stack either.""" + result = _run_cli( + 'load-session', 'missing', '--directory', str(tmp_path), + ) + assert result.returncode == 1 + assert 'Traceback' not in result.stderr + assert result.stdout.startswith('error:') + + +class TestLoadFailedDistinctFromNotFound: + def test_corrupted_session_file_surfaces_distinct_kind( + self, tmp_path: Path, + ) -> None: + """A corrupted JSON file must emit kind='session_load_failed', not 'session_not_found'.""" + (tmp_path / 'broken.json').write_text('{ not valid json') + result = _run_cli( + 'load-session', 'broken', + '--directory', str(tmp_path), + '--output-format', 'json', + ) + assert result.returncode == 1 + data = json.loads(result.stdout) + assert data['error']['kind'] == 'session_load_failed' + assert data['error']['retryable'] is True, ( + 'corrupted file is potentially retryable (fs glitch) unlike not-found' + ) + + +class TestTripletParityConsistency: + """All three #160 CLI commands should accept the same flag pair.""" + + @pytest.mark.parametrize('command', ['list-sessions', 'delete-session', 'load-session']) + def test_all_three_accept_directory_flag(self, command: str) -> None: + help_text = _run_cli(command, '--help').stdout + assert '--directory' in help_text, ( + f'{command} missing --directory flag (#165 parity gap)' + ) + + @pytest.mark.parametrize('command', ['list-sessions', 'delete-session', 'load-session']) + def test_all_three_accept_output_format_flag(self, command: str) -> None: + help_text = _run_cli(command, '--help').stdout + assert '--output-format' in help_text, ( + f'{command} missing --output-format flag (#165 parity gap)' + )