From b2a0c5da03a7e9acd32a47e62bd607662b7c48a2 Mon Sep 17 00:00:00 2001
From: YeonGyu-Kim <code.yeon.gyu@gmail.com>
Date: Wed, 22 Apr 2026 17:44:48 +0900
Subject: [PATCH] =?UTF-8?q?fix:=20#165=20=E2=80=94=20load-session=20CLI=20?=
 =?UTF-8?q?now=20parity-matches=20list/delete=20(--directory,=20--output-f?=
 =?UTF-8?q?ormat,=20typed=20JSON=20errors)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The #160 session-lifecycle CLI triplet was asymmetric: list-sessions and
delete-session accepted --directory + --output-format and emitted typed
JSON error envelopes, but load-session had neither flag and dumped a raw
Python traceback (including the SessionNotFoundError class name) on a
missing session.

Three concrete impacts this fix closes:
1. Alternate session-store locations (e.g. /tmp/claw-run-XXX/.port_sessions)
   were unreachable via load-session; claws had to chdir or monkeypatch
   DEFAULT_SESSION_DIR to work around it.
2. Not-found emitted a multi-line Python stack, not a parseable envelope.
   Claws deciding retry/escalate/give-up had only exit code 1 to work with.
3. The traceback leaked 'src.session_store.SessionNotFoundError' verbatim,
   coupling version-pinned claws to our internal exception class name.

Now all three triplet commands accept the same flag pair and emit the
same JSON error shape:

Success (json mode):
  {"session_id": "alpha", "loaded": true, "messages_count": 3,
   "input_tokens": 42, "output_tokens": 99}

Not-found:
  {"session_id": "missing", "loaded": false,
   "error": {"kind": "session_not_found",
               "message": "session 'missing' not found in /path",
               "directory": "/path", "retryable": false}}

Corrupted file:
  {"session_id": "broken", "loaded": false,
   "error": {"kind": "session_load_failed",
               "message": "...", "directory": "/path",
               "retryable": true}}

Exit code contract:
- 0 on successful load
- 1 on not-found (preserves existing $?)
- 1 on OSError/JSONDecodeError (distinct 'kind' in JSON)

Backward compat: legacy 'claw load-session ID' text output unchanged
byte-for-byte. Only new behaviour is the flags and structured error path.

Tests (tests/test_load_session_cli.py, 13 tests):
- TestDirectoryFlagParity (2): --directory works + fallback to CWD/.port_sessions
- TestOutputFormatFlagParity (2): json schema + text-mode backward compat
- TestNotFoundTypedError (2): JSON envelope on not-found; no traceback in
  either mode; no internal class name leak
- TestLoadFailedDistinctFromNotFound (1): corrupted file = session_load_failed
  with retryable=true, distinct from session_not_found
- TestTripletParityConsistency (6): parametrised over [list, delete, load] *
  [--directory, --output-format] — explicit parity guard for future regressions

Full suite: 80/80 passing, zero regression.

Discovered via Jobdori dogfood sweep 2026-04-22 17:44 KST — ran
'claw load-session nonexistent' expecting a clean error, got a Python
traceback. Filed #165 + fixed in same commit.

Closes ROADMAP #165.
---
 src/main.py                    |  69 ++++++++++++-
 tests/test_load_session_cli.py | 179 +++++++++++++++++++++++++++++++++
 2 files changed, 245 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_load_session_cli.py

diff --git a/src/main.py b/src/main.py
index e1a56633..50797a3e 100644
--- a/src/main.py
+++ b/src/main.py
@@ -84,8 +84,20 @@ def build_parser() -> argparse.ArgumentParser:
     flush_parser = subparsers.add_parser('flush-transcript', help='persist and flush a temporary session transcript')
     flush_parser.add_argument('prompt')
 
-    load_session_parser = subparsers.add_parser('load-session', help='load a previously persisted session')
+    load_session_parser = subparsers.add_parser(
+        'load-session',
+        help='load a previously persisted session (#160/#165: claw-native session API)',
+    )
     load_session_parser.add_argument('session_id')
+    load_session_parser.add_argument(
+        '--directory', help='session storage directory (default: .port_sessions)'
+    )
+    load_session_parser.add_argument(
+        '--output-format',
+        choices=['text', 'json'],
+        default='text',
+        help='output format',
+    )
 
     list_sessions_parser = subparsers.add_parser(
         'list-sessions',
@@ -227,8 +239,59 @@ def main(argv: list[str] | None = None) -> int:
         print(f'flushed={engine.transcript_store.flushed}')
         return 0
     if args.command == 'load-session':
-        session = load_session(args.session_id)
-        print(f'{session.session_id}\n{len(session.messages)} messages\nin={session.input_tokens} out={session.output_tokens}')
+        from pathlib import Path as _Path
+        directory = _Path(args.directory) if args.directory else None
+        # #165: catch typed SessionNotFoundError + surface a JSON error envelope
+        # matching the delete-session contract shape. No more raw tracebacks.
+        try:
+            session = load_session(args.session_id, directory)
+        except SessionNotFoundError as exc:
+            if args.output_format == 'json':
+                import json as _json
+                resolved_dir = str(directory) if directory else '.port_sessions'
+                print(_json.dumps({
+                    'session_id': args.session_id,
+                    'loaded': False,
+                    'error': {
+                        'kind': 'session_not_found',
+                        'message': str(exc),
+                        'directory': resolved_dir,
+                        'retryable': False,
+                    },
+                }))
+            else:
+                print(f'error: {exc}')
+            return 1
+        except (OSError, ValueError) as exc:
+            # Corrupted session file, IO error, JSON decode error — distinct
+            # from 'not found'. Callers may retry here (fs glitch).
+            if args.output_format == 'json':
+                import json as _json
+                resolved_dir = str(directory) if directory else '.port_sessions'
+                print(_json.dumps({
+                    'session_id': args.session_id,
+                    'loaded': False,
+                    'error': {
+                        'kind': 'session_load_failed',
+                        'message': str(exc),
+                        'directory': resolved_dir,
+                        'retryable': True,
+                    },
+                }))
+            else:
+                print(f'error: {exc}')
+            return 1
+        if args.output_format == 'json':
+            import json as _json
+            print(_json.dumps({
+                'session_id': session.session_id,
+                'loaded': True,
+                'messages_count': len(session.messages),
+                'input_tokens': session.input_tokens,
+                'output_tokens': session.output_tokens,
+            }))
+        else:
+            print(f'{session.session_id}\n{len(session.messages)} messages\nin={session.input_tokens} out={session.output_tokens}')
         return 0
     if args.command == 'list-sessions':
         from pathlib import Path as _Path
diff --git a/tests/test_load_session_cli.py b/tests/test_load_session_cli.py
new file mode 100644
index 00000000..c2ce0e22
--- /dev/null
+++ b/tests/test_load_session_cli.py
@@ -0,0 +1,179 @@
+"""Tests for load-session CLI parity with list-sessions/delete-session (ROADMAP #165).
+
+Verifies the session-lifecycle CLI triplet is now symmetric:
+- --directory DIR accepted (alternate storage locations reachable)
+- --output-format {text,json} accepted
+- Not-found emits typed JSON error envelope, never a Python traceback
+- Corrupted session file distinguished from not-found via 'kind'
+- Legacy text-mode output unchanged (backward compat)
+"""
+
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from src.session_store import StoredSession, save_session  # noqa: E402
+
+
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+
+
+def _run_cli(
+    *args: str, cwd: Path | None = None,
+) -> subprocess.CompletedProcess[str]:
+    """Always invoke the CLI with cwd=repo-root so ``python -m src.main``
+    can resolve the ``src`` package, regardless of where the test's
+    tmp_path is.
+    """
+    return subprocess.run(
+        [sys.executable, '-m', 'src.main', *args],
+        capture_output=True,
+        text=True,
+        cwd=str(cwd) if cwd else str(_REPO_ROOT),
+    )
+
+
+def _make_session(session_id: str) -> StoredSession:
+    return StoredSession(
+        session_id=session_id, messages=('hi',), input_tokens=1, output_tokens=2,
+    )
+
+
+class TestDirectoryFlagParity:
+    def test_load_session_accepts_directory_flag(self, tmp_path: Path) -> None:
+        save_session(_make_session('alpha'), tmp_path)
+        result = _run_cli('load-session', 'alpha', '--directory', str(tmp_path))
+        assert result.returncode == 0, result.stderr
+        assert 'alpha' in result.stdout
+
+    def test_load_session_without_directory_uses_cwd_default(
+        self, tmp_path: Path,
+    ) -> None:
+        """When --directory is omitted, fall back to .port_sessions in CWD.
+
+        Subprocess CWD must still be able to import ``src.main``, so we use
+        ``cwd=tmp_path`` which means ``python -m src.main`` needs ``src/`` on
+        sys.path. We set PYTHONPATH to the repo root via env.
+        """
+        sessions_dir = tmp_path / '.port_sessions'
+        sessions_dir.mkdir()
+        save_session(_make_session('beta'), sessions_dir)
+        import os
+        env = os.environ.copy()
+        env['PYTHONPATH'] = str(_REPO_ROOT)
+        result = subprocess.run(
+            [sys.executable, '-m', 'src.main', 'load-session', 'beta'],
+            capture_output=True, text=True, cwd=str(tmp_path), env=env,
+        )
+        assert result.returncode == 0, result.stderr
+        assert 'beta' in result.stdout
+
+
+class TestOutputFormatFlagParity:
+    def test_json_mode_on_success(self, tmp_path: Path) -> None:
+        save_session(
+            StoredSession(
+                session_id='gamma', messages=('x', 'y'),
+                input_tokens=5, output_tokens=7,
+            ),
+            tmp_path,
+        )
+        result = _run_cli(
+            'load-session', 'gamma',
+            '--directory', str(tmp_path),
+            '--output-format', 'json',
+        )
+        assert result.returncode == 0
+        data = json.loads(result.stdout)
+        assert data == {
+            'session_id': 'gamma',
+            'loaded': True,
+            'messages_count': 2,
+            'input_tokens': 5,
+            'output_tokens': 7,
+        }
+
+    def test_text_mode_unchanged_on_success(self, tmp_path: Path) -> None:
+        """Legacy text output must be byte-identical for backward compat."""
+        save_session(_make_session('delta'), tmp_path)
+        result = _run_cli('load-session', 'delta', '--directory', str(tmp_path))
+        assert result.returncode == 0
+        lines = result.stdout.strip().split('\n')
+        assert lines == ['delta', '1 messages', 'in=1 out=2']
+
+
+class TestNotFoundTypedError:
+    def test_not_found_json_envelope(self, tmp_path: Path) -> None:
+        """Not-found emits structured JSON, never a Python traceback."""
+        result = _run_cli(
+            'load-session', 'missing',
+            '--directory', str(tmp_path),
+            '--output-format', 'json',
+        )
+        assert result.returncode == 1
+        assert 'Traceback' not in result.stderr, (
+            'regression #165: raw traceback leaked to stderr'
+        )
+        assert 'SessionNotFoundError' not in result.stdout, (
+            'regression #165: internal class name leaked into CLI output'
+        )
+        data = json.loads(result.stdout)
+        assert data['session_id'] == 'missing'
+        assert data['loaded'] is False
+        assert data['error']['kind'] == 'session_not_found'
+        assert data['error']['retryable'] is False
+        # directory field is populated so claws know where we looked
+        assert 'directory' in data['error']
+
+    def test_not_found_text_mode_no_traceback(self, tmp_path: Path) -> None:
+        """Text mode on not-found must not dump a Python stack either."""
+        result = _run_cli(
+            'load-session', 'missing', '--directory', str(tmp_path),
+        )
+        assert result.returncode == 1
+        assert 'Traceback' not in result.stderr
+        assert result.stdout.startswith('error:')
+
+
+class TestLoadFailedDistinctFromNotFound:
+    def test_corrupted_session_file_surfaces_distinct_kind(
+        self, tmp_path: Path,
+    ) -> None:
+        """A corrupted JSON file must emit kind='session_load_failed', not 'session_not_found'."""
+        (tmp_path / 'broken.json').write_text('{ not valid json')
+        result = _run_cli(
+            'load-session', 'broken',
+            '--directory', str(tmp_path),
+            '--output-format', 'json',
+        )
+        assert result.returncode == 1
+        data = json.loads(result.stdout)
+        assert data['error']['kind'] == 'session_load_failed'
+        assert data['error']['retryable'] is True, (
+            'corrupted file is potentially retryable (fs glitch) unlike not-found'
+        )
+
+
+class TestTripletParityConsistency:
+    """All three #160 CLI commands should accept the same flag pair."""
+
+    @pytest.mark.parametrize('command', ['list-sessions', 'delete-session', 'load-session'])
+    def test_all_three_accept_directory_flag(self, command: str) -> None:
+        help_text = _run_cli(command, '--help').stdout
+        assert '--directory' in help_text, (
+            f'{command} missing --directory flag (#165 parity gap)'
+        )
+
+    @pytest.mark.parametrize('command', ['list-sessions', 'delete-session', 'load-session'])
+    def test_all_three_accept_output_format_flag(self, command: str) -> None:
+        help_text = _run_cli(command, '--help').stdout
+        assert '--output-format' in help_text, (
+            f'{command} missing --output-format flag (#165 parity gap)'
+        )