From 85de7f9814fb3da0f5ac60c58214b25a4ea2cc30 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Wed, 22 Apr 2026 18:04:25 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20#166=20=E2=80=94=20flush-transcript=20no?= =?UTF-8?q?w=20accepts=20--directory=20/=20--output-format=20/=20--session?= =?UTF-8?q?-id;=20session-creation=20command=20parity=20with=20#160/#165?= =?UTF-8?q?=20lifecycle=20triplet?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ROADMAP.md | 79 +++++++++++ src/main.py | 42 +++++- src/query_engine.py | 17 ++- tests/test_flush_transcript_cli.py | 206 +++++++++++++++++++++++++++++ 4 files changed, 338 insertions(+), 6 deletions(-) create mode 100644 tests/test_flush_transcript_cli.py diff --git a/ROADMAP.md b/ROADMAP.md index 5d54011..e82b18f 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -6379,3 +6379,82 @@ src.session_store.SessionNotFoundError: "session 'nonexistent' not found in .por **Blocker.** None. Purely CLI-layer wiring; `session_store.load_session` already accepts `directory` and already raises the typed `SessionNotFoundError`. This is closing the gap between the library contract (which is clean) and the CLI contract (which isn't). **Source.** Jobdori dogfood sweep 2026-04-22 17:44 KST — ran `claw load-session nonexistent`, got a Python traceback. Compared `--help` across the #160 triplet; confirmed `list-sessions` and `delete-session` both have `--directory` + `--output-format` but `load-session` has neither. The session-lifecycle surface is inconsistent in a way that directly hurts claws that already adopted #160. + +## Pinpoint #166. `flush-transcript` CLI lacks `--directory` / `--output-format` / `--session-id` — session-*creation* command is out-of-family with the now-symmetric #160/#165 lifecycle triplet + +**Gap.** The session lifecycle has a creation step (`flush-transcript`) and a management triplet (`list-sessions`, `delete-session`, `load-session`). #160 and #165 made the management triplet fully symmetric — all three accept `--directory` and `--output-format {text,json}`, and emit structured JSON envelopes. But `flush-transcript` — which *creates* the persisted session file in the first place — has **none of these flags** and emits a hybrid path-plus-key=value text blob on stdout: + +``` +$ claw flush-transcript "hello" +.port_sessions/629412aad6f24b4fb44ed636e12b0f25.json +flushed=True +``` + +Two lines, two formats, one a path and one a key=value. Claws scripting session creation have to: +- `tail -n 2 | head -n 1` to get the path, or regex for `\.json$` +- Parse the second line as a key=value pair +- Extract the session ID from the filename (stripping extension) +- Hope the working directory is the one they wanted sessions written to + +**Impact.** Three concrete breakages: + +1. **No way to redirect creation to an alternate `--directory`.** Claws running out-of-tree (e.g., `/tmp/claw-run-XXX/.port_sessions`) must `chdir` before calling `flush-transcript`. Creates race conditions in parallel orchestration and breaks composition with `list-sessions --directory /tmp/...` and `load-session --directory /tmp/...` which *do* accept the flag. + +2. **Session ID is engine-generated and only discoverable via stdout parsing.** There's no way to say `flush-transcript "hello" --session-id claw-run-42`, so claws that want deterministic session IDs for checkpointing/replay must regex the output to discover what ID the engine picked. The ID is available in the persisted file's content (`.session_id`), but you have to load the file to read it. + +3. **Output is unparseable as JSON, unkeyed in text mode.** Every other lifecycle CLI now emits either parseable JSON or well-labeled text. `flush-transcript` is the one place where the output format is a historical artifact. Claws building session-creation pipelines have to special-case it. + +**Repro (family consistency check).** +```bash +# Management triplet (all three symmetric after #160/#165): +$ claw list-sessions --directory /tmp/a --output-format json +{"sessions": [], "count": 0} + +$ claw delete-session foo --directory /tmp/a --output-format json +{"session_id": "foo", "deleted": false, "status": "not_found"} + +$ claw load-session foo --directory /tmp/a --output-format json +{"session_id": "foo", "loaded": false, "error": {"kind": "session_not_found", ...}} + +# Creation step (out-of-family): +$ claw flush-transcript "hello" --directory /tmp/a --output-format json +error: unrecognized arguments: --directory /tmp/a --output-format json + +$ claw flush-transcript "hello" +.port_sessions/629412aad6f24b4fb44ed636e12b0f25.json +flushed=True +``` + +**Fix shape (~40 lines across CLI + engine).** + +1. **Engine layer** — `QueryEnginePort.persist_session(directory: Path | None = None)` — pass through to `save_session(directory=directory)` (which already accepts it). No API break; existing callers pass nothing. + +2. **CLI flags** — add to `flush-transcript` parser: + - `--directory DIR` — alternate storage location (parity with triplet) + - `--output-format {text,json}` — same choices as triplet + - `--session-id ID` — override the auto-generated UUID (deterministic IDs for claw checkpointing) + +3. **JSON output shape** (success): + ```json + { + "session_id": "629412aad6f24b4fb44ed636e12b0f25", + "path": "/tmp/a/629412aad6f24b4fb44ed636e12b0f25.json", + "flushed": true, + "messages_count": 1, + "input_tokens": 0, + "output_tokens": 3 + } + ``` + Matches the `load-session --output-format json` success shape (modulo `path` + `flushed` which are creation-specific). + +4. **Text output** — keep the existing two-line format byte-identical for backward compat; new structure only activates when `--output-format json`. + +**Acceptance.** All four of these pass: +- `claw flush-transcript "hi" --directory /tmp/a` persists to `/tmp/a/.json` +- `claw flush-transcript "hi" --session-id fixed-id` persists to `.port_sessions/fixed-id.json` +- `claw flush-transcript "hi" --output-format json` emits parseable JSON with all fields +- Existing `claw flush-transcript "hi"` output unchanged byte-for-byte + +**Blocker.** None. `save_session` already accepts `directory`; `QueryEnginePort.session_id` is already a settable field; the wiring is pure CLI layer. + +**Source.** Jobdori dogfood sweep 2026-04-22 17:58 KST — ran `flush-transcript "hello"`, got the path-plus-key=value hybrid output, then checked `--help` for the flag pair I just shipped across the triplet in #165. Realized the session-*creation* command was asymmetric with the now-symmetric management triplet. Closes the last gap in the session-lifecycle CLI surface. diff --git a/src/main.py b/src/main.py index 50797a3..36d88da 100644 --- a/src/main.py +++ b/src/main.py @@ -81,8 +81,24 @@ def build_parser() -> argparse.ArgumentParser: ), ) - flush_parser = subparsers.add_parser('flush-transcript', help='persist and flush a temporary session transcript') + flush_parser = subparsers.add_parser( + 'flush-transcript', + help='persist and flush a temporary session transcript (#160/#166: claw-native session API)', + ) flush_parser.add_argument('prompt') + flush_parser.add_argument( + '--directory', help='session storage directory (default: .port_sessions)' + ) + flush_parser.add_argument( + '--output-format', + choices=['text', 'json'], + default='text', + help='output format', + ) + flush_parser.add_argument( + '--session-id', + help='deterministic session ID (default: auto-generated UUID)', + ) load_session_parser = subparsers.add_parser( 'load-session', @@ -232,11 +248,29 @@ def main(argv: list[str] | None = None) -> int: return 2 return 0 if args.command == 'flush-transcript': + from pathlib import Path as _Path engine = QueryEnginePort.from_workspace() + # #166: allow deterministic session IDs for claw checkpointing/replay. + # When unset, the engine's auto-generated UUID is used (backward compat). + if args.session_id: + engine.session_id = args.session_id engine.submit_message(args.prompt) - path = engine.persist_session() - print(path) - print(f'flushed={engine.transcript_store.flushed}') + directory = _Path(args.directory) if args.directory else None + path = engine.persist_session(directory) + if args.output_format == 'json': + import json as _json + print(_json.dumps({ + 'session_id': engine.session_id, + 'path': path, + 'flushed': engine.transcript_store.flushed, + 'messages_count': len(engine.mutable_messages), + 'input_tokens': engine.total_usage.input_tokens, + 'output_tokens': engine.total_usage.output_tokens, + })) + else: + # #166: legacy text output preserved byte-for-byte for backward compat. + print(path) + print(f'flushed={engine.transcript_store.flushed}') return 0 if args.command == 'load-session': from pathlib import Path as _Path diff --git a/src/query_engine.py b/src/query_engine.py index 97c060d..5f3f3ed 100644 --- a/src/query_engine.py +++ b/src/query_engine.py @@ -153,7 +153,19 @@ class QueryEnginePort: def flush_transcript(self) -> None: self.transcript_store.flush() - def persist_session(self) -> str: + def persist_session(self, directory: 'Path | None' = None) -> str: + """Flush the transcript and save the session to disk. + + Args: + directory: Optional override for the storage directory. When None + (default, for backward compat), uses the default location + (``.port_sessions`` in CWD). When set, passes through to + ``save_session`` which already supports directory overrides. + + #166: added directory parameter to match the session-lifecycle CLI + surface established by #160/#165. Claws running out-of-tree can now + redirect session creation to a workspace-specific dir without chdir. + """ self.flush_transcript() path = save_session( StoredSession( @@ -161,7 +173,8 @@ class QueryEnginePort: messages=tuple(self.mutable_messages), input_tokens=self.total_usage.input_tokens, output_tokens=self.total_usage.output_tokens, - ) + ), + directory, ) return str(path) diff --git a/tests/test_flush_transcript_cli.py b/tests/test_flush_transcript_cli.py new file mode 100644 index 0000000..27701b1 --- /dev/null +++ b/tests/test_flush_transcript_cli.py @@ -0,0 +1,206 @@ +"""Tests for flush-transcript CLI parity with the #160/#165 lifecycle triplet (ROADMAP #166). + +Verifies that session *creation* now accepts the same flag family as session +management (list/delete/load): +- --directory DIR (alternate storage location) +- --output-format {text,json} (structured output) +- --session-id ID (deterministic IDs for claw checkpointing) + +Also verifies backward compat: default text output unchanged byte-for-byte. +""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +import pytest + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + + +_REPO_ROOT = Path(__file__).resolve().parent.parent + + +def _run_cli(*args: str) -> subprocess.CompletedProcess[str]: + return subprocess.run( + [sys.executable, '-m', 'src.main', *args], + capture_output=True, text=True, cwd=str(_REPO_ROOT), + ) + + +class TestDirectoryFlag: + def test_flush_transcript_writes_to_custom_directory(self, tmp_path: Path) -> None: + result = _run_cli( + 'flush-transcript', 'hello world', + '--directory', str(tmp_path), + ) + assert result.returncode == 0, result.stderr + # Exactly one session file should exist in the directory + files = list(tmp_path.glob('*.json')) + assert len(files) == 1 + # And the legacy text output points to that file + assert str(files[0]) in result.stdout + + +class TestSessionIdFlag: + def test_explicit_session_id_is_respected(self, tmp_path: Path) -> None: + result = _run_cli( + 'flush-transcript', 'hello', + '--directory', str(tmp_path), + '--session-id', 'deterministic-id-42', + ) + assert result.returncode == 0, result.stderr + expected_path = tmp_path / 'deterministic-id-42.json' + assert expected_path.exists(), ( + f'session file not created at deterministic path: {expected_path}' + ) + # And it should contain the ID we asked for + data = json.loads(expected_path.read_text()) + assert data['session_id'] == 'deterministic-id-42' + + def test_auto_session_id_when_flag_omitted(self, tmp_path: Path) -> None: + """Without --session-id, engine still auto-generates a UUID (backward compat).""" + result = _run_cli( + 'flush-transcript', 'hello', + '--directory', str(tmp_path), + ) + assert result.returncode == 0 + files = list(tmp_path.glob('*.json')) + assert len(files) == 1 + # The filename (minus .json) should be a 32-char hex UUID + stem = files[0].stem + assert len(stem) == 32 + assert all(c in '0123456789abcdef' for c in stem) + + +class TestOutputFormatFlag: + def test_json_mode_emits_structured_envelope(self, tmp_path: Path) -> None: + result = _run_cli( + 'flush-transcript', 'hello', + '--directory', str(tmp_path), + '--session-id', 'beta', + '--output-format', 'json', + ) + assert result.returncode == 0 + data = json.loads(result.stdout) + assert data['session_id'] == 'beta' + assert data['flushed'] is True + assert data['path'].endswith('beta.json') + # messages_count and token counts should be present and typed + assert isinstance(data['messages_count'], int) + assert isinstance(data['input_tokens'], int) + assert isinstance(data['output_tokens'], int) + + def test_text_mode_byte_identical_to_pre_166_output(self, tmp_path: Path) -> None: + """Legacy text output must not change — claws may be parsing it.""" + result = _run_cli( + 'flush-transcript', 'hello', + '--directory', str(tmp_path), + ) + assert result.returncode == 0 + lines = result.stdout.strip().split('\n') + # Line 1: path ending in .json + assert lines[0].endswith('.json') + # Line 2: exact legacy format + assert lines[1] == 'flushed=True' + + +class TestBackwardCompat: + def test_no_flags_default_behaviour(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Running with no flags still works (default dir, text mode, auto UUID).""" + import os + env = os.environ.copy() + env['PYTHONPATH'] = str(_REPO_ROOT) + result = subprocess.run( + [sys.executable, '-m', 'src.main', 'flush-transcript', 'hello'], + capture_output=True, text=True, cwd=str(tmp_path), env=env, + ) + assert result.returncode == 0, result.stderr + # Default dir is `.port_sessions` in CWD + sessions_dir = tmp_path / '.port_sessions' + assert sessions_dir.exists() + assert len(list(sessions_dir.glob('*.json'))) == 1 + + +class TestLifecycleIntegration: + """#166's real value: the triplet + creation command are now a coherent family.""" + + def test_create_then_list_then_load_then_delete_roundtrip( + self, tmp_path: Path, + ) -> None: + """End-to-end: flush → list → load → delete, all via the same --directory.""" + # 1. Create + create_result = _run_cli( + 'flush-transcript', 'roundtrip test', + '--directory', str(tmp_path), + '--session-id', 'rt-session', + '--output-format', 'json', + ) + assert create_result.returncode == 0 + assert json.loads(create_result.stdout)['session_id'] == 'rt-session' + + # 2. List + list_result = _run_cli( + 'list-sessions', + '--directory', str(tmp_path), + '--output-format', 'json', + ) + assert list_result.returncode == 0 + list_data = json.loads(list_result.stdout) + assert 'rt-session' in list_data['sessions'] + + # 3. Load + load_result = _run_cli( + 'load-session', 'rt-session', + '--directory', str(tmp_path), + '--output-format', 'json', + ) + assert load_result.returncode == 0 + assert json.loads(load_result.stdout)['loaded'] is True + + # 4. Delete + delete_result = _run_cli( + 'delete-session', 'rt-session', + '--directory', str(tmp_path), + '--output-format', 'json', + ) + assert delete_result.returncode == 0 + + # 5. Verify gone + verify_result = _run_cli( + 'load-session', 'rt-session', + '--directory', str(tmp_path), + '--output-format', 'json', + ) + assert verify_result.returncode == 1 + assert json.loads(verify_result.stdout)['error']['kind'] == 'session_not_found' + + +class TestFullFamilyParity: + """All four session-lifecycle CLI commands accept the same core flag pair. + + This is the #166 acceptance test: flush-transcript joins the family. + """ + + @pytest.mark.parametrize( + 'command', + ['list-sessions', 'delete-session', 'load-session', 'flush-transcript'], + ) + def test_all_four_accept_directory_flag(self, command: str) -> None: + help_text = _run_cli(command, '--help').stdout + assert '--directory' in help_text, ( + f'{command} missing --directory flag (#166 parity gap)' + ) + + @pytest.mark.parametrize( + 'command', + ['list-sessions', 'delete-session', 'load-session', 'flush-transcript'], + ) + def test_all_four_accept_output_format_flag(self, command: str) -> None: + help_text = _run_cli(command, '--help').stdout + assert '--output-format' in help_text, ( + f'{command} missing --output-format flag (#166 parity gap)' + )