feat: #164 Stage B CLOSURE — turn-loop JSON + cancel_observed coverage + CLAWABLE promotion

Closes all three gaebal-gajae-identified closure criteria for #164 Stage B: 1. turn-loop runtime surface exposes cancel_observed consistently 2. cancellation path tests validate safe-to-reuse semantics 3. turn-loop promoted from OPT_OUT to CLAWABLE surface Changes: src/main.py: - turn-loop accepts --output-format {text,json} - JSON envelope includes per-turn cancel_observed + final_cancel_observed - All turn fields exposed: prompt, output, stop_reason, cancel_observed, matched_commands, matched_tools - Exit code 2 on final timeout preserved tests/test_cli_parity_audit.py: - CLAWABLE_SURFACES now contains 14 commands (was 13) - Removed 'turn-loop' from OPT_OUT_SURFACES - Parametrized --output-format test auto-validates turn-loop JSON tests/test_cancel_observed_field.py (new, 9 tests): - TestCancelObservedField (5 tests): field contract - default False - explicit True preserved - normal completion → False - bootstrap JSON exposes field - turn-loop JSON exposes per-turn field - TestCancelObservedSafeReuseSemantics (2 tests): reuse contract - timeout result has cancel_observed=True when signaled - engine.mutable_messages not corrupted after cancelled turn - engine accepts fresh message after cancellation - TestCancelObservedSchemaCompliance (2 tests): SCHEMAS.md contract - cancel_observed is always bool - final_cancel_observed convenience field present Closure criteria validated: - ✅ Field exposed in bootstrap JSON - ✅ Field exposed per-turn in turn-loop JSON - ✅ Field is always bool, never null - ✅ Safe-to-reuse: engine can accept fresh messages after cancellation - ✅ mutable_messages not corrupted by cancelled turn - ✅ turn-loop promoted from OPT_OUT (14 clawable commands now) Protocol now distinguishes at runtime: timeout + cancel_observed=false → infra/wedge (escalate) timeout + cancel_observed=true → cooperative cancellation (safe to retry) Test results: 182 → 192 passing, +10 tests, zero regression, 3 skipped unchanged. Closes #164 Stage B. Stage C (async-native preemption) remains future work.
2026-06-12 12:33:34 +08:00 · 2026-04-22 19:49:20 +09:00 · 2026-04-22 19:49:20 +09:00 · 11f9e8a5a2
commit 11f9e8a5a2
parent 97c4b130dc
3 changed files with 237 additions and 7 deletions
--- a/src/main.py
+++ b/src/main.py
@ -101,6 +101,12 @@ def build_parser() -> argparse.ArgumentParser:
            'suffix that used to pollute the transcript.'
        ),
    )
+    loop_parser.add_argument(
+        '--output-format',
+        choices=['text', 'json'],
+        default='text',
+        help='output format (#164 Stage B: JSON includes cancel_observed per turn)',
+    )

    flush_parser = subparsers.add_parser(
        'flush-transcript',
@ -349,15 +355,40 @@ def main(argv: list[str] | None = None) -> int:
            timeout_seconds=args.timeout_seconds,
            continuation_prompt=args.continuation_prompt,
        )
+        # Exit 2 when a timeout terminated the loop so claws can distinguish
+        # 'ran to completion' from 'hit wall-clock budget'.
+        loop_exit_code = 2 if results and results[-1].stop_reason == 'timeout' else 0
+        if args.output_format == 'json':
+            # #164 Stage B + #173: JSON envelope with per-turn cancel_observed
+            # Promotes turn-loop from OPT_OUT to CLAWABLE surface.
+            import json
+            envelope = {
+                'prompt': args.prompt,
+                'max_turns': args.max_turns,
+                'turns_completed': len(results),
+                'timeout_seconds': args.timeout_seconds,
+                'continuation_prompt': args.continuation_prompt,
+                'turns': [
+                    {
+                        'prompt': r.prompt,
+                        'output': r.output,
+                        'stop_reason': r.stop_reason,
+                        'cancel_observed': r.cancel_observed,
+                        'matched_commands': list(r.matched_commands),
+                        'matched_tools': list(r.matched_tools),
+                    }
+                    for r in results
+                ],
+                'final_stop_reason': results[-1].stop_reason if results else None,
+                'final_cancel_observed': results[-1].cancel_observed if results else False,
+            }
+            print(json.dumps(wrap_json_envelope(envelope, args.command, exit_code=loop_exit_code)))
+            return loop_exit_code
        for idx, result in enumerate(results, start=1):
            print(f'## Turn {idx}')
            print(result.output)
            print(f'stop_reason={result.stop_reason}')
-        # Exit 2 when a timeout terminated the loop so claws can distinguish
-        # 'ran to completion' from 'hit wall-clock budget'.
-        if results and results[-1].stop_reason == 'timeout':
-            return 2
-        return 0
+        return loop_exit_code
    if args.command == 'flush-transcript':
        from pathlib import Path as _Path
        engine = QueryEnginePort.from_workspace()
--- a/tests/test_cancel_observed_field.py
+++ b/tests/test_cancel_observed_field.py
@ -0,0 +1,199 @@
+"""#164 Stage B — cancel_observed field coverage.
+
+Validates that the TurnResult.cancel_observed field correctly signals
+whether cancellation was observed during turn execution.
+
+Test coverage:
+1. Normal completion: cancel_observed=False (no timeout occurred)
+2. Timeout with cancel signaled: cancel_observed=True
+3. bootstrap JSON output exposes the field
+4. turn-loop JSON output exposes cancel_observed per turn
+5. Safe-to-reuse: after timeout with cancel_observed=True,
+   engine can accept fresh messages without state corruption
+"""
+
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+from src.query_engine import QueryEnginePort, TurnResult
+from src.runtime import PortRuntime
+
+
+CLI = [sys.executable, '-m', 'src.main']
+REPO_ROOT = Path(__file__).resolve().parent.parent
+
+
+class TestCancelObservedField:
+    """TurnResult.cancel_observed correctly signals cancellation observation."""
+
+    def test_default_value_is_false(self) -> None:
+        """New TurnResult defaults to cancel_observed=False (backward compat)."""
+        from src.models import UsageSummary
+        result = TurnResult(
+            prompt='test',
+            output='ok',
+            matched_commands=(),
+            matched_tools=(),
+            permission_denials=(),
+            usage=UsageSummary(),
+            stop_reason='completed',
+        )
+        assert result.cancel_observed is False
+
+    def test_explicit_true_preserved(self) -> None:
+        """cancel_observed=True is preserved through construction."""
+        from src.models import UsageSummary
+        result = TurnResult(
+            prompt='test',
+            output='timed out',
+            matched_commands=(),
+            matched_tools=(),
+            permission_denials=(),
+            usage=UsageSummary(),
+            stop_reason='timeout',
+            cancel_observed=True,
+        )
+        assert result.cancel_observed is True
+
+    def test_normal_completion_cancel_observed_false(self) -> None:
+        """Normal turn completion → cancel_observed=False."""
+        runtime = PortRuntime()
+        results = runtime.run_turn_loop('hello', max_turns=1)
+        assert len(results) >= 1
+        assert results[0].cancel_observed is False
+
+    def test_bootstrap_json_includes_cancel_observed(self) -> None:
+        """bootstrap JSON envelope includes cancel_observed in turn result."""
+        result = subprocess.run(
+            CLI + ['bootstrap', 'hello', '--output-format', 'json'],
+            cwd=REPO_ROOT,
+            capture_output=True,
+            text=True,
+        )
+        assert result.returncode == 0
+        envelope = json.loads(result.stdout)
+        assert 'turn' in envelope
+        assert 'cancel_observed' in envelope['turn'], (
+            f"bootstrap turn must include cancel_observed (SCHEMAS.md contract). "
+            f"Got keys: {list(envelope['turn'].keys())}"
+        )
+        # Normal completion → False
+        assert envelope['turn']['cancel_observed'] is False
+
+    def test_turn_loop_json_per_turn_cancel_observed(self) -> None:
+        """turn-loop JSON envelope includes cancel_observed per turn (#164 Stage B closure)."""
+        result = subprocess.run(
+            CLI + ['turn-loop', 'hello', '--max-turns', '1', '--output-format', 'json'],
+            cwd=REPO_ROOT,
+            capture_output=True,
+            text=True,
+        )
+        assert result.returncode == 0, f"stderr: {result.stderr}"
+        envelope = json.loads(result.stdout)
+        # Common fields from wrap_json_envelope
+        assert envelope['command'] == 'turn-loop'
+        assert envelope['schema_version'] == '1.0'
+        # Turn-loop-specific fields
+        assert 'turns' in envelope
+        assert len(envelope['turns']) >= 1
+        for idx, turn in enumerate(envelope['turns']):
+            assert 'cancel_observed' in turn, (
+                f"Turn {idx} missing cancel_observed: {list(turn.keys())}"
+            )
+        # final_cancel_observed convenience field
+        assert 'final_cancel_observed' in envelope
+        assert isinstance(envelope['final_cancel_observed'], bool)
+
+
+class TestCancelObservedSafeReuseSemantics:
+    """After timeout with cancel_observed=True, engine state is safe to reuse."""
+
+    def test_timeout_result_cancel_observed_true_when_signaled(self) -> None:
+        """#164 Stage B: timeout path passes cancel_event.is_set() to result."""
+        # Force a timeout with max_turns=3 and timeout=0.0001 (instant)
+        runtime = PortRuntime()
+        results = runtime.run_turn_loop(
+            'hello', max_turns=3, timeout_seconds=0.0001,
+            continuation_prompt='keep going',
+        )
+        # Last result should be timeout (pre-start path since timeout is instant)
+        assert results, 'timeout path should still produce a result'
+        last = results[-1]
+        assert last.stop_reason == 'timeout'
+        # cancel_observed=True because the timeout path explicitly sets cancel_event
+        assert last.cancel_observed is True, (
+            f"timeout path must signal cancel_observed=True; got {last.cancel_observed}. "
+            f"stop_reason={last.stop_reason}"
+        )
+
+    def test_engine_messages_not_corrupted_by_timeout(self) -> None:
+        """After timeout with cancel_observed, engine.mutable_messages is consistent.
+
+        #164 Stage B contract: safe-to-reuse means after a timeout-with-cancel,
+        the engine has not committed a ghost turn and can accept fresh input.
+        """
+        engine = QueryEnginePort.from_workspace()
+        # Track initial state
+        initial_message_count = len(engine.mutable_messages)
+
+        # Simulate a direct submit_message call with cancellation
+        import threading
+        cancel_event = threading.Event()
+        cancel_event.set()  # Pre-set: first checkpoint fires
+        result = engine.submit_message(
+            'test', ('cmd1',), ('tool1',),
+            denied_tools=(), cancel_event=cancel_event,
+        )
+
+        # Cancelled turn should not commit mutation
+        assert result.stop_reason == 'cancelled', (
+            f"expected cancelled; got {result.stop_reason}"
+        )
+        # mutable_messages should not have grown
+        assert len(engine.mutable_messages) == initial_message_count, (
+            f"engine.mutable_messages grew after cancelled turn "
+            f"(was {initial_message_count}, now {len(engine.mutable_messages)})"
+        )
+
+        # Engine should accept a fresh message now
+        fresh = engine.submit_message('fresh prompt', ('cmd1',), ('tool1',))
+        assert fresh.stop_reason in ('completed', 'max_budget_reached'), (
+            f"expected engine reusable; got {fresh.stop_reason}"
+        )
+
+
+class TestCancelObservedSchemaCompliance:
+    """SCHEMAS.md contract for cancel_observed field."""
+
+    def test_cancel_observed_is_bool_not_nullable(self) -> None:
+        """cancel_observed is always bool (never null/missing) per SCHEMAS.md."""
+        result = subprocess.run(
+            CLI + ['bootstrap', 'test', '--output-format', 'json'],
+            cwd=REPO_ROOT,
+            capture_output=True,
+            text=True,
+        )
+        envelope = json.loads(result.stdout)
+        cancel_observed = envelope['turn']['cancel_observed']
+        assert isinstance(cancel_observed, bool), (
+            f"cancel_observed must be bool; got {type(cancel_observed)}"
+        )
+
+    def test_turn_loop_envelope_has_final_cancel_observed(self) -> None:
+        """turn-loop JSON exposes final_cancel_observed convenience field."""
+        result = subprocess.run(
+            CLI + ['turn-loop', 'test', '--max-turns', '1', '--output-format', 'json'],
+            cwd=REPO_ROOT,
+            capture_output=True,
+            text=True,
+        )
+        assert result.returncode == 0
+        envelope = json.loads(result.stdout)
+        assert 'final_cancel_observed' in envelope
+        assert isinstance(envelope['final_cancel_observed'], bool)
--- a/tests/test_cli_parity_audit.py
+++ b/tests/test_cli_parity_audit.py
@ -59,6 +59,8 @@ CLAWABLE_SURFACES = frozenset({
    'command-graph',
    'tool-pool',
    'bootstrap-graph',
+    # Turn-loop with JSON output (#164 Stage B, #174)
+    'turn-loop',
 })

 # Commands explicitly exempt from --output-format requirement.
@ -75,8 +77,6 @@ OPT_OUT_SURFACES = frozenset({
    'subsystems',         # use --limit
    'commands',           # use --query / --limit / --no-plugin-commands
    'tools',              # use --query / --limit / --simple-mode
-    # Turn-loop has structured_output flag; JSON mode is future work
-    'turn-loop',
    # Simulation/debug surfaces (not claw-orchestrated)
    'remote-mode',
    'ssh-mode',