mirror of
https://github.com/ultraworkers/claw-code.git
synced 2026-04-24 13:08:11 +08:00
Cycle #26 dogfood found a real red-state bug in the JSON envelope contract. ## The Bug exec-command and exec-tool not-found cases return exit code 1 from the process, but the envelope reports exit_code: 0 (the default from wrap_json_envelope). This is a protocol violation. Repro (before fix): $ claw exec-command unknown-cmd test --output-format json > out.json $ echo $? 1 $ jq '.exit_code' out.json 0 # WRONG — envelope lies about exit code Claws reading the envelope's exit_code field get misinformation. A claw implementing the canonical ERROR_HANDLING.md pattern (check exit_code, then classify by error.kind) would incorrectly treat failures as successes when dispatching on the envelope alone. ## Root Cause main.py lines 687–739 (exec-command + exec-tool handlers): - Return statement: 'return 0 if result.handled else 1' (correct) - Envelope wrap: 'wrap_json_envelope(envelope, args.command)' (uses default exit_code=0, IGNORES the return value) The envelope wrap was called BEFORE the return value was computed, so the exit_code field was never synchronized with the actual exit code. ## The Fix Compute exit_code ONCE at the top: exit_code = 0 if result.handled else 1 Pass it explicitly to wrap_json_envelope: wrap_json_envelope(envelope, args.command, exit_code=exit_code) Return the same value: return exit_code This ensures the envelope's exit_code field is always truth — the SAME value the process returns. ## Tests Added (3) TestEnvelopeExitCodeMatchesProcessExit in test_exec_route_bootstrap_output_format.py: 1. test_exec_command_not_found_envelope_exit_matches: Verifies exec-command unknown-cmd returns exit 1 in both envelope and process. 2. test_exec_tool_not_found_envelope_exit_matches: Same for exec-tool. 3. test_all_commands_exit_code_invariant: Audit across 4 known non-zero cases (show-command, show-tool, exec-command, exec-tool not-found). Guards against the same bug in other surfaces. 
## Impact - 206 → 209 passing tests (+3) - Zero regressions - Protocol contract now truthful: envelope.exit_code == process exit - Claws using the one-handler pattern from ERROR_HANDLING.md now get correct information ## Related - ERROR_HANDLING.md (cycle #22): Documented exit_code as machine-readable contract field - #178/#179 (cycles #19/#20): Closed parser-front-door contract - This closes a gap in the WORK PROTOCOL contract — envelope values must match reality, not just be structurally present. Classification (per cycle #24 calibration): - Red-state bug: ✓ (contract violation, claws get misinformation) - Real friction: ✓ (discovered via dogfood, not speculative) - Fix ships same-cycle: ✓ (discipline per maintainership mode) Source: Jobdori cycle #26 dogfood — ran multiple edge-case probes, noticed exec-command envelope showed exit_code: 0 while process exited 1. Investigated wrap_json_envelope default behavior, confirmed bug, fixed and tested in same cycle.
280 lines
12 KiB
Python
280 lines
12 KiB
Python
"""Tests for --output-format on exec-command/exec-tool/route/bootstrap (ROADMAP #168).
|
|
|
|
Closes the final JSON-parity gap across the CLI family. After #160/#165/
|
|
#166/#167, the session-lifecycle and inspect CLI commands all spoke JSON;
|
|
this batch extends that contract to the exec, route, and bootstrap
|
|
surfaces — the commands claws actually invoke to DO work, not just inspect
|
|
state.
|
|
|
|
Verifies:
|
|
- exec-command / exec-tool: JSON envelope with handled + source_hint on
|
|
success; {name, handled:false, error:{kind,message,retryable}} on
|
|
not-found
|
|
- route: JSON envelope with match_count + matches list
|
|
- bootstrap: JSON envelope with setup, routed_matches, turn, messages,
|
|
persisted_session_path
|
|
- All 4 preserve legacy text mode byte-identically
|
|
- Exit codes unchanged (0 success, 1 exec-not-found)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
|
|
|
|
def _run(args: list[str]) -> subprocess.CompletedProcess:
|
|
return subprocess.run(
|
|
[sys.executable, '-m', 'src.main', *args],
|
|
cwd=Path(__file__).resolve().parent.parent,
|
|
capture_output=True,
|
|
text=True,
|
|
)
|
|
|
|
|
|
class TestExecCommandOutputFormat:
    """JSON envelope and legacy text contracts for the exec-command surface."""

    def test_exec_command_found_json(self) -> None:
        proc = _run(['exec-command', 'add-dir', 'hello', '--output-format', 'json'])
        assert proc.returncode == 0, proc.stderr

        envelope = json.loads(proc.stdout)
        assert envelope['name'] == 'add-dir'
        assert envelope['prompt'] == 'hello'
        assert envelope['handled'] is True
        assert 'source_hint' in envelope
        assert 'message' in envelope
        assert 'error' not in envelope

    def test_exec_command_not_found_json(self) -> None:
        proc = _run(['exec-command', 'nonexistent-cmd', 'hi', '--output-format', 'json'])
        assert proc.returncode == 1

        envelope = json.loads(proc.stdout)
        assert envelope['name'] == 'nonexistent-cmd'
        assert envelope['prompt'] == 'hi'
        assert envelope['handled'] is False
        # Not-found carries a structured error, never a source_hint.
        error = envelope['error']
        assert error['kind'] == 'command_not_found'
        assert error['retryable'] is False
        assert 'source_hint' not in envelope

    def test_exec_command_text_backward_compat(self) -> None:
        proc = _run(['exec-command', 'add-dir', 'hello'])
        assert proc.returncode == 0
        # Legacy mode: exactly one line of prose (unchanged from pre-#168).
        assert proc.stdout.count('\n') == 1
        assert 'add-dir' in proc.stdout
|
|
|
class TestExecToolOutputFormat:
    """JSON envelope and legacy text contracts for the exec-tool surface."""

    def test_exec_tool_found_json(self) -> None:
        proc = _run(['exec-tool', 'BashTool', '{"cmd":"ls"}', '--output-format', 'json'])
        assert proc.returncode == 0, proc.stderr

        envelope = json.loads(proc.stdout)
        assert envelope['name'] == 'BashTool'
        assert envelope['payload'] == '{"cmd":"ls"}'
        assert envelope['handled'] is True
        assert 'source_hint' in envelope
        assert 'error' not in envelope

    def test_exec_tool_not_found_json(self) -> None:
        proc = _run(['exec-tool', 'NotATool', '{}', '--output-format', 'json'])
        assert proc.returncode == 1

        envelope = json.loads(proc.stdout)
        assert envelope['name'] == 'NotATool'
        assert envelope['handled'] is False
        error = envelope['error']
        assert error['kind'] == 'tool_not_found'
        assert error['retryable'] is False

    def test_exec_tool_text_backward_compat(self) -> None:
        proc = _run(['exec-tool', 'BashTool', '{}'])
        assert proc.returncode == 0
        # Legacy mode: exactly one line of output.
        assert proc.stdout.count('\n') == 1
|
|
|
class TestRouteOutputFormat:
    """JSON envelope and legacy text contracts for the route surface."""

    def test_route_json_envelope(self) -> None:
        proc = _run(['route', 'review mcp', '--limit', '3', '--output-format', 'json'])
        assert proc.returncode == 0

        envelope = json.loads(proc.stdout)
        assert envelope['prompt'] == 'review mcp'
        assert envelope['limit'] == 3
        assert 'match_count' in envelope
        assert 'matches' in envelope
        assert envelope['match_count'] == len(envelope['matches'])
        # Every match carries exactly the required keys, nothing extra.
        required_keys = {'kind', 'name', 'score', 'source_hint'}
        for match in envelope['matches']:
            assert set(match.keys()) == required_keys
            assert match['kind'] in ('command', 'tool')

    def test_route_json_no_matches(self) -> None:
        # A gibberish prompt should yield zero matches.
        proc = _run(['route', 'zzzzzzzzzqqqqq', '--output-format', 'json'])
        assert proc.returncode == 0

        envelope = json.loads(proc.stdout)
        assert envelope['match_count'] == 0
        assert envelope['matches'] == []

    def test_route_text_backward_compat(self) -> None:
        """Text mode tab-separated output unchanged from pre-#168."""
        proc = _run(['route', 'review mcp', '--limit', '2'])
        assert proc.returncode == 0
        # Each non-empty line has exactly 3 tabs: kind\tname\tscore\tsource_hint
        for line in proc.stdout.strip().split('\n'):
            if line:
                assert line.count('\t') == 3
|
|
|
class TestBootstrapOutputFormat:
    """JSON envelope and legacy Markdown contracts for the bootstrap surface."""

    def test_bootstrap_json_envelope(self) -> None:
        proc = _run(['bootstrap', 'review MCP', '--limit', '2', '--output-format', 'json'])
        assert proc.returncode == 0, proc.stderr

        envelope = json.loads(proc.stdout)
        # All required top-level keys must be present.
        required = {
            'prompt', 'limit', 'setup', 'routed_matches',
            'command_execution_messages', 'tool_execution_messages',
            'turn', 'persisted_session_path',
        }
        assert required.issubset(envelope.keys())
        # Setup sub-envelope.
        setup = envelope['setup']
        assert 'python_version' in setup
        assert 'platform_name' in setup
        # Turn sub-envelope.
        turn = envelope['turn']
        assert 'stop_reason' in turn
        assert 'prompt' in turn

    def test_bootstrap_text_is_markdown(self) -> None:
        """Text mode produces Markdown (unchanged from pre-#168)."""
        proc = _run(['bootstrap', 'hello', '--limit', '2'])
        assert proc.returncode == 0
        # The Markdown section headers must survive.
        for header in ('# Runtime Session', '## Setup', '## Routed Matches'):
            assert header in proc.stdout
|
|
|
class TestFamilyWideJsonParity:
    """After #167 and #168, ALL inspect/exec/route/lifecycle commands
    support --output-format. Verify the full family is now parity-complete."""

    # (cmd_args, expected_to_parse_json) — the second element documents intent;
    # every surface is currently expected to parse, so the tests ignore it.
    FAMILY_SURFACES = [
        (['show-command', 'add-dir'], True),
        (['show-tool', 'BashTool'], True),
        (['exec-command', 'add-dir', 'hi'], True),
        (['exec-tool', 'BashTool', '{}'], True),
        (['route', 'review'], True),
        (['bootstrap', 'hello'], True),
    ]

    def test_all_family_commands_accept_output_format_json(self) -> None:
        """Every family command accepts --output-format json and emits parseable JSON."""
        failures = []
        # Fix: the second tuple element was bound as `should_parse` but never
        # used; name it `_` to match test_all_family_commands_text_mode_unchanged.
        for args_base, _ in self.FAMILY_SURFACES:
            result = _run([*args_base, '--output-format', 'json'])
            # 0 = success, 1 = expected not-found; anything else is a crash.
            if result.returncode not in (0, 1):
                failures.append(f'{args_base}: exit {result.returncode} — {result.stderr}')
                continue
            try:
                json.loads(result.stdout)
            except json.JSONDecodeError as e:
                failures.append(f'{args_base}: not parseable JSON ({e}): {result.stdout[:100]}')
        assert not failures, (
            'CLI family JSON parity gap:\n' + '\n'.join(failures)
        )

    def test_all_family_commands_text_mode_unchanged(self) -> None:
        """Omitting --output-format defaults to text for every family command."""
        # Sanity: just verify each runs without error in text mode.
        for args_base, _ in self.FAMILY_SURFACES:
            result = _run(args_base)
            assert result.returncode in (0, 1), (
                f'{args_base} failed in text mode: {result.stderr}'
            )
            # Output should not be JSON-shaped (no leading {).
            assert not result.stdout.strip().startswith('{')
|
|
|
class TestEnvelopeExitCodeMatchesProcessExit:
    """#181: Envelope exit_code field must match actual process exit code.

    Regression test for the protocol violation where exec-command/exec-tool
    not-found cases returned exit code 1 from the process but emitted
    envelopes with exit_code: 0 (default wrap_json_envelope). Claws reading
    the envelope would misclassify failures as successes.

    Contract (from ERROR_HANDLING.md):
    - Exit code 0 = success
    - Exit code 1 = error/not-found
    - Envelope MUST reflect process exit
    """

    def _probe_not_found(self, base_args: list[str]) -> dict:
        # Shared probe: run a known not-found invocation in JSON mode and
        # verify exit 1 in BOTH the process and the envelope; return the
        # envelope so callers can assert surface-specific fields.
        result = _run([*base_args, '--output-format', 'json'])
        assert result.returncode == 1, f'process exit should be 1, got {result.returncode}'
        envelope = json.loads(result.stdout)
        assert envelope['exit_code'] == 1, (
            f'envelope.exit_code mismatch: process=1, envelope={envelope["exit_code"]}'
        )
        return envelope

    def test_exec_command_not_found_envelope_exit_matches(self) -> None:
        """exec-command 'unknown-name' must have exit_code=1 in envelope."""
        envelope = self._probe_not_found(['exec-command', 'nonexistent-cmd-name', 'test-prompt'])
        assert envelope['handled'] is False
        assert envelope['error']['kind'] == 'command_not_found'

    def test_exec_tool_not_found_envelope_exit_matches(self) -> None:
        """exec-tool 'unknown-tool' must have exit_code=1 in envelope."""
        envelope = self._probe_not_found(['exec-tool', 'nonexistent-tool-name', '{}'])
        assert envelope['handled'] is False
        assert envelope['error']['kind'] == 'tool_not_found'

    def test_all_commands_exit_code_invariant(self) -> None:
        """Audit: for every clawable command, envelope.exit_code == process exit.

        This is a stronger invariant than 'emits JSON'. Claws dispatching on
        the envelope's exit_code field must get the truth, not a lie.
        """
        # Sample cases known to return non-zero: command, expected_exit, justification.
        probes = [
            (['show-command', 'nonexistent-abc'], 1, 'not-found inventory lookup'),
            (['show-tool', 'nonexistent-xyz'], 1, 'not-found inventory lookup'),
            (['exec-command', 'nonexistent-1', 'test'], 1, 'not-found execution'),
            (['exec-tool', 'nonexistent-2', '{}'], 1, 'not-found execution'),
        ]
        problems = []
        for args, expected_exit, reason in probes:
            result = _run([*args, '--output-format', 'json'])
            if result.returncode != expected_exit:
                problems.append(
                    f'{args}: expected process exit {expected_exit} ({reason}), '
                    f'got {result.returncode}'
                )
                continue
            try:
                envelope = json.loads(result.stdout)
            except json.JSONDecodeError as e:
                problems.append(f'{args}: JSON parse failed: {e}')
                continue
            if envelope.get('exit_code') != result.returncode:
                problems.append(
                    f'{args}: envelope.exit_code={envelope.get("exit_code")} '
                    f'!= process exit={result.returncode} ({reason})'
                )
        assert not problems, (
            'Envelope exit_code must match process exit code:\n' +
            '\n'.join(problems)
        )