mirror of
https://github.com/ultraworkers/claw-code.git
synced 2026-04-24 21:20:48 +08:00
Cycle #29 dogfood found a real pinpoint: cross-mode exit code divergence. ## The Pinpoint Dogfooding the CLI revealed that unknown subcommand errors return different exit codes depending on output mode: $ python3 -m src.main nonexistent-cmd # exit 2 $ python3 -m src.main nonexistent-cmd --output-format json # exit 1 ERROR_HANDLING.md documented the exit-code contract (1=parse, 2=timeout) but did NOT explicitly state the contract applies only to JSON mode. Text mode follows argparse defaults (exit 2 for any parse error), which violates the documented contract when interpreted generally. A claw using text mode with 'claw nonexistent' would see exit 2 and misclassify as timeout per the docs. Real protocol contract gap, not implementation bug. ## Classification This is a DOCUMENTATION gap, not a behavior bug: - Text mode follows argparse convention (reasonable for humans) - JSON mode normalizes to documented contract (reasonable for claws) - The divergence is intentional; only the docs were silent about it Fix = document the divergence explicitly + lock it with tests. NOT fix = change text mode exit code to 1 (would break argparse conventions and confuse human users). ## Documentation Changes ERROR_HANDLING.md: 1. Added IMPORTANT callout in Quick Reference section: 'The exit code contract applies ONLY when --output-format json is explicitly set. Text mode follows argparse conventions.' 2. New 'Text mode vs JSON mode exit codes' table showing exact divergence: - Unknown subcommand: text=2, json=1 - Missing required arg: text=2, json=1 - Session not found: text=1, json=1 (app-level, identical) - Success: text=0, json=0 (identical) - Timeout: text=2, json=2 (identical, #161) 3. Practical rule: 'always pass --output-format json' ## Tests Added (5) TestTextVsJsonModeDivergence in test_cross_channel_consistency.py: 1. test_unknown_command_text_mode_exits_2 — text mode argparse default 2. test_unknown_command_json_mode_exits_1 — JSON mode contract normalized 3. 
test_missing_required_arg_text_mode_exits_2 — same for missing args 4. test_missing_required_arg_json_mode_exits_1 — same normalization 5. test_success_path_identical_in_both_modes — success exit identical These tests LOCK the expected divergence so: - Documentation stays aligned with implementation - Future changes (either direction) are caught as intentional - Claws trust the docs ## Test Status - 217 → 222 tests passing (+5) - Zero regressions ## Discipline This cycle follows the cycle #28 template exactly: - Dogfood probe revealed real friction (test said exit=2, docs said exit=1) - Minimal fix shape (documentation clarification, not code change) - Regression guard via tests - Evidence-backed, not speculative Relationship to #181: - #181 fixed env.exit_code != process exit (WITHIN JSON mode) - #29 clarifies exit code contract scope (ONLY JSON mode) - Both establish: exit codes are deterministic, but only when --output-format json --- Classification (per cycle #24 calibration): - Red-state bug? ✗ (behavior was reasonable, docs were incomplete) - Real friction? ✓ (docs/code divergence revealed by dogfood) - Evidence-backed? ✓ (test suite probed both modes, found the gap) Source: Jobdori cycle #29 proactive dogfood — in response to Clawhip nudge for pinpoint hunting. Found that text-mode errors return exit 2 but ERROR_HANDLING.md implied exit 1 was the parse-error contract universally.
243 lines
9.5 KiB
Python
243 lines
9.5 KiB
Python
"""Cycle #27 cross-channel consistency audit (post-#181).
|
|
|
|
After #181 fix (envelope.exit_code must match process exit), this test
|
|
class systematizes the three-layer protocol invariant framework:
|
|
|
|
1. Structural compliance: Does the envelope exist? (#178)
|
|
2. Quality compliance: Is stderr silent + message truthful? (#179)
|
|
3. Cross-channel consistency: Do multiple channels agree? (#181 + this)
|
|
|
|
This file captures cycle #27's proactive invariant audit proving that
|
|
envelope fields match their corresponding reality channels:
|
|
|
|
- envelope.command ↔ argv dispatch
|
|
- envelope.output_format ↔ --output-format flag
|
|
- envelope.timestamp ↔ actual wall clock
|
|
- envelope.found/handled/deleted ↔ operational truth (no error block mismatch)
|
|
|
|
All tests passing = no drift detected.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import subprocess
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
import sys
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
|
|
|
|
def _run(args: list[str]) -> subprocess.CompletedProcess:
    """Run the claw-code CLI in a child process and capture its output.

    The CLI is started as ``<interpreter> -m src.main <args...>`` with the
    working directory set to the parent of this file's directory, so that
    the ``src`` package is resolved relative to that directory.

    Args:
        args: Command-line arguments appended after ``-m src.main``.

    Returns:
        The completed process. ``stdout`` and ``stderr`` are captured and
        decoded as text; ``returncode`` holds the process exit code. A
        non-zero exit does not raise.
    """
    # Use the interpreter that is running the tests instead of whatever
    # ``python3`` happens to resolve to on PATH (which may be a different
    # environment, or missing entirely). Fall back to the old literal only
    # if the running interpreter's path cannot be determined.
    interpreter = sys.executable or 'python3'
    return subprocess.run(
        [interpreter, '-m', 'src.main', *args],
        cwd=Path(__file__).parent.parent,
        capture_output=True,
        text=True,
    )
|
|
|
|
|
|
class TestCrossChannelConsistency:
    """Cycle #27: envelope fields must match reality channels.

    These are distinct from structural/quality tests. A command can
    emit structurally valid JSON with clean stderr but still lie about
    its own output_format or exit code (as #181 proved).

    Every test runs the CLI through ``_run`` with ``--output-format json``,
    decodes stdout as the JSON envelope, and compares one envelope field
    with the channel it is supposed to mirror. Tests that cover several
    invocations collect one message per failing case and assert once at the
    end, so a single bad case never hides the others.
    """

    def test_envelope_command_matches_dispatch(self) -> None:
        """Envelope.command must equal the dispatched subcommand."""
        # Positional arguments per subcommand (dispatch varies by arity).
        # Subcommands mapped to an empty list are invoked with the
        # --output-format flag only.
        positional_args = {
            'show-command': ['nonexistent'],
            'show-tool': ['nonexistent'],
            'list-sessions': [],
            'exec-command': ['unknown', 'test'],
            'exec-tool': ['unknown', '{}'],
            # NOTE(review): the other tests in this class pass a session id
            # ('any-id') to delete-session; here it is dispatched without
            # one. Kept as-is -- confirm this is intentional.
            'delete-session': [],
        }
        failures = []
        for cmd, positionals in positional_args.items():
            result = _run([cmd, *positionals, '--output-format', 'json'])
            try:
                envelope = json.loads(result.stdout)
            except json.JSONDecodeError:
                failures.append(f'{cmd}: JSON parse error')
                continue

            if envelope.get('command') != cmd:
                failures.append(
                    f'{cmd}: envelope.command={envelope.get("command")}, '
                    f'expected {cmd}'
                )
        assert not failures, (
            'Envelope.command must match dispatched subcommand:\n' +
            '\n'.join(failures)
        )

    def test_envelope_output_format_matches_flag(self) -> None:
        """Envelope.output_format must match --output-format flag."""
        result = _run(['list-sessions', '--output-format', 'json'])
        envelope = json.loads(result.stdout)
        # .get() so a missing field is reported by the assertion message
        # below instead of surfacing as a bare KeyError.
        actual_format = envelope.get('output_format')
        assert actual_format == 'json', (
            f'output_format mismatch: flag=json, envelope={actual_format}'
        )

    def test_envelope_timestamp_is_recent(self) -> None:
        """Envelope.timestamp must be recent (generated at call time)."""
        result = _run(['list-sessions', '--output-format', 'json'])
        envelope = json.loads(result.stdout)
        ts_str = envelope.get('timestamp')
        assert ts_str, 'no timestamp field'

        # A trailing 'Z' is rewritten to an explicit UTC offset before
        # parsing with datetime.fromisoformat().
        ts = datetime.fromisoformat(ts_str.replace('Z', '+00:00'))
        now = datetime.now(timezone.utc)
        delta = abs((now - ts).total_seconds())

        assert delta < 5, f'timestamp off by {delta}s (should be <5s)'

    def test_envelope_exit_code_matches_process_exit(self) -> None:
        """Cycle #26/#181: envelope.exit_code == process exit code.

        This is a critical invariant. Claws that trust the envelope
        field must get the truth, not a lie.
        """
        # (args, expected process exit code)
        cases = [
            (['show-command', 'nonexistent', '--output-format', 'json'], 1),
            (['show-tool', 'nonexistent', '--output-format', 'json'], 1),
            (['list-sessions', '--output-format', 'json'], 0),
            (['delete-session', 'any-id', '--output-format', 'json'], 0),
        ]
        failures = []
        for args, expected_exit in cases:
            result = _run(args)
            if result.returncode != expected_exit:
                failures.append(
                    f'{args[0]}: process exit {result.returncode}, '
                    f'expected {expected_exit}'
                )
                continue

            # Record undecodable stdout as a failure for this case and keep
            # going, so the remaining cases are still checked and reported.
            try:
                envelope = json.loads(result.stdout)
            except json.JSONDecodeError:
                failures.append(f'{args[0]}: JSON parse error')
                continue

            # .get() so a missing exit_code field is reported as a mismatch
            # rather than aborting the loop with a KeyError.
            envelope_exit = envelope.get('exit_code')
            if envelope_exit != result.returncode:
                failures.append(
                    f'{args[0]}: process exit {result.returncode}, '
                    f'envelope.exit_code {envelope_exit}'
                )

        assert not failures, (
            'Envelope.exit_code must match process exit:\n' +
            '\n'.join(failures)
        )

    def test_envelope_boolean_fields_match_error_presence(self) -> None:
        """found/handled/deleted fields must correlate with error block.

        - If field is True, no error block should exist
        - If field is False + operational error, error block must exist
        - If field is False + idempotent (delete nonexistent), no error block
        """
        cases = [
            # (args, bool_field, expected_value, expect_error_block)
            (['show-command', 'nonexistent', '--output-format', 'json'],
             'found', False, True),
            (['exec-command', 'unknown', 'test', '--output-format', 'json'],
             'handled', False, True),
            (['delete-session', 'any-id', '--output-format', 'json'],
             'deleted', False, False),  # idempotent, no error
        ]
        failures = []
        for args, field, expected_val, expect_error in cases:
            result = _run(args)
            # Record undecodable stdout as a failure for this case and keep
            # going, so the remaining cases are still checked and reported.
            try:
                envelope = json.loads(result.stdout)
            except json.JSONDecodeError:
                failures.append(f'{args[0]}: JSON parse error')
                continue

            actual_val = envelope.get(field)
            has_error = 'error' in envelope

            if actual_val != expected_val:
                failures.append(
                    f'{args[0]}: {field}={actual_val}, expected {expected_val}'
                )
            if expect_error and not has_error:
                failures.append(
                    f'{args[0]}: expected error block, but none present'
                )
            elif not expect_error and has_error:
                failures.append(
                    f'{args[0]}: unexpected error block present'
                )

        assert not failures, (
            'Boolean fields must correlate with error block:\n' +
            '\n'.join(failures)
        )
|
|
|
|
|
|
class TestTextVsJsonModeDivergence:
    """Cycle #29: pin the text-mode vs JSON-mode exit code divergence.

    Per ERROR_HANDLING.md the exit code contract holds ONLY when
    --output-format json is passed. Without it the CLI keeps the argparse
    defaults (e.g., exit 2 for parse errors); with it, parse errors are
    normalized to the contract value (exit 1).

    Locking the divergence in tests means:
    1. Documentation stays aligned with implementation
    2. Future changes to text mode behavior are caught as intentional
    3. Claws consuming subprocess output can trust the docs
    """

    def test_unknown_command_text_mode_exits_2(self) -> None:
        """Text mode: an unknown subcommand exits 2 (argparse default)."""
        exit_code = _run(['nonexistent-cmd']).returncode
        assert exit_code == 2, (
            f'text mode should exit 2 (argparse default), got {exit_code}'
        )

    def test_unknown_command_json_mode_exits_1(self) -> None:
        """JSON mode: an unknown subcommand exits 1 as a parse error (#178)."""
        completed = _run(['nonexistent-cmd', '--output-format', 'json'])
        exit_code = completed.returncode
        assert exit_code == 1, (
            f'JSON mode should exit 1 (protocol contract), got {exit_code}'
        )
        # The envelope on stdout must classify the failure as a parse error.
        error_kind = json.loads(completed.stdout)['error']['kind']
        assert error_kind == 'parse'

    def test_missing_required_arg_text_mode_exits_2(self) -> None:
        """Text mode: a missing required argument exits 2 (argparse default)."""
        # exec-command is invoked without its name + prompt arguments.
        exit_code = _run(['exec-command']).returncode
        assert exit_code == 2, (
            f'text mode should exit 2, got {exit_code}'
        )

    def test_missing_required_arg_json_mode_exits_1(self) -> None:
        """JSON mode: a missing required argument exits 1 (parse error)."""
        exit_code = _run(['exec-command', '--output-format', 'json']).returncode
        assert exit_code == 1, (
            f'JSON mode should exit 1, got {exit_code}'
        )

    def test_success_path_identical_in_both_modes(self) -> None:
        """A successful command exits 0 regardless of output mode."""
        text_code = _run(['list-sessions']).returncode
        json_code = _run(['list-sessions', '--output-format', 'json']).returncode
        assert (text_code, json_code) == (0, 0), (
            'success exit should be 0 in both modes: '
            f'text={text_code}, json={json_code}'
        )
|