mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-05-14 02:10:07 +08:00
- add Vite and Redis pattern skills from closed stale PRs - add frontend-slides support assets - port skill-comply runner fixes and LLM prompt/provider regressions - harden agent frontmatter validation and sync catalog counts
173 lines
6.5 KiB
Python
173 lines
6.5 KiB
Python
"""Tests for runner module — scenario execution + subprocess error handling."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import subprocess
|
|
from dataclasses import dataclass
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from scripts.runner import _setup_sandbox, run_scenario
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class _FakeScenario:
|
|
"""Minimal Scenario-like object for runner tests (avoids generator deps)."""
|
|
|
|
id: str
|
|
prompt: str = "do nothing"
|
|
setup_commands: tuple[str, ...] = ()
|
|
|
|
|
|
class TestSetupSandboxSkipsShellBuiltins:
    """Setup commands containing shell builtins (cd/pushd/popd) must be skipped.

    Regression: subprocess.run(["cd", ...]) raises FileNotFoundError because
    cd is a shell builtin, not an external binary. Real-world scenarios often
    include "cd subdir" in setup_commands assuming shell semantics, so the
    runner must tolerate this rather than crashing the whole scenario.
    """

    @staticmethod
    def _record_setup_calls(tmp_path, scenario) -> list[list[str]]:
        """Run ``_setup_sandbox`` with ``subprocess.run`` stubbed out.

        The stub never launches a process: it records the first positional
        argument of each call and reports success (returncode 0).

        Returns:
            The recorded argument values, in call order.
        """
        called_args: list[list[str]] = []

        def fake_run(args, **kwargs):
            called_args.append(args)
            return subprocess.CompletedProcess(args=args, returncode=0)

        with patch("scripts.runner.subprocess.run", side_effect=fake_run):
            _setup_sandbox(tmp_path, scenario)

        return called_args

    def test_skips_cd(self, tmp_path):
        """``cd`` is never handed to subprocess, while ``git init`` still runs."""
        scenario = _FakeScenario(
            id="t1",
            setup_commands=("cd subdir",),
        )

        called_args = self._record_setup_calls(tmp_path, scenario)

        # git init runs once; "cd subdir" must NOT be passed to subprocess
        assert ["git", "init"] in called_args
        assert ["cd", "subdir"] not in called_args

    def test_skips_pushd_popd(self, tmp_path):
        """``pushd`` and ``popd`` are never handed to subprocess."""
        scenario = _FakeScenario(
            id="t2",
            setup_commands=("pushd dir", "popd"),
        )

        called_args = self._record_setup_calls(tmp_path, scenario)

        assert ["pushd", "dir"] not in called_args
        assert ["popd"] not in called_args

    def test_tolerates_missing_executable(self, tmp_path):
        """A scenario referencing an unavailable tool must not crash setup."""
        scenario = _FakeScenario(
            id="t3",
            setup_commands=("nonexistent-tool-xyz arg",),
        )

        def fake_run(args, **kwargs):
            # Simulate the OS failing to find the binary for this one tool;
            # every other command succeeds.
            if args[0] == "nonexistent-tool-xyz":
                raise FileNotFoundError(2, "No such file or directory")
            return subprocess.CompletedProcess(args=args, returncode=0)

        with patch("scripts.runner.subprocess.run", side_effect=fake_run):
            # Must NOT raise — missing tools are skipped, not fatal
            _setup_sandbox(tmp_path, scenario)

    def test_real_commands_still_run(self, tmp_path):
        """Skip logic must not break legitimate setup commands."""
        scenario = _FakeScenario(
            id="t4",
            setup_commands=("touch file.txt", "cd ignored", "echo hi"),
        )

        called_args = self._record_setup_calls(tmp_path, scenario)

        # Real commands present, cd absent
        assert ["touch", "file.txt"] in called_args
        assert ["echo", "hi"] in called_args
        assert ["cd", "ignored"] not in called_args
|
|
|
|
|
|
class TestRunScenarioMaxTurnsTermination:
    """rc=1 with terminal_reason=max_turns is graceful termination, not failure.

    claude -p returns rc=1 when --max-turns is reached, but the stream-json
    output is still valid. Treating this as RuntimeError aborts scenarios
    that would have produced useful observations. Detect the marker in stdout
    and downgrade rc=1 + max_turns to non-fatal.
    """

    def test_rc1_with_max_turns_marker_returns_normally(self, tmp_path, monkeypatch):
        """rc=1 plus a ``max_turns`` result event must not raise."""
        scenario = _FakeScenario(id="mt1", prompt="long task", setup_commands=())

        # Skip sandbox setup side effects
        monkeypatch.setattr("scripts.runner._setup_sandbox", lambda *a, **kw: None)

        # Two stream-json lines: a session init event, then the result event
        # carrying the max_turns terminal reason.
        max_turns_stdout = (
            '{"type":"system","subtype":"init","session_id":"s1"}\n'
            '{"type":"result","terminal_reason":"max_turns"}\n'
        )

        fake_result = subprocess.CompletedProcess(
            args=["claude"], returncode=1, stdout=max_turns_stdout, stderr=""
        )

        with patch("scripts.runner.subprocess.run", return_value=fake_result):
            # Must NOT raise — max_turns is graceful termination
            run_scenario(scenario, model="haiku")

    def test_rc1_without_max_turns_marker_still_raises(self, tmp_path, monkeypatch):
        """Real failures (rc≠0 with no max_turns marker) must still raise."""
        scenario = _FakeScenario(id="mt2", prompt="oops", setup_commands=())

        # Skip sandbox setup side effects
        monkeypatch.setattr("scripts.runner._setup_sandbox", lambda *a, **kw: None)

        # Empty stdout: there is no max_turns marker to excuse the rc=1.
        fake_result = subprocess.CompletedProcess(
            args=["claude"], returncode=1, stdout="", stderr="auth error"
        )

        with patch("scripts.runner.subprocess.run", return_value=fake_result):
            with pytest.raises(RuntimeError, match="claude -p failed"):
                run_scenario(scenario, model="haiku")
|
|
|
|
|
|
class TestRunScenarioErrorIncludesStdoutTail:
    """Error messages must include stdout tail, not only stderr.

    When claude -p fails inside an LLM call, useful diagnostic context often
    appears in stdout (partial stream-json events, model error JSON), not
    stderr. Including stdout tail in the RuntimeError message dramatically
    improves debug-ability without adding any new dependency.
    """

    def test_error_message_contains_stdout_tail(self, tmp_path, monkeypatch):
        """A marker present only in stdout must appear in the raised error."""
        scenario = _FakeScenario(id="e1", prompt="x", setup_commands=())

        # Skip sandbox setup side effects
        monkeypatch.setattr("scripts.runner._setup_sandbox", lambda *a, **kw: None)

        # The marker appears in stdout only, so finding it in the exception
        # text proves stdout (not just stderr) was included in the message.
        diagnostic_marker = "DIAG_STDOUT_MARKER_xyz123"
        fake_result = subprocess.CompletedProcess(
            args=["claude"],
            returncode=2,
            stdout=f"some context {diagnostic_marker} more text",
            stderr="generic error",
        )

        with patch("scripts.runner.subprocess.run", return_value=fake_result):
            with pytest.raises(RuntimeError) as excinfo:
                run_scenario(scenario, model="haiku")

        # Stdout marker MUST appear in the error message
        assert diagnostic_marker in str(excinfo.value)
|