diff --git a/skills/skill-comply/scripts/runner.py b/skills/skill-comply/scripts/runner.py index ce315a43..84421c4f 100644 --- a/skills/skill-comply/scripts/runner.py +++ b/skills/skill-comply/scripts/runner.py @@ -15,6 +15,11 @@ from scripts.scenario_generator import Scenario SANDBOX_BASE = Path("/tmp/skill-comply-sandbox") ALLOWED_MODELS = frozenset({"haiku", "sonnet", "opus"}) +ALLOWED_SETUP_EXECUTABLES = frozenset({ + "git", "npm", "pip", "pip3", + "touch", "mkdir", "cp", "mv", "echo", + "chmod", "unzip", "tar", +}) # Shell builtins cannot be invoked via subprocess.run; cwd is already # controlled by the cwd= keyword. Scenarios that include these in # setup_commands (a common shell-style convention) must be tolerated. @@ -106,6 +111,9 @@ def _setup_sandbox(sandbox_dir: Path, scenario: Scenario) -> None: if not parts or parts[0] in SHELL_BUILTINS: # Shell builtins (cd/pushd/popd) cannot run as subprocess; skip. continue + if parts[0] not in ALLOWED_SETUP_EXECUTABLES: + # Skip any command whose executable is not allowlisted. NOTE(review): only parts[0] is checked, never the arguments, and allowlisted tools (git/npm/pip) can presumably still run hooks or install scripts - this narrows, but may not prevent, arbitrary code execution; confirm. 
+ continue try: subprocess.run(parts, cwd=sandbox_dir, capture_output=True) except FileNotFoundError: diff --git a/tests/test_invariant_runner.py b/tests/test_invariant_runner.py new file mode 100644 index 00000000..a699438a --- /dev/null +++ b/tests/test_invariant_runner.py @@ -0,0 +1,64 @@ +import os +import sys +import pytest +from pathlib import Path + +_SKILL_COMPLY_ROOT = Path(__file__).resolve().parent.parent / "skills" / "skill-comply" +if str(_SKILL_COMPLY_ROOT) not in sys.path: + sys.path.insert(0, str(_SKILL_COMPLY_ROOT)) + +from scripts.runner import _setup_sandbox # noqa: E402 +from scripts.scenario_generator import Scenario # noqa: E402 + +_GLOBAL_MARKER = "/tmp/runner_test_pwned_marker" + + +@pytest.fixture(autouse=True) +def _remove_marker(): + if os.path.exists(_GLOBAL_MARKER): + os.remove(_GLOBAL_MARKER) + yield + if os.path.exists(_GLOBAL_MARKER): + os.remove(_GLOBAL_MARKER) + + +@pytest.mark.parametrize( + "setup_commands,test_id", + [ + ( + ("python -c \"import os; os.system('touch /tmp/runner_test_pwned_marker')\"",), + "python_interpreter", + ), + ( + ("../../../../../../bin/sh -c 'touch /tmp/runner_test_pwned_marker'",), + "path_traversal", + ), + ( + ("bash -c 'touch /tmp/runner_test_pwned_marker'",), + "non_allowlisted_binary", + ), + ( + ("echo hello",), + "benign_echo", + ), + ], + ids=["python_interpreter", "path_traversal", "non_allowlisted_binary", "benign_echo"], +) +def test_setup_sandbox_blocks_dangerous_commands(setup_commands, test_id, tmp_path): + """Invariant: disallowed setup commands never run (a run would create _GLOBAL_MARKER).""" + scenario = Scenario( + id=f"test-{test_id}", + level=1, + level_name="basic", + description="security test scenario", + prompt="", + setup_commands=setup_commands, + ) + sandbox_dir = tmp_path / "sandbox" + + _setup_sandbox(sandbox_dir, scenario) + + assert not os.path.exists(_GLOBAL_MARKER), ( + f"Arbitrary command execution detected for '{test_id}': " + f"marker file created at {_GLOBAL_MARKER}" + )