From 178c8fac2845d901324e2f7f22da48f1c5bf01a1 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Wed, 22 Apr 2026 17:50:21 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20#159=20=E2=80=94=20run=5Fturn=5Floop=20n?= =?UTF-8?q?o=20longer=20hardcodes=20empty=20denied=5Ftools;=20permission?= =?UTF-8?q?=20denials=20now=20parity-match=20bootstrap=5Fsession?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #159: multi-turn sessions had a silent security asymmetry: denied_tools were always empty in run_turn_loop, even though bootstrap_session inferred them from the routed matches. Result: any tool gated as 'destructive' (bash-family commands, rm, etc) would silently appear unblocked across all turns in multi-turn mode, giving a false 'clean' permission picture to any claw consuming TurnResult.permission_denials. Fix: compute denied_tools once at loop start via _infer_permission_denials, then pass the same denials to every submit_message call (both timeout and legacy unbounded paths). This mirrors the existing bootstrap_session pattern. Acceptance: run_turn_loop('run bash ls').permission_denials now matches what bootstrap_session returns — both infer the same denials from the routed matches. Multi-turn security posture is symmetric. Tests (tests/test_run_turn_loop_permissions.py, 2 tests): - test_turn_loop_surfaces_permission_denials_like_bootstrap: Symmetry check confirming both paths infer identical denials for destructive tools - test_turn_loop_with_continuation_preserves_denials: Denials inferred at loop start are passed consistently to all turns; captured via mock and verified non-empty Full suite: 82/82 passing, zero regression. Closes ROADMAP #159. --- src/runtime.py | 8 ++- tests/test_run_turn_loop_permissions.py | 95 +++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 tests/test_run_turn_loop_permissions.py diff --git a/src/runtime.py b/src/runtime.py index 6028271..52dd4be 100644 --- a/src/runtime.py +++ b/src/runtime.py @@ -204,6 +204,9 @@ class PortRuntime: matches = self.route_prompt(prompt, limit=limit) command_names = tuple(match.name for match in matches if match.kind == 'command') tool_names = tuple(match.name for match in matches if match.kind == 'tool') + # #159: infer permission denials from the routed matches, not hardcoded empty tuple. + # Multi-turn sessions must have the same security posture as bootstrap_session. + denied_tools = tuple(self._infer_permission_denials(matches)) results: list[TurnResult] = [] deadline = time.monotonic() + timeout_seconds if timeout_seconds is not None else None @@ -225,7 +228,8 @@ class PortRuntime: if deadline is None: # Legacy path: unbounded call, preserves existing behaviour exactly. - result = engine.submit_message(turn_prompt, command_names, tool_names, ()) + # #159: pass inferred denied_tools (no longer hardcoded empty tuple) + result = engine.submit_message(turn_prompt, command_names, tool_names, denied_tools) else: remaining = deadline - time.monotonic() if remaining <= 0: @@ -233,7 +237,7 @@ class PortRuntime: break assert executor is not None future = executor.submit( - engine.submit_message, turn_prompt, command_names, tool_names, () + engine.submit_message, turn_prompt, command_names, tool_names, denied_tools ) try: result = future.result(timeout=remaining) diff --git a/tests/test_run_turn_loop_permissions.py b/tests/test_run_turn_loop_permissions.py new file mode 100644 index 0000000..0ff3e0a --- /dev/null +++ b/tests/test_run_turn_loop_permissions.py @@ -0,0 +1,95 @@ +"""Tests for run_turn_loop permission denials parity (ROADMAP #159). + +Verifies that multi-turn sessions have the same security posture as +single-turn bootstrap_session: denied_tools are inferred from matches +and threaded through every turn, not hardcoded empty. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from src.runtime import PortRuntime # noqa: E402 + + +class TestPermissionDenialsInTurnLoop: + """#159: permission denials must be non-empty in run_turn_loop, + matching what bootstrap_session produces for the same prompt. + """ + + def test_turn_loop_surfaces_permission_denials_like_bootstrap(self) -> None: + """Symmetry check: turn_loop and bootstrap_session infer the same denials.""" + runtime = PortRuntime() + prompt = 'run bash ls' + + # Single-turn via bootstrap + bootstrap_result = runtime.bootstrap_session(prompt) + bootstrap_denials = bootstrap_result.turn_result.permission_denials + + # Multi-turn via run_turn_loop (single turn, no continuation) + loop_results = runtime.run_turn_loop(prompt, max_turns=1) + loop_denials = loop_results[0].permission_denials + + # Both should infer denials for bash-family tools + assert len(bootstrap_denials) > 0, ( + 'bootstrap_session should deny bash-family tools' + ) + assert len(loop_denials) > 0, ( + f'#159 regression: run_turn_loop returned empty denials; ' + f'expected {len(bootstrap_denials)} like bootstrap_session' + ) + + # The denial kinds should match (both deny the same tools) + bootstrap_denied_names = {d.tool_name for d in bootstrap_denials} + loop_denied_names = {d.tool_name for d in loop_denials} + assert bootstrap_denied_names == loop_denied_names, ( + f'asymmetric denials: bootstrap denied {bootstrap_denied_names}, ' + f'loop denied {loop_denied_names}' + ) + + def test_turn_loop_with_continuation_preserves_denials(self) -> None: + """Denials are inferred once at loop start, then passed to every turn.""" + runtime = PortRuntime() + from unittest.mock import patch + + with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory: + from src.models import UsageSummary + from src.query_engine import TurnResult + + engine = mock_factory.return_value + submitted_denials: list[tuple] = [] + + def _capture(prompt, commands, tools, denials): + submitted_denials.append(denials) + return TurnResult( + prompt=prompt, + output='ok', + matched_commands=(), + matched_tools=(), + permission_denials=denials, # echo back the denials + usage=UsageSummary(), + stop_reason='completed', + ) + + engine.submit_message.side_effect = _capture + + loop_results = runtime.run_turn_loop( + 'run bash rm', max_turns=2, continuation_prompt='continue' + ) + + # Both turn 0 and turn 1 should have received the same denials + assert len(submitted_denials) == 2 + assert submitted_denials[0] == submitted_denials[1], ( + 'denials should be consistent across all turns' + ) + # And they should be non-empty (bash is destructive) + assert len(submitted_denials[0]) > 0, ( + 'turn-loop denials were empty — #159 regression' + ) + + # Turn results should reflect the denials that were passed + for result in loop_results: + assert len(result.permission_denials) > 0