mirror of
https://github.com/ultraworkers/claw-code.git
synced 2026-04-24 05:00:25 +08:00
fix: #159 — run_turn_loop no longer hardcodes empty denied_tools; permission denials now parity-match bootstrap_session
#159: multi-turn sessions had a silent security asymmetry: denied_tools were always empty in run_turn_loop, even though bootstrap_session inferred them from the routed matches. Result: any tool gated as 'destructive' (bash-family commands, rm, etc) would silently appear unblocked across all turns in multi-turn mode, giving a false 'clean' permission picture to any claw consuming TurnResult.permission_denials. Fix: compute denied_tools once at loop start via _infer_permission_denials, then pass the same denials to every submit_message call (both timeout and legacy unbounded paths). This mirrors the existing bootstrap_session pattern. Acceptance: run_turn_loop('run bash ls').permission_denials now matches what bootstrap_session returns — both infer the same denials from the routed matches. Multi-turn security posture is symmetric. Tests (tests/test_run_turn_loop_permissions.py, 2 tests): - test_turn_loop_surfaces_permission_denials_like_bootstrap: Symmetry check confirming both paths infer identical denials for destructive tools - test_turn_loop_with_continuation_preserves_denials: Denials inferred at loop start are passed consistently to all turns; captured via mock and verified non-empty Full suite: 82/82 passing, zero regression. Closes ROADMAP #159.
This commit is contained in:
parent
d453eedae6
commit
178c8fac28
@ -204,6 +204,9 @@ class PortRuntime:
|
||||
matches = self.route_prompt(prompt, limit=limit)
|
||||
command_names = tuple(match.name for match in matches if match.kind == 'command')
|
||||
tool_names = tuple(match.name for match in matches if match.kind == 'tool')
|
||||
# #159: infer permission denials from the routed matches, not hardcoded empty tuple.
|
||||
# Multi-turn sessions must have the same security posture as bootstrap_session.
|
||||
denied_tools = tuple(self._infer_permission_denials(matches))
|
||||
results: list[TurnResult] = []
|
||||
deadline = time.monotonic() + timeout_seconds if timeout_seconds is not None else None
|
||||
|
||||
@ -225,7 +228,8 @@ class PortRuntime:
|
||||
|
||||
if deadline is None:
|
||||
# Legacy path: unbounded call, preserves existing behaviour exactly.
|
||||
result = engine.submit_message(turn_prompt, command_names, tool_names, ())
|
||||
# #159: pass inferred denied_tools (no longer hardcoded empty tuple)
|
||||
result = engine.submit_message(turn_prompt, command_names, tool_names, denied_tools)
|
||||
else:
|
||||
remaining = deadline - time.monotonic()
|
||||
if remaining <= 0:
|
||||
@ -233,7 +237,7 @@ class PortRuntime:
|
||||
break
|
||||
assert executor is not None
|
||||
future = executor.submit(
|
||||
engine.submit_message, turn_prompt, command_names, tool_names, ()
|
||||
engine.submit_message, turn_prompt, command_names, tool_names, denied_tools
|
||||
)
|
||||
try:
|
||||
result = future.result(timeout=remaining)
|
||||
|
||||
95
tests/test_run_turn_loop_permissions.py
Normal file
95
tests/test_run_turn_loop_permissions.py
Normal file
@ -0,0 +1,95 @@
|
||||
"""Tests for run_turn_loop permission denials parity (ROADMAP #159).
|
||||
|
||||
Verifies that multi-turn sessions have the same security posture as
|
||||
single-turn bootstrap_session: denied_tools are inferred from matches
|
||||
and threaded through every turn, not hardcoded empty.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
from src.runtime import PortRuntime # noqa: E402
|
||||
|
||||
|
||||
class TestPermissionDenialsInTurnLoop:
|
||||
"""#159: permission denials must be non-empty in run_turn_loop,
|
||||
matching what bootstrap_session produces for the same prompt.
|
||||
"""
|
||||
|
||||
def test_turn_loop_surfaces_permission_denials_like_bootstrap(self) -> None:
|
||||
"""Symmetry check: turn_loop and bootstrap_session infer the same denials."""
|
||||
runtime = PortRuntime()
|
||||
prompt = 'run bash ls'
|
||||
|
||||
# Single-turn via bootstrap
|
||||
bootstrap_result = runtime.bootstrap_session(prompt)
|
||||
bootstrap_denials = bootstrap_result.turn_result.permission_denials
|
||||
|
||||
# Multi-turn via run_turn_loop (single turn, no continuation)
|
||||
loop_results = runtime.run_turn_loop(prompt, max_turns=1)
|
||||
loop_denials = loop_results[0].permission_denials
|
||||
|
||||
# Both should infer denials for bash-family tools
|
||||
assert len(bootstrap_denials) > 0, (
|
||||
'bootstrap_session should deny bash-family tools'
|
||||
)
|
||||
assert len(loop_denials) > 0, (
|
||||
f'#159 regression: run_turn_loop returned empty denials; '
|
||||
f'expected {len(bootstrap_denials)} like bootstrap_session'
|
||||
)
|
||||
|
||||
# The denial kinds should match (both deny the same tools)
|
||||
bootstrap_denied_names = {d.tool_name for d in bootstrap_denials}
|
||||
loop_denied_names = {d.tool_name for d in loop_denials}
|
||||
assert bootstrap_denied_names == loop_denied_names, (
|
||||
f'asymmetric denials: bootstrap denied {bootstrap_denied_names}, '
|
||||
f'loop denied {loop_denied_names}'
|
||||
)
|
||||
|
||||
def test_turn_loop_with_continuation_preserves_denials(self) -> None:
|
||||
"""Denials are inferred once at loop start, then passed to every turn."""
|
||||
runtime = PortRuntime()
|
||||
from unittest.mock import patch
|
||||
|
||||
with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
|
||||
from src.models import UsageSummary
|
||||
from src.query_engine import TurnResult
|
||||
|
||||
engine = mock_factory.return_value
|
||||
submitted_denials: list[tuple] = []
|
||||
|
||||
def _capture(prompt, commands, tools, denials):
|
||||
submitted_denials.append(denials)
|
||||
return TurnResult(
|
||||
prompt=prompt,
|
||||
output='ok',
|
||||
matched_commands=(),
|
||||
matched_tools=(),
|
||||
permission_denials=denials, # echo back the denials
|
||||
usage=UsageSummary(),
|
||||
stop_reason='completed',
|
||||
)
|
||||
|
||||
engine.submit_message.side_effect = _capture
|
||||
|
||||
loop_results = runtime.run_turn_loop(
|
||||
'run bash rm', max_turns=2, continuation_prompt='continue'
|
||||
)
|
||||
|
||||
# Both turn 0 and turn 1 should have received the same denials
|
||||
assert len(submitted_denials) == 2
|
||||
assert submitted_denials[0] == submitted_denials[1], (
|
||||
'denials should be consistent across all turns'
|
||||
)
|
||||
# And they should be non-empty (bash is destructive)
|
||||
assert len(submitted_denials[0]) > 0, (
|
||||
'turn-loop denials were empty — #159 regression'
|
||||
)
|
||||
|
||||
# Turn results should reflect the denials that were passed
|
||||
for result in loop_results:
|
||||
assert len(result.permission_denials) > 0
|
||||
Loading…
x
Reference in New Issue
Block a user