mirror of
https://github.com/ultraworkers/claw-code.git
synced 2026-04-24 21:28:11 +08:00
#164 Stage B requires exposing whether cancellation was observed at the turn-result level. This commit adds the infrastructure field: Changes: - TurnResult.cancel_observed: bool = False (query_engine.py) - _build_timeout_result() accepts cancel_observed parameter (runtime.py) - Two timeout paths now pass cancel_event.is_set() to signal observation (runtime.py) - bootstrap command includes cancel_observed in turn JSON (main.py) - SCHEMAS.md documents Turn Result Fields with cancel_observed contract Usage: When a turn timeout occurs, cancel_observed=true indicates that the engine observed the cancellation event being set. This allows callers to distinguish: - timeout with no cancel → infrastructure/network stall - timeout with cancel observed → cooperative cancellation was triggered Backward compat: - Existing TurnResult construction without cancel_observed defaults to False - bootstrap JSON output still validates per SCHEMAS.md (new field is always present) Test results: 182 passing, 3 skipped, zero regression. Related: #161 (wall-clock timeout), #164 (cancellation observability protocol) ROADMAP continues #164 with Stage C (test coverage for cancellation + turn envelope).
336 lines
15 KiB
Python
336 lines
15 KiB
Python
from __future__ import annotations
|
|
|
|
import threading
|
|
import time
|
|
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
|
|
from dataclasses import dataclass
|
|
|
|
from .commands import PORTED_COMMANDS
|
|
from .context import PortContext, build_port_context, render_context
|
|
from .history import HistoryLog
|
|
from .models import PermissionDenial, PortingModule, UsageSummary
|
|
from .query_engine import QueryEngineConfig, QueryEnginePort, TurnResult
|
|
from .setup import SetupReport, WorkspaceSetup, run_setup
|
|
from .system_init import build_system_init_message
|
|
from .tools import PORTED_TOOLS
|
|
from .execution_registry import build_execution_registry
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class RoutedMatch:
|
|
kind: str
|
|
name: str
|
|
source_hint: str
|
|
score: int
|
|
|
|
|
|
@dataclass
|
|
class RuntimeSession:
|
|
prompt: str
|
|
context: PortContext
|
|
setup: WorkspaceSetup
|
|
setup_report: SetupReport
|
|
system_init_message: str
|
|
history: HistoryLog
|
|
routed_matches: list[RoutedMatch]
|
|
turn_result: TurnResult
|
|
command_execution_messages: tuple[str, ...]
|
|
tool_execution_messages: tuple[str, ...]
|
|
stream_events: tuple[dict[str, object], ...]
|
|
persisted_session_path: str
|
|
|
|
def as_markdown(self) -> str:
|
|
lines = [
|
|
'# Runtime Session',
|
|
'',
|
|
f'Prompt: {self.prompt}',
|
|
'',
|
|
'## Context',
|
|
render_context(self.context),
|
|
'',
|
|
'## Setup',
|
|
f'- Python: {self.setup.python_version} ({self.setup.implementation})',
|
|
f'- Platform: {self.setup.platform_name}',
|
|
f'- Test command: {self.setup.test_command}',
|
|
'',
|
|
'## Startup Steps',
|
|
*(f'- {step}' for step in self.setup.startup_steps()),
|
|
'',
|
|
'## System Init',
|
|
self.system_init_message,
|
|
'',
|
|
'## Routed Matches',
|
|
]
|
|
if self.routed_matches:
|
|
lines.extend(
|
|
f'- [{match.kind}] {match.name} ({match.score}) — {match.source_hint}'
|
|
for match in self.routed_matches
|
|
)
|
|
else:
|
|
lines.append('- none')
|
|
lines.extend([
|
|
'',
|
|
'## Command Execution',
|
|
*(self.command_execution_messages or ('none',)),
|
|
'',
|
|
'## Tool Execution',
|
|
*(self.tool_execution_messages or ('none',)),
|
|
'',
|
|
'## Stream Events',
|
|
*(f"- {event['type']}: {event}" for event in self.stream_events),
|
|
'',
|
|
'## Turn Result',
|
|
self.turn_result.output,
|
|
'',
|
|
f'Persisted session path: {self.persisted_session_path}',
|
|
'',
|
|
self.history.as_markdown(),
|
|
])
|
|
return '\n'.join(lines)
|
|
|
|
|
|
class PortRuntime:
|
|
def route_prompt(self, prompt: str, limit: int = 5) -> list[RoutedMatch]:
|
|
tokens = {token.lower() for token in prompt.replace('/', ' ').replace('-', ' ').split() if token}
|
|
by_kind = {
|
|
'command': self._collect_matches(tokens, PORTED_COMMANDS, 'command'),
|
|
'tool': self._collect_matches(tokens, PORTED_TOOLS, 'tool'),
|
|
}
|
|
|
|
selected: list[RoutedMatch] = []
|
|
for kind in ('command', 'tool'):
|
|
if by_kind[kind]:
|
|
selected.append(by_kind[kind].pop(0))
|
|
|
|
leftovers = sorted(
|
|
[match for matches in by_kind.values() for match in matches],
|
|
key=lambda item: (-item.score, item.kind, item.name),
|
|
)
|
|
selected.extend(leftovers[: max(0, limit - len(selected))])
|
|
return selected[:limit]
|
|
|
|
def bootstrap_session(self, prompt: str, limit: int = 5) -> RuntimeSession:
|
|
context = build_port_context()
|
|
setup_report = run_setup(trusted=True)
|
|
setup = setup_report.setup
|
|
history = HistoryLog()
|
|
engine = QueryEnginePort.from_workspace()
|
|
history.add('context', f'python_files={context.python_file_count}, archive_available={context.archive_available}')
|
|
history.add('registry', f'commands={len(PORTED_COMMANDS)}, tools={len(PORTED_TOOLS)}')
|
|
matches = self.route_prompt(prompt, limit=limit)
|
|
registry = build_execution_registry()
|
|
command_execs = tuple(registry.command(match.name).execute(prompt) for match in matches if match.kind == 'command' and registry.command(match.name))
|
|
tool_execs = tuple(registry.tool(match.name).execute(prompt) for match in matches if match.kind == 'tool' and registry.tool(match.name))
|
|
denials = tuple(self._infer_permission_denials(matches))
|
|
stream_events = tuple(engine.stream_submit_message(
|
|
prompt,
|
|
matched_commands=tuple(match.name for match in matches if match.kind == 'command'),
|
|
matched_tools=tuple(match.name for match in matches if match.kind == 'tool'),
|
|
denied_tools=denials,
|
|
))
|
|
turn_result = engine.submit_message(
|
|
prompt,
|
|
matched_commands=tuple(match.name for match in matches if match.kind == 'command'),
|
|
matched_tools=tuple(match.name for match in matches if match.kind == 'tool'),
|
|
denied_tools=denials,
|
|
)
|
|
persisted_session_path = engine.persist_session()
|
|
history.add('routing', f'matches={len(matches)} for prompt={prompt!r}')
|
|
history.add('execution', f'command_execs={len(command_execs)} tool_execs={len(tool_execs)}')
|
|
history.add('turn', f'commands={len(turn_result.matched_commands)} tools={len(turn_result.matched_tools)} denials={len(turn_result.permission_denials)} stop={turn_result.stop_reason}')
|
|
history.add('session_store', persisted_session_path)
|
|
return RuntimeSession(
|
|
prompt=prompt,
|
|
context=context,
|
|
setup=setup,
|
|
setup_report=setup_report,
|
|
system_init_message=build_system_init_message(trusted=True),
|
|
history=history,
|
|
routed_matches=matches,
|
|
turn_result=turn_result,
|
|
command_execution_messages=command_execs,
|
|
tool_execution_messages=tool_execs,
|
|
stream_events=stream_events,
|
|
persisted_session_path=persisted_session_path,
|
|
)
|
|
|
|
def run_turn_loop(
|
|
self,
|
|
prompt: str,
|
|
limit: int = 5,
|
|
max_turns: int = 3,
|
|
structured_output: bool = False,
|
|
timeout_seconds: float | None = None,
|
|
continuation_prompt: str | None = None,
|
|
) -> list[TurnResult]:
|
|
"""Run a multi-turn engine loop with optional wall-clock deadline.
|
|
|
|
Args:
|
|
prompt: The initial prompt to submit.
|
|
limit: Match routing limit.
|
|
max_turns: Maximum number of turns before stopping.
|
|
structured_output: Whether to request structured output.
|
|
timeout_seconds: Total wall-clock budget across all turns. When the
|
|
budget is exhausted mid-turn, a synthetic TurnResult with
|
|
``stop_reason='timeout'`` is appended and the loop exits.
|
|
``None`` (default) preserves legacy unbounded behaviour.
|
|
continuation_prompt: What to send on turns after the first. When
|
|
``None`` (default, #163), the loop stops after turn 0 and the
|
|
caller decides how to continue. When set, the same text is
|
|
submitted for every turn after the first, giving claws a clean
|
|
hook for structured follow-ups (e.g. ``"Continue."``, a
|
|
routing-planner instruction, or a tool-output cue). Previously
|
|
the loop silently appended ``" [turn N]"`` to the original
|
|
prompt, polluting the transcript with harness-generated
|
|
annotation the model had no way to interpret.
|
|
|
|
Returns:
|
|
A list of TurnResult objects. The final entry's ``stop_reason``
|
|
distinguishes ``'completed'``, ``'max_turns_reached'``,
|
|
``'max_budget_reached'``, or ``'timeout'``.
|
|
|
|
#161: prior to this change a hung ``engine.submit_message`` call would
|
|
block the loop indefinitely with no cancellation path, forcing claws to
|
|
rely on external watchdogs or OS-level kills. Callers can now enforce a
|
|
deadline and receive a typed timeout signal instead.
|
|
|
|
#163: the old ``f'{prompt} [turn {turn + 1}]'`` suffix was never
|
|
interpreted by the engine or any system prompt. It looked like a real
|
|
user turn in ``mutable_messages`` and the transcript, making replay and
|
|
analysis fragile. Removed entirely; callers supply ``continuation_prompt``
|
|
for meaningful follow-ups or let the loop stop after turn 0.
|
|
"""
|
|
engine = QueryEnginePort.from_workspace()
|
|
engine.config = QueryEngineConfig(max_turns=max_turns, structured_output=structured_output)
|
|
matches = self.route_prompt(prompt, limit=limit)
|
|
command_names = tuple(match.name for match in matches if match.kind == 'command')
|
|
tool_names = tuple(match.name for match in matches if match.kind == 'tool')
|
|
# #159: infer permission denials from the routed matches, not hardcoded empty tuple.
|
|
# Multi-turn sessions must have the same security posture as bootstrap_session.
|
|
denied_tools = tuple(self._infer_permission_denials(matches))
|
|
results: list[TurnResult] = []
|
|
deadline = time.monotonic() + timeout_seconds if timeout_seconds is not None else None
|
|
# #164 Stage A: shared cancel_event signals cooperative cancellation
|
|
# across turns. On timeout we set() it so any still-running
|
|
# submit_message call (or the next one on the same engine) observes
|
|
# the cancel at a safe checkpoint and returns stop_reason='cancelled'
|
|
# without mutating state. This closes the window where a wedged
|
|
# provider thread could commit a ghost turn after the caller saw
|
|
# 'timeout'.
|
|
cancel_event = threading.Event() if deadline is not None else None
|
|
|
|
# ThreadPoolExecutor is reused across turns so we cancel cleanly on exit.
|
|
executor = ThreadPoolExecutor(max_workers=1) if deadline is not None else None
|
|
try:
|
|
for turn in range(max_turns):
|
|
# #163: no more f'{prompt} [turn N]' suffix injection.
|
|
# On turn 0 submit the original prompt.
|
|
# On turn > 0, submit the caller-supplied continuation prompt;
|
|
# if the caller did not supply one, stop the loop cleanly instead
|
|
# of fabricating a fake user turn.
|
|
if turn == 0:
|
|
turn_prompt = prompt
|
|
elif continuation_prompt is not None:
|
|
turn_prompt = continuation_prompt
|
|
else:
|
|
break
|
|
|
|
if deadline is None:
|
|
# Legacy path: unbounded call, preserves existing behaviour exactly.
|
|
# #159: pass inferred denied_tools (no longer hardcoded empty tuple)
|
|
# #164: cancel_event is None on this path; submit_message skips
|
|
# cancellation checks entirely (legacy zero-overhead behaviour).
|
|
result = engine.submit_message(turn_prompt, command_names, tool_names, denied_tools)
|
|
else:
|
|
remaining = deadline - time.monotonic()
|
|
if remaining <= 0:
|
|
# #164: signal cancel for any in-flight/future submit_message
|
|
# calls that share this engine. Safe because nothing has been
|
|
# submitted yet this turn.
|
|
assert cancel_event is not None
|
|
cancel_event.set()
|
|
results.append(self._build_timeout_result(
|
|
turn_prompt, command_names, tool_names,
|
|
cancel_observed=cancel_event.is_set()
|
|
))
|
|
break
|
|
assert executor is not None
|
|
future = executor.submit(
|
|
engine.submit_message, turn_prompt, command_names, tool_names,
|
|
denied_tools, cancel_event,
|
|
)
|
|
try:
|
|
result = future.result(timeout=remaining)
|
|
except FuturesTimeoutError:
|
|
# #164 Stage A: explicitly signal cancel to the still-running
|
|
# submit_message thread. The next time it hits a checkpoint
|
|
# (entry or post-budget), it returns 'cancelled' without
|
|
# mutating state instead of committing a ghost turn. This
|
|
# upgrades #161's best-effort future.cancel() (which only
|
|
# cancels pre-start futures) to cooperative mid-flight cancel.
|
|
assert cancel_event is not None
|
|
cancel_event.set()
|
|
future.cancel()
|
|
results.append(self._build_timeout_result(
|
|
turn_prompt, command_names, tool_names,
|
|
cancel_observed=cancel_event.is_set()
|
|
))
|
|
break
|
|
|
|
results.append(result)
|
|
if result.stop_reason != 'completed':
|
|
break
|
|
finally:
|
|
if executor is not None:
|
|
# wait=False: don't let a hung thread block loop exit indefinitely.
|
|
# The thread will be reaped when the interpreter shuts down or when
|
|
# the engine call eventually returns.
|
|
executor.shutdown(wait=False)
|
|
return results
|
|
|
|
@staticmethod
|
|
def _build_timeout_result(
|
|
prompt: str,
|
|
command_names: tuple[str, ...],
|
|
tool_names: tuple[str, ...],
|
|
cancel_observed: bool = False,
|
|
) -> TurnResult:
|
|
"""Synthesize a TurnResult representing a wall-clock timeout (#161).
|
|
#164 Stage B: cancel_observed signals cancellation event was set.
|
|
"""
|
|
return TurnResult(
|
|
prompt=prompt,
|
|
output='Wall-clock timeout exceeded before turn completed.',
|
|
matched_commands=command_names,
|
|
matched_tools=tool_names,
|
|
permission_denials=(),
|
|
usage=UsageSummary(),
|
|
stop_reason='timeout',
|
|
cancel_observed=cancel_observed,
|
|
)
|
|
|
|
def _infer_permission_denials(self, matches: list[RoutedMatch]) -> list[PermissionDenial]:
|
|
denials: list[PermissionDenial] = []
|
|
for match in matches:
|
|
if match.kind == 'tool' and 'bash' in match.name.lower():
|
|
denials.append(PermissionDenial(tool_name=match.name, reason='destructive shell execution remains gated in the Python port'))
|
|
return denials
|
|
|
|
def _collect_matches(self, tokens: set[str], modules: tuple[PortingModule, ...], kind: str) -> list[RoutedMatch]:
|
|
matches: list[RoutedMatch] = []
|
|
for module in modules:
|
|
score = self._score(tokens, module)
|
|
if score > 0:
|
|
matches.append(RoutedMatch(kind=kind, name=module.name, source_hint=module.source_hint, score=score))
|
|
matches.sort(key=lambda item: (-item.score, item.name))
|
|
return matches
|
|
|
|
@staticmethod
|
|
def _score(tokens: set[str], module: PortingModule) -> int:
|
|
haystacks = [module.name.lower(), module.source_hint.lower(), module.responsibility.lower()]
|
|
score = 0
|
|
for token in tokens:
|
|
if any(token in haystack for haystack in haystacks):
|
|
score += 1
|
|
return score
|