mirror of
https://github.com/ultraworkers/claw-code.git
synced 2026-04-24 13:08:11 +08:00
fix: #162 — budget-overflow no longer corrupts session state in submit_message
Previously, QueryEnginePort.submit_message() checked the token budget AFTER
appending the prompt to mutable_messages, transcript_store, and permission_denials,
and AFTER calling compact_messages_if_needed(). On overflow it set
stop_reason='max_budget_reached' but the overflow turn was already committed.
Any caller that persisted the session afterwards wrote the rejected prompt to
disk — the session was silently poisoned even though the TurnResult said the
turn never completed.
Fix:
- Restructure submit_message so the budget check early-returns BEFORE any
mutation of mutable_messages, transcript_store, permission_denials, or
total_usage.
- The returned TurnResult.usage reflects pre-call state (overflow never
advanced the usage counter).
- Normal (in-budget) path unchanged: mutation happens exactly once, at the
end, only on 'completed' results.
This closes the atomicity gap: submit_message is now either 'turn committed'
(stop_reason='completed') or 'turn rejected, state untouched'
(stop_reason in {'max_budget_reached', 'max_turns_reached'}). Callers can
safely retry with a fresh budget or a smaller prompt without worrying about
phantom committed turns from prior rejections.
Tests (tests/test_submit_message_budget.py, 10 tests):
- TestBudgetOverflowDoesNotMutate (5): mutable_messages / transcript /
permission_denials / total_usage / TurnResult.usage all pre-mutation after overflow
- TestOverflowPersistence (2): first-turn overflow persists empty session;
successful-turn-then-overflow persists only the successful turn
- TestEngineUsableAfterOverflow (2): subsequent in-budget call still works
with no residue; repeated overflows don't accumulate hidden state
- TestNormalPathStillCommits (1): regression guard — non-overflow path still
commits mutable_messages/transcript/usage as expected
Full suite: 59/59 passing, zero regression.
Blocker: none. Closes ROADMAP #162.
This commit is contained in:
parent
3f4d46d7b4
commit
4813a2b351
@ -85,9 +85,25 @@ class QueryEnginePort:
|
||||
]
|
||||
output = self._format_output(summary_lines)
|
||||
projected_usage = self.total_usage.add_turn(prompt, output)
|
||||
stop_reason = 'completed'
|
||||
|
||||
# #162: budget check must precede mutation. Previously this block set
|
||||
# stop_reason='max_budget_reached' but still appended the overflow turn
|
||||
# to mutable_messages / transcript_store / permission_denials, corrupting
|
||||
# the session for any caller that persisted it afterwards. The overflow
|
||||
# prompt was effectively committed even though the TurnResult signalled
|
||||
# rejection. Now we early-return with pre-mutation state intact so
|
||||
# callers can safely retry with a smaller prompt or a fresh budget.
|
||||
if projected_usage.input_tokens + projected_usage.output_tokens > self.config.max_budget_tokens:
|
||||
stop_reason = 'max_budget_reached'
|
||||
return TurnResult(
|
||||
prompt=prompt,
|
||||
output=output,
|
||||
matched_commands=matched_commands,
|
||||
matched_tools=matched_tools,
|
||||
permission_denials=denied_tools,
|
||||
usage=self.total_usage, # unchanged — overflow turn was rejected
|
||||
stop_reason='max_budget_reached',
|
||||
)
|
||||
|
||||
self.mutable_messages.append(prompt)
|
||||
self.transcript_store.append(prompt)
|
||||
self.permission_denials.extend(denied_tools)
|
||||
@ -100,7 +116,7 @@ class QueryEnginePort:
|
||||
matched_tools=matched_tools,
|
||||
permission_denials=denied_tools,
|
||||
usage=self.total_usage,
|
||||
stop_reason=stop_reason,
|
||||
stop_reason='completed',
|
||||
)
|
||||
|
||||
def stream_submit_message(
|
||||
|
||||
167
tests/test_submit_message_budget.py
Normal file
167
tests/test_submit_message_budget.py
Normal file
@ -0,0 +1,167 @@
|
||||
"""Tests for submit_message budget-overflow atomicity (ROADMAP #162).
|
||||
|
||||
Covers:
|
||||
- Budget overflow returns stop_reason='max_budget_reached' without mutating session
|
||||
- mutable_messages, transcript_store, permission_denials, total_usage all unchanged
|
||||
- Session persisted after overflow does not contain the overflow turn
|
||||
- Engine remains usable after overflow: subsequent in-budget call succeeds
|
||||
- Normal (non-overflow) path still commits state as before
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
from src.models import PermissionDenial, UsageSummary # noqa: E402
|
||||
from src.port_manifest import build_port_manifest # noqa: E402
|
||||
from src.query_engine import QueryEngineConfig, QueryEnginePort # noqa: E402
|
||||
from src.session_store import StoredSession, load_session, save_session # noqa: E402
|
||||
|
||||
|
||||
def _make_engine(max_budget_tokens: int = 10) -> QueryEnginePort:
|
||||
engine = QueryEnginePort(manifest=build_port_manifest())
|
||||
engine.config = QueryEngineConfig(max_budget_tokens=max_budget_tokens)
|
||||
return engine
|
||||
|
||||
|
||||
class TestBudgetOverflowDoesNotMutate:
|
||||
"""The core #162 contract: overflow must leave session state untouched."""
|
||||
|
||||
def test_mutable_messages_unchanged_on_overflow(self) -> None:
|
||||
engine = _make_engine(max_budget_tokens=10)
|
||||
pre_count = len(engine.mutable_messages)
|
||||
overflow_prompt = ' '.join(['word'] * 50)
|
||||
result = engine.submit_message(overflow_prompt)
|
||||
assert result.stop_reason == 'max_budget_reached'
|
||||
assert len(engine.mutable_messages) == pre_count
|
||||
|
||||
def test_transcript_unchanged_on_overflow(self) -> None:
|
||||
engine = _make_engine(max_budget_tokens=10)
|
||||
pre_count = len(engine.transcript_store.entries)
|
||||
overflow_prompt = ' '.join(['word'] * 50)
|
||||
result = engine.submit_message(overflow_prompt)
|
||||
assert result.stop_reason == 'max_budget_reached'
|
||||
assert len(engine.transcript_store.entries) == pre_count
|
||||
|
||||
def test_permission_denials_unchanged_on_overflow(self) -> None:
|
||||
engine = _make_engine(max_budget_tokens=10)
|
||||
pre_count = len(engine.permission_denials)
|
||||
denials = (PermissionDenial(tool_name='bash', reason='gated in test'),)
|
||||
overflow_prompt = ' '.join(['word'] * 50)
|
||||
result = engine.submit_message(overflow_prompt, denied_tools=denials)
|
||||
assert result.stop_reason == 'max_budget_reached'
|
||||
assert len(engine.permission_denials) == pre_count
|
||||
|
||||
def test_total_usage_unchanged_on_overflow(self) -> None:
|
||||
engine = _make_engine(max_budget_tokens=10)
|
||||
pre_usage = engine.total_usage
|
||||
overflow_prompt = ' '.join(['word'] * 50)
|
||||
result = engine.submit_message(overflow_prompt)
|
||||
assert result.stop_reason == 'max_budget_reached'
|
||||
assert engine.total_usage == pre_usage
|
||||
|
||||
def test_turn_result_reports_pre_mutation_usage(self) -> None:
|
||||
"""The TurnResult.usage must reflect session state as-if overflow never happened."""
|
||||
engine = _make_engine(max_budget_tokens=10)
|
||||
pre_usage = engine.total_usage
|
||||
overflow_prompt = ' '.join(['word'] * 50)
|
||||
result = engine.submit_message(overflow_prompt)
|
||||
assert result.stop_reason == 'max_budget_reached'
|
||||
assert result.usage == pre_usage
|
||||
|
||||
|
||||
class TestOverflowPersistence:
|
||||
"""Session persisted after overflow must not contain the overflow turn."""
|
||||
|
||||
def test_persisted_session_empty_when_first_turn_overflows(
|
||||
self, tmp_path: Path, monkeypatch
|
||||
) -> None:
|
||||
"""When the very first call overflows, persisted session has zero messages."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
engine = _make_engine(max_budget_tokens=10)
|
||||
overflow_prompt = ' '.join(['word'] * 50)
|
||||
result = engine.submit_message(overflow_prompt)
|
||||
assert result.stop_reason == 'max_budget_reached'
|
||||
|
||||
path_str = engine.persist_session()
|
||||
path = Path(path_str)
|
||||
assert path.exists()
|
||||
loaded = load_session(path.stem, path.parent)
|
||||
assert loaded.messages == (), (
|
||||
f'overflow turn poisoned session: {loaded.messages!r}'
|
||||
)
|
||||
|
||||
def test_persisted_session_retains_only_successful_turns(
|
||||
self, tmp_path: Path, monkeypatch
|
||||
) -> None:
|
||||
"""A successful turn followed by an overflow persists only the successful turn."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
# Budget large enough for one short turn but not a second big one.
|
||||
# Token counting is whitespace-split (see UsageSummary.add_turn),
|
||||
# so overflow prompts must contain many whitespace-separated words.
|
||||
engine = QueryEnginePort(manifest=build_port_manifest())
|
||||
engine.config = QueryEngineConfig(max_budget_tokens=50)
|
||||
|
||||
ok = engine.submit_message('short')
|
||||
assert ok.stop_reason == 'completed'
|
||||
assert 'short' in engine.mutable_messages
|
||||
|
||||
# 500 whitespace-separated tokens — definitely over a 50-token budget
|
||||
overflow_prompt = ' '.join(['word'] * 500)
|
||||
overflow = engine.submit_message(overflow_prompt)
|
||||
assert overflow.stop_reason == 'max_budget_reached'
|
||||
|
||||
path = Path(engine.persist_session())
|
||||
loaded = load_session(path.stem, path.parent)
|
||||
assert loaded.messages == ('short',), (
|
||||
f'expected only the successful turn, got {loaded.messages!r}'
|
||||
)
|
||||
|
||||
|
||||
class TestEngineUsableAfterOverflow:
|
||||
"""After overflow, engine must still be usable — overflow is rejection, not corruption."""
|
||||
|
||||
def test_subsequent_in_budget_call_succeeds(self) -> None:
|
||||
"""After an overflow rejection, raising the budget and retrying works."""
|
||||
engine = _make_engine(max_budget_tokens=10)
|
||||
overflow_prompt = ' '.join(['word'] * 100)
|
||||
overflow = engine.submit_message(overflow_prompt)
|
||||
assert overflow.stop_reason == 'max_budget_reached'
|
||||
|
||||
# Raise the budget and retry — the engine should be in a clean state
|
||||
engine.config = QueryEngineConfig(max_budget_tokens=10_000)
|
||||
ok = engine.submit_message('short retry')
|
||||
assert ok.stop_reason == 'completed'
|
||||
assert 'short retry' in engine.mutable_messages
|
||||
# The overflow prompt should never have been recorded
|
||||
assert overflow_prompt not in engine.mutable_messages
|
||||
|
||||
def test_multiple_overflow_calls_remain_idempotent(self) -> None:
|
||||
"""Repeated overflow calls must not accumulate hidden state."""
|
||||
engine = _make_engine(max_budget_tokens=10)
|
||||
overflow_prompt = ' '.join(['word'] * 50)
|
||||
for _ in range(5):
|
||||
result = engine.submit_message(overflow_prompt)
|
||||
assert result.stop_reason == 'max_budget_reached'
|
||||
assert len(engine.mutable_messages) == 0
|
||||
assert len(engine.transcript_store.entries) == 0
|
||||
assert engine.total_usage == UsageSummary()
|
||||
|
||||
|
||||
class TestNormalPathStillCommits:
|
||||
"""Regression guard: non-overflow path must still mutate state as before."""
|
||||
|
||||
def test_in_budget_turn_commits_all_state(self) -> None:
|
||||
engine = QueryEnginePort(manifest=build_port_manifest())
|
||||
engine.config = QueryEngineConfig(max_budget_tokens=10_000)
|
||||
result = engine.submit_message('review MCP tool')
|
||||
assert result.stop_reason == 'completed'
|
||||
assert len(engine.mutable_messages) == 1
|
||||
assert len(engine.transcript_store.entries) == 1
|
||||
assert engine.total_usage.input_tokens > 0
|
||||
assert engine.total_usage.output_tokens > 0
|
||||
Loading…
x
Reference in New Issue
Block a user