From 11326905e931d64adad92407d708b56aa85cdae1 Mon Sep 17 00:00:00 2001
From: YeonGyu-Kim <code.yeon.gyu@gmail.com>
Date: Wed, 22 Apr 2026 17:37:22 +0900
Subject: [PATCH] =?UTF-8?q?fix:=20#163=20=E2=80=94=20remove=20[turn=20N]?=
 =?UTF-8?q?=20suffix=20pollution=20from=20run=5Fturn=5Floop;=20file=20#164?=
 =?UTF-8?q?=20timeout-cancellation=20followup?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

#163: run_turn_loop no longer injects f'{prompt} [turn N]' into follow-up
prompts. The suffix was never defined or interpreted anywhere — not by the
engine, not by the system prompt, not by any LLM. It looked like a real
user-typed annotation in the transcript and made replay/analysis fragile.

New behaviour:
- turn 0 submits the original prompt (unchanged)
- turn > 0 submits caller-supplied continuation_prompt if provided, else
  the loop stops cleanly — no fabricated user turn
- added continuation_prompt: str | None = None parameter to run_turn_loop
- added --continuation-prompt CLI flag for claws scripting multi-turn loops
- zero '[turn' strings ever appear in mutable_messages or stdout now

Behaviour change for existing callers:
- Before: run_turn_loop(prompt, max_turns=3) submitted 3 turns
  ('prompt', 'prompt [turn 2]', 'prompt [turn 3]')
- After:  run_turn_loop(prompt, max_turns=3) submits 1 turn ('prompt')
- To preserve old multi-turn behaviour, pass continuation_prompt='Continue.'
  or any structured follow-up text

One existing timeout test (test_budget_is_cumulative_across_turns) updated
to pass continuation_prompt so the cumulative-budget contract is actually
exercised across turns instead of trivially satisfied by a one-turn loop.

#164 filed: addresses reviewer feedback on #161. The wall-clock timeout
bounds the caller-facing wait, but the underlying submit_message worker
thread keeps running and can mutate engine state after the timeout
TurnResult is returned. A cooperative cancel_event pattern is sketched in
the pinpoint; real asyncio.Task.cancel() support will come once provider
IO is async-native (larger refactor).

Tests (tests/test_run_turn_loop_continuation.py, 8 tests):
- TestNoTurnSuffixInjection (2): zero '[turn' strings in any submitted
  prompt, both default and explicit-continuation paths
- TestContinuationDefaultStopsAfterTurnZero (2): default loops run exactly
  one turn; engine.submit_message called exactly once despite max_turns=10
- TestExplicitContinuationBehaviour (2): turn 0 = original, turn N = continuation
  verbatim; max_turns still respected
- TestCLIContinuationFlag (2): CLI default emits only '## Turn 1';
  --continuation-prompt wires through to multi-turn behaviour

Full suite: 67/67 passing.

Closes ROADMAP #163. Files #164.
---
 src/main.py                              |  10 ++
 src/runtime.py                           |  28 +++-
 tests/test_run_turn_loop_continuation.py | 161 +++++++++++++++++++++++
 tests/test_run_turn_loop_timeout.py      |  13 +-
 4 files changed, 209 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_run_turn_loop_continuation.py

diff --git a/src/main.py b/src/main.py
index ba76566f..e1a56633 100644
--- a/src/main.py
+++ b/src/main.py
@@ -71,6 +71,15 @@ def build_parser() -> argparse.ArgumentParser:
         default=None,
         help='total wall-clock budget across all turns (#161). Default: unbounded.',
     )
+    loop_parser.add_argument(
+        '--continuation-prompt',
+        default=None,
+        help=(
+            'prompt to submit on turns after the first (#163). Default: None '
+            '(loop stops after turn 0). Replaces the deprecated implicit "[turn N]" '
+            'suffix that used to pollute the transcript.'
+        ),
+    )
 
     flush_parser = subparsers.add_parser('flush-transcript', help='persist and flush a temporary session transcript')
     flush_parser.add_argument('prompt')
@@ -199,6 +208,7 @@ def main(argv: list[str] | None = None) -> int:
             max_turns=args.max_turns,
             structured_output=args.structured_output,
             timeout_seconds=args.timeout_seconds,
+            continuation_prompt=args.continuation_prompt,
         )
         for idx, result in enumerate(results, start=1):
             print(f'## Turn {idx}')
diff --git a/src/runtime.py b/src/runtime.py
index 0ec97ae3..60282714 100644
--- a/src/runtime.py
+++ b/src/runtime.py
@@ -160,6 +160,7 @@ class PortRuntime:
         max_turns: int = 3,
         structured_output: bool = False,
         timeout_seconds: float | None = None,
+        continuation_prompt: str | None = None,
     ) -> list[TurnResult]:
         """Run a multi-turn engine loop with optional wall-clock deadline.
 
@@ -172,6 +173,15 @@ class PortRuntime:
                 budget is exhausted mid-turn, a synthetic TurnResult with
                 ``stop_reason='timeout'`` is appended and the loop exits.
                 ``None`` (default) preserves legacy unbounded behaviour.
+            continuation_prompt: What to send on turns after the first. When
+                ``None`` (default, #163), the loop stops after turn 0 and the
+                caller decides how to continue. When set, the same text is
+                submitted for every turn after the first, giving claws a clean
+                hook for structured follow-ups (e.g. ``"Continue."``, a
+                routing-planner instruction, or a tool-output cue). Previously
+                the loop silently appended ``" [turn N]"`` to the original
+                prompt, polluting the transcript with harness-generated
+                annotation the model had no way to interpret.
 
         Returns:
             A list of TurnResult objects. The final entry's ``stop_reason``
@@ -182,6 +192,12 @@ class PortRuntime:
         block the loop indefinitely with no cancellation path, forcing claws to
         rely on external watchdogs or OS-level kills. Callers can now enforce a
         deadline and receive a typed timeout signal instead.
+
+        #163: the old ``f'{prompt} [turn {turn + 1}]'`` suffix was never
+        interpreted by the engine or any system prompt. It looked like a real
+        user turn in ``mutable_messages`` and the transcript, making replay and
+        analysis fragile. Removed entirely; callers supply ``continuation_prompt``
+        for meaningful follow-ups or let the loop stop after turn 0.
         """
         engine = QueryEnginePort.from_workspace()
         engine.config = QueryEngineConfig(max_turns=max_turns, structured_output=structured_output)
@@ -195,7 +211,17 @@ class PortRuntime:
         executor = ThreadPoolExecutor(max_workers=1) if deadline is not None else None
         try:
             for turn in range(max_turns):
-                turn_prompt = prompt if turn == 0 else f'{prompt} [turn {turn + 1}]'
+                # #163: no more f'{prompt} [turn N]' suffix injection.
+                # On turn 0 submit the original prompt.
+                # On turn > 0, submit the caller-supplied continuation prompt;
+                # if the caller did not supply one, stop the loop cleanly instead
+                # of fabricating a fake user turn.
+                if turn == 0:
+                    turn_prompt = prompt
+                elif continuation_prompt is not None:
+                    turn_prompt = continuation_prompt
+                else:
+                    break
 
                 if deadline is None:
                     # Legacy path: unbounded call, preserves existing behaviour exactly.
diff --git a/tests/test_run_turn_loop_continuation.py b/tests/test_run_turn_loop_continuation.py
new file mode 100644
index 00000000..43f6bdad
--- /dev/null
+++ b/tests/test_run_turn_loop_continuation.py
@@ -0,0 +1,161 @@
+"""Tests for run_turn_loop continuation contract (ROADMAP #163).
+
+The deprecated ``f'{prompt} [turn N]'`` suffix injection is gone. Verifies:
+- No ``[turn N]`` string ever lands in a submitted prompt
+- Default (``continuation_prompt=None``) stops the loop after turn 0
+- Explicit ``continuation_prompt`` is submitted verbatim on subsequent turns
+- The first turn always gets the original prompt, not the continuation
+"""
+
+from __future__ import annotations
+
+import subprocess
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from src.models import UsageSummary  # noqa: E402
+from src.query_engine import TurnResult  # noqa: E402
+from src.runtime import PortRuntime  # noqa: E402
+
+
+def _completed_result(prompt: str) -> TurnResult:
+    return TurnResult(
+        prompt=prompt,
+        output='ok',
+        matched_commands=(),
+        matched_tools=(),
+        permission_denials=(),
+        usage=UsageSummary(),
+        stop_reason='completed',
+    )
+
+
+class TestNoTurnSuffixInjection:
+    """Core acceptance: no prompt submitted to the engine ever contains '[turn N]'."""
+
+    def test_default_path_submits_original_prompt_only(self) -> None:
+        runtime = PortRuntime()
+        submitted: list[str] = []
+
+        def _capture(prompt, commands, tools, denials):
+            submitted.append(prompt)
+            return _completed_result(prompt)
+
+        with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
+            engine = mock_factory.return_value
+            engine.submit_message.side_effect = _capture
+
+            runtime.run_turn_loop('investigate this bug', max_turns=3)
+
+            # Without continuation_prompt, only turn 0 should run
+            assert submitted == ['investigate this bug']
+            # And no '[turn N]' suffix anywhere
+            for p in submitted:
+                assert '[turn' not in p, f'found [turn suffix in submitted prompt: {p!r}'
+
+    def test_with_continuation_prompt_no_turn_suffix(self) -> None:
+        runtime = PortRuntime()
+        submitted: list[str] = []
+
+        def _capture(prompt, commands, tools, denials):
+            submitted.append(prompt)
+            return _completed_result(prompt)
+
+        with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
+            engine = mock_factory.return_value
+            engine.submit_message.side_effect = _capture
+
+            runtime.run_turn_loop(
+                'investigate this bug',
+                max_turns=3,
+                continuation_prompt='Continue.',
+            )
+
+            # Turn 0 = original, turns 1-2 = continuation, verbatim
+            assert submitted == ['investigate this bug', 'Continue.', 'Continue.']
+            # No harness-injected suffix anywhere
+            for p in submitted:
+                assert '[turn' not in p
+                assert not p.endswith(']')
+
+
+class TestContinuationDefaultStopsAfterTurnZero:
+    def test_default_continuation_returns_one_result(self) -> None:
+        runtime = PortRuntime()
+        with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
+            engine = mock_factory.return_value
+            engine.submit_message.side_effect = lambda p, *_: _completed_result(p)
+
+            results = runtime.run_turn_loop('x', max_turns=5)
+            assert len(results) == 1
+            assert results[0].prompt == 'x'
+
+    def test_default_continuation_does_not_call_engine_twice(self) -> None:
+        runtime = PortRuntime()
+        with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
+            engine = mock_factory.return_value
+            engine.submit_message.side_effect = lambda p, *_: _completed_result(p)
+
+            runtime.run_turn_loop('x', max_turns=10)
+            # Exactly one submit_message call despite max_turns=10
+            assert engine.submit_message.call_count == 1
+
+
+class TestExplicitContinuationBehaviour:
+    def test_first_turn_always_uses_original_prompt(self) -> None:
+        runtime = PortRuntime()
+        captured: list[str] = []
+
+        def _capture(prompt, *_):
+            captured.append(prompt)
+            return _completed_result(prompt)
+
+        with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
+            engine = mock_factory.return_value
+            engine.submit_message.side_effect = _capture
+
+            runtime.run_turn_loop(
+                'original task', max_turns=2, continuation_prompt='keep going'
+            )
+
+            assert captured[0] == 'original task'
+            assert captured[1] == 'keep going'
+
+    def test_continuation_respects_max_turns(self) -> None:
+        runtime = PortRuntime()
+        with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
+            engine = mock_factory.return_value
+            engine.submit_message.side_effect = lambda p, *_: _completed_result(p)
+
+            runtime.run_turn_loop('x', max_turns=3, continuation_prompt='go')
+            assert engine.submit_message.call_count == 3
+
+
+class TestCLIContinuationFlag:
+    def test_cli_default_runs_one_turn(self) -> None:
+        """Without --continuation-prompt, CLI should emit exactly '## Turn 1'."""
+        result = subprocess.run(
+            [sys.executable, '-m', 'src.main', 'turn-loop', 'review MCP tool',
+             '--max-turns', '3', '--structured-output'],
+            check=True, capture_output=True, text=True,
+        )
+        assert '## Turn 1' in result.stdout
+        assert '## Turn 2' not in result.stdout
+        assert '[turn' not in result.stdout
+
+    def test_cli_with_continuation_runs_multiple_turns(self) -> None:
+        """With --continuation-prompt, CLI should run up to max_turns."""
+        result = subprocess.run(
+            [sys.executable, '-m', 'src.main', 'turn-loop', 'review MCP tool',
+             '--max-turns', '2', '--structured-output',
+             '--continuation-prompt', 'continue'],
+            check=True, capture_output=True, text=True,
+        )
+        assert '## Turn 1' in result.stdout
+        assert '## Turn 2' in result.stdout
+        # The continuation text is visible (it's submitted as the turn prompt)
+        # but no harness-injected [turn N] suffix
+        assert '[turn' not in result.stdout
diff --git a/tests/test_run_turn_loop_timeout.py b/tests/test_run_turn_loop_timeout.py
index 011f3f0a..8a24dae5 100644
--- a/tests/test_run_turn_loop_timeout.py
+++ b/tests/test_run_turn_loop_timeout.py
@@ -74,7 +74,13 @@ class TestTimeoutAbortsHungTurn:
 
 class TestTimeoutBudgetIsTotal:
     def test_budget_is_cumulative_across_turns(self) -> None:
-        """timeout_seconds is total wall-clock across all turns, not per-turn."""
+        """timeout_seconds is total wall-clock across all turns, not per-turn.
+
+        #163 interaction: multi-turn behaviour now requires an explicit
+        ``continuation_prompt``; otherwise the loop stops after turn 0 and
+        the cumulative-budget contract is trivially satisfied. We supply one
+        here so the test actually exercises the cross-turn deadline.
+        """
         runtime = PortRuntime()
         call_count = {'n': 0}
 
@@ -91,7 +97,10 @@ class TestTimeoutBudgetIsTotal:
             # 0.6s budget, 0.4s per turn. First turn completes (~0.4s),
             # second turn times out before finishing.
             results = runtime.run_turn_loop(
-                'review MCP tool', max_turns=5, timeout_seconds=0.6
+                'review MCP tool',
+                max_turns=5,
+                timeout_seconds=0.6,
+                continuation_prompt='continue',
             )
             elapsed = time.monotonic() - start