From 75c08bc982c927d6d986edb552d7349d55e3b59e Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Wed, 6 May 2026 15:32:34 +0900 Subject: [PATCH] fix: REPL display, /compact panic, identity leak, DeepSeek reasoning, thinking blocks Five interrelated fixes from parallel Hephaestus sessions: 1. fix(repl): display assistant text after spinner (#2981, #2982, #2937) - Added final_assistant_text() call after run_turn spinner completes - REPL now shows response text like run_prompt_json does 2. fix(compact): handle Thinking content blocks (#2985) - Added ContentBlock::Thinking variant throughout compact summarizer - Prevents panic when /compact encounters thinking blocks 3. fix(prompt): provider-aware model identity (#2822) - New ModelFamilyIdentity enum (Claude vs Generic) - Non-Anthropic models no longer say 'I am Claude' - model_family_identity_for() detects provider and sets identity 4. fix(openai): preserve DeepSeek reasoning_content (#2821) - Stream parser now captures reasoning_content from OpenAI-compat - Emits ThinkingDelta/SignatureDelta events for reasoning models - Thinking blocks included in conversation history for re-send 5. feat(runtime): Thinking block support across codebase - AssistantEvent::Thinking variant in conversation.rs - ContentBlock::Thinking in session serialization - Thinking-aware compact summarization - Tests for thinking block ordering and content Closes #2981, #2982, #2937, #2985, #2822, #2821 --- rust/crates/api/src/lib.rs | 5 +- rust/crates/api/src/providers/mod.rs | 53 ++- .../crates/api/src/providers/openai_compat.rs | 338 ++++++++++++++++-- rust/crates/api/src/types.rs | 37 +- .../api/tests/openai_compat_integration.rs | 44 +++ rust/crates/mock-anthropic-service/src/lib.rs | 1 + rust/crates/runtime/src/compact.rs | 10 + rust/crates/runtime/src/conversation.rs | 55 +++ rust/crates/runtime/src/lib.rs | 4 +- rust/crates/runtime/src/prompt.rs | 93 ++++- rust/crates/runtime/src/session.rs | 57 +++ rust/crates/rusty-claude-cli/src/main.rs | 86 ++++- .../rusty-claude-cli/tests/compact_output.rs | 78 ++++ .../tests/compact_repl_panic.rs | 138 +++++++ rust/crates/tools/src/lib.rs | 175 +++++++-- 15 files changed, 1099 insertions(+), 75 deletions(-) create mode 100644 rust/crates/rusty-claude-cli/tests/compact_repl_panic.rs diff --git a/rust/crates/api/src/lib.rs b/rust/crates/api/src/lib.rs index 40da29f1..d55b211b 100644 --- a/rust/crates/api/src/lib.rs +++ b/rust/crates/api/src/lib.rs @@ -21,11 +21,12 @@ pub use prompt_cache::{ pub use providers::anthropic::{AnthropicClient, AnthropicClient as ApiClient, AuthSource}; pub use providers::openai_compat::{ build_chat_completion_request, flatten_tool_result_content, is_reasoning_model, - model_rejects_is_error_field, translate_message, OpenAiCompatClient, OpenAiCompatConfig, + model_rejects_is_error_field, model_requires_reasoning_content_in_history, translate_message, + OpenAiCompatClient, OpenAiCompatConfig, }; pub use providers::{ detect_provider_kind, max_tokens_for_model, max_tokens_for_model_with_override, - resolve_model_alias, ProviderKind, + model_family_identity_for, model_family_identity_for_kind, resolve_model_alias, ProviderKind, }; pub use sse::{parse_frame, SseParser}; pub use types::{ diff --git a/rust/crates/api/src/providers/mod.rs b/rust/crates/api/src/providers/mod.rs index 0ef663f1..9c50eb7a 100644 --- a/rust/crates/api/src/providers/mod.rs +++ b/rust/crates/api/src/providers/mod.rs @@ -250,6 +250,19 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind { ProviderKind::Anthropic } +#[must_use] 
+pub const fn model_family_identity_for_kind(kind: ProviderKind) -> runtime::ModelFamilyIdentity { + match kind { + ProviderKind::Anthropic => runtime::ModelFamilyIdentity::Claude, + ProviderKind::Xai | ProviderKind::OpenAi => runtime::ModelFamilyIdentity::Generic, + } +} + +#[must_use] +pub fn model_family_identity_for(model: &str) -> runtime::ModelFamilyIdentity { + model_family_identity_for_kind(detect_provider_kind(model)) +} + #[must_use] pub fn max_tokens_for_model(model: &str) -> u32 { let canonical = resolve_model_alias(model); @@ -484,8 +497,8 @@ mod tests { use super::{ anthropic_missing_credentials, anthropic_missing_credentials_hint, detect_provider_kind, load_dotenv_file, max_tokens_for_model, max_tokens_for_model_with_override, - model_token_limit, parse_dotenv, preflight_message_request, resolve_model_alias, - ProviderKind, + model_family_identity_for, model_family_identity_for_kind, model_token_limit, parse_dotenv, + preflight_message_request, resolve_model_alias, ProviderKind, }; /// Serializes every test in this module that mutates process-wide @@ -544,6 +557,42 @@ mod tests { ); } + #[test] + fn maps_provider_kind_to_model_family_identity() { + // given: each supported provider kind + let anthropic = ProviderKind::Anthropic; + let openai = ProviderKind::OpenAi; + let xai = ProviderKind::Xai; + + // when: converting provider kinds to prompt model family identities + let anthropic_identity = model_family_identity_for_kind(anthropic); + let openai_identity = model_family_identity_for_kind(openai); + let xai_identity = model_family_identity_for_kind(xai); + + // then: Anthropic stays Claude and OpenAI-compatible providers are generic + assert_eq!(anthropic_identity, runtime::ModelFamilyIdentity::Claude); + assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic); + assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic); + } + + #[test] + fn maps_model_name_to_model_family_identity() { + // given: Anthropic, OpenAI-compatible, and xAI model names + let claude_model = "claude-opus-4-6"; + let openai_model = "openai/gpt-4.1-mini"; + let xai_model = "grok-3"; + + // when: detecting prompt model family identities from model names + let claude_identity = model_family_identity_for(claude_model); + let openai_identity = model_family_identity_for(openai_model); + let xai_identity = model_family_identity_for(xai_model); + + // then: Anthropic stays Claude and OpenAI-compatible providers are generic + assert_eq!(claude_identity, runtime::ModelFamilyIdentity::Claude); + assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic); + assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic); + } + #[test] fn openai_namespaced_model_routes_to_openai_not_anthropic() { // Regression: "openai/gpt-4.1-mini" was misrouted to Anthropic when diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index a810502e..b3800d6a 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -443,6 +443,8 @@ struct StreamState { stop_reason: Option, usage: Option, tool_calls: BTreeMap, + thinking_started: bool, + thinking_finished: bool, } impl StreamState { @@ -456,6 +458,8 @@ impl StreamState { stop_reason: None, usage: None, tool_calls: BTreeMap::new(), + thinking_started: false, + thinking_finished: false, } } @@ -493,35 +497,61 @@ impl StreamState { } for choice in chunk.choices { + if let Some(reasoning) = choice + .delta + .reasoning_content + .filter(|value| 
!value.is_empty()) + { + if !self.thinking_started { + self.thinking_started = true; + events.push(StreamEvent::ContentBlockStart(ContentBlockStartEvent { + index: 0, + content_block: OutputContentBlock::Thinking { + thinking: String::new(), + signature: None, + }, + })); + } + events.push(StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent { + index: 0, + delta: ContentBlockDelta::ThinkingDelta { + thinking: reasoning, + }, + })); + } + if let Some(content) = choice.delta.content.filter(|value| !value.is_empty()) { + self.close_thinking(&mut events); if !self.text_started { self.text_started = true; events.push(StreamEvent::ContentBlockStart(ContentBlockStartEvent { - index: 0, + index: self.text_block_index(), content_block: OutputContentBlock::Text { text: String::new(), }, })); } events.push(StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent { - index: 0, + index: self.text_block_index(), delta: ContentBlockDelta::TextDelta { text: content }, })); } for tool_call in choice.delta.tool_calls { + self.close_thinking(&mut events); + let tool_index_offset = self.tool_index_offset(); let state = self.tool_calls.entry(tool_call.index).or_default(); state.apply(tool_call); - let block_index = state.block_index(); + let block_index = state.block_index(tool_index_offset); if !state.started { - if let Some(start_event) = state.start_event()? { + if let Some(start_event) = state.start_event(tool_index_offset)? { state.started = true; events.push(StreamEvent::ContentBlockStart(start_event)); } else { continue; } } - if let Some(delta_event) = state.delta_event() { + if let Some(delta_event) = state.delta_event(tool_index_offset) { events.push(StreamEvent::ContentBlockDelta(delta_event)); } if choice.finish_reason.as_deref() == Some("tool_calls") && !state.stopped { @@ -535,11 +565,12 @@ impl StreamState { if let Some(finish_reason) = choice.finish_reason { self.stop_reason = Some(normalize_finish_reason(&finish_reason)); if finish_reason == "tool_calls" { + let tool_index_offset = self.tool_index_offset(); for state in self.tool_calls.values_mut() { if state.started && !state.stopped { state.stopped = true; events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent { - index: state.block_index(), + index: state.block_index(tool_index_offset), })); } } @@ -557,19 +588,21 @@ impl StreamState { self.finished = true; let mut events = Vec::new(); + self.close_thinking(&mut events); if self.text_started && !self.text_finished { self.text_finished = true; events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent { - index: 0, + index: self.text_block_index(), })); } + let tool_index_offset = self.tool_index_offset(); for state in self.tool_calls.values_mut() { if !state.started { - if let Some(start_event) = state.start_event()? { + if let Some(start_event) = state.start_event(tool_index_offset)? 
{
                 state.started = true;
                 events.push(StreamEvent::ContentBlockStart(start_event));
-                if let Some(delta_event) = state.delta_event() {
+                if let Some(delta_event) = state.delta_event(tool_index_offset) {
                     events.push(StreamEvent::ContentBlockDelta(delta_event));
                 }
             }
@@ -577,7 +610,7 @@ impl StreamState {
             if state.started && !state.stopped {
                 state.stopped = true;
                 events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
-                    index: state.block_index(),
+                    index: state.block_index(tool_index_offset),
                 }));
             }
         }
@@ -603,6 +636,31 @@ impl StreamState {
         }
         Ok(events)
     }
+
+    fn close_thinking(&mut self, events: &mut Vec<StreamEvent>) {
+        if self.thinking_started && !self.thinking_finished {
+            self.thinking_finished = true;
+            events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
+                index: 0,
+            }));
+        }
+    }
+
+    const fn text_block_index(&self) -> u32 {
+        if self.thinking_started {
+            1
+        } else {
+            0
+        }
+    }
+
+    const fn tool_index_offset(&self) -> u32 {
+        if self.thinking_started {
+            2
+        } else {
+            1
+        }
+    }
 }
 
 #[derive(Debug, Default)]
@@ -630,12 +688,12 @@ impl ToolCallState {
         }
     }
 
-    const fn block_index(&self) -> u32 {
-        self.openai_index + 1
+    const fn block_index(&self, offset: u32) -> u32 {
+        self.openai_index + offset
     }
 
     #[allow(clippy::unnecessary_wraps)]
-    fn start_event(&self) -> Result<Option<ContentBlockStartEvent>, ApiError> {
+    fn start_event(&self, offset: u32) -> Result<Option<ContentBlockStartEvent>, ApiError> {
         let Some(name) = self.name.clone() else {
             return Ok(None);
         };
         let id = self
             .id
             .clone()
             .unwrap_or_else(|| format!("tool_call_{}", self.openai_index));
         Ok(Some(ContentBlockStartEvent {
-            index: self.block_index(),
+            index: self.block_index(offset),
             content_block: OutputContentBlock::ToolUse {
                 id,
                 name,
@@ -653,14 +711,14 @@ impl ToolCallState {
         }))
     }
 
-    fn delta_event(&mut self) -> Option<ContentBlockDeltaEvent> {
+    fn delta_event(&mut self, offset: u32) -> Option<ContentBlockDeltaEvent> {
         if self.emitted_len >= self.arguments.len() {
             return None;
         }
         let delta = self.arguments[self.emitted_len..].to_string();
         self.emitted_len = self.arguments.len();
         Some(ContentBlockDeltaEvent {
-            index: self.block_index(),
+            index: self.block_index(offset),
             delta: ContentBlockDelta::InputJsonDelta {
                 partial_json: delta,
             },
@@ -690,6 +748,8 @@ struct ChatMessage {
     #[serde(default)]
     content: Option<String>,
     #[serde(default)]
+    reasoning_content: Option<String>,
+    #[serde(default)]
     tool_calls: Vec,
 }
@@ -735,6 +795,8 @@ struct ChunkChoice {
 struct ChunkDelta {
     #[serde(default)]
     content: Option<String>,
+    #[serde(default)]
+    reasoning_content: Option<String>,
     #[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
     tool_calls: Vec,
 }
@@ -793,6 +855,15 @@ pub fn is_reasoning_model(model: &str) -> bool {
         || canonical.contains("thinking")
 }
 
+/// Returns true for OpenAI-compatible DeepSeek V4 models that require prior
+/// assistant reasoning to be echoed back as `reasoning_content` in history.
+#[must_use]
+pub fn model_requires_reasoning_content_in_history(model: &str) -> bool {
+    let lowered = model.to_ascii_lowercase();
+    let canonical = lowered.rsplit('/').next().unwrap_or(lowered.as_str());
+    canonical.starts_with("deepseek-v4")
+}
+
 /// Strip routing prefix (e.g., "openai/gpt-4" → "gpt-4") for the wire.
 /// The prefix is used only to select transport; the backend expects the
 /// bare model id.
@@ -948,10 +1019,14 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec { match message.role.as_str() { "assistant" => { let mut text = String::new(); + let mut reasoning = String::new(); let mut tool_calls = Vec::new(); for block in &message.content { match block { InputContentBlock::Text { text: value } => text.push_str(value), + InputContentBlock::Thinking { + thinking: value, .. + } => reasoning.push_str(value), InputContentBlock::ToolUse { id, name, input } => tool_calls.push(json!({ "id": id, "type": "function", @@ -963,13 +1038,18 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec { InputContentBlock::ToolResult { .. } => {} } } - if text.is_empty() && tool_calls.is_empty() { + let include_reasoning = + model_requires_reasoning_content_in_history(model) && !reasoning.is_empty(); + if text.is_empty() && tool_calls.is_empty() && !include_reasoning { Vec::new() } else { let mut msg = serde_json::json!({ "role": "assistant", "content": (!text.is_empty()).then_some(text), }); + if include_reasoning { + msg["reasoning_content"] = json!(reasoning); + } // Only include tool_calls when non-empty: some providers reject // assistant messages with an explicit empty tool_calls array. if !tool_calls.is_empty() { @@ -1003,6 +1083,7 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec { } Some(msg) } + InputContentBlock::Thinking { .. } => None, InputContentBlock::ToolUse { .. } => None, }) .collect(), @@ -1182,6 +1263,16 @@ fn normalize_response( "chat completion response missing choices", ))?; let mut content = Vec::new(); + if let Some(thinking) = choice + .message + .reasoning_content + .filter(|value| !value.is_empty()) + { + content.push(OutputContentBlock::Thinking { + thinking, + signature: None, + }); + } if let Some(text) = choice.message.content.filter(|value| !value.is_empty()) { content.push(OutputContentBlock::Text { text }); } @@ -1413,13 +1504,15 @@ impl StringExt for String { mod tests { use super::{ build_chat_completion_request, chat_completions_endpoint, is_reasoning_model, - normalize_finish_reason, openai_tool_choice, parse_tool_arguments, OpenAiCompatClient, - OpenAiCompatConfig, + model_requires_reasoning_content_in_history, normalize_finish_reason, normalize_response, + openai_tool_choice, parse_tool_arguments, OpenAiCompatClient, OpenAiCompatConfig, + StreamState, }; use crate::error::ApiError; use crate::types::{ - InputContentBlock, InputMessage, MessageRequest, ToolChoice, ToolDefinition, - ToolResultContentBlock, + ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent, ContentBlockStopEvent, + InputContentBlock, InputMessage, MessageRequest, OutputContentBlock, StreamEvent, + ToolChoice, ToolDefinition, ToolResultContentBlock, }; use serde_json::json; use std::sync::{Mutex, OnceLock}; @@ -1465,6 +1558,188 @@ mod tests { assert_eq!(payload["tool_choice"], json!("auto")); } + #[test] + fn model_requires_reasoning_content_in_history_detects_deepseek_v4_models() { + // Given DeepSeek V4 and non-V4 model names. + let positive = [ + "deepseek-v4-flash", + "deepseek-v4-pro", + "openai/deepseek-v4-pro", + "deepseek/deepseek-v4-flash", + ]; + let negative = [ + "deepseek-reasoner", + "deepseek-chat", + "gpt-4o", + "claude-sonnet-4-6", + ]; + + // When checking whether history reasoning_content is required. + // Then only DeepSeek V4 variants require it. 
+ for model in positive { + assert!(model_requires_reasoning_content_in_history(model)); + } + for model in negative { + assert!(!model_requires_reasoning_content_in_history(model)); + } + } + + #[test] + fn legacy_deepseek_reasoner_request_omits_reasoning_content_for_assistant_history() { + // Given an assistant history turn containing thinking. + let request = assistant_history_with_thinking_request("deepseek-reasoner"); + + // When serializing for legacy deepseek-reasoner. + let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai()); + + // Then reasoning_content is omitted. + let assistant = &payload["messages"][0]; + assert_eq!(assistant["role"], json!("assistant")); + assert!(assistant.get("reasoning_content").is_none()); + } + + #[test] + fn deepseek_v4_pro_request_includes_reasoning_content_for_assistant_history() { + // Given an assistant history turn containing thinking. + let request = assistant_history_with_thinking_request("openai/deepseek-v4-pro"); + + // When serializing for DeepSeek V4 Pro. + let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai()); + + // Then reasoning_content is included on the assistant message. + let assistant = &payload["messages"][0]; + assert_eq!(assistant["reasoning_content"], json!("prior reasoning")); + assert_eq!(assistant["content"], json!("answer")); + } + + #[test] + fn deepseek_v4_flash_request_includes_reasoning_content_for_assistant_history() { + // Given an assistant history turn containing thinking. + let request = assistant_history_with_thinking_request("deepseek-v4-flash"); + + // When serializing for DeepSeek V4 Flash. + let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai()); + + // Then reasoning_content is included on the assistant message. + let assistant = &payload["messages"][0]; + assert_eq!(assistant["reasoning_content"], json!("prior reasoning")); + } + + #[test] + fn non_streaming_response_with_reasoning_content_emits_thinking_block_first() { + // Given a non-streaming OpenAI-compatible response with reasoning_content. + let response = super::ChatCompletionResponse { + id: "chatcmpl_reasoning".to_string(), + model: "deepseek-v4-pro".to_string(), + choices: vec![super::ChatChoice { + message: super::ChatMessage { + role: "assistant".to_string(), + content: Some("final answer".to_string()), + reasoning_content: Some("hidden thought".to_string()), + tool_calls: Vec::new(), + }, + finish_reason: Some("stop".to_string()), + }], + usage: None, + }; + + // When normalizing the provider response. + let normalized = normalize_response("deepseek-v4-pro", response).expect("normalized"); + + // Then Thinking is the first content block, before text. + assert_eq!( + normalized.content, + vec![ + OutputContentBlock::Thinking { + thinking: "hidden thought".to_string(), + signature: None, + }, + OutputContentBlock::Text { + text: "final answer".to_string(), + }, + ] + ); + } + + #[test] + fn streaming_chunks_with_reasoning_content_emit_thinking_block_events_before_text() { + // Given streaming chunks with reasoning_content followed by text. 
+ let mut state = StreamState::new("deepseek-v4-pro".to_string()); + let mut events = state + .ingest_chunk(super::ChatCompletionChunk { + id: "chatcmpl_stream_reasoning".to_string(), + model: Some("deepseek-v4-pro".to_string()), + choices: vec![super::ChunkChoice { + delta: super::ChunkDelta { + content: None, + reasoning_content: Some("think".to_string()), + tool_calls: Vec::new(), + }, + finish_reason: None, + }], + usage: None, + }) + .expect("reasoning chunk"); + events.extend( + state + .ingest_chunk(super::ChatCompletionChunk { + id: "chatcmpl_stream_reasoning".to_string(), + model: None, + choices: vec![super::ChunkChoice { + delta: super::ChunkDelta { + content: Some(" answer".to_string()), + reasoning_content: None, + tool_calls: Vec::new(), + }, + finish_reason: Some("stop".to_string()), + }], + usage: None, + }) + .expect("text chunk"), + ); + events.extend(state.finish().expect("finish")); + + // When reading normalized stream events. + // Then Thinking starts at index 0, text is offset to index 1. + assert!(matches!(events[0], StreamEvent::MessageStart(_))); + assert!(matches!( + events[1], + StreamEvent::ContentBlockStart(ContentBlockStartEvent { + index: 0, + content_block: OutputContentBlock::Thinking { .. }, + }) + )); + assert!(matches!( + events[2], + StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent { + index: 0, + delta: ContentBlockDelta::ThinkingDelta { .. }, + }) + )); + assert!(matches!( + events[3], + StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 0 }) + )); + assert!(matches!( + events[4], + StreamEvent::ContentBlockStart(ContentBlockStartEvent { + index: 1, + content_block: OutputContentBlock::Text { .. }, + }) + )); + assert!(matches!( + events[5], + StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent { + index: 1, + delta: ContentBlockDelta::TextDelta { .. 
},
+            })
+        ));
+        assert!(matches!(
+            events[6],
+            StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 1 })
+        ));
+    }
+
     #[test]
     fn tool_schema_object_gets_strict_fields_for_responses_endpoint() {
         // OpenAI /responses endpoint rejects object schemas missing
@@ -1624,6 +1899,27 @@ mod tests {
         );
     }
 
+    fn assistant_history_with_thinking_request(model: &str) -> MessageRequest {
+        MessageRequest {
+            model: model.to_string(),
+            max_tokens: 100,
+            messages: vec![InputMessage {
+                role: "assistant".to_string(),
+                content: vec![
+                    InputContentBlock::Thinking {
+                        thinking: "prior reasoning".to_string(),
+                        signature: None,
+                    },
+                    InputContentBlock::Text {
+                        text: "answer".to_string(),
+                    },
+                ],
+            }],
+            stream: false,
+            ..Default::default()
+        }
+    }
+
     fn env_lock() -> std::sync::MutexGuard<'static, ()> {
         static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
         LOCK.get_or_init(|| Mutex::new(()))
diff --git a/rust/crates/api/src/types.rs b/rust/crates/api/src/types.rs
index e136a766..0d41db19 100644
--- a/rust/crates/api/src/types.rs
+++ b/rust/crates/api/src/types.rs
@@ -81,6 +81,11 @@ pub enum InputContentBlock {
     Text {
         text: String,
     },
+    Thinking {
+        thinking: String,
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        signature: Option<String>,
+    },
     ToolUse {
         id: String,
         name: String,
@@ -268,8 +273,9 @@ pub enum StreamEvent {
 #[cfg(test)]
 mod tests {
     use runtime::format_usd;
+    use serde_json::json;
 
-    use super::{MessageResponse, Usage};
+    use super::{InputContentBlock, MessageResponse, Usage};
 
     #[test]
     fn usage_total_tokens_includes_cache_tokens() {
@@ -307,4 +313,33 @@ mod tests {
         assert_eq!(format_usd(cost.total_cost_usd()), "$54.6750");
         assert_eq!(response.total_tokens(), 1_800_000);
     }
+
+    #[test]
+    fn input_content_block_thinking_serializes_with_snake_case_type() {
+        // given
+        let block = InputContentBlock::Thinking {
+            thinking: "pondering".to_string(),
+            signature: Some("sig_123".to_string()),
+        };
+
+        // when
+        let serialized = serde_json::to_value(&block).unwrap();
+        let deserialized: InputContentBlock = serde_json::from_value(json!({
+            "type": "thinking",
+            "thinking": "pondering",
+            "signature": "sig_123"
+        }))
+        .unwrap();
+
+        // then
+        assert_eq!(
+            serialized,
+            json!({
+                "type": "thinking",
+                "thinking": "pondering",
+                "signature": "sig_123"
+            })
+        );
+        assert_eq!(deserialized, block);
+    }
 }
diff --git a/rust/crates/api/tests/openai_compat_integration.rs b/rust/crates/api/tests/openai_compat_integration.rs
index d5596bb0..5db9eaf1 100644
--- a/rust/crates/api/tests/openai_compat_integration.rs
+++ b/rust/crates/api/tests/openai_compat_integration.rs
@@ -63,6 +63,50 @@ async fn send_message_uses_openai_compatible_endpoint_and_auth() {
     assert_eq!(body["tools"][0]["type"], json!("function"));
 }
 
+#[tokio::test]
+async fn send_message_preserves_deepseek_reasoning_content_before_text() {
+    let state = Arc::new(Mutex::new(Vec::<String>::new()));
+    let body = concat!(
+        "{",
+        "\"id\":\"chatcmpl_deepseek_reasoning\",",
+        "\"model\":\"deepseek-v4-pro\",",
+        "\"choices\":[{",
+        "\"message\":{\"role\":\"assistant\",\"reasoning_content\":\"Think first\",\"content\":\"Answer second\",\"tool_calls\":[]},",
+        "\"finish_reason\":\"stop\"",
+        "}],",
+        "\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":5}",
+        "}"
+    );
+    let server = spawn_server(
+        state.clone(),
+        vec![http_response("200 OK", "application/json", body)],
+    )
+    .await;
+
+    let client = OpenAiCompatClient::new("openai-test-key", OpenAiCompatConfig::openai())
+        .with_base_url(server.base_url());
+    let response = client
+        .send_message(&MessageRequest
{ + model: "openai/deepseek-v4-pro".to_string(), + ..sample_request(false) + }) + .await + .expect("request should succeed"); + + assert_eq!( + response.content, + vec![ + OutputContentBlock::Thinking { + thinking: "Think first".to_string(), + signature: None, + }, + OutputContentBlock::Text { + text: "Answer second".to_string(), + }, + ] + ); +} + #[tokio::test] async fn send_message_blocks_oversized_xai_requests_before_the_http_call() { let state = Arc::new(Mutex::new(Vec::::new())); diff --git a/rust/crates/mock-anthropic-service/src/lib.rs b/rust/crates/mock-anthropic-service/src/lib.rs index 68968eed..99623d18 100644 --- a/rust/crates/mock-anthropic-service/src/lib.rs +++ b/rust/crates/mock-anthropic-service/src/lib.rs @@ -248,6 +248,7 @@ fn detect_scenario(request: &MessageRequest) -> Option { .split_whitespace() .find_map(|token| token.strip_prefix(SCENARIO_PREFIX)) .and_then(Scenario::parse), + InputContentBlock::Thinking { .. } => None, _ => None, }) }) diff --git a/rust/crates/runtime/src/compact.rs b/rust/crates/runtime/src/compact.rs index 3e805dda..e4fd3db0 100644 --- a/rust/crates/runtime/src/compact.rs +++ b/rust/crates/runtime/src/compact.rs @@ -213,6 +213,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String { ContentBlock::ToolUse { name, .. } => Some(name.as_str()), ContentBlock::ToolResult { tool_name, .. } => Some(tool_name.as_str()), ContentBlock::Text { .. } => None, + ContentBlock::Thinking { .. } => None, }) .collect::>(); tool_names.sort_unstable(); @@ -317,6 +318,9 @@ fn merge_compact_summaries(existing_summary: Option<&str>, new_summary: &str) -> fn summarize_block(block: &ContentBlock) -> String { let raw = match block { ContentBlock::Text { text } => text.clone(), + ContentBlock::Thinking { thinking, .. } => { + format!("thinking ({} chars)", thinking.chars().count()) + } ContentBlock::ToolUse { name, input, .. } => format!("tool_use {name}({input})"), ContentBlock::ToolResult { tool_name, @@ -378,6 +382,7 @@ fn collect_key_files(messages: &[ConversationMessage]) -> Vec { ContentBlock::Text { text } => text.as_str(), ContentBlock::ToolUse { input, .. } => input.as_str(), ContentBlock::ToolResult { output, .. } => output.as_str(), + ContentBlock::Thinking { thinking, .. } => thinking.as_str(), }) .flat_map(extract_file_candidates) .collect::>(); @@ -400,6 +405,7 @@ fn first_text_block(message: &ConversationMessage) -> Option<&str> { ContentBlock::Text { text } if !text.trim().is_empty() => Some(text.as_str()), ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. } + | ContentBlock::Thinking { .. } | ContentBlock::Text { .. } => None, }) } @@ -450,6 +456,10 @@ fn estimate_message_tokens(message: &ConversationMessage) -> usize { ContentBlock::ToolResult { tool_name, output, .. } => (tool_name.len() + output.len()) / 4 + 1, + ContentBlock::Thinking { + thinking, + signature, + } => thinking.len() / 4 + signature.as_ref().map_or(0, |value| value.len() / 4 + 1), }) .sum() } diff --git a/rust/crates/runtime/src/conversation.rs b/rust/crates/runtime/src/conversation.rs index 610ba1a8..35c0f73f 100644 --- a/rust/crates/runtime/src/conversation.rs +++ b/rust/crates/runtime/src/conversation.rs @@ -28,6 +28,10 @@ pub struct ApiRequest { /// Streamed events emitted while processing a single assistant turn. 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum AssistantEvent {
+    Thinking {
+        thinking: String,
+        signature: Option<String>,
+    },
     TextDelta(String),
     ToolUse {
         id: String,
@@ -721,6 +725,16 @@ fn build_assistant_message(
 
     for event in events {
         match event {
+            AssistantEvent::Thinking {
+                thinking,
+                signature,
+            } => {
+                flush_text_block(&mut text, &mut blocks);
+                blocks.push(ContentBlock::Thinking {
+                    thinking,
+                    signature,
+                });
+            }
             AssistantEvent::TextDelta(delta) => text.push_str(&delta),
             AssistantEvent::ToolUse { id, name, input } => {
                 flush_text_block(&mut text, &mut blocks);
@@ -1723,6 +1737,47 @@ mod tests {
             .contains("assistant stream produced no content"));
     }
 
+    #[test]
+    fn build_assistant_message_places_thinking_block_before_text_and_tool_use() {
+        // given
+        let events = vec![
+            AssistantEvent::Thinking {
+                thinking: "pondering".to_string(),
+                signature: Some("sig".to_string()),
+            },
+            AssistantEvent::TextDelta("hello".to_string()),
+            AssistantEvent::ToolUse {
+                id: "tool-1".to_string(),
+                name: "echo".to_string(),
+                input: "payload".to_string(),
+            },
+            AssistantEvent::MessageStop,
+        ];
+
+        // when
+        let (message, _, _) = build_assistant_message(events)
+            .expect("assistant message should preserve thinking, text, and tool blocks");
+
+        // then
+        assert_eq!(
+            message.blocks,
+            vec![
+                ContentBlock::Thinking {
+                    thinking: "pondering".to_string(),
+                    signature: Some("sig".to_string()),
+                },
+                ContentBlock::Text {
+                    text: "hello".to_string(),
+                },
+                ContentBlock::ToolUse {
+                    id: "tool-1".to_string(),
+                    name: "echo".to_string(),
+                    input: "payload".to_string(),
+                },
+            ]
+        );
+    }
+
     #[test]
     fn static_tool_executor_rejects_unknown_tools() {
         // given
diff --git a/rust/crates/runtime/src/lib.rs b/rust/crates/runtime/src/lib.rs
index c7d87091..c1108d3d 100644
--- a/rust/crates/runtime/src/lib.rs
+++ b/rust/crates/runtime/src/lib.rs
@@ -131,8 +131,8 @@ pub use policy_engine::{
     PolicyEngine, PolicyRule, ReconcileReason, ReviewStatus,
 };
 pub use prompt::{
-    load_system_prompt, prepend_bullets, ContextFile, ProjectContext, PromptBuildError,
-    SystemPromptBuilder, FRONTIER_MODEL_NAME, SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
+    load_system_prompt, prepend_bullets, ContextFile, ModelFamilyIdentity, ProjectContext,
+    PromptBuildError, SystemPromptBuilder, FRONTIER_MODEL_NAME, SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
 };
 pub use recovery_recipes::{
     attempt_recovery, recipe_for, EscalationPolicy, FailureScenario, RecoveryContext,
diff --git a/rust/crates/runtime/src/prompt.rs b/rust/crates/runtime/src/prompt.rs
index e46b7ebe..1e6c4eda 100644
--- a/rust/crates/runtime/src/prompt.rs
+++ b/rust/crates/runtime/src/prompt.rs
@@ -43,6 +43,24 @@ pub const FRONTIER_MODEL_NAME: &str = "Claude Opus 4.6";
 const MAX_INSTRUCTION_FILE_CHARS: usize = 4_000;
 const MAX_TOTAL_INSTRUCTION_CHARS: usize = 12_000;
 
+/// Identity used for the model family line in generated prompts.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+pub enum ModelFamilyIdentity {
+    #[default]
+    Claude,
+    Generic,
+}
+
+impl ModelFamilyIdentity {
+    #[must_use]
+    pub const fn family_label(self) -> &'static str {
+        match self {
+            Self::Claude => FRONTIER_MODEL_NAME,
+            Self::Generic => "an AI assistant",
+        }
+    }
+}
+
 /// Contents of an instruction file included in prompt construction.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct ContextFile {
@@ -97,6 +115,7 @@ pub struct SystemPromptBuilder {
     output_style_prompt: Option<String>,
     os_name: Option<String>,
     os_version: Option<String>,
+    model_family: Option<ModelFamilyIdentity>,
     append_sections: Vec<String>,
     project_context: Option<ProjectContext>,
     config: Option<RuntimeConfig>,
@@ -122,6 +141,12 @@ impl SystemPromptBuilder {
         self
     }
 
+    #[must_use]
+    pub fn with_model_family(mut self, model_family: ModelFamilyIdentity) -> Self {
+        self.model_family = Some(model_family);
+        self
+    }
+
     #[must_use]
     pub fn with_project_context(mut self, project_context: ProjectContext) -> Self {
         self.project_context = Some(project_context);
@@ -179,9 +204,10 @@ impl SystemPromptBuilder {
             || "unknown".to_string(),
             |context| context.current_date.clone(),
         );
+        let identity = self.model_family.unwrap_or_default();
         let mut lines = vec!["# Environment context".to_string()];
         lines.extend(prepend_bullets(vec![
-            format!("Model family: {FRONTIER_MODEL_NAME}"),
+            format!("Model family: {}", identity.family_label()),
             format!("Working directory: {cwd}"),
             format!("Date: {date}"),
             format!(
@@ -434,12 +460,14 @@ pub fn load_system_prompt(
     current_date: impl Into<String>,
     os_name: impl Into<String>,
     os_version: impl Into<String>,
+    model_family: ModelFamilyIdentity,
 ) -> Result<Vec<String>, PromptBuildError> {
     let cwd = cwd.into();
     let project_context = ProjectContext::discover_with_git(&cwd, current_date.into())?;
     let config = ConfigLoader::default_for(&cwd).load()?;
     Ok(SystemPromptBuilder::new()
         .with_os(os_name, os_version)
+        .with_model_family(model_family)
         .with_project_context(project_context)
         .with_runtime_config(config)
         .build())
@@ -522,7 +550,8 @@ mod tests {
     use super::{
         collapse_blank_lines, display_context_path, normalize_instruction_content,
         render_instruction_content, render_instruction_files, truncate_instruction_content,
-        ContextFile, ProjectContext, SystemPromptBuilder, SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
+        ContextFile, ModelFamilyIdentity, ProjectContext, SystemPromptBuilder,
+        SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
     };
     use crate::config::ConfigLoader;
     use std::fs;
@@ -804,13 +833,19 @@ mod tests {
         std::env::set_var("HOME", &root);
         std::env::set_var("CLAW_CONFIG_HOME", root.join("missing-home"));
         std::env::set_current_dir(&root).expect("change cwd");
-        let prompt = super::load_system_prompt(&root, "2026-03-31", "linux", "6.8")
-            .expect("system prompt should load")
-            .join(
-                "
-",
-            );
+        let prompt = super::load_system_prompt(
+            &root,
+            "2026-03-31",
+            "linux",
+            "6.8",
+            ModelFamilyIdentity::Claude,
+        )
+        .expect("system prompt should load")
+        .join(
+            "
+",
+        );
         std::env::set_current_dir(previous).expect("restore cwd");
         if let Some(value) = original_home {
             std::env::set_var("HOME", value);
@@ -828,6 +863,50 @@ mod tests {
         fs::remove_dir_all(root).expect("cleanup temp dir");
     }
 
+    #[test]
+    fn renders_default_claude_model_family_identity() {
+        // given: a prompt builder without an explicit model family override
+        let project_context = ProjectContext {
+            cwd: PathBuf::from("/tmp/project"),
+            current_date: "2026-03-31".to_string(),
+            ..ProjectContext::default()
+        };
+
+        // when: rendering the system prompt environment section
+        let prompt = SystemPromptBuilder::new()
+            .with_os("linux", "6.8")
+            .with_project_context(project_context)
+            .render();
+
+        // then: the Claude model family label is preserved by default
+        assert!(prompt.contains("Model family: Claude Opus 4.6"));
+    }
+
+    #[test]
+    fn renders_generic_model_family_identity_without_claude_label() {
+        // given: a prompt builder with generic model family identity
+        let project_context = ProjectContext {
+            cwd: PathBuf::from("/tmp/project"),
+            current_date: "2026-03-31".to_string(),
+            ..ProjectContext::default()
+        };
+
+        // when: rendering the system prompt environment section
+        let prompt = SystemPromptBuilder::new()
+            .with_os("linux", "6.8")
+            .with_model_family(ModelFamilyIdentity::Generic)
+            .with_project_context(project_context)
+            .render();
+        let model_family_line = prompt
+            .lines()
+            .find(|line| line.contains("Model family:"))
+            .expect("model family line should render");
+
+        // then: the model family line is neutral and excludes Claude Opus 4.6
+        assert_eq!(model_family_line, " - Model family: an AI assistant");
+        assert!(!model_family_line.contains("Claude Opus 4.6"));
+    }
+
     #[test]
     fn renders_claude_code_style_sections_with_project_context() {
         let root = temp_dir();
diff --git a/rust/crates/runtime/src/session.rs b/rust/crates/runtime/src/session.rs
index b97378e5..6b62444d 100644
--- a/rust/crates/runtime/src/session.rs
+++ b/rust/crates/runtime/src/session.rs
@@ -30,6 +30,10 @@ pub enum ContentBlock {
     Text {
         text: String,
     },
+    Thinking {
+        thinking: String,
+        signature: Option<String>,
+    },
     ToolUse {
         id: String,
         name: String,
@@ -737,6 +741,22 @@ impl ContentBlock {
                 object.insert("type".to_string(), JsonValue::String("text".to_string()));
                 object.insert("text".to_string(), JsonValue::String(text.clone()));
             }
+            Self::Thinking {
+                thinking,
+                signature,
+            } => {
+                object.insert(
+                    "type".to_string(),
+                    JsonValue::String("thinking".to_string()),
+                );
+                object.insert("thinking".to_string(), JsonValue::String(thinking.clone()));
+                if let Some(signature) = signature {
+                    object.insert(
+                        "signature".to_string(),
+                        JsonValue::String(signature.clone()),
+                    );
+                }
+            }
             Self::ToolUse { id, name, input } => {
                 object.insert(
                     "type".to_string(),
@@ -783,6 +803,13 @@ impl ContentBlock {
             "text" => Ok(Self::Text {
                 text: required_string(object, "text")?,
             }),
+            "thinking" => Ok(Self::Thinking {
+                thinking: required_string(object, "thinking")?,
+                signature: object
+                    .get("signature")
+                    .and_then(JsonValue::as_str)
+                    .map(String::from),
+            }),
             "tool_use" => Ok(Self::ToolUse {
                 id: required_string(object, "id")?,
                 name: required_string(object, "name")?,
@@ -1208,6 +1235,36 @@ mod tests {
         assert_eq!(restored.session_id, session.session_id);
     }
 
+    #[test]
+    fn persists_assistant_thinking_block_round_trip_through_jsonl() {
+        // given
+        let mut session = Session::new();
+        session
+            .push_message(ConversationMessage::assistant(vec![
+                ContentBlock::Thinking {
+                    thinking: "trace the path through session persistence".to_string(),
+                    signature: Some("sig-123".to_string()),
+                },
+            ]))
+            .expect("thinking block should append");
+        let path = temp_session_path("thinking-jsonl");
+
+        // when
+        session.save_to_path(&path).expect("session should save");
+        let restored = Session::load_from_path(&path).expect("session should load");
+        fs::remove_file(&path).expect("temp file should be removable");
+
+        // then
+        assert_eq!(restored, session);
+        assert_eq!(
+            restored.messages[0].blocks[0],
+            ContentBlock::Thinking {
+                thinking: "trace the path through session persistence".to_string(),
+                signature: Some("sig-123".to_string()),
+            }
+        );
+    }
+
     #[test]
     fn loads_legacy_session_json_object() {
         let path = temp_session_path("legacy");
diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs
index 628118af..df4d8da4 100644
--- a/rust/crates/rusty-claude-cli/src/main.rs
+++ b/rust/crates/rusty-claude-cli/src/main.rs
@@ -24,10 +24,11 @@ use std::thread::{self, JoinHandle};
 use std::time::{Duration, Instant, UNIX_EPOCH};
 
 use api::{
-    detect_provider_kind, resolve_startup_auth_source, AnthropicClient, AuthSource,
-    ContentBlockDelta, InputContentBlock, InputMessage, MessageRequest, MessageResponse,
-    OutputContentBlock, PromptCache, ProviderClient as ApiProviderClient, ProviderKind,
-    StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition, ToolResultContentBlock,
+    detect_provider_kind, model_family_identity_for, resolve_startup_auth_source, AnthropicClient,
+    AuthSource, ContentBlockDelta, InputContentBlock, InputMessage, MessageRequest,
+    MessageResponse, OutputContentBlock, PromptCache, ProviderClient as ApiProviderClient,
+    ProviderKind, StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition,
+    ToolResultContentBlock,
 };
 
 use commands::{
@@ -357,8 +358,9 @@ fn run() -> Result<(), Box<dyn Error>> {
         CliAction::PrintSystemPrompt {
             cwd,
             date,
+            model,
             output_format,
-        } => print_system_prompt(cwd, date, output_format)?,
+        } => print_system_prompt(cwd, date, &model, output_format)?,
         CliAction::Version { output_format } => print_version(output_format)?,
         CliAction::ResumeSession {
             session_path,
@@ -498,6 +500,7 @@ enum CliAction {
     PrintSystemPrompt {
         cwd: PathBuf,
         date: String,
+        model: String,
         output_format: CliOutputFormat,
     },
     Version {
@@ -960,7 +963,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
                 }),
             }
         }
-        "system-prompt" => parse_system_prompt_args(&rest[1..], output_format),
+        "system-prompt" => parse_system_prompt_args(&rest[1..], model, output_format),
         "acp" => parse_acp_args(&rest[1..], output_format),
         "login" | "logout" => Err(removed_auth_surface_error(rest[0].as_str())),
         "init" => Ok(CliAction::Init { output_format }),
@@ -1638,6 +1641,7 @@ fn filter_tool_specs(
 
 fn parse_system_prompt_args(
     args: &[String],
+    model: String,
     output_format: CliOutputFormat,
 ) -> Result<CliAction, String> {
     let mut cwd = env::current_dir().map_err(|error| error.to_string())?;
@@ -1674,6 +1678,7 @@ fn parse_system_prompt_args(
     Ok(CliAction::PrintSystemPrompt {
         cwd,
         date,
+        model,
         output_format,
     })
 }
@@ -2614,9 +2619,16 @@ fn print_bootstrap_plan(output_format: CliOutputFormat) -> Result<(), Box<dyn Er
 
 fn print_system_prompt(
     cwd: PathBuf,
     date: String,
+    model: &str,
     output_format: CliOutputFormat,
 ) -> Result<(), Box<dyn Error>> {
-    let sections = load_system_prompt(cwd, date, env::consts::OS, "unknown")?;
+    let sections = load_system_prompt(
+        cwd,
+        date,
+        env::consts::OS,
+        "unknown",
+        model_family_identity_for(model),
+    )?;
     let message = sections.join(
         "
@@ -4394,7 +4406,7 @@ impl LiveCli {
         allowed_tools: Option,
         permission_mode: PermissionMode,
     ) -> Result<Self, Box<dyn Error>> {
-        let system_prompt = build_system_prompt()?;
+        let system_prompt = build_system_prompt(&model)?;
         let session_state = new_cli_session()?;
         let session = create_managed_session_handle(&session_state.session_id)?;
         let runtime = build_runtime(
@@ -4530,6 +4542,10 @@ impl LiveCli {
             TerminalRenderer::new().color_theme(),
             &mut stdout,
         )?;
+        let final_text = final_assistant_text(&summary);
+        if !final_text.is_empty() {
+            println!("{final_text}");
+        }
         println!();
         if let Some(event) = summary.auto_compaction {
             println!(
@@ -7005,6 +7021,7 @@ fn render_export_text(session: &Session) -> String {
         for block in &message.blocks {
             match block {
                 ContentBlock::Text { text } => lines.push(text.clone()),
+                ContentBlock::Thinking { .. } => {}
                 ContentBlock::ToolUse { id, name, input } => {
                     lines.push(format!("[tool_use id={id} name={name}] {input}"));
                 }
@@ -7191,6 +7208,7 @@ fn render_session_markdown(session: &Session, session_id: &str, session_path: &P
                         lines.push(String::new());
                     }
                 }
+                ContentBlock::Thinking { .. } => {}
                 ContentBlock::ToolUse { id, name, input } => {
                     lines.push(format!(
                         "**Tool call** `{name}` _(id `{}`)_",
@@ -7244,12 +7262,13 @@ fn short_tool_id(id: &str) -> String {
     format!("{prefix}…")
 }
 
-fn build_system_prompt() -> Result<Vec<String>, Box<dyn Error>> {
+fn build_system_prompt(model: &str) -> Result<Vec<String>, Box<dyn Error>> {
     Ok(load_system_prompt(
         env::current_dir()?,
         DEFAULT_DATE,
         env::consts::OS,
         "unknown",
+        model_family_identity_for(model),
     )?)
 }
@@ -9211,26 +9230,29 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec<InputMessage> {
     let content = message
         .blocks
         .iter()
-        .map(|block| match block {
-            ContentBlock::Text { text } => InputContentBlock::Text { text: text.clone() },
-            ContentBlock::ToolUse { id, name, input } => InputContentBlock::ToolUse {
+        .filter_map(|block| match block {
+            ContentBlock::Text { text } => {
+                Some(InputContentBlock::Text { text: text.clone() })
+            }
+            ContentBlock::Thinking { .. } => None,
+            ContentBlock::ToolUse { id, name, input } => Some(InputContentBlock::ToolUse {
                 id: id.clone(),
                 name: name.clone(),
                 input: serde_json::from_str(input)
                    .unwrap_or_else(|_| serde_json::json!({ "raw": input })),
-            },
+            }),
             ContentBlock::ToolResult {
                 tool_use_id,
                 output,
                 is_error,
                 ..
-            } => InputContentBlock::ToolResult {
+            } => Some(InputContentBlock::ToolResult {
                 tool_use_id: tool_use_id.clone(),
                 content: vec![ToolResultContentBlock::Text {
                     text: output.clone(),
                 }],
                 is_error: *is_error,
-            },
+            }),
         })
         .collect::<Vec<_>>();
     (!content.is_empty()).then(|| InputMessage {
@@ -9628,7 +9650,9 @@ mod tests {
             "{rendered}"
         );
         assert!(
-            rendered.contains("Detail Input tokens exceed the configured limit of 922000 tokens."),
+            rendered.contains(
+                "Detail Input tokens exceed the configured limit of 922000 tokens."
+            ),
             "{rendered}"
         );
         assert!(rendered.contains("Compact /compact"), "{rendered}");
@@ -10264,6 +10288,7 @@ mod tests {
 
     #[test]
     fn parses_system_prompt_options() {
+        // given: system-prompt options for cwd and date
         let args = vec![
             "system-prompt".to_string(),
             "--cwd".to_string(),
             "/tmp/project".to_string(),
             "--date".to_string(),
             "2026-04-01".to_string(),
         ];
+
+        // when: parsing the direct system-prompt command
+        let action = parse_args(&args).expect("args should parse");
+
+        // then: the action carries prompt options and default model
         assert_eq!(
-            parse_args(&args).expect("args should parse"),
+            action,
             CliAction::PrintSystemPrompt {
                 cwd: PathBuf::from("/tmp/project"),
                 date: "2026-04-01".to_string(),
+                model: DEFAULT_MODEL.to_string(),
                 output_format: CliOutputFormat::Text,
             }
         );
     }
 
+    #[test]
+    fn parses_global_model_for_system_prompt() {
+        // given: a global OpenAI-compatible model before system-prompt
+        let args = vec![
+            "--model".to_string(),
+            "openai/gpt-4.1-mini".to_string(),
+            "system-prompt".to_string(),
+        ];
+
+        // when: parsing the CLI arguments
+        let action = parse_args(&args).expect("args should parse");
+
+        // then: the system-prompt action carries the selected model
+        match action {
+            CliAction::PrintSystemPrompt { model, ..
} => { + assert_eq!(model, "openai/gpt-4.1-mini"); + } + other => panic!("expected PrintSystemPrompt, got {other:?}"), + } + } + #[test] fn removed_login_and_logout_subcommands_error_helpfully() { let login = parse_args(&["login".to_string()]).expect_err("login should be removed"); diff --git a/rust/crates/rusty-claude-cli/tests/compact_output.rs b/rust/crates/rusty-claude-cli/tests/compact_output.rs index 8e751c0c..4ccca2f4 100644 --- a/rust/crates/rusty-claude-cli/tests/compact_output.rs +++ b/rust/crates/rusty-claude-cli/tests/compact_output.rs @@ -126,6 +126,66 @@ fn compact_flag_streaming_text_only_emits_final_message_text() { fs::remove_dir_all(&workspace).expect("workspace cleanup should succeed"); } +#[test] +fn text_prompt_mode_prints_final_assistant_text_after_spinner() { + // given a workspace pointed at the mock Anthropic service running the + // streaming_text scenario which only emits a single assistant text block + let runtime = tokio::runtime::Runtime::new().expect("tokio runtime should build"); + let server = runtime + .block_on(MockAnthropicService::spawn()) + .expect("mock service should start"); + let base_url = server.base_url(); + + let workspace = unique_temp_dir("text-prompt-mode"); + let config_home = workspace.join("config-home"); + let home = workspace.join("home"); + fs::create_dir_all(&workspace).expect("workspace should exist"); + fs::create_dir_all(&config_home).expect("config home should exist"); + fs::create_dir_all(&home).expect("home should exist"); + + // when we invoke claw in normal text prompt mode for the streaming text scenario + let prompt = format!("{SCENARIO_PREFIX}streaming_text"); + let output = run_claw( + &workspace, + &config_home, + &home, + &base_url, + &[ + "--model", + "sonnet", + "--permission-mode", + "read-only", + &prompt, + ], + ); + + // then stdout should contain the final assistant text, not just spinner output + assert!( + output.status.success(), + "text prompt run should succeed\nstdout:\n{}\n\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + ); + let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8"); + let plain_stdout = strip_ansi_codes(&stdout); + assert!( + plain_stdout.contains("Mock streaming says hello from the parity harness."), + "text prompt stdout should include the assistant text ({stdout:?})" + ); + assert!( + plain_stdout.contains("✔ ✨ Done"), + "text prompt stdout should still include spinner completion ({stdout:?})" + ); + assert!( + plain_stdout + .lines() + .any(|line| line == "Mock streaming says hello from the parity harness."), + "text prompt stdout should print the assistant text as its own line ({stdout:?})" + ); + + fs::remove_dir_all(&workspace).expect("workspace cleanup should succeed"); +} + #[test] fn compact_flag_with_json_output_emits_structured_json() { let runtime = tokio::runtime::Runtime::new().expect("tokio runtime should build"); @@ -215,3 +275,21 @@ fn unique_temp_dir(label: &str) -> PathBuf { std::process::id() )) } + +fn strip_ansi_codes(input: &str) -> String { + let mut output = String::with_capacity(input.len()); + let mut chars = input.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '\u{1b}' && matches!(chars.peek(), Some('[')) { + chars.next(); + while let Some(next) = chars.next() { + if ('@'..='~').contains(&next) { + break; + } + } + continue; + } + output.push(ch); + } + output +} diff --git a/rust/crates/rusty-claude-cli/tests/compact_repl_panic.rs 
b/rust/crates/rusty-claude-cli/tests/compact_repl_panic.rs new file mode 100644 index 00000000..e930cf43 --- /dev/null +++ b/rust/crates/rusty-claude-cli/tests/compact_repl_panic.rs @@ -0,0 +1,138 @@ +use std::fs; +use std::io::Write; +use std::path::PathBuf; +use std::process::{Command, Output, Stdio}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; + +static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0); + +#[test] +fn compact_slash_command_in_repl_does_not_start_nested_tokio_runtime() { + // given + let workspace = unique_temp_dir("compact-repl-panic"); + let config_home = workspace.join("config-home"); + let home = workspace.join("home"); + fs::create_dir_all(&workspace).expect("workspace should exist"); + fs::create_dir_all(&config_home).expect("config home should exist"); + fs::create_dir_all(&home).expect("home should exist"); + + // when + let output = run_claw_repl(&workspace, &config_home, &home, "/compact\n/exit\n"); + + // then + assert!( + output.status.success(), + "compact repl run should succeed\nstdout:\n{}\n\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + ); + let stderr = String::from_utf8(output.stderr).expect("stderr should be utf8"); + assert!( + !stderr.contains("Cannot start a runtime"), + "stderr must not contain nested runtime panic: {stderr:?}" + ); + assert!( + !stderr.contains("panicked at"), + "stderr must not contain panic output: {stderr:?}" + ); + + let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8"); + let plain_stdout = strip_ansi_codes(&stdout); + assert!( + plain_stdout.contains("Compaction skipped") + || plain_stdout.contains("Result skipped") + || plain_stdout.contains("Result compacted"), + "stdout should contain compact report output ({stdout:?})" + ); + + fs::remove_dir_all(&workspace).expect("workspace cleanup should succeed"); +} + +fn run_claw_repl( + cwd: &std::path::Path, + config_home: &std::path::Path, + home: &std::path::Path, + stdin: &str, +) -> Output { + let mut command = python_pty_command(env!("CARGO_BIN_EXE_claw")); + let mut child = command + .current_dir(cwd) + .env_clear() + .env("ANTHROPIC_API_KEY", "test-compact-repl-key") + .env("CLAW_CONFIG_HOME", config_home) + .env("HOME", home) + .env("NO_COLOR", "1") + .env("PATH", "/usr/bin:/bin") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .expect("claw should launch"); + + child + .stdin + .as_mut() + .expect("stdin should be piped") + .write_all(stdin.as_bytes()) + .expect("stdin should write"); + + child.wait_with_output().expect("claw should finish") +} + +fn python_pty_command(claw: &str) -> Command { + let mut command = Command::new("python3"); + command.args([ + "-c", + r#" +import os +import pty +import subprocess +import sys + +claw = sys.argv[1] +payload = sys.stdin.buffer.read() +master, slave = pty.openpty() +child = subprocess.Popen([claw], stdin=slave, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +os.close(slave) +os.write(master, payload) +stdout, stderr = child.communicate(timeout=30) +os.close(master) +sys.stdout.buffer.write(stdout) +sys.stderr.buffer.write(stderr) +raise SystemExit(child.returncode) +"#, + claw, + ]); + command +} + +fn unique_temp_dir(label: &str) -> PathBuf { + let millis = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock should be after epoch") + .as_millis(); + let counter = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed); + std::env::temp_dir().join(format!( + 
"claw-{label}-{}-{millis}-{counter}", + std::process::id() + )) +} + +fn strip_ansi_codes(input: &str) -> String { + let mut output = String::with_capacity(input.len()); + let mut chars = input.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '\u{1b}' && matches!(chars.peek(), Some('[')) { + chars.next(); + for next in chars.by_ref() { + if ('@'..='~').contains(&next) { + break; + } + } + continue; + } + output.push(ch); + } + output +} diff --git a/rust/crates/tools/src/lib.rs b/rust/crates/tools/src/lib.rs index f3d1849a..9e669f5e 100644 --- a/rust/crates/tools/src/lib.rs +++ b/rust/crates/tools/src/lib.rs @@ -4,9 +4,10 @@ use std::process::Command; use std::time::{Duration, Instant}; use api::{ - max_tokens_for_model, resolve_model_alias, ApiError, ContentBlockDelta, InputContentBlock, - InputMessage, MessageRequest, MessageResponse, OutputContentBlock, ProviderClient, - StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition, ToolResultContentBlock, + max_tokens_for_model, model_family_identity_for, resolve_model_alias, ApiError, + ContentBlockDelta, InputContentBlock, InputMessage, MessageRequest, MessageResponse, + OutputContentBlock, ProviderClient, StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition, + ToolResultContentBlock, }; use plugins::PluginTool; use reqwest::blocking::Client; @@ -3075,27 +3076,33 @@ fn extract_quoted_value(input: &str) -> Option<(String, &str)> { } fn decode_duckduckgo_redirect(url: &str) -> Option { - if url.starts_with("http://") || url.starts_with("https://") { - return Some(html_entity_decode_url(url)); - } - - let joined = if url.starts_with("//") { - format!("https:{url}") - } else if url.starts_with('/') { - format!("https://duckduckgo.com{url}") + let decoded = html_entity_decode_url(url); + let parsed = if decoded.starts_with("http://") || decoded.starts_with("https://") { + reqwest::Url::parse(&decoded).ok() + } else if decoded.starts_with("//") { + reqwest::Url::parse(&format!("https:{decoded}")).ok() + } else if decoded.starts_with('/') { + reqwest::Url::parse(&format!("https://duckduckgo.com{decoded}")).ok() } else { return None; - }; + }?; - let parsed = reqwest::Url::parse(&joined).ok()?; - if parsed.path() == "/l/" || parsed.path() == "/l" { + let host = parsed.host_str().unwrap_or_default().to_ascii_lowercase(); + if (host == "duckduckgo.com" || host.ends_with(".duckduckgo.com")) + && (parsed.path() == "/l/" || parsed.path() == "/l") + { for (key, value) in parsed.query_pairs() { if key == "uddg" { return Some(html_entity_decode_url(value.as_ref())); } } } - Some(joined) + + if decoded.starts_with("http://") || decoded.starts_with("https://") { + Some(decoded) + } else { + Some(parsed.to_string()) + } } fn html_entity_decode_url(url: &str) -> String { @@ -3510,7 +3517,7 @@ where .filter(|name| !name.is_empty()) .unwrap_or_else(|| slugify_agent_name(&input.description)); let created_at = iso8601_now(); - let system_prompt = build_agent_system_prompt(&normalized_subagent_type)?; + let system_prompt = build_agent_system_prompt(&normalized_subagent_type, &model)?; let allowed_tools = allowed_tools_for_subagent(&normalized_subagent_type); let output_contents = format!( @@ -3623,13 +3630,14 @@ fn build_agent_runtime( )) } -fn build_agent_system_prompt(subagent_type: &str) -> Result, String> { +fn build_agent_system_prompt(subagent_type: &str, model: &str) -> Result, String> { let cwd = std::env::current_dir().map_err(|error| error.to_string())?; let mut prompt = load_system_prompt( cwd, 
DEFAULT_AGENT_SYSTEM_DATE.to_string(), std::env::consts::OS, "unknown", + model_family_identity_for(model), ) .map_err(|error| error.to_string())?; prompt.push(format!( @@ -4759,6 +4767,9 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { .iter() .map(|block| match block { ContentBlock::Text { text } => InputContentBlock::Text { text: text.clone() }, + ContentBlock::Thinking { .. } => InputContentBlock::Text { + text: String::new(), + }, ContentBlock::ToolUse { id, name, input } => InputContentBlock::ToolUse { id: id.clone(), name: name.clone(), @@ -4778,6 +4789,9 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { is_error: *is_error, }, }) + .filter( + |block| !matches!(block, InputContentBlock::Text { text } if text.is_empty()), + ) .collect::>(); (!content.is_empty()).then(|| InputMessage { role: role.to_string(), @@ -6134,12 +6148,13 @@ mod tests { use std::time::Duration; use super::{ - agent_permission_policy, allowed_tools_for_subagent, classify_lane_failure, - derive_agent_state, execute_agent_with_spawn, execute_tool, extract_recovery_outcome, - final_assistant_text, global_cron_registry, maybe_commit_provenance, mvp_tool_specs, - permission_mode_from_plugin, persist_agent_terminal_state, push_output_block, - run_task_packet, AgentInput, AgentJob, GlobalToolRegistry, LaneEventName, LaneFailureClass, - ProviderRuntimeClient, SubagentToolExecutor, + agent_permission_policy, allowed_tools_for_subagent, build_agent_system_prompt, + classify_lane_failure, derive_agent_state, execute_agent_with_spawn, execute_tool, + extract_recovery_outcome, final_assistant_text, global_cron_registry, + maybe_commit_provenance, mvp_tool_specs, permission_mode_from_plugin, + persist_agent_terminal_state, push_output_block, run_task_packet, AgentInput, AgentJob, + GlobalToolRegistry, LaneEventName, LaneFailureClass, ProviderRuntimeClient, + SubagentToolExecutor, }; use api::OutputContentBlock; use runtime::ProviderFallbackConfig; @@ -7148,6 +7163,98 @@ mod tests { assert!(error.contains("relative URL without a base") || error.contains("empty host")); } + #[test] + fn web_search_decodes_absolute_duckduckgo_redirect_urls() { + // given + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let server = TestServer::spawn(Arc::new(|request_line: &str| { + assert!(request_line.contains("GET /search?q=duckduckgo+redirects ")); + HttpResponse::html( + 200, + "OK", + r#" + + Reqwest docs + + "#, + ) + })); + + // when + std::env::set_var( + "CLAWD_WEB_SEARCH_BASE_URL", + format!("http://{}/search", server.addr()), + ); + let result = execute_tool( + "WebSearch", + &json!({ + "query": "duckduckgo redirects" + }), + ) + .expect("WebSearch should succeed"); + std::env::remove_var("CLAWD_WEB_SEARCH_BASE_URL"); + + // then + let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); + let results = output["results"].as_array().expect("results array"); + let search_result = results + .iter() + .find(|item| item.get("content").is_some()) + .expect("search result block present"); + let content = search_result["content"].as_array().expect("content array"); + assert_eq!(content.len(), 1); + assert_eq!(content[0]["title"], "Reqwest docs"); + assert_eq!(content[0]["url"], "https://docs.rs/reqwest"); + } + + #[test] + fn web_search_decodes_protocol_relative_duckduckgo_redirect_urls() { + // given + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let server = 
TestServer::spawn(Arc::new(|request_line: &str| { + assert!(request_line.contains("GET /search?q=duckduckgo+protocol+relative ")); + HttpResponse::html( + 200, + "OK", + r#" + + Tokio Docs + + "#, + ) + })); + + // when + std::env::set_var( + "CLAWD_WEB_SEARCH_BASE_URL", + format!("http://{}/search", server.addr()), + ); + let result = execute_tool( + "WebSearch", + &json!({ + "query": "duckduckgo protocol relative" + }), + ) + .expect("WebSearch should succeed"); + std::env::remove_var("CLAWD_WEB_SEARCH_BASE_URL"); + + // then + let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); + let results = output["results"].as_array().expect("results array"); + let search_result = results + .iter() + .find(|item| item.get("content").is_some()) + .expect("search result block present"); + let content = search_result["content"].as_array().expect("content array"); + assert_eq!(content.len(), 1); + assert_eq!(content[0]["title"], "Tokio Docs"); + assert_eq!(content[0]["url"], "https://docs.rs/tokio"); + } + #[test] fn pending_tools_preserve_multiple_streaming_tool_calls_by_index() { let mut events = Vec::new(); @@ -8409,6 +8516,28 @@ mod tests { assert!(!verification.contains("write_file")); } + #[test] + fn subagent_system_prompt_uses_resolved_model_identity() { + // given: a temporary workspace and an OpenAI-compatible subagent model + let _guard = env_guard(); + let root = temp_path("subagent-prompt-identity"); + fs::create_dir_all(&root).expect("create temp workspace"); + let previous = std::env::current_dir().expect("current dir"); + std::env::set_current_dir(&root).expect("enter temp workspace"); + + // when: building the subagent system prompt + let prompt = build_agent_system_prompt("Explore", "openai/gpt-4.1-mini") + .expect("subagent system prompt should build") + .join("\n"); + std::env::set_current_dir(previous).expect("restore current dir"); + + // then: the prompt renders a generic model family identity + assert!(prompt.contains("Model family: an AI assistant")); + assert!(!prompt.contains("Model family: Claude Opus 4.6")); + + fs::remove_dir_all(root).expect("cleanup temp workspace"); + } + #[derive(Debug)] struct MockSubagentApiClient { calls: usize,