From 75c08bc982c927d6d986edb552d7349d55e3b59e Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Wed, 6 May 2026 15:32:34 +0900 Subject: [PATCH] fix: REPL display, /compact panic, identity leak, DeepSeek reasoning, thinking blocks Five interrelated fixes from parallel Hephaestus sessions: 1. fix(repl): display assistant text after spinner (#2981, #2982, #2937) - Added final_assistant_text() call after run_turn spinner completes - REPL now shows response text like run_prompt_json does 2. fix(compact): handle Thinking content blocks (#2985) - Added ContentBlock::Thinking variant throughout compact summarizer - Prevents panic when /compact encounters thinking blocks 3. fix(prompt): provider-aware model identity (#2822) - New ModelFamilyIdentity enum (Claude vs Generic) - Non-Anthropic models no longer say 'I am Claude' - model_family_identity_for() detects provider and sets identity 4. fix(openai): preserve DeepSeek reasoning_content (#2821) - Stream parser now captures reasoning_content from OpenAI-compat - Emits ThinkingDelta/SignatureDelta events for reasoning models - Thinking blocks included in conversation history for re-send 5. feat(runtime): Thinking block support across codebase - AssistantEvent::Thinking variant in conversation.rs - ContentBlock::Thinking in session serialization - Thinking-aware compact summarization - Tests for thinking block ordering and content Closes #2981, #2982, #2937, #2985, #2822, #2821 --- rust/crates/api/src/lib.rs | 5 +- rust/crates/api/src/providers/mod.rs | 53 ++- .../crates/api/src/providers/openai_compat.rs | 338 ++++++++++++++++-- rust/crates/api/src/types.rs | 37 +- .../api/tests/openai_compat_integration.rs | 44 +++ rust/crates/mock-anthropic-service/src/lib.rs | 1 + rust/crates/runtime/src/compact.rs | 10 + rust/crates/runtime/src/conversation.rs | 55 +++ rust/crates/runtime/src/lib.rs | 4 +- rust/crates/runtime/src/prompt.rs | 93 ++++- rust/crates/runtime/src/session.rs | 57 +++ rust/crates/rusty-claude-cli/src/main.rs | 86 ++++- .../rusty-claude-cli/tests/compact_output.rs | 78 ++++ .../tests/compact_repl_panic.rs | 138 +++++++ rust/crates/tools/src/lib.rs | 175 +++++++-- 15 files changed, 1099 insertions(+), 75 deletions(-) create mode 100644 rust/crates/rusty-claude-cli/tests/compact_repl_panic.rs diff --git a/rust/crates/api/src/lib.rs b/rust/crates/api/src/lib.rs index 40da29f1..d55b211b 100644 --- a/rust/crates/api/src/lib.rs +++ b/rust/crates/api/src/lib.rs @@ -21,11 +21,12 @@ pub use prompt_cache::{ pub use providers::anthropic::{AnthropicClient, AnthropicClient as ApiClient, AuthSource}; pub use providers::openai_compat::{ build_chat_completion_request, flatten_tool_result_content, is_reasoning_model, - model_rejects_is_error_field, translate_message, OpenAiCompatClient, OpenAiCompatConfig, + model_rejects_is_error_field, model_requires_reasoning_content_in_history, translate_message, + OpenAiCompatClient, OpenAiCompatConfig, }; pub use providers::{ detect_provider_kind, max_tokens_for_model, max_tokens_for_model_with_override, - resolve_model_alias, ProviderKind, + model_family_identity_for, model_family_identity_for_kind, resolve_model_alias, ProviderKind, }; pub use sse::{parse_frame, SseParser}; pub use types::{ diff --git a/rust/crates/api/src/providers/mod.rs b/rust/crates/api/src/providers/mod.rs index 0ef663f1..9c50eb7a 100644 --- a/rust/crates/api/src/providers/mod.rs +++ b/rust/crates/api/src/providers/mod.rs @@ -250,6 +250,19 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind { ProviderKind::Anthropic } +#[must_use] 
+pub const fn model_family_identity_for_kind(kind: ProviderKind) -> runtime::ModelFamilyIdentity { + match kind { + ProviderKind::Anthropic => runtime::ModelFamilyIdentity::Claude, + ProviderKind::Xai | ProviderKind::OpenAi => runtime::ModelFamilyIdentity::Generic, + } +} + +#[must_use] +pub fn model_family_identity_for(model: &str) -> runtime::ModelFamilyIdentity { + model_family_identity_for_kind(detect_provider_kind(model)) +} + #[must_use] pub fn max_tokens_for_model(model: &str) -> u32 { let canonical = resolve_model_alias(model); @@ -484,8 +497,8 @@ mod tests { use super::{ anthropic_missing_credentials, anthropic_missing_credentials_hint, detect_provider_kind, load_dotenv_file, max_tokens_for_model, max_tokens_for_model_with_override, - model_token_limit, parse_dotenv, preflight_message_request, resolve_model_alias, - ProviderKind, + model_family_identity_for, model_family_identity_for_kind, model_token_limit, parse_dotenv, + preflight_message_request, resolve_model_alias, ProviderKind, }; /// Serializes every test in this module that mutates process-wide @@ -544,6 +557,42 @@ mod tests { ); } + #[test] + fn maps_provider_kind_to_model_family_identity() { + // given: each supported provider kind + let anthropic = ProviderKind::Anthropic; + let openai = ProviderKind::OpenAi; + let xai = ProviderKind::Xai; + + // when: converting provider kinds to prompt model family identities + let anthropic_identity = model_family_identity_for_kind(anthropic); + let openai_identity = model_family_identity_for_kind(openai); + let xai_identity = model_family_identity_for_kind(xai); + + // then: Anthropic stays Claude and OpenAI-compatible providers are generic + assert_eq!(anthropic_identity, runtime::ModelFamilyIdentity::Claude); + assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic); + assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic); + } + + #[test] + fn maps_model_name_to_model_family_identity() { + // given: Anthropic, OpenAI-compatible, and xAI model names + let claude_model = "claude-opus-4-6"; + let openai_model = "openai/gpt-4.1-mini"; + let xai_model = "grok-3"; + + // when: detecting prompt model family identities from model names + let claude_identity = model_family_identity_for(claude_model); + let openai_identity = model_family_identity_for(openai_model); + let xai_identity = model_family_identity_for(xai_model); + + // then: Anthropic stays Claude and OpenAI-compatible providers are generic + assert_eq!(claude_identity, runtime::ModelFamilyIdentity::Claude); + assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic); + assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic); + } + #[test] fn openai_namespaced_model_routes_to_openai_not_anthropic() { // Regression: "openai/gpt-4.1-mini" was misrouted to Anthropic when diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index a810502e..b3800d6a 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -443,6 +443,8 @@ struct StreamState { stop_reason: Option, usage: Option, tool_calls: BTreeMap, + thinking_started: bool, + thinking_finished: bool, } impl StreamState { @@ -456,6 +458,8 @@ impl StreamState { stop_reason: None, usage: None, tool_calls: BTreeMap::new(), + thinking_started: false, + thinking_finished: false, } } @@ -493,35 +497,61 @@ impl StreamState { } for choice in chunk.choices { + if let Some(reasoning) = choice + .delta + .reasoning_content + .filter(|value| 
!value.is_empty()) + { + if !self.thinking_started { + self.thinking_started = true; + events.push(StreamEvent::ContentBlockStart(ContentBlockStartEvent { + index: 0, + content_block: OutputContentBlock::Thinking { + thinking: String::new(), + signature: None, + }, + })); + } + events.push(StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent { + index: 0, + delta: ContentBlockDelta::ThinkingDelta { + thinking: reasoning, + }, + })); + } + if let Some(content) = choice.delta.content.filter(|value| !value.is_empty()) { + self.close_thinking(&mut events); if !self.text_started { self.text_started = true; events.push(StreamEvent::ContentBlockStart(ContentBlockStartEvent { - index: 0, + index: self.text_block_index(), content_block: OutputContentBlock::Text { text: String::new(), }, })); } events.push(StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent { - index: 0, + index: self.text_block_index(), delta: ContentBlockDelta::TextDelta { text: content }, })); } for tool_call in choice.delta.tool_calls { + self.close_thinking(&mut events); + let tool_index_offset = self.tool_index_offset(); let state = self.tool_calls.entry(tool_call.index).or_default(); state.apply(tool_call); - let block_index = state.block_index(); + let block_index = state.block_index(tool_index_offset); if !state.started { - if let Some(start_event) = state.start_event()? { + if let Some(start_event) = state.start_event(tool_index_offset)? { state.started = true; events.push(StreamEvent::ContentBlockStart(start_event)); } else { continue; } } - if let Some(delta_event) = state.delta_event() { + if let Some(delta_event) = state.delta_event(tool_index_offset) { events.push(StreamEvent::ContentBlockDelta(delta_event)); } if choice.finish_reason.as_deref() == Some("tool_calls") && !state.stopped { @@ -535,11 +565,12 @@ impl StreamState { if let Some(finish_reason) = choice.finish_reason { self.stop_reason = Some(normalize_finish_reason(&finish_reason)); if finish_reason == "tool_calls" { + let tool_index_offset = self.tool_index_offset(); for state in self.tool_calls.values_mut() { if state.started && !state.stopped { state.stopped = true; events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent { - index: state.block_index(), + index: state.block_index(tool_index_offset), })); } } @@ -557,19 +588,21 @@ impl StreamState { self.finished = true; let mut events = Vec::new(); + self.close_thinking(&mut events); if self.text_started && !self.text_finished { self.text_finished = true; events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent { - index: 0, + index: self.text_block_index(), })); } + let tool_index_offset = self.tool_index_offset(); for state in self.tool_calls.values_mut() { if !state.started { - if let Some(start_event) = state.start_event()? { + if let Some(start_event) = state.start_event(tool_index_offset)? 
{
                 state.started = true;
                 events.push(StreamEvent::ContentBlockStart(start_event));
-                if let Some(delta_event) = state.delta_event() {
+                if let Some(delta_event) = state.delta_event(tool_index_offset) {
                     events.push(StreamEvent::ContentBlockDelta(delta_event));
                 }
             }
@@ -577,7 +610,7 @@ impl StreamState {
             if state.started && !state.stopped {
                 state.stopped = true;
                 events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
-                    index: state.block_index(),
+                    index: state.block_index(tool_index_offset),
                 }));
             }
         }
@@ -603,6 +636,31 @@ impl StreamState {
         }
         Ok(events)
     }
+
+    fn close_thinking(&mut self, events: &mut Vec<StreamEvent>) {
+        if self.thinking_started && !self.thinking_finished {
+            self.thinking_finished = true;
+            events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
+                index: 0,
+            }));
+        }
+    }
+
+    const fn text_block_index(&self) -> u32 {
+        if self.thinking_started {
+            1
+        } else {
+            0
+        }
+    }
+
+    const fn tool_index_offset(&self) -> u32 {
+        if self.thinking_started {
+            2
+        } else {
+            1
+        }
+    }
 }
 
 #[derive(Debug, Default)]
@@ -630,12 +688,12 @@ impl ToolCallState {
         }
     }
 
-    const fn block_index(&self) -> u32 {
-        self.openai_index + 1
+    const fn block_index(&self, offset: u32) -> u32 {
+        self.openai_index + offset
     }
 
     #[allow(clippy::unnecessary_wraps)]
-    fn start_event(&self) -> Result<Option<ContentBlockStartEvent>, ApiError> {
+    fn start_event(&self, offset: u32) -> Result<Option<ContentBlockStartEvent>, ApiError> {
         let Some(name) = self.name.clone() else {
             return Ok(None);
         };
         let id = self
             .id
             .clone()
             .unwrap_or_else(|| format!("tool_call_{}", self.openai_index));
         Ok(Some(ContentBlockStartEvent {
-            index: self.block_index(),
+            index: self.block_index(offset),
             content_block: OutputContentBlock::ToolUse {
                 id,
                 name,
@@ -653,14 +711,14 @@ impl ToolCallState {
         }))
     }
 
-    fn delta_event(&mut self) -> Option<ContentBlockDeltaEvent> {
+    fn delta_event(&mut self, offset: u32) -> Option<ContentBlockDeltaEvent> {
         if self.emitted_len >= self.arguments.len() {
             return None;
         }
         let delta = self.arguments[self.emitted_len..].to_string();
         self.emitted_len = self.arguments.len();
         Some(ContentBlockDeltaEvent {
-            index: self.block_index(),
+            index: self.block_index(offset),
             delta: ContentBlockDelta::InputJsonDelta {
                 partial_json: delta,
             },
@@ -690,6 +748,8 @@ struct ChatMessage {
     #[serde(default)]
     content: Option<String>,
     #[serde(default)]
+    reasoning_content: Option<String>,
+    #[serde(default)]
     tool_calls: Vec,
 }
@@ -735,6 +795,8 @@ struct ChunkChoice {
 struct ChunkDelta {
     #[serde(default)]
     content: Option<String>,
+    #[serde(default)]
+    reasoning_content: Option<String>,
     #[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
     tool_calls: Vec,
 }
@@ -793,6 +855,15 @@ pub fn is_reasoning_model(model: &str) -> bool {
         || canonical.contains("thinking")
 }
 
+/// Returns true for OpenAI-compatible DeepSeek V4 models that require prior
+/// assistant reasoning to be echoed back as `reasoning_content` in history.
+#[must_use]
+pub fn model_requires_reasoning_content_in_history(model: &str) -> bool {
+    let lowered = model.to_ascii_lowercase();
+    let canonical = lowered.rsplit('/').next().unwrap_or(lowered.as_str());
+    canonical.starts_with("deepseek-v4")
+}
+
 /// Strip routing prefix (e.g., "openai/gpt-4" → "gpt-4") for the wire.
 /// The prefix is used only to select transport; the backend expects the
 /// bare model id.
@@ -948,10 +1019,14 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec { match message.role.as_str() { "assistant" => { let mut text = String::new(); + let mut reasoning = String::new(); let mut tool_calls = Vec::new(); for block in &message.content { match block { InputContentBlock::Text { text: value } => text.push_str(value), + InputContentBlock::Thinking { + thinking: value, .. + } => reasoning.push_str(value), InputContentBlock::ToolUse { id, name, input } => tool_calls.push(json!({ "id": id, "type": "function", @@ -963,13 +1038,18 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec { InputContentBlock::ToolResult { .. } => {} } } - if text.is_empty() && tool_calls.is_empty() { + let include_reasoning = + model_requires_reasoning_content_in_history(model) && !reasoning.is_empty(); + if text.is_empty() && tool_calls.is_empty() && !include_reasoning { Vec::new() } else { let mut msg = serde_json::json!({ "role": "assistant", "content": (!text.is_empty()).then_some(text), }); + if include_reasoning { + msg["reasoning_content"] = json!(reasoning); + } // Only include tool_calls when non-empty: some providers reject // assistant messages with an explicit empty tool_calls array. if !tool_calls.is_empty() { @@ -1003,6 +1083,7 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec { } Some(msg) } + InputContentBlock::Thinking { .. } => None, InputContentBlock::ToolUse { .. } => None, }) .collect(), @@ -1182,6 +1263,16 @@ fn normalize_response( "chat completion response missing choices", ))?; let mut content = Vec::new(); + if let Some(thinking) = choice + .message + .reasoning_content + .filter(|value| !value.is_empty()) + { + content.push(OutputContentBlock::Thinking { + thinking, + signature: None, + }); + } if let Some(text) = choice.message.content.filter(|value| !value.is_empty()) { content.push(OutputContentBlock::Text { text }); } @@ -1413,13 +1504,15 @@ impl StringExt for String { mod tests { use super::{ build_chat_completion_request, chat_completions_endpoint, is_reasoning_model, - normalize_finish_reason, openai_tool_choice, parse_tool_arguments, OpenAiCompatClient, - OpenAiCompatConfig, + model_requires_reasoning_content_in_history, normalize_finish_reason, normalize_response, + openai_tool_choice, parse_tool_arguments, OpenAiCompatClient, OpenAiCompatConfig, + StreamState, }; use crate::error::ApiError; use crate::types::{ - InputContentBlock, InputMessage, MessageRequest, ToolChoice, ToolDefinition, - ToolResultContentBlock, + ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent, ContentBlockStopEvent, + InputContentBlock, InputMessage, MessageRequest, OutputContentBlock, StreamEvent, + ToolChoice, ToolDefinition, ToolResultContentBlock, }; use serde_json::json; use std::sync::{Mutex, OnceLock}; @@ -1465,6 +1558,188 @@ mod tests { assert_eq!(payload["tool_choice"], json!("auto")); } + #[test] + fn model_requires_reasoning_content_in_history_detects_deepseek_v4_models() { + // Given DeepSeek V4 and non-V4 model names. + let positive = [ + "deepseek-v4-flash", + "deepseek-v4-pro", + "openai/deepseek-v4-pro", + "deepseek/deepseek-v4-flash", + ]; + let negative = [ + "deepseek-reasoner", + "deepseek-chat", + "gpt-4o", + "claude-sonnet-4-6", + ]; + + // When checking whether history reasoning_content is required. + // Then only DeepSeek V4 variants require it. 
+ for model in positive { + assert!(model_requires_reasoning_content_in_history(model)); + } + for model in negative { + assert!(!model_requires_reasoning_content_in_history(model)); + } + } + + #[test] + fn legacy_deepseek_reasoner_request_omits_reasoning_content_for_assistant_history() { + // Given an assistant history turn containing thinking. + let request = assistant_history_with_thinking_request("deepseek-reasoner"); + + // When serializing for legacy deepseek-reasoner. + let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai()); + + // Then reasoning_content is omitted. + let assistant = &payload["messages"][0]; + assert_eq!(assistant["role"], json!("assistant")); + assert!(assistant.get("reasoning_content").is_none()); + } + + #[test] + fn deepseek_v4_pro_request_includes_reasoning_content_for_assistant_history() { + // Given an assistant history turn containing thinking. + let request = assistant_history_with_thinking_request("openai/deepseek-v4-pro"); + + // When serializing for DeepSeek V4 Pro. + let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai()); + + // Then reasoning_content is included on the assistant message. + let assistant = &payload["messages"][0]; + assert_eq!(assistant["reasoning_content"], json!("prior reasoning")); + assert_eq!(assistant["content"], json!("answer")); + } + + #[test] + fn deepseek_v4_flash_request_includes_reasoning_content_for_assistant_history() { + // Given an assistant history turn containing thinking. + let request = assistant_history_with_thinking_request("deepseek-v4-flash"); + + // When serializing for DeepSeek V4 Flash. + let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai()); + + // Then reasoning_content is included on the assistant message. + let assistant = &payload["messages"][0]; + assert_eq!(assistant["reasoning_content"], json!("prior reasoning")); + } + + #[test] + fn non_streaming_response_with_reasoning_content_emits_thinking_block_first() { + // Given a non-streaming OpenAI-compatible response with reasoning_content. + let response = super::ChatCompletionResponse { + id: "chatcmpl_reasoning".to_string(), + model: "deepseek-v4-pro".to_string(), + choices: vec![super::ChatChoice { + message: super::ChatMessage { + role: "assistant".to_string(), + content: Some("final answer".to_string()), + reasoning_content: Some("hidden thought".to_string()), + tool_calls: Vec::new(), + }, + finish_reason: Some("stop".to_string()), + }], + usage: None, + }; + + // When normalizing the provider response. + let normalized = normalize_response("deepseek-v4-pro", response).expect("normalized"); + + // Then Thinking is the first content block, before text. + assert_eq!( + normalized.content, + vec![ + OutputContentBlock::Thinking { + thinking: "hidden thought".to_string(), + signature: None, + }, + OutputContentBlock::Text { + text: "final answer".to_string(), + }, + ] + ); + } + + #[test] + fn streaming_chunks_with_reasoning_content_emit_thinking_block_events_before_text() { + // Given streaming chunks with reasoning_content followed by text. 
+ let mut state = StreamState::new("deepseek-v4-pro".to_string()); + let mut events = state + .ingest_chunk(super::ChatCompletionChunk { + id: "chatcmpl_stream_reasoning".to_string(), + model: Some("deepseek-v4-pro".to_string()), + choices: vec![super::ChunkChoice { + delta: super::ChunkDelta { + content: None, + reasoning_content: Some("think".to_string()), + tool_calls: Vec::new(), + }, + finish_reason: None, + }], + usage: None, + }) + .expect("reasoning chunk"); + events.extend( + state + .ingest_chunk(super::ChatCompletionChunk { + id: "chatcmpl_stream_reasoning".to_string(), + model: None, + choices: vec![super::ChunkChoice { + delta: super::ChunkDelta { + content: Some(" answer".to_string()), + reasoning_content: None, + tool_calls: Vec::new(), + }, + finish_reason: Some("stop".to_string()), + }], + usage: None, + }) + .expect("text chunk"), + ); + events.extend(state.finish().expect("finish")); + + // When reading normalized stream events. + // Then Thinking starts at index 0, text is offset to index 1. + assert!(matches!(events[0], StreamEvent::MessageStart(_))); + assert!(matches!( + events[1], + StreamEvent::ContentBlockStart(ContentBlockStartEvent { + index: 0, + content_block: OutputContentBlock::Thinking { .. }, + }) + )); + assert!(matches!( + events[2], + StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent { + index: 0, + delta: ContentBlockDelta::ThinkingDelta { .. }, + }) + )); + assert!(matches!( + events[3], + StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 0 }) + )); + assert!(matches!( + events[4], + StreamEvent::ContentBlockStart(ContentBlockStartEvent { + index: 1, + content_block: OutputContentBlock::Text { .. }, + }) + )); + assert!(matches!( + events[5], + StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent { + index: 1, + delta: ContentBlockDelta::TextDelta { .. 
},
+            })
+        ));
+        assert!(matches!(
+            events[6],
+            StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 1 })
+        ));
+    }
+
     #[test]
     fn tool_schema_object_gets_strict_fields_for_responses_endpoint() {
         // OpenAI /responses endpoint rejects object schemas missing
@@ -1624,6 +1899,27 @@ mod tests {
         );
     }
 
+    fn assistant_history_with_thinking_request(model: &str) -> MessageRequest {
+        MessageRequest {
+            model: model.to_string(),
+            max_tokens: 100,
+            messages: vec![InputMessage {
+                role: "assistant".to_string(),
+                content: vec![
+                    InputContentBlock::Thinking {
+                        thinking: "prior reasoning".to_string(),
+                        signature: None,
+                    },
+                    InputContentBlock::Text {
+                        text: "answer".to_string(),
+                    },
+                ],
+            }],
+            stream: false,
+            ..Default::default()
+        }
+    }
+
     fn env_lock() -> std::sync::MutexGuard<'static, ()> {
         static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
         LOCK.get_or_init(|| Mutex::new(()))
diff --git a/rust/crates/api/src/types.rs b/rust/crates/api/src/types.rs
index e136a766..0d41db19 100644
--- a/rust/crates/api/src/types.rs
+++ b/rust/crates/api/src/types.rs
@@ -81,6 +81,11 @@ pub enum InputContentBlock {
     Text {
         text: String,
     },
+    Thinking {
+        thinking: String,
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        signature: Option<String>,
+    },
     ToolUse {
         id: String,
         name: String,
@@ -268,8 +273,9 @@ pub enum StreamEvent {
 #[cfg(test)]
 mod tests {
     use runtime::format_usd;
+    use serde_json::json;
 
-    use super::{MessageResponse, Usage};
+    use super::{InputContentBlock, MessageResponse, Usage};
 
     #[test]
     fn usage_total_tokens_includes_cache_tokens() {
@@ -307,4 +313,33 @@ mod tests {
         assert_eq!(format_usd(cost.total_cost_usd()), "$54.6750");
         assert_eq!(response.total_tokens(), 1_800_000);
     }
+
+    #[test]
+    fn input_content_block_thinking_serializes_with_snake_case_type() {
+        // given
+        let block = InputContentBlock::Thinking {
+            thinking: "pondering".to_string(),
+            signature: Some("sig_123".to_string()),
+        };
+
+        // when
+        let serialized = serde_json::to_value(&block).unwrap();
+        let deserialized: InputContentBlock = serde_json::from_value(json!({
+            "type": "thinking",
+            "thinking": "pondering",
+            "signature": "sig_123"
+        }))
+        .unwrap();
+
+        // then
+        assert_eq!(
+            serialized,
+            json!({
+                "type": "thinking",
+                "thinking": "pondering",
+                "signature": "sig_123"
+            })
+        );
+        assert_eq!(deserialized, block);
+    }
 }
diff --git a/rust/crates/api/tests/openai_compat_integration.rs b/rust/crates/api/tests/openai_compat_integration.rs
index d5596bb0..5db9eaf1 100644
--- a/rust/crates/api/tests/openai_compat_integration.rs
+++ b/rust/crates/api/tests/openai_compat_integration.rs
@@ -63,6 +63,50 @@ async fn send_message_uses_openai_compatible_endpoint_and_auth() {
     assert_eq!(body["tools"][0]["type"], json!("function"));
 }
 
+#[tokio::test]
+async fn send_message_preserves_deepseek_reasoning_content_before_text() {
+    let state = Arc::new(Mutex::new(Vec::<String>::new()));
+    let body = concat!(
+        "{",
+        "\"id\":\"chatcmpl_deepseek_reasoning\",",
+        "\"model\":\"deepseek-v4-pro\",",
+        "\"choices\":[{",
+        "\"message\":{\"role\":\"assistant\",\"reasoning_content\":\"Think first\",\"content\":\"Answer second\",\"tool_calls\":[]},",
+        "\"finish_reason\":\"stop\"",
+        "}],",
+        "\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":5}",
+        "}"
+    );
+    let server = spawn_server(
+        state.clone(),
+        vec![http_response("200 OK", "application/json", body)],
+    )
+    .await;
+
+    let client = OpenAiCompatClient::new("openai-test-key", OpenAiCompatConfig::openai())
+        .with_base_url(server.base_url());
+    let response = client
+        .send_message(&MessageRequest
{ + model: "openai/deepseek-v4-pro".to_string(), + ..sample_request(false) + }) + .await + .expect("request should succeed"); + + assert_eq!( + response.content, + vec![ + OutputContentBlock::Thinking { + thinking: "Think first".to_string(), + signature: None, + }, + OutputContentBlock::Text { + text: "Answer second".to_string(), + }, + ] + ); +} + #[tokio::test] async fn send_message_blocks_oversized_xai_requests_before_the_http_call() { let state = Arc::new(Mutex::new(Vec::::new())); diff --git a/rust/crates/mock-anthropic-service/src/lib.rs b/rust/crates/mock-anthropic-service/src/lib.rs index 68968eed..99623d18 100644 --- a/rust/crates/mock-anthropic-service/src/lib.rs +++ b/rust/crates/mock-anthropic-service/src/lib.rs @@ -248,6 +248,7 @@ fn detect_scenario(request: &MessageRequest) -> Option { .split_whitespace() .find_map(|token| token.strip_prefix(SCENARIO_PREFIX)) .and_then(Scenario::parse), + InputContentBlock::Thinking { .. } => None, _ => None, }) }) diff --git a/rust/crates/runtime/src/compact.rs b/rust/crates/runtime/src/compact.rs index 3e805dda..e4fd3db0 100644 --- a/rust/crates/runtime/src/compact.rs +++ b/rust/crates/runtime/src/compact.rs @@ -213,6 +213,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String { ContentBlock::ToolUse { name, .. } => Some(name.as_str()), ContentBlock::ToolResult { tool_name, .. } => Some(tool_name.as_str()), ContentBlock::Text { .. } => None, + ContentBlock::Thinking { .. } => None, }) .collect::>(); tool_names.sort_unstable(); @@ -317,6 +318,9 @@ fn merge_compact_summaries(existing_summary: Option<&str>, new_summary: &str) -> fn summarize_block(block: &ContentBlock) -> String { let raw = match block { ContentBlock::Text { text } => text.clone(), + ContentBlock::Thinking { thinking, .. } => { + format!("thinking ({} chars)", thinking.chars().count()) + } ContentBlock::ToolUse { name, input, .. } => format!("tool_use {name}({input})"), ContentBlock::ToolResult { tool_name, @@ -378,6 +382,7 @@ fn collect_key_files(messages: &[ConversationMessage]) -> Vec { ContentBlock::Text { text } => text.as_str(), ContentBlock::ToolUse { input, .. } => input.as_str(), ContentBlock::ToolResult { output, .. } => output.as_str(), + ContentBlock::Thinking { thinking, .. } => thinking.as_str(), }) .flat_map(extract_file_candidates) .collect::>(); @@ -400,6 +405,7 @@ fn first_text_block(message: &ConversationMessage) -> Option<&str> { ContentBlock::Text { text } if !text.trim().is_empty() => Some(text.as_str()), ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. } + | ContentBlock::Thinking { .. } | ContentBlock::Text { .. } => None, }) } @@ -450,6 +456,10 @@ fn estimate_message_tokens(message: &ConversationMessage) -> usize { ContentBlock::ToolResult { tool_name, output, .. } => (tool_name.len() + output.len()) / 4 + 1, + ContentBlock::Thinking { + thinking, + signature, + } => thinking.len() / 4 + signature.as_ref().map_or(0, |value| value.len() / 4 + 1), }) .sum() } diff --git a/rust/crates/runtime/src/conversation.rs b/rust/crates/runtime/src/conversation.rs index 610ba1a8..35c0f73f 100644 --- a/rust/crates/runtime/src/conversation.rs +++ b/rust/crates/runtime/src/conversation.rs @@ -28,6 +28,10 @@ pub struct ApiRequest { /// Streamed events emitted while processing a single assistant turn. 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum AssistantEvent {
+    Thinking {
+        thinking: String,
+        signature: Option<String>,
+    },
     TextDelta(String),
     ToolUse {
         id: String,
@@ -721,6 +725,16 @@ fn build_assistant_message(
 
     for event in events {
         match event {
+            AssistantEvent::Thinking {
+                thinking,
+                signature,
+            } => {
+                flush_text_block(&mut text, &mut blocks);
+                blocks.push(ContentBlock::Thinking {
+                    thinking,
+                    signature,
+                });
+            }
             AssistantEvent::TextDelta(delta) => text.push_str(&delta),
             AssistantEvent::ToolUse { id, name, input } => {
                 flush_text_block(&mut text, &mut blocks);
@@ -1723,6 +1737,47 @@ mod tests {
             .contains("assistant stream produced no content"));
     }
 
+    #[test]
+    fn build_assistant_message_places_thinking_block_before_text_and_tool_use() {
+        // given
+        let events = vec![
+            AssistantEvent::Thinking {
+                thinking: "pondering".to_string(),
+                signature: Some("sig".to_string()),
+            },
+            AssistantEvent::TextDelta("hello".to_string()),
+            AssistantEvent::ToolUse {
+                id: "tool-1".to_string(),
+                name: "echo".to_string(),
+                input: "payload".to_string(),
+            },
+            AssistantEvent::MessageStop,
+        ];
+
+        // when
+        let (message, _, _) = build_assistant_message(events)
+            .expect("assistant message should preserve thinking, text, and tool blocks");
+
+        // then
+        assert_eq!(
+            message.blocks,
+            vec![
+                ContentBlock::Thinking {
+                    thinking: "pondering".to_string(),
+                    signature: Some("sig".to_string()),
+                },
+                ContentBlock::Text {
+                    text: "hello".to_string(),
+                },
+                ContentBlock::ToolUse {
+                    id: "tool-1".to_string(),
+                    name: "echo".to_string(),
+                    input: "payload".to_string(),
+                },
+            ]
+        );
+    }
+
     #[test]
     fn static_tool_executor_rejects_unknown_tools() {
         // given
diff --git a/rust/crates/runtime/src/lib.rs b/rust/crates/runtime/src/lib.rs
index c7d87091..c1108d3d 100644
--- a/rust/crates/runtime/src/lib.rs
+++ b/rust/crates/runtime/src/lib.rs
@@ -131,8 +131,8 @@ pub use policy_engine::{
     PolicyEngine, PolicyRule, ReconcileReason, ReviewStatus,
 };
 pub use prompt::{
-    load_system_prompt, prepend_bullets, ContextFile, ProjectContext, PromptBuildError,
-    SystemPromptBuilder, FRONTIER_MODEL_NAME, SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
+    load_system_prompt, prepend_bullets, ContextFile, ModelFamilyIdentity, ProjectContext,
+    PromptBuildError, SystemPromptBuilder, FRONTIER_MODEL_NAME, SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
 };
 pub use recovery_recipes::{
     attempt_recovery, recipe_for, EscalationPolicy, FailureScenario, RecoveryContext,
diff --git a/rust/crates/runtime/src/prompt.rs b/rust/crates/runtime/src/prompt.rs
index e46b7ebe..1e6c4eda 100644
--- a/rust/crates/runtime/src/prompt.rs
+++ b/rust/crates/runtime/src/prompt.rs
@@ -43,6 +43,24 @@ pub const FRONTIER_MODEL_NAME: &str = "Claude Opus 4.6";
 const MAX_INSTRUCTION_FILE_CHARS: usize = 4_000;
 const MAX_TOTAL_INSTRUCTION_CHARS: usize = 12_000;
 
+/// Identity used for the model family line in generated prompts.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+pub enum ModelFamilyIdentity {
+    #[default]
+    Claude,
+    Generic,
+}
+
+impl ModelFamilyIdentity {
+    #[must_use]
+    pub const fn family_label(self) -> &'static str {
+        match self {
+            Self::Claude => FRONTIER_MODEL_NAME,
+            Self::Generic => "an AI assistant",
+        }
+    }
+}
+
 /// Contents of an instruction file included in prompt construction.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct ContextFile {
@@ -97,6 +115,7 @@ pub struct SystemPromptBuilder {
     output_style_prompt: Option<String>,
     os_name: Option<String>,
     os_version: Option<String>,
+    model_family: Option<ModelFamilyIdentity>,
     append_sections: Vec<String>,
     project_context: Option<ProjectContext>,
     config: Option<RuntimeConfig>,
@@ -122,6 +141,12 @@ impl SystemPromptBuilder {
         self
     }
 
+    #[must_use]
+    pub fn with_model_family(mut self, model_family: ModelFamilyIdentity) -> Self {
+        self.model_family = Some(model_family);
+        self
+    }
+
     #[must_use]
     pub fn with_project_context(mut self, project_context: ProjectContext) -> Self {
         self.project_context = Some(project_context);
@@ -179,9 +204,10 @@ impl SystemPromptBuilder {
             || "unknown".to_string(),
             |context| context.current_date.clone(),
         );
+        let identity = self.model_family.unwrap_or_default();
         let mut lines = vec!["# Environment context".to_string()];
         lines.extend(prepend_bullets(vec![
-            format!("Model family: {FRONTIER_MODEL_NAME}"),
+            format!("Model family: {}", identity.family_label()),
             format!("Working directory: {cwd}"),
             format!("Date: {date}"),
             format!(
@@ -434,12 +460,14 @@ pub fn load_system_prompt(
     current_date: impl Into<String>,
     os_name: impl Into<String>,
     os_version: impl Into<String>,
+    model_family: ModelFamilyIdentity,
 ) -> Result<Vec<String>, PromptBuildError> {
     let cwd = cwd.into();
     let project_context = ProjectContext::discover_with_git(&cwd, current_date.into())?;
     let config = ConfigLoader::default_for(&cwd).load()?;
     Ok(SystemPromptBuilder::new()
         .with_os(os_name, os_version)
+        .with_model_family(model_family)
         .with_project_context(project_context)
         .with_runtime_config(config)
         .build())
@@ -522,7 +550,8 @@ mod tests {
     use super::{
         collapse_blank_lines, display_context_path, normalize_instruction_content,
         render_instruction_content, render_instruction_files, truncate_instruction_content,
-        ContextFile, ProjectContext, SystemPromptBuilder, SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
+        ContextFile, ModelFamilyIdentity, ProjectContext, SystemPromptBuilder,
+        SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
     };
     use crate::config::ConfigLoader;
     use std::fs;
@@ -804,13 +833,19 @@ mod tests {
         std::env::set_var("HOME", &root);
         std::env::set_var("CLAW_CONFIG_HOME", root.join("missing-home"));
         std::env::set_current_dir(&root).expect("change cwd");
-        let prompt = super::load_system_prompt(&root, "2026-03-31", "linux", "6.8")
-            .expect("system prompt should load")
-            .join(
-                "
-",
-            );
+        let prompt = super::load_system_prompt(
+            &root,
+            "2026-03-31",
+            "linux",
+            "6.8",
+            ModelFamilyIdentity::Claude,
+        )
+        .expect("system prompt should load")
+        .join(
+            "
+",
+        );
         std::env::set_current_dir(previous).expect("restore cwd");
         if let Some(value) = original_home {
             std::env::set_var("HOME", value);
@@ -828,6 +863,50 @@ mod tests {
         fs::remove_dir_all(root).expect("cleanup temp dir");
     }
 
+    #[test]
+    fn renders_default_claude_model_family_identity() {
+        // given: a prompt builder without an explicit model family override
+        let project_context = ProjectContext {
+            cwd: PathBuf::from("/tmp/project"),
+            current_date: "2026-03-31".to_string(),
+            ..ProjectContext::default()
+        };
+
+        // when: rendering the system prompt environment section
+        let prompt = SystemPromptBuilder::new()
+            .with_os("linux", "6.8")
+            .with_project_context(project_context)
+            .render();
+
+        // then: the Claude model family label is preserved by default
+        assert!(prompt.contains("Model family: Claude Opus 4.6"));
+    }
+
+    #[test]
+    fn renders_generic_model_family_identity_without_claude_label() {
+        // given: a prompt builder with generic model family identity
+        let project_context = ProjectContext {
+            cwd: PathBuf::from("/tmp/project"),
+            current_date: "2026-03-31".to_string(),
+            ..ProjectContext::default()
+        };
+
+        // when: rendering the system prompt environment section
+        let prompt = SystemPromptBuilder::new()
+            .with_os("linux", "6.8")
+            .with_model_family(ModelFamilyIdentity::Generic)
+            .with_project_context(project_context)
+            .render();
+        let model_family_line = prompt
+            .lines()
+            .find(|line| line.contains("Model family:"))
+            .expect("model family line should render");
+
+        // then: the model family line is neutral and excludes Claude Opus 4.6
+        assert_eq!(model_family_line, " - Model family: an AI assistant");
+        assert!(!model_family_line.contains("Claude Opus 4.6"));
+    }
+
     #[test]
     fn renders_claude_code_style_sections_with_project_context() {
         let root = temp_dir();
diff --git a/rust/crates/runtime/src/session.rs b/rust/crates/runtime/src/session.rs
index b97378e5..6b62444d 100644
--- a/rust/crates/runtime/src/session.rs
+++ b/rust/crates/runtime/src/session.rs
@@ -30,6 +30,10 @@ pub enum ContentBlock {
     Text {
         text: String,
     },
+    Thinking {
+        thinking: String,
+        signature: Option<String>,
+    },
     ToolUse {
         id: String,
         name: String,
@@ -737,6 +741,22 @@ impl ContentBlock {
                 object.insert("type".to_string(), JsonValue::String("text".to_string()));
                 object.insert("text".to_string(), JsonValue::String(text.clone()));
             }
+            Self::Thinking {
+                thinking,
+                signature,
+            } => {
+                object.insert(
+                    "type".to_string(),
+                    JsonValue::String("thinking".to_string()),
+                );
+                object.insert("thinking".to_string(), JsonValue::String(thinking.clone()));
+                if let Some(signature) = signature {
+                    object.insert(
+                        "signature".to_string(),
+                        JsonValue::String(signature.clone()),
+                    );
+                }
+            }
             Self::ToolUse { id, name, input } => {
                 object.insert(
                     "type".to_string(),
@@ -783,6 +803,13 @@ impl ContentBlock {
             "text" => Ok(Self::Text {
                 text: required_string(object, "text")?,
             }),
+            "thinking" => Ok(Self::Thinking {
+                thinking: required_string(object, "thinking")?,
+                signature: object
+                    .get("signature")
+                    .and_then(JsonValue::as_str)
+                    .map(String::from),
+            }),
             "tool_use" => Ok(Self::ToolUse {
                 id: required_string(object, "id")?,
                 name: required_string(object, "name")?,
@@ -1208,6 +1235,36 @@ mod tests {
         assert_eq!(restored.session_id, session.session_id);
     }
 
+    #[test]
+    fn persists_assistant_thinking_block_round_trip_through_jsonl() {
+        // given
+        let mut session = Session::new();
+        session
+            .push_message(ConversationMessage::assistant(vec![
+                ContentBlock::Thinking {
+                    thinking: "trace the path through session persistence".to_string(),
+                    signature: Some("sig-123".to_string()),
+                },
+            ]))
+            .expect("thinking block should append");
+        let path = temp_session_path("thinking-jsonl");
+
+        // when
+        session.save_to_path(&path).expect("session should save");
+        let restored = Session::load_from_path(&path).expect("session should load");
+        fs::remove_file(&path).expect("temp file should be removable");
+
+        // then
+        assert_eq!(restored, session);
+        assert_eq!(
+            restored.messages[0].blocks[0],
+            ContentBlock::Thinking {
+                thinking: "trace the path through session persistence".to_string(),
+                signature: Some("sig-123".to_string()),
+            }
+        );
+    }
+
     #[test]
     fn loads_legacy_session_json_object() {
         let path = temp_session_path("legacy");
diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs
index 628118af..df4d8da4 100644
--- a/rust/crates/rusty-claude-cli/src/main.rs
+++ b/rust/crates/rusty-claude-cli/src/main.rs
@@ -24,10 +24,11 @@ use std::thread::{self, JoinHandle};
 use std::time::{Duration, Instant, UNIX_EPOCH};
 
 use api::{
-    detect_provider_kind, resolve_startup_auth_source, AnthropicClient, AuthSource,
-    ContentBlockDelta, InputContentBlock, InputMessage, MessageRequest, MessageResponse,
-    OutputContentBlock, PromptCache, ProviderClient as ApiProviderClient, ProviderKind,
-    StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition, ToolResultContentBlock,
+    detect_provider_kind, model_family_identity_for, resolve_startup_auth_source, AnthropicClient,
+    AuthSource, ContentBlockDelta, InputContentBlock, InputMessage, MessageRequest,
+    MessageResponse, OutputContentBlock, PromptCache, ProviderClient as ApiProviderClient,
+    ProviderKind, StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition,
+    ToolResultContentBlock,
 };
 
 use commands::{
@@ -357,8 +358,9 @@ fn run() -> Result<(), Box<dyn Error>> {
         CliAction::PrintSystemPrompt {
             cwd,
             date,
+            model,
             output_format,
-        } => print_system_prompt(cwd, date, output_format)?,
+        } => print_system_prompt(cwd, date, &model, output_format)?,
         CliAction::Version { output_format } => print_version(output_format)?,
         CliAction::ResumeSession {
             session_path,
@@ -498,6 +500,7 @@ enum CliAction {
     PrintSystemPrompt {
         cwd: PathBuf,
         date: String,
+        model: String,
         output_format: CliOutputFormat,
     },
     Version {
@@ -960,7 +963,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
                 }),
             }
         }
-        "system-prompt" => parse_system_prompt_args(&rest[1..], output_format),
+        "system-prompt" => parse_system_prompt_args(&rest[1..], model, output_format),
         "acp" => parse_acp_args(&rest[1..], output_format),
         "login" | "logout" => Err(removed_auth_surface_error(rest[0].as_str())),
         "init" => Ok(CliAction::Init { output_format }),
@@ -1638,6 +1641,7 @@ fn filter_tool_specs(
 
 fn parse_system_prompt_args(
     args: &[String],
+    model: String,
     output_format: CliOutputFormat,
 ) -> Result<CliAction, String> {
     let mut cwd = env::current_dir().map_err(|error| error.to_string())?;
@@ -1674,6 +1678,7 @@ fn parse_system_prompt_args(
     Ok(CliAction::PrintSystemPrompt {
         cwd,
         date,
+        model,
         output_format,
     })
 }
@@ -2614,9 +2619,16 @@ fn print_bootstrap_plan(output_format: CliOutputFormat) -> Result<(), Box<dyn Er
 
 fn print_system_prompt(
     cwd: PathBuf,
     date: String,
+    model: &str,
     output_format: CliOutputFormat,
 ) -> Result<(), Box<dyn Error>> {
-    let sections = load_system_prompt(cwd, date, env::consts::OS, "unknown")?;
+    let sections = load_system_prompt(
+        cwd,
+        date,
+        env::consts::OS,
+        "unknown",
+        model_family_identity_for(model),
+    )?;
     let message = sections.join(
         "
@@ -4394,7 +4406,7 @@ impl LiveCli {
         allowed_tools: Option,
         permission_mode: PermissionMode,
     ) -> Result<Self, Box<dyn Error>> {
-        let system_prompt = build_system_prompt()?;
+        let system_prompt = build_system_prompt(&model)?;
         let session_state = new_cli_session()?;
         let session = create_managed_session_handle(&session_state.session_id)?;
         let runtime = build_runtime(
@@ -4530,6 +4542,10 @@ impl LiveCli {
             TerminalRenderer::new().color_theme(),
             &mut stdout,
         )?;
+        let final_text = final_assistant_text(&summary);
+        if !final_text.is_empty() {
+            println!("{final_text}");
+        }
         println!();
         if let Some(event) = summary.auto_compaction {
             println!(
@@ -7005,6 +7021,7 @@ fn render_export_text(session: &Session) -> String {
         for block in &message.blocks {
             match block {
                 ContentBlock::Text { text } => lines.push(text.clone()),
+                ContentBlock::Thinking { .. } => {}
                 ContentBlock::ToolUse { id, name, input } => {
                     lines.push(format!("[tool_use id={id} name={name}] {input}"));
                 }
@@ -7191,6 +7208,7 @@ fn render_session_markdown(session: &Session, session_id: &str, session_path: &P
                         lines.push(String::new());
                     }
                 }
+                ContentBlock::Thinking { .. } => {}
                 ContentBlock::ToolUse { id, name, input } => {
                     lines.push(format!(
                         "**Tool call** `{name}` _(id `{}`)_",
@@ -7244,12 +7262,13 @@ fn short_tool_id(id: &str) -> String {
     format!("{prefix}…")
 }
 
-fn build_system_prompt() -> Result<Vec<String>, Box<dyn Error>> {
+fn build_system_prompt(model: &str) -> Result<Vec<String>, Box<dyn Error>> {
     Ok(load_system_prompt(
         env::current_dir()?,
         DEFAULT_DATE,
         env::consts::OS,
         "unknown",
+        model_family_identity_for(model),
     )?)
 }
@@ -9211,26 +9230,29 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec<InputMessage> {
     let content = message
         .blocks
         .iter()
-        .map(|block| match block {
-            ContentBlock::Text { text } => InputContentBlock::Text { text: text.clone() },
-            ContentBlock::ToolUse { id, name, input } => InputContentBlock::ToolUse {
+        .filter_map(|block| match block {
+            ContentBlock::Text { text } => {
+                Some(InputContentBlock::Text { text: text.clone() })
+            }
+            ContentBlock::Thinking { .. } => None,
+            ContentBlock::ToolUse { id, name, input } => Some(InputContentBlock::ToolUse {
                 id: id.clone(),
                 name: name.clone(),
                 input: serde_json::from_str(input)
                    .unwrap_or_else(|_| serde_json::json!({ "raw": input })),
-            },
+            }),
             ContentBlock::ToolResult {
                 tool_use_id,
                 output,
                 is_error,
                 ..
-            } => InputContentBlock::ToolResult {
+            } => Some(InputContentBlock::ToolResult {
                 tool_use_id: tool_use_id.clone(),
                 content: vec![ToolResultContentBlock::Text {
                     text: output.clone(),
                 }],
                 is_error: *is_error,
-            },
+            }),
         })
         .collect::<Vec<_>>();
     (!content.is_empty()).then(|| InputMessage {
@@ -9628,7 +9650,9 @@ mod tests {
             "{rendered}"
         );
         assert!(
-            rendered.contains("Detail Input tokens exceed the configured limit of 922000 tokens."),
+            rendered.contains(
+                "Detail Input tokens exceed the configured limit of 922000 tokens."
+            ),
             "{rendered}"
         );
         assert!(rendered.contains("Compact /compact"), "{rendered}");
@@ -10264,6 +10288,7 @@ mod tests {
 
     #[test]
     fn parses_system_prompt_options() {
+        // given: system-prompt options for cwd and date
         let args = vec![
             "system-prompt".to_string(),
             "--cwd".to_string(),
             "/tmp/project".to_string(),
             "--date".to_string(),
             "2026-04-01".to_string(),
         ];
+
+        // when: parsing the direct system-prompt command
+        let action = parse_args(&args).expect("args should parse");
+
+        // then: the action carries prompt options and default model
         assert_eq!(
-            parse_args(&args).expect("args should parse"),
+            action,
             CliAction::PrintSystemPrompt {
                 cwd: PathBuf::from("/tmp/project"),
                 date: "2026-04-01".to_string(),
+                model: DEFAULT_MODEL.to_string(),
                 output_format: CliOutputFormat::Text,
             }
         );
     }
 
+    #[test]
+    fn parses_global_model_for_system_prompt() {
+        // given: a global OpenAI-compatible model before system-prompt
+        let args = vec![
+            "--model".to_string(),
+            "openai/gpt-4.1-mini".to_string(),
+            "system-prompt".to_string(),
+        ];
+
+        // when: parsing the CLI arguments
+        let action = parse_args(&args).expect("args should parse");
+
+        // then: the system-prompt action carries the selected model
+        match action {
+            CliAction::PrintSystemPrompt { model, ..
} => { + assert_eq!(model, "openai/gpt-4.1-mini"); + } + other => panic!("expected PrintSystemPrompt, got {other:?}"), + } + } + #[test] fn removed_login_and_logout_subcommands_error_helpfully() { let login = parse_args(&["login".to_string()]).expect_err("login should be removed"); diff --git a/rust/crates/rusty-claude-cli/tests/compact_output.rs b/rust/crates/rusty-claude-cli/tests/compact_output.rs index 8e751c0c..4ccca2f4 100644 --- a/rust/crates/rusty-claude-cli/tests/compact_output.rs +++ b/rust/crates/rusty-claude-cli/tests/compact_output.rs @@ -126,6 +126,66 @@ fn compact_flag_streaming_text_only_emits_final_message_text() { fs::remove_dir_all(&workspace).expect("workspace cleanup should succeed"); } +#[test] +fn text_prompt_mode_prints_final_assistant_text_after_spinner() { + // given a workspace pointed at the mock Anthropic service running the + // streaming_text scenario which only emits a single assistant text block + let runtime = tokio::runtime::Runtime::new().expect("tokio runtime should build"); + let server = runtime + .block_on(MockAnthropicService::spawn()) + .expect("mock service should start"); + let base_url = server.base_url(); + + let workspace = unique_temp_dir("text-prompt-mode"); + let config_home = workspace.join("config-home"); + let home = workspace.join("home"); + fs::create_dir_all(&workspace).expect("workspace should exist"); + fs::create_dir_all(&config_home).expect("config home should exist"); + fs::create_dir_all(&home).expect("home should exist"); + + // when we invoke claw in normal text prompt mode for the streaming text scenario + let prompt = format!("{SCENARIO_PREFIX}streaming_text"); + let output = run_claw( + &workspace, + &config_home, + &home, + &base_url, + &[ + "--model", + "sonnet", + "--permission-mode", + "read-only", + &prompt, + ], + ); + + // then stdout should contain the final assistant text, not just spinner output + assert!( + output.status.success(), + "text prompt run should succeed\nstdout:\n{}\n\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + ); + let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8"); + let plain_stdout = strip_ansi_codes(&stdout); + assert!( + plain_stdout.contains("Mock streaming says hello from the parity harness."), + "text prompt stdout should include the assistant text ({stdout:?})" + ); + assert!( + plain_stdout.contains("✔ ✨ Done"), + "text prompt stdout should still include spinner completion ({stdout:?})" + ); + assert!( + plain_stdout + .lines() + .any(|line| line == "Mock streaming says hello from the parity harness."), + "text prompt stdout should print the assistant text as its own line ({stdout:?})" + ); + + fs::remove_dir_all(&workspace).expect("workspace cleanup should succeed"); +} + #[test] fn compact_flag_with_json_output_emits_structured_json() { let runtime = tokio::runtime::Runtime::new().expect("tokio runtime should build"); @@ -215,3 +275,21 @@ fn unique_temp_dir(label: &str) -> PathBuf { std::process::id() )) } + +fn strip_ansi_codes(input: &str) -> String { + let mut output = String::with_capacity(input.len()); + let mut chars = input.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '\u{1b}' && matches!(chars.peek(), Some('[')) { + chars.next(); + while let Some(next) = chars.next() { + if ('@'..='~').contains(&next) { + break; + } + } + continue; + } + output.push(ch); + } + output +} diff --git a/rust/crates/rusty-claude-cli/tests/compact_repl_panic.rs 
b/rust/crates/rusty-claude-cli/tests/compact_repl_panic.rs new file mode 100644 index 00000000..e930cf43 --- /dev/null +++ b/rust/crates/rusty-claude-cli/tests/compact_repl_panic.rs @@ -0,0 +1,138 @@ +use std::fs; +use std::io::Write; +use std::path::PathBuf; +use std::process::{Command, Output, Stdio}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; + +static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0); + +#[test] +fn compact_slash_command_in_repl_does_not_start_nested_tokio_runtime() { + // given + let workspace = unique_temp_dir("compact-repl-panic"); + let config_home = workspace.join("config-home"); + let home = workspace.join("home"); + fs::create_dir_all(&workspace).expect("workspace should exist"); + fs::create_dir_all(&config_home).expect("config home should exist"); + fs::create_dir_all(&home).expect("home should exist"); + + // when + let output = run_claw_repl(&workspace, &config_home, &home, "/compact\n/exit\n"); + + // then + assert!( + output.status.success(), + "compact repl run should succeed\nstdout:\n{}\n\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + ); + let stderr = String::from_utf8(output.stderr).expect("stderr should be utf8"); + assert!( + !stderr.contains("Cannot start a runtime"), + "stderr must not contain nested runtime panic: {stderr:?}" + ); + assert!( + !stderr.contains("panicked at"), + "stderr must not contain panic output: {stderr:?}" + ); + + let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8"); + let plain_stdout = strip_ansi_codes(&stdout); + assert!( + plain_stdout.contains("Compaction skipped") + || plain_stdout.contains("Result skipped") + || plain_stdout.contains("Result compacted"), + "stdout should contain compact report output ({stdout:?})" + ); + + fs::remove_dir_all(&workspace).expect("workspace cleanup should succeed"); +} + +fn run_claw_repl( + cwd: &std::path::Path, + config_home: &std::path::Path, + home: &std::path::Path, + stdin: &str, +) -> Output { + let mut command = python_pty_command(env!("CARGO_BIN_EXE_claw")); + let mut child = command + .current_dir(cwd) + .env_clear() + .env("ANTHROPIC_API_KEY", "test-compact-repl-key") + .env("CLAW_CONFIG_HOME", config_home) + .env("HOME", home) + .env("NO_COLOR", "1") + .env("PATH", "/usr/bin:/bin") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .expect("claw should launch"); + + child + .stdin + .as_mut() + .expect("stdin should be piped") + .write_all(stdin.as_bytes()) + .expect("stdin should write"); + + child.wait_with_output().expect("claw should finish") +} + +fn python_pty_command(claw: &str) -> Command { + let mut command = Command::new("python3"); + command.args([ + "-c", + r#" +import os +import pty +import subprocess +import sys + +claw = sys.argv[1] +payload = sys.stdin.buffer.read() +master, slave = pty.openpty() +child = subprocess.Popen([claw], stdin=slave, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +os.close(slave) +os.write(master, payload) +stdout, stderr = child.communicate(timeout=30) +os.close(master) +sys.stdout.buffer.write(stdout) +sys.stderr.buffer.write(stderr) +raise SystemExit(child.returncode) +"#, + claw, + ]); + command +} + +fn unique_temp_dir(label: &str) -> PathBuf { + let millis = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock should be after epoch") + .as_millis(); + let counter = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed); + std::env::temp_dir().join(format!( + 
"claw-{label}-{}-{millis}-{counter}", + std::process::id() + )) +} + +fn strip_ansi_codes(input: &str) -> String { + let mut output = String::with_capacity(input.len()); + let mut chars = input.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '\u{1b}' && matches!(chars.peek(), Some('[')) { + chars.next(); + for next in chars.by_ref() { + if ('@'..='~').contains(&next) { + break; + } + } + continue; + } + output.push(ch); + } + output +} diff --git a/rust/crates/tools/src/lib.rs b/rust/crates/tools/src/lib.rs index f3d1849a..9e669f5e 100644 --- a/rust/crates/tools/src/lib.rs +++ b/rust/crates/tools/src/lib.rs @@ -4,9 +4,10 @@ use std::process::Command; use std::time::{Duration, Instant}; use api::{ - max_tokens_for_model, resolve_model_alias, ApiError, ContentBlockDelta, InputContentBlock, - InputMessage, MessageRequest, MessageResponse, OutputContentBlock, ProviderClient, - StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition, ToolResultContentBlock, + max_tokens_for_model, model_family_identity_for, resolve_model_alias, ApiError, + ContentBlockDelta, InputContentBlock, InputMessage, MessageRequest, MessageResponse, + OutputContentBlock, ProviderClient, StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition, + ToolResultContentBlock, }; use plugins::PluginTool; use reqwest::blocking::Client; @@ -3075,27 +3076,33 @@ fn extract_quoted_value(input: &str) -> Option<(String, &str)> { } fn decode_duckduckgo_redirect(url: &str) -> Option { - if url.starts_with("http://") || url.starts_with("https://") { - return Some(html_entity_decode_url(url)); - } - - let joined = if url.starts_with("//") { - format!("https:{url}") - } else if url.starts_with('/') { - format!("https://duckduckgo.com{url}") + let decoded = html_entity_decode_url(url); + let parsed = if decoded.starts_with("http://") || decoded.starts_with("https://") { + reqwest::Url::parse(&decoded).ok() + } else if decoded.starts_with("//") { + reqwest::Url::parse(&format!("https:{decoded}")).ok() + } else if decoded.starts_with('/') { + reqwest::Url::parse(&format!("https://duckduckgo.com{decoded}")).ok() } else { return None; - }; + }?; - let parsed = reqwest::Url::parse(&joined).ok()?; - if parsed.path() == "/l/" || parsed.path() == "/l" { + let host = parsed.host_str().unwrap_or_default().to_ascii_lowercase(); + if (host == "duckduckgo.com" || host.ends_with(".duckduckgo.com")) + && (parsed.path() == "/l/" || parsed.path() == "/l") + { for (key, value) in parsed.query_pairs() { if key == "uddg" { return Some(html_entity_decode_url(value.as_ref())); } } } - Some(joined) + + if decoded.starts_with("http://") || decoded.starts_with("https://") { + Some(decoded) + } else { + Some(parsed.to_string()) + } } fn html_entity_decode_url(url: &str) -> String { @@ -3510,7 +3517,7 @@ where .filter(|name| !name.is_empty()) .unwrap_or_else(|| slugify_agent_name(&input.description)); let created_at = iso8601_now(); - let system_prompt = build_agent_system_prompt(&normalized_subagent_type)?; + let system_prompt = build_agent_system_prompt(&normalized_subagent_type, &model)?; let allowed_tools = allowed_tools_for_subagent(&normalized_subagent_type); let output_contents = format!( @@ -3623,13 +3630,14 @@ fn build_agent_runtime( )) } -fn build_agent_system_prompt(subagent_type: &str) -> Result, String> { +fn build_agent_system_prompt(subagent_type: &str, model: &str) -> Result, String> { let cwd = std::env::current_dir().map_err(|error| error.to_string())?; let mut prompt = load_system_prompt( cwd, 
DEFAULT_AGENT_SYSTEM_DATE.to_string(), std::env::consts::OS, "unknown", + model_family_identity_for(model), ) .map_err(|error| error.to_string())?; prompt.push(format!( @@ -4759,6 +4767,9 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { .iter() .map(|block| match block { ContentBlock::Text { text } => InputContentBlock::Text { text: text.clone() }, + ContentBlock::Thinking { .. } => InputContentBlock::Text { + text: String::new(), + }, ContentBlock::ToolUse { id, name, input } => InputContentBlock::ToolUse { id: id.clone(), name: name.clone(), @@ -4778,6 +4789,9 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { is_error: *is_error, }, }) + .filter( + |block| !matches!(block, InputContentBlock::Text { text } if text.is_empty()), + ) .collect::>(); (!content.is_empty()).then(|| InputMessage { role: role.to_string(), @@ -6134,12 +6148,13 @@ mod tests { use std::time::Duration; use super::{ - agent_permission_policy, allowed_tools_for_subagent, classify_lane_failure, - derive_agent_state, execute_agent_with_spawn, execute_tool, extract_recovery_outcome, - final_assistant_text, global_cron_registry, maybe_commit_provenance, mvp_tool_specs, - permission_mode_from_plugin, persist_agent_terminal_state, push_output_block, - run_task_packet, AgentInput, AgentJob, GlobalToolRegistry, LaneEventName, LaneFailureClass, - ProviderRuntimeClient, SubagentToolExecutor, + agent_permission_policy, allowed_tools_for_subagent, build_agent_system_prompt, + classify_lane_failure, derive_agent_state, execute_agent_with_spawn, execute_tool, + extract_recovery_outcome, final_assistant_text, global_cron_registry, + maybe_commit_provenance, mvp_tool_specs, permission_mode_from_plugin, + persist_agent_terminal_state, push_output_block, run_task_packet, AgentInput, AgentJob, + GlobalToolRegistry, LaneEventName, LaneFailureClass, ProviderRuntimeClient, + SubagentToolExecutor, }; use api::OutputContentBlock; use runtime::ProviderFallbackConfig; @@ -7148,6 +7163,98 @@ mod tests { assert!(error.contains("relative URL without a base") || error.contains("empty host")); } + #[test] + fn web_search_decodes_absolute_duckduckgo_redirect_urls() { + // given + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let server = TestServer::spawn(Arc::new(|request_line: &str| { + assert!(request_line.contains("GET /search?q=duckduckgo+redirects ")); + HttpResponse::html( + 200, + "OK", + r#" + + Reqwest docs + + "#, + ) + })); + + // when + std::env::set_var( + "CLAWD_WEB_SEARCH_BASE_URL", + format!("http://{}/search", server.addr()), + ); + let result = execute_tool( + "WebSearch", + &json!({ + "query": "duckduckgo redirects" + }), + ) + .expect("WebSearch should succeed"); + std::env::remove_var("CLAWD_WEB_SEARCH_BASE_URL"); + + // then + let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); + let results = output["results"].as_array().expect("results array"); + let search_result = results + .iter() + .find(|item| item.get("content").is_some()) + .expect("search result block present"); + let content = search_result["content"].as_array().expect("content array"); + assert_eq!(content.len(), 1); + assert_eq!(content[0]["title"], "Reqwest docs"); + assert_eq!(content[0]["url"], "https://docs.rs/reqwest"); + } + + #[test] + fn web_search_decodes_protocol_relative_duckduckgo_redirect_urls() { + // given + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let server = 
TestServer::spawn(Arc::new(|request_line: &str| { + assert!(request_line.contains("GET /search?q=duckduckgo+protocol+relative ")); + HttpResponse::html( + 200, + "OK", + r#" + + Tokio Docs + + "#, + ) + })); + + // when + std::env::set_var( + "CLAWD_WEB_SEARCH_BASE_URL", + format!("http://{}/search", server.addr()), + ); + let result = execute_tool( + "WebSearch", + &json!({ + "query": "duckduckgo protocol relative" + }), + ) + .expect("WebSearch should succeed"); + std::env::remove_var("CLAWD_WEB_SEARCH_BASE_URL"); + + // then + let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); + let results = output["results"].as_array().expect("results array"); + let search_result = results + .iter() + .find(|item| item.get("content").is_some()) + .expect("search result block present"); + let content = search_result["content"].as_array().expect("content array"); + assert_eq!(content.len(), 1); + assert_eq!(content[0]["title"], "Tokio Docs"); + assert_eq!(content[0]["url"], "https://docs.rs/tokio"); + } + #[test] fn pending_tools_preserve_multiple_streaming_tool_calls_by_index() { let mut events = Vec::new(); @@ -8409,6 +8516,28 @@ mod tests { assert!(!verification.contains("write_file")); } + #[test] + fn subagent_system_prompt_uses_resolved_model_identity() { + // given: a temporary workspace and an OpenAI-compatible subagent model + let _guard = env_guard(); + let root = temp_path("subagent-prompt-identity"); + fs::create_dir_all(&root).expect("create temp workspace"); + let previous = std::env::current_dir().expect("current dir"); + std::env::set_current_dir(&root).expect("enter temp workspace"); + + // when: building the subagent system prompt + let prompt = build_agent_system_prompt("Explore", "openai/gpt-4.1-mini") + .expect("subagent system prompt should build") + .join("\n"); + std::env::set_current_dir(previous).expect("restore current dir"); + + // then: the prompt renders a generic model family identity + assert!(prompt.contains("Model family: an AI assistant")); + assert!(!prompt.contains("Model family: Claude Opus 4.6")); + + fs::remove_dir_all(root).expect("cleanup temp workspace"); + } + #[derive(Debug)] struct MockSubagentApiClient { calls: usize,