From 8973d0f6c53c0eb522a1f9007847e6706d15ed58 Mon Sep 17 00:00:00 2001 From: quadcent Date: Tue, 30 Jun 2026 00:50:52 +0200 Subject: [PATCH] fix(llm): align Claude provider with current Anthropic API (#2133) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace invalid default model IDs (e.g. claude-sonnet-4-7) with current claude-sonnet-4-6, claude-opus-4-8, and claude-haiku-4-5. Route system messages to the API system field, enable ephemeral prompt caching, omit temperature for Opus 4.7/4.8, and surface cache usage metrics. Update the CLI model picker to match. Co-authored-by: Vladimir Đuranović Co-authored-by: Cursor --- src/llm/cli/selector.py | 6 +- src/llm/providers/claude.py | 114 +++++++++++++++++++++--------------- 2 files changed, 70 insertions(+), 50 deletions(-) diff --git a/src/llm/cli/selector.py b/src/llm/cli/selector.py index 4419b2bc..87b513e2 100644 --- a/src/llm/cli/selector.py +++ b/src/llm/cli/selector.py @@ -105,9 +105,9 @@ def interactive_select( if models_per_provider is None: models_per_provider = { "claude": [ - ("claude-opus-4-5", "Claude Opus 4.5 - Most capable"), - ("claude-sonnet-4-7", "Claude Sonnet 4.7 - Balanced"), - ("claude-haiku-4-7", "Claude Haiku 4.7 - Fast"), + ("claude-opus-4-8", "Claude Opus 4.8 - Most capable"), + ("claude-sonnet-4-6", "Claude Sonnet 4.6 - Balanced"), + ("claude-haiku-4-5", "Claude Haiku 4.5 - Fast"), ], "openai": [ ("gpt-4o", "GPT-4o - Most capable"), diff --git a/src/llm/providers/claude.py b/src/llm/providers/claude.py index 2383db4e..55cce895 100644 --- a/src/llm/providers/claude.py +++ b/src/llm/providers/claude.py @@ -13,7 +13,14 @@ from llm.core.interface import ( LLMProvider, RateLimitError, ) -from llm.core.types import LLMInput, LLMOutput, Message, ModelInfo, ProviderType, ToolCall +from llm.core.types import LLMInput, LLMOutput, ModelInfo, ProviderType, Role, ToolCall + +_DEFAULT_MODEL = "claude-sonnet-4-6" +_OPUS_ADAPTIVE_ONLY_PREFIXES = ("claude-opus-4-7", "claude-opus-4-8") + + +def _uses_adaptive_thinking_only(model: str) -> bool: + return any(model.startswith(prefix) for prefix in _OPUS_ADAPTIVE_ONLY_PREFIXES) class ClaudeProvider(LLMProvider): @@ -23,77 +30,90 @@ class ClaudeProvider(LLMProvider): self.client = Anthropic(api_key=api_key or os.environ.get("ANTHROPIC_API_KEY"), base_url=base_url) self._models = [ ModelInfo( - name="claude-opus-4-5", + name="claude-opus-4-8", provider=ProviderType.CLAUDE, supports_tools=True, supports_vision=True, - max_tokens=8192, - context_window=200000, + max_tokens=64000, + context_window=1_000_000, ), ModelInfo( - name="claude-sonnet-4-7", + name="claude-sonnet-4-6", provider=ProviderType.CLAUDE, supports_tools=True, supports_vision=True, - max_tokens=8192, - context_window=200000, + max_tokens=64000, + context_window=1_000_000, ), ModelInfo( - name="claude-haiku-4-7", + name="claude-haiku-4-5", provider=ProviderType.CLAUDE, supports_tools=True, - supports_vision=False, - max_tokens=4096, - context_window=200000, + supports_vision=True, + max_tokens=16000, + context_window=200_000, ), ] def generate(self, input: LLMInput) -> LLMOutput: try: + model = input.model or _DEFAULT_MODEL + system_parts = [msg.content for msg in input.messages if msg.role == Role.SYSTEM] + api_messages = [ + msg.to_dict() for msg in input.messages if msg.role not in (Role.SYSTEM,) + ] + params: dict[str, Any] = { - "model": input.model or "claude-sonnet-4-7", - "messages": [msg.to_dict() for msg in input.messages], - "temperature": input.temperature, + "model": model, + "messages": api_messages, + "max_tokens": input.max_tokens if input.max_tokens else 16000, + "cache_control": {"type": "ephemeral"}, } - if input.max_tokens: - params["max_tokens"] = input.max_tokens - else: - params["max_tokens"] = 8192 # required by Anthropic API - if input.tools: - params["tools"] = [tool.to_anthropic_tool() for tool in input.tools] + if system_parts: + params["system"] = "\n\n".join(system_parts) + if input.tools: + params["tools"] = [tool.to_anthropic_tool() for tool in input.tools] + if not _uses_adaptive_thinking_only(model): + params["temperature"] = input.temperature + if _uses_adaptive_thinking_only(model): + params["thinking"] = {"type": "adaptive"} response = self.client.messages.create(**params) - text_parts: list[str] = [] - tool_calls: list[ToolCall] = [] - for block in response.content or []: - block_type = getattr(block, "type", None) - if block_type == "text": - text = getattr(block, "text", "") - if text: - text_parts.append(text) - elif block_type == "tool_use": - raw_arguments = getattr(block, "input", {}) - arguments = ( - raw_arguments.copy() - if isinstance(raw_arguments, dict) - else getattr(raw_arguments, "__dict__", {}).copy() - ) - tool_calls.append( - ToolCall( - id=getattr(block, "id", ""), - name=getattr(block, "name", ""), - arguments=arguments, - ) - ) - - return LLMOutput( - content="".join(text_parts), - tool_calls=tool_calls or None, + text_parts: list[str] = [] + tool_calls: list[ToolCall] = [] + for block in response.content or []: + block_type = getattr(block, "type", None) + if block_type == "text": + text = getattr(block, "text", "") + if text: + text_parts.append(text) + elif block_type == "tool_use": + raw_arguments = getattr(block, "input", {}) + arguments = ( + raw_arguments.copy() + if isinstance(raw_arguments, dict) + else getattr(raw_arguments, "__dict__", {}).copy() + ) + tool_calls.append( + ToolCall( + id=getattr(block, "id", ""), + name=getattr(block, "name", ""), + arguments=arguments, + ) + ) + + return LLMOutput( + content="".join(text_parts), + tool_calls=tool_calls or None, model=response.model, usage={ "input_tokens": response.usage.input_tokens, "output_tokens": response.usage.output_tokens, + "cache_creation_input_tokens": getattr( + response.usage, "cache_creation_input_tokens", 0 + ), + "cache_read_input_tokens": getattr(response.usage, "cache_read_input_tokens", 0), }, stop_reason=response.stop_reason, ) @@ -114,4 +134,4 @@ class ClaudeProvider(LLMProvider): return bool(self.client.api_key) def get_default_model(self) -> str: - return "claude-sonnet-4-7" + return _DEFAULT_MODEL