fix(llm): align Claude provider with current Anthropic API (#2133)

Replace invalid default model IDs (e.g. claude-sonnet-4-7) with the current
claude-sonnet-4-6, claude-opus-4-8, and claude-haiku-4-5. Route system
messages to the API system field, enable ephemeral prompt caching, omit
temperature and request adaptive thinking for Opus 4.7/4.8, and surface
cache usage metrics. Update the CLI model picker to match.

Co-authored-by: Vladimir Đuranović <vlada@MacBook-Pro.local>
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
quadcent 2026-06-30 00:50:52 +02:00 committed by GitHub
parent 85dfb07576
commit 8973d0f6c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 70 additions and 50 deletions

View File

@ -105,9 +105,9 @@ def interactive_select(
if models_per_provider is None: if models_per_provider is None:
models_per_provider = { models_per_provider = {
"claude": [ "claude": [
("claude-opus-4-5", "Claude Opus 4.5 - Most capable"), ("claude-opus-4-8", "Claude Opus 4.8 - Most capable"),
("claude-sonnet-4-7", "Claude Sonnet 4.7 - Balanced"), ("claude-sonnet-4-6", "Claude Sonnet 4.6 - Balanced"),
("claude-haiku-4-7", "Claude Haiku 4.7 - Fast"), ("claude-haiku-4-5", "Claude Haiku 4.5 - Fast"),
], ],
"openai": [ "openai": [
("gpt-4o", "GPT-4o - Most capable"), ("gpt-4o", "GPT-4o - Most capable"),

View File

@ -13,7 +13,14 @@ from llm.core.interface import (
LLMProvider, LLMProvider,
RateLimitError, RateLimitError,
) )
from llm.core.types import LLMInput, LLMOutput, Message, ModelInfo, ProviderType, ToolCall from llm.core.types import LLMInput, LLMOutput, ModelInfo, ProviderType, Role, ToolCall
# Model used when the caller does not name one.
_DEFAULT_MODEL = "claude-sonnet-4-6"

# Model-ID prefixes checked by _uses_adaptive_thinking_only(). Matching is by
# prefix, so suffixed IDs (e.g. dated snapshots) of these models match too.
_OPUS_ADAPTIVE_ONLY_PREFIXES = ("claude-opus-4-7", "claude-opus-4-8")


def _uses_adaptive_thinking_only(model: str) -> bool:
    """Return True when *model* starts with one of the adaptive-only Opus prefixes.

    Args:
        model: The model identifier to classify.

    Returns:
        True if ``model`` begins with any entry of
        ``_OPUS_ADAPTIVE_ONLY_PREFIXES``; False otherwise (including for an
        empty string).
    """
    # str.startswith accepts a tuple of candidates, so a single call replaces
    # the any(...) generator over the prefixes.
    return model.startswith(_OPUS_ADAPTIVE_ONLY_PREFIXES)
class ClaudeProvider(LLMProvider): class ClaudeProvider(LLMProvider):
@ -23,77 +30,90 @@ class ClaudeProvider(LLMProvider):
self.client = Anthropic(api_key=api_key or os.environ.get("ANTHROPIC_API_KEY"), base_url=base_url) self.client = Anthropic(api_key=api_key or os.environ.get("ANTHROPIC_API_KEY"), base_url=base_url)
self._models = [ self._models = [
ModelInfo( ModelInfo(
name="claude-opus-4-5", name="claude-opus-4-8",
provider=ProviderType.CLAUDE, provider=ProviderType.CLAUDE,
supports_tools=True, supports_tools=True,
supports_vision=True, supports_vision=True,
max_tokens=8192, max_tokens=64000,
context_window=200000, context_window=1_000_000,
), ),
ModelInfo( ModelInfo(
name="claude-sonnet-4-7", name="claude-sonnet-4-6",
provider=ProviderType.CLAUDE, provider=ProviderType.CLAUDE,
supports_tools=True, supports_tools=True,
supports_vision=True, supports_vision=True,
max_tokens=8192, max_tokens=64000,
context_window=200000, context_window=1_000_000,
), ),
ModelInfo( ModelInfo(
name="claude-haiku-4-7", name="claude-haiku-4-5",
provider=ProviderType.CLAUDE, provider=ProviderType.CLAUDE,
supports_tools=True, supports_tools=True,
supports_vision=False, supports_vision=True,
max_tokens=4096, max_tokens=16000,
context_window=200000, context_window=200_000,
), ),
] ]
def generate(self, input: LLMInput) -> LLMOutput: def generate(self, input: LLMInput) -> LLMOutput:
try: try:
model = input.model or _DEFAULT_MODEL
system_parts = [msg.content for msg in input.messages if msg.role == Role.SYSTEM]
api_messages = [
msg.to_dict() for msg in input.messages if msg.role not in (Role.SYSTEM,)
]
params: dict[str, Any] = { params: dict[str, Any] = {
"model": input.model or "claude-sonnet-4-7", "model": model,
"messages": [msg.to_dict() for msg in input.messages], "messages": api_messages,
"temperature": input.temperature, "max_tokens": input.max_tokens if input.max_tokens else 16000,
"cache_control": {"type": "ephemeral"},
} }
if input.max_tokens: if system_parts:
params["max_tokens"] = input.max_tokens params["system"] = "\n\n".join(system_parts)
else: if input.tools:
params["max_tokens"] = 8192 # required by Anthropic API params["tools"] = [tool.to_anthropic_tool() for tool in input.tools]
if input.tools: if not _uses_adaptive_thinking_only(model):
params["tools"] = [tool.to_anthropic_tool() for tool in input.tools] params["temperature"] = input.temperature
if _uses_adaptive_thinking_only(model):
params["thinking"] = {"type": "adaptive"}
response = self.client.messages.create(**params) response = self.client.messages.create(**params)
text_parts: list[str] = [] text_parts: list[str] = []
tool_calls: list[ToolCall] = [] tool_calls: list[ToolCall] = []
for block in response.content or []: for block in response.content or []:
block_type = getattr(block, "type", None) block_type = getattr(block, "type", None)
if block_type == "text": if block_type == "text":
text = getattr(block, "text", "") text = getattr(block, "text", "")
if text: if text:
text_parts.append(text) text_parts.append(text)
elif block_type == "tool_use": elif block_type == "tool_use":
raw_arguments = getattr(block, "input", {}) raw_arguments = getattr(block, "input", {})
arguments = ( arguments = (
raw_arguments.copy() raw_arguments.copy()
if isinstance(raw_arguments, dict) if isinstance(raw_arguments, dict)
else getattr(raw_arguments, "__dict__", {}).copy() else getattr(raw_arguments, "__dict__", {}).copy()
) )
tool_calls.append( tool_calls.append(
ToolCall( ToolCall(
id=getattr(block, "id", ""), id=getattr(block, "id", ""),
name=getattr(block, "name", ""), name=getattr(block, "name", ""),
arguments=arguments, arguments=arguments,
) )
) )
return LLMOutput( return LLMOutput(
content="".join(text_parts), content="".join(text_parts),
tool_calls=tool_calls or None, tool_calls=tool_calls or None,
model=response.model, model=response.model,
usage={ usage={
"input_tokens": response.usage.input_tokens, "input_tokens": response.usage.input_tokens,
"output_tokens": response.usage.output_tokens, "output_tokens": response.usage.output_tokens,
"cache_creation_input_tokens": getattr(
response.usage, "cache_creation_input_tokens", 0
),
"cache_read_input_tokens": getattr(response.usage, "cache_read_input_tokens", 0),
}, },
stop_reason=response.stop_reason, stop_reason=response.stop_reason,
) )
@ -114,4 +134,4 @@ class ClaudeProvider(LLMProvider):
return bool(self.client.api_key) return bool(self.client.api_key)
def get_default_model(self) -> str: def get_default_model(self) -> str:
return "claude-sonnet-4-7" return _DEFAULT_MODEL