mirror of
https://github.com/ultraworkers/claw-code.git
synced 2026-05-16 18:41:46 +08:00
Cap OpenAI default output tokens using model metadata
This commit is contained in:
parent
6ac13ffdad
commit
9a512633a5
@ -252,17 +252,16 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
|
|||||||
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn max_tokens_for_model(model: &str) -> u32 {
|
pub fn max_tokens_for_model(model: &str) -> u32 {
|
||||||
model_token_limit(model).map_or_else(
|
|
||||||
|| {
|
|
||||||
let canonical = resolve_model_alias(model);
|
let canonical = resolve_model_alias(model);
|
||||||
if canonical.contains("opus") {
|
let heuristic = if canonical.contains("opus") {
|
||||||
32_000
|
32_000
|
||||||
} else {
|
} else {
|
||||||
64_000
|
64_000
|
||||||
}
|
};
|
||||||
},
|
|
||||||
|limit| limit.max_output_tokens,
|
model_token_limit(model)
|
||||||
)
|
.map(|limit| heuristic.min(limit.max_output_tokens))
|
||||||
|
.unwrap_or(heuristic)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the effective max output tokens for a model, preferring a plugin
|
/// Returns the effective max output tokens for a model, preferring a plugin
|
||||||
@ -276,7 +275,8 @@ pub fn max_tokens_for_model_with_override(model: &str, plugin_override: Option<u
|
|||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
|
pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
|
||||||
let canonical = resolve_model_alias(model);
|
let canonical = resolve_model_alias(model);
|
||||||
match canonical.as_str() {
|
let base_model = canonical.rsplit('/').next().unwrap_or(canonical.as_str());
|
||||||
|
match base_model {
|
||||||
"claude-opus-4-6" => Some(ModelTokenLimit {
|
"claude-opus-4-6" => Some(ModelTokenLimit {
|
||||||
max_output_tokens: 32_000,
|
max_output_tokens: 32_000,
|
||||||
context_window_tokens: 200_000,
|
context_window_tokens: 200_000,
|
||||||
@ -289,6 +289,20 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
|
|||||||
max_output_tokens: 64_000,
|
max_output_tokens: 64_000,
|
||||||
context_window_tokens: 131_072,
|
context_window_tokens: 131_072,
|
||||||
}),
|
}),
|
||||||
|
// GPT-4.1 family via the OpenAI API.
|
||||||
|
"gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" => Some(ModelTokenLimit {
|
||||||
|
max_output_tokens: 32_768,
|
||||||
|
context_window_tokens: 1_047_576,
|
||||||
|
}),
|
||||||
|
// GPT-5.4 family via the OpenAI API.
|
||||||
|
"gpt-5.4" => Some(ModelTokenLimit {
|
||||||
|
max_output_tokens: 128_000,
|
||||||
|
context_window_tokens: 1_000_000,
|
||||||
|
}),
|
||||||
|
"gpt-5.4-mini" | "gpt-5.4-nano" => Some(ModelTokenLimit {
|
||||||
|
max_output_tokens: 128_000,
|
||||||
|
context_window_tokens: 400_000,
|
||||||
|
}),
|
||||||
// Kimi models via DashScope (Moonshot AI)
|
// Kimi models via DashScope (Moonshot AI)
|
||||||
// Source: https://platform.moonshot.cn/docs/intro
|
// Source: https://platform.moonshot.cn/docs/intro
|
||||||
"kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit {
|
"kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit {
|
||||||
@ -614,6 +628,15 @@ mod tests {
|
|||||||
fn keeps_existing_max_token_heuristic() {
|
fn keeps_existing_max_token_heuristic() {
|
||||||
assert_eq!(max_tokens_for_model("opus"), 32_000);
|
assert_eq!(max_tokens_for_model("opus"), 32_000);
|
||||||
assert_eq!(max_tokens_for_model("grok-3"), 64_000);
|
assert_eq!(max_tokens_for_model("grok-3"), 64_000);
|
||||||
|
assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn caps_default_max_tokens_to_openai_model_limits() {
|
||||||
|
assert_eq!(max_tokens_for_model("gpt-4.1-mini"), 32_768);
|
||||||
|
assert_eq!(max_tokens_for_model("openai/gpt-4.1-mini"), 32_768);
|
||||||
|
assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
|
||||||
|
assert_eq!(max_tokens_for_model("openai/gpt-5.4"), 64_000);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -680,6 +703,18 @@ mod tests {
|
|||||||
.context_window_tokens,
|
.context_window_tokens,
|
||||||
131_072
|
131_072
|
||||||
);
|
);
|
||||||
|
assert_eq!(
|
||||||
|
model_token_limit("openai/gpt-4.1-mini")
|
||||||
|
.expect("openai/gpt-4.1-mini should be registered")
|
||||||
|
.context_window_tokens,
|
||||||
|
1_047_576
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
model_token_limit("gpt-5.4")
|
||||||
|
.expect("gpt-5.4 should be registered")
|
||||||
|
.context_window_tokens,
|
||||||
|
1_000_000
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -728,6 +763,42 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn preflight_blocks_oversized_requests_for_gpt_5_4() {
|
||||||
|
let request = MessageRequest {
|
||||||
|
model: "gpt-5.4".to_string(),
|
||||||
|
max_tokens: 64_000,
|
||||||
|
messages: vec![InputMessage {
|
||||||
|
role: "user".to_string(),
|
||||||
|
content: vec![InputContentBlock::Text {
|
||||||
|
text: "x".repeat(3_900_000),
|
||||||
|
}],
|
||||||
|
}],
|
||||||
|
system: Some("Keep the answer short.".to_string()),
|
||||||
|
tools: None,
|
||||||
|
tool_choice: None,
|
||||||
|
stream: true,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let error = preflight_message_request(&request)
|
||||||
|
.expect_err("oversized gpt-5.4 request should be rejected before the provider call");
|
||||||
|
|
||||||
|
match error {
|
||||||
|
ApiError::ContextWindowExceeded {
|
||||||
|
model,
|
||||||
|
requested_output_tokens,
|
||||||
|
context_window_tokens,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
assert_eq!(model, "gpt-5.4");
|
||||||
|
assert_eq!(requested_output_tokens, 64_000);
|
||||||
|
assert_eq!(context_window_tokens, 1_000_000);
|
||||||
|
}
|
||||||
|
other => panic!("expected context-window preflight failure, got {other:?}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn preflight_skips_unknown_models() {
|
fn preflight_skips_unknown_models() {
|
||||||
let request = MessageRequest {
|
let request = MessageRequest {
|
||||||
|
|||||||
@ -148,11 +148,7 @@ impl ModelProvenance {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn max_tokens_for_model(model: &str) -> u32 {
|
fn max_tokens_for_model(model: &str) -> u32 {
|
||||||
if model.contains("opus") {
|
api::max_tokens_for_model(model)
|
||||||
32_000
|
|
||||||
} else {
|
|
||||||
64_000
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Build-time constants injected by build.rs (fall back to static values when
|
// Build-time constants injected by build.rs (fall back to static values when
|
||||||
// build.rs hasn't run, e.g. in doc-test or unusual toolchain environments).
|
// build.rs hasn't run, e.g. in doc-test or unusual toolchain environments).
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user