Cap OpenAI default output tokens using model metadata

This commit is contained in:
Andreas Haida 2026-05-03 18:46:04 +02:00
parent 6ac13ffdad
commit 9a512633a5
2 changed files with 84 additions and 17 deletions

View File

@ -252,17 +252,16 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
#[must_use] #[must_use]
pub fn max_tokens_for_model(model: &str) -> u32 { pub fn max_tokens_for_model(model: &str) -> u32 {
model_token_limit(model).map_or_else(
|| {
let canonical = resolve_model_alias(model); let canonical = resolve_model_alias(model);
if canonical.contains("opus") { let heuristic = if canonical.contains("opus") {
32_000 32_000
} else { } else {
64_000 64_000
} };
},
|limit| limit.max_output_tokens, model_token_limit(model)
) .map(|limit| heuristic.min(limit.max_output_tokens))
.unwrap_or(heuristic)
} }
/// Returns the effective max output tokens for a model, preferring a plugin /// Returns the effective max output tokens for a model, preferring a plugin
@ -276,7 +275,8 @@ pub fn max_tokens_for_model_with_override(model: &str, plugin_override: Option<u
#[must_use] #[must_use]
pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> { pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
let canonical = resolve_model_alias(model); let canonical = resolve_model_alias(model);
match canonical.as_str() { let base_model = canonical.rsplit('/').next().unwrap_or(canonical.as_str());
match base_model {
"claude-opus-4-6" => Some(ModelTokenLimit { "claude-opus-4-6" => Some(ModelTokenLimit {
max_output_tokens: 32_000, max_output_tokens: 32_000,
context_window_tokens: 200_000, context_window_tokens: 200_000,
@ -289,6 +289,20 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
max_output_tokens: 64_000, max_output_tokens: 64_000,
context_window_tokens: 131_072, context_window_tokens: 131_072,
}), }),
// GPT-4.1 family via the OpenAI API.
"gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" => Some(ModelTokenLimit {
max_output_tokens: 32_768,
context_window_tokens: 1_047_576,
}),
// GPT-5.4 family via the OpenAI API.
"gpt-5.4" => Some(ModelTokenLimit {
max_output_tokens: 128_000,
context_window_tokens: 1_000_000,
}),
"gpt-5.4-mini" | "gpt-5.4-nano" => Some(ModelTokenLimit {
max_output_tokens: 128_000,
context_window_tokens: 400_000,
}),
// Kimi models via DashScope (Moonshot AI) // Kimi models via DashScope (Moonshot AI)
// Source: https://platform.moonshot.cn/docs/intro // Source: https://platform.moonshot.cn/docs/intro
"kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit { "kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit {
@ -614,6 +628,15 @@ mod tests {
fn keeps_existing_max_token_heuristic() { fn keeps_existing_max_token_heuristic() {
assert_eq!(max_tokens_for_model("opus"), 32_000); assert_eq!(max_tokens_for_model("opus"), 32_000);
assert_eq!(max_tokens_for_model("grok-3"), 64_000); assert_eq!(max_tokens_for_model("grok-3"), 64_000);
assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
}
#[test]
fn caps_default_max_tokens_to_openai_model_limits() {
    // Registered OpenAI limits must cap the 64k default (gpt-4.1-mini),
    // and a limit above the default must leave it unchanged (gpt-5.4).
    // Provider-prefixed names resolve to the same base model.
    let cases = [
        ("gpt-4.1-mini", 32_768),
        ("openai/gpt-4.1-mini", 32_768),
        ("gpt-5.4", 64_000),
        ("openai/gpt-5.4", 64_000),
    ];
    for (model, expected) in cases {
        assert_eq!(max_tokens_for_model(model), expected, "model: {model}");
    }
}
#[test] #[test]
@ -680,6 +703,18 @@ mod tests {
.context_window_tokens, .context_window_tokens,
131_072 131_072
); );
assert_eq!(
model_token_limit("openai/gpt-4.1-mini")
.expect("openai/gpt-4.1-mini should be registered")
.context_window_tokens,
1_047_576
);
assert_eq!(
model_token_limit("gpt-5.4")
.expect("gpt-5.4 should be registered")
.context_window_tokens,
1_000_000
);
} }
#[test] #[test]
@ -728,6 +763,42 @@ mod tests {
} }
} }
#[test]
fn preflight_blocks_oversized_requests_for_gpt_5_4() {
    // Build a streaming request whose prompt text alone (3.9M chars) plus
    // the requested 64k output tokens should exceed gpt-5.4's registered
    // 1M-token context window.
    // NOTE(review): assumes ~4 chars/token estimation in the preflight
    // check — confirm against preflight_message_request's tokenizer.
    let request = MessageRequest {
        model: "gpt-5.4".to_string(),
        max_tokens: 64_000,
        messages: vec![InputMessage {
            role: "user".to_string(),
            content: vec![InputContentBlock::Text {
                text: "x".repeat(3_900_000),
            }],
        }],
        system: Some("Keep the answer short.".to_string()),
        tools: None,
        tool_choice: None,
        stream: true,
        ..Default::default()
    };
    // The request must be rejected locally, before any provider call.
    let error = preflight_message_request(&request)
        .expect_err("oversized gpt-5.4 request should be rejected before the provider call");
    match error {
        // The error must carry the model name, the caller's requested
        // output budget, and the registered context window verbatim.
        ApiError::ContextWindowExceeded {
            model,
            requested_output_tokens,
            context_window_tokens,
            ..
        } => {
            assert_eq!(model, "gpt-5.4");
            assert_eq!(requested_output_tokens, 64_000);
            assert_eq!(context_window_tokens, 1_000_000);
        }
        // Any other error kind means the wrong check fired.
        other => panic!("expected context-window preflight failure, got {other:?}"),
    }
}
#[test] #[test]
fn preflight_skips_unknown_models() { fn preflight_skips_unknown_models() {
let request = MessageRequest { let request = MessageRequest {

View File

@ -148,11 +148,7 @@ impl ModelProvenance {
} }
fn max_tokens_for_model(model: &str) -> u32 { fn max_tokens_for_model(model: &str) -> u32 {
if model.contains("opus") { api::max_tokens_for_model(model)
32_000
} else {
64_000
}
} }
// Build-time constants injected by build.rs (fall back to static values when // Build-time constants injected by build.rs (fall back to static values when
// build.rs hasn't run, e.g. in doc-test or unusual toolchain environments). // build.rs hasn't run, e.g. in doc-test or unusual toolchain environments).