Merge pull request #2984 from andhai/pr/openai-token-limit-hardening

openai: harden token-limit handling and default output-token caps
YeonGyu-Kim 2026-05-06 14:53:24 +09:00 committed by GitHub
commit 28998422e2
3 changed files with 142 additions and 17 deletions

View File

@@ -14,6 +14,11 @@ const CONTEXT_WINDOW_ERROR_MARKERS: &[&str] = &[
     "too many tokens",
     "prompt is too long",
     "input is too long",
+    "input tokens exceed",
+    "configured limit",
+    "messages resulted in",
+    "completion tokens",
+    "prompt tokens",
     "request is too large",
 ];
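
Note: this diff does not show the matcher that consumes CONTEXT_WINDOW_ERROR_MARKERS; the tests below only exercise it through ApiError::is_context_window_failure(). As a rough sketch of how such marker lists are typically applied (an assumption, not this crate's actual implementation), classification can be a case-insensitive substring scan over the provider's error message:

    // Hypothetical sketch only: the function name and shape are illustrative;
    // CONTEXT_WINDOW_ERROR_MARKERS is the real array extended above.
    fn looks_like_context_window_error(message: &str) -> bool {
        let lowered = message.to_lowercase();
        CONTEXT_WINDOW_ERROR_MARKERS
            .iter()
            .any(|marker| lowered.contains(marker))
    }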
@@ -542,6 +547,26 @@ mod tests {
         assert_eq!(error.request_id(), Some("req_ctx_123"));
     }
 
+    #[test]
+    fn classifies_openai_configured_limit_errors_as_context_window_failures() {
+        let error = ApiError::Api {
+            status: reqwest::StatusCode::BAD_REQUEST,
+            error_type: Some("invalid_request_error".to_string()),
+            message: Some(
+                "Input tokens exceed the configured limit of 922000 tokens. Your messages resulted in 1860900 tokens. Please reduce the length of the messages."
+                    .to_string(),
+            ),
+            request_id: Some("req_ctx_openai_123".to_string()),
+            body: String::new(),
+            retryable: false,
+            suggested_action: None,
+        };
+
+        assert!(error.is_context_window_failure());
+        assert_eq!(error.safe_failure_class(), "context_window");
+        assert_eq!(error.request_id(), Some("req_ctx_openai_123"));
+    }
+
     #[test]
     fn missing_credentials_without_hint_renders_the_canonical_message() {
         // given

View File

@@ -252,17 +252,16 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
 #[must_use]
 pub fn max_tokens_for_model(model: &str) -> u32 {
-    model_token_limit(model).map_or_else(
-        || {
-            let canonical = resolve_model_alias(model);
-            if canonical.contains("opus") {
-                32_000
-            } else {
-                64_000
-            }
-        },
-        |limit| limit.max_output_tokens,
-    )
+    let canonical = resolve_model_alias(model);
+    let heuristic = if canonical.contains("opus") {
+        32_000
+    } else {
+        64_000
+    };
+
+    model_token_limit(model)
+        .map(|limit| heuristic.min(limit.max_output_tokens))
+        .unwrap_or(heuristic)
 }
 
 /// Returns the effective max output tokens for a model, preferring a plugin
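
Note: the rewrite above changes max_tokens_for_model from "registered limit, else heuristic" to "heuristic, capped by the registered limit", so a per-model cap can lower the default output budget but never raise it past the 32_000/64_000 heuristic. Worked examples; the first two values come from the tests in this diff, the third uses an illustrative unknown model name:

    assert_eq!(max_tokens_for_model("gpt-4.1-mini"), 32_768);       // min(64_000 heuristic, 32_768 cap)
    assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);            // 128_000 cap does not raise the heuristic
    assert_eq!(max_tokens_for_model("some-unknown-model"), 64_000); // no registered cap: plain heuristic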
@@ -276,7 +275,8 @@ pub fn max_tokens_for_model_with_override(model: &str, plugin_override: Option<u
 #[must_use]
 pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
     let canonical = resolve_model_alias(model);
-    match canonical.as_str() {
+    let base_model = canonical.rsplit('/').next().unwrap_or(canonical.as_str());
+    match base_model {
         "claude-opus-4-6" => Some(ModelTokenLimit {
             max_output_tokens: 32_000,
             context_window_tokens: 200_000,
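
Note: the new base_model line means provider-prefixed names like "openai/gpt-4.1-mini" hit the same match arm as the bare model name. A minimal standalone sketch of the stripping (assuming resolve_model_alias leaves these names unchanged):

    let canonical = "openai/gpt-4.1-mini";
    let base_model = canonical.rsplit('/').next().unwrap_or(canonical);
    assert_eq!(base_model, "gpt-4.1-mini"); // provider prefix dropped
    assert_eq!("gpt-5.4".rsplit('/').next().unwrap(), "gpt-5.4"); // no-op without a prefix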
@@ -289,6 +289,20 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
             max_output_tokens: 64_000,
             context_window_tokens: 131_072,
         }),
+        // GPT-4.1 family via the OpenAI API.
+        "gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" => Some(ModelTokenLimit {
+            max_output_tokens: 32_768,
+            context_window_tokens: 1_047_576,
+        }),
+        // GPT-5.4 family via the OpenAI API.
+        "gpt-5.4" => Some(ModelTokenLimit {
+            max_output_tokens: 128_000,
+            context_window_tokens: 1_000_000,
+        }),
+        "gpt-5.4-mini" | "gpt-5.4-nano" => Some(ModelTokenLimit {
+            max_output_tokens: 128_000,
+            context_window_tokens: 400_000,
+        }),
         // Kimi models via DashScope (Moonshot AI)
         // Source: https://platform.moonshot.cn/docs/intro
         "kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit {
@ -614,6 +628,15 @@ mod tests {
fn keeps_existing_max_token_heuristic() { fn keeps_existing_max_token_heuristic() {
assert_eq!(max_tokens_for_model("opus"), 32_000); assert_eq!(max_tokens_for_model("opus"), 32_000);
assert_eq!(max_tokens_for_model("grok-3"), 64_000); assert_eq!(max_tokens_for_model("grok-3"), 64_000);
assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
}
#[test]
fn caps_default_max_tokens_to_openai_model_limits() {
assert_eq!(max_tokens_for_model("gpt-4.1-mini"), 32_768);
assert_eq!(max_tokens_for_model("openai/gpt-4.1-mini"), 32_768);
assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
assert_eq!(max_tokens_for_model("openai/gpt-5.4"), 64_000);
} }
#[test] #[test]
@@ -680,6 +703,18 @@ mod tests {
                 .context_window_tokens,
             131_072
         );
+        assert_eq!(
+            model_token_limit("openai/gpt-4.1-mini")
+                .expect("openai/gpt-4.1-mini should be registered")
+                .context_window_tokens,
+            1_047_576
+        );
+        assert_eq!(
+            model_token_limit("gpt-5.4")
+                .expect("gpt-5.4 should be registered")
+                .context_window_tokens,
+            1_000_000
+        );
     }
 
     #[test]
@@ -728,6 +763,42 @@ mod tests {
         }
     }
 
+    #[test]
+    fn preflight_blocks_oversized_requests_for_gpt_5_4() {
+        let request = MessageRequest {
+            model: "gpt-5.4".to_string(),
+            max_tokens: 64_000,
+            messages: vec![InputMessage {
+                role: "user".to_string(),
+                content: vec![InputContentBlock::Text {
+                    text: "x".repeat(3_900_000),
+                }],
+            }],
+            system: Some("Keep the answer short.".to_string()),
+            tools: None,
+            tool_choice: None,
+            stream: true,
+            ..Default::default()
+        };
+
+        let error = preflight_message_request(&request)
+            .expect_err("oversized gpt-5.4 request should be rejected before the provider call");
+
+        match error {
+            ApiError::ContextWindowExceeded {
+                model,
+                requested_output_tokens,
+                context_window_tokens,
+                ..
+            } => {
+                assert_eq!(model, "gpt-5.4");
+                assert_eq!(requested_output_tokens, 64_000);
+                assert_eq!(context_window_tokens, 1_000_000);
+            }
+            other => panic!("expected context-window preflight failure, got {other:?}"),
+        }
+    }
+
     #[test]
     fn preflight_skips_unknown_models() {
         let request = MessageRequest {
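
Note: rough arithmetic behind the oversized-request test above, assuming the preflight estimates input size at about four characters per token (the estimator itself is not shown in this diff, so that ratio is an assumption):

    let estimated_input_tokens = 3_900_000 / 4; // ~975_000 tokens for "x".repeat(3_900_000)
    let requested_output_tokens = 64_000;       // max_tokens in the request
    let context_window_tokens = 1_000_000;      // registered window for gpt-5.4
    assert!(estimated_input_tokens + requested_output_tokens > context_window_tokens);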

View File

@@ -148,11 +148,7 @@ impl ModelProvenance {
 }
 
 fn max_tokens_for_model(model: &str) -> u32 {
-    if model.contains("opus") {
-        32_000
-    } else {
-        64_000
-    }
+    api::max_tokens_for_model(model)
 }
 
 // Build-time constants injected by build.rs (fall back to static values when
 // build.rs hasn't run, e.g. in doc-test or unusual toolchain environments).
@@ -9609,6 +9605,39 @@ mod tests {
         );
     }
 
+    #[test]
+    fn openai_configured_limit_errors_are_rendered_as_context_window_guidance() {
+        let error = ApiError::Api {
+            status: "400".parse().expect("status"),
+            error_type: Some("invalid_request_error".to_string()),
+            message: Some(
+                "Input tokens exceed the configured limit of 922000 tokens. Your messages resulted in 1860900 tokens. Please reduce the length of the messages."
+                    .to_string(),
+            ),
+            request_id: Some("req_ctx_openai_456".to_string()),
+            body: String::new(),
+            retryable: false,
+            suggested_action: None,
+        };
+
+        let rendered = format_user_visible_api_error("session-issue-32", &error);
+
+        assert!(rendered.contains("Context window blocked"), "{rendered}");
+        assert!(rendered.contains("context_window_blocked"), "{rendered}");
+        assert!(
+            rendered.contains("Trace req_ctx_openai_456"),
+            "{rendered}"
+        );
+        assert!(
+            rendered.contains("Detail Input tokens exceed the configured limit of 922000 tokens."),
+            "{rendered}"
+        );
+        assert!(rendered.contains("Compact /compact"), "{rendered}");
+        assert!(
+            rendered.contains("Fresh session /clear --confirm"),
+            "{rendered}"
+        );
+    }
+
     #[test]
     fn retry_wrapped_context_window_errors_keep_recovery_guidance() {
         let error = ApiError::RetriesExhausted {
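
Note: the assertions in the rendering test added above imply user-facing output roughly along these lines; the exact layout is not shown in this diff, so this is an inferred sketch rather than verbatim output:

    Context window blocked (context_window_blocked)
    Trace req_ctx_openai_456
    Detail Input tokens exceed the configured limit of 922000 tokens. [...]
    Compact /compact
    Fresh session /clear --confirm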