Mirror of https://github.com/ultraworkers/claw-code.git (synced 2026-05-10 15:01:16 +08:00)
Merge pull request #2984 from andhai/pr/openai-token-limit-hardening

openai: harden token-limit handling and default output-token caps

Commit 28998422e2
@@ -14,6 +14,11 @@ const CONTEXT_WINDOW_ERROR_MARKERS: &[&str] = &[
     "too many tokens",
     "prompt is too long",
     "input is too long",
+    "input tokens exceed",
+    "configured limit",
+    "messages resulted in",
+    "completion tokens",
+    "prompt tokens",
     "request is too large",
 ];
 
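Note: the classifier that consumes CONTEXT_WINDOW_ERROR_MARKERS is outside this hunk. A minimal sketch of how such a marker table is typically consumed, assuming case-insensitive substring matching (the helper name below is hypothetical, not the crate's API):

// Abbreviated marker set; the full list is in the hunk above.
const MARKERS: &[&str] = &[
    "input tokens exceed",
    "configured limit",
    "request is too large",
];

fn looks_like_context_window_error(message: &str) -> bool {
    // Case-insensitive substring scan over the provider's error message.
    let lowered = message.to_lowercase();
    MARKERS.iter().any(|marker| lowered.contains(marker))
}

fn main() {
    // The OpenAI wording exercised by the new tests trips two new markers.
    assert!(looks_like_context_window_error(
        "Input tokens exceed the configured limit of 922000 tokens."
    ));
    assert!(!looks_like_context_window_error("rate limit exceeded"));
}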
@@ -542,6 +547,26 @@ mod tests {
         assert_eq!(error.request_id(), Some("req_ctx_123"));
     }
 
+    #[test]
+    fn classifies_openai_configured_limit_errors_as_context_window_failures() {
+        let error = ApiError::Api {
+            status: reqwest::StatusCode::BAD_REQUEST,
+            error_type: Some("invalid_request_error".to_string()),
+            message: Some(
+                "Input tokens exceed the configured limit of 922000 tokens. Your messages resulted in 1860900 tokens. Please reduce the length of the messages."
+                    .to_string(),
+            ),
+            request_id: Some("req_ctx_openai_123".to_string()),
+            body: String::new(),
+            retryable: false,
+            suggested_action: None,
+        };
+
+        assert!(error.is_context_window_failure());
+        assert_eq!(error.safe_failure_class(), "context_window");
+        assert_eq!(error.request_id(), Some("req_ctx_openai_123"));
+    }
+
     #[test]
     fn missing_credentials_without_hint_renders_the_canonical_message() {
         // given
@@ -252,17 +252,16 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
 
 #[must_use]
 pub fn max_tokens_for_model(model: &str) -> u32 {
-    model_token_limit(model).map_or_else(
-        || {
-            let canonical = resolve_model_alias(model);
-            if canonical.contains("opus") {
-                32_000
-            } else {
-                64_000
-            }
-        },
-        |limit| limit.max_output_tokens,
-    )
+    let canonical = resolve_model_alias(model);
+    let heuristic = if canonical.contains("opus") {
+        32_000
+    } else {
+        64_000
+    };
+
+    model_token_limit(model)
+        .map(|limit| heuristic.min(limit.max_output_tokens))
+        .unwrap_or(heuristic)
 }
 
 /// Returns the effective max output tokens for a model, preferring a plugin
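Review note: this restructuring changes behavior for registered models whose max_output_tokens exceeds the heuristic. A standalone sketch of the new rule (not the crate's code), using the values pinned by the tests later in this diff:

fn capped_max_tokens(heuristic: u32, registered_limit: Option<u32>) -> u32 {
    // The opus/non-opus heuristic is a ceiling; a registered per-model
    // limit can only lower the default, never raise it.
    registered_limit
        .map(|limit| heuristic.min(limit))
        .unwrap_or(heuristic)
}

fn main() {
    // gpt-4.1-mini: the registered 32_768 sits below the 64_000 heuristic.
    assert_eq!(capped_max_tokens(64_000, Some(32_768)), 32_768);
    // gpt-5.4: registered for 128_000 output tokens, but the default stays 64_000.
    assert_eq!(capped_max_tokens(64_000, Some(128_000)), 64_000);
    // Unregistered model: the heuristic alone applies.
    assert_eq!(capped_max_tokens(64_000, None), 64_000);
}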
@@ -276,7 +275,8 @@ pub fn max_tokens_for_model_with_override(model: &str, plugin_override: Option<u
 #[must_use]
 pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
     let canonical = resolve_model_alias(model);
-    match canonical.as_str() {
+    let base_model = canonical.rsplit('/').next().unwrap_or(canonical.as_str());
+    match base_model {
         "claude-opus-4-6" => Some(ModelTokenLimit {
             max_output_tokens: 32_000,
             context_window_tokens: 200_000,
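The new rsplit('/') line is what lets provider-prefixed names such as openai/gpt-4.1-mini hit the same match arms as bare names: rsplit yields segments right to left, so .next() returns the segment after the last slash, or the whole string when there is no slash (the unwrap_or fallback is purely defensive, since rsplit always yields at least one segment):

fn main() {
    assert_eq!("openai/gpt-4.1-mini".rsplit('/').next(), Some("gpt-4.1-mini"));
    assert_eq!("gpt-5.4".rsplit('/').next(), Some("gpt-5.4"));
}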
@@ -289,6 +289,20 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
             max_output_tokens: 64_000,
             context_window_tokens: 131_072,
         }),
+        // GPT-4.1 family via the OpenAI API.
+        "gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" => Some(ModelTokenLimit {
+            max_output_tokens: 32_768,
+            context_window_tokens: 1_047_576,
+        }),
+        // GPT-5.4 family via the OpenAI API.
+        "gpt-5.4" => Some(ModelTokenLimit {
+            max_output_tokens: 128_000,
+            context_window_tokens: 1_000_000,
+        }),
+        "gpt-5.4-mini" | "gpt-5.4-nano" => Some(ModelTokenLimit {
+            max_output_tokens: 128_000,
+            context_window_tokens: 400_000,
+        }),
         // Kimi models via DashScope (Moonshot AI)
         // Source: https://platform.moonshot.cn/docs/intro
         "kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit {
@@ -614,6 +628,15 @@ mod tests {
     fn keeps_existing_max_token_heuristic() {
         assert_eq!(max_tokens_for_model("opus"), 32_000);
         assert_eq!(max_tokens_for_model("grok-3"), 64_000);
+        assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
+    }
+
+    #[test]
+    fn caps_default_max_tokens_to_openai_model_limits() {
+        assert_eq!(max_tokens_for_model("gpt-4.1-mini"), 32_768);
+        assert_eq!(max_tokens_for_model("openai/gpt-4.1-mini"), 32_768);
+        assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
+        assert_eq!(max_tokens_for_model("openai/gpt-5.4"), 64_000);
     }
 
     #[test]
@@ -680,6 +703,18 @@ mod tests {
                 .context_window_tokens,
             131_072
         );
+        assert_eq!(
+            model_token_limit("openai/gpt-4.1-mini")
+                .expect("openai/gpt-4.1-mini should be registered")
+                .context_window_tokens,
+            1_047_576
+        );
+        assert_eq!(
+            model_token_limit("gpt-5.4")
+                .expect("gpt-5.4 should be registered")
+                .context_window_tokens,
+            1_000_000
+        );
     }
 
     #[test]
@@ -728,6 +763,42 @@ mod tests {
         }
     }
 
+    #[test]
+    fn preflight_blocks_oversized_requests_for_gpt_5_4() {
+        let request = MessageRequest {
+            model: "gpt-5.4".to_string(),
+            max_tokens: 64_000,
+            messages: vec![InputMessage {
+                role: "user".to_string(),
+                content: vec![InputContentBlock::Text {
+                    text: "x".repeat(3_900_000),
+                }],
+            }],
+            system: Some("Keep the answer short.".to_string()),
+            tools: None,
+            tool_choice: None,
+            stream: true,
+            ..Default::default()
+        };
+
+        let error = preflight_message_request(&request)
+            .expect_err("oversized gpt-5.4 request should be rejected before the provider call");
+
+        match error {
+            ApiError::ContextWindowExceeded {
+                model,
+                requested_output_tokens,
+                context_window_tokens,
+                ..
+            } => {
+                assert_eq!(model, "gpt-5.4");
+                assert_eq!(requested_output_tokens, 64_000);
+                assert_eq!(context_window_tokens, 1_000_000);
+            }
+            other => panic!("expected context-window preflight failure, got {other:?}"),
+        }
+    }
+
     #[test]
     fn preflight_skips_unknown_models() {
         let request = MessageRequest {
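preflight_message_request itself is not shown in this diff; the test above only pins its observable behavior. A rough sketch of the guard it implies, assuming the estimated input plus the requested output tokens are checked against the registered context window (the names and the bytes-per-token estimate here are assumptions, not the crate's API):

// Hypothetical stand-in for the registered per-model limit.
struct Limits {
    context_window_tokens: u32,
}

fn preflight(estimated_input_tokens: u64, max_tokens: u32, limits: &Limits) -> Result<(), String> {
    let budget = u64::from(limits.context_window_tokens);
    let needed = estimated_input_tokens + u64::from(max_tokens);
    if needed > budget {
        return Err(format!(
            "request needs ~{needed} tokens but the context window is {budget}"
        ));
    }
    Ok(())
}

fn main() {
    // Mirrors the new test: ~3.9 MB of "x" plus 64_000 requested output
    // tokens cannot fit a 1_000_000-token window at ~4 bytes per token.
    let limits = Limits { context_window_tokens: 1_000_000 };
    let estimated_input: u64 = 3_900_000 / 4;
    assert!(preflight(estimated_input, 64_000, &limits).is_err());
}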
@@ -148,11 +148,7 @@ impl ModelProvenance {
 }
 
 fn max_tokens_for_model(model: &str) -> u32 {
-    if model.contains("opus") {
-        32_000
-    } else {
-        64_000
-    }
+    api::max_tokens_for_model(model)
 }
 // Build-time constants injected by build.rs (fall back to static values when
 // build.rs hasn't run, e.g. in doc-test or unusual toolchain environments).
@@ -9609,6 +9605,39 @@ mod tests {
         );
     }
 
+    #[test]
+    fn openai_configured_limit_errors_are_rendered_as_context_window_guidance() {
+        let error = ApiError::Api {
+            status: "400".parse().expect("status"),
+            error_type: Some("invalid_request_error".to_string()),
+            message: Some(
+                "Input tokens exceed the configured limit of 922000 tokens. Your messages resulted in 1860900 tokens. Please reduce the length of the messages."
+                    .to_string(),
+            ),
+            request_id: Some("req_ctx_openai_456".to_string()),
+            body: String::new(),
+            retryable: false,
+            suggested_action: None,
+        };
+
+        let rendered = format_user_visible_api_error("session-issue-32", &error);
+        assert!(rendered.contains("Context window blocked"), "{rendered}");
+        assert!(rendered.contains("context_window_blocked"), "{rendered}");
+        assert!(
+            rendered.contains("Trace req_ctx_openai_456"),
+            "{rendered}"
+        );
+        assert!(
+            rendered.contains("Detail Input tokens exceed the configured limit of 922000 tokens."),
+            "{rendered}"
+        );
+        assert!(rendered.contains("Compact /compact"), "{rendered}");
+        assert!(
+            rendered.contains("Fresh session /clear --confirm"),
+            "{rendered}"
+        );
+    }
+
     #[test]
     fn retry_wrapped_context_window_errors_keep_recovery_guidance() {
         let error = ApiError::RetriesExhausted {