From 00d0eb61d4cfc1abd345dc76023dd21e420f6488 Mon Sep 17 00:00:00 2001 From: Yeachan-Heo Date: Fri, 17 Apr 2026 04:15:38 +0000 Subject: [PATCH] US-024: Add token limit metadata for kimi models Add ModelTokenLimit entries for kimi-k2.5 and kimi-k1.5 to enable preflight context window validation. Per Moonshot AI documentation: - Context window: 256,000 tokens - Max output: 16,384 tokens Includes 3 unit tests: - returns_context_window_metadata_for_kimi_models - kimi_alias_resolves_to_kimi_k25_token_limits - preflight_blocks_oversized_requests_for_kimi_models All tests pass, clippy clean. Co-Authored-By: Claude Opus 4.6 --- prd.json | 21 +++++++-- rust/crates/api/src/providers/mod.rs | 69 ++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 3 deletions(-) diff --git a/prd.json b/prd.json index 00684bb..8faaa54 100644 --- a/prd.json +++ b/prd.json @@ -329,13 +329,28 @@ ], "passes": true, "priority": "P1" + }, + { + "id": "US-024", + "title": "Add token limit metadata for kimi models", + "description": "The model_token_limit() function has no entries for kimi-k2.5 or kimi-k1.5, causing preflight context window validation to skip these models. Add token limit metadata to enable preflight checks and accurate max token defaults. 
Per Moonshot AI documentation, kimi-k2.5 supports 256K context window and 16K max output tokens.", + "acceptanceCriteria": [ + "model_token_limit('kimi-k2.5') returns Some(ModelTokenLimit { max_output_tokens: 16384, context_window_tokens: 256000 })", + "model_token_limit('kimi-k1.5') returns appropriate limits", + "model_token_limit('kimi') follows alias chain (kimi → kimi-k2.5) and returns k2.5 limits", + "preflight_message_request() validates context window for kimi models (via generic preflight, no provider-specific code needed)", + "Unit tests verify limits and preflight behavior for kimi models", + "All tests pass and clippy is clean" + ], + "passes": true, + "priority": "P1" } ], "metadata": { - "lastUpdated": "2026-04-16", - "completedStories": ["US-001", "US-002", "US-003", "US-004", "US-005", "US-006", "US-007", "US-008", "US-009", "US-010", "US-011", "US-012", "US-013", "US-014", "US-015", "US-016", "US-017", "US-018", "US-019", "US-020", "US-021", "US-022", "US-023"], + "lastUpdated": "2026-04-17", + "completedStories": ["US-001", "US-002", "US-003", "US-004", "US-005", "US-006", "US-007", "US-008", "US-009", "US-010", "US-011", "US-012", "US-013", "US-014", "US-015", "US-016", "US-017", "US-018", "US-019", "US-020", "US-021", "US-022", "US-023", "US-024"], "inProgressStories": [], - "totalStories": 23, + "totalStories": 24, "status": "completed" } } diff --git a/rust/crates/api/src/providers/mod.rs b/rust/crates/api/src/providers/mod.rs index 64cfa8b..fb97900 100644 --- a/rust/crates/api/src/providers/mod.rs +++ b/rust/crates/api/src/providers/mod.rs @@ -289,6 +289,12 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> { max_output_tokens: 64_000, context_window_tokens: 131_072, }), + // Kimi models via DashScope (Moonshot AI) + // Source: https://platform.moonshot.cn/docs/intro + "kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit { + max_output_tokens: 16_384, + context_window_tokens: 256_000, + }), _ => None, } } @@ -744,6 +750,69 @@ mod tests { 
.expect("models without context metadata should skip the guarded preflight"); } + #[test] + fn returns_context_window_metadata_for_kimi_models() { + // kimi-k2.5 + let k25_limit = model_token_limit("kimi-k2.5") + .expect("kimi-k2.5 should have token limit metadata"); + assert_eq!(k25_limit.max_output_tokens, 16_384); + assert_eq!(k25_limit.context_window_tokens, 256_000); + + // kimi-k1.5 + let k15_limit = model_token_limit("kimi-k1.5") + .expect("kimi-k1.5 should have token limit metadata"); + assert_eq!(k15_limit.max_output_tokens, 16_384); + assert_eq!(k15_limit.context_window_tokens, 256_000); + } + + #[test] + fn kimi_alias_resolves_to_kimi_k25_token_limits() { + // The "kimi" alias resolves to "kimi-k2.5" via resolve_model_alias() + let alias_limit = model_token_limit("kimi") + .expect("kimi alias should resolve to kimi-k2.5 limits"); + let direct_limit = model_token_limit("kimi-k2.5") + .expect("kimi-k2.5 should have limits"); + assert_eq!(alias_limit.max_output_tokens, direct_limit.max_output_tokens); + assert_eq!( + alias_limit.context_window_tokens, + direct_limit.context_window_tokens + ); + } + + #[test] + fn preflight_blocks_oversized_requests_for_kimi_models() { + let request = MessageRequest { + model: "kimi-k2.5".to_string(), + max_tokens: 16_384, + messages: vec![InputMessage { + role: "user".to_string(), + content: vec![InputContentBlock::Text { + text: "x".repeat(1_000_000), // Large input to exceed context window + }], + }], + system: Some("Keep the answer short.".to_string()), + tools: None, + tool_choice: None, + stream: true, + ..Default::default() + }; + + let error = preflight_message_request(&request) + .expect_err("oversized request should be rejected for kimi models"); + + match error { + ApiError::ContextWindowExceeded { + model, + context_window_tokens, + .. 
+ } => { + assert_eq!(model, "kimi-k2.5"); + assert_eq!(context_window_tokens, 256_000); + } + other => panic!("expected context-window preflight failure, got {other:?}"), + } + } + #[test] fn parse_dotenv_extracts_keys_handles_comments_quotes_and_export_prefix() { // given