US-024: Add token limit metadata for kimi models

Add ModelTokenLimit entries for kimi-k2.5 and kimi-k1.5 to enable preflight context window validation.

Per Moonshot AI documentation:
- Context window: 256,000 tokens
- Max output: 16,384 tokens

Includes 3 unit tests:
- returns_context_window_metadata_for_kimi_models
- kimi_alias_resolves_to_kimi_k25_token_limits
- preflight_blocks_oversized_requests_for_kimi_models

All tests pass, clippy clean.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-10 03:12:29 +08:00 · 2026-04-17 04:15:38 +00:00 · 2026-04-17 04:15:38 +00:00 · 00d0eb61d4
commit 00d0eb61d4
parent 8d8e2c3afd
2 changed files with 87 additions and 3 deletions
--- a/prd.json
+++ b/prd.json
@@ -329,13 +329,28 @@
      ],
      "passes": true,
      "priority": "P1"
+    },
+    {
+      "id": "US-024",
+      "title": "Add token limit metadata for kimi models",
+      "description": "The model_token_limit() function has no entries for kimi-k2.5 or kimi-k1.5, causing preflight context window validation to skip these models. Add token limit metadata to enable preflight checks and accurate max token defaults. Per Moonshot AI documentation, kimi-k2.5 supports 256K context window and 16K max output tokens.",
+      "acceptanceCriteria": [
+        "model_token_limit('kimi-k2.5') returns Some(ModelTokenLimit { max_output_tokens: 16384, context_window_tokens: 256000 })",
+        "model_token_limit('kimi-k1.5') returns the same limits as kimi-k2.5: Some(ModelTokenLimit { max_output_tokens: 16384, context_window_tokens: 256000 })",
+        "model_token_limit('kimi') follows alias chain (kimi → kimi-k2.5) and returns k2.5 limits",
+        "preflight_message_request() validates context window for kimi models (via generic preflight, no provider-specific code needed)",
+        "Unit tests verify limits and preflight behavior for kimi models",
+        "All tests pass and clippy is clean"
+      ],
+      "passes": true,
+      "priority": "P1"
    }
  ],
  "metadata": {
-    "lastUpdated": "2026-04-16",
-    "completedStories": ["US-001", "US-002", "US-003", "US-004", "US-005", "US-006", "US-007", "US-008", "US-009", "US-010", "US-011", "US-012", "US-013", "US-014", "US-015", "US-016", "US-017", "US-018", "US-019", "US-020", "US-021", "US-022", "US-023"],
+    "lastUpdated": "2026-04-17",
+    "completedStories": ["US-001", "US-002", "US-003", "US-004", "US-005", "US-006", "US-007", "US-008", "US-009", "US-010", "US-011", "US-012", "US-013", "US-014", "US-015", "US-016", "US-017", "US-018", "US-019", "US-020", "US-021", "US-022", "US-023", "US-024"],
    "inProgressStories": [],
-    "totalStories": 23,
+    "totalStories": 24,
    "status": "completed"
  }
 }
--- a/rust/crates/api/src/providers/mod.rs
+++ b/rust/crates/api/src/providers/mod.rs
@@ -289,6 +289,12 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
            max_output_tokens: 64_000,
            context_window_tokens: 131_072,
        }),
+        // Kimi models via DashScope (Moonshot AI)
+        // Source: https://platform.moonshot.cn/docs/intro
+        "kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit {
+            max_output_tokens: 16_384,
+            context_window_tokens: 256_000,
+        }),
        _ => None,
    }
 }
@@ -744,6 +750,69 @@ mod tests {
            .expect("models without context metadata should skip the guarded preflight");
    }

+    #[test]
+    fn returns_context_window_metadata_for_kimi_models() {
+        // kimi-k2.5
+        let k25_limit = model_token_limit("kimi-k2.5")
+            .expect("kimi-k2.5 should have token limit metadata");
+        assert_eq!(k25_limit.max_output_tokens, 16_384);
+        assert_eq!(k25_limit.context_window_tokens, 256_000);
+
+        // kimi-k1.5
+        let k15_limit = model_token_limit("kimi-k1.5")
+            .expect("kimi-k1.5 should have token limit metadata");
+        assert_eq!(k15_limit.max_output_tokens, 16_384);
+        assert_eq!(k15_limit.context_window_tokens, 256_000);
+    }
+
+    #[test]
+    fn kimi_alias_resolves_to_kimi_k25_token_limits() {
+        // The "kimi" alias resolves to "kimi-k2.5" via resolve_model_alias()
+        let alias_limit = model_token_limit("kimi")
+            .expect("kimi alias should resolve to kimi-k2.5 limits");
+        let direct_limit = model_token_limit("kimi-k2.5")
+            .expect("kimi-k2.5 should have limits");
+        assert_eq!(alias_limit.max_output_tokens, direct_limit.max_output_tokens);
+        assert_eq!(
+            alias_limit.context_window_tokens,
+            direct_limit.context_window_tokens
+        );
+    }
+
+    #[test]
+    fn preflight_blocks_oversized_requests_for_kimi_models() {
+        let request = MessageRequest {
+            model: "kimi-k2.5".to_string(),
+            max_tokens: 16_384,
+            messages: vec![InputMessage {
+                role: "user".to_string(),
+                content: vec![InputContentBlock::Text {
+                    text: "x".repeat(1_000_000), // Large input to exceed context window
+                }],
+            }],
+            system: Some("Keep the answer short.".to_string()),
+            tools: None,
+            tool_choice: None,
+            stream: true,
+            ..Default::default()
+        };
+
+        let error = preflight_message_request(&request)
+            .expect_err("oversized request should be rejected for kimi models");
+
+        match error {
+            ApiError::ContextWindowExceeded {
+                model,
+                context_window_tokens,
+                ..
+            } => {
+                assert_eq!(model, "kimi-k2.5");
+                assert_eq!(context_window_tokens, 256_000);
+            }
+            other => panic!("expected context-window preflight failure, got {other:?}"),
+        }
+    }
+
    #[test]
    fn parse_dotenv_extracts_keys_handles_comments_quotes_and_export_prefix() {
        // given