fix: read anthropic 1m flag from live model cache state

This commit is contained in:
YeonGyu-Kim 2026-02-17 10:45:48 +09:00
parent 363016681b
commit d786691260
12 changed files with 64 additions and 38 deletions

View File

@@ -249,7 +249,9 @@ describe("context-window-monitor", () => {
it("should use 1M limit when model cache flag is enabled", async () => { it("should use 1M limit when model cache flag is enabled", async () => {
//#given //#given
const hook = createContextWindowMonitorHook(ctx as never, true) const hook = createContextWindowMonitorHook(ctx as never, {
anthropicContext1MEnabled: true,
})
const sessionID = "ses_1m_flag" const sessionID = "ses_1m_flag"
await hook.event({ await hook.event({
@@ -286,7 +288,9 @@ describe("context-window-monitor", () => {
it("should keep env var fallback when model cache flag is disabled", async () => { it("should keep env var fallback when model cache flag is disabled", async () => {
//#given //#given
process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true" process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true"
const hook = createContextWindowMonitorHook(ctx as never, false) const hook = createContextWindowMonitorHook(ctx as never, {
anthropicContext1MEnabled: false,
})
const sessionID = "ses_env_fallback" const sessionID = "ses_env_fallback"
await hook.event({ await hook.event({

View File

@@ -5,8 +5,12 @@ const ANTHROPIC_DISPLAY_LIMIT = 1_000_000
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000 const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
const CONTEXT_WARNING_THRESHOLD = 0.70 const CONTEXT_WARNING_THRESHOLD = 0.70
function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number { type ModelCacheStateLike = {
return anthropicContext1MEnabled || anthropicContext1MEnabled: boolean
}
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
return (modelCacheState?.anthropicContext1MEnabled ?? false) ||
process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.ANTHROPIC_1M_CONTEXT === "true" ||
process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
? 1_000_000 ? 1_000_000
@@ -37,7 +41,7 @@ function isAnthropicProvider(providerID: string): boolean {
export function createContextWindowMonitorHook( export function createContextWindowMonitorHook(
_ctx: PluginInput, _ctx: PluginInput,
anthropicContext1MEnabled = false, modelCacheState?: ModelCacheStateLike,
) { ) {
const remindedSessions = new Set<string>() const remindedSessions = new Set<string>()
const tokenCache = new Map<string, CachedTokenState>() const tokenCache = new Map<string, CachedTokenState>()
@@ -59,7 +63,7 @@ export function createContextWindowMonitorHook(
const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
const actualUsagePercentage = const actualUsagePercentage =
totalInputTokens / getAnthropicActualLimit(anthropicContext1MEnabled) totalInputTokens / getAnthropicActualLimit(modelCacheState)
if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return

View File

@@ -29,10 +29,10 @@ interface EventInput {
export function createDirectoryAgentsInjectorHook( export function createDirectoryAgentsInjectorHook(
ctx: PluginInput, ctx: PluginInput,
anthropicContext1MEnabled?: boolean, modelCacheState?: { anthropicContext1MEnabled: boolean },
) { ) {
const sessionCaches = new Map<string, Set<string>>(); const sessionCaches = new Map<string, Set<string>>();
const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); const truncator = createDynamicTruncator(ctx, modelCacheState);
const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => { const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => {
const toolName = input.tool.toLowerCase(); const toolName = input.tool.toLowerCase();

View File

@@ -29,10 +29,10 @@ interface EventInput {
export function createDirectoryReadmeInjectorHook( export function createDirectoryReadmeInjectorHook(
ctx: PluginInput, ctx: PluginInput,
anthropicContext1MEnabled?: boolean, modelCacheState?: { anthropicContext1MEnabled: boolean },
) { ) {
const sessionCaches = new Map<string, Set<string>>(); const sessionCaches = new Map<string, Set<string>>();
const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); const truncator = createDynamicTruncator(ctx, modelCacheState);
const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => { const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => {
const toolName = input.tool.toLowerCase(); const toolName = input.tool.toLowerCase();

View File

@@ -269,7 +269,9 @@ describe("preemptive-compaction", () => {
it("should use 1M limit when model cache flag is enabled", async () => { it("should use 1M limit when model cache flag is enabled", async () => {
//#given //#given
const hook = createPreemptiveCompactionHook(ctx as never, true) const hook = createPreemptiveCompactionHook(ctx as never, {
anthropicContext1MEnabled: true,
})
const sessionID = "ses_1m_flag" const sessionID = "ses_1m_flag"
await hook.event({ await hook.event({
@@ -306,7 +308,9 @@ describe("preemptive-compaction", () => {
it("should keep env var fallback when model cache flag is disabled", async () => { it("should keep env var fallback when model cache flag is disabled", async () => {
//#given //#given
process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true" process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true"
const hook = createPreemptiveCompactionHook(ctx as never, false) const hook = createPreemptiveCompactionHook(ctx as never, {
anthropicContext1MEnabled: false,
})
const sessionID = "ses_env_fallback" const sessionID = "ses_env_fallback"
await hook.event({ await hook.event({

View File

@@ -2,8 +2,12 @@ import { log } from "../shared/logger"
const DEFAULT_ACTUAL_LIMIT = 200_000 const DEFAULT_ACTUAL_LIMIT = 200_000
function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number { type ModelCacheStateLike = {
return anthropicContext1MEnabled || anthropicContext1MEnabled: boolean
}
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
return (modelCacheState?.anthropicContext1MEnabled ?? false) ||
process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.ANTHROPIC_1M_CONTEXT === "true" ||
process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
? 1_000_000 ? 1_000_000
@@ -47,7 +51,7 @@ type PluginInput = {
export function createPreemptiveCompactionHook( export function createPreemptiveCompactionHook(
ctx: PluginInput, ctx: PluginInput,
anthropicContext1MEnabled = false, modelCacheState?: ModelCacheStateLike,
) { ) {
const compactionInProgress = new Set<string>() const compactionInProgress = new Set<string>()
const compactedSessions = new Set<string>() const compactedSessions = new Set<string>()
@@ -65,7 +69,7 @@ export function createPreemptiveCompactionHook(
const actualLimit = const actualLimit =
isAnthropicProvider(cached.providerID) isAnthropicProvider(cached.providerID)
? getAnthropicActualLimit(anthropicContext1MEnabled) ? getAnthropicActualLimit(modelCacheState)
: DEFAULT_ACTUAL_LIMIT : DEFAULT_ACTUAL_LIMIT
const lastTokens = cached.tokens const lastTokens = cached.tokens

View File

@@ -31,9 +31,9 @@ const TRACKED_TOOLS = ["read", "write", "edit", "multiedit"];
export function createRulesInjectorHook( export function createRulesInjectorHook(
ctx: PluginInput, ctx: PluginInput,
anthropicContext1MEnabled?: boolean, modelCacheState?: { anthropicContext1MEnabled: boolean },
) { ) {
const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); const truncator = createDynamicTruncator(ctx, modelCacheState);
const { getSessionCache, clearSessionCache } = createSessionCacheStore(); const { getSessionCache, clearSessionCache } = createSessionCacheStore();
const { processFilePathForInjection } = createRuleInjectionProcessor({ const { processFilePathForInjection } = createRuleInjectionProcessor({
workspaceDirectory: ctx.directory, workspaceDirectory: ctx.directory,

View File

@@ -27,12 +27,12 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = {
} }
interface ToolOutputTruncatorOptions { interface ToolOutputTruncatorOptions {
anthropicContext1MEnabled?: boolean modelCacheState?: { anthropicContext1MEnabled: boolean }
experimental?: ExperimentalConfig experimental?: ExperimentalConfig
} }
export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOutputTruncatorOptions) { export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOutputTruncatorOptions) {
const truncator = createDynamicTruncator(ctx, options?.anthropicContext1MEnabled) const truncator = createDynamicTruncator(ctx, options?.modelCacheState)
const truncateAll = options?.experimental?.truncate_all_tool_outputs ?? false const truncateAll = options?.experimental?.truncate_all_tool_outputs ?? false
const toolExecuteAfter = async ( const toolExecuteAfter = async (

View File

@@ -66,14 +66,14 @@ export function createSessionHooks(args: {
const contextWindowMonitor = isHookEnabled("context-window-monitor") const contextWindowMonitor = isHookEnabled("context-window-monitor")
? safeHook("context-window-monitor", () => ? safeHook("context-window-monitor", () =>
createContextWindowMonitorHook(ctx, modelCacheState.anthropicContext1MEnabled)) createContextWindowMonitorHook(ctx, modelCacheState))
: null : null
const preemptiveCompaction = const preemptiveCompaction =
isHookEnabled("preemptive-compaction") && isHookEnabled("preemptive-compaction") &&
pluginConfig.experimental?.preemptive_compaction pluginConfig.experimental?.preemptive_compaction
? safeHook("preemptive-compaction", () => ? safeHook("preemptive-compaction", () =>
createPreemptiveCompactionHook(ctx, modelCacheState.anthropicContext1MEnabled)) createPreemptiveCompactionHook(ctx, modelCacheState))
: null : null
const sessionRecovery = isHookEnabled("session-recovery") const sessionRecovery = isHookEnabled("session-recovery")

View File

@@ -51,7 +51,7 @@ export function createToolGuardHooks(args: {
const toolOutputTruncator = isHookEnabled("tool-output-truncator") const toolOutputTruncator = isHookEnabled("tool-output-truncator")
? safeHook("tool-output-truncator", () => ? safeHook("tool-output-truncator", () =>
createToolOutputTruncatorHook(ctx, { createToolOutputTruncatorHook(ctx, {
anthropicContext1MEnabled: modelCacheState.anthropicContext1MEnabled, modelCacheState,
experimental: pluginConfig.experimental, experimental: pluginConfig.experimental,
})) }))
: null : null
@@ -68,13 +68,13 @@ export function createToolGuardHooks(args: {
}) })
} else { } else {
directoryAgentsInjector = safeHook("directory-agents-injector", () => directoryAgentsInjector = safeHook("directory-agents-injector", () =>
createDirectoryAgentsInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) createDirectoryAgentsInjectorHook(ctx, modelCacheState))
} }
} }
const directoryReadmeInjector = isHookEnabled("directory-readme-injector") const directoryReadmeInjector = isHookEnabled("directory-readme-injector")
? safeHook("directory-readme-injector", () => ? safeHook("directory-readme-injector", () =>
createDirectoryReadmeInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) createDirectoryReadmeInjectorHook(ctx, modelCacheState))
: null : null
const emptyTaskResponseDetector = isHookEnabled("empty-task-response-detector") const emptyTaskResponseDetector = isHookEnabled("empty-task-response-detector")
@@ -83,7 +83,7 @@ export function createToolGuardHooks(args: {
const rulesInjector = isHookEnabled("rules-injector") const rulesInjector = isHookEnabled("rules-injector")
? safeHook("rules-injector", () => ? safeHook("rules-injector", () =>
createRulesInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) createRulesInjectorHook(ctx, modelCacheState))
: null : null
const tasksTodowriteDisabler = isHookEnabled("tasks-todowrite-disabler") const tasksTodowriteDisabler = isHookEnabled("tasks-todowrite-disabler")

View File

@@ -60,7 +60,9 @@ describe("getContextWindowUsage", () => {
const ctx = createContextUsageMockContext(300000) const ctx = createContextUsageMockContext(300000)
//#when //#when
const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", true) const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", {
anthropicContext1MEnabled: true,
})
//#then //#then
expect(usage?.usagePercentage).toBe(0.3) expect(usage?.usagePercentage).toBe(0.3)
@@ -74,7 +76,9 @@ describe("getContextWindowUsage", () => {
const ctx = createContextUsageMockContext(150000) const ctx = createContextUsageMockContext(150000)
//#when //#when
const usage = await getContextWindowUsage(ctx as never, "ses_default", false) const usage = await getContextWindowUsage(ctx as never, "ses_default", {
anthropicContext1MEnabled: false,
})
//#then //#then
expect(usage?.usagePercentage).toBe(0.75) expect(usage?.usagePercentage).toBe(0.75)
@@ -87,7 +91,9 @@ describe("getContextWindowUsage", () => {
const ctx = createContextUsageMockContext(300000) const ctx = createContextUsageMockContext(300000)
//#when //#when
const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", false) const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", {
anthropicContext1MEnabled: false,
})
//#then //#then
expect(usage?.usagePercentage).toBe(0.3) expect(usage?.usagePercentage).toBe(0.3)

View File

@@ -5,8 +5,12 @@ const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000;
const CHARS_PER_TOKEN_ESTIMATE = 4; const CHARS_PER_TOKEN_ESTIMATE = 4;
const DEFAULT_TARGET_MAX_TOKENS = 50_000; const DEFAULT_TARGET_MAX_TOKENS = 50_000;
function getAnthropicActualLimit(anthropicContext1MEnabled = false): number { type ModelCacheStateLike = {
return anthropicContext1MEnabled || anthropicContext1MEnabled: boolean;
}
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
return (modelCacheState?.anthropicContext1MEnabled ?? false) ||
process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.ANTHROPIC_1M_CONTEXT === "true" ||
process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
? 1_000_000 ? 1_000_000
@@ -114,7 +118,7 @@ export function truncateToTokenLimit(
export async function getContextWindowUsage( export async function getContextWindowUsage(
ctx: PluginInput, ctx: PluginInput,
sessionID: string, sessionID: string,
anthropicContext1MEnabled = false, modelCacheState?: ModelCacheStateLike,
): Promise<{ ): Promise<{
usedTokens: number; usedTokens: number;
remainingTokens: number; remainingTokens: number;
@@ -139,7 +143,7 @@ export async function getContextWindowUsage(
(lastTokens?.input ?? 0) + (lastTokens?.input ?? 0) +
(lastTokens?.cache?.read ?? 0) + (lastTokens?.cache?.read ?? 0) +
(lastTokens?.output ?? 0); (lastTokens?.output ?? 0);
const anthropicActualLimit = getAnthropicActualLimit(anthropicContext1MEnabled); const anthropicActualLimit = getAnthropicActualLimit(modelCacheState);
const remainingTokens = anthropicActualLimit - usedTokens; const remainingTokens = anthropicActualLimit - usedTokens;
return { return {
@@ -157,7 +161,7 @@ export async function dynamicTruncate(
sessionID: string, sessionID: string,
output: string, output: string,
options: TruncationOptions = {}, options: TruncationOptions = {},
anthropicContext1MEnabled = false, modelCacheState?: ModelCacheStateLike,
): Promise<TruncationResult> { ): Promise<TruncationResult> {
if (typeof output !== 'string') { if (typeof output !== 'string') {
return { result: String(output ?? ''), truncated: false }; return { result: String(output ?? ''), truncated: false };
@@ -168,7 +172,7 @@ export async function dynamicTruncate(
preserveHeaderLines = 3, preserveHeaderLines = 3,
} = options; } = options;
const usage = await getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled); const usage = await getContextWindowUsage(ctx, sessionID, modelCacheState);
if (!usage) { if (!usage) {
// Fallback: apply conservative truncation when context usage unavailable // Fallback: apply conservative truncation when context usage unavailable
@ -192,17 +196,17 @@ export async function dynamicTruncate(
export function createDynamicTruncator( export function createDynamicTruncator(
ctx: PluginInput, ctx: PluginInput,
anthropicContext1MEnabled?: boolean, modelCacheState?: ModelCacheStateLike,
) { ) {
return { return {
truncate: ( truncate: (
sessionID: string, sessionID: string,
output: string, output: string,
options?: TruncationOptions, options?: TruncationOptions,
) => dynamicTruncate(ctx, sessionID, output, options, anthropicContext1MEnabled), ) => dynamicTruncate(ctx, sessionID, output, options, modelCacheState),
getUsage: (sessionID: string) => getUsage: (sessionID: string) =>
getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled), getContextWindowUsage(ctx, sessionID, modelCacheState),
truncateSync: ( truncateSync: (
output: string, output: string,