From b444899153c5aef44731bec33d128126f0936ca3 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 17 Feb 2026 10:33:17 +0900 Subject: [PATCH 1/3] fix: use model cache context flag for runtime context limits --- src/create-hooks.ts | 4 +++ src/hooks/context-window-monitor.ts | 22 +++++++++----- src/hooks/directory-agents-injector/hook.ts | 7 +++-- src/hooks/directory-readme-injector/hook.ts | 7 +++-- src/hooks/preemptive-compaction.ts | 15 ++++++---- src/hooks/rules-injector/hook.ts | 7 +++-- src/hooks/tool-output-truncator.ts | 3 +- src/index.ts | 1 + src/plugin/hooks/create-core-hooks.ts | 6 +++- src/plugin/hooks/create-session-hooks.ts | 10 +++++-- src/plugin/hooks/create-tool-guard-hooks.ts | 18 +++++++---- src/shared/dynamic-truncator.ts | 33 ++++++++++++++------- 12 files changed, 94 insertions(+), 39 deletions(-) diff --git a/src/create-hooks.ts b/src/create-hooks.ts index efa81fab..9972551e 100644 --- a/src/create-hooks.ts +++ b/src/create-hooks.ts @@ -3,6 +3,7 @@ import type { HookName, OhMyOpenCodeConfig } from "./config" import type { LoadedSkill } from "./features/opencode-skill-loader/types" import type { BackgroundManager } from "./features/background-agent" import type { PluginContext } from "./plugin/types" +import type { ModelCacheState } from "./plugin-state" import { createCoreHooks } from "./plugin/hooks/create-core-hooks" import { createContinuationHooks } from "./plugin/hooks/create-continuation-hooks" @@ -13,6 +14,7 @@ export type CreatedHooks = ReturnType export function createHooks(args: { ctx: PluginContext pluginConfig: OhMyOpenCodeConfig + modelCacheState: ModelCacheState backgroundManager: BackgroundManager isHookEnabled: (hookName: HookName) => boolean safeHookEnabled: boolean @@ -22,6 +24,7 @@ export function createHooks(args: { const { ctx, pluginConfig, + modelCacheState, backgroundManager, isHookEnabled, safeHookEnabled, @@ -32,6 +35,7 @@ export function createHooks(args: { const core = createCoreHooks({ ctx, pluginConfig, + modelCacheState, isHookEnabled, safeHookEnabled, }) diff --git a/src/hooks/context-window-monitor.ts b/src/hooks/context-window-monitor.ts index e0caf0d1..91d99a76 100644 --- a/src/hooks/context-window-monitor.ts +++ b/src/hooks/context-window-monitor.ts @@ -2,13 +2,17 @@ import type { PluginInput } from "@opencode-ai/plugin" import { createSystemDirective, SystemDirectiveTypes } from "../shared/system-directive" const ANTHROPIC_DISPLAY_LIMIT = 1_000_000 -const ANTHROPIC_ACTUAL_LIMIT = - process.env.ANTHROPIC_1M_CONTEXT === "true" || - process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" - ? 1_000_000 - : 200_000 +const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000 const CONTEXT_WARNING_THRESHOLD = 0.70 +function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number { + return anthropicContext1MEnabled || + process.env.ANTHROPIC_1M_CONTEXT === "true" || + process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" + ? 1_000_000 + : DEFAULT_ANTHROPIC_ACTUAL_LIMIT +} + const CONTEXT_REMINDER = `${createSystemDirective(SystemDirectiveTypes.CONTEXT_WINDOW_MONITOR)} You are using Anthropic Claude with 1M context window. @@ -31,7 +35,10 @@ function isAnthropicProvider(providerID: string): boolean { return providerID === "anthropic" || providerID === "google-vertex-anthropic" } -export function createContextWindowMonitorHook(_ctx: PluginInput) { +export function createContextWindowMonitorHook( + _ctx: PluginInput, + anthropicContext1MEnabled = false, +) { const remindedSessions = new Set() const tokenCache = new Map() @@ -51,7 +58,8 @@ export function createContextWindowMonitorHook(_ctx: PluginInput) { const lastTokens = cached.tokens const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) - const actualUsagePercentage = totalInputTokens / ANTHROPIC_ACTUAL_LIMIT + const actualUsagePercentage = + totalInputTokens / getAnthropicActualLimit(anthropicContext1MEnabled) if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return diff --git a/src/hooks/directory-agents-injector/hook.ts b/src/hooks/directory-agents-injector/hook.ts index a510301a..0dd431a7 100644 --- a/src/hooks/directory-agents-injector/hook.ts +++ b/src/hooks/directory-agents-injector/hook.ts @@ -27,9 +27,12 @@ interface EventInput { }; } -export function createDirectoryAgentsInjectorHook(ctx: PluginInput) { +export function createDirectoryAgentsInjectorHook( + ctx: PluginInput, + anthropicContext1MEnabled?: boolean, +) { const sessionCaches = new Map>(); - const truncator = createDynamicTruncator(ctx); + const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => { const toolName = input.tool.toLowerCase(); diff --git a/src/hooks/directory-readme-injector/hook.ts b/src/hooks/directory-readme-injector/hook.ts index 33c50bc7..608f8123 100644 --- a/src/hooks/directory-readme-injector/hook.ts +++ b/src/hooks/directory-readme-injector/hook.ts @@ -27,9 +27,12 @@ interface EventInput { }; } -export function createDirectoryReadmeInjectorHook(ctx: PluginInput) { +export function createDirectoryReadmeInjectorHook( + ctx: PluginInput, + anthropicContext1MEnabled?: boolean, +) { const sessionCaches = new Map>(); - const truncator = createDynamicTruncator(ctx); + const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => { const toolName = input.tool.toLowerCase(); diff --git a/src/hooks/preemptive-compaction.ts b/src/hooks/preemptive-compaction.ts index a3f76914..5157ab7a 100644 --- a/src/hooks/preemptive-compaction.ts +++ b/src/hooks/preemptive-compaction.ts @@ -2,11 +2,13 @@ import { log } from "../shared/logger" const DEFAULT_ACTUAL_LIMIT = 200_000 -const ANTHROPIC_ACTUAL_LIMIT = - process.env.ANTHROPIC_1M_CONTEXT === "true" || - process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" +function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number { + return anthropicContext1MEnabled || + process.env.ANTHROPIC_1M_CONTEXT === "true" || + process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" ? 1_000_000 : DEFAULT_ACTUAL_LIMIT +} const PREEMPTIVE_COMPACTION_THRESHOLD = 0.78 @@ -43,7 +45,10 @@ type PluginInput = { directory: string } -export function createPreemptiveCompactionHook(ctx: PluginInput) { +export function createPreemptiveCompactionHook( + ctx: PluginInput, + anthropicContext1MEnabled = false, +) { const compactionInProgress = new Set() const compactedSessions = new Set() const tokenCache = new Map() @@ -60,7 +65,7 @@ export function createPreemptiveCompactionHook(ctx: PluginInput) { const actualLimit = isAnthropicProvider(cached.providerID) - ? ANTHROPIC_ACTUAL_LIMIT + ? getAnthropicActualLimit(anthropicContext1MEnabled) : DEFAULT_ACTUAL_LIMIT const lastTokens = cached.tokens diff --git a/src/hooks/rules-injector/hook.ts b/src/hooks/rules-injector/hook.ts index b556a8a7..4300fba5 100644 --- a/src/hooks/rules-injector/hook.ts +++ b/src/hooks/rules-injector/hook.ts @@ -29,8 +29,11 @@ interface EventInput { const TRACKED_TOOLS = ["read", "write", "edit", "multiedit"]; -export function createRulesInjectorHook(ctx: PluginInput) { - const truncator = createDynamicTruncator(ctx); +export function createRulesInjectorHook( + ctx: PluginInput, + anthropicContext1MEnabled?: boolean, +) { + const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); const { getSessionCache, clearSessionCache } = createSessionCacheStore(); const { processFilePathForInjection } = createRuleInjectionProcessor({ workspaceDirectory: ctx.directory, diff --git a/src/hooks/tool-output-truncator.ts b/src/hooks/tool-output-truncator.ts index 8f8c300d..f3d88013 100644 --- a/src/hooks/tool-output-truncator.ts +++ b/src/hooks/tool-output-truncator.ts @@ -27,11 +27,12 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record = { } interface ToolOutputTruncatorOptions { + anthropicContext1MEnabled?: boolean experimental?: ExperimentalConfig } export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOutputTruncatorOptions) { - const truncator = createDynamicTruncator(ctx) + const truncator = createDynamicTruncator(ctx, options?.anthropicContext1MEnabled) const truncateAll = options?.experimental?.truncate_all_tool_outputs ?? false const toolExecuteAfter = async ( diff --git a/src/index.ts b/src/index.ts index 2555a931..bba71904 100644 --- a/src/index.ts +++ b/src/index.ts @@ -56,6 +56,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => { const hooks = createHooks({ ctx, pluginConfig, + modelCacheState, backgroundManager: managers.backgroundManager, isHookEnabled, safeHookEnabled, diff --git a/src/plugin/hooks/create-core-hooks.ts b/src/plugin/hooks/create-core-hooks.ts index 2bfac4ae..4bfd2b4b 100644 --- a/src/plugin/hooks/create-core-hooks.ts +++ b/src/plugin/hooks/create-core-hooks.ts @@ -1,5 +1,6 @@ import type { HookName, OhMyOpenCodeConfig } from "../../config" import type { PluginContext } from "../types" +import type { ModelCacheState } from "../../plugin-state" import { createSessionHooks } from "./create-session-hooks" import { createToolGuardHooks } from "./create-tool-guard-hooks" @@ -8,14 +9,16 @@ import { createTransformHooks } from "./create-transform-hooks" export function createCoreHooks(args: { ctx: PluginContext pluginConfig: OhMyOpenCodeConfig + modelCacheState: ModelCacheState isHookEnabled: (hookName: HookName) => boolean safeHookEnabled: boolean }) { - const { ctx, pluginConfig, isHookEnabled, safeHookEnabled } = args + const { ctx, pluginConfig, modelCacheState, isHookEnabled, safeHookEnabled } = args const session = createSessionHooks({ ctx, pluginConfig, + modelCacheState, isHookEnabled, safeHookEnabled, }) @@ -23,6 +26,7 @@ export function createCoreHooks(args: { const tool = createToolGuardHooks({ ctx, pluginConfig, + modelCacheState, isHookEnabled, safeHookEnabled, }) diff --git a/src/plugin/hooks/create-session-hooks.ts b/src/plugin/hooks/create-session-hooks.ts index d93ec585..457adda2 100644 --- a/src/plugin/hooks/create-session-hooks.ts +++ b/src/plugin/hooks/create-session-hooks.ts @@ -1,4 +1,5 @@ import type { OhMyOpenCodeConfig, HookName } from "../../config" +import type { ModelCacheState } from "../../plugin-state" import type { PluginContext } from "../types" import { @@ -55,21 +56,24 @@ export type SessionHooks = { export function createSessionHooks(args: { ctx: PluginContext pluginConfig: OhMyOpenCodeConfig + modelCacheState: ModelCacheState isHookEnabled: (hookName: HookName) => boolean safeHookEnabled: boolean }): SessionHooks { - const { ctx, pluginConfig, isHookEnabled, safeHookEnabled } = args + const { ctx, pluginConfig, modelCacheState, isHookEnabled, safeHookEnabled } = args const safeHook = (hookName: HookName, factory: () => T): T | null => safeCreateHook(hookName, factory, { enabled: safeHookEnabled }) const contextWindowMonitor = isHookEnabled("context-window-monitor") - ? safeHook("context-window-monitor", () => createContextWindowMonitorHook(ctx)) + ? safeHook("context-window-monitor", () => + createContextWindowMonitorHook(ctx, modelCacheState.anthropicContext1MEnabled)) : null const preemptiveCompaction = isHookEnabled("preemptive-compaction") && pluginConfig.experimental?.preemptive_compaction - ? safeHook("preemptive-compaction", () => createPreemptiveCompactionHook(ctx)) + ? safeHook("preemptive-compaction", () => + createPreemptiveCompactionHook(ctx, modelCacheState.anthropicContext1MEnabled)) : null const sessionRecovery = isHookEnabled("session-recovery") diff --git a/src/plugin/hooks/create-tool-guard-hooks.ts b/src/plugin/hooks/create-tool-guard-hooks.ts index 46a36140..b49b60b4 100644 --- a/src/plugin/hooks/create-tool-guard-hooks.ts +++ b/src/plugin/hooks/create-tool-guard-hooks.ts @@ -1,4 +1,5 @@ import type { HookName, OhMyOpenCodeConfig } from "../../config" +import type { ModelCacheState } from "../../plugin-state" import type { PluginContext } from "../types" import { @@ -35,10 +36,11 @@ export type ToolGuardHooks = { export function createToolGuardHooks(args: { ctx: PluginContext pluginConfig: OhMyOpenCodeConfig + modelCacheState: ModelCacheState isHookEnabled: (hookName: HookName) => boolean safeHookEnabled: boolean }): ToolGuardHooks { - const { ctx, pluginConfig, isHookEnabled, safeHookEnabled } = args + const { ctx, pluginConfig, modelCacheState, isHookEnabled, safeHookEnabled } = args const safeHook = (hookName: HookName, factory: () => T): T | null => safeCreateHook(hookName, factory, { enabled: safeHookEnabled }) @@ -48,7 +50,10 @@ export function createToolGuardHooks(args: { const toolOutputTruncator = isHookEnabled("tool-output-truncator") ? safeHook("tool-output-truncator", () => - createToolOutputTruncatorHook(ctx, { experimental: pluginConfig.experimental })) + createToolOutputTruncatorHook(ctx, { + anthropicContext1MEnabled: modelCacheState.anthropicContext1MEnabled, + experimental: pluginConfig.experimental, + })) : null let directoryAgentsInjector: ReturnType | null = null @@ -62,12 +67,14 @@ export function createToolGuardHooks(args: { nativeVersion: OPENCODE_NATIVE_AGENTS_INJECTION_VERSION, }) } else { - directoryAgentsInjector = safeHook("directory-agents-injector", () => createDirectoryAgentsInjectorHook(ctx)) + directoryAgentsInjector = safeHook("directory-agents-injector", () => + createDirectoryAgentsInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) } } const directoryReadmeInjector = isHookEnabled("directory-readme-injector") - ? safeHook("directory-readme-injector", () => createDirectoryReadmeInjectorHook(ctx)) + ? safeHook("directory-readme-injector", () => + createDirectoryReadmeInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) : null const emptyTaskResponseDetector = isHookEnabled("empty-task-response-detector") @@ -75,7 +82,8 @@ export function createToolGuardHooks(args: { : null const rulesInjector = isHookEnabled("rules-injector") - ? safeHook("rules-injector", () => createRulesInjectorHook(ctx)) + ? safeHook("rules-injector", () => + createRulesInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) : null const tasksTodowriteDisabler = isHookEnabled("tasks-todowrite-disabler") diff --git a/src/shared/dynamic-truncator.ts b/src/shared/dynamic-truncator.ts index dbd90466..15432ce6 100644 --- a/src/shared/dynamic-truncator.ts +++ b/src/shared/dynamic-truncator.ts @@ -1,14 +1,18 @@ import type { PluginInput } from "@opencode-ai/plugin"; import { normalizeSDKResponse } from "./normalize-sdk-response" -const ANTHROPIC_ACTUAL_LIMIT = - process.env.ANTHROPIC_1M_CONTEXT === "true" || - process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" - ? 1_000_000 - : 200_000; +const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000; const CHARS_PER_TOKEN_ESTIMATE = 4; const DEFAULT_TARGET_MAX_TOKENS = 50_000; +function getAnthropicActualLimit(anthropicContext1MEnabled = false): number { + return anthropicContext1MEnabled || + process.env.ANTHROPIC_1M_CONTEXT === "true" || + process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" + ? 1_000_000 + : DEFAULT_ANTHROPIC_ACTUAL_LIMIT; +} + interface AssistantMessageInfo { role: "assistant"; tokens: { @@ -110,6 +114,7 @@ export function truncateToTokenLimit( export async function getContextWindowUsage( ctx: PluginInput, sessionID: string, + anthropicContext1MEnabled = false, ): Promise<{ usedTokens: number; remainingTokens: number; @@ -134,12 +139,13 @@ export async function getContextWindowUsage( (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) + (lastTokens?.output ?? 0); - const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - usedTokens; + const anthropicActualLimit = getAnthropicActualLimit(anthropicContext1MEnabled); + const remainingTokens = anthropicActualLimit - usedTokens; return { usedTokens, remainingTokens, - usagePercentage: usedTokens / ANTHROPIC_ACTUAL_LIMIT, + usagePercentage: usedTokens / anthropicActualLimit, }; } catch { return null; @@ -151,6 +157,7 @@ export async function dynamicTruncate( sessionID: string, output: string, options: TruncationOptions = {}, + anthropicContext1MEnabled = false, ): Promise { if (typeof output !== 'string') { return { result: String(output ?? ''), truncated: false }; @@ -161,7 +168,7 @@ export async function dynamicTruncate( preserveHeaderLines = 3, } = options; - const usage = await getContextWindowUsage(ctx, sessionID); + const usage = await getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled); if (!usage) { // Fallback: apply conservative truncation when context usage unavailable @@ -183,15 +190,19 @@ export async function dynamicTruncate( return truncateToTokenLimit(output, maxOutputTokens, preserveHeaderLines); } -export function createDynamicTruncator(ctx: PluginInput) { +export function createDynamicTruncator( + ctx: PluginInput, + anthropicContext1MEnabled?: boolean, +) { return { truncate: ( sessionID: string, output: string, options?: TruncationOptions, - ) => dynamicTruncate(ctx, sessionID, output, options), + ) => dynamicTruncate(ctx, sessionID, output, options, anthropicContext1MEnabled), - getUsage: (sessionID: string) => getContextWindowUsage(ctx, sessionID), + getUsage: (sessionID: string) => + getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled), truncateSync: ( output: string, From 363016681bf896b53980b685fb7f313ef86f7e38 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 17 Feb 2026 10:33:25 +0900 Subject: [PATCH 2/3] test: cover model-cache and env fallback context limits --- src/hooks/context-window-monitor.test.ts | 103 ++++++++++++++++++++++- src/hooks/preemptive-compaction.test.ts | 103 ++++++++++++++++++++++- src/shared/dynamic-truncator.test.ts | 96 +++++++++++++++++++++ 3 files changed, 300 insertions(+), 2 deletions(-) create mode 100644 src/shared/dynamic-truncator.test.ts diff --git a/src/hooks/context-window-monitor.test.ts b/src/hooks/context-window-monitor.test.ts index e2252fd0..d0f8de3f 100644 --- a/src/hooks/context-window-monitor.test.ts +++ b/src/hooks/context-window-monitor.test.ts @@ -1,6 +1,28 @@ -import { describe, it, expect, mock, beforeEach } from "bun:test" +/// + +import { describe, it, expect, mock, beforeEach, afterEach } from "bun:test" import { createContextWindowMonitorHook } from "./context-window-monitor" +const ANTHROPIC_CONTEXT_ENV_KEY = "ANTHROPIC_1M_CONTEXT" +const VERTEX_CONTEXT_ENV_KEY = "VERTEX_ANTHROPIC_1M_CONTEXT" + +const originalAnthropicContextEnv = process.env[ANTHROPIC_CONTEXT_ENV_KEY] +const originalVertexContextEnv = process.env[VERTEX_CONTEXT_ENV_KEY] + +function resetContextLimitEnv(): void { + if (originalAnthropicContextEnv === undefined) { + delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] + } else { + process.env[ANTHROPIC_CONTEXT_ENV_KEY] = originalAnthropicContextEnv + } + + if (originalVertexContextEnv === undefined) { + delete process.env[VERTEX_CONTEXT_ENV_KEY] + } else { + process.env[VERTEX_CONTEXT_ENV_KEY] = originalVertexContextEnv + } +} + function createMockCtx() { return { client: { @@ -17,6 +39,12 @@ describe("context-window-monitor", () => { beforeEach(() => { ctx = createMockCtx() + delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] + delete process.env[VERTEX_CONTEXT_ENV_KEY] + }) + + afterEach(() => { + resetContextLimitEnv() }) // #given event caches token info from message.updated @@ -218,4 +246,77 @@ describe("context-window-monitor", () => { ) expect(output.output).toBe("test") }) + + it("should use 1M limit when model cache flag is enabled", async () => { + //#given + const hook = createContextWindowMonitorHook(ctx as never, true) + const sessionID = "ses_1m_flag" + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "anthropic", + finish: true, + tokens: { + input: 300000, + output: 1000, + reasoning: 0, + cache: { read: 0, write: 0 }, + }, + }, + }, + }, + }) + + //#when + const output = { title: "", output: "original", metadata: null } + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_1" }, + output + ) + + //#then + expect(output.output).toBe("original") + }) + + it("should keep env var fallback when model cache flag is disabled", async () => { + //#given + process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true" + const hook = createContextWindowMonitorHook(ctx as never, false) + const sessionID = "ses_env_fallback" + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "anthropic", + finish: true, + tokens: { + input: 300000, + output: 1000, + reasoning: 0, + cache: { read: 0, write: 0 }, + }, + }, + }, + }, + }) + + //#when + const output = { title: "", output: "original", metadata: null } + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_1" }, + output + ) + + //#then + expect(output.output).toBe("original") + }) }) diff --git a/src/hooks/preemptive-compaction.test.ts b/src/hooks/preemptive-compaction.test.ts index c6c38fa5..41e704e9 100644 --- a/src/hooks/preemptive-compaction.test.ts +++ b/src/hooks/preemptive-compaction.test.ts @@ -1,4 +1,26 @@ -import { describe, it, expect, mock, beforeEach } from "bun:test" +/// + +import { describe, it, expect, mock, beforeEach, afterEach } from "bun:test" + +const ANTHROPIC_CONTEXT_ENV_KEY = "ANTHROPIC_1M_CONTEXT" +const VERTEX_CONTEXT_ENV_KEY = "VERTEX_ANTHROPIC_1M_CONTEXT" + +const originalAnthropicContextEnv = process.env[ANTHROPIC_CONTEXT_ENV_KEY] +const originalVertexContextEnv = process.env[VERTEX_CONTEXT_ENV_KEY] + +function resetContextLimitEnv(): void { + if (originalAnthropicContextEnv === undefined) { + delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] + } else { + process.env[ANTHROPIC_CONTEXT_ENV_KEY] = originalAnthropicContextEnv + } + + if (originalVertexContextEnv === undefined) { + delete process.env[VERTEX_CONTEXT_ENV_KEY] + } else { + process.env[VERTEX_CONTEXT_ENV_KEY] = originalVertexContextEnv + } +} const logMock = mock(() => {}) @@ -29,6 +51,12 @@ describe("preemptive-compaction", () => { beforeEach(() => { ctx = createMockCtx() logMock.mockClear() + delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] + delete process.env[VERTEX_CONTEXT_ENV_KEY] + }) + + afterEach(() => { + resetContextLimitEnv() }) // #given event caches token info from message.updated @@ -238,4 +266,77 @@ describe("preemptive-compaction", () => { error: String(summarizeError), }) }) + + it("should use 1M limit when model cache flag is enabled", async () => { + //#given + const hook = createPreemptiveCompactionHook(ctx as never, true) + const sessionID = "ses_1m_flag" + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "anthropic", + modelID: "claude-sonnet-4-5", + finish: true, + tokens: { + input: 300000, + output: 1000, + reasoning: 0, + cache: { read: 0, write: 0 }, + }, + }, + }, + }, + }) + + //#when + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_1" }, + { title: "", output: "test", metadata: null } + ) + + //#then + expect(ctx.client.session.summarize).not.toHaveBeenCalled() + }) + + it("should keep env var fallback when model cache flag is disabled", async () => { + //#given + process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true" + const hook = createPreemptiveCompactionHook(ctx as never, false) + const sessionID = "ses_env_fallback" + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "anthropic", + modelID: "claude-sonnet-4-5", + finish: true, + tokens: { + input: 300000, + output: 1000, + reasoning: 0, + cache: { read: 0, write: 0 }, + }, + }, + }, + }, + }) + + //#when + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_1" }, + { title: "", output: "test", metadata: null } + ) + + //#then + expect(ctx.client.session.summarize).not.toHaveBeenCalled() + }) }) diff --git a/src/shared/dynamic-truncator.test.ts b/src/shared/dynamic-truncator.test.ts new file mode 100644 index 00000000..91105bc7 --- /dev/null +++ b/src/shared/dynamic-truncator.test.ts @@ -0,0 +1,96 @@ +/// + +import { describe, expect, it, afterEach } from "bun:test" + +import { getContextWindowUsage } from "./dynamic-truncator" + +const ANTHROPIC_CONTEXT_ENV_KEY = "ANTHROPIC_1M_CONTEXT" +const VERTEX_CONTEXT_ENV_KEY = "VERTEX_ANTHROPIC_1M_CONTEXT" + +const originalAnthropicContextEnv = process.env[ANTHROPIC_CONTEXT_ENV_KEY] +const originalVertexContextEnv = process.env[VERTEX_CONTEXT_ENV_KEY] + +function resetContextLimitEnv(): void { + if (originalAnthropicContextEnv === undefined) { + delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] + } else { + process.env[ANTHROPIC_CONTEXT_ENV_KEY] = originalAnthropicContextEnv + } + + if (originalVertexContextEnv === undefined) { + delete process.env[VERTEX_CONTEXT_ENV_KEY] + } else { + process.env[VERTEX_CONTEXT_ENV_KEY] = originalVertexContextEnv + } +} + +function createContextUsageMockContext(inputTokens: number) { + return { + client: { + session: { + messages: async () => ({ + data: [ + { + info: { + role: "assistant", + tokens: { + input: inputTokens, + output: 0, + reasoning: 0, + cache: { read: 0, write: 0 }, + }, + }, + }, + ], + }), + }, + }, + } +} + +describe("getContextWindowUsage", () => { + afterEach(() => { + resetContextLimitEnv() + }) + + it("uses 1M limit when model cache flag is enabled", async () => { + //#given + delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] + delete process.env[VERTEX_CONTEXT_ENV_KEY] + const ctx = createContextUsageMockContext(300000) + + //#when + const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", true) + + //#then + expect(usage?.usagePercentage).toBe(0.3) + expect(usage?.remainingTokens).toBe(700000) + }) + + it("uses 200K limit when model cache flag is disabled and env vars are unset", async () => { + //#given + delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] + delete process.env[VERTEX_CONTEXT_ENV_KEY] + const ctx = createContextUsageMockContext(150000) + + //#when + const usage = await getContextWindowUsage(ctx as never, "ses_default", false) + + //#then + expect(usage?.usagePercentage).toBe(0.75) + expect(usage?.remainingTokens).toBe(50000) + }) + + it("keeps env var fallback when model cache flag is disabled", async () => { + //#given + process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true" + const ctx = createContextUsageMockContext(300000) + + //#when + const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", false) + + //#then + expect(usage?.usagePercentage).toBe(0.3) + expect(usage?.remainingTokens).toBe(700000) + }) +}) From d78669126024c925c582f08b014b1db37326c587 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 17 Feb 2026 10:45:48 +0900 Subject: [PATCH 3/3] fix: read anthropic 1m flag from live model cache state --- src/hooks/context-window-monitor.test.ts | 8 ++++++-- src/hooks/context-window-monitor.ts | 12 +++++++---- src/hooks/directory-agents-injector/hook.ts | 4 ++-- src/hooks/directory-readme-injector/hook.ts | 4 ++-- src/hooks/preemptive-compaction.test.ts | 8 ++++++-- src/hooks/preemptive-compaction.ts | 12 +++++++---- src/hooks/rules-injector/hook.ts | 4 ++-- src/hooks/tool-output-truncator.ts | 4 ++-- src/plugin/hooks/create-session-hooks.ts | 4 ++-- src/plugin/hooks/create-tool-guard-hooks.ts | 8 ++++---- src/shared/dynamic-truncator.test.ts | 12 ++++++++--- src/shared/dynamic-truncator.ts | 22 ++++++++++++--------- 12 files changed, 64 insertions(+), 38 deletions(-) diff --git a/src/hooks/context-window-monitor.test.ts b/src/hooks/context-window-monitor.test.ts index d0f8de3f..515e94f2 100644 --- a/src/hooks/context-window-monitor.test.ts +++ b/src/hooks/context-window-monitor.test.ts @@ -249,7 +249,9 @@ describe("context-window-monitor", () => { it("should use 1M limit when model cache flag is enabled", async () => { //#given - const hook = createContextWindowMonitorHook(ctx as never, true) + const hook = createContextWindowMonitorHook(ctx as never, { + anthropicContext1MEnabled: true, + }) const sessionID = "ses_1m_flag" await hook.event({ @@ -286,7 +288,9 @@ describe("context-window-monitor", () => { it("should keep env var fallback when model cache flag is disabled", async () => { //#given process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true" - const hook = createContextWindowMonitorHook(ctx as never, false) + const hook = createContextWindowMonitorHook(ctx as never, { + anthropicContext1MEnabled: false, + }) const sessionID = "ses_env_fallback" await hook.event({ diff --git a/src/hooks/context-window-monitor.ts b/src/hooks/context-window-monitor.ts index 91d99a76..399c0810 100644 --- a/src/hooks/context-window-monitor.ts +++ b/src/hooks/context-window-monitor.ts @@ -5,8 +5,12 @@ const ANTHROPIC_DISPLAY_LIMIT = 1_000_000 const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000 const CONTEXT_WARNING_THRESHOLD = 0.70 -function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number { - return anthropicContext1MEnabled || +type ModelCacheStateLike = { + anthropicContext1MEnabled: boolean +} + +function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number { + return (modelCacheState?.anthropicContext1MEnabled ?? false) || process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" ? 1_000_000 @@ -37,7 +41,7 @@ function isAnthropicProvider(providerID: string): boolean { export function createContextWindowMonitorHook( _ctx: PluginInput, - anthropicContext1MEnabled = false, + modelCacheState?: ModelCacheStateLike, ) { const remindedSessions = new Set() const tokenCache = new Map() @@ -59,7 +63,7 @@ export function createContextWindowMonitorHook( const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) const actualUsagePercentage = - totalInputTokens / getAnthropicActualLimit(anthropicContext1MEnabled) + totalInputTokens / getAnthropicActualLimit(modelCacheState) if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return diff --git a/src/hooks/directory-agents-injector/hook.ts b/src/hooks/directory-agents-injector/hook.ts index 0dd431a7..fba64cc7 100644 --- a/src/hooks/directory-agents-injector/hook.ts +++ b/src/hooks/directory-agents-injector/hook.ts @@ -29,10 +29,10 @@ interface EventInput { export function createDirectoryAgentsInjectorHook( ctx: PluginInput, - anthropicContext1MEnabled?: boolean, + modelCacheState?: { anthropicContext1MEnabled: boolean }, ) { const sessionCaches = new Map>(); - const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); + const truncator = createDynamicTruncator(ctx, modelCacheState); const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => { const toolName = input.tool.toLowerCase(); diff --git a/src/hooks/directory-readme-injector/hook.ts b/src/hooks/directory-readme-injector/hook.ts index 608f8123..d621c7f2 100644 --- a/src/hooks/directory-readme-injector/hook.ts +++ b/src/hooks/directory-readme-injector/hook.ts @@ -29,10 +29,10 @@ interface EventInput { export function createDirectoryReadmeInjectorHook( ctx: PluginInput, - anthropicContext1MEnabled?: boolean, + modelCacheState?: { anthropicContext1MEnabled: boolean }, ) { const sessionCaches = new Map>(); - const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); + const truncator = createDynamicTruncator(ctx, modelCacheState); const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => { const toolName = input.tool.toLowerCase(); diff --git a/src/hooks/preemptive-compaction.test.ts b/src/hooks/preemptive-compaction.test.ts index 41e704e9..1550a5d5 100644 --- a/src/hooks/preemptive-compaction.test.ts +++ b/src/hooks/preemptive-compaction.test.ts @@ -269,7 +269,9 @@ describe("preemptive-compaction", () => { it("should use 1M limit when model cache flag is enabled", async () => { //#given - const hook = createPreemptiveCompactionHook(ctx as never, true) + const hook = createPreemptiveCompactionHook(ctx as never, { + anthropicContext1MEnabled: true, + }) const sessionID = "ses_1m_flag" await hook.event({ @@ -306,7 +308,9 @@ describe("preemptive-compaction", () => { it("should keep env var fallback when model cache flag is disabled", async () => { //#given process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true" - const hook = createPreemptiveCompactionHook(ctx as never, false) + const hook = createPreemptiveCompactionHook(ctx as never, { + anthropicContext1MEnabled: false, + }) const sessionID = "ses_env_fallback" await hook.event({ diff --git a/src/hooks/preemptive-compaction.ts b/src/hooks/preemptive-compaction.ts index 5157ab7a..3e60e2a8 100644 --- a/src/hooks/preemptive-compaction.ts +++ b/src/hooks/preemptive-compaction.ts @@ -2,8 +2,12 @@ import { log } from "../shared/logger" const DEFAULT_ACTUAL_LIMIT = 200_000 -function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number { - return anthropicContext1MEnabled || +type ModelCacheStateLike = { + anthropicContext1MEnabled: boolean +} + +function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number { + return (modelCacheState?.anthropicContext1MEnabled ?? false) || process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" ? 1_000_000 @@ -47,7 +51,7 @@ type PluginInput = { export function createPreemptiveCompactionHook( ctx: PluginInput, - anthropicContext1MEnabled = false, + modelCacheState?: ModelCacheStateLike, ) { const compactionInProgress = new Set() const compactedSessions = new Set() @@ -65,7 +69,7 @@ export function createPreemptiveCompactionHook( const actualLimit = isAnthropicProvider(cached.providerID) - ? getAnthropicActualLimit(anthropicContext1MEnabled) + ? getAnthropicActualLimit(modelCacheState) : DEFAULT_ACTUAL_LIMIT const lastTokens = cached.tokens diff --git a/src/hooks/rules-injector/hook.ts b/src/hooks/rules-injector/hook.ts index 4300fba5..fec4ffd2 100644 --- a/src/hooks/rules-injector/hook.ts +++ b/src/hooks/rules-injector/hook.ts @@ -31,9 +31,9 @@ const TRACKED_TOOLS = ["read", "write", "edit", "multiedit"]; export function createRulesInjectorHook( ctx: PluginInput, - anthropicContext1MEnabled?: boolean, + modelCacheState?: { anthropicContext1MEnabled: boolean }, ) { - const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); + const truncator = createDynamicTruncator(ctx, modelCacheState); const { getSessionCache, clearSessionCache } = createSessionCacheStore(); const { processFilePathForInjection } = createRuleInjectionProcessor({ workspaceDirectory: ctx.directory, diff --git a/src/hooks/tool-output-truncator.ts b/src/hooks/tool-output-truncator.ts index f3d88013..f47bf199 100644 --- a/src/hooks/tool-output-truncator.ts +++ b/src/hooks/tool-output-truncator.ts @@ -27,12 +27,12 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record = { } interface ToolOutputTruncatorOptions { - anthropicContext1MEnabled?: boolean + modelCacheState?: { anthropicContext1MEnabled: boolean } experimental?: ExperimentalConfig } export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOutputTruncatorOptions) { - const truncator = createDynamicTruncator(ctx, options?.anthropicContext1MEnabled) + const truncator = createDynamicTruncator(ctx, options?.modelCacheState) const truncateAll = options?.experimental?.truncate_all_tool_outputs ?? false const toolExecuteAfter = async ( diff --git a/src/plugin/hooks/create-session-hooks.ts b/src/plugin/hooks/create-session-hooks.ts index 457adda2..3d6d1c89 100644 --- a/src/plugin/hooks/create-session-hooks.ts +++ b/src/plugin/hooks/create-session-hooks.ts @@ -66,14 +66,14 @@ export function createSessionHooks(args: { const contextWindowMonitor = isHookEnabled("context-window-monitor") ? safeHook("context-window-monitor", () => - createContextWindowMonitorHook(ctx, modelCacheState.anthropicContext1MEnabled)) + createContextWindowMonitorHook(ctx, modelCacheState)) : null const preemptiveCompaction = isHookEnabled("preemptive-compaction") && pluginConfig.experimental?.preemptive_compaction ? safeHook("preemptive-compaction", () => - createPreemptiveCompactionHook(ctx, modelCacheState.anthropicContext1MEnabled)) + createPreemptiveCompactionHook(ctx, modelCacheState)) : null const sessionRecovery = isHookEnabled("session-recovery") diff --git a/src/plugin/hooks/create-tool-guard-hooks.ts b/src/plugin/hooks/create-tool-guard-hooks.ts index b49b60b4..b762dfde 100644 --- a/src/plugin/hooks/create-tool-guard-hooks.ts +++ b/src/plugin/hooks/create-tool-guard-hooks.ts @@ -51,7 +51,7 @@ export function createToolGuardHooks(args: { const toolOutputTruncator = isHookEnabled("tool-output-truncator") ? safeHook("tool-output-truncator", () => createToolOutputTruncatorHook(ctx, { - anthropicContext1MEnabled: modelCacheState.anthropicContext1MEnabled, + modelCacheState, experimental: pluginConfig.experimental, })) : null @@ -68,13 +68,13 @@ export function createToolGuardHooks(args: { }) } else { directoryAgentsInjector = safeHook("directory-agents-injector", () => - createDirectoryAgentsInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) + createDirectoryAgentsInjectorHook(ctx, modelCacheState)) } } const directoryReadmeInjector = isHookEnabled("directory-readme-injector") ? safeHook("directory-readme-injector", () => - createDirectoryReadmeInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) + createDirectoryReadmeInjectorHook(ctx, modelCacheState)) : null const emptyTaskResponseDetector = isHookEnabled("empty-task-response-detector") @@ -83,7 +83,7 @@ export function createToolGuardHooks(args: { const rulesInjector = isHookEnabled("rules-injector") ? safeHook("rules-injector", () => - createRulesInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) + createRulesInjectorHook(ctx, modelCacheState)) : null const tasksTodowriteDisabler = isHookEnabled("tasks-todowrite-disabler") diff --git a/src/shared/dynamic-truncator.test.ts b/src/shared/dynamic-truncator.test.ts index 91105bc7..0a91d709 100644 --- a/src/shared/dynamic-truncator.test.ts +++ b/src/shared/dynamic-truncator.test.ts @@ -60,7 +60,9 @@ describe("getContextWindowUsage", () => { const ctx = createContextUsageMockContext(300000) //#when - const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", true) + const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", { + anthropicContext1MEnabled: true, + }) //#then expect(usage?.usagePercentage).toBe(0.3) @@ -74,7 +76,9 @@ describe("getContextWindowUsage", () => { const ctx = createContextUsageMockContext(150000) //#when - const usage = await getContextWindowUsage(ctx as never, "ses_default", false) + const usage = await getContextWindowUsage(ctx as never, "ses_default", { + anthropicContext1MEnabled: false, + }) //#then expect(usage?.usagePercentage).toBe(0.75) @@ -87,7 +91,9 @@ describe("getContextWindowUsage", () => { const ctx = createContextUsageMockContext(300000) //#when - const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", false) + const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", { + anthropicContext1MEnabled: false, + }) //#then expect(usage?.usagePercentage).toBe(0.3) diff --git a/src/shared/dynamic-truncator.ts b/src/shared/dynamic-truncator.ts index 15432ce6..5236f3e7 100644 --- a/src/shared/dynamic-truncator.ts +++ b/src/shared/dynamic-truncator.ts @@ -5,8 +5,12 @@ const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000; const CHARS_PER_TOKEN_ESTIMATE = 4; const DEFAULT_TARGET_MAX_TOKENS = 50_000; -function getAnthropicActualLimit(anthropicContext1MEnabled = false): number { - return anthropicContext1MEnabled || +type ModelCacheStateLike = { + anthropicContext1MEnabled: boolean; +} + +function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number { + return (modelCacheState?.anthropicContext1MEnabled ?? false) || process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" ? 1_000_000 @@ -114,7 +118,7 @@ export function truncateToTokenLimit( export async function getContextWindowUsage( ctx: PluginInput, sessionID: string, - anthropicContext1MEnabled = false, + modelCacheState?: ModelCacheStateLike, ): Promise<{ usedTokens: number; remainingTokens: number; @@ -139,7 +143,7 @@ export async function getContextWindowUsage( (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) + (lastTokens?.output ?? 0); - const anthropicActualLimit = getAnthropicActualLimit(anthropicContext1MEnabled); + const anthropicActualLimit = getAnthropicActualLimit(modelCacheState); const remainingTokens = anthropicActualLimit - usedTokens; return { @@ -157,7 +161,7 @@ export async function dynamicTruncate( sessionID: string, output: string, options: TruncationOptions = {}, - anthropicContext1MEnabled = false, + modelCacheState?: ModelCacheStateLike, ): Promise { if (typeof output !== 'string') { return { result: String(output ?? ''), truncated: false }; @@ -168,7 +172,7 @@ export async function dynamicTruncate( preserveHeaderLines = 3, } = options; - const usage = await getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled); + const usage = await getContextWindowUsage(ctx, sessionID, modelCacheState); if (!usage) { // Fallback: apply conservative truncation when context usage unavailable @@ -192,17 +196,17 @@ export async function dynamicTruncate( export function createDynamicTruncator( ctx: PluginInput, - anthropicContext1MEnabled?: boolean, + modelCacheState?: ModelCacheStateLike, ) { return { truncate: ( sessionID: string, output: string, options?: TruncationOptions, - ) => dynamicTruncate(ctx, sessionID, output, options, anthropicContext1MEnabled), + ) => dynamicTruncate(ctx, sessionID, output, options, modelCacheState), getUsage: (sessionID: string) => - getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled), + getContextWindowUsage(ctx, sessionID, modelCacheState), truncateSync: ( output: string,