fix: read anthropic 1m flag from live model cache state
This commit is contained in:
parent
363016681b
commit
d786691260
@ -249,7 +249,9 @@ describe("context-window-monitor", () => {
|
|||||||
|
|
||||||
it("should use 1M limit when model cache flag is enabled", async () => {
|
it("should use 1M limit when model cache flag is enabled", async () => {
|
||||||
//#given
|
//#given
|
||||||
const hook = createContextWindowMonitorHook(ctx as never, true)
|
const hook = createContextWindowMonitorHook(ctx as never, {
|
||||||
|
anthropicContext1MEnabled: true,
|
||||||
|
})
|
||||||
const sessionID = "ses_1m_flag"
|
const sessionID = "ses_1m_flag"
|
||||||
|
|
||||||
await hook.event({
|
await hook.event({
|
||||||
@ -286,7 +288,9 @@ describe("context-window-monitor", () => {
|
|||||||
it("should keep env var fallback when model cache flag is disabled", async () => {
|
it("should keep env var fallback when model cache flag is disabled", async () => {
|
||||||
//#given
|
//#given
|
||||||
process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true"
|
process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true"
|
||||||
const hook = createContextWindowMonitorHook(ctx as never, false)
|
const hook = createContextWindowMonitorHook(ctx as never, {
|
||||||
|
anthropicContext1MEnabled: false,
|
||||||
|
})
|
||||||
const sessionID = "ses_env_fallback"
|
const sessionID = "ses_env_fallback"
|
||||||
|
|
||||||
await hook.event({
|
await hook.event({
|
||||||
|
|||||||
@ -5,8 +5,12 @@ const ANTHROPIC_DISPLAY_LIMIT = 1_000_000
|
|||||||
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
|
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
|
||||||
const CONTEXT_WARNING_THRESHOLD = 0.70
|
const CONTEXT_WARNING_THRESHOLD = 0.70
|
||||||
|
|
||||||
function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number {
|
type ModelCacheStateLike = {
|
||||||
return anthropicContext1MEnabled ||
|
anthropicContext1MEnabled: boolean
|
||||||
|
}
|
||||||
|
|
||||||
|
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
|
||||||
|
return (modelCacheState?.anthropicContext1MEnabled ?? false) ||
|
||||||
process.env.ANTHROPIC_1M_CONTEXT === "true" ||
|
process.env.ANTHROPIC_1M_CONTEXT === "true" ||
|
||||||
process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
|
process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
|
||||||
? 1_000_000
|
? 1_000_000
|
||||||
@ -37,7 +41,7 @@ function isAnthropicProvider(providerID: string): boolean {
|
|||||||
|
|
||||||
export function createContextWindowMonitorHook(
|
export function createContextWindowMonitorHook(
|
||||||
_ctx: PluginInput,
|
_ctx: PluginInput,
|
||||||
anthropicContext1MEnabled = false,
|
modelCacheState?: ModelCacheStateLike,
|
||||||
) {
|
) {
|
||||||
const remindedSessions = new Set<string>()
|
const remindedSessions = new Set<string>()
|
||||||
const tokenCache = new Map<string, CachedTokenState>()
|
const tokenCache = new Map<string, CachedTokenState>()
|
||||||
@ -59,7 +63,7 @@ export function createContextWindowMonitorHook(
|
|||||||
const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
|
const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
|
||||||
|
|
||||||
const actualUsagePercentage =
|
const actualUsagePercentage =
|
||||||
totalInputTokens / getAnthropicActualLimit(anthropicContext1MEnabled)
|
totalInputTokens / getAnthropicActualLimit(modelCacheState)
|
||||||
|
|
||||||
if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return
|
if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return
|
||||||
|
|
||||||
|
|||||||
@ -29,10 +29,10 @@ interface EventInput {
|
|||||||
|
|
||||||
export function createDirectoryAgentsInjectorHook(
|
export function createDirectoryAgentsInjectorHook(
|
||||||
ctx: PluginInput,
|
ctx: PluginInput,
|
||||||
anthropicContext1MEnabled?: boolean,
|
modelCacheState?: { anthropicContext1MEnabled: boolean },
|
||||||
) {
|
) {
|
||||||
const sessionCaches = new Map<string, Set<string>>();
|
const sessionCaches = new Map<string, Set<string>>();
|
||||||
const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled);
|
const truncator = createDynamicTruncator(ctx, modelCacheState);
|
||||||
|
|
||||||
const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => {
|
const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => {
|
||||||
const toolName = input.tool.toLowerCase();
|
const toolName = input.tool.toLowerCase();
|
||||||
|
|||||||
@ -29,10 +29,10 @@ interface EventInput {
|
|||||||
|
|
||||||
export function createDirectoryReadmeInjectorHook(
|
export function createDirectoryReadmeInjectorHook(
|
||||||
ctx: PluginInput,
|
ctx: PluginInput,
|
||||||
anthropicContext1MEnabled?: boolean,
|
modelCacheState?: { anthropicContext1MEnabled: boolean },
|
||||||
) {
|
) {
|
||||||
const sessionCaches = new Map<string, Set<string>>();
|
const sessionCaches = new Map<string, Set<string>>();
|
||||||
const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled);
|
const truncator = createDynamicTruncator(ctx, modelCacheState);
|
||||||
|
|
||||||
const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => {
|
const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => {
|
||||||
const toolName = input.tool.toLowerCase();
|
const toolName = input.tool.toLowerCase();
|
||||||
|
|||||||
@ -269,7 +269,9 @@ describe("preemptive-compaction", () => {
|
|||||||
|
|
||||||
it("should use 1M limit when model cache flag is enabled", async () => {
|
it("should use 1M limit when model cache flag is enabled", async () => {
|
||||||
//#given
|
//#given
|
||||||
const hook = createPreemptiveCompactionHook(ctx as never, true)
|
const hook = createPreemptiveCompactionHook(ctx as never, {
|
||||||
|
anthropicContext1MEnabled: true,
|
||||||
|
})
|
||||||
const sessionID = "ses_1m_flag"
|
const sessionID = "ses_1m_flag"
|
||||||
|
|
||||||
await hook.event({
|
await hook.event({
|
||||||
@ -306,7 +308,9 @@ describe("preemptive-compaction", () => {
|
|||||||
it("should keep env var fallback when model cache flag is disabled", async () => {
|
it("should keep env var fallback when model cache flag is disabled", async () => {
|
||||||
//#given
|
//#given
|
||||||
process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true"
|
process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true"
|
||||||
const hook = createPreemptiveCompactionHook(ctx as never, false)
|
const hook = createPreemptiveCompactionHook(ctx as never, {
|
||||||
|
anthropicContext1MEnabled: false,
|
||||||
|
})
|
||||||
const sessionID = "ses_env_fallback"
|
const sessionID = "ses_env_fallback"
|
||||||
|
|
||||||
await hook.event({
|
await hook.event({
|
||||||
|
|||||||
@ -2,8 +2,12 @@ import { log } from "../shared/logger"
|
|||||||
|
|
||||||
const DEFAULT_ACTUAL_LIMIT = 200_000
|
const DEFAULT_ACTUAL_LIMIT = 200_000
|
||||||
|
|
||||||
function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number {
|
type ModelCacheStateLike = {
|
||||||
return anthropicContext1MEnabled ||
|
anthropicContext1MEnabled: boolean
|
||||||
|
}
|
||||||
|
|
||||||
|
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
|
||||||
|
return (modelCacheState?.anthropicContext1MEnabled ?? false) ||
|
||||||
process.env.ANTHROPIC_1M_CONTEXT === "true" ||
|
process.env.ANTHROPIC_1M_CONTEXT === "true" ||
|
||||||
process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
|
process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
|
||||||
? 1_000_000
|
? 1_000_000
|
||||||
@ -47,7 +51,7 @@ type PluginInput = {
|
|||||||
|
|
||||||
export function createPreemptiveCompactionHook(
|
export function createPreemptiveCompactionHook(
|
||||||
ctx: PluginInput,
|
ctx: PluginInput,
|
||||||
anthropicContext1MEnabled = false,
|
modelCacheState?: ModelCacheStateLike,
|
||||||
) {
|
) {
|
||||||
const compactionInProgress = new Set<string>()
|
const compactionInProgress = new Set<string>()
|
||||||
const compactedSessions = new Set<string>()
|
const compactedSessions = new Set<string>()
|
||||||
@ -65,7 +69,7 @@ export function createPreemptiveCompactionHook(
|
|||||||
|
|
||||||
const actualLimit =
|
const actualLimit =
|
||||||
isAnthropicProvider(cached.providerID)
|
isAnthropicProvider(cached.providerID)
|
||||||
? getAnthropicActualLimit(anthropicContext1MEnabled)
|
? getAnthropicActualLimit(modelCacheState)
|
||||||
: DEFAULT_ACTUAL_LIMIT
|
: DEFAULT_ACTUAL_LIMIT
|
||||||
|
|
||||||
const lastTokens = cached.tokens
|
const lastTokens = cached.tokens
|
||||||
|
|||||||
@ -31,9 +31,9 @@ const TRACKED_TOOLS = ["read", "write", "edit", "multiedit"];
|
|||||||
|
|
||||||
export function createRulesInjectorHook(
|
export function createRulesInjectorHook(
|
||||||
ctx: PluginInput,
|
ctx: PluginInput,
|
||||||
anthropicContext1MEnabled?: boolean,
|
modelCacheState?: { anthropicContext1MEnabled: boolean },
|
||||||
) {
|
) {
|
||||||
const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled);
|
const truncator = createDynamicTruncator(ctx, modelCacheState);
|
||||||
const { getSessionCache, clearSessionCache } = createSessionCacheStore();
|
const { getSessionCache, clearSessionCache } = createSessionCacheStore();
|
||||||
const { processFilePathForInjection } = createRuleInjectionProcessor({
|
const { processFilePathForInjection } = createRuleInjectionProcessor({
|
||||||
workspaceDirectory: ctx.directory,
|
workspaceDirectory: ctx.directory,
|
||||||
|
|||||||
@ -27,12 +27,12 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
interface ToolOutputTruncatorOptions {
|
interface ToolOutputTruncatorOptions {
|
||||||
anthropicContext1MEnabled?: boolean
|
modelCacheState?: { anthropicContext1MEnabled: boolean }
|
||||||
experimental?: ExperimentalConfig
|
experimental?: ExperimentalConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOutputTruncatorOptions) {
|
export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOutputTruncatorOptions) {
|
||||||
const truncator = createDynamicTruncator(ctx, options?.anthropicContext1MEnabled)
|
const truncator = createDynamicTruncator(ctx, options?.modelCacheState)
|
||||||
const truncateAll = options?.experimental?.truncate_all_tool_outputs ?? false
|
const truncateAll = options?.experimental?.truncate_all_tool_outputs ?? false
|
||||||
|
|
||||||
const toolExecuteAfter = async (
|
const toolExecuteAfter = async (
|
||||||
|
|||||||
@ -66,14 +66,14 @@ export function createSessionHooks(args: {
|
|||||||
|
|
||||||
const contextWindowMonitor = isHookEnabled("context-window-monitor")
|
const contextWindowMonitor = isHookEnabled("context-window-monitor")
|
||||||
? safeHook("context-window-monitor", () =>
|
? safeHook("context-window-monitor", () =>
|
||||||
createContextWindowMonitorHook(ctx, modelCacheState.anthropicContext1MEnabled))
|
createContextWindowMonitorHook(ctx, modelCacheState))
|
||||||
: null
|
: null
|
||||||
|
|
||||||
const preemptiveCompaction =
|
const preemptiveCompaction =
|
||||||
isHookEnabled("preemptive-compaction") &&
|
isHookEnabled("preemptive-compaction") &&
|
||||||
pluginConfig.experimental?.preemptive_compaction
|
pluginConfig.experimental?.preemptive_compaction
|
||||||
? safeHook("preemptive-compaction", () =>
|
? safeHook("preemptive-compaction", () =>
|
||||||
createPreemptiveCompactionHook(ctx, modelCacheState.anthropicContext1MEnabled))
|
createPreemptiveCompactionHook(ctx, modelCacheState))
|
||||||
: null
|
: null
|
||||||
|
|
||||||
const sessionRecovery = isHookEnabled("session-recovery")
|
const sessionRecovery = isHookEnabled("session-recovery")
|
||||||
|
|||||||
@ -51,7 +51,7 @@ export function createToolGuardHooks(args: {
|
|||||||
const toolOutputTruncator = isHookEnabled("tool-output-truncator")
|
const toolOutputTruncator = isHookEnabled("tool-output-truncator")
|
||||||
? safeHook("tool-output-truncator", () =>
|
? safeHook("tool-output-truncator", () =>
|
||||||
createToolOutputTruncatorHook(ctx, {
|
createToolOutputTruncatorHook(ctx, {
|
||||||
anthropicContext1MEnabled: modelCacheState.anthropicContext1MEnabled,
|
modelCacheState,
|
||||||
experimental: pluginConfig.experimental,
|
experimental: pluginConfig.experimental,
|
||||||
}))
|
}))
|
||||||
: null
|
: null
|
||||||
@ -68,13 +68,13 @@ export function createToolGuardHooks(args: {
|
|||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
directoryAgentsInjector = safeHook("directory-agents-injector", () =>
|
directoryAgentsInjector = safeHook("directory-agents-injector", () =>
|
||||||
createDirectoryAgentsInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled))
|
createDirectoryAgentsInjectorHook(ctx, modelCacheState))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const directoryReadmeInjector = isHookEnabled("directory-readme-injector")
|
const directoryReadmeInjector = isHookEnabled("directory-readme-injector")
|
||||||
? safeHook("directory-readme-injector", () =>
|
? safeHook("directory-readme-injector", () =>
|
||||||
createDirectoryReadmeInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled))
|
createDirectoryReadmeInjectorHook(ctx, modelCacheState))
|
||||||
: null
|
: null
|
||||||
|
|
||||||
const emptyTaskResponseDetector = isHookEnabled("empty-task-response-detector")
|
const emptyTaskResponseDetector = isHookEnabled("empty-task-response-detector")
|
||||||
@ -83,7 +83,7 @@ export function createToolGuardHooks(args: {
|
|||||||
|
|
||||||
const rulesInjector = isHookEnabled("rules-injector")
|
const rulesInjector = isHookEnabled("rules-injector")
|
||||||
? safeHook("rules-injector", () =>
|
? safeHook("rules-injector", () =>
|
||||||
createRulesInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled))
|
createRulesInjectorHook(ctx, modelCacheState))
|
||||||
: null
|
: null
|
||||||
|
|
||||||
const tasksTodowriteDisabler = isHookEnabled("tasks-todowrite-disabler")
|
const tasksTodowriteDisabler = isHookEnabled("tasks-todowrite-disabler")
|
||||||
|
|||||||
@ -60,7 +60,9 @@ describe("getContextWindowUsage", () => {
|
|||||||
const ctx = createContextUsageMockContext(300000)
|
const ctx = createContextUsageMockContext(300000)
|
||||||
|
|
||||||
//#when
|
//#when
|
||||||
const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", true)
|
const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", {
|
||||||
|
anthropicContext1MEnabled: true,
|
||||||
|
})
|
||||||
|
|
||||||
//#then
|
//#then
|
||||||
expect(usage?.usagePercentage).toBe(0.3)
|
expect(usage?.usagePercentage).toBe(0.3)
|
||||||
@ -74,7 +76,9 @@ describe("getContextWindowUsage", () => {
|
|||||||
const ctx = createContextUsageMockContext(150000)
|
const ctx = createContextUsageMockContext(150000)
|
||||||
|
|
||||||
//#when
|
//#when
|
||||||
const usage = await getContextWindowUsage(ctx as never, "ses_default", false)
|
const usage = await getContextWindowUsage(ctx as never, "ses_default", {
|
||||||
|
anthropicContext1MEnabled: false,
|
||||||
|
})
|
||||||
|
|
||||||
//#then
|
//#then
|
||||||
expect(usage?.usagePercentage).toBe(0.75)
|
expect(usage?.usagePercentage).toBe(0.75)
|
||||||
@ -87,7 +91,9 @@ describe("getContextWindowUsage", () => {
|
|||||||
const ctx = createContextUsageMockContext(300000)
|
const ctx = createContextUsageMockContext(300000)
|
||||||
|
|
||||||
//#when
|
//#when
|
||||||
const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", false)
|
const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", {
|
||||||
|
anthropicContext1MEnabled: false,
|
||||||
|
})
|
||||||
|
|
||||||
//#then
|
//#then
|
||||||
expect(usage?.usagePercentage).toBe(0.3)
|
expect(usage?.usagePercentage).toBe(0.3)
|
||||||
|
|||||||
@ -5,8 +5,12 @@ const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000;
|
|||||||
const CHARS_PER_TOKEN_ESTIMATE = 4;
|
const CHARS_PER_TOKEN_ESTIMATE = 4;
|
||||||
const DEFAULT_TARGET_MAX_TOKENS = 50_000;
|
const DEFAULT_TARGET_MAX_TOKENS = 50_000;
|
||||||
|
|
||||||
function getAnthropicActualLimit(anthropicContext1MEnabled = false): number {
|
type ModelCacheStateLike = {
|
||||||
return anthropicContext1MEnabled ||
|
anthropicContext1MEnabled: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
|
||||||
|
return (modelCacheState?.anthropicContext1MEnabled ?? false) ||
|
||||||
process.env.ANTHROPIC_1M_CONTEXT === "true" ||
|
process.env.ANTHROPIC_1M_CONTEXT === "true" ||
|
||||||
process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
|
process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
|
||||||
? 1_000_000
|
? 1_000_000
|
||||||
@ -114,7 +118,7 @@ export function truncateToTokenLimit(
|
|||||||
export async function getContextWindowUsage(
|
export async function getContextWindowUsage(
|
||||||
ctx: PluginInput,
|
ctx: PluginInput,
|
||||||
sessionID: string,
|
sessionID: string,
|
||||||
anthropicContext1MEnabled = false,
|
modelCacheState?: ModelCacheStateLike,
|
||||||
): Promise<{
|
): Promise<{
|
||||||
usedTokens: number;
|
usedTokens: number;
|
||||||
remainingTokens: number;
|
remainingTokens: number;
|
||||||
@ -139,7 +143,7 @@ export async function getContextWindowUsage(
|
|||||||
(lastTokens?.input ?? 0) +
|
(lastTokens?.input ?? 0) +
|
||||||
(lastTokens?.cache?.read ?? 0) +
|
(lastTokens?.cache?.read ?? 0) +
|
||||||
(lastTokens?.output ?? 0);
|
(lastTokens?.output ?? 0);
|
||||||
const anthropicActualLimit = getAnthropicActualLimit(anthropicContext1MEnabled);
|
const anthropicActualLimit = getAnthropicActualLimit(modelCacheState);
|
||||||
const remainingTokens = anthropicActualLimit - usedTokens;
|
const remainingTokens = anthropicActualLimit - usedTokens;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@ -157,7 +161,7 @@ export async function dynamicTruncate(
|
|||||||
sessionID: string,
|
sessionID: string,
|
||||||
output: string,
|
output: string,
|
||||||
options: TruncationOptions = {},
|
options: TruncationOptions = {},
|
||||||
anthropicContext1MEnabled = false,
|
modelCacheState?: ModelCacheStateLike,
|
||||||
): Promise<TruncationResult> {
|
): Promise<TruncationResult> {
|
||||||
if (typeof output !== 'string') {
|
if (typeof output !== 'string') {
|
||||||
return { result: String(output ?? ''), truncated: false };
|
return { result: String(output ?? ''), truncated: false };
|
||||||
@ -168,7 +172,7 @@ export async function dynamicTruncate(
|
|||||||
preserveHeaderLines = 3,
|
preserveHeaderLines = 3,
|
||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
const usage = await getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled);
|
const usage = await getContextWindowUsage(ctx, sessionID, modelCacheState);
|
||||||
|
|
||||||
if (!usage) {
|
if (!usage) {
|
||||||
// Fallback: apply conservative truncation when context usage unavailable
|
// Fallback: apply conservative truncation when context usage unavailable
|
||||||
@ -192,17 +196,17 @@ export async function dynamicTruncate(
|
|||||||
|
|
||||||
export function createDynamicTruncator(
|
export function createDynamicTruncator(
|
||||||
ctx: PluginInput,
|
ctx: PluginInput,
|
||||||
anthropicContext1MEnabled?: boolean,
|
modelCacheState?: ModelCacheStateLike,
|
||||||
) {
|
) {
|
||||||
return {
|
return {
|
||||||
truncate: (
|
truncate: (
|
||||||
sessionID: string,
|
sessionID: string,
|
||||||
output: string,
|
output: string,
|
||||||
options?: TruncationOptions,
|
options?: TruncationOptions,
|
||||||
) => dynamicTruncate(ctx, sessionID, output, options, anthropicContext1MEnabled),
|
) => dynamicTruncate(ctx, sessionID, output, options, modelCacheState),
|
||||||
|
|
||||||
getUsage: (sessionID: string) =>
|
getUsage: (sessionID: string) =>
|
||||||
getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled),
|
getContextWindowUsage(ctx, sessionID, modelCacheState),
|
||||||
|
|
||||||
truncateSync: (
|
truncateSync: (
|
||||||
output: string,
|
output: string,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user