fix(preemptive-compaction): allow re-compaction after context grows and use model-specific limits

compactedSessions permanently blocked re-compaction after the first successful compaction, causing unbounded context growth (e.g. 500k on Kimi K2.5 with a 256k limit). Changes: (1) clear the compactedSessions flag on new message.updated so compaction can re-trigger when context exceeds the threshold again; (2) use modelContextLimitsCache for model-specific context limits instead of always falling back to 200k for non-Anthropic providers.
2026-03-02 23:07:39 +09:00 · 2026-03-02 23:07:39 +09:00 · 1c2caa09df
commit 1c2caa09df
parent f27fd9a6de
2 changed files with 161 additions and 4 deletions
--- a/src/hooks/preemptive-compaction.test.ts
+++ b/src/hooks/preemptive-compaction.test.ts
@ -414,4 +414,157 @@ describe("preemptive-compaction", () => {
      restoreTimeouts()
    }
  })
  // #given a session whose first compaction already succeeded
  // #when a new high-token assistant message arrives and a tool call finishes
  // #then compaction should be triggered a second time (re-compaction)
  it("should allow re-compaction when context grows after successful compaction", async () => {
    const sessionID = "ses_recompact"
    const hook = createPreemptiveCompactionHook(ctx as never, {} as never)

    // Emits a finished Anthropic assistant message reporting 170k input + 10k cache-read tokens.
    const reportHighUsage = () =>
      hook.event({
        event: {
          type: "message.updated",
          properties: {
            info: {
              role: "assistant",
              sessionID,
              providerID: "anthropic",
              modelID: "claude-sonnet-4-6",
              finish: true,
              tokens: {
                input: 170000,
                output: 0,
                reasoning: 0,
                cache: { read: 10000, write: 0 },
              },
            },
          },
        },
      })

    // Simulates the end of a bash tool call, which is where the hook evaluates usage.
    const finishToolCall = (callID: string) =>
      hook["tool.execute.after"](
        { tool: "bash", sessionID, callID },
        { title: "", output: "test", metadata: null }
      )

    // given - first compaction cycle
    await reportHighUsage()
    await finishToolCall("call_1")
    expect(ctx.client.session.summarize).toHaveBeenCalledTimes(1)

    // when - context grew again after compaction (new high-token message)
    await reportHighUsage()
    await finishToolCall("call_2")

    // then - summarize should fire again
    expect(ctx.client.session.summarize).toHaveBeenCalledTimes(2)
  })
  // #given modelContextLimitsCache holds a model-specific limit (256k) for the model in use
  // #when token usage is above 78% of the 200k default but below 78% of 256k
  // #then compaction should NOT be triggered
  it("should use model-specific context limit from modelContextLimitsCache", async () => {
    const sessionID = "ses_kimi_limit"
    // Cache key format is "<providerID>/<modelID>".
    const modelContextLimitsCache = new Map<string, number>([
      ["opencode/kimi-k2.5-free", 262144],
    ])
    const hook = createPreemptiveCompactionHook(ctx as never, {} as never, {
      anthropicContext1MEnabled: false,
      modelContextLimitsCache,
    })

    // 170k input + 10k cache read = 180k total:
    // above 78% of 200k (156k) but below 78% of 256k (204k)
    const reportUsage = () =>
      hook.event({
        event: {
          type: "message.updated",
          properties: {
            info: {
              role: "assistant",
              sessionID,
              providerID: "opencode",
              modelID: "kimi-k2.5-free",
              finish: true,
              tokens: {
                input: 170000,
                output: 0,
                reasoning: 0,
                cache: { read: 10000, write: 0 },
              },
            },
          },
        },
      })
    const finishToolCall = () =>
      hook["tool.execute.after"](
        { tool: "bash", sessionID, callID: "call_1" },
        { title: "", output: "test", metadata: null }
      )

    await reportUsage()
    await finishToolCall()

    expect(ctx.client.session.summarize).not.toHaveBeenCalled()
  })
  // #given modelContextLimitsCache holds a model-specific limit (256k) for the model in use
  // #when token usage exceeds 78% of that model-specific limit
  // #then compaction should be triggered
  it("should trigger compaction at model-specific threshold", async () => {
    const sessionID = "ses_kimi_trigger"
    // Cache key format is "<providerID>/<modelID>".
    const modelContextLimitsCache = new Map<string, number>([
      ["opencode/kimi-k2.5-free", 262144],
    ])
    const hook = createPreemptiveCompactionHook(ctx as never, {} as never, {
      anthropicContext1MEnabled: false,
      modelContextLimitsCache,
    })

    // 200k input + 10k cache read = 210k total — above 78% of 256k (≈204k)
    const reportUsage = () =>
      hook.event({
        event: {
          type: "message.updated",
          properties: {
            info: {
              role: "assistant",
              sessionID,
              providerID: "opencode",
              modelID: "kimi-k2.5-free",
              finish: true,
              tokens: {
                input: 200000,
                output: 0,
                reasoning: 0,
                cache: { read: 10000, write: 0 },
              },
            },
          },
        },
      })
    const finishToolCall = () =>
      hook["tool.execute.after"](
        { tool: "bash", sessionID, callID: "call_1" },
        { title: "", output: "test", metadata: null }
      )

    await reportUsage()
    await finishToolCall()

    expect(ctx.client.session.summarize).toHaveBeenCalled()
  })
 })
--- a/src/hooks/preemptive-compaction.ts
+++ b/src/hooks/preemptive-compaction.ts
@ -7,6 +7,7 @@ const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000
 /**
  * Minimal shape of the model cache state that the preemptive compaction hook
  * reads when deciding which context limit applies to a session.
  */
 type ModelCacheStateLike = {
  // NOTE(review): presumably switches Anthropic models to a 1M-token context limit;
  // the consumer (getAnthropicActualLimit) is not fully visible here — confirm.
  anthropicContext1MEnabled: boolean
  // Optional per-model context limits, keyed by `${providerID}/${modelID}`.
  // Looked up for non-Anthropic providers; when there is no entry the hook
  // falls back to DEFAULT_ACTUAL_LIMIT.
  modelContextLimitsCache?: Map<string, number>
 }
 function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@ -91,10 +92,12 @@ export function createPreemptiveCompactionHook(
    const cached = tokenCache.get(sessionID)
    if (!cached) return
-    const actualLimit =
+    const modelSpecificLimit = !isAnthropicProvider(cached.providerID)
-      isAnthropicProvider(cached.providerID)
+      ? modelCacheState?.modelContextLimitsCache?.get(`${cached.providerID}/${cached.modelID}`)
-        ? getAnthropicActualLimit(modelCacheState)
+      : undefined
-        : DEFAULT_ACTUAL_LIMIT
+    const actualLimit = isAnthropicProvider(cached.providerID)
      ? getAnthropicActualLimit(modelCacheState)
      : modelSpecificLimit ?? DEFAULT_ACTUAL_LIMIT
    const lastTokens = cached.tokens
    const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
@ -164,6 +167,7 @@ export function createPreemptiveCompactionHook(
        modelID: info.modelID ?? "",
        tokens: info.tokens,
      })
      compactedSessions.delete(info.sessionID)
    }
  }