fix(hooks): prefer fresh harness cost cache (#2054)

Uses a fresh harness cost cache when available and keeps transcript pricing as the fallback. Focused cost-tracker tests passed locally before merge.
2026-05-30 21:54:17 +08:00 · 2026-05-25 21:08:11 +03:00 · 2026-05-25 21:08:11 +03:00 · d243adbf8d
commit d243adbf8d
parent ee9e5a19c4
2 changed files with 137 additions and 1 deletions
--- a/scripts/hooks/cost-tracker.js
+++ b/scripts/hooks/cost-tracker.js
@ -20,15 +20,54 @@
 * Each row therefore represents the cumulative session total up to that point.
 * To get per-session cost, take the last row per session_id. To get per-day
 * spend, aggregate.
+ *
+ * Harness-cost contract (optional, opt-in by the statusline):
+ *   If the user's statusline (which receives `cost.total_cost_usd` directly
+ *   from Claude Code) writes `{ts, cost_usd}` to
+ *   `<os.tmpdir()>/harness-cost-<session_id>.json` on each render, this hook
+ *   prefers that authoritative value over the transcript-sum estimate when
+ *   the cache is fresh (≤ 300s); otherwise it falls back to the
+ *   transcript-sum. The harness value is preferred because:
+ *     - the hard-coded rate table cannot represent Opus 4.7's >200K-token
+ *       2x tier or the 1h-cache 2x tier (under-counts on long sessions);
+ *     - summing the full transcript double-counts work done across
+ *       `--resume` boundaries while `cost.total_cost_usd` is per-process.
+ *   Absent a writer, behavior is unchanged.
 */

 'use strict';

 const fs = require('fs');
+const os = require('os');
 const path = require('path');
 const { ensureDir, appendFile, getClaudeDir } = require('../lib/utils');
 const { sanitizeSessionId } = require('../lib/session-bridge');

+const HARNESS_COST_MAX_AGE_SECONDS = 300;
+
+/**
+ * Read authoritative harness cost from the per-session cache file.
+ * Non-numeric `ts`/`cost_usd` (null, '', booleans) are rejected, not coerced to 0/1.
+ * @param {string} sessionId
+ * @param {number} maxAgeSeconds
+ * @returns {number|null} cost in USD, or null on miss / stale / invalid / parse error
+ */
+function readHarnessCost(sessionId, maxAgeSeconds) {
+  if (!sessionId) return null;
+  const num = (v) => (typeof v === 'number' || (typeof v === 'string' && v.trim() !== '') ? Number(v) : NaN);
+  try {
+    const fp = path.join(os.tmpdir(), `harness-cost-${sessionId}.json`);
+    const obj = JSON.parse(fs.readFileSync(fp, 'utf8')); // missing file throws -> catch
+    const ts = num(obj && obj.ts);
+    const cost = num(obj && obj.cost_usd);
+    if (!Number.isFinite(ts) || !Number.isFinite(cost) || cost < 0) return null;
+    const age = Math.floor(Date.now() / 1000) - ts; // ts is epoch seconds
+    return age < 0 || age > maxAgeSeconds ? null : cost; // future ts is invalid too
+  } catch {
+    return null;
+  }
+}
+
 // Approximate per-1M-token billing rates (USD).
 // Cache creation: 1.25x input rate. Cache read: 0.1x input rate.
 const RATE_TABLE = {
@ -125,13 +164,23 @@ process.stdin.on('end', () => {
    } = usageTotals || {};

    const rates = getRates(model);
-    const estimatedCostUsd = Math.round((
+    const transcriptCostUsd = Math.round((
      (inputTokens      / 1e6) * rates.in +
      (outputTokens     / 1e6) * rates.out +
      (cacheWriteTokens / 1e6) * rates.cacheWrite +
      (cacheReadTokens  / 1e6) * rates.cacheRead
    ) * 1e6) / 1e6;

+    // Prefer the harness's authoritative `cost.total_cost_usd` when the
+    // statusline has written it to the per-session cache (see contract in
+    // the file header). The harness number reflects API-billed truth
+    // (correct rates, 1h-cache 2x, >200K tier 2x) and is per-process so it
+    // does not drift across `--resume`. Cache miss → transcript-sum.
+    const harnessCost = readHarnessCost(sessionId, HARNESS_COST_MAX_AGE_SECONDS);
+    const estimatedCostUsd = harnessCost !== null
+      ? Math.round(harnessCost * 1e6) / 1e6
+      : transcriptCostUsd;
+
    const metricsDir = path.join(getClaudeDir(), 'metrics');
    ensureDir(metricsDir);

--- a/tests/hooks/cost-tracker.test.js
+++ b/tests/hooks/cost-tracker.test.js
@ -215,6 +215,93 @@ function runTests() {
    fs.rmSync(tmpHome, { recursive: true, force: true });
  }) ? passed++ : failed++);

+  // 8. Prefers harness-cost cache value over transcript-sum when fresh
+  (test('prefers fresh harness-cost cache over transcript estimate', () => {
+    const tmpHome = makeTempDir();
+    const sessionId = 'harness-fresh-' + Date.now();
+    const transcriptPath = path.join(tmpHome, 'session.jsonl');
+    writeTranscript(transcriptPath, [
+      {
+        type: 'assistant',
+        message: {
+          model: 'claude-opus-4-20250514',
+          usage: {
+            input_tokens: 10000,
+            output_tokens: 5000,
+            cache_creation_input_tokens: 200000,
+            cache_read_input_tokens: 1000000,
+          },
+        },
+      },
+    ]);
+    const harnessCachePath = path.join(os.tmpdir(), `harness-cost-${sessionId}.json`);
+    const nowEpoch = Math.floor(Date.now() / 1000);
+    fs.writeFileSync(
+      harnessCachePath,
+      JSON.stringify({ ts: nowEpoch, cost_usd: 1.23 }),
+      'utf8'
+    );
+
+    try {
+      const result = runScript(
+        { session_id: sessionId, transcript_path: transcriptPath },
+        withTempHome(tmpHome)
+      );
+      assert.strictEqual(result.code, 0, `Expected exit code 0, got ${result.code}`);
+
+      const metricsFile = path.join(tmpHome, '.claude', 'metrics', 'costs.jsonl');
+      const row = JSON.parse(fs.readFileSync(metricsFile, 'utf8').trim());
+      assert.strictEqual(row.estimated_cost_usd, 1.23, 'Expected harness cost to win');
+      // Token totals still reflect the transcript scan
+      assert.strictEqual(row.input_tokens, 10000, 'Token totals should still come from transcript');
+      assert.strictEqual(row.output_tokens, 5000, 'Token totals should still come from transcript');
+    } finally {
+      try { fs.unlinkSync(harnessCachePath); } catch { /* best-effort */ }
+      fs.rmSync(tmpHome, { recursive: true, force: true });
+    }
+  }) ? passed++ : failed++);
+
+  // 9. Ignores stale harness-cost cache and falls back to transcript estimate
+  (test('ignores stale harness-cost cache (>300s) and uses transcript estimate', () => {
+    const tmpHome = makeTempDir();
+    const sessionId = 'harness-stale-' + Date.now();
+    const transcriptPath = path.join(tmpHome, 'session.jsonl');
+    writeTranscript(transcriptPath, [
+      {
+        type: 'assistant',
+        message: {
+          model: 'claude-sonnet-4-20250514',
+          usage: { input_tokens: 1000, output_tokens: 500 },
+        },
+      },
+    ]);
+    const harnessCachePath = path.join(os.tmpdir(), `harness-cost-${sessionId}.json`);
+    const staleEpoch = Math.floor(Date.now() / 1000) - 3600;
+    fs.writeFileSync(
+      harnessCachePath,
+      JSON.stringify({ ts: staleEpoch, cost_usd: 999.99 }),
+      'utf8'
+    );
+
+    try {
+      const result = runScript(
+        { session_id: sessionId, transcript_path: transcriptPath },
+        withTempHome(tmpHome)
+      );
+      assert.strictEqual(result.code, 0, `Expected exit code 0, got ${result.code}`);
+
+      const metricsFile = path.join(tmpHome, '.claude', 'metrics', 'costs.jsonl');
+      const row = JSON.parse(fs.readFileSync(metricsFile, 'utf8').trim());
+      assert.notStrictEqual(row.estimated_cost_usd, 999.99, 'Stale cache must not win');
+      assert.ok(row.estimated_cost_usd > 0, 'Expected fallback transcript estimate to be positive');
+      // Sonnet rates: 1000/1e6*3 + 500/1e6*15 ≈ $0.011 — well below the 999.99 stale value
+      assert.ok(row.estimated_cost_usd < 1, 'Expected small transcript estimate, not the stale 999.99');
+    } finally {
+      try { fs.unlinkSync(harnessCachePath); } catch { /* best-effort */ }
+      fs.rmSync(tmpHome, { recursive: true, force: true });
+    }
+  }) ? passed++ : failed++);
+
  console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
  process.exit(failed > 0 ? 1 : 0);
 }