From d243adbf8d5dd6927cea1091ad4ec6f31496b3cf Mon Sep 17 00:00:00 2001
From: Robert Egoyan <95100203+qMiko318@users.noreply.github.com>
Date: Mon, 25 May 2026 21:08:11 +0300
Subject: [PATCH] fix(hooks): prefer fresh harness cost cache (#2054)

Uses a fresh harness cost cache when available and keeps transcript pricing as the fallback.
Focused cost-tracker tests passed locally before merge.
---
 scripts/hooks/cost-tracker.js    | 51 ++++++++++++++++++-
 tests/hooks/cost-tracker.test.js | 87 ++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/scripts/hooks/cost-tracker.js b/scripts/hooks/cost-tracker.js
index 291345cc..03937578 100755
--- a/scripts/hooks/cost-tracker.js
+++ b/scripts/hooks/cost-tracker.js
@@ -20,15 +20,54 @@
  * Each row therefore represents the cumulative session total up to that point.
  * To get per-session cost, take the last row per session_id. To get per-day
  * spend, aggregate.
+ *
+ * Harness-cost contract (optional, opt-in by the statusline):
+ *   If the user's statusline (which receives `cost.total_cost_usd` directly
+ *   from Claude Code) writes `{ts, cost_usd}` to
+ *   `<tmpdir>/harness-cost-<session_id>.json` on each render, this hook
+ *   prefers that authoritative value over the transcript-sum estimate when
+ *   the cache is fresh (≤ 300s). The transcript-sum is kept as a safe
+ *   fallback because:
+ *     - the hard-coded rate table cannot represent Opus 4.7's >200K-token
+ *       2x tier or the 1h-cache 2x tier (under-counts on long sessions);
+ *     - summing the full transcript double-counts work done across
+ *       `--resume` boundaries while `cost.total_cost_usd` is per-process.
+ *   Absent a writer, behavior is unchanged.
  */

 'use strict';

 const fs = require('fs');
+const os = require('os');
 const path = require('path');
 const { ensureDir, appendFile, getClaudeDir } = require('../lib/utils');
 const { sanitizeSessionId } = require('../lib/session-bridge');

+const HARNESS_COST_MAX_AGE_SECONDS = 300;
+
+/**
+ * Read the harness cost from the per-session cache file `harness-cost-<sessionId>.json` in os.tmpdir().
+ * @param {string} sessionId - Session id embedded in the cache file name; a falsy id yields null.
+ * @param {number} maxAgeSeconds - Maximum accepted age of the cached `ts` (epoch seconds).
+ * @returns {number|null} cost in USD, or null on miss / stale / future-dated / malformed cache
+ */
+function readHarnessCost(sessionId, maxAgeSeconds) {
+  if (!sessionId) return null;
+  // Number(null), Number('') and Number(true) are finite, so coerce only numbers and numeric text.
+  const num = (v) => (typeof v === 'number' || (typeof v === 'string' && v.trim()) ? Number(v) : NaN);
+  try {
+    const fp = path.join(os.tmpdir(), `harness-cost-${sessionId}.json`);
+    const obj = JSON.parse(fs.readFileSync(fp, 'utf8')) || {};
+    const [ts, cost] = [num(obj.ts), num(obj.cost_usd)];
+    if (!Number.isFinite(ts) || !Number.isFinite(cost) || cost < 0) return null;
+    const age = Math.floor(Date.now() / 1000) - ts;
+    if (age < 0 || age > maxAgeSeconds) return null;
+    return cost;
+  } catch {
+    return null;
+  }
+}
+
 // Approximate per-1M-token billing rates (USD).
 // Cache creation: 1.25x input rate. Cache read: 0.1x input rate.
 const RATE_TABLE = {
@@ -125,13 +164,23 @@ process.stdin.on('end', () => {
     } = usageTotals || {};

     const rates = getRates(model);
-    const estimatedCostUsd = Math.round((
+    const transcriptCostUsd = Math.round((
       (inputTokens / 1e6) * rates.in +
       (outputTokens / 1e6) * rates.out +
       (cacheWriteTokens / 1e6) * rates.cacheWrite +
       (cacheReadTokens / 1e6) * rates.cacheRead
     ) * 1e6) / 1e6;

+    // Prefer the harness's authoritative `cost.total_cost_usd` when the
+    // statusline has written it to the per-session cache (see contract in
+    // the file header). The harness number reflects API-billed truth
+    // (correct rates, 1h-cache 2x, >200K tier 2x) and is per-process so it
+    // does not drift across `--resume`. Cache miss → transcript-sum.
+    const harnessCost = readHarnessCost(sessionId, HARNESS_COST_MAX_AGE_SECONDS);
+    const estimatedCostUsd = harnessCost !== null
+      ? Math.round(harnessCost * 1e6) / 1e6
+      : transcriptCostUsd;
+
     const metricsDir = path.join(getClaudeDir(), 'metrics');
     ensureDir(metricsDir);

diff --git a/tests/hooks/cost-tracker.test.js b/tests/hooks/cost-tracker.test.js
index 9e578b19..117e4db2 100644
--- a/tests/hooks/cost-tracker.test.js
+++ b/tests/hooks/cost-tracker.test.js
@@ -215,6 +215,93 @@ function runTests() {
     fs.rmSync(tmpHome, { recursive: true, force: true });
   }) ? passed++ : failed++);

+  // 8. Prefers harness-cost cache value over transcript-sum when fresh
+  (test('prefers fresh harness-cost cache over transcript estimate', () => {
+    const tmpHome = makeTempDir();
+    const sessionId = 'harness-fresh-' + Date.now();
+    const transcriptPath = path.join(tmpHome, 'session.jsonl');
+    writeTranscript(transcriptPath, [
+      {
+        type: 'assistant',
+        message: {
+          model: 'claude-opus-4-20250514',
+          usage: {
+            input_tokens: 10000,
+            output_tokens: 5000,
+            cache_creation_input_tokens: 200000,
+            cache_read_input_tokens: 1000000,
+          },
+        },
+      },
+    ]);
+    const harnessCachePath = path.join(os.tmpdir(), `harness-cost-${sessionId}.json`);
+    const nowEpoch = Math.floor(Date.now() / 1000);
+    fs.writeFileSync(
+      harnessCachePath,
+      JSON.stringify({ ts: nowEpoch, cost_usd: 1.23 }),
+      'utf8'
+    );
+
+    try {
+      const result = runScript(
+        { session_id: sessionId, transcript_path: transcriptPath },
+        withTempHome(tmpHome)
+      );
+      assert.strictEqual(result.code, 0, `Expected exit code 0, got ${result.code}`);
+
+      const metricsFile = path.join(tmpHome, '.claude', 'metrics', 'costs.jsonl');
+      const row = JSON.parse(fs.readFileSync(metricsFile, 'utf8').trim());
+      assert.strictEqual(row.estimated_cost_usd, 1.23, 'Expected harness cost to win');
+      // Token totals still reflect the transcript scan
+      assert.strictEqual(row.input_tokens, 10000, 'Token totals should still come from transcript');
+      assert.strictEqual(row.output_tokens, 5000, 'Token totals should still come from transcript');
+    } finally {
+      try { fs.unlinkSync(harnessCachePath); } catch { /* best-effort */ }
+      fs.rmSync(tmpHome, { recursive: true, force: true });
+    }
+  }) ? passed++ : failed++);
+
+  // 9. Ignores stale harness-cost cache and falls back to transcript estimate
+  (test('ignores stale harness-cost cache (>300s) and uses transcript estimate', () => {
+    const tmpHome = makeTempDir();
+    const sessionId = 'harness-stale-' + Date.now();
+    const transcriptPath = path.join(tmpHome, 'session.jsonl');
+    writeTranscript(transcriptPath, [
+      {
+        type: 'assistant',
+        message: {
+          model: 'claude-sonnet-4-20250514',
+          usage: { input_tokens: 1000, output_tokens: 500 },
+        },
+      },
+    ]);
+    const harnessCachePath = path.join(os.tmpdir(), `harness-cost-${sessionId}.json`);
+    const staleEpoch = Math.floor(Date.now() / 1000) - 3600;
+    fs.writeFileSync(
+      harnessCachePath,
+      JSON.stringify({ ts: staleEpoch, cost_usd: 999.99 }),
+      'utf8'
+    );
+
+    try {
+      const result = runScript(
+        { session_id: sessionId, transcript_path: transcriptPath },
+        withTempHome(tmpHome)
+      );
+      assert.strictEqual(result.code, 0, `Expected exit code 0, got ${result.code}`);
+
+      const metricsFile = path.join(tmpHome, '.claude', 'metrics', 'costs.jsonl');
+      const row = JSON.parse(fs.readFileSync(metricsFile, 'utf8').trim());
+      assert.notStrictEqual(row.estimated_cost_usd, 999.99, 'Stale cache must not win');
+      assert.ok(row.estimated_cost_usd > 0, 'Expected fallback transcript estimate to be positive');
+      // Sonnet rates: 1000/1e6*3 + 500/1e6*15 ≈ $0.011 — well below the 999.99 stale value
+      assert.ok(row.estimated_cost_usd < 1, 'Expected small transcript estimate, not the stale 999.99');
+    } finally {
+      try { fs.unlinkSync(harnessCachePath); } catch { /* best-effort */ }
+      fs.rmSync(tmpHome, { recursive: true, force: true });
+    }
+  }) ? passed++ : failed++);
+
   console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
   process.exit(failed > 0 ? 1 : 0);
 }