From d1a0a66dde227b64931df17e25ceb2939628433d Mon Sep 17 00:00:00 2001
From: minpeter
Date: Fri, 27 Feb 2026 01:37:40 +0900
Subject: [PATCH] feat(benchmarks): add hashline-edit benchmark agent and deps

Standalone headless agent using Vercel AI SDK v6 with FriendliAI provider.
Imports hashline-edit pure functions directly from src/ for benchmarking
the edit tool against LLMs (Minimax M2.5 via FriendliAI).

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)
Co-authored-by: Sisyphus
---
 benchmarks/bun.lock     |  62 +++++++++++++
 benchmarks/headless.ts  | 239 ++++++++++++++++++++++++++++++++++++++++
 benchmarks/package.json |  19 ++++
 3 files changed, 320 insertions(+)
 create mode 100644 benchmarks/bun.lock
 create mode 100644 benchmarks/headless.ts
 create mode 100644 benchmarks/package.json

diff --git a/benchmarks/bun.lock b/benchmarks/bun.lock
new file mode 100644
index 00000000..3a31bf1c
--- /dev/null
+++ b/benchmarks/bun.lock
@@ -0,0 +1,62 @@
+{
+  "lockfileVersion": 1,
+  "configVersion": 1,
+  "workspaces": {
+    "": {
+      "name": "hashline-edit-benchmark",
+      "dependencies": {
+        "@ai-sdk/openai": "^1.3.0",
+        "@friendliai/ai-provider": "^1.0.9",
+        "ai": "^6.0.94",
+        "zod": "^4.1.0",
+      },
+    },
+  },
+  "packages": {
+    "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.55", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-7xMeTJnCjwRwXKVCiv4Ly4qzWvDuW3+W1WIV0X1EFu6W83d4mEhV9bFArto10MeTw40ewuDjrbrZd21mXKohkw=="],
+
+    "@ai-sdk/openai": ["@ai-sdk/openai@1.3.24", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-GYXnGJTHRTZc4gJMSmFRgEQudjqd4PUN0ZjQhPwOAYH1yOAvQoG/Ikqs+HyISRbLPCrhbZnPKCNHuRU4OfpW0Q=="],
+
+    "@ai-sdk/openai-compatible": ["@ai-sdk/openai-compatible@2.0.30", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-iTjumHf1/u4NhjXYFn/aONM2GId3/o7J1Lp5ql8FCbgIMyRwrmanR5xy1S3aaVkfTscuDvLTzWiy1mAbGzK3nQ=="],
+
+    "@ai-sdk/provider": ["@ai-sdk/provider@1.1.3", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
+
+    "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@2.2.8", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
+
+    "@friendliai/ai-provider": ["@friendliai/ai-provider@1.1.4", "", { "dependencies": { "@ai-sdk/openai-compatible": "2.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.12" } }, "sha512-9TU4B1QFqPhbkONjI5afCF7Ox4jOqtGg1xw8mA9QHZdtlEbZxU+mBNvMPlI5pU5kPoN6s7wkXmFmxpID+own1A=="],
+
+    "@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="],
+
+    "@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="],
+
+    "@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="],
+
+    "ai": ["ai@6.0.101", "", { "dependencies": { "@ai-sdk/gateway": "3.0.55", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-Ur/NgbgOp1rdhyDiKDk6EOpSgd1g5ADlbcD1cjQJtQsnmhEngz3Rf8nK5JetDh0vnbLy2aEBpaQeL+zvLRWuaA=="],
+
+    "eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="],
+
+    "json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="],
+
+    "nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
+
+    "secure-json-parse": ["secure-json-parse@2.7.0", "", {}, "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="],
+
+    "zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
+
+    "@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
+
+    "@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
+
+    "@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
+
+    "@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
+
+    "@friendliai/ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
+
+    "@friendliai/ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
+
+    "ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
+
+    "ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
+  }
+}
diff --git a/benchmarks/headless.ts b/benchmarks/headless.ts
new file mode 100644
index 00000000..bb2af701
--- /dev/null
+++ b/benchmarks/headless.ts
@@ -0,0 +1,239 @@
+#!/usr/bin/env bun
+/**
+ * Headless benchmark agent for the hashline-edit tool.
+ *
+ * Runs one prompt against a FriendliAI-hosted model through the Vercel AI SDK,
+ * exposing `read_file` / `edit_file` tools, and prints one JSON event per line
+ * on stdout. Usage and timing messages go to stderr.
+ */
+import { readFile, writeFile, mkdir } from "node:fs/promises"
+import { join, dirname } from "node:path"
+import { stepCountIs, streamText, tool, type ModelMessage } from "ai"
+import { createFriendli } from "@friendliai/ai-provider"
+import { z } from "zod"
+import { formatHashLines } from "../src/tools/hashline-edit/hash-computation"
+import { normalizeHashlineEdits } from "../src/tools/hashline-edit/normalize-edits"
+import { applyHashlineEditsWithReport } from "../src/tools/hashline-edit/edit-operations"
+import { canonicalizeFileText, restoreFileText } from "../src/tools/hashline-edit/file-text-canonicalization"
+
+const DEFAULT_MODEL = "MiniMaxAI/MiniMax-M2.5"
+const MAX_STEPS = 50
+const sessionId = `bench-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
+
+/** Writes one JSON event line to stdout, stamped with the session id and an ISO timestamp. */
+const emit = (event: Record<string, unknown>) =>
+  console.log(JSON.stringify({ sessionId, timestamp: new Date().toISOString(), ...event }))
+
+/** True when `error` carries the Node.js `ENOENT` code, i.e. the path does not exist. */
+const isNotFound = (error: unknown): boolean =>
+  typeof error === "object" && error !== null && (error as { code?: unknown }).code === "ENOENT"
+
+/** Human-readable message for any thrown value. */
+const describeError = (error: unknown): string =>
+  error instanceof Error ? error.message : String(error)
+
+// ── CLI ──────────────────────────────────────────────────────
+/**
+ * Parses command-line flags.
+ *
+ * `-p/--prompt` is required and `-m/--model` overrides DEFAULT_MODEL.
+ * `--reasoning-mode <value>` is accepted and discarded together with its value;
+ * any other argument (e.g. `--no-translate`, `--think`) is ignored.
+ * Prints usage to stderr and exits with status 1 when no prompt is given.
+ */
+function parseArgs(): { prompt: string; modelId: string } {
+  const args = process.argv.slice(2)
+  let prompt = ""
+  let modelId = DEFAULT_MODEL
+  for (let i = 0; i < args.length; i++) {
+    if ((args[i] === "-p" || args[i] === "--prompt") && args[i + 1]) {
+      prompt = args[++i]
+    } else if ((args[i] === "-m" || args[i] === "--model") && args[i + 1]) {
+      modelId = args[++i]
+    } else if (args[i] === "--reasoning-mode" && args[i + 1]) {
+      i++ // consume the flag's value
+    }
+    // --no-translate, --think consumed silently
+  }
+  if (!prompt) {
+    console.error("Usage: bun run benchmarks/headless.ts -p <prompt> [-m <model>]")
+    process.exit(1)
+  }
+  return { prompt, modelId }
+}
+
+// ── Tools ────────────────────────────────────────────────────
+/** Tool: returns a file's content tagged by formatHashLines, preceded by its path and line count. */
+const readFileTool = tool({
+  description: "Read a file with hashline-tagged content (LINE#ID format)",
+  inputSchema: z.object({ path: z.string().describe("File path") }),
+  execute: async ({ path }) => {
+    const fullPath = join(process.cwd(), path)
+    try {
+      const content = await readFile(fullPath, "utf-8")
+      const lines = content.split("\n")
+      const tagged = formatHashLines(content)
+      return `OK - read file\npath: ${path}\nlines: ${lines.length}\n\n${tagged}`
+    } catch (error) {
+      // Only a missing path is "not found"; report other failures (EISDIR, EACCES, ...) as they are.
+      if (isNotFound(error)) return `Error: File not found: ${path}`
+      return `Error: ${describeError(error)}`
+    }
+  },
+})
+
+/**
+ * Tool: applies hashline edits to a file and writes the result back.
+ *
+ * A missing file may only be created by append/prepend edits without a `pos` anchor.
+ * Failures raised while reading, editing or writing are returned as `Error: ...` strings.
+ */
+const editFileTool = tool({
+  description: "Edit a file using hashline anchors (LINE#ID format)",
+  inputSchema: z.object({
+    path: z.string(),
+    edits: z.array(
+      z.object({
+        op: z.enum(["replace", "append", "prepend"]),
+        pos: z.string().optional(),
+        end: z.string().optional(),
+        lines: z.union([z.array(z.string()), z.string(), z.null()]),
+      })
+    ).min(1),
+  }),
+  execute: async ({ path, edits }) => {
+    const fullPath = join(process.cwd(), path)
+    try {
+      let rawContent = ""
+      let exists = true
+      try {
+        rawContent = await readFile(fullPath, "utf-8")
+      } catch (error) {
+        // An unreadable file is not a missing file: never fall through to "create" for it.
+        if (!isNotFound(error)) throw error
+        exists = false
+      }
+
+      const normalized = normalizeHashlineEdits(edits)
+
+      if (!exists) {
+        const canCreate = normalized.every(
+          (e) => (e.op === "append" || e.op === "prepend") && !e.pos
+        )
+        if (!canCreate) return `Error: File not found: ${path}`
+      }
+
+      const envelope = canonicalizeFileText(rawContent)
+      const result = applyHashlineEditsWithReport(envelope.content, normalized)
+
+      if (result.content === envelope.content) {
+        return `Error: No changes made to ${path}. The edits produced identical content.`
+      }
+
+      const writeContent = restoreFileText(result.content, envelope)
+      await mkdir(dirname(fullPath), { recursive: true })
+      await writeFile(fullPath, writeContent, "utf-8")
+
+      const oldLineCount = rawContent.split("\n").length
+      const newLineCount = writeContent.split("\n").length
+      const delta = newLineCount - oldLineCount
+      const sign = delta > 0 ? "+" : ""
+      const action = exists ? "Updated" : "Created"
+      return `${action} ${path}\n${edits.length} edit(s) applied, ${sign}${delta} line(s)`
+    } catch (error) {
+      return `Error: ${describeError(error)}`
+    }
+  },
+})
+
+// ── Agent Loop ───────────────────────────────────────────────
+/**
+ * Drives the model for at most MAX_STEPS single-step turns, emitting a JSON event
+ * for the prompt, each tool call, each tool result and the final assistant text.
+ */
+async function run() {
+  const { prompt, modelId } = parseArgs()
+
+  // Fail with a clear message instead of handing an undefined key to the provider.
+  const apiKey = process.env.FRIENDLI_TOKEN
+  if (!apiKey) {
+    throw new Error("FRIENDLI_TOKEN environment variable is not set")
+  }
+
+  const friendli = createFriendli({ apiKey })
+  const model = friendli(modelId)
+  const tools = { read_file: readFileTool, edit_file: editFileTool }
+
+  emit({ type: "user", content: prompt })
+
+  const messages: ModelMessage[] = [{ role: "user", content: prompt }]
+  const system =
+    "You are a code editing assistant. Use read_file to read files and edit_file to edit them. " +
+    "Always read a file before editing it to get fresh LINE#ID anchors."
+
+  for (let step = 0; step < MAX_STEPS; step++) {
+    const stream = streamText({
+      model,
+      tools,
+      messages,
+      system,
+      stopWhen: stepCountIs(1),
+    })
+
+    let currentText = ""
+    for await (const part of stream.fullStream) {
+      switch (part.type) {
+        case "text-delta":
+          currentText += part.text
+          break
+        case "tool-call":
+          emit({
+            type: "tool_call",
+            tool_call_id: part.toolCallId,
+            tool_name: part.toolName,
+            // AI SDK v5+ stream parts carry `input`; `args` was the v4 name and is undefined here.
+            tool_input: part.input,
+            model: modelId,
+          })
+          break
+        case "tool-result":
+          emit({
+            type: "tool_result",
+            tool_call_id: part.toolCallId,
+            // AI SDK v5+ stream parts carry `output`; `result` was the v4 name and is undefined here.
+            output: typeof part.output === "string" ? part.output : JSON.stringify(part.output),
+          })
+          break
+        case "error":
+          // Stream failures arrive as parts instead of being thrown; surface them to the caller.
+          throw part.error
+      }
+    }
+
+    const response = await stream.response
+    messages.push(...response.messages)
+
+    const finishReason = await stream.finishReason
+    if (finishReason !== "tool-calls") {
+      if (currentText.trim()) {
+        emit({ type: "assistant", content: currentText, model: modelId })
+      }
+      break
+    }
+  }
+}
+
+// ── Signal + Startup ─────────────────────────────────────────
+process.once("SIGINT", () => process.exit(0))
+process.once("SIGTERM", () => process.exit(143))
+
+const startTime = Date.now()
+run()
+  .then(() => {
+    const elapsed = ((Date.now() - startTime) / 1000).toFixed(2)
+    console.error(`[headless] Completed in ${elapsed}s`)
+  })
+  .catch((error) => {
+    emit({ type: "error", error: describeError(error) })
+    process.exit(1)
+  })
diff --git a/benchmarks/package.json b/benchmarks/package.json
new file mode 100644
index 00000000..bbddfed8
--- /dev/null
+++ b/benchmarks/package.json
@@ -0,0 +1,19 @@
+{
+  "name": "hashline-edit-benchmark",
+  "version": "0.1.0",
+  "private": true,
+  "type": "module",
+  "description": "Hashline edit tool benchmark using Vercel AI SDK with FriendliAI provider",
+  "scripts": {
+    "bench:basic": "bun run test-edit-ops.ts",
+    "bench:edge": "bun run test-edge-cases.ts",
+    "bench:multi": "bun run test-multi-model.ts",
+    "bench:all": "bun run bench:basic && bun run bench:edge"
+  },
+  "dependencies": {
+    "ai": "^6.0.94",
+    "@ai-sdk/openai": "^1.3.0",
+    "@friendliai/ai-provider": "^1.0.9",
+    "zod": "^4.1.0"
+  }
+}