feat(benchmarks): add hashline-edit benchmark agent and deps
Standalone headless agent using Vercel AI SDK v6 with the FriendliAI provider. It imports the hashline-edit pure functions directly from src/ to benchmark the edit tool against LLMs (MiniMax M2.5 via FriendliAI).

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
b1203b9501
commit
d1a0a66dde
62
benchmarks/bun.lock
Normal file
62
benchmarks/bun.lock
Normal file
@ -0,0 +1,62 @@
|
||||
{
|
||||
"lockfileVersion": 1,
|
||||
"configVersion": 1,
|
||||
"workspaces": {
|
||||
"": {
|
||||
"name": "hashline-edit-benchmark",
|
||||
"dependencies": {
|
||||
"@ai-sdk/openai": "^1.3.0",
|
||||
"@friendliai/ai-provider": "^1.0.9",
|
||||
"ai": "^6.0.94",
|
||||
"zod": "^4.1.0",
|
||||
},
|
||||
},
|
||||
},
|
||||
"packages": {
|
||||
"@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.55", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-7xMeTJnCjwRwXKVCiv4Ly4qzWvDuW3+W1WIV0X1EFu6W83d4mEhV9bFArto10MeTw40ewuDjrbrZd21mXKohkw=="],
|
||||
|
||||
"@ai-sdk/openai": ["@ai-sdk/openai@1.3.24", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-GYXnGJTHRTZc4gJMSmFRgEQudjqd4PUN0ZjQhPwOAYH1yOAvQoG/Ikqs+HyISRbLPCrhbZnPKCNHuRU4OfpW0Q=="],
|
||||
|
||||
"@ai-sdk/openai-compatible": ["@ai-sdk/openai-compatible@2.0.30", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-iTjumHf1/u4NhjXYFn/aONM2GId3/o7J1Lp5ql8FCbgIMyRwrmanR5xy1S3aaVkfTscuDvLTzWiy1mAbGzK3nQ=="],
|
||||
|
||||
"@ai-sdk/provider": ["@ai-sdk/provider@1.1.3", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
|
||||
|
||||
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@2.2.8", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
|
||||
|
||||
"@friendliai/ai-provider": ["@friendliai/ai-provider@1.1.4", "", { "dependencies": { "@ai-sdk/openai-compatible": "2.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.12" } }, "sha512-9TU4B1QFqPhbkONjI5afCF7Ox4jOqtGg1xw8mA9QHZdtlEbZxU+mBNvMPlI5pU5kPoN6s7wkXmFmxpID+own1A=="],
|
||||
|
||||
"@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="],
|
||||
|
||||
"@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="],
|
||||
|
||||
"@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="],
|
||||
|
||||
"ai": ["ai@6.0.101", "", { "dependencies": { "@ai-sdk/gateway": "3.0.55", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-Ur/NgbgOp1rdhyDiKDk6EOpSgd1g5ADlbcD1cjQJtQsnmhEngz3Rf8nK5JetDh0vnbLy2aEBpaQeL+zvLRWuaA=="],
|
||||
|
||||
"eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="],
|
||||
|
||||
"json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="],
|
||||
|
||||
"nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
|
||||
|
||||
"secure-json-parse": ["secure-json-parse@2.7.0", "", {}, "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="],
|
||||
|
||||
"zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
|
||||
|
||||
"@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
|
||||
"@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
|
||||
"@friendliai/ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"@friendliai/ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
|
||||
"ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||
|
||||
"ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||
}
|
||||
}
|
||||
190
benchmarks/headless.ts
Normal file
190
benchmarks/headless.ts
Normal file
@ -0,0 +1,190 @@
|
||||
#!/usr/bin/env bun
|
||||
import { readFile, writeFile, mkdir } from "node:fs/promises"
|
||||
import { join, dirname } from "node:path"
|
||||
import { stepCountIs, streamText, type CoreMessage } from "ai"
|
||||
import { tool } from "ai"
|
||||
import { createFriendli } from "@friendliai/ai-provider"
|
||||
import { z } from "zod"
|
||||
import { formatHashLines } from "../src/tools/hashline-edit/hash-computation"
|
||||
import { normalizeHashlineEdits } from "../src/tools/hashline-edit/normalize-edits"
|
||||
import { applyHashlineEditsWithReport } from "../src/tools/hashline-edit/edit-operations"
|
||||
import { canonicalizeFileText, restoreFileText } from "../src/tools/hashline-edit/file-text-canonicalization"
|
||||
|
||||
// Model id used when the CLI is not given -m/--model.
const DEFAULT_MODEL = "MiniMaxAI/MiniMax-M2.5"
// Upper bound on the number of agent-loop iterations in a single run.
const MAX_STEPS = 50
// Identifier stamped on every emitted event:
// "bench-<epoch millis>-<up to six base-36 characters>".
const sessionId = ["bench", Date.now(), Math.random().toString(36).slice(2, 8)].join("-")
|
||||
|
||||
/**
 * Prints a single event to stdout as one line of JSON.
 *
 * The session id and an ISO-8601 timestamp are written first; keys supplied
 * in `event` are spread afterwards, so an event key with the same name
 * overrides them.
 */
const emit = (event: Record<string, unknown>) => {
  const payload = { sessionId, timestamp: new Date().toISOString(), ...event }
  console.log(JSON.stringify(payload))
}
|
||||
|
||||
// ── CLI ──────────────────────────────────────────────────────
|
||||
/**
 * Reads the prompt and model id from the command line.
 *
 * Recognized flags (each only when followed by a non-empty value):
 *   -p / --prompt <text>     the user prompt (required)
 *   -m / --model <id>        model id, defaults to DEFAULT_MODEL
 *   --reasoning-mode <mode>  accepted and discarded together with its value
 * Any other argument (for example --no-translate or --think) is skipped.
 *
 * Prints a usage line to stderr and exits with status 1 when no prompt
 * was supplied.
 */
function parseArgs(): { prompt: string; modelId: string } {
  const argv = process.argv.slice(2)
  const parsed = { prompt: "", modelId: DEFAULT_MODEL }

  let index = 0
  while (index < argv.length) {
    const flag = argv[index]
    const value = argv[index + 1]
    // A flag consumes the next token only when that token is present and non-empty.
    if (value) {
      switch (flag) {
        case "-p":
        case "--prompt":
          parsed.prompt = value
          index++
          break
        case "-m":
        case "--model":
          parsed.modelId = value
          index++
          break
        case "--reasoning-mode":
          // Value is consumed so it is not misread as another flag.
          index++
          break
      }
    }
    index++
  }

  if (!parsed.prompt) {
    console.error("Usage: bun run benchmarks/headless.ts -p <prompt> [-m <model>]")
    process.exit(1)
  }
  return { prompt: parsed.prompt, modelId: parsed.modelId }
}
|
||||
|
||||
// ── Tools ────────────────────────────────────────────────────
|
||||
/**
 * Tool exposed to the model as `read_file`.
 *
 * Resolves `path` against the current working directory, reads the file as
 * UTF-8 and returns a text report containing the path, the number of
 * "\n"-separated segments, and the output of formatHashLines for the content.
 *
 * Failures are returned as strings (never thrown) so the model can react:
 *   - a missing file yields "Error: File not found: <path>"
 *   - any other failure yields "Error: <message>", so that permission
 *     problems or directory paths are not misreported as a missing file.
 */
const readFileTool = tool({
  description: "Read a file with hashline-tagged content (LINE#ID format)",
  inputSchema: z.object({ path: z.string().describe("File path") }),
  execute: async ({ path }) => {
    const fullPath = join(process.cwd(), path)
    try {
      const content = await readFile(fullPath, "utf-8")
      const lineCount = content.split("\n").length
      const tagged = formatHashLines(content)
      return `OK - read file\npath: ${path}\nlines: ${lineCount}\n\n${tagged}`
    } catch (error) {
      // Only ENOENT really means "not found"; report everything else verbatim.
      const code = (error as { code?: unknown } | null)?.code
      if (code === "ENOENT") {
        return `Error: File not found: ${path}`
      }
      return `Error: ${error instanceof Error ? error.message : String(error)}`
    }
  },
})
|
||||
|
||||
/**
 * Tool exposed to the model as `edit_file`.
 *
 * Input: a `path` (resolved against the current working directory) and a
 * non-empty list of edits, each with an `op` (replace / append / prepend),
 * optional `pos` / `end` anchors and `lines` (string array, string or null).
 *
 * Flow:
 *   1. Read the current file; a missing file is allowed only when every
 *      normalized edit is an append/prepend without a `pos` anchor.
 *   2. Canonicalize the text, apply the edits, and refuse to write when the
 *      result is identical to the canonical input.
 *   3. Restore the original text envelope, create parent directories and
 *      write the file.
 *
 * Returns a human-readable status string. All failures are returned as
 * "Error: ..." strings rather than thrown.
 */
const editFileTool = tool({
  description: "Edit a file using hashline anchors (LINE#ID format)",
  inputSchema: z.object({
    path: z.string(),
    edits: z.array(
      z.object({
        op: z.enum(["replace", "append", "prepend"]),
        pos: z.string().optional(),
        end: z.string().optional(),
        lines: z.union([z.array(z.string()), z.string(), z.null()]),
      })
    ).min(1),
  }),
  execute: async ({ path, edits }) => {
    const fullPath = join(process.cwd(), path)
    try {
      let rawContent = ""
      let exists = true
      try {
        rawContent = await readFile(fullPath, "utf-8")
      } catch (error) {
        // Only a missing file counts as "does not exist". Other read failures
        // (permissions, path is a directory, ...) are rethrown to the outer
        // handler so an unreadable file is never treated as a new, empty one.
        if ((error as { code?: unknown } | null)?.code !== "ENOENT") throw error
        exists = false
      }

      const normalized = normalizeHashlineEdits(edits)

      if (!exists) {
        // A new file can only be built from unanchored append/prepend edits.
        const canCreate = normalized.every(
          (e) => (e.op === "append" || e.op === "prepend") && !e.pos
        )
        if (!canCreate) return `Error: File not found: ${path}`
      }

      const envelope = canonicalizeFileText(rawContent)
      const result = applyHashlineEditsWithReport(envelope.content, normalized)

      if (result.content === envelope.content) {
        return `Error: No changes made to ${path}. The edits produced identical content.`
      }

      const writeContent = restoreFileText(result.content, envelope)
      await mkdir(dirname(fullPath), { recursive: true })
      await writeFile(fullPath, writeContent, "utf-8")

      // Line counts are "\n"-separated segment counts; an empty string counts
      // as one segment, so a newly created file is measured against 1.
      const oldLineCount = rawContent.split("\n").length
      const newLineCount = writeContent.split("\n").length
      const delta = newLineCount - oldLineCount
      const sign = delta > 0 ? "+" : ""
      const action = exists ? "Updated" : "Created"
      return `${action} ${path}\n${edits.length} edit(s) applied, ${sign}${delta} line(s)`
    } catch (error) {
      return `Error: ${error instanceof Error ? error.message : String(error)}`
    }
  },
})
|
||||
|
||||
// ── Agent Loop ───────────────────────────────────────────────
|
||||
/**
 * Runs the benchmark agent for one prompt.
 *
 * Parses the CLI arguments, creates the FriendliAI model, and then drives a
 * manual agent loop of at most MAX_STEPS iterations. Each iteration makes one
 * single-step streamText call (stopWhen: stepCountIs(1)), emits a JSON event
 * for every tool call and tool result seen on the stream, and appends the
 * response messages to the conversation. The loop ends as soon as a step
 * finishes for any reason other than "tool-calls"; the text accumulated in
 * that final step is emitted as the assistant message when it is non-blank.
 *
 * @throws Error when the FRIENDLI_TOKEN environment variable is not set.
 */
async function run() {
  const { prompt, modelId } = parseArgs()

  // Fail early with a clear message instead of passing an undefined key on.
  const apiKey = process.env.FRIENDLI_TOKEN
  if (!apiKey) {
    throw new Error("FRIENDLI_TOKEN environment variable is not set")
  }

  const friendli = createFriendli({ apiKey })
  const model = friendli(modelId)
  const tools = { read_file: readFileTool, edit_file: editFileTool }

  emit({ type: "user", content: prompt })

  const messages: CoreMessage[] = [{ role: "user", content: prompt }]
  const system =
    "You are a code editing assistant. Use read_file to read files and edit_file to edit them. " +
    "Always read a file before editing it to get fresh LINE#ID anchors."

  for (let step = 0; step < MAX_STEPS; step++) {
    const stream = streamText({
      model,
      tools,
      messages,
      system,
      stopWhen: stepCountIs(1),
    })

    let currentText = ""
    for await (const part of stream.fullStream) {
      switch (part.type) {
        case "text-delta":
          currentText += part.text
          break
        case "tool-call":
          emit({
            type: "tool_call",
            tool_call_id: part.toolCallId,
            tool_name: part.toolName,
            // Stream parts carry the tool arguments as `input` (formerly `args`).
            tool_input: part.input,
            model: modelId,
          })
          break
        case "tool-result":
          emit({
            type: "tool_result",
            tool_call_id: part.toolCallId,
            // Stream parts carry the tool return value as `output` (formerly `result`).
            output: typeof part.output === "string" ? part.output : JSON.stringify(part.output),
          })
          break
      }
    }

    // Carry the assistant and tool messages of this step into the next request.
    const response = await stream.response
    messages.push(...response.messages)

    const finishReason = await stream.finishReason
    if (finishReason !== "tool-calls") {
      if (currentText.trim()) {
        emit({ type: "assistant", content: currentText, model: modelId })
      }
      break
    }
  }
}
|
||||
|
||||
// ── Signal + Startup ─────────────────────────────────────────
|
||||
// Exit promptly on termination signals: status 0 for SIGINT, 143 for SIGTERM.
for (const [signal, exitCode] of [["SIGINT", 0], ["SIGTERM", 143]] as const) {
  process.once(signal, () => process.exit(exitCode))
}

const startTime = Date.now()

// Entry point: run the agent, then report elapsed wall-clock time on stderr.
// A failure is emitted as an "error" event and the process exits with status 1,
// so the completion message is only printed after a successful run.
run().then(
  () => {
    const elapsedMs = Date.now() - startTime
    const elapsed = (elapsedMs / 1000).toFixed(2)
    console.error(`[headless] Completed in ${elapsed}s`)
  },
  (error) => {
    const message = error instanceof Error ? error.message : String(error)
    emit({ type: "error", error: message })
    process.exit(1)
  }
)
|
||||
19
benchmarks/package.json
Normal file
19
benchmarks/package.json
Normal file
@ -0,0 +1,19 @@
|
||||
{
|
||||
"name": "hashline-edit-benchmark",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"description": "Hashline edit tool benchmark using Vercel AI SDK with FriendliAI provider",
|
||||
"scripts": {
|
||||
"bench:basic": "bun run test-edit-ops.ts",
|
||||
"bench:edge": "bun run test-edge-cases.ts",
|
||||
"bench:multi": "bun run test-multi-model.ts",
|
||||
"bench:all": "bun run bench:basic && bun run bench:edge"
|
||||
},
|
||||
"dependencies": {
|
||||
"ai": "^6.0.94",
|
||||
"@ai-sdk/openai": "^1.3.0",
|
||||
"@friendliai/ai-provider": "^1.0.9",
|
||||
"zod": "^4.1.0"
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user