feat(benchmarks): add hashline-edit benchmark agent and deps
Standalone headless agent using Vercel AI SDK v6 with FriendliAI provider. Imports hashline-edit pure functions directly from src/ for benchmarking the edit tool against LLMs (Minimax M2.5 via FriendliAI). Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
b1203b9501
commit
d1a0a66dde
62
benchmarks/bun.lock
Normal file
62
benchmarks/bun.lock
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
{
|
||||||
|
"lockfileVersion": 1,
|
||||||
|
"configVersion": 1,
|
||||||
|
"workspaces": {
|
||||||
|
"": {
|
||||||
|
"name": "hashline-edit-benchmark",
|
||||||
|
"dependencies": {
|
||||||
|
"@ai-sdk/openai": "^1.3.0",
|
||||||
|
"@friendliai/ai-provider": "^1.0.9",
|
||||||
|
"ai": "^6.0.94",
|
||||||
|
"zod": "^4.1.0",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"packages": {
|
||||||
|
"@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.55", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-7xMeTJnCjwRwXKVCiv4Ly4qzWvDuW3+W1WIV0X1EFu6W83d4mEhV9bFArto10MeTw40ewuDjrbrZd21mXKohkw=="],
|
||||||
|
|
||||||
|
"@ai-sdk/openai": ["@ai-sdk/openai@1.3.24", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-GYXnGJTHRTZc4gJMSmFRgEQudjqd4PUN0ZjQhPwOAYH1yOAvQoG/Ikqs+HyISRbLPCrhbZnPKCNHuRU4OfpW0Q=="],
|
||||||
|
|
||||||
|
"@ai-sdk/openai-compatible": ["@ai-sdk/openai-compatible@2.0.30", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-iTjumHf1/u4NhjXYFn/aONM2GId3/o7J1Lp5ql8FCbgIMyRwrmanR5xy1S3aaVkfTscuDvLTzWiy1mAbGzK3nQ=="],
|
||||||
|
|
||||||
|
"@ai-sdk/provider": ["@ai-sdk/provider@1.1.3", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
|
||||||
|
|
||||||
|
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@2.2.8", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
|
||||||
|
|
||||||
|
"@friendliai/ai-provider": ["@friendliai/ai-provider@1.1.4", "", { "dependencies": { "@ai-sdk/openai-compatible": "2.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.12" } }, "sha512-9TU4B1QFqPhbkONjI5afCF7Ox4jOqtGg1xw8mA9QHZdtlEbZxU+mBNvMPlI5pU5kPoN6s7wkXmFmxpID+own1A=="],
|
||||||
|
|
||||||
|
"@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="],
|
||||||
|
|
||||||
|
"@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="],
|
||||||
|
|
||||||
|
"@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="],
|
||||||
|
|
||||||
|
"ai": ["ai@6.0.101", "", { "dependencies": { "@ai-sdk/gateway": "3.0.55", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-Ur/NgbgOp1rdhyDiKDk6EOpSgd1g5ADlbcD1cjQJtQsnmhEngz3Rf8nK5JetDh0vnbLy2aEBpaQeL+zvLRWuaA=="],
|
||||||
|
|
||||||
|
"eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="],
|
||||||
|
|
||||||
|
"json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="],
|
||||||
|
|
||||||
|
"nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
|
||||||
|
|
||||||
|
"secure-json-parse": ["secure-json-parse@2.7.0", "", {}, "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="],
|
||||||
|
|
||||||
|
"zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
|
||||||
|
|
||||||
|
"@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||||
|
|
||||||
|
"@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||||
|
|
||||||
|
"@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||||
|
|
||||||
|
"@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||||
|
|
||||||
|
"@friendliai/ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||||
|
|
||||||
|
"@friendliai/ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||||
|
|
||||||
|
"ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="],
|
||||||
|
|
||||||
|
"ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="],
|
||||||
|
}
|
||||||
|
}
|
||||||
190
benchmarks/headless.ts
Normal file
190
benchmarks/headless.ts
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
#!/usr/bin/env bun
|
||||||
|
import { readFile, writeFile, mkdir } from "node:fs/promises"
|
||||||
|
import { join, dirname } from "node:path"
|
||||||
|
import { stepCountIs, streamText, type CoreMessage } from "ai"
|
||||||
|
import { tool } from "ai"
|
||||||
|
import { createFriendli } from "@friendliai/ai-provider"
|
||||||
|
import { z } from "zod"
|
||||||
|
import { formatHashLines } from "../src/tools/hashline-edit/hash-computation"
|
||||||
|
import { normalizeHashlineEdits } from "../src/tools/hashline-edit/normalize-edits"
|
||||||
|
import { applyHashlineEditsWithReport } from "../src/tools/hashline-edit/edit-operations"
|
||||||
|
import { canonicalizeFileText, restoreFileText } from "../src/tools/hashline-edit/file-text-canonicalization"
|
||||||
|
|
||||||
|
// Model id used when no -m/--model flag is supplied.
const DEFAULT_MODEL = "MiniMaxAI/MiniMax-M2.5"
// Upper bound on the number of agent-loop iterations.
const MAX_STEPS = 50
// Per-process identifier attached to every emitted event:
// "bench-<epoch ms>-<up to 6 base-36 chars>".
const sessionId = ["bench", Date.now(), Math.random().toString(36).slice(2, 8)].join("-")
|
||||||
|
|
||||||
|
/**
 * Prints one event to stdout as a single JSON line.
 * The session id and the current ISO timestamp are placed first; fields in
 * `event` are spread last, so they win on key collisions.
 */
const emit = (event: Record<string, unknown>) => {
  const record = { sessionId, timestamp: new Date().toISOString(), ...event }
  return console.log(JSON.stringify(record))
}
|
||||||
|
|
||||||
|
// ── CLI ──────────────────────────────────────────────────────
|
||||||
|
/**
 * Reads the prompt and model id from the command line.
 *
 * Recognised flags (each needs a non-empty following value):
 *   -p / --prompt <text>     required
 *   -m / --model <id>        optional, defaults to DEFAULT_MODEL
 *   --reasoning-mode <mode>  value is skipped and otherwise unused
 * Any other argument is ignored. When no prompt is supplied, a usage line is
 * written to stderr and the process exits with status 1.
 */
function parseArgs(): { prompt: string; modelId: string } {
  const argv = process.argv.slice(2)
  let prompt = ""
  let modelId = DEFAULT_MODEL

  let index = 0
  while (index < argv.length) {
    const flag = argv[index]
    const value = argv[index + 1]
    // A flag only takes effect when a truthy value follows it.
    if (value) {
      switch (flag) {
        case "-p":
        case "--prompt":
          prompt = value
          index += 1
          break
        case "-m":
        case "--model":
          modelId = value
          index += 1
          break
        case "--reasoning-mode":
          // Skip the mode value; it is not used by this agent.
          index += 1
          break
      }
    }
    // Unrecognised flags (e.g. --no-translate, --think) fall through untouched.
    index += 1
  }

  if (!prompt) {
    console.error("Usage: bun run benchmarks/headless.ts -p <prompt> [-m <model>]")
    process.exit(1)
  }
  return { prompt, modelId }
}
|
||||||
|
|
||||||
|
// ── Tools ────────────────────────────────────────────────────
|
||||||
|
/**
 * Tool: read a file (path resolved against the current working directory) and
 * return its content as tagged by formatHashLines, preceded by a short header
 * with the path and the number of "\n"-separated lines.
 *
 * Failures are returned as "Error: ..." strings instead of being thrown, so
 * they reach the model as the tool result.
 */
const readFileTool = tool({
  description: "Read a file with hashline-tagged content (LINE#ID format)",
  inputSchema: z.object({ path: z.string().describe("File path") }),
  execute: async ({ path }) => {
    const fullPath = join(process.cwd(), path)
    try {
      const content = await readFile(fullPath, "utf-8")
      const lines = content.split("\n")
      const tagged = formatHashLines(content)
      return `OK - read file\npath: ${path}\nlines: ${lines.length}\n\n${tagged}`
    } catch (error) {
      // Only a genuinely missing path is reported as "File not found"; any other
      // failure (permissions, directory target, tagging error) keeps its own
      // message so the cause is not misreported.
      const isMissing = error instanceof Error && "code" in error && error.code === "ENOENT"
      if (isMissing) return `Error: File not found: ${path}`
      return `Error: ${error instanceof Error ? error.message : String(error)}`
    }
  },
})
|
||||||
|
|
||||||
|
/**
 * Tool: apply a batch of hashline edits to a file (path resolved against the
 * current working directory) and write the result back.
 *
 * Flow: read the file, normalize the edits, canonicalize the text, apply the
 * edits, restore the original text envelope, then write (creating parent
 * directories as needed). A missing file may be created only when every edit
 * is an unanchored append/prepend (no `pos`).
 *
 * Returns a one-paragraph status string; all failures are returned as
 * "Error: ..." strings rather than thrown.
 */
const editFileTool = tool({
  description: "Edit a file using hashline anchors (LINE#ID format)",
  inputSchema: z.object({
    path: z.string(),
    edits: z.array(
      z.object({
        op: z.enum(["replace", "append", "prepend"]),
        pos: z.string().optional(),
        end: z.string().optional(),
        lines: z.union([z.array(z.string()), z.string(), z.null()]),
      })
    ).min(1),
  }),
  execute: async ({ path, edits }) => {
    const fullPath = join(process.cwd(), path)
    try {
      let rawContent = ""
      let exists = true
      try {
        rawContent = await readFile(fullPath, "utf-8")
      } catch (error) {
        // Only ENOENT means "no such file". Other read failures (permissions,
        // directory target, ...) must not be mistaken for a creatable path, so
        // they are rethrown and reported by the outer catch.
        const isMissing = error instanceof Error && "code" in error && error.code === "ENOENT"
        if (!isMissing) throw error
        exists = false
      }

      const normalized = normalizeHashlineEdits(edits)

      if (!exists) {
        // Anchored edits need existing lines to refer to.
        const canCreate = normalized.every(
          (e) => (e.op === "append" || e.op === "prepend") && !e.pos
        )
        if (!canCreate) return `Error: File not found: ${path}`
      }

      const envelope = canonicalizeFileText(rawContent)
      const result = applyHashlineEditsWithReport(envelope.content, normalized)

      if (result.content === envelope.content) {
        return `Error: No changes made to ${path}. The edits produced identical content.`
      }

      const writeContent = restoreFileText(result.content, envelope)
      await mkdir(dirname(fullPath), { recursive: true })
      await writeFile(fullPath, writeContent, "utf-8")

      // A file that did not exist had zero lines; "".split("\n") would count one
      // and under-report the size of a newly created file.
      const oldLineCount = exists ? rawContent.split("\n").length : 0
      const newLineCount = writeContent.split("\n").length
      const delta = newLineCount - oldLineCount
      const sign = delta > 0 ? "+" : ""
      const action = exists ? "Updated" : "Created"
      return `${action} ${path}\n${edits.length} edit(s) applied, ${sign}${delta} line(s)`
    } catch (error) {
      return `Error: ${error instanceof Error ? error.message : String(error)}`
    }
  },
})
|
||||||
|
|
||||||
|
// ── Agent Loop ───────────────────────────────────────────────
|
||||||
|
/**
 * Runs the headless agent loop for the prompt given on the command line.
 *
 * Each iteration issues one single-step streamText call (stopWhen:
 * stepCountIs(1)), emits a JSON event for every tool call and tool result seen
 * on the stream, and appends the response messages to the conversation. The
 * loop ends when a step finishes for any reason other than "tool-calls" (the
 * accumulated text of that step is emitted as the assistant message if it is
 * non-blank) or after MAX_STEPS iterations.
 */
async function run() {
  const { prompt, modelId } = parseArgs()

  const friendli = createFriendli({ apiKey: process.env.FRIENDLI_TOKEN! })
  const model = friendli(modelId)
  const tools = { read_file: readFileTool, edit_file: editFileTool }

  emit({ type: "user", content: prompt })

  const messages: CoreMessage[] = [{ role: "user", content: prompt }]
  const system =
    "You are a code editing assistant. Use read_file to read files and edit_file to edit them. " +
    "Always read a file before editing it to get fresh LINE#ID anchors."

  for (let step = 0; step < MAX_STEPS; step++) {
    const stream = streamText({
      model,
      tools,
      messages,
      system,
      stopWhen: stepCountIs(1),
    })

    let currentText = ""
    for await (const part of stream.fullStream) {
      switch (part.type) {
        case "text-delta":
          currentText += part.text
          break
        case "tool-call":
          emit({
            type: "tool_call",
            tool_call_id: part.toolCallId,
            tool_name: part.toolName,
            // Tool-call stream parts carry their arguments as `input`; the
            // former `args` field is undefined here and would be dropped
            // from the emitted JSON.
            tool_input: part.input,
            model: modelId,
          })
          break
        case "tool-result":
          emit({
            type: "tool_result",
            tool_call_id: part.toolCallId,
            // Likewise the tool's return value is exposed as `output`, not `result`.
            output: typeof part.output === "string" ? part.output : JSON.stringify(part.output),
          })
          break
      }
    }

    // Carry this step's assistant/tool messages into the next request.
    const response = await stream.response
    messages.push(...response.messages)

    const finishReason = await stream.finishReason
    if (finishReason !== "tool-calls") {
      if (currentText.trim()) {
        emit({ type: "assistant", content: currentText, model: modelId })
      }
      break
    }
  }
}
|
||||||
|
|
||||||
|
// ── Signal + Startup ─────────────────────────────────────────
|
||||||
|
// Terminate immediately on the first interrupt/termination signal:
// status 0 for SIGINT, 143 for SIGTERM.
process.once("SIGINT", () => {
  process.exit(0)
})
process.once("SIGTERM", () => {
  process.exit(143)
})

const startTime = Date.now()

// Start the agent. A failure is emitted as an "error" event and ends the
// process with status 1; otherwise the elapsed wall time goes to stderr.
run()
  .catch((error) => {
    const message = error instanceof Error ? error.message : String(error)
    emit({ type: "error", error: message })
    process.exit(1)
  })
  .then(() => {
    const elapsedMs = Date.now() - startTime
    const elapsed = (elapsedMs / 1000).toFixed(2)
    console.error(`[headless] Completed in ${elapsed}s`)
  })
|
||||||
19
benchmarks/package.json
Normal file
19
benchmarks/package.json
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"name": "hashline-edit-benchmark",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"type": "module",
|
||||||
|
"description": "Hashline edit tool benchmark using Vercel AI SDK with FriendliAI provider",
|
||||||
|
"scripts": {
|
||||||
|
"bench:basic": "bun run test-edit-ops.ts",
|
||||||
|
"bench:edge": "bun run test-edge-cases.ts",
|
||||||
|
"bench:multi": "bun run test-multi-model.ts",
|
||||||
|
"bench:all": "bun run bench:basic && bun run bench:edge"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"ai": "^6.0.94",
|
||||||
|
"@ai-sdk/openai": "^1.3.0",
|
||||||
|
"@friendliai/ai-provider": "^1.0.9",
|
||||||
|
"zod": "^4.1.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user