refactor(look-at): split tools.ts into argument parsing and extraction modules
Extract the multimodal look-at tool internals into dedicated modules: look-at-arguments.ts (argument normalization and validation), assistant-message-extractor.ts (assistant response extraction), mime-type-inference.ts (MIME type detection from file paths and base64 data), and multimodal-agent-metadata.ts (resolution of the multimodal-looker agent's model and variant).
This commit is contained in:
parent
6e0f6d53a7
commit
480dcff420
67
src/tools/look-at/assistant-message-extractor.ts
Normal file
67
src/tools/look-at/assistant-message-extractor.ts
Normal file
@ -0,0 +1,67 @@
|
||||
/** Timing metadata of a session message. */
interface MessageTime {
  /** Creation timestamp; compared numerically when messages are ordered. */
  created?: number
}

/** Narrowed view of a message's `info` record. */
interface MessageInfo {
  role?: string
  time?: MessageTime
}

/** Narrowed view of a single message part. */
interface MessagePart {
  type?: string
  text?: string
}

/** A session message after defensive narrowing; `parts` is left unvalidated. */
interface SessionMessage {
  info?: MessageInfo
  parts?: unknown
}
|
||||
|
||||
/**
 * Type guard for non-null values whose `typeof` is "object".
 * Arrays satisfy this check too, since they are objects at runtime.
 */
function isObject(value: unknown): value is Record<string, unknown> {
  if (value === null) return false
  return typeof value === "object"
}
|
||||
|
||||
/**
 * Defensively converts an untyped value into a `SessionMessage`.
 *
 * @param value - Raw entry of unknown shape.
 * @returns `null` when `value` is not an object. Otherwise a message whose
 *   `info.role` and `info.time.created` are kept only when they have the
 *   expected primitive type; `parts` is passed through unchanged.
 */
function asSessionMessage(value: unknown): SessionMessage | null {
  if (!isObject(value)) return null

  const rawInfo = value["info"]
  const rawParts = value["parts"]
  if (!isObject(rawInfo)) {
    return { info: undefined, parts: rawParts }
  }

  const rawRole = rawInfo["role"]
  const rawTime = rawInfo["time"]

  let time: MessageTime | undefined
  if (isObject(rawTime)) {
    const rawCreated = rawTime["created"]
    time = { created: typeof rawCreated === "number" ? rawCreated : undefined }
  }

  return {
    info: {
      role: typeof rawRole === "string" ? rawRole : undefined,
      time,
    },
    parts: rawParts,
  }
}
|
||||
|
||||
/** Returns the message's creation timestamp, or 0 when none is recorded. */
function getCreatedTime(message: SessionMessage): number {
  const created = message.info?.time?.created
  return created == null ? 0 : created
}
|
||||
|
||||
/**
 * Collects the non-empty text parts of a message.
 *
 * @returns One `{ type, text }` entry for every object part whose `type` is
 *   "text" and whose `text` is a non-empty string, in original order; an empty
 *   array when `parts` is not an array.
 */
function getTextParts(message: SessionMessage): MessagePart[] {
  const rawParts = message.parts
  if (!Array.isArray(rawParts)) return []

  const collected: MessagePart[] = []
  for (const candidate of rawParts) {
    if (!isObject(candidate)) continue

    const kind = candidate["type"]
    const body = candidate["text"]
    if (kind !== "text") continue
    if (typeof body !== "string" || body.length === 0) continue

    collected.push({ type: kind, text: body })
  }
  return collected
}
|
||||
|
||||
/**
 * Extracts the text of the most recent assistant message.
 *
 * The latest message is the assistant message with the greatest
 * `info.time.created`. When timestamps are equal or missing (treated as 0),
 * the message that appears LAST in `messages` wins, so list order acts as the
 * tie-breaker instead of silently favouring the earliest entry.
 *
 * @param messages - Untyped message list; anything other than a non-empty
 *   array yields `null`.
 * @returns The message's non-empty text parts joined with "\n" (an empty
 *   string when it has none), or `null` when no assistant message exists.
 */
export function extractLatestAssistantText(messages: unknown): string | null {
  if (!Array.isArray(messages) || messages.length === 0) return null

  let latest: SessionMessage | null = null
  let latestCreated = 0

  // Single pass: only the maximum is needed, so sorting the list is unnecessary.
  for (const raw of messages) {
    const message = asSessionMessage(raw)
    if (message === null || message.info?.role !== "assistant") continue

    // A NaN timestamp would poison every comparison; treat it like a missing one.
    const rawCreated = getCreatedTime(message)
    const created = Number.isNaN(rawCreated) ? 0 : rawCreated

    // `>=` lets a later-listed message replace an earlier one on a tie.
    if (latest === null || created >= latestCreated) {
      latest = message
      latestCreated = created
    }
  }

  if (latest === null) return null

  return getTextParts(latest)
    .map((part) => part.text)
    .join("\n")
}
|
||||
31
src/tools/look-at/look-at-arguments.ts
Normal file
31
src/tools/look-at/look-at-arguments.ts
Normal file
@ -0,0 +1,31 @@
|
||||
import type { LookAtArgs } from "./types"
|
||||
|
||||
/**
 * Arguments accepted by the look_at tool, including the `path` alias that
 * `normalizeArgs` maps onto `file_path`.
 */
export interface LookAtArgsWithAlias extends LookAtArgs {
  /** Alias for `file_path`; used only when `file_path` is null or undefined. */
  path?: string
}
|
||||
|
||||
/**
 * Collapses the `path` alias into `file_path` and defaults `goal`.
 *
 * @param args - Raw tool arguments, possibly using `path` instead of `file_path`.
 * @returns Arguments with `file_path` taken from `file_path`, else `path`, and
 *   `goal` replaced by an empty string when it is null or undefined.
 */
export function normalizeArgs(args: LookAtArgsWithAlias): LookAtArgs {
  const resolvedFilePath = args.file_path ?? args.path
  const resolvedGoal = args.goal ?? ""

  const normalized: LookAtArgs = {
    file_path: resolvedFilePath,
    image_data: args.image_data,
    goal: resolvedGoal,
  }
  return normalized
}
|
||||
|
||||
/**
 * Validates normalized look_at arguments.
 *
 * Exactly one of `file_path` / `image_data` must be non-empty, and `goal`
 * must be truthy.
 *
 * @returns An error message describing the first problem found, or `null`
 *   when the arguments are valid.
 */
export function validateArgs(args: LookAtArgs): string | null {
  const filePathGiven = !!args.file_path && args.file_path.length > 0
  const imageDataGiven = !!args.image_data && args.image_data.length > 0
  const sourceCount = Number(filePathGiven) + Number(imageDataGiven)

  if (sourceCount === 0) {
    return [
      "Error: Must provide either 'file_path' or 'image_data'. Usage:",
      '- look_at(file_path="/path/to/file", goal="what to extract")',
      '- look_at(image_data="base64_encoded_data", goal="what to extract")',
    ].join("\n")
  }

  if (sourceCount === 2) {
    return "Error: Provide only one of 'file_path' or 'image_data', not both."
  }

  return args.goal
    ? null
    : 'Error: Missing required parameter \'goal\'. Usage: look_at(file_path="/path/to/file", goal="what to extract")'
}
|
||||
71
src/tools/look-at/mime-type-inference.ts
Normal file
71
src/tools/look-at/mime-type-inference.ts
Normal file
@ -0,0 +1,71 @@
|
||||
import { extname } from "node:path"
|
||||
|
||||
/**
 * Infers a MIME type from base64 data or a `data:` URL.
 *
 * Resolution order:
 * 1. A `type/subtype` MIME type declared in a `data:` URL header is returned as is.
 * 2. Otherwise the payload's first 16 base64 characters (12 bytes) are decoded
 *    and matched against PNG, JPEG, GIF, WebP and PDF signatures.
 * 3. If nothing matches, or the data is not valid base64, "image/png" is returned.
 *
 * A `data:` URL that declares no MIME type (for example `data:;base64,...`) has
 * its header stripped before sniffing, so the payload is still inspected.
 *
 * @param base64Data - Raw base64 text or a full `data:` URL.
 * @returns The inferred MIME type; never throws.
 */
export function inferMimeTypeFromBase64(base64Data: string): string {
  let payload = base64Data

  if (base64Data.startsWith("data:")) {
    // The MIME type ends at the first ";" or ","; stopping at either keeps
    // payload text out of the captured type.
    const declared = /^data:([^;,/]+\/[^;,]+)[;,]/.exec(base64Data)?.[1]
    if (declared) return declared

    // No usable declaration: drop the header so the payload can be sniffed.
    const commaIndex = base64Data.indexOf(",")
    if (commaIndex !== -1) payload = base64Data.slice(commaIndex + 1)
  }

  try {
    // 16 base64 characters decode to 12 bytes, enough for "RIFF....WEBP".
    const header = atob(payload.slice(0, 16))

    if (header.startsWith("\x89PNG")) return "image/png"
    if (header.startsWith("\xFF\xD8\xFF")) return "image/jpeg"
    if (header.startsWith("GIF8")) return "image/gif"
    if (header.startsWith("RIFF") && header.includes("WEBP")) return "image/webp"
    if (header.startsWith("%PDF")) return "application/pdf"
  } catch {
    // invalid base64 - fall through to the default
  }

  return "image/png"
}
|
||||
|
||||
/**
 * Maps a file path's extension (case-insensitive) to a MIME type.
 *
 * @param filePath - Path whose extension is inspected via `extname`.
 * @returns The mapped MIME type, exactly as listed in the table below, or
 *   "application/octet-stream" for unknown or missing extensions.
 */
export function inferMimeTypeFromFilePath(filePath: string): string {
  const byExtension = new Map<string, string>([
    // images
    [".jpg", "image/jpeg"],
    [".jpeg", "image/jpeg"],
    [".png", "image/png"],
    [".webp", "image/webp"],
    [".heic", "image/heic"],
    [".heif", "image/heif"],
    // video
    [".mp4", "video/mp4"],
    [".mpeg", "video/mpeg"],
    [".mpg", "video/mpeg"],
    [".mov", "video/mov"],
    [".avi", "video/avi"],
    [".flv", "video/x-flv"],
    [".webm", "video/webm"],
    [".wmv", "video/wmv"],
    [".3gpp", "video/3gpp"],
    [".3gp", "video/3gpp"],
    // audio
    [".wav", "audio/wav"],
    [".mp3", "audio/mp3"],
    [".aiff", "audio/aiff"],
    [".aac", "audio/aac"],
    [".ogg", "audio/ogg"],
    [".flac", "audio/flac"],
    // documents and text
    [".pdf", "application/pdf"],
    [".txt", "text/plain"],
    [".csv", "text/csv"],
    [".md", "text/md"],
    [".html", "text/html"],
    [".json", "application/json"],
    [".xml", "application/xml"],
    [".js", "text/javascript"],
    [".py", "text/x-python"],
  ])

  const extension = extname(filePath).toLowerCase()
  return byExtension.get(extension) || "application/octet-stream"
}
|
||||
|
||||
/**
 * Strips the header from a `data:` URL.
 *
 * @param imageData - Raw base64 text or a `data:` URL.
 * @returns Everything after the first comma when `imageData` starts with
 *   "data:" and contains a comma; otherwise `imageData` unchanged.
 */
export function extractBase64Data(imageData: string): string {
  if (!imageData.startsWith("data:")) return imageData

  const separator = imageData.indexOf(",")
  return separator === -1 ? imageData : imageData.slice(separator + 1)
}
|
||||
56
src/tools/look-at/multimodal-agent-metadata.ts
Normal file
56
src/tools/look-at/multimodal-agent-metadata.ts
Normal file
@ -0,0 +1,56 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { MULTIMODAL_LOOKER_AGENT } from "./constants"
|
||||
import { log } from "../../shared"
|
||||
|
||||
/** Provider/model identifier pair attached to an agent. */
interface AgentModel {
  providerID: string
  modelID: string
}

/** Outcome of the agent lookup; both fields are absent when nothing was resolved. */
interface ResolvedAgentMetadata {
  agentModel?: AgentModel
  agentVariant?: string
}

/** Validated subset of an agent entry. */
interface AgentInfo {
  name?: string
  model?: AgentModel
  variant?: string
}
|
||||
|
||||
/**
 * Type guard: true for any non-null value of runtime type "object"
 * (arrays included).
 */
function isObject(value: unknown): value is Record<string, unknown> {
  const isNull = value === null
  return !isNull && typeof value === "object"
}
|
||||
|
||||
/**
 * Defensively converts an untyped agent entry into an `AgentInfo`.
 *
 * @param value - Raw entry of unknown shape.
 * @returns `null` when `value` is not an object. Otherwise `name` and
 *   `variant` are kept only if they are strings, and `model` only if it is an
 *   object whose `providerID` and `modelID` are both strings.
 */
function toAgentInfo(value: unknown): AgentInfo | null {
  if (!isObject(value)) return null

  const rawName = value["name"]
  const rawVariant = value["variant"]
  const rawModel = value["model"]

  let model: AgentModel | undefined
  if (isObject(rawModel)) {
    const providerID = rawModel["providerID"]
    const modelID = rawModel["modelID"]
    if (typeof providerID === "string" && typeof modelID === "string") {
      model = { providerID, modelID }
    }
  }

  return {
    name: typeof rawName === "string" ? rawName : undefined,
    model,
    variant: typeof rawVariant === "string" ? rawVariant : undefined,
  }
}
|
||||
|
||||
/**
 * Looks up the model and variant configured for the multimodal-looker agent.
 *
 * The agent list is read from `ctx.client.app.agents()` when that method
 * exists. Both response shapes are accepted: an object carrying the list under
 * `data`, and a bare array of agents. Agent names are matched
 * case-insensitively against `MULTIMODAL_LOOKER_AGENT`.
 *
 * @param ctx - Plugin input whose client is queried for agents.
 * @returns `agentModel` / `agentVariant` of the matching agent (each
 *   `undefined` when unavailable). If the lookup throws, the error is logged
 *   and an empty object is returned.
 */
export async function resolveMultimodalLookerAgentMetadata(
  ctx: PluginInput
): Promise<ResolvedAgentMetadata> {
  try {
    const agentsResult: unknown = await ctx.client.app?.agents?.()

    // Accept a bare array as well as a `{ data: [...] }` envelope.
    let agentsRaw: unknown
    if (Array.isArray(agentsResult)) {
      agentsRaw = agentsResult
    } else if (isObject(agentsResult)) {
      agentsRaw = agentsResult["data"]
    }

    const agents = Array.isArray(agentsRaw)
      ? agentsRaw.map(toAgentInfo).filter((agent): agent is AgentInfo => agent !== null)
      : []

    // Hoisted so the target name is lower-cased once, not per agent.
    const targetName = MULTIMODAL_LOOKER_AGENT.toLowerCase()
    const matched = agents.find((agent) => agent.name?.toLowerCase() === targetName)

    return {
      agentModel: matched?.model,
      agentVariant: matched?.variant,
    }
  } catch (error) {
    log("[look_at] Failed to resolve multimodal-looker model info", error)
    return {}
  }
}
|
||||
@ -1,109 +1,20 @@
|
||||
import { extname, basename } from "node:path"
|
||||
import { basename } from "node:path"
|
||||
import { pathToFileURL } from "node:url"
|
||||
import { tool, type PluginInput, type ToolDefinition } from "@opencode-ai/plugin"
|
||||
import { LOOK_AT_DESCRIPTION, MULTIMODAL_LOOKER_AGENT } from "./constants"
|
||||
import type { LookAtArgs } from "./types"
|
||||
import { log, promptSyncWithModelSuggestionRetry } from "../../shared"
|
||||
import { extractLatestAssistantText } from "./assistant-message-extractor"
|
||||
import type { LookAtArgsWithAlias } from "./look-at-arguments"
|
||||
import { normalizeArgs, validateArgs } from "./look-at-arguments"
|
||||
import {
|
||||
extractBase64Data,
|
||||
inferMimeTypeFromBase64,
|
||||
inferMimeTypeFromFilePath,
|
||||
} from "./mime-type-inference"
|
||||
import { resolveMultimodalLookerAgentMetadata } from "./multimodal-agent-metadata"
|
||||
|
||||
/**
 * look_at arguments extended with the `path` alias that `normalizeArgs`
 * folds into `file_path`.
 */
interface LookAtArgsWithAlias extends LookAtArgs {
  /** Alias for `file_path`; consulted only when `file_path` is null or undefined. */
  path?: string
}
|
||||
|
||||
/**
 * Folds the `path` alias into `file_path` and defaults a missing `goal`.
 *
 * @param args - Raw tool arguments, possibly using `path` instead of `file_path`.
 * @returns Arguments where `file_path` falls back to `path`, and `goal` is an
 *   empty string when it was null or undefined.
 */
export function normalizeArgs(args: LookAtArgsWithAlias): LookAtArgs {
  const resolvedFilePath = args.file_path ?? args.path
  const resolvedGoal = args.goal ?? ""

  const normalized: LookAtArgs = {
    file_path: resolvedFilePath,
    image_data: args.image_data,
    goal: resolvedGoal,
  }
  return normalized
}
|
||||
|
||||
/**
 * Validates normalized look_at arguments.
 *
 * Requires exactly one non-empty source (`file_path` or `image_data`) and a
 * truthy `goal`.
 *
 * @returns The error message for the first failed check, or `null` when valid.
 */
export function validateArgs(args: LookAtArgs): string | null {
  const filePathGiven = !!args.file_path && args.file_path.length > 0
  const imageDataGiven = !!args.image_data && args.image_data.length > 0

  if (!(filePathGiven || imageDataGiven)) {
    return [
      "Error: Must provide either 'file_path' or 'image_data'. Usage:",
      '- look_at(file_path="/path/to/file", goal="what to extract")',
      '- look_at(image_data="base64_encoded_data", goal="what to extract")',
    ].join("\n")
  }

  if (filePathGiven && imageDataGiven) {
    return "Error: Provide only one of 'file_path' or 'image_data', not both."
  }

  return args.goal
    ? null
    : 'Error: Missing required parameter \'goal\'. Usage: look_at(file_path="/path/to/file", goal="what to extract")'
}
|
||||
|
||||
/**
 * Infers a MIME type from base64 data or a `data:` URL.
 *
 * A `type/subtype` MIME type declared in a `data:` URL header is returned as
 * is. Otherwise the first 16 base64 characters (12 bytes) of the payload are
 * decoded and matched against PNG, JPEG, GIF, WebP and PDF signatures. When
 * nothing matches or the data is not valid base64, "image/png" is returned.
 *
 * A `data:` URL without a declared MIME type (for example `data:;base64,...`)
 * has its header stripped first, so the payload is still sniffed.
 *
 * @param base64Data - Raw base64 text or a full `data:` URL.
 * @returns The inferred MIME type; never throws.
 */
function inferMimeTypeFromBase64(base64Data: string): string {
  let payload = base64Data

  if (base64Data.startsWith("data:")) {
    // Stop the capture at the first ";" or "," so payload text cannot leak in.
    const declared = /^data:([^;,/]+\/[^;,]+)[;,]/.exec(base64Data)?.[1]
    if (declared) return declared

    // No usable declaration: drop the header so the payload can be sniffed.
    const commaIndex = base64Data.indexOf(",")
    if (commaIndex !== -1) payload = base64Data.slice(commaIndex + 1)
  }

  try {
    // 16 base64 characters decode to 12 bytes, enough for "RIFF....WEBP".
    const header = atob(payload.slice(0, 16))

    if (header.startsWith("\x89PNG")) return "image/png"
    if (header.startsWith("\xFF\xD8\xFF")) return "image/jpeg"
    if (header.startsWith("GIF8")) return "image/gif"
    if (header.startsWith("RIFF") && header.includes("WEBP")) return "image/webp"
    if (header.startsWith("%PDF")) return "application/pdf"
  } catch {
    // Invalid base64 - fall through to default
  }

  return "image/png"
}
|
||||
|
||||
/**
 * Maps a file path's extension (case-insensitive) to a MIME type.
 *
 * @param filePath - Path whose extension is inspected via `extname`.
 * @returns The mapped MIME type, exactly as listed in the table below, or
 *   "application/octet-stream" for unknown or missing extensions.
 */
function inferMimeType(filePath: string): string {
  const byExtension = new Map<string, string>([
    // images
    [".jpg", "image/jpeg"],
    [".jpeg", "image/jpeg"],
    [".png", "image/png"],
    [".webp", "image/webp"],
    [".heic", "image/heic"],
    [".heif", "image/heif"],
    // video
    [".mp4", "video/mp4"],
    [".mpeg", "video/mpeg"],
    [".mpg", "video/mpeg"],
    [".mov", "video/mov"],
    [".avi", "video/avi"],
    [".flv", "video/x-flv"],
    [".webm", "video/webm"],
    [".wmv", "video/wmv"],
    [".3gpp", "video/3gpp"],
    [".3gp", "video/3gpp"],
    // audio
    [".wav", "audio/wav"],
    [".mp3", "audio/mp3"],
    [".aiff", "audio/aiff"],
    [".aac", "audio/aac"],
    [".ogg", "audio/ogg"],
    [".flac", "audio/flac"],
    // documents and text
    [".pdf", "application/pdf"],
    [".txt", "text/plain"],
    [".csv", "text/csv"],
    [".md", "text/md"],
    [".html", "text/html"],
    [".json", "application/json"],
    [".xml", "application/xml"],
    [".js", "text/javascript"],
    [".py", "text/x-python"],
  ])

  const extension = extname(filePath).toLowerCase()
  return byExtension.get(extension) || "application/octet-stream"
}
|
||||
|
||||
/**
 * Strips the header from a `data:` URL.
 *
 * @param imageData - Raw base64 text or a `data:` URL.
 * @returns The text after the first comma when `imageData` starts with
 *   "data:" and contains a comma; otherwise `imageData` unchanged.
 */
function extractBase64Data(imageData: string): string {
  if (!imageData.startsWith("data:")) return imageData

  const separator = imageData.indexOf(",")
  return separator === -1 ? imageData : imageData.slice(separator + 1)
}
|
||||
export { normalizeArgs, validateArgs } from "./look-at-arguments"
|
||||
|
||||
export function createLookAt(ctx: PluginInput): ToolDefinition {
|
||||
return tool({
|
||||
@ -125,27 +36,30 @@ export function createLookAt(ctx: PluginInput): ToolDefinition {
|
||||
const sourceDescription = isBase64Input ? "clipboard/pasted image" : args.file_path
|
||||
log(`[look_at] Analyzing ${sourceDescription}, goal: ${args.goal}`)
|
||||
|
||||
const imageData = args.image_data
|
||||
const filePath = args.file_path
|
||||
|
||||
let mimeType: string
|
||||
let filePart: { type: "file"; mime: string; url: string; filename: string }
|
||||
|
||||
if (isBase64Input) {
|
||||
mimeType = inferMimeTypeFromBase64(args.image_data!)
|
||||
const base64Content = extractBase64Data(args.image_data!)
|
||||
const dataUrl = `data:${mimeType};base64,${base64Content}`
|
||||
if (imageData) {
|
||||
mimeType = inferMimeTypeFromBase64(imageData)
|
||||
filePart = {
|
||||
type: "file",
|
||||
mime: mimeType,
|
||||
url: dataUrl,
|
||||
url: `data:${mimeType};base64,${extractBase64Data(imageData)}`,
|
||||
filename: `clipboard-image.${mimeType.split("/")[1] || "png"}`,
|
||||
}
|
||||
} else {
|
||||
mimeType = inferMimeType(args.file_path!)
|
||||
} else if (filePath) {
|
||||
mimeType = inferMimeTypeFromFilePath(filePath)
|
||||
filePart = {
|
||||
type: "file",
|
||||
mime: mimeType,
|
||||
url: pathToFileURL(args.file_path!).href,
|
||||
filename: basename(args.file_path!),
|
||||
url: pathToFileURL(filePath).href,
|
||||
filename: basename(filePath),
|
||||
}
|
||||
} else {
|
||||
return "Error: Must provide either 'file_path' or 'image_data'."
|
||||
}
|
||||
|
||||
const prompt = `Analyze this ${isBase64Input ? "image" : "file"} and extract the requested information.
|
||||
@ -166,13 +80,8 @@ If the requested information is not found, clearly state what is missing.`
|
||||
body: {
|
||||
parentID: toolContext.sessionID,
|
||||
title: `look_at: ${args.goal.substring(0, 50)}`,
|
||||
permission: [
|
||||
{ permission: "question", action: "deny" as const, pattern: "*" },
|
||||
],
|
||||
} as any,
|
||||
query: {
|
||||
directory: parentDirectory,
|
||||
},
|
||||
query: { directory: parentDirectory },
|
||||
})
|
||||
|
||||
if (createResult.error) {
|
||||
@ -194,32 +103,7 @@ Original error: ${createResult.error}`
|
||||
const sessionID = createResult.data.id
|
||||
log(`[look_at] Created session: ${sessionID}`)
|
||||
|
||||
let agentModel: { providerID: string; modelID: string } | undefined
|
||||
let agentVariant: string | undefined
|
||||
|
||||
try {
|
||||
const agentsResult = await ctx.client.app?.agents?.()
|
||||
type AgentInfo = {
|
||||
name: string
|
||||
mode?: "subagent" | "primary" | "all"
|
||||
model?: { providerID: string; modelID: string }
|
||||
variant?: string
|
||||
}
|
||||
const agents = ((agentsResult as { data?: AgentInfo[] })?.data ?? agentsResult) as AgentInfo[] | undefined
|
||||
if (agents?.length) {
|
||||
const matchedAgent = agents.find(
|
||||
(agent) => agent.name.toLowerCase() === MULTIMODAL_LOOKER_AGENT.toLowerCase()
|
||||
)
|
||||
if (matchedAgent?.model) {
|
||||
agentModel = matchedAgent.model
|
||||
}
|
||||
if (matchedAgent?.variant) {
|
||||
agentVariant = matchedAgent.variant
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
log("[look_at] Failed to resolve multimodal-looker model info", error)
|
||||
}
|
||||
const { agentModel, agentVariant } = await resolveMultimodalLookerAgentMetadata(ctx)
|
||||
|
||||
log(`[look_at] Sending prompt with ${isBase64Input ? "base64 image" : "file"} to session ${sessionID}`)
|
||||
try {
|
||||
@ -242,7 +126,6 @@ Original error: ${createResult.error}`
|
||||
},
|
||||
})
|
||||
} catch (promptError) {
|
||||
const errorMessage = promptError instanceof Error ? promptError.message : String(promptError)
|
||||
log(`[look_at] Prompt error:`, promptError)
|
||||
|
||||
throw promptError
|
||||
@ -262,25 +145,13 @@ Original error: ${createResult.error}`
|
||||
const messages = messagesResult.data
|
||||
log(`[look_at] Got ${messages.length} messages`)
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const lastAssistantMessage = messages
|
||||
.filter((m: any) => m.info.role === "assistant")
|
||||
.sort((a: any, b: any) => (b.info.time?.created || 0) - (a.info.time?.created || 0))[0]
|
||||
|
||||
if (!lastAssistantMessage) {
|
||||
log(`[look_at] No assistant message found`)
|
||||
return `Error: No response from multimodal-looker agent`
|
||||
const responseText = extractLatestAssistantText(messages)
|
||||
if (!responseText) {
|
||||
log("[look_at] No assistant message found")
|
||||
return "Error: No response from multimodal-looker agent"
|
||||
}
|
||||
|
||||
log(`[look_at] Found assistant message with ${lastAssistantMessage.parts.length} parts`)
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const textParts = lastAssistantMessage.parts.filter((p: any) => p.type === "text")
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const responseText = textParts.map((p: any) => p.text).join("\n")
|
||||
|
||||
log(`[look_at] Got response, length: ${responseText.length}`)
|
||||
|
||||
return responseText
|
||||
},
|
||||
})
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user