Commit df0b9f76 regressed look_at by switching it from a synchronous prompt (session.prompt) to an asynchronous prompt (session.promptAsync) followed by pollSessionUntilIdle polling. This introduced a race condition: the poller can fire before the server registers the session as busy, so it returns immediately with no messages available. Fix: restore promptSyncWithModelSuggestionRetry (a blocking HTTP call) and remove polling entirely. Prompt errors are caught gracefully and messages are still fetched afterwards, since session.prompt may throw even on success.
157 lines
5.9 KiB
TypeScript
157 lines
5.9 KiB
TypeScript
import { basename } from "node:path"
|
|
import { pathToFileURL } from "node:url"
|
|
import { tool, type PluginInput, type ToolDefinition } from "@opencode-ai/plugin"
|
|
import { LOOK_AT_DESCRIPTION, MULTIMODAL_LOOKER_AGENT } from "./constants"
|
|
import type { LookAtArgs } from "./types"
|
|
import { log, promptSyncWithModelSuggestionRetry } from "../../shared"
|
|
import { extractLatestAssistantText } from "./assistant-message-extractor"
|
|
import type { LookAtArgsWithAlias } from "./look-at-arguments"
|
|
import { normalizeArgs, validateArgs } from "./look-at-arguments"
|
|
import {
|
|
extractBase64Data,
|
|
inferMimeTypeFromBase64,
|
|
inferMimeTypeFromFilePath,
|
|
} from "./mime-type-inference"
|
|
import { resolveMultimodalLookerAgentMetadata } from "./multimodal-agent-metadata"
|
|
|
|
export { normalizeArgs, validateArgs } from "./look-at-arguments"
|
|
|
|
/**
 * Renders an unknown error value as readable text.
 *
 * Strings are returned as-is and `Error` instances keep their usual
 * `Name: message` form. Any other value is JSON-serialized so that structured
 * error payloads do not collapse into `[object Object]`; if serialization is
 * impossible (e.g. circular references) it falls back to `String(error)`.
 */
function describeLookAtError(error: unknown): string {
  if (typeof error === "string") {
    return error
  }
  if (error instanceof Error) {
    return String(error)
  }
  try {
    const serialized = JSON.stringify(error)
    if (serialized !== undefined) {
      return serialized
    }
  } catch {
    // Not serializable (circular structure, BigInt, ...): use the fallback below.
  }
  return String(error)
}

/**
 * Builds the `look_at` tool definition.
 *
 * When executed, the tool:
 * 1. normalizes and validates the arguments (returning the validation message on failure),
 * 2. turns either `image_data` (base64) or `file_path` into a file part,
 * 3. creates a child session of the calling session,
 * 4. sends the prompt plus file part to the multimodal looker agent with the
 *    `task`, `call_omo_agent`, `look_at` and `read` tools disabled,
 * 5. fetches the session messages and returns the latest assistant text.
 *
 * Every failure is reported as a returned string starting with `Error:`
 * (or the validation message).
 *
 * @param ctx Plugin input providing the SDK client and the default directory.
 * @returns The tool definition to register.
 */
export function createLookAt(ctx: PluginInput): ToolDefinition {
  return tool({
    description: LOOK_AT_DESCRIPTION,
    args: {
      file_path: tool.schema.string().optional().describe("Absolute path to the file to analyze"),
      image_data: tool.schema.string().optional().describe("Base64 encoded image data (for clipboard/pasted images)"),
      goal: tool.schema.string().describe("What specific information to extract from the file"),
    },
    async execute(rawArgs: LookAtArgs, toolContext) {
      const args = normalizeArgs(rawArgs as LookAtArgsWithAlias)
      const validationError = validateArgs(args)
      if (validationError) {
        log(`[look_at] Validation failed: ${validationError}`)
        return validationError
      }

      const isBase64Input = Boolean(args.image_data)
      const sourceDescription = isBase64Input ? "clipboard/pasted image" : args.file_path
      log(`[look_at] Analyzing ${sourceDescription}, goal: ${args.goal}`)

      const imageData = args.image_data
      const filePath = args.file_path

      let mimeType: string
      let filePart: { type: "file"; mime: string; url: string; filename: string }

      if (imageData) {
        // Inline image: ship it as a data URL with a synthetic filename.
        mimeType = inferMimeTypeFromBase64(imageData)
        filePart = {
          type: "file",
          mime: mimeType,
          url: `data:${mimeType};base64,${extractBase64Data(imageData)}`,
          filename: `clipboard-image.${mimeType.split("/")[1] || "png"}`,
        }
      } else if (filePath) {
        // File on disk: reference it through a file:// URL.
        mimeType = inferMimeTypeFromFilePath(filePath)
        filePart = {
          type: "file",
          mime: mimeType,
          url: pathToFileURL(filePath).href,
          filename: basename(filePath),
        }
      } else {
        return "Error: Must provide either 'file_path' or 'image_data'."
      }

      const prompt = [
        `Analyze this ${isBase64Input ? "image" : "file"} and extract the requested information.`,
        "",
        `Goal: ${args.goal}`,
        "",
        "Provide ONLY the extracted information that matches the goal.",
        "Be thorough on what was requested, concise on everything else.",
        "If the requested information is not found, clearly state what is missing.",
      ].join("\n")

      log(`[look_at] Creating session with parent: ${toolContext.sessionID}`)
      // A failed parent lookup is not fatal: fall back to the plugin's directory.
      const parentSession = await ctx.client.session.get({
        path: { id: toolContext.sessionID },
      }).catch(() => null)
      const parentDirectory = parentSession?.data?.directory ?? ctx.directory

      const createResult = await ctx.client.session.create({
        body: {
          parentID: toolContext.sessionID,
          title: `look_at: ${args.goal.substring(0, 50)}`,
        },
        query: { directory: parentDirectory },
      })

      if (createResult.error) {
        log(`[look_at] Session create error:`, createResult.error)
        // Serialize structured errors so the text is meaningful and the
        // "unauthorized" detection also works for object-shaped errors.
        const errorText = describeLookAtError(createResult.error)
        if (errorText.toLowerCase().includes("unauthorized")) {
          return [
            "Error: Failed to create session (Unauthorized). This may be due to:",
            "1. OAuth token restrictions (e.g., Claude Code credentials are restricted to Claude Code only)",
            "2. Provider authentication issues",
            "3. Session permission inheritance problems",
            "",
            "Try using a different provider or API key authentication.",
            "",
            `Original error: ${errorText}`,
          ].join("\n")
        }
        return `Error: Failed to create session: ${errorText}`
      }

      const sessionID = createResult.data.id
      log(`[look_at] Created session: ${sessionID}`)

      const { agentModel, agentVariant } = await resolveMultimodalLookerAgentMetadata(ctx)

      log(`[look_at] Sending prompt with ${isBase64Input ? "base64 image" : "file"} to session ${sessionID}`)
      // The prompt call may throw even though the agent produced a response, so
      // a failure is remembered rather than returned; messages are fetched anyway.
      let promptFailed = false
      let promptFailure: unknown
      try {
        await promptSyncWithModelSuggestionRetry(ctx.client, {
          path: { id: sessionID },
          body: {
            agent: MULTIMODAL_LOOKER_AGENT,
            tools: {
              task: false,
              call_omo_agent: false,
              look_at: false,
              read: false,
            },
            parts: [
              { type: "text", text: prompt },
              filePart,
            ],
            ...(agentModel ? { model: { providerID: agentModel.providerID, modelID: agentModel.modelID } } : {}),
            ...(agentVariant ? { variant: agentVariant } : {}),
          },
        })
      } catch (promptError) {
        promptFailed = true
        promptFailure = promptError
        log(`[look_at] Prompt error (ignored, will still fetch messages):`, promptError)
      }

      log(`[look_at] Fetching messages from session ${sessionID}...`)

      const messagesResult = await ctx.client.session.messages({
        path: { id: sessionID },
      })

      if (messagesResult.error) {
        log(`[look_at] Messages error:`, messagesResult.error)
        return `Error: Failed to get messages: ${describeLookAtError(messagesResult.error)}`
      }

      // Guard against a response that carries neither an error nor data.
      const messages = messagesResult.data ?? []
      log(`[look_at] Got ${messages.length} messages`)

      const responseText = extractLatestAssistantText(messages)
      if (!responseText) {
        log("[look_at] No assistant message found")
        const noResponse = "Error: No response from multimodal-looker agent"
        // Surface the earlier prompt failure so the root cause is not lost.
        return promptFailed
          ? `${noResponse} (prompt failed: ${describeLookAtError(promptFailure)})`
          : noResponse
      }

      log(`[look_at] Got response, length: ${responseText.length}`)
      return responseText
    },
  })
}
|