oh-my-opencode/src/agents/multimodal-looker.ts
YeonGyu-Kim 29dbc0f57b
chore: cleanup agent model references and defaults (#547)
* refactor(agents): remove unused model references

Consistent cleanup of agent model references across all agent files.

🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)

* fix(agents): use glm-4.7-free as default librarian model

🤖 Generated with [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)

* make the playwright skill get called more often
2026-01-07 01:24:44 +09:00

66 lines
2.3 KiB
TypeScript

import type { AgentConfig } from "@opencode-ai/sdk"
import type { AgentPromptMetadata } from "./types"
import { createAgentToolRestrictions } from "../shared/permission-compat"
/** Model identifier used by `createMultimodalLookerAgent` when no model is passed. */
const DEFAULT_MODEL = "google/gemini-3-flash"

/**
 * Prompt metadata for the Multimodal Looker agent: its category, cost label,
 * display alias, and an empty trigger list.
 */
export const MULTIMODAL_LOOKER_PROMPT_METADATA: AgentPromptMetadata = {
  category: "utility",
  cost: "CHEAP",
  promptAlias: "Multimodal Looker",
  triggers: [],
}
/**
 * Builds the agent configuration for the Multimodal Looker subagent.
 *
 * The tool names "write", "edit" and "bash" are handed to
 * `createAgentToolRestrictions`, and whatever it returns is spread into the
 * resulting config (presumably disabling those tools — confirm against
 * `../shared/permission-compat`).
 *
 * @param model - Model identifier for the agent; defaults to `DEFAULT_MODEL`.
 * @returns The assembled agent configuration.
 */
export function createMultimodalLookerAgent(
  model: string = DEFAULT_MODEL
): AgentConfig {
  const toolRestrictions = createAgentToolRestrictions(["write", "edit", "bash"])

  const description =
    "Analyze media files (PDFs, images, diagrams) that require interpretation beyond raw text. Extracts specific information or summaries from documents, describes visual content. Use when you need analyzed/extracted data rather than literal file contents."

  // The prompt's continuation lines are intentionally flush-left: any leading
  // whitespace inside the template literal would become part of the prompt text.
  const prompt = `You interpret media files that cannot be read as plain text.
Your job: examine the attached file and extract ONLY what was requested.
When to use you:
- Media files the Read tool cannot interpret
- Extracting specific information or summaries from documents
- Describing visual content in images or diagrams
- When analyzed/extracted data is needed, not raw file contents
When NOT to use you:
- Source code or plain text files needing exact contents (use Read)
- Files that need editing afterward (need literal content from Read)
- Simple file reading where no interpretation is needed
How you work:
1. Receive a file path and a goal describing what to extract
2. Read and analyze the file deeply
3. Return ONLY the relevant extracted information
4. The main agent never processes the raw file - you save context tokens
For PDFs: extract text, structure, tables, data from specific sections
For images: describe layouts, UI elements, text, diagrams, charts
For diagrams: explain relationships, flows, architecture depicted
Response rules:
- Return extracted information directly, no preamble
- If info not found, state clearly what's missing
- Match the language of the request
- Be thorough on the goal, concise on everything else
Your output goes straight to the main agent for continued work.`

  return {
    description,
    mode: "subagent" as const,
    model,
    temperature: 0.1,
    // Spread ahead of `prompt` so key order and override precedence stay as before.
    ...toolRestrictions,
    prompt,
  }
}
/** Agent configuration created by calling the factory with no arguments, so its default model applies. */
export const multimodalLookerAgent = createMultimodalLookerAgent()