Add JPEG conversion for image formats the look_at tool does not pass through
(HEIC/HEIF, camera RAW, PSD); converters are invoked via execFileSync.

Review notes: line breaks and indentation were restored from a whitespace-collapsed
copy of this patch. Blank context lines in mime-type-inference.ts and tools.ts were
inferred from the hunk line counts and should be checked against the base files.
The blob ids on the `index` lines predate the edits made to the two
image-converter files and need regenerating.

diff --git a/src/tools/look-at/image-converter.test.ts b/src/tools/look-at/image-converter.test.ts
new file mode 100644
index 00000000..37c9cdf2
--- /dev/null
+++ b/src/tools/look-at/image-converter.test.ts
@@ -0,0 +1,71 @@
+import { describe, expect, test, mock, beforeEach } from "bun:test"
+import { existsSync, mkdtempSync, writeFileSync, unlinkSync, rmSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+
+const originalChildProcess = await import("node:child_process")
+
+const execFileSyncMock = mock((_command: string, _args: string[]) => "")
+const execSyncMock = mock(() => {
+  throw new Error("execSync should not be called")
+})
+
+mock.module("node:child_process", () => ({
+  ...originalChildProcess,
+  execFileSync: execFileSyncMock,
+  execSync: execSyncMock,
+}))
+
+const { cleanupConvertedImage, convertImageToJpeg } = await import("./image-converter")
+
+describe("image-converter command execution safety", () => {
+  beforeEach(() => {
+    execFileSyncMock.mockReset()
+    execSyncMock.mockReset()
+  })
+
+  test("uses execFileSync with argument arrays for conversion commands", () => {
+    const testDir = mkdtempSync(join(tmpdir(), "img-converter-test-"))
+    const inputPath = join(testDir, "evil$(touch_pwn).heic")
+    writeFileSync(inputPath, "fake-heic-data")
+
+    execFileSyncMock.mockImplementation((command: string, args: string[]) => {
+      if (command === "sips") {
+        const outIndex = args.indexOf("--out")
+        const outputPath = outIndex >= 0 ? args[outIndex + 1] : undefined
+        if (outputPath) writeFileSync(outputPath, "jpeg")
+      } else if (command === "magick" || command === "convert") {
+        writeFileSync(args[1], "jpeg")
+      }
+      return ""
+    })
+
+    const outputPath = convertImageToJpeg(inputPath, "image/heic")
+
+    expect(execSyncMock).not.toHaveBeenCalled()
+    expect(execFileSyncMock).toHaveBeenCalled()
+
+    const [firstCommand, firstArgs] = execFileSyncMock.mock.calls[0] as [string, string[]]
+    expect(typeof firstCommand).toBe("string")
+    expect(Array.isArray(firstArgs)).toBe(true)
+    expect(firstArgs).toContain(inputPath)
+    expect(firstArgs.join(" ")).not.toContain(`\"${inputPath}\"`)
+
+    expect(existsSync(outputPath)).toBe(true)
+
+    cleanupConvertedImage(outputPath)
+    if (existsSync(inputPath)) unlinkSync(inputPath)
+    rmSync(testDir, { recursive: true, force: true })
+  })
+
+  test("cleanupConvertedImage removes the temporary directory with the file", () => {
+    const tempDir = mkdtempSync(join(tmpdir(), "opencode-img-"))
+    const tempFile = join(tempDir, "converted.jpg")
+    writeFileSync(tempFile, "jpeg")
+
+    cleanupConvertedImage(tempFile)
+
+    expect(existsSync(tempFile)).toBe(false)
+    expect(existsSync(tempDir)).toBe(false)
+  })
+})
diff --git a/src/tools/look-at/image-converter.ts b/src/tools/look-at/image-converter.ts
new file mode 100644
index 00000000..e95237ba
--- /dev/null
+++ b/src/tools/look-at/image-converter.ts
@@ -0,0 +1,201 @@
+import { execFileSync } from "node:child_process"
+import { existsSync, mkdtempSync, readFileSync, rmSync, rmdirSync, unlinkSync, writeFileSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { basename, dirname, join, resolve } from "node:path"
+import { log } from "../../shared"
+
+/** MIME types that are passed through without conversion. */
+const SUPPORTED_FORMATS = new Set([
+  "image/jpeg",
+  "image/png",
+  "image/webp",
+  "image/gif",
+  "image/bmp",
+  "image/tiff",
+])
+
+/** MIME types that are always converted to JPEG. */
+const UNSUPPORTED_FORMATS = new Set([
+  "image/heic",
+  "image/heif",
+  "image/x-canon-cr2",
+  "image/x-canon-crw",
+  "image/x-nikon-nef",
+  "image/x-nikon-nrw",
+  "image/x-sony-arw",
+  "image/x-sony-sr2",
+  "image/x-sony-srf",
+  "image/x-pentax-pef",
+  "image/x-olympus-orf",
+  "image/x-panasonic-raw",
+  "image/x-fuji-raf",
+  "image/x-adobe-dng",
+  "image/vnd.adobe.photoshop",
+  "image/x-photoshop",
+])
+
+/** Name prefixes of the temporary directories created by this module. */
+const TEMP_DIR_PREFIXES = ["opencode-img-", "opencode-b64-"]
+
+/**
+ * Reports whether an image must be converted to JPEG before it is used.
+ *
+ * Known-supported types return `false`; known-unsupported types and any other
+ * type starting with `image/` return `true`; everything else returns `false`.
+ */
+export function needsConversion(mimeType: string): boolean {
+  if (SUPPORTED_FORMATS.has(mimeType)) {
+    return false
+  }
+
+  if (UNSUPPORTED_FORMATS.has(mimeType)) {
+    return true
+  }
+
+  return mimeType.startsWith("image/")
+}
+
+/**
+ * Converts the image at `inputPath` to a JPEG inside a fresh temporary directory.
+ *
+ * Converters are run through `execFileSync` with argument arrays, so the path is
+ * never interpreted by a shell. `sips` is tried first on macOS, then ImageMagick
+ * as `magick` (v7) and finally as the legacy `convert` binary.
+ *
+ * @param inputPath - Path of the image to convert.
+ * @param mimeType - MIME type of the input; only used in log output.
+ * @returns Path of the converted JPEG; release it with `cleanupConvertedImage`.
+ * @throws Error if the input is missing or no converter produced an output file.
+ */
+export function convertImageToJpeg(inputPath: string, mimeType: string): string {
+  if (!existsSync(inputPath)) {
+    throw new Error(`File not found: ${inputPath}`)
+  }
+
+  const tempDir = mkdtempSync(join(tmpdir(), "opencode-img-"))
+  const outputPath = join(tempDir, "converted.jpg")
+
+  log(`[image-converter] Converting ${mimeType} to JPEG: ${inputPath}`)
+
+  try {
+    if (process.platform === "darwin") {
+      try {
+        execFileSync("sips", ["-s", "format", "jpeg", inputPath, "--out", outputPath], {
+          stdio: "pipe",
+          encoding: "utf-8",
+        })
+
+        if (existsSync(outputPath)) {
+          log(`[image-converter] Converted using sips: ${outputPath}`)
+          return outputPath
+        }
+      } catch (sipsError) {
+        log(`[image-converter] sips failed: ${sipsError}`)
+      }
+    }
+
+    // ImageMagick 7 installs `magick`; `convert` is the legacy name. On Windows
+    // `convert` is an unrelated system utility, so it is not tried there.
+    const imageMagickCommands = process.platform === "win32" ? ["magick"] : ["magick", "convert"]
+    for (const command of imageMagickCommands) {
+      try {
+        execFileSync(command, [inputPath, outputPath], {
+          stdio: "pipe",
+          encoding: "utf-8",
+        })
+
+        if (existsSync(outputPath)) {
+          log(`[image-converter] Converted using ImageMagick: ${outputPath}`)
+          return outputPath
+        }
+      } catch (convertError) {
+        log(`[image-converter] ImageMagick ${command} failed: ${convertError}`)
+      }
+    }
+
+    throw new Error(
+      `No image conversion tool available. Please install ImageMagick:\n` +
+        `  macOS: brew install imagemagick\n` +
+        `  Ubuntu/Debian: sudo apt install imagemagick\n` +
+        `  RHEL/CentOS: sudo yum install ImageMagick`
+    )
+  } catch (error) {
+    // Drop the whole temporary directory, not only the output file, so a failed
+    // conversion leaves nothing behind.
+    try {
+      rmSync(tempDir, { recursive: true, force: true })
+    } catch {}
+
+    throw error
+  }
+}
+
+/** Reports whether `dir` is a temporary directory created by this module. */
+function isOwnedTempDir(dir: string): boolean {
+  const isDirectChildOfTmp = resolve(dirname(dir)) === resolve(tmpdir())
+  return isDirectChildOfTmp && TEMP_DIR_PREFIXES.some(prefix => basename(dir).startsWith(prefix))
+}
+
+/**
+ * Deletes a temporary file returned by this module and removes its temporary
+ * directory once that directory is empty. Failures are logged, never thrown.
+ */
+export function cleanupConvertedImage(filePath: string): void {
+  try {
+    if (existsSync(filePath)) {
+      unlinkSync(filePath)
+      log(`[image-converter] Cleaned up temporary file: ${filePath}`)
+    }
+
+    const parentDir = dirname(filePath)
+    if (isOwnedTempDir(parentDir) && existsSync(parentDir)) {
+      // rmdirSync refuses non-empty directories, so other files are never lost.
+      rmdirSync(parentDir)
+    }
+  } catch (error) {
+    log(`[image-converter] Failed to cleanup ${filePath}: ${error}`)
+  }
+}
+
+/**
+ * Converts Base64-encoded image data to Base64-encoded JPEG data.
+ *
+ * @param base64Data - Raw Base64, or a `data:` URL whose prefix is stripped.
+ * @param mimeType - MIME type of the input; its subtype names the temp file.
+ * @returns The JPEG as Base64, plus temp files to pass to `cleanupConvertedImage`.
+ * @throws Error if writing or converting fails; temp files are removed first.
+ */
+export function convertBase64ImageToJpeg(
+  base64Data: string,
+  mimeType: string
+): { base64: string; tempFiles: string[] } {
+  const tempDir = mkdtempSync(join(tmpdir(), "opencode-b64-"))
+  // The subtype may come from a caller-supplied data URL, so keep only characters
+  // that cannot act as a path separator.
+  const inputExt = (mimeType.split("/")[1] ?? "").replace(/[^A-Za-z0-9.+-]/g, "") || "bin"
+  const inputPath = join(tempDir, `input.${inputExt}`)
+  const tempFiles: string[] = [inputPath]
+
+  try {
+    // `[^,]*` also covers data URLs with extra parameters such as `;name=clip.heic`.
+    const cleanBase64 = base64Data.replace(/^data:[^,]*;base64,/, "")
+    const buffer = Buffer.from(cleanBase64, "base64")
+    writeFileSync(inputPath, buffer)
+
+    log(`[image-converter] Converting Base64 ${mimeType} to JPEG`)
+
+    const outputPath = convertImageToJpeg(inputPath, mimeType)
+    tempFiles.push(outputPath)
+
+    const convertedBuffer = readFileSync(outputPath)
+    const convertedBase64 = convertedBuffer.toString("base64")
+
+    log(`[image-converter] Base64 conversion successful`)
+
+    return { base64: convertedBase64, tempFiles }
+  } catch (error) {
+    // cleanupConvertedImage never throws and also removes the temp directories.
+    tempFiles.forEach(file => cleanupConvertedImage(file))
+    throw error
+  }
+}
diff --git a/src/tools/look-at/mime-type-inference.test.ts b/src/tools/look-at/mime-type-inference.test.ts
new file mode 100644
index 00000000..69ac6e6b
--- /dev/null
+++ b/src/tools/look-at/mime-type-inference.test.ts
@@ -0,0 +1,41 @@
+import { describe, expect, test } from "bun:test"
+import { extractBase64Data, inferMimeTypeFromBase64, inferMimeTypeFromFilePath } from "./mime-type-inference"
+
+describe("mime type inference", () => {
+  test("returns MIME from data URL prefix", () => {
+    const mime = inferMimeTypeFromBase64("data:image/heic;base64,AAAAGGZ0eXBoZWlj")
+    expect(mime).toBe("image/heic")
+  })
+
+  test("detects HEIC from raw base64 magic bytes", () => {
+    const heicHeader = Buffer.from("00000018667479706865696300000000", "hex").toString("base64")
+    const mime = inferMimeTypeFromBase64(heicHeader)
+    expect(mime).toBe("image/heic")
+  })
+
+  test("detects HEIF from raw base64 magic bytes", () => {
+    const heifHeader = Buffer.from("00000018667479706865696600000000", "hex").toString("base64")
+    const mime = inferMimeTypeFromBase64(heifHeader)
+    expect(mime).toBe("image/heif")
+  })
+
+  test("falls back to png when base64 signature is unknown", () => {
+    const mime = inferMimeTypeFromBase64("dW5rbm93biBiaW5hcnk=")
+    expect(mime).toBe("image/png")
+  })
+
+  test("infers heic from file extension", () => {
+    const mime = inferMimeTypeFromFilePath("/tmp/photo.HEIC")
+    expect(mime).toBe("image/heic")
+  })
+
+  test("extracts raw base64 data from data URL", () => {
+    const base64 = extractBase64Data("data:image/png;base64,abc123")
+    expect(base64).toBe("abc123")
+  })
+
+  test("extracts raw base64 data from data URL with extra parameters", () => {
+    const base64 = extractBase64Data("data:image/heic;name=clip.heic;base64,abc123")
+    expect(base64).toBe("abc123")
+  })
+})
diff --git a/src/tools/look-at/mime-type-inference.ts b/src/tools/look-at/mime-type-inference.ts
index 18954c46..0718259d 100644
--- a/src/tools/look-at/mime-type-inference.ts
+++ b/src/tools/look-at/mime-type-inference.ts
@@ -8,12 +8,18 @@ export function inferMimeTypeFromBase64(base64Data: string): string {
 
   try {
     const cleanData = base64Data.replace(/^data:[^;]+;base64,/, "")
-    const header = atob(cleanData.slice(0, 16))
+    const header = Buffer.from(cleanData.slice(0, 256), "base64").toString("binary")
 
     if (header.startsWith("\x89PNG")) return "image/png"
     if (header.startsWith("\xFF\xD8\xFF")) return "image/jpeg"
     if (header.startsWith("GIF8")) return "image/gif"
     if (header.startsWith("RIFF") && header.includes("WEBP")) return "image/webp"
+    if (header.includes("ftypheic") || header.includes("ftypheix") || header.includes("ftyphevc") || header.includes("ftyphevx")) {
+      return "image/heic"
+    }
+    if (header.includes("ftypheif") || header.includes("ftypmif1") || header.includes("ftypmsf1")) {
+      return "image/heif"
+    }
     if (header.startsWith("%PDF")) return "application/pdf"
   } catch {
     // invalid base64 - fall through
@@ -29,8 +35,25 @@ export function inferMimeTypeFromFilePath(filePath: string): string {
     ".jpeg": "image/jpeg",
     ".png": "image/png",
     ".webp": "image/webp",
+    ".gif": "image/gif",
+    ".bmp": "image/bmp",
+    ".tiff": "image/tiff",
+    ".tif": "image/tiff",
     ".heic": "image/heic",
     ".heif": "image/heif",
+    ".cr2": "image/x-canon-cr2",
+    ".crw": "image/x-canon-crw",
+    ".nef": "image/x-nikon-nef",
+    ".nrw": "image/x-nikon-nrw",
+    ".arw": "image/x-sony-arw",
+    ".sr2": "image/x-sony-sr2",
+    ".srf": "image/x-sony-srf",
+    ".pef": "image/x-pentax-pef",
+    ".orf": "image/x-olympus-orf",
+    ".raw": "image/x-panasonic-raw",
+    ".raf": "image/x-fuji-raf",
+    ".dng": "image/x-adobe-dng",
+    ".psd": "image/vnd.adobe.photoshop",
     ".mp4": "video/mp4",
     ".mpeg": "video/mpeg",
     ".mpg": "video/mpeg",
diff --git a/src/tools/look-at/tools.ts b/src/tools/look-at/tools.ts
index 0d5c1c0b..a4c67f05 100644
--- a/src/tools/look-at/tools.ts
+++ b/src/tools/look-at/tools.ts
@@ -13,6 +13,12 @@ import {
   inferMimeTypeFromFilePath,
 } from "./mime-type-inference"
 import { resolveMultimodalLookerAgentMetadata } from "./multimodal-agent-metadata"
+import {
+  needsConversion,
+  convertImageToJpeg,
+  convertBase64ImageToJpeg,
+  cleanupConvertedImage,
+} from "./image-converter"
 
 export { normalizeArgs, validateArgs } from "./look-at-arguments"
 
@@ -41,22 +47,58 @@ export function createLookAt(ctx: PluginInput): ToolDefinition {
 
       let mimeType: string
       let filePart: { type: "file"; mime: string; url: string; filename: string }
+      let tempFilePath: string | null = null
+      let tempFilesToCleanup: string[] = []
 
-      if (imageData) {
-        mimeType = inferMimeTypeFromBase64(imageData)
-        filePart = {
-          type: "file",
-          mime: mimeType,
-          url: `data:${mimeType};base64,${extractBase64Data(imageData)}`,
-          filename: `clipboard-image.${mimeType.split("/")[1] || "png"}`,
-        }
-      } else if (filePath) {
+      try {
+        if (imageData) {
+          mimeType = inferMimeTypeFromBase64(imageData)
+
+          let finalBase64Data = extractBase64Data(imageData)
+          let finalMimeType = mimeType
+
+          if (needsConversion(mimeType)) {
+            log(`[look_at] Detected unsupported Base64 format: ${mimeType}, converting to JPEG...`)
+            try {
+              const { base64, tempFiles } = convertBase64ImageToJpeg(finalBase64Data, mimeType)
+              finalBase64Data = base64
+              finalMimeType = "image/jpeg"
+              tempFilesToCleanup = tempFiles
+              log(`[look_at] Base64 conversion successful`)
+            } catch (conversionError) {
+              log(`[look_at] Base64 conversion failed: ${conversionError}`)
+              return `Error: Failed to convert Base64 image format. ${conversionError}`
+            }
+          }
+
+          filePart = {
+            type: "file",
+            mime: finalMimeType,
+            url: `data:${finalMimeType};base64,${finalBase64Data}`,
+            filename: `clipboard-image.${finalMimeType.split("/")[1] || "png"}`,
+          }
+        } else if (filePath) {
         mimeType = inferMimeTypeFromFilePath(filePath)
+
+        let actualFilePath = filePath
+        if (needsConversion(mimeType)) {
+          log(`[look_at] Detected unsupported format: ${mimeType}, converting to JPEG...`)
+          try {
+            tempFilePath = convertImageToJpeg(filePath, mimeType)
+            actualFilePath = tempFilePath
+            mimeType = "image/jpeg"
+            log(`[look_at] Conversion successful: ${tempFilePath}`)
+          } catch (conversionError) {
+            log(`[look_at] Conversion failed: ${conversionError}`)
+            return `Error: Failed to convert image format. ${conversionError}`
+          }
+        }
+
         filePart = {
           type: "file",
           mime: mimeType,
-          url: pathToFileURL(filePath).href,
-          filename: basename(filePath),
+          url: pathToFileURL(actualFilePath).href,
+          filename: basename(actualFilePath),
         }
       } else {
         return "Error: Must provide either 'file_path' or 'image_data'."
@@ -149,8 +191,14 @@ Original error: ${createResult.error}`
         return "Error: No response from multimodal-looker agent"
       }
 
-      log(`[look_at] Got response, length: ${responseText.length}`)
-      return responseText
+        log(`[look_at] Got response, length: ${responseText.length}`)
+        return responseText
+      } finally {
+        if (tempFilePath) {
+          cleanupConvertedImage(tempFilePath)
+        }
+        tempFilesToCleanup.forEach(file => cleanupConvertedImage(file))
+      }
     },
   })
 }