diff --git a/src/loader.ts b/src/loader.ts index c20c66eae..387c365e8 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -94,6 +94,7 @@ process.env.GSD_BUNDLED_EXTENSION_PATHS = [ join(agentDir, 'extensions', 'search-the-web', 'index.ts'), join(agentDir, 'extensions', 'slash-commands', 'index.ts'), join(agentDir, 'extensions', 'subagent', 'index.ts'), + join(agentDir, 'extensions', 'mac-tools', 'index.ts'), join(agentDir, 'extensions', 'ask-user-questions.ts'), join(agentDir, 'extensions', 'get-secrets-from-user.ts'), ].join(':') diff --git a/src/resources/extensions/mac-tools/index.ts b/src/resources/extensions/mac-tools/index.ts new file mode 100644 index 000000000..be9ff1ed6 --- /dev/null +++ b/src/resources/extensions/mac-tools/index.ts @@ -0,0 +1,852 @@ +/** + * mac-tools — pi extension + * + * Gives the agent macOS automation capabilities via a Swift CLI that interfaces + * with Accessibility APIs, NSWorkspace, and CGWindowList. + * + * Architecture: + * - Swift CLI (`swift-cli/`) handles all macOS API calls + * - JSON protocol: stdin `{ command, params }` → stdout `{ success, data?, error? 
}` + * - TS extension invokes CLI per-command via execFileSync + * - Mtime-based compilation caching: recompiles only when source files change + * - All Swift debug output goes to stderr; only JSON on stdout + */ + +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; +import { StringEnum } from "@mariozechner/pi-ai"; +import { Type } from "@sinclair/typebox"; +import { execFileSync } from "node:child_process"; +import { statSync, readdirSync } from "node:fs"; +import path from "node:path"; + +// --------------------------------------------------------------------------- +// Paths +// --------------------------------------------------------------------------- + +const EXTENSION_DIR = path.dirname(new URL(import.meta.url).pathname); +const SWIFT_CLI_DIR = path.join(EXTENSION_DIR, "swift-cli"); +const SOURCES_DIR = path.join(SWIFT_CLI_DIR, "Sources"); +const BINARY_PATH = path.join(SWIFT_CLI_DIR, ".build", "release", "mac-agent"); +const PACKAGE_SWIFT = path.join(SWIFT_CLI_DIR, "Package.swift"); + +// --------------------------------------------------------------------------- +// Compilation caching +// --------------------------------------------------------------------------- + +/** Get the latest mtime (ms) across all Swift source files and Package.swift. */ +function getSourceMtime(): number { + let latest = 0; + // Check Package.swift + try { + latest = Math.max(latest, statSync(PACKAGE_SWIFT).mtimeMs); + } catch {} + // Check all files in Sources/ + try { + const files = readdirSync(SOURCES_DIR); + for (const f of files) { + try { + const mt = statSync(path.join(SOURCES_DIR, f)).mtimeMs; + if (mt > latest) latest = mt; + } catch {} + } + } catch {} + return latest; +} + +/** Get the binary mtime (ms), or 0 if it doesn't exist. */ +function getBinaryMtime(): number { + try { + return statSync(BINARY_PATH).mtimeMs; + } catch { + return 0; + } +} + +/** Compile the Swift CLI if source files are newer than the binary. 
*/ +function ensureCompiled(): void { + const srcMtime = getSourceMtime(); + const binMtime = getBinaryMtime(); + + if (binMtime > 0 && binMtime >= srcMtime) { + return; // Binary is up-to-date + } + + const action = binMtime === 0 ? "Compiling" : "Recompiling"; + try { + execFileSync("swift", ["build", "-c", "release"], { + cwd: SWIFT_CLI_DIR, + timeout: 30_000, + stdio: ["pipe", "pipe", "pipe"], + }); + } catch (err: any) { + const stderr = err.stderr?.toString() || ""; + const stdout = err.stdout?.toString() || ""; + throw new Error( + `Swift compilation failed (${action.toLowerCase()}):\n${stderr || stdout || err.message}` + ); + } +} + +// --------------------------------------------------------------------------- +// CLI invocation +// --------------------------------------------------------------------------- + +interface MacAgentResponse { + success: boolean; + data?: Record; + error?: string; +} + +/** + * Invoke the mac-agent CLI with a command and optional params. + * Handles compilation caching, stdin/stdout JSON protocol, and error surfacing. + */ +function execMacAgent(command: string, params?: Record): MacAgentResponse { + ensureCompiled(); + + const input = JSON.stringify({ command, params: params ?? {} }); + let stdout: string; + let stderr: string = ""; + + // Interaction commands (click, type) can block while the target app + // processes the action — e.g. TextEdit's AXPress on "New Document" + // takes ~12s while it dismisses the Open dialog and creates a window. + // Screenshots can also be slow for large retina windows. + const slowCommands = new Set(["clickElement", "typeText", "screenshotWindow"]); + const timeout = slowCommands.has(command) ? 30_000 : 10_000; + + try { + const result = execFileSync(BINARY_PATH, [], { + input, + timeout, + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + maxBuffer: 5 * 1024 * 1024, // 5MB — needed for retina screenshot base64 payloads + }); + stdout = typeof result === "string" ? 
result : result.toString(); + } catch (err: any) { + stderr = err.stderr?.toString() || ""; + const isTimeout = err.killed || err.signal === "SIGTERM"; + // If the process exited non-zero but produced stdout, try to parse it + if (err.stdout) { + stdout = err.stdout.toString(); + } else if (isTimeout) { + throw new Error( + `mac-agent timed out after ${timeout / 1000}s (command: ${command}). ` + + `The target app may be slow to respond — AXPress can block while the app processes the action.` + ); + } else { + throw new Error( + `mac-agent CLI failed (command: ${command}):\n${stderr || err.message}` + ); + } + } + + try { + return JSON.parse(stdout.trim()) as MacAgentResponse; + } catch { + throw new Error( + `mac-agent returned invalid JSON (command: ${command}):\nstdout: ${stdout}\nstderr: ${stderr}` + ); + } +} + +// --------------------------------------------------------------------------- +// Extension entry point +// --------------------------------------------------------------------------- + +export default function (pi: ExtensionAPI) { + // ----------------------------------------------------------------- + // mac_check_permissions + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_check_permissions", + label: "Mac Permissions", + description: + "Check whether macOS Accessibility and Screen Recording permissions are enabled for the current terminal. " + + "Returns { accessibilityEnabled, screenRecordingEnabled }. Accessibility is required for UI automation; " + + "Screen Recording is required for mac_screenshot. 
Both are granted in System Settings > Privacy & Security.", + promptGuidelines: [ + "Run this first if any mac tool returns a permission error.", + ], + parameters: Type.Object({}), + + async execute(_toolCallId: any) { + const result = execMacAgent("checkPermissions"); + if (!result.success) { + throw new Error("mac_check_permissions: " + result.error); + } + const accessibility = result.data?.accessibilityEnabled ?? false; + const screenRecording = result.data?.screenRecordingEnabled ?? false; + + const lines: string[] = []; + lines.push(accessibility + ? "✅ Accessibility: enabled" + : "❌ Accessibility: NOT enabled — grant in System Settings > Privacy & Security > Accessibility"); + lines.push(screenRecording + ? "✅ Screen Recording: enabled" + : "❌ Screen Recording: NOT enabled — grant in System Settings > Privacy & Security > Screen Recording"); + + return { + content: [{ type: "text" as const, text: lines.join("\n") }], + details: result.data, + }; + }, + }); + + // ----------------------------------------------------------------- + // mac_list_apps + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_list_apps", + label: "List Apps", + description: + "List all running macOS applications. Returns an array of { name, bundleId, pid, isActive } " + + "for user-facing apps (regular activation policy). Set includeBackground to true to also " + + "include accessory/background apps.", + promptGuidelines: [ + "Use to discover what apps are running before interacting with them.", + ], + parameters: Type.Object({ + includeBackground: Type.Optional(Type.Boolean({ description: "Include background/accessory apps (default: false)" })), + }), + + async execute(_toolCallId: any, { includeBackground }: { includeBackground?: boolean }) { + const result = execMacAgent("listApps", includeBackground ? 
{ includeBackground: true } : undefined); + if (!result.success) { + throw new Error("mac_list_apps: " + result.error); + } + const apps = result.data as unknown as Array<{ name: string; bundleId: string; pid: number; isActive: boolean }>; + const summary = apps.map(a => `${a.name} (${a.bundleId}) pid:${a.pid}${a.isActive ? " [active]" : ""}`).join("\n"); + return { + content: [{ type: "text" as const, text: `${apps.length} running apps:\n${summary}` }], + details: { apps }, + }; + }, + }); + + // ----------------------------------------------------------------- + // mac_launch_app + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_launch_app", + label: "Launch App", + description: + "Launch a macOS application by name or bundle ID. " + + "Returns { launched, name, bundleId, pid } on success. " + + "Provide either 'name' (e.g. 'TextEdit') or 'bundleId' (e.g. 'com.apple.TextEdit').", + promptGuidelines: [ + "Use app name for well-known apps; use bundleId when the name is ambiguous.", + ], + parameters: Type.Object({ + name: Type.Optional(Type.String({ description: "Application name (e.g. 'TextEdit', 'Safari')" })), + bundleId: Type.Optional(Type.String({ description: "Bundle identifier (e.g. 
'com.apple.TextEdit')" })), + }), + + async execute(_toolCallId: any, { name, bundleId }: { name?: string; bundleId?: string }) { + if (!name && !bundleId) { + throw new Error("mac_launch_app: provide either 'name' or 'bundleId' parameter"); + } + const params: Record = {}; + if (name) params.name = name; + if (bundleId) params.bundleId = bundleId; + + const result = execMacAgent("launchApp", params); + if (!result.success) { + throw new Error("mac_launch_app: " + result.error); + } + const d = result.data!; + return { + content: [{ type: "text" as const, text: `Launched ${d.name} (${d.bundleId}) pid:${d.pid}` }], + details: result.data, + }; + }, + }); + + // ----------------------------------------------------------------- + // mac_activate_app + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_activate_app", + label: "Activate App", + description: + "Bring a running macOS application to the front. " + + "Returns { activated, name } on success. Errors if the app is not running. 
" + + "Provide either 'name' or 'bundleId'.", + promptGuidelines: [ + "Activate an app before interacting with its UI to ensure it is frontmost.", + ], + parameters: Type.Object({ + name: Type.Optional(Type.String({ description: "Application name" })), + bundleId: Type.Optional(Type.String({ description: "Bundle identifier" })), + }), + + async execute(_toolCallId: any, { name, bundleId }: { name?: string; bundleId?: string }) { + if (!name && !bundleId) { + throw new Error("mac_activate_app: provide either 'name' or 'bundleId' parameter"); + } + const params: Record = {}; + if (name) params.name = name; + if (bundleId) params.bundleId = bundleId; + + const result = execMacAgent("activateApp", params); + if (!result.success) { + throw new Error("mac_activate_app: " + result.error); + } + return { + content: [{ type: "text" as const, text: `Activated ${result.data?.name}` }], + details: result.data, + }; + }, + }); + + // ----------------------------------------------------------------- + // mac_quit_app + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_quit_app", + label: "Quit App", + description: + "Quit a running macOS application. " + + "Returns { quit, name } on success. Errors if the app is not running. 
" + + "Provide either 'name' or 'bundleId'.", + promptGuidelines: [ + "Use to clean up apps launched during automation — don't leave apps running unnecessarily.", + ], + parameters: Type.Object({ + name: Type.Optional(Type.String({ description: "Application name" })), + bundleId: Type.Optional(Type.String({ description: "Bundle identifier" })), + }), + + async execute(_toolCallId: any, { name, bundleId }: { name?: string; bundleId?: string }) { + if (!name && !bundleId) { + throw new Error("mac_quit_app: provide either 'name' or 'bundleId' parameter"); + } + const params: Record = {}; + if (name) params.name = name; + if (bundleId) params.bundleId = bundleId; + + const result = execMacAgent("quitApp", params); + if (!result.success) { + throw new Error("mac_quit_app: " + result.error); + } + return { + content: [{ type: "text" as const, text: `Quit ${result.data?.name}` }], + details: result.data, + }; + }, + }); + + // ----------------------------------------------------------------- + // mac_list_windows + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_list_windows", + label: "List Windows", + description: + "List all on-screen windows for a macOS application. " + + "Returns an array of { windowId, title, bounds: {x,y,width,height}, isOnScreen, layer }. " + + "The windowId can be used with getWindowInfo for detailed inspection or with screenshotWindow for capture. " + + "Returns an empty array (not error) if the app is running but has no visible windows. " + + "Errors if the app is not running.", + promptGuidelines: [ + "Use to get windowId values needed by mac_screenshot.", + ], + parameters: Type.Object({ + app: Type.String({ description: "Application name (e.g. 'TextEdit') or bundle identifier (e.g. 
'com.apple.TextEdit')" }), + }), + + async execute(_toolCallId: any, { app }: { app: string }) { + const result = execMacAgent("listWindows", { app }); + if (!result.success) { + throw new Error("mac_list_windows: " + result.error); + } + const data = result.data as { windows: Array<{ windowId: number; title: string; bounds: Record; isOnScreen: boolean; layer: number }>; app: string; pid: number }; + const windows = data.windows ?? []; + if (windows.length === 0) { + return { + content: [{ type: "text" as const, text: `${data.app} (pid:${data.pid}) has no visible windows.` }], + details: data, + }; + } + const summary = windows.map(w => + ` windowId:${w.windowId} "${w.title}" ${w.bounds.width}x${w.bounds.height} at (${w.bounds.x},${w.bounds.y}) layer:${w.layer}` + ).join("\n"); + return { + content: [{ type: "text" as const, text: `${data.app} (pid:${data.pid}) — ${windows.length} window(s):\n${summary}` }], + details: data, + }; + }, + }); + + // ----------------------------------------------------------------- + // mac_find + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_find", + label: "Find Elements", + description: + "Find UI elements in a macOS application's accessibility tree. Three modes:\n" + + "- 'search' (default): Find elements matching role/title/value/identifier criteria. Returns a numbered list of matches.\n" + + "- 'tree': Dump the full accessibility subtree as an indented tree. Use maxDepth/maxCount to bound output.\n" + + "- 'focused': Get the currently focused element in the app. No criteria needed.\n" + + "The 'app' param accepts an app name (e.g. 'Finder') or bundle ID (e.g. 
'com.apple.Finder').", + promptGuidelines: [ + "Prefer for targeted element search — use role/title/value criteria to narrow results.", + "Use mode:focused to check the current focus target without search criteria.", + "Use mac_get_tree instead of mode:tree when you just need to understand app structure.", + ], + parameters: Type.Object({ + app: Type.String({ description: "Application name or bundle identifier" }), + mode: Type.Optional(StringEnum(["search", "tree", "focused"] as const, { description: "'search' (default), 'tree', or 'focused'" })), + role: Type.Optional(Type.String({ description: "AX role to match (e.g. 'AXButton', 'AXTextArea')" })), + title: Type.Optional(Type.String({ description: "AX title to match" })), + value: Type.Optional(Type.String({ description: "AX value to match" })), + identifier: Type.Optional(Type.String({ description: "AX identifier to match" })), + matchType: Type.Optional(Type.String({ description: "'exact' (default) or 'contains'" })), + maxDepth: Type.Optional(Type.Number({ description: "Maximum tree depth to traverse (default: 10)" })), + maxCount: Type.Optional(Type.Number({ description: "Maximum elements to return/visit (default: 100)" })), + }), + + async execute(_toolCallId: any, args: { + app: string; + mode?: string; + role?: string; + title?: string; + value?: string; + identifier?: string; + matchType?: string; + maxDepth?: number; + maxCount?: number; + }) { + const mode = args.mode ?? "search"; + + // --- Focused mode --- + if (mode === "focused") { + const result = execMacAgent("getFocusedElement", { app: args.app }); + if (!result.success) { + throw new Error("mac_find (focused): " + result.error); + } + const el = result.data as Record; + const parts = [el.role ?? 
"unknown"]; + if (el.title) parts.push(`"${el.title}"`); + if (el.value !== undefined) parts.push(`[${el.value}]`); + return { + content: [{ type: "text" as const, text: `Focused element: ${parts.join(" ")}` }], + details: result.data, + }; + } + + // --- Tree mode --- + if (mode === "tree") { + const params: Record = { app: args.app }; + if (args.maxDepth !== undefined) params.maxDepth = args.maxDepth; + if (args.maxCount !== undefined) params.maxCount = args.maxCount; + + const result = execMacAgent("getTree", params); + if (!result.success) { + throw new Error("mac_find (tree): " + result.error); + } + + const data = result.data as { tree: any[]; totalElements: number; truncated: boolean }; + const lines: string[] = []; + + function renderTree(nodes: any[], indent: number) { + for (const node of nodes) { + const parts = [node.role ?? "?"]; + if (node.title) parts.push(`"${node.title}"`); + if (node.value !== undefined && node.value !== "") parts.push(`[${node.value}]`); + lines.push(" ".repeat(indent) + parts.join(" ")); + if (node.children?.length) { + renderTree(node.children, indent + 1); + } + } + } + + renderTree(data.tree ?? [], 0); + const truncNote = data.truncated ? 
`\n(truncated — ${data.totalElements} elements visited)` : ""; + return { + content: [{ type: "text" as const, text: `${lines.join("\n")}${truncNote}` }], + details: result.data, + }; + } + + // --- Search mode (default) --- + const params: Record = { app: args.app }; + if (args.role) params.role = args.role; + if (args.title) params.title = args.title; + if (args.value) params.value = args.value; + if (args.identifier) params.identifier = args.identifier; + if (args.matchType) params.matchType = args.matchType; + if (args.maxDepth !== undefined) params.maxDepth = args.maxDepth; + if (args.maxCount !== undefined) params.maxCount = args.maxCount; + + const result = execMacAgent("findElements", params); + if (!result.success) { + throw new Error("mac_find (search): " + result.error); + } + + const data = result.data as { elements: any[]; totalVisited: number; truncated: boolean }; + const elements = data.elements ?? []; + + if (elements.length === 0) { + const criteria = [args.role, args.title, args.value, args.identifier].filter(Boolean).join(", "); + return { + content: [{ type: "text" as const, text: `No elements found matching: ${criteria || "(no criteria)"}` }], + details: result.data, + }; + } + + const lines = elements.map((el: any, i: number) => { + const parts = [`${i + 1}. ${el.role ?? "?"}`]; + if (el.title) parts.push(`"${el.title}"`); + if (el.value !== undefined && el.value !== "") parts.push(`[${el.value}]`); + return parts.join(" "); + }); + const truncNote = data.truncated ? 
`\n(truncated — search stopped at limit)` : ""; + return { + content: [{ type: "text" as const, text: `${elements.length} element(s) found:\n${lines.join("\n")}${truncNote}` }], + details: result.data, + }; + }, + }); + + // ----------------------------------------------------------------- + // mac_get_tree + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_get_tree", + label: "Get UI Tree", + description: + "Get a compact accessibility tree of a macOS application's UI structure. " + + "Returns an indented tree showing role, title, and value of each element. " + + "Tighter defaults than mac_find's tree mode — designed for quick structure inspection. " + + "Each line: `role \"title\" [value]` with 2-space indent per depth level. " + + "Omits title/value when nil or empty.", + promptGuidelines: [ + "Use for understanding app UI structure — start with low limits and increase if needed.", + "Prefer mac_find search mode when you know what you're looking for.", + "Check the truncation note to know if the tree was cut short.", + ], + parameters: Type.Object({ + app: Type.String({ description: "Application name or bundle identifier" }), + maxDepth: Type.Optional(Type.Number({ description: "Maximum tree depth to traverse (default: 3)" })), + maxCount: Type.Optional(Type.Number({ description: "Maximum elements to include (default: 50)" })), + }), + + async execute(_toolCallId: any, args: { app: string; maxDepth?: number; maxCount?: number }) { + const params: Record = { app: args.app }; + params.maxDepth = args.maxDepth ?? 3; + params.maxCount = args.maxCount ?? 
50; + + const result = execMacAgent("getTree", params); + if (!result.success) { + throw new Error("mac_get_tree: " + result.error); + } + + const data = result.data as { tree: any[]; totalElements: number; truncated: boolean }; + const lines: string[] = []; + + function renderNode(nodes: any[], indent: number) { + for (const node of nodes) { + const parts = [node.role ?? "?"]; + if (node.title) parts.push(`"${node.title}"`); + if (node.value !== undefined && node.value !== null && node.value !== "") parts.push(`[${node.value}]`); + lines.push(" ".repeat(indent) + parts.join(" ")); + if (node.children?.length) { + renderNode(node.children, indent + 1); + } + } + } + + renderNode(data.tree ?? [], 0); + if (data.truncated) { + lines.push(`\n(truncated — ${data.totalElements} elements visited, increase maxDepth or maxCount for more)`); + } + return { + content: [{ type: "text" as const, text: lines.join("\n") }], + details: { totalElements: data.totalElements, truncated: data.truncated }, + }; + }, + }); + + // ----------------------------------------------------------------- + // mac_click + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_click", + label: "Click Element", + description: + "Click a UI element in a macOS application by performing AXPress. " + + "Finds the first element matching the given criteria (role, title, value, identifier) and clicks it. " + + "At least one criterion is required. Returns the clicked element's attributes.", + promptGuidelines: [ + "Verify the click worked by reading the resulting state with mac_find or mac_read.", + "Use mac_find first to discover the right role/title/value criteria before clicking.", + ], + parameters: Type.Object({ + app: Type.String({ description: "Application name or bundle identifier" }), + role: Type.Optional(Type.String({ description: "AX role (e.g. 
'AXButton', 'AXMenuItem')" })), + title: Type.Optional(Type.String({ description: "AX title to match" })), + value: Type.Optional(Type.String({ description: "AX value to match" })), + identifier: Type.Optional(Type.String({ description: "AX identifier to match" })), + matchType: Type.Optional(Type.String({ description: "'exact' (default) or 'contains'" })), + }), + + async execute(_toolCallId: any, args: { + app: string; + role?: string; + title?: string; + value?: string; + identifier?: string; + matchType?: string; + }) { + if (!args.role && !args.title && !args.value && !args.identifier) { + throw new Error("mac_click: provide at least one search criterion (role, title, value, or identifier)"); + } + const params: Record = { app: args.app }; + if (args.role) params.role = args.role; + if (args.title) params.title = args.title; + if (args.value) params.value = args.value; + if (args.identifier) params.identifier = args.identifier; + if (args.matchType) params.matchType = args.matchType; + + const result = execMacAgent("clickElement", params); + if (!result.success) { + throw new Error("mac_click: " + result.error); + } + + const el = result.data?.element as Record | undefined; + const parts = [el?.role ?? "element"]; + if (el?.title) parts.push(`'${el.title}'`); + return { + content: [{ type: "text" as const, text: `Clicked ${parts.join(" ")}` }], + details: result.data, + }; + }, + }); + + // ----------------------------------------------------------------- + // mac_type + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_type", + label: "Type Text", + description: + "Type text into a UI element in a macOS application by setting its AXValue attribute. " + + "Finds the first element matching the given criteria and sets its value. " + + "Returns the actual value after setting (read-back verification). 
" + + "At least one criterion is required.", + promptGuidelines: [ + "Read back the value after typing to verify — the return value includes actual content.", + "Target text fields/areas by role (AXTextArea, AXTextField) for reliability.", + ], + parameters: Type.Object({ + app: Type.String({ description: "Application name or bundle identifier" }), + text: Type.String({ description: "Text to type into the element" }), + role: Type.Optional(Type.String({ description: "AX role (e.g. 'AXTextArea', 'AXTextField')" })), + title: Type.Optional(Type.String({ description: "AX title to match" })), + value: Type.Optional(Type.String({ description: "AX value to match" })), + identifier: Type.Optional(Type.String({ description: "AX identifier to match" })), + matchType: Type.Optional(Type.String({ description: "'exact' (default) or 'contains'" })), + }), + + async execute(_toolCallId: any, args: { + app: string; + text: string; + role?: string; + title?: string; + value?: string; + identifier?: string; + matchType?: string; + }) { + if (!args.role && !args.title && !args.value && !args.identifier) { + throw new Error("mac_type: provide at least one search criterion (role, title, value, or identifier)"); + } + const params: Record = { app: args.app, text: args.text }; + if (args.role) params.role = args.role; + if (args.title) params.title = args.title; + if (args.value) params.value = args.value; + if (args.identifier) params.identifier = args.identifier; + if (args.matchType) params.matchType = args.matchType; + + const result = execMacAgent("typeText", params); + if (!result.success) { + throw new Error("mac_type: " + result.error); + } + + const el = result.data?.element as Record | undefined; + const actualValue = result.data?.value; + const parts = [el?.role ?? 
"element"]; + if (el?.title) parts.push(`'${el.title}'`); + return { + content: [{ type: "text" as const, text: `Typed into ${parts.join(" ")} — value is now: ${actualValue}` }], + details: result.data, + }; + }, + }); + + // ----------------------------------------------------------------- + // mac_screenshot + // ----------------------------------------------------------------- + pi.registerTool({ + name: "mac_screenshot", + label: "Screenshot Window", + description: + "Take a screenshot of a macOS application window by its window ID (from mac_list_windows). " + + "Returns the screenshot as an image content block for visual analysis, alongside text metadata " + + "(dimensions and format). Requires Screen Recording permission — use mac_check_permissions to verify.", + promptGuidelines: [ + "Use for visual verification when accessibility attributes aren't sufficient.", + "Prefer nominal resolution unless retina detail is needed — retina doubles payload size.", + "Requires Screen Recording permission — run mac_check_permissions first if screenshot fails.", + ], + parameters: Type.Object({ + windowId: Type.Number({ description: "Window ID from mac_list_windows output" }), + format: Type.Optional(StringEnum(["jpeg", "png"] as const, { description: "'jpeg' (default) or 'png'" })), + quality: Type.Optional(Type.Number({ description: "JPEG compression quality 0-1 (default: 0.8)" })), + retina: Type.Optional(Type.Boolean({ description: "Capture at full pixel resolution (default: false)" })), + }), + + async execute(_toolCallId: any, args: { windowId: number; format?: string; quality?: number; retina?: boolean }) { + const params: Record = { windowId: args.windowId }; + if (args.format) params.format = args.format; + if (args.quality !== undefined) params.quality = args.quality; + if (args.retina !== undefined) params.retina = args.retina; + + const result = execMacAgent("screenshotWindow", params); + if (!result.success) { + throw new Error("mac_screenshot: " + 
// -----------------------------------------------------------------
// mac_read
// -----------------------------------------------------------------
// Registers the `mac_read` tool: locates the first UI element matching the
// search criteria and reads one or more accessibility attributes from it via
// the Swift CLI's `readAttribute` command.
pi.registerTool({
	name: "mac_read",
	label: "Read Attribute",
	description:
		"Read one or more accessibility attributes from a UI element in a macOS application. " +
		"Finds the first element matching the given criteria and reads the named attribute(s). " +
		"AXValue subtypes (CGPoint, CGSize, CGRect, CFRange) are automatically unpacked to structured dicts. " +
		"Use 'attribute' for a single attribute or 'attributes' for multiple. At least one search criterion is required.",
	promptGuidelines: [
		"Use to verify state after actions — read AXValue to confirm text was typed, AXEnabled to check if a button is active.",
	],
	parameters: Type.Object({
		app: Type.String({ description: "Application name or bundle identifier" }),
		attribute: Type.Optional(Type.String({ description: "Single attribute name to read (e.g. 'AXValue', 'AXPosition', 'AXRole')" })),
		attributes: Type.Optional(Type.Array(Type.String(), { description: "Multiple attribute names to read" })),
		role: Type.Optional(Type.String({ description: "AX role (e.g. 'AXButton', 'AXTextArea')" })),
		title: Type.Optional(Type.String({ description: "AX title to match" })),
		value: Type.Optional(Type.String({ description: "AX value to match" })),
		identifier: Type.Optional(Type.String({ description: "AX identifier to match" })),
		// The CLI's readAttribute handler falls back to "contains" when matchType is
		// omitted, so the description must advertise that as the default.
		matchType: Type.Optional(Type.String({ description: "'contains' (default, case-insensitive substring) or 'exact'" })),
	}),

	/**
	 * Validates the arguments, forwards them to the CLI and formats the result.
	 *
	 * Throws when neither `attribute` nor a non-empty `attributes` is given, when
	 * no search criterion is given, or when the CLI reports a failure.
	 */
	async execute(_toolCallId: any, args: {
		app: string;
		attribute?: string;
		attributes?: string[];
		role?: string;
		title?: string;
		value?: string;
		identifier?: string;
		matchType?: string;
	}) {
		if (!args.attribute && (!args.attributes || args.attributes.length === 0)) {
			throw new Error("mac_read: provide 'attribute' (single) or 'attributes' (array) parameter");
		}
		if (!args.role && !args.title && !args.value && !args.identifier) {
			throw new Error("mac_read: provide at least one search criterion (role, title, value, or identifier)");
		}

		// Only forward the parameters the caller actually supplied.
		const params: Record<string, unknown> = { app: args.app };
		if (args.attribute) params.attribute = args.attribute;
		if (args.attributes) params.attributes = args.attributes;
		if (args.role) params.role = args.role;
		if (args.title) params.title = args.title;
		if (args.value) params.value = args.value;
		if (args.identifier) params.identifier = args.identifier;
		if (args.matchType) params.matchType = args.matchType;

		const result = execMacAgent("readAttribute", params);
		if (!result.success) {
			throw new Error("mac_read: " + result.error);
		}

		// Structured values (unpacked AXValue dicts, null) are shown as JSON; scalars as-is.
		const render = (v: unknown): string => (typeof v === "object" ? JSON.stringify(v) : String(v));

		// Single-attribute mode. The CLI gives `attribute` precedence over `attributes`
		// and answers with `value`, so branch on `attribute` alone; otherwise a call
		// that passes both would fall through to the raw JSON fallback below.
		if (args.attribute) {
			return {
				content: [{ type: "text" as const, text: `${args.attribute}: ${render(result.data?.value)}` }],
				details: result.data,
			};
		}

		// Multi-attribute mode: one "key: value" line per attribute.
		const values = result.data?.values as Record<string, unknown> | undefined;
		if (values) {
			const lines = Object.entries(values).map(([k, v]) => `${k}: ${render(v)}`);
			return {
				content: [{ type: "text" as const, text: lines.join("\n") }],
				details: result.data,
			};
		}

		// Fallback: unexpected response shape, return it verbatim.
		return {
			content: [{ type: "text" as const, text: JSON.stringify(result.data) }],
			details: result.data,
		};
	},
});
**Verify** the result with \`mac_read\` (check attribute values) or \`mac_screenshot\` (visual confirmation) + +**Tree queries:** Start with default limits (mac_get_tree: maxDepth:3, maxCount:50). Increase only if the element you need isn't visible in the output. Large trees waste context. + +**Screenshots:** Use \`mac_screenshot\` only when visual verification is genuinely needed — the image payload is large. Prefer \`mac_read\` or \`mac_find\` for checking text values and element state.`; + + return { systemPrompt: event.systemPrompt + guidelines }; + }); +} diff --git a/src/resources/extensions/mac-tools/swift-cli/.gitignore b/src/resources/extensions/mac-tools/swift-cli/.gitignore new file mode 100644 index 000000000..30bcfa4ed --- /dev/null +++ b/src/resources/extensions/mac-tools/swift-cli/.gitignore @@ -0,0 +1 @@ +.build/ diff --git a/src/resources/extensions/mac-tools/swift-cli/Package.swift b/src/resources/extensions/mac-tools/swift-cli/Package.swift new file mode 100644 index 000000000..fe4767257 --- /dev/null +++ b/src/resources/extensions/mac-tools/swift-cli/Package.swift @@ -0,0 +1,22 @@ +// swift-tools-version: 5.9 + +import PackageDescription + +let package = Package( + name: "mac-agent", + platforms: [.macOS(.v14)], + products: [ + .executable(name: "mac-agent", targets: ["mac-agent"]), + ], + targets: [ + .executableTarget( + name: "mac-agent", + path: "Sources", + linkerSettings: [ + .linkedFramework("ApplicationServices"), + .linkedFramework("AppKit"), + .linkedFramework("ScreenCaptureKit"), + ] + ), + ] +) diff --git a/src/resources/extensions/mac-tools/swift-cli/Sources/main.swift b/src/resources/extensions/mac-tools/swift-cli/Sources/main.swift new file mode 100644 index 000000000..6d832e5e7 --- /dev/null +++ b/src/resources/extensions/mac-tools/swift-cli/Sources/main.swift @@ -0,0 +1,1318 @@ +import Foundation +import ApplicationServices +import AppKit +import ScreenCaptureKit +import UniformTypeIdentifiers + +// MARK: - JSON Protocol Types + 
/// A decoded JSON request: the command name plus its optional parameters.
struct CommandRequest: Decodable {
    let command: String
    let params: [String: AnyCodable]?
}

/// The JSON response envelope: a success flag with either a payload or an error message.
struct CommandResponse: Encodable {
    let success: Bool
    let data: AnyCodable?
    let error: String?

    /// Wraps `data` in a successful response.
    static func ok(_ data: Any) -> CommandResponse {
        return CommandResponse(success: true, data: AnyCodable(data), error: nil)
    }

    /// Builds a failed response carrying `message` as the error text.
    static func fail(_ message: String) -> CommandResponse {
        return CommandResponse(success: false, data: nil, error: message)
    }
}

/// Type-erased Codable wrapper for heterogeneous JSON values.
struct AnyCodable: Codable {
    let value: Any

    init(_ value: Any) {
        self.value = value
    }

    /// Decodes a single JSON value, probing in this order: null, Bool, Int, Double,
    /// String, array, object. Nested containers are unwrapped to plain `Any` values.
    init(from decoder: Decoder) throws {
        let single = try decoder.singleValueContainer()

        if single.decodeNil() {
            value = NSNull()
            return
        }
        if let flag = try? single.decode(Bool.self) {
            value = flag
            return
        }
        if let integer = try? single.decode(Int.self) {
            value = integer
            return
        }
        if let real = try? single.decode(Double.self) {
            value = real
            return
        }
        if let text = try? single.decode(String.self) {
            value = text
            return
        }
        if let items = try? single.decode([AnyCodable].self) {
            value = items.map(\.value)
            return
        }
        if let members = try? single.decode([String: AnyCodable].self) {
            value = members.mapValues(\.value)
            return
        }

        throw DecodingError.dataCorruptedError(in: single, debugDescription: "Unsupported JSON type")
    }

    /// Encodes the wrapped value. The probe order matters (Bool before the integer
    /// types); anything unrecognised is written as its string description.
    func encode(to encoder: Encoder) throws {
        var out = encoder.singleValueContainer()

        if value is NSNull {
            try out.encodeNil()
        } else if let flag = value as? Bool {
            try out.encode(flag)
        } else if let number = value as? Int {
            try out.encode(number)
        } else if let number = value as? Int64 {
            try out.encode(number)
        } else if let number = value as? Int32 {
            try out.encode(number)
        } else if let number = value as? UInt32 {
            try out.encode(number)
        } else if let real = value as? Double {
            try out.encode(real)
        } else if let text = value as? String {
            try out.encode(text)
        } else if let items = value as? [Any] {
            try out.encode(items.map { AnyCodable($0) })
        } else if let members = value as? [String: Any] {
            try out.encode(members.mapValues { AnyCodable($0) })
        } else {
            try out.encode(String(describing: value))
        }
    }
}
"Unknown", + "bundleId": app.bundleIdentifier ?? "", + "pid": Int(app.processIdentifier), + "isActive": app.isActive + ] + result.append(entry) + } + } + + return .ok(result) +} + +/// Find a running application by name or bundle ID. +func findRunningApp(params: [String: AnyCodable]?) -> NSRunningApplication? { + let name = params?["name"]?.value as? String + let bundleId = params?["bundleId"]?.value as? String + + guard name != nil || bundleId != nil else { return nil } + + let apps = NSWorkspace.shared.runningApplications + for app in apps { + if let bundleId = bundleId, app.bundleIdentifier == bundleId { + return app + } + if let name = name, app.localizedName?.lowercased() == name.lowercased() { + return app + } + } + return nil +} + +func handleLaunchApp(_ params: [String: AnyCodable]?) -> CommandResponse { + let name = params?["name"]?.value as? String + let bundleId = params?["bundleId"]?.value as? String + + guard name != nil || bundleId != nil else { + return .fail("launchApp requires 'name' or 'bundleId' parameter") + } + + // Try bundle ID first if provided + if let bundleId = bundleId { + if let appURL = NSWorkspace.shared.urlForApplication(withBundleIdentifier: bundleId) { + do { + let config = NSWorkspace.OpenConfiguration() + config.activates = true + let semaphore = DispatchSemaphore(value: 0) + var launchedApp: NSRunningApplication? + var launchError: Error? + + NSWorkspace.shared.openApplication(at: appURL, configuration: config) { app, error in + launchedApp = app + launchError = error + semaphore.signal() + } + semaphore.wait() + + if let error = launchError { + return .fail("Failed to launch app with bundleId '\(bundleId)': \(error.localizedDescription)") + } + + return .ok([ + "launched": true, + "name": launchedApp?.localizedName ?? "Unknown", + "bundleId": bundleId, + "pid": Int(launchedApp?.processIdentifier ?? 
0) + ] as [String: Any]) + } + } else { + return .fail("App not found with bundleId: \(bundleId)") + } + } + + // Launch by name using /usr/bin/open -a + if let name = name { + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/open") + process.arguments = ["-a", name] + let errPipe = Pipe() + process.standardError = errPipe + + do { + try process.run() + process.waitUntilExit() + } catch { + return .fail("Failed to launch '\(name)': \(error.localizedDescription)") + } + + if process.terminationStatus != 0 { + let errData = errPipe.fileHandleForReading.readDataToEndOfFile() + let errMsg = String(data: errData, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "Unknown error" + return .fail("App not found: \(name). \(errMsg)") + } + + // Give the app a moment to appear in running apps, then find it + Thread.sleep(forTimeInterval: 0.5) + let apps = NSWorkspace.shared.runningApplications + let launched = apps.first { $0.localizedName?.lowercased() == name.lowercased() } + + return .ok([ + "launched": true, + "name": launched?.localizedName ?? name, + "bundleId": launched?.bundleIdentifier ?? "", + "pid": Int(launched?.processIdentifier ?? 0) + ] as [String: Any]) + } + + return .fail("launchApp requires 'name' or 'bundleId' parameter") +} + +func handleActivateApp(_ params: [String: AnyCodable]?) -> CommandResponse { + let name = params?["name"]?.value as? String + let bundleId = params?["bundleId"]?.value as? String + + guard name != nil || bundleId != nil else { + return .fail("activateApp requires 'name' or 'bundleId' parameter") + } + + guard let app = findRunningApp(params: params) else { + let identifier = name ?? bundleId ?? "unknown" + return .fail("App not running: \(identifier)") + } + + let activated = app.activate(options: .activateIgnoringOtherApps) + if activated { + return .ok([ + "activated": true, + "name": app.localizedName ?? 
"Unknown" + ] as [String: Any]) + } else { + return .fail("Failed to activate app: \(app.localizedName ?? "Unknown")") + } +} + +func handleQuitApp(_ params: [String: AnyCodable]?) -> CommandResponse { + let name = params?["name"]?.value as? String + let bundleId = params?["bundleId"]?.value as? String + + guard name != nil || bundleId != nil else { + return .fail("quitApp requires 'name' or 'bundleId' parameter") + } + + guard let app = findRunningApp(params: params) else { + let identifier = name ?? bundleId ?? "unknown" + return .fail("App not running: \(identifier)") + } + + let appName = app.localizedName ?? "Unknown" + let terminated = app.terminate() + if terminated { + return .ok([ + "quit": true, + "name": appName + ] as [String: Any]) + } else { + return .fail("Failed to quit app: \(appName). The app may have unsaved changes or refused to terminate.") + } +} + +// MARK: - AX Element Helpers + +/// Resolve an `app` parameter (name or bundleId) to a running application. +func resolveApp(_ params: [String: AnyCodable]?) -> (app: NSRunningApplication?, identifier: String) { + let appIdentifier = params?["app"]?.value as? String ?? "" + guard !appIdentifier.isEmpty else { return (nil, "") } + + let apps = NSWorkspace.shared.runningApplications + for app in apps { + if app.bundleIdentifier == appIdentifier { return (app, appIdentifier) } + if let name = app.localizedName, name.lowercased() == appIdentifier.lowercased() { return (app, appIdentifier) } + } + return (nil, appIdentifier) +} + +/// Get child AXUIElements of a given element. +/// Uses AXUIElementCopyAttributeValues (plural, indexed) as primary path, +/// falling back to AXUIElementCopyAttributeValue for kAXChildrenAttribute. +/// Returns empty array on failure (leaf elements have no children — not an error). +func getChildren(_ element: AXUIElement) -> [AXUIElement] { + // Primary: AXUIElementCopyAttributeValues (plural) — handles edge cases in some apps + var values: CFArray? 
+ let pluralErr = AXUIElementCopyAttributeValues(element, kAXChildrenAttribute as CFString, 0, 100, &values) + if pluralErr == .success, let cfArray = values { + let arr = cfArray as [AnyObject] + return arr.compactMap { $0 as! AXUIElement? } + } + + // Fallback: AXUIElementCopyAttributeValue (singular) + var value: CFTypeRef? + let singularErr = AXUIElementCopyAttributeValue(element, kAXChildrenAttribute as CFString, &value) + if singularErr == .success, let cfArray = value as? [AXUIElement] { + return cfArray + } + + return [] +} + +/// Extract key attributes from an AXUIElement as a dictionary. +/// Omits nil values. This is the standard element representation for JSON responses. +func getElementAttributes(_ element: AXUIElement) -> [String: Any] { + var attrs: [String: Any] = [:] + + // Helper to read a string attribute + func readString(_ attr: String) -> String? { + var value: CFTypeRef? + let err = AXUIElementCopyAttributeValue(element, attr as CFString, &value) + guard err == .success, let v = value else { return nil } + return v as? String + } + + // Helper to read a bool attribute + func readBool(_ attr: String) -> Bool? { + var value: CFTypeRef? + let err = AXUIElementCopyAttributeValue(element, attr as CFString, &value) + guard err == .success, let v = value else { return nil } + if let num = v as? NSNumber { return num.boolValue } + return nil + } + + if let role = readString(kAXRoleAttribute) { attrs["role"] = role } + if let title = readString(kAXTitleAttribute) { attrs["title"] = title } + if let desc = readString(kAXDescriptionAttribute) { attrs["description"] = desc } + if let ident = readString("AXIdentifier") { attrs["identifier"] = ident } + + // AXValue: return string if it's a simple string, otherwise a type description + var axValue: CFTypeRef? + let valErr = AXUIElementCopyAttributeValue(element, kAXValueAttribute as CFString, &axValue) + if valErr == .success, let v = axValue { + if let s = v as? 
/// Depth-first search for AXUIElements matching the given criteria.
///
/// Every specified criterion must match for an element to be collected. When no
/// criterion is specified at all, every visited element is collected.
/// With `matchType == "exact"` strings are compared for equality; any other value
/// means a case-insensitive substring test.
///
/// - Returns: `(matches, totalVisited, truncated)` — the attribute dictionaries of the
///   collected elements, the number of elements visited, and whether the search
///   stopped because `maxCount` matches were reached.
func findMatchingElements(
    root: AXUIElement,
    role: String?,
    title: String?,
    value: String?,
    identifier: String?,
    matchType: String,
    maxDepth: Int,
    maxCount: Int
) -> (matches: [[String: Any]], totalVisited: Int, truncated: Bool) {
    var hits: [[String: Any]] = []
    var visited = 0
    var stopped = false

    // Both are constant for the whole traversal.
    let anyCriterion = role != nil || title != nil || value != nil || identifier != nil
    let exact = matchType == "exact"

    // An absent criterion always passes; an absent attribute never satisfies a present criterion.
    func satisfies(_ candidate: String?, _ wanted: String?) -> Bool {
        guard let wanted = wanted else { return true }
        guard let candidate = candidate else { return false }
        return exact ? candidate == wanted : candidate.lowercased().contains(wanted.lowercased())
    }

    func visit(_ node: AXUIElement, at level: Int) {
        if stopped { return }
        visited += 1

        let info = getElementAttributes(node)
        let isHit = !anyCriterion || (
            satisfies(info["role"] as? String, role)
                && satisfies(info["title"] as? String, title)
                && satisfies(info["value"] as? String, value)
                && satisfies(info["identifier"] as? String, identifier)
        )

        if isHit {
            hits.append(info)
            if hits.count >= maxCount {
                stopped = true
                return
            }
        }

        // Descend only while within the depth budget.
        guard level < maxDepth else { return }
        for child in getChildren(node) {
            if stopped { return }
            visit(child, at: level + 1)
        }
    }

    visit(root, at: 0)
    return (matches: hits, totalVisited: visited, truncated: stopped)
}
/// Build a nested tree of the target app's accessibility hierarchy.
///
/// Parameters read from `params`:
/// - `app`: app name or bundle ID (required).
/// - `maxDepth`: deepest level to descend to (default 5); top-level children are depth 1.
/// - `maxCount`: maximum number of nodes to include (default 200).
///
/// Returns `tree` (nodes with role/title/value/description/identifier and optional
/// `children`), `totalElements` (the number of nodes actually included) and `truncated`
/// (true when the `maxCount` budget stopped the traversal).
func handleGetTree(_ params: [String: AnyCodable]?) -> CommandResponse {
    let (app, identifier) = resolveApp(params)

    guard !identifier.isEmpty else {
        return .fail("getTree requires 'app' parameter (app name or bundleId)")
    }
    guard let app = app else {
        return .fail("App not running: \(identifier)")
    }

    let appElement = AXUIElementCreateApplication(app.processIdentifier)

    // JSON numbers arrive as Int or Double. Doubles are truncated toward zero; values
    // outside Int's range are clamped instead of trapping in the Int(_:) conversion.
    func intParam(_ key: String, default fallback: Int) -> Int {
        guard let raw = params?[key]?.value else { return fallback }
        if let i = raw as? Int { return i }
        if let d = raw as? Double {
            return Int(exactly: d.rounded(.towardZero)) ?? (d < 0 ? Int.min : Int.max)
        }
        return fallback
    }

    let maxDepth = intParam("maxDepth", default: 5)
    let maxCount = intParam("maxCount", default: 200)

    var totalElements = 0
    var truncated = false

    func buildTree(_ element: AXUIElement, depth: Int) -> [String: Any]? {
        guard !truncated else { return nil }

        // Check the budget before counting, so totalElements reflects only nodes that
        // are actually in the tree (previously a truncated run reported maxCount + 1).
        guard totalElements < maxCount else {
            truncated = true
            return nil
        }
        totalElements += 1

        // Copy only the descriptive attributes into the tree node.
        let attrs = getElementAttributes(element)
        var node: [String: Any] = [:]
        for key in ["role", "title", "value", "description", "identifier"] {
            if let v = attrs[key] { node[key] = v }
        }

        if depth < maxDepth {
            var childNodes: [[String: Any]] = []
            for child in getChildren(element) {
                guard !truncated else { break }
                if let childNode = buildTree(child, depth: depth + 1) {
                    childNodes.append(childNode)
                }
            }
            if !childNodes.isEmpty {
                node["children"] = childNodes
            }
        }

        return node
    }

    // Build tree from the app element's children (the app element itself is the root context)
    var tree: [[String: Any]] = []
    for child in getChildren(appElement) {
        guard !truncated else { break }
        if let node = buildTree(child, depth: 1) {
            tree.append(node)
        }
    }

    return .ok([
        "tree": tree,
        "totalElements": totalElements,
        "truncated": truncated
    ] as [String: Any])
}
AXValue + let axType = AXValueGetType(axValue) + + switch axType { + case .cgPoint: + var point = CGPoint.zero + if AXValueGetValue(axValue, .cgPoint, &point) { + return ["type": "CGPoint", "x": Double(point.x), "y": Double(point.y)] + } + case .cgSize: + var size = CGSize.zero + if AXValueGetValue(axValue, .cgSize, &size) { + return ["type": "CGSize", "width": Double(size.width), "height": Double(size.height)] + } + case .cgRect: + var rect = CGRect.zero + if AXValueGetValue(axValue, .cgRect, &rect) { + return ["type": "CGRect", "x": Double(rect.origin.x), "y": Double(rect.origin.y), + "width": Double(rect.size.width), "height": Double(rect.size.height)] + } + case .cfRange: + var range = CFRange(location: 0, length: 0) + if AXValueGetValue(axValue, .cfRange, &range) { + return ["type": "CFRange", "location": range.location, "length": range.length] + } + default: + return ["type": "unknown", "description": String(describing: axType)] + } + + return nil +} + +/// Read a single attribute from an AXUIElement and return a JSON-serializable value. +/// Handles: NSString → String, NSNumber → Bool/Int/Double, AXValue → unpacked dict, +/// [AXUIElement] → count description, AXUIElement → role description. +func readElementAttribute(_ element: AXUIElement, attribute: String) -> Any? { + var value: CFTypeRef? + let err = AXUIElementCopyAttributeValue(element, attribute as CFString, &value) + guard err == .success, let v = value else { return nil } + + // String + if let s = v as? String { return s } + + // NSNumber — check for boolean first (CFBoolean is bridged to NSNumber) + if let num = v as? 
/// Find the first element matching the criteria in the target app and perform AXPress on it.
///
/// Parameters read from `params`: `app` (required), at least one of `role`, `title`,
/// `value`, `identifier`, and optional `matchType` (defaults to "contains").
///
/// Returns `clicked: true` plus the element's attributes read after the press, or a
/// failure response naming the missing parameter, the unmatched criteria, or the AX
/// error together with the actions the element does support.
func handleClickElement(_ params: [String: AnyCodable]?) -> CommandResponse {
    let (app, identifier) = resolveApp(params)

    guard !identifier.isEmpty else {
        return .fail("clickElement requires 'app' parameter (app name or bundleId)")
    }
    guard let app = app else {
        return .fail("App not running: \(identifier)")
    }

    let appElement = AXUIElementCreateApplication(app.processIdentifier)

    let role = params?["role"]?.value as? String
    let title = params?["title"]?.value as? String
    let value = params?["value"]?.value as? String
    let identifierParam = params?["identifier"]?.value as? String
    let matchType = (params?["matchType"]?.value as? String) ?? "contains"

    guard role != nil || title != nil || value != nil || identifierParam != nil else {
        return .fail("clickElement requires at least one element criterion (role, title, value, or identifier)")
    }

    // Single traversal that yields the actual AXUIElement handle. The previous version
    // first ran findMatchingElements and then searched again for the handle, walking the
    // tree twice with the same predicate and risking a mismatch if the UI changed in between.
    let located = findFirstAXUIElement(
        root: appElement,
        role: role,
        title: title,
        value: value,
        identifier: identifierParam,
        matchType: matchType,
        maxDepth: 10
    )

    guard let element = located else {
        var criteria: [String] = []
        if let r = role { criteria.append("role=\(r)") }
        if let t = title { criteria.append("title=\(t)") }
        if let v = value { criteria.append("value=\(v)") }
        if let i = identifierParam { criteria.append("identifier=\(i)") }
        return .fail("No element found matching criteria: \(criteria.joined(separator: ", ")) in app '\(identifier)'")
    }

    // Collect the supported actions up front so a failed press can report them.
    var actionNames: CFArray?
    let actionsErr = AXUIElementCopyActionNames(element, &actionNames)
    let actions = actionsErr == .success ? ((actionNames as? [String]) ?? []) : []

    // Try AXPress
    let pressErr = AXUIElementPerformAction(element, kAXPressAction as CFString)
    if pressErr == .success {
        // Read element attributes after click for post-action inspection
        return .ok([
            "clicked": true,
            "element": getElementAttributes(element)
        ] as [String: Any])
    }

    // AXPress failed — return actionable error with available actions
    return .fail("AXPress action failed (error \(pressErr.rawValue)) on element matching criteria. Available actions: \(actions.isEmpty ? "none" : actions.joined(separator: ", "))")
}
The element may be read-only or not support text input.")
    }

    // Read back the value for verification
    var readBack: CFTypeRef?
    let readErr = AXUIElementCopyAttributeValue(element, kAXValueAttribute as CFString, &readBack)
    let readValue: Any
    if readErr == .success, let v = readBack {
        if let s = v as? String { readValue = s }
        else if let n = v as? NSNumber { readValue = n.stringValue }
        else { readValue = String(describing: v) }
    } else {
        readValue = NSNull()
    }

    let elementAttrs = getElementAttributes(element)
    return .ok([
        "typed": true,
        "value": readValue,
        "element": elementAttrs
    ] as [String: Any])
}

/// Handles the `readAttribute` command.
///
/// Resolves the target app, locates the first accessibility element matching the
/// supplied criteria (`role`, `title`, `value`, `identifier`, compared according to
/// `matchType`, default `"contains"`), and reads either one attribute (`attribute`)
/// or several (`attributes`) from it.
///
/// - Parameter params: Decoded command parameters.
/// - Returns: `.ok` with `value` (single mode) or `values` (multi mode) plus the
///   matched element's attributes; `.fail` when a required parameter is missing,
///   the app is not running, or no element matches. When both `attribute` and
///   `attributes` are supplied, `attribute` wins.
func handleReadAttribute(_ params: [String: AnyCodable]?) -> CommandResponse {
    let (app, identifier) = resolveApp(params)

    guard !identifier.isEmpty else {
        return .fail("readAttribute requires 'app' parameter (app name or bundleId)")
    }
    guard let app = app else {
        return .fail("App not running: \(identifier)")
    }

    // Support single "attribute" or multiple "attributes".
    // Non-string entries of "attributes" are dropped.
    let singleAttr = params?["attribute"]?.value as? String
    let multiAttrs = (params?["attributes"]?.value as? [Any])?.compactMap { $0 as? String }

    guard singleAttr != nil || multiAttrs?.isEmpty == false else {
        return .fail("readAttribute requires 'attribute' (string) or 'attributes' (array of strings) parameter")
    }

    let pid = app.processIdentifier
    let appElement = AXUIElementCreateApplication(pid)

    let role = params?["role"]?.value as? String
    let title = params?["title"]?.value as? String
    let value = params?["value"]?.value as? String
    let identifierParam = params?["identifier"]?.value as? String
    let matchType = (params?["matchType"]?.value as? String) ?? "contains"

    guard role != nil || title != nil || value != nil || identifierParam != nil else {
        return .fail("readAttribute requires at least one element criterion (role, title, value, or identifier)")
    }

    let targetElement = findFirstAXUIElement(
        root: appElement,
        role: role,
        title: title,
        value: value,
        identifier: identifierParam,
        matchType: matchType,
        maxDepth: 10
    )

    guard let element = targetElement else {
        // Echo the criteria back so the caller can see what was searched for.
        var criteria: [String] = []
        if let r = role { criteria.append("role=\(r)") }
        if let t = title { criteria.append("title=\(t)") }
        if let v = value { criteria.append("value=\(v)") }
        if let i = identifierParam { criteria.append("identifier=\(i)") }
        return .fail("No element found matching criteria: \(criteria.joined(separator: ", ")) in app '\(identifier)'")
    }

    let elementAttrs = getElementAttributes(element)

    // Single attribute mode
    if let attr = singleAttr {
        let val = readElementAttribute(element, attribute: attr)
        return .ok([
            "value": val ?? NSNull(),
            "element": elementAttrs
        ] as [String: Any])
    }

    // Multiple attributes mode; attributes that cannot be read are reported as null.
    if let attrs = multiAttrs {
        var values: [String: Any] = [:]
        for attr in attrs {
            values[attr] = readElementAttribute(element, attribute: attr) ?? NSNull()
        }
        return .ok([
            "values": values,
            "element": elementAttrs
        ] as [String: Any])
    }

    return .fail("Internal error: no attribute specified")
}

/// Handles the `getFocusedElement` command.
///
/// Queries `kAXFocusedUIElementAttribute` on the target app's accessibility element.
///
/// - Parameter params: Decoded command parameters; must identify the app.
/// - Returns: `.ok` with `focused: true` and either the focused element's attributes
///   or a string description of the returned value; `.fail` with a workaround hint
///   when the query does not succeed.
func handleGetFocusedElement(_ params: [String: AnyCodable]?) -> CommandResponse {
    let (app, identifier) = resolveApp(params)

    guard !identifier.isEmpty else {
        return .fail("getFocusedElement requires 'app' parameter (app name or bundleId)")
    }
    guard let app = app else {
        return .fail("App not running: \(identifier)")
    }

    let pid = app.processIdentifier
    let appElement = AXUIElementCreateApplication(pid)

    // Attempt to get the focused element
    var focusedValue: CFTypeRef?
    let err = AXUIElementCopyAttributeValue(appElement, kAXFocusedUIElementAttribute as CFString, &focusedValue)

    if err == .success, let focused = focusedValue {
        // If it works (unlikely from CLI context), return element attributes.
        // The type-ID check is what makes the forced cast below safe.
        if CFGetTypeID(focused) == AXUIElementGetTypeID() {
            let focusedElement = focused as! AXUIElement
            let attrs = getElementAttributes(focusedElement)
            return .ok([
                "focused": true,
                "element": attrs
            ] as [String: Any])
        }
        return .ok(["focused": true, "value": String(describing: focused)])
    }

    // Expected failure from CLI context — return actionable error.
    // AX error -25212 is kAXErrorNoValue (kAXErrorNotImplemented is -25208).
    return .fail("getFocusedElement failed (AX error \(err.rawValue)). "
        + "This is a known macOS limitation: kAXFocusedUIElementAttribute returns error -25212 (noValue) "
        + "when called from a CLI process that is not the frontmost app. "
        + "Workaround: use findElements with role/title criteria to locate specific elements, "
        + "or use getTree to discover the element hierarchy.")
}

/// Find the first AXUIElement matching the given criteria via DFS.
/// Returns the AXUIElement handle (not just attributes) for performing actions.
///
/// - Parameters:
///   - root: Element at which the search starts (depth 0).
///   - role: Expected role, or `nil` to accept any.
///   - title: Expected title, or `nil` to accept any.
///   - value: Expected value, or `nil` to accept any.
///   - identifier: Expected identifier, or `nil` to accept any.
///   - matchType: `"exact"` for case-sensitive equality; any other value selects a
///     case-insensitive substring match.
///   - maxDepth: Children are only visited while the current depth is below this.
/// - Returns: The first element (pre-order) for which every non-nil criterion
///   matches, or `nil` if none does or if no criterion was supplied.
func findFirstAXUIElement(
    root: AXUIElement,
    role: String?,
    title: String?,
    value: String?,
    identifier: String?,
    matchType: String,
    maxDepth: Int
) -> AXUIElement? {
    // With no criterion nothing can ever match, so skip the tree walk entirely.
    guard role != nil || title != nil || value != nil || identifier != nil else {
        return nil
    }

    let isExact = matchType == "exact"

    // Normalise the expected strings once instead of at every visited node.
    func prepared(_ expected: String?) -> String? {
        isExact ? expected : expected?.lowercased()
    }
    let wantedRole = prepared(role)
    let wantedTitle = prepared(title)
    let wantedValue = prepared(value)
    let wantedIdentifier = prepared(identifier)

    // A nil expectation always matches; a nil actual never matches a non-nil expectation.
    func matches(_ actual: String?, _ expected: String?) -> Bool {
        guard let expected = expected else { return true }
        guard let actual = actual else { return false }
        return isExact ? actual == expected : actual.lowercased().contains(expected)
    }

    func dfs(_ element: AXUIElement, depth: Int) -> AXUIElement? {
        let attrs = getElementAttributes(element)

        if matches(attrs["role"] as? String, wantedRole)
            && matches(attrs["title"] as? String, wantedTitle)
            && matches(attrs["value"] as? String, wantedValue)
            && matches(attrs["identifier"] as? String, wantedIdentifier) {
            return element
        }

        if depth < maxDepth {
            for child in getChildren(element) {
                if let found = dfs(child, depth: depth + 1) {
                    return found
                }
            }
        }

        return nil
    }

    return dfs(root, depth: 0)
}

// MARK: - Window Commands

/// Converts a JSON-decoded `windowId` (which may arrive as `Int` or `Double`) to `Int`.
///
/// Fractional values are truncated toward zero. Returns `nil` for a missing or
/// non-numeric value and for doubles that are non-finite or outside `Int`'s range,
/// instead of trapping on the conversion.
private func parseWindowID(_ raw: Any?) -> Int? {
    if let intValue = raw as? Int {
        return intValue
    }
    if let doubleValue = raw as? Double {
        return Int(exactly: doubleValue.rounded(.towardZero))
    }
    return nil
}

/// Extracts `x`/`y`/`width`/`height` from a CGWindowList entry's bounds dictionary.
///
/// Returns an empty dictionary when the entry carries no bounds; individual
/// components that are missing default to `0.0`.
private func cgWindowBounds(from window: [String: Any]) -> [String: Any] {
    guard let boundsDict = window[kCGWindowBounds as String] as? [String: Any] else {
        return [:]
    }
    return [
        "x": boundsDict["X"] as? Double ?? 0.0,
        "y": boundsDict["Y"] as? Double ?? 0.0,
        "width": boundsDict["Width"] as? Double ?? 0.0,
        "height": boundsDict["Height"] as? Double ?? 0.0
    ]
}

/// Handles the `listWindows` command.
///
/// Resolves `app` by exact bundle identifier or case-insensitive localized name,
/// then lists that process's on-screen windows as reported by
/// `CGWindowListCopyWindowInfo`.
///
/// - Parameter params: Decoded command parameters; `app` is required.
/// - Returns: `.ok` with `windows`, `app` and `pid`; `.fail` when `app` is missing
///   or no running application matches.
func handleListWindows(_ params: [String: AnyCodable]?) -> CommandResponse {
    let appIdentifier = params?["app"]?.value as? String

    guard let appIdentifier = appIdentifier, !appIdentifier.isEmpty else {
        return .fail("listWindows requires 'app' parameter (app name or bundleId)")
    }

    // Resolve app to PID: first running app whose bundle ID or name matches.
    let loweredIdentifier = appIdentifier.lowercased()
    let targetApp = NSWorkspace.shared.runningApplications.first { candidate in
        candidate.bundleIdentifier == appIdentifier
            || candidate.localizedName?.lowercased() == loweredIdentifier
    }

    guard let app = targetApp else {
        return .fail("App not running: \(appIdentifier)")
    }

    let targetPid = Int(app.processIdentifier)

    // Get on-screen windows via CGWindowListCopyWindowInfo
    guard let windowList = CGWindowListCopyWindowInfo([.optionOnScreenOnly, .excludeDesktopElements], kCGNullWindowID) as? [[String: Any]] else {
        return .ok(["windows": [] as [Any], "app": app.localizedName ?? appIdentifier, "pid": targetPid])
    }

    var windows: [[String: Any]] = []
    for win in windowList {
        guard let ownerPid = win[kCGWindowOwnerPID as String] as? Int,
              ownerPid == targetPid else { continue }

        // No further filtering is applied: untitled layer-0 windows (e.g. the Finder
        // desktop) are reported as well, so callers can filter on `title`/`layer`.
        let entry: [String: Any] = [
            "windowId": win[kCGWindowNumber as String] as? Int ?? 0,
            "title": win[kCGWindowName as String] as? String ?? "",
            "bounds": cgWindowBounds(from: win),
            "isOnScreen": win[kCGWindowIsOnscreen as String] as? Bool ?? true,
            "layer": win[kCGWindowLayer as String] as? Int ?? 0
        ]
        windows.append(entry)
    }

    return .ok(["windows": windows, "app": app.localizedName ?? appIdentifier, "pid": targetPid] as [String: Any])
}

/// Handles the `getWindowInfo` command.
///
/// Looks up one window by its CGWindow number among all windows (including
/// off-screen ones) and returns its metadata.
///
/// - Parameter params: Decoded command parameters; `windowId` (number) is required.
/// - Returns: `.ok` with the window's title, bounds, owner, layer, on-screen flag,
///   alpha and memory usage; `.fail` when `windowId` is missing or unusable, the
///   window list cannot be retrieved, or no window has that ID.
func handleGetWindowInfo(_ params: [String: AnyCodable]?) -> CommandResponse {
    // Handle both Int and Double (JSON numbers can decode as either)
    guard let windowId = parseWindowID(params?["windowId"]?.value) else {
        return .fail("getWindowInfo requires 'windowId' parameter (number)")
    }

    // Get all windows including off-screen
    guard let windowList = CGWindowListCopyWindowInfo([.optionAll], kCGNullWindowID) as? [[String: Any]] else {
        return .fail("Failed to retrieve window list from CGWindowListCopyWindowInfo")
    }

    let match = windowList.first { ($0[kCGWindowNumber as String] as? Int) == windowId }
    guard let win = match else {
        return .fail("Window not found: \(windowId)")
    }

    let result: [String: Any] = [
        "windowId": windowId,
        "title": win[kCGWindowName as String] as? String ?? "",
        "bounds": cgWindowBounds(from: win),
        "ownerName": win[kCGWindowOwnerName as String] as? String ?? "",
        "ownerPid": win[kCGWindowOwnerPID as String] as? Int ?? 0,
        "layer": win[kCGWindowLayer as String] as? Int ?? 0,
        "isOnScreen": win[kCGWindowIsOnscreen as String] as? Bool ?? false,
        "alpha": win[kCGWindowAlpha as String] as? Double ?? 1.0,
        "memoryUsage": win[kCGWindowMemoryUsage as String] as? Int ?? 0
    ]
    return .ok(result)
}

// MARK: - Screenshot Commands

/// Handles the `screenshotWindow` command.
///
/// Captures one window through ScreenCaptureKit and returns it base64-encoded.
///
/// Parameters read from `params`:
/// - `windowId` (number, required): window to capture.
/// - `format` (string, default `"jpeg"`): `"jpeg"` or `"png"`.
/// - `quality` (number, default `0.8`): JPEG compression quality, clamped to 0...1.
/// - `retina` (bool, default `false`): when true, the output is sized in pixels
///   using the content filter's point-to-pixel scale instead of in points.
///
/// - Returns: `.ok` with `imageData`, `format`, `width` and `height`; `.fail` when
///   Screen Recording permission is missing, a parameter is invalid, the window is
///   unknown, or capture/encoding fails.
func handleScreenshotWindow(_ params: [String: AnyCodable]?) -> CommandResponse {
    // Check Screen Recording permission first
    guard CGPreflightScreenCaptureAccess() else {
        return .fail("Screen Recording permission not granted. "
            + "Go to System Settings → Privacy & Security → Screen Recording and enable this app. "
            + "You may need to add the terminal or shell that runs mac-agent.")
    }

    // Parse windowId (handle both Int and Double from JSON)
    guard let rawWindowId = parseWindowID(params?["windowId"]?.value) else {
        return .fail("screenshotWindow requires 'windowId' parameter (number)")
    }
    // A plain UInt32(...) conversion traps on negative or oversized values, which
    // would kill the CLI without a JSON response; reject such IDs explicitly.
    guard let windowId = UInt32(exactly: rawWindowId) else {
        return .fail("Invalid windowId \(rawWindowId): must be between 0 and \(UInt32.max). "
            + "Use 'listWindows' to get valid window IDs.")
    }

    // Parse optional parameters
    let format = (params?["format"]?.value as? String) ?? "jpeg"

    // JSON numbers can decode as Int or Double; accept both so that e.g. `1` is honoured.
    let rawQuality = params?["quality"]?.value
    let requestedQuality: Double
    if let q = rawQuality as? Double {
        requestedQuality = q
    } else if let q = rawQuality as? Int {
        requestedQuality = Double(q)
    } else {
        requestedQuality = 0.8
    }
    let quality = min(max(requestedQuality, 0.0), 1.0)

    let retina = (params?["retina"]?.value as? Bool) ?? false

    guard format == "jpeg" || format == "png" else {
        return .fail("Unsupported format '\(format)'. Use 'jpeg' or 'png'.")
    }

    debug("screenshotWindow: windowId=\(windowId) format=\(format) quality=\(quality) retina=\(retina)")

    // Get available windows via SCShareableContent.
    // The async API is bridged to this synchronous handler with a semaphore.
    let semaphore = DispatchSemaphore(value: 0)
    var scContent: SCShareableContent?
    var scError: Error?

    Task {
        do {
            scContent = try await SCShareableContent.current
        } catch {
            scError = error
        }
        semaphore.signal()
    }
    semaphore.wait()

    if let error = scError {
        return .fail("Failed to get shareable content: \(error.localizedDescription)")
    }

    guard let content = scContent else {
        return .fail("SCShareableContent returned nil")
    }

    // Find the window matching windowId
    guard let targetWindow = content.windows.first(where: { $0.windowID == windowId }) else {
        debug("screenshotWindow: Window not found. Available window IDs: \(content.windows.prefix(20).map { $0.windowID })")
        return .fail("Window not found with ID \(windowId). Use 'listWindows' to get valid window IDs.")
    }

    debug("screenshotWindow: Found window '\(targetWindow.title ?? "untitled")' (\(targetWindow.frame.width)x\(targetWindow.frame.height))")

    // Configure capture
    let filter = SCContentFilter(desktopIndependentWindow: targetWindow)
    let config = SCStreamConfiguration()
    config.captureResolution = retina ? .best : .nominal
    // The window frame is in points while the configuration's output size is in
    // pixels, so a retina capture has to scale the frame by the filter's pixel scale.
    let pixelScale: CGFloat = retina ? max(CGFloat(filter.pointPixelScale), 1.0) : 1.0
    config.width = Int(targetWindow.frame.width * pixelScale)
    config.height = Int(targetWindow.frame.height * pixelScale)

    // Capture the image
    let captureSemaphore = DispatchSemaphore(value: 0)
    var capturedImage: CGImage?
    var captureError: Error?

    let captureStart = CFAbsoluteTimeGetCurrent()

    Task {
        do {
            capturedImage = try await SCScreenshotManager.captureImage(
                contentFilter: filter,
                configuration: config
            )
        } catch {
            captureError = error
        }
        captureSemaphore.signal()
    }
    captureSemaphore.wait()

    let captureDuration = CFAbsoluteTimeGetCurrent() - captureStart
    debug("screenshotWindow: Capture took \(String(format: "%.3f", captureDuration))s")

    if let error = captureError {
        return .fail("Screenshot capture failed: \(error.localizedDescription)")
    }

    guard let image = capturedImage else {
        return .fail("Screenshot capture returned nil image for window \(windowId)")
    }

    let imageWidth = image.width
    let imageHeight = image.height
    debug("screenshotWindow: Captured image \(imageWidth)x\(imageHeight)")

    // Encode to JPEG or PNG using CGImageDestination
    let imageData = NSMutableData()
    let uti = (format == "png") ? UTType.png.identifier as CFString : UTType.jpeg.identifier as CFString

    guard let destination = CGImageDestinationCreateWithData(imageData as CFMutableData, uti, 1, nil) else {
        return .fail("Failed to create image destination for encoding")
    }

    // The lossy-quality option is only set for JPEG output.
    var options: [CFString: Any] = [:]
    if format == "jpeg" {
        options[kCGImageDestinationLossyCompressionQuality] = quality
    }

    CGImageDestinationAddImage(destination, image, options as CFDictionary)

    guard CGImageDestinationFinalize(destination) else {
        return .fail("Failed to encode image to \(format)")
    }

    // Base64 encode
    let base64String = (imageData as Data).base64EncodedString()
    debug("screenshotWindow: Encoded \(format) data size: \(base64String.count) chars (\(imageData.length) bytes raw)")

    return .ok([
        "imageData": base64String,
        "format": format,
        "width": imageWidth,
        "height": imageHeight
    ] as [String: Any])
}

// MARK: - Command Dispatch

/// Routes a decoded request to the handler registered for its `command` name.
///
/// - Parameter request: The decoded command request.
/// - Returns: The handler's response, or a failure response for an unknown command.
func dispatch(_ request: CommandRequest) -> CommandResponse {
    debug("Dispatching command: \(request.command)")

    switch request.command {
    case "ping":
        return handlePing()
    case "checkPermissions":
        return handleCheckPermissions()
    case "listApps":
        return handleListApps(request.params)
    case "launchApp":
        return handleLaunchApp(request.params)
    case "activateApp":
        return handleActivateApp(request.params)
    case "quitApp":
        return handleQuitApp(request.params)
    case "listWindows":
        return handleListWindows(request.params)
    case "getWindowInfo":
        return handleGetWindowInfo(request.params)
    case "screenshotWindow":
        return handleScreenshotWindow(request.params)
    case "findElements":
        return handleFindElements(request.params)
    case "getTree":
        return handleGetTree(request.params)
    case "clickElement":
        return handleClickElement(request.params)
    case "typeText":
        return handleTypeText(request.params)
    case "readAttribute":
        return handleReadAttribute(request.params)
    case "getFocusedElement":
        return handleGetFocusedElement(request.params)
    default:
        return .fail("Unknown command: \(request.command)")
    }
}

// MARK: - Main Entry Point

/// Entry point: reads one JSON request from stdin, dispatches it, and writes one
/// JSON response to stdout. Empty or undecodable input produces a failure response.
func main() {
    // Initialize NSApplication — required for ScreenCaptureKit's WindowServer connection.
    // Must happen before any SCShareableContent or SCScreenshotManager calls.
    // Verified to not break JSON stdin/stdout protocol.
    _ = NSApplication.shared

    // Read all of stdin
    let inputData = FileHandle.standardInput.readDataToEndOfFile()

    guard !inputData.isEmpty else {
        writeResponse(CommandResponse.fail("No input received on stdin"))
        return
    }

    // Parse the command request
    let request: CommandRequest
    do {
        request = try JSONDecoder().decode(CommandRequest.self, from: inputData)
    } catch {
        writeResponse(CommandResponse.fail("Invalid JSON input: \(error.localizedDescription)"))
        return
    }

    // Dispatch and respond
    writeResponse(dispatch(request))
}

/// Encodes `response` as JSON with sorted keys and writes it to stdout, followed by
/// a newline.
///
/// If encoding fails, a failure object is written instead so that stdout still
/// carries a JSON document.
func writeResponse(_ response: CommandResponse) {
    let encoder = JSONEncoder()
    encoder.outputFormatting = [.sortedKeys]

    let payload: Data
    do {
        payload = try encoder.encode(response)
    } catch {
        // Last-resort fallback. Serialise through JSONSerialization so that quotes,
        // backslashes or newlines in the error description are escaped and the
        // output remains valid JSON.
        let fallback: [String: Any] = [
            "success": false,
            "error": "Failed to encode response: \(error.localizedDescription)"
        ]
        payload = (try? JSONSerialization.data(withJSONObject: fallback, options: [.sortedKeys]))
            ?? Data(#"{"error":"Failed to encode response","success":false}"#.utf8)
    }

    FileHandle.standardOutput.write(payload)
    FileHandle.standardOutput.write(Data("\n".utf8))
}

main()