From ca299db1c6f3e3ae6c13fa8a402b63a29092e263 Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Mon, 16 Mar 2026 17:34:44 -0400 Subject: [PATCH] feat(browser-tools): add 10 new browser tools (#698) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement all features from the browser-tools feature additions proposal: 1. browser_extract — structured data extraction with JSON Schema validation 2. browser_save_state / browser_restore_state — session state persistence 3. browser_generate_test — Playwright test code generation from session 4. browser_mock_route / browser_block_urls / browser_clear_routes — network interception 5. browser_emulate_device — device emulation with 143 Playwright device presets 6. browser_visual_diff — visual regression diffing with baseline management 7. browser_save_pdf — PDF generation (Chromium page.pdf) 8. browser_zoom_region — region capture with upscaling via sharp 9. browser_action_cache — intent→selector caching for repeat visits 10. browser_check_injection — prompt injection detection on page content Total browser tools: 47 → 60. No new dependencies — uses existing sharp, ajv, @sinclair/typebox, and Playwright core APIs. 
--- .gitignore | 14 +- .../698-browser-tools-feature-additions.md | 312 ++++++++++++++++++ .../extensions/browser-tools/index.ts | 20 ++ .../tests/browser-tools-unit.test.cjs | 25 ++ .../browser-tools/tools/action-cache.ts | 216 ++++++++++++ .../extensions/browser-tools/tools/codegen.ts | 274 +++++++++++++++ .../extensions/browser-tools/tools/device.ts | 183 ++++++++++ .../extensions/browser-tools/tools/extract.ts | 229 +++++++++++++ .../browser-tools/tools/injection-detect.ts | 221 +++++++++++++ .../browser-tools/tools/network-mock.ts | 244 ++++++++++++++ .../extensions/browser-tools/tools/pdf.ts | 92 ++++++ .../browser-tools/tools/state-persistence.ts | 202 ++++++++++++ .../browser-tools/tools/visual-diff.ts | 209 ++++++++++++ .../extensions/browser-tools/tools/zoom.ts | 104 ++++++ 14 files changed, 2344 insertions(+), 1 deletion(-) create mode 100644 docs/proposals/698-browser-tools-feature-additions.md create mode 100644 src/resources/extensions/browser-tools/tools/action-cache.ts create mode 100644 src/resources/extensions/browser-tools/tools/codegen.ts create mode 100644 src/resources/extensions/browser-tools/tools/device.ts create mode 100644 src/resources/extensions/browser-tools/tools/extract.ts create mode 100644 src/resources/extensions/browser-tools/tools/injection-detect.ts create mode 100644 src/resources/extensions/browser-tools/tools/network-mock.ts create mode 100644 src/resources/extensions/browser-tools/tools/pdf.ts create mode 100644 src/resources/extensions/browser-tools/tools/state-persistence.ts create mode 100644 src/resources/extensions/browser-tools/tools/visual-diff.ts create mode 100644 src/resources/extensions/browser-tools/tools/zoom.ts diff --git a/.gitignore b/.gitignore index be98fee7d..5a0355593 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ # ── GSD project state (development-only, lives in worktree branches) ── -.gsd/ .claude/ RELEASE-GUIDE.md @@ -50,3 +49,16 @@ AGENTS.md .bg-shell/ TODOS.md .planning/ + +# ── GSD 
baseline (auto-generated) ── +.gsd/activity/ +.gsd/runtime/ +.gsd/worktrees/ +.gsd/auto.lock +.gsd/metrics.json +.gsd/completed-units.json +.gsd/STATE.md +.gsd/gsd.db +.gsd/DISCUSSION-MANIFEST.json +.gsd/milestones/**/*-CONTINUE.md +.gsd/milestones/**/continue.md diff --git a/docs/proposals/698-browser-tools-feature-additions.md b/docs/proposals/698-browser-tools-feature-additions.md new file mode 100644 index 000000000..031490a8d --- /dev/null +++ b/docs/proposals/698-browser-tools-feature-additions.md @@ -0,0 +1,312 @@ +# Browser-Tools Feature Additions — Implementation Requirements + +> Ref: [#698](https://github.com/gsd-build/gsd-2/issues/698) +> Status: Proposal — open for contributor review + +## Current State + +Browser-tools ships **47 tools** across 10 modules (~8,300 lines). The extension wraps Playwright's Chromium instance with intent resolution, semantic actions, assertions, state diffing, an action timeline, HAR/trace export, and a deterministic ref system. Context is managed via `lifecycle.ts` (browser/context/page lifecycle) and `state.ts` (session tracking). + +Key existing capabilities: `browser_navigate`, `browser_click`, `browser_evaluate`, `browser_assert`, `browser_diff`, `browser_batch`, `browser_find_best`, `browser_act`, `browser_trace_start/stop`, `browser_export_har`, `browser_set_viewport`, `browser_screenshot`, `browser_snapshot_refs`. + +No existing support for: storage state persistence, route interception, PDF export, structured data extraction, device emulation profiles, visual diffing, or test code generation. + +--- + +## Feature 1: Structured Data Extraction with Schema Validation + +**Tool:** `browser_extract` + +### What it does +Accept a JSON Schema (or simplified shape description), extract matching structured data from the current page, validate against the schema, return typed JSON. 
+ +### Implementation requirements + +| Item | Details | +|---|---| +| **New file** | `tools/extract.ts` | +| **Playwright API** | `page.evaluate()` — runs extraction logic in-page | +| **Schema validation** | Use `@sinclair/typebox` (already a dependency) for schema definition; `ajv` or inline validation for runtime checking | +| **Extraction strategy** | 1. Convert page to accessibility tree or clean text via existing `browser_get_accessibility_tree` / `browser_get_page_source` infrastructure. 2. Use `page.evaluate()` to run CSS selector-based extraction. 3. For complex extraction, pass schema + page content to the LLM via tool result and let the agent extract (Stagehand approach) | +| **Tool signature** | `browser_extract({ schema: JSONSchema, selector?: string, multiple?: boolean })` → `{ data: T, validationErrors?: string[] }` | +| **Dependencies** | None new — Typebox already available, `page.evaluate` is Playwright core | +| **Estimated effort** | **16–24 hours** | +| **Risk** | Medium — extraction quality depends heavily on page structure; may need multiple strategies (DOM-based, a11y-tree-based, LLM-assisted) | + +### Acceptance criteria +- [ ] Extracts data matching a provided JSON schema from a page +- [ ] Returns validation errors when extracted data doesn't match schema +- [ ] Supports scoping extraction to a CSS selector +- [ ] Supports extracting arrays of items (`multiple: true`) +- [ ] Handles pages with dynamic content (waits for network idle before extraction) + +--- + +## Feature 2: Session State Persistence & Restoration + +**Tools:** `browser_save_state`, `browser_restore_state` + +### What it does +Save cookies, localStorage, sessionStorage, and auth tokens to disk. Restore them on a subsequent browser session to resume authenticated state without re-logging in. 
+ +### Implementation requirements + +| Item | Details | +|---|---| +| **New tools in** | `tools/session.ts` (extend existing file) | +| **Playwright API** | `context.storageState()` for cookies + localStorage; `page.evaluate()` for sessionStorage (not included in Playwright's storageState) | +| **Storage location** | Session artifacts directory: `.gsd/browser-state/<name>.json` | +| **Tool signatures** | `browser_save_state({ name?: string })` → `{ path, cookieCount, localStorageOrigins }` / `browser_restore_state({ name?: string })` → `{ restored, cookieCount }` | +| **Restore mechanism** | `browser.newContext({ storageState: path })` for new sessions; `context.addCookies()` + `page.evaluate()` for mid-session restore | +| **Security** | State files may contain auth tokens — add to `.gitignore` pattern, warn in tool output | +| **Dependencies** | None new — all Playwright core APIs | +| **Estimated effort** | **8–12 hours** | +| **Risk** | Low — Playwright's `storageState()` is well-tested; sessionStorage requires extra handling | + +### Acceptance criteria +- [ ] Saves cookies + localStorage via `context.storageState()` +- [ ] Saves sessionStorage via `page.evaluate()` (per-origin) +- [ ] Restores state on new browser context launch +- [ ] Restores state mid-session (cookies + evaluate injection) +- [ ] State files written to `.gsd/browser-state/` and gitignored +- [ ] Tool output shows count of restored items, never displays secret values + +--- + +## Feature 3: Test Code Generation from Session + +**Tool:** `browser_generate_test` + +### What it does +Record agent interactions during a browser session and emit a Playwright test script. Turns AI-driven exploration into deterministic, reproducible tests. 
+ +### Implementation requirements + +| Item | Details | +|---|---| +| **New file** | `tools/codegen.ts` | +| **Data source** | Action timeline (already tracked in `state.ts`) + trace data from `browser_trace_start/stop` | +| **Code generation** | Transform timeline entries (navigate, click, type, assert) into Playwright test syntax: `await page.goto(...)`, `await page.click(...)`, `await expect(page.locator(...)).toBeVisible()` | +| **Tool signature** | `browser_generate_test({ name?: string, includeAssertions?: boolean })` → `{ path, actionCount, testCode }` | +| **Output format** | Standard Playwright test file (`*.spec.ts`) written to project's test directory or session artifacts | +| **Selector strategy** | Prefer stable selectors: `getByRole` > `getByText` > CSS selector (use ref metadata for best selectors) | +| **Dependencies** | None new — reads from existing timeline/trace infrastructure | +| **Estimated effort** | **20–30 hours** | +| **Risk** | High — generated selectors may be brittle; action timeline may not capture all nuances (hover timing, scroll position, wait conditions); output quality varies significantly by page complexity | + +### Acceptance criteria +- [ ] Generates a runnable Playwright test from a recorded session +- [ ] Includes navigation, click, type, and assertion actions +- [ ] Uses stable selectors (role-based preferred over CSS) +- [ ] Generated test passes when run against the same page state +- [ ] Writes test file to configurable output path + +--- + +## Feature 4: Network Request Interception & Mocking + +**Tools:** `browser_mock_route`, `browser_block_urls`, `browser_clear_routes` + +### What it does +Intercept network requests to mock API responses, block URLs (analytics, ads), simulate error conditions (500s, timeouts, slow responses). 
+ +### Implementation requirements + +| Item | Details | +|---|---| +| **New file** | `tools/network-mock.ts` | +| **Playwright API** | `page.route(urlPattern, handler)` for interception; `route.fulfill()` for mock responses; `route.abort()` for blocking | +| **Tool signatures** | `browser_mock_route({ url: string, status?: number, body?: string, headers?: Record<string, string> })` / `browser_block_urls({ patterns: string[] })` / `browser_clear_routes()` | +| **State tracking** | Track active routes in module state for cleanup and listing | +| **Dependencies** | None new — Playwright core API | +| **Estimated effort** | **12–16 hours** | +| **Risk** | Low — Playwright's route API is mature and well-documented | + +### Acceptance criteria +- [ ] Mock API responses with custom status, body, and headers +- [ ] Block requests matching URL patterns (glob or regex) +- [ ] Simulate slow responses with configurable delay +- [ ] Clear all active routes +- [ ] List active routes for debugging +- [ ] Routes survive page navigation within the same context + +--- + +## Feature 5: Device Emulation Presets + +**Tool:** `browser_emulate_device` + +### What it does +One-call device simulation: viewport + user agent + touch + device scale factor. Wraps Playwright's device descriptors. 
+ +### Implementation requirements + +| Item | Details | +|---|---| +| **Extend** | `tools/interaction.ts` (alongside `browser_set_viewport`) or new `tools/device.ts` | +| **Playwright API** | `playwright.devices['iPhone 15']` → `{ viewport, userAgent, deviceScaleFactor, isMobile, hasTouch }` applied via context recreation or page emulation | +| **Tool signature** | `browser_emulate_device({ device: string })` → `{ device, viewport, userAgent, isMobile }` | +| **Device list** | Expose Playwright's built-in device descriptors (~100 devices); accept fuzzy matching on device name | +| **Limitation** | Some properties (userAgent, isMobile) can only be set at context creation — may require context restart | +| **Dependencies** | None new — Playwright ships device descriptors | +| **Estimated effort** | **6–10 hours** | +| **Risk** | Low-Medium — context restart for full emulation changes the page state; partial emulation (viewport only) is simpler but less accurate | + +### Acceptance criteria +- [ ] Accept device name (e.g., "iPhone 15", "Pixel 7") and configure full emulation +- [ ] Support fuzzy matching on device name with suggestions on no match +- [ ] Set viewport, user agent, device scale factor, touch, and mobile flag +- [ ] Warn when context restart is required and confirm with user + +--- + +## Feature 6: Visual Diffing (Screenshot Comparison) + +**Tool:** `browser_visual_diff` + +### What it does +Compare two screenshots pixel-by-pixel, return a diff image and similarity score. 
+ +### Implementation requirements + +| Item | Details | +|---|---| +| **New file** | `tools/visual-diff.ts` | +| **Comparison library** | `pixelmatch` (lightweight, ~200 lines, MIT) or Playwright's built-in `expect(page).toHaveScreenshot()` comparison | +| **Tool signature** | `browser_visual_diff({ baseline?: string, current?: string, threshold?: number })` → `{ match: boolean, similarity: number, diffPixels: number, diffImagePath?: string }` | +| **Baseline management** | Save baselines to `.gsd/browser-baselines/`; auto-name by URL + viewport | +| **Dependencies** | `pixelmatch` + `pngjs` (new deps, ~50KB total) or use Playwright's built-in comparator | +| **Estimated effort** | **10–14 hours** | +| **Risk** | Medium — anti-aliasing and dynamic content (timestamps, ads) cause false positives; threshold tuning needed | + +### Acceptance criteria +- [ ] Compare current page screenshot against a stored baseline +- [ ] Return similarity score (0–1) and diff pixel count +- [ ] Generate diff image highlighting changed regions +- [ ] Configurable threshold for pass/fail +- [ ] Support element-scoped comparison (crop to selector) + +--- + +## Feature 7: PDF Generation + +**Tool:** `browser_save_pdf` + +### What it does +Render current page as PDF artifact. 
+ +### Implementation requirements + +| Item | Details | +|---|---| +| **Extend** | `tools/screenshot.ts` or new `tools/pdf.ts` | +| **Playwright API** | `page.pdf({ path, format, printBackground })` — Chromium only (already our engine) | +| **Tool signature** | `browser_save_pdf({ filename?: string, format?: string, printBackground?: boolean })` → `{ path, pageCount, sizeBytes }` | +| **Output location** | Session artifacts directory | +| **Dependencies** | None — Playwright core API | +| **Estimated effort** | **3–5 hours** | +| **Risk** | Low — straightforward Playwright wrapper | + +### Acceptance criteria +- [ ] Generate PDF from current page +- [ ] Support A4/Letter/custom page formats +- [ ] Include background graphics option +- [ ] Write to session artifacts with configurable filename +- [ ] Return file path and size + +--- + +## Feature 8: Region Zoom / Targeted High-Res Capture + +**Tool:** `browser_zoom_region` + +### What it does +Capture and upscale a specific rectangular region for detailed inspection of dense UIs. 
+ +### Implementation requirements + +| Item | Details | +|---|---| +| **Extend** | `tools/screenshot.ts` | +| **Playwright API** | `page.screenshot({ clip: { x, y, width, height } })` for region capture; upscale via `sharp` or return at native device pixel ratio | +| **Tool signature** | `browser_zoom_region({ x, y, width, height, scale?: number })` → screenshot image | +| **Dependencies** | Optional `sharp` for upscaling, or rely on Playwright's deviceScaleFactor | +| **Estimated effort** | **4–6 hours** | +| **Risk** | Low | + +### Acceptance criteria +- [ ] Capture arbitrary rectangular region by coordinates +- [ ] Support scale factor for upscaling (2x, 3x) +- [ ] Return as inline image (same as `browser_screenshot`) + +--- + +## Feature 9: Action Caching / Replay (Lower Priority) + +**Tool:** Internal optimization, not a user-facing tool + +### Implementation requirements + +| Item | Details | +|---|---| +| **Cache key** | URL + DOM structure hash → selector mapping | +| **Storage** | In-memory LRU cache with optional disk persistence | +| **Integration point** | `browser_find_best` / `browser_act` — check cache before LLM resolution | +| **Estimated effort** | **12–18 hours** | +| **Risk** | Medium — cache invalidation when page structure changes; stale selectors cause silent failures | + +--- + +## Feature 10: Prompt Injection Detection (Lower Priority) + +**Tool:** `browser_check_injection` + +### Implementation requirements + +| Item | Details | +|---|---| +| **Detection strategy** | Regex/keyword scan on screenshot OCR text or page text content for known injection patterns ("ignore previous", "system prompt", "you are now") | +| **Integration point** | Optional auto-check after `browser_screenshot` or `browser_navigate` | +| **Estimated effort** | **8–12 hours** | +| **Risk** | Medium — false positives on legitimate content; OCR adds latency; determined adversaries can evade keyword detection | + +--- + +## Summary — Effort & Priority Matrix + +| # | 
Feature | Priority | Effort | New Deps | Risk | +|---|---|---|---|---|---| +| 1 | Structured data extraction | High | 16–24h | None | Medium | +| 2 | Session state persistence | High | 8–12h | None | Low | +| 3 | Test code generation | High | 20–30h | None | High | +| 4 | Network interception/mocking | High | 12–16h | None | Low | +| 5 | Device emulation presets | Medium | 6–10h | None | Low-Med | +| 6 | Visual diffing | Medium | 10–14h | pixelmatch (~50KB) | Medium | +| 7 | PDF generation | Medium | 3–5h | None | Low | +| 8 | Region zoom capture | Medium | 4–6h | Optional sharp | Low | +| 9 | Action caching | Lower | 12–18h | None | Medium | +| 10 | Prompt injection detection | Lower | 8–12h | None | Medium | +| | **Total** | | **~100–150h** | | | + +## Recommended Implementation Order + +1. **PDF generation** (Feature 7) — smallest, zero deps, immediate utility, good warmup +2. **Session state persistence** (Feature 2) — high value, low risk, moderate effort +3. **Network interception** (Feature 4) — high value, low risk, Playwright API is mature +4. **Region zoom** (Feature 8) — small effort, extends existing screenshot tool +5. **Device emulation** (Feature 5) — moderate effort, extends existing viewport tool +6. **Structured extraction** (Feature 1) — high value but needs design iteration on extraction strategy +7. **Visual diffing** (Feature 6) — useful for UAT, needs threshold tuning +8. **Test code generation** (Feature 3) — high value but high risk, best tackled after timeline infrastructure is battle-tested +9. **Action caching** (Feature 9) — optimization, defer until intent resolution is a proven bottleneck +10. 
**Prompt injection** (Feature 10) — defensive, defer until production use cases mature + +## Notes for Contributors + +- All features wrap existing Playwright APIs — no custom browser extensions or CDP hacking needed +- Features 2, 4, 5, 7, 8 are straightforward Playwright wrappers with low implementation risk +- Features 1 and 3 involve more design work — open sub-issues for design discussion before implementation +- Each feature should be a separate PR with its own tests +- Follow the existing tool registration pattern in `index.ts` → `tools/*.ts` +- Use `Type` from `@sinclair/typebox` for tool parameter schemas (existing convention) +- Session artifacts go in the artifacts directory managed by `session.ts` diff --git a/src/resources/extensions/browser-tools/index.ts b/src/resources/extensions/browser-tools/index.ts index 11c062584..79de059c8 100644 --- a/src/resources/extensions/browser-tools/index.ts +++ b/src/resources/extensions/browser-tools/index.ts @@ -17,6 +17,16 @@ import { registerWaitTools } from "./tools/wait.js"; import { registerPageTools } from "./tools/pages.js"; import { registerFormTools } from "./tools/forms.js"; import { registerIntentTools } from "./tools/intent.js"; +import { registerPdfTools } from "./tools/pdf.js"; +import { registerStatePersistenceTools } from "./tools/state-persistence.js"; +import { registerNetworkMockTools } from "./tools/network-mock.js"; +import { registerDeviceTools } from "./tools/device.js"; +import { registerExtractTools } from "./tools/extract.js"; +import { registerVisualDiffTools } from "./tools/visual-diff.js"; +import { registerZoomTools } from "./tools/zoom.js"; +import { registerCodegenTools } from "./tools/codegen.js"; +import { registerActionCacheTools } from "./tools/action-cache.js"; +import { registerInjectionDetectionTools } from "./tools/injection-detect.js"; export default function (pi: ExtensionAPI) { pi.on("session_shutdown", async () => { await closeBrowser(); }); @@ -48,4 +58,14 @@ export 
default function (pi: ExtensionAPI) { registerPageTools(pi, deps); registerFormTools(pi, deps); registerIntentTools(pi, deps); + registerPdfTools(pi, deps); + registerStatePersistenceTools(pi, deps); + registerNetworkMockTools(pi, deps); + registerDeviceTools(pi, deps); + registerExtractTools(pi, deps); + registerVisualDiffTools(pi, deps); + registerZoomTools(pi, deps); + registerCodegenTools(pi, deps); + registerActionCacheTools(pi, deps); + registerInjectionDetectionTools(pi, deps); } diff --git a/src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs b/src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs index 1555072e3..f63aa3066 100644 --- a/src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs +++ b/src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs @@ -612,3 +612,28 @@ describe("constrainScreenshot", () => { assert.equal(meta.height, 1568); }); }); + +// --------------------------------------------------------------------------- +// browser_save_pdf — tool registration +// --------------------------------------------------------------------------- + +describe("browser_save_pdf tool registration", () => { + it("registerPdfTools exports a function", () => { + const { registerPdfTools } = jiti("../tools/pdf.ts"); + assert.equal(typeof registerPdfTools, "function", "registerPdfTools should be a function"); + }); + + it("tool can be registered with a mock pi", () => { + const { registerPdfTools } = jiti("../tools/pdf.ts"); + const registeredTools = []; + const mockPi = { + registerTool: (tool) => registeredTools.push(tool), + }; + const mockDeps = {}; + registerPdfTools(mockPi, mockDeps); + assert.equal(registeredTools.length, 1, "should register exactly 1 tool"); + assert.equal(registeredTools[0].name, "browser_save_pdf", "tool name should be browser_save_pdf"); + assert.ok(registeredTools[0].parameters, "tool should have parameters schema"); + assert.equal(typeof 
/**
 * Action caching — stores semantic intent → selector mappings so that a selector
 * resolved once for a given page structure can be looked up again on repeat visits.
 *
 * Within this module the cache is reachable only through the `browser_action_cache`
 * tool registered below. Entries live in memory for the lifetime of the module.
 */

/** One cached intent → selector resolution, created by the 'put' action. */
interface CacheEntry {
  /** Selector supplied by the caller. */
  selector: string;
  /** Caller-supplied confidence; 1.0 when the caller omits it. */
  score: number;
  /** Full page URL at the time the entry was stored. */
  url: string;
  /** Structural page hash (see computeDomHash) at the time the entry was stored. */
  domHash: string;
  /** `Date.now()` at store time; the entry with the smallest value is evicted first. */
  timestamp: number;
  /** Number of successful 'get' lookups served from this entry. */
  hitCount: number;
}

/** Cache storage, keyed by the string produced by buildCacheKey. */
const cache = new Map<string, CacheEntry>();

/** Maximum number of entries kept; storing a new key at capacity evicts the oldest entry. */
const MAX_CACHE_SIZE = 200;

/**
 * Registers the `browser_action_cache` tool.
 *
 * The tool dispatches on `params.action`:
 * - `stats` — reports size, total hits and a per-entry summary.
 * - `get`   — looks up `intent` for the current URL + DOM hash; an entry whose selector
 *             is no longer visible is deleted and reported as stale.
 * - `put`   — stores `intent` → `selector` (evicting the oldest entry at capacity).
 * - `clear` — removes every entry.
 *
 * Any exception is converted into an `isError` tool result instead of propagating.
 *
 * @param pi   Extension API used to register the tool.
 * @param deps Shared tool dependencies; only `ensureBrowser()` is used here.
 */
export function registerActionCacheTools(pi: ExtensionAPI, deps: ToolDeps): void {
  // -------------------------------------------------------------------------
  // browser_action_cache
  // -------------------------------------------------------------------------
  pi.registerTool({
    name: "browser_action_cache",
    label: "Browser Action Cache",
    description:
      "Manage the action cache that maps page structure + intent → resolved selectors. " +
      "Cache reduces token cost on repeat visits to same pages. " +
      "Actions: 'stats' (show cache metrics), 'get' (lookup cached selector), " +
      "'put' (store a selector mapping), 'clear' (flush cache).",
    parameters: Type.Object({
      action: Type.String({
        description: "Cache action: 'stats', 'get', 'put', or 'clear'.",
      }),
      intent: Type.Optional(
        Type.String({ description: "Semantic intent key (for get/put). E.g., 'submit_form', 'close_dialog'." }),
      ),
      selector: Type.Optional(
        Type.String({ description: "CSS selector to cache (for put)." }),
      ),
      score: Type.Optional(
        Type.Number({ description: "Confidence score 0–1 for the cached selector (for put)." }),
      ),
    }),

    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      try {
        const { page: p } = await deps.ensureBrowser();
        const url = p.url();

        switch (params.action) {
          case "stats": {
            const entries = [...cache.values()];
            const totalHits = entries.reduce((sum, e) => sum + e.hitCount, 0);
            return {
              content: [{
                type: "text",
                text: `Action cache: ${cache.size} entries, ${totalHits} total hits\nMax size: ${MAX_CACHE_SIZE}`,
              }],
              details: {
                size: cache.size,
                maxSize: MAX_CACHE_SIZE,
                totalHits,
                entries: entries.map((e) => ({
                  url: e.url,
                  selector: e.selector,
                  hitCount: e.hitCount,
                  score: e.score,
                })),
              },
            };
          }

          case "get": {
            if (!params.intent) {
              return {
                content: [{ type: "text", text: "Intent parameter required for 'get' action." }],
                details: { error: "missing_intent" },
                isError: true,
              };
            }

            const domHash = await computeDomHash(p);
            const key = buildCacheKey(url, domHash, params.intent);
            const entry = cache.get(key);

            if (!entry) {
              return {
                content: [{ type: "text", text: `Cache miss for intent "${params.intent}" on ${url}` }],
                details: { hit: false, intent: params.intent, url },
              };
            }

            // Validate the cached selector still exists; a failing visibility probe counts as "gone".
            const exists = await p.locator(entry.selector).first().isVisible().catch(() => false);
            if (!exists) {
              cache.delete(key);
              return {
                content: [{ type: "text", text: `Cache entry stale (selector no longer visible): ${entry.selector}` }],
                details: { hit: false, stale: true, selector: entry.selector },
              };
            }

            entry.hitCount++;
            return {
              content: [{
                type: "text",
                text: `Cache hit: "${params.intent}" → ${entry.selector} (score: ${entry.score}, hits: ${entry.hitCount})`,
              }],
              details: { hit: true, ...entry },
            };
          }

          case "put": {
            if (!params.intent || !params.selector) {
              return {
                content: [{ type: "text", text: "Intent and selector parameters required for 'put' action." }],
                details: { error: "missing_params" },
                isError: true,
              };
            }

            const domHash = await computeDomHash(p);
            const key = buildCacheKey(url, domHash, params.intent);

            // Only a brand-new key can grow the map, so overwriting never triggers eviction.
            if (cache.size >= MAX_CACHE_SIZE && !cache.has(key)) {
              evictOldestEntry();
            }

            const entry: CacheEntry = {
              selector: params.selector,
              score: params.score ?? 1.0,
              url,
              domHash,
              timestamp: Date.now(),
              hitCount: 0,
            };
            cache.set(key, entry);

            return {
              content: [{
                type: "text",
                text: `Cached: "${params.intent}" → ${params.selector} (cache size: ${cache.size})`,
              }],
              details: { stored: true, key, ...entry, cacheSize: cache.size },
            };
          }

          case "clear": {
            const size = cache.size;
            cache.clear();
            return {
              content: [{ type: "text", text: `Action cache cleared (${size} entries removed).` }],
              details: { cleared: size },
            };
          }

          default:
            return {
              content: [{ type: "text", text: `Unknown action: ${params.action}. Use 'stats', 'get', 'put', or 'clear'.` }],
              details: { error: "unknown_action" },
              isError: true,
            };
        }
      } catch (err: unknown) {
        // Anything can be thrown; only Error instances carry a usable `.message`.
        const message = err instanceof Error ? err.message : String(err);
        return {
          content: [{ type: "text", text: `Action cache error: ${message}` }],
          details: { error: message },
          isError: true,
        };
      }
    },
  });
}

/**
 * Removes the entry with the smallest `timestamp` (first inserted wins on ties).
 * Single pass over the map instead of sorting every entry.
 */
function evictOldestEntry(): void {
  let oldestKey: string | undefined;
  let oldestTimestamp = Infinity;
  for (const [key, entry] of cache) {
    if (entry.timestamp < oldestTimestamp) {
      oldestTimestamp = entry.timestamp;
      oldestKey = key;
    }
  }
  if (oldestKey !== undefined) cache.delete(oldestKey);
}

/**
 * Builds the cache key `normalizedUrl|domHash|intent`.
 *
 * The URL is normalized by dropping query string and fragment so that the same
 * page reached with different parameters shares entries. URLs with an opaque
 * origin (`about:`, `file:`, `data:` …) report the literal origin "null", so the
 * scheme is used instead to keep different schemes apart. Strings that do not
 * parse as a URL are used verbatim.
 *
 * @param url     Current page URL.
 * @param domHash Structural hash of the page.
 * @param intent  Semantic intent key.
 * @returns The composite cache key.
 */
function buildCacheKey(url: string, domHash: string, intent: string): string {
  let normalized: string;
  try {
    const u = new URL(url);
    normalized = u.origin === "null" ? `${u.protocol}${u.pathname}` : `${u.origin}${u.pathname}`;
  } catch {
    normalized = url;
  }
  return `${normalized}|${domHash}|${intent}`;
}

/**
 * Computes a coarse structural fingerprint of the page: the per-tag element
 * counts, sorted by tag name, folded into a 32-bit hash rendered as hex.
 * Pages with the same tag distribution therefore share a hash regardless of
 * text, attributes or element order.
 *
 * @param page Object exposing `evaluate(fn)`; the callback runs in the page context.
 * @returns The hex hash, or "unknown" when evaluation throws.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the Playwright Page type is not imported in this module
async function computeDomHash(page: any): Promise<string> {
  try {
    return await page.evaluate(() => {
      // Structural hash based on element count + tag distribution
      const tags = new Map<string, number>();
      document.querySelectorAll("*").forEach((el) => {
        const tag = el.tagName;
        tags.set(tag, (tags.get(tag) ?? 0) + 1);
      });
      const entries = [...tags.entries()].sort((a, b) => a[0].localeCompare(b[0]));
      const str = entries.map(([t, c]) => `${t}:${c}`).join("|");
      // Simple multiplicative string hash (h * 31 + charCode), kept in 32-bit range by `| 0`.
      let h = 5381;
      for (let i = 0; i < str.length; i++) {
        h = ((h << 5) - h + str.charCodeAt(i)) | 0;
      }
      // `>>> 0` reinterprets the value as unsigned so the hex string has no sign.
      return (h >>> 0).toString(16);
    });
  } catch {
    // Best effort: a page that cannot be evaluated still gets a (shared) key component.
    return "unknown";
  }
}
/**
 * Registers the `browser_generate_test` tool.
 *
 * The tool reads the recorded action timeline (`getActionTimeline()`), maps each
 * supported tool call onto one line of Playwright test code, writes the spec
 * through `deps.writeArtifactFile`, and returns the generated source together
 * with the written path and byte count.
 *
 * @param pi - Extension API used to register the tool.
 * @param deps - Shared helpers for browser lifecycle and artifact paths/writes.
 */
export function registerCodegenTools(pi: ExtensionAPI, deps: ToolDeps): void {
  pi.registerTool({
    name: "browser_generate_test",
    label: "Browser Generate Test",
    description:
      "Generate a runnable Playwright test script from the recorded action timeline. " +
      "Transforms navigation, click, type, and assertion actions into standard Playwright test syntax. " +
      "Uses stable selectors (role-based preferred). Writes the test file to a configurable path.",
    parameters: Type.Object({
      name: Type.Optional(
        Type.String({ description: "Test name (used for describe/test block and filename). Default: 'recorded-session'." }),
      ),
      outputPath: Type.Optional(
        Type.String({
          description:
            "Output file path for the generated test. Default: writes to session artifacts directory. " +
            "Use a path ending in .spec.ts for standard Playwright test convention.",
        }),
      ),
      includeAssertions: Type.Optional(
        Type.Boolean({ description: "Include assertion steps from the timeline (default: true)." }),
      ),
    }),

    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      try {
        await deps.ensureBrowser();
        const timeline = getActionTimeline();

        if (timeline.entries.length === 0) {
          return {
            content: [{ type: "text", text: "No actions recorded in the current session. Interact with pages first, then generate a test." }],
            details: { error: "no_actions" },
            isError: true,
          };
        }

        const testName = params.name ?? "recorded-session";
        const includeAssertions = params.includeAssertions ?? true;

        // Transform timeline entries into Playwright test code
        const testLines: string[] = [];
        const imports = new Set<string>();
        imports.add("test");
        imports.add("expect");

        testLines.push(`test.describe('${escapeString(testName)}', () => {`);
        testLines.push(` test('recorded session', async ({ page }) => {`);

        let lastUrl = "";
        let actionCount = 0;

        // Appends one statement (or comment) to the test body and counts it as an action.
        const emitStep = (line: string): void => {
          testLines.push(` ${line}`);
          actionCount++;
        };

        for (const entry of timeline.entries) {
          // Failed actions are not replayed; failed assertions are still reported.
          if (entry.status === "error" && entry.tool !== "browser_assert") continue;

          // Named `args` so the tool's own `params` stays visible inside the loop.
          const args = parseParamsSummary(entry.paramsSummary);

          switch (entry.tool) {
            case "browser_navigate": {
              const url = args.url;
              // Consecutive navigations to the same URL collapse into one goto.
              if (url && url !== lastUrl) {
                emitStep(`await page.goto(${quote(url)});`);
                lastUrl = url;
              }
              break;
            }

            case "browser_click": {
              const selector = args.selector;
              if (selector) {
                emitStep(`await page.locator(${quote(selector)}).click();`);
              }
              break;
            }

            case "browser_click_ref": {
              // Refs are session-specific — add comment
              emitStep(`// browser_click_ref: ${toSingleLine(`${entry.paramsSummary}`)} — replace with stable selector`);
              break;
            }

            case "browser_type": {
              const selector = args.selector;
              const text = args.text;
              // An empty string is a real value here: fill('') clears the field.
              if (selector && text !== undefined) {
                emitStep(`await page.locator(${quote(selector)}).fill(${quote(text)});`);
              }
              break;
            }

            case "browser_fill_ref": {
              emitStep(`// browser_fill_ref: ${toSingleLine(`${entry.paramsSummary}`)} — replace with stable selector`);
              break;
            }

            case "browser_key_press": {
              const key = args.key;
              if (key) {
                emitStep(`await page.keyboard.press(${quote(key)});`);
              }
              break;
            }

            case "browser_select_option": {
              const selector = args.selector;
              const option = args.option;
              if (selector && option) {
                emitStep(`await page.locator(${quote(selector)}).selectOption(${quote(option)});`);
              }
              break;
            }

            case "browser_set_checked": {
              const selector = args.selector;
              if (selector) {
                emitStep(`await page.locator(${quote(selector)}).setChecked(${args.checked === "true"});`);
              }
              break;
            }

            case "browser_hover": {
              const selector = args.selector;
              if (selector) {
                emitStep(`await page.locator(${quote(selector)}).hover();`);
              }
              break;
            }

            case "browser_wait_for": {
              const condition = args.condition;
              const value = args.value;
              if (condition === "selector_visible" && value) {
                emitStep(`await expect(page.locator(${quote(value)})).toBeVisible();`);
              } else if (condition === "text_visible" && value) {
                emitStep(`await expect(page.locator('body')).toContainText(${quote(value)});`);
              } else if (condition === "url_contains" && value) {
                emitStep(`await page.waitForURL(${quote(`**/*${value}*`)});`);
              } else if (condition === "network_idle") {
                emitStep(`await page.waitForLoadState('networkidle');`);
              } else if (condition === "delay") {
                // Only a real number may be spliced into the generated source unquoted.
                const delayMs = toFiniteNumber(value);
                if (delayMs !== undefined) {
                  emitStep(`await page.waitForTimeout(${delayMs});`);
                }
              }
              break;
            }

            case "browser_assert": {
              if (!includeAssertions) break;
              // The assertion details are in verificationSummary
              if (entry.verificationSummary) {
                testLines.push(` // Assertion: ${toSingleLine(`${entry.verificationSummary}`)}`);
              }
              actionCount++;
              break;
            }

            case "browser_scroll": {
              // Falls back to 300 when the amount is missing or not numeric.
              const amount = toFiniteNumber(args.amount) ?? 300;
              const delta = args.direction === "up" ? -amount : amount;
              emitStep(`await page.mouse.wheel(0, ${delta});`);
              break;
            }

            case "browser_set_viewport": {
              const width = toFiniteNumber(args.width);
              const height = toFiniteNumber(args.height);
              if (width !== undefined && height !== undefined) {
                emitStep(`await page.setViewportSize({ width: ${width}, height: ${height} });`);
              }
              break;
            }

            default:
              // Skip tools that don't map to Playwright test actions
              break;
          }
        }

        testLines.push(` });`);
        testLines.push(`});`);

        const importLine = `import { ${[...imports].join(", ")} } from '@playwright/test';`;
        const fullTest = `${importLine}\n\n${testLines.join("\n")}\n`;

        // Write to file
        let outputPath: string;
        if (params.outputPath) {
          outputPath = params.outputPath;
        } else {
          const safeName = deps.sanitizeArtifactName(testName, "recorded-session");
          outputPath = deps.buildSessionArtifactPath(`${safeName}.spec.ts`);
        }

        await deps.ensureSessionArtifactDir();
        const { path: writtenPath, bytes } = await deps.writeArtifactFile(outputPath, fullTest);

        return {
          content: [{
            type: "text",
            text: `Test generated: ${writtenPath}\nActions: ${actionCount}\nTimeline entries processed: ${timeline.entries.length}\n\n${fullTest}`,
          }],
          details: {
            path: writtenPath,
            bytes,
            actionCount,
            timelineEntries: timeline.entries.length,
            testCode: fullTest,
          },
        };
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        return {
          content: [{ type: "text", text: `Test generation failed: ${message}` }],
          details: { error: message },
          isError: true,
        };
      }
    },
  });
}

/** Parses a recorded numeric parameter; returns undefined unless it is a finite number. */
function toFiniteNumber(raw: string | undefined): number | undefined {
  if (raw === undefined || raw.trim() === "") return undefined;
  const parsed = Number(raw);
  return Number.isFinite(parsed) ? parsed : undefined;
}

/** Collapses line breaks so the text cannot terminate the `//` comment it is placed in. */
function toSingleLine(text: string): string {
  return text.replace(/[\r\n\u2028\u2029]+/g, " ");
}
/**
 * Escapes characters that are unsafe in every kind of generated string
 * literal: backslashes first (so later escapes are not doubled), then line
 * terminators.
 */
function escapeLiteralBody(s: string): string {
  return s
    .replace(/\\/g, "\\\\")
    .replace(/\n/g, "\\n")
    .replace(/\r/g, "\\r")
    .replace(/\u2028/g, "\\u2028")
    .replace(/\u2029/g, "\\u2029");
}

/**
 * Escapes `s` for use between single quotes in generated source.
 *
 * Backslashes are escaped before quotes; doing it the other way round turns
 * the `\'` that was just produced into `\\'`, which ends the literal early.
 *
 * @param s - Raw text.
 * @returns Text that is safe inside a `'...'` literal (quotes not included).
 */
function escapeString(s: string): string {
  return escapeLiteralBody(s).replace(/'/g, "\\'");
}

/**
 * Renders `s` as a JavaScript string literal for the generated test.
 *
 * Prefers single quotes, falls back to a template literal when the text
 * contains a single quote but no backtick, and otherwise escapes the single
 * quotes. In every form backslashes and line breaks are escaped, and `${` is
 * neutralised in the template form so it is not treated as an interpolation.
 *
 * @param s - Raw text.
 * @returns A complete literal, including its delimiters.
 */
function quote(s: string): string {
  if (!s.includes("'")) return `'${escapeLiteralBody(s)}'`;
  if (!s.includes("`")) return `\`${escapeLiteralBody(s).replace(/\$\{/g, "\\${")}\``;
  return `'${escapeString(s)}'`;
}
/**
 * Parse the paramsSummary string back into key-value pairs.
 * Format: key="value", key=value, key=[N], key={...}
 *
 * Quoted values may contain backslash escapes; an escaped quote (`\"`) no
 * longer terminates the value and is returned as a plain `"`.
 * NOTE(review): assumes the summary writer backslash-escapes quotes inside
 * quoted values, as the `\"` branch of the original pattern implies — confirm
 * against the code that builds paramsSummary.
 *
 * @param summary - Summary text recorded for a timeline entry; may be empty.
 * @returns Map of parameter name to its raw string value (later keys win).
 */
function parseParamsSummary(summary: string): Record<string, string> {
  const result: Record<string, string> = {};
  if (!summary) return result;

  // Group 2: quoted value (escape-aware). Group 3: bare token up to a comma or whitespace.
  const pattern = /(\w+)=(?:"((?:[^"\\]|\\[\s\S])*)"|([^,\s]+))/g;
  let match: RegExpExecArray | null;
  while ((match = pattern.exec(summary)) !== null) {
    const [, key, quoted, bare] = match;
    if (key === undefined) continue;
    result[key] = quoted !== undefined ? quoted.replace(/\\"/g, '"') : (bare ?? "");
  }
  return result;
}
/**
 * Registers the `browser_emulate_device` tool.
 *
 * The tool resolves a Playwright device descriptor by name (exact, then
 * substring, then fuzzy suggestions), closes the current browser, launches a
 * new Chromium context with the descriptor applied, re-registers the page in
 * the shared state, and tries to reopen the previously active URL.
 *
 * @param pi - Extension API used to register the tool.
 * @param deps - Shared helpers for browser lifecycle and page listeners.
 */
export function registerDeviceTools(pi: ExtensionAPI, deps: ToolDeps): void {
  pi.registerTool({
    name: "browser_emulate_device",
    label: "Browser Emulate Device",
    description:
      "Simulate a specific device by setting viewport, user agent, device scale factor, touch, and mobile flag. " +
      "Uses Playwright's built-in device descriptors (~143 devices). Accepts fuzzy matching on device name. " +
      "Note: Full emulation (user agent, isMobile) requires a context restart — the current page state will be lost. " +
      "The tool recreates the context with the device profile applied.",
    parameters: Type.Object({
      device: Type.String({
        description:
          "Device name (e.g., 'iPhone 15', 'Pixel 7', 'iPad Pro 11'). " +
          "Case-insensitive fuzzy matching. Use 'list' to see all available devices.",
      }),
    }),

    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      try {
        const { chromium, devices } = await import("playwright");
        const allDeviceNames = Object.keys(devices);

        // Handle 'list' request
        if (params.device.toLowerCase() === "list") {
          // Group by base device name (remove landscape variants for cleaner display)
          const baseNames = allDeviceNames.filter((n) => !n.endsWith(" landscape"));
          return {
            content: [{
              type: "text",
              text: `Available devices (${allDeviceNames.length} total, ${baseNames.length} base):\n${baseNames.join("\n")}`,
            }],
            details: { devices: baseNames, total: allDeviceNames.length },
          };
        }

        // Fuzzy match device name
        const needle = params.device.toLowerCase();
        let matchedName = allDeviceNames.find((n) => n.toLowerCase() === needle);
        // Other substring matches that were passed over, reported back to the caller.
        let alternatives: string[] = [];

        if (!matchedName) {
          // Shortest substring match wins; with a single match this is that match.
          const containsMatches = allDeviceNames
            .filter((n) => n.toLowerCase().includes(needle))
            .sort((a, b) => a.length - b.length);

          if (containsMatches.length === 0) {
            // No match at all — suggest closest
            const suggestions = allDeviceNames
              .map((n) => ({ name: n, score: fuzzyScore(needle, n.toLowerCase()) }))
              .sort((a, b) => b.score - a.score)
              .slice(0, 5)
              .map((s) => s.name);

            return {
              content: [{
                type: "text",
                text: `No device matching "${params.device}". Did you mean:\n${suggestions.map((s) => ` - ${s}`).join("\n")}`,
              }],
              details: { error: "no_match", suggestions },
              isError: true,
            };
          }

          matchedName = containsMatches[0];
          alternatives = containsMatches.slice(1, 6);
        }

        const deviceDescriptor = matchedName ? devices[matchedName] : undefined;
        if (!matchedName || !deviceDescriptor) {
          return {
            content: [{ type: "text", text: `Device descriptor not found for "${matchedName}"` }],
            details: { error: "descriptor_not_found" },
            isError: true,
          };
        }

        // Context restart required for full emulation.
        // Save current URL to navigate back after restart.
        const { page: currentPage } = await deps.ensureBrowser();
        const currentUrl = currentPage.url();

        // Close existing browser and relaunch with device profile
        await deps.closeBrowser();

        // Re-launch — ensureBrowser doesn't accept device params, so we do it manually.
        // This is a one-off context creation with device emulation.
        const needsHeadless = process.platform === "linux" && !process.env.DISPLAY;
        const launchOptions: { headless: boolean; executablePath?: string } = {
          headless: needsHeadless || process.env.FORCE_HEADLESS === "true",
        };
        const customPath = process.env.BROWSER_PATH;
        if (customPath) launchOptions.executablePath = customPath;

        const browser = await chromium.launch(launchOptions);
        const context = await browser.newContext({
          ...deviceDescriptor,
        });

        // Inject evaluate helpers
        const { EVALUATE_HELPERS_SOURCE } = await import("../evaluate-helpers.js");
        await context.addInitScript(EVALUATE_HELPERS_SOURCE);

        // Wire up state
        const {
          setBrowser, setContext, pageRegistry, setSessionStartedAt, resetAllState,
        } = await import("../state.js");
        const { registryAddPage, registrySetActive } = await import("../core.js");

        // Reset state for new session
        resetAllState();
        setBrowser(browser);
        setContext(context);
        setSessionStartedAt(Date.now());

        const page = await context.newPage();
        const entry = registryAddPage(pageRegistry, {
          page,
          title: "",
          url: "about:blank",
          opener: null,
        });
        registrySetActive(pageRegistry, entry.id);
        deps.attachPageListeners(page, entry.id);

        // Navigate back to previous URL if it wasn't about:blank.
        // Still best-effort, but a failure is reported instead of being dropped.
        let restoreError: string | undefined;
        if (currentUrl && currentUrl !== "about:blank") {
          try {
            await page.goto(currentUrl, { waitUntil: "domcontentloaded", timeout: 15000 });
          } catch (navErr: unknown) {
            restoreError = navErr instanceof Error ? navErr.message : String(navErr);
          }
        }

        const viewport = deviceDescriptor.viewport;
        const vpText = viewport ? `${viewport.width}x${viewport.height}` : "unknown";

        let text = `Device emulation active: ${matchedName}\nViewport: ${vpText}\nUser Agent: ${deviceDescriptor.userAgent?.slice(0, 80) ?? "default"}...\nMobile: ${deviceDescriptor.isMobile ?? false}\nTouch: ${deviceDescriptor.hasTouch ?? false}\nScale Factor: ${deviceDescriptor.deviceScaleFactor ?? 1}\n\nContext was restarted for full emulation. Page state was reset.`;
        if (alternatives.length > 0) {
          text += `\nOther matching devices: ${alternatives.join(", ")}`;
        }
        if (restoreError !== undefined) {
          text += `\nWarning: could not reload ${currentUrl}: ${restoreError}`;
        }

        return {
          content: [{ type: "text", text }],
          details: {
            device: matchedName,
            viewport: vpText,
            isMobile: deviceDescriptor.isMobile ?? false,
            hasTouch: deviceDescriptor.hasTouch ?? false,
            deviceScaleFactor: deviceDescriptor.deviceScaleFactor ?? 1,
            userAgent: deviceDescriptor.userAgent,
            restoredUrl: currentUrl,
            restored: restoreError === undefined,
            alternatives,
          },
        };
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        return {
          content: [{ type: "text", text: `Device emulation failed: ${message}` }],
          details: { error: message },
          isError: true,
        };
      }
    },
  });
}
/**
 * Scores how much of `needle` can be found in `haystack` when scanning left to right.
 *
 * Each needle character is searched for from just past the previous hit; a
 * character that is not found is skipped without moving the scan position.
 * Scanning stops once the position reaches the end of the haystack.
 *
 * @param needle - Text being looked for.
 * @param haystack - Text being searched.
 * @returns Matched character count divided by the needle length (0 for an empty needle).
 */
function fuzzyScore(needle: string, haystack: string): number {
  let matched = 0;
  let cursor = 0;
  let position = 0;

  while (position < needle.length && cursor < haystack.length) {
    const found = haystack.indexOf(needle.charAt(position), cursor);
    if (found !== -1) {
      matched += 1;
      cursor = found + 1;
    }
    position += 1;
  }

  const divisor = needle.length > 0 ? needle.length : 1;
  return matched / divisor;
}
/**
 * Registers the `browser_extract` tool.
 *
 * The tool turns the caller's schema into a list of selector/attribute lookups
 * (`buildExtractionPlan`), runs them inside the page, coerces values to the
 * declared primitive type, and reports the result together with any schema
 * validation messages from `validateData`.
 *
 * @param pi - Extension API used to register the tool.
 * @param deps - Shared helpers; only `ensureBrowser` is used here.
 */
export function registerExtractTools(pi: ExtensionAPI, deps: ToolDeps): void {
  pi.registerTool({
    name: "browser_extract",
    label: "Browser Extract",
    description:
      "Extract structured data from the current page using CSS selectors and validate against a JSON Schema. " +
      "Provide a schema describing the shape of data you want. The tool extracts data by evaluating " +
      "CSS selectors in the page context, then validates the result against your schema. " +
      "Supports extracting single objects or arrays of items. Waits for network idle before extraction.",
    parameters: Type.Object({
      schema: Type.Record(Type.String(), Type.Unknown(), {
        description:
          "JSON Schema describing the data shape to extract. Properties should include " +
          "'_selector' (CSS selector) and '_attribute' (attribute to read, default: 'textContent') hints. " +
          "Example: { type: 'object', properties: { title: { _selector: 'h1', _attribute: 'textContent' }, price: { _selector: '.price', _attribute: 'textContent' } } }",
      }),
      selector: Type.Optional(
        Type.String({ description: "CSS selector to scope extraction to a specific container element." }),
      ),
      multiple: Type.Optional(
        Type.Boolean({
          description:
            "If true, extract an array of items. The 'selector' parameter becomes the item container selector, " +
            "and schema properties are extracted relative to each matched container.",
        }),
      ),
    }),

    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      try {
        const { page: p } = await deps.ensureBrowser();

        // Wait for network idle before extraction (best-effort: a timeout is ignored)
        await p.waitForLoadState("networkidle", { timeout: 10000 }).catch(() => {});

        const schema = params.schema as any;
        const scopeSelector = params.selector;
        const multiple = params.multiple ?? false;

        // Build extraction plan from schema
        const extractionPlan = buildExtractionPlan(schema);

        // Execute extraction in page context.
        // The callback is serialised into the page, so it must not reference anything outside itself.
        const rawData = await p.evaluate(
          ({ plan, scope, multi }: { plan: ExtractionField[]; scope: string | undefined; multi: boolean }) => {
            function extractFromContainer(container: Element, fields: typeof plan): Record<string, unknown> {
              const result: Record<string, unknown> = {};
              for (const field of fields) {
                const el = container.querySelector(field.selector);
                if (!el) {
                  result[field.name] = null;
                  continue;
                }
                let value: unknown;
                switch (field.attribute) {
                  case "textContent":
                    value = (el.textContent ?? "").trim();
                    break;
                  case "innerText":
                    value = ((el as HTMLElement).innerText ?? "").trim();
                    break;
                  case "innerHTML":
                    value = el.innerHTML;
                    break;
                  case "href":
                    value = (el as HTMLAnchorElement).href ?? el.getAttribute("href");
                    break;
                  case "src":
                    value = (el as HTMLImageElement).src ?? el.getAttribute("src");
                    break;
                  case "value":
                    value = (el as HTMLInputElement).value;
                    break;
                  default:
                    value = el.getAttribute(field.attribute) ?? (el.textContent ?? "").trim();
                }
                // Type coercion. "integer" is a JSON Schema numeric type as well; without
                // coercion such fields stay strings and always fail validation.
                if ((field.type === "number" || field.type === "integer") && typeof value === "string") {
                  const num = parseFloat(value.replace(/[^0-9.-]/g, ""));
                  value = isNaN(num) ? value : num;
                } else if (field.type === "boolean" && typeof value === "string") {
                  value = value.toLowerCase() === "true" || value === "1";
                }
                result[field.name] = value;
              }
              return result;
            }

            const root = scope ? document.querySelector(scope) : document.body;
            if (!root) return { data: null, error: `Scope selector "${scope}" not found` };

            if (multi) {
              // For multiple items, scope is the item selector
              const containers = scope
                ? document.querySelectorAll(scope)
                : [document.body];
              const items = Array.from(containers).map((container) =>
                extractFromContainer(container, plan),
              );
              return { data: items, error: null };
            } else {
              return { data: extractFromContainer(root, plan), error: null };
            }
          },
          { plan: extractionPlan, scope: scopeSelector, multi: multiple },
        );

        if (rawData.error) {
          return {
            content: [{ type: "text", text: `Extraction failed: ${rawData.error}` }],
            details: { error: rawData.error },
            isError: true,
          };
        }

        // Validate against schema using ajv
        const validationErrors = await validateData(rawData.data, schema, multiple);

        const resultText = JSON.stringify(rawData.data, null, 2);
        const truncated = resultText.length > 4000 ? resultText.slice(0, 4000) + "\n...(truncated)" : resultText;

        let itemCount = 1;
        if (multiple) {
          itemCount = Array.isArray(rawData.data) ? rawData.data.length : 0;
        }

        return {
          content: [{
            type: "text",
            text: validationErrors.length > 0
              ? `Extracted data (with ${validationErrors.length} validation warning(s)):\n${truncated}\n\nValidation warnings:\n${validationErrors.join("\n")}`
              : `Extracted data:\n${truncated}`,
          }],
          details: {
            data: rawData.data,
            validationErrors: validationErrors.length > 0 ? validationErrors : undefined,
            fieldCount: extractionPlan.length,
            itemCount,
          },
        };
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        return {
          content: [{ type: "text", text: `Extraction failed: ${message}` }],
          details: { error: message },
          isError: true,
        };
      }
    },
  });
}
/** One lookup to perform in the page: which element to find, what to read from it, and the declared type. */
interface ExtractionField {
  name: string;
  selector: string;
  attribute: string;
  type: string;
}

// Schema keywords that can sit next to field definitions when the caller passes
// a flat object instead of a `properties` map.
const SCHEMA_META_KEYS = new Set(["type", "required", "properties", "$schema"]);

/**
 * Derives the list of page lookups from the caller's schema.
 *
 * Fields are taken from `schema.properties` when it is present, otherwise from
 * the schema object itself. Keyword names are skipped only in the second
 * case: under `properties` every key is a field name, so a field called
 * `type` or `required` must not be dropped.
 *
 * @param schema - Schema-like object carrying `_selector` / `_attribute` hints (plain `selector` / `attribute` also accepted).
 * @returns One entry per object-valued property; empty when `schema` is not an object.
 */
function buildExtractionPlan(schema: any): ExtractionField[] {
  const fields: ExtractionField[] = [];

  if (!schema || typeof schema !== "object") return fields;

  const declared = schema.properties;
  const usesPropertiesMap = declared !== undefined && declared !== null;
  const properties = usesPropertiesMap ? declared : schema;

  for (const [name, propSchema] of Object.entries(properties)) {
    const prop = propSchema as any;
    if (!prop || typeof prop !== "object") continue;

    // Skip meta fields (only meaningful when fields and keywords share one object)
    if (!usesPropertiesMap && SCHEMA_META_KEYS.has(name)) continue;

    // Without a hint, fall back to common conventions for locating a field by name.
    const selector = prop._selector ?? prop.selector ?? `[data-field="${name}"], .${name}, #${name}`;
    const attribute = prop._attribute ?? prop.attribute ?? "textContent";
    const type = prop.type ?? "string";

    fields.push({ name, selector, attribute, type });
  }

  return fields;
}
/**
 * Validates extracted data against the caller's schema with ajv.
 *
 * Extraction hints are removed from the schema first; for array results the
 * schema is wrapped as the `items` of an array schema. Failures to load ajv or
 * compile the schema are returned as a message rather than thrown.
 *
 * @param data - Extracted value (object, or array when `isArray` is true).
 * @param schema - Caller-supplied schema, possibly containing extraction hints.
 * @param isArray - Whether `data` is a list of items that each follow `schema`.
 * @returns Human-readable validation messages; empty when the data is valid.
 */
async function validateData(data: unknown, schema: any, isArray: boolean): Promise<string[]> {
  const errors: string[] = [];

  try {
    const ajvModule = await import("ajv");
    // Handles both an ES-module default export and a CommonJS module object.
    const Ajv = ajvModule.default ?? ajvModule;
    const ajv = new (Ajv as any)({ allErrors: true, strict: false });

    // Clean schema — remove our custom _selector/_attribute hints before validation
    const cleanSchema = cleanSchemaForValidation(schema);

    // Wrap in array schema if multiple
    const validationSchema = isArray
      ? { type: "array", items: cleanSchema }
      : cleanSchema;

    const validate = ajv.compile(validationSchema);
    const valid = validate(data);

    if (!valid && validate.errors) {
      for (const err of validate.errors) {
        errors.push(`${err.instancePath || "/"}: ${err.message}`);
      }
    }
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    errors.push(`Schema validation setup failed: ${message}`);
  }

  return errors;
}

/**
 * Returns a deep copy of `schema` without the extraction hints.
 *
 * Keys starting with `_`, and string-valued `selector` / `attribute` keys, are
 * removed wherever they appear as schema keywords. The direct children of a
 * `properties` map are field names, not keywords, so they are always kept
 * (a field named `_id` stays in the schema) and only their contents are cleaned.
 *
 * @param schema - Any schema fragment; non-objects are returned unchanged.
 * @returns The cleaned copy.
 */
function cleanSchemaForValidation(schema: any): any {
  if (!schema || typeof schema !== "object") return schema;
  if (Array.isArray(schema)) return schema.map(cleanSchemaForValidation);

  const cleaned: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(schema)) {
    if (key === "properties" && value && typeof value === "object" && !Array.isArray(value)) {
      cleaned[key] = Object.fromEntries(
        Object.entries(value).map(([fieldName, fieldSchema]) => [fieldName, cleanSchemaForValidation(fieldSchema)]),
      );
      continue;
    }
    if (key.startsWith("_")) continue; // Remove our custom hints
    if (key === "selector" && typeof value === "string") continue; // Also remove plain 'selector'
    if (key === "attribute" && typeof value === "string") continue; // Also remove plain 'attribute'
    cleaned[key] = cleanSchemaForValidation(value);
  }
  return cleaned;
}
// Rule table for the injection scanner: [pattern, category, severity].
// Order matters — findings are reported in this order.
const INJECTION_RULE_TABLE: ReadonlyArray<readonly [RegExp, string, "high" | "medium" | "low"]> = [
  // Attempts to cancel the instructions the agent was given
  [/ignore\s+(all\s+)?previous\s+(instructions?|prompts?)/i, "instruction_override", "high"],
  [/disregard\s+(all\s+)?previous\s+(instructions?|prompts?)/i, "instruction_override", "high"],
  [/forget\s+(all\s+)?previous\s+(instructions?|prompts?)/i, "instruction_override", "high"],
  [/override\s+(all\s+)?previous\s+(instructions?|prompts?)/i, "instruction_override", "high"],

  // Requests to disclose the system prompt
  [/(?:what|show|reveal|display|repeat|tell)\s+(?:me\s+)?(?:your|the)\s+system\s+prompt/i, "prompt_extraction", "high"],
  [/print\s+(?:your|the)\s+(?:system\s+)?(?:prompt|instructions)/i, "prompt_extraction", "high"],

  // Attempts to assign the agent a new persona or privilege level
  [/you\s+are\s+now\s+(?:a\s+)?(?:DAN|jailbreak|unrestricted|unfiltered)/i, "role_reassignment", "high"],
  [/act\s+as\s+(?:a\s+)?(?:DAN|jailbreak|unrestricted|unfiltered)/i, "role_reassignment", "high"],
  [/entering\s+(?:a\s+)?(?:developer|admin|root|sudo)\s+mode/i, "role_reassignment", "high"],

  // Text dressed up as a system / instruction message
  [/\[SYSTEM\]\s*:/i, "fake_system_message", "high"],
  [/\[INST\]\s*:/i, "fake_system_message", "medium"],
  [/<\/?system>/i, "fake_system_message", "high"],

  // Requests to run commands or code
  [/execute\s+(?:the\s+following\s+)?(?:command|code|script)/i, "command_injection", "medium"],
  [/run\s+(?:this|the\s+following)\s+(?:command|code|script)/i, "command_injection", "medium"],

  // Social engineering and late instruction swaps
  [/do\s+not\s+(?:read|process|show)\s+(?:the\s+)?(?:following|rest)/i, "social_engineering", "low"],
  [/(?:this|the\s+following)\s+(?:is|are)\s+(?:your\s+)?new\s+instructions/i, "instruction_override", "high"],

  // Long base64 blobs announced as such
  [/base64\s*:\s*[A-Za-z0-9+\/=]{50,}/i, "encoded_payload", "medium"],
];

// Known injection patterns — regex patterns that match common prompt injection attempts
const INJECTION_PATTERNS: Array<{ pattern: RegExp; category: string; severity: "high" | "medium" | "low" }> =
  INJECTION_RULE_TABLE.map(([pattern, category, severity]) => ({ pattern, category, severity }));
/**
 * Registers the `browser_check_injection` tool.
 *
 * The tool collects text regions from the current page (visible body text,
 * title, meta content and — optionally — hidden elements, HTML comments and
 * selected data attributes), tests each region against `INJECTION_PATTERNS`,
 * and reports the de-duplicated findings with per-severity counts.
 *
 * @param pi - Extension API used to register the tool.
 * @param deps - Shared helpers; only `ensureBrowser` is used here.
 */
export function registerInjectionDetectionTools(pi: ExtensionAPI, deps: ToolDeps): void {
  pi.registerTool({
    name: "browser_check_injection",
    label: "Browser Check Injection",
    description:
      "Scan current page content for potential prompt injection attempts. " +
      "Checks visible text and hidden elements for patterns that might hijack the agent. " +
      "Returns findings with severity levels. Use after navigating to untrusted pages.",
    parameters: Type.Object({
      includeHidden: Type.Optional(
        Type.Boolean({
          description:
            "Also scan hidden/invisible text (default: true). " +
            "Hidden text is a common vector for injection attacks.",
        }),
      ),
    }),

    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      try {
        const { page: p } = await deps.ensureBrowser();
        const includeHidden = params.includeHidden ?? true;

        // Extract text content from the page.
        // The callback runs inside the page, so it must not reference anything outside itself.
        const pageContent = await p.evaluate((scanHidden: boolean) => {
          const results: Array<{ text: string; source: string; visible: boolean }> = [];

          // 1. Visible text content
          const bodyText = document.body?.innerText ?? "";
          results.push({ text: bodyText, source: "body_visible_text", visible: true });

          // 2. Title and meta
          results.push({ text: document.title, source: "page_title", visible: true });

          // Meta descriptions and keywords
          const metas = document.querySelectorAll("meta[name], meta[property]");
          for (const meta of metas) {
            const content = meta.getAttribute("content");
            if (content) {
              results.push({
                text: content,
                source: `meta:${meta.getAttribute("name") || meta.getAttribute("property")}`,
                visible: false,
              });
            }
          }

          if (scanHidden) {
            // 3. Hidden elements (display:none, visibility:hidden, opacity:0, zero-size, aria-hidden)
            const allElements = document.querySelectorAll("*");
            for (const el of allElements) {
              const htmlEl = el as HTMLElement;
              const style = window.getComputedStyle(htmlEl);
              const isHidden =
                style.display === "none" ||
                style.visibility === "hidden" ||
                style.opacity === "0" ||
                htmlEl.getAttribute("aria-hidden") === "true" ||
                (htmlEl.offsetWidth === 0 && htmlEl.offsetHeight === 0);

              if (isHidden && htmlEl.textContent?.trim()) {
                const text = htmlEl.textContent.trim();
                // Very short and very long hidden texts are not collected.
                if (text.length > 5 && text.length < 5000) {
                  results.push({ text, source: "hidden_element", visible: false });
                }
              }
            }

            // 4. HTML comments
            const walker = document.createTreeWalker(
              document.documentElement,
              NodeFilter.SHOW_COMMENT,
            );
            let node;
            while ((node = walker.nextNode())) {
              const text = (node as Comment).textContent?.trim() ?? "";
              if (text.length > 10) {
                results.push({ text, source: "html_comment", visible: false });
              }
            }

            // 5. Data attributes on elements that carry data-prompt / data-instruction / data-system
            const dataElements = document.querySelectorAll("[data-prompt], [data-instruction], [data-system]");
            for (const el of dataElements) {
              for (const attr of el.attributes) {
                if (attr.name.startsWith("data-") && attr.value.length > 10) {
                  results.push({
                    text: attr.value,
                    source: `data_attribute:${attr.name}`,
                    visible: false,
                  });
                }
              }
            }
          }

          return results;
        }, includeHidden);

        // Scan all extracted text against injection patterns
        const findings: Array<{
          pattern: string;
          category: string;
          severity: string;
          source: string;
          visible: boolean;
          matchedText: string;
        }> = [];

        for (const { text, source, visible } of pageContent) {
          for (const { pattern, category, severity } of INJECTION_PATTERNS) {
            // Only the first occurrence of each pattern per region is recorded.
            const match = text.match(pattern);
            if (match) {
              findings.push({
                pattern: pattern.source.slice(0, 60),
                category,
                severity,
                source,
                visible,
                matchedText: match[0].slice(0, 100),
              });
            }
          }
        }

        // Deduplicate findings by category + source + matched text
        // (nested hidden elements repeat the same text once per ancestor).
        const seen = new Set<string>();
        const uniqueFindings = findings.filter((f) => {
          const key = `${f.category}|${f.source}|${f.matchedText}`;
          if (seen.has(key)) return false;
          seen.add(key);
          return true;
        });

        const highCount = uniqueFindings.filter((f) => f.severity === "high").length;
        const medCount = uniqueFindings.filter((f) => f.severity === "medium").length;
        const lowCount = uniqueFindings.filter((f) => f.severity === "low").length;

        if (uniqueFindings.length === 0) {
          return {
            content: [{
              type: "text",
              text: `No prompt injection patterns detected.\nScanned: ${pageContent.length} text regions (hidden: ${includeHidden})`,
            }],
            details: {
              clean: true,
              scannedRegions: pageContent.length,
              includeHidden,
            },
          };
        }

        const findingLines = uniqueFindings.map((f) =>
          ` [${f.severity.toUpperCase()}] ${f.category} in ${f.source}${!f.visible ? " (HIDDEN)" : ""}: "${f.matchedText}"`,
        );

        return {
          content: [{
            type: "text",
            text: `⚠️ Prompt injection patterns detected: ${uniqueFindings.length} finding(s)\nHigh: ${highCount} | Medium: ${medCount} | Low: ${lowCount}\n\n${findingLines.join("\n")}\n\n⚠️ This page may be attempting to manipulate the agent. Proceed with caution.`,
          }],
          details: {
            clean: false,
            findings: uniqueFindings,
            counts: { high: highCount, medium: medCount, low: lowCount, total: uniqueFindings.length },
            scannedRegions: pageContent.length,
            includeHidden,
          },
        };
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        return {
          content: [{ type: "text", text: `Injection check failed: ${message}` }],
          details: { error: message },
          isError: true,
        };
      }
    },
  });
}
+ */ + +interface ActiveRoute { + id: number; + pattern: string; + type: "mock" | "block"; + status?: number; + delay?: number; + description: string; +} + +let nextRouteId = 1; +const activeRoutes: ActiveRoute[] = []; +const routeCleanups: Map Promise> = new Map(); + +export function registerNetworkMockTools(pi: ExtensionAPI, deps: ToolDeps): void { + // ------------------------------------------------------------------------- + // browser_mock_route + // ------------------------------------------------------------------------- + pi.registerTool({ + name: "browser_mock_route", + label: "Browser Mock Route", + description: + "Intercept network requests matching a URL pattern and respond with custom status, body, and headers. " + + "Supports simulating slow responses via delay parameter. " + + "Routes survive page navigation within the same context. Use browser_clear_routes to remove all mocks.", + parameters: Type.Object({ + url: Type.String({ + description: "URL pattern to intercept. Supports glob patterns (e.g., '**/api/users*') or exact URLs.", + }), + status: Type.Optional( + Type.Number({ description: "HTTP status code for the mock response (default: 200)." }), + ), + body: Type.Optional( + Type.String({ description: "Response body string. For JSON responses, pass a JSON string." }), + ), + contentType: Type.Optional( + Type.String({ description: "Content-Type header (default: 'application/json' if body looks like JSON, else 'text/plain')." }), + ), + headers: Type.Optional( + Type.Record(Type.String(), Type.String(), { + description: "Additional response headers as key-value pairs.", + }), + ), + delay: Type.Optional( + Type.Number({ description: "Delay in milliseconds before sending the response. Simulates slow responses." }), + ), + }), + + async execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + try { + const { page: p } = await deps.ensureBrowser(); + const routeId = nextRouteId++; + + const status = params.status ?? 
200; + const body = params.body ?? ""; + const delay = params.delay ?? 0; + + // Auto-detect content type + let contentType = params.contentType; + if (!contentType) { + try { + JSON.parse(body); + contentType = "application/json"; + } catch { + contentType = "text/plain"; + } + } + + const headers: Record = { + "content-type": contentType, + "access-control-allow-origin": "*", + ...(params.headers ?? {}), + }; + + const handler = async (route: any) => { + if (delay > 0) { + await new Promise((resolve) => setTimeout(resolve, delay)); + } + await route.fulfill({ + status, + body, + headers, + }); + }; + + await p.route(params.url, handler); + + const cleanup = async () => { + try { + await p.unroute(params.url, handler); + } catch { + // Page may be closed + } + }; + + const routeInfo: ActiveRoute = { + id: routeId, + pattern: params.url, + type: "mock", + status, + delay: delay > 0 ? delay : undefined, + description: `Mock ${params.url} → ${status}${delay > 0 ? ` (${delay}ms delay)` : ""}`, + }; + + activeRoutes.push(routeInfo); + routeCleanups.set(routeId, cleanup); + + return { + content: [{ + type: "text", + text: `Route mocked: ${routeInfo.description}\nRoute ID: ${routeId}\nActive routes: ${activeRoutes.length}`, + }], + details: { routeId, ...routeInfo, activeRouteCount: activeRoutes.length }, + }; + } catch (err: any) { + return { + content: [{ type: "text", text: `Mock route failed: ${err.message}` }], + details: { error: err.message }, + isError: true, + }; + } + }, + }); + + // ------------------------------------------------------------------------- + // browser_block_urls + // ------------------------------------------------------------------------- + pi.registerTool({ + name: "browser_block_urls", + label: "Browser Block URLs", + description: + "Block network requests matching URL patterns. Useful for blocking analytics, ads, or third-party scripts. " + + "Accepts glob patterns. 
Routes survive page navigation.", + parameters: Type.Object({ + patterns: Type.Array(Type.String(), { + description: "URL patterns to block (glob syntax, e.g., ['**/analytics*', '**/ads*']).", + }), + }), + + async execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + try { + const { page: p } = await deps.ensureBrowser(); + const results: ActiveRoute[] = []; + + for (const pattern of params.patterns) { + const routeId = nextRouteId++; + + const handler = async (route: any) => { + await route.abort("blockedbyclient"); + }; + + await p.route(pattern, handler); + + const cleanup = async () => { + try { + await p.unroute(pattern, handler); + } catch {} + }; + + const routeInfo: ActiveRoute = { + id: routeId, + pattern, + type: "block", + description: `Block ${pattern}`, + }; + + activeRoutes.push(routeInfo); + routeCleanups.set(routeId, cleanup); + results.push(routeInfo); + } + + return { + content: [{ + type: "text", + text: `Blocked ${results.length} URL pattern(s):\n${results.map((r) => ` - ${r.description} (ID: ${r.id})`).join("\n")}\nActive routes: ${activeRoutes.length}`, + }], + details: { blocked: results, activeRouteCount: activeRoutes.length }, + }; + } catch (err: any) { + return { + content: [{ type: "text", text: `Block URLs failed: ${err.message}` }], + details: { error: err.message }, + isError: true, + }; + } + }, + }); + + // ------------------------------------------------------------------------- + // browser_clear_routes + // ------------------------------------------------------------------------- + pi.registerTool({ + name: "browser_clear_routes", + label: "Browser Clear Routes", + description: + "Remove all active route mocks and URL blocks. 
Also lists currently active routes if called with no routes active.", + parameters: Type.Object({}), + + async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) { + try { + await deps.ensureBrowser(); + const count = activeRoutes.length; + + if (count === 0) { + return { + content: [{ type: "text", text: "No active routes to clear." }], + details: { cleared: 0 }, + }; + } + + const routeDescriptions = activeRoutes.map((r) => r.description); + + // Clean up all routes + for (const [id, cleanup] of routeCleanups) { + await cleanup(); + } + + activeRoutes.length = 0; + routeCleanups.clear(); + + return { + content: [{ + type: "text", + text: `Cleared ${count} route(s):\n${routeDescriptions.map((d) => ` - ${d}`).join("\n")}`, + }], + details: { cleared: count, routes: routeDescriptions }, + }; + } catch (err: any) { + return { + content: [{ type: "text", text: `Clear routes failed: ${err.message}` }], + details: { error: err.message }, + isError: true, + }; + } + }, + }); +}
diff --git a/src/resources/extensions/browser-tools/tools/pdf.ts b/src/resources/extensions/browser-tools/tools/pdf.ts
new file mode 100644
index 000000000..5808aa0d3
--- /dev/null
+++ b/src/resources/extensions/browser-tools/tools/pdf.ts
@@ -0,0 +1,141 @@
+import type { ExtensionAPI } from "@gsd/pi-coding-agent";
+import { Type } from "@sinclair/typebox";
+import type { ToolDeps } from "../state.js";
+
+/**
+ * PDF export — render the current page to a PDF via Playwright's page.pdf().
+ */
+
+/** Named paper sizes accepted by page.pdf({ format }), keyed by lower-cased name. */
+const NAMED_FORMATS = new Map<string, string>(
+  ["A0", "A1", "A2", "A3", "A4", "A5", "A6", "Letter", "Legal", "Tabloid", "Ledger"].map(
+    (format): [string, string] => [format.toLowerCase(), format],
+  ),
+);
+
+/**
+ * Custom size: "WIDTH x HEIGHT", each side a number with an optional px/in/cm/mm unit.
+ * Anchoring each side on its number keeps the "x" in "px" from being read as the separator.
+ */
+const CUSTOM_FORMAT = /^(\d*\.?\d+\s*(?:px|in|cm|mm)?)\s*[x×]\s*(\d*\.?\d+\s*(?:px|in|cm|mm)?)$/i;
+
+interface ResolvedPdfFormat {
+  /** Size options to pass to page.pdf(). */
+  options: { format: string } | { width: string; height: string };
+  /** Format actually used, for reporting back to the caller. */
+  label: string;
+  /** True when the request could not be parsed and A4 was substituted. */
+  fellBack: boolean;
+}
+
+/**
+ * Translate the user-supplied format string into page.pdf() size options.
+ * Named formats match case-insensitively; anything else is tried as a custom size.
+ * Unparseable input falls back to A4 so a typo does not fail the whole export.
+ */
+function resolvePdfFormat(input: string | undefined): ResolvedPdfFormat {
+  const requested = (input ?? "A4").trim();
+
+  const named = NAMED_FORMATS.get(requested.toLowerCase());
+  if (named) {
+    return { options: { format: named }, label: named, fellBack: false };
+  }
+
+  const custom = CUSTOM_FORMAT.exec(requested);
+  if (custom) {
+    // Drop inner whitespace ("8.5 in" -> "8.5in") so the unit is the trailing two characters.
+    const width = custom[1]!.replace(/\s+/g, "");
+    const height = custom[2]!.replace(/\s+/g, "");
+    return { options: { width, height }, label: `${width} x ${height}`, fellBack: false };
+  }
+
+  return { options: { format: "A4" }, label: "A4", fellBack: true };
+}
+
+export function registerPdfTools(pi: ExtensionAPI, deps: ToolDeps): void {
+  pi.registerTool({
+    name: "browser_save_pdf",
+    label: "Browser Save PDF",
+    description:
+      "Render current page as PDF artifact via Playwright's page.pdf(). " +
+      "Supports A4/Letter/custom page formats and optional background graphics. " +
+      "Writes to session artifacts directory. Chromium only.",
+    parameters: Type.Object({
+      filename: Type.Optional(
+        Type.String({ description: "Output filename (default: auto-generated from page title + timestamp)." }),
+      ),
+      format: Type.Optional(
+        Type.String({
+          description:
+            "Page format: 'A4' (default), 'Letter', 'Legal', 'Tabloid', or custom like '8.5in x 11in'. " +
+            "Custom format uses CSS dimension syntax for width x height.",
+        }),
+      ),
+      printBackground: Type.Optional(
+        Type.Boolean({ description: "Include background graphics (default: true)." }),
+      ),
+    }),
+
+    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
+      try {
+        const { page: p } = await deps.ensureBrowser();
+
+        const url = p.url();
+        const title = await p.title().catch(() => "untitled");
+
+        // Resolve filename
+        const timestamp = deps.formatArtifactTimestamp(Date.now());
+        const safeName = deps.sanitizeArtifactName(params.filename || `${title}-${timestamp}`, `pdf-${timestamp}`);
+        const filename = safeName.endsWith(".pdf") ? safeName : `${safeName}.pdf`;
+
+        // Resolve page size
+        const format = resolvePdfFormat(params.format);
+
+        // Generate PDF
+        await deps.ensureSessionArtifactDir();
+        const outputPath = deps.buildSessionArtifactPath(filename);
+        const pdfOptions: Record<string, unknown> = {
+          ...format.options,
+          printBackground: params.printBackground ?? true,
+          path: outputPath,
+        };
+
+        await p.pdf(pdfOptions as any);
+
+        // Read file size
+        const { stat } = await import("node:fs/promises");
+        const { size: sizeBytes } = await stat(outputPath);
+        const sizeKB = (sizeBytes / 1024).toFixed(1);
+
+        // Say so when the request was not understood instead of echoing it back as if honoured.
+        const formatLine = format.fellBack
+          ? `${format.label} (could not parse "${params.format ?? ""}", fell back to A4)`
+          : format.label;
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `PDF saved: ${outputPath}\nSize: ${sizeKB} KB\nFormat: ${formatLine}\nPage: ${title}\nURL: ${url}`,
+            },
+          ],
+          details: {
+            path: outputPath,
+            sizeBytes,
+            format: format.label,
+            requestedFormat: params.format,
+            formatFallback: format.fellBack,
+            pageUrl: url,
+            pageTitle: title,
+          },
+        };
+      } catch (err: any) {
+        return {
+          content: [{ type: "text", text: `PDF generation failed: ${err.message}` }],
+          details: { error: err.message },
+          isError: true,
+        };
+      }
+    },
+  });
+}
diff --git a/src/resources/extensions/browser-tools/tools/state-persistence.ts b/src/resources/extensions/browser-tools/tools/state-persistence.ts
new file mode 100644
index 000000000..497e51c46
--- /dev/null
+++ b/src/resources/extensions/browser-tools/tools/state-persistence.ts
@@ -0,0 +1,239 @@
+import type { ExtensionAPI } from "@gsd/pi-coding-agent";
+import { Type } from "@sinclair/typebox";
+import type { ToolDeps } from "../state.js";
+
+/**
+ * State persistence tools — save/restore cookies, localStorage, sessionStorage.
+ */
+
+const STATE_DIR = ".gsd/browser-state";
+
+/** Storage key/value pairs grouped by the origin they were captured from. */
+type StorageByOrigin = Record<string, Record<string, string>>;
+
+/**
+ * Origin of a page URL, or null when the page has none to key storage on.
+ * about:blank and data: URLs serialise their origin as the string "null".
+ */
+function pageOrigin(url: string): string | null {
+  try {
+    const { origin } = new URL(url);
+    return origin === "null" ? null : origin;
+  } catch {
+    return null;
+  }
+}
+
+export function registerStatePersistenceTools(pi: ExtensionAPI, deps: ToolDeps): void {
+  // -------------------------------------------------------------------------
+  // browser_save_state
+  // -------------------------------------------------------------------------
+  pi.registerTool({
+    name: "browser_save_state",
+    label: "Browser Save State",
+    description:
+      "Save cookies, localStorage, and sessionStorage to disk so authenticated sessions survive browser restarts. " +
+      "State files are written to .gsd/browser-state/ and should be gitignored (may contain auth tokens). " +
+      "Never displays secret values in output.",
+    parameters: Type.Object({
+      name: Type.Optional(
+        Type.String({ description: "Name for the state file (default: 'default'). Used as the filename stem." }),
+      ),
+    }),
+
+    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
+      try {
+        const { context: ctx, page: p } = await deps.ensureBrowser();
+        const name = deps.sanitizeArtifactName(params.name ?? "default", "default");
+
+        const { mkdir, writeFile } = await import("node:fs/promises");
+        const path = await import("node:path");
+        const stateDir = path.resolve(process.cwd(), STATE_DIR);
+        await mkdir(stateDir, { recursive: true });
+
+        // Put the ignore rule in place before any state file exists in the directory.
+        await writeFile(path.join(stateDir, ".gitignore"), "*\n!.gitignore\n").catch(() => {});
+
+        // 1. Playwright storageState: cookies + localStorage
+        const storageState = await ctx.storageState();
+
+        // 2. sessionStorage: not part of storageState, so read it from the current page
+        const sessionStorageData: StorageByOrigin = {};
+        const origin = pageOrigin(p.url());
+        if (origin) {
+          try {
+            const ssData = await p.evaluate(() => {
+              const data: Record<string, string> = {};
+              for (let i = 0; i < sessionStorage.length; i++) {
+                const key = sessionStorage.key(i);
+                if (key !== null) data[key] = sessionStorage.getItem(key) ?? "";
+              }
+              return data;
+            });
+            if (Object.keys(ssData).length > 0) {
+              sessionStorageData[origin] = ssData;
+            }
+          } catch {
+            // Best effort: the document may deny storage access
+          }
+        }
+
+        const combined = {
+          storageState,
+          sessionStorage: sessionStorageData,
+          savedAt: new Date().toISOString(),
+          url: p.url(),
+        };
+
+        // The file can hold session cookies and tokens: create it owner-readable only.
+        const filePath = path.join(stateDir, `${name}.json`);
+        await writeFile(filePath, JSON.stringify(combined, null, 2), { mode: 0o600 });
+
+        const cookieCount = storageState.cookies?.length ?? 0;
+        const localStorageOrigins = storageState.origins?.length ?? 0;
+        const sessionStorageOrigins = Object.keys(sessionStorageData).length;
+
+        return {
+          content: [{
+            type: "text",
+            text: `State saved: ${filePath}\nCookies: ${cookieCount}\nlocalStorage origins: ${localStorageOrigins}\nsessionStorage origins: ${sessionStorageOrigins}`,
+          }],
+          details: {
+            path: filePath,
+            cookieCount,
+            localStorageOrigins,
+            sessionStorageOrigins,
+          },
+        };
+      } catch (err: any) {
+        return {
+          content: [{ type: "text", text: `Save state failed: ${err.message}` }],
+          details: { error: err.message },
+          isError: true,
+        };
+      }
+    },
+  });
+
+  // -------------------------------------------------------------------------
+  // browser_restore_state
+  // -------------------------------------------------------------------------
+  pi.registerTool({
+    name: "browser_restore_state",
+    label: "Browser Restore State",
+    description:
+      "Restore cookies, localStorage, and sessionStorage from a previously saved state file. " +
+      "Injects cookies via context.addCookies() and storage via page.evaluate(). " +
+      "Cookies apply context-wide. Storage is only written for the origin the page is currently on, " +
+      "so restore again after navigating to apply that site's storage.",
+    parameters: Type.Object({
+      name: Type.Optional(
+        Type.String({ description: "Name of the state file to restore (default: 'default')." }),
+      ),
+    }),
+
+    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
+      try {
+        const { context: ctx, page: p } = await deps.ensureBrowser();
+        const name = deps.sanitizeArtifactName(params.name ?? "default", "default");
+
+        const { readFile } = await import("node:fs/promises");
+        const path = await import("node:path");
+        const filePath = path.join(process.cwd(), STATE_DIR, `${name}.json`);
+
+        let raw: string;
+        try {
+          raw = await readFile(filePath, "utf-8");
+        } catch {
+          return {
+            content: [{ type: "text", text: `State file not found: ${filePath}` }],
+            details: { error: "file_not_found", path: filePath },
+            isError: true,
+          };
+        }
+
+        const combined = JSON.parse(raw);
+        const storageState = combined.storageState;
+        const sessionStorageData: StorageByOrigin = combined.sessionStorage ?? {};
+
+        // 1. Restore cookies (context-wide, independent of the current page)
+        let cookieCount = 0;
+        if (storageState?.cookies?.length) {
+          await ctx.addCookies(storageState.cookies);
+          cookieCount = storageState.cookies.length;
+        }
+
+        // page.evaluate() writes into whichever document is loaded and does not fail on a
+        // different origin, so entries saved from other origins must be filtered out here.
+        const currentOrigin = pageOrigin(p.url());
+        const skippedOrigins = new Set<string>();
+
+        // 2. Restore localStorage for the current origin
+        let localStorageOrigins = 0;
+        for (const saved of storageState?.origins ?? []) {
+          if (saved.origin !== currentOrigin) {
+            skippedOrigins.add(String(saved.origin));
+            continue;
+          }
+          try {
+            await p.evaluate((items: Array<{ name: string; value: string }>) => {
+              for (const { name, value } of items) {
+                localStorage.setItem(name, value);
+              }
+            }, saved.localStorage ?? []);
+            localStorageOrigins++;
+          } catch {
+            // Storage access denied or quota exceeded
+          }
+        }
+
+        // 3. Restore sessionStorage for the current origin
+        let sessionStorageOrigins = 0;
+        for (const [origin, data] of Object.entries(sessionStorageData)) {
+          if (origin !== currentOrigin) {
+            skippedOrigins.add(origin);
+            continue;
+          }
+          try {
+            await p.evaluate((items: Record<string, string>) => {
+              for (const [key, value] of Object.entries(items)) {
+                sessionStorage.setItem(key, value);
+              }
+            }, data);
+            sessionStorageOrigins++;
+          } catch {
+            // Storage access denied or quota exceeded
+          }
+        }
+
+        const skipped = Array.from(skippedOrigins);
+        const skippedLine = skipped.length
+          ? `\nSkipped storage for other origins: ${skipped.join(", ")}`
+          : "";
+
+        return {
+          content: [{
+            type: "text",
+            text: `State restored from: ${filePath}\nCookies: ${cookieCount}\nlocalStorage origins: ${localStorageOrigins}\nsessionStorage origins: ${sessionStorageOrigins}\nSaved at: ${combined.savedAt ?? "unknown"}${skippedLine}`,
+          }],
+          details: {
+            path: filePath,
+            cookieCount,
+            localStorageOrigins,
+            sessionStorageOrigins,
+            skippedOrigins: skipped,
+            savedAt: combined.savedAt,
+            savedUrl: combined.url,
+          },
+        };
+      } catch (err: any) {
+        return {
+          content: [{ type: "text", text: `Restore state failed: ${err.message}` }],
+          details: { error: err.message },
+          isError: true,
+        };
+      }
+    },
+  });
+}
diff --git a/src/resources/extensions/browser-tools/tools/visual-diff.ts b/src/resources/extensions/browser-tools/tools/visual-diff.ts
new file mode 100644
index 000000000..f13b88bb9
--- /dev/null
+++ b/src/resources/extensions/browser-tools/tools/visual-diff.ts
@@ -0,0 +1,223 @@
+import type { ExtensionAPI } from "@gsd/pi-coding-agent";
+import { Type } from "@sinclair/typebox";
+import type { ToolDeps } from "../state.js";
+
+/**
+ * Visual regression diffing — compare current page screenshot against a stored baseline.
+ */
+
+const BASELINE_DIR = ".gsd/browser-baselines";
+
+/** Per-pixel colour tolerance used when the caller does not supply one. */
+const DEFAULT_THRESHOLD = 0.1;
+
+/** Clamp the caller's threshold into [0, 1]; missing or non-finite values use the default. */
+function normalizeThreshold(value: number | undefined): number {
+  if (value === undefined || !Number.isFinite(value)) return DEFAULT_THRESHOLD;
+  return Math.min(1, Math.max(0, value));
+}
+
+export function registerVisualDiffTools(pi: ExtensionAPI, deps: ToolDeps): void {
+  pi.registerTool({
+    name: "browser_visual_diff",
+    label: "Browser Visual Diff",
+    description:
+      "Compare current page screenshot against a stored baseline pixel-by-pixel. " +
+      "Returns similarity score (0–1), diff pixel count, and optionally generates a diff image highlighting changes. " +
+      "On first run with no baseline, saves the current screenshot as the baseline. " +
+      "Baselines are stored in .gsd/browser-baselines/ (gitignored, environment-specific).",
+    parameters: Type.Object({
+      name: Type.Optional(
+        Type.String({
+          description:
+            "Baseline name (default: auto-generated from URL + viewport). " +
+            "Use consistent names to compare the same view across runs.",
+        }),
+      ),
+      selector: Type.Optional(
+        Type.String({
+          description: "CSS selector to scope comparison to a specific element instead of full viewport.",
+        }),
+      ),
+      threshold: Type.Optional(
+        Type.Number({
+          description:
+            "Pixel matching threshold 0–1 (default: 0.1). " +
+            "Higher values are more tolerant of anti-aliasing and rendering differences.",
+        }),
+      ),
+      updateBaseline: Type.Optional(
+        Type.Boolean({
+          description: "If true, overwrite the existing baseline with the current screenshot (default: false).",
+        }),
+      ),
+    }),
+
+    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
+      try {
+        const { page: p } = await deps.ensureBrowser();
+        const { mkdir, readFile, writeFile } = await import("node:fs/promises");
+        const pathMod = await import("node:path");
+
+        const baselineDir = pathMod.resolve(process.cwd(), BASELINE_DIR);
+        await mkdir(baselineDir, { recursive: true });
+
+        // Ensure .gitignore
+        const gitignorePath = pathMod.join(baselineDir, ".gitignore");
+        await writeFile(gitignorePath, "*\n!.gitignore\n").catch(() => {});
+
+        // Generate baseline name
+        const url = p.url();
+        const viewport = p.viewportSize();
+        const vpSuffix = viewport ? `${viewport.width}x${viewport.height}` : "unknown";
+        const autoName = deps.sanitizeArtifactName(
+          `${new URL(url).pathname.replace(/\//g, "-")}-${vpSuffix}`,
+          `baseline-${vpSuffix}`,
+        );
+        const name = deps.sanitizeArtifactName(params.name ?? autoName, autoName);
+
+        const baselinePath = pathMod.join(baselineDir, `${name}.png`);
+        const diffPath = pathMod.join(baselineDir, `${name}-diff.png`);
+
+        // Capture current screenshot as PNG (needed for pixel comparison)
+        let currentBuffer: Buffer;
+        if (params.selector) {
+          const locator = p.locator(params.selector).first();
+          currentBuffer = await locator.screenshot({ type: "png" });
+        } else {
+          currentBuffer = await p.screenshot({ type: "png", fullPage: false });
+        }
+
+        // Check if baseline exists
+        let baselineBuffer: Buffer | null = null;
+        try {
+          baselineBuffer = await readFile(baselinePath) as Buffer;
+        } catch {
+          // No baseline yet
+        }
+
+        if (!baselineBuffer || params.updateBaseline) {
+          // Save as new baseline
+          await writeFile(baselinePath, currentBuffer);
+          return {
+            content: [{
+              type: "text",
+              text: baselineBuffer
+                ? `Baseline updated: ${baselinePath}\nSize: ${(currentBuffer.length / 1024).toFixed(1)} KB`
+                : `Baseline created (first run): ${baselinePath}\nSize: ${(currentBuffer.length / 1024).toFixed(1)} KB\nRe-run to compare against this baseline.`,
+            }],
+            details: {
+              baselinePath,
+              baselineCreated: !baselineBuffer,
+              baselineUpdated: !!baselineBuffer,
+              sizeBytes: currentBuffer.length,
+            },
+          };
+        }
+
+        // Perform pixel comparison using sharp for PNG decoding
+        const sharp = (await import("sharp")).default;
+
+        const baselineMeta = await sharp(baselineBuffer).metadata();
+        const currentMeta = await sharp(currentBuffer).metadata();
+
+        const bWidth = baselineMeta.width ?? 0;
+        const bHeight = baselineMeta.height ?? 0;
+        const cWidth = currentMeta.width ?? 0;
+        const cHeight = currentMeta.height ?? 0;
+
+        // If dimensions differ, report mismatch
+        if (bWidth !== cWidth || bHeight !== cHeight) {
+          return {
+            content: [{
+              type: "text",
+              text: `Dimension mismatch: baseline is ${bWidth}x${bHeight}, current is ${cWidth}x${cHeight}. Cannot compare.\nUse updateBaseline: true to reset.`,
+            }],
+            details: {
+              match: false,
+              dimensionMismatch: true,
+              baselineDimensions: { width: bWidth, height: bHeight },
+              currentDimensions: { width: cWidth, height: cHeight },
+            },
+          };
+        }
+
+        // Extract raw RGBA pixel data
+        const baselineRaw = await sharp(baselineBuffer).ensureAlpha().raw().toBuffer();
+        const currentRaw = await sharp(currentBuffer).ensureAlpha().raw().toBuffer();
+
+        const width = bWidth;
+        const height = bHeight;
+        const totalPixels = width * height;
+        const threshold = normalizeThreshold(params.threshold);
+
+        // Simple pixel-by-pixel comparison (avoiding pixelmatch dependency)
+        const diffData = Buffer.alloc(width * height * 4);
+        let diffPixels = 0;
+        // Squared RGB distance limit: threshold 1 equals the black-to-white distance.
+        const thresholdSq = threshold * threshold * 255 * 255 * 3;
+
+        for (let i = 0; i < totalPixels; i++) {
+          const offset = i * 4;
+          const dr = baselineRaw[offset] - currentRaw[offset];
+          const dg = baselineRaw[offset + 1] - currentRaw[offset + 1];
+          const db = baselineRaw[offset + 2] - currentRaw[offset + 2];
+          const distSq = dr * dr + dg * dg + db * db;
+
+          if (distSq > thresholdSq) {
+            diffPixels++;
+            // Mark diff pixels as red
+            diffData[offset] = 255; // R
+            diffData[offset + 1] = 0; // G
+            diffData[offset + 2] = 0; // B
+            diffData[offset + 3] = 255; // A
+          } else {
+            // Dim unchanged pixels
+            diffData[offset] = currentRaw[offset] >> 1;
+            diffData[offset + 1] = currentRaw[offset + 1] >> 1;
+            diffData[offset + 2] = currentRaw[offset + 2] >> 1;
+            diffData[offset + 3] = 255;
+          }
+        }
+
+        const match = diffPixels === 0;
+        // Guard the ratio so an empty image reports 0% different rather than NaN.
+        const diffRatio = totalPixels > 0 ? diffPixels / totalPixels : 0;
+        const similarity = 1 - diffRatio;
+
+        // Only a mismatch has anything to show; on a match no diff image is reported.
+        if (!match) {
+          await sharp(diffData, { raw: { width, height, channels: 4 } })
+            .png()
+            .toFile(diffPath);
+        }
+
+        return {
+          content: [{
+            type: "text",
+            text: match
+              ? `Visual diff: MATCH (100% similar)\nBaseline: ${baselinePath}`
+              : `Visual diff: ${(similarity * 100).toFixed(2)}% similar\nDiff pixels: ${diffPixels} of ${totalPixels} (${(diffRatio * 100).toFixed(2)}%)\nDiff image: ${diffPath}\nBaseline: ${baselinePath}`,
+          }],
+          details: {
+            match,
+            similarity,
+            diffPixels,
+            totalPixels,
+            diffPercentage: diffRatio * 100,
+            dimensions: { width, height },
+            baselinePath,
+            diffImagePath: match ? undefined : diffPath,
+            threshold,
+          },
+        };
+      } catch (err: any) {
+        return {
+          content: [{ type: "text", text: `Visual diff failed: ${err.message}` }],
+          details: { error: err.message },
+          isError: true,
+        };
+      }
+    },
+  });
+}
diff --git a/src/resources/extensions/browser-tools/tools/zoom.ts b/src/resources/extensions/browser-tools/tools/zoom.ts
new file mode 100644
index 000000000..6a146c345
--- /dev/null
+++ b/src/resources/extensions/browser-tools/tools/zoom.ts
@@ -0,0 +1,128 @@
+import type { ExtensionAPI } from "@gsd/pi-coding-agent";
+import { Type } from "@sinclair/typebox";
+import type { ToolDeps } from "../state.js";
+
+/**
+ * Region zoom / high-res capture — capture and upscale specific page regions.
+ */
+
+/** Upper bound on the scale factor so the resized bitmap cannot exhaust memory. */
+const MAX_SCALE = 8;
+
+/**
+ * Pixel size of a PNG, read from its IHDR chunk (big-endian width at byte 16, height at 20).
+ * Needed because screenshots are captured in device pixels, not the CSS pixels of the clip.
+ */
+function pngSize(png: Buffer): { width: number; height: number } {
+  return { width: png.readUInt32BE(16), height: png.readUInt32BE(20) };
+}
+
+export function registerZoomTools(pi: ExtensionAPI, deps: ToolDeps): void {
+  pi.registerTool({
+    name: "browser_zoom_region",
+    label: "Browser Zoom Region",
+    description:
+      "Capture and optionally upscale a specific rectangular region of the page for detailed inspection. " +
+      "Useful for dense UIs where full-page screenshots have text too small to read. " +
+      "Returns the region as an inline image, same as browser_screenshot.",
+    parameters: Type.Object({
+      x: Type.Number({ description: "Left coordinate of the region in CSS pixels." }),
+      y: Type.Number({ description: "Top coordinate of the region in CSS pixels." }),
+      width: Type.Number({ description: "Width of the region in CSS pixels." }),
+      height: Type.Number({ description: "Height of the region in CSS pixels." }),
+      scale: Type.Optional(
+        Type.Number({
+          description: "Upscale factor (default: 2). Use 1 for native resolution, 2-4 for zoomed detail.",
+        }),
+      ),
+    }),
+
+    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
+      try {
+        const { page: p } = await deps.ensureBrowser();
+        const { x, y, width, height } = params;
+        const requestedScale = params.scale ?? 2;
+
+        // Validate dimensions (written so NaN is rejected too)
+        if (!(width > 0) || !(height > 0)) {
+          return {
+            content: [{ type: "text", text: "Width and height must be positive." }],
+            details: { error: "invalid_dimensions" },
+            isError: true,
+          };
+        }
+
+        if (!Number.isFinite(requestedScale) || requestedScale <= 0) {
+          return {
+            content: [{ type: "text", text: "Scale must be a positive number." }],
+            details: { error: "invalid_scale" },
+            isError: true,
+          };
+        }
+        const scale = Math.min(requestedScale, MAX_SCALE);
+
+        // Capture the region using Playwright's clip option
+        const regionBuffer = await p.screenshot({
+          type: "png",
+          clip: { x, y, width, height },
+        });
+
+        // Scale from what was actually captured: on high-DPI displays that is already larger than the clip.
+        const captured = pngSize(regionBuffer);
+        const outputWidth = Math.max(1, Math.round(captured.width * scale));
+        const outputHeight = Math.max(1, Math.round(captured.height * scale));
+
+        let outputBuffer: Buffer = regionBuffer;
+
+        // Resize unless native resolution was requested
+        if (scale !== 1) {
+          const sharp = (await import("sharp")).default;
+          outputBuffer = await sharp(regionBuffer)
+            .resize(outputWidth, outputHeight, {
+              kernel: "lanczos3",
+              fit: "fill",
+            })
+            .png()
+            .toBuffer();
+        }
+
+        const base64Data = outputBuffer.toString("base64");
+        const title = await p.title();
+        const url = p.url();
+
+        const scaleNote = scale === 1
+          ? ""
+          : ` ${scale > 1 ? "upscaled" : "downscaled"} ${scale}x to ${outputWidth}x${outputHeight}`;
+        const capNote = scale < requestedScale ? ` (requested ${requestedScale}x, capped at ${MAX_SCALE}x)` : "";
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Region capture: ${width}x${height} at (${x},${y})${scaleNote}${capNote}\nPage: ${title}\nURL: ${url}`,
+            },
+            {
+              type: "image",
+              data: base64Data,
+              mimeType: "image/png",
+            },
+          ],
+          details: {
+            region: { x, y, width, height },
+            scale,
+            capturedDimensions: captured,
+            outputDimensions: { width: outputWidth, height: outputHeight },
+            title,
+            url,
+          },
+        };
+      } catch (err: any) {
+        return {
+          content: [{ type: "text", text: `Region zoom failed: ${err.message}` }],
+          details: { error: err.message },
+          isError: true,
+        };
+      }
+    },
+  });
+}