chore(M002/S01): auto-commit after reassess-roadmap

This commit is contained in:
Lex Christopherson 2026-03-12 23:19:42 -06:00
parent 9bf6e911f3
commit ee6dce643b
29 changed files with 6017 additions and 5030 deletions

View file

@ -18,3 +18,7 @@
| D010 | M002 | arch | Browser-side utility injection | page.addInitScript under window.__pi namespace | Survives navigation, available before page scripts, namespaced to avoid collisions. | Yes — if timing issues discovered |
| D011 | M002 | convention | Intent resolution approach | Deterministic heuristics only, no LLM calls | Predictable latency and cost. Scoring functions are testable and debuggable. | Yes — if heuristic coverage proves insufficient |
| D012 | M002 | convention | Browser reuse across sessions | Skip completely | Architecturally different from within-session work; user directed to exclude entirely. | No |
| D013 | M002/S01 | pattern | Mutable state accessor pattern | get/set functions for all 18 state variables, not `export let` | ES module live bindings break under jiti's CJS shim. Accessors guarantee consumers see mutations. | No |
| D014 | M002/S01 | pattern | ToolDeps interface location | Defined in state.ts alongside types it references | Keeps the dependency graph simple — tool files import state.ts for ToolDeps + types. | Yes — could move to separate types.ts if state.ts grows |
| D015 | M002/S01 | pattern | Factory pattern for lifecycle-dependent utils | createGetLivePagesSnapshot(ensureBrowser) instead of direct import | Avoids circular dependency between utils.ts and lifecycle.ts. Wired at orchestrator level. | No |
| D016 | M002/S01 | pattern | Tool file import strategy | Tool files import state accessors and core.js functions directly — ToolDeps carries only infrastructure functions needing lifecycle wiring | Keeps ToolDeps lean. State accessors are stable imports, not runtime-wired dependencies. Avoids bloating the deps interface with every utility. | Yes — if ToolDeps grows unwieldy |

View file

@ -26,7 +26,7 @@ The GSD extension is fully functional with:
- **Secrets gate**: `startAuto()` checks `getManifestStatus()` before first dispatch
- **Disk-driven state**: `.gsd/` files are the source of truth, `STATE.md` is derived cache
- **File parsing**: `files.ts` has markdown parsers for all GSD file types
- **Browser-tools**: Single `index.ts` (~5000 lines) with all tool registrations, shared infrastructure in `core.js` (~1000 lines). Uses Playwright for browser control. Accessibility-first state representation, deterministic versioned refs, adaptive DOM settling, compact post-action summaries.
- **Browser-tools**: Modular structure — slim `index.ts` orchestrator (47 lines), 7 focused infrastructure modules (state.ts, utils.ts, evaluate-helpers.ts, lifecycle.ts, capture.ts, settle.ts, refs.ts), 9 categorized tool files under `tools/`, shared infrastructure in `core.js` (~1000 lines). Browser-side utilities injected once via `addInitScript` under `window.__pi` namespace. Uses Playwright for browser control. Accessibility-first state representation, deterministic versioned refs, adaptive DOM settling, compact post-action summaries.
- **Prompt templates**: `prompts/` directory with mustache-like `{{var}}` substitution
- **TUI components**: `@gsd/pi-tui` provides `Editor`, `Text`, key handling, themes
- **Branch-per-slice**: git branches isolate slice work, squash-merged to main on completion

View file

@ -4,28 +4,6 @@ This file is the explicit capability and coverage contract for the project.
## Active
### R015 — Module decomposition of browser-tools
- Class: quality-attribute
- Status: active
- Description: The monolithic browser-tools index.ts (~5000 lines) is split into focused modules: shared infrastructure, tool groups, and browser-side utilities. All 43 existing tools continue to work identically.
- Why it matters: A 5000-line file is unmaintainable and makes targeted changes risky. Module boundaries enable safe refactoring and new tool development.
- Source: user
- Primary owning slice: M002/S01
- Supporting slices: none
- Validation: unmapped
- Notes: core.js already exists with ~1000 lines of shared utilities. The split extends this pattern.
### R016 — Shared browser-side evaluate utilities
- Class: quality-attribute
- Status: active
- Description: Common functions duplicated across page.evaluate boundaries (cssPath, simpleHash, isVisible, isEnabled, inferRole, accessibleName) are injected once and referenced from all evaluate callbacks.
- Why it matters: Currently buildRefSnapshot and resolveRefTarget each redeclare ~100 lines of identical utility code. Deduplication reduces payload size, improves maintainability, and ensures consistency.
- Source: user
- Primary owning slice: M002/S01
- Supporting slices: none
- Validation: unmapped
- Notes: Options include page.addInitScript or a one-time setup evaluate that attaches to window.
### R017 — Consolidated state capture per action
- Class: core-capability
- Status: active
@ -138,6 +116,28 @@ This file is the explicit capability and coverage contract for the project.
## Validated
### R015 — Module decomposition of browser-tools
- Class: quality-attribute
- Status: validated
- Description: The monolithic browser-tools index.ts (~5000 lines) is split into focused modules: shared infrastructure, tool groups, and browser-side utilities. All 43 existing tools continue to work identically.
- Why it matters: A 5000-line file is unmaintainable and makes targeted changes risky. Module boundaries enable safe refactoring and new tool development.
- Source: user
- Primary owning slice: M002/S01
- Supporting slices: none
- Validation: Extension loads via jiti, 43 tools register, browser navigate/snapshot/click work against real page, index.ts is a 47-line orchestrator with zero registerTool calls, 9 tool files under tools/.
- Notes: core.js already exists with ~1000 lines of shared utilities. The split extends this pattern.
### R016 — Shared browser-side evaluate utilities
- Class: quality-attribute
- Status: validated
- Description: Common functions duplicated across page.evaluate boundaries (cssPath, simpleHash, isVisible, isEnabled, inferRole, accessibleName) are injected once and referenced from all evaluate callbacks.
- Why it matters: Currently buildRefSnapshot and resolveRefTarget each redeclare ~100 lines of identical utility code. Deduplication reduces payload size, improves maintainability, and ensures consistency.
- Source: user
- Primary owning slice: M002/S01
- Supporting slices: none
- Validation: window.__pi contains all 9 functions, survives navigation, refs.ts has zero inline redeclarations, close/reopen re-injects via addInitScript correctly.
- Notes: Uses context.addInitScript under window.__pi namespace.
### R001 — Secret forecasting during milestone planning
- Class: core-capability
- Status: validated
@ -336,8 +336,8 @@ This file is the explicit capability and coverage contract for the project.
| R012 | operability | deferred | none | none | unmapped |
| R013 | anti-feature | out-of-scope | none | none | n/a |
| R014 | anti-feature | out-of-scope | none | none | n/a |
| R015 | quality-attribute | active | M002/S01 | none | unmapped |
| R016 | quality-attribute | active | M002/S01 | none | unmapped |
| R015 | quality-attribute | validated | M002/S01 | none | jiti load, 43 tools register, slim index, browser spot-check |
| R016 | quality-attribute | validated | M002/S01 | none | window.__pi injection, zero inline redeclarations, survives navigation |
| R017 | core-capability | active | M002/S02 | M002/S01 | unmapped |
| R018 | core-capability | active | M002/S02 | none | unmapped |
| R019 | core-capability | active | M002/S02 | none | unmapped |
@ -353,8 +353,8 @@ This file is the explicit capability and coverage contract for the project.
## Coverage Summary
- Active requirements: 12
- Validated requirements: 10
- Active requirements: 10
- Validated requirements: 12
- Deferred requirements: 3
- Out of scope: 3
- Unmapped active requirements: 12
- Unmapped active requirements: 10

View file

@ -1,24 +1,19 @@
# GSD State
**Active Milestone:** M002 — Browser Tools Performance & Intelligence
**Active Slice:** None
**Active Task:** None
**Phase:** planned
**Requirements Status:** 12 active · 10 validated · 3 deferred · 3 out of scope
**Active Slice:** S02 — Action pipeline performance
**Phase:** planning
**Requirements Status:** 10 active · 12 validated · 3 deferred · 3 out of scope
## Milestone Registry
- ✅ **M001:** Proactive Secret Management
- 🔵 **M002:** Browser Tools Performance & Intelligence
- 🔄 **M002:** Browser Tools Performance & Intelligence
## Recent Decisions
- D007: Split index.ts into focused modules (state, lifecycle, capture, settle, refs, utils, evaluate-helpers, tools/)
- D008: Use sharp for image resizing
- D009: Navigate screenshots off by default
- D010: Inject browser-side utilities via addInitScript under window.__pi
- D011: Deterministic heuristics only for intent resolution
- None recorded
## Blockers
- None
## Next Action
Begin S01: Module decomposition and shared evaluate utilities — plan the slice, then execute.
Plan slice S02 (Action pipeline performance).

View file

@ -58,7 +58,7 @@ This milestone is complete only when all are true:
## Slices
- [ ] **S01: Module decomposition and shared evaluate utilities** `risk:high` `depends:[]`
- [x] **S01: Module decomposition and shared evaluate utilities** `risk:high` `depends:[]`
> After this: all 43 existing browser tools work identically with the new module structure; shared browser-side utilities (cssPath, simpleHash, isVisible, isEnabled, inferRole, accessibleName) are injected once via addInitScript and used by buildRefSnapshot and resolveRefTarget — verified by build success and spot-check against a real page.
- [ ] **S02: Action pipeline performance** `risk:medium` `depends:[S01]`

View file

@ -0,0 +1,85 @@
# S01: Module decomposition and shared evaluate utilities
**Goal:** Split browser-tools index.ts (~5000 lines) into focused modules with shared browser-side utilities injected via addInitScript — all 43 existing tools work identically after.
**Demo:** Extension loads via jiti, all 43 tools register, browser_navigate + browser_snapshot_refs + browser_click work against a real page, buildRefSnapshot/resolveRefTarget use window.__pi utilities instead of inline duplicates.
## Must-Haves
- All 18 mutable state variables live in state.ts with accessor/mutator functions
- Infrastructure functions (ensureBrowser, captureCompactPageState, settleAfterActionAdaptive, buildRefSnapshot, resolveRefTarget, etc.) live in dedicated modules
- 43 tool registrations distributed across 9 categorized files in tools/
- index.ts is a slim orchestrator (<50 lines) that imports and calls registration functions
- evaluate-helpers.ts exports a JS string constant defining window.__pi.{cssPath, simpleHash, isVisible, isEnabled, inferRole, accessibleName, isInteractiveEl, domPath, selectorHints}
- ensureBrowser() injects evaluate-helpers via context.addInitScript()
- buildRefSnapshot and resolveRefTarget reference window.__pi.* instead of redeclaring utilities inline
- Extension loads via jiti at runtime — no build step failures
- All 43 tools register and are callable
## Proof Level
- This slice proves: operational + integration (module split works at runtime, tools register and execute)
- Real runtime required: yes (jiti loading, Playwright browser)
- Human/UAT required: no (spot-check is agent-executable)
## Verification
- `node -e "const jiti = require('@mariozechner/jiti')(...); const ext = jiti('src/resources/extensions/browser-tools/index.ts'); console.log(typeof ext.default)"` — extension loads without error
- Run browser_navigate to a test page, then browser_snapshot_refs, then browser_click on a ref — all succeed
- Verify window.__pi utilities are available: `page.evaluate(() => typeof window.__pi?.cssPath)` returns "function"
- Count registered tools === 43
## Integration Closure
- Upstream surfaces consumed: `core.js` (pure helpers), `@gsd/pi-coding-agent` (ExtensionAPI type, truncation utils)
- New wiring introduced in this slice: state.ts accessor pattern, ToolDeps interface, addInitScript injection in ensureBrowser()
- What remains before the milestone is truly usable end-to-end: S02 (performance), S03 (screenshot/sharp), S04 (form tools), S05 (intent tools), S06 (tests)
## Tasks
- [x] **T01: Extract state, types, utilities, and evaluate-helpers modules** `est:1h`
- Why: Foundation — everything else imports from these. State accessors are the key risk (jiti mutable binding behavior). evaluate-helpers is a standalone string constant with no imports.
- Files: `src/resources/extensions/browser-tools/state.ts`, `src/resources/extensions/browser-tools/utils.ts`, `src/resources/extensions/browser-tools/evaluate-helpers.ts`
- Do: Extract all 18 mutable state variables + types into state.ts with get/set accessor functions and resetAllState(). Extract truncateText, artifact helpers, error formatting, accessibility helpers, assertion helpers, verification helpers into utils.ts. Write evaluate-helpers.ts as an exported string constant containing the browser-side JS for window.__pi utilities (cssPath, simpleHash, isVisible, isEnabled, inferRole, accessibleName, isInteractiveEl, domPath, selectorHints). Define ToolDeps interface that tool registration functions will accept. Preserve the djb2 hash invariant — simpleHash must match core.js computeContentHash algorithm.
- Verify: `node -e "..."` — state.ts, utils.ts, evaluate-helpers.ts all import without error via jiti
- Done when: Three modules exist, export correct interfaces, and load via jiti without circular dependency errors
- [x] **T02: Extract infrastructure modules and wire addInitScript injection** `est:1.5h`
- Why: Delivers R016 (shared evaluate utilities) and the infrastructure layer that all tool files depend on. This is where addInitScript injection lands and where buildRefSnapshot/resolveRefTarget stop redeclaring utilities.
- Files: `src/resources/extensions/browser-tools/lifecycle.ts`, `src/resources/extensions/browser-tools/capture.ts`, `src/resources/extensions/browser-tools/settle.ts`, `src/resources/extensions/browser-tools/refs.ts`
- Do: Extract ensureBrowser/closeBrowser/getActivePage/getActiveTarget/attachPageListeners into lifecycle.ts — add context.addInitScript(EVALUATE_HELPERS_SOURCE) right after browser.newContext(). Extract captureCompactPageState/postActionSummary/constrainScreenshot/captureErrorScreenshot/getRecentErrors into capture.ts. Extract settleAfterActionAdaptive/ensureMutationCounter/readMutationCounter/readFocusedDescriptor into settle.ts. Extract buildRefSnapshot/resolveRefTarget/parseRef/formatVersionedRef/staleRefGuidance into refs.ts — refactor the evaluate callbacks in buildRefSnapshot and resolveRefTarget to reference window.__pi.cssPath, window.__pi.simpleHash etc. instead of redeclaring them. All modules import state accessors from state.ts, never raw variables.
- Verify: Modules load via jiti. buildRefSnapshot evaluate callback no longer contains function declarations for cssPath/simpleHash (grep confirms). lifecycle.ts contains addInitScript call.
- Done when: Four infrastructure modules exist, lifecycle.ts injects evaluate-helpers, refs.ts uses window.__pi.*, all load without error
- [x] **T03: Extract tool registrations into grouped files and create slim index.ts** `est:1.5h`
- Why: Delivers R015 (module decomposition). The 43 tool registrations move from a single 3400-line block into 9 categorized files. index.ts becomes a slim orchestrator.
- Files: `src/resources/extensions/browser-tools/tools/navigation.ts`, `tools/screenshot.ts`, `tools/interaction.ts`, `tools/inspection.ts`, `tools/session.ts`, `tools/assertions.ts`, `tools/refs.ts`, `tools/wait.ts`, `tools/pages.ts`, `src/resources/extensions/browser-tools/index.ts`
- Do: Create tools/ directory. Each file exports a register function (e.g. registerNavigationTools(pi, deps)) that takes ExtensionAPI and ToolDeps. Move tool registrations verbatim — no logic changes, just import wiring. browser_batch in assertions.ts needs imports for settleAfterActionAdaptive, parseRef, resolveRefTarget, collectAssertionState, etc. Write new index.ts (<50 lines): import all register functions, build ToolDeps object, call each register function, register session_shutdown hook.
- Verify: Count pi.registerTool calls across all tool files === 43. Extension loads via jiti. index.ts is under 50 lines.
- Done when: Old monolithic index.ts is replaced by slim orchestrator, 9 tool files exist with correct tool counts per category, extension loads
- [x] **T04: Runtime verification against a real browser page** `est:30m`
- Why: The split is worthless if tools don't actually work. This task proves the operational contract by exercising the extension end-to-end.
- Files: none (verification only)
- Do: Load the extension, launch a browser, navigate to a page, take a snapshot, click a ref, verify window.__pi is injected. Check that buildRefSnapshot evaluate callback uses window.__pi (not inline declarations). Verify closeBrowser() resets all state. Verify re-launch after close works (addInitScript re-registered on new context).
- Verify: browser_navigate succeeds, browser_snapshot_refs returns refs, browser_click_ref resolves and clicks, page.evaluate(() => Object.keys(window.__pi)) returns expected function names, close + re-open cycle works
- Done when: All 43 tools register, navigate/snapshot/click work against a real page, window.__pi utilities are callable in evaluate context, close/reopen cycle passes
## Files Likely Touched
- `src/resources/extensions/browser-tools/index.ts` (rewritten to slim orchestrator)
- `src/resources/extensions/browser-tools/state.ts` (new)
- `src/resources/extensions/browser-tools/utils.ts` (new)
- `src/resources/extensions/browser-tools/evaluate-helpers.ts` (new)
- `src/resources/extensions/browser-tools/lifecycle.ts` (new)
- `src/resources/extensions/browser-tools/capture.ts` (new)
- `src/resources/extensions/browser-tools/settle.ts` (new)
- `src/resources/extensions/browser-tools/refs.ts` (new)
- `src/resources/extensions/browser-tools/tools/navigation.ts` (new)
- `src/resources/extensions/browser-tools/tools/screenshot.ts` (new)
- `src/resources/extensions/browser-tools/tools/interaction.ts` (new)
- `src/resources/extensions/browser-tools/tools/inspection.ts` (new)
- `src/resources/extensions/browser-tools/tools/session.ts` (new)
- `src/resources/extensions/browser-tools/tools/assertions.ts` (new)
- `src/resources/extensions/browser-tools/tools/refs.ts` (new)
- `src/resources/extensions/browser-tools/tools/wait.ts` (new)
- `src/resources/extensions/browser-tools/tools/pages.ts` (new)

View file

@ -0,0 +1,52 @@
---
estimated_steps: 5
estimated_files: 3
---
# T01: Extract state, types, utilities, and evaluate-helpers modules
**Slice:** S01 — Module decomposition and shared evaluate utilities
**Milestone:** M002
## Description
Extract the foundation modules that all other browser-tools modules will import from. `state.ts` holds all 18 mutable state variables behind accessor functions (critical for jiti compatibility — ES module live bindings may not work). `utils.ts` holds Node-side utility functions. `evaluate-helpers.ts` exports a JS string constant for browser-side injection. Define the `ToolDeps` interface that tool registration functions will consume.
## Steps
1. Create `state.ts`: move all 18 mutable state variables (lines 62–202 of index.ts), their type/interface definitions, and the constants (ARTIFACT_ROOT, HAR_FILENAME). Export get/set accessor functions for each variable (getBrowser/setBrowser, getContext/setContext, etc.). Export `resetAllState()` that mirrors current `closeBrowser()`'s reset logic. Export the `pageRegistry` and `actionTimeline` instances (these are objects with internal state, not plain variables). Import `createPageRegistry`, `createActionTimeline`, `createBoundedLogPusher` from `./core.js`.
2. Create `utils.ts`: move `truncateText()`, `formatArtifactTimestamp()`, `ensureDir()`, `writeArtifactFile()`, `copyArtifactFile()`, `ensureSessionStartedAt()`, `ensureSessionArtifactDir()`, `buildSessionArtifactPath()`, `getActivePageMetadata()`, `getActiveFrameMetadata()`, `getSessionArtifactMetadata()`, `sanitizeArtifactName()`, `getLivePagesSnapshot()`, `resolveAccessibilityScope()`, `captureAccessibilityMarkdown()`, `isCriticalResourceType()`, `updatePendingCriticalRequests()`, `getPendingCriticalRequests()`, `verificationFromChecks()`, `verificationLine()`, `collectAssertionState()`, `formatAssertionText()`, `formatDiffText()`, `getUrlHash()`, `countOpenDialogs()`, `captureClickTargetState()`, `readInputLikeValue()`, `firstErrorLine()`, `beginTrackedAction()`, `finishTrackedAction()`, `getSinceTimestamp()`, `getConsoleEntriesSince()`, `getNetworkEntriesSince()`. These import state accessors from `./state.ts`. Functions that reference `browser`, `context`, `consoleLogs`, etc. use the accessor pattern.
3. Create `evaluate-helpers.ts`: export a single `EVALUATE_HELPERS_SOURCE` string constant containing an IIFE that attaches functions to `window.__pi`. The functions: `cssPath`, `simpleHash`, `isVisible`, `isEnabled`, `inferRole`, `accessibleName`, `isInteractiveEl`, `domPath`, `selectorHints`. Copy these verbatim from `buildRefSnapshot`'s evaluate callback (lines 1228–1430 of index.ts). Wrap in `(function() { window.__pi = window.__pi || {}; window.__pi.cssPath = ...; ... })()`. Ensure `simpleHash` uses the exact djb2 algorithm that matches `core.js`.
4. Define `ToolDeps` interface (in state.ts or a separate types file — decide based on import graph). This bundles the infrastructure functions that tool registration files need: `ensureBrowser`, `closeBrowser`, `getActivePage`, `getActiveTarget`, `getActivePageOrNull`, `captureCompactPageState`, `postActionSummary`, `constrainScreenshot`, `captureErrorScreenshot`, `getRecentErrors`, `settleAfterActionAdaptive`, `ensureMutationCounter`, `buildRefSnapshot`, `resolveRefTarget`, `parseRef`, `formatVersionedRef`, `staleRefGuidance`, `formatCompactStateSummary`, `beginTrackedAction`, `finishTrackedAction`, etc.
5. Verify all three modules load via jiti without errors. Check no circular dependencies exist (state.ts imports only from core.js and node stdlib; utils.ts imports from state.ts and core.js; evaluate-helpers.ts imports nothing).
## Must-Haves
- [ ] state.ts exports accessor functions for all 18 state variables, not raw `export let`
- [ ] state.ts exports `resetAllState()` that resets every variable to its initial value
- [ ] evaluate-helpers.ts `simpleHash` uses identical djb2 algorithm to core.js `computeContentHash`
- [ ] evaluate-helpers.ts covers all 9 functions: cssPath, simpleHash, isVisible, isEnabled, inferRole, accessibleName, isInteractiveEl, domPath, selectorHints
- [ ] No circular imports between the three new modules
- [ ] ToolDeps interface defined and exported
## Verification
- `node -e "const jiti = require('@mariozechner/jiti')(...); jiti('./src/resources/extensions/browser-tools/state.ts'); console.log('state ok')"` — no error
- `node -e "const jiti = require('@mariozechner/jiti')(...); jiti('./src/resources/extensions/browser-tools/utils.ts'); console.log('utils ok')"` — no error
- `node -e "const jiti = require('@mariozechner/jiti')(...); const h = jiti('./src/resources/extensions/browser-tools/evaluate-helpers.ts'); console.log(h.EVALUATE_HELPERS_SOURCE.includes('cssPath'))"` — prints true
- grep evaluate-helpers.ts for all 9 function names
## Inputs
- `src/resources/extensions/browser-tools/index.ts` — lines 62–202 (state/types), lines 204–620 (helpers), lines 1228–1430 (browser-side utilities)
- `src/resources/extensions/browser-tools/core.js` — `computeContentHash` djb2 algorithm for hash invariant check
## Expected Output
- `src/resources/extensions/browser-tools/state.ts` — all state + types + accessors + resetAllState + ToolDeps interface
- `src/resources/extensions/browser-tools/utils.ts` — all Node-side utility functions
- `src/resources/extensions/browser-tools/evaluate-helpers.ts` — EVALUATE_HELPERS_SOURCE string constant

View file

@ -0,0 +1,54 @@
---
estimated_steps: 5
estimated_files: 4
---
# T02: Extract infrastructure modules and wire addInitScript injection
**Slice:** S01 — Module decomposition and shared evaluate utilities
**Milestone:** M002
## Description
Extract the four infrastructure modules (lifecycle, capture, settle, refs) that sit between state/utils and the tool registration layer. The key deliverable beyond mechanical extraction: `lifecycle.ts` injects `EVALUATE_HELPERS_SOURCE` via `context.addInitScript()` in `ensureBrowser()`, and `refs.ts` refactors `buildRefSnapshot`/`resolveRefTarget` evaluate callbacks to reference `window.__pi.*` instead of redeclaring utilities inline. This retires the R016 risk (shared browser-side evaluate utilities).
## Steps
1. Create `lifecycle.ts`: move `ensureBrowser()`, `closeBrowser()`, `getActivePage()`, `getActiveTarget()`, `getActivePageOrNull()`, `attachPageListeners()` from index.ts. Import state accessors from `./state.ts`. Import `EVALUATE_HELPERS_SOURCE` from `./evaluate-helpers.ts`. In `ensureBrowser()`, add `context.addInitScript(EVALUATE_HELPERS_SOURCE)` immediately after `browser.newContext()` and before `context.newPage()`. `closeBrowser()` calls `resetAllState()` from state.ts instead of resetting variables individually.
2. Create `capture.ts`: move `captureCompactPageState()`, `formatCompactStateSummary()`, `postActionSummary()`, `constrainScreenshot()`, `captureErrorScreenshot()`, `getRecentErrors()` from index.ts. Import from `./state.ts` and `./lifecycle.ts` as needed.
3. Create `settle.ts`: move `settleAfterActionAdaptive()`, `ensureMutationCounter()`, `readMutationCounter()`, `readFocusedDescriptor()` from index.ts. Import from `./state.ts`.
4. Create `refs.ts`: move `buildRefSnapshot()`, `resolveRefTarget()`, `parseRef()`, `formatVersionedRef()`, `staleRefGuidance()` from index.ts. **Refactor `buildRefSnapshot`'s evaluate callback:** remove the inline function declarations for the 9 injected functions (`cssPath`, `simpleHash`, `isVisible`, `isEnabled`, `inferRole`, `accessibleName`, `isInteractiveEl`, `domPath`, `selectorHints`) and replace their call sites with `window.__pi.cssPath(el)`, `window.__pi.simpleHash(str)`, etc. Keep `matchesMode`, `computeNearestHeading`, `computeFormOwnership` inline (they're not shared/duplicated). **Refactor `resolveRefTarget`'s evaluate callback:** remove inline `cssPath` and `simpleHash` declarations, replace with `window.__pi.cssPath` and `window.__pi.simpleHash`.
5. Verify all four modules load via jiti. Grep `buildRefSnapshot` and `resolveRefTarget` to confirm zero inline declarations of `cssPath` or `simpleHash`. Verify `lifecycle.ts` contains the `addInitScript` call.
## Must-Haves
- [ ] lifecycle.ts calls `context.addInitScript(EVALUATE_HELPERS_SOURCE)` after `browser.newContext()` and before `context.newPage()`
- [ ] closeBrowser() in lifecycle.ts calls resetAllState() from state.ts
- [ ] buildRefSnapshot evaluate callback uses window.__pi.cssPath, window.__pi.simpleHash, etc. — zero inline redeclarations of the 9 shared functions
- [ ] resolveRefTarget evaluate callback uses window.__pi.cssPath and window.__pi.simpleHash — zero inline redeclarations
- [ ] No circular imports between infrastructure modules (lifecycle→state, capture→state+lifecycle, settle→state, refs→state)
## Verification
- `grep -c "function cssPath\|function simpleHash" src/resources/extensions/browser-tools/refs.ts` returns 0
- `grep "addInitScript" src/resources/extensions/browser-tools/lifecycle.ts` returns a match
- `grep "resetAllState" src/resources/extensions/browser-tools/lifecycle.ts` returns a match
- All four modules load via jiti without error
## Inputs
- `src/resources/extensions/browser-tools/state.ts` — state accessors (from T01)
- `src/resources/extensions/browser-tools/utils.ts` — utility functions (from T01)
- `src/resources/extensions/browser-tools/evaluate-helpers.ts` — EVALUATE_HELPERS_SOURCE (from T01)
- `src/resources/extensions/browser-tools/index.ts` — source functions to extract
## Expected Output
- `src/resources/extensions/browser-tools/lifecycle.ts` — browser lifecycle with addInitScript injection
- `src/resources/extensions/browser-tools/capture.ts` — page state capture functions
- `src/resources/extensions/browser-tools/settle.ts` — DOM settle logic
- `src/resources/extensions/browser-tools/refs.ts` — ref snapshot/resolution using window.__pi.*

View file

@ -0,0 +1,70 @@
---
estimated_steps: 4
estimated_files: 10
---
# T03: Extract tool registrations into grouped files and create slim index.ts
**Slice:** S01 — Module decomposition and shared evaluate utilities
**Milestone:** M002
## Description
Move all 43 tool registrations from the monolithic export default function into 9 categorized tool files under `tools/`. Each file exports a single registration function. Rewrite `index.ts` as a slim orchestrator that imports everything and wires it together. This is the largest task by line count but the most mechanical — tool implementations don't change, only their location and import sources.
## Steps
1. Create `tools/` directory and 9 tool files. Each exports a function like `export function registerNavigationTools(pi: ExtensionAPI, deps: ToolDeps)`. Tool categorization per research:
- `navigation.ts` — browser_navigate, browser_go_back, browser_go_forward, browser_reload (4 tools)
- `screenshot.ts` — browser_screenshot (1 tool)
- `interaction.ts` — browser_click, browser_drag, browser_type, browser_upload_file, browser_scroll, browser_hover, browser_key_press, browser_select_option, browser_set_checked, browser_set_viewport (10 tools)
- `inspection.ts` — browser_get_console_logs, browser_get_network_logs, browser_get_dialog_logs, browser_evaluate, browser_get_page_source, browser_get_accessibility_tree, browser_find (7 tools)
- `session.ts` — browser_close, browser_trace_start, browser_trace_stop, browser_export_har, browser_timeline, browser_session_summary, browser_debug_bundle (7 tools)
- `assertions.ts` — browser_assert, browser_diff, browser_batch (3 tools)
- `tools/refs.ts` — browser_snapshot_refs, browser_get_ref, browser_click_ref, browser_hover_ref, browser_fill_ref (5 tools)
- `wait.ts` — browser_wait_for (1 tool)
- `pages.ts` — browser_list_pages, browser_switch_page, browser_close_page, browser_list_frames, browser_select_frame (5 tools)
2. For each tool, the execute function body stays verbatim. Replace direct function calls (ensureBrowser, captureCompactPageState, etc.) with `deps.ensureBrowser()`, `deps.captureCompactPageState()`, etc. Replace direct state variable access (consoleLogs, currentRefMap, etc.) with state accessor calls imported from `../state.ts`.
3. Handle `browser_batch` carefully — its `executeStep` closure calls `settleAfterActionAdaptive`, `parseRef`, `resolveRefTarget`, `collectAssertionState`, `evaluateAssertionChecks`, and accesses `consoleLogs` directly. All of these come through deps or state imports. The `validateWaitParams`, `parseThreshold`, `meetsThreshold`, `includesNeedle`, `createRegionStableScript` come from core.js imports.
4. Rewrite `index.ts` as slim orchestrator: import all 9 register functions, import infrastructure modules, build the ToolDeps object, call each register function, register the `session_shutdown` hook. Target: under 50 lines. The old index.ts content is fully replaced.
## Must-Haves
- [ ] Exactly 43 pi.registerTool calls across all 9 tool files (count must match)
- [ ] index.ts is under 50 lines and contains zero tool registrations
- [ ] browser_batch internal step execution works — all infrastructure functions accessible via deps/imports
- [ ] No tool parameter schemas or return formats changed
- [ ] Extension loads via jiti and all tools register
## Verification
- `grep -rc "pi.registerTool" src/resources/extensions/browser-tools/tools/` sums to 43
- `wc -l src/resources/extensions/browser-tools/index.ts` is under 50
- `grep "pi.registerTool" src/resources/extensions/browser-tools/index.ts` returns no matches
- Extension loads via jiti without error
## Inputs
- `src/resources/extensions/browser-tools/state.ts` — state accessors (from T01)
- `src/resources/extensions/browser-tools/utils.ts` — utility functions (from T01)
- `src/resources/extensions/browser-tools/lifecycle.ts` — browser lifecycle (from T02)
- `src/resources/extensions/browser-tools/capture.ts` — state capture (from T02)
- `src/resources/extensions/browser-tools/settle.ts` — DOM settle (from T02)
- `src/resources/extensions/browser-tools/refs.ts` — ref management (from T02)
- `src/resources/extensions/browser-tools/index.ts` — source tool registrations to extract (lines 1614–4989)
## Expected Output
- `src/resources/extensions/browser-tools/tools/navigation.ts` (4 tools)
- `src/resources/extensions/browser-tools/tools/screenshot.ts` (1 tool)
- `src/resources/extensions/browser-tools/tools/interaction.ts` (10 tools)
- `src/resources/extensions/browser-tools/tools/inspection.ts` (7 tools)
- `src/resources/extensions/browser-tools/tools/session.ts` (7 tools)
- `src/resources/extensions/browser-tools/tools/assertions.ts` (3 tools)
- `src/resources/extensions/browser-tools/tools/refs.ts` (5 tools)
- `src/resources/extensions/browser-tools/tools/wait.ts` (1 tool)
- `src/resources/extensions/browser-tools/tools/pages.ts` (5 tools)
- `src/resources/extensions/browser-tools/index.ts` — slim orchestrator (<50 lines)

View file

@ -0,0 +1,50 @@
---
estimated_steps: 4
estimated_files: 0
---
# T04: Runtime verification against a real browser page
**Slice:** S01 — Module decomposition and shared evaluate utilities
**Milestone:** M002
## Description
End-to-end verification that the module split actually works at runtime. Load the extension via jiti, verify all 43 tools register, launch a real browser, navigate to a page, exercise snapshot/click/ref tools, confirm window.__pi injection, and verify the close/reopen cycle re-registers addInitScript. This is pure verification — no code changes unless bugs are found.
## Steps
1. Load the extension module via jiti and verify it exports a default function. Mock or use the real ExtensionAPI to count tool registrations — confirm exactly 43.
2. Use the running pi instance or a test script to exercise the browser tools sequence: browser_navigate to a local or test URL → verify page title returned → browser_snapshot_refs → verify ref nodes returned → browser_click on a returned ref → verify click succeeds.
3. Verify window.__pi injection: use browser_evaluate to run `Object.keys(window.__pi)` and confirm it contains cssPath, simpleHash, isVisible, isEnabled, inferRole, accessibleName, isInteractiveEl, domPath, selectorHints. Navigate to a new URL and re-check — confirms addInitScript survives navigation.
4. Verify close/reopen: call browser_close, then browser_navigate again. Confirm window.__pi is still available on the new browser context (addInitScript re-registered on the fresh context created by ensureBrowser).
## Must-Haves
- [ ] 43 tools registered (no more, no less)
- [ ] browser_navigate returns page title and URL
- [ ] browser_snapshot_refs returns ref nodes with valid structure
- [ ] window.__pi contains all 9 expected functions
- [ ] window.__pi survives navigation to new URL
- [ ] Close + reopen cycle works — window.__pi available on fresh context
## Verification
- Tool registration count === 43
- browser_navigate succeeds (returns content, no error)
- browser_snapshot_refs returns array with at least 1 ref
- `page.evaluate(() => Object.keys(window.__pi).sort())` returns the 9 expected function names
- After browser_close + browser_navigate: window.__pi still available
## Inputs
- All modules from T01–T03 in place
- A reachable URL to navigate to (localhost dev server or data: URL)
## Expected Output
- Verification passes — no code changes needed (or bug fixes applied if issues found)
- Slice is confirmed done

View file

@ -300,7 +300,11 @@ GSD preferences live in `~/.gsd/preferences.md` (global) or `.gsd/preferences.md
version: 1
models:
research: claude-sonnet-4-6
planning: claude-opus-4-6
planning:
model: claude-opus-4-6
fallbacks:
- openrouter/z-ai/glm-5
- openrouter/minimax/minimax-m2.5
execution: claude-sonnet-4-6
completion: claude-sonnet-4-6
skill_discovery: suggest
@ -316,7 +320,7 @@ budget_ceiling: 50.00
| Setting | What it controls |
|---------|-----------------|
| `models.*` | Per-phase model selection (Opus for planning, Sonnet for execution, etc.) |
| `models.*` | Per-phase model selection — string for a single model, or `{model, fallbacks}` for automatic failover |
| `skill_discovery` | `auto` / `suggest` / `off` — how GSD finds and applies skills |
| `auto_supervisor.*` | Timeout thresholds for auto mode supervision |
| `budget_ceiling` | USD ceiling — auto mode pauses when reached |
@ -423,12 +427,15 @@ In your preferences (`/gsd prefs`), assign different models to different phases:
```yaml
models:
research: openrouter/deepseek/deepseek-r1
planning: claude-opus-4-6
planning:
model: claude-opus-4-6
fallbacks:
- openrouter/z-ai/glm-5
execution: claude-sonnet-4-6
completion: claude-sonnet-4-6
```
Use expensive models where quality matters (planning, complex execution) and cheaper/faster models where speed matters (research, simple completions). GSD tracks cost per-model so you can see exactly where your budget goes.
Use expensive models where quality matters (planning, complex execution) and cheaper/faster models where speed matters (research, simple completions). Each phase accepts a simple model string or an object with `model` and `fallbacks` — if the primary model fails (provider outage, rate limit, credit exhaustion), GSD automatically tries the next fallback. GSD tracks cost per-model so you can see exactly where your budget goes.
---

View file

@ -176,7 +176,7 @@ export class InteractiveMode {
private pendingTools = new Map<string, ToolExecutionComponent>();
// Tool output expansion state
private toolOutputExpanded = false;
private toolOutputExpanded = true;
// Thinking block visibility state
private hideThinkingBlock = false;

View file

@ -0,0 +1,190 @@
/**
* browser-tools page state capture
*
* Functions for capturing compact page state, screenshots, and summaries.
* Used by tool implementations for post-action feedback.
*/
import type { Frame, Page } from "playwright";
import type { CompactPageState, CompactSelectorState } from "./state.js";
import { formatCompactStateSummary } from "./utils.js";
// Anthropic API rejects images > 2000px in multi-image requests.
// Cap at 1568px (recommended optimal size) to stay well within limits.
const MAX_SCREENSHOT_DIM = 1568;
// ---------------------------------------------------------------------------
// Compact page state capture
// ---------------------------------------------------------------------------
/**
 * Capture a compact, serializable summary of the current page.
 *
 * DOM-derived fields are collected by evaluating a callback in
 * `options.target` (defaults to `p`). The `url` and `title` of the result are
 * then overwritten with the values reported by `p` itself.
 *
 * @param p - Page that supplies the final `url` and `title`.
 * @param options - `selectors`: CSS selectors to probe (falsy entries and
 *   duplicates are dropped); `includeBodyText`: body text is included only
 *   when this is exactly `true`; `target`: where the DOM evaluation runs.
 * @returns The compact page state.
 */
export async function captureCompactPageState(
  p: Page,
  options: { selectors?: string[]; includeBodyText?: boolean; target?: Page | Frame } = {},
): Promise<CompactPageState> {
  const requested = options.selectors ?? [];
  const selectors = [...new Set(requested.filter(Boolean))];
  const evalTarget = options.target ?? p;
  const wantBodyText = options.includeBodyText === true;

  // NOTE: the callback below is serialized and run in the browser, so it must
  // not reference anything from the enclosing Node scope.
  const domState = await evalTarget.evaluate(
    ({ selectors, includeBodyText }) => {
      type SelectorProbe = {
        exists: boolean;
        visible: boolean;
        value: string;
        checked: boolean | null;
        text: string;
      };

      // Trim, collapse runs of whitespace, and cap the length.
      const squash = (raw: string, max: number): string => raw.trim().replace(/\s+/g, " ").slice(0, max);

      const probe = (selector: string): SelectorProbe => {
        let el: Element | null;
        try {
          el = document.querySelector(selector);
        } catch {
          // Invalid selector syntax is reported the same way as "no match".
          el = null;
        }
        if (!el) {
          return { exists: false, visible: false, value: "", checked: null, text: "" };
        }

        const htmlEl = el as HTMLElement;
        const style = window.getComputedStyle(htmlEl);
        const rect = htmlEl.getBoundingClientRect();
        const hiddenByCss = style.display === "none" || style.visibility === "hidden";
        const visible = !hiddenByCss && rect.width > 0 && rect.height > 0;

        let value: string;
        if (
          el instanceof HTMLInputElement ||
          el instanceof HTMLTextAreaElement ||
          el instanceof HTMLSelectElement
        ) {
          value = el.value;
        } else {
          value = htmlEl.getAttribute("value") || "";
        }

        let checked: boolean | null = null;
        if (el instanceof HTMLInputElement && ["checkbox", "radio"].includes(el.type)) {
          checked = el.checked;
        }

        return {
          exists: true,
          visible,
          value,
          checked,
          text: squash(htmlEl.innerText || htmlEl.textContent || "", 160),
        };
      };

      const selectorStates: Record<string, SelectorProbe> = {};
      for (const selector of selectors) {
        selectorStates[selector] = probe(selector);
      }

      // Describe the focused element unless focus sits on <body>/<html>.
      let focus = "";
      const active = document.activeElement as HTMLElement | null;
      if (active && active !== document.body && active !== document.documentElement) {
        const idPart = active.id ? "#" + active.id : "";
        const label = active.getAttribute("aria-label");
        const labelPart = label ? ' "' + label + '"' : "";
        focus = active.tagName.toLowerCase() + idPart + labelPart;
      }

      const headings = Array.from(document.querySelectorAll('h1,h2,h3'))
        .slice(0, 5)
        .map((h) => squash(h.textContent || "", 80));

      const openDialogSelector = '[role="dialog"]:not([hidden]),dialog[open]';
      const firstDialog = document.querySelector(openDialogSelector);
      const dialogTitle =
        firstDialog?.querySelector('[role="heading"],[aria-label]')?.textContent?.trim().slice(0, 80) ?? "";

      let bodyText = "";
      if (includeBodyText) {
        bodyText = squash(document.body?.innerText || document.body?.textContent || "", 4000);
      }

      const countOf = (sel: string): number => document.querySelectorAll(sel).length;

      return {
        url: window.location.href,
        title: document.title,
        focus,
        headings,
        bodyText,
        counts: {
          landmarks: countOf('[role="main"],[role="banner"],[role="navigation"],[role="contentinfo"],[role="complementary"],[role="search"],[role="form"],[role="dialog"],[role="alert"],main,header,nav,footer,aside,section,form,dialog'),
          buttons: countOf('button,[role="button"]'),
          links: countOf('a[href]'),
          inputs: countOf('input,textarea,select'),
        },
        dialog: {
          count: countOf(openDialogSelector),
          title: dialogTitle,
        },
        selectorStates,
      };
    },
    { selectors, includeBodyText: wantBodyText },
  );

  // URL and title always come from the Page, not the frame
  return { ...domState, url: p.url(), title: await p.title() };
}
// ---------------------------------------------------------------------------
// Post-action summary
// ---------------------------------------------------------------------------
/**
 * Produce a short textual page summary to report after an action.
 *
 * Captures the compact page state (evaluated in `target` when given) and
 * formats it with `formatCompactStateSummary`. Any failure along the way is
 * swallowed and reported as the literal `"[summary unavailable]"`, so this
 * never rejects.
 */
export async function postActionSummary(p: Page, target?: Page | Frame): Promise<string> {
  return captureCompactPageState(p, { target })
    .then((snapshot) => formatCompactStateSummary(snapshot))
    .catch(() => "[summary unavailable]");
}
// ---------------------------------------------------------------------------
// Screenshot helpers
// ---------------------------------------------------------------------------
/** Pixel size of an encoded image; both fields are 0 when the size is unknown. */
interface ScreenshotDimensions {
  width: number;
  height: number;
}

/** Read width/height from a PNG's IHDR chunk (bytes 16-23); unknown if the buffer is too short. */
function readPngDimensions(buffer: Buffer): ScreenshotDimensions {
  if (buffer.length < 24) return { width: 0, height: 0 };
  return { width: buffer.readUInt32BE(16), height: buffer.readUInt32BE(20) };
}

/**
 * Read width/height from a JPEG by walking its marker segments up to the
 * first start-of-frame header.
 *
 * Segments are skipped by their declared length rather than byte-scanned, so
 * marker-like bytes inside an earlier segment's payload (for example an
 * embedded thumbnail) are never mistaken for the image's own frame header.
 */
function readJpegDimensions(buffer: Buffer): ScreenshotDimensions {
  const unknown: ScreenshotDimensions = { width: 0, height: 0 };
  // Every JPEG starts with the SOI marker FF D8.
  if (buffer.length < 4 || buffer[0] !== 0xff || buffer[1] !== 0xd8) return unknown;

  let offset = 2;
  while (offset + 3 < buffer.length) {
    if (buffer[offset] !== 0xff) break; // not at a marker: malformed stream
    const marker = buffer[offset + 1];
    if (marker === 0xff) {
      // Fill byte before a marker.
      offset += 1;
      continue;
    }
    if (marker === 0x01 || (marker !== undefined && marker >= 0xd0 && marker <= 0xd8)) {
      // Stand-alone markers carry no length field.
      offset += 2;
      continue;
    }
    // Scan data or end of image reached without a frame header.
    if (marker === 0xda || marker === 0xd9) break;

    const isStartOfFrame =
      marker !== undefined &&
      marker >= 0xc0 &&
      marker <= 0xcf &&
      marker !== 0xc4 && // DHT
      marker !== 0xc8 && // JPG extension
      marker !== 0xcc; // DAC
    if (isStartOfFrame) {
      // Layout: FF Cx | length(2) | precision(1) | height(2) | width(2)
      if (offset + 9 > buffer.length) break;
      return { height: buffer.readUInt16BE(offset + 5), width: buffer.readUInt16BE(offset + 7) };
    }

    const segmentLength = buffer.readUInt16BE(offset + 2);
    if (segmentLength < 2) break; // length includes its own two bytes
    offset += 2 + segmentLength;
  }
  return unknown;
}

/**
 * If either dimension of the image buffer exceeds MAX_SCREENSHOT_DIM,
 * downscale proportionally using the browser's canvas (zero dependencies).
 * Returns the original buffer unchanged if already within limits, if its
 * dimensions cannot be determined, or if the browser produced no image data.
 *
 * @param page - Page whose browser context performs the canvas resize.
 * @param buffer - Encoded image bytes.
 * @param mimeType - `"image/png"` is parsed as PNG; anything else as JPEG.
 * @param quality - Encoder quality on a 0-100 scale (passed to `toDataURL` as `quality / 100`).
 * @returns The original or the downscaled image bytes.
 */
export async function constrainScreenshot(
  page: Page,
  buffer: Buffer,
  mimeType: string,
  quality: number,
): Promise<Buffer> {
  const { width, height } =
    mimeType === "image/png" ? readPngDimensions(buffer) : readJpegDimensions(buffer);

  if (width <= MAX_SCREENSHOT_DIM && height <= MAX_SCREENSHOT_DIM) {
    return buffer;
  }

  const b64 = buffer.toString("base64");
  const dataUrl = await page.evaluate(
    async ({ b64, mime, maxDim, q }) => {
      const img = new Image();
      await new Promise<void>((resolve, reject) => {
        img.onload = () => resolve();
        img.onerror = reject;
        img.src = `data:${mime};base64,${b64}`;
      });
      const scale = Math.min(maxDim / img.width, maxDim / img.height);
      // Clamp to 1px so extreme aspect ratios cannot yield an empty canvas.
      const w = Math.max(1, Math.round(img.width * scale));
      const h = Math.max(1, Math.round(img.height * scale));
      const canvas = document.createElement("canvas");
      canvas.width = w;
      canvas.height = h;
      const ctx = canvas.getContext("2d");
      if (!ctx) throw new Error("2D canvas context unavailable");
      ctx.drawImage(img, 0, 0, w, h);
      return canvas.toDataURL(mime, q / 100);
    },
    { b64, mime: mimeType, maxDim: MAX_SCREENSHOT_DIM, q: quality },
  );

  // Data URLs look like "data:<mime>;base64,<payload>".
  const comma = dataUrl.indexOf(",");
  const payload = comma === -1 ? "" : dataUrl.slice(comma + 1);
  if (payload.length === 0) return buffer;
  return Buffer.from(payload, "base64");
}
/**
 * Take a JPEG screenshot of `p` for error reports.
 *
 * The capture uses quality 60 at CSS scale and is passed through
 * `constrainScreenshot`. Best-effort: returns `null` when there is no page or
 * when any step fails.
 *
 * @returns Base64 image data with its MIME type, or `null`.
 */
export async function captureErrorScreenshot(p: Page | null): Promise<{ data: string; mimeType: string } | null> {
  const mimeType = "image/jpeg";
  const quality = 60;
  if (!p) {
    return null;
  }
  try {
    const raw = await p.screenshot({ type: "jpeg", quality, scale: "css" });
    const bounded = await constrainScreenshot(p, raw, mimeType, quality);
    return { data: bounded.toString("base64"), mimeType };
  } catch {
    // Screenshots are diagnostic only; never let them mask the original error.
    return null;
  }
}

View file

@ -0,0 +1,184 @@
/**
 * browser-tools: browser-side evaluate helpers
 *
 * Exports a single string constant `EVALUATE_HELPERS_SOURCE` containing an IIFE
 * that attaches utility functions to `window.__pi`. This is injected into every
 * new BrowserContext via `context.addInitScript()` so that `page.evaluate()`
 * callbacks can reference `window.__pi.cssPath(el)` etc. instead of redeclaring
 * the same functions inline.
 *
 * The source is plain ES5-style JavaScript because it runs in the page, not in
 * Node; regex backslashes are doubled because it lives in a template literal.
 *
 * `simpleHash` is a djb2-style rolling hash (seed 5381) that is intended to
 * match `computeContentHash` / `computeStructuralSignature` in `core.js`
 * (not visible from this file; keep the two in sync).
 *
 * `cssPath` returns `#id` for elements with an id, `"body"` for the body
 * element itself, `"body > …"` for descendants of body, and an unprefixed
 * tag path for elements that are not inside body.
 *
 * Functions provided (9):
 * cssPath, simpleHash, isVisible, isEnabled, inferRole,
 * accessibleName, isInteractiveEl, domPath, selectorHints
 */
export const EVALUATE_HELPERS_SOURCE = `(function() {
  var pi = window.__pi = window.__pi || {};

  // -----------------------------------------------------------------------
  // 1. simpleHash — djb2 hash matching core.js computeContentHash
  // -----------------------------------------------------------------------
  pi.simpleHash = function simpleHash(str) {
    if (!str) return "0";
    var h = 5381;
    for (var i = 0; i < str.length; i++) {
      h = ((h << 5) - h + str.charCodeAt(i)) | 0;
    }
    return (h >>> 0).toString(16);
  };

  // -----------------------------------------------------------------------
  // 2. isVisible
  // -----------------------------------------------------------------------
  pi.isVisible = function isVisible(el) {
    var style = window.getComputedStyle(el);
    if (style.display === "none" || style.visibility === "hidden") return false;
    var rect = el.getBoundingClientRect();
    return rect.width > 0 && rect.height > 0;
  };

  // -----------------------------------------------------------------------
  // 3. isEnabled
  // -----------------------------------------------------------------------
  pi.isEnabled = function isEnabled(el) {
    var disabledAttr = el.getAttribute("disabled") !== null;
    var ariaDisabled = (el.getAttribute("aria-disabled") || "").toLowerCase() === "true";
    return !disabledAttr && !ariaDisabled;
  };

  // -----------------------------------------------------------------------
  // 4. inferRole
  // -----------------------------------------------------------------------
  pi.inferRole = function inferRole(el) {
    var explicit = (el.getAttribute("role") || "").trim();
    if (explicit) return explicit;
    var tag = el.tagName.toLowerCase();
    if (tag === "a" && el.getAttribute("href")) return "link";
    if (tag === "button") return "button";
    if (tag === "select") return "combobox";
    if (tag === "textarea") return "textbox";
    if (tag === "input") {
      var type = (el.getAttribute("type") || "text").toLowerCase();
      if (["button", "submit", "reset"].indexOf(type) !== -1) return "button";
      if (type === "checkbox") return "checkbox";
      if (type === "radio") return "radio";
      if (type === "search") return "searchbox";
      return "textbox";
    }
    return "";
  };

  // -----------------------------------------------------------------------
  // 5. accessibleName
  // -----------------------------------------------------------------------
  pi.accessibleName = function accessibleName(el) {
    var ariaLabel = el.getAttribute("aria-label");
    if (ariaLabel && ariaLabel.trim()) return ariaLabel.trim();
    var labelledBy = el.getAttribute("aria-labelledby");
    if (labelledBy && labelledBy.trim()) {
      var text = labelledBy.trim().split(/\\s+/).map(function(id) {
        var ref = document.getElementById(id);
        return ref ? (ref.textContent || "").trim() : "";
      }).join(" ").trim();
      if (text) return text;
    }
    var placeholder = el.getAttribute("placeholder");
    if (placeholder && placeholder.trim()) return placeholder.trim();
    var alt = el.getAttribute("alt");
    if (alt && alt.trim()) return alt.trim();
    var value = el.value;
    if (value && typeof value === "string" && value.trim()) return value.trim().slice(0, 80);
    return (el.textContent || "").trim().replace(/\\s+/g, " ").slice(0, 80);
  };

  // -----------------------------------------------------------------------
  // 6. isInteractiveEl
  // -----------------------------------------------------------------------
  var interactiveRoles = {
    button: 1, link: 1, textbox: 1, searchbox: 1, combobox: 1,
    checkbox: 1, radio: 1, "switch": 1, menuitem: 1,
    menuitemcheckbox: 1, menuitemradio: 1, tab: 1, option: 1,
    slider: 1, spinbutton: 1
  };
  pi.isInteractiveEl = function isInteractiveEl(el) {
    var tag = el.tagName.toLowerCase();
    var role = pi.inferRole(el);
    if (["button", "input", "select", "textarea", "summary", "option"].indexOf(tag) !== -1) return true;
    if (tag === "a" && !!el.getAttribute("href")) return true;
    if (interactiveRoles[role]) return true;
    if (el.tabIndex >= 0) return true;
    if (el.isContentEditable) return true;
    return false;
  };

  // -----------------------------------------------------------------------
  // 7. cssPath
  // -----------------------------------------------------------------------
  pi.cssPath = function cssPath(el) {
    if (el.id) return "#" + CSS.escape(el.id);
    var parts = [];
    var current = el;
    while (current && current.nodeType === Node.ELEMENT_NODE && current !== document.body) {
      var tag = current.tagName.toLowerCase();
      var part = tag;
      var parent = current.parentElement;
      if (parent) {
        var siblings = Array.from(parent.children).filter(function(c) {
          return c.tagName === current.tagName;
        });
        if (siblings.length > 1) {
          var idx = siblings.indexOf(current) + 1;
          part += ":nth-of-type(" + idx + ")";
        }
      }
      parts.unshift(part);
      current = current.parentElement;
    }
    var tail = parts.join(" > ");
    // The walk only stops at document.body for elements inside <body>; for
    // <html>, <head> content or detached nodes a "body" prefix would be wrong.
    if (current !== document.body) return tail;
    // The body element itself has no tail; "body > " is not a valid selector.
    return tail ? "body > " + tail : "body";
  };

  // -----------------------------------------------------------------------
  // 8. domPath
  // -----------------------------------------------------------------------
  pi.domPath = function domPath(el) {
    var path = [];
    var current = el;
    while (current && current !== document.documentElement) {
      var parent = current.parentElement;
      if (!parent) break;
      var idx = Array.from(parent.children).indexOf(current);
      path.unshift(idx);
      current = parent;
    }
    return path;
  };

  // -----------------------------------------------------------------------
  // 9. selectorHints
  // -----------------------------------------------------------------------
  pi.selectorHints = function selectorHints(el) {
    var hints = [];
    if (el.id) hints.push("#" + CSS.escape(el.id));
    var nameAttr = el.getAttribute("name");
    if (nameAttr) hints.push(el.tagName.toLowerCase() + '[name="' + CSS.escape(nameAttr) + '"]');
    var aria = el.getAttribute("aria-label");
    if (aria) hints.push(el.tagName.toLowerCase() + '[aria-label="' + CSS.escape(aria) + '"]');
    var placeholder = el.getAttribute("placeholder");
    if (placeholder) hints.push(el.tagName.toLowerCase() + '[placeholder="' + CSS.escape(placeholder) + '"]');
    var cls = Array.from(el.classList).slice(0, 2);
    if (cls.length > 0) hints.push(el.tagName.toLowerCase() + "." + cls.map(function(c) { return CSS.escape(c); }).join("."));
    hints.push(pi.cssPath(el));
    var seen = {};
    var unique = [];
    for (var i = 0; i < hints.length; i++) {
      if (!seen[hints[i]]) {
        seen[hints[i]] = true;
        unique.push(hints[i]);
      }
    }
    return unique.slice(0, 6);
  };
})();`;

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,265 @@
/**
* browser-tools browser lifecycle management
*
* Manages the shared Browser + BrowserContext + Page singleton.
* Injects EVALUATE_HELPERS_SOURCE via context.addInitScript() so that
* page.evaluate() callbacks can reference window.__pi.* utilities.
*/
import type { Browser, BrowserContext, Frame, Page } from "playwright";
import path from "node:path";
import {
registryAddPage,
registryGetActive,
registryRemovePage,
registrySetActive,
} from "./core.js";
import {
getBrowser,
setBrowser,
getContext,
setContext,
pageRegistry,
getActiveFrame,
setActiveFrame,
logPusher,
getConsoleLogs,
getNetworkLogs,
getDialogLogs,
getPendingCriticalRequestsByPage,
setHarState,
resetAllState,
HAR_FILENAME,
type ConsoleEntry,
type NetworkEntry,
} from "./state.js";
import {
isCriticalResourceType,
updatePendingCriticalRequests,
ensureSessionStartedAt,
ensureSessionArtifactDir,
} from "./utils.js";
import { EVALUATE_HELPERS_SOURCE } from "./evaluate-helpers.js";
// ---------------------------------------------------------------------------
// Page event wiring
// ---------------------------------------------------------------------------
/**
 * Wire every page-level event listener onto `p`.
 *
 * Called for the initial page and for each page the context opens later.
 * Registers handlers that:
 *  - record console messages and uncaught page errors in the console log,
 *  - maintain the pending critical-request counter for `p`,
 *  - record finished and failed requests in the network log (up to 2000
 *    characters of the response body are kept for status >= 400),
 *  - log and auto-accept JS dialogs,
 *  - clear the active frame when that frame detaches,
 *  - remove the page from the registry when it closes.
 *
 * @param p - Page to instrument.
 * @param pageId - Registry id stamped on every log entry for this page.
 */
export function attachPageListeners(p: Page, pageId: number): void {
  getPendingCriticalRequestsByPage().set(p, 0);

  const consoleLogs = getConsoleLogs();
  const networkLogs = getNetworkLogs();
  const dialogLogs = getDialogLogs();

  const recordConsole = (type: string, text: string): void => {
    logPusher(consoleLogs, { type, text, timestamp: Date.now(), url: p.url(), pageId });
  };

  // Adjust the in-flight counter, but only for resource types that count as critical.
  const adjustPending = (resourceType: string, delta: number): void => {
    if (isCriticalResourceType(resourceType)) {
      updatePendingCriticalRequests(p, delta);
    }
  };

  // Console messages
  p.on("console", (msg) => recordConsole(msg.type(), msg.text()));

  // Uncaught JS errors
  p.on("pageerror", (err) => recordConsole("pageerror", err.message));

  // Network requests — start/completed/failed
  p.on("request", (request) => adjustPending(request.resourceType(), 1));

  p.on("requestfinished", async (request) => {
    // Decrement synchronously, before any await, so the counter settles immediately.
    adjustPending(request.resourceType(), -1);
    try {
      const response = await request.response();
      const status = response?.status() ?? null;
      const entry: NetworkEntry = {
        method: request.method(),
        url: request.url(),
        status,
        resourceType: request.resourceType(),
        timestamp: Date.now(),
        failed: false,
        pageId,
      };
      const isErrorResponse = !!response && status !== null && status >= 400;
      if (isErrorResponse) {
        try {
          entry.responseBody = (await response.text()).slice(0, 2000);
        } catch {
          // Body unavailable; log the entry without it.
        }
      }
      logPusher(networkLogs, entry);
    } catch {
      // Response could not be read; nothing is logged for this request.
    }
  });

  p.on("requestfailed", (request) => {
    adjustPending(request.resourceType(), -1);
    logPusher(networkLogs, {
      method: request.method(),
      url: request.url(),
      status: null,
      resourceType: request.resourceType(),
      timestamp: Date.now(),
      failed: true,
      failureText: request.failure()?.errorText ?? "Unknown failure",
      pageId,
    });
  });

  // Auto-handle JS dialogs (alert, confirm, prompt, beforeunload)
  p.on("dialog", async (dialog) => {
    const record = {
      type: dialog.type(),
      message: dialog.message(),
      timestamp: Date.now(),
      url: p.url(),
      defaultValue: dialog.defaultValue() || undefined,
      accepted: true,
      pageId,
    };
    logPusher(dialogLogs, record);
    // Auto-accept all dialogs to prevent page freezes
    await dialog.accept().catch(() => {});
  });

  // Frame detach handler — clears activeFrame if the selected frame detaches
  p.on("framedetached", (frame) => {
    if (getActiveFrame() !== frame) return;
    setActiveFrame(null);
  });

  // Page close handler — removes page from registry and handles active fallback
  p.on("close", () => {
    try {
      registryRemovePage(pageRegistry, pageId);
    } catch {
      // Page already removed (e.g. during closeBrowser)
    }
  });
}
// ---------------------------------------------------------------------------
// Browser lifecycle
// ---------------------------------------------------------------------------
/**
 * Return the shared browser, context and active page, launching them on first use.
 *
 * Fast path: when both a browser and a context are already stored, they are
 * returned together with the currently active page.
 *
 * Slow path: configures HAR recording, launches Chromium (headed; honours the
 * `BROWSER_PATH` environment variable), creates a context with the shared
 * `window.__pi` helpers installed as an init script, opens and registers the
 * first page, and subscribes to pages opened later (popups, `window.open`),
 * which are registered but not made active.
 *
 * If any step of the slow path fails, the partially launched browser is
 * closed and all shared state is reset before the error is rethrown, so a
 * later call starts from scratch instead of reusing a half-initialised
 * singleton.
 *
 * @throws Whatever Playwright (or the dynamic import of it) throws during launch.
 */
export async function ensureBrowser(): Promise<{ browser: Browser; context: BrowserContext; page: Page }> {
  const existingBrowser = getBrowser();
  const existingContext = getContext();
  if (existingBrowser && existingContext) {
    return { browser: existingBrowser, context: existingContext, page: getActivePage() };
  }

  // Return value is not needed here. NOTE(review): presumably called so the
  // session start time is recorded before artifacts are created; confirm in utils.ts.
  ensureSessionStartedAt();
  const artifactDir = await ensureSessionArtifactDir();
  const sessionHarPath = path.join(artifactDir, HAR_FILENAME);

  let browser: Browser | null = null;
  try {
    setHarState({
      enabled: true,
      configuredAtContextCreation: true,
      path: sessionHarPath,
      exportCount: 0,
      lastExportedPath: null,
      lastExportedAt: null,
    });

    // Lazy import so playwright is only loaded when actually needed
    const { chromium } = await import("playwright");
    const launchOptions: Record<string, unknown> = { headless: false };
    const customPath = process.env.BROWSER_PATH;
    if (customPath) launchOptions.executablePath = customPath;

    browser = await chromium.launch(launchOptions);
    const context = await browser.newContext({
      deviceScaleFactor: 2,
      viewport: { width: 1280, height: 800 },
      recordHar: {
        path: sessionHarPath,
        mode: "minimal",
        content: "omit",
      },
    });

    // Inject shared browser-side utilities into every new page/frame
    await context.addInitScript(EVALUATE_HELPERS_SOURCE);

    setBrowser(browser);
    setContext(context);

    const initialPage = await context.newPage();
    const pageEntry = registryAddPage(pageRegistry, {
      page: initialPage,
      title: await initialPage.title().catch(() => ""),
      url: initialPage.url(),
      opener: null,
    });
    registrySetActive(pageRegistry, pageEntry.id);
    attachPageListeners(initialPage, pageEntry.id);

    // Register new pages (popups, target="_blank", window.open) but do NOT auto-switch.
    // Subscribed after the initial page exists so that page is not registered twice.
    context.on("page", (newPage) => {
      // Determine opener page ID — find which registry page opened this one
      const openerPage = newPage.opener();
      let openerId: number | null = null;
      if (openerPage) {
        const openerEntry = pageRegistry.pages.find(
          (e: { page: Page; id: number }) => e.page === openerPage,
        );
        if (openerEntry) openerId = openerEntry.id;
      }
      const entry = registryAddPage(pageRegistry, {
        page: newPage,
        title: "",
        url: newPage.url(),
        opener: openerId,
      });
      attachPageListeners(newPage, entry.id);
      // Update title once loaded
      newPage.waitForLoadState("domcontentloaded", { timeout: 5000 })
        .then(() => newPage.title())
        .then((title) => { entry.title = title; })
        .catch(() => {});
    });

    return { browser, context, page: getActivePage() };
  } catch (err) {
    // Undo the partial launch: otherwise the browser process leaks and the
    // stored browser/context would make the next call take the fast path.
    if (browser) {
      await browser.close().catch(() => {});
    }
    resetAllState();
    throw err;
  }
}
/**
 * Look up the page that the registry currently marks as active.
 *
 * Any error raised by the registry lookup propagates to the caller; use
 * `getActivePageOrNull` where a missing page must not throw.
 */
export function getActivePage(): Page {
  const { page } = registryGetActive(pageRegistry);
  return page;
}
/**
 * Resolve where page-level operations should run: the selected frame when one
 * is set, otherwise the active page.
 */
export function getActiveTarget(): Page | Frame {
  const selectedFrame = getActiveFrame();
  if (selectedFrame !== null && selectedFrame !== undefined) {
    return selectedFrame;
  }
  return getActivePage();
}
/**
 * Non-throwing variant of `getActivePage` for error-handling paths.
 *
 * @returns The active page, or `null` when the lookup throws.
 */
export function getActivePageOrNull(): Page | null {
  let active: Page | null = null;
  try {
    active = getActivePage();
  } catch {
    // No usable page; callers treat null as "unavailable".
  }
  return active;
}
/**
 * Shut down the shared browser, if one is stored, and reset all shared state.
 *
 * A rejected `close()` is ignored; the state reset runs regardless.
 */
export async function closeBrowser(): Promise<void> {
  const ignoreCloseFailure = (): void => {};
  await getBrowser()?.close().catch(ignoreCloseFailure);
  resetAllState();
}

View file

@ -0,0 +1,264 @@
/**
* browser-tools ref snapshot and resolution
*
* Builds deterministic element snapshots and resolves ref targets.
* Uses window.__pi.* utilities injected via addInitScript (from
* evaluate-helpers.ts) instead of redeclaring functions inline.
*
* Functions kept inline (not shared/duplicated):
* - matchesMode, computeNearestHeading, computeFormOwnership
*/
import type { Frame, Page } from "playwright";
import type { RefNode } from "./state.js";
import { getSnapshotModeConfig } from "./core.js";
// ---------------------------------------------------------------------------
// buildRefSnapshot
// ---------------------------------------------------------------------------
/**
 * Build a snapshot of elements under a root, described as ref nodes without
 * their `ref` id.
 *
 * The whole selection runs inside `target.evaluate()` and relies on the
 * `window.__pi` helpers being present in that page or frame.
 *
 * Element selection:
 *  - The root is `document.querySelector(options.selector)` when a selector is
 *    given, otherwise `document.body`; a missing root throws inside the
 *    evaluate callback.
 *  - When `options.mode` is set, its config (resolved here via
 *    `getSnapshotModeConfig`) decides the filter and `interactiveOnly` is
 *    ignored.
 *  - Otherwise `interactiveOnly` keeps only interactive descendants; when it
 *    is false, every descendant is kept and the root itself is prepended.
 *
 * @param target - Page or frame to evaluate in.
 * @param options - `selector`: optional scope; `interactiveOnly`: filter used
 *   when no mode is given; `limit`: maximum number of nodes returned;
 *   `mode`: optional snapshot mode name.
 * @returns At most `limit` node descriptions, in the order the elements were selected.
 */
export async function buildRefSnapshot(
target: Page | Frame,
options: { selector?: string; interactiveOnly: boolean; limit: number; mode?: string },
): Promise<Array<Omit<RefNode, "ref">>> {
// Resolve mode config in Node context and serialize it as plain data for the evaluate callback
const modeConfig = options.mode ? getSnapshotModeConfig(options.mode) : null;
// Everything inside this callback executes in the browser; it can only use
// its serialized argument and browser globals, not the enclosing Node scope.
return await target.evaluate(({ selector, interactiveOnly, limit, modeConfig: mc }) => {
const root = selector ? document.querySelector(selector) : document.body;
if (!root) {
throw new Error(`Selector scope not found: ${selector}`);
}
// Use injected window.__pi utilities
const pi = (window as any).__pi;
const simpleHash = pi.simpleHash;
const isVisible = pi.isVisible;
const isEnabled = pi.isEnabled;
const inferRole = pi.inferRole;
const accessibleName = pi.accessibleName;
const isInteractiveEl = pi.isInteractiveEl;
const cssPath = pi.cssPath;
const domPath = pi.domPath;
const selectorHints = pi.selectorHints;
// Mode-based element matching — used when a snapshot mode config is provided
// An element matches if ANY of the config's tags, roles, selectors or attributes applies.
const matchesMode = (el: Element, cfg: { tags: string[]; roles: string[]; selectors: string[]; ariaAttributes: string[] }): boolean => {
const tag = el.tagName.toLowerCase();
if (cfg.tags.length > 0 && cfg.tags.includes(tag)) return true;
const role = inferRole(el);
if (cfg.roles.length > 0 && cfg.roles.includes(role)) return true;
for (const sel of cfg.selectors) {
try { if (el.matches(sel)) return true; } catch { /* invalid selector, skip */ }
}
for (const attr of cfg.ariaAttributes) {
if (el.hasAttribute(attr)) return true;
}
return false;
};
// Start from every descendant of the root (the root itself is not included here).
let elements = Array.from(root.querySelectorAll("*"));
if (mc) {
// Mode takes precedence over interactiveOnly
if (mc.visibleOnly) {
// visible_only mode: include all elements that are visible
elements = elements.filter((el) => isVisible(el));
} else if (mc.useInteractiveFilter) {
// interactive mode: reuse existing isInteractiveEl
elements = elements.filter((el) => isInteractiveEl(el));
} else if (mc.containerExpand) {
// Container-expanding modes (dialog, errors): match containers, then include
// all interactive children of those containers, plus the containers themselves
const containers: Element[] = [];
const directMatches: Element[] = [];
for (const el of elements) {
if (matchesMode(el, mc)) {
// Check if this is a container element (has children)
const childEls = el.querySelectorAll("*");
if (childEls.length > 0) {
containers.push(el);
} else {
directMatches.push(el);
}
}
}
// Collect container elements + all interactive children inside containers
// Set insertion order makes childless matches come first, then each container
// followed by its interactive descendants; this is not document order.
const result = new Set<Element>(directMatches);
for (const container of containers) {
result.add(container);
const children = Array.from(container.querySelectorAll("*"));
for (const child of children) {
if (isInteractiveEl(child)) result.add(child);
}
}
elements = Array.from(result);
} else {
// Standard mode filtering by tag/role/selector/ariaAttribute
elements = elements.filter((el) => matchesMode(el, mc));
}
} else if (!interactiveOnly) {
// Unfiltered snapshot: keep all descendants and put the root first.
if (root instanceof Element) elements.unshift(root);
} else {
elements = elements.filter((el) => isInteractiveEl(el));
}
// Drop repeated elements while keeping the first occurrence's position.
const seen = new Set<Element>();
const unique = elements.filter((el) => {
if (seen.has(el)) return false;
seen.add(el);
return true;
});
// Fingerprint helpers — computed for each element in the snapshot
// Text (max 80 chars) of the closest heading found among preceding siblings or
// heading ancestors while walking up from `el`; "" when none is found below <body>.
const computeNearestHeading = (el: Element): string => {
const headingTags = new Set(["H1", "H2", "H3", "H4", "H5", "H6"]);
// Walk up ancestors looking for heading or preceding-sibling heading
let current: Element | null = el;
while (current && current !== document.body) {
// Check preceding siblings of current
let sib: Element | null = current.previousElementSibling;
while (sib) {
if (headingTags.has(sib.tagName) || sib.getAttribute("role") === "heading") {
return (sib.textContent || "").trim().replace(/\s+/g, " ").slice(0, 80);
}
sib = sib.previousElementSibling;
}
// Check if the parent itself is a heading (unlikely but possible)
const parent = current.parentElement;
if (parent && (headingTags.has(parent.tagName) || parent.getAttribute("role") === "heading")) {
return (parent.textContent || "").trim().replace(/\s+/g, " ").slice(0, 80);
}
current = parent;
}
return "";
};
// Identifier of the form that owns `el`: its `form` attribute if present, else the
// id, name, or the literal "form" of the nearest <form> ancestor; "" when there is none.
const computeFormOwnership = (el: Element): string => {
// Check form attribute (explicit form association)
const formAttr = el.getAttribute("form");
if (formAttr) return formAttr;
// Walk up ancestors looking for <form>
let current: Element | null = el.parentElement;
while (current && current !== document.body) {
if (current.tagName === "FORM") {
return (current as HTMLFormElement).id || (current as HTMLFormElement).name || "form";
}
current = current.parentElement;
}
return "";
};
// Truncate to `limit` first so the per-element work below is bounded.
return unique.slice(0, limit).map((el) => {
const tag = el.tagName.toLowerCase();
const role = inferRole(el);
const textContent = (el.textContent || "").trim().replace(/\s+/g, " ").slice(0, 200);
const childTags = Array.from(el.children).map((c) => c.tagName.toLowerCase());
return {
tag,
role,
name: accessibleName(el),
selectorHints: selectorHints(el),
isVisible: isVisible(el),
isEnabled: isEnabled(el),
// Despite the field name, this holds the CSS path produced by cssPath.
xpathOrPath: cssPath(el),
href: el.getAttribute("href") || undefined,
type: el.getAttribute("type") || undefined,
path: domPath(el),
// Hash of the first 200 chars of whitespace-normalized text content.
contentHash: simpleHash(textContent),
// Hash of tag, role and the tags of direct children.
structuralSignature: simpleHash(`${tag}|${role}|${childTags.join(",")}`),
nearestHeading: computeNearestHeading(el),
formOwnership: computeFormOwnership(el),
};
});
}, { ...options, modeConfig });
}
// ---------------------------------------------------------------------------
// resolveRefTarget
// ---------------------------------------------------------------------------
/**
 * Re-locates a previously snapshotted element in the current DOM of `target`
 * and returns a CSS selector for it.
 *
 * Runs entirely inside the page via `target.evaluate`, so the callback must
 * stay self-contained (no references to module scope). It relies on the
 * `window.__pi` utilities `cssPath` and `simpleHash` being present in the page.
 *
 * Resolution tiers, tried in order (first hit wins):
 *  1. `node.path` — child-index walk from `document.documentElement`; accepted
 *     only if the element found has the recorded tag.
 *  2. `node.selectorHints` — first hint whose first match has the recorded tag.
 *  3. tag + role attribute + name equality (case-insensitive name).
 *  4. content-hash + structural-signature fingerprint; accepted only when
 *     exactly one candidate matches.
 *
 * @param target Page or frame whose DOM is searched.
 * @param node   Ref snapshot entry to resolve.
 * @returns `{ ok: true, selector }` on success, otherwise `{ ok: false, reason }`.
 */
export async function resolveRefTarget(
  target: Page | Frame,
  node: RefNode,
): Promise<{ ok: true; selector: string } | { ok: false; reason: string }> {
  return await target.evaluate((refNode) => {
    // Use injected window.__pi utilities
    const pi = (window as any).__pi;
    const cssPath = pi.cssPath;
    const simpleHash = pi.simpleHash;

    // Follows the recorded child indices down from <html>; null if any index is out of range.
    const byPath = (): Element | null => {
      let current: Element | null = document.documentElement;
      for (const idx of refNode.path || []) {
        if (!current || idx < 0 || idx >= current.children.length) return null;
        current = current.children[idx] as Element;
      }
      return current;
    };

    // Cheap name approximation: aria-label, then value, then placeholder, then text.
    const nodeName = (el: Element): string => {
      // `.value` is only a string on form controls; on <li>, <progress> and
      // <meter> it is a number, and calling `.trim()` on it would throw and
      // abort the whole resolution. Ignore non-string values.
      const rawValue: unknown = (el as HTMLInputElement).value;
      const valueText = typeof rawValue === "string" ? rawValue.trim() : "";
      return (
        el.getAttribute("aria-label")?.trim() ||
        valueText ||
        el.getAttribute("placeholder")?.trim() ||
        (el.textContent || "").trim().replace(/\s+/g, " ").slice(0, 80)
      );
    };

    // Tier 1: path-based resolution
    const pathEl = byPath();
    if (pathEl && pathEl.tagName.toLowerCase() === refNode.tag) {
      return { ok: true as const, selector: cssPath(pathEl) };
    }

    // Tier 2: selector hints
    for (const hint of refNode.selectorHints || []) {
      try {
        const el = document.querySelector(hint);
        if (!el) continue;
        if (el.tagName.toLowerCase() !== refNode.tag) continue;
        return { ok: true as const, selector: cssPath(el) };
      } catch {
        // ignore malformed selector hint
      }
    }

    // Tier 3: role + name match
    // NOTE(review): the snapshot records `role` via inferRole(el) and `name` via
    // accessibleName(el), while this tier compares against the raw `role`
    // attribute and the local nodeName() approximation — confirm these agree
    // for elements with implicit roles.
    const candidates = Array.from(document.querySelectorAll(refNode.tag));
    const matchTarget = candidates.find((el) => {
      const role = el.getAttribute("role") || "";
      const name = nodeName(el);
      const roleMatch = !refNode.role || role === refNode.role;
      const nameMatch = !!refNode.name && name.toLowerCase() === refNode.name.toLowerCase();
      return roleMatch && nameMatch;
    });
    if (matchTarget) {
      return { ok: true as const, selector: cssPath(matchTarget) };
    }

    // Tier 4: structural signature + content hash fingerprint matching
    if (refNode.contentHash && refNode.structuralSignature) {
      const fpMatches: Element[] = [];
      for (const candidate of candidates) {
        // Recompute both hashes with the same recipe the snapshot uses.
        const tag = candidate.tagName.toLowerCase();
        const role = candidate.getAttribute("role") || "";
        const textContent = (candidate.textContent || "").trim().replace(/\s+/g, " ").slice(0, 200);
        const childTags = Array.from(candidate.children).map((c) => c.tagName.toLowerCase());
        const candidateContentHash = simpleHash(textContent);
        const candidateStructSig = simpleHash(`${tag}|${role}|${childTags.join(",")}`);
        if (candidateContentHash === refNode.contentHash && candidateStructSig === refNode.structuralSignature) {
          fpMatches.push(candidate);
        }
      }
      if (fpMatches.length === 1) {
        return { ok: true as const, selector: cssPath(fpMatches[0]) };
      }
      // Refuse to guess between several identical-looking elements.
      if (fpMatches.length > 1) {
        return { ok: false as const, reason: "multiple fingerprint matches — ambiguous" };
      }
    }

    return { ok: false as const, reason: "element not found in current DOM" };
  }, node);
}

View file

@ -0,0 +1,140 @@
/**
* browser-tools DOM settle logic
*
* Adaptive settling after browser actions. Polls for DOM quiet (mutation
* counter stable, no pending critical requests, optional focus stability)
* before returning control.
*/
import type { Frame, Page } from "playwright";
import type { AdaptiveSettleDetails, AdaptiveSettleOptions } from "./state.js";
import { getPendingCriticalRequests } from "./utils.js";
// ---------------------------------------------------------------------------
// Mutation counter (installed in-page via evaluate)
// ---------------------------------------------------------------------------
/**
 * Installs (once per document) a MutationObserver that increments
 * `window.__piMutationCounter` on every observed mutation batch.
 *
 * The counter is initialised to 0 if it is not already a number, and the
 * `window.__piMutationCounterInstalled` flag prevents a second observer from
 * being attached on repeated calls.
 */
export async function ensureMutationCounter(p: Page): Promise<void> {
  await p.evaluate(() => {
    const COUNTER = "__piMutationCounter" as const;
    const INSTALLED = "__piMutationCounterInstalled" as const;
    const globals = window as unknown as Record<string, unknown>;

    if (typeof globals[COUNTER] !== "number") {
      globals[COUNTER] = 0;
    }
    if (globals[INSTALLED]) {
      return;
    }

    // Tolerates the counter having been overwritten with a non-number.
    const bump = (): void => {
      const seen = globals[COUNTER];
      globals[COUNTER] = (typeof seen === "number" ? seen : 0) + 1;
    };

    const watcher = new MutationObserver(bump);
    const root = document.documentElement || document.body;
    watcher.observe(root, {
      subtree: true,
      childList: true,
      attributes: true,
      characterData: true,
    });

    // Only mark as installed once observe() has succeeded.
    globals[INSTALLED] = true;
  });
}
/**
 * Reads `window.__piMutationCounter` from the page.
 *
 * @returns The counter value, or 0 when it is not a number or the evaluate call fails.
 */
export async function readMutationCounter(p: Page): Promise<number> {
  // Self-contained so it can be shipped to the page as-is.
  const readInPage = (): number => {
    const globals = window as unknown as Record<string, unknown>;
    const raw = globals.__piMutationCounter;
    if (typeof raw === "number") {
      return raw;
    }
    return 0;
  };

  try {
    return await p.evaluate(readInPage);
  } catch {
    return 0;
  }
}
// ---------------------------------------------------------------------------
// Focus descriptor (for focus-stability checks)
// ---------------------------------------------------------------------------
/**
 * Builds a short string describing the currently focused element, in the form
 * `tag#id|role|name`, where `name` is the trimmed `aria-label` or `name` attribute.
 *
 * @returns "" when nothing meaningful is focused (no active element, or it is
 *          `<body>` / `<html>`) or when the evaluate call fails.
 */
export async function readFocusedDescriptor(target: Page | Frame): Promise<string> {
  // Self-contained so it can be shipped to the page as-is.
  const describeFocus = (): string => {
    const focused = document.activeElement as HTMLElement | null;
    const nothingFocused =
      !focused || focused === document.body || focused === document.documentElement;
    if (nothingFocused) {
      return "";
    }
    const tagPart = focused.tagName.toLowerCase();
    const idPart = focused.id ? `#${focused.id}` : "";
    const rolePart = focused.getAttribute("role") || "";
    const label = focused.getAttribute("aria-label") || focused.getAttribute("name") || "";
    return `${tagPart}${idPart}|${rolePart}|${label.trim()}`;
  };

  try {
    return await target.evaluate(describeFocus);
  } catch {
    return "";
  }
}
// ---------------------------------------------------------------------------
// Adaptive settle
// ---------------------------------------------------------------------------
/**
 * Waits until the page has been "quiet" for a short window after an action.
 *
 * Each poll treats any of the following as activity that restarts the quiet
 * window: a URL change, a change in the in-page mutation counter, a change of
 * the focused element (only when `checkFocusStability` is set), or a non-zero
 * count of pending critical requests.
 *
 * Option handling (values are clamped, not rejected):
 *  - `timeoutMs`      default 500, minimum 150
 *  - `pollMs`         default 40, clamped to 20..100
 *  - `quietWindowMs`  default 100, minimum 60
 *  - `checkFocusStability` default false
 *
 * @returns Timing details; `settleReason` is "timeout_fallback" when the quiet
 *          window was never reached within `timeoutMs`.
 */
export async function settleAfterActionAdaptive(
  p: Page,
  opts: AdaptiveSettleOptions = {},
): Promise<AdaptiveSettleDetails> {
  const timeoutMs = Math.max(150, opts.timeoutMs ?? 500);
  const pollMs = Math.min(100, Math.max(20, opts.pollMs ?? 40));
  const quietWindowMs = Math.max(60, opts.quietWindowMs ?? 100);
  const checkFocus = opts.checkFocusStability ?? false;

  const startedAt = Date.now();
  let polls = 0;
  let sawUrlChange = false;
  let lastActivityAt = startedAt;
  let previousUrl = p.url();

  // Best effort: the page may be mid-navigation and reject the evaluate call.
  await ensureMutationCounter(p).catch(() => {});
  let previousMutationCount = await readMutationCounter(p);
  let previousFocus = checkFocus ? await readFocusedDescriptor(p) : "";

  while (Date.now() - startedAt < timeoutMs) {
    await new Promise((resolve) => setTimeout(resolve, pollMs));
    polls += 1;
    const now = Date.now();

    const currentUrl = p.url();
    if (currentUrl !== previousUrl) {
      sawUrlChange = true;
      previousUrl = currentUrl;
      lastActivityAt = now;
      // The counter and its observer live on `window`; a new document starts
      // without them. Reinstalling is a no-op when they are still present.
      await ensureMutationCounter(p).catch(() => {});
    }

    const currentMutationCount = await readMutationCounter(p);
    // Compare with !== rather than >: after a document replacement the counter
    // restarts at 0, and a stale higher baseline would otherwise mask every
    // mutation in the new document until it caught up.
    if (currentMutationCount !== previousMutationCount) {
      if (currentMutationCount < previousMutationCount) {
        // Counter went backwards => document was replaced (e.g. reload to the
        // same URL); make sure the new document is being observed.
        await ensureMutationCounter(p).catch(() => {});
      }
      previousMutationCount = currentMutationCount;
      lastActivityAt = now;
    }

    if (checkFocus) {
      const currentFocus = await readFocusedDescriptor(p);
      if (currentFocus !== previousFocus) {
        previousFocus = currentFocus;
        lastActivityAt = now;
      }
    }

    // In-flight critical requests count as activity; skip the quiet check entirely.
    const pendingCritical = getPendingCriticalRequests(p);
    if (pendingCritical > 0) {
      lastActivityAt = now;
      continue;
    }

    if (now - lastActivityAt >= quietWindowMs) {
      return {
        settleMode: "adaptive",
        settleMs: now - startedAt,
        settleReason: sawUrlChange ? "url_changed_then_quiet" : "dom_quiet",
        settlePolls: polls,
      };
    }
  }

  return {
    settleMode: "adaptive",
    settleMs: Date.now() - startedAt,
    settleReason: "timeout_fallback",
    settlePolls: polls,
  };
}

View file

@ -0,0 +1,409 @@
/**
* browser-tools shared mutable state
*
* All mutable state lives behind accessor functions (get/set) so that
* jiti-transpiled modules see updates reliably. ES module live bindings
* (`export let`) are not guaranteed to work under jiti's CJS shim layer.
*
* State is initialized to sensible defaults and can be bulk-reset via
* `resetAllState()` (called by closeBrowser).
*/
import type { Browser, BrowserContext, Frame, Page } from "playwright";
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import path from "node:path";
import {
createActionTimeline,
createBoundedLogPusher,
createPageRegistry,
} from "./core.js";
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/**
 * Absolute path `<cwd>/.artifacts/browser`, resolved once at module load from
 * the process working directory at that moment.
 */
export const ARTIFACT_ROOT = path.resolve(process.cwd(), ".artifacts", "browser");
/** File name used for the session HAR file. NOTE(review): presumably joined onto the session artifact directory — confirm at the call site. */
export const HAR_FILENAME = "session.har";
// ---------------------------------------------------------------------------
// Type / interface definitions
// ---------------------------------------------------------------------------
/**
 * Element type of the console log buffer exposed by `getConsoleLogs()`.
 * NOTE(review): the producer of these entries is outside this file; field
 * notes marked "presumably" are inferred from names — confirm.
 */
export interface ConsoleEntry {
type: string;
/** Message text; the `console_message` wait condition in browser_batch matches against this field. */
text: string;
// presumably epoch milliseconds — TODO confirm
timestamp: number;
url: string;
// presumably the page-registry id of the originating page — TODO confirm
pageId: number;
}
/**
 * Element type of the network log buffer exposed by `getNetworkLogs()`.
 * NOTE(review): the producer of these entries is outside this file; field
 * notes marked "presumably" are inferred from names — confirm.
 */
export interface NetworkEntry {
method: string;
url: string;
// null is allowed; presumably when no response status is available — TODO confirm
status: number | null;
resourceType: string;
// presumably epoch milliseconds — TODO confirm
timestamp: number;
failed: boolean;
failureText?: string;
responseBody?: string;
// presumably the page-registry id of the originating page — TODO confirm
pageId: number;
}
/**
 * Element type of the dialog log buffer exposed by `getDialogLogs()`.
 * NOTE(review): the producer of these entries is outside this file; field
 * notes marked "presumably" are inferred from names — confirm.
 */
export interface DialogEntry {
type: string;
message: string;
// presumably epoch milliseconds — TODO confirm
timestamp: number;
url: string;
defaultValue?: string;
accepted: boolean;
// presumably the page-registry id of the originating page — TODO confirm
pageId: number;
}
/**
 * One element captured by a ref snapshot. `ToolDeps.buildRefSnapshot` yields
 * these without `ref`; `resolveRefTarget` consumes them to re-locate the
 * element in the live DOM.
 */
export interface RefNode {
ref: string;
/** Lower-cased tag name; every resolution tier requires the found element to have this tag. */
tag: string;
role: string;
name: string;
/** Candidate CSS selectors tried in order by `resolveRefTarget`; malformed entries are ignored there. */
selectorHints: string[];
isVisible: boolean;
isEnabled: boolean;
// NOTE(review): despite the name, the snapshot fills this from cssPath(el) — confirm whether XPath is ever stored here
xpathOrPath: string;
href?: string;
type?: string;
/** Child indices walked from `document.documentElement` down to the element. */
path: number[];
/** Hash of the element's trimmed, whitespace-collapsed text content, truncated to 200 characters. */
contentHash?: string;
/** Hash of `tag|role|comma-joined child tag names`. */
structuralSignature?: string;
nearestHeading?: string;
/** Owning form identifier as computed at snapshot time; "" when the element is not in a form. */
formOwnership?: string;
}
/**
 * Metadata stored alongside the current ref map (see `getRefMetadata()`).
 * NOTE(review): presumably describes the snapshot call that produced the map
 * (`selectorScope`, `interactiveOnly`, `limit`, `mode` resemble the
 * buildRefSnapshot options) — confirm at the write site.
 */
export interface RefMetadata {
url: string;
// presumably epoch milliseconds — TODO confirm
timestamp: number;
selectorScope?: string;
interactiveOnly: boolean;
limit: number;
// presumably the refVersion value in effect for this snapshot — TODO confirm
version: number;
frameContext?: string;
mode?: string;
}
/** Per-selector state; the value type of `CompactPageState.selectorStates`. */
export interface CompactSelectorState {
exists: boolean;
visible: boolean;
value: string;
// null is allowed; presumably for elements that have no checked state — TODO confirm
checked: boolean | null;
text: string;
}
/**
 * Compact page summary returned by `ToolDeps.captureCompactPageState`.
 * Pairs of these are compared with `diffCompactStates`, and the before/after
 * states of the last tracked action are stored in this shape.
 */
export interface CompactPageState {
url: string;
title: string;
focus: string;
headings: string[];
// NOTE(review): captureCompactPageState takes an `includeBodyText` option; presumably this is empty when that is off — confirm
bodyText: string;
counts: {
landmarks: number;
buttons: number;
links: number;
inputs: number;
};
dialog: {
count: number;
title: string;
};
// presumably keyed by the selector strings passed via the `selectors` capture option — TODO confirm
selectorStates: Record<string, CompactSelectorState>;
}
/** State of the active trace session, stored via `setActiveTraceSession()`; null there means no session. */
export interface TraceSessionState {
// presumably epoch milliseconds — TODO confirm
startedAt: number;
name: string;
title?: string;
path?: string;
}
/**
 * HAR bookkeeping, stored via `setHarState()`. The module default is
 * disabled, with null paths/timestamps and an `exportCount` of 0.
 */
export interface HarState {
enabled: boolean;
// NOTE(review): name suggests HAR recording must be configured when the browser context is created — confirm
configuredAtContextCreation: boolean;
path: string | null;
exportCount: number;
lastExportedPath: string | null;
// presumably epoch milliseconds — TODO confirm
lastExportedAt: number | null;
}
/**
 * Result type of `ToolDeps.captureClickTargetState` for a given selector.
 * NOTE(review): the aria* fields presumably mirror the raw attribute values
 * (null when absent) — confirm in the implementation.
 */
export interface ClickTargetStateSnapshot {
exists: boolean;
ariaExpanded: string | null;
ariaPressed: string | null;
ariaSelected: string | null;
open: boolean | null;
}
/** A single named check; a list of these is the input to `ToolDeps.verificationFromChecks`. */
export interface VerificationCheck {
name: string;
passed: boolean;
// Untyped on purpose: observed and expected values can be of any shape.
value?: unknown;
expected?: unknown;
}
/**
 * Aggregated verification outcome returned by `ToolDeps.verificationFromChecks`
 * and formatted by `ToolDeps.verificationLine`.
 */
export interface VerificationResult {
// NOTE(review): presumably derived from the `passed` flags of `checks` — confirm the aggregation rule in verificationFromChecks
verified: boolean;
checks: VerificationCheck[];
verificationSummary: string;
retryHint?: string;
}
/**
 * Tuning knobs for `settleAfterActionAdaptive`. All fields are optional;
 * out-of-range values are clamped by the consumer rather than rejected.
 */
export interface AdaptiveSettleOptions {
/** Overall budget in ms. Default 500; values below 150 are raised to 150. */
timeoutMs?: number;
/** Delay between polls in ms. Default 40; clamped to the range 20..100. */
pollMs?: number;
/** Required activity-free window in ms. Default 100; values below 60 are raised to 60. */
quietWindowMs?: number;
/** When true, a change of the focused element also counts as activity. Default false. */
checkFocusStability?: boolean;
}
/** Outcome report produced by `settleAfterActionAdaptive`. */
export interface AdaptiveSettleDetails {
settleMode: "adaptive";
/** Elapsed milliseconds between the start of settling and its return. */
settleMs: number;
/**
 * "dom_quiet": the quiet window was reached without a URL change;
 * "url_changed_then_quiet": the URL changed during settling, then the quiet window was reached;
 * "timeout_fallback": the timeout expired before the page was quiet.
 */
settleReason: "dom_quiet" | "url_changed_then_quiet" | "timeout_fallback";
/** Number of poll iterations performed. */
settlePolls: number;
}
/** Result of `ToolDeps.parseRef` for a user-supplied ref string. */
export interface ParsedRefSpec {
/** Key used to look the node up in the current ref map. */
key: string;
// null is allowed; presumably when the input carried no version prefix — TODO confirm in parseRef
version: number | null;
display: string;
}
/**
 * One assertion check as accepted by `ToolDeps.collectAssertionState`. It has
 * the same fields as the `checks` item schema of the browser_assert tool.
 */
export interface BrowserAssertionCheckInput {
/** Assertion kind, e.g. "url_contains", "text_visible", "selector_visible", "value_equals". */
kind: string;
selector?: string;
text?: string;
value?: string;
checked?: boolean;
// presumably restricts diagnostics-based checks to entries newer than this tracked action — TODO confirm
sinceActionId?: number;
}
// ---------------------------------------------------------------------------
// Mutable state variables — accessed only via get/set functions
// ---------------------------------------------------------------------------
// 1. browser — the current Browser handle, or null when none is set
let _browser: Browser | null = null;

/** Returns the current browser handle (null when unset). */
export function getBrowser(): Browser | null {
  return _browser;
}

/** Replaces the current browser handle. */
export function setBrowser(b: Browser | null): void {
  _browser = b;
}

// 2. context — the current BrowserContext, or null when none is set
let _context: BrowserContext | null = null;

/** Returns the current browser context (null when unset). */
export function getContext(): BrowserContext | null {
  return _context;
}

/** Replaces the current browser context. */
export function setContext(c: BrowserContext | null): void {
  _context = c;
}

// 3. pageRegistry — a single long-lived instance that carries its own
//    internal state; exported directly and through a getter
export const pageRegistry = createPageRegistry();

/** Returns the shared page registry instance. */
export function getPageRegistry() {
  return pageRegistry;
}

// 4. activeFrame — the currently selected frame, or null when none is set
let _activeFrame: Frame | null = null;

/** Returns the currently selected frame (null when unset). */
export function getActiveFrame(): Frame | null {
  return _activeFrame;
}

/** Replaces the currently selected frame. */
export function setActiveFrame(f: Frame | null): void {
  _activeFrame = f;
}
// 5. logPusher — bounded log push helper, created with an argument of 1000;
//    holds no state of its own, so it is exported directly
export const logPusher = createBoundedLogPusher(1000);

// 6. consoleLogs
let _consoleLogs: ConsoleEntry[] = [];

/** Returns the live console log array (not a copy). */
export function getConsoleLogs(): ConsoleEntry[] {
  return _consoleLogs;
}

/** Replaces the console log array. */
export function setConsoleLogs(logs: ConsoleEntry[]): void {
  _consoleLogs = logs;
}

// 7. networkLogs
let _networkLogs: NetworkEntry[] = [];

/** Returns the live network log array (not a copy). */
export function getNetworkLogs(): NetworkEntry[] {
  return _networkLogs;
}

/** Replaces the network log array. */
export function setNetworkLogs(logs: NetworkEntry[]): void {
  _networkLogs = logs;
}

// 8. dialogLogs
let _dialogLogs: DialogEntry[] = [];

/** Returns the live dialog log array (not a copy). */
export function getDialogLogs(): DialogEntry[] {
  return _dialogLogs;
}

/** Replaces the dialog log array. */
export function setDialogLogs(logs: DialogEntry[]): void {
  _dialogLogs = logs;
}

// 9. pendingCriticalRequestsByPage — a WeakMap cannot be emptied in place
//    (it has no clear()), so "resetting" swaps in a fresh instance
let _pendingCriticalRequestsByPage = new WeakMap<Page, number>();

/** Returns the current per-page pending-request map. */
export function getPendingCriticalRequestsByPage(): WeakMap<Page, number> {
  return _pendingCriticalRequestsByPage;
}

/** Discards all pending-request counts by installing a new, empty map. */
export function resetPendingCriticalRequestsByPage(): void {
  _pendingCriticalRequestsByPage = new WeakMap();
}
// 10. currentRefMap — ref key -> snapshotted node
let _currentRefMap: Record<string, RefNode> = {};

/** Returns the live ref map object (not a copy). */
export function getCurrentRefMap(): Record<string, RefNode> {
  return _currentRefMap;
}

/** Replaces the ref map. */
export function setCurrentRefMap(m: Record<string, RefNode>): void {
  _currentRefMap = m;
}
// 11. refVersion — numeric version of the ref map, starting at 0
let _refVersion = 0;

/** Returns the current ref version. */
export function getRefVersion(): number {
  return _refVersion;
}

/** Overwrites the current ref version. */
export function setRefVersion(v: number): void {
  _refVersion = v;
}
// 12. refMetadata — metadata for the current ref map, or null when none is set
let _refMetadata: RefMetadata | null = null;

/** Returns the metadata of the current ref map (null when unset). */
export function getRefMetadata(): RefMetadata | null {
  return _refMetadata;
}

/** Replaces the ref-map metadata. */
export function setRefMetadata(m: RefMetadata | null): void {
  _refMetadata = m;
}

// 13. actionTimeline — a single long-lived instance that carries its own
//     internal state, created with an argument of 60
export const actionTimeline = createActionTimeline(60);

/** Returns the shared action timeline instance. */
export function getActionTimeline() {
  return actionTimeline;
}

// 14. lastActionBeforeState — page state stored as the "before" side of the last action
let _lastActionBeforeState: CompactPageState | null = null;

/** Returns the stored "before" state (null when unset). */
export function getLastActionBeforeState(): CompactPageState | null {
  return _lastActionBeforeState;
}

/** Replaces the stored "before" state. */
export function setLastActionBeforeState(s: CompactPageState | null): void {
  _lastActionBeforeState = s;
}

// 15. lastActionAfterState — page state stored as the "after" side of the last action
let _lastActionAfterState: CompactPageState | null = null;

/** Returns the stored "after" state (null when unset). */
export function getLastActionAfterState(): CompactPageState | null {
  return _lastActionAfterState;
}

/** Replaces the stored "after" state. */
export function setLastActionAfterState(s: CompactPageState | null): void {
  _lastActionAfterState = s;
}
// 16. sessionStartedAt — numeric session start marker, or null when unset
let _sessionStartedAt: number | null = null;

/** Returns the session start marker (null when unset). */
export function getSessionStartedAt(): number | null {
  return _sessionStartedAt;
}

/** Replaces the session start marker. */
export function setSessionStartedAt(t: number | null): void {
  _sessionStartedAt = t;
}

// 17. sessionArtifactDir — artifact directory for the session, or null when unset
let _sessionArtifactDir: string | null = null;

/** Returns the session artifact directory (null when unset). */
export function getSessionArtifactDir(): string | null {
  return _sessionArtifactDir;
}

/** Replaces the session artifact directory. */
export function setSessionArtifactDir(d: string | null): void {
  _sessionArtifactDir = d;
}
// 18a. activeTraceSession — the trace session in progress, or null when none
let _activeTraceSession: TraceSessionState | null = null;

/** Returns the active trace session (null when none). */
export function getActiveTraceSession(): TraceSessionState | null {
  return _activeTraceSession;
}

/** Replaces the active trace session. */
export function setActiveTraceSession(t: TraceSessionState | null): void {
  _activeTraceSession = t;
}

// 18b. harState — starts as a shallow copy of the defaults below so the
//      template object itself is never handed out
const DEFAULT_HAR_STATE: HarState = {
  enabled: false,
  configuredAtContextCreation: false,
  path: null,
  exportCount: 0,
  lastExportedPath: null,
  lastExportedAt: null,
};
let _harState: HarState = { ...DEFAULT_HAR_STATE };

/** Returns the live HAR state object (not a copy). */
export function getHarState(): HarState {
  return _harState;
}

/** Replaces the HAR state object. */
export function setHarState(h: HarState): void {
  _harState = h;
}
// ---------------------------------------------------------------------------
// resetAllState — mirrors closeBrowser()'s reset logic
// ---------------------------------------------------------------------------
/**
 * Restores every piece of module state to its initial value.
 *
 * Scalar and collection state is replaced with fresh values; the two shared
 * registry objects (`pageRegistry`, `actionTimeline`) are reset in place by
 * clearing their fields, so existing references to them stay valid.
 */
export function resetAllState(): void {
  // Browser handles
  _browser = null;
  _context = null;
  _activeFrame = null;

  // Page registry (mutated in place)
  pageRegistry.pages = [];
  pageRegistry.activePageId = null;
  pageRegistry.nextId = 1;

  // Captured logs and request tracking
  _consoleLogs = [];
  _networkLogs = [];
  _dialogLogs = [];
  _pendingCriticalRequestsByPage = new WeakMap();

  // Ref snapshot
  _currentRefMap = {};
  _refVersion = 0;
  _refMetadata = null;

  // Action tracking (timeline mutated in place)
  _lastActionBeforeState = null;
  _lastActionAfterState = null;
  actionTimeline.entries = [];
  actionTimeline.nextId = 1;

  // Session, trace and HAR bookkeeping
  _sessionStartedAt = null;
  _sessionArtifactDir = null;
  _activeTraceSession = null;
  _harState = { ...DEFAULT_HAR_STATE };
}
// ---------------------------------------------------------------------------
// ToolDeps — interface that tool registration functions consume
// ---------------------------------------------------------------------------
/**
 * Bundles the infrastructure functions that tool registration files need.
 * Built once in the index.ts orchestrator and passed to each register* function.
 *
 * Tool files receive this as the `deps` argument of their `register*`
 * function (for example `registerAssertionTools(pi, deps)`) and call its
 * members instead of importing the implementations directly.
 */
export interface ToolDeps {
// Lifecycle
ensureBrowser: () => Promise<{ browser: Browser; context: BrowserContext; page: Page }>;
closeBrowser: () => Promise<void>;
// NOTE(review): non-nullable return — presumably throws when there is no active page, unlike getActivePageOrNull; confirm
getActivePage: () => Page;
getActiveTarget: () => Page | Frame;
getActivePageOrNull: () => Page | null;
// Page event wiring
attachPageListeners: (p: Page, pageId: number) => void;
// Capture & summary
captureCompactPageState: (
p: Page,
options?: { selectors?: string[]; includeBodyText?: boolean; target?: Page | Frame }
) => Promise<CompactPageState>;
postActionSummary: (p: Page, target?: Page | Frame) => Promise<string>;
formatCompactStateSummary: (state: CompactPageState) => string;
constrainScreenshot: (page: Page, buffer: Buffer, mimeType: string, quality: number) => Promise<Buffer>;
captureErrorScreenshot: (p: Page | null) => Promise<{ data: string; mimeType: string } | null>;
getRecentErrors: (pageUrl: string) => string;
// Settle
settleAfterActionAdaptive: (p: Page, opts?: AdaptiveSettleOptions) => Promise<AdaptiveSettleDetails>;
ensureMutationCounter: (p: Page) => Promise<void>;
// Refs
// The snapshot yields nodes without `ref`; the caller is expected to assign ref keys.
buildRefSnapshot: (
target: Page | Frame,
options: { selector?: string; interactiveOnly: boolean; limit: number; mode?: string }
) => Promise<Array<Omit<RefNode, "ref">>>;
resolveRefTarget: (
target: Page | Frame,
node: RefNode
) => Promise<{ ok: true; selector: string } | { ok: false; reason: string }>;
parseRef: (input: string) => ParsedRefSpec;
formatVersionedRef: (version: number, key: string) => string;
staleRefGuidance: (refDisplay: string, reason: string) => string;
// Action tracking
// Return types are derived from core.js so they stay in sync with beginAction / finishAction.
beginTrackedAction: (tool: string, params: unknown, beforeUrl: string) => ReturnType<typeof import("./core.js").beginAction>;
finishTrackedAction: (
actionId: number,
updates: {
status: "success" | "error";
afterUrl?: string;
verificationSummary?: string;
warningSummary?: string;
diffSummary?: string;
changed?: boolean;
error?: string;
beforeState?: CompactPageState;
afterState?: CompactPageState;
}
) => ReturnType<typeof import("./core.js").finishAction>;
// Utilities (forwarded from utils.ts)
truncateText: (text: string) => string;
verificationFromChecks: (checks: VerificationCheck[], retryHint?: string) => VerificationResult;
verificationLine: (verification: VerificationResult) => string;
collectAssertionState: (
p: Page,
checks: BrowserAssertionCheckInput[],
target?: Page | Frame
) => Promise<Record<string, unknown>>;
formatAssertionText: (result: ReturnType<typeof import("./core.js").evaluateAssertionChecks>) => string;
formatDiffText: (diff: ReturnType<typeof import("./core.js").diffCompactStates>) => string;
getUrlHash: (url: string) => string;
countOpenDialogs: (target: Page | Frame) => Promise<number>;
captureClickTargetState: (target: Page | Frame, selector: string) => Promise<ClickTargetStateSnapshot>;
readInputLikeValue: (target: Page | Frame, selector?: string) => Promise<string | null>;
firstErrorLine: (err: unknown) => string;
captureAccessibilityMarkdown: (selector?: string) => Promise<{ snapshot: string; scope: string; source: string }>;
resolveAccessibilityScope: (selector?: string) => Promise<{ selector?: string; scope: string; source: string }>;
getLivePagesSnapshot: () => Promise<ReturnType<typeof import("./core.js").registryListPages>>;
// The three *Since helpers take an optional tracked-action id.
// NOTE(review): presumably they return data recorded after that action — confirm in utils.ts
getSinceTimestamp: (sinceActionId?: number) => number;
getConsoleEntriesSince: (sinceActionId?: number) => ConsoleEntry[];
getNetworkEntriesSince: (sinceActionId?: number) => NetworkEntry[];
// Artifact helpers
writeArtifactFile: (filePath: string, content: string | Uint8Array) => Promise<{ path: string; bytes: number }>;
copyArtifactFile: (sourcePath: string, destinationPath: string) => Promise<{ path: string; bytes: number }>;
ensureSessionArtifactDir: () => Promise<string>;
buildSessionArtifactPath: (filename: string) => string;
getSessionArtifactMetadata: () => Record<string, unknown>;
sanitizeArtifactName: (value: string, fallback: string) => string;
formatArtifactTimestamp: (timestamp: number) => string;
}

View file

@ -0,0 +1,342 @@
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@gsd/pi-ai";
import {
diffCompactStates,
evaluateAssertionChecks,
findAction,
runBatchSteps,
validateWaitParams,
createRegionStableScript,
parseThreshold,
includesNeedle,
} from "../core.js";
import type { ToolDeps, CompactPageState } from "../state.js";
import {
getConsoleLogs,
getCurrentRefMap,
getLastActionBeforeState,
getLastActionAfterState,
setLastActionBeforeState,
setLastActionAfterState,
getActionTimeline,
} from "../state.js";
export function registerAssertionTools(pi: ExtensionAPI, deps: ToolDeps): void {
// -------------------------------------------------------------------------
// browser_assert
// -------------------------------------------------------------------------
pi.registerTool({
name: "browser_assert",
label: "Browser Assert",
description:
"Run one or more explicit browser assertions and return structured PASS/FAIL results. Prefer this for verification instead of inferring success from prose summaries.",
promptGuidelines: [
"Prefer browser_assert for browser verification instead of inferring success from summaries.",
"When finishing UI work, explicit browser assertions should usually be the final verification step.",
"Use checks for URL, text, selector state, value, and browser diagnostics whenever those signals are available.",
],
parameters: Type.Object({
checks: Type.Array(
Type.Object({
kind: Type.String({ description: "Assertion kind, e.g. url_contains, text_visible, selector_visible, value_equals, no_console_errors, no_failed_requests, request_url_seen, response_status, console_message_matches, network_count, console_count, no_console_errors_since, no_failed_requests_since" }),
selector: Type.Optional(Type.String()),
text: Type.Optional(Type.String()),
value: Type.Optional(Type.String()),
checked: Type.Optional(Type.Boolean()),
sinceActionId: Type.Optional(Type.Number()),
})
),
}),
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
try {
const { page: p } = await deps.ensureBrowser();
const target = deps.getActiveTarget();
const state = await deps.collectAssertionState(p, params.checks, target);
const result = evaluateAssertionChecks({ checks: params.checks, state });
return {
content: [{ type: "text", text: `Browser assert\n\n${deps.formatAssertionText(result)}` }],
details: { ...result, url: state.url, title: state.title },
isError: !result.verified,
};
} catch (err: any) {
return {
content: [{ type: "text", text: `Browser assert failed: ${err.message}` }],
details: { error: err.message },
isError: true,
};
}
},
});
// -------------------------------------------------------------------------
// browser_diff
// -------------------------------------------------------------------------
pi.registerTool({
name: "browser_diff",
label: "Browser Diff",
description:
"Report meaningful browser-state changes. By default compares the current page to the most recent tracked action state. Use this to understand what changed after a click, submit, or navigation.",
promptGuidelines: [
"Use browser_diff after ambiguous or high-impact actions when you need to know what changed.",
"Prefer browser_diff over requesting a broad new page inspection when the question is change detection.",
],
parameters: Type.Object({
sinceActionId: Type.Optional(Type.Number({ description: "Optional action id to diff against. Uses that action's stored after-state when available." })),
}),
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
try {
const { page: p } = await deps.ensureBrowser();
const target = deps.getActiveTarget();
const current = await deps.captureCompactPageState(p, { includeBodyText: true, target });
let baseline: CompactPageState | null = null;
if (params.sinceActionId) {
const actionTimeline = getActionTimeline();
const action = findAction(actionTimeline, params.sinceActionId) as { afterState?: CompactPageState } | null;
baseline = action?.afterState ?? null;
}
if (!baseline) {
baseline = getLastActionAfterState() ?? getLastActionBeforeState();
}
if (!baseline) {
return {
content: [{ type: "text", text: "Browser diff unavailable: no prior tracked browser state exists yet." }],
details: { changed: false, changes: [], summary: "No prior tracked state" },
isError: true,
};
}
const diff = diffCompactStates(baseline, current);
return {
content: [{ type: "text", text: `Browser diff\n\n${deps.formatDiffText(diff)}` }],
details: diff,
};
} catch (err: any) {
return {
content: [{ type: "text", text: `Browser diff failed: ${err.message}` }],
details: { error: err.message },
isError: true,
};
}
},
});
// -------------------------------------------------------------------------
// browser_batch
// -------------------------------------------------------------------------
pi.registerTool({
name: "browser_batch",
label: "Browser Batch",
description:
"Execute multiple explicit browser steps in one call. Prefer this for obvious action sequences like click → type → wait → assert to reduce round trips and token usage.",
promptGuidelines: [
"If the next 2-5 browser actions are obvious and low-risk, prefer browser_batch over multiple tiny browser calls.",
"Use browser_batch for explicit sequences like click → type → submit → wait → assert.",
"Keep browser_batch steps explicit; do not use it as a speculative planner.",
],
parameters: Type.Object({
steps: Type.Array(
Type.Object({
action: StringEnum(["navigate", "click", "type", "key_press", "wait_for", "assert", "click_ref", "fill_ref"] as const),
selector: Type.Optional(Type.String()),
text: Type.Optional(Type.String()),
url: Type.Optional(Type.String()),
key: Type.Optional(Type.String()),
condition: Type.Optional(Type.String()),
value: Type.Optional(Type.String()),
threshold: Type.Optional(Type.String()),
timeout: Type.Optional(Type.Number()),
clearFirst: Type.Optional(Type.Boolean()),
submit: Type.Optional(Type.Boolean()),
ref: Type.Optional(Type.String()),
checks: Type.Optional(Type.Array(Type.Object({
kind: Type.String({ description: "Assertion kind, e.g. url_contains, text_visible, selector_visible, value_equals, no_console_errors, no_failed_requests, request_url_seen, response_status, console_message_matches, network_count, console_count, no_console_errors_since, no_failed_requests_since" }),
selector: Type.Optional(Type.String()),
text: Type.Optional(Type.String()),
value: Type.Optional(Type.String()),
checked: Type.Optional(Type.Boolean()),
sinceActionId: Type.Optional(Type.Number()),
}))),
})
),
stopOnFailure: Type.Optional(Type.Boolean({ description: "Stop after the first failing step (default: true)." })),
finalSummaryOnly: Type.Optional(Type.Boolean({ description: "Return only the compact final batch summary in content while keeping step results in details." })),
}),
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
let actionId: number | null = null;
let beforeState: CompactPageState | null = null;
try {
const { page: p } = await deps.ensureBrowser();
const target = deps.getActiveTarget();
beforeState = await deps.captureCompactPageState(p, { includeBodyText: true, target });
actionId = deps.beginTrackedAction("browser_batch", params, beforeState.url).id;
const executeStep = async (step: any, index: number) => {
const stepTarget = deps.getActiveTarget();
try {
switch (step.action) {
case "navigate": {
await p.goto(step.url, { waitUntil: "domcontentloaded", timeout: 30000 });
await p.waitForLoadState("networkidle", { timeout: 5000 }).catch(() => {});
return { ok: true, action: step.action, url: p.url() };
}
case "click": {
await stepTarget.locator(step.selector).first().click({ timeout: step.timeout ?? 8000 });
await deps.settleAfterActionAdaptive(p);
return { ok: true, action: step.action, selector: step.selector, url: p.url() };
}
case "type": {
if (step.clearFirst) {
await stepTarget.locator(step.selector).first().fill("");
}
await stepTarget.locator(step.selector).first().fill(step.text ?? "", { timeout: step.timeout ?? 8000 });
if (step.submit) await p.keyboard.press("Enter");
await deps.settleAfterActionAdaptive(p);
return { ok: true, action: step.action, selector: step.selector, text: step.text };
}
case "key_press": {
await p.keyboard.press(step.key);
await deps.settleAfterActionAdaptive(p, { checkFocusStability: true });
return { ok: true, action: step.action, key: step.key };
}
case "wait_for": {
const timeout = step.timeout ?? 10000;
const waitValidation = validateWaitParams({ condition: step.condition, value: step.value, threshold: step.threshold });
if (waitValidation) throw new Error(waitValidation.error);
if (step.condition === "selector_visible") await stepTarget.waitForSelector(step.value, { state: "visible", timeout });
else if (step.condition === "selector_hidden") await stepTarget.waitForSelector(step.value, { state: "hidden", timeout });
else if (step.condition === "url_contains") await p.waitForURL((url) => url.toString().includes(step.value), { timeout });
else if (step.condition === "network_idle") await p.waitForLoadState("networkidle", { timeout });
else if (step.condition === "delay") await new Promise((resolve) => setTimeout(resolve, parseInt(step.value ?? "1000", 10)));
else if (step.condition === "text_visible") {
await stepTarget.waitForFunction(
(needle: string) => (document.body?.innerText ?? "").toLowerCase().includes(needle.toLowerCase()),
step.value!,
{ timeout }
);
}
else if (step.condition === "text_hidden") {
await stepTarget.waitForFunction(
(needle: string) => !(document.body?.innerText ?? "").toLowerCase().includes(needle.toLowerCase()),
step.value!,
{ timeout }
);
}
else if (step.condition === "request_completed") {
await deps.getActivePage().waitForResponse(
(resp: any) => resp.url().includes(step.value!),
{ timeout }
);
}
else if (step.condition === "console_message") {
const needle = step.value!;
const startTime = Date.now();
let found = false;
while (Date.now() - startTime < timeout) {
if (getConsoleLogs().find((entry) => includesNeedle(entry.text, needle))) { found = true; break; }
await new Promise((resolve) => setTimeout(resolve, 100));
}
if (!found) throw new Error(`Timed out waiting for console message matching "${needle}" (${timeout}ms)`);
}
else if (step.condition === "element_count") {
const threshold = parseThreshold(step.threshold ?? ">=1");
if (!threshold) throw new Error(`element_count threshold is malformed: "${step.threshold}"`);
const selector = step.value!;
const op = threshold.op;
const n = threshold.n;
await stepTarget.waitForFunction(
({ selector, op, n }: { selector: string; op: string; n: number }) => {
const count = document.querySelectorAll(selector).length;
switch (op) {
case ">=": return count >= n;
case "<=": return count <= n;
case "==": return count === n;
case ">": return count > n;
case "<": return count < n;
default: return false;
}
},
{ selector, op, n },
{ timeout }
);
}
else if (step.condition === "region_stable") {
const script = createRegionStableScript(step.value!);
await stepTarget.waitForFunction(script, undefined, { timeout, polling: 200 });
}
else throw new Error(`Unsupported wait condition: ${step.condition}`);
return { ok: true, action: step.action, condition: step.condition, value: step.value };
}
case "assert": {
const state = await deps.collectAssertionState(p, step.checks ?? [], stepTarget);
const assertion = evaluateAssertionChecks({ checks: step.checks ?? [], state });
return { ok: assertion.verified, action: step.action, summary: assertion.summary, assertion };
}
case "click_ref": {
const parsedRef = deps.parseRef(step.ref);
const currentRefMap = getCurrentRefMap();
const node = currentRefMap[parsedRef.key];
if (!node) throw new Error(`Unknown ref: ${step.ref}`);
const resolved = await deps.resolveRefTarget(stepTarget, node);
if (!resolved.ok) throw new Error(resolved.reason);
await stepTarget.locator(resolved.selector).first().click({ timeout: step.timeout ?? 8000 });
await deps.settleAfterActionAdaptive(p);
return { ok: true, action: step.action, ref: step.ref };
}
case "fill_ref": {
const parsedRef = deps.parseRef(step.ref);
const currentRefMap = getCurrentRefMap();
const node = currentRefMap[parsedRef.key];
if (!node) throw new Error(`Unknown ref: ${step.ref}`);
const resolved = await deps.resolveRefTarget(stepTarget, node);
if (!resolved.ok) throw new Error(resolved.reason);
if (step.clearFirst) await stepTarget.locator(resolved.selector).first().fill("");
await stepTarget.locator(resolved.selector).first().fill(step.text ?? "", { timeout: step.timeout ?? 8000 });
if (step.submit) await p.keyboard.press("Enter");
await deps.settleAfterActionAdaptive(p);
return { ok: true, action: step.action, ref: step.ref, text: step.text };
}
default:
throw new Error(`Unsupported batch action: ${step.action}`);
}
} catch (err: any) {
return { ok: false, action: step.action, index, message: err.message };
}
};
const run = await runBatchSteps({
steps: params.steps,
executeStep,
stopOnFailure: params.stopOnFailure !== false,
});
const batchEndTarget = deps.getActiveTarget();
const afterState = await deps.captureCompactPageState(p, { includeBodyText: true, target: batchEndTarget });
const diff = diffCompactStates(beforeState!, afterState);
setLastActionBeforeState(beforeState!);
setLastActionAfterState(afterState);
deps.finishTrackedAction(actionId!, {
status: run.ok ? "success" : "error",
afterUrl: afterState.url,
diffSummary: diff.summary,
changed: diff.changed,
error: run.ok ? undefined : run.summary,
beforeState: beforeState!,
afterState,
});
const summary = `${run.summary}\n${run.stepResults.map((step: any, index: number) => `- ${index + 1}. ${step.action}: ${step.ok ? "PASS" : "FAIL"}${step.message ? ` (${step.message})` : ""}`).join("\n")}`;
return {
content: [{ type: "text", text: params.finalSummaryOnly ? run.summary : `Browser batch\nAction: ${actionId}\n\n${summary}\n\nDiff:\n${deps.formatDiffText(diff)}` }],
details: { actionId, diff, ...run },
isError: !run.ok,
};
} catch (err: any) {
if (actionId !== null) {
deps.finishTrackedAction(actionId, { status: "error", afterUrl: deps.getActivePageOrNull()?.url() ?? "", error: err.message, beforeState: beforeState ?? undefined });
}
return {
content: [{ type: "text", text: `Browser batch failed: ${err.message}` }],
details: { error: err.message, actionId },
isError: true,
};
}
},
});
}

View file

@ -0,0 +1,492 @@
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@gsd/pi-ai";
import type { ToolDeps } from "../state.js";
import {
getConsoleLogs,
setConsoleLogs,
getNetworkLogs,
setNetworkLogs,
getDialogLogs,
setDialogLogs,
} from "../state.js";
export function registerInspectionTools(pi: ExtensionAPI, deps: ToolDeps): void {
// -------------------------------------------------------------------------
// browser_get_console_logs
// -------------------------------------------------------------------------
// Returns a snapshot of the buffered console entries, one formatted line per
// entry. The shared buffer is emptied unless `clear` is explicitly false.
pi.registerTool({
  name: "browser_get_console_logs",
  label: "Browser Console Logs",
  description:
    "Get all buffered browser console logs and JavaScript errors captured since the last clear. Each entry includes timestamp and page URL. Note: JS errors are also auto-surfaced in interaction tool responses — use this for the full log.",
  parameters: Type.Object({
    clear: Type.Optional(
      Type.Boolean({ description: "Clear the buffer after returning logs (default: true)" })
    ),
  }),
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Copy first so that resetting the buffer cannot affect what is returned.
    const entries = [...getConsoleLogs()];
    if (params.clear !== false) {
      setConsoleLogs([]);
    }

    const count = entries.length;
    if (count === 0) {
      return {
        content: [{ type: "text", text: "No console logs captured." }],
        details: { logs: [], count: 0 },
      };
    }

    const lines: string[] = [];
    for (const entry of entries) {
      // Characters 11..23 of the ISO timestamp are the HH:MM:SS.mmm portion.
      const clock = new Date(entry.timestamp).toISOString().slice(11, 23);
      lines.push(`[${clock}] [${entry.type.toUpperCase()}] ${entry.text}`);
    }
    const body = deps.truncateText(lines.join("\n"));

    return {
      content: [
        {
          type: "text",
          text: `${count} console log(s):\n\n${body}`,
        },
      ],
      details: { logs: entries, count },
    };
  },
});
// -------------------------------------------------------------------------
// browser_get_network_logs
// -------------------------------------------------------------------------
// Returns the buffered network entries, optionally narrowed by `filter`.
// Note that the whole buffer is cleared (unless `clear` is false) before the
// filter is applied, so entries excluded by the filter are discarded as well.
pi.registerTool({
  name: "browser_get_network_logs",
  label: "Browser Network Logs",
  description:
    "Get buffered network requests and responses. Shows method, URL, status code, and resource type for all requests. Includes response body for failed requests (4xx/5xx). Use to debug API failures, CORS issues, missing resources, and auth problems.",
  parameters: Type.Object({
    clear: Type.Optional(
      Type.Boolean({
        description: "Clear the buffer after returning logs (default: true)",
      })
    ),
    filter: Type.Optional(
      StringEnum(["all", "errors", "fetch-xhr"] as const)
    ),
  }),
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const shouldClear = params.clear !== false;
    // Snapshot before clearing so the returned list is unaffected by the reset.
    let logs = [...getNetworkLogs()];
    if (shouldClear) {
      setNetworkLogs([]);
    }

    if (params.filter === "errors") {
      // "errors" = transport-level failures plus HTTP responses with status >= 400.
      logs = logs.filter(e => e.failed || (e.status !== null && e.status >= 400));
    } else if (params.filter === "fetch-xhr") {
      logs = logs.filter(e => e.resourceType === "fetch" || e.resourceType === "xhr");
    }

    if (logs.length === 0) {
      return {
        content: [{ type: "text", text: "No network requests captured." }],
        details: { logs: [], count: 0 },
      };
    }

    const formatted = logs
      .map((entry) => {
        // Characters 11..23 of the ISO timestamp are the HH:MM:SS.mmm portion.
        const time = new Date(entry.timestamp).toISOString().slice(11, 23);
        const status = entry.failed
          ? `FAILED (${entry.failureText})`
          : `${entry.status}`;
        // Separate URL and status explicitly; without a separator the status
        // code is glued onto the end of the URL and the line becomes ambiguous.
        let line = `[${time}] ${entry.method} ${entry.url} → ${status} (${entry.resourceType})`;
        if (entry.responseBody) {
          line += `\n Response: ${entry.responseBody}`;
        }
        return line;
      })
      .join("\n");

    const truncated = deps.truncateText(formatted);

    return {
      content: [
        {
          type: "text",
          text: `${logs.length} network request(s):\n\n${truncated}`,
        },
      ],
      // Only the count is exposed in details here; the entries themselves are
      // returned through the formatted text.
      details: { count: logs.length },
    };
  },
});
// -------------------------------------------------------------------------
// browser_get_dialog_logs
// -------------------------------------------------------------------------
// Returns a snapshot of the buffered dialog events, one formatted line per
// event. The shared buffer is emptied unless `clear` is explicitly false.
pi.registerTool({
  name: "browser_get_dialog_logs",
  label: "Browser Dialog Logs",
  description:
    "Get buffered JavaScript dialog events (alert, confirm, prompt, beforeunload). Dialogs are auto-accepted to prevent page freezes. Use this to see what dialogs appeared and their messages.",
  parameters: Type.Object({
    clear: Type.Optional(
      Type.Boolean({ description: "Clear the buffer after returning logs (default: true)" })
    ),
  }),
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Copy first so that resetting the buffer cannot affect what is returned.
    const events = [...getDialogLogs()];
    if (params.clear !== false) {
      setDialogLogs([]);
    }

    const count = events.length;
    if (count === 0) {
      return {
        content: [{ type: "text", text: "No dialog events captured." }],
        details: { logs: [], count: 0 },
      };
    }

    const rendered = events.map((evt) => {
      // Characters 11..23 of the ISO timestamp are the HH:MM:SS.mmm portion.
      const clock = new Date(evt.timestamp).toISOString().slice(11, 23);
      // The default value is only shown when it is non-empty.
      const defaultPart = evt.defaultValue ? ` (default: "${evt.defaultValue}")` : "";
      return `[${clock}] ${evt.type}: "${evt.message}"${defaultPart} → auto-accepted`;
    });
    const body = deps.truncateText(rendered.join("\n"));

    return {
      content: [
        {
          type: "text",
          text: `${count} dialog(s):\n\n${body}`,
        },
      ],
      details: { logs: events, count },
    };
  },
});
// -------------------------------------------------------------------------
// browser_evaluate
// -------------------------------------------------------------------------
// Evaluates a caller-supplied expression against the active target and
// returns the result as (truncated) pretty-printed JSON text.
pi.registerTool({
  name: "browser_evaluate",
  label: "Browser Evaluate",
  description:
    "Execute a JavaScript expression in the browser context and return the result. Useful for reading DOM state, checking values, etc.",
  parameters: Type.Object({
    expression: Type.String({
      description: "JavaScript expression to evaluate in the page context",
    }),
  }),
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Turns an evaluation result into display text. `undefined` is spelled out,
    // and values JSON.stringify rejects fall back to a type placeholder.
    const toDisplayText = (value: unknown): string => {
      if (value === undefined) {
        return "undefined";
      }
      try {
        return JSON.stringify(value, null, 2) ?? "undefined";
      } catch {
        return `[non-serializable: ${typeof value}]`;
      }
    };

    try {
      await deps.ensureBrowser();
      const activeTarget = deps.getActiveTarget();
      const outcome = await activeTarget.evaluate(params.expression);
      const text = deps.truncateText(toDisplayText(outcome));
      return {
        content: [{ type: "text", text }],
        details: { expression: params.expression },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Evaluation failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_get_accessibility_tree
// -------------------------------------------------------------------------
// Takes an ARIA snapshot of either the first element matching `selector` or
// of <body>, and returns it together with the current viewport size.
pi.registerTool({
  name: "browser_get_accessibility_tree",
  label: "Browser Accessibility Tree",
  description:
    "Get the accessibility tree of the current page as structured text. Shows roles, names, labels, values, and states of all interactive elements. Use this to understand page structure before clicking — it reveals buttons, inputs, links, and their labels without needing to guess CSS selectors or coordinates. Much more reliable than inspecting the DOM directly.",
  parameters: Type.Object({
    selector: Type.Optional(
      Type.String({
        description:
          "Scope the accessibility tree to a specific element by CSS selector (e.g. 'main', 'form', '#modal'). If omitted, returns the full page tree.",
      })
    ),
  }),
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { page: p } = await deps.ensureBrowser();
      const activeTarget = deps.getActiveTarget();

      // Scoped requests use the first match; unscoped requests snapshot <body>.
      const rootLocator = params.selector
        ? activeTarget.locator(params.selector).first()
        : activeTarget.locator("body");
      const snapshot: string = await rootLocator.ariaSnapshot();
      const truncated = deps.truncateText(snapshot);

      const scope = params.selector ? `element "${params.selector}"` : "full page";

      // viewportSize() may yield nothing, in which case "unknown" is reported.
      const size = p.viewportSize();
      let vpText = "unknown";
      if (size) {
        vpText = `${size.width}x${size.height}`;
      }

      return {
        content: [
          {
            type: "text",
            text: `Accessibility tree for ${scope} (viewport: ${vpText}):\n\n${truncated}`,
          },
        ],
        // details carries the untruncated snapshot.
        details: { scope, snapshot, viewport: vpText },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Accessibility tree failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_find
// -------------------------------------------------------------------------
// Runs a DOM query inside the active target and returns a compact description
// of each match (tag, id, first two classes, a few attributes, trimmed text).
// Filtering order: optional `selector` picks the root, optional `role` picks
// the candidate set via a role→CSS map, optional `text` narrows by substring.
pi.registerTool({
  name: "browser_find",
  label: "Browser Find",
  description:
    "Find elements on the page by text content, ARIA role, or CSS selector. Returns only the matched nodes as a compact accessibility snapshot — far cheaper than browser_get_accessibility_tree. Use this after any action to locate a specific button, input, heading, or link before clicking it.",
  promptGuidelines: [
    "Use browser_find for cheap targeted discovery before requesting the full accessibility tree.",
    "Prefer browser_find when you need one button, input, heading, dialog, or alert rather than a full-page structure dump.",
  ],
  parameters: Type.Object({
    text: Type.Optional(
      Type.String({
        description: "Find elements whose visible text contains this string (case-insensitive).",
      })
    ),
    role: Type.Optional(
      Type.String({
        description: "ARIA role to filter by, e.g. 'button', 'link', 'heading', 'textbox', 'dialog', 'alert'.",
      })
    ),
    selector: Type.Optional(
      Type.String({
        description: "CSS selector to scope the search. If omitted, searches the full page.",
      })
    ),
    limit: Type.Optional(
      Type.Number({
        description: "Maximum number of results to return (default: 20).",
      })
    ),
  }),
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await deps.ensureBrowser();
      const target = deps.getActiveTarget();

      // slice(0, limit) with a negative limit drops items from the END of the
      // candidate list instead of capping it, so negative / non-finite values
      // fall back to the default cap.
      const requestedLimit = params.limit ?? 20;
      const limit =
        Number.isFinite(requestedLimit) && requestedLimit >= 0 ? Math.floor(requestedLimit) : 20;

      const results = await target.evaluate(({ text, role, selector, limit }) => {
        const root = selector ? document.querySelector(selector) : document.body;
        if (!root) return [];

        let candidates: Element[];
        if (role) {
          // Known roles expand to native elements plus the explicit role
          // attribute; unknown roles match the role attribute only.
          const roleMap: Record<string, string> = {
            button: 'button,[role="button"]',
            link: 'a[href],[role="link"]',
            heading: 'h1,h2,h3,h4,h5,h6,[role="heading"]',
            textbox: 'input:not([type="hidden"]):not([type="checkbox"]):not([type="radio"]):not([type="submit"]):not([type="button"]),textarea,[role="textbox"]',
            checkbox: 'input[type="checkbox"],[role="checkbox"]',
            radio: 'input[type="radio"],[role="radio"]',
            combobox: 'select,[role="combobox"]',
            dialog: 'dialog,[role="dialog"]',
            alert: '[role="alert"]',
            navigation: 'nav,[role="navigation"]',
            listitem: 'li,[role="listitem"]',
          };
          const cssForRole = roleMap[role.toLowerCase()] ?? `[role="${role}"]`;
          candidates = Array.from(root.querySelectorAll(cssForRole));
        } else {
          candidates = Array.from(root.querySelectorAll('*'));
        }

        if (text) {
          // Case-insensitive substring match against text content and a few
          // label-like attributes.
          const lower = text.toLowerCase();
          candidates = candidates.filter(el =>
            (el.textContent ?? "").toLowerCase().includes(lower) ||
            (el.getAttribute("aria-label") ?? "").toLowerCase().includes(lower) ||
            (el.getAttribute("placeholder") ?? "").toLowerCase().includes(lower) ||
            (el.getAttribute("value") ?? "").toLowerCase().includes(lower)
          );
        }

        return candidates.slice(0, limit).map(el => {
          const tag = el.tagName.toLowerCase();
          const id = el.id ? `#${el.id}` : "";
          const classes = Array.from(el.classList).slice(0, 2).map(c => `.${c}`).join("");
          const ariaLabel = el.getAttribute("aria-label") ?? "";
          const placeholder = el.getAttribute("placeholder") ?? "";
          const textContent = (el.textContent ?? "").trim().slice(0, 80);
          const role = el.getAttribute("role") ?? "";
          const type = el.getAttribute("type") ?? "";
          const href = el.getAttribute("href") ?? "";
          // The cast is only for the type checker: on elements such as <li>,
          // <progress> or <meter> the `value` property is a number at runtime.
          const value = (el as HTMLInputElement).value ?? "";
          return { tag, id, classes, ariaLabel, placeholder, textContent, role, type, href, value };
        });
      }, { text: params.text, role: params.role, selector: params.selector, limit });

      if (results.length === 0) {
        return {
          content: [{ type: "text", text: "No elements found matching the criteria." }],
          details: { count: 0 },
        };
      }

      const lines = results.map((r: any) => {
        const parts: string[] = [`${r.tag}${r.id}${r.classes}`];
        if (r.role) parts.push(`role="${r.role}"`);
        if (r.type) parts.push(`type="${r.type}"`);
        if (r.ariaLabel) parts.push(`aria-label="${r.ariaLabel}"`);
        if (r.placeholder) parts.push(`placeholder="${r.placeholder}"`);
        if (r.href) parts.push(`href="${r.href.slice(0, 60)}"`);
        // Coerce before slicing: a numeric `value` (e.g. <li value="3">) has no
        // .slice() and would otherwise fail the whole call with a TypeError.
        if (r.value) parts.push(`value="${String(r.value).slice(0, 40)}"`);
        // Text is omitted when an aria-label is already shown.
        if (r.textContent && !r.ariaLabel) parts.push(`"${r.textContent}"`);
        return " " + parts.join(" ");
      });

      const criteria: string[] = [];
      if (params.role) criteria.push(`role="${params.role}"`);
      if (params.text) criteria.push(`text="${params.text}"`);
      if (params.selector) criteria.push(`within="${params.selector}"`);

      return {
        content: [
          {
            type: "text",
            text: `Found ${results.length} element(s) [${criteria.join(", ")}]:\n${lines.join("\n")}`,
          },
        ],
        details: { count: results.length, results },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Find failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_get_page_source
// -------------------------------------------------------------------------
// Returns (truncated) HTML: the outerHTML of the first element matching
// `selector` when one is given, otherwise the active target's full content.
pi.registerTool({
  name: "browser_get_page_source",
  label: "Browser Page Source",
  description:
    "Get the current HTML source of the page (or a specific element). Use when you need to inspect the actual DOM structure — verify semantic HTML, check that elements rendered correctly, debug why a selector isn't matching, or audit accessibility markup. Output is truncated for large pages.",
  parameters: Type.Object({
    selector: Type.Optional(
      Type.String({
        description:
          "CSS selector to scope the output to a specific element (e.g. 'main', 'form', '#app'). If omitted, returns the full page HTML.",
      })
    ),
  }),
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await deps.ensureBrowser();
      const activeTarget = deps.getActiveTarget();

      const markup: string = params.selector
        ? await activeTarget.locator(params.selector).first().evaluate((el: Element) => el.outerHTML)
        : await activeTarget.content();

      const shortened = deps.truncateText(markup);
      const scope = params.selector ? `element "${params.selector}"` : "full page";

      return {
        content: [{ type: "text", text: `HTML source of ${scope}:\n\n${shortened}` }],
        details: { scope },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Get page source failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
}

View file

@ -0,0 +1,865 @@
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@gsd/pi-ai";
import {
diffCompactStates,
} from "../core.js";
import type { ToolDeps, CompactPageState } from "../state.js";
import {
setLastActionBeforeState,
setLastActionAfterState,
} from "../state.js";
import { readFocusedDescriptor } from "../settle.js";
export function registerInteractionTools(pi: ExtensionAPI, deps: ToolDeps): void {
// -------------------------------------------------------------------------
// browser_click
// -------------------------------------------------------------------------
// Clicks by CSS selector (with getByRole and coordinate fallbacks) or by raw
// x,y coordinates. Captures compact page state before and after, records the
// action via begin/finishTrackedAction, and reports a set of verification
// checks (URL, hash, target state, dialog count) plus a state diff.
pi.registerTool({
  name: "browser_click",
  label: "Browser Click",
  description:
    "Click an element on the page by CSS selector or by x,y coordinates. Returns a compact page summary plus lightweight verification details after clicking. Provide either selector or both x and y. Prefer selector over coordinates — selectors are more reliable because they handle shadow DOM via getByRole fallbacks. Use coordinates only when you have no other option.",
  parameters: Type.Object({
    selector: Type.Optional(
      Type.String({ description: "CSS selector of the element to click. The tool will try getByRole fallbacks if the CSS selector fails (handles shadow DOM)." })
    ),
    x: Type.Optional(Type.Number({ description: "X coordinate to click" })),
    y: Type.Optional(Type.Number({ description: "Y coordinate to click" })),
  }),
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    let actionId: number | null = null;
    let beforeState: CompactPageState | null = null;

    // Escapes regex metacharacters so an accessible name such as "Search (beta)"
    // or "C++" is matched literally instead of being parsed as a pattern.
    const escapeRegExp = (raw: string): string => raw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

    try {
      // Validate arguments BEFORE a tracked action is started; returning after
      // beginTrackedAction without finishing it would leave the action dangling.
      const hasPoint = params.x !== undefined && params.y !== undefined;
      if (!params.selector && !hasPoint) {
        return {
          content: [
            {
              type: "text",
              text: "Must provide either selector or both x and y coordinates",
            },
          ],
          details: {},
          isError: true,
        };
      }

      const { page: p } = await deps.ensureBrowser();
      const target = deps.getActiveTarget();
      beforeState = await deps.captureCompactPageState(p, { selectors: params.selector ? [params.selector] : [], includeBodyText: true, target });
      actionId = deps.beginTrackedAction("browser_click", params, beforeState.url).id;

      // Baselines for the post-click verification checks.
      const beforeUrl = p.url();
      const beforeHash = deps.getUrlHash(beforeUrl);
      const beforeDialogCount = await deps.countOpenDialogs(target);
      const beforeTargetState = params.selector
        ? await deps.captureClickTargetState(target, params.selector)
        : null;

      if (params.selector) {
        try {
          await target.locator(params.selector).first().click({ timeout: 5000 });
        } catch {
          // Fallback 1: derive an accessible name from an attribute selector
          // ([aria-label|name|placeholder="…"]) and try a fixed list of roles.
          const nameMatch = params.selector.match(/\[(?:aria-label|name|placeholder)="([^"]+)"\]/i);
          const roleName = nameMatch?.[1];
          const namePattern = roleName ? new RegExp(escapeRegExp(roleName), "i") : null;
          let clicked = false;
          // NOTE(review): when no name can be derived this clicks the first element
          // of each role on the page, which may be unrelated to the selector —
          // confirm this is the intended best-effort behavior.
          for (const role of ["combobox", "searchbox", "textbox", "button", "link"] as const) {
            try {
              const loc = namePattern
                ? target.getByRole(role, { name: namePattern })
                : target.getByRole(role);
              await loc.first().click({ timeout: 3000 });
              clicked = true;
              break;
            } catch { /* try next role */ }
          }
          if (!clicked) {
            // Fallback 2: raw coordinates, if the caller supplied them as well.
            if (params.x !== undefined && params.y !== undefined) {
              await p.mouse.click(params.x, params.y);
            } else {
              throw new Error(`Could not click selector "${params.selector}" — element not found (shadow DOM?)`);
            }
          }
        }
      } else if (params.x !== undefined && params.y !== undefined) {
        await p.mouse.click(params.x, params.y);
      }

      const settle = await deps.settleAfterActionAdaptive(p);

      const url = p.url();
      const hash = deps.getUrlHash(url);
      const afterDialogCount = await deps.countOpenDialogs(target);
      const afterTargetState = params.selector
        ? await deps.captureClickTargetState(target, params.selector)
        : null;
      // Only meaningful for selector clicks; coordinate clicks have no target state.
      const targetStateChanged = !!beforeTargetState && !!afterTargetState && (
        beforeTargetState.exists !== afterTargetState.exists ||
        beforeTargetState.ariaExpanded !== afterTargetState.ariaExpanded ||
        beforeTargetState.ariaPressed !== afterTargetState.ariaPressed ||
        beforeTargetState.ariaSelected !== afterTargetState.ariaSelected ||
        beforeTargetState.open !== afterTargetState.open
      );
      const verification = deps.verificationFromChecks(
        [
          { name: "url_changed", passed: url !== beforeUrl, value: url, expected: `!= ${beforeUrl}` },
          { name: "hash_changed", passed: hash !== beforeHash, value: hash, expected: `!= ${beforeHash}` },
          { name: "target_state_changed", passed: targetStateChanged, value: afterTargetState, expected: beforeTargetState },
          { name: "dialog_open", passed: afterDialogCount > beforeDialogCount, value: afterDialogCount, expected: `> ${beforeDialogCount}` },
        ],
        "Try a more specific selector or click a clearly interactive element."
      );

      const clickTarget = params.selector ?? `(${params.x}, ${params.y})`;
      const summary = await deps.postActionSummary(p, target);
      const jsErrors = deps.getRecentErrors(p.url());
      const afterState = await deps.captureCompactPageState(p, { selectors: params.selector ? [params.selector] : [], includeBodyText: true, target });
      const diff = diffCompactStates(beforeState!, afterState);
      setLastActionBeforeState(beforeState!);
      setLastActionAfterState(afterState);
      deps.finishTrackedAction(actionId!, {
        status: "success",
        afterUrl: afterState.url,
        verificationSummary: verification.verificationSummary,
        warningSummary: jsErrors.trim() || undefined,
        diffSummary: diff.summary,
        changed: diff.changed,
        beforeState: beforeState!,
        afterState,
      });

      return {
        content: [{ type: "text", text: `Clicked: ${clickTarget}\nURL: ${url}\nAction: ${actionId}\n${deps.verificationLine(verification)}${jsErrors}\n\nDiff:\n${deps.formatDiffText(diff)}\n\nPage summary:\n${summary}` }],
        details: { target: clickTarget, url, actionId, diff, ...settle, ...verification },
      };
    } catch (err: any) {
      // Close out the tracked action (if one was started) before reporting.
      if (actionId !== null) {
        deps.finishTrackedAction(actionId, { status: "error", afterUrl: deps.getActivePageOrNull()?.url() ?? "", error: err.message, beforeState: beforeState ?? undefined });
      }
      const errorShot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Click failed: ${err.message}` }];
      if (errorShot) {
        content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
      }
      return {
        content,
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_drag
// -------------------------------------------------------------------------
// Drags the source element onto the target element via dragAndDrop, waits for
// the page to settle, and returns a page summary. On failure an error
// screenshot is attached when one could be captured.
pi.registerTool({
  name: "browser_drag",
  label: "Browser Drag",
  description:
    "Drag an element and drop it onto another element. Use for sortable lists, kanban boards, sliders, and any drag-and-drop UI.",
  parameters: Type.Object({
    sourceSelector: Type.String({ description: "CSS selector of the element to drag" }),
    targetSelector: Type.String({ description: "CSS selector of the element to drop onto" }),
  }),
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { sourceSelector, targetSelector } = params;
      const { page: p } = await deps.ensureBrowser();
      const activeTarget = deps.getActiveTarget();

      await activeTarget.dragAndDrop(sourceSelector, targetSelector, { timeout: 10000 });

      const settle = await deps.settleAfterActionAdaptive(p);
      const pageSummary = await deps.postActionSummary(p, activeTarget);
      const recentErrors = deps.getRecentErrors(p.url());

      return {
        content: [
          {
            type: "text",
            text: `Dragged "${sourceSelector}" → "${targetSelector}"${recentErrors}\n\nPage summary:\n${pageSummary}`,
          },
        ],
        details: { source: sourceSelector, target: targetSelector, ...settle },
      };
    } catch (err: any) {
      const shot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Drag failed: ${err.message}` }];
      // The screenshot is optional; append it only when capture succeeded.
      if (shot) {
        content.push({ type: "image", data: shot.data, mimeType: shot.mimeType });
      }
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
// -------------------------------------------------------------------------
// browser_type
// -------------------------------------------------------------------------
// Types text into an input. With a selector it either fills atomically
// (default) or focuses and types key-by-key (`slowly`), falling back to
// getByRole lookups when the CSS selector fails. Without a selector it types
// into whatever element currently has focus. Captures page state before and
// after, records a tracked action, and reports verification checks.
pi.registerTool({
  name: "browser_type",
  label: "Browser Type",
  description:
    "Type text into an input element. By default uses atomic fill (clears and sets value instantly). Use 'slowly' for character-by-character typing when you need to trigger key handlers (e.g. search autocomplete). Use 'submit' to press Enter after typing. Returns a compact page summary plus lightweight verification details. IMPORTANT: Always provide a selector — do NOT rely on coordinate clicks to focus an input before calling this. CSS attribute selectors like combobox[aria-label='X'] work for most inputs; for shadow DOM inputs (e.g. Google Search), the tool automatically tries getByRole fallbacks.",
  parameters: Type.Object({
    text: Type.String({ description: "Text to type" }),
    selector: Type.Optional(
      Type.String({ description: "CSS selector of the input to type into (clicks it first). Examples: 'input[name=q]', 'textarea', 'combobox[aria-label=\"Search\"]'. The tool will try getByRole fallbacks if the CSS selector fails." })
    ),
    clearFirst: Type.Optional(
      Type.Boolean({
        description:
          "Clear the input's existing value before typing (default: false). Use this when replacing existing text.",
      })
    ),
    submit: Type.Optional(
      Type.Boolean({
        description: "Press Enter after typing to submit the form (default: false).",
      })
    ),
    slowly: Type.Optional(
      Type.Boolean({
        description:
          "Type one character at a time instead of filling atomically. Use when you need to trigger key handlers (e.g. search autocomplete). Default: false.",
      })
    ),
  }),
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    let actionId: number | null = null;
    let beforeState: CompactPageState | null = null;
    try {
      const { page: p } = await deps.ensureBrowser();
      const target = deps.getActiveTarget();
      beforeState = await deps.captureCompactPageState(p, { selectors: params.selector ? [params.selector] : [], includeBodyText: true, target });
      actionId = deps.beginTrackedAction("browser_type", params, beforeState.url).id;
      const beforeUrl = p.url();

      // Escapes regex metacharacters so an accessible name such as "Search (beta)"
      // or "C++" is matched literally instead of being parsed as a pattern.
      const escapeRegExp = (raw: string): string => raw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

      // Builds the role-based fallback locators (combobox, searchbox, textbox),
      // filtered by the accessible name found in an attribute selector if any.
      const roleLocators = (selector: string) => {
        const nameMatch = selector.match(/\[(?:aria-label|name|placeholder)="([^"]+)"\]/i);
        const roleName = nameMatch?.[1];
        const namePattern = roleName ? new RegExp(escapeRegExp(roleName), "i") : null;
        return (["combobox", "searchbox", "textbox"] as const).map((role) =>
          namePattern ? target.getByRole(role, { name: namePattern }) : target.getByRole(role)
        );
      };

      // Clicks the first role-based fallback that accepts a click.
      const focusViaRole = async (selector: string): Promise<boolean> => {
        for (const loc of roleLocators(selector)) {
          try {
            await loc.first().click({ timeout: 3000 });
            return true;
          } catch { /* try next */ }
        }
        return false;
      };

      // Fills the first role-based fallback that accepts fill().
      const fillViaRole = async (selector: string): Promise<boolean> => {
        for (const loc of roleLocators(selector)) {
          try {
            await loc.first().fill(params.text, { timeout: 3000 });
            return true;
          } catch { /* try next */ }
        }
        return false;
      };

      // Focuses the input (CSS selector first, then roles) and optionally clears
      // it with select-all + delete. Throws when nothing could be focused.
      const focusForTyping = async (selector: string): Promise<void> => {
        let focused = false;
        try {
          await target.locator(selector).first().click({ timeout: 5000 });
          focused = true;
        } catch {
          focused = await focusViaRole(selector);
        }
        if (!focused) throw new Error(`Could not focus selector "${selector}"`);
        if (params.clearFirst) {
          // NOTE(review): "Control+A" may not select-all on macOS — confirm
          // whether a platform-aware shortcut is needed here.
          await p.keyboard.press("Control+A");
          await p.keyboard.press("Delete");
        }
      };

      if (params.selector) {
        if (params.slowly) {
          await focusForTyping(params.selector);
          await p.keyboard.type(params.text);
        } else {
          // Atomic path: fill() by CSS selector, then by role. fill() replaces
          // the existing value, so clearFirst needs no extra work when it succeeds.
          let filled = false;
          try {
            await target.locator(params.selector).first().fill(params.text, { timeout: 5000 });
            filled = true;
          } catch { /* fall through */ }
          if (!filled) {
            filled = await fillViaRole(params.selector);
          }
          if (!filled) {
            // Last resort: focus the element and type key-by-key.
            await focusForTyping(params.selector);
            await target.locator(":focus").pressSequentially(params.text, { timeout: 5000 }).catch(() =>
              p.keyboard.type(params.text)
            );
          }
        }
      } else {
        const hasFocus = await target.evaluate(() => {
          const el = document.activeElement;
          return !!(el && el !== document.body && el !== document.documentElement);
        });
        if (!hasFocus) {
          // The tracked action was already started above; finish it so this
          // early return does not leave it dangling.
          deps.finishTrackedAction(actionId!, {
            status: "error",
            afterUrl: p.url(),
            error: "no focused element",
            beforeState: beforeState ?? undefined,
          });
          return {
            content: [{ type: "text", text: "Type failed: no element is focused. Use browser_click to focus an input first, or provide a selector." }],
            details: { error: "no focused element" },
            isError: true,
          };
        }
        await target.locator(":focus").pressSequentially(params.text, { timeout: 10000 }).catch(() =>
          p.keyboard.type(params.text)
        );
      }

      if (params.submit) {
        await p.keyboard.press("Enter");
      }

      const settle = await deps.settleAfterActionAdaptive(p);

      const typedValue = await deps.readInputLikeValue(target, params.selector);
      const afterUrl = p.url();
      const verification = deps.verificationFromChecks(
        [
          { name: "value_equals_expected", passed: typedValue === params.text, value: typedValue, expected: params.text },
          { name: "value_contains_expected", passed: typeof typedValue === "string" && typedValue.includes(params.text), value: typedValue, expected: params.text },
          { name: "url_changed_after_submit", passed: !!params.submit && afterUrl !== beforeUrl, value: afterUrl, expected: `!= ${beforeUrl}` },
        ],
        "Try clearFirst=true, use a more specific selector, or set slowly=true for key-driven inputs."
      );

      const typeTarget = params.selector ? ` into "${params.selector}"` : "";
      const summary = await deps.postActionSummary(p, target);
      const jsErrors = deps.getRecentErrors(p.url());
      const afterState = await deps.captureCompactPageState(p, { selectors: params.selector ? [params.selector] : [], includeBodyText: true, target });
      const diff = diffCompactStates(beforeState!, afterState);
      setLastActionBeforeState(beforeState!);
      setLastActionAfterState(afterState);
      deps.finishTrackedAction(actionId!, {
        status: "success",
        afterUrl: afterState.url,
        verificationSummary: verification.verificationSummary,
        warningSummary: jsErrors.trim() || undefined,
        diffSummary: diff.summary,
        changed: diff.changed,
        beforeState: beforeState!,
        afterState,
      });

      return {
        content: [{ type: "text", text: `Typed "${params.text}"${typeTarget}\nAction: ${actionId}\n${deps.verificationLine(verification)}${jsErrors}\n\nDiff:\n${deps.formatDiffText(diff)}\n\nPage summary:\n${summary}` }],
        details: { text: params.text, selector: params.selector, typedValue, actionId, diff, ...settle, ...verification },
      };
    } catch (err: any) {
      // Close out the tracked action (if one was started) before reporting.
      if (actionId !== null) {
        deps.finishTrackedAction(actionId, { status: "error", afterUrl: deps.getActivePageOrNull()?.url() ?? "", error: err.message, beforeState: beforeState ?? undefined });
      }
      const errorShot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Type failed: ${err.message}` }];
      if (errorShot) {
        content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
      }
      return {
        content,
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_upload_file
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_upload_file",
  label: "Browser Upload File",
  description:
    "Set files on a file input element. The selector must target an <input type=\"file\"> element. Accepts one or more absolute file paths.",
  parameters: Type.Object({
    selector: Type.String({
      description: 'CSS selector targeting the <input type="file"> element',
    }),
    files: Type.Array(Type.String({ description: "Absolute path to a file" }), {
      description: "One or more file paths to upload",
    }),
  }),
  /**
   * Sets the given files on the first element matching `params.selector`
   * in the active target, waits for the page to settle, and reports the
   * uploaded paths together with a page summary.
   *
   * On failure the result is flagged `isError` and, when an error
   * screenshot could be captured, carries it as an image entry.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const browser = await deps.ensureBrowser();
      const page = browser.page;
      const scope = deps.getActiveTarget();

      // A single leading "@" is removed from every path before it is handed to the browser.
      const paths: string[] = [];
      for (const raw of params.files) {
        paths.push(raw.replace(/^@/, ""));
      }

      const fileInput = scope.locator(params.selector).first();
      await fileInput.setInputFiles(paths);

      const settle = await deps.settleAfterActionAdaptive(page);
      const summary = await deps.postActionSummary(page, scope);
      const jsErrors = deps.getRecentErrors(page.url());
      const text = `Uploaded ${paths.length} file(s) to "${params.selector}": ${paths.join(", ")}${jsErrors}\n\nPage summary:\n${summary}`;
      return {
        content: [{ type: "text", text }],
        details: { selector: params.selector, files: paths, ...settle },
      };
    } catch (err: any) {
      const shot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Upload failed: ${err.message}` }];
      if (shot) {
        content.push({ type: "image", data: shot.data, mimeType: shot.mimeType });
      }
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
// -------------------------------------------------------------------------
// browser_scroll
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_scroll",
  label: "Browser Scroll",
  description: "Scroll the page up or down by a given number of pixels. Returns scroll position (px and percentage) and an accessibility snapshot of the visible content.",
  parameters: Type.Object({
    direction: StringEnum(["up", "down"] as const),
    amount: Type.Optional(
      Type.Number({ description: "Pixels to scroll (default: 300)" })
    ),
  }),
  /**
   * Dispatches a mouse-wheel scroll on the page in `params.direction`,
   * waits for the page to settle, then reads the scroll offset and
   * document/viewport heights from the active target.
   *
   * The reported percentage is the scroll offset relative to the maximum
   * scrollable distance (0 when the content does not overflow).
   * Failures return an `isError` result with the error message only.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { page: p } = await deps.ensureBrowser();
      const target = deps.getActiveTarget();
      // `direction` alone decides the sign of the wheel delta. Without the
      // Math.abs a negative amount would scroll the opposite way and be
      // reported as e.g. "Scrolled up by -300px".
      const pixels = Math.abs(params.amount ?? 300);
      const delta = params.direction === "up" ? -pixels : pixels;
      await p.mouse.wheel(0, delta);
      const settle = await deps.settleAfterActionAdaptive(p);
      // Evaluated in the active target, which is where the summary below is taken from as well.
      const scrollInfo = await target.evaluate(() => ({
        scrollY: Math.round(window.scrollY),
        scrollHeight: document.documentElement.scrollHeight,
        clientHeight: document.documentElement.clientHeight,
      }));
      const maxScroll = scrollInfo.scrollHeight - scrollInfo.clientHeight;
      // Guard the division: a document that fits the viewport has no scrollable range.
      const percent = maxScroll > 0 ? Math.round((scrollInfo.scrollY / maxScroll) * 100) : 0;
      const summary = await deps.postActionSummary(p, target);
      const jsErrors = deps.getRecentErrors(p.url());
      return {
        content: [
          {
            type: "text",
            text: `Scrolled ${params.direction} by ${pixels}px\n` +
              `Position: ${scrollInfo.scrollY}px / ${scrollInfo.scrollHeight}px (${percent}% down)\n` +
              `Viewport height: ${scrollInfo.clientHeight}px${jsErrors}\n\nPage summary:\n${summary}`,
          },
        ],
        details: { direction: params.direction, amount: pixels, ...scrollInfo, percent, ...settle },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Scroll failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_hover
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_hover",
  label: "Browser Hover",
  description:
    "Move the mouse over an element to trigger hover states — reveals tooltips, dropdown menus, CSS :hover effects, and other hover-dependent UI. Returns a compact page summary showing the resulting hover state.",
  parameters: Type.Object({
    selector: Type.String({
      description: "CSS selector of the element to hover over",
    }),
  }),
  /**
   * Hovers the first element matching `params.selector` in the active
   * target (10 s timeout), waits for the page to settle, and returns a
   * page summary of the resulting state.
   *
   * On failure the result is flagged `isError` and includes an error
   * screenshot when one could be captured.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const browser = await deps.ensureBrowser();
      const page = browser.page;
      const scope = deps.getActiveTarget();

      const hoverTarget = scope.locator(params.selector).first();
      await hoverTarget.hover({ timeout: 10000 });

      const settle = await deps.settleAfterActionAdaptive(page);
      const summary = await deps.postActionSummary(page, scope);
      const jsErrors = deps.getRecentErrors(page.url());
      const text = `Hovering over "${params.selector}"${jsErrors}\n\nPage summary:\n${summary}`;
      return {
        content: [{ type: "text", text }],
        details: { selector: params.selector, ...settle },
      };
    } catch (err: any) {
      const shot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Hover failed: ${err.message}` }];
      if (shot) {
        content.push({ type: "image", data: shot.data, mimeType: shot.mimeType });
      }
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
// -------------------------------------------------------------------------
// browser_key_press
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_key_press",
  label: "Browser Key Press",
  description:
    "Press a keyboard key or key combination. Returns a compact page summary plus lightweight verification details after the key press. Use for: submitting forms (Enter), closing modals (Escape), navigating focusable elements (Tab / Shift+Tab), operating dropdowns and menus (ArrowDown, ArrowUp, Space), copying/pasting (Meta+C, Meta+V). Key names follow the DOM KeyboardEvent key convention.",
  parameters: Type.Object({
    key: Type.String({
      description:
        "Key or combination to press, e.g. 'Enter', 'Escape', 'Tab', 'ArrowDown', 'ArrowUp', 'Space', 'Meta+A', 'Shift+Tab', 'Control+Enter'",
    }),
  }),
  /**
   * Presses `params.key` on the page keyboard as a tracked action.
   *
   * The page state, URL, focused-element descriptor and open-dialog count
   * are sampled before and after the key press; the before/after pairs
   * feed three verification checks (URL changed, focus changed, dialog
   * count increased) and a state diff that is stored as the last action's
   * before/after state.
   *
   * On failure an already-started tracked action is finished with status
   * "error", and the result carries an error screenshot when available.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Both live outside the try so the catch handler can close the tracked action.
    let trackedId: number | null = null;
    let pageBefore: CompactPageState | null = null;
    try {
      const browser = await deps.ensureBrowser();
      const page = browser.page;
      const scope = deps.getActiveTarget();

      pageBefore = await deps.captureCompactPageState(page, { includeBodyText: true, target: scope });
      trackedId = deps.beginTrackedAction("browser_key_press", params, pageBefore.url).id;

      // Baseline samples taken immediately before the key press.
      const urlBefore = page.url();
      const beforeFocus = await readFocusedDescriptor(scope);
      const dialogsBefore = await deps.countOpenDialogs(scope);

      await page.keyboard.press(params.key);
      const settle = await deps.settleAfterActionAdaptive(page, { checkFocusStability: true });

      // The same samples again, once the page has settled.
      const urlAfter = page.url();
      const afterFocus = await readFocusedDescriptor(scope);
      const dialogsAfter = await deps.countOpenDialogs(scope);

      const verification = deps.verificationFromChecks(
        [
          { name: "url_changed", passed: urlAfter !== urlBefore, value: urlAfter, expected: `!= ${urlBefore}` },
          { name: "focus_changed", passed: afterFocus !== beforeFocus, value: afterFocus, expected: `!= ${beforeFocus}` },
          { name: "dialog_open", passed: dialogsAfter > dialogsBefore, value: dialogsAfter, expected: `> ${dialogsBefore}` },
        ],
        "If this key should trigger UI changes, confirm focus is on the intended element first."
      );

      const summary = await deps.postActionSummary(page, scope);
      const jsErrors = deps.getRecentErrors(page.url());
      const pageAfter = await deps.captureCompactPageState(page, { includeBodyText: true, target: scope });
      const diff = diffCompactStates(pageBefore, pageAfter);
      setLastActionBeforeState(pageBefore);
      setLastActionAfterState(pageAfter);
      deps.finishTrackedAction(trackedId, {
        status: "success",
        afterUrl: pageAfter.url,
        verificationSummary: verification.verificationSummary,
        warningSummary: jsErrors.trim() || undefined,
        diffSummary: diff.summary,
        changed: diff.changed,
        beforeState: pageBefore,
        afterState: pageAfter,
      });

      const text = `Pressed "${params.key}"\nAction: ${trackedId}\n${deps.verificationLine(verification)}${jsErrors}\n\nDiff:\n${deps.formatDiffText(diff)}\n\nPage summary:\n${summary}`;
      return {
        content: [{ type: "text", text }],
        details: { key: params.key, beforeFocus, afterFocus, actionId: trackedId, diff, ...settle, ...verification },
      };
    } catch (err: any) {
      if (trackedId !== null) {
        deps.finishTrackedAction(trackedId, {
          status: "error",
          afterUrl: deps.getActivePageOrNull()?.url() ?? "",
          error: err.message,
          beforeState: pageBefore ?? undefined,
        });
      }
      const shot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Key press failed: ${err.message}` }];
      if (shot) {
        content.push({ type: "image", data: shot.data, mimeType: shot.mimeType });
      }
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
// -------------------------------------------------------------------------
// browser_select_option
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_select_option",
  label: "Browser Select Option",
  description:
    "Select an option from a <select> dropdown element by its visible label or value. Returns a compact page summary plus lightweight verification details. For custom-built dropdowns use browser_click to open them then browser_click to pick the option.",
  parameters: Type.Object({
    selector: Type.String({
      description: "CSS selector targeting the <select> element",
    }),
    option: Type.String({
      description:
        "The option to select — can be the visible label text or the value attribute. Will try label first, then value.",
    }),
  }),
  /**
   * Selects `params.option` in the element matching `params.selector` as
   * a tracked action: first by label, and if that attempt throws, by
   * value (each attempt has a 5 s timeout).
   *
   * Afterwards the selected values and labels are read back from the
   * element and checked against the requested option (exact match on
   * values, case-insensitive substring match on labels). A before/after
   * state diff is recorded as the last action's state.
   *
   * On failure an already-started tracked action is finished with status
   * "error", and the result carries an error screenshot when available.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Both live outside the try so the catch handler can close the tracked action.
    let trackedId: number | null = null;
    let pageBefore: CompactPageState | null = null;
    try {
      const browser = await deps.ensureBrowser();
      const page = browser.page;
      const scope = deps.getActiveTarget();

      pageBefore = await deps.captureCompactPageState(page, { selectors: [params.selector], includeBodyText: true, target: scope });
      trackedId = deps.beginTrackedAction("browser_select_option", params, pageBefore.url).id;

      let selected: string[];
      try {
        selected = await scope.selectOption(params.selector, { label: params.option }, { timeout: 5000 });
      } catch {
        // Any failure of the label attempt falls through to a value attempt.
        selected = await scope.selectOption(params.selector, { value: params.option }, { timeout: 5000 });
      }
      const settle = await deps.settleAfterActionAdaptive(page);

      // Read the selection back from the DOM; a non-<select> element yields empty lists.
      const selectedState = await scope.locator(params.selector).first().evaluate((el) => {
        const selectedValues: string[] = [];
        const selectedLabels: string[] = [];
        if (el instanceof HTMLSelectElement) {
          for (const opt of Array.from(el.selectedOptions || [])) {
            selectedValues.push(opt.value);
            selectedLabels.push((opt.textContent || "").trim());
          }
        }
        return { selectedValues, selectedLabels };
      });

      const needle = params.option.toLowerCase();
      const valueMatches = selectedState.selectedValues.includes(params.option);
      const labelMatches = selectedState.selectedLabels.some((label) => label.toLowerCase().includes(needle));
      const verification = deps.verificationFromChecks(
        [
          { name: "selected_values_include_option", passed: valueMatches, value: selectedState.selectedValues, expected: params.option },
          { name: "selected_labels_include_option", passed: labelMatches, value: selectedState.selectedLabels, expected: params.option },
        ],
        "Confirm whether the target select uses option label or value, then retry with that exact text."
      );

      const summary = await deps.postActionSummary(page, scope);
      const jsErrors = deps.getRecentErrors(page.url());
      const pageAfter = await deps.captureCompactPageState(page, { selectors: [params.selector], includeBodyText: true, target: scope });
      const diff = diffCompactStates(pageBefore, pageAfter);
      setLastActionBeforeState(pageBefore);
      setLastActionAfterState(pageAfter);
      deps.finishTrackedAction(trackedId, {
        status: "success",
        afterUrl: pageAfter.url,
        verificationSummary: verification.verificationSummary,
        warningSummary: jsErrors.trim() || undefined,
        diffSummary: diff.summary,
        changed: diff.changed,
        beforeState: pageBefore,
        afterState: pageAfter,
      });

      const text = `Selected "${params.option}" in "${params.selector}". Values: ${selected.join(", ")}\nAction: ${trackedId}\n${deps.verificationLine(verification)}${jsErrors}\n\nDiff:\n${deps.formatDiffText(diff)}\n\nPage summary:\n${summary}`;
      return {
        content: [{ type: "text", text }],
        details: { selector: params.selector, option: params.option, selected, selectedState, actionId: trackedId, diff, ...settle, ...verification },
      };
    } catch (err: any) {
      if (trackedId !== null) {
        deps.finishTrackedAction(trackedId, {
          status: "error",
          afterUrl: deps.getActivePageOrNull()?.url() ?? "",
          error: err.message,
          beforeState: pageBefore ?? undefined,
        });
      }
      const shot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Select option failed: ${err.message}` }];
      if (shot) {
        content.push({ type: "image", data: shot.data, mimeType: shot.mimeType });
      }
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
// -------------------------------------------------------------------------
// browser_set_checked
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_set_checked",
  label: "Browser Set Checked",
  description:
    "Check or uncheck a checkbox or radio button. More reliable than clicking for form elements where you need a specific state.",
  parameters: Type.Object({
    selector: Type.String({
      description: "CSS selector targeting the checkbox or radio input",
    }),
    checked: Type.Boolean({
      description: "true to check, false to uncheck",
    }),
  }),
  /**
   * Sets the checked state of the first element matching
   * `params.selector` to `params.checked` (10 s timeout) as a tracked
   * action, then reads the state back and verifies that it matches.
   *
   * A before/after state diff is recorded as the last action's state.
   * On failure an already-started tracked action is finished with status
   * "error", and the result carries an error screenshot when available.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Both live outside the try so the catch handler can close the tracked action.
    let trackedId: number | null = null;
    let pageBefore: CompactPageState | null = null;
    try {
      const browser = await deps.ensureBrowser();
      const page = browser.page;
      const scope = deps.getActiveTarget();

      pageBefore = await deps.captureCompactPageState(page, { selectors: [params.selector], includeBodyText: true, target: scope });
      trackedId = deps.beginTrackedAction("browser_set_checked", params, pageBefore.url).id;

      await scope.locator(params.selector).first().setChecked(params.checked, { timeout: 10000 });
      const settle = await deps.settleAfterActionAdaptive(page);

      // A failed read-back is reported as null rather than aborting the tool.
      const actualChecked = await scope.locator(params.selector).first().isChecked().catch(() => null);
      const verification = deps.verificationFromChecks(
        [
          { name: "checked_state_matches", passed: actualChecked === params.checked, value: actualChecked, expected: params.checked },
        ],
        "Ensure selector points to a checkbox/radio input and retry."
      );

      const summary = await deps.postActionSummary(page, scope);
      const jsErrors = deps.getRecentErrors(page.url());
      const pageAfter = await deps.captureCompactPageState(page, { selectors: [params.selector], includeBodyText: true, target: scope });
      const diff = diffCompactStates(pageBefore, pageAfter);
      setLastActionBeforeState(pageBefore);
      setLastActionAfterState(pageAfter);
      deps.finishTrackedAction(trackedId, {
        status: "success",
        afterUrl: pageAfter.url,
        verificationSummary: verification.verificationSummary,
        warningSummary: jsErrors.trim() || undefined,
        diffSummary: diff.summary,
        changed: diff.changed,
        beforeState: pageBefore,
        afterState: pageAfter,
      });

      let stateWord: string;
      if (params.checked) {
        stateWord = "checked";
      } else {
        stateWord = "unchecked";
      }
      const text = `Set "${params.selector}" to ${stateWord}\nAction: ${trackedId}\n${deps.verificationLine(verification)}${jsErrors}\n\nDiff:\n${deps.formatDiffText(diff)}\n\nPage summary:\n${summary}`;
      return {
        content: [{ type: "text", text }],
        details: { selector: params.selector, checked: params.checked, actualChecked, actionId: trackedId, diff, ...settle, ...verification },
      };
    } catch (err: any) {
      if (trackedId !== null) {
        deps.finishTrackedAction(trackedId, {
          status: "error",
          afterUrl: deps.getActivePageOrNull()?.url() ?? "",
          error: err.message,
          beforeState: pageBefore ?? undefined,
        });
      }
      const shot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Set checked failed: ${err.message}` }];
      if (shot) {
        content.push({ type: "image", data: shot.data, mimeType: shot.mimeType });
      }
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
// -------------------------------------------------------------------------
// browser_set_viewport
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_set_viewport",
  label: "Browser Set Viewport",
  description:
    "Resize the browser viewport to test responsive layouts at different screen sizes. Use presets for common breakpoints or specify exact pixel dimensions. Essential for verifying mobile/tablet/desktop layouts.",
  parameters: Type.Object({
    preset: Type.Optional(
      StringEnum(["mobile", "tablet", "desktop", "wide"] as const)
    ),
    width: Type.Optional(
      Type.Number({ description: "Custom viewport width in pixels (requires height too)" })
    ),
    height: Type.Optional(
      Type.Number({ description: "Custom viewport height in pixels (requires width too)" })
    ),
  }),
  /**
   * Resizes the page viewport. A preset takes precedence over explicit
   * dimensions; without a preset both `width` and `height` must be given,
   * otherwise an `isError` result explaining the accepted inputs is
   * returned without touching the viewport.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { page: p } = await deps.ensureBrowser();

      // Fixed dimensions and display label for every supported preset.
      const presetTable = {
        mobile: { width: 390, height: 844, label: "mobile (390×844)" },
        tablet: { width: 768, height: 1024, label: "tablet (768×1024)" },
        desktop: { width: 1280, height: 800, label: "desktop (1280×800)" },
        wide: { width: 1920, height: 1080, label: "wide (1920×1080)" },
      };

      let chosen: { width: number; height: number; label: string };
      if (params.preset) {
        chosen = presetTable[params.preset];
      } else if (params.width !== undefined && params.height !== undefined) {
        chosen = {
          width: params.width,
          height: params.height,
          label: `custom (${params.width}×${params.height})`,
        };
      } else {
        return {
          content: [
            {
              type: "text",
              text: "Provide either a preset (mobile/tablet/desktop/wide) or both width and height.",
            },
          ],
          details: {},
          isError: true,
        };
      }

      await p.setViewportSize({ width: chosen.width, height: chosen.height });
      return {
        content: [{ type: "text", text: `Viewport set to ${chosen.label}` }],
        details: { width: chosen.width, height: chosen.height, label: chosen.label },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Set viewport failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
}

View file

@ -0,0 +1,229 @@
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import {
diffCompactStates,
} from "../core.js";
import type { ToolDeps, CompactPageState } from "../state.js";
import {
setLastActionBeforeState,
setLastActionAfterState,
} from "../state.js";
/**
 * Registers the navigation tools on the extension API:
 * `browser_navigate`, `browser_go_back`, `browser_go_forward` and
 * `browser_reload`.
 *
 * @param pi - Extension API; each tool is added through `pi.registerTool`.
 * @param deps - Infrastructure functions the tools call (browser
 *   lifecycle, page summaries, action tracking, screenshots).
 */
export function registerNavigationTools(pi: ExtensionAPI, deps: ToolDeps): void {
  /**
   * Builds the content list of a failed tool result: a text entry
   * `"<prefix>: <error message>"` followed by an error screenshot of the
   * active page when one could be captured.
   */
  const buildErrorContent = async (prefix: string, err: any): Promise<any[]> => {
    const errorShot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
    const content: any[] = [{ type: "text", text: `${prefix}: ${err.message}` }];
    if (errorShot) {
      content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
    }
    return content;
  };

  /**
   * Moves one step through the session history and collects the data the
   * back/forward tools report.
   *
   * Playwright resolves `goBack`/`goForward` with `null` when there is no
   * history entry to move to, but also when the navigation stays within
   * the same document (History API or hash changes, typical for
   * single-page apps). A null response therefore only counts as "no
   * history" when the URL did not change either.
   *
   * @returns The page data after navigating, or `null` when there was no
   *   history entry in the requested direction.
   */
  const stepHistory = async (direction: "back" | "forward") => {
    const { page: p } = await deps.ensureBrowser();
    const urlBefore = p.url();
    const options = { waitUntil: "domcontentloaded", timeout: 10000 } as const;
    const response = direction === "back" ? await p.goBack(options) : await p.goForward(options);
    if (!response && p.url() === urlBefore) {
      return null;
    }
    // Best effort: a page that never goes network-idle must not fail the tool.
    await p.waitForLoadState("networkidle", { timeout: 5000 }).catch(() => {});
    const title = await p.title();
    const url = p.url();
    const summary = await deps.postActionSummary(p);
    const jsErrors = deps.getRecentErrors(p.url());
    return { title, url, summary, jsErrors };
  };

  // -------------------------------------------------------------------------
  // browser_navigate
  // -------------------------------------------------------------------------
  pi.registerTool({
    name: "browser_navigate",
    label: "Browser Navigate",
    description:
      "Open the browser (if not already open) and navigate to a URL. Waits for network idle. Returns page title and current URL. Use ONLY for visually verifying locally-running web apps (e.g. http://localhost:3000). Do NOT use for documentation sites, GitHub, search results, or any external URL — use web_search instead.",
    parameters: Type.Object({
      url: Type.String({ description: "URL to navigate to, e.g. http://localhost:3000" }),
    }),
    /**
     * Navigates to `params.url` as a tracked action and returns title,
     * URL, viewport size, a before/after state diff, a page summary and
     * (when it can be captured) a JPEG screenshot.
     */
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      // Both live outside the try so the catch handler can close the tracked action.
      let actionId: number | null = null;
      let beforeState: CompactPageState | null = null;
      try {
        const { page: p } = await deps.ensureBrowser();
        beforeState = await deps.captureCompactPageState(p, { includeBodyText: true });
        actionId = deps.beginTrackedAction("browser_navigate", params, beforeState.url).id;
        await p.goto(params.url, { waitUntil: "domcontentloaded", timeout: 30000 });
        // Best effort: a page that never goes network-idle must not fail the navigation.
        await p.waitForLoadState("networkidle", { timeout: 5000 }).catch(() => {});
        // Short fixed pause after load before the page is inspected.
        await new Promise(resolve => setTimeout(resolve, 300));
        const title = await p.title();
        const url = p.url();
        const viewport = p.viewportSize();
        const vpText = viewport ? `${viewport.width}x${viewport.height}` : "unknown";
        const summary = await deps.postActionSummary(p);
        const jsErrors = deps.getRecentErrors(p.url());
        const afterState = await deps.captureCompactPageState(p, { includeBodyText: true });
        const diff = diffCompactStates(beforeState, afterState);
        setLastActionBeforeState(beforeState);
        setLastActionAfterState(afterState);
        deps.finishTrackedAction(actionId, {
          status: "success",
          afterUrl: afterState.url,
          warningSummary: jsErrors.trim() || undefined,
          diffSummary: diff.summary,
          changed: diff.changed,
          beforeState,
          afterState,
        });
        let screenshotContent: any[] = [];
        try {
          let buf = await p.screenshot({ type: "jpeg", quality: 80, scale: "css" });
          buf = await deps.constrainScreenshot(p, buf, "image/jpeg", 80);
          screenshotContent = [{ type: "image", data: buf.toString("base64"), mimeType: "image/jpeg" }];
        } catch {
          // The screenshot is optional; the text result is still returned without it.
        }
        return {
          content: [
            { type: "text", text: `Navigated to: ${url}\nTitle: ${title}\nViewport: ${vpText}\nAction: ${actionId}${jsErrors}\n\nDiff:\n${deps.formatDiffText(diff)}\n\nPage summary:\n${summary}` },
            ...screenshotContent,
          ],
          details: { title, url, status: "loaded", viewport: vpText, actionId, diff },
        };
      } catch (err: any) {
        if (actionId !== null) {
          deps.finishTrackedAction(actionId, { status: "error", afterUrl: deps.getActivePageOrNull()?.url() ?? "", error: err.message, beforeState: beforeState ?? undefined });
        }
        return {
          content: await buildErrorContent("Navigation failed", err),
          details: { status: "error", error: err.message, actionId },
          isError: true,
        };
      }
    },
  });

  // -------------------------------------------------------------------------
  // browser_go_back
  // -------------------------------------------------------------------------
  pi.registerTool({
    name: "browser_go_back",
    label: "Browser Go Back",
    description: "Navigate back in browser history. Returns a compact page summary after navigation.",
    parameters: Type.Object({}),
    /** Steps one entry back in history and reports the resulting page. */
    async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
      try {
        const outcome = await stepHistory("back");
        if (!outcome) {
          return {
            content: [{ type: "text", text: "No previous page in history." }],
            details: {},
            isError: true,
          };
        }
        const { title, url, summary, jsErrors } = outcome;
        return {
          content: [{ type: "text", text: `Navigated back to: ${url}\nTitle: ${title}${jsErrors}\n\nPage summary:\n${summary}` }],
          details: { title, url },
        };
      } catch (err: any) {
        return { content: await buildErrorContent("Go back failed", err), details: { error: err.message }, isError: true };
      }
    },
  });

  // -------------------------------------------------------------------------
  // browser_go_forward
  // -------------------------------------------------------------------------
  pi.registerTool({
    name: "browser_go_forward",
    label: "Browser Go Forward",
    description: "Navigate forward in browser history. Returns a compact page summary after navigation.",
    parameters: Type.Object({}),
    /** Steps one entry forward in history and reports the resulting page. */
    async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
      try {
        const outcome = await stepHistory("forward");
        if (!outcome) {
          return {
            content: [{ type: "text", text: "No forward page in history." }],
            details: {},
            isError: true,
          };
        }
        const { title, url, summary, jsErrors } = outcome;
        return {
          content: [{ type: "text", text: `Navigated forward to: ${url}\nTitle: ${title}${jsErrors}\n\nPage summary:\n${summary}` }],
          details: { title, url },
        };
      } catch (err: any) {
        return { content: await buildErrorContent("Go forward failed", err), details: { error: err.message }, isError: true };
      }
    },
  });

  // -------------------------------------------------------------------------
  // browser_reload
  // -------------------------------------------------------------------------
  pi.registerTool({
    name: "browser_reload",
    label: "Browser Reload",
    description: "Reload the current page. Returns a screenshot, compact page summary, and page metadata (same shape as browser_navigate).",
    parameters: Type.Object({}),
    /**
     * Reloads the current page and returns title, URL, viewport size, a
     * page summary and (when it can be captured) a JPEG screenshot.
     */
    async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
      try {
        const { page: p } = await deps.ensureBrowser();
        await p.reload({ waitUntil: "domcontentloaded", timeout: 30000 });
        // Best effort: a page that never goes network-idle must not fail the reload.
        await p.waitForLoadState("networkidle", { timeout: 5000 }).catch(() => {});
        const title = await p.title();
        const url = p.url();
        const viewport = p.viewportSize();
        const vpText = viewport ? `${viewport.width}x${viewport.height}` : "unknown";
        const summary = await deps.postActionSummary(p);
        const jsErrors = deps.getRecentErrors(p.url());
        let screenshotContent: any[] = [];
        try {
          let buf = await p.screenshot({ type: "jpeg", quality: 80, scale: "css" });
          buf = await deps.constrainScreenshot(p, buf, "image/jpeg", 80);
          screenshotContent = [{
            type: "image",
            data: buf.toString("base64"),
            mimeType: "image/jpeg",
          }];
        } catch {
          // The screenshot is optional; the text result is still returned without it.
        }
        return {
          content: [
            {
              type: "text",
              text: `Reloaded: ${url}\nTitle: ${title}\nViewport: ${vpText}${jsErrors}\n\nPage summary:\n${summary}`,
            },
            ...screenshotContent,
          ],
          details: { title, url, viewport: vpText },
        };
      } catch (err: any) {
        return { content: await buildErrorContent("Reload failed", err), details: { error: err.message }, isError: true };
      }
    },
  });
}

View file

@ -0,0 +1,303 @@
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import {
registryGetActive,
registryListPages,
registrySetActive,
} from "../core.js";
import type { ToolDeps } from "../state.js";
import {
getPageRegistry,
getActiveFrame,
setActiveFrame,
} from "../state.js";
export function registerPageTools(pi: ExtensionAPI, deps: ToolDeps): void {
// -------------------------------------------------------------------------
// browser_list_pages
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_list_pages",
  label: "Browser List Pages",
  description:
    "List all open browser pages/tabs with their IDs, titles, URLs, and active status. Use to see what pages are available before switching.",
  parameters: Type.Object({}),
  /**
   * Refreshes the title and URL stored for every registered page, then
   * returns one line per page in the form
   * `[id] title — url (opener: id) ← active`.
   */
  async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
    try {
      await deps.ensureBrowser();
      const pageRegistry = getPageRegistry();
      // Update the cached metadata in place so the listing reflects the current pages.
      for (const entry of pageRegistry.pages) {
        try {
          entry.title = await entry.page.title();
          entry.url = entry.page.url();
        } catch {
          // Page may have been closed
        }
      }
      const pages = registryListPages(pageRegistry);
      if (pages.length === 0) {
        return {
          content: [{ type: "text", text: "No pages open." }],
          details: { pages: [], count: 0 },
        };
      }
      const lines = pages.map((p: any) => {
        const active = p.isActive ? " ← active" : "";
        const opener = p.opener !== null ? ` (opener: ${p.opener})` : "";
        // Title and URL need a separator, otherwise they run together ("My Apphttp://...").
        return ` [${p.id}] ${p.title || "(untitled)"} — ${p.url}${opener}${active}`;
      });
      return {
        content: [{ type: "text", text: `${pages.length} page(s):\n${lines.join("\n")}` }],
        details: { pages, count: pages.length },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `List pages failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_switch_page
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_switch_page",
  label: "Browser Switch Page",
  description:
    "Switch the active browser page/tab by page ID. Use browser_list_pages to see available IDs. Clears any active frame selection.",
  parameters: Type.Object({
    id: Type.Number({ description: "Page ID to switch to (from browser_list_pages)" }),
  }),
  /**
   * Makes the page with `params.id` the active page, clears the active
   * frame selection, brings the page to the front, and refreshes the
   * title and URL stored for it in the registry.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await deps.ensureBrowser();
      const pageRegistry = getPageRegistry();
      registrySetActive(pageRegistry, params.id);
      // A frame selection belongs to the previously active page, so it is dropped.
      setActiveFrame(null);
      const entry = registryGetActive(pageRegistry);
      await entry.page.bringToFront();
      // A title that cannot be read is reported as "(untitled)" instead of failing the switch.
      const title = await entry.page.title().catch(() => "");
      const url = entry.page.url();
      entry.title = title;
      entry.url = url;
      return {
        // Title and URL need a separator, otherwise they run together ("My Apphttp://...").
        content: [{ type: "text", text: `Switched to page ${params.id}: ${title || "(untitled)"} — ${url}` }],
        details: { id: params.id, title, url },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Switch page failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_close_page
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_close_page",
  label: "Browser Close Page",
  description:
    "Close a specific browser page/tab by ID. Cannot close the last remaining page. The page's close event triggers automatic registry cleanup and active-page fallback.",
  parameters: Type.Object({
    id: Type.Number({ description: "Page ID to close (from browser_list_pages)" }),
  }),
  /**
   * Closes the page with `params.id` and returns the list of remaining
   * pages.
   *
   * Returns an `isError` result without closing anything when only one
   * page is registered ("last_page") or when no registered page has the
   * given ID ("not_found", with the available IDs).
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await deps.ensureBrowser();
      const pageRegistry = getPageRegistry();
      if (pageRegistry.pages.length <= 1) {
        return {
          content: [{ type: "text", text: `Cannot close the last remaining page. Use browser_close to close the entire browser.` }],
          details: { error: "last_page", pageCount: pageRegistry.pages.length },
          isError: true,
        };
      }
      const entry = pageRegistry.pages.find((e: any) => e.id === params.id);
      if (!entry) {
        const available = pageRegistry.pages.map((e: any) => e.id);
        return {
          content: [{ type: "text", text: `Page ${params.id} not found. Available page IDs: [${available.join(", ")}].` }],
          details: { error: "not_found", available },
          isError: true,
        };
      }
      await entry.page.close();
      setActiveFrame(null);
      // Refresh the cached metadata of whatever is still registered; entries
      // whose page can no longer be queried keep their previous values.
      for (const remaining of pageRegistry.pages) {
        try {
          remaining.title = await remaining.page.title();
          remaining.url = remaining.page.url();
        } catch {
          // Page may have been closed
        }
      }
      const pages = registryListPages(pageRegistry);
      const lines = pages.map((p: any) => {
        const active = p.isActive ? " ← active" : "";
        // Title and URL need a separator, otherwise they run together ("My Apphttp://...").
        return ` [${p.id}] ${p.title || "(untitled)"} — ${p.url}${active}`;
      });
      return {
        content: [{ type: "text", text: `Closed page ${params.id}. ${pages.length} page(s) remaining:\n${lines.join("\n")}` }],
        details: { closedId: params.id, pages, count: pages.length },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Close page failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_list_frames
// -------------------------------------------------------------------------
// Registers browser_list_frames: enumerates every frame of the active page.
pi.registerTool({
  name: "browser_list_frames",
  label: "Browser List Frames",
  description:
    "List all frames in the active page, including the main frame and any iframes. Shows frame name, URL, and parent frame name. Use before browser_select_frame to identify available frames.",
  parameters: Type.Object({}),
  /**
   * Builds one record per frame (index, name, url, main/active flags, parent name)
   * and renders them as a text listing. Failures are returned as `isError` results.
   */
  async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
    try {
      await deps.ensureBrowser();
      const page = deps.getActivePage();
      const allFrames = page.frames();
      const rootFrame = page.mainFrame();
      const selectedFrame = getActiveFrame();

      const frameList = allFrames.map((frame, index) => {
        const isMain = frame === rootFrame;
        // Non-main frames always report a parent; an unnamed parent is shown as "main".
        const parentName = isMain ? null : frame.parentFrame()?.name() || "main";
        const fallbackName = isMain ? "main" : `(unnamed-${index})`;
        return {
          index,
          name: frame.name() || fallbackName,
          url: frame.url(),
          isMain,
          parentName,
          isActive: frame === selectedFrame,
        };
      });

      const lines: string[] = [];
      for (const info of frameList) {
        const mainTag = info.isMain ? " [main]" : "";
        const parentTag = info.parentName ? ` (parent: ${info.parentName})` : "";
        const activeTag = info.isActive ? " ← selected" : "";
        lines.push(` [${info.index}] "${info.name}" — ${info.url}${mainTag}${parentTag}${activeTag}`);
      }

      let activeInfo = "No frame selected (operating on main page)";
      if (selectedFrame) {
        activeInfo = `Active frame: "${selectedFrame.name() || "(unnamed)"}"`;
      }

      return {
        content: [{ type: "text", text: `${frameList.length} frame(s) in active page:\n${lines.join("\n")}\n\n${activeInfo}` }],
        details: { frames: frameList, count: frameList.length, activeFrame: selectedFrame?.name() ?? null },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `List frames failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_select_frame
// -------------------------------------------------------------------------
// Registers browser_select_frame: picks the frame later tools operate on (or resets to the main page).
pi.registerTool({
  name: "browser_select_frame",
  label: "Browser Select Frame",
  description:
    "Select a frame within the active page to operate on. Find frames by name, URL pattern, or index. Pass null or \"main\" to reset back to the main page frame. Once a frame is selected, tools like browser_evaluate, browser_find, and browser_click will operate within that frame (after T03 migration).",
  parameters: Type.Object({
    name: Type.Optional(Type.String({ description: "Frame name to select. Use 'main' or 'null' to reset to main frame." })),
    urlPattern: Type.Optional(Type.String({ description: "URL substring to match against frame URLs." })),
    index: Type.Optional(Type.Number({ description: "Frame index from browser_list_frames." })),
  }),
  /**
   * Resolves a frame using the first criterion supplied, in this order:
   * reset (`name` of "main"/"null"/null), exact `name`, `urlPattern` substring, `index`.
   *
   * Never throws: an unmatched criterion, an invalid index, missing criteria and
   * unexpected failures are all returned as `isError` results.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await deps.ensureBrowser();
      const p = deps.getActivePage();
      const frames = p.frames();

      // Shared listing for the "not found" messages of the name and URL lookups.
      const describeFrames = () =>
        frames.map((f, i) => `[${i}] "${f.name() || "(unnamed)"}" — ${f.url()}`);

      if (params.name === "main" || params.name === "null" || params.name === null) {
        setActiveFrame(null);
        return {
          content: [{ type: "text", text: "Reset to main page frame. Tools will operate on the main page." }],
          details: { activeFrame: null },
        };
      }

      if (params.name) {
        const frame = frames.find((f) => f.name() === params.name);
        if (!frame) {
          const available = describeFrames();
          return {
            content: [{ type: "text", text: `Frame with name "${params.name}" not found.\nAvailable frames:\n ${available.join("\n ")}` }],
            details: { error: "frame_not_found", available },
            isError: true,
          };
        }
        setActiveFrame(frame);
        return {
          content: [{ type: "text", text: `Selected frame "${frame.name()}" — ${frame.url()}` }],
          details: { name: frame.name(), url: frame.url() },
        };
      }

      if (params.urlPattern) {
        const pattern = params.urlPattern;
        const frame = frames.find((f) => f.url().includes(pattern));
        if (!frame) {
          const available = describeFrames();
          return {
            content: [{ type: "text", text: `No frame URL matches "${params.urlPattern}".\nAvailable frames:\n ${available.join("\n ")}` }],
            details: { error: "frame_not_found", available },
            isError: true,
          };
        }
        setActiveFrame(frame);
        return {
          content: [{ type: "text", text: `Selected frame "${frame.name() || "(unnamed)"}" — ${frame.url()}` }],
          details: { name: frame.name(), url: frame.url() },
        };
      }

      if (params.index !== undefined) {
        // A fractional or NaN index would otherwise read `frames[1.5]` (undefined)
        // and surface as an opaque TypeError, so it is reported as out of range.
        const validIndex =
          Number.isInteger(params.index) && params.index >= 0 && params.index < frames.length;
        if (!validIndex) {
          return {
            content: [{ type: "text", text: `Frame index ${params.index} out of range. ${frames.length} frame(s) available (0-${frames.length - 1}).` }],
            details: { error: "index_out_of_range", count: frames.length },
            isError: true,
          };
        }
        const frame = frames[params.index];
        setActiveFrame(frame);
        return {
          content: [{ type: "text", text: `Selected frame [${params.index}] "${frame.name() || "(unnamed)"}" — ${frame.url()}` }],
          details: { index: params.index, name: frame.name(), url: frame.url() },
        };
      }

      // No usable criterion was supplied.
      return {
        content: [{ type: "text", text: "Provide name, urlPattern, or index to select a frame. Use name='main' to reset to main frame." }],
        details: { error: "no_criteria" },
        isError: true,
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Select frame failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
}

View file

@ -0,0 +1,539 @@
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import {
getSnapshotModeConfig,
SNAPSHOT_MODES,
} from "../core.js";
import type { ToolDeps, RefNode } from "../state.js";
import {
getActiveFrame,
getCurrentRefMap,
setCurrentRefMap,
getRefVersion,
setRefVersion,
getRefMetadata,
setRefMetadata,
} from "../state.js";
export function registerRefTools(pi: ExtensionAPI, deps: ToolDeps): void {
// -------------------------------------------------------------------------
// browser_snapshot_refs
// -------------------------------------------------------------------------
// Registers browser_snapshot_refs: captures elements and assigns versioned refs (@vN:eK).
pi.registerTool({
  name: "browser_snapshot_refs",
  label: "Browser Snapshot Refs",
  description:
    "Capture a compact inventory of interactive elements and assign deterministic versioned refs (@vN:e1, @vN:e2, ...). Use these refs with browser_click_ref, browser_fill_ref, and browser_hover_ref.",
  parameters: Type.Object({
    selector: Type.Optional(
      Type.String({
        description: "Optional CSS selector scope for the snapshot (e.g. 'main', 'form', '#modal').",
      })
    ),
    interactiveOnly: Type.Optional(
      Type.Boolean({
        description: "Include only interactive elements (default: true).",
      })
    ),
    limit: Type.Optional(
      Type.Number({
        description: "Maximum number of elements to include (default: 40).",
      })
    ),
    mode: Type.Optional(
      Type.String({
        description: "Semantic snapshot mode that pre-filters elements by category. When set, overrides interactiveOnly. Modes: interactive, form, dialog, navigation, errors, headings, visible_only.",
      })
    ),
  }),
  /**
   * Takes a new ref snapshot of the active target.
   *
   * On success the ref version is incremented and the stored ref map and ref
   * metadata are replaced. An unknown `mode` is rejected before any state changes.
   * Failures are returned as `isError` results.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { page } = await deps.ensureBrowser();
      const target = deps.getActiveTarget();
      const { mode, selector } = params;

      // Reject unknown modes up front so no snapshot state is touched.
      if (mode !== undefined && !getSnapshotModeConfig(mode)) {
        const knownModes = Object.keys(SNAPSHOT_MODES);
        return {
          content: [{ type: "text", text: `Unknown snapshot mode: "${mode}". Valid modes: ${knownModes.join(", ")}` }],
          details: { error: `Unknown mode: ${mode}`, validModes: knownModes },
          isError: true,
        };
      }

      const interactiveOnly = params.interactiveOnly !== false;
      // Limit is floored and kept within 1..200; it defaults to 40.
      const requestedLimit = Math.floor(params.limit ?? 40);
      const limit = Math.max(1, Math.min(200, requestedLimit));

      const rawNodes = await deps.buildRefSnapshot(target, {
        selector,
        interactiveOnly,
        limit,
        mode,
      });

      const version = getRefVersion() + 1;
      setRefVersion(version);

      // Refs are numbered from e1 in the order the snapshot returned the nodes.
      const refMap: Record<string, RefNode> = {};
      for (let idx = 0; idx < rawNodes.length; idx += 1) {
        const key = `e${idx + 1}`;
        refMap[key] = { ref: key, ...rawNodes[idx] };
      }
      setCurrentRefMap(refMap);

      const frame = getActiveFrame();
      const frameContext = frame ? frame.name() || frame.url() : undefined;
      setRefMetadata({
        url: page.url(),
        timestamp: Date.now(),
        selectorScope: selector,
        interactiveOnly,
        limit,
        version,
        frameContext,
        mode,
      });

      if (rawNodes.length === 0) {
        return {
          content: [{
            type: "text",
            text: "No elements found for ref snapshot (try interactiveOnly=false or a wider selector scope).",
          }],
          details: {
            count: 0,
            version,
            metadata: getRefMetadata(),
            refs: {},
          },
        };
      }

      const versionedRefs: Record<string, RefNode> = {};
      const lines: string[] = [];
      for (const node of Object.values(refMap)) {
        const versionedRef = deps.formatVersionedRef(version, node.ref);
        versionedRefs[versionedRef] = node;

        const parts: string[] = [versionedRef, node.role || node.tag];
        if (node.name) parts.push(`"${node.name}"`);
        if (node.href) parts.push(`href="${node.href.slice(0, 80)}"`);
        if (!node.isVisible) parts.push("(hidden)");
        if (!node.isEnabled) parts.push("(disabled)");
        lines.push(parts.join(" "));
      }

      const header = [
        `Ref snapshot v${version} (${rawNodes.length} element(s))`,
        `URL: ${page.url()}`,
        `Scope: ${selector ?? "body"}`,
      ];
      if (mode) header.push(`Mode: ${mode}`);
      header.push(`Use versioned refs exactly as shown (e.g. @v${version}:e1).`);

      return {
        content: [{
          type: "text",
          text: `${header.join("\n")}\n\n${lines.join("\n")}`,
        }],
        details: {
          count: rawNodes.length,
          version,
          metadata: getRefMetadata(),
          refs: refMap,
          versionedRefs,
        },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Snapshot refs failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_get_ref
// -------------------------------------------------------------------------
// Registers browser_get_ref: read-only lookup of a ref stored by the latest snapshot.
pi.registerTool({
  name: "browser_get_ref",
  label: "Browser Get Ref",
  description: "Inspect stored metadata for one deterministic element ref (prefer versioned format, e.g. @v3:e1).",
  parameters: Type.Object({
    ref: Type.String({ description: "Reference id, preferably versioned (e.g. '@v3:e1')." }),
  }),
  /**
   * Looks the ref up in the current ref map without touching the browser.
   * A versioned ref whose version differs from the stored snapshot is reported
   * as stale; an unknown key is reported as not found.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const parsed = deps.parseRef(params.ref);
    const snapshotMeta = getRefMetadata();
    const latestVersion = getRefVersion();

    // Only versioned refs can be detected as belonging to an older snapshot.
    const versionMismatch =
      parsed.version !== null && snapshotMeta && parsed.version !== snapshotMeta.version;
    if (versionMismatch) {
      return {
        content: [{ type: "text", text: deps.staleRefGuidance(parsed.display, `snapshot version mismatch (have v${snapshotMeta.version})`) }],
        details: { error: "ref_stale", ref: parsed.display, expectedVersion: snapshotMeta.version, receivedVersion: parsed.version },
        isError: true,
      };
    }

    const node = getCurrentRefMap()[parsed.key];
    if (!node) {
      return {
        content: [{ type: "text", text: deps.staleRefGuidance(parsed.display, "ref not found") }],
        details: { error: "ref_not_found", ref: parsed.display, metadata: snapshotMeta },
        isError: true,
      };
    }

    const versionedRef = deps.formatVersionedRef(snapshotMeta?.version ?? latestVersion, node.ref);
    const label = `${node.role || node.tag}${node.name ? ` "${node.name}"` : ""}`;
    const report = [
      `${versionedRef}: ${label}`,
      `Visible: ${node.isVisible}`,
      `Enabled: ${node.isEnabled}`,
      `Path: ${node.xpathOrPath}`,
    ].join("\n");

    return {
      content: [{
        type: "text",
        text: report,
      }],
      details: { ref: versionedRef, node, metadata: snapshotMeta },
    };
  },
});
// -------------------------------------------------------------------------
// browser_click_ref
// -------------------------------------------------------------------------
// Registers browser_click_ref: clicks an element addressed by a versioned snapshot ref.
pi.registerTool({
  name: "browser_click_ref",
  label: "Browser Click Ref",
  description: "Click a previously snapshotted element by deterministic versioned ref (e.g. @v3:e2).",
  parameters: Type.Object({
    ref: Type.String({ description: "Reference id in versioned format, e.g. '@v3:e2'." }),
  }),
  /**
   * Validates the ref (versioned, current version, known key, unchanged URL,
   * still resolvable), clicks it, then compares before/after observations
   * (URL, hash, target state, open dialogs) to build the verification result.
   * Any thrown error is converted into an `isError` result with stale-ref
   * guidance and, when available, an error screenshot.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const parsed = deps.parseRef(params.ref);
    const displayRef = parsed.display;
    try {
      const { page } = await deps.ensureBrowser();
      const target = deps.getActiveTarget();
      const snapshotMeta = getRefMetadata();
      const latestVersion = getRefVersion();

      // --- ref validation -------------------------------------------------
      if (parsed.version === null) {
        return {
          content: [{ type: "text", text: `Unversioned ref ${displayRef} is ambiguous. Use a versioned ref (e.g. @v${snapshotMeta?.version ?? latestVersion}:e1) from browser_snapshot_refs.` }],
          details: { error: "ref_unversioned", ref: displayRef, metadata: snapshotMeta },
          isError: true,
        };
      }
      if (snapshotMeta && parsed.version !== snapshotMeta.version) {
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(displayRef, `snapshot version mismatch (have v${snapshotMeta.version})`) }],
          details: { error: "ref_stale", ref: displayRef, expectedVersion: snapshotMeta.version, receivedVersion: parsed.version },
          isError: true,
        };
      }

      const node = getCurrentRefMap()[parsed.key];
      if (!node) {
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(displayRef, "ref not found") }],
          details: { error: "ref_not_found", ref: displayRef, metadata: snapshotMeta },
          isError: true,
        };
      }
      if (snapshotMeta?.url && snapshotMeta.url !== page.url()) {
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(displayRef, "URL changed since snapshot") }],
          details: { error: "ref_stale", ref: displayRef, snapshotUrl: snapshotMeta.url, currentUrl: page.url() },
          isError: true,
        };
      }

      const resolved = await deps.resolveRefTarget(target, node);
      if (!resolved.ok) {
        const reason = (resolved as { ok: false; reason: string }).reason;
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(displayRef, reason) }],
          details: { error: "ref_stale", ref: displayRef, reason },
          isError: true,
        };
      }
      const selector = resolved.selector;

      // --- observe, click, observe again ----------------------------------
      const urlBefore = page.url();
      const hashBefore = deps.getUrlHash(urlBefore);
      const dialogsBefore = await deps.countOpenDialogs(target);
      const stateBefore = await deps.captureClickTargetState(target, selector);

      await target.locator(selector).first().click({ timeout: 8000 });
      const settle = await deps.settleAfterActionAdaptive(page);

      const urlAfter = page.url();
      const hashAfter = deps.getUrlHash(urlAfter);
      const dialogsAfter = await deps.countOpenDialogs(target);
      const stateAfter = await deps.captureClickTargetState(target, selector);

      // The target counts as changed when any tracked attribute differs.
      let targetStateChanged = stateBefore.exists !== stateAfter.exists;
      targetStateChanged = targetStateChanged || stateBefore.ariaExpanded !== stateAfter.ariaExpanded;
      targetStateChanged = targetStateChanged || stateBefore.ariaPressed !== stateAfter.ariaPressed;
      targetStateChanged = targetStateChanged || stateBefore.ariaSelected !== stateAfter.ariaSelected;
      targetStateChanged = targetStateChanged || stateBefore.open !== stateAfter.open;

      const checks = [
        { name: "url_changed", passed: urlAfter !== urlBefore, value: urlAfter, expected: `!= ${urlBefore}` },
        { name: "hash_changed", passed: hashAfter !== hashBefore, value: hashAfter, expected: `!= ${hashBefore}` },
        { name: "target_state_changed", passed: targetStateChanged, value: stateAfter, expected: stateBefore },
        { name: "dialog_open", passed: dialogsAfter > dialogsBefore, value: dialogsAfter, expected: `> ${dialogsBefore}` },
      ];
      const verification = deps.verificationFromChecks(
        checks,
        "Ref may now point to an inert element. Refresh refs with browser_snapshot_refs and retry."
      );

      // --- report -----------------------------------------------------------
      const summary = await deps.postActionSummary(page, target);
      const jsErrors = deps.getRecentErrors(page.url());
      const versionedRef = deps.formatVersionedRef(snapshotMeta?.version ?? latestVersion, node.ref);
      const label = `${node.role || node.tag}${node.name ? ` "${node.name}"` : ""}`;
      return {
        content: [{
          type: "text",
          text: `Clicked ${versionedRef} (${label})\n${deps.verificationLine(verification)}${jsErrors}\n\nPage summary:\n${summary}`,
        }],
        details: { ref: versionedRef, selector, url: page.url(), ...settle, ...verification },
      };
    } catch (err: any) {
      const errorShot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const reason = deps.firstErrorLine(err);
      const content: any[] = [
        { type: "text", text: deps.staleRefGuidance(displayRef, `action failed: ${reason}`) },
        { type: "text", text: `Click ref failed: ${err.message}` },
      ];
      if (errorShot) {
        content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
      }
      return {
        content,
        details: { error: err.message, ref: displayRef, hint: "Run browser_snapshot_refs to refresh refs." },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_hover_ref
// -------------------------------------------------------------------------
// Registers browser_hover_ref: hovers an element addressed by a versioned snapshot ref.
pi.registerTool({
  name: "browser_hover_ref",
  label: "Browser Hover Ref",
  description: "Hover a previously snapshotted element by deterministic versioned ref (e.g. @v3:e4).",
  parameters: Type.Object({
    ref: Type.String({ description: "Reference id in versioned format, e.g. '@v3:e4'." }),
  }),
  /**
   * Runs the ref validation chain (versioned, current version, known key,
   * unchanged URL, still resolvable), hovers the element and returns a page
   * summary. Thrown errors become an `isError` result with stale-ref guidance
   * and, when available, an error screenshot.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const parsed = deps.parseRef(params.ref);
    const displayRef = parsed.display;
    try {
      const { page } = await deps.ensureBrowser();
      const target = deps.getActiveTarget();
      const snapshotMeta = getRefMetadata();
      const latestVersion = getRefVersion();

      if (parsed.version === null) {
        return {
          content: [{ type: "text", text: `Unversioned ref ${displayRef} is ambiguous. Use a versioned ref (e.g. @v${snapshotMeta?.version ?? latestVersion}:e1) from browser_snapshot_refs.` }],
          details: { error: "ref_unversioned", ref: displayRef, metadata: snapshotMeta },
          isError: true,
        };
      }
      if (snapshotMeta && parsed.version !== snapshotMeta.version) {
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(displayRef, `snapshot version mismatch (have v${snapshotMeta.version})`) }],
          details: { error: "ref_stale", ref: displayRef, expectedVersion: snapshotMeta.version, receivedVersion: parsed.version },
          isError: true,
        };
      }

      const node = getCurrentRefMap()[parsed.key];
      if (!node) {
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(displayRef, "ref not found") }],
          details: { error: "ref_not_found", ref: displayRef, metadata: snapshotMeta },
          isError: true,
        };
      }
      if (snapshotMeta?.url && snapshotMeta.url !== page.url()) {
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(displayRef, "URL changed since snapshot") }],
          details: { error: "ref_stale", ref: displayRef, snapshotUrl: snapshotMeta.url, currentUrl: page.url() },
          isError: true,
        };
      }

      const resolved = await deps.resolveRefTarget(target, node);
      if (!resolved.ok) {
        const reason = (resolved as { ok: false; reason: string }).reason;
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(displayRef, reason) }],
          details: { error: "ref_stale", ref: displayRef, reason },
          isError: true,
        };
      }
      const selector = resolved.selector;

      // Hover, let the page settle, then describe the resulting page state.
      const element = target.locator(selector).first();
      await element.hover({ timeout: 8000 });
      const settle = await deps.settleAfterActionAdaptive(page);
      const summary = await deps.postActionSummary(page, target);
      const jsErrors = deps.getRecentErrors(page.url());

      const versionedRef = deps.formatVersionedRef(snapshotMeta?.version ?? latestVersion, node.ref);
      const label = `${node.role || node.tag}${node.name ? ` "${node.name}"` : ""}`;
      return {
        content: [{
          type: "text",
          text: `Hovered ${versionedRef} (${label})${jsErrors}\n\nPage summary:\n${summary}`,
        }],
        details: { ref: versionedRef, selector, url: page.url(), ...settle },
      };
    } catch (err: any) {
      const errorShot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const reason = deps.firstErrorLine(err);
      const content: any[] = [
        { type: "text", text: deps.staleRefGuidance(displayRef, `action failed: ${reason}`) },
        { type: "text", text: `Hover ref failed: ${err.message}` },
      ];
      if (errorShot) {
        content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
      }
      return {
        content,
        details: { error: err.message, ref: displayRef, hint: "Run browser_snapshot_refs to refresh refs." },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_fill_ref
// -------------------------------------------------------------------------
// Registers browser_fill_ref: enters text into an element addressed by a versioned snapshot ref.
pi.registerTool({
  name: "browser_fill_ref",
  label: "Browser Fill Ref",
  description: "Fill/type text into an input-like element by deterministic versioned ref (e.g. @v3:e1).",
  parameters: Type.Object({
    ref: Type.String({ description: "Reference id in versioned format, e.g. '@v3:e1'." }),
    text: Type.String({ description: "Text to enter." }),
    clearFirst: Type.Optional(
      Type.Boolean({ description: "Clear existing value first (default: false)." })
    ),
    submit: Type.Optional(
      Type.Boolean({ description: "Press Enter after typing (default: false)." })
    ),
    slowly: Type.Optional(
      Type.Boolean({ description: "Type character-by-character (default: false)." })
    ),
  }),
  /**
   * Validates the ref (versioned, current version, known key, unchanged URL,
   * still resolvable), enters `params.text`, optionally presses Enter, then
   * reads the value back to build the verification result.
   *
   * Thrown errors become an `isError` result with stale-ref guidance and,
   * when available, an error screenshot.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const parsedRef = deps.parseRef(params.ref);
    const requestedRef = parsedRef.display;
    // One timeout for every locator action so a stale ref fails fast everywhere.
    const actionTimeoutMs = 8000;
    try {
      const { page: p } = await deps.ensureBrowser();
      const target = deps.getActiveTarget();
      const refMetadata = getRefMetadata();
      const refVersion = getRefVersion();

      if (parsedRef.version === null) {
        return {
          content: [{ type: "text", text: `Unversioned ref ${requestedRef} is ambiguous. Use a versioned ref (e.g. @v${refMetadata?.version ?? refVersion}:e1) from browser_snapshot_refs.` }],
          details: { error: "ref_unversioned", ref: requestedRef, metadata: refMetadata },
          isError: true,
        };
      }
      if (refMetadata && parsedRef.version !== refMetadata.version) {
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(requestedRef, `snapshot version mismatch (have v${refMetadata.version})`) }],
          details: { error: "ref_stale", ref: requestedRef, expectedVersion: refMetadata.version, receivedVersion: parsedRef.version },
          isError: true,
        };
      }

      const currentRefMap = getCurrentRefMap();
      const node = currentRefMap[parsedRef.key];
      if (!node) {
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(requestedRef, "ref not found") }],
          details: { error: "ref_not_found", ref: requestedRef, metadata: refMetadata },
          isError: true,
        };
      }
      if (refMetadata?.url && refMetadata.url !== p.url()) {
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(requestedRef, "URL changed since snapshot") }],
          details: { error: "ref_stale", ref: requestedRef, snapshotUrl: refMetadata.url, currentUrl: p.url() },
          isError: true,
        };
      }

      const resolved = await deps.resolveRefTarget(target, node);
      if (!resolved.ok) {
        const reason = (resolved as { ok: false; reason: string }).reason;
        return {
          content: [{ type: "text", text: deps.staleRefGuidance(requestedRef, reason) }],
          details: { error: "ref_stale", ref: requestedRef, reason },
          isError: true,
        };
      }

      const locator = target.locator(resolved.selector).first();
      const beforeUrl = p.url();

      if (params.slowly) {
        // Character-by-character path: focus via click, then use the keyboard.
        await locator.click({ timeout: actionTimeoutMs });
        if (params.clearFirst) {
          await p.keyboard.press("Control+A");
          await p.keyboard.press("Delete");
        }
        await p.keyboard.type(params.text);
      } else {
        if (params.clearFirst) {
          // Previously this call had no timeout and could wait for Playwright's
          // default timeout on a stale ref; it now matches the other actions.
          await locator.fill("", { timeout: actionTimeoutMs });
        }
        await locator.fill(params.text, { timeout: actionTimeoutMs });
      }
      if (params.submit) {
        await p.keyboard.press("Enter");
      }

      const settle = await deps.settleAfterActionAdaptive(p);
      const filledValue = await deps.readInputLikeValue(target, resolved.selector);
      const afterUrl = p.url();
      const verification = deps.verificationFromChecks(
        [
          { name: "value_equals_expected", passed: filledValue === params.text, value: filledValue, expected: params.text },
          { name: "value_contains_expected", passed: typeof filledValue === "string" && filledValue.includes(params.text), value: filledValue, expected: params.text },
          { name: "url_changed_after_submit", passed: !!params.submit && afterUrl !== beforeUrl, value: afterUrl, expected: `!= ${beforeUrl}` },
        ],
        "Try refreshing refs and confirm this ref still targets an input-like element."
      );

      const summary = await deps.postActionSummary(p, target);
      const jsErrors = deps.getRecentErrors(p.url());
      const versionedRef = deps.formatVersionedRef(refMetadata?.version ?? refVersion, node.ref);
      return {
        content: [{
          type: "text",
          text: `Filled ${versionedRef} (${node.role || node.tag}${node.name ? ` "${node.name}"` : ""}) with "${params.text}"\n${deps.verificationLine(verification)}${jsErrors}\n\nPage summary:\n${summary}`,
        }],
        details: { ref: versionedRef, selector: resolved.selector, url: p.url(), filledValue, ...settle, ...verification },
      };
    } catch (err: any) {
      const errorShot = await deps.captureErrorScreenshot(deps.getActivePageOrNull());
      const reason = deps.firstErrorLine(err);
      const content: any[] = [
        { type: "text", text: deps.staleRefGuidance(requestedRef, `action failed: ${reason}`) },
        { type: "text", text: `Fill ref failed: ${err.message}` },
      ];
      if (errorShot) {
        content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
      }
      return {
        content,
        details: { error: err.message, ref: requestedRef, hint: "Run browser_snapshot_refs to refresh refs." },
        isError: true,
      };
    }
  },
});
}

View file

@ -0,0 +1,83 @@
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import type { ToolDeps } from "../state.js";
/**
 * Registers the `browser_screenshot` tool on the given extension API.
 *
 * @param pi   Extension API used to register the tool.
 * @param deps Infrastructure functions; this tool uses `ensureBrowser` and
 *             `constrainScreenshot`.
 */
export function registerScreenshotTools(pi: ExtensionAPI, deps: ToolDeps): void {
  pi.registerTool({
    name: "browser_screenshot",
    label: "Browser Screenshot",
    description:
      "Take a screenshot of the current browser page and return it as an inline image. Uses JPEG for viewport/fullpage (smaller, configurable quality) and PNG for element crops (preserves transparency). Optionally crop to a specific element by CSS selector.",
    parameters: Type.Object({
      fullPage: Type.Optional(
        Type.Boolean({ description: "Capture the full scrollable page (default: false)" })
      ),
      selector: Type.Optional(
        Type.String({
          description:
            "CSS selector of a specific element to screenshot (crops to that element's bounding box). If omitted, screenshots the entire viewport.",
        })
      ),
      quality: Type.Optional(
        Type.Number({
          description:
            "JPEG quality 1-100 (default: 80). Only applies to viewport/fullpage screenshots, not element crops. Lower = smaller image.",
        })
      ),
    }),
    /**
     * Captures either an element crop (PNG) or the viewport/full page (JPEG),
     * passes the buffer through `deps.constrainScreenshot`, and returns it as
     * base64 together with page title, URL and viewport size.
     * Failures are returned as `isError` results.
     */
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      try {
        const { page: p } = await deps.ensureBrowser();
        let screenshotBuffer: Buffer;
        let mimeType: string;

        // Playwright only accepts an integer quality within 0-100, so normalise
        // the caller's value rather than letting a stray 85.5 or 150 fail the call.
        const requestedQuality = params.quality ?? 80;
        const quality = Number.isFinite(requestedQuality)
          ? Math.min(100, Math.max(0, Math.round(requestedQuality)))
          : 80;

        if (params.selector) {
          // Element crops use PNG; quality does not apply to them.
          const locator = p.locator(params.selector).first();
          screenshotBuffer = await locator.screenshot({ type: "png", scale: "css" });
          mimeType = "image/png";
        } else {
          screenshotBuffer = await p.screenshot({
            fullPage: params.fullPage ?? false,
            type: "jpeg",
            quality,
            scale: "css",
          });
          mimeType = "image/jpeg";
        }

        screenshotBuffer = await deps.constrainScreenshot(p, screenshotBuffer, mimeType, quality);
        const base64Data = screenshotBuffer.toString("base64");
        const title = await p.title();
        const url = p.url();
        const viewport = p.viewportSize();
        const vpText = viewport ? `${viewport.width}x${viewport.height}` : "unknown";
        const scope = params.selector ? `element "${params.selector}"` : params.fullPage ? "full page" : "viewport";
        return {
          content: [
            {
              type: "text",
              text: `Screenshot of ${scope}.\nPage: ${title}\nURL: ${url}\nViewport: ${vpText}`,
            },
            {
              type: "image",
              data: base64Data,
              mimeType,
            },
          ],
          details: { title, url, scope, viewport: vpText },
        };
      } catch (err: any) {
        return {
          content: [{ type: "text", text: `Screenshot failed: ${err.message}` }],
          details: { error: err.message },
          isError: true,
        };
      }
    },
  });
}

View file

@ -0,0 +1,400 @@
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { stat } from "node:fs/promises";
import path from "node:path";
import {
formatTimelineEntries,
buildFailureHypothesis,
summarizeBrowserSession,
} from "../core.js";
import type { ToolDeps } from "../state.js";
import {
ARTIFACT_ROOT,
HAR_FILENAME,
getPageRegistry,
getActiveFrame,
getConsoleLogs,
getNetworkLogs,
getDialogLogs,
getActionTimeline,
getActiveTraceSession,
setActiveTraceSession,
getHarState,
setHarState,
getSessionStartedAt,
getSessionArtifactDir,
} from "../state.js";
import {
getActiveFrameMetadata,
ensureDir,
} from "../utils.js";
export function registerSessionTools(pi: ExtensionAPI, deps: ToolDeps): void {
// -------------------------------------------------------------------------
// browser_close
// -------------------------------------------------------------------------
// Registers browser_close: delegates the whole shutdown to deps.closeBrowser().
pi.registerTool({
  name: "browser_close",
  label: "Browser Close",
  description: "Close the browser and clean up all resources.",
  parameters: Type.Object({}),
  /** Closes the browser; a failure is reported as an `isError` result instead of throwing. */
  async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
    try {
      await deps.closeBrowser();
    } catch (err: any) {
      const message = err.message;
      return {
        content: [{ type: "text", text: `Close failed: ${message}` }],
        details: { error: message },
        isError: true,
      };
    }
    return {
      content: [{ type: "text", text: "Browser closed." }],
      details: {},
    };
  },
});
// -------------------------------------------------------------------------
// browser_trace_start
// -------------------------------------------------------------------------
// Registers browser_trace_start: begins a Playwright trace and records it as the active session.
pi.registerTool({
  name: "browser_trace_start",
  label: "Browser Trace Start",
  description: "Start a Playwright trace for the current browser session and persist trace metadata under the session artifact directory.",
  parameters: Type.Object({
    name: Type.Optional(Type.String({ description: "Optional short trace session name for artifact filenames." })),
    title: Type.Optional(Type.String({ description: "Optional trace title recorded in metadata." })),
  }),
  /**
   * Starts tracing on the browser context unless a trace session is already
   * recorded as active. The session name defaults to a timestamped
   * `trace-…` name; characters outside [a-zA-Z0-9._-] are replaced with "-".
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { context: browserContext } = await deps.ensureBrowser();

      const running = getActiveTraceSession();
      if (running) {
        return {
          content: [{ type: "text", text: `Trace already active: ${running.name}` }],
          details: { error: "trace_already_active", activeTraceSession: running, ...deps.getSessionArtifactMetadata() },
          isError: true,
        };
      }

      const startedAt = Date.now();
      const requestedName = params.name?.trim() || `trace-${deps.formatArtifactTimestamp(startedAt)}`;
      const name = requestedName.replace(/[^a-zA-Z0-9._-]+/g, "-");
      const title = params.title ?? name;

      await browserContext.tracing.start({ screenshots: true, snapshots: true, sources: true, title });
      // Record the session only after tracing has actually started.
      setActiveTraceSession({ startedAt, name, title });

      return {
        content: [{ type: "text", text: `Trace started: ${name}\nSession dir: ${getSessionArtifactDir()}` }],
        details: { activeTraceSession: getActiveTraceSession(), ...deps.getSessionArtifactMetadata() },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Trace start failed: ${err.message}` }],
        details: { error: err.message, ...deps.getSessionArtifactMetadata() },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_trace_stop
// -------------------------------------------------------------------------
// Registers browser_trace_stop: stops the active Playwright trace and writes the trace zip.
pi.registerTool({
  name: "browser_trace_stop",
  label: "Browser Trace Stop",
  description: "Stop the active Playwright trace and write the trace zip to disk under the session artifact directory.",
  parameters: Type.Object({
    name: Type.Optional(Type.String({ description: "Optional artifact basename override for the trace zip." })),
  }),
  /**
   * Stops tracing and saves `<name>.trace.zip` at the path produced by
   * `deps.buildSessionArtifactPath`. The basename defaults to the active
   * session's name; characters outside [a-zA-Z0-9._-] are replaced with "-".
   *
   * Returns an `isError` result when no trace session is active or when any
   * step fails.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { context: browserContext } = await deps.ensureBrowser();
      const traceSession = getActiveTraceSession();
      if (!traceSession) {
        return {
          content: [{ type: "text", text: "No active trace session to stop." }],
          details: { error: "trace_not_active", ...deps.getSessionArtifactMetadata() },
          isError: true,
        };
      }

      const traceName = (params.name?.trim() || traceSession.name).replace(/[^a-zA-Z0-9._-]+/g, "-");
      const tracePath = deps.buildSessionArtifactPath(`${traceName}.trace.zip`);

      await browserContext.tracing.stop({ path: tracePath });
      // Clear the session as soon as tracing has stopped. If the stat() below
      // failed first, the state would stay "active" with nothing recording,
      // and later start/stop calls would be blocked.
      setActiveTraceSession(null);

      const fileStat = await stat(tracePath);
      return {
        content: [{ type: "text", text: `Trace stopped: ${tracePath}` }],
        details: {
          path: tracePath,
          bytes: fileStat.size,
          elapsedMs: Date.now() - traceSession.startedAt,
          traceName,
          ...deps.getSessionArtifactMetadata(),
        },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Trace stop failed: ${err.message}` }],
        details: { error: err.message, ...deps.getSessionArtifactMetadata() },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_export_har
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_export_har",
  label: "Browser Export HAR",
  description: "Export the truthfully recorded session HAR from disk to a stable artifact path and return compact metadata.",
  parameters: Type.Object({
    filename: Type.Optional(Type.String({ description: "Optional destination filename within the session artifact directory." })),
  }),
  /**
   * Copies the recorded HAR file to a destination inside the session
   * artifact directory (or just stats it when source and destination are
   * the same path) and records the export in the HAR state.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await deps.ensureBrowser();
      const recording = getHarState();
      const { path: recordedPath } = recording;
      // All three must hold, otherwise there is no HAR file to export.
      if (!recording.enabled || !recording.configuredAtContextCreation || !recordedPath) {
        return {
          content: [{ type: "text", text: "HAR export unavailable: HAR recording was not enabled at browser context creation." }],
          details: { error: "har_not_enabled", ...deps.getSessionArtifactMetadata() },
          isError: true,
        };
      }
      const requestedName = params.filename?.trim() || `export-${HAR_FILENAME}`;
      const safeName = requestedName.replace(/[^a-zA-Z0-9._-]+/g, "-");
      const targetPath = deps.buildSessionArtifactPath(safeName);
      const alreadyInPlace = recordedPath === targetPath;
      // Skip the copy when the destination is the recording itself.
      const exported = alreadyInPlace
        ? { path: recordedPath, bytes: (await stat(recordedPath)).size }
        : await deps.copyArtifactFile(recordedPath, targetPath);
      setHarState({
        ...recording,
        exportCount: recording.exportCount + 1,
        lastExportedPath: exported.path,
        lastExportedAt: Date.now(),
      });
      return {
        content: [{ type: "text", text: `HAR exported: ${exported.path}` }],
        details: { path: exported.path, bytes: exported.bytes, ...deps.getSessionArtifactMetadata() },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `HAR export failed: ${err.message}` }],
        details: { error: err.message, ...deps.getSessionArtifactMetadata() },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_timeline
// -------------------------------------------------------------------------
pi.registerTool({
  name: "browser_timeline",
  label: "Browser Timeline",
  description: "Return a compact structured summary of the tracked browser action timeline and optional on-disk export path.",
  parameters: Type.Object({
    writeToDisk: Type.Optional(Type.Boolean({ description: "Write the timeline JSON to disk under the session artifact directory." })),
    filename: Type.Optional(Type.String({ description: "Optional JSON filename when writeToDisk is true." })),
  }),
  /**
   * Formats the tracked action timeline and, when `writeToDisk` is set,
   * also writes it as pretty-printed JSON into the session artifact directory.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await deps.ensureBrowser();
      const tracked = getActionTimeline();
      const formatOptions = { limit: tracked.limit, totalActions: tracked.nextId - 1 };
      const timeline = formatTimelineEntries(tracked.entries, formatOptions);
      let artifact: { path: string; bytes: number } | null = null;
      if (params.writeToDisk) {
        const requestedName = params.filename?.trim() || "timeline.json";
        const safeName = requestedName.replace(/[^a-zA-Z0-9._-]+/g, "-");
        const outputPath = deps.buildSessionArtifactPath(safeName);
        artifact = await deps.writeArtifactFile(outputPath, JSON.stringify(timeline, null, 2));
      }
      let text = timeline.summary;
      if (artifact) {
        text = `${timeline.summary}\nArtifact: ${artifact.path}`;
      }
      return {
        content: [{ type: "text", text }],
        details: { ...timeline, artifact, ...deps.getSessionArtifactMetadata() },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Timeline failed: ${err.message}` }],
        details: { error: err.message, ...deps.getSessionArtifactMetadata() },
        isError: true,
      };
    }
  },
});
// -------------------------------------------------------------------------
// browser_session_summary
// -------------------------------------------------------------------------
// Tool: browser_session_summary
// Builds a read-only summary of the current browser session from the live
// page list, the action timeline, the console/network/dialog logs, and the
// trace and HAR state. Nothing is written to disk by this tool.
pi.registerTool({
name: "browser_session_summary",
label: "Browser Session Summary",
description: "Return a compact structured summary of the current browser session, including pages, actions, waits/assertions, bounded-history caveats, and trace/HAR state.",
parameters: Type.Object({}),
async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
try {
await deps.ensureBrowser();
// Snapshot every input up front so the summary and the failure hypothesis
// below are computed from the same log arrays.
const pages = await deps.getLivePagesSnapshot();
const actionTimeline = getActionTimeline();
const pageRegistry = getPageRegistry();
const consoleLogs = getConsoleLogs();
const networkLogs = getNetworkLogs();
const dialogLogs = getDialogLogs();
const baseSummary = summarizeBrowserSession({
timeline: actionTimeline,
// nextId - 1 is passed as the total action count (presumably ids start at 1 — TODO confirm).
totalActions: actionTimeline.nextId - 1,
pages,
activePageId: pageRegistry.activePageId,
activeFrame: getActiveFrameMetadata(),
consoleEntries: consoleLogs,
networkEntries: networkLogs,
dialogEntries: dialogLogs,
// NOTE(review): these limits are hard-coded here; presumably they mirror the log buffer caps — confirm against state.ts.
consoleLimit: 1000,
networkLimit: 1000,
dialogLimit: 1000,
sessionStartedAt: getSessionStartedAt(),
now: Date.now(),
});
const failureHypothesis = buildFailureHypothesis({
timeline: actionTimeline,
consoleEntries: consoleLogs,
networkEntries: networkLogs,
dialogEntries: dialogLogs,
});
const activeTrace = getActiveTraceSession();
// When no trace is active, lastTracePath is a "*.trace.zip" glob-style
// pattern inside the session artifact directory, not a verified file path.
const traceState = activeTrace
? { status: "active", ...activeTrace }
: { status: "inactive", lastTracePath: getSessionArtifactDir() ? deps.buildSessionArtifactPath("*.trace.zip") : null };
const harState = getHarState();
// Only these HAR fields are exposed in the summary.
const harSummary = {
enabled: harState.enabled,
configuredAtContextCreation: harState.configuredAtContextCreation,
path: harState.path,
exportCount: harState.exportCount,
lastExportedPath: harState.lastExportedPath,
lastExportedAt: harState.lastExportedAt,
};
return {
content: [{ type: "text", text: `${baseSummary.summary}\nFailure hypothesis: ${failureHypothesis}` }],
details: {
...baseSummary,
failureHypothesis,
trace: traceState,
har: harSummary,
// Spread last, so session metadata keys override same-named keys above.
...deps.getSessionArtifactMetadata(),
},
};
} catch (err: any) {
// Any failure is reported as an error result instead of being thrown.
return {
content: [{ type: "text", text: `Session summary failed: ${err.message}` }],
details: { error: err.message, ...deps.getSessionArtifactMetadata() },
isError: true,
};
}
},
});
// -------------------------------------------------------------------------
// browser_debug_bundle
// -------------------------------------------------------------------------
// Tool: browser_debug_bundle
// Writes a timestamped directory containing a viewport screenshot, the
// console/network/dialog logs, the formatted timeline, a session summary,
// the page list, and an accessibility snapshot, then returns the paths and
// entry counts.
pi.registerTool({
name: "browser_debug_bundle",
label: "Browser Debug Bundle",
description: "Write a timestamped debug bundle to disk with screenshot, logs, timeline, pages, session summary, and accessibility output, then return compact paths and counts.",
parameters: Type.Object({
selector: Type.Optional(Type.String({ description: "Optional CSS selector to scope the accessibility snapshot before fallback behavior applies." })),
name: Type.Optional(Type.String({ description: "Optional short bundle name suffix for the output directory." })),
}),
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
try {
const { page: p } = await deps.ensureBrowser();
const startedAt = Date.now();
const sessionDir = await deps.ensureSessionArtifactDir();
// The bundle directory is created directly under ARTIFACT_ROOT, not inside
// sessionDir; sessionDir is only recorded in summary.json below.
const bundleDir = path.join(ARTIFACT_ROOT, `${deps.formatArtifactTimestamp(startedAt)}-${deps.sanitizeArtifactName(params.name ?? "debug-bundle", "debug-bundle")}`);
await ensureDir(bundleDir);
// Snapshot all session state before writing anything.
const pages = await deps.getLivePagesSnapshot();
const actionTimeline = getActionTimeline();
const pageRegistry = getPageRegistry();
const consoleLogs = getConsoleLogs();
const networkLogs = getNetworkLogs();
const dialogLogs = getDialogLogs();
const timeline = formatTimelineEntries(actionTimeline.entries, {
limit: actionTimeline.limit,
totalActions: actionTimeline.nextId - 1,
});
const sessionSummary = summarizeBrowserSession({
timeline: actionTimeline,
totalActions: actionTimeline.nextId - 1,
pages,
activePageId: pageRegistry.activePageId,
activeFrame: getActiveFrameMetadata(),
consoleEntries: consoleLogs,
networkEntries: networkLogs,
dialogEntries: dialogLogs,
// NOTE(review): hard-coded limits; presumably they mirror the log buffer caps — confirm against state.ts.
consoleLimit: 1000,
networkLimit: 1000,
dialogLimit: 1000,
sessionStartedAt: getSessionStartedAt(),
now: Date.now(),
});
const failureHypothesis = buildFailureHypothesis({
timeline: actionTimeline,
consoleEntries: consoleLogs,
networkEntries: networkLogs,
dialogEntries: dialogLogs,
});
const accessibility = await deps.captureAccessibilityMarkdown(params.selector);
const screenshotPath = path.join(bundleDir, "screenshot.jpg");
// Viewport-only JPEG (fullPage: false) at quality 80.
await p.screenshot({ path: screenshotPath, type: "jpeg", quality: 80, fullPage: false });
const screenshotStat = await stat(screenshotPath);
// Files are written one after another; each entry records { path, bytes }.
const artifacts = {
screenshot: { path: screenshotPath, bytes: screenshotStat.size },
console: await deps.writeArtifactFile(path.join(bundleDir, "console.json"), JSON.stringify(consoleLogs, null, 2)),
network: await deps.writeArtifactFile(path.join(bundleDir, "network.json"), JSON.stringify(networkLogs, null, 2)),
dialog: await deps.writeArtifactFile(path.join(bundleDir, "dialog.json"), JSON.stringify(dialogLogs, null, 2)),
timeline: await deps.writeArtifactFile(path.join(bundleDir, "timeline.json"), JSON.stringify(timeline, null, 2)),
summary: await deps.writeArtifactFile(path.join(bundleDir, "summary.json"), JSON.stringify({
...sessionSummary,
failureHypothesis,
trace: getActiveTraceSession(),
har: getHarState(),
sessionArtifactDir: sessionDir,
}, null, 2)),
pages: await deps.writeArtifactFile(path.join(bundleDir, "pages.json"), JSON.stringify(pages, null, 2)),
accessibility: await deps.writeArtifactFile(path.join(bundleDir, "accessibility.md"), accessibility.snapshot),
};
return {
content: [{ type: "text", text: `Debug bundle written: ${bundleDir}\n${sessionSummary.summary}\nFailure hypothesis: ${failureHypothesis}` }],
details: {
bundleDir,
artifacts,
accessibilityScope: accessibility.scope,
accessibilitySource: accessibility.source,
counts: {
console: consoleLogs.length,
network: networkLogs.length,
dialog: dialogLogs.length,
actions: timeline.count,
pages: pages.length,
},
elapsedMs: Date.now() - startedAt,
summary: sessionSummary,
failureHypothesis,
// Spread last, so session metadata keys override same-named keys above.
...deps.getSessionArtifactMetadata(),
},
};
} catch (err: any) {
// Any failure is reported as an error result; files already written are left in place.
return {
content: [{ type: "text", text: `Debug bundle failed: ${err.message}` }],
details: { error: err.message, ...deps.getSessionArtifactMetadata() },
isError: true,
};
}
},
});
}

View file

@ -0,0 +1,247 @@
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@gsd/pi-ai";
import {
validateWaitParams,
createRegionStableScript,
parseThreshold,
includesNeedle,
} from "../core.js";
import type { ToolDeps } from "../state.js";
import {
getConsoleLogs,
} from "../state.js";
/**
 * Registers the `browser_wait_for` tool.
 *
 * The tool blocks until the requested condition holds or `timeout`
 * milliseconds (default 10000) elapse. Parameters are first checked with
 * `validateWaitParams`; any error thrown while waiting is converted into an
 * `isError` result whose text starts with "Wait failed:".
 *
 * @param pi   Extension API used to register the tool.
 * @param deps Lifecycle-wired helpers (`ensureBrowser`, `getActiveTarget`, `getActivePage`).
 */
export function registerWaitTools(pi: ExtensionAPI, deps: ToolDeps): void {
  pi.registerTool({
    name: "browser_wait_for",
    label: "Browser Wait For",
    description:
      "Wait for a condition before continuing. Use after actions that trigger async updates — data fetches, route changes, animations, loading spinners. Choose the appropriate condition: 'selector_visible' waits for an element to appear, 'selector_hidden' waits for it to disappear, 'url_contains' waits for the URL to match, 'network_idle' waits for all network requests to finish, 'delay' waits a fixed number of milliseconds, 'text_visible' waits for text to appear in the page body, 'text_hidden' waits for text to disappear from the page body, 'request_completed' waits for a network response whose URL contains the given substring, 'console_message' waits for a console log message containing the given substring, 'element_count' waits for the number of elements matching the CSS selector in 'value' to satisfy the 'threshold' expression (e.g. '>=3', '==0', '<5'), 'region_stable' waits for the DOM region matching the CSS selector in 'value' to stop changing.",
    parameters: Type.Object({
      condition: StringEnum([
        "selector_visible",
        "selector_hidden",
        "url_contains",
        "network_idle",
        "delay",
        "text_visible",
        "text_hidden",
        "request_completed",
        "console_message",
        "element_count",
        "region_stable",
      ] as const),
      value: Type.Optional(
        Type.String({
          description:
            "For selector_visible/selector_hidden/element_count/region_stable: CSS selector. For url_contains/request_completed: URL substring. For text_visible/text_hidden/console_message: text substring. For delay: milliseconds as a string (e.g. '1000'). Not used for network_idle.",
        })
      ),
      threshold: Type.Optional(
        Type.String({
          description:
            "Threshold expression for element_count (e.g. '>=3', '==0', '<5', or bare '3' which defaults to >=). Only used with element_count condition.",
        })
      ),
      timeout: Type.Optional(
        Type.Number({
          description: "Maximum milliseconds to wait before failing (default: 10000)",
        })
      ),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      try {
        const { page: p } = await deps.ensureBrowser();
        const target = deps.getActiveTarget();
        const timeout = params.timeout ?? 10000;
        // `threshold` is part of the schema, so it is read directly instead of through an `any` cast.
        const validation = validateWaitParams({ condition: params.condition, value: params.value, threshold: params.threshold });
        if (validation) {
          return {
            content: [{ type: "text", text: validation.error }],
            details: { error: validation.error, condition: params.condition },
            isError: true,
          };
        }
        switch (params.condition) {
          case "selector_visible": {
            if (!params.value) {
              return {
                content: [{ type: "text", text: "selector_visible requires a value (CSS selector)" }],
                details: {},
                isError: true,
              };
            }
            await target.waitForSelector(params.value, { state: "visible", timeout });
            return {
              content: [{ type: "text", text: `Element "${params.value}" is now visible` }],
              details: { condition: params.condition, value: params.value },
            };
          }
          case "selector_hidden": {
            if (!params.value) {
              return {
                content: [{ type: "text", text: "selector_hidden requires a value (CSS selector)" }],
                details: {},
                isError: true,
              };
            }
            await target.waitForSelector(params.value, { state: "hidden", timeout });
            return {
              content: [{ type: "text", text: `Element "${params.value}" is now hidden` }],
              details: { condition: params.condition, value: params.value },
            };
          }
          case "url_contains": {
            if (!params.value) {
              return {
                content: [{ type: "text", text: "url_contains requires a value (URL substring)" }],
                details: {},
                isError: true,
              };
            }
            await p.waitForURL((url) => url.toString().includes(params.value!), { timeout });
            return {
              content: [{ type: "text", text: `URL now contains "${params.value}". Current URL: ${p.url()}` }],
              details: { condition: params.condition, value: params.value, url: p.url() },
            };
          }
          case "network_idle": {
            await p.waitForLoadState("networkidle", { timeout });
            return {
              content: [{ type: "text", text: "Network is idle" }],
              details: { condition: params.condition },
            };
          }
          case "delay": {
            // A fixed sleep; `timeout` does not apply to this condition.
            const ms = parseInt(params.value ?? "1000", 10);
            if (isNaN(ms)) {
              return {
                content: [{ type: "text", text: "delay requires a numeric value (milliseconds)" }],
                details: {},
                isError: true,
              };
            }
            await new Promise((resolve) => setTimeout(resolve, ms));
            return {
              content: [{ type: "text", text: `Waited ${ms}ms` }],
              details: { condition: params.condition, ms },
            };
          }
          case "text_visible": {
            // Case-insensitive substring match against the body's innerText.
            await target.waitForFunction(
              (needle: string) => {
                const body = document.body?.innerText ?? "";
                return body.toLowerCase().includes(needle.toLowerCase());
              },
              params.value!,
              { timeout }
            );
            return {
              content: [{ type: "text", text: `Text "${params.value}" is now visible on the page` }],
              details: { condition: params.condition, value: params.value },
            };
          }
          case "text_hidden": {
            await target.waitForFunction(
              (needle: string) => {
                const body = document.body?.innerText ?? "";
                return !body.toLowerCase().includes(needle.toLowerCase());
              },
              params.value!,
              { timeout }
            );
            return {
              content: [{ type: "text", text: `Text "${params.value}" is no longer visible on the page` }],
              details: { condition: params.condition, value: params.value },
            };
          }
          case "request_completed": {
            const response = await deps.getActivePage().waitForResponse(
              (resp) => resp.url().includes(params.value!),
              { timeout }
            );
            return {
              content: [{ type: "text", text: `Request completed: ${response.url()} (status ${response.status()})` }],
              details: { condition: params.condition, value: params.value, url: response.url(), status: response.status() },
            };
          }
          case "console_message": {
            const needle = params.value!;
            const deadline = Date.now() + timeout;
            // Poll the captured console log every 100ms. The log is checked
            // before the deadline test, so a message that is already present
            // (or arrives during the last sleep) is found even with a zero
            // or nearly exhausted timeout.
            for (;;) {
              const match = getConsoleLogs().find((entry) => includesNeedle(entry.text, needle));
              if (match) {
                return {
                  content: [{ type: "text", text: `Console message matching "${needle}" found: "${match.text}"` }],
                  details: { condition: params.condition, value: needle, matchedText: match.text, matchedType: match.type },
                };
              }
              if (Date.now() >= deadline) break;
              await new Promise((resolve) => setTimeout(resolve, 100));
            }
            throw new Error(`Timed out waiting for console message matching "${needle}" (${timeout}ms)`);
          }
          case "element_count": {
            const threshold = parseThreshold(params.threshold ?? ">=1");
            if (!threshold) {
              return {
                content: [{ type: "text", text: `element_count threshold is malformed: "${params.threshold}"` }],
                details: { error: "malformed threshold", condition: params.condition },
                isError: true,
              };
            }
            const selector = params.value!;
            const op = threshold.op;
            const n = threshold.n;
            // The predicate runs in the page, so it receives everything it needs as its argument.
            await target.waitForFunction(
              ({ selector, op, n }: { selector: string; op: string; n: number }) => {
                const count = document.querySelectorAll(selector).length;
                switch (op) {
                  case ">=": return count >= n;
                  case "<=": return count <= n;
                  case "==": return count === n;
                  case ">": return count > n;
                  case "<": return count < n;
                  default: return false;
                }
              },
              { selector, op, n },
              { timeout }
            );
            return {
              content: [{ type: "text", text: `Element count for "${selector}" satisfies ${op}${n}` }],
              details: { condition: params.condition, value: selector, threshold: `${op}${n}` },
            };
          }
          case "region_stable": {
            const script = createRegionStableScript(params.value!);
            await target.waitForFunction(script, undefined, { timeout, polling: 200 });
            return {
              content: [{ type: "text", text: `Region "${params.value}" is now stable` }],
              details: { condition: params.condition, value: params.value },
            };
          }
          default: {
            // Reached only if a condition outside the schema enum slips
            // through; return an explicit error rather than undefined.
            const message = `Unsupported wait condition: ${String(params.condition)}`;
            return {
              content: [{ type: "text", text: message }],
              details: { error: message, condition: params.condition },
              isError: true,
            };
          }
        }
      } catch (err: any) {
        return {
          content: [{ type: "text", text: `Wait failed: ${err.message}` }],
          details: { error: err.message, condition: params.condition, value: params.value },
          isError: true,
        };
      }
    },
  });
}

View file

@ -0,0 +1,660 @@
/**
* browser-tools Node-side utility functions
*
* All functions that were helpers in index.ts but run in Node (not browser).
* They import state accessors from ./state.ts rather than raw module-level variables.
*/
import type { Frame, Page } from "playwright";
import { mkdir, stat, writeFile, copyFile } from "node:fs/promises";
import path from "node:path";
import {
DEFAULT_MAX_BYTES,
DEFAULT_MAX_LINES,
truncateHead,
} from "@gsd/pi-coding-agent";
import {
beginAction,
finishAction,
findAction,
toActionParamsSummary,
registryListPages,
} from "./core.js";
import {
ARTIFACT_ROOT,
getActiveFrame,
getActiveTraceSession,
getConsoleLogs,
getDialogLogs,
getHarState,
getNetworkLogs,
getSessionArtifactDir,
getSessionStartedAt,
setSessionArtifactDir,
setSessionStartedAt,
pageRegistry,
actionTimeline,
getPendingCriticalRequestsByPage,
getLastActionBeforeState,
getLastActionAfterState,
setLastActionBeforeState,
setLastActionAfterState,
type ConsoleEntry,
type NetworkEntry,
type CompactPageState,
type CompactSelectorState,
type ClickTargetStateSnapshot,
type VerificationCheck,
type VerificationResult,
type BrowserAssertionCheckInput,
type AdaptiveSettleOptions,
type AdaptiveSettleDetails,
type ParsedRefSpec,
} from "./state.js";
// ---------------------------------------------------------------------------
// Text truncation
// ---------------------------------------------------------------------------
/**
 * Truncates `text` to the default line and byte limits using `truncateHead`.
 * When truncation happened, a notice with the shown/total line counts is
 * appended to the returned content.
 */
export function truncateText(text: string): string {
  const limits = { maxLines: DEFAULT_MAX_LINES, maxBytes: DEFAULT_MAX_BYTES };
  const outcome = truncateHead(text, limits);
  if (!outcome.truncated) {
    return outcome.content;
  }
  const notice = `\n\n[Output truncated: ${outcome.outputLines}/${outcome.totalLines} lines shown]`;
  return outcome.content + notice;
}
// ---------------------------------------------------------------------------
// Artifact helpers
// ---------------------------------------------------------------------------
/**
 * Formats an epoch-millisecond timestamp as an ISO-8601 string with every
 * ":" and "." replaced by "-".
 */
export function formatArtifactTimestamp(timestamp: number): string {
  const iso = new Date(timestamp).toISOString();
  return iso.split(/[:.]/).join("-");
}
/**
 * Creates `dirPath` (including missing parents) and resolves to the same path.
 */
export async function ensureDir(dirPath: string): Promise<string> {
  const options = { recursive: true } as const;
  await mkdir(dirPath, options);
  return dirPath;
}
/**
 * Writes `content` to `filePath`, creating the parent directory first.
 *
 * @returns The written path and its size in bytes as reported by `stat`.
 */
export async function writeArtifactFile(
  filePath: string,
  content: string | Uint8Array,
): Promise<{ path: string; bytes: number }> {
  const parentDir = path.dirname(filePath);
  await ensureDir(parentDir);
  await writeFile(filePath, content);
  const { size } = await stat(filePath);
  return { path: filePath, bytes: size };
}
/**
 * Copies `sourcePath` to `destinationPath`, creating the destination's
 * parent directory first.
 *
 * @returns The destination path and its size in bytes as reported by `stat`.
 */
export async function copyArtifactFile(
  sourcePath: string,
  destinationPath: string,
): Promise<{ path: string; bytes: number }> {
  const parentDir = path.dirname(destinationPath);
  await ensureDir(parentDir);
  await copyFile(sourcePath, destinationPath);
  const { size } = await stat(destinationPath);
  return { path: destinationPath, bytes: size };
}
/**
 * Returns the recorded session start time, recording `Date.now()` first
 * when no (truthy) start time has been stored yet.
 */
export function ensureSessionStartedAt(): number {
  const recorded = getSessionStartedAt();
  if (recorded) {
    return recorded;
  }
  const now = Date.now();
  setSessionStartedAt(now);
  return now;
}
/**
 * Ensures the session artifact directory exists on disk and returns its path.
 * On first use the directory is named `<timestamp>-session` under
 * ARTIFACT_ROOT and stored in session state.
 */
export async function ensureSessionArtifactDir(): Promise<string> {
  const current = getSessionArtifactDir();
  if (!current) {
    const stamp = formatArtifactTimestamp(ensureSessionStartedAt());
    const created = path.join(ARTIFACT_ROOT, `${stamp}-session`);
    setSessionArtifactDir(created);
    await ensureDir(created);
    return created;
  }
  // Re-create the directory in case it was removed since it was recorded.
  await ensureDir(current);
  return current;
}
/**
 * Joins `filename` onto the session artifact directory.
 *
 * @throws Error when the session artifact directory has not been set.
 */
export function buildSessionArtifactPath(filename: string): string {
  const sessionDir = getSessionArtifactDir();
  if (sessionDir) {
    return path.join(sessionDir, filename);
  }
  throw new Error("browser session artifact directory is not initialized");
}
/**
 * Returns id, title and url of the registry's active page, or
 * `{ id: null, title: "", url: "" }` when there is no active page entry.
 */
export function getActivePageMetadata() {
  const { activePageId, pages } = pageRegistry;
  const activeEntry =
    activePageId === null
      ? undefined
      : pages.find((entry: any) => entry.id === activePageId);
  const id = activeEntry?.id ?? null;
  const title = activeEntry?.title ?? "";
  const url = activeEntry?.url ?? "";
  return { id, title, url };
}
/**
 * Returns the active frame's name and url, with empty strings mapped to
 * null. Both fields are null when no frame is active.
 */
export function getActiveFrameMetadata() {
  const frame = getActiveFrame();
  return frame
    ? { name: frame.name() || null, url: frame.url() || null }
    : { name: null, url: null };
}
/**
 * Collects the session metadata attached to tool results: artifact root,
 * session start and directory, trace session, a shallow copy of the HAR
 * state, and the active page/frame metadata.
 */
export function getSessionArtifactMetadata() {
  const sessionStartedAt = getSessionStartedAt();
  const sessionArtifactDir = getSessionArtifactDir();
  const activeTraceSession = getActiveTraceSession();
  // Shallow copy so callers cannot mutate the stored HAR state object.
  const harState = { ...getHarState() };
  const activePage = getActivePageMetadata();
  const activeFrame = getActiveFrameMetadata();
  return {
    artifactRoot: ARTIFACT_ROOT,
    sessionStartedAt,
    sessionArtifactDir,
    activeTraceSession,
    harState,
    activePage,
    activeFrame,
  };
}
/**
 * Turns `value` into a filesystem-friendly name: trims it, collapses each
 * run of characters outside `[a-zA-Z0-9._-]` into one dash, and strips
 * leading/trailing dashes. Returns `fallback` when nothing is left.
 */
export function sanitizeArtifactName(value: string, fallback: string): string {
  const collapsed = value.trim().replace(/[^a-zA-Z0-9._-]+/g, "-");
  const stripped = collapsed.replace(/^-+|-+$/g, "");
  return stripped.length > 0 ? stripped : fallback;
}
// ---------------------------------------------------------------------------
// Page helpers
// ---------------------------------------------------------------------------
/**
* getLivePagesSnapshot requires ensureBrowser (a circular dependency), so it is
* wired in via ToolDeps. This is a factory that takes ensureBrowser.
*/
/**
 * Builds `getLivePagesSnapshot` around the supplied `ensureBrowser`.
 *
 * The returned function refreshes the cached title and url of every
 * registered page, then returns `registryListPages(pageRegistry)`.
 */
export function createGetLivePagesSnapshot(
  ensureBrowser: () => Promise<{ page: Page }>,
) {
  const getLivePagesSnapshot = async () => {
    await ensureBrowser();
    for (const tracked of pageRegistry.pages) {
      try {
        const liveTitle = await tracked.page.title();
        tracked.title = liveTitle;
        tracked.url = tracked.page.url();
      } catch {
        // The page may have closed between snapshots; keep its last known values.
      }
    }
    return registryListPages(pageRegistry);
  };
  return getLivePagesSnapshot;
}
/**
 * Decides which part of the page an accessibility snapshot should cover.
 *
 * Priority: an explicit non-blank selector, then the active frame's body,
 * then the full page body. Dialog detection is not handled here; callers
 * that need it must do it separately.
 */
export async function resolveAccessibilityScope(
  selector?: string,
): Promise<{ selector?: string; scope: string; source: string }> {
  const explicit = selector?.trim();
  if (explicit) {
    return {
      selector: explicit,
      scope: `selector:${explicit}`,
      source: "explicit_selector",
    };
  }
  const frame = getActiveFrame();
  if (!frame) {
    return { selector: "body", scope: "full page", source: "full_page" };
  }
  const frameName = frame.name();
  const scope = frameName ? `active frame:${frameName}` : "active frame";
  return { selector: "body", scope, source: "active_frame" };
}
/**
* captureAccessibilityMarkdown needs access to the active target.
* Accepts the target (Page | Frame) so it doesn't need to pull from state.
*/
/**
 * Takes an ARIA snapshot of the first element matching the resolved scope
 * selector (falling back to "body") on the given target.
 *
 * @returns The snapshot text plus the scope label and source it came from.
 */
export async function captureAccessibilityMarkdown(
  target: Page | Frame,
  selector?: string,
): Promise<{ snapshot: string; scope: string; source: string }> {
  const resolved = await resolveAccessibilityScope(selector);
  const scopedSelector = resolved.selector ?? "body";
  const snapshot = await target.locator(scopedSelector).first().ariaSnapshot();
  return { snapshot, scope: resolved.scope, source: resolved.source };
}
// ---------------------------------------------------------------------------
// Critical request tracking
// ---------------------------------------------------------------------------
/** Returns true for the resource types "document", "fetch" and "xhr". */
export function isCriticalResourceType(resourceType: string): boolean {
  switch (resourceType) {
    case "document":
    case "fetch":
    case "xhr":
      return true;
    default:
      return false;
  }
}
/**
 * Adjusts the pending critical-request counter for page `p` by `delta`,
 * never letting it drop below zero.
 */
export function updatePendingCriticalRequests(p: Page, delta: number): void {
  const pendingByPage = getPendingCriticalRequestsByPage();
  const adjusted = (pendingByPage.get(p) ?? 0) + delta;
  pendingByPage.set(p, Math.max(0, adjusted));
}
/** Returns the pending critical-request count for page `p` (0 when untracked). */
export function getPendingCriticalRequests(p: Page): number {
  const pending = getPendingCriticalRequestsByPage().get(p);
  return pending ?? 0;
}
// ---------------------------------------------------------------------------
// Verification helpers
// ---------------------------------------------------------------------------
/**
 * Folds individual checks into a verification result.
 *
 * The result is verified when at least one check passed; the summary then
 * lists the passed check names. Otherwise it is a soft failure and the
 * optional `retryHint` is passed through.
 */
export function verificationFromChecks(
  checks: VerificationCheck[],
  retryHint?: string,
): VerificationResult {
  const passedNames = checks.flatMap((check) => (check.passed ? [check.name] : []));
  if (passedNames.length === 0) {
    return {
      verified: false,
      checks,
      verificationSummary: "SOFT-FAIL (no observable state change)",
      retryHint,
    };
  }
  return {
    verified: true,
    checks,
    verificationSummary: `PASS (${passedNames.join(", ")})`,
    // A retry hint is only meaningful for failures.
    retryHint: undefined,
  };
}
/** Renders the one-line "Verification: …" text for a verification result. */
export function verificationLine(verification: VerificationResult): string {
  const { verificationSummary } = verification;
  return "Verification: " + verificationSummary;
}
// ---------------------------------------------------------------------------
// Assertion helpers
// ---------------------------------------------------------------------------
/**
 * Gathers everything assertion checks are evaluated against.
 *
 * Captures a compact page state (with body text) for the selectors named in
 * `checks`, and pairs it with console/network entries. The "since" variants
 * are filtered from the largest `sinceActionId` found in `checks`; when no
 * check sets one, they are unfiltered.
 *
 * @param p                       Page to capture.
 * @param checks                  Assertion inputs; only `selector` and `sinceActionId` are read here.
 * @param captureCompactPageState Injected capture function.
 * @param target                  Optional page/frame forwarded to the capture function.
 */
export async function collectAssertionState(
  p: Page,
  checks: BrowserAssertionCheckInput[],
  captureCompactPageState: (
    p: Page,
    options?: { selectors?: string[]; includeBodyText?: boolean; target?: Page | Frame },
  ) => Promise<CompactPageState>,
  target?: Page | Frame,
): Promise<{
  url: string;
  title: string;
  bodyText: string;
  focus: string;
  selectorStates: Record<string, CompactSelectorState>;
  consoleEntries: ConsoleEntry[];
  networkEntries: NetworkEntry[];
  allConsoleEntries: ConsoleEntry[];
  allNetworkEntries: NetworkEntry[];
  actionTimeline: typeof actionTimeline;
}> {
  const selectors: string[] = [];
  for (const check of checks) {
    if (check.selector) {
      selectors.push(check.selector);
    }
  }
  const captured = await captureCompactPageState(p, {
    selectors,
    includeBodyText: true,
    target,
  });
  // Use the most recent action id any check asks for as the log cut-off.
  let sinceActionId: number | undefined;
  for (const check of checks) {
    if (check.sinceActionId === undefined) continue;
    sinceActionId =
      sinceActionId === undefined
        ? check.sinceActionId
        : Math.max(sinceActionId, check.sinceActionId);
  }
  return {
    url: captured.url,
    title: captured.title,
    bodyText: captured.bodyText,
    focus: captured.focus,
    selectorStates: captured.selectorStates,
    consoleEntries: getConsoleEntriesSince(sinceActionId),
    networkEntries: getNetworkEntriesSince(sinceActionId),
    allConsoleEntries: getConsoleLogs(),
    allNetworkEntries: getNetworkLogs(),
    actionTimeline,
  };
}
/**
 * Renders an assertion result as text: the summary, up to eight per-check
 * PASS/FAIL lines with expected and actual values, and a closing hint line.
 */
export function formatAssertionText(
  result: ReturnType<typeof import("./core.js").evaluateAssertionChecks>,
): string {
  const checkLines = result.checks.slice(0, 8).map((check) => {
    const verdict = check.passed ? "PASS" : "FAIL";
    const expected = JSON.stringify(check.expected);
    const actual = JSON.stringify(check.actual);
    return `- ${verdict} ${check.name}: expected ${expected}, got ${actual}`;
  });
  return [result.summary, ...checkLines, `Hint: ${result.agentHint}`].join("\n");
}
/**
 * Renders a compact-state diff as text: the summary followed by up to eight
 * `- <type>: <before> -> <after>` lines. Missing before/after values are
 * shown as `null`.
 *
 * The before and after values were previously concatenated with nothing
 * between them (e.g. `"a""b"`); an explicit ` -> ` separator is now emitted.
 */
export function formatDiffText(
  diff: ReturnType<typeof import("./core.js").diffCompactStates>,
): string {
  const lines = [diff.summary];
  for (const change of diff.changes.slice(0, 8)) {
    const before = JSON.stringify(change.before ?? null);
    const after = JSON.stringify(change.after ?? null);
    lines.push(`- ${change.type}: ${before} -> ${after}`);
  }
  return lines.join("\n");
}
// ---------------------------------------------------------------------------
// URL / dialog helpers
// ---------------------------------------------------------------------------
/**
 * Returns the fragment of `url` including the leading "#", or an empty
 * string when there is no fragment or the url cannot be parsed.
 */
export function getUrlHash(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return "";
  }
  return parsed.hash;
}
/**
 * Counts elements in `target` matching `[role="dialog"]:not([hidden])` or
 * `dialog[open]`. Returns 0 when the evaluation fails.
 */
export async function countOpenDialogs(target: Page | Frame): Promise<number> {
  let openCount = 0;
  try {
    openCount = await target.evaluate(() => {
      const openDialogs = document.querySelectorAll(
        '[role="dialog"]:not([hidden]),dialog[open]',
      );
      return openDialogs.length;
    });
  } catch {
    // Best effort: an unreachable target counts as "no dialogs".
    return 0;
  }
  return openCount;
}
// ---------------------------------------------------------------------------
// Click / input helpers
// ---------------------------------------------------------------------------
/**
 * Reads the toggle-related state of the first element matching `selector`:
 * whether it exists, its aria-expanded / aria-pressed / aria-selected
 * attributes, and whether it is open (`open` property for <dialog>,
 * presence of an `open` attribute otherwise).
 *
 * Returns the "does not exist" snapshot when the element is missing or the
 * evaluation fails.
 */
export async function captureClickTargetState(
  target: Page | Frame,
  selector: string,
): Promise<ClickTargetStateSnapshot> {
  try {
    // The callback runs in the page, so it must not reference outer variables.
    const snapshot = await target.evaluate((sel) => {
      const node = document.querySelector(sel) as HTMLElement | null;
      if (node === null) {
        return {
          exists: false,
          ariaExpanded: null,
          ariaPressed: null,
          ariaSelected: null,
          open: null,
        };
      }
      const isOpen =
        node instanceof HTMLDialogElement
          ? node.open
          : node.getAttribute("open") !== null;
      return {
        exists: true,
        ariaExpanded: node.getAttribute("aria-expanded"),
        ariaPressed: node.getAttribute("aria-pressed"),
        ariaSelected: node.getAttribute("aria-selected"),
        open: isOpen,
      };
    }, selector);
    return snapshot;
  } catch {
    return {
      exists: false,
      ariaExpanded: null,
      ariaPressed: null,
      ariaSelected: null,
      open: null,
    };
  }
}
/**
 * Reads the current value of an input-like element.
 *
 * With a `selector`, the first matching element is used. Without one, the
 * focused element is used unless focus is on the body or document element.
 * Returns `.value` for input/textarea/select, trimmed text for
 * content-editable elements, otherwise the `value` attribute. Returns null
 * when no element is found or the evaluation fails.
 */
export async function readInputLikeValue(
  target: Page | Frame,
  selector?: string,
): Promise<string | null> {
  try {
    // The callback runs in the page, so it must not reference outer variables.
    return await target.evaluate((sel) => {
      let element: Element | null;
      if (sel) {
        element = document.querySelector(sel);
      } else {
        const focused = document.activeElement;
        const nothingFocused =
          !focused ||
          focused === document.body ||
          focused === document.documentElement;
        element = nothingFocused ? null : focused;
      }
      if (!element) return null;
      if (
        element instanceof HTMLInputElement ||
        element instanceof HTMLTextAreaElement ||
        element instanceof HTMLSelectElement
      ) {
        return element.value;
      }
      const htmlElement = element as HTMLElement;
      if (htmlElement.isContentEditable) {
        return (htmlElement.textContent ?? "").trim();
      }
      return htmlElement.getAttribute("value");
    }, selector);
  } catch {
    return null;
  }
}
/**
 * Extracts the first line of an error's message.
 *
 * Objects with a `message` property use that property; anything else is
 * stringified. Null, undefined and empty messages yield "unknown error".
 */
export function firstErrorLine(err: unknown): string {
  let message: string;
  if (typeof err === "object" && err !== null && "message" in err) {
    const raw = (err as { message?: unknown }).message;
    message = String(raw ?? "");
  } else {
    message = String(err ?? "unknown error");
  }
  const [firstLine] = message.split("\n");
  return firstLine || "unknown error";
}
// ---------------------------------------------------------------------------
// Action tracking
// ---------------------------------------------------------------------------
/**
 * Starts a new entry on the shared action timeline for `tool`, storing a
 * summary of `params` and the url before the action ran.
 */
export function beginTrackedAction(
  tool: string,
  params: unknown,
  beforeUrl: string,
) {
  const paramsSummary = toActionParamsSummary(params);
  return beginAction(actionTimeline, { tool, paramsSummary, beforeUrl });
}
/**
 * Completes the action `actionId` on the shared action timeline with the
 * given outcome fields and returns whatever `finishAction` returns.
 */
export function finishTrackedAction(
  actionId: number,
  updates: {
    status: "success" | "error";
    afterUrl?: string;
    verificationSummary?: string;
    warningSummary?: string;
    diffSummary?: string;
    changed?: boolean;
    error?: string;
    beforeState?: CompactPageState;
    afterState?: CompactPageState;
  },
) {
  const finished = finishAction(actionTimeline, actionId, updates);
  return finished;
}
/**
 * Returns the start timestamp of action `sinceActionId`, or 0 when the id
 * is missing/zero, the action is not found, or it has no start time.
 */
export function getSinceTimestamp(sinceActionId?: number): number {
  if (sinceActionId) {
    const action = findAction(actionTimeline, sinceActionId);
    if (action) {
      return action.startedAt ?? 0;
    }
  }
  return 0;
}
/**
 * Returns the console entries whose timestamp is at or after the start of
 * action `sinceActionId` (all entries when the cut-off resolves to 0).
 */
export function getConsoleEntriesSince(sinceActionId?: number): ConsoleEntry[] {
  const cutoff = getSinceTimestamp(sinceActionId);
  const recent: ConsoleEntry[] = [];
  for (const entry of getConsoleLogs()) {
    if (entry.timestamp >= cutoff) {
      recent.push(entry);
    }
  }
  return recent;
}
/**
 * Returns the network entries whose timestamp is at or after the start of
 * action `sinceActionId` (all entries when the cut-off resolves to 0).
 */
export function getNetworkEntriesSince(sinceActionId?: number): NetworkEntry[] {
  const cutoff = getSinceTimestamp(sinceActionId);
  const recent: NetworkEntry[] = [];
  for (const entry of getNetworkLogs()) {
    if (entry.timestamp >= cutoff) {
      recent.push(entry);
    }
  }
  return recent;
}
// ---------------------------------------------------------------------------
// Error summary
// ---------------------------------------------------------------------------
/**
 * Builds a short warning footer from problems logged during the last 12 seconds.
 *
 * Sources are read in this order:
 *  - console logs: entries of type "error" or "pageerror" (text cut to 120 chars);
 *  - network logs: failed requests and 401/403/404/408/409/422/429 responses for
 *    document/fetch/xhr/script resources, plus any response with status >= 500;
 *  - dialog logs: every entry (message cut to 80 chars).
 *
 * An entry is only considered when its URL has the same origin as `pageUrl`; if
 * `pageUrl` cannot be parsed as a URL the origin check is skipped. Identical messages
 * are merged with an `(xN)` suffix and each section lists at most the first 2 (JS),
 * 2 (network) or 1 (dialog) distinct messages.
 *
 * @param pageUrl - URL whose origin the log entries must match.
 * @returns An empty string when nothing qualifies; otherwise a block that starts with
 *   two newlines, lists the warnings, and points at the full-log tools.
 */
export function getRecentErrors(pageUrl: string): string {
  const cutoff = Date.now() - 12_000;

  const originOf = (url: string): string | null => {
    try {
      return new URL(url).origin;
    } catch {
      return null;
    }
  };
  const pageOrigin = originOf(pageUrl);
  const isSameOrigin = (url: string): boolean => {
    if (!pageOrigin) return true;
    return originOf(url) === pageOrigin;
  };

  // Map iteration follows first-insertion order, so the tally also remembers
  // which distinct message was seen first.
  const collapse = (messages: string[], limit: number): string[] => {
    const tally = new Map<string, number>();
    for (const message of messages) {
      tally.set(message, (tally.get(message) ?? 0) + 1);
    }
    return Array.from(tally)
      .slice(0, limit)
      .map(([message, seen]) => (seen > 1 ? `${message} (x${seen})` : message));
  };

  const sections: string[] = [];
  const addSection = (label: string, messages: string[], limit: number): void => {
    if (messages.length === 0) return;
    sections.push(`${label}: ${collapse(messages, limit).join(" | ")}`);
  };

  const jsMessages = getConsoleLogs()
    .filter((entry) => {
      const isError = entry.type === "error" || entry.type === "pageerror";
      return isError && entry.timestamp >= cutoff && isSameOrigin(entry.url);
    })
    .map((entry) => entry.text.slice(0, 120));
  addSection("JS", jsMessages, 2);

  const actionableStatus = new Set([401, 403, 404, 408, 409, 422, 429]);
  const actionableTypes = new Set(["document", "fetch", "xhr", "script"]);
  const networkMessages: string[] = [];
  for (const entry of getNetworkLogs()) {
    if (!(entry.timestamp >= cutoff && isSameOrigin(entry.url))) continue;
    const typeMatters = actionableTypes.has(entry.resourceType);
    if (entry.failed) {
      if (typeMatters) {
        networkMessages.push(`${entry.method} ${entry.resourceType} FAILED`);
      }
      continue;
    }
    if (entry.status === null) continue;
    // Server errors are reported for every resource type; the selected 4xx codes
    // only for the actionable resource types.
    if (
      entry.status >= 500 ||
      (actionableStatus.has(entry.status) && typeMatters)
    ) {
      networkMessages.push(
        `${entry.method} ${entry.resourceType} ${entry.status}`,
      );
    }
  }
  addSection("Network", networkMessages, 2);

  const dialogMessages = getDialogLogs()
    .filter((entry) => entry.timestamp >= cutoff && isSameOrigin(entry.url))
    .map((entry) => `${entry.type}: ${entry.message.slice(0, 80)}`);
  addSection("Dialogs", dialogMessages, 1);

  if (sections.length === 0) return "";
  return `\n\nWarnings: ${sections.join("; ")}\nUse browser_get_console_logs/browser_get_network_logs for full diagnostics.`;
}
// ---------------------------------------------------------------------------
// Ref helpers (parsing / formatting — no browser evaluate)
// ---------------------------------------------------------------------------
/**
 * Parses a user-supplied element ref into its key and optional snapshot version.
 *
 * The input is trimmed, lower-cased and stripped of one leading "@". A token of the
 * form `v<digits>:e<digits>` is treated as versioned; any other token is returned
 * as-is as the key with a `null` version.
 *
 * @param input - Ref text such as "@v3:e12", "v3:e12" or "@e12".
 * @returns The key, the version (or `null`), and a normalized "@"-prefixed display form.
 */
export function parseRef(input: string): ParsedRefSpec {
  const token = input.trim().toLowerCase().replace(/^@/, "");
  const match = /^v(\d+):(e\d+)$/.exec(token);
  if (match === null) {
    return { key: token, version: null, display: `@${token}` };
  }
  const [, digits, key] = match;
  const version = Number.parseInt(digits, 10);
  return { key, version, display: `@v${version}:${key}` };
}
/**
 * Formats a snapshot version and element key as a versioned ref string.
 *
 * @param version - Snapshot version number.
 * @param key - Element key, e.g. "e12".
 * @returns The ref in `@v<version>:<key>` form.
 */
export function formatVersionedRef(version: number, key: string): string {
  return "@v" + String(version) + ":" + key;
}
/**
 * Produces the message shown when a ref cannot be resolved.
 *
 * @param refDisplay - Display form of the ref that failed to resolve.
 * @param reason - Short explanation, placed in parentheses after the ref.
 * @returns A three-sentence message: the failure, the likely cause, and the remedy.
 */
export function staleRefGuidance(refDisplay: string, reason: string): string {
  const sentences = [
    `Ref ${refDisplay} could not be resolved (${reason}).`,
    "The ref is likely stale after DOM/navigation changes.",
    "Call browser_snapshot_refs again to refresh refs.",
  ];
  return sentences.join(" ");
}
// ---------------------------------------------------------------------------
// Compact state summary formatting
// ---------------------------------------------------------------------------
/**
 * Renders a compact page state as a newline-separated, human-readable summary.
 *
 * Always emits the title, URL, element counts and a closing hint naming follow-up
 * tools. The headings, focused-element and active-dialog lines are emitted only when
 * the corresponding data is non-empty.
 *
 * NOTE(review): each heading is labelled `H<n>` where n is its 1-based position in
 * `state.headings`; whether that matches the heading's actual level cannot be
 * determined from this function. Confirm against the code that builds the state.
 *
 * @param state - Compact page state to describe.
 * @returns The summary text, one item per line.
 */
export function formatCompactStateSummary(state: CompactPageState): string {
  const { title, url, counts, headings, focus, dialog } = state;
  const headingList = headings
    .map((text, position) => `H${position + 1} "${text}"`)
    .join(", ");

  const summary = [
    `Title: ${title}`,
    `URL: ${url}`,
    `Elements: ${counts.landmarks} landmarks, ${counts.buttons} buttons, ${counts.links} links, ${counts.inputs} inputs`,
    ...(headings.length > 0 ? [`Headings: ${headingList}`] : []),
    ...(focus ? [`Focused: ${focus}`] : []),
    ...(dialog.title ? [`Active dialog: "${dialog.title}"`] : []),
    "Use browser_find for targeted discovery, browser_assert for verification, or browser_get_accessibility_tree for full detail.",
  ];
  return summary.join("\n");
}