From 8e2d403179e94e1c501321f46d32c2e91e910f0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?T=C3=82CHES?=
Date: Fri, 20 Mar 2026 12:51:51 -0600
Subject: [PATCH] feat(gsd): add .gsd/RUNTIME.md template for declared runtime
 context (#1626)

Template for projects to declare stack, build, test, and environment details.
Inlined into execute-task prompts when present.

Also registers the browser_verify tool and adds browser evidence types to the
verification-evidence schema.

Co-authored-by: Claude Opus 4.6 (1M context)
---
Review notes (not part of the commit message):
- verify.ts: a missing element now fails a check whose expectedText is ""
  (the guard compared truthiness instead of `!== undefined`); `actual` keeps
  both the visibility and text observations; a failed screenshot records the
  error message.
- auto-prompts.ts: a whitespace-only RUNTIME.md no longer injects an empty
  "Runtime Context" section.
- runtime.md: "Node version" generalized to "Runtime version" so the template
  matches its own non-Node examples.
- Line breaks and leading whitespace were reconstructed from a collapsed copy
  of this patch, and the post-image blob ids on the `index` lines are stale;
  apply with `git apply --ignore-whitespace` if context fails to match.

 .../extensions/browser-tools/index.ts         |   3 +
 .../extensions/browser-tools/tools/verify.ts  | 140 ++++++++++++++++++
 src/resources/extensions/gsd/auto-prompts.ts  |  11 +-
 src/resources/extensions/gsd/paths.ts         |   5 +
 .../extensions/gsd/prompts/execute-task.md    |   2 +
 .../extensions/gsd/templates/runtime.md       |  21 +++
 src/resources/extensions/gsd/types.ts         |   9 ++
 .../extensions/gsd/verification-evidence.ts   |  18 +++
 8 files changed, 208 insertions(+), 1 deletion(-)
 create mode 100644 src/resources/extensions/browser-tools/tools/verify.ts
 create mode 100644 src/resources/extensions/gsd/templates/runtime.md

diff --git a/src/resources/extensions/browser-tools/index.ts b/src/resources/extensions/browser-tools/index.ts
index 236b7b4d4..35fe7f4c2 100644
--- a/src/resources/extensions/browser-tools/index.ts
+++ b/src/resources/extensions/browser-tools/index.ts
@@ -33,6 +33,7 @@ async function registerBrowserTools(pi: ExtensionAPI): Promise {
     codegen,
     actionCache,
     injectionDetection,
+    verify,
   ] = await Promise.all([
     importExtensionModule(import.meta.url, "./lifecycle.js"),
     importExtensionModule(import.meta.url, "./capture.js"),
@@ -60,6 +61,7 @@ async function registerBrowserTools(pi: ExtensionAPI): Promise {
     importExtensionModule(import.meta.url, "./tools/codegen.js"),
     importExtensionModule(import.meta.url, "./tools/action-cache.js"),
     importExtensionModule(import.meta.url, "./tools/injection-detect.js"),
+    importExtensionModule(import.meta.url, "./tools/verify.js"),
   ]);
 
   const deps = {
@@ -132,6 +134,7 @@ async function registerBrowserTools(pi: ExtensionAPI): Promise {
     codegen.registerCodegenTools(pi, deps);
     actionCache.registerActionCacheTools(pi, deps);
     injectionDetection.registerInjectionDetectionTools(pi, deps);
+    verify.registerVerifyTools(pi, deps);
   })().catch((error) => {
     registrationPromise = null;
     throw error;
diff --git a/src/resources/extensions/browser-tools/tools/verify.ts b/src/resources/extensions/browser-tools/tools/verify.ts
new file mode 100644
index 000000000..6059e607b
--- /dev/null
+++ b/src/resources/extensions/browser-tools/tools/verify.ts
@@ -0,0 +1,140 @@
+import type { ExtensionAPI } from "@gsd/pi-coding-agent";
+import { Type } from "@sinclair/typebox";
+import type { ToolDeps } from "../state.js";
+
+/**
+ * Registers the `browser_verify` tool on the given extension API.
+ *
+ * The tool navigates the page returned by `deps.ensureBrowser()` to `url`,
+ * runs each check in order, and returns a text summary plus a structured
+ * `details` payload (`url`, `passed`, per-check results, `duration` in ms).
+ * A navigation failure marks every check as failed with the navigation
+ * error; an exception inside one check fails only that check.
+ *
+ * @param pi - Extension API the tool is registered on.
+ * @param deps - Tool dependencies; only `ensureBrowser()` is used here.
+ */
+export function registerVerifyTools(pi: ExtensionAPI, deps: ToolDeps): void {
+  pi.registerTool({
+    name: "browser_verify",
+    label: "Browser Verify",
+    description:
+      "Run a structured browser verification flow: navigate to a URL, run checks (element visibility, text content), capture screenshots as evidence, and return structured pass/fail results.",
+    promptGuidelines: [
+      "Use browser_verify for UAT verification flows that need structured evidence.",
+      "Each check produces a pass/fail result with captured evidence.",
+      "Prefer this over manual navigation + assertion sequences for verification tasks.",
+    ],
+    parameters: Type.Object({
+      url: Type.String({ description: "URL to navigate to" }),
+      checks: Type.Array(
+        Type.Object({
+          description: Type.String({ description: "What this check verifies" }),
+          selector: Type.Optional(Type.String({ description: "CSS selector to check" })),
+          expectedText: Type.Optional(Type.String({ description: "Expected text content" })),
+          expectedVisible: Type.Optional(Type.Boolean({ description: "Whether element should be visible" })),
+          screenshot: Type.Optional(Type.Boolean({ description: "Capture screenshot as evidence" })),
+        }),
+        { description: "Verification checks to run" },
+      ),
+      timeout: Type.Optional(Type.Number({ description: "Navigation timeout in ms", default: 10000 })),
+    }),
+    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
+      const startTime = Date.now();
+      const { page } = await deps.ensureBrowser();
+      const timeout = params.timeout ?? 10000;
+
+      try {
+        await page.goto(params.url, { waitUntil: "domcontentloaded", timeout });
+      } catch (navErr) {
+        const msg = navErr instanceof Error ? navErr.message : String(navErr);
+        return {
+          content: [{ type: "text" as const, text: `Navigation failed: ${msg}` }],
+          details: {
+            url: params.url,
+            passed: false,
+            checks: params.checks.map((c) => ({ description: c.description, passed: false, error: msg })),
+            duration: Date.now() - startTime,
+          },
+        };
+      }
+
+      const results: Array<{
+        description: string;
+        passed: boolean;
+        actual?: string;
+        evidence?: string;
+        error?: string;
+      }> = [];
+
+      for (const check of params.checks) {
+        try {
+          let passed = true;
+          let actual: string | undefined;
+          let evidence: string | undefined;
+
+          if (check.selector) {
+            const element = await page.$(check.selector);
+            // Collect every observation so a combined visibility + text check
+            // reports both instead of the text result overwriting visibility.
+            const observed: string[] = [];
+
+            if (check.expectedVisible !== undefined) {
+              const isVisible = element ? await element.isVisible() : false;
+              passed = isVisible === check.expectedVisible;
+              observed.push(`visible=${isVisible}`);
+            }
+
+            if (check.expectedText !== undefined && element) {
+              const text = await element.textContent();
+              passed = passed && (text?.includes(check.expectedText) ?? false);
+              observed.push(`text="${text?.slice(0, 200) ?? ""}"`);
+            }
+
+            // Test against undefined, not truthiness: an expected text of ""
+            // is still an expectation that a missing element cannot satisfy.
+            const elementMissing =
+              !element && (check.expectedVisible === true || check.expectedText !== undefined);
+            if (elementMissing) {
+              passed = false;
+              actual = "element not found";
+            } else if (observed.length > 0) {
+              actual = observed.join(", ");
+            }
+          }
+
+          if (check.screenshot) {
+            try {
+              const buf = await page.screenshot({ type: "png" });
+              evidence = `screenshot captured (${buf.length} bytes)`;
+            } catch (shotErr) {
+              // Best-effort: a failed capture never fails the check, but keep the reason.
+              const reason = shotErr instanceof Error ? shotErr.message : String(shotErr);
+              evidence = `screenshot failed: ${reason}`;
+            }
+          }
+
+          results.push({ description: check.description, passed, actual, evidence });
+        } catch (checkErr) {
+          results.push({
+            description: check.description,
+            passed: false,
+            error: checkErr instanceof Error ? checkErr.message : String(checkErr),
+          });
+        }
+      }
+
+      const allPassed = results.every((r) => r.passed);
+      const summary = results.map((r) => `${r.passed ? "PASS" : "FAIL"}: ${r.description}${r.actual ? ` (${r.actual})` : ""}${r.error ? ` — ${r.error}` : ""}`).join("\n");
+      return {
+        content: [{ type: "text" as const, text: `Verification ${allPassed ? "PASSED" : "FAILED"} (${results.filter(r => r.passed).length}/${results.length})\n\n${summary}` }],
+        details: {
+          url: params.url,
+          passed: allPassed,
+          checks: results,
+          duration: Date.now() - startTime,
+        },
+      };
+    },
+  });
+}
diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts
index f1cb91540..c1008579f 100644
--- a/src/resources/extensions/gsd/auto-prompts.ts
+++ b/src/resources/extensions/gsd/auto-prompts.ts
@@ -13,7 +13,7 @@ import {
   resolveMilestoneFile, resolveSliceFile, resolveSlicePath,
   resolveTasksDir, resolveTaskFiles, resolveTaskFile,
   relMilestoneFile, relSliceFile, relSlicePath, relMilestonePath,
-  resolveGsdRootFile, relGsdRootFile,
+  resolveGsdRootFile, relGsdRootFile, resolveRuntimeFile,
 } from "./paths.js";
 import { resolveSkillDiscoveryMode, resolveInlineLevel, loadEffectiveGSDPreferences } from "./preferences.js";
 import type { GSDState, InlineLevel } from "./types.js";
@@ -891,8 +891,17 @@ export async function buildExecuteTaskPrompt(
     finalCarryForward = truncateAtSectionBoundary(carryForwardSection, carryForwardBudget).content;
   }
 
+  // Inline RUNTIME.md if present; an empty or whitespace-only file adds nothing
+  const runtimePath = resolveRuntimeFile(base);
+  const runtimeContent = existsSync(runtimePath) ? await loadFile(runtimePath) : null;
+  const runtimeBody = runtimeContent?.trim();
+  const runtimeContext = runtimeBody
+    ? `### Runtime Context\nSource: \`.gsd/RUNTIME.md\`\n\n${runtimeBody}`
+    : "";
+
   return loadPrompt("execute-task", {
     overridesSection,
+    runtimeContext,
     workingDirectory: base,
     milestoneId: mid, sliceId: sid, sliceTitle: sTitle, taskId: tid, taskTitle: tTitle,
     planPath: join(base, relSliceFile(base, mid, sid, "PLAN")),
diff --git a/src/resources/extensions/gsd/paths.ts b/src/resources/extensions/gsd/paths.ts
index 8d77bf21e..ccd3c59f6 100644
--- a/src/resources/extensions/gsd/paths.ts
+++ b/src/resources/extensions/gsd/paths.ts
@@ -356,6 +356,11 @@ export function milestonesDir(basePath: string): string {
   return join(gsdRoot(basePath), "milestones");
 }
 
+/** Path of `RUNTIME.md` under the GSD root of `basePath`; does not check that the file exists. */
+export function resolveRuntimeFile(basePath: string): string {
+  return join(gsdRoot(basePath), "RUNTIME.md");
+}
+
 export function resolveGsdRootFile(basePath: string, key: GSDRootFileKey): string {
   const root = gsdRoot(basePath);
   const canonical = join(root, GSD_ROOT_FILES[key]);
diff --git a/src/resources/extensions/gsd/prompts/execute-task.md b/src/resources/extensions/gsd/prompts/execute-task.md
index 948705ba5..5f3f9e101 100644
--- a/src/resources/extensions/gsd/prompts/execute-task.md
+++ b/src/resources/extensions/gsd/prompts/execute-task.md
@@ -10,6 +10,8 @@ A researcher explored the codebase and a planner decomposed the work — you are
 
 {{overridesSection}}
 
+{{runtimeContext}}
+
 {{resumeSection}}
 
 {{carryForwardSection}}
diff --git a/src/resources/extensions/gsd/templates/runtime.md b/src/resources/extensions/gsd/templates/runtime.md
new file mode 100644
index 000000000..20395bfca
--- /dev/null
+++ b/src/resources/extensions/gsd/templates/runtime.md
@@ -0,0 +1,21 @@
+# Runtime Context
+
+## Stack
+- **Language:** (e.g., TypeScript, Python, Go)
+- **Framework:** (e.g., Next.js, FastAPI, Gin)
+- **Build:** (e.g., npm run build, cargo build)
+- **Test:** (e.g., npm run test, pytest)
+- **Lint:** (e.g., npm run lint, ruff check)
+
+## Environment
+- **Runtime version:** (e.g., Node 20.x, Python 3.12)
+- **Package manager:** (e.g., npm, pnpm, yarn)
+- **Required env vars:** (list any needed for local dev)
+
+## Dev Server
+- **Start command:** (e.g., npm run dev)
+- **Default port:** (e.g., 3000)
+- **Health check:** (e.g., curl http://localhost:3000/health)
+
+## Notes
+(Any runtime-specific context the executor needs to know)
diff --git a/src/resources/extensions/gsd/types.ts b/src/resources/extensions/gsd/types.ts
index 60f773388..d2ac58847 100644
--- a/src/resources/extensions/gsd/types.ts
+++ b/src/resources/extensions/gsd/types.ts
@@ -478,3 +478,12 @@ export interface ReactiveExecutionState {
   };
   updatedAt: string;
 }
+
+/** Aggregate pass/fail counts for one browser verification flow. */
+export interface BrowserFlowResult {
+  url: string;
+  passed: boolean;
+  checksTotal: number;
+  checksPassed: number;
+  duration: number;
+}
diff --git a/src/resources/extensions/gsd/verification-evidence.ts b/src/resources/extensions/gsd/verification-evidence.ts
index 0918b40f1..e6cf431ff 100644
--- a/src/resources/extensions/gsd/verification-evidence.ts
+++ b/src/resources/extensions/gsd/verification-evidence.ts
@@ -37,6 +37,23 @@ export interface AuditWarningJSON {
   fixAvailable: boolean;
 }
 
+/** One check result; same fields as an entry of `details.checks` returned by the `browser_verify` tool. */
+export interface BrowserEvidenceCheckJSON {
+  description: string;
+  passed: boolean;
+  actual?: string;
+  evidence?: string;
+  error?: string;
+}
+
+/** Browser verification evidence; same fields as the `details` payload returned by the `browser_verify` tool. */
+export interface BrowserEvidenceJSON {
+  url: string;
+  passed: boolean;
+  checks: BrowserEvidenceCheckJSON[];
+  duration: number;
+}
+
 export interface EvidenceJSON {
   schemaVersion: 1;
   taskId: string;
@@ -49,6 +66,7 @@ export interface EvidenceJSON {
   maxRetries?: number;
   runtimeErrors?: RuntimeErrorJSON[];
   auditWarnings?: AuditWarningJSON[];
+  browser?: BrowserEvidenceJSON;
 }
 
 /**