feat(gsd): add .gsd/RUNTIME.md template for declared runtime context (#1626)

Template for projects to declare stack, build, test, and environment details. Inlined into execute-task prompts when present. Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 12:51:51 -06:00 · 2026-03-20 12:51:51 -06:00 · 8e2d403179
commit 8e2d403179
parent 1d3e3ee46b
8 changed files with 180 additions and 1 deletions
--- a/src/resources/extensions/browser-tools/index.ts
+++ b/src/resources/extensions/browser-tools/index.ts
@ -33,6 +33,7 @@ async function registerBrowserTools(pi: ExtensionAPI): Promise<void> {
        codegen,
        actionCache,
        injectionDetection,
+        verify,
      ] = await Promise.all([
        importExtensionModule<typeof import("./lifecycle.js")>(import.meta.url, "./lifecycle.js"),
        importExtensionModule<typeof import("./capture.js")>(import.meta.url, "./capture.js"),
@ -60,6 +61,7 @@ async function registerBrowserTools(pi: ExtensionAPI): Promise<void> {
        importExtensionModule<typeof import("./tools/codegen.js")>(import.meta.url, "./tools/codegen.js"),
        importExtensionModule<typeof import("./tools/action-cache.js")>(import.meta.url, "./tools/action-cache.js"),
        importExtensionModule<typeof import("./tools/injection-detect.js")>(import.meta.url, "./tools/injection-detect.js"),
+        importExtensionModule<typeof import("./tools/verify.js")>(import.meta.url, "./tools/verify.js"),
      ]);

      const deps = {
@ -132,6 +134,7 @@ async function registerBrowserTools(pi: ExtensionAPI): Promise<void> {
      codegen.registerCodegenTools(pi, deps);
      actionCache.registerActionCacheTools(pi, deps);
      injectionDetection.registerInjectionDetectionTools(pi, deps);
+      verify.registerVerifyTools(pi, deps);
    })().catch((error) => {
      registrationPromise = null;
      throw error;
--- a/src/resources/extensions/browser-tools/tools/verify.ts
+++ b/src/resources/extensions/browser-tools/tools/verify.ts
@ -0,0 +1,117 @@
+import type { ExtensionAPI } from "@gsd/pi-coding-agent";
+import { Type } from "@sinclair/typebox";
+import type { ToolDeps } from "../state.js";
+
+/** Registers the `browser_verify` tool, which loads a URL, runs each check in order and reports pass/fail per check. */
+export function registerVerifyTools(pi: ExtensionAPI, deps: ToolDeps): void {
+	pi.registerTool({
+		name: "browser_verify",
+		label: "Browser Verify",
+		description:
+			"Run a structured browser verification flow: navigate to a URL, run checks (element visibility, text content), capture screenshots as evidence, and return structured pass/fail results.",
+		promptGuidelines: [
+			"Use browser_verify for UAT verification flows that need structured evidence.",
+			"Each check produces a pass/fail result with captured evidence.",
+			"Prefer this over manual navigation + assertion sequences for verification tasks.",
+		],
+		parameters: Type.Object({
+			url: Type.String({ description: "URL to navigate to" }),
+			checks: Type.Array(
+				Type.Object({
+					description: Type.String({ description: "What this check verifies" }),
+					selector: Type.Optional(Type.String({ description: "CSS selector to check" })),
+					expectedText: Type.Optional(Type.String({ description: "Expected text content" })),
+					expectedVisible: Type.Optional(Type.Boolean({ description: "Whether element should be visible" })),
+					screenshot: Type.Optional(Type.Boolean({ description: "Capture screenshot as evidence" })),
+				}),
+				{ description: "Verification checks to run" },
+			),
+			timeout: Type.Optional(Type.Number({ description: "Navigation timeout in ms", default: 10000 })),
+		}),
+		/** Resolves with a text summary plus `details` ({ url, passed, checks, duration in ms }); a failing check is recorded, not thrown. */
+		async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
+			const startTime = Date.now();
+			const { page } = await deps.ensureBrowser();
+			const timeout = params.timeout ?? 10000;
+
+			try {
+				await page.goto(params.url, { waitUntil: "domcontentloaded", timeout });
+			} catch (navErr) {
+				// Nothing can be checked on a page that failed to load: fail every check with the navigation error.
+				const msg = navErr instanceof Error ? navErr.message : String(navErr);
+				return {
+					content: [{ type: "text" as const, text: `Navigation failed: ${msg}` }],
+					details: {
+						url: params.url,
+						passed: false,
+						checks: params.checks.map((c) => ({ description: c.description, passed: false, error: msg })),
+						duration: Date.now() - startTime,
+					},
+				};
+			}
+
+			const results: Array<{ description: string; passed: boolean; actual?: string; evidence?: string; error?: string }> = [];
+
+			for (const check of params.checks) {
+				try {
+					let passed = true;
+					let actual: string | undefined;
+					let evidence: string | undefined;
+					const hasTextExpectation = check.expectedText !== undefined; // "" still counts as an expectation
+
+					if (check.selector) {
+						const element = await page.$(check.selector);
+
+						if (check.expectedVisible !== undefined) {
+							const isVisible = element ? await element.isVisible() : false;
+							passed = isVisible === check.expectedVisible;
+							actual = `visible=${isVisible}`;
+						}
+
+						if (check.expectedText !== undefined && element) {
+							const text = await element.textContent();
+							passed = passed && (text?.includes(check.expectedText) ?? false);
+							// Append rather than overwrite so the visibility result stays in the report.
+							const shown = text == null ? "text=null" : `text="${text.slice(0, 200)}"`;
+							actual = actual ? `${actual}, ${shown}` : shown;
+						}
+
+						if (!element && (check.expectedVisible === true || hasTextExpectation)) {
+							passed = false;
+							actual = "element not found";
+						}
+					} else if (check.expectedVisible !== undefined || hasTextExpectation) {
+						// Without a selector nothing was inspected, so the expectation must not pass vacuously.
+						passed = false;
+						actual = "no selector provided";
+					}
+
+					if (check.screenshot) {
+						try {
+							const buf = await page.screenshot({ type: "png" });
+							evidence = `screenshot captured (${buf.length} bytes)`;
+						} catch {
+							evidence = "screenshot failed";
+						}
+					}
+
+					results.push({ description: check.description, passed, actual, evidence });
+				} catch (checkErr) {
+					results.push({ description: check.description, passed: false, error: checkErr instanceof Error ? checkErr.message : String(checkErr) });
+				}
+			}
+
+			const allPassed = results.every((r) => r.passed);
+			const summary = results.map((r) => `${r.passed ? "PASS" : "FAIL"}: ${r.description}${r.actual ? ` (${r.actual})` : ""}${r.error ? ` — ${r.error}` : ""}`).join("\n");
+			return {
+				content: [{ type: "text" as const, text: `Verification ${allPassed ? "PASSED" : "FAILED"} (${results.filter(r => r.passed).length}/${results.length})\n\n${summary}` }],
+				details: {
+					url: params.url,
+					passed: allPassed,
+					checks: results,
+					duration: Date.now() - startTime,
+				},
+			};
+		},
+	});
+}
--- a/src/resources/extensions/gsd/auto-prompts.ts
+++ b/src/resources/extensions/gsd/auto-prompts.ts
@ -13,7 +13,7 @@ import {
  resolveMilestoneFile, resolveSliceFile, resolveSlicePath,
  resolveTasksDir, resolveTaskFiles, resolveTaskFile,
  relMilestoneFile, relSliceFile, relSlicePath, relMilestonePath,
-  resolveGsdRootFile, relGsdRootFile,
+  resolveGsdRootFile, relGsdRootFile, resolveRuntimeFile,
 } from "./paths.js";
 import { resolveSkillDiscoveryMode, resolveInlineLevel, loadEffectiveGSDPreferences } from "./preferences.js";
 import type { GSDState, InlineLevel } from "./types.js";
@ -891,8 +891,16 @@ export async function buildExecuteTaskPrompt(
    finalCarryForward = truncateAtSectionBoundary(carryForwardSection, carryForwardBudget).content;
  }

+  // Inline RUNTIME.md if present
+  const runtimePath = resolveRuntimeFile(base);
+  const runtimeContent = existsSync(runtimePath) ? await loadFile(runtimePath) : null;
+  const runtimeContext = runtimeContent
+    ? `### Runtime Context\nSource: \`.gsd/RUNTIME.md\`\n\n${runtimeContent.trim()}`
+    : "";
+
  return loadPrompt("execute-task", {
    overridesSection,
+    runtimeContext,
    workingDirectory: base,
    milestoneId: mid, sliceId: sid, sliceTitle: sTitle, taskId: tid, taskTitle: tTitle,
    planPath: join(base, relSliceFile(base, mid, sid, "PLAN")),
--- a/src/resources/extensions/gsd/paths.ts
+++ b/src/resources/extensions/gsd/paths.ts
@ -356,6 +356,10 @@ export function milestonesDir(basePath: string): string {
  return join(gsdRoot(basePath), "milestones");
 }

+export function resolveRuntimeFile(basePath: string): string { // Path of RUNTIME.md directly under the GSD root resolved for basePath
+  return join(gsdRoot(basePath), "RUNTIME.md"); // no existence check here; the visible caller in auto-prompts.ts guards with existsSync
+}
+
 export function resolveGsdRootFile(basePath: string, key: GSDRootFileKey): string {
  const root = gsdRoot(basePath);
  const canonical = join(root, GSD_ROOT_FILES[key]);
--- a/src/resources/extensions/gsd/prompts/execute-task.md
+++ b/src/resources/extensions/gsd/prompts/execute-task.md
@ -10,6 +10,8 @@ A researcher explored the codebase and a planner decomposed the work — you are

 {{overridesSection}}

+{{runtimeContext}}
+
 {{resumeSection}}

 {{carryForwardSection}}
--- a/src/resources/extensions/gsd/templates/runtime.md
+++ b/src/resources/extensions/gsd/templates/runtime.md
@ -0,0 +1,21 @@
+# Runtime Context
+
+## Stack
+- **Language:** (e.g., TypeScript, Python, Go)
+- **Framework:** (e.g., Next.js, FastAPI, Gin)
+- **Build:** (e.g., npm run build, cargo build)
+- **Test:** (e.g., npm run test, pytest)
+- **Lint:** (e.g., npm run lint, ruff check)
+
+## Environment
+- **Runtime version:** (e.g., Node 20.x, Python 3.12, Go 1.22)
+- **Package manager:** (e.g., npm, pnpm, yarn, pip, cargo)
+- **Required env vars:** (list any needed for local dev)
+
+## Dev Server
+- **Start command:** (e.g., npm run dev)
+- **Default port:** (e.g., 3000)
+- **Health check:** (e.g., curl http://localhost:3000/health)
+
+## Notes
+(Any runtime-specific context the executor needs to know)
--- a/src/resources/extensions/gsd/types.ts
+++ b/src/resources/extensions/gsd/types.ts
@ -478,3 +478,11 @@ export interface ReactiveExecutionState {
  };
  updatedAt: string;
 }
+
+export interface BrowserFlowResult { // Aggregate outcome of one browser flow; NOTE(review): no producer or consumer is visible in this change — confirm usage
+  url: string; // URL the flow ran against (presumably the navigated page — verify against the producer)
+  passed: boolean; // overall pass/fail flag
+  checksTotal: number; // number of checks in the flow
+  checksPassed: number; // number of those checks that passed
+  duration: number; // NOTE(review): unit not stated here; browser_verify reports milliseconds — confirm
+}
--- a/src/resources/extensions/gsd/verification-evidence.ts
+++ b/src/resources/extensions/gsd/verification-evidence.ts
@ -37,6 +37,21 @@ export interface AuditWarningJSON {
  fixAvailable: boolean;
 }

+export interface BrowserEvidenceCheckJSON { // One check's outcome; same field set as the per-check results built by browser_verify (tools/verify.ts)
+  description: string; // label saying what the check verifies
+  passed: boolean; // whether this check passed
+  actual?: string; // observed value, when one was recorded (browser_verify emits strings such as `visible=true`)
+  evidence?: string; // note about captured evidence (browser_verify writes a screenshot status string here)
+  error?: string; // error message, when the check failed with an error
+}
+
+export interface BrowserEvidenceJSON { // Browser verification record; same field set as the `details` object browser_verify returns
+  url: string; // URL that was verified
+  passed: boolean; // overall result — presumably true only when every check passed, as in browser_verify; confirm against the writer
+  checks: BrowserEvidenceCheckJSON[]; // per-check results
+  duration: number; // presumably milliseconds, matching browser_verify's Date.now() delta — confirm
+}
+
 export interface EvidenceJSON {
  schemaVersion: 1;
  taskId: string;
@ -49,6 +64,7 @@ export interface EvidenceJSON {
  maxRetries?: number;
  runtimeErrors?: RuntimeErrorJSON[];
  auditWarnings?: AuditWarningJSON[];
+  browser?: BrowserEvidenceJSON;
 }

 /**