diff --git a/.github/workflows/build-native.yml b/.github/workflows/build-native.yml index 47f9ef8a2..fa34ec94d 100644 --- a/.github/workflows/build-native.yml +++ b/.github/workflows/build-native.yml @@ -253,3 +253,20 @@ jobs: done echo "::error::Smoke test failed — gsd-pi@${VERSION} not installable" exit 1 + + - name: Verify dist-tag after publish + if: steps.version-check.outputs.is_prerelease == 'false' + run: | + VERSION=$(node -p "require('./package.json').version") + echo "Verifying npm dist-tag 'latest' points to ${VERSION}..." + for attempt in $(seq 1 10); do + LATEST=$(npm view gsd-pi dist-tags.latest 2>/dev/null || echo "") + if [ "${LATEST}" = "${VERSION}" ]; then + echo " ✓ npm dist-tags.latest = ${VERSION}" + exit 0 + fi + echo " Attempt ${attempt}/10: latest=${LATEST}, expected=${VERSION}, retrying in 15s..." + sleep 15 + done + echo "::error::dist-tags.latest is '${LATEST}' but expected '${VERSION}' — run: npm dist-tag add gsd-pi@${VERSION} latest" + exit 1 diff --git a/.gitignore b/.gitignore index 5e04ce633..12d15387f 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,7 @@ TODOS.md .gsd/DISCUSSION-MANIFEST.json .gsd/milestones/**/*-CONTINUE.md .gsd/milestones/**/continue.md + +# ── GSD baseline (auto-generated) ── +.gsd/forensics/ +.gsd/parallel/ diff --git a/docs/pr-876/01-index.png b/docs/pr-876/01-index.png new file mode 100644 index 000000000..dc2957b92 Binary files /dev/null and b/docs/pr-876/01-index.png differ diff --git a/docs/pr-876/02-summary.png b/docs/pr-876/02-summary.png new file mode 100644 index 000000000..dea9d8cb1 Binary files /dev/null and b/docs/pr-876/02-summary.png differ diff --git a/docs/pr-876/03-progress.png b/docs/pr-876/03-progress.png new file mode 100644 index 000000000..9dec3856b Binary files /dev/null and b/docs/pr-876/03-progress.png differ diff --git a/docs/pr-876/04-depgraph.png b/docs/pr-876/04-depgraph.png new file mode 100644 index 000000000..b1349dead Binary files /dev/null and b/docs/pr-876/04-depgraph.png differ diff --git a/docs/pr-876/05-metrics.png b/docs/pr-876/05-metrics.png new file mode 100644 index 000000000..bb8083030 Binary files /dev/null and b/docs/pr-876/05-metrics.png differ diff --git a/docs/pr-876/06-changelog.png b/docs/pr-876/06-changelog.png new file mode 100644 index 000000000..c79e00f2d Binary files /dev/null and b/docs/pr-876/06-changelog.png differ diff --git a/docs/pr-876/06-timeline.png b/docs/pr-876/06-timeline.png new file mode 100644 index 000000000..62d081703 Binary files /dev/null and b/docs/pr-876/06-timeline.png differ diff --git a/docs/pr-876/07-changelog.png b/docs/pr-876/07-changelog.png new file mode 100644 index 000000000..f279f6d95 Binary files /dev/null and b/docs/pr-876/07-changelog.png differ diff --git a/docs/pr-876/07-knowledge.png b/docs/pr-876/07-knowledge.png new file mode 100644 index 000000000..2e7e32952 Binary files /dev/null and b/docs/pr-876/07-knowledge.png differ diff --git a/docs/pr-876/08-knowledge.png b/docs/pr-876/08-knowledge.png new file mode 100644 index 000000000..14a4dd33b Binary files /dev/null and b/docs/pr-876/08-knowledge.png differ diff --git a/docs/pr-876/09-captures.png b/docs/pr-876/09-captures.png new file mode 100644 index 000000000..f3c29a40e Binary files /dev/null and b/docs/pr-876/09-captures.png differ diff --git a/docs/pr-876/10-artifacts.png b/docs/pr-876/10-artifacts.png new file mode 100644 index 000000000..7aab45ec9 Binary files /dev/null and b/docs/pr-876/10-artifacts.png differ diff --git a/packages/pi-ai/src/providers/openai-completions.ts 
b/packages/pi-ai/src/providers/openai-completions.ts index 50ae643ca..7372d6880 100644 --- a/packages/pi-ai/src/providers/openai-completions.ts +++ b/packages/pi-ai/src/providers/openai-completions.ts @@ -747,10 +747,13 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto return "toolUse"; case "content_filter": return "error"; - default: { - const _exhaustive: never = reason; - throw new Error(`Unhandled stop reason: ${_exhaustive}`); - } + default: + // Third-party and community models (e.g. Qwen GGUF quants) may emit + // non-standard finish_reason values like "eos_token", "eos", or + // "end_of_turn". The OpenAI spec defines finish_reason as a string, + // so we treat unrecognized values as a normal stop rather than + // throwing — which would abort in-flight tool calls (#863). + return "stop"; } } diff --git a/scripts/link-workspace-packages.cjs b/scripts/link-workspace-packages.cjs index 43ee66a83..f1faf9875 100644 --- a/scripts/link-workspace-packages.cjs +++ b/scripts/link-workspace-packages.cjs @@ -10,8 +10,12 @@ * to resolve. This script bridges the gap. * * Runs as part of postinstall (before any ESM code that imports @gsd/*). + * + * On Windows without Developer Mode or administrator rights, creating symlinks + * (even NTFS junctions) can fail with EPERM. In that case we fall back to + * cpSync (directory copy) which works universally. */ -const { existsSync, mkdirSync, symlinkSync, lstatSync, readlinkSync, unlinkSync, readdirSync } = require('fs') +const { existsSync, mkdirSync, symlinkSync, cpSync, lstatSync, readlinkSync, unlinkSync } = require('fs') const { resolve, join } = require('path') const root = resolve(__dirname, '..') @@ -33,6 +37,7 @@ if (!existsSync(nodeModulesGsd)) { } let linked = 0 +let copied = 0 for (const [dir, name] of Object.entries(packageMap)) { const source = join(packagesDir, dir) const target = join(nodeModulesGsd, name) @@ -50,21 +55,32 @@ for (const [dir, name] of Object.entries(packageMap)) { } unlinkSync(target) // Wrong target, relink } else { - continue // Real directory (e.g., from bundleDependencies), don't touch + continue // Real directory (e.g., copied or from bundleDependencies), don't touch } } catch { continue } } + let symlinkOk = false try { symlinkSync(source, target, 'junction') // junction works on Windows too + symlinkOk = true linked++ } catch { - // Non-fatal — may fail in read-only environments + // Symlink failed — common on Windows without Developer Mode or admin rights. + // Fall back to a directory copy so the package is still resolvable. + } + + if (!symlinkOk) { + try { + cpSync(source, target, { recursive: true }) + copied++ + } catch { + // Non-fatal — loader.ts will emit a clearer error if resolution still fails + } } } -if (linked > 0) { - process.stderr.write(` Linked ${linked} workspace packages\n`) -} +if (linked > 0) process.stderr.write(` Linked ${linked} workspace package${linked !== 1 ? 's' : ''}\n`) +if (copied > 0) process.stderr.write(` Copied ${copied} workspace package${copied !== 1 ? 's' : ''} (symlinks unavailable)\n`) diff --git a/scripts/validate-pack.js b/scripts/validate-pack.js index 71a2e6754..d89fb9f34 100644 --- a/scripts/validate-pack.js +++ b/scripts/validate-pack.js @@ -103,6 +103,54 @@ try { process.exit(1); } + // --- Verify @gsd/* packages resolved correctly post-install --- + // This catches the Windows-style failure where symlinkSync fails silently and + // node_modules/@gsd/ is never populated, causing ERR_MODULE_NOT_FOUND at runtime. 
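[Editor's note] For context on the link-workspace-packages.cjs change above: the symlink-then-copy pattern it adopts, distilled into a minimal standalone sketch. The function name and return values are illustrative; the fs calls (`symlinkSync` with `'junction'`, `cpSync` with `recursive`) are the same ones the script uses.

```ts
// Minimal sketch: link a workspace package, falling back to a directory copy
// where symlinks are unavailable (e.g. Windows without Developer Mode or
// administrator rights). Paths and naming are illustrative.
import { existsSync, symlinkSync, cpSync } from "node:fs";

function linkOrCopy(source: string, target: string): "linked" | "copied" | "skipped" {
  if (!existsSync(source) || existsSync(target)) return "skipped";
  try {
    // 'junction' avoids the elevated-privilege requirement of true symlinks on Windows.
    symlinkSync(source, target, "junction");
    return "linked";
  } catch {
    // EPERM or similar — fall back to a full directory copy, slower but universal.
    cpSync(source, target, { recursive: true });
    return "copied";
  }
}
```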
+ console.log('==> Verifying @gsd/* workspace package resolution...'); + const installedRoot = join(installDir, 'node_modules', 'gsd-pi'); + const criticalPkgs = ['pi-coding-agent']; + let resolutionFailed = false; + for (const pkg of criticalPkgs) { + const pkgPath = join(installedRoot, 'node_modules', '@gsd', pkg); + const fallbackPath = join(installedRoot, 'packages', pkg); + if (!existsSync(pkgPath)) { + if (existsSync(fallbackPath)) { + console.log(` MISSING symlink/copy: node_modules/@gsd/${pkg} (packages/${pkg} exists — postinstall may not have run)`); + } else { + console.log(` MISSING: node_modules/@gsd/${pkg} (packages/${pkg} also absent — package is broken)`); + } + resolutionFailed = true; + } + } + if (resolutionFailed) { + console.log('ERROR: @gsd/* packages are not resolvable after install.'); + console.log(' This will cause ERR_MODULE_NOT_FOUND on first run (especially on Windows).'); + process.exit(1); + } + console.log(' @gsd/* packages are resolvable.'); + + // --- Run the binary to confirm end-to-end resolution --- + console.log('==> Running installed binary (gsd -v)...'); + const loaderPath = join(installedRoot, 'dist', 'loader.js'); + try { + const versionOutput = execSync(`node "${loaderPath}" -v`, { + cwd: installDir, + encoding: 'utf8', + stdio: ['pipe', 'pipe', 'pipe'], + timeout: 15000, + }).trim(); + console.log(` gsd -v => ${versionOutput}`); + if (!versionOutput.match(/^\d+\.\d+\.\d+/)) { + console.log('ERROR: gsd -v returned unexpected output (expected a version string).'); + process.exit(1); + } + } catch (err) { + console.log('ERROR: Running gsd -v failed after install.'); + if (err.stdout) console.log(err.stdout); + if (err.stderr) console.log(err.stderr); + process.exit(1); + } + console.log(''); console.log('Package is installable. Safe to publish.'); process.exit(0); diff --git a/src/headless.ts b/src/headless.ts index 78e6934bf..006009edf 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -18,6 +18,7 @@ import { ChildProcess } from 'node:child_process' // RpcClient is not in @gsd/pi-coding-agent's public exports — import from dist directly. // This relative path resolves correctly from both src/ (via tsx) and dist/ (compiled). 
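[Editor's note] The validate-pack.js additions above boil down to two checks: the scoped packages resolved on disk, and the installed binary actually runs. A condensed sketch of that shape, assuming the same installed layout shown in the diff (node_modules/gsd-pi with nested @gsd packages and dist/loader.js); the helper name and error messages are illustrative.

```ts
// Sketch of the post-pack smoke check: resolution on disk, then one real run.
import { existsSync } from "node:fs";
import { join } from "node:path";
import { execSync } from "node:child_process";

function smokeCheck(installDir: string): void {
  const installedRoot = join(installDir, "node_modules", "gsd-pi");
  const pkgPath = join(installedRoot, "node_modules", "@gsd", "pi-coding-agent");
  if (!existsSync(pkgPath)) {
    throw new Error("@gsd/pi-coding-agent not resolvable — postinstall may not have run");
  }
  // Run the installed loader end-to-end; a version string proves imports resolve.
  const out = execSync(`node "${join(installedRoot, "dist", "loader.js")}" -v`, {
    encoding: "utf8",
    timeout: 15_000,
  }).trim();
  if (!/^\d+\.\d+\.\d+/.test(out)) {
    throw new Error(`unexpected version output: ${out}`);
  }
}
```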
import { RpcClient } from '../packages/pi-coding-agent/dist/modes/rpc/rpc-client.js' +import { attachJsonlLineReader, serializeJsonLine } from '../packages/pi-coding-agent/dist/modes/rpc/jsonl.js' // --------------------------------------------------------------------------- // Types @@ -33,6 +34,9 @@ export interface HeadlessOptions { contextText?: string // inline text auto?: boolean // chain into auto-mode after milestone creation verbose?: boolean // show tool calls in output + maxRestarts?: number // auto-restart on crash (default 3, 0 to disable) + supervised?: boolean // supervised mode: forward interactive requests to orchestrator + responseTimeout?: number // timeout for orchestrator response (default 30000ms) } interface ExtensionUIRequest { @@ -92,6 +96,21 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { options.auto = true } else if (arg === '--verbose') { options.verbose = true + } else if (arg === '--max-restarts' && i + 1 < args.length) { + options.maxRestarts = parseInt(args[++i], 10) + if (Number.isNaN(options.maxRestarts) || options.maxRestarts < 0) { + process.stderr.write('[headless] Error: --max-restarts must be a non-negative integer\n') + process.exit(1) + } + } else if (arg === '--supervised') { + options.supervised = true + options.json = true // supervised implies json + } else if (arg === '--response-timeout' && i + 1 < args.length) { + options.responseTimeout = parseInt(args[++i], 10) + if (Number.isNaN(options.responseTimeout) || options.responseTimeout <= 0) { + process.stderr.write('[headless] Error: --response-timeout must be a positive integer (milliseconds)\n') + process.exit(1) + } } } else if (!positionalStarted) { positionalStarted = true @@ -104,14 +123,6 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { return options } -// --------------------------------------------------------------------------- -// JSONL Helper -// --------------------------------------------------------------------------- - -function serializeJsonLine(obj: Record): string { - return JSON.stringify(obj) + '\n' -} - // --------------------------------------------------------------------------- // Extension UI Auto-Responder // --------------------------------------------------------------------------- @@ -230,6 +241,8 @@ function isMilestoneReadyNotification(event: Record): boolean { // Quick Command Detection // --------------------------------------------------------------------------- +const FIRE_AND_FORGET_METHODS = new Set(['notify', 'setStatus', 'setWidget', 'setTitle', 'set_editor_text']) + const QUICK_COMMANDS = new Set([ 'status', 'queue', 'history', 'hooks', 'export', 'stop', 'pause', 'capture', 'skip', 'undo', 'knowledge', 'config', 'prefs', @@ -241,6 +254,49 @@ function isQuickCommand(command: string): boolean { return QUICK_COMMANDS.has(command) } +// --------------------------------------------------------------------------- +// Supervised Stdin Reader +// --------------------------------------------------------------------------- + +function startSupervisedStdinReader( + stdinWriter: (data: string) => void, + client: RpcClient, + onResponse: (id: string) => void, +): () => void { + return attachJsonlLineReader(process.stdin as import('node:stream').Readable, (line) => { + let msg: Record + try { + msg = JSON.parse(line) + } catch { + process.stderr.write(`[headless] Warning: invalid JSON from orchestrator stdin, skipping\n`) + return + } + + const type = String(msg.type ?? 
'') + + switch (type) { + case 'extension_ui_response': + stdinWriter(line + '\n') + if (typeof msg.id === 'string') { + onResponse(msg.id) + } + break + case 'prompt': + client.prompt(String(msg.message ?? '')) + break + case 'steer': + client.steer(String(msg.message ?? '')) + break + case 'follow_up': + client.followUp(String(msg.message ?? '')) + break + default: + process.stderr.write(`[headless] Warning: unknown message type "${type}" from orchestrator stdin\n`) + break + } + }) +} + // --------------------------------------------------------------------------- // Main Orchestrator // --------------------------------------------------------------------------- @@ -279,9 +335,46 @@ function bootstrapGsdProject(basePath: string): void { } export async function runHeadless(options: HeadlessOptions): Promise { + const maxRestarts = options.maxRestarts ?? 3 + let restartCount = 0 + + while (true) { + const result = await runHeadlessOnce(options, restartCount) + + // Success or blocked — exit normally + if (result.exitCode === 0 || result.exitCode === 2) { + process.exit(result.exitCode) + } + + // Crash/error — check if we should restart + if (restartCount >= maxRestarts) { + process.stderr.write(`[headless] Max restarts (${maxRestarts}) reached. Exiting.\n`) + process.exit(result.exitCode) + } + + // Don't restart if SIGINT/SIGTERM was received + if (result.interrupted) { + process.exit(result.exitCode) + } + + restartCount++ + const backoffMs = Math.min(5000 * restartCount, 30_000) + process.stderr.write(`[headless] Restarting in ${(backoffMs / 1000).toFixed(0)}s (attempt ${restartCount}/${maxRestarts})...\n`) + await new Promise(resolve => setTimeout(resolve, backoffMs)) + } +} + +async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): Promise<{ exitCode: number; interrupted: boolean }> { + let interrupted = false const startTime = Date.now() const isNewMilestone = options.command === 'new-milestone' + // Supervised mode cannot share stdin with --context - + if (options.supervised && options.context === '-') { + process.stderr.write('[headless] Error: --supervised cannot be used with --context - (both require stdin)\n') + process.exit(1) + } + // For new-milestone, load context and bootstrap .gsd/ before spawning RPC child if (isNewMilestone) { if (!options.context && !options.contextText) { @@ -370,6 +463,18 @@ export async function runHeadless(options: HeadlessOptions): Promise { // Stdin writer for sending extension_ui_response to child let stdinWriter: ((data: string) => void) | null = null + // Supervised mode state + const pendingResponseTimers = new Map>() + let supervisedFallback = false + let stopSupervisedReader: (() => void) | null = null + const onStdinClose = () => { + supervisedFallback = true + process.stderr.write('[headless] Warning: orchestrator stdin closed, falling back to auto-response\n') + } + if (options.supervised) { + process.stdin.on('close', onStdinClose) + } + // Completion promise let resolveCompletion: () => void const completionPromise = new Promise((resolve) => { @@ -390,6 +495,9 @@ export async function runHeadless(options: HeadlessOptions): Promise { } } + // Precompute supervised response timeout + const responseTimeout = options.responseTimeout ?? 
30_000 + // Overall timeout const timeoutTimer = setTimeout(() => { process.stderr.write(`[headless] Timeout after ${options.timeout / 1000}s\n`) @@ -428,7 +536,22 @@ export async function runHeadless(options: HeadlessOptions): Promise { completed = true } - handleExtensionUIRequest(eventObj as unknown as ExtensionUIRequest, stdinWriter) + const method = String(eventObj.method ?? '') + const shouldSupervise = options.supervised && !supervisedFallback + && !FIRE_AND_FORGET_METHODS.has(method) + + if (shouldSupervise) { + // Interactive request in supervised mode — let orchestrator respond + const eventId = String(eventObj.id ?? '') + const timer = setTimeout(() => { + pendingResponseTimers.delete(eventId) + handleExtensionUIRequest(eventObj as unknown as ExtensionUIRequest, stdinWriter!) + process.stdout.write(JSON.stringify({ type: 'supervised_timeout', id: eventId, method }) + '\n') + }, responseTimeout) + pendingResponseTimers.set(eventId, timer) + } else { + handleExtensionUIRequest(eventObj as unknown as ExtensionUIRequest, stdinWriter) + } // If we detected a terminal notification, resolve after responding if (completed) { @@ -452,6 +575,7 @@ export async function runHeadless(options: HeadlessOptions): Promise { // Signal handling const signalHandler = () => { process.stderr.write('\n[headless] Interrupted, stopping child process...\n') + interrupted = true exitCode = 1 client.stop().finally(() => { clearTimeout(timeoutTimer) @@ -484,6 +608,19 @@ export async function runHeadless(options: HeadlessOptions): Promise { internalProcess.stdin!.write(data) } + // Start supervised stdin reader for orchestrator commands + if (options.supervised) { + stopSupervisedReader = startSupervisedStdinReader(stdinWriter, client, (id) => { + const timer = pendingResponseTimers.get(id) + if (timer) { + clearTimeout(timer) + pendingResponseTimers.delete(id) + } + }) + // Ensure stdin is in flowing mode for JSONL reading + process.stdin.resume() + } + // Detect child process crash internalProcess.on('exit', (code) => { if (!completed) { @@ -541,6 +678,10 @@ export async function runHeadless(options: HeadlessOptions): Promise { // Cleanup clearTimeout(timeoutTimer) if (idleTimer) clearTimeout(idleTimer) + pendingResponseTimers.forEach((timer) => clearTimeout(timer)) + pendingResponseTimers.clear() + stopSupervisedReader?.() + process.stdin.removeListener('close', onStdinClose) process.removeListener('SIGINT', signalHandler) process.removeListener('SIGTERM', signalHandler) @@ -553,6 +694,9 @@ export async function runHeadless(options: HeadlessOptions): Promise { process.stderr.write(`[headless] Status: ${status}\n`) process.stderr.write(`[headless] Duration: ${duration}s\n`) process.stderr.write(`[headless] Events: ${totalEvents} total, ${toolCallCount} tool calls\n`) + if (restartCount > 0) { + process.stderr.write(`[headless] Restarts: ${restartCount}\n`) + } // On failure, print last 5 events for diagnostics if (exitCode !== 0) { @@ -565,5 +709,5 @@ export async function runHeadless(options: HeadlessOptions): Promise { } } - process.exit(exitCode) + return { exitCode, interrupted } } diff --git a/src/help-text.ts b/src/help-text.ts index 8c866b22a..864d85f3d 100644 --- a/src/help-text.ts +++ b/src/help-text.ts @@ -41,6 +41,8 @@ const SUBCOMMAND_HELP: Record = { ' --timeout N Overall timeout in ms (default: 300000)', ' --json JSONL event stream to stdout', ' --model ID Override model', + ' --supervised Forward interactive UI requests to orchestrator via stdout/stdin', + ' --response-timeout N Timeout (ms) 
for orchestrator response (default: 30000)', '', 'Commands:', ' auto Run all queued units continuously (default)', @@ -62,6 +64,7 @@ const SUBCOMMAND_HELP: Record = { ' gsd headless new-milestone --context spec.md Create milestone from file', ' cat spec.md | gsd headless new-milestone --context - From stdin', ' gsd headless new-milestone --context spec.md --auto Create + auto-execute', + ' gsd headless --supervised auto Supervised orchestrator mode', '', 'Exit codes: 0 = complete, 1 = error/timeout, 2 = blocked', ].join('\n'), diff --git a/src/loader.ts b/src/loader.ts index 9d6b4ca50..42149656c 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -3,7 +3,7 @@ // Copyright (c) 2026 Jeremy McSpadden import { fileURLToPath } from 'url' import { dirname, resolve, join, delimiter } from 'path' -import { existsSync, readFileSync, readdirSync, mkdirSync, symlinkSync } from 'fs' +import { existsSync, readFileSync, readdirSync, mkdirSync, symlinkSync, cpSync } from 'fs' // Fast-path: handle --version/-v and --help/-h before importing any heavy // dependencies. This avoids loading the entire pi-coding-agent barrel import @@ -151,8 +151,12 @@ if (process.env.HTTP_PROXY || process.env.HTTPS_PROXY || process.env.http_proxy setGlobalDispatcher(new EnvHttpProxyAgent()) } -// Ensure workspace packages are linked before importing cli.js (which imports @gsd/*). +// Ensure workspace packages are linked (or copied on Windows) before importing +// cli.js (which imports @gsd/*). // npm postinstall handles this normally, but npx --ignore-scripts skips postinstall. +// On Windows without Developer Mode or admin rights, symlinkSync will throw even for +// 'junction' type — so we fall back to cpSync (a full directory copy) which works +// everywhere without elevated permissions. const gsdScopeDir = join(gsdNodeModules, '@gsd') const packagesDir = join(gsdRoot, 'packages') const wsPackages = ['native', 'pi-agent-core', 'pi-ai', 'pi-coding-agent', 'pi-tui'] @@ -161,11 +165,37 @@ try { for (const pkg of wsPackages) { const target = join(gsdScopeDir, pkg) const source = join(packagesDir, pkg) - if (existsSync(source) && !existsSync(target)) { - try { symlinkSync(source, target, 'junction') } catch { /* non-fatal */ } + if (!existsSync(source) || existsSync(target)) continue + try { + symlinkSync(source, target, 'junction') + } catch { + // Symlink failed (common on Windows without Developer Mode / admin). + // Fall back to a directory copy — slower on first run but universally works. + try { cpSync(source, target, { recursive: true }) } catch { /* non-fatal */ } } } } catch { /* non-fatal */ } +// Validate critical workspace packages are resolvable. If still missing after the +// symlink+copy attempts, emit a clear diagnostic instead of a cryptic +// ERR_MODULE_NOT_FOUND from deep inside cli.js. 
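[Editor's note] The --supervised flag documented above forwards interactive UI requests over the JSONL stream; this PR only shows the gsd side, so here is a hedged sketch of what an orchestrator process might do. Only the message types (extension_ui_response, prompt, steer, follow_up), the id/method fields, the supervised_timeout event, and the 30000 ms default come from this PR — the request-detection heuristic and the `response` payload field are assumptions.

```ts
// Sketch of the orchestrator side of `gsd headless --supervised auto`.
import { spawn } from "node:child_process";
import { createInterface } from "node:readline";

const child = spawn("gsd", ["headless", "--supervised", "auto"], {
  stdio: ["pipe", "pipe", "inherit"],
});
const rl = createInterface({ input: child.stdout! });

const parseLine = (line: string): Record<string, unknown> | null => {
  try {
    return JSON.parse(line) as Record<string, unknown>;
  } catch {
    return null; // non-JSON output, ignore
  }
};

rl.on("line", (line) => {
  const event = parseLine(line);
  if (!event) return;

  // Heuristic (assumption): forwarded UI requests carry both an id and a method.
  // Answer them, or gsd falls back to its auto-responder after --response-timeout
  // (default 30000 ms) and emits a `supervised_timeout` event.
  if (typeof event.id === "string" && typeof event.method === "string") {
    const reply = {
      type: "extension_ui_response",
      id: event.id,
      response: "ok", // hypothetical payload — the real shape depends on the request
    };
    child.stdin!.write(JSON.stringify(reply) + "\n");
  }
});

// The orchestrator can also drive the run directly over stdin:
child.stdin!.write(JSON.stringify({ type: "steer", message: "Focus on the failing tests first." }) + "\n");
```

Per the diff, an unresponsive orchestrator does not wedge the run: if stdin closes or the response timer fires, the existing auto-responder answers the request instead.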
+const criticalPackages = ['pi-coding-agent'] +const missingPackages = criticalPackages.filter(pkg => !existsSync(join(gsdScopeDir, pkg))) +if (missingPackages.length > 0) { + const missing = missingPackages.map(p => `@gsd/${p}`).join(', ') + process.stderr.write( + `\nError: GSD installation is broken — missing packages: ${missing}\n\n` + + `This is usually caused by one of:\n` + + ` • An outdated version installed from npm (run: npm install -g gsd-pi@latest)\n` + + ` • The packages/ directory was excluded from the installed tarball\n` + + ` • A filesystem error prevented linking or copying the workspace packages\n\n` + + `Fix it by reinstalling:\n\n` + + ` npm install -g gsd-pi@latest\n\n` + + `If the issue persists, please open an issue at:\n` + + ` https://github.com/gsd-build/gsd-2/issues\n` + ) + process.exit(1) +} + // Dynamic import defers ESM evaluation — config.js will see PI_PACKAGE_DIR above await import('./cli.js') diff --git a/src/resources/extensions/bg-shell/index.ts b/src/resources/extensions/bg-shell/index.ts index 4faa1fbed..2b9d49826 100644 --- a/src/resources/extensions/bg-shell/index.ts +++ b/src/resources/extensions/bg-shell/index.ts @@ -52,6 +52,7 @@ import { getGroupStatus, pruneDeadProcesses, cleanupAll, + cleanupSessionProcesses, persistManifest, loadManifest, pushAlert, @@ -71,7 +72,7 @@ import { toPosixPath } from "../shared/path-display.js"; // ── Re-exports for consumers ─────────────────────────────────────────────── export type { ProcessStatus, ProcessType, BgProcess, BgProcessInfo, OutputDigest, OutputLine, ProcessEvent } from "./types.js"; -export { processes, startProcess, killProcess, restartProcess, cleanupAll } from "./process-manager.js"; +export { processes, startProcess, killProcess, restartProcess, cleanupAll, cleanupSessionProcesses } from "./process-manager.js"; export { generateDigest, getHighlights, getOutput, formatDigestText } from "./output-formatter.js"; export { waitForReady, probePort } from "./readiness-detector.js"; export { sendAndWait, runOnSession, queryShellEnv } from "./interaction.js"; @@ -136,7 +137,13 @@ export default function (pi: ExtensionAPI) { }); // Session switch resets the agent's context. - pi.on("session_switch", async () => { + pi.on("session_switch", async (event, ctx) => { + latestCtx = ctx; + if (event.reason === "new" && event.previousSessionFile) { + await cleanupSessionProcesses(event.previousSessionFile); + syncLatestCtxCwd(); + if (latestCtx) persistManifest(latestCtx.cwd); + } buildProcessStateAlert("Session was switched."); }); @@ -232,6 +239,7 @@ export default function (pi: ExtensionAPI) { "Use 'run' to execute a command on a persistent shell session and block until it completes — returns structured output + exit code. 
Shell state (env vars, cwd, virtualenvs) persists across runs.", "Use 'send_and_wait' for interactive CLIs: send input and wait for expected output pattern.", "Use 'env' to check the current working directory and active environment variables of a shell session — useful after cd, source, or export commands.", + "Background processes are session-scoped by default: a new session reaps them unless you set persist_across_sessions:true.", "Use 'restart' to kill and relaunch with the same config — preserves restart count.", "Background processes are auto-classified (server/build/test/watcher) based on the command.", "Process crashes and errors are automatically surfaced as alerts at the start of your next turn — you don't need to poll.", @@ -300,6 +308,12 @@ export default function (pi: ExtensionAPI) { group: Type.Optional( Type.String({ description: "Group name for related processes (for start, group_status)" }), ), + persist_across_sessions: Type.Optional( + Type.Boolean({ + description: "Keep this process running after a new session starts. Default: false.", + default: false, + }), + ), }), async execute(_toolCallId, params, signal, _onUpdate, ctx) { @@ -318,6 +332,8 @@ export default function (pi: ExtensionAPI) { const bg = startProcess({ command: params.command, cwd: ctx.cwd, + ownerSessionFile: ctx.sessionManager.getSessionFile() ?? null, + persistAcrossSessions: params.persist_across_sessions ?? false, label: params.label, type: params.type as ProcessType | undefined, readyPattern: params.ready_pattern, @@ -341,6 +357,7 @@ export default function (pi: ExtensionAPI) { text += ` cwd: ${toPosixPath(bg.cwd)}`; if (bg.group) text += `\n group: ${bg.group}`; + if (bg.persistAcrossSessions) text += `\n persist_across_sessions: true`; if (bg.readyPort) text += `\n ready_port: ${bg.readyPort}`; if (bg.readyPattern) text += `\n ready_pattern: ${bg.readyPattern}`; if (bg.ports.length > 0) text += `\n detected ports: ${bg.ports.join(", ")}`; diff --git a/src/resources/extensions/bg-shell/process-manager.ts b/src/resources/extensions/bg-shell/process-manager.ts index c2bbf8a3f..95ee6ccd9 100644 --- a/src/resources/extensions/bg-shell/process-manager.ts +++ b/src/resources/extensions/bg-shell/process-manager.ts @@ -67,6 +67,8 @@ export function getInfo(p: BgProcess): BgProcessInfo { label: p.label, command: p.command, cwd: p.cwd, + ownerSessionFile: p.ownerSessionFile, + persistAcrossSessions: p.persistAcrossSessions, startedAt: p.startedAt, alive: p.alive, exitCode: p.exitCode, @@ -138,6 +140,8 @@ export function startProcess(opts: StartOptions): BgProcess { label: opts.label || command.slice(0, 60), command, cwd: opts.cwd, + ownerSessionFile: opts.ownerSessionFile ?? null, + persistAcrossSessions: opts.persistAcrossSessions ?? false, startedAt: Date.now(), proc, output: [], @@ -170,6 +174,8 @@ export function startProcess(opts: StartOptions): BgProcess { cwd: opts.cwd, label: opts.label || command.slice(0, 60), processType, + ownerSessionFile: opts.ownerSessionFile ?? null, + persistAcrossSessions: opts.persistAcrossSessions ?? 
false, readyPattern: opts.readyPattern || null, readyPort: opts.readyPort || null, group: opts.group || null, @@ -312,6 +318,8 @@ export async function restartProcess(id: string): Promise { cwd: config.cwd, label: config.label, type: config.processType, + ownerSessionFile: config.ownerSessionFile, + persistAcrossSessions: config.persistAcrossSessions, readyPattern: config.readyPattern || undefined, readyPort: config.readyPort || undefined, group: config.group || undefined, @@ -367,6 +375,41 @@ export function cleanupAll(): void { processes.clear(); } +async function waitForProcessExit(bg: BgProcess, timeoutMs: number): Promise { + if (!bg.alive) return true; + await new Promise((resolve) => { + const done = () => resolve(); + const timer = setTimeout(done, timeoutMs); + bg.proc.once("exit", () => { + clearTimeout(timer); + resolve(); + }); + }); + return !bg.alive; +} + +export async function cleanupSessionProcesses( + sessionFile: string, + options?: { graceMs?: number }, +): Promise { + const graceMs = Math.max(0, options?.graceMs ?? 300); + const matches = Array.from(processes.values()).filter( + (bg) => bg.alive && !bg.persistAcrossSessions && bg.ownerSessionFile === sessionFile, + ); + if (matches.length === 0) return []; + + for (const bg of matches) { + killProcess(bg.id, "SIGTERM"); + } + if (graceMs > 0) { + await Promise.all(matches.map((bg) => waitForProcessExit(bg, graceMs))); + } + for (const bg of matches) { + if (bg.alive) killProcess(bg.id, "SIGKILL"); + } + return matches.map((bg) => bg.id); +} + // ── Persistence ──────────────────────────────────────────────────────────── export function getManifestPath(cwd: string): string { @@ -384,6 +427,8 @@ export function persistManifest(cwd: string): void { label: p.label, command: p.command, cwd: p.cwd, + ownerSessionFile: p.ownerSessionFile, + persistAcrossSessions: p.persistAcrossSessions, startedAt: p.startedAt, processType: p.processType, group: p.group, diff --git a/src/resources/extensions/bg-shell/types.ts b/src/resources/extensions/bg-shell/types.ts index ff35a35bd..fa5131bd4 100644 --- a/src/resources/extensions/bg-shell/types.ts +++ b/src/resources/extensions/bg-shell/types.ts @@ -53,6 +53,10 @@ export interface BgProcess { label: string; command: string; cwd: string; + /** Session file that created this process (used for per-session cleanup) */ + ownerSessionFile: string | null; + /** Whether this process should survive a new-session boundary */ + persistAcrossSessions: boolean; startedAt: number; proc: import("node:child_process").ChildProcess; /** Unified chronologically-interleaved output buffer */ @@ -103,7 +107,17 @@ export interface BgProcess { /** Restart count */ restartCount: number; /** Original start config for restart */ - startConfig: { command: string; cwd: string; label: string; processType: ProcessType; readyPattern: string | null; readyPort: number | null; group: string | null }; + startConfig: { + command: string; + cwd: string; + label: string; + processType: ProcessType; + ownerSessionFile: string | null; + persistAcrossSessions: boolean; + readyPattern: string | null; + readyPort: number | null; + group: string | null; + }; } export interface BgProcessInfo { @@ -111,6 +125,8 @@ export interface BgProcessInfo { label: string; command: string; cwd: string; + ownerSessionFile: string | null; + persistAcrossSessions: boolean; startedAt: number; alive: boolean; exitCode: number | null; @@ -133,6 +149,8 @@ export interface BgProcessInfo { export interface StartOptions { command: string; cwd: string; + 
ownerSessionFile?: string | null; + persistAcrossSessions?: boolean; label?: string; type?: ProcessType; readyPattern?: string; @@ -154,6 +172,8 @@ export interface ProcessManifest { label: string; command: string; cwd: string; + ownerSessionFile: string | null; + persistAcrossSessions: boolean; startedAt: number; processType: ProcessType; group: string | null; diff --git a/src/resources/extensions/gsd/auto-budget.ts b/src/resources/extensions/gsd/auto-budget.ts new file mode 100644 index 000000000..290f336f0 --- /dev/null +++ b/src/resources/extensions/gsd/auto-budget.ts @@ -0,0 +1,32 @@ +/** + * Budget alert level tracking and enforcement for auto-mode. + * Pure functions — no module state or side effects. + */ + +import type { BudgetEnforcementMode } from "./types.js"; + +export type BudgetAlertLevel = 0 | 75 | 80 | 90 | 100; + +export function getBudgetAlertLevel(budgetPct: number): BudgetAlertLevel { + if (budgetPct >= 1.0) return 100; + if (budgetPct >= 0.90) return 90; + if (budgetPct >= 0.80) return 80; + if (budgetPct >= 0.75) return 75; + return 0; +} + +export function getNewBudgetAlertLevel(previousLevel: BudgetAlertLevel, budgetPct: number): BudgetAlertLevel | null { + const currentLevel = getBudgetAlertLevel(budgetPct); + if (currentLevel === 0 || currentLevel <= previousLevel) return null; + return currentLevel; +} + +export function getBudgetEnforcementAction( + enforcement: BudgetEnforcementMode, + budgetPct: number, +): "none" | "warn" | "pause" | "halt" { + if (budgetPct < 1.0) return "none"; + if (enforcement === "halt") return "halt"; + if (enforcement === "pause") return "pause"; + return "warn"; +} diff --git a/src/resources/extensions/gsd/auto-direct-dispatch.ts b/src/resources/extensions/gsd/auto-direct-dispatch.ts new file mode 100644 index 000000000..1aac353db --- /dev/null +++ b/src/resources/extensions/gsd/auto-direct-dispatch.ts @@ -0,0 +1,229 @@ +/** + * Direct phase dispatch — handles manual /gsd dispatch commands. + * Resolves phase name → unit type + prompt, creates a session, and sends the message. + */ + +import type { + ExtensionAPI, + ExtensionCommandContext, +} from "@gsd/pi-coding-agent"; + +import { deriveState } from "./state.js"; +import { loadFile, parseRoadmap } from "./files.js"; +import { + resolveMilestoneFile, resolveSliceFile, relSliceFile, +} from "./paths.js"; +import { + buildResearchSlicePrompt, + buildResearchMilestonePrompt, + buildPlanSlicePrompt, + buildPlanMilestonePrompt, + buildExecuteTaskPrompt, + buildCompleteSlicePrompt, + buildCompleteMilestonePrompt, + buildReassessRoadmapPrompt, + buildRunUatPrompt, + buildReplanSlicePrompt, +} from "./auto-prompts.js"; +import { loadEffectiveGSDPreferences } from "./preferences.js"; +import { pauseAuto } from "./auto.js"; + +export async function dispatchDirectPhase( + ctx: ExtensionCommandContext, + pi: ExtensionAPI, + phase: string, + base: string, +): Promise { + const state = await deriveState(base); + const mid = state.activeMilestone?.id; + const midTitle = state.activeMilestone?.title ?? 
""; + + if (!mid) { + ctx.ui.notify("Cannot dispatch: no active milestone.", "warning"); + return; + } + + const normalized = phase.toLowerCase(); + let unitType: string; + let unitId: string; + let prompt: string; + + switch (normalized) { + case "research": + case "research-milestone": + case "research-slice": { + const isSlice = normalized === "research-slice" || (normalized === "research" && state.phase !== "pre-planning"); + if (isSlice) { + const sid = state.activeSlice?.id; + const sTitle = state.activeSlice?.title ?? ""; + if (!sid) { + ctx.ui.notify("Cannot dispatch research-slice: no active slice.", "warning"); + return; + } + + // When require_slice_discussion is enabled, pause auto-mode before + // each new slice so the user can discuss requirements first (#789). + const sliceContextFile = resolveSliceFile(base, mid, sid, "CONTEXT"); + const requireDiscussion = loadEffectiveGSDPreferences()?.preferences?.phases?.require_slice_discussion; + if (requireDiscussion && !sliceContextFile) { + ctx.ui.notify( + `Slice ${sid} requires discussion before planning. Run /gsd discuss to discuss this slice, then /gsd auto to resume.`, + "info", + ); + await pauseAuto(ctx, pi); + return; + } + + unitType = "research-slice"; + unitId = `${mid}/${sid}`; + prompt = await buildResearchSlicePrompt(mid, midTitle, sid, sTitle, base); + } else { + unitType = "research-milestone"; + unitId = mid; + prompt = await buildResearchMilestonePrompt(mid, midTitle, base); + } + break; + } + + case "plan": + case "plan-milestone": + case "plan-slice": { + const isSlice = normalized === "plan-slice" || (normalized === "plan" && state.phase !== "pre-planning"); + if (isSlice) { + const sid = state.activeSlice?.id; + const sTitle = state.activeSlice?.title ?? ""; + if (!sid) { + ctx.ui.notify("Cannot dispatch plan-slice: no active slice.", "warning"); + return; + } + unitType = "plan-slice"; + unitId = `${mid}/${sid}`; + prompt = await buildPlanSlicePrompt(mid, midTitle, sid, sTitle, base); + } else { + unitType = "plan-milestone"; + unitId = mid; + prompt = await buildPlanMilestonePrompt(mid, midTitle, base); + } + break; + } + + case "execute": + case "execute-task": { + const sid = state.activeSlice?.id; + const sTitle = state.activeSlice?.title ?? ""; + const tid = state.activeTask?.id; + const tTitle = state.activeTask?.title ?? ""; + if (!sid) { + ctx.ui.notify("Cannot dispatch execute-task: no active slice.", "warning"); + return; + } + if (!tid) { + ctx.ui.notify("Cannot dispatch execute-task: no active task.", "warning"); + return; + } + unitType = "execute-task"; + unitId = `${mid}/${sid}/${tid}`; + prompt = await buildExecuteTaskPrompt(mid, sid, sTitle, tid, tTitle, base); + break; + } + + case "complete": + case "complete-slice": + case "complete-milestone": { + const isSlice = normalized === "complete-slice" || (normalized === "complete" && state.phase === "summarizing"); + if (isSlice) { + const sid = state.activeSlice?.id; + const sTitle = state.activeSlice?.title ?? 
""; + if (!sid) { + ctx.ui.notify("Cannot dispatch complete-slice: no active slice.", "warning"); + return; + } + unitType = "complete-slice"; + unitId = `${mid}/${sid}`; + prompt = await buildCompleteSlicePrompt(mid, midTitle, sid, sTitle, base); + } else { + unitType = "complete-milestone"; + unitId = mid; + prompt = await buildCompleteMilestonePrompt(mid, midTitle, base); + } + break; + } + + case "reassess": + case "reassess-roadmap": { + const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP"); + const roadmapContent = roadmapFile ? await loadFile(roadmapFile) : null; + if (!roadmapContent) { + ctx.ui.notify("Cannot dispatch reassess-roadmap: no roadmap found.", "warning"); + return; + } + const roadmap = parseRoadmap(roadmapContent); + const completedSlices = roadmap.slices.filter(s => s.done); + if (completedSlices.length === 0) { + ctx.ui.notify("Cannot dispatch reassess-roadmap: no completed slices.", "warning"); + return; + } + const completedSliceId = completedSlices[completedSlices.length - 1].id; + unitType = "reassess-roadmap"; + unitId = `${mid}/${completedSliceId}`; + prompt = await buildReassessRoadmapPrompt(mid, midTitle, completedSliceId, base); + break; + } + + case "uat": + case "run-uat": { + const sid = state.activeSlice?.id; + if (!sid) { + ctx.ui.notify("Cannot dispatch run-uat: no active slice.", "warning"); + return; + } + const uatFile = resolveSliceFile(base, mid, sid, "UAT"); + if (!uatFile) { + ctx.ui.notify("Cannot dispatch run-uat: no UAT file found.", "warning"); + return; + } + const uatContent = await loadFile(uatFile); + if (!uatContent) { + ctx.ui.notify("Cannot dispatch run-uat: UAT file is empty.", "warning"); + return; + } + const uatPath = relSliceFile(base, mid, sid, "UAT"); + unitType = "run-uat"; + unitId = `${mid}/${sid}`; + prompt = await buildRunUatPrompt(mid, sid, uatPath, uatContent, base); + break; + } + + case "replan": + case "replan-slice": { + const sid = state.activeSlice?.id; + const sTitle = state.activeSlice?.title ?? ""; + if (!sid) { + ctx.ui.notify("Cannot dispatch replan-slice: no active slice.", "warning"); + return; + } + unitType = "replan-slice"; + unitId = `${mid}/${sid}`; + prompt = await buildReplanSlicePrompt(mid, midTitle, sid, sTitle, base); + break; + } + + default: + ctx.ui.notify( + `Unknown phase "${phase}". 
Valid phases: research, plan, execute, complete, reassess, uat, replan.`, + "warning", + ); + return; + } + + ctx.ui.notify(`Dispatching ${unitType} for ${unitId}...`, "info"); + const result = await ctx.newSession(); + if (result.cancelled) { + ctx.ui.notify("Session creation cancelled.", "warning"); + return; + } + pi.sendMessage( + { customType: "gsd-dispatch", content: prompt, display: false }, + { triggerTurn: true }, + ); +} diff --git a/src/resources/extensions/gsd/auto-dispatch.ts b/src/resources/extensions/gsd/auto-dispatch.ts index 064687e80..8cd4e2ce6 100644 --- a/src/resources/extensions/gsd/auto-dispatch.ts +++ b/src/resources/extensions/gsd/auto-dispatch.ts @@ -241,6 +241,32 @@ const DISPATCH_RULES: DispatchRule[] = [ }; }, }, + { + name: "executing → execute-task (recover missing task plan → plan-slice)", + match: async ({ state, mid, midTitle, basePath }) => { + if (state.phase !== "executing" || !state.activeTask) return null; + const sid = state.activeSlice!.id; + const sTitle = state.activeSlice!.title; + const tid = state.activeTask.id; + + // Guard: if the slice plan exists but the individual task plan files are + // missing, the planner created S##-PLAN.md with task entries but never + // wrote the tasks/ directory files. Dispatch plan-slice to regenerate + // them rather than hard-stopping — fixes the infinite-loop described in + // issue #909. + const taskPlanPath = resolveTaskFile(basePath, mid, sid, tid, "PLAN"); + if (!taskPlanPath || !existsSync(taskPlanPath)) { + return { + action: "dispatch", + unitType: "plan-slice", + unitId: `${mid}/${sid}`, + prompt: await buildPlanSlicePrompt(mid, midTitle, sid, sTitle, basePath), + }; + } + + return null; + }, + }, { name: "executing → execute-task", match: async ({ state, mid, basePath }) => { @@ -250,19 +276,6 @@ const DISPATCH_RULES: DispatchRule[] = [ const tid = state.activeTask.id; const tTitle = state.activeTask.title; - // Guard: refuse to dispatch execute-task when the task plan file is missing. - // This prevents the agent from running blind after a failed plan-slice that - // wrote S{sid}-PLAN.md but omitted the individual T{tid}-PLAN.md files. - // (See issue #739 — missing task plan caused runaway execution and EPIPE crash.) - const taskPlanPath = resolveTaskFile(basePath, mid, sid, tid, "PLAN"); - if (!taskPlanPath || !existsSync(taskPlanPath)) { - return { - action: "stop", - reason: `Task plan ${tid}-PLAN.md is missing for ${mid}/${sid}/${tid}. Re-run plan-slice to regenerate task plans, or create the file manually and resume.`, - level: "error", - }; - } - return { action: "dispatch", unitType: "execute-task", diff --git a/src/resources/extensions/gsd/auto-model-selection.ts b/src/resources/extensions/gsd/auto-model-selection.ts new file mode 100644 index 000000000..70be37671 --- /dev/null +++ b/src/resources/extensions/gsd/auto-model-selection.ts @@ -0,0 +1,179 @@ +/** + * Model selection and dynamic routing for auto-mode unit dispatch. + * Handles complexity-based routing, model resolution across providers, + * and fallback chains. 
+ */ + +import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent"; +import type { GSDPreferences } from "./preferences.js"; +import { resolveModelWithFallbacksForUnit, resolveDynamicRoutingConfig } from "./preferences.js"; +import { classifyUnitComplexity, tierLabel } from "./complexity-classifier.js"; +import { resolveModelForComplexity } from "./model-router.js"; +import { getLedger, getProjectTotals } from "./metrics.js"; +import { unitPhaseLabel } from "./auto-dashboard.js"; + +export interface ModelSelectionResult { + /** Routing metadata for metrics recording */ + routing: { tier: string; modelDowngraded: boolean } | null; +} + +/** + * Select and apply the appropriate model for a unit dispatch. + * Handles: per-unit-type model preferences, dynamic complexity routing, + * provider/model resolution, fallback chains, and start-model re-application. + * + * Returns routing metadata for metrics tracking. + */ +export async function selectAndApplyModel( + ctx: ExtensionContext, + pi: ExtensionAPI, + unitType: string, + unitId: string, + basePath: string, + prefs: GSDPreferences | undefined, + verbose: boolean, + autoModeStartModel: { provider: string; id: string } | null, +): Promise { + const modelConfig = resolveModelWithFallbacksForUnit(unitType); + let routing: { tier: string; modelDowngraded: boolean } | null = null; + + if (modelConfig) { + const availableModels = ctx.modelRegistry.getAvailable(); + + // ─── Dynamic Model Routing ───────────────────────────────────────── + const routingConfig = resolveDynamicRoutingConfig(); + let effectiveModelConfig = modelConfig; + let routingTierLabel = ""; + + if (routingConfig.enabled) { + let budgetPct: number | undefined; + if (routingConfig.budget_pressure !== false) { + const budgetCeiling = prefs?.budget_ceiling; + if (budgetCeiling !== undefined && budgetCeiling > 0) { + const currentLedger = getLedger(); + const totalCost = currentLedger ? 
getProjectTotals(currentLedger.units).cost : 0; + budgetPct = totalCost / budgetCeiling; + } + } + + const isHook = unitType.startsWith("hook/"); + const shouldClassify = !isHook || routingConfig.hooks !== false; + + if (shouldClassify) { + const classification = classifyUnitComplexity(unitType, unitId, basePath, budgetPct); + const availableModelIds = availableModels.map(m => m.id); + const routingResult = resolveModelForComplexity(classification, modelConfig, routingConfig, availableModelIds); + + if (routingResult.wasDowngraded) { + effectiveModelConfig = { + primary: routingResult.modelId, + fallbacks: routingResult.fallbacks, + }; + if (verbose) { + ctx.ui.notify( + `Dynamic routing [${tierLabel(classification.tier)}]: ${routingResult.modelId} (${classification.reason})`, + "info", + ); + } + } + routingTierLabel = ` [${tierLabel(classification.tier)}]`; + routing = { tier: classification.tier, modelDowngraded: routingResult.wasDowngraded }; + } + } + + const modelsToTry = [effectiveModelConfig.primary, ...effectiveModelConfig.fallbacks]; + + for (const modelId of modelsToTry) { + const model = resolveModelId(modelId, availableModels, ctx.model?.provider); + + if (!model) { + if (verbose) ctx.ui.notify(`Model ${modelId} not found, trying fallback.`, "info"); + continue; + } + + // Warn if the ID is ambiguous across providers + if (!modelId.includes("/")) { + const providers = availableModels.filter(m => m.id === modelId).map(m => m.provider); + if (providers.length > 1 && model.provider !== ctx.model?.provider) { + ctx.ui.notify( + `Model ID "${modelId}" exists in multiple providers (${providers.join(", ")}). ` + + `Resolved to ${model.provider}. Use "provider/model" format for explicit targeting.`, + "warning", + ); + } + } + + const ok = await pi.setModel(model, { persist: false }); + if (ok) { + const fallbackNote = modelId === effectiveModelConfig.primary + ? "" + : ` (fallback from ${effectiveModelConfig.primary})`; + const phase = unitPhaseLabel(unitType); + ctx.ui.notify(`Model [${phase}]${routingTierLabel}: ${model.provider}/${model.id}${fallbackNote}`, "info"); + break; + } else { + const nextModel = modelsToTry[modelsToTry.indexOf(modelId) + 1]; + if (nextModel) { + if (verbose) ctx.ui.notify(`Failed to set model ${modelId}, trying ${nextModel}...`, "info"); + } else { + ctx.ui.notify(`All preferred models unavailable for ${unitType}. Using default.`, "warning"); + } + } + } + } else if (autoModeStartModel) { + // No model preference for this unit type — re-apply the model captured + // at auto-mode start to prevent bleed from shared global settings.json (#650). + const availableModels = ctx.modelRegistry.getAvailable(); + const startModel = availableModels.find( + m => m.provider === autoModeStartModel.provider && m.id === autoModeStartModel.id, + ); + if (startModel) { + const ok = await pi.setModel(startModel, { persist: false }); + if (!ok) { + const byId = availableModels.find(m => m.id === autoModeStartModel.id); + if (byId) await pi.setModel(byId, { persist: false }); + } + } + } + + return { routing }; +} + +/** + * Resolve a model ID string to a model object from the available models list. + * Handles formats: "provider/model", "bare-id", "org/model-name" (OpenRouter). 
+ */ +function resolveModelId( + modelId: string, + availableModels: T[], + currentProvider: string | undefined, +): T | undefined { + const slashIdx = modelId.indexOf("/"); + + if (slashIdx !== -1) { + const maybeProvider = modelId.substring(0, slashIdx); + const id = modelId.substring(slashIdx + 1); + + const knownProviders = new Set(availableModels.map(m => m.provider.toLowerCase())); + if (knownProviders.has(maybeProvider.toLowerCase())) { + const match = availableModels.find( + m => m.provider.toLowerCase() === maybeProvider.toLowerCase() + && m.id.toLowerCase() === id.toLowerCase(), + ); + if (match) return match; + } + + // Try matching the full string as a model ID (OpenRouter-style) + const lower = modelId.toLowerCase(); + return availableModels.find( + m => m.id.toLowerCase() === lower + || `${m.provider}/${m.id}`.toLowerCase() === lower, + ); + } + + // Bare ID — prefer current provider, then first available + const exactProviderMatch = availableModels.find( + m => m.id === modelId && m.provider === currentProvider, + ); + return exactProviderMatch ?? availableModels.find(m => m.id === modelId); +} diff --git a/src/resources/extensions/gsd/auto-observability.ts b/src/resources/extensions/gsd/auto-observability.ts new file mode 100644 index 000000000..ddcc0bf3d --- /dev/null +++ b/src/resources/extensions/gsd/auto-observability.ts @@ -0,0 +1,74 @@ +/** + * Pre-dispatch observability checks for auto-mode units. + * Validates plan/summary file quality and builds repair instructions + * for the agent to fix gaps before proceeding with the unit. + */ + +import type { ExtensionContext } from "@gsd/pi-coding-agent"; +import { + validatePlanBoundary, + validateExecuteBoundary, + validateCompleteBoundary, + formatValidationIssues, +} from "./observability-validator.js"; +import type { ValidationIssue } from "./observability-validator.js"; + +export async function collectObservabilityWarnings( + ctx: ExtensionContext, + basePath: string, + unitType: string, + unitId: string, +): Promise { + // Hook units have custom artifacts — skip standard observability checks + if (unitType.startsWith("hook/")) return []; + + const parts = unitId.split("/"); + const mid = parts[0]; + const sid = parts[1]; + const tid = parts[2]; + + if (!mid || !sid) return []; + + let issues = [] as Awaited>; + + if (unitType === "plan-slice") { + issues = await validatePlanBoundary(basePath, mid, sid); + } else if (unitType === "execute-task" && tid) { + issues = await validateExecuteBoundary(basePath, mid, sid, tid); + } else if (unitType === "complete-slice") { + issues = await validateCompleteBoundary(basePath, mid, sid); + } + + if (issues.length > 0) { + ctx.ui.notify( + `Observability check (${unitType}) found ${issues.length} warning${issues.length === 1 ? 
"" : "s"}:\n${formatValidationIssues(issues)}`, + "warning", + ); + } + + return issues; +} + +export function buildObservabilityRepairBlock(issues: ValidationIssue[]): string { + if (issues.length === 0) return ""; + const items = issues.map(issue => { + const fileName = issue.file.split("/").pop() || issue.file; + let line = `- **${fileName}**: ${issue.message}`; + if (issue.suggestion) line += ` → ${issue.suggestion}`; + return line; + }); + return [ + "", + "---", + "", + "## Pre-flight: Observability gaps to fix FIRST", + "", + "The following issues were detected in plan/summary files for this unit.", + "**Read each flagged file, apply the fix described, then proceed with the unit.**", + "", + ...items, + "", + "---", + "", + ].join("\n"); +} diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index 6daf4f8c6..471e36246 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -642,7 +642,6 @@ export async function buildPlanSlicePrompt( const commitInstruction = commitDocsEnabled ? `Commit: \`docs(${sid}): add slice plan\`` : "Do not commit — planning docs are not tracked in git for this project."; - return loadPrompt("plan-slice", { workingDirectory: base, milestoneId: mid, sliceId: sid, sliceTitle: sTitle, diff --git a/src/resources/extensions/gsd/auto-timeout-recovery.ts b/src/resources/extensions/gsd/auto-timeout-recovery.ts new file mode 100644 index 000000000..c559db243 --- /dev/null +++ b/src/resources/extensions/gsd/auto-timeout-recovery.ts @@ -0,0 +1,262 @@ +/** + * Timeout recovery logic for auto-mode units. + * Handles idle and hard timeout recovery with escalation, steering messages, + * and blocker placeholder generation. + */ + +import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent"; +import { + readUnitRuntimeRecord, + writeUnitRuntimeRecord, + formatExecuteTaskRecoveryStatus, + inspectExecuteTaskDurability, +} from "./unit-runtime.js"; +import { + resolveExpectedArtifactPath, + diagnoseExpectedArtifact, + skipExecuteTask, + writeBlockerPlaceholder, +} from "./auto-recovery.js"; +import { existsSync } from "node:fs"; + +export interface RecoveryContext { + basePath: string; + verbose: boolean; + currentUnitStartedAt: number; + unitRecoveryCount: Map; + dispatchNextUnit: (ctx: ExtensionContext, pi: ExtensionAPI) => Promise; +} + +export async function recoverTimedOutUnit( + ctx: ExtensionContext, + pi: ExtensionAPI, + unitType: string, + unitId: string, + reason: "idle" | "hard", + rctx: RecoveryContext, +): Promise<"recovered" | "paused"> { + const { basePath, verbose, currentUnitStartedAt, unitRecoveryCount, dispatchNextUnit } = rctx; + + const runtime = readUnitRuntimeRecord(basePath, unitType, unitId); + const recoveryAttempts = runtime?.recoveryAttempts ?? 0; + const maxRecoveryAttempts = reason === "idle" ? 2 : 1; + + const recoveryKey = `${unitType}/${unitId}`; + const attemptNumber = (unitRecoveryCount.get(recoveryKey) ?? 0) + 1; + unitRecoveryCount.set(recoveryKey, attemptNumber); + + if (attemptNumber > 1) { + // Exponential backoff: 2^(n-1) seconds, capped at 30s + const backoffMs = Math.min(1000 * Math.pow(2, attemptNumber - 2), 30000); + ctx.ui.notify( + `Recovery attempt ${attemptNumber} for ${unitType} ${unitId}. 
Waiting ${backoffMs / 1000}s before retry.`, + "info", + ); + await new Promise(r => setTimeout(r, backoffMs)); + } + + if (unitType === "execute-task") { + const status = await inspectExecuteTaskDurability(basePath, unitId); + if (!status) return "paused"; + + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, { + recovery: status, + }); + + const durableComplete = status.summaryExists && status.taskChecked && status.nextActionAdvanced; + if (durableComplete) { + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, { + phase: "finalized", + recovery: status, + }); + ctx.ui.notify( + `${reason === "idle" ? "Idle" : "Timeout"} recovery: ${unitType} ${unitId} already completed on disk. Continuing auto-mode. (attempt ${attemptNumber})`, + "info", + ); + unitRecoveryCount.delete(recoveryKey); + await dispatchNextUnit(ctx, pi); + return "recovered"; + } + + if (recoveryAttempts < maxRecoveryAttempts) { + const isEscalation = recoveryAttempts > 0; + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, { + phase: "recovered", + recovery: status, + recoveryAttempts: recoveryAttempts + 1, + lastRecoveryReason: reason, + lastProgressAt: Date.now(), + progressCount: (runtime?.progressCount ?? 0) + 1, + lastProgressKind: reason === "idle" ? "idle-recovery-retry" : "hard-recovery-retry", + }); + + const steeringLines = isEscalation + ? [ + `**FINAL ${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — last chance before this task is skipped.**`, + `You are still executing ${unitType} ${unitId}.`, + `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`, + `Current durability status: ${formatExecuteTaskRecoveryStatus(status)}.`, + "You MUST finish the durable output NOW, even if incomplete.", + "Write the task summary with whatever you have accomplished so far.", + "Mark the task [x] in the plan. Commit your work.", + "A partial summary is infinitely better than no summary.", + ] + : [ + `**${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — do not stop.**`, + `You are still executing ${unitType} ${unitId}.`, + `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`, + `Current durability status: ${formatExecuteTaskRecoveryStatus(status)}.`, + "Do not keep exploring.", + "Immediately finish the required durable output for this unit.", + "If full completion is impossible, write the partial artifact/state needed for recovery and make the blocker explicit.", + ]; + + pi.sendMessage( + { + customType: "gsd-auto-timeout-recovery", + display: verbose, + content: steeringLines.join("\n"), + }, + { triggerTurn: true, deliverAs: "steer" }, + ); + ctx.ui.notify( + `${reason === "idle" ? "Idle" : "Timeout"} recovery: steering ${unitType} ${unitId} to finish durable output (attempt ${attemptNumber}, session ${recoveryAttempts + 1}/${maxRecoveryAttempts}).`, + "warning", + ); + return "recovered"; + } + + // Retries exhausted — write missing durable artifacts and advance. + const diagnostic = formatExecuteTaskRecoveryStatus(status); + const [mid, sid, tid] = unitId.split("/"); + const skipped = mid && sid && tid + ? 
skipExecuteTask(basePath, mid, sid, tid, status, reason, maxRecoveryAttempts) + : false; + + if (skipped) { + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, { + phase: "skipped", + recovery: status, + recoveryAttempts: recoveryAttempts + 1, + lastRecoveryReason: reason, + }); + ctx.ui.notify( + `${unitType} ${unitId} skipped after ${maxRecoveryAttempts} recovery attempts (${diagnostic}). Blocker artifacts written. Advancing pipeline. (attempt ${attemptNumber})`, + "warning", + ); + unitRecoveryCount.delete(recoveryKey); + await dispatchNextUnit(ctx, pi); + return "recovered"; + } + + // Fallback: couldn't write skip artifacts — pause as before. + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, { + phase: "paused", + recovery: status, + recoveryAttempts: recoveryAttempts + 1, + lastRecoveryReason: reason, + }); + ctx.ui.notify( + `${reason === "idle" ? "Idle" : "Timeout"} recovery check for ${unitType} ${unitId}: ${diagnostic}`, + "warning", + ); + return "paused"; + } + + const expected = diagnoseExpectedArtifact(unitType, unitId, basePath) ?? "required durable artifact"; + + // Check if the artifact already exists on disk — agent may have written it + // without signaling completion. + const artifactPath = resolveExpectedArtifactPath(unitType, unitId, basePath); + if (artifactPath && existsSync(artifactPath)) { + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, { + phase: "finalized", + recoveryAttempts: recoveryAttempts + 1, + lastRecoveryReason: reason, + }); + ctx.ui.notify( + `${reason === "idle" ? "Idle" : "Timeout"} recovery: ${unitType} ${unitId} artifact already exists on disk. Advancing. (attempt ${attemptNumber})`, + "info", + ); + unitRecoveryCount.delete(recoveryKey); + await dispatchNextUnit(ctx, pi); + return "recovered"; + } + + if (recoveryAttempts < maxRecoveryAttempts) { + const isEscalation = recoveryAttempts > 0; + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, { + phase: "recovered", + recoveryAttempts: recoveryAttempts + 1, + lastRecoveryReason: reason, + lastProgressAt: Date.now(), + progressCount: (runtime?.progressCount ?? 0) + 1, + lastProgressKind: reason === "idle" ? "idle-recovery-retry" : "hard-recovery-retry", + }); + + const steeringLines = isEscalation + ? [ + `**FINAL ${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — last chance before skip.**`, + `You are still executing ${unitType} ${unitId}.`, + `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts} — next failure skips this unit.`, + `Expected durable output: ${expected}.`, + "You MUST write the artifact file NOW, even if incomplete.", + "Write whatever you have — partial research, preliminary findings, best-effort analysis.", + "A partial artifact is infinitely better than no artifact.", + "If you are truly blocked, write the file with a BLOCKER section explaining why.", + ] + : [ + `**${reason === "idle" ? 
"IDLE" : "HARD TIMEOUT"} RECOVERY — stay in auto-mode.**`, + `You are still executing ${unitType} ${unitId}.`, + `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`, + `Expected durable output: ${expected}.`, + "Stop broad exploration.", + "Write the required artifact now.", + "If blocked, write the partial artifact and explicitly record the blocker instead of going silent.", + ]; + + pi.sendMessage( + { + customType: "gsd-auto-timeout-recovery", + display: verbose, + content: steeringLines.join("\n"), + }, + { triggerTurn: true, deliverAs: "steer" }, + ); + ctx.ui.notify( + `${reason === "idle" ? "Idle" : "Timeout"} recovery: steering ${unitType} ${unitId} to produce ${expected} (attempt ${attemptNumber}, session ${recoveryAttempts + 1}/${maxRecoveryAttempts}).`, + "warning", + ); + return "recovered"; + } + + // Retries exhausted — write a blocker placeholder and advance the pipeline + // instead of silently stalling. + const placeholder = writeBlockerPlaceholder( + unitType, unitId, basePath, + `${reason} recovery exhausted ${maxRecoveryAttempts} attempts without producing the artifact.`, + ); + + if (placeholder) { + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, { + phase: "skipped", + recoveryAttempts: recoveryAttempts + 1, + lastRecoveryReason: reason, + }); + ctx.ui.notify( + `${unitType} ${unitId} skipped after ${maxRecoveryAttempts} recovery attempts. Blocker placeholder written to ${placeholder}. Advancing pipeline. (attempt ${attemptNumber})`, + "warning", + ); + unitRecoveryCount.delete(recoveryKey); + await dispatchNextUnit(ctx, pi); + return "recovered"; + } + + // Fallback: couldn't resolve artifact path — pause as before. + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, { + phase: "paused", + recoveryAttempts: recoveryAttempts + 1, + lastRecoveryReason: reason, + }); + return "paused"; +} diff --git a/src/resources/extensions/gsd/auto-tool-tracking.ts b/src/resources/extensions/gsd/auto-tool-tracking.ts new file mode 100644 index 000000000..469f2174d --- /dev/null +++ b/src/resources/extensions/gsd/auto-tool-tracking.ts @@ -0,0 +1,54 @@ +/** + * In-flight tool call tracking for auto-mode idle detection. + * Tracks which tool calls are currently executing so the idle watchdog + * can distinguish "waiting for tool completion" from "truly idle". + */ + +const inFlightTools = new Map(); + +/** + * Mark a tool execution as in-flight. + * Records start time so the idle watchdog can detect tools hung longer than the idle timeout. + */ +export function markToolStart(toolCallId: string, isActive: boolean): void { + if (!isActive) return; + inFlightTools.set(toolCallId, Date.now()); +} + +/** + * Mark a tool execution as completed. + */ +export function markToolEnd(toolCallId: string): void { + inFlightTools.delete(toolCallId); +} + +/** + * Returns the age (ms) of the oldest currently in-flight tool, or 0 if none. + */ +export function getOldestInFlightToolAgeMs(): number { + if (inFlightTools.size === 0) return 0; + const oldestStart = Math.min(...inFlightTools.values()); + return Date.now() - oldestStart; +} + +/** + * Returns the number of currently in-flight tools. + */ +export function getInFlightToolCount(): number { + return inFlightTools.size; +} + +/** + * Returns the start timestamp of the oldest in-flight tool, or undefined if none. 
+ */ +export function getOldestInFlightToolStart(): number | undefined { + if (inFlightTools.size === 0) return undefined; + return Math.min(...inFlightTools.values()); +} + +/** + * Clear all in-flight tool tracking state. + */ +export function clearInFlightTools(): void { + inFlightTools.clear(); +} diff --git a/src/resources/extensions/gsd/auto-unit-closeout.ts b/src/resources/extensions/gsd/auto-unit-closeout.ts new file mode 100644 index 000000000..db902ce90 --- /dev/null +++ b/src/resources/extensions/gsd/auto-unit-closeout.ts @@ -0,0 +1,46 @@ +/** + * Unit closeout helper — consolidates the repeated pattern of + * snapshotting metrics + saving activity log + extracting memories + * that appears 6+ times in auto.ts. + */ + +import type { ExtensionContext } from "@gsd/pi-coding-agent"; +import { snapshotUnitMetrics } from "./metrics.js"; +import { saveActivityLog } from "./activity-log.js"; + +export interface CloseoutOptions { + promptCharCount?: number; + baselineCharCount?: number; + tier?: string; + modelDowngraded?: boolean; + continueHereFired?: boolean; +} + +/** + * Snapshot metrics, save activity log, and fire-and-forget memory extraction + * for a completed unit. Returns the activity log file path (if any). + */ +export async function closeoutUnit( + ctx: ExtensionContext, + basePath: string, + unitType: string, + unitId: string, + startedAt: number, + opts?: CloseoutOptions, +): Promise { + const modelId = ctx.model?.id ?? "unknown"; + snapshotUnitMetrics(ctx, unitType, unitId, startedAt, modelId, opts); + const activityFile = saveActivityLog(ctx, basePath, unitType, unitId); + + if (activityFile) { + try { + const { buildMemoryLLMCall, extractMemoriesFromUnit } = await import('./memory-extractor.js'); + const llmCallFn = buildMemoryLLMCall(ctx); + if (llmCallFn) { + extractMemoriesFromUnit(activityFile, unitType, unitId, llmCallFn).catch(() => {}); + } + } catch { /* non-fatal */ } + } + + return activityFile ?? undefined; +} diff --git a/src/resources/extensions/gsd/auto-worktree-sync.ts b/src/resources/extensions/gsd/auto-worktree-sync.ts new file mode 100644 index 000000000..9e948e498 --- /dev/null +++ b/src/resources/extensions/gsd/auto-worktree-sync.ts @@ -0,0 +1,207 @@ +/** + * Worktree ↔ project root state synchronization for auto-mode. + * + * When auto-mode runs inside a worktree, dispatch-critical state files + * (.gsd/ metadata) diverge between the worktree (where work happens) + * and the project root (where startAutoMode reads initial state on restart). + * Without syncing, restarting auto-mode reads stale state from the project + * root and re-dispatches already-completed units. + * + * Also contains resource staleness detection and stale worktree escape. + */ + +import { existsSync, mkdirSync, readFileSync, writeFileSync, cpSync, unlinkSync, readdirSync } from "node:fs"; +import { join, sep as pathSep } from "node:path"; +import { homedir } from "node:os"; + +// ─── Project Root → Worktree Sync ───────────────────────────────────────── + +/** + * Sync milestone artifacts from project root INTO worktree before deriveState. + * Covers the case where the LLM wrote artifacts to the main repo filesystem + * (e.g. via absolute paths) but the worktree has stale data. Also deletes + * gsd.db in the worktree so it rebuilds from fresh disk state (#853). + * Non-fatal — sync failure should never block dispatch. 
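+ *
+ * A minimal usage sketch (assumed call shape; per the note above, the wiring
+ * in auto.ts runs this before deriveState so the worktree reflects any
+ * artifacts written to the main repo):
+ * @example
+ * syncProjectRootToWorktree(projectRoot, worktreePath, currentMilestoneId);
+ * const state = await deriveState(worktreePath); // rebuild from synced files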
+ */ +export function syncProjectRootToWorktree(projectRoot: string, worktreePath: string, milestoneId: string | null): void { + if (!worktreePath || !projectRoot || worktreePath === projectRoot) return; + if (!milestoneId) return; + + const prGsd = join(projectRoot, ".gsd"); + const wtGsd = join(worktreePath, ".gsd"); + + // Copy milestone directory from project root to worktree if the project root + // has newer artifacts (e.g. slices that don't exist in the worktree yet) + try { + const srcMilestone = join(prGsd, "milestones", milestoneId); + const dstMilestone = join(wtGsd, "milestones", milestoneId); + if (existsSync(srcMilestone)) { + mkdirSync(dstMilestone, { recursive: true }); + cpSync(srcMilestone, dstMilestone, { recursive: true, force: false }); + } + } catch { /* non-fatal */ } + + // Delete worktree gsd.db so it rebuilds from the freshly synced files. + // Stale DB rows are the root cause of the infinite skip loop (#853). + try { + const wtDb = join(wtGsd, "gsd.db"); + if (existsSync(wtDb)) { + unlinkSync(wtDb); + } + } catch { /* non-fatal */ } +} + +// ─── Worktree → Project Root Sync ───────────────────────────────────────── + +/** + * Sync dispatch-critical .gsd/ state files from worktree to project root. + * Only runs when inside an auto-worktree (worktreePath differs from projectRoot). + * Copies: STATE.md + active milestone directory (roadmap, slice plans, task summaries). + * Non-fatal — sync failure should never block dispatch. + */ +export function syncStateToProjectRoot(worktreePath: string, projectRoot: string, milestoneId: string | null): void { + if (!worktreePath || !projectRoot || worktreePath === projectRoot) return; + if (!milestoneId) return; + + const wtGsd = join(worktreePath, ".gsd"); + const prGsd = join(projectRoot, ".gsd"); + + // 1. STATE.md — the quick-glance status used by initial deriveState() + try { + const src = join(wtGsd, "STATE.md"); + const dst = join(prGsd, "STATE.md"); + if (existsSync(src)) cpSync(src, dst, { force: true }); + } catch { /* non-fatal */ } + + // 2. Milestone directory — ROADMAP, slice PLANs, task summaries + // Copy the entire milestone .gsd subtree so deriveState reads current checkboxes + try { + const srcMilestone = join(wtGsd, "milestones", milestoneId); + const dstMilestone = join(prGsd, "milestones", milestoneId); + if (existsSync(srcMilestone)) { + mkdirSync(dstMilestone, { recursive: true }); + cpSync(srcMilestone, dstMilestone, { recursive: true, force: true }); + } + } catch { /* non-fatal */ } + + // 3. Merge completed-units.json (set-union of both locations) + // Prevents already-completed units from being re-dispatched after crash/restart. + const srcKeysFile = join(wtGsd, "completed-units.json"); + const dstKeysFile = join(prGsd, "completed-units.json"); + if (existsSync(srcKeysFile)) { + try { + const srcKeys: string[] = JSON.parse(readFileSync(srcKeysFile, "utf8")); + let dstKeys: string[] = []; + if (existsSync(dstKeysFile)) { + try { dstKeys = JSON.parse(readFileSync(dstKeysFile, "utf8")); } catch { /* ignore corrupt dst */ } + } + const merged = [...new Set([...dstKeys, ...srcKeys])]; + writeFileSync(dstKeysFile, JSON.stringify(merged, null, 2)); + } catch { /* non-fatal */ } + } + + // 4. Runtime records — unit dispatch state used by selfHealRuntimeRecords(). + // Without this, a crash during a unit leaves the runtime record only in the + // worktree. If the next session resolves basePath before worktree re-entry, + // selfHeal can't find or clear the stale record (#769). 
+ try { + const srcRuntime = join(wtGsd, "runtime", "units"); + const dstRuntime = join(prGsd, "runtime", "units"); + if (existsSync(srcRuntime)) { + mkdirSync(dstRuntime, { recursive: true }); + cpSync(srcRuntime, dstRuntime, { recursive: true, force: true }); + } + } catch { /* non-fatal */ } +} + +// ─── Resource Staleness ─────────────────────────────────────────────────── + +/** + * Read the resource version (semver) from the managed-resources manifest. + * Uses gsdVersion instead of syncedAt so that launching a second session + * doesn't falsely trigger staleness (#804). + */ +export function readResourceVersion(): string | null { + const agentDir = process.env.GSD_CODING_AGENT_DIR || join(homedir(), ".gsd", "agent"); + const manifestPath = join(agentDir, "managed-resources.json"); + try { + const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")); + return typeof manifest?.gsdVersion === "string" ? manifest.gsdVersion : null; + } catch { + return null; + } +} + +/** + * Check if managed resources have been updated since session start. + * Returns a warning message if stale, null otherwise. + */ +export function checkResourcesStale(versionOnStart: string | null): string | null { + if (versionOnStart === null) return null; + const current = readResourceVersion(); + if (current === null) return null; + if (current !== versionOnStart) { + return "GSD resources were updated since this session started. Restart gsd to load the new code."; + } + return null; +} + +// ─── Stale Worktree Escape ──────────────────────────────────────────────── + +/** + * Detect and escape a stale worktree cwd (#608). + * + * After milestone completion + merge, the worktree directory is removed but + * the process cwd may still point inside `.gsd/worktrees//`. + * When a new session starts, `process.cwd()` is passed as `base` to startAuto + * and all subsequent writes land in the wrong directory. This function detects + * that scenario and chdir back to the project root. + * + * Returns the corrected base path. + */ +export function escapeStaleWorktree(base: string): string { + const marker = `${pathSep}.gsd${pathSep}worktrees${pathSep}`; + const idx = base.indexOf(marker); + if (idx === -1) return base; + + // base is inside .gsd/worktrees/ — extract the project root + const projectRoot = base.slice(0, idx); + try { + process.chdir(projectRoot); + } catch { + // If chdir fails, return the original — caller will handle errors downstream + return base; + } + return projectRoot; +} + +/** + * Clean stale runtime unit files for completed milestones. + * + * After restart, stale runtime/units/*.json from prior milestones can + * cause deriveState to resume the wrong milestone (#887). Removes files + * for milestones that have a SUMMARY (fully complete). 
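+ *
+ * A possible call shape (illustrative — the startAuto path in this change
+ * still inlines an equivalent cleanup loop rather than calling this helper):
+ * @example
+ * const removed = cleanStaleRuntimeUnits(
+ *   gsdRoot(base),
+ *   mid => !!resolveMilestoneFile(base, mid, "SUMMARY"),
+ * );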
+ */ +export function cleanStaleRuntimeUnits( + gsdRootPath: string, + hasMilestoneSummary: (mid: string) => boolean, +): number { + const runtimeUnitsDir = join(gsdRootPath, "runtime", "units"); + if (!existsSync(runtimeUnitsDir)) return 0; + + let cleaned = 0; + try { + for (const file of readdirSync(runtimeUnitsDir)) { + if (!file.endsWith(".json")) continue; + const midMatch = file.match(/(M\d+(?:-[a-z0-9]{6})?)/); + if (!midMatch) continue; + if (hasMilestoneSummary(midMatch[1])) { + try { + unlinkSync(join(runtimeUnitsDir, file)); + cleaned++; + } catch { /* non-fatal */ } + } + } + } catch { /* non-fatal */ } + return cleaned; +} diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index d29ba0b48..4d92034f7 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -17,17 +17,17 @@ import type { } from "@gsd/pi-coding-agent"; import { deriveState } from "./state.js"; -import type { BudgetEnforcementMode, GSDState } from "./types.js"; -import { loadFile, parseRoadmap, getManifestStatus, resolveAllOverrides, parseSummary } from "./files.js"; +import type { GSDState } from "./types.js"; +import { loadFile, getManifestStatus, resolveAllOverrides, parsePlan, parseSummary } from "./files.js"; import { loadPrompt } from "./prompt-loader.js"; +import { runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "./verification-gate.js"; +import { writeVerificationJSON } from "./verification-evidence.js"; export { inlinePriorMilestoneSummary } from "./files.js"; import { collectSecretsFromManifest } from "../get-secrets-from-user.js"; import { gsdRoot, resolveMilestoneFile, resolveSliceFile, resolveSlicePath, resolveMilestonePath, resolveDir, resolveTasksDir, resolveTaskFile, - relMilestoneFile, relSliceFile, relSlicePath, relMilestonePath, - milestonesDir, - buildMilestoneFileName, buildSliceFileName, buildTaskFileName, + milestonesDir, buildTaskFileName, } from "./paths.js"; import { invalidateAllCaches } from "./cache.js"; import { saveActivityLog, clearActivityLogState } from "./activity-log.js"; @@ -35,16 +35,42 @@ import { synthesizeCrashRecovery, getDeepDiagnostic } from "./session-forensics. 
import { writeLock, clearLock, readCrashLock, formatCrashInfo, isLockProcessAlive } from "./crash-recovery.js"; import { clearUnitRuntimeRecord, - formatExecuteTaskRecoveryStatus, inspectExecuteTaskDurability, readUnitRuntimeRecord, writeUnitRuntimeRecord, } from "./unit-runtime.js"; -import { resolveAutoSupervisorConfig, resolveModelWithFallbacksForUnit, loadEffectiveGSDPreferences, resolveSkillDiscoveryMode, resolveDynamicRoutingConfig, getIsolationMode } from "./preferences.js"; +import { resolveAutoSupervisorConfig, loadEffectiveGSDPreferences, resolveSkillDiscoveryMode, getIsolationMode } from "./preferences.js"; import { sendDesktopNotification } from "./notifications.js"; import type { GSDPreferences } from "./preferences.js"; -import { classifyUnitComplexity, tierLabel } from "./complexity-classifier.js"; -import { resolveModelForComplexity } from "./model-router.js"; +import { + type BudgetAlertLevel, + getBudgetAlertLevel, + getNewBudgetAlertLevel, + getBudgetEnforcementAction, +} from "./auto-budget.js"; +import { + markToolStart as _markToolStart, + markToolEnd as _markToolEnd, + getOldestInFlightToolAgeMs as _getOldestInFlightToolAgeMs, + getInFlightToolCount, + getOldestInFlightToolStart, + clearInFlightTools, +} from "./auto-tool-tracking.js"; +import { + collectObservabilityWarnings as _collectObservabilityWarnings, + buildObservabilityRepairBlock, +} from "./auto-observability.js"; +import { closeoutUnit } from "./auto-unit-closeout.js"; +import { recoverTimedOutUnit } from "./auto-timeout-recovery.js"; +import { selectAndApplyModel } from "./auto-model-selection.js"; +import { + syncProjectRootToWorktree, + syncStateToProjectRoot, + readResourceVersion, + checkResourcesStale, + escapeStaleWorktree, +} from "./auto-worktree-sync.js"; +// complexity-classifier + model-router imports moved to auto-model-selection.ts import { initRoutingHistory, resetRoutingHistory, recordOutcome } from "./routing-history.js"; import { checkPostUnitHooks, @@ -57,12 +83,7 @@ import { restoreHookState, clearPersistedHookState, } from "./post-unit-hooks.js"; -import { - validatePlanBoundary, - validateExecuteBoundary, - validateCompleteBoundary, - formatValidationIssues, -} from "./observability-validator.js"; +// observability-validator imports moved to auto-observability.ts import { ensureGitignore, untrackRuntimeFiles } from "./gitignore.js"; import { runGSDDoctor, rebuildState, summarizeDoctorIssues } from "./doctor.js"; import { @@ -76,13 +97,13 @@ import { import { snapshotSkills, clearSkillSnapshot } from "./skill-discovery.js"; import { captureAvailableSkills, getAndClearSkills, resetSkillTelemetry } from "./skill-telemetry.js"; import { - initMetrics, resetMetrics, snapshotUnitMetrics, getLedger, + initMetrics, resetMetrics, getLedger, getProjectTotals, formatCost, formatTokenCount, } from "./metrics.js"; +import { computeBudgets, resolveExecutorContextWindow } from "./context-budget.js"; import { join } from "node:path"; import { sep as pathSep } from "node:path"; -import { homedir } from "node:os"; -import { readdirSync, readFileSync, existsSync, mkdirSync, writeFileSync, unlinkSync, statSync, cpSync } from "node:fs"; +import { readdirSync, readFileSync, existsSync, mkdirSync, writeFileSync, unlinkSync, statSync } from "node:fs"; import { nativeIsRepo, nativeInit, nativeAddPaths, nativeCommit } from "./native-git-bridge.js"; import { autoCommitCurrentBranch, @@ -126,18 +147,7 @@ import { reconcileMergeState, } from "./auto-recovery.js"; import { resolveDispatch, 
resetRewriteCircuitBreaker } from "./auto-dispatch.js"; -import { - buildResearchSlicePrompt, - buildResearchMilestonePrompt, - buildPlanSlicePrompt, - buildPlanMilestonePrompt, - buildExecuteTaskPrompt, - buildCompleteSlicePrompt, - buildCompleteMilestonePrompt, - buildReassessRoadmapPrompt, - buildRunUatPrompt, - buildReplanSlicePrompt, -} from "./auto-prompts.js"; +// Prompt builders moved to auto-direct-dispatch.ts (only used there now) import { type AutoDashboardData, updateProgressWidget as _updateProgressWidget, @@ -145,7 +155,6 @@ import { clearSliceProgressCache, describeNextUnit as _describeNextUnit, unitVerb, - unitPhaseLabel, formatAutoElapsed as _formatAutoElapsed, formatWidgetTokens, hideFooter, @@ -159,108 +168,7 @@ import { import { isDbAvailable } from "./gsd-db.js"; import { hasPendingCaptures, loadPendingCaptures, countPendingCaptures } from "./captures.js"; -// ─── Worktree → Project Root State Sync ─────────────────────────────────────── -// When running in an auto-worktree, dispatch state (.gsd/ metadata) diverges -// between the worktree (where work happens) and the project root (where -// startAutoMode reads initial state on restart). Without syncing, restarting -// auto-mode reads stale state from the project root and re-dispatches -// already-completed units. - -/** - * Sync milestone artifacts from project root INTO worktree before deriveState. - * Covers the case where the LLM wrote artifacts to the main repo filesystem - * (e.g. via absolute paths) but the worktree has stale data. Also deletes - * gsd.db in the worktree so it rebuilds from fresh disk state (#853). - * Non-fatal — sync failure should never block dispatch. - */ -function syncProjectRootToWorktree(projectRoot: string, worktreePath: string, milestoneId: string | null): void { - if (!worktreePath || !projectRoot || worktreePath === projectRoot) return; - if (!milestoneId) return; - - const prGsd = join(projectRoot, ".gsd"); - const wtGsd = join(worktreePath, ".gsd"); - - // Copy milestone directory from project root to worktree if the project root - // has newer artifacts (e.g. slices that don't exist in the worktree yet) - try { - const srcMilestone = join(prGsd, "milestones", milestoneId); - const dstMilestone = join(wtGsd, "milestones", milestoneId); - if (existsSync(srcMilestone)) { - mkdirSync(dstMilestone, { recursive: true }); - cpSync(srcMilestone, dstMilestone, { recursive: true, force: false }); - } - } catch { /* non-fatal */ } - - // Delete worktree gsd.db so it rebuilds from the freshly synced files. - // Stale DB rows are the root cause of the infinite skip loop (#853). - try { - const wtDb = join(wtGsd, "gsd.db"); - if (existsSync(wtDb)) { - unlinkSync(wtDb); - } - } catch { /* non-fatal */ } -} - -/** - * Sync dispatch-critical .gsd/ state files from worktree to project root. - * Only runs when inside an auto-worktree (worktreePath differs from projectRoot). - * Copies: STATE.md + active milestone directory (roadmap, slice plans, task summaries). - * Non-fatal — sync failure should never block dispatch. - */ -function syncStateToProjectRoot(worktreePath: string, projectRoot: string, milestoneId: string | null): void { - if (!worktreePath || !projectRoot || worktreePath === projectRoot) return; - if (!milestoneId) return; - - const wtGsd = join(worktreePath, ".gsd"); - const prGsd = join(projectRoot, ".gsd"); - - // 1. 
STATE.md — the quick-glance status used by initial deriveState() - try { - const src = join(wtGsd, "STATE.md"); - const dst = join(prGsd, "STATE.md"); - if (existsSync(src)) cpSync(src, dst, { force: true }); - } catch { /* non-fatal */ } - - // 2. Milestone directory — ROADMAP, slice PLANs, task summaries - // Copy the entire milestone .gsd subtree so deriveState reads current checkboxes - try { - const srcMilestone = join(wtGsd, "milestones", milestoneId); - const dstMilestone = join(prGsd, "milestones", milestoneId); - if (existsSync(srcMilestone)) { - mkdirSync(dstMilestone, { recursive: true }); - cpSync(srcMilestone, dstMilestone, { recursive: true, force: true }); - } - } catch { /* non-fatal */ } - - // 3. Merge completed-units.json (set-union of both locations) - // Prevents already-completed units from being re-dispatched after crash/restart. - const srcKeysFile = join(wtGsd, "completed-units.json"); - const dstKeysFile = join(prGsd, "completed-units.json"); - if (existsSync(srcKeysFile)) { - try { - const srcKeys: string[] = JSON.parse(readFileSync(srcKeysFile, "utf8")); - let dstKeys: string[] = []; - if (existsSync(dstKeysFile)) { - try { dstKeys = JSON.parse(readFileSync(dstKeysFile, "utf8")); } catch { /* ignore corrupt dst */ } - } - const merged = [...new Set([...dstKeys, ...srcKeys])]; - writeFileSync(dstKeysFile, JSON.stringify(merged, null, 2)); - } catch { /* non-fatal */ } - } - - // 4. Runtime records — unit dispatch state used by selfHealRuntimeRecords(). - // Without this, a crash during a unit leaves the runtime record only in the - // worktree. If the next session resolves basePath before worktree re-entry, - // selfHeal can't find or clear the stale record (#769). - try { - const srcRuntime = join(wtGsd, "runtime", "units"); - const dstRuntime = join(prGsd, "runtime", "units"); - if (existsSync(srcRuntime)) { - mkdirSync(dstRuntime, { recursive: true }); - cpSync(srcRuntime, dstRuntime, { recursive: true, force: true }); - } - } catch { /* non-fatal */ } -} +// Worktree sync, resource staleness, stale worktree escape → auto-worktree-sync.ts // ─── State ──────────────────────────────────────────────────────────────────── @@ -296,43 +204,8 @@ const MAX_CONSECUTIVE_SKIPS = 3; /** Persisted completed-unit keys — survives restarts. Loaded from .gsd/completed-units.json. */ const completedKeySet = new Set(); -/** Resource version captured at auto-mode start. If the managed-resources - * manifest version changes mid-session (e.g. npm update -g gsd-pi), - * templates on disk may expect variables the in-memory code doesn't provide. - * Detect this and stop gracefully instead of crashing. - * Uses gsdVersion (semver) instead of syncedAt (timestamp) so that - * launching a second session doesn't falsely trigger staleness (#804). */ let resourceVersionOnStart: string | null = null; -function readResourceVersion(): string | null { - const agentDir = process.env.GSD_CODING_AGENT_DIR || join(homedir(), ".gsd", "agent"); - const manifestPath = join(agentDir, "managed-resources.json"); - try { - const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")); - return typeof manifest?.gsdVersion === "string" ? manifest.gsdVersion : null; - } catch { - return null; - } -} - -function checkResourcesStale(): string | null { - if (resourceVersionOnStart === null) return null; - const current = readResourceVersion(); - if (current === null) return null; - if (current !== resourceVersionOnStart) { - return "GSD resources were updated since this session started. 
Restart gsd to load the new code."; - } - return null; -} - -/** - * Resolve whether auto-mode should use worktree isolation. - * Returns true for worktree mode (default), false for branch and none modes. - * Branch mode works directly in the project root — useful for repos - * with git submodules where worktrees don't work well (#531). - * None mode skips all worktree and milestone-branch logic — commits - * land on the current branch with no isolation (#M001-S02). - */ export function shouldUseWorktreeIsolation(): boolean { const prefs = loadEffectiveGSDPreferences()?.preferences?.git; if (prefs?.isolation === "none") return false; @@ -340,36 +213,14 @@ export function shouldUseWorktreeIsolation(): boolean { return true; // default: worktree } -/** - * Detect and escape a stale worktree cwd (#608). - * - * After milestone completion + merge, the worktree directory is removed but - * the process cwd may still point inside `.gsd/worktrees//`. - * When a new session starts, `process.cwd()` is passed as `base` to startAuto - * and all subsequent writes land in the wrong directory. This function detects - * that scenario and chdir back to the project root. - * - * Returns the corrected base path. - */ -function escapeStaleWorktree(base: string): string { - const marker = `${pathSep}.gsd${pathSep}worktrees${pathSep}`; - const idx = base.indexOf(marker); - if (idx === -1) return base; - - // base is inside .gsd/worktrees/ — extract the project root - const projectRoot = base.slice(0, idx); - try { - process.chdir(projectRoot); - } catch { - // If chdir fails, return the original — caller will handle errors downstream - return base; - } - return projectRoot; -} - /** Crash recovery prompt — set by startAuto, consumed by first dispatchNextUnit */ let pendingCrashRecovery: string | null = null; +/** Pending verification retry — set when gate fails with retries remaining, consumed by dispatchNextUnit */ +let pendingVerificationRetry: { unitId: string; failureContext: string; attempt: number } | null = null; +/** Verification retry count per unitId — separate from unitDispatchCount which tracks artifact-missing retries */ +const verificationRetryCount = new Map(); + /** Session file path captured at pause — used to synthesize recovery briefing on resume */ let pausedSessionFile: string | null = null; @@ -405,6 +256,8 @@ let originalModelProvider: string | null = null; let unitTimeoutHandle: ReturnType | null = null; let wrapupWarningHandle: ReturnType | null = null; let idleWatchdogHandle: ReturnType | null = null; +/** Context-pressure continue-here monitor — fires once when context usage >= 70% */ +let continueHereHandle: ReturnType | null = null; /** Dispatch gap watchdog — detects when the state machine stalls between units. * After handleAgentEnd completes, if auto-mode is still active but no new unit @@ -426,33 +279,8 @@ let _sigtermHandler: (() => void) | null = null; * Maps toolCallId → start timestamp (ms) so the idle watchdog can detect tools that have been * running suspiciously long (e.g., a Bash command hung because `&` kept stdout open). 
*/ -const inFlightTools = new Map(); - -type BudgetAlertLevel = 0 | 75 | 80 | 90 | 100; - -export function getBudgetAlertLevel(budgetPct: number): BudgetAlertLevel { - if (budgetPct >= 1.0) return 100; - if (budgetPct >= 0.90) return 90; - if (budgetPct >= 0.80) return 80; - if (budgetPct >= 0.75) return 75; - return 0; -} - -export function getNewBudgetAlertLevel(previousLevel: BudgetAlertLevel, budgetPct: number): BudgetAlertLevel | null { - const currentLevel = getBudgetAlertLevel(budgetPct); - if (currentLevel === 0 || currentLevel <= previousLevel) return null; - return currentLevel; -} - -export function getBudgetEnforcementAction( - enforcement: BudgetEnforcementMode, - budgetPct: number, -): "none" | "warn" | "pause" | "halt" { - if (budgetPct < 1.0) return "none"; - if (enforcement === "halt") return "halt"; - if (enforcement === "pause") return "pause"; - return "warn"; -} +// Re-export budget utilities for external consumers +export { getBudgetAlertLevel, getNewBudgetAlertLevel, getBudgetEnforcementAction } from "./auto-budget.js"; /** Wrapper: register SIGTERM handler and store reference. */ function registerSigtermHandler(currentBasePath: string): void { @@ -504,30 +332,17 @@ export function isAutoPaused(): boolean { return paused; } -/** - * Mark a tool execution as in-flight. Called from index.ts on tool_execution_start. - * Records start time so the idle watchdog can detect tools hung longer than the idle timeout. - */ +// Tool tracking — delegates to auto-tool-tracking.ts export function markToolStart(toolCallId: string): void { - if (!active) return; - inFlightTools.set(toolCallId, Date.now()); + _markToolStart(toolCallId, active); } -/** - * Mark a tool execution as completed. Called from index.ts on tool_execution_end. - */ export function markToolEnd(toolCallId: string): void { - inFlightTools.delete(toolCallId); + _markToolEnd(toolCallId); } -/** - * Returns the age (ms) of the oldest currently in-flight tool, or 0 if none. - * Exported for testing. - */ export function getOldestInFlightToolAgeMs(): number { - if (inFlightTools.size === 0) return 0; - const oldestStart = Math.min(...inFlightTools.values()); - return Date.now() - oldestStart; + return _getOldestInFlightToolAgeMs(); } /** @@ -582,7 +397,11 @@ function clearUnitTimeout(): void { clearInterval(idleWatchdogHandle); idleWatchdogHandle = null; } - inFlightTools.clear(); + if (continueHereHandle) { + clearInterval(continueHereHandle); + continueHereHandle = null; + } + clearInFlightTools(); clearDispatchGapWatchdog(); } @@ -593,6 +412,17 @@ function clearDispatchGapWatchdog(): void { } } +/** Build snapshot metric opts, enriching with continueHereFired from the runtime record. */ +function buildSnapshotOpts(unitType: string, unitId: string): { continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number } & Record { + const runtime = currentUnit ? readUnitRuntimeRecord(basePath, unitType, unitId) : null; + return { + promptCharCount: lastPromptCharCount, + baselineCharCount: lastBaselineCharCount, + ...(currentUnitRouting ?? {}), + ...(runtime?.continueHereFired ? { continueHereFired: true } : {}), + }; +} + /** * Start a watchdog that fires if no new unit is dispatched within DISPATCH_GAP_TIMEOUT_MS * after handleAgentEnd completes. 
This catches the case where the dispatch chain silently @@ -717,7 +547,7 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI, reason unitDispatchCount.clear(); unitRecoveryCount.clear(); unitConsecutiveSkips.clear(); - inFlightTools.clear(); + clearInFlightTools(); lastBudgetAlertLevel = 0; unitLifetimeDispatches.clear(); currentUnit = null; @@ -730,6 +560,8 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI, reason clearActivityLogState(); resetProactiveHealing(); pendingCrashRecovery = null; + pendingVerificationRetry = null; + verificationRetryCount.clear(); pausedSessionFile = null; _handlingAgentEnd = false; ctx?.ui.setStatus("gsd-auto", undefined); @@ -767,6 +599,8 @@ export async function pauseAuto(ctx?: ExtensionContext, _pi?: ExtensionAPI): Pro active = false; paused = true; + pendingVerificationRetry = null; + verificationRetryCount.clear(); // Preserve: unitDispatchCount, currentUnit, basePath, verbose, cmdCtx, // completedUnits, autoStartTime, currentMilestoneId, originalModelId // — all needed for resume and dashboard display @@ -984,6 +818,26 @@ export async function startAuto( // after a discussion that wrote new artifacts) may cause deriveState to // return pre-planning when the roadmap already exists (#800). invalidateAllCaches(); + + // ── Clean stale runtime unit files for completed milestones (#887) ─────── + // After resource-update restart, stale runtime/units/*.json files from + // previously completed milestones can cause deriveState to resume the wrong + // milestone. If a milestone has a SUMMARY file, its unit files are stale. + try { + const runtimeUnitsDir = join(gsdRoot(base), "runtime", "units"); + if (existsSync(runtimeUnitsDir)) { + for (const file of readdirSync(runtimeUnitsDir)) { + if (!file.endsWith(".json")) continue; + const midMatch = file.match(/(M\d+(?:-[a-z0-9]{6})?)/); + if (!midMatch) continue; + const mid = midMatch[1]; + if (resolveMilestoneFile(base, mid, "SUMMARY")) { + try { unlinkSync(join(runtimeUnitsDir, file)); } catch { /* non-fatal */ } + } + } + } + } catch { /* non-fatal — don't block startup */ } + let state = await deriveState(base); // ── Stale worktree state recovery (#654) ───────────────────────────────── @@ -1574,6 +1428,145 @@ export async function handleAgentEnd( } } + // ── Verification gate: run typecheck/lint/test after execute-task ── + if (currentUnit && currentUnit.type === "execute-task") { + try { + const effectivePrefs = loadEffectiveGSDPreferences(); + const prefs = effectivePrefs?.preferences; + + // Read task plan verify field from the current task's slice plan + // unitId format is "M001/S01/T03" — extract mid, sid, tid + const parts = currentUnit.id.split("/"); + let taskPlanVerify: string | undefined; + if (parts.length >= 3) { + const [mid, sid, tid] = parts; + const planFile = resolveSliceFile(basePath, mid, sid, "PLAN"); + if (planFile) { + const planContent = await loadFile(planFile); + if (planContent) { + const slicePlan = parsePlan(planContent); + const taskEntry = slicePlan?.tasks?.find(t => t.id === tid); + taskPlanVerify = taskEntry?.verify; + } + } + } + + const result = runVerificationGate({ + basePath, + unitId: currentUnit.id, + cwd: basePath, + preferenceCommands: prefs?.verification_commands, + taskPlanVerify, + }); + + // Capture runtime errors from bg-shell and browser console + const runtimeErrors = await captureRuntimeErrors(); + if (runtimeErrors.length > 0) { + result.runtimeErrors = runtimeErrors; + // Blocking runtime errors override gate 
pass + if (runtimeErrors.some(e => e.blocking)) { + result.passed = false; + } + } + + // Conditional dependency audit (R008) + const auditWarnings = runDependencyAudit(basePath); + if (auditWarnings.length > 0) { + result.auditWarnings = auditWarnings; + process.stderr.write(`verification-gate: ${auditWarnings.length} audit warning(s)\n`); + for (const w of auditWarnings) { + process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`); + } + } + + // Auto-fix retry preferences (R005 / D005) + const autoFixEnabled = prefs?.verification_auto_fix !== false; // default true + const maxRetries = typeof prefs?.verification_max_retries === "number" ? prefs.verification_max_retries : 2; + const completionKey = `${currentUnit.type}/${currentUnit.id}`; + + if (result.checks.length > 0) { + const passCount = result.checks.filter(c => c.exitCode === 0).length; + const total = result.checks.length; + if (result.passed) { + ctx.ui.notify(`Verification gate: ${passCount}/${total} checks passed`); + } else { + const failures = result.checks.filter(c => c.exitCode !== 0); + const failNames = failures.map(f => f.command).join(", "); + ctx.ui.notify(`Verification gate: FAILED — ${failNames}`); + process.stderr.write(`verification-gate: ${total - passCount}/${total} checks failed\n`); + for (const f of failures) { + process.stderr.write(` ${f.command} exited ${f.exitCode}\n`); + if (f.stderr) process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`); + } + } + } + + // Log blocking runtime errors to stderr + if (result.runtimeErrors?.some(e => e.blocking)) { + const blockingErrors = result.runtimeErrors.filter(e => e.blocking); + process.stderr.write(`verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`); + for (const err of blockingErrors) { + process.stderr.write(` [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`); + } + } + + // Write verification evidence JSON artifact + const attempt = verificationRetryCount.get(currentUnit.id) ?? 
0; + if (parts.length >= 3) { + try { + const [mid, sid, tid] = parts; + const sDir = resolveSlicePath(basePath, mid, sid); + if (sDir) { + const tasksDir = join(sDir, "tasks"); + if (result.passed) { + writeVerificationJSON(result, tasksDir, tid, currentUnit.id); + } else { + const nextAttempt = attempt + 1; + writeVerificationJSON(result, tasksDir, tid, currentUnit.id, nextAttempt, maxRetries); + } + } + } catch (evidenceErr) { + process.stderr.write(`verification-evidence: write error — ${(evidenceErr as Error).message}\n`); + } + } + + // ── Auto-fix retry logic ── + if (result.passed) { + // Gate passed — clear retry state and continue normal flow + verificationRetryCount.delete(currentUnit.id); + pendingVerificationRetry = null; + } else if (autoFixEnabled && attempt + 1 <= maxRetries) { + // Gate failed, retries remaining — set up retry and return early + const nextAttempt = attempt + 1; + verificationRetryCount.set(currentUnit.id, nextAttempt); + pendingVerificationRetry = { + unitId: currentUnit.id, + failureContext: formatFailureContext(result), + attempt: nextAttempt, + }; + ctx.ui.notify(`Verification failed — auto-fix attempt ${nextAttempt}/${maxRetries}`, "warning"); + // Remove completion key so dispatchNextUnit re-dispatches this unit + completedKeySet.delete(completionKey); + removePersistedKey(basePath, completionKey); + return; // ← Critical: exit before DB dual-write and post-unit hooks + } else { + // Gate failed, retries exhausted (or auto-fix disabled) — pause for human review + const exhaustedAttempt = attempt + 1; + verificationRetryCount.delete(currentUnit.id); + pendingVerificationRetry = null; + ctx.ui.notify( + `Verification gate FAILED after ${exhaustedAttempt > maxRetries ? exhaustedAttempt - 1 : exhaustedAttempt} retries — pausing for human review`, + "error", + ); + await pauseAuto(ctx, pi); + return; + } + } catch (err) { + // Gate errors are non-fatal — log and continue + process.stderr.write(`verification-gate: error — ${(err as Error).message}\n`); + } + } + // ── DB dual-write: re-import changed markdown files so next unit's prompts use fresh data ── if (isDbAvailable()) { try { @@ -1591,18 +1584,7 @@ export async function handleAgentEnd( // Dispatch the hook unit instead of normal flow const hookStartedAt = Date.now(); if (currentUnit) { - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); - const hookActivityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); - if (hookActivityFile) { - try { - const { buildMemoryLLMCall, extractMemoriesFromUnit } = await import('./memory-extractor.js'); - const llmCallFn = buildMemoryLLMCall(ctx); - if (llmCallFn) { - extractMemoriesFromUnit(hookActivityFile, currentUnit.type, currentUnit.id, llmCallFn).catch(() => {}); - } - } catch { /* non-fatal */ } - } + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); } currentUnit = { type: hookUnit.unitType, id: hookUnit.unitId, startedAt: hookStartedAt }; writeUnitRuntimeRecord(basePath, hookUnit.unitType, hookUnit.unitId, hookStartedAt, { @@ -1742,18 +1724,7 @@ export async function handleAgentEnd( // Close out previous unit metrics if (currentUnit) { - const modelId = ctx.model?.id ?? 
"unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId); - const triageActivityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); - if (triageActivityFile) { - try { - const { buildMemoryLLMCall, extractMemoriesFromUnit } = await import('./memory-extractor.js'); - const llmCallFn = buildMemoryLLMCall(ctx); - if (llmCallFn) { - extractMemoriesFromUnit(triageActivityFile, currentUnit.type, currentUnit.id, llmCallFn).catch(() => {}); - } - } catch { /* non-fatal */ } - } + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt); } // Dispatch triage as a new unit (early-dispatch-and-return) @@ -1829,18 +1800,7 @@ export async function handleAgentEnd( // Close out previous unit metrics if (currentUnit) { - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId); - const qtActivityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); - if (qtActivityFile) { - try { - const { buildMemoryLLMCall, extractMemoriesFromUnit } = await import('./memory-extractor.js'); - const llmCallFn = buildMemoryLLMCall(ctx); - if (llmCallFn) { - extractMemoriesFromUnit(qtActivityFile, currentUnit.type, currentUnit.id, llmCallFn).catch(() => {}); - } - } catch { /* non-fatal */ } - } + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt); } // Dispatch quick-task as a new unit @@ -2085,7 +2045,7 @@ async function dispatchNextUnit( // once at startup. If resources were re-synced (e.g. /gsd:update, npm update, // or dev copy-resources), templates may expect variables the in-memory code // doesn't provide. Stop gracefully instead of crashing. - const staleMsg = checkResourcesStale(); + const staleMsg = checkResourcesStale(resourceVersionOnStart); if (staleMsg) { await stopAuto(ctx, pi, staleMsg); return; @@ -2146,6 +2106,55 @@ async function dispatchNextUnit( if (vizPrefs?.auto_visualize) { ctx.ui.notify("Run /gsd visualize to see progress overview.", "info"); } + // Auto-generate HTML report snapshot on milestone completion (default: on, disable with auto_report: false) + if (vizPrefs?.auto_report !== false) { + try { + const { loadVisualizerData } = await import("./visualizer-data.js"); + const { generateHtmlReport } = await import("./export-html.js"); + const { writeReportSnapshot, reportsDir } = await import("./reports.js"); + const { basename } = await import("node:path"); + const snapData = await loadVisualizerData(basePath); + const completedMs = snapData.milestones.find(m => m.id === currentMilestoneId); + const msTitle = completedMs?.title ?? currentMilestoneId; + const gsdVersion = process.env.GSD_VERSION ?? "0.0.0"; + const projName = basename(basePath); + const doneSlices = snapData.milestones.reduce((s, m) => s + m.slices.filter(sl => sl.done).length, 0); + const totalSlices = snapData.milestones.reduce((s, m) => s + m.slices.length, 0); + const outPath = writeReportSnapshot({ + basePath, + html: generateHtmlReport(snapData, { + projectName: projName, + projectPath: basePath, + gsdVersion, + milestoneId: currentMilestoneId, + indexRelPath: "index.html", + }), + milestoneId: currentMilestoneId, + milestoneTitle: msTitle, + kind: "milestone", + projectName: projName, + projectPath: basePath, + gsdVersion, + totalCost: snapData.totals?.cost ?? 0, + totalTokens: snapData.totals?.tokens.total ?? 0, + totalDuration: snapData.totals?.duration ?? 
0, + doneSlices, + totalSlices, + doneMilestones: snapData.milestones.filter(m => m.status === "complete").length, + totalMilestones: snapData.milestones.length, + phase: snapData.phase, + }); + ctx.ui.notify( + `Report saved: .gsd/reports/${basename(outPath)} — open index.html to browse progression.`, + "info", + ); + } catch (err) { + ctx.ui.notify( + `Report generation failed: ${err instanceof Error ? err.message : String(err)}`, + "warning", + ); + } + } // Reset stuck detection for new milestone unitDispatchCount.clear(); unitRecoveryCount.clear(); @@ -2238,9 +2247,7 @@ async function dispatchNextUnit( if (!mid) { // Save final session before stopping if (currentUnit) { - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); - saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); } const incomplete = state.registry.filter(m => m.status !== "complete"); @@ -2283,9 +2290,7 @@ async function dispatchNextUnit( // After merge guard removal (branchless architecture), mid/midTitle could be undefined if (!mid || !midTitle) { if (currentUnit) { - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); - saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); } const noMilestoneReason = !mid ? "No active milestone after merge reconciliation" @@ -2301,9 +2306,7 @@ async function dispatchNextUnit( if (state.phase === "complete") { if (currentUnit) { - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); - saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); } // Clear completed-units.json for the finished milestone so it doesn't grow unbounded. try { @@ -2371,9 +2374,7 @@ async function dispatchNextUnit( if (state.phase === "blocked") { if (currentUnit) { - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); - saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); } const blockerMsg = `Blocked: ${state.blockers.join(", ")}`; await stopAuto(ctx, pi, blockerMsg); @@ -2482,9 +2483,7 @@ async function dispatchNextUnit( if (dispatchResult.action === "stop") { if (currentUnit) { - const modelId = ctx.model?.id ?? 
"unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); - saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); } await stopAuto(ctx, pi, dispatchResult.reason); return; @@ -2530,7 +2529,7 @@ async function dispatchNextUnit( return; } - const observabilityIssues = await collectObservabilityWarnings(ctx, unitType, unitId); + const observabilityIssues = await _collectObservabilityWarnings(ctx, basePath, unitType, unitId); // Idempotency: skip units already completed in a prior session. const idempotencyKey = `${unitType}/${unitId}`; @@ -2704,10 +2703,10 @@ async function dispatchNextUnit( unitLifetimeDispatches.set(dispatchKey, lifetimeCount); if (lifetimeCount > MAX_LIFETIME_DISPATCHES) { if (currentUnit) { - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); + } else { + saveActivityLog(ctx, basePath, unitType, unitId); } - saveActivityLog(ctx, basePath, unitType, unitId); const expected = diagnoseExpectedArtifact(unitType, unitId, basePath); await stopAuto(ctx, pi, `Hard loop: ${unitType} ${unitId}`); ctx.ui.notify( @@ -2718,10 +2717,10 @@ async function dispatchNextUnit( } if (prevCount >= MAX_UNIT_DISPATCHES) { if (currentUnit) { - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); + } else { + saveActivityLog(ctx, basePath, unitType, unitId); } - saveActivityLog(ctx, basePath, unitType, unitId); // Final reconciliation pass for execute-task: write any missing durable // artifacts (summary placeholder + [x] checkbox) so the pipeline can @@ -2876,20 +2875,7 @@ async function dispatchNextUnit( // Snapshot metrics + activity log for the PREVIOUS unit before we reassign. // The session still holds the previous unit's data (newSession hasn't fired yet). if (currentUnit) { - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? 
{}) }); - const activityFile = saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id); - - // Fire-and-forget memory extraction from completed unit - if (activityFile) { - try { - const { buildMemoryLLMCall, extractMemoriesFromUnit } = await import('./memory-extractor.js'); - const llmCallFn = buildMemoryLLMCall(ctx); - if (llmCallFn) { - extractMemoriesFromUnit(activityFile, currentUnit.type, currentUnit.id, llmCallFn).catch(() => {}); - } - } catch { /* non-fatal */ } - } + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); // Record routing outcome for adaptive learning if (currentUnitRouting) { @@ -2975,6 +2961,17 @@ async function dispatchNextUnit( // Cap injected content to prevent unbounded prompt growth → OOM const MAX_RECOVERY_CHARS = 50_000; let finalPrompt = prompt; + + // Verification retry — inject failure context so the agent can auto-fix + if (pendingVerificationRetry) { + const retryCtx = pendingVerificationRetry; + pendingVerificationRetry = null; + const capped = retryCtx.failureContext.length > MAX_RECOVERY_CHARS + ? retryCtx.failureContext.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...failure context truncated]" + : retryCtx.failureContext; + finalPrompt = `**VERIFICATION FAILED — AUTO-FIX ATTEMPT ${retryCtx.attempt}**\n\nThe verification gate ran after your previous attempt and found failures. Fix these issues before completing the task.\n\n${capped}\n\n---\n\n${finalPrompt}`; + } + if (pendingCrashRecovery) { const capped = pendingCrashRecovery.length > MAX_RECOVERY_CHARS ? pendingCrashRecovery.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...recovery briefing truncated to prevent memory exhaustion]" @@ -3018,158 +3015,9 @@ async function dispatchNextUnit( } } - // Switch model if preferences specify one for this unit type - // Try primary model, then fallbacks in order if setting fails - const modelConfig = resolveModelWithFallbacksForUnit(unitType); - if (modelConfig) { - const availableModels = ctx.modelRegistry.getAvailable(); - - // ─── Dynamic Model Routing ───────────────────────────────────────── - // If enabled, classify unit complexity and potentially downgrade to a - // cheaper model. The user's configured model is the ceiling. - const routingConfig = resolveDynamicRoutingConfig(); - let effectiveModelConfig = modelConfig; - let routingTierLabel = ""; - currentUnitRouting = null; - - if (routingConfig.enabled) { - // Compute budget pressure if budget ceiling is set - let budgetPct: number | undefined; - if (routingConfig.budget_pressure !== false) { - const budgetCeiling = prefs?.budget_ceiling; - if (budgetCeiling !== undefined && budgetCeiling > 0) { - const currentLedger = getLedger(); - const totalCost = currentLedger ? 
getProjectTotals(currentLedger.units).cost : 0; - budgetPct = totalCost / budgetCeiling; - } - } - - // Classify complexity (hook routing controlled by config.hooks) - const isHook = unitType.startsWith("hook/"); - const shouldClassify = !isHook || routingConfig.hooks !== false; - - if (shouldClassify) { - const classification = classifyUnitComplexity(unitType, unitId, basePath, budgetPct); - const availableModelIds = availableModels.map(m => m.id); - const routing = resolveModelForComplexity(classification, modelConfig, routingConfig, availableModelIds); - - if (routing.wasDowngraded) { - effectiveModelConfig = { - primary: routing.modelId, - fallbacks: routing.fallbacks, - }; - if (verbose) { - ctx.ui.notify( - `Dynamic routing [${tierLabel(classification.tier)}]: ${routing.modelId} (${classification.reason})`, - "info", - ); - } - } - routingTierLabel = ` [${tierLabel(classification.tier)}]`; - currentUnitRouting = { tier: classification.tier, modelDowngraded: routing.wasDowngraded }; - } - } - - const modelsToTry = [effectiveModelConfig.primary, ...effectiveModelConfig.fallbacks]; - let modelSet = false; - - for (const modelId of modelsToTry) { - // Resolve model from available models. - // Handles multiple formats: - // "provider/model" → explicit provider targeting (e.g. "anthropic/claude-opus-4-6") - // "bare-id" → match by ID across providers - // "org/model-name" → OpenRouter-style IDs where the full string is the model ID - // "openrouter/org/model" → explicit provider + OpenRouter model ID - const slashIdx = modelId.indexOf("/"); - let model; - if (slashIdx !== -1) { - const maybeProvider = modelId.substring(0, slashIdx); - const id = modelId.substring(slashIdx + 1); - - // Check if the prefix before the first slash is a known provider - const knownProviders = new Set(availableModels.map(m => m.provider.toLowerCase())); - if (knownProviders.has(maybeProvider.toLowerCase())) { - // Explicit "provider/model" format (handles "openrouter/org/model" too) - model = availableModels.find( - m => m.provider.toLowerCase() === maybeProvider.toLowerCase() - && m.id.toLowerCase() === id.toLowerCase(), - ); - } - - // If the prefix wasn't a known provider, or no match was found within that provider, - // try matching the full string as a model ID (OpenRouter-style IDs like "org/model-name") - if (!model) { - const lower = modelId.toLowerCase(); - model = availableModels.find( - m => m.id.toLowerCase() === lower - || `${m.provider}/${m.id}`.toLowerCase() === lower, - ); - } - } else { - // For bare IDs, prefer the current session's provider, then first available match - const currentProvider = ctx.model?.provider; - const exactProviderMatch = availableModels.find( - m => m.id === modelId && m.provider === currentProvider, - ); - const anyMatch = availableModels.find(m => m.id === modelId); - model = exactProviderMatch ?? anyMatch; - - // Warn if the ID is ambiguous across providers - if (anyMatch && !exactProviderMatch) { - const providers = availableModels - .filter(m => m.id === modelId) - .map(m => m.provider); - if (providers.length > 1) { - ctx.ui.notify( - `Model ID "${modelId}" exists in multiple providers (${providers.join(", ")}). ` + - `Resolved to ${anyMatch.provider}. 
Use "provider/model" format for explicit targeting.`, - "warning", - ); - } - } - } - if (!model) { - if (verbose) ctx.ui.notify(`Model ${modelId} not found, trying fallback.`, "info"); - continue; - } - - const ok = await pi.setModel(model, { persist: false }); - if (ok) { - const fallbackNote = modelId === effectiveModelConfig.primary - ? "" - : ` (fallback from ${effectiveModelConfig.primary})`; - const phase = unitPhaseLabel(unitType); - ctx.ui.notify(`Model [${phase}]${routingTierLabel}: ${model.provider}/${model.id}${fallbackNote}`, "info"); - modelSet = true; - break; - } else { - const nextModel = modelsToTry[modelsToTry.indexOf(modelId) + 1]; - if (nextModel) { - if (verbose) ctx.ui.notify(`Failed to set model ${modelId}, trying ${nextModel}...`, "info"); - } else { - ctx.ui.notify(`All preferred models unavailable for ${unitType}. Using default.`, "warning"); - } - } - } - - // modelSet=false is already handled by the "all fallbacks exhausted" warning above - } else if (autoModeStartModel) { - // No model preference for this unit type — re-apply the model captured - // at auto-mode start to prevent bleed from the shared global settings.json - // when multiple GSD instances run concurrently (#650). - const availableModels = ctx.modelRegistry.getAvailable(); - const startModel = availableModels.find( - m => m.provider === autoModeStartModel!.provider && m.id === autoModeStartModel!.id, - ); - if (startModel) { - const ok = await pi.setModel(startModel, { persist: false }); - if (!ok) { - // Fallback: try matching just by ID across providers - const byId = availableModels.find(m => m.id === autoModeStartModel!.id); - if (byId) await pi.setModel(byId, { persist: false }); - } - } - } + // Select and apply model for this unit (dynamic routing, fallback chains, etc.) + const modelResult = await selectAndApplyModel(ctx, pi, unitType, unitId, basePath, prefs, verbose, autoModeStartModel); + currentUnitRouting = modelResult.routing; // Start progress-aware supervision: a soft warning, an idle watchdog, and // a larger hard ceiling. Productive long-running tasks may continue past the @@ -3216,8 +3064,8 @@ async function dispatchNextUnit( // if the tool started recently. A tool in-flight for longer than the idle // timeout is likely stuck — e.g., `python -m http.server 8080 &` keeps the // shell's stdout/stderr open, causing the Bash tool to hang indefinitely. - if (inFlightTools.size > 0) { - const oldestStart = Math.min(...inFlightTools.values()); + if (getInFlightToolCount() > 0) { + const oldestStart = getOldestInFlightToolStart()!; const toolAgeMs = Date.now() - oldestStart; if (toolAgeMs < idleTimeoutMs) { writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { @@ -3247,12 +3095,12 @@ async function dispatchNextUnit( } if (currentUnit) { - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? 
{}) }); + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); + } else { + saveActivityLog(ctx, basePath, unitType, unitId); } - saveActivityLog(ctx, basePath, unitType, unitId); - const recovery = await recoverTimedOutUnit(ctx, pi, unitType, unitId, "idle"); + const recovery = await recoverTimedOutUnit(ctx, pi, unitType, unitId, "idle", buildRecoveryContext()); if (recovery === "recovered") return; writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { @@ -3273,12 +3121,12 @@ async function dispatchNextUnit( phase: "timeout", timeoutAt: Date.now(), }); - const modelId = ctx.model?.id ?? "unknown"; - snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) }); + await closeoutUnit(ctx, basePath, currentUnit.type, currentUnit.id, currentUnit.startedAt, buildSnapshotOpts(currentUnit.type, currentUnit.id)); + } else { + saveActivityLog(ctx, basePath, unitType, unitId); } - saveActivityLog(ctx, basePath, unitType, unitId); - const recovery = await recoverTimedOutUnit(ctx, pi, unitType, unitId, "hard"); + const recovery = await recoverTimedOutUnit(ctx, pi, unitType, unitId, "hard", buildRecoveryContext()); if (recovery === "recovered") return; ctx.ui.notify( @@ -3288,6 +3136,67 @@ async function dispatchNextUnit( await pauseAuto(ctx, pi); }, hardTimeoutMs); + // ── Continue-here context-pressure monitor ──────────────────────────── + // Polls context usage every 15s. When usage hits the continue-here + // threshold (70%), sends a one-shot wrap-up signal so the agent finishes + // gracefully and the next unit gets a fresh session. This is softer than + // context_pause_threshold which hard-pauses auto-mode entirely. + if (continueHereHandle) { + clearInterval(continueHereHandle); + continueHereHandle = null; + } + const executorContextWindow = resolveExecutorContextWindow( + ctx.modelRegistry as Parameters[0], + prefs as Parameters[1], + ctx.model?.contextWindow, + ); + const continueHereThreshold = computeBudgets(executorContextWindow).continueThresholdPercent; + continueHereHandle = setInterval(() => { + if (!active || !currentUnit || !cmdCtx) return; + // One-shot guard: skip if already fired for this unit + const runtime = readUnitRuntimeRecord(basePath, unitType, unitId); + if (runtime?.continueHereFired) return; + + const contextUsage = cmdCtx.getContextUsage(); + if (!contextUsage || contextUsage.percent == null || contextUsage.percent < continueHereThreshold) return; + + // Fire once — mark runtime record and send wrap-up message + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit!.startedAt, { + continueHereFired: true, + }); + + if (verbose) { + ctx.ui.notify( + `Context at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%) — sending wrap-up signal.`, + "info", + ); + } + + pi.sendMessage( + { + customType: "gsd-auto-wrapup", + display: verbose, + content: [ + "**CONTEXT BUDGET WARNING — wrap up this unit now.**", + `Context window is at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%).`, + "The next unit needs a fresh context to work effectively. Wrap up now:", + "1. Finish any in-progress file writes", + "2. Write or update the required durable artifacts (summary, checkboxes)", + "3. Mark task state on disk correctly", + "4. 
Leave precise resume notes if anything remains unfinished", + "Do NOT start new sub-tasks or investigations.", + ].join("\n"), + }, + { triggerTurn: true }, + ); + + // Clear the interval after firing — no need to keep polling + if (continueHereHandle) { + clearInterval(continueHereHandle); + continueHereHandle = null; + } + }, 15_000); + // Inject prompt — verify auto-mode still active (guards against race with timeout/pause) if (!active) return; pi.sendMessage( @@ -3356,296 +3265,19 @@ function ensurePreconditions( // ─── Diagnostics ────────────────────────────────────────────────────────────── -async function collectObservabilityWarnings( - ctx: ExtensionContext, - unitType: string, - unitId: string, -): Promise { - // Hook units have custom artifacts — skip standard observability checks - if (unitType.startsWith("hook/")) return []; +// collectObservabilityWarnings + buildObservabilityRepairBlock → auto-observability.ts - const parts = unitId.split("/"); - const mid = parts[0]; - const sid = parts[1]; - const tid = parts[2]; +// recoverTimedOutUnit → auto-timeout-recovery.ts - if (!mid || !sid) return []; - - let issues = [] as Awaited>; - - if (unitType === "plan-slice") { - issues = await validatePlanBoundary(basePath, mid, sid); - } else if (unitType === "execute-task" && tid) { - issues = await validateExecuteBoundary(basePath, mid, sid, tid); - } else if (unitType === "complete-slice") { - issues = await validateCompleteBoundary(basePath, mid, sid); - } - - if (issues.length > 0) { - ctx.ui.notify( - `Observability check (${unitType}) found ${issues.length} warning${issues.length === 1 ? "" : "s"}:\n${formatValidationIssues(issues)}`, - "warning", - ); - } - - return issues; -} - -function buildObservabilityRepairBlock(issues: import("./observability-validator.ts").ValidationIssue[]): string { - if (issues.length === 0) return ""; - const items = issues.map(issue => { - const fileName = issue.file.split("/").pop() || issue.file; - let line = `- **${fileName}**: ${issue.message}`; - if (issue.suggestion) line += ` → ${issue.suggestion}`; - return line; - }); - return [ - "", - "---", - "", - "## Pre-flight: Observability gaps to fix FIRST", - "", - "The following issues were detected in plan/summary files for this unit.", - "**Read each flagged file, apply the fix described, then proceed with the unit.**", - "", - ...items, - "", - "---", - "", - ].join("\n"); -} - -async function recoverTimedOutUnit( - ctx: ExtensionContext, - pi: ExtensionAPI, - unitType: string, - unitId: string, - reason: "idle" | "hard", -): Promise<"recovered" | "paused"> { - if (!currentUnit) return "paused"; - - const runtime = readUnitRuntimeRecord(basePath, unitType, unitId); - const recoveryAttempts = runtime?.recoveryAttempts ?? 0; - const maxRecoveryAttempts = reason === "idle" ? 2 : 1; - - const recoveryKey = `${unitType}/${unitId}`; - const attemptNumber = (unitRecoveryCount.get(recoveryKey) ?? 0) + 1; - unitRecoveryCount.set(recoveryKey, attemptNumber); - - if (attemptNumber > 1) { - // Exponential backoff: 2^(n-1) seconds, capped at 30s - const backoffMs = Math.min(1000 * Math.pow(2, attemptNumber - 2), 30000); - ctx.ui.notify( - `Recovery attempt ${attemptNumber} for ${unitType} ${unitId}. 
Waiting ${backoffMs / 1000}s before retry.`, - "info", - ); - await new Promise(r => setTimeout(r, backoffMs)); - } - - if (unitType === "execute-task") { - const status = await inspectExecuteTaskDurability(basePath, unitId); - if (!status) return "paused"; - - writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { - recovery: status, - }); - - const durableComplete = status.summaryExists && status.taskChecked && status.nextActionAdvanced; - if (durableComplete) { - writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { - phase: "finalized", - recovery: status, - }); - ctx.ui.notify( - `${reason === "idle" ? "Idle" : "Timeout"} recovery: ${unitType} ${unitId} already completed on disk. Continuing auto-mode. (attempt ${attemptNumber})`, - "info", - ); - unitRecoveryCount.delete(recoveryKey); - await dispatchNextUnit(ctx, pi); - return "recovered"; - } - - if (recoveryAttempts < maxRecoveryAttempts) { - const isEscalation = recoveryAttempts > 0; - writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { - phase: "recovered", - recovery: status, - recoveryAttempts: recoveryAttempts + 1, - lastRecoveryReason: reason, - lastProgressAt: Date.now(), - progressCount: (runtime?.progressCount ?? 0) + 1, - lastProgressKind: reason === "idle" ? "idle-recovery-retry" : "hard-recovery-retry", - }); - - const steeringLines = isEscalation - ? [ - `**FINAL ${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — last chance before this task is skipped.**`, - `You are still executing ${unitType} ${unitId}.`, - `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`, - `Current durability status: ${formatExecuteTaskRecoveryStatus(status)}.`, - "You MUST finish the durable output NOW, even if incomplete.", - "Write the task summary with whatever you have accomplished so far.", - "Mark the task [x] in the plan. Commit your work.", - "A partial summary is infinitely better than no summary.", - ] - : [ - `**${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — do not stop.**`, - `You are still executing ${unitType} ${unitId}.`, - `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`, - `Current durability status: ${formatExecuteTaskRecoveryStatus(status)}.`, - "Do not keep exploring.", - "Immediately finish the required durable output for this unit.", - "If full completion is impossible, write the partial artifact/state needed for recovery and make the blocker explicit.", - ]; - - pi.sendMessage( - { - customType: "gsd-auto-timeout-recovery", - display: verbose, - content: steeringLines.join("\n"), - }, - { triggerTurn: true, deliverAs: "steer" }, - ); - ctx.ui.notify( - `${reason === "idle" ? "Idle" : "Timeout"} recovery: steering ${unitType} ${unitId} to finish durable output (attempt ${attemptNumber}, session ${recoveryAttempts + 1}/${maxRecoveryAttempts}).`, - "warning", - ); - return "recovered"; - } - - // Retries exhausted — write missing durable artifacts and advance. - const diagnostic = formatExecuteTaskRecoveryStatus(status); - const [mid, sid, tid] = unitId.split("/"); - const skipped = mid && sid && tid - ? 
skipExecuteTask(basePath, mid, sid, tid, status, reason, maxRecoveryAttempts) - : false; - - if (skipped) { - writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { - phase: "skipped", - recovery: status, - recoveryAttempts: recoveryAttempts + 1, - lastRecoveryReason: reason, - }); - ctx.ui.notify( - `${unitType} ${unitId} skipped after ${maxRecoveryAttempts} recovery attempts (${diagnostic}). Blocker artifacts written. Advancing pipeline. (attempt ${attemptNumber})`, - "warning", - ); - unitRecoveryCount.delete(recoveryKey); - await dispatchNextUnit(ctx, pi); - return "recovered"; - } - - // Fallback: couldn't write skip artifacts — pause as before. - writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { - phase: "paused", - recovery: status, - recoveryAttempts: recoveryAttempts + 1, - lastRecoveryReason: reason, - }); - ctx.ui.notify( - `${reason === "idle" ? "Idle" : "Timeout"} recovery check for ${unitType} ${unitId}: ${diagnostic}`, - "warning", - ); - return "paused"; - } - - const expected = diagnoseExpectedArtifact(unitType, unitId, basePath) ?? "required durable artifact"; - - // Check if the artifact already exists on disk — agent may have written it - // without signaling completion. - const artifactPath = resolveExpectedArtifactPath(unitType, unitId, basePath); - if (artifactPath && existsSync(artifactPath)) { - writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { - phase: "finalized", - recoveryAttempts: recoveryAttempts + 1, - lastRecoveryReason: reason, - }); - ctx.ui.notify( - `${reason === "idle" ? "Idle" : "Timeout"} recovery: ${unitType} ${unitId} artifact already exists on disk. Advancing. (attempt ${attemptNumber})`, - "info", - ); - unitRecoveryCount.delete(recoveryKey); - await dispatchNextUnit(ctx, pi); - return "recovered"; - } - - if (recoveryAttempts < maxRecoveryAttempts) { - const isEscalation = recoveryAttempts > 0; - writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { - phase: "recovered", - recoveryAttempts: recoveryAttempts + 1, - lastRecoveryReason: reason, - lastProgressAt: Date.now(), - progressCount: (runtime?.progressCount ?? 0) + 1, - lastProgressKind: reason === "idle" ? "idle-recovery-retry" : "hard-recovery-retry", - }); - - const steeringLines = isEscalation - ? [ - `**FINAL ${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — last chance before skip.**`, - `You are still executing ${unitType} ${unitId}.`, - `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts} — next failure skips this unit.`, - `Expected durable output: ${expected}.`, - "You MUST write the artifact file NOW, even if incomplete.", - "Write whatever you have — partial research, preliminary findings, best-effort analysis.", - "A partial artifact is infinitely better than no artifact.", - "If you are truly blocked, write the file with a BLOCKER section explaining why.", - ] - : [ - `**${reason === "idle" ? 
"IDLE" : "HARD TIMEOUT"} RECOVERY — stay in auto-mode.**`, - `You are still executing ${unitType} ${unitId}.`, - `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`, - `Expected durable output: ${expected}.`, - "Stop broad exploration.", - "Write the required artifact now.", - "If blocked, write the partial artifact and explicitly record the blocker instead of going silent.", - ]; - - pi.sendMessage( - { - customType: "gsd-auto-timeout-recovery", - display: verbose, - content: steeringLines.join("\n"), - }, - { triggerTurn: true, deliverAs: "steer" }, - ); - ctx.ui.notify( - `${reason === "idle" ? "Idle" : "Timeout"} recovery: steering ${unitType} ${unitId} to produce ${expected} (attempt ${attemptNumber}, session ${recoveryAttempts + 1}/${maxRecoveryAttempts}).`, - "warning", - ); - return "recovered"; - } - - // Retries exhausted — write a blocker placeholder and advance the pipeline - // instead of silently stalling. - const placeholder = writeBlockerPlaceholder( - unitType, unitId, basePath, - `${reason} recovery exhausted ${maxRecoveryAttempts} attempts without producing the artifact.`, - ); - - if (placeholder) { - writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { - phase: "skipped", - recoveryAttempts: recoveryAttempts + 1, - lastRecoveryReason: reason, - }); - ctx.ui.notify( - `${unitType} ${unitId} skipped after ${maxRecoveryAttempts} recovery attempts. Blocker placeholder written to ${placeholder}. Advancing pipeline. (attempt ${attemptNumber})`, - "warning", - ); - unitRecoveryCount.delete(recoveryKey); - await dispatchNextUnit(ctx, pi); - return "recovered"; - } - - // Fallback: couldn't resolve artifact path — pause as before. - writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { - phase: "paused", - recoveryAttempts: recoveryAttempts + 1, - lastRecoveryReason: reason, - }); - return "paused"; +/** Build recovery context from module state for recoverTimedOutUnit */ +function buildRecoveryContext(): import("./auto-timeout-recovery.js").RecoveryContext { + return { + basePath, + verbose, + currentUnitStartedAt: currentUnit?.startedAt ?? Date.now(), + unitRecoveryCount, + dispatchNextUnit, + }; } // Re-export recovery functions for external consumers @@ -3772,204 +3404,5 @@ export async function dispatchHookUnit( } -// ─── Direct Phase Dispatch ──────────────────────────────────────────────────── - -export async function dispatchDirectPhase( - ctx: ExtensionCommandContext, - pi: ExtensionAPI, - phase: string, - base: string, -): Promise { - const state = await deriveState(base); - const mid = state.activeMilestone?.id; - const midTitle = state.activeMilestone?.title ?? ""; - - if (!mid) { - ctx.ui.notify("Cannot dispatch: no active milestone.", "warning"); - return; - } - - const normalized = phase.toLowerCase(); - let unitType: string; - let unitId: string; - let prompt: string; - - switch (normalized) { - case "research": - case "research-milestone": - case "research-slice": { - const isSlice = normalized === "research-slice" || (normalized === "research" && state.phase !== "pre-planning"); - if (isSlice) { - const sid = state.activeSlice?.id; - const sTitle = state.activeSlice?.title ?? ""; - if (!sid) { - ctx.ui.notify("Cannot dispatch research-slice: no active slice.", "warning"); - return; - } - - // When require_slice_discussion is enabled, pause auto-mode before - // each new slice so the user can discuss requirements first (#789). 
- const sliceContextFile = resolveSliceFile(base, mid, sid, "CONTEXT"); - const requireDiscussion = loadEffectiveGSDPreferences()?.preferences?.phases?.require_slice_discussion; - if (requireDiscussion && !sliceContextFile) { - ctx.ui.notify( - `Slice ${sid} requires discussion before planning. Run /gsd discuss to discuss this slice, then /gsd auto to resume.`, - "info", - ); - await pauseAuto(ctx, pi); - return; - } - - unitType = "research-slice"; - unitId = `${mid}/${sid}`; - prompt = await buildResearchSlicePrompt(mid, midTitle, sid, sTitle, base); - } else { - unitType = "research-milestone"; - unitId = mid; - prompt = await buildResearchMilestonePrompt(mid, midTitle, base); - } - break; - } - - case "plan": - case "plan-milestone": - case "plan-slice": { - const isSlice = normalized === "plan-slice" || (normalized === "plan" && state.phase !== "pre-planning"); - if (isSlice) { - const sid = state.activeSlice?.id; - const sTitle = state.activeSlice?.title ?? ""; - if (!sid) { - ctx.ui.notify("Cannot dispatch plan-slice: no active slice.", "warning"); - return; - } - unitType = "plan-slice"; - unitId = `${mid}/${sid}`; - prompt = await buildPlanSlicePrompt(mid, midTitle, sid, sTitle, base); - } else { - unitType = "plan-milestone"; - unitId = mid; - prompt = await buildPlanMilestonePrompt(mid, midTitle, base); - } - break; - } - - case "execute": - case "execute-task": { - const sid = state.activeSlice?.id; - const sTitle = state.activeSlice?.title ?? ""; - const tid = state.activeTask?.id; - const tTitle = state.activeTask?.title ?? ""; - if (!sid) { - ctx.ui.notify("Cannot dispatch execute-task: no active slice.", "warning"); - return; - } - if (!tid) { - ctx.ui.notify("Cannot dispatch execute-task: no active task.", "warning"); - return; - } - unitType = "execute-task"; - unitId = `${mid}/${sid}/${tid}`; - prompt = await buildExecuteTaskPrompt(mid, sid, sTitle, tid, tTitle, base); - break; - } - - case "complete": - case "complete-slice": - case "complete-milestone": { - const isSlice = normalized === "complete-slice" || (normalized === "complete" && state.phase === "summarizing"); - if (isSlice) { - const sid = state.activeSlice?.id; - const sTitle = state.activeSlice?.title ?? ""; - if (!sid) { - ctx.ui.notify("Cannot dispatch complete-slice: no active slice.", "warning"); - return; - } - unitType = "complete-slice"; - unitId = `${mid}/${sid}`; - prompt = await buildCompleteSlicePrompt(mid, midTitle, sid, sTitle, base); - } else { - unitType = "complete-milestone"; - unitId = mid; - prompt = await buildCompleteMilestonePrompt(mid, midTitle, base); - } - break; - } - - case "reassess": - case "reassess-roadmap": { - const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP"); - const roadmapContent = roadmapFile ? 
await loadFile(roadmapFile) : null; - if (!roadmapContent) { - ctx.ui.notify("Cannot dispatch reassess-roadmap: no roadmap found.", "warning"); - return; - } - const roadmap = parseRoadmap(roadmapContent); - const completedSlices = roadmap.slices.filter(s => s.done); - if (completedSlices.length === 0) { - ctx.ui.notify("Cannot dispatch reassess-roadmap: no completed slices.", "warning"); - return; - } - const completedSliceId = completedSlices[completedSlices.length - 1].id; - unitType = "reassess-roadmap"; - unitId = `${mid}/${completedSliceId}`; - prompt = await buildReassessRoadmapPrompt(mid, midTitle, completedSliceId, base); - break; - } - - case "uat": - case "run-uat": { - const sid = state.activeSlice?.id; - if (!sid) { - ctx.ui.notify("Cannot dispatch run-uat: no active slice.", "warning"); - return; - } - const uatFile = resolveSliceFile(base, mid, sid, "UAT"); - if (!uatFile) { - ctx.ui.notify("Cannot dispatch run-uat: no UAT file found.", "warning"); - return; - } - const uatContent = await loadFile(uatFile); - if (!uatContent) { - ctx.ui.notify("Cannot dispatch run-uat: UAT file is empty.", "warning"); - return; - } - const uatPath = relSliceFile(base, mid, sid, "UAT"); - unitType = "run-uat"; - unitId = `${mid}/${sid}`; - prompt = await buildRunUatPrompt(mid, sid, uatPath, uatContent, base); - break; - } - - case "replan": - case "replan-slice": { - const sid = state.activeSlice?.id; - const sTitle = state.activeSlice?.title ?? ""; - if (!sid) { - ctx.ui.notify("Cannot dispatch replan-slice: no active slice.", "warning"); - return; - } - unitType = "replan-slice"; - unitId = `${mid}/${sid}`; - prompt = await buildReplanSlicePrompt(mid, midTitle, sid, sTitle, base); - break; - } - - default: - ctx.ui.notify( - `Unknown phase "${phase}". Valid phases: research, plan, execute, complete, reassess, uat, replan.`, - "warning", - ); - return; - } - - ctx.ui.notify(`Dispatching ${unitType} for ${unitId}...`, "info"); - const result = await ctx.newSession(); - if (result.cancelled) { - ctx.ui.notify("Session creation cancelled.", "warning"); - return; - } - pi.sendMessage( - { customType: "gsd-dispatch", content: prompt, display: false }, - { triggerTurn: true }, - ); -} +// Direct phase dispatch → auto-direct-dispatch.ts +export { dispatchDirectPhase } from "./auto-direct-dispatch.js"; diff --git a/src/resources/extensions/gsd/commands.ts b/src/resources/extensions/gsd/commands.ts index 447b977df..0f4315a68 100644 --- a/src/resources/extensions/gsd/commands.ts +++ b/src/resources/extensions/gsd/commands.ts @@ -625,7 +625,7 @@ function showHelp(ctx: ExtensionCommandContext): void { "", "MAINTENANCE", " /gsd doctor Diagnose and repair .gsd/ state [audit|fix|heal] [scope]", - " /gsd export Export milestone/slice results [--json|--markdown]", + " /gsd export Export milestone/slice results [--json|--markdown|--html]", " /gsd cleanup Remove merged branches or snapshots [branches|snapshots]", " /gsd migrate Upgrade .gsd/ structures to new format", " /gsd remote Control remote auto-mode [slack|discord|status|disconnect]", diff --git a/src/resources/extensions/gsd/doctor-proactive.ts b/src/resources/extensions/gsd/doctor-proactive.ts index 810cd46aa..29fabd288 100644 --- a/src/resources/extensions/gsd/doctor-proactive.ts +++ b/src/resources/extensions/gsd/doctor-proactive.ts @@ -173,14 +173,19 @@ export async function preDispatchHealthGate(basePath: string): Promise/ ${esc(opts.milestoneId)}` + : ''; + + const backLink = opts.indexRelPath + ? 
`All Reports` + : ''; + + return ` + + + + +GSD Report — ${esc(opts.projectName)}${opts.milestoneId ? ` — ${esc(opts.milestoneId)}` : ''} + + + +
+
+
+ + v${esc(opts.gsdVersion)} +
+
+

${esc(opts.projectName)}${milestoneTag}

+ ${esc(opts.projectPath)} +
+
+ ${backLink} +
${formatDateLong(generated)}
+
+
+
+ +
+${sections.join('\n')} +
+
+ +
+ + +`; +} + +// ─── Section: Summary ───────────────────────────────────────────────────────── + +function buildSummarySection( + data: VisualizerData, + _opts: HtmlReportOptions, + _generated: string, +): string { + const t = data.totals; + const totalSlices = data.milestones.reduce((s, m) => s + m.slices.length, 0); + const doneSlices = data.milestones.reduce((s, m) => s + m.slices.filter(sl => sl.done).length, 0); + const doneMilestones = data.milestones.filter(m => m.status === 'complete').length; + const activeMilestone = data.milestones.find(m => m.status === 'active'); + const pct = totalSlices > 0 ? Math.round((doneSlices / totalSlices) * 100) : 0; + + const act = data.agentActivity; + const kv = [ + kvi('Milestones', `${doneMilestones}/${data.milestones.length}`), + kvi('Slices', `${doneSlices}/${totalSlices}`), + kvi('Phase', data.phase), + t ? kvi('Cost', formatCost(t.cost)) : '', + t ? kvi('Tokens', formatTokenCount(t.tokens.total)) : '', + t ? kvi('Duration', formatDuration(t.duration)) : '', + t ? kvi('Tool calls', String(t.toolCalls)) : '', + t ? kvi('Units', String(t.units)) : '', + data.remainingSliceCount > 0 ? kvi('Remaining', String(data.remainingSliceCount)) : '', + act ? kvi('Rate', `${act.completionRate.toFixed(1)}/hr`) : '', + ].filter(Boolean).join(''); + + const activeInfo = activeMilestone ? (() => { + const active = activeMilestone.slices.find(s => s.active); + if (!active) return ''; + return `
+ Executing ${esc(activeMilestone.id)}/${esc(active.id)} — ${esc(active.title)} +
`; + })() : ''; + + const activityHtml = act?.active ? ` +
+ + ${esc(act.currentUnit?.type ?? '')} + ${esc(act.currentUnit?.id ?? '')} + ${formatDuration(act.elapsed)} elapsed +
` : ''; + + return section('summary', 'Summary', ` +
${kv}
+
+
+ ${pct}% +
+ ${activeInfo} + ${activityHtml} + `); +} + +// ─── Section: Health ────────────────────────────────────────────────────────── + +function buildHealthSection(data: VisualizerData): string { + const h = data.health; + const t = data.totals; + + const rows: string[] = []; + rows.push(hRow('Token profile', h.tokenProfile)); + if (h.budgetCeiling !== undefined) { + const spent = t?.cost ?? 0; + const pct = (spent / h.budgetCeiling) * 100; + const status = pct > 90 ? 'warn' : pct > 75 ? 'caution' : 'ok'; + rows.push(hRow( + 'Budget ceiling', + `${formatCost(h.budgetCeiling)} (${formatCost(spent)} spent, ${pct.toFixed(0)}% used)`, + status, + )); + } + rows.push(hRow( + 'Truncation rate', + `${h.truncationRate.toFixed(1)}% per unit (${t?.totalTruncationSections ?? 0} total)`, + h.truncationRate > 20 ? 'warn' : h.truncationRate > 10 ? 'caution' : 'ok', + )); + rows.push(hRow( + 'Continue-here rate', + `${h.continueHereRate.toFixed(1)}% per unit (${t?.continueHereFiredCount ?? 0} total)`, + h.continueHereRate > 15 ? 'warn' : h.continueHereRate > 8 ? 'caution' : 'ok', + )); + if (h.tierSavingsLine) rows.push(hRow('Routing savings', h.tierSavingsLine)); + rows.push(hRow('Tool calls', String(h.toolCalls))); + rows.push(hRow('Messages', `${h.assistantMessages} assistant / ${h.userMessages} user`)); + + const tierRows = h.tierBreakdown.length > 0 ? ` +

Tier breakdown

+ + + + ${h.tierBreakdown.map(tb => + ` + + ` + ).join('')} + +
TierUnitsCostTokens
${esc(tb.tier)}${tb.units}${formatCost(tb.cost)}${formatTokenCount(tb.tokens.total)}
` : ''; + + return section('health', 'Health', ` + ${rows.join('')}
+ ${tierRows} + `); +} + +// ─── Section: Progress ──────────────────────────────────────────────────────── + +function buildProgressSection(data: VisualizerData): string { + if (data.milestones.length === 0) { + return section('progress', 'Progress', '

No milestones found.

'); + } + + const critMS = new Set(data.criticalPath.milestonePath); + const critSL = new Set(data.criticalPath.slicePath); + + const msHtml = data.milestones.map(ms => { + const doneCount = ms.slices.filter(s => s.done).length; + const onCrit = critMS.has(ms.id); + const sliceHtml = ms.slices.length > 0 + ? ms.slices.map(sl => buildSliceRow(sl, critSL, data)).join('') + : '

No slices in roadmap yet.

'; + + return ` +
+ + + ${esc(ms.id)} + ${esc(ms.title)} + ${doneCount}/${ms.slices.length} + ${onCrit ? 'critical path' : ''} + ${ms.dependsOn.length > 0 ? `needs ${ms.dependsOn.map(esc).join(', ')}` : ''} + +
${sliceHtml}
+
`; + }).join(''); + + return section('progress', 'Progress', msHtml); +} + +function buildSliceRow(sl: VisualizerSlice, critSL: Set, data: VisualizerData): string { + const onCrit = critSL.has(sl.id); + const ver = data.sliceVerifications.find(v => v.sliceId === sl.id); + const slack = data.criticalPath.sliceSlack.get(sl.id); + const status = sl.done ? 'complete' : sl.active ? 'active' : 'pending'; + + const taskHtml = sl.tasks.length > 0 ? ` +
    + ${sl.tasks.map(t => ` +
  • + + ${esc(t.id)} + ${esc(t.title)} + ${t.estimate ? `${esc(t.estimate)}` : ''} +
  • `).join('')} +
` : ''; + + const tags = [ + ...(ver?.provides ?? []).map(p => `provides: ${esc(p)}`), + ...(ver?.requires ?? []).map(r => `requires: ${esc(r.provides)}`), + ].join(''); + + const keyDecisions = ver?.keyDecisions?.length + ? `
Decisions
    ${ver.keyDecisions.map(d => `
  • ${esc(d)}
  • `).join('')}
` + : ''; + + const patterns = ver?.patternsEstablished?.length + ? `
Patterns
    ${ver.patternsEstablished.map(p => `
  • ${esc(p)}
  • `).join('')}
` + : ''; + + const verifBadge = ver?.verificationResult + ? `
+ ${ver.blockerDiscovered ? 'Blocker: ' : ''}${esc(ver.verificationResult)} +
` + : ''; + + return ` +
+ + + ${esc(sl.id)} + ${esc(sl.title)} + ${esc(sl.risk || '?')} + ${sl.depends.length > 0 ? `${sl.depends.map(esc).join(', ')}` : ''} + ${onCrit ? 'critical' : ''} + ${slack !== undefined && slack > 0 ? `+${slack} slack` : ''} + +
+ ${tags ? `
${tags}
` : ''} + ${verifBadge} + ${keyDecisions} + ${patterns} + ${taskHtml} +
+
`; +} + +// ─── Section: Dependency Graph ──────────────────────────────────────────────── + +function buildDepGraphSection(data: VisualizerData): string { + const hasSlices = data.milestones.some(ms => ms.slices.length > 0); + if (!hasSlices) return section('depgraph', 'Dependencies', '

No slices to graph.

'); + + const hasDeps = data.milestones.some(ms => ms.slices.some(s => s.depends.length > 0)); + if (!hasDeps) return section('depgraph', 'Dependencies', '

No dependencies defined.

'); + + const svgs = data.milestones + .filter(ms => ms.slices.length > 0) + .map(ms => buildMilestoneDepSVG(ms, data)) + .filter(Boolean) + .join(''); + + return section('depgraph', 'Dependencies', svgs); +} + +function buildMilestoneDepSVG(ms: VisualizerMilestone, data: VisualizerData): string { + const slices = ms.slices; + if (slices.length === 0) return ''; + + const critSL = new Set(data.criticalPath.slicePath); + const slMap = new Map(slices.map(s => [s.id, s])); + + const layerMap = new Map(); + const inDeg = new Map(); + for (const s of slices) inDeg.set(s.id, 0); + for (const s of slices) { + for (const dep of s.depends) { + if (slMap.has(dep)) inDeg.set(s.id, (inDeg.get(s.id) ?? 0) + 1); + } + } + + const visited = new Set(); + const q: string[] = []; + for (const [id, d] of inDeg) { + if (d === 0) { q.push(id); visited.add(id); layerMap.set(id, 0); } + } + + while (q.length > 0) { + const node = q.shift()!; + for (const s of slices) { + if (!s.depends.includes(node)) continue; + const newDeg = (inDeg.get(s.id) ?? 1) - 1; + inDeg.set(s.id, newDeg); + layerMap.set(s.id, Math.max(layerMap.get(s.id) ?? 0, (layerMap.get(node) ?? 0) + 1)); + if (newDeg === 0 && !visited.has(s.id)) { visited.add(s.id); q.push(s.id); } + } + } + for (const s of slices) if (!layerMap.has(s.id)) layerMap.set(s.id, 0); + + const maxLayer = Math.max(...[...layerMap.values()]); + const byLayer = new Map(); + for (const [id, layer] of layerMap) { + const arr = byLayer.get(layer) ?? []; + arr.push(id); + byLayer.set(layer, arr); + } + + const NW = 130, NH = 40, CGAP = 56, RGAP = 14, PAD = 20; + let maxRows = 0; + for (let c = 0; c <= maxLayer; c++) maxRows = Math.max(maxRows, (byLayer.get(c) ?? []).length); + const totalH = PAD * 2 + maxRows * NH + Math.max(0, maxRows - 1) * RGAP; + const totalW = PAD * 2 + (maxLayer + 1) * NW + maxLayer * CGAP; + + const pos = new Map(); + for (let col = 0; col <= maxLayer; col++) { + const ids = byLayer.get(col) ?? []; + const colH = ids.length * NH + Math.max(0, ids.length - 1) * RGAP; + const startY = (totalH - colH) / 2; + ids.forEach((id, i) => pos.set(id, { x: PAD + col * (NW + CGAP), y: startY + i * (NH + RGAP) })); + } + + const edges = slices.flatMap(sl => sl.depends.flatMap(dep => { + if (!pos.has(dep) || !pos.has(sl.id)) return []; + const f = pos.get(dep)!, t = pos.get(sl.id)!; + const x1 = f.x + NW, y1 = f.y + NH / 2; + const x2 = t.x, y2 = t.y + NH / 2; + const mx = (x1 + x2) / 2; + const crit = critSL.has(sl.id) && critSL.has(dep); + return [``]; + })); + + const nodes = slices.map(sl => { + const p = pos.get(sl.id); + if (!p) return ''; + const crit = critSL.has(sl.id); + const sc = sl.done ? 'n-done' : sl.active ? 'n-active' : 'n-pending'; + return ` + + ${esc(truncStr(sl.id, 18))} + ${esc(truncStr(sl.title, 18))} + ${esc(sl.id)}: ${esc(sl.title)} + `; + }); + + const legend = `
+ done + active + pending +
`; + + return ` +
+

${esc(ms.id)}: ${esc(ms.title)}

+ ${legend} +
+ + + + + + + + + + ${edges.join('')} + ${nodes.join('')} + +
+
`; +} + +// ─── Section: Metrics ───────────────────────────────────────────────────────── + +function buildMetricsSection(data: VisualizerData): string { + if (!data.totals) return section('metrics', 'Metrics', '

No metrics data yet.

'); + const t = data.totals; + + const grid = [ + kvi('Total cost', formatCost(t.cost)), + kvi('Total tokens', formatTokenCount(t.tokens.total)), + kvi('Input', formatTokenCount(t.tokens.input)), + kvi('Output', formatTokenCount(t.tokens.output)), + kvi('Cache read', formatTokenCount(t.tokens.cacheRead)), + kvi('Cache write', formatTokenCount(t.tokens.cacheWrite)), + kvi('Duration', formatDuration(t.duration)), + kvi('Units', String(t.units)), + kvi('Tool calls', String(t.toolCalls)), + kvi('Truncations', String(t.totalTruncationSections)), + ].join(''); + + const tokenBreakdown = buildTokenBreakdown(t.tokens); + + const phaseRow = data.byPhase.length > 0 ? ` +
+ ${buildBarChart('Cost by phase', data.byPhase.map(p => ({ + label: p.phase, value: p.cost, display: formatCost(p.cost), sub: `${p.units} units`, + })))} + ${buildBarChart('Tokens by phase', data.byPhase.map(p => ({ + label: p.phase, value: p.tokens.total, display: formatTokenCount(p.tokens.total), sub: formatCost(p.cost), + })))} +
` : ''; + + const sliceModelRow = (data.bySlice.length > 0 || data.byModel.length > 0) ? ` +
+ ${data.bySlice.length > 0 ? buildBarChart('Cost by slice', data.bySlice.map(s => ({ + label: s.sliceId, value: s.cost, display: formatCost(s.cost), + sub: `${s.units} units`, + }))) : ''} + ${data.byModel.length > 0 ? buildBarChart('Cost by model', data.byModel.map(m => ({ + label: shortModel(m.model), value: m.cost, display: formatCost(m.cost), + sub: `${m.units} units`, + }))) : ''} +
` : ''; + + return section('metrics', 'Metrics', ` +
${grid}
+ ${tokenBreakdown} + ${phaseRow} + ${sliceModelRow} + `); +} + +function buildTokenBreakdown(tokens: { input: number; output: number; cacheRead: number; cacheWrite: number; total: number }): string { + if (tokens.total === 0) return ''; + const segs = [ + { label: 'Input', value: tokens.input, cls: 'seg-1' }, + { label: 'Output', value: tokens.output, cls: 'seg-2' }, + { label: 'Cache read', value: tokens.cacheRead, cls: 'seg-3' }, + { label: 'Cache write', value: tokens.cacheWrite, cls: 'seg-4' }, + ].filter(s => s.value > 0); + + const bars = segs.map(s => { + const pct = (s.value / tokens.total) * 100; + return `
`; + }).join(''); + + const legend = segs.map(s => { + const pct = ((s.value / tokens.total) * 100).toFixed(1); + return `${s.label}: ${formatTokenCount(s.value)} (${pct}%)`; + }).join(''); + + return ` +
+

Token breakdown

+
${bars}
+
${legend}
+
`; +} + +interface BarEntry { label: string; value: number; display: string; sub?: string; color?: number } + +const CHART_COLORS = 6; + +function buildBarChart(title: string, entries: BarEntry[]): string { + if (entries.length === 0) return ''; + const max = Math.max(...entries.map(e => e.value), 1); + const rows = entries.map((e, i) => { + const pct = (e.value / max) * 100; + const ci = e.color ?? i; + return ` +
+
${esc(truncStr(e.label, 22))}
+
+
${esc(e.display)}
+
+ ${e.sub ? `
${esc(e.sub)}
` : ''}`; + }).join(''); + return `

${esc(title)}

${rows}
`; +} + +// ─── Section: Timeline ──────────────────────────────────────────────────────── + +function buildTimelineSection(data: VisualizerData): string { + if (data.units.length === 0) return section('timeline', 'Timeline', '

No units executed yet.

'); + + const sorted = [...data.units].sort((a, b) => a.startedAt - b.startedAt); + const maxCost = Math.max(...sorted.map(u => u.cost), 0.01); + + const rows = sorted.map((u, i) => { + const dur = u.finishedAt > 0 ? formatDuration(u.finishedAt - u.startedAt) : 'running'; + // Cost heatmap: subtle red background for expensive rows + const intensity = Math.min(u.cost / maxCost, 1); + const heatStyle = intensity > 0.15 ? ` style="background:rgba(239,68,68,${(intensity * 0.15).toFixed(3)})"` : ''; + return ` + + ${i + 1} + ${esc(u.type)} + ${esc(u.id)} + ${esc(shortModel(u.model))} + ${formatDateShort(new Date(u.startedAt).toISOString())} + ${dur} + ${formatCost(u.cost)} + ${formatTokenCount(u.tokens.total)} + ${u.toolCalls} + ${u.tier ?? ''} + ${u.modelDowngraded ? 'routed' : ''} + ${(u.truncationSections ?? 0) > 0 ? u.truncationSections : ''} + ${u.continueHereFired ? 'yes' : ''} + `; + }).join(''); + + return section('timeline', 'Timeline', ` +
+ + + + + + + ${rows} +
#TypeIDModelStartedDurationCostTokensToolsTierRoutedTruncCHF
+
`); +} + +// ─── Section: Changelog ─────────────────────────────────────────────────────── + +function buildChangelogSection(data: VisualizerData): string { + if (data.changelog.entries.length === 0) return section('changelog', 'Changelog', '

No completed slices yet.

'); + + const entries = data.changelog.entries.map(e => { + const filesHtml = e.filesModified.length > 0 ? ` +
+ ${e.filesModified.length} file${e.filesModified.length !== 1 ? 's' : ''} modified +
    + ${e.filesModified.map(f => `
  • ${esc(f.path)}${f.description ? ` — ${esc(f.description)}` : ''}
  • `).join('')} +
+
` : ''; + + const ver = data.sliceVerifications.find(v => v.sliceId === e.sliceId); + const decisionsHtml = ver?.keyDecisions?.length ? ` +
Decisions +
    ${ver.keyDecisions.map(d => `
  • ${esc(d)}
  • `).join('')}
+
` : ''; + + return ` +
+
+ ${esc(e.milestoneId)}/${esc(e.sliceId)} + ${esc(e.title)} + ${e.completedAt ? `${formatDateShort(e.completedAt)}` : ''} +
+ ${e.oneLiner ? `

${esc(e.oneLiner)}

` : ''} + ${decisionsHtml} + ${filesHtml} +
`; + }).join(''); + + return section('changelog', `Changelog ${data.changelog.entries.length}`, entries); +} + +// ─── Section: Knowledge ─────────────────────────────────────────────────────── + +function buildKnowledgeSection(data: VisualizerData): string { + const k = data.knowledge; + if (!k.exists) return section('knowledge', 'Knowledge', '

No KNOWLEDGE.md found.

'); + const total = k.rules.length + k.patterns.length + k.lessons.length; + if (total === 0) return section('knowledge', 'Knowledge', '

KNOWLEDGE.md exists but no entries parsed.

'); + + const rulesHtml = k.rules.length > 0 ? ` +

Rules ${k.rules.length}

+ + + ${k.rules.map(r => ``).join('')} +
IDScopeRule
${esc(r.id)}${esc(r.scope)}${esc(r.content)}
` : ''; + + const patternsHtml = k.patterns.length > 0 ? ` +

Patterns ${k.patterns.length}

+ + + ${k.patterns.map(p => ``).join('')} +
IDPattern
${esc(p.id)}${esc(p.content)}
` : ''; + + const lessonsHtml = k.lessons.length > 0 ? ` +

Lessons ${k.lessons.length}

+ + + ${k.lessons.map(l => ``).join('')} +
IDLesson
${esc(l.id)}${esc(l.content)}
` : ''; + + return section('knowledge', `Knowledge ${total}`, `${rulesHtml}${patternsHtml}${lessonsHtml}`); +} + +// ─── Section: Captures ──────────────────────────────────────────────────────── + +function buildCapturesSection(data: VisualizerData): string { + const c = data.captures; + if (c.totalCount === 0) return section('captures', 'Captures', '

No captures recorded.

'); + + const badge = c.pendingCount > 0 + ? `${c.pendingCount} pending` + : `all triaged`; + + const rows = c.entries.map(e => ` + + ${formatDateShort(new Date(e.timestamp).toISOString())} + ${esc(e.status)} + ${e.classification ?? ''} + ${e.resolution ?? ''} + ${esc(e.text)} + ${e.rationale ?? ''} + ${e.resolvedAt ? formatDateShort(e.resolvedAt) : ''} + ${e.executed !== undefined ? (e.executed ? 'yes' : 'no') : ''} + `).join(''); + + return section('captures', `Captures ${badge}`, ` +
+ + + ${rows} +
CapturedStatusClassResolutionTextRationaleResolvedExecuted
+
`); +} + +// ─── Section: Stats ─────────────────────────────────────────────────────────── + +function buildStatsSection(data: VisualizerData): string { + const s = data.stats; + + const missingHtml = s.missingCount > 0 ? ` +

Missing changelogs ${s.missingCount}

+ + + + ${s.missingSlices.map(sl => ``).join('')} + ${s.missingCount > s.missingSlices.length + ? `` + : ''} + +
MilestoneSliceTitle
${esc(sl.milestoneId)}${esc(sl.sliceId)}${esc(sl.title)}
and ${s.missingCount - s.missingSlices.length} more
` : ''; + + const updatedHtml = s.updatedCount > 0 ? ` +

Recently completed ${s.updatedCount}

+ + + ${s.updatedSlices.map(sl => ` + `).join('')} + +
MilestoneSliceTitleCompleted
${esc(sl.milestoneId)}${esc(sl.sliceId)}${esc(sl.title)}${sl.completedAt ? formatDateShort(sl.completedAt) : ''}
` : ''; + + if (!missingHtml && !updatedHtml) { + return section('stats', 'Artifacts', '

All artifacts accounted for.

'); + } + + return section('stats', 'Artifacts', `${missingHtml}${updatedHtml}`); +} + +// ─── Section: Discussion ────────────────────────────────────────────────────── + +function buildDiscussionSection(data: VisualizerData): string { + if (data.discussion.length === 0) return section('discussion', 'Planning', '

No milestones.

'); + + const rows = data.discussion.map(d => ` + + ${esc(d.milestoneId)} + ${esc(d.title)} + ${d.state} + ${d.hasContext ? 'yes' : ''} + ${d.hasDraft ? 'draft' : ''} + ${d.lastUpdated ? formatDateShort(d.lastUpdated) : ''} + `).join(''); + + return section('discussion', 'Planning', ` + + + ${rows} +
IDMilestoneStateContextDraftUpdated
`); +} + +// ─── Primitives ──────────────────────────────────────────────────────────────── + +function section(id: string, title: string, body: string): string { + return `\n
<section id="${id}">\n  <h2>${title}</h2>\n  ${body}\n</section>
`; +} + +function kvi(label: string, value: string): string { + return `
${esc(value)}${esc(label)}
`; +} + +function hRow(label: string, value: string, status?: 'ok' | 'caution' | 'warn'): string { + const cls = status ? ` class="h-${status}"` : ''; + return `${esc(label)}${esc(value)}`; +} + +function shortModel(m: string) { return m.replace(/^claude-/, '').replace(/^anthropic\//, ''); } +function truncStr(s: string, n: number) { return s.length > n ? s.slice(0, n - 1) + '\u2026' : s; } + +function formatDateLong(iso: string): string { + try { + const d = new Date(iso); + return d.toLocaleString('en-US', { weekday: 'short', month: 'short', day: 'numeric', year: 'numeric', hour: '2-digit', minute: '2-digit', timeZoneName: 'short' }); + } catch { return iso; } +} + +function formatDateShort(iso: string): string { + try { + const d = new Date(iso); + return d.toLocaleString('en-US', { month: 'short', day: 'numeric', year: 'numeric', hour: '2-digit', minute: '2-digit' }); + } catch { return iso; } +} + +function esc(s: string | undefined | null): string { + if (s == null) return ''; + return String(s).replace(/&/g, '&').replace(//g, '>').replace(/"/g, '"').replace(/'/g, '''); +} + +// ─── CSS ─────────────────────────────────────────────────────────────────────── +// Linear-inspired: restrained palette, one accent, no emoji, no gradients. + +const CSS = ` +*,*::before,*::after{box-sizing:border-box;margin:0;padding:0} +:root{ + --bg-0:#0f1115;--bg-1:#16181d;--bg-2:#1e2028;--bg-3:#272a33; + --border-1:#2b2e38;--border-2:#3b3f4c; + --text-0:#ededef;--text-1:#a1a1aa;--text-2:#71717a; + --accent:#5e6ad2;--accent-subtle:rgba(94,106,210,.12); + --ok:#22c55e;--ok-subtle:rgba(34,197,94,.12);--warn:#ef4444;--caution:#eab308; + /* Chart palette — 6 hues for bar charts */ + --c0:#5e6ad2;--c1:#e5796d;--c2:#14b8a6;--c3:#a78bfa;--c4:#f59e0b;--c5:#10b981; + /* Token breakdown — 4 distinct hues */ + --tk-input:#5e6ad2;--tk-output:#e5796d;--tk-cache-r:#2dd4bf;--tk-cache-w:#64748b; + --font:'Inter',-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif; + --mono:'JetBrains Mono','Fira Code',ui-monospace,SFMono-Regular,monospace; +} +html{scroll-behavior:smooth;font-size:13px} +body{background:var(--bg-0);color:var(--text-0);font-family:var(--font);line-height:1.6;-webkit-font-smoothing:antialiased} +a{color:var(--accent);text-decoration:none} +a:hover{text-decoration:underline} +code{font-family:var(--mono);font-size:12px;background:var(--bg-3);padding:1px 5px;border-radius:3px} +.mono{font-family:var(--mono);font-size:12px} +.muted{color:var(--text-2)} +.accent{color:var(--accent)} +.sep{color:var(--border-2);margin:0 4px} +.empty{color:var(--text-2);padding:8px 0;font-size:13px} +.indent{padding-left:12px} +.num{font-variant-numeric:tabular-nums;text-align:right} + +/* Status dots — geometric, no emoji */ +.dot{display:inline-block;width:8px;height:8px;border-radius:50%;flex-shrink:0;vertical-align:middle} +.dot-sm{width:6px;height:6px} +.dot-complete{background:var(--ok);opacity:.6} +.dot-active{background:var(--accent)} +.dot-pending{background:transparent;border:1.5px solid var(--border-2)} + +/* Header */ +header{background:var(--bg-1);border-bottom:1px solid var(--border-1);padding:12px 32px;position:sticky;top:0;z-index:200} +.header-inner{display:flex;align-items:center;gap:16px;max-width:1280px;margin:0 auto} +.branding{display:flex;align-items:baseline;gap:6px;flex-shrink:0} +.logo{font-size:18px;font-weight:800;letter-spacing:-.5px;color:var(--text-0)} +.version{font-size:10px;color:var(--text-2);font-family:var(--mono)} +.header-meta{flex:1;min-width:0} +.header-meta 
h1{font-size:15px;font-weight:600;white-space:nowrap;overflow:hidden;text-overflow:ellipsis} +.header-path{font-size:11px;color:var(--text-2);font-family:var(--mono);display:block;overflow:hidden;text-overflow:ellipsis;white-space:nowrap} +.header-right{text-align:right;flex-shrink:0;display:flex;flex-direction:column;align-items:flex-end;gap:4px} +.generated{font-size:11px;color:var(--text-2)} +.back-link{font-size:12px;color:var(--text-1)} +.back-link:hover{color:var(--accent)} + +/* TOC nav */ +.toc{background:var(--bg-1);border-bottom:1px solid var(--border-1);overflow-x:auto} +.toc ul{display:flex;list-style:none;max-width:1280px;margin:0 auto;padding:0 32px} +.toc a{display:inline-block;padding:8px 12px;color:var(--text-2);font-size:12px;font-weight:500;border-bottom:2px solid transparent;transition:color .12s,border-color .12s;white-space:nowrap;text-decoration:none} +.toc a:hover{color:var(--text-0);border-bottom-color:var(--border-2)} +.toc a.active{color:var(--text-0);border-bottom-color:var(--accent)} + +/* Layout */ +main{max-width:1280px;margin:0 auto;padding:32px;display:flex;flex-direction:column;gap:48px} +section{scroll-margin-top:82px} +section>h2{font-size:14px;font-weight:600;text-transform:uppercase;letter-spacing:.5px;color:var(--text-1);margin-bottom:16px;padding-bottom:8px;border-bottom:1px solid var(--border-1);display:flex;align-items:center;gap:8px} +h3{font-size:13px;font-weight:600;color:var(--text-1);margin:20px 0 8px} +.count{font-size:11px;font-weight:500;color:var(--text-2);background:var(--bg-3);border-radius:3px;padding:1px 6px} +.count-warn{color:var(--caution)} + +/* KV grid (stats/metrics) */ +.kv-grid{display:flex;flex-wrap:wrap;gap:1px;background:var(--border-1);border:1px solid var(--border-1);border-radius:4px;overflow:hidden;margin-bottom:16px} +.kv{background:var(--bg-1);padding:10px 16px;display:flex;flex-direction:column;gap:2px;min-width:110px;flex:1} +.kv-val{font-size:18px;font-weight:600;color:var(--text-0);font-variant-numeric:tabular-nums} +.kv-lbl{font-size:10px;color:var(--text-2);text-transform:uppercase;letter-spacing:.4px} + +/* Progress bar */ +.progress-wrap{display:flex;align-items:center;gap:10px;margin-bottom:12px} +.progress-track{flex:1;height:4px;background:var(--bg-3);border-radius:2px;overflow:hidden} +.progress-fill{height:100%;background:var(--accent);border-radius:2px} +.progress-label{font-size:12px;font-weight:600;color:var(--text-1);min-width:40px;text-align:right} +.active-info{font-size:12px;color:var(--text-1);margin-bottom:4px} +.activity-line{display:flex;align-items:center;gap:8px;font-size:12px;color:var(--text-1);padding:6px 0} + +/* Tables */ +.tbl{width:100%;border-collapse:collapse;font-size:12px} +.tbl th{color:var(--text-2);font-weight:500;padding:6px 12px;text-align:left;border-bottom:1px solid var(--border-1);font-size:11px;text-transform:uppercase;letter-spacing:.3px;white-space:nowrap} +.tbl td{padding:6px 12px;border-bottom:1px solid var(--border-1);vertical-align:top} +.tbl tr:last-child td{border-bottom:none} +.tbl tbody tr:hover td{background:var(--accent-subtle)} +.tbl-kv td:first-child{color:var(--text-2);width:180px} +.table-scroll{overflow-x:auto;border:1px solid var(--border-1);border-radius:4px} +.table-scroll .tbl{border:none} + +/* Health */ +.h-ok td:first-child{color:var(--text-1)} +.h-caution td{color:var(--caution)} +.h-warn td{color:var(--warn)} + +/* Labels */ +.label{font-size:10px;font-weight:500;color:var(--accent);text-transform:uppercase;letter-spacing:.4px} 
+.risk{font-size:10px;font-weight:600;text-transform:uppercase;letter-spacing:.3px;flex-shrink:0} +.risk-low{color:var(--text-2)} +.risk-medium{color:var(--caution)} +.risk-high{color:var(--warn)} +.risk-unknown{color:var(--text-2)} + +/* Tags */ +.tag-row{display:flex;flex-wrap:wrap;gap:4px;margin-bottom:8px} +.tag{font-size:11px;font-family:var(--mono);color:var(--text-2);background:var(--bg-3);border-radius:3px;padding:1px 6px} + +/* Verification */ +.verif{font-size:12px;color:var(--text-1);padding:4px 0;margin-bottom:6px} +.verif-blocker{color:var(--warn)} + +/* Detail blocks */ +.detail-block{font-size:12px;color:var(--text-2);margin-bottom:6px} +.detail-label{font-weight:600;color:var(--text-1);display:block;margin-bottom:2px} +.detail-block ul{padding-left:16px;margin-top:2px} +.detail-block li{margin-bottom:1px} + +/* Progress tree */ +.ms-block{border:1px solid var(--border-1);border-radius:4px;overflow:hidden;margin-bottom:8px} +.ms-summary{display:flex;align-items:center;gap:8px;padding:10px 14px;cursor:pointer;list-style:none;background:var(--bg-1);user-select:none;font-size:13px} +.ms-summary:hover{background:var(--bg-2)} +.ms-summary::-webkit-details-marker{display:none} +.ms-id{font-weight:600} +.ms-title{flex:1;font-weight:500;min-width:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap} +.ms-body{padding:6px 12px 8px 24px;display:flex;flex-direction:column;gap:4px} + +.sl-block{border:1px solid var(--border-1);border-radius:3px;overflow:hidden} +.sl-summary{display:flex;align-items:center;gap:6px;padding:6px 10px;cursor:pointer;list-style:none;background:var(--bg-2);font-size:12px;user-select:none} +.sl-summary:hover{background:var(--bg-3)} +.sl-summary::-webkit-details-marker{display:none} +.sl-crit{border-left:2px solid var(--accent)} +.sl-deps::before{content:'\\2190 ';color:var(--border-2)} +.sl-detail{padding:8px 12px;background:var(--bg-0);border-top:1px solid var(--border-1)} + +.task-list{list-style:none;padding:4px 0 0;display:flex;flex-direction:column;gap:2px} +.task-row{display:flex;align-items:center;gap:6px;font-size:12px;padding:3px 6px;border-radius:2px} + +/* Dep graph */ +.dep-block{margin-bottom:28px} +.dep-legend{display:flex;gap:14px;font-size:12px;color:var(--text-2);margin-bottom:8px;align-items:center} +.dep-legend span{display:flex;align-items:center;gap:4px} +.dep-wrap{overflow-x:auto;background:var(--bg-1);border:1px solid var(--border-1);border-radius:4px;padding:16px} +.dep-svg{display:block} +.edge{fill:none;stroke:var(--border-2);stroke-width:1.5} +.edge-crit{stroke:var(--accent);stroke-width:2} +.node rect{fill:var(--bg-2);stroke:var(--border-2);stroke-width:1} +.n-done rect{fill:var(--ok-subtle);stroke:rgba(34,197,94,.4)} +.n-active rect{fill:var(--accent-subtle);stroke:var(--accent)} +.n-crit rect{stroke:var(--accent)!important;stroke-width:1.5!important} +.n-id{font-family:var(--mono);font-size:10px;fill:var(--text-1);font-weight:600;text-anchor:middle} +.n-title{font-size:9px;fill:var(--text-2);text-anchor:middle} +.n-active .n-id{fill:var(--accent)} + +/* Metrics */ +.token-block{background:var(--bg-1);border:1px solid var(--border-1);border-radius:4px;padding:14px;margin-bottom:16px} +.token-bar{display:flex;height:16px;border-radius:2px;overflow:hidden;gap:1px;margin-bottom:8px} +.tseg{height:100%;min-width:2px} +.seg-1{background:var(--tk-input)} +.seg-2{background:var(--tk-output)} +.seg-3{background:var(--tk-cache-r)} +.seg-4{background:var(--tk-cache-w)} +.token-legend{display:flex;flex-wrap:wrap;gap:12px} 
+.leg-item{display:flex;align-items:center;gap:5px;font-size:11px;color:var(--text-2)} +.leg-dot{width:8px;height:8px;border-radius:2px;flex-shrink:0} +.chart-row{display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-bottom:16px} +@media(max-width:860px){.chart-row{grid-template-columns:1fr}} +.chart-block{background:var(--bg-1);border:1px solid var(--border-1);border-radius:4px;padding:14px} +.bar-row{display:grid;grid-template-columns:120px 1fr 68px;align-items:center;gap:6px;margin-bottom:2px} +.bar-lbl{font-size:12px;color:var(--text-2);text-align:right;overflow:hidden;text-overflow:ellipsis;white-space:nowrap} +.bar-track{height:14px;background:var(--bg-3);border-radius:2px;overflow:hidden} +.bar-fill{height:100%;border-radius:2px;background:var(--c0)} +.bar-c0{background:var(--c0)}.bar-c1{background:var(--c1)}.bar-c2{background:var(--c2)} +.bar-c3{background:var(--c3)}.bar-c4{background:var(--c4)}.bar-c5{background:var(--c5)} +.bar-val{font-size:11px;font-variant-numeric:tabular-nums;color:var(--text-1)} +.bar-sub{font-size:10px;color:var(--text-2);padding-left:128px;margin-bottom:6px} + +/* Changelog */ +.cl-entry{border-bottom:1px solid var(--border-1);padding:12px 0} +.cl-entry:last-child{border-bottom:none} +.cl-header{display:flex;align-items:center;gap:8px;margin-bottom:4px} +.cl-title{flex:1;font-weight:500} +.cl-date{margin-left:auto;white-space:nowrap} +.cl-liner{font-size:13px;color:var(--text-1);margin-bottom:6px} +.files-detail summary{font-size:12px;cursor:pointer} +.file-list{list-style:none;padding-left:10px;margin-top:4px;display:flex;flex-direction:column;gap:2px} +.file-list li{font-size:12px;color:var(--text-1)} + +/* Footer */ +footer{border-top:1px solid var(--border-1);padding:20px 32px;margin-top:40px} +.footer-inner{display:flex;align-items:center;gap:6px;justify-content:center;font-size:11px;color:var(--text-2)} + +/* Print */ +@media print{ + header,nav.toc{position:static} + body{background:#fff;color:#1a1a1a} + :root{--bg-0:#fff;--bg-1:#fafafa;--bg-2:#f5f5f5;--bg-3:#ebebeb;--border-1:#e5e5e5;--border-2:#d4d4d4;--text-0:#1a1a1a;--text-1:#525252;--text-2:#a3a3a3;--accent:#4f46e5;--ok:#16a34a;--ok-subtle:rgba(22,163,74,.08);--c0:#4f46e5;--c1:#dc2626;--c2:#0d9488;--c3:#7c3aed;--c4:#d97706;--c5:#059669;--tk-input:#4f46e5;--tk-output:#dc2626;--tk-cache-r:#0d9488;--tk-cache-w:#64748b} + section{page-break-inside:avoid} + .table-scroll{overflow:visible} +} +`; + +// ─── JS ──────────────────────────────────────────────────────────────────────── + +const JS = ` +(function(){ + const sections=document.querySelectorAll('section[id]'); + const links=document.querySelectorAll('.toc a'); + if(!sections.length||!links.length)return; + const obs=new IntersectionObserver(entries=>{ + for(const e of entries){ + if(!e.isIntersecting)continue; + for(const l of links)l.classList.remove('active'); + const a=document.querySelector('.toc a[href="#'+e.target.id+'"]'); + if(a)a.classList.add('active'); + } + },{rootMargin:'-10% 0px -80% 0px',threshold:0}); + for(const s of sections)obs.observe(s); +})(); +`; diff --git a/src/resources/extensions/gsd/export.ts b/src/resources/extensions/gsd/export.ts index 7a5202bd2..f4a23c080 100644 --- a/src/resources/extensions/gsd/export.ts +++ b/src/resources/extensions/gsd/export.ts @@ -93,9 +93,57 @@ export function writeExportFile( } /** - * Export session/milestone data to JSON or markdown. + * Export session/milestone data to JSON, markdown, or HTML. 
 */
 export async function handleExport(args: string, ctx: ExtensionCommandContext, basePath: string): Promise<void> {
+  // HTML report — delegates to the full visualizer-data pipeline
+  if (args.includes("--html")) {
+    try {
+      const { loadVisualizerData } = await import("./visualizer-data.js");
+      const { generateHtmlReport } = await import("./export-html.js");
+      const { writeReportSnapshot } = await import("./reports.js");
+      const { basename: bn } = await import("node:path");
+      const data = await loadVisualizerData(basePath);
+      const projName = basename(basePath);
+      const gsdVersion = process.env.GSD_VERSION ?? "0.0.0";
+      const doneSlices = data.milestones.reduce((s, m) => s + m.slices.filter(sl => sl.done).length, 0);
+      const totalSlices = data.milestones.reduce((s, m) => s + m.slices.length, 0);
+      const outPath = writeReportSnapshot({
+        basePath,
+        html: generateHtmlReport(data, {
+          projectName: projName,
+          projectPath: basePath,
+          gsdVersion,
+          indexRelPath: "index.html",
+        }),
+        milestoneId: data.milestones.find(m => m.status === "active")?.id ?? "manual",
+        milestoneTitle: data.milestones.find(m => m.status === "active")?.title ?? "",
+        kind: "manual",
+        projectName: projName,
+        projectPath: basePath,
+        gsdVersion,
+        totalCost: data.totals?.cost ?? 0,
+        totalTokens: data.totals?.tokens.total ?? 0,
+        totalDuration: data.totals?.duration ?? 0,
+        doneSlices,
+        totalSlices,
+        doneMilestones: data.milestones.filter(m => m.status === "complete").length,
+        totalMilestones: data.milestones.length,
+        phase: data.phase,
+      });
+      ctx.ui.notify(
+        `HTML report saved: .gsd/reports/${bn(outPath)}\nBrowse all reports: .gsd/reports/index.html`,
+        "success",
+      );
+    } catch (err) {
+      ctx.ui.notify(
+        `HTML export failed: ${err instanceof Error ? err.message : String(err)}`,
+        "error",
+      );
+    }
+    return;
+  }
+
   const format = args.includes("--json") ? "json" : "markdown";
   const ledger = getLedger();
diff --git a/src/resources/extensions/gsd/index.ts b/src/resources/extensions/gsd/index.ts
index 79b5bbc22..b337d141a 100644
--- a/src/resources/extensions/gsd/index.ts
+++ b/src/resources/extensions/gsd/index.ts
@@ -60,6 +60,7 @@ import { shortcutDesc } from "../shared/terminal.js";
 import { Text } from "@gsd/pi-tui";
 import { pauseAutoForProviderError } from "./provider-error-pause.js";
 import { toPosixPath } from "../shared/path-display.js";
+import { isParallelActive, shutdownParallel } from "./parallel-orchestrator.js";
 
 // ── Agent Instructions ────────────────────────────────────────────────────
 // Lightweight "always follow" files injected into every GSD agent session.
@@ -856,6 +857,12 @@ export default function (pi: ExtensionAPI) { // ── session_shutdown: save activity log on Ctrl+C / SIGTERM ───────────── pi.on("session_shutdown", async (_event, ctx: ExtensionContext) => { + if (isParallelActive()) { + try { + await shutdownParallel(process.cwd()); + } catch { /* best-effort */ } + } + if (!isAutoActive() && !isAutoPaused()) return; // Save the current session — the lock file stays on disk diff --git a/src/resources/extensions/gsd/observability-validator.ts b/src/resources/extensions/gsd/observability-validator.ts index 411cd89b8..99d109752 100644 --- a/src/resources/extensions/gsd/observability-validator.ts +++ b/src/resources/extensions/gsd/observability-validator.ts @@ -298,6 +298,27 @@ export function validateTaskSummaryContent(file: string, content: string): Valid }); } + const evidence = getSection(content, "Verification Evidence", 2); + if (!evidence) { + issues.push({ + severity: "warning", + scope: "task-summary", + file, + ruleId: "evidence_block_missing", + message: "Task summary is missing `## Verification Evidence`.", + suggestion: "Add a verification evidence table showing gate check results (command, exit code, verdict, duration).", + }); + } else if (sectionLooksPlaceholderOnly(evidence)) { + issues.push({ + severity: "warning", + scope: "task-summary", + file, + ruleId: "evidence_block_placeholder", + message: "Task summary verification evidence section still looks like placeholder text.", + suggestion: "Replace placeholders with actual gate results or note that no verification commands were discovered.", + }); + } + return issues; } diff --git a/src/resources/extensions/gsd/parallel-orchestrator.ts b/src/resources/extensions/gsd/parallel-orchestrator.ts index db5dfabff..340cfb990 100644 --- a/src/resources/extensions/gsd/parallel-orchestrator.ts +++ b/src/resources/extensions/gsd/parallel-orchestrator.ts @@ -8,7 +8,14 @@ */ import { spawn, type ChildProcess } from "node:child_process"; -import { existsSync } from "node:fs"; +import { + existsSync, + writeFileSync, + readFileSync, + renameSync, + unlinkSync, + mkdirSync, +} from "node:fs"; import { join, dirname } from "node:path"; import { fileURLToPath } from "node:url"; import { gsdRoot } from "./paths.js"; @@ -58,6 +65,142 @@ export interface OrchestratorState { let state: OrchestratorState | null = null; +// ─── Persistence ────────────────────────────────────────────────────────── + +const ORCHESTRATOR_STATE_FILE = "orchestrator.json"; +const TMP_SUFFIX = ".tmp"; + +export interface PersistedState { + active: boolean; + workers: Array<{ + milestoneId: string; + title: string; + pid: number; + worktreePath: string; + startedAt: number; + state: "running" | "paused" | "stopped" | "error"; + completedUnits: number; + cost: number; + }>; + totalCost: number; + startedAt: number; + configSnapshot: { max_workers: number; budget_ceiling?: number }; +} + +function stateFilePath(basePath: string): string { + return join(gsdRoot(basePath), ORCHESTRATOR_STATE_FILE); +} + +/** + * Persist the current orchestrator state to .gsd/orchestrator.json. + * Uses atomic write (tmp + rename) to prevent partial reads. 
+ */
+export function persistState(basePath: string): void {
+  if (!state) return;
+  try {
+    const dir = gsdRoot(basePath);
+    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+
+    const persisted: PersistedState = {
+      active: state.active,
+      workers: [...state.workers.values()].map((w) => ({
+        milestoneId: w.milestoneId,
+        title: w.title,
+        pid: w.pid,
+        worktreePath: w.worktreePath,
+        startedAt: w.startedAt,
+        state: w.state,
+        completedUnits: w.completedUnits,
+        cost: w.cost,
+      })),
+      totalCost: state.totalCost,
+      startedAt: state.startedAt,
+      configSnapshot: {
+        max_workers: state.config.max_workers,
+        budget_ceiling: state.config.budget_ceiling,
+      },
+    };
+
+    const dest = stateFilePath(basePath);
+    const tmp = dest + TMP_SUFFIX;
+    writeFileSync(tmp, JSON.stringify(persisted, null, 2), "utf-8");
+    renameSync(tmp, dest);
+  } catch { /* non-fatal */ }
+}
+
+/**
+ * Remove the persisted state file.
+ */
+function removeStateFile(basePath: string): void {
+  try {
+    const p = stateFilePath(basePath);
+    if (existsSync(p)) unlinkSync(p);
+  } catch { /* non-fatal */ }
+}
+
+function isPidAlive(pid: number): boolean {
+  if (!Number.isInteger(pid) || pid <= 0) return false;
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Restore orchestrator state from .gsd/orchestrator.json.
+ * Checks PID liveness for each worker:
+ *   - Living PID → state "running", process stays null (no handle)
+ *   - Dead PID → removed from restored state
+ * Returns null if no state file exists or no workers survive.
+ */
+export function restoreState(basePath: string): PersistedState | null {
+  try {
+    const p = stateFilePath(basePath);
+    if (!existsSync(p)) return null;
+    const raw = readFileSync(p, "utf-8");
+    const persisted = JSON.parse(raw) as PersistedState;
+
+    // Filter to only workers with living PIDs
+    persisted.workers = persisted.workers.filter((w) => {
+      if (w.state === "stopped" || w.state === "error") return false;
+      return isPidAlive(w.pid);
+    });
+
+    if (persisted.workers.length === 0) {
+      // No surviving workers — clean up and return null
+      removeStateFile(basePath);
+      return null;
+    }
+
+    return persisted;
+  } catch {
+    return null;
+  }
+}
+
+async function waitForWorkerExit(worker: WorkerInfo, timeoutMs: number): Promise<boolean> {
+  if (worker.process) {
+    await new Promise<void>((resolve) => {
+      const done = () => resolve();
+      const timer = setTimeout(done, timeoutMs);
+      worker.process!.once("exit", () => {
+        clearTimeout(timer);
+        resolve();
+      });
+    });
+    return worker.process === null || !isPidAlive(worker.pid);
+  }
+
+  const startedAt = Date.now();
+  while (Date.now() - startedAt < timeoutMs) {
+    if (!isPidAlive(worker.pid)) return true;
+    await new Promise((resolve) => setTimeout(resolve, 50));
+  }
+  return !isPidAlive(worker.pid);
+}
+
 // ─── Accessors ─────────────────────────────────────────────────────────────
 
 /** Returns true if the orchestrator is active and has been initialized. */
@@ -81,12 +224,26 @@ export function getWorkerStatuses(): WorkerInfo[] {
 
 /**
  * Analyze eligibility and prepare for parallel start.
  * Returns the candidates report without actually starting workers.
+ * Also detects orphaned sessions from prior crashes.
 */
 export async function prepareParallelStart(
   basePath: string,
   _prefs: GSDPreferences | undefined,
-): Promise<Awaited<ReturnType<typeof analyzeParallelEligibility>>> {
-  return analyzeParallelEligibility(basePath);
+): Promise<
+  Awaited<ReturnType<typeof analyzeParallelEligibility>> &
+  { orphans?: Array<{ milestoneId: string; pid: number; alive: boolean }> }
+> {
+  // Detect orphaned sessions before eligibility analysis
+  const sessions = readAllSessionStatuses(basePath);
+  const orphans: Array<{ milestoneId: string; pid: number; alive: boolean }> = [];
+  for (const session of sessions) {
+    const alive = isPidAlive(session.pid);
+    orphans.push({ milestoneId: session.milestoneId, pid: session.pid, alive });
+    if (!alive) {
+      // Clean up dead session
+      removeSessionStatus(basePath, session.milestoneId);
+    }
+  }
+
+  const candidates = await analyzeParallelEligibility(basePath);
+  return orphans.length > 0 ? { ...candidates, orphans } : candidates;
+}
 
 // ─── Start ───────────────────────────────────────────────────────────────
 
@@ -106,6 +263,36 @@ export async function startParallel(
   }
 
   const config = resolveParallelConfig(prefs);
+
+  // Try to restore from a previous crash
+  const restored = restoreState(basePath);
+  if (restored && restored.workers.length > 0) {
+    // Adopt surviving workers instead of starting new ones
+    state = {
+      active: true,
+      workers: new Map(),
+      config,
+      totalCost: restored.totalCost,
+      startedAt: restored.startedAt,
+    };
+    const adopted: string[] = [];
+    for (const w of restored.workers) {
+      state.workers.set(w.milestoneId, {
+        milestoneId: w.milestoneId,
+        title: w.title,
+        pid: w.pid,
+        process: null, // no handle for adopted workers
+        worktreePath: w.worktreePath,
+        startedAt: w.startedAt,
+        state: "running",
+        completedUnits: w.completedUnits,
+        cost: w.cost,
+      });
+      adopted.push(w.milestoneId);
+    }
+    return { started: adopted, errors: [] };
+  }
+
   const now = Date.now();
 
   // Initialize orchestrator state
@@ -190,6 +377,9 @@
     state.active = false;
   }
 
+  // Persist state for crash recovery
+  persistState(basePath);
+
   return { started, errors };
 }
 
@@ -485,12 +675,24 @@ export async function stopParallel(
     try {
       if (worker.process) {
         worker.process.kill("SIGTERM");
-      } else {
+      } else if (worker.pid !== process.pid) {
         process.kill(worker.pid, "SIGTERM");
       }
     } catch { /* process may already be dead */ }
   }
 
+  const exitedAfterTerm = await waitForWorkerExit(worker, 750);
+  if (!exitedAfterTerm && worker.pid > 0) {
+    try {
+      if (worker.process) {
+        worker.process.kill("SIGKILL");
+      } else if (worker.pid !== process.pid) {
+        process.kill(worker.pid, "SIGKILL");
+      }
+    } catch { /* process may already be dead */ }
+    await waitForWorkerExit(worker, 250);
+  }
+
   // Update in-memory state
   worker.state = "stopped";
   worker.process = null;
@@ -503,6 +705,15 @@
   if (!milestoneId) {
     state.active = false;
   }
+
+  // Persist final state and clean up state file
+  removeStateFile(basePath);
+}
+
+export async function shutdownParallel(basePath: string): Promise<void> {
+  if (!state) return;
+  await stopParallel(basePath);
+  resetOrchestrator();
+}
 
 // ─── Pause / Resume ──────────────────────────────────────────────────────
@@ -589,6 +800,9 @@ export function refreshWorkerStatuses(basePath: string): void {
   for (const worker of state.workers.values()) {
     state.totalCost += worker.cost;
   }
+
+  // Persist updated state for crash recovery
+  persistState(basePath);
 }
 
 // ─── Budget ──────────────────────────────────────────────────────────────
diff --git a/src/resources/extensions/gsd/preferences.ts b/src/resources/extensions/gsd/preferences.ts
index b2eafa576..762318493 100644
--- a/src/resources/extensions/gsd/preferences.ts
+++ b/src/resources/extensions/gsd/preferences.ts
@@ -75,7 +75,11 @@ const KNOWN_PREFERENCE_KEYS = new Set([
   "token_profile",
   "phases",
   "auto_visualize",
+  "auto_report",
   "parallel",
+  "verification_commands",
+  "verification_auto_fix",
+  "verification_max_retries",
 ]);
 
 export interface GSDSkillRule {
@@ -172,7 +176,12 @@ export interface GSDPreferences {
   token_profile?: TokenProfile;
   phases?: PhaseSkipPreferences;
   auto_visualize?: boolean;
+  /** Generate HTML report snapshot after each milestone completion. Default: true. Set false to disable. */
+  auto_report?: boolean;
   parallel?: import("./types.js").ParallelConfig;
+  verification_commands?: string[];
+  verification_auto_fix?: boolean;
+  verification_max_retries?: number;
 }
 
 export interface LoadedGSDPreferences {
@@ -327,7 +336,7 @@ function resolveSkillReference(ref: string, cwd: string): SkillResolution {
   try {
     const entries = readdirSync(dir, { withFileTypes: true });
     for (const entry of entries) {
-      if (!entry.isDirectory()) continue;
+      if (!entry.isDirectory() && !entry.isSymbolicLink()) continue;
       if (entry.name === expanded) {
         const skillFile = join(dir, entry.name, "SKILL.md");
         if (existsSync(skillFile)) {
@@ -773,6 +782,9 @@ function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPreferences {
     parallel: (base.parallel || override.parallel)
       ? { ...(base.parallel ?? {}), ...(override.parallel ?? {}) } as import("./types.js").ParallelConfig
       : undefined,
+    verification_commands: mergeStringLists(base.verification_commands, override.verification_commands),
+    verification_auto_fix: override.verification_auto_fix ?? base.verification_auto_fix,
+    verification_max_retries: override.verification_max_retries ?? base.verification_max_retries,
   };
 }
 
@@ -1205,6 +1217,39 @@ export function validatePreferences(preferences: GSDPreferences): {
     }
   }
 
+  // ─── Verification Preferences ───────────────────────────────────────────
+  if (preferences.verification_commands !== undefined) {
+    if (Array.isArray(preferences.verification_commands)) {
+      const allStrings = preferences.verification_commands.every(
+        (item: unknown) => typeof item === "string",
+      );
+      if (allStrings) {
+        validated.verification_commands = preferences.verification_commands;
+      } else {
+        errors.push("verification_commands must be an array of strings");
+      }
+    } else {
+      errors.push("verification_commands must be an array of strings");
+    }
+  }
+
+  if (preferences.verification_auto_fix !== undefined) {
+    if (typeof preferences.verification_auto_fix === "boolean") {
+      validated.verification_auto_fix = preferences.verification_auto_fix;
+    } else {
+      errors.push("verification_auto_fix must be a boolean");
+    }
+  }
+
+  if (preferences.verification_max_retries !== undefined) {
+    const raw = preferences.verification_max_retries;
+    if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) {
+      validated.verification_max_retries = Math.floor(raw);
+    } else {
+      errors.push("verification_max_retries must be a non-negative number");
+    }
+  }
+
   // ─── Git Preferences ───────────────────────────────────────────────────
   if (preferences.git && typeof preferences.git === "object") {
     const git: Record<string, unknown> = {};
diff --git a/src/resources/extensions/gsd/prompts/execute-task.md b/src/resources/extensions/gsd/prompts/execute-task.md
index 452b5b735..e40940c55 100644
--- a/src/resources/extensions/gsd/prompts/execute-task.md
+++ b/src/resources/extensions/gsd/prompts/execute-task.md
@@ -38,15 +38,16 @@ Then:
   - Preferred: use the `bg_shell` tool if
available — it manages process lifecycle correctly without stream-inheritance issues 6. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors) 7. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary. -8. If the task touches UI, browser flows, DOM behavior, or user-visible web state: +8. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section. +9. If the task touches UI, browser flows, DOM behavior, or user-visible web state: - exercise the real flow in the browser - prefer `browser_batch` when the next few actions are obvious and sequential - prefer `browser_assert` for explicit pass/fail verification of the intended outcome - use `browser_diff` when an action's effect is ambiguous - use console/network/dialog diagnostics when validating async, stateful, or failure-prone UI - record verification in terms of explicit checks passed/failed, not only prose interpretation -9. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section. -10. **If execution is running long or verification fails:** +10. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section. +11. **If execution is running long or verification fails:** **Context budget:** You have approximately **{{verificationBudget}}** reserved for verification context. If you've used most of your context and haven't finished all steps, stop implementing and prioritize writing the task summary with clear notes on what's done and what remains. A partial summary that enables clean resumption is more valuable than one more half-finished step with no documentation. Never sacrifice summary quality for one more implementation step. diff --git a/src/resources/extensions/gsd/prompts/system.md b/src/resources/extensions/gsd/prompts/system.md index 8d0a9775d..41d36f729 100644 --- a/src/resources/extensions/gsd/prompts/system.md +++ b/src/resources/extensions/gsd/prompts/system.md @@ -154,7 +154,7 @@ Templates showing the expected format for each artifact type are in: **External facts:** Use `search-the-web` + `fetch_page`, or `search_and_read` for one-call extraction. Use `freshness` for recency. Never state current facts from training data without verification. -**Background processes:** Use `bg_shell` with `start` + `wait_for_ready` for servers, watchers, and daemons. Never use `bash` with `&` or `nohup` to background a process — the `bash` tool waits for stdout to close, so backgrounded children that inherit the file descriptors cause it to hang indefinitely. Never poll with `sleep`/retry loops — `wait_for_ready` exists for this. For status checks, use `digest` (~30 tokens), not `output` (~2000 tokens). Use `highlights` (~100 tokens) when you need significant lines only. Use `output` only when actively debugging. +**Background processes:** Use `bg_shell` with `start` + `wait_for_ready` for servers, watchers, and daemons. 
Never use `bash` with `&` or `nohup` to background a process — the `bash` tool waits for stdout to close, so backgrounded children that inherit the file descriptors cause it to hang indefinitely. Never poll with `sleep`/retry loops — `wait_for_ready` exists for this. For status checks, use `digest` (~30 tokens), not `output` (~2000 tokens). Use `highlights` (~100 tokens) when you need significant lines only. Use `output` only when actively debugging. Background processes are session-scoped by default; set `persist_across_sessions:true` only when you intentionally need them to survive a fresh session. **One-shot commands:** Use `async_bash` for builds, tests, and installs. The result is pushed to you when the command exits — no polling needed. Use `await_job` to block on a specific job. diff --git a/src/resources/extensions/gsd/reports.ts b/src/resources/extensions/gsd/reports.ts new file mode 100644 index 000000000..c31d73bff --- /dev/null +++ b/src/resources/extensions/gsd/reports.ts @@ -0,0 +1,510 @@ +/** + * GSD Reports Registry + * + * Manages .gsd/reports/ — the persistent progression log of HTML snapshots. + * + * Layout: + * .gsd/reports/ + * reports.json lightweight metadata index (never re-parses HTML) + * index.html auto-regenerated on every new snapshot + * M001-20260101T120000.html per-milestone snapshot + * final-20260201T090000.html full-project final snapshot + * + * Auto-triggered: after each milestone completion (when auto_report: true). + * Manual: /gsd export --html + */ + +import { writeFileSync, readFileSync, mkdirSync, existsSync } from 'node:fs'; +import { join, basename } from 'node:path'; +import { gsdRoot } from './paths.js'; +import { formatCost, formatTokenCount } from './metrics.js'; +import { formatDuration } from './history.js'; + +// ─── Types ──────────────────────────────────────────────────────────────────── + +export interface ReportEntry { + /** Filename relative to the reports/ dir, e.g. 
"M001-20260101T120000.html" */ + filename: string; + /** ISO timestamp when this report was generated */ + generatedAt: string; + /** Milestone ID this snapshot covers, or "final" for a full-project snapshot */ + milestoneId: string | 'final'; + /** Milestone title at snapshot time */ + milestoneTitle: string; + /** Human-readable label shown in the index */ + label: string; + /** Snapshot kind */ + kind: 'milestone' | 'manual' | 'final'; + // Metrics at snapshot time — for the index progression view + totalCost: number; + totalTokens: number; + totalDuration: number; + doneSlices: number; + totalSlices: number; + doneMilestones: number; + totalMilestones: number; + phase: string; +} + +export interface ReportsIndex { + version: 1; + projectName: string; + projectPath: string; + gsdVersion: string; + entries: ReportEntry[]; +} + +// ─── Paths ──────────────────────────────────────────────────────────────────── + +export function reportsDir(basePath: string): string { + return join(gsdRoot(basePath), 'reports'); +} + +function reportsIndexPath(basePath: string): string { + return join(reportsDir(basePath), 'reports.json'); +} + +function reportsHtmlIndexPath(basePath: string): string { + return join(reportsDir(basePath), 'index.html'); +} + +// ─── Registry ───────────────────────────────────────────────────────────────── + +export function loadReportsIndex(basePath: string): ReportsIndex | null { + const p = reportsIndexPath(basePath); + if (!existsSync(p)) return null; + try { + return JSON.parse(readFileSync(p, 'utf-8')) as ReportsIndex; + } catch { + return null; + } +} + +function saveReportsIndex(basePath: string, index: ReportsIndex): void { + const dir = reportsDir(basePath); + mkdirSync(dir, { recursive: true }); + writeFileSync(reportsIndexPath(basePath), JSON.stringify(index, null, 2) + '\n', 'utf-8'); +} + +// ─── Write a report snapshot ────────────────────────────────────────────────── + +export interface WriteReportSnapshotArgs { + basePath: string; + html: string; + milestoneId: string | 'final'; + milestoneTitle: string; + kind: 'milestone' | 'manual' | 'final'; + projectName: string; + projectPath: string; + gsdVersion: string; + // metrics + totalCost: number; + totalTokens: number; + totalDuration: number; + doneSlices: number; + totalSlices: number; + doneMilestones: number; + totalMilestones: number; + phase: string; +} + +/** + * Write a report snapshot to .gsd/reports/, update reports.json, regenerate index.html. + * Returns the path of the written report file. + */ +export function writeReportSnapshot(args: WriteReportSnapshotArgs): string { + const dir = reportsDir(args.basePath); + mkdirSync(dir, { recursive: true }); + + const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); + const prefix = args.milestoneId === 'final' ? 'final' : args.milestoneId; + const filename = `${prefix}-${timestamp}.html`; + const filePath = join(dir, filename); + + writeFileSync(filePath, args.html, 'utf-8'); + + // Load or init registry + const existing = loadReportsIndex(args.basePath); + const index: ReportsIndex = existing ?? { + version: 1, + projectName: args.projectName, + projectPath: args.projectPath, + gsdVersion: args.gsdVersion, + entries: [], + }; + + // Keep metadata fresh + index.projectName = args.projectName; + index.projectPath = args.projectPath; + index.gsdVersion = args.gsdVersion; + + const label = args.milestoneId === 'final' + ? 
'Final Report'
+    : `${args.milestoneId}: ${args.milestoneTitle}`;
+
+  const entry: ReportEntry = {
+    filename,
+    generatedAt: new Date().toISOString(),
+    milestoneId: args.milestoneId,
+    milestoneTitle: args.milestoneTitle,
+    label,
+    kind: args.kind,
+    totalCost: args.totalCost,
+    totalTokens: args.totalTokens,
+    totalDuration: args.totalDuration,
+    doneSlices: args.doneSlices,
+    totalSlices: args.totalSlices,
+    doneMilestones: args.doneMilestones,
+    totalMilestones: args.totalMilestones,
+    phase: args.phase,
+  };
+
+  index.entries.push(entry);
+  saveReportsIndex(args.basePath, index);
+  regenerateHtmlIndex(args.basePath, index);
+
+  return filePath;
+}
+
+// ─── HTML Index Generator ─────────────────────────────────────────────────────
+
+export function regenerateHtmlIndex(basePath: string, index: ReportsIndex): void {
+  const html = buildIndexHtml(index);
+  writeFileSync(reportsHtmlIndexPath(basePath), html, 'utf-8');
+}
+
+function buildIndexHtml(index: ReportsIndex): string {
+  const { projectName, projectPath, gsdVersion, entries } = index;
+  const generated = new Date().toISOString();
+
+  // Sort oldest → newest for the progression timeline
+  const sorted = [...entries].sort(
+    (a, b) => new Date(a.generatedAt).getTime() - new Date(b.generatedAt).getTime()
+  );
+
+  const latestEntry = sorted[sorted.length - 1];
+  const overallPct = latestEntry
+    ? (latestEntry.totalSlices > 0
+        ? Math.round((latestEntry.doneSlices / latestEntry.totalSlices) * 100)
+        : 0)
+    : 0;
+
+  // TOC: group by milestone
+  const milestoneGroups = new Map<string, ReportEntry[]>();
+  for (const e of sorted) {
+    const key = e.milestoneId;
+    const arr = milestoneGroups.get(key) ?? [];
+    arr.push(e);
+    milestoneGroups.set(key, arr);
+  }
+
+  const tocHtml = [...milestoneGroups.entries()].map(([mid, group]) => {
+    const links = group.map(e =>
+      `<li><a href="${esc(e.filename)}">${formatDateShort(e.generatedAt)}</a><span class="toc-kind">${e.kind}</span></li>`
+    ).join('');
+    return `
+    <div class="toc-group">
+      <div class="toc-group-label">${esc(mid === 'final' ? 'Final' : mid)}</div>
+      <ul>
+        ${links}
+      </ul>
+    </div>`;
+  }).join('');
+
+  // Progression cards
+  const cardHtml = sorted.map((e, i) => {
+    const pct = e.totalSlices > 0 ? Math.round((e.doneSlices / e.totalSlices) * 100) : 0;
+    const isLatest = i === sorted.length - 1;
+
+    // Delta vs previous
+    let deltaHtml = '';
+    if (i > 0) {
+      const prev = sorted[i - 1];
+      const dCost = e.totalCost - prev.totalCost;
+      const dSlices = e.doneSlices - prev.doneSlices;
+      const dMilestones = e.doneMilestones - prev.doneMilestones;
+      const parts: string[] = [];
+      if (dCost > 0) parts.push(`+${formatCost(dCost)}`);
+      if (dSlices > 0) parts.push(`+${dSlices} slice${dSlices !== 1 ? 's' : ''}`);
+      if (dMilestones > 0) parts.push(`+${dMilestones} milestone${dMilestones !== 1 ? 's' : ''}`);
+      if (parts.length > 0) {
+        deltaHtml = `
+    <div class="card-delta">${parts.map(p => `<span>${esc(p)}</span>`).join('')}</div>`;
+      }
+    }
+
+    return `
+  <a class="report-card${isLatest ? ' card-latest' : ''}" href="${esc(e.filename)}">
+    <div class="card-top">
+      <span class="card-label">${esc(e.label)}</span>
+      <span class="card-kind">${e.kind}</span>
+    </div>
+    <div class="card-date">${formatDateShort(e.generatedAt)}</div>
+    <div class="card-progress">
+      <div class="card-bar-track"><div class="card-bar-fill" style="width:${pct}%"></div></div>
+      <span class="card-pct">${pct}%</span>
+    </div>
+    <div class="card-stats">
+      <span>${esc(formatCost(e.totalCost))}</span>
+      <span>${esc(formatTokenCount(e.totalTokens))}</span>
+      <span>${esc(formatDuration(e.totalDuration))}</span>
+      <span>${e.doneSlices}/${e.totalSlices} slices</span>
+    </div>
+    ${deltaHtml}
+    ${isLatest ? '<span class="card-latest-badge">Latest</span>' : ''}
+  </a>`;
+  }).join('');
+
+  // Cost progression mini-chart (inline SVG sparkline)
+  const sparklineSvg = sorted.length > 1 ? buildCostSparkline(sorted) : '';
+
+  // Summary of latest state
+  const summaryHtml = latestEntry ? `
+  <div class="idx-summary">
+    <div class="idx-stat"><span class="idx-val">${formatCost(latestEntry.totalCost)}</span><span class="idx-lbl">Total Cost</span></div>
+    <div class="idx-stat"><span class="idx-val">${formatTokenCount(latestEntry.totalTokens)}</span><span class="idx-lbl">Total Tokens</span></div>
+    <div class="idx-stat"><span class="idx-val">${formatDuration(latestEntry.totalDuration)}</span><span class="idx-lbl">Duration</span></div>
+    <div class="idx-stat"><span class="idx-val">${latestEntry.doneSlices}/${latestEntry.totalSlices}</span><span class="idx-lbl">Slices</span></div>
+    <div class="idx-stat"><span class="idx-val">${latestEntry.doneMilestones}/${latestEntry.totalMilestones}</span><span class="idx-lbl">Milestones</span></div>
+    <div class="idx-stat"><span class="idx-val">${entries.length}</span><span class="idx-lbl">Reports</span></div>
+  </div>
+  <div class="idx-progress">
+    <div class="idx-bar-track"><div class="idx-bar-fill" style="width:${overallPct}%"></div></div>
+    <span class="idx-pct">${overallPct}% complete</span>
+  </div>` : '<div class="empty">No reports generated yet.</div>';
+
+  return `<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>GSD Reports — ${esc(projectName)}</title>
+<style>${INDEX_CSS}</style>
+</head>
+<body>
+
+<header>
+  <div class="hdr-inner">
+    <div class="branding">
+      <span class="logo">GSD</span>
+      <span class="ver">v${esc(gsdVersion)}</span>
+    </div>
+    <div class="hdr-meta">
+      <h1>${esc(projectName)} <span class="hdr-subtitle">Reports</span></h1>
+      <span class="hdr-path">${esc(projectPath)}</span>
+    </div>
+    <div class="hdr-right">
+      <span class="gen-lbl">Updated</span>
+      <span class="gen">${formatDateShort(generated)}</span>
+    </div>
+  </div>
+</header>
+
+<div class="layout">
+  <aside class="sidebar">
+    <div class="sidebar-title">Snapshots</div>
+    ${tocHtml}
+  </aside>
+
+  <main>
+    <section id="overview">
+      <h2>Project Overview</h2>
+      ${summaryHtml}
+      ${sparklineSvg ? `
+      <div class="sparkline-wrap">
+        <h3>Cost Progression</h3>
+        ${sparklineSvg}
+      </div>` : ''}
+    </section>
+
+    <section id="progression">
+      <h2>Progression <span class="count">${entries.length}</span></h2>
+      ${sorted.length > 0
+        ? `<div class="cards-grid">${cardHtml}</div>`
+        : '<div class="empty">No reports generated yet. Run <code>/gsd export --html</code> or enable <code>auto_report: true</code>.</div>'}
+    </section>
+  </main>
+</div>
+
+<footer>
+  <div class="ftr-inner">
+    <span>GSD v${esc(gsdVersion)}</span>
+    <span class="ftr-sep">•</span>
+    <span>${esc(projectName)}</span>
+    <span class="ftr-sep">•</span>
+    <span>${esc(projectPath)}</span>
+    <span class="ftr-sep">•</span>
+    <span>Updated ${formatDateShort(generated)}</span>
+  </div>
+</footer>
+
+</body>
+</html>`;
+}
+
+// ─── Cost sparkline (inline SVG) ──────────────────────────────────────────────
+
+function buildCostSparkline(entries: ReportEntry[]): string {
+  const costs = entries.map(e => e.totalCost);
+  const maxCost = Math.max(...costs, 0.001);
+  const W = 600, H = 60, PAD = 12;
+  const xStep = entries.length > 1 ? (W - PAD * 2) / (entries.length - 1) : W - PAD * 2;
+
+  const points = costs.map((c, i) => {
+    const x = PAD + i * xStep;
+    const y = PAD + (1 - c / maxCost) * (H - PAD * 2);
+    return `${x.toFixed(1)},${y.toFixed(1)}`;
+  }).join(' ');
+
+  const dots = costs.map((c, i) => {
+    const x = PAD + i * xStep;
+    const y = PAD + (1 - c / maxCost) * (H - PAD * 2);
+    return `<circle class="spark-dot" cx="${x.toFixed(1)}" cy="${y.toFixed(1)}" r="3">
+      <title>${esc(entries[i].label)} — ${formatCost(c)}</title>
+    </circle>`;
+  }).join('');
+
+  // Labels at start and end
+  const startLabel = formatCost(costs[0]);
+  const endLabel = formatCost(costs[costs.length - 1]);
+
+  return `
+  <div class="sparkline">
+    <svg class="spark-svg" viewBox="0 0 ${W} ${H}" width="${W}" height="${H}">
+      <polyline class="spark-line" points="${points}"/>
+      ${dots}
+      <text class="spark-lbl" x="${PAD}" y="${H - 2}">${esc(startLabel)}</text>
+      <text class="spark-lbl" x="${W - PAD}" y="${H - 2}" text-anchor="end">${esc(endLabel)}</text>
+    </svg>
+    <div class="spark-axis">
+      ${entries.map((e, i) => {
+        const x = (PAD + i * xStep) / W * 100;
+        return `<span class="spark-tick" style="left:${x.toFixed(1)}%">${esc(e.milestoneId === 'final' ? 'final' : e.milestoneId)}</span>`;
+      }).join('')}
+    </div>
+  </div>`;
+}
+
+// ─── Helpers ────────────────────────────────────────────────────────────────
+
+function formatDateShort(iso: string): string {
+  try {
+    const d = new Date(iso);
+    return d.toLocaleString('en-US', { month: 'short', day: 'numeric', year: 'numeric', hour: '2-digit', minute: '2-digit' });
+  } catch { return iso; }
+}
+
+function esc(s: string | number | undefined | null): string {
+  if (s == null) return '';
+  return String(s).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;').replace(/'/g, '&#39;');
+}
+
+// ─── Index CSS ──────────────────────────────────────────────────────────────
+
+const INDEX_CSS = `
+*,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
+:root{
+  --bg-0:#0f1115;--bg-1:#16181d;--bg-2:#1e2028;--bg-3:#272a33;
+  --border-1:#2b2e38;--border-2:#3b3f4c;
+  --text-0:#ededef;--text-1:#a1a1aa;--text-2:#71717a;
+  --accent:#5e6ad2;--accent-subtle:rgba(94,106,210,.12);
+  --font:'Inter',-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;
+  --mono:'JetBrains Mono','Fira Code',ui-monospace,monospace;
+}
+html{font-size:13px}
+body{background:var(--bg-0);color:var(--text-0);font-family:var(--font);line-height:1.6;-webkit-font-smoothing:antialiased}
+a{color:var(--accent);text-decoration:none}
+a:hover{text-decoration:underline}
+h2{font-size:14px;font-weight:600;text-transform:uppercase;letter-spacing:.5px;color:var(--text-1);margin-bottom:16px;padding-bottom:8px;border-bottom:1px solid var(--border-1)}
+h3{font-size:13px;font-weight:600;color:var(--text-1);margin:16px 0 8px}
+code{font-family:var(--mono);font-size:12px;background:var(--bg-3);padding:1px 5px;border-radius:3px}
+.empty{color:var(--text-2);font-size:13px;padding:8px 0}
+.count{font-size:11px;font-weight:500;color:var(--text-2);background:var(--bg-3);border-radius:3px;padding:1px 6px}
+
+/* Header */
+header{background:var(--bg-1);border-bottom:1px solid var(--border-1);padding:12px 32px;position:sticky;top:0;z-index:100}
+.hdr-inner{display:flex;align-items:center;gap:16px;max-width:1280px;margin:0 auto}
+.branding{display:flex;align-items:baseline;gap:6px;flex-shrink:0}
+.logo{font-size:18px;font-weight:800;letter-spacing:-.5px;color:var(--text-0)}
+.ver{font-size:10px;color:var(--text-2);font-family:var(--mono)}
+.hdr-meta{flex:1;min-width:0}
+.hdr-meta h1{font-size:15px;font-weight:600}
+.hdr-subtitle{color:var(--text-2);font-weight:400;font-size:13px;margin-left:4px}
+.hdr-path{font-size:11px;color:var(--text-2);font-family:var(--mono);display:block;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
+.hdr-right{text-align:right;flex-shrink:0}
+.gen-lbl{font-size:10px;color:var(--text-2);text-transform:uppercase;letter-spacing:.5px;display:block}
+.gen{font-size:11px;color:var(--text-1)}
+
+/* Layout */
+.layout{display:grid;grid-template-columns:200px 1fr;gap:0;max-width:1280px;margin:0 auto;min-height:calc(100vh - 120px)}
+
+/* Sidebar */
+.sidebar{background:var(--bg-1);border-right:1px solid var(--border-1);padding:20px 14px;position:sticky;top:52px;height:calc(100vh - 52px);overflow-y:auto}
+.sidebar-title{font-size:10px;font-weight:600;color:var(--text-2);text-transform:uppercase;letter-spacing:.5px;margin-bottom:12px}
+.toc-group{margin-bottom:14px}
+.toc-group-label{font-size:11px;font-weight:600;color:var(--text-1);margin-bottom:3px;font-family:var(--mono)}
+.toc-group ul{list-style:none;display:flex;flex-direction:column;gap:1px}
+.toc-group li{display:flex;align-items:center;gap:6px}
+.toc-group a{font-size:11px;color:var(--text-2);padding:2px
4px;border-radius:3px;flex:1;overflow:hidden;text-overflow:ellipsis;white-space:nowrap} +.toc-group a:hover{background:var(--bg-2);color:var(--text-0);text-decoration:none} +.toc-kind{font-size:9px;color:var(--text-2);font-family:var(--mono);flex-shrink:0} + +/* Main */ +main{padding:28px;display:flex;flex-direction:column;gap:40px} + +/* Overview */ +.idx-summary{display:flex;flex-wrap:wrap;gap:1px;background:var(--border-1);border:1px solid var(--border-1);border-radius:4px;overflow:hidden;margin-bottom:16px} +.idx-stat{background:var(--bg-1);padding:10px 16px;display:flex;flex-direction:column;gap:2px;min-width:100px;flex:1} +.idx-val{font-size:18px;font-weight:600;color:var(--text-0);font-variant-numeric:tabular-nums} +.idx-lbl{font-size:10px;color:var(--text-2);text-transform:uppercase;letter-spacing:.4px} +.idx-progress{display:flex;align-items:center;gap:10px;margin-top:10px} +.idx-bar-track{flex:1;height:4px;background:var(--bg-3);border-radius:2px;overflow:hidden} +.idx-bar-fill{height:100%;background:var(--accent);border-radius:2px} +.idx-pct{font-size:12px;font-weight:600;color:var(--text-1);min-width:40px;text-align:right} + +/* Sparkline */ +.sparkline-wrap{margin-top:20px} +.sparkline{position:relative} +.spark-svg{display:block;background:var(--bg-1);border:1px solid var(--border-1);border-radius:4px;overflow:visible;max-width:100%} +.spark-line{stroke:var(--accent);stroke-width:1.5;fill:none} +.spark-dot{fill:var(--accent);stroke:var(--bg-1);stroke-width:2;cursor:pointer} +.spark-dot:hover{r:4;fill:var(--text-0)} +.spark-lbl{font-size:10px;fill:var(--text-2);font-family:var(--mono)} +.spark-axis{display:flex;position:relative;height:18px;margin-top:2px} +.spark-tick{position:absolute;transform:translateX(-50%);font-size:9px;color:var(--text-2);font-family:var(--mono);white-space:nowrap} + +/* Report cards */ +.cards-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(260px,1fr));gap:10px} +.report-card{ + display:flex;flex-direction:column;gap:6px; + background:var(--bg-1);border:1px solid var(--border-1);border-radius:4px; + padding:14px;text-decoration:none;color:var(--text-0); + transition:border-color .12s; +} +.report-card:hover{border-color:var(--accent);text-decoration:none} +.card-latest{border-color:var(--accent)} +.card-top{display:flex;align-items:center;gap:8px} +.card-label{flex:1;font-weight:500;font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap} +.card-kind{font-size:10px;color:var(--text-2);font-family:var(--mono);flex-shrink:0} +.card-date{font-size:11px;color:var(--text-2)} +.card-progress{display:flex;align-items:center;gap:6px} +.card-bar-track{flex:1;height:3px;background:var(--bg-3);border-radius:2px;overflow:hidden} +.card-bar-fill{height:100%;background:var(--accent);border-radius:2px} +.card-pct{font-size:11px;color:var(--text-2);min-width:30px;text-align:right} +.card-stats{display:flex;gap:8px;flex-wrap:wrap} +.card-stats span{font-size:11px;color:var(--text-2);font-variant-numeric:tabular-nums} +.card-delta{display:flex;gap:4px;flex-wrap:wrap} +.card-delta span{font-size:10px;color:var(--text-1);font-family:var(--mono)} +.card-latest-badge{display:none} + +/* Footer */ +footer{border-top:1px solid var(--border-1);padding:16px 32px} +.ftr-inner{display:flex;align-items:center;gap:6px;justify-content:center;font-size:11px;color:var(--text-2)} +.ftr-sep{color:var(--border-2)} + +@media(max-width:768px){ + .layout{grid-template-columns:1fr} + .sidebar{position:static;height:auto;border-right:none;border-bottom:1px 
solid var(--border-1)} +} +@media print{ + .sidebar{display:none} + header{position:static} + body{background:#fff;color:#1a1a1a} + :root{--bg-0:#fff;--bg-1:#fafafa;--bg-2:#f5f5f5;--bg-3:#ebebeb;--border-1:#e5e5e5;--border-2:#d4d4d4;--text-0:#1a1a1a;--text-1:#525252;--text-2:#a3a3a3;--accent:#4f46e5} +} +`; diff --git a/src/resources/extensions/gsd/skills/gsd-headless/SKILL.md b/src/resources/extensions/gsd/skills/gsd-headless/SKILL.md new file mode 100644 index 000000000..cbb6ec23c --- /dev/null +++ b/src/resources/extensions/gsd/skills/gsd-headless/SKILL.md @@ -0,0 +1,178 @@ +--- +name: gsd-headless +description: Orchestrate GSD (Get Shit Done) projects programmatically via headless CLI. Use when an agent needs to create milestones from specs, execute software development workflows, monitor task progress, check project status, or control GSD execution (pause/stop/skip/steer). Triggers on requests to "run gsd", "create milestone", "execute project", "check gsd status", "orchestrate development", "run headless workflow", or any programmatic interaction with the GSD project management system. Essential for building orchestrators that coordinate multiple GSD workers. +--- + +# GSD Headless Orchestration + +Run GSD commands without TUI via `gsd headless`. Spawns an RPC child process, auto-responds to UI prompts, streams progress. + +## Command Syntax + +```bash +gsd headless [flags] [command] [args...] +``` + +**Flags:** `--timeout N` (ms, default 300000), `--json` (JSONL to stdout), `--model ID`, `--verbose` +**Exit codes:** 0=complete, 1=error/timeout, 2=blocked + +## Core Workflows + +### 1. Create + Execute a Milestone (end-to-end) + +```bash +gsd headless new-milestone --context spec.md --auto +``` + +Reads spec, bootstraps `.gsd/`, creates milestone, then chains into auto-mode executing all phases (discuss → research → plan → execute → summarize → complete). + +Extra flags for `new-milestone`: `--context ` (use `-` for stdin), `--context-text `, `--auto`. + +### 2. Run All Queued Work + +```bash +gsd headless auto +``` + +Default command. Loops through all pending units until milestone complete or blocked. + +### 3. Run One Unit + +```bash +gsd headless next +``` + +Execute exactly one unit (task/slice/milestone step), then exit. Ideal for step-by-step orchestration with external decision logic between steps. + +### 4. Check Status + +```bash +gsd headless --json status +``` + +Returns project state: active milestone/slice/task, phase, progress counts, blockers. Parse the JSONL output for machine-readable state. + +### 5. Dispatch Specific Phase + +```bash +gsd headless dispatch research|plan|execute|complete|reassess|uat|replan +``` + +Force-route to a specific phase, bypassing normal state-machine routing. + +## Orchestrator Patterns + +### Poll-and-React Loop + +```bash +# Check status, decide what to do +STATUS=$(gsd headless --json status 2>/dev/null) +EXIT=$? + +case $EXIT in + 0) echo "Complete" ;; + 2) echo "Blocked — needs intervention" ;; + *) echo "Error" ;; +esac +``` + +### Step-by-Step with Monitoring + +```bash +while true; do + gsd headless next + EXIT=$? + [ $EXIT -ne 0 ] && break + # Check progress, log, decide whether to continue + gsd headless --json status +done +``` + +### Multi-Session Orchestration + +GSD tracks concurrent workers via file-based IPC in `.gsd/parallel/`. See [references/multi-session.md](references/multi-session.md) for the full architecture. + +**Quick overview:** + +Each worker spawns with `GSD_MILESTONE_LOCK=M00X` + its own git worktree. 
Workers write heartbeats to `.gsd/parallel/<MID>.status.json`. The orchestrator enumerates all status files to get a dashboard of all workers, and sends commands via signal files.
+
+```bash
+# Spawn a worker for milestone M001 in its worktree
+GSD_MILESTONE_LOCK=M001 GSD_PARALLEL_WORKER=1 \
+  gsd headless --json auto \
+  --cwd .gsd/worktrees/M001 2>worker-M001.log &
+
+# Monitor all workers: read .gsd/parallel/*.status.json
+for f in .gsd/parallel/*.status.json; do
+  jq '{mid: .milestoneId, state: .state, unit: .currentUnit.id, cost: .cost}' "$f"
+done
+
+# Send pause signal to M001
+echo '{"signal":"pause","sentAt":'$(date +%s000)',"from":"coordinator"}' \
+  > .gsd/parallel/M001.signal.json
+```
+
+**Status file fields:** `milestoneId`, `pid`, `state` (running/paused/stopped/error), `currentUnit`, `completedUnits`, `cost`, `lastHeartbeat`, `startedAt`, `worktreePath`.
+
+**Signal commands:** `pause`, `resume`, `stop`, `rebase`.
+
+**Liveness detection:** PID alive check (`kill -0 $pid`) + heartbeat freshness (30s timeout). Stale sessions are auto-cleaned.
+
+**For multiple projects:** each project has its own `.gsd/` directory. The orchestrator must track `(projectPath, milestoneId)` tuples externally.
+
+### JSONL Event Stream
+
+Use `--json` to get real-time events on stdout for downstream processing:
+
+```bash
+gsd headless --json auto 2>/dev/null | while read -r line; do
+  TYPE=$(echo "$line" | jq -r '.type')
+  case "$TYPE" in
+    tool_execution_start) echo "Tool: $(echo "$line" | jq -r '.toolName')" ;;
+    extension_ui_request) echo "GSD: $(echo "$line" | jq -r '.message // .title // empty')" ;;
+    agent_end) echo "Session ended" ;;
+  esac
+done
+```
+
+Event types: `agent_start`, `agent_end`, `tool_execution_start`, `tool_execution_end`, `extension_ui_request`, `message_update`, `error`.
+
+## Answer Injection
+
+Pre-supply answers for non-interactive runs. See [references/answer-injection.md](references/answer-injection.md) for schema and usage.
+
+## GSD Project Structure
+
+All state lives in `.gsd/` as markdown files (version-controllable):
+
+```
+.gsd/
+  milestones/M001/
+    M001-CONTEXT.md        # Requirements, scope, decisions
+    M001-ROADMAP.md        # Slices with tasks, dependencies, checkboxes
+    M001-SUMMARY.md        # Completion summary
+    slices/S01/
+      S01-PLAN.md          # Task list
+      S01-SUMMARY.md       # Slice summary with frontmatter
+      tasks/T01-PLAN.md    # Individual task spec
+```
+
+State is derived from files on disk — checkboxes in ROADMAP.md are the source of truth for completion.
+
+## All Headless Commands
+
+Quick reference — see [references/commands.md](references/commands.md) for the complete list.
+
+| Command | Purpose |
+|---------|---------|
+| `auto` | Run all queued units (default) |
+| `next` | Run one unit |
+| `status` | Progress dashboard |
+| `new-milestone` | Create milestone from spec |
+| `queue` | Queue/reorder milestones |
+| `history` | View execution history |
+| `stop` / `pause` | Control auto-mode |
+| `dispatch <phase>` | Force specific phase |
+| `skip` / `undo` | Unit control |
+| `doctor` | Health check + auto-fix |
+| `steer <guidance>` | Hard-steer plan mid-execution |
diff --git a/src/resources/extensions/gsd/skills/gsd-headless/references/answer-injection.md b/src/resources/extensions/gsd/skills/gsd-headless/references/answer-injection.md
new file mode 100644
index 000000000..ecf21f87f
--- /dev/null
+++ b/src/resources/extensions/gsd/skills/gsd-headless/references/answer-injection.md
@@ -0,0 +1,54 @@
+# Answer Injection
+
+Pre-supply answers to eliminate interactive prompts during headless execution.
+
+## Answer File Schema
+
+```json
+{
+  "questions": {
+    "question_id": "selected_option_label",
+    "multi_select_question": ["option_a", "option_b"]
+  },
+  "secrets": {
+    "API_KEY": "sk-...",
+    "DATABASE_URL": "postgres://..."
+  },
+  "defaults": {
+    "strategy": "first_option"
+  }
+}
+```
+
+### Fields
+
+- **questions**: Map question ID → answer. String for single-select, string[] for multi-select.
+- **secrets**: Map env var name → value. Used for `secure_env_collect` tool calls. Values are never logged.
+- **defaults.strategy**: Fallback for unmatched questions.
+  - `"first_option"` — auto-select first available option
+  - `"cancel"` — cancel the request
+
+## How It Works
+
+Two-phase correlation:
+1. **Observe** `tool_execution_start` events for `ask_user_questions` — extracts question metadata (ID, options, allowMultiple)
+2. **Match** subsequent `extension_ui_request` events to metadata, respond with pre-supplied answer
+
+Handles out-of-order events (extension_ui_request can arrive before tool_execution_start in RPC mode) via a deferred processing queue.
+
+## Without Answer Injection
+
+Headless mode has built-in auto-responders:
+- **select** → picks first option
+- **confirm** → auto-confirms
+- **input** → empty string
+- **editor** → returns prefill or empty
+
+Answer injection overrides these defaults with specific answers when precision matters.
+
+## Diagnostics
+
+The injector tracks stats:
+- `questionsAnswered` / `questionsDefaulted`
+- `secretsProvided` / `secretsMissing`
+- `fireAndForgetConsumed` / `confirmationsHandled`
diff --git a/src/resources/extensions/gsd/skills/gsd-headless/references/commands.md b/src/resources/extensions/gsd/skills/gsd-headless/references/commands.md
new file mode 100644
index 000000000..ac1bf4d00
--- /dev/null
+++ b/src/resources/extensions/gsd/skills/gsd-headless/references/commands.md
@@ -0,0 +1,59 @@
+# GSD Commands Reference
+
+All commands can be run via `gsd headless [command]`.
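+
+For example, a typical supervised invocation — step one unit with JSONL output and a 10-minute cap (the flag values here are illustrative, not defaults):
+
+```bash
+gsd headless --json --timeout 600000 next
+```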
+
+## Workflow Commands
+
+| Command | Description |
+|---------|-------------|
+| `auto` | Autonomous mode — loop until milestone complete (default) |
+| `next` | Step mode — execute one unit, then exit |
+| `stop` | Stop auto-mode gracefully |
+| `pause` | Pause auto-mode (preserves state, resumable) |
+| `new-milestone` | Create milestone from specification (requires `--context`) |
+| `dispatch <phase>` | Force-dispatch: research, plan, execute, complete, reassess, uat, replan |
+
+## Status & Monitoring
+
+| Command | Description |
+|---------|-------------|
+| `status` | Progress dashboard (active unit, phase, blockers) |
+| `visualize` | Workflow visualizer (deps, metrics, timeline) |
+| `history` | Execution history (supports --cost, --phase, --model, limit) |
+
+## Unit Control
+
+| Command | Description |
+|---------|-------------|
+| `skip` | Prevent a unit from auto-mode dispatch |
+| `undo` | Revert last completed unit (--force flag) |
+| `steer <guidance>` | Hard-steer plan documents during execution |
+| `queue` | Queue and reorder future milestones |
+| `capture` | Fire-and-forget thought capture |
+| `triage` | Manually trigger triage of pending captures |
+
+## Configuration & Health
+
+| Command | Description |
+|---------|-------------|
+| `prefs` | Manage preferences (global/project/status/wizard/setup) |
+| `config` | Set API keys for external tools |
+| `doctor` | Runtime health checks with auto-fix |
+| `hooks` | Show configured post-unit and pre-dispatch hooks |
+| `knowledge <text>` | Add persistent project knowledge |
+| `cleanup` | Remove merged branches or snapshots |
+| `export` | Export results (--json, --markdown) |
+| `migrate` | Migrate v1 .planning directory to .gsd format |
+
+## Phases
+
+GSD workflows progress through these phases:
+`pre-planning` → `needs-discussion` → `discussing` → `researching` → `planning` → `executing` → `verifying` → `summarizing` → `advancing` → `validating-milestone` → `completing-milestone` → `complete`
+
+Special phases: `paused`, `blocked`, `replanning-slice`
+
+## Hierarchy
+
+- **Milestone**: Shippable version (4-10 slices, 1-4 weeks)
+- **Slice**: One demoable vertical capability (1-7 tasks, 1-3 days)
+- **Task**: One context-window-sized unit of work (one session)
diff --git a/src/resources/extensions/gsd/skills/gsd-headless/references/multi-session.md b/src/resources/extensions/gsd/skills/gsd-headless/references/multi-session.md
new file mode 100644
index 000000000..ff24a9461
--- /dev/null
+++ b/src/resources/extensions/gsd/skills/gsd-headless/references/multi-session.md
@@ -0,0 +1,185 @@
+# Multi-Session Orchestration
+
+How to run and monitor multiple concurrent GSD sessions.
+
+## Architecture
+
+GSD uses **file-based IPC** — no sockets or ports. All coordination happens through JSON files in `.gsd/parallel/`.
+
+```
+.gsd/parallel/
+├── M001.status.json   # Worker heartbeat + state
+├── M001.signal.json   # Coordinator → worker commands (ephemeral)
+├── M002.status.json
+├── M003.status.json
+└── ...
+```
+
+## Worker Isolation
+
+Each worker gets:
+1. **`GSD_MILESTONE_LOCK=M00X`** — state derivation only sees this milestone
+2. **`GSD_PARALLEL_WORKER=1`** — prevents nested parallel spawns
+3. **Own git worktree** at `.gsd/worktrees/M00X/` — branch `milestone/M00X`
+
+Workers cannot interfere with each other: each runs in its own working tree on its own git branch.
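+
+As a sketch of what that isolation amounts to — GSD creates the worktrees itself; the command below is only the rough manual equivalent, with path and branch names following the convention above:
+
+```bash
+# One detached working tree per milestone, on its own branch
+git worktree add -b milestone/M001 .gsd/worktrees/M001
+```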
+
+## Status File Schema
+
+Written atomically (`.tmp` + rename) by each worker at `.gsd/parallel/<MID>.status.json`:
+
+```json
+{
+  "milestoneId": "M001",
+  "pid": 12345,
+  "state": "running",
+  "currentUnit": {
+    "type": "task",
+    "id": "T03",
+    "startedAt": 1710000000000
+  },
+  "completedUnits": 7,
+  "cost": 1.23,
+  "lastHeartbeat": 1710000015000,
+  "startedAt": 1710000000000,
+  "worktreePath": ".gsd/worktrees/M001"
+}
+```
+
+**States:** `running`, `paused`, `stopped`, `error`
+
+## Signal Files
+
+Coordinator writes to `.gsd/parallel/<MID>.signal.json`. The worker consumes and deletes it on the next dispatch cycle.
+
+```json
+{
+  "signal": "pause",
+  "sentAt": 1710000020000,
+  "from": "coordinator"
+}
+```
+
+**Signals:** `pause`, `resume`, `stop`, `rebase`
+
+## Spawning Workers
+
+```bash
+# Spawn worker in its worktree
+GSD_MILESTONE_LOCK=M001 \
+GSD_PARALLEL_WORKER=1 \
+GSD_BIN_PATH=$(which gsd) \
+  gsd --mode json --print "/gsd auto" \
+  2>logs/M001.log &
+WORKER_PID=$!
+```
+
+Workers emit NDJSON on stdout. Parse `message_end` events for cost tracking:
+
+```bash
+# Extract cost from worker output
+gsd --mode json --print "/gsd auto" | while read -r line; do
+  COST=$(echo "$line" | jq -r 'select(.type=="message_end") | .message.usage.cost.total // empty')
+  [ -n "$COST" ] && echo "Cost update: $COST"
+done
+```
+
+## Monitoring All Workers
+
+```bash
+# Dashboard: enumerate all status files
+for f in .gsd/parallel/*.status.json; do
+  [ -f "$f" ] || continue
+  jq -r '[.milestoneId, .state, (.currentUnit.id // "idle"), "\(.cost | tostring)$"] | join("\t")' "$f"
+done
+
+# Liveness check
+for f in .gsd/parallel/*.status.json; do
+  PID=$(jq -r '.pid' "$f")
+  MID=$(jq -r '.milestoneId' "$f")
+  if kill -0 "$PID" 2>/dev/null; then
+    echo "$MID: alive (pid=$PID)"
+  else
+    echo "$MID: DEAD (pid=$PID) — cleanup needed"
+    rm "$f"
+  fi
+done
+```
+
+## Sending Commands
+
+```bash
+# Pause a worker
+send_signal() {
+  local MID=$1 SIGNAL=$2
+  echo "{\"signal\":\"$SIGNAL\",\"sentAt\":$(date +%s000),\"from\":\"coordinator\"}" \
+    > ".gsd/parallel/${MID}.signal.json"
+}
+
+send_signal M001 pause
+send_signal M002 stop
+send_signal M003 resume
+```
+
+## Budget Enforcement
+
+Track aggregate cost across all workers:
+
+```bash
+TOTAL=$(jq -s 'map(.cost) | add // 0' .gsd/parallel/*.status.json)
+CEILING=50.00
+if (( $(echo "$TOTAL > $CEILING" | bc -l) )); then
+  echo "Budget exceeded ($TOTAL > $CEILING) — stopping all"
+  for f in .gsd/parallel/*.status.json; do
+    MID=$(jq -r '.milestoneId' "$f")
+    send_signal "$MID" stop
+  done
+fi
+```
+
+## Stale Session Cleanup
+
+A session is stale when:
+- PID is dead (`kill -0 $pid` fails), OR
+- `lastHeartbeat` is older than 30 seconds
+
+```bash
+NOW=$(date +%s000)
+STALE_THRESHOLD=30000
+for f in .gsd/parallel/*.status.json; do
+  PID=$(jq -r '.pid' "$f")
+  HB=$(jq -r '.lastHeartbeat' "$f")
+  AGE=$((NOW - HB))
+  if ! kill -0 "$PID" 2>/dev/null || [ "$AGE" -gt "$STALE_THRESHOLD" ]; then
+    echo "Stale: $(jq -r '.milestoneId' "$f") — removing"
+    rm "$f"
+  fi
+done
+```
+
+## Multi-Project Orchestration
+
+Within one project, milestones are tracked automatically in `.gsd/parallel/`. For orchestrating across **multiple projects**, maintain an external registry:
+
+```json
+{
+  "sessions": [
+    { "project": "/path/to/project-a", "milestoneId": "M001" },
+    { "project": "/path/to/project-b", "milestoneId": "M001" },
+    { "project": "/path/to/project-b", "milestoneId": "M002" }
+  ]
+}
+```
+
+Then poll each project's `.gsd/parallel/` directory.
GSD has no cross-project awareness — the orchestrator must bridge this gap. + +## Built-in Parallel Commands + +Inside an interactive GSD session, these commands manage the parallel orchestrator: + +| Command | Description | +|---------|-------------| +| `/gsd parallel start` | Analyze eligibility, spawn workers | +| `/gsd parallel status` | Show all workers, costs, progress | +| `/gsd parallel stop [MID]` | Stop one or all workers | +| `/gsd parallel pause [MID]` | Pause without killing | +| `/gsd parallel resume [MID]` | Resume paused worker | +| `/gsd parallel merge [MID]` | Merge completed milestone branch | diff --git a/src/resources/extensions/gsd/templates/task-summary.md b/src/resources/extensions/gsd/templates/task-summary.md index 1f7f6c719..eda12a9df 100644 --- a/src/resources/extensions/gsd/templates/task-summary.md +++ b/src/resources/extensions/gsd/templates/task-summary.md @@ -37,6 +37,15 @@ blocker_discovered: false {{whatWasVerifiedAndHow — commands run, tests passed, behavior confirmed}} +## Verification Evidence + + + +| # | Command | Exit Code | Verdict | Duration | +|---|---------|-----------|---------|----------| +| {{row}} | {{command}} | {{exitCode}} | {{verdict}} | {{duration}} | + ## Diagnostics {{howToInspectWhatThisTaskBuiltLater — status surfaces, logs, error shapes, failure artifacts, or none}} diff --git a/src/resources/extensions/gsd/tests/continue-here.test.ts b/src/resources/extensions/gsd/tests/continue-here.test.ts index 6edcbfde1..eb31e084f 100644 --- a/src/resources/extensions/gsd/tests/continue-here.test.ts +++ b/src/resources/extensions/gsd/tests/continue-here.test.ts @@ -201,4 +201,85 @@ describe("continue-here", () => { } }); }); + + describe("context-pressure monitor integration", () => { + it("should fire wrap-up when context >= threshold and mark continueHereFired", async () => { + const { writeUnitRuntimeRecord, readUnitRuntimeRecord, clearUnitRuntimeRecord } = await import("../unit-runtime.js"); + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "continue-here-monitor-")); + try { + // Simulate the monitor's one-shot logic: + // 1. 
Write initial runtime record (continueHereFired=false) + const startedAt = Date.now(); + writeUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01", startedAt, { + phase: "dispatched", + wrapupWarningSent: false, + }); + + const budget = computeBudgets(128_000); + const threshold = budget.continueThresholdPercent; + + // Simulate the monitor poll: context at 75% (above threshold) + const contextPercent = 75; + const runtime = readUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01"); + assert.ok(runtime, "runtime record should exist"); + assert.equal(runtime!.continueHereFired, false, "initially false"); + + // Check: should fire + const shouldFire = !runtime!.continueHereFired + && contextPercent >= threshold; + assert.ok(shouldFire, "should fire when context >= threshold and not yet fired"); + + // Mark as fired (what the monitor does) + writeUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01", startedAt, { + continueHereFired: true, + }); + + // Verify one-shot: second poll should NOT fire + const runtime2 = readUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01"); + assert.ok(runtime2, "runtime record should still exist"); + assert.equal(runtime2!.continueHereFired, true, "should be marked as fired"); + + const shouldFireAgain = !runtime2!.continueHereFired + && contextPercent >= threshold; + assert.equal(shouldFireAgain, false, "must not fire again — one-shot guard"); + + // Clean up + clearUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01"); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it("should not fire when context is below threshold", () => { + const budget = computeBudgets(200_000); + const threshold = budget.continueThresholdPercent; + + // Simulate monitor poll with context at 50% + const contextPercent = 50; + const continueHereFired = false; + const shouldFire = !continueHereFired && contextPercent >= threshold; + assert.equal(shouldFire, false, "50% should not trigger continue-here"); + }); + + it("should not fire when contextUsage is null/undefined", () => { + const budget = computeBudgets(128_000); + const threshold = budget.continueThresholdPercent; + + // Simulate the full guard chain from the monitor + const usageUndefined = undefined as { percent: number | null } | undefined; + const shouldFire1 = usageUndefined != null + && usageUndefined.percent != null + && usageUndefined.percent >= threshold; + assert.equal(shouldFire1, false, "undefined usage must not fire"); + + const usageNullPercent: { percent: number | null } = { percent: null }; + const shouldFire2 = usageNullPercent.percent != null + && usageNullPercent.percent >= threshold; + assert.equal(shouldFire2, false, "null percent must not fire"); + }); + }); }); diff --git a/src/resources/extensions/gsd/tests/dispatch-missing-task-plans.test.ts b/src/resources/extensions/gsd/tests/dispatch-missing-task-plans.test.ts new file mode 100644 index 000000000..1c92b64a0 --- /dev/null +++ b/src/resources/extensions/gsd/tests/dispatch-missing-task-plans.test.ts @@ -0,0 +1,132 @@ +/** + * Regression test for issue #909. + * + * When S##-PLAN.md exists (causing deriveState → phase:'executing') but the + * individual task plan files (tasks/T01-PLAN.md, etc.) are absent, the dispatch + * table must recover by re-running plan-slice — NOT hard-stop. + * + * Prior behaviour: action:"stop" → infinite loop on restart. + * Fixed behaviour: action:"dispatch" unitType:"plan-slice". 
+ */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { resolveDispatch } from "../auto-dispatch.ts"; +import type { DispatchContext } from "../auto-dispatch.ts"; +import type { GSDState } from "../types.ts"; + +function makeState(overrides: Partial = {}): GSDState { + return { + activeMilestone: { id: "M002", title: "Test Milestone" }, + activeSlice: { id: "S03", title: "Third Slice" }, + activeTask: { id: "T01", title: "First Task" }, + phase: "executing", + recentDecisions: [], + blockers: [], + nextAction: "", + registry: [], + ...overrides, + }; +} + +function makeContext(basePath: string, stateOverrides?: Partial): DispatchContext { + return { + basePath, + mid: "M002", + midTitle: "Test Milestone", + state: makeState(stateOverrides), + prefs: undefined, + }; +} + +// ─── Scaffold helpers ────────────────────────────────────────────────────── + +function scaffoldSlicePlan(basePath: string, mid: string, sid: string): void { + const dir = join(basePath, ".gsd", "milestones", mid, "slices", sid); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${sid}-PLAN.md`), [ + `# ${sid}: Third Slice`, + "", + "## Tasks", + "- [ ] **T01: Do something** `est:1h`", + "- [ ] **T02: Do another thing** `est:30m`", + "", + ].join("\n")); +} + +function scaffoldTaskPlan(basePath: string, mid: string, sid: string, tid: string): void { + const dir = join(basePath, ".gsd", "milestones", mid, "slices", sid, "tasks"); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${tid}-PLAN.md`), [ + `# ${tid}: Do something`, + "", + "## Steps", + "- [ ] Step 1", + "", + ].join("\n")); +} + +// ─── Tests ───────────────────────────────────────────────────────────────── + +test("dispatch: missing task plan triggers plan-slice (not stop) — issue #909", async () => { + const tmp = mkdtempSync(join(tmpdir(), "gsd-909-")); + try { + // Slice plan exists with tasks, but tasks/ directory is empty + scaffoldSlicePlan(tmp, "M002", "S03"); + + const ctx = makeContext(tmp); + const result = await resolveDispatch(ctx); + + assert.equal(result.action, "dispatch", "should dispatch, not stop"); + assert.ok(result.action === "dispatch" && result.unitType === "plan-slice", + `unitType should be plan-slice, got: ${result.action === "dispatch" ? result.unitType : "(stop)"}`); + assert.ok(result.action === "dispatch" && result.unitId === "M002/S03", + `unitId should be M002/S03, got: ${result.action === "dispatch" ? result.unitId : "(stop)"}`); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("dispatch: present task plan proceeds to execute-task normally", async () => { + const tmp = mkdtempSync(join(tmpdir(), "gsd-909-ok-")); + try { + scaffoldSlicePlan(tmp, "M002", "S03"); + scaffoldTaskPlan(tmp, "M002", "S03", "T01"); + + const ctx = makeContext(tmp); + const result = await resolveDispatch(ctx); + + assert.equal(result.action, "dispatch"); + assert.ok(result.action === "dispatch" && result.unitType === "execute-task", + `unitType should be execute-task, got: ${result.action === "dispatch" ? result.unitType : "(stop)"}`); + assert.ok(result.action === "dispatch" && result.unitId === "M002/S03/T01", + `unitId should be M002/S03/T01, got: ${result.action === "dispatch" ? 
result.unitId : "(stop)"}`); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("dispatch: plan-slice recovery loop — second call after plan-slice still recovers cleanly", async () => { + // Simulate: plan-slice ran but T01-PLAN.md is still missing (e.g. agent crashed mid-write). + // Dispatch should still re-dispatch plan-slice, not hard-stop. + const tmp = mkdtempSync(join(tmpdir(), "gsd-909-loop-")); + try { + scaffoldSlicePlan(tmp, "M002", "S03"); + + const ctx = makeContext(tmp); + const r1 = await resolveDispatch(ctx); + assert.equal(r1.action, "dispatch"); + assert.ok(r1.action === "dispatch" && r1.unitType === "plan-slice"); + + // Still no task plan written — dispatch again + const r2 = await resolveDispatch(ctx); + assert.equal(r2.action, "dispatch"); + assert.ok(r2.action === "dispatch" && r2.unitType === "plan-slice", + "should keep dispatching plan-slice until task plans appear"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); diff --git a/src/resources/extensions/gsd/tests/doctor-proactive.test.ts b/src/resources/extensions/gsd/tests/doctor-proactive.test.ts index 4e4d86bb8..0bbbf2a83 100644 --- a/src/resources/extensions/gsd/tests/doctor-proactive.test.ts +++ b/src/resources/extensions/gsd/tests/doctor-proactive.test.ts @@ -193,6 +193,20 @@ async function main(): Promise { assertEq(result.issues.length, 0, "no issues on clean state"); } + console.log("\n=== health gate: missing STATE.md does NOT block dispatch (#889) ==="); + { + const dir = realpathSync(mkdtempSync(join(tmpdir(), "doc-proactive-"))); + cleanups.push(dir); + // Create milestones dir but no STATE.md — mimics fresh worktree + mkdirSync(join(dir, ".gsd", "milestones", "M001"), { recursive: true }); + writeFileSync(join(dir, ".gsd", "milestones", "M001", "M001-ROADMAP.md"), "# Roadmap\n"); + + const result = await preDispatchHealthGate(dir); + assertTrue(result.proceed, "gate must NOT block when STATE.md is missing (deadlock #889)"); + assertEq(result.issues.length, 0, "missing STATE.md is not a blocking issue"); + assertTrue(result.fixesApplied.some((f: string) => f.includes("STATE.md")), "reports STATE.md status as info"); + } + console.log("\n=== health gate: stale crash lock auto-cleared ==="); { const dir = realpathSync(mkdtempSync(join(tmpdir(), "doc-proactive-"))); diff --git a/src/resources/extensions/gsd/tests/parallel-crash-recovery.test.ts b/src/resources/extensions/gsd/tests/parallel-crash-recovery.test.ts new file mode 100644 index 000000000..9e38c7262 --- /dev/null +++ b/src/resources/extensions/gsd/tests/parallel-crash-recovery.test.ts @@ -0,0 +1,298 @@ +/** + * Tests for parallel orchestrator crash recovery. + * + * Validates that orchestrator state is persisted to disk and can be + * restored after a coordinator crash, with PID liveness filtering. 
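+ *
+ * (The persisted state file is .gsd/orchestrator.json under the project root —
+ * see the stateFilePath helper below; per-worker heartbeats live separately in
+ * .gsd/parallel/ status files.)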
+ */ + +import { + mkdtempSync, + mkdirSync, + readFileSync, + writeFileSync, + existsSync, + rmSync, +} from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { + persistState, + restoreState, + resetOrchestrator, + getOrchestratorState, + type PersistedState, +} from "../parallel-orchestrator.ts"; +import { writeSessionStatus, readAllSessionStatuses, removeSessionStatus } from "../session-status-io.ts"; +import { createTestContext } from './test-helpers.ts'; + +const { assertEq, assertTrue, report } = createTestContext(); + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +function makeTempDir(): string { + const dir = mkdtempSync(join(tmpdir(), "gsd-crash-recovery-")); + mkdirSync(join(dir, ".gsd"), { recursive: true }); + return dir; +} + +function stateFilePath(basePath: string): string { + return join(basePath, ".gsd", "orchestrator.json"); +} + +function writeStateFile(basePath: string, state: PersistedState): void { + writeFileSync(stateFilePath(basePath), JSON.stringify(state, null, 2), "utf-8"); +} + +function makePersistedState(overrides: Partial = {}): PersistedState { + return { + active: true, + workers: [], + totalCost: 0, + startedAt: Date.now(), + configSnapshot: { max_workers: 3 }, + ...overrides, + }; +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +// Test 1: persistState writes valid JSON +{ + const basePath = makeTempDir(); + try { + // We can't call persistState directly without internal state set up, + // so we test the round-trip by writing a state file and reading it back + const state = makePersistedState({ + workers: [ + { + milestoneId: "M001", + title: "M001", + pid: process.pid, + worktreePath: "/tmp/wt-M001", + startedAt: Date.now(), + state: "running", + completedUnits: 3, + cost: 0.15, + }, + ], + totalCost: 0.15, + }); + writeStateFile(basePath, state); + + const raw = readFileSync(stateFilePath(basePath), "utf-8"); + const parsed = JSON.parse(raw) as PersistedState; + assertEq(parsed.active, true, "persistState: active field preserved"); + assertEq(parsed.workers.length, 1, "persistState: worker count preserved"); + assertEq(parsed.workers[0].milestoneId, "M001", "persistState: milestoneId preserved"); + assertEq(parsed.workers[0].cost, 0.15, "persistState: cost preserved"); + assertEq(parsed.totalCost, 0.15, "persistState: totalCost preserved"); + } finally { + rmSync(basePath, { recursive: true, force: true }); + } +} + +// Test 2: restoreState returns null for missing file +{ + const basePath = makeTempDir(); + try { + const result = restoreState(basePath); + assertEq(result, null, "restoreState: returns null when no state file"); + } finally { + rmSync(basePath, { recursive: true, force: true }); + } +} + +// Test 3: restoreState filters dead PIDs +{ + const basePath = makeTempDir(); + try { + // PID 99999999 is almost certainly not alive + const state = makePersistedState({ + workers: [ + { + milestoneId: "M001", + title: "M001", + pid: 99999999, + worktreePath: "/tmp/wt-M001", + startedAt: Date.now(), + state: "running", + completedUnits: 0, + cost: 0, + }, + { + milestoneId: "M002", + title: "M002", + pid: 99999998, + worktreePath: "/tmp/wt-M002", + startedAt: Date.now(), + state: "running", + completedUnits: 0, + cost: 0, + }, + ], + }); + writeStateFile(basePath, state); + + const result = restoreState(basePath); + // Both PIDs are dead, so result should be null and file should be cleaned up + assertEq(result, null, 
"restoreState: returns null when all PIDs dead"); + assertTrue(!existsSync(stateFilePath(basePath)), "restoreState: cleans up state file when all dead"); + } finally { + rmSync(basePath, { recursive: true, force: true }); + } +} + +// Test 4: restoreState keeps alive PIDs +{ + const basePath = makeTempDir(); + try { + // Use current process PID (definitely alive) + const state = makePersistedState({ + workers: [ + { + milestoneId: "M001", + title: "M001", + pid: process.pid, + worktreePath: "/tmp/wt-M001", + startedAt: Date.now(), + state: "running", + completedUnits: 5, + cost: 0.25, + }, + { + milestoneId: "M002", + title: "M002", + pid: 99999999, // dead + worktreePath: "/tmp/wt-M002", + startedAt: Date.now(), + state: "running", + completedUnits: 0, + cost: 0, + }, + ], + totalCost: 0.25, + }); + writeStateFile(basePath, state); + + const result = restoreState(basePath); + assertTrue(result !== null, "restoreState: returns state when alive PID exists"); + assertEq(result!.workers.length, 1, "restoreState: filters out dead PID"); + assertEq(result!.workers[0].milestoneId, "M001", "restoreState: keeps alive worker"); + assertEq(result!.workers[0].pid, process.pid, "restoreState: preserves PID"); + assertEq(result!.workers[0].completedUnits, 5, "restoreState: preserves progress"); + } finally { + rmSync(basePath, { recursive: true, force: true }); + } +} + +// Test 5: restoreState skips stopped/error workers even with alive PIDs +{ + const basePath = makeTempDir(); + try { + const state = makePersistedState({ + workers: [ + { + milestoneId: "M001", + title: "M001", + pid: process.pid, + worktreePath: "/tmp/wt-M001", + startedAt: Date.now(), + state: "stopped", + completedUnits: 10, + cost: 0.50, + }, + ], + }); + writeStateFile(basePath, state); + + const result = restoreState(basePath); + assertEq(result, null, "restoreState: skips stopped workers"); + } finally { + rmSync(basePath, { recursive: true, force: true }); + } +} + +// Test 6: orphan detection finds stale sessions +{ + const basePath = makeTempDir(); + try { + // Write a session status with a dead PID + mkdirSync(join(basePath, ".gsd", "parallel"), { recursive: true }); + writeSessionStatus(basePath, { + milestoneId: "M001", + pid: 99999999, + state: "running", + currentUnit: null, + completedUnits: 3, + cost: 0.10, + lastHeartbeat: Date.now(), + startedAt: Date.now(), + worktreePath: "/tmp/wt-M001", + }); + + // Write a session status with alive PID + writeSessionStatus(basePath, { + milestoneId: "M002", + pid: process.pid, + state: "running", + currentUnit: null, + completedUnits: 1, + cost: 0.05, + lastHeartbeat: Date.now(), + startedAt: Date.now(), + worktreePath: "/tmp/wt-M002", + }); + + // Read all sessions — both should exist initially + const before = readAllSessionStatuses(basePath); + assertEq(before.length, 2, "orphan: both sessions exist before detection"); + + // Now simulate orphan detection logic (same as prepareParallelStart) + const sessions = readAllSessionStatuses(basePath); + const orphans: Array<{ milestoneId: string; pid: number; alive: boolean }> = []; + for (const session of sessions) { + let alive: boolean; + try { + process.kill(session.pid, 0); + alive = true; + } catch { + alive = false; + } + orphans.push({ milestoneId: session.milestoneId, pid: session.pid, alive }); + if (!alive) { + removeSessionStatus(basePath, session.milestoneId); + } + } + + assertTrue(orphans.length === 2, "orphan: detected both sessions"); + const deadOrphan = orphans.find(o => o.milestoneId === "M001"); + 
assertTrue(deadOrphan !== undefined && !deadOrphan.alive, "orphan: M001 detected as dead"); + const aliveOrphan = orphans.find(o => o.milestoneId === "M002"); + assertTrue(aliveOrphan !== undefined && aliveOrphan.alive, "orphan: M002 detected as alive"); + + // Dead session should be cleaned up + const after = readAllSessionStatuses(basePath); + assertEq(after.length, 1, "orphan: dead session cleaned up"); + assertEq(after[0].milestoneId, "M002", "orphan: alive session remains"); + } finally { + rmSync(basePath, { recursive: true, force: true }); + } +} + +// Test 7: restoreState handles corrupt JSON gracefully +{ + const basePath = makeTempDir(); + try { + writeFileSync(stateFilePath(basePath), "{ not valid json !!!", "utf-8"); + const result = restoreState(basePath); + assertEq(result, null, "restoreState: returns null for corrupt JSON"); + } finally { + rmSync(basePath, { recursive: true, force: true }); + } +} + +// Clean up module state +resetOrchestrator(); + +report(); diff --git a/src/resources/extensions/gsd/tests/parallel-orchestration.test.ts b/src/resources/extensions/gsd/tests/parallel-orchestration.test.ts index 7cf7b80be..9be5bbe48 100644 --- a/src/resources/extensions/gsd/tests/parallel-orchestration.test.ts +++ b/src/resources/extensions/gsd/tests/parallel-orchestration.test.ts @@ -35,6 +35,7 @@ import { getWorkerStatuses, startParallel, stopParallel, + shutdownParallel, pauseWorker, resumeWorker, getAggregateCost, @@ -338,6 +339,14 @@ describe("parallel-orchestrator: lifecycle", () => { assert.ok(signal); assert.equal(signal.signal, "pause"); }); + + it("shutdownParallel deactivates the orchestrator state", async () => { + await startParallel(base, ["M001"], undefined); + assert.equal(isParallelActive(), true); + await shutdownParallel(base); + assert.equal(isParallelActive(), false); + assert.equal(getOrchestratorState(), null); + }); }); describe("parallel-orchestrator: budget", () => { diff --git a/src/resources/extensions/gsd/tests/plan-slice-prompt.test.ts b/src/resources/extensions/gsd/tests/plan-slice-prompt.test.ts new file mode 100644 index 000000000..9cdb0dbd9 --- /dev/null +++ b/src/resources/extensions/gsd/tests/plan-slice-prompt.test.ts @@ -0,0 +1,71 @@ +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; +import { createTestContext } from './test-helpers.ts'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const worktreePromptsDir = join(__dirname, "..", "prompts"); + +const { assertTrue, report } = createTestContext(); + +function loadPromptFromWorktree(name: string, vars: Record = {}): string { + const path = join(worktreePromptsDir, `${name}.md`); + let content = readFileSync(path, "utf-8"); + for (const [key, value] of Object.entries(vars)) { + content = content.replaceAll(`{{${key}}}`, value); + } + return content.trim(); +} + +const BASE_VARS = { + workingDirectory: "/tmp/test-project", + milestoneId: "M001", + sliceId: "S01", + sliceTitle: "Test Slice", + slicePath: ".gsd/milestones/M001/slices/S01", + roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md", + researchPath: ".gsd/milestones/M001/slices/S01/S01-RESEARCH.md", + outputPath: "/tmp/test-project/.gsd/milestones/M001/slices/S01/S01-PLAN.md", + inlinedContext: "--- test inlined context ---", + dependencySummaries: "", + executorContextConstraints: "", +}; + +async function main(): Promise { + + // ─── commit_docs=true (default): commit step is present ───────────────── + console.log("\n=== plan-slice prompt: 
commit_docs default (true) ==="); + { + const commitInstruction = `Commit: \`docs(S01): add slice plan\``; + const result = loadPromptFromWorktree("plan-slice", { ...BASE_VARS, commitInstruction }); + + assertTrue(result.includes("docs(S01): add slice plan"), "commit step present when commit_docs is not false"); + assertTrue(result.includes("Update `.gsd/STATE.md`"), "STATE.md update step present"); + assertTrue(!result.includes("{{commitInstruction}}"), "no unresolved placeholder"); + } + + // ─── commit_docs=false: no commit step, only STATE.md update ──────────── + console.log("\n=== plan-slice prompt: commit_docs=false ==="); + { + const commitInstruction = "Do not commit — planning docs are not tracked in git for this project."; + const result = loadPromptFromWorktree("plan-slice", { ...BASE_VARS, commitInstruction }); + + assertTrue(!result.includes("docs(S01): add slice plan"), "commit step absent when commit_docs=false"); + assertTrue(result.includes("Do not commit"), "no-commit instruction present"); + assertTrue(result.includes("Update `.gsd/STATE.md`"), "STATE.md update step still present"); + assertTrue(!result.includes("{{commitInstruction}}"), "no unresolved placeholder"); + } + + // ─── all base variables are substituted ───────────────────────────────── + console.log("\n=== plan-slice prompt: all variables substituted ==="); + { + const commitInstruction = `Commit: \`docs(S01): add slice plan\``; + const result = loadPromptFromWorktree("plan-slice", { ...BASE_VARS, commitInstruction }); + + assertTrue(!result.includes("{{"), "no unresolved placeholders remain"); + assertTrue(result.includes("M001"), "milestoneId substituted"); + assertTrue(result.includes("S01"), "sliceId substituted"); + } +} + +main().then(report); diff --git a/src/resources/extensions/gsd/tests/replan-slice.test.ts b/src/resources/extensions/gsd/tests/replan-slice.test.ts index d682a2b20..9d98afed0 100644 --- a/src/resources/extensions/gsd/tests/replan-slice.test.ts +++ b/src/resources/extensions/gsd/tests/replan-slice.test.ts @@ -493,4 +493,45 @@ console.log('\n=== doctor: no blocker → no blocker_discovered_no_replan issue rmSync(base, { recursive: true, force: true }); } +// ═══════════════════════════════════════════════════════════════════════════ +// Artifact Resolution: resolveExpectedArtifactPath for replan-slice (#858) +// ═══════════════════════════════════════════════════════════════════════════ + +import { resolveExpectedArtifactPath, verifyExpectedArtifact } from '../auto-recovery.ts'; + +console.log('\n=== artifact: resolveExpectedArtifactPath returns REPLAN.md path for replan-slice ==='); +{ + const base = createFixtureBase(); + writeRoadmap(base, 'M001', ROADMAP_ONE_SLICE); + writePlan(base, 'M001', 'S01', makePlanT01DoneT02Pending()); + + const path = resolveExpectedArtifactPath('replan-slice', 'M001/S01', base); + assertTrue(path !== null, 'resolveExpectedArtifactPath returns non-null for replan-slice'); + assertTrue(path!.endsWith('S01-REPLAN.md'), 'path ends with S01-REPLAN.md'); + rmSync(base, { recursive: true, force: true }); +} + +console.log('\n=== artifact: verifyExpectedArtifact fails when REPLAN.md missing (#858) ==='); +{ + const base = createFixtureBase(); + writeRoadmap(base, 'M001', ROADMAP_ONE_SLICE); + writePlan(base, 'M001', 'S01', makePlanT01DoneT02Pending()); + + const result = verifyExpectedArtifact('replan-slice', 'M001/S01', base); + assertEq(result, false, 'verifyExpectedArtifact returns false when REPLAN.md is missing'); + rmSync(base, { recursive: true, force: 
true }); +} + +console.log('\n=== artifact: verifyExpectedArtifact passes when REPLAN.md exists (#858) ==='); +{ + const base = createFixtureBase(); + writeRoadmap(base, 'M001', ROADMAP_ONE_SLICE); + writePlan(base, 'M001', 'S01', makePlanT01DoneT02Pending()); + writeReplanFile(base, 'M001', 'S01', '# Replan\n\nBlocker addressed.'); + + const result = verifyExpectedArtifact('replan-slice', 'M001/S01', base); + assertEq(result, true, 'verifyExpectedArtifact returns true when REPLAN.md exists'); + rmSync(base, { recursive: true, force: true }); +} + report(); diff --git a/src/resources/extensions/gsd/tests/verification-evidence.test.ts b/src/resources/extensions/gsd/tests/verification-evidence.test.ts new file mode 100644 index 000000000..a02590a85 --- /dev/null +++ b/src/resources/extensions/gsd/tests/verification-evidence.test.ts @@ -0,0 +1,743 @@ +/** + * Unit tests for the verification evidence module — JSON persistence and markdown table formatting. + * + * Tests cover: + * 1. writeVerificationJSON writes correct JSON shape (schemaVersion, taskId, timestamp, passed, discoverySource, checks) + * 2. writeVerificationJSON creates directory if it doesn't exist + * 3. writeVerificationJSON maps exitCode to verdict correctly (0 = pass, non-zero = fail) + * 4. writeVerificationJSON excludes stdout/stderr from output + * 5. writeVerificationJSON handles empty checks array + * 6. writeVerificationJSON accepts optional unitId + * 7. formatEvidenceTable returns markdown table with correct columns for checks + * 8. formatEvidenceTable returns "no checks" message for empty checks + * 9. formatEvidenceTable formats duration as seconds with 1 decimal + * 10. formatEvidenceTable uses ✅/❌ emoji for pass/fail verdict + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, readFileSync, rmSync, existsSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { + writeVerificationJSON, + formatEvidenceTable, +} from "../verification-evidence.ts"; +import type { VerificationResult } from "../types.ts"; + +function makeTempDir(prefix: string): string { + const dir = join( + tmpdir(), + `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + mkdirSync(dir, { recursive: true }); + return dir; +} + +function makeResult(overrides?: Partial): VerificationResult { + return { + passed: true, + checks: [], + discoverySource: "package-json", + timestamp: 1710000000000, + ...overrides, + }; +} + +// ─── writeVerificationJSON Tests ───────────────────────────────────────────── + +test("verification-evidence: writeVerificationJSON writes correct JSON shape", () => { + const tmp = makeTempDir("ve-shape"); + try { + const result = makeResult({ + passed: true, + checks: [ + { + command: "npm run typecheck", + exitCode: 0, + stdout: "all good", + stderr: "", + durationMs: 2340, + }, + ], + }); + + writeVerificationJSON(result, tmp, "T03"); + + const filePath = join(tmp, "T03-VERIFY.json"); + assert.ok(existsSync(filePath), "JSON file should exist"); + + const json = JSON.parse(readFileSync(filePath, "utf-8")); + assert.equal(json.schemaVersion, 1); + assert.equal(json.taskId, "T03"); + assert.equal(json.unitId, "T03"); // defaults to taskId when unitId not provided + assert.equal(json.timestamp, 1710000000000); + assert.equal(json.passed, true); + assert.equal(json.discoverySource, "package-json"); + assert.equal(json.checks.length, 1); + assert.equal(json.checks[0].command, "npm run typecheck"); + 
assert.equal(json.checks[0].exitCode, 0); + assert.equal(json.checks[0].durationMs, 2340); + assert.equal(json.checks[0].verdict, "pass"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: writeVerificationJSON creates directory if it doesn't exist", () => { + const tmp = makeTempDir("ve-mkdir"); + const nested = join(tmp, "deep", "nested", "tasks"); + try { + assert.ok(!existsSync(nested), "directory should not exist yet"); + + writeVerificationJSON(makeResult(), nested, "T01"); + + assert.ok(existsSync(nested), "directory should be created"); + assert.ok(existsSync(join(nested, "T01-VERIFY.json")), "JSON file should exist"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: writeVerificationJSON maps exitCode to verdict correctly", () => { + const tmp = makeTempDir("ve-verdict"); + try { + const result = makeResult({ + passed: false, + checks: [ + { command: "lint", exitCode: 0, stdout: "", stderr: "", durationMs: 100 }, + { command: "test", exitCode: 1, stdout: "", stderr: "fail", durationMs: 200 }, + { command: "audit", exitCode: 2, stdout: "", stderr: "err", durationMs: 300 }, + ], + }); + + writeVerificationJSON(result, tmp, "T02"); + + const json = JSON.parse(readFileSync(join(tmp, "T02-VERIFY.json"), "utf-8")); + assert.equal(json.checks[0].verdict, "pass"); + assert.equal(json.checks[1].verdict, "fail"); + assert.equal(json.checks[2].verdict, "fail"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: writeVerificationJSON excludes stdout/stderr from output", () => { + const tmp = makeTempDir("ve-no-stdio"); + try { + const result = makeResult({ + checks: [ + { + command: "echo hello", + exitCode: 0, + stdout: "hello\n", + stderr: "some warning", + durationMs: 50, + }, + ], + }); + + writeVerificationJSON(result, tmp, "T01"); + + const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"); + assert.ok(!raw.includes('"stdout"'), "JSON should not contain stdout key"); + assert.ok(!raw.includes('"stderr"'), "JSON should not contain stderr key"); + assert.ok(!raw.includes("hello\\n"), "JSON should not contain stdout value"); + assert.ok(!raw.includes("some warning"), "JSON should not contain stderr value"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: writeVerificationJSON handles empty checks array", () => { + const tmp = makeTempDir("ve-empty"); + try { + writeVerificationJSON(makeResult({ checks: [] }), tmp, "T01"); + + const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8")); + assert.equal(json.schemaVersion, 1); + assert.equal(json.passed, true); + assert.deepStrictEqual(json.checks, []); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: writeVerificationJSON uses optional unitId when provided", () => { + const tmp = makeTempDir("ve-unitid"); + try { + writeVerificationJSON(makeResult(), tmp, "T03", "M001/S01/T03"); + + const json = JSON.parse(readFileSync(join(tmp, "T03-VERIFY.json"), "utf-8")); + assert.equal(json.taskId, "T03"); + assert.equal(json.unitId, "M001/S01/T03"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +// ─── formatEvidenceTable Tests ─────────────────────────────────────────────── + +test("verification-evidence: formatEvidenceTable returns markdown table with correct columns", () => { + const result = makeResult({ + checks: [ + { command: 
"npm run typecheck", exitCode: 0, stdout: "", stderr: "", durationMs: 2340 }, + { command: "npm run lint", exitCode: 1, stdout: "", stderr: "err", durationMs: 1100 }, + ], + }); + + const table = formatEvidenceTable(result); + const lines = table.split("\n"); + + // Header row + assert.ok(lines[0].includes("# |"), "header should have # column"); + assert.ok(lines[0].includes("Command"), "header should have Command column"); + assert.ok(lines[0].includes("Exit Code"), "header should have Exit Code column"); + assert.ok(lines[0].includes("Verdict"), "header should have Verdict column"); + assert.ok(lines[0].includes("Duration"), "header should have Duration column"); + + // Separator row + assert.ok(lines[1].includes("---|"), "should have separator row"); + + // Data rows + assert.equal(lines.length, 4, "header + separator + 2 data rows"); + assert.ok(lines[2].includes("npm run typecheck"), "first row command"); + assert.ok(lines[3].includes("npm run lint"), "second row command"); +}); + +test("verification-evidence: formatEvidenceTable returns no-checks message for empty checks", () => { + const result = makeResult({ checks: [] }); + const output = formatEvidenceTable(result); + assert.equal(output, "_No verification checks discovered._"); +}); + +test("verification-evidence: formatEvidenceTable formats duration as seconds with 1 decimal", () => { + const result = makeResult({ + checks: [ + { command: "fast", exitCode: 0, stdout: "", stderr: "", durationMs: 150 }, + { command: "slow", exitCode: 0, stdout: "", stderr: "", durationMs: 2340 }, + { command: "zero", exitCode: 0, stdout: "", stderr: "", durationMs: 0 }, + ], + }); + + const table = formatEvidenceTable(result); + assert.ok(table.includes("0.1s"), "150ms → 0.1s"); + assert.ok(table.includes("2.3s"), "2340ms → 2.3s"); + assert.ok(table.includes("0.0s"), "0ms → 0.0s"); +}); + +test("verification-evidence: formatEvidenceTable uses ✅/❌ emoji for pass/fail verdict", () => { + const result = makeResult({ + passed: false, + checks: [ + { command: "pass-cmd", exitCode: 0, stdout: "", stderr: "", durationMs: 100 }, + { command: "fail-cmd", exitCode: 1, stdout: "", stderr: "", durationMs: 200 }, + ], + }); + + const table = formatEvidenceTable(result); + assert.ok(table.includes("✅ pass"), "passing check should have ✅ pass"); + assert.ok(table.includes("❌ fail"), "failing check should have ❌ fail"); +}); + +// ─── Validator Rule Tests (T03) ────────────────────────────────────────────── + +import { validateTaskSummaryContent } from "../observability-validator.ts"; + +const MINIMAL_SUMMARY_WITH_EVIDENCE = `--- +observability_surfaces: + - gate-output +--- +# T03 Summary + +## Diagnostics +Run \`npm test\` to verify. + +## Verification Evidence +| # | Command | Exit Code | Verdict | Duration | +|---|---------|-----------|---------|----------| +| 1 | npm run typecheck | 0 | ✅ pass | 2.3s | +`; + +const MINIMAL_SUMMARY_NO_EVIDENCE = `--- +observability_surfaces: + - gate-output +--- +# T03 Summary + +## Diagnostics +Run \`npm test\` to verify. +`; + +const MINIMAL_SUMMARY_PLACEHOLDER_EVIDENCE = `--- +observability_surfaces: + - gate-output +--- +# T03 Summary + +## Diagnostics +Run \`npm test\` to verify. + +## Verification Evidence +{{evidence_table}} +`; + +const MINIMAL_SUMMARY_NO_CHECKS_EVIDENCE = `--- +observability_surfaces: + - gate-output +--- +# T03 Summary + +## Diagnostics +Run \`npm test\` to verify. 
+ +## Verification Evidence +_No verification checks discovered._ +`; + +test("verification-evidence: validator accepts summary with real evidence table", () => { + const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_WITH_EVIDENCE); + const evidenceIssues = issues.filter( + (i) => i.ruleId === "evidence_block_missing" || i.ruleId === "evidence_block_placeholder", + ); + assert.equal(evidenceIssues.length, 0, "no evidence warnings for real table"); +}); + +test("verification-evidence: validator warns when evidence section is missing", () => { + const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_NO_EVIDENCE); + const match = issues.find((i) => i.ruleId === "evidence_block_missing"); + assert.ok(match, "should produce evidence_block_missing warning"); + assert.equal(match!.severity, "warning"); + assert.equal(match!.scope, "task-summary"); +}); + +test("verification-evidence: validator warns when evidence section has only placeholder text", () => { + const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_PLACEHOLDER_EVIDENCE); + const match = issues.find((i) => i.ruleId === "evidence_block_placeholder"); + assert.ok(match, "should produce evidence_block_placeholder warning"); + assert.equal(match!.severity, "warning"); +}); + +test("verification-evidence: validator accepts 'no checks discovered' as valid content", () => { + const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_NO_CHECKS_EVIDENCE); + const evidenceIssues = issues.filter( + (i) => i.ruleId === "evidence_block_missing" || i.ruleId === "evidence_block_placeholder", + ); + assert.equal(evidenceIssues.length, 0, "no evidence warnings for 'no checks discovered'"); +}); + +// ─── Integration Test: Full Chain (T03) ────────────────────────────────────── + +test("verification-evidence: integration — VerificationResult → JSON → table → validator accepts", () => { + const tmp = makeTempDir("ve-integration"); + try { + // 1. Create a VerificationResult with 2 checks (1 pass, 1 fail) + const result = makeResult({ + passed: false, + checks: [ + { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500 }, + { command: "npm run test:unit", exitCode: 1, stdout: "", stderr: "1 failed", durationMs: 3200 }, + ], + discoverySource: "package-json", + }); + + // 2. Write JSON to temp dir and read it back + writeVerificationJSON(result, tmp, "T03"); + const jsonPath = join(tmp, "T03-VERIFY.json"); + assert.ok(existsSync(jsonPath), "JSON file should exist"); + + const json = JSON.parse(readFileSync(jsonPath, "utf-8")); + assert.equal(json.schemaVersion, 1, "schemaVersion should be 1"); + assert.equal(json.passed, false, "passed should be false"); + assert.equal(json.checks.length, 2, "should have 2 checks"); + assert.equal(json.checks[0].verdict, "pass", "first check should pass"); + assert.equal(json.checks[1].verdict, "fail", "second check should fail"); + + // 3. Generate evidence table and embed in a mock summary + const table = formatEvidenceTable(result); + assert.ok(table.includes("npm run typecheck"), "table should contain first command"); + assert.ok(table.includes("npm run test:unit"), "table should contain second command"); + + const fullSummary = `--- +observability_surfaces: + - gate-output +--- +# T03 Summary + +## Diagnostics +Run \`npm test\` to verify. + +## Verification Evidence +${table} +`; + + // 4. 
Validate — no evidence warnings + const issues = validateTaskSummaryContent("T03-SUMMARY.md", fullSummary); + const evidenceIssues = issues.filter( + (i) => i.ruleId === "evidence_block_missing" || i.ruleId === "evidence_block_placeholder", + ); + assert.equal(evidenceIssues.length, 0, "validator should accept real evidence from formatEvidenceTable"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +// ─── Retry Evidence Field Tests (S03/T01) ───────────────────────────────────── + +test("verification-evidence: writeVerificationJSON with retryAttempt and maxRetries includes them in output", () => { + const tmp = makeTempDir("ve-retry-fields"); + try { + const result = makeResult({ + passed: false, + checks: [ + { command: "npm run lint", exitCode: 1, stdout: "", stderr: "error", durationMs: 300 }, + ], + }); + + writeVerificationJSON(result, tmp, "T01", "M001/S03/T01", 1, 2); + + const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8")); + assert.equal(json.retryAttempt, 1, "retryAttempt should be 1"); + assert.equal(json.maxRetries, 2, "maxRetries should be 2"); + // Other fields should still be correct + assert.equal(json.schemaVersion, 1); + assert.equal(json.taskId, "T01"); + assert.equal(json.unitId, "M001/S03/T01"); + assert.equal(json.passed, false); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: writeVerificationJSON without retry params omits retryAttempt/maxRetries keys", () => { + const tmp = makeTempDir("ve-no-retry"); + try { + const result = makeResult({ + passed: true, + checks: [ + { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 }, + ], + }); + + writeVerificationJSON(result, tmp, "T02"); + + const raw = readFileSync(join(tmp, "T02-VERIFY.json"), "utf-8"); + const json = JSON.parse(raw); + assert.ok(!("retryAttempt" in json), "retryAttempt key should not be present"); + assert.ok(!("maxRetries" in json), "maxRetries key should not be present"); + // Confirm the JSON string does not contain these keys at all + assert.ok(!raw.includes('"retryAttempt"'), "raw JSON should not contain retryAttempt"); + assert.ok(!raw.includes('"maxRetries"'), "raw JSON should not contain maxRetries"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +// ─── Runtime Error Evidence Tests (S04/T02) ────────────────────────────────── + +test("verification-evidence: writeVerificationJSON includes runtimeErrors when present", () => { + const tmp = makeTempDir("ve-rt-present"); + try { + const result = makeResult({ + passed: false, + checks: [ + { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 }, + ], + runtimeErrors: [ + { source: "bg-shell", severity: "crash", message: "Server crashed", blocking: true }, + { source: "browser", severity: "error", message: "Uncaught TypeError", blocking: false }, + ], + }); + + writeVerificationJSON(result, tmp, "T01"); + + const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8")); + assert.ok(Array.isArray(json.runtimeErrors), "runtimeErrors should be an array"); + assert.equal(json.runtimeErrors.length, 2, "should have 2 runtime errors"); + assert.equal(json.runtimeErrors[0].source, "bg-shell"); + assert.equal(json.runtimeErrors[0].severity, "crash"); + assert.equal(json.runtimeErrors[0].message, "Server crashed"); + assert.equal(json.runtimeErrors[0].blocking, true); + assert.equal(json.runtimeErrors[1].source, "browser"); + 
assert.equal(json.runtimeErrors[1].severity, "error"); + assert.equal(json.runtimeErrors[1].message, "Uncaught TypeError"); + assert.equal(json.runtimeErrors[1].blocking, false); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: writeVerificationJSON omits runtimeErrors when absent", () => { + const tmp = makeTempDir("ve-rt-absent"); + try { + const result = makeResult({ + passed: true, + checks: [ + { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50 }, + ], + }); + + writeVerificationJSON(result, tmp, "T01"); + + const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"); + assert.ok(!raw.includes('"runtimeErrors"'), "raw JSON should not contain runtimeErrors key"); + const json = JSON.parse(raw); + assert.ok(!("runtimeErrors" in json), "runtimeErrors key should not be present in parsed JSON"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: writeVerificationJSON omits runtimeErrors when empty array", () => { + const tmp = makeTempDir("ve-rt-empty"); + try { + const result = makeResult({ + passed: true, + checks: [], + runtimeErrors: [], + }); + + writeVerificationJSON(result, tmp, "T01"); + + const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"); + assert.ok(!raw.includes('"runtimeErrors"'), "raw JSON should not contain runtimeErrors key when empty array"); + const json = JSON.parse(raw); + assert.ok(!("runtimeErrors" in json), "runtimeErrors key should not be present for empty array"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: formatEvidenceTable appends runtime errors section", () => { + const result = makeResult({ + passed: false, + checks: [ + { command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 }, + ], + runtimeErrors: [ + { source: "bg-shell", severity: "crash", message: "Server crashed with SIGKILL", blocking: true }, + { source: "browser", severity: "warning", message: "Deprecated API usage", blocking: false }, + ], + }); + + const table = formatEvidenceTable(result); + + // Should contain runtime errors section + assert.ok(table.includes("**Runtime Errors**"), "should have Runtime Errors heading"); + assert.ok(table.includes("| # | Source | Severity | Blocking | Message |"), "should have runtime errors column headers"); + assert.ok(table.includes("bg-shell"), "should contain bg-shell source"); + assert.ok(table.includes("crash"), "should contain crash severity"); + assert.ok(table.includes("🚫 yes"), "blocking error should show 🚫 yes"); + assert.ok(table.includes("ℹ️ no"), "non-blocking error should show ℹ️ no"); + assert.ok(table.includes("Server crashed with SIGKILL"), "should contain error message"); + assert.ok(table.includes("Deprecated API usage"), "should contain warning message"); +}); + +test("verification-evidence: formatEvidenceTable omits runtime errors section when none", () => { + const result = makeResult({ + passed: true, + checks: [ + { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200 }, + ], + }); + + const table = formatEvidenceTable(result); + + assert.ok(!table.includes("Runtime Errors"), "should not contain Runtime Errors heading"); + assert.ok(table.includes("npm run lint"), "should still contain the check table"); +}); + +test("verification-evidence: formatEvidenceTable truncates runtime error message to 100 chars", () => { + const longMessage = "A".repeat(150); + const result = 
makeResult({ + passed: false, + checks: [ + { command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 }, + ], + runtimeErrors: [ + { source: "bg-shell", severity: "error", message: longMessage, blocking: false }, + ], + }); + + const table = formatEvidenceTable(result); + + // The table should contain the truncated message (100 chars), not the full 150 + assert.ok(table.includes("A".repeat(100)), "should contain 100 A's"); + assert.ok(!table.includes("A".repeat(101)), "should not contain 101 A's (truncated)"); +}); + +// ─── Audit Warning Evidence Tests (S05/T02) ────────────────────────────────── + +const SAMPLE_AUDIT_WARNINGS = [ + { + name: "lodash", + severity: "critical" as const, + title: "Prototype Pollution", + url: "https://github.com/advisories/GHSA-1234", + fixAvailable: true, + }, + { + name: "express", + severity: "high" as const, + title: "Open Redirect", + url: "https://github.com/advisories/GHSA-5678", + fixAvailable: false, + }, + { + name: "minimist", + severity: "moderate" as const, + title: "Prototype Pollution", + url: "https://github.com/advisories/GHSA-9012", + fixAvailable: true, + }, +]; + +test("verification-evidence: writeVerificationJSON includes auditWarnings when present", () => { + const tmp = makeTempDir("ve-audit-present"); + try { + const result = makeResult({ + passed: true, + checks: [ + { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 }, + ], + auditWarnings: SAMPLE_AUDIT_WARNINGS, + }); + + writeVerificationJSON(result, tmp, "T01"); + + const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8")); + assert.ok(Array.isArray(json.auditWarnings), "auditWarnings should be an array"); + assert.equal(json.auditWarnings.length, 3, "should have 3 audit warnings"); + assert.equal(json.auditWarnings[0].name, "lodash"); + assert.equal(json.auditWarnings[0].severity, "critical"); + assert.equal(json.auditWarnings[0].title, "Prototype Pollution"); + assert.equal(json.auditWarnings[0].url, "https://github.com/advisories/GHSA-1234"); + assert.equal(json.auditWarnings[0].fixAvailable, true); + assert.equal(json.auditWarnings[1].name, "express"); + assert.equal(json.auditWarnings[1].severity, "high"); + assert.equal(json.auditWarnings[1].fixAvailable, false); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: writeVerificationJSON omits auditWarnings when absent", () => { + const tmp = makeTempDir("ve-audit-absent"); + try { + const result = makeResult({ + passed: true, + checks: [ + { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50 }, + ], + }); + + writeVerificationJSON(result, tmp, "T01"); + + const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"); + assert.ok(!raw.includes('"auditWarnings"'), "raw JSON should not contain auditWarnings key"); + const json = JSON.parse(raw); + assert.ok(!("auditWarnings" in json), "auditWarnings key should not be present in parsed JSON"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: writeVerificationJSON omits auditWarnings when empty array", () => { + const tmp = makeTempDir("ve-audit-empty"); + try { + const result = makeResult({ + passed: true, + checks: [], + auditWarnings: [], + }); + + writeVerificationJSON(result, tmp, "T01"); + + const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"); + assert.ok(!raw.includes('"auditWarnings"'), "raw JSON should not contain auditWarnings key when empty array"); + 
const json = JSON.parse(raw); + assert.ok(!("auditWarnings" in json), "auditWarnings key should not be present for empty array"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-evidence: formatEvidenceTable appends audit warnings section", () => { + const result = makeResult({ + passed: true, + checks: [ + { command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 }, + ], + auditWarnings: SAMPLE_AUDIT_WARNINGS, + }); + + const table = formatEvidenceTable(result); + + assert.ok(table.includes("**Audit Warnings**"), "should have Audit Warnings heading"); + assert.ok(table.includes("| # | Package | Severity | Title | Fix Available |"), "should have audit warnings column headers"); + assert.ok(table.includes("lodash"), "should contain lodash package"); + assert.ok(table.includes("🔴 critical"), "should show critical emoji"); + assert.ok(table.includes("🟠 high"), "should show high emoji"); + assert.ok(table.includes("🟡 moderate"), "should show moderate emoji"); + assert.ok(table.includes("Prototype Pollution"), "should contain vulnerability title"); + assert.ok(table.includes("Open Redirect"), "should contain vulnerability title"); + assert.ok(table.includes("✅ yes"), "fixAvailable true should show ✅ yes"); + assert.ok(table.includes("❌ no"), "fixAvailable false should show ❌ no"); +}); + +test("verification-evidence: formatEvidenceTable omits audit warnings section when none", () => { + const result = makeResult({ + passed: true, + checks: [ + { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200 }, + ], + }); + + const table = formatEvidenceTable(result); + + assert.ok(!table.includes("Audit Warnings"), "should not contain Audit Warnings heading"); + assert.ok(table.includes("npm run lint"), "should still contain the check table"); +}); + +test("verification-evidence: integration — VerificationResult with auditWarnings → JSON → table", () => { + const tmp = makeTempDir("ve-audit-integration"); + try { + const result = makeResult({ + passed: true, + checks: [ + { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500 }, + ], + auditWarnings: [ + { + name: "got", + severity: "moderate" as const, + title: "Redirect bypass", + url: "https://github.com/advisories/GHSA-abcd", + fixAvailable: true, + }, + ], + }); + + // 1. Write JSON and verify + writeVerificationJSON(result, tmp, "T05"); + const json = JSON.parse(readFileSync(join(tmp, "T05-VERIFY.json"), "utf-8")); + assert.equal(json.auditWarnings.length, 1, "JSON should have 1 audit warning"); + assert.equal(json.auditWarnings[0].name, "got"); + assert.equal(json.auditWarnings[0].severity, "moderate"); + assert.equal(json.auditWarnings[0].fixAvailable, true); + // passed should still be true — audit warnings are non-blocking + assert.equal(json.passed, true, "passed should remain true despite audit warnings"); + + // 2. 
Format table and verify + const table = formatEvidenceTable(result); + assert.ok(table.includes("**Audit Warnings**"), "table should have Audit Warnings section"); + assert.ok(table.includes("got"), "table should contain package name"); + assert.ok(table.includes("🟡 moderate"), "table should show moderate severity with emoji"); + assert.ok(table.includes("Redirect bypass"), "table should contain vulnerability title"); + assert.ok(table.includes("✅ yes"), "table should show fix available"); + // Check table still has the main verification checks + assert.ok(table.includes("npm run typecheck"), "table should still have main check"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); diff --git a/src/resources/extensions/gsd/tests/verification-gate.test.ts b/src/resources/extensions/gsd/tests/verification-gate.test.ts new file mode 100644 index 000000000..f3e94192c --- /dev/null +++ b/src/resources/extensions/gsd/tests/verification-gate.test.ts @@ -0,0 +1,965 @@ +/** + * Unit tests for the verification gate — command discovery and execution. + * + * Tests cover: + * 1. Discovery from explicit preference commands + * 2. Discovery from task plan verify field + * 3. Discovery from package.json typecheck/lint/test scripts + * 4. First-non-empty-wins precedence + * 5. All commands pass → gate passes + * 6. One command fails → gate fails with exit code + stderr + * 7. Missing package.json → 0 checks → pass + * 8. Empty scripts → 0 checks → pass + * 9. Preference validation for verification keys + * 10. spawnSync error (command not found) → failure with exit code 127 + * 11. Dependency audit — git diff detection, npm audit parsing, graceful failures + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { discoverCommands, runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "../verification-gate.ts"; +import type { CaptureRuntimeErrorsOptions, DependencyAuditOptions } from "../verification-gate.ts"; +import { validatePreferences } from "../preferences.ts"; + +function makeTempDir(prefix: string): string { + const dir = join( + tmpdir(), + `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + mkdirSync(dir, { recursive: true }); + return dir; +} + +// ─── Discovery Tests ───────────────────────────────────────────────────────── + +test("verification-gate: discoverCommands from preference commands", () => { + const tmp = makeTempDir("vg-pref"); + try { + const result = discoverCommands({ + preferenceCommands: ["npm run lint", "npm run test"], + cwd: tmp, + }); + assert.deepStrictEqual(result.commands, ["npm run lint", "npm run test"]); + assert.equal(result.source, "preference"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: discoverCommands from task plan verify field", () => { + const tmp = makeTempDir("vg-taskplan"); + try { + const result = discoverCommands({ + taskPlanVerify: "npm run lint && npm run test", + cwd: tmp, + }); + assert.deepStrictEqual(result.commands, ["npm run lint", "npm run test"]); + assert.equal(result.source, "task-plan"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: discoverCommands from package.json scripts", () => { + const tmp = makeTempDir("vg-pkg"); + try { + writeFileSync( + join(tmp, "package.json"), + JSON.stringify({ + 
scripts: { + typecheck: "tsc --noEmit", + lint: "eslint .", + test: "vitest", + build: "tsc", // should NOT be included + }, + }), + ); + const result = discoverCommands({ cwd: tmp }); + assert.deepStrictEqual(result.commands, [ + "npm run typecheck", + "npm run lint", + "npm run test", + ]); + assert.equal(result.source, "package-json"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: first-non-empty-wins — preference beats task plan and package.json", () => { + const tmp = makeTempDir("vg-precedence"); + try { + writeFileSync( + join(tmp, "package.json"), + JSON.stringify({ scripts: { lint: "eslint ." } }), + ); + const result = discoverCommands({ + preferenceCommands: ["custom-check"], + taskPlanVerify: "npm run lint", + cwd: tmp, + }); + assert.deepStrictEqual(result.commands, ["custom-check"]); + assert.equal(result.source, "preference"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: task plan verify beats package.json", () => { + const tmp = makeTempDir("vg-tp-beats-pkg"); + try { + writeFileSync( + join(tmp, "package.json"), + JSON.stringify({ scripts: { lint: "eslint ." } }), + ); + const result = discoverCommands({ + taskPlanVerify: "custom-verify", + cwd: tmp, + }); + assert.deepStrictEqual(result.commands, ["custom-verify"]); + assert.equal(result.source, "task-plan"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: missing package.json → 0 checks, source none", () => { + const tmp = makeTempDir("vg-no-pkg"); + try { + const result = discoverCommands({ cwd: tmp }); + assert.deepStrictEqual(result.commands, []); + assert.equal(result.source, "none"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: package.json with no matching scripts → 0 checks", () => { + const tmp = makeTempDir("vg-no-scripts"); + try { + writeFileSync( + join(tmp, "package.json"), + JSON.stringify({ scripts: { build: "tsc", start: "node index.js" } }), + ); + const result = discoverCommands({ cwd: tmp }); + assert.deepStrictEqual(result.commands, []); + assert.equal(result.source, "none"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: empty preference array falls through to task plan", () => { + const tmp = makeTempDir("vg-empty-pref"); + try { + const result = discoverCommands({ + preferenceCommands: [], + taskPlanVerify: "echo ok", + cwd: tmp, + }); + assert.deepStrictEqual(result.commands, ["echo ok"]); + assert.equal(result.source, "task-plan"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +// ─── Execution Tests ───────────────────────────────────────────────────────── + +test("verification-gate: all commands pass → gate passes", () => { + const tmp = makeTempDir("vg-pass"); + try { + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + preferenceCommands: ["echo hello", "echo world"], + }); + assert.equal(result.passed, true); + assert.equal(result.checks.length, 2); + assert.equal(result.discoverySource, "preference"); + assert.equal(result.checks[0].exitCode, 0); + assert.equal(result.checks[1].exitCode, 0); + assert.ok(result.checks[0].stdout.includes("hello")); + assert.ok(result.checks[1].stdout.includes("world")); + assert.equal(typeof result.timestamp, "number"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: one command 
fails → gate fails with exit code + stderr", () => { + const tmp = makeTempDir("vg-fail"); + try { + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + preferenceCommands: ["echo ok", "sh -c 'echo err >&2; exit 1'"], + }); + assert.equal(result.passed, false); + assert.equal(result.checks.length, 2); + assert.equal(result.checks[0].exitCode, 0); + assert.equal(result.checks[1].exitCode, 1); + assert.ok(result.checks[1].stderr.includes("err")); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: no commands discovered → gate passes with 0 checks", () => { + const tmp = makeTempDir("vg-empty"); + try { + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + }); + assert.equal(result.passed, true); + assert.equal(result.checks.length, 0); + assert.equal(result.discoverySource, "none"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: command not found → exit code 127", () => { + const tmp = makeTempDir("vg-notfound"); + try { + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + preferenceCommands: ["__nonexistent_command_xyz_42__"], + }); + assert.equal(result.passed, false); + assert.equal(result.checks.length, 1); + assert.ok(result.checks[0].exitCode !== 0, "should have non-zero exit code"); + assert.ok(result.checks[0].durationMs >= 0); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: each check has durationMs", () => { + const tmp = makeTempDir("vg-duration"); + try { + const result = runVerificationGate({ + basePath: tmp, + unitId: "T01", + cwd: tmp, + preferenceCommands: ["echo fast"], + }); + assert.equal(result.checks.length, 1); + assert.equal(typeof result.checks[0].durationMs, "number"); + assert.ok(result.checks[0].durationMs >= 0); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +// ─── Preference Validation Tests ───────────────────────────────────────────── + +test("verification-gate: validatePreferences accepts valid verification keys", () => { + const result = validatePreferences({ + verification_commands: ["npm run lint", "npm run test"], + verification_auto_fix: true, + verification_max_retries: 3, + }); + assert.deepStrictEqual(result.preferences.verification_commands, [ + "npm run lint", + "npm run test", + ]); + assert.equal(result.preferences.verification_auto_fix, true); + assert.equal(result.preferences.verification_max_retries, 3); + assert.equal(result.errors.length, 0); +}); + +test("verification-gate: validatePreferences rejects non-array verification_commands", () => { + const result = validatePreferences({ + verification_commands: "npm run lint" as unknown as string[], + }); + assert.ok(result.errors.some((e) => e.includes("verification_commands"))); + assert.equal(result.preferences.verification_commands, undefined); +}); + +test("verification-gate: validatePreferences rejects non-boolean verification_auto_fix", () => { + const result = validatePreferences({ + verification_auto_fix: "yes" as unknown as boolean, + }); + assert.ok(result.errors.some((e) => e.includes("verification_auto_fix"))); + assert.equal(result.preferences.verification_auto_fix, undefined); +}); + +test("verification-gate: validatePreferences rejects negative verification_max_retries", () => { + const result = validatePreferences({ + verification_max_retries: -1, + }); + assert.ok(result.errors.some((e) => 
e.includes("verification_max_retries"))); + assert.equal(result.preferences.verification_max_retries, undefined); +}); + +test("verification-gate: validatePreferences rejects non-string items in verification_commands", () => { + const result = validatePreferences({ + verification_commands: ["npm run lint", 42 as unknown as string], + }); + assert.ok(result.errors.some((e) => e.includes("verification_commands"))); + assert.equal(result.preferences.verification_commands, undefined); +}); + +test("verification-gate: validatePreferences floors verification_max_retries", () => { + const result = validatePreferences({ + verification_max_retries: 2.7, + }); + assert.equal(result.preferences.verification_max_retries, 2); + assert.equal(result.errors.length, 0); +}); + +// ─── Additional Discovery Tests (T02) ─────────────────────────────────────── + +test("verification-gate: package.json with only test script → returns only npm run test", () => { + const tmp = makeTempDir("vg-only-test"); + try { + writeFileSync( + join(tmp, "package.json"), + JSON.stringify({ + scripts: { + test: "vitest", + build: "tsc", + start: "node index.js", + }, + }), + ); + const result = discoverCommands({ cwd: tmp }); + assert.deepStrictEqual(result.commands, ["npm run test"]); + assert.equal(result.source, "package-json"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: taskPlanVerify with single command (no &&)", () => { + const tmp = makeTempDir("vg-tp-single"); + try { + const result = discoverCommands({ + taskPlanVerify: "npm test", + cwd: tmp, + }); + assert.deepStrictEqual(result.commands, ["npm test"]); + assert.equal(result.source, "task-plan"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: whitespace-only preference commands fall through", () => { + const tmp = makeTempDir("vg-ws-pref"); + try { + writeFileSync( + join(tmp, "package.json"), + JSON.stringify({ scripts: { lint: "eslint ." 
} }), + ); + const result = discoverCommands({ + preferenceCommands: [" ", ""], + cwd: tmp, + }); + // Whitespace-only strings are trimmed to empty and filtered out + assert.equal(result.source, "package-json"); + assert.deepStrictEqual(result.commands, ["npm run lint"]); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +// ─── Additional Execution Tests (T02) ─────────────────────────────────────── + +test("verification-gate: one command fails — remaining commands still run (non-short-circuit)", () => { + const tmp = makeTempDir("vg-no-short-circuit"); + try { + // First fails, second and third should still execute + const result = runVerificationGate({ + basePath: tmp, + unitId: "T02", + cwd: tmp, + preferenceCommands: [ + "sh -c 'exit 1'", + "echo second", + "echo third", + ], + }); + assert.equal(result.passed, false); + assert.equal(result.checks.length, 3, "all 3 commands should run"); + assert.equal(result.checks[0].exitCode, 1, "first command fails"); + assert.equal(result.checks[1].exitCode, 0, "second command runs and passes"); + assert.ok(result.checks[1].stdout.includes("second")); + assert.equal(result.checks[2].exitCode, 0, "third command runs and passes"); + assert.ok(result.checks[2].stdout.includes("third")); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +test("verification-gate: gate execution uses cwd for spawnSync", () => { + const tmp = makeTempDir("vg-cwd"); + try { + // pwd should report the temp dir + const result = runVerificationGate({ + basePath: tmp, + unitId: "T02", + cwd: tmp, + preferenceCommands: ["pwd"], + }); + assert.equal(result.passed, true); + assert.equal(result.checks.length, 1); + // The stdout should contain the tmp dir path (resolving symlinks) + assert.ok(result.checks[0].stdout.trim().length > 0, "pwd should produce output"); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); + +// ─── Additional Preference Validation Tests (T02) ────────────────────────── + +test("verification-gate: verification_commands produces no unknown-key warnings", () => { + const result = validatePreferences({ + verification_commands: ["npm test"], + }); + const unknownWarnings = (result.warnings ?? []).filter(w => w.includes("unknown")); + assert.equal(unknownWarnings.length, 0, "verification_commands is a known key"); + assert.equal(result.errors.length, 0); +}); + +test("verification-gate: verification_auto_fix produces no unknown-key warnings", () => { + const result = validatePreferences({ + verification_auto_fix: true, + }); + const unknownWarnings = (result.warnings ?? []).filter(w => w.includes("unknown")); + assert.equal(unknownWarnings.length, 0, "verification_auto_fix is a known key"); + assert.equal(result.errors.length, 0); +}); + +test("verification-gate: verification_max_retries produces no unknown-key warnings", () => { + const result = validatePreferences({ + verification_max_retries: 2, + }); + const unknownWarnings = (result.warnings ?? 
[]).filter(w => w.includes("unknown")); + assert.equal(unknownWarnings.length, 0, "verification_max_retries is a known key"); + assert.equal(result.errors.length, 0); +}); + +test("verification-gate: verification_max_retries -1 produces a validation error", () => { + const result = validatePreferences({ + verification_max_retries: -1, + }); + assert.ok( + result.errors.some(e => e.includes("verification_max_retries")), + "negative max_retries should error", + ); + assert.equal(result.preferences.verification_max_retries, undefined); +}); + +// ─── formatFailureContext Tests (S03/T01) ───────────────────────────────────── + +test("formatFailureContext: formats a single failure with command, exit code, stderr", () => { + const result: import("../types.ts").VerificationResult = { + passed: false, + checks: [ + { command: "npm run lint", exitCode: 1, stdout: "", stderr: "error: unused var", durationMs: 500 }, + ], + discoverySource: "preference", + timestamp: Date.now(), + }; + const output = formatFailureContext(result); + assert.ok(output.startsWith("## Verification Failures"), "should start with header"); + assert.ok(output.includes("`npm run lint`"), "should include command name"); + assert.ok(output.includes("exit code 1"), "should include exit code"); + assert.ok(output.includes("error: unused var"), "should include stderr content"); + assert.ok(output.includes("```stderr"), "should have stderr code block"); +}); + +test("formatFailureContext: formats multiple failures", () => { + const result: import("../types.ts").VerificationResult = { + passed: false, + checks: [ + { command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint error", durationMs: 100 }, + { command: "npm run test", exitCode: 2, stdout: "", stderr: "test failure", durationMs: 200 }, + { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 50 }, + ], + discoverySource: "preference", + timestamp: Date.now(), + }; + const output = formatFailureContext(result); + assert.ok(output.includes("`npm run lint`"), "should include first failed command"); + assert.ok(output.includes("exit code 1"), "should include first exit code"); + assert.ok(output.includes("`npm run test`"), "should include second failed command"); + assert.ok(output.includes("exit code 2"), "should include second exit code"); + // Passing check should NOT appear + assert.ok(!output.includes("npm run typecheck"), "should not include passing command"); +}); + +test("formatFailureContext: truncates stderr longer than 2000 chars", () => { + const longStderr = "x".repeat(3000); + const result: import("../types.ts").VerificationResult = { + passed: false, + checks: [ + { command: "big-err", exitCode: 1, stdout: "", stderr: longStderr, durationMs: 100 }, + ], + discoverySource: "preference", + timestamp: Date.now(), + }; + const output = formatFailureContext(result); + // The output should contain 2000 x's followed by truncation marker, not 3000 + assert.ok(!output.includes("x".repeat(2001)), "should not contain more than 2000 chars of stderr"); + assert.ok(output.includes("…[truncated]"), "should include truncation marker"); +}); + +test("formatFailureContext: returns empty string when all checks pass", () => { + const result: import("../types.ts").VerificationResult = { + passed: true, + checks: [ + { command: "npm run lint", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 }, + { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 200 }, + ], + discoverySource: "preference", + timestamp: Date.now(), + 
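+ // discoverySource/timestamp are required by the VerificationResult type but ignored by formatFailureContext, which reads only checks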
}; + assert.equal(formatFailureContext(result), ""); +}); + +test("formatFailureContext: returns empty string for empty checks array", () => { + const result: import("../types.ts").VerificationResult = { + passed: true, + checks: [], + discoverySource: "none", + timestamp: Date.now(), + }; + assert.equal(formatFailureContext(result), ""); +}); + +test("formatFailureContext: caps total output at 10,000 chars", () => { + // Generate many failures to exceed 10,000 chars total + const checks: import("../types.ts").VerificationCheck[] = []; + for (let i = 0; i < 20; i++) { + checks.push({ + command: `failing-command-${i}`, + exitCode: 1, + stdout: "", + stderr: "e".repeat(1000), // 1000 chars each, 20 * ~1050 (with formatting) > 10,000 + durationMs: 100, + }); + } + const result: import("../types.ts").VerificationResult = { + passed: false, + checks, + discoverySource: "preference", + timestamp: Date.now(), + }; + const output = formatFailureContext(result); + assert.ok(output.length <= 10_100, `total output should be capped near 10,000 chars, got ${output.length}`); + assert.ok(output.includes("…[remaining failures truncated]"), "should include total truncation marker"); +}); + +// ─── captureRuntimeErrors Tests (S04/T01) ───────────────────────────────────── + +function makeProc(overrides: Record<string, unknown>) { + return { + id: "p1", + label: "test-server", + status: "ready", + alive: true, + exitCode: null, + signal: null, + recentErrors: [] as string[], + ...overrides, + }; +} + +function makeLogs(entries: Array<{ type: string; text: string }>) { + return entries.map((e, i) => ({ + type: e.type, + text: e.text, + timestamp: Date.now() + i, + url: "http://localhost:3000", + })); +} + +test("captureRuntimeErrors: crashed bg-shell process → blocking crash error", async () => { + const processes = new Map([ + ["p1", makeProc({ status: "crashed", alive: false, exitCode: 1 })], + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => processes, + getConsoleLogs: () => [], + }); + assert.equal(result.length, 1); + assert.equal(result[0].source, "bg-shell"); + assert.equal(result[0].severity, "crash"); + assert.equal(result[0].blocking, true); + assert.ok(result[0].message.includes("test-server")); +}); + +test("captureRuntimeErrors: bg-shell non-zero exit + not alive → blocking crash error", async () => { + const processes = new Map([ + ["p1", makeProc({ status: "exited", alive: false, exitCode: 137 })], + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => processes, + getConsoleLogs: () => [], + }); + assert.equal(result.length, 1); + assert.equal(result[0].severity, "crash"); + assert.equal(result[0].blocking, true); + assert.ok(result[0].message.includes("exitCode=137")); +}); + +test("captureRuntimeErrors: bg-shell SIGABRT/SIGSEGV/SIGBUS → blocking crash error", async () => { + for (const sig of ["SIGABRT", "SIGSEGV", "SIGBUS"]) { + const processes = new Map([ + ["p1", makeProc({ signal: sig, alive: false, exitCode: null })], + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => processes, + getConsoleLogs: () => [], + }); + assert.equal(result.length, 1, `${sig} should produce 1 error`); + assert.equal(result[0].severity, "crash"); + assert.equal(result[0].blocking, true); + assert.ok(result[0].message.includes(sig), `message should contain ${sig}`); + } +}); + +test("captureRuntimeErrors: alive bg-shell process with recentErrors → non-blocking error", async () => { + const processes = new Map([ + ["p1", makeProc({ alive: true, recentErrors: 
["TypeError: foo", "RangeError: bar"] })], + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => processes, + getConsoleLogs: () => [], + }); + assert.equal(result.length, 1); + assert.equal(result[0].source, "bg-shell"); + assert.equal(result[0].severity, "error"); + assert.equal(result[0].blocking, false); + assert.ok(result[0].message.includes("TypeError: foo")); + assert.ok(result[0].message.includes("RangeError: bar")); +}); + +test("captureRuntimeErrors: browser unhandled rejection → blocking crash error", async () => { + const logs = makeLogs([ + { type: "error", text: "Unhandled promise rejection: some error" }, + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => new Map(), + getConsoleLogs: () => logs, + }); + assert.equal(result.length, 1); + assert.equal(result[0].source, "browser"); + assert.equal(result[0].severity, "crash"); + assert.equal(result[0].blocking, true); + assert.ok(result[0].message.includes("Unhandled")); +}); + +test("captureRuntimeErrors: browser UnhandledRejection (case variation) → blocking crash", async () => { + const logs = makeLogs([ + { type: "error", text: "UnhandledRejection in module X" }, + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => new Map(), + getConsoleLogs: () => logs, + }); + assert.equal(result.length, 1); + assert.equal(result[0].severity, "crash"); + assert.equal(result[0].blocking, true); +}); + +test("captureRuntimeErrors: browser console.error (general) → non-blocking error", async () => { + const logs = makeLogs([ + { type: "error", text: "Failed to load resource: net::ERR_FAILED" }, + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => new Map(), + getConsoleLogs: () => logs, + }); + assert.equal(result.length, 1); + assert.equal(result[0].source, "browser"); + assert.equal(result[0].severity, "error"); + assert.equal(result[0].blocking, false); +}); + +test("captureRuntimeErrors: browser deprecation warning → non-blocking warning", async () => { + const logs = makeLogs([ + { type: "warning", text: "Event.returnValue is deprecated. Use Event.preventDefault() instead." 
}, + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => new Map(), + getConsoleLogs: () => logs, + }); + assert.equal(result.length, 1); + assert.equal(result[0].source, "browser"); + assert.equal(result[0].severity, "warning"); + assert.equal(result[0].blocking, false); + assert.ok(result[0].message.includes("deprecated")); +}); + +test("captureRuntimeErrors: non-deprecation warning is ignored", async () => { + const logs = makeLogs([ + { type: "warning", text: "Some general warning about performance" }, + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => new Map(), + getConsoleLogs: () => logs, + }); + assert.equal(result.length, 0, "non-deprecation warnings should be ignored"); +}); + +test("captureRuntimeErrors: no processes, no browser logs → empty array", async () => { + const result = await captureRuntimeErrors({ + getProcesses: () => new Map(), + getConsoleLogs: () => [], + }); + assert.deepStrictEqual(result, []); +}); + +test("captureRuntimeErrors: dynamic import failure → graceful empty array", async () => { + const result = await captureRuntimeErrors({ + getProcesses: () => { throw new Error("module not found"); }, + getConsoleLogs: () => { throw new Error("module not found"); }, + }); + assert.deepStrictEqual(result, []); +}); + +test("captureRuntimeErrors: browser text truncated to 500 chars", async () => { + const longText = "x".repeat(600); + const logs = makeLogs([ + { type: "error", text: longText }, + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => new Map(), + getConsoleLogs: () => logs, + }); + assert.equal(result.length, 1); + assert.ok(result[0].message.length <= 500 + 20, "message should be truncated near 500 chars"); + assert.ok(result[0].message.includes("…[truncated]"), "should include truncation marker"); + assert.ok(!result[0].message.includes("x".repeat(501)), "should not contain 501+ x's"); +}); + +test("captureRuntimeErrors: bg-shell recentErrors limited to 3 in message", async () => { + const processes = new Map([ + ["p1", makeProc({ + status: "crashed", + alive: false, + exitCode: 1, + recentErrors: ["err1", "err2", "err3", "err4", "err5"], + })], + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => processes, + getConsoleLogs: () => [], + }); + assert.equal(result.length, 1); + assert.ok(result[0].message.includes("err1")); + assert.ok(result[0].message.includes("err2")); + assert.ok(result[0].message.includes("err3")); + assert.ok(!result[0].message.includes("err4"), "should only include first 3 errors"); +}); + +test("captureRuntimeErrors: mixed bg-shell and browser errors", async () => { + const processes = new Map([ + ["p1", makeProc({ status: "crashed", alive: false, exitCode: 1 })], + ]); + const logs = makeLogs([ + { type: "error", text: "Unhandled rejection: boom" }, + { type: "error", text: "general error" }, + { type: "warning", text: "deprecated API used" }, + ]); + const result = await captureRuntimeErrors({ + getProcesses: () => processes, + getConsoleLogs: () => logs, + }); + // 1 bg-shell crash + 1 browser crash (unhandled) + 1 browser error + 1 browser warning + assert.equal(result.length, 4); + const blocking = result.filter(r => r.blocking); + const nonBlocking = result.filter(r => !r.blocking); + assert.equal(blocking.length, 2, "should have 2 blocking errors"); + assert.equal(nonBlocking.length, 2, "should have 2 non-blocking errors"); +}); + +// ─── Dependency Audit Tests (S05/T01) ───────────────────────────────────────── + +/** Helper: build a 
realistic npm audit JSON stdout with vulnerabilities. */ +function makeAuditJson( + vulns: Record<string, unknown>, +): string { + return JSON.stringify({ vulnerabilities: vulns }); +} + +/** Sample npm audit JSON with a high-severity vuln. */ +const SAMPLE_AUDIT_JSON = makeAuditJson({ + "nth-check": { + severity: "high", + fixAvailable: true, + via: [ + { + title: "Inefficient Regular Expression Complexity in nth-check", + url: "https://github.com/advisories/GHSA-rp65-9cf3-cjxr", + severity: "high", + }, + ], + }, +}); + +test("dependency-audit: package.json in git diff → runs npm audit and parses vulnerabilities", () => { + let npmAuditCalled = false; + const result = runDependencyAudit("/tmp/test", { + gitDiff: () => ["package.json", "src/index.ts"], + npmAudit: () => { + npmAuditCalled = true; + return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 }; + }, + }); + assert.equal(npmAuditCalled, true, "npm audit should be called"); + assert.equal(result.length, 1); + assert.equal(result[0].name, "nth-check"); + assert.equal(result[0].severity, "high"); + assert.equal(result[0].title, "Inefficient Regular Expression Complexity in nth-check"); + assert.equal(result[0].url, "https://github.com/advisories/GHSA-rp65-9cf3-cjxr"); + assert.equal(result[0].fixAvailable, true); +}); + +test("dependency-audit: package-lock.json change triggers audit", () => { + let npmAuditCalled = false; + const result = runDependencyAudit("/tmp/test", { + gitDiff: () => ["package-lock.json"], + npmAudit: () => { + npmAuditCalled = true; + return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 }; + }, + }); + assert.equal(npmAuditCalled, true); + assert.equal(result.length, 1); +}); + +test("dependency-audit: pnpm-lock.yaml change triggers audit", () => { + let npmAuditCalled = false; + runDependencyAudit("/tmp/test", { + gitDiff: () => ["pnpm-lock.yaml"], + npmAudit: () => { + npmAuditCalled = true; + return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 }; + }, + }); + assert.equal(npmAuditCalled, true); +}); + +test("dependency-audit: yarn.lock change triggers audit", () => { + let npmAuditCalled = false; + runDependencyAudit("/tmp/test", { + gitDiff: () => ["yarn.lock"], + npmAudit: () => { + npmAuditCalled = true; + return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 }; + }, + }); + assert.equal(npmAuditCalled, true); +}); + +test("dependency-audit: bun.lockb change triggers audit", () => { + let npmAuditCalled = false; + runDependencyAudit("/tmp/test", { + gitDiff: () => ["bun.lockb"], + npmAudit: () => { + npmAuditCalled = true; + return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 }; + }, + }); + assert.equal(npmAuditCalled, true); +}); + +test("dependency-audit: no dependency file changes → returns empty array, npm audit not called", () => { + let npmAuditCalled = false; + const result = runDependencyAudit("/tmp/test", { + gitDiff: () => ["src/index.ts", "README.md"], + npmAudit: () => { + npmAuditCalled = true; + return { stdout: "{}", exitCode: 0 }; + }, + }); + assert.equal(npmAuditCalled, false, "npm audit should NOT be called when no dependency files changed"); + assert.deepStrictEqual(result, []); +}); + +test("dependency-audit: git diff returns non-zero exit (not a git repo) → empty array", () => { + const result = runDependencyAudit("/tmp/test", { + gitDiff: () => { throw new Error("not a git repo"); }, + npmAudit: () => { throw new Error("should not be called"); }, + }); + assert.deepStrictEqual(result, []); +}); + +test("dependency-audit: npm audit returns invalid JSON → empty array", () => { + const result = 
runDependencyAudit("/tmp/test", { + gitDiff: () => ["package.json"], + npmAudit: () => ({ stdout: "not json at all", exitCode: 1 }), + }); + assert.deepStrictEqual(result, []); +}); + +test("dependency-audit: npm audit returns zero vulnerabilities → empty array", () => { + const result = runDependencyAudit("/tmp/test", { + gitDiff: () => ["package.json"], + npmAudit: () => ({ + stdout: JSON.stringify({ vulnerabilities: {} }), + exitCode: 0, + }), + }); + assert.deepStrictEqual(result, []); +}); + +test("dependency-audit: npm audit non-zero exit with valid JSON → parses correctly", () => { + // npm audit exits non-zero when vulnerabilities exist — this is expected, not an error + const result = runDependencyAudit("/tmp/test", { + gitDiff: () => ["package-lock.json"], + npmAudit: () => ({ + stdout: SAMPLE_AUDIT_JSON, + exitCode: 1, // non-zero! + }), + }); + assert.equal(result.length, 1); + assert.equal(result[0].name, "nth-check"); + assert.equal(result[0].severity, "high"); +}); + +test("dependency-audit: via entries with string-only values are skipped", () => { + const auditJson = makeAuditJson({ + "postcss": { + severity: "moderate", + fixAvailable: false, + via: ["nth-check", "css-select"], // string-only via entries + }, + }); + const result = runDependencyAudit("/tmp/test", { + gitDiff: () => ["package.json"], + npmAudit: () => ({ stdout: auditJson, exitCode: 1 }), + }); + assert.equal(result.length, 1); + // When no object via entry is found, title falls back to the package name + assert.equal(result[0].name, "postcss"); + assert.equal(result[0].title, "postcss"); + assert.equal(result[0].url, ""); +}); + +test("dependency-audit: subdirectory package.json does not trigger audit", () => { + let npmAuditCalled = false; + const result = runDependencyAudit("/tmp/test", { + gitDiff: () => ["packages/foo/package.json", "libs/bar/package-lock.json"], + npmAudit: () => { + npmAuditCalled = true; + return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 }; + }, + }); + assert.equal(npmAuditCalled, false, "subdirectory dependency files should not trigger audit"); + assert.deepStrictEqual(result, []); +}); diff --git a/src/resources/extensions/gsd/types.ts b/src/resources/extensions/gsd/types.ts index 2731ef67c..7438087ca 100644 --- a/src/resources/extensions/gsd/types.ts +++ b/src/resources/extensions/gsd/types.ts @@ -46,6 +46,44 @@ export interface TaskPlanEntry { verify?: string; // e.g. "run tests" — extracted from "- Verify:" subline } +// ─── Verification Gate ───────────────────────────────────────────────────── + +/** Result of a single verification command execution */ +export interface VerificationCheck { + command: string; // e.g. 
"npm run lint" + exitCode: number; // 0 = pass + stdout: string; + stderr: string; + durationMs: number; +} + +/** A runtime error captured from bg-shell processes or browser console */ +export interface RuntimeError { + source: "bg-shell" | "browser"; + severity: "crash" | "error" | "warning"; + message: string; + blocking: boolean; +} + +/** A dependency vulnerability warning from npm audit */ +export interface AuditWarning { + name: string; + severity: "low" | "moderate" | "high" | "critical"; + title: string; + url: string; + fixAvailable: boolean; +} + +/** Aggregate result from the verification gate */ +export interface VerificationResult { + passed: boolean; // true if all checks passed (or no checks discovered) + checks: VerificationCheck[]; // per-command results + discoverySource: "preference" | "task-plan" | "package-json" | "none"; + timestamp: number; // Date.now() at gate start + runtimeErrors?: RuntimeError[]; // optional — populated by captureRuntimeErrors() + auditWarnings?: AuditWarning[]; // optional — populated by runDependencyAudit() +} + export interface SlicePlan { id: string; // e.g. "S01" title: string; // from the H1 diff --git a/src/resources/extensions/gsd/verification-evidence.ts b/src/resources/extensions/gsd/verification-evidence.ts new file mode 100644 index 000000000..c1b68605e --- /dev/null +++ b/src/resources/extensions/gsd/verification-evidence.ts @@ -0,0 +1,183 @@ +/** + * Verification Evidence — JSON persistence and markdown table formatting. + * + * Two pure-ish functions: + * - writeVerificationJSON: persists a machine-readable T##-VERIFY.json artifact + * - formatEvidenceTable: returns a markdown evidence table string + * + * JSON schema uses schemaVersion: 1 for forward-compatibility. + * stdout/stderr are intentionally excluded from the JSON to avoid unbounded file sizes. + */ + +import { mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import type { VerificationResult } from "./types.ts"; + +// ─── JSON Evidence Artifact ────────────────────────────────────────────────── + +export interface EvidenceCheckJSON { + command: string; + exitCode: number; + durationMs: number; + verdict: "pass" | "fail"; +} + +export interface RuntimeErrorJSON { + source: string; + severity: string; + message: string; + blocking: boolean; +} + +export interface AuditWarningJSON { + name: string; + severity: string; + title: string; + url: string; + fixAvailable: boolean; +} + +export interface EvidenceJSON { + schemaVersion: 1; + taskId: string; + unitId: string; + timestamp: number; + passed: boolean; + discoverySource: string; + checks: EvidenceCheckJSON[]; + retryAttempt?: number; + maxRetries?: number; + runtimeErrors?: RuntimeErrorJSON[]; + auditWarnings?: AuditWarningJSON[]; +} + +/** + * Write a T##-VERIFY.json artifact to the tasks directory. + * Creates the directory with mkdirSync({ recursive: true }) if it doesn't exist. + * + * stdout/stderr are excluded from the JSON — the full output lives in VerificationResult + * in memory and is logged to stderr during the gate run. + */ +export function writeVerificationJSON( + result: VerificationResult, + tasksDir: string, + taskId: string, + unitId?: string, + retryAttempt?: number, + maxRetries?: number, +): void { + mkdirSync(tasksDir, { recursive: true }); + + const evidence: EvidenceJSON = { + schemaVersion: 1, + taskId, + unitId: unitId ?? 
taskId, + timestamp: result.timestamp, + passed: result.passed, + discoverySource: result.discoverySource, + checks: result.checks.map((check) => ({ + command: check.command, + exitCode: check.exitCode, + durationMs: check.durationMs, + verdict: check.exitCode === 0 ? "pass" : "fail", + })), + ...(retryAttempt !== undefined ? { retryAttempt } : {}), + ...(maxRetries !== undefined ? { maxRetries } : {}), + }; + + if (result.runtimeErrors && result.runtimeErrors.length > 0) { + evidence.runtimeErrors = result.runtimeErrors.map(e => ({ + source: e.source, + severity: e.severity, + message: e.message, + blocking: e.blocking, + })); + } + + if (result.auditWarnings && result.auditWarnings.length > 0) { + evidence.auditWarnings = result.auditWarnings.map(w => ({ + name: w.name, + severity: w.severity, + title: w.title, + url: w.url, + fixAvailable: w.fixAvailable, + })); + } + + const filePath = join(tasksDir, `${taskId}-VERIFY.json`); + writeFileSync(filePath, JSON.stringify(evidence, null, 2) + "\n", "utf-8"); +} + +// ─── Markdown Evidence Table ───────────────────────────────────────────────── + +/** + * Format duration in milliseconds as seconds with 1 decimal place. + * e.g. 2340 → "2.3s", 150 → "0.1s", 0 → "0.0s" + */ +function formatDuration(ms: number): string { + return `${(ms / 1000).toFixed(1)}s`; +} + +/** + * Generate a markdown evidence table from a VerificationResult. + * + * Returns a "no checks" note if result.checks is empty. + * Otherwise returns a 5-column markdown table: #, Command, Exit Code, Verdict, Duration. + */ +export function formatEvidenceTable(result: VerificationResult): string { + if (result.checks.length === 0) { + return "_No verification checks discovered._"; + } + + const lines: string[] = [ + "| # | Command | Exit Code | Verdict | Duration |", + "|---|---------|-----------|---------|----------|", + ]; + + for (let i = 0; i < result.checks.length; i++) { + const check = result.checks[i]; + const num = i + 1; + const verdict = + check.exitCode === 0 ? "✅ pass" : "❌ fail"; + const duration = formatDuration(check.durationMs); + + lines.push( + `| ${num} | ${check.command} | ${check.exitCode} | ${verdict} | ${duration} |`, + ); + } + + if (result.runtimeErrors && result.runtimeErrors.length > 0) { + lines.push(""); + lines.push("**Runtime Errors**"); + lines.push(""); + lines.push("| # | Source | Severity | Blocking | Message |"); + lines.push("|---|--------|----------|----------|---------|"); + for (let i = 0; i < result.runtimeErrors.length; i++) { + const err = result.runtimeErrors[i]; + const blockIcon = err.blocking ? "🚫 yes" : "ℹ️ no"; + lines.push(`| ${i + 1} | ${err.source} | ${err.severity} | ${blockIcon} | ${err.message.slice(0, 100)} |`); + } + } + + if (result.auditWarnings && result.auditWarnings.length > 0) { + const severityEmoji: Record<string, string> = { + critical: "🔴", + high: "🟠", + moderate: "🟡", + low: "⚪", + }; + lines.push(""); + lines.push("**Audit Warnings**"); + lines.push(""); + lines.push("| # | Package | Severity | Title | Fix Available |"); + lines.push("|---|---------|----------|-------|---------------|"); + for (let i = 0; i < result.auditWarnings.length; i++) { + const w = result.auditWarnings[i]; + const emoji = severityEmoji[w.severity] ?? "⚪"; + const fix = w.fixAvailable ? 
"✅ yes" : "❌ no"; + lines.push(`| ${i + 1} | ${w.name} | ${emoji} ${w.severity} | ${w.title} | ${fix} |`); + } + } + + return lines.join("\n"); +} diff --git a/src/resources/extensions/gsd/verification-gate.ts b/src/resources/extensions/gsd/verification-gate.ts new file mode 100644 index 000000000..5a8b2fa01 --- /dev/null +++ b/src/resources/extensions/gsd/verification-gate.ts @@ -0,0 +1,567 @@ +// GSD Extension — Verification Gate +// Pure functions for discovering and running verification commands. +// Discovery order (D003): preference → task plan verify → package.json scripts. +// First non-empty source wins. + +import { spawnSync } from "node:child_process"; +import { existsSync, readFileSync } from "node:fs"; +import { join, basename } from "node:path"; +import type { AuditWarning, RuntimeError, VerificationCheck, VerificationResult } from "./types.js"; + +/** Maximum bytes of stdout/stderr to retain per command (10 KB). */ +const MAX_OUTPUT_BYTES = 10 * 1024; + +/** Truncate a string to maxBytes, appending a marker if truncated. */ +function truncate(value: string | null | undefined, maxBytes: number): string { + if (!value) return ""; + if (Buffer.byteLength(value, "utf-8") <= maxBytes) return value; + // Slice conservatively then trim to last full character + const buf = Buffer.from(value, "utf-8").subarray(0, maxBytes); + return buf.toString("utf-8") + "\n…[truncated]"; +} + +// ─── Command Discovery ────────────────────────────────────────────────────── + +export interface DiscoverCommandsOptions { + preferenceCommands?: string[]; + taskPlanVerify?: string; + cwd: string; +} + +export interface DiscoveredCommands { + commands: string[]; + source: VerificationResult["discoverySource"]; +} + +/** Package.json script keys to probe, in order. */ +const PACKAGE_SCRIPT_KEYS = ["typecheck", "lint", "test"] as const; + +/** + * Discover verification commands using the first-non-empty-wins strategy (D003): + * 1. Explicit preference commands + * 2. Task plan verify field (split on &&) + * 3. package.json scripts (typecheck, lint, test) + * 4. None found + */ +export function discoverCommands(options: DiscoverCommandsOptions): DiscoveredCommands { + // 1. Preference commands + if (options.preferenceCommands && options.preferenceCommands.length > 0) { + const filtered = options.preferenceCommands + .map(c => c.trim()) + .filter(Boolean); + if (filtered.length > 0) { + return { commands: filtered, source: "preference" }; + } + } + + // 2. Task plan verify field (commands are untrusted — sanitize) + if (options.taskPlanVerify && options.taskPlanVerify.trim()) { + const commands = options.taskPlanVerify + .split("&&") + .map(c => c.trim()) + .filter(Boolean) + .filter(c => sanitizeCommand(c) !== null); + if (commands.length > 0) { + return { commands, source: "task-plan" }; + } + } + + // 3. package.json scripts + const pkgPath = join(options.cwd, "package.json"); + if (existsSync(pkgPath)) { + try { + const raw = readFileSync(pkgPath, "utf-8"); + const pkg = JSON.parse(raw); + if (pkg && typeof pkg === "object" && pkg.scripts && typeof pkg.scripts === "object") { + const commands: string[] = []; + for (const key of PACKAGE_SCRIPT_KEYS) { + if (typeof pkg.scripts[key] === "string") { + commands.push(`npm run ${key}`); + } + } + if (commands.length > 0) { + return { commands, source: "package-json" }; + } + } + } catch { + // Malformed package.json — fall through to "none" + } + } + + // 4. 
Nothing found + return { commands: [], source: "none" }; +} + +// ─── Failure Context Formatting ────────────────────────────────────────────── + +/** Maximum chars of stderr to include per failed check in failure context. */ +const MAX_STDERR_PER_CHECK = 2_000; + +/** Maximum total chars for the combined failure context output. */ +const MAX_FAILURE_CONTEXT_CHARS = 10_000; + +/** + * Format failed verification checks into a prompt-injectable text block. + * + * Each failed check gets a heading with the command name and exit code, + * followed by a truncated stderr excerpt. Individual stderr is capped to + * 2 000 chars; total output is capped to 10 000 chars. + * + * Returns an empty string when all checks pass or the checks array is empty. + */ +export function formatFailureContext(result: VerificationResult): string { + const failures = result.checks.filter((c) => c.exitCode !== 0); + if (failures.length === 0) return ""; + + const blocks: string[] = []; + + for (const check of failures) { + let stderr = check.stderr ?? ""; + if (stderr.length > MAX_STDERR_PER_CHECK) { + stderr = stderr.slice(0, MAX_STDERR_PER_CHECK) + "\n…[truncated]"; + } + + blocks.push( + `### ❌ \`${check.command}\` (exit code ${check.exitCode})\n\`\`\`stderr\n${stderr}\n\`\`\``, + ); + } + + let body = blocks.join("\n\n"); + const header = "## Verification Failures\n\n"; + + if (header.length + body.length > MAX_FAILURE_CONTEXT_CHARS) { + body = + body.slice(0, MAX_FAILURE_CONTEXT_CHARS - header.length) + + "\n\n…[remaining failures truncated]"; + } + + return header + body; +} + +// ─── Gate Execution ───────────────────────────────────────────────────────── + +/** Characters that indicate shell injection when found in a command string. */ +const SHELL_INJECTION_PATTERN = /[;|`]|\$\(/; + +/** + * Validate a command string for obvious shell injection patterns. + * Returns the command unchanged if safe, or null if suspicious. + */ +function sanitizeCommand(cmd: string): string | null { + if (SHELL_INJECTION_PATTERN.test(cmd)) return null; + return cmd; +} + +/** Default timeout for verification commands (ms). */ +const DEFAULT_COMMAND_TIMEOUT_MS = 120_000; + +export interface RunVerificationGateOptions { + basePath: string; + unitId: string; + cwd: string; + preferenceCommands?: string[]; + taskPlanVerify?: string; + /** Per-command timeout in ms. Defaults to 120 000 (2 minutes). */ + commandTimeoutMs?: number; +} + +/** + * Run the verification gate: discover commands, execute each via spawnSync, + * and return a structured result. + * + * - All commands run sequentially regardless of individual pass/fail. + * - `passed` is true when every command exits 0 (or no commands are discovered). + * - stdout/stderr per command are truncated to 10 KB. + */ +export function runVerificationGate(options: RunVerificationGateOptions): VerificationResult { + const timestamp = Date.now(); + + const { commands, source } = discoverCommands({ + preferenceCommands: options.preferenceCommands, + taskPlanVerify: options.taskPlanVerify, + cwd: options.cwd, + }); + + if (commands.length === 0) { + return { + passed: true, + checks: [], + discoverySource: source, + timestamp, + }; + } + + const checks: VerificationCheck[] = []; + + for (const command of commands) { + const start = Date.now(); + const result = spawnSync(command, { + shell: true, + cwd: options.cwd, + stdio: "pipe", + encoding: "utf-8", + timeout: options.commandTimeoutMs ?? 
DEFAULT_COMMAND_TIMEOUT_MS, + }); + const durationMs = Date.now() - start; + + let exitCode: number; + let stderr: string; + + if (result.error) { + // Command not found or spawn failure + exitCode = 127; + stderr = truncate( + (result.stderr || "") + "\n" + (result.error as Error).message, + MAX_OUTPUT_BYTES, + ); + } else { + // status is null when killed by signal — treat as failure + exitCode = result.status ?? 1; + stderr = truncate(result.stderr, MAX_OUTPUT_BYTES); + } + + checks.push({ + command, + exitCode, + stdout: truncate(result.stdout, MAX_OUTPUT_BYTES), + stderr, + durationMs, + }); + } + + return { + passed: checks.every(c => c.exitCode === 0), + checks, + discoverySource: source, + timestamp, + }; +} + +// ─── Runtime Error Capture ────────────────────────────────────────────────── + +/** Maximum characters of browser console text to retain per entry. */ +const MAX_BROWSER_TEXT_CHARS = 500; + +/** Fatal signals that indicate a crash regardless of other status fields. */ +const FATAL_SIGNALS = new Set(["SIGABRT", "SIGSEGV", "SIGBUS"]); + +/** + * Injectable dependencies for captureRuntimeErrors. + * When omitted the function uses dynamic import() to access + * bg-shell's processes Map and browser-tools' getConsoleLogs(). + * Provide overrides in tests to avoid module mocking. + */ +export interface CaptureRuntimeErrorsOptions { + getProcesses?: () => Map<string, unknown>; + getConsoleLogs?: () => Array<{ type: string; text: string; timestamp: number; url: string }>; +} + +/** + * Scan bg-shell processes and browser console logs for runtime errors. + * + * Severity classification follows D004: + * - bg-shell status "crashed" → blocking crash + * - bg-shell !alive && exitCode !== 0 && exitCode !== null → blocking crash + * - bg-shell signal SIGABRT/SIGSEGV/SIGBUS → blocking crash + * - Browser console error with "Unhandled"/"UnhandledRejection" → blocking crash + * - Browser console error (general) → non-blocking error + * - Browser console warning with deprecation text → non-blocking warning + * - bg-shell alive process with recentErrors → non-blocking error + * + * Returns RuntimeError[] — empty when both sources are unavailable. 
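+ *
+ * A minimal sketch of injected-dependency usage (the process shape below is
+ * abbreviated; real bg-shell entries carry more fields):
+ *
+ * @example
+ * const errors = await captureRuntimeErrors({
+ *   getProcesses: () => new Map([["p1", { id: "p1", status: "crashed", alive: false, exitCode: 1 }]]),
+ *   getConsoleLogs: () => [],
+ * });
+ * // → [{ source: "bg-shell", severity: "crash", blocking: true, message: "[p1] exitCode=1" }]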
+ */ +export async function captureRuntimeErrors( + options?: CaptureRuntimeErrorsOptions, +): Promise<RuntimeError[]> { + const errors: RuntimeError[] = []; + + // ── bg-shell scan ───────────────────────────────────────────────────── + try { + let processes: Map<string, unknown>; + if (options?.getProcesses) { + processes = options.getProcesses(); + } else { + const mod = await import("../bg-shell/process-manager.js"); + processes = mod.processes; + } + + for (const [id, raw] of processes) { + const proc = raw as { + id: string; + label?: string; + status?: string; + alive?: boolean; + exitCode?: number | null; + signal?: string | null; + recentErrors?: string[]; + }; + + const name = proc.label || proc.id || id; + + // Check for fatal signal first (applies regardless of alive/status) + if (proc.signal && FATAL_SIGNALS.has(proc.signal)) { + errors.push({ + source: "bg-shell", + severity: "crash", + message: buildBgShellMessage(name, proc.exitCode, proc.signal, proc.recentErrors), + blocking: true, + }); + continue; + } + + // Crashed status + if (proc.status === "crashed") { + errors.push({ + source: "bg-shell", + severity: "crash", + message: buildBgShellMessage(name, proc.exitCode, proc.signal, proc.recentErrors), + blocking: true, + }); + continue; + } + + // Non-zero exit on dead process + if ( + !proc.alive && + proc.exitCode !== 0 && + proc.exitCode !== null && + proc.exitCode !== undefined + ) { + errors.push({ + source: "bg-shell", + severity: "crash", + message: buildBgShellMessage(name, proc.exitCode, proc.signal, proc.recentErrors), + blocking: true, + }); + continue; + } + + // Alive process with recent errors — non-blocking + if (proc.alive && proc.recentErrors && proc.recentErrors.length > 0) { + const snippet = proc.recentErrors.slice(0, 3).join("; "); + errors.push({ + source: "bg-shell", + severity: "error", + message: `[${name}] recent errors: ${snippet}`, + blocking: false, + }); + } + } + } catch { + // bg-shell not available — skip silently + } + + // ── browser console scan ────────────────────────────────────────────── + try { + let logs: Array<{ type: string; text: string; timestamp: number; url: string }>; + if (options?.getConsoleLogs) { + logs = options.getConsoleLogs(); + } else { + const mod = await import("../browser-tools/state.js"); + logs = mod.getConsoleLogs(); + } + + for (const entry of logs) { + const text = + entry.text.length > MAX_BROWSER_TEXT_CHARS + ? entry.text.slice(0, MAX_BROWSER_TEXT_CHARS) + "…[truncated]" + : entry.text; + + if (entry.type === "error") { + // Unhandled rejection / unhandled error → blocking crash + if (/unhandled/i.test(entry.text)) { + errors.push({ + source: "browser", + severity: "crash", + message: text, + blocking: true, + }); + } else { + // General console.error → non-blocking error + errors.push({ + source: "browser", + severity: "error", + message: text, + blocking: false, + }); + } + } else if (entry.type === "warning" && /deprecated/i.test(entry.text)) { + // Deprecation warning → non-blocking warning + errors.push({ + source: "browser", + severity: "warning", + message: text, + blocking: false, + }); + } + // Non-deprecation warnings are intentionally ignored + } + } catch { + // browser-tools not available — skip silently + } + + return errors; +} + +/** Build a human-readable message for a bg-shell process error. 
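+ * For example (hypothetical values; field order as assembled below):
+ *   buildBgShellMessage("api", 137, null, ["OOM killed"]) → "[api] exitCode=137 errors: OOM killed"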
*/ +function buildBgShellMessage( + name: string, + exitCode: number | null | undefined, + signal: string | null | undefined, + recentErrors: string[] | undefined, +): string { + const parts: string[] = [`[${name}]`]; + if (signal) parts.push(`signal=${signal}`); + if (exitCode !== null && exitCode !== undefined) parts.push(`exitCode=${exitCode}`); + if (recentErrors && recentErrors.length > 0) { + const snippet = recentErrors.slice(0, 3).join("; "); + parts.push(`errors: ${snippet}`); + } + return parts.join(" "); +} + +// ─── Dependency Audit ─────────────────────────────────────────────────────── + +/** Top-level dependency files that trigger an audit when changed. */ +const DEPENDENCY_FILES = new Set([ + "package.json", + "package-lock.json", + "pnpm-lock.yaml", + "yarn.lock", + "bun.lockb", +]); + +/** + * Injectable dependencies for runDependencyAudit (D023 pattern). + * When omitted the function uses real git/npm via spawnSync. + * Provide overrides in tests to avoid real git repos and npm registries. + */ +export interface DependencyAuditOptions { + gitDiff?: (cwd: string) => string[]; + npmAudit?: (cwd: string) => { stdout: string; exitCode: number }; +} + +/** + * Default gitDiff: runs `git diff --name-only HEAD` and returns file paths. + * Returns empty array on any failure (non-git dir, git not found, etc.). + */ +function defaultGitDiff(cwd: string): string[] { + try { + const result = spawnSync("git", ["diff", "--name-only", "HEAD"], { + cwd, + encoding: "utf-8", + timeout: 10_000, + }); + if (result.status !== 0 || !result.stdout) return []; + return result.stdout.trim().split("\n").filter(Boolean); + } catch { + return []; + } +} + +/** + * Default npmAudit: runs `npm audit --audit-level=moderate --json`. + * Returns { stdout, exitCode }. Non-zero exit is expected when vulnerabilities exist. + */ +function defaultNpmAudit(cwd: string): { stdout: string; exitCode: number } { + const result = spawnSync("npm", ["audit", "--audit-level=moderate", "--json"], { + cwd, + encoding: "utf-8", + timeout: 60_000, + }); + return { + stdout: result.stdout ?? "", + exitCode: result.status ?? 1, + }; +} + +/** + * Detect dependency file changes and run npm audit if changes are found. + * + * - Calls gitDiff to get changed files, checks if any are top-level dependency files + * - If no dependency files changed, returns [] + * - Runs npmAudit and parses JSON output into AuditWarning[] + * - Never throws — all errors return [] + * - Non-zero npm audit exit code is expected (vulnerabilities found), not an error + */ +export function runDependencyAudit( + cwd: string, + options?: DependencyAuditOptions, +): AuditWarning[] { + try { + const gitDiff = options?.gitDiff ?? defaultGitDiff; + const npmAudit = options?.npmAudit ?? 
defaultNpmAudit; + + // Get changed files and check for top-level dependency file matches + const changedFiles = gitDiff(cwd); + const hasDependencyChange = changedFiles.some((filePath) => { + const name = basename(filePath); + // Only match top-level files: the path must equal just the filename + // (no directory separators) to be considered top-level + return DEPENDENCY_FILES.has(name) && filePath === name; + }); + + if (!hasDependencyChange) return []; + + // Run npm audit + const auditResult = npmAudit(cwd); + + // Parse JSON output — npm audit exits non-zero when vulnerabilities exist + let parsed: Record<string, unknown>; + try { + parsed = JSON.parse(auditResult.stdout); + } catch { + return []; + } + + // Extract vulnerabilities from the parsed output + const vulnerabilities = parsed.vulnerabilities; + if (!vulnerabilities || typeof vulnerabilities !== "object") return []; + + const warnings: AuditWarning[] = []; + for (const [name, raw] of Object.entries(vulnerabilities as Record<string, unknown>)) { + const vuln = raw as { + severity?: string; + fixAvailable?: boolean; + via?: unknown[]; + }; + if (!vuln || typeof vuln !== "object") continue; + + const severity = vuln.severity; + if ( + severity !== "low" && + severity !== "moderate" && + severity !== "high" && + severity !== "critical" + ) { + continue; + } + + // Find the first `via` entry that's an object (not a string reference) + let title = name; + let url = ""; + if (Array.isArray(vuln.via)) { + for (const entry of vuln.via) { + if (entry && typeof entry === "object" && !Array.isArray(entry)) { + const obj = entry as { title?: string; url?: string }; + if (obj.title) title = obj.title; + if (obj.url) url = obj.url; + break; + } + } + } + + warnings.push({ + name, + severity: severity as AuditWarning["severity"], + title, + url, + fixAvailable: vuln.fixAvailable === true, + }); + } + + return warnings; + } catch { + return []; + } +} diff --git a/src/resources/extensions/subagent/index.ts b/src/resources/extensions/subagent/index.ts index 943154fb0..774051028 100644 --- a/src/resources/extensions/subagent/index.ts +++ b/src/resources/extensions/subagent/index.ts @@ -12,7 +12,7 @@ * Uses JSON mode to capture structured output from subagents. */ -import { spawn } from "node:child_process"; +import { spawn, type ChildProcess } from "node:child_process"; import * as crypto from "node:crypto"; import * as fs from "node:fs"; import * as os from "node:os"; @@ -38,6 +38,44 @@ import { registerWorker, updateWorker } from "./worker-registry.js"; const MAX_PARALLEL_TASKS = 8; const MAX_CONCURRENCY = 4; const COLLAPSED_ITEM_COUNT = 10; +const liveSubagentProcesses = new Set<ChildProcess>(); + +async function stopLiveSubagents(): Promise<void> { + const active = Array.from(liveSubagentProcesses); + if (active.length === 0) return; + + for (const proc of active) { + try { + proc.kill("SIGTERM"); + } catch { + /* ignore */ + } + } + + await Promise.all( + active.map( + (proc) => + new Promise<void>((resolve) => { + const done = () => resolve(); + const timer = setTimeout(done, 500); + proc.once("exit", () => { + clearTimeout(timer); + resolve(); + }); + }), + ), + ); + + for (const proc of active) { + if (proc.exitCode === null) { + try { + proc.kill("SIGKILL"); + } catch { + /* ignore */ + } + } + } +} function formatTokens(count: number): string { if (count < 1000) return count.toString(); @@ -302,6 +340,7 @@ async function runSingleAgent( [process.env.GSD_BIN_PATH!, ...extensionArgs, ...args], { cwd: cwd ?? 
defaultCwd, shell: false, stdio: ["ignore", "pipe", "pipe"] }, ); + liveSubagentProcesses.add(proc); let buffer = ""; const processLine = (line: string) => { @@ -353,11 +392,13 @@ async function runSingleAgent( }); proc.on("close", (code) => { + liveSubagentProcesses.delete(proc); if (buffer.trim()) processLine(buffer); resolve(code ?? 0); }); proc.on("error", () => { + liveSubagentProcesses.delete(proc); resolve(1); }); @@ -432,6 +473,10 @@ const SubagentParams = Type.Object({ }); export default function (pi: ExtensionAPI) { + pi.on("session_shutdown", async () => { + await stopLiveSubagents(); + }); + // /subagent command - list available agents pi.registerCommand("subagent", { description: "List available subagents", diff --git a/src/tests/auto-budget.test.ts b/src/tests/auto-budget.test.ts new file mode 100644 index 000000000..76ea01c4c --- /dev/null +++ b/src/tests/auto-budget.test.ts @@ -0,0 +1,71 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { getBudgetAlertLevel, getNewBudgetAlertLevel, getBudgetEnforcementAction } from "../resources/extensions/gsd/auto-budget.js"; + +describe("auto-budget", () => { + describe("getBudgetAlertLevel", () => { + it("returns 0 for low usage", () => { + assert.equal(getBudgetAlertLevel(0), 0); + assert.equal(getBudgetAlertLevel(0.5), 0); + assert.equal(getBudgetAlertLevel(0.74), 0); + }); + + it("returns 75 at 75%", () => { + assert.equal(getBudgetAlertLevel(0.75), 75); + assert.equal(getBudgetAlertLevel(0.79), 75); + }); + + it("returns 80 at 80%", () => { + assert.equal(getBudgetAlertLevel(0.80), 80); + assert.equal(getBudgetAlertLevel(0.89), 80); + }); + + it("returns 90 at 90%", () => { + assert.equal(getBudgetAlertLevel(0.90), 90); + assert.equal(getBudgetAlertLevel(0.99), 90); + }); + + it("returns 100 at 100%+", () => { + assert.equal(getBudgetAlertLevel(1.0), 100); + assert.equal(getBudgetAlertLevel(1.5), 100); + }); + }); + + describe("getNewBudgetAlertLevel", () => { + it("returns null when at same or lower level", () => { + assert.equal(getNewBudgetAlertLevel(75, 0.75), null); + assert.equal(getNewBudgetAlertLevel(90, 0.80), null); + assert.equal(getNewBudgetAlertLevel(80, 0.5), null); + }); + + it("returns new level when crossing up", () => { + assert.equal(getNewBudgetAlertLevel(0, 0.75), 75); + assert.equal(getNewBudgetAlertLevel(75, 0.80), 80); + assert.equal(getNewBudgetAlertLevel(80, 0.90), 90); + assert.equal(getNewBudgetAlertLevel(90, 1.0), 100); + }); + + it("returns null for 0% usage", () => { + assert.equal(getNewBudgetAlertLevel(0, 0), null); + }); + }); + + describe("getBudgetEnforcementAction", () => { + it("returns none when under budget", () => { + assert.equal(getBudgetEnforcementAction("halt", 0.5), "none"); + assert.equal(getBudgetEnforcementAction("pause", 0.99), "none"); + }); + + it("returns halt when at ceiling with halt enforcement", () => { + assert.equal(getBudgetEnforcementAction("halt", 1.0), "halt"); + }); + + it("returns pause when at ceiling with pause enforcement", () => { + assert.equal(getBudgetEnforcementAction("pause", 1.0), "pause"); + }); + + it("returns warn when at ceiling with warn enforcement", () => { + assert.equal(getBudgetEnforcementAction("warn", 1.0), "warn"); + }); + }); +}); diff --git a/src/tests/auto-tool-tracking.test.ts b/src/tests/auto-tool-tracking.test.ts new file mode 100644 index 000000000..fe1e2cbe1 --- /dev/null +++ b/src/tests/auto-tool-tracking.test.ts @@ -0,0 +1,46 @@ +import { describe, it, beforeEach } from "node:test"; +import 
diff --git a/src/tests/auto-tool-tracking.test.ts b/src/tests/auto-tool-tracking.test.ts
new file mode 100644
index 000000000..fe1e2cbe1
--- /dev/null
+++ b/src/tests/auto-tool-tracking.test.ts
@@ -0,0 +1,46 @@
+import { describe, it, beforeEach } from "node:test";
+import assert from "node:assert/strict";
+import {
+  markToolStart,
+  markToolEnd,
+  getOldestInFlightToolAgeMs,
+  getInFlightToolCount,
+  clearInFlightTools,
+} from "../resources/extensions/gsd/auto-tool-tracking.js";
+
+describe("auto-tool-tracking", () => {
+  beforeEach(() => {
+    clearInFlightTools();
+  });
+
+  it("tracks tool start and end", () => {
+    assert.equal(getInFlightToolCount(), 0);
+    markToolStart("tool-1", true);
+    assert.equal(getInFlightToolCount(), 1);
+    markToolEnd("tool-1");
+    assert.equal(getInFlightToolCount(), 0);
+  });
+
+  it("skips tracking when not active", () => {
+    markToolStart("tool-1", false);
+    assert.equal(getInFlightToolCount(), 0);
+  });
+
+  it("returns 0 age when no tools in flight", () => {
+    assert.equal(getOldestInFlightToolAgeMs(), 0);
+  });
+
+  it("returns positive age for in-flight tools", () => {
+    markToolStart("tool-1", true);
+    // Age should be very small (< 100ms)
+    assert.ok(getOldestInFlightToolAgeMs() < 100);
+  });
+
+  it("clears all in-flight tools", () => {
+    markToolStart("tool-1", true);
+    markToolStart("tool-2", true);
+    assert.equal(getInFlightToolCount(), 2);
+    clearInFlightTools();
+    assert.equal(getInFlightToolCount(), 0);
+  });
+});
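The in-flight bookkeeping exercised here fits in a timestamp map. Again a sketch under assumptions: the exported names come from the test's import, while the internals are guessed rather than copied from auto-tool-tracking.ts:

    // Sketch only: a timestamp map that satisfies the assertions above.
    const inFlight = new Map<string, number>(); // toolCallId -> start time (epoch ms)

    export function markToolStart(toolCallId: string, active: boolean): void {
      if (!active) return; // only track while a GSD run is active
      inFlight.set(toolCallId, Date.now());
    }

    export function markToolEnd(toolCallId: string): void {
      inFlight.delete(toolCallId);
    }

    export function getInFlightToolCount(): number {
      return inFlight.size;
    }

    export function getOldestInFlightToolAgeMs(): number {
      if (inFlight.size === 0) return 0; // matches the "0 age" test
      return Date.now() - Math.min(...inFlight.values());
    }

    export function clearInFlightTools(): void {
      inFlight.clear();
    }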
diff --git a/src/tests/bg-shell-session-cleanup.test.ts b/src/tests/bg-shell-session-cleanup.test.ts
new file mode 100644
index 000000000..6ac74f7f1
--- /dev/null
+++ b/src/tests/bg-shell-session-cleanup.test.ts
@@ -0,0 +1,61 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import {
+  startProcess,
+  cleanupAll,
+  cleanupSessionProcesses,
+  processes,
+} from "../resources/extensions/bg-shell/process-manager.ts";
+
+function isPidAlive(pid: number | undefined): boolean {
+  if (!pid || pid <= 0) return false;
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+// Use a shell-native sleeper so the test exercises bg_shell's real spawn path
+// without relying on platform-specific quoting for `node -e "..."`
+const sleeperCommand = "sleep 30";
+
+test("cleanupSessionProcesses reaps only session-scoped processes from the previous session", async () => {
+  const owned = startProcess({
+    command: sleeperCommand,
+    cwd: process.cwd(),
+    ownerSessionFile: "session-a",
+  });
+  const persistent = startProcess({
+    command: sleeperCommand,
+    cwd: process.cwd(),
+    ownerSessionFile: "session-a",
+    persistAcrossSessions: true,
+  });
+  const foreign = startProcess({
+    command: sleeperCommand,
+    cwd: process.cwd(),
+    ownerSessionFile: "session-b",
+  });
+
+  try {
+    await new Promise((resolve) => setTimeout(resolve, 150));
+    assert.equal(isPidAlive(owned.proc.pid), true, "owned process should be alive before cleanup");
+    assert.equal(isPidAlive(persistent.proc.pid), true, "persistent process should be alive before cleanup");
+    assert.equal(isPidAlive(foreign.proc.pid), true, "foreign process should be alive before cleanup");
+
+    const removed = await cleanupSessionProcesses("session-a", { graceMs: 200 });
+    assert.deepEqual(removed.sort(), [owned.id], "only the session-scoped process should be reaped");
+
+    await new Promise((resolve) => setTimeout(resolve, 150));
+    assert.equal(isPidAlive(owned.proc.pid), false, "owned process should be terminated");
+    assert.equal(isPidAlive(persistent.proc.pid), true, "persistent process should survive cleanup");
+    assert.equal(isPidAlive(foreign.proc.pid), true, "foreign process should survive cleanup");
+    assert.equal(processes.get(owned.id)?.persistAcrossSessions, false);
+    assert.equal(processes.get(persistent.id)?.persistAcrossSessions, true);
+  } finally {
+    cleanupAll();
+  }
+});
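This test fixes three properties of cleanupSessionProcesses: it reaps only processes owned by the given session, it skips anything marked persistAcrossSessions, and it leaves registry entries readable afterwards (the flag assertions would see undefined if entries were deleted). A sketch consistent with that contract follows; the field and option names come from the test, everything else is an assumption about process-manager.ts rather than its actual code:

    // Sketch only: plausible cleanup logic matching the test's observable behavior.
    import type { ChildProcess } from "node:child_process";

    interface ManagedProcess {
      proc: ChildProcess;
      ownerSessionFile: string;
      persistAcrossSessions: boolean;
    }

    declare const processes: Map<string, ManagedProcess>; // the module's registry

    export async function cleanupSessionProcesses(
      ownerSessionFile: string,
      opts: { graceMs?: number } = {},
    ): Promise<string[]> {
      const removed: string[] = [];
      for (const [id, entry] of processes) {
        if (entry.ownerSessionFile !== ownerSessionFile) continue; // another session's process
        if (entry.persistAcrossSessions) continue; // opted out of reaping
        entry.proc.kill("SIGTERM"); // polite shutdown first
        removed.push(id);
      }
      // Give reaped processes graceMs to exit, then escalate for stragglers.
      await new Promise((r) => setTimeout(r, opts.graceMs ?? 1000));
      for (const id of removed) {
        const entry = processes.get(id);
        if (entry && entry.proc.exitCode === null) entry.proc.kill("SIGKILL");
      }
      return removed;
    }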