Merge pull request #906 from jeremymcs/issue-898-auto-refactor

refactor: extract 8 focused modules from auto.ts
This commit is contained in:
Lex Christopherson 2026-03-17 13:20:39 -06:00
commit 6becff186e
66 changed files with 7599 additions and 1022 deletions

View file

@ -253,3 +253,20 @@ jobs:
done
echo "::error::Smoke test failed — gsd-pi@${VERSION} not installable"
exit 1
- name: Verify dist-tag after publish
if: steps.version-check.outputs.is_prerelease == 'false'
run: |
VERSION=$(node -p "require('./package.json').version")
echo "Verifying npm dist-tag 'latest' points to ${VERSION}..."
for attempt in $(seq 1 10); do
LATEST=$(npm view gsd-pi dist-tags.latest 2>/dev/null || echo "")
if [ "${LATEST}" = "${VERSION}" ]; then
echo " ✓ npm dist-tags.latest = ${VERSION}"
exit 0
fi
echo " Attempt ${attempt}/10: latest=${LATEST}, expected=${VERSION}, retrying in 15s..."
sleep 15
done
echo "::error::dist-tags.latest is '${LATEST}' but expected '${VERSION}' — run: npm dist-tag add gsd-pi@${VERSION} latest"
exit 1

4
.gitignore vendored
View file

@ -63,3 +63,7 @@ TODOS.md
.gsd/DISCUSSION-MANIFEST.json
.gsd/milestones/**/*-CONTINUE.md
.gsd/milestones/**/continue.md
# ── GSD baseline (auto-generated) ──
.gsd/forensics/
.gsd/parallel/

BIN
docs/pr-876/01-index.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 173 KiB

BIN
docs/pr-876/02-summary.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 204 KiB

BIN
docs/pr-876/03-progress.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 208 KiB

BIN
docs/pr-876/04-depgraph.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 162 KiB

BIN
docs/pr-876/05-metrics.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 267 KiB

BIN
docs/pr-876/06-timeline.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 536 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 258 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 239 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 284 KiB

BIN
docs/pr-876/09-captures.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 261 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 237 KiB

View file

@ -747,10 +747,13 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto
return "toolUse";
case "content_filter":
return "error";
default: {
const _exhaustive: never = reason;
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}
default:
// Third-party and community models (e.g. Qwen GGUF quants) may emit
// non-standard finish_reason values like "eos_token", "eos", or
// "end_of_turn". The OpenAI spec defines finish_reason as a string,
// so we treat unrecognized values as a normal stop rather than
// throwing — which would abort in-flight tool calls (#863).
return "stop";
}
}

View file

@ -10,8 +10,12 @@
* to resolve. This script bridges the gap.
*
* Runs as part of postinstall (before any ESM code that imports @gsd/*).
*
* On Windows without Developer Mode or administrator rights, creating symlinks
* (even NTFS junctions) can fail with EPERM. In that case we fall back to
* cpSync (directory copy) which works universally.
*/
const { existsSync, mkdirSync, symlinkSync, lstatSync, readlinkSync, unlinkSync, readdirSync } = require('fs')
const { existsSync, mkdirSync, symlinkSync, cpSync, lstatSync, readlinkSync, unlinkSync } = require('fs')
const { resolve, join } = require('path')
const root = resolve(__dirname, '..')
@ -33,6 +37,7 @@ if (!existsSync(nodeModulesGsd)) {
}
let linked = 0
let copied = 0
for (const [dir, name] of Object.entries(packageMap)) {
const source = join(packagesDir, dir)
const target = join(nodeModulesGsd, name)
@ -50,21 +55,32 @@ for (const [dir, name] of Object.entries(packageMap)) {
}
unlinkSync(target) // Wrong target, relink
} else {
continue // Real directory (e.g., from bundleDependencies), don't touch
continue // Real directory (e.g., copied or from bundleDependencies), don't touch
}
} catch {
continue
}
}
let symlinkOk = false
try {
symlinkSync(source, target, 'junction') // junction works on Windows too
symlinkOk = true
linked++
} catch {
// Non-fatal — may fail in read-only environments
// Symlink failed — common on Windows without Developer Mode or admin rights.
// Fall back to a directory copy so the package is still resolvable.
}
if (!symlinkOk) {
try {
cpSync(source, target, { recursive: true })
copied++
} catch {
// Non-fatal — loader.ts will emit a clearer error if resolution still fails
}
}
}
if (linked > 0) {
process.stderr.write(` Linked ${linked} workspace packages\n`)
}
if (linked > 0) process.stderr.write(` Linked ${linked} workspace package${linked !== 1 ? 's' : ''}\n`)
if (copied > 0) process.stderr.write(` Copied ${copied} workspace package${copied !== 1 ? 's' : ''} (symlinks unavailable)\n`)

View file

@ -103,6 +103,54 @@ try {
process.exit(1);
}
// --- Verify @gsd/* packages resolved correctly post-install ---
// This catches the Windows-style failure where symlinkSync fails silently and
// node_modules/@gsd/ is never populated, causing ERR_MODULE_NOT_FOUND at runtime.
console.log('==> Verifying @gsd/* workspace package resolution...');
const installedRoot = join(installDir, 'node_modules', 'gsd-pi');
const criticalPkgs = ['pi-coding-agent'];
let resolutionFailed = false;
for (const pkg of criticalPkgs) {
const pkgPath = join(installedRoot, 'node_modules', '@gsd', pkg);
const fallbackPath = join(installedRoot, 'packages', pkg);
if (!existsSync(pkgPath)) {
if (existsSync(fallbackPath)) {
console.log(` MISSING symlink/copy: node_modules/@gsd/${pkg} (packages/${pkg} exists — postinstall may not have run)`);
} else {
console.log(` MISSING: node_modules/@gsd/${pkg} (packages/${pkg} also absent — package is broken)`);
}
resolutionFailed = true;
}
}
if (resolutionFailed) {
console.log('ERROR: @gsd/* packages are not resolvable after install.');
console.log(' This will cause ERR_MODULE_NOT_FOUND on first run (especially on Windows).');
process.exit(1);
}
console.log(' @gsd/* packages are resolvable.');
// --- Run the binary to confirm end-to-end resolution ---
console.log('==> Running installed binary (gsd -v)...');
const loaderPath = join(installedRoot, 'dist', 'loader.js');
try {
const versionOutput = execSync(`node "${loaderPath}" -v`, {
cwd: installDir,
encoding: 'utf8',
stdio: ['pipe', 'pipe', 'pipe'],
timeout: 15000,
}).trim();
console.log(` gsd -v => ${versionOutput}`);
if (!versionOutput.match(/^\d+\.\d+\.\d+/)) {
console.log('ERROR: gsd -v returned unexpected output (expected a version string).');
process.exit(1);
}
} catch (err) {
console.log('ERROR: Running gsd -v failed after install.');
if (err.stdout) console.log(err.stdout);
if (err.stderr) console.log(err.stderr);
process.exit(1);
}
console.log('');
console.log('Package is installable. Safe to publish.');
process.exit(0);

View file

@ -18,6 +18,7 @@ import { ChildProcess } from 'node:child_process'
// RpcClient is not in @gsd/pi-coding-agent's public exports — import from dist directly.
// This relative path resolves correctly from both src/ (via tsx) and dist/ (compiled).
import { RpcClient } from '../packages/pi-coding-agent/dist/modes/rpc/rpc-client.js'
import { attachJsonlLineReader, serializeJsonLine } from '../packages/pi-coding-agent/dist/modes/rpc/jsonl.js'
// ---------------------------------------------------------------------------
// Types
@ -33,6 +34,9 @@ export interface HeadlessOptions {
contextText?: string // inline text
auto?: boolean // chain into auto-mode after milestone creation
verbose?: boolean // show tool calls in output
maxRestarts?: number // auto-restart on crash (default 3, 0 to disable)
supervised?: boolean // supervised mode: forward interactive requests to orchestrator
responseTimeout?: number // timeout for orchestrator response (default 30000ms)
}
interface ExtensionUIRequest {
@ -92,6 +96,21 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions {
options.auto = true
} else if (arg === '--verbose') {
options.verbose = true
} else if (arg === '--max-restarts' && i + 1 < args.length) {
options.maxRestarts = parseInt(args[++i], 10)
if (Number.isNaN(options.maxRestarts) || options.maxRestarts < 0) {
process.stderr.write('[headless] Error: --max-restarts must be a non-negative integer\n')
process.exit(1)
}
} else if (arg === '--supervised') {
options.supervised = true
options.json = true // supervised implies json
} else if (arg === '--response-timeout' && i + 1 < args.length) {
options.responseTimeout = parseInt(args[++i], 10)
if (Number.isNaN(options.responseTimeout) || options.responseTimeout <= 0) {
process.stderr.write('[headless] Error: --response-timeout must be a positive integer (milliseconds)\n')
process.exit(1)
}
}
} else if (!positionalStarted) {
positionalStarted = true
@ -104,14 +123,6 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions {
return options
}
// ---------------------------------------------------------------------------
// JSONL Helper
// ---------------------------------------------------------------------------
/** Encode an object as a single JSONL record: compact JSON plus a trailing newline. */
function serializeJsonLine(obj: Record<string, unknown>): string {
  const json = JSON.stringify(obj)
  return `${json}\n`
}
// ---------------------------------------------------------------------------
// Extension UI Auto-Responder
// ---------------------------------------------------------------------------
@ -230,6 +241,8 @@ function isMilestoneReadyNotification(event: Record<string, unknown>): boolean {
// Quick Command Detection
// ---------------------------------------------------------------------------
const FIRE_AND_FORGET_METHODS = new Set(['notify', 'setStatus', 'setWidget', 'setTitle', 'set_editor_text'])
const QUICK_COMMANDS = new Set([
'status', 'queue', 'history', 'hooks', 'export', 'stop', 'pause',
'capture', 'skip', 'undo', 'knowledge', 'config', 'prefs',
@ -241,6 +254,49 @@ function isQuickCommand(command: string): boolean {
return QUICK_COMMANDS.has(command)
}
// ---------------------------------------------------------------------------
// Supervised Stdin Reader
// ---------------------------------------------------------------------------
/**
 * Attach a JSONL line reader to process.stdin that interprets orchestrator
 * messages in supervised mode.
 *
 * Message handling:
 *  - "extension_ui_response": forwarded verbatim to the child via stdinWriter,
 *    and onResponse(id) is invoked so the caller can cancel its pending timer.
 *  - "prompt" / "steer" / "follow_up": routed to the matching RpcClient call.
 *  - anything else (including invalid JSON): a warning on stderr, line dropped.
 *
 * Returns the detach function produced by attachJsonlLineReader so the caller
 * can stop listening during cleanup.
 */
function startSupervisedStdinReader(
  stdinWriter: (data: string) => void,
  client: RpcClient,
  onResponse: (id: string) => void,
): () => void {
  const handleLine = (line: string) => {
    let msg: Record<string, unknown>
    try {
      msg = JSON.parse(line)
    } catch {
      process.stderr.write(`[headless] Warning: invalid JSON from orchestrator stdin, skipping\n`)
      return
    }
    const type = String(msg.type ?? '')
    if (type === 'extension_ui_response') {
      // Forward the raw line to the child and clear the pending-response timer.
      stdinWriter(line + '\n')
      if (typeof msg.id === 'string') {
        onResponse(msg.id)
      }
    } else if (type === 'prompt') {
      client.prompt(String(msg.message ?? ''))
    } else if (type === 'steer') {
      client.steer(String(msg.message ?? ''))
    } else if (type === 'follow_up') {
      client.followUp(String(msg.message ?? ''))
    } else {
      process.stderr.write(`[headless] Warning: unknown message type "${type}" from orchestrator stdin\n`)
    }
  }
  return attachJsonlLineReader(process.stdin as import('node:stream').Readable, handleLine)
}
// ---------------------------------------------------------------------------
// Main Orchestrator
// ---------------------------------------------------------------------------
@ -279,9 +335,46 @@ function bootstrapGsdProject(basePath: string): void {
}
/**
 * Top-level headless entry point with crash auto-restart.
 *
 * Runs runHeadlessOnce in a loop: exit codes 0 (complete) and 2 (blocked)
 * terminate the process immediately. Any other exit code triggers a restart
 * with linear backoff (5s per attempt, capped at 30s), unless the run was
 * interrupted by a signal or the restart budget (options.maxRestarts,
 * default 3) is exhausted.
 */
export async function runHeadless(options: HeadlessOptions): Promise<void> {
  const maxRestarts = options.maxRestarts ?? 3
  for (let restartCount = 0; ; ) {
    const { exitCode, interrupted } = await runHeadlessOnce(options, restartCount)
    // Success or blocked — exit normally
    if (exitCode === 0 || exitCode === 2) {
      process.exit(exitCode)
    }
    // Crash/error — only restart while budget remains
    if (restartCount >= maxRestarts) {
      process.stderr.write(`[headless] Max restarts (${maxRestarts}) reached. Exiting.\n`)
      process.exit(exitCode)
    }
    // Don't restart if SIGINT/SIGTERM was received
    if (interrupted) {
      process.exit(exitCode)
    }
    restartCount += 1
    const backoffMs = Math.min(5000 * restartCount, 30_000)
    process.stderr.write(`[headless] Restarting in ${(backoffMs / 1000).toFixed(0)}s (attempt ${restartCount}/${maxRestarts})...\n`)
    await new Promise(resolve => setTimeout(resolve, backoffMs))
  }
}
async function runHeadlessOnce(options: HeadlessOptions, restartCount: number): Promise<{ exitCode: number; interrupted: boolean }> {
let interrupted = false
const startTime = Date.now()
const isNewMilestone = options.command === 'new-milestone'
// Supervised mode cannot share stdin with --context -
if (options.supervised && options.context === '-') {
process.stderr.write('[headless] Error: --supervised cannot be used with --context - (both require stdin)\n')
process.exit(1)
}
// For new-milestone, load context and bootstrap .gsd/ before spawning RPC child
if (isNewMilestone) {
if (!options.context && !options.contextText) {
@ -370,6 +463,18 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
// Stdin writer for sending extension_ui_response to child
let stdinWriter: ((data: string) => void) | null = null
// Supervised mode state
const pendingResponseTimers = new Map<string, ReturnType<typeof setTimeout>>()
let supervisedFallback = false
let stopSupervisedReader: (() => void) | null = null
const onStdinClose = () => {
supervisedFallback = true
process.stderr.write('[headless] Warning: orchestrator stdin closed, falling back to auto-response\n')
}
if (options.supervised) {
process.stdin.on('close', onStdinClose)
}
// Completion promise
let resolveCompletion: () => void
const completionPromise = new Promise<void>((resolve) => {
@ -390,6 +495,9 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
}
}
// Precompute supervised response timeout
const responseTimeout = options.responseTimeout ?? 30_000
// Overall timeout
const timeoutTimer = setTimeout(() => {
process.stderr.write(`[headless] Timeout after ${options.timeout / 1000}s\n`)
@ -428,7 +536,22 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
completed = true
}
handleExtensionUIRequest(eventObj as unknown as ExtensionUIRequest, stdinWriter)
const method = String(eventObj.method ?? '')
const shouldSupervise = options.supervised && !supervisedFallback
&& !FIRE_AND_FORGET_METHODS.has(method)
if (shouldSupervise) {
// Interactive request in supervised mode — let orchestrator respond
const eventId = String(eventObj.id ?? '')
const timer = setTimeout(() => {
pendingResponseTimers.delete(eventId)
handleExtensionUIRequest(eventObj as unknown as ExtensionUIRequest, stdinWriter!)
process.stdout.write(JSON.stringify({ type: 'supervised_timeout', id: eventId, method }) + '\n')
}, responseTimeout)
pendingResponseTimers.set(eventId, timer)
} else {
handleExtensionUIRequest(eventObj as unknown as ExtensionUIRequest, stdinWriter)
}
// If we detected a terminal notification, resolve after responding
if (completed) {
@ -452,6 +575,7 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
// Signal handling
const signalHandler = () => {
process.stderr.write('\n[headless] Interrupted, stopping child process...\n')
interrupted = true
exitCode = 1
client.stop().finally(() => {
clearTimeout(timeoutTimer)
@ -484,6 +608,19 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
internalProcess.stdin!.write(data)
}
// Start supervised stdin reader for orchestrator commands
if (options.supervised) {
stopSupervisedReader = startSupervisedStdinReader(stdinWriter, client, (id) => {
const timer = pendingResponseTimers.get(id)
if (timer) {
clearTimeout(timer)
pendingResponseTimers.delete(id)
}
})
// Ensure stdin is in flowing mode for JSONL reading
process.stdin.resume()
}
// Detect child process crash
internalProcess.on('exit', (code) => {
if (!completed) {
@ -541,6 +678,10 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
// Cleanup
clearTimeout(timeoutTimer)
if (idleTimer) clearTimeout(idleTimer)
pendingResponseTimers.forEach((timer) => clearTimeout(timer))
pendingResponseTimers.clear()
stopSupervisedReader?.()
process.stdin.removeListener('close', onStdinClose)
process.removeListener('SIGINT', signalHandler)
process.removeListener('SIGTERM', signalHandler)
@ -553,6 +694,9 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
process.stderr.write(`[headless] Status: ${status}\n`)
process.stderr.write(`[headless] Duration: ${duration}s\n`)
process.stderr.write(`[headless] Events: ${totalEvents} total, ${toolCallCount} tool calls\n`)
if (restartCount > 0) {
process.stderr.write(`[headless] Restarts: ${restartCount}\n`)
}
// On failure, print last 5 events for diagnostics
if (exitCode !== 0) {
@ -565,5 +709,5 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
}
}
process.exit(exitCode)
return { exitCode, interrupted }
}

View file

@ -41,6 +41,8 @@ const SUBCOMMAND_HELP: Record<string, string> = {
' --timeout N Overall timeout in ms (default: 300000)',
' --json JSONL event stream to stdout',
' --model ID Override model',
' --supervised Forward interactive UI requests to orchestrator via stdout/stdin',
' --response-timeout N Timeout (ms) for orchestrator response (default: 30000)',
'',
'Commands:',
' auto Run all queued units continuously (default)',
@ -62,6 +64,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
' gsd headless new-milestone --context spec.md Create milestone from file',
' cat spec.md | gsd headless new-milestone --context - From stdin',
' gsd headless new-milestone --context spec.md --auto Create + auto-execute',
' gsd headless --supervised auto Supervised orchestrator mode',
'',
'Exit codes: 0 = complete, 1 = error/timeout, 2 = blocked',
].join('\n'),

View file

@ -3,7 +3,7 @@
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
import { fileURLToPath } from 'url'
import { dirname, resolve, join, delimiter } from 'path'
import { existsSync, readFileSync, readdirSync, mkdirSync, symlinkSync } from 'fs'
import { existsSync, readFileSync, readdirSync, mkdirSync, symlinkSync, cpSync } from 'fs'
// Fast-path: handle --version/-v and --help/-h before importing any heavy
// dependencies. This avoids loading the entire pi-coding-agent barrel import
@ -151,8 +151,12 @@ if (process.env.HTTP_PROXY || process.env.HTTPS_PROXY || process.env.http_proxy
setGlobalDispatcher(new EnvHttpProxyAgent())
}
// Ensure workspace packages are linked before importing cli.js (which imports @gsd/*).
// Ensure workspace packages are linked (or copied on Windows) before importing
// cli.js (which imports @gsd/*).
// npm postinstall handles this normally, but npx --ignore-scripts skips postinstall.
// On Windows without Developer Mode or admin rights, symlinkSync will throw even for
// 'junction' type — so we fall back to cpSync (a full directory copy) which works
// everywhere without elevated permissions.
const gsdScopeDir = join(gsdNodeModules, '@gsd')
const packagesDir = join(gsdRoot, 'packages')
const wsPackages = ['native', 'pi-agent-core', 'pi-ai', 'pi-coding-agent', 'pi-tui']
@ -161,11 +165,37 @@ try {
for (const pkg of wsPackages) {
const target = join(gsdScopeDir, pkg)
const source = join(packagesDir, pkg)
if (existsSync(source) && !existsSync(target)) {
try { symlinkSync(source, target, 'junction') } catch { /* non-fatal */ }
if (!existsSync(source) || existsSync(target)) continue
try {
symlinkSync(source, target, 'junction')
} catch {
// Symlink failed (common on Windows without Developer Mode / admin).
// Fall back to a directory copy — slower on first run but universally works.
try { cpSync(source, target, { recursive: true }) } catch { /* non-fatal */ }
}
}
} catch { /* non-fatal */ }
// Validate critical workspace packages are resolvable. If still missing after the
// symlink+copy attempts, emit a clear diagnostic instead of a cryptic
// ERR_MODULE_NOT_FOUND from deep inside cli.js.
const criticalPackages = ['pi-coding-agent']
const missingPackages = criticalPackages.filter(pkg => !existsSync(join(gsdScopeDir, pkg)))
if (missingPackages.length > 0) {
const missing = missingPackages.map(p => `@gsd/${p}`).join(', ')
process.stderr.write(
`\nError: GSD installation is broken — missing packages: ${missing}\n\n` +
`This is usually caused by one of:\n` +
` • An outdated version installed from npm (run: npm install -g gsd-pi@latest)\n` +
` • The packages/ directory was excluded from the installed tarball\n` +
` • A filesystem error prevented linking or copying the workspace packages\n\n` +
`Fix it by reinstalling:\n\n` +
` npm install -g gsd-pi@latest\n\n` +
`If the issue persists, please open an issue at:\n` +
` https://github.com/gsd-build/gsd-2/issues\n`
)
process.exit(1)
}
// Dynamic import defers ESM evaluation — config.js will see PI_PACKAGE_DIR above
await import('./cli.js')

View file

@ -52,6 +52,7 @@ import {
getGroupStatus,
pruneDeadProcesses,
cleanupAll,
cleanupSessionProcesses,
persistManifest,
loadManifest,
pushAlert,
@ -71,7 +72,7 @@ import { toPosixPath } from "../shared/path-display.js";
// ── Re-exports for consumers ───────────────────────────────────────────────
export type { ProcessStatus, ProcessType, BgProcess, BgProcessInfo, OutputDigest, OutputLine, ProcessEvent } from "./types.js";
export { processes, startProcess, killProcess, restartProcess, cleanupAll } from "./process-manager.js";
export { processes, startProcess, killProcess, restartProcess, cleanupAll, cleanupSessionProcesses } from "./process-manager.js";
export { generateDigest, getHighlights, getOutput, formatDigestText } from "./output-formatter.js";
export { waitForReady, probePort } from "./readiness-detector.js";
export { sendAndWait, runOnSession, queryShellEnv } from "./interaction.js";
@ -136,7 +137,13 @@ export default function (pi: ExtensionAPI) {
});
// Session switch resets the agent's context.
pi.on("session_switch", async () => {
pi.on("session_switch", async (event, ctx) => {
latestCtx = ctx;
if (event.reason === "new" && event.previousSessionFile) {
await cleanupSessionProcesses(event.previousSessionFile);
syncLatestCtxCwd();
if (latestCtx) persistManifest(latestCtx.cwd);
}
buildProcessStateAlert("Session was switched.");
});
@ -232,6 +239,7 @@ export default function (pi: ExtensionAPI) {
"Use 'run' to execute a command on a persistent shell session and block until it completes — returns structured output + exit code. Shell state (env vars, cwd, virtualenvs) persists across runs.",
"Use 'send_and_wait' for interactive CLIs: send input and wait for expected output pattern.",
"Use 'env' to check the current working directory and active environment variables of a shell session — useful after cd, source, or export commands.",
"Background processes are session-scoped by default: a new session reaps them unless you set persist_across_sessions:true.",
"Use 'restart' to kill and relaunch with the same config — preserves restart count.",
"Background processes are auto-classified (server/build/test/watcher) based on the command.",
"Process crashes and errors are automatically surfaced as alerts at the start of your next turn — you don't need to poll.",
@ -300,6 +308,12 @@ export default function (pi: ExtensionAPI) {
group: Type.Optional(
Type.String({ description: "Group name for related processes (for start, group_status)" }),
),
persist_across_sessions: Type.Optional(
Type.Boolean({
description: "Keep this process running after a new session starts. Default: false.",
default: false,
}),
),
}),
async execute(_toolCallId, params, signal, _onUpdate, ctx) {
@ -318,6 +332,8 @@ export default function (pi: ExtensionAPI) {
const bg = startProcess({
command: params.command,
cwd: ctx.cwd,
ownerSessionFile: ctx.sessionManager.getSessionFile() ?? null,
persistAcrossSessions: params.persist_across_sessions ?? false,
label: params.label,
type: params.type as ProcessType | undefined,
readyPattern: params.ready_pattern,
@ -341,6 +357,7 @@ export default function (pi: ExtensionAPI) {
text += ` cwd: ${toPosixPath(bg.cwd)}`;
if (bg.group) text += `\n group: ${bg.group}`;
if (bg.persistAcrossSessions) text += `\n persist_across_sessions: true`;
if (bg.readyPort) text += `\n ready_port: ${bg.readyPort}`;
if (bg.readyPattern) text += `\n ready_pattern: ${bg.readyPattern}`;
if (bg.ports.length > 0) text += `\n detected ports: ${bg.ports.join(", ")}`;

View file

@ -67,6 +67,8 @@ export function getInfo(p: BgProcess): BgProcessInfo {
label: p.label,
command: p.command,
cwd: p.cwd,
ownerSessionFile: p.ownerSessionFile,
persistAcrossSessions: p.persistAcrossSessions,
startedAt: p.startedAt,
alive: p.alive,
exitCode: p.exitCode,
@ -138,6 +140,8 @@ export function startProcess(opts: StartOptions): BgProcess {
label: opts.label || command.slice(0, 60),
command,
cwd: opts.cwd,
ownerSessionFile: opts.ownerSessionFile ?? null,
persistAcrossSessions: opts.persistAcrossSessions ?? false,
startedAt: Date.now(),
proc,
output: [],
@ -170,6 +174,8 @@ export function startProcess(opts: StartOptions): BgProcess {
cwd: opts.cwd,
label: opts.label || command.slice(0, 60),
processType,
ownerSessionFile: opts.ownerSessionFile ?? null,
persistAcrossSessions: opts.persistAcrossSessions ?? false,
readyPattern: opts.readyPattern || null,
readyPort: opts.readyPort || null,
group: opts.group || null,
@ -312,6 +318,8 @@ export async function restartProcess(id: string): Promise<BgProcess | null> {
cwd: config.cwd,
label: config.label,
type: config.processType,
ownerSessionFile: config.ownerSessionFile,
persistAcrossSessions: config.persistAcrossSessions,
readyPattern: config.readyPattern || undefined,
readyPort: config.readyPort || undefined,
group: config.group || undefined,
@ -367,6 +375,41 @@ export function cleanupAll(): void {
processes.clear();
}
/**
 * Wait for a background process to exit, up to timeoutMs milliseconds.
 *
 * Returns true if the process is no longer alive by the time we return,
 * false if it is still running after the timeout.
 *
 * Fix: the previous version left its "exit" once-listener attached to
 * bg.proc when the timeout fired first. Repeated grace waits against a
 * long-lived child would accumulate listeners (eventually tripping
 * MaxListenersExceededWarning). The listener is now removed on timeout.
 */
async function waitForProcessExit(bg: BgProcess, timeoutMs: number): Promise<boolean> {
  if (!bg.alive) return true;
  await new Promise<void>((resolve) => {
    const onExit = () => {
      clearTimeout(timer);
      resolve();
    };
    const timer = setTimeout(() => {
      // Timed out — detach the listener so it doesn't linger on bg.proc.
      bg.proc.removeListener("exit", onExit);
      resolve();
    }, timeoutMs);
    bg.proc.once("exit", onExit);
  });
  return !bg.alive;
}
/**
 * Terminate all live, non-persistent background processes owned by the given
 * session file: SIGTERM first, then wait up to graceMs for each to exit, and
 * finally SIGKILL anything still alive.
 *
 * @param sessionFile - owner session whose processes should be reaped
 * @param options.graceMs - grace period in ms (default 300; values below 0
 *   are clamped to 0, which skips the wait entirely)
 * @returns ids of the processes that were targeted (empty if none matched)
 */
export async function cleanupSessionProcesses(
  sessionFile: string,
  options?: { graceMs?: number },
): Promise<string[]> {
  const grace = Math.max(0, options?.graceMs ?? 300);
  const targets = [...processes.values()].filter(
    (bg) => bg.alive && !bg.persistAcrossSessions && bg.ownerSessionFile === sessionFile,
  );
  if (targets.length === 0) return [];

  // Polite shutdown first.
  targets.forEach((bg) => killProcess(bg.id, "SIGTERM"));

  if (grace > 0) {
    await Promise.all(targets.map((bg) => waitForProcessExit(bg, grace)));
  }

  // Force-kill anything that ignored SIGTERM.
  for (const survivor of targets) {
    if (survivor.alive) killProcess(survivor.id, "SIGKILL");
  }

  return targets.map((bg) => bg.id);
}
// ── Persistence ────────────────────────────────────────────────────────────
export function getManifestPath(cwd: string): string {
@ -384,6 +427,8 @@ export function persistManifest(cwd: string): void {
label: p.label,
command: p.command,
cwd: p.cwd,
ownerSessionFile: p.ownerSessionFile,
persistAcrossSessions: p.persistAcrossSessions,
startedAt: p.startedAt,
processType: p.processType,
group: p.group,

View file

@ -53,6 +53,10 @@ export interface BgProcess {
label: string;
command: string;
cwd: string;
/** Session file that created this process (used for per-session cleanup) */
ownerSessionFile: string | null;
/** Whether this process should survive a new-session boundary */
persistAcrossSessions: boolean;
startedAt: number;
proc: import("node:child_process").ChildProcess;
/** Unified chronologically-interleaved output buffer */
@ -103,7 +107,17 @@ export interface BgProcess {
/** Restart count */
restartCount: number;
/** Original start config for restart */
startConfig: { command: string; cwd: string; label: string; processType: ProcessType; readyPattern: string | null; readyPort: number | null; group: string | null };
startConfig: {
command: string;
cwd: string;
label: string;
processType: ProcessType;
ownerSessionFile: string | null;
persistAcrossSessions: boolean;
readyPattern: string | null;
readyPort: number | null;
group: string | null;
};
}
export interface BgProcessInfo {
@ -111,6 +125,8 @@ export interface BgProcessInfo {
label: string;
command: string;
cwd: string;
ownerSessionFile: string | null;
persistAcrossSessions: boolean;
startedAt: number;
alive: boolean;
exitCode: number | null;
@ -133,6 +149,8 @@ export interface BgProcessInfo {
export interface StartOptions {
command: string;
cwd: string;
ownerSessionFile?: string | null;
persistAcrossSessions?: boolean;
label?: string;
type?: ProcessType;
readyPattern?: string;
@ -154,6 +172,8 @@ export interface ProcessManifest {
label: string;
command: string;
cwd: string;
ownerSessionFile: string | null;
persistAcrossSessions: boolean;
startedAt: number;
processType: ProcessType;
group: string | null;

View file

@ -0,0 +1,32 @@
/**
* Budget alert level tracking and enforcement for auto-mode.
* Pure functions — no module state or side effects.
*/
import type { BudgetEnforcementMode } from "./types.js";
export type BudgetAlertLevel = 0 | 75 | 80 | 90 | 100;
/**
 * Map a fractional budget usage (e.g. 0.82 = 82%) to the highest alert
 * threshold it has crossed, or 0 when usage is below every threshold.
 */
export function getBudgetAlertLevel(budgetPct: number): BudgetAlertLevel {
  const thresholds = [
    [1.0, 100],
    [0.9, 90],
    [0.8, 80],
    [0.75, 75],
  ] as const;
  for (const [cutoff, level] of thresholds) {
    if (budgetPct >= cutoff) return level;
  }
  return 0;
}
/**
 * Return the alert level to announce when budget usage has escalated past a
 * threshold that was not yet reported, or null when nothing new should fire
 * (usage below all thresholds, or already at/above the current level).
 */
export function getNewBudgetAlertLevel(previousLevel: BudgetAlertLevel, budgetPct: number): BudgetAlertLevel | null {
  const current = getBudgetAlertLevel(budgetPct);
  const escalated = current !== 0 && current > previousLevel;
  return escalated ? current : null;
}
/**
 * Decide what enforcement action to take for the current budget usage.
 * Below 100% usage nothing happens; at or above 100%, the configured
 * enforcement mode picks halt, pause, or (default) a warning.
 */
export function getBudgetEnforcementAction(
  enforcement: BudgetEnforcementMode,
  budgetPct: number,
): "none" | "warn" | "pause" | "halt" {
  if (budgetPct < 1.0) return "none";
  switch (enforcement) {
    case "halt":
      return "halt";
    case "pause":
      return "pause";
    default:
      return "warn";
  }
}

View file

@ -0,0 +1,229 @@
/**
* Direct phase dispatch — handles manual /gsd dispatch commands.
* Resolves a phase name to a unit type + prompt, creates a session, and sends the message.
*/
import type {
ExtensionAPI,
ExtensionCommandContext,
} from "@gsd/pi-coding-agent";
import { deriveState } from "./state.js";
import { loadFile, parseRoadmap } from "./files.js";
import {
resolveMilestoneFile, resolveSliceFile, relSliceFile,
} from "./paths.js";
import {
buildResearchSlicePrompt,
buildResearchMilestonePrompt,
buildPlanSlicePrompt,
buildPlanMilestonePrompt,
buildExecuteTaskPrompt,
buildCompleteSlicePrompt,
buildCompleteMilestonePrompt,
buildReassessRoadmapPrompt,
buildRunUatPrompt,
buildReplanSlicePrompt,
} from "./auto-prompts.js";
import { loadEffectiveGSDPreferences } from "./preferences.js";
import { pauseAuto } from "./auto.js";
/**
 * Handle a manual `/gsd <phase>` dispatch command.
 *
 * Resolves the user-supplied phase alias to a concrete unit (type + id),
 * builds the matching prompt, opens a fresh session, and sends the dispatch
 * message. Notifies and returns early when the required state (active
 * milestone/slice/task, roadmap, UAT file) is missing.
 *
 * @param ctx   Command context (UI notifications + session creation).
 * @param pi    Extension API used to send the dispatch message.
 * @param phase User-supplied phase name, matched case-insensitively
 *              (e.g. "research", "plan-slice", "uat").
 * @param base  Project base path used to derive state and resolve files.
 */
export async function dispatchDirectPhase(
  ctx: ExtensionCommandContext,
  pi: ExtensionAPI,
  phase: string,
  base: string,
): Promise<void> {
  const state = await deriveState(base);
  const mid = state.activeMilestone?.id;
  const midTitle = state.activeMilestone?.title ?? "";
  // Every phase below is scoped to the active milestone — bail without one.
  if (!mid) {
    ctx.ui.notify("Cannot dispatch: no active milestone.", "warning");
    return;
  }
  const normalized = phase.toLowerCase();
  // Outputs of the switch: each case either fills all three or returns early.
  let unitType: string;
  let unitId: string;
  let prompt: string;
  switch (normalized) {
    case "research":
    case "research-milestone":
    case "research-slice": {
      // Bare "research" targets the slice unless we are still pre-planning.
      const isSlice = normalized === "research-slice" || (normalized === "research" && state.phase !== "pre-planning");
      if (isSlice) {
        const sid = state.activeSlice?.id;
        const sTitle = state.activeSlice?.title ?? "";
        if (!sid) {
          ctx.ui.notify("Cannot dispatch research-slice: no active slice.", "warning");
          return;
        }
        // When require_slice_discussion is enabled, pause auto-mode before
        // each new slice so the user can discuss requirements first (#789).
        // A missing CONTEXT file is treated as "not yet discussed".
        const sliceContextFile = resolveSliceFile(base, mid, sid, "CONTEXT");
        const requireDiscussion = loadEffectiveGSDPreferences()?.preferences?.phases?.require_slice_discussion;
        if (requireDiscussion && !sliceContextFile) {
          ctx.ui.notify(
            `Slice ${sid} requires discussion before planning. Run /gsd discuss to discuss this slice, then /gsd auto to resume.`,
            "info",
          );
          await pauseAuto(ctx, pi);
          return;
        }
        unitType = "research-slice";
        unitId = `${mid}/${sid}`;
        prompt = await buildResearchSlicePrompt(mid, midTitle, sid, sTitle, base);
      } else {
        unitType = "research-milestone";
        unitId = mid;
        prompt = await buildResearchMilestonePrompt(mid, midTitle, base);
      }
      break;
    }
    case "plan":
    case "plan-milestone":
    case "plan-slice": {
      // Same alias resolution as research: bare "plan" means the slice unless pre-planning.
      const isSlice = normalized === "plan-slice" || (normalized === "plan" && state.phase !== "pre-planning");
      if (isSlice) {
        const sid = state.activeSlice?.id;
        const sTitle = state.activeSlice?.title ?? "";
        if (!sid) {
          ctx.ui.notify("Cannot dispatch plan-slice: no active slice.", "warning");
          return;
        }
        unitType = "plan-slice";
        unitId = `${mid}/${sid}`;
        prompt = await buildPlanSlicePrompt(mid, midTitle, sid, sTitle, base);
      } else {
        unitType = "plan-milestone";
        unitId = mid;
        prompt = await buildPlanMilestonePrompt(mid, midTitle, base);
      }
      break;
    }
    case "execute":
    case "execute-task": {
      // Execution needs both an active slice and an active task.
      const sid = state.activeSlice?.id;
      const sTitle = state.activeSlice?.title ?? "";
      const tid = state.activeTask?.id;
      const tTitle = state.activeTask?.title ?? "";
      if (!sid) {
        ctx.ui.notify("Cannot dispatch execute-task: no active slice.", "warning");
        return;
      }
      if (!tid) {
        ctx.ui.notify("Cannot dispatch execute-task: no active task.", "warning");
        return;
      }
      unitType = "execute-task";
      unitId = `${mid}/${sid}/${tid}`;
      prompt = await buildExecuteTaskPrompt(mid, sid, sTitle, tid, tTitle, base);
      break;
    }
    case "complete":
    case "complete-slice":
    case "complete-milestone": {
      // Bare "complete" means the slice only while the state machine is summarizing.
      const isSlice = normalized === "complete-slice" || (normalized === "complete" && state.phase === "summarizing");
      if (isSlice) {
        const sid = state.activeSlice?.id;
        const sTitle = state.activeSlice?.title ?? "";
        if (!sid) {
          ctx.ui.notify("Cannot dispatch complete-slice: no active slice.", "warning");
          return;
        }
        unitType = "complete-slice";
        unitId = `${mid}/${sid}`;
        prompt = await buildCompleteSlicePrompt(mid, midTitle, sid, sTitle, base);
      } else {
        unitType = "complete-milestone";
        unitId = mid;
        prompt = await buildCompleteMilestonePrompt(mid, midTitle, base);
      }
      break;
    }
    case "reassess":
    case "reassess-roadmap": {
      // Reassessment is anchored to the most recently completed slice.
      const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP");
      const roadmapContent = roadmapFile ? await loadFile(roadmapFile) : null;
      if (!roadmapContent) {
        ctx.ui.notify("Cannot dispatch reassess-roadmap: no roadmap found.", "warning");
        return;
      }
      const roadmap = parseRoadmap(roadmapContent);
      const completedSlices = roadmap.slices.filter(s => s.done);
      if (completedSlices.length === 0) {
        ctx.ui.notify("Cannot dispatch reassess-roadmap: no completed slices.", "warning");
        return;
      }
      const completedSliceId = completedSlices[completedSlices.length - 1].id;
      unitType = "reassess-roadmap";
      unitId = `${mid}/${completedSliceId}`;
      prompt = await buildReassessRoadmapPrompt(mid, midTitle, completedSliceId, base);
      break;
    }
    case "uat":
    case "run-uat": {
      // UAT requires an existing, non-empty UAT file for the active slice.
      const sid = state.activeSlice?.id;
      if (!sid) {
        ctx.ui.notify("Cannot dispatch run-uat: no active slice.", "warning");
        return;
      }
      const uatFile = resolveSliceFile(base, mid, sid, "UAT");
      if (!uatFile) {
        ctx.ui.notify("Cannot dispatch run-uat: no UAT file found.", "warning");
        return;
      }
      const uatContent = await loadFile(uatFile);
      if (!uatContent) {
        ctx.ui.notify("Cannot dispatch run-uat: UAT file is empty.", "warning");
        return;
      }
      const uatPath = relSliceFile(base, mid, sid, "UAT");
      unitType = "run-uat";
      unitId = `${mid}/${sid}`;
      prompt = await buildRunUatPrompt(mid, sid, uatPath, uatContent, base);
      break;
    }
    case "replan":
    case "replan-slice": {
      const sid = state.activeSlice?.id;
      const sTitle = state.activeSlice?.title ?? "";
      if (!sid) {
        ctx.ui.notify("Cannot dispatch replan-slice: no active slice.", "warning");
        return;
      }
      unitType = "replan-slice";
      unitId = `${mid}/${sid}`;
      prompt = await buildReplanSlicePrompt(mid, midTitle, sid, sTitle, base);
      break;
    }
    default:
      ctx.ui.notify(
        `Unknown phase "${phase}". Valid phases: research, plan, execute, complete, reassess, uat, replan.`,
        "warning",
      );
      return;
  }
  // Open a fresh session so the dispatched unit runs with a clean context,
  // then send the prompt as a hidden dispatch message that starts the turn.
  ctx.ui.notify(`Dispatching ${unitType} for ${unitId}...`, "info");
  const result = await ctx.newSession();
  if (result.cancelled) {
    ctx.ui.notify("Session creation cancelled.", "warning");
    return;
  }
  pi.sendMessage(
    { customType: "gsd-dispatch", content: prompt, display: false },
    { triggerTurn: true },
  );
}

View file

@ -241,6 +241,32 @@ const DISPATCH_RULES: DispatchRule[] = [
};
},
},
{
name: "executing → execute-task (recover missing task plan → plan-slice)",
match: async ({ state, mid, midTitle, basePath }) => {
if (state.phase !== "executing" || !state.activeTask) return null;
const sid = state.activeSlice!.id;
const sTitle = state.activeSlice!.title;
const tid = state.activeTask.id;
// Guard: if the slice plan exists but the individual task plan files are
// missing, the planner created S##-PLAN.md with task entries but never
// wrote the tasks/ directory files. Dispatch plan-slice to regenerate
// them rather than hard-stopping — fixes the infinite-loop described in
// issue #909.
const taskPlanPath = resolveTaskFile(basePath, mid, sid, tid, "PLAN");
if (!taskPlanPath || !existsSync(taskPlanPath)) {
return {
action: "dispatch",
unitType: "plan-slice",
unitId: `${mid}/${sid}`,
prompt: await buildPlanSlicePrompt(mid, midTitle, sid, sTitle, basePath),
};
}
return null;
},
},
{
name: "executing → execute-task",
match: async ({ state, mid, basePath }) => {
@ -250,19 +276,6 @@ const DISPATCH_RULES: DispatchRule[] = [
const tid = state.activeTask.id;
const tTitle = state.activeTask.title;
// Guard: refuse to dispatch execute-task when the task plan file is missing.
// This prevents the agent from running blind after a failed plan-slice that
// wrote S{sid}-PLAN.md but omitted the individual T{tid}-PLAN.md files.
// (See issue #739 — missing task plan caused runaway execution and EPIPE crash.)
const taskPlanPath = resolveTaskFile(basePath, mid, sid, tid, "PLAN");
if (!taskPlanPath || !existsSync(taskPlanPath)) {
return {
action: "stop",
reason: `Task plan ${tid}-PLAN.md is missing for ${mid}/${sid}/${tid}. Re-run plan-slice to regenerate task plans, or create the file manually and resume.`,
level: "error",
};
}
return {
action: "dispatch",
unitType: "execute-task",

View file

@ -0,0 +1,179 @@
/**
* Model selection and dynamic routing for auto-mode unit dispatch.
* Handles complexity-based routing, model resolution across providers,
* and fallback chains.
*/
import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent";
import type { GSDPreferences } from "./preferences.js";
import { resolveModelWithFallbacksForUnit, resolveDynamicRoutingConfig } from "./preferences.js";
import { classifyUnitComplexity, tierLabel } from "./complexity-classifier.js";
import { resolveModelForComplexity } from "./model-router.js";
import { getLedger, getProjectTotals } from "./metrics.js";
import { unitPhaseLabel } from "./auto-dashboard.js";
/**
 * Outcome of model selection for a single unit dispatch, returned by
 * {@link selectAndApplyModel} and consumed by the metrics layer.
 */
export interface ModelSelectionResult {
  /** Routing metadata for metrics recording; null when dynamic routing did not classify this unit. */
  routing: { tier: string; modelDowngraded: boolean } | null;
}
/**
* Select and apply the appropriate model for a unit dispatch.
* Handles: per-unit-type model preferences, dynamic complexity routing,
* provider/model resolution, fallback chains, and start-model re-application.
*
* Returns routing metadata for metrics tracking.
*/
export async function selectAndApplyModel(
  ctx: ExtensionContext,
  pi: ExtensionAPI,
  unitType: string,
  unitId: string,
  basePath: string,
  prefs: GSDPreferences | undefined,
  verbose: boolean,
  autoModeStartModel: { provider: string; id: string } | null,
): Promise<ModelSelectionResult> {
  // Per-unit-type model preference (primary + ordered fallbacks), if configured.
  const modelConfig = resolveModelWithFallbacksForUnit(unitType);
  let routing: { tier: string; modelDowngraded: boolean } | null = null;
  if (modelConfig) {
    const availableModels = ctx.modelRegistry.getAvailable();
    // ─── Dynamic Model Routing ─────────────────────────────────────────
    const routingConfig = resolveDynamicRoutingConfig();
    let effectiveModelConfig = modelConfig;
    let routingTierLabel = "";
    if (routingConfig.enabled) {
      // Budget pressure: fraction of the configured budget ceiling already
      // spent, fed into the complexity classifier. Undefined when no ceiling.
      let budgetPct: number | undefined;
      if (routingConfig.budget_pressure !== false) {
        const budgetCeiling = prefs?.budget_ceiling;
        if (budgetCeiling !== undefined && budgetCeiling > 0) {
          const currentLedger = getLedger();
          const totalCost = currentLedger ? getProjectTotals(currentLedger.units).cost : 0;
          budgetPct = totalCost / budgetCeiling;
        }
      }
      // Hook units are only classified when routing for hooks is not disabled.
      const isHook = unitType.startsWith("hook/");
      const shouldClassify = !isHook || routingConfig.hooks !== false;
      if (shouldClassify) {
        const classification = classifyUnitComplexity(unitType, unitId, basePath, budgetPct);
        const availableModelIds = availableModels.map(m => m.id);
        const routingResult = resolveModelForComplexity(classification, modelConfig, routingConfig, availableModelIds);
        if (routingResult.wasDowngraded) {
          // Routing chose a cheaper model — replace the preferred chain with it.
          effectiveModelConfig = {
            primary: routingResult.modelId,
            fallbacks: routingResult.fallbacks,
          };
          if (verbose) {
            ctx.ui.notify(
              `Dynamic routing [${tierLabel(classification.tier)}]: ${routingResult.modelId} (${classification.reason})`,
              "info",
            );
          }
        }
        routingTierLabel = ` [${tierLabel(classification.tier)}]`;
        routing = { tier: classification.tier, modelDowngraded: routingResult.wasDowngraded };
      }
    }
    // Try primary first, then each fallback in order, until setModel succeeds.
    const modelsToTry = [effectiveModelConfig.primary, ...effectiveModelConfig.fallbacks];
    for (const modelId of modelsToTry) {
      const model = resolveModelId(modelId, availableModels, ctx.model?.provider);
      if (!model) {
        if (verbose) ctx.ui.notify(`Model ${modelId} not found, trying fallback.`, "info");
        continue;
      }
      // Warn if the ID is ambiguous across providers
      if (!modelId.includes("/")) {
        const providers = availableModels.filter(m => m.id === modelId).map(m => m.provider);
        if (providers.length > 1 && model.provider !== ctx.model?.provider) {
          ctx.ui.notify(
            `Model ID "${modelId}" exists in multiple providers (${providers.join(", ")}). ` +
            `Resolved to ${model.provider}. Use "provider/model" format for explicit targeting.`,
            "warning",
          );
        }
      }
      // persist: false — selection is per-unit, never written to settings.
      const ok = await pi.setModel(model, { persist: false });
      if (ok) {
        const fallbackNote = modelId === effectiveModelConfig.primary
          ? ""
          : ` (fallback from ${effectiveModelConfig.primary})`;
        const phase = unitPhaseLabel(unitType);
        ctx.ui.notify(`Model [${phase}]${routingTierLabel}: ${model.provider}/${model.id}${fallbackNote}`, "info");
        break;
      } else {
        // NOTE(review): indexOf finds the first occurrence — assumes model IDs
        // in the chain are unique; duplicates would repeat the same "next".
        const nextModel = modelsToTry[modelsToTry.indexOf(modelId) + 1];
        if (nextModel) {
          if (verbose) ctx.ui.notify(`Failed to set model ${modelId}, trying ${nextModel}...`, "info");
        } else {
          ctx.ui.notify(`All preferred models unavailable for ${unitType}. Using default.`, "warning");
        }
      }
    }
  } else if (autoModeStartModel) {
    // No model preference for this unit type — re-apply the model captured
    // at auto-mode start to prevent bleed from shared global settings.json (#650).
    const availableModels = ctx.modelRegistry.getAvailable();
    const startModel = availableModels.find(
      m => m.provider === autoModeStartModel.provider && m.id === autoModeStartModel.id,
    );
    if (startModel) {
      const ok = await pi.setModel(startModel, { persist: false });
      if (!ok) {
        // Exact provider+id failed — retry by bare id as a best effort.
        const byId = availableModels.find(m => m.id === autoModeStartModel.id);
        if (byId) await pi.setModel(byId, { persist: false });
      }
    }
  }
  return { routing };
}
/**
 * Resolve a model ID string to a model object from the available models list.
 *
 * Supported formats:
 *  - "provider/model" — explicit provider prefix (matched case-insensitively);
 *  - "org/model-name" — OpenRouter-style IDs whose first segment is not a
 *    known provider (matched against the full string);
 *  - "bare-id"        — matched by ID alone, preferring the current provider.
 *
 * Fix: bare-ID lookups were case-sensitive while prefixed lookups were
 * case-insensitive. Bare IDs now also fall back to a case-insensitive match;
 * exact matches are still tried first, so all previous resolutions are
 * unchanged (backward compatible).
 */
function resolveModelId<T extends { id: string; provider: string }>(
  modelId: string,
  availableModels: T[],
  currentProvider: string | undefined,
): T | undefined {
  const slashIdx = modelId.indexOf("/");
  if (slashIdx !== -1) {
    const maybeProvider = modelId.substring(0, slashIdx);
    const id = modelId.substring(slashIdx + 1);
    const knownProviders = new Set(availableModels.map(m => m.provider.toLowerCase()));
    if (knownProviders.has(maybeProvider.toLowerCase())) {
      const match = availableModels.find(
        m => m.provider.toLowerCase() === maybeProvider.toLowerCase()
          && m.id.toLowerCase() === id.toLowerCase(),
      );
      if (match) return match;
    }
    // Try matching the full string as a model ID (OpenRouter-style)
    const lower = modelId.toLowerCase();
    return availableModels.find(
      m => m.id.toLowerCase() === lower
        || `${m.provider}/${m.id}`.toLowerCase() === lower,
    );
  }
  // Bare ID — prefer current provider, then first available.
  // Exact-case matches win over case-insensitive ones at each step.
  const lowerId = modelId.toLowerCase();
  const providerMatch =
    availableModels.find(m => m.id === modelId && m.provider === currentProvider)
    ?? availableModels.find(m => m.id.toLowerCase() === lowerId && m.provider === currentProvider);
  if (providerMatch) return providerMatch;
  return availableModels.find(m => m.id === modelId)
    ?? availableModels.find(m => m.id.toLowerCase() === lowerId);
}

View file

@ -0,0 +1,74 @@
/**
* Pre-dispatch observability checks for auto-mode units.
* Validates plan/summary file quality and builds repair instructions
* for the agent to fix gaps before proceeding with the unit.
*/
import type { ExtensionContext } from "@gsd/pi-coding-agent";
import {
validatePlanBoundary,
validateExecuteBoundary,
validateCompleteBoundary,
formatValidationIssues,
} from "./observability-validator.js";
import type { ValidationIssue } from "./observability-validator.js";
/**
 * Run the pre-dispatch observability validator for a unit and surface any
 * findings as a single UI warning.
 *
 * The unit type selects the boundary validator (plan-slice / execute-task /
 * complete-slice); all other unit types — and hook units, which ship custom
 * artifacts — yield no issues.
 *
 * @returns The validation issues found (empty when none, or not applicable).
 */
export async function collectObservabilityWarnings(
  ctx: ExtensionContext,
  basePath: string,
  unitType: string,
  unitId: string,
): Promise<ValidationIssue[]> {
  // Hook units have custom artifacts — skip standard observability checks.
  if (unitType.startsWith("hook/")) return [];
  const [mid, sid, tid] = unitId.split("/");
  if (!mid || !sid) return [];
  let issues: ValidationIssue[] = [];
  if (unitType === "plan-slice") {
    issues = await validatePlanBoundary(basePath, mid, sid);
  } else if (unitType === "execute-task" && tid) {
    issues = await validateExecuteBoundary(basePath, mid, sid, tid);
  } else if (unitType === "complete-slice") {
    issues = await validateCompleteBoundary(basePath, mid, sid);
  }
  if (issues.length > 0) {
    const plural = issues.length === 1 ? "" : "s";
    ctx.ui.notify(
      `Observability check (${unitType}) found ${issues.length} warning${plural}:\n${formatValidationIssues(issues)}`,
      "warning",
    );
  }
  return issues;
}
/**
 * Render a markdown "pre-flight" block instructing the agent to repair the
 * given observability gaps before starting the unit.
 *
 * @param issues Validation issues to surface; an empty list yields "".
 * @returns A markdown section delimited by `---` rules, or the empty string.
 */
export function buildObservabilityRepairBlock(issues: ValidationIssue[]): string {
  if (issues.length === 0) return "";
  const items = issues.map(issue => {
    // Show just the basename so each bullet stays readable.
    const fileName = issue.file.split("/").pop() || issue.file;
    let line = `- **${fileName}**: ${issue.message}`;
    // Fix: join the suggestion with an explicit separator — the previous bare
    // concatenation ran the suggestion straight into the message text.
    if (issue.suggestion) line += ` — ${issue.suggestion}`;
    return line;
  });
  return [
    "",
    "---",
    "",
    "## Pre-flight: Observability gaps to fix FIRST",
    "",
    "The following issues were detected in plan/summary files for this unit.",
    "**Read each flagged file, apply the fix described, then proceed with the unit.**",
    "",
    ...items,
    "",
    "---",
    "",
  ].join("\n");
}

View file

@ -642,7 +642,6 @@ export async function buildPlanSlicePrompt(
const commitInstruction = commitDocsEnabled
? `Commit: \`docs(${sid}): add slice plan\``
: "Do not commit — planning docs are not tracked in git for this project.";
return loadPrompt("plan-slice", {
workingDirectory: base,
milestoneId: mid, sliceId: sid, sliceTitle: sTitle,

View file

@ -0,0 +1,262 @@
/**
* Timeout recovery logic for auto-mode units.
* Handles idle and hard timeout recovery with escalation, steering messages,
* and blocker placeholder generation.
*/
import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent";
import {
readUnitRuntimeRecord,
writeUnitRuntimeRecord,
formatExecuteTaskRecoveryStatus,
inspectExecuteTaskDurability,
} from "./unit-runtime.js";
import {
resolveExpectedArtifactPath,
diagnoseExpectedArtifact,
skipExecuteTask,
writeBlockerPlaceholder,
} from "./auto-recovery.js";
import { existsSync } from "node:fs";
/** State the timeout-recovery routine borrows from the auto-mode driver. */
export interface RecoveryContext {
  /** Project base path used for runtime records and durable artifacts. */
  basePath: string;
  /** When true, steering messages are displayed and extra notices are shown. */
  verbose: boolean;
  /** Start time of the in-flight unit (epoch ms, per Date.now()), recorded into runtime records. */
  currentUnitStartedAt: number;
  /** In-memory recovery-attempt counter, keyed by "unitType/unitId". */
  unitRecoveryCount: Map<string, number>;
  /** Callback that advances the auto-mode pipeline to the next unit. */
  dispatchNextUnit: (ctx: ExtensionContext, pi: ExtensionAPI) => Promise<void>;
}
/**
 * Attempt to recover a unit whose session hit an idle or hard timeout.
 *
 * Strategy, in order:
 *  1. Back off exponentially on repeated attempts for the same unit.
 *  2. If the unit's durable output already exists on disk, mark it finalized
 *     and advance the pipeline (the agent finished but never signaled).
 *  3. Otherwise, while session-level retries remain, send an escalating
 *     steering message telling the agent to finish the durable output now.
 *  4. When retries are exhausted, write skip/blocker artifacts and advance;
 *     if even that fails, pause auto-mode.
 *
 * execute-task units use richer durability signals (summary / checkbox /
 * next-action); all other unit types are keyed on a single expected artifact.
 *
 * @param reason Which watchdog fired: "idle" (no progress) or "hard" (wall-clock).
 * @returns "recovered" when auto-mode can continue, "paused" when it must stop.
 */
export async function recoverTimedOutUnit(
  ctx: ExtensionContext,
  pi: ExtensionAPI,
  unitType: string,
  unitId: string,
  reason: "idle" | "hard",
  rctx: RecoveryContext,
): Promise<"recovered" | "paused"> {
  const { basePath, verbose, currentUnitStartedAt, unitRecoveryCount, dispatchNextUnit } = rctx;
  const runtime = readUnitRuntimeRecord(basePath, unitType, unitId);
  // Cross-session attempt count comes from the persisted runtime record.
  const recoveryAttempts = runtime?.recoveryAttempts ?? 0;
  // Idle timeouts get one extra retry; hard timeouts get a single retry.
  const maxRecoveryAttempts = reason === "idle" ? 2 : 1;
  const recoveryKey = `${unitType}/${unitId}`;
  // In-memory attempt count for this process, used for backoff and messaging.
  const attemptNumber = (unitRecoveryCount.get(recoveryKey) ?? 0) + 1;
  unitRecoveryCount.set(recoveryKey, attemptNumber);
  if (attemptNumber > 1) {
    // Exponential backoff: 2^(n-1) seconds, capped at 30s
    const backoffMs = Math.min(1000 * Math.pow(2, attemptNumber - 2), 30000);
    ctx.ui.notify(
      `Recovery attempt ${attemptNumber} for ${unitType} ${unitId}. Waiting ${backoffMs / 1000}s before retry.`,
      "info",
    );
    await new Promise(r => setTimeout(r, backoffMs));
  }
  // ─── execute-task: durability-signal-based recovery ───
  if (unitType === "execute-task") {
    const status = await inspectExecuteTaskDurability(basePath, unitId);
    // Couldn't inspect durability — nothing safe to do but pause.
    if (!status) return "paused";
    writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
      recovery: status,
    });
    // All three durable signals present → the work finished on disk; only the
    // completion signal was lost.
    const durableComplete = status.summaryExists && status.taskChecked && status.nextActionAdvanced;
    if (durableComplete) {
      writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
        phase: "finalized",
        recovery: status,
      });
      ctx.ui.notify(
        `${reason === "idle" ? "Idle" : "Timeout"} recovery: ${unitType} ${unitId} already completed on disk. Continuing auto-mode. (attempt ${attemptNumber})`,
        "info",
      );
      unitRecoveryCount.delete(recoveryKey);
      await dispatchNextUnit(ctx, pi);
      return "recovered";
    }
    if (recoveryAttempts < maxRecoveryAttempts) {
      // Second-and-later session retries get the harsher "last chance" wording.
      const isEscalation = recoveryAttempts > 0;
      writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
        phase: "recovered",
        recovery: status,
        recoveryAttempts: recoveryAttempts + 1,
        lastRecoveryReason: reason,
        lastProgressAt: Date.now(),
        progressCount: (runtime?.progressCount ?? 0) + 1,
        lastProgressKind: reason === "idle" ? "idle-recovery-retry" : "hard-recovery-retry",
      });
      const steeringLines = isEscalation
        ? [
          `**FINAL ${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — last chance before this task is skipped.**`,
          `You are still executing ${unitType} ${unitId}.`,
          `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`,
          `Current durability status: ${formatExecuteTaskRecoveryStatus(status)}.`,
          "You MUST finish the durable output NOW, even if incomplete.",
          "Write the task summary with whatever you have accomplished so far.",
          "Mark the task [x] in the plan. Commit your work.",
          "A partial summary is infinitely better than no summary.",
        ]
        : [
          `**${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — do not stop.**`,
          `You are still executing ${unitType} ${unitId}.`,
          `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`,
          `Current durability status: ${formatExecuteTaskRecoveryStatus(status)}.`,
          "Do not keep exploring.",
          "Immediately finish the required durable output for this unit.",
          "If full completion is impossible, write the partial artifact/state needed for recovery and make the blocker explicit.",
        ];
      // Steer the live session rather than restarting it.
      pi.sendMessage(
        {
          customType: "gsd-auto-timeout-recovery",
          display: verbose,
          content: steeringLines.join("\n"),
        },
        { triggerTurn: true, deliverAs: "steer" },
      );
      ctx.ui.notify(
        `${reason === "idle" ? "Idle" : "Timeout"} recovery: steering ${unitType} ${unitId} to finish durable output (attempt ${attemptNumber}, session ${recoveryAttempts + 1}/${maxRecoveryAttempts}).`,
        "warning",
      );
      return "recovered";
    }
    // Retries exhausted — write missing durable artifacts and advance.
    const diagnostic = formatExecuteTaskRecoveryStatus(status);
    const [mid, sid, tid] = unitId.split("/");
    const skipped = mid && sid && tid
      ? skipExecuteTask(basePath, mid, sid, tid, status, reason, maxRecoveryAttempts)
      : false;
    if (skipped) {
      writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
        phase: "skipped",
        recovery: status,
        recoveryAttempts: recoveryAttempts + 1,
        lastRecoveryReason: reason,
      });
      ctx.ui.notify(
        `${unitType} ${unitId} skipped after ${maxRecoveryAttempts} recovery attempts (${diagnostic}). Blocker artifacts written. Advancing pipeline. (attempt ${attemptNumber})`,
        "warning",
      );
      unitRecoveryCount.delete(recoveryKey);
      await dispatchNextUnit(ctx, pi);
      return "recovered";
    }
    // Fallback: couldn't write skip artifacts — pause as before.
    writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
      phase: "paused",
      recovery: status,
      recoveryAttempts: recoveryAttempts + 1,
      lastRecoveryReason: reason,
    });
    ctx.ui.notify(
      `${reason === "idle" ? "Idle" : "Timeout"} recovery check for ${unitType} ${unitId}: ${diagnostic}`,
      "warning",
    );
    return "paused";
  }
  // ─── All other unit types: single-artifact recovery ───
  const expected = diagnoseExpectedArtifact(unitType, unitId, basePath) ?? "required durable artifact";
  // Check if the artifact already exists on disk — agent may have written it
  // without signaling completion.
  const artifactPath = resolveExpectedArtifactPath(unitType, unitId, basePath);
  if (artifactPath && existsSync(artifactPath)) {
    writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
      phase: "finalized",
      recoveryAttempts: recoveryAttempts + 1,
      lastRecoveryReason: reason,
    });
    ctx.ui.notify(
      `${reason === "idle" ? "Idle" : "Timeout"} recovery: ${unitType} ${unitId} artifact already exists on disk. Advancing. (attempt ${attemptNumber})`,
      "info",
    );
    unitRecoveryCount.delete(recoveryKey);
    await dispatchNextUnit(ctx, pi);
    return "recovered";
  }
  if (recoveryAttempts < maxRecoveryAttempts) {
    const isEscalation = recoveryAttempts > 0;
    writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
      phase: "recovered",
      recoveryAttempts: recoveryAttempts + 1,
      lastRecoveryReason: reason,
      lastProgressAt: Date.now(),
      progressCount: (runtime?.progressCount ?? 0) + 1,
      lastProgressKind: reason === "idle" ? "idle-recovery-retry" : "hard-recovery-retry",
    });
    const steeringLines = isEscalation
      ? [
        `**FINAL ${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — last chance before skip.**`,
        `You are still executing ${unitType} ${unitId}.`,
        `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts} — next failure skips this unit.`,
        `Expected durable output: ${expected}.`,
        "You MUST write the artifact file NOW, even if incomplete.",
        "Write whatever you have — partial research, preliminary findings, best-effort analysis.",
        "A partial artifact is infinitely better than no artifact.",
        "If you are truly blocked, write the file with a BLOCKER section explaining why.",
      ]
      : [
        `**${reason === "idle" ? "IDLE" : "HARD TIMEOUT"} RECOVERY — stay in auto-mode.**`,
        `You are still executing ${unitType} ${unitId}.`,
        `Recovery attempt ${recoveryAttempts + 1} of ${maxRecoveryAttempts}.`,
        `Expected durable output: ${expected}.`,
        "Stop broad exploration.",
        "Write the required artifact now.",
        "If blocked, write the partial artifact and explicitly record the blocker instead of going silent.",
      ];
    pi.sendMessage(
      {
        customType: "gsd-auto-timeout-recovery",
        display: verbose,
        content: steeringLines.join("\n"),
      },
      { triggerTurn: true, deliverAs: "steer" },
    );
    ctx.ui.notify(
      `${reason === "idle" ? "Idle" : "Timeout"} recovery: steering ${unitType} ${unitId} to produce ${expected} (attempt ${attemptNumber}, session ${recoveryAttempts + 1}/${maxRecoveryAttempts}).`,
      "warning",
    );
    return "recovered";
  }
  // Retries exhausted — write a blocker placeholder and advance the pipeline
  // instead of silently stalling.
  const placeholder = writeBlockerPlaceholder(
    unitType, unitId, basePath,
    `${reason} recovery exhausted ${maxRecoveryAttempts} attempts without producing the artifact.`,
  );
  if (placeholder) {
    writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
      phase: "skipped",
      recoveryAttempts: recoveryAttempts + 1,
      lastRecoveryReason: reason,
    });
    ctx.ui.notify(
      `${unitType} ${unitId} skipped after ${maxRecoveryAttempts} recovery attempts. Blocker placeholder written to ${placeholder}. Advancing pipeline. (attempt ${attemptNumber})`,
      "warning",
    );
    unitRecoveryCount.delete(recoveryKey);
    await dispatchNextUnit(ctx, pi);
    return "recovered";
  }
  // Fallback: couldn't resolve artifact path — pause as before.
  writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnitStartedAt, {
    phase: "paused",
    recoveryAttempts: recoveryAttempts + 1,
    lastRecoveryReason: reason,
  });
  return "paused";
}

View file

@ -0,0 +1,54 @@
/**
* In-flight tool call tracking for auto-mode idle detection.
* Tracks which tool calls are currently executing so the idle watchdog
* can distinguish "waiting for tool completion" from "truly idle".
*/
/** Tool-call id → start timestamp (ms) for every call still executing. */
const activeToolStarts = new Map<string, number>();

/**
 * Mark a tool execution as in-flight.
 * Records start time so the idle watchdog can detect tools hung longer than the idle timeout.
 */
export function markToolStart(toolCallId: string, isActive: boolean): void {
  if (isActive) activeToolStarts.set(toolCallId, Date.now());
}

/**
 * Mark a tool execution as completed.
 */
export function markToolEnd(toolCallId: string): void {
  activeToolStarts.delete(toolCallId);
}

/**
 * Returns the age (ms) of the oldest currently in-flight tool, or 0 if none.
 */
export function getOldestInFlightToolAgeMs(): number {
  const oldest = getOldestInFlightToolStart();
  return oldest === undefined ? 0 : Date.now() - oldest;
}

/**
 * Returns the number of currently in-flight tools.
 */
export function getInFlightToolCount(): number {
  return activeToolStarts.size;
}

/**
 * Returns the start timestamp of the oldest in-flight tool, or undefined if none.
 */
export function getOldestInFlightToolStart(): number | undefined {
  let oldest: number | undefined;
  for (const startedAt of activeToolStarts.values()) {
    if (oldest === undefined || startedAt < oldest) oldest = startedAt;
  }
  return oldest;
}

/**
 * Clear all in-flight tool tracking state.
 */
export function clearInFlightTools(): void {
  activeToolStarts.clear();
}

View file

@ -0,0 +1,46 @@
/**
* Unit closeout helper consolidates the repeated pattern of
* snapshotting metrics + saving activity log + extracting memories
* that appears 6+ times in auto.ts.
*/
import type { ExtensionContext } from "@gsd/pi-coding-agent";
import { snapshotUnitMetrics } from "./metrics.js";
import { saveActivityLog } from "./activity-log.js";
/** Optional metrics metadata forwarded verbatim to snapshotUnitMetrics. */
export interface CloseoutOptions {
  /** Character count of the dispatched prompt — presumably for cost/size metrics; confirm in metrics.ts. */
  promptCharCount?: number;
  /** Character count of the baseline portion of the prompt — presumably for metrics; confirm in metrics.ts. */
  baselineCharCount?: number;
  /** Complexity tier assigned by dynamic model routing, if any. */
  tier?: string;
  /** Whether dynamic routing downgraded the model for this unit. */
  modelDowngraded?: boolean;
  /** NOTE(review): appears to flag that a continue-here handoff fired during the unit — confirm semantics. */
  continueHereFired?: boolean;
}
/**
 * Close out a completed unit: snapshot its metrics, persist the activity log,
 * and kick off fire-and-forget memory extraction from that log.
 *
 * Memory extraction is best-effort — any failure (missing module, rejected
 * promise) is swallowed so closeout never blocks the pipeline.
 *
 * @returns The activity log file path, or undefined when none was written.
 */
export async function closeoutUnit(
  ctx: ExtensionContext,
  basePath: string,
  unitType: string,
  unitId: string,
  startedAt: number,
  opts?: CloseoutOptions,
): Promise<string | undefined> {
  snapshotUnitMetrics(ctx, unitType, unitId, startedAt, ctx.model?.id ?? "unknown", opts);
  const activityFile = saveActivityLog(ctx, basePath, unitType, unitId);
  if (!activityFile) return undefined;
  try {
    const { buildMemoryLLMCall, extractMemoriesFromUnit } = await import("./memory-extractor.js");
    const llmCall = buildMemoryLLMCall(ctx);
    if (llmCall) {
      // Deliberately not awaited; rejections are ignored.
      extractMemoriesFromUnit(activityFile, unitType, unitId, llmCall).catch(() => {});
    }
  } catch { /* non-fatal */ }
  return activityFile;
}

View file

@ -0,0 +1,207 @@
/**
* Worktree project root state synchronization for auto-mode.
*
* When auto-mode runs inside a worktree, dispatch-critical state files
* (.gsd/ metadata) diverge between the worktree (where work happens)
* and the project root (where startAutoMode reads initial state on restart).
* Without syncing, restarting auto-mode reads stale state from the project
* root and re-dispatches already-completed units.
*
* Also contains resource staleness detection and stale worktree escape.
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync, cpSync, unlinkSync, readdirSync } from "node:fs";
import { join, sep as pathSep } from "node:path";
import { homedir } from "node:os";
// ─── Project Root → Worktree Sync ─────────────────────────────────────────
/**
 * Pull milestone artifacts from the project root INTO the worktree before
 * deriveState runs. Covers the case where artifacts were written to the main
 * repo filesystem (e.g. via absolute paths) while the worktree is stale.
 * Files already present in the worktree are never overwritten. Also drops the
 * worktree's gsd.db so it rebuilds from fresh disk state (#853).
 * All failures are swallowed — syncing must never block dispatch.
 */
export function syncProjectRootToWorktree(projectRoot: string, worktreePath: string, milestoneId: string | null): void {
  if (!projectRoot || !worktreePath || worktreePath === projectRoot || !milestoneId) return;
  const rootGsd = join(projectRoot, ".gsd");
  const worktreeGsd = join(worktreePath, ".gsd");
  // Copy the milestone directory root → worktree. force: false means existing
  // worktree files win; only artifacts missing from the worktree are added.
  try {
    const from = join(rootGsd, "milestones", milestoneId);
    const to = join(worktreeGsd, "milestones", milestoneId);
    if (existsSync(from)) {
      mkdirSync(to, { recursive: true });
      cpSync(from, to, { recursive: true, force: false });
    }
  } catch { /* non-fatal */ }
  // Remove the worktree DB so it rebuilds from the freshly synced files —
  // stale DB rows were the root cause of the infinite skip loop (#853).
  try {
    const dbPath = join(worktreeGsd, "gsd.db");
    if (existsSync(dbPath)) unlinkSync(dbPath);
  } catch { /* non-fatal */ }
}
// ─── Worktree → Project Root Sync ─────────────────────────────────────────
/**
 * Push dispatch-critical .gsd/ state from the worktree back to the project
 * root. Runs only when auto-mode is inside a worktree (paths differ).
 * Copies STATE.md, the active milestone directory, and runtime unit records,
 * and merges completed-units.json as a set union.
 * All failures are swallowed — syncing must never block dispatch.
 */
export function syncStateToProjectRoot(worktreePath: string, projectRoot: string, milestoneId: string | null): void {
  if (!worktreePath || !projectRoot || worktreePath === projectRoot || !milestoneId) return;
  const worktreeGsd = join(worktreePath, ".gsd");
  const rootGsd = join(projectRoot, ".gsd");

  // Best-effort recursive directory copy; destination contents are overwritten.
  const copyTree = (src: string, dst: string): void => {
    try {
      if (!existsSync(src)) return;
      mkdirSync(dst, { recursive: true });
      cpSync(src, dst, { recursive: true, force: true });
    } catch { /* non-fatal */ }
  };

  // 1. STATE.md — the quick-glance status the initial deriveState() reads.
  try {
    const stateSrc = join(worktreeGsd, "STATE.md");
    if (existsSync(stateSrc)) cpSync(stateSrc, join(rootGsd, "STATE.md"), { force: true });
  } catch { /* non-fatal */ }

  // 2. Milestone directory — ROADMAP, slice PLANs, task summaries — so the
  //    project root's deriveState sees current checkboxes.
  copyTree(join(worktreeGsd, "milestones", milestoneId), join(rootGsd, "milestones", milestoneId));

  // 3. completed-units.json — set-union of both sides so already-completed
  //    units are never re-dispatched after a crash/restart.
  const unitsSrc = join(worktreeGsd, "completed-units.json");
  const unitsDst = join(rootGsd, "completed-units.json");
  if (existsSync(unitsSrc)) {
    try {
      const worktreeKeys: string[] = JSON.parse(readFileSync(unitsSrc, "utf8"));
      let rootKeys: string[] = [];
      if (existsSync(unitsDst)) {
        try { rootKeys = JSON.parse(readFileSync(unitsDst, "utf8")); } catch { /* ignore corrupt dst */ }
      }
      const union = [...new Set([...rootKeys, ...worktreeKeys])];
      writeFileSync(unitsDst, JSON.stringify(union, null, 2));
    } catch { /* non-fatal */ }
  }

  // 4. Runtime unit records — without these, selfHealRuntimeRecords() cannot
  //    find or clear a stale record after a crash mid-unit (#769).
  copyTree(join(worktreeGsd, "runtime", "units"), join(rootGsd, "runtime", "units"));
}
// ─── Resource Staleness ───────────────────────────────────────────────────
/**
* Read the resource version (semver) from the managed-resources manifest.
* Uses gsdVersion instead of syncedAt so that launching a second session
* doesn't falsely trigger staleness (#804).
*/
export function readResourceVersion(): string | null {
  // GSD_CODING_AGENT_DIR wins when set; otherwise fall back to ~/.gsd/agent.
  const fallbackDir = join(homedir(), ".gsd", "agent");
  const agentDir = process.env.GSD_CODING_AGENT_DIR || fallbackDir;
  const manifestPath = join(agentDir, "managed-resources.json");
  try {
    const parsed = JSON.parse(readFileSync(manifestPath, "utf-8")) as { gsdVersion?: unknown };
    const version = parsed?.gsdVersion;
    // Only a string gsdVersion counts; anything else reads as "unknown".
    return typeof version === "string" ? version : null;
  } catch {
    // Missing or unreadable manifest — no version available.
    return null;
  }
}
/**
* Check if managed resources have been updated since session start.
* Returns a warning message if stale, null otherwise.
*/
export function checkResourcesStale(versionOnStart: string | null): string | null {
  // Unknown at start or unknown now — can't compare, so never report stale.
  if (versionOnStart === null) return null;
  const current = readResourceVersion();
  const changed = current !== null && current !== versionOnStart;
  return changed
    ? "GSD resources were updated since this session started. Restart gsd to load the new code."
    : null;
}
// ─── Stale Worktree Escape ────────────────────────────────────────────────
/**
* Detect and escape a stale worktree cwd (#608).
*
* After milestone completion + merge, the worktree directory is removed but
* the process cwd may still point inside `.gsd/worktrees/<MID>/`.
* When a new session starts, `process.cwd()` is passed as `base` to startAuto
* and all subsequent writes land in the wrong directory. This function detects
* that scenario and chdir back to the project root.
*
* Returns the corrected base path.
*/
export function escapeStaleWorktree(base: string): string {
  const marker = `${pathSep}.gsd${pathSep}worktrees${pathSep}`;
  const markerIdx = base.indexOf(marker);
  // Not inside a worktree path — nothing to escape.
  if (markerIdx < 0) return base;
  // Everything before `.gsd/worktrees/` is the original project root.
  const projectRoot = base.slice(0, markerIdx);
  try {
    process.chdir(projectRoot);
    return projectRoot;
  } catch {
    // chdir failed (root missing/unreadable) — keep the original path and
    // let the caller surface errors downstream.
    return base;
  }
}
/**
* Clean stale runtime unit files for completed milestones.
*
* After restart, stale runtime/units/*.json from prior milestones can
* cause deriveState to resume the wrong milestone (#887). Removes files
* for milestones that have a SUMMARY (fully complete).
*/
export function cleanStaleRuntimeUnits(
  gsdRootPath: string,
  hasMilestoneSummary: (mid: string) => boolean,
): number {
  const unitsDir = join(gsdRootPath, "runtime", "units");
  if (!existsSync(unitsDir)) return 0;
  let removed = 0;
  try {
    // Only .json runtime records are candidates; other files are ignored.
    const records = readdirSync(unitsDir).filter((name) => name.endsWith(".json"));
    for (const name of records) {
      // Milestone id embedded in the filename, e.g. "M001" or "M001-ab12cd".
      const match = name.match(/(M\d+(?:-[a-z0-9]{6})?)/);
      if (match === null) continue;
      if (!hasMilestoneSummary(match[1])) continue;
      try {
        unlinkSync(join(unitsDir, name));
        removed += 1;
      } catch { /* non-fatal */ }
    }
  } catch { /* non-fatal */ }
  return removed;
}

File diff suppressed because it is too large Load diff

View file

@ -625,7 +625,7 @@ function showHelp(ctx: ExtensionCommandContext): void {
"",
"MAINTENANCE",
" /gsd doctor Diagnose and repair .gsd/ state [audit|fix|heal] [scope]",
" /gsd export Export milestone/slice results [--json|--markdown]",
" /gsd export Export milestone/slice results [--json|--markdown|--html]",
" /gsd cleanup Remove merged branches or snapshots [branches|snapshots]",
" /gsd migrate Upgrade .gsd/ structures to new format",
" /gsd remote Control remote auto-mode [slack|discord|status|disconnect]",

View file

@ -173,14 +173,19 @@ export async function preDispatchHealthGate(basePath: string): Promise<PreDispat
}
// ── STATE.md existence check ──
// If STATE.md is missing, rebuild it now so the next unit has accurate
// context. Non-blocking — if the rebuild throws, dispatch continues anyway.
// If STATE.md is missing, attempt to rebuild it for the next unit's context.
// Non-blocking — fresh worktrees won't have it until the first unit completes (#889).
try {
const stateFile = resolveGsdRootFile(basePath, "STATE");
const milestonesDir = join(gsdRoot(basePath), "milestones");
if (existsSync(milestonesDir) && !existsSync(stateFile)) {
await rebuildState(basePath);
fixesApplied.push("rebuilt missing STATE.md before dispatch");
try {
await rebuildState(basePath);
fixesApplied.push("rebuilt missing STATE.md before dispatch");
} catch {
// Rebuild failed — non-blocking, dispatch continues
fixesApplied.push("STATE.md missing — will rebuild after first unit completes");
}
}
} catch {
// Non-fatal — dispatch continues without STATE.md if rebuild fails

File diff suppressed because it is too large Load diff

View file

@ -93,9 +93,57 @@ export function writeExportFile(
}
/**
* Export session/milestone data to JSON or markdown.
* Export session/milestone data to JSON, markdown, or HTML.
*/
export async function handleExport(args: string, ctx: ExtensionCommandContext, basePath: string): Promise<void> {
// HTML report — delegates to the full visualizer-data pipeline
if (args.includes("--html")) {
try {
const { loadVisualizerData } = await import("./visualizer-data.js");
const { generateHtmlReport } = await import("./export-html.js");
const { writeReportSnapshot, reportsDir } = await import("./reports.js");
const { basename: bn } = await import("node:path");
const data = await loadVisualizerData(basePath);
const projName = basename(basePath);
const gsdVersion = process.env.GSD_VERSION ?? "0.0.0";
const doneSlices = data.milestones.reduce((s, m) => s + m.slices.filter(sl => sl.done).length, 0);
const totalSlices = data.milestones.reduce((s, m) => s + m.slices.length, 0);
const outPath = writeReportSnapshot({
basePath,
html: generateHtmlReport(data, {
projectName: projName,
projectPath: basePath,
gsdVersion,
indexRelPath: "index.html",
}),
milestoneId: data.milestones.find(m => m.status === "active")?.id ?? "manual",
milestoneTitle: data.milestones.find(m => m.status === "active")?.title ?? "",
kind: "manual",
projectName: projName,
projectPath: basePath,
gsdVersion,
totalCost: data.totals?.cost ?? 0,
totalTokens: data.totals?.tokens.total ?? 0,
totalDuration: data.totals?.duration ?? 0,
doneSlices,
totalSlices,
doneMilestones: data.milestones.filter(m => m.status === "complete").length,
totalMilestones: data.milestones.length,
phase: data.phase,
});
ctx.ui.notify(
`HTML report saved: .gsd/reports/${bn(outPath)}\nBrowse all reports: .gsd/reports/index.html`,
"success",
);
} catch (err) {
ctx.ui.notify(
`HTML export failed: ${err instanceof Error ? err.message : String(err)}`,
"error",
);
}
return;
}
const format = args.includes("--json") ? "json" : "markdown";
const ledger = getLedger();

View file

@ -60,6 +60,7 @@ import { shortcutDesc } from "../shared/terminal.js";
import { Text } from "@gsd/pi-tui";
import { pauseAutoForProviderError } from "./provider-error-pause.js";
import { toPosixPath } from "../shared/path-display.js";
import { isParallelActive, shutdownParallel } from "./parallel-orchestrator.js";
// ── Agent Instructions ────────────────────────────────────────────────────
// Lightweight "always follow" files injected into every GSD agent session.
@ -856,6 +857,12 @@ export default function (pi: ExtensionAPI) {
// ── session_shutdown: save activity log on Ctrl+C / SIGTERM ─────────────
pi.on("session_shutdown", async (_event, ctx: ExtensionContext) => {
if (isParallelActive()) {
try {
await shutdownParallel(process.cwd());
} catch { /* best-effort */ }
}
if (!isAutoActive() && !isAutoPaused()) return;
// Save the current session — the lock file stays on disk

View file

@ -298,6 +298,27 @@ export function validateTaskSummaryContent(file: string, content: string): Valid
});
}
const evidence = getSection(content, "Verification Evidence", 2);
if (!evidence) {
issues.push({
severity: "warning",
scope: "task-summary",
file,
ruleId: "evidence_block_missing",
message: "Task summary is missing `## Verification Evidence`.",
suggestion: "Add a verification evidence table showing gate check results (command, exit code, verdict, duration).",
});
} else if (sectionLooksPlaceholderOnly(evidence)) {
issues.push({
severity: "warning",
scope: "task-summary",
file,
ruleId: "evidence_block_placeholder",
message: "Task summary verification evidence section still looks like placeholder text.",
suggestion: "Replace placeholders with actual gate results or note that no verification commands were discovered.",
});
}
return issues;
}

View file

@ -8,7 +8,14 @@
*/
import { spawn, type ChildProcess } from "node:child_process";
import { existsSync } from "node:fs";
import {
existsSync,
writeFileSync,
readFileSync,
renameSync,
unlinkSync,
mkdirSync,
} from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import { gsdRoot } from "./paths.js";
@ -58,6 +65,142 @@ export interface OrchestratorState {
let state: OrchestratorState | null = null;
// ─── Persistence ──────────────────────────────────────────────────────────
// Filename of the persisted orchestrator state, written under .gsd/.
const ORCHESTRATOR_STATE_FILE = "orchestrator.json";
// Suffix for the atomic-write temp file (write tmp, then rename over dest).
const TMP_SUFFIX = ".tmp";
/**
 * JSON-serializable snapshot of the orchestrator written to
 * .gsd/orchestrator.json for crash recovery. Mirrors the in-memory worker
 * map minus the live process handles (those are not serializable; restore
 * probes each pid instead).
 */
export interface PersistedState {
  active: boolean;
  workers: Array<{
    milestoneId: string;
    title: string;
    // OS process id — used for liveness probing on restore.
    pid: number;
    worktreePath: string;
    startedAt: number;
    state: "running" | "paused" | "stopped" | "error";
    completedUnits: number;
    cost: number;
  }>;
  totalCost: number;
  startedAt: number;
  // Subset of the parallel config captured at persist time.
  configSnapshot: { max_workers: number; budget_ceiling?: number };
}
/** Absolute path of the persisted orchestrator state file. */
function stateFilePath(basePath: string): string {
  return join(gsdRoot(basePath), ORCHESTRATOR_STATE_FILE);
}
/**
* Persist the current orchestrator state to .gsd/orchestrator.json.
* Uses atomic write (tmp + rename) to prevent partial reads.
*/
/**
 * Persist the current orchestrator state to .gsd/orchestrator.json.
 * Uses an atomic write (tmp file + rename) so readers never see a partial
 * file. A null state or any I/O failure is a silent no-op.
 */
export function persistState(basePath: string): void {
  if (!state) return;
  try {
    const dir = gsdRoot(basePath);
    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
    // Strip the live ChildProcess handle — only plain data is serialized.
    const workers = [...state.workers.values()].map(
      ({ milestoneId, title, pid, worktreePath, startedAt, state: workerState, completedUnits, cost }) => ({
        milestoneId,
        title,
        pid,
        worktreePath,
        startedAt,
        state: workerState,
        completedUnits,
        cost,
      }),
    );
    const snapshot: PersistedState = {
      active: state.active,
      workers,
      totalCost: state.totalCost,
      startedAt: state.startedAt,
      configSnapshot: {
        max_workers: state.config.max_workers,
        budget_ceiling: state.config.budget_ceiling,
      },
    };
    const dest = stateFilePath(basePath);
    const tmp = dest + TMP_SUFFIX;
    writeFileSync(tmp, JSON.stringify(snapshot, null, 2), "utf-8");
    renameSync(tmp, dest);
  } catch { /* non-fatal */ }
}
/**
* Remove the persisted state file.
*/
function removeStateFile(basePath: string): void {
try {
const p = stateFilePath(basePath);
if (existsSync(p)) unlinkSync(p);
} catch { /* non-fatal */ }
}
/**
 * Check whether a process with the given pid exists.
 *
 * Sends signal 0, which performs only the existence/permission check
 * without delivering a signal. Per Node's process.kill docs, EPERM means
 * the process exists but we lack permission to signal it — so it is
 * treated as alive rather than dead.
 *
 * @param pid - Candidate process id; non-positive or non-integer ids are
 *              rejected up front (pid 0 / negatives address process groups).
 */
function isPidAlive(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // ESRCH → no such process (dead). EPERM → exists but not ours (alive).
    return (err as NodeJS.ErrnoException).code === "EPERM";
  }
}
/**
* Restore orchestrator state from .gsd/orchestrator.json.
* Checks PID liveness for each worker:
* - Living PID state "running", process stays null (no handle)
* - Dead PID removed from restored state
* Returns null if no state file exists or no workers survive.
*/
export function restoreState(basePath: string): PersistedState | null {
try {
const p = stateFilePath(basePath);
if (!existsSync(p)) return null;
const raw = readFileSync(p, "utf-8");
const persisted = JSON.parse(raw) as PersistedState;
// Filter to only workers with living PIDs
persisted.workers = persisted.workers.filter((w) => {
if (w.state === "stopped" || w.state === "error") return false;
return isPidAlive(w.pid);
});
if (persisted.workers.length === 0) {
// No surviving workers — clean up and return null
removeStateFile(basePath);
return null;
}
return persisted;
} catch {
return null;
}
}
/**
 * Wait up to timeoutMs for a worker process to exit.
 *
 * With a live ChildProcess handle, listens for its "exit" event; without
 * one (adopted workers restored after a crash), polls the pid every 50ms.
 *
 * Fixes over the naive wait: short-circuits when the child has already
 * exited (exitCode/signalCode set) instead of sleeping the full timeout,
 * and removes the "exit" listener on the timeout path so repeated calls
 * don't leak listeners on a long-lived child.
 *
 * @returns true when the process is confirmed gone, false on timeout.
 */
async function waitForWorkerExit(worker: WorkerInfo, timeoutMs: number): Promise<boolean> {
  if (worker.process) {
    const child = worker.process;
    const alreadyExited = child.exitCode !== null || child.signalCode !== null;
    if (!alreadyExited) {
      await new Promise<void>((resolve) => {
        let timer: NodeJS.Timeout;
        const onExit = () => {
          clearTimeout(timer);
          resolve();
        };
        timer = setTimeout(() => {
          // Timed out — detach the listener so it doesn't accumulate.
          child.removeListener("exit", onExit);
          resolve();
        }, timeoutMs);
        child.once("exit", onExit);
      });
    }
    return worker.process === null || !isPidAlive(worker.pid);
  }
  // No handle (adopted worker) — poll the pid until the deadline.
  const startedAt = Date.now();
  while (Date.now() - startedAt < timeoutMs) {
    if (!isPidAlive(worker.pid)) return true;
    await new Promise((resolve) => setTimeout(resolve, 50));
  }
  return !isPidAlive(worker.pid);
}
// ─── Accessors ─────────────────────────────────────────────────────────────
/** Returns true if the orchestrator is active and has been initialized. */
@ -81,12 +224,26 @@ export function getWorkerStatuses(): WorkerInfo[] {
/**
* Analyze eligibility and prepare for parallel start.
* Returns the candidates report without actually starting workers.
* Also detects orphaned sessions from prior crashes.
*/
export async function prepareParallelStart(
basePath: string,
_prefs: GSDPreferences | undefined,
): Promise<ParallelCandidates> {
return analyzeParallelEligibility(basePath);
): Promise<ParallelCandidates & { orphans?: Array<{ milestoneId: string; pid: number; alive: boolean }> }> {
// Detect orphaned sessions before eligibility analysis
const sessions = readAllSessionStatuses(basePath);
const orphans: Array<{ milestoneId: string; pid: number; alive: boolean }> = [];
for (const session of sessions) {
const alive = isPidAlive(session.pid);
orphans.push({ milestoneId: session.milestoneId, pid: session.pid, alive });
if (!alive) {
// Clean up dead session
removeSessionStatus(basePath, session.milestoneId);
}
}
const candidates = await analyzeParallelEligibility(basePath);
return orphans.length > 0 ? { ...candidates, orphans } : candidates;
}
// ─── Start ─────────────────────────────────────────────────────────────────
@ -106,6 +263,36 @@ export async function startParallel(
}
const config = resolveParallelConfig(prefs);
// Try to restore from a previous crash
const restored = restoreState(basePath);
if (restored && restored.workers.length > 0) {
// Adopt surviving workers instead of starting new ones
state = {
active: true,
workers: new Map(),
config,
totalCost: restored.totalCost,
startedAt: restored.startedAt,
};
const adopted: string[] = [];
for (const w of restored.workers) {
state.workers.set(w.milestoneId, {
milestoneId: w.milestoneId,
title: w.title,
pid: w.pid,
process: null, // no handle for adopted workers
worktreePath: w.worktreePath,
startedAt: w.startedAt,
state: "running",
completedUnits: w.completedUnits,
cost: w.cost,
});
adopted.push(w.milestoneId);
}
return { started: adopted, errors: [] };
}
const now = Date.now();
// Initialize orchestrator state
@ -190,6 +377,9 @@ export async function startParallel(
state.active = false;
}
// Persist state for crash recovery
persistState(basePath);
return { started, errors };
}
@ -485,12 +675,24 @@ export async function stopParallel(
try {
if (worker.process) {
worker.process.kill("SIGTERM");
} else {
} else if (worker.pid !== process.pid) {
process.kill(worker.pid, "SIGTERM");
}
} catch { /* process may already be dead */ }
}
const exitedAfterTerm = await waitForWorkerExit(worker, 750);
if (!exitedAfterTerm && worker.pid > 0) {
try {
if (worker.process) {
worker.process.kill("SIGKILL");
} else if (worker.pid !== process.pid) {
process.kill(worker.pid, "SIGKILL");
}
} catch { /* process may already be dead */ }
await waitForWorkerExit(worker, 250);
}
// Update in-memory state
worker.state = "stopped";
worker.process = null;
@ -503,6 +705,15 @@ export async function stopParallel(
if (!milestoneId) {
state.active = false;
}
// Persist final state and clean up state file
removeStateFile(basePath);
}
/**
 * Stop all parallel workers and clear the orchestrator.
 * Safe to call when the orchestrator was never started (no-op on null state).
 */
export async function shutdownParallel(basePath: string): Promise<void> {
  if (!state) return;
  await stopParallel(basePath);
  resetOrchestrator();
}
// ─── Pause / Resume ────────────────────────────────────────────────────────
@ -589,6 +800,9 @@ export function refreshWorkerStatuses(basePath: string): void {
for (const worker of state.workers.values()) {
state.totalCost += worker.cost;
}
// Persist updated state for crash recovery
persistState(basePath);
}
// ─── Budget ────────────────────────────────────────────────────────────────

View file

@ -75,7 +75,11 @@ const KNOWN_PREFERENCE_KEYS = new Set<string>([
"token_profile",
"phases",
"auto_visualize",
"auto_report",
"parallel",
"verification_commands",
"verification_auto_fix",
"verification_max_retries",
]);
export interface GSDSkillRule {
@ -172,7 +176,12 @@ export interface GSDPreferences {
token_profile?: TokenProfile;
phases?: PhaseSkipPreferences;
auto_visualize?: boolean;
/** Generate HTML report snapshot after each milestone completion. Default: true. Set false to disable. */
auto_report?: boolean;
parallel?: import("./types.js").ParallelConfig;
verification_commands?: string[];
verification_auto_fix?: boolean;
verification_max_retries?: number;
}
export interface LoadedGSDPreferences {
@ -327,7 +336,7 @@ function resolveSkillReference(ref: string, cwd: string): SkillResolution {
try {
const entries = readdirSync(dir, { withFileTypes: true });
for (const entry of entries) {
if (!entry.isDirectory()) continue;
if (!entry.isDirectory() && !entry.isSymbolicLink()) continue;
if (entry.name === expanded) {
const skillFile = join(dir, entry.name, "SKILL.md");
if (existsSync(skillFile)) {
@ -773,6 +782,9 @@ function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPr
parallel: (base.parallel || override.parallel)
? { ...(base.parallel ?? {}), ...(override.parallel ?? {}) } as import("./types.js").ParallelConfig
: undefined,
verification_commands: mergeStringLists(base.verification_commands, override.verification_commands),
verification_auto_fix: override.verification_auto_fix ?? base.verification_auto_fix,
verification_max_retries: override.verification_max_retries ?? base.verification_max_retries,
};
}
@ -1205,6 +1217,39 @@ export function validatePreferences(preferences: GSDPreferences): {
}
}
// ─── Verification Preferences ───────────────────────────────────────────
if (preferences.verification_commands !== undefined) {
if (Array.isArray(preferences.verification_commands)) {
const allStrings = preferences.verification_commands.every(
(item: unknown) => typeof item === "string",
);
if (allStrings) {
validated.verification_commands = preferences.verification_commands;
} else {
errors.push("verification_commands must be an array of strings");
}
} else {
errors.push("verification_commands must be an array of strings");
}
}
if (preferences.verification_auto_fix !== undefined) {
if (typeof preferences.verification_auto_fix === "boolean") {
validated.verification_auto_fix = preferences.verification_auto_fix;
} else {
errors.push("verification_auto_fix must be a boolean");
}
}
if (preferences.verification_max_retries !== undefined) {
const raw = preferences.verification_max_retries;
if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) {
validated.verification_max_retries = Math.floor(raw);
} else {
errors.push("verification_max_retries must be a non-negative number");
}
}
// ─── Git Preferences ───────────────────────────────────────────────────
if (preferences.git && typeof preferences.git === "object") {
const git: Record<string, unknown> = {};

View file

@ -38,15 +38,16 @@ Then:
- Preferred: use the `bg_shell` tool if available — it manages process lifecycle correctly without stream-inheritance issues
6. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors)
7. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary.
8. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
8. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section.
9. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
- exercise the real flow in the browser
- prefer `browser_batch` when the next few actions are obvious and sequential
- prefer `browser_assert` for explicit pass/fail verification of the intended outcome
- use `browser_diff` when an action's effect is ambiguous
- use console/network/dialog diagnostics when validating async, stateful, or failure-prone UI
- record verification in terms of explicit checks passed/failed, not only prose interpretation
9. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
10. **If execution is running long or verification fails:**
10. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
11. **If execution is running long or verification fails:**
**Context budget:** You have approximately **{{verificationBudget}}** reserved for verification context. If you've used most of your context and haven't finished all steps, stop implementing and prioritize writing the task summary with clear notes on what's done and what remains. A partial summary that enables clean resumption is more valuable than one more half-finished step with no documentation. Never sacrifice summary quality for one more implementation step.

View file

@ -154,7 +154,7 @@ Templates showing the expected format for each artifact type are in:
**External facts:** Use `search-the-web` + `fetch_page`, or `search_and_read` for one-call extraction. Use `freshness` for recency. Never state current facts from training data without verification.
**Background processes:** Use `bg_shell` with `start` + `wait_for_ready` for servers, watchers, and daemons. Never use `bash` with `&` or `nohup` to background a process — the `bash` tool waits for stdout to close, so backgrounded children that inherit the file descriptors cause it to hang indefinitely. Never poll with `sleep`/retry loops — `wait_for_ready` exists for this. For status checks, use `digest` (~30 tokens), not `output` (~2000 tokens). Use `highlights` (~100 tokens) when you need significant lines only. Use `output` only when actively debugging.
**Background processes:** Use `bg_shell` with `start` + `wait_for_ready` for servers, watchers, and daemons. Never use `bash` with `&` or `nohup` to background a process — the `bash` tool waits for stdout to close, so backgrounded children that inherit the file descriptors cause it to hang indefinitely. Never poll with `sleep`/retry loops — `wait_for_ready` exists for this. For status checks, use `digest` (~30 tokens), not `output` (~2000 tokens). Use `highlights` (~100 tokens) when you need significant lines only. Use `output` only when actively debugging. Background processes are session-scoped by default; set `persist_across_sessions:true` only when you intentionally need them to survive a fresh session.
**One-shot commands:** Use `async_bash` for builds, tests, and installs. The result is pushed to you when the command exits — no polling needed. Use `await_job` to block on a specific job.

View file

@ -0,0 +1,510 @@
/**
* GSD Reports Registry
*
 * Manages .gsd/reports/ — the persistent progression log of HTML snapshots.
*
* Layout:
* .gsd/reports/
 *     reports.json               — lightweight metadata index (never re-parses HTML)
 *     index.html                 — auto-regenerated on every new snapshot
 *     M001-20260101T120000.html  — per-milestone snapshot
 *     final-20260201T090000.html — full-project final snapshot
*
* Auto-triggered: after each milestone completion (when auto_report: true).
* Manual: /gsd export --html
*/
import { writeFileSync, readFileSync, mkdirSync, existsSync } from 'node:fs';
import { join, basename } from 'node:path';
import { gsdRoot } from './paths.js';
import { formatCost, formatTokenCount } from './metrics.js';
import { formatDuration } from './history.js';
// ─── Types ────────────────────────────────────────────────────────────────────
export interface ReportEntry {
/** Filename relative to the reports/ dir, e.g. "M001-20260101T120000.html" */
filename: string;
/** ISO timestamp when this report was generated */
generatedAt: string;
/** Milestone ID this snapshot covers, or "final" for a full-project snapshot */
milestoneId: string | 'final';
/** Milestone title at snapshot time */
milestoneTitle: string;
/** Human-readable label shown in the index */
label: string;
/** Snapshot kind */
kind: 'milestone' | 'manual' | 'final';
// Metrics at snapshot time — for the index progression view
totalCost: number;
totalTokens: number;
totalDuration: number;
doneSlices: number;
totalSlices: number;
doneMilestones: number;
totalMilestones: number;
phase: string;
}
export interface ReportsIndex {
  /** Index schema version (currently 1). */
  version: 1;
  /** Project display name — refreshed on every snapshot write. */
  projectName: string;
  /** Project path — refreshed on every snapshot write. */
  projectPath: string;
  /** GSD version that produced the most recent snapshot. */
  gsdVersion: string;
  /** All snapshots in generation order (appended on each write). */
  entries: ReportEntry[];
}
// ─── Paths ────────────────────────────────────────────────────────────────────
/** Absolute path of the reports directory: <gsdRoot>/reports. */
export function reportsDir(basePath: string): string {
  return join(gsdRoot(basePath), 'reports');
}
/** Path of the JSON metadata index (reports.json). */
function reportsIndexPath(basePath: string): string {
  return join(reportsDir(basePath), 'reports.json');
}
/** Path of the auto-regenerated HTML index (index.html). */
function reportsHtmlIndexPath(basePath: string): string {
  return join(reportsDir(basePath), 'index.html');
}
// ─── Registry ─────────────────────────────────────────────────────────────────
/**
 * Load reports.json, or null when the file is absent or unparseable.
 */
export function loadReportsIndex(basePath: string): ReportsIndex | null {
  const indexPath = reportsIndexPath(basePath);
  try {
    if (!existsSync(indexPath)) return null;
    const raw = readFileSync(indexPath, 'utf-8');
    return JSON.parse(raw) as ReportsIndex;
  } catch {
    return null;
  }
}
/**
 * Write reports.json, creating the reports directory if needed.
 */
function saveReportsIndex(basePath: string, index: ReportsIndex): void {
  mkdirSync(reportsDir(basePath), { recursive: true });
  const payload = JSON.stringify(index, null, 2) + '\n';
  writeFileSync(reportsIndexPath(basePath), payload, 'utf-8');
}
// ─── Write a report snapshot ──────────────────────────────────────────────────
/** Arguments for writeReportSnapshot(). */
export interface WriteReportSnapshotArgs {
  /** Project base path; the snapshot lands under <basePath>/.gsd/reports. */
  basePath: string;
  /** Fully rendered HTML document to write as the snapshot file. */
  html: string;
  /** Milestone this snapshot covers, or 'final' for a full-project report. */
  milestoneId: string | 'final';
  milestoneTitle: string;
  /** How the snapshot was produced (auto per-milestone, manual, or final). */
  kind: 'milestone' | 'manual' | 'final';
  projectName: string;
  projectPath: string;
  gsdVersion: string;
  // metrics — recorded in the registry entry for the index progression view
  totalCost: number;
  totalTokens: number;
  totalDuration: number;
  doneSlices: number;
  totalSlices: number;
  doneMilestones: number;
  totalMilestones: number;
  phase: string;
}
/**
 * Write a report snapshot to .gsd/reports/, update reports.json, and
 * regenerate index.html.
 *
 * @returns Absolute path of the written report file.
 */
export function writeReportSnapshot(args: WriteReportSnapshotArgs): string {
  const dir = reportsDir(args.basePath);
  mkdirSync(dir, { recursive: true });
  // Single clock reading so the filename timestamp and the registry's
  // generatedAt can never disagree across a second boundary.
  const now = new Date();
  const timestamp = now.toISOString().replace(/[:.]/g, '-').slice(0, 19);
  const prefix = args.milestoneId === 'final' ? 'final' : args.milestoneId;
  const filename = `${prefix}-${timestamp}.html`;
  const filePath = join(dir, filename);
  writeFileSync(filePath, args.html, 'utf-8');
  // Load or init registry
  const existing = loadReportsIndex(args.basePath);
  const index: ReportsIndex = existing ?? {
    version: 1,
    projectName: args.projectName,
    projectPath: args.projectPath,
    gsdVersion: args.gsdVersion,
    entries: [],
  };
  // Keep metadata fresh — the project may have moved or GSD been upgraded
  // since the registry was created.
  index.projectName = args.projectName;
  index.projectPath = args.projectPath;
  index.gsdVersion = args.gsdVersion;
  const label = args.milestoneId === 'final'
    ? 'Final Report'
    : `${args.milestoneId}: ${args.milestoneTitle}`;
  const entry: ReportEntry = {
    filename,
    generatedAt: now.toISOString(),
    milestoneId: args.milestoneId,
    milestoneTitle: args.milestoneTitle,
    label,
    kind: args.kind,
    totalCost: args.totalCost,
    totalTokens: args.totalTokens,
    totalDuration: args.totalDuration,
    doneSlices: args.doneSlices,
    totalSlices: args.totalSlices,
    doneMilestones: args.doneMilestones,
    totalMilestones: args.totalMilestones,
    phase: args.phase,
  };
  index.entries.push(entry);
  saveReportsIndex(args.basePath, index);
  regenerateHtmlIndex(args.basePath, index);
  return filePath;
}
// ─── HTML Index Generator ─────────────────────────────────────────────────────
/**
 * Rebuild index.html from the registry and write it into the reports dir.
 */
export function regenerateHtmlIndex(basePath: string, index: ReportsIndex): void {
  writeFileSync(reportsHtmlIndexPath(basePath), buildIndexHtml(index), 'utf-8');
}
function buildIndexHtml(index: ReportsIndex): string {
const { projectName, projectPath, gsdVersion, entries } = index;
const generated = new Date().toISOString();
// Sort oldest → newest for the progression timeline
const sorted = [...entries].sort(
(a, b) => new Date(a.generatedAt).getTime() - new Date(b.generatedAt).getTime()
);
const latestEntry = sorted[sorted.length - 1];
const overallPct = latestEntry
? (latestEntry.totalSlices > 0
? Math.round((latestEntry.doneSlices / latestEntry.totalSlices) * 100)
: 0)
: 0;
// TOC: group by milestone
const milestoneGroups = new Map<string, ReportEntry[]>();
for (const e of sorted) {
const key = e.milestoneId;
const arr = milestoneGroups.get(key) ?? [];
arr.push(e);
milestoneGroups.set(key, arr);
}
const tocHtml = [...milestoneGroups.entries()].map(([mid, group]) => {
const links = group.map(e =>
`<li><a href="${esc(e.filename)}">${formatDateShort(e.generatedAt)}</a> <span class="toc-kind toc-${e.kind}">${e.kind}</span></li>`
).join('');
return `
<div class="toc-group">
<div class="toc-group-label">${esc(mid === 'final' ? 'Final' : mid)}</div>
<ul>${links}</ul>
</div>`;
}).join('');
// Progression cards
const cardHtml = sorted.map((e, i) => {
const pct = e.totalSlices > 0 ? Math.round((e.doneSlices / e.totalSlices) * 100) : 0;
const isLatest = i === sorted.length - 1;
// Delta vs previous
let deltaHtml = '';
if (i > 0) {
const prev = sorted[i - 1];
const dCost = e.totalCost - prev.totalCost;
const dSlices = e.doneSlices - prev.doneSlices;
const dMillestones = e.doneMilestones - prev.doneMilestones;
const parts: string[] = [];
if (dCost > 0) parts.push(`+${formatCost(dCost)}`);
if (dSlices > 0) parts.push(`+${dSlices} slice${dSlices !== 1 ? 's' : ''}`);
if (dMillestones > 0) parts.push(`+${dMillestones} milestone${dMillestones !== 1 ? 's' : ''}`);
if (parts.length > 0) {
deltaHtml = `<div class="card-delta">${parts.map(p => `<span>${esc(p)}</span>`).join('')}</div>`;
}
}
return `
<a class="report-card${isLatest ? ' card-latest' : ''}" href="${esc(e.filename)}">
<div class="card-top">
<span class="card-label">${esc(e.label)}</span>
<span class="card-kind card-kind-${e.kind}">${e.kind}</span>
</div>
<div class="card-date">${formatDateShort(e.generatedAt)}</div>
<div class="card-progress">
<div class="card-bar-track">
<div class="card-bar-fill" style="width:${pct}%"></div>
</div>
<span class="card-pct">${pct}%</span>
</div>
<div class="card-stats">
<span>${esc(formatCost(e.totalCost))}</span>
<span>${esc(formatTokenCount(e.totalTokens))}</span>
<span>${esc(formatDuration(e.totalDuration))}</span>
<span>${e.doneSlices}/${e.totalSlices} slices</span>
</div>
${deltaHtml}
${isLatest ? '<div class="card-latest-badge">Latest</div>' : ''}
</a>`;
}).join('');
// Cost progression mini-chart (inline SVG sparkline)
const sparklineSvg = sorted.length > 1 ? buildCostSparkline(sorted) : '';
// Summary of latest state
const summaryHtml = latestEntry ? `
<div class="idx-summary">
<div class="idx-stat"><span class="idx-val">${formatCost(latestEntry.totalCost)}</span><span class="idx-lbl">Total Cost</span></div>
<div class="idx-stat"><span class="idx-val">${formatTokenCount(latestEntry.totalTokens)}</span><span class="idx-lbl">Total Tokens</span></div>
<div class="idx-stat"><span class="idx-val">${formatDuration(latestEntry.totalDuration)}</span><span class="idx-lbl">Duration</span></div>
<div class="idx-stat"><span class="idx-val">${latestEntry.doneSlices}/${latestEntry.totalSlices}</span><span class="idx-lbl">Slices</span></div>
<div class="idx-stat"><span class="idx-val">${latestEntry.doneMilestones}/${latestEntry.totalMilestones}</span><span class="idx-lbl">Milestones</span></div>
<div class="idx-stat"><span class="idx-val">${entries.length}</span><span class="idx-lbl">Reports</span></div>
</div>
<div class="idx-progress">
<div class="idx-bar-track"><div class="idx-bar-fill" style="width:${overallPct}%"></div></div>
<span class="idx-pct">${overallPct}% complete</span>
</div>` : '<p class="empty">No reports generated yet.</p>';
return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>GSD Reports ${esc(projectName)}</title>
<style>${INDEX_CSS}</style>
</head>
<body>
<header>
<div class="hdr-inner">
<div class="branding">
<span class="logo">GSD</span>
<span class="ver">v${esc(gsdVersion)}</span>
</div>
<div class="hdr-meta">
<h1>${esc(projectName)} <span class="hdr-subtitle">Reports</span></h1>
<span class="hdr-path">${esc(projectPath)}</span>
</div>
<div class="hdr-right">
<span class="gen-lbl">Updated</span>
<span class="gen">${formatDateShort(generated)}</span>
</div>
</div>
</header>
<div class="layout">
<!-- Sidebar TOC -->
<aside class="sidebar">
<div class="sidebar-title">Reports</div>
${sorted.length > 0 ? tocHtml : '<p class="empty">No reports yet.</p>'}
</aside>
<!-- Main content -->
<main>
<section class="idx-overview">
<h2>Project Overview</h2>
${summaryHtml}
${sparklineSvg ? `<div class="sparkline-wrap"><h3>Cost Progression</h3>${sparklineSvg}</div>` : ''}
</section>
<section class="idx-cards">
<h2>Progression <span class="sec-count">${entries.length}</span></h2>
${sorted.length > 0
? `<div class="cards-grid">${cardHtml}</div>`
: '<p class="empty">No reports generated yet. Run <code>/gsd export --html</code> or enable <code>auto_report: true</code>.</p>'}
</section>
</main>
</div>
<footer>
<div class="ftr-inner">
<span class="ftr-brand">GSD v${esc(gsdVersion)}</span>
<span class="ftr-sep"></span>
<span>${esc(projectName)}</span>
<span class="ftr-sep"></span>
<span>${esc(projectPath)}</span>
<span class="ftr-sep"></span>
<span>Updated ${formatDateShort(generated)}</span>
</div>
</footer>
</body>
</html>`;
}
// ─── Cost sparkline (inline SVG) ──────────────────────────────────────────────
/**
 * Render an inline-SVG sparkline of total cost across report entries, plus a
 * row of milestone-id tick labels aligned under each data point.
 *
 * Callers only invoke this when entries.length > 1 (see the guard at the call
 * site), so the single-entry xStep branch is defensive.
 */
function buildCostSparkline(entries: ReportEntry[]): string {
  const costs = entries.map(e => e.totalCost);
  // Floor of 0.001 keeps the y-scale divisor non-zero when every cost is 0.
  const maxCost = Math.max(...costs, 0.001);
  const W = 600, H = 60, PAD = 12;
  // Even horizontal spacing between points; degenerate case spans full width.
  const xStep = entries.length > 1 ? (W - PAD * 2) / (entries.length - 1) : W - PAD * 2;
  // Polyline vertices "x,y x,y …" — y is inverted because SVG y grows downward.
  const points = costs.map((c, i) => {
    const x = PAD + i * xStep;
    const y = PAD + (1 - c / maxCost) * (H - PAD * 2);
    return `${x.toFixed(1)},${y.toFixed(1)}`;
  }).join(' ');
  // One dot per point; the nested <title> gives a native browser tooltip.
  const dots = costs.map((c, i) => {
    const x = PAD + i * xStep;
    const y = PAD + (1 - c / maxCost) * (H - PAD * 2);
    return `<circle cx="${x.toFixed(1)}" cy="${y.toFixed(1)}" r="3" class="spark-dot">
      <title>${esc(entries[i].label)} ${formatCost(c)}</title>
    </circle>`;
  }).join('');
  // Labels at start and end
  const startLabel = formatCost(costs[0]);
  const endLabel = formatCost(costs[costs.length - 1]);
  return `
  <div class="sparkline">
    <svg viewBox="0 0 ${W} ${H}" width="${W}" height="${H}" class="spark-svg">
      <polyline points="${esc(points)}" class="spark-line" fill="none"/>
      ${dots}
      <text x="${PAD}" y="${H - 2}" class="spark-lbl">${esc(startLabel)}</text>
      <text x="${W - PAD}" y="${H - 2}" text-anchor="end" class="spark-lbl">${esc(endLabel)}</text>
    </svg>
    <div class="spark-axis">
      ${entries.map((e, i) => {
        // Tick position as a percentage of container width, matching the SVG x coords.
        const x = (PAD + i * xStep) / W * 100;
        // NOTE(review): the ternary is a no-op ('final' maps to itself); kept
        // for parity with the TOC group-label logic above.
        return `<span class="spark-tick" style="left:${x.toFixed(1)}%" title="${esc(e.generatedAt)}">${esc(e.milestoneId === 'final' ? 'final' : e.milestoneId)}</span>`;
      }).join('')}
    </div>
  </div>`;
}
// ─── Helpers ──────────────────────────────────────────────────────────────────
/**
 * Format an ISO timestamp as a short human-readable date, e.g.
 * "Mar 17, 2026, 01:20 PM" (en-US locale).
 *
 * @param iso - ISO-8601 timestamp (or any string `Date` can parse)
 * @returns the formatted date, or `iso` unchanged when the input is unparseable
 */
function formatDateShort(iso: string): string {
  const d = new Date(iso);
  // Bug fix: new Date("garbage") does not throw — it yields an Invalid Date —
  // so the try/catch fallback below never fired and callers rendered the
  // literal string "Invalid Date". Detect the NaN time value explicitly.
  if (Number.isNaN(d.getTime())) return iso;
  try {
    return d.toLocaleString('en-US', { month: 'short', day: 'numeric', year: 'numeric', hour: '2-digit', minute: '2-digit' });
  } catch { return iso; }
}
/**
 * HTML-escape a value for safe interpolation into markup. Nullish input
 * becomes the empty string; numbers are stringified first.
 */
function esc(s: string | number | undefined | null): string {
  if (s == null) return '';
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  // Single pass over all five HTML-significant characters.
  return String(s).replace(/[&<>"']/g, (ch) => entities[ch] ?? ch);
}
// ─── Index CSS ────────────────────────────────────────────────────────────────
/**
 * Stylesheet for the generated reports index page, inlined into the page's
 * <style> tag. Dark theme via CSS custom properties on :root; a print media
 * query flips the palette to light, and a 768px breakpoint collapses the
 * sidebar layout to a single column.
 */
const INDEX_CSS = `
*,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
:root{
--bg-0:#0f1115;--bg-1:#16181d;--bg-2:#1e2028;--bg-3:#272a33;
--border-1:#2b2e38;--border-2:#3b3f4c;
--text-0:#ededef;--text-1:#a1a1aa;--text-2:#71717a;
--accent:#5e6ad2;--accent-subtle:rgba(94,106,210,.12);
--font:'Inter',-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;
--mono:'JetBrains Mono','Fira Code',ui-monospace,monospace;
}
html{font-size:13px}
body{background:var(--bg-0);color:var(--text-0);font-family:var(--font);line-height:1.6;-webkit-font-smoothing:antialiased}
a{color:var(--accent);text-decoration:none}
a:hover{text-decoration:underline}
h2{font-size:14px;font-weight:600;text-transform:uppercase;letter-spacing:.5px;color:var(--text-1);margin-bottom:16px;padding-bottom:8px;border-bottom:1px solid var(--border-1)}
h3{font-size:13px;font-weight:600;color:var(--text-1);margin:16px 0 8px}
code{font-family:var(--mono);font-size:12px;background:var(--bg-3);padding:1px 5px;border-radius:3px}
.empty{color:var(--text-2);font-size:13px;padding:8px 0}
.count{font-size:11px;font-weight:500;color:var(--text-2);background:var(--bg-3);border-radius:3px;padding:1px 6px}
/* Header */
header{background:var(--bg-1);border-bottom:1px solid var(--border-1);padding:12px 32px;position:sticky;top:0;z-index:100}
.hdr-inner{display:flex;align-items:center;gap:16px;max-width:1280px;margin:0 auto}
.branding{display:flex;align-items:baseline;gap:6px;flex-shrink:0}
.logo{font-size:18px;font-weight:800;letter-spacing:-.5px;color:var(--text-0)}
.ver{font-size:10px;color:var(--text-2);font-family:var(--mono)}
.hdr-meta{flex:1;min-width:0}
.hdr-meta h1{font-size:15px;font-weight:600}
.hdr-subtitle{color:var(--text-2);font-weight:400;font-size:13px;margin-left:4px}
.hdr-path{font-size:11px;color:var(--text-2);font-family:var(--mono);display:block;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
.hdr-right{text-align:right;flex-shrink:0}
.gen-lbl{font-size:10px;color:var(--text-2);text-transform:uppercase;letter-spacing:.5px;display:block}
.gen{font-size:11px;color:var(--text-1)}
/* Layout */
.layout{display:grid;grid-template-columns:200px 1fr;gap:0;max-width:1280px;margin:0 auto;min-height:calc(100vh - 120px)}
/* Sidebar */
.sidebar{background:var(--bg-1);border-right:1px solid var(--border-1);padding:20px 14px;position:sticky;top:52px;height:calc(100vh - 52px);overflow-y:auto}
.sidebar-title{font-size:10px;font-weight:600;color:var(--text-2);text-transform:uppercase;letter-spacing:.5px;margin-bottom:12px}
.toc-group{margin-bottom:14px}
.toc-group-label{font-size:11px;font-weight:600;color:var(--text-1);margin-bottom:3px;font-family:var(--mono)}
.toc-group ul{list-style:none;display:flex;flex-direction:column;gap:1px}
.toc-group li{display:flex;align-items:center;gap:6px}
.toc-group a{font-size:11px;color:var(--text-2);padding:2px 4px;border-radius:3px;flex:1;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
.toc-group a:hover{background:var(--bg-2);color:var(--text-0);text-decoration:none}
.toc-kind{font-size:9px;color:var(--text-2);font-family:var(--mono);flex-shrink:0}
/* Main */
main{padding:28px;display:flex;flex-direction:column;gap:40px}
/* Overview */
.idx-summary{display:flex;flex-wrap:wrap;gap:1px;background:var(--border-1);border:1px solid var(--border-1);border-radius:4px;overflow:hidden;margin-bottom:16px}
.idx-stat{background:var(--bg-1);padding:10px 16px;display:flex;flex-direction:column;gap:2px;min-width:100px;flex:1}
.idx-val{font-size:18px;font-weight:600;color:var(--text-0);font-variant-numeric:tabular-nums}
.idx-lbl{font-size:10px;color:var(--text-2);text-transform:uppercase;letter-spacing:.4px}
.idx-progress{display:flex;align-items:center;gap:10px;margin-top:10px}
.idx-bar-track{flex:1;height:4px;background:var(--bg-3);border-radius:2px;overflow:hidden}
.idx-bar-fill{height:100%;background:var(--accent);border-radius:2px}
.idx-pct{font-size:12px;font-weight:600;color:var(--text-1);min-width:40px;text-align:right}
/* Sparkline */
.sparkline-wrap{margin-top:20px}
.sparkline{position:relative}
.spark-svg{display:block;background:var(--bg-1);border:1px solid var(--border-1);border-radius:4px;overflow:visible;max-width:100%}
.spark-line{stroke:var(--accent);stroke-width:1.5;fill:none}
.spark-dot{fill:var(--accent);stroke:var(--bg-1);stroke-width:2;cursor:pointer}
.spark-dot:hover{r:4;fill:var(--text-0)}
.spark-lbl{font-size:10px;fill:var(--text-2);font-family:var(--mono)}
.spark-axis{display:flex;position:relative;height:18px;margin-top:2px}
.spark-tick{position:absolute;transform:translateX(-50%);font-size:9px;color:var(--text-2);font-family:var(--mono);white-space:nowrap}
/* Report cards */
.cards-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(260px,1fr));gap:10px}
.report-card{
display:flex;flex-direction:column;gap:6px;
background:var(--bg-1);border:1px solid var(--border-1);border-radius:4px;
padding:14px;text-decoration:none;color:var(--text-0);
transition:border-color .12s;
}
.report-card:hover{border-color:var(--accent);text-decoration:none}
.card-latest{border-color:var(--accent)}
.card-top{display:flex;align-items:center;gap:8px}
.card-label{flex:1;font-weight:500;font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
.card-kind{font-size:10px;color:var(--text-2);font-family:var(--mono);flex-shrink:0}
.card-date{font-size:11px;color:var(--text-2)}
.card-progress{display:flex;align-items:center;gap:6px}
.card-bar-track{flex:1;height:3px;background:var(--bg-3);border-radius:2px;overflow:hidden}
.card-bar-fill{height:100%;background:var(--accent);border-radius:2px}
.card-pct{font-size:11px;color:var(--text-2);min-width:30px;text-align:right}
.card-stats{display:flex;gap:8px;flex-wrap:wrap}
.card-stats span{font-size:11px;color:var(--text-2);font-variant-numeric:tabular-nums}
.card-delta{display:flex;gap:4px;flex-wrap:wrap}
.card-delta span{font-size:10px;color:var(--text-1);font-family:var(--mono)}
.card-latest-badge{display:none}
/* Footer */
footer{border-top:1px solid var(--border-1);padding:16px 32px}
.ftr-inner{display:flex;align-items:center;gap:6px;justify-content:center;font-size:11px;color:var(--text-2)}
.ftr-sep{color:var(--border-2)}
@media(max-width:768px){
.layout{grid-template-columns:1fr}
.sidebar{position:static;height:auto;border-right:none;border-bottom:1px solid var(--border-1)}
}
@media print{
.sidebar{display:none}
header{position:static}
body{background:#fff;color:#1a1a1a}
:root{--bg-0:#fff;--bg-1:#fafafa;--bg-2:#f5f5f5;--bg-3:#ebebeb;--border-1:#e5e5e5;--border-2:#d4d4d4;--text-0:#1a1a1a;--text-1:#525252;--text-2:#a3a3a3;--accent:#4f46e5}
}
`;

View file

@ -0,0 +1,178 @@
---
name: gsd-headless
description: Orchestrate GSD (Get Shit Done) projects programmatically via headless CLI. Use when an agent needs to create milestones from specs, execute software development workflows, monitor task progress, check project status, or control GSD execution (pause/stop/skip/steer). Triggers on requests to "run gsd", "create milestone", "execute project", "check gsd status", "orchestrate development", "run headless workflow", or any programmatic interaction with the GSD project management system. Essential for building orchestrators that coordinate multiple GSD workers.
---
# GSD Headless Orchestration
Run GSD commands without TUI via `gsd headless`. Spawns an RPC child process, auto-responds to UI prompts, streams progress.
## Command Syntax
```bash
gsd headless [flags] [command] [args...]
```
**Flags:** `--timeout N` (ms, default 300000), `--json` (JSONL to stdout), `--model ID`, `--verbose`
**Exit codes:** 0=complete, 1=error/timeout, 2=blocked
## Core Workflows
### 1. Create + Execute a Milestone (end-to-end)
```bash
gsd headless new-milestone --context spec.md --auto
```
Reads spec, bootstraps `.gsd/`, creates milestone, then chains into auto-mode executing all phases (discuss → research → plan → execute → summarize → complete).
Extra flags for `new-milestone`: `--context <path>` (use `-` for stdin), `--context-text <text>`, `--auto`.
### 2. Run All Queued Work
```bash
gsd headless auto
```
Default command. Loops through all pending units until milestone complete or blocked.
### 3. Run One Unit
```bash
gsd headless next
```
Execute exactly one unit (task/slice/milestone step), then exit. Ideal for step-by-step orchestration with external decision logic between steps.
### 4. Check Status
```bash
gsd headless --json status
```
Returns project state: active milestone/slice/task, phase, progress counts, blockers. Parse the JSONL output for machine-readable state.
### 5. Dispatch Specific Phase
```bash
gsd headless dispatch research|plan|execute|complete|reassess|uat|replan
```
Force-route to a specific phase, bypassing normal state-machine routing.
## Orchestrator Patterns
### Poll-and-React Loop
```bash
# Check status, decide what to do
STATUS=$(gsd headless --json status 2>/dev/null)
EXIT=$?
case $EXIT in
0) echo "Complete" ;;
2) echo "Blocked — needs intervention" ;;
*) echo "Error" ;;
esac
```
### Step-by-Step with Monitoring
```bash
while true; do
gsd headless next
EXIT=$?
[ $EXIT -ne 0 ] && break
# Check progress, log, decide whether to continue
gsd headless --json status
done
```
### Multi-Session Orchestration
GSD tracks concurrent workers via file-based IPC in `.gsd/parallel/`. See [references/multi-session.md](references/multi-session.md) for the full architecture.
**Quick overview:**
Each worker spawns with `GSD_MILESTONE_LOCK=M00X` + its own git worktree. Workers write heartbeats to `.gsd/parallel/<milestoneId>.status.json`. The orchestrator enumerates all status files to get a dashboard of all workers, and sends commands via signal files.
```bash
# Spawn a worker for milestone M001 in its worktree
GSD_MILESTONE_LOCK=M001 GSD_PARALLEL_WORKER=1 \
gsd headless --json auto \
--cwd .gsd/worktrees/M001 2>worker-M001.log &
# Monitor all workers: read .gsd/parallel/*.status.json
for f in .gsd/parallel/*.status.json; do
jq '{mid: .milestoneId, state: .state, unit: .currentUnit.id, cost: .cost}' "$f"
done
# Send pause signal to M001
echo '{"signal":"pause","sentAt":'$(date +%s000)',"from":"coordinator"}' \
> .gsd/parallel/M001.signal.json
```
**Status file fields:** `milestoneId`, `pid`, `state` (running/paused/stopped/error), `currentUnit`, `completedUnits`, `cost`, `lastHeartbeat`, `startedAt`, `worktreePath`.
**Signal commands:** `pause`, `resume`, `stop`, `rebase`.
**Liveness detection:** PID alive check (`kill -0 $pid`) + heartbeat freshness (30s timeout). Stale sessions are auto-cleaned.
**For multiple projects:** each project has its own `.gsd/` directory. The orchestrator must track `(projectPath, milestoneId)` tuples externally.
### JSONL Event Stream
Use `--json` to get real-time events on stdout for downstream processing:
```bash
gsd headless --json auto 2>/dev/null | while read -r line; do
TYPE=$(echo "$line" | jq -r '.type')
case "$TYPE" in
tool_execution_start) echo "Tool: $(echo "$line" | jq -r '.toolName')" ;;
extension_ui_request) echo "GSD: $(echo "$line" | jq -r '.message // .title // empty')" ;;
agent_end) echo "Session ended" ;;
esac
done
```
Event types: `agent_start`, `agent_end`, `tool_execution_start`, `tool_execution_end`, `extension_ui_request`, `message_update`, `error`.
## Answer Injection
Pre-supply answers for non-interactive runs. See [references/answer-injection.md](references/answer-injection.md) for schema and usage.
## GSD Project Structure
All state lives in `.gsd/` as markdown files (version-controllable):
```
.gsd/
milestones/M001/
M001-CONTEXT.md # Requirements, scope, decisions
M001-ROADMAP.md # Slices with tasks, dependencies, checkboxes
M001-SUMMARY.md # Completion summary
slices/S01/
S01-PLAN.md # Task list
S01-SUMMARY.md # Slice summary with frontmatter
tasks/T01-PLAN.md # Individual task spec
```
State is derived from files on disk — checkboxes in ROADMAP.md are the source of truth for completion.
## All Headless Commands
Quick reference — see [references/commands.md](references/commands.md) for the complete list.
| Command | Purpose |
|---------|---------|
| `auto` | Run all queued units (default) |
| `next` | Run one unit |
| `status` | Progress dashboard |
| `new-milestone` | Create milestone from spec |
| `queue` | Queue/reorder milestones |
| `history` | View execution history |
| `stop` / `pause` | Control auto-mode |
| `dispatch <phase>` | Force specific phase |
| `skip` / `undo` | Unit control |
| `doctor` | Health check + auto-fix |
| `steer <desc>` | Hard-steer plan mid-execution |

View file

@ -0,0 +1,54 @@
# Answer Injection
Pre-supply answers to eliminate interactive prompts during headless execution.
## Answer File Schema
```json
{
"questions": {
"question_id": "selected_option_label",
"multi_select_question": ["option_a", "option_b"]
},
"secrets": {
"API_KEY": "sk-...",
"DATABASE_URL": "postgres://..."
},
"defaults": {
"strategy": "first_option"
}
}
```
### Fields
- **questions**: Map question ID → answer. String for single-select, string[] for multi-select.
- **secrets**: Map env var name → value. Used for `secure_env_collect` tool calls. Values are never logged.
- **defaults.strategy**: Fallback for unmatched questions.
- `"first_option"` — auto-select first available option
- `"cancel"` — cancel the request
## How It Works
Two-phase correlation:
1. **Observe** `tool_execution_start` events for `ask_user_questions` — extracts question metadata (ID, options, allowMultiple)
2. **Match** subsequent `extension_ui_request` events to metadata, respond with pre-supplied answer
Handles out-of-order events (extension_ui_request can arrive before tool_execution_start in RPC mode) via deferred processing queue.
## Without Answer Injection
Headless mode has built-in auto-responders:
- **select** → picks first option
- **confirm** → auto-confirms
- **input** → empty string
- **editor** → returns prefill or empty
Answer injection overrides these defaults with specific answers when precision matters.
## Diagnostics
The injector tracks stats:
- `questionsAnswered` / `questionsDefaulted`
- `secretsProvided` / `secretsMissing`
- `fireAndForgetConsumed` / `confirmationsHandled`

View file

@ -0,0 +1,59 @@
# GSD Commands Reference
All commands can be run via `gsd headless [command]`.
## Workflow Commands
| Command | Description |
|---------|-------------|
| `auto` | Autonomous mode — loop until milestone complete (default) |
| `next` | Step mode — execute one unit, then exit |
| `stop` | Stop auto-mode gracefully |
| `pause` | Pause auto-mode (preserves state, resumable) |
| `new-milestone` | Create milestone from specification (requires `--context`) |
| `dispatch <phase>` | Force-dispatch: research, plan, execute, complete, reassess, uat, replan |
## Status & Monitoring
| Command | Description |
|---------|-------------|
| `status` | Progress dashboard (active unit, phase, blockers) |
| `visualize` | Workflow visualizer (deps, metrics, timeline) |
| `history` | Execution history (supports --cost, --phase, --model, limit) |
## Unit Control
| Command | Description |
|---------|-------------|
| `skip` | Prevent a unit from auto-mode dispatch |
| `undo` | Revert last completed unit (--force flag) |
| `steer <desc>` | Hard-steer plan documents during execution |
| `queue` | Queue and reorder future milestones |
| `capture` | Fire-and-forget thought capture |
| `triage` | Manually trigger triage of pending captures |
## Configuration & Health
| Command | Description |
|---------|-------------|
| `prefs` | Manage preferences (global/project/status/wizard/setup) |
| `config` | Set API keys for external tools |
| `doctor` | Runtime health checks with auto-fix |
| `hooks` | Show configured post-unit and pre-dispatch hooks |
| `knowledge <rule\|pattern\|lesson>` | Add persistent project knowledge |
| `cleanup` | Remove merged branches or snapshots |
| `export` | Export results (--json, --markdown) |
| `migrate` | Migrate v1 .planning directory to .gsd format |
## Phases
GSD workflows progress through these phases:
`pre-planning` → `needs-discussion` → `discussing` → `researching` → `planning` → `executing` → `verifying` → `summarizing` → `advancing` → `validating-milestone` → `completing-milestone` → `complete`
Special phases: `paused`, `blocked`, `replanning-slice`
## Hierarchy
- **Milestone**: Shippable version (4-10 slices, 1-4 weeks)
- **Slice**: One demoable vertical capability (1-7 tasks, 1-3 days)
- **Task**: One context-window-sized unit of work (one session)

View file

@ -0,0 +1,185 @@
# Multi-Session Orchestration
How to run and monitor multiple concurrent GSD sessions.
## Architecture
GSD uses **file-based IPC** — no sockets or ports. All coordination happens through JSON files in `.gsd/parallel/`.
```
.gsd/parallel/
├── M001.status.json # Worker heartbeat + state
├── M001.signal.json # Coordinator → worker commands (ephemeral)
├── M002.status.json
├── M003.status.json
└── ...
```
## Worker Isolation
Each worker gets:
1. **`GSD_MILESTONE_LOCK=M00X`** — state derivation only sees this milestone
2. **`GSD_PARALLEL_WORKER=1`** — prevents nested parallel spawns
3. **Own git worktree** at `.gsd/worktrees/M00X/` — branch `milestone/M00X`
Workers cannot interfere with each other. Each has its own filesystem and git branch.
## Status File Schema
Written atomically (`.tmp` + rename) by each worker at `.gsd/parallel/<milestoneId>.status.json`:
```json
{
"milestoneId": "M001",
"pid": 12345,
"state": "running",
"currentUnit": {
"type": "task",
"id": "T03",
"startedAt": 1710000000000
},
"completedUnits": 7,
"cost": 1.23,
"lastHeartbeat": 1710000015000,
"startedAt": 1710000000000,
"worktreePath": ".gsd/worktrees/M001"
}
```
**States:** `running`, `paused`, `stopped`, `error`
## Signal Files
Coordinator writes to `.gsd/parallel/<milestoneId>.signal.json`. Worker consumes and deletes on next dispatch cycle.
```json
{
"signal": "pause",
"sentAt": 1710000020000,
"from": "coordinator"
}
```
**Signals:** `pause`, `resume`, `stop`, `rebase`
## Spawning Workers
```bash
# Spawn worker in its worktree
GSD_MILESTONE_LOCK=M001 \
GSD_PARALLEL_WORKER=1 \
GSD_BIN_PATH=$(which gsd) \
gsd --mode json --print "/gsd auto" \
2>logs/M001.log &
WORKER_PID=$!
```
Workers emit NDJSON on stdout. Parse `message_end` events for cost tracking:
```bash
# Extract cost from worker output
gsd --mode json --print "/gsd auto" | while read -r line; do
COST=$(echo "$line" | jq -r 'select(.type=="message_end") | .message.usage.cost.total // empty')
[ -n "$COST" ] && echo "Cost update: $COST"
done
```
## Monitoring All Workers
```bash
# Dashboard: enumerate all status files
for f in .gsd/parallel/*.status.json; do
[ -f "$f" ] || continue
jq -r '[.milestoneId, .state, (.currentUnit.id // "idle"), "\(.cost | tostring)$"] | join("\t")' "$f"
done
# Liveness check
for f in .gsd/parallel/*.status.json; do
PID=$(jq -r '.pid' "$f")
MID=$(jq -r '.milestoneId' "$f")
if kill -0 "$PID" 2>/dev/null; then
echo "$MID: alive (pid=$PID)"
else
echo "$MID: DEAD (pid=$PID) — cleanup needed"
rm "$f"
fi
done
```
## Sending Commands
```bash
# Pause a worker
send_signal() {
local MID=$1 SIGNAL=$2
echo "{\"signal\":\"$SIGNAL\",\"sentAt\":$(date +%s000),\"from\":\"coordinator\"}" \
> ".gsd/parallel/${MID}.signal.json"
}
send_signal M001 pause
send_signal M002 stop
send_signal M003 resume
```
## Budget Enforcement
Track aggregate cost across all workers:
```bash
TOTAL=$(jq -s 'map(.cost) | add // 0' .gsd/parallel/*.status.json)
CEILING=50.00
if (( $(echo "$TOTAL > $CEILING" | bc -l) )); then
echo "Budget exceeded ($TOTAL > $CEILING) — stopping all"
for f in .gsd/parallel/*.status.json; do
MID=$(jq -r '.milestoneId' "$f")
send_signal "$MID" stop
done
fi
```
## Stale Session Cleanup
A session is stale when:
- PID is dead (`kill -0 $pid` fails), OR
- `lastHeartbeat` is older than 30 seconds
```bash
NOW=$(date +%s000)
STALE_THRESHOLD=30000
for f in .gsd/parallel/*.status.json; do
PID=$(jq -r '.pid' "$f")
HB=$(jq -r '.lastHeartbeat' "$f")
AGE=$((NOW - HB))
if ! kill -0 "$PID" 2>/dev/null || [ "$AGE" -gt "$STALE_THRESHOLD" ]; then
echo "Stale: $(jq -r '.milestoneId' "$f") — removing"
rm "$f"
fi
done
```
## Multi-Project Orchestration
Within one project, milestones are tracked automatically in `.gsd/parallel/`. For orchestrating across **multiple projects**, maintain an external registry:
```json
{
"sessions": [
{ "project": "/path/to/project-a", "milestoneId": "M001" },
{ "project": "/path/to/project-b", "milestoneId": "M001" },
{ "project": "/path/to/project-b", "milestoneId": "M002" }
]
}
```
Then poll each project's `.gsd/parallel/` directory. GSD has no cross-project awareness — the orchestrator must bridge this gap.
## Built-in Parallel Commands
Inside an interactive GSD session, these commands manage the parallel orchestrator:
| Command | Description |
|---------|-------------|
| `/gsd parallel start` | Analyze eligibility, spawn workers |
| `/gsd parallel status` | Show all workers, costs, progress |
| `/gsd parallel stop [MID]` | Stop one or all workers |
| `/gsd parallel pause [MID]` | Pause without killing |
| `/gsd parallel resume [MID]` | Resume paused worker |
| `/gsd parallel merge [MID]` | Merge completed milestone branch |

View file

@ -37,6 +37,15 @@ blocker_discovered: false
{{whatWasVerifiedAndHow — commands run, tests passed, behavior confirmed}}
## Verification Evidence
<!-- Populated from verification gate output. If the gate ran, fill in the table below.
If no gate ran (e.g., no verification commands discovered), note that. -->
| # | Command | Exit Code | Verdict | Duration |
|---|---------|-----------|---------|----------|
| {{row}} | {{command}} | {{exitCode}} | {{verdict}} | {{duration}} |
## Diagnostics
{{howToInspectWhatThisTaskBuiltLater — status surfaces, logs, error shapes, failure artifacts, or none}}

View file

@ -201,4 +201,85 @@ describe("continue-here", () => {
}
});
});
describe("context-pressure monitor integration", () => {
// End-to-end simulation of the context-pressure monitor's one-shot wrap-up
// logic against a real runtime record on disk: fire once at/above the
// threshold, then never again for the same unit.
it("should fire wrap-up when context >= threshold and mark continueHereFired", async () => {
  const { writeUnitRuntimeRecord, readUnitRuntimeRecord, clearUnitRuntimeRecord } = await import("../unit-runtime.js");
  const fs = await import("node:fs");
  const path = await import("node:path");
  const os = await import("node:os");
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "continue-here-monitor-"));
  try {
    // Simulate the monitor's one-shot logic:
    // 1. Write initial runtime record (continueHereFired=false)
    const startedAt = Date.now();
    writeUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01", startedAt, {
      phase: "dispatched",
      wrapupWarningSent: false,
    });
    const budget = computeBudgets(128_000);
    const threshold = budget.continueThresholdPercent;
    // Simulate the monitor poll: context at 75% (above threshold)
    const contextPercent = 75;
    const runtime = readUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01");
    assert.ok(runtime, "runtime record should exist");
    assert.equal(runtime!.continueHereFired, false, "initially false");
    // Check: should fire — same guard expression the monitor evaluates.
    const shouldFire = !runtime!.continueHereFired
      && contextPercent >= threshold;
    assert.ok(shouldFire, "should fire when context >= threshold and not yet fired");
    // Mark as fired (what the monitor does)
    writeUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01", startedAt, {
      continueHereFired: true,
    });
    // Verify one-shot: second poll should NOT fire
    const runtime2 = readUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01");
    assert.ok(runtime2, "runtime record should still exist");
    assert.equal(runtime2!.continueHereFired, true, "should be marked as fired");
    const shouldFireAgain = !runtime2!.continueHereFired
      && contextPercent >= threshold;
    assert.equal(shouldFireAgain, false, "must not fire again — one-shot guard");
    // Clean up
    clearUnitRuntimeRecord(tmpDir, "execute-task", "M001/S01/T01");
  } finally {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }
});
it("should not fire when context is below threshold", () => {
  // A 50% context reading sits under the wrap-up threshold, so the
  // monitor's guard expression must evaluate to "do not fire".
  const threshold = computeBudgets(200_000).continueThresholdPercent;
  const alreadyFired = false;
  const observedPercent = 50;
  const wouldFire = !alreadyFired && observedPercent >= threshold;
  assert.equal(wouldFire, false, "50% should not trigger continue-here");
});
it("should not fire when contextUsage is null/undefined", () => {
  // Replays the monitor's full guard chain for both degenerate usage shapes:
  // a missing usage object, and a usage object whose percent is null.
  const threshold = computeBudgets(128_000).continueThresholdPercent;
  type Usage = { percent: number | null } | undefined;
  const missingUsage: Usage = undefined;
  const firesWhenMissing =
    missingUsage != null
    && missingUsage.percent != null
    && missingUsage.percent >= threshold;
  assert.equal(firesWhenMissing, false, "undefined usage must not fire");
  const nullPercentUsage: { percent: number | null } = { percent: null };
  const firesWhenNull =
    nullPercentUsage.percent != null
    && nullPercentUsage.percent >= threshold;
  assert.equal(firesWhenNull, false, "null percent must not fire");
});
});
});

View file

@ -0,0 +1,132 @@
/**
 * Regression test for issue #909.
 *
 * When S##-PLAN.md exists (causing deriveState phase:'executing') but the
 * individual task plan files (tasks/T01-PLAN.md, etc.) are absent, the dispatch
 * table must recover by re-running plan-slice, NOT by hard-stopping.
 *
 * Prior behaviour: action:"stop", causing an infinite loop on restart.
 * Fixed behaviour: action:"dispatch" with unitType:"plan-slice".
 */
import test from "node:test";
import assert from "node:assert/strict";
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { resolveDispatch } from "../auto-dispatch.ts";
import type { DispatchContext } from "../auto-dispatch.ts";
import type { GSDState } from "../types.ts";
/** Build a minimal GSDState fixture; individual fields can be overridden per test. */
function makeState(overrides: Partial<GSDState> = {}): GSDState {
  const defaults: GSDState = {
    activeMilestone: { id: "M002", title: "Test Milestone" },
    activeSlice: { id: "S03", title: "Third Slice" },
    activeTask: { id: "T01", title: "First Task" },
    phase: "executing",
    recentDecisions: [],
    blockers: [],
    nextAction: "",
    registry: [],
  };
  // Later spread wins, so any supplied override replaces its default.
  return { ...defaults, ...overrides };
}
/** Build a DispatchContext rooted at basePath for milestone M002, with optional state overrides. */
function makeContext(basePath: string, stateOverrides?: Partial<GSDState>): DispatchContext {
  const state = makeState(stateOverrides);
  return {
    basePath,
    mid: "M002",
    midTitle: "Test Milestone",
    state,
    prefs: undefined,
  };
}
// ─── Scaffold helpers ──────────────────────────────────────────────────────
/** Write <sid>-PLAN.md with two unchecked tasks under the slice directory. */
function scaffoldSlicePlan(basePath: string, mid: string, sid: string): void {
  const sliceDir = join(basePath, ".gsd", "milestones", mid, "slices", sid);
  mkdirSync(sliceDir, { recursive: true });
  const plan =
    `# ${sid}: Third Slice\n` +
    "\n" +
    "## Tasks\n" +
    "- [ ] **T01: Do something** `est:1h`\n" +
    "- [ ] **T02: Do another thing** `est:30m`\n";
  writeFileSync(join(sliceDir, `${sid}-PLAN.md`), plan);
}
/** Write tasks/<tid>-PLAN.md under the slice directory, mirroring a planned task. */
function scaffoldTaskPlan(basePath: string, mid: string, sid: string, tid: string): void {
  const taskDir = join(basePath, ".gsd", "milestones", mid, "slices", sid, "tasks");
  mkdirSync(taskDir, { recursive: true });
  const plan =
    `# ${tid}: Do something\n` +
    "\n" +
    "## Steps\n" +
    "- [ ] Step 1\n";
  writeFileSync(join(taskDir, `${tid}-PLAN.md`), plan);
}
// ─── Tests ─────────────────────────────────────────────────────────────────
// #909 regression: slice plan exists but tasks/ is empty — dispatch must
// recover by re-running plan-slice rather than hard-stopping.
test("dispatch: missing task plan triggers plan-slice (not stop) — issue #909", async () => {
  const tmp = mkdtempSync(join(tmpdir(), "gsd-909-"));
  try {
    // Slice plan exists with tasks, but tasks/ directory is empty
    scaffoldSlicePlan(tmp, "M002", "S03");
    const ctx = makeContext(tmp);
    const result = await resolveDispatch(ctx);
    assert.equal(result.action, "dispatch", "should dispatch, not stop");
    // The `result.action === "dispatch" &&` guards narrow the discriminated
    // union so unitType/unitId are accessible in the message expressions.
    assert.ok(result.action === "dispatch" && result.unitType === "plan-slice",
      `unitType should be plan-slice, got: ${result.action === "dispatch" ? result.unitType : "(stop)"}`);
    assert.ok(result.action === "dispatch" && result.unitId === "M002/S03",
      `unitId should be M002/S03, got: ${result.action === "dispatch" ? result.unitId : "(stop)"}`);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// Happy path: when the task plan file exists, dispatch proceeds to execute-task.
test("dispatch: present task plan proceeds to execute-task normally", async () => {
  const tmp = mkdtempSync(join(tmpdir(), "gsd-909-ok-"));
  try {
    scaffoldSlicePlan(tmp, "M002", "S03");
    scaffoldTaskPlan(tmp, "M002", "S03", "T01");
    const ctx = makeContext(tmp);
    const result = await resolveDispatch(ctx);
    assert.equal(result.action, "dispatch");
    assert.ok(result.action === "dispatch" && result.unitType === "execute-task",
      `unitType should be execute-task, got: ${result.action === "dispatch" ? result.unitType : "(stop)"}`);
    assert.ok(result.action === "dispatch" && result.unitId === "M002/S03/T01",
      `unitId should be M002/S03/T01, got: ${result.action === "dispatch" ? result.unitId : "(stop)"}`);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("dispatch: plan-slice recovery loop — second call after plan-slice still recovers cleanly", async () => {
  // Simulate: plan-slice ran but T01-PLAN.md is still missing (e.g. agent crashed mid-write).
  // Dispatch should still re-dispatch plan-slice, not hard-stop.
  const tmp = mkdtempSync(join(tmpdir(), "gsd-909-loop-"));
  try {
    scaffoldSlicePlan(tmp, "M002", "S03");
    const ctx = makeContext(tmp);
    const r1 = await resolveDispatch(ctx);
    assert.equal(r1.action, "dispatch");
    assert.ok(r1.action === "dispatch" && r1.unitType === "plan-slice");
    // Still no task plan written — dispatch again
    const r2 = await resolveDispatch(ctx);
    assert.equal(r2.action, "dispatch");
    assert.ok(r2.action === "dispatch" && r2.unitType === "plan-slice",
      "should keep dispatching plan-slice until task plans appear");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});

View file

@ -193,6 +193,20 @@ async function main(): Promise<void> {
assertEq(result.issues.length, 0, "no issues on clean state");
}
console.log("\n=== health gate: missing STATE.md does NOT block dispatch (#889) ===");
{
  // Regression for #889: a fresh worktree has a milestones tree but no
  // STATE.md. The gate must treat that as informational (proceed=true,
  // zero issues) and record the STATE.md status in fixesApplied.
  const dir = realpathSync(mkdtempSync(join(tmpdir(), "doc-proactive-")));
  cleanups.push(dir);
  // Create milestones dir but no STATE.md — mimics fresh worktree
  mkdirSync(join(dir, ".gsd", "milestones", "M001"), { recursive: true });
  writeFileSync(join(dir, ".gsd", "milestones", "M001", "M001-ROADMAP.md"), "# Roadmap\n");
  const result = await preDispatchHealthGate(dir);
  assertTrue(result.proceed, "gate must NOT block when STATE.md is missing (deadlock #889)");
  assertEq(result.issues.length, 0, "missing STATE.md is not a blocking issue");
  assertTrue(result.fixesApplied.some((f: string) => f.includes("STATE.md")), "reports STATE.md status as info");
}
console.log("\n=== health gate: stale crash lock auto-cleared ===");
{
const dir = realpathSync(mkdtempSync(join(tmpdir(), "doc-proactive-")));

View file

@ -0,0 +1,298 @@
/**
* Tests for parallel orchestrator crash recovery.
*
* Validates that orchestrator state is persisted to disk and can be
* restored after a coordinator crash, with PID liveness filtering.
*/
import {
mkdtempSync,
mkdirSync,
readFileSync,
writeFileSync,
existsSync,
rmSync,
} from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import {
persistState,
restoreState,
resetOrchestrator,
getOrchestratorState,
type PersistedState,
} from "../parallel-orchestrator.ts";
import { writeSessionStatus, readAllSessionStatuses, removeSessionStatus } from "../session-status-io.ts";
import { createTestContext } from './test-helpers.ts';
// Shared assertion helpers; report() prints the pass/fail summary at the
// end of this script-style test file.
const { assertEq, assertTrue, report } = createTestContext();
// ─── Helpers ──────────────────────────────────────────────────────────────────
/**
 * Create a fresh temp directory containing an empty `.gsd/` subfolder,
 * mimicking a minimal GSD project root for orchestrator-state tests.
 */
function makeTempDir(): string {
  const root = mkdtempSync(join(tmpdir(), "gsd-crash-recovery-"));
  const gsdDir = join(root, ".gsd");
  mkdirSync(gsdDir, { recursive: true });
  return root;
}
/** Path of the persisted orchestrator state file under `.gsd/`. */
function stateFilePath(basePath: string): string {
  const relativePath = join(".gsd", "orchestrator.json");
  return join(basePath, relativePath);
}
/** Serialize `state` as pretty-printed JSON to the orchestrator state file. */
function writeStateFile(basePath: string, state: PersistedState): void {
  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(stateFilePath(basePath), serialized, "utf-8");
}
/**
 * Build a PersistedState fixture: an active orchestrator with no workers,
 * zero accumulated cost, and a 3-worker config snapshot. Pass `overrides`
 * to customise fields per test.
 */
function makePersistedState(overrides: Partial<PersistedState> = {}): PersistedState {
  const base: PersistedState = {
    active: true,
    workers: [],
    totalCost: 0,
    startedAt: Date.now(),
    configSnapshot: { max_workers: 3 },
  };
  return { ...base, ...overrides };
}
// ─── Tests ────────────────────────────────────────────────────────────────────
// Test 1: persistState writes valid JSON
// NOTE(review): persistState itself is not invoked — the test round-trips
// the on-disk shape via writeStateFile/readFileSync; confirm this shape
// stays in sync with persistState's actual output.
{
  const basePath = makeTempDir();
  try {
    // We can't call persistState directly without internal state set up,
    // so we test the round-trip by writing a state file and reading it back
    const state = makePersistedState({
      workers: [
        {
          milestoneId: "M001",
          title: "M001",
          pid: process.pid,
          worktreePath: "/tmp/wt-M001",
          startedAt: Date.now(),
          state: "running",
          completedUnits: 3,
          cost: 0.15,
        },
      ],
      totalCost: 0.15,
    });
    writeStateFile(basePath, state);
    const raw = readFileSync(stateFilePath(basePath), "utf-8");
    const parsed = JSON.parse(raw) as PersistedState;
    assertEq(parsed.active, true, "persistState: active field preserved");
    assertEq(parsed.workers.length, 1, "persistState: worker count preserved");
    assertEq(parsed.workers[0].milestoneId, "M001", "persistState: milestoneId preserved");
    assertEq(parsed.workers[0].cost, 0.15, "persistState: cost preserved");
    assertEq(parsed.totalCost, 0.15, "persistState: totalCost preserved");
  } finally {
    rmSync(basePath, { recursive: true, force: true });
  }
}
// Test 2: restoreState returns null for missing file
{
  const basePath = makeTempDir();
  try {
    const result = restoreState(basePath);
    assertEq(result, null, "restoreState: returns null when no state file");
  } finally {
    rmSync(basePath, { recursive: true, force: true });
  }
}
// Test 3: restoreState filters dead PIDs
{
  const basePath = makeTempDir();
  try {
    // PID 99999999 is almost certainly not alive
    const state = makePersistedState({
      workers: [
        {
          milestoneId: "M001",
          title: "M001",
          pid: 99999999,
          worktreePath: "/tmp/wt-M001",
          startedAt: Date.now(),
          state: "running",
          completedUnits: 0,
          cost: 0,
        },
        {
          milestoneId: "M002",
          title: "M002",
          pid: 99999998,
          worktreePath: "/tmp/wt-M002",
          startedAt: Date.now(),
          state: "running",
          completedUnits: 0,
          cost: 0,
        },
      ],
    });
    writeStateFile(basePath, state);
    const result = restoreState(basePath);
    // Both PIDs are dead, so result should be null and file should be cleaned up
    assertEq(result, null, "restoreState: returns null when all PIDs dead");
    assertTrue(!existsSync(stateFilePath(basePath)), "restoreState: cleans up state file when all dead");
  } finally {
    rmSync(basePath, { recursive: true, force: true });
  }
}
// Test 4: restoreState keeps alive PIDs
{
  const basePath = makeTempDir();
  try {
    // Use current process PID (definitely alive)
    const state = makePersistedState({
      workers: [
        {
          milestoneId: "M001",
          title: "M001",
          pid: process.pid,
          worktreePath: "/tmp/wt-M001",
          startedAt: Date.now(),
          state: "running",
          completedUnits: 5,
          cost: 0.25,
        },
        {
          milestoneId: "M002",
          title: "M002",
          pid: 99999999, // dead
          worktreePath: "/tmp/wt-M002",
          startedAt: Date.now(),
          state: "running",
          completedUnits: 0,
          cost: 0,
        },
      ],
      totalCost: 0.25,
    });
    writeStateFile(basePath, state);
    const result = restoreState(basePath);
    assertTrue(result !== null, "restoreState: returns state when alive PID exists");
    assertEq(result!.workers.length, 1, "restoreState: filters out dead PID");
    assertEq(result!.workers[0].milestoneId, "M001", "restoreState: keeps alive worker");
    assertEq(result!.workers[0].pid, process.pid, "restoreState: preserves PID");
    assertEq(result!.workers[0].completedUnits, 5, "restoreState: preserves progress");
  } finally {
    rmSync(basePath, { recursive: true, force: true });
  }
}
// Test 5: restoreState skips stopped/error workers even with alive PIDs
{
  const basePath = makeTempDir();
  try {
    const state = makePersistedState({
      workers: [
        {
          milestoneId: "M001",
          title: "M001",
          pid: process.pid,
          worktreePath: "/tmp/wt-M001",
          startedAt: Date.now(),
          state: "stopped",
          completedUnits: 10,
          cost: 0.50,
        },
      ],
    });
    writeStateFile(basePath, state);
    const result = restoreState(basePath);
    assertEq(result, null, "restoreState: skips stopped workers");
  } finally {
    rmSync(basePath, { recursive: true, force: true });
  }
}
// Test 6: orphan detection finds stale sessions
{
  const basePath = makeTempDir();
  try {
    // Write a session status with a dead PID
    mkdirSync(join(basePath, ".gsd", "parallel"), { recursive: true });
    writeSessionStatus(basePath, {
      milestoneId: "M001",
      pid: 99999999,
      state: "running",
      currentUnit: null,
      completedUnits: 3,
      cost: 0.10,
      lastHeartbeat: Date.now(),
      startedAt: Date.now(),
      worktreePath: "/tmp/wt-M001",
    });
    // Write a session status with alive PID
    writeSessionStatus(basePath, {
      milestoneId: "M002",
      pid: process.pid,
      state: "running",
      currentUnit: null,
      completedUnits: 1,
      cost: 0.05,
      lastHeartbeat: Date.now(),
      startedAt: Date.now(),
      worktreePath: "/tmp/wt-M002",
    });
    // Read all sessions — both should exist initially
    const before = readAllSessionStatuses(basePath);
    assertEq(before.length, 2, "orphan: both sessions exist before detection");
    // Now simulate orphan detection logic (same as prepareParallelStart)
    const sessions = readAllSessionStatuses(basePath);
    const orphans: Array<{ milestoneId: string; pid: number; alive: boolean }> = [];
    for (const session of sessions) {
      let alive: boolean;
      try {
        // Signal 0 performs a liveness probe: it throws for dead/invalid PIDs
        // without actually delivering a signal.
        process.kill(session.pid, 0);
        alive = true;
      } catch {
        alive = false;
      }
      orphans.push({ milestoneId: session.milestoneId, pid: session.pid, alive });
      if (!alive) {
        removeSessionStatus(basePath, session.milestoneId);
      }
    }
    assertTrue(orphans.length === 2, "orphan: detected both sessions");
    const deadOrphan = orphans.find(o => o.milestoneId === "M001");
    assertTrue(deadOrphan !== undefined && !deadOrphan.alive, "orphan: M001 detected as dead");
    const aliveOrphan = orphans.find(o => o.milestoneId === "M002");
    assertTrue(aliveOrphan !== undefined && aliveOrphan.alive, "orphan: M002 detected as alive");
    // Dead session should be cleaned up
    const after = readAllSessionStatuses(basePath);
    assertEq(after.length, 1, "orphan: dead session cleaned up");
    assertEq(after[0].milestoneId, "M002", "orphan: alive session remains");
  } finally {
    rmSync(basePath, { recursive: true, force: true });
  }
}
// Test 7: restoreState handles corrupt JSON gracefully
{
  const basePath = makeTempDir();
  try {
    writeFileSync(stateFilePath(basePath), "{ not valid json !!!", "utf-8");
    const result = restoreState(basePath);
    assertEq(result, null, "restoreState: returns null for corrupt JSON");
  } finally {
    rmSync(basePath, { recursive: true, force: true });
  }
}
// Clean up module state
resetOrchestrator();
report();

View file

@ -35,6 +35,7 @@ import {
getWorkerStatuses,
startParallel,
stopParallel,
shutdownParallel,
pauseWorker,
resumeWorker,
getAggregateCost,
@ -338,6 +339,14 @@ describe("parallel-orchestrator: lifecycle", () => {
assert.ok(signal);
assert.equal(signal.signal, "pause");
});
// shutdownParallel must fully clear module state, not merely stop workers:
// isParallelActive() flips to false and getOrchestratorState() returns null.
it("shutdownParallel deactivates the orchestrator state", async () => {
  await startParallel(base, ["M001"], undefined);
  assert.equal(isParallelActive(), true);
  await shutdownParallel(base);
  assert.equal(isParallelActive(), false);
  assert.equal(getOrchestratorState(), null);
});
});
describe("parallel-orchestrator: budget", () => {

View file

@ -0,0 +1,71 @@
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import { createTestContext } from './test-helpers.ts';
// ESM has no __dirname; derive it from import.meta.url so the prompts
// directory resolves relative to this test file, not the CWD.
const __dirname = dirname(fileURLToPath(import.meta.url));
const worktreePromptsDir = join(__dirname, "..", "prompts");
const { assertTrue, report } = createTestContext();
/**
 * Load `prompts/<name>.md` from the worktree and substitute every
 * `{{key}}` placeholder with the corresponding value from `vars`.
 * Returns the rendered prompt with surrounding whitespace trimmed.
 */
function loadPromptFromWorktree(name: string, vars: Record<string, string> = {}): string {
  const promptPath = join(worktreePromptsDir, `${name}.md`);
  const raw = readFileSync(promptPath, "utf-8");
  const rendered = Object.entries(vars).reduce(
    (acc, [key, value]) => acc.replaceAll(`{{${key}}}`, value),
    raw,
  );
  return rendered.trim();
}
// Baseline template variables shared by every prompt rendering in this file;
// each test spreads these and adds a per-case `commitInstruction`.
const BASE_VARS = {
  workingDirectory: "/tmp/test-project",
  milestoneId: "M001",
  sliceId: "S01",
  sliceTitle: "Test Slice",
  slicePath: ".gsd/milestones/M001/slices/S01",
  roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md",
  researchPath: ".gsd/milestones/M001/slices/S01/S01-RESEARCH.md",
  outputPath: "/tmp/test-project/.gsd/milestones/M001/slices/S01/S01-PLAN.md",
  inlinedContext: "--- test inlined context ---",
  dependencySummaries: "",
  executorContextConstraints: "",
};
/**
 * Exercises the plan-slice prompt template: the commitInstruction variable
 * must render correctly in both commit_docs modes, and no `{{...}}`
 * placeholder may survive substitution.
 */
async function main(): Promise<void> {
  // ─── commit_docs=true (default): commit step is present ─────────────────
  console.log("\n=== plan-slice prompt: commit_docs default (true) ===");
  {
    const commitInstruction = `Commit: \`docs(S01): add slice plan\``;
    const result = loadPromptFromWorktree("plan-slice", { ...BASE_VARS, commitInstruction });
    assertTrue(result.includes("docs(S01): add slice plan"), "commit step present when commit_docs is not false");
    assertTrue(result.includes("Update `.gsd/STATE.md`"), "STATE.md update step present");
    assertTrue(!result.includes("{{commitInstruction}}"), "no unresolved placeholder");
  }
  // ─── commit_docs=false: no commit step, only STATE.md update ────────────
  console.log("\n=== plan-slice prompt: commit_docs=false ===");
  {
    const commitInstruction = "Do not commit — planning docs are not tracked in git for this project.";
    const result = loadPromptFromWorktree("plan-slice", { ...BASE_VARS, commitInstruction });
    assertTrue(!result.includes("docs(S01): add slice plan"), "commit step absent when commit_docs=false");
    assertTrue(result.includes("Do not commit"), "no-commit instruction present");
    assertTrue(result.includes("Update `.gsd/STATE.md`"), "STATE.md update step still present");
    assertTrue(!result.includes("{{commitInstruction}}"), "no unresolved placeholder");
  }
  // ─── all base variables are substituted ─────────────────────────────────
  console.log("\n=== plan-slice prompt: all variables substituted ===");
  {
    const commitInstruction = `Commit: \`docs(S01): add slice plan\``;
    const result = loadPromptFromWorktree("plan-slice", { ...BASE_VARS, commitInstruction });
    assertTrue(!result.includes("{{"), "no unresolved placeholders remain");
    assertTrue(result.includes("M001"), "milestoneId substituted");
    assertTrue(result.includes("S01"), "sliceId substituted");
  }
}
// report() prints the assertion summary once all checks have run.
main().then(report);

View file

@ -493,4 +493,45 @@ console.log('\n=== doctor: no blocker → no blocker_discovered_no_replan issue
rmSync(base, { recursive: true, force: true });
}
// ═══════════════════════════════════════════════════════════════════════════
// Artifact Resolution: resolveExpectedArtifactPath for replan-slice (#858)
// ═══════════════════════════════════════════════════════════════════════════
import { resolveExpectedArtifactPath, verifyExpectedArtifact } from '../auto-recovery.ts';
// #858: replan-slice dispatch must produce an S##-REPLAN.md artifact; these
// checks pin both the resolved path and the presence verification.
console.log('\n=== artifact: resolveExpectedArtifactPath returns REPLAN.md path for replan-slice ===');
{
  const base = createFixtureBase();
  writeRoadmap(base, 'M001', ROADMAP_ONE_SLICE);
  writePlan(base, 'M001', 'S01', makePlanT01DoneT02Pending());
  const path = resolveExpectedArtifactPath('replan-slice', 'M001/S01', base);
  assertTrue(path !== null, 'resolveExpectedArtifactPath returns non-null for replan-slice');
  assertTrue(path!.endsWith('S01-REPLAN.md'), 'path ends with S01-REPLAN.md');
  rmSync(base, { recursive: true, force: true });
}
console.log('\n=== artifact: verifyExpectedArtifact fails when REPLAN.md missing (#858) ===');
{
  const base = createFixtureBase();
  writeRoadmap(base, 'M001', ROADMAP_ONE_SLICE);
  writePlan(base, 'M001', 'S01', makePlanT01DoneT02Pending());
  const result = verifyExpectedArtifact('replan-slice', 'M001/S01', base);
  assertEq(result, false, 'verifyExpectedArtifact returns false when REPLAN.md is missing');
  rmSync(base, { recursive: true, force: true });
}
console.log('\n=== artifact: verifyExpectedArtifact passes when REPLAN.md exists (#858) ===');
{
  const base = createFixtureBase();
  writeRoadmap(base, 'M001', ROADMAP_ONE_SLICE);
  writePlan(base, 'M001', 'S01', makePlanT01DoneT02Pending());
  writeReplanFile(base, 'M001', 'S01', '# Replan\n\nBlocker addressed.');
  const result = verifyExpectedArtifact('replan-slice', 'M001/S01', base);
  assertEq(result, true, 'verifyExpectedArtifact returns true when REPLAN.md exists');
  rmSync(base, { recursive: true, force: true });
}
report();

View file

@ -0,0 +1,743 @@
/**
* Unit tests for the verification evidence module JSON persistence and markdown table formatting.
*
* Tests cover:
* 1. writeVerificationJSON writes correct JSON shape (schemaVersion, taskId, timestamp, passed, discoverySource, checks)
* 2. writeVerificationJSON creates directory if it doesn't exist
* 3. writeVerificationJSON maps exitCode to verdict correctly (0 = pass, non-zero = fail)
* 4. writeVerificationJSON excludes stdout/stderr from output
* 5. writeVerificationJSON handles empty checks array
* 6. writeVerificationJSON accepts optional unitId
* 7. formatEvidenceTable returns markdown table with correct columns for checks
* 8. formatEvidenceTable returns "no checks" message for empty checks
* 9. formatEvidenceTable formats duration as seconds with 1 decimal
* 10. formatEvidenceTable uses / emoji for pass/fail verdict
*/
import test from "node:test";
import assert from "node:assert/strict";
import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import {
writeVerificationJSON,
formatEvidenceTable,
} from "../verification-evidence.ts";
import type { VerificationResult } from "../types.ts";
/**
 * Create a unique temporary directory for a test.
 *
 * Uses mkdtempSync so uniqueness is guaranteed by the OS instead of a
 * hand-rolled `Date.now()` + `Math.random()` suffix, which can collide when
 * tests run in parallel within the same millisecond.
 *
 * @param prefix - human-readable prefix embedded in the directory name
 * @returns absolute path of the newly created directory
 */
function makeTempDir(prefix: string): string {
  // mkdtempSync appends six random characters and creates the directory
  // atomically, so no separate mkdirSync call is needed.
  return mkdtempSync(join(tmpdir(), `${prefix}-`));
}
/**
 * Build a passing VerificationResult with no checks, discovered via
 * package.json, at a fixed timestamp. Pass `overrides` to customise
 * individual fields per test.
 */
function makeResult(overrides?: Partial<VerificationResult>): VerificationResult {
  const base: VerificationResult = {
    passed: true,
    checks: [],
    discoverySource: "package-json",
    timestamp: 1710000000000,
  };
  return { ...base, ...overrides };
}
// ─── writeVerificationJSON Tests ─────────────────────────────────────────────
// Shape contract: schemaVersion/taskId/unitId/timestamp/passed/
// discoverySource/checks, with per-check verdict derived from exitCode.
test("verification-evidence: writeVerificationJSON writes correct JSON shape", () => {
  const tmp = makeTempDir("ve-shape");
  try {
    const result = makeResult({
      passed: true,
      checks: [
        {
          command: "npm run typecheck",
          exitCode: 0,
          stdout: "all good",
          stderr: "",
          durationMs: 2340,
        },
      ],
    });
    writeVerificationJSON(result, tmp, "T03");
    const filePath = join(tmp, "T03-VERIFY.json");
    assert.ok(existsSync(filePath), "JSON file should exist");
    const json = JSON.parse(readFileSync(filePath, "utf-8"));
    assert.equal(json.schemaVersion, 1);
    assert.equal(json.taskId, "T03");
    assert.equal(json.unitId, "T03"); // defaults to taskId when unitId not provided
    assert.equal(json.timestamp, 1710000000000);
    assert.equal(json.passed, true);
    assert.equal(json.discoverySource, "package-json");
    assert.equal(json.checks.length, 1);
    assert.equal(json.checks[0].command, "npm run typecheck");
    assert.equal(json.checks[0].exitCode, 0);
    assert.equal(json.checks[0].durationMs, 2340);
    assert.equal(json.checks[0].verdict, "pass");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-evidence: writeVerificationJSON creates directory if it doesn't exist", () => {
  const tmp = makeTempDir("ve-mkdir");
  const nested = join(tmp, "deep", "nested", "tasks");
  try {
    assert.ok(!existsSync(nested), "directory should not exist yet");
    writeVerificationJSON(makeResult(), nested, "T01");
    assert.ok(existsSync(nested), "directory should be created");
    assert.ok(existsSync(join(nested, "T01-VERIFY.json")), "JSON file should exist");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// Only exit code 0 maps to "pass"; every non-zero code (not just 1) is "fail".
test("verification-evidence: writeVerificationJSON maps exitCode to verdict correctly", () => {
  const tmp = makeTempDir("ve-verdict");
  try {
    const result = makeResult({
      passed: false,
      checks: [
        { command: "lint", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
        { command: "test", exitCode: 1, stdout: "", stderr: "fail", durationMs: 200 },
        { command: "audit", exitCode: 2, stdout: "", stderr: "err", durationMs: 300 },
      ],
    });
    writeVerificationJSON(result, tmp, "T02");
    const json = JSON.parse(readFileSync(join(tmp, "T02-VERIFY.json"), "utf-8"));
    assert.equal(json.checks[0].verdict, "pass");
    assert.equal(json.checks[1].verdict, "fail");
    assert.equal(json.checks[2].verdict, "fail");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// stdout/stderr are stripped from the persisted JSON — both the keys and
// their values must be absent from the raw file contents.
test("verification-evidence: writeVerificationJSON excludes stdout/stderr from output", () => {
  const tmp = makeTempDir("ve-no-stdio");
  try {
    const result = makeResult({
      checks: [
        {
          command: "echo hello",
          exitCode: 0,
          stdout: "hello\n",
          stderr: "some warning",
          durationMs: 50,
        },
      ],
    });
    writeVerificationJSON(result, tmp, "T01");
    const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8");
    assert.ok(!raw.includes('"stdout"'), "JSON should not contain stdout key");
    assert.ok(!raw.includes('"stderr"'), "JSON should not contain stderr key");
    assert.ok(!raw.includes("hello\\n"), "JSON should not contain stdout value");
    assert.ok(!raw.includes("some warning"), "JSON should not contain stderr value");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-evidence: writeVerificationJSON handles empty checks array", () => {
  const tmp = makeTempDir("ve-empty");
  try {
    writeVerificationJSON(makeResult({ checks: [] }), tmp, "T01");
    const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"));
    assert.equal(json.schemaVersion, 1);
    assert.equal(json.passed, true);
    assert.deepStrictEqual(json.checks, []);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-evidence: writeVerificationJSON uses optional unitId when provided", () => {
  const tmp = makeTempDir("ve-unitid");
  try {
    writeVerificationJSON(makeResult(), tmp, "T03", "M001/S01/T03");
    const json = JSON.parse(readFileSync(join(tmp, "T03-VERIFY.json"), "utf-8"));
    assert.equal(json.taskId, "T03");
    assert.equal(json.unitId, "M001/S01/T03");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// ─── formatEvidenceTable Tests ───────────────────────────────────────────────
test("verification-evidence: formatEvidenceTable returns markdown table with correct columns", () => {
  const result = makeResult({
    checks: [
      { command: "npm run typecheck", exitCode: 0, stdout: "", stderr: "", durationMs: 2340 },
      { command: "npm run lint", exitCode: 1, stdout: "", stderr: "err", durationMs: 1100 },
    ],
  });
  const table = formatEvidenceTable(result);
  const lines = table.split("\n");
  // Header row
  assert.ok(lines[0].includes("# |"), "header should have # column");
  assert.ok(lines[0].includes("Command"), "header should have Command column");
  assert.ok(lines[0].includes("Exit Code"), "header should have Exit Code column");
  assert.ok(lines[0].includes("Verdict"), "header should have Verdict column");
  assert.ok(lines[0].includes("Duration"), "header should have Duration column");
  // Separator row
  assert.ok(lines[1].includes("---|"), "should have separator row");
  // Data rows
  assert.equal(lines.length, 4, "header + separator + 2 data rows");
  assert.ok(lines[2].includes("npm run typecheck"), "first row command");
  assert.ok(lines[3].includes("npm run lint"), "second row command");
});
test("verification-evidence: formatEvidenceTable returns no-checks message for empty checks", () => {
  const result = makeResult({ checks: [] });
  const output = formatEvidenceTable(result);
  assert.equal(output, "_No verification checks discovered._");
});
// Durations render as seconds with one decimal place (150ms → "0.1s").
test("verification-evidence: formatEvidenceTable formats duration as seconds with 1 decimal", () => {
  const result = makeResult({
    checks: [
      { command: "fast", exitCode: 0, stdout: "", stderr: "", durationMs: 150 },
      { command: "slow", exitCode: 0, stdout: "", stderr: "", durationMs: 2340 },
      { command: "zero", exitCode: 0, stdout: "", stderr: "", durationMs: 0 },
    ],
  });
  const table = formatEvidenceTable(result);
  assert.ok(table.includes("0.1s"), "150ms → 0.1s");
  assert.ok(table.includes("2.3s"), "2340ms → 2.3s");
  assert.ok(table.includes("0.0s"), "0ms → 0.0s");
});
test("verification-evidence: formatEvidenceTable uses ✅/❌ emoji for pass/fail verdict", () => {
  const result = makeResult({
    passed: false,
    checks: [
      { command: "pass-cmd", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
      { command: "fail-cmd", exitCode: 1, stdout: "", stderr: "", durationMs: 200 },
    ],
  });
  const table = formatEvidenceTable(result);
  assert.ok(table.includes("✅ pass"), "passing check should have ✅ pass");
  assert.ok(table.includes("❌ fail"), "failing check should have ❌ fail");
});
// ─── Validator Rule Tests (T03) ──────────────────────────────────────────────
import { validateTaskSummaryContent } from "../observability-validator.ts";
// Fixture: summary with a real, populated evidence table — should validate clean.
const MINIMAL_SUMMARY_WITH_EVIDENCE = `---
observability_surfaces:
- gate-output
---
# T03 Summary
## Diagnostics
Run \`npm test\` to verify.
## Verification Evidence
| # | Command | Exit Code | Verdict | Duration |
|---|---------|-----------|---------|----------|
| 1 | npm run typecheck | 0 | pass | 2.3s |
`;
// Fixture: summary missing the Verification Evidence section entirely.
const MINIMAL_SUMMARY_NO_EVIDENCE = `---
observability_surfaces:
- gate-output
---
# T03 Summary
## Diagnostics
Run \`npm test\` to verify.
`;
// Fixture: evidence section present but containing only the raw template
// placeholder — i.e. the prompt variable was never substituted.
const MINIMAL_SUMMARY_PLACEHOLDER_EVIDENCE = `---
observability_surfaces:
- gate-output
---
# T03 Summary
## Diagnostics
Run \`npm test\` to verify.
## Verification Evidence
{{evidence_table}}
`;
// Fixture: the explicit "no checks discovered" sentinel emitted by
// formatEvidenceTable for an empty checks array — valid content.
const MINIMAL_SUMMARY_NO_CHECKS_EVIDENCE = `---
observability_surfaces:
- gate-output
---
# T03 Summary
## Diagnostics
Run \`npm test\` to verify.
## Verification Evidence
_No verification checks discovered._
`;
test("verification-evidence: validator accepts summary with real evidence table", () => {
  const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_WITH_EVIDENCE);
  const evidenceIssues = issues.filter(
    (i) => i.ruleId === "evidence_block_missing" || i.ruleId === "evidence_block_placeholder",
  );
  assert.equal(evidenceIssues.length, 0, "no evidence warnings for real table");
});
test("verification-evidence: validator warns when evidence section is missing", () => {
  const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_NO_EVIDENCE);
  const match = issues.find((i) => i.ruleId === "evidence_block_missing");
  assert.ok(match, "should produce evidence_block_missing warning");
  assert.equal(match!.severity, "warning");
  assert.equal(match!.scope, "task-summary");
});
test("verification-evidence: validator warns when evidence section has only placeholder text", () => {
  const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_PLACEHOLDER_EVIDENCE);
  const match = issues.find((i) => i.ruleId === "evidence_block_placeholder");
  assert.ok(match, "should produce evidence_block_placeholder warning");
  assert.equal(match!.severity, "warning");
});
test("verification-evidence: validator accepts 'no checks discovered' as valid content", () => {
  const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_NO_CHECKS_EVIDENCE);
  const evidenceIssues = issues.filter(
    (i) => i.ruleId === "evidence_block_missing" || i.ruleId === "evidence_block_placeholder",
  );
  assert.equal(evidenceIssues.length, 0, "no evidence warnings for 'no checks discovered'");
});
// ─── Integration Test: Full Chain (T03) ──────────────────────────────────────
// End-to-end: VerificationResult → persisted JSON → markdown table embedded
// in a summary → validator accepts the generated evidence.
test("verification-evidence: integration — VerificationResult → JSON → table → validator accepts", () => {
  const tmp = makeTempDir("ve-integration");
  try {
    // 1. Create a VerificationResult with 2 checks (1 pass, 1 fail)
    const result = makeResult({
      passed: false,
      checks: [
        { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500 },
        { command: "npm run test:unit", exitCode: 1, stdout: "", stderr: "1 failed", durationMs: 3200 },
      ],
      discoverySource: "package-json",
    });
    // 2. Write JSON to temp dir and read it back
    writeVerificationJSON(result, tmp, "T03");
    const jsonPath = join(tmp, "T03-VERIFY.json");
    assert.ok(existsSync(jsonPath), "JSON file should exist");
    const json = JSON.parse(readFileSync(jsonPath, "utf-8"));
    assert.equal(json.schemaVersion, 1, "schemaVersion should be 1");
    assert.equal(json.passed, false, "passed should be false");
    assert.equal(json.checks.length, 2, "should have 2 checks");
    assert.equal(json.checks[0].verdict, "pass", "first check should pass");
    assert.equal(json.checks[1].verdict, "fail", "second check should fail");
    // 3. Generate evidence table and embed in a mock summary
    const table = formatEvidenceTable(result);
    assert.ok(table.includes("npm run typecheck"), "table should contain first command");
    assert.ok(table.includes("npm run test:unit"), "table should contain second command");
    const fullSummary = `---
observability_surfaces:
- gate-output
---
# T03 Summary
## Diagnostics
Run \`npm test\` to verify.
## Verification Evidence
${table}
`;
    // 4. Validate — no evidence warnings
    const issues = validateTaskSummaryContent("T03-SUMMARY.md", fullSummary);
    const evidenceIssues = issues.filter(
      (i) => i.ruleId === "evidence_block_missing" || i.ruleId === "evidence_block_placeholder",
    );
    assert.equal(evidenceIssues.length, 0, "validator should accept real evidence from formatEvidenceTable");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// ─── Retry Evidence Field Tests (S03/T01) ─────────────────────────────────────
// retryAttempt/maxRetries are trailing optional params: present in the JSON
// when passed, and entirely absent (not null) when omitted.
test("verification-evidence: writeVerificationJSON with retryAttempt and maxRetries includes them in output", () => {
  const tmp = makeTempDir("ve-retry-fields");
  try {
    const result = makeResult({
      passed: false,
      checks: [
        { command: "npm run lint", exitCode: 1, stdout: "", stderr: "error", durationMs: 300 },
      ],
    });
    writeVerificationJSON(result, tmp, "T01", "M001/S03/T01", 1, 2);
    const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"));
    assert.equal(json.retryAttempt, 1, "retryAttempt should be 1");
    assert.equal(json.maxRetries, 2, "maxRetries should be 2");
    // Other fields should still be correct
    assert.equal(json.schemaVersion, 1);
    assert.equal(json.taskId, "T01");
    assert.equal(json.unitId, "M001/S03/T01");
    assert.equal(json.passed, false);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-evidence: writeVerificationJSON without retry params omits retryAttempt/maxRetries keys", () => {
  const tmp = makeTempDir("ve-no-retry");
  try {
    const result = makeResult({
      passed: true,
      checks: [
        { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
      ],
    });
    writeVerificationJSON(result, tmp, "T02");
    const raw = readFileSync(join(tmp, "T02-VERIFY.json"), "utf-8");
    const json = JSON.parse(raw);
    assert.ok(!("retryAttempt" in json), "retryAttempt key should not be present");
    assert.ok(!("maxRetries" in json), "maxRetries key should not be present");
    // Confirm the JSON string does not contain these keys at all
    assert.ok(!raw.includes('"retryAttempt"'), "raw JSON should not contain retryAttempt");
    assert.ok(!raw.includes('"maxRetries"'), "raw JSON should not contain maxRetries");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// ─── Runtime Error Evidence Tests (S04/T02) ──────────────────────────────────
// runtimeErrors is likewise optional: serialized verbatim when present on the
// result, and omitted (never an empty placeholder) when absent.
test("verification-evidence: writeVerificationJSON includes runtimeErrors when present", () => {
  const tmp = makeTempDir("ve-rt-present");
  try {
    const result = makeResult({
      passed: false,
      checks: [
        { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
      ],
      runtimeErrors: [
        { source: "bg-shell", severity: "crash", message: "Server crashed", blocking: true },
        { source: "browser", severity: "error", message: "Uncaught TypeError", blocking: false },
      ],
    });
    writeVerificationJSON(result, tmp, "T01");
    const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"));
    assert.ok(Array.isArray(json.runtimeErrors), "runtimeErrors should be an array");
    assert.equal(json.runtimeErrors.length, 2, "should have 2 runtime errors");
    assert.equal(json.runtimeErrors[0].source, "bg-shell");
    assert.equal(json.runtimeErrors[0].severity, "crash");
    assert.equal(json.runtimeErrors[0].message, "Server crashed");
    assert.equal(json.runtimeErrors[0].blocking, true);
    assert.equal(json.runtimeErrors[1].source, "browser");
    assert.equal(json.runtimeErrors[1].severity, "error");
    assert.equal(json.runtimeErrors[1].message, "Uncaught TypeError");
    assert.equal(json.runtimeErrors[1].blocking, false);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-evidence: writeVerificationJSON omits runtimeErrors when absent", () => {
  const tmp = makeTempDir("ve-rt-absent");
  try {
    const result = makeResult({
      passed: true,
      checks: [
        { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50 },
      ],
    });
    writeVerificationJSON(result, tmp, "T01");
    const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8");
    assert.ok(!raw.includes('"runtimeErrors"'), "raw JSON should not contain runtimeErrors key");
    const json = JSON.parse(raw);
    assert.ok(!("runtimeErrors" in json), "runtimeErrors key should not be present in parsed JSON");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-evidence: writeVerificationJSON omits runtimeErrors when empty array", () => {
const tmp = makeTempDir("ve-rt-empty");
try {
const result = makeResult({
passed: true,
checks: [],
runtimeErrors: [],
});
writeVerificationJSON(result, tmp, "T01");
const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8");
assert.ok(!raw.includes('"runtimeErrors"'), "raw JSON should not contain runtimeErrors key when empty array");
const json = JSON.parse(raw);
assert.ok(!("runtimeErrors" in json), "runtimeErrors key should not be present for empty array");
} finally {
rmSync(tmp, { recursive: true, force: true });
}
});
test("verification-evidence: formatEvidenceTable appends runtime errors section", () => {
const result = makeResult({
passed: false,
checks: [
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
],
runtimeErrors: [
{ source: "bg-shell", severity: "crash", message: "Server crashed with SIGKILL", blocking: true },
{ source: "browser", severity: "warning", message: "Deprecated API usage", blocking: false },
],
});
const table = formatEvidenceTable(result);
// Should contain runtime errors section
assert.ok(table.includes("**Runtime Errors**"), "should have Runtime Errors heading");
assert.ok(table.includes("| # | Source | Severity | Blocking | Message |"), "should have runtime errors column headers");
assert.ok(table.includes("bg-shell"), "should contain bg-shell source");
assert.ok(table.includes("crash"), "should contain crash severity");
assert.ok(table.includes("🚫 yes"), "blocking error should show 🚫 yes");
assert.ok(table.includes(" no"), "non-blocking error should show no");
assert.ok(table.includes("Server crashed with SIGKILL"), "should contain error message");
assert.ok(table.includes("Deprecated API usage"), "should contain warning message");
});
test("verification-evidence: formatEvidenceTable omits runtime errors section when none", () => {
const result = makeResult({
passed: true,
checks: [
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200 },
],
});
const table = formatEvidenceTable(result);
assert.ok(!table.includes("Runtime Errors"), "should not contain Runtime Errors heading");
assert.ok(table.includes("npm run lint"), "should still contain the check table");
});
test("verification-evidence: formatEvidenceTable truncates runtime error message to 100 chars", () => {
const longMessage = "A".repeat(150);
const result = makeResult({
passed: false,
checks: [
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
],
runtimeErrors: [
{ source: "bg-shell", severity: "error", message: longMessage, blocking: false },
],
});
const table = formatEvidenceTable(result);
// The table should contain the truncated message (100 chars), not the full 150
assert.ok(table.includes("A".repeat(100)), "should contain 100 A's");
assert.ok(!table.includes("A".repeat(101)), "should not contain 101 A's (truncated)");
});
// ─── Audit Warning Evidence Tests (S05/T02) ──────────────────────────────────
// Shared fixture: three warnings spanning the critical/high/moderate severity
// tiers, with both fixAvailable=true and fixAvailable=false represented, so a
// single fixture exercises every severity-emoji and fix-marker branch that the
// tests below assert on.
const SAMPLE_AUDIT_WARNINGS = [
  {
    name: "lodash",
    severity: "critical" as const,
    title: "Prototype Pollution",
    url: "https://github.com/advisories/GHSA-1234",
    fixAvailable: true,
  },
  {
    name: "express",
    severity: "high" as const,
    title: "Open Redirect",
    url: "https://github.com/advisories/GHSA-5678",
    fixAvailable: false,
  },
  {
    name: "minimist",
    severity: "moderate" as const,
    title: "Prototype Pollution",
    url: "https://github.com/advisories/GHSA-9012",
    fixAvailable: true,
  },
];
// auditWarnings must be serialized only when at least one warning exists;
// absent and empty-array inputs both leave the key out of the JSON entirely.
test("verification-evidence: writeVerificationJSON includes auditWarnings when present", () => {
  const dir = makeTempDir("ve-audit-present");
  try {
    const verification = makeResult({
      passed: true,
      checks: [
        { command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
      ],
      auditWarnings: SAMPLE_AUDIT_WARNINGS,
    });
    writeVerificationJSON(verification, dir, "T01");
    const report = JSON.parse(readFileSync(join(dir, "T01-VERIFY.json"), "utf-8"));
    assert.ok(Array.isArray(report.auditWarnings), "auditWarnings should be an array");
    assert.equal(report.auditWarnings.length, 3, "should have 3 audit warnings");
    const [first, second] = report.auditWarnings;
    assert.equal(first.name, "lodash");
    assert.equal(first.severity, "critical");
    assert.equal(first.title, "Prototype Pollution");
    assert.equal(first.url, "https://github.com/advisories/GHSA-1234");
    assert.equal(first.fixAvailable, true);
    assert.equal(second.name, "express");
    assert.equal(second.severity, "high");
    assert.equal(second.fixAvailable, false);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
test("verification-evidence: writeVerificationJSON omits auditWarnings when absent", () => {
  const dir = makeTempDir("ve-audit-absent");
  try {
    const verification = makeResult({
      passed: true,
      checks: [
        { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50 },
      ],
    });
    writeVerificationJSON(verification, dir, "T01");
    const text = readFileSync(join(dir, "T01-VERIFY.json"), "utf-8");
    assert.ok(!text.includes('"auditWarnings"'), "raw JSON should not contain auditWarnings key");
    const report = JSON.parse(text);
    assert.ok(!("auditWarnings" in report), "auditWarnings key should not be present in parsed JSON");
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
test("verification-evidence: writeVerificationJSON omits auditWarnings when empty array", () => {
  const dir = makeTempDir("ve-audit-empty");
  try {
    const verification = makeResult({
      passed: true,
      checks: [],
      auditWarnings: [],
    });
    writeVerificationJSON(verification, dir, "T01");
    const text = readFileSync(join(dir, "T01-VERIFY.json"), "utf-8");
    assert.ok(!text.includes('"auditWarnings"'), "raw JSON should not contain auditWarnings key when empty array");
    const report = JSON.parse(text);
    assert.ok(!("auditWarnings" in report), "auditWarnings key should not be present for empty array");
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
// Rendering of audit warnings in the markdown evidence table: a heading, a
// fixed column layout, per-severity emoji, and ✅/❌ fix-availability markers.
test("verification-evidence: formatEvidenceTable appends audit warnings section", () => {
  const result = makeResult({
    passed: true,
    checks: [
      { command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
    ],
    auditWarnings: SAMPLE_AUDIT_WARNINGS,
  });
  const table = formatEvidenceTable(result);
  assert.ok(table.includes("**Audit Warnings**"), "should have Audit Warnings heading");
  assert.ok(table.includes("| # | Package | Severity | Title | Fix Available |"), "should have audit warnings column headers");
  assert.ok(table.includes("lodash"), "should contain lodash package");
  assert.ok(table.includes("🔴 critical"), "should show critical emoji");
  assert.ok(table.includes("🟠 high"), "should show high emoji");
  assert.ok(table.includes("🟡 moderate"), "should show moderate emoji");
  assert.ok(table.includes("Prototype Pollution"), "should contain vulnerability title");
  assert.ok(table.includes("Open Redirect"), "should contain vulnerability title");
  assert.ok(table.includes("✅ yes"), "fixAvailable true should show ✅ yes");
  assert.ok(table.includes("❌ no"), "fixAvailable false should show ❌ no");
});
test("verification-evidence: formatEvidenceTable omits audit warnings section when none", () => {
  const result = makeResult({
    passed: true,
    checks: [
      { command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200 },
    ],
  });
  const table = formatEvidenceTable(result);
  assert.ok(!table.includes("Audit Warnings"), "should not contain Audit Warnings heading");
  assert.ok(table.includes("npm run lint"), "should still contain the check table");
});
// End-to-end: a result carrying auditWarnings must survive the JSON write and
// the table render, without flipping the overall passed flag.
test("verification-evidence: integration — VerificationResult with auditWarnings → JSON → table", () => {
  const dir = makeTempDir("ve-audit-integration");
  try {
    const verification = makeResult({
      passed: true,
      checks: [
        { command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500 },
      ],
      auditWarnings: [
        {
          name: "got",
          severity: "moderate" as const,
          title: "Redirect bypass",
          url: "https://github.com/advisories/GHSA-abcd",
          fixAvailable: true,
        },
      ],
    });
    // 1. Write JSON and verify
    writeVerificationJSON(verification, dir, "T05");
    const report = JSON.parse(readFileSync(join(dir, "T05-VERIFY.json"), "utf-8"));
    assert.equal(report.auditWarnings.length, 1, "JSON should have 1 audit warning");
    const [warning] = report.auditWarnings;
    assert.equal(warning.name, "got");
    assert.equal(warning.severity, "moderate");
    assert.equal(warning.fixAvailable, true);
    // passed should still be true — audit warnings are non-blocking
    assert.equal(report.passed, true, "passed should remain true despite audit warnings");
    // 2. Format table and verify
    const rendered = formatEvidenceTable(verification);
    assert.ok(rendered.includes("**Audit Warnings**"), "table should have Audit Warnings section");
    assert.ok(rendered.includes("got"), "table should contain package name");
    assert.ok(rendered.includes("🟡 moderate"), "table should show moderate severity with emoji");
    assert.ok(rendered.includes("Redirect bypass"), "table should contain vulnerability title");
    assert.ok(rendered.includes("✅ yes"), "table should show fix available");
    // Check table still has the main verification checks
    assert.ok(rendered.includes("npm run typecheck"), "table should still have main check");
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});

View file

@ -0,0 +1,965 @@
/**
 * Unit tests for the verification gate command discovery and execution.
 *
 * Tests cover:
 * 1. Discovery from explicit preference commands
 * 2. Discovery from task plan verify field
 * 3. Discovery from package.json typecheck/lint/test scripts
 * 4. First-non-empty-wins precedence
 * 5. All commands pass → gate passes
 * 6. One command fails → gate fails with exit code + stderr
 * 7. Missing package.json → 0 checks → pass
 * 8. Empty scripts → 0 checks → pass
 * 9. Preference validation for verification keys
 * 10. spawnSync error (command not found) → failure with exit code 127
 * 11. Dependency audit — git diff detection, npm audit parsing, graceful failures
 */
import test from "node:test";
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, realpathSync, rmSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { discoverCommands, runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "../verification-gate.ts";
import type { CaptureRuntimeErrorsOptions, DependencyAuditOptions } from "../verification-gate.ts";
import type { VerificationCheck, VerificationResult } from "../types.ts";
import { validatePreferences } from "../preferences.ts";
/**
 * Create a unique temporary directory for a test, tagged with the given
 * prefix so leftover directories are attributable to their test.
 *
 * Uses mkdtempSync, which atomically creates a directory with an
 * OS-generated unique suffix — unlike the previous hand-rolled
 * `Date.now()`/`Math.random()` name, it cannot collide when tests run in
 * parallel, and it never races another process for the same path.
 *
 * @param prefix - human-readable tag embedded in the directory name
 * @returns absolute path of the newly created directory
 */
function makeTempDir(prefix: string): string {
  return mkdtempSync(join(tmpdir(), `${prefix}-`));
}
// ─── Discovery Tests ─────────────────────────────────────────────────────────
// Each discovery source is exercised in isolation; the result reports both the
// resolved command list and which source supplied it.
test("verification-gate: discoverCommands from preference commands", () => {
  const dir = makeTempDir("vg-pref");
  try {
    const discovered = discoverCommands({
      preferenceCommands: ["npm run lint", "npm run test"],
      cwd: dir,
    });
    assert.equal(discovered.source, "preference");
    assert.deepStrictEqual(discovered.commands, ["npm run lint", "npm run test"]);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
test("verification-gate: discoverCommands from task plan verify field", () => {
  const dir = makeTempDir("vg-taskplan");
  try {
    // A "&&"-joined verify string is split into individual commands.
    const discovered = discoverCommands({
      taskPlanVerify: "npm run lint && npm run test",
      cwd: dir,
    });
    assert.equal(discovered.source, "task-plan");
    assert.deepStrictEqual(discovered.commands, ["npm run lint", "npm run test"]);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
// package.json fallback: only the typecheck/lint/test scripts are picked up,
// in that fixed order; unrelated scripts (build, start) are ignored.
test("verification-gate: discoverCommands from package.json scripts", () => {
  const tmp = makeTempDir("vg-pkg");
  try {
    writeFileSync(
      join(tmp, "package.json"),
      JSON.stringify({
        scripts: {
          typecheck: "tsc --noEmit",
          lint: "eslint .",
          test: "vitest",
          build: "tsc", // should NOT be included
        },
      }),
    );
    const result = discoverCommands({ cwd: tmp });
    assert.deepStrictEqual(result.commands, [
      "npm run typecheck",
      "npm run lint",
      "npm run test",
    ]);
    assert.equal(result.source, "package-json");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// Precedence: preference > task plan > package.json, first non-empty wins.
test("verification-gate: first-non-empty-wins — preference beats task plan and package.json", () => {
  const tmp = makeTempDir("vg-precedence");
  try {
    writeFileSync(
      join(tmp, "package.json"),
      JSON.stringify({ scripts: { lint: "eslint ." } }),
    );
    const result = discoverCommands({
      preferenceCommands: ["custom-check"],
      taskPlanVerify: "npm run lint",
      cwd: tmp,
    });
    assert.deepStrictEqual(result.commands, ["custom-check"]);
    assert.equal(result.source, "preference");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-gate: task plan verify beats package.json", () => {
  const tmp = makeTempDir("vg-tp-beats-pkg");
  try {
    writeFileSync(
      join(tmp, "package.json"),
      JSON.stringify({ scripts: { lint: "eslint ." } }),
    );
    const result = discoverCommands({
      taskPlanVerify: "custom-verify",
      cwd: tmp,
    });
    assert.deepStrictEqual(result.commands, ["custom-verify"]);
    assert.equal(result.source, "task-plan");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// Degenerate cases: no package.json at all, or one without usable scripts,
// yields an empty command list and source "none" (gate will trivially pass).
test("verification-gate: missing package.json → 0 checks, source none", () => {
  const tmp = makeTempDir("vg-no-pkg");
  try {
    const result = discoverCommands({ cwd: tmp });
    assert.deepStrictEqual(result.commands, []);
    assert.equal(result.source, "none");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-gate: package.json with no matching scripts → 0 checks", () => {
  const tmp = makeTempDir("vg-no-scripts");
  try {
    writeFileSync(
      join(tmp, "package.json"),
      JSON.stringify({ scripts: { build: "tsc", start: "node index.js" } }),
    );
    const result = discoverCommands({ cwd: tmp });
    assert.deepStrictEqual(result.commands, []);
    assert.equal(result.source, "none");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// An empty preference array does not "win" — discovery falls through to the
// next source in precedence order.
test("verification-gate: empty preference array falls through to task plan", () => {
  const tmp = makeTempDir("vg-empty-pref");
  try {
    const result = discoverCommands({
      preferenceCommands: [],
      taskPlanVerify: "echo ok",
      cwd: tmp,
    });
    assert.deepStrictEqual(result.commands, ["echo ok"]);
    assert.equal(result.source, "task-plan");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// ─── Execution Tests ─────────────────────────────────────────────────────────
// These run real shell commands via the gate, so they use cheap built-ins
// (echo/sh) to stay fast and portable across POSIX environments.
test("verification-gate: all commands pass → gate passes", () => {
  const tmp = makeTempDir("vg-pass");
  try {
    const result = runVerificationGate({
      basePath: tmp,
      unitId: "T01",
      cwd: tmp,
      preferenceCommands: ["echo hello", "echo world"],
    });
    assert.equal(result.passed, true);
    assert.equal(result.checks.length, 2);
    assert.equal(result.discoverySource, "preference");
    assert.equal(result.checks[0].exitCode, 0);
    assert.equal(result.checks[1].exitCode, 0);
    assert.ok(result.checks[0].stdout.includes("hello"));
    assert.ok(result.checks[1].stdout.includes("world"));
    assert.equal(typeof result.timestamp, "number");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-gate: one command fails → gate fails with exit code + stderr", () => {
  const tmp = makeTempDir("vg-fail");
  try {
    const result = runVerificationGate({
      basePath: tmp,
      unitId: "T01",
      cwd: tmp,
      preferenceCommands: ["echo ok", "sh -c 'echo err >&2; exit 1'"],
    });
    assert.equal(result.passed, false);
    assert.equal(result.checks.length, 2);
    assert.equal(result.checks[0].exitCode, 0);
    // The failing check preserves both the exit code and the stderr output.
    assert.equal(result.checks[1].exitCode, 1);
    assert.ok(result.checks[1].stderr.includes("err"));
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// Zero discovered commands is not a failure: the gate passes vacuously.
test("verification-gate: no commands discovered → gate passes with 0 checks", () => {
  const tmp = makeTempDir("vg-empty");
  try {
    const result = runVerificationGate({
      basePath: tmp,
      unitId: "T01",
      cwd: tmp,
    });
    assert.equal(result.passed, true);
    assert.equal(result.checks.length, 0);
    assert.equal(result.discoverySource, "none");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// A spawn failure (ENOENT) must surface as a failed check, not a throw.
// Only non-zero is asserted since the exact code can vary by shell/platform.
test("verification-gate: command not found → exit code 127", () => {
  const tmp = makeTempDir("vg-notfound");
  try {
    const result = runVerificationGate({
      basePath: tmp,
      unitId: "T01",
      cwd: tmp,
      preferenceCommands: ["__nonexistent_command_xyz_42__"],
    });
    assert.equal(result.passed, false);
    assert.equal(result.checks.length, 1);
    assert.ok(result.checks[0].exitCode !== 0, "should have non-zero exit code");
    assert.ok(result.checks[0].durationMs >= 0);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-gate: each check has durationMs", () => {
  const tmp = makeTempDir("vg-duration");
  try {
    const result = runVerificationGate({
      basePath: tmp,
      unitId: "T01",
      cwd: tmp,
      preferenceCommands: ["echo fast"],
    });
    assert.equal(result.checks.length, 1);
    assert.equal(typeof result.checks[0].durationMs, "number");
    assert.ok(result.checks[0].durationMs >= 0);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// ─── Preference Validation Tests ─────────────────────────────────────────────
// validatePreferences should accept the three verification_* keys with the
// right types, and drop (not coerce) any value that fails validation while
// recording an error naming the offending key.
test("verification-gate: validatePreferences accepts valid verification keys", () => {
  const result = validatePreferences({
    verification_commands: ["npm run lint", "npm run test"],
    verification_auto_fix: true,
    verification_max_retries: 3,
  });
  assert.deepStrictEqual(result.preferences.verification_commands, [
    "npm run lint",
    "npm run test",
  ]);
  assert.equal(result.preferences.verification_auto_fix, true);
  assert.equal(result.preferences.verification_max_retries, 3);
  assert.equal(result.errors.length, 0);
});
test("verification-gate: validatePreferences rejects non-array verification_commands", () => {
  const result = validatePreferences({
    // Deliberately wrong type; the cast bypasses compile-time checking so the
    // runtime validation path is what gets exercised.
    verification_commands: "npm run lint" as unknown as string[],
  });
  assert.ok(result.errors.some((e) => e.includes("verification_commands")));
  assert.equal(result.preferences.verification_commands, undefined);
});
test("verification-gate: validatePreferences rejects non-boolean verification_auto_fix", () => {
  const result = validatePreferences({
    verification_auto_fix: "yes" as unknown as boolean,
  });
  assert.ok(result.errors.some((e) => e.includes("verification_auto_fix")));
  assert.equal(result.preferences.verification_auto_fix, undefined);
});
test("verification-gate: validatePreferences rejects negative verification_max_retries", () => {
  const result = validatePreferences({
    verification_max_retries: -1,
  });
  assert.ok(result.errors.some((e) => e.includes("verification_max_retries")));
  assert.equal(result.preferences.verification_max_retries, undefined);
});
test("verification-gate: validatePreferences rejects non-string items in verification_commands", () => {
  const result = validatePreferences({
    verification_commands: ["npm run lint", 42 as unknown as string],
  });
  assert.ok(result.errors.some((e) => e.includes("verification_commands")));
  assert.equal(result.preferences.verification_commands, undefined);
});
// Fractional retry counts are floored rather than rejected.
test("verification-gate: validatePreferences floors verification_max_retries", () => {
  const result = validatePreferences({
    verification_max_retries: 2.7,
  });
  assert.equal(result.preferences.verification_max_retries, 2);
  assert.equal(result.errors.length, 0);
});
// ─── Additional Discovery Tests (T02) ───────────────────────────────────────
// Edge cases on discovery: partial package.json scripts, single-command task
// plans, and preference lists that are effectively empty after trimming.
test("verification-gate: package.json with only test script → returns only npm run test", () => {
  const tmp = makeTempDir("vg-only-test");
  try {
    writeFileSync(
      join(tmp, "package.json"),
      JSON.stringify({
        scripts: {
          test: "vitest",
          build: "tsc",
          start: "node index.js",
        },
      }),
    );
    const result = discoverCommands({ cwd: tmp });
    assert.deepStrictEqual(result.commands, ["npm run test"]);
    assert.equal(result.source, "package-json");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-gate: taskPlanVerify with single command (no &&)", () => {
  const tmp = makeTempDir("vg-tp-single");
  try {
    const result = discoverCommands({
      taskPlanVerify: "npm test",
      cwd: tmp,
    });
    assert.deepStrictEqual(result.commands, ["npm test"]);
    assert.equal(result.source, "task-plan");
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("verification-gate: whitespace-only preference commands fall through", () => {
  const tmp = makeTempDir("vg-ws-pref");
  try {
    writeFileSync(
      join(tmp, "package.json"),
      JSON.stringify({ scripts: { lint: "eslint ." } }),
    );
    const result = discoverCommands({
      preferenceCommands: [" ", ""],
      cwd: tmp,
    });
    // Whitespace-only strings are trimmed to empty and filtered out
    assert.equal(result.source, "package-json");
    assert.deepStrictEqual(result.commands, ["npm run lint"]);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
// ─── Additional Execution Tests (T02) ───────────────────────────────────────
// The gate must not short-circuit: every discovered command runs even after
// an earlier one has already failed, so the evidence file is complete.
test("verification-gate: one command fails — remaining commands still run (non-short-circuit)", () => {
  const dir = makeTempDir("vg-no-short-circuit");
  try {
    // First fails, second and third should still execute
    const outcome = runVerificationGate({
      basePath: dir,
      unitId: "T02",
      cwd: dir,
      preferenceCommands: ["sh -c 'exit 1'", "echo second", "echo third"],
    });
    assert.equal(outcome.passed, false);
    assert.equal(outcome.checks.length, 3, "all 3 commands should run");
    const [first, second, third] = outcome.checks;
    assert.equal(first.exitCode, 1, "first command fails");
    assert.equal(second.exitCode, 0, "second command runs and passes");
    assert.ok(second.stdout.includes("second"));
    assert.equal(third.exitCode, 0, "third command runs and passes");
    assert.ok(third.stdout.includes("third"));
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
test("verification-gate: gate execution uses cwd for spawnSync", () => {
const tmp = makeTempDir("vg-cwd");
try {
// pwd should report the temp dir
const result = runVerificationGate({
basePath: tmp,
unitId: "T02",
cwd: tmp,
preferenceCommands: ["pwd"],
});
assert.equal(result.passed, true);
assert.equal(result.checks.length, 1);
// The stdout should contain the tmp dir path (resolving symlinks)
assert.ok(result.checks[0].stdout.trim().length > 0, "pwd should produce output");
} finally {
rmSync(tmp, { recursive: true, force: true });
}
});
// ─── Additional Preference Validation Tests (T02) ──────────────────────────
// The three verification_* keys must be registered as known preference keys
// (no "unknown key" warnings), and invalid values must still error.
test("verification-gate: verification_commands produces no unknown-key warnings", () => {
  const result = validatePreferences({
    verification_commands: ["npm test"],
  });
  const unknownWarnings = (result.warnings ?? []).filter(w => w.includes("unknown"));
  assert.equal(unknownWarnings.length, 0, "verification_commands is a known key");
  assert.equal(result.errors.length, 0);
});
test("verification-gate: verification_auto_fix produces no unknown-key warnings", () => {
  const result = validatePreferences({
    verification_auto_fix: true,
  });
  const unknownWarnings = (result.warnings ?? []).filter(w => w.includes("unknown"));
  assert.equal(unknownWarnings.length, 0, "verification_auto_fix is a known key");
  assert.equal(result.errors.length, 0);
});
test("verification-gate: verification_max_retries produces no unknown-key warnings", () => {
  const result = validatePreferences({
    verification_max_retries: 2,
  });
  const unknownWarnings = (result.warnings ?? []).filter(w => w.includes("unknown"));
  assert.equal(unknownWarnings.length, 0, "verification_max_retries is a known key");
  assert.equal(result.errors.length, 0);
});
test("verification-gate: verification_max_retries -1 produces a validation error", () => {
  const result = validatePreferences({
    verification_max_retries: -1,
  });
  assert.ok(
    result.errors.some(e => e.includes("verification_max_retries")),
    "negative max_retries should error",
  );
  assert.equal(result.preferences.verification_max_retries, undefined);
});
// ─── formatFailureContext Tests (S03/T01) ─────────────────────────────────────
test("formatFailureContext: formats a single failure with command, exit code, stderr", () => {
const result: import("../types.ts").VerificationResult = {
passed: false,
checks: [
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "error: unused var", durationMs: 500 },
],
discoverySource: "preference",
timestamp: Date.now(),
};
const output = formatFailureContext(result);
assert.ok(output.startsWith("## Verification Failures"), "should start with header");
assert.ok(output.includes("`npm run lint`"), "should include command name");
assert.ok(output.includes("exit code 1"), "should include exit code");
assert.ok(output.includes("error: unused var"), "should include stderr content");
assert.ok(output.includes("```stderr"), "should have stderr code block");
});
test("formatFailureContext: formats multiple failures", () => {
const result: import("../types.ts").VerificationResult = {
passed: false,
checks: [
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint error", durationMs: 100 },
{ command: "npm run test", exitCode: 2, stdout: "", stderr: "test failure", durationMs: 200 },
{ command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 50 },
],
discoverySource: "preference",
timestamp: Date.now(),
};
const output = formatFailureContext(result);
assert.ok(output.includes("`npm run lint`"), "should include first failed command");
assert.ok(output.includes("exit code 1"), "should include first exit code");
assert.ok(output.includes("`npm run test`"), "should include second failed command");
assert.ok(output.includes("exit code 2"), "should include second exit code");
// Passing check should NOT appear
assert.ok(!output.includes("npm run typecheck"), "should not include passing command");
});
test("formatFailureContext: truncates stderr longer than 2000 chars", () => {
const longStderr = "x".repeat(3000);
const result: import("../types.ts").VerificationResult = {
passed: false,
checks: [
{ command: "big-err", exitCode: 1, stdout: "", stderr: longStderr, durationMs: 100 },
],
discoverySource: "preference",
timestamp: Date.now(),
};
const output = formatFailureContext(result);
// The output should contain 2000 x's followed by truncation marker, not 3000
assert.ok(!output.includes("x".repeat(2001)), "should not contain more than 2000 chars of stderr");
assert.ok(output.includes("…[truncated]"), "should include truncation marker");
});
test("formatFailureContext: returns empty string when all checks pass", () => {
const result: import("../types.ts").VerificationResult = {
passed: true,
checks: [
{ command: "npm run lint", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 200 },
],
discoverySource: "preference",
timestamp: Date.now(),
};
assert.equal(formatFailureContext(result), "");
});
test("formatFailureContext: returns empty string for empty checks array", () => {
const result: import("../types.ts").VerificationResult = {
passed: true,
checks: [],
discoverySource: "none",
timestamp: Date.now(),
};
assert.equal(formatFailureContext(result), "");
});
test("formatFailureContext: caps total output at 10,000 chars", () => {
// Generate many failures to exceed 10,000 chars total
const checks: import("../types.ts").VerificationCheck[] = [];
for (let i = 0; i < 20; i++) {
checks.push({
command: `failing-command-${i}`,
exitCode: 1,
stdout: "",
stderr: "e".repeat(1000), // 1000 chars each, 20 * ~1050 (with formatting) > 10,000
durationMs: 100,
});
}
const result: import("../types.ts").VerificationResult = {
passed: false,
checks,
discoverySource: "preference",
timestamp: Date.now(),
};
const output = formatFailureContext(result);
assert.ok(output.length <= 10_100, `total output should be capped near 10,000 chars, got ${output.length}`);
assert.ok(output.includes("…[remaining failures truncated]"), "should include total truncation marker");
});
// ─── captureRuntimeErrors Tests (S04/T01) ─────────────────────────────────────
// Build a background-shell process snapshot for tests: a healthy "ready"
// process by default, with caller-specified fields layered on top.
function makeProc(overrides: Record<string, unknown>) {
  const defaults = {
    id: "p1",
    label: "test-server",
    status: "ready",
    alive: true,
    exitCode: null,
    signal: null,
    recentErrors: [] as string[],
  };
  return { ...defaults, ...overrides };
}
// Turn bare {type, text} pairs into browser console-log records with
// monotonically increasing timestamps and a fixed page URL.
function makeLogs(entries: Array<{ type: string; text: string }>) {
  const records: Array<{ type: string; text: string; timestamp: number; url: string }> = [];
  let index = 0;
  for (const { type, text } of entries) {
    records.push({
      type,
      text,
      timestamp: Date.now() + index,
      url: "http://localhost:3000",
    });
    index += 1;
  }
  return records;
}
// Classification rules under test: process crashes (crashed status, non-zero
// exit while dead, or fatal signal) are blocking; recentErrors on a live
// process are non-blocking; browser unhandled rejections are blocking crashes
// while ordinary console errors are non-blocking.
test("captureRuntimeErrors: crashed bg-shell process → blocking crash error", async () => {
  const processes = new Map<string, unknown>([
    ["p1", makeProc({ status: "crashed", alive: false, exitCode: 1 })],
  ]);
  const result = await captureRuntimeErrors({
    getProcesses: () => processes,
    getConsoleLogs: () => [],
  });
  assert.equal(result.length, 1);
  assert.equal(result[0].source, "bg-shell");
  assert.equal(result[0].severity, "crash");
  assert.equal(result[0].blocking, true);
  assert.ok(result[0].message.includes("test-server"));
});
test("captureRuntimeErrors: bg-shell non-zero exit + not alive → blocking crash error", async () => {
  const processes = new Map<string, unknown>([
    ["p1", makeProc({ status: "exited", alive: false, exitCode: 137 })],
  ]);
  const result = await captureRuntimeErrors({
    getProcesses: () => processes,
    getConsoleLogs: () => [],
  });
  assert.equal(result.length, 1);
  assert.equal(result[0].severity, "crash");
  assert.equal(result[0].blocking, true);
  assert.ok(result[0].message.includes("exitCode=137"));
});
test("captureRuntimeErrors: bg-shell SIGABRT/SIGSEGV/SIGBUS → blocking crash error", async () => {
  // Each fatal signal is exercised in turn; awaiting sequentially is fine
  // here since each capture is independent and cheap.
  for (const sig of ["SIGABRT", "SIGSEGV", "SIGBUS"]) {
    const processes = new Map<string, unknown>([
      ["p1", makeProc({ signal: sig, alive: false, exitCode: null })],
    ]);
    const result = await captureRuntimeErrors({
      getProcesses: () => processes,
      getConsoleLogs: () => [],
    });
    assert.equal(result.length, 1, `${sig} should produce 1 error`);
    assert.equal(result[0].severity, "crash");
    assert.equal(result[0].blocking, true);
    assert.ok(result[0].message.includes(sig), `message should contain ${sig}`);
  }
});
test("captureRuntimeErrors: alive bg-shell process with recentErrors → non-blocking error", async () => {
  const processes = new Map<string, unknown>([
    ["p1", makeProc({ alive: true, recentErrors: ["TypeError: foo", "RangeError: bar"] })],
  ]);
  const result = await captureRuntimeErrors({
    getProcesses: () => processes,
    getConsoleLogs: () => [],
  });
  // Multiple recentErrors collapse into a single aggregated entry.
  assert.equal(result.length, 1);
  assert.equal(result[0].source, "bg-shell");
  assert.equal(result[0].severity, "error");
  assert.equal(result[0].blocking, false);
  assert.ok(result[0].message.includes("TypeError: foo"));
  assert.ok(result[0].message.includes("RangeError: bar"));
});
test("captureRuntimeErrors: browser unhandled rejection → blocking crash error", async () => {
  const logs = makeLogs([
    { type: "error", text: "Unhandled promise rejection: some error" },
  ]);
  const result = await captureRuntimeErrors({
    getProcesses: () => new Map(),
    getConsoleLogs: () => logs,
  });
  assert.equal(result.length, 1);
  assert.equal(result[0].source, "browser");
  assert.equal(result[0].severity, "crash");
  assert.equal(result[0].blocking, true);
  assert.ok(result[0].message.includes("Unhandled"));
});
test("captureRuntimeErrors: browser UnhandledRejection (case variation) → blocking crash", async () => {
  const logs = makeLogs([
    { type: "error", text: "UnhandledRejection in module X" },
  ]);
  const result = await captureRuntimeErrors({
    getProcesses: () => new Map(),
    getConsoleLogs: () => logs,
  });
  assert.equal(result.length, 1);
  assert.equal(result[0].severity, "crash");
  assert.equal(result[0].blocking, true);
});
test("captureRuntimeErrors: browser console.error (general) → non-blocking error", async () => {
  const logs = makeLogs([
    { type: "error", text: "Failed to load resource: net::ERR_FAILED" },
  ]);
  const result = await captureRuntimeErrors({
    getProcesses: () => new Map(),
    getConsoleLogs: () => logs,
  });
  assert.equal(result.length, 1);
  assert.equal(result[0].source, "browser");
  assert.equal(result[0].severity, "error");
  assert.equal(result[0].blocking, false);
});
test("captureRuntimeErrors: browser deprecation warning → non-blocking warning", async () => {
const logs = makeLogs([
{ type: "warning", text: "Event.returnValue is deprecated. Use Event.preventDefault() instead." },
]);
const result = await captureRuntimeErrors({
getProcesses: () => new Map(),
getConsoleLogs: () => logs,
});
assert.equal(result.length, 1);
assert.equal(result[0].source, "browser");
assert.equal(result[0].severity, "warning");
assert.equal(result[0].blocking, false);
assert.ok(result[0].message.includes("deprecated"));
});
test("captureRuntimeErrors: non-deprecation warning is ignored", async () => {
const logs = makeLogs([
{ type: "warning", text: "Some general warning about performance" },
]);
const result = await captureRuntimeErrors({
getProcesses: () => new Map(),
getConsoleLogs: () => logs,
});
assert.equal(result.length, 0, "non-deprecation warnings should be ignored");
});
test("captureRuntimeErrors: no processes, no browser logs → empty array", async () => {
const result = await captureRuntimeErrors({
getProcesses: () => new Map(),
getConsoleLogs: () => [],
});
assert.deepStrictEqual(result, []);
});
test("captureRuntimeErrors: dynamic import failure → graceful empty array", async () => {
const result = await captureRuntimeErrors({
getProcesses: () => { throw new Error("module not found"); },
getConsoleLogs: () => { throw new Error("module not found"); },
});
assert.deepStrictEqual(result, []);
});
test("captureRuntimeErrors: browser text truncated to 500 chars", async () => {
const longText = "x".repeat(600);
const logs = makeLogs([
{ type: "error", text: longText },
]);
const result = await captureRuntimeErrors({
getProcesses: () => new Map(),
getConsoleLogs: () => logs,
});
assert.equal(result.length, 1);
assert.ok(result[0].message.length <= 500 + 20, "message should be truncated near 500 chars");
assert.ok(result[0].message.includes("…[truncated]"), "should include truncation marker");
assert.ok(!result[0].message.includes("x".repeat(501)), "should not contain 501+ x's");
});
test("captureRuntimeErrors: bg-shell recentErrors limited to 3 in message", async () => {
const processes = new Map<string, unknown>([
["p1", makeProc({
status: "crashed",
alive: false,
exitCode: 1,
recentErrors: ["err1", "err2", "err3", "err4", "err5"],
})],
]);
const result = await captureRuntimeErrors({
getProcesses: () => processes,
getConsoleLogs: () => [],
});
assert.equal(result.length, 1);
assert.ok(result[0].message.includes("err1"));
assert.ok(result[0].message.includes("err2"));
assert.ok(result[0].message.includes("err3"));
assert.ok(!result[0].message.includes("err4"), "should only include first 3 errors");
});
test("captureRuntimeErrors: mixed bg-shell and browser errors", async () => {
const processes = new Map<string, unknown>([
["p1", makeProc({ status: "crashed", alive: false, exitCode: 1 })],
]);
const logs = makeLogs([
{ type: "error", text: "Unhandled rejection: boom" },
{ type: "error", text: "general error" },
{ type: "warning", text: "deprecated API used" },
]);
const result = await captureRuntimeErrors({
getProcesses: () => processes,
getConsoleLogs: () => logs,
});
// 1 bg-shell crash + 1 browser crash (unhandled) + 1 browser error + 1 browser warning
assert.equal(result.length, 4);
const blocking = result.filter(r => r.blocking);
const nonBlocking = result.filter(r => !r.blocking);
assert.equal(blocking.length, 2, "should have 2 blocking errors");
assert.equal(nonBlocking.length, 2, "should have 2 non-blocking errors");
});
// ─── Dependency Audit Tests (S05/T01) ─────────────────────────────────────────
/**
 * Helper: wrap a vulnerabilities map in the envelope shape that
 * `npm audit --json` prints to stdout.
 */
function makeAuditJson(
  vulns: Record<string, { severity: string; fixAvailable: boolean; via: unknown[] }>,
): string {
  const envelope = { vulnerabilities: vulns };
  return JSON.stringify(envelope);
}
/** Canned audit output containing a single high-severity advisory. */
const SAMPLE_AUDIT_JSON = makeAuditJson({
  "nth-check": {
    severity: "high",
    fixAvailable: true,
    via: [
      {
        title: "Inefficient Regular Expression Complexity in nth-check",
        url: "https://github.com/advisories/GHSA-rp65-9cf3-cjxr",
        severity: "high",
      },
    ],
  },
});
// All dependency-audit tests inject gitDiff/npmAudit (D023 pattern) so no real
// git repo or npm registry is touched.
test("dependency-audit: package.json in git diff → runs npm audit and parses vulnerabilities", () => {
  let npmAuditCalled = false;
  const result = runDependencyAudit("/tmp/test", {
    gitDiff: () => ["package.json", "src/index.ts"],
    npmAudit: () => {
      npmAuditCalled = true;
      return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
    },
  });
  assert.equal(npmAuditCalled, true, "npm audit should be called");
  assert.equal(result.length, 1);
  assert.equal(result[0].name, "nth-check");
  assert.equal(result[0].severity, "high");
  assert.equal(result[0].title, "Inefficient Regular Expression Complexity in nth-check");
  assert.equal(result[0].url, "https://github.com/advisories/GHSA-rp65-9cf3-cjxr");
  assert.equal(result[0].fixAvailable, true);
});
test("dependency-audit: package-lock.json change triggers audit", () => {
  let npmAuditCalled = false;
  const result = runDependencyAudit("/tmp/test", {
    gitDiff: () => ["package-lock.json"],
    npmAudit: () => {
      npmAuditCalled = true;
      return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
    },
  });
  assert.equal(npmAuditCalled, true);
  assert.equal(result.length, 1);
});
test("dependency-audit: pnpm-lock.yaml change triggers audit", () => {
  let npmAuditCalled = false;
  runDependencyAudit("/tmp/test", {
    gitDiff: () => ["pnpm-lock.yaml"],
    npmAudit: () => {
      npmAuditCalled = true;
      return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
    },
  });
  assert.equal(npmAuditCalled, true);
});
test("dependency-audit: yarn.lock change triggers audit", () => {
  let npmAuditCalled = false;
  runDependencyAudit("/tmp/test", {
    gitDiff: () => ["yarn.lock"],
    npmAudit: () => {
      npmAuditCalled = true;
      return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
    },
  });
  assert.equal(npmAuditCalled, true);
});
test("dependency-audit: bun.lockb change triggers audit", () => {
  let npmAuditCalled = false;
  runDependencyAudit("/tmp/test", {
    gitDiff: () => ["bun.lockb"],
    npmAudit: () => {
      npmAuditCalled = true;
      return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
    },
  });
  assert.equal(npmAuditCalled, true);
});
test("dependency-audit: no dependency file changes → returns empty array, npm audit not called", () => {
  let npmAuditCalled = false;
  const result = runDependencyAudit("/tmp/test", {
    gitDiff: () => ["src/index.ts", "README.md"],
    npmAudit: () => {
      npmAuditCalled = true;
      return { stdout: "{}", exitCode: 0 };
    },
  });
  assert.equal(npmAuditCalled, false, "npm audit should NOT be called when no dependency files changed");
  assert.deepStrictEqual(result, []);
});
test("dependency-audit: git diff returns non-zero exit (not a git repo) → empty array", () => {
  const result = runDependencyAudit("/tmp/test", {
    gitDiff: () => { throw new Error("not a git repo"); },
    npmAudit: () => { throw new Error("should not be called"); },
  });
  assert.deepStrictEqual(result, []);
});
test("dependency-audit: npm audit returns invalid JSON → empty array", () => {
  const result = runDependencyAudit("/tmp/test", {
    gitDiff: () => ["package.json"],
    npmAudit: () => ({ stdout: "not json at all", exitCode: 1 }),
  });
  assert.deepStrictEqual(result, []);
});
test("dependency-audit: npm audit returns zero vulnerabilities → empty array", () => {
  const result = runDependencyAudit("/tmp/test", {
    gitDiff: () => ["package.json"],
    npmAudit: () => ({
      stdout: JSON.stringify({ vulnerabilities: {} }),
      exitCode: 0,
    }),
  });
  assert.deepStrictEqual(result, []);
});
test("dependency-audit: npm audit non-zero exit with valid JSON → parses correctly", () => {
  // npm audit exits non-zero when vulnerabilities exist — this is expected, not an error
  const result = runDependencyAudit("/tmp/test", {
    gitDiff: () => ["package-lock.json"],
    npmAudit: () => ({
      stdout: SAMPLE_AUDIT_JSON,
      exitCode: 1, // non-zero!
    }),
  });
  assert.equal(result.length, 1);
  assert.equal(result[0].name, "nth-check");
  assert.equal(result[0].severity, "high");
});
test("dependency-audit: via entries with string-only values are skipped", () => {
  const auditJson = makeAuditJson({
    "postcss": {
      severity: "moderate",
      fixAvailable: false,
      via: ["nth-check", "css-select"], // string-only via entries
    },
  });
  const result = runDependencyAudit("/tmp/test", {
    gitDiff: () => ["package.json"],
    npmAudit: () => ({ stdout: auditJson, exitCode: 1 }),
  });
  assert.equal(result.length, 1);
  // When no object via entry is found, title falls back to the package name
  assert.equal(result[0].name, "postcss");
  assert.equal(result[0].title, "postcss");
  assert.equal(result[0].url, "");
});
test("dependency-audit: subdirectory package.json does not trigger audit", () => {
  // Only repo-root dependency files count; nested workspace manifests are ignored.
  let npmAuditCalled = false;
  const result = runDependencyAudit("/tmp/test", {
    gitDiff: () => ["packages/foo/package.json", "libs/bar/package-lock.json"],
    npmAudit: () => {
      npmAuditCalled = true;
      return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
    },
  });
  assert.equal(npmAuditCalled, false, "subdirectory dependency files should not trigger audit");
  assert.deepStrictEqual(result, []);
});

View file

@ -46,6 +46,44 @@ export interface TaskPlanEntry {
verify?: string; // e.g. "run tests" — extracted from "- Verify:" subline
}
// ─── Verification Gate ─────────────────────────────────────────────────────
/** Result of a single verification command execution */
export interface VerificationCheck {
  command: string; // e.g. "npm run lint"
  exitCode: number; // 0 = pass; non-zero (or signal kill) = fail
  stdout: string; // captured standard output (may be truncated by the gate runner)
  stderr: string; // captured standard error (may be truncated by the gate runner)
  durationMs: number; // wall-clock execution time in milliseconds
}
/** A runtime error captured from bg-shell processes or browser console */
export interface RuntimeError {
  source: "bg-shell" | "browser"; // which subsystem observed the error
  severity: "crash" | "error" | "warning"; // classification per D004
  message: string; // human-readable description (may be truncated)
  blocking: boolean; // true when the error should block the gate
}
/** A dependency vulnerability warning from npm audit */
export interface AuditWarning {
  name: string; // vulnerable package name, e.g. "nth-check"
  severity: "low" | "moderate" | "high" | "critical"; // npm audit severity bucket
  title: string; // advisory title (falls back to the package name when absent)
  url: string; // advisory URL ("" when unknown)
  fixAvailable: boolean; // true when npm reports an available fix
}
/** Aggregate result from the verification gate */
export interface VerificationResult {
  passed: boolean; // true if all checks passed (or no checks discovered)
  checks: VerificationCheck[]; // per-command results
  discoverySource: "preference" | "task-plan" | "package-json" | "none"; // which D003 source produced the commands
  timestamp: number; // Date.now() at gate start
  runtimeErrors?: RuntimeError[]; // optional — populated by captureRuntimeErrors()
  auditWarnings?: AuditWarning[]; // optional — populated by runDependencyAudit()
}
export interface SlicePlan {
id: string; // e.g. "S01"
title: string; // from the H1

View file

@ -0,0 +1,183 @@
/**
* Verification Evidence JSON persistence and markdown table formatting.
*
* Two pure-ish functions:
* - writeVerificationJSON: persists a machine-readable T##-VERIFY.json artifact
* - formatEvidenceTable: returns a markdown evidence table string
*
* JSON schema uses schemaVersion: 1 for forward-compatibility.
* stdout/stderr are intentionally excluded from the JSON to avoid unbounded file sizes.
*/
import { mkdirSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import type { VerificationResult } from "./types.ts";
// ─── JSON Evidence Artifact ──────────────────────────────────────────────────
/** Per-command entry in the persisted evidence JSON. */
export interface EvidenceCheckJSON {
  command: string;
  exitCode: number;
  durationMs: number;
  verdict: "pass" | "fail";
}
/** Serialized runtime error (mirrors RuntimeError, widened to plain strings). */
export interface RuntimeErrorJSON {
  source: string;
  severity: string;
  message: string;
  blocking: boolean;
}
/** Serialized audit warning (mirrors AuditWarning, widened to plain strings). */
export interface AuditWarningJSON {
  name: string;
  severity: string;
  title: string;
  url: string;
  fixAvailable: boolean;
}
/** Shape of the T##-VERIFY.json artifact (schemaVersion 1). */
export interface EvidenceJSON {
  schemaVersion: 1;
  taskId: string;
  unitId: string;
  timestamp: number;
  passed: boolean;
  discoverySource: string;
  checks: EvidenceCheckJSON[];
  retryAttempt?: number;
  maxRetries?: number;
  runtimeErrors?: RuntimeErrorJSON[];
  auditWarnings?: AuditWarningJSON[];
}
/**
 * Persist a machine-readable `${taskId}-VERIFY.json` artifact under tasksDir,
 * creating the directory on demand.
 *
 * stdout/stderr are deliberately excluded from the JSON to keep artifact sizes
 * bounded — the full command output lives on VerificationResult in memory.
 *
 * @param result       gate outcome to serialize
 * @param tasksDir     directory receiving the artifact (created if missing)
 * @param taskId       task identifier; also used for the file name
 * @param unitId       unit identifier; defaults to taskId when omitted
 * @param retryAttempt current retry attempt, recorded only when provided
 * @param maxRetries   retry budget, recorded only when provided
 */
export function writeVerificationJSON(
  result: VerificationResult,
  tasksDir: string,
  taskId: string,
  unitId?: string,
  retryAttempt?: number,
  maxRetries?: number,
): void {
  mkdirSync(tasksDir, { recursive: true });

  // Reduce each check to its summary row with a derived pass/fail verdict.
  const checkRows: EvidenceCheckJSON[] = result.checks.map((c) => ({
    command: c.command,
    exitCode: c.exitCode,
    durationMs: c.durationMs,
    verdict: c.exitCode === 0 ? "pass" : "fail",
  }));

  const evidence: EvidenceJSON = {
    schemaVersion: 1,
    taskId,
    unitId: unitId ?? taskId,
    timestamp: result.timestamp,
    passed: result.passed,
    discoverySource: result.discoverySource,
    checks: checkRows,
  };
  // Retry metadata is only recorded when explicitly supplied.
  if (retryAttempt !== undefined) evidence.retryAttempt = retryAttempt;
  if (maxRetries !== undefined) evidence.maxRetries = maxRetries;

  const runtime = result.runtimeErrors ?? [];
  if (runtime.length > 0) {
    evidence.runtimeErrors = runtime.map((e) => ({
      source: e.source,
      severity: e.severity,
      message: e.message,
      blocking: e.blocking,
    }));
  }
  const audits = result.auditWarnings ?? [];
  if (audits.length > 0) {
    evidence.auditWarnings = audits.map((w) => ({
      name: w.name,
      severity: w.severity,
      title: w.title,
      url: w.url,
      fixAvailable: w.fixAvailable,
    }));
  }

  writeFileSync(
    join(tasksDir, `${taskId}-VERIFY.json`),
    JSON.stringify(evidence, null, 2) + "\n",
    "utf-8",
  );
}
// ─── Markdown Evidence Table ─────────────────────────────────────────────────
/**
 * Format duration in milliseconds as seconds with 1 decimal place.
 * e.g. 2340 → "2.3s", 150 → "0.2s", 0 → "0.0s"
 */
function formatDuration(ms: number): string {
  return `${(ms / 1000).toFixed(1)}s`;
}
/**
 * Escape free text for a single markdown table cell.
 * An unescaped "|" terminates the cell early and a raw newline breaks the row,
 * silently corrupting the table — commands and error messages can contain both.
 */
function escapeTableCell(text: string): string {
  return text.replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
}
/**
 * Generate a markdown evidence table from a VerificationResult.
 *
 * Returns a "no checks" note if result.checks is empty.
 * Otherwise returns a 5-column markdown table: #, Command, Exit Code, Verdict, Duration,
 * followed by optional "Runtime Errors" and "Audit Warnings" tables when those
 * fields are populated. Free-text cells are escaped via escapeTableCell so pipes
 * and newlines in commands/messages/titles cannot break the table layout.
 */
export function formatEvidenceTable(result: VerificationResult): string {
  if (result.checks.length === 0) {
    return "_No verification checks discovered._";
  }
  const lines: string[] = [
    "| # | Command | Exit Code | Verdict | Duration |",
    "|---|---------|-----------|---------|----------|",
  ];
  for (let i = 0; i < result.checks.length; i++) {
    const check = result.checks[i];
    const num = i + 1;
    const verdict =
      check.exitCode === 0 ? "✅ pass" : "❌ fail";
    const duration = formatDuration(check.durationMs);
    lines.push(
      `| ${num} | ${escapeTableCell(check.command)} | ${check.exitCode} | ${verdict} | ${duration} |`,
    );
  }
  if (result.runtimeErrors && result.runtimeErrors.length > 0) {
    lines.push("");
    lines.push("**Runtime Errors**");
    lines.push("");
    lines.push("| # | Source | Severity | Blocking | Message |");
    lines.push("|---|--------|----------|----------|---------|");
    for (let i = 0; i < result.runtimeErrors.length; i++) {
      const err = result.runtimeErrors[i];
      const blockIcon = err.blocking ? "🚫 yes" : "— no";
      // Message is clipped to 100 chars before escaping to keep rows compact.
      lines.push(`| ${i + 1} | ${err.source} | ${err.severity} | ${blockIcon} | ${escapeTableCell(err.message.slice(0, 100))} |`);
    }
  }
  if (result.auditWarnings && result.auditWarnings.length > 0) {
    const severityEmoji: Record<string, string> = {
      critical: "🔴",
      high: "🟠",
      moderate: "🟡",
      low: "⚪",
    };
    lines.push("");
    lines.push("**Audit Warnings**");
    lines.push("");
    lines.push("| # | Package | Severity | Title | Fix Available |");
    lines.push("|---|---------|----------|-------|---------------|");
    for (let i = 0; i < result.auditWarnings.length; i++) {
      const w = result.auditWarnings[i];
      const emoji = severityEmoji[w.severity] ?? "⚪";
      const fix = w.fixAvailable ? "✅ yes" : "❌ no";
      lines.push(`| ${i + 1} | ${escapeTableCell(w.name)} | ${emoji} ${w.severity} | ${escapeTableCell(w.title)} | ${fix} |`);
    }
  }
  return lines.join("\n");
}

View file

@ -0,0 +1,567 @@
// GSD Extension — Verification Gate
// Pure functions for discovering and running verification commands.
// Discovery order (D003): preference → task plan verify → package.json scripts.
// First non-empty source wins.
import { spawnSync } from "node:child_process";
import { existsSync, readFileSync } from "node:fs";
import { join, basename } from "node:path";
import type { AuditWarning, RuntimeError, VerificationCheck, VerificationResult } from "./types.js";
/** Maximum bytes of stdout/stderr to retain per command (10 KB). */
const MAX_OUTPUT_BYTES = 10 * 1024;
/**
 * Truncate a string to at most maxBytes of UTF-8, appending a marker if truncated.
 *
 * The original implementation sliced the byte buffer and decoded it directly,
 * which could split a multi-byte character at the cut point and decode it as
 * U+FFFD replacement characters. This version backs the cut off to the nearest
 * UTF-8 sequence boundary so the decoded prefix never contains mojibake.
 */
function truncate(value: string | null | undefined, maxBytes: number): string {
  if (!value) return "";
  if (Buffer.byteLength(value, "utf-8") <= maxBytes) return value;
  const buf = Buffer.from(value, "utf-8").subarray(0, maxBytes);
  // Walk back to the start of the character that straddles the cut point:
  // continuation bytes are 0b10xxxxxx.
  let start = buf.length;
  while (start > 0 && (buf[start - 1] & 0xc0) === 0x80) start--;
  if (start > 0) start--; // include the lead byte of that character
  const lead = buf[start];
  // Expected sequence length according to the lead byte.
  const seqLen = lead < 0x80 ? 1 : lead >= 0xf0 ? 4 : lead >= 0xe0 ? 3 : lead >= 0xc0 ? 2 : 1;
  // Keep the final character only when it fits completely inside the cut.
  const end = start + seqLen <= buf.length ? buf.length : start;
  return buf.subarray(0, end).toString("utf-8") + "\n…[truncated]";
}
// ─── Command Discovery ──────────────────────────────────────────────────────
/** Inputs for discoverCommands. */
export interface DiscoverCommandsOptions {
  preferenceCommands?: string[];
  taskPlanVerify?: string;
  cwd: string;
}
/** Discovered commands plus the source that produced them. */
export interface DiscoveredCommands {
  commands: string[];
  source: VerificationResult["discoverySource"];
}
/** Package.json script keys to probe, in order. */
const PACKAGE_SCRIPT_KEYS = ["typecheck", "lint", "test"] as const;
/**
 * Discover verification commands with the first-non-empty-wins strategy (D003):
 * explicit preference commands, then the task plan's verify field (split on
 * "&&" and sanitized, since it is untrusted input), then well-known
 * package.json scripts, and finally nothing.
 */
export function discoverCommands(options: DiscoverCommandsOptions): DiscoveredCommands {
  // 1. Explicit preference commands win outright.
  const prefs = (options.preferenceCommands ?? []).map((c) => c.trim()).filter(Boolean);
  if (prefs.length > 0) {
    return { commands: prefs, source: "preference" };
  }

  // 2. Task plan verify field — fragments are untrusted, so sanitize each one.
  const verify = options.taskPlanVerify?.trim();
  if (verify) {
    const fromPlan = verify
      .split("&&")
      .map((c) => c.trim())
      .filter(Boolean)
      .filter((c) => sanitizeCommand(c) !== null);
    if (fromPlan.length > 0) {
      return { commands: fromPlan, source: "task-plan" };
    }
  }

  // 3. Conventional package.json scripts.
  const pkgPath = join(options.cwd, "package.json");
  if (existsSync(pkgPath)) {
    try {
      const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
      if (pkg && typeof pkg === "object" && pkg.scripts && typeof pkg.scripts === "object") {
        const fromScripts = PACKAGE_SCRIPT_KEYS.filter(
          (key) => typeof pkg.scripts[key] === "string",
        ).map((key) => `npm run ${key}`);
        if (fromScripts.length > 0) {
          return { commands: fromScripts, source: "package-json" };
        }
      }
    } catch {
      // Malformed package.json — fall through to "none".
    }
  }

  // 4. Nothing discovered.
  return { commands: [], source: "none" };
}
// ─── Failure Context Formatting ──────────────────────────────────────────────
/** Maximum chars of stderr to include per failed check in failure context. */
const MAX_STDERR_PER_CHECK = 2_000;
/** Maximum total chars for the combined failure context output. */
const MAX_FAILURE_CONTEXT_CHARS = 10_000;
/**
 * Render failed verification checks as a prompt-injectable markdown block.
 *
 * Each failure becomes a heading (command + exit code) followed by a fenced
 * stderr excerpt. Per-check stderr is capped at 2 000 chars; the combined
 * body is capped at 10 000 chars. Returns "" when nothing failed.
 */
export function formatFailureContext(result: VerificationResult): string {
  const failed = result.checks.filter((check) => check.exitCode !== 0);
  if (failed.length === 0) return "";

  const sections = failed.map((check) => {
    const raw = check.stderr ?? "";
    const excerpt =
      raw.length > MAX_STDERR_PER_CHECK
        ? raw.slice(0, MAX_STDERR_PER_CHECK) + "\n…[truncated]"
        : raw;
    return `### ❌ \`${check.command}\` (exit code ${check.exitCode})\n\`\`\`stderr\n${excerpt}\n\`\`\``;
  });

  const header = "## Verification Failures\n\n";
  let body = sections.join("\n\n");
  if (header.length + body.length > MAX_FAILURE_CONTEXT_CHARS) {
    body =
      body.slice(0, MAX_FAILURE_CONTEXT_CHARS - header.length) +
      "\n\n…[remaining failures truncated]";
  }
  return header + body;
}
// ─── Gate Execution ─────────────────────────────────────────────────────────
/** Characters that indicate shell injection when found in a command string. */
const SHELL_INJECTION_PATTERN = /[;|`]|\$\(/;
/**
 * Reject command strings containing obvious shell-injection metacharacters
 * (";", "|", backtick, or "$("). Returns the command untouched when clean,
 * or null when suspicious.
 */
function sanitizeCommand(cmd: string): string | null {
  return SHELL_INJECTION_PATTERN.test(cmd) ? null : cmd;
}
/** Default timeout for verification commands (ms). */
const DEFAULT_COMMAND_TIMEOUT_MS = 120_000;
/** Options accepted by runVerificationGate(). */
export interface RunVerificationGateOptions {
  // NOTE(review): basePath and unitId are never read inside runVerificationGate
  // itself — presumably consumed by callers/wrappers (e.g. evidence writing);
  // confirm before removing.
  basePath: string;
  unitId: string;
  cwd: string; // working directory for both command discovery and execution
  preferenceCommands?: string[]; // highest-priority discovery source (D003)
  taskPlanVerify?: string; // task-plan "Verify:" field; split on && and sanitized
  /** Per-command timeout in ms. Defaults to 120 000 (2 minutes). */
  commandTimeoutMs?: number;
}
/**
 * Run the verification gate: discover commands, execute each sequentially via
 * spawnSync, and return a structured result.
 *
 * - Every discovered command runs, even after earlier failures.
 * - `passed` is true when every command exits 0 (or none were discovered).
 * - Per-command stdout/stderr are truncated to 10 KB.
 */
export function runVerificationGate(options: RunVerificationGateOptions): VerificationResult {
  const startedAt = Date.now();
  const discovery = discoverCommands({
    preferenceCommands: options.preferenceCommands,
    taskPlanVerify: options.taskPlanVerify,
    cwd: options.cwd,
  });
  const timeout = options.commandTimeoutMs ?? DEFAULT_COMMAND_TIMEOUT_MS;

  const checks: VerificationCheck[] = discovery.commands.map((command) => {
    const commandStart = Date.now();
    const spawned = spawnSync(command, {
      shell: true,
      cwd: options.cwd,
      stdio: "pipe",
      encoding: "utf-8",
      timeout,
    });
    const durationMs = Date.now() - commandStart;

    let exitCode: number;
    let stderr: string;
    if (spawned.error) {
      // Spawn-level failure (command not found, etc.) — conventional 127.
      exitCode = 127;
      stderr = truncate(
        (spawned.stderr || "") + "\n" + (spawned.error as Error).message,
        MAX_OUTPUT_BYTES,
      );
    } else {
      // status is null when the process was killed by a signal — treat as failure.
      exitCode = spawned.status ?? 1;
      stderr = truncate(spawned.stderr, MAX_OUTPUT_BYTES);
    }

    return {
      command,
      exitCode,
      stdout: truncate(spawned.stdout, MAX_OUTPUT_BYTES),
      stderr,
      durationMs,
    };
  });

  // No commands discovered ⇒ empty checks and a vacuous pass, matching the
  // documented contract.
  return {
    passed: checks.every((check) => check.exitCode === 0),
    checks,
    discoverySource: discovery.source,
    timestamp: startedAt,
  };
}
// ─── Runtime Error Capture ──────────────────────────────────────────────────
/** Maximum characters of browser console text to retain per entry. */
const MAX_BROWSER_TEXT_CHARS = 500;
/** Fatal signals that indicate a crash regardless of other status fields. */
const FATAL_SIGNALS = new Set(["SIGABRT", "SIGSEGV", "SIGBUS"]);
/**
 * Injectable dependencies for captureRuntimeErrors.
 * When omitted the function uses dynamic import() to access
 * bg-shell's processes Map and browser-tools' getConsoleLogs().
 * Provide overrides in tests to avoid module mocking.
 */
export interface CaptureRuntimeErrorsOptions {
  getProcesses?: () => Map<string, unknown>;
  getConsoleLogs?: () => Array<{ type: string; text: string; timestamp: number; url: string }>;
}
/**
 * Scan bg-shell processes and browser console logs for runtime errors.
 *
 * Severity classification follows D004:
 * - bg-shell status "crashed" → blocking crash
 * - bg-shell !alive && exitCode !== 0 && exitCode !== null → blocking crash
 * - bg-shell signal SIGABRT/SIGSEGV/SIGBUS → blocking crash
 * - Browser console error matching /unhandled/i → blocking crash
 * - Browser console error (general) → non-blocking error
 * - Browser console warning with deprecation text → non-blocking warning
 * - bg-shell alive process with recentErrors → non-blocking error
 *
 * Returns RuntimeError[] — empty when both sources are unavailable.
 */
export async function captureRuntimeErrors(
  options?: CaptureRuntimeErrorsOptions,
): Promise<RuntimeError[]> {
  const found: RuntimeError[] = [];

  // ── bg-shell scan ─────────────────────────────────────────────────────
  try {
    const processes = options?.getProcesses
      ? options.getProcesses()
      : (await import("../bg-shell/process-manager.js")).processes;

    for (const [id, raw] of processes) {
      const proc = raw as {
        id: string;
        label?: string;
        status?: string;
        alive?: boolean;
        exitCode?: number | null;
        signal?: string | null;
        recentErrors?: string[];
      };
      const name = proc.label || proc.id || id;

      // Any of the three crash conditions yields the same blocking entry.
      const fatalSignal = !!proc.signal && FATAL_SIGNALS.has(proc.signal);
      const crashedStatus = proc.status === "crashed";
      const deadWithNonZeroExit =
        !proc.alive &&
        proc.exitCode !== 0 &&
        proc.exitCode !== null &&
        proc.exitCode !== undefined;

      if (fatalSignal || crashedStatus || deadWithNonZeroExit) {
        found.push({
          source: "bg-shell",
          severity: "crash",
          message: buildBgShellMessage(name, proc.exitCode, proc.signal, proc.recentErrors),
          blocking: true,
        });
        continue;
      }

      // Alive process with recent errors — worth surfacing, but non-blocking.
      if (proc.alive && proc.recentErrors && proc.recentErrors.length > 0) {
        found.push({
          source: "bg-shell",
          severity: "error",
          message: `[${name}] recent errors: ${proc.recentErrors.slice(0, 3).join("; ")}`,
          blocking: false,
        });
      }
    }
  } catch {
    // bg-shell not available — skip silently
  }

  // ── browser console scan ──────────────────────────────────────────────
  try {
    const logs = options?.getConsoleLogs
      ? options.getConsoleLogs()
      : (await import("../browser-tools/state.js")).getConsoleLogs();

    for (const entry of logs) {
      // The regex tests the full text; only the stored message is clipped.
      const clipped =
        entry.text.length > MAX_BROWSER_TEXT_CHARS
          ? entry.text.slice(0, MAX_BROWSER_TEXT_CHARS) + "…[truncated]"
          : entry.text;

      if (entry.type === "error") {
        // Unhandled rejection / unhandled error → blocking crash;
        // any other console.error → non-blocking error.
        const isUnhandled = /unhandled/i.test(entry.text);
        found.push({
          source: "browser",
          severity: isUnhandled ? "crash" : "error",
          message: clipped,
          blocking: isUnhandled,
        });
      } else if (entry.type === "warning" && /deprecated/i.test(entry.text)) {
        // Deprecation warning → non-blocking warning.
        found.push({
          source: "browser",
          severity: "warning",
          message: clipped,
          blocking: false,
        });
      }
      // Non-deprecation warnings are intentionally ignored.
    }
  } catch {
    // browser-tools not available — skip silently
  }

  return found;
}
/** Build a human-readable message for a bg-shell process error. */
function buildBgShellMessage(
  name: string,
  exitCode: number | null | undefined,
  signal: string | null | undefined,
  recentErrors: string[] | undefined,
): string {
  const parts: string[] = [`[${name}]`];
  if (signal) parts.push(`signal=${signal}`);
  if (exitCode != null) parts.push(`exitCode=${exitCode}`);
  if (recentErrors && recentErrors.length > 0) {
    parts.push(`errors: ${recentErrors.slice(0, 3).join("; ")}`);
  }
  return parts.join(" ");
}
// ─── Dependency Audit ───────────────────────────────────────────────────────
/** Top-level dependency files that trigger an audit when changed. */
const DEPENDENCY_FILES = new Set([
  "package.json",
  "package-lock.json",
  "pnpm-lock.yaml",
  "yarn.lock",
  "bun.lockb",
]);
/** Severity buckets npm audit can report that we surface to users. */
const AUDIT_SEVERITIES: ReadonlySet<string> = new Set(["low", "moderate", "high", "critical"]);
/**
 * Injectable dependencies for runDependencyAudit (D023 pattern).
 * When omitted the function uses real git/npm via spawnSync.
 * Provide overrides in tests to avoid real git repos and npm registries.
 */
export interface DependencyAuditOptions {
  gitDiff?: (cwd: string) => string[];
  npmAudit?: (cwd: string) => { stdout: string; exitCode: number };
}
/**
 * Default gitDiff: runs `git diff --name-only HEAD` and returns file paths.
 * Returns empty array on any failure (non-git dir, git not found, etc.).
 */
function defaultGitDiff(cwd: string): string[] {
  try {
    const diff = spawnSync("git", ["diff", "--name-only", "HEAD"], {
      cwd,
      encoding: "utf-8",
      timeout: 10_000,
    });
    if (diff.status !== 0 || !diff.stdout) return [];
    return diff.stdout.trim().split("\n").filter(Boolean);
  } catch {
    return [];
  }
}
/**
 * Default npmAudit: runs `npm audit --audit-level=moderate --json`.
 * Returns { stdout, exitCode }. Non-zero exit is expected when vulnerabilities exist.
 */
function defaultNpmAudit(cwd: string): { stdout: string; exitCode: number } {
  const audit = spawnSync("npm", ["audit", "--audit-level=moderate", "--json"], {
    cwd,
    encoding: "utf-8",
    timeout: 60_000,
  });
  return { stdout: audit.stdout ?? "", exitCode: audit.status ?? 1 };
}
/**
 * Detect dependency file changes and run npm audit when any are found.
 *
 * - Asks gitDiff for changed files and checks for top-level dependency files
 *   (a nested workspace manifest like packages/foo/package.json is ignored).
 * - Returns [] when no dependency files changed.
 * - Otherwise runs npmAudit and parses the JSON into AuditWarning[].
 * - Never throws — every failure path returns [].
 * - A non-zero npm audit exit code is expected (vulnerabilities found), not an error.
 */
export function runDependencyAudit(
  cwd: string,
  options?: DependencyAuditOptions,
): AuditWarning[] {
  try {
    const listChangedFiles = options?.gitDiff ?? defaultGitDiff;
    const audit = options?.npmAudit ?? defaultNpmAudit;

    // A file is "top-level" when its path is exactly its basename.
    const touchesDependencies = listChangedFiles(cwd).some(
      (filePath) => filePath === basename(filePath) && DEPENDENCY_FILES.has(filePath),
    );
    if (!touchesDependencies) return [];

    let report: Record<string, unknown>;
    try {
      report = JSON.parse(audit(cwd).stdout);
    } catch {
      return []; // not valid JSON — nothing to report
    }

    const vulns = report.vulnerabilities;
    if (!vulns || typeof vulns !== "object") return [];

    const warnings: AuditWarning[] = [];
    for (const [name, entry] of Object.entries(vulns as Record<string, unknown>)) {
      const warning = toAuditWarning(name, entry);
      if (warning) warnings.push(warning);
    }
    return warnings;
  } catch {
    return [];
  }
}
/** Convert one npm-audit vulnerability entry into an AuditWarning, or null when unusable. */
function toAuditWarning(name: string, raw: unknown): AuditWarning | null {
  if (!raw || typeof raw !== "object") return null;
  const vuln = raw as { severity?: string; fixAvailable?: boolean; via?: unknown[] };

  const severity = vuln.severity;
  if (!severity || !AUDIT_SEVERITIES.has(severity)) return null;

  // The first object entry in `via` carries the advisory title/url; string
  // entries are just references to other vulnerable packages and are skipped.
  let title = name;
  let url = "";
  if (Array.isArray(vuln.via)) {
    const advisory = vuln.via.find(
      (v) => v !== null && typeof v === "object" && !Array.isArray(v),
    ) as { title?: string; url?: string } | undefined;
    if (advisory?.title) title = advisory.title;
    if (advisory?.url) url = advisory.url;
  }

  return {
    name,
    severity: severity as AuditWarning["severity"],
    title,
    url,
    fixAvailable: vuln.fixAvailable === true,
  };
}

View file

@ -12,7 +12,7 @@
* Uses JSON mode to capture structured output from subagents.
*/
import { spawn } from "node:child_process";
import { spawn, type ChildProcess } from "node:child_process";
import * as crypto from "node:crypto";
import * as fs from "node:fs";
import * as os from "node:os";
@ -38,6 +38,44 @@ import { registerWorker, updateWorker } from "./worker-registry.js";
const MAX_PARALLEL_TASKS = 8;
const MAX_CONCURRENCY = 4;
const COLLAPSED_ITEM_COUNT = 10;
// Every subagent child process that is currently running, so a session
// shutdown can terminate them all.
const liveSubagentProcesses = new Set<ChildProcess>();

/**
 * Terminate all live subagent processes: send SIGTERM, wait up to 500ms
 * per process for it to exit, then SIGKILL anything still running.
 */
async function stopLiveSubagents(): Promise<void> {
  const running = [...liveSubagentProcesses];
  if (running.length === 0) return;

  // First pass: polite termination request.
  for (const child of running) {
    try {
      child.kill("SIGTERM");
    } catch {
      /* ignore */
    }
  }

  // Give each process up to 500ms to exit before escalating.
  const waitForExit = (child: ChildProcess): Promise<void> =>
    new Promise<void>((resolve) => {
      const timer = setTimeout(resolve, 500);
      child.once("exit", () => {
        clearTimeout(timer);
        resolve();
      });
    });
  await Promise.all(running.map(waitForExit));

  // Second pass: force-kill anything that ignored SIGTERM.
  for (const child of running) {
    if (child.exitCode === null) {
      try {
        child.kill("SIGKILL");
      } catch {
        /* ignore */
      }
    }
  }
}
function formatTokens(count: number): string {
if (count < 1000) return count.toString();
@ -302,6 +340,7 @@ async function runSingleAgent(
[process.env.GSD_BIN_PATH!, ...extensionArgs, ...args],
{ cwd: cwd ?? defaultCwd, shell: false, stdio: ["ignore", "pipe", "pipe"] },
);
liveSubagentProcesses.add(proc);
let buffer = "";
const processLine = (line: string) => {
@ -353,11 +392,13 @@ async function runSingleAgent(
});
proc.on("close", (code) => {
liveSubagentProcesses.delete(proc);
if (buffer.trim()) processLine(buffer);
resolve(code ?? 0);
});
proc.on("error", () => {
liveSubagentProcesses.delete(proc);
resolve(1);
});
@ -432,6 +473,10 @@ const SubagentParams = Type.Object({
});
export default function (pi: ExtensionAPI) {
pi.on("session_shutdown", async () => {
await stopLiveSubagents();
});
// /subagent command - list available agents
pi.registerCommand("subagent", {
description: "List available subagents",

View file

@ -0,0 +1,71 @@
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import { getBudgetAlertLevel, getNewBudgetAlertLevel, getBudgetEnforcementAction } from "../resources/extensions/gsd/auto-budget.js";
describe("auto-budget", () => {
  describe("getBudgetAlertLevel", () => {
    it("returns 0 for low usage", () => {
      for (const usage of [0, 0.5, 0.74]) {
        assert.equal(getBudgetAlertLevel(usage), 0);
      }
    });

    it("returns 75 at 75%", () => {
      for (const usage of [0.75, 0.79]) {
        assert.equal(getBudgetAlertLevel(usage), 75);
      }
    });

    it("returns 80 at 80%", () => {
      for (const usage of [0.80, 0.89]) {
        assert.equal(getBudgetAlertLevel(usage), 80);
      }
    });

    it("returns 90 at 90%", () => {
      for (const usage of [0.90, 0.99]) {
        assert.equal(getBudgetAlertLevel(usage), 90);
      }
    });

    it("returns 100 at 100%+", () => {
      for (const usage of [1.0, 1.5]) {
        assert.equal(getBudgetAlertLevel(usage), 100);
      }
    });
  });

  describe("getNewBudgetAlertLevel", () => {
    it("returns null when at same or lower level", () => {
      // [last alerted level, current usage fraction]
      const noChangeCases: Array<[number, number]> = [
        [75, 0.75],
        [90, 0.80],
        [80, 0.5],
      ];
      for (const [lastLevel, usage] of noChangeCases) {
        assert.equal(getNewBudgetAlertLevel(lastLevel, usage), null);
      }
    });

    it("returns new level when crossing up", () => {
      // [last alerted level, current usage fraction, expected new level]
      const crossingCases: Array<[number, number, number]> = [
        [0, 0.75, 75],
        [75, 0.80, 80],
        [80, 0.90, 90],
        [90, 1.0, 100],
      ];
      for (const [lastLevel, usage, expected] of crossingCases) {
        assert.equal(getNewBudgetAlertLevel(lastLevel, usage), expected);
      }
    });

    it("returns null for 0% usage", () => {
      assert.equal(getNewBudgetAlertLevel(0, 0), null);
    });
  });

  describe("getBudgetEnforcementAction", () => {
    it("returns none when under budget", () => {
      assert.equal(getBudgetEnforcementAction("halt", 0.5), "none");
      assert.equal(getBudgetEnforcementAction("pause", 0.99), "none");
    });

    it("returns halt when at ceiling with halt enforcement", () => {
      assert.equal(getBudgetEnforcementAction("halt", 1.0), "halt");
    });

    it("returns pause when at ceiling with pause enforcement", () => {
      assert.equal(getBudgetEnforcementAction("pause", 1.0), "pause");
    });

    it("returns warn when at ceiling with warn enforcement", () => {
      assert.equal(getBudgetEnforcementAction("warn", 1.0), "warn");
    });
  });
});

View file

@ -0,0 +1,46 @@
import { describe, it, beforeEach } from "node:test";
import assert from "node:assert/strict";
import {
markToolStart,
markToolEnd,
getOldestInFlightToolAgeMs,
getInFlightToolCount,
clearInFlightTools,
} from "../resources/extensions/gsd/auto-tool-tracking.js";
describe("auto-tool-tracking", () => {
  // Convenience assertion for the number of tools currently tracked.
  const expectInFlight = (expected: number) =>
    assert.equal(getInFlightToolCount(), expected);

  // Every test starts from a clean slate.
  beforeEach(() => {
    clearInFlightTools();
  });

  it("tracks tool start and end", () => {
    expectInFlight(0);
    markToolStart("tool-1", true);
    expectInFlight(1);
    markToolEnd("tool-1");
    expectInFlight(0);
  });

  it("skips tracking when not active", () => {
    markToolStart("tool-1", false);
    expectInFlight(0);
  });

  it("returns 0 age when no tools in flight", () => {
    assert.equal(getOldestInFlightToolAgeMs(), 0);
  });

  it("returns positive age for in-flight tools", () => {
    markToolStart("tool-1", true);
    // Freshly started, so the measured age must still be tiny (< 100ms)
    assert.ok(getOldestInFlightToolAgeMs() < 100);
  });

  it("clears all in-flight tools", () => {
    for (const toolId of ["tool-1", "tool-2"]) {
      markToolStart(toolId, true);
    }
    expectInFlight(2);
    clearInFlightTools();
    expectInFlight(0);
  });
});

View file

@ -0,0 +1,61 @@
import test from "node:test";
import assert from "node:assert/strict";
import {
startProcess,
cleanupAll,
cleanupSessionProcesses,
processes,
} from "../resources/extensions/bg-shell/process-manager.ts";
/**
 * Probe whether a PID refers to a live process by sending signal 0
 * (an existence/permission check that delivers no actual signal).
 */
function isPidAlive(pid: number | undefined): boolean {
  // Reject undefined and non-positive PIDs up front (0 would signal the
  // whole process group).
  if (!pid || pid <= 0) return false;
  try {
    process.kill(pid, 0);
  } catch {
    return false;
  }
  return true;
}
// Use a shell-native sleeper so the test exercises bg_shell's real spawn path
// without relying on platform-specific quoting for `node -e "..."`
const sleeperCommand = "sleep 30";

test("cleanupSessionProcesses reaps only session-scoped processes from the previous session", async () => {
  // Small pause helper so the settle/verify waits below read clearly.
  const delay = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));
  // Spawn one sleeper owned by the given session, optionally persistent.
  const launch = (ownerSessionFile: string, persistAcrossSessions = false) =>
    persistAcrossSessions
      ? startProcess({
          command: sleeperCommand,
          cwd: process.cwd(),
          ownerSessionFile,
          persistAcrossSessions: true,
        })
      : startProcess({
          command: sleeperCommand,
          cwd: process.cwd(),
          ownerSessionFile,
        });

  const owned = launch("session-a");
  const persistent = launch("session-a", true);
  const foreign = launch("session-b");

  try {
    await delay(150);
    assert.equal(isPidAlive(owned.proc.pid), true, "owned process should be alive before cleanup");
    assert.equal(isPidAlive(persistent.proc.pid), true, "persistent process should be alive before cleanup");
    assert.equal(isPidAlive(foreign.proc.pid), true, "foreign process should be alive before cleanup");

    const removed = await cleanupSessionProcesses("session-a", { graceMs: 200 });
    assert.deepEqual(removed.sort(), [owned.id], "only the session-scoped process should be reaped");

    await delay(150);
    assert.equal(isPidAlive(owned.proc.pid), false, "owned process should be terminated");
    assert.equal(isPidAlive(persistent.proc.pid), true, "persistent process should survive cleanup");
    assert.equal(isPidAlive(foreign.proc.pid), true, "foreign process should survive cleanup");

    assert.equal(processes.get(owned.id)?.persistAcrossSessions, false);
    assert.equal(processes.get(persistent.id)?.persistAcrossSessions, true);
  } finally {
    cleanupAll();
  }
});