feat: add live regression test harness for post-build pipeline validation (#1316)

10 tests that run against the installed gsd binary after npm publish:

1. headless query returns valid JSON
2. Empty project → pre-planning phase
3. Milestone with roadmap → planning phase
4. All tasks done → summarizing phase
5. Complete milestone → complete phase
6. Stale auto.lock doesn't block --version
7. Crash recovery query works with stale lock
8. Non-TTY exits quickly with clean error
9. Version skew detected before TTY check
10. --help works (native addon loads or falls back)

Wired into pipeline.yml test-verify job after fixture tests
and before @next promotion.

These catch the state machine / infrastructure bugs from #1308
that unit tests can't reach — they exercise deriveState through
the real gsd binary with real .gsd/ directory structures.

Part of #1308
This commit is contained in:
Tom Boucher 2026-03-18 22:22:54 -04:00 committed by GitHub
parent 0418458cf9
commit 6b61b75f3d
3 changed files with 322 additions and 1 deletions

View file

@ -95,6 +95,11 @@ jobs:
- name: Run fixture tests
run: npm run test:fixtures
- name: Run live regression tests (against installed binary)
run: |
export GSD_SMOKE_BINARY=$(which gsd)
npm run test:live-regression
- name: Promote to @next
run: npm dist-tag add gsd-pi@${{ needs.dev-publish.outputs.dev-version }} next
env:

View file

@ -78,7 +78,8 @@
"release:update-changelog": "node scripts/update-changelog.mjs",
"docker:build-runtime": "docker build --target runtime -t ghcr.io/gsd-build/gsd-pi .",
"docker:build-builder": "docker build --target builder -t ghcr.io/gsd-build/gsd-ci-builder .",
"prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && ([ \"$CI\" = 'true' ] || git diff --exit-code || (echo 'ERROR: version sync changed files — commit them before publishing' && exit 1)) && npm run build && npm run typecheck:extensions && npm run validate-pack"
"prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && ([ \"$CI\" = 'true' ] || git diff --exit-code || (echo 'ERROR: version sync changed files — commit them before publishing' && exit 1)) && npm run build && npm run typecheck:extensions && npm run validate-pack",
"test:live-regression": "node --experimental-strip-types tests/live-regression/run.ts"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",

View file

@ -0,0 +1,315 @@
/**
* Live Regression Test Harness — Post-Build Pipeline Validation
*
* These tests run AFTER `npm publish` against the installed `gsd` binary.
* They exercise the dispatch loop state machine end-to-end by:
*
* 1. Creating real `.gsd/` directory structures with milestone artifacts
* 2. Calling `gsd headless query` to verify state derivation
* 3. Verifying phase transitions match expected outcomes
* 4. Testing crash recovery (lock file lifecycle)
* 5. Testing worktree identity hash consistency
*
* These tests DO NOT require LLM API keys — they test the state machine
* and infrastructure, not the LLM execution.
*
* Run from CI pipeline after `npm install -g gsd-pi@<version>`:
* node --experimental-strip-types tests/live-regression/run.ts
*
* Or locally:
* GSD_SMOKE_BINARY=dist/loader.js node --experimental-strip-types tests/live-regression/run.ts
*/
import { execFileSync, execSync } from "child_process";
import { mkdtempSync, mkdirSync, writeFileSync, existsSync, readFileSync, rmSync, unlinkSync } from "fs";
import { join, dirname } from "path";
import { tmpdir } from "os";
// ─── Config ───────────────────────────────────────────────────────────────
// Command or script path of the gsd build under test. Taken from the
// GSD_SMOKE_BINARY environment variable; an unset or empty value selects
// the `gsd` command.
const binary = process.env.GSD_SMOKE_BINARY ? process.env.GSD_SMOKE_BINARY : "gsd";
// Running totals, reported (and turned into the exit status) at the end of the file.
let passed = 0;
let failed = 0;

/**
 * Runs one test case synchronously and records its outcome.
 *
 * A case passes when `fn` returns normally; the label is logged to stdout and
 * `passed` is incremented. Any thrown value marks the case as failed: the
 * label and a one-line description are logged to stderr and `failed` is
 * incremented. A failure never propagates, so later cases still run.
 *
 * @param label Human-readable test name printed with the result.
 * @param fn    Test body; signal failure by throwing.
 */
function run(label: string, fn: () => void): void {
  try {
    fn();
    console.log(`${label}`);
    passed++;
  } catch (err: unknown) {
    // Narrow before reading `.message`: a test body may throw anything,
    // including `undefined`, and touching a property on that would itself
    // throw from inside this handler and abort the whole harness.
    const detail = err instanceof Error && err.message ? err.message : String(err);
    console.error(`${label}`);
    console.error(` ${detail}`);
    failed++;
  }
}
/**
 * Minimal assertion helper for the harness.
 *
 * @param condition Expression expected to be true.
 * @param message   Text of the Error thrown when `condition` is false.
 * @throws Error carrying `message` when `condition` is false.
 */
function assert(condition: boolean, message: string): void {
  if (condition) {
    return;
  }
  throw new Error(message);
}
/**
 * Invokes the gsd build under test and captures its result without throwing.
 *
 * When `binary` is the literal "gsd" the command is executed directly;
 * any other value is treated as a script and run through `node`.
 * GSD_NON_INTERACTIVE=1 is always set and cannot be overridden via `env`.
 *
 * @param args CLI arguments passed to gsd.
 * @param cwd  Working directory for the child process.
 * @param env  Extra environment variables layered over `process.env`.
 * @returns `stdout` and exit `code` 0 on success (stderr is not captured on
 *          success and is returned as ""); on failure, whatever stdout/stderr
 *          the thrown error carries and its `status`, or 1 when the status is
 *          null/undefined.
 */
function gsd(args: string[], cwd: string, env?: Record<string, string>): { stdout: string; stderr: string; code: number } {
  try {
    const direct = binary === "gsd";
    const command = direct ? "gsd" : "node";
    const argv = direct ? args : [binary, ...args];
    const childEnv = { ...process.env, ...env, GSD_NON_INTERACTIVE: "1" };

    const stdout = execFileSync(command, argv, {
      cwd,
      encoding: "utf-8",
      timeout: 30_000,
      stdio: ["pipe", "pipe", "pipe"],
      env: childEnv,
    });
    return { stdout, stderr: "", code: 0 };
  } catch (err: any) {
    const stdout = err.stdout || "";
    const stderr = err.stderr || "";
    const code = err.status ?? 1;
    return { stdout, stderr, code };
  }
}
/**
 * Creates a fresh temporary directory and tries to turn it into a git
 * repository with one empty commit.
 *
 * Git setup is best-effort: if any git command fails, the directory is still
 * returned, but a warning is printed so that a later test failure caused by
 * the missing repository can be traced back to its origin.
 *
 * @param name Short tag embedded in the directory name (`gsd-live-<name>-…`).
 * @returns Absolute path of the new directory; the caller removes it.
 */
function createTempProject(name: string): string {
  const dir = mkdtempSync(join(tmpdir(), `gsd-live-${name}-`));
  const gitSetup =
    "git init && git config user.email test@test.com && git config user.name Test && git commit --allow-empty -m init";
  try {
    execSync(gitSetup, { cwd: dir, stdio: "pipe" });
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`warning: git setup failed in ${dir}: ${reason}`);
  }
  return dir;
}
/**
 * Renders the markdown text of a roadmap for milestone M001.
 *
 * Each slice becomes a checkbox bullet (checked when `done`), tagged
 * `risk:low` and `depends:[]`, followed by a quoted demo line and a blank line.
 *
 * @param slices Slices to list, in output order.
 * @returns The roadmap as a single newline-joined string.
 */
function buildMinimalRoadmap(slices: Array<{ id: string; title: string; done: boolean }>): string {
  const header = ["# M001: Test Milestone", "", "## Slices", ""];
  const entries = slices.flatMap(({ id, title, done }) => [
    `- [${done ? "x" : " "}] **${id}: ${title}** \`risk:low\` \`depends:[]\``,
    ` > Demo for ${id}`,
    "",
  ]);
  return [...header, ...entries].join("\n");
}
/**
 * Renders the markdown text of a plan for slice S01.
 *
 * Each task becomes a checkbox bullet (checked when `done`) tagged `est:5m`.
 *
 * @param tasks Tasks to list, in output order.
 * @returns The plan as a single newline-joined string.
 */
function buildMinimalPlan(tasks: Array<{ id: string; title: string; done: boolean }>): string {
  const preamble = ["# S01: Test Slice", "", "**Goal:** test", "", "## Tasks", ""];
  const bullets = tasks.map((task) => {
    const mark = task.done ? "x" : " ";
    return `- [${mark}] **${task.id}: ${task.title}** \`est:5m\``;
  });
  return preamble.concat(bullets).join("\n");
}
/**
 * Renders a task summary document: a front-matter block (parent S01,
 * milestone M001, verification passed, completed at the current time)
 * followed by a short markdown body.
 *
 * @param id Task identifier used in the front matter and the heading.
 * @returns The summary document as a single string.
 */
function buildTaskSummary(id: string): string {
  const completedAt = new Date().toISOString();
  const frontMatter = [
    "---",
    `id: ${id}`,
    "parent: S01",
    "milestone: M001",
    "duration: 5m",
    "verification_result: passed",
    `completed_at: ${completedAt}`,
    "---",
  ];
  const body = ["", `# ${id}: Done`, "", "Completed."];
  return [...frontMatter, ...body].join("\n");
}
// ─── Test: headless query returns valid JSON ──────────────────────────────
// With only an empty .gsd/milestones directory present, `headless query`
// must exit 0 and print JSON exposing a string phase (at `state.phase` or
// top-level `phase`) and, if present, an array of milestones.
run("headless query returns valid JSON on initialized project", () => {
  const projectDir = createTempProject("query");
  try {
    mkdirSync(join(projectDir, ".gsd", "milestones"), { recursive: true });

    const { code, stdout, stderr } = gsd(["headless", "query"], projectDir);
    assert(code === 0, `expected exit 0, got ${code}: ${stderr}`);

    const payload = JSON.parse(stdout);
    const phase = payload.state?.phase ?? payload.phase;
    assert(typeof phase === "string", "response should have phase field");

    const milestones = payload.milestones;
    assert(milestones === undefined || Array.isArray(milestones), "milestones should be array or undefined");
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
// ─── Test: state derivation — empty project ──────────────────────────────
// A project with no milestones must report either "pre-planning" or "idle".
run("headless query: empty project reports pre-planning", () => {
  const projectDir = createTempProject("empty");
  try {
    mkdirSync(join(projectDir, ".gsd", "milestones"), { recursive: true });

    const outcome = gsd(["headless", "query"], projectDir);
    assert(outcome.code === 0, `expected exit 0, got ${outcome.code}`);

    const parsed = JSON.parse(outcome.stdout);
    const phase = parsed.state?.phase ?? parsed.phase;
    const acceptable = phase === "pre-planning" || phase === "idle";
    assert(acceptable, `expected pre-planning or idle, got: ${phase}`);
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
// ─── Test: state derivation — milestone with roadmap ─────────────────────
// M001 has a context file, a roadmap with one open slice and an empty slice
// directory (no plan): the expected phase is "planning" with M001 active.
// The active milestone may be reported as a bare id or as an object with `id`.
run("headless query: milestone with roadmap reports planning phase", () => {
  const projectDir = createTempProject("planning");
  try {
    const milestoneDir = join(projectDir, ".gsd", "milestones", "M001");
    mkdirSync(join(milestoneDir, "slices", "S01"), { recursive: true });
    writeFileSync(join(milestoneDir, "M001-CONTEXT.md"), "# M001\n\nContext.");
    const roadmap = buildMinimalRoadmap([{ id: "S01", title: "First Slice", done: false }]);
    writeFileSync(join(milestoneDir, "M001-ROADMAP.md"), roadmap);

    const outcome = gsd(["headless", "query"], projectDir);
    assert(outcome.code === 0, `expected exit 0, got ${outcome.code}`);

    const parsed = JSON.parse(outcome.stdout);
    const phase = parsed.state?.phase ?? parsed.phase;
    assert(phase === "planning", `expected planning, got: ${phase}`);

    const active = parsed.state?.activeMilestone ?? parsed.activeMilestone;
    assert(active === "M001" || active?.id === "M001", `expected active milestone M001`);
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
// ─── Test: state derivation — all tasks done ─────────────────────────────
// The slice is still unchecked in the roadmap, but its only planned task is
// checked and has a summary file: the expected phase is "summarizing".
run("headless query: all tasks done reports summarizing phase", () => {
  const projectDir = createTempProject("summarizing");
  try {
    const milestoneDir = join(projectDir, ".gsd", "milestones", "M001");
    const sliceDir = join(milestoneDir, "slices", "S01");
    const tasksDir = join(sliceDir, "tasks");
    mkdirSync(tasksDir, { recursive: true });

    writeFileSync(join(milestoneDir, "M001-CONTEXT.md"), "# M001\n\nContext.");
    const roadmap = buildMinimalRoadmap([{ id: "S01", title: "First Slice", done: false }]);
    writeFileSync(join(milestoneDir, "M001-ROADMAP.md"), roadmap);
    const plan = buildMinimalPlan([{ id: "T01", title: "Task One", done: true }]);
    writeFileSync(join(sliceDir, "S01-PLAN.md"), plan);
    writeFileSync(join(tasksDir, "T01-SUMMARY.md"), buildTaskSummary("T01"));

    const outcome = gsd(["headless", "query"], projectDir);
    assert(outcome.code === 0, `expected exit 0, got ${outcome.code}`);

    const parsed = JSON.parse(outcome.stdout);
    const phase = parsed.state?.phase ?? parsed.phase;
    assert(phase === "summarizing", `expected summarizing, got: ${phase}`);
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
// ─── Test: state derivation — complete milestone ─────────────────────────
// M001 has every slice checked and a milestone summary. Any of "complete",
// "idle" or "pre-planning" is accepted as the resulting phase.
run("headless query: milestone with summary reports complete", () => {
  const projectDir = createTempProject("complete");
  try {
    const milestoneDir = join(projectDir, ".gsd", "milestones", "M001");
    mkdirSync(milestoneDir, { recursive: true });
    const roadmap = buildMinimalRoadmap([{ id: "S01", title: "Done", done: true }]);
    writeFileSync(join(milestoneDir, "M001-ROADMAP.md"), roadmap);
    writeFileSync(join(milestoneDir, "M001-SUMMARY.md"), "# M001 Summary\n\nComplete.");

    const outcome = gsd(["headless", "query"], projectDir);
    assert(outcome.code === 0, `expected exit 0, got ${outcome.code}`);

    const parsed = JSON.parse(outcome.stdout);
    const phase = parsed.state?.phase ?? parsed.phase;
    const acceptable = phase === "complete" || phase === "idle" || phase === "pre-planning";
    assert(acceptable, `expected complete/idle/pre-planning, got: ${phase}`);
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
// ─── Test: lock file lifecycle ───────────────────────────────────────────
// A leftover .gsd/auto.lock must not stop `--version` from exiting 0 and
// printing something that contains a dotted x.y.z version number.
run("stale auto.lock with dead PID does not block --version", () => {
  const projectDir = createTempProject("stale-lock");
  try {
    const gsdDir = join(projectDir, ".gsd");
    mkdirSync(gsdDir, { recursive: true });

    // Write a lock with a PID that doesn't exist
    const staleLock = {
      pid: 99999999,
      startedAt: new Date().toISOString(),
      unitType: "starting",
      unitId: "bootstrap",
      unitStartedAt: new Date().toISOString(),
      completedUnits: 0,
    };
    writeFileSync(join(gsdDir, "auto.lock"), JSON.stringify(staleLock));

    const outcome = gsd(["--version"], projectDir);
    assert(outcome.code === 0, `--version should succeed even with stale lock, got code ${outcome.code}`);

    const looksLikeVersion = /\d+\.\d+\.\d+/.test(outcome.stdout.trim());
    assert(looksLikeVersion, `should output version, got: ${outcome.stdout}`);
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
// ─── Test: crash recovery query ──────────────────────────────────────────
// Leaves behind an auto.lock describing an interrupted execute-task unit and
// checks that `headless query` still exits 0. Only the exit code is verified;
// no recovery message is inspected, so the label says exactly that.
run("crash recovery: headless query works with stale auto.lock", () => {
  const dir = createTempProject("crash-recovery");
  try {
    const gsdDir = join(dir, ".gsd");
    mkdirSync(join(gsdDir, "milestones"), { recursive: true });
    writeFileSync(join(gsdDir, "auto.lock"), JSON.stringify({
      pid: 99999999,
      startedAt: new Date().toISOString(),
      unitType: "execute-task",
      unitId: "M001/S01/T02",
      unitStartedAt: new Date().toISOString(),
      completedUnits: 5,
    }));
    // headless query should still work — lock is for auto-mode, not query
    const result = gsd(["headless", "query"], dir);
    // Include the exit code and stderr so a CI failure is diagnosable from the log alone.
    assert(result.code === 0, `query should succeed with stale lock, got code ${result.code}: ${result.stderr}`);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
// ─── Test: TTY check fires before heavy initialization ───────────────────
// Running gsd with no arguments from this harness (stdio is piped) must exit
// with code 1 in under five seconds and explain itself on stderr.
run("non-TTY invocation exits quickly with clean error", () => {
  const projectDir = createTempProject("tty-check");
  try {
    const startedAt = Date.now();
    const outcome = gsd([], projectDir); // No args, no TTY
    const elapsed = Date.now() - startedAt;

    assert(outcome.code === 1, `expected exit 1 for non-TTY, got ${outcome.code}`);
    assert(elapsed < 5000, `should exit within 5s, took ${elapsed}ms`);

    const mentionsTty = ["TTY", "terminal", "Interactive"].some((word) => outcome.stderr.includes(word));
    assert(mentionsTty, `should mention TTY requirement in stderr`);
  } finally {
    rmSync(projectDir, { recursive: true, force: true });
  }
});
// ─── Test: version skew detection ────────────────────────────────────────
// Plants a managed-resources.json claiming gsdVersion 999.0.0 under
// <HOME>/.gsd/agent and runs gsd with HOME pointing at the temp project.
// NOTE(review): only the exit code is checked, and the non-TTY error also
// yields exit 1, so this cannot tell the two failure paths apart.
run("version skew is detected before TTY check", () => {
  const dir = createTempProject("version-skew");
  try {
    // Set HOME to the temp dir so GSD reads the fake agent dir
    const fakeHome = dir;
    const agentDir = join(fakeHome, ".gsd", "agent");
    mkdirSync(agentDir, { recursive: true });
    // Create a fake managed-resources.json with a future version
    writeFileSync(join(agentDir, "managed-resources.json"), JSON.stringify({
      gsdVersion: "999.0.0",
    }));
    const result = gsd([], dir, { HOME: fakeHome });
    // Should either exit with version mismatch or TTY error — both are fine
    assert(result.code === 1, `expected exit 1, got ${result.code}: ${result.stderr}`);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
// ─── Test: native addon graceful fallback ────────────────────────────────
// `--help` is run from the harness's own working directory; it must exit 0
// and print text containing "gsd" or "usage" (case-insensitive).
run("gsd --help works (native addon loads or falls back gracefully)", () => {
  const { code, stdout } = gsd(["--help"], process.cwd());
  assert(code === 0, `--help should exit 0, got ${code}`);

  const helpText = stdout.toLowerCase();
  const recognisable = helpText.includes("gsd") || helpText.includes("usage");
  assert(recognisable, `help output should contain gsd or usage`);
});
// ─── Summary ─────────────────────────────────────────────────────────────
// Print the totals, then exit with status 1 if any case failed.
const summaryLine = `\nLive regression: ${passed} passed, ${failed} failed`;
console.log(summaryLine);
if (failed > 0) {
  process.exit(1);
}