test(gsd): add pause wiring and integration tests for enhanced verification
- pre-execution-pause-wiring.test.ts: Tests blocking check → pause control flow
- enhanced-verification-integration.test.ts: End-to-end integration coverage

Verifies that blocking pre-execution failures trigger auto-mode pause and that the enhanced verification pipeline integrates correctly with existing verification infrastructure.
This commit is contained in:
parent
8f2c544a91
commit
9711ac3efa
4 changed files with 988 additions and 9 deletions
6
package-lock.json
generated
6
package-lock.json
generated
|
|
@ -1,12 +1,12 @@
|
|||
{
|
||||
"name": "gsd-pi",
|
||||
"version": "2.56.0",
|
||||
"version": "2.58.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "gsd-pi",
|
||||
"version": "2.56.0",
|
||||
"version": "2.58.0",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"workspaces": [
|
||||
|
|
@ -9534,7 +9534,7 @@
|
|||
},
|
||||
"packages/pi-coding-agent": {
|
||||
"name": "@gsd/pi-coding-agent",
|
||||
"version": "2.56.0",
|
||||
"version": "2.58.0",
|
||||
"dependencies": {
|
||||
"@mariozechner/jiti": "^2.6.2",
|
||||
"@silvia-odwyer/photon-node": "^0.3.4",
|
||||
|
|
|
|||
|
|
@ -266,9 +266,7 @@ export async function runPostUnitVerification(
|
|||
}
|
||||
} catch (postExecErr) {
|
||||
// Post-execution check errors are non-fatal — log and continue
|
||||
process.stderr.write(
|
||||
`gsd-post-exec: error — ${(postExecErr as Error).message}\n`
|
||||
);
|
||||
logWarning("engine", `gsd-post-exec: error — ${(postExecErr as Error).message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -297,9 +295,7 @@ export async function runPostUnitVerification(
|
|||
);
|
||||
}
|
||||
} catch (evidenceErr) {
|
||||
process.stderr.write(
|
||||
`verification-evidence: post-exec write error — ${(evidenceErr as Error).message}\n`
|
||||
);
|
||||
logWarning("engine", `verification-evidence: post-exec write error — ${(evidenceErr as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,526 @@
|
|||
/**
|
||||
* enhanced-verification-integration.test.ts — Integration tests for enhanced verification.
|
||||
*
|
||||
* Exercises all 7 enhanced verification checks against GSD-2's actual source files.
|
||||
* This proves:
|
||||
* - R012: No false positives on production code
|
||||
* - R013: Speed targets met (<2000ms pre-execution, <1000ms post-execution per task)
|
||||
*
|
||||
* The test constructs realistic TaskRow fixtures that reference real GSD source files,
|
||||
* then runs both pre-execution and post-execution checks against them.
|
||||
*/
|
||||
|
||||
import { describe, test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { existsSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
import {
|
||||
runPreExecutionChecks,
|
||||
type PreExecutionResult,
|
||||
} from "../pre-execution-checks.ts";
|
||||
import {
|
||||
runPostExecutionChecks,
|
||||
type PostExecutionResult,
|
||||
} from "../post-execution-checks.ts";
|
||||
import type { TaskRow } from "../gsd-db.ts";
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────

// ESM modules have no __filename/__dirname; derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Path to the GSD extension source directory (relative to test file)
const GSD_SRC_DIR = join(__dirname, "..");

// Speed targets from R013: per-task time budget for each check phase, in ms.
const PRE_EXECUTION_TIMEOUT_MS = 2000;
const POST_EXECUTION_TIMEOUT_MS = 1000;
|
||||
|
||||
// ─── Test Fixtures ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Create a minimal TaskRow for testing.
|
||||
*/
|
||||
function createTask(overrides: Partial<TaskRow> = {}): TaskRow {
|
||||
return {
|
||||
milestone_id: "M001",
|
||||
slice_id: "S01",
|
||||
id: overrides.id ?? "T01",
|
||||
title: overrides.title ?? "Test Task",
|
||||
status: overrides.status ?? "pending",
|
||||
one_liner: "",
|
||||
narrative: "",
|
||||
verification_result: "",
|
||||
duration: "",
|
||||
completed_at: overrides.status === "complete" ? new Date().toISOString() : null,
|
||||
blocker_discovered: false,
|
||||
deviations: "",
|
||||
known_issues: "",
|
||||
key_files: overrides.key_files ?? [],
|
||||
key_decisions: [],
|
||||
full_summary_md: "",
|
||||
description: overrides.description ?? "",
|
||||
estimate: "",
|
||||
files: overrides.files ?? [],
|
||||
verify: "",
|
||||
inputs: overrides.inputs ?? [],
|
||||
expected_output: overrides.expected_output ?? [],
|
||||
observability_impact: "",
|
||||
full_plan_md: "",
|
||||
sequence: overrides.sequence ?? 0,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Real GSD Source Files for Testing ───────────────────────────────────────

// These are actual GSD extension source files that exist in the codebase.
// verifyTestFixturesExist() checks their presence up front, so a rename in
// the source tree (or a typo here) fails fast with a clear error.
const REAL_GSD_FILES = [
  "gsd-db.ts",
  "auto-verification.ts",
  "pre-execution-checks.ts",
  "post-execution-checks.ts",
  "state.ts",
  "errors.ts",
  "types.ts",
  "cache.ts",
  "atomic-write.ts",
];
|
||||
|
||||
// Verify the test fixture files actually exist
|
||||
function verifyTestFixturesExist(): void {
|
||||
for (const file of REAL_GSD_FILES) {
|
||||
const fullPath = join(GSD_SRC_DIR, file);
|
||||
if (!existsSync(fullPath)) {
|
||||
throw new Error(`Test fixture file does not exist: ${fullPath}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Integration Tests ───────────────────────────────────────────────────────
|
||||
|
||||
describe("Enhanced Verification Integration Tests", () => {
|
||||
  // Verify fixtures before running tests — if the source tree moved,
  // this fails with a clear "file does not exist" error instead of
  // confusing failures deeper in the suites below.
  test("test fixture files exist", () => {
    verifyTestFixturesExist();
  });
|
||||
|
||||
  // Pre-execution suite: running the planner-side checks against real, healthy
  // source must produce no blocking failures (R012) and stay inside the
  // PRE_EXECUTION_TIMEOUT_MS budget (R013).
  describe("Pre-Execution Checks on Real GSD Code", () => {
    test("runs pre-execution checks on realistic tasks referencing real files", async () => {
      // Simulate tasks that reference real GSD source files
      const tasks: TaskRow[] = [
        createTask({
          id: "T01",
          sequence: 0,
          title: "Add validation to gsd-db",
          description: `
## Steps
1. Update src/resources/extensions/gsd/gsd-db.ts to add validation
2. Read from src/resources/extensions/gsd/types.ts for type definitions
3. Update src/resources/extensions/gsd/errors.ts with new error types
4. Run tests to verify changes
`.trim(),
          files: REAL_GSD_FILES.slice(0, 4).map((f) => join(GSD_SRC_DIR, f)),
          inputs: [
            join(GSD_SRC_DIR, "types.ts"),
            join(GSD_SRC_DIR, "errors.ts"),
          ],
          expected_output: [
            join(GSD_SRC_DIR, "gsd-db.ts"),
          ],
        }),
      ];

      // Time the check run so the speed assertion below measures only the checks.
      const start = performance.now();
      const result = await runPreExecutionChecks(tasks, GSD_SRC_DIR);
      const duration = performance.now() - start;

      // R012: No blocking failures (false positives) on production code
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Expected zero blocking failures, got: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Overall status should not be fail
      assert.notEqual(result.status, "fail", "Pre-execution checks should not fail on real GSD code");

      // R013: Speed target met
      assert.ok(
        duration < PRE_EXECUTION_TIMEOUT_MS,
        `Pre-execution checks took ${duration.toFixed(0)}ms, expected <${PRE_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("handles task with code block references to real packages", async () => {
      // Task description with realistic code blocks using actual Node.js built-ins
      const tasks: TaskRow[] = [
        createTask({
          id: "T01",
          sequence: 0,
          title: "Implement file watcher",
          description: `
## Implementation

\`\`\`typescript
import { readFileSync, writeFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { existsSync } from "node:fs";

// Use existing GSD types
import type { TaskRow } from "./gsd-db.ts";
\`\`\`

Update the file watcher to use these imports.
`.trim(),
          files: [join(GSD_SRC_DIR, "auto-verification.ts")],
        }),
      ];

      const start = performance.now();
      const result = await runPreExecutionChecks(tasks, GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Speed target met
      assert.ok(
        duration < PRE_EXECUTION_TIMEOUT_MS,
        `Pre-execution checks took ${duration.toFixed(0)}ms, expected <${PRE_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("handles multi-task sequence with file dependencies", async () => {
      // Simulate a realistic task sequence where T02 depends on T01's output
      const tasks: TaskRow[] = [
        createTask({
          id: "T01",
          sequence: 0,
          title: "Create types file",
          status: "complete",
          expected_output: [join(GSD_SRC_DIR, "types.ts")],
        }),
        createTask({
          id: "T02",
          sequence: 1,
          title: "Use types in implementation",
          description: `
Read the types from src/resources/extensions/gsd/types.ts and use them.
`.trim(),
          inputs: [join(GSD_SRC_DIR, "types.ts")],
          files: [join(GSD_SRC_DIR, "gsd-db.ts")],
        }),
      ];

      const start = performance.now();
      const result = await runPreExecutionChecks(tasks, GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Speed target met
      assert.ok(
        duration < PRE_EXECUTION_TIMEOUT_MS,
        `Pre-execution checks took ${duration.toFixed(0)}ms, expected <${PRE_EXECUTION_TIMEOUT_MS}ms`
      );
    });
  });
|
||||
|
||||
  // Post-execution suite: the reviewer-side checks run synchronously on a
  // completed task's key_files; same R012/R013 expectations against the
  // tighter POST_EXECUTION_TIMEOUT_MS budget.
  describe("Post-Execution Checks on Real GSD Code", () => {
    test("runs post-execution checks on real GSD source files", () => {
      // Simulate a completed task that modified real files
      const completedTask = createTask({
        id: "T01",
        title: "Update gsd-db validation",
        status: "complete",
        key_files: [
          join(GSD_SRC_DIR, "gsd-db.ts"),
          join(GSD_SRC_DIR, "types.ts"),
        ],
      });

      const start = performance.now();
      const result = runPostExecutionChecks(completedTask, [], GSD_SRC_DIR);
      const duration = performance.now() - start;

      // R012: No blocking failures (false positives) on production code
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Expected zero blocking failures, got: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Overall status should not be fail
      assert.notEqual(result.status, "fail", "Post-execution checks should not fail on real GSD code");

      // R013: Speed target met
      assert.ok(
        duration < POST_EXECUTION_TIMEOUT_MS,
        `Post-execution checks took ${duration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("analyzes imports in real TypeScript files", () => {
      // Use auto-verification.ts which imports from multiple other GSD files
      const completedTask = createTask({
        id: "T02",
        title: "Verify auto-verification imports",
        status: "complete",
        key_files: [join(GSD_SRC_DIR, "auto-verification.ts")],
      });

      const start = performance.now();
      const result = runPostExecutionChecks(completedTask, [], GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Speed target met
      assert.ok(
        duration < POST_EXECUTION_TIMEOUT_MS,
        `Post-execution checks took ${duration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("handles multi-file task with cross-file dependencies", () => {
      // Task that touched multiple related files
      const completedTask = createTask({
        id: "T03",
        title: "Refactor state management",
        status: "complete",
        key_files: [
          join(GSD_SRC_DIR, "state.ts"),
          join(GSD_SRC_DIR, "gsd-db.ts"),
          join(GSD_SRC_DIR, "cache.ts"),
        ],
      });

      const start = performance.now();
      const result = runPostExecutionChecks(completedTask, [], GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Speed target met
      assert.ok(
        duration < POST_EXECUTION_TIMEOUT_MS,
        `Post-execution checks took ${duration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("handles task sequence with signature analysis", () => {
      // Simulate checking for signature consistency across tasks
      const priorTasks: TaskRow[] = [
        createTask({
          id: "T01",
          sequence: 0,
          title: "Define TaskRow interface",
          status: "complete",
          key_files: [join(GSD_SRC_DIR, "gsd-db.ts")],
        }),
      ];

      const completedTask = createTask({
        id: "T02",
        sequence: 1,
        title: "Use TaskRow in state module",
        status: "complete",
        key_files: [join(GSD_SRC_DIR, "state.ts")],
      });

      const start = performance.now();
      const result = runPostExecutionChecks(completedTask, priorTasks, GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Speed target met
      assert.ok(
        duration < POST_EXECUTION_TIMEOUT_MS,
        `Post-execution checks took ${duration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS}ms`
      );
    });
  });
|
||||
|
||||
  // End-to-end suite: a task is checked before execution, its completion is
  // simulated, and then it is checked after execution — both phases must stay
  // clean and within budget on the same fixture.
  describe("Combined Pre and Post Execution Flow", () => {
    test("full verification flow on realistic task lifecycle", async () => {
      // Simulate a complete task lifecycle
      const tasks: TaskRow[] = [
        createTask({
          id: "T01",
          sequence: 0,
          title: "Implement enhanced verification",
          status: "pending",
          description: `
## Steps
1. Update pre-execution-checks.ts with new validation
2. Update post-execution-checks.ts with signature analysis
3. Add integration tests

\`\`\`typescript
import { runPreExecutionChecks } from "./pre-execution-checks.ts";
import { runPostExecutionChecks } from "./post-execution-checks.ts";
\`\`\`
`.trim(),
          files: [
            join(GSD_SRC_DIR, "pre-execution-checks.ts"),
            join(GSD_SRC_DIR, "post-execution-checks.ts"),
          ],
          inputs: [
            join(GSD_SRC_DIR, "types.ts"),
            join(GSD_SRC_DIR, "gsd-db.ts"),
          ],
          expected_output: [
            join(GSD_SRC_DIR, "tests/enhanced-verification-integration.test.ts"),
          ],
        }),
      ];

      // Run pre-execution checks
      const preStart = performance.now();
      const preResult = await runPreExecutionChecks(tasks, GSD_SRC_DIR);
      const preDuration = performance.now() - preStart;

      // Verify pre-execution results
      const preBlockingFailures = preResult.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        preBlockingFailures.length,
        0,
        `Pre-execution had blocking failures: ${JSON.stringify(preBlockingFailures, null, 2)}`
      );
      assert.ok(
        preDuration < PRE_EXECUTION_TIMEOUT_MS,
        `Pre-execution took ${preDuration.toFixed(0)}ms, expected <${PRE_EXECUTION_TIMEOUT_MS}ms`
      );

      // Task after execution (simulated completion): same task, now marked
      // complete with its planned files recorded as key_files.
      const completedTask = createTask({
        ...tasks[0],
        status: "complete",
        key_files: tasks[0].files,
      });

      // Run post-execution checks
      const postStart = performance.now();
      const postResult = runPostExecutionChecks(completedTask, [], GSD_SRC_DIR);
      const postDuration = performance.now() - postStart;

      // Verify post-execution results
      const postBlockingFailures = postResult.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        postBlockingFailures.length,
        0,
        `Post-execution had blocking failures: ${JSON.stringify(postBlockingFailures, null, 2)}`
      );
      assert.ok(
        postDuration < POST_EXECUTION_TIMEOUT_MS,
        `Post-execution took ${postDuration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("handles large number of files without timeout", () => {
      // Use all available GSD source files to stress test
      const allGsdFiles = REAL_GSD_FILES.map((f) => join(GSD_SRC_DIR, f));

      const task = createTask({
        id: "T01",
        title: "Large refactor touching many files",
        status: "complete",
        key_files: allGsdFiles,
        files: allGsdFiles,
      });

      const start = performance.now();
      const result = runPostExecutionChecks(task, [], GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Should still be fast even with many files
      // Allow slightly more time for multi-file analysis but still within target
      assert.ok(
        duration < POST_EXECUTION_TIMEOUT_MS * 2, // Allow 2x for stress test
        `Multi-file post-execution took ${duration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS * 2}ms`
      );
    });
  });
|
||||
|
||||
  // Warning-quality suite: non-blocking findings on healthy code should be
  // rare, and each one must carry a category and a usable message.
  describe("Warning Quality", () => {
    test("warnings on real code are actionable, not spurious", () => {
      // Run checks on well-formed production code
      const task = createTask({
        id: "T01",
        title: "Review code quality",
        status: "complete",
        key_files: [
          join(GSD_SRC_DIR, "pre-execution-checks.ts"),
          join(GSD_SRC_DIR, "post-execution-checks.ts"),
        ],
      });

      const result = runPostExecutionChecks(task, [], GSD_SRC_DIR);

      // Extract warnings (either non-passed non-blocking, or passed with warning messages)
      const warnings = result.checks.filter(
        (c) => (!c.passed && !c.blocking) || (c.passed && c.message?.startsWith("Warning:"))
      );

      // Warnings are acceptable but should be few on well-maintained code.
      // If we get many warnings, it suggests the checks are too aggressive.
      assert.ok(
        warnings.length <= 10,
        `Too many warnings (${warnings.length}) suggests overly aggressive checks: ${JSON.stringify(warnings, null, 2)}`
      );

      // Each warning should have a clear message
      for (const warning of warnings) {
        assert.ok(warning.category, "Warning missing category");
        assert.ok(warning.message, "Warning missing message");
        assert.ok(
          warning.message.length > 10,
          `Warning message too short to be actionable: "${warning.message}"`
        );
      }
    });
  });
|
||||
});
|
||||
|
|
@ -0,0 +1,457 @@
|
|||
/**
|
||||
* pre-execution-pause-wiring.test.ts — Integration tests for pre-execution check → pauseAuto wiring.
|
||||
*
|
||||
* Tests that verify the control flow from pre-execution checks through to pauseAuto:
|
||||
* 1. When runPreExecutionChecks returns status: "fail" with blocking: true, pauseAuto is called
|
||||
* 2. When enhanced_verification_strict: true and status: "warn", pauseAuto is also called
|
||||
*
|
||||
* These are integration-level tests that exercise the actual postUnitPostVerification function
|
||||
* with controlled mocks for external dependencies.
|
||||
*/
|
||||
|
||||
import { describe, test, mock, beforeEach, afterEach } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { tmpdir } from "node:os";
|
||||
import { mkdirSync, writeFileSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
import { postUnitPostVerification, type PostUnitContext } from "../auto-post-unit.ts";
|
||||
import { AutoSession } from "../auto/session.ts";
|
||||
import { openDatabase, closeDatabase, insertMilestone, insertSlice, insertTask } from "../gsd-db.ts";
|
||||
import { invalidateAllCaches } from "../cache.ts";
|
||||
import { _clearGsdRootCache } from "../paths.ts";
|
||||
|
||||
// ─── Test Fixtures ───────────────────────────────────────────────────────────

// Shared per-test state: (re)initialized by setupTestEnvironment() and torn
// down by cleanupTestEnvironment().
let tempDir: string;      // throwaway working directory containing the .gsd tree
let dbPath: string;       // database file inside tempDir/.gsd (gsd.db)
let originalCwd: string;  // cwd before the test chdir'd into tempDir; restored on cleanup
|
||||
|
||||
/**
|
||||
* Create a minimal mock ExtensionContext.
|
||||
*/
|
||||
function makeMockCtx() {
|
||||
return {
|
||||
ui: {
|
||||
notify: mock.fn(),
|
||||
setStatus: () => {},
|
||||
setWidget: () => {},
|
||||
setFooter: () => {},
|
||||
},
|
||||
model: { id: "test-model" },
|
||||
} as any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a minimal mock ExtensionAPI.
|
||||
*/
|
||||
function makeMockPi() {
|
||||
return {
|
||||
sendMessage: mock.fn(),
|
||||
setModel: mock.fn(async () => true),
|
||||
} as any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a minimal AutoSession for testing.
|
||||
*/
|
||||
function makeMockSession(basePath: string, currentUnit?: { type: string; id: string }): AutoSession {
|
||||
const s = new AutoSession();
|
||||
s.basePath = basePath;
|
||||
s.active = true;
|
||||
if (currentUnit) {
|
||||
s.currentUnit = {
|
||||
type: currentUnit.type,
|
||||
id: currentUnit.id,
|
||||
startedAt: Date.now(),
|
||||
};
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a PostUnitContext with a mockable pauseAuto.
|
||||
*/
|
||||
function makePostUnitContext(
|
||||
s: AutoSession,
|
||||
ctx: ReturnType<typeof makeMockCtx>,
|
||||
pi: ReturnType<typeof makeMockPi>,
|
||||
pauseAutoMock: ReturnType<typeof mock.fn>,
|
||||
): PostUnitContext {
|
||||
return {
|
||||
s,
|
||||
ctx,
|
||||
pi,
|
||||
buildSnapshotOpts: () => ({}),
|
||||
lockBase: () => tempDir,
|
||||
stopAuto: mock.fn(async () => {}) as unknown as PostUnitContext["stopAuto"],
|
||||
pauseAuto: pauseAutoMock as unknown as PostUnitContext["pauseAuto"],
|
||||
updateProgressWidget: () => {},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up a temp directory with GSD structure and DB.
|
||||
* Also changes cwd so preferences loading finds the right PREFERENCES.md.
|
||||
*/
|
||||
function setupTestEnvironment(): void {
|
||||
// Save original cwd so we can restore it
|
||||
originalCwd = process.cwd();
|
||||
|
||||
tempDir = join(tmpdir(), `pre-exec-pause-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
|
||||
// Create .gsd directory structure
|
||||
const gsdDir = join(tempDir, ".gsd");
|
||||
mkdirSync(gsdDir, { recursive: true });
|
||||
|
||||
// Create milestones directory structure
|
||||
const milestonesDir = join(gsdDir, "milestones", "M001", "slices", "S01", "tasks");
|
||||
mkdirSync(milestonesDir, { recursive: true });
|
||||
|
||||
// Change cwd so loadEffectiveGSDPreferences finds our PREFERENCES.md
|
||||
process.chdir(tempDir);
|
||||
|
||||
// Clear gsdRoot cache so it finds the new .gsd directory
|
||||
_clearGsdRootCache();
|
||||
|
||||
// Initialize DB
|
||||
dbPath = join(gsdDir, "gsd.db");
|
||||
openDatabase(dbPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up test environment.
|
||||
*/
|
||||
function cleanupTestEnvironment(): void {
|
||||
// Restore original cwd before cleanup
|
||||
try {
|
||||
process.chdir(originalCwd);
|
||||
} catch {
|
||||
// Ignore if original cwd doesn't exist
|
||||
}
|
||||
|
||||
try {
|
||||
closeDatabase();
|
||||
} catch {
|
||||
// Ignore close errors
|
||||
}
|
||||
try {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
} catch {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a PREFERENCES.md file with specified preferences.
|
||||
* Uses YAML frontmatter format (---\nkey: value\n---).
|
||||
* Also invalidates caches so the preferences are re-read.
|
||||
*/
|
||||
function writePreferences(prefs: Record<string, unknown>): void {
|
||||
const yamlLines = Object.entries(prefs).map(([k, v]) => `${k}: ${JSON.stringify(v)}`);
|
||||
const prefsContent = `---
|
||||
${yamlLines.join("\n")}
|
||||
---
|
||||
|
||||
# GSD Preferences
|
||||
`;
|
||||
writeFileSync(join(tempDir, ".gsd", "PREFERENCES.md"), prefsContent);
|
||||
// Invalidate caches so the new preferences file is found
|
||||
invalidateAllCaches();
|
||||
_clearGsdRootCache();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create tasks in DB that will cause pre-execution checks to fail.
|
||||
* A task that references a non-existent file will produce a blocking failure.
|
||||
*/
|
||||
function createFailingTasks(): void {
|
||||
// Insert milestone first
|
||||
insertMilestone({ id: "M001" });
|
||||
|
||||
// Insert slice
|
||||
insertSlice({
|
||||
id: "S01",
|
||||
milestoneId: "M001",
|
||||
title: "Test Slice",
|
||||
risk: "low",
|
||||
});
|
||||
|
||||
// Create a task that references a file that doesn't exist
|
||||
// This will cause checkFilePathConsistency to produce a blocking failure
|
||||
insertTask({
|
||||
id: "T01",
|
||||
sliceId: "S01",
|
||||
milestoneId: "M001",
|
||||
title: "Task with missing file",
|
||||
status: "pending",
|
||||
planning: {
|
||||
description: "This task references a non-existent file",
|
||||
estimate: "1h",
|
||||
files: ["nonexistent-file-that-does-not-exist.ts"],
|
||||
verify: "npm test",
|
||||
inputs: [],
|
||||
expectedOutput: [],
|
||||
observabilityImpact: "",
|
||||
},
|
||||
sequence: 0,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Create tasks in DB that will produce only warnings (non-blocking issues).
|
||||
* Interface contract mismatches produce warnings, not blocking failures.
|
||||
*/
|
||||
function createWarningOnlyTasks(): void {
|
||||
// Insert milestone first
|
||||
insertMilestone({ id: "M001" });
|
||||
|
||||
// Insert slice
|
||||
insertSlice({
|
||||
id: "S01",
|
||||
milestoneId: "M001",
|
||||
title: "Test Slice",
|
||||
risk: "low",
|
||||
});
|
||||
|
||||
// Create tasks with interface contract mismatch (produces warn, not fail)
|
||||
insertTask({
|
||||
id: "T01",
|
||||
sliceId: "S01",
|
||||
milestoneId: "M001",
|
||||
title: "Task 1 with function signature",
|
||||
status: "pending",
|
||||
planning: {
|
||||
description: `
|
||||
\`\`\`typescript
|
||||
function processData(input: string): boolean
|
||||
\`\`\`
|
||||
`.trim(),
|
||||
estimate: "1h",
|
||||
files: [],
|
||||
verify: "npm test",
|
||||
inputs: [],
|
||||
expectedOutput: [],
|
||||
observabilityImpact: "",
|
||||
},
|
||||
sequence: 0,
|
||||
});
|
||||
|
||||
insertTask({
|
||||
id: "T02",
|
||||
sliceId: "S01",
|
||||
milestoneId: "M001",
|
||||
title: "Task 2 with mismatched signature",
|
||||
status: "pending",
|
||||
planning: {
|
||||
description: `
|
||||
\`\`\`typescript
|
||||
function processData(input: number): string
|
||||
\`\`\`
|
||||
`.trim(),
|
||||
estimate: "1h",
|
||||
files: [],
|
||||
verify: "npm test",
|
||||
inputs: [],
|
||||
expectedOutput: [],
|
||||
observabilityImpact: "",
|
||||
},
|
||||
sequence: 1,
|
||||
});
|
||||
}
|
||||
|
||||
// ─── Tests ───────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("Pre-execution checks → pauseAuto wiring", () => {
|
||||
beforeEach(() => {
|
||||
setupTestEnvironment();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
cleanupTestEnvironment();
|
||||
});
|
||||
|
||||
test("pauseAuto is called when pre-execution checks return status: fail with blocking: true", async () => {
|
||||
// Set up tasks that will cause a blocking failure
|
||||
createFailingTasks();
|
||||
|
||||
// Create mocks
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, { type: "plan-slice", id: "M001/S01" });
|
||||
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
|
||||
|
||||
// Call postUnitPostVerification
|
||||
const result = await postUnitPostVerification(pctx);
|
||||
|
||||
// Verify pauseAuto was called
|
||||
assert.equal(
|
||||
pauseAutoMock.mock.callCount(),
|
||||
1,
|
||||
"pauseAuto should be called exactly once when pre-execution checks fail with blocking issues"
|
||||
);
|
||||
|
||||
// Verify return value is "stopped"
|
||||
assert.equal(
|
||||
result,
|
||||
"stopped",
|
||||
"postUnitPostVerification should return 'stopped' when pre-execution checks fail"
|
||||
);
|
||||
|
||||
// Verify UI was notified of the failure
|
||||
const notifyCalls = ctx.ui.notify.mock.calls;
|
||||
const errorNotify = notifyCalls.find(
|
||||
(call: { arguments: unknown[] }) =>
|
||||
call.arguments[1] === "error" &&
|
||||
String(call.arguments[0]).includes("Pre-execution checks failed")
|
||||
);
|
||||
assert.ok(errorNotify, "Should show error notification about pre-execution check failure");
|
||||
});
|
||||
|
||||
test("pauseAuto is called when enhanced_verification_strict: true and pre-execution returns warn", async () => {
|
||||
// Write preferences with strict mode enabled
|
||||
writePreferences({
|
||||
enhanced_verification: true,
|
||||
enhanced_verification_pre: true,
|
||||
enhanced_verification_strict: true,
|
||||
});
|
||||
|
||||
// Set up tasks that will produce only warnings (interface contract mismatch)
|
||||
createWarningOnlyTasks();
|
||||
|
||||
// Create mocks
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, { type: "plan-slice", id: "M001/S01" });
|
||||
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
|
||||
|
||||
// Call postUnitPostVerification
|
||||
const result = await postUnitPostVerification(pctx);
|
||||
|
||||
// Verify pauseAuto was called (strict mode promotes warnings to blocking)
|
||||
assert.equal(
|
||||
pauseAutoMock.mock.callCount(),
|
||||
1,
|
||||
"pauseAuto should be called when strict mode is enabled and pre-execution returns warn"
|
||||
);
|
||||
|
||||
// Verify return value is "stopped"
|
||||
assert.equal(
|
||||
result,
|
||||
"stopped",
|
||||
"postUnitPostVerification should return 'stopped' when strict mode treats warnings as blocking"
|
||||
);
|
||||
|
||||
// Verify UI was notified of the warning
|
||||
const notifyCalls = ctx.ui.notify.mock.calls;
|
||||
const warnNotify = notifyCalls.find(
|
||||
(call: { arguments: unknown[] }) =>
|
||||
call.arguments[1] === "warning" &&
|
||||
String(call.arguments[0]).includes("Pre-execution checks passed with warnings")
|
||||
);
|
||||
assert.ok(warnNotify, "Should show warning notification about pre-execution check warnings");
|
||||
});
|
||||
|
||||
test("pauseAuto is NOT called when enhanced_verification_strict: false and pre-execution returns warn", async () => {
|
||||
// Write preferences with strict mode disabled (default behavior)
|
||||
writePreferences({
|
||||
enhanced_verification: true,
|
||||
enhanced_verification_pre: true,
|
||||
enhanced_verification_strict: false,
|
||||
});
|
||||
|
||||
// Set up tasks that will produce only warnings
|
||||
createWarningOnlyTasks();
|
||||
|
||||
// Create mocks
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, { type: "plan-slice", id: "M001/S01" });
|
||||
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
|
||||
|
||||
// Call postUnitPostVerification
|
||||
const result = await postUnitPostVerification(pctx);
|
||||
|
||||
// Verify pauseAuto was NOT called (warnings don't block in non-strict mode)
|
||||
assert.equal(
|
||||
pauseAutoMock.mock.callCount(),
|
||||
0,
|
||||
"pauseAuto should NOT be called when strict mode is disabled and only warnings exist"
|
||||
);
|
||||
|
||||
// Verify return value is "continue" (not "stopped")
|
||||
assert.equal(
|
||||
result,
|
||||
"continue",
|
||||
"postUnitPostVerification should return 'continue' when warnings don't block in non-strict mode"
|
||||
);
|
||||
});
|
||||
|
||||
test("pre-execution checks are skipped when unit type is not plan-slice", async () => {
|
||||
// Set up tasks that would fail if checked
|
||||
createFailingTasks();
|
||||
|
||||
// Create mocks with execute-task unit (not plan-slice)
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, { type: "execute-task", id: "M001/S01/T01" });
|
||||
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
|
||||
|
||||
// Call postUnitPostVerification
|
||||
const result = await postUnitPostVerification(pctx);
|
||||
|
||||
// Verify pauseAuto was NOT called (pre-execution checks only run for plan-slice)
|
||||
assert.equal(
|
||||
pauseAutoMock.mock.callCount(),
|
||||
0,
|
||||
"pauseAuto should NOT be called for non-plan-slice unit types"
|
||||
);
|
||||
|
||||
// Verify return value is "continue"
|
||||
assert.equal(
|
||||
result,
|
||||
"continue",
|
||||
"postUnitPostVerification should return 'continue' for non-plan-slice unit types"
|
||||
);
|
||||
});
|
||||
|
||||
test("pre-execution checks are skipped when enhanced_verification_pre: false", async () => {
|
||||
// Write preferences with pre-execution checks disabled
|
||||
writePreferences({
|
||||
enhanced_verification: true,
|
||||
enhanced_verification_pre: false,
|
||||
});
|
||||
|
||||
// Set up tasks that would fail if checked
|
||||
createFailingTasks();
|
||||
|
||||
// Create mocks
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, { type: "plan-slice", id: "M001/S01" });
|
||||
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
|
||||
|
||||
// Call postUnitPostVerification
|
||||
const result = await postUnitPostVerification(pctx);
|
||||
|
||||
// Verify pauseAuto was NOT called (pre-execution checks disabled)
|
||||
assert.equal(
|
||||
pauseAutoMock.mock.callCount(),
|
||||
0,
|
||||
"pauseAuto should NOT be called when enhanced_verification_pre is disabled"
|
||||
);
|
||||
|
||||
// Verify return value is "continue"
|
||||
assert.equal(
|
||||
result,
|
||||
"continue",
|
||||
"postUnitPostVerification should return 'continue' when pre-execution checks are disabled"
|
||||
);
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue