feat(M001): Verification Enforcement (#891)
* docs: project plan — 4 milestones * chore(M001): record integration branch * chore(M001/S01): auto-commit after research-slice * docs(S01): add slice plan * chore: update state to S01 execution * chore(M001/S01/T01): auto-commit after execute-task * chore(M001/S01/T02): auto-commit after execute-task * chore(M001/S01/T03): auto-commit after execute-task * chore(M001/S01): auto-commit after complete-slice * chore(M001/S01): auto-commit after reassess-roadmap * chore(M001/S02): auto-commit after research-slice * docs(S02): add slice plan * chore(M001/S02/T01): auto-commit after execute-task * chore(M001/S02/T02): auto-commit after execute-task * test(S02/T03): Added evidence_block_missing and evidence_block_placehol… - src/resources/extensions/gsd/observability-validator.ts - src/resources/extensions/gsd/tests/verification-evidence.test.ts * chore(M001/S02): auto-commit after complete-slice * chore(M001/S02): auto-commit after reassess-roadmap * chore(M001/S03): auto-commit after research-slice * docs(S03): add slice plan * fix(S03/T01): Added `formatFailureContext` pure function and retry meta… - src/resources/extensions/gsd/verification-gate.ts - src/resources/extensions/gsd/verification-evidence.ts - src/resources/extensions/gsd/tests/verification-gate.test.ts - src/resources/extensions/gsd/tests/verification-evidence.test.ts * fix(S03/T02): Wired verification gate auto-fix retry loop into auto.ts… - src/resources/extensions/gsd/auto.ts * chore(M001/S03): auto-commit after complete-slice * chore(M001/S03): auto-commit after reassess-roadmap * chore(M001/S04): auto-commit after research-slice * docs(S04): add slice plan * test(S04/T01): Added RuntimeError interface and captureRuntimeErrors()… - src/resources/extensions/gsd/types.ts - src/resources/extensions/gsd/verification-gate.ts - src/resources/extensions/gsd/tests/verification-gate.test.ts * test(S04/T02): Integrated captureRuntimeErrors() into auto.ts gate bloc… - src/resources/extensions/gsd/auto.ts - 
src/resources/extensions/gsd/verification-evidence.ts - src/resources/extensions/gsd/tests/verification-evidence.test.ts * chore(M001/S04): auto-commit after complete-slice * chore(M001/S04): auto-commit after reassess-roadmap * chore(M001/S05): auto-commit after research-slice * docs(S05): add slice plan * test(S05/T01): Added AuditWarning type, runDependencyAudit() with git d… - "src/resources/extensions/gsd/types.ts" - "src/resources/extensions/gsd/verification-gate.ts" - "src/resources/extensions/gsd/tests/verification-gate.test.ts" * feat(S05/T02): Wired runDependencyAudit() into the verification gate pi… - src/resources/extensions/gsd/verification-evidence.ts - src/resources/extensions/gsd/auto.ts - src/resources/extensions/gsd/tests/verification-evidence.test.ts * chore(M001/S05): auto-commit after complete-slice * chore(M001): auto-commit after validate-milestone * chore(M001): auto-commit after complete-milestone * feat(M001): Verification Enforcement Completed slices: - S01: Built-in Verification Gate - S02: Structured Evidence Format - S03: Auto-Fix Retry Loop - S04: Runtime Error Capture - S05: Dependency Security Scan Branch: milestone/M001 * chore(M002): record integration branch * chore(M003): record integration branch * chore(M004): record integration branch * fix(M001): Address verification gate review feedback 1. Add 120s default timeout to spawnSync in runVerificationGate (configurable via commandTimeoutMs) — prevents hanging commands from deadlocking the system 2. Sanitize taskPlanVerify commands — reject strings containing ;, |, backticks, or $() shell injection patterns 3. Clear verificationRetryCount in pauseAuto — previously only pendingVerificationRetry was cleared, leaving stale retry state on resume Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(ci): remove .gsd/ and .audits/ from tracking These directories were accidentally included via M001 milestone auto-commits. Both are already in .gitignore. 
The no-gsd-dir CI check correctly catches this. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
23b89c64c9
commit
1dd32c635f
11 changed files with 2738 additions and 4 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -63,3 +63,7 @@ TODOS.md
|
|||
.gsd/DISCUSSION-MANIFEST.json
|
||||
.gsd/milestones/**/*-CONTINUE.md
|
||||
.gsd/milestones/**/continue.md
|
||||
|
||||
# ── GSD baseline (auto-generated) ──
|
||||
.gsd/forensics/
|
||||
.gsd/parallel/
|
||||
|
|
|
|||
|
|
@ -18,8 +18,10 @@ import type {
|
|||
|
||||
import { deriveState } from "./state.js";
|
||||
import type { BudgetEnforcementMode, GSDState } from "./types.js";
|
||||
import { loadFile, parseRoadmap, getManifestStatus, resolveAllOverrides, parseSummary } from "./files.js";
|
||||
import { loadFile, parseRoadmap, getManifestStatus, resolveAllOverrides, parsePlan, parseSummary } from "./files.js";
|
||||
import { loadPrompt } from "./prompt-loader.js";
|
||||
import { runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "./verification-gate.js";
|
||||
import { writeVerificationJSON } from "./verification-evidence.js";
|
||||
export { inlinePriorMilestoneSummary } from "./files.js";
|
||||
import { collectSecretsFromManifest } from "../get-secrets-from-user.js";
|
||||
import {
|
||||
|
|
@ -370,6 +372,11 @@ function escapeStaleWorktree(base: string): string {
|
|||
/** Crash recovery prompt — set by startAuto, consumed by first dispatchNextUnit */
|
||||
let pendingCrashRecovery: string | null = null;
|
||||
|
||||
/** Pending verification retry — set when gate fails with retries remaining, consumed by dispatchNextUnit */
|
||||
let pendingVerificationRetry: { unitId: string; failureContext: string; attempt: number } | null = null;
|
||||
/** Verification retry count per unitId — separate from unitDispatchCount which tracks artifact-missing retries */
|
||||
const verificationRetryCount = new Map<string, number>();
|
||||
|
||||
/** Session file path captured at pause — used to synthesize recovery briefing on resume */
|
||||
let pausedSessionFile: string | null = null;
|
||||
|
||||
|
|
@ -730,6 +737,8 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI, reason
|
|||
clearActivityLogState();
|
||||
resetProactiveHealing();
|
||||
pendingCrashRecovery = null;
|
||||
pendingVerificationRetry = null;
|
||||
verificationRetryCount.clear();
|
||||
pausedSessionFile = null;
|
||||
_handlingAgentEnd = false;
|
||||
ctx?.ui.setStatus("gsd-auto", undefined);
|
||||
|
|
@ -767,6 +776,8 @@ export async function pauseAuto(ctx?: ExtensionContext, _pi?: ExtensionAPI): Pro
|
|||
|
||||
active = false;
|
||||
paused = true;
|
||||
pendingVerificationRetry = null;
|
||||
verificationRetryCount.clear();
|
||||
// Preserve: unitDispatchCount, currentUnit, basePath, verbose, cmdCtx,
|
||||
// completedUnits, autoStartTime, currentMilestoneId, originalModelId
|
||||
// — all needed for resume and dashboard display
|
||||
|
|
@ -1574,6 +1585,145 @@ export async function handleAgentEnd(
|
|||
}
|
||||
}
|
||||
|
||||
// ── Verification gate: run typecheck/lint/test after execute-task ──
|
||||
if (currentUnit && currentUnit.type === "execute-task") {
|
||||
try {
|
||||
const effectivePrefs = loadEffectiveGSDPreferences();
|
||||
const prefs = effectivePrefs?.preferences;
|
||||
|
||||
// Read task plan verify field from the current task's slice plan
|
||||
// unitId format is "M001/S01/T03" — extract mid, sid, tid
|
||||
const parts = currentUnit.id.split("/");
|
||||
let taskPlanVerify: string | undefined;
|
||||
if (parts.length >= 3) {
|
||||
const [mid, sid, tid] = parts;
|
||||
const planFile = resolveSliceFile(basePath, mid, sid, "PLAN");
|
||||
if (planFile) {
|
||||
const planContent = await loadFile(planFile);
|
||||
if (planContent) {
|
||||
const slicePlan = parsePlan(planContent);
|
||||
const taskEntry = slicePlan?.tasks?.find(t => t.id === tid);
|
||||
taskPlanVerify = taskEntry?.verify;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const result = runVerificationGate({
|
||||
basePath,
|
||||
unitId: currentUnit.id,
|
||||
cwd: basePath,
|
||||
preferenceCommands: prefs?.verification_commands,
|
||||
taskPlanVerify,
|
||||
});
|
||||
|
||||
// Capture runtime errors from bg-shell and browser console
|
||||
const runtimeErrors = await captureRuntimeErrors();
|
||||
if (runtimeErrors.length > 0) {
|
||||
result.runtimeErrors = runtimeErrors;
|
||||
// Blocking runtime errors override gate pass
|
||||
if (runtimeErrors.some(e => e.blocking)) {
|
||||
result.passed = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Conditional dependency audit (R008)
|
||||
const auditWarnings = runDependencyAudit(basePath);
|
||||
if (auditWarnings.length > 0) {
|
||||
result.auditWarnings = auditWarnings;
|
||||
process.stderr.write(`verification-gate: ${auditWarnings.length} audit warning(s)\n`);
|
||||
for (const w of auditWarnings) {
|
||||
process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`);
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-fix retry preferences (R005 / D005)
|
||||
const autoFixEnabled = prefs?.verification_auto_fix !== false; // default true
|
||||
const maxRetries = typeof prefs?.verification_max_retries === "number" ? prefs.verification_max_retries : 2;
|
||||
const completionKey = `${currentUnit.type}/${currentUnit.id}`;
|
||||
|
||||
if (result.checks.length > 0) {
|
||||
const passCount = result.checks.filter(c => c.exitCode === 0).length;
|
||||
const total = result.checks.length;
|
||||
if (result.passed) {
|
||||
ctx.ui.notify(`Verification gate: ${passCount}/${total} checks passed`);
|
||||
} else {
|
||||
const failures = result.checks.filter(c => c.exitCode !== 0);
|
||||
const failNames = failures.map(f => f.command).join(", ");
|
||||
ctx.ui.notify(`Verification gate: FAILED — ${failNames}`);
|
||||
process.stderr.write(`verification-gate: ${total - passCount}/${total} checks failed\n`);
|
||||
for (const f of failures) {
|
||||
process.stderr.write(` ${f.command} exited ${f.exitCode}\n`);
|
||||
if (f.stderr) process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Log blocking runtime errors to stderr
|
||||
if (result.runtimeErrors?.some(e => e.blocking)) {
|
||||
const blockingErrors = result.runtimeErrors.filter(e => e.blocking);
|
||||
process.stderr.write(`verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`);
|
||||
for (const err of blockingErrors) {
|
||||
process.stderr.write(` [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`);
|
||||
}
|
||||
}
|
||||
|
||||
// Write verification evidence JSON artifact
|
||||
const attempt = verificationRetryCount.get(currentUnit.id) ?? 0;
|
||||
if (parts.length >= 3) {
|
||||
try {
|
||||
const [mid, sid, tid] = parts;
|
||||
const sDir = resolveSlicePath(basePath, mid, sid);
|
||||
if (sDir) {
|
||||
const tasksDir = join(sDir, "tasks");
|
||||
if (result.passed) {
|
||||
writeVerificationJSON(result, tasksDir, tid, currentUnit.id);
|
||||
} else {
|
||||
const nextAttempt = attempt + 1;
|
||||
writeVerificationJSON(result, tasksDir, tid, currentUnit.id, nextAttempt, maxRetries);
|
||||
}
|
||||
}
|
||||
} catch (evidenceErr) {
|
||||
process.stderr.write(`verification-evidence: write error — ${(evidenceErr as Error).message}\n`);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Auto-fix retry logic ──
|
||||
if (result.passed) {
|
||||
// Gate passed — clear retry state and continue normal flow
|
||||
verificationRetryCount.delete(currentUnit.id);
|
||||
pendingVerificationRetry = null;
|
||||
} else if (autoFixEnabled && attempt + 1 <= maxRetries) {
|
||||
// Gate failed, retries remaining — set up retry and return early
|
||||
const nextAttempt = attempt + 1;
|
||||
verificationRetryCount.set(currentUnit.id, nextAttempt);
|
||||
pendingVerificationRetry = {
|
||||
unitId: currentUnit.id,
|
||||
failureContext: formatFailureContext(result),
|
||||
attempt: nextAttempt,
|
||||
};
|
||||
ctx.ui.notify(`Verification failed — auto-fix attempt ${nextAttempt}/${maxRetries}`, "warning");
|
||||
// Remove completion key so dispatchNextUnit re-dispatches this unit
|
||||
completedKeySet.delete(completionKey);
|
||||
removePersistedKey(basePath, completionKey);
|
||||
return; // ← Critical: exit before DB dual-write and post-unit hooks
|
||||
} else {
|
||||
// Gate failed, retries exhausted (or auto-fix disabled) — pause for human review
|
||||
const exhaustedAttempt = attempt + 1;
|
||||
verificationRetryCount.delete(currentUnit.id);
|
||||
pendingVerificationRetry = null;
|
||||
ctx.ui.notify(
|
||||
`Verification gate FAILED after ${exhaustedAttempt > maxRetries ? exhaustedAttempt - 1 : exhaustedAttempt} retries — pausing for human review`,
|
||||
"error",
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
return;
|
||||
}
|
||||
} catch (err) {
|
||||
// Gate errors are non-fatal — log and continue
|
||||
process.stderr.write(`verification-gate: error — ${(err as Error).message}\n`);
|
||||
}
|
||||
}
|
||||
|
||||
// ── DB dual-write: re-import changed markdown files so next unit's prompts use fresh data ──
|
||||
if (isDbAvailable()) {
|
||||
try {
|
||||
|
|
@ -2975,6 +3125,17 @@ async function dispatchNextUnit(
|
|||
// Cap injected content to prevent unbounded prompt growth → OOM
|
||||
const MAX_RECOVERY_CHARS = 50_000;
|
||||
let finalPrompt = prompt;
|
||||
|
||||
// Verification retry — inject failure context so the agent can auto-fix
|
||||
if (pendingVerificationRetry) {
|
||||
const retryCtx = pendingVerificationRetry;
|
||||
pendingVerificationRetry = null;
|
||||
const capped = retryCtx.failureContext.length > MAX_RECOVERY_CHARS
|
||||
? retryCtx.failureContext.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...failure context truncated]"
|
||||
: retryCtx.failureContext;
|
||||
finalPrompt = `**VERIFICATION FAILED — AUTO-FIX ATTEMPT ${retryCtx.attempt}**\n\nThe verification gate ran after your previous attempt and found failures. Fix these issues before completing the task.\n\n${capped}\n\n---\n\n${finalPrompt}`;
|
||||
}
|
||||
|
||||
if (pendingCrashRecovery) {
|
||||
const capped = pendingCrashRecovery.length > MAX_RECOVERY_CHARS
|
||||
? pendingCrashRecovery.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...recovery briefing truncated to prevent memory exhaustion]"
|
||||
|
|
|
|||
|
|
@ -298,6 +298,27 @@ export function validateTaskSummaryContent(file: string, content: string): Valid
|
|||
});
|
||||
}
|
||||
|
||||
const evidence = getSection(content, "Verification Evidence", 2);
|
||||
if (!evidence) {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
scope: "task-summary",
|
||||
file,
|
||||
ruleId: "evidence_block_missing",
|
||||
message: "Task summary is missing `## Verification Evidence`.",
|
||||
suggestion: "Add a verification evidence table showing gate check results (command, exit code, verdict, duration).",
|
||||
});
|
||||
} else if (sectionLooksPlaceholderOnly(evidence)) {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
scope: "task-summary",
|
||||
file,
|
||||
ruleId: "evidence_block_placeholder",
|
||||
message: "Task summary verification evidence section still looks like placeholder text.",
|
||||
suggestion: "Replace placeholders with actual gate results or note that no verification commands were discovered.",
|
||||
});
|
||||
}
|
||||
|
||||
return issues;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -76,6 +76,9 @@ const KNOWN_PREFERENCE_KEYS = new Set<string>([
|
|||
"phases",
|
||||
"auto_visualize",
|
||||
"parallel",
|
||||
"verification_commands",
|
||||
"verification_auto_fix",
|
||||
"verification_max_retries",
|
||||
]);
|
||||
|
||||
export interface GSDSkillRule {
|
||||
|
|
@ -173,6 +176,9 @@ export interface GSDPreferences {
|
|||
phases?: PhaseSkipPreferences;
|
||||
auto_visualize?: boolean;
|
||||
parallel?: import("./types.js").ParallelConfig;
|
||||
verification_commands?: string[];
|
||||
verification_auto_fix?: boolean;
|
||||
verification_max_retries?: number;
|
||||
}
|
||||
|
||||
export interface LoadedGSDPreferences {
|
||||
|
|
@ -773,6 +779,9 @@ function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPr
|
|||
parallel: (base.parallel || override.parallel)
|
||||
? { ...(base.parallel ?? {}), ...(override.parallel ?? {}) } as import("./types.js").ParallelConfig
|
||||
: undefined,
|
||||
verification_commands: mergeStringLists(base.verification_commands, override.verification_commands),
|
||||
verification_auto_fix: override.verification_auto_fix ?? base.verification_auto_fix,
|
||||
verification_max_retries: override.verification_max_retries ?? base.verification_max_retries,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -1205,6 +1214,39 @@ export function validatePreferences(preferences: GSDPreferences): {
|
|||
}
|
||||
}
|
||||
|
||||
// ─── Verification Preferences ───────────────────────────────────────────
|
||||
if (preferences.verification_commands !== undefined) {
|
||||
if (Array.isArray(preferences.verification_commands)) {
|
||||
const allStrings = preferences.verification_commands.every(
|
||||
(item: unknown) => typeof item === "string",
|
||||
);
|
||||
if (allStrings) {
|
||||
validated.verification_commands = preferences.verification_commands;
|
||||
} else {
|
||||
errors.push("verification_commands must be an array of strings");
|
||||
}
|
||||
} else {
|
||||
errors.push("verification_commands must be an array of strings");
|
||||
}
|
||||
}
|
||||
|
||||
if (preferences.verification_auto_fix !== undefined) {
|
||||
if (typeof preferences.verification_auto_fix === "boolean") {
|
||||
validated.verification_auto_fix = preferences.verification_auto_fix;
|
||||
} else {
|
||||
errors.push("verification_auto_fix must be a boolean");
|
||||
}
|
||||
}
|
||||
|
||||
if (preferences.verification_max_retries !== undefined) {
|
||||
const raw = preferences.verification_max_retries;
|
||||
if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) {
|
||||
validated.verification_max_retries = Math.floor(raw);
|
||||
} else {
|
||||
errors.push("verification_max_retries must be a non-negative number");
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Git Preferences ───────────────────────────────────────────────────
|
||||
if (preferences.git && typeof preferences.git === "object") {
|
||||
const git: Record<string, unknown> = {};
|
||||
|
|
|
|||
|
|
@ -38,15 +38,16 @@ Then:
|
|||
- Preferred: use the `bg_shell` tool if available — it manages process lifecycle correctly without stream-inheritance issues
|
||||
6. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors)
|
||||
7. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary.
|
||||
8. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
|
||||
8. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section.
|
||||
9. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
|
||||
- exercise the real flow in the browser
|
||||
- prefer `browser_batch` when the next few actions are obvious and sequential
|
||||
- prefer `browser_assert` for explicit pass/fail verification of the intended outcome
|
||||
- use `browser_diff` when an action's effect is ambiguous
|
||||
- use console/network/dialog diagnostics when validating async, stateful, or failure-prone UI
|
||||
- record verification in terms of explicit checks passed/failed, not only prose interpretation
|
||||
9. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
|
||||
10. **If execution is running long or verification fails:**
|
||||
10. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
|
||||
11. **If execution is running long or verification fails:**
|
||||
|
||||
**Context budget:** You have approximately **{{verificationBudget}}** reserved for verification context. If you've used most of your context and haven't finished all steps, stop implementing and prioritize writing the task summary with clear notes on what's done and what remains. A partial summary that enables clean resumption is more valuable than one more half-finished step with no documentation. Never sacrifice summary quality for one more implementation step.
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,15 @@ blocker_discovered: false
|
|||
|
||||
{{whatWasVerifiedAndHow — commands run, tests passed, behavior confirmed}}
|
||||
|
||||
## Verification Evidence
|
||||
|
||||
<!-- Populated from verification gate output. If the gate ran, fill in the table below.
|
||||
If no gate ran (e.g., no verification commands discovered), note that. -->
|
||||
|
||||
| # | Command | Exit Code | Verdict | Duration |
|
||||
|---|---------|-----------|---------|----------|
|
||||
| {{row}} | {{command}} | {{exitCode}} | {{verdict}} | {{duration}} |
|
||||
|
||||
## Diagnostics
|
||||
|
||||
{{howToInspectWhatThisTaskBuiltLater — status surfaces, logs, error shapes, failure artifacts, or none}}
|
||||
|
|
|
|||
743
src/resources/extensions/gsd/tests/verification-evidence.test.ts
Normal file
743
src/resources/extensions/gsd/tests/verification-evidence.test.ts
Normal file
|
|
@ -0,0 +1,743 @@
|
|||
/**
|
||||
* Unit tests for the verification evidence module — JSON persistence and markdown table formatting.
|
||||
*
|
||||
* Tests cover:
|
||||
* 1. writeVerificationJSON writes correct JSON shape (schemaVersion, taskId, timestamp, passed, discoverySource, checks)
|
||||
* 2. writeVerificationJSON creates directory if it doesn't exist
|
||||
* 3. writeVerificationJSON maps exitCode to verdict correctly (0 = pass, non-zero = fail)
|
||||
* 4. writeVerificationJSON excludes stdout/stderr from output
|
||||
* 5. writeVerificationJSON handles empty checks array
|
||||
* 6. writeVerificationJSON accepts optional unitId
|
||||
* 7. formatEvidenceTable returns markdown table with correct columns for checks
|
||||
* 8. formatEvidenceTable returns "no checks" message for empty checks
|
||||
* 9. formatEvidenceTable formats duration as seconds with 1 decimal
|
||||
* 10. formatEvidenceTable uses ✅/❌ emoji for pass/fail verdict
|
||||
*/
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdirSync, readFileSync, rmSync, existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import {
|
||||
writeVerificationJSON,
|
||||
formatEvidenceTable,
|
||||
} from "../verification-evidence.ts";
|
||||
import type { VerificationResult } from "../types.ts";
|
||||
|
||||
function makeTempDir(prefix: string): string {
|
||||
const dir = join(
|
||||
tmpdir(),
|
||||
`${prefix}-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
);
|
||||
mkdirSync(dir, { recursive: true });
|
||||
return dir;
|
||||
}
|
||||
|
||||
function makeResult(overrides?: Partial<VerificationResult>): VerificationResult {
|
||||
return {
|
||||
passed: true,
|
||||
checks: [],
|
||||
discoverySource: "package-json",
|
||||
timestamp: 1710000000000,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── writeVerificationJSON Tests ─────────────────────────────────────────────
|
||||
|
||||
test("verification-evidence: writeVerificationJSON writes correct JSON shape", () => {
|
||||
const tmp = makeTempDir("ve-shape");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [
|
||||
{
|
||||
command: "npm run typecheck",
|
||||
exitCode: 0,
|
||||
stdout: "all good",
|
||||
stderr: "",
|
||||
durationMs: 2340,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T03");
|
||||
|
||||
const filePath = join(tmp, "T03-VERIFY.json");
|
||||
assert.ok(existsSync(filePath), "JSON file should exist");
|
||||
|
||||
const json = JSON.parse(readFileSync(filePath, "utf-8"));
|
||||
assert.equal(json.schemaVersion, 1);
|
||||
assert.equal(json.taskId, "T03");
|
||||
assert.equal(json.unitId, "T03"); // defaults to taskId when unitId not provided
|
||||
assert.equal(json.timestamp, 1710000000000);
|
||||
assert.equal(json.passed, true);
|
||||
assert.equal(json.discoverySource, "package-json");
|
||||
assert.equal(json.checks.length, 1);
|
||||
assert.equal(json.checks[0].command, "npm run typecheck");
|
||||
assert.equal(json.checks[0].exitCode, 0);
|
||||
assert.equal(json.checks[0].durationMs, 2340);
|
||||
assert.equal(json.checks[0].verdict, "pass");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: writeVerificationJSON creates directory if it doesn't exist", () => {
|
||||
const tmp = makeTempDir("ve-mkdir");
|
||||
const nested = join(tmp, "deep", "nested", "tasks");
|
||||
try {
|
||||
assert.ok(!existsSync(nested), "directory should not exist yet");
|
||||
|
||||
writeVerificationJSON(makeResult(), nested, "T01");
|
||||
|
||||
assert.ok(existsSync(nested), "directory should be created");
|
||||
assert.ok(existsSync(join(nested, "T01-VERIFY.json")), "JSON file should exist");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: writeVerificationJSON maps exitCode to verdict correctly", () => {
|
||||
const tmp = makeTempDir("ve-verdict");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: false,
|
||||
checks: [
|
||||
{ command: "lint", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
|
||||
{ command: "test", exitCode: 1, stdout: "", stderr: "fail", durationMs: 200 },
|
||||
{ command: "audit", exitCode: 2, stdout: "", stderr: "err", durationMs: 300 },
|
||||
],
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T02");
|
||||
|
||||
const json = JSON.parse(readFileSync(join(tmp, "T02-VERIFY.json"), "utf-8"));
|
||||
assert.equal(json.checks[0].verdict, "pass");
|
||||
assert.equal(json.checks[1].verdict, "fail");
|
||||
assert.equal(json.checks[2].verdict, "fail");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: writeVerificationJSON excludes stdout/stderr from output", () => {
|
||||
const tmp = makeTempDir("ve-no-stdio");
|
||||
try {
|
||||
const result = makeResult({
|
||||
checks: [
|
||||
{
|
||||
command: "echo hello",
|
||||
exitCode: 0,
|
||||
stdout: "hello\n",
|
||||
stderr: "some warning",
|
||||
durationMs: 50,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T01");
|
||||
|
||||
const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8");
|
||||
assert.ok(!raw.includes('"stdout"'), "JSON should not contain stdout key");
|
||||
assert.ok(!raw.includes('"stderr"'), "JSON should not contain stderr key");
|
||||
assert.ok(!raw.includes("hello\\n"), "JSON should not contain stdout value");
|
||||
assert.ok(!raw.includes("some warning"), "JSON should not contain stderr value");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: writeVerificationJSON handles empty checks array", () => {
|
||||
const tmp = makeTempDir("ve-empty");
|
||||
try {
|
||||
writeVerificationJSON(makeResult({ checks: [] }), tmp, "T01");
|
||||
|
||||
const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"));
|
||||
assert.equal(json.schemaVersion, 1);
|
||||
assert.equal(json.passed, true);
|
||||
assert.deepStrictEqual(json.checks, []);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: writeVerificationJSON uses optional unitId when provided", () => {
|
||||
const tmp = makeTempDir("ve-unitid");
|
||||
try {
|
||||
writeVerificationJSON(makeResult(), tmp, "T03", "M001/S01/T03");
|
||||
|
||||
const json = JSON.parse(readFileSync(join(tmp, "T03-VERIFY.json"), "utf-8"));
|
||||
assert.equal(json.taskId, "T03");
|
||||
assert.equal(json.unitId, "M001/S01/T03");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ─── formatEvidenceTable Tests ───────────────────────────────────────────────
|
||||
|
||||
test("verification-evidence: formatEvidenceTable returns markdown table with correct columns", () => {
|
||||
const result = makeResult({
|
||||
checks: [
|
||||
{ command: "npm run typecheck", exitCode: 0, stdout: "", stderr: "", durationMs: 2340 },
|
||||
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "err", durationMs: 1100 },
|
||||
],
|
||||
});
|
||||
|
||||
const table = formatEvidenceTable(result);
|
||||
const lines = table.split("\n");
|
||||
|
||||
// Header row
|
||||
assert.ok(lines[0].includes("# |"), "header should have # column");
|
||||
assert.ok(lines[0].includes("Command"), "header should have Command column");
|
||||
assert.ok(lines[0].includes("Exit Code"), "header should have Exit Code column");
|
||||
assert.ok(lines[0].includes("Verdict"), "header should have Verdict column");
|
||||
assert.ok(lines[0].includes("Duration"), "header should have Duration column");
|
||||
|
||||
// Separator row
|
||||
assert.ok(lines[1].includes("---|"), "should have separator row");
|
||||
|
||||
// Data rows
|
||||
assert.equal(lines.length, 4, "header + separator + 2 data rows");
|
||||
assert.ok(lines[2].includes("npm run typecheck"), "first row command");
|
||||
assert.ok(lines[3].includes("npm run lint"), "second row command");
|
||||
});
|
||||
|
||||
test("verification-evidence: formatEvidenceTable returns no-checks message for empty checks", () => {
|
||||
const result = makeResult({ checks: [] });
|
||||
const output = formatEvidenceTable(result);
|
||||
assert.equal(output, "_No verification checks discovered._");
|
||||
});
|
||||
|
||||
test("verification-evidence: formatEvidenceTable formats duration as seconds with 1 decimal", () => {
|
||||
const result = makeResult({
|
||||
checks: [
|
||||
{ command: "fast", exitCode: 0, stdout: "", stderr: "", durationMs: 150 },
|
||||
{ command: "slow", exitCode: 0, stdout: "", stderr: "", durationMs: 2340 },
|
||||
{ command: "zero", exitCode: 0, stdout: "", stderr: "", durationMs: 0 },
|
||||
],
|
||||
});
|
||||
|
||||
const table = formatEvidenceTable(result);
|
||||
assert.ok(table.includes("0.1s"), "150ms → 0.1s");
|
||||
assert.ok(table.includes("2.3s"), "2340ms → 2.3s");
|
||||
assert.ok(table.includes("0.0s"), "0ms → 0.0s");
|
||||
});
|
||||
|
||||
test("verification-evidence: formatEvidenceTable uses ✅/❌ emoji for pass/fail verdict", () => {
|
||||
const result = makeResult({
|
||||
passed: false,
|
||||
checks: [
|
||||
{ command: "pass-cmd", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
|
||||
{ command: "fail-cmd", exitCode: 1, stdout: "", stderr: "", durationMs: 200 },
|
||||
],
|
||||
});
|
||||
|
||||
const table = formatEvidenceTable(result);
|
||||
assert.ok(table.includes("✅ pass"), "passing check should have ✅ pass");
|
||||
assert.ok(table.includes("❌ fail"), "failing check should have ❌ fail");
|
||||
});
|
||||
|
||||
// ─── Validator Rule Tests (T03) ──────────────────────────────────────────────
|
||||
|
||||
import { validateTaskSummaryContent } from "../observability-validator.ts";
|
||||
|
||||
// ─── Task-summary fixtures for validator rule tests ──────────────────────────
// Each fixture is a minimal T03 summary with YAML frontmatter declaring an
// observability surface; they differ only in the content of the
// "## Verification Evidence" section.

// Real markdown evidence table → validator should raise no evidence warnings.
const MINIMAL_SUMMARY_WITH_EVIDENCE = `---
observability_surfaces:
- gate-output
---
# T03 Summary

## Diagnostics
Run \`npm test\` to verify.

## Verification Evidence
| # | Command | Exit Code | Verdict | Duration |
|---|---------|-----------|---------|----------|
| 1 | npm run typecheck | 0 | ✅ pass | 2.3s |
`;

// Evidence section absent entirely → expect an evidence_block_missing warning.
const MINIMAL_SUMMARY_NO_EVIDENCE = `---
observability_surfaces:
- gate-output
---
# T03 Summary

## Diagnostics
Run \`npm test\` to verify.
`;

// Evidence section still contains the unexpanded template placeholder →
// expect an evidence_block_placeholder warning.
const MINIMAL_SUMMARY_PLACEHOLDER_EVIDENCE = `---
observability_surfaces:
- gate-output
---
# T03 Summary

## Diagnostics
Run \`npm test\` to verify.

## Verification Evidence
{{evidence_table}}
`;

// Evidence section holds the explicit "no checks" sentinel emitted by
// formatEvidenceTable for empty check lists → counts as valid content.
const MINIMAL_SUMMARY_NO_CHECKS_EVIDENCE = `---
observability_surfaces:
- gate-output
---
# T03 Summary

## Diagnostics
Run \`npm test\` to verify.

## Verification Evidence
_No verification checks discovered._
`;
|
||||
|
||||
test("verification-evidence: validator accepts summary with real evidence table", () => {
|
||||
const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_WITH_EVIDENCE);
|
||||
const evidenceIssues = issues.filter(
|
||||
(i) => i.ruleId === "evidence_block_missing" || i.ruleId === "evidence_block_placeholder",
|
||||
);
|
||||
assert.equal(evidenceIssues.length, 0, "no evidence warnings for real table");
|
||||
});
|
||||
|
||||
test("verification-evidence: validator warns when evidence section is missing", () => {
|
||||
const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_NO_EVIDENCE);
|
||||
const match = issues.find((i) => i.ruleId === "evidence_block_missing");
|
||||
assert.ok(match, "should produce evidence_block_missing warning");
|
||||
assert.equal(match!.severity, "warning");
|
||||
assert.equal(match!.scope, "task-summary");
|
||||
});
|
||||
|
||||
test("verification-evidence: validator warns when evidence section has only placeholder text", () => {
|
||||
const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_PLACEHOLDER_EVIDENCE);
|
||||
const match = issues.find((i) => i.ruleId === "evidence_block_placeholder");
|
||||
assert.ok(match, "should produce evidence_block_placeholder warning");
|
||||
assert.equal(match!.severity, "warning");
|
||||
});
|
||||
|
||||
test("verification-evidence: validator accepts 'no checks discovered' as valid content", () => {
|
||||
const issues = validateTaskSummaryContent("T03-SUMMARY.md", MINIMAL_SUMMARY_NO_CHECKS_EVIDENCE);
|
||||
const evidenceIssues = issues.filter(
|
||||
(i) => i.ruleId === "evidence_block_missing" || i.ruleId === "evidence_block_placeholder",
|
||||
);
|
||||
assert.equal(evidenceIssues.length, 0, "no evidence warnings for 'no checks discovered'");
|
||||
});
|
||||
|
||||
// ─── Integration Test: Full Chain (T03) ──────────────────────────────────────
|
||||
|
||||
test("verification-evidence: integration — VerificationResult → JSON → table → validator accepts", () => {
|
||||
const tmp = makeTempDir("ve-integration");
|
||||
try {
|
||||
// 1. Create a VerificationResult with 2 checks (1 pass, 1 fail)
|
||||
const result = makeResult({
|
||||
passed: false,
|
||||
checks: [
|
||||
{ command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500 },
|
||||
{ command: "npm run test:unit", exitCode: 1, stdout: "", stderr: "1 failed", durationMs: 3200 },
|
||||
],
|
||||
discoverySource: "package-json",
|
||||
});
|
||||
|
||||
// 2. Write JSON to temp dir and read it back
|
||||
writeVerificationJSON(result, tmp, "T03");
|
||||
const jsonPath = join(tmp, "T03-VERIFY.json");
|
||||
assert.ok(existsSync(jsonPath), "JSON file should exist");
|
||||
|
||||
const json = JSON.parse(readFileSync(jsonPath, "utf-8"));
|
||||
assert.equal(json.schemaVersion, 1, "schemaVersion should be 1");
|
||||
assert.equal(json.passed, false, "passed should be false");
|
||||
assert.equal(json.checks.length, 2, "should have 2 checks");
|
||||
assert.equal(json.checks[0].verdict, "pass", "first check should pass");
|
||||
assert.equal(json.checks[1].verdict, "fail", "second check should fail");
|
||||
|
||||
// 3. Generate evidence table and embed in a mock summary
|
||||
const table = formatEvidenceTable(result);
|
||||
assert.ok(table.includes("npm run typecheck"), "table should contain first command");
|
||||
assert.ok(table.includes("npm run test:unit"), "table should contain second command");
|
||||
|
||||
const fullSummary = `---
|
||||
observability_surfaces:
|
||||
- gate-output
|
||||
---
|
||||
# T03 Summary
|
||||
|
||||
## Diagnostics
|
||||
Run \`npm test\` to verify.
|
||||
|
||||
## Verification Evidence
|
||||
${table}
|
||||
`;
|
||||
|
||||
// 4. Validate — no evidence warnings
|
||||
const issues = validateTaskSummaryContent("T03-SUMMARY.md", fullSummary);
|
||||
const evidenceIssues = issues.filter(
|
||||
(i) => i.ruleId === "evidence_block_missing" || i.ruleId === "evidence_block_placeholder",
|
||||
);
|
||||
assert.equal(evidenceIssues.length, 0, "validator should accept real evidence from formatEvidenceTable");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Retry Evidence Field Tests (S03/T01) ─────────────────────────────────────
|
||||
|
||||
test("verification-evidence: writeVerificationJSON with retryAttempt and maxRetries includes them in output", () => {
|
||||
const tmp = makeTempDir("ve-retry-fields");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: false,
|
||||
checks: [
|
||||
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "error", durationMs: 300 },
|
||||
],
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T01", "M001/S03/T01", 1, 2);
|
||||
|
||||
const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"));
|
||||
assert.equal(json.retryAttempt, 1, "retryAttempt should be 1");
|
||||
assert.equal(json.maxRetries, 2, "maxRetries should be 2");
|
||||
// Other fields should still be correct
|
||||
assert.equal(json.schemaVersion, 1);
|
||||
assert.equal(json.taskId, "T01");
|
||||
assert.equal(json.unitId, "M001/S03/T01");
|
||||
assert.equal(json.passed, false);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: writeVerificationJSON without retry params omits retryAttempt/maxRetries keys", () => {
|
||||
const tmp = makeTempDir("ve-no-retry");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [
|
||||
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
|
||||
],
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T02");
|
||||
|
||||
const raw = readFileSync(join(tmp, "T02-VERIFY.json"), "utf-8");
|
||||
const json = JSON.parse(raw);
|
||||
assert.ok(!("retryAttempt" in json), "retryAttempt key should not be present");
|
||||
assert.ok(!("maxRetries" in json), "maxRetries key should not be present");
|
||||
// Confirm the JSON string does not contain these keys at all
|
||||
assert.ok(!raw.includes('"retryAttempt"'), "raw JSON should not contain retryAttempt");
|
||||
assert.ok(!raw.includes('"maxRetries"'), "raw JSON should not contain maxRetries");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Runtime Error Evidence Tests (S04/T02) ──────────────────────────────────
|
||||
|
||||
test("verification-evidence: writeVerificationJSON includes runtimeErrors when present", () => {
|
||||
const tmp = makeTempDir("ve-rt-present");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: false,
|
||||
checks: [
|
||||
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
|
||||
],
|
||||
runtimeErrors: [
|
||||
{ source: "bg-shell", severity: "crash", message: "Server crashed", blocking: true },
|
||||
{ source: "browser", severity: "error", message: "Uncaught TypeError", blocking: false },
|
||||
],
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T01");
|
||||
|
||||
const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"));
|
||||
assert.ok(Array.isArray(json.runtimeErrors), "runtimeErrors should be an array");
|
||||
assert.equal(json.runtimeErrors.length, 2, "should have 2 runtime errors");
|
||||
assert.equal(json.runtimeErrors[0].source, "bg-shell");
|
||||
assert.equal(json.runtimeErrors[0].severity, "crash");
|
||||
assert.equal(json.runtimeErrors[0].message, "Server crashed");
|
||||
assert.equal(json.runtimeErrors[0].blocking, true);
|
||||
assert.equal(json.runtimeErrors[1].source, "browser");
|
||||
assert.equal(json.runtimeErrors[1].severity, "error");
|
||||
assert.equal(json.runtimeErrors[1].message, "Uncaught TypeError");
|
||||
assert.equal(json.runtimeErrors[1].blocking, false);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: writeVerificationJSON omits runtimeErrors when absent", () => {
|
||||
const tmp = makeTempDir("ve-rt-absent");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [
|
||||
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50 },
|
||||
],
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T01");
|
||||
|
||||
const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8");
|
||||
assert.ok(!raw.includes('"runtimeErrors"'), "raw JSON should not contain runtimeErrors key");
|
||||
const json = JSON.parse(raw);
|
||||
assert.ok(!("runtimeErrors" in json), "runtimeErrors key should not be present in parsed JSON");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: writeVerificationJSON omits runtimeErrors when empty array", () => {
|
||||
const tmp = makeTempDir("ve-rt-empty");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [],
|
||||
runtimeErrors: [],
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T01");
|
||||
|
||||
const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8");
|
||||
assert.ok(!raw.includes('"runtimeErrors"'), "raw JSON should not contain runtimeErrors key when empty array");
|
||||
const json = JSON.parse(raw);
|
||||
assert.ok(!("runtimeErrors" in json), "runtimeErrors key should not be present for empty array");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: formatEvidenceTable appends runtime errors section", () => {
|
||||
const result = makeResult({
|
||||
passed: false,
|
||||
checks: [
|
||||
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
|
||||
],
|
||||
runtimeErrors: [
|
||||
{ source: "bg-shell", severity: "crash", message: "Server crashed with SIGKILL", blocking: true },
|
||||
{ source: "browser", severity: "warning", message: "Deprecated API usage", blocking: false },
|
||||
],
|
||||
});
|
||||
|
||||
const table = formatEvidenceTable(result);
|
||||
|
||||
// Should contain runtime errors section
|
||||
assert.ok(table.includes("**Runtime Errors**"), "should have Runtime Errors heading");
|
||||
assert.ok(table.includes("| # | Source | Severity | Blocking | Message |"), "should have runtime errors column headers");
|
||||
assert.ok(table.includes("bg-shell"), "should contain bg-shell source");
|
||||
assert.ok(table.includes("crash"), "should contain crash severity");
|
||||
assert.ok(table.includes("🚫 yes"), "blocking error should show 🚫 yes");
|
||||
assert.ok(table.includes("ℹ️ no"), "non-blocking error should show ℹ️ no");
|
||||
assert.ok(table.includes("Server crashed with SIGKILL"), "should contain error message");
|
||||
assert.ok(table.includes("Deprecated API usage"), "should contain warning message");
|
||||
});
|
||||
|
||||
test("verification-evidence: formatEvidenceTable omits runtime errors section when none", () => {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [
|
||||
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200 },
|
||||
],
|
||||
});
|
||||
|
||||
const table = formatEvidenceTable(result);
|
||||
|
||||
assert.ok(!table.includes("Runtime Errors"), "should not contain Runtime Errors heading");
|
||||
assert.ok(table.includes("npm run lint"), "should still contain the check table");
|
||||
});
|
||||
|
||||
test("verification-evidence: formatEvidenceTable truncates runtime error message to 100 chars", () => {
|
||||
const longMessage = "A".repeat(150);
|
||||
const result = makeResult({
|
||||
passed: false,
|
||||
checks: [
|
||||
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
|
||||
],
|
||||
runtimeErrors: [
|
||||
{ source: "bg-shell", severity: "error", message: longMessage, blocking: false },
|
||||
],
|
||||
});
|
||||
|
||||
const table = formatEvidenceTable(result);
|
||||
|
||||
// The table should contain the truncated message (100 chars), not the full 150
|
||||
assert.ok(table.includes("A".repeat(100)), "should contain 100 A's");
|
||||
assert.ok(!table.includes("A".repeat(101)), "should not contain 101 A's (truncated)");
|
||||
});
|
||||
|
||||
// ─── Audit Warning Evidence Tests (S05/T02) ──────────────────────────────────
|
||||
|
||||
// Shared fixture: three npm-audit findings spanning the severity levels the
// formatter distinguishes (critical / high / moderate), with both a fixable
// and an unfixable entry so tests can exercise the "Fix Available" column.
// `as const` keeps each severity as its literal type for the AuditWarning union.
const SAMPLE_AUDIT_WARNINGS = [
  {
    name: "lodash",
    severity: "critical" as const,
    title: "Prototype Pollution",
    url: "https://github.com/advisories/GHSA-1234",
    fixAvailable: true,
  },
  {
    name: "express",
    severity: "high" as const,
    title: "Open Redirect",
    url: "https://github.com/advisories/GHSA-5678",
    fixAvailable: false,
  },
  {
    name: "minimist",
    severity: "moderate" as const,
    title: "Prototype Pollution",
    url: "https://github.com/advisories/GHSA-9012",
    fixAvailable: true,
  },
];
|
||||
|
||||
test("verification-evidence: writeVerificationJSON includes auditWarnings when present", () => {
|
||||
const tmp = makeTempDir("ve-audit-present");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [
|
||||
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
|
||||
],
|
||||
auditWarnings: SAMPLE_AUDIT_WARNINGS,
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T01");
|
||||
|
||||
const json = JSON.parse(readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8"));
|
||||
assert.ok(Array.isArray(json.auditWarnings), "auditWarnings should be an array");
|
||||
assert.equal(json.auditWarnings.length, 3, "should have 3 audit warnings");
|
||||
assert.equal(json.auditWarnings[0].name, "lodash");
|
||||
assert.equal(json.auditWarnings[0].severity, "critical");
|
||||
assert.equal(json.auditWarnings[0].title, "Prototype Pollution");
|
||||
assert.equal(json.auditWarnings[0].url, "https://github.com/advisories/GHSA-1234");
|
||||
assert.equal(json.auditWarnings[0].fixAvailable, true);
|
||||
assert.equal(json.auditWarnings[1].name, "express");
|
||||
assert.equal(json.auditWarnings[1].severity, "high");
|
||||
assert.equal(json.auditWarnings[1].fixAvailable, false);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: writeVerificationJSON omits auditWarnings when absent", () => {
|
||||
const tmp = makeTempDir("ve-audit-absent");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [
|
||||
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 50 },
|
||||
],
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T01");
|
||||
|
||||
const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8");
|
||||
assert.ok(!raw.includes('"auditWarnings"'), "raw JSON should not contain auditWarnings key");
|
||||
const json = JSON.parse(raw);
|
||||
assert.ok(!("auditWarnings" in json), "auditWarnings key should not be present in parsed JSON");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: writeVerificationJSON omits auditWarnings when empty array", () => {
|
||||
const tmp = makeTempDir("ve-audit-empty");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [],
|
||||
auditWarnings: [],
|
||||
});
|
||||
|
||||
writeVerificationJSON(result, tmp, "T01");
|
||||
|
||||
const raw = readFileSync(join(tmp, "T01-VERIFY.json"), "utf-8");
|
||||
assert.ok(!raw.includes('"auditWarnings"'), "raw JSON should not contain auditWarnings key when empty array");
|
||||
const json = JSON.parse(raw);
|
||||
assert.ok(!("auditWarnings" in json), "auditWarnings key should not be present for empty array");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-evidence: formatEvidenceTable appends audit warnings section", () => {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [
|
||||
{ command: "npm run test", exitCode: 0, stdout: "", stderr: "", durationMs: 100 },
|
||||
],
|
||||
auditWarnings: SAMPLE_AUDIT_WARNINGS,
|
||||
});
|
||||
|
||||
const table = formatEvidenceTable(result);
|
||||
|
||||
assert.ok(table.includes("**Audit Warnings**"), "should have Audit Warnings heading");
|
||||
assert.ok(table.includes("| # | Package | Severity | Title | Fix Available |"), "should have audit warnings column headers");
|
||||
assert.ok(table.includes("lodash"), "should contain lodash package");
|
||||
assert.ok(table.includes("🔴 critical"), "should show critical emoji");
|
||||
assert.ok(table.includes("🟠 high"), "should show high emoji");
|
||||
assert.ok(table.includes("🟡 moderate"), "should show moderate emoji");
|
||||
assert.ok(table.includes("Prototype Pollution"), "should contain vulnerability title");
|
||||
assert.ok(table.includes("Open Redirect"), "should contain vulnerability title");
|
||||
assert.ok(table.includes("✅ yes"), "fixAvailable true should show ✅ yes");
|
||||
assert.ok(table.includes("❌ no"), "fixAvailable false should show ❌ no");
|
||||
});
|
||||
|
||||
test("verification-evidence: formatEvidenceTable omits audit warnings section when none", () => {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [
|
||||
{ command: "npm run lint", exitCode: 0, stdout: "", stderr: "", durationMs: 200 },
|
||||
],
|
||||
});
|
||||
|
||||
const table = formatEvidenceTable(result);
|
||||
|
||||
assert.ok(!table.includes("Audit Warnings"), "should not contain Audit Warnings heading");
|
||||
assert.ok(table.includes("npm run lint"), "should still contain the check table");
|
||||
});
|
||||
|
||||
test("verification-evidence: integration — VerificationResult with auditWarnings → JSON → table", () => {
|
||||
const tmp = makeTempDir("ve-audit-integration");
|
||||
try {
|
||||
const result = makeResult({
|
||||
passed: true,
|
||||
checks: [
|
||||
{ command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 1500 },
|
||||
],
|
||||
auditWarnings: [
|
||||
{
|
||||
name: "got",
|
||||
severity: "moderate" as const,
|
||||
title: "Redirect bypass",
|
||||
url: "https://github.com/advisories/GHSA-abcd",
|
||||
fixAvailable: true,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
// 1. Write JSON and verify
|
||||
writeVerificationJSON(result, tmp, "T05");
|
||||
const json = JSON.parse(readFileSync(join(tmp, "T05-VERIFY.json"), "utf-8"));
|
||||
assert.equal(json.auditWarnings.length, 1, "JSON should have 1 audit warning");
|
||||
assert.equal(json.auditWarnings[0].name, "got");
|
||||
assert.equal(json.auditWarnings[0].severity, "moderate");
|
||||
assert.equal(json.auditWarnings[0].fixAvailable, true);
|
||||
// passed should still be true — audit warnings are non-blocking
|
||||
assert.equal(json.passed, true, "passed should remain true despite audit warnings");
|
||||
|
||||
// 2. Format table and verify
|
||||
const table = formatEvidenceTable(result);
|
||||
assert.ok(table.includes("**Audit Warnings**"), "table should have Audit Warnings section");
|
||||
assert.ok(table.includes("got"), "table should contain package name");
|
||||
assert.ok(table.includes("🟡 moderate"), "table should show moderate severity with emoji");
|
||||
assert.ok(table.includes("Redirect bypass"), "table should contain vulnerability title");
|
||||
assert.ok(table.includes("✅ yes"), "table should show fix available");
|
||||
// Check table still has the main verification checks
|
||||
assert.ok(table.includes("npm run typecheck"), "table should still have main check");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
// ════════════════════════════════════════════════════════════════════════════
// File boundary (diff metadata): src/resources/extensions/gsd/tests/verification-gate.test.ts
// New file, 965 lines (@@ -0,0 +1,965 @@).
// ════════════════════════════════════════════════════════════════════════════
/**
|
||||
* Unit tests for the verification gate — command discovery and execution.
|
||||
*
|
||||
* Tests cover:
|
||||
* 1. Discovery from explicit preference commands
|
||||
* 2. Discovery from task plan verify field
|
||||
* 3. Discovery from package.json typecheck/lint/test scripts
|
||||
* 4. First-non-empty-wins precedence
|
||||
* 5. All commands pass → gate passes
|
||||
* 6. One command fails → gate fails with exit code + stderr
|
||||
* 7. Missing package.json → 0 checks → pass
|
||||
* 8. Empty scripts → 0 checks → pass
|
||||
* 9. Preference validation for verification keys
|
||||
* 10. spawnSync error (command not found) → failure with exit code 127
|
||||
* 11. Dependency audit — git diff detection, npm audit parsing, graceful failures
|
||||
*/
|
||||
|
||||
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import test from "node:test";

import { validatePreferences } from "../preferences.ts";
import { discoverCommands, runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "../verification-gate.ts";
import type { CaptureRuntimeErrorsOptions, DependencyAuditOptions } from "../verification-gate.ts";
|
||||
|
||||
function makeTempDir(prefix: string): string {
|
||||
const dir = join(
|
||||
tmpdir(),
|
||||
`${prefix}-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
);
|
||||
mkdirSync(dir, { recursive: true });
|
||||
return dir;
|
||||
}
|
||||
|
||||
// ─── Discovery Tests ─────────────────────────────────────────────────────────
|
||||
|
||||
test("verification-gate: discoverCommands from preference commands", () => {
|
||||
const tmp = makeTempDir("vg-pref");
|
||||
try {
|
||||
const result = discoverCommands({
|
||||
preferenceCommands: ["npm run lint", "npm run test"],
|
||||
cwd: tmp,
|
||||
});
|
||||
assert.deepStrictEqual(result.commands, ["npm run lint", "npm run test"]);
|
||||
assert.equal(result.source, "preference");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: discoverCommands from task plan verify field", () => {
|
||||
const tmp = makeTempDir("vg-taskplan");
|
||||
try {
|
||||
const result = discoverCommands({
|
||||
taskPlanVerify: "npm run lint && npm run test",
|
||||
cwd: tmp,
|
||||
});
|
||||
assert.deepStrictEqual(result.commands, ["npm run lint", "npm run test"]);
|
||||
assert.equal(result.source, "task-plan");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: discoverCommands from package.json scripts", () => {
|
||||
const tmp = makeTempDir("vg-pkg");
|
||||
try {
|
||||
writeFileSync(
|
||||
join(tmp, "package.json"),
|
||||
JSON.stringify({
|
||||
scripts: {
|
||||
typecheck: "tsc --noEmit",
|
||||
lint: "eslint .",
|
||||
test: "vitest",
|
||||
build: "tsc", // should NOT be included
|
||||
},
|
||||
}),
|
||||
);
|
||||
const result = discoverCommands({ cwd: tmp });
|
||||
assert.deepStrictEqual(result.commands, [
|
||||
"npm run typecheck",
|
||||
"npm run lint",
|
||||
"npm run test",
|
||||
]);
|
||||
assert.equal(result.source, "package-json");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: first-non-empty-wins — preference beats task plan and package.json", () => {
|
||||
const tmp = makeTempDir("vg-precedence");
|
||||
try {
|
||||
writeFileSync(
|
||||
join(tmp, "package.json"),
|
||||
JSON.stringify({ scripts: { lint: "eslint ." } }),
|
||||
);
|
||||
const result = discoverCommands({
|
||||
preferenceCommands: ["custom-check"],
|
||||
taskPlanVerify: "npm run lint",
|
||||
cwd: tmp,
|
||||
});
|
||||
assert.deepStrictEqual(result.commands, ["custom-check"]);
|
||||
assert.equal(result.source, "preference");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: task plan verify beats package.json", () => {
|
||||
const tmp = makeTempDir("vg-tp-beats-pkg");
|
||||
try {
|
||||
writeFileSync(
|
||||
join(tmp, "package.json"),
|
||||
JSON.stringify({ scripts: { lint: "eslint ." } }),
|
||||
);
|
||||
const result = discoverCommands({
|
||||
taskPlanVerify: "custom-verify",
|
||||
cwd: tmp,
|
||||
});
|
||||
assert.deepStrictEqual(result.commands, ["custom-verify"]);
|
||||
assert.equal(result.source, "task-plan");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: missing package.json → 0 checks, source none", () => {
|
||||
const tmp = makeTempDir("vg-no-pkg");
|
||||
try {
|
||||
const result = discoverCommands({ cwd: tmp });
|
||||
assert.deepStrictEqual(result.commands, []);
|
||||
assert.equal(result.source, "none");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: package.json with no matching scripts → 0 checks", () => {
|
||||
const tmp = makeTempDir("vg-no-scripts");
|
||||
try {
|
||||
writeFileSync(
|
||||
join(tmp, "package.json"),
|
||||
JSON.stringify({ scripts: { build: "tsc", start: "node index.js" } }),
|
||||
);
|
||||
const result = discoverCommands({ cwd: tmp });
|
||||
assert.deepStrictEqual(result.commands, []);
|
||||
assert.equal(result.source, "none");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: empty preference array falls through to task plan", () => {
|
||||
const tmp = makeTempDir("vg-empty-pref");
|
||||
try {
|
||||
const result = discoverCommands({
|
||||
preferenceCommands: [],
|
||||
taskPlanVerify: "echo ok",
|
||||
cwd: tmp,
|
||||
});
|
||||
assert.deepStrictEqual(result.commands, ["echo ok"]);
|
||||
assert.equal(result.source, "task-plan");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Execution Tests ─────────────────────────────────────────────────────────
|
||||
|
||||
test("verification-gate: all commands pass → gate passes", () => {
|
||||
const tmp = makeTempDir("vg-pass");
|
||||
try {
|
||||
const result = runVerificationGate({
|
||||
basePath: tmp,
|
||||
unitId: "T01",
|
||||
cwd: tmp,
|
||||
preferenceCommands: ["echo hello", "echo world"],
|
||||
});
|
||||
assert.equal(result.passed, true);
|
||||
assert.equal(result.checks.length, 2);
|
||||
assert.equal(result.discoverySource, "preference");
|
||||
assert.equal(result.checks[0].exitCode, 0);
|
||||
assert.equal(result.checks[1].exitCode, 0);
|
||||
assert.ok(result.checks[0].stdout.includes("hello"));
|
||||
assert.ok(result.checks[1].stdout.includes("world"));
|
||||
assert.equal(typeof result.timestamp, "number");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: one command fails → gate fails with exit code + stderr", () => {
|
||||
const tmp = makeTempDir("vg-fail");
|
||||
try {
|
||||
const result = runVerificationGate({
|
||||
basePath: tmp,
|
||||
unitId: "T01",
|
||||
cwd: tmp,
|
||||
preferenceCommands: ["echo ok", "sh -c 'echo err >&2; exit 1'"],
|
||||
});
|
||||
assert.equal(result.passed, false);
|
||||
assert.equal(result.checks.length, 2);
|
||||
assert.equal(result.checks[0].exitCode, 0);
|
||||
assert.equal(result.checks[1].exitCode, 1);
|
||||
assert.ok(result.checks[1].stderr.includes("err"));
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: no commands discovered → gate passes with 0 checks", () => {
|
||||
const tmp = makeTempDir("vg-empty");
|
||||
try {
|
||||
const result = runVerificationGate({
|
||||
basePath: tmp,
|
||||
unitId: "T01",
|
||||
cwd: tmp,
|
||||
});
|
||||
assert.equal(result.passed, true);
|
||||
assert.equal(result.checks.length, 0);
|
||||
assert.equal(result.discoverySource, "none");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: command not found → exit code 127", () => {
|
||||
const tmp = makeTempDir("vg-notfound");
|
||||
try {
|
||||
const result = runVerificationGate({
|
||||
basePath: tmp,
|
||||
unitId: "T01",
|
||||
cwd: tmp,
|
||||
preferenceCommands: ["__nonexistent_command_xyz_42__"],
|
||||
});
|
||||
assert.equal(result.passed, false);
|
||||
assert.equal(result.checks.length, 1);
|
||||
assert.ok(result.checks[0].exitCode !== 0, "should have non-zero exit code");
|
||||
assert.ok(result.checks[0].durationMs >= 0);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: each check has durationMs", () => {
|
||||
const tmp = makeTempDir("vg-duration");
|
||||
try {
|
||||
const result = runVerificationGate({
|
||||
basePath: tmp,
|
||||
unitId: "T01",
|
||||
cwd: tmp,
|
||||
preferenceCommands: ["echo fast"],
|
||||
});
|
||||
assert.equal(result.checks.length, 1);
|
||||
assert.equal(typeof result.checks[0].durationMs, "number");
|
||||
assert.ok(result.checks[0].durationMs >= 0);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Preference Validation Tests ─────────────────────────────────────────────
|
||||
|
||||
test("verification-gate: validatePreferences accepts valid verification keys", () => {
|
||||
const result = validatePreferences({
|
||||
verification_commands: ["npm run lint", "npm run test"],
|
||||
verification_auto_fix: true,
|
||||
verification_max_retries: 3,
|
||||
});
|
||||
assert.deepStrictEqual(result.preferences.verification_commands, [
|
||||
"npm run lint",
|
||||
"npm run test",
|
||||
]);
|
||||
assert.equal(result.preferences.verification_auto_fix, true);
|
||||
assert.equal(result.preferences.verification_max_retries, 3);
|
||||
assert.equal(result.errors.length, 0);
|
||||
});
|
||||
|
||||
test("verification-gate: validatePreferences rejects non-array verification_commands", () => {
|
||||
const result = validatePreferences({
|
||||
verification_commands: "npm run lint" as unknown as string[],
|
||||
});
|
||||
assert.ok(result.errors.some((e) => e.includes("verification_commands")));
|
||||
assert.equal(result.preferences.verification_commands, undefined);
|
||||
});
|
||||
|
||||
test("verification-gate: validatePreferences rejects non-boolean verification_auto_fix", () => {
|
||||
const result = validatePreferences({
|
||||
verification_auto_fix: "yes" as unknown as boolean,
|
||||
});
|
||||
assert.ok(result.errors.some((e) => e.includes("verification_auto_fix")));
|
||||
assert.equal(result.preferences.verification_auto_fix, undefined);
|
||||
});
|
||||
|
||||
test("verification-gate: validatePreferences rejects negative verification_max_retries", () => {
|
||||
const result = validatePreferences({
|
||||
verification_max_retries: -1,
|
||||
});
|
||||
assert.ok(result.errors.some((e) => e.includes("verification_max_retries")));
|
||||
assert.equal(result.preferences.verification_max_retries, undefined);
|
||||
});
|
||||
|
||||
test("verification-gate: validatePreferences rejects non-string items in verification_commands", () => {
|
||||
const result = validatePreferences({
|
||||
verification_commands: ["npm run lint", 42 as unknown as string],
|
||||
});
|
||||
assert.ok(result.errors.some((e) => e.includes("verification_commands")));
|
||||
assert.equal(result.preferences.verification_commands, undefined);
|
||||
});
|
||||
|
||||
test("verification-gate: validatePreferences floors verification_max_retries", () => {
|
||||
const result = validatePreferences({
|
||||
verification_max_retries: 2.7,
|
||||
});
|
||||
assert.equal(result.preferences.verification_max_retries, 2);
|
||||
assert.equal(result.errors.length, 0);
|
||||
});
|
||||
|
||||
// ─── Additional Discovery Tests (T02) ───────────────────────────────────────
|
||||
|
||||
test("verification-gate: package.json with only test script → returns only npm run test", () => {
|
||||
const tmp = makeTempDir("vg-only-test");
|
||||
try {
|
||||
writeFileSync(
|
||||
join(tmp, "package.json"),
|
||||
JSON.stringify({
|
||||
scripts: {
|
||||
test: "vitest",
|
||||
build: "tsc",
|
||||
start: "node index.js",
|
||||
},
|
||||
}),
|
||||
);
|
||||
const result = discoverCommands({ cwd: tmp });
|
||||
assert.deepStrictEqual(result.commands, ["npm run test"]);
|
||||
assert.equal(result.source, "package-json");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: taskPlanVerify with single command (no &&)", () => {
|
||||
const tmp = makeTempDir("vg-tp-single");
|
||||
try {
|
||||
const result = discoverCommands({
|
||||
taskPlanVerify: "npm test",
|
||||
cwd: tmp,
|
||||
});
|
||||
assert.deepStrictEqual(result.commands, ["npm test"]);
|
||||
assert.equal(result.source, "task-plan");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: whitespace-only preference commands fall through", () => {
|
||||
const tmp = makeTempDir("vg-ws-pref");
|
||||
try {
|
||||
writeFileSync(
|
||||
join(tmp, "package.json"),
|
||||
JSON.stringify({ scripts: { lint: "eslint ." } }),
|
||||
);
|
||||
const result = discoverCommands({
|
||||
preferenceCommands: [" ", ""],
|
||||
cwd: tmp,
|
||||
});
|
||||
// Whitespace-only strings are trimmed to empty and filtered out
|
||||
assert.equal(result.source, "package-json");
|
||||
assert.deepStrictEqual(result.commands, ["npm run lint"]);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Additional Execution Tests (T02) ───────────────────────────────────────
|
||||
|
||||
test("verification-gate: one command fails — remaining commands still run (non-short-circuit)", () => {
|
||||
const tmp = makeTempDir("vg-no-short-circuit");
|
||||
try {
|
||||
// First fails, second and third should still execute
|
||||
const result = runVerificationGate({
|
||||
basePath: tmp,
|
||||
unitId: "T02",
|
||||
cwd: tmp,
|
||||
preferenceCommands: [
|
||||
"sh -c 'exit 1'",
|
||||
"echo second",
|
||||
"echo third",
|
||||
],
|
||||
});
|
||||
assert.equal(result.passed, false);
|
||||
assert.equal(result.checks.length, 3, "all 3 commands should run");
|
||||
assert.equal(result.checks[0].exitCode, 1, "first command fails");
|
||||
assert.equal(result.checks[1].exitCode, 0, "second command runs and passes");
|
||||
assert.ok(result.checks[1].stdout.includes("second"));
|
||||
assert.equal(result.checks[2].exitCode, 0, "third command runs and passes");
|
||||
assert.ok(result.checks[2].stdout.includes("third"));
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: gate execution uses cwd for spawnSync", () => {
|
||||
const tmp = makeTempDir("vg-cwd");
|
||||
try {
|
||||
// pwd should report the temp dir
|
||||
const result = runVerificationGate({
|
||||
basePath: tmp,
|
||||
unitId: "T02",
|
||||
cwd: tmp,
|
||||
preferenceCommands: ["pwd"],
|
||||
});
|
||||
assert.equal(result.passed, true);
|
||||
assert.equal(result.checks.length, 1);
|
||||
// The stdout should contain the tmp dir path (resolving symlinks)
|
||||
assert.ok(result.checks[0].stdout.trim().length > 0, "pwd should produce output");
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Additional Preference Validation Tests (T02) ──────────────────────────
|
||||
|
||||
test("verification-gate: verification_commands produces no unknown-key warnings", () => {
|
||||
const result = validatePreferences({
|
||||
verification_commands: ["npm test"],
|
||||
});
|
||||
const unknownWarnings = (result.warnings ?? []).filter(w => w.includes("unknown"));
|
||||
assert.equal(unknownWarnings.length, 0, "verification_commands is a known key");
|
||||
assert.equal(result.errors.length, 0);
|
||||
});
|
||||
|
||||
test("verification-gate: verification_auto_fix produces no unknown-key warnings", () => {
|
||||
const result = validatePreferences({
|
||||
verification_auto_fix: true,
|
||||
});
|
||||
const unknownWarnings = (result.warnings ?? []).filter(w => w.includes("unknown"));
|
||||
assert.equal(unknownWarnings.length, 0, "verification_auto_fix is a known key");
|
||||
assert.equal(result.errors.length, 0);
|
||||
});
|
||||
|
||||
test("verification-gate: verification_max_retries produces no unknown-key warnings", () => {
|
||||
const result = validatePreferences({
|
||||
verification_max_retries: 2,
|
||||
});
|
||||
const unknownWarnings = (result.warnings ?? []).filter(w => w.includes("unknown"));
|
||||
assert.equal(unknownWarnings.length, 0, "verification_max_retries is a known key");
|
||||
assert.equal(result.errors.length, 0);
|
||||
});
|
||||
|
||||
test("verification-gate: verification_max_retries -1 produces a validation error", () => {
|
||||
const result = validatePreferences({
|
||||
verification_max_retries: -1,
|
||||
});
|
||||
assert.ok(
|
||||
result.errors.some(e => e.includes("verification_max_retries")),
|
||||
"negative max_retries should error",
|
||||
);
|
||||
assert.equal(result.preferences.verification_max_retries, undefined);
|
||||
});
|
||||
|
||||
// ─── formatFailureContext Tests (S03/T01) ─────────────────────────────────────
|
||||
|
||||
test("formatFailureContext: formats a single failure with command, exit code, stderr", () => {
|
||||
const result: import("../types.ts").VerificationResult = {
|
||||
passed: false,
|
||||
checks: [
|
||||
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "error: unused var", durationMs: 500 },
|
||||
],
|
||||
discoverySource: "preference",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
const output = formatFailureContext(result);
|
||||
assert.ok(output.startsWith("## Verification Failures"), "should start with header");
|
||||
assert.ok(output.includes("`npm run lint`"), "should include command name");
|
||||
assert.ok(output.includes("exit code 1"), "should include exit code");
|
||||
assert.ok(output.includes("error: unused var"), "should include stderr content");
|
||||
assert.ok(output.includes("```stderr"), "should have stderr code block");
|
||||
});
|
||||
|
||||
test("formatFailureContext: formats multiple failures", () => {
|
||||
const result: import("../types.ts").VerificationResult = {
|
||||
passed: false,
|
||||
checks: [
|
||||
{ command: "npm run lint", exitCode: 1, stdout: "", stderr: "lint error", durationMs: 100 },
|
||||
{ command: "npm run test", exitCode: 2, stdout: "", stderr: "test failure", durationMs: 200 },
|
||||
{ command: "npm run typecheck", exitCode: 0, stdout: "ok", stderr: "", durationMs: 50 },
|
||||
],
|
||||
discoverySource: "preference",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
const output = formatFailureContext(result);
|
||||
assert.ok(output.includes("`npm run lint`"), "should include first failed command");
|
||||
assert.ok(output.includes("exit code 1"), "should include first exit code");
|
||||
assert.ok(output.includes("`npm run test`"), "should include second failed command");
|
||||
assert.ok(output.includes("exit code 2"), "should include second exit code");
|
||||
// Passing check should NOT appear
|
||||
assert.ok(!output.includes("npm run typecheck"), "should not include passing command");
|
||||
});
|
||||
|
||||
test("formatFailureContext: truncates stderr longer than 2000 chars", () => {
|
||||
const longStderr = "x".repeat(3000);
|
||||
const result: import("../types.ts").VerificationResult = {
|
||||
passed: false,
|
||||
checks: [
|
||||
{ command: "big-err", exitCode: 1, stdout: "", stderr: longStderr, durationMs: 100 },
|
||||
],
|
||||
discoverySource: "preference",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
const output = formatFailureContext(result);
|
||||
// The output should contain 2000 x's followed by truncation marker, not 3000
|
||||
assert.ok(!output.includes("x".repeat(2001)), "should not contain more than 2000 chars of stderr");
|
||||
assert.ok(output.includes("…[truncated]"), "should include truncation marker");
|
||||
});
|
||||
|
||||
test("formatFailureContext: returns empty string when all checks pass", () => {
|
||||
const result: import("../types.ts").VerificationResult = {
|
||||
passed: true,
|
||||
checks: [
|
||||
{ command: "npm run lint", exitCode: 0, stdout: "ok", stderr: "", durationMs: 100 },
|
||||
{ command: "npm run test", exitCode: 0, stdout: "ok", stderr: "", durationMs: 200 },
|
||||
],
|
||||
discoverySource: "preference",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
assert.equal(formatFailureContext(result), "");
|
||||
});
|
||||
|
||||
test("formatFailureContext: returns empty string for empty checks array", () => {
|
||||
const result: import("../types.ts").VerificationResult = {
|
||||
passed: true,
|
||||
checks: [],
|
||||
discoverySource: "none",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
assert.equal(formatFailureContext(result), "");
|
||||
});
|
||||
|
||||
test("formatFailureContext: caps total output at 10,000 chars", () => {
|
||||
// Generate many failures to exceed 10,000 chars total
|
||||
const checks: import("../types.ts").VerificationCheck[] = [];
|
||||
for (let i = 0; i < 20; i++) {
|
||||
checks.push({
|
||||
command: `failing-command-${i}`,
|
||||
exitCode: 1,
|
||||
stdout: "",
|
||||
stderr: "e".repeat(1000), // 1000 chars each, 20 * ~1050 (with formatting) > 10,000
|
||||
durationMs: 100,
|
||||
});
|
||||
}
|
||||
const result: import("../types.ts").VerificationResult = {
|
||||
passed: false,
|
||||
checks,
|
||||
discoverySource: "preference",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
const output = formatFailureContext(result);
|
||||
assert.ok(output.length <= 10_100, `total output should be capped near 10,000 chars, got ${output.length}`);
|
||||
assert.ok(output.includes("…[remaining failures truncated]"), "should include total truncation marker");
|
||||
});
|
||||
|
||||
// ─── captureRuntimeErrors Tests (S04/T01) ─────────────────────────────────────
|
||||
|
||||
function makeProc(overrides: Record<string, unknown>) {
|
||||
return {
|
||||
id: "p1",
|
||||
label: "test-server",
|
||||
status: "ready",
|
||||
alive: true,
|
||||
exitCode: null,
|
||||
signal: null,
|
||||
recentErrors: [] as string[],
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function makeLogs(entries: Array<{ type: string; text: string }>) {
|
||||
return entries.map((e, i) => ({
|
||||
type: e.type,
|
||||
text: e.text,
|
||||
timestamp: Date.now() + i,
|
||||
url: "http://localhost:3000",
|
||||
}));
|
||||
}
|
||||
|
||||
test("captureRuntimeErrors: crashed bg-shell process → blocking crash error", async () => {
|
||||
const processes = new Map<string, unknown>([
|
||||
["p1", makeProc({ status: "crashed", alive: false, exitCode: 1 })],
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => processes,
|
||||
getConsoleLogs: () => [],
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0].source, "bg-shell");
|
||||
assert.equal(result[0].severity, "crash");
|
||||
assert.equal(result[0].blocking, true);
|
||||
assert.ok(result[0].message.includes("test-server"));
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: bg-shell non-zero exit + not alive → blocking crash error", async () => {
|
||||
const processes = new Map<string, unknown>([
|
||||
["p1", makeProc({ status: "exited", alive: false, exitCode: 137 })],
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => processes,
|
||||
getConsoleLogs: () => [],
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0].severity, "crash");
|
||||
assert.equal(result[0].blocking, true);
|
||||
assert.ok(result[0].message.includes("exitCode=137"));
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: bg-shell SIGABRT/SIGSEGV/SIGBUS → blocking crash error", async () => {
|
||||
for (const sig of ["SIGABRT", "SIGSEGV", "SIGBUS"]) {
|
||||
const processes = new Map<string, unknown>([
|
||||
["p1", makeProc({ signal: sig, alive: false, exitCode: null })],
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => processes,
|
||||
getConsoleLogs: () => [],
|
||||
});
|
||||
assert.equal(result.length, 1, `${sig} should produce 1 error`);
|
||||
assert.equal(result[0].severity, "crash");
|
||||
assert.equal(result[0].blocking, true);
|
||||
assert.ok(result[0].message.includes(sig), `message should contain ${sig}`);
|
||||
}
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: alive bg-shell process with recentErrors → non-blocking error", async () => {
|
||||
const processes = new Map<string, unknown>([
|
||||
["p1", makeProc({ alive: true, recentErrors: ["TypeError: foo", "RangeError: bar"] })],
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => processes,
|
||||
getConsoleLogs: () => [],
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0].source, "bg-shell");
|
||||
assert.equal(result[0].severity, "error");
|
||||
assert.equal(result[0].blocking, false);
|
||||
assert.ok(result[0].message.includes("TypeError: foo"));
|
||||
assert.ok(result[0].message.includes("RangeError: bar"));
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: browser unhandled rejection → blocking crash error", async () => {
|
||||
const logs = makeLogs([
|
||||
{ type: "error", text: "Unhandled promise rejection: some error" },
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => new Map(),
|
||||
getConsoleLogs: () => logs,
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0].source, "browser");
|
||||
assert.equal(result[0].severity, "crash");
|
||||
assert.equal(result[0].blocking, true);
|
||||
assert.ok(result[0].message.includes("Unhandled"));
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: browser UnhandledRejection (case variation) → blocking crash", async () => {
|
||||
const logs = makeLogs([
|
||||
{ type: "error", text: "UnhandledRejection in module X" },
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => new Map(),
|
||||
getConsoleLogs: () => logs,
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0].severity, "crash");
|
||||
assert.equal(result[0].blocking, true);
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: browser console.error (general) → non-blocking error", async () => {
|
||||
const logs = makeLogs([
|
||||
{ type: "error", text: "Failed to load resource: net::ERR_FAILED" },
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => new Map(),
|
||||
getConsoleLogs: () => logs,
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0].source, "browser");
|
||||
assert.equal(result[0].severity, "error");
|
||||
assert.equal(result[0].blocking, false);
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: browser deprecation warning → non-blocking warning", async () => {
|
||||
const logs = makeLogs([
|
||||
{ type: "warning", text: "Event.returnValue is deprecated. Use Event.preventDefault() instead." },
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => new Map(),
|
||||
getConsoleLogs: () => logs,
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0].source, "browser");
|
||||
assert.equal(result[0].severity, "warning");
|
||||
assert.equal(result[0].blocking, false);
|
||||
assert.ok(result[0].message.includes("deprecated"));
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: non-deprecation warning is ignored", async () => {
|
||||
const logs = makeLogs([
|
||||
{ type: "warning", text: "Some general warning about performance" },
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => new Map(),
|
||||
getConsoleLogs: () => logs,
|
||||
});
|
||||
assert.equal(result.length, 0, "non-deprecation warnings should be ignored");
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: no processes, no browser logs → empty array", async () => {
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => new Map(),
|
||||
getConsoleLogs: () => [],
|
||||
});
|
||||
assert.deepStrictEqual(result, []);
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: dynamic import failure → graceful empty array", async () => {
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => { throw new Error("module not found"); },
|
||||
getConsoleLogs: () => { throw new Error("module not found"); },
|
||||
});
|
||||
assert.deepStrictEqual(result, []);
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: browser text truncated to 500 chars", async () => {
|
||||
const longText = "x".repeat(600);
|
||||
const logs = makeLogs([
|
||||
{ type: "error", text: longText },
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => new Map(),
|
||||
getConsoleLogs: () => logs,
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
assert.ok(result[0].message.length <= 500 + 20, "message should be truncated near 500 chars");
|
||||
assert.ok(result[0].message.includes("…[truncated]"), "should include truncation marker");
|
||||
assert.ok(!result[0].message.includes("x".repeat(501)), "should not contain 501+ x's");
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: bg-shell recentErrors limited to 3 in message", async () => {
|
||||
const processes = new Map<string, unknown>([
|
||||
["p1", makeProc({
|
||||
status: "crashed",
|
||||
alive: false,
|
||||
exitCode: 1,
|
||||
recentErrors: ["err1", "err2", "err3", "err4", "err5"],
|
||||
})],
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => processes,
|
||||
getConsoleLogs: () => [],
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
assert.ok(result[0].message.includes("err1"));
|
||||
assert.ok(result[0].message.includes("err2"));
|
||||
assert.ok(result[0].message.includes("err3"));
|
||||
assert.ok(!result[0].message.includes("err4"), "should only include first 3 errors");
|
||||
});
|
||||
|
||||
test("captureRuntimeErrors: mixed bg-shell and browser errors", async () => {
|
||||
const processes = new Map<string, unknown>([
|
||||
["p1", makeProc({ status: "crashed", alive: false, exitCode: 1 })],
|
||||
]);
|
||||
const logs = makeLogs([
|
||||
{ type: "error", text: "Unhandled rejection: boom" },
|
||||
{ type: "error", text: "general error" },
|
||||
{ type: "warning", text: "deprecated API used" },
|
||||
]);
|
||||
const result = await captureRuntimeErrors({
|
||||
getProcesses: () => processes,
|
||||
getConsoleLogs: () => logs,
|
||||
});
|
||||
// 1 bg-shell crash + 1 browser crash (unhandled) + 1 browser error + 1 browser warning
|
||||
assert.equal(result.length, 4);
|
||||
const blocking = result.filter(r => r.blocking);
|
||||
const nonBlocking = result.filter(r => !r.blocking);
|
||||
assert.equal(blocking.length, 2, "should have 2 blocking errors");
|
||||
assert.equal(nonBlocking.length, 2, "should have 2 non-blocking errors");
|
||||
});
|
||||
|
||||
// ─── Dependency Audit Tests (S05/T01) ─────────────────────────────────────────
|
||||
|
||||
/** Helper: build a realistic npm audit JSON stdout with vulnerabilities. */
|
||||
function makeAuditJson(
|
||||
vulns: Record<string, { severity: string; fixAvailable: boolean; via: unknown[] }>,
|
||||
): string {
|
||||
return JSON.stringify({ vulnerabilities: vulns });
|
||||
}
|
||||
|
||||
/** Sample npm audit JSON with a high-severity vuln. */
|
||||
const SAMPLE_AUDIT_JSON = makeAuditJson({
|
||||
"nth-check": {
|
||||
severity: "high",
|
||||
fixAvailable: true,
|
||||
via: [
|
||||
{
|
||||
title: "Inefficient Regular Expression Complexity in nth-check",
|
||||
url: "https://github.com/advisories/GHSA-rp65-9cf3-cjxr",
|
||||
severity: "high",
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
test("dependency-audit: package.json in git diff → runs npm audit and parses vulnerabilities", () => {
|
||||
let npmAuditCalled = false;
|
||||
const result = runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["package.json", "src/index.ts"],
|
||||
npmAudit: () => {
|
||||
npmAuditCalled = true;
|
||||
return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
|
||||
},
|
||||
});
|
||||
assert.equal(npmAuditCalled, true, "npm audit should be called");
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0].name, "nth-check");
|
||||
assert.equal(result[0].severity, "high");
|
||||
assert.equal(result[0].title, "Inefficient Regular Expression Complexity in nth-check");
|
||||
assert.equal(result[0].url, "https://github.com/advisories/GHSA-rp65-9cf3-cjxr");
|
||||
assert.equal(result[0].fixAvailable, true);
|
||||
});
|
||||
|
||||
test("dependency-audit: package-lock.json change triggers audit", () => {
|
||||
let npmAuditCalled = false;
|
||||
const result = runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["package-lock.json"],
|
||||
npmAudit: () => {
|
||||
npmAuditCalled = true;
|
||||
return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
|
||||
},
|
||||
});
|
||||
assert.equal(npmAuditCalled, true);
|
||||
assert.equal(result.length, 1);
|
||||
});
|
||||
|
||||
test("dependency-audit: pnpm-lock.yaml change triggers audit", () => {
|
||||
let npmAuditCalled = false;
|
||||
runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["pnpm-lock.yaml"],
|
||||
npmAudit: () => {
|
||||
npmAuditCalled = true;
|
||||
return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
|
||||
},
|
||||
});
|
||||
assert.equal(npmAuditCalled, true);
|
||||
});
|
||||
|
||||
test("dependency-audit: yarn.lock change triggers audit", () => {
|
||||
let npmAuditCalled = false;
|
||||
runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["yarn.lock"],
|
||||
npmAudit: () => {
|
||||
npmAuditCalled = true;
|
||||
return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
|
||||
},
|
||||
});
|
||||
assert.equal(npmAuditCalled, true);
|
||||
});
|
||||
|
||||
test("dependency-audit: bun.lockb change triggers audit", () => {
|
||||
let npmAuditCalled = false;
|
||||
runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["bun.lockb"],
|
||||
npmAudit: () => {
|
||||
npmAuditCalled = true;
|
||||
return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
|
||||
},
|
||||
});
|
||||
assert.equal(npmAuditCalled, true);
|
||||
});
|
||||
|
||||
test("dependency-audit: no dependency file changes → returns empty array, npm audit not called", () => {
|
||||
let npmAuditCalled = false;
|
||||
const result = runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["src/index.ts", "README.md"],
|
||||
npmAudit: () => {
|
||||
npmAuditCalled = true;
|
||||
return { stdout: "{}", exitCode: 0 };
|
||||
},
|
||||
});
|
||||
assert.equal(npmAuditCalled, false, "npm audit should NOT be called when no dependency files changed");
|
||||
assert.deepStrictEqual(result, []);
|
||||
});
|
||||
|
||||
test("dependency-audit: git diff returns non-zero exit (not a git repo) → empty array", () => {
|
||||
const result = runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => { throw new Error("not a git repo"); },
|
||||
npmAudit: () => { throw new Error("should not be called"); },
|
||||
});
|
||||
assert.deepStrictEqual(result, []);
|
||||
});
|
||||
|
||||
test("dependency-audit: npm audit returns invalid JSON → empty array", () => {
|
||||
const result = runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["package.json"],
|
||||
npmAudit: () => ({ stdout: "not json at all", exitCode: 1 }),
|
||||
});
|
||||
assert.deepStrictEqual(result, []);
|
||||
});
|
||||
|
||||
test("dependency-audit: npm audit returns zero vulnerabilities → empty array", () => {
|
||||
const result = runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["package.json"],
|
||||
npmAudit: () => ({
|
||||
stdout: JSON.stringify({ vulnerabilities: {} }),
|
||||
exitCode: 0,
|
||||
}),
|
||||
});
|
||||
assert.deepStrictEqual(result, []);
|
||||
});
|
||||
|
||||
test("dependency-audit: npm audit non-zero exit with valid JSON → parses correctly", () => {
|
||||
// npm audit exits non-zero when vulnerabilities exist — this is expected, not an error
|
||||
const result = runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["package-lock.json"],
|
||||
npmAudit: () => ({
|
||||
stdout: SAMPLE_AUDIT_JSON,
|
||||
exitCode: 1, // non-zero!
|
||||
}),
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0].name, "nth-check");
|
||||
assert.equal(result[0].severity, "high");
|
||||
});
|
||||
|
||||
test("dependency-audit: via entries with string-only values are skipped", () => {
|
||||
const auditJson = makeAuditJson({
|
||||
"postcss": {
|
||||
severity: "moderate",
|
||||
fixAvailable: false,
|
||||
via: ["nth-check", "css-select"], // string-only via entries
|
||||
},
|
||||
});
|
||||
const result = runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["package.json"],
|
||||
npmAudit: () => ({ stdout: auditJson, exitCode: 1 }),
|
||||
});
|
||||
assert.equal(result.length, 1);
|
||||
// When no object via entry is found, title falls back to the package name
|
||||
assert.equal(result[0].name, "postcss");
|
||||
assert.equal(result[0].title, "postcss");
|
||||
assert.equal(result[0].url, "");
|
||||
});
|
||||
|
||||
test("dependency-audit: subdirectory package.json does not trigger audit", () => {
|
||||
let npmAuditCalled = false;
|
||||
const result = runDependencyAudit("/tmp/test", {
|
||||
gitDiff: () => ["packages/foo/package.json", "libs/bar/package-lock.json"],
|
||||
npmAudit: () => {
|
||||
npmAuditCalled = true;
|
||||
return { stdout: SAMPLE_AUDIT_JSON, exitCode: 0 };
|
||||
},
|
||||
});
|
||||
assert.equal(npmAuditCalled, false, "subdirectory dependency files should not trigger audit");
|
||||
assert.deepStrictEqual(result, []);
|
||||
});
|
||||
|
|
@ -46,6 +46,44 @@ export interface TaskPlanEntry {
|
|||
verify?: string; // e.g. "run tests" — extracted from "- Verify:" subline
|
||||
}
|
||||
|
||||
// ─── Verification Gate ─────────────────────────────────────────────────────
|
||||
|
||||
/** Result of a single verification command execution */
|
||||
export interface VerificationCheck {
|
||||
command: string; // e.g. "npm run lint"
|
||||
exitCode: number; // 0 = pass
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
durationMs: number;
|
||||
}
|
||||
|
||||
/** A runtime error captured from bg-shell processes or browser console */
|
||||
export interface RuntimeError {
|
||||
source: "bg-shell" | "browser";
|
||||
severity: "crash" | "error" | "warning";
|
||||
message: string;
|
||||
blocking: boolean;
|
||||
}
|
||||
|
||||
/** A dependency vulnerability warning from npm audit */
|
||||
export interface AuditWarning {
|
||||
name: string;
|
||||
severity: "low" | "moderate" | "high" | "critical";
|
||||
title: string;
|
||||
url: string;
|
||||
fixAvailable: boolean;
|
||||
}
|
||||
|
||||
/** Aggregate result from the verification gate */
|
||||
export interface VerificationResult {
|
||||
passed: boolean; // true if all checks passed (or no checks discovered)
|
||||
checks: VerificationCheck[]; // per-command results
|
||||
discoverySource: "preference" | "task-plan" | "package-json" | "none";
|
||||
timestamp: number; // Date.now() at gate start
|
||||
runtimeErrors?: RuntimeError[]; // optional — populated by captureRuntimeErrors()
|
||||
auditWarnings?: AuditWarning[]; // optional — populated by runDependencyAudit()
|
||||
}
|
||||
|
||||
export interface SlicePlan {
|
||||
id: string; // e.g. "S01"
|
||||
title: string; // from the H1
|
||||
|
|
|
|||
183
src/resources/extensions/gsd/verification-evidence.ts
Normal file
183
src/resources/extensions/gsd/verification-evidence.ts
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
/**
|
||||
* Verification Evidence — JSON persistence and markdown table formatting.
|
||||
*
|
||||
* Two pure-ish functions:
|
||||
* - writeVerificationJSON: persists a machine-readable T##-VERIFY.json artifact
|
||||
* - formatEvidenceTable: returns a markdown evidence table string
|
||||
*
|
||||
* JSON schema uses schemaVersion: 1 for forward-compatibility.
|
||||
* stdout/stderr are intentionally excluded from the JSON to avoid unbounded file sizes.
|
||||
*/
|
||||
|
||||
import { mkdirSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import type { VerificationResult } from "./types.ts";
|
||||
|
||||
// ─── JSON Evidence Artifact ──────────────────────────────────────────────────
|
||||
|
||||
export interface EvidenceCheckJSON {
|
||||
command: string;
|
||||
exitCode: number;
|
||||
durationMs: number;
|
||||
verdict: "pass" | "fail";
|
||||
}
|
||||
|
||||
export interface RuntimeErrorJSON {
|
||||
source: string;
|
||||
severity: string;
|
||||
message: string;
|
||||
blocking: boolean;
|
||||
}
|
||||
|
||||
export interface AuditWarningJSON {
|
||||
name: string;
|
||||
severity: string;
|
||||
title: string;
|
||||
url: string;
|
||||
fixAvailable: boolean;
|
||||
}
|
||||
|
||||
export interface EvidenceJSON {
|
||||
schemaVersion: 1;
|
||||
taskId: string;
|
||||
unitId: string;
|
||||
timestamp: number;
|
||||
passed: boolean;
|
||||
discoverySource: string;
|
||||
checks: EvidenceCheckJSON[];
|
||||
retryAttempt?: number;
|
||||
maxRetries?: number;
|
||||
runtimeErrors?: RuntimeErrorJSON[];
|
||||
auditWarnings?: AuditWarningJSON[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a T##-VERIFY.json artifact to the tasks directory.
|
||||
* Creates the directory with mkdirSync({ recursive: true }) if it doesn't exist.
|
||||
*
|
||||
* stdout/stderr are excluded from the JSON — the full output lives in VerificationResult
|
||||
* in memory and is logged to stderr during the gate run.
|
||||
*/
|
||||
export function writeVerificationJSON(
|
||||
result: VerificationResult,
|
||||
tasksDir: string,
|
||||
taskId: string,
|
||||
unitId?: string,
|
||||
retryAttempt?: number,
|
||||
maxRetries?: number,
|
||||
): void {
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
|
||||
const evidence: EvidenceJSON = {
|
||||
schemaVersion: 1,
|
||||
taskId,
|
||||
unitId: unitId ?? taskId,
|
||||
timestamp: result.timestamp,
|
||||
passed: result.passed,
|
||||
discoverySource: result.discoverySource,
|
||||
checks: result.checks.map((check) => ({
|
||||
command: check.command,
|
||||
exitCode: check.exitCode,
|
||||
durationMs: check.durationMs,
|
||||
verdict: check.exitCode === 0 ? "pass" : "fail",
|
||||
})),
|
||||
...(retryAttempt !== undefined ? { retryAttempt } : {}),
|
||||
...(maxRetries !== undefined ? { maxRetries } : {}),
|
||||
};
|
||||
|
||||
if (result.runtimeErrors && result.runtimeErrors.length > 0) {
|
||||
evidence.runtimeErrors = result.runtimeErrors.map(e => ({
|
||||
source: e.source,
|
||||
severity: e.severity,
|
||||
message: e.message,
|
||||
blocking: e.blocking,
|
||||
}));
|
||||
}
|
||||
|
||||
if (result.auditWarnings && result.auditWarnings.length > 0) {
|
||||
evidence.auditWarnings = result.auditWarnings.map(w => ({
|
||||
name: w.name,
|
||||
severity: w.severity,
|
||||
title: w.title,
|
||||
url: w.url,
|
||||
fixAvailable: w.fixAvailable,
|
||||
}));
|
||||
}
|
||||
|
||||
const filePath = join(tasksDir, `${taskId}-VERIFY.json`);
|
||||
writeFileSync(filePath, JSON.stringify(evidence, null, 2) + "\n", "utf-8");
|
||||
}
|
||||
|
||||
// ─── Markdown Evidence Table ─────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Format duration in milliseconds as seconds with 1 decimal place.
|
||||
* e.g. 2340 → "2.3s", 150 → "0.2s", 0 → "0.0s"
|
||||
*/
|
||||
function formatDuration(ms: number): string {
|
||||
return `${(ms / 1000).toFixed(1)}s`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a markdown evidence table from a VerificationResult.
|
||||
*
|
||||
* Returns a "no checks" note if result.checks is empty.
|
||||
* Otherwise returns a 5-column markdown table: #, Command, Exit Code, Verdict, Duration.
|
||||
*/
|
||||
export function formatEvidenceTable(result: VerificationResult): string {
|
||||
if (result.checks.length === 0) {
|
||||
return "_No verification checks discovered._";
|
||||
}
|
||||
|
||||
const lines: string[] = [
|
||||
"| # | Command | Exit Code | Verdict | Duration |",
|
||||
"|---|---------|-----------|---------|----------|",
|
||||
];
|
||||
|
||||
for (let i = 0; i < result.checks.length; i++) {
|
||||
const check = result.checks[i];
|
||||
const num = i + 1;
|
||||
const verdict =
|
||||
check.exitCode === 0 ? "✅ pass" : "❌ fail";
|
||||
const duration = formatDuration(check.durationMs);
|
||||
|
||||
lines.push(
|
||||
`| ${num} | ${check.command} | ${check.exitCode} | ${verdict} | ${duration} |`,
|
||||
);
|
||||
}
|
||||
|
||||
if (result.runtimeErrors && result.runtimeErrors.length > 0) {
|
||||
lines.push("");
|
||||
lines.push("**Runtime Errors**");
|
||||
lines.push("");
|
||||
lines.push("| # | Source | Severity | Blocking | Message |");
|
||||
lines.push("|---|--------|----------|----------|---------|");
|
||||
for (let i = 0; i < result.runtimeErrors.length; i++) {
|
||||
const err = result.runtimeErrors[i];
|
||||
const blockIcon = err.blocking ? "🚫 yes" : "ℹ️ no";
|
||||
lines.push(`| ${i + 1} | ${err.source} | ${err.severity} | ${blockIcon} | ${err.message.slice(0, 100)} |`);
|
||||
}
|
||||
}
|
||||
|
||||
if (result.auditWarnings && result.auditWarnings.length > 0) {
|
||||
const severityEmoji: Record<string, string> = {
|
||||
critical: "🔴",
|
||||
high: "🟠",
|
||||
moderate: "🟡",
|
||||
low: "⚪",
|
||||
};
|
||||
lines.push("");
|
||||
lines.push("**Audit Warnings**");
|
||||
lines.push("");
|
||||
lines.push("| # | Package | Severity | Title | Fix Available |");
|
||||
lines.push("|---|---------|----------|-------|---------------|");
|
||||
for (let i = 0; i < result.auditWarnings.length; i++) {
|
||||
const w = result.auditWarnings[i];
|
||||
const emoji = severityEmoji[w.severity] ?? "⚪";
|
||||
const fix = w.fixAvailable ? "✅ yes" : "❌ no";
|
||||
lines.push(`| ${i + 1} | ${w.name} | ${emoji} ${w.severity} | ${w.title} | ${fix} |`);
|
||||
}
|
||||
}
|
||||
|
||||
return lines.join("\n");
|
||||
}
|
||||
567
src/resources/extensions/gsd/verification-gate.ts
Normal file
567
src/resources/extensions/gsd/verification-gate.ts
Normal file
|
|
@ -0,0 +1,567 @@
|
|||
// GSD Extension — Verification Gate
|
||||
// Pure functions for discovering and running verification commands.
|
||||
// Discovery order (D003): preference → task plan verify → package.json scripts.
|
||||
// First non-empty source wins.
|
||||
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { join, basename } from "node:path";
|
||||
import type { AuditWarning, RuntimeError, VerificationCheck, VerificationResult } from "./types.js";
|
||||
|
||||
/** Maximum bytes of stdout/stderr to retain per command (10 KB). */
|
||||
const MAX_OUTPUT_BYTES = 10 * 1024;
|
||||
|
||||
/** Truncate a string to maxBytes, appending a marker if truncated. */
|
||||
function truncate(value: string | null | undefined, maxBytes: number): string {
|
||||
if (!value) return "";
|
||||
if (Buffer.byteLength(value, "utf-8") <= maxBytes) return value;
|
||||
// Slice conservatively then trim to last full character
|
||||
const buf = Buffer.from(value, "utf-8").subarray(0, maxBytes);
|
||||
return buf.toString("utf-8") + "\n…[truncated]";
|
||||
}
|
||||
|
||||
// ─── Command Discovery ──────────────────────────────────────────────────────

/** Inputs for discoverCommands; only cwd is required. */
export interface DiscoverCommandsOptions {
  /** Explicit commands from user preference — highest discovery priority. */
  preferenceCommands?: string[];
  /** Verify field from the task plan; split on "&&" and sanitized before use. */
  taskPlanVerify?: string;
  /** Directory whose package.json is probed as the last resort. */
  cwd: string;
}

/** Result of command discovery: the commands plus which source supplied them. */
export interface DiscoveredCommands {
  commands: string[];
  /** One of the VerificationResult discovery sources (preference / task-plan / package-json / none). */
  source: VerificationResult["discoverySource"];
}

/** Package.json script keys to probe, in order. */
const PACKAGE_SCRIPT_KEYS = ["typecheck", "lint", "test"] as const;
|
||||
|
||||
/**
|
||||
* Discover verification commands using the first-non-empty-wins strategy (D003):
|
||||
* 1. Explicit preference commands
|
||||
* 2. Task plan verify field (split on &&)
|
||||
* 3. package.json scripts (typecheck, lint, test)
|
||||
* 4. None found
|
||||
*/
|
||||
export function discoverCommands(options: DiscoverCommandsOptions): DiscoveredCommands {
|
||||
// 1. Preference commands
|
||||
if (options.preferenceCommands && options.preferenceCommands.length > 0) {
|
||||
const filtered = options.preferenceCommands
|
||||
.map(c => c.trim())
|
||||
.filter(Boolean);
|
||||
if (filtered.length > 0) {
|
||||
return { commands: filtered, source: "preference" };
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Task plan verify field (commands are untrusted — sanitize)
|
||||
if (options.taskPlanVerify && options.taskPlanVerify.trim()) {
|
||||
const commands = options.taskPlanVerify
|
||||
.split("&&")
|
||||
.map(c => c.trim())
|
||||
.filter(Boolean)
|
||||
.filter(c => sanitizeCommand(c) !== null);
|
||||
if (commands.length > 0) {
|
||||
return { commands, source: "task-plan" };
|
||||
}
|
||||
}
|
||||
|
||||
// 3. package.json scripts
|
||||
const pkgPath = join(options.cwd, "package.json");
|
||||
if (existsSync(pkgPath)) {
|
||||
try {
|
||||
const raw = readFileSync(pkgPath, "utf-8");
|
||||
const pkg = JSON.parse(raw);
|
||||
if (pkg && typeof pkg === "object" && pkg.scripts && typeof pkg.scripts === "object") {
|
||||
const commands: string[] = [];
|
||||
for (const key of PACKAGE_SCRIPT_KEYS) {
|
||||
if (typeof pkg.scripts[key] === "string") {
|
||||
commands.push(`npm run ${key}`);
|
||||
}
|
||||
}
|
||||
if (commands.length > 0) {
|
||||
return { commands, source: "package-json" };
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Malformed package.json — fall through to "none"
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Nothing found
|
||||
return { commands: [], source: "none" };
|
||||
}
|
||||
|
||||
// ─── Failure Context Formatting ──────────────────────────────────────────────
|
||||
|
||||
/** Maximum chars of stderr to include per failed check in failure context. */
|
||||
const MAX_STDERR_PER_CHECK = 2_000;
|
||||
|
||||
/** Maximum total chars for the combined failure context output. */
|
||||
const MAX_FAILURE_CONTEXT_CHARS = 10_000;
|
||||
|
||||
/**
|
||||
* Format failed verification checks into a prompt-injectable text block.
|
||||
*
|
||||
* Each failed check gets a heading with the command name and exit code,
|
||||
* followed by a truncated stderr excerpt. Individual stderr is capped to
|
||||
* 2 000 chars; total output is capped to 10 000 chars.
|
||||
*
|
||||
* Returns an empty string when all checks pass or the checks array is empty.
|
||||
*/
|
||||
export function formatFailureContext(result: VerificationResult): string {
|
||||
const failures = result.checks.filter((c) => c.exitCode !== 0);
|
||||
if (failures.length === 0) return "";
|
||||
|
||||
const blocks: string[] = [];
|
||||
|
||||
for (const check of failures) {
|
||||
let stderr = check.stderr ?? "";
|
||||
if (stderr.length > MAX_STDERR_PER_CHECK) {
|
||||
stderr = stderr.slice(0, MAX_STDERR_PER_CHECK) + "\n…[truncated]";
|
||||
}
|
||||
|
||||
blocks.push(
|
||||
`### ❌ \`${check.command}\` (exit code ${check.exitCode})\n\`\`\`stderr\n${stderr}\n\`\`\``,
|
||||
);
|
||||
}
|
||||
|
||||
let body = blocks.join("\n\n");
|
||||
const header = "## Verification Failures\n\n";
|
||||
|
||||
if (header.length + body.length > MAX_FAILURE_CONTEXT_CHARS) {
|
||||
body =
|
||||
body.slice(0, MAX_FAILURE_CONTEXT_CHARS - header.length) +
|
||||
"\n\n…[remaining failures truncated]";
|
||||
}
|
||||
|
||||
return header + body;
|
||||
}
|
||||
|
||||
// ─── Gate Execution ─────────────────────────────────────────────────────────
|
||||
|
||||
/** Characters that indicate shell injection when found in a command string. */
|
||||
const SHELL_INJECTION_PATTERN = /[;|`]|\$\(/;
|
||||
|
||||
/**
|
||||
* Validate a command string for obvious shell injection patterns.
|
||||
* Returns the command unchanged if safe, or null if suspicious.
|
||||
*/
|
||||
function sanitizeCommand(cmd: string): string | null {
|
||||
if (SHELL_INJECTION_PATTERN.test(cmd)) return null;
|
||||
return cmd;
|
||||
}
|
||||
|
||||
/** Default timeout for verification commands (ms). */
const DEFAULT_COMMAND_TIMEOUT_MS = 120_000;

/** Inputs for runVerificationGate. */
export interface RunVerificationGateOptions {
  // NOTE(review): basePath and unitId are not read by runVerificationGate
  // itself in this file — presumably consumed by callers; confirm before removal.
  basePath: string;
  unitId: string;
  /** Working directory in which verification commands execute. */
  cwd: string;
  /** Explicit commands from user preference — highest discovery priority. */
  preferenceCommands?: string[];
  /** Verify field from the task plan (split on && during discovery). */
  taskPlanVerify?: string;
  /** Per-command timeout in ms. Defaults to 120 000 (2 minutes). */
  commandTimeoutMs?: number;
}
|
||||
|
||||
/**
|
||||
* Run the verification gate: discover commands, execute each via spawnSync,
|
||||
* and return a structured result.
|
||||
*
|
||||
* - All commands run sequentially regardless of individual pass/fail.
|
||||
* - `passed` is true when every command exits 0 (or no commands are discovered).
|
||||
* - stdout/stderr per command are truncated to 10 KB.
|
||||
*/
|
||||
export function runVerificationGate(options: RunVerificationGateOptions): VerificationResult {
|
||||
const timestamp = Date.now();
|
||||
|
||||
const { commands, source } = discoverCommands({
|
||||
preferenceCommands: options.preferenceCommands,
|
||||
taskPlanVerify: options.taskPlanVerify,
|
||||
cwd: options.cwd,
|
||||
});
|
||||
|
||||
if (commands.length === 0) {
|
||||
return {
|
||||
passed: true,
|
||||
checks: [],
|
||||
discoverySource: source,
|
||||
timestamp,
|
||||
};
|
||||
}
|
||||
|
||||
const checks: VerificationCheck[] = [];
|
||||
|
||||
for (const command of commands) {
|
||||
const start = Date.now();
|
||||
const result = spawnSync(command, {
|
||||
shell: true,
|
||||
cwd: options.cwd,
|
||||
stdio: "pipe",
|
||||
encoding: "utf-8",
|
||||
timeout: options.commandTimeoutMs ?? DEFAULT_COMMAND_TIMEOUT_MS,
|
||||
});
|
||||
const durationMs = Date.now() - start;
|
||||
|
||||
let exitCode: number;
|
||||
let stderr: string;
|
||||
|
||||
if (result.error) {
|
||||
// Command not found or spawn failure
|
||||
exitCode = 127;
|
||||
stderr = truncate(
|
||||
(result.stderr || "") + "\n" + (result.error as Error).message,
|
||||
MAX_OUTPUT_BYTES,
|
||||
);
|
||||
} else {
|
||||
// status is null when killed by signal — treat as failure
|
||||
exitCode = result.status ?? 1;
|
||||
stderr = truncate(result.stderr, MAX_OUTPUT_BYTES);
|
||||
}
|
||||
|
||||
checks.push({
|
||||
command,
|
||||
exitCode,
|
||||
stdout: truncate(result.stdout, MAX_OUTPUT_BYTES),
|
||||
stderr,
|
||||
durationMs,
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
passed: checks.every(c => c.exitCode === 0),
|
||||
checks,
|
||||
discoverySource: source,
|
||||
timestamp,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Runtime Error Capture ──────────────────────────────────────────────────
|
||||
|
||||
/** Maximum characters of browser console text to retain per entry. */
|
||||
const MAX_BROWSER_TEXT_CHARS = 500;
|
||||
|
||||
/** Fatal signals that indicate a crash regardless of other status fields. */
|
||||
const FATAL_SIGNALS = new Set(["SIGABRT", "SIGSEGV", "SIGBUS"]);
|
||||
|
||||
/**
|
||||
* Injectable dependencies for captureRuntimeErrors.
|
||||
* When omitted the function uses dynamic import() to access
|
||||
* bg-shell's processes Map and browser-tools' getConsoleLogs().
|
||||
* Provide overrides in tests to avoid module mocking.
|
||||
*/
|
||||
export interface CaptureRuntimeErrorsOptions {
|
||||
getProcesses?: () => Map<string, unknown>;
|
||||
getConsoleLogs?: () => Array<{ type: string; text: string; timestamp: number; url: string }>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan bg-shell processes and browser console logs for runtime errors.
|
||||
*
|
||||
* Severity classification follows D004:
|
||||
* - bg-shell status "crashed" → blocking crash
|
||||
* - bg-shell !alive && exitCode !== 0 && exitCode !== null → blocking crash
|
||||
* - bg-shell signal SIGABRT/SIGSEGV/SIGBUS → blocking crash
|
||||
* - Browser console error with "Unhandled"/"UnhandledRejection" → blocking crash
|
||||
* - Browser console error (general) → non-blocking error
|
||||
* - Browser console warning with deprecation text → non-blocking warning
|
||||
* - bg-shell alive process with recentErrors → non-blocking error
|
||||
*
|
||||
* Returns RuntimeError[] — empty when both sources are unavailable.
|
||||
*/
|
||||
export async function captureRuntimeErrors(
|
||||
options?: CaptureRuntimeErrorsOptions,
|
||||
): Promise<RuntimeError[]> {
|
||||
const errors: RuntimeError[] = [];
|
||||
|
||||
// ── bg-shell scan ─────────────────────────────────────────────────────
|
||||
try {
|
||||
let processes: Map<string, unknown>;
|
||||
if (options?.getProcesses) {
|
||||
processes = options.getProcesses();
|
||||
} else {
|
||||
const mod = await import("../bg-shell/process-manager.js");
|
||||
processes = mod.processes;
|
||||
}
|
||||
|
||||
for (const [id, raw] of processes) {
|
||||
const proc = raw as {
|
||||
id: string;
|
||||
label?: string;
|
||||
status?: string;
|
||||
alive?: boolean;
|
||||
exitCode?: number | null;
|
||||
signal?: string | null;
|
||||
recentErrors?: string[];
|
||||
};
|
||||
|
||||
const name = proc.label || proc.id || id;
|
||||
|
||||
// Check for fatal signal first (applies regardless of alive/status)
|
||||
if (proc.signal && FATAL_SIGNALS.has(proc.signal)) {
|
||||
errors.push({
|
||||
source: "bg-shell",
|
||||
severity: "crash",
|
||||
message: buildBgShellMessage(name, proc.exitCode, proc.signal, proc.recentErrors),
|
||||
blocking: true,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Crashed status
|
||||
if (proc.status === "crashed") {
|
||||
errors.push({
|
||||
source: "bg-shell",
|
||||
severity: "crash",
|
||||
message: buildBgShellMessage(name, proc.exitCode, proc.signal, proc.recentErrors),
|
||||
blocking: true,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Non-zero exit on dead process
|
||||
if (
|
||||
!proc.alive &&
|
||||
proc.exitCode !== 0 &&
|
||||
proc.exitCode !== null &&
|
||||
proc.exitCode !== undefined
|
||||
) {
|
||||
errors.push({
|
||||
source: "bg-shell",
|
||||
severity: "crash",
|
||||
message: buildBgShellMessage(name, proc.exitCode, proc.signal, proc.recentErrors),
|
||||
blocking: true,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Alive process with recent errors — non-blocking
|
||||
if (proc.alive && proc.recentErrors && proc.recentErrors.length > 0) {
|
||||
const snippet = proc.recentErrors.slice(0, 3).join("; ");
|
||||
errors.push({
|
||||
source: "bg-shell",
|
||||
severity: "error",
|
||||
message: `[${name}] recent errors: ${snippet}`,
|
||||
blocking: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// bg-shell not available — skip silently
|
||||
}
|
||||
|
||||
// ── browser console scan ──────────────────────────────────────────────
|
||||
try {
|
||||
let logs: Array<{ type: string; text: string; timestamp: number; url: string }>;
|
||||
if (options?.getConsoleLogs) {
|
||||
logs = options.getConsoleLogs();
|
||||
} else {
|
||||
const mod = await import("../browser-tools/state.js");
|
||||
logs = mod.getConsoleLogs();
|
||||
}
|
||||
|
||||
for (const entry of logs) {
|
||||
const text =
|
||||
entry.text.length > MAX_BROWSER_TEXT_CHARS
|
||||
? entry.text.slice(0, MAX_BROWSER_TEXT_CHARS) + "…[truncated]"
|
||||
: entry.text;
|
||||
|
||||
if (entry.type === "error") {
|
||||
// Unhandled rejection / unhandled error → blocking crash
|
||||
if (/unhandled/i.test(entry.text)) {
|
||||
errors.push({
|
||||
source: "browser",
|
||||
severity: "crash",
|
||||
message: text,
|
||||
blocking: true,
|
||||
});
|
||||
} else {
|
||||
// General console.error → non-blocking error
|
||||
errors.push({
|
||||
source: "browser",
|
||||
severity: "error",
|
||||
message: text,
|
||||
blocking: false,
|
||||
});
|
||||
}
|
||||
} else if (entry.type === "warning" && /deprecated/i.test(entry.text)) {
|
||||
// Deprecation warning → non-blocking warning
|
||||
errors.push({
|
||||
source: "browser",
|
||||
severity: "warning",
|
||||
message: text,
|
||||
blocking: false,
|
||||
});
|
||||
}
|
||||
// Non-deprecation warnings are intentionally ignored
|
||||
}
|
||||
} catch {
|
||||
// browser-tools not available — skip silently
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
/** Build a human-readable message for a bg-shell process error. */
|
||||
function buildBgShellMessage(
|
||||
name: string,
|
||||
exitCode: number | null | undefined,
|
||||
signal: string | null | undefined,
|
||||
recentErrors: string[] | undefined,
|
||||
): string {
|
||||
const parts: string[] = [`[${name}]`];
|
||||
if (signal) parts.push(`signal=${signal}`);
|
||||
if (exitCode !== null && exitCode !== undefined) parts.push(`exitCode=${exitCode}`);
|
||||
if (recentErrors && recentErrors.length > 0) {
|
||||
const snippet = recentErrors.slice(0, 3).join("; ");
|
||||
parts.push(`errors: ${snippet}`);
|
||||
}
|
||||
return parts.join(" ");
|
||||
}
|
||||
|
||||
// ─── Dependency Audit ───────────────────────────────────────────────────────
|
||||
|
||||
/** Top-level dependency files that trigger an audit when changed. */
|
||||
const DEPENDENCY_FILES = new Set([
|
||||
"package.json",
|
||||
"package-lock.json",
|
||||
"pnpm-lock.yaml",
|
||||
"yarn.lock",
|
||||
"bun.lockb",
|
||||
]);
|
||||
|
||||
/**
|
||||
* Injectable dependencies for runDependencyAudit (D023 pattern).
|
||||
* When omitted the function uses real git/npm via spawnSync.
|
||||
* Provide overrides in tests to avoid real git repos and npm registries.
|
||||
*/
|
||||
export interface DependencyAuditOptions {
|
||||
gitDiff?: (cwd: string) => string[];
|
||||
npmAudit?: (cwd: string) => { stdout: string; exitCode: number };
|
||||
}
|
||||
|
||||
/**
|
||||
* Default gitDiff: runs `git diff --name-only HEAD` and returns file paths.
|
||||
* Returns empty array on any failure (non-git dir, git not found, etc.).
|
||||
*/
|
||||
function defaultGitDiff(cwd: string): string[] {
|
||||
try {
|
||||
const result = spawnSync("git", ["diff", "--name-only", "HEAD"], {
|
||||
cwd,
|
||||
encoding: "utf-8",
|
||||
timeout: 10_000,
|
||||
});
|
||||
if (result.status !== 0 || !result.stdout) return [];
|
||||
return result.stdout.trim().split("\n").filter(Boolean);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default npmAudit: runs `npm audit --audit-level=moderate --json`.
|
||||
* Returns { stdout, exitCode }. Non-zero exit is expected when vulnerabilities exist.
|
||||
*/
|
||||
function defaultNpmAudit(cwd: string): { stdout: string; exitCode: number } {
|
||||
const result = spawnSync("npm", ["audit", "--audit-level=moderate", "--json"], {
|
||||
cwd,
|
||||
encoding: "utf-8",
|
||||
timeout: 60_000,
|
||||
});
|
||||
return {
|
||||
stdout: result.stdout ?? "",
|
||||
exitCode: result.status ?? 1,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect dependency file changes and run npm audit if changes are found.
|
||||
*
|
||||
* - Calls gitDiff to get changed files, checks if any are top-level dependency files
|
||||
* - If no dependency files changed, returns []
|
||||
* - Runs npmAudit and parses JSON output into AuditWarning[]
|
||||
* - Never throws — all errors return []
|
||||
* - Non-zero npm audit exit code is expected (vulnerabilities found), not an error
|
||||
*/
|
||||
export function runDependencyAudit(
|
||||
cwd: string,
|
||||
options?: DependencyAuditOptions,
|
||||
): AuditWarning[] {
|
||||
try {
|
||||
const gitDiff = options?.gitDiff ?? defaultGitDiff;
|
||||
const npmAudit = options?.npmAudit ?? defaultNpmAudit;
|
||||
|
||||
// Get changed files and check for top-level dependency file matches
|
||||
const changedFiles = gitDiff(cwd);
|
||||
const hasDependencyChange = changedFiles.some((filePath) => {
|
||||
const name = basename(filePath);
|
||||
// Only match top-level files: the path must equal just the filename
|
||||
// (no directory separators) to be considered top-level
|
||||
return DEPENDENCY_FILES.has(name) && filePath === name;
|
||||
});
|
||||
|
||||
if (!hasDependencyChange) return [];
|
||||
|
||||
// Run npm audit
|
||||
const auditResult = npmAudit(cwd);
|
||||
|
||||
// Parse JSON output — npm audit exits non-zero when vulnerabilities exist
|
||||
let parsed: Record<string, unknown>;
|
||||
try {
|
||||
parsed = JSON.parse(auditResult.stdout);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
|
||||
// Extract vulnerabilities from the parsed output
|
||||
const vulnerabilities = parsed.vulnerabilities;
|
||||
if (!vulnerabilities || typeof vulnerabilities !== "object") return [];
|
||||
|
||||
const warnings: AuditWarning[] = [];
|
||||
for (const [name, raw] of Object.entries(vulnerabilities as Record<string, unknown>)) {
|
||||
const vuln = raw as {
|
||||
severity?: string;
|
||||
fixAvailable?: boolean;
|
||||
via?: unknown[];
|
||||
};
|
||||
if (!vuln || typeof vuln !== "object") continue;
|
||||
|
||||
const severity = vuln.severity;
|
||||
if (
|
||||
severity !== "low" &&
|
||||
severity !== "moderate" &&
|
||||
severity !== "high" &&
|
||||
severity !== "critical"
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find the first `via` entry that's an object (not a string reference)
|
||||
let title = name;
|
||||
let url = "";
|
||||
if (Array.isArray(vuln.via)) {
|
||||
for (const entry of vuln.via) {
|
||||
if (entry && typeof entry === "object" && !Array.isArray(entry)) {
|
||||
const obj = entry as { title?: string; url?: string };
|
||||
if (obj.title) title = obj.title;
|
||||
if (obj.url) url = obj.url;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
warnings.push({
|
||||
name,
|
||||
severity: severity as AuditWarning["severity"],
|
||||
title,
|
||||
url,
|
||||
fixAvailable: vuln.fixAvailable === true,
|
||||
});
|
||||
}
|
||||
|
||||
return warnings;
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue