Merge pull request #3576 from jeremymcs/feat/llm-safety-harness

feat(gsd): LLM safety harness for auto-mode damage control
This commit is contained in:
Jeremy McSpadden 2026-04-05 16:03:16 -05:00 committed by GitHub
commit 2298b9acab
19 changed files with 1173 additions and 6 deletions

View file

@ -0,0 +1,81 @@
// GSD2 — Regression test: pendingProviderRegistrations must be flushed exactly once (#3576)
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
import { describe, it } from "node:test";
import assert from "node:assert/strict";
/**
* This test validates that the provider preflush pattern in sdk.ts clears
* pendingProviderRegistrations after iterating, so bindCore() doesn't
* re-register the same providers.
*
* The bug: createAgentSession() iterated pendingProviderRegistrations but
* did not clear the array. Later, bindCore() replayed and registered the
* same providers again, stacking wrappers.
*/
interface ProviderEntry {
  name: string;
  config: Record<string, unknown>;
}
interface MockRuntime {
  pendingProviderRegistrations: ProviderEntry[];
}
describe("provider registration preflush", () => {
  /** Push every pending provider name into `sink`, mimicking a flush loop. */
  const drain = (runtime: MockRuntime, sink: string[]): void => {
    for (const entry of runtime.pendingProviderRegistrations) {
      sink.push(entry.name);
    }
  };
  it("clears pending registrations after preflush so bindCore does not replay", () => {
    const seen: string[] = [];
    const runtime: MockRuntime = {
      pendingProviderRegistrations: [
        { name: "ollama", config: { type: "ollama" } },
        { name: "custom-provider", config: { type: "custom" } },
      ],
    };
    // sdk.ts preflush (lines 220-223), followed by the fix: clear the queue.
    drain(runtime, seen);
    runtime.pendingProviderRegistrations = [];
    // bindCore() flush (runner.ts lines 268-271) now iterates an empty queue.
    drain(runtime, seen);
    runtime.pendingProviderRegistrations = [];
    assert.deepEqual(
      seen,
      ["ollama", "custom-provider"],
      "each provider should be registered exactly once",
    );
  });
  it("without the fix, providers are registered twice", () => {
    const seen: string[] = [];
    const runtime: MockRuntime = {
      pendingProviderRegistrations: [
        { name: "ollama", config: { type: "ollama" } },
      ],
    };
    // Old behavior: the preflush and the bindCore() replay both walk the
    // same, never-cleared queue — each provider is registered twice.
    drain(runtime, seen);
    drain(runtime, seen);
    assert.deepEqual(
      seen,
      ["ollama", "ollama"],
      "without clearing, providers are registered twice (demonstrating the bug)",
    );
  });
});

View file

@ -221,9 +221,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
for (const { name, config } of extensionsForModelResolution.runtime.pendingProviderRegistrations) {
modelRegistry.registerProvider(name, config);
}
// Note: we do NOT clear pendingProviderRegistrations here — bindCore() will iterate
// an empty array harmlessly, and clearing here would require the runtime to track
// whether the flush already happened.
// Clear the queue so bindCore() doesn't re-register the same providers.
extensionsForModelResolution.runtime.pendingProviderRegistrations = [];
// If still no model, use findInitialModel (checks settings default, then provider defaults)
if (!model) {

View file

@ -52,6 +52,13 @@ import { hasPendingCaptures, loadPendingCaptures, revertExecutorResolvedCaptures
import { debugLog } from "./debug-logger.js";
import { runSafely } from "./auto-utils.js";
import type { AutoSession, SidecarItem } from "./auto/session.js";
import { getEvidence } from "./safety/evidence-collector.js";
import { validateFileChanges } from "./safety/file-change-validator.js";
// crossReferenceEvidence available for future use when verification_evidence is stored in DB
// import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
import { validateContent } from "./safety/content-validator.js";
import { resolveSafetyHarnessConfig } from "./safety/safety-harness.js";
import { resolveExpectedArtifactPath as resolveArtifactForContent } from "./auto-artifact-paths.js";
/** Maximum verification retry attempts before escalating to blocker placeholder (#2653). */
const MAX_VERIFICATION_RETRIES = 3;
@ -437,6 +444,87 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
debugLog("postUnit", { phase: "rogue-detection", error: String(e) });
}
// ── Safety harness: post-unit validation ──
try {
const { loadEffectiveGSDPreferences } = await import("./preferences.js");
const prefs = loadEffectiveGSDPreferences()?.preferences;
const safetyConfig = resolveSafetyHarnessConfig(
prefs?.safety_harness as Record<string, unknown> | undefined,
);
if (safetyConfig.enabled) {
const { milestone: sMid, slice: sSid, task: sTid } = parseUnitId(s.currentUnit.id);
// File change validation (execute-task only, after auto-commit)
if (safetyConfig.file_change_validation && s.currentUnit.type === "execute-task" && sMid && sSid && sTid && isDbAvailable()) {
try {
const taskRow = getTask(sMid, sSid, sTid);
if (taskRow) {
const expectedOutput = taskRow.expected_output ?? [];
const plannedFiles = taskRow.files ?? [];
const audit = validateFileChanges(s.basePath, expectedOutput, plannedFiles);
if (audit && audit.violations.length > 0) {
const warnings = audit.violations.filter(v => v.severity === "warning");
for (const v of warnings) {
logWarning("safety", `file-change: ${v.file}${v.reason}`);
}
if (warnings.length > 0) {
ctx.ui.notify(
`Safety: ${warnings.length} unexpected file change(s) outside task plan`,
"warning",
);
}
}
}
} catch (e) {
debugLog("postUnit", { phase: "safety-file-change", error: String(e) });
}
}
// Evidence cross-reference (execute-task only)
// Verification evidence is passed via the complete-task tool call and
// stored in the SUMMARY.md on disk — not available as structured data
// in the DB. The evidence collector tracks actual bash tool calls, so
// we can still detect units that claimed success but ran no commands.
if (safetyConfig.evidence_cross_reference && s.currentUnit.type === "execute-task") {
try {
const actual = getEvidence();
const bashCalls = actual.filter(e => e.kind === "bash");
// If the task is marked complete but zero bash commands were run,
// it's suspicious — the LLM may have fabricated results.
if (sMid && sSid && sTid && isDbAvailable()) {
const taskRow = getTask(sMid, sSid, sTid);
if (taskRow?.status === "complete" && taskRow.verify && bashCalls.length === 0) {
logWarning("safety", "task marked complete with verification commands but no bash calls were executed");
ctx.ui.notify(
`Safety: task ${sTid} has verification commands but no bash calls were recorded`,
"warning",
);
}
}
} catch (e) {
debugLog("postUnit", { phase: "safety-evidence-xref", error: String(e) });
}
}
// Content validation (plan-slice, plan-milestone)
if (safetyConfig.content_validation) {
try {
const artifactPath = resolveArtifactForContent(s.currentUnit.type, s.currentUnit.id, s.basePath);
const contentViolations = validateContent(s.currentUnit.type, artifactPath);
for (const v of contentViolations) {
logWarning("safety", `content: ${v.reason}`);
ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
}
} catch (e) {
debugLog("postUnit", { phase: "safety-content-validation", error: String(e) });
}
}
}
} catch (e) {
debugLog("postUnit", { phase: "safety-harness", error: String(e) });
}
// Artifact verification
let triggerArtifactVerified = false;
if (!s.currentUnit.type.startsWith("hook/")) {

View file

@ -106,8 +106,9 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
}
}
const estimateMinutes = taskEstimate ? parseEstimateMinutes(taskEstimate) : null;
const MAX_TIMEOUT_SCALE = 6; // Cap at 6x (60min task). Prevents 2h+ tasks from creating 120min+ timeout windows.
const timeoutScale = estimateMinutes && estimateMinutes > 0
? Math.max(1, estimateMinutes / 10) // 10min task = 1x, 30min = 3x, 2h = 12x
? Math.min(MAX_TIMEOUT_SCALE, Math.max(1, estimateMinutes / 10))
: 1;
const softTimeoutMs = (supervisor.soft_timeout_minutes ?? 0) * 60 * 1000 * timeoutScale;

View file

@ -37,6 +37,9 @@ import { withTimeout, FINALIZE_POST_TIMEOUT_MS } from "./finalize-timeout.js";
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
import { isDbAvailable, getMilestoneSlices } from "../gsd-db.js";
import { resetEvidence } from "../safety/evidence-collector.js";
import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js";
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
// ─── generateMilestoneReport ──────────────────────────────────────────────────
@ -1079,6 +1082,21 @@ export async function runUnitPhase(
if (mid)
deps.updateSliceProgressCache(s.basePath, mid, state.activeSlice?.id);
// ── Safety harness: reset evidence + create checkpoint ──
const safetyConfig = resolveSafetyHarnessConfig(
prefs?.safety_harness as Record<string, unknown> | undefined,
);
if (safetyConfig.enabled && safetyConfig.evidence_collection) {
resetEvidence();
}
// Only checkpoint code-executing units (not lifecycle/planning units)
if (safetyConfig.enabled && safetyConfig.checkpoints && unitType === "execute-task") {
s.checkpointSha = createCheckpoint(s.basePath, unitId);
if (s.checkpointSha) {
debugLog("runUnitPhase", { phase: "checkpoint-created", unitId, sha: s.checkpointSha.slice(0, 8) });
}
}
// Prompt injection
let finalPrompt = prompt;
@ -1376,6 +1394,27 @@ export async function runUnitPhase(
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: ic.nextSeq(), eventType: "unit-end", data: { unitType, unitId, status: unitResult.status, artifactVerified, ...(unitResult.errorContext ? { errorContext: unitResult.errorContext } : {}) }, causedBy: { flowId: ic.flowId, seq: unitStartSeq } });
// ── Safety harness: checkpoint cleanup or rollback ──
if (s.checkpointSha) {
if (unitResult.status === "error" && safetyConfig.auto_rollback) {
const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha);
if (rolled) {
ctx.ui.notify(`Rolled back to pre-unit checkpoint for ${unitId}`, "info");
debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId });
}
} else if (unitResult.status === "error") {
ctx.ui.notify(
`Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`,
"warning",
);
} else {
// Success — clean up checkpoint ref
cleanupCheckpoint(s.basePath, unitId);
debugLog("runUnitPhase", { phase: "checkpoint-cleaned", unitId });
}
s.checkpointSha = null;
}
return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } };
}

View file

@ -145,6 +145,10 @@ export class AutoSession {
lastBaselineCharCount: number | undefined;
pendingQuickTasks: CaptureEntry[] = [];
// ── Safety harness ───────────────────────────────────────────────────────
/** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */
checkpointSha: string | null = null;
// ── Signal handler ───────────────────────────────────────────────────────
sigtermHandler: (() => void) | null = null;
@ -223,6 +227,7 @@ export class AutoSession {
this.lastToolInvocationError = null;
this.isolationDegraded = false;
this.milestoneMergedInPhases = false;
this.checkpointSha = null;
// Signal handler
this.sigtermHandler = null;

View file

@ -18,6 +18,9 @@ import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js"
import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
import { saveActivityLog } from "../activity-log.js";
import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult } from "../safety/evidence-collector.js";
import { classifyCommand } from "../safety/destructive-guard.js";
import { logWarning as safetyLogWarning } from "../workflow-logger.js";
// Skip the welcome screen on the very first session_start — cli.ts already
// printed it before the TUI launched. Only re-print on /clear (subsequent sessions).
@ -203,6 +206,26 @@ export function registerHooks(pi: ExtensionAPI): void {
if (result.block) return result;
});
// ── Safety harness: evidence collection + destructive command warnings ──
pi.on("tool_call", async (event, ctx) => {
if (!isAutoActive()) return;
safetyRecordToolCall(event.toolName, event.input as Record<string, unknown>);
// Destructive command classification (warn only, never block)
if (isToolCallEventType("bash", event)) {
const classification = classifyCommand(event.input.command);
if (classification.destructive) {
safetyLogWarning("safety", `destructive command: ${classification.labels.join(", ")}`, {
command: String(event.input.command).slice(0, 200),
});
ctx.ui.notify(
`Destructive command detected: ${classification.labels.join(", ")}`,
"warning",
);
}
}
});
pi.on("tool_result", async (event) => {
if (event.toolName !== "ask_user_questions") return;
const milestoneId = getDiscussionMilestoneId();
@ -268,6 +291,10 @@ export function registerHooks(pi: ExtensionAPI): void {
: (typeof event.result?.content?.[0]?.text === "string" ? event.result.content[0].text : String(event.result));
recordToolInvocationError(event.toolName, errorText);
}
// Safety harness: record tool execution results for evidence cross-referencing
if (isAutoActive()) {
safetyRecordToolResult(event.toolCallId, event.toolName, event.result, event.isError);
}
});
pi.on("model_select", async (_event, ctx) => {

View file

@ -105,6 +105,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
"experimental",
"codebase",
"slice_parallel",
"safety_harness",
]);
/** Canonical list of all dispatch unit types. */
@ -291,6 +292,18 @@ export interface GSDPreferences {
codebase?: CodebaseMapPreferences;
/** Slice-level parallelism within a milestone. Disabled by default. */
slice_parallel?: { enabled?: boolean; max_workers?: number };
/** LLM safety harness configuration. Monitors, validates, and constrains LLM behavior during auto-mode. Enabled by default with warn-and-continue policy. */
safety_harness?: {
enabled?: boolean;
evidence_collection?: boolean;
file_change_validation?: boolean;
evidence_cross_reference?: boolean;
destructive_command_warnings?: boolean;
content_validation?: boolean;
checkpoints?: boolean;
auto_rollback?: boolean;
timeout_scale_cap?: number;
};
}
export interface LoadedGSDPreferences {

View file

@ -0,0 +1,98 @@
/**
* Lightweight content validator for auto-mode safety harness.
* Validates that high-value unit outputs contain minimum expected content.
*
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
import { existsSync, readFileSync } from "node:fs";
import { logWarning } from "../workflow-logger.js";
// ─── Types ──────────────────────────────────────────────────────────────────
export interface ContentViolation {
severity: "warning";
reason: string;
}
// ─── Public API ─────────────────────────────────────────────────────────────
/**
* Validate content quality for a completed unit.
* Returns an array of violations. Empty array = content looks acceptable.
*
* @param unitType - The type of unit that completed (e.g. "plan-slice")
* @param artifactPath - Absolute path to the primary artifact file
*/
export function validateContent(
unitType: string,
artifactPath: string | null,
): ContentViolation[] {
if (!artifactPath || !existsSync(artifactPath)) return [];
const validator = VALIDATORS[unitType];
if (!validator) return [];
try {
const content = readFileSync(artifactPath, "utf-8");
return validator(content);
} catch (e) {
logWarning("safety", `content validation read failed: ${(e as Error).message}`);
return [];
}
}
// ─── Validators ─────────────────────────────────────────────────────────────
type ContentValidatorFn = (content: string) => ContentViolation[];
const VALIDATORS: Record<string, ContentValidatorFn> = {
"plan-slice": validatePlanSlice,
"plan-milestone": validatePlanMilestone,
};
/**
 * Validate a slice plan for minimum expected structure:
 * at least two task checkboxes, a Files section, and verification guidance.
 * All findings are warnings — the harness never blocks on content quality.
 */
function validatePlanSlice(content: string): ContentViolation[] {
  const violations: ContentViolation[] = [];
  // Must have at least 2 task entries (checkbox pattern)
  const taskCount = (content.match(/- \[[ x]\] \*\*T\d+/g) || []).length;
  if (taskCount < 2) {
    violations.push({
      severity: "warning",
      reason: `Slice plan has only ${taskCount} task(s) — expected at least 2`,
    });
  }
  // Should have a Files section. "## Files" also matches the longer
  // "## Files Likely Touched" heading, so one check suffices.
  if (!content.includes("## Files")) {
    violations.push({
      severity: "warning",
      reason: "Slice plan missing 'Files Likely Touched' section",
    });
  }
  // Should have a verification section. Fix: case-insensitive stem match —
  // the old includes("Verify")/includes("verify") checks missed plans whose
  // only mention was e.g. "Verification" or "VERIFY" (neither contains the
  // literal substring "verify"), producing false-positive warnings.
  if (!/verif/i.test(content)) {
    violations.push({
      severity: "warning",
      reason: "Slice plan has no verification instructions",
    });
  }
  return violations;
}
/**
 * Validate a milestone roadmap: it must declare at least one slice
 * heading ("## S<n>"). Returns a single warning when none are found.
 */
function validatePlanMilestone(content: string): ContentViolation[] {
  const sliceHeadings = content.match(/##\s+S\d+/g) ?? [];
  if (sliceHeadings.length >= 1) return [];
  return [
    {
      severity: "warning",
      reason: `Milestone roadmap has ${sliceHeadings.length} slice(s) — expected at least 1`,
    },
  ];
}

View file

@ -0,0 +1,49 @@
/**
* Destructive command classifier for auto-mode safety harness.
* Classifies bash commands and warns on potentially destructive operations.
 * Does NOT block — only classifies for logging/notification.
*
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
// ─── Pattern Definitions ────────────────────────────────────────────────────
/** A single classification rule: regex plus the human-readable label it emits. */
interface GuardRule {
  pattern: RegExp;
  label: string;
}
// Regexes intentionally favor precision over recall: each matches a specific
// well-known destructive invocation shape rather than broad keywords.
const DESTRUCTIVE_PATTERNS: readonly GuardRule[] = [
  { pattern: /\brm\s+(-[^\s]*[rfRF][^\s]*\s+|.*\s+-[^\s]*[rfRF])/, label: "recursive delete" },
  { pattern: /\bgit\s+push\s+.*--force/, label: "force push" },
  { pattern: /\bgit\s+push\s+-f\b/, label: "force push" },
  { pattern: /\bgit\s+reset\s+--hard/, label: "hard reset" },
  { pattern: /\bgit\s+clean\s+-[^\s]*[fdxFDX]/, label: "git clean" },
  { pattern: /\bgit\s+checkout\s+--\s+\./, label: "discard all changes" },
  { pattern: /\bdrop\s+(database|table|index)\b/i, label: "SQL drop" },
  { pattern: /\btruncate\s+table\b/i, label: "SQL truncate" },
  { pattern: /\bchmod\s+777\b/, label: "world-writable permissions" },
  { pattern: /\bcurl\s.*\|\s*(bash|sh|zsh)\b/, label: "pipe to shell" },
];
// ─── Public API ─────────────────────────────────────────────────────────────
export interface CommandClassification {
  destructive: boolean;
  labels: string[];
}
/**
 * Classify a bash command for destructive operations.
 * Returns the list of matched destructive pattern labels.
 */
export function classifyCommand(command: string): CommandClassification {
  // A Set deduplicates labels shared by several rules (e.g. the two
  // force-push patterns) while preserving first-match insertion order.
  const matched = new Set<string>();
  for (const rule of DESTRUCTIVE_PATTERNS) {
    if (rule.pattern.test(command)) matched.add(rule.label);
  }
  const labels = [...matched];
  return { destructive: labels.length > 0, labels };
}

View file

@ -0,0 +1,151 @@
/**
* Real-time tool call evidence collector for auto-mode safety harness.
* Tracks every bash command, file write, and file edit during a unit execution.
* Evidence is compared against LLM completion claims in evidence-cross-ref.ts.
*
* Follows the same module-level Map pattern as auto-tool-tracking.ts.
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
// ─── Types ──────────────────────────────────────────────────────────────────
export interface BashEvidence {
kind: "bash";
toolCallId: string;
command: string;
exitCode: number;
outputSnippet: string;
timestamp: number;
}
export interface FileWriteEvidence {
kind: "write";
toolCallId: string;
path: string;
timestamp: number;
}
export interface FileEditEvidence {
kind: "edit";
toolCallId: string;
path: string;
timestamp: number;
}
export type EvidenceEntry = BashEvidence | FileWriteEvidence | FileEditEvidence;
// ─── Module State ───────────────────────────────────────────────────────────
// Evidence accumulated for the currently-executing unit; reset per unit.
let unitEvidence: EvidenceEntry[] = [];
// ─── Public API ─────────────────────────────────────────────────────────────
/** Reset all evidence for a new unit. Call at unit start. */
export function resetEvidence(): void {
  unitEvidence = [];
}
/** Get a read-only view of all evidence collected for the current unit. */
export function getEvidence(): readonly EvidenceEntry[] {
  return unitEvidence;
}
/** Get only bash evidence entries. */
export function getBashEvidence(): readonly BashEvidence[] {
  return unitEvidence.filter((e): e is BashEvidence => e.kind === "bash");
}
/** Get all file paths touched (write + edit). */
export function getFilePaths(): string[] {
  return unitEvidence
    .filter((e): e is FileWriteEvidence | FileEditEvidence => e.kind === "write" || e.kind === "edit")
    .map(e => e.path);
}
// ─── Recording (called from register-hooks.ts) ─────────────────────────────
/**
 * Record a tool call at dispatch time (before execution).
 * Exit codes and output are filled in by recordToolResult after execution.
 *
 * Fix: tool names are now matched case-insensitively, consistent with
 * recordToolResult. The previous version matched only the exact spellings
 * "bash"/"Bash" (and likewise for write/edit), so any other casing was
 * silently dropped at call time yet still consumed by recordToolResult.
 */
export function recordToolCall(toolName: string, input: Record<string, unknown>): void {
  const name = toolName.toLowerCase();
  if (name === "bash") {
    unitEvidence.push({
      kind: "bash",
      toolCallId: "",
      command: String(input.command ?? ""),
      exitCode: -1, // sentinel: filled in by recordToolResult
      outputSnippet: "",
      timestamp: Date.now(),
    });
  } else if (name === "write") {
    unitEvidence.push({
      kind: "write",
      toolCallId: "",
      path: String(input.file_path ?? input.path ?? ""),
      timestamp: Date.now(),
    });
  } else if (name === "edit") {
    unitEvidence.push({
      kind: "edit",
      toolCallId: "",
      path: String(input.file_path ?? input.path ?? ""),
      timestamp: Date.now(),
    });
  }
}
/**
 * Record a tool execution result. Matches the most recent unresolved entry
 * of the same kind and fills in the toolCallId, exit code, and output.
 */
export function recordToolResult(
  toolCallId: string,
  toolName: string,
  result: unknown,
  isError: boolean,
): void {
  const normalizedName = toolName.toLowerCase();
  if (normalizedName === "bash") {
    const entry = findLastUnresolved("bash") as BashEvidence | undefined;
    if (entry) {
      entry.toolCallId = toolCallId;
      const text = extractResultText(result);
      entry.outputSnippet = text.slice(0, 500);
      // Prefer the explicit exit code echoed in the output; otherwise infer
      // success/failure from the isError flag.
      const exitMatch = text.match(/Command exited with code (\d+)/);
      entry.exitCode = exitMatch ? Number(exitMatch[1]) : (isError ? 1 : 0);
    }
  } else if (normalizedName === "write" || normalizedName === "edit") {
    const entry = findLastUnresolved(normalizedName);
    if (entry) {
      entry.toolCallId = toolCallId;
    }
  }
}
// ─── Internals ──────────────────────────────────────────────────────────────
/** Most recent entry of `kind` whose toolCallId is still unset ("" sentinel). */
function findLastUnresolved(kind: string): EvidenceEntry | undefined {
  for (let i = unitEvidence.length - 1; i >= 0; i--) {
    if (unitEvidence[i].kind === kind && unitEvidence[i].toolCallId === "") {
      return unitEvidence[i];
    }
  }
  return undefined;
}
/** Best-effort extraction of human-readable text from a tool result payload. */
function extractResultText(result: unknown): string {
  if (typeof result === "string") return result;
  if (result && typeof result === "object") {
    const r = result as Record<string, unknown>;
    // MCP-style results carry an array of content blocks; use the first text one.
    if (Array.isArray(r.content)) {
      const textBlock = r.content.find(
        (c: unknown) => typeof c === "object" && c !== null && (c as Record<string, unknown>).type === "text",
      ) as Record<string, unknown> | undefined;
      if (textBlock && typeof textBlock.text === "string") return textBlock.text;
    }
    if (typeof r.text === "string") return r.text;
  }
  return String(result ?? "");
}

View file

@ -0,0 +1,120 @@
/**
* Evidence cross-reference for auto-mode safety harness.
* Compares the LLM's claimed verification evidence (command + exitCode)
* against actual bash tool calls recorded by the evidence collector.
*
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
import type { BashEvidence, EvidenceEntry } from "./evidence-collector.js";
// ─── Types ──────────────────────────────────────────────────────────────────
export interface ClaimedEvidence {
command: string;
exitCode: number;
verdict: string;
}
export interface EvidenceMismatch {
severity: "warning" | "error";
claimed: ClaimedEvidence;
actual: BashEvidence | null;
reason: string;
}
// ─── Public API ─────────────────────────────────────────────────────────────
/**
 * Cross-reference claimed verification evidence against actual bash tool calls.
 *
 * Returns an array of mismatches. Empty array = all claims verified.
 * Skips entries that were coerced from strings (already flagged by db-tools.ts).
 */
export function crossReferenceEvidence(
  claimedEvidence: readonly ClaimedEvidence[],
  actualEvidence: readonly EvidenceEntry[],
): EvidenceMismatch[] {
  const bashCalls = actualEvidence.filter(
    (e): e is BashEvidence => e.kind === "bash",
  );
  const mismatches: EvidenceMismatch[] = [];
  for (const claimed of claimedEvidence) {
    // Skip coerced entries — they're already flagged with exitCode: -1
    // and verdict: "unknown (coerced from string)" by db-tools.ts
    if (claimed.verdict?.includes("coerced from string")) continue;
    if (claimed.exitCode === -1) continue;
    // Skip entries with empty or generic commands
    if (!claimed.command || claimed.command.length < 3) continue;
    // Find matching bash call by command substring match
    const match = findBestMatch(claimed.command, bashCalls);
    if (!match) {
      mismatches.push({
        severity: "warning",
        claimed,
        actual: null,
        reason: `No bash tool call found matching "${claimed.command.slice(0, 80)}"`,
      });
      continue;
    }
    // Exit code mismatch: LLM claims success but actual command failed
    if (claimed.exitCode === 0 && match.exitCode !== 0) {
      mismatches.push({
        severity: "error",
        claimed,
        actual: match,
        reason: `Claimed exitCode=0 but actual exitCode=${match.exitCode}`,
      });
    }
  }
  return mismatches;
}
// ─── Internals ──────────────────────────────────────────────────────────────
/**
 * Find the best matching bash evidence entry for a claimed command.
 * Uses substring matching — the claimed command may be a shortened version
 * of the actual command, or vice versa.
 *
 * Fix: evidence entries with an empty/whitespace-only recorded command are
 * excluded up front. Previously `normalized.includes(b.command)` was true
 * when b.command === "", so one blank entry matched EVERY claim and
 * suppressed legitimate "no matching call" warnings.
 */
function findBestMatch(
  claimedCommand: string,
  bashCalls: readonly BashEvidence[],
): BashEvidence | null {
  const normalized = claimedCommand.trim();
  const candidates = bashCalls.filter(b => b.command.trim().length > 0);
  // Exact match first
  const exact = candidates.find(b => b.command.trim() === normalized);
  if (exact) return exact;
  // Substring match: claimed is contained in actual or actual in claimed
  const substring = candidates.find(
    b => b.command.includes(normalized) || normalized.includes(b.command),
  );
  if (substring) return substring;
  // Token match: split on whitespace and check significant overlap
  const claimedTokens = normalized.split(/\s+/).filter(t => t.length > 2);
  if (claimedTokens.length === 0) return null;
  let bestMatch: BashEvidence | null = null;
  let bestScore = 0;
  for (const call of candidates) {
    const callTokens = new Set(call.command.split(/\s+/));
    const matchCount = claimedTokens.filter(t => callTokens.has(t)).length;
    const score = matchCount / claimedTokens.length;
    if (score > bestScore && score >= 0.5) {
      bestScore = score;
      bestMatch = call;
    }
  }
  return bestMatch;
}

View file

@ -0,0 +1,108 @@
/**
* Post-unit file change validator for auto-mode safety harness.
* Compares actual git diff against the task plan's expected output files.
*
* Uses tasks.expected_output (DB column, populated from per-task ## Expected Output)
* and tasks.files (from slice PLAN.md - Files: subline) as the expected set.
* Compares against git diff HEAD~1 --name-only after auto-commit.
*
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
import { execFileSync } from "node:child_process";
import { logWarning } from "../workflow-logger.js";
// ─── Types ──────────────────────────────────────────────────────────────────
export interface FileViolation {
severity: "info" | "warning";
file: string;
reason: string;
}
export interface FileChangeAudit {
expectedFiles: string[];
actualFiles: string[];
unexpectedFiles: string[];
missingFiles: string[];
violations: FileViolation[];
}
// ─── Public API ─────────────────────────────────────────────────────────────
/**
 * Validate file changes after auto-commit for an execute-task unit.
 * Returns null if task data is unavailable or DB is not loaded.
 *
 * @param basePath - Working directory (worktree or project root)
 * @param expectedOutput - JSON array from tasks.expected_output DB column
 * @param plannedFiles - JSON array from tasks.files DB column
 */
export function validateFileChanges(
  basePath: string,
  expectedOutput: string[],
  plannedFiles: string[],
): FileChangeAudit | null {
  const planned = new Set([...expectedOutput, ...plannedFiles]);
  // Nothing was planned — there is no expectation to audit against.
  if (planned.size === 0) return null;
  const changed = getChangedFilesFromLastCommit(basePath);
  if (changed === null) return null;
  // Only project source files count; .gsd/ bookkeeping files are exempt
  // (both POSIX and Windows path separators).
  const actualFiles = changed.filter(
    f => !(f.startsWith(".gsd/") || f.startsWith(".gsd\\")),
  );
  // Normalize expected paths by stripping a leading "./" or "/".
  const expectedSet = new Set(
    [...planned].map(f => f.replace(/^\.\//, "").replace(/^\//, "")),
  );
  const expectedFiles = [...expectedSet];
  const actualSet = new Set(actualFiles);
  const unexpectedFiles = actualFiles.filter(f => !expectedSet.has(f));
  const missingFiles = expectedFiles.filter(f => !actualSet.has(f));
  // Unexpected edits are warnings; planned-but-untouched files are info only.
  const violations: FileViolation[] = [
    ...unexpectedFiles.map(file => ({
      severity: "warning" as const,
      file,
      reason: "Modified but not in task plan's expected output",
    })),
    ...missingFiles.map(file => ({
      severity: "info" as const,
      file,
      reason: "Listed in task plan but not modified",
    })),
  ];
  return { expectedFiles, actualFiles, unexpectedFiles, missingFiles, violations };
}
// ─── Internals ──────────────────────────────────────────────────────────────
/**
 * List the files touched by the last commit via `git diff --name-only`.
 * Returns null when git fails (e.g. an initial commit with no HEAD~1),
 * logging the failure as a warning.
 */
function getChangedFilesFromLastCommit(basePath: string): string[] | null {
  try {
    const stdout = execFileSync(
      "git",
      ["diff", "--name-only", "HEAD~1", "HEAD"],
      { cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
    );
    // filter(Boolean) drops the empty string produced by an empty diff.
    return stdout.trim().split("\n").filter(Boolean);
  } catch (e) {
    logWarning("safety", `git diff failed in file-change-validator: ${(e as Error).message}`);
    return null;
  }
}

View file

@ -0,0 +1,106 @@
/**
* Pre-unit git checkpoint and rollback for auto-mode safety harness.
* Uses the existing refs/gsd/ namespace (already pruned by doctor).
*
* Creates a lightweight ref at HEAD before unit execution. On failure,
* the ref can be used to rollback the branch to the pre-unit state.
*
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
import { execFileSync } from "node:child_process";
import { logWarning } from "../workflow-logger.js";
// ─── Constants ──────────────────────────────────────────────────────────────
const CHECKPOINT_PREFIX = "refs/gsd/checkpoints/";
// ─── Public API ─────────────────────────────────────────────────────────────
/**
 * Create a checkpoint ref at the current HEAD for the given unit.
 * Returns the SHA of HEAD, or null if the operation fails.
 */
export function createCheckpoint(basePath: string, unitId: string): string | null {
  try {
    const head = execFileSync("git", ["rev-parse", "HEAD"], {
      cwd: basePath,
      stdio: ["ignore", "pipe", "pipe"],
      encoding: "utf-8",
    }).trim();
    // A valid abbreviated SHA is at least 7 characters.
    if (head.length < 7) return null;
    // unitId may contain "/" (e.g. "m1/s2/t3"), which would nest ref paths.
    const refName = CHECKPOINT_PREFIX + unitId.replace(/\//g, "-");
    execFileSync("git", ["update-ref", refName, head], {
      cwd: basePath,
      stdio: ["ignore", "pipe", "pipe"],
    });
    return head;
  } catch (e) {
    logWarning("safety", `checkpoint creation failed: ${(e as Error).message}`);
    return null;
  }
}
/**
 * Rollback the current branch to a checkpoint SHA.
 * Returns true on success, false on failure.
 *
 * WARNING: This is a destructive operation — it discards all changes
 * since the checkpoint. Only call when the user has opted in via
 * safety_harness.auto_rollback or an explicit manual trigger.
 */
export function rollbackToCheckpoint(
  basePath: string,
  unitId: string,
  sha: string,
): boolean {
  try {
    // A named branch must be checked out; refuse to reset a detached HEAD.
    const currentBranch = execFileSync("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
      cwd: basePath,
      stdio: ["ignore", "pipe", "pipe"],
      encoding: "utf-8",
    }).trim();
    if (currentBranch === "" || currentBranch === "HEAD") {
      logWarning("safety", "rollback: detached HEAD state, cannot rollback");
      return false;
    }
    // `git reset --hard <sha>` moves the checked-out branch AND the working
    // tree in one step (`git branch -f` is rejected for checked-out branches).
    execFileSync("git", ["reset", "--hard", sha], {
      cwd: basePath,
      stdio: ["ignore", "pipe", "pipe"],
    });
    // The checkpoint has served its purpose — drop the ref.
    cleanupCheckpoint(basePath, unitId);
    return true;
  } catch (e) {
    logWarning("safety", `rollback failed: ${(e as Error).message}`);
    return false;
  }
}
/**
 * Delete the checkpoint ref for a unit after it completes successfully.
 * Failures are swallowed: the ref may already be gone, which is fine.
 */
export function cleanupCheckpoint(basePath: string, unitId: string): void {
  const refName = CHECKPOINT_PREFIX + unitId.replace(/\//g, "-");
  try {
    execFileSync("git", ["update-ref", "-d", refName], {
      cwd: basePath,
      stdio: ["ignore", "pipe", "pipe"],
    });
  } catch {
    // Non-fatal — ref may already have been cleaned up
  }
}

View file

@ -0,0 +1,105 @@
/**
* Safety Harness central module for LLM damage control during auto-mode.
* Provides types, preference resolution, and orchestration for all safety components.
*
* Components:
* - evidence-collector.ts: Real-time tool call tracking
* - destructive-guard.ts: Bash command classification
* - file-change-validator.ts: Post-unit git diff vs plan
* - evidence-cross-ref.ts: Claimed vs actual verification evidence
* - git-checkpoint.ts: Pre-unit checkpoints + rollback
* - content-validator.ts: Output quality validation
*
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
// ─── Types ──────────────────────────────────────────────────────────────────
export interface SafetyHarnessConfig {
  enabled: boolean;
  evidence_collection: boolean;
  file_change_validation: boolean;
  evidence_cross_reference: boolean;
  destructive_command_warnings: boolean;
  content_validation: boolean;
  checkpoints: boolean;
  auto_rollback: boolean;
  timeout_scale_cap: number;
}
// ─── Defaults ───────────────────────────────────────────────────────────────
const DEFAULTS: SafetyHarnessConfig = {
  enabled: true,
  evidence_collection: true,
  file_change_validation: true,
  evidence_cross_reference: true,
  destructive_command_warnings: true,
  content_validation: true,
  checkpoints: true,
  auto_rollback: false,
  timeout_scale_cap: 6,
};
// Read a boolean preference, falling back to the default when absent or mistyped.
function boolPref(value: unknown, fallback: boolean): boolean {
  return typeof value === "boolean" ? value : fallback;
}
// ─── Public API ─────────────────────────────────────────────────────────────
/**
 * Resolve safety harness configuration from raw preferences.
 * Missing or mistyped fields fall back to defaults. timeout_scale_cap
 * additionally rejects non-finite numbers (NaN/Infinity pass a plain
 * `typeof === "number"` check but would corrupt the timeout cap).
 *
 * @param raw - Raw preference object, or undefined when none is configured.
 * @returns A fully-populated config; never mutates `raw` or DEFAULTS.
 */
export function resolveSafetyHarnessConfig(
  raw: Record<string, unknown> | undefined,
): SafetyHarnessConfig {
  if (!raw) return { ...DEFAULTS };
  return {
    enabled: boolPref(raw.enabled, DEFAULTS.enabled),
    evidence_collection: boolPref(raw.evidence_collection, DEFAULTS.evidence_collection),
    file_change_validation: boolPref(raw.file_change_validation, DEFAULTS.file_change_validation),
    evidence_cross_reference: boolPref(raw.evidence_cross_reference, DEFAULTS.evidence_cross_reference),
    destructive_command_warnings: boolPref(raw.destructive_command_warnings, DEFAULTS.destructive_command_warnings),
    content_validation: boolPref(raw.content_validation, DEFAULTS.content_validation),
    checkpoints: boolPref(raw.checkpoints, DEFAULTS.checkpoints),
    auto_rollback: boolPref(raw.auto_rollback, DEFAULTS.auto_rollback),
    timeout_scale_cap:
      typeof raw.timeout_scale_cap === "number" && Number.isFinite(raw.timeout_scale_cap)
        ? raw.timeout_scale_cap
        : DEFAULTS.timeout_scale_cap,
  };
}
/**
 * Fast gate: is the safety harness enabled at all?
 * Used at hook registration and phase integration points.
 */
export function isHarnessEnabled(
  raw: Record<string, unknown> | undefined,
): boolean {
  const flag = raw?.enabled;
  return typeof flag === "boolean" ? flag : DEFAULTS.enabled;
}
// ─── Re-exports ─────────────────────────────────────────────────────────────
// Flat public surface: consumers import all safety-harness pieces from this
// module instead of reaching into the individual component files.
// Evidence collection — real-time tool call tracking.
export {
  resetEvidence,
  getEvidence,
  getBashEvidence,
  getFilePaths,
  recordToolCall,
  recordToolResult,
} from "./evidence-collector.js";
export type { EvidenceEntry, BashEvidence, FileWriteEvidence, FileEditEvidence } from "./evidence-collector.js";
// Destructive-command classification for bash calls.
export { classifyCommand } from "./destructive-guard.js";
export type { CommandClassification } from "./destructive-guard.js";
// Post-unit git diff vs. plan validation.
export { validateFileChanges } from "./file-change-validator.js";
export type { FileChangeAudit, FileViolation } from "./file-change-validator.js";
// Claimed vs. actual verification evidence cross-referencing.
export { crossReferenceEvidence } from "./evidence-cross-ref.js";
export type { ClaimedEvidence, EvidenceMismatch } from "./evidence-cross-ref.js";
// Pre-unit checkpoints and rollback.
export { createCheckpoint, rollbackToCheckpoint, cleanupCheckpoint } from "./git-checkpoint.js";
// Output quality validation.
export { validateContent } from "./content-validator.js";
export type { ContentViolation } from "./content-validator.js";

View file

@ -0,0 +1,94 @@
// GSD2 — Regression tests for git-checkpoint rollback (#3576)
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import { mkdtempSync, writeFileSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { execFileSync } from "node:child_process";
import { createCheckpoint, rollbackToCheckpoint, cleanupCheckpoint } from "../safety/git-checkpoint.js";
/** Run a git command in `cwd` and return its trimmed stdout. */
function git(args: string[], cwd: string): string {
  const stdout = execFileSync("git", args, {
    cwd,
    stdio: ["ignore", "pipe", "pipe"],
    encoding: "utf-8",
  });
  return stdout.trim();
}
/** Initialize a throwaway git repo (one commit on branch `main`) and return its path. */
function createTempRepo(): string {
  const repoDir = mkdtempSync(join(tmpdir(), "ckpt-test-"));
  git(["init"], repoDir);
  // Commit identity must be set explicitly — CI machines have no global config.
  git(["config", "user.email", "test@test.com"], repoDir);
  git(["config", "user.name", "Test"], repoDir);
  writeFileSync(join(repoDir, "file.txt"), "initial\n");
  git(["add", "."], repoDir);
  git(["commit", "-m", "init"], repoDir);
  git(["branch", "-M", "main"], repoDir);
  return repoDir;
}
describe("git-checkpoint rollback", () => {
  // Look up the checkpoint ref for a unit; empty string when the ref is absent.
  const checkpointRef = (repo: string, unit: string): string =>
    git(["for-each-ref", `refs/gsd/checkpoints/${unit}`, "--format=%(objectname)"], repo);

  it("rolls back to checkpoint on checked-out branch", (t) => {
    const repo = createTempRepo();
    t.after(() => rmSync(repo, { recursive: true, force: true }));
    // Checkpoint at the initial commit.
    const checkpointSha = createCheckpoint(repo, "unit-1");
    assert.ok(checkpointSha, "checkpoint should return a SHA");
    // Advance HEAD past the checkpoint with a second commit.
    writeFileSync(join(repo, "file.txt"), "modified\n");
    git(["add", "."], repo);
    git(["commit", "-m", "second"], repo);
    assert.notEqual(git(["rev-parse", "HEAD"], repo), checkpointSha, "HEAD should have advanced");
    // Rollback must work on the currently checked-out branch.
    assert.equal(rollbackToCheckpoint(repo, "unit-1", checkpointSha), true, "rollback should succeed");
    assert.equal(git(["rev-parse", "HEAD"], repo), checkpointSha, "HEAD should match checkpoint SHA after rollback");
  });

  it("returns false on detached HEAD", (t) => {
    const repo = createTempRepo();
    t.after(() => rmSync(repo, { recursive: true, force: true }));
    const headSha = git(["rev-parse", "HEAD"], repo);
    git(["checkout", "--detach", headSha], repo);
    assert.equal(rollbackToCheckpoint(repo, "unit-2", headSha), false, "rollback should fail on detached HEAD");
  });

  it("cleans up checkpoint ref after rollback", (t) => {
    const repo = createTempRepo();
    t.after(() => rmSync(repo, { recursive: true, force: true }));
    const sha = createCheckpoint(repo, "unit-3");
    assert.ok(sha);
    assert.equal(checkpointRef(repo, "unit-3"), sha, "ref should exist before rollback");
    rollbackToCheckpoint(repo, "unit-3", sha);
    assert.equal(checkpointRef(repo, "unit-3"), "", "checkpoint ref should be removed after rollback");
  });

  it("cleanupCheckpoint removes the ref without error", (t) => {
    const repo = createTempRepo();
    t.after(() => rmSync(repo, { recursive: true, force: true }));
    assert.ok(createCheckpoint(repo, "unit-4"));
    cleanupCheckpoint(repo, "unit-4");
    assert.equal(checkpointRef(repo, "unit-4"), "", "ref should be gone");
  });
});

View file

@ -48,7 +48,8 @@ export type LogComponent =
| "bootstrap" // Extension bootstrap (system-context, agent-end)
| "guided" // Guided flow (discuss, plan wizards)
| "registry" // Rule registry hook state
| "renderer"; // Markdown renderer and projections
| "renderer" // Markdown renderer and projections
| "safety"; // LLM safety harness
export interface LogEntry {
ts: string;

View file

@ -149,7 +149,7 @@ export function streamOllamaChat(
// Handle text content — process independently of tool_calls
// (a chunk may contain both content and tool_calls)
const content = chunk.message?.content ?? "";
if (content && !chunk.done) {
if (content) {
if (thinkParser) {
processChunks(thinkParser.push(content));
} else {

View file

@ -0,0 +1,82 @@
// GSD2 — Regression test: Ollama streaming must not drop content on done:true chunks (#3576)
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
import { describe, it } from "node:test";
import assert from "node:assert/strict";
/**
* This test validates the streaming logic pattern used in ollama-chat-provider.ts.
* The bug: content on the terminal done:true chunk was silently dropped because
* the stream loop only emitted content when `!chunk.done`.
*
* The fix: process chunk.message.content regardless of chunk.done, then handle
* done metadata. This test exercises that logic path with a simulated chunk stream.
*/
// Minimal shape of an Ollama streaming response chunk.
interface OllamaChunk {
  done: boolean;
  done_reason?: string;
  message?: { content?: string; tool_calls?: unknown[] };
  prompt_eval_count?: number;
  eval_count?: number;
}
/**
 * Mirror of the fixed stream loop in ollama-chat-provider.ts: collect
 * content from every chunk — including the terminal done:true chunk —
 * and stop once a done chunk has been processed.
 */
function simulateStreamLoop(chunks: OllamaChunk[]): string {
  const parts: string[] = [];
  for (const chunk of chunks) {
    const text = chunk.message?.content;
    // Content is emitted regardless of chunk.done (that was the bug).
    if (text) {
      parts.push(text);
    }
    if (chunk.done) {
      break;
    }
  }
  return parts.join("");
}
describe("Ollama stream terminal chunk handling", () => {
  // Table-driven: each case is a simulated chunk stream and its expected output.
  const cases: Array<{ name: string; chunks: OllamaChunk[]; expected: string; msg?: string }> = [
    {
      name: "captures content from done:true chunk",
      chunks: [
        { done: false, message: { content: "Hello " } },
        { done: false, message: { content: "world" } },
        { done: true, done_reason: "stop", message: { content: "!" } },
      ],
      expected: "Hello world!",
      msg: "trailing content on done chunk must not be dropped",
    },
    {
      name: "works when done chunk has no content",
      chunks: [
        { done: false, message: { content: "Hello" } },
        { done: true, done_reason: "stop", message: {} },
      ],
      expected: "Hello",
    },
    {
      name: "works when done chunk has empty string content",
      chunks: [
        { done: false, message: { content: "data" } },
        { done: true, done_reason: "stop", message: { content: "" } },
      ],
      expected: "data",
    },
    {
      name: "handles single done chunk with content",
      chunks: [{ done: true, done_reason: "stop", message: { content: "one-shot" } }],
      expected: "one-shot",
      msg: "single done chunk with content should work",
    },
  ];
  for (const { name, chunks, expected, msg } of cases) {
    it(name, () => {
      assert.equal(simulateStreamLoop(chunks), expected, msg);
    });
  }
});