feat(gsd): add LLM safety harness for auto-mode damage control

Unified safety layer that monitors, validates, and constrains LLM behavior during auto-mode execution. All components use a warn-and-continue policy by default (log violations, notify user, keep going).

Components:
- Evidence collector: real-time bash/write/edit tool call tracking
- Destructive command guard: classifies 10 dangerous patterns (rm -rf, force push, etc.)
- File change validator: compares git diff against task plan's expected output
- Evidence cross-reference: detects tasks marked complete with zero bash calls
- Git checkpoint: pre-unit refs/gsd/checkpoints/ for optional rollback
- Content validator: minimum quality checks on plans and summaries
- Timeout scale cap: limits timeout multiplier to 6x (was unlimited)

New preference: safety_harness with per-component toggles. Enabled by default, auto_rollback off by default.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 15:00:06 -05:00 · 2026-04-05 15:00:06 -05:00 · 0d3ef6b545
commit 0d3ef6b545
parent a6b7febc5e
14 changed files with 923 additions and 2 deletions
--- a/src/resources/extensions/gsd/auto-post-unit.ts
+++ b/src/resources/extensions/gsd/auto-post-unit.ts
@ -52,6 +52,13 @@ import { hasPendingCaptures, loadPendingCaptures, revertExecutorResolvedCaptures
 import { debugLog } from "./debug-logger.js";
 import { runSafely } from "./auto-utils.js";
 import type { AutoSession, SidecarItem } from "./auto/session.js";
+import { getEvidence } from "./safety/evidence-collector.js";
+import { validateFileChanges } from "./safety/file-change-validator.js";
+// crossReferenceEvidence available for future use when verification_evidence is stored in DB
+// import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
+import { validateContent } from "./safety/content-validator.js";
+import { resolveSafetyHarnessConfig } from "./safety/safety-harness.js";
+import { resolveExpectedArtifactPath as resolveArtifactForContent } from "./auto-artifact-paths.js";

 /** Maximum verification retry attempts before escalating to blocker placeholder (#2653). */
 const MAX_VERIFICATION_RETRIES = 3;
@ -437,6 +444,87 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
      debugLog("postUnit", { phase: "rogue-detection", error: String(e) });
    }

+    // ── Safety harness: post-unit validation ──
+    try {
+      const { loadEffectiveGSDPreferences } = await import("./preferences.js");
+      const prefs = loadEffectiveGSDPreferences()?.preferences;
+      const safetyConfig = resolveSafetyHarnessConfig(
+        prefs?.safety_harness as Record<string, unknown> | undefined,
+      );
+
+      if (safetyConfig.enabled) {
+        const { milestone: sMid, slice: sSid, task: sTid } = parseUnitId(s.currentUnit.id);
+
+        // File change validation (execute-task only, after auto-commit)
+        if (safetyConfig.file_change_validation && s.currentUnit.type === "execute-task" && sMid && sSid && sTid && isDbAvailable()) {
+          try {
+            const taskRow = getTask(sMid, sSid, sTid);
+            if (taskRow) {
+              const expectedOutput = taskRow.expected_output ?? [];
+              const plannedFiles = taskRow.files ?? [];
+              const audit = validateFileChanges(s.basePath, expectedOutput, plannedFiles);
+              if (audit && audit.violations.length > 0) {
+                const warnings = audit.violations.filter(v => v.severity === "warning");
+                for (const v of warnings) {
+                  logWarning("safety", `file-change: ${v.file} — ${v.reason}`);
+                }
+                if (warnings.length > 0) {
+                  ctx.ui.notify(
+                    `Safety: ${warnings.length} unexpected file change(s) outside task plan`,
+                    "warning",
+                  );
+                }
+              }
+            }
+          } catch (e) {
+            debugLog("postUnit", { phase: "safety-file-change", error: String(e) });
+          }
+        }
+
+        // Evidence cross-reference (execute-task only)
+        // Verification evidence is passed via the complete-task tool call and
+        // stored in the SUMMARY.md on disk — not available as structured data
+        // in the DB. The evidence collector tracks actual bash tool calls, so
+        // we can still detect units that claimed success but ran no commands.
+        if (safetyConfig.evidence_cross_reference && s.currentUnit.type === "execute-task") {
+          try {
+            const actual = getEvidence();
+            const bashCalls = actual.filter(e => e.kind === "bash");
+            // If the task is marked complete but zero bash commands were run,
+            // it's suspicious — the LLM may have fabricated results.
+            if (sMid && sSid && sTid && isDbAvailable()) {
+              const taskRow = getTask(sMid, sSid, sTid);
+              if (taskRow?.status === "complete" && taskRow.verify && bashCalls.length === 0) {
+                logWarning("safety", "task marked complete with verification commands but no bash calls were executed");
+                ctx.ui.notify(
+                  `Safety: task ${sTid} has verification commands but no bash calls were recorded`,
+                  "warning",
+                );
+              }
+            }
+          } catch (e) {
+            debugLog("postUnit", { phase: "safety-evidence-xref", error: String(e) });
+          }
+        }
+
+        // Content validation (plan-slice, plan-milestone)
+        if (safetyConfig.content_validation) {
+          try {
+            const artifactPath = resolveArtifactForContent(s.currentUnit.type, s.currentUnit.id, s.basePath);
+            const contentViolations = validateContent(s.currentUnit.type, artifactPath);
+            for (const v of contentViolations) {
+              logWarning("safety", `content: ${v.reason}`);
+              ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
+            }
+          } catch (e) {
+            debugLog("postUnit", { phase: "safety-content-validation", error: String(e) });
+          }
+        }
+      }
+    } catch (e) {
+      debugLog("postUnit", { phase: "safety-harness", error: String(e) });
+    }
+
    // Artifact verification
    let triggerArtifactVerified = false;
    if (!s.currentUnit.type.startsWith("hook/")) {
--- a/src/resources/extensions/gsd/auto-timers.ts
+++ b/src/resources/extensions/gsd/auto-timers.ts
@ -106,8 +106,9 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
    }
  }
  const estimateMinutes = taskEstimate ? parseEstimateMinutes(taskEstimate) : null;
+  const MAX_TIMEOUT_SCALE = 6; // Cap at 6x (60min task). Prevents 2h+ tasks from creating 120min+ timeout windows.
  const timeoutScale = estimateMinutes && estimateMinutes > 0
-    ? Math.max(1, estimateMinutes / 10)  // 10min task = 1x, 30min = 3x, 2h = 12x
+    ? Math.min(MAX_TIMEOUT_SCALE, Math.max(1, estimateMinutes / 10))
    : 1;

  const softTimeoutMs = (supervisor.soft_timeout_minutes ?? 0) * 60 * 1000 * timeoutScale;
--- a/src/resources/extensions/gsd/auto/phases.ts
+++ b/src/resources/extensions/gsd/auto/phases.ts
@ -37,6 +37,9 @@ import { withTimeout, FINALIZE_POST_TIMEOUT_MS } from "./finalize-timeout.js";
 import { getEligibleSlices } from "../slice-parallel-eligibility.js";
 import { startSliceParallel } from "../slice-parallel-orchestrator.js";
 import { isDbAvailable, getMilestoneSlices } from "../gsd-db.js";
+import { resetEvidence } from "../safety/evidence-collector.js";
+import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js";
+import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";

 // ─── generateMilestoneReport ──────────────────────────────────────────────────

@ -1079,6 +1082,21 @@ export async function runUnitPhase(
  if (mid)
    deps.updateSliceProgressCache(s.basePath, mid, state.activeSlice?.id);

+  // ── Safety harness: reset evidence + create checkpoint ──
+  const safetyConfig = resolveSafetyHarnessConfig(
+    prefs?.safety_harness as Record<string, unknown> | undefined,
+  );
+  if (safetyConfig.enabled && safetyConfig.evidence_collection) {
+    resetEvidence();
+  }
+  // Only checkpoint code-executing units (not lifecycle/planning units)
+  if (safetyConfig.enabled && safetyConfig.checkpoints && unitType === "execute-task") {
+    s.checkpointSha = createCheckpoint(s.basePath, unitId);
+    if (s.checkpointSha) {
+      debugLog("runUnitPhase", { phase: "checkpoint-created", unitId, sha: s.checkpointSha.slice(0, 8) });
+    }
+  }
+
  // Prompt injection
  let finalPrompt = prompt;

@ -1376,6 +1394,27 @@ export async function runUnitPhase(

  deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: ic.nextSeq(), eventType: "unit-end", data: { unitType, unitId, status: unitResult.status, artifactVerified, ...(unitResult.errorContext ? { errorContext: unitResult.errorContext } : {}) }, causedBy: { flowId: ic.flowId, seq: unitStartSeq } });

+  // ── Safety harness: checkpoint cleanup or rollback ──
+  if (s.checkpointSha) {
+    if (unitResult.status === "error" && safetyConfig.auto_rollback) {
+      const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha);
+      if (rolled) {
+        ctx.ui.notify(`Rolled back to pre-unit checkpoint for ${unitId}`, "info");
+        debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId });
+      }
+    } else if (unitResult.status === "error") {
+      ctx.ui.notify(
+        `Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`,
+        "warning",
+      );
+    } else {
+      // Success — clean up checkpoint ref
+      cleanupCheckpoint(s.basePath, unitId);
+      debugLog("runUnitPhase", { phase: "checkpoint-cleaned", unitId });
+    }
+    s.checkpointSha = null;
+  }
+
  return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } };
 }

--- a/src/resources/extensions/gsd/auto/session.ts
+++ b/src/resources/extensions/gsd/auto/session.ts
@ -145,6 +145,10 @@ export class AutoSession {
  lastBaselineCharCount: number | undefined;
  pendingQuickTasks: CaptureEntry[] = [];

+  // ── Safety harness ───────────────────────────────────────────────────────
+  /** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */
+  checkpointSha: string | null = null;
+
  // ── Signal handler ───────────────────────────────────────────────────────
  sigtermHandler: (() => void) | null = null;

@ -223,6 +227,7 @@ export class AutoSession {
    this.lastToolInvocationError = null;
    this.isolationDegraded = false;
    this.milestoneMergedInPhases = false;
+    this.checkpointSha = null;

    // Signal handler
    this.sigtermHandler = null;
--- a/src/resources/extensions/gsd/bootstrap/register-hooks.ts
+++ b/src/resources/extensions/gsd/bootstrap/register-hooks.ts
@ -18,6 +18,9 @@ import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js"
 import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
 import { saveActivityLog } from "../activity-log.js";
 import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
+import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult } from "../safety/evidence-collector.js";
+import { classifyCommand } from "../safety/destructive-guard.js";
+import { logWarning as safetyLogWarning } from "../workflow-logger.js";

 // Skip the welcome screen on the very first session_start — cli.ts already
 // printed it before the TUI launched. Only re-print on /clear (subsequent sessions).
@ -203,6 +206,26 @@ export function registerHooks(pi: ExtensionAPI): void {
    if (result.block) return result;
  });

+  // ── Safety harness: evidence collection + destructive command warnings ──
+  pi.on("tool_call", async (event, ctx) => {
+    if (!isAutoActive()) return;
+    safetyRecordToolCall(event.toolName, event.input as Record<string, unknown>);
+
+    // Destructive command classification (warn only, never block)
+    if (isToolCallEventType("bash", event)) {
+      const classification = classifyCommand(event.input.command);
+      if (classification.destructive) {
+        safetyLogWarning("safety", `destructive command: ${classification.labels.join(", ")}`, {
+          command: String(event.input.command).slice(0, 200),
+        });
+        ctx.ui.notify(
+          `Destructive command detected: ${classification.labels.join(", ")}`,
+          "warning",
+        );
+      }
+    }
+  });
+
  pi.on("tool_result", async (event) => {
    if (event.toolName !== "ask_user_questions") return;
    const milestoneId = getDiscussionMilestoneId();
@ -268,6 +291,10 @@ export function registerHooks(pi: ExtensionAPI): void {
        : (typeof event.result?.content?.[0]?.text === "string" ? event.result.content[0].text : String(event.result));
      recordToolInvocationError(event.toolName, errorText);
    }
+    // Safety harness: record tool execution results for evidence cross-referencing
+    if (isAutoActive()) {
+      safetyRecordToolResult(event.toolCallId, event.toolName, event.result, event.isError);
+    }
  });

  pi.on("model_select", async (_event, ctx) => {
--- a/src/resources/extensions/gsd/preferences-types.ts
+++ b/src/resources/extensions/gsd/preferences-types.ts
@ -105,6 +105,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
  "experimental",
  "codebase",
  "slice_parallel",
+  "safety_harness",
 ]);

 /** Canonical list of all dispatch unit types. */
@ -291,6 +292,18 @@ export interface GSDPreferences {
  codebase?: CodebaseMapPreferences;
  /** Slice-level parallelism within a milestone. Disabled by default. */
  slice_parallel?: { enabled?: boolean; max_workers?: number };
+  /** LLM safety harness configuration. Monitors, validates, and constrains LLM behavior during auto-mode. Enabled by default with warn-and-continue policy. */
+  safety_harness?: {
+    enabled?: boolean;
+    evidence_collection?: boolean;
+    file_change_validation?: boolean;
+    evidence_cross_reference?: boolean;
+    destructive_command_warnings?: boolean;
+    content_validation?: boolean;
+    checkpoints?: boolean;
+    auto_rollback?: boolean;
+    timeout_scale_cap?: number;
+  };
 }

 export interface LoadedGSDPreferences {
--- a/src/resources/extensions/gsd/safety/content-validator.ts
+++ b/src/resources/extensions/gsd/safety/content-validator.ts
@ -0,0 +1,98 @@
+/**
+ * Lightweight content validator for auto-mode safety harness.
+ * Validates that high-value unit outputs contain minimum expected content.
+ *
+ * Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
+ */
+
+import { existsSync, readFileSync } from "node:fs";
+import { logWarning } from "../workflow-logger.js";
+
+// ─── Types ──────────────────────────────────────────────────────────────────
+
+/** A single content-quality finding returned by a unit validator. */
+export interface ContentViolation {
+  /** Only "warning" is representable in this type. */
+  severity: "warning";
+  /** Human-readable description of what the artifact is missing. */
+  reason: string;
+}
+
+// ─── Public API ─────────────────────────────────────────────────────────────
+
+/**
+ * Run the registered content checks against a finished unit's primary artifact.
+ *
+ * Produces an empty list when there is nothing to check: the path is null or
+ * empty, the file does not exist, or no validator is registered for the unit
+ * type. An exception raised while reading or validating is reported through
+ * `logWarning` and also produces an empty list.
+ *
+ * @param unitType - Unit type key used to pick a validator (e.g. "plan-slice")
+ * @param artifactPath - Path to the primary artifact file, or null if unresolved
+ * @returns The violations reported by the validator, possibly empty
+ */
+export function validateContent(
+  unitType: string,
+  artifactPath: string | null,
+): ContentViolation[] {
+  if (!artifactPath) return [];
+  if (!existsSync(artifactPath)) return [];
+
+  const check = VALIDATORS[unitType];
+  if (!check) return [];
+
+  let found: ContentViolation[] = [];
+  try {
+    found = check(readFileSync(artifactPath, "utf-8"));
+  } catch (e) {
+    logWarning("safety", `content validation read failed: ${(e as Error).message}`);
+  }
+  return found;
+}
+
+// ─── Validators ─────────────────────────────────────────────────────────────
+
+/** Shape shared by every per-unit-type check: raw artifact text in, violations out. */
+type ContentValidatorFn = (content: string) => ContentViolation[];
+
+/** Validators keyed by unit type. Unit types without an entry are not checked by validateContent. */
+const VALIDATORS: Record<string, ContentValidatorFn> = {
+  "plan-slice": validatePlanSlice,
+  "plan-milestone": validatePlanMilestone,
+};
+
+/**
+ * Minimum-content checks for a slice plan document.
+ *
+ * Reports a warning for each of: fewer than two task checkbox entries, no
+ * heading beginning with "## Files", and no mention of verification.
+ *
+ * @param content - Full text of the slice plan
+ * @returns One violation per failed check, in the order listed above
+ */
+function validatePlanSlice(content: string): ContentViolation[] {
+  const violations: ContentViolation[] = [];
+
+  // Task entries are checkbox bullets such as "- [ ] **T01". A ticked box may
+  // be written with either "x" or "X".
+  const taskCount = (content.match(/- \[[ xX]\] \*\*T\d+/g) ?? []).length;
+  if (taskCount < 2) {
+    violations.push({
+      severity: "warning",
+      reason: `Slice plan has only ${taskCount} task(s) — expected at least 2`,
+    });
+  }
+
+  // "## Files Likely Touched" also starts with "## Files", so one test covers both.
+  if (!content.includes("## Files")) {
+    violations.push({
+      severity: "warning",
+      reason: "Slice plan missing 'Files Likely Touched' section",
+    });
+  }
+
+  // Match verify / verifies / verified / verification in any casing, so a
+  // "## Verification" heading satisfies the check on its own.
+  if (!/verif[yi]/i.test(content)) {
+    violations.push({
+      severity: "warning",
+      reason: "Slice plan has no verification instructions",
+    });
+  }
+
+  return violations;
+}
+
+/**
+ * Minimum-content check for a milestone roadmap: it must contain at least one
+ * slice heading ("##" followed by whitespace and "S<digits>").
+ *
+ * @param content - Full text of the milestone roadmap
+ * @returns A single warning when no slice heading is present, otherwise an empty list
+ */
+function validatePlanMilestone(content: string): ContentViolation[] {
+  const sliceHeadings = content.match(/##\s+S\d+/g);
+  const sliceCount = sliceHeadings === null ? 0 : sliceHeadings.length;
+
+  if (sliceCount >= 1) return [];
+
+  return [
+    {
+      severity: "warning",
+      reason: `Milestone roadmap has ${sliceCount} slice(s) — expected at least 1`,
+    },
+  ];
+}
--- a/src/resources/extensions/gsd/safety/destructive-guard.ts
+++ b/src/resources/extensions/gsd/safety/destructive-guard.ts
@ -0,0 +1,49 @@
+/**
+ * Destructive command classifier for auto-mode safety harness.
+ * Classifies bash commands and warns on potentially destructive operations.
+ * Does NOT block — only classifies for logging/notification.
+ *
+ * Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
+ */
+
+// ─── Pattern Definitions ────────────────────────────────────────────────────
+
+/** A regular expression paired with the label reported when it matches a command. */
+interface DestructivePattern {
+  pattern: RegExp;
+  label: string;
+}
+
+// Each pattern is tested independently against the raw command string. None of
+// them use the `g` or `y` flag, so `test()` carries no state between calls.
+const DESTRUCTIVE_PATTERNS: readonly DestructivePattern[] = [
+  { pattern: /\brm\s+(-[^\s]*[rfRF][^\s]*\s+|.*\s+-[^\s]*[rfRF])/, label: "recursive delete" },
+  { pattern: /\bgit\s+push\s+.*--force/, label: "force push" },
+  // Short force flag anywhere among the push arguments (e.g. "git push origin main -f"
+  // or a cluster such as "-uf"). The scan stops at ; & | so a "-f" belonging to a
+  // later chained command is not attributed to the push.
+  { pattern: /\bgit\s+push\b[^;&|\n]*\s-[A-Za-z]*f[A-Za-z]*\b/, label: "force push" },
+  { pattern: /\bgit\s+reset\s+--hard/, label: "hard reset" },
+  { pattern: /\bgit\s+clean\s+-[^\s]*[fdxFDX]/, label: "git clean" },
+  { pattern: /\bgit\s+checkout\s+--\s+\./, label: "discard all changes" },
+  { pattern: /\bdrop\s+(database|table|index)\b/i, label: "SQL drop" },
+  { pattern: /\btruncate\s+table\b/i, label: "SQL truncate" },
+  // Allow option words before the mode ("chmod -R 777") and the octal form "0777".
+  { pattern: /\bchmod\s+(-[^\s]+\s+)*0?777\b/, label: "world-writable permissions" },
+  // Covers wget as well as curl, and a shell invoked through sudo.
+  { pattern: /\b(curl|wget)\s.*\|\s*(sudo\s+)?(bash|sh|zsh)\b/, label: "pipe to shell" },
+];
+
+// ─── Public API ─────────────────────────────────────────────────────────────
+
+/** Result of classifying one command. */
+export interface CommandClassification {
+  /** True when at least one destructive pattern matched. */
+  destructive: boolean;
+  /** Distinct labels of the matched patterns, in pattern-table order. */
+  labels: string[];
+}
+
+/**
+ * Classify a bash command for destructive operations.
+ *
+ * This only classifies; it does not block or modify the command.
+ *
+ * @param command - The command line to inspect
+ * @returns Whether any pattern matched, plus the de-duplicated labels
+ */
+export function classifyCommand(command: string): CommandClassification {
+  // A Set de-duplicates labels shared by several patterns (both force-push
+  // patterns report "force push") while preserving first-match order.
+  const matched = new Set<string>();
+  for (const { pattern, label } of DESTRUCTIVE_PATTERNS) {
+    if (pattern.test(command)) matched.add(label);
+  }
+  const labels = [...matched];
+  return { destructive: labels.length > 0, labels };
+}
--- a/src/resources/extensions/gsd/safety/evidence-collector.ts
+++ b/src/resources/extensions/gsd/safety/evidence-collector.ts
@ -0,0 +1,151 @@
+/**
+ * Real-time tool call evidence collector for auto-mode safety harness.
+ * Tracks every bash command, file write, and file edit during a unit execution.
+ * Evidence is compared against LLM completion claims in evidence-cross-ref.ts.
+ *
+ * Follows the same module-level Map pattern as auto-tool-tracking.ts.
+ * Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
+ */
+
+// ─── Types ──────────────────────────────────────────────────────────────────
+
+/** Evidence for one bash tool call. */
+export interface BashEvidence {
+  kind: "bash";
+  /** Empty string until recordToolResult attaches the id from the tool result. */
+  toolCallId: string;
+  /** Command text taken from the tool input when the call was recorded. */
+  command: string;
+  /** -1 until a result is recorded; afterwards the parsed or inferred exit code. */
+  exitCode: number;
+  /** Up to the first 500 characters of the result text; empty until a result is recorded. */
+  outputSnippet: string;
+  /** Date.now() value captured when the call was recorded. */
+  timestamp: number;
+}
+
+/** Evidence for one file-write tool call. */
+export interface FileWriteEvidence {
+  kind: "write";
+  /** Empty string until recordToolResult attaches the id from the tool result. */
+  toolCallId: string;
+  /** Target path taken from the tool input (`file_path`, falling back to `path`). */
+  path: string;
+  /** Date.now() value captured when the call was recorded. */
+  timestamp: number;
+}
+
+/** Evidence for one file-edit tool call. */
+export interface FileEditEvidence {
+  kind: "edit";
+  /** Empty string until recordToolResult attaches the id from the tool result. */
+  toolCallId: string;
+  /** Target path taken from the tool input (`file_path`, falling back to `path`). */
+  path: string;
+  /** Date.now() value captured when the call was recorded. */
+  timestamp: number;
+}
+
+/** Any recorded evidence entry, discriminated by `kind`. */
+export type EvidenceEntry = BashEvidence | FileWriteEvidence | FileEditEvidence;
+
+// ─── Module State ───────────────────────────────────────────────────────────
+
+// Entries recorded for the unit currently running; swapped for a fresh array by resetEvidence().
+let unitEvidence: EvidenceEntry[] = [];
+
+// ─── Public API ─────────────────────────────────────────────────────────────
+
+/** Drop everything recorded so far. Intended to be called when a new unit starts. */
+export function resetEvidence(): void {
+  unitEvidence = [];
+}
+
+/** Return every entry recorded for the current unit, typed as read-only. */
+export function getEvidence(): readonly EvidenceEntry[] {
+  return unitEvidence;
+}
+
+/** Return a new array holding just the bash entries, in recording order. */
+export function getBashEvidence(): readonly BashEvidence[] {
+  const bashOnly: BashEvidence[] = [];
+  for (const entry of unitEvidence) {
+    if (entry.kind === "bash") bashOnly.push(entry);
+  }
+  return bashOnly;
+}
+
+/** Return the paths of all write and edit entries, in recording order. */
+export function getFilePaths(): string[] {
+  const touched: string[] = [];
+  for (const entry of unitEvidence) {
+    if (entry.kind === "write" || entry.kind === "edit") touched.push(entry.path);
+  }
+  return touched;
+}
+
+// ─── Recording (called from register-hooks.ts) ─────────────────────────────
+
+/**
+ * Record a tool call at dispatch time (before execution).
+ *
+ * Only bash, write and edit tools are tracked; any other tool name is ignored.
+ * The name is compared case-insensitively, the same way recordToolResult
+ * compares it, so a call and its result always agree on whether the tool is
+ * tracked. The entry starts unresolved (empty toolCallId, and exitCode -1 for
+ * bash); recordToolResult fills those in after execution.
+ *
+ * @param toolName - Name of the tool being invoked
+ * @param input - Tool input; `command` is read for bash, `file_path` or `path` for write/edit
+ */
+export function recordToolCall(toolName: string, input: Record<string, unknown>): void {
+  const kind = toolName.toLowerCase();
+  const timestamp = Date.now();
+
+  if (kind === "bash") {
+    unitEvidence.push({
+      kind,
+      toolCallId: "",
+      command: String(input.command ?? ""),
+      exitCode: -1,
+      outputSnippet: "",
+      timestamp,
+    });
+  } else if (kind === "write" || kind === "edit") {
+    unitEvidence.push({
+      kind,
+      toolCallId: "",
+      path: String(input.file_path ?? input.path ?? ""),
+      timestamp,
+    });
+  }
+}
+
+/**
+ * Attach an execution result to the newest still-unresolved entry of the same kind.
+ *
+ * For bash the entry receives the tool call id, up to 500 characters of output,
+ * and an exit code: the number from a "Command exited with code N" line when
+ * present, otherwise 1 if `isError` is set and 0 if not. For write and edit only
+ * the tool call id is stored. Other tool names are ignored, as is a result with
+ * no unresolved entry to attach to.
+ *
+ * @param toolCallId - Identifier of the completed tool call
+ * @param toolName - Tool name; compared case-insensitively
+ * @param result - Raw tool result, from which output text is extracted
+ * @param isError - Whether the tool reported an error
+ */
+export function recordToolResult(
+  toolCallId: string,
+  toolName: string,
+  result: unknown,
+  isError: boolean,
+): void {
+  const kind = toolName.toLowerCase();
+
+  switch (kind) {
+    case "bash": {
+      const pending = findLastUnresolved("bash") as BashEvidence | undefined;
+      if (!pending) return;
+      pending.toolCallId = toolCallId;
+      const output = extractResultText(result);
+      pending.outputSnippet = output.slice(0, 500);
+      const reported = output.match(/Command exited with code (\d+)/);
+      if (reported) {
+        pending.exitCode = Number(reported[1]);
+      } else {
+        pending.exitCode = isError ? 1 : 0;
+      }
+      return;
+    }
+    case "write":
+    case "edit": {
+      const pending = findLastUnresolved(kind);
+      if (pending) pending.toolCallId = toolCallId;
+      return;
+    }
+    default:
+      return;
+  }
+}
+
+// ─── Internals ──────────────────────────────────────────────────────────────
+
+/**
+ * Scan the recorded entries from newest to oldest and return the first one of
+ * the given kind whose toolCallId is still the empty string.
+ *
+ * @param kind - Evidence kind to look for ("bash", "write" or "edit")
+ * @returns The matching entry, or undefined when every entry of that kind is resolved
+ */
+function findLastUnresolved(kind: string): EvidenceEntry | undefined {
+  let idx = unitEvidence.length;
+  while (idx-- > 0) {
+    const candidate = unitEvidence[idx];
+    if (candidate.kind === kind && candidate.toolCallId === "") {
+      return candidate;
+    }
+  }
+  return undefined;
+}
+
+/**
+ * Pull displayable text out of a tool result of unknown shape.
+ *
+ * Resolution order: a string is returned as-is; null/undefined become "";
+ * other non-objects are stringified; for objects, the first `content` item
+ * with `type === "text"` and a string `text` wins, then a top-level string
+ * `text`. An object with neither is serialised as JSON rather than collapsing
+ * to "[object Object]", and yields "" if it cannot be serialised.
+ *
+ * @param result - Raw tool result
+ * @returns Extracted text, never null or undefined
+ */
+function extractResultText(result: unknown): string {
+  if (typeof result === "string") return result;
+  if (result === null || result === undefined) return "";
+  if (typeof result !== "object") return String(result);
+
+  const r = result as Record<string, unknown>;
+  if (Array.isArray(r.content)) {
+    for (const item of r.content as unknown[]) {
+      if (typeof item !== "object" || item === null) continue;
+      const block = item as Record<string, unknown>;
+      if (block.type !== "text") continue;
+      if (typeof block.text === "string") return block.text;
+      // Only the first text block is considered; fall through to the other sources.
+      break;
+    }
+  }
+  if (typeof r.text === "string") return r.text;
+
+  try {
+    // JSON.stringify may return undefined (e.g. a toJSON that returns undefined).
+    return JSON.stringify(result) ?? "";
+  } catch {
+    // Circular structures and similar cannot be serialised.
+    return "";
+  }
+}
--- a/src/resources/extensions/gsd/safety/evidence-cross-ref.ts
+++ b/src/resources/extensions/gsd/safety/evidence-cross-ref.ts
@ -0,0 +1,120 @@
+/**
+ * Evidence cross-reference for auto-mode safety harness.
+ * Compares the LLM's claimed verification evidence (command + exitCode)
+ * against actual bash tool calls recorded by the evidence collector.
+ *
+ * Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
+ */
+
+import type { BashEvidence, EvidenceEntry } from "./evidence-collector.js";
+
+// ─── Types ──────────────────────────────────────────────────────────────────
+
+/** One verification step as claimed by the LLM. */
+export interface ClaimedEvidence {
+  /** Command the LLM says it ran. */
+  command: string;
+  /** Exit code the LLM reports; -1 entries are skipped by crossReferenceEvidence. */
+  exitCode: number;
+  /** Free-text verdict; entries containing "coerced from string" are skipped by crossReferenceEvidence. */
+  verdict: string;
+}
+
+/** A discrepancy between a claim and the recorded bash calls. */
+export interface EvidenceMismatch {
+  severity: "warning" | "error";
+  /** The claim that could not be confirmed. */
+  claimed: ClaimedEvidence;
+  /** The recorded bash call the claim was matched to, or null when none matched. */
+  actual: BashEvidence | null;
+  /** Human-readable explanation of the discrepancy. */
+  reason: string;
+}
+
+// ─── Public API ─────────────────────────────────────────────────────────────
+
+/**
+ * Cross-reference claimed verification evidence against actual bash tool calls.
+ *
+ * Claims are skipped when they were coerced from strings (verdict contains
+ * "coerced from string" or exitCode is -1) or when the command is shorter than
+ * three characters. For each remaining claim:
+ * - no matching bash call                          → warning, `actual` is null
+ * - claimed exit 0, matched call has no result yet → warning (exit code unknown)
+ * - claimed exit 0, matched call exited non-zero   → error
+ *
+ * @param claimedEvidence - Verification steps reported by the LLM
+ * @param actualEvidence - Recorded evidence; only bash entries are considered,
+ *   and later array positions are treated as more recent
+ * @returns Mismatches found. Empty array = nothing contradicted the claims.
+ */
+export function crossReferenceEvidence(
+  claimedEvidence: readonly ClaimedEvidence[],
+  actualEvidence: readonly EvidenceEntry[],
+): EvidenceMismatch[] {
+  const bashCalls = actualEvidence.filter(
+    (e): e is BashEvidence => e.kind === "bash",
+  );
+  const mismatches: EvidenceMismatch[] = [];
+
+  for (const claimed of claimedEvidence) {
+    // Skip coerced entries — they're already flagged with exitCode: -1
+    // and verdict: "unknown (coerced from string)" by db-tools.ts
+    if (claimed.verdict?.includes("coerced from string")) continue;
+    if (claimed.exitCode === -1) continue;
+
+    // Skip entries with empty or generic commands
+    if (!claimed.command || claimed.command.length < 3) continue;
+
+    const match = findBestMatch(claimed.command, bashCalls);
+
+    if (!match) {
+      mismatches.push({
+        severity: "warning",
+        claimed,
+        actual: null,
+        reason: `No bash tool call found matching "${claimed.command.slice(0, 80)}"`,
+      });
+      continue;
+    }
+
+    if (claimed.exitCode !== 0) continue;
+
+    // -1 is the collector's "no result recorded" placeholder, not a real exit
+    // status, so it cannot contradict the claim — only leave it unconfirmed.
+    if (match.exitCode === -1) {
+      mismatches.push({
+        severity: "warning",
+        claimed,
+        actual: match,
+        reason: "Claimed exitCode=0 but the matching bash call has no recorded result",
+      });
+      continue;
+    }
+
+    // Exit code mismatch: LLM claims success but actual command failed
+    if (match.exitCode !== 0) {
+      mismatches.push({
+        severity: "error",
+        claimed,
+        actual: match,
+        reason: `Claimed exitCode=0 but actual exitCode=${match.exitCode}`,
+      });
+    }
+  }
+
+  return mismatches;
+}
+
+// ─── Internals ──────────────────────────────────────────────────────────────
+
+/**
+ * Find the best matching bash evidence entry for a claimed command.
+ *
+ * Tries, in order: exact match, substring match in either direction, then
+ * token overlap of at least 50% of the claimed tokens longer than two
+ * characters. Commands are trimmed before every comparison. Within each
+ * strategy the most recent call wins, so a command that failed and was then
+ * re-run is judged by its latest run. Recorded calls with a blank command are
+ * ignored — an empty string is a substring of everything and would otherwise
+ * match any claim.
+ *
+ * @param claimedCommand - Command text from the claim
+ * @param bashCalls - Recorded bash calls, oldest first
+ * @returns The matched call, or null when nothing matches
+ */
+function findBestMatch(
+  claimedCommand: string,
+  bashCalls: readonly BashEvidence[],
+): BashEvidence | null {
+  const normalized = claimedCommand.trim();
+  if (normalized === "") return null;
+
+  // Newest first, with blank commands removed.
+  const candidates = bashCalls
+    .map(call => ({ call, command: call.command.trim() }))
+    .filter(c => c.command !== "")
+    .reverse();
+
+  const exact = candidates.find(c => c.command === normalized);
+  if (exact) return exact.call;
+
+  // Substring match: claimed is contained in actual or actual in claimed
+  const substring = candidates.find(
+    c => c.command.includes(normalized) || normalized.includes(c.command),
+  );
+  if (substring) return substring.call;
+
+  // Token match: split on whitespace and check significant overlap
+  const claimedTokens = normalized.split(/\s+/).filter(t => t.length > 2);
+  if (claimedTokens.length === 0) return null;
+
+  let bestMatch: BashEvidence | null = null;
+  let bestScore = 0;
+
+  for (const { call, command } of candidates) {
+    const callTokens = new Set(command.split(/\s+/));
+    const matchCount = claimedTokens.filter(t => callTokens.has(t)).length;
+    const score = matchCount / claimedTokens.length;
+    // Strict ">" keeps the newest call when several score the same.
+    if (score >= 0.5 && score > bestScore) {
+      bestScore = score;
+      bestMatch = call;
+    }
+  }
+
+  return bestMatch;
+}
--- a/src/resources/extensions/gsd/safety/file-change-validator.ts
+++ b/src/resources/extensions/gsd/safety/file-change-validator.ts
@ -0,0 +1,108 @@
+/**
+ * Post-unit file change validator for auto-mode safety harness.
+ * Compares actual git diff against the task plan's expected output files.
+ *
+ * Uses tasks.expected_output (DB column, populated from per-task ## Expected Output)
+ * and tasks.files (from slice PLAN.md - Files: subline) as the expected set.
+ * Compares against git diff HEAD~1 --name-only after auto-commit.
+ *
+ * Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
+ */
+
+import { execFileSync } from "node:child_process";
+import { logWarning } from "../workflow-logger.js";
+
+// ─── Types ──────────────────────────────────────────────────────────────────
+
+/** One discrepancy between planned and actually changed files. */
+export interface FileViolation {
+  /** "warning" for a changed file that was not planned; "info" for a planned file that was not changed. */
+  severity: "info" | "warning";
+  /** Path the finding refers to. */
+  file: string;
+  /** Human-readable explanation of the finding. */
+  reason: string;
+}
+
+/** Full result of comparing planned files against the files changed in the last commit. */
+export interface FileChangeAudit {
+  /** Planned paths after normalization. */
+  expectedFiles: string[];
+  /** Changed paths from git, with `.gsd/` entries removed. */
+  actualFiles: string[];
+  /** Changed paths that are not in the planned set. */
+  unexpectedFiles: string[];
+  /** Planned paths that were not changed. */
+  missingFiles: string[];
+  /** One entry per unexpected file, followed by one per missing file. */
+  violations: FileViolation[];
+}
+
+// ─── Public API ─────────────────────────────────────────────────────────────
+
+/**
+ * Validate file changes after auto-commit for an execute-task unit.
+ *
+ * Planned paths are trimmed and stripped of a leading "./" or "/" before
+ * anything else happens, and entries that end up empty are discarded. Blank
+ * plan entries therefore count as "nothing planned" instead of turning every
+ * changed file into an unexpected one.
+ *
+ * @param basePath - Working directory (worktree or project root)
+ * @param expectedOutput - Paths from the task's expected output
+ * @param plannedFiles - Paths from the task's planned files
+ * @returns The audit, or null when no usable planned paths exist or the
+ *   changed-file list could not be obtained from git
+ */
+export function validateFileChanges(
+  basePath: string,
+  expectedOutput: string[],
+  plannedFiles: string[],
+): FileChangeAudit | null {
+  // Normalize first so the "nothing planned" check sees the cleaned set.
+  const normalizedExpected = new Set<string>();
+  for (const raw of [...expectedOutput, ...plannedFiles]) {
+    const cleaned = raw.trim().replace(/^\.\//, "").replace(/^\//, "");
+    if (cleaned !== "") normalizedExpected.add(cleaned);
+  }
+
+  // If no expected files were planned, skip validation
+  if (normalizedExpected.size === 0) return null;
+
+  // Get actual changed files from last commit
+  const actualFiles = getChangedFilesFromLastCommit(basePath);
+  if (!actualFiles) return null;
+
+  // Filter out .gsd/ internal files — only validate project source files
+  const projectFiles = actualFiles.filter(f => !f.startsWith(".gsd/") && !f.startsWith(".gsd\\"));
+  const changed = new Set(projectFiles);
+
+  // Compute both sides of the symmetric difference with set lookups
+  const unexpectedFiles = projectFiles.filter(f => !normalizedExpected.has(f));
+  const missingFiles = [...normalizedExpected].filter(f => !changed.has(f));
+
+  const violations: FileViolation[] = [
+    ...unexpectedFiles.map((f): FileViolation => ({
+      severity: "warning",
+      file: f,
+      reason: "Modified but not in task plan's expected output",
+    })),
+    ...missingFiles.map((f): FileViolation => ({
+      severity: "info",
+      file: f,
+      reason: "Listed in task plan but not modified",
+    })),
+  ];
+
+  return {
+    expectedFiles: [...normalizedExpected],
+    actualFiles: projectFiles,
+    unexpectedFiles,
+    missingFiles,
+    violations,
+  };
+}
+
+// ─── Internals ──────────────────────────────────────────────────────────────
+
+/**
+ * List the paths that differ between HEAD~1 and HEAD.
+ *
+ * Runs `git diff --name-only -z`. With `-z` git emits NUL-terminated paths
+ * verbatim instead of quoting and escaping unusual ones, so every returned
+ * string is the real path and can be compared directly with planned paths.
+ *
+ * @param basePath - Directory in which to run git
+ * @returns Changed paths (possibly empty), or null when git fails; the
+ *   failure is reported through logWarning
+ */
+function getChangedFilesFromLastCommit(basePath: string): string[] | null {
+  try {
+    const output = execFileSync(
+      "git",
+      ["diff", "--name-only", "-z", "HEAD~1", "HEAD"],
+      { cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
+    );
+    // Output is NUL-terminated, so the split leaves a trailing empty string to drop.
+    return output.split("\0").filter(Boolean);
+  } catch (e) {
+    const message = e instanceof Error ? e.message : String(e);
+    logWarning("safety", `git diff failed in file-change-validator: ${message}`);
+    return null;
+  }
+}
--- a/src/resources/extensions/gsd/safety/git-checkpoint.ts
+++ b/src/resources/extensions/gsd/safety/git-checkpoint.ts
@ -0,0 +1,116 @@
+/**
+ * Pre-unit git checkpoint and rollback for auto-mode safety harness.
+ * Uses the existing refs/gsd/ namespace (already pruned by doctor).
+ *
+ * Creates a lightweight ref at HEAD before unit execution. On failure,
+ * the ref can be used to roll the branch back to its pre-unit state.
+ *
+ * Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
+ */
+
+import { execFileSync } from "node:child_process";
+import { logWarning } from "../workflow-logger.js";
+
+// ─── Constants ──────────────────────────────────────────────────────────────
+
+// Ref namespace under which per-unit checkpoint refs are created and deleted.
+const CHECKPOINT_PREFIX = "refs/gsd/checkpoints/";
+
+// ─── Public API ─────────────────────────────────────────────────────────────
+
+/**
+ * Record the current HEAD commit under a per-unit checkpoint ref.
+ *
+ * @param basePath - Directory used as the working directory for git.
+ * @param unitId - Unit identifier; any "/" is replaced with "-" to form the ref name.
+ * @returns The SHA that HEAD resolved to, or null when git fails or the
+ *          resolved value is shorter than 7 characters.
+ */
+export function createCheckpoint(basePath: string, unitId: string): string | null {
+  try {
+    const headSha = execFileSync("git", ["rev-parse", "HEAD"], {
+      cwd: basePath,
+      stdio: ["ignore", "pipe", "pipe"],
+      encoding: "utf-8",
+    }).trim();
+
+    // Anything shorter than an abbreviated SHA (including an empty string) is unusable.
+    if (headSha.length < 7) return null;
+
+    // Slashes in the unit id would create nested ref path segments; flatten them.
+    const refName = CHECKPOINT_PREFIX + unitId.split("/").join("-");
+
+    execFileSync("git", ["update-ref", refName, headSha], {
+      cwd: basePath,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+
+    return headSha;
+  } catch (e) {
+    logWarning("safety", `checkpoint creation failed: ${(e as Error).message}`);
+    return null;
+  }
+}
+
+/**
+ * Roll the current branch back to a checkpoint SHA.
+ *
+ * The branch pointer, index and tracked files are moved in a single
+ * `git reset --hard <sha>`. (Forcing the branch with `git branch -f` is not
+ * an option here: git refuses to force-update the branch that is currently
+ * checked out.) After a successful reset the unit's checkpoint ref is removed.
+ *
+ * WARNING: This is a destructive operation — it discards all committed and
+ * uncommitted changes to tracked files since the checkpoint. Untracked files
+ * are left in place. Only call when the user has opted in via
+ * safety_harness.auto_rollback or an explicit manual trigger.
+ *
+ * @param basePath - Directory used as the working directory for git.
+ * @param unitId - Unit identifier whose checkpoint ref is cleaned up afterwards.
+ * @param sha - Commit to reset the current branch to.
+ * @returns true on success; false when the target is invalid, HEAD is
+ *          detached, or any git command fails (a warning is logged).
+ */
+export function rollbackToCheckpoint(
+  basePath: string,
+  unitId: string,
+  sha: string,
+): boolean {
+  // An empty target would make `git reset --hard` silently reset to HEAD, and a
+  // leading "-" would be parsed by git as an option rather than a revision.
+  if (!sha || sha.startsWith("-")) {
+    logWarning("safety", "rollback: invalid checkpoint sha, cannot rollback");
+    return false;
+  }
+
+  try {
+    // Resolve the current branch name; "HEAD" means no branch is checked out.
+    const branch = execFileSync("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
+      cwd: basePath,
+      stdio: ["ignore", "pipe", "pipe"],
+      encoding: "utf-8",
+    }).trim();
+
+    if (!branch || branch === "HEAD") {
+      logWarning("safety", "rollback: detached HEAD state, cannot rollback");
+      return false;
+    }
+
+    // Move the checked-out branch to the checkpoint and sync index + working tree.
+    execFileSync("git", ["reset", "--hard", sha], {
+      cwd: basePath,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+
+    // Cleanup checkpoint ref
+    cleanupCheckpoint(basePath, unitId);
+
+    return true;
+  } catch (e) {
+    const message = e instanceof Error ? e.message : String(e);
+    logWarning("safety", `rollback failed: ${message}`);
+    return false;
+  }
+}
+
+/**
+ * Delete the checkpoint ref belonging to a unit.
+ *
+ * Best-effort: any git failure is ignored.
+ *
+ * @param basePath - Directory used as the working directory for git.
+ * @param unitId - Unit identifier; any "/" is replaced with "-" to form the ref name.
+ */
+export function cleanupCheckpoint(basePath: string, unitId: string): void {
+  try {
+    const refName = CHECKPOINT_PREFIX + unitId.split("/").join("-");
+    execFileSync("git", ["update-ref", "-d", refName], {
+      cwd: basePath,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+  } catch {
+    // Deliberately swallowed — the ref may already be gone, and cleanup must not fail the caller.
+  }
+}
--- a/src/resources/extensions/gsd/safety/safety-harness.ts
+++ b/src/resources/extensions/gsd/safety/safety-harness.ts
@ -0,0 +1,105 @@
+/**
+ * Safety Harness — central module for LLM damage control during auto-mode.
+ * Provides types, preference resolution, and orchestration for all safety components.
+ *
+ * Components:
+ * - evidence-collector.ts: Real-time tool call tracking
+ * - destructive-guard.ts: Bash command classification
+ * - file-change-validator.ts: Post-unit git diff vs plan
+ * - evidence-cross-ref.ts: Claimed vs actual verification evidence
+ * - git-checkpoint.ts: Pre-unit checkpoints + rollback
+ * - content-validator.ts: Output quality validation
+ *
+ * Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
+ */
+
+// ─── Types ──────────────────────────────────────────────────────────────────
+
+/**
+ * Fully-resolved safety harness settings. Every field is required here;
+ * resolveSafetyHarnessConfig() fills anything missing from DEFAULTS.
+ */
+export interface SafetyHarnessConfig {
+  // Master switch; this is the flag isHarnessEnabled() reports.
+  enabled: boolean;
+  // Per-component toggles. NOTE(review): presumably one per module listed in the
+  // file header (evidence-collector, file-change-validator, evidence-cross-ref,
+  // destructive-guard, content-validator, git-checkpoint) — confirm at the call sites.
+  evidence_collection: boolean;
+  file_change_validation: boolean;
+  evidence_cross_reference: boolean;
+  destructive_command_warnings: boolean;
+  content_validation: boolean;
+  checkpoints: boolean;
+  // Opt-in for automatic rollback (see the warning on rollbackToCheckpoint); off by default.
+  auto_rollback: boolean;
+  // NOTE(review): presumably the upper bound on the timeout scale multiplier — confirm at the consumer.
+  timeout_scale_cap: number;
+}
+
+// ─── Defaults ───────────────────────────────────────────────────────────────
+
+// Fallback values used when a preference is absent or has the wrong type.
+// Everything is enabled out of the box except auto_rollback.
+const DEFAULTS: SafetyHarnessConfig = {
+  enabled: true,
+  evidence_collection: true,
+  file_change_validation: true,
+  evidence_cross_reference: true,
+  destructive_command_warnings: true,
+  content_validation: true,
+  checkpoints: true,
+  auto_rollback: false,
+  timeout_scale_cap: 6,
+};
+
+// ─── Public API ─────────────────────────────────────────────────────────────
+
+/**
+ * Resolve safety harness configuration from raw preferences.
+ *
+ * Each field is taken from `raw` only when it has the expected type;
+ * otherwise the value from DEFAULTS is used. `timeout_scale_cap` must
+ * additionally be a number greater than zero — NaN, 0 and negative values
+ * are not usable as a cap and fall back to the default. `Infinity` is accepted.
+ *
+ * @param raw - Untyped preference object, or undefined when none is configured.
+ * @returns A new, fully-populated config object (never the DEFAULTS instance itself).
+ */
+export function resolveSafetyHarnessConfig(
+  raw: Record<string, unknown> | undefined,
+): SafetyHarnessConfig {
+  if (!raw) return { ...DEFAULTS };
+
+  // Read one boolean toggle, falling back to its default on a missing or mistyped value.
+  const flag = (key: Exclude<keyof SafetyHarnessConfig, "timeout_scale_cap">): boolean => {
+    const value = raw[key];
+    return typeof value === "boolean" ? value : DEFAULTS[key];
+  };
+
+  const cap = raw.timeout_scale_cap;
+
+  return {
+    enabled: flag("enabled"),
+    evidence_collection: flag("evidence_collection"),
+    file_change_validation: flag("file_change_validation"),
+    evidence_cross_reference: flag("evidence_cross_reference"),
+    destructive_command_warnings: flag("destructive_command_warnings"),
+    content_validation: flag("content_validation"),
+    checkpoints: flag("checkpoints"),
+    auto_rollback: flag("auto_rollback"),
+    // `cap > 0` is false for NaN, so NaN is rejected along with 0 and negatives.
+    timeout_scale_cap: typeof cap === "number" && cap > 0 ? cap : DEFAULTS.timeout_scale_cap,
+  };
+}
+
+/**
+ * Report whether the safety harness is switched on.
+ *
+ * Only the `enabled` preference is inspected; when it is absent or not a
+ * boolean the default is returned. Intended as a cheap gate at hook
+ * registration and phase integration points.
+ */
+export function isHarnessEnabled(
+  raw: Record<string, unknown> | undefined,
+): boolean {
+  const flag = raw?.enabled;
+  return typeof flag === "boolean" ? flag : DEFAULTS.enabled;
+}
+
+// ─── Re-exports ─────────────────────────────────────────────────────────────
+
+export {
+  resetEvidence,
+  getEvidence,
+  getBashEvidence,
+  getFilePaths,
+  recordToolCall,
+  recordToolResult,
+} from "./evidence-collector.js";
+
+export type { EvidenceEntry, BashEvidence, FileWriteEvidence, FileEditEvidence } from "./evidence-collector.js";
+
+export { classifyCommand } from "./destructive-guard.js";
+export type { CommandClassification } from "./destructive-guard.js";
+
+export { validateFileChanges } from "./file-change-validator.js";
+export type { FileChangeAudit, FileViolation } from "./file-change-validator.js";
+
+export { crossReferenceEvidence } from "./evidence-cross-ref.js";
+export type { ClaimedEvidence, EvidenceMismatch } from "./evidence-cross-ref.js";
+
+export { createCheckpoint, rollbackToCheckpoint, cleanupCheckpoint } from "./git-checkpoint.js";
+
+export { validateContent } from "./content-validator.js";
+export type { ContentViolation } from "./content-validator.js";
--- a/src/resources/extensions/gsd/workflow-logger.ts
+++ b/src/resources/extensions/gsd/workflow-logger.ts
@ -48,7 +48,8 @@ export type LogComponent =
  | "bootstrap"     // Extension bootstrap (system-context, agent-end)
  | "guided"        // Guided flow (discuss, plan wizards)
  | "registry"      // Rule registry hook state
-  | "renderer";     // Markdown renderer and projections
+  | "renderer"      // Markdown renderer and projections
+  | "safety";       // LLM safety harness

 export interface LogEntry {
  ts: string;