refactor: consolidate tests by area, standardize on node:test (#1059)
* docs: add Node LTS pinning guide for macOS Homebrew users New doc (docs/node-lts-macos.md) explains how to pin Node 24 LTS via Homebrew to avoid running on odd-numbered development releases. Covers brew install/link/pin, version managers as alternatives, and verification steps. Added notice banner in README linking to the guide. * refactor: consolidate tests by area, standardize on node:test Consolidated 10 test files into 4, standardizing on node:test. Provider errors (3 files → 1): provider-errors.test.ts (34 tests) Metrics (2 files → 1): metrics.test.ts (13 tests, converted from custom runner) Activity log (2 files → 1): activity-log.test.ts (11 tests, converted from custom runner) Complexity (2 files → 1): removed redundant structural string checks Net: -694 lines, -6 files.
This commit is contained in:
parent
3d4f77b2ee
commit
8dfa7d058c
10 changed files with 619 additions and 1313 deletions
|
|
@ -1,297 +0,0 @@
|
|||
// Tests for pruneActivityLogs — age-based activity log pruning with
|
||||
// highest-seq preservation invariant — plus step-11 prompt text assertion.
|
||||
//
|
||||
// Sections:
|
||||
// (a) Basic pruning: one old file deleted, two recent survive
|
||||
// (b) Highest-seq preserved even when all files are old
|
||||
// (c) retentionDays=0 boundary: all non-highest-seq deleted
|
||||
// (d) No-op when all files are recent
|
||||
// (e) Empty directory: no crash
|
||||
// (f) All old files: only highest-seq survives
|
||||
// (g) Single file: always preserved (it IS highest-seq)
|
||||
// (h) Seq number is tie-breaker (010 beats 001 lexicographically and numerically)
|
||||
// (i) Non-matching filenames ignored: notes.txt survives, no crash
|
||||
// (j) Step-11 prompt text: "refresh current state if needed"
|
||||
|
||||
import { mkdtempSync, mkdirSync, readdirSync, rmSync, utimesSync, writeFileSync } from 'node:fs';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
import { pruneActivityLogs } from '../activity-log.ts';
|
||||
import { createTestContext } from './test-helpers.ts';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
|
||||
const { assertEq, assertTrue, report } = createTestContext();
|
||||
// ─── Fixture helpers ───────────────────────────────────────────────────────
|
||||
|
||||
let tmpDirs: string[] = [];
|
||||
|
||||
function createTmpActivityDir(): string {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'gsd-prune-test-'));
|
||||
tmpDirs.push(dir);
|
||||
return dir;
|
||||
}
|
||||
|
||||
function writeActivityFile(activityDir: string, seq: string, name: string): string {
|
||||
mkdirSync(activityDir, { recursive: true });
|
||||
const filePath = join(activityDir, `${seq}-${name}.jsonl`);
|
||||
writeFileSync(filePath, `{"seq":${parseInt(seq, 10)},"name":"${name}"}\n`, 'utf-8');
|
||||
return filePath;
|
||||
}
|
||||
|
||||
/** Set mtime to daysAgo days in the past. */
|
||||
function backdateFile(filePath: string, daysAgo: number): void {
|
||||
const pastMs = Date.now() - daysAgo * 24 * 60 * 60 * 1000;
|
||||
const pastDate = new Date(pastMs);
|
||||
utimesSync(filePath, pastDate, pastDate);
|
||||
}
|
||||
|
||||
function cleanup(): void {
|
||||
for (const dir of tmpDirs) {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
tmpDirs = [];
|
||||
}
|
||||
|
||||
process.on('exit', cleanup);
|
||||
|
||||
// ─── Helper: get sorted filenames (basenames only) in a directory ──────────
|
||||
|
||||
function listFiles(dir: string): string[] {
|
||||
return readdirSync(dir).sort();
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Tests
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
async function main(): Promise<void> {
|
||||
|
||||
// ─── (a) Basic pruning ────────────────────────────────────────────────────
|
||||
console.log('\n── (a) Basic pruning: one old file deleted, two recent survive');
|
||||
|
||||
{
|
||||
const dir = createTmpActivityDir();
|
||||
const f001 = writeActivityFile(dir, '001', 'execute-task-M001-S01-T01');
|
||||
const _f002 = writeActivityFile(dir, '002', 'execute-task-M001-S01-T02');
|
||||
const _f003 = writeActivityFile(dir, '003', 'execute-task-M001-S01-T03');
|
||||
|
||||
backdateFile(f001, 40); // older than 30-day retention
|
||||
|
||||
pruneActivityLogs(dir, 30);
|
||||
|
||||
const remaining = listFiles(dir);
|
||||
assertTrue(
|
||||
!remaining.includes('001-execute-task-M001-S01-T01.jsonl'),
|
||||
'(a) file 001 deleted (40 days old, past 30-day threshold)',
|
||||
);
|
||||
assertTrue(
|
||||
remaining.includes('002-execute-task-M001-S01-T02.jsonl'),
|
||||
'(a) file 002 survives (recent)',
|
||||
);
|
||||
assertTrue(
|
||||
remaining.includes('003-execute-task-M001-S01-T03.jsonl'),
|
||||
'(a) file 003 survives (recent, also highest-seq)',
|
||||
);
|
||||
}
|
||||
|
||||
// ─── (b) Highest-seq preserved even when all files are old ───────────────
|
||||
console.log('\n── (b) Highest-seq preserved even when all files are old');
|
||||
|
||||
{
|
||||
const dir = createTmpActivityDir();
|
||||
const f001 = writeActivityFile(dir, '001', 'execute-task-M001-S01-T01');
|
||||
const f002 = writeActivityFile(dir, '002', 'execute-task-M001-S01-T02');
|
||||
const f003 = writeActivityFile(dir, '003', 'execute-task-M001-S01-T03');
|
||||
|
||||
backdateFile(f001, 40);
|
||||
backdateFile(f002, 40);
|
||||
backdateFile(f003, 40); // all old, but 003 is highest-seq
|
||||
|
||||
pruneActivityLogs(dir, 30);
|
||||
|
||||
const remaining = listFiles(dir);
|
||||
assertEq(remaining.length, 1, '(b) exactly 1 file survives when all are old');
|
||||
assertTrue(
|
||||
remaining.includes('003-execute-task-M001-S01-T03.jsonl'),
|
||||
'(b) highest-seq file (003) is the survivor',
|
||||
);
|
||||
}
|
||||
|
||||
// ─── (c) retentionDays=0 boundary ────────────────────────────────────────
|
||||
console.log('\n── (c) retentionDays=0: all non-highest-seq deleted even if brand-new');
|
||||
|
||||
{
|
||||
const dir = createTmpActivityDir();
|
||||
// All files have mtime=now (freshly written — no backdating)
|
||||
writeActivityFile(dir, '001', 'execute-task-M002-S01-T01');
|
||||
writeActivityFile(dir, '002', 'execute-task-M002-S01-T02');
|
||||
writeActivityFile(dir, '003', 'execute-task-M002-S01-T03');
|
||||
|
||||
pruneActivityLogs(dir, 0); // cutoff = now → everything is "expired"
|
||||
|
||||
const remaining = listFiles(dir);
|
||||
assertEq(remaining.length, 1, '(c) retentionDays=0: exactly 1 file survives');
|
||||
assertTrue(
|
||||
remaining.includes('003-execute-task-M002-S01-T03.jsonl'),
|
||||
'(c) retentionDays=0: only highest-seq (003) survives',
|
||||
);
|
||||
}
|
||||
|
||||
// ─── (d) No-op when all files are recent ─────────────────────────────────
|
||||
console.log('\n── (d) No-op when all files are recent');
|
||||
|
||||
{
|
||||
const dir = createTmpActivityDir();
|
||||
writeActivityFile(dir, '001', 'execute-task-M003-S01-T01');
|
||||
writeActivityFile(dir, '002', 'execute-task-M003-S01-T02');
|
||||
writeActivityFile(dir, '003', 'execute-task-M003-S01-T03');
|
||||
// No backdating — all files are fresh
|
||||
|
||||
pruneActivityLogs(dir, 30);
|
||||
|
||||
const remaining = listFiles(dir);
|
||||
assertEq(remaining.length, 3, '(d) all 3 files survive when all are recent');
|
||||
}
|
||||
|
||||
// ─── (e) Empty directory: no crash ────────────────────────────────────────
|
||||
console.log('\n── (e) Empty directory: no crash');
|
||||
|
||||
{
|
||||
const dir = createTmpActivityDir();
|
||||
// dir exists but is empty
|
||||
|
||||
let threw = false;
|
||||
try {
|
||||
pruneActivityLogs(dir, 30);
|
||||
} catch {
|
||||
threw = true;
|
||||
}
|
||||
|
||||
assertTrue(!threw, '(e) pruneActivityLogs does not throw on empty directory');
|
||||
assertTrue(
|
||||
readdirSync(dir).length === 0,
|
||||
'(e) directory still exists and is still empty after no-op',
|
||||
);
|
||||
}
|
||||
|
||||
// ─── (f) All old files: only highest-seq survives ─────────────────────────
|
||||
console.log('\n── (f) All old files: only highest-seq survives');
|
||||
|
||||
{
|
||||
const dir = createTmpActivityDir();
|
||||
const f004 = writeActivityFile(dir, '004', 'execute-task-M004-S01-T01');
|
||||
const f005 = writeActivityFile(dir, '005', 'execute-task-M004-S01-T02');
|
||||
const f006 = writeActivityFile(dir, '006', 'execute-task-M004-S01-T03');
|
||||
|
||||
backdateFile(f004, 60);
|
||||
backdateFile(f005, 60);
|
||||
backdateFile(f006, 60);
|
||||
|
||||
pruneActivityLogs(dir, 30);
|
||||
|
||||
const remaining = listFiles(dir);
|
||||
assertEq(remaining.length, 1, '(f) exactly 1 file survives when all are old');
|
||||
assertTrue(
|
||||
remaining[0].startsWith('006-'),
|
||||
'(f) the surviving file starts with 006 (highest-seq)',
|
||||
);
|
||||
}
|
||||
|
||||
// ─── (g) Single file: always preserved ────────────────────────────────────
|
||||
console.log('\n── (g) Single file: always preserved (it IS highest-seq)');
|
||||
|
||||
{
|
||||
const dir = createTmpActivityDir();
|
||||
const f001 = writeActivityFile(dir, '001', 'execute-task-M005-S01-T01');
|
||||
backdateFile(f001, 100); // very old
|
||||
|
||||
pruneActivityLogs(dir, 30);
|
||||
|
||||
const remaining = listFiles(dir);
|
||||
assertEq(remaining.length, 1, '(g) single file survives even when very old (it is the highest-seq)');
|
||||
assertTrue(
|
||||
remaining.includes('001-execute-task-M005-S01-T01.jsonl'),
|
||||
'(g) the single file (001) is preserved',
|
||||
);
|
||||
}
|
||||
|
||||
// ─── (h) Seq tie-breaker: 010 is higher than 001 ─────────────────────────
|
||||
console.log('\n── (h) Seq number tie-breaker: 010 beats 001 numerically');
|
||||
|
||||
{
|
||||
const dir = createTmpActivityDir();
|
||||
const f001 = writeActivityFile(dir, '001', 'execute-task-M006-S01-T01');
|
||||
const f010 = writeActivityFile(dir, '010', 'execute-task-M006-S01-T10');
|
||||
|
||||
backdateFile(f001, 40);
|
||||
backdateFile(f010, 40); // both old; 010 is numerically highest
|
||||
|
||||
pruneActivityLogs(dir, 30);
|
||||
|
||||
const remaining = listFiles(dir);
|
||||
assertEq(remaining.length, 1, '(h) exactly 1 file survives');
|
||||
assertTrue(
|
||||
remaining.includes('010-execute-task-M006-S01-T10.jsonl'),
|
||||
'(h) seq 010 (numeric 10) survives over seq 001 (numeric 1)',
|
||||
);
|
||||
}
|
||||
|
||||
// ─── (i) Non-matching filenames ignored ───────────────────────────────────
|
||||
console.log('\n── (i) Non-matching filenames ignored: notes.txt survives, no crash');
|
||||
|
||||
{
|
||||
const dir = createTmpActivityDir();
|
||||
const f001 = writeActivityFile(dir, '001', 'execute-task-M007-S01-T01');
|
||||
const notesPath = join(dir, 'notes.txt');
|
||||
writeFileSync(notesPath, 'some notes\n', 'utf-8');
|
||||
|
||||
backdateFile(f001, 40); // eligible for pruning
|
||||
// notes.txt never gets a seq prefix → should be ignored by pruner
|
||||
|
||||
let threw = false;
|
||||
try {
|
||||
pruneActivityLogs(dir, 30);
|
||||
} catch {
|
||||
threw = true;
|
||||
}
|
||||
|
||||
assertTrue(!threw, '(i) no crash when non-matching file is present');
|
||||
|
||||
const remaining = listFiles(dir);
|
||||
assertTrue(
|
||||
remaining.includes('notes.txt'),
|
||||
'(i) notes.txt (non-matching filename) survives pruning unchanged',
|
||||
);
|
||||
// 001 is deleted (old, and notes.txt is not counted as seq-bearing so 001 is not "highest")
|
||||
// But wait — 001 IS the only seq file, making it highest-seq → it survives
|
||||
assertTrue(
|
||||
remaining.includes('001-execute-task-M007-S01-T01.jsonl'),
|
||||
'(i) seq 001 survives (it is the highest-seq among seq files)',
|
||||
);
|
||||
}
|
||||
|
||||
// ─── (j) Step-11 prompt text assertion ────────────────────────────────────
|
||||
console.log('\n── (j) Step-11 prompt text: "refresh current state if needed"');
|
||||
|
||||
{
|
||||
const { readFileSync } = await import('node:fs');
|
||||
const promptPath = join(__dirname, '..', 'prompts', 'complete-slice.md');
|
||||
const content = readFileSync(promptPath, 'utf-8');
|
||||
|
||||
assertTrue(
|
||||
content.includes('refresh current state if needed'),
|
||||
'(j) complete-slice.md step 11 contains "refresh current state if needed"',
|
||||
);
|
||||
}
|
||||
|
||||
report();
|
||||
}
|
||||
|
||||
main().catch((error) => {
|
||||
console.error(error);
|
||||
process.exit(1);
|
||||
});
|
||||
|
|
@ -1,127 +0,0 @@
|
|||
// Tests for saveActivityLog performance behavior:
|
||||
// - cache next sequence per activity directory instead of rescanning every save
|
||||
// - skip rewriting identical snapshots for the same unit
|
||||
// - recover safely if another writer creates the cached next sequence
|
||||
|
||||
import { existsSync, mkdtempSync, readdirSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
|
||||
import { saveActivityLog } from "../activity-log.ts";
|
||||
import { createTestContext } from "./test-helpers.ts";
|
||||
|
||||
const { assertEq, assertTrue, report } = createTestContext();
|
||||
|
||||
let tmpDirs: string[] = [];
|
||||
|
||||
function createBaseDir(): string {
|
||||
const dir = mkdtempSync(join(tmpdir(), "gsd-activity-save-test-"));
|
||||
tmpDirs.push(dir);
|
||||
return dir;
|
||||
}
|
||||
|
||||
function activityDir(baseDir: string): string {
|
||||
return join(baseDir, ".gsd", "activity");
|
||||
}
|
||||
|
||||
function listActivityFiles(baseDir: string): string[] {
|
||||
const dir = activityDir(baseDir);
|
||||
return existsSync(dir) ? readdirSync(dir).sort() : [];
|
||||
}
|
||||
|
||||
function createCtx(entries: unknown[]) {
|
||||
return {
|
||||
sessionManager: {
|
||||
getEntries: () => entries,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function cleanup(): void {
|
||||
for (const dir of tmpDirs) {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
tmpDirs = [];
|
||||
}
|
||||
|
||||
process.on("exit", cleanup);
|
||||
|
||||
async function main(): Promise<void> {
|
||||
console.log("\n── (a) cache next sequence instead of rescanning every save");
|
||||
{
|
||||
const baseDir = createBaseDir();
|
||||
saveActivityLog(createCtx([{ kind: "first", n: 1 }]) as any, baseDir, "execute-task", "M001/S01/T01");
|
||||
|
||||
writeFileSync(
|
||||
join(activityDir(baseDir), "999-external-manual.jsonl"),
|
||||
'{"external":true}\n',
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
saveActivityLog(createCtx([{ kind: "second", n: 2 }]) as any, baseDir, "execute-task", "M001/S01/T02");
|
||||
|
||||
const files = listActivityFiles(baseDir);
|
||||
assertTrue(files.includes("001-execute-task-M001-S01-T01.jsonl"), "(a) first save uses sequence 001");
|
||||
assertTrue(files.includes("002-execute-task-M001-S01-T02.jsonl"), "(a) second save uses cached next sequence 002");
|
||||
assertTrue(files.includes("999-external-manual.jsonl"), "(a) externally added file remains present");
|
||||
assertTrue(!files.some(file => file.startsWith("1000-")), "(a) second save did not rescan and jump to sequence 1000");
|
||||
}
|
||||
|
||||
console.log("\n── (b) skip rewriting identical snapshots for the same unit");
|
||||
{
|
||||
const baseDir = createBaseDir();
|
||||
const ctx = createCtx([{ role: "assistant", content: "same snapshot" }]);
|
||||
|
||||
saveActivityLog(ctx as any, baseDir, "plan-slice", "M002/S01");
|
||||
saveActivityLog(ctx as any, baseDir, "plan-slice", "M002/S01");
|
||||
|
||||
let files = listActivityFiles(baseDir);
|
||||
assertEq(files.length, 1, "(b) identical repeated save writes only one activity file");
|
||||
assertTrue(files[0] === "001-plan-slice-M002-S01.jsonl", "(b) the original sequence is preserved");
|
||||
|
||||
saveActivityLog(createCtx([{ role: "assistant", content: "changed snapshot" }]) as any, baseDir, "plan-slice", "M002/S01");
|
||||
files = listActivityFiles(baseDir);
|
||||
assertEq(files.length, 2, "(b) changed snapshot writes a new activity file");
|
||||
assertTrue(files.includes("002-plan-slice-M002-S01.jsonl"), "(b) deduped save did not consume the next sequence");
|
||||
}
|
||||
|
||||
console.log("\n── (c) recover if another writer creates the exact cached target file");
|
||||
{
|
||||
const baseDir = createBaseDir();
|
||||
saveActivityLog(createCtx([{ turn: 1 }]) as any, baseDir, "execute-task", "M003/S02/T01");
|
||||
|
||||
writeFileSync(
|
||||
join(activityDir(baseDir), "002-execute-task-M003-S02-T02.jsonl"),
|
||||
'{"collision":true}\n',
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
saveActivityLog(createCtx([{ turn: 2 }]) as any, baseDir, "execute-task", "M003/S02/T02");
|
||||
|
||||
const files = listActivityFiles(baseDir);
|
||||
assertTrue(files.includes("002-execute-task-M003-S02-T02.jsonl"), "(c) exact collision file is preserved");
|
||||
assertTrue(files.includes("003-execute-task-M003-S02-T02.jsonl"), "(c) logger rescans only on collision and advances to 003");
|
||||
}
|
||||
|
||||
console.log("\n── (d) dedupe is tracked per unit, not just the last write in the directory");
|
||||
{
|
||||
const baseDir = createBaseDir();
|
||||
const repeatedCtx = createCtx([{ role: "assistant", content: "same-for-unit-a" }]);
|
||||
|
||||
saveActivityLog(repeatedCtx as any, baseDir, "execute-task", "M004/S01/T01");
|
||||
saveActivityLog(createCtx([{ role: "assistant", content: "other-unit" }]) as any, baseDir, "execute-task", "M004/S01/T02");
|
||||
saveActivityLog(repeatedCtx as any, baseDir, "execute-task", "M004/S01/T01");
|
||||
|
||||
const files = listActivityFiles(baseDir);
|
||||
assertEq(files.length, 2, "(d) interleaving another unit does not force a duplicate rewrite for unit A");
|
||||
assertTrue(files.includes("001-execute-task-M004-S01-T01.jsonl"), "(d) original unit A snapshot is retained");
|
||||
assertTrue(files.includes("002-execute-task-M004-S01-T02.jsonl"), "(d) unit B snapshot is retained");
|
||||
}
|
||||
|
||||
report();
|
||||
}
|
||||
|
||||
main().catch((error) => {
|
||||
console.error(error);
|
||||
process.exit(1);
|
||||
});
|
||||
213
src/resources/extensions/gsd/tests/activity-log.test.ts
Normal file
213
src/resources/extensions/gsd/tests/activity-log.test.ts
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
/**
|
||||
* Activity log tests — consolidated from:
|
||||
* - activity-log-prune.test.ts (age-based pruning with highest-seq preservation)
|
||||
* - activity-log-save.test.ts (caching, dedup, collision recovery)
|
||||
*/
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { existsSync, mkdtempSync, mkdirSync, readdirSync, rmSync, utimesSync, writeFileSync, readFileSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
import { pruneActivityLogs, saveActivityLog } from "../activity-log.ts";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
function createTmpDir(): string {
|
||||
return mkdtempSync(join(tmpdir(), "gsd-activity-test-"));
|
||||
}
|
||||
|
||||
function writeActivityFile(dir: string, seq: string, name: string): string {
|
||||
mkdirSync(dir, { recursive: true });
|
||||
const filePath = join(dir, `${seq}-${name}.jsonl`);
|
||||
writeFileSync(filePath, `{"seq":${parseInt(seq, 10)},"name":"${name}"}\n`, "utf-8");
|
||||
return filePath;
|
||||
}
|
||||
|
||||
function backdateFile(filePath: string, daysAgo: number): void {
|
||||
const pastMs = Date.now() - daysAgo * 24 * 60 * 60 * 1000;
|
||||
const pastDate = new Date(pastMs);
|
||||
utimesSync(filePath, pastDate, pastDate);
|
||||
}
|
||||
|
||||
function listFiles(dir: string): string[] {
|
||||
return existsSync(dir) ? readdirSync(dir).sort() : [];
|
||||
}
|
||||
|
||||
function activityDir(baseDir: string): string {
|
||||
return join(baseDir, ".gsd", "activity");
|
||||
}
|
||||
|
||||
function createCtx(entries: unknown[]) {
|
||||
return { sessionManager: { getEntries: () => entries } };
|
||||
}
|
||||
|
||||
// ── Pruning ──────────────────────────────────────────────────────────────────
|
||||
|
||||
test("pruneActivityLogs deletes old files, keeps recent and highest-seq", () => {
|
||||
const dir = createTmpDir();
|
||||
try {
|
||||
const f001 = writeActivityFile(dir, "001", "execute-task-M001-S01-T01");
|
||||
writeActivityFile(dir, "002", "execute-task-M001-S01-T02");
|
||||
writeActivityFile(dir, "003", "execute-task-M001-S01-T03");
|
||||
backdateFile(f001, 40);
|
||||
|
||||
pruneActivityLogs(dir, 30);
|
||||
const remaining = listFiles(dir);
|
||||
assert.ok(!remaining.includes("001-execute-task-M001-S01-T01.jsonl"));
|
||||
assert.ok(remaining.includes("002-execute-task-M001-S01-T02.jsonl"));
|
||||
assert.ok(remaining.includes("003-execute-task-M001-S01-T03.jsonl"));
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("pruneActivityLogs preserves highest-seq even when all files are old", () => {
|
||||
const dir = createTmpDir();
|
||||
try {
|
||||
const f001 = writeActivityFile(dir, "001", "t1");
|
||||
const f002 = writeActivityFile(dir, "002", "t2");
|
||||
const f003 = writeActivityFile(dir, "003", "t3");
|
||||
backdateFile(f001, 40); backdateFile(f002, 40); backdateFile(f003, 40);
|
||||
|
||||
pruneActivityLogs(dir, 30);
|
||||
const remaining = listFiles(dir);
|
||||
assert.equal(remaining.length, 1);
|
||||
assert.ok(remaining[0].startsWith("003-"));
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("pruneActivityLogs with retentionDays=0 keeps only highest-seq", () => {
|
||||
const dir = createTmpDir();
|
||||
try {
|
||||
writeActivityFile(dir, "001", "t1");
|
||||
writeActivityFile(dir, "002", "t2");
|
||||
writeActivityFile(dir, "003", "t3");
|
||||
|
||||
pruneActivityLogs(dir, 0);
|
||||
const remaining = listFiles(dir);
|
||||
assert.equal(remaining.length, 1);
|
||||
assert.ok(remaining[0].startsWith("003-"));
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("pruneActivityLogs no-op when all files are recent", () => {
|
||||
const dir = createTmpDir();
|
||||
try {
|
||||
writeActivityFile(dir, "001", "t1");
|
||||
writeActivityFile(dir, "002", "t2");
|
||||
writeActivityFile(dir, "003", "t3");
|
||||
|
||||
pruneActivityLogs(dir, 30);
|
||||
assert.equal(listFiles(dir).length, 3);
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("pruneActivityLogs handles empty directory", () => {
|
||||
const dir = createTmpDir();
|
||||
try {
|
||||
assert.doesNotThrow(() => pruneActivityLogs(dir, 30));
|
||||
assert.equal(readdirSync(dir).length, 0);
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("pruneActivityLogs preserves single old file (it is highest-seq)", () => {
|
||||
const dir = createTmpDir();
|
||||
try {
|
||||
const f = writeActivityFile(dir, "001", "t1");
|
||||
backdateFile(f, 100);
|
||||
|
||||
pruneActivityLogs(dir, 30);
|
||||
assert.equal(listFiles(dir).length, 1);
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("pruneActivityLogs ignores non-matching filenames", () => {
|
||||
const dir = createTmpDir();
|
||||
try {
|
||||
const f001 = writeActivityFile(dir, "001", "t1");
|
||||
writeFileSync(join(dir, "notes.txt"), "some notes\n", "utf-8");
|
||||
backdateFile(f001, 40);
|
||||
|
||||
assert.doesNotThrow(() => pruneActivityLogs(dir, 30));
|
||||
const remaining = listFiles(dir);
|
||||
assert.ok(remaining.includes("notes.txt"));
|
||||
// 001 is the only seq file, so it's highest-seq and survives
|
||||
assert.ok(remaining.includes("001-t1.jsonl"));
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ── Save: caching, dedup, collision recovery ─────────────────────────────────
|
||||
|
||||
test("saveActivityLog caches sequence instead of rescanning", () => {
|
||||
const baseDir = createTmpDir();
|
||||
try {
|
||||
saveActivityLog(createCtx([{ kind: "first", n: 1 }]) as any, baseDir, "execute-task", "M001/S01/T01");
|
||||
writeFileSync(join(activityDir(baseDir), "999-external.jsonl"), '{"x":1}\n', "utf-8");
|
||||
saveActivityLog(createCtx([{ kind: "second", n: 2 }]) as any, baseDir, "execute-task", "M001/S01/T02");
|
||||
|
||||
const files = listFiles(activityDir(baseDir));
|
||||
assert.ok(files.includes("001-execute-task-M001-S01-T01.jsonl"));
|
||||
assert.ok(files.includes("002-execute-task-M001-S01-T02.jsonl"));
|
||||
assert.ok(!files.some(f => f.startsWith("1000-")));
|
||||
} finally {
|
||||
rmSync(baseDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("saveActivityLog deduplicates identical snapshots for same unit", () => {
|
||||
const baseDir = createTmpDir();
|
||||
try {
|
||||
const ctx = createCtx([{ role: "assistant", content: "same" }]);
|
||||
saveActivityLog(ctx as any, baseDir, "plan-slice", "M002/S01");
|
||||
saveActivityLog(ctx as any, baseDir, "plan-slice", "M002/S01");
|
||||
|
||||
let files = listFiles(activityDir(baseDir));
|
||||
assert.equal(files.length, 1);
|
||||
|
||||
saveActivityLog(createCtx([{ role: "assistant", content: "changed" }]) as any, baseDir, "plan-slice", "M002/S01");
|
||||
files = listFiles(activityDir(baseDir));
|
||||
assert.equal(files.length, 2);
|
||||
} finally {
|
||||
rmSync(baseDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("saveActivityLog recovers on sequence collision", () => {
|
||||
const baseDir = createTmpDir();
|
||||
try {
|
||||
saveActivityLog(createCtx([{ turn: 1 }]) as any, baseDir, "execute-task", "M003/S02/T01");
|
||||
writeFileSync(join(activityDir(baseDir), "002-execute-task-M003-S02-T02.jsonl"), '{"collision":true}\n', "utf-8");
|
||||
saveActivityLog(createCtx([{ turn: 2 }]) as any, baseDir, "execute-task", "M003/S02/T02");
|
||||
|
||||
const files = listFiles(activityDir(baseDir));
|
||||
assert.ok(files.includes("002-execute-task-M003-S02-T02.jsonl"));
|
||||
assert.ok(files.includes("003-execute-task-M003-S02-T02.jsonl"));
|
||||
} finally {
|
||||
rmSync(baseDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ── Prompt text assertion ────────────────────────────────────────────────────
|
||||
|
||||
test("complete-slice.md contains refresh state instruction", () => {
|
||||
const promptPath = join(__dirname, "..", "prompts", "complete-slice.md");
|
||||
const content = readFileSync(promptPath, "utf-8");
|
||||
assert.ok(content.includes("refresh current state if needed"));
|
||||
});
|
||||
|
|
@ -1,110 +0,0 @@
|
|||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import { pauseAutoForProviderError } from "../provider-error-pause.ts";
|
||||
|
||||
test("pauseAutoForProviderError warns and pauses without requiring ctx.log", async () => {
|
||||
const notifications: Array<{ message: string; level: string }> = [];
|
||||
let pauseCalls = 0;
|
||||
|
||||
await pauseAutoForProviderError(
|
||||
{
|
||||
notify(message, level?) {
|
||||
notifications.push({ message, level: level ?? "info" });
|
||||
},
|
||||
},
|
||||
": terminated",
|
||||
async () => {
|
||||
pauseCalls += 1;
|
||||
},
|
||||
);
|
||||
|
||||
assert.equal(pauseCalls, 1, "should pause auto-mode exactly once");
|
||||
assert.deepEqual(notifications, [
|
||||
{
|
||||
message: "Auto-mode paused due to provider error: terminated",
|
||||
level: "warning",
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test("pauseAutoForProviderError schedules auto-resume for rate limit errors", async () => {
|
||||
const notifications: Array<{ message: string; level: string }> = [];
|
||||
let pauseCalls = 0;
|
||||
let resumeCalled = false;
|
||||
|
||||
// Use fake timer
|
||||
const originalSetTimeout = globalThis.setTimeout;
|
||||
const timers: Array<{ fn: () => void; delay: number }> = [];
|
||||
globalThis.setTimeout = ((fn: () => void, delay: number) => {
|
||||
timers.push({ fn, delay });
|
||||
return 0 as unknown as ReturnType<typeof setTimeout>;
|
||||
}) as typeof setTimeout;
|
||||
|
||||
try {
|
||||
await pauseAutoForProviderError(
|
||||
{
|
||||
notify(message, level?) {
|
||||
notifications.push({ message, level: level ?? "info" });
|
||||
},
|
||||
},
|
||||
": rate limit exceeded",
|
||||
async () => {
|
||||
pauseCalls += 1;
|
||||
},
|
||||
{
|
||||
isRateLimit: true,
|
||||
retryAfterMs: 90000,
|
||||
resume: () => {
|
||||
resumeCalled = true;
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
assert.equal(pauseCalls, 1, "should pause auto-mode");
|
||||
assert.equal(timers.length, 1, "should schedule one timer");
|
||||
assert.equal(timers[0].delay, 90000, "timer should match retryAfterMs");
|
||||
assert.deepEqual(notifications[0], {
|
||||
message: "Rate limited: rate limit exceeded. Auto-resuming in 90s...",
|
||||
level: "warning",
|
||||
});
|
||||
|
||||
// Fire the timer
|
||||
timers[0].fn();
|
||||
assert.equal(resumeCalled, true, "should call resume after timer fires");
|
||||
assert.deepEqual(notifications[1], {
|
||||
message: "Rate limit window elapsed. Resuming auto-mode.",
|
||||
level: "info",
|
||||
});
|
||||
} finally {
|
||||
globalThis.setTimeout = originalSetTimeout;
|
||||
}
|
||||
});
|
||||
|
||||
test("pauseAutoForProviderError falls back to indefinite pause when not rate limit", async () => {
|
||||
const notifications: Array<{ message: string; level: string }> = [];
|
||||
let pauseCalls = 0;
|
||||
|
||||
await pauseAutoForProviderError(
|
||||
{
|
||||
notify(message, level?) {
|
||||
notifications.push({ message, level: level ?? "info" });
|
||||
},
|
||||
},
|
||||
": connection refused",
|
||||
async () => {
|
||||
pauseCalls += 1;
|
||||
},
|
||||
{
|
||||
isRateLimit: false,
|
||||
},
|
||||
);
|
||||
|
||||
assert.equal(pauseCalls, 1);
|
||||
assert.deepEqual(notifications, [
|
||||
{
|
||||
message: "Auto-mode paused due to provider error: connection refused",
|
||||
level: "warning",
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
|
@ -1,111 +0,0 @@
|
|||
/**
|
||||
* Complexity Routing — unit tests for M004/S03.
|
||||
*
|
||||
* Tests complexity classification and dispatch integration.
|
||||
* Uses source-level checks for the classifier module and preference wiring.
|
||||
*/
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const preferencesSrc = readFileSync(join(__dirname, "..", "preferences.ts"), "utf-8");
|
||||
const complexitySrc = readFileSync(join(__dirname, "..", "complexity-classifier.ts"), "utf-8");
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Model Config — execution_simple
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
test("preferences: GSDModelConfig includes execution_simple field", () => {
|
||||
const v1Match = preferencesSrc.match(/interface GSDModelConfig\s*\{[^}]*execution_simple/);
|
||||
assert.ok(v1Match, "GSDModelConfig should have execution_simple field");
|
||||
const v2Match = preferencesSrc.match(/interface GSDModelConfigV2\s*\{[^}]*execution_simple/);
|
||||
assert.ok(v2Match, "GSDModelConfigV2 should have execution_simple field");
|
||||
});
|
||||
|
||||
test("preferences: budget profile sets execution_simple model", () => {
|
||||
const budgetIdx = preferencesSrc.indexOf('case "budget":');
|
||||
const balancedIdx = preferencesSrc.indexOf('case "balanced":');
|
||||
const budgetBlock = preferencesSrc.slice(budgetIdx, balancedIdx);
|
||||
assert.ok(budgetBlock.includes("execution_simple:"), "budget profile should set execution_simple");
|
||||
});
|
||||
|
||||
test("preferences: resolveModelWithFallbacksForUnit handles execute-task-simple", () => {
|
||||
assert.ok(
|
||||
preferencesSrc.includes('"execute-task-simple"'),
|
||||
"should have execute-task-simple case in model resolution",
|
||||
);
|
||||
});
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Classifier Module Structure
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
test("complexity: module exports classifyUnitComplexity function", () => {
|
||||
assert.ok(
|
||||
complexitySrc.includes("export function classifyUnitComplexity"),
|
||||
"should export classifyUnitComplexity",
|
||||
);
|
||||
});
|
||||
|
||||
test("complexity: module exports ComplexityTier type", () => {
|
||||
assert.ok(
|
||||
complexitySrc.includes("export type ComplexityTier"),
|
||||
"should export ComplexityTier type",
|
||||
);
|
||||
});
|
||||
|
||||
test("complexity: module exports tierLabel function", () => {
|
||||
assert.ok(
|
||||
complexitySrc.includes("export function tierLabel"),
|
||||
"should export tierLabel for dashboard display",
|
||||
);
|
||||
});
|
||||
|
||||
test("complexity: module exports tierOrdinal function", () => {
|
||||
assert.ok(
|
||||
complexitySrc.includes("export function tierOrdinal"),
|
||||
"should export tierOrdinal for tier comparison",
|
||||
);
|
||||
});
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Unit Complexity Classification (from #579 — combined)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
test("unit-classify: classifyUnitComplexity is exported", () => {
|
||||
assert.ok(
|
||||
complexitySrc.includes("export function classifyUnitComplexity"),
|
||||
"should export classifyUnitComplexity",
|
||||
);
|
||||
});
|
||||
|
||||
test("unit-classify: unit type tier mapping exists", () => {
|
||||
assert.ok(complexitySrc.includes("UNIT_TYPE_TIERS"), "should have unit type tier mapping");
|
||||
assert.ok(complexitySrc.includes('"complete-slice": "light"'), "complete-slice should be light");
|
||||
assert.ok(complexitySrc.includes('"replan-slice": "heavy"'), "replan-slice should be heavy");
|
||||
});
|
||||
|
||||
test("unit-classify: hook units default to light", () => {
|
||||
assert.ok(
|
||||
complexitySrc.includes('startsWith("hook/")') && complexitySrc.includes('"light"'),
|
||||
"hook units should default to light tier",
|
||||
);
|
||||
});
|
||||
|
||||
test("unit-classify: budget pressure has graduated thresholds", () => {
|
||||
assert.ok(complexitySrc.includes("budgetPct >= 0.9"), "should have 90% threshold");
|
||||
assert.ok(complexitySrc.includes("budgetPct >= 0.75"), "should have 75% threshold");
|
||||
assert.ok(complexitySrc.includes("budgetPct < 0.5"), "should skip below 50%");
|
||||
});
|
||||
|
||||
test("unit-classify: tierLabel function exists", () => {
|
||||
assert.ok(
|
||||
complexitySrc.includes("export function tierLabel") ||
|
||||
complexitySrc.includes("export { tierLabel"),
|
||||
"should export tierLabel for dashboard display",
|
||||
);
|
||||
});
|
||||
|
|
@ -1,176 +0,0 @@
|
|||
/**
|
||||
* Tests for GSD metrics disk I/O — init, snapshot, load/save cycle.
|
||||
* Uses a temp directory to avoid touching real .gsd/ state.
|
||||
*/
|
||||
|
||||
import { mkdtempSync, mkdirSync, readFileSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import {
|
||||
initMetrics,
|
||||
resetMetrics,
|
||||
getLedger,
|
||||
snapshotUnitMetrics,
|
||||
type MetricsLedger,
|
||||
} from "../metrics.js";
|
||||
import { createTestContext } from './test-helpers.ts';
|
||||
|
||||
const { assertEq, assertTrue, report } = createTestContext();
|
||||
// ─── Setup ────────────────────────────────────────────────────────────────────
|
||||
|
||||
const tmpBase = mkdtempSync(join(tmpdir(), "gsd-metrics-test-"));
|
||||
mkdirSync(join(tmpBase, ".gsd"), { recursive: true });
|
||||
|
||||
// Mock ExtensionContext with session entries
|
||||
function mockCtx(messages: any[] = []): any {
|
||||
const entries = messages.map((msg, i) => ({
|
||||
type: "message",
|
||||
id: `entry-${i}`,
|
||||
parentId: i > 0 ? `entry-${i - 1}` : null,
|
||||
timestamp: new Date().toISOString(),
|
||||
message: msg,
|
||||
}));
|
||||
return {
|
||||
sessionManager: {
|
||||
getEntries: () => entries,
|
||||
},
|
||||
model: { id: "claude-sonnet-4-20250514" },
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Tests ────────────────────────────────────────────────────────────────────
|
||||
|
||||
console.log("\n=== initMetrics / getLedger ===");
|
||||
|
||||
{
|
||||
resetMetrics();
|
||||
assertTrue(getLedger() === null, "ledger null before init");
|
||||
|
||||
initMetrics(tmpBase);
|
||||
const ledger = getLedger();
|
||||
assertTrue(ledger !== null, "ledger not null after init");
|
||||
assertEq(ledger!.version, 1, "version is 1");
|
||||
assertEq(ledger!.units.length, 0, "no units initially");
|
||||
}
|
||||
|
||||
console.log("\n=== snapshotUnitMetrics ===");
|
||||
|
||||
{
|
||||
resetMetrics();
|
||||
initMetrics(tmpBase);
|
||||
|
||||
// Simulate a session with assistant messages containing usage data
|
||||
const ctx = mockCtx([
|
||||
{ role: "user", content: "Do the thing" },
|
||||
{
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "text", text: "I'll do the thing" },
|
||||
{ type: "tool_call", id: "tc1", name: "bash", input: {} },
|
||||
],
|
||||
usage: {
|
||||
input: 5000,
|
||||
output: 2000,
|
||||
cacheRead: 3000,
|
||||
cacheWrite: 500,
|
||||
totalTokens: 10500,
|
||||
cost: { input: 0.015, output: 0.03, cacheRead: 0.003, cacheWrite: 0.002, total: 0.05 },
|
||||
},
|
||||
},
|
||||
{ role: "toolResult", toolCallId: "tc1", content: [{ type: "text", text: "ok" }] },
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "Done!" }],
|
||||
usage: {
|
||||
input: 8000,
|
||||
output: 1000,
|
||||
cacheRead: 6000,
|
||||
cacheWrite: 200,
|
||||
totalTokens: 15200,
|
||||
cost: { input: 0.024, output: 0.015, cacheRead: 0.006, cacheWrite: 0.001, total: 0.046 },
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
||||
const unit = snapshotUnitMetrics(ctx, "execute-task", "M001/S01/T01", Date.now() - 5000, "claude-sonnet-4-20250514");
|
||||
|
||||
assertTrue(unit !== null, "unit returned");
|
||||
assertEq(unit!.type, "execute-task", "type");
|
||||
assertEq(unit!.id, "M001/S01/T01", "id");
|
||||
assertEq(unit!.tokens.input, 13000, "input tokens (5000+8000)");
|
||||
assertEq(unit!.tokens.output, 3000, "output tokens (2000+1000)");
|
||||
assertEq(unit!.tokens.cacheRead, 9000, "cacheRead (3000+6000)");
|
||||
assertEq(unit!.tokens.total, 25700, "total tokens (10500+15200)");
|
||||
assertTrue(Math.abs(unit!.cost - 0.096) < 0.001, `cost ~0.096 (got ${unit!.cost})`);
|
||||
assertEq(unit!.toolCalls, 1, "1 tool call");
|
||||
assertEq(unit!.assistantMessages, 2, "2 assistant messages");
|
||||
assertEq(unit!.userMessages, 1, "1 user message");
|
||||
|
||||
// Verify ledger persisted
|
||||
const ledger = getLedger()!;
|
||||
assertEq(ledger.units.length, 1, "1 unit in ledger");
|
||||
}
|
||||
|
||||
console.log("\n=== Persistence across init/reset cycles ===");
|
||||
|
||||
{
|
||||
// Reset and re-init — should load from disk
|
||||
resetMetrics();
|
||||
initMetrics(tmpBase);
|
||||
|
||||
const ledger = getLedger()!;
|
||||
assertEq(ledger.units.length, 1, "unit survived reset+init");
|
||||
assertEq(ledger.units[0].id, "M001/S01/T01", "correct unit ID");
|
||||
|
||||
// Add another unit
|
||||
const ctx = mockCtx([
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "Research complete" }],
|
||||
usage: {
|
||||
input: 3000, output: 1500, cacheRead: 1000, cacheWrite: 300, totalTokens: 5800,
|
||||
cost: { input: 0.009, output: 0.023, cacheRead: 0.001, cacheWrite: 0.001, total: 0.034 },
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
||||
snapshotUnitMetrics(ctx, "research-slice", "M001/S02", Date.now() - 3000, "claude-sonnet-4-20250514");
|
||||
|
||||
// Verify both units persisted
|
||||
resetMetrics();
|
||||
initMetrics(tmpBase);
|
||||
const final = getLedger()!;
|
||||
assertEq(final.units.length, 2, "2 units after second snapshot");
|
||||
}
|
||||
|
||||
console.log("\n=== File content verification ===");
|
||||
|
||||
{
|
||||
const raw = readFileSync(join(tmpBase, ".gsd", "metrics.json"), "utf-8");
|
||||
const parsed: MetricsLedger = JSON.parse(raw);
|
||||
assertEq(parsed.version, 1, "file version is 1");
|
||||
assertEq(parsed.units.length, 2, "file has 2 units");
|
||||
assertTrue(parsed.projectStartedAt > 0, "projectStartedAt is set");
|
||||
}
|
||||
|
||||
console.log("\n=== Empty session handling ===");
|
||||
|
||||
{
|
||||
resetMetrics();
|
||||
initMetrics(tmpBase);
|
||||
|
||||
// Empty session — no messages
|
||||
const ctx = mockCtx([]);
|
||||
const unit = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", Date.now(), "test-model");
|
||||
assertTrue(unit === null, "returns null for empty session");
|
||||
|
||||
// Ledger shouldn't have grown
|
||||
assertEq(getLedger()!.units.length, 2, "still 2 units (empty session not added)");
|
||||
}
|
||||
|
||||
// ─── Cleanup ──────────────────────────────────────────────────────────────────
|
||||
|
||||
resetMetrics();
|
||||
rmSync(tmpBase, { recursive: true, force: true });
|
||||
|
||||
report();
|
||||
|
|
@ -1,12 +1,17 @@
|
|||
/**
|
||||
* Tests for GSD metrics aggregation logic.
|
||||
* Tests the pure functions — no file I/O, no extension context.
|
||||
* Metrics tests — consolidated from:
|
||||
* - metrics.test.ts (pure aggregation functions, formatting)
|
||||
* - metrics-io.test.ts (disk I/O, init, snapshot, persistence)
|
||||
*/
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdtempSync, mkdirSync, readFileSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import {
|
||||
type UnitMetrics,
|
||||
type TokenCounts,
|
||||
type BudgetInfo,
|
||||
type MetricsLedger,
|
||||
classifyUnitPhase,
|
||||
aggregateByPhase,
|
||||
aggregateBySlice,
|
||||
|
|
@ -14,10 +19,13 @@ import {
|
|||
getProjectTotals,
|
||||
formatCost,
|
||||
formatTokenCount,
|
||||
initMetrics,
|
||||
resetMetrics,
|
||||
getLedger,
|
||||
snapshotUnitMetrics,
|
||||
} from "../metrics.js";
|
||||
import { createTestContext } from './test-helpers.ts';
|
||||
|
||||
// ─── Test helpers ─────────────────────────────────────────────────────────────
|
||||
// ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
function makeUnit(overrides: Partial<UnitMetrics> = {}): UnitMetrics {
|
||||
return {
|
||||
|
|
@ -35,59 +43,72 @@ function makeUnit(overrides: Partial<UnitMetrics> = {}): UnitMetrics {
|
|||
};
|
||||
}
|
||||
|
||||
const { assertEq, assertTrue, report } = createTestContext();
|
||||
|
||||
function assertClose(actual: number, expected: number, tolerance: number, message: string): void {
|
||||
assertTrue(Math.abs(actual - expected) <= tolerance, `${message} — expected ~${expected}, got ${actual}`);
|
||||
function mockCtx(messages: any[] = []): any {
|
||||
const entries = messages.map((msg, i) => ({
|
||||
type: "message", id: `entry-${i}`,
|
||||
parentId: i > 0 ? `entry-${i - 1}` : null,
|
||||
timestamp: new Date().toISOString(), message: msg,
|
||||
}));
|
||||
return { sessionManager: { getEntries: () => entries }, model: { id: "claude-sonnet-4-20250514" } };
|
||||
}
|
||||
|
||||
// ─── Phase classification ─────────────────────────────────────────────────────
|
||||
// ── Phase classification ─────────────────────────────────────────────────────
|
||||
|
||||
console.log("\n=== classifyUnitPhase ===");
|
||||
test("classifyUnitPhase maps unit types to phases", () => {
|
||||
assert.equal(classifyUnitPhase("research-milestone"), "research");
|
||||
assert.equal(classifyUnitPhase("research-slice"), "research");
|
||||
assert.equal(classifyUnitPhase("plan-milestone"), "planning");
|
||||
assert.equal(classifyUnitPhase("plan-slice"), "planning");
|
||||
assert.equal(classifyUnitPhase("execute-task"), "execution");
|
||||
assert.equal(classifyUnitPhase("complete-slice"), "completion");
|
||||
assert.equal(classifyUnitPhase("reassess-roadmap"), "reassessment");
|
||||
assert.equal(classifyUnitPhase("unknown-thing"), "execution");
|
||||
});
|
||||
|
||||
assertEq(classifyUnitPhase("research-milestone"), "research", "research-milestone → research");
|
||||
assertEq(classifyUnitPhase("research-slice"), "research", "research-slice → research");
|
||||
assertEq(classifyUnitPhase("plan-milestone"), "planning", "plan-milestone → planning");
|
||||
assertEq(classifyUnitPhase("plan-slice"), "planning", "plan-slice → planning");
|
||||
assertEq(classifyUnitPhase("execute-task"), "execution", "execute-task → execution");
|
||||
assertEq(classifyUnitPhase("complete-slice"), "completion", "complete-slice → completion");
|
||||
assertEq(classifyUnitPhase("reassess-roadmap"), "reassessment", "reassess-roadmap → reassessment");
|
||||
assertEq(classifyUnitPhase("unknown-thing"), "execution", "unknown → execution (fallback)");
|
||||
// ── getProjectTotals ─────────────────────────────────────────────────────────
|
||||
|
||||
// ─── getProjectTotals ─────────────────────────────────────────────────────────
|
||||
|
||||
console.log("\n=== getProjectTotals ===");
|
||||
|
||||
{
|
||||
test("getProjectTotals aggregates tokens, cost, duration, and tool calls", () => {
|
||||
const units = [
|
||||
makeUnit({ tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100, total: 1800 }, cost: 0.05, toolCalls: 3, startedAt: 1000, finishedAt: 2000 }),
|
||||
makeUnit({ tokens: { input: 2000, output: 1000, cacheRead: 400, cacheWrite: 200, total: 3600 }, cost: 0.10, toolCalls: 5, startedAt: 2000, finishedAt: 4000 }),
|
||||
];
|
||||
const totals = getProjectTotals(units);
|
||||
assert.equal(totals.units, 2);
|
||||
assert.equal(totals.tokens.input, 3000);
|
||||
assert.equal(totals.tokens.output, 1500);
|
||||
assert.equal(totals.tokens.total, 5400);
|
||||
assert.ok(Math.abs(totals.cost - 0.15) < 0.001);
|
||||
assert.equal(totals.toolCalls, 8);
|
||||
assert.equal(totals.duration, 3000);
|
||||
});
|
||||
|
||||
assertEq(totals.units, 2, "total units");
|
||||
assertEq(totals.tokens.input, 3000, "total input tokens");
|
||||
assertEq(totals.tokens.output, 1500, "total output tokens");
|
||||
assertEq(totals.tokens.cacheRead, 600, "total cacheRead");
|
||||
assertEq(totals.tokens.cacheWrite, 300, "total cacheWrite");
|
||||
assertEq(totals.tokens.total, 5400, "total tokens");
|
||||
assertClose(totals.cost, 0.15, 0.001, "total cost");
|
||||
assertEq(totals.toolCalls, 8, "total tool calls");
|
||||
assertEq(totals.duration, 3000, "total duration");
|
||||
}
|
||||
|
||||
{
|
||||
test("getProjectTotals handles empty input", () => {
|
||||
const totals = getProjectTotals([]);
|
||||
assertEq(totals.units, 0, "empty: zero units");
|
||||
assertEq(totals.cost, 0, "empty: zero cost");
|
||||
assertEq(totals.tokens.total, 0, "empty: zero tokens");
|
||||
}
|
||||
assert.equal(totals.units, 0);
|
||||
assert.equal(totals.cost, 0);
|
||||
assert.equal(totals.tokens.total, 0);
|
||||
});
|
||||
|
||||
// ─── aggregateByPhase ─────────────────────────────────────────────────────────
|
||||
test("getProjectTotals aggregates budget fields", () => {
|
||||
const units = [
|
||||
makeUnit({ truncationSections: 3, continueHereFired: true }),
|
||||
makeUnit({ truncationSections: 2, continueHereFired: false }),
|
||||
makeUnit({ truncationSections: 1, continueHereFired: true }),
|
||||
];
|
||||
const totals = getProjectTotals(units);
|
||||
assert.equal(totals.totalTruncationSections, 6);
|
||||
assert.equal(totals.continueHereFiredCount, 2);
|
||||
});
|
||||
|
||||
console.log("\n=== aggregateByPhase ===");
|
||||
test("getProjectTotals defaults budget fields to 0 for old units", () => {
|
||||
const totals = getProjectTotals([makeUnit(), makeUnit()]);
|
||||
assert.equal(totals.totalTruncationSections, 0);
|
||||
assert.equal(totals.continueHereFiredCount, 0);
|
||||
});
|
||||
|
||||
{
|
||||
// ── aggregateByPhase ─────────────────────────────────────────────────────────
|
||||
|
||||
test("aggregateByPhase groups units by phase and sums costs", () => {
|
||||
const units = [
|
||||
makeUnit({ type: "research-milestone", cost: 0.02 }),
|
||||
makeUnit({ type: "research-slice", cost: 0.03 }),
|
||||
|
|
@ -99,28 +120,17 @@ console.log("\n=== aggregateByPhase ===");
|
|||
makeUnit({ type: "reassess-roadmap", cost: 0.005 }),
|
||||
];
|
||||
const phases = aggregateByPhase(units);
|
||||
assert.equal(phases.length, 5);
|
||||
assert.equal(phases[0].phase, "research");
|
||||
assert.equal(phases[0].units, 2);
|
||||
assert.ok(Math.abs(phases[0].cost - 0.05) < 0.001);
|
||||
assert.equal(phases[2].phase, "execution");
|
||||
assert.ok(Math.abs(phases[2].cost - 0.18) < 0.001);
|
||||
});
|
||||
|
||||
assertEq(phases.length, 5, "5 phases");
|
||||
assertEq(phases[0].phase, "research", "first phase is research");
|
||||
assertEq(phases[0].units, 2, "2 research units");
|
||||
assertClose(phases[0].cost, 0.05, 0.001, "research cost");
|
||||
// ── aggregateBySlice ─────────────────────────────────────────────────────────
|
||||
|
||||
assertEq(phases[1].phase, "planning", "second phase is planning");
|
||||
assertEq(phases[1].units, 2, "2 planning units");
|
||||
|
||||
assertEq(phases[2].phase, "execution", "third phase is execution");
|
||||
assertEq(phases[2].units, 2, "2 execution units");
|
||||
assertClose(phases[2].cost, 0.18, 0.001, "execution cost");
|
||||
|
||||
assertEq(phases[3].phase, "completion", "fourth phase is completion");
|
||||
assertEq(phases[4].phase, "reassessment", "fifth phase is reassessment");
|
||||
}
|
||||
|
||||
// ─── aggregateBySlice ─────────────────────────────────────────────────────────
|
||||
|
||||
console.log("\n=== aggregateBySlice ===");
|
||||
|
||||
{
|
||||
test("aggregateBySlice groups units by slice ID", () => {
|
||||
const units = [
|
||||
makeUnit({ id: "M001/S01/T01", cost: 0.05 }),
|
||||
makeUnit({ id: "M001/S01/T02", cost: 0.04 }),
|
||||
|
|
@ -128,258 +138,116 @@ console.log("\n=== aggregateBySlice ===");
|
|||
makeUnit({ id: "M001", type: "research-milestone", cost: 0.02 }),
|
||||
];
|
||||
const slices = aggregateBySlice(units);
|
||||
|
||||
assertEq(slices.length, 3, "3 slice groups");
|
||||
|
||||
assert.equal(slices.length, 3);
|
||||
const s01 = slices.find(s => s.sliceId === "M001/S01");
|
||||
assertTrue(!!s01, "M001/S01 exists");
|
||||
assertEq(s01!.units, 2, "M001/S01 has 2 units");
|
||||
assertClose(s01!.cost, 0.09, 0.001, "M001/S01 cost");
|
||||
assert.ok(s01);
|
||||
assert.equal(s01!.units, 2);
|
||||
assert.ok(Math.abs(s01!.cost - 0.09) < 0.001);
|
||||
});
|
||||
|
||||
const s02 = slices.find(s => s.sliceId === "M001/S02");
|
||||
assertTrue(!!s02, "M001/S02 exists");
|
||||
assertEq(s02!.units, 1, "M001/S02 has 1 unit");
|
||||
// ── aggregateByModel ─────────────────────────────────────────────────────────
|
||||
|
||||
const mLevel = slices.find(s => s.sliceId === "M001");
|
||||
assertTrue(!!mLevel, "M001 (milestone-level) exists");
|
||||
}
|
||||
|
||||
// ─── aggregateByModel ─────────────────────────────────────────────────────────
|
||||
|
||||
console.log("\n=== aggregateByModel ===");
|
||||
|
||||
{
|
||||
test("aggregateByModel groups by model sorted by cost desc", () => {
|
||||
const units = [
|
||||
makeUnit({ model: "claude-sonnet-4-20250514", cost: 0.05 }),
|
||||
makeUnit({ model: "claude-sonnet-4-20250514", cost: 0.04 }),
|
||||
makeUnit({ model: "claude-opus-4-20250514", cost: 0.30 }),
|
||||
];
|
||||
const models = aggregateByModel(units);
|
||||
assert.equal(models.length, 2);
|
||||
assert.equal(models[0].model, "claude-opus-4-20250514");
|
||||
assert.equal(models[1].units, 2);
|
||||
});
|
||||
|
||||
assertEq(models.length, 2, "2 models");
|
||||
// Sorted by cost desc — opus should be first
|
||||
assertEq(models[0].model, "claude-opus-4-20250514", "opus first (higher cost)");
|
||||
assertClose(models[0].cost, 0.30, 0.001, "opus cost");
|
||||
assertEq(models[1].model, "claude-sonnet-4-20250514", "sonnet second");
|
||||
assertEq(models[1].units, 2, "sonnet has 2 units");
|
||||
}
|
||||
|
||||
// ─── formatCost ───────────────────────────────────────────────────────────────
|
||||
|
||||
console.log("\n=== formatCost ===");
|
||||
|
||||
assertEq(formatCost(0), "$0.0000", "zero cost");
|
||||
assertEq(formatCost(0.001), "$0.0010", "sub-cent cost");
|
||||
assertEq(formatCost(0.05), "$0.050", "5 cents");
|
||||
assertEq(formatCost(1.50), "$1.50", "dollar+");
|
||||
assertEq(formatCost(14.20), "$14.20", "double digits");
|
||||
|
||||
// ─── formatTokenCount ─────────────────────────────────────────────────────────
|
||||
|
||||
console.log("\n=== formatTokenCount ===");
|
||||
|
||||
assertEq(formatTokenCount(0), "0", "zero tokens");
|
||||
assertEq(formatTokenCount(500), "500", "sub-k");
|
||||
assertEq(formatTokenCount(1500), "1.5k", "1.5k");
|
||||
assertEq(formatTokenCount(150000), "150.0k", "150k");
|
||||
assertEq(formatTokenCount(1500000), "1.50M", "1.5M");
|
||||
|
||||
// ─── Backward compat: UnitMetrics without budget fields ───────────────────────
|
||||
|
||||
console.log("\n=== Backward compat: UnitMetrics without budget fields ===");
|
||||
|
||||
{
|
||||
// Simulate old metrics.json data — no budget fields present
|
||||
const oldUnit: UnitMetrics = {
|
||||
type: "execute-task",
|
||||
id: "M001/S01/T01",
|
||||
model: "claude-sonnet-4-20250514",
|
||||
startedAt: 1000,
|
||||
finishedAt: 2000,
|
||||
tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100, total: 1800 },
|
||||
cost: 0.05,
|
||||
toolCalls: 3,
|
||||
assistantMessages: 2,
|
||||
userMessages: 1,
|
||||
};
|
||||
|
||||
// All aggregation functions must work with old data
|
||||
const phases = aggregateByPhase([oldUnit]);
|
||||
assertEq(phases.length, 1, "backward compat: aggregateByPhase works");
|
||||
assertEq(phases[0].phase, "execution", "backward compat: correct phase");
|
||||
|
||||
const slices = aggregateBySlice([oldUnit]);
|
||||
assertEq(slices.length, 1, "backward compat: aggregateBySlice works");
|
||||
assertEq(slices[0].sliceId, "M001/S01", "backward compat: correct sliceId");
|
||||
|
||||
const models = aggregateByModel([oldUnit]);
|
||||
assertEq(models.length, 1, "backward compat: aggregateByModel works");
|
||||
|
||||
const totals = getProjectTotals([oldUnit]);
|
||||
assertEq(totals.units, 1, "backward compat: getProjectTotals works");
|
||||
assertClose(totals.cost, 0.05, 0.001, "backward compat: cost preserved");
|
||||
|
||||
// Budget fields should be undefined
|
||||
assertEq(oldUnit.contextWindowTokens, undefined, "backward compat: no contextWindowTokens");
|
||||
assertEq(oldUnit.truncationSections, undefined, "backward compat: no truncationSections");
|
||||
assertEq(oldUnit.continueHereFired, undefined, "backward compat: no continueHereFired");
|
||||
}
|
||||
|
||||
// ─── UnitMetrics with budget fields populated ─────────────────────────────────
|
||||
|
||||
console.log("\n=== UnitMetrics with budget fields ===");
|
||||
|
||||
{
|
||||
const unitWithBudget: UnitMetrics = {
|
||||
type: "execute-task",
|
||||
id: "M002/S01/T03",
|
||||
model: "claude-sonnet-4-20250514",
|
||||
startedAt: 5000,
|
||||
finishedAt: 10000,
|
||||
tokens: { input: 3000, output: 1500, cacheRead: 600, cacheWrite: 300, total: 5400 },
|
||||
cost: 0.12,
|
||||
toolCalls: 8,
|
||||
assistantMessages: 4,
|
||||
userMessages: 3,
|
||||
contextWindowTokens: 200000,
|
||||
truncationSections: 3,
|
||||
continueHereFired: true,
|
||||
};
|
||||
|
||||
// Budget fields are present
|
||||
assertEq(unitWithBudget.contextWindowTokens, 200000, "budget: contextWindowTokens present");
|
||||
assertEq(unitWithBudget.truncationSections, 3, "budget: truncationSections present");
|
||||
assertEq(unitWithBudget.continueHereFired, true, "budget: continueHereFired present");
|
||||
|
||||
// Aggregation still works correctly with budget fields present
|
||||
const phases = aggregateByPhase([unitWithBudget]);
|
||||
assertEq(phases.length, 1, "budget: aggregateByPhase works");
|
||||
assertClose(phases[0].cost, 0.12, 0.001, "budget: cost aggregated correctly");
|
||||
|
||||
const slices = aggregateBySlice([unitWithBudget]);
|
||||
assertEq(slices.length, 1, "budget: aggregateBySlice works");
|
||||
assertEq(slices[0].sliceId, "M002/S01", "budget: sliceId correct");
|
||||
|
||||
const models = aggregateByModel([unitWithBudget]);
|
||||
assertEq(models.length, 1, "budget: aggregateByModel works");
|
||||
|
||||
const totals = getProjectTotals([unitWithBudget]);
|
||||
assertEq(totals.units, 1, "budget: getProjectTotals works");
|
||||
assertEq(totals.toolCalls, 8, "budget: toolCalls aggregated");
|
||||
|
||||
// Mix old and new units together
|
||||
const oldUnit = makeUnit(); // no budget fields
|
||||
const mixed = [oldUnit, unitWithBudget];
|
||||
const mixedTotals = getProjectTotals(mixed);
|
||||
assertEq(mixedTotals.units, 2, "mixed: 2 units total");
|
||||
assertClose(mixedTotals.cost, 0.17, 0.001, "mixed: costs summed correctly");
|
||||
|
||||
const mixedPhases = aggregateByPhase(mixed);
|
||||
assertEq(mixedPhases.length, 1, "mixed: both are execution phase");
|
||||
assertEq(mixedPhases[0].units, 2, "mixed: both counted");
|
||||
}
|
||||
|
||||
// ─── aggregateByModel: contextWindowTokens pick logic ─────────────────────────
|
||||
|
||||
console.log("\n=== aggregateByModel: contextWindowTokens pick logic ===");
|
||||
|
||||
{
|
||||
// Single unit with contextWindowTokens — aggregate picks it
|
||||
const units = [
|
||||
makeUnit({ model: "claude-sonnet-4-20250514", contextWindowTokens: 200000, cost: 0.05 }),
|
||||
];
|
||||
const models = aggregateByModel(units);
|
||||
assertEq(models.length, 1, "ctxWindow: one model");
|
||||
assertEq(models[0].contextWindowTokens, 200000, "ctxWindow: picks value from unit");
|
||||
}
|
||||
|
||||
{
|
||||
// Two units same model with different context windows — first defined value wins
|
||||
test("aggregateByModel picks first defined contextWindowTokens", () => {
|
||||
const units = [
|
||||
makeUnit({ model: "claude-sonnet-4-20250514", contextWindowTokens: 200000, cost: 0.05 }),
|
||||
makeUnit({ model: "claude-sonnet-4-20250514", contextWindowTokens: 150000, cost: 0.04 }),
|
||||
];
|
||||
const models = aggregateByModel(units);
|
||||
assertEq(models.length, 1, "ctxWindow first-wins: one model");
|
||||
assertEq(models[0].contextWindowTokens, 200000, "ctxWindow first-wins: first value kept");
|
||||
}
|
||||
assert.equal(models[0].contextWindowTokens, 200000);
|
||||
});
|
||||
|
||||
{
|
||||
// First unit undefined, second has value — second is picked
|
||||
const units = [
|
||||
makeUnit({ model: "claude-sonnet-4-20250514", cost: 0.05 }),
|
||||
makeUnit({ model: "claude-sonnet-4-20250514", contextWindowTokens: 200000, cost: 0.04 }),
|
||||
];
|
||||
const models = aggregateByModel(units);
|
||||
assertEq(models[0].contextWindowTokens, 200000, "ctxWindow: picks first defined, not first unit");
|
||||
}
|
||||
// ── Formatting ───────────────────────────────────────────────────────────────
|
||||
|
||||
{
|
||||
// Old units without contextWindowTokens — aggregate has undefined
|
||||
const units = [
|
||||
makeUnit({ model: "claude-sonnet-4-20250514", cost: 0.05 }),
|
||||
makeUnit({ model: "claude-sonnet-4-20250514", cost: 0.04 }),
|
||||
];
|
||||
const models = aggregateByModel(units);
|
||||
assertEq(models[0].contextWindowTokens, undefined, "ctxWindow: undefined when no unit has it");
|
||||
}
|
||||
test("formatCost formats dollar amounts correctly", () => {
|
||||
assert.equal(formatCost(0), "$0.0000");
|
||||
assert.equal(formatCost(0.001), "$0.0010");
|
||||
assert.equal(formatCost(0.05), "$0.050");
|
||||
assert.equal(formatCost(1.50), "$1.50");
|
||||
assert.equal(formatCost(14.20), "$14.20");
|
||||
});
|
||||
|
||||
{
|
||||
// Multiple models — each gets its own context window
|
||||
const units = [
|
||||
makeUnit({ model: "claude-sonnet-4-20250514", contextWindowTokens: 200000, cost: 0.05 }),
|
||||
makeUnit({ model: "claude-opus-4-20250514", contextWindowTokens: 200000, cost: 0.30 }),
|
||||
];
|
||||
const models = aggregateByModel(units);
|
||||
assertEq(models.length, 2, "ctxWindow multi-model: 2 models");
|
||||
const opus = models.find(m => m.model === "claude-opus-4-20250514");
|
||||
const sonnet = models.find(m => m.model === "claude-sonnet-4-20250514");
|
||||
assertEq(opus!.contextWindowTokens, 200000, "ctxWindow multi-model: opus has value");
|
||||
assertEq(sonnet!.contextWindowTokens, 200000, "ctxWindow multi-model: sonnet has value");
|
||||
}
|
||||
test("formatTokenCount uses k/M suffixes", () => {
|
||||
assert.equal(formatTokenCount(0), "0");
|
||||
assert.equal(formatTokenCount(500), "500");
|
||||
assert.equal(formatTokenCount(1500), "1.5k");
|
||||
assert.equal(formatTokenCount(150000), "150.0k");
|
||||
assert.equal(formatTokenCount(1500000), "1.50M");
|
||||
});
|
||||
|
||||
// ─── getProjectTotals: budget field aggregation ───────────────────────────────
|
||||
// ── Backward compatibility ───────────────────────────────────────────────────
|
||||
|
||||
console.log("\n=== getProjectTotals: budget field aggregation ===");
|
||||
test("old UnitMetrics without budget fields work with all aggregation functions", () => {
|
||||
const oldUnit = makeUnit();
|
||||
assert.equal(aggregateByPhase([oldUnit]).length, 1);
|
||||
assert.equal(aggregateBySlice([oldUnit]).length, 1);
|
||||
assert.equal(aggregateByModel([oldUnit]).length, 1);
|
||||
assert.equal(getProjectTotals([oldUnit]).units, 1);
|
||||
assert.equal(oldUnit.contextWindowTokens, undefined);
|
||||
});
|
||||
|
||||
{
|
||||
// Units with truncationSections and continueHereFired — verify sums/counts
|
||||
const units = [
|
||||
makeUnit({ truncationSections: 3, continueHereFired: true }),
|
||||
makeUnit({ truncationSections: 2, continueHereFired: false }),
|
||||
makeUnit({ truncationSections: 1, continueHereFired: true }),
|
||||
];
|
||||
const totals = getProjectTotals(units);
|
||||
assertEq(totals.totalTruncationSections, 6, "budget totals: truncation sections summed");
|
||||
assertEq(totals.continueHereFiredCount, 2, "budget totals: continueHereFired counted");
|
||||
}
|
||||
// ── Disk I/O ─────────────────────────────────────────────────────────────────
|
||||
|
||||
{
|
||||
// Old units without budget fields — verify 0 defaults
|
||||
const units = [makeUnit(), makeUnit()];
|
||||
const totals = getProjectTotals(units);
|
||||
assertEq(totals.totalTruncationSections, 0, "budget totals backward compat: truncation = 0");
|
||||
assertEq(totals.continueHereFiredCount, 0, "budget totals backward compat: continueHere = 0");
|
||||
}
|
||||
test("initMetrics creates ledger, snapshotUnitMetrics persists across resets", () => {
|
||||
const tmpBase = mkdtempSync(join(tmpdir(), "gsd-metrics-test-"));
|
||||
mkdirSync(join(tmpBase, ".gsd"), { recursive: true });
|
||||
|
||||
{
|
||||
// Mixed old and new units
|
||||
const units = [
|
||||
makeUnit(), // old, no budget fields
|
||||
makeUnit({ truncationSections: 5, continueHereFired: true }),
|
||||
];
|
||||
const totals = getProjectTotals(units);
|
||||
assertEq(totals.totalTruncationSections, 5, "budget totals mixed: only new unit contributes");
|
||||
assertEq(totals.continueHereFiredCount, 1, "budget totals mixed: only one fired");
|
||||
}
|
||||
try {
|
||||
resetMetrics();
|
||||
assert.equal(getLedger(), null);
|
||||
|
||||
{
|
||||
// Empty input — safe defaults
|
||||
const totals = getProjectTotals([]);
|
||||
assertEq(totals.totalTruncationSections, 0, "budget totals empty: truncation = 0");
|
||||
assertEq(totals.continueHereFiredCount, 0, "budget totals empty: continueHere = 0");
|
||||
}
|
||||
initMetrics(tmpBase);
|
||||
const ledger = getLedger();
|
||||
assert.ok(ledger);
|
||||
assert.equal(ledger!.version, 1);
|
||||
assert.equal(ledger!.units.length, 0);
|
||||
|
||||
// ─── Summary ──────────────────────────────────────────────────────────────────
|
||||
// Snapshot a unit
|
||||
const ctx = mockCtx([
|
||||
{ role: "user", content: "Do the thing" },
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "Done" }],
|
||||
usage: {
|
||||
input: 5000, output: 2000, cacheRead: 3000, cacheWrite: 500, totalTokens: 10500,
|
||||
cost: { input: 0.015, output: 0.03, cacheRead: 0.003, cacheWrite: 0.002, total: 0.05 },
|
||||
},
|
||||
},
|
||||
]);
|
||||
const unit = snapshotUnitMetrics(ctx, "execute-task", "M001/S01/T01", Date.now() - 5000, "claude-sonnet-4-20250514");
|
||||
assert.ok(unit);
|
||||
assert.equal(unit!.type, "execute-task");
|
||||
assert.equal(unit!.tokens.input, 5000);
|
||||
|
||||
report();
|
||||
// Persist and reload
|
||||
resetMetrics();
|
||||
initMetrics(tmpBase);
|
||||
assert.equal(getLedger()!.units.length, 1);
|
||||
assert.equal(getLedger()!.units[0].id, "M001/S01/T01");
|
||||
|
||||
// Verify file content
|
||||
const raw = readFileSync(join(tmpBase, ".gsd", "metrics.json"), "utf-8");
|
||||
const parsed: MetricsLedger = JSON.parse(raw);
|
||||
assert.equal(parsed.version, 1);
|
||||
assert.equal(parsed.units.length, 1);
|
||||
|
||||
// Empty session returns null
|
||||
const emptyUnit = snapshotUnitMetrics(mockCtx([]), "plan-slice", "M001/S01", Date.now(), "test-model");
|
||||
assert.equal(emptyUnit, null);
|
||||
assert.equal(getLedger()!.units.length, 1);
|
||||
} finally {
|
||||
resetMetrics();
|
||||
rmSync(tmpBase, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,104 +0,0 @@
|
|||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
// Rather than mocking the entire `index.ts` extension initialization — which touches the
// disk and parses files — we unit-test the pure helpers directly. For
// `resolveModelWithFallbacksForUnit` we only assert on the returned format here;
// the fallback rotation logic itself was verified manually.
|
||||
|
||||
import { getNextFallbackModel, isTransientNetworkError } from "../preferences.ts";
|
||||
|
||||
test("getNextFallbackModel selects next fallback if current is a fallback", () => {
|
||||
const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] };
|
||||
const currentModelId = "model-b";
|
||||
|
||||
const nextModelId = getNextFallbackModel(currentModelId, modelConfig);
|
||||
|
||||
assert.equal(nextModelId, "model-c", "should select next model after current fallback");
|
||||
});
|
||||
|
||||
test("getNextFallbackModel returns undefined if fallbacks exhausted", () => {
|
||||
const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] };
|
||||
const currentModelId = "model-c";
|
||||
|
||||
const nextModelId = getNextFallbackModel(currentModelId, modelConfig);
|
||||
|
||||
assert.equal(nextModelId, undefined, "should return undefined when exhausted");
|
||||
});
|
||||
|
||||
test("getNextFallbackModel finds current model when formatted with provider", () => {
|
||||
const modelConfig = { primary: "p/model-a", fallbacks: ["p/model-b"] };
|
||||
const currentModelId = "model-a"; // context model doesn't always have provider in ID
|
||||
|
||||
const nextModelId = getNextFallbackModel(currentModelId, modelConfig);
|
||||
|
||||
assert.equal(nextModelId, "p/model-b", "should select next model after current with provider format");
|
||||
});
|
||||
|
||||
test("getNextFallbackModel returns primary if current model is not in the list", () => {
|
||||
const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] };
|
||||
const currentModelId = "model-x"; // completely different model manually selected
|
||||
|
||||
const nextModelId = getNextFallbackModel(currentModelId, modelConfig);
|
||||
|
||||
assert.equal(nextModelId, "model-a", "should default to primary if current is unknown");
|
||||
});
|
||||
|
||||
test("getNextFallbackModel returns primary if current model is undefined", () => {
|
||||
const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] };
|
||||
const currentModelId = undefined;
|
||||
|
||||
const nextModelId = getNextFallbackModel(currentModelId, modelConfig);
|
||||
|
||||
assert.equal(nextModelId, "model-a", "should default to primary if current is undefined");
|
||||
});
|
||||
|
||||
// ── isTransientNetworkError tests ────────────────────────────────────────────
|
||||
|
||||
test("isTransientNetworkError detects ECONNRESET", () => {
|
||||
assert.ok(isTransientNetworkError("fetch failed: ECONNRESET"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects ETIMEDOUT", () => {
|
||||
assert.ok(isTransientNetworkError("ETIMEDOUT: request timed out"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects generic network error", () => {
|
||||
assert.ok(isTransientNetworkError("network error"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects socket hang up", () => {
|
||||
assert.ok(isTransientNetworkError("socket hang up"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects fetch failed", () => {
|
||||
assert.ok(isTransientNetworkError("fetch failed"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects connection reset", () => {
|
||||
assert.ok(isTransientNetworkError("connection was reset by peer"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects DNS errors", () => {
|
||||
assert.ok(isTransientNetworkError("dns resolution failed"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError rejects auth errors", () => {
|
||||
assert.ok(!isTransientNetworkError("unauthorized: invalid API key"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError rejects quota errors", () => {
|
||||
assert.ok(!isTransientNetworkError("quota exceeded"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError rejects billing errors", () => {
|
||||
assert.ok(!isTransientNetworkError("billing issue: network payment required"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError rejects empty string", () => {
|
||||
assert.ok(!isTransientNetworkError(""));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError rejects non-network errors", () => {
|
||||
assert.ok(!isTransientNetworkError("model not found"));
|
||||
});
|
||||
|
|
@ -1,95 +0,0 @@
|
|||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { classifyProviderError } from "../provider-error-pause.ts";
|
||||
|
||||
// ── Rate limit detection ─────────────────────────────────────────────────────
|
||||
|
||||
test("classifyProviderError detects rate limit from 429", () => {
|
||||
const result = classifyProviderError("HTTP 429 Too Many Requests");
|
||||
assert.ok(result.isTransient);
|
||||
assert.ok(result.isRateLimit);
|
||||
assert.ok(result.suggestedDelayMs > 0);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects rate limit from message", () => {
|
||||
const result = classifyProviderError("rate limit exceeded");
|
||||
assert.ok(result.isTransient);
|
||||
assert.ok(result.isRateLimit);
|
||||
});
|
||||
|
||||
test("classifyProviderError extracts reset delay from message", () => {
|
||||
const result = classifyProviderError("rate limit exceeded, reset in 45s");
|
||||
assert.equal(result.suggestedDelayMs, 45000);
|
||||
});
|
||||
|
||||
test("classifyProviderError defaults to 60s for rate limit without reset", () => {
|
||||
const result = classifyProviderError("too many requests");
|
||||
assert.equal(result.suggestedDelayMs, 60000);
|
||||
});
|
||||
|
||||
// ── Server error detection ───────────────────────────────────────────────────
|
||||
|
||||
test("classifyProviderError detects Anthropic internal server error", () => {
|
||||
const msg = '{"type":"error","error":{"details":null,"type":"api_error","message":"Internal server error"}}';
|
||||
const result = classifyProviderError(msg);
|
||||
assert.ok(result.isTransient, "should be transient");
|
||||
assert.ok(!result.isRateLimit, "should not be rate limit");
|
||||
assert.equal(result.suggestedDelayMs, 30000, "should suggest 30s delay");
|
||||
});
|
||||
|
||||
test("classifyProviderError detects overloaded error", () => {
|
||||
const result = classifyProviderError("overloaded_error: Overloaded");
|
||||
assert.ok(result.isTransient);
|
||||
assert.equal(result.suggestedDelayMs, 30000);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects 503 service unavailable", () => {
|
||||
const result = classifyProviderError("503 Service Unavailable");
|
||||
assert.ok(result.isTransient);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects 502 bad gateway", () => {
|
||||
const result = classifyProviderError("502 Bad Gateway");
|
||||
assert.ok(result.isTransient);
|
||||
});
|
||||
|
||||
// ── Permanent error detection ────────────────────────────────────────────────
|
||||
|
||||
test("classifyProviderError detects auth error as permanent", () => {
|
||||
const result = classifyProviderError("unauthorized: invalid API key");
|
||||
assert.ok(!result.isTransient);
|
||||
assert.ok(!result.isRateLimit);
|
||||
assert.equal(result.suggestedDelayMs, 0);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects billing error as permanent", () => {
|
||||
const result = classifyProviderError("billing issue: payment required");
|
||||
assert.ok(!result.isTransient);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects quota exceeded as permanent", () => {
|
||||
const result = classifyProviderError("quota exceeded for this account");
|
||||
assert.ok(!result.isTransient);
|
||||
});
|
||||
|
||||
// ── Unknown errors ───────────────────────────────────────────────────────────
|
||||
|
||||
test("classifyProviderError treats unknown error as permanent", () => {
|
||||
const result = classifyProviderError("something went wrong");
|
||||
assert.ok(!result.isTransient);
|
||||
assert.equal(result.suggestedDelayMs, 0);
|
||||
});
|
||||
|
||||
test("classifyProviderError treats empty string as permanent", () => {
|
||||
const result = classifyProviderError("");
|
||||
assert.ok(!result.isTransient);
|
||||
});
|
||||
|
||||
// ── Edge: rate limit + auth (rate limit wins) ────────────────────────────────
|
||||
|
||||
test("classifyProviderError: rate limit takes precedence over auth keywords", () => {
|
||||
// Edge case: "rate limit" in message that also mentions auth
|
||||
const result = classifyProviderError("rate limit on auth endpoint");
|
||||
assert.ok(result.isTransient);
|
||||
assert.ok(result.isRateLimit);
|
||||
});
|
||||
245
src/resources/extensions/gsd/tests/provider-errors.test.ts
Normal file
245
src/resources/extensions/gsd/tests/provider-errors.test.ts
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
/**
|
||||
* Provider error handling tests — consolidated from:
|
||||
* - provider-error-classify.test.ts (classifyProviderError)
|
||||
* - network-error-fallback.test.ts (isTransientNetworkError, getNextFallbackModel)
|
||||
* - agent-end-provider-error.test.ts (pauseAutoForProviderError)
|
||||
*/
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { classifyProviderError, pauseAutoForProviderError } from "../provider-error-pause.ts";
|
||||
import { getNextFallbackModel, isTransientNetworkError } from "../preferences.ts";
|
||||
|
||||
// ── classifyProviderError ────────────────────────────────────────────────────
|
||||
|
||||
test("classifyProviderError detects rate limit from 429", () => {
|
||||
const result = classifyProviderError("HTTP 429 Too Many Requests");
|
||||
assert.ok(result.isTransient);
|
||||
assert.ok(result.isRateLimit);
|
||||
assert.ok(result.suggestedDelayMs > 0);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects rate limit from message", () => {
|
||||
const result = classifyProviderError("rate limit exceeded");
|
||||
assert.ok(result.isTransient);
|
||||
assert.ok(result.isRateLimit);
|
||||
});
|
||||
|
||||
test("classifyProviderError extracts reset delay from message", () => {
|
||||
const result = classifyProviderError("rate limit exceeded, reset in 45s");
|
||||
assert.ok(result.isRateLimit);
|
||||
assert.equal(result.suggestedDelayMs, 45000);
|
||||
});
|
||||
|
||||
test("classifyProviderError defaults to 60s for rate limit without reset", () => {
|
||||
const result = classifyProviderError("429 too many requests");
|
||||
assert.ok(result.isRateLimit);
|
||||
assert.equal(result.suggestedDelayMs, 60_000);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects Anthropic internal server error", () => {
|
||||
const msg = '{"type":"error","error":{"details":null,"type":"api_error","message":"Internal server error"}}';
|
||||
const result = classifyProviderError(msg);
|
||||
assert.ok(result.isTransient);
|
||||
assert.ok(!result.isRateLimit);
|
||||
assert.equal(result.suggestedDelayMs, 30_000);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects overloaded error", () => {
|
||||
const result = classifyProviderError("overloaded_error: Overloaded");
|
||||
assert.ok(result.isTransient);
|
||||
assert.equal(result.suggestedDelayMs, 30_000);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects 503 service unavailable", () => {
|
||||
const result = classifyProviderError("HTTP 503 Service Unavailable");
|
||||
assert.ok(result.isTransient);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects 502 bad gateway", () => {
|
||||
const result = classifyProviderError("HTTP 502 Bad Gateway");
|
||||
assert.ok(result.isTransient);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects auth error as permanent", () => {
|
||||
const result = classifyProviderError("unauthorized: invalid API key");
|
||||
assert.ok(!result.isTransient);
|
||||
assert.ok(!result.isRateLimit);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects billing error as permanent", () => {
|
||||
const result = classifyProviderError("billing issue: payment required");
|
||||
assert.ok(!result.isTransient);
|
||||
});
|
||||
|
||||
test("classifyProviderError detects quota exceeded as permanent", () => {
|
||||
const result = classifyProviderError("quota exceeded for this month");
|
||||
assert.ok(!result.isTransient);
|
||||
});
|
||||
|
||||
test("classifyProviderError treats unknown error as permanent", () => {
|
||||
const result = classifyProviderError("something went wrong");
|
||||
assert.ok(!result.isTransient);
|
||||
});
|
||||
|
||||
test("classifyProviderError treats empty string as permanent", () => {
|
||||
const result = classifyProviderError("");
|
||||
assert.ok(!result.isTransient);
|
||||
});
|
||||
|
||||
test("classifyProviderError: rate limit takes precedence over auth keywords", () => {
|
||||
const result = classifyProviderError("429 unauthorized rate limit");
|
||||
assert.ok(result.isRateLimit);
|
||||
assert.ok(result.isTransient);
|
||||
});
|
||||
|
||||
// ── isTransientNetworkError ──────────────────────────────────────────────────
|
||||
|
||||
test("isTransientNetworkError detects ECONNRESET", () => {
|
||||
assert.ok(isTransientNetworkError("fetch failed: ECONNRESET"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects ETIMEDOUT", () => {
|
||||
assert.ok(isTransientNetworkError("ETIMEDOUT: request timed out"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects generic network error", () => {
|
||||
assert.ok(isTransientNetworkError("network error"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects socket hang up", () => {
|
||||
assert.ok(isTransientNetworkError("socket hang up"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects fetch failed", () => {
|
||||
assert.ok(isTransientNetworkError("fetch failed"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects connection reset", () => {
|
||||
assert.ok(isTransientNetworkError("connection was reset by peer"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError detects DNS errors", () => {
|
||||
assert.ok(isTransientNetworkError("dns resolution failed"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError rejects auth errors", () => {
|
||||
assert.ok(!isTransientNetworkError("unauthorized: invalid API key"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError rejects quota errors", () => {
|
||||
assert.ok(!isTransientNetworkError("quota exceeded"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError rejects billing errors", () => {
|
||||
assert.ok(!isTransientNetworkError("billing issue: network payment required"));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError rejects empty string", () => {
|
||||
assert.ok(!isTransientNetworkError(""));
|
||||
});
|
||||
|
||||
test("isTransientNetworkError rejects non-network errors", () => {
|
||||
assert.ok(!isTransientNetworkError("model not found"));
|
||||
});
|
||||
|
||||
// ── getNextFallbackModel ─────────────────────────────────────────────────────
|
||||
|
||||
test("getNextFallbackModel selects next fallback if current is a fallback", () => {
|
||||
const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] };
|
||||
assert.equal(getNextFallbackModel("model-b", modelConfig), "model-c");
|
||||
});
|
||||
|
||||
test("getNextFallbackModel returns undefined if fallbacks exhausted", () => {
|
||||
const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] };
|
||||
assert.equal(getNextFallbackModel("model-c", modelConfig), undefined);
|
||||
});
|
||||
|
||||
test("getNextFallbackModel finds current model with provider prefix", () => {
|
||||
const modelConfig = { primary: "p/model-a", fallbacks: ["p/model-b"] };
|
||||
assert.equal(getNextFallbackModel("model-a", modelConfig), "p/model-b");
|
||||
});
|
||||
|
||||
test("getNextFallbackModel returns primary if current is unknown", () => {
|
||||
const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] };
|
||||
assert.equal(getNextFallbackModel("model-x", modelConfig), "model-a");
|
||||
});
|
||||
|
||||
test("getNextFallbackModel returns primary if current is undefined", () => {
|
||||
const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] };
|
||||
assert.equal(getNextFallbackModel(undefined, modelConfig), "model-a");
|
||||
});
|
||||
|
||||
// ── pauseAutoForProviderError ────────────────────────────────────────────────
|
||||
|
||||
test("pauseAutoForProviderError warns and pauses without requiring ctx.log", async () => {
|
||||
const notifications: Array<{ message: string; level: string }> = [];
|
||||
let pauseCalls = 0;
|
||||
|
||||
await pauseAutoForProviderError(
|
||||
{ notify(message, level?) { notifications.push({ message, level: level ?? "info" }); } },
|
||||
": terminated",
|
||||
async () => { pauseCalls += 1; },
|
||||
);
|
||||
|
||||
assert.equal(pauseCalls, 1);
|
||||
assert.deepEqual(notifications, [
|
||||
{ message: "Auto-mode paused due to provider error: terminated", level: "warning" },
|
||||
]);
|
||||
});
|
||||
|
||||
test("pauseAutoForProviderError schedules auto-resume for rate limit errors", async () => {
|
||||
const notifications: Array<{ message: string; level: string }> = [];
|
||||
let pauseCalls = 0;
|
||||
let resumeCalled = false;
|
||||
|
||||
const originalSetTimeout = globalThis.setTimeout;
|
||||
const timers: Array<{ fn: () => void; delay: number }> = [];
|
||||
globalThis.setTimeout = ((fn: () => void, delay: number) => {
|
||||
timers.push({ fn, delay });
|
||||
return 0 as unknown as ReturnType<typeof setTimeout>;
|
||||
}) as typeof setTimeout;
|
||||
|
||||
try {
|
||||
await pauseAutoForProviderError(
|
||||
{ notify(message, level?) { notifications.push({ message, level: level ?? "info" }); } },
|
||||
": rate limit exceeded",
|
||||
async () => { pauseCalls += 1; },
|
||||
{ isRateLimit: true, retryAfterMs: 90000, resume: () => { resumeCalled = true; } },
|
||||
);
|
||||
|
||||
assert.equal(pauseCalls, 1);
|
||||
assert.equal(timers.length, 1);
|
||||
assert.equal(timers[0].delay, 90000);
|
||||
assert.deepEqual(notifications[0], {
|
||||
message: "Rate limited: rate limit exceeded. Auto-resuming in 90s...",
|
||||
level: "warning",
|
||||
});
|
||||
|
||||
timers[0].fn();
|
||||
assert.equal(resumeCalled, true);
|
||||
assert.deepEqual(notifications[1], {
|
||||
message: "Rate limit window elapsed. Resuming auto-mode.",
|
||||
level: "info",
|
||||
});
|
||||
} finally {
|
||||
globalThis.setTimeout = originalSetTimeout;
|
||||
}
|
||||
});
|
||||
|
||||
test("pauseAutoForProviderError falls back to indefinite pause when not rate limit", async () => {
|
||||
const notifications: Array<{ message: string; level: string }> = [];
|
||||
let pauseCalls = 0;
|
||||
|
||||
await pauseAutoForProviderError(
|
||||
{ notify(message, level?) { notifications.push({ message, level: level ?? "info" }); } },
|
||||
": connection refused",
|
||||
async () => { pauseCalls += 1; },
|
||||
{ isRateLimit: false },
|
||||
);
|
||||
|
||||
assert.equal(pauseCalls, 1);
|
||||
assert.deepEqual(notifications, [
|
||||
{ message: "Auto-mode paused due to provider error: connection refused", level: "warning" },
|
||||
]);
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue