From 8dfa7d058c56321f45e43ad2d516e94d498758a3 Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Tue, 17 Mar 2026 23:59:50 -0400 Subject: [PATCH] refactor: consolidate tests by area, standardize on node:test (#1059) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: add Node LTS pinning guide for macOS Homebrew users New doc (docs/node-lts-macos.md) explains how to pin Node 24 LTS via Homebrew to avoid running on odd-numbered development releases. Covers brew install/link/pin, version managers as alternatives, and verification steps. Added notice banner in README linking to the guide. * refactor: consolidate tests by area, standardize on node:test Consolidated 9 test files into 4, standardizing on node:test. Provider errors (3 files → 1): provider-errors.test.ts (34 tests) Metrics (2 files → 1): metrics.test.ts (13 tests, converted from custom runner) Activity log (2 files → 1): activity-log.test.ts (11 tests, converted from custom runner) Complexity (2 files → 1): removed redundant structural string checks Net: -694 lines, -5 files. 
--- .../gsd/tests/activity-log-prune.test.ts | 297 ------------ .../gsd/tests/activity-log-save.test.ts | 127 ----- .../extensions/gsd/tests/activity-log.test.ts | 213 ++++++++ .../tests/agent-end-provider-error.test.ts | 110 ----- .../gsd/tests/complexity-routing.test.ts | 111 ----- .../extensions/gsd/tests/metrics-io.test.ts | 176 ------- .../extensions/gsd/tests/metrics.test.ts | 454 +++++++----------- .../gsd/tests/network-error-fallback.test.ts | 104 ---- .../gsd/tests/provider-error-classify.test.ts | 95 ---- .../gsd/tests/provider-errors.test.ts | 245 ++++++++++ 10 files changed, 619 insertions(+), 1313 deletions(-) delete mode 100644 src/resources/extensions/gsd/tests/activity-log-prune.test.ts delete mode 100644 src/resources/extensions/gsd/tests/activity-log-save.test.ts create mode 100644 src/resources/extensions/gsd/tests/activity-log.test.ts delete mode 100644 src/resources/extensions/gsd/tests/agent-end-provider-error.test.ts delete mode 100644 src/resources/extensions/gsd/tests/complexity-routing.test.ts delete mode 100644 src/resources/extensions/gsd/tests/metrics-io.test.ts delete mode 100644 src/resources/extensions/gsd/tests/network-error-fallback.test.ts delete mode 100644 src/resources/extensions/gsd/tests/provider-error-classify.test.ts create mode 100644 src/resources/extensions/gsd/tests/provider-errors.test.ts diff --git a/src/resources/extensions/gsd/tests/activity-log-prune.test.ts b/src/resources/extensions/gsd/tests/activity-log-prune.test.ts deleted file mode 100644 index e09fdc460..000000000 --- a/src/resources/extensions/gsd/tests/activity-log-prune.test.ts +++ /dev/null @@ -1,297 +0,0 @@ -// Tests for pruneActivityLogs — age-based activity log pruning with -// highest-seq preservation invariant — plus step-11 prompt text assertion. 
-// -// Sections: -// (a) Basic pruning: one old file deleted, two recent survive -// (b) Highest-seq preserved even when all files are old -// (c) retentionDays=0 boundary: all non-highest-seq deleted -// (d) No-op when all files are recent -// (e) Empty directory: no crash -// (f) All old files: only highest-seq survives -// (g) Single file: always preserved (it IS highest-seq) -// (h) Seq number is tie-breaker (010 beats 001 lexicographically and numerically) -// (i) Non-matching filenames ignored: notes.txt survives, no crash -// (j) Step-11 prompt text: "refresh current state if needed" - -import { mkdtempSync, mkdirSync, readdirSync, rmSync, utimesSync, writeFileSync } from 'node:fs'; -import { join, dirname } from 'node:path'; -import { tmpdir } from 'node:os'; -import { fileURLToPath } from 'node:url'; - -import { pruneActivityLogs } from '../activity-log.ts'; -import { createTestContext } from './test-helpers.ts'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); - - -const { assertEq, assertTrue, report } = createTestContext(); -// ─── Fixture helpers ─────────────────────────────────────────────────────── - -let tmpDirs: string[] = []; - -function createTmpActivityDir(): string { - const dir = mkdtempSync(join(tmpdir(), 'gsd-prune-test-')); - tmpDirs.push(dir); - return dir; -} - -function writeActivityFile(activityDir: string, seq: string, name: string): string { - mkdirSync(activityDir, { recursive: true }); - const filePath = join(activityDir, `${seq}-${name}.jsonl`); - writeFileSync(filePath, `{"seq":${parseInt(seq, 10)},"name":"${name}"}\n`, 'utf-8'); - return filePath; -} - -/** Set mtime to daysAgo days in the past. 
*/ -function backdateFile(filePath: string, daysAgo: number): void { - const pastMs = Date.now() - daysAgo * 24 * 60 * 60 * 1000; - const pastDate = new Date(pastMs); - utimesSync(filePath, pastDate, pastDate); -} - -function cleanup(): void { - for (const dir of tmpDirs) { - rmSync(dir, { recursive: true, force: true }); - } - tmpDirs = []; -} - -process.on('exit', cleanup); - -// ─── Helper: get sorted filenames (basenames only) in a directory ────────── - -function listFiles(dir: string): string[] { - return readdirSync(dir).sort(); -} - -// ═══════════════════════════════════════════════════════════════════════════ -// Tests -// ═══════════════════════════════════════════════════════════════════════════ - -async function main(): Promise { - - // ─── (a) Basic pruning ──────────────────────────────────────────────────── - console.log('\n── (a) Basic pruning: one old file deleted, two recent survive'); - - { - const dir = createTmpActivityDir(); - const f001 = writeActivityFile(dir, '001', 'execute-task-M001-S01-T01'); - const _f002 = writeActivityFile(dir, '002', 'execute-task-M001-S01-T02'); - const _f003 = writeActivityFile(dir, '003', 'execute-task-M001-S01-T03'); - - backdateFile(f001, 40); // older than 30-day retention - - pruneActivityLogs(dir, 30); - - const remaining = listFiles(dir); - assertTrue( - !remaining.includes('001-execute-task-M001-S01-T01.jsonl'), - '(a) file 001 deleted (40 days old, past 30-day threshold)', - ); - assertTrue( - remaining.includes('002-execute-task-M001-S01-T02.jsonl'), - '(a) file 002 survives (recent)', - ); - assertTrue( - remaining.includes('003-execute-task-M001-S01-T03.jsonl'), - '(a) file 003 survives (recent, also highest-seq)', - ); - } - - // ─── (b) Highest-seq preserved even when all files are old ─────────────── - console.log('\n── (b) Highest-seq preserved even when all files are old'); - - { - const dir = createTmpActivityDir(); - const f001 = writeActivityFile(dir, '001', 'execute-task-M001-S01-T01'); - 
const f002 = writeActivityFile(dir, '002', 'execute-task-M001-S01-T02'); - const f003 = writeActivityFile(dir, '003', 'execute-task-M001-S01-T03'); - - backdateFile(f001, 40); - backdateFile(f002, 40); - backdateFile(f003, 40); // all old, but 003 is highest-seq - - pruneActivityLogs(dir, 30); - - const remaining = listFiles(dir); - assertEq(remaining.length, 1, '(b) exactly 1 file survives when all are old'); - assertTrue( - remaining.includes('003-execute-task-M001-S01-T03.jsonl'), - '(b) highest-seq file (003) is the survivor', - ); - } - - // ─── (c) retentionDays=0 boundary ──────────────────────────────────────── - console.log('\n── (c) retentionDays=0: all non-highest-seq deleted even if brand-new'); - - { - const dir = createTmpActivityDir(); - // All files have mtime=now (freshly written — no backdating) - writeActivityFile(dir, '001', 'execute-task-M002-S01-T01'); - writeActivityFile(dir, '002', 'execute-task-M002-S01-T02'); - writeActivityFile(dir, '003', 'execute-task-M002-S01-T03'); - - pruneActivityLogs(dir, 0); // cutoff = now → everything is "expired" - - const remaining = listFiles(dir); - assertEq(remaining.length, 1, '(c) retentionDays=0: exactly 1 file survives'); - assertTrue( - remaining.includes('003-execute-task-M002-S01-T03.jsonl'), - '(c) retentionDays=0: only highest-seq (003) survives', - ); - } - - // ─── (d) No-op when all files are recent ───────────────────────────────── - console.log('\n── (d) No-op when all files are recent'); - - { - const dir = createTmpActivityDir(); - writeActivityFile(dir, '001', 'execute-task-M003-S01-T01'); - writeActivityFile(dir, '002', 'execute-task-M003-S01-T02'); - writeActivityFile(dir, '003', 'execute-task-M003-S01-T03'); - // No backdating — all files are fresh - - pruneActivityLogs(dir, 30); - - const remaining = listFiles(dir); - assertEq(remaining.length, 3, '(d) all 3 files survive when all are recent'); - } - - // ─── (e) Empty directory: no crash ──────────────────────────────────────── - 
console.log('\n── (e) Empty directory: no crash'); - - { - const dir = createTmpActivityDir(); - // dir exists but is empty - - let threw = false; - try { - pruneActivityLogs(dir, 30); - } catch { - threw = true; - } - - assertTrue(!threw, '(e) pruneActivityLogs does not throw on empty directory'); - assertTrue( - readdirSync(dir).length === 0, - '(e) directory still exists and is still empty after no-op', - ); - } - - // ─── (f) All old files: only highest-seq survives ───────────────────────── - console.log('\n── (f) All old files: only highest-seq survives'); - - { - const dir = createTmpActivityDir(); - const f004 = writeActivityFile(dir, '004', 'execute-task-M004-S01-T01'); - const f005 = writeActivityFile(dir, '005', 'execute-task-M004-S01-T02'); - const f006 = writeActivityFile(dir, '006', 'execute-task-M004-S01-T03'); - - backdateFile(f004, 60); - backdateFile(f005, 60); - backdateFile(f006, 60); - - pruneActivityLogs(dir, 30); - - const remaining = listFiles(dir); - assertEq(remaining.length, 1, '(f) exactly 1 file survives when all are old'); - assertTrue( - remaining[0].startsWith('006-'), - '(f) the surviving file starts with 006 (highest-seq)', - ); - } - - // ─── (g) Single file: always preserved ──────────────────────────────────── - console.log('\n── (g) Single file: always preserved (it IS highest-seq)'); - - { - const dir = createTmpActivityDir(); - const f001 = writeActivityFile(dir, '001', 'execute-task-M005-S01-T01'); - backdateFile(f001, 100); // very old - - pruneActivityLogs(dir, 30); - - const remaining = listFiles(dir); - assertEq(remaining.length, 1, '(g) single file survives even when very old (it is the highest-seq)'); - assertTrue( - remaining.includes('001-execute-task-M005-S01-T01.jsonl'), - '(g) the single file (001) is preserved', - ); - } - - // ─── (h) Seq tie-breaker: 010 is higher than 001 ───────────────────────── - console.log('\n── (h) Seq number tie-breaker: 010 beats 001 numerically'); - - { - const dir = 
createTmpActivityDir(); - const f001 = writeActivityFile(dir, '001', 'execute-task-M006-S01-T01'); - const f010 = writeActivityFile(dir, '010', 'execute-task-M006-S01-T10'); - - backdateFile(f001, 40); - backdateFile(f010, 40); // both old; 010 is numerically highest - - pruneActivityLogs(dir, 30); - - const remaining = listFiles(dir); - assertEq(remaining.length, 1, '(h) exactly 1 file survives'); - assertTrue( - remaining.includes('010-execute-task-M006-S01-T10.jsonl'), - '(h) seq 010 (numeric 10) survives over seq 001 (numeric 1)', - ); - } - - // ─── (i) Non-matching filenames ignored ─────────────────────────────────── - console.log('\n── (i) Non-matching filenames ignored: notes.txt survives, no crash'); - - { - const dir = createTmpActivityDir(); - const f001 = writeActivityFile(dir, '001', 'execute-task-M007-S01-T01'); - const notesPath = join(dir, 'notes.txt'); - writeFileSync(notesPath, 'some notes\n', 'utf-8'); - - backdateFile(f001, 40); // eligible for pruning - // notes.txt never gets a seq prefix → should be ignored by pruner - - let threw = false; - try { - pruneActivityLogs(dir, 30); - } catch { - threw = true; - } - - assertTrue(!threw, '(i) no crash when non-matching file is present'); - - const remaining = listFiles(dir); - assertTrue( - remaining.includes('notes.txt'), - '(i) notes.txt (non-matching filename) survives pruning unchanged', - ); - // 001 is deleted (old, and notes.txt is not counted as seq-bearing so 001 is not "highest") - // But wait — 001 IS the only seq file, making it highest-seq → it survives - assertTrue( - remaining.includes('001-execute-task-M007-S01-T01.jsonl'), - '(i) seq 001 survives (it is the highest-seq among seq files)', - ); - } - - // ─── (j) Step-11 prompt text assertion ──────────────────────────────────── - console.log('\n── (j) Step-11 prompt text: "refresh current state if needed"'); - - { - const { readFileSync } = await import('node:fs'); - const promptPath = join(__dirname, '..', 'prompts', 
'complete-slice.md'); - const content = readFileSync(promptPath, 'utf-8'); - - assertTrue( - content.includes('refresh current state if needed'), - '(j) complete-slice.md step 11 contains "refresh current state if needed"', - ); - } - - report(); -} - -main().catch((error) => { - console.error(error); - process.exit(1); -}); diff --git a/src/resources/extensions/gsd/tests/activity-log-save.test.ts b/src/resources/extensions/gsd/tests/activity-log-save.test.ts deleted file mode 100644 index a0e10a39a..000000000 --- a/src/resources/extensions/gsd/tests/activity-log-save.test.ts +++ /dev/null @@ -1,127 +0,0 @@ -// Tests for saveActivityLog performance behavior: -// - cache next sequence per activity directory instead of rescanning every save -// - skip rewriting identical snapshots for the same unit -// - recover safely if another writer creates the cached next sequence - -import { existsSync, mkdtempSync, readdirSync, rmSync, writeFileSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; - -import { saveActivityLog } from "../activity-log.ts"; -import { createTestContext } from "./test-helpers.ts"; - -const { assertEq, assertTrue, report } = createTestContext(); - -let tmpDirs: string[] = []; - -function createBaseDir(): string { - const dir = mkdtempSync(join(tmpdir(), "gsd-activity-save-test-")); - tmpDirs.push(dir); - return dir; -} - -function activityDir(baseDir: string): string { - return join(baseDir, ".gsd", "activity"); -} - -function listActivityFiles(baseDir: string): string[] { - const dir = activityDir(baseDir); - return existsSync(dir) ? 
readdirSync(dir).sort() : []; -} - -function createCtx(entries: unknown[]) { - return { - sessionManager: { - getEntries: () => entries, - }, - }; -} - -function cleanup(): void { - for (const dir of tmpDirs) { - rmSync(dir, { recursive: true, force: true }); - } - tmpDirs = []; -} - -process.on("exit", cleanup); - -async function main(): Promise { - console.log("\n── (a) cache next sequence instead of rescanning every save"); - { - const baseDir = createBaseDir(); - saveActivityLog(createCtx([{ kind: "first", n: 1 }]) as any, baseDir, "execute-task", "M001/S01/T01"); - - writeFileSync( - join(activityDir(baseDir), "999-external-manual.jsonl"), - '{"external":true}\n', - "utf-8", - ); - - saveActivityLog(createCtx([{ kind: "second", n: 2 }]) as any, baseDir, "execute-task", "M001/S01/T02"); - - const files = listActivityFiles(baseDir); - assertTrue(files.includes("001-execute-task-M001-S01-T01.jsonl"), "(a) first save uses sequence 001"); - assertTrue(files.includes("002-execute-task-M001-S01-T02.jsonl"), "(a) second save uses cached next sequence 002"); - assertTrue(files.includes("999-external-manual.jsonl"), "(a) externally added file remains present"); - assertTrue(!files.some(file => file.startsWith("1000-")), "(a) second save did not rescan and jump to sequence 1000"); - } - - console.log("\n── (b) skip rewriting identical snapshots for the same unit"); - { - const baseDir = createBaseDir(); - const ctx = createCtx([{ role: "assistant", content: "same snapshot" }]); - - saveActivityLog(ctx as any, baseDir, "plan-slice", "M002/S01"); - saveActivityLog(ctx as any, baseDir, "plan-slice", "M002/S01"); - - let files = listActivityFiles(baseDir); - assertEq(files.length, 1, "(b) identical repeated save writes only one activity file"); - assertTrue(files[0] === "001-plan-slice-M002-S01.jsonl", "(b) the original sequence is preserved"); - - saveActivityLog(createCtx([{ role: "assistant", content: "changed snapshot" }]) as any, baseDir, "plan-slice", "M002/S01"); - 
files = listActivityFiles(baseDir); - assertEq(files.length, 2, "(b) changed snapshot writes a new activity file"); - assertTrue(files.includes("002-plan-slice-M002-S01.jsonl"), "(b) deduped save did not consume the next sequence"); - } - - console.log("\n── (c) recover if another writer creates the exact cached target file"); - { - const baseDir = createBaseDir(); - saveActivityLog(createCtx([{ turn: 1 }]) as any, baseDir, "execute-task", "M003/S02/T01"); - - writeFileSync( - join(activityDir(baseDir), "002-execute-task-M003-S02-T02.jsonl"), - '{"collision":true}\n', - "utf-8", - ); - - saveActivityLog(createCtx([{ turn: 2 }]) as any, baseDir, "execute-task", "M003/S02/T02"); - - const files = listActivityFiles(baseDir); - assertTrue(files.includes("002-execute-task-M003-S02-T02.jsonl"), "(c) exact collision file is preserved"); - assertTrue(files.includes("003-execute-task-M003-S02-T02.jsonl"), "(c) logger rescans only on collision and advances to 003"); - } - - console.log("\n── (d) dedupe is tracked per unit, not just the last write in the directory"); - { - const baseDir = createBaseDir(); - const repeatedCtx = createCtx([{ role: "assistant", content: "same-for-unit-a" }]); - - saveActivityLog(repeatedCtx as any, baseDir, "execute-task", "M004/S01/T01"); - saveActivityLog(createCtx([{ role: "assistant", content: "other-unit" }]) as any, baseDir, "execute-task", "M004/S01/T02"); - saveActivityLog(repeatedCtx as any, baseDir, "execute-task", "M004/S01/T01"); - - const files = listActivityFiles(baseDir); - assertEq(files.length, 2, "(d) interleaving another unit does not force a duplicate rewrite for unit A"); - assertTrue(files.includes("001-execute-task-M004-S01-T01.jsonl"), "(d) original unit A snapshot is retained"); - assertTrue(files.includes("002-execute-task-M004-S01-T02.jsonl"), "(d) unit B snapshot is retained"); - } - - report(); -} - -main().catch((error) => { - console.error(error); - process.exit(1); -}); diff --git 
a/src/resources/extensions/gsd/tests/activity-log.test.ts b/src/resources/extensions/gsd/tests/activity-log.test.ts new file mode 100644 index 000000000..646d36f09 --- /dev/null +++ b/src/resources/extensions/gsd/tests/activity-log.test.ts @@ -0,0 +1,213 @@ +/** + * Activity log tests — consolidated from: + * - activity-log-prune.test.ts (age-based pruning with highest-seq preservation) + * - activity-log-save.test.ts (caching, dedup, collision recovery) + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { existsSync, mkdtempSync, mkdirSync, readdirSync, rmSync, utimesSync, writeFileSync, readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { tmpdir } from "node:os"; +import { fileURLToPath } from "node:url"; + +import { pruneActivityLogs, saveActivityLog } from "../activity-log.ts"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +// ── Helpers ────────────────────────────────────────────────────────────────── + +function createTmpDir(): string { + return mkdtempSync(join(tmpdir(), "gsd-activity-test-")); +} + +function writeActivityFile(dir: string, seq: string, name: string): string { + mkdirSync(dir, { recursive: true }); + const filePath = join(dir, `${seq}-${name}.jsonl`); + writeFileSync(filePath, `{"seq":${parseInt(seq, 10)},"name":"${name}"}\n`, "utf-8"); + return filePath; +} + +function backdateFile(filePath: string, daysAgo: number): void { + const pastMs = Date.now() - daysAgo * 24 * 60 * 60 * 1000; + const pastDate = new Date(pastMs); + utimesSync(filePath, pastDate, pastDate); +} + +function listFiles(dir: string): string[] { + return existsSync(dir) ? 
readdirSync(dir).sort() : []; +} + +function activityDir(baseDir: string): string { + return join(baseDir, ".gsd", "activity"); +} + +function createCtx(entries: unknown[]) { + return { sessionManager: { getEntries: () => entries } }; +} + +// ── Pruning ────────────────────────────────────────────────────────────────── + +test("pruneActivityLogs deletes old files, keeps recent and highest-seq", () => { + const dir = createTmpDir(); + try { + const f001 = writeActivityFile(dir, "001", "execute-task-M001-S01-T01"); + writeActivityFile(dir, "002", "execute-task-M001-S01-T02"); + writeActivityFile(dir, "003", "execute-task-M001-S01-T03"); + backdateFile(f001, 40); + + pruneActivityLogs(dir, 30); + const remaining = listFiles(dir); + assert.ok(!remaining.includes("001-execute-task-M001-S01-T01.jsonl")); + assert.ok(remaining.includes("002-execute-task-M001-S01-T02.jsonl")); + assert.ok(remaining.includes("003-execute-task-M001-S01-T03.jsonl")); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("pruneActivityLogs preserves highest-seq even when all files are old", () => { + const dir = createTmpDir(); + try { + const f001 = writeActivityFile(dir, "001", "t1"); + const f002 = writeActivityFile(dir, "002", "t2"); + const f003 = writeActivityFile(dir, "003", "t3"); + backdateFile(f001, 40); backdateFile(f002, 40); backdateFile(f003, 40); + + pruneActivityLogs(dir, 30); + const remaining = listFiles(dir); + assert.equal(remaining.length, 1); + assert.ok(remaining[0].startsWith("003-")); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("pruneActivityLogs with retentionDays=0 keeps only highest-seq", () => { + const dir = createTmpDir(); + try { + writeActivityFile(dir, "001", "t1"); + writeActivityFile(dir, "002", "t2"); + writeActivityFile(dir, "003", "t3"); + + pruneActivityLogs(dir, 0); + const remaining = listFiles(dir); + assert.equal(remaining.length, 1); + assert.ok(remaining[0].startsWith("003-")); + } 
finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("pruneActivityLogs no-op when all files are recent", () => { + const dir = createTmpDir(); + try { + writeActivityFile(dir, "001", "t1"); + writeActivityFile(dir, "002", "t2"); + writeActivityFile(dir, "003", "t3"); + + pruneActivityLogs(dir, 30); + assert.equal(listFiles(dir).length, 3); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("pruneActivityLogs handles empty directory", () => { + const dir = createTmpDir(); + try { + assert.doesNotThrow(() => pruneActivityLogs(dir, 30)); + assert.equal(readdirSync(dir).length, 0); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("pruneActivityLogs preserves single old file (it is highest-seq)", () => { + const dir = createTmpDir(); + try { + const f = writeActivityFile(dir, "001", "t1"); + backdateFile(f, 100); + + pruneActivityLogs(dir, 30); + assert.equal(listFiles(dir).length, 1); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("pruneActivityLogs ignores non-matching filenames", () => { + const dir = createTmpDir(); + try { + const f001 = writeActivityFile(dir, "001", "t1"); + writeFileSync(join(dir, "notes.txt"), "some notes\n", "utf-8"); + backdateFile(f001, 40); + + assert.doesNotThrow(() => pruneActivityLogs(dir, 30)); + const remaining = listFiles(dir); + assert.ok(remaining.includes("notes.txt")); + // 001 is the only seq file, so it's highest-seq and survives + assert.ok(remaining.includes("001-t1.jsonl")); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +// ── Save: caching, dedup, collision recovery ───────────────────────────────── + +test("saveActivityLog caches sequence instead of rescanning", () => { + const baseDir = createTmpDir(); + try { + saveActivityLog(createCtx([{ kind: "first", n: 1 }]) as any, baseDir, "execute-task", "M001/S01/T01"); + writeFileSync(join(activityDir(baseDir), "999-external.jsonl"), 
'{"x":1}\n', "utf-8"); + saveActivityLog(createCtx([{ kind: "second", n: 2 }]) as any, baseDir, "execute-task", "M001/S01/T02"); + + const files = listFiles(activityDir(baseDir)); + assert.ok(files.includes("001-execute-task-M001-S01-T01.jsonl")); + assert.ok(files.includes("002-execute-task-M001-S01-T02.jsonl")); + assert.ok(!files.some(f => f.startsWith("1000-"))); + } finally { + rmSync(baseDir, { recursive: true, force: true }); + } +}); + +test("saveActivityLog deduplicates identical snapshots for same unit", () => { + const baseDir = createTmpDir(); + try { + const ctx = createCtx([{ role: "assistant", content: "same" }]); + saveActivityLog(ctx as any, baseDir, "plan-slice", "M002/S01"); + saveActivityLog(ctx as any, baseDir, "plan-slice", "M002/S01"); + + let files = listFiles(activityDir(baseDir)); + assert.equal(files.length, 1); + + saveActivityLog(createCtx([{ role: "assistant", content: "changed" }]) as any, baseDir, "plan-slice", "M002/S01"); + files = listFiles(activityDir(baseDir)); + assert.equal(files.length, 2); + } finally { + rmSync(baseDir, { recursive: true, force: true }); + } +}); + +test("saveActivityLog recovers on sequence collision", () => { + const baseDir = createTmpDir(); + try { + saveActivityLog(createCtx([{ turn: 1 }]) as any, baseDir, "execute-task", "M003/S02/T01"); + writeFileSync(join(activityDir(baseDir), "002-execute-task-M003-S02-T02.jsonl"), '{"collision":true}\n', "utf-8"); + saveActivityLog(createCtx([{ turn: 2 }]) as any, baseDir, "execute-task", "M003/S02/T02"); + + const files = listFiles(activityDir(baseDir)); + assert.ok(files.includes("002-execute-task-M003-S02-T02.jsonl")); + assert.ok(files.includes("003-execute-task-M003-S02-T02.jsonl")); + } finally { + rmSync(baseDir, { recursive: true, force: true }); + } +}); + +// ── Prompt text assertion ──────────────────────────────────────────────────── + +test("complete-slice.md contains refresh state instruction", () => { + const promptPath = join(__dirname, "..", 
"prompts", "complete-slice.md"); + const content = readFileSync(promptPath, "utf-8"); + assert.ok(content.includes("refresh current state if needed")); +}); diff --git a/src/resources/extensions/gsd/tests/agent-end-provider-error.test.ts b/src/resources/extensions/gsd/tests/agent-end-provider-error.test.ts deleted file mode 100644 index af00e6a27..000000000 --- a/src/resources/extensions/gsd/tests/agent-end-provider-error.test.ts +++ /dev/null @@ -1,110 +0,0 @@ -import test from "node:test"; -import assert from "node:assert/strict"; - -import { pauseAutoForProviderError } from "../provider-error-pause.ts"; - -test("pauseAutoForProviderError warns and pauses without requiring ctx.log", async () => { - const notifications: Array<{ message: string; level: string }> = []; - let pauseCalls = 0; - - await pauseAutoForProviderError( - { - notify(message, level?) { - notifications.push({ message, level: level ?? "info" }); - }, - }, - ": terminated", - async () => { - pauseCalls += 1; - }, - ); - - assert.equal(pauseCalls, 1, "should pause auto-mode exactly once"); - assert.deepEqual(notifications, [ - { - message: "Auto-mode paused due to provider error: terminated", - level: "warning", - }, - ]); -}); - -test("pauseAutoForProviderError schedules auto-resume for rate limit errors", async () => { - const notifications: Array<{ message: string; level: string }> = []; - let pauseCalls = 0; - let resumeCalled = false; - - // Use fake timer - const originalSetTimeout = globalThis.setTimeout; - const timers: Array<{ fn: () => void; delay: number }> = []; - globalThis.setTimeout = ((fn: () => void, delay: number) => { - timers.push({ fn, delay }); - return 0 as unknown as ReturnType; - }) as typeof setTimeout; - - try { - await pauseAutoForProviderError( - { - notify(message, level?) { - notifications.push({ message, level: level ?? 
"info" }); - }, - }, - ": rate limit exceeded", - async () => { - pauseCalls += 1; - }, - { - isRateLimit: true, - retryAfterMs: 90000, - resume: () => { - resumeCalled = true; - }, - }, - ); - - assert.equal(pauseCalls, 1, "should pause auto-mode"); - assert.equal(timers.length, 1, "should schedule one timer"); - assert.equal(timers[0].delay, 90000, "timer should match retryAfterMs"); - assert.deepEqual(notifications[0], { - message: "Rate limited: rate limit exceeded. Auto-resuming in 90s...", - level: "warning", - }); - - // Fire the timer - timers[0].fn(); - assert.equal(resumeCalled, true, "should call resume after timer fires"); - assert.deepEqual(notifications[1], { - message: "Rate limit window elapsed. Resuming auto-mode.", - level: "info", - }); - } finally { - globalThis.setTimeout = originalSetTimeout; - } -}); - -test("pauseAutoForProviderError falls back to indefinite pause when not rate limit", async () => { - const notifications: Array<{ message: string; level: string }> = []; - let pauseCalls = 0; - - await pauseAutoForProviderError( - { - notify(message, level?) { - notifications.push({ message, level: level ?? "info" }); - }, - }, - ": connection refused", - async () => { - pauseCalls += 1; - }, - { - isRateLimit: false, - }, - ); - - assert.equal(pauseCalls, 1); - assert.deepEqual(notifications, [ - { - message: "Auto-mode paused due to provider error: connection refused", - level: "warning", - }, - ]); -}); diff --git a/src/resources/extensions/gsd/tests/complexity-routing.test.ts b/src/resources/extensions/gsd/tests/complexity-routing.test.ts deleted file mode 100644 index 1d3ab24a7..000000000 --- a/src/resources/extensions/gsd/tests/complexity-routing.test.ts +++ /dev/null @@ -1,111 +0,0 @@ -/** - * Complexity Routing — unit tests for M004/S03. - * - * Tests complexity classification and dispatch integration. - * Uses source-level checks for the classifier module and preference wiring. 
- */ - -import test from "node:test"; -import assert from "node:assert/strict"; -import { readFileSync } from "node:fs"; -import { join, dirname } from "node:path"; -import { fileURLToPath } from "node:url"; - -const __dirname = dirname(fileURLToPath(import.meta.url)); -const preferencesSrc = readFileSync(join(__dirname, "..", "preferences.ts"), "utf-8"); -const complexitySrc = readFileSync(join(__dirname, "..", "complexity-classifier.ts"), "utf-8"); - -// ═══════════════════════════════════════════════════════════════════════════ -// Model Config — execution_simple -// ═══════════════════════════════════════════════════════════════════════════ - -test("preferences: GSDModelConfig includes execution_simple field", () => { - const v1Match = preferencesSrc.match(/interface GSDModelConfig\s*\{[^}]*execution_simple/); - assert.ok(v1Match, "GSDModelConfig should have execution_simple field"); - const v2Match = preferencesSrc.match(/interface GSDModelConfigV2\s*\{[^}]*execution_simple/); - assert.ok(v2Match, "GSDModelConfigV2 should have execution_simple field"); -}); - -test("preferences: budget profile sets execution_simple model", () => { - const budgetIdx = preferencesSrc.indexOf('case "budget":'); - const balancedIdx = preferencesSrc.indexOf('case "balanced":'); - const budgetBlock = preferencesSrc.slice(budgetIdx, balancedIdx); - assert.ok(budgetBlock.includes("execution_simple:"), "budget profile should set execution_simple"); -}); - -test("preferences: resolveModelWithFallbacksForUnit handles execute-task-simple", () => { - assert.ok( - preferencesSrc.includes('"execute-task-simple"'), - "should have execute-task-simple case in model resolution", - ); -}); - -// ═══════════════════════════════════════════════════════════════════════════ -// Classifier Module Structure -// ═══════════════════════════════════════════════════════════════════════════ - -test("complexity: module exports classifyUnitComplexity function", () => { - assert.ok( - 
complexitySrc.includes("export function classifyUnitComplexity"), - "should export classifyUnitComplexity", - ); -}); - -test("complexity: module exports ComplexityTier type", () => { - assert.ok( - complexitySrc.includes("export type ComplexityTier"), - "should export ComplexityTier type", - ); -}); - -test("complexity: module exports tierLabel function", () => { - assert.ok( - complexitySrc.includes("export function tierLabel"), - "should export tierLabel for dashboard display", - ); -}); - -test("complexity: module exports tierOrdinal function", () => { - assert.ok( - complexitySrc.includes("export function tierOrdinal"), - "should export tierOrdinal for tier comparison", - ); -}); - -// ═══════════════════════════════════════════════════════════════════════════ -// Unit Complexity Classification (from #579 — combined) -// ═══════════════════════════════════════════════════════════════════════════ - -test("unit-classify: classifyUnitComplexity is exported", () => { - assert.ok( - complexitySrc.includes("export function classifyUnitComplexity"), - "should export classifyUnitComplexity", - ); -}); - -test("unit-classify: unit type tier mapping exists", () => { - assert.ok(complexitySrc.includes("UNIT_TYPE_TIERS"), "should have unit type tier mapping"); - assert.ok(complexitySrc.includes('"complete-slice": "light"'), "complete-slice should be light"); - assert.ok(complexitySrc.includes('"replan-slice": "heavy"'), "replan-slice should be heavy"); -}); - -test("unit-classify: hook units default to light", () => { - assert.ok( - complexitySrc.includes('startsWith("hook/")') && complexitySrc.includes('"light"'), - "hook units should default to light tier", - ); -}); - -test("unit-classify: budget pressure has graduated thresholds", () => { - assert.ok(complexitySrc.includes("budgetPct >= 0.9"), "should have 90% threshold"); - assert.ok(complexitySrc.includes("budgetPct >= 0.75"), "should have 75% threshold"); - assert.ok(complexitySrc.includes("budgetPct < 0.5"), 
"should skip below 50%"); -}); - -test("unit-classify: tierLabel function exists", () => { - assert.ok( - complexitySrc.includes("export function tierLabel") || - complexitySrc.includes("export { tierLabel"), - "should export tierLabel for dashboard display", - ); -}); diff --git a/src/resources/extensions/gsd/tests/metrics-io.test.ts b/src/resources/extensions/gsd/tests/metrics-io.test.ts deleted file mode 100644 index fee693d7d..000000000 --- a/src/resources/extensions/gsd/tests/metrics-io.test.ts +++ /dev/null @@ -1,176 +0,0 @@ -/** - * Tests for GSD metrics disk I/O — init, snapshot, load/save cycle. - * Uses a temp directory to avoid touching real .gsd/ state. - */ - -import { mkdtempSync, mkdirSync, readFileSync, rmSync } from "node:fs"; -import { join } from "node:path"; -import { tmpdir } from "node:os"; -import { - initMetrics, - resetMetrics, - getLedger, - snapshotUnitMetrics, - type MetricsLedger, -} from "../metrics.js"; -import { createTestContext } from './test-helpers.ts'; - -const { assertEq, assertTrue, report } = createTestContext(); -// ─── Setup ──────────────────────────────────────────────────────────────────── - -const tmpBase = mkdtempSync(join(tmpdir(), "gsd-metrics-test-")); -mkdirSync(join(tmpBase, ".gsd"), { recursive: true }); - -// Mock ExtensionContext with session entries -function mockCtx(messages: any[] = []): any { - const entries = messages.map((msg, i) => ({ - type: "message", - id: `entry-${i}`, - parentId: i > 0 ? 
`entry-${i - 1}` : null, - timestamp: new Date().toISOString(), - message: msg, - })); - return { - sessionManager: { - getEntries: () => entries, - }, - model: { id: "claude-sonnet-4-20250514" }, - }; -} - -// ─── Tests ──────────────────────────────────────────────────────────────────── - -console.log("\n=== initMetrics / getLedger ==="); - -{ - resetMetrics(); - assertTrue(getLedger() === null, "ledger null before init"); - - initMetrics(tmpBase); - const ledger = getLedger(); - assertTrue(ledger !== null, "ledger not null after init"); - assertEq(ledger!.version, 1, "version is 1"); - assertEq(ledger!.units.length, 0, "no units initially"); -} - -console.log("\n=== snapshotUnitMetrics ==="); - -{ - resetMetrics(); - initMetrics(tmpBase); - - // Simulate a session with assistant messages containing usage data - const ctx = mockCtx([ - { role: "user", content: "Do the thing" }, - { - role: "assistant", - content: [ - { type: "text", text: "I'll do the thing" }, - { type: "tool_call", id: "tc1", name: "bash", input: {} }, - ], - usage: { - input: 5000, - output: 2000, - cacheRead: 3000, - cacheWrite: 500, - totalTokens: 10500, - cost: { input: 0.015, output: 0.03, cacheRead: 0.003, cacheWrite: 0.002, total: 0.05 }, - }, - }, - { role: "toolResult", toolCallId: "tc1", content: [{ type: "text", text: "ok" }] }, - { - role: "assistant", - content: [{ type: "text", text: "Done!" 
}], - usage: { - input: 8000, - output: 1000, - cacheRead: 6000, - cacheWrite: 200, - totalTokens: 15200, - cost: { input: 0.024, output: 0.015, cacheRead: 0.006, cacheWrite: 0.001, total: 0.046 }, - }, - }, - ]); - - const unit = snapshotUnitMetrics(ctx, "execute-task", "M001/S01/T01", Date.now() - 5000, "claude-sonnet-4-20250514"); - - assertTrue(unit !== null, "unit returned"); - assertEq(unit!.type, "execute-task", "type"); - assertEq(unit!.id, "M001/S01/T01", "id"); - assertEq(unit!.tokens.input, 13000, "input tokens (5000+8000)"); - assertEq(unit!.tokens.output, 3000, "output tokens (2000+1000)"); - assertEq(unit!.tokens.cacheRead, 9000, "cacheRead (3000+6000)"); - assertEq(unit!.tokens.total, 25700, "total tokens (10500+15200)"); - assertTrue(Math.abs(unit!.cost - 0.096) < 0.001, `cost ~0.096 (got ${unit!.cost})`); - assertEq(unit!.toolCalls, 1, "1 tool call"); - assertEq(unit!.assistantMessages, 2, "2 assistant messages"); - assertEq(unit!.userMessages, 1, "1 user message"); - - // Verify ledger persisted - const ledger = getLedger()!; - assertEq(ledger.units.length, 1, "1 unit in ledger"); -} - -console.log("\n=== Persistence across init/reset cycles ==="); - -{ - // Reset and re-init — should load from disk - resetMetrics(); - initMetrics(tmpBase); - - const ledger = getLedger()!; - assertEq(ledger.units.length, 1, "unit survived reset+init"); - assertEq(ledger.units[0].id, "M001/S01/T01", "correct unit ID"); - - // Add another unit - const ctx = mockCtx([ - { - role: "assistant", - content: [{ type: "text", text: "Research complete" }], - usage: { - input: 3000, output: 1500, cacheRead: 1000, cacheWrite: 300, totalTokens: 5800, - cost: { input: 0.009, output: 0.023, cacheRead: 0.001, cacheWrite: 0.001, total: 0.034 }, - }, - }, - ]); - - snapshotUnitMetrics(ctx, "research-slice", "M001/S02", Date.now() - 3000, "claude-sonnet-4-20250514"); - - // Verify both units persisted - resetMetrics(); - initMetrics(tmpBase); - const final = getLedger()!; - 
assertEq(final.units.length, 2, "2 units after second snapshot"); -} - -console.log("\n=== File content verification ==="); - -{ - const raw = readFileSync(join(tmpBase, ".gsd", "metrics.json"), "utf-8"); - const parsed: MetricsLedger = JSON.parse(raw); - assertEq(parsed.version, 1, "file version is 1"); - assertEq(parsed.units.length, 2, "file has 2 units"); - assertTrue(parsed.projectStartedAt > 0, "projectStartedAt is set"); -} - -console.log("\n=== Empty session handling ==="); - -{ - resetMetrics(); - initMetrics(tmpBase); - - // Empty session — no messages - const ctx = mockCtx([]); - const unit = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", Date.now(), "test-model"); - assertTrue(unit === null, "returns null for empty session"); - - // Ledger shouldn't have grown - assertEq(getLedger()!.units.length, 2, "still 2 units (empty session not added)"); -} - -// ─── Cleanup ────────────────────────────────────────────────────────────────── - -resetMetrics(); -rmSync(tmpBase, { recursive: true, force: true }); - -report(); diff --git a/src/resources/extensions/gsd/tests/metrics.test.ts b/src/resources/extensions/gsd/tests/metrics.test.ts index b3272e09b..a0b3d503f 100644 --- a/src/resources/extensions/gsd/tests/metrics.test.ts +++ b/src/resources/extensions/gsd/tests/metrics.test.ts @@ -1,12 +1,17 @@ /** - * Tests for GSD metrics aggregation logic. - * Tests the pure functions — no file I/O, no extension context. 
+ * Metrics tests — consolidated from: + * - metrics.test.ts (pure aggregation functions, formatting) + * - metrics-io.test.ts (disk I/O, init, snapshot, persistence) */ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, readFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; import { type UnitMetrics, - type TokenCounts, - type BudgetInfo, + type MetricsLedger, classifyUnitPhase, aggregateByPhase, aggregateBySlice, @@ -14,10 +19,13 @@ import { getProjectTotals, formatCost, formatTokenCount, + initMetrics, + resetMetrics, + getLedger, + snapshotUnitMetrics, } from "../metrics.js"; -import { createTestContext } from './test-helpers.ts'; -// ─── Test helpers ───────────────────────────────────────────────────────────── +// ── Helpers ────────────────────────────────────────────────────────────────── function makeUnit(overrides: Partial = {}): UnitMetrics { return { @@ -35,59 +43,72 @@ function makeUnit(overrides: Partial = {}): UnitMetrics { }; } -const { assertEq, assertTrue, report } = createTestContext(); - -function assertClose(actual: number, expected: number, tolerance: number, message: string): void { - assertTrue(Math.abs(actual - expected) <= tolerance, `${message} — expected ~${expected}, got ${actual}`); +function mockCtx(messages: any[] = []): any { + const entries = messages.map((msg, i) => ({ + type: "message", id: `entry-${i}`, + parentId: i > 0 ? 
`entry-${i - 1}` : null, + timestamp: new Date().toISOString(), message: msg, + })); + return { sessionManager: { getEntries: () => entries }, model: { id: "claude-sonnet-4-20250514" } }; } -// ─── Phase classification ───────────────────────────────────────────────────── +// ── Phase classification ───────────────────────────────────────────────────── -console.log("\n=== classifyUnitPhase ==="); +test("classifyUnitPhase maps unit types to phases", () => { + assert.equal(classifyUnitPhase("research-milestone"), "research"); + assert.equal(classifyUnitPhase("research-slice"), "research"); + assert.equal(classifyUnitPhase("plan-milestone"), "planning"); + assert.equal(classifyUnitPhase("plan-slice"), "planning"); + assert.equal(classifyUnitPhase("execute-task"), "execution"); + assert.equal(classifyUnitPhase("complete-slice"), "completion"); + assert.equal(classifyUnitPhase("reassess-roadmap"), "reassessment"); + assert.equal(classifyUnitPhase("unknown-thing"), "execution"); +}); -assertEq(classifyUnitPhase("research-milestone"), "research", "research-milestone → research"); -assertEq(classifyUnitPhase("research-slice"), "research", "research-slice → research"); -assertEq(classifyUnitPhase("plan-milestone"), "planning", "plan-milestone → planning"); -assertEq(classifyUnitPhase("plan-slice"), "planning", "plan-slice → planning"); -assertEq(classifyUnitPhase("execute-task"), "execution", "execute-task → execution"); -assertEq(classifyUnitPhase("complete-slice"), "completion", "complete-slice → completion"); -assertEq(classifyUnitPhase("reassess-roadmap"), "reassessment", "reassess-roadmap → reassessment"); -assertEq(classifyUnitPhase("unknown-thing"), "execution", "unknown → execution (fallback)"); +// ── getProjectTotals ───────────────────────────────────────────────────────── -// ─── getProjectTotals ───────────────────────────────────────────────────────── - -console.log("\n=== getProjectTotals ==="); - -{ +test("getProjectTotals aggregates tokens, cost, duration, 
and tool calls", () => { const units = [ makeUnit({ tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100, total: 1800 }, cost: 0.05, toolCalls: 3, startedAt: 1000, finishedAt: 2000 }), makeUnit({ tokens: { input: 2000, output: 1000, cacheRead: 400, cacheWrite: 200, total: 3600 }, cost: 0.10, toolCalls: 5, startedAt: 2000, finishedAt: 4000 }), ]; const totals = getProjectTotals(units); + assert.equal(totals.units, 2); + assert.equal(totals.tokens.input, 3000); + assert.equal(totals.tokens.output, 1500); + assert.equal(totals.tokens.total, 5400); + assert.ok(Math.abs(totals.cost - 0.15) < 0.001); + assert.equal(totals.toolCalls, 8); + assert.equal(totals.duration, 3000); +}); - assertEq(totals.units, 2, "total units"); - assertEq(totals.tokens.input, 3000, "total input tokens"); - assertEq(totals.tokens.output, 1500, "total output tokens"); - assertEq(totals.tokens.cacheRead, 600, "total cacheRead"); - assertEq(totals.tokens.cacheWrite, 300, "total cacheWrite"); - assertEq(totals.tokens.total, 5400, "total tokens"); - assertClose(totals.cost, 0.15, 0.001, "total cost"); - assertEq(totals.toolCalls, 8, "total tool calls"); - assertEq(totals.duration, 3000, "total duration"); -} - -{ +test("getProjectTotals handles empty input", () => { const totals = getProjectTotals([]); - assertEq(totals.units, 0, "empty: zero units"); - assertEq(totals.cost, 0, "empty: zero cost"); - assertEq(totals.tokens.total, 0, "empty: zero tokens"); -} + assert.equal(totals.units, 0); + assert.equal(totals.cost, 0); + assert.equal(totals.tokens.total, 0); +}); -// ─── aggregateByPhase ───────────────────────────────────────────────────────── +test("getProjectTotals aggregates budget fields", () => { + const units = [ + makeUnit({ truncationSections: 3, continueHereFired: true }), + makeUnit({ truncationSections: 2, continueHereFired: false }), + makeUnit({ truncationSections: 1, continueHereFired: true }), + ]; + const totals = getProjectTotals(units); + 
assert.equal(totals.totalTruncationSections, 6); + assert.equal(totals.continueHereFiredCount, 2); +}); -console.log("\n=== aggregateByPhase ==="); +test("getProjectTotals defaults budget fields to 0 for old units", () => { + const totals = getProjectTotals([makeUnit(), makeUnit()]); + assert.equal(totals.totalTruncationSections, 0); + assert.equal(totals.continueHereFiredCount, 0); +}); -{ +// ── aggregateByPhase ───────────────────────────────────────────────────────── + +test("aggregateByPhase groups units by phase and sums costs", () => { const units = [ makeUnit({ type: "research-milestone", cost: 0.02 }), makeUnit({ type: "research-slice", cost: 0.03 }), @@ -99,28 +120,17 @@ console.log("\n=== aggregateByPhase ==="); makeUnit({ type: "reassess-roadmap", cost: 0.005 }), ]; const phases = aggregateByPhase(units); + assert.equal(phases.length, 5); + assert.equal(phases[0].phase, "research"); + assert.equal(phases[0].units, 2); + assert.ok(Math.abs(phases[0].cost - 0.05) < 0.001); + assert.equal(phases[2].phase, "execution"); + assert.ok(Math.abs(phases[2].cost - 0.18) < 0.001); +}); - assertEq(phases.length, 5, "5 phases"); - assertEq(phases[0].phase, "research", "first phase is research"); - assertEq(phases[0].units, 2, "2 research units"); - assertClose(phases[0].cost, 0.05, 0.001, "research cost"); +// ── aggregateBySlice ───────────────────────────────────────────────────────── - assertEq(phases[1].phase, "planning", "second phase is planning"); - assertEq(phases[1].units, 2, "2 planning units"); - - assertEq(phases[2].phase, "execution", "third phase is execution"); - assertEq(phases[2].units, 2, "2 execution units"); - assertClose(phases[2].cost, 0.18, 0.001, "execution cost"); - - assertEq(phases[3].phase, "completion", "fourth phase is completion"); - assertEq(phases[4].phase, "reassessment", "fifth phase is reassessment"); -} - -// ─── aggregateBySlice ───────────────────────────────────────────────────────── - -console.log("\n=== aggregateBySlice 
==="); - -{ +test("aggregateBySlice groups units by slice ID", () => { const units = [ makeUnit({ id: "M001/S01/T01", cost: 0.05 }), makeUnit({ id: "M001/S01/T02", cost: 0.04 }), @@ -128,258 +138,116 @@ console.log("\n=== aggregateBySlice ==="); makeUnit({ id: "M001", type: "research-milestone", cost: 0.02 }), ]; const slices = aggregateBySlice(units); - - assertEq(slices.length, 3, "3 slice groups"); - + assert.equal(slices.length, 3); const s01 = slices.find(s => s.sliceId === "M001/S01"); - assertTrue(!!s01, "M001/S01 exists"); - assertEq(s01!.units, 2, "M001/S01 has 2 units"); - assertClose(s01!.cost, 0.09, 0.001, "M001/S01 cost"); + assert.ok(s01); + assert.equal(s01!.units, 2); + assert.ok(Math.abs(s01!.cost - 0.09) < 0.001); +}); - const s02 = slices.find(s => s.sliceId === "M001/S02"); - assertTrue(!!s02, "M001/S02 exists"); - assertEq(s02!.units, 1, "M001/S02 has 1 unit"); +// ── aggregateByModel ───────────────────────────────────────────────────────── - const mLevel = slices.find(s => s.sliceId === "M001"); - assertTrue(!!mLevel, "M001 (milestone-level) exists"); -} - -// ─── aggregateByModel ───────────────────────────────────────────────────────── - -console.log("\n=== aggregateByModel ==="); - -{ +test("aggregateByModel groups by model sorted by cost desc", () => { const units = [ makeUnit({ model: "claude-sonnet-4-20250514", cost: 0.05 }), makeUnit({ model: "claude-sonnet-4-20250514", cost: 0.04 }), makeUnit({ model: "claude-opus-4-20250514", cost: 0.30 }), ]; const models = aggregateByModel(units); + assert.equal(models.length, 2); + assert.equal(models[0].model, "claude-opus-4-20250514"); + assert.equal(models[1].units, 2); +}); - assertEq(models.length, 2, "2 models"); - // Sorted by cost desc — opus should be first - assertEq(models[0].model, "claude-opus-4-20250514", "opus first (higher cost)"); - assertClose(models[0].cost, 0.30, 0.001, "opus cost"); - assertEq(models[1].model, "claude-sonnet-4-20250514", "sonnet second"); - 
assertEq(models[1].units, 2, "sonnet has 2 units"); -} - -// ─── formatCost ─────────────────────────────────────────────────────────────── - -console.log("\n=== formatCost ==="); - -assertEq(formatCost(0), "$0.0000", "zero cost"); -assertEq(formatCost(0.001), "$0.0010", "sub-cent cost"); -assertEq(formatCost(0.05), "$0.050", "5 cents"); -assertEq(formatCost(1.50), "$1.50", "dollar+"); -assertEq(formatCost(14.20), "$14.20", "double digits"); - -// ─── formatTokenCount ───────────────────────────────────────────────────────── - -console.log("\n=== formatTokenCount ==="); - -assertEq(formatTokenCount(0), "0", "zero tokens"); -assertEq(formatTokenCount(500), "500", "sub-k"); -assertEq(formatTokenCount(1500), "1.5k", "1.5k"); -assertEq(formatTokenCount(150000), "150.0k", "150k"); -assertEq(formatTokenCount(1500000), "1.50M", "1.5M"); - -// ─── Backward compat: UnitMetrics without budget fields ─────────────────────── - -console.log("\n=== Backward compat: UnitMetrics without budget fields ==="); - -{ - // Simulate old metrics.json data — no budget fields present - const oldUnit: UnitMetrics = { - type: "execute-task", - id: "M001/S01/T01", - model: "claude-sonnet-4-20250514", - startedAt: 1000, - finishedAt: 2000, - tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100, total: 1800 }, - cost: 0.05, - toolCalls: 3, - assistantMessages: 2, - userMessages: 1, - }; - - // All aggregation functions must work with old data - const phases = aggregateByPhase([oldUnit]); - assertEq(phases.length, 1, "backward compat: aggregateByPhase works"); - assertEq(phases[0].phase, "execution", "backward compat: correct phase"); - - const slices = aggregateBySlice([oldUnit]); - assertEq(slices.length, 1, "backward compat: aggregateBySlice works"); - assertEq(slices[0].sliceId, "M001/S01", "backward compat: correct sliceId"); - - const models = aggregateByModel([oldUnit]); - assertEq(models.length, 1, "backward compat: aggregateByModel works"); - - const totals = 
getProjectTotals([oldUnit]); - assertEq(totals.units, 1, "backward compat: getProjectTotals works"); - assertClose(totals.cost, 0.05, 0.001, "backward compat: cost preserved"); - - // Budget fields should be undefined - assertEq(oldUnit.contextWindowTokens, undefined, "backward compat: no contextWindowTokens"); - assertEq(oldUnit.truncationSections, undefined, "backward compat: no truncationSections"); - assertEq(oldUnit.continueHereFired, undefined, "backward compat: no continueHereFired"); -} - -// ─── UnitMetrics with budget fields populated ───────────────────────────────── - -console.log("\n=== UnitMetrics with budget fields ==="); - -{ - const unitWithBudget: UnitMetrics = { - type: "execute-task", - id: "M002/S01/T03", - model: "claude-sonnet-4-20250514", - startedAt: 5000, - finishedAt: 10000, - tokens: { input: 3000, output: 1500, cacheRead: 600, cacheWrite: 300, total: 5400 }, - cost: 0.12, - toolCalls: 8, - assistantMessages: 4, - userMessages: 3, - contextWindowTokens: 200000, - truncationSections: 3, - continueHereFired: true, - }; - - // Budget fields are present - assertEq(unitWithBudget.contextWindowTokens, 200000, "budget: contextWindowTokens present"); - assertEq(unitWithBudget.truncationSections, 3, "budget: truncationSections present"); - assertEq(unitWithBudget.continueHereFired, true, "budget: continueHereFired present"); - - // Aggregation still works correctly with budget fields present - const phases = aggregateByPhase([unitWithBudget]); - assertEq(phases.length, 1, "budget: aggregateByPhase works"); - assertClose(phases[0].cost, 0.12, 0.001, "budget: cost aggregated correctly"); - - const slices = aggregateBySlice([unitWithBudget]); - assertEq(slices.length, 1, "budget: aggregateBySlice works"); - assertEq(slices[0].sliceId, "M002/S01", "budget: sliceId correct"); - - const models = aggregateByModel([unitWithBudget]); - assertEq(models.length, 1, "budget: aggregateByModel works"); - - const totals = getProjectTotals([unitWithBudget]); - 
assertEq(totals.units, 1, "budget: getProjectTotals works"); - assertEq(totals.toolCalls, 8, "budget: toolCalls aggregated"); - - // Mix old and new units together - const oldUnit = makeUnit(); // no budget fields - const mixed = [oldUnit, unitWithBudget]; - const mixedTotals = getProjectTotals(mixed); - assertEq(mixedTotals.units, 2, "mixed: 2 units total"); - assertClose(mixedTotals.cost, 0.17, 0.001, "mixed: costs summed correctly"); - - const mixedPhases = aggregateByPhase(mixed); - assertEq(mixedPhases.length, 1, "mixed: both are execution phase"); - assertEq(mixedPhases[0].units, 2, "mixed: both counted"); -} - -// ─── aggregateByModel: contextWindowTokens pick logic ───────────────────────── - -console.log("\n=== aggregateByModel: contextWindowTokens pick logic ==="); - -{ - // Single unit with contextWindowTokens — aggregate picks it - const units = [ - makeUnit({ model: "claude-sonnet-4-20250514", contextWindowTokens: 200000, cost: 0.05 }), - ]; - const models = aggregateByModel(units); - assertEq(models.length, 1, "ctxWindow: one model"); - assertEq(models[0].contextWindowTokens, 200000, "ctxWindow: picks value from unit"); -} - -{ - // Two units same model with different context windows — first defined value wins +test("aggregateByModel picks first defined contextWindowTokens", () => { const units = [ makeUnit({ model: "claude-sonnet-4-20250514", contextWindowTokens: 200000, cost: 0.05 }), makeUnit({ model: "claude-sonnet-4-20250514", contextWindowTokens: 150000, cost: 0.04 }), ]; const models = aggregateByModel(units); - assertEq(models.length, 1, "ctxWindow first-wins: one model"); - assertEq(models[0].contextWindowTokens, 200000, "ctxWindow first-wins: first value kept"); -} + assert.equal(models[0].contextWindowTokens, 200000); +}); -{ - // First unit undefined, second has value — second is picked - const units = [ - makeUnit({ model: "claude-sonnet-4-20250514", cost: 0.05 }), - makeUnit({ model: "claude-sonnet-4-20250514", contextWindowTokens: 
200000, cost: 0.04 }), - ]; - const models = aggregateByModel(units); - assertEq(models[0].contextWindowTokens, 200000, "ctxWindow: picks first defined, not first unit"); -} +// ── Formatting ─────────────────────────────────────────────────────────────── -{ - // Old units without contextWindowTokens — aggregate has undefined - const units = [ - makeUnit({ model: "claude-sonnet-4-20250514", cost: 0.05 }), - makeUnit({ model: "claude-sonnet-4-20250514", cost: 0.04 }), - ]; - const models = aggregateByModel(units); - assertEq(models[0].contextWindowTokens, undefined, "ctxWindow: undefined when no unit has it"); -} +test("formatCost formats dollar amounts correctly", () => { + assert.equal(formatCost(0), "$0.0000"); + assert.equal(formatCost(0.001), "$0.0010"); + assert.equal(formatCost(0.05), "$0.050"); + assert.equal(formatCost(1.50), "$1.50"); + assert.equal(formatCost(14.20), "$14.20"); +}); -{ - // Multiple models — each gets its own context window - const units = [ - makeUnit({ model: "claude-sonnet-4-20250514", contextWindowTokens: 200000, cost: 0.05 }), - makeUnit({ model: "claude-opus-4-20250514", contextWindowTokens: 200000, cost: 0.30 }), - ]; - const models = aggregateByModel(units); - assertEq(models.length, 2, "ctxWindow multi-model: 2 models"); - const opus = models.find(m => m.model === "claude-opus-4-20250514"); - const sonnet = models.find(m => m.model === "claude-sonnet-4-20250514"); - assertEq(opus!.contextWindowTokens, 200000, "ctxWindow multi-model: opus has value"); - assertEq(sonnet!.contextWindowTokens, 200000, "ctxWindow multi-model: sonnet has value"); -} +test("formatTokenCount uses k/M suffixes", () => { + assert.equal(formatTokenCount(0), "0"); + assert.equal(formatTokenCount(500), "500"); + assert.equal(formatTokenCount(1500), "1.5k"); + assert.equal(formatTokenCount(150000), "150.0k"); + assert.equal(formatTokenCount(1500000), "1.50M"); +}); -// ─── getProjectTotals: budget field aggregation ─────────────────────────────── +// ── 
Backward compatibility ─────────────────────────────────────────────────── -console.log("\n=== getProjectTotals: budget field aggregation ==="); +test("old UnitMetrics without budget fields work with all aggregation functions", () => { + const oldUnit = makeUnit(); + assert.equal(aggregateByPhase([oldUnit]).length, 1); + assert.equal(aggregateBySlice([oldUnit]).length, 1); + assert.equal(aggregateByModel([oldUnit]).length, 1); + assert.equal(getProjectTotals([oldUnit]).units, 1); + assert.equal(oldUnit.contextWindowTokens, undefined); +}); -{ - // Units with truncationSections and continueHereFired — verify sums/counts - const units = [ - makeUnit({ truncationSections: 3, continueHereFired: true }), - makeUnit({ truncationSections: 2, continueHereFired: false }), - makeUnit({ truncationSections: 1, continueHereFired: true }), - ]; - const totals = getProjectTotals(units); - assertEq(totals.totalTruncationSections, 6, "budget totals: truncation sections summed"); - assertEq(totals.continueHereFiredCount, 2, "budget totals: continueHereFired counted"); -} +// ── Disk I/O ───────────────────────────────────────────────────────────────── -{ - // Old units without budget fields — verify 0 defaults - const units = [makeUnit(), makeUnit()]; - const totals = getProjectTotals(units); - assertEq(totals.totalTruncationSections, 0, "budget totals backward compat: truncation = 0"); - assertEq(totals.continueHereFiredCount, 0, "budget totals backward compat: continueHere = 0"); -} +test("initMetrics creates ledger, snapshotUnitMetrics persists across resets", () => { + const tmpBase = mkdtempSync(join(tmpdir(), "gsd-metrics-test-")); + mkdirSync(join(tmpBase, ".gsd"), { recursive: true }); -{ - // Mixed old and new units - const units = [ - makeUnit(), // old, no budget fields - makeUnit({ truncationSections: 5, continueHereFired: true }), - ]; - const totals = getProjectTotals(units); - assertEq(totals.totalTruncationSections, 5, "budget totals mixed: only new unit 
contributes"); - assertEq(totals.continueHereFiredCount, 1, "budget totals mixed: only one fired"); -} + try { + resetMetrics(); + assert.equal(getLedger(), null); -{ - // Empty input — safe defaults - const totals = getProjectTotals([]); - assertEq(totals.totalTruncationSections, 0, "budget totals empty: truncation = 0"); - assertEq(totals.continueHereFiredCount, 0, "budget totals empty: continueHere = 0"); -} + initMetrics(tmpBase); + const ledger = getLedger(); + assert.ok(ledger); + assert.equal(ledger!.version, 1); + assert.equal(ledger!.units.length, 0); -// ─── Summary ────────────────────────────────────────────────────────────────── + // Snapshot a unit + const ctx = mockCtx([ + { role: "user", content: "Do the thing" }, + { + role: "assistant", + content: [{ type: "text", text: "Done" }], + usage: { + input: 5000, output: 2000, cacheRead: 3000, cacheWrite: 500, totalTokens: 10500, + cost: { input: 0.015, output: 0.03, cacheRead: 0.003, cacheWrite: 0.002, total: 0.05 }, + }, + }, + ]); + const unit = snapshotUnitMetrics(ctx, "execute-task", "M001/S01/T01", Date.now() - 5000, "claude-sonnet-4-20250514"); + assert.ok(unit); + assert.equal(unit!.type, "execute-task"); + assert.equal(unit!.tokens.input, 5000); -report(); + // Persist and reload + resetMetrics(); + initMetrics(tmpBase); + assert.equal(getLedger()!.units.length, 1); + assert.equal(getLedger()!.units[0].id, "M001/S01/T01"); + + // Verify file content + const raw = readFileSync(join(tmpBase, ".gsd", "metrics.json"), "utf-8"); + const parsed: MetricsLedger = JSON.parse(raw); + assert.equal(parsed.version, 1); + assert.equal(parsed.units.length, 1); + + // Empty session returns null + const emptyUnit = snapshotUnitMetrics(mockCtx([]), "plan-slice", "M001/S01", Date.now(), "test-model"); + assert.equal(emptyUnit, null); + assert.equal(getLedger()!.units.length, 1); + } finally { + resetMetrics(); + rmSync(tmpBase, { recursive: true, force: true }); + } +}); diff --git 
a/src/resources/extensions/gsd/tests/network-error-fallback.test.ts b/src/resources/extensions/gsd/tests/network-error-fallback.test.ts deleted file mode 100644 index 41f7f7694..000000000 --- a/src/resources/extensions/gsd/tests/network-error-fallback.test.ts +++ /dev/null @@ -1,104 +0,0 @@ -import test from "node:test"; -import assert from "node:assert/strict"; - -// Instead of trying to mock out the entire `index.ts` extension initialization which touches -// the disk and parses files, we test the logic via the standard test methods, or we can -// just test that `resolveModelWithFallbacksForUnit` returns the correct format since -// the fallback rotation logic itself was verified manually. - -import { getNextFallbackModel, isTransientNetworkError } from "../preferences.ts"; - -test("getNextFallbackModel selects next fallback if current is a fallback", () => { - const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] }; - const currentModelId = "model-b"; - - const nextModelId = getNextFallbackModel(currentModelId, modelConfig); - - assert.equal(nextModelId, "model-c", "should select next model after current fallback"); -}); - -test("getNextFallbackModel returns undefined if fallbacks exhausted", () => { - const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] }; - const currentModelId = "model-c"; - - const nextModelId = getNextFallbackModel(currentModelId, modelConfig); - - assert.equal(nextModelId, undefined, "should return undefined when exhausted"); -}); - -test("getNextFallbackModel finds current model when formatted with provider", () => { - const modelConfig = { primary: "p/model-a", fallbacks: ["p/model-b"] }; - const currentModelId = "model-a"; // context model doesn't always have provider in ID - - const nextModelId = getNextFallbackModel(currentModelId, modelConfig); - - assert.equal(nextModelId, "p/model-b", "should select next model after current with provider format"); -}); - -test("getNextFallbackModel 
returns primary if current model is not in the list", () => { - const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] }; - const currentModelId = "model-x"; // completely different model manually selected - - const nextModelId = getNextFallbackModel(currentModelId, modelConfig); - - assert.equal(nextModelId, "model-a", "should default to primary if current is unknown"); -}); - -test("getNextFallbackModel returns primary if current model is undefined", () => { - const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] }; - const currentModelId = undefined; - - const nextModelId = getNextFallbackModel(currentModelId, modelConfig); - - assert.equal(nextModelId, "model-a", "should default to primary if current is undefined"); -}); - -// ── isTransientNetworkError tests ──────────────────────────────────────────── - -test("isTransientNetworkError detects ECONNRESET", () => { - assert.ok(isTransientNetworkError("fetch failed: ECONNRESET")); -}); - -test("isTransientNetworkError detects ETIMEDOUT", () => { - assert.ok(isTransientNetworkError("ETIMEDOUT: request timed out")); -}); - -test("isTransientNetworkError detects generic network error", () => { - assert.ok(isTransientNetworkError("network error")); -}); - -test("isTransientNetworkError detects socket hang up", () => { - assert.ok(isTransientNetworkError("socket hang up")); -}); - -test("isTransientNetworkError detects fetch failed", () => { - assert.ok(isTransientNetworkError("fetch failed")); -}); - -test("isTransientNetworkError detects connection reset", () => { - assert.ok(isTransientNetworkError("connection was reset by peer")); -}); - -test("isTransientNetworkError detects DNS errors", () => { - assert.ok(isTransientNetworkError("dns resolution failed")); -}); - -test("isTransientNetworkError rejects auth errors", () => { - assert.ok(!isTransientNetworkError("unauthorized: invalid API key")); -}); - -test("isTransientNetworkError rejects quota errors", () => { - 
assert.ok(!isTransientNetworkError("quota exceeded")); -}); - -test("isTransientNetworkError rejects billing errors", () => { - assert.ok(!isTransientNetworkError("billing issue: network payment required")); -}); - -test("isTransientNetworkError rejects empty string", () => { - assert.ok(!isTransientNetworkError("")); -}); - -test("isTransientNetworkError rejects non-network errors", () => { - assert.ok(!isTransientNetworkError("model not found")); -}); diff --git a/src/resources/extensions/gsd/tests/provider-error-classify.test.ts b/src/resources/extensions/gsd/tests/provider-error-classify.test.ts deleted file mode 100644 index 6e2511e41..000000000 --- a/src/resources/extensions/gsd/tests/provider-error-classify.test.ts +++ /dev/null @@ -1,95 +0,0 @@ -import test from "node:test"; -import assert from "node:assert/strict"; -import { classifyProviderError } from "../provider-error-pause.ts"; - -// ── Rate limit detection ───────────────────────────────────────────────────── - -test("classifyProviderError detects rate limit from 429", () => { - const result = classifyProviderError("HTTP 429 Too Many Requests"); - assert.ok(result.isTransient); - assert.ok(result.isRateLimit); - assert.ok(result.suggestedDelayMs > 0); -}); - -test("classifyProviderError detects rate limit from message", () => { - const result = classifyProviderError("rate limit exceeded"); - assert.ok(result.isTransient); - assert.ok(result.isRateLimit); -}); - -test("classifyProviderError extracts reset delay from message", () => { - const result = classifyProviderError("rate limit exceeded, reset in 45s"); - assert.equal(result.suggestedDelayMs, 45000); -}); - -test("classifyProviderError defaults to 60s for rate limit without reset", () => { - const result = classifyProviderError("too many requests"); - assert.equal(result.suggestedDelayMs, 60000); -}); - -// ── Server error detection ─────────────────────────────────────────────────── - -test("classifyProviderError detects Anthropic internal 
server error", () => { - const msg = '{"type":"error","error":{"details":null,"type":"api_error","message":"Internal server error"}}'; - const result = classifyProviderError(msg); - assert.ok(result.isTransient, "should be transient"); - assert.ok(!result.isRateLimit, "should not be rate limit"); - assert.equal(result.suggestedDelayMs, 30000, "should suggest 30s delay"); -}); - -test("classifyProviderError detects overloaded error", () => { - const result = classifyProviderError("overloaded_error: Overloaded"); - assert.ok(result.isTransient); - assert.equal(result.suggestedDelayMs, 30000); -}); - -test("classifyProviderError detects 503 service unavailable", () => { - const result = classifyProviderError("503 Service Unavailable"); - assert.ok(result.isTransient); -}); - -test("classifyProviderError detects 502 bad gateway", () => { - const result = classifyProviderError("502 Bad Gateway"); - assert.ok(result.isTransient); -}); - -// ── Permanent error detection ──────────────────────────────────────────────── - -test("classifyProviderError detects auth error as permanent", () => { - const result = classifyProviderError("unauthorized: invalid API key"); - assert.ok(!result.isTransient); - assert.ok(!result.isRateLimit); - assert.equal(result.suggestedDelayMs, 0); -}); - -test("classifyProviderError detects billing error as permanent", () => { - const result = classifyProviderError("billing issue: payment required"); - assert.ok(!result.isTransient); -}); - -test("classifyProviderError detects quota exceeded as permanent", () => { - const result = classifyProviderError("quota exceeded for this account"); - assert.ok(!result.isTransient); -}); - -// ── Unknown errors ─────────────────────────────────────────────────────────── - -test("classifyProviderError treats unknown error as permanent", () => { - const result = classifyProviderError("something went wrong"); - assert.ok(!result.isTransient); - assert.equal(result.suggestedDelayMs, 0); -}); - 
-test("classifyProviderError treats empty string as permanent", () => { - const result = classifyProviderError(""); - assert.ok(!result.isTransient); -}); - -// ── Edge: rate limit + auth (rate limit wins) ──────────────────────────────── - -test("classifyProviderError: rate limit takes precedence over auth keywords", () => { - // Edge case: "rate limit" in message that also mentions auth - const result = classifyProviderError("rate limit on auth endpoint"); - assert.ok(result.isTransient); - assert.ok(result.isRateLimit); -}); diff --git a/src/resources/extensions/gsd/tests/provider-errors.test.ts b/src/resources/extensions/gsd/tests/provider-errors.test.ts new file mode 100644 index 000000000..2a84b61c7 --- /dev/null +++ b/src/resources/extensions/gsd/tests/provider-errors.test.ts @@ -0,0 +1,245 @@ +/** + * Provider error handling tests — consolidated from: + * - provider-error-classify.test.ts (classifyProviderError) + * - network-error-fallback.test.ts (isTransientNetworkError, getNextFallbackModel) + * - agent-end-provider-error.test.ts (pauseAutoForProviderError) + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { classifyProviderError, pauseAutoForProviderError } from "../provider-error-pause.ts"; +import { getNextFallbackModel, isTransientNetworkError } from "../preferences.ts"; + +// ── classifyProviderError ──────────────────────────────────────────────────── + +test("classifyProviderError detects rate limit from 429", () => { + const result = classifyProviderError("HTTP 429 Too Many Requests"); + assert.ok(result.isTransient); + assert.ok(result.isRateLimit); + assert.ok(result.suggestedDelayMs > 0); +}); + +test("classifyProviderError detects rate limit from message", () => { + const result = classifyProviderError("rate limit exceeded"); + assert.ok(result.isTransient); + assert.ok(result.isRateLimit); +}); + +test("classifyProviderError extracts reset delay from message", () => { + const result = 
classifyProviderError("rate limit exceeded, reset in 45s"); + assert.ok(result.isRateLimit); + assert.equal(result.suggestedDelayMs, 45000); +}); + +test("classifyProviderError defaults to 60s for rate limit without reset", () => { + const result = classifyProviderError("429 too many requests"); + assert.ok(result.isRateLimit); + assert.equal(result.suggestedDelayMs, 60_000); +}); + +test("classifyProviderError detects Anthropic internal server error", () => { + const msg = '{"type":"error","error":{"details":null,"type":"api_error","message":"Internal server error"}}'; + const result = classifyProviderError(msg); + assert.ok(result.isTransient); + assert.ok(!result.isRateLimit); + assert.equal(result.suggestedDelayMs, 30_000); +}); + +test("classifyProviderError detects overloaded error", () => { + const result = classifyProviderError("overloaded_error: Overloaded"); + assert.ok(result.isTransient); + assert.equal(result.suggestedDelayMs, 30_000); +}); + +test("classifyProviderError detects 503 service unavailable", () => { + const result = classifyProviderError("HTTP 503 Service Unavailable"); + assert.ok(result.isTransient); +}); + +test("classifyProviderError detects 502 bad gateway", () => { + const result = classifyProviderError("HTTP 502 Bad Gateway"); + assert.ok(result.isTransient); +}); + +test("classifyProviderError detects auth error as permanent", () => { + const result = classifyProviderError("unauthorized: invalid API key"); + assert.ok(!result.isTransient); + assert.ok(!result.isRateLimit); +}); + +test("classifyProviderError detects billing error as permanent", () => { + const result = classifyProviderError("billing issue: payment required"); + assert.ok(!result.isTransient); +}); + +test("classifyProviderError detects quota exceeded as permanent", () => { + const result = classifyProviderError("quota exceeded for this month"); + assert.ok(!result.isTransient); +}); + +test("classifyProviderError treats unknown error as permanent", () => { + const 
result = classifyProviderError("something went wrong"); + assert.ok(!result.isTransient); +}); + +test("classifyProviderError treats empty string as permanent", () => { + const result = classifyProviderError(""); + assert.ok(!result.isTransient); +}); + +test("classifyProviderError: rate limit takes precedence over auth keywords", () => { + const result = classifyProviderError("429 unauthorized rate limit"); + assert.ok(result.isRateLimit); + assert.ok(result.isTransient); +}); + +// ── isTransientNetworkError ────────────────────────────────────────────────── + +test("isTransientNetworkError detects ECONNRESET", () => { + assert.ok(isTransientNetworkError("fetch failed: ECONNRESET")); +}); + +test("isTransientNetworkError detects ETIMEDOUT", () => { + assert.ok(isTransientNetworkError("ETIMEDOUT: request timed out")); +}); + +test("isTransientNetworkError detects generic network error", () => { + assert.ok(isTransientNetworkError("network error")); +}); + +test("isTransientNetworkError detects socket hang up", () => { + assert.ok(isTransientNetworkError("socket hang up")); +}); + +test("isTransientNetworkError detects fetch failed", () => { + assert.ok(isTransientNetworkError("fetch failed")); +}); + +test("isTransientNetworkError detects connection reset", () => { + assert.ok(isTransientNetworkError("connection was reset by peer")); +}); + +test("isTransientNetworkError detects DNS errors", () => { + assert.ok(isTransientNetworkError("dns resolution failed")); +}); + +test("isTransientNetworkError rejects auth errors", () => { + assert.ok(!isTransientNetworkError("unauthorized: invalid API key")); +}); + +test("isTransientNetworkError rejects quota errors", () => { + assert.ok(!isTransientNetworkError("quota exceeded")); +}); + +test("isTransientNetworkError rejects billing errors", () => { + assert.ok(!isTransientNetworkError("billing issue: network payment required")); +}); + +test("isTransientNetworkError rejects empty string", () => { + 
assert.ok(!isTransientNetworkError("")); +}); + +test("isTransientNetworkError rejects non-network errors", () => { + assert.ok(!isTransientNetworkError("model not found")); +}); + +// ── getNextFallbackModel ───────────────────────────────────────────────────── + +test("getNextFallbackModel selects next fallback if current is a fallback", () => { + const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] }; + assert.equal(getNextFallbackModel("model-b", modelConfig), "model-c"); +}); + +test("getNextFallbackModel returns undefined if fallbacks exhausted", () => { + const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] }; + assert.equal(getNextFallbackModel("model-c", modelConfig), undefined); +}); + +test("getNextFallbackModel finds current model with provider prefix", () => { + const modelConfig = { primary: "p/model-a", fallbacks: ["p/model-b"] }; + assert.equal(getNextFallbackModel("model-a", modelConfig), "p/model-b"); +}); + +test("getNextFallbackModel returns primary if current is unknown", () => { + const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] }; + assert.equal(getNextFallbackModel("model-x", modelConfig), "model-a"); +}); + +test("getNextFallbackModel returns primary if current is undefined", () => { + const modelConfig = { primary: "model-a", fallbacks: ["model-b", "model-c"] }; + assert.equal(getNextFallbackModel(undefined, modelConfig), "model-a"); +}); + +// ── pauseAutoForProviderError ──────────────────────────────────────────────── + +test("pauseAutoForProviderError warns and pauses without requiring ctx.log", async () => { + const notifications: Array<{ message: string; level: string }> = []; + let pauseCalls = 0; + + await pauseAutoForProviderError( + { notify(message, level?) { notifications.push({ message, level: level ?? 
"info" }); } }, + ": terminated", + async () => { pauseCalls += 1; }, + ); + + assert.equal(pauseCalls, 1); + assert.deepEqual(notifications, [ + { message: "Auto-mode paused due to provider error: terminated", level: "warning" }, + ]); +}); + +test("pauseAutoForProviderError schedules auto-resume for rate limit errors", async () => { + const notifications: Array<{ message: string; level: string }> = []; + let pauseCalls = 0; + let resumeCalled = false; + + const originalSetTimeout = globalThis.setTimeout; + const timers: Array<{ fn: () => void; delay: number }> = []; + globalThis.setTimeout = ((fn: () => void, delay: number) => { + timers.push({ fn, delay }); + return 0 as unknown as ReturnType<typeof setTimeout>; + }) as typeof setTimeout; + + try { + await pauseAutoForProviderError( + { notify(message, level?) { notifications.push({ message, level: level ?? "info" }); } }, + ": rate limit exceeded", + async () => { pauseCalls += 1; }, + { isRateLimit: true, retryAfterMs: 90000, resume: () => { resumeCalled = true; } }, + ); + + assert.equal(pauseCalls, 1); + assert.equal(timers.length, 1); + assert.equal(timers[0].delay, 90000); + assert.deepEqual(notifications[0], { + message: "Rate limited: rate limit exceeded. Auto-resuming in 90s...", + level: "warning", + }); + + timers[0].fn(); + assert.equal(resumeCalled, true); + assert.deepEqual(notifications[1], { + message: "Rate limit window elapsed. Resuming auto-mode.", + level: "info", + }); + } finally { + globalThis.setTimeout = originalSetTimeout; + } +}); + +test("pauseAutoForProviderError falls back to indefinite pause when not rate limit", async () => { + const notifications: Array<{ message: string; level: string }> = []; + let pauseCalls = 0; + + await pauseAutoForProviderError( + { notify(message, level?) { notifications.push({ message, level: level ?? 
"info" }); } }, + ": connection refused", + async () => { pauseCalls += 1; }, + { isRateLimit: false }, + ); + + assert.equal(pauseCalls, 1); + assert.deepEqual(notifications, [ + { message: "Auto-mode paused due to provider error: connection refused", level: "warning" }, + ]); +});