From 8d11e5d5074eed6b9a1794d29a869ff22ab6fe81 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Sun, 5 Apr 2026 15:52:26 -0500 Subject: [PATCH] test: add regression tests for adversarial review fixes (#3576) - git-checkpoint: rollback on checked-out branch, detached HEAD, ref cleanup - ollama streaming: terminal done:true chunk content preservation - provider registration: preflush clears queue to prevent double registration --- .../extensions/provider-registration.test.ts | 81 ++++++++++++++++ .../gsd/tests/git-checkpoint.test.ts | 94 +++++++++++++++++++ .../tests/ollama-chat-provider-stream.test.ts | 82 ++++++++++++++++ 3 files changed, 257 insertions(+) create mode 100644 packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts create mode 100644 src/resources/extensions/gsd/tests/git-checkpoint.test.ts create mode 100644 src/resources/extensions/ollama/tests/ollama-chat-provider-stream.test.ts diff --git a/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts b/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts new file mode 100644 index 000000000..2679feae6 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts @@ -0,0 +1,81 @@ +// GSD2 — Regression test: pendingProviderRegistrations must be flushed exactly once (#3576) +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +/** + * This test validates that the provider preflush pattern in sdk.ts clears + * pendingProviderRegistrations after iterating, so bindCore() doesn't + * re-register the same providers. + * + * The bug: createAgentSession() iterated pendingProviderRegistrations but + * did not clear the array. Later, bindCore() replayed and registered the + * same providers again, stacking wrappers. + */ + +interface ProviderEntry { + name: string; + config: Record; +} + +interface MockRuntime { + pendingProviderRegistrations: ProviderEntry[]; +} + +describe("provider registration preflush", () => { + it("clears pending registrations after preflush so bindCore does not replay", () => { + const registered: string[] = []; + const runtime: MockRuntime = { + pendingProviderRegistrations: [ + { name: "ollama", config: { type: "ollama" } }, + { name: "custom-provider", config: { type: "custom" } }, + ], + }; + + // Simulate sdk.ts preflush (lines 220-223) + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + // The fix: clear after preflush + runtime.pendingProviderRegistrations = []; + + // Simulate bindCore() flush (runner.ts lines 268-271) + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + runtime.pendingProviderRegistrations = []; + + assert.deepEqual( + registered, + ["ollama", "custom-provider"], + "each provider should be registered exactly once", + ); + }); + + it("without the fix, providers are registered twice", () => { + const registered: string[] = []; + const runtime: MockRuntime = { + pendingProviderRegistrations: [ + { name: "ollama", config: { type: "ollama" } }, + ], + }; + + // Old behavior: preflush without clearing + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + // NOT clearing — simulating the old bug + + // bindCore() replays the same queue + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + + assert.deepEqual( + registered, + ["ollama", "ollama"], + "without clearing, providers are registered twice (demonstrating the bug)", + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/git-checkpoint.test.ts b/src/resources/extensions/gsd/tests/git-checkpoint.test.ts new file mode 100644 index 000000000..33cd3829f --- /dev/null +++ b/src/resources/extensions/gsd/tests/git-checkpoint.test.ts @@ -0,0 +1,94 @@ +// GSD2 — Regression tests for git-checkpoint rollback (#3576) +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { execFileSync } from "node:child_process"; +import { createCheckpoint, rollbackToCheckpoint, cleanupCheckpoint } from "../safety/git-checkpoint.js"; + +function git(args: string[], cwd: string): string { + return execFileSync("git", args, { cwd, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" }).trim(); +} + +function createTempRepo(): string { + const dir = mkdtempSync(join(tmpdir(), "ckpt-test-")); + git(["init"], dir); + git(["config", "user.email", "test@test.com"], dir); + git(["config", "user.name", "Test"], dir); + writeFileSync(join(dir, "file.txt"), "initial\n"); + git(["add", "."], dir); + git(["commit", "-m", "init"], dir); + git(["branch", "-M", "main"], dir); + return dir; +} + +describe("git-checkpoint rollback", () => { + it("rolls back to checkpoint on checked-out branch", (t) => { + const repo = createTempRepo(); + t.after(() => rmSync(repo, { recursive: true, force: true })); + + // Create checkpoint at initial commit + const sha = createCheckpoint(repo, "unit-1"); + assert.ok(sha, "checkpoint should return a SHA"); + + // Make a second commit + writeFileSync(join(repo, "file.txt"), "modified\n"); + git(["add", "."], repo); + git(["commit", "-m", "second"], repo); + + const headBefore = git(["rev-parse", "HEAD"], repo); + assert.notEqual(headBefore, sha, "HEAD should have advanced"); + + // Rollback — this must work on the checked-out branch + const result = rollbackToCheckpoint(repo, "unit-1", sha); + assert.equal(result, true, "rollback should succeed"); + + const headAfter = git(["rev-parse", "HEAD"], repo); + assert.equal(headAfter, sha, "HEAD should match checkpoint SHA after rollback"); + }); + + it("returns false on detached HEAD", (t) => { + const repo = createTempRepo(); + t.after(() => rmSync(repo, { recursive: true, force: true })); + + const sha = git(["rev-parse", "HEAD"], repo); + git(["checkout", "--detach", sha], repo); + + const result = rollbackToCheckpoint(repo, "unit-2", sha); + assert.equal(result, false, "rollback should fail on detached HEAD"); + }); + + it("cleans up checkpoint ref after rollback", (t) => { + const repo = createTempRepo(); + t.after(() => rmSync(repo, { recursive: true, force: true })); + + const sha = createCheckpoint(repo, "unit-3"); + assert.ok(sha); + + // Ref should exist + const refBefore = git(["for-each-ref", "refs/gsd/checkpoints/unit-3", "--format=%(objectname)"], repo); + assert.equal(refBefore, sha); + + rollbackToCheckpoint(repo, "unit-3", sha); + + // Ref should be cleaned up + const refAfter = git(["for-each-ref", "refs/gsd/checkpoints/unit-3", "--format=%(objectname)"], repo); + assert.equal(refAfter, "", "checkpoint ref should be removed after rollback"); + }); + + it("cleanupCheckpoint removes the ref without error", (t) => { + const repo = createTempRepo(); + t.after(() => rmSync(repo, { recursive: true, force: true })); + + const sha = createCheckpoint(repo, "unit-4"); + assert.ok(sha); + + cleanupCheckpoint(repo, "unit-4"); + + const ref = git(["for-each-ref", "refs/gsd/checkpoints/unit-4", "--format=%(objectname)"], repo); + assert.equal(ref, "", "ref should be gone"); + }); +}); diff --git a/src/resources/extensions/ollama/tests/ollama-chat-provider-stream.test.ts b/src/resources/extensions/ollama/tests/ollama-chat-provider-stream.test.ts new file mode 100644 index 000000000..bc3982c6e --- /dev/null +++ b/src/resources/extensions/ollama/tests/ollama-chat-provider-stream.test.ts @@ -0,0 +1,82 @@ +// GSD2 — Regression test: Ollama streaming must not drop content on done:true chunks (#3576) +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +/** + * This test validates the streaming logic pattern used in ollama-chat-provider.ts. + * The bug: content on the terminal done:true chunk was silently dropped because + * the stream loop only emitted content when `!chunk.done`. + * + * The fix: process chunk.message.content regardless of chunk.done, then handle + * done metadata. This test exercises that logic path with a simulated chunk stream. + */ + +interface OllamaChunk { + done: boolean; + done_reason?: string; + message?: { content?: string; tool_calls?: unknown[] }; + prompt_eval_count?: number; + eval_count?: number; +} + +function simulateStreamLoop(chunks: OllamaChunk[]): string { + let output = ""; + + for (const chunk of chunks) { + // This mirrors the fixed logic in ollama-chat-provider.ts + const content = chunk.message?.content ?? ""; + if (content) { + output += content; + } + + if (chunk.done) { + break; + } + } + + return output; +} + +describe("Ollama stream terminal chunk handling", () => { + it("captures content from done:true chunk", () => { + const chunks: OllamaChunk[] = [ + { done: false, message: { content: "Hello " } }, + { done: false, message: { content: "world" } }, + { done: true, done_reason: "stop", message: { content: "!" } }, + ]; + + const result = simulateStreamLoop(chunks); + assert.equal(result, "Hello world!", "trailing content on done chunk must not be dropped"); + }); + + it("works when done chunk has no content", () => { + const chunks: OllamaChunk[] = [ + { done: false, message: { content: "Hello" } }, + { done: true, done_reason: "stop", message: {} }, + ]; + + const result = simulateStreamLoop(chunks); + assert.equal(result, "Hello"); + }); + + it("works when done chunk has empty string content", () => { + const chunks: OllamaChunk[] = [ + { done: false, message: { content: "data" } }, + { done: true, done_reason: "stop", message: { content: "" } }, + ]; + + const result = simulateStreamLoop(chunks); + assert.equal(result, "data"); + }); + + it("handles single done chunk with content", () => { + const chunks: OllamaChunk[] = [ + { done: true, done_reason: "stop", message: { content: "one-shot" } }, + ]; + + const result = simulateStreamLoop(chunks); + assert.equal(result, "one-shot", "single done chunk with content should work"); + }); +});