Merge remote-tracking branch 'origin/main' into fix/headless-auto-timeout-2586

This commit is contained in:
Lex Christopherson 2026-03-25 22:26:07 -06:00
commit 1d139da06e
32 changed files with 1039 additions and 126 deletions

View file

@ -135,7 +135,10 @@ export function agentLoopContinue(
(async () => {
const newMessages: AgentMessage[] = [];
const currentContext: AgentContext = { ...context };
const currentContext: AgentContext = {
...context,
messages: [...context.messages],
};
stream.push({ type: "agent_start" });
stream.push({ type: "turn_start" });

View file

@ -451,6 +451,7 @@ async function* parseSSE(response: Response): AsyncGenerator<Record<string, unkn
const OPENAI_BETA_RESPONSES_WEBSOCKETS = "responses_websockets=2026-02-06";
const SESSION_WEBSOCKET_CACHE_TTL_MS = 5 * 60 * 1000;
const MAX_WEBSOCKET_CACHE_SIZE = 10;
type WebSocketEventType = "open" | "message" | "error" | "close";
type WebSocketListener = (event: unknown) => void;
@ -635,6 +636,20 @@ async function acquireWebSocket(
const socket = await connectWebSocket(url, headers, signal);
const entry: CachedWebSocketConnection = { socket, busy: true };
// Evict the oldest entry if the cache is at capacity (LRU eviction).
if (websocketSessionCache.size >= MAX_WEBSOCKET_CACHE_SIZE) {
const oldestKey = websocketSessionCache.keys().next().value;
if (oldestKey) {
const oldEntry = websocketSessionCache.get(oldestKey);
websocketSessionCache.delete(oldestKey);
if (oldEntry) {
if (oldEntry.idleTimer) clearTimeout(oldEntry.idleTimer);
closeWebSocketSilently(oldEntry.socket);
}
}
}
websocketSessionCache.set(sessionId, entry);
return {
socket,
@ -705,12 +720,19 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy
resolve();
};
const cleanup = () => {
socket.removeEventListener("message", onMessage);
socket.removeEventListener("error", onError);
socket.removeEventListener("close", onClose);
signal?.removeEventListener("abort", onAbort);
};
const onMessage: WebSocketListener = (event) => {
void (async () => {
if (!event || typeof event !== "object" || !("data" in event)) return;
const text = await decodeWebSocketData((event as { data?: unknown }).data);
if (!text) return;
try {
if (!event || typeof event !== "object" || !("data" in event)) return;
const text = await decodeWebSocketData((event as { data?: unknown }).data);
if (!text) return;
const parsed = JSON.parse(text) as Record<string, unknown>;
const type = typeof parsed.type === "string" ? parsed.type : "";
if (type === "response.completed" || type === "response.done") {
@ -719,7 +741,19 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy
}
queue.push(parsed);
wake();
} catch {}
} catch (err) {
// Ensure listeners are cleaned up if the async handler errors.
// Without this, the fire-and-forget promise would swallow the
// error while leaving listeners attached to the socket.
if (err instanceof SyntaxError) {
// JSON parse failure — skip the malformed message.
return;
}
failed = err instanceof Error ? err : new Error(String(err));
done = true;
cleanup();
wake();
}
})();
};
@ -775,10 +809,7 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy
throw new Error("WebSocket stream closed before response.completed");
}
} finally {
socket.removeEventListener("message", onMessage);
socket.removeEventListener("error", onError);
socket.removeEventListener("close", onClose);
signal?.removeEventListener("abort", onAbort);
cleanup();
}
}

View file

@ -6,7 +6,7 @@
* provides automatic deduplication across sessions.
*/
import { createHash } from "node:crypto";
import { mkdirSync, readdirSync, readFileSync, writeFileSync, existsSync, accessSync, unlinkSync, statSync } from "node:fs";
import { mkdirSync, readdirSync, readFileSync, writeFileSync, accessSync, unlinkSync, statSync } from "node:fs";
import { join } from "node:path";
const BLOB_PREFIX = "blob:sha256:";
@ -37,8 +37,11 @@ export class BlobStore {
},
};
if (!existsSync(blobPath)) {
writeFileSync(blobPath, data);
try {
writeFileSync(blobPath, data, { flag: "wx" }); // Atomic: fails if file exists
} catch (err: any) {
if (err.code !== "EEXIST") throw err;
// File already exists — expected for content-addressed storage
}
return result;
}

View file

@ -3,7 +3,7 @@
* Stores results at {agentDir}/discovery-cache.json with per-provider TTLs.
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from "fs";
import { dirname, join } from "path";
import { getAgentDir } from "../config.js";
import { type DiscoveredModel, getDefaultTTL } from "./model-discovery.js";
@ -35,6 +35,8 @@ export class ModelDiscoveryCache {
}
set(provider: string, models: DiscoveredModel[], ttlMs?: number): void {
// Re-read from disk to get the latest state before modifying
this.load();
this.data.entries[provider] = {
models,
fetchedAt: Date.now(),
@ -50,6 +52,8 @@ export class ModelDiscoveryCache {
}
clear(provider?: string): void {
// Re-read from disk to get the latest state before modifying
this.load();
if (provider) {
delete this.data.entries[provider];
} else {
@ -89,7 +93,10 @@ export class ModelDiscoveryCache {
if (!existsSync(dir)) {
mkdirSync(dir, { recursive: true });
}
writeFileSync(this.cachePath, JSON.stringify(this.data, null, 2), "utf-8");
// Atomic write: write to temp file then rename to avoid partial reads
const tmpPath = this.cachePath + ".tmp";
writeFileSync(tmpPath, JSON.stringify(this.data, null, 2), "utf-8");
renameSync(tmpPath, this.cachePath);
} catch {
// Silently ignore write failures (read-only FS, permissions, etc.)
}

View file

@ -3428,14 +3428,6 @@ export class InteractiveMode {
this.ui.setFocus(dialog);
this.ui.requestRender();
// Promise for manual code input (racing with callback server)
let manualCodeResolve: ((code: string) => void) | undefined;
let manualCodeReject: ((err: Error) => void) | undefined;
const manualCodePromise = new Promise<string>((resolve, reject) => {
manualCodeResolve = resolve;
manualCodeReject = reject;
});
// Restore editor helper — also disposes the dialog to reject any
// dangling promises and prevent the UI from getting stuck.
const restoreEditor = () => {
@ -3451,23 +3443,7 @@ export class InteractiveMode {
onAuth: (info: { url: string; instructions?: string }) => {
dialog.showAuth(info.url, info.instructions);
if (usesCallbackServer) {
// Show input for manual paste, racing with callback
dialog
.showManualInput("Paste redirect URL below, or complete login in browser:")
.then((value) => {
if (value && manualCodeResolve) {
manualCodeResolve(value);
manualCodeResolve = undefined;
}
})
.catch(() => {
if (manualCodeReject) {
manualCodeReject(new Error("Login cancelled"));
manualCodeReject = undefined;
}
});
} else if (providerId === "github-copilot") {
if (!usesCallbackServer && providerId === "github-copilot") {
// GitHub Copilot polls after onAuth
dialog.showWaiting("Waiting for browser authentication...");
}
@ -3482,7 +3458,12 @@ export class InteractiveMode {
dialog.showProgress(message);
},
onManualCodeInput: () => manualCodePromise,
// Callback-server providers race browser callback with pasted redirect URL.
// Keep manual-input promise ownership inside provider flow to avoid
// orphaned rejections when the callback is not consumed.
onManualCodeInput: usesCallbackServer
? () => dialog.showManualInput("Paste redirect URL below, or complete login in browser:")
: undefined,
signal: dialog.signal,
});
@ -3514,12 +3495,6 @@ export class InteractiveMode {
this.showStatus(`Logged in to ${providerName}. Credentials saved to ${getAuthPath()}`);
} catch (error: unknown) {
restoreEditor();
// Also reject the manual code promise if it's still pending
if (manualCodeReject) {
manualCodeReject(new Error("Login cancelled"));
manualCodeReject = undefined;
manualCodeResolve = undefined;
}
const errorMsg = error instanceof Error ? error.message : String(error);
if (errorMsg !== "Login cancelled" && !errorMsg.includes("Superseded") && !errorMsg.includes("disposed")) {
this.showError(`Failed to login to ${providerName}: ${errorMsg}`);

View file

@ -48,11 +48,17 @@ export function attachJsonlLineReader(stream: Readable, onLine: (line: string) =
}
};
const onError = (_err: Error) => {
// Stream errors are non-fatal for JSONL reading
};
stream.on("data", onData);
stream.on("end", onEnd);
stream.on("error", onError);
return () => {
stream.off("data", onData);
stream.off("end", onEnd);
stream.off("error", onError);
};
}

View file

@ -488,8 +488,6 @@ export class RpcClient {
const fullCommand = { ...command, id } as RpcCommand;
return new Promise((resolve, reject) => {
this.pendingRequests.set(id, { resolve, reject });
const timeout = setTimeout(() => {
this.pendingRequests.delete(id);
reject(new Error(`Timeout waiting for response to ${command.type}. Stderr: ${this.stderr}`));

View file

@ -710,8 +710,8 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
}
default: {
const unknownCommand = command as { type: string };
return error(undefined, unknownCommand.type, `Unknown command: ${unknownCommand.type}`);
const unknownCommand = command as { type: string; id?: string };
return error(unknownCommand.id, unknownCommand.type, `Unknown command: ${unknownCommand.type}`);
}
}
};

View file

@ -419,9 +419,17 @@ function resolvePreferredSkillNames(
.map(skill => normalizeSkillReference(skill.name));
}
/** Skill names must be lowercase alphanumeric with hyphens — reject anything else
 * to prevent prompt injection via crafted directory names. */
const SAFE_SKILL_NAME = /^[a-z0-9][a-z0-9-]*$/;
function formatSkillActivationBlock(skillNames: string[]): string {
if (skillNames.length === 0) return "";
const calls = skillNames.map(name => `Call Skill('${name}')`).join('. ');
const safe = skillNames.filter(name => SAFE_SKILL_NAME.test(name));
if (safe.length === 0) return "";
// Use explicit parameter syntax so LLMs pass { skill: "..." } instead of { name: "..." }.
// The function-call-like syntax `Skill('name')` led LLMs to infer a positional
// parameter name, causing tool validation failures — see #2224.
const calls = safe.map(name => `Call Skill({ skill: '${name}' })`).join('. ');
return `<skill_activation>${calls}.</skill_activation>`;
}

View file

@ -44,11 +44,24 @@ export function syncProjectRootToWorktree(
const prGsd = join(projectRoot, ".gsd");
const wtGsd = join(worktreePath, ".gsd");
// Copy milestone directory from project root to worktree if the project root
// has newer artifacts (e.g. slices that don't exist in the worktree yet)
// Copy milestone directory from project root to worktree — additive only.
// force:false prevents cpSync from overwriting existing worktree files.
// Without this, worktree-authoritative files (e.g. VALIDATION.md written
// by validate-milestone) get clobbered by stale project root copies,
// causing an infinite re-validation loop (#1886).
safeCopyRecursive(
join(prGsd, "milestones", milestoneId),
join(wtGsd, "milestones", milestoneId),
{ force: false },
);
// Forward-sync completed-units.json from project root to worktree.
// Project root is authoritative for completion state after crash recovery;
// without this, the worktree re-dispatches already-completed units (#1886).
safeCopy(
join(prGsd, "completed-units.json"),
join(wtGsd, "completed-units.json"),
{ force: true },
);
// Delete worktree gsd.db so it rebuilds from the freshly synced files.

View file

@ -80,18 +80,25 @@ export async function checkGitHealth(
});
if (shouldFix("orphaned_auto_worktree")) {
// Never remove a worktree matching current working directory
// If cwd is inside the worktree, chdir out first — matching the
// pattern in removeWorktree() (#1946). Without this, git cannot
// remove the worktree and the doctor enters a deadlock where it
// detects the orphan every run but never cleans it up.
const cwd = process.cwd();
if (wt.path === cwd || cwd.startsWith(wt.path + sep)) {
fixesApplied.push(`skipped removing worktree at ${wt.path} (is cwd)`);
} else {
try {
nativeWorktreeRemove(basePath, wt.path, true);
fixesApplied.push(`removed orphaned worktree ${wt.path}`);
process.chdir(basePath);
} catch {
fixesApplied.push(`failed to remove worktree ${wt.path}`);
fixesApplied.push(`skipped removing worktree at ${wt.path} (cannot chdir to basePath)`);
continue;
}
}
try {
nativeWorktreeRemove(basePath, wt.path, true);
fixesApplied.push(`removed orphaned worktree ${wt.path}`);
} catch {
fixesApplied.push(`failed to remove worktree ${wt.path}`);
}
}
}
}

View file

@ -149,21 +149,44 @@ function checkDependenciesInstalled(basePath: string): EnvironmentCheckResult |
};
}
// Check if lockfile is newer than node_modules
const lockfiles = ["package-lock.json", "yarn.lock", "pnpm-lock.yaml"];
for (const lockfile of lockfiles) {
const lockPath = join(basePath, lockfile);
// Check if lockfile is newer than the last install.
//
// Each package manager writes a metadata marker inside node_modules on
// every install. Comparing the lockfile mtime against the marker is
// reliable; comparing against the node_modules *directory* mtime is not,
// because directory mtime only changes when entries are added or removed
// — not when files inside it are updated. (#1974)
const lockfiles: Array<{ lock: string; markers: string[] }> = [
{ lock: "package-lock.json", markers: ["node_modules/.package-lock.json"] },
{ lock: "yarn.lock", markers: ["node_modules/.yarn-integrity"] },
{ lock: "pnpm-lock.yaml", markers: ["node_modules/.modules.yaml"] },
];
for (const { lock, markers } of lockfiles) {
const lockPath = join(basePath, lock);
if (!existsSync(lockPath)) continue;
try {
const lockMtime = statSync(lockPath).mtimeMs;
const nmMtime = statSync(nodeModules).mtimeMs;
if (lockMtime > nmMtime) {
// Prefer the package manager's marker file; fall back to directory mtime
// only when no marker exists (e.g., manually created node_modules).
let installMtime = 0;
for (const marker of markers) {
const markerPath = join(basePath, marker);
if (existsSync(markerPath)) {
installMtime = Math.max(installMtime, statSync(markerPath).mtimeMs);
}
}
if (installMtime === 0) {
installMtime = statSync(nodeModules).mtimeMs;
}
if (lockMtime > installMtime) {
return {
name: "dependencies",
status: "warning",
message: `${lockfile} is newer than node_modules — dependencies may be stale`,
message: `${lock} is newer than node_modules — dependencies may be stale`,
detail: `Run npm install / yarn / pnpm install to update`,
};
}

View file

@ -487,7 +487,15 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean;
demo: s.demo,
}));
} else {
slices = parseLegacyRoadmap(roadmapContent).slices;
const activeMilestoneId = state.activeMilestone?.id;
const activeSliceId = state.activeSlice?.id;
slices = parseLegacyRoadmap(roadmapContent).slices.map(s => ({
...s,
// Legacy roadmaps only encode done vs not-done. For doctor's
// missing-directory checks, treat every undone slice except the
// current active slice as effectively pending/unstarted.
pending: !s.done && (milestoneId !== activeMilestoneId || s.id !== activeSliceId),
}));
}
// Wrap in Roadmap-compatible shape for detectCircularDependencies
const roadmap = { slices };

View file

@ -246,6 +246,15 @@ export function readIntegrationBranch(basePath: string, milestoneId: string): st
/** Regex matching GSD quick-task branches: gsd/quick/<num>-<slug> */
export const QUICK_BRANCH_RE = /^gsd\/quick\//;
/**
* Matches all GSD workflow-template branches: gsd/<templateId>/<slug>.
*
* Template IDs are lowercase alphanumeric with hyphens (e.g. hotfix, bugfix,
* small-feature, dep-upgrade). The negative lookahead excludes milestone
* branches (gsd/M001/... or gsd/M001-abc123/...) which use SLICE_BRANCH_RE.
*/
export const WORKFLOW_BRANCH_RE = /^gsd\/(?!M\d)[\w-]+\//;
export function writeIntegrationBranch(
basePath: string,
milestoneId: string,
@ -257,6 +266,10 @@ export function writeIntegrationBranch(
// to their origin branch on completion. Recording one as the integration
// target causes milestone merges to land on the wrong branch (#1293).
if (QUICK_BRANCH_RE.test(branch)) return;
// Don't record workflow-template branches (hotfix, bugfix, spike, etc.) —
// same root cause as quick-task branches (#2498). All templates create
// gsd/<templateId>/<slug> branches that are ephemeral.
if (WORKFLOW_BRANCH_RE.test(branch)) return;
// Validate
if (!VALID_BRANCH_NAME.test(branch)) return;
// Skip if already recorded with the same branch (idempotent across restarts).

View file

@ -35,7 +35,24 @@ Then:
**Success path** (all verifications passed — continue with steps 7–11):
7. **Persist completion through `gsd_complete_milestone`.** Call it with: `milestoneId`, `title`, `oneLiner`, `narrative`, `successCriteriaResults`, `definitionOfDoneResults`, `requirementOutcomes`, `keyDecisions`, `keyFiles`, `lessonsLearned`, `followUps`, `deviations`, `verificationPassed: true`. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
7. **Persist completion through `gsd_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
**Required parameters:**
- `milestoneId` (string) — Milestone ID (e.g. M001)
- `title` (string) — Milestone title
- `oneLiner` (string) — One-sentence summary of what the milestone achieved
- `narrative` (string) — Detailed narrative of what happened during the milestone
- `successCriteriaResults` (string) — Markdown detailing how each success criterion was met or not met
- `definitionOfDoneResults` (string) — Markdown detailing how each definition-of-done item was met
- `requirementOutcomes` (string) — Markdown detailing requirement status transitions with evidence
- `keyDecisions` (array of strings) — Key architectural/pattern decisions made during the milestone
- `keyFiles` (array of strings) — Key files created or modified during the milestone
- `lessonsLearned` (array of strings) — Lessons learned during the milestone
- `verificationPassed` (boolean) — Must be `true` — confirms that code change verification, success criteria, and definition of done checks all passed before completion
**Optional parameters:**
- `followUps` (string) — Follow-up items for future milestones
- `deviations` (string) — Deviations from the original plan
8. For each requirement whose status changed in step 6, call `gsd_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.gsd/REQUIREMENTS.md` automatically.
9. Update `.gsd/PROJECT.md` to reflect milestone completion and current project state.
10. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.gsd/KNOWLEDGE.md`.

View file

@ -142,9 +142,10 @@ Then **offer GitHub issue creation**: "Would you like me to create a GitHub issu
If yes, create using the `bash` tool:
```bash
gh issue create --repo gsd-build/gsd-2 \
# Step 1: Create issue (use labels for metadata, NOT for classification — type is set via GraphQL)
ISSUE_URL=$(gh issue create --repo gsd-build/gsd-2 \
--title "..." \
--label "bug" --label "auto-generated" \
--label "auto-generated" \
--body "$(cat <<'EOF'
## Problem
[1-2 sentence summary]
@ -169,7 +170,13 @@ gh issue create --repo gsd-build/gsd-2 \
---
*Auto-generated by `/gsd forensics`*
EOF
)"
)")
# Step 2: Set issue type via GraphQL (gh issue create has no --type flag)
ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$')
ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id')
TYPE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id')
gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }'
```
### Redaction Rules (CRITICAL)

View file

@ -41,8 +41,8 @@ export function expandDependencies(deps: string[]): string[] {
}
function extractSlicesSection(content: string): string {
// Match "## Slices", "## Slice Overview", "## Slice Table", etc.
const headingMatch = /^## Slice(?:s| Overview| Table| Summary| Status)\b.*$/m.exec(content);
// Match "## Slices", "## Slice Overview", "## Slice Table", "## Slice Roadmap", etc.
const headingMatch = /^## Slice(?:s| Overview| Table| Summary| Status| Roadmap)\b.*$/m.exec(content);
if (!headingMatch || headingMatch.index == null) return "";
const start = headingMatch.index + headingMatch[0].length;

View file

@ -15,7 +15,7 @@ import assert from 'node:assert/strict';
* - Report formatting
*/
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
import { mkdtempSync, mkdirSync, writeFileSync, rmSync, utimesSync } from "node:fs";
import { join, dirname } from "node:path";
import { tmpdir } from "node:os";
@ -102,6 +102,120 @@ describe('doctor-environment', async () => {
assert.deepStrictEqual(depsCheck!.status, "ok", "existing node_modules is ok");
});
// ── Stale Dependencies: marker file check (#1974) ──────────────────
console.log("\n=== env: npm marker file newer than lockfile → ok (#1974) ===");
{
// Simulate the exact bug scenario:
// 1. node_modules dir mtime is old (no entries added/removed recently)
// 2. package-lock.json mtime is recent (npm rewrote it)
// 3. node_modules/.package-lock.json mtime is between dir and lockfile
// (npm wrote it during the same install that rewrote the lockfile)
//
// The bug: code compares lockfile mtime vs dir mtime → false positive warning
// The fix: compare lockfile mtime vs marker file mtime → correctly ok
const dir = createProjectDir({
"package.json": JSON.stringify({ name: "test" }),
});
mkdirSync(join(dir, "node_modules"), { recursive: true });
// Simulate the exact bug: npm install with "up to date" rewrites the
// lockfile and the marker, but no packages are added/removed so the
// directory mtime should be old. We write the marker first (which
// bumps dir mtime), then force the dir mtime back to the past.
//
// Timeline: dir(T-120s) < lockfile(T-5s) ≈ marker(T-5s)
// Bug: code compares lockfile vs dir → false positive stale warning
// Fix: code compares lockfile vs marker → correctly reports ok
const dirTime = new Date(Date.now() - 120_000);
const installTime = new Date(Date.now() - 5_000);
// Write marker file (this bumps dir mtime as a side effect)
writeFileSync(join(dir, "node_modules", ".package-lock.json"), "{}");
utimesSync(join(dir, "node_modules", ".package-lock.json"), installTime, installTime);
// Force dir mtime back to the past — simulates no top-level entries changed
utimesSync(join(dir, "node_modules"), dirTime, dirTime);
// Lockfile written at install time (same as marker, or slightly after)
writeFileSync(join(dir, "package-lock.json"), "{}");
utimesSync(join(dir, "package-lock.json"), installTime, installTime);
cleanups.push(dir);
const results = runEnvironmentChecks(dir);
const depsCheck = results.find(r => r.name === "dependencies");
assertTrue(depsCheck !== undefined, "dependencies check runs");
assertEq(depsCheck!.status, "ok", "npm marker newer than lockfile → not stale");
}
console.log("\n=== env: yarn marker file newer than lockfile → ok (#1974) ===");
{
const dir = createProjectDir({
"package.json": JSON.stringify({ name: "test" }),
});
mkdirSync(join(dir, "node_modules"), { recursive: true });
const dirTime = new Date(Date.now() - 120_000);
const installTime = new Date(Date.now() - 5_000);
writeFileSync(join(dir, "node_modules", ".yarn-integrity"), "{}");
utimesSync(join(dir, "node_modules", ".yarn-integrity"), installTime, installTime);
utimesSync(join(dir, "node_modules"), dirTime, dirTime);
writeFileSync(join(dir, "yarn.lock"), "");
utimesSync(join(dir, "yarn.lock"), installTime, installTime);
cleanups.push(dir);
const results = runEnvironmentChecks(dir);
const depsCheck = results.find(r => r.name === "dependencies");
assertTrue(depsCheck !== undefined, "dependencies check runs");
assertEq(depsCheck!.status, "ok", "yarn marker newer than lockfile → not stale");
}
console.log("\n=== env: pnpm marker file newer than lockfile → ok (#1974) ===");
{
const dir = createProjectDir({
"package.json": JSON.stringify({ name: "test" }),
});
mkdirSync(join(dir, "node_modules"), { recursive: true });
const dirTime = new Date(Date.now() - 120_000);
const installTime = new Date(Date.now() - 5_000);
writeFileSync(join(dir, "node_modules", ".modules.yaml"), "{}");
utimesSync(join(dir, "node_modules", ".modules.yaml"), installTime, installTime);
utimesSync(join(dir, "node_modules"), dirTime, dirTime);
writeFileSync(join(dir, "pnpm-lock.yaml"), "");
utimesSync(join(dir, "pnpm-lock.yaml"), installTime, installTime);
cleanups.push(dir);
const results = runEnvironmentChecks(dir);
const depsCheck = results.find(r => r.name === "dependencies");
assertTrue(depsCheck !== undefined, "dependencies check runs");
assertEq(depsCheck!.status, "ok", "pnpm marker newer than lockfile → not stale");
}
console.log("\n=== env: no marker file falls back to dir mtime → stale warning (#1974) ===");
{
// No marker file exists, lockfile newer than dir → should still warn
const dir = createProjectDir({
"package.json": JSON.stringify({ name: "test" }),
});
mkdirSync(join(dir, "node_modules"), { recursive: true });
const past = new Date(Date.now() - 60_000);
utimesSync(join(dir, "node_modules"), past, past);
writeFileSync(join(dir, "package-lock.json"), "{}");
// No marker file written — fallback to dir mtime comparison
cleanups.push(dir);
const results = runEnvironmentChecks(dir);
const depsCheck = results.find(r => r.name === "dependencies");
assertTrue(depsCheck !== undefined, "dependencies check runs");
assertEq(depsCheck!.status, "warning", "no marker + lockfile newer → stale warning");
}
// ── Env File Check ─────────────────────────────────────────────────
test('env: .env.example without .env detected', () => {
const dir = createProjectDir({

View file

@ -15,6 +15,7 @@ import { tmpdir } from "node:os";
import test from "node:test";
import assert from "node:assert/strict";
import { runGSDDoctor } from "../doctor.ts";
import { closeDatabase } from "../gsd-db.ts";
function makeTmp(name: string): string {
const dir = join(tmpdir(), `doctor-fixlevel-${name}-${Date.now()}-${Math.random().toString(36).slice(2)}`);
@ -112,6 +113,70 @@ test("fixLevel:all — no reconciliation issue codes are reported", async (t) =>
assert.ok(roadmapContent.includes("- [ ] **S01"), "roadmap should remain unchecked");
});
test("legacy roadmap fallback: future slices are treated as pending, active slice is not", async (t) => {
const tmp = makeTmp("legacy-pending-fallback");
t.after(() => {
try { closeDatabase(); } catch { /* noop */ }
rmSync(tmp, { recursive: true, force: true });
});
// Force the legacy parser branch.
try { closeDatabase(); } catch { /* noop */ }
const gsd = join(tmp, ".gsd");
const m = join(gsd, "milestones", "M001");
const s01 = join(m, "slices", "S01", "tasks");
mkdirSync(s01, { recursive: true });
writeFileSync(join(m, "M001-ROADMAP.md"), `# M001: Test
## Slices
- [x] **S01: Done Slice** \`risk:low\` \`depends:[]\`
> Done
- [ ] **S02: Active Slice** \`risk:medium\` \`depends:[S01]\`
> In progress
- [ ] **S03: Future Slice** \`risk:low\` \`depends:[S02]\`
> Later
- [ ] **S04: Future Slice Two** \`risk:low\` \`depends:[S03]\`
> Later
`);
writeFileSync(join(m, "slices", "S01", "S01-PLAN.md"), `# S01: Done Slice
**Goal:** done
## Tasks
- [x] **T01: Done task** \`est:5m\`
`);
// Active slice exists in state/registry but has no directory yet — this should
// still be reported as a real error, while future untouched slices should be skipped.
const report = await runGSDDoctor(tmp, { scope: "M001" });
const missingSliceDirUnits = report.issues
.filter(i => i.code === "missing_slice_dir")
.map(i => i.unitId)
.sort();
assert.deepStrictEqual(
missingSliceDirUnits,
["M001/S02"],
"legacy fallback should only report the active slice, not future unstarted slices",
);
const missingTasksDirUnits = report.issues
.filter(i => i.code === "missing_tasks_dir")
.map(i => i.unitId)
.sort();
assert.deepStrictEqual(
missingTasksDirUnits,
[],
"future slices without directories should be skipped before missing_tasks_dir checks",
);
});
test("fixLevel:all — delimiter_in_title still fixable", async (t) => {
const tmp = makeTmp("delimiter-fix");
t.after(() => rmSync(tmp, { recursive: true, force: true }));
@ -141,7 +206,6 @@ test("fixLevel:all — delimiter_in_title still fixable", async (t) => {
const report = await runGSDDoctor(tmp, { fix: true });
const delimiterIssues = report.issues.filter(i => i.code === "delimiter_in_title");
// The milestone-level delimiter is auto-fixed, but the report may or may not include it
// depending on whether it was fixed successfully. Just verify it ran without crashing.
assert.ok(report.issues !== undefined, "doctor produces a report");

View file

@ -145,6 +145,56 @@ describe('doctor-git', async () => {
} else {
}
// ─── Test 1b: Orphaned worktree fix when cwd is inside worktree (#1946) ──
// Reproduces the deadlock: if process.cwd() is inside the orphaned worktree,
// the doctor must chdir out before removing it — not skip the removal.
if (process.platform !== "win32") {
console.log("\n=== orphaned_auto_worktree (cwd inside worktree) ===");
{
const dir = createRepoWithCompletedMilestone();
cleanups.push(dir);
// Create worktree with milestone/M001 branch under .gsd/worktrees/
mkdirSync(join(dir, ".gsd", "worktrees"), { recursive: true });
run("git worktree add -b milestone/M001 .gsd/worktrees/M001", dir);
const wtPath = realpathSync(join(dir, ".gsd", "worktrees", "M001"));
// Simulate the deadlock: set cwd inside the orphaned worktree
const previousCwd = process.cwd();
process.chdir(wtPath);
try {
const fixed = await runGSDDoctor(dir, { fix: true, isolationMode: "worktree" });
// The fix must NOT skip removal — it should chdir out and remove
assertTrue(
!fixed.fixesApplied.some(f => f.includes("skipped removing worktree")),
"does NOT skip removal when cwd is inside worktree",
);
assertTrue(
fixed.fixesApplied.some(f => f.includes("removed orphaned worktree")),
"removes orphaned worktree even when cwd was inside it",
);
// Verify worktree is gone
const wtList = run("git worktree list", dir);
assertTrue(!wtList.includes("milestone/M001"), "worktree removed after fix with cwd inside");
// Verify cwd was moved out (should be basePath, not still inside worktree)
const newCwd = process.cwd();
assertTrue(
!newCwd.startsWith(wtPath),
"cwd moved out of worktree after fix",
);
} finally {
// Restore cwd — the worktree dir may be gone, so chdir to previousCwd
try { process.chdir(previousCwd); } catch { process.chdir(dir); }
}
}
} else {
console.log("\n=== orphaned_auto_worktree (cwd inside worktree — skipped on Windows) ===");
}
// ─── Test 2: Stale milestone branch detection & fix ────────────────
// Skip on Windows: git branch glob matching and path resolution
// behave differently in Windows temp dirs.

View file

@ -868,6 +868,55 @@ describe('git-service', async () => {
rmSync(repo, { recursive: true, force: true });
});
// ─── writeIntegrationBranch: rejects workflow-template branches (#2498) ─
// ─── writeIntegrationBranch: rejects workflow-template branches (#2498) ─
test('Integration branch: rejects workflow-template branches', () => {
  const repo = initBranchTestRepo();
  // All 8 registered workflow templates should be rejected. Data-driven so a
  // newly registered template only needs one new entry, not a copy-pasted
  // write/read pair (the original repeated the same two lines eight times).
  const templates = [
    ["hotfix", "fix-login"],
    ["bugfix", "null-pointer"],
    ["small-feature", "add-button"],
    ["refactor", "rename-module"],
    ["spike", "evaluate-lib"],
    ["security-audit", "owasp-scan"],
    ["dep-upgrade", "bump-react"],
    ["full-project", "new-app"],
  ] as const;
  for (const [template, slug] of templates) {
    writeIntegrationBranch(repo, "M001", `gsd/${template}/${slug}`);
    assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), null, `${template} branch is not recorded`);
  }
  rmSync(repo, { recursive: true, force: true });
});

// ─── writeIntegrationBranch: still records legitimate branches ────────
test('Integration branch: records non-ephemeral gsd branches', () => {
  const repo = initBranchTestRepo();
  // A normal feature branch should still be recorded
  writeIntegrationBranch(repo, "M001", "feature/new-thing");
  assert.deepStrictEqual(readIntegrationBranch(repo, "M001"), "feature/new-thing", "normal branches are recorded");
  // The main branch should be recorded
  writeIntegrationBranch(repo, "M002", "main");
  assert.deepStrictEqual(readIntegrationBranch(repo, "M002"), "main", "main branch is recorded");
  rmSync(repo, { recursive: true, force: true });
});
// ─── writeIntegrationBranch: rejects invalid branch names ─────────────
test('Integration branch: rejects invalid names', () => {

View file

@ -236,6 +236,32 @@ test("parseRoadmapSlices: ## Slices with valid checkboxes does NOT invoke prose
assert.equal(slices[0]?.done, true);
});
// ── Regression test for #1940 ───────────────────────────────────────────────
// '## Slice Roadmap' header is not recognized by extractSlicesSection, causing
// checkbox-format slices to be missed and all slices reported as incomplete.
test("parseRoadmapSlices: ## Slice Roadmap heading recognized (#1940)", () => {
  const roadmapContent = [
    "# M002: Current Milestone", "",
    "**Vision:** Ship it.", "",
    "## Slice Roadmap", "",
    "- [x] **S01: Foundation** `risk:low` `depends:[]`",
    "  > After this: base layer works.",
    "- [x] **S02: Core Logic** `risk:medium` `depends:[S01]`",
    "- [ ] **S03: Polish** `risk:low` `depends:[S01,S02]`", "",
    "## Boundary Map",
  ].join("\n");

  const slices = parseRoadmapSlices(roadmapContent);
  assert.equal(slices.length, 3, "should parse 3 slices under '## Slice Roadmap'");

  // Expected (id, done) pairs, in roadmap order.
  const expected: Array<[string, boolean, string]> = [
    ["S01", true, "S01 should be marked done"],
    ["S02", true, "S02 should be marked done"],
    ["S03", false, "S03 should be pending"],
  ];
  expected.forEach(([id, done, message], index) => {
    assert.equal(slices[index]?.id, id);
    assert.equal(slices[index]?.done, done, message);
  });
  assert.deepEqual(slices[2]?.depends, ["S01", "S02"]);
});
test("parseRoadmapSlices: ## Slices with only non-matching lines returns prose fallback results", () => {
const weirdContent = `# M020: Odd

View file

@ -75,7 +75,7 @@ test("buildSkillActivationBlock activates skills via prefer_skills when context
prefer_skills: ["react"],
});
assert.match(result, /Call Skill\('react'\)/);
assert.match(result, /Call Skill\(\{ skill: 'react' \}\)/);
assert.doesNotMatch(result, /swiftui/);
} finally {
cleanup(base);
@ -92,7 +92,7 @@ test("buildSkillActivationBlock includes always_use_skills from preferences usin
always_use_skills: ["swift-testing"],
});
assert.equal(result, "<skill_activation>Call Skill('swift-testing').</skill_activation>");
assert.equal(result, "<skill_activation>Call Skill({ skill: 'swift-testing' }).</skill_activation>");
} finally {
cleanup(base);
}
@ -120,8 +120,8 @@ test("buildSkillActivationBlock includes skill_rules matches and task-plan skill
skill_rules: [{ when: "prisma database schema", use: ["prisma"] }],
});
assert.match(result, /Call Skill\('accessibility'\)/);
assert.match(result, /Call Skill\('prisma'\)/);
assert.match(result, /Call Skill\(\{ skill: 'accessibility' \}\)/);
assert.match(result, /Call Skill\(\{ skill: 'prisma' \}\)/);
} finally {
cleanup(base);
}
@ -191,3 +191,43 @@ test("buildSkillActivationBlock does not activate skills from extraContext or ta
cleanup(base);
}
});
test("buildSkillActivationBlock rejects skill names with special characters", () => {
  const base = makeTempBase();
  try {
    // Names containing quotes, braces, or other non-alphanumeric characters
    // are filtered by the SAFE_SKILL_NAME guard to prevent prompt injection.
    writeSkill(base, "my-skill's", "Skill with apostrophe in name.");
    loadOnlyTestSkills(base);
    const result = buildBlock(base, {}, { always_use_skills: ["my-skill's"] });
    // The unsafe name is dropped, leaving nothing to activate.
    assert.equal(result, "");
  } finally {
    cleanup(base);
  }
});

test("buildSkillActivationBlock allows valid skill names and rejects invalid ones", () => {
  const base = makeTempBase();
  try {
    // Register a mix of safe and unsafe names.
    const skills: Array<[string, string]> = [
      ["react", "React skill."],
      ["bad'name", "Injection attempt."],
      ["good-skill-2", "Another valid skill."],
    ];
    for (const [name, description] of skills) {
      writeSkill(base, name, description);
    }
    loadOnlyTestSkills(base);
    const result = buildBlock(base, {}, {
      always_use_skills: ["react", "bad'name", "good-skill-2"],
    });
    // Safe names survive; the unsafe one is filtered out entirely.
    assert.match(result, /skill_activation/);
    assert.match(result, /Call Skill\(\{ skill: 'react' \}\)/);
    assert.match(result, /Call Skill\(\{ skill: 'good-skill-2' \}\)/);
    assert.doesNotMatch(result, /bad'name/);
  } finally {
    cleanup(base);
  }
});

View file

@ -0,0 +1,204 @@
/**
* worktree-sync-overwrite-loop.test.ts Regression tests for #1886.
*
* Reproduces the infinite validate-milestone loop caused by two bugs
* in syncProjectRootToWorktree:
*
* 1. safeCopyRecursive overwrites worktree-authoritative files (e.g.
* VALIDATION.md written by validate-milestone gets clobbered by the
* stale project root copy that lacks the file).
*
* 2. completed-units.json is not forward-synced from project root to
* worktree, so the worktree never learns about already-completed units.
*
* Covers:
* - syncProjectRootToWorktree does NOT overwrite existing worktree files
* - syncProjectRootToWorktree copies files missing from the worktree
* - completed-units.json is forward-synced from project root to worktree
* - completed-units.json sync uses force:true (project root is authoritative)
*/
import {
mkdtempSync,
mkdirSync,
writeFileSync,
rmSync,
existsSync,
readFileSync,
} from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { syncProjectRootToWorktree } from "../auto-worktree-sync.ts";
import { createTestContext } from "./test-helpers.ts";
const { assertTrue, assertEq, report } = createTestContext();
/** Make a throwaway project base: a unique temp dir seeded with `.gsd/milestones/`. */
function createBase(name: string): string {
  const root = mkdtempSync(join(tmpdir(), `gsd-wt-1886-${name}-`));
  const milestonesDir = join(root, ".gsd", "milestones");
  mkdirSync(milestonesDir, { recursive: true });
  return root;
}
/** Recursively delete a test base dir; force:true tolerates an already-missing path. */
function cleanup(base: string): void {
  rmSync(base, { force: true, recursive: true });
}
/**
 * Runs the four #1886 regression scenarios in sequence and reports results
 * via the shared test context. Each scenario builds a throwaway "project
 * root" / "worktree" directory pair, drives syncProjectRootToWorktree once,
 * asserts on the resulting files, and cleans both dirs up in `finally`.
 */
async function main(): Promise<void> {
  // ─── 1. Worktree VALIDATION.md must NOT be overwritten by project root ──
  console.log(
    "\n=== 1. #1886: worktree VALIDATION.md preserved (not overwritten) ===",
  );
  {
    const mainBase = createBase("main");
    const wtBase = createBase("wt");
    try {
      // Project root has an older CONTEXT but no VALIDATION
      const prM004 = join(mainBase, ".gsd", "milestones", "M004");
      mkdirSync(prM004, { recursive: true });
      writeFileSync(join(prM004, "M004-CONTEXT.md"), "# old context");
      // Worktree has CONTEXT + VALIDATION (written by validate-milestone)
      const wtM004 = join(wtBase, ".gsd", "milestones", "M004");
      mkdirSync(wtM004, { recursive: true });
      writeFileSync(join(wtM004, "M004-CONTEXT.md"), "# worktree context");
      writeFileSync(
        join(wtM004, "M004-VALIDATION.md"),
        "verdict: pass\nremediation_round: 1",
      );
      // The sync under test: merge project root into worktree without clobbering.
      syncProjectRootToWorktree(mainBase, wtBase, "M004");
      // VALIDATION.md must still exist in worktree
      assertTrue(
        existsSync(join(wtM004, "M004-VALIDATION.md")),
        "#1886: VALIDATION.md still exists after sync",
      );
      assertEq(
        readFileSync(join(wtM004, "M004-VALIDATION.md"), "utf-8"),
        "verdict: pass\nremediation_round: 1",
        "#1886: VALIDATION.md content preserved",
      );
      // CONTEXT.md should NOT be overwritten — worktree version is authoritative
      assertEq(
        readFileSync(join(wtM004, "M004-CONTEXT.md"), "utf-8"),
        "# worktree context",
        "#1886: existing worktree CONTEXT.md not overwritten",
      );
    } finally {
      cleanup(mainBase);
      cleanup(wtBase);
    }
  }
  // ─── 2. Missing files ARE still copied from project root ────────────────
  console.log("\n=== 2. #1886: missing worktree files still copied ===");
  {
    const mainBase = createBase("main");
    const wtBase = createBase("wt");
    try {
      const prM004 = join(mainBase, ".gsd", "milestones", "M004");
      mkdirSync(prM004, { recursive: true });
      writeFileSync(join(prM004, "M004-CONTEXT.md"), "# from project root");
      writeFileSync(join(prM004, "M004-ROADMAP.md"), "# roadmap");
      // Worktree has no M004 directory at all
      syncProjectRootToWorktree(mainBase, wtBase, "M004");
      // Both files must appear in the worktree after the sync.
      assertTrue(
        existsSync(join(wtBase, ".gsd", "milestones", "M004", "M004-CONTEXT.md")),
        "#1886: missing CONTEXT.md copied from project root",
      );
      assertTrue(
        existsSync(join(wtBase, ".gsd", "milestones", "M004", "M004-ROADMAP.md")),
        "#1886: missing ROADMAP.md copied from project root",
      );
    } finally {
      cleanup(mainBase);
      cleanup(wtBase);
    }
  }
  // ─── 3. completed-units.json forward-synced from project root ───────────
  console.log(
    "\n=== 3. #1886: completed-units.json forward-synced to worktree ===",
  );
  {
    const mainBase = createBase("main");
    const wtBase = createBase("wt");
    try {
      // Project root has completed units (authoritative after crash recovery)
      writeFileSync(
        join(mainBase, ".gsd", "completed-units.json"),
        JSON.stringify(["validate-milestone/M004"]),
      );
      // Worktree has empty completed-units
      writeFileSync(
        join(wtBase, ".gsd", "completed-units.json"),
        JSON.stringify([]),
      );
      syncProjectRootToWorktree(mainBase, wtBase, "M004");
      // Unlike milestone files, this sync is expected to overwrite the worktree copy.
      const wtCompleted = JSON.parse(
        readFileSync(join(wtBase, ".gsd", "completed-units.json"), "utf-8"),
      );
      assertEq(
        wtCompleted,
        ["validate-milestone/M004"],
        "#1886: completed-units.json synced from project root (force:true)",
      );
    } finally {
      cleanup(mainBase);
      cleanup(wtBase);
    }
  }
  // ─── 4. completed-units.json: no-op when project root has no file ───────
  console.log(
    "\n=== 4. #1886: completed-units.json no-op when missing in project root ===",
  );
  {
    const mainBase = createBase("main");
    const wtBase = createBase("wt");
    try {
      // Project root milestone dir must exist for sync to run
      const prM004 = join(mainBase, ".gsd", "milestones", "M004");
      mkdirSync(prM004, { recursive: true });
      // No completed-units.json in project root
      // Worktree has its own
      writeFileSync(
        join(wtBase, ".gsd", "completed-units.json"),
        JSON.stringify(["some-unit/M001"]),
      );
      syncProjectRootToWorktree(mainBase, wtBase, "M004");
      // Worktree file must survive untouched when the source file is absent.
      const wtCompleted = JSON.parse(
        readFileSync(join(wtBase, ".gsd", "completed-units.json"), "utf-8"),
      );
      assertEq(
        wtCompleted,
        ["some-unit/M001"],
        "#1886: worktree completed-units.json untouched when project root has none",
      );
    } finally {
      cleanup(mainBase);
      cleanup(wtBase);
    }
  }
  report();
}
// Entry point: any scenario failure rejects main(), is logged, and the process
// exits non-zero so CI reports the regression.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});

View file

@ -2,7 +2,7 @@
* Remote Questions configuration resolution and validation
*/
import { join } from "node:path";
import { AuthStorage } from "@gsd/pi-coding-agent";
import { loadEffectiveGSDPreferences, type RemoteQuestionsConfig } from "../gsd/preferences.js";
import type { RemoteChannel } from "./types.js";
@ -54,9 +54,7 @@ function hydrateRemoteTokensFromAuth(): void {
if (needed.length === 0) return;
try {
const { AuthStorage } = require("@gsd/pi-coding-agent") as typeof import("@gsd/pi-coding-agent");
const authPath = join(process.env.HOME ?? "~", ".gsd", "agent", "auth.json");
const auth = AuthStorage.create(authPath);
const auth = AuthStorage.create();
for (const [providerId, envVar] of needed) {
try {
@ -72,7 +70,7 @@ function hydrateRemoteTokensFromAuth(): void {
}
}
} catch {
// AuthStorage unavailable (unit tests, stripped build) — skip silently.
// AuthStorage unavailable or auth.json missing/unreadable — skip silently.
}
}

View file

@ -176,11 +176,15 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
);
payload.tools = tools;
// ── Session-level search budget (#1309) ──────────────────────────────
// ── Session-level search budget (#1309, #compaction-safe) ─────────────
// Count web_search_tool_result blocks in the conversation history to
// determine how many native searches have already been used this session.
// The Anthropic API's max_uses resets per request, so without this guard,
// pause_turn → resubmit cycles allow unlimited total searches.
//
// Use the monotonic high-water mark: take the max of the history count
// and the running counter. This prevents budget resets when context
// compaction removes web_search_tool_result blocks from history.
if (Array.isArray(messages)) {
let historySearchCount = 0;
for (const msg of messages) {
@ -192,8 +196,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
}
}
}
// Sync counter from history (handles session restore / context replay)
sessionSearchCount = historySearchCount;
// High-water mark: never decrease the counter, even if compaction
// removes web_search_tool_result blocks from the visible history.
sessionSearchCount = Math.max(sessionSearchCount, historySearchCount);
}
const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount);

View file

@ -106,14 +106,20 @@ searchCache.startPurgeInterval(60_000);
// Consecutive duplicate search guard (#949)
// Tracks recent query keys to detect and break search loops.
const MAX_CONSECUTIVE_DUPES = 3;
const MAX_CONSECUTIVE_DUPES = 1;
let lastSearchKey = "";
let consecutiveDupeCount = 0;
/** Reset session-scoped duplicate-search guard state. */
// Session-level total search budget (all queries, not just duplicates).
// Prevents unbounded search accumulation across varied queries.
const MAX_SEARCHES_PER_SESSION = 15;
let sessionTotalSearches = 0;
/** Reset session-scoped search guard state (both duplicate and budget). */
export function resetSearchLoopGuardState(): void {
lastSearchKey = "";
consecutiveDupeCount = 0;
sessionTotalSearches = 0;
}
// Summarizer responses: max 50 entries, 15-minute TTL
@ -357,6 +363,17 @@ export function registerSearchTool(pi: ExtensionAPI) {
};
}
// ------------------------------------------------------------------
// Session-level search budget
// ------------------------------------------------------------------
if (sessionTotalSearches >= MAX_SEARCHES_PER_SESSION) {
return {
content: [{ type: "text" as const, text: `⚠️ Search budget exhausted: ${sessionTotalSearches}/${MAX_SEARCHES_PER_SESSION} searches used this session. The information you need should already be in previous search results. Stop searching and use those results to proceed with your task.` }],
isError: true,
details: { errorKind: "budget_exhausted", error: `Session search budget exhausted (${MAX_SEARCHES_PER_SESSION})` } satisfies Partial<SearchDetails>,
};
}
const count = params.count ?? 5;
const wantSummary = params.summary ?? false;
@ -410,6 +427,9 @@ export function registerSearchTool(pi: ExtensionAPI) {
consecutiveDupeCount = 1;
}
// Count every search that passes the guards toward the session budget.
sessionTotalSearches++;
const cached = searchCache.get(cacheKey);
if (cached) {

View file

@ -103,9 +103,12 @@ gh issue list -R gsd-build/gsd-2
gh issue list -R gsd-build/gsd-2 --label "priority:p1" --state open
# Create issue with labels and milestone
# NOTE: Do NOT use labels for issue classification (bug, feature, etc.)
# Use labels for metadata (priority, status, auto-generated) only.
# Issue classification uses GitHub Issue Types, set via GraphQL after creation.
gh issue create -R gsd-build/gsd-2 \
--title "feat: add feature X" \
--label "priority:p1" --label "type:feature" \
--label "priority:p1" \
--milestone "v1.0"
# View issue
@ -120,6 +123,24 @@ gh issue edit <number> -R gsd-build/gsd-2 \
--remove-label "status:needs-grooming"
```
### Issue Types (Classification)
`gh issue create` has no `--type` flag. Issue types (Bug, Feature Request, etc.) are set via GraphQL after creation:
```bash
# Step 1: Create the issue (returns URL)
ISSUE_URL=$(gh issue create -R gsd-build/gsd-2 \
--title "..." --body "...")
# Step 2: Set the issue type via GraphQL
ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$')
ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id')
TYPE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id')
gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }'
```
Replace `"Bug"` with the appropriate type name (`"Feature Request"`, `"Task"`, etc.).
### Labels
```bash

View file

@ -855,6 +855,51 @@ test("MAX_NATIVE_SEARCHES_PER_SESSION is exported and equals 15", () => {
assert.equal(MAX_NATIVE_SEARCHES_PER_SESSION, 15, "Session budget should be 15 (#1309)");
});
test("session search budget: survives context compaction (high-water mark)", async () => {
  const pi = createMockPI();
  registerNativeSearchHooks(pi);
  await pi.fire("model_select", {
    type: "model_select",
    model: { provider: "anthropic", name: "claude-sonnet-4-6" },
    previousModel: undefined,
    source: "set",
  });

  // Locate the injected native web-search tool in a request payload.
  const findNativeTool = (payload: Record<string, unknown>) =>
    (payload.tools as any[]).find((t: any) => t.type === "web_search_20250305");

  // Request 1: history already contains 12 web_search_tool_result blocks.
  const priorSearches = Array.from({ length: 12 }, (_, i) => ({
    type: "web_search_tool_result",
    tool_use_id: `ws${i}`,
    content: [],
  }));
  const firstPayload: Record<string, unknown> = {
    model: "claude-sonnet-4-6-20250514",
    tools: [{ name: "bash", type: "custom" }],
    messages: [{ role: "user", content: [{ type: "text", text: "search" }, ...priorSearches] }],
  };
  await pi.fire("before_provider_request", { type: "before_provider_request", payload: firstPayload });
  const firstTool = findNativeTool(firstPayload);
  assert.ok(firstTool, "Should still inject web_search with 12/15 used");
  assert.equal(firstTool.max_uses, 3, "Should have 3 remaining (15 - 12)");

  // Request 2: compaction wiped the search blocks from history. Without the
  // high-water mark, the budget would reset to the full 15.
  const secondPayload: Record<string, unknown> = {
    model: "claude-sonnet-4-6-20250514",
    tools: [{ name: "bash", type: "custom" }],
    messages: [{ role: "user", content: "compacted context — no search blocks" }],
  };
  await pi.fire("before_provider_request", { type: "before_provider_request", payload: secondPayload });
  const secondTool = findNativeTool(secondPayload);
  assert.ok(secondTool, "Should still inject web_search with 12/15 used (high-water mark)");
  assert.equal(secondTool.max_uses, 3, "High-water mark should preserve 12 — only 3 remaining");
});
// ─── stripThinkingFromHistory tests ─────────────────────────────────────────
test("stripThinkingFromHistory removes thinking from earlier assistant messages", () => {

View file

@ -11,7 +11,7 @@
import test from "node:test";
import assert from "node:assert/strict";
import { registerSearchTool } from "../resources/extensions/search-the-web/tool-search.ts";
import { registerSearchTool, resetSearchLoopGuardState } from "../resources/extensions/search-the-web/tool-search.ts";
import searchExtension from "../resources/extensions/search-the-web/index.ts";
const ORIGINAL_ENV = {
@ -72,6 +72,8 @@ function createMockPI() {
const toolsByName = new Map<string, any>();
let registeredTool: any = null;
let activeTools: string[] = [];
const pi = {
on(event: string, handler: (...args: any[]) => unknown) {
handlers.push({ event, handler });
@ -91,6 +93,8 @@ function createMockPI() {
getRegisteredTool(name = "search-the-web") {
return toolsByName.get(name) ?? registeredTool;
},
getActiveTools() { return activeTools; },
setActiveTools(tools: string[]) { activeTools = tools; },
writeTempFile: async (_content: string, _opts?: unknown) => "/tmp/search-out.txt",
};
@ -134,18 +138,16 @@ test("search loop guard fires after MAX_CONSECUTIVE_DUPES duplicates", async (t)
const execute = tool.execute.bind(tool);
// Calls 13: below threshold, should return search results (not an error)
for (let i = 1; i <= 3; i++) {
const result = await callSearch(execute, "loop test query", `call-${i}`);
assert.notEqual(result.isError, true, `call ${i} should not trigger loop guard`);
}
// Call 1: first call should succeed (MAX_CONSECUTIVE_DUPES = 1)
const result1 = await callSearch(execute, "loop test query", "call-1");
assert.notEqual(result1.isError, true, "call 1 should not trigger loop guard");
// Call 4: hits the threshold — guard fires
const result4 = await callSearch(execute, "loop test query", "call-4");
assert.equal(result4.isError, true, "call 4 should trigger the loop guard");
assert.equal(result4.details?.errorKind, "search_loop");
// Call 2: identical query — guard fires immediately (threshold = 1)
const result2 = await callSearch(execute, "loop test query", "call-2");
assert.equal(result2.isError, true, "call 2 should trigger the loop guard");
assert.equal(result2.details?.errorKind, "search_loop");
assert.ok(
result4.content[0].text.includes("Search loop detected"),
result2.content[0].text.includes("Search loop detected"),
"error message should mention search loop"
);
});
@ -174,11 +176,9 @@ test("search loop guard resets at session_start boundary", async (t) => {
assert.ok(tool, "search tool should be registered");
const execute = tool.execute.bind(tool);
// Trigger guard in session 1
for (let i = 1; i <= 4; i++) {
await callSearch(execute, query, `s1-call-${i}`);
}
const guardResult = await callSearch(execute, query, "s1-call-5");
// Trigger guard in session 1 (call 1 succeeds, call 2 fires guard)
await callSearch(execute, query, "s1-call-1");
const guardResult = await callSearch(execute, query, "s1-call-2");
assert.equal(guardResult.isError, true, "session 1 should be guarded");
assert.equal(guardResult.details?.errorKind, "search_loop");
@ -211,28 +211,26 @@ test("search loop guard stays armed after firing — subsequent duplicates immed
const tool = pi.getRegisteredTool();
const execute = tool.execute.bind(tool);
// Exhaust the initial window (calls 13 succeed, call 4 fires guard)
for (let i = 1; i <= 3; i++) {
await callSearch(execute, query, `call-${i}`);
}
const guardFirst = await callSearch(execute, query, "call-4");
assert.equal(guardFirst.isError, true, "call 4 should trigger the loop guard");
// Call 1 succeeds, call 2 fires guard (MAX_CONSECUTIVE_DUPES = 1)
await callSearch(execute, query, "call-1");
const guardFirst = await callSearch(execute, query, "call-2");
assert.equal(guardFirst.isError, true, "call 2 should trigger the loop guard");
// Key regression test: call 5 (and beyond) must ALSO trigger the guard.
// The original bug reset state on trigger, so call 5 was treated as a fresh
// Key regression test: call 3 (and beyond) must ALSO trigger the guard.
// The original bug reset state on trigger, so call 3 was treated as a fresh
// first search and the loop restarted.
const guardSecond = await callSearch(execute, query, "call-5");
const guardSecond = await callSearch(execute, query, "call-3");
assert.equal(
guardSecond.isError, true,
"call 5 should STILL trigger the loop guard (guard must stay armed after firing)"
"call 3 should STILL trigger the loop guard (guard must stay armed after firing)"
);
assert.equal(guardSecond.details?.errorKind, "search_loop");
// Call 6 as well — guard should keep firing
const guardThird = await callSearch(execute, query, "call-6");
// Call 4 as well — guard should keep firing
const guardThird = await callSearch(execute, query, "call-4");
assert.equal(
guardThird.isError, true,
"call 6 should STILL trigger the loop guard"
"call 4 should STILL trigger the loop guard"
);
});
@ -255,10 +253,9 @@ test("search loop guard resets cleanly when a different query is issued", async
const tool = pi.getRegisteredTool();
const execute = tool.execute.bind(tool);
// Trigger guard for queryA
for (let i = 1; i <= 4; i++) {
await callSearch(execute, queryA, `call-a-${i}`);
}
// Trigger guard for queryA (call 1 succeeds, call 2 fires guard)
await callSearch(execute, queryA, "call-a-1");
await callSearch(execute, queryA, "call-a-2");
// Issue a different query — should succeed (resets the duplicate counter)
const resultB = await callSearch(execute, queryB, "call-b-1");
@ -267,3 +264,71 @@ test("search loop guard resets cleanly when a different query is issued", async
"a different query after guard should not be treated as a loop"
);
});
test("session search budget blocks after MAX_SEARCHES_PER_SESSION varied queries", async (t) => {
  process.env.BRAVE_API_KEY = "test-key-budget";
  delete process.env.TAVILY_API_KEY;
  delete process.env.OLLAMA_API_KEY;
  const restoreFetch = mockFetch(makeBraveResponse());
  t.after(() => {
    restoreFetch();
    restoreSearchEnv();
  });

  // Fresh guard state (session budget included), then register the tool directly.
  resetSearchLoopGuardState();
  const pi = createMockPI();
  registerSearchTool(pi as any);
  const searchTool = pi.getRegisteredTool();
  assert.ok(searchTool, "search tool should be registered");
  const execute = searchTool.execute.bind(searchTool);

  // 15 distinct queries all fit within the budget.
  for (let call = 1; call <= 15; call++) {
    const result = await callSearch(execute, `unique budget query ${call}`, `budget-${call}`);
    assert.notEqual(result.isError, true, `query ${call} should succeed within budget`);
  }

  // The 16th query exceeds the budget and must be rejected.
  const blocked = await callSearch(execute, "one more query", "budget-16");
  assert.equal(blocked.isError, true, "query 16 should be blocked by budget");
  assert.equal(blocked.details?.errorKind, "budget_exhausted");
  assert.ok(
    blocked.content[0].text.includes("Search budget exhausted"),
    "error message should mention budget"
  );
});
test("session search budget resets via resetSearchLoopGuardState", async (t) => {
  process.env.BRAVE_API_KEY = "test-key-budget-reset";
  delete process.env.TAVILY_API_KEY;
  delete process.env.OLLAMA_API_KEY;
  const restoreFetch = mockFetch(makeBraveResponse());
  t.after(() => {
    restoreFetch();
    restoreSearchEnv();
  });

  // Start from clean guard state and register the tool directly.
  resetSearchLoopGuardState();
  const pi = createMockPI();
  registerSearchTool(pi as any);
  const searchTool = pi.getRegisteredTool();
  const execute = searchTool.execute.bind(searchTool);

  // Burn through the entire session budget with distinct queries.
  for (let call = 1; call <= 15; call++) {
    await callSearch(execute, `budget reset query ${call}`, `br-${call}`);
  }
  const exhausted = await callSearch(execute, "exhausted query", "br-exhausted");
  assert.equal(exhausted.isError, true, "budget should be exhausted");

  // Resetting guard state simulates the start of a new session.
  resetSearchLoopGuardState();
  const fresh = await callSearch(execute, "fresh session query", "br-fresh");
  assert.notEqual(fresh.isError, true, "first query after reset should succeed");
});

View file

@ -0,0 +1,70 @@
/**
* Regression tests for the default package root fallback in bridge-service.
*
* Issue: gsd-build/gsd-2#1881
* The standalone Next.js bundle bakes import.meta.url at build time with the
* CI runner's absolute path. On Windows, fileURLToPath() rejects the Unix
* file:// URL at module load time, 500-ing all API routes.
*
* The fix makes the fallback lazy and catch-guarded so the module loads safely
* on any OS regardless of what import.meta.url resolved to at build time.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { resolve } from "node:path";
const bridge = await import("../web/bridge-service.ts");
// Cast plain objects to ProcessEnv so each test controls exactly which
// variables the resolver sees.
const envOf = (vars: Record<string, string>): NodeJS.ProcessEnv =>
  vars as unknown as NodeJS.ProcessEnv;

test("resolveBridgeRuntimeConfig uses GSD_WEB_PACKAGE_ROOT when set", () => {
  const config = bridge.resolveBridgeRuntimeConfig(envOf({
    GSD_WEB_PACKAGE_ROOT: "/custom/package/root",
    GSD_WEB_PROJECT_CWD: "/some/project",
  }));
  assert.equal(config.packageRoot, "/custom/package/root");
});

test("resolveBridgeRuntimeConfig falls back to lazy default when GSD_WEB_PACKAGE_ROOT is absent", () => {
  // Clear the memoized value so the lazy computation path actually runs.
  bridge.resetDefaultPackageRootForTests();
  // Must not throw — the lazy getter catches cross-platform failures.
  const config = bridge.resolveBridgeRuntimeConfig(envOf({ GSD_WEB_PROJECT_CWD: "/some/project" }));
  assert.equal(typeof config.packageRoot, "string");
  assert.ok(config.packageRoot.length > 0, "packageRoot must be a non-empty string");
});

test("lazy default package root is an absolute path", () => {
  bridge.resetDefaultPackageRootForTests();
  const { packageRoot } = bridge.resolveBridgeRuntimeConfig(envOf({ GSD_WEB_PROJECT_CWD: "/some/project" }));
  // resolve() is a no-op on an already-absolute path.
  assert.equal(packageRoot, resolve(packageRoot));
});

test("lazy default package root is memoized across calls", () => {
  bridge.resetDefaultPackageRootForTests();
  const env = envOf({});
  const first = bridge.resolveBridgeRuntimeConfig(env).packageRoot;
  const second = bridge.resolveBridgeRuntimeConfig(env).packageRoot;
  assert.equal(first, second, "memoized value should be stable across calls");
});

test("module loads without throwing (regression: eager fileURLToPath crash)", () => {
  // Importing bridge-service at the top of this file without an unhandled
  // exception is the primary regression gate; this assertion makes it explicit.
  assert.ok(typeof bridge.resolveBridgeRuntimeConfig === "function");
});

View file

@ -39,7 +39,30 @@ import {
} from "./auto-dashboard-service.ts";
import { resolveGsdCliEntry } from "./cli-entry.ts";
const DEFAULT_PACKAGE_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "../..");
// The fallback package root is computed lazily: webpack bakes import.meta.url
// into the standalone bundle at build time, so a bundle built on Linux CI can
// carry a Unix file:// URL that fileURLToPath() rejects when run on Windows.
// Deferring the call means it only fires when GSD_WEB_PACKAGE_ROOT is unset,
// and any cross-platform failure is caught instead of crashing module load.
let cachedDefaultPackageRoot: string | undefined;

/** Compute (once) and return the fallback package root. */
function getDefaultPackageRoot(): string {
  if (cachedDefaultPackageRoot === undefined) {
    try {
      cachedDefaultPackageRoot = resolve(dirname(fileURLToPath(import.meta.url)), "../..");
    } catch {
      // The baked-in import.meta.url is not a valid local file URL on this OS
      // (standalone bundle built elsewhere). cwd is the best approximation
      // available; callers that need the real root set GSD_WEB_PACKAGE_ROOT.
      cachedDefaultPackageRoot = process.cwd();
    }
  }
  return cachedDefaultPackageRoot;
}

/** @internal — test-only: reset the memoized default package root */
export function resetDefaultPackageRootForTests(): void {
  cachedDefaultPackageRoot = undefined;
}
const RESPONSE_TIMEOUT_MS = 30_000;
const START_TIMEOUT_MS = 150_000;
const MAX_STDERR_BUFFER = 8_000;
@ -1058,7 +1081,7 @@ async function fallbackWorkspaceIndex(basePath: string): Promise<GSDWorkspaceInd
export function resolveBridgeRuntimeConfig(env: NodeJS.ProcessEnv = getBridgeDeps().env ?? process.env, projectCwdOverride?: string): BridgeRuntimeConfig {
const projectCwd = projectCwdOverride || env.GSD_WEB_PROJECT_CWD || process.cwd();
const projectSessionsDir = env.GSD_WEB_PROJECT_SESSIONS_DIR || getProjectSessionsDir(projectCwd);
const packageRoot = env.GSD_WEB_PACKAGE_ROOT || DEFAULT_PACKAGE_ROOT;
const packageRoot = env.GSD_WEB_PACKAGE_ROOT || getDefaultPackageRoot();
return { projectCwd, projectSessionsDir, packageRoot };
}