Merge pull request #673 from jeremymcs/feat/v2.20-phase2-3-features
feat: v2.20 Phase 2-4 — skills, integrations, MCP server
This commit is contained in:
commit
966e5e80fb
18 changed files with 3242 additions and 11 deletions
1124
package-lock.json
generated
1124
package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -72,12 +72,15 @@
|
|||
"@google/genai": "^1.40.0",
|
||||
"@mariozechner/jiti": "^2.6.2",
|
||||
"@mistralai/mistralai": "1.14.1",
|
||||
"@modelcontextprotocol/sdk": "^1.27.1",
|
||||
"@octokit/rest": "^22.0.1",
|
||||
"@silvia-odwyer/photon-node": "^0.3.4",
|
||||
"@sinclair/typebox": "^0.34.41",
|
||||
"@types/mime-types": "^2.1.4",
|
||||
"ajv": "^8.17.1",
|
||||
"ajv-formats": "^3.0.1",
|
||||
"chalk": "^5.6.2",
|
||||
"chokidar": "^5.0.0",
|
||||
"diff": "^8.0.2",
|
||||
"extract-zip": "^2.0.1",
|
||||
"file-type": "^21.1.1",
|
||||
|
|
|
|||
27
src/cli.ts
27
src/cli.ts
|
|
@ -19,13 +19,13 @@ import { getPiDefaultModelAndProvider, migratePiCredentials } from './pi-migrati
|
|||
import { shouldRunOnboarding, runOnboarding } from './onboarding.js'
|
||||
import chalk from 'chalk'
|
||||
import { checkForUpdates } from './update-check.js'
|
||||
import { printHelp } from './help-text.js'
|
||||
import { printHelp, printSubcommandHelp } from './help-text.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Minimal CLI arg parser — detects print/subagent mode flags
|
||||
// ---------------------------------------------------------------------------
|
||||
interface CliFlags {
|
||||
mode?: 'text' | 'json' | 'rpc'
|
||||
mode?: 'text' | 'json' | 'rpc' | 'mcp'
|
||||
print?: boolean
|
||||
continue?: boolean
|
||||
noSession?: boolean
|
||||
|
|
@ -59,7 +59,7 @@ function parseCliArgs(argv: string[]): CliFlags {
|
|||
const arg = args[i]
|
||||
if (arg === '--mode' && i + 1 < args.length) {
|
||||
const m = args[++i]
|
||||
if (m === 'text' || m === 'json' || m === 'rpc') flags.mode = m
|
||||
if (m === 'text' || m === 'json' || m === 'rpc' || m === 'mcp') flags.mode = m
|
||||
} else if (arg === '--print' || arg === '-p') {
|
||||
flags.print = true
|
||||
} else if (arg === '--continue' || arg === '-c') {
|
||||
|
|
@ -92,6 +92,14 @@ function parseCliArgs(argv: string[]): CliFlags {
|
|||
const cliFlags = parseCliArgs(process.argv)
|
||||
const isPrintMode = cliFlags.print || cliFlags.mode !== undefined
|
||||
|
||||
// `gsd <subcommand> --help` — show subcommand-specific help
|
||||
const subcommand = cliFlags.messages[0]
|
||||
if (subcommand && process.argv.includes('--help')) {
|
||||
if (printSubcommandHelp(subcommand, process.env.GSD_VERSION || '0.0.0')) {
|
||||
process.exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
// `gsd config` — replay the setup wizard and exit
|
||||
if (cliFlags.messages[0] === 'config') {
|
||||
const authStorage = AuthStorage.create(authFilePath)
|
||||
|
|
@ -292,8 +300,18 @@ if (isPrintMode) {
|
|||
process.exit(0)
|
||||
}
|
||||
|
||||
if (mode === 'mcp') {
|
||||
const { startMcpServer } = await import('./mcp-server.js')
|
||||
await startMcpServer({
|
||||
tools: session.agent.state.tools ?? [],
|
||||
version: process.env.GSD_VERSION || '0.0.0',
|
||||
})
|
||||
// MCP server runs until the transport closes; keep alive
|
||||
await new Promise(() => {})
|
||||
}
|
||||
|
||||
await runPrintMode(session, {
|
||||
mode,
|
||||
mode: mode as 'text' | 'json',
|
||||
messages: cliFlags.messages,
|
||||
})
|
||||
process.exit(0)
|
||||
|
|
@ -403,6 +421,7 @@ if (!process.stdin.isTTY) {
|
|||
process.stderr.write('[gsd] Non-interactive alternatives:\n')
|
||||
process.stderr.write('[gsd] gsd --print "your message" Single-shot prompt\n')
|
||||
process.stderr.write('[gsd] gsd --mode rpc JSON-RPC over stdin/stdout\n')
|
||||
process.stderr.write('[gsd] gsd --mode mcp MCP server over stdin/stdout\n')
|
||||
process.stderr.write('[gsd] gsd --mode text "message" Text output mode\n')
|
||||
process.exit(1)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,30 @@
|
|||
// Per-subcommand help text, keyed by subcommand name. Looked up by
// printSubcommandHelp(); subcommands without an entry fall back to the
// generic --help output.
const SUBCOMMAND_HELP: Record<string, string> = {
  // `gsd config` — replay the interactive setup wizard
  config: [
    'Usage: gsd config',
    '',
    'Re-run the interactive setup wizard to configure:',
    ' - LLM provider (Anthropic, OpenAI, Google, etc.)',
    ' - Web search provider (Brave, Tavily, built-in)',
    ' - Remote questions (Discord, Slack, Telegram)',
    ' - Tool API keys (Context7, Jina, Groq)',
    '',
    'All steps are skippable and can be changed later with /login or /search-provider.',
  ].join('\n'),

  // `gsd update` — self-update via npm
  update: [
    'Usage: gsd update',
    '',
    'Update GSD to the latest version.',
    '',
    'Equivalent to: npm install -g gsd-pi@latest',
  ].join('\n'),
}
|
||||
|
||||
export function printHelp(version: string): void {
|
||||
process.stdout.write(`GSD v${version} — Get Shit Done\n\n`)
|
||||
process.stdout.write('Usage: gsd [options] [message...]\n\n')
|
||||
process.stdout.write('Options:\n')
|
||||
process.stdout.write(' --mode <text|json|rpc> Output mode (default: interactive)\n')
|
||||
process.stdout.write(' --mode <text|json|rpc|mcp> Output mode (default: interactive)\n')
|
||||
process.stdout.write(' --print, -p Single-shot print mode\n')
|
||||
process.stdout.write(' --continue, -c Resume the most recent session\n')
|
||||
process.stdout.write(' --model <id> Override model (e.g. claude-opus-4-6)\n')
|
||||
|
|
@ -15,4 +37,13 @@ export function printHelp(version: string): void {
|
|||
process.stdout.write('\nSubcommands:\n')
|
||||
process.stdout.write(' config Re-run the setup wizard\n')
|
||||
process.stdout.write(' update Update GSD to the latest version\n')
|
||||
process.stdout.write('\nRun gsd <subcommand> --help for subcommand-specific help.\n')
|
||||
}
|
||||
|
||||
export function printSubcommandHelp(subcommand: string, version: string): boolean {
|
||||
const help = SUBCOMMAND_HELP[subcommand]
|
||||
if (!help) return false
|
||||
process.stdout.write(`GSD v${version} — Get Shit Done\n\n`)
|
||||
process.stdout.write(help + '\n')
|
||||
return true
|
||||
}
|
||||
|
|
|
|||
79
src/mcp-server.ts
Normal file
79
src/mcp-server.ts
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
interface McpTool {
|
||||
name: string
|
||||
description: string
|
||||
parameters: Record<string, unknown>
|
||||
execute(
|
||||
toolCallId: string,
|
||||
params: Record<string, unknown>,
|
||||
signal?: AbortSignal,
|
||||
onUpdate?: unknown,
|
||||
): Promise<{
|
||||
content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>
|
||||
}>
|
||||
}
|
||||
|
||||
// MCP SDK subpath imports use wildcard exports (./*) that NodeNext resolves
|
||||
// at runtime but TypeScript cannot statically type-check. We construct the
|
||||
// specifiers dynamically so tsc treats them as `any`.
|
||||
const MCP_PKG = '@modelcontextprotocol/sdk'
|
||||
|
||||
export async function startMcpServer(options: {
|
||||
tools: McpTool[]
|
||||
version?: string
|
||||
}): Promise<void> {
|
||||
const { tools, version = '0.0.0' } = options
|
||||
|
||||
const serverMod = await import(`${MCP_PKG}/server`)
|
||||
const stdioMod = await import(`${MCP_PKG}/server/stdio`)
|
||||
const typesMod = await import(`${MCP_PKG}/types`)
|
||||
|
||||
const Server = serverMod.Server
|
||||
const StdioServerTransport = stdioMod.StdioServerTransport
|
||||
const { ListToolsRequestSchema, CallToolRequestSchema } = typesMod
|
||||
|
||||
const toolMap = new Map<string, McpTool>()
|
||||
for (const tool of tools) {
|
||||
toolMap.set(tool.name, tool)
|
||||
}
|
||||
|
||||
const server = new Server(
|
||||
{ name: 'gsd', version },
|
||||
{ capabilities: { tools: {} } },
|
||||
)
|
||||
|
||||
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
||||
tools: tools.map((t: McpTool) => ({
|
||||
name: t.name,
|
||||
description: t.description,
|
||||
inputSchema: t.parameters,
|
||||
})),
|
||||
}))
|
||||
|
||||
server.setRequestHandler(CallToolRequestSchema, async (request: any) => {
|
||||
const { name, arguments: args } = request.params
|
||||
const tool = toolMap.get(name)
|
||||
if (!tool) {
|
||||
return {
|
||||
isError: true,
|
||||
content: [{ type: 'text' as const, text: `Unknown tool: ${name}` }],
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await tool.execute(`mcp-${Date.now()}`, args ?? {}, undefined, undefined)
|
||||
const content = result.content.map((block: any) => {
|
||||
if (block.type === 'text') return { type: 'text' as const, text: block.text ?? '' }
|
||||
if (block.type === 'image') return { type: 'image' as const, data: block.data ?? '', mimeType: block.mimeType ?? 'image/png' }
|
||||
return { type: 'text' as const, text: JSON.stringify(block) }
|
||||
})
|
||||
return { content }
|
||||
} catch (err: unknown) {
|
||||
const message = err instanceof Error ? err.message : String(err)
|
||||
return { isError: true, content: [{ type: 'text' as const, text: message }] }
|
||||
}
|
||||
})
|
||||
|
||||
const transport = new StdioServerTransport()
|
||||
await server.connect(transport)
|
||||
process.stderr.write(`[gsd] MCP server started (v${version})\n`)
|
||||
}
|
||||
219
src/resources/extensions/gsd/diff-context.ts
Normal file
219
src/resources/extensions/gsd/diff-context.ts
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
/**
|
||||
* Diff-aware context module — prioritizes recently-changed files when building
|
||||
* context for the AI agent. Uses git diff/status to discover changes, then
|
||||
* provides ranking utilities for context-window budget allocation.
|
||||
*
|
||||
* Standalone module: only imports node:child_process and node:path.
|
||||
*/
|
||||
|
||||
import { execSync } from "node:child_process";
|
||||
import { resolve } from "node:path";
|
||||
|
||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
export interface ChangedFileInfo {
|
||||
path: string;
|
||||
changeType: "modified" | "added" | "deleted" | "staged";
|
||||
linesChanged?: number;
|
||||
}
|
||||
|
||||
export interface RecentFilesOptions {
|
||||
/** Maximum number of files to return (default 20) */
|
||||
maxFiles?: number;
|
||||
/** Only consider commits within this many days (default 7) */
|
||||
sinceDays?: number;
|
||||
}
|
||||
|
||||
// ─── Helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
const EXEC_OPTS = {
|
||||
encoding: "utf-8" as const,
|
||||
timeout: 5000,
|
||||
stdio: ["pipe", "pipe", "pipe"] as ["pipe", "pipe", "pipe"],
|
||||
};
|
||||
|
||||
function git(cmd: string, cwd: string): string {
|
||||
return execSync(`git ${cmd}`, { ...EXEC_OPTS, cwd }).trim();
|
||||
}
|
||||
|
||||
function splitLines(output: string): string[] {
|
||||
return output
|
||||
.split("\n")
|
||||
.map((l) => l.trim())
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
// ─── Public API ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Returns recently-changed file paths, deduplicated and sorted by recency
|
||||
* (most recent first). Combines committed diffs, staged changes, and
|
||||
* unstaged/untracked files from `git status`.
|
||||
*/
|
||||
export async function getRecentlyChangedFiles(
|
||||
cwd: string,
|
||||
options?: RecentFilesOptions,
|
||||
): Promise<string[]> {
|
||||
const maxFiles = options?.maxFiles ?? 20;
|
||||
const sinceDays = options?.sinceDays ?? 7;
|
||||
const dir = resolve(cwd);
|
||||
|
||||
try {
|
||||
// 1. Committed changes in the last N commits (or since sinceDays)
|
||||
let committedFiles: string[] = [];
|
||||
try {
|
||||
const since = `--since="${sinceDays} days ago"`;
|
||||
const raw = git(`log --diff-filter=ACMR --name-only --pretty=format: ${since}`, dir);
|
||||
committedFiles = splitLines(raw);
|
||||
} catch {
|
||||
// Fallback: use HEAD~10
|
||||
try {
|
||||
const raw = git("diff --name-only HEAD~10", dir);
|
||||
committedFiles = splitLines(raw);
|
||||
} catch {
|
||||
// Shallow clone or <10 commits — ignore
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Staged changes
|
||||
let stagedFiles: string[] = [];
|
||||
try {
|
||||
const raw = git("diff --cached --name-only", dir);
|
||||
stagedFiles = splitLines(raw);
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
|
||||
// 3. Unstaged / untracked via porcelain status
|
||||
let statusFiles: string[] = [];
|
||||
try {
|
||||
const raw = git("status --porcelain", dir);
|
||||
statusFiles = splitLines(raw).map((line) => line.slice(3)); // strip XY + space
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
|
||||
// Deduplicate, preserving insertion order (most-recent-first: status → staged → committed)
|
||||
const seen = new Set<string>();
|
||||
const result: string[] = [];
|
||||
for (const file of [...statusFiles, ...stagedFiles, ...committedFiles]) {
|
||||
if (!seen.has(file)) {
|
||||
seen.add(file);
|
||||
result.push(file);
|
||||
}
|
||||
}
|
||||
|
||||
return result.slice(0, maxFiles);
|
||||
} catch {
|
||||
// Non-git directory or git unavailable — graceful fallback
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns richer change metadata: change type and approximate line counts.
|
||||
*/
|
||||
export async function getChangedFilesWithContext(
|
||||
cwd: string,
|
||||
): Promise<ChangedFileInfo[]> {
|
||||
const dir = resolve(cwd);
|
||||
|
||||
try {
|
||||
const result: ChangedFileInfo[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
const add = (info: ChangedFileInfo) => {
|
||||
if (!seen.has(info.path)) {
|
||||
seen.add(info.path);
|
||||
result.push(info);
|
||||
}
|
||||
};
|
||||
|
||||
// 1. Staged files with numstat
|
||||
try {
|
||||
const numstat = git("diff --cached --numstat", dir);
|
||||
for (const line of splitLines(numstat)) {
|
||||
const [added, deleted, filePath] = line.split("\t");
|
||||
if (!filePath) continue;
|
||||
const lines =
|
||||
added === "-" || deleted === "-"
|
||||
? undefined
|
||||
: Number(added) + Number(deleted);
|
||||
add({ path: filePath, changeType: "staged", linesChanged: lines });
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
|
||||
// 2. Unstaged modifications with numstat
|
||||
try {
|
||||
const numstat = git("diff --numstat", dir);
|
||||
for (const line of splitLines(numstat)) {
|
||||
const [added, deleted, filePath] = line.split("\t");
|
||||
if (!filePath) continue;
|
||||
const lines =
|
||||
added === "-" || deleted === "-"
|
||||
? undefined
|
||||
: Number(added) + Number(deleted);
|
||||
add({ path: filePath, changeType: "modified", linesChanged: lines });
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
|
||||
// 3. Untracked / deleted from porcelain status
|
||||
try {
|
||||
const raw = git("status --porcelain", dir);
|
||||
for (const line of splitLines(raw)) {
|
||||
const code = line.slice(0, 2);
|
||||
const filePath = line.slice(3);
|
||||
if (seen.has(filePath)) continue;
|
||||
|
||||
if (code.includes("?")) {
|
||||
add({ path: filePath, changeType: "added" });
|
||||
} else if (code.includes("D")) {
|
||||
add({ path: filePath, changeType: "deleted" });
|
||||
} else if (code.includes("A")) {
|
||||
add({ path: filePath, changeType: "added" });
|
||||
} else {
|
||||
add({ path: filePath, changeType: "modified" });
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
|
||||
return result;
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ranks a file list so that recently-changed files appear first.
|
||||
* Files present in `changedFiles` are placed at the front (in their
|
||||
* original changedFiles order), followed by unchanged files in their
|
||||
* original order.
|
||||
*/
|
||||
export function rankFilesByRelevance(
|
||||
files: string[],
|
||||
changedFiles: string[],
|
||||
): string[] {
|
||||
const changedSet = new Set(changedFiles);
|
||||
const changed: string[] = [];
|
||||
const rest: string[] = [];
|
||||
|
||||
for (const f of files) {
|
||||
if (changedSet.has(f)) {
|
||||
changed.push(f);
|
||||
} else {
|
||||
rest.push(f);
|
||||
}
|
||||
}
|
||||
|
||||
// Maintain changedFiles priority order within the changed group
|
||||
const changedOrder = new Map(changedFiles.map((f, i) => [f, i]));
|
||||
changed.sort((a, b) => (changedOrder.get(a) ?? 0) - (changedOrder.get(b) ?? 0));
|
||||
|
||||
return [...changed, ...rest];
|
||||
}
|
||||
97
src/resources/extensions/gsd/file-watcher.ts
Normal file
97
src/resources/extensions/gsd/file-watcher.ts
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
import type { FSWatcher } from "chokidar";
import type { EventBus } from "@gsd/pi-coding-agent";

// Module-level singleton: at most one watcher is active at a time.
// startFileWatcher() closes and replaces it; stopFileWatcher() clears it.
let watcher: FSWatcher | null = null;

// Watched config files (relative to the agent dir) → EventBus event name
// emitted when that file changes.
const EVENT_MAP: Record<string, string> = {
  "settings.json": "settings-changed",
  "auth.json": "auth-changed",
  "models.json": "models-changed",
};

// Any change under this subdirectory emits "extensions-changed".
const EXTENSIONS_DIR = "extensions";

// Paths never worth reacting to: session data plus editor/OS temp artifacts.
const IGNORED_PATTERNS = [
  "**/sessions/**",
  "**/*.tmp",
  "**/*.swp",
  "**/*~",
  "**/.DS_Store",
];

// Debounce window (ms) so rapid successive writes emit a single event.
const DEBOUNCE_MS = 300;
|
||||
|
||||
/**
|
||||
* Start watching `agentDir` (e.g. `~/.gsd/agent/`) for config changes.
|
||||
* Emits events on the supplied EventBus when watched files are modified.
|
||||
*/
|
||||
export async function startFileWatcher(
|
||||
agentDir: string,
|
||||
eventBus: EventBus,
|
||||
): Promise<void> {
|
||||
if (watcher) {
|
||||
await watcher.close();
|
||||
}
|
||||
|
||||
const { watch } = await import("chokidar");
|
||||
|
||||
const pending = new Map<string, ReturnType<typeof setTimeout>>();
|
||||
|
||||
function debounceEmit(event: string): void {
|
||||
const existing = pending.get(event);
|
||||
if (existing) clearTimeout(existing);
|
||||
pending.set(
|
||||
event,
|
||||
setTimeout(() => {
|
||||
pending.delete(event);
|
||||
eventBus.emit(event, { timestamp: Date.now() });
|
||||
}, DEBOUNCE_MS),
|
||||
);
|
||||
}
|
||||
|
||||
function resolveEvent(filePath: string): string | null {
|
||||
const relative = filePath
|
||||
.replace(agentDir, "")
|
||||
.replace(/^[/\\]+/, "");
|
||||
|
||||
// Check direct file matches
|
||||
for (const [file, event] of Object.entries(EVENT_MAP)) {
|
||||
if (relative === file) return event;
|
||||
}
|
||||
|
||||
// Check extensions directory
|
||||
if (relative.startsWith(EXTENSIONS_DIR + "/") || relative === EXTENSIONS_DIR) {
|
||||
return "extensions-changed";
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
watcher = watch(agentDir, {
|
||||
ignoreInitial: true,
|
||||
depth: 2,
|
||||
ignored: IGNORED_PATTERNS,
|
||||
});
|
||||
|
||||
for (const eventType of ["add", "change", "unlink"] as const) {
|
||||
watcher.on(eventType, (filePath: string) => {
|
||||
const event = resolveEvent(filePath);
|
||||
if (event) debounceEmit(event);
|
||||
});
|
||||
}
|
||||
|
||||
// Wait for watcher to be ready
|
||||
await new Promise<void>((resolve) => {
|
||||
watcher!.on("ready", resolve);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the file watcher and clean up resources.
|
||||
*/
|
||||
export async function stopFileWatcher(): Promise<void> {
|
||||
if (watcher) {
|
||||
await watcher.close();
|
||||
watcher = null;
|
||||
}
|
||||
}
|
||||
235
src/resources/extensions/gsd/github-client.ts
Normal file
235
src/resources/extensions/gsd/github-client.ts
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
/**
 * GSD GitHub Client
 *
 * Standalone utility for interacting with GitHub's API via Octokit.
 * Provides helpers for PR creation, review reading, and issue management.
 * Can be used by other extensions that need GitHub integration.
 */

import { execSync } from "node:child_process";
import { Octokit } from "@octokit/rest";

// ─── Types ─────────────────────────────────────────────────────────────────

/** GitHub repository coordinates, as parsed from a git remote URL. */
export interface RepoInfo {
  owner: string;
  repo: string;
}

/** Inputs for creating a pull request. */
export interface PullRequestOptions {
  owner: string;
  repo: string;
  title: string;
  body: string;
  // Source branch of the PR.
  head: string;
  // Target branch the PR merges into.
  base: string;
}

/** Minimal result of PR creation: number plus browser URL. */
export interface PullRequestResult {
  number: number;
  url: string;
}

/** Subset of GitHub's pull-request payload used by this module. */
export interface PR {
  number: number;
  title: string;
  body: string | null;
  state: string;
  head: { ref: string; sha: string };
  base: { ref: string };
  url: string;
  user: { login: string } | null;
}

/** Subset of GitHub's PR-review payload used by this module. */
export interface Review {
  id: number;
  user: { login: string } | null;
  state: string;
  body: string | null;
  submitted_at: string | null;
}

/** Inputs for commenting on an issue or pull request. */
export interface IssueCommentOptions {
  owner: string;
  repo: string;
  // Issue or PR number (PRs are issues in GitHub's comment API).
  number: number;
  body: string;
}
|
||||
|
||||
// ─── Remote URL Parsing ────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Parse a GitHub owner/repo from a git remote URL.
|
||||
* Supports both HTTPS and SSH formats:
|
||||
* https://github.com/owner/repo.git
|
||||
* git@github.com:owner/repo.git
|
||||
* https://github.com/owner/repo
|
||||
* ssh://git@github.com/owner/repo.git
|
||||
*/
|
||||
export function parseRemoteUrl(url: string): RepoInfo | null {
|
||||
// SSH format: git@github.com:owner/repo.git
|
||||
const sshMatch = url.match(/^git@github\.com:([^/]+)\/([^/.]+?)(?:\.git)?$/);
|
||||
if (sshMatch) {
|
||||
return { owner: sshMatch[1], repo: sshMatch[2] };
|
||||
}
|
||||
|
||||
// HTTPS or ssh:// format
|
||||
const httpsMatch = url.match(
|
||||
/(?:https?|ssh):\/\/(?:[^@]+@)?github\.com\/([^/]+)\/([^/.]+?)(?:\.git)?$/,
|
||||
);
|
||||
if (httpsMatch) {
|
||||
return { owner: httpsMatch[1], repo: httpsMatch[2] };
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
// ─── Client Creation ───────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Create an authenticated Octokit client.
|
||||
* Uses the provided token, or falls back to GITHUB_TOKEN / GH_TOKEN env vars.
|
||||
* Returns null if no token is available.
|
||||
*/
|
||||
export function createGitHubClient(token?: string): Octokit | null {
|
||||
const auth = token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN;
|
||||
if (!auth) {
|
||||
return null;
|
||||
}
|
||||
return new Octokit({ auth });
|
||||
}
|
||||
|
||||
// ─── Repository Info ───────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Detect the GitHub owner/repo from the git remote in the given working directory.
|
||||
*/
|
||||
export async function getRepoInfo(cwd: string): Promise<RepoInfo | null> {
|
||||
try {
|
||||
const url = execSync("git config --get remote.origin.url", {
|
||||
cwd,
|
||||
encoding: "utf-8",
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
}).trim();
|
||||
|
||||
if (!url) return null;
|
||||
return parseRemoteUrl(url);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Pull Request Operations ───────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Create a pull request on GitHub.
|
||||
*/
|
||||
export async function createPullRequest(
|
||||
client: Octokit,
|
||||
options: PullRequestOptions,
|
||||
): Promise<PullRequestResult> {
|
||||
try {
|
||||
const { data } = await client.pulls.create({
|
||||
owner: options.owner,
|
||||
repo: options.repo,
|
||||
title: options.title,
|
||||
body: options.body,
|
||||
head: options.head,
|
||||
base: options.base,
|
||||
});
|
||||
return { number: data.number, url: data.html_url };
|
||||
} catch (error: unknown) {
|
||||
const message =
|
||||
error instanceof Error ? error.message : "Unknown error";
|
||||
throw new Error(
|
||||
`Failed to create pull request for ${options.owner}/${options.repo}: ${message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch a single pull request by number.
|
||||
*/
|
||||
export async function getPullRequest(
|
||||
client: Octokit,
|
||||
options: { owner: string; repo: string; number: number },
|
||||
): Promise<PR> {
|
||||
try {
|
||||
const { data } = await client.pulls.get({
|
||||
owner: options.owner,
|
||||
repo: options.repo,
|
||||
pull_number: options.number,
|
||||
});
|
||||
return {
|
||||
number: data.number,
|
||||
title: data.title,
|
||||
body: data.body,
|
||||
state: data.state,
|
||||
head: { ref: data.head.ref, sha: data.head.sha },
|
||||
base: { ref: data.base.ref },
|
||||
url: data.html_url,
|
||||
user: data.user ? { login: data.user.login } : null,
|
||||
};
|
||||
} catch (error: unknown) {
|
||||
const message =
|
||||
error instanceof Error ? error.message : "Unknown error";
|
||||
throw new Error(
|
||||
`Failed to get pull request #${options.number} for ${options.owner}/${options.repo}: ${message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* List reviews on a pull request.
|
||||
*/
|
||||
export async function listPullRequestReviews(
|
||||
client: Octokit,
|
||||
options: { owner: string; repo: string; number: number },
|
||||
): Promise<Review[]> {
|
||||
try {
|
||||
const { data } = await client.pulls.listReviews({
|
||||
owner: options.owner,
|
||||
repo: options.repo,
|
||||
pull_number: options.number,
|
||||
});
|
||||
return data.map((review) => ({
|
||||
id: review.id,
|
||||
user: review.user ? { login: review.user.login } : null,
|
||||
state: review.state,
|
||||
body: review.body,
|
||||
submitted_at: review.submitted_at ?? null,
|
||||
}));
|
||||
} catch (error: unknown) {
|
||||
const message =
|
||||
error instanceof Error ? error.message : "Unknown error";
|
||||
throw new Error(
|
||||
`Failed to list reviews for PR #${options.number} in ${options.owner}/${options.repo}: ${message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Issue Comments ────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Create a comment on an issue or pull request.
|
||||
*/
|
||||
export async function createIssueComment(
|
||||
client: Octokit,
|
||||
options: IssueCommentOptions,
|
||||
): Promise<{ id: number }> {
|
||||
try {
|
||||
const { data } = await client.issues.createComment({
|
||||
owner: options.owner,
|
||||
repo: options.repo,
|
||||
issue_number: options.number,
|
||||
body: options.body,
|
||||
});
|
||||
return { id: data.id };
|
||||
} catch (error: unknown) {
|
||||
const message =
|
||||
error instanceof Error ? error.message : "Unknown error";
|
||||
throw new Error(
|
||||
`Failed to create comment on issue #${options.number} in ${options.owner}/${options.repo}: ${message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
87
src/resources/extensions/gsd/mcp-server.ts
Normal file
87
src/resources/extensions/gsd/mcp-server.ts
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
// @ts-ignore — @modelcontextprotocol/sdk types may not be in extensions tsconfig
|
||||
import { Server } from '@modelcontextprotocol/sdk/server'
|
||||
// @ts-ignore
|
||||
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio'
|
||||
// @ts-ignore
|
||||
import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprotocol/sdk/types'
|
||||
|
||||
interface McpTool {
|
||||
name: string
|
||||
description: string
|
||||
parameters: Record<string, unknown>
|
||||
execute(toolCallId: string, params: Record<string, unknown>, signal?: AbortSignal, onUpdate?: unknown): Promise<{ content: Array<{ type: string; text?: string; data?: string; mimeType?: string }> }>
|
||||
}
|
||||
|
||||
export async function startMcpServer(options: {
|
||||
tools: McpTool[]
|
||||
version?: string
|
||||
}): Promise<void> {
|
||||
const { tools, version = '0.0.0' } = options
|
||||
|
||||
const toolMap = new Map<string, McpTool>()
|
||||
for (const tool of tools) {
|
||||
toolMap.set(tool.name, tool)
|
||||
}
|
||||
|
||||
const server = new Server(
|
||||
{ name: 'gsd', version },
|
||||
{ capabilities: { tools: {} } },
|
||||
)
|
||||
|
||||
server.setRequestHandler(ListToolsRequestSchema, async () => {
|
||||
return {
|
||||
tools: tools.map((t) => ({
|
||||
name: t.name,
|
||||
description: t.description,
|
||||
inputSchema: t.parameters,
|
||||
})),
|
||||
}
|
||||
})
|
||||
|
||||
server.setRequestHandler(CallToolRequestSchema, async (request: any) => {
|
||||
const { name, arguments: args } = request.params
|
||||
const tool = toolMap.get(name)
|
||||
if (!tool) {
|
||||
return {
|
||||
isError: true,
|
||||
content: [{ type: 'text' as const, text: `Unknown tool: ${name}` }],
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await tool.execute(
|
||||
`mcp-${Date.now()}`,
|
||||
args ?? {},
|
||||
undefined,
|
||||
undefined,
|
||||
)
|
||||
|
||||
const content = result.content.map((block) => {
|
||||
if (block.type === 'text') {
|
||||
return { type: 'text' as const, text: block.text }
|
||||
}
|
||||
if (block.type === 'image') {
|
||||
return {
|
||||
type: 'image' as const,
|
||||
data: block.data,
|
||||
mimeType: block.mimeType,
|
||||
}
|
||||
}
|
||||
return { type: 'text' as const, text: JSON.stringify(block) }
|
||||
})
|
||||
|
||||
return { content }
|
||||
} catch (err: unknown) {
|
||||
const message = err instanceof Error ? err.message : String(err)
|
||||
return {
|
||||
isError: true,
|
||||
content: [{ type: 'text' as const, text: message }],
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
const transport = new StdioServerTransport()
|
||||
await server.connect(transport)
|
||||
|
||||
process.stderr.write(`[gsd] MCP server started (v${version})\n`)
|
||||
}
|
||||
136
src/resources/extensions/gsd/tests/diff-context.test.ts
Normal file
136
src/resources/extensions/gsd/tests/diff-context.test.ts
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
/**
|
||||
* Unit tests for diff-context.ts — diff-aware context module.
|
||||
* Tests git-based file discovery and relevance ranking.
|
||||
*/
|
||||
|
||||
import { describe, it } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { tmpdir } from "node:os";
|
||||
import { mkdtempSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
import {
|
||||
getRecentlyChangedFiles,
|
||||
getChangedFilesWithContext,
|
||||
rankFilesByRelevance,
|
||||
} from "../diff-context.js";
|
||||
|
||||
// ─── getRecentlyChangedFiles ────────────────────────────────────────────────
|
||||
|
||||
describe("diff-context: getRecentlyChangedFiles", () => {
  it("returns an array of file paths in the current git repo", async () => {
    // Use the project root — guaranteed to be a git repo
    const cwd = process.cwd();
    const files = await getRecentlyChangedFiles(cwd);

    assert.ok(Array.isArray(files), "should return an array");
    // The result may be empty if the repo is totally clean with no recent
    // commits, but the function should not throw.
  });

  it("respects maxFiles option", async () => {
    const cwd = process.cwd();
    const files = await getRecentlyChangedFiles(cwd, { maxFiles: 3 });

    assert.ok(files.length <= 3, "should not exceed maxFiles");
  });

  it("returns empty array for non-git directory", async () => {
    // Fresh temp dir is never a git repo, so the graceful fallback applies.
    // NOTE(review): the temp dir is not removed afterwards — consider cleanup.
    const tmp = mkdtempSync(join(tmpdir(), "diff-ctx-test-"));
    const files = await getRecentlyChangedFiles(tmp);

    assert.deepStrictEqual(files, [], "should return empty array for non-git dir");
  });

  it("returns deduplicated paths", async () => {
    const cwd = process.cwd();
    const files = await getRecentlyChangedFiles(cwd, { maxFiles: 100 });
    const unique = new Set(files);

    // Set size equals list length iff there are no duplicate entries.
    assert.equal(files.length, unique.size, "should have no duplicates");
  });
});
|
||||
|
||||
// ─── getChangedFilesWithContext ─────────────────────────────────────────────
|
||||
|
||||
describe("diff-context: getChangedFilesWithContext", () => {
  it("returns array of ChangedFileInfo objects", async () => {
    // Run against the project root and shape-check whatever comes back —
    // the actual contents depend on the working tree's state.
    const cwd = process.cwd();
    const infos = await getChangedFilesWithContext(cwd);

    assert.ok(Array.isArray(infos), "should return an array");

    // Every entry must have a string path, a valid changeType, and — when
    // present — a numeric linesChanged.
    for (const info of infos) {
      assert.ok(typeof info.path === "string", "path should be a string");
      assert.ok(
        ["modified", "added", "deleted", "staged"].includes(info.changeType),
        `changeType should be valid, got: ${info.changeType}`,
      );
      if (info.linesChanged !== undefined) {
        assert.ok(typeof info.linesChanged === "number", "linesChanged should be a number");
      }
    }
  });

  it("returns empty array for non-git directory", async () => {
    // NOTE(review): the temp dir is not removed afterwards — consider cleanup.
    const tmp = mkdtempSync(join(tmpdir(), "diff-ctx-test2-"));
    const infos = await getChangedFilesWithContext(tmp);

    assert.deepStrictEqual(infos, [], "should return empty array for non-git dir");
  });
});
|
||||
|
||||
// ─── rankFilesByRelevance ───────────────────────────────────────────────────
|
||||
|
||||
describe("diff-context: rankFilesByRelevance", () => {
|
||||
it("places changed files before unchanged files", () => {
|
||||
const allFiles = ["a.ts", "b.ts", "c.ts", "d.ts"];
|
||||
const changed = ["c.ts", "a.ts"];
|
||||
|
||||
const ranked = rankFilesByRelevance(allFiles, changed);
|
||||
|
||||
// Changed files come first, sorted by changedFiles priority (c before a)
|
||||
assert.equal(ranked[0], "c.ts");
|
||||
assert.equal(ranked[1], "a.ts");
|
||||
// Unchanged files follow in original order
|
||||
assert.equal(ranked[2], "b.ts");
|
||||
assert.equal(ranked[3], "d.ts");
|
||||
});
|
||||
|
||||
it("preserves order of changed files based on changedFiles priority", () => {
|
||||
const allFiles = ["x.ts", "y.ts", "z.ts", "w.ts"];
|
||||
const changed = ["z.ts", "x.ts"]; // z has higher priority (index 0)
|
||||
|
||||
const ranked = rankFilesByRelevance(allFiles, changed);
|
||||
|
||||
assert.equal(ranked[0], "z.ts", "z.ts should be first (higher priority in changedFiles)");
|
||||
assert.equal(ranked[1], "x.ts", "x.ts should be second");
|
||||
});
|
||||
|
||||
it("returns unchanged files in original order when no changed files match", () => {
|
||||
const allFiles = ["a.ts", "b.ts", "c.ts"];
|
||||
const changed = ["x.ts", "y.ts"]; // none match
|
||||
|
||||
const ranked = rankFilesByRelevance(allFiles, changed);
|
||||
|
||||
assert.deepStrictEqual(ranked, ["a.ts", "b.ts", "c.ts"]);
|
||||
});
|
||||
|
||||
it("handles empty inputs gracefully", () => {
|
||||
assert.deepStrictEqual(rankFilesByRelevance([], []), []);
|
||||
assert.deepStrictEqual(rankFilesByRelevance(["a.ts"], []), ["a.ts"]);
|
||||
assert.deepStrictEqual(rankFilesByRelevance([], ["a.ts"]), []);
|
||||
});
|
||||
|
||||
it("handles all files being changed", () => {
|
||||
const allFiles = ["a.ts", "b.ts"];
|
||||
const changed = ["b.ts", "a.ts"];
|
||||
|
||||
const ranked = rankFilesByRelevance(allFiles, changed);
|
||||
|
||||
// Both are changed, so sorted by changedFiles order: b first, then a
|
||||
assert.equal(ranked[0], "b.ts");
|
||||
assert.equal(ranked[1], "a.ts");
|
||||
assert.equal(ranked.length, 2);
|
||||
});
|
||||
});
|
||||
45
src/resources/extensions/gsd/token-counter.ts
Normal file
45
src/resources/extensions/gsd/token-counter.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
interface TokenEncoder {
|
||||
encode(text: string): Uint32Array | number[];
|
||||
}
|
||||
|
||||
let encoder: TokenEncoder | null = null;
|
||||
let encoderFailed = false;
|
||||
|
||||
async function getEncoder(): Promise<TokenEncoder | null> {
|
||||
if (encoder) return encoder;
|
||||
if (encoderFailed) return null;
|
||||
try {
|
||||
// @ts-ignore — tiktoken may not have type declarations in extensions tsconfig
|
||||
const tiktoken = await import("tiktoken");
|
||||
encoder = tiktoken.encoding_for_model("gpt-4o") as TokenEncoder;
|
||||
return encoder;
|
||||
} catch {
|
||||
encoderFailed = true;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export async function countTokens(text: string): Promise<number> {
|
||||
const enc = await getEncoder();
|
||||
if (enc) {
|
||||
const tokens = enc.encode(text);
|
||||
return tokens.length;
|
||||
}
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
export function countTokensSync(text: string): number {
|
||||
if (encoder) {
|
||||
return encoder.encode(text).length;
|
||||
}
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
export async function initTokenCounter(): Promise<boolean> {
|
||||
const enc = await getEncoder();
|
||||
return enc !== null;
|
||||
}
|
||||
|
||||
export function isAccurateCountingAvailable(): boolean {
|
||||
return encoder !== null;
|
||||
}
|
||||
141
src/resources/skills/lint/SKILL.md
Normal file
141
src/resources/skills/lint/SKILL.md
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
---
|
||||
name: lint
|
||||
description: Lint and format code. Auto-detects ESLint, Biome, Prettier, or language-native formatters and runs them with auto-fix. Reports remaining issues with actionable suggestions.
|
||||
---
|
||||
|
||||
<objective>
|
||||
Lint and format code in the current project. Auto-detect the project's linter and formatter toolchain, run them against the target files, and report results grouped by severity with actionable fix suggestions.
|
||||
</objective>
|
||||
|
||||
<arguments>
|
||||
This skill accepts optional arguments after `/lint`:
|
||||
|
||||
- **No arguments**: Lint only files changed in the current working tree (`git diff --name-only` and `git diff --cached --name-only`).
|
||||
- **A file or directory path**: Lint only that specific path (e.g., `/lint src/utils`).
|
||||
- **`--fix`**: Automatically apply safe fixes. Can be combined with a path (e.g., `/lint src/ --fix`).
|
||||
- **`--fix` without a path**: Auto-fix changed files only.
|
||||
|
||||
Parse the arguments before proceeding. If `--fix` is present, set fix mode. If a non-flag argument is present, treat it as the target path.
|
||||
</arguments>
|
||||
|
||||
<detection>
|
||||
Auto-detect the project's linter and formatter by checking configuration files in the project root. Check in this order and use the **first match found** for each category (linter vs. formatter). A project may have both a linter and a formatter.
|
||||
|
||||
**JavaScript/TypeScript Linters:**
|
||||
|
||||
1. **Biome** — Look for `biome.json` or `biome.jsonc` in the project root.
|
||||
- Lint command: `npx @biomejs/biome check .`; in fix mode add `--write` (older Biome 1.x used `--apply`)
|
||||
- Format command: `npx @biomejs/biome format .`; in fix mode add `--write`
|
||||
- Biome handles both linting and formatting. No need for a separate formatter if Biome is detected.
|
||||
|
||||
2. **ESLint** — Look for `.eslintrc`, `.eslintrc.*` (js, cjs, json, yml, yaml), `eslint.config.*` (js, mjs, cjs, ts, mts, cts), or an `"eslintConfig"` key in `package.json`.
|
||||
- Lint command: `npx eslint .`; in fix mode add the `--fix` flag
|
||||
- Check `package.json` for the installed version. ESLint 9+ uses flat config (`eslint.config.*`).
|
||||
|
||||
**JavaScript/TypeScript Formatters (only if Biome was NOT detected):**
|
||||
|
||||
3. **Prettier** — Look for `.prettierrc`, `.prettierrc.*`, `prettier.config.*`, or a `"prettier"` key in `package.json`.
|
||||
- Format check: `npx prettier --check .`
|
||||
- Format fix: `npx prettier --write .`
|
||||
|
||||
**Rust:**
|
||||
|
||||
4. **rustfmt** — Look for `rustfmt.toml` or `.rustfmt.toml`, or `Cargo.toml` in the project root.
|
||||
- Format check: `cargo fmt -- --check`
|
||||
- Format fix: `cargo fmt`
|
||||
- Lint: `cargo clippy` (if available)
|
||||
|
||||
**Go:**
|
||||
|
||||
5. **Go tools** — Look for `go.mod` in the project root.
|
||||
- Format check: `gofmt -l .`
|
||||
- Format fix: `gofmt -w .`
|
||||
- Lint: `golangci-lint run` (if installed), otherwise `go vet ./...`
|
||||
|
||||
**Python:**
|
||||
|
||||
6. **Ruff** — Look for `ruff.toml` or a `[tool.ruff]` section in `pyproject.toml`.
|
||||
- Lint command: `ruff check .`; in fix mode add the `--fix` flag
|
||||
- Format command: `ruff format .` in fix mode; without fix mode run `ruff format --check .` to report only
|
||||
|
||||
7. **Black** — Look for a `[tool.black]` section in `pyproject.toml`, or `black` in requirements files.
|
||||
- Format check: `black --check .`
|
||||
- Format fix: `black .`
|
||||
|
||||
If no linter or formatter is detected, inform the user and suggest common options for their project type based on the files present.
|
||||
</detection>
|
||||
|
||||
<execution>
|
||||
|
||||
**Step 1: Determine target files**
|
||||
|
||||
- If a path argument was provided, use that path.
|
||||
- If no path argument, get changed files:
|
||||
```bash
|
||||
git diff --name-only
|
||||
git diff --cached --name-only
|
||||
```
|
||||
Filter to files that still exist on disk. If no files are changed, inform the user and offer to lint the entire project instead.
|
||||
|
||||
**Step 2: Run the detected tools**
|
||||
|
||||
Run the linter and/or formatter against the target files or directory.
|
||||
|
||||
- **Without `--fix`**: Run in check/report mode only. Do NOT modify any files.
|
||||
- **With `--fix`**: Run with auto-fix flags enabled.
|
||||
|
||||
When running formatters without `--fix`, show a preview of what would change:
|
||||
- For Prettier: use `--check` and list files that would change.
|
||||
- For Biome: run `check` without `--write` (or legacy `--apply`) so nothing is modified.
|
||||
- For Black: use `--check --diff` to show the diff preview.
|
||||
- For Ruff: use `--diff` for format and standard output for lint.
|
||||
- For rustfmt/gofmt: use `--check` or `-l` to list files, then show a diff for up to 5 files using `diff <(command) file`.
|
||||
|
||||
**Step 3: Parse and organize output**
|
||||
|
||||
Parse the tool output and organize issues:
|
||||
|
||||
```markdown
|
||||
## Lint Results
|
||||
|
||||
### Errors (X issues)
|
||||
| File | Line | Rule | Message |
|
||||
|------|------|------|---------|
|
||||
| ... | ... | ... | ... |
|
||||
|
||||
### Warnings (X issues)
|
||||
| File | Line | Rule | Message |
|
||||
|------|------|------|---------|
|
||||
| ... | ... | ... | ... |
|
||||
|
||||
### Formatting
|
||||
- X files would be reformatted
|
||||
- [list files]
|
||||
|
||||
### Summary
|
||||
- Total issues: X errors, Y warnings, Z formatting
|
||||
- Auto-fixable: N issues (run `/lint --fix` to apply)
|
||||
```
|
||||
|
||||
**Step 4: Suggest fixes for common issues**
|
||||
|
||||
For the most frequent issues, provide brief actionable guidance:
|
||||
|
||||
- If the same rule appears 5+ times, suggest a bulk fix or config change.
|
||||
- For unused imports/variables, list them for quick removal.
|
||||
- For formatting-only issues, note that `--fix` will resolve them safely.
|
||||
- For issues that cannot be auto-fixed, provide a one-line explanation of how to resolve each unique rule violation.
|
||||
|
||||
</execution>
|
||||
|
||||
<critical_rules>
|
||||
|
||||
1. **Never modify files without `--fix`**: Default mode is report-only. Respect the user's working tree.
|
||||
2. **Use the project's own config**: Do not invent lint rules. Use whatever config files exist in the project.
|
||||
3. **Use the project's installed version**: Always prefer `npx`, `cargo`, or the project-local binary. Do not use globally installed tools unless no local version exists.
|
||||
4. **Handle missing tools gracefully**: If a config file exists but the tool is not installed, inform the user and provide the install command (e.g., `npm install --save-dev eslint`).
|
||||
5. **Respect `.gitignore` and ignore patterns**: Do not lint `node_modules`, `dist`, `build`, `target`, `.git`, or other commonly ignored directories. Most tools handle this automatically; verify they do.
|
||||
6. **Limit output**: If there are more than 50 issues, show the first 30 grouped by severity, then summarize the rest with counts per file. Do not flood the user with hundreds of lines.
|
||||
7. **Exit cleanly**: After presenting results, do not take further action. Let the user decide next steps.
|
||||
|
||||
</critical_rules>
|
||||
214
src/resources/skills/review/SKILL.md
Normal file
214
src/resources/skills/review/SKILL.md
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
---
|
||||
name: review
|
||||
description: Review code changes for security, performance, bugs, and quality. Reviews staged changes, unstaged changes, specific commits, or PR-ready diffs.
|
||||
---
|
||||
|
||||
<objective>
|
||||
Review code changes and provide structured feedback covering security, performance, bug risks, code quality, and test coverage gaps. This skill analyzes diffs and surrounding context to catch issues before they reach production.
|
||||
</objective>
|
||||
|
||||
<context>
|
||||
This skill reviews code changes at various stages of the development workflow. It can review staged changes before a commit, unstaged work-in-progress, a specific commit, or the full set of changes on a branch that are ready for a pull request.
|
||||
|
||||
The reviewer reads both the diff and the surrounding source files to understand intent and catch issues that only appear in context.
|
||||
</context>
|
||||
|
||||
<core_principle>
|
||||
**FIND REAL ISSUES, NOT STYLE NITS.** Focus on problems that cause bugs, security vulnerabilities, performance degradation, or maintainability pain. Avoid nitpicking formatting or subjective style preferences unless they harm readability.
|
||||
</core_principle>
|
||||
|
||||
<analysis_only_rule>
|
||||
**THIS SKILL IS READ-ONLY. DO NOT MODIFY CODE.**
|
||||
|
||||
The purpose is to review and report findings. Making changes during review conflates the reviewer and author roles. Present findings and let the user decide what to act on.
|
||||
</analysis_only_rule>
|
||||
|
||||
<quick_start>
|
||||
|
||||
<determine_review_scope>
|
||||
|
||||
Parse the user's input to determine what to review:
|
||||
|
||||
1. **No arguments** - Review staged changes first. If nothing is staged, review unstaged changes.
|
||||
- Staged: `git diff --cached`
|
||||
- Unstaged: `git diff`
|
||||
- If both are empty, review the most recent commit: `git show HEAD`
|
||||
|
||||
2. **Commit hash argument** (e.g., `/review abc1234`) - Review that specific commit.
|
||||
- `git show <hash>`
|
||||
|
||||
3. **File path argument** (e.g., `/review src/foo.ts`) - Review unstaged changes in that file.
|
||||
- `git diff -- <path>` then fall back to `git diff --cached -- <path>`
|
||||
|
||||
4. **"pr" argument** (e.g., `/review pr`) - Review all changes since branching from main.
|
||||
- `git diff main...HEAD`
|
||||
- If on main, review `git diff HEAD~1`
|
||||
|
||||
After obtaining the diff, if it is empty, inform the user that there are no changes to review and stop.
|
||||
|
||||
</determine_review_scope>
|
||||
|
||||
<gather_context>
|
||||
|
||||
Before analyzing the diff:
|
||||
|
||||
1. **Read changed files in full** - Do not review a diff in isolation. Read each modified file to understand the surrounding code, imports, types, and control flow.
|
||||
2. **Identify the tech stack** - Note languages, frameworks, and libraries in use. This affects what patterns are risky.
|
||||
3. **Check for related test files** - For each changed source file, look for corresponding test files. Note whether tests were updated alongside the changes.
|
||||
4. **Check for configuration changes** - If config files changed (env, CI, package.json, tsconfig, etc.), pay extra attention to side effects.
|
||||
|
||||
</gather_context>
|
||||
|
||||
<review_categories>
|
||||
|
||||
Analyze the changes against each category below. Only report findings that are actually present. Skip categories with no issues.
|
||||
|
||||
**A. Security Issues** (Severity: CRITICAL or HIGH)
|
||||
- Injection vulnerabilities (SQL injection, command injection, template injection)
|
||||
- Cross-site scripting (XSS) - unsanitized user input rendered in HTML
|
||||
- Authentication and authorization flaws (missing auth checks, privilege escalation)
|
||||
- Secrets or credentials hardcoded or logged
|
||||
- Insecure deserialization or unsafe eval usage
|
||||
- Path traversal or file access vulnerabilities
|
||||
- Missing input validation on external data
|
||||
|
||||
**B. Performance Concerns** (Severity: HIGH or MEDIUM)
|
||||
- N+1 query patterns in database access
|
||||
- Unnecessary memory allocations in hot paths or loops
|
||||
- Blocking operations on the main thread or in async contexts
|
||||
- Missing pagination on unbounded queries
|
||||
- Redundant computation that could be cached or memoized
|
||||
- Large payloads without streaming or chunking
|
||||
|
||||
**C. Bug Risks** (Severity: HIGH or MEDIUM)
|
||||
- Off-by-one errors in loops or array access
|
||||
- Null/undefined dereferences without guards
|
||||
- Race conditions in concurrent or async code
|
||||
- Incorrect error handling (swallowed errors, wrong error types)
|
||||
- Type mismatches or unsafe type assertions
|
||||
- Logic errors in conditionals (inverted checks, missing cases)
|
||||
- Resource leaks (unclosed connections, file handles, listeners)
|
||||
|
||||
**D. Code Quality** (Severity: MEDIUM or LOW)
|
||||
- Unclear or misleading naming
|
||||
- Significant code duplication that should be extracted
|
||||
- Excessive complexity (deeply nested logic, functions doing too many things)
|
||||
- Dead code or unreachable branches
|
||||
- Missing or misleading comments on non-obvious logic
|
||||
- Inconsistency with patterns used elsewhere in the codebase
|
||||
|
||||
**E. Test Coverage Gaps** (Severity: MEDIUM or LOW)
|
||||
- New logic paths without corresponding test cases
|
||||
- Changed behavior without updated tests
|
||||
- Edge cases not covered (empty inputs, boundary values, error paths)
|
||||
- Missing integration tests for new API endpoints or database changes
|
||||
|
||||
</review_categories>
|
||||
|
||||
<format_findings>
|
||||
|
||||
For each finding, use this structure:
|
||||
|
||||
```
|
||||
### [SEVERITY] Category: Brief Title
|
||||
|
||||
**File**: `path/to/file.ext` (lines X-Y)
|
||||
|
||||
**Issue**: Clear description of the problem.
|
||||
|
||||
**Why it matters**: What could go wrong if this is not addressed.
|
||||
|
||||
**Suggestion**: How to fix it, with a code snippet if helpful.
|
||||
```
|
||||
|
||||
Severity levels:
|
||||
- **CRITICAL** - Must fix before merge. Security vulnerability or data loss risk.
|
||||
- **HIGH** - Should fix before merge. Likely bug or significant performance issue.
|
||||
- **MEDIUM** - Should fix soon. Code quality or moderate risk issue.
|
||||
- **LOW** - Consider fixing. Minor improvement opportunity.
|
||||
|
||||
</format_findings>
|
||||
|
||||
</quick_start>
|
||||
|
||||
<critical_rules>
|
||||
|
||||
1. **READ THE FULL FILE**: Never review a diff without reading the complete source file for context
|
||||
2. **NO FALSE ALARMS**: Only report issues you can explain concretely. Do not report vague concerns
|
||||
3. **PRIORITIZE**: Lead with the most severe findings. Do not bury critical issues under style nits
|
||||
4. **BE SPECIFIC**: Include file paths, line numbers, and code references for every finding
|
||||
5. **EXPLAIN THE RISK**: For each finding, explain what could actually go wrong
|
||||
6. **CHECK TESTS**: Always check whether changes have corresponding test updates
|
||||
7. **CONSIDER THE STACK**: Apply language-specific and framework-specific knowledge to your review
|
||||
8. **DO NOT MODIFY CODE**: Present findings only. The user decides what to act on
|
||||
|
||||
</critical_rules>
|
||||
|
||||
<output_format>
|
||||
|
||||
```markdown
|
||||
## Code Review: [brief description of what was reviewed]
|
||||
|
||||
**Scope**: [staged changes | unstaged changes | commit abc1234 | PR changes from main]
|
||||
**Files reviewed**: [count] files changed, [additions] additions, [deletions] deletions
|
||||
|
||||
---
|
||||
|
||||
### Findings
|
||||
|
||||
[Findings grouped by severity, highest first. Use the format from <format_findings>.]
|
||||
|
||||
---
|
||||
|
||||
### Summary
|
||||
|
||||
| Severity | Count |
|
||||
|----------|-------|
|
||||
| CRITICAL | X |
|
||||
| HIGH | X |
|
||||
| MEDIUM | X |
|
||||
| LOW | X |
|
||||
|
||||
### Recommended Actions
|
||||
|
||||
1. [Most important action to take]
|
||||
2. [Next most important action]
|
||||
3. [...]
|
||||
```
|
||||
|
||||
If no issues are found:
|
||||
|
||||
```markdown
|
||||
## Code Review: [brief description]
|
||||
|
||||
**Scope**: [what was reviewed]
|
||||
**Files reviewed**: [count]
|
||||
|
||||
No significant issues found. The changes look good to merge.
|
||||
```
|
||||
|
||||
</output_format>
|
||||
|
||||
<decision_gate>
|
||||
|
||||
**After presenting findings, ALWAYS offer these options:**
|
||||
|
||||
```
|
||||
─────────────────────────────────────────
|
||||
REVIEW COMPLETE
|
||||
|
||||
What would you like to do?
|
||||
|
||||
1. **Fix issues** - I'll address the findings starting with the most critical
|
||||
2. **Save review** - Export findings to a markdown file
|
||||
3. **Review again** - Re-review with different scope or focus
|
||||
4. **Discuss a finding** - Ask questions about a specific issue
|
||||
5. **Other** - Tell me what you need
|
||||
─────────────────────────────────────────
|
||||
```
|
||||
|
||||
**Wait for user response before taking any action.**
|
||||
|
||||
This gate is MANDATORY. Never skip it. Never auto-implement fixes.
|
||||
|
||||
</decision_gate>
|
||||
201
src/resources/skills/test/SKILL.md
Normal file
201
src/resources/skills/test/SKILL.md
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
---
|
||||
name: test
|
||||
description: Generate or run tests. Auto-detects test framework, generates comprehensive tests for source files, or runs existing test suites with failure analysis.
|
||||
---
|
||||
|
||||
<objective>
|
||||
Generate or run tests for the current project. This skill auto-detects the test framework in use, generates comprehensive tests for source files, or runs existing test suites and analyzes failures.
|
||||
|
||||
Accepts optional arguments:
|
||||
- A file path: generate tests for that source file
|
||||
- `run`: run the existing test suite and analyze results
|
||||
- No arguments: suggest what to test based on recent changes
|
||||
</objective>
|
||||
|
||||
<context>
|
||||
This skill handles test generation and execution across multiple languages and frameworks. It adapts to whatever testing conventions the project already uses rather than imposing new ones.
|
||||
</context>
|
||||
|
||||
<quick_start>
|
||||
|
||||
<step_1_detect_framework>
|
||||
|
||||
**Detect the test framework and conventions before doing anything else.**
|
||||
|
||||
Check these sources in order:
|
||||
|
||||
1. **package.json** (Node/JS/TS projects):
|
||||
- `scripts.test` for the test command
|
||||
- `devDependencies` for jest, vitest, mocha, ava, tap, node:test, playwright, cypress
|
||||
- `jest` or `vitest` config keys
|
||||
|
||||
2. **Config files**:
|
||||
- `jest.config.*`, `vitest.config.*`, `.mocharc.*`, `ava.config.*`
|
||||
- `pytest.ini`, `pyproject.toml` (look for `[tool.pytest]`), `setup.cfg`
|
||||
- `go.mod` (Go projects use `go test` by default)
|
||||
- `Cargo.toml` (Rust projects use `cargo test`)
|
||||
|
||||
3. **Existing test files**:
|
||||
- Scan for `*.test.*`, `*.spec.*`, `*_test.*`, `test_*.*` files
|
||||
- Read 1-2 existing test files to understand patterns, imports, assertion style, and structure
|
||||
- Note the directory structure (co-located tests vs `__tests__/` vs `tests/` vs `test/`)
|
||||
|
||||
4. **Record your findings**:
|
||||
- Framework name and version
|
||||
- Test file naming convention
|
||||
- Test file location convention
|
||||
- Import/require style
|
||||
- Assertion style (expect, assert, chai, etc.)
|
||||
- Any custom utilities, fixtures, or helpers used
|
||||
|
||||
</step_1_detect_framework>
|
||||
|
||||
<step_2_handle_arguments>
|
||||
|
||||
**Route based on the argument provided.**
|
||||
|
||||
- **File path given** -> Go to `generate_tests`
|
||||
- **"run" given** -> Go to `run_tests`
|
||||
- **No arguments** -> Go to `suggest_tests`
|
||||
|
||||
</step_2_handle_arguments>
|
||||
|
||||
<generate_tests>
|
||||
|
||||
**Generate tests for the specified source file.**
|
||||
|
||||
**A. Read and analyze the source file:**
|
||||
- Identify all exported/public functions, classes, methods, and types
|
||||
- Understand each function's parameters, return types, and side effects
|
||||
- Note error handling patterns (throws, returns null, returns Result, etc.)
|
||||
- Identify dependencies that will need mocking
|
||||
|
||||
**B. Read existing test files in the project (1-2 files minimum):**
|
||||
- Match their import style exactly
|
||||
- Match their describe/it or test block structure
|
||||
- Match their assertion patterns
|
||||
- Match their mock/stub approach
|
||||
- Use the same test utilities and helpers
|
||||
|
||||
**C. Generate tests covering:**
|
||||
|
||||
1. **Happy paths**: Normal expected inputs produce correct outputs
|
||||
2. **Edge cases**:
|
||||
- Empty inputs (empty string, empty array, null, undefined, zero)
|
||||
- Boundary values (min/max integers, very long strings)
|
||||
- Single element collections
|
||||
3. **Error handling**:
|
||||
- Invalid inputs that should throw or return errors
|
||||
- Missing required parameters
|
||||
- Type mismatches (if applicable)
|
||||
4. **Async behavior** (if the function is async):
|
||||
- Successful resolution
|
||||
- Rejection/error cases
|
||||
- Timeout scenarios (if relevant)
|
||||
5. **Dependencies**:
|
||||
- Mock external dependencies (APIs, databases, file system)
|
||||
- Verify correct interaction with dependencies (called with right args)
|
||||
|
||||
**D. Place the test file correctly:**
|
||||
- Follow the project's existing convention for test file location
|
||||
- Use the project's naming convention (`.test.ts`, `.spec.js`, `_test.go`, `test_*.py`, etc.)
|
||||
|
||||
**E. Run the generated tests immediately to verify they pass.**
|
||||
- If tests fail, read the error output carefully
|
||||
- Fix the test code (not the source code)
|
||||
- Re-run until all tests pass
|
||||
|
||||
</generate_tests>
|
||||
|
||||
<run_tests>
|
||||
|
||||
**Run the existing test suite and analyze results.**
|
||||
|
||||
**A. Determine the test command:**
|
||||
- Check `package.json` `scripts.test` for Node projects
|
||||
- Use `pytest` for Python projects
|
||||
- Use `go test ./...` for Go projects
|
||||
- Use `cargo test` for Rust projects
|
||||
- Fall back to the detected framework's CLI
|
||||
|
||||
**B. Run the tests:**
|
||||
- Execute the test command
|
||||
- Capture full output including failures and errors
|
||||
|
||||
**C. Analyze results:**
|
||||
- Report total passed, failed, skipped counts
|
||||
- For each failure:
|
||||
- Identify the failing test name and file
|
||||
- Show the assertion that failed (expected vs actual)
|
||||
- Read the relevant source code if needed
|
||||
- Provide a specific diagnosis of why it failed
|
||||
- Suggest a concrete fix (is it a test bug or a source bug?)
|
||||
|
||||
**D. Present a summary:**
|
||||
|
||||
```
|
||||
Test Results: X passed, Y failed, Z skipped
|
||||
|
||||
Failures:
|
||||
1. [test name] - [brief diagnosis]
|
||||
Fix: [specific suggestion]
|
||||
|
||||
2. [test name] - [brief diagnosis]
|
||||
Fix: [specific suggestion]
|
||||
```
|
||||
|
||||
</run_tests>
|
||||
|
||||
<suggest_tests>
|
||||
|
||||
**Suggest what to test when no arguments are given.**
|
||||
|
||||
**A. Check recent changes:**
|
||||
- Run `git diff --name-only HEAD~5` to find recently changed files
|
||||
- Run `git diff --name-only --cached` for staged files
|
||||
- Filter to source files (exclude configs, docs, lockfiles)
|
||||
|
||||
**B. Check test coverage gaps:**
|
||||
- Find source files that have no corresponding test file
|
||||
- Prioritize files that were recently modified
|
||||
|
||||
**C. Present suggestions:**
|
||||
|
||||
```
|
||||
Suggested files to test (based on recent changes and coverage gaps):
|
||||
|
||||
1. [file path] - modified recently, no test file exists
|
||||
2. [file path] - modified recently, tests exist but may need updating
|
||||
3. [file path] - no test coverage found
|
||||
|
||||
Run `/test <file path>` to generate tests for any of these.
|
||||
Run `/test run` to run the existing test suite.
|
||||
```
|
||||
|
||||
</suggest_tests>
|
||||
|
||||
</quick_start>
|
||||
|
||||
<critical_rules>
|
||||
|
||||
1. **MATCH EXISTING PATTERNS**: Never impose a new test style. Always mirror what the project already does.
|
||||
2. **READ BEFORE WRITING**: Always read existing test files before generating new ones.
|
||||
3. **VERIFY GENERATED TESTS**: Always run generated tests. Untested test code is unreliable.
|
||||
4. **DON'T MODIFY SOURCE CODE**: If generated tests fail, fix the tests, not the source. If the source has a real bug, report it to the user.
|
||||
5. **MOCK EXTERNAL DEPENDENCIES**: Never let tests hit real APIs, databases, or file systems unless the project explicitly uses integration tests that way.
|
||||
6. **ONE FILE AT A TIME**: Generate tests for one source file per invocation. Keep scope manageable.
|
||||
7. **USE PROJECT DEPENDENCIES**: Only use test libraries already installed in the project. Do not add new dependencies without asking.
|
||||
|
||||
</critical_rules>
|
||||
|
||||
<success_criteria>
|
||||
|
||||
Before completing:
|
||||
- [ ] Test framework and conventions were detected correctly
|
||||
- [ ] Generated tests match the project's existing test style
|
||||
- [ ] All generated tests pass when run
|
||||
- [ ] Tests cover happy paths, edge cases, and error handling
|
||||
- [ ] Test file is placed in the correct location with the correct naming convention
|
||||
- [ ] No source code was modified
|
||||
|
||||
</success_criteria>
|
||||
144
src/tests/file-watcher.test.ts
Normal file
144
src/tests/file-watcher.test.ts
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
import { test, afterEach } from "node:test";
|
||||
import assert from "node:assert";
|
||||
import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { setTimeout as delay } from "node:timers/promises";
|
||||
|
||||
import {
|
||||
startFileWatcher,
|
||||
stopFileWatcher,
|
||||
} from "../resources/extensions/gsd/file-watcher.ts";
|
||||
|
||||
function createTempAgentDir(): string {
|
||||
const tmp = mkdtempSync(join(tmpdir(), "gsd-fw-test-"));
|
||||
mkdirSync(join(tmp, "extensions"), { recursive: true });
|
||||
// Seed watched files so chokidar treats writes as "change" not "add"
|
||||
writeFileSync(join(tmp, "settings.json"), "{}");
|
||||
writeFileSync(join(tmp, "auth.json"), "{}");
|
||||
writeFileSync(join(tmp, "models.json"), "{}");
|
||||
return tmp;
|
||||
}
|
||||
|
||||
function createMockEventBus() {
|
||||
const events: { channel: string; data: unknown }[] = [];
|
||||
return {
|
||||
events,
|
||||
emit(channel: string, data: unknown) {
|
||||
events.push({ channel, data });
|
||||
},
|
||||
on(_channel: string, _handler: (data: unknown) => void) {
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Tear down any active watcher after every test so no chokidar handles
// leak between cases (stopFileWatcher is safe to call when idle).
afterEach(async () => {
  await stopFileWatcher();
});
|
||||
|
||||
test("startFileWatcher and stopFileWatcher run without errors", async () => {
|
||||
const dir = createTempAgentDir();
|
||||
const bus = createMockEventBus();
|
||||
|
||||
await startFileWatcher(dir, bus);
|
||||
await stopFileWatcher();
|
||||
});
|
||||
|
||||
// Must resolve cleanly even though startFileWatcher was never called.
test("stopFileWatcher is safe to call when no watcher is active", async () => {
  await stopFileWatcher();
});
|
||||
|
||||
test("settings.json change emits settings-changed event", async () => {
|
||||
const dir = createTempAgentDir();
|
||||
const bus = createMockEventBus();
|
||||
|
||||
await startFileWatcher(dir, bus);
|
||||
|
||||
writeFileSync(join(dir, "settings.json"), JSON.stringify({ updated: true }));
|
||||
// Wait for debounce (300ms) + filesystem propagation
|
||||
await delay(600);
|
||||
|
||||
const matched = bus.events.filter((e) => e.channel === "settings-changed");
|
||||
assert.ok(matched.length > 0, "should emit settings-changed event");
|
||||
});
|
||||
|
||||
test("auth.json change emits auth-changed event", async () => {
|
||||
const dir = createTempAgentDir();
|
||||
const bus = createMockEventBus();
|
||||
|
||||
await startFileWatcher(dir, bus);
|
||||
|
||||
writeFileSync(join(dir, "auth.json"), JSON.stringify({ token: "new" }));
|
||||
await delay(600);
|
||||
|
||||
const matched = bus.events.filter((e) => e.channel === "auth-changed");
|
||||
assert.ok(matched.length > 0, "should emit auth-changed event");
|
||||
});
|
||||
|
||||
test("models.json change emits models-changed event", async () => {
|
||||
const dir = createTempAgentDir();
|
||||
const bus = createMockEventBus();
|
||||
|
||||
await startFileWatcher(dir, bus);
|
||||
|
||||
writeFileSync(join(dir, "models.json"), JSON.stringify({ model: "new" }));
|
||||
await delay(600);
|
||||
|
||||
const matched = bus.events.filter((e) => e.channel === "models-changed");
|
||||
assert.ok(matched.length > 0, "should emit models-changed event");
|
||||
});
|
||||
|
||||
test("extensions directory change emits extensions-changed event", { skip: process.platform === "win32" ? "chokidar subdirectory events are unreliable on Windows CI" : undefined }, async () => {
|
||||
const dir = createTempAgentDir();
|
||||
const bus = createMockEventBus();
|
||||
|
||||
await startFileWatcher(dir, bus);
|
||||
await delay(500);
|
||||
|
||||
writeFileSync(
|
||||
join(dir, "extensions", "my-ext.json"),
|
||||
JSON.stringify({ name: "test" }),
|
||||
);
|
||||
await delay(2000);
|
||||
|
||||
const matched = bus.events.filter(
|
||||
(e) => e.channel === "extensions-changed",
|
||||
);
|
||||
assert.ok(matched.length > 0, "should emit extensions-changed event");
|
||||
});
|
||||
|
||||
test("unrelated file changes are ignored", async () => {
|
||||
const dir = createTempAgentDir();
|
||||
const bus = createMockEventBus();
|
||||
|
||||
await startFileWatcher(dir, bus);
|
||||
// Wait for watcher to settle, then clear any residual events from setup
|
||||
await delay(400);
|
||||
bus.events.length = 0;
|
||||
|
||||
writeFileSync(join(dir, "random.txt"), "hello");
|
||||
await delay(600);
|
||||
|
||||
assert.strictEqual(bus.events.length, 0, "should not emit any events");
|
||||
});
|
||||
|
||||
test("debouncing coalesces rapid changes into one event", async () => {
|
||||
const dir = createTempAgentDir();
|
||||
const bus = createMockEventBus();
|
||||
|
||||
await startFileWatcher(dir, bus);
|
||||
|
||||
// Rapid-fire writes
|
||||
for (let i = 0; i < 5; i++) {
|
||||
writeFileSync(join(dir, "settings.json"), JSON.stringify({ i }));
|
||||
}
|
||||
await delay(600);
|
||||
|
||||
const matched = bus.events.filter((e) => e.channel === "settings-changed");
|
||||
assert.strictEqual(
|
||||
matched.length,
|
||||
1,
|
||||
"rapid changes should be debounced into a single event",
|
||||
);
|
||||
});
|
||||
150
src/tests/github-client.test.ts
Normal file
150
src/tests/github-client.test.ts
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import { mkdtempSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import {
  parseRemoteUrl,
  createGitHubClient,
  getRepoInfo,
} from "../resources/extensions/gsd/github-client.ts";
|
||||
|
||||
// Table of positive and negative URL-parsing cases. parseRemoteUrl must
// accept the HTTPS, SCP-style SSH, and ssh:// remote forms GitHub issues,
// and return null (never throw) for anything else.
describe("parseRemoteUrl — extracts owner/repo from git remote URLs", () => {
  it("parses HTTPS URL with .git suffix", () => {
    const result = parseRemoteUrl("https://github.com/octocat/hello-world.git");
    assert.deepEqual(result, { owner: "octocat", repo: "hello-world" });
  });

  it("parses HTTPS URL without .git suffix", () => {
    const result = parseRemoteUrl("https://github.com/octocat/hello-world");
    assert.deepEqual(result, { owner: "octocat", repo: "hello-world" });
  });

  it("parses SSH URL with .git suffix", () => {
    const result = parseRemoteUrl("git@github.com:octocat/hello-world.git");
    assert.deepEqual(result, { owner: "octocat", repo: "hello-world" });
  });

  it("parses SSH URL without .git suffix", () => {
    const result = parseRemoteUrl("git@github.com:octocat/hello-world");
    assert.deepEqual(result, { owner: "octocat", repo: "hello-world" });
  });

  it("parses ssh:// protocol URL", () => {
    const result = parseRemoteUrl(
      "ssh://git@github.com/octocat/hello-world.git",
    );
    assert.deepEqual(result, { owner: "octocat", repo: "hello-world" });
  });

  it("handles repos with hyphens and underscores", () => {
    const result = parseRemoteUrl(
      "https://github.com/my-org/my_cool-repo.git",
    );
    assert.deepEqual(result, { owner: "my-org", repo: "my_cool-repo" });
  });

  // Negative cases: non-GitHub hosts, malformed input, and bare filesystem
  // paths must all yield null.
  it("returns null for non-GitHub URLs", () => {
    const result = parseRemoteUrl("https://gitlab.com/owner/repo.git");
    assert.equal(result, null);
  });

  it("returns null for malformed URLs", () => {
    assert.equal(parseRemoteUrl("not-a-url"), null);
    assert.equal(parseRemoteUrl(""), null);
  });

  it("returns null for bare paths", () => {
    assert.equal(parseRemoteUrl("/home/user/repo.git"), null);
  });
});
|
||||
|
||||
// Token-resolution tests. Each case snapshots GITHUB_TOKEN / GH_TOKEN,
// mutates the environment, and restores the originals in `finally` so a
// failing assertion cannot leak env state into later tests.
describe("createGitHubClient — Octokit instantiation", () => {
  it("returns null when no token is provided and env vars are unset", () => {
    const origGH = process.env.GITHUB_TOKEN;
    const origGH2 = process.env.GH_TOKEN;
    delete process.env.GITHUB_TOKEN;
    delete process.env.GH_TOKEN;

    try {
      const client = createGitHubClient();
      assert.equal(client, null);
    } finally {
      // Both vars were deleted above, so restore only if originally set.
      if (origGH !== undefined) process.env.GITHUB_TOKEN = origGH;
      if (origGH2 !== undefined) process.env.GH_TOKEN = origGH2;
    }
  });

  it("creates a client when a token is provided directly", () => {
    const client = createGitHubClient("ghp_test123");
    assert.notEqual(client, null);
    // Spot-check the Octokit surface the integration actually uses.
    assert.equal(typeof client!.pulls, "object");
    assert.equal(typeof client!.issues, "object");
  });

  it("creates a client from GITHUB_TOKEN env var", () => {
    const origGH = process.env.GITHUB_TOKEN;
    const origGH2 = process.env.GH_TOKEN;
    // Clear GH_TOKEN so only GITHUB_TOKEN can be the source.
    delete process.env.GH_TOKEN;
    process.env.GITHUB_TOKEN = "ghp_env_test";

    try {
      const client = createGitHubClient();
      assert.notEqual(client, null);
    } finally {
      if (origGH !== undefined) {
        process.env.GITHUB_TOKEN = origGH;
      } else {
        delete process.env.GITHUB_TOKEN;
      }
      if (origGH2 !== undefined) process.env.GH_TOKEN = origGH2;
    }
  });

  it("creates a client from GH_TOKEN env var", () => {
    const origGH = process.env.GITHUB_TOKEN;
    const origGH2 = process.env.GH_TOKEN;
    // Clear GITHUB_TOKEN so only GH_TOKEN can be the source.
    delete process.env.GITHUB_TOKEN;
    process.env.GH_TOKEN = "ghp_gh_token_test";

    try {
      const client = createGitHubClient();
      assert.notEqual(client, null);
    } finally {
      if (origGH !== undefined) process.env.GITHUB_TOKEN = origGH;
      if (origGH2 !== undefined) {
        process.env.GH_TOKEN = origGH2;
      } else {
        delete process.env.GH_TOKEN;
      }
    }
  });

  it("prefers explicit token over env vars", () => {
    const origGH = process.env.GITHUB_TOKEN;
    process.env.GITHUB_TOKEN = "ghp_from_env";

    try {
      // NOTE(review): this only verifies a client is created; which token
      // wins is not observable from here without a network call.
      const client = createGitHubClient("ghp_explicit");
      assert.notEqual(client, null);
    } finally {
      if (origGH !== undefined) {
        process.env.GITHUB_TOKEN = origGH;
      } else {
        delete process.env.GITHUB_TOKEN;
      }
    }
  });
});
|
||||
|
||||
describe("getRepoInfo — detects repo from git working directory", () => {
|
||||
it("returns owner/repo for the current repository", async () => {
|
||||
const info = await getRepoInfo(process.cwd());
|
||||
// This test repo is gsd-build/gsd-2
|
||||
assert.notEqual(info, null);
|
||||
assert.equal(info!.owner, "gsd-build");
|
||||
assert.equal(info!.repo, "gsd-2" /* or GSD-2 depending on remote */);
|
||||
});
|
||||
|
||||
it("returns null for a non-git directory", async () => {
|
||||
const info = await getRepoInfo("/tmp");
|
||||
assert.equal(info, null);
|
||||
});
|
||||
});
|
||||
264
src/tests/integration/e2e-smoke.test.ts
Normal file
264
src/tests/integration/e2e-smoke.test.ts
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
/**
|
||||
* E2E smoke tests for the GSD CLI binary (dist/loader.js).
|
||||
*
|
||||
* These tests exercise the CLI entry point as a black box by spawning child
|
||||
* processes and asserting on exit codes and output text. They do NOT require
|
||||
* API keys; tests that depend on a live LLM are scoped to gracefully handle
|
||||
* the "No model selected" error path.
|
||||
*
|
||||
* Prerequisite: npm run build must be run first.
|
||||
*
|
||||
* Run with:
|
||||
* node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs \
|
||||
* --experimental-strip-types --test \
|
||||
* src/tests/integration/e2e-smoke.test.ts
|
||||
*/
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { spawn } from "node:child_process";
|
||||
import { existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
// Resolve the built CLI entry point relative to the repo root (tests are
// expected to run from the project root).
const projectRoot = process.cwd();
const loaderPath = join(projectRoot, "dist", "loader.js");

// Fail fast with an actionable message when the build artifact is missing,
// rather than letting every spawn-based test fail with a confusing error.
if (!existsSync(loaderPath)) {
  throw new Error("dist/loader.js not found — run: npm run build");
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type RunResult = {
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
code: number | null;
|
||||
timedOut: boolean;
|
||||
};
|
||||
|
||||
/**
|
||||
* Spawn `node dist/loader.js ...args` and collect output.
|
||||
*
|
||||
* @param args CLI arguments to pass after the script path
|
||||
* @param timeoutMs Maximum time to wait before SIGTERM (default 8 s)
|
||||
* @param env Additional / override environment variables
|
||||
*/
|
||||
function runGsd(
|
||||
args: string[],
|
||||
timeoutMs = 8_000,
|
||||
env: NodeJS.ProcessEnv = {},
|
||||
): Promise<RunResult> {
|
||||
return new Promise((resolve) => {
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
let timedOut = false;
|
||||
|
||||
const child = spawn("node", [loaderPath, ...args], {
|
||||
cwd: projectRoot,
|
||||
env: { ...process.env, ...env },
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
child.stdout.on("data", (chunk: Buffer) => { stdout += chunk.toString(); });
|
||||
child.stderr.on("data", (chunk: Buffer) => { stderr += chunk.toString(); });
|
||||
|
||||
// Close stdin so the process sees a non-TTY environment.
|
||||
child.stdin.end();
|
||||
|
||||
const timer = setTimeout(() => {
|
||||
timedOut = true;
|
||||
child.kill("SIGTERM");
|
||||
}, timeoutMs);
|
||||
|
||||
child.on("close", (code) => {
|
||||
clearTimeout(timer);
|
||||
resolve({ stdout, stderr, code, timedOut });
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/** Strip ANSI escape codes from a string. */
|
||||
function stripAnsi(s: string): string {
|
||||
// eslint-disable-next-line no-control-regex
|
||||
return s.replace(/\x1b\[[0-9;]*[A-Za-z]/g, "");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 1. gsd --version outputs a semver string and exits 0
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("gsd --version outputs a semver version string and exits 0", async () => {
|
||||
const result = await runGsd(["--version"]);
|
||||
|
||||
assert.strictEqual(result.code, 0, `expected exit 0, got ${result.code}`);
|
||||
assert.ok(!result.timedOut, "process should not time out");
|
||||
|
||||
const version = result.stdout.trim();
|
||||
// Semver: MAJOR.MINOR.PATCH with optional pre-release / build metadata
|
||||
assert.match(
|
||||
version,
|
||||
/^\d+\.\d+\.\d+/,
|
||||
`expected semver output, got: ${JSON.stringify(version)}`,
|
||||
);
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 2. gsd --help outputs usage information and exits 0
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("gsd --help outputs usage information and exits 0", async () => {
|
||||
const result = await runGsd(["--help"]);
|
||||
|
||||
assert.strictEqual(result.code, 0, `expected exit 0, got ${result.code}`);
|
||||
assert.ok(!result.timedOut, "process should not time out");
|
||||
|
||||
const output = stripAnsi(result.stdout);
|
||||
|
||||
assert.ok(
|
||||
output.includes("Usage:"),
|
||||
`expected 'Usage:' in help output, got:\n${output.slice(0, 500)}`,
|
||||
);
|
||||
assert.ok(
|
||||
output.includes("--version"),
|
||||
"help output should mention --version flag",
|
||||
);
|
||||
assert.ok(
|
||||
output.includes("--help"),
|
||||
"help output should mention --help flag",
|
||||
);
|
||||
assert.ok(
|
||||
output.includes("--print"),
|
||||
"help output should mention --print flag",
|
||||
);
|
||||
assert.ok(
|
||||
output.includes("--list-models"),
|
||||
"help output should mention --list-models flag",
|
||||
);
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 3. gsd config --help outputs config-specific or general help and exits 0
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("gsd config --help outputs help and exits 0", async () => {
|
||||
const result = await runGsd(["config", "--help"]);
|
||||
|
||||
assert.strictEqual(result.code, 0, `expected exit 0, got ${result.code}`);
|
||||
assert.ok(!result.timedOut, "process should not time out");
|
||||
|
||||
// The loader fast-path intercepts --help only when it is the first argument.
|
||||
// "config --help" passes through to cli.js where parseCliArgs() encounters
|
||||
// --help and calls printHelp(), producing the full usage text.
|
||||
const output = stripAnsi(result.stdout);
|
||||
assert.ok(
|
||||
output.includes("Usage:"),
|
||||
`expected 'Usage:' in output, got:\n${output.slice(0, 500)}`,
|
||||
);
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 4. gsd update --help outputs update-specific or general help and exits 0
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("gsd update --help outputs help and exits 0", async () => {
|
||||
const result = await runGsd(["update", "--help"]);
|
||||
|
||||
assert.strictEqual(result.code, 0, `expected exit 0, got ${result.code}`);
|
||||
assert.ok(!result.timedOut, "process should not time out");
|
||||
|
||||
const output = stripAnsi(result.stdout);
|
||||
assert.ok(
|
||||
output.includes("Usage:"),
|
||||
`expected 'Usage:' in output, got:\n${output.slice(0, 500)}`,
|
||||
);
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 5. gsd --list-models runs without crashing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("gsd --list-models runs without crashing", async () => {
|
||||
const result = await runGsd(["--list-models"]);
|
||||
|
||||
assert.ok(!result.timedOut, "gsd --list-models should exit within the timeout");
|
||||
assert.strictEqual(result.code, 0, `expected exit 0, got ${result.code}`);
|
||||
|
||||
// No unhandled crash markers
|
||||
const combinedOutput = stripAnsi(result.stdout + result.stderr);
|
||||
assert.ok(
|
||||
!combinedOutput.includes("Error: Cannot find module"),
|
||||
"should not have missing module errors",
|
||||
);
|
||||
assert.ok(
|
||||
!combinedOutput.includes("ERR_MODULE_NOT_FOUND"),
|
||||
"should not have ERR_MODULE_NOT_FOUND",
|
||||
);
|
||||
|
||||
// Either a table of models or the "no models" message
|
||||
const hasTable = result.stdout.includes("provider") || result.stdout.includes("model");
|
||||
const hasNoModelsMsg = result.stdout.includes("No models available");
|
||||
assert.ok(
|
||||
hasTable || hasNoModelsMsg,
|
||||
`expected model list or 'No models available', got stdout:\n${result.stdout.slice(0, 300)}`,
|
||||
);
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 6. gsd --print in text mode does not segfault or throw unhandled errors
|
||||
// (may fail with "No model selected" when no API keys are configured)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("gsd --mode text --print does not segfault or throw unhandled errors", { skip: !process.env.ANTHROPIC_API_KEY && !process.env.OPENAI_API_KEY ? "no API key available — print mode requires a configured provider" : undefined }, async () => {
|
||||
const result = await runGsd(
|
||||
["--mode", "text", "--print", "echo hello"],
|
||||
15_000,
|
||||
);
|
||||
|
||||
assert.ok(!result.timedOut, "gsd --print should not hang indefinitely");
|
||||
|
||||
const combinedOutput = stripAnsi(result.stdout + result.stderr);
|
||||
|
||||
// Must not crash with module-not-found errors
|
||||
assert.ok(
|
||||
!combinedOutput.includes("ERR_MODULE_NOT_FOUND"),
|
||||
"should not have ERR_MODULE_NOT_FOUND",
|
||||
);
|
||||
assert.ok(
|
||||
!combinedOutput.includes("Error: Cannot find module"),
|
||||
"should not have missing module errors",
|
||||
);
|
||||
|
||||
// Must not terminate from a fatal signal (SIGSEGV, SIGABRT, etc.)
|
||||
// Node exits with 128 + signal number on signal termination.
|
||||
// SIGTERM is 15 (128+15=143), but we sent SIGTERM ourselves only on timeout,
|
||||
// and we already asserted timedOut is false above.
|
||||
assert.ok(
|
||||
result.code !== null,
|
||||
"process should exit cleanly, not be killed by a signal",
|
||||
);
|
||||
|
||||
// Acceptable exit codes: 0 (success) or 1 (no model / API key error)
|
||||
const acceptableCodes = new Set([0, 1]);
|
||||
assert.ok(
|
||||
acceptableCodes.has(result.code as number),
|
||||
`expected exit code 0 or 1, got ${result.code}.\nstdout: ${result.stdout.slice(0, 300)}\nstderr: ${combinedOutput.slice(0, 300)}`,
|
||||
);
|
||||
|
||||
// If exit code is 1, verify it's a clean error (no stack traces from
|
||||
// unhandled exceptions). The specific error message varies by environment.
|
||||
if (result.code === 1) {
|
||||
const combined = stripAnsi(result.stdout + result.stderr);
|
||||
const hasUnhandledCrash =
|
||||
combined.includes("SyntaxError:") ||
|
||||
combined.includes("ReferenceError:") ||
|
||||
combined.includes("TypeError: Cannot read") ||
|
||||
combined.includes("FATAL ERROR");
|
||||
|
||||
assert.ok(
|
||||
!hasUnhandledCrash,
|
||||
`exit 1 should be a clean error, not an unhandled crash:\n${combined.slice(0, 500)}`,
|
||||
);
|
||||
}
|
||||
});
|
||||
54
src/tests/token-counter.test.ts
Normal file
54
src/tests/token-counter.test.ts
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
import { describe, it } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import {
|
||||
countTokens,
|
||||
countTokensSync,
|
||||
initTokenCounter,
|
||||
isAccurateCountingAvailable,
|
||||
} from "../resources/extensions/gsd/token-counter.ts";
|
||||
|
||||
describe("token-counter", () => {
  // Runs first, before any test has called initTokenCounter(): the counter
  // must fall back to the chars/4 heuristic, pinned exactly here.
  it("countTokensSync returns heuristic estimate before init", () => {
    const count = countTokensSync("hello world");
    assert.equal(count, Math.ceil("hello world".length / 4));
  });

  it("initTokenCounter initializes the encoder", async () => {
    const result = await initTokenCounter();
    // Only the type is asserted: init may legitimately return false when the
    // accurate encoder cannot be loaded in this environment.
    assert.equal(typeof result, "boolean");
  });

  it("countTokens returns a positive number for non-empty text", async () => {
    const count = await countTokens("The quick brown fox jumps over the lazy dog.");
    assert.ok(count > 0, "should return positive token count");
  });

  it("countTokens returns 0 for empty string", async () => {
    const count = await countTokens("");
    assert.equal(count, 0);
  });

  it("isAccurateCountingAvailable reflects encoder state", () => {
    const available = isAccurateCountingAvailable();
    assert.equal(typeof available, "boolean");
  });

  // The accuracy checks below are conditional: they only apply when the real
  // encoder loaded, otherwise both paths use the same heuristic.
  it("countTokensSync gives accurate count after init", async () => {
    await initTokenCounter();
    if (isAccurateCountingAvailable()) {
      const syncCount = countTokensSync("hello world");
      const asyncCount = await countTokens("hello world");
      assert.equal(syncCount, asyncCount, "sync and async should match after init");
    }
  });

  it("token count is more accurate than chars/4 for code", async () => {
    await initTokenCounter();
    if (isAccurateCountingAvailable()) {
      const code = 'function add(a: number, b: number): number { return a + b; }';
      const tokens = await countTokens(code);
      const heuristic = Math.ceil(code.length / 4);
      // NOTE(review): asserts inequality with the heuristic; could in theory
      // coincide for some inputs — confirm this sample stays divergent if the
      // encoder changes.
      assert.ok(tokens !== heuristic, "tiktoken count should differ from simple heuristic for code");
    }
  });
});
|
||||
Loading…
Add table
Reference in a new issue