// ─── isLikelyCommand Tests (issue #1066) ────────────────────────────────────

/** Asserts that `isLikelyCommand` yields `expected` for each input, in order. */
function expectVerdicts(inputs: readonly string[], expected: boolean): void {
  for (const input of inputs) {
    assert.equal(isLikelyCommand(input), expected);
  }
}

/** Runs `body` against a fresh temp directory and always removes it afterwards. */
function withTempDir(label: string, body: (dir: string) => void): void {
  const dir = makeTempDir(label);
  try {
    body(dir);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
}

test("isLikelyCommand: known command prefixes are accepted", () => {
  expectVerdicts(
    [
      "npm run lint",
      "npx vitest",
      "yarn test",
      "pnpm run typecheck",
      "node script.js",
      "tsc --noEmit",
      "eslint .",
      "jest --ci",
      "python3 -m pytest",
      "cargo test",
      "go test ./...",
      "make test",
    ],
    true,
  );
});

test("isLikelyCommand: path-like first tokens are accepted", () => {
  expectVerdicts(
    ["./scripts/verify.sh", "/usr/local/bin/check", "../tools/lint.sh"],
    true,
  );
});

test("isLikelyCommand: flag-like tokens indicate a command", () => {
  expectVerdicts(["custom-tool --check", "mycheck -v"], true);
});

test("isLikelyCommand: prose descriptions are rejected", () => {
  expectVerdicts(
    [
      // First entry is the exact string reported in issue #1066.
      "Document exists, contains all 5 scale names, all 14 semantic tokens, Inter assessment, philosophy and competitive citations present",
      "Check that the file has been created with the correct content",
      "Verify the output matches expected format",
      "All tests pass and coverage is above 80%",
      "File should exist in the output directory",
      "Build succeeds without errors or warnings",
    ],
    false,
  );
});

test("isLikelyCommand: empty or whitespace-only strings are rejected", () => {
  expectVerdicts(["", " "], false);
});

test("isLikelyCommand: short lowercase tokens without flags are accepted (could be custom scripts)", () => {
  expectVerdicts(["custom-verify", "mycheck"], true);
});

test("verification-gate: prose taskPlanVerify is rejected, falls through to package.json", () => {
  withTempDir("vg-prose-reject", (dir) => {
    const manifest = JSON.stringify({ scripts: { test: "vitest" } });
    writeFileSync(join(dir, "package.json"), manifest);

    const discovered = discoverCommands({
      taskPlanVerify: "Document exists, contains all 5 scale names, all 14 semantic tokens",
      cwd: dir,
    });

    // The prose is discarded, so discovery continues on to package.json.
    assert.equal(discovered.source, "package-json");
    assert.deepStrictEqual(discovered.commands, ["npm run test"]);
  });
});

test("verification-gate: prose taskPlanVerify with no package.json → source none", () => {
  withTempDir("vg-prose-none", (dir) => {
    const discovered = discoverCommands({
      taskPlanVerify: "Verify the output matches expected format and all fields are present",
      cwd: dir,
    });

    assert.equal(discovered.source, "none");
    assert.deepStrictEqual(discovered.commands, []);
  });
});

test("verification-gate: valid command in taskPlanVerify still works", () => {
  withTempDir("vg-valid-cmd", (dir) => {
    const discovered = discoverCommands({
      taskPlanVerify: "npm run lint && npm run test",
      cwd: dir,
    });

    assert.equal(discovered.source, "task-plan");
    assert.deepStrictEqual(discovered.commands, ["npm run lint", "npm run test"]);
  });
});

test("verification-gate: mixed prose and commands in taskPlanVerify — only commands kept", () => {
  withTempDir("vg-mixed", (dir) => {
    const discovered = discoverCommands({
      taskPlanVerify: "Check that everything works && npm run test",
      cwd: dir,
    });

    // Left of "&&": capitalised first word and four words → treated as prose.
    // Right of "&&": "npm run test" is a recognised command and is kept.
    assert.equal(discovered.source, "task-plan");
    assert.deepStrictEqual(discovered.commands, ["npm run test"]);
  });
});
/**
 * Characters that indicate shell injection when found in a command string:
 * `;`, `|`, a backtick, or the `$(` sequence.
 */
const SHELL_INJECTION_PATTERN = /[;|`]|\$\(/;

/**
 * First tokens that are recognised as executables without applying any
 * further heuristics: package managers, runtimes, shells, common build/test
 * tools and basic Unix utilities.
 *
 * Membership only means "this looks like a command"; it is not a statement
 * about whether running the command is safe.
 */
const KNOWN_COMMAND_PREFIXES: ReadonlySet<string> = new Set([
  "npm", "npx", "yarn", "pnpm", "bun", "bunx", "deno",
  "node", "ts-node", "tsx", "tsc",
  "sh", "bash", "zsh",
  "echo", "cat", "ls", "test", "true", "false", "pwd", "env",
  "make", "cargo", "go", "python", "python3", "pip", "pip3",
  "ruby", "gem", "bundle", "rake",
  "java", "javac", "mvn", "gradle",
  "docker", "docker-compose",
  "git", "gh",
  "eslint", "prettier", "vitest", "jest", "mocha", "pytest", "phpunit",
  "curl", "wget",
  "grep", "find", "diff", "wc", "sort", "head", "tail",
]);

/** Leading `NAME=value` environment assignment, e.g. `CI=true` or `NODE_ENV=test`. */
const ENV_ASSIGNMENT_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*=/;

/**
 * A real option token: one or two dashes followed by a letter (`-v`, `--check`).
 * A bare `-` / `--` or a negative number such as `-1` does not count, because
 * those also occur in ordinary prose.
 */
const FLAG_TOKEN_PATTERN = /^--?[A-Za-z]/;

/** First-token prefixes that mark an explicit path to an executable. */
const PATH_PREFIXES: readonly string[] = ["/", "./", "../"];

/**
 * Heuristic check: does this string look like an executable shell command
 * rather than a prose description?
 *
 * Leading `NAME=value` environment assignments are skipped first, so
 * `CI=true npm test` is judged by `npm`. If every token is an assignment the
 * last one is kept as the token to judge.
 * NOTE(review): this assumes accepted commands are executed through a shell,
 * where such prefixes are valid — the runner is not visible from here; confirm.
 *
 * The checks run in this order and the first one that matches decides:
 *  1. Empty / whitespace-only string                       → not a command
 *  2. First token is in `KNOWN_COMMAND_PREFIXES`            → command
 *  3. First token starts with `/`, `./` or `../`            → command
 *  4. First token ends with `,` or `:` (prose punctuation)  → not a command
 *  5. Any token looks like a flag (`-x`, `--long`)          → command
 *  6. First token starts uppercase and there are 4+ tokens  → not a command
 *  7. String contains ", " and there are 4+ tokens          → not a command
 *  8. First token has an uppercase letter and no `/`        → not a command
 *  9. Otherwise (short, lowercase: may be a custom script)  → command
 *
 * @param cmd - Candidate command string; surrounding whitespace is ignored.
 * @returns `true` when the string appears to be a command, `false` for
 *   English prose (e.g. "Document exists, contains all 5 scale names").
 */
export function isLikelyCommand(cmd: string): boolean {
  const trimmed = cmd.trim();
  if (!trimmed) return false;

  const allTokens = trimmed.split(/\s+/);

  // Skip env assignments, but always leave one token to act as the command word.
  let start = 0;
  while (
    start < allTokens.length - 1 &&
    ENV_ASSIGNMENT_PATTERN.test(allTokens[start] ?? "")
  ) {
    start += 1;
  }
  const tokens = allTokens.slice(start);
  const [firstToken = ""] = tokens;

  // Known command prefix → definitely a command
  if (KNOWN_COMMAND_PREFIXES.has(firstToken)) return true;

  // Path-like first token → command
  if (PATH_PREFIXES.some(prefix => firstToken.startsWith(prefix))) return true;

  // "note:" / "first," cannot be an executable name → prose
  if (/[,:]$/.test(firstToken)) return false;

  // Has a genuine flag token → command
  if (tokens.some(t => FLAG_TOKEN_PATTERN.test(t))) return true;

  // First token starts with uppercase + 4 or more words → prose
  if (/^[A-Z]/.test(firstToken) && tokens.length >= 4) return false;

  // Contains comma-space patterns (prose clause separators) → prose
  if (/,\s/.test(trimmed) && tokens.length >= 4) return false;

  // First token has uppercase letters and no path separators → prose
  if (/[A-Z]/.test(firstToken) && !firstToken.includes("/")) return false;

  return true;
}

/**
 * Validate a command string before it is accepted for execution.
 *
 * @param cmd - Candidate command string.
 * @returns The command unchanged when it passes both checks, or `null` when it
 *   matches `SHELL_INJECTION_PATTERN` or `isLikelyCommand` classifies it as
 *   prose.
 */
function sanitizeCommand(cmd: string): string | null {
  if (SHELL_INJECTION_PATTERN.test(cmd)) return null;
  if (!isLikelyCommand(cmd)) return null;
  return cmd;
}