fix: reject prose Verify: fields from being executed as shell commands (#1066) (#1068)

The verification gate's discoverCommands() was passing prose descriptions from task plan Verify: fields through sanitizeCommand(), which only checked for shell injection characters. English prose like "Document exists, contains all 5 scale names..." passed the filter and was executed via spawnSync, causing exit code 127 false negatives. Added isLikelyCommand() heuristic that distinguishes executable commands from prose descriptions by checking: - Known command prefixes (npm, node, tsc, eslint, etc.) - Path-like first tokens (./script.sh, /usr/bin/check) - Flag-like tokens (-v, --check) - Uppercase-initial words with 4+ tokens (prose pattern) - Comma-space clause separators (prose pattern) Prose Verify: fields now fall through to package.json scripts or "none" instead of being executed. Valid commands continue to work as before. Closes #1066
2026-03-17 23:00:52 -05:00 · 2026-03-17 23:00:52 -05:00 · 668f12b97f
commit 668f12b97f
parent 9083d86766
2 changed files with 181 additions and 1 deletions
--- a/src/resources/extensions/gsd/tests/verification-gate.test.ts
+++ b/src/resources/extensions/gsd/tests/verification-gate.test.ts
@ -20,7 +20,7 @@ import assert from "node:assert/strict";
 import { mkdirSync, writeFileSync, rmSync } from "node:fs";
 import { join } from "node:path";
 import { tmpdir } from "node:os";
-import { discoverCommands, runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "../verification-gate.ts";
+import { discoverCommands, runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit, isLikelyCommand } from "../verification-gate.ts";
 import type { CaptureRuntimeErrorsOptions, DependencyAuditOptions } from "../verification-gate.ts";
 import { validatePreferences } from "../preferences.ts";

@ -374,6 +374,120 @@ test("verification-gate: whitespace-only preference commands fall through", () =
  }
 });

+// ─── isLikelyCommand Tests (issue #1066) ────────────────────────────────────
+
+test("isLikelyCommand: known command prefixes are accepted", () => {
+  assert.equal(isLikelyCommand("npm run lint"), true);
+  assert.equal(isLikelyCommand("npx vitest"), true);
+  assert.equal(isLikelyCommand("yarn test"), true);
+  assert.equal(isLikelyCommand("pnpm run typecheck"), true);
+  assert.equal(isLikelyCommand("node script.js"), true);
+  assert.equal(isLikelyCommand("tsc --noEmit"), true);
+  assert.equal(isLikelyCommand("eslint ."), true);
+  assert.equal(isLikelyCommand("jest --ci"), true);
+  assert.equal(isLikelyCommand("python3 -m pytest"), true);
+  assert.equal(isLikelyCommand("cargo test"), true);
+  assert.equal(isLikelyCommand("go test ./..."), true);
+  assert.equal(isLikelyCommand("make test"), true);
+});
+
+test("isLikelyCommand: path-like first tokens are accepted", () => {
+  assert.equal(isLikelyCommand("./scripts/verify.sh"), true);
+  assert.equal(isLikelyCommand("/usr/local/bin/check"), true);
+  assert.equal(isLikelyCommand("../tools/lint.sh"), true);
+});
+
+test("isLikelyCommand: flag-like tokens indicate a command", () => {
+  assert.equal(isLikelyCommand("custom-tool --check"), true);
+  assert.equal(isLikelyCommand("mycheck -v"), true);
+});
+
+test("isLikelyCommand: prose descriptions are rejected", () => {
+  // The exact string from issue #1066
+  assert.equal(
+    isLikelyCommand("Document exists, contains all 5 scale names, all 14 semantic tokens, Inter assessment, philosophy and competitive citations present"),
+    false,
+  );
+  assert.equal(isLikelyCommand("Check that the file has been created with the correct content"), false);
+  assert.equal(isLikelyCommand("Verify the output matches expected format"), false);
+  assert.equal(isLikelyCommand("All tests pass and coverage is above 80%"), false);
+  assert.equal(isLikelyCommand("File should exist in the output directory"), false);
+  assert.equal(isLikelyCommand("Build succeeds without errors or warnings"), false);
+});
+
+test("isLikelyCommand: empty or whitespace-only strings are rejected", () => {
+  assert.equal(isLikelyCommand(""), false);
+  assert.equal(isLikelyCommand("   "), false);
+});
+
+test("isLikelyCommand: short lowercase tokens without flags are accepted (could be custom scripts)", () => {
+  assert.equal(isLikelyCommand("custom-verify"), true);
+  assert.equal(isLikelyCommand("mycheck"), true);
+});
+
+test("verification-gate: prose taskPlanVerify is rejected, falls through to package.json", () => {
+  const tmp = makeTempDir("vg-prose-reject");
+  try {
+    writeFileSync(
+      join(tmp, "package.json"),
+      JSON.stringify({ scripts: { test: "vitest" } }),
+    );
+    const result = discoverCommands({
+      taskPlanVerify: "Document exists, contains all 5 scale names, all 14 semantic tokens",
+      cwd: tmp,
+    });
+    // Prose should be rejected, so it falls through to package.json
+    assert.equal(result.source, "package-json");
+    assert.deepStrictEqual(result.commands, ["npm run test"]);
+  } finally {
+    rmSync(tmp, { recursive: true, force: true });
+  }
+});
+
+test("verification-gate: prose taskPlanVerify with no package.json → source none", () => {
+  const tmp = makeTempDir("vg-prose-none");
+  try {
+    const result = discoverCommands({
+      taskPlanVerify: "Verify the output matches expected format and all fields are present",
+      cwd: tmp,
+    });
+    assert.equal(result.source, "none");
+    assert.deepStrictEqual(result.commands, []);
+  } finally {
+    rmSync(tmp, { recursive: true, force: true });
+  }
+});
+
+test("verification-gate: valid command in taskPlanVerify still works", () => {
+  const tmp = makeTempDir("vg-valid-cmd");
+  try {
+    const result = discoverCommands({
+      taskPlanVerify: "npm run lint && npm run test",
+      cwd: tmp,
+    });
+    assert.equal(result.source, "task-plan");
+    assert.deepStrictEqual(result.commands, ["npm run lint", "npm run test"]);
+  } finally {
+    rmSync(tmp, { recursive: true, force: true });
+  }
+});
+
+test("verification-gate: mixed prose and commands in taskPlanVerify — only commands kept", () => {
+  const tmp = makeTempDir("vg-mixed");
+  try {
+    const result = discoverCommands({
+      taskPlanVerify: "Check that everything works && npm run test",
+      cwd: tmp,
+    });
+    // "Check that everything works" is prose (starts with capital, 4+ words)
+    // "npm run test" is a valid command
+    assert.equal(result.source, "task-plan");
+    assert.deepStrictEqual(result.commands, ["npm run test"]);
+  } finally {
+    rmSync(tmp, { recursive: true, force: true });
+  }
+});
+
 // ─── Additional Execution Tests (T02) ───────────────────────────────────────

 test("verification-gate: one command fails — remaining commands still run (non-short-circuit)", () => {
--- a/src/resources/extensions/gsd/verification-gate.ts
+++ b/src/resources/extensions/gsd/verification-gate.ts
@ -144,12 +144,78 @@ export function formatFailureContext(result: VerificationResult): string {
 /** Characters that indicate shell injection when found in a command string. */
 const SHELL_INJECTION_PATTERN = /[;|`]|\$\(/;

+/**
+ * Known executable first-tokens that are safe to run.
+ * Lowercase commands, common build/test tools, and npm/yarn/pnpm invocations.
+ */
+const KNOWN_COMMAND_PREFIXES = new Set([
+  "npm", "npx", "yarn", "pnpm", "bun", "bunx", "deno",
+  "node", "ts-node", "tsx", "tsc",
+  "sh", "bash", "zsh",
+  "echo", "cat", "ls", "test", "true", "false", "pwd", "env",
+  "make", "cargo", "go", "python", "python3", "pip", "pip3",
+  "ruby", "gem", "bundle", "rake",
+  "java", "javac", "mvn", "gradle",
+  "docker", "docker-compose",
+  "git", "gh",
+  "eslint", "prettier", "vitest", "jest", "mocha", "pytest", "phpunit",
+  "curl", "wget",
+  "grep", "find", "diff", "wc", "sort", "head", "tail",
+]);
+
+/**
+ * Heuristic check: does this string look like an executable shell command
+ * rather than a prose description?
+ *
+ * Returns true when the string appears to be a command. Returns false
+ * for English prose (e.g. "Document exists, contains all 5 scale names").
+ *
+ * Heuristics (any true → command-like):
+ *   1. First token is a known command prefix
+ *   2. First token starts with `.` or `/` (path-like)
+ *   3. Any token starts with `-` (flag-like)
+ *   4. First token contains no uppercase letters (commands are lowercase)
+ *      AND first token does not end with a comma or colon (prose punctuation)
+ *
+ * Heuristics (any true → prose-like):
+ *   1. First token starts with an uppercase letter and the string has 4+ words
+ *   2. String contains commas followed by spaces (prose clause structure)
+ */
+export function isLikelyCommand(cmd: string): boolean {
+  const trimmed = cmd.trim();
+  if (!trimmed) return false;
+
+  const tokens = trimmed.split(/\s+/);
+  const firstToken = tokens[0];
+
+  // Known command prefix → definitely a command
+  if (KNOWN_COMMAND_PREFIXES.has(firstToken)) return true;
+
+  // Path-like first token → command
+  if (firstToken.startsWith("/") || firstToken.startsWith("./") || firstToken.startsWith("../")) return true;
+
+  // Has flag-like tokens → command
+  if (tokens.some(t => t.startsWith("-"))) return true;
+
+  // First token starts with uppercase + 4 or more words → prose
+  if (/^[A-Z]/.test(firstToken) && tokens.length >= 4) return false;
+
+  // Contains comma-space patterns (prose clause separators) → prose
+  if (/,\s/.test(trimmed) && tokens.length >= 4) return false;
+
+  // First token has uppercase letters and no path separators → prose
+  if (/[A-Z]/.test(firstToken) && !firstToken.includes("/")) return false;
+
+  return true;
+}
+
 /**
 * Validate a command string for obvious shell injection patterns.
 * Returns the command unchanged if safe, or null if suspicious.
 */
 function sanitizeCommand(cmd: string): string | null {
  if (SHELL_INJECTION_PATTERN.test(cmd)) return null;
+  if (!isLikelyCommand(cmd)) return null;
  return cmd;
 }