fix: reject prose Verify: fields from being executed as shell commands (#1066) (#1068)

The verification gate's discoverCommands() was passing prose descriptions
from task plan Verify: fields through sanitizeCommand(), which only checked
for shell injection characters. English prose like "Document exists, contains
all 5 scale names..." passed the filter and was executed via spawnSync,
causing exit code 127 false negatives.

Added isLikelyCommand() heuristic that distinguishes executable commands
from prose descriptions by checking:
- Known command prefixes (npm, node, tsc, eslint, etc.)
- Path-like first tokens (./script.sh, /usr/bin/check)
- Flag-like tokens (-v, --check)
- Uppercase-initial words with 4+ tokens (prose pattern)
- Comma-space clause separators (prose pattern)

Prose Verify: fields now fall through to package.json scripts or "none"
instead of being executed. Valid commands continue to work as before.

Closes #1066
This commit is contained in:
Jeremy McSpadden 2026-03-17 23:00:52 -05:00 committed by GitHub
parent 9083d86766
commit 668f12b97f
2 changed files with 181 additions and 1 deletions

View file

@ -20,7 +20,7 @@ import assert from "node:assert/strict";
import { mkdirSync, writeFileSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { discoverCommands, runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "../verification-gate.ts";
import { discoverCommands, runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit, isLikelyCommand } from "../verification-gate.ts";
import type { CaptureRuntimeErrorsOptions, DependencyAuditOptions } from "../verification-gate.ts";
import { validatePreferences } from "../preferences.ts";
@ -374,6 +374,120 @@ test("verification-gate: whitespace-only preference commands fall through", () =
}
});
// ─── isLikelyCommand Tests (issue #1066) ────────────────────────────────────
test("isLikelyCommand: known command prefixes are accepted", () => {
assert.equal(isLikelyCommand("npm run lint"), true);
assert.equal(isLikelyCommand("npx vitest"), true);
assert.equal(isLikelyCommand("yarn test"), true);
assert.equal(isLikelyCommand("pnpm run typecheck"), true);
assert.equal(isLikelyCommand("node script.js"), true);
assert.equal(isLikelyCommand("tsc --noEmit"), true);
assert.equal(isLikelyCommand("eslint ."), true);
assert.equal(isLikelyCommand("jest --ci"), true);
assert.equal(isLikelyCommand("python3 -m pytest"), true);
assert.equal(isLikelyCommand("cargo test"), true);
assert.equal(isLikelyCommand("go test ./..."), true);
assert.equal(isLikelyCommand("make test"), true);
});
test("isLikelyCommand: path-like first tokens are accepted", () => {
assert.equal(isLikelyCommand("./scripts/verify.sh"), true);
assert.equal(isLikelyCommand("/usr/local/bin/check"), true);
assert.equal(isLikelyCommand("../tools/lint.sh"), true);
});
test("isLikelyCommand: flag-like tokens indicate a command", () => {
assert.equal(isLikelyCommand("custom-tool --check"), true);
assert.equal(isLikelyCommand("mycheck -v"), true);
});
test("isLikelyCommand: prose descriptions are rejected", () => {
// The exact string from issue #1066
assert.equal(
isLikelyCommand("Document exists, contains all 5 scale names, all 14 semantic tokens, Inter assessment, philosophy and competitive citations present"),
false,
);
assert.equal(isLikelyCommand("Check that the file has been created with the correct content"), false);
assert.equal(isLikelyCommand("Verify the output matches expected format"), false);
assert.equal(isLikelyCommand("All tests pass and coverage is above 80%"), false);
assert.equal(isLikelyCommand("File should exist in the output directory"), false);
assert.equal(isLikelyCommand("Build succeeds without errors or warnings"), false);
});
test("isLikelyCommand: empty or whitespace-only strings are rejected", () => {
assert.equal(isLikelyCommand(""), false);
assert.equal(isLikelyCommand(" "), false);
});
test("isLikelyCommand: short lowercase tokens without flags are accepted (could be custom scripts)", () => {
assert.equal(isLikelyCommand("custom-verify"), true);
assert.equal(isLikelyCommand("mycheck"), true);
});
test("verification-gate: prose taskPlanVerify is rejected, falls through to package.json", () => {
const tmp = makeTempDir("vg-prose-reject");
try {
writeFileSync(
join(tmp, "package.json"),
JSON.stringify({ scripts: { test: "vitest" } }),
);
const result = discoverCommands({
taskPlanVerify: "Document exists, contains all 5 scale names, all 14 semantic tokens",
cwd: tmp,
});
// Prose should be rejected, so it falls through to package.json
assert.equal(result.source, "package-json");
assert.deepStrictEqual(result.commands, ["npm run test"]);
} finally {
rmSync(tmp, { recursive: true, force: true });
}
});
test("verification-gate: prose taskPlanVerify with no package.json → source none", () => {
const tmp = makeTempDir("vg-prose-none");
try {
const result = discoverCommands({
taskPlanVerify: "Verify the output matches expected format and all fields are present",
cwd: tmp,
});
assert.equal(result.source, "none");
assert.deepStrictEqual(result.commands, []);
} finally {
rmSync(tmp, { recursive: true, force: true });
}
});
test("verification-gate: valid command in taskPlanVerify still works", () => {
const tmp = makeTempDir("vg-valid-cmd");
try {
const result = discoverCommands({
taskPlanVerify: "npm run lint && npm run test",
cwd: tmp,
});
assert.equal(result.source, "task-plan");
assert.deepStrictEqual(result.commands, ["npm run lint", "npm run test"]);
} finally {
rmSync(tmp, { recursive: true, force: true });
}
});
test("verification-gate: mixed prose and commands in taskPlanVerify — only commands kept", () => {
const tmp = makeTempDir("vg-mixed");
try {
const result = discoverCommands({
taskPlanVerify: "Check that everything works && npm run test",
cwd: tmp,
});
// "Check that everything works" is prose (starts with capital, 4+ words)
// "npm run test" is a valid command
assert.equal(result.source, "task-plan");
assert.deepStrictEqual(result.commands, ["npm run test"]);
} finally {
rmSync(tmp, { recursive: true, force: true });
}
});
// ─── Additional Execution Tests (T02) ───────────────────────────────────────
test("verification-gate: one command fails — remaining commands still run (non-short-circuit)", () => {

View file

@ -144,12 +144,78 @@ export function formatFailureContext(result: VerificationResult): string {
/** Characters that indicate shell injection when found in a command string. */
const SHELL_INJECTION_PATTERN = /[;|`]|\$\(/;
/**
* Known executable first-tokens that are safe to run.
* Lowercase commands, common build/test tools, and npm/yarn/pnpm invocations.
*/
const KNOWN_COMMAND_PREFIXES = new Set([
"npm", "npx", "yarn", "pnpm", "bun", "bunx", "deno",
"node", "ts-node", "tsx", "tsc",
"sh", "bash", "zsh",
"echo", "cat", "ls", "test", "true", "false", "pwd", "env",
"make", "cargo", "go", "python", "python3", "pip", "pip3",
"ruby", "gem", "bundle", "rake",
"java", "javac", "mvn", "gradle",
"docker", "docker-compose",
"git", "gh",
"eslint", "prettier", "vitest", "jest", "mocha", "pytest", "phpunit",
"curl", "wget",
"grep", "find", "diff", "wc", "sort", "head", "tail",
]);
/**
* Heuristic check: does this string look like an executable shell command
* rather than a prose description?
*
* Returns true when the string appears to be a command. Returns false
* for English prose (e.g. "Document exists, contains all 5 scale names").
*
* Heuristics (any true command-like):
* 1. First token is a known command prefix
* 2. First token starts with `.` or `/` (path-like)
* 3. Any token starts with `-` (flag-like)
* 4. First token contains no uppercase letters (commands are lowercase)
* AND first token does not end with a comma or colon (prose punctuation)
*
* Heuristics (any true prose-like):
* 1. First token starts with an uppercase letter and the string has 4+ words
* 2. String contains commas followed by spaces (prose clause structure)
*/
export function isLikelyCommand(cmd: string): boolean {
const trimmed = cmd.trim();
if (!trimmed) return false;
const tokens = trimmed.split(/\s+/);
const firstToken = tokens[0];
// Known command prefix → definitely a command
if (KNOWN_COMMAND_PREFIXES.has(firstToken)) return true;
// Path-like first token → command
if (firstToken.startsWith("/") || firstToken.startsWith("./") || firstToken.startsWith("../")) return true;
// Has flag-like tokens → command
if (tokens.some(t => t.startsWith("-"))) return true;
// First token starts with uppercase + 4 or more words → prose
if (/^[A-Z]/.test(firstToken) && tokens.length >= 4) return false;
// Contains comma-space patterns (prose clause separators) → prose
if (/,\s/.test(trimmed) && tokens.length >= 4) return false;
// First token has uppercase letters and no path separators → prose
if (/[A-Z]/.test(firstToken) && !firstToken.includes("/")) return false;
return true;
}
/**
* Validate a command string for obvious shell injection patterns.
* Returns the command unchanged if safe, or null if suspicious.
*/
function sanitizeCommand(cmd: string): string | null {
if (SHELL_INJECTION_PATTERN.test(cmd)) return null;
if (!isLikelyCommand(cmd)) return null;
return cmd;
}