The verification gate's discoverCommands() was passing prose descriptions from task plan Verify: fields through sanitizeCommand(), which only checked for shell injection characters. English prose like "Document exists, contains all 5 scale names..." passed the filter and was executed via spawnSync, causing exit code 127 false negatives. Added isLikelyCommand() heuristic that distinguishes executable commands from prose descriptions by checking: - Known command prefixes (npm, node, tsc, eslint, etc.) - Path-like first tokens (./script.sh, /usr/bin/check) - Flag-like tokens (-v, --check) - Uppercase-initial words with 4+ tokens (prose pattern) - Comma-space clause separators (prose pattern) Prose Verify: fields now fall through to package.json scripts or "none" instead of being executed. Valid commands continue to work as before. Closes #1066
This commit is contained in:
parent
9083d86766
commit
668f12b97f
2 changed files with 181 additions and 1 deletions
|
|
@ -20,7 +20,7 @@ import assert from "node:assert/strict";
|
|||
import { mkdirSync, writeFileSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { discoverCommands, runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit } from "../verification-gate.ts";
|
||||
import { discoverCommands, runVerificationGate, formatFailureContext, captureRuntimeErrors, runDependencyAudit, isLikelyCommand } from "../verification-gate.ts";
|
||||
import type { CaptureRuntimeErrorsOptions, DependencyAuditOptions } from "../verification-gate.ts";
|
||||
import { validatePreferences } from "../preferences.ts";
|
||||
|
||||
|
|
@ -374,6 +374,120 @@ test("verification-gate: whitespace-only preference commands fall through", () =
|
|||
}
|
||||
});
|
||||
|
||||
// ─── isLikelyCommand Tests (issue #1066) ────────────────────────────────────
|
||||
|
||||
test("isLikelyCommand: known command prefixes are accepted", () => {
|
||||
assert.equal(isLikelyCommand("npm run lint"), true);
|
||||
assert.equal(isLikelyCommand("npx vitest"), true);
|
||||
assert.equal(isLikelyCommand("yarn test"), true);
|
||||
assert.equal(isLikelyCommand("pnpm run typecheck"), true);
|
||||
assert.equal(isLikelyCommand("node script.js"), true);
|
||||
assert.equal(isLikelyCommand("tsc --noEmit"), true);
|
||||
assert.equal(isLikelyCommand("eslint ."), true);
|
||||
assert.equal(isLikelyCommand("jest --ci"), true);
|
||||
assert.equal(isLikelyCommand("python3 -m pytest"), true);
|
||||
assert.equal(isLikelyCommand("cargo test"), true);
|
||||
assert.equal(isLikelyCommand("go test ./..."), true);
|
||||
assert.equal(isLikelyCommand("make test"), true);
|
||||
});
|
||||
|
||||
test("isLikelyCommand: path-like first tokens are accepted", () => {
|
||||
assert.equal(isLikelyCommand("./scripts/verify.sh"), true);
|
||||
assert.equal(isLikelyCommand("/usr/local/bin/check"), true);
|
||||
assert.equal(isLikelyCommand("../tools/lint.sh"), true);
|
||||
});
|
||||
|
||||
test("isLikelyCommand: flag-like tokens indicate a command", () => {
|
||||
assert.equal(isLikelyCommand("custom-tool --check"), true);
|
||||
assert.equal(isLikelyCommand("mycheck -v"), true);
|
||||
});
|
||||
|
||||
test("isLikelyCommand: prose descriptions are rejected", () => {
|
||||
// The exact string from issue #1066
|
||||
assert.equal(
|
||||
isLikelyCommand("Document exists, contains all 5 scale names, all 14 semantic tokens, Inter assessment, philosophy and competitive citations present"),
|
||||
false,
|
||||
);
|
||||
assert.equal(isLikelyCommand("Check that the file has been created with the correct content"), false);
|
||||
assert.equal(isLikelyCommand("Verify the output matches expected format"), false);
|
||||
assert.equal(isLikelyCommand("All tests pass and coverage is above 80%"), false);
|
||||
assert.equal(isLikelyCommand("File should exist in the output directory"), false);
|
||||
assert.equal(isLikelyCommand("Build succeeds without errors or warnings"), false);
|
||||
});
|
||||
|
||||
test("isLikelyCommand: empty or whitespace-only strings are rejected", () => {
|
||||
assert.equal(isLikelyCommand(""), false);
|
||||
assert.equal(isLikelyCommand(" "), false);
|
||||
});
|
||||
|
||||
test("isLikelyCommand: short lowercase tokens without flags are accepted (could be custom scripts)", () => {
|
||||
assert.equal(isLikelyCommand("custom-verify"), true);
|
||||
assert.equal(isLikelyCommand("mycheck"), true);
|
||||
});
|
||||
|
||||
test("verification-gate: prose taskPlanVerify is rejected, falls through to package.json", () => {
|
||||
const tmp = makeTempDir("vg-prose-reject");
|
||||
try {
|
||||
writeFileSync(
|
||||
join(tmp, "package.json"),
|
||||
JSON.stringify({ scripts: { test: "vitest" } }),
|
||||
);
|
||||
const result = discoverCommands({
|
||||
taskPlanVerify: "Document exists, contains all 5 scale names, all 14 semantic tokens",
|
||||
cwd: tmp,
|
||||
});
|
||||
// Prose should be rejected, so it falls through to package.json
|
||||
assert.equal(result.source, "package-json");
|
||||
assert.deepStrictEqual(result.commands, ["npm run test"]);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: prose taskPlanVerify with no package.json → source none", () => {
|
||||
const tmp = makeTempDir("vg-prose-none");
|
||||
try {
|
||||
const result = discoverCommands({
|
||||
taskPlanVerify: "Verify the output matches expected format and all fields are present",
|
||||
cwd: tmp,
|
||||
});
|
||||
assert.equal(result.source, "none");
|
||||
assert.deepStrictEqual(result.commands, []);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: valid command in taskPlanVerify still works", () => {
|
||||
const tmp = makeTempDir("vg-valid-cmd");
|
||||
try {
|
||||
const result = discoverCommands({
|
||||
taskPlanVerify: "npm run lint && npm run test",
|
||||
cwd: tmp,
|
||||
});
|
||||
assert.equal(result.source, "task-plan");
|
||||
assert.deepStrictEqual(result.commands, ["npm run lint", "npm run test"]);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("verification-gate: mixed prose and commands in taskPlanVerify — only commands kept", () => {
|
||||
const tmp = makeTempDir("vg-mixed");
|
||||
try {
|
||||
const result = discoverCommands({
|
||||
taskPlanVerify: "Check that everything works && npm run test",
|
||||
cwd: tmp,
|
||||
});
|
||||
// "Check that everything works" is prose (starts with capital, 4+ words)
|
||||
// "npm run test" is a valid command
|
||||
assert.equal(result.source, "task-plan");
|
||||
assert.deepStrictEqual(result.commands, ["npm run test"]);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Additional Execution Tests (T02) ───────────────────────────────────────
|
||||
|
||||
test("verification-gate: one command fails — remaining commands still run (non-short-circuit)", () => {
|
||||
|
|
|
|||
|
|
@ -144,12 +144,78 @@ export function formatFailureContext(result: VerificationResult): string {
|
|||
/** Characters that indicate shell injection when found in a command string. */
|
||||
const SHELL_INJECTION_PATTERN = /[;|`]|\$\(/;
|
||||
|
||||
/**
|
||||
* Known executable first-tokens that are safe to run.
|
||||
* Lowercase commands, common build/test tools, and npm/yarn/pnpm invocations.
|
||||
*/
|
||||
const KNOWN_COMMAND_PREFIXES = new Set([
|
||||
"npm", "npx", "yarn", "pnpm", "bun", "bunx", "deno",
|
||||
"node", "ts-node", "tsx", "tsc",
|
||||
"sh", "bash", "zsh",
|
||||
"echo", "cat", "ls", "test", "true", "false", "pwd", "env",
|
||||
"make", "cargo", "go", "python", "python3", "pip", "pip3",
|
||||
"ruby", "gem", "bundle", "rake",
|
||||
"java", "javac", "mvn", "gradle",
|
||||
"docker", "docker-compose",
|
||||
"git", "gh",
|
||||
"eslint", "prettier", "vitest", "jest", "mocha", "pytest", "phpunit",
|
||||
"curl", "wget",
|
||||
"grep", "find", "diff", "wc", "sort", "head", "tail",
|
||||
]);
|
||||
|
||||
/**
|
||||
* Heuristic check: does this string look like an executable shell command
|
||||
* rather than a prose description?
|
||||
*
|
||||
* Returns true when the string appears to be a command. Returns false
|
||||
* for English prose (e.g. "Document exists, contains all 5 scale names").
|
||||
*
|
||||
* Heuristics (any true → command-like):
|
||||
* 1. First token is a known command prefix
|
||||
* 2. First token starts with `.` or `/` (path-like)
|
||||
* 3. Any token starts with `-` (flag-like)
|
||||
* 4. First token contains no uppercase letters (commands are lowercase)
|
||||
* AND first token does not end with a comma or colon (prose punctuation)
|
||||
*
|
||||
* Heuristics (any true → prose-like):
|
||||
* 1. First token starts with an uppercase letter and the string has 4+ words
|
||||
* 2. String contains commas followed by spaces (prose clause structure)
|
||||
*/
|
||||
export function isLikelyCommand(cmd: string): boolean {
|
||||
const trimmed = cmd.trim();
|
||||
if (!trimmed) return false;
|
||||
|
||||
const tokens = trimmed.split(/\s+/);
|
||||
const firstToken = tokens[0];
|
||||
|
||||
// Known command prefix → definitely a command
|
||||
if (KNOWN_COMMAND_PREFIXES.has(firstToken)) return true;
|
||||
|
||||
// Path-like first token → command
|
||||
if (firstToken.startsWith("/") || firstToken.startsWith("./") || firstToken.startsWith("../")) return true;
|
||||
|
||||
// Has flag-like tokens → command
|
||||
if (tokens.some(t => t.startsWith("-"))) return true;
|
||||
|
||||
// First token starts with uppercase + 4 or more words → prose
|
||||
if (/^[A-Z]/.test(firstToken) && tokens.length >= 4) return false;
|
||||
|
||||
// Contains comma-space patterns (prose clause separators) → prose
|
||||
if (/,\s/.test(trimmed) && tokens.length >= 4) return false;
|
||||
|
||||
// First token has uppercase letters and no path separators → prose
|
||||
if (/[A-Z]/.test(firstToken) && !firstToken.includes("/")) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate a command string for obvious shell injection patterns.
|
||||
* Returns the command unchanged if safe, or null if suspicious.
|
||||
*/
|
||||
function sanitizeCommand(cmd: string): string | null {
|
||||
if (SHELL_INJECTION_PATTERN.test(cmd)) return null;
|
||||
if (!isLikelyCommand(cmd)) return null;
|
||||
return cmd;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue