detection: fix 6 bugs surfaced by ace-coder validation

Phase 4-D fixes from the Phase 3 validation report. ace-coder is a
uv-managed Python repo with Rust crates in subdirectories; SF was
mis-detecting it in ways that would have failed every autonomous
verification.

1. detectPackageManager: return undefined when no root package.json
   (previously hallucinated "npm" as default, leaking into reports)
2. detectVerificationCommands: only synthesize npm runner when
   package.json actually present at root
3. ROOT_ONLY_PROJECT_FILES: expanded with Cargo.toml, go.mod,
   pyproject.toml, setup.py, pom.xml, pubspec.yaml, Package.swift,
   mix.exs — these are root-only signals; nested instances are
   handled explicitly by emitter logic
4. Cargo block: distinguishes workspace-root vs single-crate-root vs
   nested-only-crates layouts; emits per-crate bash loop for the last
   case (mirrors the Go multi-module branch pattern)
5. pyprojectHasTool: matches both [tool.X] and [tool.X.subkey] so
   ace-coder's [tool.ruff.lint] / [tool.ruff.format] are detected
6. Makefile branch: skip `make test` when (a) test command already
   emitted by another block, or (b) the test target depends on
   _verify_nix or similar nix-shell gates (ace-coder's case)

After these fixes, detectProjectSignals on ace-coder yields the
expected output: no spurious "npm", per-crate cargo loops, ruff/pyright
detected, no nix-gated `make test`.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-02 00:19:49 +02:00
parent eb56173fe5
commit 0399bb9c8c

View file

@ -289,11 +289,28 @@ const RECURSIVE_SCAN_IGNORED_DIRS = new Set([
"out",
]) as ReadonlySet<string>;
/** Project file markers safe to detect recursively via suffix matching. */
/** Project file markers that should ONLY be detected at the repo root.
*
* These markers signal "this is an X project at the root" finding them in
* nested subdirectories doesn't make the repo as a whole an X project, and
* emitting bare commands like `cargo check` from root will fail when the only
* Cargo.toml is in a subcrate.
*
* Suffix-matching via the recursive scan would over-detect; root-only is the
* conservative choice. Verification command emitters that want to handle
* nested-only layouts (e.g. cargo with nested crates) must scan explicitly. */
const ROOT_ONLY_PROJECT_FILES = new Set<string>([
".github/workflows",
"package.json",
"Cargo.toml",
"go.mod",
"pyproject.toml",
"setup.py",
"Gemfile",
"pom.xml",
"pubspec.yaml",
"Package.swift",
"mix.exs",
"Makefile",
"CMakeLists.txt",
"build.gradle",
@ -691,6 +708,12 @@ function detectXcodePlatforms(basePath: string): XcodePlatform[] {
// ─── Package Manager Detection ──────────────────────────────────────────────────
function detectPackageManager(basePath: string): string | undefined {
// No package.json at root → no JS/TS package manager. Avoid hallucinating
// "npm" just because some downstream marker (lockfile-only repo, stale
// artifact, etc.) happens to exist. Callers that expect a JS package
// manager already gate on detectedFiles.includes("package.json").
if (!existsSync(join(basePath, "package.json"))) return undefined;
const declared = readPackageJsonPackageManager(basePath);
if (declared) return declared;
@ -700,10 +723,9 @@ function detectPackageManager(basePath: string): string | undefined {
existsSync(join(basePath, "bun.lockb")) ||
existsSync(join(basePath, "bun.lock"))
)
return existsSync(join(basePath, "package.json")) ? "npm" : undefined;
return "npm";
if (existsSync(join(basePath, "package-lock.json"))) return "npm";
if (existsSync(join(basePath, "package.json"))) return "npm";
return undefined;
return "npm";
}
function readPackageJsonPackageManager(basePath: string): string | undefined {
@ -738,15 +760,18 @@ function detectVerificationCommands(
packageManager?: string,
): string[] {
const commands: string[] = [];
const pm = packageManager ?? "npm";
const run =
pm === "npm"
? "npm run"
: pm === "yarn"
? "yarn"
: `${pm} run`;
if (detectedFiles.includes("package.json")) {
// Only synthesize a runner when there's actually a package.json. Without
// one, "npm run X" is meaningless — and silently defaulting `pm` to "npm"
// here would leak into commands emitted for repos that have no JS at all.
const pm = packageManager ?? "npm";
const run =
pm === "npm"
? "npm run"
: pm === "yarn"
? "yarn"
: `${pm} run`;
const scripts = readPackageJsonScripts(basePath);
if (scripts) {
// Typecheck first — fast, no worker processes
@ -782,7 +807,14 @@ function detectVerificationCommands(
}
}
if (detectedFiles.includes("Cargo.toml")) {
// Cargo / Rust — three layouts:
// 1. Root Cargo.toml with [workspace] → workspace root, bare cargo works.
// 2. Root Cargo.toml without workspace → single crate, bare cargo works.
// 3. No root Cargo.toml, only nested crates → emit per-crate bash loop so
// commands can run from repo root (mirrors the Go multi-module branch).
const rootCargoPath = join(basePath, "Cargo.toml");
const rootHasCargoToml = existsSync(rootCargoPath);
if (rootHasCargoToml) {
// Format check first — fastest, catches style drift before anything else runs.
commands.push("cargo fmt --check");
// Type-check without running tests (faster than test, catches most regressions).
@ -790,6 +822,27 @@ function detectVerificationCommands(
// Limit test threads so Rust tests don't saturate all CPUs.
commands.push("cargo test -- --test-threads=2");
commands.push("cargo clippy -- -D warnings");
} else {
const scanned = scanProjectFiles(basePath);
const crateDirs = scanned
.filter((f) => f.endsWith("/Cargo.toml"))
.map((f) => f.slice(0, -"/Cargo.toml".length))
.filter((d) => d.length > 0 && !d.includes(".."));
if (crateDirs.length > 0) {
const dirsArg = crateDirs.map((d) => `"${d}"`).join(" ");
commands.push(
`bash -c 'set -e; for d in ${dirsArg}; do (cd "$d" && cargo fmt --check); done'`,
);
commands.push(
`bash -c 'set -e; for d in ${dirsArg}; do (cd "$d" && cargo check); done'`,
);
commands.push(
`bash -c 'set -e; for d in ${dirsArg}; do (cd "$d" && cargo test -- --test-threads=2); done'`,
);
commands.push(
`bash -c 'set -e; for d in ${dirsArg}; do (cd "$d" && cargo clippy -- -D warnings); done'`,
);
}
}
if (detectedFiles.includes("go.mod")) {
@ -880,7 +933,22 @@ function detectVerificationCommands(
if (detectedFiles.includes("Makefile")) {
const makeTargets = readMakefileTargets(basePath);
if (makeTargets.includes("test")) {
// Only emit `make test` if:
// 1. A `test` target exists.
// 2. No prior block already pushed a test command (defensive — e.g. a
// pytest/cargo/go/npm test was already emitted; recommending
// `make test` on top is redundant and can confuse users with
// conflicting verification paths).
// 3. The `test` target isn't gated on a nix-only dependency such as
// `_verify_nix`. Such recipes fail outside a nix environment, so
// surfacing them as auto-detected verification breaks every run on
// machines without nix-shell.
const alreadyHasTestCommand = commands.some((cmd) => isTestCommand(cmd));
if (
makeTargets.includes("test") &&
!alreadyHasTestCommand &&
isMakeTestTargetSafe(basePath)
) {
commands.push("make test");
}
}
@ -888,6 +956,65 @@ function detectVerificationCommands(
return commands;
}
/**
* Heuristic check: does an emitted command appear to invoke a test runner?
* Conservative only matches patterns we actually emit elsewhere in this
* file (pytest, cargo test, go test, npm/yarn/pnpm test, rspec, rake test).
*/
function isTestCommand(command: string): boolean {
return (
/\bpytest\b/.test(command) ||
/\bcargo\s+test\b/.test(command) ||
/\bgo\s+test\b/.test(command) ||
/\b(?:npm|yarn|pnpm|bun)\s+(?:run\s+)?test\b/.test(command) ||
/\brspec\b/.test(command) ||
/\brake\s+test\b/.test(command)
);
}
/**
* Inspect the Makefile to decide whether `make test` is safely runnable.
* Returns false when the `test` target depends on a nix-only sentinel such
* as `_verify_nix`, or when the recipe body references `nix-shell` / `nix `.
*
* Naive line-based scan avoids pulling in a Make parser. Reads the file
* directly so we can see both prerequisites (after `:` on the target line)
* and recipe lines (TAB-indented lines following the target).
*/
function isMakeTestTargetSafe(basePath: string): boolean {
let raw: string;
try {
raw = readFileSync(join(basePath, "Makefile"), "utf-8");
} catch {
return false;
}
const lines = raw.split("\n");
const testHeaderRe = /^test\s*:(.*)$/;
for (let i = 0; i < lines.length; i++) {
const headerMatch = lines[i].match(testHeaderRe);
if (!headerMatch) continue;
const prereqs = headerMatch[1].trim();
if (/(^|\s)_verify_nix(\s|$)/.test(prereqs)) return false;
if (/\bnix(-shell)?\b/.test(prereqs)) return false;
// Walk the recipe body — TAB-indented lines until blank line or next
// target declaration.
for (let j = i + 1; j < lines.length; j++) {
const line = lines[j];
if (line.trim() === "") break;
// New rule starts when a non-tab line contains `:` followed by
// non-`=` (avoid matching variable assignments like `FOO := bar`).
if (!line.startsWith("\t") && /^[A-Za-z0-9_.-]+\s*:[^=]/.test(line)) {
break;
}
if (!line.startsWith("\t")) continue;
if (/\bnix-shell\b/.test(line)) return false;
if (/\bnix\s/.test(line)) return false;
}
return true;
}
return true;
}
// ─── Global Setup Detection ─────────────────────────────────────────────────────
/**
@ -972,14 +1099,19 @@ function readMakefileTargets(basePath: string): string[] {
* `ruff` invocations for projects that actually configure those tools.
*
* Naive substring scan avoids pulling in a TOML parser for a check this simple.
* Matches the standard `[tool.<name>]` section header at the start of a line.
* Matches `[tool.<name>]` AND `[tool.<name>.<sub>]` (e.g. `[tool.ruff.lint]`,
* `[tool.ruff.format]`) since modern tools often only configure sub-sections.
*/
function pyprojectHasTool(basePath: string, toolName: string): boolean {
try {
const raw = readFileSync(join(basePath, "pyproject.toml"), "utf-8");
const header = `[tool.${toolName}]`;
const exactHeader = `[tool.${toolName}]`;
const subHeader = `[tool.${toolName}.`;
for (const line of raw.split("\n")) {
if (line.trim().startsWith(header)) return true;
const trimmed = line.trim();
if (trimmed.startsWith(exactHeader) || trimmed.startsWith(subHeader)) {
return true;
}
}
return false;
} catch {