detection: fix 6 bugs surfaced by ace-coder validation

Phase 4-D fixes from the Phase 3 validation report. ace-coder is a uv-managed Python repo with Rust crates in subdirectories; SF was mis-detecting it in ways that would have failed every autonomous verification. 1. detectPackageManager: return undefined when no root package.json (previously hallucinated "npm" as default, leaking into reports) 2. detectVerificationCommands: only synthesize npm runner when package.json actually present at root 3. ROOT_ONLY_PROJECT_FILES: expanded with Cargo.toml, go.mod, pyproject.toml, setup.py, pom.xml, pubspec.yaml, Package.swift, mix.exs — these are root-only signals; nested instances are handled explicitly by emitter logic 4. Cargo block: distinguishes workspace-root vs single-crate-root vs nested-only-crates layouts; emits per-crate bash loop for the last case (mirrors the Go multi-module branch pattern) 5. pyprojectHasTool: matches both [tool.X] and [tool.X.subkey] so ace-coder's [tool.ruff.lint] / [tool.ruff.format] are detected 6. Makefile branch: skip `make test` when (a) test command already emitted by another block, or (b) the test target depends on _verify_nix or similar nix-shell gates (ace-coder's case) After these fixes, detectProjectSignals on ace-coder yields the expected output: no spurious "npm", per-crate cargo loops, ruff/pyright detected, no nix-gated `make test`. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 00:19:49 +02:00 · 2026-05-02 00:19:49 +02:00 · 0399bb9c8c
commit 0399bb9c8c
parent eb56173fe5
1 changed files with 148 additions and 16 deletions
--- a/src/resources/extensions/sf/detection.ts
+++ b/src/resources/extensions/sf/detection.ts
@ -289,11 +289,28 @@ const RECURSIVE_SCAN_IGNORED_DIRS = new Set([
 	"out",
 ]) as ReadonlySet<string>;

-/** Project file markers safe to detect recursively via suffix matching. */
+/** Project file markers that should ONLY be detected at the repo root.
+ *
+ * These markers signal "this is an X project at the root" — finding them in
+ * nested subdirectories doesn't make the repo as a whole an X project, and
+ * emitting bare commands like `cargo check` from root will fail when the only
+ * Cargo.toml is in a subcrate.
+ *
+ * Suffix-matching via the recursive scan would over-detect; root-only is the
+ * conservative choice. Verification command emitters that want to handle
+ * nested-only layouts (e.g. cargo with nested crates) must scan explicitly. */
 const ROOT_ONLY_PROJECT_FILES = new Set<string>([
 	".github/workflows",
 	"package.json",
+	"Cargo.toml",
+	"go.mod",
+	"pyproject.toml",
+	"setup.py",
 	"Gemfile",
+	"pom.xml",
+	"pubspec.yaml",
+	"Package.swift",
+	"mix.exs",
 	"Makefile",
 	"CMakeLists.txt",
 	"build.gradle",
@ -691,6 +708,12 @@ function detectXcodePlatforms(basePath: string): XcodePlatform[] {
 // ─── Package Manager Detection ──────────────────────────────────────────────────

 function detectPackageManager(basePath: string): string | undefined {
+	// No package.json at root → no JS/TS package manager. Avoid hallucinating
+	// "npm" just because some downstream marker (lockfile-only repo, stale
+	// artifact, etc.) happens to exist. Callers that expect a JS package
+	// manager already gate on detectedFiles.includes("package.json").
+	if (!existsSync(join(basePath, "package.json"))) return undefined;
+
 	const declared = readPackageJsonPackageManager(basePath);
 	if (declared) return declared;

@ -700,10 +723,9 @@ function detectPackageManager(basePath: string): string | undefined {
 		existsSync(join(basePath, "bun.lockb")) ||
 		existsSync(join(basePath, "bun.lock"))
 	)
-		return existsSync(join(basePath, "package.json")) ? "npm" : undefined;
+		return "npm";
 	if (existsSync(join(basePath, "package-lock.json"))) return "npm";
-	if (existsSync(join(basePath, "package.json"))) return "npm";
-	return undefined;
+	return "npm";
 }

 function readPackageJsonPackageManager(basePath: string): string | undefined {
@ -738,15 +760,18 @@ function detectVerificationCommands(
 	packageManager?: string,
 ): string[] {
 	const commands: string[] = [];
-	const pm = packageManager ?? "npm";
-	const run =
-		pm === "npm"
-			? "npm run"
-			: pm === "yarn"
-				? "yarn"
-				: `${pm} run`;

 	if (detectedFiles.includes("package.json")) {
+		// Only synthesize a runner when there's actually a package.json. Without
+		// one, "npm run X" is meaningless — and silently defaulting `pm` to "npm"
+		// here would leak into commands emitted for repos that have no JS at all.
+		const pm = packageManager ?? "npm";
+		const run =
+			pm === "npm"
+				? "npm run"
+				: pm === "yarn"
+					? "yarn"
+					: `${pm} run`;
 		const scripts = readPackageJsonScripts(basePath);
 		if (scripts) {
 			// Typecheck first — fast, no worker processes
@ -782,7 +807,14 @@ function detectVerificationCommands(
 		}
 	}

-	if (detectedFiles.includes("Cargo.toml")) {
+	// Cargo / Rust — three layouts:
+	//   1. Root Cargo.toml with [workspace] → workspace root, bare cargo works.
+	//   2. Root Cargo.toml without workspace → single crate, bare cargo works.
+	//   3. No root Cargo.toml, only nested crates → emit per-crate bash loop so
+	//      commands can run from repo root (mirrors the Go multi-module branch).
+	const rootCargoPath = join(basePath, "Cargo.toml");
+	const rootHasCargoToml = existsSync(rootCargoPath);
+	if (rootHasCargoToml) {
 		// Format check first — fastest, catches style drift before anything else runs.
 		commands.push("cargo fmt --check");
 		// Type-check without running tests (faster than test, catches most regressions).
@ -790,6 +822,27 @@ function detectVerificationCommands(
 		// Limit test threads so Rust tests don't saturate all CPUs.
 		commands.push("cargo test -- --test-threads=2");
 		commands.push("cargo clippy -- -D warnings");
+	} else {
+		const scanned = scanProjectFiles(basePath);
+		const crateDirs = scanned
+			.filter((f) => f.endsWith("/Cargo.toml"))
+			.map((f) => f.slice(0, -"/Cargo.toml".length))
+			.filter((d) => d.length > 0 && !d.includes(".."));
+		if (crateDirs.length > 0) {
+			const dirsArg = crateDirs.map((d) => `"${d}"`).join(" ");
+			commands.push(
+				`bash -c 'set -e; for d in ${dirsArg}; do (cd "$d" && cargo fmt --check); done'`,
+			);
+			commands.push(
+				`bash -c 'set -e; for d in ${dirsArg}; do (cd "$d" && cargo check); done'`,
+			);
+			commands.push(
+				`bash -c 'set -e; for d in ${dirsArg}; do (cd "$d" && cargo test -- --test-threads=2); done'`,
+			);
+			commands.push(
+				`bash -c 'set -e; for d in ${dirsArg}; do (cd "$d" && cargo clippy -- -D warnings); done'`,
+			);
+		}
 	}

 	if (detectedFiles.includes("go.mod")) {
@ -880,7 +933,22 @@ function detectVerificationCommands(

 	if (detectedFiles.includes("Makefile")) {
 		const makeTargets = readMakefileTargets(basePath);
-		if (makeTargets.includes("test")) {
+		// Only emit `make test` if:
+		//   1. A `test` target exists.
+		//   2. No prior block already pushed a test command (defensive — e.g. a
+		//      pytest/cargo/go/npm test was already emitted; recommending
+		//      `make test` on top is redundant and can confuse users with
+		//      conflicting verification paths).
+		//   3. The `test` target isn't gated on a nix-only dependency such as
+		//      `_verify_nix`. Such recipes fail outside a nix environment, so
+		//      surfacing them as auto-detected verification breaks every run on
+		//      machines without nix-shell.
+		const alreadyHasTestCommand = commands.some((cmd) => isTestCommand(cmd));
+		if (
+			makeTargets.includes("test") &&
+			!alreadyHasTestCommand &&
+			isMakeTestTargetSafe(basePath)
+		) {
 			commands.push("make test");
 		}
 	}
@ -888,6 +956,65 @@ function detectVerificationCommands(
 	return commands;
 }

+/**
+ * Heuristic check: does an emitted command appear to invoke a test runner?
+ * Conservative — only matches patterns we actually emit elsewhere in this
+ * file (pytest, cargo test, go test, npm/yarn/pnpm test, rspec, rake test).
+ */
+function isTestCommand(command: string): boolean {
+	return (
+		/\bpytest\b/.test(command) ||
+		/\bcargo\s+test\b/.test(command) ||
+		/\bgo\s+test\b/.test(command) ||
+		/\b(?:npm|yarn|pnpm|bun)\s+(?:run\s+)?test\b/.test(command) ||
+		/\brspec\b/.test(command) ||
+		/\brake\s+test\b/.test(command)
+	);
+}
+
+/**
+ * Inspect the Makefile to decide whether `make test` is safely runnable.
+ * Returns false when the `test` target depends on a nix-only sentinel such
+ * as `_verify_nix`, or when the recipe body references `nix-shell` / `nix `.
+ *
+ * Naive line-based scan — avoids pulling in a Make parser. Reads the file
+ * directly so we can see both prerequisites (after `:` on the target line)
+ * and recipe lines (TAB-indented lines following the target).
+ */
+function isMakeTestTargetSafe(basePath: string): boolean {
+	let raw: string;
+	try {
+		raw = readFileSync(join(basePath, "Makefile"), "utf-8");
+	} catch {
+		return false;
+	}
+	const lines = raw.split("\n");
+	const testHeaderRe = /^test\s*:(.*)$/;
+	for (let i = 0; i < lines.length; i++) {
+		const headerMatch = lines[i].match(testHeaderRe);
+		if (!headerMatch) continue;
+		const prereqs = headerMatch[1].trim();
+		if (/(^|\s)_verify_nix(\s|$)/.test(prereqs)) return false;
+		if (/\bnix(-shell)?\b/.test(prereqs)) return false;
+		// Walk the recipe body — TAB-indented lines until blank line or next
+		// target declaration.
+		for (let j = i + 1; j < lines.length; j++) {
+			const line = lines[j];
+			if (line.trim() === "") break;
+			// New rule starts when a non-tab line contains `:` followed by
+			// non-`=` (avoid matching variable assignments like `FOO := bar`).
+			if (!line.startsWith("\t") && /^[A-Za-z0-9_.-]+\s*:[^=]/.test(line)) {
+				break;
+			}
+			if (!line.startsWith("\t")) continue;
+			if (/\bnix-shell\b/.test(line)) return false;
+			if (/\bnix\s/.test(line)) return false;
+		}
+		return true;
+	}
+	return true;
+}
+
 // ─── Global Setup Detection ─────────────────────────────────────────────────────

 /**
@ -972,14 +1099,19 @@ function readMakefileTargets(basePath: string): string[] {
 * `ruff` invocations for projects that actually configure those tools.
 *
 * Naive substring scan — avoids pulling in a TOML parser for a check this simple.
- * Matches the standard `[tool.<name>]` section header at the start of a line.
+ * Matches `[tool.<name>]` AND `[tool.<name>.<sub>]` (e.g. `[tool.ruff.lint]`,
+ * `[tool.ruff.format]`) since modern tools often only configure sub-sections.
 */
 function pyprojectHasTool(basePath: string, toolName: string): boolean {
 	try {
 		const raw = readFileSync(join(basePath, "pyproject.toml"), "utf-8");
-		const header = `[tool.${toolName}]`;
+		const exactHeader = `[tool.${toolName}]`;
+		const subHeader = `[tool.${toolName}.`;
 		for (const line of raw.split("\n")) {
-			if (line.trim().startsWith(header)) return true;
+			const trimmed = line.trim();
+			if (trimmed.startsWith(exactHeader) || trimmed.startsWith(subHeader)) {
+				return true;
+			}
 		}
 		return false;
 	} catch {