Per-milestone lock isolation prevents workers from contending on shared .gsd/auto.lock. Budget ceiling scoped to current session for parallel workers. Symlink sync skip prevents ERR_FS_CP_EINVAL. Planning artifacts copied to worktree so workers can find their roadmap. Closes #2184
This commit is contained in:
parent
cedf6a558d
commit
b8d4f03747
6 changed files with 301 additions and 18 deletions
|
|
@ -196,6 +196,11 @@ export function syncProjectRootToWorktree(
|
|||
const prGsd = join(projectRoot, ".gsd");
|
||||
const wtGsd = join(worktreePath_, ".gsd");
|
||||
|
||||
// When .gsd is a symlink to the same external directory in both locations,
|
||||
// cpSync rejects the copy because source === destination (ERR_FS_CP_EINVAL).
|
||||
// Compare realpaths and skip when they resolve to the same physical path (#2184).
|
||||
if (isSamePath(prGsd, wtGsd)) return;
|
||||
|
||||
// Copy milestone directory from project root to worktree — additive only.
|
||||
// force:false prevents cpSync from overwriting existing worktree files.
|
||||
// Without this, worktree-authoritative files (e.g. VALIDATION.md written
|
||||
|
|
@ -245,6 +250,11 @@ export function syncStateToProjectRoot(
|
|||
const wtGsd = join(worktreePath_, ".gsd");
|
||||
const prGsd = join(projectRoot, ".gsd");
|
||||
|
||||
// When .gsd is a symlink to the same external directory in both locations,
|
||||
// cpSync rejects the copy because source === destination (ERR_FS_CP_EINVAL).
|
||||
// Compare realpaths and skip when they resolve to the same physical path (#2184).
|
||||
if (isSamePath(wtGsd, prGsd)) return;
|
||||
|
||||
// 1. STATE.md — the quick-glance status used by initial deriveState()
|
||||
safeCopy(join(wtGsd, "STATE.md"), join(prGsd, "STATE.md"), { force: true });
|
||||
|
||||
|
|
|
|||
|
|
@ -719,8 +719,17 @@ export async function runGuards(
|
|||
const budgetCeiling = prefs?.budget_ceiling;
|
||||
if (budgetCeiling !== undefined && budgetCeiling > 0) {
|
||||
const currentLedger = deps.getLedger() as { units: unknown } | null;
|
||||
const totalCost = currentLedger
|
||||
? deps.getProjectTotals(currentLedger.units).cost
|
||||
// In parallel worker mode, only count cost from the current auto-mode session
|
||||
// to avoid hitting the ceiling due to historical project-wide spend (#2184).
|
||||
let costUnits = currentLedger?.units;
|
||||
if (process.env.GSD_PARALLEL_WORKER && s.autoStartTime && Array.isArray(costUnits)) {
|
||||
const sessionStartISO = new Date(s.autoStartTime).toISOString();
|
||||
costUnits = costUnits.filter(
|
||||
(u: { startedAt?: string }) => u.startedAt != null && u.startedAt >= sessionStartISO,
|
||||
);
|
||||
}
|
||||
const totalCost = costUnits
|
||||
? deps.getProjectTotals(costUnits).cost
|
||||
: 0;
|
||||
const budgetPct = totalCost / budgetCeiling;
|
||||
const budgetAlertLevel = deps.getBudgetAlertLevel(budgetPct);
|
||||
|
|
|
|||
|
|
@ -14,8 +14,7 @@ import { readFileSync, unlinkSync, existsSync } from "node:fs";
|
|||
import { join } from "node:path";
|
||||
import { gsdRoot } from "./paths.js";
|
||||
import { atomicWriteSync } from "./atomic-write.js";
|
||||
|
||||
const LOCK_FILE = "auto.lock";
|
||||
import { effectiveLockFile } from "./session-lock.js";
|
||||
|
||||
export interface LockData {
|
||||
pid: number;
|
||||
|
|
@ -28,7 +27,7 @@ export interface LockData {
|
|||
}
|
||||
|
||||
function lockPath(basePath: string): string {
|
||||
return join(gsdRoot(basePath), LOCK_FILE);
|
||||
return join(gsdRoot(basePath), effectiveLockFile());
|
||||
}
|
||||
|
||||
/** Write or update the lock file with current auto-mode state. */
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ import { join, dirname } from "node:path";
|
|||
import { fileURLToPath } from "node:url";
|
||||
import { gsdRoot } from "./paths.js";
|
||||
import { createWorktree, worktreePath } from "./worktree-manager.js";
|
||||
import { autoWorktreeBranch, runWorktreePostCreateHook } from "./auto-worktree.js";
|
||||
import { autoWorktreeBranch, runWorktreePostCreateHook, syncGsdStateToWorktree } from "./auto-worktree.js";
|
||||
import { nativeBranchExists } from "./native-git-bridge.js";
|
||||
import { readIntegrationBranch } from "./git-service.js";
|
||||
import { resolveParallelConfig } from "./preferences.js";
|
||||
|
|
@ -507,6 +507,11 @@ function createMilestoneWorktree(basePath: string, milestoneId: string): string
|
|||
// Run post-create hook if configured
|
||||
runWorktreePostCreateHook(basePath, info.path);
|
||||
|
||||
// Copy .gsd/ planning artifacts (milestones, CONTEXT, ROADMAP, etc.) from the
|
||||
// project root into the worktree. Without this, workers for newly-planned
|
||||
// milestones can't find their roadmap and exit immediately (#2184 Bug 4).
|
||||
syncGsdStateToWorktree(basePath, info.path);
|
||||
|
||||
return info.path;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -83,10 +83,31 @@ let _lockAcquiredAt: number = 0;
|
|||
|
||||
const LOCK_FILE = "auto.lock";
|
||||
|
||||
/**
|
||||
* Derive the effective lock file name for the current process.
|
||||
* In parallel worker mode (GSD_PARALLEL_WORKER + GSD_MILESTONE_LOCK),
|
||||
* each worker uses a per-milestone lock file (`auto-<milestoneId>.lock`)
|
||||
* to avoid contending on the shared `.gsd/auto.lock` (#2184).
|
||||
*/
|
||||
export function effectiveLockFile(): string {
|
||||
const mid = process.env.GSD_PARALLEL_WORKER ? process.env.GSD_MILESTONE_LOCK : null;
|
||||
return mid ? `auto-${mid}.lock` : LOCK_FILE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Derive the OS-level lock target directory for the current process.
|
||||
* In parallel worker mode, uses `.gsd/parallel/<milestoneId>/` instead of
|
||||
* `.gsd/` so workers don't contend on the same proper-lockfile directory (#2184).
|
||||
*/
|
||||
export function effectiveLockTarget(gsdDir: string): string {
|
||||
const mid = process.env.GSD_PARALLEL_WORKER ? process.env.GSD_MILESTONE_LOCK : null;
|
||||
return mid ? join(gsdDir, "parallel", mid) : gsdDir;
|
||||
}
|
||||
|
||||
function lockPath(basePath: string): string {
|
||||
// If we have a snapshotted path from acquisition, use it for consistency
|
||||
if (_snapshotLockPath) return _snapshotLockPath;
|
||||
return join(gsdRoot(basePath), LOCK_FILE);
|
||||
return join(gsdRoot(basePath), effectiveLockFile());
|
||||
}
|
||||
|
||||
// ─── Stray Lock Cleanup ─────────────────────────────────────────────────────
|
||||
|
|
@ -265,14 +286,16 @@ export function acquireSessionLock(basePath: string): SessionLockResult {
|
|||
}
|
||||
|
||||
const gsdDir = gsdRoot(basePath);
|
||||
const lockTarget = effectiveLockTarget(gsdDir);
|
||||
|
||||
try {
|
||||
// Try to acquire an exclusive OS-level lock on the lock file.
|
||||
// We lock the directory (gsdRoot) since proper-lockfile works best
|
||||
// on directories, and the lock file itself may not exist yet.
|
||||
mkdirSync(gsdDir, { recursive: true });
|
||||
// Try to acquire an exclusive OS-level lock on the lock target.
|
||||
// We lock a directory since proper-lockfile works best on directories,
|
||||
// and the lock file itself may not exist yet.
|
||||
// In parallel worker mode, lockTarget is .gsd/parallel/<MID>/ (#2184).
|
||||
mkdirSync(lockTarget, { recursive: true });
|
||||
|
||||
const release = lockfile.lockSync(gsdDir, {
|
||||
const release = lockfile.lockSync(lockTarget, {
|
||||
realpath: false,
|
||||
stale: 1_800_000, // 30 minutes — safe for laptop sleep / long event loop stalls
|
||||
update: 10_000, // Update lock mtime every 10s to prove liveness
|
||||
|
|
@ -283,7 +306,7 @@ export function acquireSessionLock(basePath: string): SessionLockResult {
|
|||
|
||||
// Safety net: clean up lock dir on process exit if _releaseFunction
|
||||
// wasn't called (e.g., normal exit after clean completion) (#1245).
|
||||
ensureExitHandler(gsdDir);
|
||||
ensureExitHandler(lockTarget);
|
||||
|
||||
// Write the informational lock data
|
||||
atomicWriteSync(lp, JSON.stringify(lockData, null, 2));
|
||||
|
|
@ -298,12 +321,12 @@ export function acquireSessionLock(basePath: string): SessionLockResult {
|
|||
// If no lock file or no alive process, try to clean up and re-acquire (#1245)
|
||||
if (!existingData || (existingPid && !isPidAlive(existingPid))) {
|
||||
try {
|
||||
const lockDir = join(gsdDir + ".lock");
|
||||
const lockDir = join(lockTarget + ".lock");
|
||||
if (existsSync(lockDir)) rmSync(lockDir, { recursive: true, force: true });
|
||||
if (existsSync(lp)) unlinkSync(lp);
|
||||
|
||||
// Retry acquisition after cleanup
|
||||
const release = lockfile.lockSync(gsdDir, {
|
||||
const release = lockfile.lockSync(lockTarget, {
|
||||
realpath: false,
|
||||
stale: 1_800_000, // 30 minutes — match primary lock settings
|
||||
update: 10_000,
|
||||
|
|
@ -312,7 +335,7 @@ export function acquireSessionLock(basePath: string): SessionLockResult {
|
|||
assignLockState(basePath, release, lp);
|
||||
|
||||
// Safety net — uses centralized handler to avoid double-registration
|
||||
ensureExitHandler(gsdDir);
|
||||
ensureExitHandler(lockTarget);
|
||||
|
||||
atomicWriteSync(lp, JSON.stringify(lockData, null, 2));
|
||||
return { acquired: true };
|
||||
|
|
@ -483,13 +506,24 @@ export function releaseSessionLock(basePath: string): void {
|
|||
// Non-fatal
|
||||
}
|
||||
|
||||
// Remove the proper-lockfile directory (.gsd.lock/) for the current path
|
||||
// Remove the proper-lockfile directory for the current lock target.
|
||||
// In parallel worker mode, this is .gsd/parallel/<MID>.lock/ (#2184).
|
||||
const gsdDir = gsdRoot(basePath);
|
||||
const lockTarget = effectiveLockTarget(gsdDir);
|
||||
try {
|
||||
const lockDir = join(gsdRoot(basePath) + ".lock");
|
||||
const lockDir = join(lockTarget + ".lock");
|
||||
if (existsSync(lockDir)) rmSync(lockDir, { recursive: true, force: true });
|
||||
} catch {
|
||||
// Non-fatal
|
||||
}
|
||||
// Also clean the per-milestone parallel directory itself if it exists
|
||||
if (lockTarget !== gsdDir) {
|
||||
try {
|
||||
if (existsSync(lockTarget)) rmSync(lockTarget, { recursive: true, force: true });
|
||||
} catch {
|
||||
// Non-fatal
|
||||
}
|
||||
}
|
||||
|
||||
// Clean ALL registered lock paths (#1578) — lock files accumulate across
|
||||
// main project .gsd/, worktree .gsd/, and projects registry paths.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,226 @@
|
|||
/**
|
||||
* parallel-worker-lock-contention.test.ts — Regression tests for #2184.
|
||||
*
|
||||
* Covers all four bugs from the parallel worker contention issue:
|
||||
* Bug 1: Session lock contention — per-milestone lock isolation
|
||||
* Bug 2: Budget ceiling scoped to current session for parallel workers
|
||||
* Bug 3: syncProjectRootToWorktree skips when source === destination (symlinks)
|
||||
* Bug 4: createMilestoneWorktree copies planning artifacts
|
||||
*
|
||||
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
||||
*/
|
||||
|
||||
import {
|
||||
mkdtempSync,
|
||||
mkdirSync,
|
||||
writeFileSync,
|
||||
rmSync,
|
||||
existsSync,
|
||||
symlinkSync,
|
||||
readFileSync,
|
||||
} from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
|
||||
import {
|
||||
acquireSessionLock,
|
||||
releaseSessionLock,
|
||||
effectiveLockFile,
|
||||
effectiveLockTarget,
|
||||
} from "../session-lock.ts";
|
||||
import { gsdRoot } from "../paths.ts";
|
||||
import {
|
||||
syncProjectRootToWorktree,
|
||||
syncStateToProjectRoot,
|
||||
} from "../auto-worktree.ts";
|
||||
import { writeLock, readCrashLock, clearLock } from "../crash-recovery.ts";
|
||||
import { describe, test, beforeEach, afterEach } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
// ─── Bug 1: Per-milestone lock isolation ──────────────────────────────────────
|
||||
|
||||
describe("parallel-worker-lock-contention (#2184)", () => {
  // Snapshot of the two parallel-mode env vars, captured before each test and
  // restored after, so individual tests can mutate process.env freely.
  const savedEnv: Record<string, string | undefined> = {};

  beforeEach(() => {
    savedEnv.GSD_PARALLEL_WORKER = process.env.GSD_PARALLEL_WORKER;
    savedEnv.GSD_MILESTONE_LOCK = process.env.GSD_MILESTONE_LOCK;
  });

  afterEach(() => {
    // Assigning `undefined` to process.env would store the string "undefined",
    // so a previously-unset var must be restored via `delete` instead.
    if (savedEnv.GSD_PARALLEL_WORKER === undefined) {
      delete process.env.GSD_PARALLEL_WORKER;
    } else {
      process.env.GSD_PARALLEL_WORKER = savedEnv.GSD_PARALLEL_WORKER;
    }
    if (savedEnv.GSD_MILESTONE_LOCK === undefined) {
      delete process.env.GSD_MILESTONE_LOCK;
    } else {
      process.env.GSD_MILESTONE_LOCK = savedEnv.GSD_MILESTONE_LOCK;
    }
  });

  // ─── Bug 1a: effectiveLockFile returns per-milestone name ────────────────
  test("Bug 1a: effectiveLockFile returns auto.lock without parallel env", () => {
    delete process.env.GSD_PARALLEL_WORKER;
    delete process.env.GSD_MILESTONE_LOCK;
    assert.equal(effectiveLockFile(), "auto.lock");
  });

  test("Bug 1a: effectiveLockFile returns auto-<MID>.lock in parallel mode", () => {
    process.env.GSD_PARALLEL_WORKER = "1";
    process.env.GSD_MILESTONE_LOCK = "M003";
    assert.equal(effectiveLockFile(), "auto-M003.lock");
  });

  // ─── Bug 1b: effectiveLockTarget returns per-milestone directory ─────────
  test("Bug 1b: effectiveLockTarget returns gsdDir without parallel env", () => {
    delete process.env.GSD_PARALLEL_WORKER;
    const gsdDir = "/tmp/test/.gsd";
    assert.equal(effectiveLockTarget(gsdDir), gsdDir);
  });

  test("Bug 1b: effectiveLockTarget returns parallel/<MID> in parallel mode", () => {
    process.env.GSD_PARALLEL_WORKER = "1";
    process.env.GSD_MILESTONE_LOCK = "M003";
    const gsdDir = "/tmp/test/.gsd";
    assert.equal(effectiveLockTarget(gsdDir), join(gsdDir, "parallel", "M003"));
  });

  // ─── Bug 1c: Two parallel workers acquire independent locks ──────────────
  test("Bug 1c: parallel workers use per-milestone lock files, not shared auto.lock", () => {
    // Real acquire/release round-trip against a throwaway temp project dir.
    const base = mkdtempSync(join(tmpdir(), "gsd-parallel-lock-"));
    mkdirSync(join(base, ".gsd"), { recursive: true });

    try {
      // Simulate worker for M001
      process.env.GSD_PARALLEL_WORKER = "1";
      process.env.GSD_MILESTONE_LOCK = "M001";

      const r1 = acquireSessionLock(base);
      assert.ok(r1.acquired, "M001 worker acquires lock");

      // Verify the lock file is per-milestone
      const gsdDir = gsdRoot(base);
      const m001LockFile = join(gsdDir, "auto-M001.lock");
      assert.ok(existsSync(m001LockFile), "auto-M001.lock exists");

      // The shared auto.lock should NOT exist
      const sharedLockFile = join(gsdDir, "auto.lock");
      assert.ok(!existsSync(sharedLockFile), "shared auto.lock does NOT exist");

      // The per-milestone lock target directory should exist
      const m001LockTarget = join(gsdDir, "parallel", "M001");
      assert.ok(existsSync(m001LockTarget), "parallel/M001 directory exists");

      releaseSessionLock(base);

      // After release, per-milestone lock file should be cleaned
      assert.ok(!existsSync(m001LockFile), "auto-M001.lock cleaned after release");
    } finally {
      // Env restored here (not only in afterEach) so the rmSync below never
      // runs with parallel-mode state if an assertion throws mid-test.
      delete process.env.GSD_PARALLEL_WORKER;
      delete process.env.GSD_MILESTONE_LOCK;
      rmSync(base, { recursive: true, force: true });
    }
  });

  // ─── Bug 1d: crash-recovery uses per-milestone lock file ─────────────────
  test("Bug 1d: crash-recovery writeLock/readCrashLock uses per-milestone lock in parallel mode", () => {
    const base = mkdtempSync(join(tmpdir(), "gsd-parallel-crash-"));
    mkdirSync(join(base, ".gsd"), { recursive: true });

    try {
      process.env.GSD_PARALLEL_WORKER = "1";
      process.env.GSD_MILESTONE_LOCK = "M002";

      writeLock(base, "execute-task", "M002/S01/T01");

      const gsdDir = gsdRoot(base);
      const lockFile = join(gsdDir, "auto-M002.lock");
      assert.ok(existsSync(lockFile), "crash-recovery writes auto-M002.lock");

      const data = readCrashLock(base);
      assert.ok(data !== null, "readCrashLock reads per-milestone lock");
      assert.equal(data!.unitId, "M002/S01/T01");

      clearLock(base);
      assert.ok(!existsSync(lockFile), "clearLock removes per-milestone lock");
    } finally {
      delete process.env.GSD_PARALLEL_WORKER;
      delete process.env.GSD_MILESTONE_LOCK;
      rmSync(base, { recursive: true, force: true });
    }
  });

  // ─── Bug 3: syncProjectRootToWorktree skips same-path symlinks ───────────
  test("Bug 3: syncProjectRootToWorktree skips when .gsd resolves to same path (symlink)", () => {
    // Reproduce the ERR_FS_CP_EINVAL setup: both .gsd entries are symlinks
    // to ONE external directory, so source and destination are physically equal.
    const base = mkdtempSync(join(tmpdir(), "gsd-symlink-sync-"));
    const externalGsd = join(base, "external-gsd");
    const projectRoot = join(base, "project");
    const worktreePath = join(base, "worktree");

    mkdirSync(externalGsd, { recursive: true });
    mkdirSync(projectRoot, { recursive: true });
    mkdirSync(worktreePath, { recursive: true });

    // Create the external state directory with a milestone
    mkdirSync(join(externalGsd, "milestones", "M001"), { recursive: true });
    writeFileSync(
      join(externalGsd, "milestones", "M001", "M001-ROADMAP.md"),
      "# Roadmap",
    );

    // Symlink both project and worktree .gsd to the same external directory
    symlinkSync(externalGsd, join(projectRoot, ".gsd"));
    symlinkSync(externalGsd, join(worktreePath, ".gsd"));

    try {
      // This should NOT throw ERR_FS_CP_EINVAL — it should skip silently
      let threw = false;
      try {
        syncProjectRootToWorktree(projectRoot, worktreePath, "M001");
      } catch {
        threw = true;
      }
      assert.ok(!threw, "syncProjectRootToWorktree does not throw on same-path symlink");

      // Same for reverse direction
      threw = false;
      try {
        syncStateToProjectRoot(worktreePath, projectRoot, "M001");
      } catch {
        threw = true;
      }
      assert.ok(!threw, "syncStateToProjectRoot does not throw on same-path symlink");
    } finally {
      rmSync(base, { recursive: true, force: true });
    }
  });

  // ─── Bug 3b: sync still works when paths are different ───────────────────
  test("Bug 3b: syncProjectRootToWorktree copies when .gsd paths are different", () => {
    // Control case: distinct real directories must still be synced, proving
    // the same-path skip (Bug 3) did not disable copying entirely.
    const base = mkdtempSync(join(tmpdir(), "gsd-diff-sync-"));
    const projectRoot = join(base, "project");
    const worktreePath = join(base, "worktree");

    mkdirSync(join(projectRoot, ".gsd", "milestones", "M001"), { recursive: true });
    mkdirSync(join(worktreePath, ".gsd", "milestones"), { recursive: true });

    writeFileSync(
      join(projectRoot, ".gsd", "milestones", "M001", "M001-ROADMAP.md"),
      "# Roadmap content",
    );

    try {
      syncProjectRootToWorktree(projectRoot, worktreePath, "M001");

      // The roadmap should have been copied
      const copied = join(worktreePath, ".gsd", "milestones", "M001", "M001-ROADMAP.md");
      assert.ok(existsSync(copied), "milestone roadmap copied to worktree");
      assert.equal(readFileSync(copied, "utf-8"), "# Roadmap content");
    } finally {
      rmSync(base, { recursive: true, force: true });
    }
  });
});
|
||||
Loading…
Add table
Reference in a new issue