From e71de432ab7050e6ed54a077d3bcbf20141e334d Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Mon, 30 Mar 2026 16:43:44 -0400 Subject: [PATCH] fix: migrate unit ownership from JSON to SQLite to eliminate read-modify-write race (#3061) The JSON-based unit-claims storage had a lost-update race under concurrent multi-agent use: two agents could both read the file as unclaimed, then both write their claim, with the second silently overwriting the first. Replace with a SQLite-backed store using INSERT OR IGNORE on a PRIMARY KEY constraint for atomic first-writer-wins claim semantics. claimUnit() now returns boolean (true = claimed, false = already claimed by another agent). Closes #2728 Co-authored-by: Claude Opus 4.6 --- .../gsd/tests/unit-ownership.test.ts | 121 +++++++-- .../extensions/gsd/unit-ownership.ts | 243 +++++++++++++++--- 2 files changed, 309 insertions(+), 55 deletions(-) diff --git a/src/resources/extensions/gsd/tests/unit-ownership.test.ts b/src/resources/extensions/gsd/tests/unit-ownership.test.ts index fd062c9c8..39ea6202f 100644 --- a/src/resources/extensions/gsd/tests/unit-ownership.test.ts +++ b/src/resources/extensions/gsd/tests/unit-ownership.test.ts @@ -3,7 +3,7 @@ import test from 'node:test'; import assert from 'node:assert/strict'; -import { mkdtempSync, rmSync, existsSync, readFileSync } from 'node:fs'; +import { mkdtempSync, rmSync } from 'node:fs'; import { join } from 'node:path'; import { tmpdir } from 'node:os'; @@ -14,6 +14,8 @@ import { checkOwnership, taskUnitKey, sliceUnitKey, + initOwnershipTable, + closeOwnershipDb, } from '../unit-ownership.ts'; function makeTmpBase(): string { @@ -34,28 +36,51 @@ test('sliceUnitKey: builds correct key', () => { assert.equal(sliceUnitKey('M001', 'S01'), 'M001/S01'); }); -// ─── Claim / get / release ─────────────────────────────────────────────── +// ─── Claim / get / release (SQLite-backed) ────────────────────────────── -test('claimUnit: creates claim file and records agent', () => { +test('claimUnit: creates DB and records agent', () => { const base = makeTmpBase(); try { - claimUnit(base, 'M001/S01/T01', 'executor-01'); + initOwnershipTable(base); + const claimed = claimUnit(base, 'M001/S01/T01', 'executor-01'); - assert.ok(existsSync(join(base, '.gsd', 'unit-claims.json')), 'claim file should exist'); + assert.equal(claimed, true, 'first claim should succeed'); assert.equal(getOwner(base, 'M001/S01/T01'), 'executor-01'); } finally { + closeOwnershipDb(base); cleanup(base); } }); -test('claimUnit: overwrites existing claim (last writer wins)', () => { +test('claimUnit: rejects second claim on same unit (first-writer-wins)', () => { const base = makeTmpBase(); try { - claimUnit(base, 'M001/S01/T01', 'executor-01'); - claimUnit(base, 'M001/S01/T01', 'executor-02'); + initOwnershipTable(base); + const first = claimUnit(base, 'M001/S01/T01', 'executor-01'); + const second = claimUnit(base, 'M001/S01/T01', 'executor-02'); - assert.equal(getOwner(base, 'M001/S01/T01'), 'executor-02'); + assert.equal(first, true, 'first claim should succeed'); + assert.equal(second, false, 'second claim should fail (first-writer-wins)'); + assert.equal(getOwner(base, 'M001/S01/T01'), 'executor-01', + 'original owner must be preserved'); } finally { + closeOwnershipDb(base); + cleanup(base); + } +}); + +test('claimUnit: same agent re-claiming same unit succeeds', () => { + const base = makeTmpBase(); + try { + initOwnershipTable(base); + const first = claimUnit(base, 'M001/S01/T01', 'agent-a'); + const second = claimUnit(base, 'M001/S01/T01', 'agent-a'); + + assert.equal(first, true); + assert.equal(second, true, 're-claim by same agent should succeed'); + assert.equal(getOwner(base, 'M001/S01/T01'), 'agent-a'); + } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -63,21 +88,25 @@ test('claimUnit: overwrites existing claim (last writer wins)', () => { test('claimUnit: multiple units can be claimed independently', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); claimUnit(base, 'M001/S01/T02', 'agent-b'); assert.equal(getOwner(base, 'M001/S01/T01'), 'agent-a'); assert.equal(getOwner(base, 'M001/S01/T02'), 'agent-b'); } finally { + closeOwnershipDb(base); cleanup(base); } }); -test('getOwner: returns null when no claim file exists', () => { +test('getOwner: returns null when no DB initialized', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); assert.equal(getOwner(base, 'M001/S01/T01'), null); } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -85,9 +114,11 @@ test('getOwner: returns null when no claim file exists', () => { test('getOwner: returns null for unclaimed unit', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); assert.equal(getOwner(base, 'M001/S01/T99'), null); } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -95,11 +126,13 @@ test('getOwner: returns null for unclaimed unit', () => { test('releaseUnit: removes claim', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); releaseUnit(base, 'M001/S01/T01'); assert.equal(getOwner(base, 'M001/S01/T01'), null); } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -107,9 +140,27 @@ test('releaseUnit: removes claim', () => { test('releaseUnit: no-op for non-existent claim', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); // Should not throw releaseUnit(base, 'M001/S01/T01'); } finally { + closeOwnershipDb(base); + cleanup(base); + } +}); + +test('releaseUnit: allows reclaim after release', () => { + const base = makeTmpBase(); + try { + initOwnershipTable(base); + claimUnit(base, 'M001/S01/T01', 'agent-a'); + releaseUnit(base, 'M001/S01/T01'); + + const reclaimed = claimUnit(base, 'M001/S01/T01', 'agent-b'); + assert.equal(reclaimed, true, 'reclaim after release should succeed'); + assert.equal(getOwner(base, 'M001/S01/T01'), 'agent-b'); + } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -119,20 +170,13 @@ test('releaseUnit: no-op for non-existent claim', () => { test('checkOwnership: returns null when no actorName provided (opt-in)', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); // No actorName → ownership not enforced assert.equal(checkOwnership(base, 'M001/S01/T01', undefined), null); } finally { - cleanup(base); - } -}); - -test('checkOwnership: returns null when no claim file exists', () => { - const base = makeTmpBase(); - try { - assert.equal(checkOwnership(base, 'M001/S01/T01', 'agent-a'), null); - } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -140,11 +184,13 @@ test('checkOwnership: returns null when no claim file exists', () => { test('checkOwnership: returns null when unit is unclaimed', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); // Different unit, unclaimed assert.equal(checkOwnership(base, 'M001/S01/T99', 'agent-b'), null); } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -152,10 +198,12 @@ test('checkOwnership: returns null when unit is unclaimed', () => { test('checkOwnership: returns null when actor matches owner', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); assert.equal(checkOwnership(base, 'M001/S01/T01', 'agent-a'), null); } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -163,6 +211,7 @@ test('checkOwnership: returns null when actor matches owner', () => { test('checkOwnership: returns error string when actor does not match owner', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); const err = checkOwnership(base, 'M001/S01/T01', 'agent-b'); @@ -170,6 +219,40 @@ test('checkOwnership: returns error string when actor does not match owner', () assert.match(err!, /owned by agent-a/); assert.match(err!, /not agent-b/); } finally { + closeOwnershipDb(base); + cleanup(base); + } +}); + +// ─── Race condition: first-writer-wins atomicity ───────────────────────── + +test('claimUnit: concurrent claims — only first writer wins (no lost update)', () => { + const base = makeTmpBase(); + try { + initOwnershipTable(base); + + // Simulate the race described in #2728: + // Two agents both try to claim the same unit. + // With SQLite INSERT OR IGNORE, only the first succeeds. + const results: boolean[] = []; + const agents = ['agent-alpha', 'agent-beta', 'agent-gamma']; + for (const agent of agents) { + results.push(claimUnit(base, 'M001/S01/T01', agent)); + } + + // Exactly one agent should have won + const wins = results.filter(r => r === true); + assert.equal(wins.length, 1, 'exactly one agent should win the claim'); + + // The winner is the first agent (deterministic in single-threaded) + assert.equal(results[0], true); + assert.equal(results[1], false); + assert.equal(results[2], false); + + // The owner must be the first agent + assert.equal(getOwner(base, 'M001/S01/T01'), 'agent-alpha'); + } finally { + closeOwnershipDb(base); cleanup(base); } }); diff --git a/src/resources/extensions/gsd/unit-ownership.ts b/src/resources/extensions/gsd/unit-ownership.ts index 9bbeb4f22..acae94999 100644 --- a/src/resources/extensions/gsd/unit-ownership.ts +++ b/src/resources/extensions/gsd/unit-ownership.ts @@ -3,18 +3,20 @@ // // An agent can claim a unit (task, slice) before working on it. // complete-task and complete-slice enforce ownership when claims exist. -// If no claim file is present, ownership is not enforced (backward compatible). +// Claims are stored in SQLite (.gsd/unit-claims.db) for atomic +// first-writer-wins semantics via INSERT OR IGNORE. // -// Claim file location: .gsd/unit-claims.json // Unit key format: // task: "//" // slice: "/" // // Copyright (c) 2026 Jeremy McSpadden -import { existsSync, readFileSync, mkdirSync } from "node:fs"; +import { createRequire } from "node:module"; +import { mkdirSync } from "node:fs"; import { join } from "node:path"; -import { atomicWriteSync } from "./atomic-write.js"; + +const _require = createRequire(import.meta.url); // ─── Types ─────────────────────────────────────────────────────────────── @@ -23,7 +25,133 @@ export interface UnitClaim { claimed_at: string; } -type ClaimsMap = Record; +// ─── SQLite Provider (mirrors gsd-db.ts pattern) ───────────────────────── + +interface StmtLike { + run(...params: unknown[]): unknown; + get(...params: unknown[]): Record | undefined; +} + +interface DbLike { + exec(sql: string): void; + prepare(sql: string): StmtLike; + close(): void; +} + +type ProviderName = "node:sqlite" | "better-sqlite3"; + +let providerName: ProviderName | null = null; +let providerModule: unknown = null; +let loadAttempted = false; + +function suppressSqliteWarning(): void { + const origEmit = process.emit; + // @ts-expect-error overriding process.emit for warning filter + process.emit = function (event: string, ...args: unknown[]): boolean { + if ( + event === "warning" && + args[0] && + typeof args[0] === "object" && + "name" in args[0] && + (args[0] as { name: string }).name === "ExperimentalWarning" && + "message" in args[0] && + typeof (args[0] as { message: string }).message === "string" && + (args[0] as { message: string }).message.includes("SQLite") + ) { + return false; + } + return origEmit.apply(process, [event, ...args] as Parameters) as unknown as boolean; + }; +} + +function loadProvider(): void { + if (loadAttempted) return; + loadAttempted = true; + + try { + suppressSqliteWarning(); + const mod = _require("node:sqlite"); + if (mod.DatabaseSync) { + providerModule = mod; + providerName = "node:sqlite"; + return; + } + } catch { + // unavailable + } + + try { + const mod = _require("better-sqlite3"); + if (typeof mod === "function" || (mod && mod.default)) { + providerModule = mod.default || mod; + providerName = "better-sqlite3"; + return; + } + } catch { + // unavailable + } +} + +function normalizeRow(row: unknown): Record | undefined { + if (row == null) return undefined; + if (Object.getPrototypeOf(row) === null) { + return { ...(row as Record) }; + } + return row as Record; +} + +function openRawDb(path: string): unknown { + loadProvider(); + if (!providerModule || !providerName) return null; + + if (providerName === "node:sqlite") { + const { DatabaseSync } = providerModule as { + DatabaseSync: new (path: string) => unknown; + }; + return new DatabaseSync(path); + } + + const Database = providerModule as new (path: string) => unknown; + return new Database(path); +} + +function wrapDb(rawDb: unknown): DbLike { + const db = rawDb as { + exec(sql: string): void; + prepare(sql: string): { + run(...args: unknown[]): unknown; + get(...args: unknown[]): unknown; + }; + close(): void; + }; + return { + exec(sql: string): void { db.exec(sql); }, + prepare(sql: string): StmtLike { + const raw = db.prepare(sql); + return { + run(...params: unknown[]): unknown { return raw.run(...params); }, + get(...params: unknown[]): Record | undefined { + return normalizeRow(raw.get(...params)); + }, + }; + }, + close(): void { db.close(); }, + }; +} + +// ─── Per-basePath DB pool ──────────────────────────────────────────────── + +const dbPool = new Map(); + +function claimsDbPath(basePath: string): string { + return join(basePath, ".gsd", "unit-claims.db"); +} + +function getDb(basePath: string): DbLike | null { + const existing = dbPool.get(basePath); + if (existing) return existing; + return null; +} // ─── Key Builders ──────────────────────────────────────────────────────── @@ -35,60 +163,103 @@ export function sliceUnitKey(milestoneId: string, sliceId: string): string { return `${milestoneId}/${sliceId}`; } -// ─── File Path ─────────────────────────────────────────────────────────── +// ─── Lifecycle ─────────────────────────────────────────────────────────── -function claimsPath(basePath: string): string { - return join(basePath, ".gsd", "unit-claims.json"); +/** + * Initialize the ownership SQLite database for a given basePath. + * Creates .gsd/ directory and unit-claims.db with the unit_claims table. + * Safe to call multiple times (idempotent). + */ +export function initOwnershipTable(basePath: string): void { + if (dbPool.has(basePath)) return; + + const dir = join(basePath, ".gsd"); + mkdirSync(dir, { recursive: true }); + + const raw = openRawDb(claimsDbPath(basePath)); + if (!raw) { + throw new Error("No SQLite provider available for unit-ownership"); + } + + const db = wrapDb(raw); + + db.exec("PRAGMA journal_mode=WAL"); + db.exec("PRAGMA busy_timeout = 5000"); + db.exec("PRAGMA synchronous = NORMAL"); + + db.exec(` + CREATE TABLE IF NOT EXISTS unit_claims ( + unit_key TEXT PRIMARY KEY, + agent_name TEXT NOT NULL, + claimed_at TEXT NOT NULL + ) + `); + + dbPool.set(basePath, db); } -// ─── Read Claims ───────────────────────────────────────────────────────── - -function readClaims(basePath: string): ClaimsMap | null { - const path = claimsPath(basePath); - if (!existsSync(path)) return null; - try { - return JSON.parse(readFileSync(path, "utf-8")) as ClaimsMap; - } catch { - return null; - } +/** + * Close the ownership database for a given basePath. + * Safe to call even if not initialized. + */ +export function closeOwnershipDb(basePath: string): void { + const db = dbPool.get(basePath); + if (!db) return; + try { db.close(); } catch { /* swallow */ } + dbPool.delete(basePath); } // ─── Public API ────────────────────────────────────────────────────────── /** * Claim a unit for an agent. - * Overwrites any existing claim for this unit (last writer wins). + * Uses INSERT OR IGNORE for atomic first-writer-wins semantics. + * Returns true if the claim was acquired (or the same agent already owns it). + * Returns false if a different agent already owns the unit. */ -export function claimUnit(basePath: string, unitKey: string, agentName: string): void { - const claims = readClaims(basePath) ?? {}; - claims[unitKey] = { agent: agentName, claimed_at: new Date().toISOString() }; - const dir = join(basePath, ".gsd"); - mkdirSync(dir, { recursive: true }); - atomicWriteSync(claimsPath(basePath), JSON.stringify(claims, null, 2) + "\n"); +export function claimUnit(basePath: string, unitKey: string, agentName: string): boolean { + const db = getDb(basePath); + if (!db) { + // Auto-init if not already initialized (backward compat) + initOwnershipTable(basePath); + return claimUnit(basePath, unitKey, agentName); + } + + // INSERT OR IGNORE: if the row already exists, this is a no-op. + // The PRIMARY KEY constraint on unit_key prevents duplicate claims. + db.prepare( + "INSERT OR IGNORE INTO unit_claims (unit_key, agent_name, claimed_at) VALUES (?, ?, ?)", + ).run(unitKey, agentName, new Date().toISOString()); + + // Check who owns it now + const row = db.prepare("SELECT agent_name FROM unit_claims WHERE unit_key = ?").get(unitKey); + const owner = row?.agent_name as string | undefined; + + return owner === agentName; } /** - * Release a unit claim (remove it from the claims map). + * Release a unit claim (remove it from the claims table). */ export function releaseUnit(basePath: string, unitKey: string): void { - const claims = readClaims(basePath); - if (!claims || !(unitKey in claims)) return; - delete claims[unitKey]; - atomicWriteSync(claimsPath(basePath), JSON.stringify(claims, null, 2) + "\n"); + const db = getDb(basePath); + if (!db) return; + db.prepare("DELETE FROM unit_claims WHERE unit_key = ?").run(unitKey); } /** - * Get the current owner of a unit, or null if unclaimed / no claims file. + * Get the current owner of a unit, or null if unclaimed. */ export function getOwner(basePath: string, unitKey: string): string | null { - const claims = readClaims(basePath); - if (!claims) return null; - return claims[unitKey]?.agent ?? null; + const db = getDb(basePath); + if (!db) return null; + const row = db.prepare("SELECT agent_name FROM unit_claims WHERE unit_key = ?").get(unitKey); + return (row?.agent_name as string) ?? null; } /** * Check if an actor is authorized to operate on a unit. - * Returns null if ownership passes (or is unclaimed / no file). + * Returns null if ownership passes (or is unclaimed). * Returns an error string if a different agent owns the unit. */ export function checkOwnership( @@ -98,7 +269,7 @@ export function checkOwnership( ): string | null { if (!actorName) return null; // no actor identity provided — opt-in, so allow const owner = getOwner(basePath, unitKey); - if (owner === null) return null; // unit unclaimed or no claims file + if (owner === null) return null; // unit unclaimed if (owner === actorName) return null; // actor is the owner return `Unit ${unitKey} is owned by ${owner}, not ${actorName}`; }