From cbb9c2edd91925c6a8ede06f02a992722f45e5fd Mon Sep 17 00:00:00 2001
From: Tom Boucher
Date: Mon, 30 Mar 2026 16:39:50 -0400
Subject: [PATCH] fix: detect project relocation and recover state without data loss (#3080)

* fix: detect project relocation and recover state without data loss

For repos with a remote URL, compute identity as SHA256(remoteUrl) only, dropping the git root path from the hash. This makes the identity stable across directory moves/renames -- the most common cause of silent data loss.

For local-only repos, write a .gsd-id marker file in the project root that records the identity hash. After a move, ensureGsdSymlink reads the marker, finds the orphaned state directory, and migrates data to the new identity path automatically.

Also handles the upgrade migration: when an existing .gsd symlink points to a valid state dir under the old hash format, data is transparently migrated to the new remote-only hash path.

Closes #2750

Co-Authored-By: Claude Opus 4.6

* fix: handle existing symlink in project-relocation recovery test

Add defensive unlinkSync calls before symlinkSync in ensureGsdSymlinkCore to prevent EEXIST race conditions when a dangling or residual symlink exists at the .gsd path during project relocation recovery.
Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 --- src/resources/extensions/gsd/gitignore.ts | 1 + src/resources/extensions/gsd/repo-identity.ts | 197 +++++++++++- .../tests/project-relocation-recovery.test.ts | 297 ++++++++++++++++++ 3 files changed, 484 insertions(+), 11 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/project-relocation-recovery.test.ts diff --git a/src/resources/extensions/gsd/gitignore.ts b/src/resources/extensions/gsd/gitignore.ts index da4b2ee91..93cddca4b 100644 --- a/src/resources/extensions/gsd/gitignore.ts +++ b/src/resources/extensions/gsd/gitignore.ts @@ -41,6 +41,7 @@ const GSD_RUNTIME_PATTERNS = [ const BASELINE_PATTERNS = [ // ── GSD state directory (symlink to external storage) ── ".gsd", + ".gsd-id", // ── OS junk ── ".DS_Store", diff --git a/src/resources/extensions/gsd/repo-identity.ts b/src/resources/extensions/gsd/repo-identity.ts index 39204ab91..8de304f36 100644 --- a/src/resources/extensions/gsd/repo-identity.ts +++ b/src/resources/extensions/gsd/repo-identity.ts @@ -8,7 +8,7 @@ import { createHash } from "node:crypto"; import { execFileSync } from "node:child_process"; -import { existsSync, lstatSync, mkdirSync, readdirSync, readFileSync, realpathSync, rmSync, symlinkSync, writeFileSync } from "node:fs"; +import { cpSync, existsSync, lstatSync, mkdirSync, readdirSync, readFileSync, realpathSync, renameSync, rmSync, symlinkSync, unlinkSync, writeFileSync } from "node:fs"; import { homedir } from "node:os"; import { basename, dirname, join, resolve } from "node:path"; @@ -276,9 +276,14 @@ export function validateProjectId(id: string): boolean { * If `GSD_PROJECT_ID` is set, returns it directly (validation is expected * to have already happened at startup via `validateProjectId`). * - * Otherwise returns SHA-256 of `${remoteUrl}\n${resolvedRoot}`, truncated - * to 12 hex chars. 
Deterministic: same repo always produces the same hash - * regardless of which worktree the caller is inside. + * For repos with a remote URL, returns SHA-256 of the remote URL only — + * this makes the identity stable across directory moves/renames (#2750). + * + * For local-only repos (no remote), includes the git root in the hash. + * Local repos use a `.gsd-id` marker file for recovery after moves. + * + * Deterministic: same repo always produces the same hash regardless of + * which worktree the caller is inside. */ export function repoIdentity(basePath: string): string { const projectId = process.env.GSD_PROJECT_ID; @@ -286,8 +291,14 @@ export function repoIdentity(basePath: string): string { return projectId; } const remoteUrl = getRemoteUrl(basePath); + if (remoteUrl) { + // Remote URL alone uniquely identifies the repo — path is redundant. + // This makes moves transparent for repos with remotes (#2750). + return createHash("sha256").update(remoteUrl).digest("hex").slice(0, 12); + } + // Local-only repo: include git root since there's no remote to anchor identity. const root = resolveGitRoot(basePath); - const input = `${remoteUrl}\n${root}`; + const input = `\n${root}`; return createHash("sha256").update(input).digest("hex").slice(0, 12); } @@ -351,21 +362,148 @@ export function cleanNumberedGsdVariants(projectPath: string): string[] { return removed; } +// ─── .gsd-id Marker ───────────────────────────────────────────────────────── + +/** + * Write a `.gsd-id` marker file in the project root. + * + * This file records the identity hash used for the external state directory. + * For local-only repos (no remote), this marker survives directory moves and + * enables automatic recovery of orphaned state (#2750). + * + * The marker is gitignored by ensureGitignore(). Non-fatal: failure to write + * the marker must never block project setup. 
+ */ +function writeGsdIdMarker(projectPath: string, identity: string): void { + try { + const markerPath = join(projectPath, ".gsd-id"); + // Only write if content differs to avoid unnecessary disk writes. + if (existsSync(markerPath)) { + try { + if (readFileSync(markerPath, "utf-8").trim() === identity) return; + } catch { /* fall through and overwrite */ } + } + writeFileSync(markerPath, identity + "\n", "utf-8"); + } catch { + // Non-fatal — marker write failure should not block project setup + } +} + +/** + * Read the `.gsd-id` marker from the project root. + * Returns the identity hash, or null if the marker doesn't exist or is unreadable. + */ +function readGsdIdMarker(projectPath: string): string | null { + try { + const markerPath = join(projectPath, ".gsd-id"); + if (!existsSync(markerPath)) return null; + const content = readFileSync(markerPath, "utf-8").trim(); + return /^[a-zA-Z0-9_-]+$/.test(content) ? content : null; + } catch { + return null; + } +} + +/** + * Check whether an external state directory has meaningful content. + * Returns true if the directory contains any files or subdirectories + * beyond just repo-meta.json. + */ +function hasProjectState(externalPath: string): boolean { + try { + if (!existsSync(externalPath)) return false; + const entries = readdirSync(externalPath); + return entries.some(e => e !== "repo-meta.json"); + } catch { + return false; + } +} + +/** + * Resolve the external state directory, with recovery for relocated projects. + * + * For local-only repos where the computed identity produces an empty state dir, + * checks the `.gsd-id` marker for the original identity hash and recovers + * the old state directory if it still exists and contains data (#2750). + * + * Returns the resolved external path (may differ from the computed identity). 
+ */ +function resolveExternalPathWithRecovery(projectPath: string): string { + const computedPath = externalGsdRoot(projectPath); + const computedId = repoIdentity(projectPath); + + // Check if computed path already has state — fast path, no recovery needed. + if (hasProjectState(computedPath)) { + return computedPath; + } + + // Check for .gsd-id marker from a previous location. + const markerId = readGsdIdMarker(projectPath); + if (markerId && markerId !== computedId) { + // The marker points to a different identity — the repo was likely moved. + const base = process.env.GSD_STATE_DIR || gsdHome; + const markerPath = join(base, "projects", markerId); + if (hasProjectState(markerPath)) { + // Recover: use the old state directory and update the marker to the new identity. + // Move the state from the old hash dir to the new one so future lookups work + // without the marker. + try { + mkdirSync(computedPath, { recursive: true }); + const entries = readdirSync(markerPath); + for (const entry of entries) { + try { + const src = join(markerPath, entry); + const dst = join(computedPath, entry); + // Use rename for same-filesystem (fast) or fall back to copy. + try { + renameSync(src, dst); + } catch { + cpSync(src, dst, { recursive: true, force: true }); + } + } catch { /* continue with remaining entries */ } + } + // Clean up old directory after successful migration. + try { rmSync(markerPath, { recursive: true, force: true }); } catch { /* non-fatal */ } + } catch { + // If migration fails, just point at the old directory. + return markerPath; + } + } + } + + return computedPath; +} + // ─── Symlink Management ───────────────────────────────────────────────────── /** * Ensure the `/.gsd` symlink points to the external state directory. * * 1. Clean up any macOS numbered collision variants (`.gsd 2`, `.gsd 3`, etc.) - * 2. mkdir -p the external dir - * 3. If `/.gsd` doesn't exist → create symlink - * 4. If `/.gsd` is already the correct symlink → no-op - * 5. 
If `/.gsd` is a real directory → return as-is (migration handles later) + * 2. Resolve external dir (with relocation recovery via `.gsd-id` marker) + * 3. mkdir -p the external dir + * 4. If `/.gsd` doesn't exist → create symlink + * 5. If `/.gsd` is already the correct symlink → no-op + * 6. If `/.gsd` is a real directory → return as-is (migration handles later) + * 7. Write `.gsd-id` marker for future relocation recovery * * Returns the resolved external path. */ export function ensureGsdSymlink(projectPath: string): string { - const externalPath = externalGsdRoot(projectPath); + const result = ensureGsdSymlinkCore(projectPath); + + // Write .gsd-id marker so future relocations can recover this state (#2750). + // Only write for the project root (not subdirectories or worktrees that + // delegate to a parent .gsd). + if (!isInsideWorktree(projectPath)) { + writeGsdIdMarker(projectPath, repoIdentity(projectPath)); + } + + return result; +} + +function ensureGsdSymlinkCore(projectPath: string): string { + const externalPath = resolveExternalPathWithRecovery(projectPath); const localGsd = join(projectPath, ".gsd"); const inWorktree = isInsideWorktree(projectPath); @@ -418,12 +556,28 @@ export function ensureGsdSymlink(projectPath: string): string { const replaceWithSymlink = (): string => { rmSync(localGsd, { recursive: true, force: true }); + // Defensive: remove any residual entry (e.g. dangling symlink) before creating. + try { unlinkSync(localGsd); } catch { /* already gone */ } symlinkSync(externalPath, localGsd, "junction"); return externalPath; }; + // Check for dangling symlinks (e.g. after relocation recovery removed the old + // state dir). existsSync follows symlinks, so it returns false for dangling ones. + // lstatSync does NOT follow, so we can detect the dangling symlink and replace it. 
if (!existsSync(localGsd)) { - // Nothing exists yet — create symlink + try { + const stat = lstatSync(localGsd); + if (stat.isSymbolicLink()) { + // Dangling symlink — replace with correct one (#2750). + return replaceWithSymlink(); + } + } catch { + // lstat also failed — nothing exists at this path + } + // Nothing exists yet — create symlink. + // Defensive: remove any residual entry to avoid EEXIST race (#2750). + try { unlinkSync(localGsd); } catch { /* nothing to remove */ } symlinkSync(externalPath, localGsd, "junction"); return externalPath; } @@ -442,6 +596,27 @@ export function ensureGsdSymlink(projectPath: string): string { if (inWorktree) { return replaceWithSymlink(); } + // After identity hash change (e.g. upgrade from path-based to remote-only + // hash, or relocation recovery), migrate data from old target to new path + // and update the symlink (#2750). + if (!hasProjectState(externalPath) && hasProjectState(target)) { + try { + mkdirSync(externalPath, { recursive: true }); + const oldEntries = readdirSync(target); + for (const entry of oldEntries) { + try { + const src = join(target, entry); + const dst = join(externalPath, entry); + try { renameSync(src, dst); } catch { cpSync(src, dst, { recursive: true, force: true }); } + } catch { /* continue */ } + } + try { rmSync(target, { recursive: true, force: true }); } catch { /* non-fatal */ } + return replaceWithSymlink(); + } catch { + // Migration failed — preserve old symlink + return target; + } + } // Outside worktrees, preserve custom overrides or legacy symlinks. 
return target; } diff --git a/src/resources/extensions/gsd/tests/project-relocation-recovery.test.ts b/src/resources/extensions/gsd/tests/project-relocation-recovery.test.ts new file mode 100644 index 000000000..22b451c4a --- /dev/null +++ b/src/resources/extensions/gsd/tests/project-relocation-recovery.test.ts @@ -0,0 +1,297 @@ +/** + * Project Relocation Recovery Tests (#2750) + * + * Verifies that moving/renaming a GSD project directory does not cause + * silent data loss. When a repo has a remote URL, the identity hash + * should be based solely on the remote — making moves transparent. + * + * For local-only repos (no remote), ensureGsdSymlink should detect + * orphaned state directories with a matching .gsd-id marker and + * recover them automatically. + */ + +import { describe, test, before, after } from "node:test"; +import assert from "node:assert/strict"; +import { + mkdtempSync, + rmSync, + writeFileSync, + readFileSync, + existsSync, + realpathSync, + mkdirSync, + readdirSync, + renameSync, +} from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { execFileSync } from "node:child_process"; + +import { + repoIdentity, + externalGsdRoot, + ensureGsdSymlink, + readRepoMeta, + externalProjectsRoot, +} from "../repo-identity.ts"; + +function git(args: string[], cwd: string): string { + return execFileSync("git", args, { + cwd, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + }).trim(); +} + +function normalizePath(p: string): string { + const resolved = + process.platform === "win32" ? realpathSync.native(p) : realpathSync(p); + return process.platform === "win32" ? 
resolved.toLowerCase() : resolved; +} + +function initRepo(dir: string, remote?: string): void { + git(["init", "-b", "main"], dir); + git(["config", "user.name", "Test"], dir); + git(["config", "user.email", "test@example.com"], dir); + if (remote) { + git(["remote", "add", "origin", remote], dir); + } + writeFileSync(join(dir, "README.md"), "# Test\n", "utf-8"); + git(["add", "README.md"], dir); + git(["commit", "-m", "init"], dir); +} + +describe("project-relocation-recovery (#2750)", () => { + let stateDir: string; + let savedStateDir: string | undefined; + + before(() => { + savedStateDir = process.env.GSD_STATE_DIR; + stateDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-state-"))); + process.env.GSD_STATE_DIR = stateDir; + }); + + after(() => { + if (savedStateDir !== undefined) { + process.env.GSD_STATE_DIR = savedStateDir; + } else { + delete process.env.GSD_STATE_DIR; + } + rmSync(stateDir, { recursive: true, force: true }); + }); + + // ── Remote repos: identity should be path-independent ───────────────── + + test("repoIdentity is stable across moves for repos with a remote URL", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-a-"))); + initRepo(repoA, "https://github.com/example/myrepo.git"); + + const identityBefore = repoIdentity(repoA); + + // Move the repo to a new location + const repoB = join( + tmpdir(), + `gsd-reloc-b-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + renameSync(repoA, repoB); + + const identityAfter = repoIdentity(repoB); + + assert.strictEqual( + identityAfter, + identityBefore, + "identity hash must be stable when a remote-enabled repo is moved", + ); + + rmSync(repoB, { recursive: true, force: true }); + }); + + test("ensureGsdSymlink reuses the same external dir after repo move (remote repo)", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-reuse-a-"))); + initRepo(repoA, "https://github.com/example/reloc-reuse.git"); + + // Initialize GSD state with 
some planning data + const externalA = ensureGsdSymlink(repoA); + const milestonesPath = join(externalA, "milestones"); + mkdirSync(milestonesPath, { recursive: true }); + writeFileSync( + join(milestonesPath, "M001.md"), + "# Milestone 1\nImportant planning data\n", + "utf-8", + ); + + // Move the repo + const repoB = join( + tmpdir(), + `gsd-reloc-reuse-b-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + renameSync(repoA, repoB); + + // ensureGsdSymlink at the new location should find the same external dir + const externalB = ensureGsdSymlink(repoB); + + assert.strictEqual( + normalizePath(externalB), + normalizePath(externalA), + "external state dir must be the same after move", + ); + + // Planning data must survive the move + assert.ok( + existsSync(join(externalB, "milestones", "M001.md")), + "milestone data must survive project relocation", + ); + + const content = readFileSync( + join(externalB, "milestones", "M001.md"), + "utf-8", + ); + assert.ok( + content.includes("Important planning data"), + "milestone content must be preserved", + ); + + rmSync(repoB, { recursive: true, force: true }); + }); + + test("repo-meta.json gitRoot is updated after move (remote repo)", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-meta-a-"))); + initRepo(repoA, "https://github.com/example/reloc-meta.git"); + + const externalA = ensureGsdSymlink(repoA); + const metaBefore = readRepoMeta(externalA); + assert.ok(metaBefore !== null, "metadata should exist before move"); + + // Move the repo + const repoB = join( + tmpdir(), + `gsd-reloc-meta-b-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + renameSync(repoA, repoB); + + const externalB = ensureGsdSymlink(repoB); + const metaAfter = readRepoMeta(externalB); + assert.ok(metaAfter !== null, "metadata should exist after move"); + assert.strictEqual( + normalizePath(metaAfter!.gitRoot), + normalizePath(repoB), + "repo-meta.json gitRoot must be updated to new location", + ); + 
assert.strictEqual( + metaAfter!.createdAt, + metaBefore!.createdAt, + "createdAt must be preserved across moves", + ); + + rmSync(repoB, { recursive: true, force: true }); + }); + + // ── Local-only repos: .gsd-id marker provides recovery ──────────────── + + test("ensureGsdSymlink writes a .gsd-id marker in the project root", () => { + const repo = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-marker-"))); + initRepo(repo); + + ensureGsdSymlink(repo); + + const markerPath = join(repo, ".gsd-id"); + assert.ok(existsSync(markerPath), ".gsd-id marker must be written by ensureGsdSymlink"); + + const markerId = readFileSync(markerPath, "utf-8").trim(); + const computedId = repoIdentity(repo); + assert.strictEqual(markerId, computedId, ".gsd-id must contain the repo identity hash"); + + rmSync(repo, { recursive: true, force: true }); + }); + + test("local-only repo recovers state via .gsd-id marker after move", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-local-a-"))); + initRepo(repoA); + // No remote — identity includes gitRoot + + // Initialize GSD state + const externalA = ensureGsdSymlink(repoA); + mkdirSync(join(externalA, "milestones"), { recursive: true }); + writeFileSync( + join(externalA, "milestones", "M001.md"), + "# Local Milestone\n", + "utf-8", + ); + + const identityBefore = repoIdentity(repoA); + + // Move the repo + const repoB = join( + tmpdir(), + `gsd-reloc-local-b-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + renameSync(repoA, repoB); + + // The identity WILL change (no remote, gitRoot changed) + const identityAfter = repoIdentity(repoB); + assert.notStrictEqual( + identityAfter, + identityBefore, + "local-only repo identity changes with move (expected)", + ); + + // But ensureGsdSymlink should detect .gsd-id marker and recover + const externalB = ensureGsdSymlink(repoB); + assert.ok( + existsSync(join(externalB, "milestones", "M001.md")), + "local-only repo must recover state via .gsd-id marker 
after move", + ); + + rmSync(repoB, { recursive: true, force: true }); + }); + + // ── Edge cases ──────────────────────────────────────────────────────── + + test("identity remains different for repos with different remotes", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-diff-a-"))); + initRepo(repoA, "https://github.com/example/repo-alpha.git"); + + const repoB = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-diff-b-"))); + initRepo(repoB, "https://github.com/example/repo-beta.git"); + + assert.notStrictEqual( + repoIdentity(repoA), + repoIdentity(repoB), + "repos with different remotes must have different identities", + ); + + rmSync(repoA, { recursive: true, force: true }); + rmSync(repoB, { recursive: true, force: true }); + }); + + test("no orphaned state dir created when remote repo is moved", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-orphan-a-"))); + initRepo(repoA, "https://github.com/example/no-orphan.git"); + + ensureGsdSymlink(repoA); + + // Count project dirs before move + const projectsDir = externalProjectsRoot(); + const countBefore = existsSync(projectsDir) + ? readdirSync(projectsDir).length + : 0; + + // Move the repo + const repoB = join( + tmpdir(), + `gsd-reloc-orphan-b-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + renameSync(repoA, repoB); + + ensureGsdSymlink(repoB); + + const countAfter = readdirSync(projectsDir).length; + assert.strictEqual( + countAfter, + countBefore, + "moving a remote repo must not create a new orphaned state directory", + ); + + rmSync(repoB, { recursive: true, force: true }); + }); +});