From 9a6a341b5749ea833f99301bda96e106b5a92d64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=82CHES?= Date: Tue, 24 Mar 2026 22:36:19 -0600 Subject: [PATCH] fix(gsd): prevent saveArtifactToDb from overwriting larger files with truncated content (#2442) (#2447) When a file already exists on disk and the new content is <50% of the existing file size, skip the disk write and store the existing file content in the DB instead. This prevents data loss when research prompts write full content via `write` then `gsd_summary_save` is called with an abbreviated summary. Co-authored-by: Claude Opus 4.6 (1M context) --- src/resources/extensions/gsd/db-writer.ts | 56 +++++++++---- .../extensions/gsd/tests/db-writer.test.ts | 79 +++++++++++++++++++ 2 files changed, 118 insertions(+), 17 deletions(-) diff --git a/src/resources/extensions/gsd/db-writer.ts b/src/resources/extensions/gsd/db-writer.ts index 02ec94c11..bff6fccff 100644 --- a/src/resources/extensions/gsd/db-writer.ts +++ b/src/resources/extensions/gsd/db-writer.ts @@ -9,7 +9,7 @@ // parseDecisionsTable() and parseRequirementsSections() with field fidelity. import { join, resolve } from 'node:path'; -import { readFileSync, existsSync } from 'node:fs'; +import { readFileSync, existsSync, statSync } from 'node:fs'; import type { Decision, Requirement } from './types.js'; import { resolveGsdRootFile } from './paths.js'; import { saveFile } from './files.js'; @@ -428,30 +428,52 @@ export async function saveArtifactToDb( try { const db = await import('./gsd-db.js'); + // Guard against path traversal before any reads/writes + const gsdDir = resolve(basePath, '.gsd'); + const fullPath = resolve(basePath, '.gsd', opts.path); + if (!fullPath.startsWith(gsdDir)) { + throw new GSDError(GSD_IO_ERROR, `saveArtifactToDb: path escapes .gsd/ directory: ${opts.path}`); + } + + // Shrinkage guard: if the file already exists and the new content is + // significantly smaller (<50%), preserve the richer file on disk and + // store its content in the DB instead of the abbreviated version. + let dbContent = opts.content; + let skipDiskWrite = false; + if (existsSync(fullPath)) { + const existingSize = statSync(fullPath).size; + const newSize = Buffer.byteLength(opts.content, 'utf-8'); + if (existingSize > 0 && newSize < existingSize * 0.5) { + process.stderr.write( + `gsd-db: saveArtifactToDb — new content (${newSize}B) is <50% of existing file ` + + `(${existingSize}B) at ${opts.path}. Preserving disk file to prevent data loss.\n`, + ); + dbContent = readFileSync(fullPath, 'utf-8'); + skipDiskWrite = true; + } + } + db.insertArtifact({ path: opts.path, artifact_type: opts.artifact_type, milestone_id: opts.milestone_id ?? null, slice_id: opts.slice_id ?? null, task_id: opts.task_id ?? null, - full_content: opts.content, + full_content: dbContent, }); - // Write the file to disk (guard against path traversal) - const gsdDir = resolve(basePath, '.gsd'); - const fullPath = resolve(basePath, '.gsd', opts.path); - if (!fullPath.startsWith(gsdDir)) { - throw new GSDError(GSD_IO_ERROR, `saveArtifactToDb: path escapes .gsd/ directory: ${opts.path}`); - } - try { - await saveFile(fullPath, opts.content); - } catch (diskErr) { - process.stderr.write( - `gsd-db: saveArtifactToDb — disk write failed, rolling back DB row: ${(diskErr as Error).message}\n`, - ); - const rollbackAdapter = db._getAdapter(); - rollbackAdapter?.prepare('DELETE FROM artifacts WHERE path = :path').run({ ':path': opts.path }); - throw diskErr; + // Write the file to disk (only if we're not preserving a richer existing file) + if (!skipDiskWrite) { + try { + await saveFile(fullPath, opts.content); + } catch (diskErr) { + process.stderr.write( + `gsd-db: saveArtifactToDb — disk write failed, rolling back DB row: ${(diskErr as Error).message}\n`, + ); + const rollbackAdapter = db._getAdapter(); + rollbackAdapter?.prepare('DELETE FROM artifacts WHERE path = :path').run({ ':path': opts.path }); + throw diskErr; + } } // Invalidate file-read caches so deriveState() sees the updated markdown. // Do NOT clear the artifacts table — we just wrote to it intentionally. diff --git a/src/resources/extensions/gsd/tests/db-writer.test.ts b/src/resources/extensions/gsd/tests/db-writer.test.ts index fa8f7170d..180e8578b 100644 --- a/src/resources/extensions/gsd/tests/db-writer.test.ts +++ b/src/resources/extensions/gsd/tests/db-writer.test.ts @@ -483,6 +483,85 @@ describe('db-writer', () => { } }); + test('saveArtifactToDb — shrinkage guard preserves larger existing file', async () => { + const tmpDir = makeTmpDir(); + const dbPath = path.join(tmpDir, '.gsd', 'gsd.db'); + openDatabase(dbPath); + + try { + const fullContent = '# Full Research\n\n' + 'x'.repeat(20000) + '\n'; + const abbreviatedContent = '# Summary\n\nShort version.\n'; + + // Pre-create the file with full content (simulating a prior `write` tool call) + const relPath = 'milestones/M001/M001-RESEARCH.md'; + const filePath = path.join(tmpDir, '.gsd', relPath); + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + fs.writeFileSync(filePath, fullContent); + + // Call saveArtifactToDb with abbreviated content — should trigger shrinkage guard + await saveArtifactToDb({ + path: relPath, + artifact_type: 'RESEARCH', + content: abbreviatedContent, + milestone_id: 'M001', + }, tmpDir); + + // Disk file should be preserved (not overwritten) + assert.deepStrictEqual( + fs.readFileSync(filePath, 'utf-8'), + fullContent, + 'disk file preserved — shrinkage guard prevented overwrite', + ); + + // DB should contain the full disk content, not the abbreviated content + const adapter = _getAdapter(); + const row = adapter! + .prepare('SELECT full_content FROM artifacts WHERE path = ?') + .get(relPath); + assert.deepStrictEqual( + row!['full_content'], + fullContent, + 'DB stores the richer disk content instead of abbreviated content', + ); + } finally { + closeDatabase(); + cleanupDir(tmpDir); + } + }); + + test('saveArtifactToDb — allows overwrite when new content is similar size', async () => { + const tmpDir = makeTmpDir(); + const dbPath = path.join(tmpDir, '.gsd', 'gsd.db'); + openDatabase(dbPath); + + try { + const oldContent = '# Summary v1\n\nOriginal content here.\n'; + const newContent = '# Summary v2\n\nUpdated content here with more details.\n'; + + const relPath = 'milestones/M001/M001-SUMMARY.md'; + const filePath = path.join(tmpDir, '.gsd', relPath); + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + fs.writeFileSync(filePath, oldContent); + + await saveArtifactToDb({ + path: relPath, + artifact_type: 'SUMMARY', + content: newContent, + milestone_id: 'M001', + }, tmpDir); + + // Disk file should be updated (new content is >=50% of old size) + assert.deepStrictEqual( + fs.readFileSync(filePath, 'utf-8'), + newContent, + 'disk file updated when new content is similar size', + ); + } finally { + closeDatabase(); + cleanupDir(tmpDir); + } + }); + // ═══════════════════════════════════════════════════════════════════════════ // Full Round-Trip: DB → Markdown → Parse → Compare // ═══════════════════════════════════════════════════════════════════════════