fix(gsd): prevent saveArtifactToDb from overwriting larger files with truncated content (#2442) (#2447)

When a file already exists on disk and the new content is <50% of the
existing file size, skip the disk write and store the existing file
content in the DB instead. This prevents data loss when research prompts
write full content via `write` then `gsd_summary_save` is called with
an abbreviated summary.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-24 22:36:19 -06:00 committed by GitHub
parent cf0fe6c571
commit 9a6a341b57
2 changed files with 118 additions and 17 deletions

View file

@ -9,7 +9,7 @@
// parseDecisionsTable() and parseRequirementsSections() with field fidelity.
import { join, resolve } from 'node:path';
import { readFileSync, existsSync } from 'node:fs';
import { readFileSync, existsSync, statSync } from 'node:fs';
import type { Decision, Requirement } from './types.js';
import { resolveGsdRootFile } from './paths.js';
import { saveFile } from './files.js';
@ -428,30 +428,52 @@ export async function saveArtifactToDb(
try {
const db = await import('./gsd-db.js');
// Guard against path traversal before any reads/writes
const gsdDir = resolve(basePath, '.gsd');
const fullPath = resolve(basePath, '.gsd', opts.path);
if (!fullPath.startsWith(gsdDir)) {
throw new GSDError(GSD_IO_ERROR, `saveArtifactToDb: path escapes .gsd/ directory: ${opts.path}`);
}
// Shrinkage guard: if the file already exists and the new content is
// significantly smaller (<50%), preserve the richer file on disk and
// store its content in the DB instead of the abbreviated version.
let dbContent = opts.content;
let skipDiskWrite = false;
if (existsSync(fullPath)) {
const existingSize = statSync(fullPath).size;
const newSize = Buffer.byteLength(opts.content, 'utf-8');
if (existingSize > 0 && newSize < existingSize * 0.5) {
process.stderr.write(
`gsd-db: saveArtifactToDb — new content (${newSize}B) is <50% of existing file ` +
`(${existingSize}B) at ${opts.path}. Preserving disk file to prevent data loss.\n`,
);
dbContent = readFileSync(fullPath, 'utf-8');
skipDiskWrite = true;
}
}
db.insertArtifact({
path: opts.path,
artifact_type: opts.artifact_type,
milestone_id: opts.milestone_id ?? null,
slice_id: opts.slice_id ?? null,
task_id: opts.task_id ?? null,
full_content: opts.content,
full_content: dbContent,
});
// Write the file to disk (guard against path traversal)
const gsdDir = resolve(basePath, '.gsd');
const fullPath = resolve(basePath, '.gsd', opts.path);
if (!fullPath.startsWith(gsdDir)) {
throw new GSDError(GSD_IO_ERROR, `saveArtifactToDb: path escapes .gsd/ directory: ${opts.path}`);
}
try {
await saveFile(fullPath, opts.content);
} catch (diskErr) {
process.stderr.write(
`gsd-db: saveArtifactToDb — disk write failed, rolling back DB row: ${(diskErr as Error).message}\n`,
);
const rollbackAdapter = db._getAdapter();
rollbackAdapter?.prepare('DELETE FROM artifacts WHERE path = :path').run({ ':path': opts.path });
throw diskErr;
// Write the file to disk (only if we're not preserving a richer existing file)
if (!skipDiskWrite) {
try {
await saveFile(fullPath, opts.content);
} catch (diskErr) {
process.stderr.write(
`gsd-db: saveArtifactToDb — disk write failed, rolling back DB row: ${(diskErr as Error).message}\n`,
);
const rollbackAdapter = db._getAdapter();
rollbackAdapter?.prepare('DELETE FROM artifacts WHERE path = :path').run({ ':path': opts.path });
throw diskErr;
}
}
// Invalidate file-read caches so deriveState() sees the updated markdown.
// Do NOT clear the artifacts table — we just wrote to it intentionally.

View file

@ -483,6 +483,85 @@ describe('db-writer', () => {
}
});
test('saveArtifactToDb — shrinkage guard preserves larger existing file', async () => {
const tmpDir = makeTmpDir();
const dbPath = path.join(tmpDir, '.gsd', 'gsd.db');
openDatabase(dbPath);
try {
const fullContent = '# Full Research\n\n' + 'x'.repeat(20000) + '\n';
const abbreviatedContent = '# Summary\n\nShort version.\n';
// Pre-create the file with full content (simulating a prior `write` tool call)
const relPath = 'milestones/M001/M001-RESEARCH.md';
const filePath = path.join(tmpDir, '.gsd', relPath);
fs.mkdirSync(path.dirname(filePath), { recursive: true });
fs.writeFileSync(filePath, fullContent);
// Call saveArtifactToDb with abbreviated content — should trigger shrinkage guard
await saveArtifactToDb({
path: relPath,
artifact_type: 'RESEARCH',
content: abbreviatedContent,
milestone_id: 'M001',
}, tmpDir);
// Disk file should be preserved (not overwritten)
assert.deepStrictEqual(
fs.readFileSync(filePath, 'utf-8'),
fullContent,
'disk file preserved — shrinkage guard prevented overwrite',
);
// DB should contain the full disk content, not the abbreviated content
const adapter = _getAdapter();
const row = adapter!
.prepare('SELECT full_content FROM artifacts WHERE path = ?')
.get(relPath);
assert.deepStrictEqual(
row!['full_content'],
fullContent,
'DB stores the richer disk content instead of abbreviated content',
);
} finally {
closeDatabase();
cleanupDir(tmpDir);
}
});
test('saveArtifactToDb — allows overwrite when new content is similar size', async () => {
const tmpDir = makeTmpDir();
const dbPath = path.join(tmpDir, '.gsd', 'gsd.db');
openDatabase(dbPath);
try {
const oldContent = '# Summary v1\n\nOriginal content here.\n';
const newContent = '# Summary v2\n\nUpdated content here with more details.\n';
const relPath = 'milestones/M001/M001-SUMMARY.md';
const filePath = path.join(tmpDir, '.gsd', relPath);
fs.mkdirSync(path.dirname(filePath), { recursive: true });
fs.writeFileSync(filePath, oldContent);
await saveArtifactToDb({
path: relPath,
artifact_type: 'SUMMARY',
content: newContent,
milestone_id: 'M001',
}, tmpDir);
// Disk file should be updated (new content is >=50% of old size)
assert.deepStrictEqual(
fs.readFileSync(filePath, 'utf-8'),
newContent,
'disk file updated when new content is similar size',
);
} finally {
closeDatabase();
cleanupDir(tmpDir);
}
});
// ═══════════════════════════════════════════════════════════════════════════
// Full Round-Trip: DB → Markdown → Parse → Compare
// ═══════════════════════════════════════════════════════════════════════════