fix(gsd): prevent saveArtifactToDb from overwriting larger files with truncated content (#2442) (#2447)
When a file already exists on disk and the new content is <50% of the existing file size, skip the disk write and store the existing file content in the DB instead. This prevents data loss when research prompts write full content via `write` then `gsd_summary_save` is called with an abbreviated summary. Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
cf0fe6c571
commit
9a6a341b57
2 changed files with 118 additions and 17 deletions
|
|
@ -9,7 +9,7 @@
|
|||
// parseDecisionsTable() and parseRequirementsSections() with field fidelity.
|
||||
|
||||
import { join, resolve } from 'node:path';
|
||||
import { readFileSync, existsSync } from 'node:fs';
|
||||
import { readFileSync, existsSync, statSync } from 'node:fs';
|
||||
import type { Decision, Requirement } from './types.js';
|
||||
import { resolveGsdRootFile } from './paths.js';
|
||||
import { saveFile } from './files.js';
|
||||
|
|
@ -428,30 +428,52 @@ export async function saveArtifactToDb(
|
|||
try {
|
||||
const db = await import('./gsd-db.js');
|
||||
|
||||
// Guard against path traversal before any reads/writes
|
||||
const gsdDir = resolve(basePath, '.gsd');
|
||||
const fullPath = resolve(basePath, '.gsd', opts.path);
|
||||
if (!fullPath.startsWith(gsdDir)) {
|
||||
throw new GSDError(GSD_IO_ERROR, `saveArtifactToDb: path escapes .gsd/ directory: ${opts.path}`);
|
||||
}
|
||||
|
||||
// Shrinkage guard: if the file already exists and the new content is
|
||||
// significantly smaller (<50%), preserve the richer file on disk and
|
||||
// store its content in the DB instead of the abbreviated version.
|
||||
let dbContent = opts.content;
|
||||
let skipDiskWrite = false;
|
||||
if (existsSync(fullPath)) {
|
||||
const existingSize = statSync(fullPath).size;
|
||||
const newSize = Buffer.byteLength(opts.content, 'utf-8');
|
||||
if (existingSize > 0 && newSize < existingSize * 0.5) {
|
||||
process.stderr.write(
|
||||
`gsd-db: saveArtifactToDb — new content (${newSize}B) is <50% of existing file ` +
|
||||
`(${existingSize}B) at ${opts.path}. Preserving disk file to prevent data loss.\n`,
|
||||
);
|
||||
dbContent = readFileSync(fullPath, 'utf-8');
|
||||
skipDiskWrite = true;
|
||||
}
|
||||
}
|
||||
|
||||
db.insertArtifact({
|
||||
path: opts.path,
|
||||
artifact_type: opts.artifact_type,
|
||||
milestone_id: opts.milestone_id ?? null,
|
||||
slice_id: opts.slice_id ?? null,
|
||||
task_id: opts.task_id ?? null,
|
||||
full_content: opts.content,
|
||||
full_content: dbContent,
|
||||
});
|
||||
|
||||
// Write the file to disk (guard against path traversal)
|
||||
const gsdDir = resolve(basePath, '.gsd');
|
||||
const fullPath = resolve(basePath, '.gsd', opts.path);
|
||||
if (!fullPath.startsWith(gsdDir)) {
|
||||
throw new GSDError(GSD_IO_ERROR, `saveArtifactToDb: path escapes .gsd/ directory: ${opts.path}`);
|
||||
}
|
||||
try {
|
||||
await saveFile(fullPath, opts.content);
|
||||
} catch (diskErr) {
|
||||
process.stderr.write(
|
||||
`gsd-db: saveArtifactToDb — disk write failed, rolling back DB row: ${(diskErr as Error).message}\n`,
|
||||
);
|
||||
const rollbackAdapter = db._getAdapter();
|
||||
rollbackAdapter?.prepare('DELETE FROM artifacts WHERE path = :path').run({ ':path': opts.path });
|
||||
throw diskErr;
|
||||
// Write the file to disk (only if we're not preserving a richer existing file)
|
||||
if (!skipDiskWrite) {
|
||||
try {
|
||||
await saveFile(fullPath, opts.content);
|
||||
} catch (diskErr) {
|
||||
process.stderr.write(
|
||||
`gsd-db: saveArtifactToDb — disk write failed, rolling back DB row: ${(diskErr as Error).message}\n`,
|
||||
);
|
||||
const rollbackAdapter = db._getAdapter();
|
||||
rollbackAdapter?.prepare('DELETE FROM artifacts WHERE path = :path').run({ ':path': opts.path });
|
||||
throw diskErr;
|
||||
}
|
||||
}
|
||||
// Invalidate file-read caches so deriveState() sees the updated markdown.
|
||||
// Do NOT clear the artifacts table — we just wrote to it intentionally.
|
||||
|
|
|
|||
|
|
@ -483,6 +483,85 @@ describe('db-writer', () => {
|
|||
}
|
||||
});
|
||||
|
||||
test('saveArtifactToDb — shrinkage guard preserves larger existing file', async () => {
|
||||
const tmpDir = makeTmpDir();
|
||||
const dbPath = path.join(tmpDir, '.gsd', 'gsd.db');
|
||||
openDatabase(dbPath);
|
||||
|
||||
try {
|
||||
const fullContent = '# Full Research\n\n' + 'x'.repeat(20000) + '\n';
|
||||
const abbreviatedContent = '# Summary\n\nShort version.\n';
|
||||
|
||||
// Pre-create the file with full content (simulating a prior `write` tool call)
|
||||
const relPath = 'milestones/M001/M001-RESEARCH.md';
|
||||
const filePath = path.join(tmpDir, '.gsd', relPath);
|
||||
fs.mkdirSync(path.dirname(filePath), { recursive: true });
|
||||
fs.writeFileSync(filePath, fullContent);
|
||||
|
||||
// Call saveArtifactToDb with abbreviated content — should trigger shrinkage guard
|
||||
await saveArtifactToDb({
|
||||
path: relPath,
|
||||
artifact_type: 'RESEARCH',
|
||||
content: abbreviatedContent,
|
||||
milestone_id: 'M001',
|
||||
}, tmpDir);
|
||||
|
||||
// Disk file should be preserved (not overwritten)
|
||||
assert.deepStrictEqual(
|
||||
fs.readFileSync(filePath, 'utf-8'),
|
||||
fullContent,
|
||||
'disk file preserved — shrinkage guard prevented overwrite',
|
||||
);
|
||||
|
||||
// DB should contain the full disk content, not the abbreviated content
|
||||
const adapter = _getAdapter();
|
||||
const row = adapter!
|
||||
.prepare('SELECT full_content FROM artifacts WHERE path = ?')
|
||||
.get(relPath);
|
||||
assert.deepStrictEqual(
|
||||
row!['full_content'],
|
||||
fullContent,
|
||||
'DB stores the richer disk content instead of abbreviated content',
|
||||
);
|
||||
} finally {
|
||||
closeDatabase();
|
||||
cleanupDir(tmpDir);
|
||||
}
|
||||
});
|
||||
|
||||
test('saveArtifactToDb — allows overwrite when new content is similar size', async () => {
|
||||
const tmpDir = makeTmpDir();
|
||||
const dbPath = path.join(tmpDir, '.gsd', 'gsd.db');
|
||||
openDatabase(dbPath);
|
||||
|
||||
try {
|
||||
const oldContent = '# Summary v1\n\nOriginal content here.\n';
|
||||
const newContent = '# Summary v2\n\nUpdated content here with more details.\n';
|
||||
|
||||
const relPath = 'milestones/M001/M001-SUMMARY.md';
|
||||
const filePath = path.join(tmpDir, '.gsd', relPath);
|
||||
fs.mkdirSync(path.dirname(filePath), { recursive: true });
|
||||
fs.writeFileSync(filePath, oldContent);
|
||||
|
||||
await saveArtifactToDb({
|
||||
path: relPath,
|
||||
artifact_type: 'SUMMARY',
|
||||
content: newContent,
|
||||
milestone_id: 'M001',
|
||||
}, tmpDir);
|
||||
|
||||
// Disk file should be updated (new content is >=50% of old size)
|
||||
assert.deepStrictEqual(
|
||||
fs.readFileSync(filePath, 'utf-8'),
|
||||
newContent,
|
||||
'disk file updated when new content is similar size',
|
||||
);
|
||||
} finally {
|
||||
closeDatabase();
|
||||
cleanupDir(tmpDir);
|
||||
}
|
||||
});
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Full Round-Trip: DB → Markdown → Parse → Compare
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue