From f90c83460f02095c37d4eca0d3bed15df22bcaae Mon Sep 17 00:00:00 2001 From: deseltrus <101901449+deseltrus@users.noreply.github.com> Date: Sat, 21 Mar 2026 15:47:27 +0100 Subject: [PATCH] =?UTF-8?q?fix(gsd):=20harden=20auto-mode=20telemetry=20?= =?UTF-8?q?=E2=80=94=20metrics=20idempotency,=20elapsed=20guard,=20title?= =?UTF-8?q?=20sanitization=20(#1722)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four fixes for auto-mode telemetry and display bugs: 1. Metrics idempotency guard (metrics.ts) - snapshotUnitMetrics now deduplicates entries by type+id+startedAt - Prevents idle-watchdog from creating N duplicate entries per unit - On duplicate: updates existing entry in-place instead of appending - Observed: 31 duplicate entries for a single plan-slice unit 2. Elapsed time zero-guard (auto.ts, auto-dashboard.ts, dashboard-overlay.ts) - getAutoDashboardData guards against autoStartTime=0 (uninitialized) - formatAutoElapsed rejects negative, NaN, and >30-day values - Dashboard overlay adds 30-day sanity check before formatting - Observed: dashboard showed '492804h' (Date.now() - 0) 3. Em/en-dash title auto-fix (doctor.ts) - Doctor now sanitizes em/en dashes in milestone H1 titles when fix=true - Replaces Unicode dashes with ASCII hyphens in the roadmap file - Prevents state document delimiter ambiguity - delimiter_in_title issues are now marked fixable=true 4. Tests for all three fix areas - Metrics: idempotency guard, simulated watchdog duplicate pattern - Dashboard: negative/NaN autoStartTime handling - Doctor: em-dash auto-fix with fix=true and fix=false verification Root cause analysis: - The idle watchdog (auto-timers.ts) calls closeoutUnit every 15s when idle is detected. closeoutUnit calls snapshotUnitMetrics which blindly appended to ledger.units. Each watchdog tick created a new entry with identical type/id/startedAt but incremented finishedAt. - autoStartTime defaults to 0 in the session class. If getAutoDashboardData is called before auto-start sets the value, elapsed = Date.now() - 0. - Milestone titles with em-dashes (U+2014) are written by the LLM during roadmap creation and never sanitized, causing permanent doctor warnings. --- .../extensions/gsd/auto-dashboard.ts | 3 +- src/resources/extensions/gsd/auto.ts | 4 +- .../extensions/gsd/dashboard-overlay.ts | 6 +- src/resources/extensions/gsd/doctor.ts | 39 +++++++-- src/resources/extensions/gsd/metrics.ts | 15 +++- .../gsd/tests/auto-dashboard.test.ts | 14 +++ .../gsd/tests/doctor-delimiter-fix.test.ts | 87 +++++++++++++++++++ .../extensions/gsd/tests/doctor.test.ts | 2 +- .../extensions/gsd/tests/metrics.test.ts | 83 ++++++++++++++++++ 9 files changed, 239 insertions(+), 14 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/doctor-delimiter-fix.test.ts diff --git a/src/resources/extensions/gsd/auto-dashboard.ts b/src/resources/extensions/gsd/auto-dashboard.ts index d0369bd37..ddeab5256 100644 --- a/src/resources/extensions/gsd/auto-dashboard.ts +++ b/src/resources/extensions/gsd/auto-dashboard.ts @@ -142,8 +142,9 @@ export function describeNextUnit(state: GSDState): { label: string; description: /** Format elapsed time since auto-mode started */ export function formatAutoElapsed(autoStartTime: number): string { - if (!autoStartTime) return ""; + if (!autoStartTime || autoStartTime <= 0 || !Number.isFinite(autoStartTime)) return ""; const ms = Date.now() - autoStartTime; + if (ms < 0 || ms > 30 * 24 * 3600_000) return ""; // negative or >30 days = invalid const s = Math.floor(ms / 1000); if (s < 60) return `${s}s`; const m = Math.floor(s / 60); diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index c419933df..37cef2d3d 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -333,7 +333,9 @@ export function getAutoDashboardData(): AutoDashboardData { paused: s.paused, stepMode: s.stepMode, startTime: s.autoStartTime, - elapsed: s.active || s.paused ? Date.now() - s.autoStartTime : 0, + elapsed: s.active || s.paused + ? (s.autoStartTime > 0 ? Date.now() - s.autoStartTime : 0) + : 0, currentUnit: s.currentUnit ? { ...s.currentUnit } : null, completedUnits: [...s.completedUnits], basePath: s.basePath, diff --git a/src/resources/extensions/gsd/dashboard-overlay.ts b/src/resources/extensions/gsd/dashboard-overlay.ts index 337899c4d..0982cf268 100644 --- a/src/resources/extensions/gsd/dashboard-overlay.ts +++ b/src/resources/extensions/gsd/dashboard-overlay.ts @@ -305,7 +305,11 @@ export class GSDDashboardOverlay { : ""; let elapsedParts = ""; if (this.dashData.active || this.dashData.paused) { - elapsedParts = th.fg("dim", formatDuration(this.dashData.elapsed)); + // Guard: skip display when elapsed is zero or unreasonably large (>30 days) + const elapsed = this.dashData.elapsed; + elapsedParts = elapsed > 0 && elapsed < 30 * 24 * 3600_000 + ? th.fg("dim", formatDuration(elapsed)) + : ""; const eta = estimateTimeRemaining(); if (eta) elapsedParts += th.fg("dim", ` · ${eta}`); } else if (isRemote) { diff --git a/src/resources/extensions/gsd/doctor.ts b/src/resources/extensions/gsd/doctor.ts index 538568ef5..7c48f1075 100644 --- a/src/resources/extensions/gsd/doctor.ts +++ b/src/resources/extensions/gsd/doctor.ts @@ -582,15 +582,33 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; // Validate milestone title for delimiter characters that break state documents. const milestoneTitleIssue = validateTitle(milestone.title); if (milestoneTitleIssue) { - issues.push({ - severity: "warning", - code: "delimiter_in_title", - scope: "milestone", - unitId: milestoneId, - message: `Milestone ${milestoneId} ${milestoneTitleIssue}. Rename the milestone to remove these characters to prevent state corruption.`, - file: relMilestoneFile(basePath, milestoneId, "ROADMAP"), - fixable: false, - }); + const roadmapFile = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); + let wasFixed = false; + if (shouldFix("delimiter_in_title") && roadmapFile) { + try { + const raw = readFileSync(roadmapFile, "utf-8"); + // Replace em/en dashes with " - " in the H1 title line only + const sanitized = raw.replace(/^(# .*)$/m, (line) => + line.replace(/[\u2014\u2013]/g, "-"), + ); + if (sanitized !== raw) { + await saveFile(roadmapFile, sanitized); + fixesApplied.push(`sanitized delimiter characters in ${milestoneId} title`); + wasFixed = true; + } + } catch { /* non-fatal — report the warning below */ } + } + if (!wasFixed) { + issues.push({ + severity: "warning", + code: "delimiter_in_title", + scope: "milestone", + unitId: milestoneId, + message: `Milestone ${milestoneId} ${milestoneTitleIssue}. Rename the milestone to remove these characters to prevent state corruption.`, + file: relMilestoneFile(basePath, milestoneId, "ROADMAP"), + fixable: true, + }); + } } const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); @@ -642,6 +660,9 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; // Validate slice title for delimiter characters. const sliceTitleIssue = validateTitle(slice.title); if (sliceTitleIssue) { + // Slice titles live inside the roadmap H1/checkbox lines — the milestone-level + // fix above already sanitizes the roadmap file. For slices we only report, because + // the title comes from the checkbox text and requires careful regex to fix safely. issues.push({ severity: "warning", code: "delimiter_in_title", diff --git a/src/resources/extensions/gsd/metrics.ts b/src/resources/extensions/gsd/metrics.ts index 9081057e6..f27f34b00 100644 --- a/src/resources/extensions/gsd/metrics.ts +++ b/src/resources/extensions/gsd/metrics.ts @@ -205,7 +205,20 @@ export function snapshotUnitMetrics( unit.cacheHitRate = totalInput > 0 ? Math.round((tokens.cacheRead / totalInput) * 100) : 0; } - ledger.units.push(unit); + // ── Idempotency guard ────────────────────────────────────────────────── + // Prevent duplicate metrics entries when multiple callers snapshot the + // same unit (e.g. idle-watchdog closeoutUnit + normal loop closeoutUnit). + // A unit is considered a duplicate when type, id, AND startedAt all match + // an existing entry. On duplicate, the existing entry is updated in-place + // with the latest finishedAt and token counts instead of appending. + const dupeIdx = ledger.units.findIndex( + (u) => u.type === unit.type && u.id === unit.id && u.startedAt === unit.startedAt, + ); + if (dupeIdx >= 0) { + ledger.units[dupeIdx] = unit; + } else { + ledger.units.push(unit); + } saveLedger(basePath, ledger); return unit; diff --git a/src/resources/extensions/gsd/tests/auto-dashboard.test.ts b/src/resources/extensions/gsd/tests/auto-dashboard.test.ts index 45ca2fb23..d514420a3 100644 --- a/src/resources/extensions/gsd/tests/auto-dashboard.test.ts +++ b/src/resources/extensions/gsd/tests/auto-dashboard.test.ts @@ -164,3 +164,17 @@ test("estimateTimeRemaining returns null when no ledger data", () => { test("estimateTimeRemaining is exported and callable", () => { assert.equal(typeof estimateTimeRemaining, "function"); }); + +// ─── getAutoDashboardData elapsed guard ────────────────────────────────────── +// These tests verify the elapsed time calculation in getAutoDashboardData() +// doesn't produce absurd values when autoStartTime is 0 (uninitialized). +// The actual function is in auto.ts and tested structurally here by verifying +// that formatAutoElapsed properly handles the zero case. + +test("formatAutoElapsed returns empty string for negative autoStartTime", () => { + // A negative value should be treated as invalid — the guard in + // getAutoDashboardData prevents this, but formatAutoElapsed should also + // handle it gracefully via its falsy check. + assert.equal(formatAutoElapsed(-1), ""); + assert.equal(formatAutoElapsed(NaN), ""); +}); diff --git a/src/resources/extensions/gsd/tests/doctor-delimiter-fix.test.ts b/src/resources/extensions/gsd/tests/doctor-delimiter-fix.test.ts new file mode 100644 index 000000000..afd9332fa --- /dev/null +++ b/src/resources/extensions/gsd/tests/doctor-delimiter-fix.test.ts @@ -0,0 +1,87 @@ +/** + * Test: Doctor auto-fix for delimiter_in_title + * + * Verifies that `runGSDDoctor({ fix: true })` sanitizes em/en dashes + * in milestone H1 titles by replacing them with ASCII hyphens. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { runGSDDoctor } from "../doctor.js"; + +test("doctor fix=true sanitizes em-dash in milestone title", async () => { + const tmpBase = mkdtempSync(join(tmpdir(), "gsd-doctor-delim-")); + const gsd = join(tmpBase, ".gsd"); + const mDir = join(gsd, "milestones", "M001"); + const sDir = join(mDir, "slices", "S01"); + const tDir = join(sDir, "tasks"); + mkdirSync(tDir, { recursive: true }); + + const roadmapWithEmDash = `# M001: Cockpit Foundation \u2014 Daemon + State Bridge + +## Success Criteria +- HTTP server runs + +## Slices +- [ ] **S01: Initial Setup** \`risk:low\` \`depends:[]\` + > After this: setup works +`; + + writeFileSync(join(mDir, "M001-ROADMAP.md"), roadmapWithEmDash); + writeFileSync(join(sDir, "S01-PLAN.md"), `# S01: Initial Setup\n\n## Tasks\n- [ ] **T01: Scaffold** \`est:15m\`\n`); + writeFileSync(join(tDir, "T01-PLAN.md"), "# T01: Scaffold\n"); + + try { + // Run doctor with fix=true + const report = await runGSDDoctor(tmpBase, { fix: true }); + + // The em-dash should have been replaced + const fixed = readFileSync(join(mDir, "M001-ROADMAP.md"), "utf-8"); + const h1 = fixed.split("\n").find(l => l.startsWith("# "))!; + assert.ok(h1, "H1 line should exist"); + assert.ok(!h1.includes("\u2014"), "em-dash should be replaced"); + assert.ok(!h1.includes("\u2013"), "en-dash should be replaced"); + assert.ok(h1.includes("-"), "should contain ASCII hyphen as replacement"); + + // Should have recorded the fix + assert.ok( + report.fixesApplied.some(f => f.includes("sanitized")), + `fixesApplied should mention sanitization, got: ${JSON.stringify(report.fixesApplied)}`, + ); + + // The issue should NOT appear in the report (it was fixed) + const delimIssues = report.issues.filter(i => i.code === "delimiter_in_title" && i.unitId === "M001"); + assert.equal(delimIssues.length, 0, "fixed issue should not appear in issues list"); + } finally { + rmSync(tmpBase, { recursive: true, force: true }); + } +}); + +test("doctor fix=false still reports delimiter_in_title as warning", async () => { + const tmpBase = mkdtempSync(join(tmpdir(), "gsd-doctor-delim-nf-")); + const gsd = join(tmpBase, ".gsd"); + const mDir = join(gsd, "milestones", "M001"); + const sDir = join(mDir, "slices", "S01"); + const tDir = join(sDir, "tasks"); + mkdirSync(tDir, { recursive: true }); + + writeFileSync(join(mDir, "M001-ROADMAP.md"), `# M001: Foundation \u2014 Core\n\n## Slices\n- [ ] **S01: Setup** \`risk:low\` \`depends:[]\`\n > After: done\n`); + writeFileSync(join(sDir, "S01-PLAN.md"), `# S01: Setup\n\n## Tasks\n- [ ] **T01: Init** \`est:10m\`\n`); + writeFileSync(join(tDir, "T01-PLAN.md"), "# T01: Init\n"); + + try { + const report = await runGSDDoctor(tmpBase, { fix: false }); + const delimIssues = report.issues.filter(i => i.code === "delimiter_in_title"); + assert.ok(delimIssues.length > 0, "should report delimiter_in_title as issue when fix=false"); + assert.equal(delimIssues[0].severity, "warning"); + + // File should be unchanged + const content = readFileSync(join(mDir, "M001-ROADMAP.md"), "utf-8"); + assert.ok(content.includes("\u2014"), "file should not be modified when fix=false"); + } finally { + rmSync(tmpBase, { recursive: true, force: true }); + } +}); diff --git a/src/resources/extensions/gsd/tests/doctor.test.ts b/src/resources/extensions/gsd/tests/doctor.test.ts index 12c19c042..efad6088b 100644 --- a/src/resources/extensions/gsd/tests/doctor.test.ts +++ b/src/resources/extensions/gsd/tests/doctor.test.ts @@ -532,7 +532,7 @@ Discovered an issue. assertEq(milestoneIssue?.severity, "warning", "delimiter issue has warning severity"); assertEq(milestoneIssue?.unitId, "M001", "delimiter issue unitId is M001"); assertTrue(milestoneIssue?.message?.includes("em/en dash") ?? false, "issue message mentions em/en dash"); - assertEq(milestoneIssue?.fixable, false, "delimiter issue is not auto-fixable"); + assertEq(milestoneIssue?.fixable, true, "delimiter issue is auto-fixable"); rmSync(dtBase, { recursive: true, force: true }); } diff --git a/src/resources/extensions/gsd/tests/metrics.test.ts b/src/resources/extensions/gsd/tests/metrics.test.ts index a0b3d503f..801bd7adb 100644 --- a/src/resources/extensions/gsd/tests/metrics.test.ts +++ b/src/resources/extensions/gsd/tests/metrics.test.ts @@ -251,3 +251,86 @@ test("initMetrics creates ledger, snapshotUnitMetrics persists across resets", ( rmSync(tmpBase, { recursive: true, force: true }); } }); + +// ── snapshotUnitMetrics idempotency ────────────────────────────────────────── + +test("snapshotUnitMetrics deduplicates entries with same type+id+startedAt", () => { + const tmpBase = mkdtempSync(join(tmpdir(), "gsd-metrics-dedup-")); + mkdirSync(join(tmpBase, ".gsd"), { recursive: true }); + try { + initMetrics(tmpBase); + const startedAt = Date.now() - 10000; + const ctx = mockCtx([ + { + role: "assistant", + content: [{ type: "text", text: "Working" }], + usage: { + input: 1000, output: 500, cacheRead: 0, cacheWrite: 0, totalTokens: 1500, + cost: 0.01, + }, + }, + ]); + + // First snapshot — should create entry + const unit1 = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt, "test-model"); + assert.ok(unit1); + assert.equal(getLedger()!.units.length, 1); + + // Second snapshot with same type+id+startedAt — should UPDATE, not append + const unit2 = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt, "test-model"); + assert.ok(unit2); + assert.equal(getLedger()!.units.length, 1, "should still be 1 entry after duplicate snapshot"); + + // The entry should have the latest finishedAt + assert.ok(getLedger()!.units[0].finishedAt >= unit1!.finishedAt); + + // Different startedAt — should create a NEW entry (different execution) + const unit3 = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt + 5000, "test-model"); + assert.ok(unit3); + assert.equal(getLedger()!.units.length, 2, "different startedAt = different execution = new entry"); + + // Persist and verify on disk + resetMetrics(); + initMetrics(tmpBase); + assert.equal(getLedger()!.units.length, 2); + } finally { + resetMetrics(); + rmSync(tmpBase, { recursive: true, force: true }); + } +}); + +test("snapshotUnitMetrics handles simulated idle-watchdog duplicate pattern", () => { + const tmpBase = mkdtempSync(join(tmpdir(), "gsd-metrics-watchdog-")); + mkdirSync(join(tmpBase, ".gsd"), { recursive: true }); + try { + initMetrics(tmpBase); + const startedAt = Date.now() - 60000; + const ctx = mockCtx([ + { + role: "assistant", + content: [{ type: "text", text: "Done" }], + usage: { + input: 2000, output: 1000, cacheRead: 500, cacheWrite: 100, totalTokens: 3600, + cost: 0.05, + }, + }, + ]); + + // Simulate watchdog calling closeoutUnit (which calls snapshotUnitMetrics) + // 10 times at 15s intervals — mimicking the bug scenario + for (let i = 0; i < 10; i++) { + snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt, "test-model"); + } + + // Should still be exactly 1 entry, not 10 + assert.equal(getLedger()!.units.length, 1, "10 watchdog snapshots should produce 1 entry, not 10"); + + // Persist and verify + const raw = readFileSync(join(tmpBase, ".gsd", "metrics.json"), "utf-8"); + const parsed: MetricsLedger = JSON.parse(raw); + assert.equal(parsed.units.length, 1); + } finally { + resetMetrics(); + rmSync(tmpBase, { recursive: true, force: true }); + } +}); \ No newline at end of file