fix(gsd): harden auto-mode telemetry — metrics idempotency, elapsed guard, title sanitization (#1722)

Four fixes for auto-mode telemetry and display bugs:

1. Metrics idempotency guard (metrics.ts)
   - snapshotUnitMetrics now deduplicates entries by type+id+startedAt
   - Prevents idle-watchdog from creating N duplicate entries per unit
   - On duplicate: updates existing entry in-place instead of appending
   - Observed: 31 duplicate entries for a single plan-slice unit

2. Elapsed time zero-guard (auto.ts, auto-dashboard.ts, dashboard-overlay.ts)
   - getAutoDashboardData guards against autoStartTime=0 (uninitialized)
   - formatAutoElapsed rejects negative, NaN, and >30-day values
   - Dashboard overlay adds 30-day sanity check before formatting
   - Observed: dashboard showed '492804h' (Date.now() - 0)

3. Em/en-dash title auto-fix (doctor.ts)
   - Doctor now sanitizes em/en dashes in milestone H1 titles when fix=true
   - Replaces Unicode dashes with ASCII hyphens in the roadmap file
   - Prevents state document delimiter ambiguity
   - delimiter_in_title issues are now marked fixable=true

4. Tests for all three fix areas
   - Metrics: idempotency guard, simulated watchdog duplicate pattern
   - Dashboard: negative/NaN autoStartTime handling
   - Doctor: em-dash auto-fix with fix=true and fix=false verification

Root cause analysis:
- The idle watchdog (auto-timers.ts) calls closeoutUnit every 15s when
  idle is detected. closeoutUnit calls snapshotUnitMetrics which blindly
  appended to ledger.units. Each watchdog tick created a new entry with
  identical type/id/startedAt but incremented finishedAt.
- autoStartTime defaults to 0 in the session class. If getAutoDashboardData
  is called before auto-start sets the value, elapsed = Date.now() - 0.
- Milestone titles with em-dashes (U+2014) are written by the LLM during
  roadmap creation and never sanitized, causing permanent doctor warnings.
This commit is contained in:
deseltrus 2026-03-21 15:47:27 +01:00 committed by GitHub
parent e81931625a
commit f90c83460f
9 changed files with 239 additions and 14 deletions

View file

@ -142,8 +142,9 @@ export function describeNextUnit(state: GSDState): { label: string; description:
/** Format elapsed time since auto-mode started */
export function formatAutoElapsed(autoStartTime: number): string {
if (!autoStartTime) return "";
if (!autoStartTime || autoStartTime <= 0 || !Number.isFinite(autoStartTime)) return "";
const ms = Date.now() - autoStartTime;
if (ms < 0 || ms > 30 * 24 * 3600_000) return ""; // negative or >30 days = invalid
const s = Math.floor(ms / 1000);
if (s < 60) return `${s}s`;
const m = Math.floor(s / 60);

View file

@ -333,7 +333,9 @@ export function getAutoDashboardData(): AutoDashboardData {
paused: s.paused,
stepMode: s.stepMode,
startTime: s.autoStartTime,
elapsed: s.active || s.paused ? Date.now() - s.autoStartTime : 0,
elapsed: s.active || s.paused
? (s.autoStartTime > 0 ? Date.now() - s.autoStartTime : 0)
: 0,
currentUnit: s.currentUnit ? { ...s.currentUnit } : null,
completedUnits: [...s.completedUnits],
basePath: s.basePath,

View file

@ -305,7 +305,11 @@ export class GSDDashboardOverlay {
: "";
let elapsedParts = "";
if (this.dashData.active || this.dashData.paused) {
elapsedParts = th.fg("dim", formatDuration(this.dashData.elapsed));
// Guard: skip display when elapsed is zero or unreasonably large (>30 days)
const elapsed = this.dashData.elapsed;
elapsedParts = elapsed > 0 && elapsed < 30 * 24 * 3600_000
? th.fg("dim", formatDuration(elapsed))
: "";
const eta = estimateTimeRemaining();
if (eta) elapsedParts += th.fg("dim", ` · ${eta}`);
} else if (isRemote) {

View file

@ -582,15 +582,33 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean;
// Validate milestone title for delimiter characters that break state documents.
const milestoneTitleIssue = validateTitle(milestone.title);
if (milestoneTitleIssue) {
issues.push({
severity: "warning",
code: "delimiter_in_title",
scope: "milestone",
unitId: milestoneId,
message: `Milestone ${milestoneId} ${milestoneTitleIssue}. Rename the milestone to remove these characters to prevent state corruption.`,
file: relMilestoneFile(basePath, milestoneId, "ROADMAP"),
fixable: false,
});
const roadmapFile = resolveMilestoneFile(basePath, milestoneId, "ROADMAP");
let wasFixed = false;
if (shouldFix("delimiter_in_title") && roadmapFile) {
try {
const raw = readFileSync(roadmapFile, "utf-8");
// Replace em/en dashes with " - " in the H1 title line only
const sanitized = raw.replace(/^(# .*)$/m, (line) =>
line.replace(/[\u2014\u2013]/g, "-"),
);
if (sanitized !== raw) {
await saveFile(roadmapFile, sanitized);
fixesApplied.push(`sanitized delimiter characters in ${milestoneId} title`);
wasFixed = true;
}
} catch { /* non-fatal — report the warning below */ }
}
if (!wasFixed) {
issues.push({
severity: "warning",
code: "delimiter_in_title",
scope: "milestone",
unitId: milestoneId,
message: `Milestone ${milestoneId} ${milestoneTitleIssue}. Rename the milestone to remove these characters to prevent state corruption.`,
file: relMilestoneFile(basePath, milestoneId, "ROADMAP"),
fixable: true,
});
}
}
const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP");
@ -642,6 +660,9 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean;
// Validate slice title for delimiter characters.
const sliceTitleIssue = validateTitle(slice.title);
if (sliceTitleIssue) {
// Slice titles live inside the roadmap H1/checkbox lines — the milestone-level
// fix above already sanitizes the roadmap file. For slices we only report, because
// the title comes from the checkbox text and requires careful regex to fix safely.
issues.push({
severity: "warning",
code: "delimiter_in_title",

View file

@ -205,7 +205,20 @@ export function snapshotUnitMetrics(
unit.cacheHitRate = totalInput > 0 ? Math.round((tokens.cacheRead / totalInput) * 100) : 0;
}
ledger.units.push(unit);
// ── Idempotency guard ──────────────────────────────────────────────────
// Prevent duplicate metrics entries when multiple callers snapshot the
// same unit (e.g. idle-watchdog closeoutUnit + normal loop closeoutUnit).
// A unit is considered a duplicate when type, id, AND startedAt all match
// an existing entry. On duplicate, the existing entry is updated in-place
// with the latest finishedAt and token counts instead of appending.
const dupeIdx = ledger.units.findIndex(
(u) => u.type === unit.type && u.id === unit.id && u.startedAt === unit.startedAt,
);
if (dupeIdx >= 0) {
ledger.units[dupeIdx] = unit;
} else {
ledger.units.push(unit);
}
saveLedger(basePath, ledger);
return unit;

View file

@ -164,3 +164,17 @@ test("estimateTimeRemaining returns null when no ledger data", () => {
test("estimateTimeRemaining is exported and callable", () => {
assert.equal(typeof estimateTimeRemaining, "function");
});
// ─── getAutoDashboardData elapsed guard ──────────────────────────────────────
// These tests verify the elapsed time calculation in getAutoDashboardData()
// doesn't produce absurd values when autoStartTime is 0 (uninitialized).
// The actual function is in auto.ts and tested structurally here by verifying
// that formatAutoElapsed properly handles the zero case.
test("formatAutoElapsed returns empty string for negative autoStartTime", () => {
// A negative value should be treated as invalid — the guard in
// getAutoDashboardData prevents this, but formatAutoElapsed should also
// handle it gracefully via its falsy check.
assert.equal(formatAutoElapsed(-1), "");
assert.equal(formatAutoElapsed(NaN), "");
});

View file

@ -0,0 +1,87 @@
/**
* Test: Doctor auto-fix for delimiter_in_title
*
* Verifies that `runGSDDoctor({ fix: true })` sanitizes em/en dashes
* in milestone H1 titles by replacing them with ASCII hyphens.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { runGSDDoctor } from "../doctor.js";
test("doctor fix=true sanitizes em-dash in milestone title", async () => {
const tmpBase = mkdtempSync(join(tmpdir(), "gsd-doctor-delim-"));
const gsd = join(tmpBase, ".gsd");
const mDir = join(gsd, "milestones", "M001");
const sDir = join(mDir, "slices", "S01");
const tDir = join(sDir, "tasks");
mkdirSync(tDir, { recursive: true });
const roadmapWithEmDash = `# M001: Cockpit Foundation \u2014 Daemon + State Bridge
## Success Criteria
- HTTP server runs
## Slices
- [ ] **S01: Initial Setup** \`risk:low\` \`depends:[]\`
> After this: setup works
`;
writeFileSync(join(mDir, "M001-ROADMAP.md"), roadmapWithEmDash);
writeFileSync(join(sDir, "S01-PLAN.md"), `# S01: Initial Setup\n\n## Tasks\n- [ ] **T01: Scaffold** \`est:15m\`\n`);
writeFileSync(join(tDir, "T01-PLAN.md"), "# T01: Scaffold\n");
try {
// Run doctor with fix=true
const report = await runGSDDoctor(tmpBase, { fix: true });
// The em-dash should have been replaced
const fixed = readFileSync(join(mDir, "M001-ROADMAP.md"), "utf-8");
const h1 = fixed.split("\n").find(l => l.startsWith("# "))!;
assert.ok(h1, "H1 line should exist");
assert.ok(!h1.includes("\u2014"), "em-dash should be replaced");
assert.ok(!h1.includes("\u2013"), "en-dash should be replaced");
assert.ok(h1.includes("-"), "should contain ASCII hyphen as replacement");
// Should have recorded the fix
assert.ok(
report.fixesApplied.some(f => f.includes("sanitized")),
`fixesApplied should mention sanitization, got: ${JSON.stringify(report.fixesApplied)}`,
);
// The issue should NOT appear in the report (it was fixed)
const delimIssues = report.issues.filter(i => i.code === "delimiter_in_title" && i.unitId === "M001");
assert.equal(delimIssues.length, 0, "fixed issue should not appear in issues list");
} finally {
rmSync(tmpBase, { recursive: true, force: true });
}
});
test("doctor fix=false still reports delimiter_in_title as warning", async () => {
const tmpBase = mkdtempSync(join(tmpdir(), "gsd-doctor-delim-nf-"));
const gsd = join(tmpBase, ".gsd");
const mDir = join(gsd, "milestones", "M001");
const sDir = join(mDir, "slices", "S01");
const tDir = join(sDir, "tasks");
mkdirSync(tDir, { recursive: true });
writeFileSync(join(mDir, "M001-ROADMAP.md"), `# M001: Foundation \u2014 Core\n\n## Slices\n- [ ] **S01: Setup** \`risk:low\` \`depends:[]\`\n > After: done\n`);
writeFileSync(join(sDir, "S01-PLAN.md"), `# S01: Setup\n\n## Tasks\n- [ ] **T01: Init** \`est:10m\`\n`);
writeFileSync(join(tDir, "T01-PLAN.md"), "# T01: Init\n");
try {
const report = await runGSDDoctor(tmpBase, { fix: false });
const delimIssues = report.issues.filter(i => i.code === "delimiter_in_title");
assert.ok(delimIssues.length > 0, "should report delimiter_in_title as issue when fix=false");
assert.equal(delimIssues[0].severity, "warning");
// File should be unchanged
const content = readFileSync(join(mDir, "M001-ROADMAP.md"), "utf-8");
assert.ok(content.includes("\u2014"), "file should not be modified when fix=false");
} finally {
rmSync(tmpBase, { recursive: true, force: true });
}
});

View file

@ -532,7 +532,7 @@ Discovered an issue.
assertEq(milestoneIssue?.severity, "warning", "delimiter issue has warning severity");
assertEq(milestoneIssue?.unitId, "M001", "delimiter issue unitId is M001");
assertTrue(milestoneIssue?.message?.includes("em/en dash") ?? false, "issue message mentions em/en dash");
assertEq(milestoneIssue?.fixable, false, "delimiter issue is not auto-fixable");
assertEq(milestoneIssue?.fixable, true, "delimiter issue is auto-fixable");
rmSync(dtBase, { recursive: true, force: true });
}

View file

@ -251,3 +251,86 @@ test("initMetrics creates ledger, snapshotUnitMetrics persists across resets", (
rmSync(tmpBase, { recursive: true, force: true });
}
});
// ── snapshotUnitMetrics idempotency ──────────────────────────────────────────
test("snapshotUnitMetrics deduplicates entries with same type+id+startedAt", () => {
const tmpBase = mkdtempSync(join(tmpdir(), "gsd-metrics-dedup-"));
mkdirSync(join(tmpBase, ".gsd"), { recursive: true });
try {
initMetrics(tmpBase);
const startedAt = Date.now() - 10000;
const ctx = mockCtx([
{
role: "assistant",
content: [{ type: "text", text: "Working" }],
usage: {
input: 1000, output: 500, cacheRead: 0, cacheWrite: 0, totalTokens: 1500,
cost: 0.01,
},
},
]);
// First snapshot — should create entry
const unit1 = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt, "test-model");
assert.ok(unit1);
assert.equal(getLedger()!.units.length, 1);
// Second snapshot with same type+id+startedAt — should UPDATE, not append
const unit2 = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt, "test-model");
assert.ok(unit2);
assert.equal(getLedger()!.units.length, 1, "should still be 1 entry after duplicate snapshot");
// The entry should have the latest finishedAt
assert.ok(getLedger()!.units[0].finishedAt >= unit1!.finishedAt);
// Different startedAt — should create a NEW entry (different execution)
const unit3 = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt + 5000, "test-model");
assert.ok(unit3);
assert.equal(getLedger()!.units.length, 2, "different startedAt = different execution = new entry");
// Persist and verify on disk
resetMetrics();
initMetrics(tmpBase);
assert.equal(getLedger()!.units.length, 2);
} finally {
resetMetrics();
rmSync(tmpBase, { recursive: true, force: true });
}
});
test("snapshotUnitMetrics handles simulated idle-watchdog duplicate pattern", () => {
const tmpBase = mkdtempSync(join(tmpdir(), "gsd-metrics-watchdog-"));
mkdirSync(join(tmpBase, ".gsd"), { recursive: true });
try {
initMetrics(tmpBase);
const startedAt = Date.now() - 60000;
const ctx = mockCtx([
{
role: "assistant",
content: [{ type: "text", text: "Done" }],
usage: {
input: 2000, output: 1000, cacheRead: 500, cacheWrite: 100, totalTokens: 3600,
cost: 0.05,
},
},
]);
// Simulate watchdog calling closeoutUnit (which calls snapshotUnitMetrics)
// 10 times at 15s intervals — mimicking the bug scenario
for (let i = 0; i < 10; i++) {
snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt, "test-model");
}
// Should still be exactly 1 entry, not 10
assert.equal(getLedger()!.units.length, 1, "10 watchdog snapshots should produce 1 entry, not 10");
// Persist and verify
const raw = readFileSync(join(tmpBase, ".gsd", "metrics.json"), "utf-8");
const parsed: MetricsLedger = JSON.parse(raw);
assert.equal(parsed.units.length, 1);
} finally {
resetMetrics();
rmSync(tmpBase, { recursive: true, force: true });
}
});