fix(gsd): harden auto-mode telemetry — metrics idempotency, elapsed guard, title sanitization (#1722)
Four fixes for auto-mode telemetry and display bugs: 1. Metrics idempotency guard (metrics.ts) - snapshotUnitMetrics now deduplicates entries by type+id+startedAt - Prevents idle-watchdog from creating N duplicate entries per unit - On duplicate: updates existing entry in-place instead of appending - Observed: 31 duplicate entries for a single plan-slice unit 2. Elapsed time zero-guard (auto.ts, auto-dashboard.ts, dashboard-overlay.ts) - getAutoDashboardData guards against autoStartTime=0 (uninitialized) - formatAutoElapsed rejects negative, NaN, and >30-day values - Dashboard overlay adds 30-day sanity check before formatting - Observed: dashboard showed '492804h' (Date.now() - 0) 3. Em/en-dash title auto-fix (doctor.ts) - Doctor now sanitizes em/en dashes in milestone H1 titles when fix=true - Replaces Unicode dashes with ASCII hyphens in the roadmap file - Prevents state document delimiter ambiguity - delimiter_in_title issues are now marked fixable=true 4. Tests for all three fix areas - Metrics: idempotency guard, simulated watchdog duplicate pattern - Dashboard: negative/NaN autoStartTime handling - Doctor: em-dash auto-fix with fix=true and fix=false verification Root cause analysis: - The idle watchdog (auto-timers.ts) calls closeoutUnit every 15s when idle is detected. closeoutUnit calls snapshotUnitMetrics which blindly appended to ledger.units. Each watchdog tick created a new entry with identical type/id/startedAt but incremented finishedAt. - autoStartTime defaults to 0 in the session class. If getAutoDashboardData is called before auto-start sets the value, elapsed = Date.now() - 0. - Milestone titles with em-dashes (U+2014) are written by the LLM during roadmap creation and never sanitized, causing permanent doctor warnings.
This commit is contained in:
parent
e81931625a
commit
f90c83460f
9 changed files with 239 additions and 14 deletions
|
|
@ -142,8 +142,9 @@ export function describeNextUnit(state: GSDState): { label: string; description:
|
|||
|
||||
/** Format elapsed time since auto-mode started */
|
||||
export function formatAutoElapsed(autoStartTime: number): string {
|
||||
if (!autoStartTime) return "";
|
||||
if (!autoStartTime || autoStartTime <= 0 || !Number.isFinite(autoStartTime)) return "";
|
||||
const ms = Date.now() - autoStartTime;
|
||||
if (ms < 0 || ms > 30 * 24 * 3600_000) return ""; // negative or >30 days = invalid
|
||||
const s = Math.floor(ms / 1000);
|
||||
if (s < 60) return `${s}s`;
|
||||
const m = Math.floor(s / 60);
|
||||
|
|
|
|||
|
|
@ -333,7 +333,9 @@ export function getAutoDashboardData(): AutoDashboardData {
|
|||
paused: s.paused,
|
||||
stepMode: s.stepMode,
|
||||
startTime: s.autoStartTime,
|
||||
elapsed: s.active || s.paused ? Date.now() - s.autoStartTime : 0,
|
||||
elapsed: s.active || s.paused
|
||||
? (s.autoStartTime > 0 ? Date.now() - s.autoStartTime : 0)
|
||||
: 0,
|
||||
currentUnit: s.currentUnit ? { ...s.currentUnit } : null,
|
||||
completedUnits: [...s.completedUnits],
|
||||
basePath: s.basePath,
|
||||
|
|
|
|||
|
|
@ -305,7 +305,11 @@ export class GSDDashboardOverlay {
|
|||
: "";
|
||||
let elapsedParts = "";
|
||||
if (this.dashData.active || this.dashData.paused) {
|
||||
elapsedParts = th.fg("dim", formatDuration(this.dashData.elapsed));
|
||||
// Guard: skip display when elapsed is zero or unreasonably large (>30 days)
|
||||
const elapsed = this.dashData.elapsed;
|
||||
elapsedParts = elapsed > 0 && elapsed < 30 * 24 * 3600_000
|
||||
? th.fg("dim", formatDuration(elapsed))
|
||||
: "";
|
||||
const eta = estimateTimeRemaining();
|
||||
if (eta) elapsedParts += th.fg("dim", ` · ${eta}`);
|
||||
} else if (isRemote) {
|
||||
|
|
|
|||
|
|
@ -582,15 +582,33 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean;
|
|||
// Validate milestone title for delimiter characters that break state documents.
|
||||
const milestoneTitleIssue = validateTitle(milestone.title);
|
||||
if (milestoneTitleIssue) {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
code: "delimiter_in_title",
|
||||
scope: "milestone",
|
||||
unitId: milestoneId,
|
||||
message: `Milestone ${milestoneId} ${milestoneTitleIssue}. Rename the milestone to remove these characters to prevent state corruption.`,
|
||||
file: relMilestoneFile(basePath, milestoneId, "ROADMAP"),
|
||||
fixable: false,
|
||||
});
|
||||
const roadmapFile = resolveMilestoneFile(basePath, milestoneId, "ROADMAP");
|
||||
let wasFixed = false;
|
||||
if (shouldFix("delimiter_in_title") && roadmapFile) {
|
||||
try {
|
||||
const raw = readFileSync(roadmapFile, "utf-8");
|
||||
// Replace em/en dashes with " - " in the H1 title line only
|
||||
const sanitized = raw.replace(/^(# .*)$/m, (line) =>
|
||||
line.replace(/[\u2014\u2013]/g, "-"),
|
||||
);
|
||||
if (sanitized !== raw) {
|
||||
await saveFile(roadmapFile, sanitized);
|
||||
fixesApplied.push(`sanitized delimiter characters in ${milestoneId} title`);
|
||||
wasFixed = true;
|
||||
}
|
||||
} catch { /* non-fatal — report the warning below */ }
|
||||
}
|
||||
if (!wasFixed) {
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
code: "delimiter_in_title",
|
||||
scope: "milestone",
|
||||
unitId: milestoneId,
|
||||
message: `Milestone ${milestoneId} ${milestoneTitleIssue}. Rename the milestone to remove these characters to prevent state corruption.`,
|
||||
file: relMilestoneFile(basePath, milestoneId, "ROADMAP"),
|
||||
fixable: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP");
|
||||
|
|
@ -642,6 +660,9 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean;
|
|||
// Validate slice title for delimiter characters.
|
||||
const sliceTitleIssue = validateTitle(slice.title);
|
||||
if (sliceTitleIssue) {
|
||||
// Slice titles live inside the roadmap H1/checkbox lines — the milestone-level
|
||||
// fix above already sanitizes the roadmap file. For slices we only report, because
|
||||
// the title comes from the checkbox text and requires careful regex to fix safely.
|
||||
issues.push({
|
||||
severity: "warning",
|
||||
code: "delimiter_in_title",
|
||||
|
|
|
|||
|
|
@ -205,7 +205,20 @@ export function snapshotUnitMetrics(
|
|||
unit.cacheHitRate = totalInput > 0 ? Math.round((tokens.cacheRead / totalInput) * 100) : 0;
|
||||
}
|
||||
|
||||
ledger.units.push(unit);
|
||||
// ── Idempotency guard ──────────────────────────────────────────────────
|
||||
// Prevent duplicate metrics entries when multiple callers snapshot the
|
||||
// same unit (e.g. idle-watchdog closeoutUnit + normal loop closeoutUnit).
|
||||
// A unit is considered a duplicate when type, id, AND startedAt all match
|
||||
// an existing entry. On duplicate, the existing entry is updated in-place
|
||||
// with the latest finishedAt and token counts instead of appending.
|
||||
const dupeIdx = ledger.units.findIndex(
|
||||
(u) => u.type === unit.type && u.id === unit.id && u.startedAt === unit.startedAt,
|
||||
);
|
||||
if (dupeIdx >= 0) {
|
||||
ledger.units[dupeIdx] = unit;
|
||||
} else {
|
||||
ledger.units.push(unit);
|
||||
}
|
||||
saveLedger(basePath, ledger);
|
||||
|
||||
return unit;
|
||||
|
|
|
|||
|
|
@ -164,3 +164,17 @@ test("estimateTimeRemaining returns null when no ledger data", () => {
|
|||
test("estimateTimeRemaining is exported and callable", () => {
|
||||
assert.equal(typeof estimateTimeRemaining, "function");
|
||||
});
|
||||
|
||||
// ─── getAutoDashboardData elapsed guard ──────────────────────────────────────
|
||||
// These tests verify the elapsed time calculation in getAutoDashboardData()
|
||||
// doesn't produce absurd values when autoStartTime is 0 (uninitialized).
|
||||
// The actual function is in auto.ts and tested structurally here by verifying
|
||||
// that formatAutoElapsed properly handles the zero case.
|
||||
|
||||
test("formatAutoElapsed returns empty string for negative autoStartTime", () => {
|
||||
// A negative value should be treated as invalid — the guard in
|
||||
// getAutoDashboardData prevents this, but formatAutoElapsed should also
|
||||
// handle it gracefully via its falsy check.
|
||||
assert.equal(formatAutoElapsed(-1), "");
|
||||
assert.equal(formatAutoElapsed(NaN), "");
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,87 @@
|
|||
/**
|
||||
* Test: Doctor auto-fix for delimiter_in_title
|
||||
*
|
||||
* Verifies that `runGSDDoctor({ fix: true })` sanitizes em/en dashes
|
||||
* in milestone H1 titles by replacing them with ASCII hyphens.
|
||||
*/
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { runGSDDoctor } from "../doctor.js";
|
||||
|
||||
test("doctor fix=true sanitizes em-dash in milestone title", async () => {
|
||||
const tmpBase = mkdtempSync(join(tmpdir(), "gsd-doctor-delim-"));
|
||||
const gsd = join(tmpBase, ".gsd");
|
||||
const mDir = join(gsd, "milestones", "M001");
|
||||
const sDir = join(mDir, "slices", "S01");
|
||||
const tDir = join(sDir, "tasks");
|
||||
mkdirSync(tDir, { recursive: true });
|
||||
|
||||
const roadmapWithEmDash = `# M001: Cockpit Foundation \u2014 Daemon + State Bridge
|
||||
|
||||
## Success Criteria
|
||||
- HTTP server runs
|
||||
|
||||
## Slices
|
||||
- [ ] **S01: Initial Setup** \`risk:low\` \`depends:[]\`
|
||||
> After this: setup works
|
||||
`;
|
||||
|
||||
writeFileSync(join(mDir, "M001-ROADMAP.md"), roadmapWithEmDash);
|
||||
writeFileSync(join(sDir, "S01-PLAN.md"), `# S01: Initial Setup\n\n## Tasks\n- [ ] **T01: Scaffold** \`est:15m\`\n`);
|
||||
writeFileSync(join(tDir, "T01-PLAN.md"), "# T01: Scaffold\n");
|
||||
|
||||
try {
|
||||
// Run doctor with fix=true
|
||||
const report = await runGSDDoctor(tmpBase, { fix: true });
|
||||
|
||||
// The em-dash should have been replaced
|
||||
const fixed = readFileSync(join(mDir, "M001-ROADMAP.md"), "utf-8");
|
||||
const h1 = fixed.split("\n").find(l => l.startsWith("# "))!;
|
||||
assert.ok(h1, "H1 line should exist");
|
||||
assert.ok(!h1.includes("\u2014"), "em-dash should be replaced");
|
||||
assert.ok(!h1.includes("\u2013"), "en-dash should be replaced");
|
||||
assert.ok(h1.includes("-"), "should contain ASCII hyphen as replacement");
|
||||
|
||||
// Should have recorded the fix
|
||||
assert.ok(
|
||||
report.fixesApplied.some(f => f.includes("sanitized")),
|
||||
`fixesApplied should mention sanitization, got: ${JSON.stringify(report.fixesApplied)}`,
|
||||
);
|
||||
|
||||
// The issue should NOT appear in the report (it was fixed)
|
||||
const delimIssues = report.issues.filter(i => i.code === "delimiter_in_title" && i.unitId === "M001");
|
||||
assert.equal(delimIssues.length, 0, "fixed issue should not appear in issues list");
|
||||
} finally {
|
||||
rmSync(tmpBase, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("doctor fix=false still reports delimiter_in_title as warning", async () => {
|
||||
const tmpBase = mkdtempSync(join(tmpdir(), "gsd-doctor-delim-nf-"));
|
||||
const gsd = join(tmpBase, ".gsd");
|
||||
const mDir = join(gsd, "milestones", "M001");
|
||||
const sDir = join(mDir, "slices", "S01");
|
||||
const tDir = join(sDir, "tasks");
|
||||
mkdirSync(tDir, { recursive: true });
|
||||
|
||||
writeFileSync(join(mDir, "M001-ROADMAP.md"), `# M001: Foundation \u2014 Core\n\n## Slices\n- [ ] **S01: Setup** \`risk:low\` \`depends:[]\`\n > After: done\n`);
|
||||
writeFileSync(join(sDir, "S01-PLAN.md"), `# S01: Setup\n\n## Tasks\n- [ ] **T01: Init** \`est:10m\`\n`);
|
||||
writeFileSync(join(tDir, "T01-PLAN.md"), "# T01: Init\n");
|
||||
|
||||
try {
|
||||
const report = await runGSDDoctor(tmpBase, { fix: false });
|
||||
const delimIssues = report.issues.filter(i => i.code === "delimiter_in_title");
|
||||
assert.ok(delimIssues.length > 0, "should report delimiter_in_title as issue when fix=false");
|
||||
assert.equal(delimIssues[0].severity, "warning");
|
||||
|
||||
// File should be unchanged
|
||||
const content = readFileSync(join(mDir, "M001-ROADMAP.md"), "utf-8");
|
||||
assert.ok(content.includes("\u2014"), "file should not be modified when fix=false");
|
||||
} finally {
|
||||
rmSync(tmpBase, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
|
@ -532,7 +532,7 @@ Discovered an issue.
|
|||
assertEq(milestoneIssue?.severity, "warning", "delimiter issue has warning severity");
|
||||
assertEq(milestoneIssue?.unitId, "M001", "delimiter issue unitId is M001");
|
||||
assertTrue(milestoneIssue?.message?.includes("em/en dash") ?? false, "issue message mentions em/en dash");
|
||||
assertEq(milestoneIssue?.fixable, false, "delimiter issue is not auto-fixable");
|
||||
assertEq(milestoneIssue?.fixable, true, "delimiter issue is auto-fixable");
|
||||
|
||||
rmSync(dtBase, { recursive: true, force: true });
|
||||
}
|
||||
|
|
|
|||
|
|
@ -251,3 +251,86 @@ test("initMetrics creates ledger, snapshotUnitMetrics persists across resets", (
|
|||
rmSync(tmpBase, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// ── snapshotUnitMetrics idempotency ──────────────────────────────────────────
|
||||
|
||||
test("snapshotUnitMetrics deduplicates entries with same type+id+startedAt", () => {
|
||||
const tmpBase = mkdtempSync(join(tmpdir(), "gsd-metrics-dedup-"));
|
||||
mkdirSync(join(tmpBase, ".gsd"), { recursive: true });
|
||||
try {
|
||||
initMetrics(tmpBase);
|
||||
const startedAt = Date.now() - 10000;
|
||||
const ctx = mockCtx([
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "Working" }],
|
||||
usage: {
|
||||
input: 1000, output: 500, cacheRead: 0, cacheWrite: 0, totalTokens: 1500,
|
||||
cost: 0.01,
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
||||
// First snapshot — should create entry
|
||||
const unit1 = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt, "test-model");
|
||||
assert.ok(unit1);
|
||||
assert.equal(getLedger()!.units.length, 1);
|
||||
|
||||
// Second snapshot with same type+id+startedAt — should UPDATE, not append
|
||||
const unit2 = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt, "test-model");
|
||||
assert.ok(unit2);
|
||||
assert.equal(getLedger()!.units.length, 1, "should still be 1 entry after duplicate snapshot");
|
||||
|
||||
// The entry should have the latest finishedAt
|
||||
assert.ok(getLedger()!.units[0].finishedAt >= unit1!.finishedAt);
|
||||
|
||||
// Different startedAt — should create a NEW entry (different execution)
|
||||
const unit3 = snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt + 5000, "test-model");
|
||||
assert.ok(unit3);
|
||||
assert.equal(getLedger()!.units.length, 2, "different startedAt = different execution = new entry");
|
||||
|
||||
// Persist and verify on disk
|
||||
resetMetrics();
|
||||
initMetrics(tmpBase);
|
||||
assert.equal(getLedger()!.units.length, 2);
|
||||
} finally {
|
||||
resetMetrics();
|
||||
rmSync(tmpBase, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("snapshotUnitMetrics handles simulated idle-watchdog duplicate pattern", () => {
|
||||
const tmpBase = mkdtempSync(join(tmpdir(), "gsd-metrics-watchdog-"));
|
||||
mkdirSync(join(tmpBase, ".gsd"), { recursive: true });
|
||||
try {
|
||||
initMetrics(tmpBase);
|
||||
const startedAt = Date.now() - 60000;
|
||||
const ctx = mockCtx([
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "Done" }],
|
||||
usage: {
|
||||
input: 2000, output: 1000, cacheRead: 500, cacheWrite: 100, totalTokens: 3600,
|
||||
cost: 0.05,
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
||||
// Simulate watchdog calling closeoutUnit (which calls snapshotUnitMetrics)
|
||||
// 10 times at 15s intervals — mimicking the bug scenario
|
||||
for (let i = 0; i < 10; i++) {
|
||||
snapshotUnitMetrics(ctx, "plan-slice", "M001/S01", startedAt, "test-model");
|
||||
}
|
||||
|
||||
// Should still be exactly 1 entry, not 10
|
||||
assert.equal(getLedger()!.units.length, 1, "10 watchdog snapshots should produce 1 entry, not 10");
|
||||
|
||||
// Persist and verify
|
||||
const raw = readFileSync(join(tmpBase, ".gsd", "metrics.json"), "utf-8");
|
||||
const parsed: MetricsLedger = JSON.parse(raw);
|
||||
assert.equal(parsed.units.length, 1);
|
||||
} finally {
|
||||
resetMetrics();
|
||||
rmSync(tmpBase, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue