diff --git a/src/resources/extensions/gsd/forensics.ts b/src/resources/extensions/gsd/forensics.ts index 56a7ce0b5..d66c737d9 100644 --- a/src/resources/extensions/gsd/forensics.ts +++ b/src/resources/extensions/gsd/forensics.ts @@ -28,6 +28,7 @@ import { deriveState } from "./state.js"; import { isAutoActive } from "./auto.js"; import { loadPrompt } from "./prompt-loader.js"; import { gsdRoot } from "./paths.js"; +import { queryJournal } from "./journal.js"; import { formatDuration } from "../shared/format-utils.js"; import { getAutoWorktreePath } from "./auto-worktree.js"; import { loadEffectiveGSDPreferences, loadGlobalGSDPreferences, getGlobalGSDPreferencesPath } from "./preferences.js"; @@ -37,7 +38,7 @@ import { ensurePreferencesFile, serializePreferencesToFrontmatter } from "./comm // ─── Types ──────────────────────────────────────────────────────────────────── interface ForensicAnomaly { - type: "stuck-loop" | "cost-spike" | "timeout" | "missing-artifact" | "crash" | "doctor-issue" | "error-trace"; + type: "stuck-loop" | "cost-spike" | "timeout" | "missing-artifact" | "crash" | "doctor-issue" | "error-trace" | "journal-stuck" | "journal-guard-block" | "journal-rapid-iterations" | "journal-worktree-failure"; severity: "info" | "warning" | "error"; unitType?: string; unitId?: string; @@ -54,6 +55,31 @@ interface UnitTrace { mtime: number; } +/** Summary of .gsd/activity/ directory metadata. */ +interface ActivityLogMeta { + fileCount: number; + totalSizeBytes: number; + oldestFile: string | null; + newestFile: string | null; +} + +/** Summary of .gsd/journal/ data for forensic investigation. 
*/ +interface JournalSummary { + /** Total journal entries scanned */ + totalEntries: number; + /** Distinct flow IDs (each = one auto-mode iteration) */ + flowCount: number; + /** Event counts by type */ + eventCounts: Record; + /** Most recent journal entries (last 20) for context */ + recentEvents: { ts: string; flowId: string; eventType: string; rule?: string; unitId?: string }[]; + /** Date range of journal data */ + oldestEntry: string | null; + newestEntry: string | null; + /** Daily file count */ + fileCount: number; +} + interface ForensicReport { gsdVersion: string; timestamp: string; @@ -68,6 +94,8 @@ interface ForensicReport { doctorIssues: DoctorIssue[]; anomalies: ForensicAnomaly[]; recentUnits: { type: string; id: string; cost: number; duration: number; model: string; finishedAt: number }[]; + journalSummary: JournalSummary | null; + activityLogMeta: ActivityLogMeta | null; } // ─── Duplicate Detection ────────────────────────────────────────────────────── @@ -276,7 +304,13 @@ export async function buildForensicReport(basePath: string): Promise f.endsWith(".jsonl")).sort(); + if (files.length === 0) return null; + + const entries = queryJournal(basePath); + if (entries.length === 0) return null; + + // Count events by type + const eventCounts: Record = {}; + const flowIds = new Set(); + for (const e of entries) { + eventCounts[e.eventType] = (eventCounts[e.eventType] ?? 0) + 1; + flowIds.add(e.flowId); + } + + // Extract recent events (last 20) with key fields for the report + const recentEvents = entries.slice(-20).map(e => ({ + ts: e.ts, + flowId: e.flowId, + eventType: e.eventType, + rule: e.rule, + unitId: (e.data as Record | undefined)?.unitId as string | undefined, + })); + + return { + totalEntries: entries.length, + flowCount: flowIds.size, + eventCounts, + recentEvents, + oldestEntry: entries[0]?.ts ?? null, + newestEntry: entries[entries.length - 1]?.ts ?? 
null, + fileCount: files.length, + }; + } catch { + return null; + } +} + +// ─── Activity Log Metadata ──────────────────────────────────────────────────── + +function gatherActivityLogMeta(basePath: string, activeMilestone?: string | null): ActivityLogMeta | null { + try { + const activityDirs = resolveActivityDirs(basePath, activeMilestone); + let fileCount = 0; + let totalSizeBytes = 0; + let oldestFile: string | null = null; + let newestFile: string | null = null; + let oldestMtime = Infinity; + let newestMtime = 0; + + for (const activityDir of activityDirs) { + if (!existsSync(activityDir)) continue; + const files = readdirSync(activityDir).filter(f => f.endsWith(".jsonl")); + for (const file of files) { + const filePath = join(activityDir, file); + const stat = statSync(filePath, { throwIfNoEntry: false }); + if (!stat) continue; + fileCount++; + totalSizeBytes += stat.size; + if (stat.mtimeMs < oldestMtime) { + oldestMtime = stat.mtimeMs; + oldestFile = file; + } + if (stat.mtimeMs > newestMtime) { + newestMtime = stat.mtimeMs; + newestFile = file; + } + } + } + + if (fileCount === 0) return null; + return { fileCount, totalSizeBytes, oldestFile, newestFile }; + } catch { + return null; + } +} + // ─── Completed Keys Loader ──────────────────────────────────────────────────── function loadCompletedKeys(basePath: string): string[] { @@ -524,6 +644,66 @@ function detectErrorTraces(traces: UnitTrace[], anomalies: ForensicAnomaly[]): v } } +function detectJournalAnomalies(journal: JournalSummary | null, anomalies: ForensicAnomaly[]): void { + if (!journal) return; + + // Detect stuck-detected events from the journal + const stuckCount = journal.eventCounts["stuck-detected"] ?? 0; + if (stuckCount > 0) { + anomalies.push({ + type: "journal-stuck", + severity: stuckCount >= 3 ? "error" : "warning", + summary: `Journal recorded ${stuckCount} stuck-detected event(s)`, + details: `The auto-mode loop detected it was stuck ${stuckCount} time(s). 
Check journal events for flow IDs and causal chains to trace the root cause.`, + }); + } + + // Detect guard-block events (dispatch was blocked by a guard) + const guardCount = journal.eventCounts["guard-block"] ?? 0; + if (guardCount > 0) { + anomalies.push({ + type: "journal-guard-block", + severity: guardCount >= 5 ? "warning" : "info", + summary: `Journal recorded ${guardCount} guard-block event(s)`, + details: `Dispatch was blocked by a guard condition ${guardCount} time(s). This may indicate a persistent blocking condition preventing progress.`, + }); + } + + // Detect rapid iterations (many flows in short time = likely thrashing) + if (journal.flowCount > 0 && journal.oldestEntry && journal.newestEntry) { + const oldest = new Date(journal.oldestEntry).getTime(); + const newest = new Date(journal.newestEntry).getTime(); + const spanMs = newest - oldest; + if (spanMs > 0 && journal.flowCount > 10) { + const avgMs = spanMs / journal.flowCount; + if (avgMs < 5000) { // Less than 5 seconds per iteration + anomalies.push({ + type: "journal-rapid-iterations", + severity: "warning", + summary: `${journal.flowCount} iterations in ${formatDuration(spanMs)} (avg ${formatDuration(avgMs)}/iteration)`, + details: `Unusually rapid iteration cadence suggests the loop may be thrashing without making progress. Review recent journal events for dispatch-stop or terminal events.`, + }); + } + } + } + + // Detect worktree failures from journal events + const wtCreateFailed = journal.eventCounts["worktree-create-failed"] ?? 0; + const wtMergeFailed = journal.eventCounts["worktree-merge-failed"] ?? 
0; + const wtFailures = wtCreateFailed + wtMergeFailed; + if (wtFailures > 0) { + const parts: string[] = []; + if (wtCreateFailed > 0) parts.push(`${wtCreateFailed} create failure(s)`); + if (wtMergeFailed > 0) parts.push(`${wtMergeFailed} merge failure(s)`); + anomalies.push({ + type: "journal-worktree-failure", + severity: "warning", + summary: `Worktree failures: ${parts.join(", ")}`, + details: `Journal recorded worktree operation failures. These may indicate git state corruption or conflicting branches.`, + }); + } +} + // ─── Report Persistence ─────────────────────────────────────────────────────── function saveForensicReport(basePath: string, report: ForensicReport, problemDescription: string): string { @@ -600,6 +780,45 @@ function saveForensicReport(basePath: string, report: ForensicReport, problemDes sections.push(redact(formatCrashInfo(report.crashLock)), ``); } + // Activity log metadata + if (report.activityLogMeta) { + const meta = report.activityLogMeta; + sections.push(`## Activity Log Metadata`, ``); + sections.push(`- Files: ${meta.fileCount}`); + sections.push(`- Total size: ${(meta.totalSizeBytes / 1024).toFixed(1)} KB`); + if (meta.oldestFile) sections.push(`- Oldest: ${meta.oldestFile}`); + if (meta.newestFile) sections.push(`- Newest: ${meta.newestFile}`); + sections.push(``); + } + + // Journal summary + if (report.journalSummary) { + const js = report.journalSummary; + sections.push(`## Journal Summary`, ``); + sections.push(`- Total entries: ${js.totalEntries}`); + sections.push(`- Distinct flows (iterations): ${js.flowCount}`); + sections.push(`- Daily files: ${js.fileCount}`); + if (js.oldestEntry) sections.push(`- Date range: ${js.oldestEntry} — ${js.newestEntry}`); + sections.push(``); + sections.push(`### Event Type Distribution`, ``); + sections.push(`| Event Type | Count |`); + sections.push(`|------------|-------|`); + for (const [evType, count] of Object.entries(js.eventCounts).sort((a, b) => b[1] - a[1])) { + sections.push(`| 
${evType} | ${count} |`); + } + sections.push(``); + if (js.recentEvents.length > 0) { + sections.push(`### Recent Journal Events (last ${js.recentEvents.length})`, ``); + for (const ev of js.recentEvents) { + const parts = [`${ev.ts} [${ev.eventType}] flow=${ev.flowId.slice(0, 8)}`]; + if (ev.rule) parts.push(`rule=${ev.rule}`); + if (ev.unitId) parts.push(`unit=${ev.unitId}`); + sections.push(`- ${parts.join(" ")}`); + } + sections.push(``); + } + } + writeFileSync(filePath, sections.join("\n"), "utf-8"); return filePath; } @@ -681,6 +900,41 @@ function formatReportForPrompt(report: ForensicReport): string { sections.push(""); } + // Activity log metadata + if (report.activityLogMeta) { + const meta = report.activityLogMeta; + sections.push("### Activity Log Overview"); + sections.push(`- Files: ${meta.fileCount}, Total size: ${(meta.totalSizeBytes / 1024).toFixed(1)} KB`); + if (meta.oldestFile) sections.push(`- Oldest: ${meta.oldestFile}`); + if (meta.newestFile) sections.push(`- Newest: ${meta.newestFile}`); + sections.push(""); + } + + // Journal summary — structured event timeline + if (report.journalSummary) { + const js = report.journalSummary; + sections.push("### Journal Summary (Iteration Event Log)"); + sections.push(`- Total entries: ${js.totalEntries}, Distinct flows: ${js.flowCount}, Daily files: ${js.fileCount}`); + if (js.oldestEntry) sections.push(`- Date range: ${js.oldestEntry} — ${js.newestEntry}`); + + // Event type distribution (compact) + const eventPairs = Object.entries(js.eventCounts).sort((a, b) => b[1] - a[1]); + sections.push(`- Events: ${eventPairs.map(([t, c]) => `${t}(${c})`).join(", ")}`); + + // Recent events timeline (for tracing what just happened) + if (js.recentEvents.length > 0) { + sections.push(""); + sections.push(`**Recent Journal Events (last ${js.recentEvents.length}):**`); + for (const ev of js.recentEvents) { + const parts = [`${ev.ts} [${ev.eventType}] flow=${ev.flowId.slice(0, 8)}`]; + if (ev.rule) 
parts.push(`rule=${ev.rule}`); + if (ev.unitId) parts.push(`unit=${ev.unitId}`); + sections.push(`- ${parts.join(" ")}`); + } + } + sections.push(""); + } + // Completed keys count sections.push(`### Completed Keys: ${report.completedKeys.length}`); sections.push(`### GSD Version: ${report.gsdVersion}`); diff --git a/src/resources/extensions/gsd/prompts/forensics.md b/src/resources/extensions/gsd/prompts/forensics.md index bad2a126b..6be348c6e 100644 --- a/src/resources/extensions/gsd/prompts/forensics.md +++ b/src/resources/extensions/gsd/prompts/forensics.md @@ -36,6 +36,8 @@ GSD extension source code is at: `{{gsdSourceDir}}` ├── doctor-history.jsonl — doctor check history ├── activity/ — session activity logs (JSONL per unit) │ └── {seq}-{unitType}-{unitId}.jsonl +├── journal/ — structured event journal (JSONL per day) +│ └── YYYY-MM-DD.jsonl ├── runtime/ │ ├── paused-session.json — serialized session when auto pauses │ └── headless-context.md — headless resume context @@ -60,6 +62,32 @@ GSD extension source code is at: `{{gsdSourceDir}}` - `usage` field on assistant messages: `input`, `output`, `cacheRead`, `cacheWrite`, `totalTokens`, `cost` - **To trace a failure**: find the last activity log, search for `isError: true` tool results, then read the agent's reasoning text preceding that error +### Journal Format (`.gsd/journal/`) + +The journal is a structured event log for auto-mode iterations. Each daily file contains JSONL entries: + +``` +{ ts: "ISO-8601", flowId: "UUID", seq: 0, eventType: "iteration-start", rule?: "rule-name", causedBy?: { flowId, seq }, data?: { unitId, status, ... } } +``` + +**Key event types:** +- `iteration-start` / `iteration-end` — marks loop iteration boundaries +- `dispatch-match` / `dispatch-stop` — what the auto-mode decided to do (or not do) +- `unit-start` / `unit-end` — lifecycle of individual work units +- `terminal` — auto-mode reached a terminal state (all done, budget exceeded, etc.) 
+- `guard-block` — dispatch was blocked by a guard condition (e.g. needs user input) +- `stuck-detected` — the loop detected it was stuck (same unit repeatedly dispatched) +- `milestone-transition` — a milestone was promoted or completed +- `worktree-enter` / `worktree-create-failed` / `worktree-merge-start` / `worktree-merge-failed` — worktree operations + +**Key concepts:** +- **flowId**: UUID grouping all events in one iteration. Use to reconstruct what happened in a single loop pass. +- **causedBy**: Cross-reference to a prior event (same or different flow). Enables causal chain tracing. +- **seq**: Monotonically increasing within a flow. Reconstruct event order within an iteration. + +**To trace a stuck loop**: filter for `stuck-detected` events, then follow `flowId` to see the surrounding dispatch and unit events. +**To trace a guard block**: filter for `guard-block` events, check `data.reason` for why dispatch was blocked. + ### Crash Lock Format (`auto.lock`) JSON with fields: `pid`, `startedAt`, `unitType`, `unitId`, `unitStartedAt`, `completedUnits`, `sessionFile` @@ -78,20 +106,24 @@ A unit dispatched more than once (`type/id` appears multiple times) indicates a 1. **Start with the pre-parsed forensic report** above. The anomaly section contains automated findings — treat these as leads, not conclusions. -2. **Form hypotheses** about which module and code path is responsible. Use the source map to identify candidate files. +2. **Check the journal timeline** if present. The journal events show the auto-mode's decision sequence (dispatches, guards, stuck detection, worktree operations). Use flow IDs to group related events and trace causal chains. -3. **Read the actual GSD source code** at `{{gsdSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it. +3. **Cross-reference activity logs and journal**. Activity logs show *what the LLM did* (tool calls, reasoning, errors). 
Journal events show *what auto-mode decided* (dispatch rules, iteration boundaries, state transitions). Together they reveal the full picture. -4. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files. +4. **Form hypotheses** about which module and code path is responsible. Use the source map to identify candidate files. -5. **Identify the specific file and line** where the bug lives. Determine what kind of defect it is: +5. **Read the actual GSD source code** at `{{gsdSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it. + +6. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files. + +7. **Identify the specific file and line** where the bug lives. Determine what kind of defect it is: - Missing edge case / unhandled condition - Wrong boolean logic or comparison - Race condition or ordering issue - State corruption (e.g. completed-units.json out of sync with artifacts) - Timeout / recovery logic not triggering correctly -6. **Clarify if needed.** Use ask_user_questions (max 2 questions) only if the report is genuinely insufficient. Do not ask questions you can answer from the data or source code. +8. **Clarify if needed.** Use ask_user_questions (max 2 questions) only if the report is genuinely insufficient. Do not ask questions you can answer from the data or source code. 
## Output diff --git a/src/resources/extensions/gsd/tests/forensics-journal.test.ts b/src/resources/extensions/gsd/tests/forensics-journal.test.ts new file mode 100644 index 000000000..f086e6f6f --- /dev/null +++ b/src/resources/extensions/gsd/tests/forensics-journal.test.ts @@ -0,0 +1,107 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const gsdDir = join(__dirname, ".."); + +describe("forensics journal & activity log awareness", () => { + const forensicsSrc = readFileSync(join(gsdDir, "forensics.ts"), "utf-8"); + const promptSrc = readFileSync(join(gsdDir, "prompts", "forensics.md"), "utf-8"); + + it("forensics.ts imports queryJournal from journal module", () => { + assert.ok( + forensicsSrc.includes('from "./journal.js"') || forensicsSrc.includes("from './journal.js'"), + "forensics.ts must import from journal.js", + ); + assert.ok( + forensicsSrc.includes("queryJournal"), + "forensics.ts must reference queryJournal", + ); + }); + + it("ForensicReport includes journalSummary field", () => { + assert.ok( + forensicsSrc.includes("journalSummary"), + "ForensicReport must include journalSummary field", + ); + }); + + it("ForensicReport includes activityLogMeta field", () => { + assert.ok( + forensicsSrc.includes("activityLogMeta"), + "ForensicReport must include activityLogMeta field", + ); + }); + + it("buildForensicReport calls scanJournalForForensics", () => { + assert.ok( + forensicsSrc.includes("scanJournalForForensics"), + "buildForensicReport must call scanJournalForForensics", + ); + }); + + it("buildForensicReport calls gatherActivityLogMeta", () => { + assert.ok( + forensicsSrc.includes("gatherActivityLogMeta"), + "buildForensicReport must call gatherActivityLogMeta", + ); + }); + + it("forensics detects journal-based anomalies", 
() => { + assert.ok( + forensicsSrc.includes("detectJournalAnomalies"), + "forensics.ts must have detectJournalAnomalies function", + ); + // Check for specific journal anomaly types + assert.ok(forensicsSrc.includes('"journal-stuck"'), "must detect journal-stuck anomalies"); + assert.ok(forensicsSrc.includes('"journal-guard-block"'), "must detect journal-guard-block anomalies"); + assert.ok(forensicsSrc.includes('"journal-rapid-iterations"'), "must detect journal-rapid-iterations anomalies"); + assert.ok(forensicsSrc.includes('"journal-worktree-failure"'), "must detect journal-worktree-failure anomalies"); + }); + + it("formatReportForPrompt includes journal summary section", () => { + assert.ok( + forensicsSrc.includes("Journal Summary"), + "prompt formatter must include a Journal Summary section", + ); + }); + + it("formatReportForPrompt includes activity log overview section", () => { + assert.ok( + forensicsSrc.includes("Activity Log Overview"), + "prompt formatter must include an Activity Log Overview section", + ); + }); + + it("forensics prompt documents journal format", () => { + assert.ok( + promptSrc.includes("### Journal Format"), + "forensics.md must document the journal format", + ); + assert.ok( + promptSrc.includes("flowId"), + "forensics.md must reference flowId concept", + ); + assert.ok( + promptSrc.includes("causedBy"), + "forensics.md must reference causedBy for causal chains", + ); + }); + + it("forensics prompt includes journal directory in runtime path reference", () => { + assert.ok( + promptSrc.includes("journal/"), + "forensics.md runtime path reference must include journal/", + ); + }); + + it("investigation protocol references journal data", () => { + assert.ok( + promptSrc.includes("journal timeline") || promptSrc.includes("journal events"), + "investigation protocol must reference journal data for tracing", + ); + }); +}); diff --git a/src/tests/web-diagnostics-contract.test.ts b/src/tests/web-diagnostics-contract.test.ts index 
633dec3c4..ede1e68dd 100644 --- a/src/tests/web-diagnostics-contract.test.ts +++ b/src/tests/web-diagnostics-contract.test.ts @@ -69,6 +69,8 @@ describe("diagnostics type exports", () => { unitTraces: [], completedKeyCount: 0, metrics: null, + journalSummary: null, + activityLogMeta: null, } assert.equal(typeof report.gsdVersion, "string") assert.equal(typeof report.timestamp, "string") @@ -79,6 +81,8 @@ describe("diagnostics type exports", () => { assert.equal(typeof report.doctorIssueCount, "number") assert.equal(typeof report.unitTraceCount, "number") assert.equal(typeof report.completedKeyCount, "number") + assert.equal(report.journalSummary, null) + assert.equal(report.activityLogMeta, null) }) it("ForensicMetricsSummary has required fields", () => { diff --git a/src/web/forensics-service.ts b/src/web/forensics-service.ts index e40703055..445fa59e6 100644 --- a/src/web/forensics-service.ts +++ b/src/web/forensics-service.ts @@ -70,6 +70,8 @@ export async function collectForensicsData(projectCwdOverride?: string): Promise ' unitTraces,', ' completedKeyCount: (report.completedKeys || []).length,', ' metrics,', + ' journalSummary: report.journalSummary || null,', + ' activityLogMeta: report.activityLogMeta || null,', '};', 'process.stdout.write(JSON.stringify(result));', ].join(" ") diff --git a/web/lib/diagnostics-types.ts b/web/lib/diagnostics-types.ts index 079e25ec1..5e39c612b 100644 --- a/web/lib/diagnostics-types.ts +++ b/web/lib/diagnostics-types.ts @@ -13,6 +13,10 @@ export type ForensicAnomalyType = | "crash" | "doctor-issue" | "error-trace" + | "journal-stuck" + | "journal-guard-block" + | "journal-rapid-iterations" + | "journal-worktree-failure" export interface ForensicAnomaly { type: ForensicAnomalyType @@ -56,6 +60,23 @@ export interface ForensicRecentUnit { finishedAt: number } +export interface ForensicActivityLogMeta { + fileCount: number + totalSizeBytes: number + oldestFile: string | null + newestFile: string | null +} + +export interface 
ForensicJournalSummary { + totalEntries: number + flowCount: number + eventCounts: Record<string, number> + recentEvents: { ts: string; flowId: string; eventType: string; rule?: string; unitId?: string }[] + oldestEntry: string | null + newestEntry: string | null + fileCount: number +} + export interface ForensicReport { gsdVersion: string timestamp: string @@ -70,6 +91,8 @@ export interface ForensicReport { unitTraces: ForensicUnitTrace[] completedKeyCount: number metrics: ForensicMetricsSummary | null + journalSummary: ForensicJournalSummary | null + activityLogMeta: ForensicActivityLogMeta | null } // ─── Doctor ───────────────────────────────────────────────────────────────────