fix: make journal scanning intelligent — limit parsed files, line-count older ones
scanJournalForForensics() previously called queryJournal() which loaded ALL journal entries from ALL daily files into memory. For long-running projects this could be thousands of entries and megabytes of data.

Now:
- Only the last 3 daily files are fully JSON-parsed (event counts, flows)
- Older files are line-counted only (no JSON parsing) for totals
- Recent events use a rolling window of 20 (shift, not accumulate)
- Constants MAX_JOURNAL_RECENT_FILES and MAX_JOURNAL_RECENT_EVENTS make limits explicit and tunable

Activity log scanning was already intelligent:
- nativeParseJsonlTail with 10MB byte cap
- Only last 5 files scanned
- extractTrace() distills raw JSONL into compact ExecutionTrace structs
- formatReportForPrompt has 30KB hard cap on total output

Co-authored-by: glittercowboy <186001655+glittercowboy@users.noreply.github.com>
Agent-Logs-Url: https://github.com/gsd-build/gsd-2/sessions/7e7f71ec-0d56-409b-930e-5dff1305ff2a
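The byte-capped tail read mentioned for activity logs is pre-existing code and does not appear in this diff, so here is a minimal sketch of the idea for context: read at most the last N bytes of a JSONL file and parse whole lines from that tail. The helper name readJsonlTail, its signature, and the drop-first-partial-line handling are illustrative only; the project's actual nativeParseJsonlTail is defined elsewhere and may differ.

    // Minimal sketch (not the project's nativeParseJsonlTail): read at most the
    // last `maxBytes` of a JSONL file and parse whole lines from that tail.
    import { openSync, fstatSync, readSync, closeSync } from "node:fs";

    function readJsonlTail(path: string, maxBytes = 10 * 1024 * 1024): unknown[] {
      const fd = openSync(path, "r");
      try {
        const size = fstatSync(fd).size;
        const start = Math.max(0, size - maxBytes);
        const buf = Buffer.alloc(size - start);
        readSync(fd, buf, 0, buf.length, start);
        let text = buf.toString("utf-8");
        // If we started mid-file, the first line is probably partial; drop it.
        if (start > 0) text = text.slice(text.indexOf("\n") + 1);
        const entries: unknown[] = [];
        for (const line of text.split("\n")) {
          if (!line.trim()) continue;
          try { entries.push(JSON.parse(line)); } catch { /* skip malformed lines */ }
        }
        return entries;
      } finally {
        closeSync(fd);
      }
    }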
This commit is contained in:
parent: ce4720bad8
commit: aee8973d81
2 changed files with 155 additions and 30 deletions
@@ -28,7 +28,6 @@ import { deriveState } from "./state.js";
 import { isAutoActive } from "./auto.js";
 import { loadPrompt } from "./prompt-loader.js";
 import { gsdRoot } from "./paths.js";
-import { queryJournal } from "./journal.js";
 import { formatDuration } from "../shared/format-utils.js";
 import { getAutoWorktreePath } from "./auto-worktree.js";
 import { loadEffectiveGSDPreferences, loadGlobalGSDPreferences, getGlobalGSDPreferencesPath } from "./preferences.js";
@@ -63,13 +62,19 @@ interface ActivityLogMeta {
   newestFile: string | null;
 }
 
-/** Summary of .gsd/journal/ data for forensic investigation. */
+/**
+ * Summary of .gsd/journal/ data for forensic investigation.
+ *
+ * To avoid loading huge journal histories into memory, only the most recent
+ * daily files are fully parsed. Older files are line-counted for totals.
+ * Event counts and flow IDs reflect only recent files.
+ */
 interface JournalSummary {
-  /** Total journal entries scanned */
+  /** Total journal entries across all files (recent parsed + older line-counted) */
   totalEntries: number;
-  /** Distinct flow IDs (each = one auto-mode iteration) */
+  /** Distinct flow IDs from recent files (each = one auto-mode iteration) */
   flowCount: number;
-  /** Event counts by type */
+  /** Event counts by type (from recent files only) */
   eventCounts: Record<string, number>;
   /** Most recent journal entries (last 20) for context */
   recentEvents: { ts: string; flowId: string; eventType: string; rule?: string; unitId?: string }[];
@@ -422,6 +427,24 @@ function resolveActivityDirs(basePath: string, activeMilestone?: string | null):
 
 // ─── Journal Scanner ──────────────────────────────────────────────────────────
 
+/**
+ * Max recent journal files to fully parse for event counts and recent events.
+ * Older files are line-counted only to avoid loading huge amounts of data.
+ */
+const MAX_JOURNAL_RECENT_FILES = 3;
+
+/** Max recent events to extract for the forensic report timeline. */
+const MAX_JOURNAL_RECENT_EVENTS = 20;
+
+/**
+ * Intelligently scan journal files for forensic summary.
+ *
+ * Journal files can be huge (thousands of JSONL entries over weeks of auto-mode).
+ * Instead of loading all entries into memory:
+ * - Only fully parse the most recent N daily files (event counts, flow tracking)
+ * - Line-count older files for approximate totals (no JSON parsing)
+ * - Extract only the last 20 events for the timeline
+ */
 function scanJournalForForensics(basePath: string): JournalSummary | null {
   try {
     const journalDir = join(gsdRoot(basePath), "journal");
@@ -430,33 +453,80 @@ function scanJournalForForensics(basePath: string): JournalSummary | null {
     const files = readdirSync(journalDir).filter(f => f.endsWith(".jsonl")).sort();
     if (files.length === 0) return null;
 
-    const entries = queryJournal(basePath);
-    if (entries.length === 0) return null;
+    // Split into recent (fully parsed) and older (line-counted only)
+    const recentFiles = files.slice(-MAX_JOURNAL_RECENT_FILES);
+    const olderFiles = files.slice(0, -MAX_JOURNAL_RECENT_FILES);
 
-    // Count events by type
-    const eventCounts: Record<string, number> = {};
-    const flowIds = new Set<string>();
-    for (const e of entries) {
-      eventCounts[e.eventType] = (eventCounts[e.eventType] ?? 0) + 1;
-      flowIds.add(e.flowId);
+    // Line-count older files without parsing — avoids loading megabytes of JSON
+    let olderEntryCount = 0;
+    let oldestEntry: string | null = null;
+    for (const file of olderFiles) {
+      try {
+        const raw = readFileSync(join(journalDir, file), "utf-8");
+        const lines = raw.split("\n");
+        for (const line of lines) {
+          if (!line.trim()) continue;
+          olderEntryCount++;
+          // Extract only the timestamp from the first non-empty line of the oldest file
+          if (!oldestEntry) {
+            try {
+              const parsed = JSON.parse(line) as { ts?: string };
+              if (parsed.ts) oldestEntry = parsed.ts;
+            } catch { /* skip malformed */ }
+          }
+        }
+      } catch { /* skip unreadable files */ }
     }
 
-    // Extract recent events (last 20) with key fields for the report
-    const recentEvents = entries.slice(-20).map(e => ({
-      ts: e.ts,
-      flowId: e.flowId,
-      eventType: e.eventType,
-      rule: e.rule,
-      unitId: e.data?.unitId as string | undefined,
-    }));
+    // Fully parse recent files for event counts and timeline
+    const eventCounts: Record<string, number> = {};
+    const flowIds = new Set<string>();
+    const recentParsedEntries: { ts: string; flowId: string; eventType: string; rule?: string; unitId?: string }[] = [];
+    let recentEntryCount = 0;
+
+    for (const file of recentFiles) {
+      try {
+        const raw = readFileSync(join(journalDir, file), "utf-8");
+        for (const line of raw.split("\n")) {
+          if (!line.trim()) continue;
+          try {
+            const entry = JSON.parse(line) as { ts: string; flowId: string; eventType: string; rule?: string; data?: Record<string, unknown> };
+            recentEntryCount++;
+            eventCounts[entry.eventType] = (eventCounts[entry.eventType] ?? 0) + 1;
+            flowIds.add(entry.flowId);
+
+            if (!oldestEntry) oldestEntry = entry.ts;
+
+            // Keep a rolling window of last N events — avoids accumulating unbounded arrays
+            recentParsedEntries.push({
+              ts: entry.ts,
+              flowId: entry.flowId,
+              eventType: entry.eventType,
+              rule: entry.rule,
+              unitId: entry.data?.unitId as string | undefined,
+            });
+            if (recentParsedEntries.length > MAX_JOURNAL_RECENT_EVENTS) {
+              recentParsedEntries.shift();
+            }
+          } catch { /* skip malformed lines */ }
+        }
+      } catch { /* skip unreadable files */ }
+    }
+
+    const totalEntries = olderEntryCount + recentEntryCount;
+    if (totalEntries === 0) return null;
+
+    const newestEntry = recentParsedEntries.length > 0
+      ? recentParsedEntries[recentParsedEntries.length - 1]!.ts
+      : null;
 
     return {
-      totalEntries: entries.length,
+      totalEntries,
       flowCount: flowIds.size,
       eventCounts,
-      recentEvents,
-      oldestEntry: entries[0]?.ts ?? null,
-      newestEntry: entries[entries.length - 1]?.ts ?? null,
+      recentEvents: recentParsedEntries,
+      oldestEntry,
+      newestEntry,
       fileCount: files.length,
     };
   } catch {
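For orientation, a hypothetical call site for the function above. The diff does not show where scanJournalForForensics is consumed, so the basePath value and the logging below are illustrative only; the field names come from the JournalSummary interface in this commit.

    // Hypothetical usage: the real consumer lives elsewhere in forensics.ts.
    const summary = scanJournalForForensics(process.cwd());
    if (summary) {
      console.log(`journal: ${summary.totalEntries} entries across ${summary.fileCount} files`);
      console.log(`recent flows: ${summary.flowCount}, newest event: ${summary.newestEntry ?? "n/a"}`);
    }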
@@ -11,14 +11,34 @@ describe("forensics journal & activity log awareness", () => {
   const forensicsSrc = readFileSync(join(gsdDir, "forensics.ts"), "utf-8");
   const promptSrc = readFileSync(join(gsdDir, "prompts", "forensics.md"), "utf-8");
 
-  it("forensics.ts imports queryJournal from journal module", () => {
+  it("scanJournalForForensics reads journal files directly (no full queryJournal load)", () => {
+    // Must NOT use queryJournal which loads ALL entries into memory
     assert.ok(
-      forensicsSrc.includes('from "./journal.js"') || forensicsSrc.includes("from './journal.js'"),
-      "forensics.ts must import from journal.js",
+      !forensicsSrc.includes('queryJournal('),
+      "forensics.ts must NOT call queryJournal() which loads all entries at once",
     );
+    // Must have its own journal scanning with file-level limits
+    assert.ok(
+      forensicsSrc.includes("scanJournalForForensics"),
+      "forensics.ts must have scanJournalForForensics function",
+    );
+  });
+
+  it("journal scanning limits files parsed to avoid memory bloat", () => {
+    assert.ok(
+      forensicsSrc.includes("MAX_JOURNAL_RECENT_FILES"),
+      "must have MAX_JOURNAL_RECENT_FILES constant to limit parsed files",
+    );
     assert.ok(
-      forensicsSrc.includes("queryJournal"),
-      "forensics.ts must reference queryJournal",
+      forensicsSrc.includes("MAX_JOURNAL_RECENT_EVENTS"),
+      "must have MAX_JOURNAL_RECENT_EVENTS constant to limit events extracted",
    );
   });
+
+  it("older journal files are line-counted without full JSON parse", () => {
+    assert.ok(
+      forensicsSrc.includes("olderEntryCount") || forensicsSrc.includes("olderFiles"),
+      "must handle older files separately from recent files",
+    );
+  });
 
@@ -76,6 +96,41 @@ describe("forensics journal & activity log awareness", () => {
     );
   });
 
+  it("activity log scanning uses tail-read with byte cap (not full file load)", () => {
+    // scanActivityLogs uses nativeParseJsonlTail + MAX_JSONL_BYTES for efficient reading
+    assert.ok(
+      forensicsSrc.includes("nativeParseJsonlTail"),
+      "activity log scanning must use nativeParseJsonlTail for tail-reading",
+    );
+    assert.ok(
+      forensicsSrc.includes("MAX_JSONL_BYTES"),
+      "activity log scanning must respect MAX_JSONL_BYTES cap",
+    );
+    // Only reads last 5 files
+    assert.ok(
+      forensicsSrc.includes("slice(-5)"),
+      "activity log scanning must limit to last 5 files",
+    );
+  });
+
+  it("activity log entries are distilled through extractTrace, not sent raw", () => {
+    assert.ok(
+      forensicsSrc.includes("extractTrace("),
+      "activity log entries must be distilled through extractTrace before reporting",
+    );
+  });
+
+  it("prompt output is hard-capped at 30KB", () => {
+    assert.ok(
+      forensicsSrc.includes("MAX_BYTES") && forensicsSrc.includes("30 * 1024"),
+      "formatReportForPrompt must have a 30KB hard cap",
+    );
+    assert.ok(
+      forensicsSrc.includes("truncated at 30KB"),
+      "prompt must show truncation message when capped",
+    );
+  });
+
   it("forensics prompt documents journal format", () => {
     assert.ok(
       promptSrc.includes("### Journal Format"),