From 8487507d1b971af8718f70edbdb5cf81ddb7d3f9 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 30 Apr 2026 08:41:49 +0200 Subject: [PATCH] Add TODO triage and validation recheck flow --- TODO.md | 142 +++++ src/resources/extensions/sf/auto-dispatch.ts | 116 +++- src/resources/extensions/sf/commands-todo.ts | 499 ++++++++++++++++++ .../extensions/sf/commands/catalog.ts | 8 +- .../extensions/sf/commands/handlers/ops.ts | 5 + .../remediation-completion-guard.test.ts | 85 +++ 6 files changed, 834 insertions(+), 21 deletions(-) create mode 100644 TODO.md create mode 100644 src/resources/extensions/sf/commands-todo.ts diff --git a/TODO.md b/TODO.md new file mode 100644 index 000000000..265d2dba0 --- /dev/null +++ b/TODO.md @@ -0,0 +1,142 @@ +# TODO + +Dump anything here. + +SF agentic engineering / harness / memory / eval context dump: + +We want a low-friction dump inbox that turns rough human notes into project +evals, harness work, memory requirements, docs, tests, or implementation tasks. +Root TODO.md is the dump place. AGENTS.md carries the durable instruction: +agents should read TODO.md when present, triage it, and clear processed notes +after converting them into reviewable artifacts. + +Important split: +- AGENTS.md = durable startup-visible instructions. +- TODO.md = messy temporary dump inbox. +- Memory = experience store. +- GEPA/DSPy/self-evolution = offline lab. +- Runtime agent = uses approved skills/prompts/tools/memory, not unreviewed + evolved candidates. + +Harness.io note: +- Harness Agents are AI workers inside Harness CI/CD pipelines. +- They inherit pipeline context, secrets, RBAC, approvals, logs, and OPA policy. +- Useful SF lesson: run agents inside a governed workflow with permissions, + logs, approvals, artifacts, reusable templates, and reviewable outputs. +- This is different from repo-native test/eval harnesses, but the control-plane + pattern is valuable. 
+ +Current SF state: +- Auto-mode safety harness exists and is default-on: evidence collection, + file-change validation, evidence cross-reference, destructive command + warnings, content validation, checkpoints. Auto rollback is off by default. +- gate-evaluate exists but is opt-in via gate_evaluation.enabled. +- Repo-native harness evolution is mostly read-only/proposed today: + /sf harness profile records repo facts in .sf/sf.db, but does not yet enforce + harness/manifest gates or write harness/, gates/, eval suites, or CI files. + +Slow conversion of TS into fast agents: +- Do not rewrite the deterministic SF state machine into LLM behavior. +- Keep TypeScript for CLI, TUI, extension API, preferences, state machine, DB + schema, safety gates, prompt rendering, workflow orchestration, and file + ownership rules. +- Convert fuzzy/read-only work into narrow agents: repo profiling + interpretation, TODO triage, eval generation, harness proposal, failure + analysis, review, remediation proposals, memory extraction, drift detection. +- SF remains the orchestrator and ledger. Agents consume typed jobs and return + structured JSON. + +Possible AgentJob shape: + +type AgentJob = + | { kind: "repo_profile"; cwd: string } + | { kind: "todo_triage"; cwd: string; todoPath: string } + | { kind: "eval_candidate_generation"; cwd: string; sources: string[] } + | { kind: "failure_analysis"; cwd: string; runId: string } + | { kind: "harness_proposal"; cwd: string; profileId: string }; + +First useful agents: +- TODO triage agent: reads TODO.md, creates eval candidates, implementation + tasks, memory facts, docs/harness suggestions, then clears processed notes. +- Eval candidate agent: converts notes/session failures into JSONL with + task_input, expected_behavior, failure_mode, evidence, source. +- Repo profile interpretation agent: uses deterministic TS repo-profiler output + and identifies missing gates/evals/docs. 
+- Harness proposal agent: produces dry-run proposals only; no tracked file + writes except reviewed artifacts later. +- Remediation agent: later, after evals are stable, takes failing evals and + proposes code/test patches. + +Speed strategy: +- Deterministic TS: scan files, parse manifests, read git state, write DB rows. +- Cheap/local model agents: classify dump notes, summarize failures, label risk. +- Strong model agents: propose harnesses, generate eval rubrics, repair complex + failures. + +Desired pipeline: +TODO.md dump -> triage agent -> eval candidate JSONL / backlog / docs / tests +-> reviewed project artifact -> eval suite / harness gate -> self-evolution +can consume later. + +Potential eval candidate JSONL shape: + +{ + "id": "sf.todo-triage.001", + "task_input": "...", + "expected_behavior": "...", + "failure_mode": "...", + "evidence": "...", + "source": "TODO.md" +} + +Self-evolution principle: +- Repeated failure -> add eval first, then fix behavior. +- Raw memory/dump notes are evidence, not approved behavior. +- GEPA/DSPy output must become reviewable diffs against skills/prompts/tool + descriptions and pass held-out evals plus deterministic gates. + +GEPA/DSPy placement across SF vs memory/brain: +- GEPA/DSPy should not run inside normal SF runtime turns and should not live + as direct mutable memory behavior. +- SF owns the project workflow control plane: TODO triage, backlog handoff, + eval artifacts, harness proposals, deterministic gates, reviewed diffs, and + dispatch rules. +- Memory/brain owns durable experience: session traces, user corrections, + repeated failures, successful patterns, evidence IDs, source sessions, and + recall/export APIs. +- Memory/brain should expose dataset export surfaces for SF/self-evolution: + "give me candidate eval cases for this repo/risk/skill/tool from past + evidence". 
+- GEPA/DSPy consumes approved eval datasets and memory-exported candidates + offline, proposes prompt/skill/tool-description diffs, and hands those diffs + back to SF as reviewable implementation work. +- Accepted GEPA outputs become tracked repo artifacts or versioned SF resources, + not raw memory entries. +- Future home should be an offline evolution runner, either a separate repo + such as `singularity-evolution` or a clearly isolated SF package/command such + as `packages/evolution` plus `/sf evolve ...`. It should read + `.sf/triage/evals/*.evals.jsonl`, approved harness evals, and memory-exported + eval candidates; run DSPy/GEPA; then write candidate diffs/reports under + `.sf/evolution/` or a review branch. It must not mutate live prompts, + skills, memory, or tool descriptions directly. + +Proper info flow: +- Raw human dump: root TODO.md. +- Raw agent self-report: .sf/BACKLOG.md and ~/.sf/agent/upstream-feedback.jsonl. +- Raw session-derived evidence: Singularity Memory / brain. +- First normalizer: /sf todo triage for TODO.md now; future /sf inbox triage + should normalize TODO.md + self-feedback + memory exports through the same + schema. +- Normalized pending items live in .sf/triage/inbox/*.jsonl with source, kind, + evidence, status, and created_at. +- Human-readable triage reports live in .sf/triage/reports/*.md. +- Eval-ready cases live in .sf/triage/evals/*.evals.jsonl. +- Human/planner-visible implementation tasks may be copied into .sf/BACKLOG.md + with /sf todo triage --backlog, but auto-mode must not execute backlog + directly. Planning/reassessment proposes promotion; user or explicit command + approves promotion into roadmap/slice/task artifacts. +- Memory-worthy notes are retained by memory/brain only after triage attaches + evidence/source; raw TODO notes are not memory. +- Preferred triage model tier: MiniMax M2.7 highspeed when available, then + MiniMax M2.5 highspeed, then other cheap/fast classification models. 
Triage + is structuring/classification, not final code editing. diff --git a/src/resources/extensions/sf/auto-dispatch.ts b/src/resources/extensions/sf/auto-dispatch.ts index ffca3f311..6582f04a6 100644 --- a/src/resources/extensions/sf/auto-dispatch.ts +++ b/src/resources/extensions/sf/auto-dispatch.ts @@ -278,6 +278,52 @@ function validationAttentionMarkerPath(basePath: string, mid: string): string { ); } +function parseValidationRemediationRound(content: string): number | null { + const match = content.match(/^remediation_round:\s*(\d+)\s*$/m); + if (!match) return null; + const round = Number.parseInt(match[1]!, 10); + return Number.isFinite(round) ? round : null; +} + +interface ValidationAttentionMarker { + milestoneId?: string; + createdAt?: string; + source?: string; + remediationRound?: number | null; + revalidationRound?: number; + revalidationRequestedAt?: string; +} + +function readValidationAttentionMarker( + basePath: string, + mid: string, +): ValidationAttentionMarker | null { + const markerPath = validationAttentionMarkerPath(basePath, mid); + if (!existsSync(markerPath)) return null; + try { + const parsed = JSON.parse(readFileSync(markerPath, "utf-8")) as unknown; + if (!parsed || typeof parsed !== "object") return null; + return parsed as ValidationAttentionMarker; + } catch { + return null; + } +} + +function writeValidationAttentionMarker( + basePath: string, + mid: string, + marker: ValidationAttentionMarker, +): void { + mkdirSync(join(sfRoot(basePath), "runtime", "validation-attention"), { + recursive: true, + }); + writeFileSync( + validationAttentionMarkerPath(basePath, mid), + JSON.stringify(marker, null, 2) + "\n", + "utf-8", + ); +} + function validationAttentionRuntimePath(basePath: string, mid: string): string { return join( sfRoot(basePath), @@ -301,6 +347,31 @@ function hasActiveValidationAttentionMarker( return false; } +function shouldDispatchValidationAttentionRevalidation( + basePath: string, + mid: string, + validationContent: 
string, +): boolean { + if (!hasActiveValidationAttentionMarker(basePath, mid)) return false; + const marker = readValidationAttentionMarker(basePath, mid); + if (marker?.milestoneId && marker.milestoneId !== mid) return false; + + const currentRound = parseValidationRemediationRound(validationContent); + if (currentRound === null) return false; + const originalRound = + typeof marker?.remediationRound === "number" ? marker.remediationRound : -1; + if (currentRound <= originalRound) return false; + if (marker?.revalidationRound === currentRound) return false; + + writeValidationAttentionMarker(basePath, mid, { + ...marker, + milestoneId: mid, + revalidationRound: currentRound, + revalidationRequestedAt: new Date().toISOString(), + }); + return true; +} + function buildValidationAttentionRemediationPrompt( mid: string, midTitle: string, @@ -1156,27 +1227,14 @@ export const DISPATCH_RULES: DispatchRule[] = [ attentionPlan && !hasActiveValidationAttentionMarker(basePath, mid) ) { - const markerPath = validationAttentionMarkerPath(basePath, mid); try { - mkdirSync( - join(sfRoot(basePath), "runtime", "validation-attention"), - { - recursive: true, - }, - ); - writeFileSync( - markerPath, - JSON.stringify( - { - milestoneId: mid, - createdAt: new Date().toISOString(), - source: validationFile, - }, - null, - 2, - ) + "\n", - "utf-8", - ); + writeValidationAttentionMarker(basePath, mid, { + milestoneId: mid, + createdAt: new Date().toISOString(), + source: validationFile, + remediationRound: + parseValidationRemediationRound(validationContent), + }); } catch (err) { logWarning( "dispatch", @@ -1196,6 +1254,24 @@ export const DISPATCH_RULES: DispatchRule[] = [ ), }; } + if ( + shouldDispatchValidationAttentionRevalidation( + basePath, + mid, + validationContent, + ) + ) { + return { + action: "dispatch", + unitType: "validate-milestone", + unitId: mid, + prompt: await buildValidateMilestonePrompt( + mid, + midTitle, + basePath, + ), + }; + } } return { action: "stop", 
diff --git a/src/resources/extensions/sf/commands-todo.ts b/src/resources/extensions/sf/commands-todo.ts new file mode 100644 index 000000000..df5828905 --- /dev/null +++ b/src/resources/extensions/sf/commands-todo.ts @@ -0,0 +1,499 @@ +/** + * commands-todo.ts - triage the repo-root TODO.md dump inbox. + * + * Purpose: turn low-friction human dumps into reviewable eval, harness, memory, + * docs, test, and implementation artifacts without treating raw notes as + * approved runtime behavior. + * + * Consumer: `/sf todo triage` command. + */ + +import { + existsSync, + mkdirSync, + readFileSync, + writeFileSync, +} from "node:fs"; +import { dirname, join } from "node:path"; +import type { + ExtensionAPI, + ExtensionCommandContext, +} from "@singularity-forge/pi-coding-agent"; +import type { Api, AssistantMessage, Model } from "@singularity-forge/pi-ai"; +import { type LLMCallFn } from "./memory-extractor.js"; +import { projectRoot } from "./commands/context.js"; +import { sfRoot } from "./paths.js"; + +const EMPTY_TODO = "# TODO\n\nDump anything here.\n"; +const MAX_DUMP_CHARS = 48_000; +const PREFERRED_TRIAGE_MODEL_PATTERNS = [ + /minimax.*m2\.7.*highspeed/i, + /minimax.*m2\.5.*highspeed/i, + /minimax.*m2\.7/i, + /minimax.*m2\.5/i, + /haiku/i, +]; + +export interface TodoEvalCandidate { + id?: string; + task_input: string; + expected_behavior: string; + failure_mode?: string; + evidence?: string; + source?: string; + suggested_location?: string; +} + +export interface TodoTriageResult { + summary: string; + eval_candidates: TodoEvalCandidate[]; + implementation_tasks: string[]; + memory_requirements: string[]; + harness_suggestions: string[]; + docs_or_tests: string[]; + unclear_notes: string[]; +} + +interface NormalizedTriageItem { + id: string; + source: "todo.md"; + kind: + | "eval_candidate" + | "implementation_task" + | "memory_requirement" + | "harness_suggestion" + | "docs_or_tests" + | "unclear_note"; + content: string; + evidence?: string; + status: 
"pending"; + created_at: string; +} + +function timestampId(date = new Date()): string { + const pad = (n: number) => String(n).padStart(2, "0"); + return [ + date.getFullYear(), + pad(date.getMonth() + 1), + pad(date.getDate()), + "-", + pad(date.getHours()), + pad(date.getMinutes()), + pad(date.getSeconds()), + ].join(""); +} + +function extractJsonObject(text: string): string { + const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/i); + if (fenced?.[1]?.trim()) return fenced[1].trim(); + + const first = text.indexOf("{"); + const last = text.lastIndexOf("}"); + if (first !== -1 && last > first) return text.slice(first, last + 1); + return text; +} + +function stringArray(value: unknown): string[] { + if (!Array.isArray(value)) return []; + return value + .filter((item): item is string => typeof item === "string") + .map((item) => item.trim()) + .filter(Boolean); +} + +function evalCandidates(value: unknown): TodoEvalCandidate[] { + if (!Array.isArray(value)) return []; + return value + .filter((item): item is Record => { + return ( + typeof item === "object" && + item !== null && + typeof item.task_input === "string" && + typeof item.expected_behavior === "string" + ); + }) + .map((item, idx) => ({ + id: + typeof item.id === "string" && item.id.trim() + ? item.id.trim() + : `todo.eval.${String(idx + 1).padStart(3, "0")}`, + task_input: + typeof item.task_input === "string" ? item.task_input.trim() : "", + expected_behavior: + typeof item.expected_behavior === "string" + ? item.expected_behavior.trim() + : "", + failure_mode: + typeof item.failure_mode === "string" + ? item.failure_mode.trim() + : undefined, + evidence: + typeof item.evidence === "string" ? item.evidence.trim() : undefined, + source: typeof item.source === "string" ? item.source.trim() : "TODO.md", + suggested_location: + typeof item.suggested_location === "string" + ? 
item.suggested_location.trim()
+					: undefined,
+		}))
+		.filter((item) => item.task_input && item.expected_behavior);
+}
+
+/**
+ * Parse the triage model's raw reply into a TodoTriageResult.
+ *
+ * The reply is narrowed with extractJsonObject and parsed as JSON. Missing or
+ * wrongly typed fields fall back to empty lists and a default summary.
+ *
+ * @param response Raw text returned by the model.
+ * @returns The normalized triage result.
+ * @throws SyntaxError when the extracted text is not valid JSON.
+ * @throws Error when the parsed value is not a JSON object (null, array, or
+ *   primitive). Rejecting these keeps a malformed reply from being treated as
+ *   an empty-but-successful triage.
+ */
+export function parseTodoTriageResponse(response: string): TodoTriageResult {
+	const raw: unknown = JSON.parse(extractJsonObject(response));
+	if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
+		throw new Error("TODO triage response was not a JSON object.");
+	}
+	const parsed = raw as Record<string, unknown>;
+	return {
+		summary:
+			typeof parsed.summary === "string" && parsed.summary.trim()
+				? parsed.summary.trim()
+				: "TODO dump triaged.",
+		eval_candidates: evalCandidates(parsed.eval_candidates),
+		implementation_tasks: stringArray(parsed.implementation_tasks),
+		memory_requirements: stringArray(parsed.memory_requirements),
+		harness_suggestions: stringArray(parsed.harness_suggestions),
+		docs_or_tests: stringArray(parsed.docs_or_tests),
+		unclear_notes: stringArray(parsed.unclear_notes),
+	};
+}
+
+/**
+ * Return the dump portion of TODO.md: CRLF is normalized to LF, a leading
+ * "# TODO" heading and any "Dump anything here." placeholder line are dropped,
+ * and the remainder is trimmed.
+ */
+export function extractTodoDump(rawTodo: string): string {
+	const lines = rawTodo.replace(/\r\n/g, "\n").split("\n");
+	const body = lines
+		.filter((line, idx) => {
+			if (idx === 0 && line.trim().toLowerCase() === "# todo") return false;
+			if (line.trim() === "Dump anything here.") return false;
+			return true;
+		})
+		.join("\n")
+		.trim();
+	return body;
+}
+
+/** Render one "## title" markdown section as a bullet list, or "None." when empty. */
+function section(title: string, items: string[]): string {
+	if (items.length === 0) return `## ${title}\n\nNone.\n`;
+	return `## ${title}\n\n${items.map((item) => `- ${item}`).join("\n")}\n`;
+}
+
+export function renderTriageMarkdown(
+	result: TodoTriageResult,
+	sourcePath: string,
+): string {
+	const evals =
+		result.eval_candidates.length === 0
+			? "None.\n"
+			: result.eval_candidates
+					.map((item) => {
+						const lines = [
+							`- ${item.id ??
"todo.eval"}`, + ` - Trigger/input: ${item.task_input}`, + ` - Expected behavior: ${item.expected_behavior}`, + ]; + if (item.failure_mode) + lines.push(` - Failure mode observed: ${item.failure_mode}`); + if (item.evidence) lines.push(` - Evidence/source: ${item.evidence}`); + if (item.suggested_location) + lines.push(` - Suggested location: ${item.suggested_location}`); + return lines.join("\n"); + }) + .join("\n\n") + "\n"; + + return [ + "# TODO Triage", + "", + `Source: ${sourcePath}`, + `Generated: ${new Date().toISOString()}`, + "", + "## Summary", + "", + result.summary, + "", + "## Eval Candidates", + "", + evals, + section("Implementation Tasks", result.implementation_tasks), + section("Memory Requirements", result.memory_requirements), + section("Harness Suggestions", result.harness_suggestions), + section("Docs Or Tests", result.docs_or_tests), + section("Unclear Notes", result.unclear_notes), + ].join("\n"); +} + +function renderEvalJsonl(result: TodoTriageResult): string { + return ( + result.eval_candidates + .map((item) => JSON.stringify({ ...item, source: item.source ?? "TODO.md" })) + .join("\n") + (result.eval_candidates.length > 0 ? "\n" : "") + ); +} + +function backlogPath(basePath: string): string { + return join(sfRoot(basePath), "BACKLOG.md"); +} + +function nextBacklogId(content: string): string { + let maxNum = 0; + for (const match of content.matchAll(/^- \[[ x]\] 999\.(\d+) — /gm)) { + const num = Number.parseInt(match[1], 10); + if (Number.isFinite(num) && num > maxNum) maxNum = num; + } + return `999.${maxNum + 1}`; +} + +function appendBacklogItems(basePath: string, titles: string[]): number { + const cleanTitles = titles.map((title) => title.trim()).filter(Boolean); + if (cleanTitles.length === 0) return 0; + + const filePath = backlogPath(basePath); + mkdirSync(dirname(filePath), { recursive: true }); + let content = existsSync(filePath) + ? 
readFileSync(filePath, "utf-8") + : "# Backlog\n\n"; + if (!content.endsWith("\n")) content += "\n"; + + const date = new Date().toISOString().slice(0, 10); + for (const title of cleanTitles) { + const id = nextBacklogId(content); + content += `- [ ] ${id} — ${title.replace(/^['"]|['"]$/g, "")} (triaged ${date})\n`; + } + writeFileSync(filePath, content, "utf-8"); + return cleanTitles.length; +} + +function normalizedItems(result: TodoTriageResult, createdAt: string): NormalizedTriageItem[] { + const items: NormalizedTriageItem[] = []; + let seq = 1; + const push = ( + kind: NormalizedTriageItem["kind"], + content: string, + evidence?: string, + ) => { + items.push({ + id: `triage.${String(seq++).padStart(3, "0")}`, + source: "todo.md", + kind, + content, + evidence, + status: "pending", + created_at: createdAt, + }); + }; + + for (const item of result.eval_candidates) { + push( + "eval_candidate", + `${item.task_input}\nExpected: ${item.expected_behavior}`, + item.evidence ?? item.failure_mode, + ); + } + for (const item of result.implementation_tasks) push("implementation_task", item); + for (const item of result.memory_requirements) push("memory_requirement", item); + for (const item of result.harness_suggestions) push("harness_suggestion", item); + for (const item of result.docs_or_tests) push("docs_or_tests", item); + for (const item of result.unclear_notes) push("unclear_note", item); + return items; +} + +function renderNormalizedJsonl(result: TodoTriageResult, createdAt: string): string { + const items = normalizedItems(result, createdAt); + return items.map((item) => JSON.stringify(item)).join("\n") + (items.length ? "\n" : ""); +} + +function buildTriagePrompt(dump: string): { system: string; user: string } { + return { + system: `You are a triage agent for a software engineering repository. +Convert a messy TODO.md dump into structured, reviewable project work. 
+ +Return ONLY valid JSON with this shape: +{ + "summary": "short summary", + "eval_candidates": [ + { + "id": "short stable id if obvious", + "task_input": "user/task input that should be evaluated", + "expected_behavior": "specific expected behavior", + "failure_mode": "observed failure or risk", + "evidence": "quote or short source note", + "source": "TODO.md", + "suggested_location": "suggested eval/test/harness path" + } + ], + "implementation_tasks": ["concrete implementation task"], + "memory_requirements": ["memory extraction or retention requirement"], + "harness_suggestions": ["gate/eval/harness suggestion"], + "docs_or_tests": ["doc or test artifact to add/update"], + "unclear_notes": ["notes that need clarification"] +} + +Rules: +- Preserve concrete details from the dump. +- Do not invent completed work. +- Raw dump notes are evidence, not approved runtime behavior. +- Repeated failures should become eval candidates before behavior changes. +- Prefer deterministic tests/gates when possible; use model judges only as advisory unless calibrated.`, + user: `Triage this repo-root TODO.md dump:\n\n\n${dump}\n`, + }; +} + +async function triageWithModel( + dump: string, + llmCall: LLMCallFn, +): Promise { + const prompt = buildTriagePrompt(dump.slice(0, MAX_DUMP_CHARS)); + const response = await llmCall(prompt.system, prompt.user); + return parseTodoTriageResponse(response); +} + +function chooseTodoTriageModel(ctx: ExtensionCommandContext): Model | null { + try { + const available = ctx.modelRegistry?.getAvailable?.() ?? []; + for (const pattern of PREFERRED_TRIAGE_MODEL_PATTERNS) { + const match = available.find((model: Model) => { + return ( + pattern.test(`${model.provider}/${model.id}`) || + pattern.test(model.name ?? "") + ); + }); + if (match) return match as Model; + } + return (ctx.model as Model | undefined) ?? (available[0] as Model | undefined) ?? null; + } catch { + return (ctx.model as Model | undefined) ?? 
null; + } +} + +function buildTodoTriageLLMCall(ctx: ExtensionCommandContext): LLMCallFn | null { + const model = chooseTodoTriageModel(ctx); + if (!model) return null; + const resolvedKeyPromise = ctx.modelRegistry + ?.getApiKey?.(model) + .catch(() => undefined); + + return async (system: string, user: string): Promise => { + const { completeSimple } = await import("@singularity-forge/pi-ai"); + const resolvedApiKey = await resolvedKeyPromise; + const result: AssistantMessage = await completeSimple( + model, + { + systemPrompt: system, + messages: [ + { + role: "user", + content: [{ type: "text", text: user }], + timestamp: Date.now(), + }, + ], + }, + { + maxTokens: 4096, + temperature: 0, + ...(resolvedApiKey ? { apiKey: resolvedApiKey } : {}), + }, + ); + return result.content + .filter((part): part is { type: "text"; text: string } => part.type === "text") + .map((part) => part.text) + .join(""); + }; +} + +export async function triageTodoDump( + basePath: string, + llmCall: LLMCallFn, + options: { clear?: boolean; date?: Date; backlog?: boolean } = {}, +): Promise<{ + markdownPath: string; + evalJsonlPath: string; + normalizedJsonlPath: string; + backlogItemsAdded: number; + result: TodoTriageResult; +}> { + const todoPath = join(basePath, "TODO.md"); + if (!existsSync(todoPath)) { + throw new Error("No root TODO.md found."); + } + + const raw = readFileSync(todoPath, "utf-8"); + const dump = extractTodoDump(raw); + if (!dump) { + throw new Error("TODO.md has no dump content to triage."); + } + + const result = await triageWithModel(dump, llmCall); + const id = timestampId(options.date); + const createdAt = (options.date ?? 
new Date()).toISOString();
+	const triageRoot = join(basePath, ".sf", "triage");
+	const reportsDir = join(triageRoot, "reports");
+	const evalsDir = join(triageRoot, "evals");
+	const inboxDir = join(triageRoot, "inbox");
+	mkdirSync(reportsDir, { recursive: true });
+	mkdirSync(evalsDir, { recursive: true });
+	mkdirSync(inboxDir, { recursive: true });
+
+	const markdownPath = join(reportsDir, `${id}.md`);
+	const evalJsonlPath = join(evalsDir, `${id}.evals.jsonl`);
+	const normalizedJsonlPath = join(inboxDir, `${id}.jsonl`);
+	writeFileSync(markdownPath, renderTriageMarkdown(result, "TODO.md"));
+	writeFileSync(evalJsonlPath, renderEvalJsonl(result));
+	writeFileSync(normalizedJsonlPath, renderNormalizedJsonl(result, createdAt));
+
+	const backlogItemsAdded =
+		options.backlog === true
+			? appendBacklogItems(basePath, result.implementation_tasks)
+			: 0;
+
+	if (options.clear !== false) {
+		// Only the first MAX_DUMP_CHARS characters of the dump are sent to the
+		// model. Keep anything past that limit in TODO.md so it is triaged on the
+		// next run instead of being deleted unprocessed.
+		const untriaged = dump.slice(MAX_DUMP_CHARS).trim();
+		writeFileSync(
+			todoPath,
+			untriaged ? `${EMPTY_TODO}\n${untriaged}\n` : EMPTY_TODO,
+		);
+	}
+
+	return {
+		markdownPath,
+		evalJsonlPath,
+		normalizedJsonlPath,
+		backlogItemsAdded,
+		result,
+	};
+}
+
+/**
+ * Entry point for `/sf todo ...`.
+ *
+ * Only the `triage` subcommand is supported (it is also the default when no
+ * argument is given). `--no-clear` leaves TODO.md untouched and `--backlog`
+ * also appends implementation tasks to the backlog. Failures are reported via
+ * ctx.ui.notify rather than thrown.
+ *
+ * @param args Raw argument string following `todo`.
+ * @param ctx Command context used for model lookup and UI notifications.
+ * @param _pi Extension API handle (unused here).
+ */
+export async function handleTodo(
+	args: string,
+	ctx: ExtensionCommandContext,
+	_pi: ExtensionAPI,
+): Promise<void> {
+	const parts = args.trim().split(/\s+/).filter(Boolean);
+	const subcommand = parts[0] || "triage";
+	const clear = !parts.includes("--no-clear");
+	const backlog = parts.includes("--backlog");
+
+	if (subcommand !== "triage") {
+		ctx.ui.notify(
+			"Usage: /sf todo triage [--no-clear] [--backlog]\nReads root TODO.md, writes .sf/triage artifacts, and clears processed dump notes by default.",
+			"warning",
+		);
+		return;
+	}
+
+	const llmCall = buildTodoTriageLLMCall(ctx);
+	if (!llmCall) {
+		ctx.ui.notify("No model available for TODO triage.", "warning");
+		return;
+	}
+
+	try {
+		const output = await triageTodoDump(projectRoot(), llmCall, { clear, backlog });
+		ctx.ui.notify(
+			[
+				"TODO triage complete.",
+				`Report: ${output.markdownPath}`,
+				`Normalized inbox: ${output.normalizedJsonlPath}`,
+				`Eval candidates: 
${output.evalJsonlPath}`, + `Eval candidate count: ${output.result.eval_candidates.length}`, + `Backlog items added: ${output.backlogItemsAdded}`, + clear ? "TODO.md was reset to the empty dump inbox." : "TODO.md was left unchanged.", + ].join("\n"), + "info", + ); + } catch (err) { + ctx.ui.notify( + `TODO triage failed: ${err instanceof Error ? err.message : String(err)}`, + "warning", + ); + } +} diff --git a/src/resources/extensions/sf/commands/catalog.ts b/src/resources/extensions/sf/commands/catalog.ts index 7543a6523..5f31aaf68 100644 --- a/src/resources/extensions/sf/commands/catalog.ts +++ b/src/resources/extensions/sf/commands/catalog.ts @@ -15,7 +15,7 @@ export interface GsdCommandDefinition { type CompletionMap = Record; export const SF_COMMAND_DESCRIPTION = - "SF — Singularity Forge: /sf help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|harness|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan"; + "SF — Singularity Forge: /sf help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|todo|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|harness|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan"; export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "help", desc: "Categorized command reference with descriptions" }, @@ -41,6 +41,7 @@ export const TOP_LEVEL_SUBCOMMANDS: 
readonly GsdCommandDefinition[] = [ { cmd: "capture", desc: "Fire-and-forget thought capture" }, { cmd: "changelog", desc: "Show categorized release notes" }, { cmd: "triage", desc: "Manually trigger triage of pending captures" }, + { cmd: "todo", desc: "Triage root TODO.md dump into eval/backlog artifacts" }, { cmd: "dispatch", desc: "Dispatch a specific phase directly" }, { cmd: "history", desc: "View execution history" }, { cmd: "undo", desc: "Revert last completed unit" }, @@ -373,6 +374,11 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "promote", desc: "Promote backlog item to active slice" }, { cmd: "remove", desc: "Remove backlog item" }, ], + todo: [ + { cmd: "triage", desc: "Triage root TODO.md into .sf/triage artifacts" }, + { cmd: "triage --no-clear", desc: "Triage TODO.md without resetting it" }, + { cmd: "triage --backlog", desc: "Also add implementation tasks to .sf/BACKLOG.md" }, + ], "pr-branch": [ { cmd: "--dry-run", desc: "Preview what would be filtered" }, { cmd: "--name", desc: "Custom branch name" }, diff --git a/src/resources/extensions/sf/commands/handlers/ops.ts b/src/resources/extensions/sf/commands/handlers/ops.ts index 23cce6bad..b844a4dfc 100644 --- a/src/resources/extensions/sf/commands/handlers/ops.ts +++ b/src/resources/extensions/sf/commands/handlers/ops.ts @@ -179,6 +179,11 @@ export async function handleOpsCommand( await handleTriage(ctx, pi, process.cwd()); return true; } + if (trimmed === "todo" || trimmed.startsWith("todo ")) { + const { handleTodo } = await import("../../commands-todo.js"); + await handleTodo(trimmed.replace(/^todo\s*/, "").trim(), ctx, pi); + return true; + } if (trimmed === "config") { await handleConfig(ctx); return true; diff --git a/src/resources/extensions/sf/tests/remediation-completion-guard.test.ts b/src/resources/extensions/sf/tests/remediation-completion-guard.test.ts index eab916f54..9048ad422 100644 --- a/src/resources/extensions/sf/tests/remediation-completion-guard.test.ts +++ 
b/src/resources/extensions/sf/tests/remediation-completion-guard.test.ts @@ -12,6 +12,7 @@ import { existsSync, mkdirSync, mkdtempSync, + readFileSync, rmSync, writeFileSync, } from "node:fs"; @@ -221,6 +222,90 @@ test("completing-milestone redispatches validation-attention when marker is stal } }); +test("completing-milestone revalidates after validation-attention remediation advances round", async () => { + const base = mkdtempSync(join(tmpdir(), "sf-remediation-")); + mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true }); + mkdirSync(join(base, ".sf", "runtime", "validation-attention"), { + recursive: true, + }); + mkdirSync(join(base, ".sf", "runtime", "units"), { recursive: true }); + + try { + writeFileSync( + join(base, ".sf", "milestones", "M001", "M001-VALIDATION.md"), + [ + "---", + "verdict: needs-attention", + "remediation_round: 1", + "---", + "", + "# Validation Report", + "", + "Tracking cleanup was applied. Revalidate before completion.", + ].join("\n"), + ); + writeFileSync( + join(base, ".sf", "runtime", "validation-attention", "M001.json"), + JSON.stringify({ + milestoneId: "M001", + createdAt: new Date().toISOString(), + remediationRound: 0, + }), + ); + writeFileSync( + join( + base, + ".sf", + "runtime", + "units", + "rewrite-docs-M001-validation-attention.json", + ), + JSON.stringify({ + version: 1, + unitType: "rewrite-docs", + unitId: "M001/validation-attention", + phase: "dispatched", + }), + ); + + const ctx = { + mid: "M001", + midTitle: "Test Milestone", + basePath: base, + state: { phase: "completing-milestone" } as any, + prefs: {} as any, + session: undefined, + }; + + const result = await completingRule!.match(ctx); + + assert.ok(result !== null, "rule should match"); + assert.equal(result!.action, "dispatch"); + if (result!.action === "dispatch") { + assert.equal(result!.unitType, "validate-milestone"); + assert.equal(result!.unitId, "M001"); + } + + const marker = JSON.parse( + readFileSync( + join(base, ".sf", 
"runtime", "validation-attention", "M001.json"), + "utf-8", + ), + ); + assert.equal(marker.revalidationRound, 1); + + const second = await completingRule!.match(ctx); + assert.ok(second !== null, "second rule should match"); + assert.equal( + second!.action, + "stop", + "second pass should not revalidate-loop", + ); + } finally { + rmSync(base, { recursive: true, force: true }); + } +}); + test("completing-milestone proceeds normally when VALIDATION verdict is pass (#2675 guard)", async () => { const base = mkdtempSync(join(tmpdir(), "sf-remediation-")); mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true });