diff --git a/.sf/backups/db/sf.db.2026-05-10T04-45-58-550Z b/.sf/backups/db/sf.db.2026-05-10T04-45-58-550Z new file mode 100644 index 000000000..281183ab4 Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-10T04-45-58-550Z differ diff --git a/.sf/backups/db/sf.db.2026-05-10T05-22-28-577Z b/.sf/backups/db/sf.db.2026-05-10T05-22-28-577Z new file mode 100644 index 000000000..8ace58971 Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-10T05-22-28-577Z differ diff --git a/.sf/backups/db/sf.db.2026-05-10T05-37-52-529Z b/.sf/backups/db/sf.db.2026-05-10T05-37-52-529Z new file mode 100644 index 000000000..f18e61f46 Binary files /dev/null and b/.sf/backups/db/sf.db.2026-05-10T05-37-52-529Z differ diff --git a/.sf/graphs/graph.json b/.sf/graphs/graph.json new file mode 100644 index 000000000..3443a2b32 --- /dev/null +++ b/.sf/graphs/graph.json @@ -0,0 +1,64 @@ +{ + "nodes": [ + { + "id": "concept:phase:planning", + "label": "Phase: planning", + "type": "concept", + "confidence": "EXTRACTED", + "sourceFile": "STATE.md" + }, + { + "id": "milestone:M001", + "label": "M001", + "type": "milestone", + "confidence": "EXTRACTED" + }, + { + "id": "slice:M001:S01", + "label": "S01: Recover corrupted DB from backup", + "type": "slice", + "confidence": "EXTRACTED", + "sourceFile": "milestones/M001/slices/S01/S01-PLAN.md" + }, + { + "id": "slice:M001:S02", + "label": "S02: Execute S01-T01 — CLI/help doctrine fix", + "type": "slice", + "confidence": "EXTRACTED", + "sourceFile": "milestones/M001/slices/S02/S02-PLAN.md" + }, + { + "id": "milestone:M002", + "label": "M002", + "type": "milestone", + "confidence": "EXTRACTED" + } + ], + "edges": [ + { + "from": "milestone:M001", + "to": "slice:M001:S01", + "type": "contains", + "confidence": "EXTRACTED" + }, + { + "from": "milestone:M001", + "to": "slice:M001:S02", + "type": "contains", + "confidence": "EXTRACTED" + }, + { + "from": "milestone:M001", + "to": "slice:M001:S01", + "type": "contains", + "confidence": "EXTRACTED" + }, + { + "from": "milestone:M001", + "to": "slice:M001:S02", + "type": "contains", + "confidence": "EXTRACTED" + } + ], + "builtAt": "2026-05-10T05:08:17.329Z" +} \ No newline at end of file diff --git a/.sf/metrics.db b/.sf/metrics.db index 18247c1ef..52f417f8a 100644 Binary files a/.sf/metrics.db and b/.sf/metrics.db differ diff --git a/.sf/metrics.db-shm b/.sf/metrics.db-shm index 79e6bceaf..a7c929dfa 100644 Binary files a/.sf/metrics.db-shm and b/.sf/metrics.db-shm differ diff --git a/.sf/metrics.db-wal b/.sf/metrics.db-wal index 4cb2edaf0..91b3c05e2 100644 Binary files a/.sf/metrics.db-wal and b/.sf/metrics.db-wal differ diff --git a/.sf/model-performance.json b/.sf/model-performance.json index 994ee5429..ba649f294 100644 --- a/.sf/model-performance.json +++ b/.sf/model-performance.json @@ -44,5 +44,27 @@ "successRate": 1, "total": 2 } + }, + "run-uat": { + "minimax/MiniMax-M2.7-highspeed": { + "successes": 1, + "failures": 0, + "timeouts": 0, + "totalTokens": 0, + "totalCost": 0, + "lastUsed": "2026-05-10T05:22:57.604Z", + "successRate": 1, + "total": 1 + }, + "google-gemini-cli/gemini-3.1-pro-preview": { + "successes": 2, + "failures": 0, + "timeouts": 0, + "totalTokens": 1700534, + "totalCost": 0.14063507999999997, + "lastUsed": "2026-05-10T05:40:27.616Z", + "successRate": 1, + "total": 2 + } } } \ No newline at end of file diff --git a/src/resources/extensions/sf/auto.js b/src/resources/extensions/sf/auto.js index 39b506086..6d273f6c1 100644 --- a/src/resources/extensions/sf/auto.js +++ b/src/resources/extensions/sf/auto.js @@ -1399,6 +1399,26 @@ export async function startAuto(ctx, pi, base, verboseMode, options) { debugLog("startAuto", { phase: "already-active", skipping: true }); return; } + // ── Command context guard ──────────────────────────────────────────────── + // Autonomous mode requires a ctx with newSession() to start clean sessions + // for each unit. Shortcut handlers (Ctrl+Y, registerShortcut) receive an + // ExtensionContext which does NOT have newSession. Fall back to the last + // known command ctx if available; otherwise block with an actionable message. + if (typeof ctx.newSession !== "function") { + if (typeof s.lastCommandCtx?.newSession === "function") { + ctx = s.lastCommandCtx; + } else { + ctx.ui.notify( + "Autonomous mode requires a command context with newSession. Run /autonomous once first, then use the keyboard shortcut.", + "warning", + ); + debugLog("startAuto", { phase: "no-command-ctx", skipping: true }); + return; + } + } else { + // Cache the valid command ctx for future shortcut-triggered starts. + s.lastCommandCtx = ctx; + } // Gate: if the user is in Ask mode (manual runControl and not already in // build workMode), ask permission to switch to Build mode. // Skip if workMode is already "build" — runControl is reset to "manual" on @@ -1901,10 +1921,14 @@ export async function dispatchHookUnit( targetBasePath, ) { if (!s.active) { + // Guard: ctx from hook/shortcut callers may lack newSession(); fall back to cached command ctx. + const hookCtx = typeof ctx.newSession === "function" + ? ctx + : (typeof s.lastCommandCtx?.newSession === "function" ? s.lastCommandCtx : ctx); s.active = true; s.stepMode = true; s.runControl = "assisted"; - s.cmdCtx = ctx; + s.cmdCtx = hookCtx; s.basePath = targetBasePath; s.autoStartTime = Date.now(); s.currentUnit = null; @@ -1917,6 +1941,14 @@ export async function dispatchHookUnit( id: triggerUnitId, startedAt: hookStartedAt, }; + if (typeof s.cmdCtx?.newSession !== "function") { + ctx.ui.notify( + `Hook dispatch for ${hookName} failed: no command context with newSession available. Run /autonomous once first.`, + "error", + ); + await stopAuto(ctx, pi); + return false; + } const result = await s.cmdCtx.newSession(); if (result.cancelled) { await stopAuto(ctx, pi); diff --git a/src/resources/extensions/sf/auto/run-unit.js b/src/resources/extensions/sf/auto/run-unit.js index 86ac9c6cb..87fd4aed7 100644 --- a/src/resources/extensions/sf/auto/run-unit.js +++ b/src/resources/extensions/sf/auto/run-unit.js @@ -75,6 +75,21 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) { // keepSession=false (default): start a clean session for each new unit. if (!keepSession) { debugLog("runUnit", { phase: "session-create", unitType, unitId }); + // Guard: s.cmdCtx must have newSession() (ExtensionCommandContext). If it + // doesn't, autonomous mode was started from a shortcut handler without a + // valid command ctx — startAuto() should have caught this, but defend here + // as a last resort so we get a clear error category instead of a TypeError. + if (typeof s.cmdCtx?.newSession !== "function") { + return { + status: "cancelled", + errorContext: { + message: + "cmdCtx.newSession is not available — autonomous mode was started from a non-command context. Run /autonomous once to establish a command context.", + category: "session-failed", + isTransient: false, + }, + }; + } let sessionResult; let sessionTimeoutHandle; const mySessionSwitchGeneration = ++sessionSwitchGeneration; diff --git a/src/resources/extensions/sf/auto/session.js b/src/resources/extensions/sf/auto/session.js index a209424ab..abb0f88c3 100644 --- a/src/resources/extensions/sf/auto/session.js +++ b/src/resources/extensions/sf/auto/session.js @@ -137,6 +137,18 @@ export class AutoSession { activeEngineId = null; activeRunDir = null; cmdCtx = null; + /** + * Last known ExtensionCommandContext that had newSession(). + * + * Purpose: allow autonomous mode to start from non-command contexts (shortcut + * handlers, event handlers) by falling back to a previously cached command + * context. newSession() is bound at registration time and remains valid across + * session switches, so this is safe to reuse after /clear. + * + * NOT in reset() — intentionally persists across auto-stop/restart cycles so + * Ctrl+Y and similar shortcuts work after the loop exits cleanly. + */ + lastCommandCtx = null; /** * YOLO mode: build + autonomous + deep + unrestricted. * Tracks the local toggle state so the terminal title and status display diff --git a/src/resources/extensions/sf/bootstrap/db-tools.js b/src/resources/extensions/sf/bootstrap/db-tools.js index 4a6c1b266..e6eaa5986 100644 --- a/src/resources/extensions/sf/bootstrap/db-tools.js +++ b/src/resources/extensions/sf/bootstrap/db-tools.js @@ -83,16 +83,15 @@ export function registerDbTools(pi) { name: "save_decision", label: "Save Decision", description: - "Record a project decision to the SF database and regenerate DECISIONS.md. " + - "Decision IDs are auto-assigned — never provide an ID manually.", + "Record an architectural or technical decision and return its auto-assigned ID (e.g. D001). " + + "Call this whenever a non-trivial choice is made about architecture, libraries, patterns, or observability so the rationale is durable and reviewable.", promptSnippet: - "Record a project decision to the SF database (auto-assigns ID, regenerates DECISIONS.md)", + "Record a project decision (auto-assigns ID, regenerates DECISIONS.md)", promptGuidelines: [ - "Use save_decision when recording an architectural, pattern, library, or observability decision.", - "Decision IDs are auto-assigned (D001, D002, ...) — never guess or provide an ID.", - "All fields except revisable, when_context, and made_by are required.", - "The tool writes to the DB and regenerates .sf/DECISIONS.md automatically.", - "Set made_by to 'human' when the user explicitly directed the decision, 'agent' when the LLM chose autonomously (default), or 'collaborative' when it was discussed and agreed together.", + "Call save_decision for architectural, library, pattern, or observability choices — not for task-level implementation details.", + "Decision IDs are auto-assigned — never guess or provide one.", + "scope, decision, choice, and rationale are required; revisable, when_context, and made_by are optional.", + "Set made_by to 'human' when the user directed it, 'agent' when you chose autonomously, or 'collaborative' when agreed together.", ], parameters: Type.Object({ scope: Type.String({ @@ -211,15 +210,14 @@ export function registerDbTools(pi) { name: "update_requirement", label: "Update Requirement", description: - "Update an existing requirement in the SF database and regenerate REQUIREMENTS.md. " + - "Provide the requirement ID (e.g. R001) and any fields to update.", + "Update an existing requirement by ID and return confirmation — only fields you provide are changed. " + + "Call this when a requirement's status, validation evidence, description, or owning slice changes after it was first recorded.", promptSnippet: - "Update an existing SF requirement by ID (regenerates REQUIREMENTS.md)", + "Update an existing requirement by ID (only provided fields are changed)", promptGuidelines: [ - "Use update_requirement to change status, validation, notes, or other fields on an existing requirement.", - "The id parameter is required — it must be an existing RXXX identifier.", - "All other fields are optional — only provided fields are updated.", - "The tool verifies the requirement exists before updating.", + "id is required and must be an existing requirement identifier (e.g. R001).", + "All other fields are optional — only the fields you provide are updated.", + "Use this to mark a requirement validated, deferred, or to correct its description after new evidence.", ], parameters: Type.Object({ id: Type.String({ description: "The requirement ID (e.g. R001, R014)" }), @@ -326,15 +324,14 @@ export function registerDbTools(pi) { name: "save_requirement", label: "Save Requirement", description: - "Record a new requirement to the SF database and regenerate REQUIREMENTS.md. " + - "Requirement IDs are auto-assigned — never provide an ID manually.", + "Record a new requirement and return its auto-assigned ID (e.g. R001). " + + "Call this when a functional, non-functional, or operational requirement is identified that the project must satisfy.", promptSnippet: - "Record a new SF requirement to the database (auto-assigns ID, regenerates REQUIREMENTS.md)", + "Record a new requirement (auto-assigns ID, regenerates REQUIREMENTS.md)", promptGuidelines: [ - "Use save_requirement when recording a new functional, non-functional, or operational requirement.", - "Requirement IDs are auto-assigned (R001, R002, ...) — never guess or provide an ID.", - "class, description, why, and source are required. All other fields are optional.", - "The tool writes to the DB and regenerates .sf/REQUIREMENTS.md automatically.", + "Requirement IDs are auto-assigned — never guess or provide one.", + "class, description, why, and source are required; all other fields are optional.", + "Use update_requirement to change an existing requirement rather than saving a duplicate.", ], parameters: Type.Object({ class: Type.String({ @@ -399,16 +396,14 @@ export function registerDbTools(pi) { name: "save_summary", label: "Save Summary", description: - "Save a summary, research, context, or assessment artifact to the SF database and write it to disk. " + - "Computes the file path from milestone/slice/task IDs automatically.", + "Save a research, summary, context, or assessment artifact to disk with an auto-computed path. " + + "Call this to persist planning or research output (e.g. a research brief, context doc, or summary) for a milestone, slice, or task.", promptSnippet: - "Save a SF artifact (summary/research/context/assessment) to DB and disk", + "Save a planning artifact (SUMMARY/RESEARCH/CONTEXT/ASSESSMENT) to disk", promptGuidelines: [ - "Use save_summary to persist structured artifacts (SUMMARY, RESEARCH, CONTEXT, ASSESSMENT, CONTEXT-DRAFT).", - "milestone_id is required. slice_id and task_id are optional — they determine the file path.", - "The tool computes the relative path automatically: milestones/M001/M001-SUMMARY.md, milestones/M001/slices/S01/S01-SUMMARY.md, etc.", + "milestone_id is required; slice_id and task_id are optional and determine the file path.", "artifact_type must be one of: SUMMARY, RESEARCH, CONTEXT, ASSESSMENT, CONTEXT-DRAFT.", - "Use CONTEXT-DRAFT for incremental draft persistence; use CONTEXT for the final milestone context after depth verification.", + "Use CONTEXT-DRAFT for incremental saves; use CONTEXT only for the final milestone context after verification.", ], parameters: Type.Object({ milestone_id: Type.String({ description: "Milestone ID (e.g. M001)" }), @@ -528,15 +523,14 @@ export function registerDbTools(pi) { name: "new_milestone_id", label: "Generate Milestone ID", description: - "Generate the next sequential milestone ID and return it as a string. " + - "Always call this when creating a new milestone — never invent or hardcode milestone IDs manually.", + "Generate and reserve the next milestone ID (e.g. M001) and return it as a string. " + + "Always call this before creating a new milestone — never invent or hardcode a milestone ID.", promptSnippet: - "Generate a valid milestone ID (respects unique_milestone_ids preference)", + "Generate a valid milestone ID before creating a new milestone", promptGuidelines: [ - "ALWAYS call new_milestone_id before creating a new milestone directory or writing milestone files.", - "Never invent or hardcode milestone IDs like M001, M002 — always use this tool.", - "Call it once per milestone you need to create. For multi-milestone projects, call it once for each milestone in sequence.", - "The tool returns the correct format based on project preferences (e.g. M001 or M001-r5jzab).", + "Call new_milestone_id once per new milestone, before any other milestone creation steps.", + "Never hardcode milestone IDs like M001 or M002 — always use this tool.", + "For multiple milestones, call it once per milestone in sequence.", ], parameters: Type.Object({}), execute: milestoneGenerateIdExecute, @@ -638,22 +632,17 @@ export function registerDbTools(pi) { name: "report_issue", label: "Self Report", description: - "Record an observation about the agent tooling itself — a bug, missing feature, confusing prompt, friction, or improvement idea — for future review. " + - "Over-reporting is preferred; duplicates are resolved later. " + - "Do not use this for bugs in the user's project — only for observations about the agent tooling itself.", + "File an observation about the agent tooling itself — a bug, confusing prompt, missing feature, friction, or improvement idea — and return the new entry ID. " + + "Use this whenever you notice something wrong or suboptimal about how the agent tooling behaves, not for bugs in the user's project.", promptSnippet: - "Report any sf-internal observation: bug, missing feature, prompt issue, idea, friction", + "Report any agent-tooling observation: bug, prompt issue, missing feature, or improvement idea", promptGuidelines: [ - "Use report_issue for ANY sf-internal observation — not just bugs. Acceptable kinds include: 'prompt-quality-issue' (you found a prompt ambiguous, contradictory, or missing context), 'improvement-idea' (a non-bug enhancement that would help), 'agent-friction' (workflow friction you worked around), 'design-thought' (broader speculation), 'missing-feature' (capability you wished sf had), as well as classic bug kinds like 'brittle-predicate' or 'git-empty-pathspec'.", - "Do NOT use this for bugs in the user's project, for your own task work, or to track your task's todo list. ONLY for observations about sf-the-tool itself.", - "This tool FILES new entries; it does not resolve existing ones. High/critical forge self-feedback may be queued autonomously at startup or an idle turn boundary as repair work. Use resolve_issue after fixing an entry; do not hand-edit the JSONL.", - "Over-reporting is preferred to under-reporting at this stage. If you noticed it about sf, file it. Dedup and threshold-to-roadmap promotion are tracked as their own self-feedback items and will eventually clean noise.", - "Severity guide: low = cosmetic / nice-to-have / improvement idea. medium = noisy or imperfect or recurring friction. high = blocked the unit (sf-the-tool prevented you from completing the task). critical = needs immediate fix (currently treated as high until inline-fix dispatch lands).", - "high/critical entries mark the originating unit as blocked: it will not seal as success, and will be re-queued only after sf is bumped past the recorded version.", - "Provide concrete evidence — log excerpt, command, file path, error message, the literal prompt text that confused you, etc. Vague reports are not actionable; specific ones are.", - "If you have a hypothesis about the fix, include it as suggested_fix. Even a half-baked idea is more useful than nothing.", - "For high/critical entries, include acceptance_criteria — concrete conditions a future resolver must satisfy before calling this resolved. Without it, 'resolved' is just trust; with it, the resolver has a falsifiable bar. Phrase as 1. ... 2. ... 3. ... so each can be checked off independently.", - "occurred_in is auto-filled from the active auto.lock; only override if you're reporting from outside the current unit.", + "Use report_issue for agent-tooling observations only — not for bugs in the user's project or your own task work.", + "Acceptable kinds: prompt-quality-issue, improvement-idea, agent-friction, design-thought, missing-feature, brittle-predicate, git-empty-pathspec.", + "Severity: low = cosmetic, medium = recurring friction, high = blocked a task unit, critical = needs immediate fix.", + "Include concrete evidence (log excerpt, command, file path, error message) and a suggested_fix if you have one.", + "For high/critical entries, include acceptance_criteria so the resolver has a falsifiable bar to meet.", + "Over-reporting is preferred — dedup and cleanup happen separately.", ], parameters: Type.Object({ kind: Type.String({ @@ -816,15 +805,14 @@ export function registerDbTools(pi) { name: "resolve_issue", label: "Resolve Self Feedback", description: - "Mark a previously reported agent-system issue as resolved and record the fix evidence (commit SHA, test path, or narrative). " + + "Mark a previously reported agent-tooling issue as resolved and record the fix evidence (commit SHA, test path, or narrative). " + "Call this only after the fix is implemented and verified — not speculatively.", promptSnippet: - "Resolve a repaired SF self-feedback entry with commit/test evidence", + "Resolve a filed agent-tooling issue with commit or test evidence", promptGuidelines: [ - "Use resolve_issue during self-feedback inline-fix repair turns after the fix is implemented and verified.", - "Do not hand-edit `.sf/self-feedback.jsonl` or `.sf/SELF-FEEDBACK.md`; this tool updates the durable self-feedback store and regenerates the markdown projection.", - "If the entry has acceptance criteria, pass criteria_met with the criteria that were satisfied.", - "Pass commit_sha when a commit exists. If an entry was already fixed, cite the existing commit or include summary_narrative and test_path.", + "Call resolve_issue after implementing and verifying the fix, not before.", + "Pass commit_sha when a commit exists; use summary_narrative and test_path when a commit is not the right artifact.", + "If the entry had acceptance criteria, pass criteria_met with the criteria you satisfied.", ], parameters: Type.Object({ id: Type.String({ @@ -923,19 +911,17 @@ export function registerDbTools(pi) { name: "checkpoint", label: "Autonomous Checkpoint", description: - "Save a structured progress checkpoint for the current autonomous task — capturing what was done, what's blocked, and what remains. " + - "Call this before ending every turn in autonomous mode to make progress visible and recoverable.", + "Save a structured progress snapshot for the current task iteration — what was done, what's blocked, and what remains — so progress is visible and recoverable. " + + "Call this before ending every autonomous turn.", promptSnippet: - "Checkpoint autonomous solver progress with PDD fields and semantic outcome", + "Save a progress checkpoint before ending an autonomous turn", promptGuidelines: [ - "Call checkpoint before ending an autonomous unit turn.", - "Do not write SUMMARY.md, LOOP.md, task files, or chat prose as a substitute for this tool call.", - "The checkpoint is recorded only when this actual tool returns success.", - "Use outcome=complete only when the normal unit completion artifact/tool is also complete.", - "Use outcome=continue when you made real progress but the unit needs another autonomous iteration.", - "Use outcome=blocked for missing facts, credentials, broken environment, or impossible next steps.", - "Use outcome=decide for material product or architecture choices that autonomous mode must not decide silently.", - "Fill all eight PDD fields: purpose, consumer, contract, failureBoundary, evidence, nonGoals, invariants, assumptions.", + "Call checkpoint before ending any autonomous task turn.", + "outcome=complete only when the unit's normal completion tool also succeeded.", + "outcome=continue when real progress was made but more iterations are needed.", + "outcome=blocked for missing facts, broken environment, or impossible next steps.", + "outcome=decide for material product or architecture choices that must not be made silently.", + "Fill all eight structured fields: purpose, consumer, contract, failureBoundary, evidence, nonGoals, invariants, assumptions.", ], parameters: Type.Object({ unitType: Type.String({