From 76b218762b5bdda81b67c33bce665aa6e5714a32 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Wed, 6 May 2026 06:02:46 +0200 Subject: [PATCH] fix: harden sf autonomous runtime --- AGENTS.md | 17 + docs/adr/0002-sf-schedule-pull-based.md | 82 +++++ docs/specs/sf-schedule.md | 294 ++++++++++++++++++ .../src/core/extensions/runner.test.ts | 50 +++ .../src/core/extensions/runner.ts | 20 +- .../extension-ui-controller.test.ts | 16 + .../controllers/extension-ui-controller.ts | 9 +- .../pi-coding-agent/src/modes/rpc/rpc-mode.ts | 19 +- src/cli.ts | 22 ++ src/headless-query.ts | 34 +- src/headless.ts | 33 ++ src/help-text.ts | 34 ++ src/resources/extensions/sf/auto-prompts.js | 54 ++++ src/resources/extensions/sf/auto-start.js | 21 +- .../extensions/sf/bootstrap/register-hooks.js | 9 +- .../extensions/sf/doctor-runtime-checks.js | 78 ++++- src/resources/extensions/sf/doctor.js | 109 ++++++- .../extensions/sf/prompts/complete-slice.md | 2 + .../extensions/sf/schedule/schedule-types.js | 1 + .../auto-prompts-complete-slice.test.mjs | 115 +++++++ .../doctor-flow-audit-auto-cleanup.test.mjs | 177 +++++++++++ .../tests/doctor-runtime-stale-units.test.mjs | 80 +++++ .../tests/doctor-task-plan-id-drift.test.mjs | 125 ++++++++ .../sf/tests/schedule-kinds.test.mjs | 9 +- .../sf/tests/schedule-store.test.mjs | 1 + src/resources/extensions/sf/unit-runtime.js | 75 +++++ src/tests/schedule-cli-noninteractive.test.ts | 31 ++ src/tests/unit-runtime-reconcile.test.ts | 54 ++++ 28 files changed, 1542 insertions(+), 29 deletions(-) create mode 100644 docs/adr/0002-sf-schedule-pull-based.md create mode 100644 docs/specs/sf-schedule.md create mode 100644 src/resources/extensions/sf/tests/auto-prompts-complete-slice.test.mjs create mode 100644 src/resources/extensions/sf/tests/doctor-flow-audit-auto-cleanup.test.mjs create mode 100644 src/resources/extensions/sf/tests/doctor-runtime-stale-units.test.mjs create mode 100644 src/resources/extensions/sf/tests/doctor-task-plan-id-drift.test.mjs 
create mode 100644 src/tests/schedule-cli-noninteractive.test.ts diff --git a/AGENTS.md b/AGENTS.md index b66e64230..9610ca6de 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -217,6 +217,23 @@ Promoted artifacts — milestone summaries, architecture decision records (ADRs) See [`docs/plans/README.md`](docs/plans/README.md), [`docs/adr/README.md`](docs/adr/README.md), and [`docs/specs/README.md`](docs/specs/README.md) for directory-specific conventions. +## SF Schedule + +The SF schedule system (`/sf schedule`) stores time-bound reminders in `.sf/schedule.jsonl` as append-only JSONL. Items surface on their due date via pull queries at launch and auto-mode boundaries — there is no background daemon. + +**When to use `sf schedule` vs backlog:** +- **`sf schedule`** — time-bound items that must surface at a future date: a 2-week adoption review after shipping a feature, a 1-month audit of an architectural decision, a 30-minute reminder to run a command. Use when the *timing* matters, not just the *priority*. +- **Backlog** (milestone/slice queue) — priority-ordered items with no specific timing. Items are dispatched in sequence by the autonomous controller based on readiness and dependency, not wall-clock time. + +**Examples:** +``` +sf schedule add --in 2w "Review feature adoption metrics" +sf schedule add --in 1mo --kind audit "Audit ADR-007 decision implementation" +sf schedule add --in 30m --kind reminder "Run integration tests" +``` + +For the full specification, see [`docs/specs/sf-schedule.md`](docs/specs/sf-schedule.md). 
+ ## Eval Dump Inbox SF/Pi automatically loads `AGENTS.md` and `CLAUDE.md` from the repo tree at diff --git a/docs/adr/0002-sf-schedule-pull-based.md b/docs/adr/0002-sf-schedule-pull-based.md new file mode 100644 index 000000000..fba4d69ab --- /dev/null +++ b/docs/adr/0002-sf-schedule-pull-based.md @@ -0,0 +1,82 @@ +# ADR-0002: SF Schedule System is Pull-Based, Not Daemon-Based + +**Date:** 2026-05-05 +**Status:** Accepted +**Deciders:** SF core team (M010) +**Related:** M010 S01 (schedule store), M010 S02 (schedule CLI), M010 S03 (milestone YAML integration), M010 S05 (this slice) + +--- + +## Context + +The SF schedule system requires time-bound reminders that surface at a future date. Several design options were considered: + +1. **Daemon-based (cron/launchd)** — A background process fires items at their due time using the OS scheduler. +2. **Daemon-based (in-process timer)** — SF itself runs as a long-lived process with in-process timers. +3. **Pull-based (on-demand query)** — Items are stored durably and queried at integration points (launch, auto-mode boundaries, explicit CLI query). + +Option 1 was explicitly ruled out early: platform-specific (cron on Unix, launchd on macOS, Task Scheduler on Windows), requires daemon installation, and cannot fire items when SF is not running. + +Option 2 was ruled out because SF is designed to be a session-based tool — agents run in fresh contexts per unit, state does not accumulate across sessions, and there is no persistent long-lived process in the happy path. + +Option 3 (pull-based) is what we adopted. + +--- + +## Decision + +The SF schedule system is **pull-based**: + +- Schedule entries are stored as append-only JSONL in `.sf/schedule.jsonl` (project) or `~/.sf/schedule.jsonl` (global). +- There is no background daemon or timer process. +- Entries are queried ("pulled") at defined integration points: + 1. **Launch** — `loader.ts` calls `findDue()` and prints a banner if items are overdue + 2. 
**Auto-mode boundaries** — `sf headless query` populates a `schedule` field with `due` and `upcoming` entries + 3. **CLI** — `sf schedule list --due` for explicit human query + 4. **TUI status overlay** — displays due/upcoming schedule entries in the dashboard + +--- + +## Consequences + +### Positive + +- **Portable** — works identically on Linux, macOS, and Windows without platform-specific code +- **Simple** — no process management, no signal handlers, no daemon lifecycle +- **Auditable** — the JSONL file is a complete, append-only audit trail of all schedule operations +- **Resilient** — no fire-and-forget timer that might miss if the process is restarted +- **Stateless** — fits SF's session model: fresh context per unit, no in-memory state + +### Negative / Explicitly Deferred + +- **No fire-at-exact-time** — items are not delivered at their exact `due_at`; they surface at the next pull query. If an item is due at 3 AM and the user opens SF at 9 AM, the item appears as overdue. +- **No background notification** — SF cannot send a system notification when an item becomes due unless SF is open and the user is interacting with it. +- **No recurring fire precision** — `kind: recurring` entries are stored but the recurring fire mechanism is deferred to a future iteration. + +These limitations are accepted trade-offs for the portability and simplicity benefits. A future iteration could add an optional lightweight notification helper (e.g. a separate binary that reads the schedule and posts system notifications) without changing the core design. 
+ +--- + +## Implementation Notes + +- `schedule-store.js` — append-only JSONL store with `findDue()` and `findUpcoming()` queries +- `loader.ts` — calls `findDue()` on both scopes at startup; prints banner if any items are due +- `headless-query.ts` — populates `schedule: { due, upcoming }` in `QuerySnapshot` +- `sf schedule` CLI — add, list, done, cancel, snooze, run subcommands +- `sf_plan_milestone` YAML — supports `schedule[]` array with `in` and `on_complete` duration fields + +--- + +## Alternatives Considered + +### In-Process Timer (Rejected) + +A long-lived SF process could maintain a timer queue and fire items at their due time. Rejected because it conflicts with SF's session architecture — each unit runs in isolation with no shared timer state across dispatch cycles. + +### External Cron Wrapper (Rejected) + +A `sf-schedule-daemon` sidecar process managed by the user. Rejected because it adds an installation and operational burden that conflicts with the "install and use immediately" experience goal. + +### Third-Party Scheduling Service (Rejected) + +Using a hosted service (e.g. cron-job.org, AWS EventBridge) to fire webhook calls. Rejected because it introduces an external dependency and network requirement that does not fit SF's self-contained model. diff --git a/docs/specs/sf-schedule.md b/docs/specs/sf-schedule.md new file mode 100644 index 000000000..bebb09f93 --- /dev/null +++ b/docs/specs/sf-schedule.md @@ -0,0 +1,294 @@ +# SF Schedule System — Specification + +> **Spec version:** 1.0.0 +> **Status:** Implemented (M010 S02) +> **Owner:** M010 S05 + +--- + +## Overview + +The SF schedule system provides time-based reminders and deferred work items that surface at a future date. Entries are stored as append-only JSONL and queried on demand (pull-based), not fired by a daemon or cron job. This makes the system portable, auditable, and free of background processes. 
+ +Use `sf schedule` when something needs to happen at a specific future time but cannot (or should not) happen immediately: + +- **Schedule** — time-bound items that must surface on a date, even if SF is not running continuously +- **Backlog** — priority-ordered items with no specific timing (SF's standard milestone/slice queue) + +--- + +## Design Rationale + +### Pull-Based, Not Daemon-Based + +SF has no long-running daemon. Entries are not "fired" by a timer. Instead, the schedule store is queried at specific integration points: + +1. **On launch** — `loader.ts` calls `findDue()` and prints a banner if items are due +2. **Auto-mode boundaries** — `sf headless query` (and the TUI status overlay) includes due/upcoming entries in its output +3. **CLI query** — `sf schedule list --due` shows items whose `due_at <= now` + +This means: if an item is scheduled for 3 AM and you open SF at 9 AM, you will see the item as overdue. There is no fire-at-exact-time guarantee. This is an explicit trade-off — see the [pull-based ADR](../adr/0002-sf-schedule-pull-based.md) for the full decision record. + +### ULID Identifiers + +Schedule entries use [ULID](https://github.com/ulid/spec) (Universally Unique Lexicographically Sortable Identifier) instead of UUID. ULIDs are: + +- 26 characters, Crockford Base32 encoded +- Lexicographically sortable by creation time (useful for JSONL ordering) +- Unique enough to avoid collisions across concurrent appends +- Monotonic within millisecond precision via sub-millisecond counter + +The `generateULID()` function in `schedule-ulid.js` is used for all new entries. + +### Append-Only JSONL + +Each write appends a JSON line to `schedule.jsonl`. The latest entry per ID wins on read (via `created_at` comparison). This means status transitions (`pending` → `done`, `cancelled`, `snoozed`) are implemented as new entries, not mutations. The file is never rewritten — only appended to. + +Corrupt lines are skipped with a warning, never fatal. 
+ +--- + +## Storage Format + +### File Locations + +| Scope | Path | +|-------|------| +| `project` | `<project-root>/.sf/schedule.jsonl` | +| `global` | `~/.sf/schedule.jsonl` | + +### Schema + +```json +{ + "id": "01ARZ3NDEKTSV4RRFFQ69G5FAV", // ULID — 26 chars + "kind": "reminder", // ScheduleKind enum + "status": "pending", // pending | done | cancelled | snoozed + "due_at": "2026-06-15T09:00:00.000Z", // ISO-8601 timestamp + "created_at": "2026-05-15T09:00:00.000Z", + "snoozed_at": "2026-06-01T09:00:00.000Z", // ISO-8601 — set on each snooze + "payload": { "message": "Review adoption metrics" }, // kind-specific + "created_by": "user", // user | auto | system + "auto_dispatch": false // if true + kind=reminder, surface in auto-mode dispatch +} +``` + +### JSONL Line Example + +``` +{"id":"01ARZ3NDEKTSV4RRFFQ69G5FAV","kind":"reminder","status":"pending","due_at":"2026-06-15T09:00:00.000Z","created_at":"2026-05-15T09:00:00.000Z","payload":{"message":"Review adoption metrics"},"created_by":"user","auto_dispatch":false} +``` + +--- + +## Schedule Kinds + +| Kind | Description | Payload fields | +|------|-------------|----------------| +| `reminder` | General time-based reminder | `message`, `unitId?`, `milestoneId?` | +| `milestone_check` | Milestone health check | `milestoneId`, `checkType?` | +| `review_due` | Review prompt surfaced at next planning turn | `prUrl?`, `reviewer?`, `unitId?` | +| `recurring` | Cron-based recurring entry (future) | `cron`, `unitId?`, `milestoneId?` | +| `review` | Alias for `review_due` — same behaviour | — | +| `audit` | Audit surfaced at next planning turn | `unitId?` | +| `command` | Shell command run by explicit `sf schedule run <id>` | `command`, `capture?` | + +`review` and `audit` kinds are surfaced to the next autonomous planning turn (TBD: integration point in `sf_plan_slice` / `sf_plan_task` / auto-dispatch). They are stored but not auto-dispatched without a consumer. 
+ +--- + +## CLI Reference + +All commands are invoked as `/sf schedule <command>` in the TUI or `sf schedule <command>` from the shell. + +### `sf schedule add` + +``` +sf schedule add --in <duration> [--kind <kind>] [--scope <scope>] <title> +sf schedule add --at <ISO-date> [--kind <kind>] [--scope <scope>] <title> +``` + +Schedule a new item. + +**Flags:** +- `--in <duration>` — Relative time from now (e.g. `2w`, `30m`, `1d`, `4h`) +- `--at <ISO-date>` — Absolute ISO-8601 date +- `--kind <kind>` — Entry kind (default: `reminder`). Valid: `reminder`, `milestone_check`, `review_due`, `review`, `audit`, `recurring`, `command` +- `--scope <scope>` — `project` (default) or `global` + +**Examples:** +``` +sf schedule add --in 2w "Review feature adoption metrics" +sf schedule add --in 30m --kind milestone_check "Check M003 validation" +sf schedule add --at 2026-06-01T09:00:00Z --scope global "Team sync" +``` + +### `sf schedule list` + +``` +sf schedule list [--due] [--all] [--json] [--scope <scope>] +``` + +List scheduled items. + +**Flags:** +- `--due`, `-d` — Show only items whose `due_at <= now` (overdue + just-due) +- `--all`, `-a` — Show all entries including `done` and `cancelled` +- `--json`, `-j` — Raw JSON output +- `--scope <scope>` — `project` (default) or `global` + +**Output columns:** ID (8-char prefix), Title, Due (relative), Status, Kind + +**Examples:** +``` +sf schedule list +sf schedule list --due +sf schedule list --all --json +sf schedule list --scope global +``` + +### `sf schedule done` + +``` +sf schedule done <id> +``` + +Mark a pending item as done. ID can be a prefix (ULID prefix match). + +``` +sf schedule done 01ARZ3ND +``` + +### `sf schedule cancel` + +``` +sf schedule cancel <id> +``` + +Cancel a scheduled item. ID can be a prefix. + +``` +sf schedule cancel 01ARZ3ND +``` + +### `sf schedule snooze` + +``` +sf schedule snooze <id> --by <duration> +``` + +Postpone a scheduled item by a relative duration. Updates `due_at` and sets `snoozed_at`. 
+ +``` +sf schedule snooze 01ARZ3ND --by 1d +sf schedule snooze 01ARZ3ND --by 30m +``` + +### `sf schedule run` + +``` +sf schedule run <id> +``` + +Execute a scheduled item. For `reminder`, `milestone_check`, `review_due` kinds: displays the title and marks done. For `command` kind: executes the stored shell command and captures output. + +``` +sf schedule run 01ARZ3ND +``` + +--- + +## Integration Points + +### Loader Banner (`loader.ts`) + +On every SF startup, `loader.ts` calls `findDue()` for both project and global scopes. If any items are due, it prints: + +``` +[forge] N scheduled item(s) due now. Manage: /sf schedule list +``` + +### Headless Query (`sf headless query`) + +`headless-query.ts` populates a `schedule` field in `QuerySnapshot`: + +```ts +schedule: { + due: ScheduleEntry[], // due_at <= now + upcoming: ScheduleEntry[] // due_at within 7 days +} +``` + +This feeds the `sf status` dashboard and autonomous dispatch context. + +### Milestone YAML Schedule (`sf_plan_milestone`) + +The milestone plan schema supports a `schedule[]` array in the YAML spec: + +```yaml +schedule: + - in: 2w + kind: review + title: "Review adoption after shipping feature" +``` + +These entries are created at milestone creation time. The `in` field is relative to `now`. The `on_complete` variant fires a duration after milestone completion. + +### Auto-Dispatch + +When `auto_dispatch: true` and `kind: "reminder"`, the item is surfaced as a dispatch input in auto-mode when `due_at <= now`. This is the mechanism for time-bound autonomous reminders. 
+ +--- + +## Duration Format + +All duration strings follow the format `<number><unit>`: + +| Unit | Meaning | +|------|---------| +| `w` | weeks | +| `d` | days | +| `h` | hours | +| `m` | minutes | + +Examples: `30m`, `4h`, `2d`, `1w` + +--- + +## Examples + +### Reminder (2 weeks out) + +``` +sf schedule add --in 2w "Review feature adoption metrics" +``` + +### Milestone Check (at milestone creation via plan YAML) + +```yaml +# In milestone spec +schedule: + - in: 2w + kind: milestone_check + title: "Validate M003 success criteria" +``` + +### Audit (surfaced at next planning turn) + +``` +sf schedule add --in 1mo --kind audit "Audit ADR-007 decision implementation" +``` + +### Command (shell command execution) + +``` +sf schedule add --in 30m --kind command "Reminder: run integration tests" +# Note: kind=command requires payload.command field — use the CLI directly +# to set kind=reminder for simple reminders +``` + +### Global Scope (across all projects) + +``` +sf schedule add --in 1w --scope global "Review all open milestones" +``` diff --git a/packages/pi-coding-agent/src/core/extensions/runner.test.ts b/packages/pi-coding-agent/src/core/extensions/runner.test.ts index e50b34c4e..c76846646 100644 --- a/packages/pi-coding-agent/src/core/extensions/runner.test.ts +++ b/packages/pi-coding-agent/src/core/extensions/runner.test.ts @@ -94,3 +94,53 @@ describe("ExtensionRunner.emitToolCall", () => { } }); }); + +describe("ExtensionRunner UI compatibility", () => { + it("set_widget_when_host_widget_method_throws_does_not_fail_extension_event", async () => { + const dir = mkdtempSync(join(tmpdir(), "runner-widget-test-")); + try { + const sessionManager = SessionManager.create(dir, dir); + const authStorage = AuthStorage.create(); + const modelRegistry = new ModelRegistry( + authStorage, + join(dir, "models.json"), + ); + const handlers = new Map(); + handlers.set("session_start", [ + async (_event: unknown, ctx: any) => { + ctx.ui.setWidget("sf-progress", 
["ready"], { + placement: "belowEditor", + }); + }, + ]); + const extension = { + path: "/test/widget-ext", + handlers, + commands: [], + shortcuts: [], + diagnostics: [], + } as unknown as Extension; + const runner = new ExtensionRunner( + [extension], + makeMinimalRuntime(), + dir, + sessionManager, + modelRegistry, + ); + const errors: any[] = []; + runner.onError((err) => errors.push(err)); + runner.setUIContext({ + ...runner.getUIContext(), + setWidget: () => { + throw new TypeError("host.setExtensionWidget is not a function"); + }, + }); + + await runner.emit({ type: "session_start" }); + + assert.deepEqual(errors, []); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/runner.ts b/packages/pi-coding-agent/src/core/extensions/runner.ts index 641759726..9f463ac75 100644 --- a/packages/pi-coding-agent/src/core/extensions/runner.ts +++ b/packages/pi-coding-agent/src/core/extensions/runner.ts @@ -209,6 +209,22 @@ const noOpUIContext: ExtensionUIContext = { setToolsExpanded: () => {}, }; +function wrapExtensionUIContext( + uiContext: ExtensionUIContext, +): ExtensionUIContext { + return { + ...uiContext, + setWidget: (key, content, options) => { + try { + uiContext.setWidget(key, content as never, options); + } catch { + // Extension widgets are optional UI sugar. Older or embedded hosts can + // expose a stale setWidget shim; never let that break extension hooks. + } + }, + }; +} + export class ExtensionRunner { private extensions: Extension[]; private runtime: ExtensionRuntime; @@ -339,7 +355,9 @@ export class ExtensionRunner { } setUIContext(uiContext?: ExtensionUIContext): void { - this.uiContext = uiContext ?? noOpUIContext; + this.uiContext = uiContext + ? 
wrapExtensionUIContext(uiContext) + : noOpUIContext; } getUIContext(): ExtensionUIContext { diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/extension-ui-controller.test.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/extension-ui-controller.test.ts index 4b06c5e83..91fac9cac 100644 --- a/packages/pi-coding-agent/src/modes/interactive/controllers/extension-ui-controller.test.ts +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/extension-ui-controller.test.ts @@ -67,6 +67,22 @@ test("set_widget_when_host_supports_widgets_uses_dedicated_handler", () => { assert.deepEqual(calls, [["sf-notifications", content, options]]); }); +test("set_widget_when_widget_host_throws_falls_back_without_extension_error", () => { + const statuses: unknown[][] = []; + const ui = createExtensionUIContext({ + setExtensionWidget() { + throw new TypeError("host.setExtensionWidget is not a function"); + }, + showStatus(message: string, options?: unknown) { + statuses.push([message, options]); + }, + }); + + ui.setWidget("sf-progress", ["Ready"], { placement: "belowEditor" }); + + assert.deepEqual(statuses, [["Ready", { append: false }]]); +}); + test("set_widget_when_widget_host_missing_routes_string_content_to_status", () => { const statuses: unknown[][] = []; const ui = createExtensionUIContext({ diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/extension-ui-controller.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/extension-ui-controller.ts index f66e19780..eb4c58076 100644 --- a/packages/pi-coding-agent/src/modes/interactive/controllers/extension-ui-controller.ts +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/extension-ui-controller.ts @@ -51,8 +51,13 @@ function setWidgetHost( options?: ExtensionWidgetOptions, ): void { if (typeof host.setExtensionWidget === "function") { - host.setExtensionWidget(key, content, options); - return; + try { + host.setExtensionWidget(key, content, 
options); + return; + } catch { + // Widget rendering is optional. Embedded/stale hosts may expose an + // incompatible shim; degrade to status/render fallback below. + } } if (content === undefined) { diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts index 41076ce43..9830ea50d 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts @@ -252,7 +252,12 @@ export async function runRpcMode(session: AgentSession): Promise<never> { if (!embeddedInteractiveMode) { return; } - await apply(embeddedInteractiveMode.getExtensionUIContext()); + try { + await apply(embeddedInteractiveMode.getExtensionUIContext()); + } catch { + // Embedded UI replay is best-effort. A stale interactive host should not + // turn optional extension widgets into RPC-mode extension failures. + } }; const replayEmbeddedUiState = async ( @@ -262,10 +267,18 @@ export async function runRpcMode(session: AgentSession): Promise<never> { ui.setHeader(headerFactory); ui.setFooter(footerFactory); for (const [key, text] of statusState.entries()) { - ui.setStatus(key, text); + try { + ui.setStatus(key, text); + } catch { + // Best-effort UI replay. + } } for (const [key, widget] of widgetState.entries()) { - ui.setWidget(key, widget.content as any, widget.options); + try { + ui.setWidget(key, widget.content as any, widget.options); + } catch { + // Best-effort UI replay. + } } ui.setWorkingMessage(workingMessageState); if (titleState) { diff --git a/src/cli.ts b/src/cli.ts index 59fe96d0d..a372ad2d1 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -596,6 +596,28 @@ if (cliFlags.messages[0] === "autonomous") { ]); } +// `sf schedule ...` — first-class non-interactive schedule CLI. +// Keep this before the interactive/TUI path so commands like +// `sf schedule list --json | jq` never fall through to the TTY guard. 
+if (cliFlags.messages[0] === "schedule") { + const scheduleModulePath = "./resources/extensions/sf/commands-schedule.js"; + const { handleSchedule } = await import(scheduleModulePath); + const rawScheduleArgs = process.argv.slice(3).join(" "); + const output = (message: string, level = "info") => { + const stream = + level === "warning" || level === "error" + ? process.stderr + : process.stdout; + stream.write(message.endsWith("\n") ? message : `${message}\n`); + }; + await handleSchedule(rawScheduleArgs, { + ui: { + notify: output, + }, + }); + process.exit(0); +} + // Pi's tool bootstrap can mis-detect already-installed fd/rg on some systems // because spawnSync(..., ["--version"]) returns EPERM despite a zero exit code. // Provision local managed binaries first so Pi sees them without probing PATH. diff --git a/src/headless-query.ts b/src/headless-query.ts index 1f4f37219..42ac9630f 100644 --- a/src/headless-query.ts +++ b/src/headless-query.ts @@ -201,6 +201,31 @@ function numberField(value: unknown): number | null { return typeof value === "number" && Number.isFinite(value) ? 
value : null; } +function pidIsAlive(pid: unknown): boolean { + if (!Number.isInteger(pid) || Number(pid) <= 0) return false; + if (pid === process.pid) return true; + try { + process.kill(Number(pid), 0); + return true; + } catch (err) { + return (err as NodeJS.ErrnoException).code === "EPERM"; + } +} + +function queryHasLiveAutoLock(basePath: string): boolean { + const lockPath = join(resolveSfRootForQuery(basePath), "auto.lock"); + if (!existsSync(lockPath)) return false; + try { + const lock = JSON.parse(readFileSync(lockPath, "utf-8")) as Record< + string, + unknown + >; + return pidIsAlive(lock.pid); + } catch { + return false; + } +} + function inferQueryStatus( phase: string, record: Record<string, unknown>, @@ -296,6 +321,7 @@ function queryRuntimeDecision(input: { function readRuntimeUnitSummaries(basePath: string): RuntimeUnitSummary[] { const unitsDir = join(resolveSfRootForQuery(basePath), "runtime", "units"); if (!existsSync(unitsDir)) return []; + const hasLiveAutoLock = queryHasLiveAutoLock(basePath); const results: RuntimeUnitSummary[] = []; for (const file of readdirSync(unitsDir)) { if (!file.endsWith(".json")) continue; @@ -307,10 +333,10 @@ function readRuntimeUnitSummaries(basePath: string): RuntimeUnitSummary[] { const unitId = stringField(record.unitId); if (!unitType || !unitId) continue; const phase = stringField(record.phase, "dispatched"); - const status = stringField( - record.status, - inferQueryStatus(phase, record), - ); + let status = stringField(record.status, inferQueryStatus(phase, record)); + if (!hasLiveAutoLock && !QUERY_TERMINAL_STATUSES.has(status)) { + status = "stale"; + } const recoveryAttempts = numberField(record.recoveryAttempts) ?? 0; const retryCount = numberField(record.retryCount) ?? 
recoveryAttempts; const maxRetries = diff --git a/src/headless.ts b/src/headless.ts index f52b42ca4..932312f92 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -93,6 +93,35 @@ import { const HEADLESS_HEARTBEAT_INTERVAL_MS = 60_000; +async function runHeadlessTimeoutSolverEval(basePath: string): Promise<void> { + try { + const evalModulePath = + "./resources/extensions/sf/autonomous-solver-eval.js"; + const { runAutomaticAutonomousSolverEval } = await import(evalModulePath); + const result = await runAutomaticAutonomousSolverEval({ + basePath, + reason: "headless-autonomous-timeout", + }); + if (result?.ok && result.report?.dbRecorded) { + process.stderr.write( + `[headless] Autonomous solver eval recorded after timeout: ${result.report.reportPath}\n`, + ); + } else if (result?.ok && result.report) { + process.stderr.write( + `[headless] Autonomous solver eval wrote ${result.report.reportPath}, but DB evidence was not recorded.\n`, + ); + } else if (!result?.skipped) { + process.stderr.write( + `[headless] Autonomous solver eval after timeout failed: ${result?.error ?? "unknown error"}\n`, + ); + } + } catch (err) { + process.stderr.write( + `[headless] Autonomous solver eval after timeout failed: ${err instanceof Error ? 
err.message : String(err)}\n`, + ); + } +} + // --------------------------------------------------------------------------- // Types // --------------------------------------------------------------------------- @@ -1746,6 +1775,10 @@ async function runHeadlessOnce( await client.stop(); + if (isAutoMode && timedOut) { + await runHeadlessTimeoutSolverEval(process.cwd()); + } + // Summary const duration = ((Date.now() - startTime) / 1000).toFixed(1); const status = blocked diff --git a/src/help-text.ts b/src/help-text.ts index f1c8bb038..f046cea78 100644 --- a/src/help-text.ts +++ b/src/help-text.ts @@ -168,6 +168,37 @@ const SUBCOMMAND_HELP: Record<string, string> = { "See docs/plans/README.md, docs/adr/README.md, and docs/specs/README.md for conventions.", ].join("\n"), + schedule: [ + "Usage: sf schedule <command> [args]", + "", + "Manage time-bound reminders and deferred work items.", + "Entries are stored as append-only JSONL in .sf/schedule.jsonl (project)", + "or ~/.sf/schedule.jsonl (global). No daemon required — items surface on pull.", + "", + "Commands:", + " add --in <duration> [--kind <kind>] [--scope <scope>] <title>", + " --at <ISO-date> Schedule by absolute date instead", + " list [--due] [--all] [--json] [--scope <scope>]", + " done <id> Mark a scheduled item as done", + " cancel <id> Cancel a scheduled item", + " snooze <id> --by <duration> Postpone by relative time", + " run <id> Execute a scheduled item (show reminder or run command)", + "", + "Duration format: <number><unit> where unit is w(weeks), d(days), h(hours), m(minutes).", + " e.g. 
30m, 4h, 2d, 1w", + "", + "Scope: project (default, stored in .sf/schedule.jsonl) or global (~/.sf/schedule.jsonl).", + "", + "Kinds: reminder, milestone_check, review_due, review, audit, recurring, command", + "", + "Examples:", + ' sf schedule add --in 2w "Review feature adoption metrics"', + ' sf schedule add --at 2026-06-01T09:00:00Z --kind audit "Audit ADR-007"', + " sf schedule list --due", + " sf schedule snooze 01ARZ3ND --by 1d", + " sf schedule done 01ARZ3ND", + ].join("\n"), + headless: [ "Usage: sf headless [flags] [command] [args...]", "", @@ -297,6 +328,9 @@ export function printHelp(version: string): void { process.stdout.write( " plan <cmd> Manage SF planning artifacts (promote, list, diff)\n", ); + process.stdout.write( + " schedule <cmd> Manage time-bound reminders (add, list, done, cancel, snooze, run)\n", + ); process.stdout.write( "\nRun sf <subcommand> --help for subcommand-specific help.\n", ); diff --git a/src/resources/extensions/sf/auto-prompts.js b/src/resources/extensions/sf/auto-prompts.js index 30d305432..91e111d68 100644 --- a/src/resources/extensions/sf/auto-prompts.js +++ b/src/resources/extensions/sf/auto-prompts.js @@ -75,6 +75,58 @@ import { logWarning } from "./workflow-logger.js"; * from their configured executor window. */ const MAX_PREAMBLE_CHARS = 30_000; + +function formatTaskLedgerFiles(task) { + const files = [...(task.key_files ?? []), ...(task.files ?? [])] + .map((entry) => String(entry).trim()) + .filter(Boolean); + const unique = [...new Set(files)].slice(0, 4); + return unique.length > 0 ? unique.join(", ") : "(none recorded)"; +} + +function escapeMarkdownTableCell(value) { + return String(value ?? 
"") + .replace(/\|/g, "\\|") + .replace(/\n/g, " "); +} + +function buildCompleteSliceControlBlock(mid, sid, base) { + try { + if (!isDbAvailable()) return ""; + const tasks = getSliceTasks(mid, sid); + if (tasks.length === 0) return ""; + const sliceRel = relSlicePath(base, mid, sid); + const rows = tasks.map((task) => { + const summaryPath = `${sliceRel}/tasks/${task.id}-SUMMARY.md`; + const status = task.status || "pending"; + const verification = + task.verification_status || + task.verification_result || + "(not recorded)"; + const oneLiner = task.one_liner || task.title || "(untitled)"; + return `| ${escapeMarkdownTableCell(task.id)} | ${escapeMarkdownTableCell(status)} | ${escapeMarkdownTableCell(summaryPath)} | ${escapeMarkdownTableCell(oneLiner)} | ${escapeMarkdownTableCell(verification)} | ${escapeMarkdownTableCell(formatTaskLedgerFiles(task))} |`; + }); + return [ + "## Slice Closeout Control", + "", + "Use this DB task ledger as the authoritative closeout index. Do not rediscover task state from the roadmap or by scanning unrelated milestone directories.", + "", + "| Task | Status | Summary | Delivered / purpose | Verification | Key files |", + "|---|---|---|---|---|---|", + ...rows, + "", + "If every task row is `done`, `complete`, or `skipped`, verify the slice-level contract once and call `sf_slice_complete`. Do not reopen planning, do not re-run completed task work, and do not assume a missing roadmap checkbox means the tasks are incomplete.", + "If any task row is still pending or blocked, stop and report the exact task IDs instead of synthesizing new work.", + ].join("\n"); + } catch (err) { + logWarning( + "prompt", + `complete-slice task ledger failed: ${err instanceof Error ? 
err.message : String(err)}`, + ); + return ""; + } +} + // Module-scope budget cache: `loadEffectiveSFPreferences` does existsSync + // readFileSync on every call, which is expensive when `resolvePromptBudgets` // is called multiple times per prompt build (capPreamble + resolveSummaryBudgetChars). @@ -2326,6 +2378,7 @@ export async function buildCompleteSlicePrompt( const inlinedContext = capPreamble( `## Inlined Context (preloaded — do not re-read these files)\n\n${finalBody}`, ); + const closeoutControl = buildCompleteSliceControlBlock(mid, sid, base); const roadmapRel = relMilestoneFile(base, mid, "ROADMAP"); const sliceRel = relSlicePath(base, mid, sid); const sliceSummaryPath = join(base, `${sliceRel}/${sid}-SUMMARY.md`); @@ -2349,6 +2402,7 @@ export async function buildCompleteSlicePrompt( sliceTitle: sTitle, slicePath: sliceRel, roadmapPath: join(base, roadmapRel), + closeoutControl, inlinedContext, sliceSummaryPath, sliceUatPath, diff --git a/src/resources/extensions/sf/auto-start.js b/src/resources/extensions/sf/auto-start.js index 134e3d173..551e338d6 100644 --- a/src/resources/extensions/sf/auto-start.js +++ b/src/resources/extensions/sf/auto-start.js @@ -90,7 +90,10 @@ import { getMilestone, isDbAvailable, openDatabase } from "./sf-db.js"; import { snapshotSkills } from "./skill-discovery.js"; import { deriveState, isGhostMilestone } from "./state.js"; import { isClosedStatus } from "./status-guards.js"; -import { reconcileStaleCompleteSliceRecords } from "./unit-runtime.js"; +import { + reconcileDurableCompleteUnitRuntimeRecords, + reconcileStaleCompleteSliceRecords, +} from "./unit-runtime.js"; import { logError, logWarning } from "./workflow-logger.js"; function safeSetWidget(ctx, key, content, options) { @@ -535,6 +538,22 @@ export async function bootstrapAutoSession( // Open the project-root DB before deriveState so DB-backed state // derivation (queue-order, task status) works on a cold start (#2841). 
await openProjectDbIfPresent(base); + try { + const reconciled = await reconcileDurableCompleteUnitRuntimeRecords(base); + if (reconciled.cleared > 0) { + debugLog("bootstrap", { + phase: "durable-complete-runtime-reconciled", + cleared: reconciled.cleared, + units: reconciled.details, + }); + } + } catch (err) { + // Non-fatal — defensive cleanup, never block bootstrap + logWarning( + "bootstrap", + `durable complete runtime reconciliation failed: ${err instanceof Error ? err.message : String(err)}`, + ); + } // ── Orphaned milestone branch audit ── // Catches completed milestones whose teardown (merge + branch delete) // was lost due to session ending between completion and teardown. diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.js b/src/resources/extensions/sf/bootstrap/register-hooks.js index 2e1f94de3..5dc54662f 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.js +++ b/src/resources/extensions/sf/bootstrap/register-hooks.js @@ -206,12 +206,13 @@ export function registerHooks(pi, ecosystemHandlers = []) { } } loadToolApiKeys(); - // Flow audit is read-only by default: surface stale dispatched units, - // missing session pointers, runaway history, and optional child hangs at - // startup before another auto unit compounds the same milestone failure. + // Flow audit cleans over-budget optional child processes automatically and + // only warns for real blockers such as stale dispatch or recent errors. 
try { const { runFlowAudit } = await import("../doctor.js"); - const flow = await runFlowAudit(process.cwd()); + const flow = await runFlowAudit(process.cwd(), { + killOverBudgetChildren: true, + }); if (!flow.ok) { ctx.ui?.notify?.(`Flow audit: ${flow.recommendedAction}`, "warning"); } diff --git a/src/resources/extensions/sf/doctor-runtime-checks.js b/src/resources/extensions/sf/doctor-runtime-checks.js index 131607bdd..19ddd34b7 100644 --- a/src/resources/extensions/sf/doctor-runtime-checks.js +++ b/src/resources/extensions/sf/doctor-runtime-checks.js @@ -35,6 +35,29 @@ import { } from "./session-status-io.js"; import { deriveState } from "./state.js"; import { getAuditEmitFailureCount } from "./workflow-logger.js"; + +const ACTIVE_UNIT_RUNTIME_STATUSES = new Set([ + "queued", + "running", + "progress", + "repair", +]); +const ACTIVE_UNIT_RUNTIME_PHASES = new Set([ + "dispatched", + "running", + "progress", + "repair-dispatched", + "runaway-warning-sent", + "runaway-final-warning-sent", +]); + +function isActiveUnitRuntimeRecord(record) { + return ( + ACTIVE_UNIT_RUNTIME_STATUSES.has(String(record?.status ?? "")) || + ACTIVE_UNIT_RUNTIME_PHASES.has(String(record?.phase ?? 
"")) + ); +} + export async function checkRuntimeHealth( basePath, issues, @@ -42,18 +65,20 @@ export async function checkRuntimeHealth( shouldFix, ) { const root = sfRoot(basePath); + let crashLock = null; + let crashLockAlive = false; // ── Stale crash lock ────────────────────────────────────────────────── try { - const lock = readCrashLock(basePath); - if (lock) { - const alive = isLockProcessAlive(lock); - if (!alive) { + crashLock = readCrashLock(basePath); + if (crashLock) { + crashLockAlive = isLockProcessAlive(crashLock); + if (!crashLockAlive) { issues.push({ severity: "error", code: "stale_crash_lock", scope: "project", unitId: "project", - message: `Stale auto.lock from PID ${lock.pid} (started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`, + message: `Stale auto.lock from PID ${crashLock.pid} (started ${crashLock.startedAt}, was executing ${crashLock.unitType} ${crashLock.unitId}) — process is no longer running`, file: ".sf/auto.lock", fixable: true, }); @@ -66,6 +91,49 @@ export async function checkRuntimeHealth( } catch { // Non-fatal — crash lock check failed } + // ── Stale active unit runtime records ───────────────────────────────── + // auto.lock is the ownership proof for active unit records. If it is absent + // or points at a dead PID, dispatched/runtime-active unit files are stale + // leftovers and make query/dispatch believe old units are still claimed. + try { + const unitsDir = join(root, "runtime", "units"); + if (!crashLockAlive && existsSync(unitsDir)) { + const staleFiles = []; + for (const file of readdirSync(unitsDir)) { + if (!file.endsWith(".json")) continue; + const abs = join(unitsDir, file); + try { + const record = JSON.parse(readFileSync(abs, "utf8")); + if (isActiveUnitRuntimeRecord(record)) { + staleFiles.push({ file, abs, record }); + } + } catch { + // Malformed runtime unit records are handled by other checks. 
+ } + } + if (staleFiles.length > 0) { + issues.push({ + severity: "error", + code: "stale_active_unit_runtime", + scope: "project", + unitId: "project", + message: `${staleFiles.length} active unit runtime record(s) exist without a live auto.lock owner. These stale claims can block autonomous dispatch.`, + file: ".sf/runtime/units", + fixable: true, + }); + if (shouldFix("stale_active_unit_runtime")) { + for (const stale of staleFiles) { + rmSync(stale.abs, { force: true }); + } + fixesApplied.push( + `cleared ${staleFiles.length} stale active unit runtime record(s)`, + ); + } + } + } + } catch { + // Non-fatal — runtime unit cleanup should not block doctor. + } // ── Stranded lock directory ──────────────────────────────────────────── // proper-lockfile creates a `.sf.lock/` directory as the OS-level lock // mechanism. If the process was SIGKILLed or crashed hard, this directory diff --git a/src/resources/extensions/sf/doctor.js b/src/resources/extensions/sf/doctor.js index 53f47d2bb..d2919f4b8 100644 --- a/src/resources/extensions/sf/doctor.js +++ b/src/resources/extensions/sf/doctor.js @@ -1,4 +1,5 @@ import { + copyFileSync, existsSync, lstatSync, mkdirSync, @@ -56,6 +57,7 @@ import { parseUnitId } from "./unit-id.js"; // ─── Flow Audit Implementation ──────────────────────────────────────────── const DEFAULT_STALE_PROGRESS_MS = 20 * 60 * 1000; const DEFAULT_OPTIONAL_CHILD_BUDGET_MS = 30 * 60 * 1000; +const DEFAULT_RECENT_ERROR_MAX_AGE_MS = 30 * 60 * 1000; const REPEATED_FAILURE_THRESHOLD = 3; const FLOW_AUDIT_ROLLUP_KIND = "flow-audit:repeated-milestone-failure"; const LEGACY_ROOT_HARNESS_PATHS = [ @@ -411,10 +413,20 @@ async function readPsRows(options) { return []; } } -function classifyProcess(row) { +function classifyProcess(row, rows = []) { const cmd = row.cmd.toLowerCase(); if (cmd.includes("sift") || cmd.includes("warmup")) return "warmup"; if (row.ppid === 1 && cmd.includes("next-server")) return "orphan"; + const parent = rows.find((candidate) => 
candidate.pid === row.ppid); + const parentCmd = parent?.cmd?.toLowerCase?.() ?? ""; + if ( + cmd.trim() === "sf" && + (parentCmd.includes("next-server") || + parentCmd.includes("vite") || + parentCmd.includes("turbopack")) + ) { + return "orphan"; + } if ( cmd.includes("next-server") || cmd.includes("vite") || @@ -446,9 +458,17 @@ function shouldIncludeProcess(row, classification, activePid) { if (activePid === undefined) return false; return row.pid === activePid || row.ppid === activePid; } -function readRecentErrors(runtimeRoot) { +function parseNotificationEpochMs(entry) { + const value = entry.ts ?? entry.timestamp ?? entry.time; + if (typeof value !== "string") return null; + const parsed = Date.parse(value); + return Number.isFinite(parsed) ? parsed : null; +} +function readRecentErrors(runtimeRoot, options = {}) { const notificationsPath = join(runtimeRoot, "notifications.jsonl"); if (!existsSync(notificationsPath)) return []; + const nowMs = options.nowMs ?? Date.now(); + const maxAgeMs = options.maxAgeMs ?? DEFAULT_RECENT_ERROR_MAX_AGE_MS; const errors = []; try { const lines = readFileSync(notificationsPath, "utf8") @@ -458,6 +478,19 @@ function readRecentErrors(runtimeRoot) { try { const entry = JSON.parse(line); const message = entry.message ?? entry.text ?? 
""; + if ( + typeof message === "string" && + message.startsWith("Flow audit: Review recent errors") + ) { + continue; + } + const entryMs = parseNotificationEpochMs(entry); + if ( + entryMs !== null && + (nowMs - entryMs < 0 || nowMs - entryMs > maxAgeMs) + ) { + continue; + } if ( entry.severity === "error" || message.toLowerCase().includes("error") || @@ -608,7 +641,7 @@ function chooseRecommendedAction(args) { return `Inspect session${session} for ${unit.unitType} ${unit.unitId}; if no new output exists, stop/requeue the stale dispatched unit before continuing.`; } const overBudgetOptional = args.childProcesses.find( - (p) => p.nonBlocking && p.overBudget, + (p) => p.nonBlocking && p.overBudget && !p.killed, ); if (overBudgetOptional) { return `Optional ${overBudgetOptional.classification} child pid ${overBudgetOptional.pid} is over budget; it is non-blocking, or rerun with --kill-children to terminate it.`; @@ -640,7 +673,10 @@ export async function runFlowAudit(basePath, options = {}) { const warnings = []; const recommendations = []; const childProcesses = []; - const lastErrors = readRecentErrors(runtimeRoot); + const lastErrors = readRecentErrors(runtimeRoot, { + nowMs, + maxAgeMs: options.recentErrorMaxAgeMs ?? DEFAULT_RECENT_ERROR_MAX_AGE_MS, + }); const staleDispatchedUnits = []; let sessionPointer; let activeMilestone; @@ -779,7 +815,7 @@ export async function runFlowAudit(basePath, options = {}) { } const psRows = await readPsRows(options); for (const row of psRows) { - const classification = classifyProcess(row); + const classification = classifyProcess(row, psRows); if (!shouldIncludeProcess(row, classification, activePid)) continue; const nonBlocking = isOptionalChild(classification); const overBudget = @@ -790,9 +826,6 @@ export async function runFlowAudit(basePath, options = {}) { let killed = false; let killError; if (overBudget) { - warnings.push( - `${classification} child pid ${row.pid} is over budget (${minutes(row.ageMs ?? 
0)} minutes).`, - ); if (options.killOverBudgetChildren) { action = "kill"; try { @@ -805,6 +838,10 @@ export async function runFlowAudit(basePath, options = {}) { `Failed to kill over-budget ${classification} child pid ${row.pid}: ${killError}`, ); } + } else { + warnings.push( + `${classification} child pid ${row.pid} is over budget (${minutes(row.ageMs ?? 0)} minutes).`, + ); } } childProcesses.push({ @@ -1705,6 +1742,62 @@ export async function runSFDoctor(basePath, options) { } catch { /* non-fatal */ } + // ── Single-task DB/disk ID drift ─────────────────────────────────── + // A killed plan-slice can leave DB state pointing at the intended task ID + // while the task plan exists under a generated ordinal ID. In that state + // dispatch conservatively re-runs plan-slice forever because the active + // task's PLAN file is missing. If there is exactly one DB task and one + // task PLAN file, copying the orphan plan to the DB task ID is safe and + // preserves the original file for audit. + try { + if (tasksDir && plan.tasks.length === 1) { + const task = plan.tasks[0]; + const expectedPlanPath = resolveTaskFile( + basePath, + milestoneId, + slice.id, + task.id, + "PLAN", + ); + const hasExpectedPlan = !!( + expectedPlanPath && existsSync(expectedPlanPath) + ); + if (!hasExpectedPlan) { + const planFiles = readdirSync(tasksDir).filter((f) => + f.endsWith("-PLAN.md"), + ); + if (planFiles.length === 1) { + const sourceFile = planFiles[0]; + const sourceTaskId = sourceFile.replace(/-PLAN\.md$/, ""); + const sourceAbs = join(tasksDir, sourceFile); + const targetAbs = join(tasksDir, `${task.id}-PLAN.md`); + issues.push({ + severity: "error", + code: "task_plan_id_drift", + scope: "task", + unitId: `${unitId}/${task.id}`, + message: `Task ${task.id} is active in DB, but the only task plan on disk is ${sourceFile}. 
This makes autonomous redispatch plan-slice instead of execute-task.`, + file: relTaskFile( + basePath, + milestoneId, + slice.id, + task.id, + "PLAN", + ), + fixable: true, + }); + if (shouldFix("task_plan_id_drift")) { + copyFileSync(sourceAbs, targetAbs); + fixesApplied.push( + `copied ${sourceTaskId}-PLAN.md to ${task.id}-PLAN.md for ${unitId}`, + ); + } + } + } + } + } catch { + /* non-fatal */ + } let allTasksDone = plan.tasks.length > 0; for (const task of plan.tasks) { const taskUnitId = `${unitId}/${task.id}`; diff --git a/src/resources/extensions/sf/prompts/complete-slice.md b/src/resources/extensions/sf/prompts/complete-slice.md index 101f005d4..d3228ab98 100644 --- a/src/resources/extensions/sf/prompts/complete-slice.md +++ b/src/resources/extensions/sf/prompts/complete-slice.md @@ -14,6 +14,8 @@ Write the summary for those downstream readers. What did this slice actually del All relevant context has been preloaded below — the slice plan, all task summaries, and the milestone roadmap are inlined. Start working immediately without re-reading these files. +{{closeoutControl}} + {{inlinedContext}} {{gatesToClose}} diff --git a/src/resources/extensions/sf/schedule/schedule-types.js b/src/resources/extensions/sf/schedule/schedule-types.js index 1e595b390..452525254 100644 --- a/src/resources/extensions/sf/schedule/schedule-types.js +++ b/src/resources/extensions/sf/schedule/schedule-types.js @@ -99,6 +99,7 @@ export const VALID_KINDS = new Set([ "recurring", "review", "audit", + "command", ]); /** diff --git a/src/resources/extensions/sf/tests/auto-prompts-complete-slice.test.mjs b/src/resources/extensions/sf/tests/auto-prompts-complete-slice.test.mjs new file mode 100644 index 000000000..1d7d84cf1 --- /dev/null +++ b/src/resources/extensions/sf/tests/auto-prompts-complete-slice.test.mjs @@ -0,0 +1,115 @@ +/** + * auto-prompts-complete-slice.test.mjs - complete-slice prompt contracts. 
+ * + * Purpose: prove slice closeout receives structured task state so autonomous + * agents verify and close completed slices instead of rediscovering task status. + */ +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, expect, test } from "vitest"; +import { buildCompleteSlicePrompt } from "../auto-prompts.js"; +import { + closeDatabase, + insertMilestone, + insertSlice, + insertTask, + openDatabase, +} from "../sf-db.js"; + +let tempDirs = []; + +function makeProject() { + const dir = mkdtempSync(join(tmpdir(), "sf-complete-slice-prompt-")); + tempDirs.push(dir); + mkdirSync(join(dir, ".sf", "milestones", "M900", "slices", "S01", "tasks"), { + recursive: true, + }); + return dir; +} + +afterEach(() => { + closeDatabase(); + for (const dir of tempDirs) { + rmSync(dir, { recursive: true, force: true }); + } + tempDirs = []; +}); + +describe("complete-slice prompt", () => { + test("complete_slice_prompt_includes_db_task_ledger_for_closeout", async () => { + const base = makeProject(); + openDatabase(join(base, ".sf", "sf.db")); + insertMilestone({ + id: "M900", + title: "Closeout prompt", + status: "active", + planning: { + vision: "Close completed slices without state rediscovery.", + successCriteria: ["Completed task rows drive slice closeout."], + }, + }); + insertSlice({ + milestoneId: "M900", + id: "S01", + title: "Already built docs", + status: "active", + risk: "low", + depends: [], + demo: "Docs are present and verified.", + sequence: 1, + }); + insertTask({ + milestoneId: "M900", + sliceId: "S01", + id: "T01", + title: "Write docs", + status: "done", + oneLiner: "Schedule docs exist.", + verificationResult: "node dist/loader.js schedule --help", + verificationStatus: "passed", + keyFiles: ["docs/specs/sf-schedule.md"], + fullSummaryMd: "Docs written and help verified.", + sequence: 1, + }); + writeFileSync( + join(base, ".sf", 
"milestones", "M900", "M900-ROADMAP.md"), + "# M900: Closeout prompt\n\n## S01: Already built docs\n", + ); + writeFileSync( + join(base, ".sf", "milestones", "M900", "slices", "S01", "S01-PLAN.md"), + "# S01: Already built docs\n\n## Verification\n\n- schedule help passes\n", + ); + writeFileSync( + join( + base, + ".sf", + "milestones", + "M900", + "slices", + "S01", + "tasks", + "T01-SUMMARY.md", + ), + "# T01 Summary\n\nSchedule docs exist.\n", + ); + + const prompt = await buildCompleteSlicePrompt( + "M900", + "Closeout prompt", + "S01", + "Already built docs", + base, + "minimal", + ); + + expect(prompt).toContain("## Slice Closeout Control"); + expect(prompt).toContain( + "| T01 | done | .sf/milestones/M900/slices/S01/tasks/T01-SUMMARY.md | Schedule docs exist. | passed | docs/specs/sf-schedule.md |", + ); + expect(prompt).toContain( + "verify the slice-level contract once and call `sf_slice_complete`", + ); + expect(prompt).toContain("Do not reopen planning"); + }); +}); diff --git a/src/resources/extensions/sf/tests/doctor-flow-audit-auto-cleanup.test.mjs b/src/resources/extensions/sf/tests/doctor-flow-audit-auto-cleanup.test.mjs new file mode 100644 index 000000000..080699ad0 --- /dev/null +++ b/src/resources/extensions/sf/tests/doctor-flow-audit-auto-cleanup.test.mjs @@ -0,0 +1,177 @@ +/** + * doctor-flow-audit-auto-cleanup.test.mjs - optional child cleanup. + * + * Purpose: prove startup flow audit can clean stale optional child processes + * without surfacing non-actionable warnings to the operator. 
+ */ +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, test } from "vitest"; +import { runFlowAudit } from "../doctor.js"; + +const tmpDirs = []; + +afterEach(() => { + while (tmpDirs.length > 0) { + const dir = tmpDirs.pop(); + if (dir) rmSync(dir, { recursive: true, force: true }); + } +}); + +function makeProject() { + const dir = mkdtempSync(join(tmpdir(), "sf-flow-audit-cleanup-")); + tmpDirs.push(dir); + mkdirSync(join(dir, ".sf"), { recursive: true }); + return dir; +} + +describe("flow audit optional child cleanup", () => { + test("runFlowAudit_when_optional_orphan_is_over_budget_and_cleanup_enabled_kills_without_warning", async () => { + const project = makeProject(); + const killed = []; + const report = await runFlowAudit(project, { + nowMs: Date.parse("2026-05-06T12:00:00.000Z"), + optionalChildBudgetMs: 30 * 60 * 1000, + killOverBudgetChildren: true, + killProcess(pid) { + killed.push(pid); + }, + psOutput: [ + " 12345 1 7200 next-server (v16.2.3)", + " 23456 12345 7200 sf", + ].join("\n"), + }); + + assert.deepEqual(killed, [12345, 23456]); + assert.equal(report.ok, true); + assert.deepEqual(report.warnings, []); + assert.equal(report.recommendedAction, "No flow-auditor action needed."); + assert.deepEqual( + report.childProcesses.map((p) => ({ + pid: p.pid, + classification: p.classification, + overBudget: p.overBudget, + action: p.action, + killed: p.killed, + })), + [ + { + pid: 12345, + classification: "orphan", + overBudget: true, + action: "kill", + killed: true, + }, + { + pid: 23456, + classification: "orphan", + overBudget: true, + action: "kill", + killed: true, + }, + ], + ); + }); + + test("runFlowAudit_when_bare_sf_child_of_web_host_is_over_budget_cleans_it", async () => { + const project = makeProject(); + const killed = []; + const report = await runFlowAudit(project, { + 
nowMs: Date.parse("2026-05-06T12:00:00.000Z"), + optionalChildBudgetMs: 30 * 60 * 1000, + killOverBudgetChildren: true, + killProcess(pid) { + killed.push(pid); + }, + psOutput: [ + " 30001 1 7200 next-server (v1)", + " 30002 30001 7200 sf", + ].join("\n"), + }); + + assert.deepEqual(killed, [30001, 30002]); + assert.equal(report.ok, true); + assert.deepEqual(report.warnings, []); + assert.deepEqual( + report.childProcesses.map((p) => ({ + pid: p.pid, + classification: p.classification, + action: p.action, + killed: p.killed, + })), + [ + { + pid: 30001, + classification: "orphan", + action: "kill", + killed: true, + }, + { + pid: 30002, + classification: "orphan", + action: "kill", + killed: true, + }, + ], + ); + }); + + test("runFlowAudit_when_only_old_errors_exist_does_not_warn_before_dispatch", async () => { + const project = makeProject(); + writeFileSync( + join(project, ".sf", "notifications.jsonl"), + [ + JSON.stringify({ + ts: "2026-05-06T10:00:00.000Z", + severity: "error", + message: "Auto-mode paused: old provider failure", + }), + JSON.stringify({ + ts: "2026-05-06T10:45:00.000Z", + severity: "warning", + message: + "Flow audit: Review recent errors before dispatching another unit.", + }), + ].join("\n") + "\n", + "utf-8", + ); + + const report = await runFlowAudit(project, { + nowMs: Date.parse("2026-05-06T12:00:00.000Z"), + psOutput: "", + }); + + assert.equal(report.ok, true); + assert.deepEqual(report.lastErrors, []); + assert.equal(report.recommendedAction, "No flow-auditor action needed."); + }); + + test("runFlowAudit_when_recent_error_exists_warns_before_dispatch", async () => { + const project = makeProject(); + writeFileSync( + join(project, ".sf", "notifications.jsonl"), + JSON.stringify({ + ts: "2026-05-06T11:45:00.000Z", + severity: "error", + message: "Auto-mode paused: recent provider failure", + }) + "\n", + "utf-8", + ); + + const report = await runFlowAudit(project, { + nowMs: Date.parse("2026-05-06T12:00:00.000Z"), + psOutput: "", + 
}); + + assert.equal(report.ok, false); + assert.deepEqual(report.lastErrors, [ + "Auto-mode paused: recent provider failure", + ]); + assert.equal( + report.recommendedAction, + "Review recent errors before dispatching another unit.", + ); + }); +}); diff --git a/src/resources/extensions/sf/tests/doctor-runtime-stale-units.test.mjs b/src/resources/extensions/sf/tests/doctor-runtime-stale-units.test.mjs new file mode 100644 index 000000000..20143fdcb --- /dev/null +++ b/src/resources/extensions/sf/tests/doctor-runtime-stale-units.test.mjs @@ -0,0 +1,80 @@ +/** + * doctor-runtime-stale-units.test.mjs - stale autonomous unit cleanup. + * + * Purpose: prove doctor clears active runtime unit claims when no live auto.lock + * owner exists, so autonomous dispatch is not blocked by dead headless sessions. + */ +import assert from "node:assert/strict"; +import { + existsSync, + mkdirSync, + mkdtempSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, test } from "vitest"; +import { runSFDoctor } from "../doctor.js"; + +const tmpDirs = []; + +afterEach(() => { + while (tmpDirs.length > 0) { + const dir = tmpDirs.pop(); + if (dir) rmSync(dir, { recursive: true, force: true }); + } +}); + +function makeProject() { + const dir = mkdtempSync(join(tmpdir(), "sf-doctor-stale-units-")); + tmpDirs.push(dir); + mkdirSync(join(dir, ".sf", "runtime", "units"), { recursive: true }); + return dir; +} + +describe("doctor stale unit runtime cleanup", () => { + test("runSFDoctor_fix_removes_active_unit_records_without_live_lock_owner", async () => { + const project = makeProject(); + const runtimePath = join( + project, + ".sf", + "runtime", + "units", + "plan-slice-M010-S07.json", + ); + writeFileSync( + runtimePath, + JSON.stringify( + { + version: 1, + unitType: "plan-slice", + unitId: "M010/S07", + phase: "dispatched", + status: "running", + startedAt: Date.now() - 60_000, + updatedAt: 
Date.now() - 60_000, + }, + null, + 2, + ), + ); + + const report = await runSFDoctor(project, { + fix: true, + fixLevel: "all", + scope: "project", + }); + + assert.equal( + report.issues.some((issue) => issue.code === "stale_active_unit_runtime"), + true, + ); + assert.equal(existsSync(runtimePath), false); + assert.ok( + report.fixesApplied.includes( + "cleared 1 stale active unit runtime record(s)", + ), + ); + }); +}); diff --git a/src/resources/extensions/sf/tests/doctor-task-plan-id-drift.test.mjs b/src/resources/extensions/sf/tests/doctor-task-plan-id-drift.test.mjs new file mode 100644 index 000000000..52d308758 --- /dev/null +++ b/src/resources/extensions/sf/tests/doctor-task-plan-id-drift.test.mjs @@ -0,0 +1,125 @@ +/** + * doctor-task-plan-id-drift.test.mjs - task plan ID drift repair. + * + * Purpose: prove doctor repairs the single-task DB/disk mismatch that makes + * autonomous redispatch plan-slice instead of executing the active task. + */ +import assert from "node:assert/strict"; +import { + existsSync, + mkdirSync, + mkdtempSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, test } from "vitest"; +import { runSFDoctor } from "../doctor.js"; +import { + closeDatabase, + insertMilestone, + insertSlice, + insertTask, + openDatabase, +} from "../sf-db.js"; + +const tmpDirs = []; + +afterEach(() => { + closeDatabase(); + while (tmpDirs.length > 0) { + const dir = tmpDirs.pop(); + if (dir) rmSync(dir, { recursive: true, force: true }); + } +}); + +function makeProject() { + const dir = mkdtempSync(join(tmpdir(), "sf-doctor-task-plan-drift-")); + tmpDirs.push(dir); + mkdirSync(join(dir, ".sf", "milestones", "M910", "slices", "S07", "tasks"), { + recursive: true, + }); + openDatabase(join(dir, ".sf", "sf.db")); + insertMilestone({ + id: "M910", + title: "Task plan drift", + status: "active", + planning: { + vision: "Repair task plan ID drift.", + 
successCriteria: ["Active task plan can be dispatched."], + }, + }); + insertSlice({ + milestoneId: "M910", + id: "S07", + title: "Tie back", + status: "active", + risk: "low", + depends: [], + demo: "Single task executes.", + sequence: 1, + }); + insertTask({ + milestoneId: "M910", + sliceId: "S07", + id: "T07.1", + title: "Replace backlog with schedule", + status: "pending", + sequence: 1, + }); + writeFileSync( + join(dir, ".sf", "milestones", "M910", "M910-ROADMAP.md"), + "# M910: Task plan drift\n\n## S07: Tie back\n", + ); + writeFileSync( + join(dir, ".sf", "milestones", "M910", "slices", "S07", "S07-PLAN.md"), + "# S07: Tie back\n\n## Tasks\n\n- [ ] T07.1: Replace backlog with schedule\n", + ); + writeFileSync( + join( + dir, + ".sf", + "milestones", + "M910", + "slices", + "S07", + "tasks", + "T01-PLAN.md", + ), + "# T07.1: Replace backlog with schedule\n", + ); + return dir; +} + +describe("doctor task plan ID drift", () => { + test("runSFDoctor_fix_copies_single_orphan_plan_to_active_db_task_id", async () => { + const project = makeProject(); + const expected = join( + project, + ".sf", + "milestones", + "M910", + "slices", + "S07", + "tasks", + "T07.1-PLAN.md", + ); + + const report = await runSFDoctor(project, { + fix: true, + fixLevel: "all", + }); + + assert.equal( + report.issues.some((issue) => issue.code === "task_plan_id_drift"), + true, + ); + assert.equal(existsSync(expected), true); + assert.ok( + report.fixesApplied.includes( + "copied T01-PLAN.md to T07.1-PLAN.md for M910/S07", + ), + ); + }); +}); diff --git a/src/resources/extensions/sf/tests/schedule-kinds.test.mjs b/src/resources/extensions/sf/tests/schedule-kinds.test.mjs index 81649031e..981327433 100644 --- a/src/resources/extensions/sf/tests/schedule-kinds.test.mjs +++ b/src/resources/extensions/sf/tests/schedule-kinds.test.mjs @@ -91,7 +91,7 @@ describe("schedule-kinds sync", () => { } }); - describe("VALID_KINDS has review and audit (SF schedule system kinds)", () => { + 
describe("VALID_KINDS has documented SF schedule system kinds", () => { it('has "review" kind', () => { assert.ok( VALID_KINDS.has("review"), @@ -105,5 +105,12 @@ describe("schedule-kinds sync", () => { "audit kind missing from VALID_KINDS", ); }); + + it('has "command" kind', () => { + assert.ok( + VALID_KINDS.has("command"), + "command kind missing from VALID_KINDS", + ); + }); }); }); diff --git a/src/resources/extensions/sf/tests/schedule-store.test.mjs b/src/resources/extensions/sf/tests/schedule-store.test.mjs index c28d1e24e..098dbe7b2 100644 --- a/src/resources/extensions/sf/tests/schedule-store.test.mjs +++ b/src/resources/extensions/sf/tests/schedule-store.test.mjs @@ -28,6 +28,7 @@ describe("schedule-types", () => { assert.equal(isValidKind("recurring"), true); assert.equal(isValidKind("review"), true); assert.equal(isValidKind("audit"), true); + assert.equal(isValidKind("command"), true); }); it("rejects unknown kinds", () => { diff --git a/src/resources/extensions/sf/unit-runtime.js b/src/resources/extensions/sf/unit-runtime.js index 0598b3245..8c1e85191 100644 --- a/src/resources/extensions/sf/unit-runtime.js +++ b/src/resources/extensions/sf/unit-runtime.js @@ -544,3 +544,78 @@ export function reconcileStaleCompleteSliceRecords(basePath) { } return { cleared, details }; } + +/** + * Clear runtime records whose durable artifacts already prove completion. + * + * Purpose: recover from crashes, process timeouts, or hard exits that happen + * after a unit wrote its durable completion artifacts but before the in-memory + * finalizer cleared `.sf/runtime/units/*.json`. + * + * Consumer: auto-mode bootstrap before dispatching the next autonomous unit. 
+ */
+export async function reconcileDurableCompleteUnitRuntimeRecords(basePath) {
+  const dir = runtimeDir(basePath);
+  if (!existsSync(dir)) return { cleared: 0, details: [] }; // no runtime dir — nothing to reconcile
+  let cleared = 0;
+  const details = [];
+  for (const file of readdirSync(dir)) {
+    if (!file.endsWith(".json")) continue;
+    const abs = join(dir, file);
+    let record;
+    try {
+      record = JSON.parse(readFileSync(abs, "utf-8"));
+    } catch {
+      continue; // corrupt record — tolerate and leave for doctor to report
+    }
+    if (!record.unitType || !record.unitId) continue;
+    let durableComplete = false;
+    if (record.unitType === "execute-task") {
+      const status = await inspectExecuteTaskDurability(
+        basePath,
+        record.unitId,
+      );
+      durableComplete = !!(
+        status?.summaryExists &&
+        status.taskChecked &&
+        status.nextActionAdvanced
+      );
+    } else if (record.unitType === "complete-slice") {
+      const { milestone: mid, slice: sid } = parseUnitId(record.unitId) ?? {}; // guard: a malformed unitId must not throw and abort the whole sweep
+      if (mid && sid) {
+        let dbComplete = false;
+        if (isDbAvailable()) {
+          try {
+            const sliceRow = getSlice(mid, sid);
+            dbComplete = sliceRow?.status === "complete";
+          } catch {
+            dbComplete = false; // DB read failure — treat completion as unproven
+          }
+        }
+        const summaryPath = resolveSliceFile(basePath, mid, sid, "SUMMARY");
+        let artifactValid = false;
+        if (summaryPath && existsSync(summaryPath)) {
+          try {
+            const content = readFileSync(summaryPath, "utf-8");
+            const summary = parseSummary(content);
+            artifactValid = !!summary.frontmatter.completed_at;
+          } catch {
+            artifactValid = false; // unparseable summary — treat as unproven
+          }
+        }
+        durableComplete = dbComplete && artifactValid; // require BOTH proofs before clearing
+      }
+    }
+    if (!durableComplete) continue;
+    try {
+      unlinkSync(abs);
+      _runtimeCache.delete(abs);
+      cleared++;
+      const state = getUnitRuntimeState(record);
+      details.push(`${record.unitType} ${record.unitId} (was ${state.status})`);
+    } catch {
+      // Non-fatal — leave the record for the next bootstrap/doctor pass.
+    }
+  }
+  return { cleared, details };
+}
diff --git a/src/tests/schedule-cli-noninteractive.test.ts b/src/tests/schedule-cli-noninteractive.test.ts
new file mode 100644
index 000000000..321ff94a3
--- /dev/null
+++ b/src/tests/schedule-cli-noninteractive.test.ts
@@ -0,0 +1,31 @@
+import assert from "node:assert/strict";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+import { test } from "vitest";
+
+test("cli.ts routes top-level schedule before interactive TUI", () => {
+  const cliSource = readFileSync(join(__dirname, "..", "cli.ts"), "utf-8");
+  const scheduleBranch = cliSource.indexOf(
+    'if (cliFlags.messages[0] === "schedule")',
+  );
+  const interactiveMode = cliSource.indexOf("new InteractiveMode");
+
+  assert.notEqual(scheduleBranch, -1, "top-level schedule branch must exist");
+  assert.notEqual(
+    interactiveMode,
+    -1,
+    "interactive mode construction must exist",
+  );
+  assert.ok(
+    scheduleBranch < interactiveMode,
+    "sf schedule must route before the interactive TUI path",
+  );
+  assert.ok(
+    cliSource.includes("handleSchedule"),
+    "top-level schedule branch must reuse the schedule handler",
+  );
+  assert.ok(
+    cliSource.includes("process.argv.slice(3).join"),
+    "schedule branch must pass raw argv tail so command-specific flags survive top-level parsing",
+  );
+});
diff --git a/src/tests/unit-runtime-reconcile.test.ts b/src/tests/unit-runtime-reconcile.test.ts
index be4789d9c..51dbbf849 100644
--- a/src/tests/unit-runtime-reconcile.test.ts
+++ b/src/tests/unit-runtime-reconcile.test.ts
@@ -4,6 +4,7 @@ import { join } from "node:path";
 import { afterEach, beforeEach, describe, expect, it } from "vitest";
 import {
   readUnitRuntimeRecord,
+  reconcileDurableCompleteUnitRuntimeRecords,
   reconcileStaleCompleteSliceRecords,
   writeUnitRuntimeRecord,
 } from "../resources/extensions/sf/unit-runtime.js";
@@ -95,4 +96,57 @@ describe("reconcileStaleCompleteSliceRecords", () => {
     rmSync(emptyBase, { recursive: true, force: true });
   }
 });
+
+
it("clears a stale execute-task record when durable task artifacts prove completion", async () => {
+    writeUnitRuntimeRecord(
+      basePath,
+      "execute-task",
+      "M001/S01/T01",
+      Date.now(),
+      {
+        status: "running",
+      },
+    ); // simulate a crash: a "running" record left behind after the unit finished
+
+    const sliceDir = join(
+      basePath,
+      ".sf",
+      "milestones",
+      "M001",
+      "slices",
+      "S01",
+    );
+    const taskDir = join(sliceDir, "tasks");
+    mkdirSync(taskDir, { recursive: true }); // lay down the durable artifact tree the reconciler inspects
+    writeFileSync(
+      join(sliceDir, "S01-PLAN.md"),
+      "# S01 Plan\n\n- [x] **T01:** Durable completion\n",
+      "utf-8",
+    ); // checked task box — one durability signal
+    writeFileSync(
+      join(taskDir, "T01-PLAN.md"),
+      "# T01 Plan\n\n## Must-Haves\n\n- Durable summary\n",
+      "utf-8",
+    );
+    writeFileSync(
+      join(taskDir, "T01-SUMMARY.md"),
+      "# T01 Summary\n\nDurable summary.\n",
+      "utf-8",
+    ); // summary exists — second durability signal
+    writeFileSync(
+      join(basePath, ".sf", "STATE.md"),
+      "**Next Action:** Complete S01\n",
+      "utf-8",
+    ); // next action advanced past T01 — third durability signal
+
+    const result = await reconcileDurableCompleteUnitRuntimeRecords(basePath); // sweep should clear exactly this one record
+
+    expect(result).toEqual({
+      cleared: 1,
+      details: ["execute-task M001/S01/T01 (was running)"],
+    });
+    expect(
+      readUnitRuntimeRecord(basePath, "execute-task", "M001/S01/T01"),
+    ).toBeNull(); // record file removed, not just marked
+  });
 });