From 12e7333f1cb38fc7d6956858042354e3880f3362 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Fri, 1 May 2026 20:18:50 +0200 Subject: [PATCH] feat: stabilize autonomous workflow system --- .agents/skills/nix-build/SKILL.md | 26 +++ .agents/skills/smoke-test/SKILL.md | 17 ++ README.md | 47 ++-- docs/README.md | 4 +- docs/dev/json-contracts.md | 13 ++ docs/user-docs/auto-mode.md | 20 +- docs/user-docs/commands.md | 9 +- docs/user-docs/getting-started.md | 16 +- docs/user-docs/troubleshooting.md | 10 +- packages/daemon/src/orchestrator.ts | 4 +- packages/daemon/src/session-manager.test.ts | 6 +- packages/daemon/src/session-manager.ts | 6 +- packages/daemon/src/types.ts | 2 +- packages/mcp-server/README.md | 2 +- packages/mcp-server/src/mcp-server.test.ts | 8 +- packages/mcp-server/src/server.ts | 6 +- packages/mcp-server/src/session-manager.ts | 6 +- packages/mcp-server/src/types.ts | 2 +- packages/pi-agent-core/src/agent-loop.test.ts | 103 +++++++++ packages/pi-agent-core/src/agent-loop.ts | 8 +- packages/pi-ai/src/utils/event-stream.test.ts | 138 +++++++++++ ...agent-session-custom-message-queue.test.ts | 110 +++++++++ .../pi-coding-agent/src/core/agent-session.ts | 121 ++++++++-- .../src/core/keybindings-followup.test.ts | 12 + .../pi-coding-agent/src/core/keybindings.ts | 2 +- .../src/core/slash-commands.ts | 1 + .../__tests__/tool-execution.test.ts | 23 +- .../interactive/components/tool-execution.ts | 16 +- .../controllers/input-controller.test.ts | 20 ++ .../interactive/slash-command-handlers.ts | 5 + scripts/check-versioned-json.mjs | 115 ++++++---- scripts/check-versioned-json.test.mjs | 54 +++++ src/cli-web-branch.ts | 4 + src/cli.ts | 52 +++-- src/headless-ui.ts | 12 + src/headless.ts | 32 ++- src/help-text.ts | 39 ++-- src/resources/agents/scout.md | 8 +- src/resources/agents/worker.md | 2 +- .../claude-code-cli/stream-adapter.ts | 14 ++ .../search-the-web/native-search.ts | 20 +- src/resources/extensions/sf-tui/footer.ts | 29 ++- 
.../extensions/sf/auto-direct-dispatch.ts | 2 +- src/resources/extensions/sf/auto-post-unit.ts | 8 +- src/resources/extensions/sf/auto-start.ts | 4 +- .../extensions/sf/auto-timeout-recovery.ts | 2 +- src/resources/extensions/sf/auto.ts | 4 +- src/resources/extensions/sf/auto/loop.ts | 138 ++++++++++- src/resources/extensions/sf/auto/phases.ts | 87 +++++-- src/resources/extensions/sf/auto/session.ts | 6 + .../extensions/sf/bootstrap/db-tools.ts | 55 +++++ .../extensions/sf/bootstrap/register-hooks.ts | 26 +++ .../extensions/sf/code-intelligence.ts | 18 +- .../extensions/sf/commands-bootstrap.ts | 27 +-- .../extensions/sf/commands-handlers.ts | 35 ++- .../extensions/sf/commands-inspect.ts | 4 +- src/resources/extensions/sf/commands-logs.ts | 4 +- .../sf/commands-workflow-templates.ts | 132 +++++++---- .../extensions/sf/commands/catalog.ts | 33 +-- .../extensions/sf/commands/handlers/auto.ts | 18 +- .../extensions/sf/commands/handlers/core.ts | 14 +- .../sf/commands/handlers/workflow.ts | 5 +- src/resources/extensions/sf/crash-recovery.ts | 8 +- .../extensions/sf/custom-workflow-engine.ts | 158 ++++++++----- .../extensions/sf/dashboard-overlay.ts | 4 +- src/resources/extensions/sf/forensics.ts | 4 +- src/resources/extensions/sf/graph.ts | 39 ++++ src/resources/extensions/sf/guided-flow.ts | 2 +- .../sf/learning/data/model-benchmarks.json | 2 +- .../sf/learning/data/unit-weights.json | 2 +- src/resources/extensions/sf/memory-sleeper.ts | 139 +++++++++++ .../extensions/sf/prompts/discuss-headless.md | 4 +- .../extensions/sf/prompts/discuss.md | 2 +- .../sf/prompts/guided-discuss-milestone.md | 2 +- .../sf/prompts/guided-discuss-slice.md | 2 +- src/resources/extensions/sf/prompts/queue.md | 2 +- src/resources/extensions/sf/prompts/system.md | 5 +- src/resources/extensions/sf/run-manager.ts | 126 ++++++++-- .../sf/safety/evidence-collector.ts | 63 ++++- .../extensions/sf/skills/researcher/SKILL.md | 2 +- .../sf/slice-parallel-orchestrator.ts | 6 +- 
.../sf/tests/bundled-workflow-defs.test.ts | 62 +++++ .../sf/tests/commands-workflow-custom.test.ts | 179 ++++++++++++++- .../sf/tests/complete-slice.test.ts | 2 +- .../custom-engine-loop-integration.test.ts | 98 ++++++++ .../sf/tests/custom-workflow-engine.test.ts | 58 ++++- .../sf/tests/graph-operations.test.ts | 44 ++++ .../sf/tests/integration/git-service.test.ts | 65 +----- .../extensions/sf/tests/md-importer.test.ts | 4 +- .../sf/tests/memory-sleeper.test.ts | 113 +++++++++ .../extensions/sf/tests/memory-store.test.ts | 4 +- .../phases-merge-error-stops-auto.test.ts | 2 +- .../extensions/sf/tests/run-manager.test.ts | 60 ++++- .../extensions/sf/tests/sf-db.test.ts | 4 +- .../sf/tests/start-auto-detached.test.ts | 4 + .../extensions/sf/tests/tool-naming.test.ts | 44 ++++ .../sf/tests/update-command.test.ts | 30 +++ .../tests/workflow-template-compiler.test.ts | 72 ++++++ .../sf/tests/workflow-templates.test.ts | 117 +++++++++- .../sf/tests/workspace-index.test.ts | 4 +- .../extensions/sf/tools/plan-milestone.ts | 8 + .../sf/tools/workflow-tool-executors.ts | 11 +- .../sf/workflow-template-compiler.ts | 101 ++++++++ .../extensions/sf/workflow-templates.ts | 146 +++++++++++- .../sf/workflow-templates/full-project.md | 4 +- .../sf/workflow-templates/product-plan.md | 83 +++++++ .../sf/workflow-templates/product-tracking.md | 116 ++++++++++ .../sf/workflow-templates/registry.json | 53 ++++- .../extensions/sf/workspace-index.ts | 2 +- .../extensions/sf/worktree-command.ts | 4 +- src/resources/extensions/subagent/index.ts | 215 ++++++++++++++---- .../subagent/tests/node-launch.test.ts | 39 +++- src/resources/skills/create-workflow/SKILL.md | 1 + .../templates/product-tracking-lifecycle.yaml | 144 ++++++++++++ .../workflows/create-from-template.md | 1 + src/tests/auto-mode-piped.test.ts | 28 ++- src/tests/auto-piped-io.test.ts | 8 +- src/tests/headless-cli-surface.test.ts | 37 ++- src/tests/headless-events.test.ts | 2 +- .../web-workflow-controls-contract.test.ts | 
20 +- src/tests/native-search.test.ts | 210 +++++++++++++++++ src/tests/parse-cli-args.test.ts | 8 + src/web/recovery-diagnostics-service.ts | 2 +- tsconfig.extensions.json | 4 +- web/components/sf/chat-mode.tsx | 12 +- web/lib/workflow-actions.ts | 8 +- 126 files changed, 3954 insertions(+), 617 deletions(-) create mode 100644 .agents/skills/nix-build/SKILL.md create mode 100644 .agents/skills/smoke-test/SKILL.md create mode 100644 docs/dev/json-contracts.md create mode 100644 packages/pi-ai/src/utils/event-stream.test.ts create mode 100644 packages/pi-coding-agent/src/core/agent-session-custom-message-queue.test.ts create mode 100644 packages/pi-coding-agent/src/core/keybindings-followup.test.ts create mode 100644 scripts/check-versioned-json.test.mjs create mode 100644 src/resources/extensions/sf/memory-sleeper.ts create mode 100644 src/resources/extensions/sf/tests/memory-sleeper.test.ts create mode 100644 src/resources/extensions/sf/tests/workflow-template-compiler.test.ts create mode 100644 src/resources/extensions/sf/workflow-template-compiler.ts create mode 100644 src/resources/extensions/sf/workflow-templates/product-plan.md create mode 100644 src/resources/extensions/sf/workflow-templates/product-tracking.md create mode 100644 src/resources/skills/create-workflow/templates/product-tracking-lifecycle.yaml diff --git a/.agents/skills/nix-build/SKILL.md b/.agents/skills/nix-build/SKILL.md new file mode 100644 index 000000000..0646c0474 --- /dev/null +++ b/.agents/skills/nix-build/SKILL.md @@ -0,0 +1,26 @@ +--- +name: nix-build +description: Build any @singularity-forge/* package (or the full stack) via nix develop. Pass a package name like "pi-coding-agent", "native", "mcp-server", or "all" for a full core build. +--- + +All build commands in this repo must run inside `nix develop`. Never use bare cargo/bun/rustc. 
+ +For a single package: +``` +nix develop --command bash -c "bun run --filter @singularity-forge/<package> build" +``` + +For the full core build (native + all TS packages): +``` +nix develop --command bash -c "npm run build:core" +``` + +For typecheck only: +``` +nix develop --command bash -c "tsc --noEmit -p tsconfig.json" +``` + +For extensions typecheck: +``` +nix develop --command bash -c "tsc --noEmit -p tsconfig.extensions.json" +``` diff --git a/.agents/skills/smoke-test/SKILL.md b/.agents/skills/smoke-test/SKILL.md new file mode 100644 index 000000000..dc2c016cf --- /dev/null +++ b/.agents/skills/smoke-test/SKILL.md @@ -0,0 +1,17 @@ +--- +name: smoke-test +description: Run the standard sf-run smoke tests (--version, --help, --print). All three must pass before shipping a build. +--- + +#!/bin/bash +set -e +echo "=== --version ===" +node dist/loader.js --version + +echo "=== --help (first 5 lines) ===" +node dist/loader.js --help 2>&1 | head -5 + +echo "=== --print (graceful degradation) ===" +node dist/loader.js --print 2>&1 | head -5 + +echo "All smoke tests passed." 
diff --git a/README.md b/README.md index 6b04bba3a..c65d68b90 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ Full documentation is in the [`docs/`](./docs/) directory: ### User Guides - **[Getting Started](./docs/user-docs/getting-started.md)** — install, first run, basic usage -- **[Auto Mode](./docs/user-docs/auto-mode.md)** — autonomous execution deep-dive +- **[Autonomous Mode](./docs/user-docs/auto-mode.md)** — autonomous execution deep-dive - **[Configuration](./docs/user-docs/configuration.md)** — all preferences, models, git, and hooks - **[Custom Models](./docs/user-docs/custom-models.md)** — add custom providers (Ollama, vLLM, LM Studio, proxies) - **[Token Optimization](./docs/user-docs/token-optimization.md)** — profiles, context compression, complexity routing @@ -139,7 +139,7 @@ Full documentation is in the [`docs/`](./docs/) directory: - **[Dynamic Model Routing](./docs/user-docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure - **[Web Interface](./docs/user-docs/web-interface.md)** — browser-based project management and real-time progress - **[Migration from v1](./docs/user-docs/migration.md)** — `.planning` → `.sf` migration -- **[Docker Sandbox](./docker/README.md)** — run SF auto mode in an isolated Docker container +- **[Docker Sandbox](./docker/README.md)** — run SF autonomous mode in an isolated Docker container ### Developer Docs @@ -229,15 +229,15 @@ Plan (with integrated research) → Execute (per task) → Complete → Reassess **Plan** scouts the codebase, researches relevant docs, and decomposes the slice into tasks with must-haves (mechanically verifiable outcomes). **Execute** runs each task in a fresh context window with only the relevant files pre-loaded — then runs configured verification commands (lint, test, etc.) with auto-fix retries. **Complete** writes the summary, UAT script, marks the roadmap, and commits with meaningful messages derived from task summaries. 
**Reassess** checks if the roadmap still makes sense given what was learned. **Validate Milestone** runs a reconciliation gate after all slices complete — comparing roadmap success criteria against actual results before sealing the milestone. -### `/sf auto` — The Main Event +### `/sf autonomous` — The Main Event This is what makes SF different. Run it, walk away, come back to built software. ``` -/sf auto +/sf autonomous ``` -Auto mode is a state machine driven by files on disk. It reads `.sf/STATE.md`, determines the next unit of work, creates a fresh agent session, injects a focused prompt with all relevant context pre-inlined, and lets the LLM execute. When the LLM finishes, auto mode reads disk state again and dispatches the next unit. +Autonomous mode is a state machine driven by files on disk. It reads `.sf/STATE.md`, determines the next unit of work, creates a fresh agent session, injects a focused prompt with all relevant context pre-inlined, and lets the LLM execute. When the LLM finishes, autonomous mode reads disk state again and dispatches the next unit. `/sf auto` remains supported as a short alias. **What happens under the hood:** @@ -247,15 +247,15 @@ Auto mode is a state machine driven by files on disk. It reads `.sf/STATE.md`, d 3. **Git isolation** — When `git.isolation` is set to `worktree` or `branch`, each milestone runs on its own `milestone/` branch (in a worktree or in-place). All slice work commits sequentially — no branch switching, no merge conflicts. When the milestone completes, it's squash-merged to main as one clean commit. The default is `none` (work on the current branch), configurable via preferences. -4. **Crash recovery** — A lock file tracks the current unit. If the session dies, the next `/sf auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. 
Parallel orchestrator state is persisted to disk with PID liveness detection, so multi-worker sessions survive crashes too. In headless mode, crashes trigger automatic restart with exponential backoff (default 3 attempts). +4. **Crash recovery** — A lock file tracks the current unit. If the session dies, the next `/sf autonomous` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. Parallel orchestrator state is persisted to disk with PID liveness detection, so multi-worker sessions survive crashes too. In headless mode, crashes trigger automatic restart with exponential backoff (default 3 attempts). 5. **Provider error recovery** — Transient provider errors (rate limits, 500/503 server errors, overloaded) auto-resume after a delay. Permanent errors (auth, billing) pause for manual review. The model fallback chain retries transient network errors before switching models. 6. **Stuck detection** — A sliding-window detector identifies repeated dispatch patterns (including multi-unit cycles). On detection, it retries once with a deep diagnostic. If it fails again, auto mode stops with the exact file it expected. -7. **Timeout supervision** — Soft timeout warns the LLM to wrap up. Idle watchdog detects stalls. Hard timeout pauses auto mode. Recovery steering nudges the LLM to finish durable output before giving up. +7. **Timeout supervision** — Soft timeout warns the LLM to wrap up. Idle watchdog detects stalls. Hard timeout pauses autonomous mode. Recovery steering nudges the LLM to finish durable output before giving up. -8. **Cost tracking** — Every unit's token usage and cost is captured, broken down by phase, slice, and model. The dashboard shows running totals and projections. Budget ceilings can pause auto mode before overspending. +8. **Cost tracking** — Every unit's token usage and cost is captured, broken down by phase, slice, and model. 
The dashboard shows running totals and projections. Budget ceilings can pause autonomous mode before overspending. 9. **Adaptive replanning** — After each slice completes, the roadmap is reassessed. If the work revealed new information that changes the plan, slices are reordered, added, or removed before continuing. @@ -263,11 +263,11 @@ Auto mode is a state machine driven by files on disk. It reads `.sf/STATE.md`, d 11. **Milestone validation** — After all slices complete, a `validate-milestone` gate compares roadmap success criteria against actual results before sealing the milestone. -12. **Escape hatch** — Press Escape to pause. The conversation is preserved. Interact with the agent, inspect what happened, or just `/sf auto` to resume from disk state. +12. **Escape hatch** — Press Escape to pause. The conversation is preserved. Interact with the agent, inspect what happened, or just `/sf autonomous` to resume from disk state. ### `/sf` and `/sf next` — Step Mode -By default, `/sf` runs in **step mode**: the same state machine as auto mode, but it pauses between units with a wizard showing what completed and what's next. You advance one step at a time, review the output, and continue when ready. +By default, `/sf` runs in **step mode**: the same state machine as autonomous mode, but it pauses between units with a wizard showing what completed and what's next. You advance one step at a time, review the output, and continue when ready. - **No `.sf/` directory** → Start a new project. Discussion flow captures your vision, constraints, and preferences. - **Milestone exists, no roadmap** → Discuss or research the milestone. @@ -315,19 +315,19 @@ sf SF opens an interactive agent session. From there, you have two ways to work: -**`/sf` — step mode.** Type `/sf` and SF executes one unit of work at a time, pausing between each with a wizard showing what completed and what's next. Same state machine as auto mode, but you stay in the loop. No project yet? 
It starts the discussion flow. Roadmap exists? It plans or executes the next step. +**`/sf` — step mode.** Type `/sf` and SF executes one unit of work at a time, pausing between each with a wizard showing what completed and what's next. Same state machine as autonomous mode, but you stay in the loop. No project yet? It starts the discussion flow. Roadmap exists? It plans or executes the next step. -**`/sf auto` — autonomous mode.** Type `/sf auto` and walk away. SF researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete. Fresh context window per task. No babysitting. +**`/sf autonomous` — autonomous mode.** Type `/sf autonomous` and walk away. SF researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete. Fresh context window per task. No babysitting. `/sf auto` is an alias. ### Two terminals, one project -The real workflow: run auto mode in one terminal, steer from another. +The real workflow: run autonomous mode in one terminal, steer from another. **Terminal 1 — let it build** ```bash sf -/sf auto +/sf autonomous ``` **Terminal 2 — steer while it works** @@ -339,14 +339,14 @@ sf /sf queue # queue the next milestone ``` -Both terminals read and write the same `.sf/` files on disk. Your decisions in terminal 2 are picked up automatically at the next phase boundary — no need to stop auto mode. +Both terminals read and write the same `.sf/` files on disk. Your decisions in terminal 2 are picked up automatically at the next phase boundary — no need to stop autonomous mode. ### Headless mode — CI and scripts `sf headless` runs any `/sf` command without a TUI. Designed for CI pipelines, cron jobs, and scripted automation. 
```bash -# Run auto mode in CI +# Run autonomous mode in CI sf headless --timeout 600000 # Create and execute a milestone end-to-end @@ -376,15 +376,16 @@ On first run, SF launches a branded setup wizard that walks you through LLM prov | ----------------------- | --------------------------------------------------------------- | | `/sf` | Step mode — executes one unit at a time, pauses between each | | `/sf next` | Explicit step mode (same as bare `/sf`) | -| `/sf auto` | Autonomous mode — researches, plans, executes, commits, repeats | +| `/sf autonomous` | Autonomous mode — researches, plans, executes, commits, repeats | +| `/sf auto` | Alias for `/sf autonomous` | | `/sf quick` | Execute a quick task with SF guarantees, skip planning overhead | -| `/sf stop` | Stop auto mode gracefully | +| `/sf stop` | Stop autonomous mode gracefully | | `/sf steer` | Hard-steer plan documents during execution | -| `/sf discuss` | Discuss architecture and decisions (works alongside auto mode) | +| `/sf discuss` | Discuss architecture and decisions (works alongside autonomous mode) | | `/sf rethink` | Conversational project reorganization | | `/sf mcp` | MCP server status and connectivity | | `/sf status` | Progress dashboard | -| `/sf queue` | Queue future milestones (safe during auto mode) | +| `/sf queue` | Queue future milestones (safe during autonomous mode) | | `/sf prefs` | Model selection, timeouts, budget ceiling | | `/sf migrate` | Migrate a v1 `.planning` directory to `.sf` format | | `/sf help` | Categorized command reference for all SF subcommands | @@ -523,8 +524,8 @@ auto_report: true | ---------------------- | ----------------------------------------------------------------------------------------------------- | | `models.*` | Per-phase model selection — string for a single model, or `{model, fallbacks}` for automatic failover | | `skill_discovery` | `auto` / `suggest` / `off` — how SF finds and applies skills | -| `auto_supervisor.*` | Timeout thresholds for auto 
mode supervision | -| `budget_ceiling` | USD ceiling — auto mode pauses when reached | +| `auto_supervisor.*` | Timeout thresholds for autonomous mode supervision | +| `budget_ceiling` | USD ceiling — autonomous mode pauses when reached | | `uat_dispatch` | Enable automatic UAT runs after slice completion | | `always_use_skills` | Skills to always load when relevant | | `skill_rules` | Situational rules for skill routing | @@ -574,7 +575,7 @@ SF ships with 24 extensions, all loaded automatically: | Extension | What it provides | | ---------------------- | ---------------------------------------------------------------------------------------------------------------------- | -| **SF** | Core workflow engine, auto mode, commands, dashboard | +| **SF** | Core workflow engine, autonomous mode, commands, dashboard | | **Browser Tools** | Playwright-based browser with form intelligence, intent-ranked element finding, semantic actions, PDF export, session state persistence, network mocking, device emulation, structured extraction, visual diffing, region zoom, test code generation, and prompt injection detection | | **Search the Web** | Brave Search, Tavily, or Jina page extraction | | **Google Search** | Gemini-powered web search with AI-synthesized answers | diff --git a/docs/README.md b/docs/README.md index 61f54849e..7cf5ebd67 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@ # SF Documentation -Welcome to the SF documentation. This covers everything from getting started to advanced configuration, auto-mode internals, and extending SF with the Pi SDK. +Welcome to the SF documentation. This covers everything from getting started to advanced configuration, autonomous-mode internals, and extending SF with the Pi SDK. ## User Documentation @@ -11,7 +11,7 @@ Simplified Chinese translation: [`zh-CN/`](./zh-CN/). 
| Guide | Description | |-------|-------------| | [Getting Started](./user-docs/getting-started.md) | Installation, first run, and basic usage | -| [Auto Mode](./user-docs/auto-mode.md) | How autonomous execution works — the state machine, crash recovery, and steering | +| [Autonomous Mode](./user-docs/auto-mode.md) | How autonomous execution works — the state machine, crash recovery, and steering | | [Commands Reference](./user-docs/commands.md) | All commands, keyboard shortcuts, and CLI flags | | [Remote Questions](./user-docs/remote-questions.md) | Discord and Slack integration for headless auto-mode | | [Configuration](./user-docs/configuration.md) | Preferences, model selection, git settings, and token profiles | diff --git a/docs/dev/json-contracts.md b/docs/dev/json-contracts.md new file mode 100644 index 000000000..bde4c9c41 --- /dev/null +++ b/docs/dev/json-contracts.md @@ -0,0 +1,13 @@ +# JSON Contracts + +SF uses `schemaVersion` for JSON files whose shape is owned by SF runtime code. +Use a numeric value starting at `1`. + +Use `version` only for package, plugin, extension, or release versions. Do not +use `version` as a schema marker for SF-owned data. + +For map-like JSON where top-level keys are domain entries, put the marker under +`_meta.schemaVersion`. + +The `check:versioned-json` script parses every tracked `.json` file and enforces +`schemaVersion` on the SF-owned contract allowlist. diff --git a/docs/user-docs/auto-mode.md b/docs/user-docs/auto-mode.md index 1122922f6..693b06181 100644 --- a/docs/user-docs/auto-mode.md +++ b/docs/user-docs/auto-mode.md @@ -1,10 +1,10 @@ -# Auto Mode +# Autonomous Mode -Auto mode is SF's autonomous execution engine. Run `/sf auto`, walk away, come back to built software with clean git history. +Autonomous mode is SF's product-development execution engine. Run `/sf autonomous`, walk away, come back to built software with clean git history. `/sf auto` remains supported as a short alias. 
## How It Works -Auto mode is a **state machine driven by files on disk**. It reads `.sf/STATE.md`, determines the next unit of work, creates a fresh agent session, injects a focused prompt with all relevant context pre-inlined, and lets the LLM execute. When the LLM finishes, auto mode reads disk state again and dispatches the next unit. +Autonomous mode is a **state machine driven by files on disk**. It reads `.sf/STATE.md`, determines the next unit of work, creates a fresh agent session, injects a focused prompt with all relevant context pre-inlined, and lets the LLM execute. When the LLM finishes, autonomous mode reads disk state again and dispatches the next unit. ### The Loop @@ -59,7 +59,7 @@ When your project has independent milestones, you can run them simultaneously. E ### Crash Recovery -A lock file tracks the current unit. If the session dies, the next `/sf auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. +A lock file tracks the current unit. If the session dies, the next `/sf autonomous` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. **Headless auto-restart (v2.26):** When running `sf headless auto`, crashes trigger automatic restart with exponential backoff (5s → 10s → 30s cap, default 3 attempts). Configure with `--max-restarts N`. SIGINT/SIGTERM bypasses restart. Combined with crash recovery, this enables true overnight "run until done" execution. @@ -77,7 +77,7 @@ No manual intervention needed for transient errors — the session pauses briefl ### Incremental Memory (v2.26) -SF maintains a `KNOWLEDGE.md` file — an append-only register of project-specific rules, patterns, and lessons learned. The agent reads it at the start of every unit and appends to it when discovering recurring issues, non-obvious patterns, or rules that future sessions should follow. 
This gives auto-mode cross-session memory that survives context window boundaries. +SF maintains a `KNOWLEDGE.md` file — an append-only register of project-specific rules, patterns, and lessons learned. The agent reads it at the start of every unit and appends to it when discovering recurring issues, non-obvious patterns, or rules that future sessions should follow. This gives autonomous mode cross-session memory that survives context window boundaries. ### Context Pressure Monitor (v2.26) @@ -208,14 +208,16 @@ Configured skills are automatically resolved and injected into dispatch prompts. See [Configuration](./configuration.md) for skill routing preferences. -## Controlling Auto Mode +## Controlling Autonomous Mode ### Start ``` -/sf auto +/sf autonomous ``` +`/sf auto` is equivalent to `/sf autonomous`. + ### Pause Press **Escape**. The conversation is preserved. You can interact with the agent, inspect state, or resume. @@ -223,10 +225,10 @@ Press **Escape**. The conversation is preserved. You can interact with the agent ### Resume ``` -/sf auto +/sf autonomous ``` -Auto mode reads disk state and picks up where it left off. +Autonomous mode reads disk state and picks up where it left off. 
### Stop diff --git a/docs/user-docs/commands.md b/docs/user-docs/commands.md index 88e7bae06..4a224843a 100644 --- a/docs/user-docs/commands.md +++ b/docs/user-docs/commands.md @@ -6,10 +6,11 @@ |---------|-------------| | `/sf` | Step mode — execute one unit at a time, pause between each | | `/sf next` | Explicit step mode (same as `/sf`) | -| `/sf auto` | Autonomous mode — research, plan, execute, commit, repeat | +| `/sf autonomous` | Autonomous product loop — research, plan, execute, commit, repeat | +| `/sf auto` | Alias for `/sf autonomous` | | `/sf quick` | Execute a quick task with SF guarantees (atomic commits, state tracking) without full planning overhead | -| `/sf stop` | Stop auto mode gracefully | -| `/sf pause` | Pause auto-mode (preserves state, `/sf auto` to resume) | +| `/sf stop` | Stop autonomous mode gracefully | +| `/sf pause` | Pause autonomous mode (preserves state, `/sf autonomous` to resume) | | `/sf steer` | Hard-steer plan documents during execution | | `/sf discuss` | Discuss architecture and decisions (works alongside auto mode) | | `/sf status` | Progress dashboard | @@ -99,6 +100,8 @@ See [Parallel Orchestration](./parallel-orchestration.md) for full documentation | `/sf workflow pause` | Pause custom workflow auto-mode | | `/sf workflow resume` | Resume paused custom workflow auto-mode | +`/sf autonomous` is the product-development loop that chooses the next useful unit from project state. `/sf start` is guided workflow kickoff and may ask clarifying questions. `/sf workflow run` executes an explicit YAML workflow definition. `/sf auto` remains supported as shorthand for `/sf autonomous`. + ## Extensions | Command | Description | diff --git a/docs/user-docs/getting-started.md b/docs/user-docs/getting-started.md index abec860be..893bbfe86 100644 --- a/docs/user-docs/getting-started.md +++ b/docs/user-docs/getting-started.md @@ -294,7 +294,7 @@ docker sandbox exec -it sf-sandbox bash ```bash export ANTHROPIC_API_KEY="sk-ant-..." 
-sf auto "implement the feature described in issue #42" +sf autonomous "implement the feature described in issue #42" ``` See [Docker Sandbox docs](../../docker/README.md) for full configuration, resource limits, and compose files. @@ -328,27 +328,27 @@ Type `/sf` inside a session. SF executes one unit of work at a time, pausing bet Step mode keeps you in the loop, reviewing output between each step. -### Auto Mode — `/sf auto` +### Autonomous Mode — `/sf autonomous` -Type `/sf auto` and walk away. SF autonomously researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete. +Type `/sf autonomous` and walk away. SF researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete. `/sf auto` remains available as a short alias. ``` -/sf auto +/sf autonomous ``` -See [Auto Mode](./auto-mode.md) for full details. +See [Autonomous Mode](./auto-mode.md) for full details. --- ## Recommended Workflow: Two Terminals -Run auto mode in one terminal, steer from another. +Run autonomous mode in one terminal, steer from another. **Terminal 1 — let it build:** ```bash sf -/sf auto +/sf autonomous ``` **Terminal 2 — steer while it works:** @@ -466,7 +466,7 @@ For more, see [Troubleshooting](./troubleshooting.md). 
## Next Steps -- [Auto Mode](./auto-mode.md) — deep dive into autonomous execution +- [Autonomous Mode](./auto-mode.md) — deep dive into autonomous execution - [Configuration](./configuration.md) — model selection, timeouts, budgets - [Commands Reference](./commands.md) — all commands and shortcuts - [Provider Setup](./providers.md) — detailed setup for every provider diff --git a/docs/user-docs/troubleshooting.md b/docs/user-docs/troubleshooting.md index a7671f818..3489bc54d 100644 --- a/docs/user-docs/troubleshooting.md +++ b/docs/user-docs/troubleshooting.md @@ -25,13 +25,13 @@ It checks: - Stale cache after a crash — the in-memory file listing doesn't reflect new artifacts - The LLM didn't produce the expected artifact file -**Fix:** Run `/sf doctor` to repair state, then resume with `/sf auto`. If the issue persists, check that the expected artifact file exists on disk. +**Fix:** Run `/sf doctor` to repair state, then resume with `/sf autonomous`. If the issue persists, check that the expected artifact file exists on disk. ### Auto mode stops with "Loop detected" **Cause:** A unit failed to produce its expected artifact twice in a row. -**Fix:** Check the task plan for clarity. If the plan is ambiguous, refine it manually, then `/sf auto` to resume. +**Fix:** Check the task plan for clarity. If the plan is ambiguous, refine it manually, then `/sf autonomous` to resume. ### Wrong files in worktree @@ -103,13 +103,13 @@ For common provider setup issues (role errors, streaming errors, model ID mismat **Symptoms:** Auto mode pauses with "Budget ceiling reached." -**Fix:** Increase `budget_ceiling` in preferences, or switch to `budget` token profile to reduce per-unit cost, then resume with `/sf auto`. +**Fix:** Increase `budget_ceiling` in preferences, or switch to `budget` token profile to reduce per-unit cost, then resume with `/sf autonomous`. ### Stale lock file **Symptoms:** Auto mode won't start, says another session is running. 
-**Fix:** SF automatically detects stale locks — if the owning PID is dead, the lock is cleaned up and re-acquired on the next `/sf auto`. This includes stranded `.sf.lock/` directories left by `proper-lockfile` after crashes. If automatic recovery fails, delete `.sf/auto.lock` and the `.sf.lock/` directory manually: +**Fix:** SF automatically detects stale locks — if the owning PID is dead, the lock is cleaned up and re-acquired on the next `/sf autonomous`. This includes stranded `.sf.lock/` directories left by `proper-lockfile` after crashes. If automatic recovery fails, delete `.sf/auto.lock` and the `.sf.lock/` directory manually: ```bash rm -f .sf/auto.lock @@ -287,7 +287,7 @@ rm .sf/auto.lock rm .sf/completed-units.json ``` -Then `/sf auto` to restart from current disk state. +Then `/sf autonomous` to restart from current disk state. ### Reset routing history diff --git a/packages/daemon/src/orchestrator.ts b/packages/daemon/src/orchestrator.ts index d80e2b243..d46cb1f71 100644 --- a/packages/daemon/src/orchestrator.ts +++ b/packages/daemon/src/orchestrator.ts @@ -93,12 +93,12 @@ const TOOLS: Tool[] = [ }, { name: 'start_session', - description: 'Start a new SF auto-mode session for a project. Provide the absolute project path. Optionally provide a command to run instead of the default "/sf auto".', + description: 'Start a new SF autonomous-mode session for a project. Provide the absolute project path. 
Optionally provide a command to run instead of the default "/sf autonomous".', input_schema: { type: 'object' as const, properties: { projectPath: { type: 'string', description: 'Absolute path to the project directory' }, - command: { type: 'string', description: 'Optional command to send instead of "/sf auto"' }, + command: { type: 'string', description: 'Optional command to send instead of "/sf autonomous"' }, }, required: ['projectPath'], }, diff --git a/packages/daemon/src/session-manager.test.ts b/packages/daemon/src/session-manager.test.ts index 5e9748239..63175f555 100644 --- a/packages/daemon/src/session-manager.test.ts +++ b/packages/daemon/src/session-manager.test.ts @@ -159,8 +159,8 @@ class TestableSessionManager extends SessionManager { (this as any).handleEvent(session, event); }); - // Kick off auto-mode - const command = options.command ?? '/sf auto'; + // Kick off autonomous mode + const command = options.command ?? '/sf autonomous'; await client.prompt(command); // Emit lifecycle events (matching parent behavior) @@ -805,7 +805,7 @@ describe('SessionManager', () => { const client = manager.lastClient!; assert.ok(client.prompted.includes('/sf quick fix-typo')); - assert.ok(!client.prompted.includes('/sf auto')); + assert.ok(!client.prompted.includes('/sf autonomous')); }); // ---- getSessionByDir returns session by directory lookup ---- diff --git a/packages/daemon/src/session-manager.ts b/packages/daemon/src/session-manager.ts index cc1c1be2a..215db18e2 100644 --- a/packages/daemon/src/session-manager.ts +++ b/packages/daemon/src/session-manager.ts @@ -71,7 +71,7 @@ export class SessionManager extends EventEmitter { * * Rejects if a session already exists for this projectDir. * Creates an RpcClient, starts the process, performs the v2 init handshake, - * wires event tracking, and sends '/sf auto' to begin execution. + * wires event tracking, and sends '/sf autonomous' to begin execution. 
*/ async startSession(options: StartSessionOptions): Promise { const { projectDir } = options; @@ -139,8 +139,8 @@ export class SessionManager extends EventEmitter { this.handleEvent(session, event); }); - // Kick off auto-mode - const command = options.command ?? '/sf auto'; + // Kick off autonomous mode + const command = options.command ?? '/sf autonomous'; await client.prompt(command); this.logger.info('session started', { sessionId: session.sessionId, projectDir: resolvedDir }); diff --git a/packages/daemon/src/types.ts b/packages/daemon/src/types.ts index 9db856878..f47925a4a 100644 --- a/packages/daemon/src/types.ts +++ b/packages/daemon/src/types.ts @@ -161,7 +161,7 @@ export interface StartSessionOptions { /** Absolute path to the project directory */ projectDir: string; - /** Command to send after '/sf auto' (default: none) */ + /** Optional command to send in place of the default '/sf autonomous'; when omitted, the default is sent */ command?: string; /** Model ID override */ diff --git a/packages/mcp-server/README.md b/packages/mcp-server/README.md index d00171c0d..b304fb0c3 100644 --- a/packages/mcp-server/README.md +++ b/packages/mcp-server/README.md @@ -133,7 +133,7 @@ Start a SF auto-mode session for a project directory. 
| Parameter | Type | Required | Description | |-----------|------|----------|-------------| | `projectDir` | `string` | ✅ | Absolute path to the project directory | -| `command` | `string` | | Command to send (default: `"/sf auto"`) | +| `command` | `string` | | Command to send (default: `"/sf autonomous"`) | | `model` | `string` | | Model ID override | | `bare` | `boolean` | | Run in bare mode (skip user config) | diff --git a/packages/mcp-server/src/mcp-server.test.ts b/packages/mcp-server/src/mcp-server.test.ts index f6f99df9c..e61a70018 100644 --- a/packages/mcp-server/src/mcp-server.test.ts +++ b/packages/mcp-server/src/mcp-server.test.ts @@ -171,8 +171,8 @@ class TestableSessionManager extends SessionManager { this._handleEvent(session, event); }); - // Kick off auto-mode - const command = options.command ?? '/sf auto'; + // Kick off autonomous mode + const command = options.command ?? '/sf autonomous'; await client.prompt(command); return session.sessionId; @@ -236,10 +236,10 @@ describe('SessionManager', () => { assert.equal(session.projectDir, resolve('/tmp/test-project')); }); - it('startSession sends /sf auto by default', async () => { + it('startSession sends /sf autonomous by default', async () => { await sm.startSession('/tmp/test-prompt', { cliPath: '/usr/bin/sf' }); assert.ok(sm.lastClient); - assert.deepEqual(sm.lastClient.prompted, ['/sf auto']); + assert.deepEqual(sm.lastClient.prompted, ['/sf autonomous']); }); it('startSession sends custom command when provided', async () => { diff --git a/packages/mcp-server/src/server.ts b/packages/mcp-server/src/server.ts index e7ba62abc..ed7bb4eca 100644 --- a/packages/mcp-server/src/server.ts +++ b/packages/mcp-server/src/server.ts @@ -24,7 +24,7 @@ import { buildGraph, writeGraph, writeSnapshot, graphStatus, graphQuery, graphDi import { resolveSFRoot } from './readers/paths.js'; import { runDoctorLite } from './readers/doctor-lite.js'; import { registerWorkflowTools } from './workflow-tools.js'; -import { 
applySecrets, checkExistingEnvKeys, detectDestination } from './env-writer.js'; +import { applySecrets, checkExistingEnvKeys, detectDestination, resolveProjectEnvFilePath } from './env-writer.js'; // --------------------------------------------------------------------------- // Constants @@ -367,7 +367,7 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{ 'Start a SF auto-mode session for a project directory. Returns a sessionId for tracking.', { projectDir: z.string().describe('Absolute path to the project directory'), - command: z.string().optional().describe('Command to send (default: "/sf auto")'), + command: z.string().optional().describe('Command to send (default: "/sf autonomous")'), model: z.string().optional().describe('Model ID override'), bare: z.boolean().optional().describe('Run in bare mode (skip user config)'), }, @@ -589,7 +589,7 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{ }; try { - const resolvedProjectDir = resolve(projectDir); + const resolvedProjectDir = resolveProjectEnvFilePath(projectDir); const resolvedEnvPath = resolve(resolvedProjectDir, envFilePath ?? '.env'); // (1) Check which keys already exist diff --git a/packages/mcp-server/src/session-manager.ts b/packages/mcp-server/src/session-manager.ts index 029a9d635..9c8b1fe2a 100644 --- a/packages/mcp-server/src/session-manager.ts +++ b/packages/mcp-server/src/session-manager.ts @@ -60,7 +60,7 @@ export class SessionManager { * * Rejects if a session already exists for this projectDir. * Creates an RpcClient, starts the process, performs the v2 init handshake, - * wires event tracking, and sends '/sf auto' to begin execution. + * wires event tracking, and sends '/sf autonomous' to begin execution. 
*/ async startSession(projectDir: string, options: ExecuteOptions = {}): Promise { if (!projectDir || projectDir.trim() === '') { @@ -124,8 +124,8 @@ export class SessionManager { this.handleEvent(session, event); }); - // Kick off auto-mode - const command = options.command ?? '/sf auto'; + // Kick off autonomous mode + const command = options.command ?? '/sf autonomous'; await client.prompt(command); return session.sessionId; diff --git a/packages/mcp-server/src/types.ts b/packages/mcp-server/src/types.ts index dc5588259..fbe93d87e 100644 --- a/packages/mcp-server/src/types.ts +++ b/packages/mcp-server/src/types.ts @@ -83,7 +83,7 @@ export interface CostAccumulator { // --------------------------------------------------------------------------- export interface ExecuteOptions { - /** Command to send after '/sf auto' (default: none) */ + /** Optional command to send in place of the default '/sf autonomous'; when omitted, the default is sent */ command?: string; /** Model ID override */ diff --git a/packages/pi-agent-core/src/agent-loop.test.ts b/packages/pi-agent-core/src/agent-loop.test.ts index a6e463bb2..5ddb1d637 100644 --- a/packages/pi-agent-core/src/agent-loop.test.ts +++ b/packages/pi-agent-core/src/agent-loop.test.ts @@ -101,6 +101,109 @@ describe("agent-loop — pauseTurn handling (#2869)", () => { }); describe("agent-loop — steering during tool batches", () => { + it("does not interrupt the current tool batch for custom system steering", async () => { + const calls: string[] = []; + const tool = { + name: "record", + label: "Record", + description: "Record a value", + parameters: Type.Object({ value: Type.String() }), + execute: async (_id: string, args: { value: string }) => { + calls.push(args.value); + return { + content: [{ type: "text" as const, text: `recorded ${args.value}` }], + details: {}, + }; + }, + } satisfies AgentTool<{ value: string }>; + + const first = makeAssistantMessage({ + content: [ + { + type: "toolCall", + id: "tc-1", + name: "record", + arguments: { value: 
"one" }, + }, + { + type: "toolCall", + id: "tc-2", + name: "record", + arguments: { value: "two" }, + }, + ], + stopReason: "toolUse", + }); + const second = makeAssistantMessage({ + content: [{ type: "text", text: "saw system steering" }], + stopReason: "stop", + }); + const mockStream = createMockStreamFn([first, second]); + let steeringPolls = 0; + const steering: AgentMessage = { + role: "custom", + customType: "sf-memory-sleeper", + content: "system notice", + display: false, + timestamp: Date.now(), + } as AgentMessage; + + const context: AgentContext = { + systemPrompt: "You are a test agent.", + messages: [ + { + role: "user", + content: [{ type: "text", text: "record values" }], + timestamp: Date.now(), + }, + ], + tools: [tool], + }; + + const config: AgentLoopConfig = { + model: TEST_MODEL, + convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"), + toolExecution: "sequential", + getSteeringMessages: async () => { + steeringPolls += 1; + return steeringPolls === 1 ? 
[steering] : []; + }, + }; + + const stream = agentLoop( + [ + { + role: "user", + content: [{ type: "text", text: "record values" }], + timestamp: Date.now(), + }, + ], + context, + config, + undefined, + mockStream as any, + ); + + const events = await collectEvents(stream); + const skipped = events.filter( + (event) => + event.type === "tool_execution_end" && + JSON.stringify(event.result.content).includes( + "Skipped due to queued user message", + ), + ); + + assert.deepEqual(calls, ["one", "two"]); + assert.equal(skipped.length, 0); + assert.ok( + events.some( + (event) => + event.type === "message_start" && event.message === steering, + ), + "system steering should still be delivered after the tool batch", + ); + }); + it("defers queued steering until after the current tool batch when configured", async () => { const calls: string[] = []; const tool = { diff --git a/packages/pi-agent-core/src/agent-loop.ts b/packages/pi-agent-core/src/agent-loop.ts index 23fbfdad2..9d909c211 100644 --- a/packages/pi-agent-core/src/agent-loop.ts +++ b/packages/pi-agent-core/src/agent-loop.ts @@ -489,6 +489,10 @@ interface ToolExecutionResult { preparationErrorCount: number; } +function hasUserSteeringMessage(messages: readonly AgentMessage[]): boolean { + return messages.some((message) => message.role === "user"); +} + /** * Execute tool calls from an assistant message. */ @@ -553,7 +557,7 @@ async function executeToolCallsSequential( const steering = await config.getSteeringMessages(); if (steering.length > 0) { steeringMessages = [...(steeringMessages ?? 
[]), ...steering]; - if (interruptOnSteering) { + if (interruptOnSteering && hasUserSteeringMessage(steering)) { const remainingCalls = toolCalls.slice(index + 1); for (const skipped of remainingCalls) { results.push(skipToolCall(skipped, stream)); @@ -604,7 +608,7 @@ async function executeToolCallsParallel( const steering = await config.getSteeringMessages(); if (steering.length > 0) { steeringMessages = [...(steeringMessages ?? []), ...steering]; - if (interruptOnSteering) { + if (interruptOnSteering && hasUserSteeringMessage(steering)) { for (const runnable of runnableCalls) { results.push(skipToolCall(runnable.toolCall, stream, { emitStart: false })); } diff --git a/packages/pi-ai/src/utils/event-stream.test.ts b/packages/pi-ai/src/utils/event-stream.test.ts new file mode 100644 index 000000000..d5ae103d7 --- /dev/null +++ b/packages/pi-ai/src/utils/event-stream.test.ts @@ -0,0 +1,138 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { parseAnthropicSSE } from "./event-stream.js"; + +function createMockResponse(chunks: string[]): Response { + let index = 0; + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + pull(controller) { + if (index < chunks.length) { + controller.enqueue(encoder.encode(chunks[index++])); + } else { + controller.close(); + } + }, + }); + return new Response(stream); +} + +describe("parseAnthropicSSE", () => { + it("yields parsed JSON for known Anthropic events", async () => { + const sse = + 'event: message_start\n' + + 'data: {"type":"message_start","message":{"id":"msg_1","role":"assistant","content":[],"model":"claude-3","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}\n' + + '\n' + + 'event: content_block_start\n' + + 'data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}\n' + + '\n' + + 'event: content_block_delta\n' + + 'data: 
{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}\n' + + '\n' + + 'event: content_block_stop\n' + + 'data: {"type":"content_block_stop","index":0}\n' + + '\n' + + 'event: message_delta\n' + + 'data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"input_tokens":10,"output_tokens":1,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}\n' + + '\n' + + 'event: message_stop\n' + + 'data: {"type":"message_stop"}\n' + + '\n'; + + const response = createMockResponse([sse]); + const events: unknown[] = []; + for await (const event of parseAnthropicSSE(response)) { + events.push(event); + } + + assert.equal(events.length, 6); + assert.equal((events[0] as any).type, "message_start"); + assert.equal((events[1] as any).type, "content_block_start"); + assert.equal((events[2] as any).type, "content_block_delta"); + assert.equal((events[3] as any).type, "content_block_stop"); + assert.equal((events[4] as any).type, "message_delta"); + assert.equal((events[5] as any).type, "message_stop"); + }); + + it("silently drops unknown events (e.g. 
OpenAI-style done)", async () => { + const sse = + 'event: message_start\n' + + 'data: {"type":"message_start","message":{"id":"msg_1","role":"assistant","content":[],"model":"claude-3","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}\n' + + '\n' + + 'event: done\n' + + 'data: [DONE]\n' + + '\n' + + 'event: content_block_start\n' + + 'data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}\n' + + '\n'; + + const response = createMockResponse([sse]); + const events: unknown[] = []; + for await (const event of parseAnthropicSSE(response)) { + events.push(event); + } + + assert.equal(events.length, 2); + assert.equal((events[0] as any).type, "message_start"); + assert.equal((events[1] as any).type, "content_block_start"); + }); + + it("ignores ping events", async () => { + const sse = + 'event: message_start\n' + + 'data: {"type":"message_start","message":{"id":"msg_1","role":"assistant","content":[],"model":"claude-3","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}\n' + + '\n' + + 'event: ping\n' + + 'data: {}\n' + + '\n' + + 'event: message_stop\n' + + 'data: {"type":"message_stop"}\n' + + '\n'; + + const response = createMockResponse([sse]); + const events: unknown[] = []; + for await (const event of parseAnthropicSSE(response)) { + events.push(event); + } + + assert.equal(events.length, 2); + assert.equal((events[0] as any).type, "message_start"); + assert.equal((events[1] as any).type, "message_stop"); + }); + + it("handles chunked SSE data across multiple reads", async () => { + const chunks = [ + 'event: message_start\n', + 'data: 
{"type":"message_start","message":{"id":"msg_1","role":"assistant","content":[],"model":"claude-3","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}\n\n', + 'event: message_stop\n', + 'data: {"type":"message_stop"}\n\n', + ]; + + const response = createMockResponse(chunks); + const events: unknown[] = []; + for await (const event of parseAnthropicSSE(response)) { + events.push(event); + } + + assert.equal(events.length, 2); + assert.equal((events[0] as any).type, "message_start"); + assert.equal((events[1] as any).type, "message_stop"); + }); + + it("handles comment lines", async () => { + const sse = + ': comment line\n' + + 'event: message_start\n' + + 'data: {"type":"message_start","message":{"id":"msg_1","role":"assistant","content":[],"model":"claude-3","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}\n' + + '\n'; + + const response = createMockResponse([sse]); + const events: unknown[] = []; + for await (const event of parseAnthropicSSE(response)) { + events.push(event); + } + + assert.equal(events.length, 1); + assert.equal((events[0] as any).type, "message_start"); + }); +}); diff --git a/packages/pi-coding-agent/src/core/agent-session-custom-message-queue.test.ts b/packages/pi-coding-agent/src/core/agent-session-custom-message-queue.test.ts new file mode 100644 index 000000000..9195f89b7 --- /dev/null +++ b/packages/pi-coding-agent/src/core/agent-session-custom-message-queue.test.ts @@ -0,0 +1,110 @@ +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, it } from "node:test"; + +import { Agent, type AgentMessage } from "@singularity-forge/pi-agent-core"; +import { AgentSession } from "./agent-session.js"; +import { 
AuthStorage } from "./auth-storage.js"; +import { ModelRegistry } from "./model-registry.js"; +import { DefaultResourceLoader } from "./resource-loader.js"; +import { SessionManager } from "./session-manager.js"; +import { SettingsManager } from "./settings-manager.js"; + +let testDir: string; + +async function createSession() { + const agentDir = join(testDir, "agent-home"); + const authStorage = AuthStorage.inMemory({}); + const modelRegistry = new ModelRegistry(authStorage, join(agentDir, "models.json")); + const settingsManager = SettingsManager.inMemory(); + const resourceLoader = new DefaultResourceLoader({ + cwd: testDir, + agentDir, + settingsManager, + noExtensions: true, + noPromptTemplates: true, + noThemes: true, + }); + await resourceLoader.reload(); + + return new AgentSession({ + agent: new Agent(), + sessionManager: SessionManager.inMemory(testDir), + settingsManager, + cwd: testDir, + resourceLoader, + modelRegistry, + }); +} + +describe("AgentSession custom message queueing", () => { + beforeEach(() => { + testDir = mkdtempSync(join(tmpdir(), "agent-session-custom-message-")); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it("queues triggerTurn custom messages as steering when the agent is already processing", async () => { + const session = await createSession(); + const agent = (session as any).agent as Agent & { + prompt: (message: AgentMessage) => Promise; + steer: (message: AgentMessage) => void; + }; + const steered: AgentMessage[] = []; + agent.prompt = async () => { + throw new Error( + "Agent is already processing a prompt. 
Use steer() or followUp() to queue messages, or wait for completion.", + ); + }; + agent.steer = (message) => { + steered.push(message); + }; + + await session.sendCustomMessage( + { + customType: "sf-test", + content: "continue the active run", + display: false, + }, + { triggerTurn: true }, + ); + + assert.equal(steered.length, 1); + assert.equal(steered[0]?.role, "custom"); + assert.equal((steered[0] as any).customType, "sf-test"); + }); + + it("preserves explicit followUp delivery when triggerTurn races with active processing", async () => { + const session = await createSession(); + const agent = (session as any).agent as Agent & { + prompt: (message: AgentMessage) => Promise; + followUp: (message: AgentMessage) => void; + }; + const followUps: AgentMessage[] = []; + agent.prompt = async () => { + throw new Error( + "Agent is already processing a prompt. Use steer() or followUp() to queue messages, or wait for completion.", + ); + }; + agent.followUp = (message) => { + followUps.push(message); + }; + + await session.sendCustomMessage( + { + customType: "sf-test", + content: "after the current run", + display: false, + }, + { triggerTurn: true, deliverAs: "followUp" }, + ); + + assert.equal(followUps.length, 1); + assert.equal(followUps[0]?.role, "custom"); + assert.equal((followUps[0] as any).content, "after the current run"); + }); +}); diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts index 116261f29..bdcbc89df 100644 --- a/packages/pi-coding-agent/src/core/agent-session.ts +++ b/packages/pi-coding-agent/src/core/agent-session.ts @@ -194,6 +194,14 @@ export interface PromptOptions { source?: InputSource; } +function isAgentAlreadyProcessingError(error: unknown): boolean { + const message = error instanceof Error ? 
error.message : String(error); + return ( + message.includes("Agent is already processing a prompt") || + message.includes("Agent is already processing.") + ); +} + /** Result from cycleModel() */ export interface ModelCycleResult { model: Model; @@ -275,6 +283,10 @@ export class AgentSession { // Extension system private _extensionRunner: ExtensionRunner | undefined = undefined; private _turnIndex = 0; + private _processingAgentEnd = false; + private _processingQueuedAgentEnd = false; + private _sessionSwitchPending = false; + private _sessionTransitionStartedDuringAgentEnd = false; private _resourceLoader: ResourceLoader; private _customTools: ToolDefinition[]; @@ -446,7 +458,24 @@ export class AgentSession { } // Emit to extensions first - await this._emitExtensionEvent(event); + // Guard agent_end: track when session transition starts during extension handlers + // so post-handlers (retry/compaction) can bail before corrupting new-session state. + let skipAgentEndPostHandlers = false; + if (event.type === "agent_end") { + this._processingQueuedAgentEnd = true; + try { + await this._emitExtensionEvent(event); + } finally { + this._processingQueuedAgentEnd = false; + skipAgentEndPostHandlers = this._sessionTransitionStartedDuringAgentEnd; + this._sessionTransitionStartedDuringAgentEnd = false; + } + if (skipAgentEndPostHandlers) { + return; + } + } else { + await this._emitExtensionEvent(event); + } // Notify all listeners this._emit(event); @@ -498,6 +527,13 @@ export class AgentSession { // Check auto-retry and auto-compaction after agent completes if (event.type === "agent_end" && this._lastAssistantMessage) { + // A session transition started during agent_end handler execution - + // bail to avoid running retry/compaction against new-session state. 
+ if (this._sessionSwitchPending) { + this._lastAssistantMessage = undefined; + return; + } + const msg = this._lastAssistantMessage; this._lastAssistantMessage = undefined; @@ -629,20 +665,26 @@ export class AgentSession { /** Emit extension events based on agent events */ private async _emitExtensionEvent(event: AgentEvent): Promise { - if (!this._extensionRunner) return; + const extensionRunner = this._extensionRunner; + if (!extensionRunner) return; if (event.type === "agent_start") { this._turnIndex = 0; - await this._extensionRunner.emit({ type: "agent_start" }); + await extensionRunner.emit({ type: "agent_start" }); } else if (event.type === "agent_end") { - await this._extensionRunner.emit({ type: "agent_end", messages: event.messages }); + this._processingAgentEnd = true; + try { + await extensionRunner.emit({ type: "agent_end", messages: event.messages }); + } finally { + this._processingAgentEnd = false; + } } else if (event.type === "turn_start") { const extensionEvent: TurnStartEvent = { type: "turn_start", turnIndex: this._turnIndex, timestamp: Date.now(), }; - await this._extensionRunner.emit(extensionEvent); + await extensionRunner.emit(extensionEvent); } else if (event.type === "turn_end") { const extensionEvent: TurnEndEvent = { type: "turn_end", @@ -650,27 +692,27 @@ export class AgentSession { message: event.message, toolResults: event.toolResults, }; - await this._extensionRunner.emit(extensionEvent); + await extensionRunner.emit(extensionEvent); this._turnIndex++; } else if (event.type === "message_start") { const extensionEvent: MessageStartEvent = { type: "message_start", message: event.message, }; - await this._extensionRunner.emit(extensionEvent); + await extensionRunner.emit(extensionEvent); } else if (event.type === "message_update") { const extensionEvent: MessageUpdateEvent = { type: "message_update", message: event.message, assistantMessageEvent: event.assistantMessageEvent, }; - await this._extensionRunner.emit(extensionEvent); + 
await extensionRunner.emit(extensionEvent); } else if (event.type === "message_end") { const extensionEvent: MessageEndEvent = { type: "message_end", message: event.message, }; - await this._extensionRunner.emit(extensionEvent); + await extensionRunner.emit(extensionEvent); } else if (event.type === "tool_execution_start") { const extensionEvent: ToolExecutionStartEvent = { type: "tool_execution_start", @@ -678,7 +720,7 @@ export class AgentSession { toolName: event.toolName, args: event.args, }; - await this._extensionRunner.emit(extensionEvent); + await extensionRunner.emit(extensionEvent); } else if (event.type === "tool_execution_update") { const extensionEvent: ToolExecutionUpdateEvent = { type: "tool_execution_update", @@ -687,7 +729,7 @@ export class AgentSession { args: event.args, partialResult: event.partialResult, }; - await this._extensionRunner.emit(extensionEvent); + await extensionRunner.emit(extensionEvent); } else if (event.type === "tool_execution_end") { const extensionEvent: ToolExecutionEndEvent = { type: "tool_execution_end", @@ -696,7 +738,7 @@ export class AgentSession { result: event.result, isError: event.isError, }; - await this._extensionRunner.emit(extensionEvent); + await extensionRunner.emit(extensionEvent); } } @@ -1424,7 +1466,18 @@ export class AgentSession { this.agent.steer(appMessage); } } else if (options?.triggerTurn) { - await this.agent.prompt(appMessage); + try { + await this.agent.prompt(appMessage); + } catch (error) { + if (!isAgentAlreadyProcessingError(error)) { + throw error; + } + if (options?.deliverAs === "followUp") { + this.agent.followUp(appMessage); + } else { + this.agent.steer(appMessage); + } + } } else { this.agent.appendMessage(appMessage); this.sessionManager.appendCustomMessageEntry( @@ -1540,10 +1593,24 @@ export class AgentSession { // The agent may go idle without emitting agent_end if the abort happens // between tool execution and response processing. 
if (!this.isStreaming && this._extensionRunner) { - await this._extensionRunner.emit({ - type: "agent_end", - messages: this.agent.state.messages, - }); + const wasProcessingAgentEnd = this._processingAgentEnd; + this._processingAgentEnd = true; + try { + // Track that a session switch started during agent_end: + // _processingQueuedAgentEnd is set by _processAgentEvent for queued + // agent_end emission. If it is still true here, abort() was called + // from a session switch that fired during agent_end handling — + // post-handlers must bail. + if (this._processingQueuedAgentEnd) { + this._sessionTransitionStartedDuringAgentEnd = true; + } + await this._extensionRunner.emit({ + type: "agent_end", + messages: this.agent.state.messages, + }); + } finally { + this._processingAgentEnd = wasProcessingAgentEnd; + } } } @@ -1573,9 +1640,14 @@ export class AgentSession { } } - this._disconnectFromAgent(); - await this.abort(); - this.agent.reset(); + this._sessionSwitchPending = true; + try { + this._disconnectFromAgent(); + await this.abort(); + this.agent.reset(); + } finally { + this._sessionSwitchPending = false; + } // Update cwd to current process directory — auto-mode may have chdir'd // into a worktree since the original session was created. 
const previousCwd = this._cwd; @@ -2426,8 +2498,13 @@ export class AgentSession { } } - this._disconnectFromAgent(); - await this.abort(); + this._sessionSwitchPending = true; + try { + this._disconnectFromAgent(); + await this.abort(); + } finally { + this._sessionSwitchPending = false; + } this._steeringMessages = []; this._followUpMessages = []; this._pendingNextTurnMessages = []; diff --git a/packages/pi-coding-agent/src/core/keybindings-followup.test.ts b/packages/pi-coding-agent/src/core/keybindings-followup.test.ts new file mode 100644 index 000000000..fd97f52f6 --- /dev/null +++ b/packages/pi-coding-agent/src/core/keybindings-followup.test.ts @@ -0,0 +1,12 @@ +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import test from "node:test"; + +const source = readFileSync(join(process.cwd(), "packages/pi-coding-agent/src/core/keybindings.ts"), "utf-8"); + +test("default follow-up keybinding includes Alt+Enter and Ctrl+Enter", () => { + const followUpDefault = source.match(/followUp:\s*\[([^\]]+)\]/)?.[1] ?? ""; + assert.match(followUpDefault, /"alt\+enter"/); + assert.match(followUpDefault, /"ctrl\+enter"/); +}); diff --git a/packages/pi-coding-agent/src/core/keybindings.ts b/packages/pi-coding-agent/src/core/keybindings.ts index f2df75b76..5ba03c63c 100644 --- a/packages/pi-coding-agent/src/core/keybindings.ts +++ b/packages/pi-coding-agent/src/core/keybindings.ts @@ -63,7 +63,7 @@ const DEFAULT_APP_KEYBINDINGS: Record = { toggleThinking: "ctrl+t", toggleSessionNamedFilter: "ctrl+n", externalEditor: "ctrl+g", - followUp: "alt+enter", + followUp: ["alt+enter", "ctrl+enter"], dequeue: "alt+up", pasteImage: process.platform === "win32" ? 
"alt+v" : ["ctrl+v", "alt+v"], newSession: [], diff --git a/packages/pi-coding-agent/src/core/slash-commands.ts b/packages/pi-coding-agent/src/core/slash-commands.ts index 05cbb1f5e..bcde22d9b 100644 --- a/packages/pi-coding-agent/src/core/slash-commands.ts +++ b/packages/pi-coding-agent/src/core/slash-commands.ts @@ -38,5 +38,6 @@ export const BUILTIN_SLASH_COMMANDS: ReadonlyArray = [ { name: "thinking", description: "Set thinking level (off/minimal/low/medium/high/xhigh)" }, { name: "edit-mode", description: "Toggle edit mode (standard/hashline)" }, { name: "terminal", description: "Run a shell command directly (e.g. /terminal ping -c3 1.1.1.1)" }, + { name: "stop", description: "Stop the currently running response" }, { name: "quit", description: "Quit pi" }, ]; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts index 283a31f99..d61d92466 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts @@ -14,12 +14,13 @@ function renderTool( isError: boolean; details?: Record; }, + toolDefinition?: { label?: string }, ): string { const component = new ToolExecutionComponent( toolName, args, {}, - undefined, + toolDefinition as any, { requestRender() {} } as any, ); component.setExpanded(true); @@ -48,7 +49,7 @@ function renderToolCollapsed( } describe("ToolExecutionComponent", () => { - test("renders capitalized Claude Code Bash tool names with bash output instead of generic args JSON", () => { + test("renders capitalized adapter Bash tool names with bash output instead of generic args JSON", () => { const rendered = renderTool( "Bash", { command: "pwd" }, @@ -60,7 +61,7 @@ describe("ToolExecutionComponent", () => { assert.doesNotMatch(rendered, /^\{\s*\}$/m); }); - test("renders 
capitalized Claude Code Read tool names with read output", () => { + test("renders capitalized adapter Read tool names with read output", () => { const rendered = renderTool( "Read", { path: "/tmp/demo.txt" }, @@ -91,7 +92,8 @@ describe("ToolExecutionComponent", () => { { count: 3, enabled: true, label: "hello" }, ); - assert.match(rendered, /some_unknown_tool/); + assert.match(rendered, /Some Unknown Tool/); + assert.doesNotMatch(rendered, /some_unknown_tool/); assert.match(rendered, /count=3/); assert.match(rendered, /enabled=true/); assert.match(rendered, /label="hello"/); @@ -123,4 +125,17 @@ describe("ToolExecutionComponent", () => { assert.match(rendered, /"payload"/); assert.match(rendered, /"nested"/); }); + + test("custom tools without renderers use registered labels instead of raw ids", () => { + const rendered = renderTool( + "sf_plan_milestone", + { milestoneId: "M001" }, + undefined, + { label: "Plan Milestone" }, + ); + + assert.match(rendered, /Tool Plan Milestone/); + assert.match(rendered, /Plan Milestone/); + assert.doesNotMatch(rendered, /sf_plan_milestone/); + }); }); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts index 6620cba1d..17ec0bfcf 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts @@ -602,7 +602,21 @@ export class ToolExecutionComponent extends Container { } } else { // No custom renderCall, show tool name - this.contentBox.addChild(new Text(theme.fg("toolTitle", theme.bold(this.toolName)), 0, 0)); + this.contentBox.addChild( + new Text( + theme.fg( + "toolTitle", + theme.bold( + prettifyToolName( + this.toolName, + this.toolDefinition.label, + ), + ), + ), + 0, + 0, + ), + ); customRendererHasContent = true; } diff --git 
a/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts index 293ab188a..4119d028c 100644 --- a/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts @@ -23,6 +23,7 @@ function createHost(options: HostOptions = {}) { let editorText = ""; let settingsOpened = 0; let aborts = 0; + const statuses: string[] = []; let pendingDisplayUpdates = 0; let renderRequests = 0; @@ -61,9 +62,11 @@ function createHost(options: HostOptions = {}) { }, }, getSlashCommandContext: () => ({ + session: host.session, showSettingsSelector: () => { settingsOpened += 1; }, + showStatus: host.showStatus, }), handleBashCommand: async () => {}, showWarning(message: string) { @@ -72,6 +75,9 @@ function createHost(options: HostOptions = {}) { showError(message: string) { errors.push(message); }, + showStatus(message: string) { + statuses.push(message); + }, updateEditorBorderColor() {}, isExtensionCommand() { return false; @@ -107,6 +113,7 @@ function createHost(options: HostOptions = {}) { getEditorText: () => editorText, getSettingsOpened: () => settingsOpened, getAborts: () => aborts, + statuses, getPendingDisplayUpdates: () => pendingDisplayUpdates, getRenderRequests: () => renderRequests, }; @@ -140,6 +147,19 @@ test("input-controller: built-in slash commands stay in TUI dispatch", async () ); }); +test("input-controller: /stop aborts the current response", async () => { + const { host, prompted, errors, statuses, getAborts, getEditorText } = + createHost(); + + await host.defaultEditor.onSubmit("/stop"); + + assert.equal(getAborts(), 1); + assert.deepEqual(prompted, []); + assert.deepEqual(errors, []); + assert.deepEqual(statuses, ["Stopped current response."]); + assert.equal(getEditorText(), ""); +}); + test("input-controller: extension slash commands fall 
through to session.prompt", async () => { const { host, prompted, errors, history } = createHost({ knownSlashCommands: ["sf"], diff --git a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts index 7e943e7f3..203a51bc9 100644 --- a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts +++ b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts @@ -235,6 +235,11 @@ export async function dispatchSlashCommand( await ctx.handleBashCommand(command, { loginShell: true }); return true; } + if (text === "/stop") { + await ctx.session.abort(); + ctx.showStatus("Stopped current response."); + return true; + } return false; } diff --git a/scripts/check-versioned-json.mjs b/scripts/check-versioned-json.mjs index 9ad4ca033..2eb6bbc4e 100644 --- a/scripts/check-versioned-json.mjs +++ b/scripts/check-versioned-json.mjs @@ -1,19 +1,22 @@ #!/usr/bin/env node /** - * Enforce schema/version markers on SF-owned JSON contracts. + * Enforce valid JSON everywhere and schemaVersion markers on SF-owned contracts. * - * This intentionally does not scan ecosystem configuration files such as - * tsconfig.json, package.json, Biome config, or lockfiles. Those files are - * versioned by their owning tools. This check covers JSON that SF owns as - * runtime data, persisted contracts, or generated artifact templates. + * Ecosystem JSON such as package.json, tsconfig.json, lockfiles, and extension + * manifests are parsed for validity but are not treated as SF data contracts. + * Their `version` fields belong to their owning tools or component release + * lifecycle. SF-owned runtime/data contracts use `schemaVersion` for shape + * compatibility. 
*/ import { execFileSync } from "node:child_process"; import { readFileSync } from "node:fs"; -const REQUIRED_PREFIXES = ["src/resources/extensions/sf/"]; -const EXEMPT_SUFFIXES = ["/package.json"]; -const VERSION_KEYS = ["schemaVersion", "version"]; +const CONTRACT_EXACT_PATHS = new Set([ + "src/resources/extensions/sf/workflow-templates/registry.json", +]); + +const CONTRACT_PREFIXES = ["src/resources/extensions/sf/learning/data/"]; function trackedJsonFiles() { try { @@ -31,59 +34,87 @@ function trackedJsonFiles() { } } -function shouldCheck(path) { +export function isSfOwnedJsonContract(path) { return ( - REQUIRED_PREFIXES.some((prefix) => path.startsWith(prefix)) && - !EXEMPT_SUFFIXES.some((suffix) => path.endsWith(suffix)) + CONTRACT_EXACT_PATHS.has(path) || + CONTRACT_PREFIXES.some((prefix) => path.startsWith(prefix)) ); } -function hasOwn(object, key) { +export function hasOwn(object, key) { return Object.prototype.hasOwnProperty.call(object, key); } -function hasVersionMarker(parsed) { +export function getSchemaVersion(parsed) { if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return false; - if (VERSION_KEYS.some((key) => hasOwn(parsed, key))) return true; + if (hasOwn(parsed, "schemaVersion")) return parsed.schemaVersion; const meta = parsed._meta; - return Boolean( - meta && - typeof meta === "object" && - !Array.isArray(meta) && - VERSION_KEYS.some((key) => hasOwn(meta, key)), + if (meta && typeof meta === "object" && !Array.isArray(meta) && hasOwn(meta, "schemaVersion")) { + return meta.schemaVersion; + } + + return undefined; +} + +export function hasValidSchemaVersion(parsed) { + const schemaVersion = getSchemaVersion(parsed); + return ( + typeof schemaVersion === "number" && + Number.isInteger(schemaVersion) && + schemaVersion >= 1 ); } -const failures = []; -let checked = 0; +export function checkJsonPolicy(paths, readText) { + const failures = []; + let contractsChecked = 0; + let filesParsed = 0; -for (const path of 
trackedJsonFiles()) { - if (!shouldCheck(path)) continue; - checked++; + for (const path of paths) { + filesParsed++; - let parsed; - try { - parsed = JSON.parse(readFileSync(path, "utf8")); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - failures.push(`${path}: invalid JSON (${message})`); - continue; + let parsed; + try { + parsed = JSON.parse(readText(path)); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + failures.push(`${path}: invalid JSON (${message})`); + continue; + } + + if (!isSfOwnedJsonContract(path)) continue; + contractsChecked++; + + if (!hasValidSchemaVersion(parsed)) { + failures.push( + `${path}: missing numeric schemaVersion marker (top-level or _meta)`, + ); + } } - if (!hasVersionMarker(parsed)) { - failures.push( - `${path}: missing schemaVersion/version marker (top-level or _meta)`, - ); - } + return { failures, filesParsed, contractsChecked }; } -if (failures.length > 0) { - console.error("Versioned JSON check failed:"); - for (const failure of failures) { - console.error(` - ${failure}`); +export function run() { + const result = checkJsonPolicy(trackedJsonFiles(), (path) => + readFileSync(path, "utf8"), + ); + + if (result.failures.length > 0) { + console.error("Versioned JSON check failed:"); + for (const failure of result.failures) { + console.error(` - ${failure}`); + } + process.exit(1); } - process.exit(1); + + console.log( + `Versioned JSON check passed (${result.filesParsed} JSON file${result.filesParsed === 1 ? "" : "s"} parsed, ` + + `${result.contractsChecked} SF contract${result.contractsChecked === 1 ? "" : "s"} checked).`, + ); } -console.log(`Versioned JSON check passed (${checked} file${checked === 1 ? 
"" : "s"}).`); +if (import.meta.url === `file://${process.argv[1]}`) { + run(); +} diff --git a/scripts/check-versioned-json.test.mjs b/scripts/check-versioned-json.test.mjs new file mode 100644 index 000000000..28747c935 --- /dev/null +++ b/scripts/check-versioned-json.test.mjs @@ -0,0 +1,54 @@ +import assert from "node:assert/strict"; +import test from "node:test"; + +import { + checkJsonPolicy, + hasValidSchemaVersion, + isSfOwnedJsonContract, +} from "./check-versioned-json.mjs"; + +test("check-versioned-json: parses every JSON file", () => { + const files = { + "package.json": '{"version":"1.0.0"}', + "src/resources/extensions/sf/learning/data/model-benchmarks.json": "{bad", + }; + + const result = checkJsonPolicy(Object.keys(files), (path) => files[path]); + + assert.equal(result.failures.length, 1); + assert.match( + result.failures[0], + /^src\/resources\/extensions\/sf\/learning\/data\/model-benchmarks\.json: invalid JSON/, + ); + assert.equal(result.filesParsed, 2); +}); + +test("check-versioned-json: requires numeric schemaVersion for SF contracts", () => { + const files = { + "src/resources/extensions/sf/learning/data/unit-weights.json": + '{"_meta":{"schemaVersion":1}}', + "src/resources/extensions/sf/workflow-templates/registry.json": + '{"schemaVersion":1,"templates":{}}', + "src/resources/extensions/sf/learning/data/model-benchmarks.json": + '{"_meta":{"version":"1"}}', + "src/resources/extensions/sf/extension-manifest.json": + '{"version":"1.0.0"}', + }; + + const result = checkJsonPolicy(Object.keys(files), (path) => files[path]); + + assert.deepEqual(result.failures, [ + "src/resources/extensions/sf/learning/data/model-benchmarks.json: missing numeric schemaVersion marker (top-level or _meta)", + ]); + assert.equal(result.contractsChecked, 3); +}); + +test("check-versioned-json: treats extension version as component version", () => { + assert.equal( + isSfOwnedJsonContract("src/resources/extensions/sf/extension-manifest.json"), + false, + ); + 
assert.equal(hasValidSchemaVersion({ version: "1.0.0" }), false); + assert.equal(hasValidSchemaVersion({ schemaVersion: 1 }), true); + assert.equal(hasValidSchemaVersion({ _meta: { schemaVersion: 1 } }), true); +}); diff --git a/src/cli-web-branch.ts b/src/cli-web-branch.ts index 035435b03..ceb462d9d 100644 --- a/src/cli-web-branch.ts +++ b/src/cli-web-branch.ts @@ -45,6 +45,8 @@ export interface CliFlags { /** Set by `sf sessions` when the user picks a specific session to resume */ _selectedSessionPath?: string; + /** `sf sessions --all` — list sessions across all projects */ + allSessions?: boolean; } type WritableLike = Pick; @@ -82,6 +84,8 @@ export function parseCliArgs(argv: string[]): CliFlags { flags.continue = true; } else if (arg === "--no-session") { flags.noSession = true; + } else if (arg === "--all" || arg === "-a") { + flags.allSessions = true; } else if (arg === "--worktree" || arg === "-w") { // -w with no value → auto-generate name; -w → use that name if (i + 1 < args.length && !args[i + 1].startsWith("-")) { diff --git a/src/cli.ts b/src/cli.ts index dcb42f7e0..1226a1daa 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -459,20 +459,27 @@ if ( // `sf sessions` — list past sessions and pick one to resume if (cliFlags.messages[0] === "sessions") { const cwd = process.cwd(); - const safePath = `--${cwd.replace(/^[/\\]/, "").replace(/[/\\:]/g, "-")}--`; - const projectSessionsDir = join(sessionsDir, safePath); - process.stderr.write(chalk.dim(`Loading sessions for ${cwd}...\n`)); - const sessions = await SessionManager.list(cwd, projectSessionsDir); + let sessions; + if (cliFlags.allSessions) { + process.stderr.write(chalk.dim("Loading all sessions across all projects...\n")); + sessions = await SessionManager.listAll(); + } else { + const safePath = `--${cwd.replace(/^[/\\]/, "").replace(/[/\\:]/g, "-")}--`; + const projectSessionsDir = join(sessionsDir, safePath); + process.stderr.write(chalk.dim(`Loading sessions for ${cwd}...\n`)); + sessions = await 
SessionManager.list(cwd, projectSessionsDir); + } if (sessions.length === 0) { process.stderr.write( - chalk.yellow("No sessions found for this directory.\n"), + chalk.yellow("No sessions found.\n"), ); process.exit(0); } - process.stderr.write(chalk.bold(`\n Sessions (${sessions.length}):\n\n`)); + const label = cliFlags.allSessions ? "all projects" : cwd; + process.stderr.write(chalk.bold(`\n Sessions (${sessions.length}) for ${label}:\n\n`)); const maxShow = 20; const toShow = sessions.slice(0, maxShow); @@ -485,8 +492,11 @@ if (cliFlags.messages[0] === "sessions") { ? s.firstMessage.replace(/\n/g, " ").substring(0, 80) : chalk.dim("(empty)"); const num = String(i + 1).padStart(3); + const projectLabel = cliFlags.allSessions && s.cwd + ? ` ${chalk.yellow(`[${s.cwd}]`)}` + : ""; process.stderr.write( - ` ${chalk.bold(num)}. ${chalk.green(date)} ${chalk.dim(`(${msgs} msgs)`)}${name}\n`, + ` ${chalk.bold(num)}. ${chalk.green(date)} ${chalk.dim(`(${msgs} msgs)`)}${name}${projectLabel}\n`, ); process.stderr.write(` ${chalk.dim(preview)}\n\n`); } @@ -562,11 +572,15 @@ async function runHeadlessFromAuto(headlessArgs: string[]): Promise { process.exit(0); } -// `sf auto [args...]` — shorthand for `sf headless auto [args...]` (#2732) -// Without this, `sf auto` falls through to the interactive TUI which hangs +// `sf autonomous [args...]` / `sf auto [args...]` — shorthand for headless +// autonomous mode (#2732). Without this, the command falls through to the TUI // when stdin/stdout are piped (non-TTY environments). -if (cliFlags.messages[0] === "auto") { - await runHeadlessFromAuto(cliFlags.messages); +if (cliFlags.messages[0] === "auto" || cliFlags.messages[0] === "autonomous") { + const headlessArgs = + cliFlags.messages[0] === "autonomous" + ? 
["auto", ...cliFlags.messages.slice(1)] + : cliFlags.messages; + await runHeadlessFromAuto(headlessArgs); } // Pi's tool bootstrap can mis-detect already-installed fd/rg on some systems @@ -829,16 +843,24 @@ if (!cliFlags.worktree && !isPrintMode) { } // --------------------------------------------------------------------------- -// Auto-redirect: `sf auto` with piped stdout → headless mode (#2732) +// Auto-redirect: autonomous mode with piped stdout → headless mode (#2732) // When stdout is not a TTY (e.g. `sf auto | cat`, `sf auto > file`), // the TUI cannot render and the process hangs. Redirect to headless mode // which handles non-interactive output gracefully. // --------------------------------------------------------------------------- -if (cliFlags.messages[0] === "auto" && !process.stdout.isTTY) { +if ( + (cliFlags.messages[0] === "auto" || + cliFlags.messages[0] === "autonomous") && + !process.stdout.isTTY +) { process.stderr.write( - "[forge] stdout is not a terminal — running auto-mode in headless mode.\n", + "[forge] stdout is not a terminal — running autonomous mode in headless mode.\n", ); - await runHeadlessFromAuto(cliFlags.messages.slice(1)); + const headlessArgs = + cliFlags.messages[0] === "autonomous" + ? ["auto", ...cliFlags.messages.slice(1)] + : cliFlags.messages; + await runHeadlessFromAuto(headlessArgs); } // --------------------------------------------------------------------------- diff --git a/src/headless-ui.ts b/src/headless-ui.ts index 0c7cda6a7..69e0618e0 100644 --- a/src/headless-ui.ts +++ b/src/headless-ui.ts @@ -470,6 +470,18 @@ export function formatThinkingLine(text: string): string { return `${c.dim}${c.italic}${tag("thinking")}${truncated}${c.reset}`; } +/** + * Format a text preview line from accumulated assistant text deltas. + * Used as a fallback when streaming is not enabled — shows a truncated one-liner. + * Unlike thinking, text is NOT italicized. 
+ */ +export function formatTextLine(text: string): string { + const trimmed = text.replace(/\s+/g, " ").trim(); + const truncated = + trimmed.length > 120 ? trimmed.slice(0, 117) + "..." : trimmed; + return `${c.dim}${tag("text")}${truncated}${c.reset}`; +} + // --------------------------------------------------------------------------- // Streaming Text / Thinking Formatters // --------------------------------------------------------------------------- diff --git a/src/headless.ts b/src/headless.ts index 9cb22d568..036dbe5c4 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -59,6 +59,7 @@ import { formatHeadlessHeartbeat, formatProgress, formatTextEnd, + formatTextLine, formatTextStart, formatThinkingEnd, formatThinkingLine, @@ -217,6 +218,7 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { }; const args = argv.slice(2); + let commandSeen = false; for (let i = 0; i < args.length; i++) { const arg = args[i]; @@ -297,8 +299,9 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { } else if (arg === "--bare") { options.bare = true; } - } else if (options.command === "auto") { - options.command = arg; + } else if (!commandSeen) { + options.command = arg === "autonomous" ? "auto" : arg; + commandSeen = true; } else { options.commandArgs.push(arg); } @@ -684,6 +687,7 @@ async function runHeadlessOnce( | { costUsd: number; inputTokens: number; outputTokens: number } | undefined; let thinkingBuffer = ""; + let textBuffer = ""; // Drop only adjacent identical formatProgress output. A widget that // re-emits the same setStatus on every LLM call would otherwise print // the same line N times in a row. Two different lines still both show; @@ -1179,7 +1183,7 @@ async function runHeadlessOnce( } // Non-verbose: accumulate text_delta for truncated one-liner else if (ame?.type === "text_delta") { - thinkingBuffer += String(ame.delta ?? ame.text ?? ""); + textBuffer += String(ame.delta ?? ame.text ?? 
""); } } @@ -1203,14 +1207,19 @@ async function runHeadlessOnce( inThinkingBlock = false; } } - // Non-verbose: flush accumulated buffer as truncated one-liner + // Non-verbose: flush accumulated buffers as truncated one-liners else if ( !options.verbose && - thinkingBuffer.trim() && (eventType === "tool_execution_start" || eventType === "message_end") ) { - writeHeadlessLine(formatThinkingLine(thinkingBuffer)); - thinkingBuffer = ""; + if (textBuffer.trim()) { + writeHeadlessLine(formatTextLine(textBuffer)); + textBuffer = ""; + } + if (thinkingBuffer.trim()) { + writeHeadlessLine(formatThinkingLine(thinkingBuffer)); + thinkingBuffer = ""; + } } // Compute tool duration for tool_execution_end @@ -1550,7 +1559,8 @@ async function runHeadlessOnce( await completionPromise; } - // Auto-mode chaining: if --auto and milestone creation succeeded, send /sf auto + // Autonomous-mode chaining: if --auto and milestone creation succeeded, + // send the canonical autonomous command. if ( isNewMilestone && options.auto && @@ -1560,7 +1570,7 @@ async function runHeadlessOnce( ) { if (!options.json) { process.stderr.write( - "[headless] Milestone ready — chaining into auto-mode...\n", + "[headless] Milestone ready — chaining into autonomous mode...\n", ); } @@ -1575,10 +1585,10 @@ async function runHeadlessOnce( }); try { - await client.prompt("/sf auto"); + await client.prompt("/sf autonomous"); } catch (err) { process.stderr.write( - `[headless] Error: Failed to start auto-mode: ${err instanceof Error ? err.message : String(err)}\n`, + `[headless] Error: Failed to start autonomous mode: ${err instanceof Error ? 
err.message : String(err)}\n`, ); exitCode = EXIT_ERROR; } diff --git a/src/help-text.ts b/src/help-text.ts index 3467faaff..4ad7dc19e 100644 --- a/src/help-text.ts +++ b/src/help-text.ts @@ -24,13 +24,16 @@ const SUBCOMMAND_HELP: Record = { sessions: [ "Usage: sf sessions", + " sf sessions --all", "", - "List all saved sessions for the current directory and interactively", - "pick one to resume. Shows date, message count, and a preview of the", - "first message for each session.", + "List saved sessions and interactively pick one to resume. Shows date,", + "message count, and a preview of the first message for each session.", "", - "Sessions are stored per-directory, so you only see sessions that were", - "started from the current working directory.", + "Sessions are stored per-directory by default. Use --all to list sessions", + "across all projects.", + "", + " sf sessions List sessions for the current directory", + " sf sessions --all List sessions across all projects", "", "Compare with --continue (-c) which always resumes the most recent session.", ].join("\n"), @@ -155,7 +158,7 @@ const SUBCOMMAND_HELP: Record = { headless: [ "Usage: sf headless [flags] [command] [args...]", "", - "Run /sf commands without the TUI. Default command: auto", + "Run /sf commands without the TUI. 
Default command: autonomous", "", "Flags:", " --timeout N Overall timeout in ms (default: 300000)", @@ -170,7 +173,8 @@ const SUBCOMMAND_HELP: Record = { " --events Filter JSONL output to specific event types (comma-separated)", "", "Commands:", - " auto Run all queued units continuously (default)", + " autonomous Run all queued product units continuously (default)", + " auto Alias for autonomous", " next Run one unit", " status Show progress dashboard", " new-milestone Create a milestone from a specification document", @@ -179,7 +183,7 @@ const SUBCOMMAND_HELP: Record = { "new-milestone flags:", " --context Path to spec/PRD file (use '-' for stdin)", " --context-text Inline specification text", - " --auto Start auto-mode after milestone creation", + " --auto Start autonomous mode after milestone creation", " --verbose Show tool calls in progress output", "", "Output formats:", @@ -188,19 +192,19 @@ const SUBCOMMAND_HELP: Record = { " stream-json Stream JSONL events to stdout in real time (same as --json)", "", "Examples:", - " sf headless Run /sf auto", + " sf headless Run /sf autonomous", " sf headless next Run one unit", - " sf headless --output-format json auto Structured JSON result on stdout", + " sf headless --output-format json autonomous Structured JSON result on stdout", " sf headless --json status Machine-readable JSONL stream", " sf headless --timeout 60000 With 1-minute timeout", - " sf headless --bare auto Minimal context (CI/ecosystem use)", - " sf headless --resume abc123 auto Resume a prior session", + " sf headless --bare autonomous Minimal context (CI/ecosystem use)", + " sf headless --resume abc123 autonomous Resume a prior session", " sf headless new-milestone --context spec.md Create milestone from file", " cat spec.md | sf headless new-milestone --context - From stdin", " sf headless new-milestone --context spec.md --auto Create + auto-execute", - " sf headless --supervised auto Supervised orchestrator mode", - " sf headless --answers 
answers.json auto With pre-supplied answers", - " sf headless --events agent_end,extension_ui_request auto Filtered event stream", + " sf headless --supervised autonomous Supervised orchestrator mode", + " sf headless --answers answers.json autonomous With pre-supplied answers", + " sf headless --events agent_end,extension_ui_request autonomous Filtered event stream", " sf headless query Instant JSON state snapshot", "", "Exit codes: 0 = success, 1 = error/timeout, 10 = blocked, 11 = cancelled", @@ -269,7 +273,10 @@ export function printHelp(version: string): void { " worktree Manage worktrees (list, merge, clean, remove)\n", ); process.stdout.write( - " auto [args] Run auto-mode without TUI (pipeable)\n", + " autonomous [args] Run autonomous mode without TUI (pipeable)\n", + ); + process.stdout.write( + " auto [args] Alias for autonomous\n", ); process.stdout.write( " headless [cmd] [args] Run /sf commands without TUI (default: auto)\n", diff --git a/src/resources/agents/scout.md b/src/resources/agents/scout.md index f8c484ef3..f606eb68f 100644 --- a/src/resources/agents/scout.md +++ b/src/resources/agents/scout.md @@ -1,11 +1,15 @@ --- name: scout description: Fast codebase recon that returns compressed context for handoff to other agents -tools: read, grep, find, ls, bash +tools: read, grep, find, ls, bash, codebase_search --- You are a scout. Quickly investigate a codebase and return structured findings that another agent can use without re-reading everything. +Use in-process `grep`, `find`, `ls`, and `lsp` before shelling out. These keep exploration inside SF's tool surface and use native backends where available. + +`codebase_search` is the Sift-backed local retrieval tool. Use it when exact text search is too literal, when the relevant file path is unknown, or when you need hybrid BM25/vector/path evidence before reading files. You are still the scout role; Sift is one tool you can use. 
+ Your output will be passed to an agent who has NOT seen the files you explored. Thoroughness (infer from task, default medium): @@ -16,7 +20,7 @@ Thoroughness (infer from task, default medium): Strategy: -1. grep/find to locate relevant code +1. Use `grep`, `find`, `ls`, and `lsp` to locate relevant code 2. Read key sections (not entire files) 3. Identify types, interfaces, key functions 4. Note dependencies between files diff --git a/src/resources/agents/worker.md b/src/resources/agents/worker.md index e319213df..00bf5f5d1 100644 --- a/src/resources/agents/worker.md +++ b/src/resources/agents/worker.md @@ -9,7 +9,7 @@ Work autonomously to complete the assigned task. Use all available tools as need - Do **not** spawn subagents or act as an orchestrator unless the parent task explicitly instructs you to do so. - If the task looks like SF orchestration, planning, scouting, parallel dispatch, or review routing, stop and report that the caller should use the appropriate specialist agent instead (for example: `sf-worker`, `sf-scout`, `sf-reviewer`, or the top-level orchestrator). -- In particular, do **not** call `sf_scout`, `subagent`, `launch_parallel_view`, or `sf_execute_parallel` on your own initiative. +- In particular, do **not** call `subagent`, `launch_parallel_view`, or `sf_execute_parallel` on your own initiative. Output format when finished: diff --git a/src/resources/extensions/claude-code-cli/stream-adapter.ts b/src/resources/extensions/claude-code-cli/stream-adapter.ts index 103fbe43d..0052d545f 100644 --- a/src/resources/extensions/claude-code-cli/stream-adapter.ts +++ b/src/resources/extensions/claude-code-cli/stream-adapter.ts @@ -1155,6 +1155,20 @@ export function createClaudeCodeCanUseToolHandler( }, ]; } + } else if (!perms || (Array.isArray(perms) && perms.length === 0)) { + // Non-Bash tool with no SDK-supplied suggestions. 
Without a + // fallback rule the SDK would return `behavior: "allow"` + // with no `updatedPermissions`, so "Always Allow" silently + // fails to persist for tools whose input varies per call + // (e.g. AskUserQuestion with different `questions` payloads). + // A bare `{ toolName }` rule matches any input. + perms = [{ + type: "addRules", + rules: [{ toolName }], + behavior: "allow", + destination: "localSettings", + }]; + notifyLabel = toolName; } // Notify with the resolved pattern (label already previewed it) if (notifyLabel) { diff --git a/src/resources/extensions/search-the-web/native-search.ts b/src/resources/extensions/search-the-web/native-search.ts index 1f47d1ecb..1b88e9a40 100644 --- a/src/resources/extensions/search-the-web/native-search.ts +++ b/src/resources/extensions/search-the-web/native-search.ts @@ -30,6 +30,18 @@ const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]); */ export const MAX_NATIVE_SEARCHES_PER_SESSION = 15; +/** + * Returns true when the provider supports native Anthropic web_search injection. + * + * Purpose: github-copilot, minimax, and kimi use Claude-compatible wire format + * but do NOT support the web_search tool — injecting it causes a 400 error. + * The `claude-` model-name prefix heuristic is too broad (those providers also + * use claude-* names). Only the explicit "anthropic" provider tag is trusted. + */ +export function supportsNativeWebSearch(provider: string): boolean { + return provider === "anthropic"; +} + /** When true, skip native web search injection and keep Brave/custom tools active on Anthropic. */ export function preferBraveSearch(): boolean { // PREFERENCES.md takes priority over env var @@ -183,8 +195,12 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { } else if (modelSelectFired) { isAnthropic = isAnthropicProvider; } else { - const modelName = typeof payload.model === "string" ? 
payload.model : ""; - isAnthropic = modelName.startsWith("claude-"); + // No provider info available and no model_select event fired. + // Without a confirmed provider, skip native web_search injection entirely + // rather than relying on the too-broad `claude-` prefix heuristic + // (github-copilot/minimax/kimi also use claude-* names but do not + // support web_search, causing 400 errors on injection). + isAnthropic = false; } if (!isAnthropic) return; diff --git a/src/resources/extensions/sf-tui/footer.ts b/src/resources/extensions/sf-tui/footer.ts index 0371114bf..daecbeba7 100644 --- a/src/resources/extensions/sf-tui/footer.ts +++ b/src/resources/extensions/sf-tui/footer.ts @@ -30,8 +30,35 @@ function hexToRgb(hex: string): { r: number; g: number; b: number } { } function ansiFg(hex: string, text: string, bold = false): string { + // Use 16-color ANSI codes for Termius compatibility + // Map hex colors to nearest standard ANSI color const { r, g, b } = hexToRgb(hex); - return `\x1b[${bold ? "1;" : ""}38;2;${r};${g};${b}m${text}${RESET}`; + const brightness = (r + g + b) / 3; + + let colorCode: number; + if (brightness < 50) { + colorCode = 30; // black + } else if (brightness < 100) { + colorCode = 90; // bright black + } else if (r > g + b) { + colorCode = bold ? 91 : 31; // red + } else if (g > r + b) { + colorCode = bold ? 92 : 32; // green + } else if (b > r + g) { + colorCode = bold ? 94 : 34; // blue + } else if (r > 200 && g > 150) { + colorCode = bold ? 93 : 33; // yellow/orange + } else if (r > 200 && g < 100 && b > 150) { + colorCode = bold ? 95 : 35; // magenta + } else if (g > 200 && b > 150) { + colorCode = bold ? 96 : 36; // cyan + } else if (brightness > 200) { + colorCode = bold ? 97 : 37; // white + } else { + colorCode = bold ? 97 : 37; // default white + } + + return `\x1b[${bold ? 
"1;" : ""}${colorCode}m${text}${RESET}`; } function toneHex(tone: Tone): string { diff --git a/src/resources/extensions/sf/auto-direct-dispatch.ts b/src/resources/extensions/sf/auto-direct-dispatch.ts index 99746c13f..b800b1922 100644 --- a/src/resources/extensions/sf/auto-direct-dispatch.ts +++ b/src/resources/extensions/sf/auto-direct-dispatch.ts @@ -81,7 +81,7 @@ export async function dispatchDirectPhase( ?.require_slice_discussion; if (requireDiscussion && !sliceContextFile) { ctx.ui.notify( - `Slice ${sid} requires discussion before planning. Run /sf discuss to discuss this slice, then /sf auto to resume.`, + `Slice ${sid} requires discussion before planning. Run /sf discuss to discuss this slice, then /sf autonomous to resume.`, "info", ); await pauseAuto(ctx, pi); diff --git a/src/resources/extensions/sf/auto-post-unit.ts b/src/resources/extensions/sf/auto-post-unit.ts index 16b1e1fd3..63413cd7e 100644 --- a/src/resources/extensions/sf/auto-post-unit.ts +++ b/src/resources/extensions/sf/auto-post-unit.ts @@ -308,7 +308,7 @@ export function detectRogueFileWrites( } export const STEP_COMPLETE_FALLBACK_MESSAGE = - "Step complete. Run /clear, then /sf to continue (or /sf auto to run continuously)."; + "Step complete. Run /clear, then /sf to continue (or /sf autonomous to run continuously)."; export function buildStepCompleteMessage( nextState: import("./types.js").SFState, @@ -319,7 +319,7 @@ export function buildStepCompleteMessage( const next = describeNextUnit(nextState); return ( `Step complete. Next: ${next.label}\n` + - `Run /clear, then /sf to continue (or /sf auto to run continuously).` + `Run /clear, then /sf to continue (or /sf autonomous to run continuously).` ); } @@ -913,7 +913,7 @@ export async function postUnitPreVerification( if (err instanceof MergeConflictError) { ctx.ui.notify( `slice-cadence merge conflict in ${sid}: ${err.conflictedFiles.join(", ")}. 
` + - `Resolve manually on main and run \`/sf auto\` to resume.`, + `Resolve manually on main and run \`/sf autonomous\` to resume.`, "error", ); // Stop auto AND signal the outer postUnit flow to exit early. @@ -1289,7 +1289,7 @@ export async function postUnitPreVerification( s.verificationRetryCount.delete(retryKey); s.pendingVerificationRetry = null; ctx.ui.notify( - `Milestone ${s.currentUnit.id} verification failed after ${MAX_VERIFICATION_RETRIES} retries — worktree branch preserved. Re-run /sf auto once blockers are resolved.`, + `Milestone ${s.currentUnit.id} verification failed after ${MAX_VERIFICATION_RETRIES} retries — worktree branch preserved. Re-run /sf autonomous once blockers are resolved.`, "error", ); await pauseAuto(ctx, pi); diff --git a/src/resources/extensions/sf/auto-start.ts b/src/resources/extensions/sf/auto-start.ts index 77844b32f..e4faeae46 100644 --- a/src/resources/extensions/sf/auto-start.ts +++ b/src/resources/extensions/sf/auto-start.ts @@ -255,9 +255,9 @@ export function auditOrphanedMilestoneBranches( ? 
` Worktree directory at .sf/worktrees/${milestoneId}/ holds the live work.` : ""; warnings.push( - `Branch ${branch} has ${commitsAhead} commit(s) ahead of ${mainBranch} for in-progress milestone ${milestoneId}.` + + `Branch ${branch} has ${commitsAhead} commit(s) ahead of ${mainBranch} for in-progress milestone ${milestoneId}.` + wtSuffix + - ` Run \`/sf auto\` to resume, or merge manually if abandoning.`, + ` Run \`/sf autonomous\` to resume, or merge manually if abandoning.`, ); // #4764 telemetry diff --git a/src/resources/extensions/sf/auto-timeout-recovery.ts b/src/resources/extensions/sf/auto-timeout-recovery.ts index 64d744a02..afa0c3d72 100644 --- a/src/resources/extensions/sf/auto-timeout-recovery.ts +++ b/src/resources/extensions/sf/auto-timeout-recovery.ts @@ -347,7 +347,7 @@ export async function recoverTimedOutUnit( lastRecoveryReason: reason, }); ctx.ui.notify( - `Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s): ${diagnostic}. Worktree branch preserved. Re-run /sf auto once blockers are resolved.`, + `Milestone ${unitId} ${reason}-recovery exhausted ${maxRecoveryAttempts} attempt(s): ${diagnostic}. Worktree branch preserved. Re-run /sf autonomous once blockers are resolved.`, "error", ); return "paused"; diff --git a/src/resources/extensions/sf/auto.ts b/src/resources/extensions/sf/auto.ts index 480119de7..5998f1d80 100644 --- a/src/resources/extensions/sf/auto.ts +++ b/src/resources/extensions/sf/auto.ts @@ -1255,9 +1255,9 @@ export async function pauseAuto( ctx?.ui.setWidget("sf-progress", undefined); ctx?.ui.setFooter(undefined); if (ctx) initHealthWidget(ctx); - const resumeCmd = s.stepMode ? "/sf next" : "/sf auto"; + const resumeCmd = s.stepMode ? "/sf next" : "/sf autonomous"; ctx?.ui.notify( - `${s.stepMode ? "Step" : "Auto"}-mode paused (Escape). Type to interact, or ${resumeCmd} to resume.`, + `${s.stepMode ? "Step" : "Autonomous"} mode paused (Escape). 
Type to interact, or ${resumeCmd} to resume.`, "info", ); } diff --git a/src/resources/extensions/sf/auto/loop.ts b/src/resources/extensions/sf/auto/loop.ts index dfc4f1af4..0491899aa 100644 --- a/src/resources/extensions/sf/auto/loop.ts +++ b/src/resources/extensions/sf/auto/loop.ts @@ -8,7 +8,7 @@ */ import { randomUUID } from "node:crypto"; -import { mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import type { ExtensionAPI, @@ -95,6 +95,85 @@ function saveStuckState(basePath: string, state: LoopState): void { } } +// ── Custom workflow verification retry persistence ─────────────────────── +// Custom workflow verifiers can request a retry after a step runs. Persisting +// retry counts under the run directory prevents restart loops from resetting the +// retry budget and repeatedly dispatching the same failing step. +const MAX_CUSTOM_ENGINE_VERIFY_RETRIES = 3; + +function customVerifyRetryStateDir(s: { + activeRunDir?: string | null; + basePath: string; +}): string { + return s.activeRunDir + ? join(s.activeRunDir, "runtime") + : join(sfRoot(s.basePath), "runtime"); +} + +function customVerifyRetryStatePath(s: { + activeRunDir?: string | null; + basePath: string; +}): string { + return join(customVerifyRetryStateDir(s), "custom-verify-retries.json"); +} + +function hydrateCustomVerifyRetryCounts(s: AutoSession): Map { + if (s.verificationRetryCount.size > 0) { + return s.verificationRetryCount; + } + + try { + const raw = JSON.parse(readFileSync(customVerifyRetryStatePath(s), "utf-8")); + const counts = + raw && typeof raw === "object" && raw.counts && typeof raw.counts === "object" + ? 
(raw.counts as Record) + : {}; + for (const [key, value] of Object.entries(counts)) { + if (typeof value === "number" && Number.isFinite(value) && value > 0) { + s.verificationRetryCount.set(key, Math.floor(value)); + } + } + } catch (err) { + debugLog("autoLoop", { + phase: "load-custom-verify-retries-failed", + error: err instanceof Error ? err.message : String(err), + }); + } + + return s.verificationRetryCount; +} + +function saveCustomVerifyRetryCounts(s: AutoSession): void { + const retryCounts = s.verificationRetryCount; + const filePath = customVerifyRetryStatePath(s); + + try { + if (retryCounts.size === 0) { + unlinkSync(filePath); + return; + } + mkdirSync(customVerifyRetryStateDir(s), { recursive: true }); + writeFileSync( + filePath, + JSON.stringify({ + counts: Object.fromEntries(retryCounts), + updatedAt: new Date().toISOString(), + }) + "\n", + ); + } catch (err) { + const code = + err && typeof err === "object" && "code" in err + ? (err as { code?: string }).code + : undefined; + if (code !== "ENOENT") { + debugLog("autoLoop", { + phase: "save-custom-verify-retries-failed", + error: err instanceof Error ? err.message : String(err), + }); + } + } +} + // ── Memory pressure monitoring (#3331) ────────────────────────────────── // Check heap usage every N iterations and trigger graceful shutdown before // the OS OOM killer sends SIGKILL. The threshold is 90% of the V8 heap @@ -281,7 +360,7 @@ export async function autoLoop( pi, `Memory pressure: heap at ${mem.heapMB}MB / ${mem.limitMB}MB (${Math.round(mem.pct * 100)}%). ` + `Stopping gracefully to prevent OOM kill after ${iteration} iterations. 
` + - `Resume with /sf auto to continue from where you left off.`, + `Resume with /sf autonomous to continue from where you left off.`, ); finishTurn("stopped", "timeout", "memory-pressure"); break; @@ -508,20 +587,75 @@ export async function autoLoop( break; } if (verifyResult === "retry") { + const recoveryKey = `${iterData.unitType}/${iterData.unitId}`; + const retryCounts = hydrateCustomVerifyRetryCounts(s); + const attempts = (retryCounts.get(recoveryKey) ?? 0) + 1; + retryCounts.set(recoveryKey, attempts); + saveCustomVerifyRetryCounts(s); debugLog("autoLoop", { phase: "custom-engine-verify-retry", iteration, unitId: iterData.unitId, + attempts, }); deps.uokObserver?.onPhaseResult("custom-engine", "retry", { unitType: iterData.unitType, unitId: iterData.unitId, + attempts, }); + if (attempts > MAX_CUSTOM_ENGINE_VERIFY_RETRIES) { + const recovery = await policy.recover( + iterData.unitType, + iterData.unitId, + { basePath: s.basePath }, + ); + if (recovery.outcome === "pause") { + await deps.pauseAuto(ctx, pi); + finishTurn( + "paused", + "manual-attention", + recovery.reason ?? "custom-engine-verify-retry-exhausted", + ); + break; + } + if (recovery.outcome === "skip") { + await deps.stopAuto( + ctx, + pi, + recovery.reason ?? + `Custom workflow verification for ${iterData.unitId} requested skip after retry exhaustion, but the custom engine cannot reconcile skipped steps.`, + ); + finishTurn( + "stopped", + "manual-attention", + "custom-engine-verify-retry-exhausted", + ); + break; + } + const exhaustedReason = `Custom workflow verification for ${iterData.unitId} requested retry ${attempts} times without passing.`; + await deps.stopAuto( + ctx, + pi, + recovery.outcome === "stop" && recovery.reason + ? 
recovery.reason + : exhaustedReason, + ); + finishTurn( + "stopped", + "manual-attention", + "custom-engine-verify-retry-exhausted", + ); + break; + } finishTurn("retry"); continue; } // Verification passed — mark step complete + s.verificationRetryCount.delete( + `${iterData.unitType}/${iterData.unitId}`, + ); + saveCustomVerifyRetryCounts(s); debugLog("autoLoop", { phase: "custom-engine-reconcile", iteration, diff --git a/src/resources/extensions/sf/auto/phases.ts b/src/resources/extensions/sf/auto/phases.ts index efc92cb4a..b54f8597d 100644 --- a/src/resources/extensions/sf/auto/phases.ts +++ b/src/resources/extensions/sf/auto/phases.ts @@ -78,6 +78,10 @@ import { logError, logWarning, } from "../workflow-logger.js"; +import { + handleProductAudit, + type ProductAuditParams, +} from "../tools/product-audit-tool.js"; import { getRequiredWorkflowToolsForAutoUnit, getWorkflowTransportSupportError, @@ -115,6 +119,53 @@ export function _resolveReportBasePath( return s.originalBasePath || s.basePath; } +/** + * Fire the product-audit for a milestone after successful merge. + * Uses s.productAuditMilestoneId as a guard to ensure the audit fires exactly + * once per milestone (mergeAndExit can be called multiple times for the same + * milestone at different transition points). + * + * The audit is fired with a "no-gaps" placeholder verdict. Re-run + * `/sf product-audit` manually for full LLM-powered gap analysis. + */ +async function maybeFireProductAudit( + s: AutoSession, + ctx: ExtensionContext, +): Promise { + const mid = s.currentMilestoneId; + if (!mid) return; + + // Guard: only fire once per milestone + if (s.productAuditMilestoneId === mid) return; + s.productAuditMilestoneId = mid; + + const params: ProductAuditParams = { + milestoneId: mid, + verdict: "no-gaps", + summary: + "Auto-fired placeholder audit at milestone merge. 
Re-run `/sf product-audit` for full LLM-powered gap analysis.", + gaps: [], + }; + + const result = await handleProductAudit(params, s.basePath); + if ("error" in result) { + logWarning("engine", "Product audit auto-fire failed", { + milestone: mid, + error: result.error, + }); + ctx.ui.notify( + `Product audit for ${mid} auto-fired but may need manual refresh: ${result.error}`, + "warning", + ); + } else { + debugLog("autoLoop", { + phase: "product-audit-fired", + milestone: mid, + jsonPath: result.jsonPath, + }); + } +} + function clearDeferredCommitAfterCancelledUnit( s: AutoSession, ctx: ExtensionContext, @@ -696,7 +747,7 @@ export async function runPreDispatch( if (mergeErr instanceof MergeConflictError) { // Real code conflicts — stop the loop instead of retrying forever (#2330) ctx.ui.notify( - `Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf auto to resume.`, + `Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf autonomous to resume.`, "error", ); await deps.stopAuto( @@ -712,7 +763,7 @@ export async function runPreDispatch( error: String(mergeErr), }); ctx.ui.notify( - `Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf auto to resume.`, + `Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. 
Resolve and run /sf autonomous to resume.`, "error", ); await deps.stopAuto( @@ -723,6 +774,9 @@ export async function runPreDispatch( return { action: "break", reason: "merge-failed" }; } + // Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId) + await maybeFireProductAudit(s, ctx); + // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302) deps.invalidateAllCaches(); @@ -815,10 +869,12 @@ export async function runPreDispatch( deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui); // Prevent stopAuto from attempting the same merge (#2645) s.milestoneMergedInPhases = true; + // Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId) + await maybeFireProductAudit(s, ctx); } catch (mergeErr) { if (mergeErr instanceof MergeConflictError) { ctx.ui.notify( - `Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf auto to resume.`, + `Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf autonomous to resume.`, "error", ); await deps.stopAuto( @@ -833,7 +889,7 @@ export async function runPreDispatch( error: String(mergeErr), }); ctx.ui.notify( - `Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf auto to resume.`, + `Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf autonomous to resume.`, "error", ); await deps.stopAuto( @@ -869,12 +925,12 @@ export async function runPreDispatch( ); } else if (state.phase === "blocked") { const blockerMsg = `Blocked: ${state.blockers.join(", ")}`; - // Pause instead of hard-stop so the session is resumable with `/sf auto`. + // Pause instead of hard-stop so the session is resumable with `/sf autonomous`. // Hard-stop here was causing premature termination when slice dependencies // were temporarily unresolvable (e.g. 
after reassessment added new slices). await deps.pauseAuto(ctx, pi); ctx.ui.notify( - `${blockerMsg}. Fix and run /sf auto to resume.`, + `${blockerMsg}. Fix and run /sf autonomous to resume.`, "warning", ); deps.sendDesktopNotification( @@ -954,10 +1010,12 @@ export async function runPreDispatch( deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui); // Prevent stopAuto from attempting the same merge (#2645) s.milestoneMergedInPhases = true; + // Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId) + await maybeFireProductAudit(s, ctx); } catch (mergeErr) { if (mergeErr instanceof MergeConflictError) { ctx.ui.notify( - `Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf auto to resume.`, + `Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf autonomous to resume.`, "error", ); await deps.stopAuto( @@ -972,7 +1030,7 @@ export async function runPreDispatch( error: String(mergeErr), }); ctx.ui.notify( - `Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf auto to resume.`, + `Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf autonomous to resume.`, "error", ); await deps.stopAuto( @@ -1019,7 +1077,10 @@ export async function runPreDispatch( ); } await deps.pauseAuto(ctx, pi); - ctx.ui.notify(`${blockerMsg}. Fix and run /sf auto to resume.`, "warning"); + ctx.ui.notify( + `${blockerMsg}. Fix and run /sf autonomous to resume.`, + "warning", + ); deps.sendDesktopNotification( "SF", blockerMsg, @@ -1078,7 +1139,7 @@ export async function runDispatch( }); // Warning-level stops are recoverable human checkpoints (e.g. UAT verdict // gate) — pause instead of hard-stopping so the session is resumable with - // `/sf auto`. Error/info-level stops remain hard stops for infrastructure + // `/sf autonomous`. 
Error/info-level stops remain hard stops for infrastructure // failures and terminal conditions respectively. // See: https://github.com/singularity-forge/sf-run/issues/2474 if (dispatchResult.level === "warning") { @@ -1530,7 +1591,7 @@ export async function runGuards( } if (budgetEnforcementAction === "pause") { ctx.ui.notify( - `${msg} Pausing auto-mode — /sf auto to override and continue.`, + `${msg} Pausing autonomous mode — /sf autonomous to override and continue.`, "warning", ); deps.sendDesktopNotification( @@ -1585,7 +1646,7 @@ export async function runGuards( ) { const msg = `Context window at ${contextUsage.percent}% (threshold: ${contextThreshold}%). Pausing to prevent truncated output.`; ctx.ui.notify( - `${msg} Run /sf auto to continue (will start fresh session).`, + `${msg} Run /sf autonomous to continue (will start fresh session).`, "warning", ); deps.sendDesktopNotification( @@ -1817,7 +1878,7 @@ export async function runUnitPhase( prefs?.safety_harness as Record | undefined, ); if (safetyConfig.enabled && safetyConfig.evidence_collection) { - resetEvidence(); + resetEvidence(unitId, s.basePath); } if ( safetyConfig.enabled && diff --git a/src/resources/extensions/sf/auto/session.ts b/src/resources/extensions/sf/auto/session.ts index 0e6fb6ad3..c4907fe94 100644 --- a/src/resources/extensions/sf/auto/session.ts +++ b/src/resources/extensions/sf/auto/session.ts @@ -173,6 +173,11 @@ export class AutoSession { * stopAuto does not attempt the same merge a second time (#2645). */ milestoneMergedInPhases = false; + /** Set to the milestoneId after product audit fires at merge, so the audit + * fires exactly once per milestone (not twice when mergeAndExit is called + * at both the transition point and the terminal complete point). */ + productAuditMilestoneId: string | null = null; + // ── Dispatch circuit breakers ────────────────────────────────────── rewriteAttemptCount = 0; /** Tracks consecutive bootstrap attempts that found phase === "complete". 
@@ -330,6 +335,7 @@ export class AutoSession { this.pendingTaskCompleteFailures.clear(); this.isolationDegraded = false; this.milestoneMergedInPhases = false; + this.productAuditMilestoneId = null; this.checkpointSha = null; this.preUnitDirtyFiles = []; this.stagedPendingCommit = false; diff --git a/src/resources/extensions/sf/bootstrap/db-tools.ts b/src/resources/extensions/sf/bootstrap/db-tools.ts index 4a425f104..7c255ee32 100644 --- a/src/resources/extensions/sf/bootstrap/db-tools.ts +++ b/src/resources/extensions/sf/bootstrap/db-tools.ts @@ -1017,6 +1017,61 @@ export function registerDbTools(pi: ExtensionAPI): void { ), }), execute: planMilestoneExecute, + renderCall(args: any, theme: any) { + const milestoneId = args?.milestoneId ? String(args.milestoneId) : ""; + const title = args?.title ? String(args.title) : ""; + const slices = Array.isArray(args?.slices) ? args.slices : []; + let text = theme.fg("toolTitle", theme.bold("Plan Milestone")); + if (milestoneId || title) { + text += theme.fg( + "muted", + ` ${[milestoneId, title].filter(Boolean).join(": ")}`, + ); + } + if (slices.length > 0) { + text += theme.fg( + "dim", + ` — ${slices.length} slice${slices.length === 1 ? "" : "s"}`, + ); + } + return new Text(text, 0, 0); + }, + renderResult(result: any, _options: any, theme: any) { + const d = result.details; + if (result.isError || d?.error) { + const textContent = result.content?.find?.( + (item: any) => item?.type === "text", + )?.text; + return new Text( + theme.fg("error", `Error: ${d?.error ?? textContent ?? "unknown"}`), + 0, + 0, + ); + } + const milestoneId = d?.milestoneId ? String(d.milestoneId) : "milestone"; + const title = d?.title ? String(d.title) : ""; + const sliceCount = + typeof d?.sliceCount === "number" ? d.sliceCount : undefined; + let text = theme.fg( + "success", + `${milestoneId} planned${title ? `: ${title}` : ""}`, + ); + if (sliceCount !== undefined) { + text += theme.fg( + "dim", + ` · ${sliceCount} slice${sliceCount === 1 ? 
"" : "s"}`, + ); + } + if (d?.firstSliceId || d?.firstSliceTitle) { + text += theme.fg( + "dim", + ` · next ${[d.firstSliceId, d.firstSliceTitle] + .filter(Boolean) + .join(": ")}`, + ); + } + return new Text(text, 0, 0); + }, }; pi.registerTool(planMilestoneTool); diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.ts b/src/resources/extensions/sf/bootstrap/register-hooks.ts index a0a364d05..63597d109 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.ts +++ b/src/resources/extensions/sf/bootstrap/register-hooks.ts @@ -34,6 +34,10 @@ import { resetLearningRuntime, selectLearnedModel, } from "../learning/runtime.js"; +import { + observeMemorySleeperToolResult, + resetMemorySleeper, +} from "../memory-sleeper.js"; import { initNotificationStore } from "../notification-store.js"; import { initNotificationWidget } from "../notification-widget.js"; import { @@ -113,6 +117,7 @@ export function registerHooks( pi.on("session_start", async (_event, ctx) => { lastGeminiPreflightWarning = undefined; resetLearningRuntime(); + resetMemorySleeper(); try { const sid = ctx.sessionManager?.getSessionId?.() ?? ""; const sfile = ctx.sessionManager?.getSessionFile?.() ?? 
""; @@ -233,6 +238,7 @@ export function registerHooks( pi.on("session_switch", async (_event, ctx) => { lastGeminiPreflightWarning = undefined; resetLearningRuntime(); + resetMemorySleeper(); initNotificationStore(process.cwd()); installNotifyInterceptor(ctx); resetWriteGateState(); @@ -520,6 +526,26 @@ export function registerHooks( }); pi.on("tool_result", async (event) => { + if (isAutoActive()) { + const steer = observeMemorySleeperToolResult(event); + if (steer) { + pi.sendMessage( + { + customType: "sf-memory-sleeper", + content: steer.content, + display: false, + details: { + key: steer.key, + severity: steer.severity, + toolName: event.toolName, + toolCallId: event.toolCallId, + }, + }, + { deliverAs: "steer" }, + ); + } + } + if (event.toolName !== "ask_user_questions") return; const milestoneId = getDiscussionMilestoneId(process.cwd()); const queueActive = isQueuePhaseActive(); diff --git a/src/resources/extensions/sf/code-intelligence.ts b/src/resources/extensions/sf/code-intelligence.ts index db5b08674..0b7db4947 100644 --- a/src/resources/extensions/sf/code-intelligence.ts +++ b/src/resources/extensions/sf/code-intelligence.ts @@ -679,13 +679,13 @@ function buildProjectRagContextLines( lines.push( prefs?.project_rag_auto_index === false ? "- Do not auto-index unless explicitly needed; query existing indexes first. " + - "If any Project RAG tool is missing or fails, continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout." + "If any Project RAG tool is missing or fails, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, `codebase_search`, and scout." : "- Index first if the backend is stale or empty; use incremental indexing when available. 
" + - "If any Project RAG tool is missing or fails, continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.", + "If any Project RAG tool is missing or fails, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, `codebase_search`, and scout.", ); } else { lines.push( - "- Project RAG: not configured. This is optional; continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.", + "- Project RAG: not configured. This is optional; continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, `codebase_search`, and scout.", ); lines.push( "- To enable later: build/install Brainwires/project-rag, then run `/sf codebase rag init` or set `SF_PROJECT_RAG_BIN` before initializing MCP config.", @@ -720,11 +720,11 @@ function buildSiftContextLines( ); lines.push( "- Sift uses a sector-aware cache in the platform cache directory, typically `~/.cache/sift`; " + - "if the CLI is missing or fails, continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.", + "if the CLI is missing or fails, continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.", ); } else { lines.push( - "- Sift: not available. This is optional; continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.", + "- Sift: not available. This is optional; continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.", ); lines.push( "- To enable later: install `rupurt/sift` on PATH or set `SIFT_PATH` to the sift binary.", @@ -736,7 +736,7 @@ function buildSiftContextLines( function buildNoCodebaseIndexerContextLines(): string[] { return [ - "- Codebase indexer: disabled by `codebase.indexer_backend: none`; continue with `.sf/CODEBASE.md`, `rg`, `lsp`, and scout.", + "- Codebase indexer: disabled by `codebase.indexer_backend: none`; continue with `.sf/CODEBASE.md`, native `grep`/`find`/`ls`, `lsp`, and scout.", ]; } @@ -852,7 +852,7 @@ export function formatProjectRagStatus( } lines.push(""); lines.push( - "Project RAG is optional. 
SF falls back to CODEBASE.md, rg, lsp, and scout when it is unavailable.", + "Project RAG is optional. SF falls back to CODEBASE.md, native grep/find/ls, lsp, codebase_search, and scout when it is unavailable.", ); lines.push( "When configured, agents should use index_codebase, query_codebase, search_by_filters, find_definition, find_references, and get_call_graph before manual file-by-file reading.", @@ -882,7 +882,7 @@ export function formatSiftStatus( } lines.push(""); lines.push( - "Sift is optional. SF falls back to CODEBASE.md, rg, lsp, and scout when it is unavailable.", + "Sift is optional. SF falls back to CODEBASE.md, native grep/find/ls, lsp, and scout when it is unavailable.", ); lines.push( 'When configured, agents should use `sift search --json ""`; `page-index-hybrid` is the strongest direct-search preset and `path-hybrid` is best for path-heavy queries.', @@ -901,7 +901,7 @@ function formatNoCodebaseIndexerStatus(): string { "Reason: codebase.indexer_backend is none", "Operational: no - optional codebase indexer disabled.", "", - "SF will use CODEBASE.md, rg, lsp, and scout for codebase orientation.", + "SF will use CODEBASE.md, native grep/find/ls, lsp, and scout for codebase orientation.", ].join("\n"); } diff --git a/src/resources/extensions/sf/commands-bootstrap.ts b/src/resources/extensions/sf/commands-bootstrap.ts index 3f8203e8b..a38f3364c 100644 --- a/src/resources/extensions/sf/commands-bootstrap.ts +++ b/src/resources/extensions/sf/commands-bootstrap.ts @@ -3,18 +3,20 @@ import { type ExtensionCommandContext, importExtensionModule, } from "@singularity-forge/pi-coding-agent"; +import { workflowTemplateCommandDefinitions } from "./workflow-templates.js"; const TOP_LEVEL_SUBCOMMANDS = [ { cmd: "help", desc: "Categorized command reference with descriptions" }, { cmd: "next", desc: "Explicit step mode (same as /sf)" }, { - cmd: "auto", + cmd: "autonomous", desc: "Autonomous mode — research, plan, execute, commit, repeat", }, - { cmd: "stop", 
desc: "Stop auto mode gracefully" }, + { cmd: "auto", desc: "Alias for /sf autonomous" }, + { cmd: "stop", desc: "Stop autonomous mode gracefully" }, { cmd: "pause", - desc: "Pause auto-mode (preserves state, /sf auto to resume)", + desc: "Pause autonomous mode (preserves state, /sf autonomous to resume)", }, { cmd: "status", desc: "Progress dashboard" }, { cmd: "visualize", desc: "Open workflow visualizer" }, @@ -88,14 +90,14 @@ function getSfArgumentCompletions(prefix: string) { const partial = parts[1] ?? ""; - if (parts[0] === "auto" && parts.length <= 2) { + if ((parts[0] === "auto" || parts[0] === "autonomous") && parts.length <= 2) { return filterStartsWith( partial, [ { cmd: "--verbose", desc: "Show detailed execution output" }, { cmd: "--debug", desc: "Enable debug logging" }, ], - "auto", + parts[0], ); } @@ -260,20 +262,7 @@ function getSfArgumentCompletions(prefix: string) { return filterStartsWith( partial, [ - { cmd: "bugfix", desc: "Triage, fix, test, and ship a bug fix" }, - { - cmd: "small-feature", - desc: "Lightweight feature with optional discussion", - }, - { cmd: "spike", desc: "Research, prototype, and document findings" }, - { cmd: "hotfix", desc: "Minimal: fix it, test it, ship it" }, - { cmd: "refactor", desc: "Inventory, plan waves, migrate, verify" }, - { cmd: "security-audit", desc: "Scan, triage, remediate, re-scan" }, - { cmd: "dep-upgrade", desc: "Assess, upgrade, fix breaks, verify" }, - { - cmd: "full-project", - desc: "Complete SF workflow with full ceremony", - }, + ...workflowTemplateCommandDefinitions(), { cmd: "resume", desc: "Resume an in-progress workflow" }, { cmd: "--list", desc: "List all available templates" }, { cmd: "--dry-run", desc: "Preview workflow without executing" }, diff --git a/src/resources/extensions/sf/commands-handlers.ts b/src/resources/extensions/sf/commands-handlers.ts index 8d4318814..6803a20dd 100644 --- a/src/resources/extensions/sf/commands-handlers.ts +++ 
b/src/resources/extensions/sf/commands-handlers.ts @@ -596,18 +596,23 @@ function compareSemverLocal(a: string, b: string): number { export async function handleUpdate( ctx: ExtensionCommandContext, + deps: { + currentVersion?: string; + fetchLatestVersion?: () => Promise; + install?: (command: string) => void; + } = {}, ): Promise { const { execSync } = await import("node:child_process"); const NPM_PACKAGE = "sf-run"; - const current = process.env.SF_VERSION || "0.0.0"; + const current = deps.currentVersion ?? process.env.SF_VERSION ?? "0.0.0"; ctx.ui.notify( `Current version: v${current}\nChecking npm registry...`, "info", ); - const latest = await fetchLatestVersionForCommand(); + const latest = await (deps.fetchLatestVersion ?? fetchLatestVersionForCommand)(); if (!latest) { ctx.ui.notify( "Failed to reach npm registry. Check your network connection.", @@ -625,13 +630,25 @@ export async function handleUpdate( const installCmd = resolveInstallCommand(`${NPM_PACKAGE}@latest`); try { - execSync(installCmd, { - stdio: ["ignore", "pipe", "ignore"], - }); - ctx.ui.notify( - `Updated to v${latest}. Restart your SF session to use the new version.`, - "info", - ); + if (deps.install) { + deps.install(installCmd); + } else { + execSync(installCmd, { + stdio: ["ignore", "pipe", "ignore"], + }); + } + ctx.ui.notify(`Updated to v${latest}. Reloading current session...`, "info"); + try { + await ctx.reload(); + ctx.ui.notify(`Updated to v${latest}. Reloaded current session.`, "info"); + } catch (reloadError) { + const message = + reloadError instanceof Error ? reloadError.message : String(reloadError); + ctx.ui.notify( + `Updated to v${latest}, but automatic reload failed: ${message}. Use /sf reload to resume with the new version.`, + "warning", + ); + } } catch { ctx.ui.notify(`Update failed. 
Try manually: ${installCmd}`, "error"); } diff --git a/src/resources/extensions/sf/commands-inspect.ts b/src/resources/extensions/sf/commands-inspect.ts index 6d8adf4f4..e5488c8d0 100644 --- a/src/resources/extensions/sf/commands-inspect.ts +++ b/src/resources/extensions/sf/commands-inspect.ts @@ -63,7 +63,7 @@ export async function handleInspect( const dbPath = join(sfDir, "sf.db"); if (!existsSync(sfDir) || !existsSync(dbPath) || !openDatabase(dbPath)) { ctx.ui.notify( - "No SF database available. Run /sf auto to create one.", + "No SF database available. Run /sf autonomous to create one.", "info", ); return; @@ -73,7 +73,7 @@ export async function handleInspect( const adapter = _getAdapter(); if (!adapter) { ctx.ui.notify( - "No SF database available. Run /sf auto to create one.", + "No SF database available. Run /sf autonomous to create one.", "info", ); return; diff --git a/src/resources/extensions/sf/commands-logs.ts b/src/resources/extensions/sf/commands-logs.ts index d2d8468a8..8e2893884 100644 --- a/src/resources/extensions/sf/commands-logs.ts +++ b/src/resources/extensions/sf/commands-logs.ts @@ -409,7 +409,7 @@ async function handleLogsList( } lines.push(""); - lines.push("Tip: Enable debug logging with SF_DEBUG=1 before /sf auto"); + lines.push("Tip: Enable debug logging with SF_DEBUG=1 before /sf autonomous"); ctx.ui.notify(lines.join("\n"), "info"); } @@ -495,7 +495,7 @@ async function handleLogsDebug( if (debugLogs.length === 0) { ctx.ui.notify( - "No debug logs found.\n\nEnable debug logging: SF_DEBUG=1 sf auto", + "No debug logs found.\n\nEnable debug logging: SF_DEBUG=1 sf autonomous", "info", ); return; diff --git a/src/resources/extensions/sf/commands-workflow-templates.ts b/src/resources/extensions/sf/commands-workflow-templates.ts index 24a565777..281b45b8d 100644 --- a/src/resources/extensions/sf/commands-workflow-templates.ts +++ b/src/resources/extensions/sf/commands-workflow-templates.ts @@ -17,13 +17,23 @@ import type { ExtensionAPI, 
ExtensionCommandContext, } from "@singularity-forge/pi-coding-agent"; -import { isAutoActive, isAutoPaused } from "./auto.js"; +import { + isAutoActive, + isAutoPaused, + setActiveEngineId, + setActiveRunDir, + startAutoDetached, +} from "./auto.js"; import { getErrorMessage } from "./error-utils.js"; import { createGitService, runGit } from "./git-service.js"; +import { readGraph } from "./graph.js"; import { sfRoot } from "./paths.js"; import { loadPrompt } from "./prompt-loader.js"; +import { createRunFromDefinition } from "./run-manager.js"; +import { compileTemplateRun } from "./workflow-template-compiler.js"; import { autoDetect, + formatStartUsage, getTemplateInfo, listTemplates, loadRegistry, @@ -98,6 +108,7 @@ interface WorkflowState { updatedAt: string; completedAt?: string; artifactDir: string; + runDir?: string; } /** @@ -110,6 +121,7 @@ function writeWorkflowState( phases: string[], description: string, branch: string, + runDir?: string, ): void { const statePath = join(artifactDir, "STATE.json"); const state: WorkflowState = { @@ -126,6 +138,7 @@ function writeWorkflowState( startedAt: new Date().toISOString(), updatedAt: new Date().toISOString(), artifactDir, + runDir, }; writeFileSync(statePath, JSON.stringify(state, null, 2) + "\n"); } @@ -157,6 +170,34 @@ function findInProgressWorkflows(basePath: string): WorkflowState[] { try { const raw = readFileSync(statePath, "utf-8"); const state = JSON.parse(raw) as WorkflowState; + if (state.runDir) { + try { + const graph = readGraph(state.runDir); + const allDone = graph.steps.every( + (step) => + step.status === "complete" || step.status === "expanded", + ); + if (allDone) continue; + const firstPendingIndex = graph.steps.findIndex( + (step) => step.status === "pending" || step.status === "active", + ); + state.phases = state.phases.map((phase, index) => { + const graphStep = graph.steps[index]; + if ( + graphStep?.status === "complete" || + graphStep?.status === "expanded" + ) { + return { ...phase, 
status: "completed" as const }; + } + if (index === firstPendingIndex) { + return { ...phase, status: "active" as const }; + } + return { ...phase, status: "pending" as const }; + }); + } catch { + /* fall back to legacy state if graph is unreadable */ + } + } if (!state.completedAt) { results.push(state); } @@ -204,7 +245,7 @@ export async function handleStart( if (isAutoPaused()) { ctx.ui.notify( "Auto-mode is paused. Starting a workflow template will run independently.\n" + - "The paused auto-mode session can be resumed later with /sf auto.", + "The paused autonomous session can be resumed later with /sf autonomous.", "info", ); } @@ -236,6 +277,13 @@ export async function handleStart( "info", ); + if (wf.runDir) { + setActiveEngineId("custom"); + setActiveRunDir(wf.runDir); + startAutoDetached(ctx, pi, basePath, false); + return; + } + const workflowContent = loadWorkflowTemplate(wf.template); if (!workflowContent) { ctx.ui.notify( @@ -301,7 +349,7 @@ export async function handleStart( // Check for --issue flag (bugfix shortcut) const issueMatch = cleanedArgs.match(/--issue\s+(\S+)/); - const issueRef = issueMatch ? issueMatch[1] : null; + const issueRef = issueMatch?.[1] ?? null; // Try resolving first word as a template name let match: TemplateMatch | null = null; @@ -347,27 +395,7 @@ export async function handleStart( // No template resolved at all if (!match) { if (!trimmed) { - ctx.ui.notify( - "Usage: /sf start