diff --git a/.gitignore b/.gitignore index 3e5b3e8d9..d4dd1212d 100644 --- a/.gitignore +++ b/.gitignore @@ -71,8 +71,8 @@ docs/coherence-audit/ .plans/ # ── SF project state (per-worktree, never committed) ── -.sf/ -.sf/ +# Runtime-only patterns are managed per-clone in .git/info/exclude by sf. +# Tracked artifacts (.sf/milestones/, .sf/PROJECT.md, etc.) live in version control. # ── Native Rust build outputs ── native/addon/*.node @@ -86,9 +86,6 @@ rust-engine/target/ pnpm-lock.yaml bun.lock -# ── SF baseline (auto-generated) ── -.sf - # ── SF baseline (auto-generated) ── .sf-id .direnv/ diff --git a/docs/DESIGN.md b/docs/DESIGN.md index 38548f894..400547ba5 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -1,3 +1,56 @@ # Design -Record interaction patterns, visual constraints, and design-system usage here. +SF's UI is a terminal application built on the Pi TUI framework (`@mariozechner/pi-tui`). These are the binding constraints any UI work must respect. + +## The Cardinal Rule: Line Width + +**Every line returned from `render(width)` must not exceed `width` in visible characters.** Exceeding it causes terminal line-wrapping, cursor misposition, and visual corruption the framework cannot fix. + +Use the Pi TUI utilities — never raw `string.length`: + +```typescript +import { visibleWidth, truncateToWidth, wrapTextWithAnsi } from "@mariozechner/pi-tui"; + +visibleWidth("\x1b[32mHello\x1b[0m"); // 5, not 14 +truncateToWidth("Very long text here", 10); // "Very lo..." +wrapTextWithAnsi("\x1b[32mlong green\x1b[0m", 15); // preserves ANSI per line +``` + +`visibleWidth` strips ANSI escape codes before measuring. `truncateToWidth` preserves ANSI codes in the truncated output. Use these everywhere a line's display length matters. 
+ +## Render Pattern + +```typescript +render(width: number): string[] { + const lines: string[] = []; + lines.push(truncateToWidth(` ${prefix}${content}`, width)); + + const labelWidth = visibleWidth(label); + const available = width - labelWidth - 4; // padding + lines.push(` ${label}: ${truncateToWidth(value, available)}`); + + return lines; +} +``` + +## Overlays and Modals + +Floating panels use the Pi TUI overlay pattern: they render at a fixed position within the terminal bounds and must still respect the outer `width` constraint. An overlay that overflows its bounds causes the same wrapping corruption as any other component. + +Use `ctx.ui.dialog()` for modal user input. Use `ctx.ui.notify()` for transient non-blocking notices. Persistent notification state goes through `notification-store.ts` → `notification-overlay.ts`. + +## Theming + +Colors and styles come from the Pi TUI theme system, not from hardcoded ANSI codes. Access the active theme via the `ExtensionContext`. Respect theme changes: components must re-render when the theme changes (implement `onThemeChange` if caching rendered output). + +## IME and Focus + +Interactive input components must implement the `Focusable` interface to receive keyboard events correctly, especially for IME (input method editor) support on non-ASCII keyboards. Components that handle key input but do not implement `Focusable` will silently swallow events. + +## Performance + +Cache rendered output when the underlying data hasn't changed. Invalidate the cache on data change or theme change. Do not re-render on every tick. The TUI framework calls `render()` frequently; rendering must be cheap. + +## Reference + +Full TUI documentation: [`docs/dev/pi-ui-tui/`](./dev/pi-ui-tui/README.md) diff --git a/docs/PLANS.md b/docs/PLANS.md index d9966f962..ab32fed3d 100644 --- a/docs/PLANS.md +++ b/docs/PLANS.md @@ -1,3 +1,24 @@ # Plans -Use this as the index for current and upcoming work. Link detailed plans in `docs/exec-plans/`. 
+Index of current and upcoming work. Detailed plans live in [`docs/exec-plans/`](./exec-plans/). + +## Active + +| Initiative | Purpose | ADR / Doc | +|-----------|---------|-----------| +| Repo-native harness evolution | Stage-by-stage wiring of the harness profiler, template kits, and evidence runner into autonomous dispatch | [ADR-018](./dev/ADR-018-repo-native-harness-evolution.md) | +| SF tools over MCP (Phase 1) | Expose workflow mutation tools over MCP so Claude Code and external providers can participate in autonomous execution | [ADR-008](./dev/ADR-008-sf-tools-over-mcp-for-provider-parity.md) | +| Notification event model | Implement structured source/kind/blocking metadata on all event paths, replacing fragile text matching | [design doc](./design-docs/notification-event-model.md) | +| repo-vcs skill | Landed — VCS context injection into system prompt; repo-vcs bundled skill for commit/push/safe-push | commit `a611cd579` | + +## Upcoming + +| Initiative | Depends on | +|-----------|-----------| +| Parallel milestone state locking (SQLite) | ADR-018 Phase 1 | +| ADR template + `just adr` / `just spec` generation recipes | — | +| Skill health dashboard (`/sf skill-health`) | Telemetry already wired | +| Go/Charm judge-calibration service | ADR-018 Phase 5 | + +See [`exec-plans/active/`](./exec-plans/active/) for task-level breakdowns and +[`exec-plans/tech-debt-tracker.md`](./exec-plans/tech-debt-tracker.md) for known cleanup. diff --git a/docs/PRODUCT_SENSE.md b/docs/PRODUCT_SENSE.md index 323f7d271..9b3c67255 100644 --- a/docs/PRODUCT_SENSE.md +++ b/docs/PRODUCT_SENSE.md @@ -1,3 +1,43 @@ # Product Sense -Capture user goals, non-goals, tradeoffs, and examples of good product judgment for this repo. +## The Core Thesis + +Autonomous execution is the end gate. 
SF exists to take a multi-phase software project — a milestone with slices and tasks — and run it to completion without human intervention, producing a clean git history, passing tests, and a deployable artifact. + +Every design decision should be evaluated against this question: **does it make autonomous execution more reliable, more observable, or more recoverable?** + +## User Goals + +- Hand off a milestone and have it complete without babysitting +- Know the agent won't make irreversible mistakes (write gates, protected files, budget ceilings) +- Resume after a crash without losing work (state-on-disk, crash recovery) +- See what the agent did and why (trace files, decision register, records keeper) +- Steer mid-run without breaking the loop (message queue, steering gate) + +## Non-Goals + +- Being a chat interface — use the Pi interactive mode for exploratory conversation +- Replacing CI — SF triggers verification but does not replace your existing CI pipeline +- Working without context — SF needs a spec, a roadmap, and a task plan; it does not invent work from nothing + +## What Good Product Judgment Looks Like + +**Fresh context per unit, not accumulated context.** Each task gets a new session with exactly the context it needs pre-injected (task plan, slice plan, prior summaries, relevant skills). This prevents quality degradation from context accumulation — one of the primary failure modes of naive LLM agents on long projects. + +**State machine, not LLM guessing.** The loop is deterministic: read STATE.md → validate → dispatch → post-unit → verify → advance. The LLM executes work inside a unit; it does not decide what the next unit is. Separating orchestration from execution keeps the system predictable. + +**Spec-first.** No behavior change without a failing test first. No completion without a real consumer. This is the iron law — not a suggestion. An agent that completes tasks without specs is just making things up. 
+ +**Crash recovery must be invisible.** A crashed session should resume within seconds with no visible data loss. If recovery requires human intervention, it is a product failure. + +**User stays in the loop via gates, not via interrupts.** Discussion gates, write gates, budget ceilings, and approval prompts are the designed points of human interaction. The agent should not need to ask for help in the middle of a task. + +## Tradeoffs + +| Choice | What we gave up | Why | +|--------|----------------|-----| +| Fresh session per unit | Conversational continuity across units | Quality and predictability over convenience | +| State on disk (not in memory) | Speed of in-memory state | Crash recovery and multi-process visibility | +| Write gate during queue | Faster iteration in planning | Safety: prevents accidental file mutations during discussion | +| Protected files (ADRs, SPEC.md) | Agent autonomy over architecture docs | Human oversight over durable decisions | +| Serial execution default | Throughput | Correctness before parallelism; parallel locking is deferred debt | diff --git a/docs/QUALITY_SCORE.md b/docs/QUALITY_SCORE.md index 79a0d1f7f..20b5e8578 100644 --- a/docs/QUALITY_SCORE.md +++ b/docs/QUALITY_SCORE.md @@ -1,10 +1,59 @@ # Quality Score -Define what good looks like for this repo. Include fast checks, slow checks, evals, and known blind spots. - -Use these principles: +## Principles - Make code legible to agents with semantic names and explicit boundaries. - Prefer small, testable modules over files that require broad context to edit. - Enforce style, architecture, and reliability rules mechanically where possible. - Keep a cleanup loop for stale docs, generated artifacts, and accumulated implementation debt. + +## Fast Checks (run on every change) + +```bash +just typecheck # tsc --project tsconfig.resources.json, no emit +just lint # eslint across src/ +``` + +Both must pass before any commit. Typecheck catches type drift early. 
Lint enforces the import rules that preserve the Pi clean seam (ADR-010). + +## Slow Checks (run before shipping) + +```bash +just test # full unit suite — node --test runner, no coverage overhead +just test-smoke # sf --version, sf --help, sf --print — all three must pass +``` + +Coverage thresholds (enforced by `npm run test:coverage`): +- Statements: **40%** minimum +- Lines: **40%** minimum +- Branches: **20%** minimum +- Functions: **20%** minimum + +These are floors, not targets. The real quality bar is purposeful tests that assert behavior contracts (see `docs/SPEC_FIRST_TDD.md`). + +## Evals (ad-hoc, not yet automated) + +No automated eval suite exists yet. ADR-018 Phase 3 defines the eval runner contract. Until then, quality for autonomous behavior is measured by: + +- Smoke test pass rate across providers +- Manual milestone runs with trace inspection (`.sf/traces/`) +- Decision register review at milestone close + +## Known Blind Spots + +| Area | Gap | Risk | +|------|-----|------| +| `headless.ts` | RPC lifecycle (spawn → event stream → restart) is not covered by unit tests; only integration-tested manually | High: crash recovery correctness | +| Parallel milestone orchestration | No tests for concurrent STATE.md mutations | Medium: data loss under parallelism | +| Notification routing | Text-matching classification has no per-pattern unit tests | Low: wrong exit code on wording change | +| Stuck detection | Sliding-window logic tested, but real-loop replay is not | Medium: false positives under unusual patterns | +| Provider fallback | Model routing under simulated provider failure not covered | Medium: silent routing to wrong tier | + +## Doc Quality Signal + +```bash +grep -r "TODO\|placeholder\|Describe the\|Document.*here\|Record.*here\|Use this as\|Capture.*here\|Track cleanup" \ + docs/ --include="*.md" +``` + +This should return no matches outside this file and `docs/records/`, which mention the search terms themselves. Any other match is a placeholder doc that needs real content. 
diff --git a/docs/RELIABILITY.md b/docs/RELIABILITY.md index 8c0264108..b3420578a 100644 --- a/docs/RELIABILITY.md +++ b/docs/RELIABILITY.md @@ -1,3 +1,72 @@ # Reliability -Document expected failure modes, recovery paths, observability, and release checks here. +## Exit Codes (headless mode) + +| Code | Meaning | +|------|---------| +| 0 | Success — unit or session completed cleanly | +| 1 | Error or timeout | +| 10 | Blocked — LLM called an interactive tool that requires user input; parent must respond or abort | +| 11 | Cancelled — SIGINT or SIGTERM received | +| 12 | Reload — agent requested restart-with-resume on the same session | + +## Failure Modes and Recovery + +### Process crash mid-unit +**Detection:** Lock file in `.sf/` is present on next launch; RPC child process is gone. + +**Recovery path (`src/resources/extensions/sf/auto-recovery.ts`):** +1. Read the surviving session JSONL from `~/.sf/sessions/<session-dir>/` +2. Synthesize a recovery briefing from every tool call recorded on disk +3. Resume the LLM mid-unit with the briefing as context — no state is lost +4. If the session JSONL is unreadable, fall back to starting the unit fresh + +### Timeout +**Detection:** Headless parent receives no heartbeat within `HEADLESS_HEARTBEAT_INTERVAL_MS` (60 000 ms), or the unit wall-clock exceeds the configured timeout. + +**Recovery path:** `auto-timeout-recovery.ts` writes a timeout summary, marks the unit `needs_fix`, and advances the loop. The parent exits with code 1 unless `--max-restarts` allows a retry. + +### Stuck detection (repeating-pattern loops) +**Detection (`src/resources/extensions/sf/auto-stuck-detection.ts`):** Sliding-window analysis over the last ~10 unit results. If the same A→B→A→B pattern repeats, the loop is classified as stuck. + +**Recovery path:** Retry once with a deep diagnostic prompt that shows the pattern. If still stuck, stop and surface the exact expected file for human inspection. Stuck state persists across session restarts. 
+ +### Provider API errors (transient) +**Detection:** `bootstrap/provider-error-resume.ts` intercepts 429, 500, 503 responses. + +**Recovery path:** Exponential backoff; re-queue the unit. If a provider is consistently unavailable, route to the configured fallback model. + +### Verification gate failures +**Detection:** `auto-verification.ts` runs lint/test after each task; non-zero exit = failure. + +**Recovery path:** Auto-retry the task up to 2× with the agent receiving full command output as context. After 2 failures the task is marked `needs_fix` and the loop advances with a warning. + +### Budget ceiling hit +**Detection:** `auto-budget.ts` tracks cumulative dollar cost; emits warnings at 75%, 80%, 90%, and halts at 100%. + +**Recovery path:** Auto-mode pauses; user must explicitly approve resumption. The current unit is not retried. + +## Restart Loop (headless daemon mode) + +`sf headless auto --max-restarts 3` applies exponential backoff: 5 s → 10 s → 30 s (cap). After exhausting restarts the parent exits with code 1. Each restart resumes via crash recovery above. 
+ +## Observability + +| Signal | Location | +|--------|----------| +| Structured trace | `.sf/traces/trace-<id>.json` — full session span tree with tokens, cost, duration | +| Event audit log | `.sf/event-log.jsonl` — every unit completion, tool call, decision save (v2 format) | +| Desktop notifications | OS-native; configurable via preferences (`notifications.*`) | +| Stderr progress | All headless progress output goes to stderr; stdout carries the JSON result when `--output-format json` is set | +| Heartbeat | Emitted every 60 s to detect hung parent/child communication | + +## Release Checks + +Before shipping a build: + +```bash +just test # full unit test suite +just smoke-test # sf --version, sf --help, sf --print +just typecheck # tsc extensions, no emit +just lint # eslint +``` diff --git a/docs/SECURITY.md b/docs/SECURITY.md index 910ac5476..fef0f17ae 100644 --- a/docs/SECURITY.md +++ b/docs/SECURITY.md @@ -1,3 +1,53 @@ # Security -Document trust boundaries, secrets handling, dependency risk, and security review requirements here. +## Auth Model and Trust Boundaries + +SF never manages Anthropic OAuth directly. The safe paths are: + +- **API key** — user sets `ANTHROPIC_API_KEY` or configures it in auth.json. SF reads it; never generates or exchanges it. +- **Claude Code CLI (`claude-code` provider)** — SF shells out to the real `claude` CLI and lets it handle its own credential selection. SF does not reuse Claude subscription tokens. +- **Cloud providers** — Bedrock, Vertex, Azure via their own credential chains. 
+ +**Prohibited patterns (from `docs/user-docs/claude-code-auth-compliance.md`):** +- SF-managed Anthropic OAuth flow for subscription accounts +- Reusing user Claude subscription credentials inside SF's own API client +- Making Anthropic believe requests come from Claude Code when they come from SF infrastructure + +## Write Gate + +`src/resources/extensions/sf/bootstrap/write-gate.ts` enforces a phase-aware write boundary: + +- During **queue mode** (pre-dispatch planning): only `.sf/` writes and read-only tool calls are permitted. All other file writes are blocked. +- **QUEUE_SAFE_TOOLS** allowlist: `read`, `grep`, `find`, `ls`, `ask_user_questions`, planning tools, web research tools. +- **BASH_READ_ONLY_RE**: regex allowlist of commands safe to run during write-restricted phases (`cat`, `git log`, `npm run test|lint|typecheck`, `jq`, etc.). +- Write-gate violations are logged and surfaced to the user; they do not crash the session. + +## Protected Files + +The following files require human review before any automated modification (per `docs/SPEC_FIRST_TDD.md`): + +- `ADR-*.md` — architecture decision records +- `SPEC.md`, `ARCHITECTURE.md`, `AGENTS.md` +- `docs/SECURITY.md`, `docs/RELIABILITY.md` + +SF will not autonomously overwrite these. Any proposed change to a protected file is surfaced as a diff for human acceptance. + +## Secret Scanning + +Pre-commit hook via `npm run secret-scan:install-hook`. Blocks commits containing patterns matching API keys, tokens, and credentials. Install with: + +```bash +npm run secret-scan:install-hook +``` + +## Dependency Risk + +- `npm audit` runs in CI on every push. +- No `--ignore-scripts` bypass: postinstall scripts are reviewed before adding new dependencies. +- Rust N-API bindings (`packages/native/`) undergo separate native-build review for ABI safety. + +## Sandbox Model + +SF agents execute inside the Pi RPC child process. The write gate and tool allowlist are the primary sandbox. 
There is no OS-level sandbox (no container or seccomp) in the default local deployment. + +**Headless unsupervised mode** (`--no-supervised`): SF exits with code 10 (blocked) rather than auto-responding to any interactive tool call. This is the safe default for CI pipelines where no human is available to respond. diff --git a/docs/design-docs/index.md b/docs/design-docs/index.md index 296b52b2f..f69af1516 100644 --- a/docs/design-docs/index.md +++ b/docs/design-docs/index.md @@ -1,3 +1,32 @@ # Design Docs -Durable design decisions live here. Link active proposals, completed decisions, and rejected alternatives. +Durable design decisions live here. ADRs (Architecture Decision Records) are numbered sequentially +in `docs/dev/`. Lighter design docs (problem framing, event model decisions) live in this directory. + +## Architecture Decision Records (`docs/dev/`) + +| ADR | Title | Status | +|-----|-------|--------| +| [ADR-001](../dev/ADR-001-branchless-worktree-architecture.md) | Branchless Worktree Architecture — `.sf/milestones/` tracked, runtime gitignored | Accepted | +| [ADR-003](../dev/ADR-003-pipeline-simplification.md) | Pipeline Simplification — research merged into planning | Accepted | +| [ADR-004](../dev/ADR-004-capability-aware-model-routing.md) | Capability-Aware Model Routing | Accepted | +| [ADR-005](../dev/ADR-005-multi-model-provider-tool-strategy.md) | Multi-Model Provider Tool Strategy | Accepted | +| [ADR-007](../dev/ADR-007-model-catalog-split.md) | Model Catalog Split | Accepted | +| [ADR-008](../dev/ADR-008-sf-tools-over-mcp-for-provider-parity.md) | SF Tools over MCP for Provider Parity | Proposed — deferred (usage model mismatch) | +| [ADR-009](../dev/ADR-009-orchestration-kernel-refactor.md) | Orchestration Kernel Refactor | Accepted | +| [ADR-010](../dev/ADR-010-pi-clean-seam-architecture.md) | Pi Clean Seam Architecture | Accepted | +| [ADR-011](../dev/ADR-011-swarm-chat-and-debate-mode.md) | Swarm Chat and Debate Mode | Proposed | +| 
[ADR-012](../dev/ADR-012-multi-instance-federation.md) | Multi-Instance Federation | Proposed | +| [ADR-013](../dev/ADR-013-network-and-remote-execution.md) | Network and Remote Execution | Proposed | +| [ADR-014](../dev/ADR-014-singularity-knowledge-and-agent-platform.md) | Singularity Knowledge and Agent Platform | Proposed | +| [ADR-015](../dev/ADR-015-flight-recorder.md) | Flight Recorder | Proposed | +| [ADR-016](../dev/ADR-016-charm-ai-stack-adoption.md) | Charm AI Stack Adoption | Proposed | +| [ADR-017](../dev/ADR-017-charm-tui-client.md) | Charm TUI Client | Proposed | +| [ADR-018](../dev/ADR-018-repo-native-harness-evolution.md) | Repo-Native Harness Evolution | Proposed — staged impl | + +## Design Docs (this directory) + +| Doc | Title | Status | +|-----|-------|--------| +| [core-beliefs.md](./core-beliefs.md) | Core Beliefs | Accepted | +| [notification-event-model.md](./notification-event-model.md) | Notification Event Model | Draft | diff --git a/docs/dev/ADR-001-branchless-worktree-architecture.md b/docs/dev/ADR-001-branchless-worktree-architecture.md index 10f43c688..c9d90193f 100644 --- a/docs/dev/ADR-001-branchless-worktree-architecture.md +++ b/docs/dev/ADR-001-branchless-worktree-architecture.md @@ -1,6 +1,6 @@ # ADR-001: Branchless Worktree Architecture -**Status:** Proposed +**Status:** Accepted **Date:** 2026-03-15 **Deciders:** Lex Christopherson **Advisors:** Claude Opus 4.6, Gemini 2.5 Pro, GPT-5.4 (Codex) diff --git a/docs/exec-plans/active/index.md b/docs/exec-plans/active/index.md index 503bbf866..c99cdc642 100644 --- a/docs/exec-plans/active/index.md +++ b/docs/exec-plans/active/index.md @@ -1,3 +1,36 @@ # Active Execution Plans -Link active plans here. Each plan should state purpose, scope, tasks, acceptance criteria, and verification. 
+## ADR-018: Repo-Native Harness Evolution + +**Purpose:** Make SF's harness mechanisms (verification gates, repo profiler, template kits, eval runner) useful in every repo SF works on, adapting over time as the repo changes shape. + +**Scope:** Staged in 7 phases per ADR-018. Only phases 1–2 are in scope for near-term execution. + +**Phase 1 — Repo profile snapshots (next)** +- Add read-only `RepoProfile` snapshot before each planning milestone +- Record observed (untracked) files in `.sf/sf.db` as `observed_only` +- No tracked repo file writes; no worker-prompt changes + +**Phase 2 — Template kit registry and harness manifest** +- Parameterized harness template kit registry (Agent Runtime, RAG, Web App, Nix, Charm) +- Dry-run harness proposals as planning artifacts only — no tracked repo writes + +**Acceptance criteria:** Phase 1 produces a repo profile snapshot in `.sf/sf.db` before every planning milestone. Phase 2 produces a dry-run harness proposal as a planning artifact viewable at milestone review. + +**Falsifier:** If a planning milestone produces no repo profile entry in `.sf/sf.db`, Phase 1 is incomplete. + +**Verification:** `node -e "require('./src/resources/extensions/sf/repo-profiler.js').buildRepoProfile(process.cwd()).then(p => console.log(JSON.stringify(p, null, 2)))"` + +**ADR:** [ADR-018](../../dev/ADR-018-repo-native-harness-evolution.md) + +--- + +## Notification Event Model Implementation + +**Purpose:** Replace text-matching heuristics in `src/headless-events.ts` and `src/resources/extensions/sf/notification-overlay.ts` with structured `source`/`kind`/`blocking`/`dedupe_key` metadata on all inbound transcript events. + +**Scope:** Propagate event metadata through all notification paths; update headless event parser to use structured fields; add deduplication by key instead of by text. + +**Acceptance criteria:** `headless-events.ts` no longer uses string matching for event classification. 
Duplicate non-blocking workflow notices are collapsed by `dedupe_key`. A regression test asserts that automated notices cannot supersede the latest real user message. + +**Design doc:** [notification-event-model.md](../../design-docs/notification-event-model.md) · [product spec](../../product-specs/notification-source-hygiene.md) diff --git a/docs/exec-plans/completed/index.md b/docs/exec-plans/completed/index.md index a923b989d..5e0057088 100644 --- a/docs/exec-plans/completed/index.md +++ b/docs/exec-plans/completed/index.md @@ -1,3 +1,59 @@ # Completed Execution Plans -Move finished plan summaries here with evidence links and follow-up debt. +## repo-vcs skill — 2026-05-01 + +**What shipped:** `repository-vcs-context.ts` detects Git vs Jujutsu and injects VCS guidance into the agent system prompt. `src/resources/skills/repo-vcs/` bundled skill for commit, push, and safe-push workflows. Skill trigger registered in `bootstrap/system-context.ts`. + +**Evidence:** commit `a611cd579` — 18 files, 943 insertions. + +**Follow-up debt:** None — no regressions in smoke tests. + +--- + +## Autonomous workflow stabilization — 2026-05-01 + +**What shipped:** Major hardening pass on the auto-loop: crash recovery, stuck detection, production mutation approval gate, safe-smoke task LLM approval, headless source startup progress. See commit `12e7333f1`. + +**Evidence:** All existing tests pass. Smoke tests: `sf --version`, `sf --help`, `sf --print` all pass. + +**Follow-up debt:** Parallel milestone state locking (SQLite) deferred to ADR-018 Phase 1+. + +--- + +## JSDoc Purpose/Consumer annotations — 2026-05-01 + +**What shipped:** `Purpose:` and `Consumer:` JSDoc annotations added to `app-paths.ts`, `bundled-extension-paths.ts`, `errors.ts`, `extension-discovery.ts`, `extension-registry.ts`, `headless-types.ts`, `headless.ts`, `traces.ts`. Fulfills SPEC_FIRST_TDD "JSDoc is the purpose" iron law for core modules. + +**Evidence:** commit `a611cd579`. 
+ +--- + +## Pi clean seam architecture (ADR-010) — 2026-04 + +**What shipped:** Hard boundary between SF extension code and Pi SDK internals. SF extensions may only call the public Pi extension API; no direct access to Pi internals. Enforced via import rules. + +**ADR:** [ADR-010](../../dev/ADR-010-pi-clean-seam-architecture.md) + +--- + +## Branchless worktree architecture (ADR-001) — prior + +**What shipped:** Git worktrees for milestone isolation without branch-per-milestone overhead. Each milestone executes in its own worktree; changes merge back to main on completion. + +**ADR:** [ADR-001](../../dev/ADR-001-branchless-worktree-architecture.md) + +--- + +## Pipeline simplification (ADR-003) — prior + +**What shipped:** Research phase merged into planning; mechanical completion model for tasks that need no LLM judgment. Eliminated a redundant dispatch phase. + +**ADR:** [ADR-003](../../dev/ADR-003-pipeline-simplification.md) + +--- + +## Capability-aware model routing (ADR-004) — prior + +**What shipped:** Routing from tier/cost selection to task-capability matching. Model selection considers tool requirements, vision, function-calling, and context size, not just cost tier. + +**ADR:** [ADR-004](../../dev/ADR-004-capability-aware-model-routing.md) diff --git a/docs/exec-plans/tech-debt-tracker.md b/docs/exec-plans/tech-debt-tracker.md index d6502174b..605e51f25 100644 --- a/docs/exec-plans/tech-debt-tracker.md +++ b/docs/exec-plans/tech-debt-tracker.md @@ -1,3 +1,69 @@ # Tech Debt Tracker -Track cleanup discovered during implementation. Include owner, impact, proposed fix, and verification. +## Notification event classification — text matching only + +**Impact:** `src/headless-events.ts` classifies events (blocked, milestone-ready, auto-stopped) by regex against stderr text. Fragile: any wording change in a notification breaks classification silently. + +**Proposed fix:** Implement structured `source`/`kind`/`blocking` metadata per notification-event-model.md. 
Update headless event parser to use typed fields. + +**Verification:** Remove string-match classifiers; confirm headless exit-code logic still triggers correctly via integration test. + +**Tracked in:** [active/index.md — Notification Event Model](./active/index.md) + +--- + +## MCP workflow mutations — read-only only + +**Impact:** External providers (Claude Code CLI, remote orchestrators) that route through MCP can query SF state but cannot advance it. `sf_complete_task`, `sf_plan_milestone`, etc. are in-process only. + +**Proposed fix:** Extract shared handlers from native tools; expose over MCP server (ADR-008 Phase 1). + +**Verification:** Claude Code provider session completes a task via MCP `sf_complete_task` and produces identical STATE.md outcome. + +**Tracked in:** [active/index.md — ADR-008 Phase 1](./active/index.md) · [ADR-008](../dev/ADR-008-IMPLEMENTATION-PLAN.md) + +--- + +## No ADR template or generation recipes + +**Impact:** Every ADR is hand-authored from scratch. No enforced schema means some ADRs omit falsifiers, status dates, or sequencing. No `just adr` recipe means the numbering is manual. + +**Proposed fix:** Add `docs/dev/ADR-TEMPLATE.md` with required sections (Status, Date, Context, Options, Decision, Consequences, Falsifiers, Verification). Add `just adr <number> <slug>` recipe that stamps the template. + +**Verification:** `just adr 019 my-decision` produces `docs/dev/ADR-019-my-decision.md` with all required section headings. + +--- + +## Parallel milestone state locking — file-based, ad-hoc + +**Impact:** Concurrent milestone execution uses ad-hoc file locks on STATE.md and roadmap.md. Race condition possible under heavy parallelism; not blocking for serial execution (current default). + +**Proposed fix:** SQLite database in `.sf/sf.db` with atomic transactions for all state mutations. Deferred to ADR-018 Phase 1+. + +**Verification:** Two milestone workers simultaneously completing tasks produce consistent STATE.md with no lost updates. 
+ +**Tracked in:** [ADR-018](../dev/ADR-018-repo-native-harness-evolution.md) sequencing stage 1. + +--- + +## write-gate BASH_READ_ONLY_RE — monolithic regex + +**Location:** `src/resources/extensions/sf/bootstrap/write-gate.ts` + +**Impact:** 30+ command patterns are encoded in a single 900-character regex. Adding a new safe command requires editing the regex inline, with no unit coverage per command. Risk of subtle regex alternation bugs. + +**Proposed fix:** Replace with a data-driven allowlist (array of patterns with names and comments) that the gate compiles at startup. Each entry is individually testable. + +**Verification:** `write-gate.test.ts` achieves per-command coverage without a single monolithic regex match test. + +--- + +## ADR-018 runtime — harness profiler not yet wired + +**Impact:** `repo-profiler.ts` exists but is not called during any autonomous dispatch phase. The harness evolution system (ADR-018) exists only as design documentation; no runtime behavior has shipped. + +**Proposed fix:** Wire `buildRepoProfile()` call into the pre-dispatch phase (Phase 1). Record result in `.sf/sf.db`. + +**Verification:** After any planning milestone, `.sf/sf.db` contains a `repo_profiles` row for the current session. 
+ +**Tracked in:** [active/index.md — ADR-018 Phase 1](./active/index.md) diff --git a/docs/records/2026-05-01-repo-vcs-and-notifications.md b/docs/records/2026-05-01-repo-vcs-and-notifications.md new file mode 100644 index 000000000..75d48feb6 --- /dev/null +++ b/docs/records/2026-05-01-repo-vcs-and-notifications.md @@ -0,0 +1,50 @@ +# Records Note — 2026-05-01 + +## What Changed + +**commit `a611cd579`** — feat: introduce repo-vcs skill and add JSDoc annotations across core modules + +- `src/resources/extensions/sf/repository-vcs-context.ts` — new: detects Git vs Jujutsu, builds VCS guidance block injected into system prompt +- `src/resources/skills/repo-vcs/` — new: bundled skill for commit, push, safe-push workflows +- `src/resources/extensions/sf/bootstrap/system-context.ts` — added `repo-vcs` to bundled skill trigger table; injects `repositoryVcsBlock` into system prompt +- `src/resources/extensions/sf/tests/repository-vcs-context.test.ts` — new: test suite for VCS context detection +- JSDoc `Purpose:` and `Consumer:` annotations added to: `app-paths.ts`, `bundled-extension-paths.ts`, `errors.ts`, `extension-discovery.ts`, `extension-registry.ts`, `headless-types.ts`, `headless.ts`, `traces.ts` +- `flake.nix` — added `just` to devShell +- `justfile` — new: build, test, typecheck, lint, sf recipes + +**Notification specs drafted:** +- `docs/design-docs/notification-event-model.md` — design decision: structured source/kind/blocking/dedupe_key on all events +- `docs/product-specs/notification-source-hygiene.md` — product spec: separate user messages from automated notices + +**Docs filled (previously placeholder):** +- `docs/design-docs/index.md` — ADR index +- `docs/PLANS.md` — active and upcoming work index +- `docs/exec-plans/active/index.md` — ADR-018, ADR-008, notification model +- `docs/exec-plans/completed/index.md` — repo-vcs, stabilization, JSDoc, ADR-001/003/004/010 +- `docs/exec-plans/tech-debt-tracker.md` — 6 known items +- `docs/RELIABILITY.md` — 
exit codes, failure modes, recovery paths, observability +- `docs/SECURITY.md` — auth model, write gate, protected files, secret scan +- `docs/DESIGN.md` — TUI line-width rule, overlays, theming, IME, performance +- `docs/PRODUCT_SENSE.md` — product thesis, user goals, non-goals, tradeoffs +- `docs/QUALITY_SCORE.md` — thresholds, fast/slow checks, known blind spots +- `docs/records/index.md` — this index + +## What Canonical Docs Were Updated + +- `docs/design-docs/index.md` — now indexes all 18 ADRs and 2 design docs +- `docs/PLANS.md` — now reflects active initiatives and upcoming work +- All exec-plan index files — now have real content + +## Contradictions Found + +- ADR-008 (SF tools over MCP) is marked "Accepted — impl in progress" but the user has clarified that SF is the only runtime in use; Claude Code is used as an external dev assistant, not as a provider inside SF. ADR-008's premise (provider parity for Claude Code CLI as a Pi provider) may not apply to the current usage model. Needs clarification. + +- `docs/design-docs/` and `docs/dev/ADR-*.md` are split across two directories. The design-docs folder has 2 files; 18 ADRs live in dev/. This split is navigable with the index but worth consolidating eventually. + +## What Remains Unresolved + +- ADR-008 relevance: does exposing workflow mutations over MCP make sense if SF is always the sole runtime? +- ADR-018 Phase 1 (repo profiler wired into dispatch) is not yet started +- Notification event model implementation (Phase 2 of the spec) is not yet started +- No ADR template or `just adr` recipe +- `write-gate.ts` BASH_READ_ONLY_RE monolithic regex not yet refactored diff --git a/docs/records/index.md b/docs/records/index.md index 700849601..dfa5562f5 100644 --- a/docs/records/index.md +++ b/docs/records/index.md @@ -1,3 +1,9 @@ # Records -This folder holds repo-memory audits, decision ledgers, context-gardening notes, and records-keeper outputs. 
+Repo-memory audits, decision ledgers, context-gardening notes, and records-keeper outputs. Each entry is a dated note describing what changed, what canonical docs were updated, and what remains unresolved. + +## Index + +| Date | Note | Summary | +|------|------|---------| +| 2026-05-01 | [repo-vcs and notifications](./2026-05-01-repo-vcs-and-notifications.md) | repo-vcs skill landed; notification specs drafted; JSDoc annotations added; placeholder docs filled | diff --git a/src/resources/extensions/sf/gitignore.ts b/src/resources/extensions/sf/gitignore.ts index f5c0533f1..c0096dea9 100644 --- a/src/resources/extensions/sf/gitignore.ts +++ b/src/resources/extensions/sf/gitignore.ts @@ -47,17 +47,29 @@ const SF_RUNTIME_PATTERNS = [ ] as const; /** - * SF-specific runtime exclusion patterns. These live in .git/info/exclude - * (per-clone, never committed) instead of .gitignore so that: - * - Re-running sf doesn't dirty the working tree on every invocation - * - The project's .gitignore stays human-curated (sf doesn't own it) - * - User-equivalent patterns like `/.sf` (root-only) coexist without - * triggering naive duplicate-add since we don't touch .gitignore at all - * for these. + * SF runtime exclusion patterns for repos where .sf/ is a LOCAL DIRECTORY. + * Granular so that durable planning artifacts (.sf/milestones/, .sf/PROJECT.md, + * .sf/DECISIONS.md) remain trackable in git per ADR-001. * - * Migrated out of BASELINE_PATTERNS on 2026-04-29. + * NOT used when .sf/ is a symlink — symlinks need the blanket SF_SYMLINK_EXCLUSION_PATTERNS + * because git cannot traverse symlinks to match per-file patterns. + * + * Migrated from blanket `.sf` on 2026-05-01 to implement ADR-001. + * Previously migrated out of BASELINE_PATTERNS into .git/info/exclude on 2026-04-29. 
*/ -const SF_RUNTIME_EXCLUSION_PATTERNS = [".sf", ".sf-id", ".bg-shell/"] as const; +const SF_RUNTIME_EXCLUSION_PATTERNS: readonly string[] = [ + ".sf-id", + ".bg-shell/", + ...SF_RUNTIME_PATTERNS, +]; + +/** + * SF exclusion patterns for repos where .sf/ is a SYMLINK (external state). + * Git sees the symlink as an opaque file and cannot traverse it, so granular + * patterns like .sf/activity/ would never match. The blanket .sf pattern + * excludes the symlink itself. + */ +const SF_SYMLINK_EXCLUSION_PATTERNS = [".sf", ".sf-id", ".bg-shell/"] as const; const BASELINE_PATTERNS = [ // SF-specific patterns now live in SF_RUNTIME_EXCLUSION_PATTERNS, applied @@ -216,25 +228,56 @@ export function ensureGitInfoExclude(basePath: string): boolean { ? readFileSync(excludePath, "utf-8") : ""; - const existingLines = new Set( - existing - .split("\n") - .map((l) => l.trim()) - .filter((l) => l && !l.startsWith("#")), - ); - const missing = SF_RUNTIME_EXCLUSION_PATTERNS.filter( - (p) => !existingLines.has(p), - ); - if (missing.length === 0) return false; + // Determine whether .sf is a symlink (external state) or a local directory. + // Symlink: git cannot traverse it, so only the blanket .sf pattern works. + // Directory: use granular patterns so .sf/milestones/ and other durable + // planning artifacts can be tracked per ADR-001. + const sfIsSymlink = (() => { + const localSf = join(basePath, ".sf"); + try { + return existsSync(localSf) && lstatSync(localSf).isSymbolicLink(); + } catch { + return false; + } + })(); - const block = [ - "", - "# ── SF runtime exclusion (managed by sf, per-clone) ──", - ...missing, - "", - ].join("\n"); - const prefix = existing && !existing.endsWith("\n") ? "\n" : ""; - writeFileSync(excludePath, existing + prefix + block, "utf-8"); + const targetPatterns: readonly string[] = sfIsSymlink + ? SF_SYMLINK_EXCLUSION_PATTERNS + : SF_RUNTIME_EXCLUSION_PATTERNS; + + // Patterns to remove: whatever the OTHER mode would have written. 
+ // This handles transitions (symlink↔directory) by cleaning up stale entries. NOTE(review): `.sf-id` and `.bg-shell/` appear in BOTH pattern lists, so they are treated as stale here and removed without being re-added (they are not in `missing`), then re-appended on the next run; exclude target patterns from the stale set. + const stalePatterns = sfIsSymlink + ? SF_RUNTIME_EXCLUSION_PATTERNS + : SF_SYMLINK_EXCLUSION_PATTERNS; + + const existingLines = existing.split("\n").map((l) => l.trim()); + const existingSet = new Set( + existingLines.filter((l) => l && !l.startsWith("#")), + ); + + const missing = targetPatterns.filter((p) => !existingSet.has(p)); + const toRemove = new Set(stalePatterns.filter((p) => existingSet.has(p))); + + if (missing.length === 0 && toRemove.size === 0) return false; + + let content = existing + .split("\n") + .filter((l) => !toRemove.has(l.trim())) + .join("\n"); + + if (missing.length > 0) { + const block = [ + "", + "# ── SF runtime exclusion (managed by sf, per-clone) ──", + ...missing, + "", + ].join("\n"); + const prefix = content && !content.endsWith("\n") ? "\n" : ""; + content = content + prefix + block; + } + + writeFileSync(excludePath, content, "utf-8"); return true; }