refactor: align agent resource overlays

This commit is contained in:
Mikael Hugo 2026-05-14 19:32:41 +02:00
parent 7000373e88
commit 62fbc5d57b
385 changed files with 1116 additions and 23524 deletions

View file

@ -1,33 +1,52 @@
# .agents/ # .agents/
Agent configuration for this repository following the Agent configuration for this repository. The `.agents/` layout tracks the
[AGENTS-1 spec](https://github.com/agentsfolder/spec). [agents folder convention](https://github.com/agentsfolder/spec), while skills
inside it follow the [open Agent Skills format](https://agentskills.io/specification):
each skill is a directory with `SKILL.md` frontmatter and Markdown
instructions.
SF treats this as `sf-agents-overlay/v1` until the external `.agents` spec
settles. The stable contract is:
- `.agents/manifest.yaml` is the repo-owned machine index.
- `.agents/prompts/`, `.agents/policies/`, `.agents/modes/`, `.agents/scopes/`,
`.agents/profiles/`, and `.agents/adapters/` are optional project override
inputs.
- `.agents/skills/<name>/SKILL.md` is the canonical skill payload.
- `.agents/skills/<name>/skill.yaml` may exist as generated or adapter metadata,
but it is not the instruction source.
- `.agents/state/state.yaml` is local-only and ignored.
- `.sf/` remains SF runtime state; structured SF state is DB-first.
This folder is the **override and extension layer only**. SF's built-in This folder is the **override and extension layer only**. SF's built-in
defaults (modes, skills, policies) apply automatically. Files here exist defaults (modes, skills, policies) apply automatically. Files here exist
only when the project needs to override or add something. only when the project needs to override or add something.
This mirrors Copilot-style project customization: repository-owned agent
instructions and optional overrides live in the repo, while product-shipped
defaults live outside the repo overlay. For SF, bundled user-visible skills are
sourced from `src/resources/skills/`; hidden workflow pattern skills are sourced
from `src/resources/workflow-skills/`; bundled default prompts and policies are
sourced from `src/resources/agent-overlays/singularity-forge/`. `.agents/`
only adds project-specific overrides.
## Structure ## Structure
``` ```
.agents/ .agents/
AGENTS.md ← this file AGENTS.md ← this file
manifest.yaml ← specVersion, defaults, enabled skills/policies manifest.yaml ← SF overlay schema; no enabled overrides by default
prompts/ prompts/
base.md ← injected into every agent turn (iron law, DB-first, key pointers) .gitkeep ← project prompt overrides only
project.md ← SF-specific context (modes, state, build commands, source layout) snippets/ ← project prompt fragments only
snippets/ ← reusable prompt fragments (empty — no project snippets yet)
modes/ ← project mode OVERRIDES only (empty — SF built-ins apply) modes/ ← project mode OVERRIDES only (empty — SF built-ins apply)
policies/ policies/
default-safe.yaml ← conservative policy: confirm destructive ops, deny secrets paths .gitkeep ← project policy overrides only
skills/ ← project-specific skills + built-in overrides (same name = override) skills/ ← optional project user skills + built-in overrides (empty by default)
forge-autonomous-runtime/ ← explains SF autonomous loop, UOK gates, recovery paths
forge-command-surface/ ← SF slash commands, browser command parity, headless dispatch
nix-build/ ← build any @singularity-forge/* package via nix develop
sf-wiki/ ← override of built-in sf-wiki: use UPPERCASE filenames (.sf/ convention)
smoke-test/ ← run sf-run smoke tests (--version, --help, --print)
scopes/ ← path-based config overrides (empty) scopes/ ← path-based config overrides (empty)
profiles/ ← named overlays e.g. "ci", "dev" (empty) profiles/ ← named overlays e.g. "ci", "dev" (empty)
adapters/ ← optional projection targets (absent until needed)
schemas/ ← generated JSON schemas (not committed) schemas/ ← generated JSON schemas (not committed)
state/ state/
.gitignore ← excludes state.yaml (per-developer convenience, never committed) .gitignore ← excludes state.yaml (per-developer convenience, never committed)
@ -38,12 +57,13 @@ only when the project needs to override or add something.
To override a built-in mode or skill, add a file with the **same name**: To override a built-in mode or skill, add a file with the **same name**:
``` ```
# Override built-in sf-wiki skill # Override a product workflow pattern for this repo
.agents/skills/sf-wiki/SKILL.md .agents/skills/sf-repo-orientation/SKILL.md
# Override built-in build mode # Override built-in build mode
.agents/modes/build.md .agents/modes/build.md
``` ```
Built-in defaults (ask, build, autonomous modes; default-safe policy; all SF Built-in defaults (ask, build, autonomous modes; default-safe policy; bundled
system skills) are provided by SF and do not need to be listed here. prompts; bundled user skills; hidden workflow pattern skills) are provided by SF from
`src/resources/` and do not need to be listed here.

View file

@ -0,0 +1,2 @@
# Projection adapter configs belong here when this repo needs to render
# `.agents/` into agent-native files. Empty by default.

View file

@ -1,30 +1,43 @@
# .agents/ canonical agent configuration # .agents/ SF repo overlay manifest
# Spec: https://github.com/agentsfolder/spec # Layout target: https://github.com/agentsfolder/spec
# Skill source: https://agentskills.io/specification
# #
# Status: pre-1.0 spec adoption — schema may shift. Pin specVersion; # Status: SF-specific repo overlay aligned with the emerging .agents folder
# track upstream for breaking changes. # convention. This file indexes optional repo-owned overrides only. Bundled SF
# defaults, default prompts, default policies, and hidden pattern skills live in
# src/resources.
specVersion: "0.1.0" specVersion: "1.0.0"
defaults: defaults:
mode: build mode: build
policy: default-safe policy: bundled:default-safe
enabled:
modes: [] # no project overrides; SF built-in modes (ask/build/autonomous) apply
policies:
- default-safe
skills:
- forge-autonomous-runtime
- forge-command-surface
- nix-build
- sf-wiki
- smoke-test
resolution: resolution:
enableUserOverlay: false enableUserOverlay: false
denyOverridesAllow: true denyOverridesAllow: true
onConflict: error onConflict: error
precedence:
- project
- global
- bundled
prompts: {}
modes: []
adapters: {}
policies: {}
skills: {}
enabled:
modes: [] # no project overrides; SF built-in modes (ask/build/autonomous) apply
adapters: [] # no generated projection targets yet
policies: []
prompts: []
skills: []
project: project:
name: singularity-forge name: singularity-forge
@ -38,6 +51,47 @@ project:
frameworks: [] frameworks: []
x: x:
sf:
schemaVersion: sf-agents-overlay/v1
contract:
canonicalRepoOverlay: .agents/manifest.yaml
canonicalSkillPayload: SKILL.md
optionalSkillMetadata: skill.yaml
skillMetadataRequired: false
bundledResourceRoot: ../src/resources/
bundledUserSkillRoot: ../src/resources/skills/
bundledWorkflowSkillRoot: ../src/resources/workflow-skills/
bundledAgentOverlayRoot: ../src/resources/agent-overlays/singularity-forge/
runtimeStateRoot: ../.sf/
runtimeStateSourceOfTruth: false
projectSkillRootPurpose: optional repo-local user skills and overrides only
projectOverlayPurpose: optional repo-local overrides only
projectLearningTarget: reviewed repo-local .agents overrides proposed from .sf evidence
layoutFormat:
name: agents-folder
spec: https://github.com/agentsfolder/spec
role: repo-overlay-layout
canonicalSkillFormat:
name: agent-skills
spec: https://agentskills.io/specification
entrypoint: SKILL.md
agentsFolderSkillYaml:
status: compatibility-adapter
note: >-
agentsfolder/agents-cli currently loads .agents/skills/*/skill.yaml
while the AGENTS-1 README names SKILL.yaml and the
broader Agent Skills ecosystem uses SKILL.md. SF treats SKILL.md as
canonical and may generate/read skill.yaml as compatibility metadata,
but does not make it the source of truth.
runtimeGenerated:
repoMap:
path: ../.sf/repo-map/
gitignored: true
sourceOfTruth: false
traces:
path: ../.sf/traces/
gitignored: true
sourceOfTruth: false
centralcloud: centralcloud:
legacy_pointers: legacy_pointers:
- AGENTS.md - AGENTS.md

View file

@ -1,26 +0,0 @@
---
name: nix-build
description: Build any @singularity-forge/* package (or the full stack) via nix develop. Pass a package name like "pi-coding-agent", "native", "daemon", or "all" for a full core build.
---
All build commands in this repo must run inside `nix develop`. Never use bare cargo/rustc.
For a single package:
```
nix develop --command bash -c "npm run --workspace=@singularity-forge/<package> build"
```
For the full core build (native + all TS packages):
```
nix develop --command bash -c "npm run build:core"
```
For typecheck only:
```
nix develop --command bash -c "tsc --noEmit -p tsconfig.json"
```
For extensions typecheck:
```
nix develop --command bash -c "tsc --noEmit -p tsconfig.extensions.json"
```

View file

@ -1,17 +0,0 @@
---
name: smoke-test
description: Run the standard sf-run smoke tests (--version, --help, --print). All three must pass before shipping a build.
---
#!/bin/bash
set -e
echo "=== --version ==="
node dist/loader.js --version
echo "=== --help (first 5 lines) ==="
node dist/loader.js --help 2>&1 | head -5
echo "=== --print (graceful degradation) ==="
node dist/loader.js --print 2>&1 | head -5
echo "All smoke tests passed."

17
.gitignore vendored
View file

@ -107,16 +107,27 @@ repowise.db
.sf/interactive.lock .sf/interactive.lock
.sf/interactive.lock.d/ .sf/interactive.lock.d/
# SQLite WAL/SHM are ephemeral checkpoint files — only the .db is durable. # SQLite WAL/SHM are ephemeral checkpoint files — only the .db is durable.
.sf/metrics.db
.sf/metrics.db-wal .sf/metrics.db-wal
.sf/metrics.db-shm .sf/metrics.db-shm
.sf/sf.db-wal .sf/sf.db-wal
.sf/sf.db-shm .sf/sf.db-shm
# DB backups are local recovery artifacts created by migrations/maintenance.
.sf/backups/db/
# Generated SF runtime projections, caches, reports, and recovery evidence.
.sf/graphs/
.sf/model-catalog/
.sf/model-performance.json
.sf/recovery/
.sf/reflection/
.sf/safety/
.sf/slice-routing.json
.sf/triage/decisions/
.sf/repo-map/
# Per-dispatch trace files accumulate one-per-request and are runtime-only. # Per-dispatch trace files accumulate one-per-request and are runtime-only.
# Consumers (sf-db-gates, adaptive verification policy) read by mtime window # Consumers (sf-db-gates, adaptive verification policy) read by mtime window
# (24h–30d) — on-disk retention is needed, but git tracking is not. # (24h–30d) — on-disk retention is needed, but git tracking is not.
.sf/traces/pre-dispatch:*.jsonl .sf/traces/*.jsonl
.sf/traces/finalize:*.jsonl
.sf/traces/guard:*.jsonl
# `latest` is a symlink retargeted on every dispatch — pure git noise. # `latest` is a symlink retargeted on every dispatch — pure git noise.
.sf/traces/latest .sf/traces/latest
test_output.log test_output.log

View file

@ -667,24 +667,20 @@
| Skill Directory | System Label(s) | Description | | Skill Directory | System Label(s) | Description |
|-----------------|-----------------|-------------| |-----------------|-----------------|-------------|
| react-best-practices/ | Skills | React development patterns (62 files) | | create-skill/ | User-visible Skills | Skill creation scaffolding and templates |
| userinterface-wiki/ | Skills | UI/UX guidelines and component reference (155 files) |
| create-skill/ | Skills | Skill creation scaffolding and templates (25 files) | ### src/resources/workflow-skills/
| create-sf-extension/ | Skills, Extensions | SF extension scaffolding (22 files) |
| code-optimizer/ | Skills | Performance optimization techniques (16 files) | | Skill Directory | System Label(s) | Description |
| agent-browser/ | Skills, Browser Tools | Browser automation guidance (11 files) | |-----------------|-----------------|-------------|
| github-workflows/ | Skills | GitHub Actions workflow patterns (10 files) | | assumption-log/, context-lean/, error-routing/, handoff-readability/, irreversible-ops/, observe-first/, state-discipline/, vertical-slice/ | Workflow Skills | Always-on hidden autonomy constraints. Not shown in the user skill catalog. |
| debug-like-expert/ | Skills | Advanced debugging techniques (6 files) | | sf-debug-forensics/, sf-human-writing/, sf-metric-optimization/, sf-repo-orientation/, sf-vcs-hygiene/ | Workflow Skills | Automatic workflow helpers selected by context or explicit workflow preference. |
| make-interfaces-feel-better/ | Skills | UI/UX improvement patterns (5 files) |
| accessibility/ | Skills | WCAG and accessibility standards | ### docs/dev/sf-runtime/
| core-web-vitals/ | Skills | Web performance metrics guidance |
| web-quality-audit/ | Skills | Quality audit procedures | | File | System Label(s) | Description |
| best-practices/ | Skills | General development best practices | |------|-----------------|-------------|
| frontend-design/ | Skills | Frontend design principles | | autonomous-runtime.md, command-surface.md, harness.md, operating-model.md, planning.md, state.md | Contributor Docs | SF maintainer references moved out of workflow skills because they are documentation, not runtime-routed skills. |
| lint/ | Skills | Code linting standards |
| review/ | Skills | Code review guidelines |
| test/ | Skills | Testing strategies and patterns |
| web-design-guidelines/ | Skills | Web design principles |
--- ---

View file

@ -6,15 +6,83 @@ explicitly promotes them into durable project documentation.
## Default Locations ## Default Locations
- `.sf/` stores SF-local operational state, generated harness notes, scaffold - `.sf/` stores SF-local operational state, generated harness notes, scaffold
manifests, runtime caches, locks, and temporary agent files. manifests, runtime caches, locks, temporary agent files, and generated
orientation caches such as `.sf/repo-map/`.
- `docs/plans/`, `docs/specs/`, and `docs/adr/` store promoted or generated - `docs/plans/`, `docs/specs/`, and `docs/adr/` store promoted or generated
human-facing exports for review and git history. human-facing exports for review and git history.
- `docs/generated/` stores explicitly promoted generated documentation when a
project wants a generated artifact in version control.
- Generated docs may change by design. Git keeps their human-facing history; - Generated docs may change by design. Git keeps their human-facing history;
SF-owned operational history belongs in `.sf`/SQLite when runtime replay, SF-owned operational history belongs in `.sf`/SQLite when runtime replay,
ledgers, memory, or drift analysis matter. ledgers, memory, or drift analysis matter.
- Root files such as `AGENTS.md`, `ARCHITECTURE.md`, and `.siftignore` are - Root files such as `AGENTS.md`, `ARCHITECTURE.md`, and `.siftignore` are
allowed only when they are part of the versioned scaffold contract. allowed only when they are part of the versioned scaffold contract.
## Resource Layers
SF follows the same separation as Copilot-style agent customization:
| Layer | Path | Git ownership | Purpose |
|---|---|---|---|
| Bundled SF resources | `src/resources/skills/`, `src/resources/workflow-skills/`, `src/resources/agent-overlays/`, `src/resources/extensions/sf/prompts/`, `src/resources/extensions/sf/` | tracked source | Product-shipped defaults. Change these to change SF itself. Workflow skills are hidden system patterns, not a general skill catalog. |
| Repo agent overlay | `.agents/` | tracked project policy | Project-owned prompts, policies, and optional user skill overrides. Same-name skills override non-locked bundled skills. |
| Repo instructions | `AGENTS.md`, `CLAUDE.md`, `.github/copilot-instructions.md`, `.github/instructions/*.instructions.md` | tracked project policy | Agent-facing repo guidance. Keep short, durable, and non-runtime. |
| Runtime generated context | `.sf/repo-map/`, `.sf/traces/`, `.sf/backups/`, `.sf/metrics.db*` | gitignored | Local cache, observability, recovery, and orientation. Safe to delete/regenerate. |
| Promoted generated docs | `docs/generated/` | tracked after review | Durable generated docs that humans chose to version. |
`.sf/repo-map/` is not a source of truth. It is a generated context cache for
agent orientation. If a repo-map page becomes valuable as documentation, promote
it to `docs/generated/repo-map/` or rewrite it into normal `docs/`.
## Prompt and Instruction Layers
Prompt resources follow the same split:
- **Built-in prompts:** `src/resources/extensions/sf/prompts/` and other
bundled resource directories. These ship with SF and should be edited only
when changing product behavior.
- **Built-in repo defaults:** `src/resources/agent-overlays/` contains product
defaults that SF ships, including default repo prompts and default policies.
- **Repo prompts and instructions:** `.agents/prompts/`, root `AGENTS.md`,
root `CLAUDE.md`, and compatible tool files such as
`.github/copilot-instructions.md`. `.agents/prompts/` is for repo-owned
overrides only; root instruction files remain durable human-readable pointers.
- **Path-specific instructions:** use repo instruction files when behavior
should apply only to part of the tree. Keep them stable and reviewable.
- **Generated context:** `.sf/repo-map/` and other `.sf/` runtime outputs are
not prompt source. They may be injected as context when fresh, but they are
ignored and regenerable.
Do not promote generated `.sf` context by copying it into prompt overlays
verbatim. Summarize the durable rule, link the canonical source, and keep the
overlay short enough to be loaded every run.
## SF `.agents` Overlay
Until the external `.agents` specs settle, this repo uses
`sf-agents-overlay/v1`:
- `.agents/manifest.yaml` is the repo-owned machine index for optional prompt,
policy, mode, adapter, and project skill overrides.
- `.agents/prompts/`, `.agents/policies/`, `.agents/modes/`,
`.agents/adapters/`, `.agents/scopes/`, and `.agents/profiles/` are empty by
default. Defaults live in `src/resources/`.
- `.agents/skills/<name>/SKILL.md` is the canonical skill payload and follows
the Agent Skills specification.
- `.agents/skills/` is empty by default. SF product-owned user skills live under
`src/resources/skills/`; automatic workflow guidance lives under
`src/resources/workflow-skills/` as hidden system pattern skills.
- SF self-learning is per repo: runtime evidence and candidate variants belong
in `.sf` / SQLite first, reviewed repo-specific improvements are proposed as
`.agents/` overrides, and bundled defaults under `src/resources/` change only
when the improvement is product-wide.
- `.agents/skills/<name>/skill.yaml` is optional compatibility metadata for
projection tools. It may point at `SKILL.md`, but it must not replace it as
the instruction source.
- `.agents/state/state.yaml` is local convenience state and must remain
gitignored.
- `.sf/` remains SF runtime state. Structured SF state stays DB-first.
## Harness ## Harness
Generated harness material belongs under `.sf/harness/`. Generated harness material belongs under `.sf/harness/`.

View file

@ -1,9 +1,9 @@
--- ---
name: forge-autonomous-runtime name: sf-autonomous-runtime
description: Explains SF autonomous loop, UOK gates, installed-runtime drift, and recovery paths. description: Explains SF autonomous loop, UOK gates, installed-runtime drift, and recovery paths.
--- ---
# forge-autonomous-runtime # SF Autonomous Runtime
## Context ## Context

View file

@ -1,9 +1,9 @@
--- ---
name: forge-command-surface name: sf-command-surface
description: Use when changing SF slash commands, browser command parity, or headless command dispatch. description: Use when changing SF slash commands, browser command parity, or headless command dispatch.
--- ---
# forge-command-surface # SF Command Surface
## When to Use ## When to Use

View file

@ -0,0 +1,15 @@
---
name: sf-harness
description: Use when changing SF harness behavior, generated evidence, verification loops, or repo-native harness boundaries.
---
# SF Harness
Use this only for SF harness changes in this repository.
## Rules
- Keep generated harness material under `.sf/harness/` unless explicitly promoted.
- Do not create tracked top-level `harness/` files during normal runtime.
- Add executable evidence before claiming a harness behavior works.
- Keep verification paths narrow and reproducible.

View file

@ -0,0 +1,18 @@
---
name: sf-operating-model
description: Use when changing SF operating-model vocabulary, surfaces, protocols, output formats, run control, or permission-profile boundaries.
---
# SF Operating Model
Use this when editing SF operating-model docs or code paths.
## Terms
- Surface: TUI, CLI, web, editor, or machine entrypoint.
- Protocol: ACP, RPC, stdio JSON-RPC, HTTP, or wire transport.
- Output format: text, JSON, or stream JSON.
- Run control: manual, assisted, supervised, or autonomous.
- Permission profile: restricted, normal, trusted, or unrestricted.
Keep these terms separate in plans, docs, prompts, and API names.

View file

@ -0,0 +1,15 @@
---
name: sf-planning
description: Use when changing SF milestone, slice, task, backlog, promotion, or spec-first planning behavior.
---
# SF Planning
Use this for SF planning-state changes in this repository.
## Rules
- SQLite `.sf/sf.db` is canonical when ordering, joins, validation, or status matter.
- Plans must state purpose before implementation detail.
- Promote durable human-facing artifacts into `docs/plans/`, `docs/specs/`, or `docs/adr/`.
- Runtime planning drafts stay under `.sf/` and remain gitignored unless promoted.

View file

@ -0,0 +1,16 @@
---
name: sf-state
description: Use when changing SF state ownership, DB-first runtime state, generated artifacts, or .sf/.agents boundary rules.
---
# SF State
Use this for SF state-boundary work in this repository.
## Rules
- `.sf/sf.db` is the canonical structured runtime store.
- `.sf/` runtime files are generated projections, logs, recovery inputs, or caches.
- `.agents/` is repo-owned agent overlay configuration, not runtime state.
- `src/resources/` owns bundled SF defaults.
- Do not commit generated `.sf` runtime churn.

View file

@ -435,8 +435,8 @@ Parallel workers must stay worktree-isolated and report heartbeat/status into
```yaml ```yaml
--- ---
name: forge-command-surface name: sf-debug-forensics
description: Use when changing SF slash commands, browser command parity, or headless command dispatch. description: Post-mortem a failed SF autonomous mode run from persisted runtime evidence.
user-invocable: true user-invocable: true
model-invocable: true model-invocable: true
side-effects: code-edits side-effects: code-edits
@ -456,9 +456,9 @@ Fields:
| Type | Example | `model-invocable` | | Type | Example | `model-invocable` |
|------|---------|-------------------| |------|---------|-------------------|
| Background knowledge | `forge-autonomous-runtime` | true | | Background knowledge | `sf-repo-orientation` | true |
| User tool | `production-deploy` | false | | User tool | `production-deploy` | false |
| Shared capability | `forge-command-surface` | true | | Shared capability | `sf-debug-forensics` | true |
Dangerous skills (`production-mutation`) are never model-invoked by default. Dangerous skills (`production-mutation`) are never model-invoked by default.

View file

@ -92,7 +92,7 @@ See [Parallel Orchestration](./parallel-orchestration.md) for full documentation
| Command | Description | | Command | Description |
|---------|-------------| |---------|-------------|
| `/workflow new` | Create a new workflow definition (via skill) | | `/workflow new` | Show where to create and validate a workflow definition |
| `/workflow run <name>` | Create a run and start autonomous mode | | `/workflow run <name>` | Create a run and start autonomous mode |
| `/workflow list` | List workflow runs | | `/workflow list` | List workflow runs |
| `/workflow validate <name>` | Validate a workflow definition YAML | | `/workflow validate <name>` | Validate a workflow definition YAML |

View file

@ -617,9 +617,9 @@ Skill routing preferences:
```yaml ```yaml
always_use_skills: always_use_skills:
- debug-like-expert - my-debug-skill
prefer_skills: prefer_skills:
- frontend-design - my-frontend-skill
avoid_skills: [] avoid_skills: []
``` ```
@ -634,7 +634,7 @@ skill_rules:
- when: task involves authentication - when: task involves authentication
use: [clerk] use: [clerk]
- when: frontend styling work - when: frontend styling work
prefer: [frontend-design] prefer: [my-frontend-skill]
- when: working with legacy code - when: working with legacy code
avoid: [aggressive-refactor] avoid: [aggressive-refactor]
``` ```
@ -808,7 +808,7 @@ git:
skill_discovery: suggest skill_discovery: suggest
skill_staleness_days: 60 # Skills unused for N days get deprioritized (0 = disabled) skill_staleness_days: 60 # Skills unused for N days get deprioritized (0 = disabled)
always_use_skills: always_use_skills:
- debug-like-expert - my-debug-skill
skill_rules: skill_rules:
- when: task involves authentication - when: task involves authentication
use: [clerk] use: [clerk]

View file

@ -6,14 +6,35 @@ Skills follow the open [Agent Skills standard](https://agentskills.io/) and are
## Skill Directories ## Skill Directories
SF reads skills from two locations, in priority order: SF has three user skill layers:
| Layer | Location | Scope | Description |
|---|---|---|---|
| Bundled user-visible | `src/resources/skills/` | SF product | Built into SF releases. Only `create-skill` is listed as a default user skill. |
| Global | `~/.agents/skills/` | User | Shared across projects and compatible agents. |
| Project | `.agents/skills/` | Repo | Optional project-specific user skills and overrides, committable to version control. |
At runtime, SF resolves user/project skills over bundled defaults when names
collide. This mirrors Copilot-style project skills: `.agents/skills/` is a
repo-owned customization layer, not where SF built-ins live.
For default releases, the only bundled skill shown to users is `create-skill`.
Internal automatic workflow guidance is modeled as hidden system pattern skills
under `src/resources/workflow-skills/`. That directory is intentionally small:
core workflow constraints, SF runtime/planning/state patterns, and automatic
workflow helpers such as `sf-repo-orientation`, `sf-debug-forensics`,
`sf-metric-optimization`, `sf-human-writing`, and `sf-vcs-hygiene`. They use the
`SKILL.md` shape so SF can test and evolve them like Hermes-style pattern
packages, but they are not listed in the user skill catalog.
SF reads user and project skills from these portable locations:
| Location | Scope | Description | | Location | Scope | Description |
|-----------------------------------|---------|----------------------------------------------------------| |-----------------------------------|---------|----------------------------------------------------------|
| `~/.agents/skills/` | Global | Shared across all projects and all compatible agents | | `~/.agents/skills/` | Global | Shared across all projects and all compatible agents |
| `.agents/skills/` (project root) | Project | Project-specific skills, committable to version control | | `.agents/skills/` (project root) | Project | Project-specific skills, committable to version control |
Global skills take precedence over project skills when names collide. Global/project skills take precedence over bundled skills when names collide.
> **Migration from `~/.sf/agent/skills/`:** On first launch after upgrading, SF automatically copies skills from the legacy `~/.sf/agent/skills/` directory to `~/.agents/skills/`. The old directory is preserved for backward compatibility. > **Migration from `~/.sf/agent/skills/`:** On first launch after upgrading, SF automatically copies skills from the legacy `~/.sf/agent/skills/` directory to `~/.agents/skills/`. The old directory is preserved for backward compatibility.
@ -100,27 +121,29 @@ Control which skills are used via preferences:
--- ---
version: 1 version: 1
always_use_skills: always_use_skills:
- debug-like-expert - my-debug-skill
prefer_skills: prefer_skills:
- frontend-design - my-frontend-skill
avoid_skills: avoid_skills:
- security-docker - security-docker
skill_rules: skill_rules:
- when: task involves Clerk authentication - when: task involves Clerk authentication
use: [clerk] use: [clerk]
- when: frontend styling work - when: frontend styling work
prefer: [frontend-design] prefer: [my-frontend-skill]
--- ---
``` ```
### Resolution Order ### Resolution Order
Skills can be referenced by: Skills can be referenced by:
1. **Bare name** — e.g., `frontend-design` → scans `~/.agents/skills/` and project `.agents/skills/` 1. **Bare name** — e.g., `my-frontend-skill` → scans `~/.agents/skills/` and project `.agents/skills/`
2. **Absolute path** — e.g., `/Users/you/.agents/skills/my-skill/SKILL.md` 2. **Absolute path** — e.g., `/Users/you/.agents/skills/my-skill/SKILL.md`
3. **Directory path** — e.g., `~/custom-skills/my-skill` → looks for `SKILL.md` inside 3. **Directory path** — e.g., `~/custom-skills/my-skill` → looks for `SKILL.md` inside
Global skills (`~/.agents/skills/`) take precedence over project skills (`.agents/skills/`). Project skills (`.agents/skills/`) take precedence over global skills
(`~/.agents/skills/`) for the same name. Global skills take precedence over
non-locked bundled defaults.
## Custom Skills ## Custom Skills
@ -143,7 +166,28 @@ Place skills in your project for project-specific guidance:
SKILL.md SKILL.md
``` ```
Project-local skills can be committed to version control so team members share the same skill set. Project-local skills can be committed to version control so team members share
the same skill set. Do not put SF product-owned workflow guidance here;
product-wide defaults belong in `src/resources/workflow-skills/`. A repo may
still add a same-name `.agents/skills/<name>/SKILL.md` file to override a
non-locked system pattern for that repo.
### Learning and Overrides
SF self-learning is per repo. Runtime evidence, traces, and candidate variants
belong in `.sf` / SQLite first. Reviewed repo-specific improvements are proposed
as `.agents/` overrides. Bundled defaults under `src/resources/` are changed
only when the improvement is generally useful across projects.
### `SKILL.md` and `skill.yaml`
`SKILL.md` is the canonical skill payload. It contains the frontmatter and
Markdown instructions that compatible agents load.
Some projection tools also use `skill.yaml` for resolver metadata such as
activation mode, interface, requirements, or contracts. In SF, that file is
optional compatibility metadata. It should point at `SKILL.md` when present and
must not duplicate or replace the model-facing instructions.
## Skill Lifecycle Management ## Skill Lifecycle Management

View file

@ -271,10 +271,10 @@ function collectAutoBootstrapFiles(basePath: string): string[] {
} }
} }
// Include .sf/wiki/*.md pages — excluded from the general walk because // Include .sf/repo-map/*.md pages — excluded from the general walk because
// .sf is in AUTO_BOOTSTRAP_EXCLUDED_DIRS, but wiki pages are high-value // .sf is in AUTO_BOOTSTRAP_EXCLUDED_DIRS, but repo-map pages are high-value
// orientation context that should always be available to new agents. // generated orientation context that should always be available to new agents.
for (const path of collectWikiFiles(basePath)) { for (const path of collectRepoMapFiles(basePath)) {
if (!seen.has(path)) { if (!seen.has(path)) {
seen.add(path); seen.add(path);
files.push(path); files.push(path);
@ -290,18 +290,18 @@ function collectAutoBootstrapFiles(basePath: string): string[] {
return files; return files;
} }
function collectWikiFiles(basePath: string): string[] { function collectRepoMapFiles(basePath: string): string[] {
const wikiDir = join(basePath, ".sf", "wiki"); const repoMapDir = join(basePath, ".sf", "repo-map");
let entries: Dirent[]; let entries: Dirent[];
try { try {
entries = readdirSync(wikiDir, { withFileTypes: true }) as Dirent[]; entries = readdirSync(repoMapDir, { withFileTypes: true }) as Dirent[];
} catch { } catch {
return []; return [];
} }
return entries return entries
.filter((e) => e.isFile() && e.name.toLowerCase().endsWith(".md")) .filter((e) => e.isFile() && e.name.toLowerCase().endsWith(".md"))
.sort((a, b) => a.name.localeCompare(b.name)) .sort((a, b) => a.name.localeCompare(b.name))
.map((e) => join(wikiDir, e.name)); .map((e) => join(repoMapDir, e.name));
} }
function existsMarkdownFile(path: string): boolean { function existsMarkdownFile(path: string): boolean {

View file

@ -520,11 +520,21 @@ export async function runTriageApply(
const traceWriterModule = (await jiti.import( const traceWriterModule = (await jiti.import(
sfExtensionPath("uok/trace-writer"), sfExtensionPath("uok/trace-writer"),
)) as { )) as {
appendTraceEventRequired: ( appendTraceEvent?: (
basePath: string, basePath: string,
traceId: string, traceId: string,
event: Record<string, unknown>, event: Record<string, unknown>,
) => void; ) => void;
appendTraceEventRequired?: (
basePath: string,
traceId: string,
event: Record<string, unknown>,
) => void;
readTraceEvents?: (
basePath: string,
type: string,
windowHours?: number,
) => Array<Record<string, unknown>>;
}; };
// surface: "headless" - runTriageApply is always operator-invoked // surface: "headless" - runTriageApply is always operator-invoked
@ -550,8 +560,7 @@ export async function runTriageApply(
if (!uokContext) { if (!uokContext) {
return new Error("buildUokRunContext returned null for triage --apply"); return new Error("buildUokRunContext returned null for triage --apply");
} }
try { const event = {
traceWriterModule.appendTraceEventRequired(cwd, flowId, {
type: "gate_run", type: "gate_run",
traceId: uokContext.traceId, traceId: uokContext.traceId,
turnId: `triage-apply:${gateId}`, turnId: `triage-apply:${gateId}`,
@ -578,7 +587,28 @@ export async function runTriageApply(
runControl: uokContext.runControl, runControl: uokContext.runControl,
permissionProfile: uokContext.permissionProfile, permissionProfile: uokContext.permissionProfile,
...extra, ...extra,
}); };
try {
if (typeof traceWriterModule.appendTraceEventRequired === "function") {
traceWriterModule.appendTraceEventRequired(cwd, flowId, event);
} else if (typeof traceWriterModule.appendTraceEvent === "function") {
traceWriterModule.appendTraceEvent(cwd, flowId, event);
const persisted = traceWriterModule
.readTraceEvents?.(cwd, "gate_run", 24 * 30)
.some(
(ev) =>
ev.traceId === event.traceId &&
ev.turnId === event.turnId &&
ev.gateId === event.gateId,
);
if (!persisted) {
return new Error(
`legacy trace writer did not persist ${gateId} gate event`,
);
}
} else {
return new Error("trace writer does not expose appendTraceEvent");
}
return null; return null;
} catch (err) { } catch (err) {
return err instanceof Error ? err : new Error(String(err)); return err instanceof Error ? err : new Error(String(err));

View file

@ -184,10 +184,10 @@ function collectAutoBootstrapFiles(basePath) {
files.push(path); files.push(path);
} }
} }
// Include .sf/wiki/*.md pages — excluded from the general walk because // Include .sf/repo-map/*.md pages — excluded from the general walk because
// .sf is in AUTO_BOOTSTRAP_EXCLUDED_DIRS, but wiki pages are high-value // .sf is in AUTO_BOOTSTRAP_EXCLUDED_DIRS, but repo-map pages are high-value
// orientation context that should always be available to new agents. // generated orientation context that should always be available to new agents.
for (const path of collectWikiFiles(basePath)) { for (const path of collectRepoMapFiles(basePath)) {
if (!seen.has(path)) { if (!seen.has(path)) {
seen.add(path); seen.add(path);
files.push(path); files.push(path);
@ -200,18 +200,18 @@ function collectAutoBootstrapFiles(basePath) {
} }
return files; return files;
} }
function collectWikiFiles(basePath) { function collectRepoMapFiles(basePath) {
const wikiDir = join(basePath, ".sf", "wiki"); const repoMapDir = join(basePath, ".sf", "repo-map");
let entries; let entries;
try { try {
entries = readdirSync(wikiDir, { withFileTypes: true }); entries = readdirSync(repoMapDir, { withFileTypes: true });
} catch { } catch {
return []; return [];
} }
return entries return entries
.filter((e) => e.isFile() && e.name.toLowerCase().endsWith(".md")) .filter((e) => e.isFile() && e.name.toLowerCase().endsWith(".md"))
.sort((a, b) => a.name.localeCompare(b.name)) .sort((a, b) => a.name.localeCompare(b.name))
.map((e) => join(wikiDir, e.name)); .map((e) => join(repoMapDir, e.name));
} }
function existsMarkdownFile(path) { function existsMarkdownFile(path) {
try { try {

View file

@ -6,7 +6,7 @@
* utility. * utility.
*/ */
import { existsSync } from "node:fs"; import { existsSync } from "node:fs";
import { basename, join } from "node:path"; import { basename } from "node:path";
import { getLoadedSkills } from "@singularity-forge/coding-agent"; import { getLoadedSkills } from "@singularity-forge/coding-agent";
import { getAutoSession } from "./auto/session.js"; import { getAutoSession } from "./auto/session.js";
import { buildExtractionStepsBlock } from "./commands-extract-learnings.js"; import { buildExtractionStepsBlock } from "./commands-extract-learnings.js";
@ -66,11 +66,6 @@ import {
isDbAvailable, isDbAvailable,
} from "./sf-db.js"; } from "./sf-db.js";
import { warnIfManifestHasMissingSkills } from "./skill-manifest.js"; import { warnIfManifestHasMissingSkills } from "./skill-manifest.js";
import {
getModelInvocableSkills,
getPermittedSkills,
loadSkills,
} from "./skills/index.js";
import { import {
formatDecisionsCompact, formatDecisionsCompact,
formatRequirementsCompact, formatRequirementsCompact,
@ -89,6 +84,7 @@ import {
} from "./workflow-helpers.js"; } from "./workflow-helpers.js";
import { logWarning } from "./workflow-logger.js"; import { logWarning } from "./workflow-logger.js";
import { getErrorMessage } from "./error-utils.js"; import { getErrorMessage } from "./error-utils.js";
import { loadSkills } from "./skills/index.js";
// ─── Preamble Cap ───────────────────────────────────────────────────────────── // ─── Preamble Cap ─────────────────────────────────────────────────────────────
/** /**
@ -764,16 +760,26 @@ function tokenizeSkillContext(...parts) {
return tokens; return tokens;
} }
function skillMatchesContext(skill, contextTokens) { function skillMatchesContext(skill, contextTokens) {
const score = scoreSkillContextMatch(skill, contextTokens);
return score > 0;
}
function scoreSkillContextMatch(skill, contextTokens) {
const haystacks = [ const haystacks = [
skill.name.toLowerCase(), skill.name.toLowerCase(),
skill.name.toLowerCase().replace(/[-_]+/g, " "), skill.name.toLowerCase().replace(/[-_]+/g, " "),
skill.description.toLowerCase(), skill.description.toLowerCase(),
]; ];
return [...contextTokens].some( let score = 0;
(token) => for (const token of contextTokens) {
token.length >= 3 && if (token.length < 3) continue;
haystacks.some((haystack) => haystack.includes(token)), if (token === "sf") continue;
); for (const haystack of haystacks) {
if (haystack === token) score += 5;
else if (haystack.split(/[^a-z0-9+.#]+/g).includes(token)) score += 3;
else if (haystack.includes(token)) score += 1;
}
}
return score;
} }
function resolvePreferenceSkillNames(refs, base) { function resolvePreferenceSkillNames(refs, base) {
if (refs.length === 0) return []; if (refs.length === 0) return [];
@ -828,36 +834,121 @@ function resolvePreferredSkillNames(prefs, visibleSkills, contextTokens, base) {
/** /**
* Build the workflow constraints block for agent dispatch prompts. * Build the workflow constraints block for agent dispatch prompts.
* *
* Purpose: inject locked workflow skills as behavioral constraints into every * Purpose: inject product-owned hidden workflow pattern skills into every
* agent dispatch so the 8 cross-cutting patterns are always active not as * agent dispatch so cross-cutting patterns are always active. These use the
* invocable tools, but as enforced behavioral guidelines. * SKILL.md shape for evaluation/evolution, but are not user-invocable.
* *
* Consumer: buildSkillActivationBlock appended after the skill_activation block. * Consumer: buildSkillActivationBlock appended after the skill_activation block.
*/ */
function buildWorkflowConstraintsBlock(base, workMode, permissionProfile) { function buildWorkflowConstraintsBlock(params) {
let skills; let allPatternSkills;
let constraintPatterns;
let workflowSkills;
try { try {
skills = loadSkills(base, { includeWorkflow: true, includeBundled: false }); allPatternSkills = loadWorkflowPatternSkills(params);
constraintPatterns = allPatternSkills.filter((skill) =>
ALWAYS_ON_WORKFLOW_SKILL_NAMES.has(normalizeSkillReference(skill.name)),
);
workflowSkills = allPatternSkills.filter(
(skill) =>
!ALWAYS_ON_WORKFLOW_SKILL_NAMES.has(normalizeSkillReference(skill.name)),
);
} catch { } catch {
return ""; return "";
} }
const permitted = getPermittedSkills(skills, permissionProfile ?? "normal"); if (constraintPatterns.length === 0 && workflowSkills.length === 0) return "";
const active = getModelInvocableSkills(permitted, workMode ?? "build");
if (active.length === 0) return "";
// Cap at 5 skills (P0 first, then P1, P2 in order of triggers match) const constraintNames = new Set(
const capped = active.slice(0, 5); constraintPatterns.map((pattern) => normalizeSkillReference(pattern.name)),
const sections = capped.map((skill) => { );
const body = skill.body ?? ""; const explicitNames = params.explicitSkillNames ?? new Set();
// Truncate to ~500 chars at a paragraph boundary to keep context lean const avoidedNames = params.avoidedSkillNames ?? new Set();
const selectedByName = new Map();
for (const pattern of constraintPatterns.slice(0, 8)) {
selectedByName.set(normalizeSkillReference(pattern.name), {
...pattern,
reason: "constraint",
});
}
const contextMatches = [];
for (const skill of workflowSkills) {
const name = normalizeSkillReference(skill.name);
if (selectedByName.has(name)) continue;
if (avoidedNames.has(name)) continue;
if (explicitNames.has(name)) {
selectedByName.set(name, { ...skill, reason: "explicit" });
continue;
}
if (
skill.modelInvocable !== false &&
!constraintNames.has(name) &&
skillMatchesContext(skill, params.contextTokens ?? new Set())
) {
contextMatches.push({
...skill,
reason: "context",
matchScore: scoreSkillContextMatch(
skill,
params.contextTokens ?? new Set(),
),
});
}
}
for (const skill of contextMatches
.sort((a, b) => b.matchScore - a.matchScore || a.name.localeCompare(b.name))
.slice(0, 4)) {
selectedByName.set(normalizeSkillReference(skill.name), skill);
}
const sections = [...selectedByName.values()].map((pattern) => {
const body = pattern.body ?? "";
const limit = pattern.reason === "constraint" ? 500 : 700;
const truncated = const truncated =
body.length > 500 body.length > limit
? body.slice(0, 500).replace(/\n[^\n]*$/, "") + "\n..." ? body.slice(0, limit).replace(/\n[^\n]*$/, "") + "\n..."
: body; : body;
return `### ${skill.name}\n\n${truncated}`; const label =
pattern.reason === "constraint"
? "always-on constraint"
: `${pattern.reason} workflow pattern`;
return `### ${pattern.name} (${label})\n\n${truncated}`;
}); });
return `\n\n## Active Workflow Constraints\n\n${sections.join("\n\n")}`; return `\n\n## Active Workflow Patterns\n\n${sections.join("\n\n")}`;
}
const ALWAYS_ON_WORKFLOW_SKILL_NAMES = new Set([
"assumption-log",
"context-lean",
"error-routing",
"handoff-readability",
"irreversible-ops",
"observe-first",
"state-discipline",
"vertical-slice",
]);
function loadWorkflowPatternSkills(params) {
return loadSkills(params.base, {
includeBundled: false,
includeWorkflow: true,
})
.filter(
(skill) =>
skill.valid &&
(skill.source === "workflow" ||
ALWAYS_ON_WORKFLOW_SKILL_NAMES.has(
normalizeSkillReference(skill.name),
)),
)
.map((skill) => ({
name: skill.name,
description: skill.description ?? "",
body: skill.body ?? "",
modelInvocable: skill.modelInvocable,
}));
} }
/** Skill names must be lowercase alphanumeric with hyphens reject anything else /** Skill names must be lowercase alphanumeric with hyphens reject anything else
@ -958,9 +1049,14 @@ export function buildSkillActivationBlock(params) {
// getAutoSession may be unavailable in test contexts — use defaults // getAutoSession may be unavailable in test contexts — use defaults
} }
const workflowBlock = buildWorkflowConstraintsBlock( const workflowBlock = buildWorkflowConstraintsBlock(
params.base, {
base: params.base,
contextTokens,
explicitSkillNames: matched,
avoidedSkillNames: avoided,
workMode, workMode,
permissionProfile, permissionProfile,
},
); );
return userSkillBlock + workflowBlock; return userSkillBlock + workflowBlock;

View file

@ -83,11 +83,11 @@ function cachedReadFile(filePath) {
} }
} }
/** /**
* Bundled skill triggers resolved dynamically at runtime instead of * Portable skill trigger hints resolved dynamically at runtime instead of
* hardcoding absolute paths in the system prompt template. Only skills * hardcoding absolute paths in the system prompt template. Only user/project
* that actually exist on disk are included in the table. (#3575) * skills that actually exist on disk are included in the table. (#3575)
*/ */
const BUNDLED_SKILL_TRIGGERS = [ const PORTABLE_SKILL_TRIGGER_HINTS = [
{ {
trigger: trigger:
"Frontend UI - web components, pages, landing pages, dashboards, React/HTML/CSS, styling", "Frontend UI - web components, pages, landing pages, dashboards, React/HTML/CSS, styling",
@ -111,13 +111,13 @@ const BUNDLED_SKILL_TRIGGERS = [
function buildBundledSkillsTable() { function buildBundledSkillsTable() {
const cwd = process.cwd(); const cwd = process.cwd();
const rows = []; const rows = [];
for (const { trigger, skill } of BUNDLED_SKILL_TRIGGERS) { for (const { trigger, skill } of PORTABLE_SKILL_TRIGGER_HINTS) {
const resolution = resolveSkillReference(skill, cwd); const resolution = resolveSkillReference(skill, cwd);
if (resolution.method === "unresolved") continue; // skill not installed — omit from prompt if (resolution.method === "unresolved") continue; // skill not installed — omit from prompt
rows.push(`| ${trigger} | \`${resolution.resolvedPath}\` |`); rows.push(`| ${trigger} | \`${resolution.resolvedPath}\` |`);
} }
if (rows.length === 0) { if (rows.length === 0) {
return "*No bundled skills found. Install skills to `~/.agents/skills/` or `~/.claude/skills/`.*"; return "*No portable user/project skills found. Install skills to `~/.agents/skills/` or add project overrides under `.agents/skills/`.*";
} }
return `| Trigger | Skill to load |\n|---|---|\n${rows.join("\n")}`; return `| Trigger | Skill to load |\n|---|---|\n${rows.join("\n")}`;
} }
@ -156,7 +156,7 @@ exhausted, not just because the next tier is faster.
Tier 1 Code lookup: Tier 1 Code lookup:
- grep/find/ls for broad orientation; scoped sift / codebase_search for symbols, patterns, prior usages when Sift status is healthy for the repo - grep/find/ls for broad orientation; scoped sift / codebase_search for symbols, patterns, prior usages when Sift status is healthy for the repo
- Read source files (Read tool, file paths from PLAN/CODEBASE) - Read source files (Read tool, file paths from PLAN/CODEBASE)
- Inspect .sf/wiki/ (injected as WIKI block when present), .sf/DECISIONS.md, .sf/KNOWLEDGE.md, docs/design-docs/, docs/records/ - Inspect .sf/repo-map/ (injected as REPO MAP block when present), .sf/DECISIONS.md, .sf/KNOWLEDGE.md, docs/design-docs/, docs/records/
- Check tests for documented behavior - Check tests for documented behavior
Tier 2 External lookup (factual questions): Tier 2 External lookup (factual questions):
@ -214,7 +214,7 @@ export async function buildBeforeAgentStartResult(event, ctx) {
); );
const architectureBlock = loadArchitectureBlock(process.cwd()); const architectureBlock = loadArchitectureBlock(process.cwd());
const tacitKnowledgeBlock = loadTacitKnowledgeBlock(process.cwd()); const tacitKnowledgeBlock = loadTacitKnowledgeBlock(process.cwd());
const wikiBlock = loadWikiBlock(process.cwd()); const repoMapBlock = loadRepoMapBlock(process.cwd());
if (globalSizeKb > 4) { if (globalSizeKb > 4) {
ctx.ui.notify( ctx.ui.notify(
`SF: ~/.sf/agent/KNOWLEDGE.md is ${globalSizeKb.toFixed(1)}KB — consider trimming to keep system prompt lean.`, `SF: ~/.sf/agent/KNOWLEDGE.md is ${globalSizeKb.toFixed(1)}KB — consider trimming to keep system prompt lean.`,
@ -318,7 +318,7 @@ export async function buildBeforeAgentStartResult(event, ctx) {
? `\n\n[JUDGMENT LOG — autonomous mode]\nWhen you make a judgment call between alternatives at an ambiguous point, call log_decision with: decision, alternatives, reasoning, confidence. This lets the user review your reasoning at milestone close. It does NOT delay or block the work.` ? `\n\n[JUDGMENT LOG — autonomous mode]\nWhen you make a judgment call between alternatives at an ambiguous point, call log_decision with: decision, alternatives, reasoning, confidence. This lets the user review your reasoning at milestone close. It does NOT delay or block the work.`
: ""; : "";
const selfFeedbackBlock = loadSelfFeedbackBlock(process.cwd()); const selfFeedbackBlock = loadSelfFeedbackBlock(process.cwd());
const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${escalationPolicyBlock}${systemContent}${preferenceBlock}${knowledgeBlock}${architectureBlock}${tacitKnowledgeBlock}${wikiBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${selfFeedbackBlock}${worktreeBlock}${repositoryVcsBlock}${modelIdentityBlock}${subagentModelBlock}${judgmentLogBlock}`; const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — SF]\n\n${escalationPolicyBlock}${systemContent}${preferenceBlock}${knowledgeBlock}${architectureBlock}${tacitKnowledgeBlock}${repoMapBlock}${codebaseBlock}${codeIntelligenceBlock}${memoryBlock}${newSkillsBlock}${selfFeedbackBlock}${worktreeBlock}${repositoryVcsBlock}${modelIdentityBlock}${subagentModelBlock}${judgmentLogBlock}`;
stopContextTimer({ stopContextTimer({
systemPromptSize: fullSystem.length, systemPromptSize: fullSystem.length,
injectionSize: injection?.length ?? forensicsInjection?.length ?? 0, injectionSize: injection?.length ?? forensicsInjection?.length ?? 0,
@ -485,22 +485,22 @@ export function loadTacitKnowledgeBlock(cwd) {
if (nonGoals) parts.push(`\n## Non-goals\n\n${nonGoals}`); if (nonGoals) parts.push(`\n## Non-goals\n\n${nonGoals}`);
return `\n\n${parts.join("\n")}`; return `\n\n${parts.join("\n")}`;
} }
const WIKI_FILE_MAX_CHARS = 4_000; const REPO_MAP_FILE_MAX_CHARS = 4_000;
const WIKI_TOTAL_MAX_CHARS = 16_000; const REPO_MAP_TOTAL_MAX_CHARS = 16_000;
/** /**
* Load .sf/wiki/*.md files into a single context block. Each file is capped at * Load .sf/repo-map/*.md files into a single context block. Each file is capped at
* 4 000 chars; total block is capped at 16 000 chars. Files are sorted * 4 000 chars; total block is capped at 16 000 chars. Files are sorted
* alphabetically. Subdirectories are skipped. * alphabetically. Subdirectories are skipped.
* *
* The wiki is the primary place for project-specific reference docs, ADRs, * The repo map is runtime orientation context, not the durable source of truth.
* backlog summaries, and architecture notes that should persist across sessions. * Promote reviewed, durable output to docs/generated/repo-map/ or normal docs/.
*/ */
export function loadWikiBlock(cwd) { export function loadRepoMapBlock(cwd) {
const wikiDir = join(cwd, ".sf", "wiki"); const repoMapDir = join(cwd, ".sf", "repo-map");
if (!existsSync(wikiDir)) return ""; if (!existsSync(repoMapDir)) return "";
let entries; let entries;
try { try {
entries = readdirSync(wikiDir, { withFileTypes: true }); entries = readdirSync(repoMapDir, { withFileTypes: true });
} catch { } catch {
return ""; return "";
} }
@ -512,24 +512,24 @@ export function loadWikiBlock(cwd) {
const sections = []; const sections = [];
let totalChars = 0; let totalChars = 0;
for (const filename of mdFiles) { for (const filename of mdFiles) {
if (totalChars >= WIKI_TOTAL_MAX_CHARS) { if (totalChars >= REPO_MAP_TOTAL_MAX_CHARS) {
sections.push( sections.push(
`*(additional wiki files omitted — see .sf/wiki/ for full content)*`, `*(additional repo-map files omitted — see .sf/repo-map/ for full content)*`,
); );
break; break;
} }
const raw = cachedReadFile(join(wikiDir, filename))?.trim() ?? ""; const raw = cachedReadFile(join(repoMapDir, filename))?.trim() ?? "";
if (!raw) continue; if (!raw) continue;
const content = const content =
raw.length > WIKI_FILE_MAX_CHARS raw.length > REPO_MAP_FILE_MAX_CHARS
? raw.slice(0, WIKI_FILE_MAX_CHARS) + ? raw.slice(0, REPO_MAP_FILE_MAX_CHARS) +
`\n\n*(truncated — see .sf/wiki/${filename} for full content)*` `\n\n*(truncated — see .sf/repo-map/${filename} for full content)*`
: raw; : raw;
sections.push(`### ${filename}\n\n${content}`); sections.push(`### ${filename}\n\n${content}`);
totalChars += content.length; totalChars += content.length;
} }
if (sections.length === 0) return ""; if (sections.length === 0) return "";
return `\n\n[WIKI — Project reference docs (.sf/wiki/)]\n\n${sections.join("\n\n---\n\n")}`; return `\n\n[REPO MAP — Project repo map (.sf/repo-map/)]\n\n${sections.join("\n\n---\n\n")}`;
} }
/** /**
* Load ARCHITECTURE.md from the project root into context. Capped at 8 000 chars * Load ARCHITECTURE.md from the project root into context. Capped at 8 000 chars

View file

@ -38,7 +38,7 @@ import { getErrorMessage } from "../../error-utils.js";
const WORKFLOW_USAGE = [ const WORKFLOW_USAGE = [
"Usage: /workflow <subcommand>", "Usage: /workflow <subcommand>",
"", "",
" new — Create a new workflow definition (via skill)", " new — Create a new workflow definition from docs/examples",
" run <name> [k=v] — Create a run and start autonomous mode", " run <name> [k=v] — Create a run and start autonomous mode",
" list [name] — List workflow runs (optionally filtered by name)", " list [name] — List workflow runs (optionally filtered by name)",
" validate <name> — Validate a workflow definition YAML", " validate <name> — Validate a workflow definition YAML",
@ -106,7 +106,7 @@ async function handleCustomWorkflow(sub, ctx, pi) {
// ── new ── // ── new ──
if (sub === "new") { if (sub === "new") {
ctx.ui.notify( ctx.ui.notify(
"Use the create-workflow skill: /skill create-workflow", "Create workflow definitions under .sf/workflows/ from docs/dev/proposals/workflows/ examples, then run /workflow validate <name>.",
"info", "info",
); );
return true; return true;

View file

@ -343,7 +343,7 @@ The cleanest preferences file only specifies what you actually want:
--- ---
version: 1 version: 1
always_use_skills: always_use_skills:
- debug-like-expert - my-debug-skill
skill_discovery: suggest skill_discovery: suggest
models: models:
planning: claude-opus-4-6 planning: claude-opus-4-6
@ -466,11 +466,11 @@ models:
--- ---
version: 1 version: 1
always_use_skills: always_use_skills:
- /Users/you/.claude/skills/verify-uat - /Users/you/.agents/skills/verify-uat
skill_rules: skill_rules:
- when: finishing implementation and human judgment matters - when: finishing implementation and human judgment matters
use: use:
- /Users/you/.claude/skills/verify-uat - /Users/you/.agents/skills/verify-uat
--- ---
``` ```

View file

@ -111,8 +111,8 @@ function discoverTrackedFiles(repoRoot) {
walkDir(join(repoRoot, "docs", "adr"), "adr", repoRoot, results); walkDir(join(repoRoot, "docs", "adr"), "adr", repoRoot, results);
// docs/plans/**/*.md // docs/plans/**/*.md
walkDir(join(repoRoot, "docs", "plans"), "plan", repoRoot, results); walkDir(join(repoRoot, "docs", "plans"), "plan", repoRoot, results);
// .sf/wiki/**/*.md // .sf/repo-map/**/*.md (runtime orientation context; gitignored by default)
walkDir(join(repoRoot, ".sf", "wiki"), "wiki", repoRoot, results); walkDir(join(repoRoot, ".sf", "repo-map"), "repo-map", repoRoot, results);
return results; return results;
} }

View file

@ -9,7 +9,7 @@ import { homedir } from "node:os";
import { isAbsolute, join } from "node:path"; import { isAbsolute, join } from "node:path";
import { validatePreferences } from "./preferences-validation.js"; import { validatePreferences } from "./preferences-validation.js";
import { sfHome } from "./sf-home.js"; import { sfHome } from "./sf-home.js";
import { CLAUDE_SKILLS_DIR, SKILLS_DIR } from "./skill-discovery.js"; import { SKILLS_DIR } from "./skill-discovery.js";
/** /**
* Get skill search directories in priority order for resolution. * Get skill search directories in priority order for resolution.
* *
@ -20,11 +20,8 @@ import { CLAUDE_SKILLS_DIR, SKILLS_DIR } from "./skill-discovery.js";
*/ */
export function getSkillSearchDirs(cwd) { export function getSkillSearchDirs(cwd) {
const dirs = [ const dirs = [
{ dir: SKILLS_DIR, method: "user-skill" },
{ dir: join(cwd, ".agents", "skills"), method: "project-skill" }, { dir: join(cwd, ".agents", "skills"), method: "project-skill" },
// Claude Code official skill directories { dir: SKILLS_DIR, method: "user-skill" },
{ dir: CLAUDE_SKILLS_DIR, method: "user-skill" },
{ dir: join(cwd, ".claude", "skills"), method: "project-skill" },
]; ];
// Legacy fallback — read skills from old SF directory only if migration hasn't completed // Legacy fallback — read skills from old SF directory only if migration hasn't completed
const legacyDir = join(sfHome(), "agent", "skills"); const legacyDir = join(sfHome(), "agent", "skills");

View file

@ -77,7 +77,7 @@ Titles live inside file content (headings, frontmatter), not in file or director
DECISIONS.md (append-only register of architectural and pattern decisions) DECISIONS.md (append-only register of architectural and pattern decisions)
KNOWLEDGE.md (append-only register of project-specific rules, patterns, and lessons learned) KNOWLEDGE.md (append-only register of project-specific rules, patterns, and lessons learned)
CODEBASE.md (generated fallback codebase map cache — auto-refreshed when tracked files change) CODEBASE.md (generated fallback codebase map cache — auto-refreshed when tracked files change)
wiki/ (generated + hand-curated reference wiki — tracked in git; use sf-wiki skill to generate) repo-map/ (generated runtime orientation map — gitignored; use SF repo-orientation workflow pattern to generate)
INDEX.md (what this repo is, how to run it, where to start) INDEX.md (what this repo is, how to run it, where to start)
ARCHITECTURE.md (major subsystems and data/control flow) ARCHITECTURE.md (major subsystems and data/control flow)
WORKFLOWS.md (build, test, release, autonomous/SF flows) WORKFLOWS.md (build, test, release, autonomous/SF flows)
@ -126,7 +126,7 @@ In all modes, slices commit sequentially on the active branch; there are no per-
- **DECISIONS.md** is an append-only register of architectural and pattern decisions - read it during planning/research, append to it during execution when a meaningful decision is made - **DECISIONS.md** is an append-only register of architectural and pattern decisions - read it during planning/research, append to it during execution when a meaningful decision is made
- **KNOWLEDGE.md** is an append-only register of project-specific rules, patterns, and lessons learned. Read it at the start of every unit. Append to it when you discover a recurring issue, a non-obvious pattern, or a rule that future agents should follow. - **KNOWLEDGE.md** is an append-only register of project-specific rules, patterns, and lessons learned. Read it at the start of every unit. Append to it when you discover a recurring issue, a non-obvious pattern, or a rule that future agents should follow.
- **CODEBASE.md** is a generated fallback snapshot of the tracked repository. SF may inject it when available, but healthy Sift is the preferred live code index. Use CODEBASE only when Sift is unavailable, cold, degraded, or when you need a durable overview. Use `/codebase update` only when you need to force an immediate refresh. - **CODEBASE.md** is a generated fallback snapshot of the tracked repository. SF may inject it when available, but healthy Sift is the preferred live code index. Use CODEBASE only when Sift is unavailable, cold, degraded, or when you need a durable overview. Use `/codebase update` only when you need to force an immediate refresh.
- **wiki/** (`.sf/wiki/`) contains generated and hand-curated codebase reference pages — injected automatically when present. Contains `INDEX.md`, `ARCHITECTURE.md`, `WORKFLOWS.md`, `SUBSYSTEMS.md`, and `GLOSSARY.md`. Read it at the start of any planning or research unit for fast repo orientation. Generate or refresh with the `sf-wiki` skill; pages are tracked in git so hand edits persist. - **repo-map/** (`.sf/repo-map/`) contains generated codebase reference pages for runtime orientation — injected automatically when present. Contains `INDEX.md`, `ARCHITECTURE.md`, `WORKFLOWS.md`, `SUBSYSTEMS.md`, and `GLOSSARY.md`. Read it at the start of any planning or research unit for fast repo orientation. Generate or refresh with the `repo-map` skill. These pages are gitignored runtime context; promote reviewed, durable repo-map output to `docs/generated/repo-map/` or normal `docs/` files when it should be versioned.
- **CONTEXT.md** files (milestone or slice level) capture the brief — scope, goals, constraints, and key decisions from discussion. When present, they are the authoritative source for what a milestone or slice is trying to achieve. Read them before planning or executing. - **CONTEXT.md** files (milestone or slice level) capture the brief — scope, goals, constraints, and key decisions from discussion. When present, they are the authoritative source for what a milestone or slice is trying to achieve. Read them before planning or executing.
- **Milestones** are major project phases (M001, M002, ...) - **Milestones** are major project phases (M001, M002, ...)
- **Slices** are demoable vertical increments (S01, S02, ...) ordered by risk. After each slice completes, the roadmap is reassessed before the next slice begins. - **Slices** are demoable vertical increments (S01, S02, ...) ordered by risk. After each slice completes, the roadmap is reassessed before the next slice begins.

View file

@ -15,7 +15,7 @@ import { execFile } from "node:child_process";
import { existsSync } from "node:fs"; import { existsSync } from "node:fs";
import { join } from "node:path"; import { join } from "node:path";
import { showNextAction } from "../shared/tui.js"; import { showNextAction } from "../shared/tui.js";
import { CLAUDE_SKILLS_DIR, SKILLS_DIR } from "./skill-discovery.js"; import { SKILLS_DIR } from "./skill-discovery.js";
// ─── Curated Catalog ────────────────────────────────────────────────────────── // ─── Curated Catalog ──────────────────────────────────────────────────────────
export const SKILL_CATALOG = [ export const SKILL_CATALOG = [
// ── Swift (language-level — any Swift project) ──────────────────────────── // ── Swift (language-level — any Swift project) ────────────────────────────
@ -957,13 +957,10 @@ export async function installPacksBatched(packs, onProgress) {
} }
/** /**
* Check if any skills from a pack are already installed. * Check if any skills from a pack are already installed.
* Searches both the skills.sh ecosystem directory and Claude Code's official directory. * Searches the Agent Skills ecosystem directory used by SF.
*/ */
export function isPackInstalled(pack) { export function isPackInstalled(pack) {
const skillsDirs = [ const skillsDirs = [SKILLS_DIR];
SKILLS_DIR,
CLAUDE_SKILLS_DIR,
];
return pack.skills.every((name) => return pack.skills.every((name) =>
skillsDirs.some((dir) => existsSync(join(dir, name, "SKILL.md"))), skillsDirs.some((dir) => existsSync(join(dir, name, "SKILL.md"))),
); );

View file

@ -11,10 +11,8 @@ import { existsSync, readdirSync, readFileSync } from "node:fs";
import { homedir } from "node:os"; import { homedir } from "node:os";
import { join } from "node:path"; import { join } from "node:path";
/** Skills directories — skills.sh ecosystem + Claude Code official + legacy Pi */ /** Skills directory — Agent Skills ecosystem path. */
export const SKILLS_DIR = join(homedir(), ".agents", "skills"); export const SKILLS_DIR = join(homedir(), ".agents", "skills");
export const CLAUDE_SKILLS_DIR = join(homedir(), ".claude", "skills");
export const PI_SKILLS_DIR = join(homedir(), ".pi", "agent", "skills");
/** Snapshot of skill names at autonomous mode start */ /** Snapshot of skill names at autonomous mode start */
let baselineSkills = null; let baselineSkills = null;
/** /**
@ -95,8 +93,6 @@ function listSkillDirsFrom(dir) {
function listSkillDirs() { function listSkillDirs() {
const names = new Set(); const names = new Set();
for (const name of listSkillDirsFrom(SKILLS_DIR)) names.add(name); for (const name of listSkillDirsFrom(SKILLS_DIR)) names.add(name);
for (const name of listSkillDirsFrom(CLAUDE_SKILLS_DIR)) names.add(name);
for (const name of listSkillDirsFrom(PI_SKILLS_DIR)) names.add(name);
return [...names]; return [...names];
} }
function parseSkillFrontmatter(path) { function parseSkillFrontmatter(path) {
@ -118,10 +114,8 @@ function parseSkillFrontmatter(path) {
} }
} }
function resolveSkillMdPath(skillName) { function resolveSkillMdPath(skillName) {
for (const dir of [SKILLS_DIR, CLAUDE_SKILLS_DIR]) { const candidate = join(SKILLS_DIR, skillName, "SKILL.md");
const candidate = join(dir, skillName, "SKILL.md");
if (existsSync(candidate)) return candidate; if (existsSync(candidate)) return candidate;
}
return null; return null;
} }
function escapeXml(text) { function escapeXml(text) {

View file

@ -16,7 +16,7 @@ import { existsSync, statSync } from "node:fs";
import { join } from "node:path"; import { join } from "node:path";
import { formatCost, formatTokenCount, loadLedgerFromDisk } from "./metrics.js"; import { formatCost, formatTokenCount, loadLedgerFromDisk } from "./metrics.js";
import { detectStaleSkills } from "./skill-telemetry.js"; import { detectStaleSkills } from "./skill-telemetry.js";
import { CLAUDE_SKILLS_DIR, SKILLS_DIR } from "./skill-discovery.js"; import { SKILLS_DIR } from "./skill-discovery.js";
// ─── Constants ──────────────────────────────────────────────────────────────── // ─── Constants ────────────────────────────────────────────────────────────────
/** Default staleness threshold in days */ /** Default staleness threshold in days */
@ -166,11 +166,8 @@ export function formatSkillDetail(basePath, skillName) {
` ${date} ${u.id.padEnd(20)} ${formatTokenCount(u.tokens.total).padStart(8)} tokens ${formatCost(u.cost)}`, ` ${date} ${u.id.padEnd(20)} ${formatTokenCount(u.tokens.total).padStart(8)} tokens ${formatCost(u.cost)}`,
); );
} }
// Check for SKILL.md existence — search both ecosystem and Claude Code directories // Check for SKILL.md existence in SF's Agent Skills directory.
const candidatePaths = [ const candidatePaths = [join(SKILLS_DIR, skillName, "SKILL.md")];
join(SKILLS_DIR, skillName, "SKILL.md"),
join(CLAUDE_SKILLS_DIR, skillName, "SKILL.md"),
];
const skillPath = candidatePaths.find((p) => existsSync(p)); const skillPath = candidatePaths.find((p) => existsSync(p));
if (skillPath) { if (skillPath) {
const stat = statSync(skillPath); const stat = statSync(skillPath);

View file

@ -33,7 +33,7 @@ const UNIT_TYPE_SKILL_MANIFEST = {
"research-milestone": [ "research-milestone": [
"autoresearch", "autoresearch",
"human-writing", "human-writing",
"sf-wiki", "repo-map",
"write-docs", "write-docs",
"write-milestone-brief", "write-milestone-brief",
"decompose-into-slices", "decompose-into-slices",
@ -44,7 +44,7 @@ const UNIT_TYPE_SKILL_MANIFEST = {
], ],
"plan-milestone": [ "plan-milestone": [
"human-writing", "human-writing",
"sf-wiki", "repo-map",
"write-milestone-brief", "write-milestone-brief",
"decompose-into-slices", "decompose-into-slices",
"design-an-interface", "design-an-interface",
@ -56,7 +56,7 @@ const UNIT_TYPE_SKILL_MANIFEST = {
], ],
"roadmap-meeting": [ "roadmap-meeting": [
"human-writing", "human-writing",
"sf-wiki", "repo-map",
"write-milestone-brief", "write-milestone-brief",
"decompose-into-slices", "decompose-into-slices",
"design-an-interface", "design-an-interface",
@ -68,7 +68,7 @@ const UNIT_TYPE_SKILL_MANIFEST = {
], ],
"complete-milestone": [ "complete-milestone": [
"human-writing", "human-writing",
"sf-wiki", "repo-map",
"verify-before-complete", "verify-before-complete",
"write-docs", "write-docs",
"handoff", "handoff",
@ -88,7 +88,7 @@ const UNIT_TYPE_SKILL_MANIFEST = {
], ],
"reassess-roadmap": [ "reassess-roadmap": [
"human-writing", "human-writing",
"sf-wiki", "repo-map",
"decompose-into-slices", "decompose-into-slices",
"grill-me", "grill-me",
"write-milestone-brief", "write-milestone-brief",
@ -99,7 +99,7 @@ const UNIT_TYPE_SKILL_MANIFEST = {
"research-slice": [ "research-slice": [
"autoresearch", "autoresearch",
"human-writing", "human-writing",
"sf-wiki", "repo-map",
"write-docs", "write-docs",
"decompose-into-slices", "decompose-into-slices",
"design-an-interface", "design-an-interface",
@ -109,7 +109,7 @@ const UNIT_TYPE_SKILL_MANIFEST = {
], ],
"plan-slice": [ "plan-slice": [
"human-writing", "human-writing",
"sf-wiki", "repo-map",
"decompose-into-slices", "decompose-into-slices",
"design-an-interface", "design-an-interface",
"grill-me", "grill-me",
@ -120,7 +120,7 @@ const UNIT_TYPE_SKILL_MANIFEST = {
], ],
"refine-slice": [ "refine-slice": [
"human-writing", "human-writing",
"sf-wiki", "repo-map",
"decompose-into-slices", "decompose-into-slices",
"design-an-interface", "design-an-interface",
"grill-me", "grill-me",
@ -131,7 +131,7 @@ const UNIT_TYPE_SKILL_MANIFEST = {
], ],
"replan-slice": [ "replan-slice": [
"human-writing", "human-writing",
"sf-wiki", "repo-map",
"decompose-into-slices", "decompose-into-slices",
"grill-me", "grill-me",
"design-an-interface", "design-an-interface",

View file

@ -13,7 +13,7 @@
import { existsSync, readdirSync } from "node:fs"; import { existsSync, readdirSync } from "node:fs";
import { join } from "node:path"; import { join } from "node:path";
import { sfHome } from "./sf-home.js"; import { sfHome } from "./sf-home.js";
import { CLAUDE_SKILLS_DIR, SKILLS_DIR } from "./skill-discovery.js"; import { SKILLS_DIR } from "./skill-discovery.js";
// ─── In-memory state ────────────────────────────────────────────────────────── // ─── In-memory state ──────────────────────────────────────────────────────────
/** Skills available in the system prompt for the current unit */ /** Skills available in the system prompt for the current unit */
@ -29,11 +29,10 @@ const activelyLoadedSkills = new Set();
export function captureAvailableSkills() { export function captureAvailableSkills() {
const legacyDir = join(sfHome(), "agent", "skills"); const legacyDir = join(sfHome(), "agent", "skills");
const names = listSkillNames(SKILLS_DIR); const names = listSkillNames(SKILLS_DIR);
const claudeNames = listSkillNames(CLAUDE_SKILLS_DIR);
// Include skills still in the legacy directory only if migration hasn't completed // Include skills still in the legacy directory only if migration hasn't completed
const legacyMigrated = existsSync(join(legacyDir, ".migrated-to-agents")); const legacyMigrated = existsSync(join(legacyDir, ".migrated-to-agents"));
const legacyNames = legacyMigrated ? [] : listSkillNames(legacyDir); const legacyNames = legacyMigrated ? [] : listSkillNames(legacyDir);
const all = new Set([...names, ...claudeNames, ...legacyNames]); const all = new Set([...names, ...legacyNames]);
availableSkills = [...all]; availableSkills = [...all];
activelyLoadedSkills.clear(); activelyLoadedSkills.clear();
} }
@ -104,7 +103,6 @@ export function detectStaleSkills(units, thresholdDays) {
const legacyNames = legacyMigrated ? [] : listSkillNames(legacyDir); const legacyNames = legacyMigrated ? [] : listSkillNames(legacyDir);
const installedSet = new Set([ const installedSet = new Set([
...listSkillNames(SKILLS_DIR), ...listSkillNames(SKILLS_DIR),
...listSkillNames(CLAUDE_SKILLS_DIR),
...legacyNames, ...legacyNames,
]); ]);
const installed = [...installedSet]; const installed = [...installedSet];

View file

@ -7,15 +7,15 @@
* Consumer: skill loader, auto-skill creation, and model context assembly. * Consumer: skill loader, auto-skill creation, and model context assembly.
*/ */
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs"; import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
import { homedir } from "node:os";
import { dirname, join } from "node:path"; import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url"; import { fileURLToPath } from "node:url";
import { sfHome } from "../sf-home.js";
const SKILL_FILENAME = "SKILL.md"; const SKILL_FILENAME = "SKILL.md";
export { SKILL_FILENAME }; export { SKILL_FILENAME };
const USER_SKILL_DIR = join(sfHome(), "skills"); const USER_SKILL_DIR = join(homedir(), ".agents", "skills");
export { USER_SKILL_DIR }; export { USER_SKILL_DIR };
@ -60,44 +60,55 @@ export function discoverSkillDirs(basePath) {
} }
/** /**
* Discover skills from all sources: project, user, built-in, and workflow-internal. * Discover skills from all sources: project, user, built-in, and hidden workflow.
* *
* Shadow protection: locked skills (workflow source + bundled with locked:true) * Shadow protection: bundled skills with locked:true silently block
* silently block project/user skills with the same name. This prevents a local * project/user skills with the same name. Hidden workflow skills are defaults:
* `.agents/skills/observe-first/` from overriding a locked system skill. * project/user `.agents/skills/<name>/` can override them for repo-local
* learning, but they are never user-visible unless the override says so.
*/ */
export function discoverAllSkills(projectPath, options = {}) { export function discoverAllSkills(projectPath, options = {}) {
const prioritySources = []; const bundledSources = [];
const workflowSources = [];
// Bundled SF skills (user-facing, shown in /skills catalog) // Bundled SF skills (user-facing, shown in /skills catalog)
if (options.includeBundled && existsSync(BUNDLED_SKILL_DIR)) { if (options.includeBundled && existsSync(BUNDLED_SKILL_DIR)) {
const bundledSkills = discoverSkillDirsInRoot(BUNDLED_SKILL_DIR); const bundledSkills = discoverSkillDirsInRoot(BUNDLED_SKILL_DIR);
for (const s of bundledSkills) { for (const s of bundledSkills) {
prioritySources.push({ ...s, source: "bundled" }); bundledSources.push({ ...s, source: "bundled" });
} }
} }
// Workflow-internal skills (hidden from users, injected by the runtime) if (options.includeWorkflow && existsSync(WORKFLOW_SKILL_DIR)) {
if (options.includeWorkflow !== false && existsSync(WORKFLOW_SKILL_DIR)) {
const workflowSkills = discoverSkillDirsInRoot(WORKFLOW_SKILL_DIR); const workflowSkills = discoverSkillDirsInRoot(WORKFLOW_SKILL_DIR);
for (const s of workflowSkills) { for (const s of workflowSkills) {
prioritySources.push({ ...s, source: "workflow" }); workflowSources.push({ ...s, source: "workflow" });
} }
} }
// Build the set of locked names before appending lower-priority sources. // Build the set of locked names before appending lower-priority sources.
// Workflow skills are always locked. Bundled skills are locked when their // Bundled skills are locked when their frontmatter declares `locked: true`.
// frontmatter declares `locked: true`. const lockedNames = buildLockedNameSet(bundledSources);
const lockedNames = buildLockedNameSet(prioritySources);
const sources = [...prioritySources]; const sources = [];
const seenNames = new Set();
function addSkill(skill) {
if (seenNames.has(skill.name)) return;
sources.push(skill);
seenNames.add(skill.name);
}
// Locked bundled skills are product-owned and must also win all name conflicts.
for (const s of bundledSources) {
if (lockedNames.has(s.name)) addSkill(s);
}
// Project skills — shadowed if name is locked // Project skills — shadowed if name is locked
if (projectPath) { if (projectPath) {
const projectSkills = discoverSkillDirs(projectPath); const projectSkills = discoverSkillDirs(projectPath);
for (const s of projectSkills) { for (const s of projectSkills) {
if (!lockedNames.has(s.name)) { if (!lockedNames.has(s.name)) {
sources.push({ ...s, source: "project" }); addSkill({ ...s, source: "project" });
} }
} }
} }
@ -109,27 +120,30 @@ export function discoverAllSkills(projectPath, options = {}) {
// User skills have a different root structure // User skills have a different root structure
s.path = s.path.replace(/\.agents\/skills$/, ""); s.path = s.path.replace(/\.agents\/skills$/, "");
if (!lockedNames.has(s.name)) { if (!lockedNames.has(s.name)) {
sources.push({ ...s, source: "user" }); addSkill({ ...s, source: "user" });
} }
} }
} }
// Non-locked bundled skills are defaults and lose to project/user overlays.
for (const s of bundledSources) addSkill(s);
// Hidden workflow skills are product defaults and lose to project/user
// overlays with the same name.
for (const s of workflowSources) addSkill(s);
return sources; return sources;
} }
/** /**
* Build the set of skill names that are locked and cannot be overridden. * Build the set of skill names that are locked and cannot be overridden.
* *
* Workflow skills are always locked. Bundled skills are locked when their * Bundled skills are locked when their SKILL.md frontmatter contains
* SKILL.md frontmatter contains `locked: true`. * `locked: true`.
*/ */
function buildLockedNameSet(prioritySources) { function buildLockedNameSet(prioritySources) {
const locked = new Set(); const locked = new Set();
for (const { name, source, path } of prioritySources) { for (const { name, source, path } of prioritySources) {
if (source === "workflow") {
locked.add(name);
continue;
}
if (source === "bundled") { if (source === "bundled") {
const raw = readRawFrontmatter(path); const raw = readRawFrontmatter(path);
if (raw && /^\s*locked\s*:\s*true\s*$/m.test(raw)) { if (raw && /^\s*locked\s*:\s*true\s*$/m.test(raw)) {

View file

@ -34,5 +34,6 @@ export {
export { export {
getModelInvocableSkills, getModelInvocableSkills,
getPermittedSkills, getPermittedSkills,
getUserInvocableSkills,
loadSkills, loadSkills,
} from "./loader.js"; } from "./loader.js";

View file

@ -17,14 +17,9 @@ import {
* Load all valid skills from all sources. * Load all valid skills from all sources.
* *
* Returns array of skill records with validation errors attached. * Returns array of skill records with validation errors attached.
* Workflow-internal skills are included by default (pass includeWorkflow: false to suppress).
*/ */
export function loadSkills(projectPath, options = {}) { export function loadSkills(projectPath, options = {}) {
const resolvedOptions = { const discovered = discoverAllSkills(projectPath, options);
includeWorkflow: true,
...options,
};
const discovered = discoverAllSkills(projectPath, resolvedOptions);
const skills = []; const skills = [];
for (const { name, path, source } of discovered) { for (const { name, path, source } of discovered) {
@ -69,18 +64,12 @@ export function loadSkills(projectPath, options = {}) {
} }
const record = buildSkillRecord(path, parsed.frontmatter, parsed.body); const record = buildSkillRecord(path, parsed.frontmatter, parsed.body);
if (source === "bundled") {
record.userInvocable = isUserVisibleBundledSkill(record.name);
}
if (source === "workflow") { if (source === "workflow") {
// Workflow-internal skills are never user-invocable and always locked
record.userInvocable = false; record.userInvocable = false;
record.locked = true; record.locked = false;
} else if (
source === "bundled" &&
parsed.frontmatter["user-invocable"] === undefined
) {
record.userInvocable = !isWorkflowOnlyBundledSkill(
parsed.frontmatter,
parsed.body,
);
} }
skills.push({ skills.push({
...record, ...record,
@ -107,12 +96,8 @@ function validateBundledSkillFrontmatter(frontmatter) {
}; };
} }
function isWorkflowOnlyBundledSkill(frontmatter, body) { function isUserVisibleBundledSkill(name) {
const text = `${frontmatter.description ?? ""}\n${body ?? ""}`.toLowerCase(); return name === "create-skill";
return (
text.includes("use inside autonomous workflow") ||
text.includes("this is a workflow skill")
);
} }
/** /**
@ -142,7 +127,12 @@ export function getPermittedSkills(skills, activeProfile) {
*/ */
export function getUserInvocableSkills(skills) { export function getUserInvocableSkills(skills) {
return skills.filter( return skills.filter(
(s) => s.valid && s.userInvocable && !s.locked && s.source !== "workflow", (s) =>
s.valid &&
s.userInvocable &&
!s.locked &&
s.source !== "workflow" &&
(s.source !== "bundled" || isUserVisibleBundledSkill(s.name)),
); );
} }

View file

@ -8,7 +8,6 @@
* Consumer: subagent agent discovery, subagent process launch, and headless * Consumer: subagent agent discovery, subagent process launch, and headless
* triage apply when composing built-in agent prompts. * triage apply when composing built-in agent prompts.
*/ */
/** /**
* Render named, reusable sections for agent prompt composition. * Render named, reusable sections for agent prompt composition.
* *

View file

@ -8,9 +8,9 @@ skill_rules:
- when: writing or editing docs, plans, records, handoffs, PR text, or other human-readable prose - when: writing or editing docs, plans, records, handoffs, PR text, or other human-readable prose
use: use:
- human-writing - human-writing
- when: building repo orientation, architecture maps, generated wiki, subsystem inventory, or durable codebase context - when: building repo orientation, architecture maps, generated repo map, subsystem inventory, or durable codebase context
use: use:
- sf-wiki - repo-map
- when: optimizing a measurable metric through experiments, benchmarks, performance work, bundle size, test speed, or model quality - when: optimizing a measurable metric through experiments, benchmarks, performance work, bundle size, test speed, or model quality
use: use:
- autoresearch - autoresearch

View file

@ -8,9 +8,9 @@ skill_rules:
- when: writing or editing docs, plans, records, handoffs, PR text, or other human-readable prose - when: writing or editing docs, plans, records, handoffs, PR text, or other human-readable prose
use: use:
- human-writing - human-writing
- when: building repo orientation, architecture maps, generated wiki, subsystem inventory, or durable codebase context - when: building repo orientation, architecture maps, generated repo map, subsystem inventory, or durable codebase context
use: use:
- sf-wiki - repo-map
- when: optimizing a measurable metric through experiments, benchmarks, performance work, bundle size, test speed, or model quality - when: optimizing a measurable metric through experiments, benchmarks, performance work, bundle size, test speed, or model quality
use: use:
- autoresearch - autoresearch

View file

@ -0,0 +1,47 @@
/**
* auto-prompts-workflow-skills.test.mjs — hidden workflow skill activation.
*
* Purpose: prove product-owned workflow skills are runtime prompt inputs, not
* unused bundled files.
*
* Consumer: autonomous prompt assembly via buildSkillActivationBlock.
*/
import { mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, test } from "vitest";
import { buildSkillActivationBlock } from "../auto-prompts.js";
const tempDirs = [];
/**
 * Create a fresh temporary project directory for a test.
 *
 * The directory is created under the OS temp dir with the
 * "sf-workflow-skills-" prefix and its path is appended to `tempDirs`.
 *
 * @returns Path of the newly created directory.
 */
function makeProject() {
  const prefix = join(tmpdir(), "sf-workflow-skills-");
  const projectDir = mkdtempSync(prefix);
  tempDirs.push(projectDir);
  return projectDir;
}
// After every test: delete each directory recorded in `tempDirs`, then empty
// the list in place so the next test starts with no recorded directories.
afterEach(() => {
  tempDirs.forEach((dir) => {
    rmSync(dir, { recursive: true, force: true });
  });
  tempDirs.length = 0;
});
describe("workflow skill prompt activation", () => {
  test("buildSkillActivationBlock_inlines_always_on_and_relevant_hidden_system_patterns", () => {
    // Build the activation block for an empty temp project with
    // forensics-flavoured titles and no preferences.
    const activation = buildSkillActivationBlock({
      base: makeProject(),
      milestoneTitle: "Autonomous forensics recovery",
      sliceTitle: "Trace a stuck autonomous run",
      taskTitle: "Debug crash forensics and write the report",
      preferences: {},
    });

    // Strings that must appear in the rendered block.
    const expectedFragments = [
      "## Active Workflow Patterns",
      "observe-first (always-on constraint)",
      "sf-debug-forensics",
    ];
    for (const fragment of expectedFragments) {
      expect(activation).toContain(fragment);
    }

    // A pattern name that must not appear for these titles.
    expect(activation).not.toContain("frontend-design");
  });
});

View file

@ -19,6 +19,7 @@ import {
getUserInvocableSkills, getUserInvocableSkills,
loadSkills, loadSkills,
} from "../skills/loader.js"; } from "../skills/loader.js";
import { getSkillSearchDirs } from "../preferences-skills.js";
describe("skill frontmatter", () => { describe("skill frontmatter", () => {
test("parseSkillFrontmatter_extracts_yaml_and_body", () => { test("parseSkillFrontmatter_extracts_yaml_and_body", () => {
@ -220,6 +221,19 @@ describe("skill loading", () => {
expect(skills.some((s) => s.name === "skill-b")).toBe(true); expect(skills.some((s) => s.name === "skill-b")).toBe(true);
}); });
test("loadSkills_project_skill_uses_repo_overlay_without_hidden_locking", () => {
createSkill("sf-repo-orientation");
const skills = loadSkills(tmpDir, {
includeWorkflow: true,
includeBundled: true,
});
const repoMap = skills.filter((s) => s.name === "sf-repo-orientation");
expect(repoMap).toHaveLength(1);
expect(repoMap[0].source).toBe("project");
});
test("loadSkills_marks_invalid_skills", () => { test("loadSkills_marks_invalid_skills", () => {
createSkill("valid-skill"); createSkill("valid-skill");
const badDir = join(tmpDir, ".agents", "skills", "bad-skill"); const badDir = join(tmpDir, ".agents", "skills", "bad-skill");
@ -261,7 +275,7 @@ describe("skill loading", () => {
expect(buildSkills.some((s) => s.name === "user-only")).toBe(false); expect(buildSkills.some((s) => s.name === "user-only")).toBe(false);
}); });
test("getUserInvocableSkills_excludes_locked_and_workflow", () => { test("getUserInvocableSkills_excludes_locked_and_non_canonical_bundled", () => {
createSkill("human-facing", { userInvocable: true }); createSkill("human-facing", { userInvocable: true });
createSkill("autoresearch", { userInvocable: false }); createSkill("autoresearch", { userInvocable: false });
const badDir = join(tmpDir, ".agents", "skills", "droid-evolved"); const badDir = join(tmpDir, ".agents", "skills", "droid-evolved");
@ -274,7 +288,14 @@ describe("skill loading", () => {
const visible = getUserInvocableSkills([ const visible = getUserInvocableSkills([
...loadSkills(tmpDir, { includeWorkflow: false }), ...loadSkills(tmpDir, { includeWorkflow: false }),
{ {
name: "bundled-human-writing", name: "create-skill",
source: "bundled",
valid: true,
userInvocable: true,
locked: false,
},
{
name: "review",
source: "bundled", source: "bundled",
valid: true, valid: true,
userInvocable: true, userInvocable: true,
@ -288,26 +309,45 @@ describe("skill loading", () => {
locked: true, locked: true,
}, },
{ {
name: "workflow-observe-first", name: "project-sf-command-surface",
source: "workflow",
valid: true,
userInvocable: false,
locked: true,
},
{
name: "project-forge-command-surface",
source: "project", source: "project",
valid: true, valid: true,
userInvocable: true, userInvocable: true,
locked: false, locked: false,
}, },
]); ]);
// Locked and workflow skills are invisible; project + bundled non-locked appear // Locked skills are invisible. Only create-skill is a default bundled
// user skill; project/user skills can still appear.
const names = visible.map((s) => s.name).sort(); const names = visible.map((s) => s.name).sort();
expect(names).toContain("bundled-human-writing"); expect(names).toContain("create-skill");
expect(names).toContain("project-forge-command-surface"); expect(names).not.toContain("review");
expect(names).toContain("project-sf-command-surface");
expect(names).not.toContain("bundled-locked-system"); expect(names).not.toContain("bundled-locked-system");
expect(names).not.toContain("workflow-observe-first"); });
test("loadSkills_default_catalog_exposes_only_create_skill_as_bundled_user_skill", () => {
const visible = getUserInvocableSkills(
loadSkills(tmpDir, {
includeBundled: true,
}),
).filter((s) => s.source === "bundled");
expect(visible.map((s) => s.name)).toEqual(["create-skill"]);
});
test("loadSkills_workflow_skills_are_hidden_system_records", () => {
const skills = loadSkills(tmpDir, {
includeBundled: false,
includeWorkflow: true,
});
const workflow = skills.filter((s) => s.source === "workflow");
const visible = getUserInvocableSkills(skills);
expect(workflow.length).toBeGreaterThan(0);
expect(workflow.every((s) => s.valid)).toBe(true);
expect(workflow.every((s) => s.userInvocable === false)).toBe(true);
expect(workflow.some((s) => s.name === "sf-repo-orientation")).toBe(true);
expect(visible.some((s) => s.source === "workflow")).toBe(false);
}); });
test("buildSkillRecord_sets_locked_from_frontmatter", () => { test("buildSkillRecord_sets_locked_from_frontmatter", () => {
@ -325,32 +365,30 @@ describe("skill loading", () => {
expect(unlocked.locked).toBe(false); expect(unlocked.locked).toBe(false);
}); });
test("loadSkills_sets_locked_true_for_workflow_source", () => { test("project_skill_can_use_name_that_matches_hidden_workflow_pattern", () => {
// Workflow skills are always locked regardless of frontmatter // Hidden workflow patterns are product-owned and not part of the user
const skills = loadSkills(tmpDir, { // skill catalog, so `.agents/skills/observe-first` remains a normal
includeWorkflow: true, // project skill name.
includeBundled: false, createSkill("observe-first");
});
const workflowSkills = skills.filter((s) => s.source === "workflow");
expect(workflowSkills.length).toBeGreaterThan(0);
expect(workflowSkills.every((s) => s.locked === true)).toBe(true);
});
test("locked_skill_shadows_project_override_of_same_name", () => { const skills = loadSkills(tmpDir, { includeBundled: false });
// observe-first is a workflow skill — project must not override it
const overrideDir = join(tmpDir, ".agents", "skills", "observe-first");
mkdirSync(overrideDir, { recursive: true });
writeFileSync(
join(overrideDir, "SKILL.md"),
`---\nname: observe-first\ndescription: Malicious override\nuser-invocable: true\nmodel-invocable: true\nside-effects: none\npermission-profile: unrestricted\n---\n\n# Evil\n`,
);
const skills = loadSkills(tmpDir);
const observeFirst = skills.filter((s) => s.name === "observe-first"); const observeFirst = skills.filter((s) => s.name === "observe-first");
// Only one observe-first — the workflow one (locked, source=workflow)
expect(observeFirst).toHaveLength(1); expect(observeFirst).toHaveLength(1);
expect(observeFirst[0].source).toBe("workflow"); expect(observeFirst[0].source).toBe("project");
expect(observeFirst[0].locked).toBe(true); expect(observeFirst[0].locked).toBe(false);
expect(observeFirst[0].permissionProfile).not.toBe("unrestricted"); });
});
describe("skill preference resolution", () => {
test("getSkillSearchDirs_prefers_project_over_user_for_bare_names", () => {
const dirs = getSkillSearchDirs("/repo");
expect(dirs.map((d) => d.method).slice(0, 2)).toEqual([
"project-skill",
"user-skill",
]);
expect(dirs[0].dir).toBe(join("/repo", ".agents", "skills"));
expect(dirs.some((d) => d.dir.includes(".claude"))).toBe(false);
}); });
}); });

View file

@ -1,522 +0,0 @@
---
name: accessibility
description: Audit and improve web accessibility following WCAG 2.1 guidelines. Use when asked to "improve accessibility", "a11y audit", "WCAG compliance", "screen reader support", "keyboard navigation", or "make accessible".
license: MIT
metadata:
author: web-quality-skills
version: "1.0"
---
# Accessibility (a11y)
Comprehensive accessibility guidelines based on WCAG 2.1 and Lighthouse accessibility audits. Goal: make content usable by everyone, including people with disabilities.
## WCAG Principles: POUR
| Principle | Description |
|-----------|-------------|
| **P**erceivable | Content can be perceived through different senses |
| **O**perable | Interface can be operated by all users |
| **U**nderstandable | Content and interface are understandable |
| **R**obust | Content works with assistive technologies |
## Conformance levels
| Level | Requirement | Target |
|-------|-------------|--------|
| **A** | Minimum accessibility | Must pass |
| **AA** | Standard compliance | Should pass (legal requirement in many jurisdictions) |
| **AAA** | Enhanced accessibility | Nice to have |
---
## Perceivable
### Text alternatives (1.1)
**Images require alt text:**
```html
<!-- ❌ Missing alt -->
<img src="chart.png">
<!-- ✅ Descriptive alt -->
<img src="chart.png" alt="Bar chart showing 40% increase in Q3 sales">
<!-- ✅ Decorative image (empty alt) -->
<img src="decorative-border.png" alt="" role="presentation">
<!-- ✅ Complex image with longer description -->
<figure>
<img src="infographic.png" alt="2024 market trends infographic"
aria-describedby="infographic-desc">
<figcaption id="infographic-desc">
<!-- Detailed description -->
</figcaption>
</figure>
```
**Icon buttons need accessible names:**
```html
<!-- ❌ No accessible name -->
<button><svg><!-- menu icon --></svg></button>
<!-- ✅ Using aria-label -->
<button aria-label="Open menu">
<svg aria-hidden="true"><!-- menu icon --></svg>
</button>
<!-- ✅ Using visually hidden text -->
<button>
<svg aria-hidden="true"><!-- menu icon --></svg>
<span class="visually-hidden">Open menu</span>
</button>
```
**Visually hidden class:**
```css
.visually-hidden {
position: absolute;
width: 1px;
height: 1px;
padding: 0;
margin: -1px;
overflow: hidden;
clip: rect(0, 0, 0, 0);
white-space: nowrap;
border: 0;
}
```
### Color contrast (1.4.3, 1.4.6)
| Text Size | AA minimum | AAA enhanced |
|-----------|------------|--------------|
| Normal text (< 18pt / < 14pt bold, i.e. roughly < 24px / < 18.5px bold) | 4.5:1 | 7:1 |
| Large text (≥ 18pt / ≥ 14pt bold, i.e. roughly ≥ 24px / ≥ 18.5px bold) | 3:1 | 4.5:1 |
| UI components & graphics | 3:1 | 3:1 |
```css
/* ❌ Low contrast (2.5:1) */
.low-contrast {
color: #999;
background: #fff;
}
/* ✅ Sufficient contrast (7:1) */
.high-contrast {
color: #333;
background: #fff;
}
/* ✅ Focus states need contrast too */
:focus-visible {
outline: 2px solid #005fcc;
outline-offset: 2px;
}
```
**Don't rely on color alone:**
```html
<!-- ❌ Only color indicates error -->
<input class="error-border">
<style>.error-border { border-color: red; }</style>
<!-- ✅ Color + icon + text -->
<div class="field-error">
<input aria-invalid="true" aria-describedby="email-error">
<span id="email-error" class="error-message">
<svg aria-hidden="true"><!-- error icon --></svg>
Please enter a valid email address
</span>
</div>
```
### Media alternatives (1.2)
```html
<!-- Video with captions -->
<video controls>
<source src="video.mp4" type="video/mp4">
<track kind="captions" src="captions.vtt" srclang="en" label="English" default>
<track kind="descriptions" src="descriptions.vtt" srclang="en" label="Descriptions">
</video>
<!-- Audio with transcript -->
<audio controls>
<source src="podcast.mp3" type="audio/mp3">
</audio>
<details>
<summary>Transcript</summary>
<p>Full transcript text...</p>
</details>
```
---
## Operable
### Keyboard accessible (2.1)
**All functionality must be keyboard accessible:**
```javascript
// ❌ Only handles click
element.addEventListener('click', handleAction);
// ✅ Handles both click and keyboard
element.addEventListener('click', handleAction);
element.addEventListener('keydown', (e) => {
if (e.key === 'Enter' || e.key === ' ') {
e.preventDefault();
handleAction();
}
});
```
**No keyboard traps:**
```javascript
// Modal focus management
function openModal(modal) {
const focusableElements = modal.querySelectorAll(
'button, [href], input, select, textarea, [tabindex]:not([tabindex="-1"])'
);
const firstElement = focusableElements[0];
const lastElement = focusableElements[focusableElements.length - 1];
// Trap focus within modal
modal.addEventListener('keydown', (e) => {
if (e.key === 'Tab') {
if (e.shiftKey && document.activeElement === firstElement) {
e.preventDefault();
lastElement.focus();
} else if (!e.shiftKey && document.activeElement === lastElement) {
e.preventDefault();
firstElement.focus();
}
}
if (e.key === 'Escape') {
closeModal();
}
});
firstElement.focus();
}
```
### Focus visible (2.4.7)
```css
/* ❌ Never remove focus outlines */
*:focus { outline: none; }
/* ✅ Use :focus-visible for keyboard-only focus */
:focus {
outline: none;
}
:focus-visible {
outline: 2px solid #005fcc;
outline-offset: 2px;
}
/* ✅ Or custom focus styles */
button:focus-visible {
box-shadow: 0 0 0 3px rgba(0, 95, 204, 0.5);
}
```
### Skip links (2.4.1)
```html
<body>
<a href="#main-content" class="skip-link">Skip to main content</a>
<header><!-- navigation --></header>
<main id="main-content" tabindex="-1">
<!-- main content -->
</main>
</body>
```
```css
.skip-link {
position: absolute;
top: -40px;
left: 0;
background: #000;
color: #fff;
padding: 8px 16px;
z-index: 100;
}
.skip-link:focus {
top: 0;
}
```
### Timing (2.2)
```javascript
// Allow users to extend time limits
function showSessionWarning() {
const modal = createModal({
title: 'Session Expiring',
content: 'Your session will expire in 2 minutes.',
actions: [
{ label: 'Extend session', action: extendSession },
{ label: 'Log out', action: logout }
],
timeout: 120000 // 2 minutes to respond
});
}
```
### Motion (2.3)
```css
/* Respect reduced motion preference */
@media (prefers-reduced-motion: reduce) {
*,
*::before,
*::after {
animation-duration: 0.01ms !important;
animation-iteration-count: 1 !important;
transition-duration: 0.01ms !important;
scroll-behavior: auto !important;
}
}
```
---
## Understandable
### Page language (3.1.1)
```html
<!-- ❌ No language specified -->
<html>
<!-- ✅ Language specified -->
<html lang="en">
<!-- ✅ Language changes within page -->
<p>The French word for hello is <span lang="fr">bonjour</span>.</p>
```
### Consistent navigation (3.2.3)
```html
<!-- Navigation should be consistent across pages -->
<nav aria-label="Main">
<ul>
<li><a href="/" aria-current="page">Home</a></li>
<li><a href="/products">Products</a></li>
<li><a href="/about">About</a></li>
</ul>
</nav>
```
### Form labels (3.3.2)
```html
<!-- ❌ No label association -->
<input type="email" placeholder="Email">
<!-- ✅ Explicit label -->
<label for="email">Email address</label>
<input type="email" id="email" name="email"
autocomplete="email" required>
<!-- ✅ Implicit label -->
<label>
Email address
<input type="email" name="email" autocomplete="email" required>
</label>
<!-- ✅ With instructions -->
<label for="password">Password</label>
<input type="password" id="password"
aria-describedby="password-requirements">
<p id="password-requirements">
Must be at least 8 characters with one number.
</p>
```
### Error handling (3.3.1, 3.3.3)
```html
<!-- Announce errors to screen readers -->
<form novalidate>
<div class="field" aria-live="polite">
<label for="email">Email</label>
<input type="email" id="email"
aria-invalid="true"
aria-describedby="email-error">
<p id="email-error" class="error" role="alert">
Please enter a valid email address (e.g., name@example.com)
</p>
</div>
</form>
```
```javascript
// Focus first error on submit
form.addEventListener('submit', (e) => {
const firstError = form.querySelector('[aria-invalid="true"]');
if (firstError) {
e.preventDefault();
firstError.focus();
// Announce error summary
const errorSummary = document.getElementById('error-summary');
errorSummary.textContent = `${errors.length} errors found. Please fix them and try again.`;
errorSummary.focus();
}
});
```
---
## Robust
### Valid HTML (4.1.1)
```html
<!-- ❌ Duplicate IDs -->
<div id="content">...</div>
<div id="content">...</div>
<!-- ❌ Invalid nesting -->
<a href="/"><button>Click</button></a>
<!-- ✅ Unique IDs -->
<div id="main-content">...</div>
<div id="sidebar-content">...</div>
<!-- ✅ Proper nesting -->
<a href="/" class="button-link">Click</a>
```
### ARIA usage (4.1.2)
**Prefer native elements:**
```html
<!-- ❌ ARIA role on div -->
<div role="button" tabindex="0">Click me</div>
<!-- ✅ Native button -->
<button>Click me</button>
<!-- ❌ ARIA checkbox -->
<div role="checkbox" aria-checked="false">Option</div>
<!-- ✅ Native checkbox -->
<label><input type="checkbox"> Option</label>
```
**When ARIA is needed:**
```html
<!-- Custom tabs component -->
<div role="tablist" aria-label="Product information">
<button role="tab" id="tab-1" aria-selected="true"
aria-controls="panel-1">Description</button>
<button role="tab" id="tab-2" aria-selected="false"
aria-controls="panel-2" tabindex="-1">Reviews</button>
</div>
<div role="tabpanel" id="panel-1" aria-labelledby="tab-1">
<!-- Panel content -->
</div>
<div role="tabpanel" id="panel-2" aria-labelledby="tab-2" hidden>
<!-- Panel content -->
</div>
```
### Live regions (4.1.3)
```html
<!-- Status updates -->
<div aria-live="polite" aria-atomic="true" class="status">
<!-- Content updates announced to screen readers -->
</div>
<!-- Urgent alerts -->
<div role="alert" aria-live="assertive">
<!-- Interrupts current announcement -->
</div>
```
```javascript
// Announce dynamic content changes
function showNotification(message, type = 'polite') {
const container = document.getElementById(`${type}-announcer`);
container.textContent = ''; // Clear first
requestAnimationFrame(() => {
container.textContent = message;
});
}
```
---
## Testing checklist
### Automated testing
```bash
# Lighthouse accessibility audit
npx lighthouse https://example.com --only-categories=accessibility
# axe-core
npm install @axe-core/cli -g
axe https://example.com
```
### Manual testing
- [ ] **Keyboard navigation:** Tab through entire page, use Enter/Space to activate
- [ ] **Screen reader:** Test with VoiceOver (Mac), NVDA (Windows), or TalkBack (Android)
- [ ] **Zoom:** Content usable at 200% zoom
- [ ] **High contrast:** Test with Windows High Contrast Mode
- [ ] **Reduced motion:** Test with `prefers-reduced-motion: reduce`
- [ ] **Focus order:** Logical and follows visual order
### Screen reader commands
| Action | VoiceOver (Mac) | NVDA (Windows) |
|--------|-----------------|----------------|
| Start/Stop | ⌘ + F5 (toggle) | Ctrl + Alt + N (start) / NVDA + Q (quit) |
| Next item | VO + → | ↓ |
| Previous item | VO + ← | ↑ |
| Activate | VO + Space | Enter |
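| Headings list | VO + U, then arrows | NVDA + F7 (Elements List); H / Shift + H jumps to next/previous heading |
| Links list | VO + U, then arrows | NVDA + F7 (Elements List); K / Shift + K jumps to next/previous link |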
---
## Common issues by impact
### Critical (fix immediately)
1. Missing form labels
2. Missing image alt text
3. Insufficient color contrast
4. Keyboard traps
5. No focus indicators
### Serious (fix before launch)
1. Missing page language
2. Missing heading structure
3. Non-descriptive link text
4. Auto-playing media
5. Missing skip links
### Moderate (fix soon)
1. Missing ARIA labels on icons
2. Inconsistent navigation
3. Missing error identification
4. Timing without controls
5. Missing landmark regions
## References
- [WCAG 2.1 Quick Reference](https://www.w3.org/WAI/WCAG21/quickref/)
- [WAI-ARIA Authoring Practices](https://www.w3.org/WAI/ARIA/apg/)
- [Deque axe Rules](https://dequeuniversity.com/rules/axe/)
- [Web Quality Audit](../web-quality-audit/SKILL.md)

View file

@ -1,162 +0,0 @@
# WCAG 2.1 Quick Reference
## Success criteria by level
### Level A (minimum)
| Criterion | Description |
|-----------|-------------|
| **1.1.1** Non-text Content | All images, icons have text alternatives |
| **1.2.1** Audio-only/Video-only | Provide transcript or audio description |
| **1.2.2** Captions | Video with audio has captions |
| **1.2.3** Audio Description or Media Alternative | Prerecorded video has audio description or a full text alternative |
| **1.3.1** Info and Relationships | Information conveyed through presentation is available programmatically |
| **1.3.2** Meaningful Sequence | Reading order is logical |
| **1.3.3** Sensory Characteristics | Instructions don't rely solely on shape, color, size, location, orientation, or sound |
| **1.4.1** Use of Color | Color is not the only visual means of conveying information |
| **1.4.2** Audio Control | Audio playing automatically can be paused/stopped |
| **2.1.1** Keyboard | All functionality available via keyboard |
| **2.1.2** No Keyboard Trap | Keyboard focus can be moved away from any component |
| **2.1.4** Character Key Shortcuts | Single-key shortcuts can be turned off or remapped |
| **2.2.1** Timing Adjustable | Time limits can be extended |
| **2.2.2** Pause, Stop, Hide | Moving/blinking content can be paused |
| **2.3.1** Three Flashes | Nothing flashes more than 3 times per second |
| **2.4.1** Bypass Blocks | Skip link or landmark navigation available |
| **2.4.2** Page Titled | Pages have descriptive titles |
| **2.4.3** Focus Order | Focus order preserves meaning |
| **2.4.4** Link Purpose | Link purpose clear from link text or context |
| **2.5.1** Pointer Gestures | Multi-point gestures have single-pointer alternatives |
| **2.5.2** Pointer Cancellation | Down-event doesn't trigger action (use up-event or click) |
| **2.5.3** Label in Name | Accessible name contains visible label text |
| **2.5.4** Motion Actuation | Motion-triggered functions have alternatives |
| **3.1.1** Language of Page | Default language specified in HTML |
| **3.2.1** On Focus | Focus doesn't trigger unexpected changes |
| **3.2.2** On Input | Input doesn't trigger unexpected changes |
| **3.3.1** Error Identification | Input errors clearly described |
| **3.3.2** Labels or Instructions | Form inputs have labels or instructions |
| **4.1.1** Parsing | HTML is well-formed (no duplicate IDs, proper nesting) |
| **4.1.2** Name, Role, Value | UI components have accessible names and correct roles |
### Level AA (standard)
| Criterion | Description |
|-----------|-------------|
| **1.2.4** Captions (Live) | Live audio has captions |
| **1.2.5** Audio Description | Pre-recorded video has audio description |
| **1.3.4** Orientation | Content doesn't restrict orientation |
| **1.3.5** Identify Input Purpose | Input purpose can be programmatically determined |
| **1.4.3** Contrast (Minimum) | 4.5:1 for normal text, 3:1 for large text |
| **1.4.4** Resize Text | Text can be resized to 200% without loss of functionality |
| **1.4.5** Images of Text | Text used instead of images of text |
| **1.4.10** Reflow | Content reflows at 320px width without horizontal scroll |
| **1.4.11** Non-text Contrast | UI components have 3:1 contrast |
| **1.4.12** Text Spacing | Content adapts to text spacing changes |
| **1.4.13** Content on Hover/Focus | Additional content is dismissible, hoverable, persistent |
| **2.4.5** Multiple Ways | Multiple ways to find pages |
| **2.4.6** Headings and Labels | Headings and labels are descriptive |
| **2.4.7** Focus Visible | Focus indicator is visible |
| **3.1.2** Language of Parts | Language changes are marked |
| **3.2.3** Consistent Navigation | Navigation is consistent across pages |
| **3.2.4** Consistent Identification | Same functionality uses same labels |
| **3.3.3** Error Suggestion | Error corrections suggested when known |
| **3.3.4** Error Prevention (Legal) | Actions can be reversed or confirmed |
| **4.1.3** Status Messages | Status messages announced to screen readers |
### Level AAA (enhanced)
| Criterion | Description |
|-----------|-------------|
| **1.4.6** Contrast (Enhanced) | 7:1 for normal text, 4.5:1 for large text |
| **1.4.8** Visual Presentation | Foreground/background colors can be selected |
| **1.4.9** Images of Text (No Exception) | No images of text |
| **2.1.3** Keyboard (No Exception) | All functionality keyboard accessible |
| **2.2.3** No Timing | No time limits |
| **2.2.4** Interruptions | Interruptions can be postponed |
| **2.2.5** Re-authenticating | Data preserved on re-authentication |
| **2.2.6** Timeouts | Users warned about data loss from inactivity |
| **2.3.2** Three Flashes | No content flashes more than 3 times in any one-second period (no below-threshold exception) |
| **2.3.3** Animation from Interactions | Motion animation can be disabled |
| **2.4.8** Location | User location within site is available |
| **2.4.9** Link Purpose (Link Only) | Link purpose clear from link text alone |
| **2.4.10** Section Headings | Sections have headings |
| **3.1.3** Unusual Words | Definitions available for unusual words |
| **3.1.4** Abbreviations | Abbreviations expanded |
| **3.1.5** Reading Level | Alternative content for complex text |
| **3.1.6** Pronunciation | Pronunciation available where needed |
| **3.2.5** Change on Request | Changes initiated only by user |
| **3.3.5** Help | Context-sensitive help available |
| **3.3.6** Error Prevention (All) | All form submissions can be reviewed |
## Common ARIA patterns
### Buttons
```html
<button>Label</button>
<!-- or -->
<button aria-label="Close dialog">×</button>
```
### Links
```html
<a href="/page">Descriptive link text</a>
<!-- External links -->
<a href="https://external.com" target="_blank" rel="noopener">
External site
<span class="visually-hidden">(opens in new tab)</span>
</a>
```
### Form fields
```html
<label for="email">Email address</label>
<input type="email" id="email" aria-describedby="email-hint">
<p id="email-hint">We'll never share your email.</p>
```
### Error states
```html
<label for="email">Email</label>
<input type="email" id="email" aria-invalid="true" aria-describedby="email-error">
<p id="email-error" role="alert">Please enter a valid email address.</p>
```
### Navigation
```html
<nav aria-label="Main">
<ul>
<li><a href="/" aria-current="page">Home</a></li>
<li><a href="/about">About</a></li>
</ul>
</nav>
```
### Modals
```html
<div role="dialog" aria-modal="true" aria-labelledby="dialog-title">
<h2 id="dialog-title">Confirm Action</h2>
<!-- content -->
</div>
```
### Live regions
```html
<!-- Polite (waits for pause in speech) -->
<div aria-live="polite">Status update here</div>
<!-- Assertive (interrupts immediately) -->
<div aria-live="assertive" role="alert">Error message here</div>
<!-- Status (polite, implicit) -->
<div role="status">Loading complete</div>
```
## Testing tools
| Tool | Type | URL |
|------|------|-----|
| axe DevTools | Browser extension | [deque.com/axe](https://www.deque.com/axe/) |
| WAVE | Browser extension | [wave.webaim.org](https://wave.webaim.org/) |
| Lighthouse | Built into Chrome | DevTools → Lighthouse |
| NVDA | Screen reader (Windows) | [nvaccess.org](https://www.nvaccess.org/) |
| VoiceOver | Screen reader (Mac) | Built into macOS |
| Colour Contrast Analyser | Desktop app | [tpgi.com](https://www.tpgi.com/color-contrast-checker/) |

View file

@ -1,517 +0,0 @@
---
name: agent-browser
description: Browser automation CLI for AI agents. Use when the user needs to interact with websites, including navigating pages, filling forms, clicking buttons, taking screenshots, extracting data, testing web apps, or automating any browser task. Triggers include requests to "open a website", "fill out a form", "click a button", "take a screenshot", "scrape data from a page", "test this web app", "login to a site", "automate browser actions", or any task requiring programmatic web interaction.
allowed-tools: Bash(npx agent-browser:*), Bash(agent-browser:*)
---
# Browser Automation with agent-browser
## Core Workflow
Every browser automation follows this pattern:
1. **Navigate**: `agent-browser open <url>`
2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
3. **Interact**: Use refs to click, fill, select
4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
```bash
agent-browser open https://example.com/form
agent-browser snapshot -i
# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
agent-browser fill @e1 "user@example.com"
agent-browser fill @e2 "password123"
agent-browser click @e3
agent-browser wait --load networkidle
agent-browser snapshot -i # Check result
```
## Command Chaining
Commands can be chained with `&&` in a single shell invocation. The browser persists between commands via a background daemon, so chaining is safe and more efficient than separate calls.
```bash
# Chain open + wait + snapshot in one call
agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser snapshot -i
# Chain multiple interactions
agent-browser fill @e1 "user@example.com" && agent-browser fill @e2 "password123" && agent-browser click @e3
# Navigate and capture
agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser screenshot page.png
```
**When to chain:** Use `&&` when you don't need to read the output of an intermediate command before proceeding (e.g., open + wait + screenshot). Run commands separately when you need to parse the output first (e.g., snapshot to discover refs, then interact using those refs).
## Essential Commands
```bash
# Navigation
agent-browser open <url> # Navigate (aliases: goto, navigate)
agent-browser close # Close browser
# Snapshot
agent-browser snapshot -i # Interactive elements with refs (recommended)
agent-browser snapshot -i -C # Include cursor-interactive elements (divs with onclick, cursor:pointer)
agent-browser snapshot -s "#selector" # Scope to CSS selector
# Interaction (use @refs from snapshot)
agent-browser click @e1 # Click element
agent-browser click @e1 --new-tab # Click and open in new tab
agent-browser fill @e2 "text" # Clear and type text
agent-browser type @e2 "text" # Type without clearing
agent-browser select @e1 "option" # Select dropdown option
agent-browser check @e1 # Check checkbox
agent-browser press Enter # Press key
agent-browser keyboard type "text" # Type at current focus (no selector)
agent-browser keyboard inserttext "text" # Insert without key events
agent-browser scroll down 500 # Scroll page
agent-browser scroll down 500 --selector "div.content" # Scroll within a specific container
# Get information
agent-browser get text @e1 # Get element text
agent-browser get url # Get current URL
agent-browser get title # Get page title
# Wait
agent-browser wait @e1 # Wait for element
agent-browser wait --load networkidle # Wait for network idle
agent-browser wait --url "**/page" # Wait for URL pattern
agent-browser wait 2000 # Wait milliseconds
# Downloads
agent-browser download @e1 ./file.pdf # Click element to trigger download
agent-browser wait --download ./output.zip # Wait for any download to complete
agent-browser --download-path ./downloads open <url> # Set default download directory
# Capture
agent-browser screenshot # Screenshot to temp dir
agent-browser screenshot --full # Full page screenshot
agent-browser screenshot --annotate # Annotated screenshot with numbered element labels
agent-browser pdf output.pdf # Save as PDF
# Diff (compare page states)
agent-browser diff snapshot # Compare current vs last snapshot
agent-browser diff snapshot --baseline before.txt # Compare current vs saved file
agent-browser diff screenshot --baseline before.png # Visual pixel diff
agent-browser diff url <url1> <url2> # Compare two pages
agent-browser diff url <url1> <url2> --wait-until networkidle # Custom wait strategy
agent-browser diff url <url1> <url2> --selector "#main" # Scope to element
```
## Common Patterns
### Form Submission
```bash
agent-browser open https://example.com/signup
agent-browser snapshot -i
agent-browser fill @e1 "Jane Doe"
agent-browser fill @e2 "jane@example.com"
agent-browser select @e3 "California"
agent-browser check @e4
agent-browser click @e5
agent-browser wait --load networkidle
```
### Authentication with Auth Vault (Recommended)
```bash
# Save credentials once (encrypted with AGENT_BROWSER_ENCRYPTION_KEY)
# Recommended: pipe password via stdin to avoid shell history exposure
echo "pass" | agent-browser auth save github --url https://github.com/login --username user --password-stdin
# Login using saved profile (LLM never sees password)
agent-browser auth login github
# List/show/delete profiles
agent-browser auth list
agent-browser auth show github
agent-browser auth delete github
```
### Authentication with State Persistence
```bash
# Login once and save state
agent-browser open https://app.example.com/login
agent-browser snapshot -i
agent-browser fill @e1 "$USERNAME"
agent-browser fill @e2 "$PASSWORD"
agent-browser click @e3
agent-browser wait --url "**/dashboard"
agent-browser state save auth.json
# Reuse in future sessions
agent-browser state load auth.json
agent-browser open https://app.example.com/dashboard
```
### Session Persistence
```bash
# Auto-save/restore cookies and localStorage across browser restarts
agent-browser --session-name myapp open https://app.example.com/login
# ... login flow ...
agent-browser close # State auto-saved to ~/.agent-browser/sessions/
# Next time, state is auto-loaded
agent-browser --session-name myapp open https://app.example.com/dashboard
# Encrypt state at rest
export AGENT_BROWSER_ENCRYPTION_KEY=$(openssl rand -hex 32)
agent-browser --session-name secure open https://app.example.com
# Manage saved states
agent-browser state list
agent-browser state show myapp-default.json
agent-browser state clear myapp
agent-browser state clean --older-than 7
```
### Data Extraction
```bash
agent-browser open https://example.com/products
agent-browser snapshot -i
agent-browser get text @e5 # Get specific element text
agent-browser get text body > page.txt # Get all page text
# JSON output for parsing
agent-browser snapshot -i --json
agent-browser get text @e1 --json
```
### Parallel Sessions
```bash
agent-browser --session site1 open https://site-a.com
agent-browser --session site2 open https://site-b.com
agent-browser --session site1 snapshot -i
agent-browser --session site2 snapshot -i
agent-browser session list
```
### Connect to Existing Chrome
```bash
# Auto-discover running Chrome with remote debugging enabled
agent-browser --auto-connect open https://example.com
agent-browser --auto-connect snapshot
# Or with explicit CDP port
agent-browser --cdp 9222 snapshot
```
### Color Scheme (Dark Mode)
```bash
# Persistent dark mode via flag (applies to all pages and new tabs)
agent-browser --color-scheme dark open https://example.com
# Or via environment variable
AGENT_BROWSER_COLOR_SCHEME=dark agent-browser open https://example.com
# Or set during session (persists for subsequent commands)
agent-browser set media dark
```
### Visual Browser (Debugging)
```bash
agent-browser --headed open https://example.com
agent-browser highlight @e1 # Highlight element
agent-browser record start demo.webm # Record session
agent-browser profiler start # Start Chrome DevTools profiling
agent-browser profiler stop trace.json # Stop and save profile (path optional)
```
Use `AGENT_BROWSER_HEADED=1` to enable headed mode via environment variable. Browser extensions work in both headed and headless mode.
### Local Files (PDFs, HTML)
```bash
# Open local files with file:// URLs
agent-browser --allow-file-access open file:///path/to/document.pdf
agent-browser --allow-file-access open file:///path/to/page.html
agent-browser screenshot output.png
```
### iOS Simulator (Mobile Safari)
```bash
# List available iOS simulators
agent-browser device list
# Launch Safari on a specific device
agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
# Same workflow as desktop - snapshot, interact, re-snapshot
agent-browser -p ios snapshot -i
agent-browser -p ios tap @e1 # Tap (alias for click)
agent-browser -p ios fill @e2 "text"
agent-browser -p ios swipe up # Mobile-specific gesture
# Take screenshot
agent-browser -p ios screenshot mobile.png
# Close session (shuts down simulator)
agent-browser -p ios close
```
**Requirements:** macOS with Xcode, Appium (`npm install -g appium && appium driver install xcuitest`)
**Real devices:** Works with physical iOS devices if pre-configured. Use `--device "<UDID>"` where UDID is from `xcrun xctrace list devices`.
## Security
All security features are opt-in. By default, agent-browser imposes no restrictions on navigation, actions, or output.
### Content Boundaries (Recommended for AI Agents)
Enable `--content-boundaries` to wrap page-sourced output in markers that help LLMs distinguish tool output from untrusted page content:
```bash
export AGENT_BROWSER_CONTENT_BOUNDARIES=1
agent-browser snapshot
# Output:
# --- AGENT_BROWSER_PAGE_CONTENT nonce=<hex> origin=https://example.com ---
# [accessibility tree]
# --- END_AGENT_BROWSER_PAGE_CONTENT nonce=<hex> ---
```
### Domain Allowlist
Restrict navigation to trusted domains. Wildcards like `*.example.com` also match the bare domain `example.com`. Sub-resource requests, WebSocket, and EventSource connections to non-allowed domains are also blocked. Include CDN domains your target pages depend on:
```bash
export AGENT_BROWSER_ALLOWED_DOMAINS="example.com,*.example.com"
agent-browser open https://example.com # OK
agent-browser open https://malicious.com # Blocked
```
### Action Policy
Use a policy file to gate destructive actions:
```bash
export AGENT_BROWSER_ACTION_POLICY=./policy.json
```
Example `policy.json`:
```json
{"default": "deny", "allow": ["navigate", "snapshot", "click", "scroll", "wait", "get"]}
```
Auth vault operations (`auth login`, etc.) bypass the action policy, but the domain allowlist still applies.
### Output Limits
Prevent context flooding from large pages:
```bash
export AGENT_BROWSER_MAX_OUTPUT=50000
```
## Diffing (Verifying Changes)
Use `diff snapshot` after performing an action to verify it had the intended effect. This compares the current accessibility tree against the last snapshot taken in the session.
```bash
# Typical workflow: snapshot -> action -> diff
agent-browser snapshot -i # Take baseline snapshot
agent-browser click @e2 # Perform action
agent-browser diff snapshot # See what changed (auto-compares to last snapshot)
```
For visual regression testing or monitoring:
```bash
# Save a baseline screenshot, then compare later
agent-browser screenshot baseline.png
# ... time passes or changes are made ...
agent-browser diff screenshot --baseline baseline.png
# Compare staging vs production
agent-browser diff url https://staging.example.com https://prod.example.com --screenshot
```
`diff snapshot` output uses `+` for additions and `-` for removals, similar to git diff. `diff screenshot` produces a diff image with changed pixels highlighted in red, plus a mismatch percentage.
## Timeouts and Slow Pages
The default Playwright timeout is 25 seconds for local browsers. This can be overridden with the `AGENT_BROWSER_DEFAULT_TIMEOUT` environment variable (value in milliseconds). For slow websites or large pages, use explicit waits instead of relying on the default timeout:
```bash
# Wait for network activity to settle (best for slow pages)
agent-browser wait --load networkidle
# Wait for a specific element to appear
agent-browser wait "#content"
agent-browser wait @e1
# Wait for a specific URL pattern (useful after redirects)
agent-browser wait --url "**/dashboard"
# Wait for a JavaScript condition
agent-browser wait --fn "document.readyState === 'complete'"
# Wait a fixed duration (milliseconds) as a last resort
agent-browser wait 5000
```
When dealing with consistently slow websites, use `wait --load networkidle` after `open` to ensure the page is fully loaded before taking a snapshot. If a specific element is slow to render, wait for it directly with `wait <selector>` or `wait @ref`.
## Session Management and Cleanup
When running multiple agents or automations concurrently, always use named sessions to avoid conflicts:
```bash
# Each agent gets its own isolated session
agent-browser --session agent1 open site-a.com
agent-browser --session agent2 open site-b.com
# Check active sessions
agent-browser session list
```
Always close your browser session when done to avoid leaked processes:
```bash
agent-browser close # Close default session
agent-browser --session agent1 close # Close specific session
```
If a previous session was not closed properly, the daemon may still be running. Use `agent-browser close` to clean it up before starting new work.
## Ref Lifecycle (Important)
Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. Always re-snapshot after:
- Clicking links or buttons that navigate
- Form submissions
- Dynamic content loading (dropdowns, modals)
```bash
agent-browser click @e5 # Navigates to new page
agent-browser snapshot -i # MUST re-snapshot
agent-browser click @e1 # Use new refs
```
## Annotated Screenshots (Vision Mode)
Use `--annotate` to take a screenshot with numbered labels overlaid on interactive elements. Each label `[N]` maps to ref `@eN`. This also caches refs, so you can interact with elements immediately without a separate snapshot.
```bash
agent-browser screenshot --annotate
# Output includes the image path and a legend:
# [1] @e1 button "Submit"
# [2] @e2 link "Home"
# [3] @e3 textbox "Email"
agent-browser click @e2 # Click using ref from annotated screenshot
```
Use annotated screenshots when:
- The page has unlabeled icon buttons or visual-only elements
- You need to verify visual layout or styling
- Canvas or chart elements are present (invisible to text snapshots)
- You need spatial reasoning about element positions
## Semantic Locators (Alternative to Refs)
When refs are unavailable or unreliable, use semantic locators:
```bash
agent-browser find text "Sign In" click
agent-browser find label "Email" fill "user@test.com"
agent-browser find role button click --name "Submit"
agent-browser find placeholder "Search" type "query"
agent-browser find testid "submit-btn" click
```
## JavaScript Evaluation (eval)
Use `eval` to run JavaScript in the browser context. **Shell quoting can corrupt complex expressions** -- use `--stdin` or `-b` to avoid issues.
```bash
# Simple expressions work with regular quoting
agent-browser eval 'document.title'
agent-browser eval 'document.querySelectorAll("img").length'
# Complex JS: use --stdin with heredoc (RECOMMENDED)
agent-browser eval --stdin <<'EVALEOF'
JSON.stringify(
Array.from(document.querySelectorAll("img"))
.filter(i => !i.alt)
.map(i => ({ src: i.src.split("/").pop(), width: i.width }))
)
EVALEOF
# Alternative: base64 encoding (avoids all shell escaping issues)
agent-browser eval -b "$(echo -n 'Array.from(document.querySelectorAll("a")).map(a => a.href)' | base64)"
```
**Why this matters:** When the shell processes your command, inner double quotes, `!` characters (history expansion), backticks, and `$()` can all corrupt the JavaScript before it reaches agent-browser. The `--stdin` and `-b` flags bypass shell interpretation entirely.
**Rules of thumb:**
- Single-line, no nested quotes -> regular `eval 'expression'` with single quotes is fine
- Nested quotes, arrow functions, template literals, or multiline -> use `eval --stdin <<'EVALEOF'`
- Programmatic/generated scripts -> use `eval -b` with base64
## Configuration File
Create `agent-browser.json` in the project root for persistent settings:
```json
{
"headed": true,
"proxy": "http://localhost:8080",
"profile": "./browser-data"
}
```
Priority (lowest to highest): `~/.agent-browser/config.json` < `./agent-browser.json` < env vars < CLI flags. Use `--config <path>` or `AGENT_BROWSER_CONFIG` env var for a custom config file (exits with error if missing/invalid). All CLI options map to camelCase keys (e.g., `--executable-path` -> `"executablePath"`). Boolean flags accept `true`/`false` values (e.g., `--headed false` overrides config). Extensions from user and project configs are merged, not replaced.
## Deep-Dive Documentation
| Reference | When to Use |
|-----------|-------------|
| [references/commands.md](references/commands.md) | Full command reference with all options |
| [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting |
| [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping |
| [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse |
| [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation |
| [references/profiling.md](references/profiling.md) | Chrome DevTools profiling for performance analysis |
| [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies |
## Experimental: Native Mode
agent-browser has an experimental native Rust daemon that communicates with Chrome directly via CDP, bypassing Node.js and Playwright entirely. It is opt-in and not recommended for production use yet.
```bash
# Enable via flag
agent-browser --native open example.com
# Enable via environment variable (avoids passing --native every time)
export AGENT_BROWSER_NATIVE=1
agent-browser open example.com
```
The native daemon supports Chromium and Safari (via WebDriver). Firefox and WebKit are not yet supported. All core commands (navigate, snapshot, click, fill, screenshot, cookies, storage, tabs, eval, etc.) work identically in native mode. Use `agent-browser close` before switching between native and default mode within the same session.
## Ready-to-Use Templates
| Template | Description |
|----------|-------------|
| [templates/form-automation.sh](templates/form-automation.sh) | Form filling with validation |
| [templates/authenticated-session.sh](templates/authenticated-session.sh) | Login once, reuse state |
| [templates/capture-workflow.sh](templates/capture-workflow.sh) | Content extraction with screenshots |
```bash
./templates/form-automation.sh https://example.com/form
./templates/authenticated-session.sh https://app.example.com/login
./templates/capture-workflow.sh https://example.com ./output
```

View file

@ -1,202 +0,0 @@
# Authentication Patterns
Login flows, session persistence, OAuth, 2FA, and authenticated browsing.
**Related**: [session-management.md](session-management.md) for state persistence details, [SKILL.md](../SKILL.md) for quick start.
## Contents
- [Basic Login Flow](#basic-login-flow)
- [Saving Authentication State](#saving-authentication-state)
- [Restoring Authentication](#restoring-authentication)
- [OAuth / SSO Flows](#oauth--sso-flows)
- [Two-Factor Authentication](#two-factor-authentication)
- [HTTP Basic Auth](#http-basic-auth)
- [Cookie-Based Auth](#cookie-based-auth)
- [Token Refresh Handling](#token-refresh-handling)
- [Security Best Practices](#security-best-practices)
## Basic Login Flow
```bash
# Navigate to login page
agent-browser open https://app.example.com/login
agent-browser wait --load networkidle
# Get form elements
agent-browser snapshot -i
# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In"
# Fill credentials
agent-browser fill @e1 "user@example.com"
agent-browser fill @e2 "password123"
# Submit
agent-browser click @e3
agent-browser wait --load networkidle
# Verify login succeeded
agent-browser get url # Should be dashboard, not login
```
## Saving Authentication State
After logging in, save state for reuse:
```bash
# Login first (see above)
agent-browser open https://app.example.com/login
agent-browser snapshot -i
agent-browser fill @e1 "user@example.com"
agent-browser fill @e2 "password123"
agent-browser click @e3
agent-browser wait --url "**/dashboard"
# Save authenticated state
agent-browser state save ./auth-state.json
```
## Restoring Authentication
Skip login by loading saved state:
```bash
# Load saved auth state
agent-browser state load ./auth-state.json
# Navigate directly to protected page
agent-browser open https://app.example.com/dashboard
# Verify authenticated
agent-browser snapshot -i
```
## OAuth / SSO Flows
For OAuth redirects:
```bash
# Start OAuth flow
agent-browser open https://app.example.com/auth/google
# Handle redirects automatically
agent-browser wait --url "**/accounts.google.com**"
agent-browser snapshot -i
# Fill Google credentials
agent-browser fill @e1 "user@gmail.com"
agent-browser click @e2 # Next button
agent-browser wait 2000
agent-browser snapshot -i
agent-browser fill @e3 "password"
agent-browser click @e4 # Sign in
# Wait for redirect back
agent-browser wait --url "**/app.example.com**"
agent-browser state save ./oauth-state.json
```
## Two-Factor Authentication
Handle 2FA with manual intervention:
```bash
# Login with credentials
agent-browser open https://app.example.com/login --headed # Show browser
agent-browser snapshot -i
agent-browser fill @e1 "user@example.com"
agent-browser fill @e2 "password123"
agent-browser click @e3
# Wait for user to complete 2FA manually
echo "Complete 2FA in the browser window..."
agent-browser wait --url "**/dashboard" --timeout 120000
# Save state after 2FA
agent-browser state save ./2fa-state.json
```
## HTTP Basic Auth
For sites using HTTP Basic Authentication:
```bash
# Set credentials before navigation
agent-browser set credentials username password
# Navigate to protected resource
agent-browser open https://protected.example.com/api
```
## Cookie-Based Auth
Manually set authentication cookies:
```bash
# Set auth cookie
agent-browser cookies set session_token "abc123xyz"
# Navigate to protected page
agent-browser open https://app.example.com/dashboard
```
## Token Refresh Handling
For sessions with expiring tokens:
```bash
#!/bin/bash
# Wrapper that handles token refresh
STATE_FILE="./auth-state.json"
# Try loading existing state
if [[ -f "$STATE_FILE" ]]; then
agent-browser state load "$STATE_FILE"
agent-browser open https://app.example.com/dashboard
# Check if session is still valid
URL=$(agent-browser get url)
if [[ "$URL" == *"/login"* ]]; then
echo "Session expired, re-authenticating..."
# Perform fresh login
agent-browser snapshot -i
agent-browser fill @e1 "$USERNAME"
agent-browser fill @e2 "$PASSWORD"
agent-browser click @e3
agent-browser wait --url "**/dashboard"
agent-browser state save "$STATE_FILE"
fi
else
# First-time login
agent-browser open https://app.example.com/login
# ... login flow ...
fi
```
## Security Best Practices
1. **Never commit state files** - They contain session tokens
```bash
echo "*-state.json" >> .gitignore
```
2. **Use environment variables for credentials**
```bash
agent-browser fill @e1 "$APP_USERNAME"
agent-browser fill @e2 "$APP_PASSWORD"
```
3. **Clean up after automation**
```bash
agent-browser cookies clear
rm -f ./auth-state.json
```
4. **Use short-lived sessions for CI/CD**
```bash
# Don't persist state in CI
agent-browser open https://app.example.com/login
# ... login and perform actions ...
agent-browser close # Session ends, nothing persisted
```

View file

@ -1,263 +0,0 @@
# Command Reference
Complete reference for all agent-browser commands. For quick start and common patterns, see [SKILL.md](../SKILL.md).
## Navigation
```bash
agent-browser open <url> # Navigate to URL (aliases: goto, navigate)
# Supports: https://, http://, file://, about:, data://
# Auto-prepends https:// if no protocol given
agent-browser back # Go back
agent-browser forward # Go forward
agent-browser reload # Reload page
agent-browser close # Close browser (aliases: quit, exit)
agent-browser connect 9222 # Connect to browser via CDP port
```
## Snapshot (page analysis)
```bash
agent-browser snapshot # Full accessibility tree
agent-browser snapshot -i # Interactive elements only (recommended)
agent-browser snapshot -c # Compact output
agent-browser snapshot -d 3 # Limit depth to 3
agent-browser snapshot -s "#main" # Scope to CSS selector
```
## Interactions (use @refs from snapshot)
```bash
agent-browser click @e1 # Click
agent-browser click @e1 --new-tab # Click and open in new tab
agent-browser dblclick @e1 # Double-click
agent-browser focus @e1 # Focus element
agent-browser fill @e2 "text" # Clear and type
agent-browser type @e2 "text" # Type without clearing
agent-browser press Enter # Press key (alias: key)
agent-browser press Control+a # Key combination
agent-browser keydown Shift # Hold key down
agent-browser keyup Shift # Release key
agent-browser hover @e1 # Hover
agent-browser check @e1 # Check checkbox
agent-browser uncheck @e1 # Uncheck checkbox
agent-browser select @e1 "value" # Select dropdown option
agent-browser select @e1 "a" "b" # Select multiple options
agent-browser scroll down 500 # Scroll page (default: down 300px)
agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto)
agent-browser drag @e1 @e2 # Drag and drop
agent-browser upload @e1 file.pdf # Upload files
```
## Get Information
```bash
agent-browser get text @e1 # Get element text
agent-browser get html @e1 # Get innerHTML
agent-browser get value @e1 # Get input value
agent-browser get attr @e1 href # Get attribute
agent-browser get title # Get page title
agent-browser get url # Get current URL
agent-browser get count ".item" # Count matching elements
agent-browser get box @e1 # Get bounding box
agent-browser get styles @e1 # Get computed styles (font, color, bg, etc.)
```
## Check State
```bash
agent-browser is visible @e1 # Check if visible
agent-browser is enabled @e1 # Check if enabled
agent-browser is checked @e1 # Check if checked
```
## Screenshots and PDF
```bash
agent-browser screenshot # Save to temporary directory
agent-browser screenshot path.png # Save to specific path
agent-browser screenshot --full # Full page
agent-browser pdf output.pdf # Save as PDF
```
## Video Recording
```bash
agent-browser record start ./demo.webm # Start recording
agent-browser click @e1 # Perform actions
agent-browser record stop # Stop and save video
agent-browser record restart ./take2.webm # Stop current + start new
```
## Wait
```bash
agent-browser wait @e1 # Wait for element
agent-browser wait 2000 # Wait milliseconds
agent-browser wait --text "Success" # Wait for text (or -t)
agent-browser wait --url "**/dashboard" # Wait for URL pattern (or -u)
agent-browser wait --load networkidle # Wait for network idle (or -l)
agent-browser wait --fn "window.ready" # Wait for JS condition (or -f)
```
## Mouse Control
```bash
agent-browser mouse move 100 200 # Move mouse
agent-browser mouse down left # Press button
agent-browser mouse up left # Release button
agent-browser mouse wheel 100 # Scroll wheel
```
## Semantic Locators (alternative to refs)
```bash
agent-browser find role button click --name "Submit"
agent-browser find text "Sign In" click
agent-browser find text "Sign In" click --exact # Exact match only
agent-browser find label "Email" fill "user@test.com"
agent-browser find placeholder "Search" type "query"
agent-browser find alt "Logo" click
agent-browser find title "Close" click
agent-browser find testid "submit-btn" click
agent-browser find first ".item" click
agent-browser find last ".item" click
agent-browser find nth 2 "a" hover
```
## Browser Settings
```bash
agent-browser set viewport 1920 1080 # Set viewport size
agent-browser set device "iPhone 14" # Emulate device
agent-browser set geo 37.7749 -122.4194 # Set geolocation (alias: geolocation)
agent-browser set offline on # Toggle offline mode
agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers
agent-browser set credentials user pass # HTTP basic auth (alias: auth)
agent-browser set media dark # Emulate color scheme
agent-browser set media light reduced-motion # Light mode + reduced motion
```
## Cookies and Storage
```bash
agent-browser cookies # Get all cookies
agent-browser cookies set name value # Set cookie
agent-browser cookies clear # Clear cookies
agent-browser storage local # Get all localStorage
agent-browser storage local key # Get specific key
agent-browser storage local set k v # Set value
agent-browser storage local clear # Clear all
```
## Network
```bash
agent-browser network route <url> # Intercept requests
agent-browser network route <url> --abort # Block requests
agent-browser network route <url> --body '{}' # Mock response
agent-browser network unroute [url] # Remove routes
agent-browser network requests # View tracked requests
agent-browser network requests --filter api # Filter requests
```
## Tabs and Windows
```bash
agent-browser tab # List tabs
agent-browser tab new [url] # New tab
agent-browser tab 2 # Switch to tab by index
agent-browser tab close # Close current tab
agent-browser tab close 2 # Close tab by index
agent-browser window new # New window
```
## Frames
```bash
agent-browser frame "#iframe" # Switch to iframe
agent-browser frame main # Back to main frame
```
## Dialogs
```bash
agent-browser dialog accept [text] # Accept dialog
agent-browser dialog dismiss # Dismiss dialog
```
## JavaScript
```bash
agent-browser eval "document.title" # Simple expressions only
agent-browser eval -b "<base64>" # Any JavaScript (base64 encoded)
agent-browser eval --stdin # Read script from stdin
```
Use `-b`/`--base64` or `--stdin` for reliable execution. Shell escaping with nested quotes and special characters is error-prone.
```bash
# Base64 encode your script, then:
agent-browser eval -b "ZG9jdW1lbnQucXVlcnlTZWxlY3RvcignW3NyYyo9Il9uZXh0Il0nKQ=="
# Or use stdin with heredoc for multiline scripts:
cat <<'EOF' | agent-browser eval --stdin
const links = document.querySelectorAll('a');
Array.from(links).map(a => a.href);
EOF
```
## State Management
```bash
agent-browser state save auth.json # Save cookies, storage, auth state
agent-browser state load auth.json # Restore saved state
```
## Global Options
```bash
agent-browser --session <name> ... # Isolated browser session
agent-browser --json ... # JSON output for parsing
agent-browser --headed ... # Show browser window (not headless)
agent-browser --full ... # Full page screenshot (-f)
agent-browser --cdp <port> ... # Connect via Chrome DevTools Protocol
agent-browser -p <provider> ... # Cloud browser provider (--provider)
agent-browser --proxy <url> ... # Use proxy server
agent-browser --proxy-bypass <hosts> # Hosts to bypass proxy
agent-browser --headers <json> ... # HTTP headers scoped to URL's origin
agent-browser --executable-path <p> # Custom browser executable
agent-browser --extension <path> ... # Load browser extension (repeatable)
agent-browser --ignore-https-errors # Ignore SSL certificate errors
agent-browser --help # Show help (-h)
agent-browser --version # Show version (-V)
agent-browser <command> --help # Show detailed help for a command
```
## Debugging
```bash
agent-browser --headed open example.com # Show browser window
agent-browser --cdp 9222 snapshot # Connect via CDP port
agent-browser connect 9222 # Alternative: connect command
agent-browser console # View console messages
agent-browser console --clear # Clear console
agent-browser errors # View page errors
agent-browser errors --clear # Clear errors
agent-browser highlight @e1 # Highlight element
agent-browser trace start # Start recording trace
agent-browser trace stop trace.zip # Stop and save trace
agent-browser profiler start # Start Chrome DevTools profiling
agent-browser profiler stop trace.json # Stop and save profile
```
## Environment Variables
```bash
AGENT_BROWSER_SESSION="mysession" # Default session name
AGENT_BROWSER_EXECUTABLE_PATH="/path/chrome" # Custom browser path
AGENT_BROWSER_EXTENSIONS="/ext1,/ext2" # Comma-separated extension paths
AGENT_BROWSER_PROVIDER="browserbase" # Cloud browser provider
AGENT_BROWSER_STREAM_PORT="9223" # WebSocket streaming port
AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location
```

View file

@ -1,120 +0,0 @@
# Profiling
Capture Chrome DevTools performance profiles during browser automation for performance analysis.
**Related**: [commands.md](commands.md) for full command reference, [SKILL.md](../SKILL.md) for quick start.
## Contents
- [Basic Profiling](#basic-profiling)
- [Profiler Commands](#profiler-commands)
- [Categories](#categories)
- [Use Cases](#use-cases)
- [Output Format](#output-format)
- [Viewing Profiles](#viewing-profiles)
- [Limitations](#limitations)
## Basic Profiling
```bash
# Start profiling
agent-browser profiler start
# Perform actions
agent-browser navigate https://example.com
agent-browser click "#button"
agent-browser wait 1000
# Stop and save
agent-browser profiler stop ./trace.json
```
## Profiler Commands
```bash
# Start profiling with default categories
agent-browser profiler start
# Start with custom trace categories
agent-browser profiler start --categories "devtools.timeline,v8.execute,blink.user_timing"
# Stop profiling and save to file
agent-browser profiler stop ./trace.json
```
## Categories
The `--categories` flag accepts a comma-separated list of Chrome trace categories. Default categories include:
- `devtools.timeline` -- standard DevTools performance traces
- `v8.execute` -- time spent running JavaScript
- `blink` -- renderer events
- `blink.user_timing` -- `performance.mark()` / `performance.measure()` calls
- `latencyInfo` -- input latency tracking
- `renderer.scheduler` -- task scheduling and execution
- `toplevel` -- broad-spectrum basic events
Several `disabled-by-default-*` categories are also included for detailed timeline, call stack, and V8 CPU profiling data.
## Use Cases
### Diagnosing Slow Page Loads
```bash
agent-browser profiler start
agent-browser navigate https://app.example.com
agent-browser wait --load networkidle
agent-browser profiler stop ./page-load-profile.json
```
### Profiling User Interactions
```bash
agent-browser navigate https://app.example.com
agent-browser profiler start
agent-browser click "#submit"
agent-browser wait 2000
agent-browser profiler stop ./interaction-profile.json
```
### CI Performance Regression Checks
```bash
#!/bin/bash
agent-browser profiler start
agent-browser navigate https://app.example.com
agent-browser wait --load networkidle
agent-browser profiler stop "./profiles/build-${BUILD_ID}.json"
```
## Output Format
The output is a JSON file in Chrome Trace Event format:
```json
{
"traceEvents": [
{ "cat": "devtools.timeline", "name": "RunTask", "ph": "X", "ts": 12345, "dur": 100, ... },
...
],
"metadata": {
"clock-domain": "LINUX_CLOCK_MONOTONIC"
}
}
```
The `metadata.clock-domain` field is set based on the host platform (Linux or macOS). On Windows it is omitted.
## Viewing Profiles
Load the output JSON file in any of these tools:
- **Chrome DevTools**: Performance panel > Load profile (Ctrl+Shift+I > Performance)
- **Perfetto UI**: https://ui.perfetto.dev/ -- drag and drop the JSON file
- **Trace Viewer**: `chrome://tracing` in any Chromium browser
## Limitations
- Only works with Chromium-based browsers (Chrome, Edge). Not supported on Firefox or WebKit.
- Trace data accumulates in memory while profiling is active (capped at 5 million events). Stop profiling promptly after the area of interest.
- Data collection on stop has a 30-second timeout. If the browser is unresponsive, the stop command may fail.

View file

@ -1,194 +0,0 @@
# Proxy Support
Proxy configuration for geo-testing, rate limiting avoidance, and corporate environments.
**Related**: [commands.md](commands.md) for global options, [SKILL.md](../SKILL.md) for quick start.
## Contents
- [Basic Proxy Configuration](#basic-proxy-configuration)
- [Authenticated Proxy](#authenticated-proxy)
- [SOCKS Proxy](#socks-proxy)
- [Proxy Bypass](#proxy-bypass)
- [Common Use Cases](#common-use-cases)
- [Verifying Proxy Connection](#verifying-proxy-connection)
- [Troubleshooting](#troubleshooting)
- [Best Practices](#best-practices)
## Basic Proxy Configuration
Use the `--proxy` flag or set proxy via environment variable:
```bash
# Via CLI flag
agent-browser --proxy "http://proxy.example.com:8080" open https://example.com
# Via environment variable
export HTTP_PROXY="http://proxy.example.com:8080"
agent-browser open https://example.com
# HTTPS proxy
export HTTPS_PROXY="https://proxy.example.com:8080"
agent-browser open https://example.com
# Both
export HTTP_PROXY="http://proxy.example.com:8080"
export HTTPS_PROXY="http://proxy.example.com:8080"
agent-browser open https://example.com
```
## Authenticated Proxy
For proxies requiring authentication:
```bash
# Include credentials in URL
export HTTP_PROXY="http://username:password@proxy.example.com:8080"
agent-browser open https://example.com
```
## SOCKS Proxy
```bash
# SOCKS5 proxy
export ALL_PROXY="socks5://proxy.example.com:1080"
agent-browser open https://example.com
# SOCKS5 with auth
export ALL_PROXY="socks5://user:pass@proxy.example.com:1080"
agent-browser open https://example.com
```
## Proxy Bypass
Skip proxy for specific domains using `--proxy-bypass` or `NO_PROXY`:
```bash
# Via CLI flag
agent-browser --proxy "http://proxy.example.com:8080" --proxy-bypass "localhost,*.internal.com" open https://example.com
# Via environment variable
export NO_PROXY="localhost,127.0.0.1,.internal.company.com"
agent-browser open https://internal.company.com # Direct connection
agent-browser open https://external.com # Via proxy
```
## Common Use Cases
### Geo-Location Testing
```bash
#!/bin/bash
# Test site from different regions using geo-located proxies
PROXIES=(
"http://us-proxy.example.com:8080"
"http://eu-proxy.example.com:8080"
"http://asia-proxy.example.com:8080"
)
for proxy in "${PROXIES[@]}"; do
export HTTP_PROXY="$proxy"
export HTTPS_PROXY="$proxy"
region=$(echo "$proxy" | grep -oP '(?<=://)\w+-\w+')
echo "Testing from: $region"
agent-browser --session "$region" open https://example.com
agent-browser --session "$region" screenshot "./screenshots/$region.png"
agent-browser --session "$region" close
done
```
### Rotating Proxies for Scraping
```bash
#!/bin/bash
# Rotate through proxy list to avoid rate limiting
PROXY_LIST=(
"http://proxy1.example.com:8080"
"http://proxy2.example.com:8080"
"http://proxy3.example.com:8080"
)
URLS=(
"https://site.com/page1"
"https://site.com/page2"
"https://site.com/page3"
)
for i in "${!URLS[@]}"; do
proxy_index=$((i % ${#PROXY_LIST[@]}))
export HTTP_PROXY="${PROXY_LIST[$proxy_index]}"
export HTTPS_PROXY="${PROXY_LIST[$proxy_index]}"
agent-browser open "${URLS[$i]}"
agent-browser get text body > "output-$i.txt"
agent-browser close
sleep 1 # Polite delay
done
```
### Corporate Network Access
```bash
#!/bin/bash
# Access internal sites via corporate proxy
export HTTP_PROXY="http://corpproxy.company.com:8080"
export HTTPS_PROXY="http://corpproxy.company.com:8080"
export NO_PROXY="localhost,127.0.0.1,.company.com"
# External sites go through proxy
agent-browser open https://external-vendor.com
# Internal sites bypass proxy
agent-browser open https://intranet.company.com
```
## Verifying Proxy Connection
```bash
# Check your apparent IP
agent-browser open https://httpbin.org/ip
agent-browser get text body
# Should show proxy's IP, not your real IP
```
## Troubleshooting
### Proxy Connection Failed
```bash
# Test proxy connectivity first
curl -x http://proxy.example.com:8080 https://httpbin.org/ip
# Check if proxy requires auth
export HTTP_PROXY="http://user:pass@proxy.example.com:8080"
```
### SSL/TLS Errors Through Proxy
Some proxies perform SSL inspection. If you encounter certificate errors:
```bash
# For testing only - not recommended for production
agent-browser open https://example.com --ignore-https-errors
```
### Slow Performance
```bash
# Use proxy only when necessary
export NO_PROXY="*.cdn.com,*.static.com" # Direct CDN access
```
## Best Practices
1. **Use environment variables** - Don't hardcode proxy credentials
2. **Set NO_PROXY appropriately** - Avoid routing local traffic through proxy
3. **Test proxy before automation** - Verify connectivity with simple requests
4. **Handle proxy failures gracefully** - Implement retry logic for unstable proxies
5. **Rotate proxies for large scraping jobs** - Distribute load and avoid bans

View file

@ -1,193 +0,0 @@
# Session Management
Multiple isolated browser sessions with state persistence and concurrent browsing.
**Related**: [authentication.md](authentication.md) for login patterns, [SKILL.md](../SKILL.md) for quick start.
## Contents
- [Named Sessions](#named-sessions)
- [Session Isolation Properties](#session-isolation-properties)
- [Session State Persistence](#session-state-persistence)
- [Common Patterns](#common-patterns)
- [Default Session](#default-session)
- [Session Cleanup](#session-cleanup)
- [Best Practices](#best-practices)
## Named Sessions
Use `--session` flag to isolate browser contexts:
```bash
# Session 1: Authentication flow
agent-browser --session auth open https://app.example.com/login
# Session 2: Public browsing (separate cookies, storage)
agent-browser --session public open https://example.com
# Commands are isolated by session
agent-browser --session auth fill @e1 "user@example.com"
agent-browser --session public get text body
```
## Session Isolation Properties
Each session has independent:
- Cookies
- LocalStorage / SessionStorage
- IndexedDB
- Cache
- Browsing history
- Open tabs
## Session State Persistence
### Save Session State
```bash
# Save cookies, storage, and auth state
agent-browser state save /path/to/auth-state.json
```
### Load Session State
```bash
# Restore saved state
agent-browser state load /path/to/auth-state.json
# Continue with authenticated session
agent-browser open https://app.example.com/dashboard
```
### State File Contents
```json
{
"cookies": [...],
"localStorage": {...},
"sessionStorage": {...},
"origins": [...]
}
```
## Common Patterns
### Authenticated Session Reuse
```bash
#!/bin/bash
# Save login state once, reuse many times
STATE_FILE="/tmp/auth-state.json"
# Check if we have saved state
if [[ -f "$STATE_FILE" ]]; then
agent-browser state load "$STATE_FILE"
agent-browser open https://app.example.com/dashboard
else
# Perform login
agent-browser open https://app.example.com/login
agent-browser snapshot -i
agent-browser fill @e1 "$USERNAME"
agent-browser fill @e2 "$PASSWORD"
agent-browser click @e3
agent-browser wait --load networkidle
# Save for future use
agent-browser state save "$STATE_FILE"
fi
```
### Concurrent Scraping
```bash
#!/bin/bash
# Scrape multiple sites concurrently
# Start all sessions
agent-browser --session site1 open https://site1.com &
agent-browser --session site2 open https://site2.com &
agent-browser --session site3 open https://site3.com &
wait
# Extract from each
agent-browser --session site1 get text body > site1.txt
agent-browser --session site2 get text body > site2.txt
agent-browser --session site3 get text body > site3.txt
# Cleanup
agent-browser --session site1 close
agent-browser --session site2 close
agent-browser --session site3 close
```
### A/B Testing Sessions
```bash
# Test different user experiences
agent-browser --session variant-a open "https://app.com?variant=a"
agent-browser --session variant-b open "https://app.com?variant=b"
# Compare
agent-browser --session variant-a screenshot /tmp/variant-a.png
agent-browser --session variant-b screenshot /tmp/variant-b.png
```
## Default Session
When `--session` is omitted, commands use the default session:
```bash
# These use the same default session
agent-browser open https://example.com
agent-browser snapshot -i
agent-browser close # Closes default session
```
## Session Cleanup
```bash
# Close specific session
agent-browser --session auth close
# List active sessions
agent-browser session list
```
## Best Practices
### 1. Name Sessions Semantically
```bash
# GOOD: Clear purpose
agent-browser --session github-auth open https://github.com
agent-browser --session docs-scrape open https://docs.example.com
# AVOID: Generic names
agent-browser --session s1 open https://github.com
```
### 2. Always Clean Up
```bash
# Close sessions when done
agent-browser --session auth close
agent-browser --session scrape close
```
### 3. Handle State Files Securely
```bash
# Don't commit state files (contain auth tokens!)
echo "*auth-state.json" >> .gitignore
# Delete after use
rm /tmp/auth-state.json
```
### 4. Timeout Long Sessions
```bash
# Set timeout for automated scripts
timeout 60 agent-browser --session long-task get text body
```

View file

@ -1,194 +0,0 @@
# Snapshot and Refs
Compact element references that reduce context usage dramatically for AI agents.
**Related**: [commands.md](commands.md) for full command reference, [SKILL.md](../SKILL.md) for quick start.
## Contents
- [How Refs Work](#how-refs-work)
- [The Snapshot Command](#the-snapshot-command)
- [Using Refs](#using-refs)
- [Ref Lifecycle](#ref-lifecycle)
- [Best Practices](#best-practices)
- [Ref Notation Details](#ref-notation-details)
- [Troubleshooting](#troubleshooting)
## How Refs Work
Traditional approach:
```
Full DOM/HTML → AI parses → CSS selector → Action (~3000-5000 tokens)
```
agent-browser approach:
```
Compact snapshot → @refs assigned → Direct interaction (~200-400 tokens)
```
## The Snapshot Command
```bash
# Basic snapshot (shows page structure)
agent-browser snapshot
# Interactive snapshot (-i flag) - RECOMMENDED
agent-browser snapshot -i
```
### Snapshot Output Format
```
Page: Example Site - Home
URL: https://example.com
@e1 [header]
@e2 [nav]
@e3 [a] "Home"
@e4 [a] "Products"
@e5 [a] "About"
@e6 [button] "Sign In"
@e7 [main]
@e8 [h1] "Welcome"
@e9 [form]
@e10 [input type="email"] placeholder="Email"
@e11 [input type="password"] placeholder="Password"
@e12 [button type="submit"] "Log In"
@e13 [footer]
@e14 [a] "Privacy Policy"
```
## Using Refs
Once you have refs, interact directly:
```bash
# Click the "Sign In" button
agent-browser click @e6
# Fill email input
agent-browser fill @e10 "user@example.com"
# Fill password
agent-browser fill @e11 "password123"
# Submit the form
agent-browser click @e12
```
## Ref Lifecycle
**IMPORTANT**: Refs are invalidated when the page changes!
```bash
# Get initial snapshot
agent-browser snapshot -i
# @e1 [button] "Next"
# Click triggers page change
agent-browser click @e1
# MUST re-snapshot to get new refs!
agent-browser snapshot -i
# @e1 [h1] "Page 2" ← Different element now!
```
## Best Practices
### 1. Always Snapshot Before Interacting
```bash
# CORRECT
agent-browser open https://example.com
agent-browser snapshot -i # Get refs first
agent-browser click @e1 # Use ref
# WRONG
agent-browser open https://example.com
agent-browser click @e1 # Ref doesn't exist yet!
```
### 2. Re-Snapshot After Navigation
```bash
agent-browser click @e5 # Navigates to new page
agent-browser snapshot -i # Get new refs
agent-browser click @e1 # Use new refs
```
### 3. Re-Snapshot After Dynamic Changes
```bash
agent-browser click @e1 # Opens dropdown
agent-browser snapshot -i # See dropdown items
agent-browser click @e7 # Select item
```
### 4. Snapshot Specific Regions
For complex pages, snapshot specific areas:
```bash
# Snapshot just the form
agent-browser snapshot @e9
```
## Ref Notation Details
```
@e1 [tag type="value"] "text content" placeholder="hint"
│ │ │ │ │
│ │ │ │ └─ Additional attributes
│ │ │ └─ Visible text
│ │ └─ Key attributes shown
│ └─ HTML tag name
└─ Unique ref ID
```
### Common Patterns
```
@e1 [button] "Submit" # Button with text
@e2 [input type="email"] # Email input
@e3 [input type="password"] # Password input
@e4 [a href="/page"] "Link Text" # Anchor link
@e5 [select] # Dropdown
@e6 [textarea] placeholder="Message" # Text area
@e7 [div class="modal"] # Container (when relevant)
@e8 [img alt="Logo"] # Image
@e9 [checkbox] checked # Checked checkbox
@e10 [radio] selected # Selected radio
```
## Troubleshooting
### "Ref not found" Error
```bash
# Ref may have changed - re-snapshot
agent-browser snapshot -i
```
### Element Not Visible in Snapshot
```bash
# Scroll down to reveal element
agent-browser scroll down 1000
agent-browser snapshot -i
# Or wait for dynamic content
agent-browser wait 1000
agent-browser snapshot -i
```
### Too Many Elements
```bash
# Snapshot specific container
agent-browser snapshot @e5
# Or use get text for content-only extraction
agent-browser get text @e5
```

View file

@ -1,173 +0,0 @@
# Video Recording
Capture browser automation as video for debugging, documentation, or verification.
**Related**: [commands.md](commands.md) for full command reference, [SKILL.md](../SKILL.md) for quick start.
## Contents
- [Basic Recording](#basic-recording)
- [Recording Commands](#recording-commands)
- [Use Cases](#use-cases)
- [Best Practices](#best-practices)
- [Output Format](#output-format)
- [Limitations](#limitations)
## Basic Recording
```bash
# Start recording
agent-browser record start ./demo.webm
# Perform actions
agent-browser open https://example.com
agent-browser snapshot -i
agent-browser click @e1
agent-browser fill @e2 "test input"
# Stop and save
agent-browser record stop
```
## Recording Commands
```bash
# Start recording to file
agent-browser record start ./output.webm
# Stop current recording
agent-browser record stop
# Restart with new file (stops current + starts new)
agent-browser record restart ./take2.webm
```
## Use Cases
### Debugging Failed Automation
```bash
#!/bin/bash
# Record automation for debugging
agent-browser record start ./debug-$(date +%Y%m%d-%H%M%S).webm
# Run your automation
agent-browser open https://app.example.com
agent-browser snapshot -i
agent-browser click @e1 || {
echo "Click failed - check recording"
agent-browser record stop
exit 1
}
agent-browser record stop
```
### Documentation Generation
```bash
#!/bin/bash
# Record workflow for documentation
agent-browser record start ./docs/how-to-login.webm
agent-browser open https://app.example.com/login
agent-browser wait 1000 # Pause for visibility
agent-browser snapshot -i
agent-browser fill @e1 "demo@example.com"
agent-browser wait 500
agent-browser fill @e2 "password"
agent-browser wait 500
agent-browser click @e3
agent-browser wait --load networkidle
agent-browser wait 1000 # Show result
agent-browser record stop
```
### CI/CD Test Evidence
```bash
#!/bin/bash
# Record E2E test runs for CI artifacts
TEST_NAME="${1:-e2e-test}"
RECORDING_DIR="./test-recordings"
mkdir -p "$RECORDING_DIR"
agent-browser record start "$RECORDING_DIR/$TEST_NAME-$(date +%s).webm"
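# Run test, remembering the result so the CI job still fails when the test fails
STATUS=0
if run_e2e_test; then
echo "Test passed"
else
STATUS=$?
echo "Test failed - recording saved"
fi
agent-browser record stop
exit "$STATUS"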
```
## Best Practices
### 1. Add Pauses for Clarity
```bash
# Slow down for human viewing
agent-browser click @e1
agent-browser wait 500 # Let viewer see result
```
### 2. Use Descriptive Filenames
```bash
# Include context in filename
agent-browser record start ./recordings/login-flow-2024-01-15.webm
agent-browser record start ./recordings/checkout-test-run-42.webm
```
### 3. Handle Recording in Error Cases
```bash
#!/bin/bash
set -e
cleanup() {
agent-browser record stop 2>/dev/null || true
agent-browser close 2>/dev/null || true
}
trap cleanup EXIT
agent-browser record start ./automation.webm
# ... automation steps ...
```
### 4. Combine with Screenshots
```bash
# Record video AND capture key frames
agent-browser record start ./flow.webm
agent-browser open https://example.com
agent-browser screenshot ./screenshots/step1-homepage.png
agent-browser click @e1
agent-browser screenshot ./screenshots/step2-after-click.png
agent-browser record stop
```
## Output Format
- Default format: WebM (VP8/VP9 codec)
- Compatible with all modern browsers and video players
- Compressed but high quality
## Limitations
- Recording adds slight overhead to automation
- Large recordings can consume significant disk space
- Some headless environments may have codec limitations

View file

@ -1,105 +0,0 @@
#!/bin/bash
# Template: Authenticated Session Workflow
# Purpose: Login once, save state, reuse for subsequent runs
# Usage: ./authenticated-session.sh <login-url> [state-file]
#
# RECOMMENDED: Use the auth vault instead of this template:
#   echo "<pass>" | agent-browser auth save myapp --url <login-url> --username <user> --password-stdin
#   agent-browser auth login myapp
# The auth vault stores credentials securely and the LLM never sees passwords.
#
# Environment variables:
#   APP_USERNAME - Login username/email
#   APP_PASSWORD - Login password
#
# Two modes:
#   1. Discovery mode (default): Shows form structure so you can identify refs
#   2. Login mode: Performs actual login after you update the refs
#
# Setup steps:
#   1. Run once to see form structure (discovery mode)
#   2. Update refs in LOGIN FLOW section below
#   3. Set APP_USERNAME and APP_PASSWORD
#   4. Delete the DISCOVERY section
#
# Exit behavior:
#   - A successfully restored session exits 0 without closing the browser,
#     so follow-up commands can keep using it.
#   - Any failing step closes the browser before the script exits.

set -euo pipefail

LOGIN_URL="${1:?Usage: $0 <login-url> [state-file]}"
STATE_FILE="${2:-./auth-state.json}"

# With `set -e` a failing step aborts the script immediately; without this
# trap the browser would be left running. Only failure exits close it, so the
# "session restored" path below still hands an open browser to the caller.
close_browser_on_failure() {
    local status=$?
    if [[ "$status" -ne 0 ]]; then
        agent-browser close >/dev/null 2>&1 || true
    fi
}
trap close_browser_on_failure EXIT

echo "Authentication workflow: $LOGIN_URL"

# ================================================================
# SAVED STATE: Skip login if valid saved state exists
# ================================================================
if [[ -f "$STATE_FILE" ]]; then
    echo "Loading saved state from $STATE_FILE..."
    if agent-browser --state "$STATE_FILE" open "$LOGIN_URL" 2>/dev/null; then
        agent-browser wait --load networkidle
        CURRENT_URL=$(agent-browser get url)
        # Still being on a login/signin URL means the saved session was rejected.
        if [[ "$CURRENT_URL" != *"login"* ]] && [[ "$CURRENT_URL" != *"signin"* ]]; then
            echo "Session restored successfully"
            agent-browser snapshot -i
            exit 0
        fi
        echo "Session expired, performing fresh login..."
        agent-browser close 2>/dev/null || true
    else
        echo "Failed to load state, re-authenticating..."
    fi
    # The state file is stale or unreadable; drop it so the next run starts clean.
    rm -f "$STATE_FILE"
fi

# ================================================================
# DISCOVERY MODE: Shows form structure (delete after setup)
# ================================================================
echo "Opening login page..."
agent-browser open "$LOGIN_URL"
agent-browser wait --load networkidle
echo ""
echo "Login form structure:"
echo "---"
agent-browser snapshot -i
echo "---"
echo ""
echo "Next steps:"
echo " 1. Note the refs: username=@e?, password=@e?, submit=@e?"
echo " 2. Update the LOGIN FLOW section below with your refs"
echo " 3. Set: export APP_USERNAME='...' APP_PASSWORD='...'"
echo " 4. Delete this DISCOVERY MODE section"
echo ""
agent-browser close
exit 0

# ================================================================
# LOGIN FLOW: Uncomment and customize after discovery
# ================================================================
# : "${APP_USERNAME:?Set APP_USERNAME environment variable}"
# : "${APP_PASSWORD:?Set APP_PASSWORD environment variable}"
#
# agent-browser open "$LOGIN_URL"
# agent-browser wait --load networkidle
# agent-browser snapshot -i
#
# # Fill credentials (update refs to match your form)
# agent-browser fill @e1 "$APP_USERNAME"
# agent-browser fill @e2 "$APP_PASSWORD"
# agent-browser click @e3
# agent-browser wait --load networkidle
#
# # Verify login succeeded
# FINAL_URL=$(agent-browser get url)
# if [[ "$FINAL_URL" == *"login"* ]] || [[ "$FINAL_URL" == *"signin"* ]]; then
#     echo "Login failed - still on login page"
#     agent-browser screenshot /tmp/login-failed.png
#     agent-browser close
#     exit 1
# fi
#
# # Save state for future runs
# echo "Saving state to $STATE_FILE"
# agent-browser state save "$STATE_FILE"
# echo "Login successful"
# agent-browser snapshot -i

View file

@ -1,69 +0,0 @@
#!/bin/bash
# Template: Content Capture Workflow
# Purpose: Extract content from web pages (text, screenshots, PDF)
# Usage: ./capture-workflow.sh <url> [output-dir]
#
# Outputs:
#   - page-full.png: Full page screenshot
#   - page-structure.txt: Page element structure with refs
#   - page-text.txt: All text content
#   - page.pdf: PDF version
#
# Optional: Load auth state for protected pages
#
# The browser is closed on every exit path: explicitly at the end of a
# successful run, and by an EXIT trap when a step fails part-way through.

set -euo pipefail

TARGET_URL="${1:?Usage: $0 <url> [output-dir]}"
OUTPUT_DIR="${2:-.}"

# Tracks whether this script may have a browser running. With `set -e`, a
# failed capture step would otherwise abort the script and leave it open.
BROWSER_OPEN=0
cleanup() {
    if [[ "$BROWSER_OPEN" -eq 1 ]]; then
        agent-browser close >/dev/null 2>&1 || true
    fi
}
trap cleanup EXIT

echo "Capturing: $TARGET_URL"
mkdir -p "$OUTPUT_DIR"

# Set before the first agent-browser call so a partially started browser is
# still cleaned up if that call fails.
BROWSER_OPEN=1

# Optional: Load authentication state
# if [[ -f "./auth-state.json" ]]; then
#     echo "Loading authentication state..."
#     agent-browser state load "./auth-state.json"
# fi

# Navigate to target
agent-browser open "$TARGET_URL"
agent-browser wait --load networkidle

# Get metadata
TITLE=$(agent-browser get title)
URL=$(agent-browser get url)
echo "Title: $TITLE"
echo "URL: $URL"

# Capture full page screenshot
agent-browser screenshot --full "$OUTPUT_DIR/page-full.png"
echo "Saved: $OUTPUT_DIR/page-full.png"

# Get page structure with refs
agent-browser snapshot -i > "$OUTPUT_DIR/page-structure.txt"
echo "Saved: $OUTPUT_DIR/page-structure.txt"

# Extract all text content
agent-browser get text body > "$OUTPUT_DIR/page-text.txt"
echo "Saved: $OUTPUT_DIR/page-text.txt"

# Save as PDF
agent-browser pdf "$OUTPUT_DIR/page.pdf"
echo "Saved: $OUTPUT_DIR/page.pdf"

# Optional: Extract specific elements using refs from structure
# agent-browser get text @e5 > "$OUTPUT_DIR/main-content.txt"

# Optional: Handle infinite scroll pages
# for i in {1..5}; do
#     agent-browser scroll down 1000
#     agent-browser wait 1000
# done
# agent-browser screenshot --full "$OUTPUT_DIR/page-scrolled.png"

# Cleanup (normal path); clearing the flag stops the EXIT trap closing twice.
agent-browser close
BROWSER_OPEN=0

echo ""
echo "Capture complete:"
ls -la "$OUTPUT_DIR"

View file

@ -1,62 +0,0 @@
#!/bin/bash
# Template: Form Automation Workflow
# Purpose: Fill and submit web forms with validation
# Usage: ./form-automation.sh <form-url>
#
# This template demonstrates the snapshot-interact-verify pattern:
#   1. Navigate to form
#   2. Snapshot to get element refs
#   3. Fill fields using refs
#   4. Submit and verify result
#
# Customize: Update the refs (@e1, @e2, etc.) based on your form's snapshot output
#
# The browser is closed on every exit path: explicitly at the end of a
# successful run, and by an EXIT trap when a step fails part-way through.

set -euo pipefail

FORM_URL="${1:?Usage: $0 <form-url>}"

# Tracks whether this script may have a browser running. With `set -e`, a
# failed step would otherwise abort the script and leave it open.
BROWSER_OPEN=0
cleanup() {
    if [[ "$BROWSER_OPEN" -eq 1 ]]; then
        agent-browser close >/dev/null 2>&1 || true
    fi
}
trap cleanup EXIT

echo "Form automation: $FORM_URL"

# Step 1: Navigate to form
# The flag is set first so a partially started browser is still cleaned up.
BROWSER_OPEN=1
agent-browser open "$FORM_URL"
agent-browser wait --load networkidle

# Step 2: Snapshot to discover form elements
echo ""
echo "Form structure:"
agent-browser snapshot -i

# Step 3: Fill form fields (customize these refs based on snapshot output)
#
# Common field types:
#   agent-browser fill @e1 "John Doe"              # Text input
#   agent-browser fill @e2 "user@example.com"      # Email input
#   agent-browser fill @e3 "SecureP@ss123"         # Password input
#   agent-browser select @e4 "Option Value"        # Dropdown
#   agent-browser check @e5                        # Checkbox
#   agent-browser click @e6                        # Radio button
#   agent-browser fill @e7 "Multi-line text"       # Textarea
#   agent-browser upload @e8 /path/to/file.pdf     # File upload
#
# Uncomment and modify:
# agent-browser fill @e1 "Test User"
# agent-browser fill @e2 "test@example.com"
# agent-browser click @e3 # Submit button

# Step 4: Wait for submission
# agent-browser wait --load networkidle
# agent-browser wait --url "**/success" # Or wait for redirect

# Step 5: Verify result
echo ""
echo "Result:"
agent-browser get url
agent-browser snapshot -i

# Optional: Capture evidence
agent-browser screenshot /tmp/form-result.png
echo "Screenshot saved: /tmp/form-result.png"

# Cleanup (normal path); clearing the flag stops the EXIT trap closing twice.
agent-browser close
BROWSER_OPEN=0
echo "Done"

View file

@ -1,583 +0,0 @@
---
name: best-practices
description: Apply modern web development best practices for security, compatibility, and code quality. Use when asked to "apply best practices", "security audit", "modernize code", "code quality review", or "check for vulnerabilities".
license: MIT
metadata:
author: web-quality-skills
version: "1.0"
---
# Best practices
Modern web development standards based on Lighthouse best practices audits. Covers security, browser compatibility, and code quality patterns.
## Security
### HTTPS everywhere
**Enforce HTTPS:**
```html
<!-- ❌ Mixed content -->
<img src="http://example.com/image.jpg">
<script src="http://cdn.example.com/script.js"></script>
<!-- ✅ HTTPS only -->
<img src="https://example.com/image.jpg">
<script src="https://cdn.example.com/script.js"></script>
<!-- ⚠️ Protocol-relative (inherits the page's protocol, so it is only secure on HTTPS pages; prefer explicit https://) -->
<img src="//example.com/image.jpg">
```
**HSTS Header:**
```
Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
```
### Content Security Policy (CSP)
```html
<!-- Basic CSP via meta tag -->
<meta http-equiv="Content-Security-Policy"
content="default-src 'self';
script-src 'self' https://trusted-cdn.com;
style-src 'self' 'unsafe-inline';
img-src 'self' data: https:;
connect-src 'self' https://api.example.com;">
<!-- Better: HTTP header -->
```
**CSP Header (recommended):**
```
Content-Security-Policy:
default-src 'self';
script-src 'self' 'nonce-abc123' https://trusted.com;
style-src 'self' 'nonce-abc123';
img-src 'self' data: https:;
connect-src 'self' https://api.example.com;
frame-ancestors 'self';
base-uri 'self';
form-action 'self';
```
**Using nonces for inline scripts:**
```html
<script nonce="abc123">
// This inline script is allowed
</script>
```
### Security headers
```
# Prevent clickjacking
X-Frame-Options: DENY
# Prevent MIME type sniffing
X-Content-Type-Options: nosniff
# Disable the legacy XSS auditor (deprecated and can introduce vulnerabilities; rely on CSP instead)
X-XSS-Protection: 0
# Control referrer information
Referrer-Policy: strict-origin-when-cross-origin
# Permissions policy (formerly Feature-Policy)
Permissions-Policy: geolocation=(), microphone=(), camera=()
```
### No vulnerable libraries
```bash
# Check for vulnerabilities
npm audit
yarn audit
# Auto-fix when possible
npm audit fix
# Check specific package
npm ls lodash
```
**Keep dependencies updated:**
```json
// package.json
{
"scripts": {
"audit": "npm audit --audit-level=moderate",
"update": "npm update && npm audit fix"
}
}
```
**Known vulnerable patterns to avoid:**
```javascript
// ❌ Prototype pollution vulnerable patterns
Object.assign(target, userInput);
_.merge(target, userInput);
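// ✅ Safer alternatives: copy only allowlisted keys into a prototype-less object
// (a JSON.parse(JSON.stringify(...)) round-trip does NOT strip "__proto__" keys)
const safeData = Object.create(null);
for (const key of ['name', 'email']) {
if (Object.hasOwn(userInput, key)) safeData[key] = userInput[key];
}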
```
### Input sanitization
```javascript
// ❌ XSS vulnerable
element.innerHTML = userInput;
document.write(userInput);
// ✅ Safe text content
element.textContent = userInput;
// ✅ If HTML needed, sanitize
import DOMPurify from 'dompurify';
element.innerHTML = DOMPurify.sanitize(userInput);
```
### Secure cookies
```javascript
// ❌ Insecure cookie
document.cookie = "session=abc123";
// ✅ Secure cookie (set server-side as an HTTP response header; HttpOnly cannot be set from JavaScript)
// Set-Cookie: session=abc123; Secure; HttpOnly; SameSite=Strict; Path=/
```
---
## Browser compatibility
### Doctype declaration
```html
<!-- ❌ Missing or invalid doctype -->
<HTML>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<!-- ✅ HTML5 doctype -->
<!DOCTYPE html>
<html lang="en">
```
### Character encoding
```html
<!-- ❌ Missing or late charset -->
<html>
<head>
<title>Page</title>
<meta charset="UTF-8">
</head>
<!-- ✅ Charset as first element in head -->
<html>
<head>
<meta charset="UTF-8">
<title>Page</title>
</head>
```
### Viewport meta tag
```html
<!-- ❌ Missing viewport -->
<head>
<title>Page</title>
</head>
<!-- ✅ Responsive viewport -->
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Page</title>
</head>
```
### Feature detection
```javascript
// ❌ Browser detection (brittle)
if (navigator.userAgent.includes('Chrome')) {
// Chrome-specific code
}
// ✅ Feature detection
if ('IntersectionObserver' in window) {
// Use IntersectionObserver
} else {
// Fallback
}
// ✅ Using @supports in CSS
@supports (display: grid) {
.container {
display: grid;
}
}
@supports not (display: grid) {
.container {
display: flex;
}
}
```
### Polyfills (when needed)
```html
<!-- Load polyfills conditionally -->
<script>
if (!('fetch' in window)) {
document.write('<script src="/polyfills/fetch.js"><\/script>');
}
</script>
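<!-- Or use a polyfill service you trust. Do NOT load from polyfill.io: that domain was compromised in 2024. Self-host or use a vetted mirror. -->
<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=fetch,IntersectionObserver"></script>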
```
---
## Deprecated APIs
### Avoid these
```javascript
// ❌ document.write (blocks parsing)
document.write('<script src="..."></script>');
// ✅ Dynamic script loading
const script = document.createElement('script');
script.src = '...';
document.head.appendChild(script);
// ❌ Synchronous XHR (blocks main thread)
const xhr = new XMLHttpRequest();
xhr.open('GET', url, false); // false = synchronous
// ✅ Async fetch
const response = await fetch(url);
// ❌ Application Cache (deprecated)
<html manifest="cache.manifest">
// ✅ Service Workers
if ('serviceWorker' in navigator) {
navigator.serviceWorker.register('/sw.js');
}
```
### Event listener passive
```javascript
// ❌ Non-passive touch/wheel (may block scrolling)
element.addEventListener('touchstart', handler);
element.addEventListener('wheel', handler);
// ✅ Passive listeners (allows smooth scrolling)
element.addEventListener('touchstart', handler, { passive: true });
element.addEventListener('wheel', handler, { passive: true });
// ✅ If you need preventDefault, be explicit
element.addEventListener('touchstart', handler, { passive: false });
```
---
## Console & errors
### No console errors
```javascript
// ❌ Errors in production
console.log('Debug info'); // Remove in production
throw new Error('Unhandled'); // Catch all errors
// ✅ Proper error handling
try {
riskyOperation();
} catch (error) {
// Log to error tracking service
errorTracker.captureException(error);
// Show user-friendly message
showErrorMessage('Something went wrong. Please try again.');
}
```
### Error boundaries (React)
```jsx
class ErrorBoundary extends React.Component {
state = { hasError: false };
static getDerivedStateFromError(error) {
return { hasError: true };
}
componentDidCatch(error, info) {
errorTracker.captureException(error, { extra: info });
}
render() {
if (this.state.hasError) {
return <FallbackUI />;
}
return this.props.children;
}
}
// Usage
<ErrorBoundary>
<App />
</ErrorBoundary>
```
### Global error handler
```javascript
// Catch unhandled errors
window.addEventListener('error', (event) => {
errorTracker.captureException(event.error);
});
// Catch unhandled promise rejections
window.addEventListener('unhandledrejection', (event) => {
errorTracker.captureException(event.reason);
});
```
---
## Source maps
### Production configuration
```javascript
// ❌ Source maps exposed in production
// webpack.config.js
module.exports = {
devtool: 'source-map', // Exposes source code
};
// ✅ Hidden source maps (uploaded to error tracker)
module.exports = {
devtool: 'hidden-source-map',
};
// ✅ Or no source maps in production
module.exports = {
devtool: process.env.NODE_ENV === 'production' ? false : 'source-map',
};
```
---
## Performance best practices
### Avoid blocking patterns
```javascript
// ❌ Blocking script
<script src="heavy-library.js"></script>
// ✅ Deferred script
<script defer src="heavy-library.js"></script>
// ❌ Blocking CSS import
@import url('other-styles.css');
// ✅ Link tags (parallel loading)
<link rel="stylesheet" href="styles.css">
<link rel="stylesheet" href="other-styles.css">
```
### Efficient event handlers
```javascript
// ❌ Handler on every element
items.forEach(item => {
item.addEventListener('click', handleClick);
});
// ✅ Event delegation
container.addEventListener('click', (e) => {
if (e.target.matches('.item')) {
handleClick(e);
}
});
```
### Memory management
```javascript
// ❌ Memory leak (never removed)
const handler = () => { /* ... */ };
window.addEventListener('resize', handler);
// ✅ Cleanup when done
const handler = () => { /* ... */ };
window.addEventListener('resize', handler);
// Later, when component unmounts:
window.removeEventListener('resize', handler);
// ✅ Using AbortController
const controller = new AbortController();
window.addEventListener('resize', handler, { signal: controller.signal });
// Cleanup:
controller.abort();
```
---
## Code quality
### Valid HTML
```html
<!-- ❌ Invalid HTML -->
<div id="header">
<div id="header"> <!-- Duplicate ID -->
<ul>
<div>Item</div> <!-- Invalid child -->
</ul>
<a href="/"><button>Click</button></a> <!-- Invalid nesting -->
<!-- ✅ Valid HTML -->
<header id="site-header">
</header>
<ul>
<li>Item</li>
</ul>
<a href="/" class="button">Click</a>
```
### Semantic HTML
```html
<!-- ❌ Non-semantic -->
<div class="header">
<div class="nav">
<div class="nav-item">Home</div>
</div>
</div>
<div class="main">
<div class="article">
<div class="title">Headline</div>
</div>
</div>
<!-- ✅ Semantic HTML5 -->
<header>
<nav>
<a href="/">Home</a>
</nav>
</header>
<main>
<article>
<h1>Headline</h1>
</article>
</main>
```
### Image aspect ratios
```html
<!-- ❌ Distorted images -->
<img src="photo.jpg" width="300" height="100">
<!-- If actual ratio is 4:3, this squishes the image -->
<!-- ✅ Preserve aspect ratio -->
<img src="photo.jpg" width="300" height="225">
<!-- Actual 4:3 dimensions -->
<!-- ✅ CSS object-fit for flexibility -->
<img src="photo.jpg" style="width: 300px; height: 200px; object-fit: cover;">
```
---
## Permissions & privacy
### Request permissions properly
```javascript
// ❌ Request on page load (bad UX, often denied)
navigator.geolocation.getCurrentPosition(success, error);
// ✅ Request in context, after user action
findNearbyButton.addEventListener('click', async () => {
// Explain why you need it
if (await showPermissionExplanation()) {
navigator.geolocation.getCurrentPosition(success, error);
}
});
```
### Permissions policy
`Permissions-Policy` is only honored as an HTTP response header (or per-frame via the `allow` attribute on `<iframe>`); there is no `<meta http-equiv>` equivalent.
```
# Restrict powerful features
Permissions-Policy: geolocation=(), camera=(), microphone=()
# Or allow for specific origins
Permissions-Policy: geolocation=(self "https://maps.example.com")
```
---
## Audit checklist
### Security (critical)
- [ ] HTTPS enabled, no mixed content
- [ ] No vulnerable dependencies (`npm audit`)
- [ ] CSP headers configured
- [ ] Security headers present
- [ ] No exposed source maps
### Compatibility
- [ ] Valid HTML5 doctype
- [ ] Charset declared first in head
- [ ] Viewport meta tag present
- [ ] No deprecated APIs used
- [ ] Passive event listeners for scroll/touch
### Code quality
- [ ] No console errors
- [ ] Valid HTML (no duplicate IDs)
- [ ] Semantic HTML elements used
- [ ] Proper error handling
- [ ] Memory cleanup in components
### UX
- [ ] No intrusive interstitials
- [ ] Permission requests in context
- [ ] Clear error messages
- [ ] Appropriate image aspect ratios
## Tools
| Tool | Purpose |
|------|---------|
| `npm audit` | Dependency vulnerabilities |
| [SecurityHeaders.com](https://securityheaders.com) | Header analysis |
| [W3C Validator](https://validator.w3.org) | HTML validation |
| Lighthouse | Best practices audit |
| [Observatory](https://observatory.mozilla.org) | Security scan |
## References
- [MDN Web Security](https://developer.mozilla.org/en-US/docs/Web/Security)
- [OWASP Top 10](https://owasp.org/www-project-top-ten/)
- [Web Quality Audit](../web-quality-audit/SKILL.md)

View file

@ -1,42 +0,0 @@
---
name: btw
description: Ask a quick side question about your current work without derailing the main task. Answers from existing conversation context only — no tool calls, no file reads, single concise response. Use when you need a fast answer from what is already in this session.
---
<objective>
Answer a quick side question using only what is already present in the current conversation context. Do not read files, run commands, search, or use any tools. Give a single, concise response and return focus to the main work.
</objective>
<behavior>
**This is a side question, not a task.**
- Answer only from information already in the conversation (files read, decisions made, code seen, context established)
- Do NOT use any tools — no Read, no Bash, no Grep, no Search
- If the answer requires reading something new, say so briefly and suggest the user ask as a normal prompt instead
- Keep the response short and direct — one to a few sentences unless the question genuinely needs more
- Do not summarize the main work, ask follow-up questions, or offer to do anything else
- After answering, stop — do not prompt for next steps
</behavior>
<quick_start>
Parse the argument after `/btw` as the question. Answer it directly from context.
If no argument is provided, ask: "What did you want to know?"
If the question cannot be answered from current context (requires reading a file, running a command, or information not yet in the session), respond with:
"I'd need to [read X / run Y / look up Z] to answer that — ask it as a normal prompt when you're ready."
</quick_start>
<examples>
**Good uses of /btw:**
- `/btw what was the name of that config file again?` → answers from files already read in session
- `/btw which branch are we on?` → answers from git context already established
- `/btw did we already handle the null case in that function?` → answers from code already reviewed
- `/btw what model does this use?` → answers from code or config already in context
**Not a good fit for /btw (suggest normal prompt):**
- Questions requiring reading a file not yet seen
- Questions requiring running a command
- Questions needing a multi-step answer or follow-up
- Starting a new task or changing direction
</examples>

View file

@ -1,160 +0,0 @@
---
name: code-optimizer
description: >
Deep code optimization audit using parallel specialist agents. Each agent hunts for performance
anti-patterns, inefficiencies, and suboptimal code using pattern-based detection (Grep/Glob)
WITHOUT reading the full source code first — avoiding anchoring bias on existing implementations.
Covers ALL optimization domains: database queries, memory leaks, algorithmic complexity,
concurrency, bundle size, dead code, I/O & network, rendering/UI, data structures,
error handling, caching, build config, security-performance, logging, and infrastructure.
Use when asked to: "optimize my code", "find performance issues", "audit code quality",
"speed up my app", "find bottlenecks", "code review for performance", "find anti-patterns",
"improve code efficiency", "reduce latency", "optimize performance", "code smell detection",
"find slow code", "optimize this project", "performance audit", "code optimization".
Also triggers on: "optimizar codigo", "encontrar cuellos de botella", "mejorar rendimiento".
---
# Code Optimizer
Parallel multi-agent code optimization audit. Spawn 13 specialist agents simultaneously, each
hunting for a different class of performance problem using pattern-based detection.
## Critical Principle: No Code Reading Before Analysis
Agents MUST NOT read source files before searching for patterns. Reading the code first causes
anchoring bias — the agent accepts the existing implementation as "reasonable" and misses
better alternatives. Instead, each agent must:
1. Read its assigned reference file from `references/` to load detection patterns
2. Use Grep/Glob to scan the codebase for anti-patterns
3. For each finding, ONLY THEN read the surrounding context (5-10 lines) to confirm the issue
4. Propose the optimal solution based on best practices, NOT based on the existing code
## Workflow
### Step 1: Detect Stack
Use Glob to identify the project's tech stack:
- `**/package.json` → Node.js/JS/TS (check for React, Next.js, Express, etc.)
- `**/requirements.txt`, `**/pyproject.toml`, `**/setup.py` → Python
- `**/go.mod` → Go
- `**/Cargo.toml` → Rust
- `**/pom.xml`, `**/build.gradle` → Java
- `**/Gemfile` → Ruby
- `**/Dockerfile` → Docker
- `**/*.sql` → SQL
- `**/webpack.config.*`, `**/vite.config.*`, `**/tsconfig.json` → Build tools
### Step 2: Spawn 13 Parallel Agents
Launch ALL agents simultaneously using the Agent tool. Each agent receives:
- Its domain name and reference file path
- The detected tech stack (so it can focus on relevant patterns)
- The project root path
- Instructions to NOT read code files, only Grep/Glob for patterns
**Agent definitions** (spawn all 13 in a single message):
| # | Agent Name | Reference File | Focus |
|---|-----------|----------------|-------|
| 1 | Database & Queries | `references/database-queries.md` | N+1 queries, SELECT *, missing indexes, ORM misuse, connection pooling |
| 2 | Memory & Resources | `references/memory-resources.md` | Memory leaks, unclosed resources, large allocations, string concat in loops |
| 3 | Algorithmic Complexity | `references/algorithmic-complexity.md` | O(n^2) patterns, unnecessary iterations, wrong data structures for lookups |
| 4 | Concurrency & Async | `references/concurrency-async.md` | Sequential awaits, blocking in async, race conditions, unbounded concurrency |
| 5 | Bundle & Dependencies | `references/bundle-dependencies.md` | Heavy imports, unused deps, duplicate libs, missing lazy loading |
| 6 | Dead Code & Redundancy | `references/dead-code-redundancy.md` | Unused exports, commented code, dead branches, duplicate logic |
| 7 | I/O & Network | `references/io-network.md` | Sequential requests, missing batching, no dedup, missing compression |
| 8 | Rendering & UI | `references/rendering-ui.md` | Re-renders, missing virtualization, layout thrashing, animation perf |
| 9 | Data Structures | `references/data-structures.md` | Wrong structures, unnecessary copies, inefficient serialization |
| 10 | Error & Resilience | `references/error-resilience.md` | Missing timeouts, swallowed errors, no retries, no circuit breakers |
| 11 | Caching & Memoization | `references/caching-memoization.md` | Missing memoization, cache without invalidation, redundant API calls |
| 12 | Build & Compilation | `references/build-compilation.md` | Dev code in prod, missing optimization flags, slow tests, Docker issues |
| 13 | Security-Performance | `references/security-performance.md` | Crypto misuse, missing rate limiting, ReDoS, SQL injection vectors |
**Optional agents** (spawn if relevant to detected stack):
- Logging & Observability (`references/logging-observability.md`) — if logging framework detected
- Config & Infrastructure (`references/config-infra.md`) — if Docker/deployment config detected
### Agent Prompt Template
Each agent MUST receive this prompt structure:
```
You are a {DOMAIN_NAME} optimization specialist. Your job is to find performance
anti-patterns in the codebase at {PROJECT_ROOT}.
CRITICAL RULES:
1. DO NOT read source code files before searching. This avoids anchoring bias.
2. First, read your reference file: {SKILL_DIR}/references/{REFERENCE_FILE}
3. Use Grep and Glob to search for the patterns described in the reference file.
4. Only read 5-10 lines of context around each finding to confirm it's a real issue.
5. Skip patterns that don't match the project's stack: {DETECTED_STACK}
Tech stack detected: {DETECTED_STACK}
Project root: {PROJECT_ROOT}
For each finding, report:
- **File**: path:line_number
- **Pattern**: what anti-pattern was detected
- **Severity**: CRITICAL / HIGH / MEDIUM / LOW
- **Current code**: the problematic snippet (keep short)
- **Why it's slow**: brief explanation of the performance impact
- **Optimal fix**: the recommended solution (code snippet or approach)
- **Estimated impact**: qualitative improvement expected (e.g., "10x faster for large lists")
If you find 0 issues in your domain, report "No issues found" — this is a valid outcome.
Sort findings by severity (CRITICAL first).
```
### Step 3: Consolidate Report
After all agents complete, consolidate their findings into a single prioritized report:
1. Collect all findings from all agents
2. Deduplicate (different agents may flag the same code for different reasons)
3. Sort by severity: CRITICAL > HIGH > MEDIUM > LOW
4. Group by file (so the user can fix file-by-file)
5. Present the final report with:
- Executive summary: total findings by severity, top 3 most impactful
- Detailed findings table grouped by file
- Improvement plan: ordered list of fixes from highest to lowest impact
### Report Format
```markdown
# Code Optimization Audit Report
## Executive Summary
- **X** critical issues, **Y** high, **Z** medium, **W** low
- Top 3 highest-impact fixes:
1. [brief description] — [estimated impact]
2. [brief description] — [estimated impact]
3. [brief description] — [estimated impact]
## Findings by File
### `path/to/file.ts`
| # | Severity | Domain | Pattern | Fix | Impact |
|---|----------|--------|---------|-----|--------|
| 1 | CRITICAL | Database | N+1 query in loop | Use prefetch_related | 50x fewer queries |
| 2 | HIGH | Async | Sequential awaits | Use Promise.all | 3x faster |
[... for each file with findings ...]
## Improvement Plan
Priority-ordered steps to implement the fixes:
1. **[CRITICAL] Fix N+1 queries in `api/users.py`**
- Current: loop queries user.posts for each user
- Fix: add prefetch_related('posts') to queryset
- Impact: reduces N+1 to 2 queries
2. **[HIGH] Parallelize API calls in `services/sync.ts`**
- Current: 5 sequential await fetch() calls
- Fix: Promise.all([fetch1, fetch2, ...])
- Impact: ~5x faster sync operation
[... continue for all findings ...]
```

View file

@ -1,66 +0,0 @@
# Algorithmic Complexity
## Grep/Glob Patterns to Detect
### O(n^2) and Worse Patterns
```
# Nested loops over same/related collections
for.*in.*\n.*for.*in (nested for loops)
\.forEach\(.*\.forEach\( (nested forEach)
\.map\(.*\.map\( (nested map)
\.filter\(.*\.includes\( (filter+includes = O(n*m))
\.find\(.*inside.*\.map\( (find inside map)
\.indexOf\(.*inside.*for (indexOf in loop)
\.includes\(.*inside.*for (includes in loop)
# Array as lookup table
array\.find\(.*=== (use Map/Set instead)
array\.some\(.*=== (use Set.has instead)
list\.index\( (Python: use dict instead)
if.*in\s+list (Python: O(n) lookup in list)
```
### Unnecessary Iterations
```
\.filter\(.*\.length (filter just to count)
\.filter\(.*\[0\] (filter just to get first - use find)
\.map\(.*\.filter\( (map then filter - combine or reverse order)
\.filter\(.*\.map\(.*\.filter (multiple passes when one suffices)
\.sort\(\).*\[0\] (sort to get min/max - use Math.min/max or reduce)
\.sort\(\).*\.slice\(0 (sort to get top-k - use partial sort/heap)
sorted\(.*\)\[0\] (Python: use min() instead)
sorted\(.*\)\[-1\] (Python: use max() instead)
\.reverse\(\).*\.forEach (reverse just to iterate backwards)
Object\.keys\(.*\.map\(.*Object\.values (iterating keys then accessing values)
```
### Redundant Computation
```
# Same computation in loop
for.*\n.*Math\. (math operations that could be hoisted)
for.*\n.*\.length (accessing .length repeatedly - may be fine, check)
for.*\n.*document\.querySelector (DOM queries in loops)
for.*\n.*JSON\.parse (parsing same JSON repeatedly)
for.*\n.*new RegExp\( (creating regex in loop)
for.*\n.*new Date\( (creating Date objects in loop for same date)
```
### Inefficient Data Structure Choice
```
# Using arrays where Set/Map would be better
\.push\(.*\.includes\( (array as unique set)
\.filter\(.*\.indexOf\( (dedup with filter+indexOf)
\[\].*\.find\( (array for lookups)
# Using objects where Map would be better
\{\}.*\[.*\]\s*= (frequent dynamic key insertion)
delete.*\[ (frequent key deletion from object)
```
## Improvement Strategies
1. **Nested loops**: Pre-build lookup Map/Set, use hash-based approaches
2. **Filter+includes**: Convert one collection to Set for O(1) lookups
3. **Sort for min/max**: Use Math.min/max, reduce, or heap for top-k
4. **Multiple passes**: Combine into single reduce/loop
5. **Redundant computation**: Hoist invariants out of loops, memoize
6. **Array as lookup**: Use Map for key-value, Set for existence checks
7. **String matching in loops**: Pre-compile regex, use Map for exact matches

View file

@ -1,90 +0,0 @@
# Build & Compilation Optimization
## Grep/Glob Patterns to Detect
### Unoptimized Build Config
```
# Webpack
mode:\s*['"]development['"] (dev mode in production build)
devtool:\s*['"]source-map['"] (full source maps in production)
devtool:\s*['"]eval (eval source maps in production)
# No code splitting
splitChunks.*false (code splitting disabled)
# No minification
minimize:\s*false (minification disabled)
# Missing tree shaking
sideEffects.*true (prevents tree shaking)
```
### Development-Only Code in Production
```
console\.log\( (debug logging)
console\.debug\( (debug logging)
console\.trace\( (trace logging)
debugger; (debugger statement)
\.only\( (test.only left in)
\.skip\( (test.skip left in)
if\s*\(.*process\.env\.NODE_ENV.*development (dev-only code)
__DEV__ (React Native dev flag)
```
### Missing Optimization Flags
```
# TypeScript
"strict":\s*false (strict mode disabled)
"skipLibCheck":\s*false (slow lib checking)
"incremental":\s*false (no incremental compilation)
# Python
python\s+-O (check if optimized flag used)
__debug__ (debug-only code)
# Docker
FROM.*:latest (unpinned base image)
RUN.*pip install(?!.*--no-cache) (pip without --no-cache-dir)
RUN.*npm install(?!.*--production) (npm install without --production)
COPY\s+\.\s+\. (copying entire context)
```
### Large/Slow Imports at Startup
```
# Top-level heavy imports that could be lazy
import.*tensorflow (heavy ML library at top)
import.*pandas (heavy data library at top)
import.*matplotlib (heavy viz library at top)
import.*scipy (heavy math library at top)
from.*import\s+\* (wildcard imports slow startup)
# Circular imports
ImportError.*circular (circular import errors)
```
### Missing Caching in CI/CD
```
# No caching steps
npm install(?!.*cache) (npm install without cache)
pip install(?!.*cache) (pip install without cache)
go build(?!.*cache) (go build without cache)
docker build(?!.*cache) (docker build without layer cache)
```
### Slow Test Suite
```
# Real I/O in tests
fetch\(.*test (real network calls in tests)
requests\.\w+\(.*test (real HTTP in Python tests)
open\(.*test (real file I/O in tests)
# No test parallelization
--runInBand (Jest sequential mode)
-p no:xdist (pytest parallelization disabled)
# Heavy setup/teardown
beforeAll.*database (real DB setup in tests)
setUp.*database (real DB in Python tests)
```
## Improvement Strategies
1. **Build config**: Ensure production mode, minification, tree shaking, code splitting
2. **Dev code**: Strip console.log/debugger via build plugin (e.g., babel-plugin-transform-remove-console)
3. **TypeScript**: Enable incremental and skipLibCheck for faster builds; enable strict for type safety (it does not speed up compilation)
4. **Docker**: Multi-stage builds, .dockerignore, layer caching, pinned versions
5. **Lazy imports**: Move heavy imports to function scope where they're needed
6. **CI caching**: Cache node_modules, pip cache, go build cache, Docker layers
7. **Test speed**: Mock I/O, run tests in parallel, use in-memory DBs for integration tests

View file

@ -1,82 +0,0 @@
# Bundle Size & Dependencies
## Grep/Glob Patterns to Detect
### Heavy Imports
```
import\s+\w+\s+from\s+['"]lodash['"] (full lodash import vs lodash/specific)
import\s+\w+\s+from\s+['"]moment['"] (moment.js - use date-fns/dayjs)
import\s+\w+\s+from\s+['"]underscore['"] (underscore - mostly native now)
import\s+\*\s+as (wildcard imports prevent tree-shaking)
require\(['"]lodash['"]\) (CJS lodash import)
from\s+pandas\s+import\s+\* (full pandas import)
import\s+tensorflow (full TF import)
import\s+boto3 (full AWS SDK)
```
### Unused Dependencies
```
# Check package.json dependencies vs actual imports
# Check requirements.txt vs actual imports
# Check go.mod vs actual imports
import.*from.*['"](\w+)['"] (cross-reference with package.json)
```
### Duplicate Functionality
```
# Multiple date libraries
moment.*\n.*date-fns (both moment and date-fns)
moment.*\n.*dayjs (both moment and dayjs)
# Multiple HTTP clients
axios.*\n.*node-fetch (both axios and node-fetch)
axios.*\n.*got (both axios and got)
# Multiple utility libraries
lodash.*\n.*underscore (both lodash and underscore)
# Multiple state managers
redux.*\n.*mobx (both redux and mobx)
zustand.*\n.*jotai (multiple state libs)
```
### Dev Dependencies in Production
```
# devDependencies imported in src/
import.*from.*['"](@testing|jest|mocha|chai|sinon|cypress|storybook)
# Debug/test code in production
console\.log\(
console\.debug\(
debugger;
\.only\( (test.only left in)
```
### Dynamic Imports Missing
```
# Large components imported statically that could be lazy
import.*Modal (modals are great candidates for lazy loading)
import.*Chart (charts are heavy)
import.*Editor (rich editors are heavy)
import.*PDF (PDF libs are heavy)
import.*Map (map components are heavy)
# Route-level components not lazy loaded
import.*Page.*from (page components should often be lazy)
```
### Large Assets
```
# Check for unoptimized assets
\.png['"] (check if could be webp/avif)
\.jpg['"] (check if could be webp/avif)
\.gif['"] (check if could be video/webp)
\.svg['"].*import (SVGs imported as modules - check size)
base64 (inline base64 assets)
data:image (inline images)
```
## Improvement Strategies
1. **Lodash**: Use `lodash-es/specific` or native equivalents (Array.find, Object.entries, etc.)
2. **Moment.js**: Replace with date-fns or dayjs (10x smaller)
3. **Wildcard imports**: Use named imports for tree-shaking
4. **Unused deps**: Remove from package.json/requirements.txt
5. **Dynamic imports**: Use React.lazy/import() for heavy, below-fold components
6. **Images**: Convert to WebP/AVIF, use responsive srcset, lazy load below-fold
7. **Duplicate libs**: Consolidate to one library per concern

View file

@ -1,76 +0,0 @@
# Caching & Memoization
## Grep/Glob Patterns to Detect
### Missing Memoization
```
# Expensive computations without caching
def\s+\w+\(.*\).*:\s*\n.*for.*for (Python: expensive function without @lru_cache)
function\s+\w+\(.*\).*\{.*for.*for (JS: expensive function without memoization)
# React missing useMemo/useCallback
const\s+\w+\s*=\s*\w+\.filter\( (derived data on every render)
const\s+\w+\s*=\s*\w+\.map\( (derived data on every render)
const\s+\w+\s*=\s*\w+\.reduce\( (derived data on every render)
const\s+\w+\s*=\s*\w+\.sort\( (sorting on every render)
# Same computation called multiple times
(\w+)\(same_args\).*\1\(same_args\) (same function, same args, called twice)
```
### Cache Without Invalidation
```
cache\s*=\s*\{\} (cache without TTL or max size)
_cache\s*=\s*\{\} (module cache without eviction)
memo\s*=\s*\{\} (memo without invalidation)
\.cache\s*=\s*\{\} (instance cache without cleanup)
CACHE_TTL.*=.*(?:86400|3600.*24) (very long TTL - stale data risk)
```
### Redundant API/DB Calls
```
# Same query executed multiple times
\.query\(.*same.*\.query\( (duplicate queries)
fetch\(['"]same_url['"]\).*fetch\( (duplicate fetches)
# No SWR/stale-while-revalidate
useEffect\(.*fetch\(.*\[\] (fetch on every mount without caching)
useEffect\(.*axios\.\w+\(.*\[\] (API call on every mount)
componentDidMount.*fetch (fetch without caching layer)
```
### Over-Caching
```
# Caching things that change frequently
cache.*user.*session (caching session-specific data)
cache.*real.?time (caching real-time data)
cache.*current.*time (caching time-dependent data)
# Caching large objects
cache\[.*\]\s*=\s*.*large (large objects in cache)
```
### Missing HTTP Caching
```
# API responses without cache headers
res\.json\( (check if Cache-Control is set)
return\s+Response\( (check if cache headers are set)
return\s+JsonResponse\( (Django: check cache headers)
# Static assets without long cache
express\.static\( (check maxAge setting)
nginx.*location.*static (check expires/cache-control)
```
### Computed Properties Recalculated
```
# Getters that compute on every access
get\s+\w+\(\)\s*\{.*return.*\.filter (getter computing on each access)
get\s+\w+\(\)\s*\{.*return.*\.map (getter computing on each access)
@property\s*\n\s*def.*\n.*for (Python property computing in loop)
```
## Improvement Strategies
1. **Memoization**: Use @lru_cache (Python), useMemo/useCallback (React), _.memoize (JS)
2. **Cache invalidation**: Always set TTL and max size; prefer LRU eviction
3. **API caching**: Use SWR/React Query for client, Redis/Memcached for server
4. **HTTP caching**: Set Cache-Control headers, use ETags, stale-while-revalidate
5. **Computed properties**: Cache results with dirty flag or use memoized selectors (reselect)
6. **Request deduplication**: Deduplicate identical in-flight requests
7. **Multi-level cache**: L1 (in-memory) -> L2 (Redis) -> L3 (DB) for read-heavy workloads

View file

@ -1,80 +0,0 @@
# Concurrency & Async Patterns
## Grep/Glob Patterns to Detect
### Sequential Async (Should Be Parallel)
```
await.*\n.*await.*\n.*await (multiple sequential awaits that could be parallel)
for.*await (sequential await in loop)
\.then\(.*\.then\(.*\.then\( (promise chain that could be Promise.all)
# Python
await.*\n.*await.*\n.*await (sequential awaits)
for.*in.*:\n.*await (await in loop)
```
### Missing Parallelization
```
# Should use Promise.all / asyncio.gather
fetch\(.*\n.*fetch\( (sequential fetches)
axios\.\w+\(.*\n.*axios\. (sequential HTTP calls)
requests\.\w+\(.*\n.*requests\. (Python sequential requests)
```
### Blocking Operations in Async Context
```
# Node.js sync operations in async code
fs\.readFileSync (blocking file read)
fs\.writeFileSync (blocking file write)
fs\.existsSync (blocking existence check)
child_process\.execSync (blocking exec)
\.readFileSync\( (any sync file operation)
# Python blocking in async
time\.sleep\( (use asyncio.sleep instead)
requests\. (use aiohttp/httpx instead)
open\(.*\.read\(\) (use aiofiles instead)
os\.path\.exists (use aio equivalent)
```
### Race Conditions & Thread Safety
```
# Shared mutable state
global\s+\w+.*= (Python global mutation)
threading\.Thread.*shared (shared state across threads)
# Missing locks
\.append\(.*thread (list append without lock)
\+=.*without.*lock (increment without lock)
# JavaScript
let\s+\w+.*=.*\n.*async (mutable let used in async)
```
### Unbounded Concurrency
```
# No concurrency limit
\.map\(.*fetch (unbounded parallel fetches)
\.map\(.*axios (unbounded parallel requests)
Promise\.all\(.*\.map\( (all items in parallel, no limit)
asyncio\.gather\(.*for (all coroutines at once)
# Missing backpressure
while.*true.*await (infinite async loop without backpressure)
```
### Error Handling in Async
```
# Unhandled rejections
\.then\(.*without.*\.catch (promise without catch)
async.*without.*try.*catch (async without error handling)
# Swallowed errors
catch\s*\(\s*\w*\s*\)\s*\{\s*\} (empty catch block)
except:\s*$ (bare except)
except\s+Exception\s*:.*pass (catch-all with pass)
```
## Improvement Strategies
1. **Sequential awaits**: Use Promise.all/allSettled, asyncio.gather for independent operations
2. **Await in loops**: Batch with Promise.all or use p-limit for controlled concurrency
3. **Blocking in async**: Replace sync APIs with async equivalents
4. **Race conditions**: Use locks, atomic operations, or immutable patterns
5. **Unbounded concurrency**: Use semaphores, p-limit, connection pools
6. **Error handling**: Always catch async errors, use Promise.allSettled for partial failure tolerance
7. **Backpressure**: Use queues, streaming, or batching for producer-consumer patterns

View file

@ -1,71 +0,0 @@
# Configuration & Infrastructure Inefficiencies
## Grep/Glob Patterns to Detect
### Missing Connection Pooling
```
# New connection per request
create_engine\((?!.*pool) (SQLAlchemy without pool config)
new Pool\((?!.*max) (pg Pool without max connections)
mongoose\.connect\((?!.*pool) (Mongoose without pool)
DriverManager\.getConnection\( (Java: new connection per call)
psycopg2\.connect\((?!.*pool) (psycopg2 without pool)
redis\.createClient\(.*per.*request (new Redis client per request)
```
### Missing Environment-Based Config
```
hardcoded.*url (hardcoded URLs)
['"]http://localhost (hardcoded localhost URLs)
['"]127\.0\.0\.1 (hardcoded localhost IPs)
password\s*=\s*['"] (hardcoded passwords)
api.?key\s*=\s*['"] (hardcoded API keys)
secret\s*=\s*['"] (hardcoded secrets)
port\s*=\s*\d{4} (hardcoded port numbers)
```
### Missing Process Management
```
# Single-threaded Node.js without clustering
app\.listen\((?!.*cluster) (Node without cluster module)
# Python without proper WSGI/ASGI workers
\.run\(.*debug=True (Flask debug mode)
uvicorn\.run\(.*workers=1 (single worker)
gunicorn.*-w\s*1\b (single gunicorn worker)
```
### Docker/Container Issues
```
FROM.*:latest (unpinned image version)
RUN.*apt-get.*&&.*apt-get (check if apt cache is cleaned)
COPY\s+\.\s+\. (copying entire context - no .dockerignore)
RUN.*npm install\b(?!.*--production) (installing devDeps in production)
RUN.*pip install\b(?!.*--no-cache) (pip without cache clearing)
# Multiple RUN commands that should be combined
RUN.*\nRUN.*\nRUN (multiple RUN layers)
```
### Missing Health Checks
```
# Services without health endpoints
app\.(listen|start)\((?!.*health) (server without health check)
# Docker without HEALTHCHECK
Dockerfile.*(?!.*HEALTHCHECK) (Dockerfile without health check)
```
### Inefficient Polling
```
setInterval\(.*fetch (polling instead of WebSocket/SSE)
setInterval\(.*axios (polling instead of push)
while.*sleep.*fetch (polling loop)
time\.sleep\(.*requests (Python: polling with sleep)
```
## Improvement Strategies
1. **Connection pooling**: Configure pool_size, max_overflow, pool_recycle
2. **Environment config**: Use .env files, config libraries, never hardcode secrets
3. **Process management**: Use cluster mode (Node), multiple workers (Python ASGI/WSGI)
4. **Docker**: Multi-stage builds, .dockerignore, combine RUN layers, pin versions
5. **Health checks**: Add /health endpoint, Docker HEALTHCHECK, readiness/liveness probes
6. **Polling -> Push**: Use WebSocket, SSE, or long-polling instead of interval polling

View file

@ -1,80 +0,0 @@
# Data Structures & Serialization
## Grep/Glob Patterns to Detect
### Wrong Data Structure for the Job
```
# Array used for frequent lookups (should be Map/Set/dict)
\.find\(.*=== (linear search - use Map)
\.findIndex\(.*=== (linear search for index)
\.includes\(.*inside.*loop (O(n) lookup in loop)
\.indexOf\(.*inside.*loop (O(n) lookup in loop)
if.*in\s+\[ (Python: list membership test)
list\.count\( (Python: counting in list)
# Searching sorted data linearly (should use binary search)
\.find\(.*sorted (linear search on sorted array)
for.*sorted (iterating sorted data to find)
# Using objects where Map is better (non-string keys, frequent add/delete)
\w+\[\w+\.id\]\s*= (object with dynamic keys from IDs)
delete\s+\w+\[ (frequent deletion from object)
Object\.keys\(.*\.length (counting object keys - Map.size is O(1))
# Using array for queue/deque operations
\.shift\(\) (Array.shift is O(n) - use proper queue)
\.unshift\( (Array.unshift is O(n))
```
### Unnecessary Deep Copies
```
JSON\.parse\(JSON\.stringify (JSON round-trip for deep clone)
\.map\(.*\.map\(.*spread (nested spread for deep copy)
\{\.\.\..*\{\.\.\. (nested object spread)
structuredClone\(.*inside.*loop (deep cloning in loop)
copy\.deepcopy\(.*loop (Python deepcopy in loop)
import\s+copy (check if deepcopy is overused)
```
### Inefficient Serialization
```
# JSON for internal communication (use binary formats)
JSON\.stringify.*JSON\.parse.*internal
pickle\.dump.*pickle\.load (Python: consider msgpack for cross-language)
# Serializing more than needed
JSON\.stringify\(.*entire (serializing entire object when subset needed)
\.toJSON\(\) (check what's being serialized)
# Repeated serialization
JSON\.stringify\(.*loop (stringifying in loop)
JSON\.parse\(.*loop (parsing in loop)
```
### Unnecessary Object Creation
```
new Date\(.*inside.*loop (creating Date objects in loop)
new RegExp\(.*inside.*loop (compiling regex in loop)
new URL\(.*inside.*loop (creating URL objects in loop)
\.split\(.*\.join\( (split then join - use replace)
\.toString\(\).*\.split\( (unnecessary string conversion)
Array\.from\(.*Array\.from\( (double Array.from)
```
### Immutability Overhead
```
# Excessive spread operators
\{\.\.\.state, (spreading large state objects)
\[\.\.\.array, (spreading large arrays)
\.map\(.*=>.*\{\.\.\. (creating new objects in map with spread)
# Immer not used where it should be
produce\( (check if immer is used consistently)
```
## Improvement Strategies
1. **Array -> Map/Set**: Use Map for key-value lookups, Set for membership testing
2. **Array.shift/unshift**: Use a proper deque/queue implementation
3. **Deep copies**: Use structuredClone (modern), or targeted shallow copies
4. **Serialization**: Use msgpack/protobuf for internal services, only JSON for external APIs
5. **Object creation in loops**: Hoist object creation, reuse instances, use object pools
6. **Large state spreads**: Use Immer's produce(), or targeted updates
7. **Binary search**: Use on sorted data instead of linear search

View file

@ -1,76 +0,0 @@
# Database & Query Optimization
## Grep/Glob Patterns to Detect
### N+1 Query Problems
```
# ORM loops - querying inside iterations
for.*in.*\.all\(\)
for.*in.*\.filter\(
for.*in.*\.objects\.
\.prefetch_related (absence of - check if loops exist WITHOUT prefetch)
\.select_related (absence of - check if FK access exists WITHOUT select_related)
# SQLAlchemy
session\.query.*for.*in
\.lazy\s*=\s*True
# ActiveRecord
\.each.*\.where
\.map.*\.find
# Sequelize / TypeORM
findOne.*inside.*map
findOne.*inside.*for
await.*find.*inside.*loop
```
### Unoptimized Queries
```
SELECT \*
SELECT.*FROM.*WITHOUT.*WHERE (full table scans)
LIKE '% (leading wildcard - can't use index)
ORDER BY.*RAND\(\)
NOT IN.*SELECT (subquery instead of JOIN)
DISTINCT.*SELECT \*
GROUP BY.*without.*index
\.raw\(.*SELECT (raw queries - potential SQL injection too)
COUNT\(\*\).*WHERE (count with filter vs indexed count)
```
### Missing Indexes (Heuristic)
```
WHERE.*=.*AND.*= (composite queries without composite index)
ORDER BY.*multiple columns
JOIN.*ON.*without index hint
\.filter\(.*__in= (IN queries on large sets)
```
### ORM Anti-patterns
```
\.save\(\).*inside.*loop (batch update instead)
\.create\(\).*inside.*loop (bulk_create instead)
\.update\(\).*for.*in (queryset.update instead)
len\(.*\.all\(\)\) (.count() instead)
list\(.*\.all\(\)\) (unnecessary materialization)
if.*\.exists\(\).*\.first\(\) (double query)
\.values\(\).*\.values\(\) (chained values)
```
### Connection Management
```
# Missing connection pooling
create_engine\(.*pool_size (check if pool is configured)
new Pool\( (check pool settings)
max_connections (check if reasonable)
# Unclosed connections
connection\.open.*without.*close
cursor.*without.*close
```
## Improvement Strategies
1. **N+1 Queries**: Use eager loading (prefetch_related, select_related, JOIN FETCH, include/eager)
2. **`SELECT *`**: Select only needed columns
3. **Missing indexes**: Add indexes on frequently filtered/joined columns
4. **Loop queries**: Use bulk operations (bulk_create, bulk_update, executemany)
5. **Connection pooling**: Configure connection pools with appropriate size
6. **Query caching**: Cache frequently-read, rarely-changing data
7. **Pagination**: Never load unbounded result sets

View file

@ -1,84 +0,0 @@
# Dead Code & Redundancy
## Grep/Glob Patterns to Detect
### Unused Exports/Functions
```
export\s+(function|const|class)\s+\w+ (cross-reference: is it imported anywhere?)
def\s+\w+\( (cross-reference: is it called anywhere?)
public\s+(static\s+)?\w+\s+\w+\( (Java/C# methods - are they called?)
func\s+\w+\( (Go functions - are they called?)
```
### Unused Imports
```
import.*from.*['"].*['"] (cross-reference with usage in file)
from\s+\w+\s+import\s+\w+ (Python: check if imported name is used)
require\(['"].*['"]\) (CJS: check if result is used)
use\s+\w+; (Rust: check if used)
```
### Commented-Out Code
```
//\s*(function|const|let|var|class|import|return|if|for|while)
#\s*(def|class|import|return|if|for|while)
/\*[\s\S]*?(function|class|import)[\s\S]*?\*/
```
### Dead Branches
```
if\s*\(\s*false\s*\) (always-false condition)
if\s*\(\s*true\s*\) (always-true condition - dead else)
if\s*\(\s*0\s*\) (falsy constant)
if\s*\(\s*['"]['"] (empty string - always falsy)
TODO.*remove (TODOs indicating dead code)
FIXME.*remove
HACK.*temporary
# Feature flags stuck off
FEATURE_.*=\s*false
ENABLE_.*=\s*false
```
### Duplicate Logic
```
# Similar function signatures in same file or nearby files
function\s+\w*(get|fetch|load|process|handle)\w*\( (many similar handlers)
def\s+\w*(get|fetch|load|process|handle)\w*\( (Python: similar functions)
# Copy-paste indicators
# Same code block appearing multiple times (use Grep to find identical blocks)
```
### Deprecated/Legacy Code
```
@deprecated
@Deprecated
# deprecated
\.deprecated
DEPRECATED
legacy
Legacy
LEGACY
__legacy__
_old\b
_backup\b
_v[0-9]\b (versioned functions like process_v1)
```
### Unreachable Code
```
return.*\n\s*(var|let|const|function) (code after return)
throw.*\n\s*(var|let|const|function) (code after throw)
exit\(\).*\n (code after exit)
sys\.exit\(.*\n (Python: code after sys.exit)
break\s*;\s*\n\s*\w (code after break)
```
## Improvement Strategies
1. **Unused exports**: Remove and verify no external consumers (check all import statements)
2. **Unused imports**: Remove with IDE or linting tooling
3. **Commented code**: Delete it - version control preserves history
4. **Dead branches**: Remove unreachable code, clean up feature flags
5. **Duplicate logic**: Extract shared function, use strategy pattern if variants differ slightly
6. **Deprecated code**: Plan migration, remove after all callers are updated
7. **Unreachable code**: Remove statements after return/throw/exit

View file

@ -1,80 +0,0 @@
# Error Handling & Resilience
## Grep/Glob Patterns to Detect
### Missing Timeouts
```
fetch\((?!.*timeout) (fetch without timeout)
axios\.\w+\((?!.*timeout) (axios without timeout)
requests\.\w+\((?!.*timeout) (Python requests without timeout)
http\.\w+\((?!.*timeout) (http call without timeout)
new Promise\((?!.*setTimeout) (promise without timeout)
\.connect\((?!.*timeout) (DB/socket connect without timeout)
```
### Swallowed Errors
```
catch\s*\(\s*\w*\s*\)\s*\{\s*\} (empty catch block)
catch\s*\(\s*\)\s*\{\s*\} (empty catch, no error param)
except:\s*$ (bare except)
except\s+Exception.*pass (catch-all with pass)
except\s+Exception.*continue (catch-all with continue)
\.catch\(\s*\(\)\s*=>\s*\{\s*\}\) (empty .catch handler)
\.catch\(\s*\(\)\s*=>\s*null\) (swallowing with null)
on_error.*pass (error handler that does nothing)
```
### Missing Retries for Transient Failures
```
# Network calls without retry logic
fetch\((?!.*retry) (fetch without retry)
axios\.\w+\((?!.*retry) (API call without retry)
requests\.\w+\((?!.*retry) (Python request without retry)
# Database operations without retry
\.query\((?!.*retry) (DB query without retry)
\.execute\((?!.*retry) (DB execute without retry)
```
### No Circuit Breaker
```
# Repeated calls to potentially failing services without circuit breaking
while.*retry.*fetch (retry loop without circuit break)
MAX_RETRIES.*=\s*(?:[5-9]|[1-9]\d+)\b (high retry count without circuit breaker)
```
### Resource Cleanup on Error
```
# try without finally for resource cleanup
try\s*\{.*open(?!.*finally) (open resource without finally)
try:.*open\((?!.*finally) (Python: open without finally or context manager)
# Async cleanup missing
async.*try(?!.*finally) (async operation without cleanup)
```
### Cascading Failures
```
# No fallback/default values
\?\?.*undefined (check fallback quality)
\|\|.*null (check fallback quality)
\.get\(.*,\s*None\) (Python: check if None is appropriate default)
# No graceful degradation
catch.*throw (catching just to re-throw - no degradation)
catch.*return\s+null (returning null on error - caller may not handle)
```
### Logging Without Action
```
console\.error\((?!.*(?:throw|return|retry)) (logging error but not handling it)
logger\.error\((?!.*(?:raise|return|retry)) (Python: logging without action)
print\(.*error(?!.*(?:raise|return)) (print error without handling)
```
## Improvement Strategies
1. **Timeouts**: Add timeouts to ALL external calls (network, DB, file I/O). Use AbortController for fetch
2. **Swallowed errors**: At minimum log errors, prefer explicit handling or re-throwing
3. **Retries**: Implement exponential backoff with jitter for transient failures
4. **Circuit breakers**: Use circuit breaker pattern for external service calls
5. **Resource cleanup**: Use try-finally, context managers, or using statements
6. **Graceful degradation**: Return cached/default data instead of failing completely
7. **Error propagation**: Don't catch errors you can't handle - let them bubble up

View file

@ -1,89 +0,0 @@
# I/O & Network Optimization
## Grep/Glob Patterns to Detect
### Sequential Requests (Should Be Batched/Parallel)
```
fetch\(.*\n.*fetch\( (sequential fetch calls)
axios\.\w+\(.*\n.*axios\. (sequential axios calls)
requests\.\w+\(.*\n.*requests (sequential Python requests)
http\.\w+\(.*\n.*http\. (sequential Node http calls)
\.get\(.*\n.*\.get\( (sequential GET requests)
\.post\(.*\n.*\.post\( (sequential POST requests)
```
### Missing Batching
```
# Individual API calls in loops
for.*\n.*fetch\( (fetch in loop)
for.*\n.*axios\. (axios in loop)
for.*\n.*requests\. (requests in loop)
\.map\(.*fetch (map with individual fetches)
\.forEach\(.*fetch (forEach with individual fetches)
# Individual DB writes in loops
\.save\(\).*for (save in loop - should batch)
\.insert\(.*for (insert in loop - should bulk insert)
```
### No Request Deduplication
```
# Same endpoint called multiple times
fetch\(['"]([^'"]+)['"]\) (check for duplicate URLs)
axios\.\w+\(['"]([^'"]+)['"] (check for duplicate URLs)
useQuery\(.*['"]([^'"]+)['"] (check for duplicate query keys)
```
### Missing Compression
```
# Large payload without compression
Content-Type.*application/json (check if gzip/br enabled)
res\.json\( (response without compression middleware)
# No compression middleware
express\(\).*without.*compression
```
### Inefficient Serialization
```
JSON\.stringify\(.*large (stringifying large objects)
JSON\.parse\(.*JSON\.stringify (deep clone via JSON - use structuredClone)
pickle\.dumps\( (Python: consider msgpack/protobuf for performance)
yaml\.dump\(.*yaml\.load\( (YAML round-trip - slow for data exchange)
```
### Missing Streaming
```
\.readFile\( (read entire file vs createReadStream)
\.readFileSync\( (sync + entire file)
body\.json\(\) (parse entire body vs streaming parser)
\.text\(\) (entire response as text)
\.json\(\).*large (entire JSON response in memory)
response\.data (entire response buffered)
```
### Missing Caching Headers
```
# API responses without caching
res\.json\(.*without.*cache-control
res\.send\(.*without.*etag
# Static assets without cache headers
express\.static\(.*without.*maxAge
```
### Retry Without Backoff
```
retry.*count (check if exponential backoff exists)
while.*retry (retry loop without delay increase)
catch.*retry (catch-retry without backoff)
MAX_RETRIES (check backoff strategy)
```
## Improvement Strategies
1. **Sequential requests**: Use Promise.all, asyncio.gather, or batch APIs
2. **Loop requests**: Batch into single API call or use DataLoader pattern
3. **Deduplication**: Use request deduplication (SWR, React Query, custom cache)
4. **Compression**: Enable gzip/brotli at server and CDN level
5. **Serialization**: Use efficient formats (protobuf, msgpack) for internal services
6. **Streaming**: Use streams for large files/responses, NDJSON for large JSON
7. **Caching**: Set appropriate Cache-Control, ETag, use stale-while-revalidate
8. **Retries**: Implement exponential backoff with jitter

View file

@ -1,64 +0,0 @@
# Logging & Observability Performance
## Grep/Glob Patterns to Detect
### Excessive Logging
```
console\.log\( (console.log in production code)
console\.debug\( (console.debug in production)
print\(.*debug (Python print debugging)
logger\.debug\(.*inside.*loop (debug logging in hot loop)
log\.\w+\(.*inside.*for (any logging in tight loop)
console\.log\(JSON\.stringify\( (serializing objects just to log)
```
### Expensive String Formatting in Logs
```
# String interpolation/formatting when log level is disabled
logger\.debug\(f" (Python f-string even when debug disabled)
logger\.debug\(.*\.format\( (Python .format() even when debug disabled)
logger\.debug\(` (JS template literal even when debug disabled)
logger\.debug\(.*\+.*\+ (string concat for debug log)
JSON\.stringify\(.*log (JSON stringify for logging)
```
### Missing Structured Logging
```
console\.log\(["'].*:.*["'] (unstructured string logging)
print\(.*["'].*:.*["'] (unstructured print logging)
logger\.\w+\(["'].*["'] % (format string logging vs structured)
```
### Synchronous Logging
```
fs\.writeFileSync.*log (sync file write for logging)
fs\.appendFileSync.*log (sync file append for logging)
open\(.*log.*\)\.write\( (Python: sync log file write)
```
### Missing Request/Trace IDs
```
# API handlers without correlation IDs
app\.(get|post)\(.*req.*res (check if request ID is propagated)
@app\.route\( (check if request ID middleware exists)
```
### Metrics Collection Overhead
```
# Metrics in hot paths
\.observe\(.*inside.*loop (Prometheus observe in loop)
\.increment\(.*inside.*loop (counter increment in loop)
statsd\..*inside.*loop (StatsD in loop)
\.timing\(.*inside.*loop (timing metric in loop)
Date\.now\(\).*Date\.now\(\) (manual timing - use proper instrumentation)
performance\.now\(\).*performance (manual performance timing)
```
## Improvement Strategies
1. **Console.log**: Remove from production, use proper logger with levels
2. **Log formatting**: Use lazy evaluation - logger.debug("msg %s", expensive_value) vs f-strings
3. **Structured logging**: Use JSON structured logs for machine parsing
4. **Async logging**: Buffer and flush logs asynchronously, don't block request handling
5. **Request IDs**: Add correlation ID middleware, propagate through all service calls
6. **Metrics**: Pre-aggregate metrics, use histograms instead of per-request timers in hot loops

View file

@ -1,66 +0,0 @@
# Memory & Resource Management
## Grep/Glob Patterns to Detect
### Memory Leaks
```
# Event listeners never removed
addEventListener.*without.*removeEventListener
\.on\(.*without.*\.off\(
\.subscribe\(.*without.*unsubscribe
# Timers never cleared
setInterval\(.*without.*clearInterval
setTimeout\(.*without.*clearTimeout
# Global/module-level caches without eviction
global.*\[\] (unbounded global arrays)
global.*\{\} (unbounded global dicts/objects)
module\.exports.*cache.*=.*\{\}
_cache\s*=\s*\{\} (module-level cache without LRU/TTL)
# Closures retaining references
closure.*large.*object
# React-specific
useEffect.*without.*cleanup
useRef.*large.*object
```
### Unclosed Resources
```
open\(.*without.*close
open\(.*without.*with\s (Python: not using context manager)
new FileReader\(.*without.*close
createReadStream\(.*without.*destroy
createWriteStream\(.*without.*end
fs\.open\(.*without.*fs\.close
new Socket\(.*without.*\.close
new WebSocket\(.*without.*\.close
acquire\(.*without.*release
```
### Large Allocations
```
new Array\(\d{5,} (arrays > 10k elements)
Buffer\.alloc\(\d{6,} (buffers ≥ 100KB; tighten to \d{7,} to flag only ≥ ~1MB)
\.fill\(.*\d{6,} (filling large arrays)
\.repeat\(\d{4,} (string repeat large count)
JSON\.parse\(.*large (parsing large JSON in memory)
\.readFileSync\( (synchronous large file reads)
\.readFile\(.*without.*stream (reading whole file vs streaming)
```
### String Concatenation in Loops
```
\+=.*string.*for
\+=.*\".*loop
str\s*\+= (Python string concat in loop)
\.join\(\[ (check if used correctly)
```
## Improvement Strategies
1. **Event listeners**: Always pair add/remove, use AbortController for bulk cleanup
2. **Timers**: Clear intervals in cleanup/unmount, use refs for timer IDs
3. **Caches**: Use LRU cache with max size, add TTL, use WeakMap/WeakRef where possible
4. **File handling**: Use context managers (Python with), try-finally, or using statements
5. **Streams**: Use streaming for large data instead of loading everything in memory
6. **String building**: Use StringBuilder/list join pattern instead of concatenation in loops
7. **Buffers**: Pool and reuse buffers, use streaming transforms

View file

@ -1,90 +0,0 @@
# Rendering & UI Performance
## Grep/Glob Patterns to Detect
### React Re-render Issues
```
# Missing memoization
const\s+\w+\s*=\s*\(\s*\)\s*=>.*return\s*\( (inline component definitions)
\w+\s*=\s*\{.*\}.*prop= (object literal as prop - new ref every render)
\w+\s*=\s*\[.*\].*prop= (array literal as prop)
\w+\s*=\s*\(\).*=>.*prop= (arrow function as prop - new ref every render)
style=\{\{ (inline style object)
# Context causing re-renders
useContext\( (check context value stability)
<\w+Provider\s+value=\{\{ (new object in Provider value)
<\w+Provider\s+value=\{[^}]*\} (unstable provider value)
# State management
useState\(.*\{ (object state - check if needs splitting)
setState\(.*\{\.\.\.state (spreading entire state on each update)
```
### Missing Virtualization
```
\.map\(.*<\w+ (rendering list items - check list size)
{items\.map\( (JSX list rendering - check if >50 items)
\.map\(.*return.*<li (list rendering without virtualization)
\.map\(.*return.*<tr (table row rendering without virtualization)
\.map\(.*return.*<div (div list - check count)
v-for= (Vue list rendering)
ngFor (Angular list rendering)
```
### Layout Thrashing
```
offsetWidth.*style\. (read then write in sequence)
offsetHeight.*style\. (read then write)
getBoundingClientRect.*style (read then write)
clientWidth.*className (read then class change)
scrollTop.*style (read then write)
\.style\..*\.style\. (multiple style writes - batch with class)
```
### Large DOM
```
document\.createElement.*loop (creating elements in loop)
innerHTML\s*\+= (innerHTML concatenation - causes reparse)
\.appendChild\(.*loop (appending in loop without fragment)
document\.querySelector\(.*loop (DOM query in loop)
\$\(.*\).*loop (jQuery selector in loop)
```
### Missing Lazy Loading
```
<img\s+(?!.*loading) (images without loading="lazy")
<iframe\s+(?!.*loading) (iframes without lazy loading)
import.*above.*fold (heavy imports for below-fold content)
```
### Animation Performance
```
# Layout-triggering animations
animate.*width (animating width triggers layout)
animate.*height (animating height triggers layout)
animate.*top (animating top triggers layout)
animate.*left (animating left triggers layout)
animate.*margin (animating margin triggers layout)
transition.*width (transitioning layout properties)
transition.*height
# Should use transform/opacity instead
@keyframes.*\{.*(?:width|height|top|left|margin|padding)
```
### SSR/Hydration Issues
```
useEffect\(.*\[\].*setState (client-side data fetch causing hydration mismatch)
typeof window (window checks indicating SSR issues)
document\. (direct document access in components)
window\. (direct window access in components)
```
## Improvement Strategies
1. **Re-renders**: Use React.memo, useMemo, useCallback for stable references
2. **Context**: Split contexts by update frequency, memoize provider values
3. **Virtualization**: Use react-window/react-virtuoso for lists > 50 items
4. **Layout thrashing**: Batch reads, then batch writes; use requestAnimationFrame
5. **DOM manipulation**: Use DocumentFragment, batch insertions, avoid innerHTML +=
6. **Lazy loading**: Add loading="lazy" to images/iframes, use Intersection Observer
7. **Animations**: Only animate transform and opacity (GPU-composited properties)
8. **SSR**: Pre-fetch data server-side, avoid hydration mismatches

View file

@ -1,68 +0,0 @@
# Security-Related Performance Issues
## Grep/Glob Patterns to Detect
### Cryptographic Misuse
```
md5\( (MD5 is fast but broken - use bcrypt/argon2 for passwords)
sha1\( (SHA1 is weak)
\.hashSync\(.*rounds.*\b[1-5]\b (bcrypt with low rounds - single-digit cost factor of 1-5)
DES\b (DES is obsolete)
Math\.random\(\).*token (Math.random for security tokens)
Math\.random\(\).*password (Math.random for password generation)
random\.random\(\).*secret (Python: insecure random for secrets)
```
### Expensive Security Operations in Hot Paths
```
bcrypt.*inside.*loop (hashing in loop - expensive by design)
jwt\.verify\(.*inside.*loop (JWT verification in loop)
encrypt\(.*inside.*loop (encryption in loop)
\.hash\(.*inside.*loop (hashing in loop)
```
### Missing Rate Limiting
```
app\.(get|post|put|delete)\( (routes without rate limiting)
@app\.route\( (Flask routes without rate limiting)
router\.(get|post|put|delete)\( (Express routes without rate limiting)
```
### SQL Injection Vectors (Also Performance)
```
f"SELECT.*\{ (Python f-string SQL)
f"INSERT.*\{ (Python f-string SQL)
`SELECT.*\$\{ (JS template literal SQL)
"SELECT.*" \+ \w+ (string concat SQL)
'SELECT.*' \+ \w+ (string concat SQL)
\.raw\(.*\+ (raw query with concatenation)
\.execute\(.*%.*% (Python format string SQL)
```
### ReDoS Vulnerable Patterns
```
\(\.\*\)\+ (catastrophic backtracking)
\(\.\+\)\+ (catastrophic backtracking)
\([^)]*\|[^)]*\)\+ (alternation with repetition)
\(\[.*\]\+\)\+ (nested quantifiers)
new RegExp\(.*user (user input in regex)
re\.compile\(.*user (Python: user input in regex)
```
### N+1 Auth Checks
```
# Checking permissions inside loops
\.can\(.*inside.*loop (permission check in loop)
\.authorize\(.*inside.*loop (authorization in loop)
isAllowed\(.*inside.*loop (permission check in loop)
hasPermission\(.*inside.*loop (permission check in loop)
```
## Improvement Strategies
1. **Crypto**: Use bcrypt/argon2 for passwords, SHA-256+ for hashing, crypto.randomBytes for tokens
2. **Hot path crypto**: Cache JWT verification results, batch encrypt/decrypt
3. **Rate limiting**: Add rate limiters (express-rate-limit, django-ratelimit, etc.)
4. **SQL injection**: Use parameterized queries/prepared statements (also faster due to query plan caching)
5. **ReDoS**: Audit regex patterns, use RE2 engine, set regex timeouts
6. **Auth batching**: Batch permission checks, pre-load permissions per request

View file

@ -1,441 +0,0 @@
---
name: core-web-vitals
description: Optimize Core Web Vitals (LCP, INP, CLS) for better page experience and search ranking. Use when asked to "improve Core Web Vitals", "fix LCP", "reduce CLS", "optimize INP", "page experience optimization", or "fix layout shifts".
license: MIT
metadata:
author: web-quality-skills
version: "1.0"
---
# Core Web Vitals optimization
Targeted optimization for the three Core Web Vitals metrics that affect Google Search ranking and user experience.
## The three metrics
| Metric | Measures | Good | Needs work | Poor |
|--------|----------|------|------------|------|
| **LCP** | Loading | ≤ 2.5s | 2.5s – 4s | > 4s |
| **INP** | Interactivity | ≤ 200ms | 200ms – 500ms | > 500ms |
| **CLS** | Visual Stability | ≤ 0.1 | 0.1 – 0.25 | > 0.25 |
Google measures at the **75th percentile** — 75% of page visits must meet "Good" thresholds.
---
## LCP: Largest Contentful Paint
LCP measures when the largest visible content element renders. Usually this is:
- Hero image or video
- Large text block
- Background image
- `<svg>` element
### Common LCP issues
**1. Slow server response (TTFB > 800ms)**
```
Fix: CDN, caching, optimized backend, edge rendering
```
**2. Render-blocking resources**
```html
<!-- ❌ Blocks rendering -->
<link rel="stylesheet" href="/all-styles.css">
<!-- ✅ Critical CSS inlined, rest deferred -->
<style>/* Critical above-fold CSS */</style>
<link rel="preload" href="/styles.css" as="style"
onload="this.onload=null;this.rel='stylesheet'">
```
**3. Slow resource load times**
```html
<!-- ❌ No hints, discovered late -->
<img src="/hero.jpg" alt="Hero">
<!-- ✅ Preloaded with high priority -->
<link rel="preload" href="/hero.webp" as="image" fetchpriority="high">
<img src="/hero.webp" alt="Hero" fetchpriority="high">
```
**4. Client-side rendering delays**
```javascript
// ❌ Content loads after JavaScript
useEffect(() => {
fetch('/api/hero-text').then(r => r.json()).then(setHeroText);
}, []);
// ✅ Server-side or static rendering
// Use SSR, SSG, or streaming to send HTML with content
export async function getServerSideProps() {
const heroText = await fetchHeroText();
return { props: { heroText } };
}
```
### LCP optimization checklist
```markdown
- [ ] TTFB < 800ms (use CDN, edge caching)
- [ ] LCP image preloaded with fetchpriority="high"
- [ ] LCP image optimized (WebP/AVIF, correct size)
- [ ] Critical CSS inlined (< 14KB)
- [ ] No render-blocking JavaScript in <head>
- [ ] Fonts don't block text rendering (font-display: swap)
- [ ] LCP element in initial HTML (not JS-rendered)
```
### LCP element identification
```javascript
// Find your LCP element
new PerformanceObserver((list) => {
const entries = list.getEntries();
const lastEntry = entries[entries.length - 1];
console.log('LCP element:', lastEntry.element);
console.log('LCP time:', lastEntry.startTime);
}).observe({ type: 'largest-contentful-paint', buffered: true });
```
---
## INP: Interaction to Next Paint
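INP measures responsiveness across ALL interactions (clicks, taps, key presses) during a page visit. It reports the slowest interaction observed; on pages with many interactions, one outlier is ignored for every 50 interactions, which approximates the 98th percentile of interaction latency for that visit.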
### INP breakdown
Total INP = **Input Delay** + **Processing Time** + **Presentation Delay**
| Phase | Target | Optimization |
|-------|--------|--------------|
| Input Delay | < 50ms | Reduce main thread blocking |
| Processing | < 100ms | Optimize event handlers |
| Presentation | < 50ms | Minimize rendering work |
### Common INP issues
**1. Long tasks blocking main thread**
```javascript
// ❌ Long synchronous task
function processLargeArray(items) {
items.forEach(item => expensiveOperation(item));
}
// ✅ Break into chunks with yielding
async function processLargeArray(items) {
const CHUNK_SIZE = 100;
for (let i = 0; i < items.length; i += CHUNK_SIZE) {
const chunk = items.slice(i, i + CHUNK_SIZE);
chunk.forEach(item => expensiveOperation(item));
// Yield to main thread
await new Promise(r => setTimeout(r, 0));
// Or use scheduler.yield() when available
}
}
```
**2. Heavy event handlers**
```javascript
// ❌ All work in handler
button.addEventListener('click', () => {
// Heavy computation
const result = calculateComplexThing();
// DOM updates
updateUI(result);
// Analytics
trackEvent('click');
});
// ✅ Prioritize visual feedback
button.addEventListener('click', () => {
// Immediate visual feedback
button.classList.add('loading');
// Defer non-critical work
requestAnimationFrame(() => {
const result = calculateComplexThing();
updateUI(result);
});
// Use requestIdleCallback for analytics
requestIdleCallback(() => trackEvent('click'));
});
```
**3. Third-party scripts**
```javascript
// ❌ Eagerly loaded, blocks interactions
<script src="https://heavy-widget.com/widget.js"></script>
// ✅ Lazy loaded on interaction or visibility
const loadWidget = () => {
import('https://heavy-widget.com/widget.js')
.then(widget => widget.init());
};
button.addEventListener('click', loadWidget, { once: true });
```
**4. Excessive re-renders (React/Vue)**
```javascript
// ❌ Re-renders entire tree
function App() {
const [count, setCount] = useState(0);
return (
<div>
<Counter count={count} />
<ExpensiveComponent /> {/* Re-renders on every count change */}
</div>
);
}
// ✅ Memoized expensive components
const MemoizedExpensive = React.memo(ExpensiveComponent);
function App() {
const [count, setCount] = useState(0);
return (
<div>
<Counter count={count} />
<MemoizedExpensive />
</div>
);
}
```
### INP optimization checklist
```markdown
- [ ] No tasks > 50ms on main thread
- [ ] Event handlers complete quickly (< 100ms)
- [ ] Visual feedback provided immediately
- [ ] Heavy work deferred with requestIdleCallback
- [ ] Third-party scripts don't block interactions
- [ ] Debounced input handlers where appropriate
- [ ] Web Workers for CPU-intensive operations
```
### INP debugging
```javascript
// Identify slow interactions
new PerformanceObserver((list) => {
for (const entry of list.getEntries()) {
if (entry.duration > 200) {
console.warn('Slow interaction:', {
type: entry.name,
duration: entry.duration,
processingStart: entry.processingStart,
processingEnd: entry.processingEnd,
target: entry.target
});
}
}
}).observe({ type: 'event', buffered: true, durationThreshold: 16 });
```
---
## CLS: Cumulative Layout Shift
CLS measures unexpected layout shifts. A shift occurs when a visible element changes position between frames without user interaction.
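**CLS Formula:** each layout shift scores `impact fraction × distance fraction`; CLS is the largest sum of shift scores within a single session window (shifts less than 1s apart, window capped at 5s).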
### Common CLS causes
**1. Images without dimensions**
```html
<!-- ❌ Causes layout shift when loaded -->
<img src="photo.jpg" alt="Photo">
<!-- ✅ Space reserved -->
<img src="photo.jpg" alt="Photo" width="800" height="600">
<!-- ✅ Or use aspect-ratio -->
<img src="photo.jpg" alt="Photo" style="aspect-ratio: 4/3; width: 100%;">
```
**2. Ads, embeds, and iframes**
```html
<!-- ❌ Unknown size until loaded -->
<iframe src="https://ad-network.com/ad"></iframe>
<!-- ✅ Reserve space with min-height -->
<div style="min-height: 250px;">
<iframe src="https://ad-network.com/ad" height="250"></iframe>
</div>
<!-- ✅ Or use aspect-ratio container -->
<div style="aspect-ratio: 16/9;">
<iframe src="https://youtube.com/embed/..."
style="width: 100%; height: 100%;"></iframe>
</div>
```
**3. Dynamically injected content**
```javascript
// ❌ Inserts content above viewport
notifications.prepend(newNotification);
// ✅ Insert below viewport or use transform
const insertBelow = viewport.bottom < newNotification.top;
if (insertBelow) {
notifications.prepend(newNotification);
} else {
// Animate in without shifting
newNotification.style.transform = 'translateY(-100%)';
notifications.prepend(newNotification);
requestAnimationFrame(() => {
newNotification.style.transform = '';
});
}
```
**4. Web fonts causing FOUT**
```css
/* ❌ Font swap shifts text */
@font-face {
font-family: 'Custom';
src: url('custom.woff2') format('woff2');
}
/* ✅ Optional font (no shift if slow) */
@font-face {
font-family: 'Custom';
src: url('custom.woff2') format('woff2');
font-display: optional;
}
/* ✅ Or match fallback metrics */
@font-face {
font-family: 'Custom';
src: url('custom.woff2') format('woff2');
font-display: swap;
size-adjust: 105%; /* Match fallback size */
ascent-override: 95%;
descent-override: 20%;
}
```
**5. Animations triggering layout**
```css
/* ❌ Animates layout properties */
.animate {
transition: height 0.3s, width 0.3s;
}
/* ✅ Use transform instead */
.animate {
transition: transform 0.3s;
}
.animate.expanded {
transform: scale(1.2);
}
```
### CLS optimization checklist
```markdown
- [ ] All images have width/height or aspect-ratio
- [ ] All videos/embeds have reserved space
- [ ] Ads have min-height containers
- [ ] Fonts use font-display: optional or matched metrics
- [ ] Dynamic content inserted below viewport
- [ ] Animations use transform/opacity only
- [ ] No content injected above existing content
```
### CLS debugging
```javascript
// Track layout shifts
new PerformanceObserver((list) => {
for (const entry of list.getEntries()) {
if (!entry.hadRecentInput) {
console.log('Layout shift:', entry.value);
entry.sources?.forEach(source => {
console.log(' Shifted element:', source.node);
console.log(' Previous rect:', source.previousRect);
console.log(' Current rect:', source.currentRect);
});
}
}
}).observe({ type: 'layout-shift', buffered: true });
```
---
## Measurement tools
### Lab testing
- **Chrome DevTools** → Performance panel, Lighthouse
- **WebPageTest** → Detailed waterfall, filmstrip
- **Lighthouse CLI** → `npx lighthouse <url>`
### Field data (real users)
- **Chrome User Experience Report (CrUX)** → BigQuery or API
- **Search Console** → Core Web Vitals report
- **web-vitals library** → Send to your analytics
```javascript
import {onLCP, onINP, onCLS} from 'web-vitals';
function sendToAnalytics({name, value, rating}) {
gtag('event', name, {
event_category: 'Web Vitals',
value: Math.round(name === 'CLS' ? value * 1000 : value),
event_label: rating
});
}
onLCP(sendToAnalytics);
onINP(sendToAnalytics);
onCLS(sendToAnalytics);
```
---
## Framework quick fixes
### Next.js
```jsx
// LCP: Use next/image with priority
import Image from 'next/image';
<Image src="/hero.jpg" priority fill alt="Hero" />
// INP: Use dynamic imports
const HeavyComponent = dynamic(() => import('./Heavy'), { ssr: false });
// CLS: Image component handles dimensions automatically
```
### React
```jsx
// LCP: Preload in head
<link rel="preload" href="/hero.jpg" as="image" fetchpriority="high" />
// INP: Memoize and useTransition
const [isPending, startTransition] = useTransition();
startTransition(() => setExpensiveState(newValue));
// CLS: Always specify dimensions in img tags
```
### Vue/Nuxt
```vue
<!-- LCP: Use nuxt/image with preload -->
<NuxtImg src="/hero.jpg" preload loading="eager" />
<!-- INP: Use async components -->
<component :is="() => import('./Heavy.vue')" />
<!-- CLS: Use aspect-ratio CSS -->
<img :style="{ aspectRatio: '16/9' }" />
```
## References
- [web.dev LCP](https://web.dev/articles/lcp)
- [web.dev INP](https://web.dev/articles/inp)
- [web.dev CLS](https://web.dev/articles/cls)
- [Code Optimizer skill](../code-optimizer/SKILL.md)

View file

@ -1,208 +0,0 @@
# LCP optimization reference
## What is LCP?
Largest Contentful Paint (LCP) measures when the largest content element in the viewport becomes visible. This is typically:
- An `<img>` element
- An `<image>` element inside `<svg>`
- A `<video>` element with poster image
- An element with a background image via `url()`
- A block-level element containing text nodes
## LCP timeline
```
[ Server Response ][ Resource Load ][ Render ]
TTFB Download Paint
└─────────────────────────────────────┘
LCP Time
```
## Detailed optimizations
### 1. Server response time (TTFB)
Target: < 800ms
**Causes:**
- Slow server/database queries
- No CDN/edge caching
- Inefficient backend code
- Cold starts (serverless)
**Solutions:**
```javascript
// Use edge functions for dynamic content
// Vercel example
export const config = { runtime: 'edge' };
// Use stale-while-revalidate caching
// Cache-Control header
res.setHeader('Cache-Control', 's-maxage=60, stale-while-revalidate=300');
```
### 2. Resource load time
**For images:**
```html
<!-- Preload LCP image -->
<link rel="preload" as="image" href="/hero.webp"
imagesrcset="/hero-400.webp 400w, /hero-800.webp 800w"
imagesizes="100vw"
fetchpriority="high">
<!-- Modern format with fallback -->
<picture>
<source srcset="/hero.avif" type="image/avif">
<source srcset="/hero.webp" type="image/webp">
<img src="/hero.jpg" width="1200" height="600"
fetchpriority="high" alt="Hero">
</picture>
```
**For text (web fonts):**
```css
@font-face {
font-family: 'Heading';
src: url('/fonts/heading.woff2') format('woff2');
font-display: swap; /* Show fallback immediately */
}
```
### 3. Render blocking resources
**Critical CSS pattern:**
```html
<head>
<!-- Inline critical CSS -->
<style>
/* Only above-fold styles, < 14KB */
.hero { /* ... */ }
.nav { /* ... */ }
</style>
<!-- Defer non-critical CSS -->
<link rel="preload" href="/styles.css" as="style"
onload="this.onload=null;this.rel='stylesheet'">
</head>
```
**Defer JavaScript:**
```html
<!-- ❌ Blocks parsing -->
<script src="/app.js"></script>
<!-- ✅ Deferred (runs after HTML parsed) -->
<script defer src="/app.js"></script>
<!-- ✅ Module (deferred by default) -->
<script type="module" src="/app.mjs"></script>
```
### 4. Client-side rendering
**Problem:** Content not in initial HTML.
**Solutions:**
**Server-side rendering (SSR):**
```javascript
// Next.js
export async function getServerSideProps() {
const data = await fetchHeroContent();
return { props: { hero: data } };
}
```
**Static site generation (SSG):**
```javascript
// Next.js
export async function getStaticProps() {
const data = await fetchHeroContent();
return { props: { hero: data }, revalidate: 3600 };
}
```
**Streaming SSR:**
```jsx
// React 18+
import { Suspense } from 'react';
function Page() {
return (
<Suspense fallback={<HeroSkeleton />}>
<Hero />
</Suspense>
);
}
```
## Framework-specific tips
### Next.js
```jsx
import Image from 'next/image';
// LCP image with priority
<Image
src="/hero.jpg"
priority
fill
sizes="100vw"
alt="Hero"
/>
```
### Nuxt
```vue
<NuxtImg
src="/hero.jpg"
preload
loading="eager"
sizes="100vw"
/>
```
### Astro
```astro
---
import { Image } from 'astro:assets';
import hero from '../assets/hero.jpg';
---
<Image
src={hero}
loading="eager"
decoding="sync"
alt="Hero"
/>
```
## Debugging LCP
```javascript
// Identify LCP element
new PerformanceObserver((entryList) => {
const entries = entryList.getEntries();
const lastEntry = entries[entries.length - 1];
console.log('LCP:', {
element: lastEntry.element,
time: lastEntry.startTime,
size: lastEntry.size,
url: lastEntry.url,
renderTime: lastEntry.renderTime,
loadTime: lastEntry.loadTime
});
}).observe({ type: 'largest-contentful-paint', buffered: true });
```
## Common issues
| Issue | Impact | Fix |
|-------|--------|-----|
| No preload for LCP image | +500-1000ms | Add `<link rel="preload">` |
| Large unoptimized image | +300-800ms | Compress, use WebP/AVIF |
| Render-blocking CSS | +200-500ms | Inline critical CSS |
| Slow TTFB | +300-2000ms | CDN, edge caching |
| Client-rendered content | +500-2000ms | SSR/SSG |

View file

@ -1,93 +0,0 @@
---
name: create-sf-extension
description: Create, debug, and iterate on SF extensions (TypeScript modules that add tools, commands, event hooks, custom UI, and providers to SF). Use when asked to build an extension, add a tool the LLM can call, register a slash command, hook into SF events, create custom TUI components, or modify SF behavior. Triggers on "create extension", "build extension", "add a tool", "register command", "hook into sf", "custom tool", "sf plugin", "sf extension".
---
<essential_principles>
**Extensions are TypeScript modules** that hook into SF's runtime (built on pi). They export a default function receiving `ExtensionAPI` and use it to subscribe to events, register tools/commands/shortcuts, and interact with the session.
**SF extension paths (community/user-installed extensions):**
- Global: `~/.pi/agent/extensions/*.ts` or `~/.pi/agent/extensions/*/index.ts`
- Project-local: `.sf/extensions/*.ts` or `.sf/extensions/*/index.ts`
Note: `~/.sf/agent/extensions/` is reserved for bundled extensions synced from the sf package. Community extensions placed there are silently ignored by the loader.
**The three primitives:**
1. **Events** — Listen and react (`pi.on("event", handler)`). Can block tool calls, modify messages, inject context.
2. **Tools** — Give the LLM new abilities (`pi.registerTool()`). LLM calls them autonomously.
3. **Commands** — Give users slash commands (`pi.registerCommand()`). Users type `/mycommand`.
**Non-negotiable rules:**
- Use `StringEnum` from `@singularity-forge/ai` for string enum params (NOT `Type.Union`/`Type.Literal` — breaks Google's API)
- Truncate tool output to 50KB / 2000 lines max (use `truncateHead`/`truncateTail` from `@singularity-forge/coding-agent`)
- Store stateful tool state in `details` for branching support
- Check `signal?.aborted` in long-running tool executions
- Use `pi.exec()` not `child_process` for shell commands
- Check `ctx.hasUI` before dialog methods (non-interactive modes exist)
- Session control methods (`waitForIdle`, `newSession`, `fork`, `navigateTree`, `reload`) are ONLY available in command handlers — they deadlock in event handlers
- Lines from `render()` must not exceed `width` — use `truncateToWidth()`
- Use theme from callback params, never import directly
- Strip leading `@` from path params in custom tools (some models add it)
**Available imports:**
| Package | Purpose |
|---------|---------|
| `@singularity-forge/coding-agent` | `ExtensionAPI`, `ExtensionContext`, `Theme`, event types, tool utilities, `DynamicBorder`, `BorderedLoader`, `CustomEditor`, `highlightCode` |
| `@sinclair/typebox` | `Type.Object`, `Type.String`, `Type.Number`, `Type.Optional`, `Type.Boolean`, `Type.Array` |
| `@singularity-forge/ai` | `StringEnum` (required for string enums), `Type` re-export |
| `@singularity-forge/tui` | `Text`, `Box`, `Container`, `Spacer`, `Markdown`, `SelectList`, `Input`, `matchesKey`, `Key`, `truncateToWidth`, `visibleWidth` |
| Node.js built-ins | `node:fs`, `node:path`, `node:child_process`, etc. |
</essential_principles>
<routing>
Based on user intent, route to the appropriate workflow:
**Building a new extension:**
- "Create an extension", "build a tool", "I want to add a command" → `workflows/create-extension.md`
**Adding capabilities to an existing extension:**
- "Add a tool to my extension", "add event hook", "add custom rendering" → `workflows/add-capability.md`
**Debugging an extension:**
- "My extension doesn't work", "tool not showing up", "event not firing" → `workflows/debug-extension.md`
**If user intent is clear from context, skip the question and go directly to the workflow.**
</routing>
<reference_index>
All domain knowledge in `references/`:
**Core architecture:** extension-lifecycle.md, events-reference.md
**API surface:** extensionapi-reference.md, extensioncontext-reference.md
**Capabilities:** custom-tools.md, custom-commands.md, custom-ui.md, custom-rendering.md
**Patterns:** state-management.md, system-prompt-modification.md, compaction-session-control.md
**Infrastructure:** model-provider-management.md, remote-execution-overrides.md, packaging-distribution.md, mode-behavior.md
**Spec:** `docs/extension-sdk/manifest-spec.md` — manifest format, tiers, validation
**Testing:** `docs/extension-sdk/testing.md` — mock patterns, test conventions
**SDK:** `docs/extension-sdk/` — the authoritative SF extension guide
**Gotchas:** key-rules-gotchas.md
</reference_index>
<workflows_index>
| Workflow | Purpose |
|----------|---------|
| create-extension.md | Build a new extension from scratch |
| add-capability.md | Add tools, commands, hooks, UI to an existing extension |
| debug-extension.md | Diagnose and fix extension issues |
</workflows_index>
<success_criteria>
Extension is complete when:
- `extension-manifest.json` exists with accurate `provides` listing all registered tools/commands/hooks/shortcuts
- TypeScript compiles without errors (jiti handles this at runtime)
- Extension loads on SF startup or `/reload` without errors
- Tools appear in the LLM's system prompt and are callable
- Commands respond to `/command` input
- Event hooks fire at the expected lifecycle points
- Custom UI renders correctly within terminal width
- State persists correctly across session restarts (if stateful)
- Output is truncated to safe limits (if tools produce variable output)
</success_criteria>

View file

@ -1,77 +0,0 @@
<overview>
Custom compaction hooks, triggering compaction, and session control methods available only in command handlers.
</overview>
<custom_compaction>
Override default compaction behavior:
```typescript
pi.on("session_before_compact", async (event, ctx) => {
const { preparation, branchEntries, customInstructions, signal } = event;
// Option 1: Cancel
return { cancel: true };
// Option 2: Custom summary
return {
compaction: {
summary: "Custom summary of conversation so far...",
firstKeptEntryId: preparation.firstKeptEntryId,
tokensBefore: preparation.tokensBefore,
}
};
});
```
</custom_compaction>
<trigger_compaction>
Trigger compaction programmatically from any handler:
```typescript
ctx.compact({
customInstructions: "Focus on the authentication changes",
onComplete: (result) => ctx.ui.notify("Compacted!", "info"),
onError: (error) => ctx.ui.notify(`Failed: ${error.message}`, "error"),
});
```
</trigger_compaction>
<session_control>
**Only available in command handlers** (deadlocks in event handlers):
```typescript
pi.registerCommand("handoff", {
handler: async (args, ctx) => {
await ctx.waitForIdle();
// Create new session with initial context
const result = await ctx.newSession({
parentSession: ctx.sessionManager.getSessionFile(),
setup: async (sm) => {
sm.appendMessage({
role: "user",
content: [{ type: "text", text: `Context: ${args}` }],
timestamp: Date.now(),
});
},
});
if (result.cancelled) { /* extension cancelled via session_before_switch */ }
},
});
```
| Method | Purpose |
|--------|---------|
| `ctx.waitForIdle()` | Wait for agent to finish streaming |
| `ctx.newSession(options?)` | Create a new session |
| `ctx.fork(entryId)` | Fork from a specific entry |
| `ctx.navigateTree(targetId, options?)` | Navigate session tree (with optional summary) |
| `ctx.reload()` | Hot-reload everything (treat as terminal — code after runs pre-reload version) |
`navigateTree` options:
- `summarize: boolean` — generate summary of abandoned branch
- `customInstructions: string` — instructions for summarizer
- `replaceInstructions: boolean` — replace default prompt entirely
- `label: string` — label to attach to branch summary
</session_control>

View file

@ -1,139 +0,0 @@
<overview>
Custom slash commands — registration, argument completions, subcommand patterns, and the extended command context.
</overview>
<basic_registration>
```typescript
pi.registerCommand("deploy", {
description: "Deploy to an environment",
handler: async (args, ctx) => {
// args = everything after "/deploy "
// ctx = ExtensionCommandContext (has session control methods)
ctx.ui.notify(`Deploying to ${args || "production"}`, "info");
},
});
```
</basic_registration>
<argument_completions>
Add tab-completion for command arguments:
```typescript
import type { AutocompleteItem } from "@singularity-forge/tui";
pi.registerCommand("deploy", {
description: "Deploy to an environment",
getArgumentCompletions: (prefix: string): AutocompleteItem[] | null => {
const envs = ["dev", "staging", "prod"];
const items = envs.map(e => ({ value: e, label: e }));
const filtered = items.filter(i => i.value.startsWith(prefix));
return filtered.length > 0 ? filtered : null;
},
handler: async (args, ctx) => {
ctx.ui.notify(`Deploying to ${args}`, "info");
},
});
```
</argument_completions>
<subcommand_pattern>
Fake nested commands via first-argument parsing. Used by `/wt new|ls|switch|merge|rm`.
```typescript
pi.registerCommand("foo", {
description: "Manage foo items: /foo new|list|delete [name]",
getArgumentCompletions: (prefix: string) => {
const parts = prefix.trim().split(/\s+/);
// First arg: subcommand
if (parts.length <= 1) {
return ["new", "list", "delete"]
.filter(cmd => cmd.startsWith(parts[0] ?? ""))
.map(cmd => ({ value: cmd, label: cmd }));
}
// Second arg: depends on subcommand
if (parts[0] === "delete") {
const items = getItemsSomehow();
return items
.filter(name => name.startsWith(parts[1] ?? ""))
.map(name => ({ value: `delete ${name}`, label: name }));
}
return [];
},
handler: async (args, ctx) => {
const parts = args.trim().split(/\s+/);
const sub = parts[0];
switch (sub) {
case "new": /* ... */ return;
case "list": /* ... */ return;
case "delete": /* handle parts[1] */ return;
default:
ctx.ui.notify("Usage: /foo <new|list|delete> [name]", "info");
}
},
});
```
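**Gotcha:** `"".trim().split(/\s+/)` produces `['']`, not `[]`, which is why `parts.length <= 1` covers both an empty prefix and a partially typed first argument. Because `trim()` also strips trailing whitespace, a prefix such as `"delete "` still yields a single part, so second-argument completions only appear once the first character of the name has been typed.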
</subcommand_pattern>
<command_context>
Command handlers get `ExtensionCommandContext` which extends `ExtensionContext` with session control methods:
| Method | Purpose |
|--------|---------|
| `ctx.waitForIdle()` | Wait for agent to finish streaming |
| `ctx.newSession(options?)` | Create a new session |
| `ctx.fork(entryId)` | Fork from an entry |
| `ctx.navigateTree(targetId, options?)` | Navigate session tree |
| `ctx.reload()` | Hot-reload everything |
**⚠️ These methods are ONLY available in command handlers.** Calling them from event handlers causes deadlocks.
```typescript
pi.registerCommand("handoff", {
handler: async (args, ctx) => {
await ctx.waitForIdle();
await ctx.newSession({
setup: async (sm) => {
sm.appendMessage({
role: "user",
content: [{ type: "text", text: `Context: ${args}` }],
timestamp: Date.now(),
});
},
});
},
});
```
</command_context>
<reload_pattern>
Expose reload as both a command and a tool the LLM can call:
```typescript
pi.registerCommand("reload-runtime", {
description: "Reload extensions, skills, prompts, and themes",
handler: async (_args, ctx) => {
await ctx.reload();
return; // Treat reload as terminal
},
});
pi.registerTool({
name: "reload_runtime",
label: "Reload Runtime",
description: "Reload extensions, skills, prompts, and themes",
parameters: Type.Object({}),
async execute() {
pi.sendUserMessage("/reload-runtime", { deliverAs: "followUp" });
return { content: [{ type: "text", text: "Queued /reload-runtime as follow-up." }] };
},
});
```
</reload_pattern>

View file

@ -1,108 +0,0 @@
<overview>
Custom rendering for tools and messages — control how they appear in the TUI.
</overview>
<tool_rendering>
Tools can provide `renderCall` (how the call looks) and `renderResult` (how the result looks):
```typescript
import { Text } from "@singularity-forge/tui";
import { keyHint } from "@singularity-forge/coding-agent";
pi.registerTool({
name: "my_tool",
// ...
renderCall(args, theme) {
let text = theme.fg("toolTitle", theme.bold("my_tool "));
text += theme.fg("muted", args.action);
if (args.text) text += " " + theme.fg("dim", `"${args.text}"`);
return new Text(text, 0, 0); // 0,0 padding — Box handles it
},
renderResult(result, { expanded, isPartial }, theme) {
// isPartial = true during streaming (onUpdate was called)
if (isPartial) {
return new Text(theme.fg("warning", "Processing..."), 0, 0);
}
// expanded = user toggled expand (Ctrl+O)
if (result.details?.error) {
return new Text(theme.fg("error", `Error: ${result.details.error}`), 0, 0);
}
let text = theme.fg("success", "✓ Done");
if (!expanded) {
text += ` (${keyHint("expandTools", "to expand")})`;
}
if (expanded && result.details?.items) {
for (const item of result.details.items) {
text += "\n " + theme.fg("dim", item);
}
}
return new Text(text, 0, 0);
},
});
```
If you omit `renderCall`/`renderResult`, the built-in renderer is used. Useful for tool overrides where you just wrap logic without reimplementing UI.
**Fallback:** If render methods throw, `renderCall` shows tool name, `renderResult` shows raw `content` text.
</tool_rendering>
<key_hints>
Key hint helpers for showing keybinding info in render output:
```typescript
import { keyHint, appKeyHint, editorKey, rawKeyHint } from "@singularity-forge/coding-agent";
// Editor action hint (respects user keybinding config)
keyHint("expandTools", "to expand") // e.g., "Ctrl+O to expand"
keyHint("selectConfirm", "to select")
// Raw key hint (always shows literal key)
rawKeyHint("Ctrl+O", "to expand")
```
</key_hints>
<message_rendering>
Register a renderer for custom message types:
```typescript
import { Text } from "@singularity-forge/tui";
pi.registerMessageRenderer("my-extension", (message, options, theme) => {
const { expanded } = options;
let text = theme.fg("accent", `[${message.customType}] `) + message.content;
if (expanded && message.details) {
text += "\n" + theme.fg("dim", JSON.stringify(message.details, null, 2));
}
return new Text(text, 0, 0);
});
// Send messages that use this renderer:
pi.sendMessage({
customType: "my-extension", // Matches renderer name
content: "Status update",
display: true,
details: { foo: "bar" },
});
```
</message_rendering>
<syntax_highlighting>
```typescript
import { highlightCode, getLanguageFromPath } from "@singularity-forge/coding-agent";
const lang = getLanguageFromPath("/path/to/file.rs"); // "rust"
const highlighted = highlightCode(code, lang, theme);
```
</syntax_highlighting>
<best_practices>
- Return `Text` with padding `(0, 0)` — the wrapping `Box` handles padding
- Support `expanded` for detail on demand
- Handle `isPartial` for streaming progress
- Keep collapsed view compact
- Use `\n` for multi-line content within a single `Text`
</best_practices>

View file

@ -1,183 +0,0 @@
<overview>
Complete custom tools reference — registration, parameters, execution, output truncation, overrides, rendering, and dynamic registration.
</overview>
<registration>
```typescript
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/ai";
pi.registerTool({
name: "my_tool", // Unique identifier (snake_case)
label: "My Tool", // Display name in TUI
description: "What this does", // Full description shown to LLM
// Optional: one-liner for system prompt "Available tools" section
promptSnippet: "Manage project todo items",
// Optional: bullets added to system prompt "Guidelines" when tool is active
promptGuidelines: [
"Use my_tool for task management instead of file edits."
],
// Parameter schema (MUST use TypeBox)
parameters: Type.Object({
action: StringEnum(["list", "add", "remove"] as const),
text: Type.Optional(Type.String({ description: "Item text" })),
id: Type.Optional(Type.Number({ description: "Item ID" })),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
// 1. Check cancellation
if (signal?.aborted) {
return { content: [{ type: "text", text: "Cancelled" }] };
}
// 2. Stream progress (optional)
onUpdate?.({
content: [{ type: "text", text: "Working..." }],
details: { progress: 50 },
});
// 3. Do the work
const result = await doWork(params);
// 4. Return result
return {
content: [{ type: "text", text: "Result text for LLM" }], // Sent to LLM context
details: { data: result }, // For rendering & state
};
},
// Optional: custom TUI rendering
renderCall(args, theme) { ... },
renderResult(result, { expanded, isPartial }, theme) { ... },
});
```
</registration>
<critical_stringenum>
**⚠️ MUST use `StringEnum` for string enum parameters:**
```typescript
import { StringEnum } from "@singularity-forge/ai";
// ✅ Correct — works with all providers including Google
action: StringEnum(["list", "add", "remove"] as const)
// ❌ BROKEN with Google's API
action: Type.Union([Type.Literal("list"), Type.Literal("add")])
```
</critical_stringenum>
<output_truncation>
Tools MUST truncate output to avoid context overflow. Built-in limit: 50KB / 2000 lines.
```typescript
import {
truncateHead, truncateTail, formatSize,
DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES,
} from "@singularity-forge/coding-agent";
async execute(toolCallId, params, signal, onUpdate, ctx) {
const output = await runCommand();
const truncation = truncateHead(output, {
maxLines: DEFAULT_MAX_LINES,
maxBytes: DEFAULT_MAX_BYTES,
});
let result = truncation.content;
if (truncation.truncated) {
const tempFile = writeTempFile(output);
result += `\n\n[Output truncated: ${truncation.outputLines}/${truncation.totalLines} lines`;
result += ` (${formatSize(truncation.outputBytes)}/${formatSize(truncation.totalBytes)}).`;
result += ` Full output: ${tempFile}]`;
}
return { content: [{ type: "text", text: result }] };
}
```
Use `truncateHead` when beginning matters (search results, file reads). Use `truncateTail` when end matters (logs, command output).
</output_truncation>
<signaling_errors>
Throw to signal an error (sets `isError: true`). Returning a value never sets the error flag.
```typescript
async execute(toolCallId, params) {
if (!isValid(params.input)) {
throw new Error(`Invalid input: ${params.input}`);
}
return { content: [{ type: "text", text: "OK" }], details: {} };
}
```
</signaling_errors>
<dynamic_registration>
Tools can be registered at any time — during load, in `session_start`, in command handlers. Available immediately without `/reload`.
```typescript
pi.on("session_start", async (_event, ctx) => {
pi.registerTool({ name: "dynamic_tool", ... });
});
```
Use `pi.setActiveTools(names)` to enable/disable tools at runtime.
</dynamic_registration>
<overriding_builtins>
Register a tool with the same name as a built-in (`read`, `bash`, `edit`, `write`, `grep`, `find`, `ls`) to override it. **Must match exact result shape including `details` type.**
```typescript
import { createReadTool } from "@singularity-forge/coding-agent";
pi.registerTool({
name: "read",
label: "Read (Logged)",
description: "Read file contents with logging",
parameters: Type.Object({
path: Type.String(),
offset: Type.Optional(Type.Number()),
limit: Type.Optional(Type.Number()),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
console.log(`[AUDIT] Reading: ${params.path}`);
const builtIn = createReadTool(ctx.cwd);
return builtIn.execute(toolCallId, params, signal, onUpdate);
},
// Omit renderCall/renderResult to use built-in renderer
});
```
Start with no built-in tools: `sf --no-tools -e ./my-extension.ts`
</overriding_builtins>
<multiple_tools>
One extension can register multiple tools with shared state:
```typescript
export default function (pi: ExtensionAPI) {
let connection = null;
pi.registerTool({ name: "db_connect", ... });
pi.registerTool({ name: "db_query", ... });
pi.registerTool({ name: "db_close", ... });
pi.on("session_shutdown", async () => {
connection?.close();
});
}
```
</multiple_tools>
<path_normalization>
Some models add an `@` prefix to path arguments. Strip it:
```typescript
async execute(toolCallId, params, signal, onUpdate, ctx) {
let path = params.path;
if (path.startsWith("@")) path = path.slice(1);
// ...
}
```
</path_normalization>

View file

@ -1,490 +0,0 @@
<overview>
Complete custom UI reference — dialogs, persistent elements, custom components, overlays, custom editors, built-in components, keyboard input, performance, theming, and common mistakes.
</overview>
<ui_architecture>
```
┌─────────────────────────────────────────────────┐
│ Custom Header (ctx.ui.setHeader) │
├─────────────────────────────────────────────────┤
│ Message Area │
│ - User/assistant messages │
│ - Tool calls ◄── renderCall/renderResult │
│ - Custom messages ◄── registerMessageRenderer │
├─────────────────────────────────────────────────┤
│ Widgets (above editor) ◄── ctx.ui.setWidget │
├─────────────────────────────────────────────────┤
│ Editor ◄── ctx.ui.custom() / setEditorComponent│
├─────────────────────────────────────────────────┤
│ Widgets (below editor) ◄── ctx.ui.setWidget │
├─────────────────────────────────────────────────┤
│ Footer ◄── ctx.ui.setFooter / setStatus │
└─────────────────────────────────────────────────┘
┌─────────────────────┐
│ Overlay (floating) │ ◄── ctx.ui.custom({ overlay })
└─────────────────────┘
```
**11 ways to get UI on screen:**
| Method | Blocks? | Replaces editor? |
|--------|---------|-------------------|
| `ctx.ui.select/confirm/input/editor` | Yes | Temporarily |
| `ctx.ui.notify` | No | No |
| `ctx.ui.setStatus` | No | No (footer) |
| `ctx.ui.setWidget` | No | No |
| `ctx.ui.setFooter` | No | No (replaces footer) |
| `ctx.ui.setHeader` | No | No (replaces header) |
| `ctx.ui.custom()` | Yes | Temporarily |
| `ctx.ui.custom({overlay})` | Yes | No (renders on top) |
| `ctx.ui.setEditorComponent` | No | Yes (permanently) |
| `renderCall/renderResult` | No | No (inline in messages) |
| `registerMessageRenderer` | No | No (inline in messages) |
</ui_architecture>
<component_interface>
Every visual element implements:
```typescript
interface Component {
render(width: number): string[]; // Required — each line ≤ width visible chars
handleInput?(data: string): void; // Optional — receive keyboard input
wantsKeyRelease?: boolean; // Optional — receive key release events (Kitty protocol)
invalidate(): void; // Required — clear cached render state
}
```
**Render contract:**
- Return array of strings, one per line
- Each string MUST NOT exceed `width` in visible characters
- ANSI escape codes don't count toward visible width
- **Styles are reset at end of each line** — reapply per line
- Return `[]` for zero-height component
**Invalidation contract:**
- Clear ALL cached render output
- Clear any pre-baked themed strings
- Call `super.invalidate()` if extending a built-in component
</component_interface>
<dialogs>
Blocking dialog methods on `ctx.ui`:
```typescript
const choice = await ctx.ui.select("Pick one:", ["A", "B", "C"]); // string | undefined
const ok = await ctx.ui.confirm("Delete?", "This cannot be undone"); // boolean
const name = await ctx.ui.input("Name:", "placeholder"); // string | undefined
const text = await ctx.ui.editor("Edit:", "prefilled text"); // string | undefined
// Timed auto-dismiss with countdown
const ok = await ctx.ui.confirm("Proceed?", "Auto-continues in 5s", { timeout: 5000 });
// Returns false on timeout, undefined for select/input
// Manual dismissal with AbortSignal (distinguish timeout from cancel)
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 5000);
const ok = await ctx.ui.confirm("Timed", "Auto-cancels in 5s", { signal: controller.signal });
clearTimeout(timeoutId);
if (controller.signal.aborted) { /* timed out */ }
```
</dialogs>
<persistent_ui>
```typescript
// Footer status (multiple extensions can set independent entries)
ctx.ui.setStatus("my-ext", "● Active");
ctx.ui.setStatus("my-ext", undefined); // Clear
// Widgets
ctx.ui.setWidget("my-id", ["Line 1", "Line 2"]); // Above editor
ctx.ui.setWidget("my-id", ["Below"], { placement: "belowEditor" }); // Below editor
ctx.ui.setWidget("my-id", (_tui, theme) => ({ // Themed
render: () => [theme.fg("accent", "Styled")],
invalidate: () => {},
}));
ctx.ui.setWidget("my-id", undefined); // Clear
// Working message during streaming
ctx.ui.setWorkingMessage("Analyzing code...");
ctx.ui.setWorkingMessage(); // Restore default
// Custom footer (full replacement)
ctx.ui.setFooter((tui, theme, footerData) => ({
render(width) {
const branch = footerData.getGitBranch(); // Only available here
const statuses = footerData.getExtensionStatuses(); // All setStatus values
return [truncateToWidth(`${branch} | model`, width)];
},
invalidate() {},
dispose: footerData.onBranchChange(() => tui.requestRender()), // Reactive
}));
ctx.ui.setFooter(undefined); // Restore default
// Custom header
ctx.ui.setHeader((tui, theme) => ({
render(width) { return [theme.fg("accent", theme.bold("My Header"))]; },
invalidate() {},
}));
// Editor control
ctx.ui.setEditorText("Prefill");
const current = ctx.ui.getEditorText();
ctx.ui.pasteToEditor("pasted content"); // Triggers paste handling
// Tool expansion
ctx.ui.setToolsExpanded(true);
const expanded = ctx.ui.getToolsExpanded();
// Theme management
const themes = ctx.ui.getAllThemes();
ctx.ui.setTheme("light");
ctx.ui.theme.fg("accent", "text"); // Access current theme
```
</persistent_ui>
<custom_components>
`ctx.ui.custom()` temporarily replaces the editor. Returns a value when `done()` is called.
**Factory callback args:**
| Argument | Type | Purpose |
|----------|------|---------|
| `tui` | `TUI` | `tui.requestRender()` triggers re-render after state changes |
| `theme` | `Theme` | Current theme for styling |
| `keybindings` | `KeybindingsManager` | App keybinding config |
| `done` | `(value: T) => void` | Close component and return value |
**Inline pattern:**
```typescript
const result = await ctx.ui.custom<string | null>((tui, theme, keybindings, done) => ({
render(width: number): string[] {
return [truncateToWidth("Press Enter to confirm, Escape to cancel", width)];
},
handleInput(data: string) {
if (matchesKey(data, Key.enter)) done("confirmed");
if (matchesKey(data, Key.escape)) done(null);
},
invalidate() {},
}));
```
**Class-based pattern (recommended for complex UI):**
```typescript
class MyComponent {
private selected = 0;
private cachedWidth?: number;
private cachedLines?: string[];
constructor(
private tui: { requestRender: () => void },
private theme: Theme,
private items: string[],
private done: (value: string | null) => void,
) {}
handleInput(data: string) {
if (matchesKey(data, Key.up) && this.selected > 0) this.selected--;
else if (matchesKey(data, Key.down) && this.selected < this.items.length - 1) this.selected++;
else if (matchesKey(data, Key.enter)) { this.done(this.items[this.selected]); return; }
else if (matchesKey(data, Key.escape)) { this.done(null); return; }
else return;
this.invalidate();
this.tui.requestRender();
}
render(width: number): string[] {
if (this.cachedLines && this.cachedWidth === width) return this.cachedLines;
this.cachedLines = this.items.map((item, i) =>
truncateToWidth((i === this.selected ? "> " : " ") + item, width)
);
this.cachedWidth = width;
return this.cachedLines;
}
invalidate() { this.cachedWidth = undefined; this.cachedLines = undefined; }
}
const result = await ctx.ui.custom<string | null>((tui, theme, _kb, done) =>
new MyComponent(tui, theme, ["A", "B", "C"], done)
);
```
**Composing with built-in components:**
```typescript
const result = await ctx.ui.custom<string | null>((tui, theme, _kb, done) => {
const container = new Container();
container.addChild(new DynamicBorder((s: string) => theme.fg("accent", s)));
container.addChild(new Text(theme.fg("accent", theme.bold("Title")), 1, 0));
const selectList = new SelectList(items, 10, {
selectedPrefix: (t) => theme.fg("accent", t),
selectedText: (t) => theme.fg("accent", t),
description: (t) => theme.fg("muted", t),
scrollInfo: (t) => theme.fg("dim", t),
noMatch: (t) => theme.fg("warning", t),
});
selectList.onSelect = (item) => done(item.value);
selectList.onCancel = () => done(null);
container.addChild(selectList);
return {
render: (w) => container.render(w),
invalidate: () => container.invalidate(),
handleInput: (data) => { selectList.handleInput(data); tui.requestRender(); },
};
});
```
</custom_components>
<overlays>
Floating modals rendered on top of everything:
```typescript
const result = await ctx.ui.custom<string | null>(
(tui, theme, _kb, done) => new MyDialog({ onClose: done }),
{
overlay: true,
overlayOptions: {
anchor: "center", // 9 positions (see below)
width: "50%", // number = columns, string = percentage
minWidth: 40,
maxHeight: "80%",
margin: 2, // All sides, or { top, right, bottom, left }
offsetX: 0, offsetY: 0, // Fine-tune position
visible: (w, h) => w >= 80, // Hide on narrow terminals
},
onHandle: (handle) => {
// handle.setHidden(true/false) — temporarily hide
// handle.hide() — permanently remove
},
}
);
```
**Anchor positions:**
```
top-left top-center top-right
left-center center right-center
bottom-left bottom-center bottom-right
```
**Stacked overlays:** Multiple overlays stack (newest on top). Closing one gives focus to the one below.
**⚠️ Overlay lifecycle:** Components are disposed when closed. Never reuse references — create fresh instances each time.
</overlays>
<custom_editor>
Replace the main input editor permanently:
```typescript
import { CustomEditor } from "@singularity-forge/coding-agent";
class VimEditor extends CustomEditor {
private mode: "normal" | "insert" = "insert";
handleInput(data: string): void {
if (matchesKey(data, "escape") && this.mode === "insert") {
this.mode = "normal"; return;
}
if (this.mode === "insert") { super.handleInput(data); return; }
switch (data) {
case "i": this.mode = "insert"; return;
case "h": super.handleInput("\x1b[D"); return; // Left
case "j": super.handleInput("\x1b[B"); return; // Down
case "k": super.handleInput("\x1b[A"); return; // Up
case "l": super.handleInput("\x1b[C"); return; // Right
}
if (data.length === 1 && data.charCodeAt(0) >= 32) return; // Block printable in normal
super.handleInput(data);
}
}
ctx.ui.setEditorComponent((_tui, theme, keybindings) => new VimEditor(theme, keybindings));
ctx.ui.setEditorComponent(undefined); // Restore default
```
**Critical:** Extend `CustomEditor` (NOT `Editor`) to get app keybindings (escape to abort, ctrl+d, model switching).
</custom_editor>
<built_in_components>
**From `@singularity-forge/tui`:**
| Component | Constructor | Purpose |
|-----------|-------------|---------|
| `Text` | `new Text(content, paddingX, paddingY, bgFn?)` | Multi-line text with word wrap |
| `Box` | `new Box(paddingX, paddingY, bgFn)` | Container with padding+background, `.addChild()` |
| `Container` | `new Container()` | Vertical stack, `.addChild()`, `.removeChild()`, `.clear()` |
| `Spacer` | `new Spacer(lines)` | Empty vertical space |
| `Markdown` | `new Markdown(content, padX, padY, getMarkdownTheme())` | Rendered markdown with syntax highlighting |
| `Image` | `new Image(base64, mimeType, theme, opts?)` | Image rendering (Kitty, iTerm2) |
| `SelectList` | `new SelectList(items, maxVisible, themeOpts)` | Interactive selection with search and scrolling |
| `SettingsList` | `new SettingsList(items, maxVisible, theme, onChange, onClose, opts?)` | Toggle settings with left/right arrows |
| `Input` | `new Input()` | Text input field |
| `Editor` | `new Editor(tui, editorTheme)` | Multi-line editor with undo |
**SelectList usage:**
```typescript
const items: SelectItem[] = [
{ value: "opt1", label: "Option 1", description: "First option" },
{ value: "opt2", label: "Option 2" },
];
const selectList = new SelectList(items, 10, {
selectedPrefix: (t) => theme.fg("accent", t),
selectedText: (t) => theme.fg("accent", t),
description: (t) => theme.fg("muted", t),
scrollInfo: (t) => theme.fg("dim", t),
noMatch: (t) => theme.fg("warning", t),
});
selectList.onSelect = (item) => { /* item.value */ };
selectList.onCancel = () => { /* escape pressed */ };
```
**SettingsList usage:**
```typescript
const items: SettingItem[] = [
{ id: "verbose", label: "Verbose mode", currentValue: "off", values: ["on", "off"] },
{ id: "theme", label: "Theme", currentValue: "dark", values: ["dark", "light", "auto"] },
];
const settings = new SettingsList(items, 15, getSettingsListTheme(),
(id, newValue) => { /* setting changed */ },
() => { /* close requested */ },
{ enableSearch: true },
);
```
**From `@singularity-forge/coding-agent`:**
| Component | Constructor | Purpose |
|-----------|-------------|---------|
| `DynamicBorder` | `new DynamicBorder((s: string) => theme.fg("accent", s))` | Border line |
| `BorderedLoader` | — | Spinner with cancel support |
| `CustomEditor` | `new CustomEditor(theme, keybindings)` | Base class for custom editors |
</built_in_components>
<keyboard_input>
```typescript
import { matchesKey, Key } from "@singularity-forge/tui";
handleInput(data: string) {
// Basic keys
if (matchesKey(data, Key.up)) {}
if (matchesKey(data, Key.down)) {}
if (matchesKey(data, Key.enter)) {}
if (matchesKey(data, Key.escape)) {}
if (matchesKey(data, Key.tab)) {}
if (matchesKey(data, Key.space)) {}
if (matchesKey(data, Key.backspace)) {}
if (matchesKey(data, Key.home)) {}
if (matchesKey(data, Key.end)) {}
// With modifiers
if (matchesKey(data, Key.ctrl("c"))) {}
if (matchesKey(data, Key.shift("tab"))) {}
if (matchesKey(data, Key.alt("left"))) {}
if (matchesKey(data, Key.ctrlShift("p"))) {}
// String format also works: "enter", "ctrl+c", "shift+tab"
// Printable character detection
if (data.length === 1 && data.charCodeAt(0) >= 32) {
// Letter, number, symbol
}
}
```
**handleInput contract:**
1. Check for your keys
2. Update state
3. Call `this.invalidate()` if render output changes
4. Call `tui.requestRender()` to trigger re-render
</keyboard_input>
<line_width_rule>
**Cardinal rule: each line from render() must not exceed `width` visible characters.**
```typescript
import { visibleWidth, truncateToWidth, wrapTextWithAnsi } from "@singularity-forge/tui";
visibleWidth("\x1b[32mHello\x1b[0m"); // Returns 5 (ignores ANSI codes)
truncateToWidth("Very long text here", 10); // "Very lo..."
truncateToWidth("Very long text here", 10, ""); // "Very long " (no ellipsis)
wrapTextWithAnsi("\x1b[32mLong green text\x1b[0m", 10); // Word wrap preserving ANSI
```
If lines exceed `width`, terminal wraps cause visual corruption.
</line_width_rule>
<performance_caching>
Always cache render output:
```typescript
class CachedComponent {
private cachedWidth?: number;
private cachedLines?: string[];
render(width: number): string[] {
if (this.cachedLines && this.cachedWidth === width) return this.cachedLines;
const lines = this.computeLines(width);
this.cachedWidth = width;
this.cachedLines = lines;
return lines;
}
invalidate() { this.cachedWidth = undefined; this.cachedLines = undefined; }
}
```
**Update cycle:** State changes → `invalidate()` → `tui.requestRender()` → `render(width)` called
**Game loop pattern** (real-time updates):
```typescript
this.interval = setInterval(() => {
this.tick();
this.version++;
this.tui.requestRender();
}, 100); // 10 FPS
// Clean up in dispose()
clearInterval(this.interval);
```
</performance_caching>
<theme_colors>
Always use theme from callback params, never import directly.
**All foreground colors:**
| Category | Colors |
|----------|--------|
| General | `text`, `accent`, `muted`, `dim` |
| Status | `success`, `error`, `warning` |
| Borders | `border`, `borderAccent`, `borderMuted` |
| Messages | `userMessageText`, `customMessageText`, `customMessageLabel` |
| Tools | `toolTitle`, `toolOutput` |
| Diffs | `toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext` |
| Markdown | `mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, `mdCodeBlock`, `mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet` |
| Syntax | `syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation` |
| Thinking | `thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh` |
**All background colors:** `selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`
**Syntax highlighting:**
```typescript
import { highlightCode, getLanguageFromPath } from "@singularity-forge/coding-agent";
const lang = getLanguageFromPath("/file.rs"); // "rust"
const highlighted = highlightCode(code, lang, theme);
```
</theme_colors>
<common_mistakes>
1. **Lines exceed width** → Visual corruption. Use `truncateToWidth()` on every line.
2. **Forgetting `tui.requestRender()`** → UI doesn't update. Call after invalidate().
3. **Importing theme directly** → Wrong colors after theme switch. Use theme from callback.
4. **Not typing DynamicBorder param** → `new DynamicBorder((s: string) => theme.fg("accent", s))`.
5. **Reusing disposed overlay components** → Create fresh instances each time.
6. **Styles bleeding across lines** → TUI resets per line. Reapply styles, or use `wrapTextWithAnsi()`.
7. **Not implementing invalidate()** → Theme changes don't take effect.
8. **Forgetting super.invalidate()** → `override invalidate() { super.invalidate(); /* cleanup */ }`
9. **Timer not cleaned up** → Call `clearInterval` before `done()`.
10. **Using ctx.ui in non-interactive mode** → Check `ctx.hasUI` first.
</common_mistakes>

View file

@ -1,126 +0,0 @@
<overview>
Complete event reference with handler signatures, return types, and type narrowing utilities.
</overview>
<event_categories>
**Session events:** `session_start`, `session_before_switch`, `session_switch`, `session_before_fork`, `session_fork`, `session_before_compact`, `session_compact`, `session_before_tree`, `session_tree`, `session_shutdown`
**Agent events:** `before_agent_start`, `agent_start`, `agent_end`, `turn_start`, `turn_end`, `context`, `before_provider_request`, `message_start`, `message_update`, `message_end`
**Tool events:** `tool_call`, `tool_execution_start`, `tool_execution_update`, `tool_execution_end`, `tool_result`
**Input events:** `input`
**Model events:** `model_select`
**User bash events:** `user_bash`
**Special:** `session_directory` (CLI startup only, no `ctx` — receives only event)
</event_categories>
<handler_signature>
```typescript
pi.on("event_name", async (event, ctx: ExtensionContext) => {
// event — typed payload for this event
// ctx — access to UI, session, model, control flow
// Return undefined for no action, or a typed response
});
```
</handler_signature>
<key_events>
**before_agent_start** — Fired after user prompt, before agent loop. Primary hook for context injection and system prompt modification.
```typescript
pi.on("before_agent_start", async (event, ctx) => {
// event.prompt — user's prompt text
// event.images — attached images
// event.systemPrompt — current system prompt
return {
message: { customType: "my-ext", content: "Extra context", display: true },
systemPrompt: event.systemPrompt + "\n\nExtra instructions...",
};
});
```
**tool_call** — Fired before tool executes. Can block.
```typescript
import { isToolCallEventType } from "@singularity-forge/coding-agent";
pi.on("tool_call", async (event, ctx) => {
if (isToolCallEventType("bash", event)) {
// event.input is typed as { command: string; timeout?: number }
if (event.input.command.includes("rm -rf")) {
return { block: true, reason: "Dangerous command" };
}
}
});
```
**tool_result** — Fired after tool executes. Can modify result. Handlers chain like middleware.
```typescript
import { isToolResultEventType } from "@singularity-forge/coding-agent";
pi.on("tool_result", async (event, ctx) => {
if (isToolResultEventType("bash", event)) {
// event.details is typed as BashToolDetails
}
// Return partial patch: { content, details, isError }
// Omitted fields keep current values
});
```
**context** — Fired before each LLM call. Modify messages non-destructively.
```typescript
pi.on("context", async (event, ctx) => {
// event.messages is a deep copy — safe to modify
const filtered = event.messages.filter(m => !shouldPrune(m));
return { messages: filtered };
});
```
**input** — Fired when user input is received, before skill/template expansion.
```typescript
pi.on("input", async (event, ctx) => {
// event.text — raw input
// event.source — "interactive", "rpc", or "extension"
if (event.text.startsWith("?quick "))
return { action: "transform", text: `Respond briefly: ${event.text.slice(7)}` };
return { action: "continue" };
});
```
**model_select** — Fired when model changes.
```typescript
pi.on("model_select", async (event, ctx) => {
// event.model, event.previousModel, event.source ("set"|"cycle"|"restore")
});
```
</key_events>
<type_narrowing>
Built-in type guards for tool events:
```typescript
import { isToolCallEventType, isToolResultEventType } from "@singularity-forge/coding-agent";
// Tool calls — narrows event.input type
if (isToolCallEventType("bash", event)) { /* event.input: { command, timeout? } */ }
if (isToolCallEventType("read", event)) { /* event.input: { path, offset?, limit? } */ }
if (isToolCallEventType("write", event)) { /* event.input: { path, content } */ }
if (isToolCallEventType("edit", event)) { /* event.input: { path, oldText, newText } */ }
// Tool results — narrows event.details type
if (isToolResultEventType("bash", event)) { /* event.details: BashToolDetails */ }
```
For custom tools, export your input type and use explicit type params:
```typescript
if (isToolCallEventType<"my_tool", MyToolInput>("my_tool", event)) {
event.input.action; // typed
}
```
</type_narrowing>

View file

@ -1,64 +0,0 @@
<overview>
The extension lifecycle from load to shutdown, including the full event flow.
</overview>
<loading>
Extensions load when SF starts (or on `/reload`). The default export function runs synchronously — subscribe to events and register tools/commands during this call.
```
SF starts
└─► Extension default function runs
├── pi.on("event", handler) ← Subscribe
├── pi.registerTool({...}) ← Register tools
├── pi.registerCommand(...) ← Register commands
└── pi.registerShortcut(...) ← Register shortcuts
└─► session_start fires
```
</loading>
<event_flow>
Full event flow per user prompt:
```
user sends prompt
├─► Extension commands checked (bypass if match)
├─► input event (can intercept/transform/handle)
├─► Skill/template expansion
├─► before_agent_start (inject message, modify system prompt)
├─► agent_start
│ ┌── Turn loop (repeats while LLM calls tools) ──┐
│ │ turn_start │
│ │ context (can modify messages sent to LLM) │
│ │ before_provider_request (inspect/replace payload)│
│ │ LLM responds → may call tools: │
│ │ tool_call (can BLOCK) │
│ │ tool_execution_start/update/end │
│ │ tool_result (can MODIFY) │
│ │ turn_end │
│ └────────────────────────────────────────────────┘
└─► agent_end
```
</event_flow>
<session_events>
| Event | When | Can Return |
|-------|------|------------|
| `session_start` | Session loads | — |
| `session_before_switch` | Before `/new` or `/resume` | `{ cancel: true }` |
| `session_switch` | After switch | — |
| `session_before_fork` | Before `/fork` | `{ cancel: true }`, `{ skipConversationRestore: true }` |
| `session_fork` | After fork | — |
| `session_before_compact` | Before compaction | `{ cancel: true }`, `{ compaction: {...} }` |
| `session_compact` | After compaction | — |
| `session_shutdown` | On exit | — |
</session_events>
<hot_reload>
Extensions in auto-discovered locations hot-reload with `/reload`:
- `session_shutdown` fires for old runtime
- Resources re-scanned
- `session_start` fires for new runtime
- Code after `await ctx.reload()` still runs from the pre-reload version — treat as terminal
</hot_reload>

View file

@ -1,75 +0,0 @@
<overview>
ExtensionAPI methods — the `pi` object received in the default export function.
</overview>
<core_registration>
| Method | Purpose |
|--------|---------|
| `pi.on(event, handler)` | Subscribe to events |
| `pi.registerTool(definition)` | Register LLM-callable tool |
| `pi.registerCommand(name, options)` | Register `/command` |
| `pi.registerShortcut(key, options)` | Register keyboard shortcut |
| `pi.registerFlag(name, options)` | Register CLI flag |
| `pi.registerMessageRenderer(customType, renderer)` | Custom message rendering |
| `pi.registerProvider(name, config)` | Register/override model provider |
| `pi.unregisterProvider(name)` | Remove a provider |
</core_registration>
<messaging>
| Method | Purpose |
|--------|---------|
| `pi.sendMessage(message, options?)` | Inject custom message into session |
| `pi.sendUserMessage(content, options?)` | Send user message (triggers turn) |
**Delivery modes for `sendMessage`:**
- `"steer"` (default) — Interrupts streaming after current tool
- `"followUp"` — Waits for agent to finish all tools
- `"nextTurn"` — Queued for next user prompt
```typescript
pi.sendMessage({
customType: "my-extension",
content: "Additional context",
display: true,
details: { ... },
}, { deliverAs: "steer", triggerTurn: true });
```
</messaging>
<state_session>
| Method | Purpose |
|--------|---------|
| `pi.appendEntry(customType, data?)` | Persist state (NOT sent to LLM) |
| `pi.setSessionName(name)` | Set session display name |
| `pi.getSessionName()` | Get session name |
| `pi.setLabel(entryId, label)` | Bookmark entry for `/tree` |
</state_session>
<tool_management>
```typescript
const active = pi.getActiveTools(); // ["read", "bash", "edit", "write"]
const all = pi.getAllTools(); // [{ name, description }, ...]
pi.setActiveTools(["read", "bash"]); // Enable/disable tools
```
</tool_management>
<model_management>
```typescript
const model = ctx.modelRegistry.find("anthropic", "claude-sonnet-4-5");
if (model) {
const success = await pi.setModel(model); // Returns false if no API key
}
pi.getThinkingLevel(); // "off" | "minimal" | "low" | "medium" | "high" | "xhigh"
pi.setThinkingLevel("high");
```
</model_management>
<utilities>
| Method | Purpose |
|--------|---------|
| `pi.exec(cmd, args, opts?)` | Shell command (prefer over child_process) |
| `pi.events` | Shared event bus for inter-extension communication |
| `pi.getFlag(name)` | Get CLI flag value |
| `pi.getCommands()` | All available slash commands |
</utilities>

View file

@ -1,53 +0,0 @@
<overview>
ExtensionContext (`ctx`) — available in all event handlers (except `session_directory`).
</overview>
<ui_methods>
**Dialogs (blocking — wait for user response):**
```typescript
const choice = await ctx.ui.select("Pick one:", ["A", "B", "C"]);
const ok = await ctx.ui.confirm("Delete?", "This cannot be undone");
const name = await ctx.ui.input("Name:", "placeholder");
const text = await ctx.ui.editor("Edit:", "prefilled text");
// Timed dialog — auto-dismiss after timeout
const ok = await ctx.ui.confirm("Auto-confirm?", "Proceeds in 5s", { timeout: 5000 });
```
**Non-blocking UI:**
```typescript
ctx.ui.notify("Done!", "info"); // Toast: "info" | "warning" | "error"
ctx.ui.setStatus("my-ext", "● Active"); // Footer status
ctx.ui.setStatus("my-ext", undefined); // Clear
ctx.ui.setWidget("my-id", ["Line 1", "Line 2"]); // Widget above editor
ctx.ui.setWidget("my-id", ["Below!"], { placement: "belowEditor" });
ctx.ui.setTitle("sf - my project"); // Terminal title
ctx.ui.setEditorText("Prefill"); // Set editor content
ctx.ui.setWorkingMessage("Analyzing..."); // Working message during streaming
ctx.ui.setToolsExpanded(true); // Expand tool output
```
</ui_methods>
<ctx_properties>
| Property/Method | Purpose |
|----------------|---------|
| `ctx.hasUI` | `false` in print/JSON mode — check before dialogs |
| `ctx.cwd` | Current working directory |
| `ctx.sessionManager` | Read-only session state |
| `ctx.modelRegistry` / `ctx.model` | Model access |
| `ctx.isIdle()` / `ctx.abort()` / `ctx.hasPendingMessages()` | Agent state |
| `ctx.shutdown()` | Request graceful exit (deferred until idle) |
| `ctx.getContextUsage()` | Current context token usage |
| `ctx.compact(options?)` | Trigger compaction |
| `ctx.getSystemPrompt()` | Current effective system prompt |
</ctx_properties>
<session_manager>
```typescript
ctx.sessionManager.getEntries() // All entries
ctx.sessionManager.getBranch() // Current branch
ctx.sessionManager.getLeafId() // Current leaf entry ID
ctx.sessionManager.getSessionFile() // Session JSONL path
ctx.sessionManager.getLabel(entryId) // Entry label
```
</session_manager>

View file

@ -1,37 +0,0 @@
<overview>
Non-negotiable rules and common gotchas when building SF extensions.
</overview>
<must_follow>
1. **Use `StringEnum` for string enums** — `Type.Union`/`Type.Literal` breaks Google's API.
2. **Truncate tool output** — Large output causes context overflow, compaction failures, degraded performance. Limit: 50KB / 2000 lines.
3. **Use theme from callback** — Don't import theme directly. Use the `theme` parameter from `ctx.ui.custom()` or render functions.
4. **`DynamicBorder` color param** — Type as `(s: string) => theme.fg("accent", s)`.
5. **Call `tui.requestRender()` after state changes** in `handleInput`.
6. **Return `{ render, invalidate, handleInput }`** from custom components.
7. **Lines must not exceed `width`** in `render()` — use `truncateToWidth()`.
8. **Session control methods ONLY in commands** — `waitForIdle()`, `newSession()`, `fork()`, `navigateTree()`, `reload()` will **deadlock** in event handlers.
9. **Strip leading `@` from path arguments** — some models add it.
10. **Store state in tool result `details`** for proper branching support.
</must_follow>
<common_patterns>
- Rebuild component on `invalidate()` when pre-baking theme colors
- Check `signal?.aborted` in long-running tool executions
- Use `pi.exec()` instead of `child_process` for shell commands
- Overlay components are **disposed when closed** — create fresh instances each time
- Treat `ctx.reload()` as terminal — code after it still runs from the pre-reload version
- Check `ctx.hasUI` before dialog methods (false in print/JSON mode)
- Extension errors are logged but don't crash SF — tool_call handler errors fail-safe (block the tool)
</common_patterns>
<_sf_paths>
**SF extension paths (community/user-installed extensions):**
- Global: `~/.pi/agent/extensions/*.ts`
- Global (subdir): `~/.pi/agent/extensions/*/index.ts`
- Project-local: `.sf/extensions/*.ts`
- Project-local (subdir): `.sf/extensions/*/index.ts`
Note: `~/.sf/agent/extensions/` is reserved for bundled extensions synced from the sf package.
Community extensions placed there are silently ignored by the loader.
</_sf_paths>

View file

@ -1,32 +0,0 @@
<overview>
Mode behavior determines which UI methods work. Extensions may run in non-interactive modes where dialogs are unavailable.
</overview>
<mode_table>
| Mode | UI Methods | Notes |
|------|-----------|-------|
| **Interactive** (default) | Full TUI | Normal operation — all UI works |
| **RPC** (`--mode rpc`) | JSON protocol | Host handles UI, dialogs work via sub-protocol |
| **JSON** (`--mode json`) | No-op | Event stream to stdout, no UI |
| **Print** (`-p`) | No-op | Extensions run but can't prompt users |
</mode_table>
<checking_ui>
**Always check `ctx.hasUI`** before calling dialog methods:
```typescript
if (ctx.hasUI) {
const ok = await ctx.ui.confirm("Delete?", "Sure?");
if (!ok) return;
} else {
// Default behavior for non-interactive mode
// Or just proceed without confirmation
}
```
`ctx.hasUI` is `false` in print mode (`-p`) and JSON mode; it is `true` in interactive and RPC modes.
</checking_ui>
<fire_and_forget>
Non-blocking methods (`notify`, `setStatus`, `setWidget`, `setTitle`, `setEditorText`) are safe in all modes — they're no-ops when no UI is available.
</fire_and_forget>

View file

@ -1,89 +0,0 @@
<overview>
Model and provider management — switching models, registering custom providers with OAuth, and reacting to model changes.
</overview>
<switching_models>
```typescript
const model = ctx.modelRegistry.find("anthropic", "claude-sonnet-4-5");
if (model) {
const success = await pi.setModel(model);
if (!success) ctx.ui.notify("No API key for this model", "error");
}
// Thinking level
pi.getThinkingLevel(); // "off" | "minimal" | "low" | "medium" | "high" | "xhigh"
pi.setThinkingLevel("high"); // Clamped to model capabilities
```
</switching_models>
<register_provider>
```typescript
pi.registerProvider("my-proxy", {
baseUrl: "https://proxy.example.com",
apiKey: "PROXY_API_KEY", // Env var name or literal
api: "anthropic-messages", // or "openai-completions", "openai-responses"
headers: { "X-Custom": "value" }, // Optional custom headers
authHeader: true, // Auto-add Authorization: Bearer header
models: [
{
id: "claude-sonnet-4-20250514",
name: "Claude 4 Sonnet (proxy)",
reasoning: false,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 16384,
}
],
});
// Override just baseUrl for an existing provider (keeps all models)
pi.registerProvider("anthropic", {
baseUrl: "https://proxy.example.com",
});
// Remove a provider (restores any overridden built-in models)
pi.unregisterProvider("my-proxy");
```
Takes effect immediately after initial load phase — no `/reload` required.
</register_provider>
<oauth_provider>
Register a provider with OAuth support for `/login`:
```typescript
pi.registerProvider("corporate-ai", {
baseUrl: "https://ai.corp.com",
api: "openai-responses",
models: [/* ... */],
oauth: {
name: "Corporate AI (SSO)",
async login(callbacks) {
callbacks.onAuth({ url: "https://sso.corp.com/..." });
const code = await callbacks.onPrompt({ message: "Enter code:" });
return { refresh: code, access: code, expires: Date.now() + 3600000 };
},
async refreshToken(credentials) {
return credentials; // Refresh logic
},
getApiKey(credentials) {
return credentials.access;
},
},
});
```
</oauth_provider>
<model_events>
React to model changes:
```typescript
pi.on("model_select", async (event, ctx) => {
// event.model — newly selected model
// event.previousModel — previous model (undefined if first)
// event.source — "set" | "cycle" | "restore"
ctx.ui.setStatus("model", `${event.model.provider}/${event.model.id}`);
});
```
</model_events>

View file

@ -1,55 +0,0 @@
<overview>
Packaging extensions for distribution via npm, git, or local paths. Creating SF packages.
</overview>
<package_manifest>
Add a `pi` manifest to `package.json`:
```json
{
"name": "my-sf-package",
"keywords": ["pi-package"],
"pi": {
"extensions": ["./extensions"],
"skills": ["./skills"],
"prompts": ["./prompts"],
"themes": ["./themes"]
}
}
```
</package_manifest>
<installing>
```bash
sf install npm:@foo/bar@1.0.0
sf install git:github.com/user/repo@v1
sf install ./local/path
# Try without installing:
sf -e npm:@foo/bar
```
</installing>
<convention_directories>
If no `pi` manifest exists, SF auto-discovers these convention directories:
- `extensions/` — `.ts` and `.js` files
- `skills/` — `SKILL.md` folders
- `prompts/` — `.md` files
- `themes/` — `.json` files
</convention_directories>
<dependencies>
- List `@singularity-forge/ai`, `@singularity-forge/coding-agent`, `@singularity-forge/tui`, `@sinclair/typebox` in `peerDependencies` with `"*"` — they're bundled by the runtime.
- Other npm deps go in `dependencies`. The runtime runs `npm install` on package installation.
</dependencies>
<gallery_metadata>
```json
{
"pi": {
"video": "https://example.com/demo.mp4",
"image": "https://example.com/screenshot.png"
}
}
```
</gallery_metadata>

View file

@ -1,90 +0,0 @@
<overview>
Remote execution via pluggable operations, spawnHook for bash, and tool override patterns.
</overview>
<pluggable_operations>
Built-in tools support pluggable operations for SSH, containers, etc.:
```typescript
import { createReadTool, createBashTool, createWriteTool } from "@singularity-forge/coding-agent";
// Create tool with custom remote operations
const remoteBash = createBashTool(cwd, {
operations: {
execute: (cmd) => sshExec(remote, cmd),
},
});
```
**Operations interfaces:** `ReadOperations`, `WriteOperations`, `EditOperations`, `BashOperations`, `LsOperations`, `GrepOperations`, `FindOperations`
</pluggable_operations>
<spawn_hook>
The bash tool supports a `spawnHook` to modify commands before execution:
```typescript
const bashTool = createBashTool(cwd, {
spawnHook: ({ command, cwd, env }) => ({
command: `source ~/.profile\n${command}`,
cwd: `/mnt/sandbox${cwd}`,
env: { ...env, CI: "1" },
}),
});
```
</spawn_hook>
<ssh_pattern>
Full SSH pattern with flag-based switching:
```typescript
import { createBashTool, type ExtensionAPI } from "@singularity-forge/coding-agent";
export default function (pi: ExtensionAPI) {
pi.registerFlag("ssh", { description: "SSH target", type: "string" });
const localBash = createBashTool(process.cwd());
pi.registerTool({
...localBash,
async execute(id, params, signal, onUpdate, ctx) {
const sshTarget = pi.getFlag("--ssh");
if (sshTarget) {
const remoteBash = createBashTool(process.cwd(), {
operations: createSSHOperations(sshTarget),
});
return remoteBash.execute(id, params, signal, onUpdate);
}
return localBash.execute(id, params, signal, onUpdate);
},
});
}
```
</ssh_pattern>
<tool_override_pattern>
Override built-in tools for logging/access control — omit renderCall/renderResult to keep built-in rendering:
```typescript
import { createReadTool } from "@singularity-forge/coding-agent";
import { Type } from "@sinclair/typebox";
pi.registerTool({
name: "read", // Same name = overrides built-in
label: "Read (Logged)",
description: "Read file contents with logging",
parameters: Type.Object({
path: Type.String(),
offset: Type.Optional(Type.Number()),
limit: Type.Optional(Type.Number()),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
console.log(`[AUDIT] Reading: ${params.path}`);
const builtIn = createReadTool(ctx.cwd);
return builtIn.execute(toolCallId, params, signal, onUpdate);
},
// Omit renderCall/renderResult → built-in renderer used automatically
});
```
**Must match exact result shape** including `details` type.
</tool_override_pattern>

View file

@ -1,70 +0,0 @@
<overview>
State management patterns for extensions — tool result details (branch-safe) and appendEntry (private).
</overview>
<tool_result_details>
**Recommended for stateful tools.** State in `details` works correctly with branching/forking.
```typescript
export default function (pi: ExtensionAPI) {
let items: string[] = [];
// Reconstruct state from session on load
pi.on("session_start", async (_event, ctx) => reconstructState(ctx));
pi.on("session_switch", async (_event, ctx) => reconstructState(ctx));
pi.on("session_fork", async (_event, ctx) => reconstructState(ctx));
pi.on("session_tree", async (_event, ctx) => reconstructState(ctx));
const reconstructState = (ctx: ExtensionContext) => {
items = [];
for (const entry of ctx.sessionManager.getBranch()) {
if (entry.type === "message" && entry.message.role === "toolResult") {
if (entry.message.toolName === "my_tool") {
items = entry.message.details?.items ?? [];
}
}
}
};
pi.registerTool({
name: "my_tool",
// ...
async execute(toolCallId, params, signal, onUpdate, ctx) {
items.push(params.text);
return {
content: [{ type: "text", text: "Added" }],
details: { items: [...items] }, // ← Snapshot full state
};
},
});
}
```
**Key:** Reconstruct on ALL session change events: `session_start`, `session_switch`, `session_fork`, `session_tree`.
</tool_result_details>
<append_entry>
**For extension-private state** that doesn't participate in LLM context but needs to survive restarts:
```typescript
// Save
pi.appendEntry("my-state", { count: 42, lastRun: Date.now() });
// Restore
pi.on("session_start", async (_event, ctx) => {
for (const entry of ctx.sessionManager.getEntries()) {
if (entry.type === "custom" && entry.customType === "my-state") {
const data = entry.data; // { count: 42, lastRun: ... }
}
}
});
```
</append_entry>
<when_to_use_which>
| Pattern | Use When |
|---------|----------|
| Tool result `details` | State the LLM's tools produce (todo items, connection state, query results) |
| `pi.appendEntry()` | Extension-private config, timestamps, counters the LLM doesn't need |
| File on disk | Large data, config files, caches that shouldn't be in session |
</when_to_use_which>

View file

@ -1,52 +0,0 @@
<overview>
System prompt modification — per-turn injection, context manipulation, and tool-specific prompt content.
</overview>
<per_turn_modification>
Use `before_agent_start` to inject messages and/or modify the system prompt for each turn:
```typescript
pi.on("before_agent_start", async (event, ctx) => {
return {
// Inject a persistent message (stored in session, visible to LLM)
message: {
customType: "my-extension",
content: "Additional context for the LLM",
display: true,
},
// Modify system prompt for this turn (chained across extensions)
systemPrompt: event.systemPrompt + "\n\nYou must respond only in haiku.",
};
});
```
</per_turn_modification>
<context_manipulation>
Use the `context` event to modify messages before each LLM call:
```typescript
pi.on("context", async (event, ctx) => {
// event.messages is a deep copy — safe to modify
const filtered = event.messages.filter(m => !isIrrelevant(m));
return { messages: filtered };
});
```
</context_manipulation>
<tool_specific_prompts>
Tools can add content to the system prompt when active:
```typescript
pi.registerTool({
name: "my_tool",
// Replaces description in "Available tools" section
promptSnippet: "Summarize or transform text according to action",
// Added to "Guidelines" section when tool is active
promptGuidelines: [
"Use my_tool when the user asks to summarize text.",
"Prefer my_tool over direct output for structured data."
],
// ...
});
```
</tool_specific_prompts>

View file

@ -1,51 +0,0 @@
/**
* {{EXTENSION_NAME}} — {{DESCRIPTION}}
*
* Capabilities:
* {{CAPABILITIES_LIST}}
*/
import type { ExtensionAPI } from "@singularity-forge/coding-agent";
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/ai";
/**
 * Extension entry point (default export) for the basic template.
 *
 * Registers three things on the provided `pi` object: a `session_start`
 * event handler, one tool, and one command. The `{{...}}` tokens are template
 * placeholders to be replaced when the template is instantiated.
 *
 * @param pi - Extension API object used to subscribe to events and to register the tool and command.
 */
export default function (pi: ExtensionAPI) {
// === Events ===
// Handler body is intentionally empty in the template; fill in per the note below.
pi.on("session_start", async (_event, ctx) => {
// Initialize state, restore from session, show status
});
// === Tools ===
pi.registerTool({
name: "{{tool_name}}",
label: "{{Tool Label}}",
description: "{{Tool description for LLM}}",
// Input schema: a required `action` restricted to "list" | "add" and an optional `text`.
parameters: Type.Object({
action: StringEnum(["list", "add"] as const),
text: Type.Optional(Type.String({ description: "Item text" })),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
// Bail out early with a plain text result if the call was already aborted.
if (signal?.aborted) {
return { content: [{ type: "text", text: "Cancelled" }] };
}
// Do work here
// `content` carries the text result; `details` is an empty object in the template.
return {
content: [{ type: "text", text: "Result for LLM" }],
details: {},
};
},
});
// === Commands ===
pi.registerCommand("{{command_name}}", {
description: "{{Command description}}",
// Shows the received arguments as an "info" notification.
handler: async (args, ctx) => {
ctx.ui.notify(`Running ${args}`, "info");
},
});
}

View file

@ -1,143 +0,0 @@
/**
* {{EXTENSION_NAME}} — Stateful tool with persistence
*
* State is stored in tool result details for proper branching support.
*/
import type { ExtensionAPI, ExtensionContext } from "@singularity-forge/coding-agent";
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/ai";
import { Text, truncateToWidth, matchesKey, Key } from "@singularity-forge/tui";
/** A single stored item. The template defines only the numeric `id`; add domain fields as needed. */
interface {{ItemType}} {
id: number;
// Add fields
}
/**
 * Payload attached to each tool result as `details`.
 *
 * Holds the action that ran plus a snapshot of the item list and the next id
 * to assign, so the in-memory state can be rebuilt from the session.
 */
interface {{ToolDetails}} {
// Action that produced this result ("list", "add" or "remove" in this template).
action: string;
// Snapshot of all items at the time the result was produced.
items: {{ItemType}}[];
// Next id to hand out when an item is added.
nextId: number;
// Optional error text; `renderResult` displays it in the error color when present.
error?: string;
}
/**
 * Extension entry point (default export) for the stateful-tool template.
 *
 * Keeps an in-memory item list plus an id counter, snapshots both into every
 * tool result's `details`, and rebuilds them from the current session branch
 * whenever the session starts, switches, forks, or the tree is navigated.
 * Also registers a command that shows the current items in a custom view.
 * The `{{...}}` tokens are template placeholders.
 *
 * @param pi - Extension API object used to subscribe to events and to register the tool and command.
 */
export default function (pi: ExtensionAPI) {
	let items: {{ItemType}}[] = [];
	let nextId = 1;

	/**
	 * Rebuild `items` / `nextId` from the tool results on the current branch.
	 * Later matching entries overwrite earlier ones, so the last result wins.
	 */
	const reconstructState = (ctx: ExtensionContext) => {
		items = [];
		nextId = 1;
		for (const entry of ctx.sessionManager.getBranch()) {
			if (entry.type === "message" && entry.message.role === "toolResult") {
				if (entry.message.toolName === "{{tool_name}}") {
					const details = entry.message.details as {{ToolDetails}} | undefined;
					if (details) {
						// Copy instead of aliasing: "add"/"remove" mutate `items` in place,
						// and the array inside `details` is the snapshot held by the session
						// entry. Mutating it would corrupt that earlier snapshot.
						items = [...details.items];
						nextId = details.nextId;
					}
				}
			}
		}
	};

	// Reconstruct on ALL session change events
	pi.on("session_start", async (_event, ctx) => reconstructState(ctx));
	pi.on("session_switch", async (_event, ctx) => reconstructState(ctx));
	pi.on("session_fork", async (_event, ctx) => reconstructState(ctx));
	pi.on("session_tree", async (_event, ctx) => reconstructState(ctx));

	// Register the tool
	pi.registerTool({
		name: "{{tool_name}}",
		label: "{{Tool Label}}",
		description: "{{Description for LLM}}",
		// Input schema: required `action`, optional `text` (for add) and `id` (for remove).
		parameters: Type.Object({
			action: StringEnum(["list", "add", "remove"] as const),
			text: Type.Optional(Type.String({ description: "Item text" })),
			id: Type.Optional(Type.Number({ description: "Item ID" })),
		}),

		/**
		 * Run one action. Every successful result carries a fresh copy of the
		 * full state in `details`. Throws when a required argument is missing,
		 * the item id is unknown, or the action is not recognized.
		 */
		async execute(toolCallId, params, signal, onUpdate, ctx) {
			// Bail out early (without details) if the call was already aborted.
			if (signal?.aborted) {
				return { content: [{ type: "text", text: "Cancelled" }] };
			}

			switch (params.action) {
				case "list":
					return {
						content: [{ type: "text", text: items.length ? JSON.stringify(items) : "No items" }],
						details: { action: "list", items: [...items], nextId } as {{ToolDetails}},
					};

				case "add": {
					if (!params.text) throw new Error("text required for add");
					const item: {{ItemType}} = { id: nextId++ /* , ... */ };
					items.push(item);
					return {
						content: [{ type: "text", text: `Added #${item.id}` }],
						details: { action: "add", items: [...items], nextId } as {{ToolDetails}},
					};
				}

				case "remove": {
					if (params.id === undefined) throw new Error("id required for remove");
					const idx = items.findIndex(i => i.id === params.id);
					if (idx === -1) throw new Error(`Item #${params.id} not found`);
					items.splice(idx, 1);
					return {
						content: [{ type: "text", text: `Removed #${params.id}` }],
						details: { action: "remove", items: [...items], nextId } as {{ToolDetails}},
					};
				}

				default:
					throw new Error(`Unknown action: ${params.action}`);
			}
		},

		// Custom rendering
		// Call line: tool name in the title color followed by the action in the muted color.
		renderCall(args, theme) {
			let text = theme.fg("toolTitle", theme.bold("{{tool_name}} "));
			text += theme.fg("muted", args.action);
			return new Text(text, 0, 0);
		},

		// Result line: empty when there are no details, the error text when set,
		// otherwise the action name and item count.
		renderResult(result, { expanded }, theme) {
			const details = result.details as {{ToolDetails}} | undefined;
			if (!details) return new Text("", 0, 0);
			if (details.error) return new Text(theme.fg("error", details.error), 0, 0);
			return new Text(theme.fg("success", `${details.action} (${details.items.length} items)`), 0, 0);
		},
	});

	// User command to view state
	pi.registerCommand("{{command_name}}", {
		description: "View {{items}}",
		handler: async (_args, ctx) => {
			// The custom view below is only opened when a UI is available.
			if (!ctx.hasUI) {
				ctx.ui.notify("Requires interactive mode", "error");
				return;
			}

			await ctx.ui.custom<void>((_tui, theme, _kb, done) => ({
				// Every emitted line is passed through truncateToWidth so it never exceeds `width`.
				render(width: number): string[] {
					const lines = [
						"",
						truncateToWidth(theme.fg("accent", ` {{Items}} (${items.length}) `), width),
						"",
					];
					for (const item of items) {
						lines.push(truncateToWidth(` #${item.id}`, width));
					}
					lines.push("", truncateToWidth(theme.fg("dim", " Press Escape to close"), width), "");
					return lines;
				},
				// Escape closes the view.
				handleInput(data: string) {
					if (matchesKey(data, Key.escape)) done();
				},
				// Nothing is cached, so there is nothing to invalidate.
				invalidate() {},
			}));
		},
	});
}

Some files were not shown because too many files have changed in this diff Show more