fix(gsd): align ADR-009 integration with type-safe builds
Add ADR-009 docs and resolve compile/runtime typing regressions in UOK and extension modules. Refs #4214
This commit is contained in:
parent
bb1b9dce07
commit
76a85300ae
10 changed files with 1003 additions and 19 deletions
497
docs/dev/ADR-009-IMPLEMENTATION-PLAN.md
Normal file
497
docs/dev/ADR-009-IMPLEMENTATION-PLAN.md
Normal file
|
|
@ -0,0 +1,497 @@
|
|||
# ADR-009 Implementation Plan
|
||||
|
||||
**Related ADR:** [ADR-009-orchestration-kernel-refactor.md](./ADR-009-orchestration-kernel-refactor.md)
|
||||
**Status:** Draft
|
||||
**Date:** 2026-04-14
|
||||
**Target Window:** 8-10 waves (incremental, no big-bang rewrite)
|
||||
|
||||
## Objective
|
||||
|
||||
Implement ADR-009 by migrating GSD orchestration internals to a Unified Orchestration Kernel (UOK) with six control planes:
|
||||
|
||||
1. Plan
|
||||
2. Execution
|
||||
3. Model
|
||||
4. Gate
|
||||
5. GitOps
|
||||
6. Audit
|
||||
|
||||
without breaking existing CLI/web/MCP workflows.
|
||||
|
||||
The first production-safe outcome is:
|
||||
|
||||
- existing auto-mode behavior remains stable
|
||||
- new kernel contracts exist behind feature flags
|
||||
- every turn is traceable with deterministic gate outcomes
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Rewriting user-facing command surfaces
|
||||
- Replacing all legacy modules in a single PR
|
||||
- Introducing new provider auth flows that bypass existing compliance boundaries
|
||||
- Forcing `burn-max` behavior as default
|
||||
|
||||
## Constraints
|
||||
|
||||
- Maintain current runtime compatibility and defaults
|
||||
- Preserve existing state-on-disk and DB-backed transition model
|
||||
- Keep provider-agnostic behavior while enforcing provider-specific policy constraints
|
||||
- All migration steps must be reversible behind flags
|
||||
- High-risk changes require parity tests against existing behavior
|
||||
|
||||
## Program Structure
|
||||
|
||||
Implementation is organized into parallel workstreams and executed in waves.
|
||||
|
||||
### Workstream A: Kernel Contracts and Orchestrator Spine
|
||||
|
||||
Goal: define typed contracts and a new orchestration spine without changing behavior.
|
||||
|
||||
Primary targets:
|
||||
|
||||
- `src/resources/extensions/gsd/auto.ts`
|
||||
- `src/resources/extensions/gsd/auto/loop.ts`
|
||||
- `src/resources/extensions/gsd/auto/types.ts`
|
||||
- `src/resources/extensions/gsd/auto/session.ts`
|
||||
|
||||
Deliverables:
|
||||
|
||||
- `TurnContract` and `TurnResult` types
|
||||
- `GateResult` envelope
|
||||
- kernel entrypoint that wraps current dispatch loop via adapter
|
||||
|
||||
### Workstream B: Gate Plane
|
||||
|
||||
Goal: normalize all checks into a unified gate runner.
|
||||
|
||||
Primary targets:
|
||||
|
||||
- `src/resources/extensions/gsd/verification-gate.ts`
|
||||
- `src/resources/extensions/gsd/auto-verification.ts`
|
||||
- `src/resources/extensions/gsd/pre-execution-checks.ts`
|
||||
- `src/resources/extensions/gsd/post-execution-checks.ts`
|
||||
- `src/resources/extensions/gsd/milestone-validation-gates.ts`
|
||||
|
||||
Deliverables:
|
||||
|
||||
- unified gate registry and execution API
|
||||
- deterministic failure classes and retry policies
|
||||
- explicit terminal status persistence
|
||||
|
||||
### Workstream C: Model Plane + Policy Engine
|
||||
|
||||
Goal: enable any-model-any-phase through requirement-based selection plus policy filtering.
|
||||
|
||||
Primary targets:
|
||||
|
||||
- `src/resources/extensions/gsd/model-router.ts`
|
||||
- `src/resources/extensions/gsd/auto-model-selection.ts`
|
||||
- `src/resources/extensions/gsd/preferences-models.ts`
|
||||
- `src/resources/extensions/gsd/model-cost-table.ts`
|
||||
- `src/resources/extensions/gsd/custom-execution-policy.ts`
|
||||
|
||||
Deliverables:
|
||||
|
||||
- requirement vector builder for units
|
||||
- policy filter before capability scoring
|
||||
- new `burn-max` profile
|
||||
- policy decision audit events
|
||||
|
||||
### Workstream D: Execution Graph (Agents/Subagents/Parallel/Teams)
|
||||
|
||||
Goal: move to one DAG scheduler contract.
|
||||
|
||||
Primary targets:
|
||||
|
||||
- `src/resources/extensions/gsd/reactive-graph.ts`
|
||||
- `src/resources/extensions/gsd/slice-parallel-orchestrator.ts`
|
||||
- `src/resources/extensions/gsd/parallel-orchestrator.ts`
|
||||
- `src/resources/extensions/gsd/graph.ts`
|
||||
- `src/resources/extensions/gsd/unit-runtime.ts`
|
||||
|
||||
Deliverables:
|
||||
|
||||
- typed node kinds (`unit`, `hook`, `subagent`, `team-worker`, `verification`, `reprocess`)
|
||||
- shared dependency/conflict resolver
|
||||
- scheduler adapter for current parallel and reactive paths
|
||||
|
||||
### Workstream E: GitOps Transaction Layer
|
||||
|
||||
Goal: guarantee a git action and a metadata record for every turn.
|
||||
|
||||
Primary targets:
|
||||
|
||||
- `src/resources/extensions/gsd/git-service.ts`
|
||||
- `src/resources/extensions/gsd/auto-post-unit.ts`
|
||||
- `src/resources/extensions/gsd/auto-unit-closeout.ts`
|
||||
- `src/resources/extensions/gsd/auto-worktree.ts`
|
||||
|
||||
Deliverables:
|
||||
|
||||
- `turn-start -> stage -> checkpoint -> publish -> record` transaction API
|
||||
- configurable turn action mode (`commit|snapshot|status-only`)
|
||||
- closeout gate integration for git failures
|
||||
|
||||
### Workstream F: Unified Audit Plane
|
||||
|
||||
Goal: unify journal/activity/metrics into a causal event model.
|
||||
|
||||
Primary targets:
|
||||
|
||||
- `src/resources/extensions/gsd/journal.ts`
|
||||
- `src/resources/extensions/gsd/activity-log.ts`
|
||||
- `src/resources/extensions/gsd/metrics.ts`
|
||||
- `src/resources/extensions/gsd/workflow-logger.ts`
|
||||
- `src/resources/extensions/gsd/gsd-db.ts`
|
||||
|
||||
Deliverables:
|
||||
|
||||
- common `AuditEventEnvelope`
|
||||
- trace/turn IDs on all events
|
||||
- append-only JSONL raw log + DB projection index
|
||||
|
||||
### Workstream G: Plan Plane v2
|
||||
|
||||
Goal: formal multi-round clarify/research/draft/compile flow.
|
||||
|
||||
Primary targets:
|
||||
|
||||
- `src/resources/extensions/gsd/guided-flow.ts`
|
||||
- `src/resources/extensions/gsd/preparation.ts`
|
||||
- `src/resources/extensions/gsd/auto/phases.ts`
|
||||
- `src/resources/extensions/gsd/auto-prompts.ts`
|
||||
- prompt templates under `src/resources/extensions/gsd/prompts/`
|
||||
|
||||
Deliverables:
|
||||
|
||||
- bounded multi-round question loop
|
||||
- plan compile step producing executable unit graph
|
||||
- plan gate fail-closed behavior
|
||||
|
||||
## Wave Plan (Execution Order)
|
||||
|
||||
## Wave 0: Baseline and Flag Scaffolding
|
||||
|
||||
Purpose: establish safe rollout controls and baseline telemetry.
|
||||
|
||||
Tasks:
|
||||
|
||||
- Add feature flags:
|
||||
- `uok.enabled`
|
||||
- `uok.gates.enabled`
|
||||
- `uok.model_policy.enabled`
|
||||
- `uok.execution_graph.enabled`
|
||||
- `uok.gitops.enabled`
|
||||
- `uok.audit_unified.enabled`
|
||||
- `uok.plan_v2.enabled`
|
||||
- Add no-op kernel wrapper around current auto loop
|
||||
- Add baseline metrics for parity comparison
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- zero behavior change with all flags off
|
||||
- parity telemetry collected for existing loop
|
||||
|
||||
Verification:
|
||||
|
||||
- `npm run typecheck:extensions`
|
||||
- `npm run test:unit`
|
||||
|
||||
## Wave 1: Contract Extraction
|
||||
|
||||
Purpose: create stable internal API boundaries.
|
||||
|
||||
Tasks:
|
||||
|
||||
- Introduce:
|
||||
- `TurnContract`
|
||||
- `UnitExecutionContext`
|
||||
- `GateResult`
|
||||
- `FailureClass`
|
||||
- `TurnCloseoutRecord`
|
||||
- Adapter layer from legacy auto loop into contracts
|
||||
- Add contract fixtures and serialization tests
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- current auto dispatch runs through adapter path without behavior change
|
||||
- all turn outcomes represented in structured result type
|
||||
|
||||
Verification:
|
||||
|
||||
- targeted tests in `src/resources/extensions/gsd/tests/*auto*`
|
||||
- `npm run test:unit`
|
||||
|
||||
## Wave 2: Gate Plane Unification
|
||||
|
||||
Purpose: centralize pre/in/post checks and retries.
|
||||
|
||||
Tasks:
|
||||
|
||||
- Build `gate-runner` and gate registry
|
||||
- Port existing checks into registered gates:
|
||||
- policy/input/execution/artifact/verification/closeout
|
||||
- Implement deterministic retry matrix by failure class
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- every unit passes through gate runner
|
||||
- explicit gate result persisted for pass/fail/retry/manual-attention
|
||||
|
||||
Verification:
|
||||
|
||||
- extend `verification-gate.test.ts`
|
||||
- extend `validation-gate-patterns.test.ts`
|
||||
- add integration tests for retry escalation
|
||||
|
||||
## Wave 3: Model Plane + Policy Filter
|
||||
|
||||
Purpose: enable requirement-based selection constrained by policy.
|
||||
|
||||
Tasks:
|
||||
|
||||
- Add requirement extraction from unit metadata
|
||||
- Insert policy filter before model scoring
|
||||
- Add `burn-max` token profile wiring
|
||||
- Emit model policy allow/deny events
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- units can select any eligible model across phases
|
||||
- policy-denied routes fail before dispatch
|
||||
- fallback chains remain deterministic
|
||||
|
||||
Verification:
|
||||
|
||||
- extend `model-cost-table.test.ts`
|
||||
- extend model routing tests (`interactive-routing-bypass`, `tool-compatibility`, related router suites)
|
||||
- add policy denial regression tests
|
||||
|
||||
## Wave 4: Execution Graph Scheduler
|
||||
|
||||
Purpose: unify hooks/subagents/parallel/team work under one scheduler contract.
|
||||
|
||||
Tasks:
|
||||
|
||||
- Introduce graph scheduler facade
|
||||
- Map reactive execution nodes to shared node model
|
||||
- Map slice/milestone parallel orchestrators onto scheduler
|
||||
- Add file IO conflict lock integration
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- same task set can execute in deterministic single-worker or parallel graph mode
|
||||
- no deadlock under known reactive/parallel fixtures
|
||||
|
||||
Verification:
|
||||
|
||||
- `slice-parallel-orchestrator.test.ts`
|
||||
- `slice-parallel-conflict.test.ts`
|
||||
- `sidecar-queue.test.ts`
|
||||
- integration: `src/resources/extensions/gsd/tests/integration/*.test.ts`
|
||||
|
||||
## Wave 5: GitOps Transactions Per Turn
|
||||
|
||||
Purpose: enforce turn-level git actions and closeout discipline.
|
||||
|
||||
Tasks:
|
||||
|
||||
- Implement turn transaction API
|
||||
- Wire turn transactions into auto closeout path
|
||||
- Add configurable `turn_action` and `turn_push` semantics
|
||||
- Persist git transaction metadata into audit stream
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- each turn has a git transaction record
|
||||
- blocked git states surface as closeout gate failures
|
||||
|
||||
Verification:
|
||||
|
||||
- `git-service` integration tests
|
||||
- worktree-related integration suites
|
||||
- closeout and merge regression suites
|
||||
|
||||
## Wave 6: Unified Audit Plane
|
||||
|
||||
Purpose: converge logging/metrics/journal into one causal model.
|
||||
|
||||
Tasks:
|
||||
|
||||
- Define `AuditEventEnvelope` schema
|
||||
- Add `traceId`, `turnId`, `causedBy` to event emitters
|
||||
- Write projection pipeline into DB index tables
|
||||
- Maintain append-only raw JSONL logs
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- action-level traceability across model/tool/git/gate/test events
|
||||
- legacy readers remain functional through compatibility projection
|
||||
|
||||
Verification:
|
||||
|
||||
- `workflow-logger*.test.ts`
|
||||
- `workflow-events.test.ts`
|
||||
- `journal` and `metrics` regression tests
|
||||
|
||||
## Wave 7: Plan Plane v2
|
||||
|
||||
Purpose: deliver full multi-round planning and compile-to-unit graph.
|
||||
|
||||
Tasks:
|
||||
|
||||
- Implement bounded clarify rounds
|
||||
- Add explicit research synthesis stage
|
||||
- Add plan compile stage with dependency graph output
|
||||
- Add plan gate with fail-closed checks
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- full roadmap and unit graph produced before execution begins (when enabled)
|
||||
- invalid plans cannot proceed to execution
|
||||
|
||||
Verification:
|
||||
|
||||
- prompt and plan parsing tests
|
||||
- planning tool tests (`plan-milestone`, `plan-slice`, `plan-task`)
|
||||
- discuss/guided flow regression tests
|
||||
|
||||
## Wave 8: Legacy Branch Retirement + Default Flip
|
||||
|
||||
Purpose: reduce maintenance burden and enable UOK as default.
|
||||
|
||||
Tasks:
|
||||
|
||||
- remove superseded code paths in `auto.ts`, `auto/phases.ts`, and legacy closeout paths
|
||||
- keep legacy fallback behind emergency flag for one release window
|
||||
- update docs and preferences reference
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- UOK default in stable channel
|
||||
- no critical parity regressions in one full release cycle
|
||||
|
||||
Verification:
|
||||
|
||||
- full `npm test`
|
||||
- smoke + integration suites
|
||||
- targeted manual UAT for CLI/web/headless
|
||||
|
||||
## Testing and Validation Matrix
|
||||
|
||||
### 1. Unit
|
||||
|
||||
- contract serialization
|
||||
- gate runner behavior by failure class
|
||||
- model policy filter decisions
|
||||
- git transaction state machine
|
||||
- event envelope schema validation
|
||||
|
||||
### 2. Integration
|
||||
|
||||
- auto dispatch across plan/execute/complete/reassess/uat
|
||||
- worktree/branch/none isolation behaviors
|
||||
- parallel and reactive execution parity
|
||||
- policy-denied dispatch fast-fail
|
||||
|
||||
### 3. End-to-End
|
||||
|
||||
- greenfield milestone from discuss -> plan -> execute -> complete -> merge
|
||||
- failure reprocessing (test failure, tool failure, model failure)
|
||||
- full audit trace reconstruction by `traceId`
|
||||
- provider compliance scenarios (allowed vs denied paths)
|
||||
|
||||
### 4. Parity Harness
|
||||
|
||||
- replay selected historical workflows against legacy and UOK paths
|
||||
- compare:
|
||||
- state transitions
|
||||
- produced artifacts
|
||||
- gate decisions
|
||||
- commit outcomes
|
||||
|
||||
## Rollout Strategy
|
||||
|
||||
### Stages
|
||||
|
||||
1. Internal dogfood with flags on
|
||||
2. Beta cohort opt-in via project preference
|
||||
3. General availability with flags default-on
|
||||
4. Legacy fallback removed after stability window
|
||||
|
||||
### Safety Controls
|
||||
|
||||
- runtime kill-switch for each plane
|
||||
- explicit migration warnings in release notes
|
||||
- auto-rollback trigger on critical regressions (gates, git integrity, state corruption)
|
||||
|
||||
## Data and Schema Changes
|
||||
|
||||
Expected schema additions:
|
||||
|
||||
- audit projection tables in `gsd.db`
|
||||
- gate result persistence tables
|
||||
- turn transaction metadata
|
||||
|
||||
Rules:
|
||||
|
||||
- additive migrations only until Wave 8
|
||||
- keep backwards-compatible readers during migration window
|
||||
|
||||
## Dependencies
|
||||
|
||||
1. Stable contract definitions before gate/model/scheduler rewires
|
||||
2. Gate plane before gitops hard enforcement
|
||||
3. Model policy engine before enabling any-model-any-phase by default
|
||||
4. Audit envelope before legacy logger removal
|
||||
5. Plan v2 before enforcing front-loaded planning defaults
|
||||
|
||||
## Risk Register
|
||||
|
||||
### Risk 1: Hidden Coupling in Auto Loop
|
||||
|
||||
Impact: migration bugs due to implicit side effects.
|
||||
Mitigation: adapter-first extraction and parity harness before path switch.
|
||||
|
||||
### Risk 2: Parallel Deadlocks
|
||||
|
||||
Impact: blocked runs or inconsistent state.
|
||||
Mitigation: graph-level deadlock checks, IO lock tests, staged rollout behind flags.
|
||||
|
||||
### Risk 3: Git Noise / Team Workflow Friction
|
||||
|
||||
Impact: commit churn and review overhead.
|
||||
Mitigation: milestone squash defaults and configurable turn transaction modes.
|
||||
|
||||
### Risk 4: Policy Drift Across Providers
|
||||
|
||||
Impact: compliance regressions.
|
||||
Mitigation: provider policy registry tests and release checklist gates.
|
||||
|
||||
### Risk 5: Telemetry Volume Growth
|
||||
|
||||
Impact: storage/perf pressure in long-running projects.
|
||||
Mitigation: append-only raw + indexed projection + retention policies.
|
||||
|
||||
## Definition of Done (ADR-009)
|
||||
|
||||
ADR-009 is complete when all are true:
|
||||
|
||||
1. UOK path is default and stable.
|
||||
2. All units execute through unified gate runner.
|
||||
3. Model selection supports any eligible model in any phase with policy enforcement.
|
||||
4. Hooks/agents/subagents/parallel/team execution runs through one scheduler contract.
|
||||
5. Turn-level git transaction record exists for every executed turn.
|
||||
6. Unified audit events provide causal traceability across orchestration, model, tool, git, and test actions.
|
||||
7. Plan v2 can produce a complete unit graph with fail-closed plan gate.
|
||||
8. `burn-max` profile is available and policy-safe.
|
||||
9. Legacy orchestration branches are retired or behind emergency-only fallback.
|
||||
10. CLI/web/headless behavior remains user-compatible.
|
||||
|
||||
## Recommended Immediate Next Tasks (Week 1)
|
||||
|
||||
1. Add Wave 0 feature flags and default-off wiring.
|
||||
2. Introduce contract types and adapter shell (Wave 1 scaffolding).
|
||||
3. Add parity telemetry capture for legacy loop baseline.
|
||||
4. Land initial tests for contract serialization and turn result envelopes.
|
||||
|
||||
401
docs/dev/ADR-009-orchestration-kernel-refactor.md
Normal file
401
docs/dev/ADR-009-orchestration-kernel-refactor.md
Normal file
|
|
@ -0,0 +1,401 @@
|
|||
# ADR-009: Unified Orchestration Kernel Refactor
|
||||
|
||||
**Status:** Proposed
|
||||
**Date:** 2026-04-14
|
||||
**Deciders:** Jeremy McSpadden, GSD Core Team
|
||||
**Related:** ADR-001 (worktree architecture), ADR-003 (pipeline simplification), ADR-004 (capability-aware routing), ADR-005 (multi-provider strategy), ADR-008 (tools over MCP)
|
||||
|
||||
## Context
|
||||
|
||||
GSD already ships many advanced features:
|
||||
|
||||
- dynamic model routing and multi-provider support
|
||||
- hooks (`pre_dispatch_hooks`, `post_unit_hooks`)
|
||||
- subagents and parallel execution
|
||||
- worktree/branch isolation and automated git flows
|
||||
- per-unit metrics and cost ledgers
|
||||
- activity logs and structured journal events
|
||||
- verification retries and failure recovery
|
||||
|
||||
The current limitation is not missing capability. The limitation is **distribution of control logic across large, mixed-concern modules**, especially in auto-mode and related orchestration files. This raises change risk, creates duplicated policy paths, and slows the introduction of stronger guarantees.
|
||||
|
||||
The target requirements for the next architecture are:
|
||||
|
||||
1. User can use any available model during any phase.
|
||||
2. First-class hooks, agents, sub-agents, team execution, and parallel workflows.
|
||||
3. Git actions on every turn with deterministic, auditable behavior.
|
||||
4. Logging of every action with causal traceability.
|
||||
5. Long upfront planning via multi-round questioning and research.
|
||||
6. Plan slicing and controlled dispatch through strict gate validation.
|
||||
7. Deterministic failure reprocessing loops.
|
||||
8. Automatic testing during build and gate transitions.
|
||||
9. Explicit token usage controls including a high-burn mode.
|
||||
10. Enforced compliance with provider/model terms of service.
|
||||
|
||||
## Decision
|
||||
|
||||
Refactor GSD into a **Unified Orchestration Kernel (UOK)** with explicit control planes, typed contracts, and an incremental strangler migration. This is a staged architectural replacement of orchestration internals, not a rewrite of user-facing CLI/web/MCP surfaces.
|
||||
|
||||
### Core Architectural Model
|
||||
|
||||
The orchestrator is split into six control planes:
|
||||
|
||||
1. **Plan Plane**
|
||||
2. **Execution Plane**
|
||||
3. **Model Plane**
|
||||
4. **Gate Plane**
|
||||
5. **GitOps Plane**
|
||||
6. **Audit Plane**
|
||||
|
||||
Each dispatched unit (turn) executes through a single deterministic pipeline:
|
||||
|
||||
```text
|
||||
Discover/Clarify/Research -> Plan Compile -> Model Select -> Execute -> Validate -> Git Transaction -> Persist Audit -> Next Unit
|
||||
```
|
||||
|
||||
## Detailed Design
|
||||
|
||||
### 1) Plan Plane: Multi-Round Front-Loaded Planning
|
||||
|
||||
Add a formal planning lifecycle:
|
||||
|
||||
1. `discover`: codebase and state scan
|
||||
2. `clarify`: multi-round user questions (bounded rounds, explicit stop condition)
|
||||
3. `research`: internal and external synthesis
|
||||
4. `draft-plan`: produce full roadmap and milestones
|
||||
5. `compile`: slice into executable units with IO boundaries
|
||||
6. `plan-gate`: reject/repair invalid plans before execution starts
|
||||
|
||||
Required outputs:
|
||||
|
||||
- `ROADMAP.md` (complete)
|
||||
- per-milestone slice graph
|
||||
- per-task executable unit specs
|
||||
- requirement trace matrix (requirement -> unit(s) -> verification)
|
||||
- plan risk register
|
||||
|
||||
Plan gate fails closed if:
|
||||
|
||||
- missing acceptance criteria
|
||||
- missing verification strategy
|
||||
- cyclic task dependencies
|
||||
- unowned artifacts
|
||||
- missing rollback/recovery semantics for risky units
|
||||
|
||||
### 2) Execution Plane: Agents, Sub-Agents, Teams, Parallel
|
||||
|
||||
Unify all execution into a typed DAG scheduler.
|
||||
|
||||
Node kinds:
|
||||
|
||||
- `unit` (single execution task)
|
||||
- `hook`
|
||||
- `subagent`
|
||||
- `team-worker`
|
||||
- `verification`
|
||||
- `reprocess`
|
||||
|
||||
Edges express:
|
||||
|
||||
- hard dependencies
|
||||
- resource conflicts (file-level IO locks)
|
||||
- ordering constraints (gate-before-merge, test-before-closeout)
|
||||
|
||||
Execution modes:
|
||||
|
||||
- single-worker deterministic mode
|
||||
- multi-worker parallel mode
|
||||
- team mode (shared repo, unique milestone IDs, gated merge)
|
||||
|
||||
This removes ad-hoc parallel behavior and makes sub-agent and team paths first-class scheduler decisions.
|
||||
|
||||
### 3) Model Plane: Any Model in Any Phase
|
||||
|
||||
Replace rigid phase->model assumptions with **requirement-based eligibility**.
|
||||
|
||||
Selection pipeline:
|
||||
|
||||
1. gather phase/unit requirements (capabilities, context size, latency profile)
|
||||
2. gather eligible models from configured providers
|
||||
3. apply hard policy filters (provider auth, TOS, tool compatibility, org rules)
|
||||
4. apply soft scoring (capability vectors, budget profile, historical outcomes)
|
||||
5. choose primary + fallback chain
|
||||
|
||||
Rules:
|
||||
|
||||
- Any model can run any phase if it passes policy and capability constraints.
|
||||
- User pins remain hard ceilings only when configured explicitly.
|
||||
- Unknown models are allowed with conservative default capability scores.
|
||||
|
||||
Add model intent profiles:
|
||||
|
||||
- `economy` (lowest cost)
|
||||
- `balanced`
|
||||
- `quality`
|
||||
- `burn-max` (highest compute/token burn within policy and budget limits)
|
||||
|
||||
### 4) Gate Plane: Controlled Dispatch and Reprocessing
|
||||
|
||||
All units pass explicit gates:
|
||||
|
||||
1. `policy-gate` (provider/tool/TOS/security checks)
|
||||
2. `input-gate` (unit contract completeness, artifact readiness)
|
||||
3. `execution-gate` (runtime guardrails, timeout strategy, tool allowlist)
|
||||
4. `artifact-gate` (expected outputs and format validation)
|
||||
5. `verification-gate` (lint/test/typecheck/security checks)
|
||||
6. `closeout-gate` (state transition safety + git transaction outcome)
|
||||
|
||||
Gate outcomes:
|
||||
|
||||
- `pass`
|
||||
- `retryable-fail`
|
||||
- `hard-fail`
|
||||
- `manual-attention`
|
||||
|
||||
Failure reprocessing matrix (deterministic):
|
||||
|
||||
- code failure -> targeted fix prompt + bounded retry
|
||||
- test failure -> impacted test fix loop
|
||||
- tool failure -> alternate tool/provider fallback
|
||||
- model failure -> fallback model chain
|
||||
- policy failure -> immediate hard stop and explicit reason
|
||||
|
||||
Retry policy:
|
||||
|
||||
- bounded attempts per gate
|
||||
- escalating strategy per attempt
|
||||
- terminal state persisted with full evidence
|
||||
|
||||
### 5) GitOps Plane: Git Action Every Turn
|
||||
|
||||
Every dispatched unit is wrapped in a git transaction:
|
||||
|
||||
1. `turn-start`: capture branch/worktree status and dirty-state snapshot
|
||||
2. `turn-exec`: run unit
|
||||
3. `turn-stage`: stage relevant changes
|
||||
4. `turn-checkpoint`: commit checkpoint or structured no-op record
|
||||
5. `turn-publish`: optional push per policy
|
||||
6. `turn-record`: write commit metadata into audit ledger
|
||||
|
||||
Defaults:
|
||||
|
||||
- checkpoint commit each turn in milestone branch/worktree
|
||||
- squash on milestone merge to keep main history clean
|
||||
|
||||
Configurable strictness:
|
||||
|
||||
- `git.turn_action: commit|snapshot|status-only`
|
||||
- `git.turn_push: never|milestone|always`
|
||||
|
||||
If the repo state blocks a commit (e.g., conflicts), the turn fails at the closeout gate with explicit diagnostics.
|
||||
|
||||
### 6) Audit Plane: Log Every Action
|
||||
|
||||
Promote current activity/journal into a single causal event model.
|
||||
|
||||
Event classes:
|
||||
|
||||
- orchestrator (`dispatch`, `gate-result`, `state-transition`)
|
||||
- model (`selection`, `fallback`, `provider-switch`)
|
||||
- tool (`call`, `result`, `error`)
|
||||
- git (`status`, `stage`, `commit`, `merge`, `push`)
|
||||
- test (`command`, `result`, `retry`)
|
||||
- policy (`allow`, `deny`, `warning`)
|
||||
- cost (`tokens`, `cost`, `cache-hit`, `budget-pressure`)
|
||||
|
||||
Every event includes:
|
||||
|
||||
- `eventId`
|
||||
- `traceId` (session)
|
||||
- `turnId` (unit)
|
||||
- `causedBy` reference
|
||||
- timestamp
|
||||
- durable payload
|
||||
|
||||
Storage:
|
||||
|
||||
- append-only JSONL + indexed SQLite projection for queryability
|
||||
- no destructive rewrites of source audit logs
|
||||
|
||||
## Compliance and TOS Enforcement
|
||||
|
||||
Introduce a provider policy engine as a hard dependency of the policy gate.
|
||||
|
||||
Provider policy definition includes:
|
||||
|
||||
- allowed auth modes
|
||||
- prohibited token exchange paths
|
||||
- tool/protocol constraints
|
||||
- subscription vs API usage boundaries
|
||||
- model-specific restrictions
|
||||
|
||||
Enforcement rules:
|
||||
|
||||
- deny disallowed auth/routing before dispatch
|
||||
- deny model selection if provider constraints are not met
|
||||
- emit policy evidence events on every allow/deny decision
|
||||
|
||||
This formalizes current compliance work (notably Anthropic/Claude Code boundaries) into a reusable engine rather than scattered checks.
|
||||
|
||||
## Automatic Testing Strategy
|
||||
|
||||
Testing becomes mandatory at three levels:
|
||||
|
||||
1. **Per-turn**: impacted tests + lint/typecheck subset
|
||||
2. **Per-slice closeout**: full slice verification profile
|
||||
3. **Per-milestone closeout**: full suite (or policy-defined release profile)
|
||||
|
||||
Verification commands become declarative policies keyed by unit type, rather than only ad-hoc shell command lists.
|
||||
|
||||
## Token Strategy and Burn-Max Mode
|
||||
|
||||
Existing token optimization modes remain, and an explicit high-burn profile is added alongside them.
|
||||
|
||||
`burn-max` behavior:
|
||||
|
||||
- maximize context inclusion
|
||||
- prefer high-capability models
|
||||
- enable deeper critique/review passes
|
||||
- increase planning/research depth
|
||||
|
||||
Hard limits still apply:
|
||||
|
||||
- budget ceiling and enforcement rules
|
||||
- provider rate limits
|
||||
- TOS/policy constraints
|
||||
|
||||
The system must never bypass provider restrictions to increase usage.
|
||||
|
||||
## Migration Plan (Strangler Refactor)
|
||||
|
||||
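No big-bang rewrite. Migrate in waves with compatibility adapters. Note: `ADR-009-IMPLEMENTATION-PLAN.md` splits Wave 0 below into separate flag-scaffolding and contract-extraction waves, so its wave numbers from the gate plane onward are one higher than the numbers used here.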
|
||||
|
||||
### Wave 0: Contracts and Telemetry Baseline
|
||||
|
||||
- define turn contract and gate result schemas
|
||||
- add trace IDs/turn IDs to current paths
|
||||
- keep behavior unchanged
|
||||
|
||||
### Wave 1: Gate Plane Extraction
|
||||
|
||||
- extract gate runner from auto loop
|
||||
- route existing checks through unified gate API
|
||||
|
||||
### Wave 2: Model Plane Unification
|
||||
|
||||
- requirement-based model selection
|
||||
- policy filter insertion before scoring
|
||||
- preserve existing model config semantics
|
||||
|
||||
### Wave 3: Scheduler and Execution Graph
|
||||
|
||||
- introduce DAG scheduler
|
||||
- map existing subagent/parallel features to graph nodes
|
||||
- enable graph mode behind flag
|
||||
|
||||
### Wave 4: GitOps Transaction Layer
|
||||
|
||||
- enforce turn-level git actions
|
||||
- add deterministic checkpoint behavior
|
||||
|
||||
### Wave 5: Audit Plane Consolidation
|
||||
|
||||
- unify journal/activity/metrics events under common envelope
|
||||
- add query projection
|
||||
|
||||
### Wave 6: Plan Plane v2
|
||||
|
||||
- multi-round clarify/research planner
|
||||
- compiled unit graph + plan gate
|
||||
|
||||
### Wave 7: Legacy Path Retirement
|
||||
|
||||
- remove obsolete branches in `auto.ts` and related modules
|
||||
- keep CLI/API compatibility
|
||||
|
||||
## Module Extraction Targets
|
||||
|
||||
Primary decomposition targets:
|
||||
|
||||
- `auto.ts` -> orchestrator kernel + adapters
|
||||
- `auto-prompts.ts` -> plan compiler + prompt renderers
|
||||
- `state.ts` -> state query service + immutable state views
|
||||
- `gsd-db.ts` -> data access layer + event projection store
|
||||
- `auto-post-unit.ts` / `auto-verification.ts` -> closeout gate services
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
The refactor is accepted when all conditions are true:
|
||||
|
||||
1. Any configured model can be selected in any phase when policy permits.
|
||||
2. Hooks, agents, sub-agents, teams, and parallel workflows all execute under one scheduler contract.
|
||||
3. Every turn produces at least one git action record and auditable turn closeout.
|
||||
4. Every dispatch and action is traceable by `traceId` and `turnId`.
|
||||
5. Multi-round planning produces a full executable unit graph before execution.
|
||||
6. Gate outcomes are explicit, deterministic, and persisted.
|
||||
7. Failure reprocessing uses typed failure classes, not generic retries.
|
||||
8. Automatic tests run per policy on every turn/slice/milestone gate.
|
||||
9. Token usage is tracked at turn granularity with burn-max profile support.
|
||||
10. Policy engine blocks TOS-violating routes and records evidence.
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
- Stronger reliability through fail-closed gates
|
||||
- Faster feature delivery by isolating orchestration concerns
|
||||
- Clear compliance and audit posture
|
||||
- Better debuggability from causal event logs
|
||||
- Controlled support for aggressive high-burn workflows
|
||||
|
||||
### Negative
|
||||
|
||||
- Significant migration effort across core modules
|
||||
- More configuration surface area
|
||||
- Temporary complexity during dual-path migration
|
||||
|
||||
### Neutral
|
||||
|
||||
- Existing user commands and workflows remain stable during migration
|
||||
- Existing preferences remain supported with compatibility adapters
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### A) Full rewrite in a new codebase
|
||||
|
||||
Rejected. Too risky for a live project with broad surface area and active releases.
|
||||
|
||||
### B) Continue incremental patching without architecture split
|
||||
|
||||
Rejected. Slows delivery and increases regression risk as orchestration complexity grows.
|
||||
|
||||
### C) Keep existing optimization-first token model only
|
||||
|
||||
Rejected. Does not satisfy explicit requirement for intentional high-burn workflows.
|
||||
|
||||
## Risks and Mitigations
|
||||
|
||||
1. **Migration regressions**
|
||||
- Mitigation: golden-path replay tests and shadow mode comparisons per wave.
|
||||
2. **Audit log volume growth**
|
||||
- Mitigation: append-only raw logs plus indexed projections and retention policies.
|
||||
3. **Git noise from per-turn commits**
|
||||
- Mitigation: milestone squash merge defaults and configurable checkpoint modes.
|
||||
4. **Provider policy drift**
|
||||
- Mitigation: versioned provider policy registry with test fixtures per provider.
|
||||
|
||||
## Open Questions
|
||||
|
||||
1. Should `turn_action: commit` be mandatory default for all modes or only auto-mode?
|
||||
2. Should `burn-max` be opt-in global, project-scoped, or both?
|
||||
3. Should policy violations always halt or allow configurable warn-only mode for local development?
|
||||
|
||||
## Implementation Note
|
||||
|
||||
This ADR intentionally aligns with current architecture principles:
|
||||
|
||||
- extension-first where practical
|
||||
- strong test contracts
|
||||
- pragmatic incremental rollout
|
||||
- provider-agnostic execution with explicit policy constraints
|
||||
|
||||
69
packages/pi-coding-agent/src/types/ambient-modules.d.ts
vendored
Normal file
69
packages/pi-coding-agent/src/types/ambient-modules.d.ts
vendored
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
/**
 * Minimal ambient typings for `proper-lockfile`.
 *
 * Only the default export's `lock` and `lockSync` entry points are declared.
 * Option descriptions below follow the proper-lockfile README.
 */
declare module "proper-lockfile" {
  /** Back-off settings that may be supplied instead of a plain retry count. */
  export interface RetryOptions {
    retries?: number;
    factor?: number;
    minTimeout?: number;
    maxTimeout?: number;
    randomize?: boolean;
  }

  /** Options accepted by both `lock` and `lockSync`. */
  export interface LockOptions {
    /** Resolve symlinks on the target path before locking. */
    realpath?: boolean;
    /** Either a retry count or a full back-off configuration. */
    retries?: number | RetryOptions;
    /** Age in milliseconds after which an existing lock is treated as stale. */
    stale?: number;
    /**
     * Interval in milliseconds at which the held lock is refreshed.
     * Declared so callers that tune the refresh interval alongside `stale`
     * type-check against this module.
     */
    update?: number;
    /** Invoked when a held lock is detected as compromised. */
    onCompromised?: (err: Error) => void;
  }

  /** Releases a lock obtained via `lockSync`. */
  export type ReleaseSync = () => void;
  /** Releases a lock obtained via `lock`. */
  export type ReleaseAsync = () => Promise<void>;

  /** Shape of the module's default export. */
  export interface ProperLockfileApi {
    /** Acquires the lock synchronously and returns its release function. */
    lockSync(path: string, options?: LockOptions): ReleaseSync;
    /** Acquires the lock asynchronously; resolves with its release function. */
    lock(path: string, options?: LockOptions): Promise<ReleaseAsync>;
  }

  const lockfile: ProperLockfileApi;
  export default lockfile;
}
|
||||
|
||||
/**
 * Minimal ambient typings for `sql.js`.
 *
 * Declares the default-exported initializer together with the `Database` and
 * `Statement` members listed below; nothing else from the library is typed.
 */
declare module "sql.js" {
  /** Optional settings handed to the initializer. */
  export interface SqlJsConfig {
    /** Maps a requested file name to the location it should be loaded from. */
    locateFile?: (file: string) => string;
  }

  /** Value the initializer resolves with; exposes the `Database` constructor. */
  export interface SqlJsStatic {
    Database: new (data?: Uint8Array | ArrayBuffer | Buffer) => Database;
  }

  /** Database handle created through `SqlJsStatic.Database`. */
  export interface Database {
    close(): void;
    export(): Uint8Array;
    /** Returns a `Statement` for the given SQL text. */
    prepare(sql: string): Statement;
    run(sql: string, params?: unknown[]): void;
  }

  /** Prepared statement handle returned by `Database.prepare`. */
  export interface Statement {
    bind(values: (string | number | null | Uint8Array)[]): void;
    step(): boolean;
    getAsObject(): Record<string, unknown>;
    free(): void;
  }

  /** Initializes the library and resolves with its static API. */
  export default function initSqlJs(config?: SqlJsConfig): Promise<SqlJsStatic>;
}
|
||||
|
||||
/**
 * Minimal ambient typings for `hosted-git-info`.
 *
 * Only `fromUrl` on the default export and four fields of its result are
 * declared.
 */
declare module "hosted-git-info" {
  /** Shape of the module's default export. */
  export interface HostedGitInfoApi {
    /** Returns the parsed info for `url`, or `undefined`. */
    fromUrl(url: string): HostedGitInfo | undefined;
  }

  /** Fields exposed on a `fromUrl` result; all are optional. */
  export interface HostedGitInfo {
    domain?: string;
    user?: string;
    project?: string;
    committish?: string;
  }

  const hostedGitInfo: HostedGitInfoApi;
  export default hostedGitInfo;
}
|
||||
|
|
@ -23,6 +23,6 @@
|
|||
"outDir": "./dist",
|
||||
"rootDir": "./src"
|
||||
},
|
||||
"include": ["src/**/*.ts"],
|
||||
"exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts"]
|
||||
"include": ["src/**/*.ts", "src/**/*.d.ts"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ export async function autoLoop(
|
|||
let turnFinished = false;
|
||||
const finishTurn = (
|
||||
status: "completed" | "failed" | "paused" | "stopped" | "skipped" | "retry",
|
||||
failureClass: "none" | "unknown" | "manual-attention" | "timeout" | "execution" = "none",
|
||||
failureClass: "none" | "unknown" | "manual-attention" | "timeout" | "execution" | "closeout" = "none",
|
||||
error?: string,
|
||||
): void => {
|
||||
if (turnFinished) return;
|
||||
|
|
|
|||
|
|
@ -54,8 +54,8 @@ let loadAttempted = false;
|
|||
|
||||
function suppressSqliteWarning(): void {
|
||||
const origEmit = process.emit;
|
||||
// @ts-expect-error overriding process.emit for warning filter
|
||||
process.emit = function (event: string, ...args: unknown[]): boolean {
|
||||
// Override via loose cast: Node's overloaded emit signature is not directly assignable.
|
||||
(process as any).emit = function (event: string, ...args: unknown[]): boolean {
|
||||
if (
|
||||
event === "warning" &&
|
||||
args[0] &&
|
||||
|
|
|
|||
|
|
@ -52,6 +52,18 @@ export interface SessionLockStatus {
|
|||
recovered?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Local structural type for the part of `proper-lockfile` used in this file.
 *
 * The dynamically required module is cast to this interface, so only
 * `lockSync` and the options listed here are type-checked.
 */
interface ProperLockfileApi {
  /**
   * Synchronously acquires a lock on `path`.
   *
   * @returns The release function; calling it releases the lock.
   */
  lockSync(
    path: string,
    options?: {
      realpath?: boolean;
      stale?: number;
      update?: number;
      onCompromised?: () => void;
    },
  ): () => void;
}
|
||||
|
||||
// ─── Module State ───────────────────────────────────────────────────────────
|
||||
|
||||
/** Release function from proper-lockfile — calling it releases the OS lock. */
|
||||
|
|
@ -277,9 +289,9 @@ export function acquireSessionLock(basePath: string): SessionLockResult {
|
|||
unitStartedAt: new Date().toISOString(),
|
||||
};
|
||||
|
||||
let lockfile: typeof import("proper-lockfile");
|
||||
let lockfile: ProperLockfileApi;
|
||||
try {
|
||||
lockfile = _require("proper-lockfile") as typeof import("proper-lockfile");
|
||||
lockfile = _require("proper-lockfile") as ProperLockfileApi;
|
||||
} catch {
|
||||
// proper-lockfile not available — fall back to PID-based check
|
||||
return acquireFallbackLock(basePath, lp, lockData);
|
||||
|
|
|
|||
|
|
@ -46,8 +46,8 @@ let loadAttempted = false;
|
|||
|
||||
function suppressSqliteWarning(): void {
|
||||
const origEmit = process.emit;
|
||||
// @ts-expect-error overriding process.emit for warning filter
|
||||
process.emit = function (event: string, ...args: unknown[]): boolean {
|
||||
// Override via loose cast: Node's overloaded emit signature is not directly assignable.
|
||||
(process as any).emit = function (event: string, ...args: unknown[]): boolean {
|
||||
if (
|
||||
event === "warning" &&
|
||||
args[0] &&
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import type { TaskMetadata } from "../model-router.js";
|
||||
import type { TaskMetadata } from "../complexity-classifier.js";
|
||||
import { computeTaskRequirements, filterToolsForProvider } from "../model-router.js";
|
||||
import { buildAuditEnvelope, emitUokAuditEvent } from "./audit.js";
|
||||
|
||||
|
|
@ -33,11 +33,11 @@ export function buildRequirementVector(unitType?: string, taskMetadata?: TaskMet
|
|||
return computeTaskRequirements(unitType, taskMetadata) as unknown as Partial<Record<string, number>>;
|
||||
}
|
||||
|
||||
export function applyModelPolicyFilter(
|
||||
candidates: ModelCandidate[],
|
||||
export function applyModelPolicyFilter<T extends ModelCandidate>(
|
||||
candidates: T[],
|
||||
options: ModelPolicyOptions,
|
||||
): {
|
||||
eligible: ModelCandidate[];
|
||||
eligible: T[];
|
||||
decisions: ModelPolicyDecision[];
|
||||
requirements: Partial<Record<string, number>>;
|
||||
} {
|
||||
|
|
@ -46,7 +46,7 @@ export function applyModelPolicyFilter(
|
|||
const allowedApis = options.allowedApis ? new Set(options.allowedApis) : null;
|
||||
const requirements = buildRequirementVector(options.unitType, options.taskMetadata);
|
||||
const decisions: ModelPolicyDecision[] = [];
|
||||
const eligible: ModelCandidate[] = [];
|
||||
const eligible: T[] = [];
|
||||
|
||||
for (const model of candidates) {
|
||||
let allowed = true;
|
||||
|
|
|
|||
|
|
@ -9,9 +9,14 @@
|
|||
* available, testing all patterns in a single DFA pass. Falls back to
|
||||
* per-rule JS RegExp iteration when the native module is not loaded.
|
||||
*/
|
||||
import picomatch from "picomatch";
|
||||
import { createRequire } from "node:module";
|
||||
import { debugTime, debugCount, debugPeak } from "../gsd/debug-logger.js";
|
||||
|
||||
const _require = createRequire(import.meta.url);
|
||||
type PicomatchMatcher = (input: string) => boolean;
|
||||
type PicomatchFn = (pattern: string) => PicomatchMatcher;
|
||||
const picomatch = _require("picomatch") as PicomatchFn;
|
||||
|
||||
// ── Native TTSR engine (optional) ─────────────────────────────────────
|
||||
let nativeTtsr: {
|
||||
ttsrCompileRules: (rules: { name: string; conditions: string[] }[]) => number;
|
||||
|
|
@ -65,7 +70,7 @@ export interface TtsrSettings {
|
|||
|
||||
interface ToolScope {
|
||||
toolName?: string;
|
||||
pathMatcher?: picomatch.Matcher;
|
||||
pathMatcher?: PicomatchMatcher;
|
||||
pathPattern?: string;
|
||||
}
|
||||
|
||||
|
|
@ -80,7 +85,7 @@ interface TtsrEntry {
|
|||
rule: Rule;
|
||||
conditions: RegExp[];
|
||||
scope: TtsrScope;
|
||||
globalPathMatchers?: picomatch.Matcher[];
|
||||
globalPathMatchers?: PicomatchMatcher[];
|
||||
}
|
||||
|
||||
/** Tracks when a rule was last injected (for repeat gating). */
|
||||
|
|
@ -147,7 +152,7 @@ export class TtsrManager {
|
|||
return compiled;
|
||||
}
|
||||
|
||||
#compileGlobalPathMatchers(globs: Rule["globs"]): picomatch.Matcher[] | undefined {
|
||||
#compileGlobalPathMatchers(globs: Rule["globs"]): PicomatchMatcher[] | undefined {
|
||||
if (!globs || globs.length === 0) return undefined;
|
||||
const matchers = globs
|
||||
.map((g) => g.trim())
|
||||
|
|
@ -239,7 +244,7 @@ export class TtsrManager {
|
|||
return pathValue.replaceAll("\\", "/");
|
||||
}
|
||||
|
||||
#matchesGlob(matcher: picomatch.Matcher, filePaths: string[] | undefined): boolean {
|
||||
#matchesGlob(matcher: PicomatchMatcher, filePaths: string[] | undefined): boolean {
|
||||
if (!filePaths || filePaths.length === 0) return false;
|
||||
for (const filePath of filePaths) {
|
||||
const normalized = this.#normalizePath(filePath);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue