feat: token optimization — profile presets, context compression, complexity routing, budget prediction (#582)

Reduces auto-mode token consumption by 40-60% through coordinated
optimizations driven by a single token_profile preference.

Profile presets (budget/balanced/quality):
- One preference key coordinates model selection, phase skipping,
  context compression, and subagent routing
- Balanced is the default for new projects (D046)
- Explicit user preferences always override profile defaults

Phase skipping:
- Guard clauses on research-milestone, research-slice, and
  reassess-roadmap dispatch rules
- Skipped phases return null (fall-through), preserving state machine
- Budget profile skips all research + reassess; balanced skips slice
  research only

Context compression:
- inlineLevel parameter (full/standard/minimal) on 6 prompt builders
- Minimal: only output template + essential context (≥30% reduction)
- Standard: skip redundant templates
- Full: current behavior unchanged

Complexity routing:
- classifyTaskComplexity() for task plans (step/file/signal heuristics)
- classifyUnitComplexity() for unit types with budget pressure
  thresholds at 50/75/90% (from #579)
- execution_simple model config for cheap simple-task routing
- escalateTier() for failure recovery (light→standard→heavy)

Adaptive learning (from #579):
- routing-history.ts tracks success/failure per tier per pattern
- Rolling 50-entry window, 20% failure threshold auto-bumps tier
- User feedback weighted 2x vs automatic detection
- Persists to .gsd/routing-history.json

Budget prediction:
- getAverageCostPerUnitType() + predictRemainingCost() in metrics
- projectedRemainingCost + profileDowngraded in AutoDashboardData
- One-way auto-downgrade within a milestone (D048)

Addresses #575

95 tests across 5 test files, all passing.
This commit is contained in:
Tom Boucher 2026-03-16 00:20:38 -04:00 committed by GitHub
parent 4c283192bd
commit 67341caef1
13 changed files with 1844 additions and 45 deletions

View file

@ -35,6 +35,10 @@ export interface AutoDashboardData {
/** Running cost and token totals from metrics ledger */
totalCost: number;
totalTokens: number;
/** Projected remaining cost based on unit-type averages (undefined if insufficient data) */
projectedRemainingCost?: number;
/** Whether token profile has been auto-downgraded due to budget prediction */
profileDowngraded?: boolean;
}
// ─── Unit Description Helpers ─────────────────────────────────────────────────

View file

@ -122,7 +122,9 @@ const DISPATCH_RULES: DispatchRule[] = [
},
{
name: "reassess-roadmap (post-completion)",
match: async ({ state, mid, midTitle, basePath }) => {
match: async ({ state, mid, midTitle, basePath, prefs }) => {
// Phase skip: skip reassess when preference or profile says so
if (prefs?.phases?.skip_reassess) return null;
const needsReassess = await checkNeedsReassessment(basePath, mid, state);
if (!needsReassess) return null;
return {
@ -160,8 +162,10 @@ const DISPATCH_RULES: DispatchRule[] = [
},
{
name: "pre-planning (no research) → research-milestone",
match: async ({ state, mid, midTitle, basePath }) => {
match: async ({ state, mid, midTitle, basePath, prefs }) => {
if (state.phase !== "pre-planning") return null;
// Phase skip: skip research when preference or profile says so
if (prefs?.phases?.skip_research) return null;
const researchFile = resolveMilestoneFile(basePath, mid, "RESEARCH");
if (researchFile) return null; // has research, fall through
return {
@ -186,8 +190,10 @@ const DISPATCH_RULES: DispatchRule[] = [
},
{
name: "planning (no research, not S01) → research-slice",
match: async ({ state, mid, midTitle, basePath }) => {
match: async ({ state, mid, midTitle, basePath, prefs }) => {
if (state.phase !== "planning") return null;
// Phase skip: skip research when preference or profile says so
if (prefs?.phases?.skip_research || prefs?.phases?.skip_slice_research) return null;
const sid = state.activeSlice!.id;
const sTitle = state.activeSlice!.title;
const researchFile = resolveSliceFile(basePath, mid, sid, "RESEARCH");

View file

@ -15,8 +15,8 @@ import {
relMilestoneFile, relSliceFile, relSlicePath, relMilestonePath,
resolveGsdRootFile, relGsdRootFile,
} from "./paths.js";
import { resolveSkillDiscoveryMode } from "./preferences.js";
import type { GSDState } from "./types.js";
import { resolveSkillDiscoveryMode, resolveInlineLevel } from "./preferences.js";
import type { GSDState, InlineLevel } from "./types.js";
import type { GSDPreferences } from "./preferences.js";
import { join } from "node:path";
import { existsSync } from "node:fs";
@ -393,7 +393,8 @@ export async function buildResearchMilestonePrompt(mid: string, midTitle: string
});
}
export async function buildPlanMilestonePrompt(mid: string, midTitle: string, base: string): Promise<string> {
export async function buildPlanMilestonePrompt(mid: string, midTitle: string, base: string, level?: InlineLevel): Promise<string> {
const inlineLevel = level ?? resolveInlineLevel();
const contextPath = resolveMilestoneFile(base, mid, "CONTEXT");
const contextRel = relMilestoneFile(base, mid, "CONTEXT");
const researchPath = resolveMilestoneFile(base, mid, "RESEARCH");
@ -406,17 +407,23 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba
const { inlinePriorMilestoneSummary } = await import("./files.js");
const priorSummaryInline = await inlinePriorMilestoneSummary(mid, base);
if (priorSummaryInline) inlined.push(priorSummaryInline);
const projectInline = await inlineGsdRootFile(base, "project.md", "Project");
const projectInline = inlineLevel !== "minimal" ? await inlineGsdRootFile(base, "project.md", "Project") : null;
if (projectInline) inlined.push(projectInline);
const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
const requirementsInline = inlineLevel !== "minimal" ? await inlineGsdRootFile(base, "requirements.md", "Requirements") : null;
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
const decisionsInline = inlineLevel !== "minimal" ? await inlineGsdRootFile(base, "decisions.md", "Decisions") : null;
if (decisionsInline) inlined.push(decisionsInline);
inlined.push(inlineTemplate("roadmap", "Roadmap"));
inlined.push(inlineTemplate("decisions", "Decisions"));
inlined.push(inlineTemplate("plan", "Slice Plan"));
inlined.push(inlineTemplate("task-plan", "Task Plan"));
inlined.push(inlineTemplate("secrets-manifest", "Secrets Manifest"));
if (inlineLevel === "full") {
inlined.push(inlineTemplate("decisions", "Decisions"));
inlined.push(inlineTemplate("plan", "Slice Plan"));
inlined.push(inlineTemplate("task-plan", "Task Plan"));
inlined.push(inlineTemplate("secrets-manifest", "Secrets Manifest"));
} else if (inlineLevel === "standard") {
inlined.push(inlineTemplate("decisions", "Decisions"));
inlined.push(inlineTemplate("plan", "Slice Plan"));
inlined.push(inlineTemplate("task-plan", "Task Plan"));
}
const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`;
@ -479,8 +486,9 @@ export async function buildResearchSlicePrompt(
}
export async function buildPlanSlicePrompt(
mid: string, _midTitle: string, sid: string, sTitle: string, base: string,
mid: string, _midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel,
): Promise<string> {
const inlineLevel = level ?? resolveInlineLevel();
const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
const researchPath = resolveSliceFile(base, mid, sid, "RESEARCH");
@ -490,12 +498,16 @@ export async function buildPlanSlicePrompt(
inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
const researchInline = await inlineFileOptional(researchPath, researchRel, "Slice Research");
if (researchInline) inlined.push(researchInline);
const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
if (decisionsInline) inlined.push(decisionsInline);
const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
if (requirementsInline) inlined.push(requirementsInline);
if (inlineLevel !== "minimal") {
const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
if (decisionsInline) inlined.push(decisionsInline);
const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
if (requirementsInline) inlined.push(requirementsInline);
}
inlined.push(inlineTemplate("plan", "Slice Plan"));
inlined.push(inlineTemplate("task-plan", "Task Plan"));
if (inlineLevel === "full") {
inlined.push(inlineTemplate("task-plan", "Task Plan"));
}
const depContent = await inlineDependencySummaries(mid, sid, base);
const planActiveOverrides = await loadActiveOverrides(base);
@ -519,8 +531,9 @@ export async function buildPlanSlicePrompt(
export async function buildExecuteTaskPrompt(
mid: string, sid: string, sTitle: string,
tid: string, tTitle: string, base: string,
tid: string, tTitle: string, base: string, level?: InlineLevel,
): Promise<string> {
const inlineLevel = level ?? resolveInlineLevel();
const priorSummaries = await getPriorTaskSummaryPaths(mid, sid, tid, base);
const priorLines = priorSummaries.length > 0
@ -560,11 +573,17 @@ export async function buildExecuteTaskPrompt(
legacyContinuePath ? `${relSlicePath(base, mid, sid)}/continue.md` : null,
);
const carryForwardSection = await buildCarryForwardSection(priorSummaries, base);
const inlinedTemplates = [
inlineTemplate("task-summary", "Task Summary"),
inlineTemplate("decisions", "Decisions"),
].join("\n\n---\n\n");
// For minimal inline level, only carry forward the most recent prior summary
const effectivePriorSummaries = inlineLevel === "minimal" && priorSummaries.length > 1
? priorSummaries.slice(-1)
: priorSummaries;
const carryForwardSection = await buildCarryForwardSection(effectivePriorSummaries, base);
const inlinedTemplates = inlineLevel === "minimal"
? inlineTemplate("task-summary", "Task Summary")
: [
inlineTemplate("task-summary", "Task Summary"),
inlineTemplate("decisions", "Decisions"),
].join("\n\n---\n\n");
const taskSummaryPath = `${relSlicePath(base, mid, sid)}/tasks/${tid}-SUMMARY.md`;
@ -589,8 +608,9 @@ export async function buildExecuteTaskPrompt(
}
export async function buildCompleteSlicePrompt(
mid: string, _midTitle: string, sid: string, sTitle: string, base: string,
mid: string, _midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel,
): Promise<string> {
const inlineLevel = level ?? resolveInlineLevel();
const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
@ -600,8 +620,10 @@ export async function buildCompleteSlicePrompt(
const inlined: string[] = [];
inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
inlined.push(await inlineFile(slicePlanPath, slicePlanRel, "Slice Plan"));
const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
if (requirementsInline) inlined.push(requirementsInline);
if (inlineLevel !== "minimal") {
const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
if (requirementsInline) inlined.push(requirementsInline);
}
// Inline all task summaries for this slice
const tDir = resolveTasksDir(base, mid, sid);
@ -618,7 +640,9 @@ export async function buildCompleteSlicePrompt(
}
}
inlined.push(inlineTemplate("slice-summary", "Slice Summary"));
inlined.push(inlineTemplate("uat", "UAT"));
if (inlineLevel !== "minimal") {
inlined.push(inlineTemplate("uat", "UAT"));
}
const completeActiveOverrides = await loadActiveOverrides(base);
const completeOverridesInline = formatOverridesSection(completeActiveOverrides);
if (completeOverridesInline) inlined.unshift(completeOverridesInline);
@ -641,8 +665,9 @@ export async function buildCompleteSlicePrompt(
}
export async function buildCompleteMilestonePrompt(
mid: string, midTitle: string, base: string,
mid: string, midTitle: string, base: string, level?: InlineLevel,
): Promise<string> {
const inlineLevel = level ?? resolveInlineLevel();
const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
@ -663,13 +688,15 @@ export async function buildCompleteMilestonePrompt(
}
}
// Inline root GSD files
const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
if (decisionsInline) inlined.push(decisionsInline);
const projectInline = await inlineGsdRootFile(base, "project.md", "Project");
if (projectInline) inlined.push(projectInline);
// Inline root GSD files (skip for minimal — completion can read these if needed)
if (inlineLevel !== "minimal") {
const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
if (decisionsInline) inlined.push(decisionsInline);
const projectInline = await inlineGsdRootFile(base, "project.md", "Project");
if (projectInline) inlined.push(projectInline);
}
// Inline milestone context file (milestone-level, not GSD root)
const contextPath = resolveMilestoneFile(base, mid, "CONTEXT");
const contextRel = relMilestoneFile(base, mid, "CONTEXT");
@ -779,8 +806,9 @@ export async function buildRunUatPrompt(
}
export async function buildReassessRoadmapPrompt(
mid: string, midTitle: string, completedSliceId: string, base: string,
mid: string, midTitle: string, completedSliceId: string, base: string, level?: InlineLevel,
): Promise<string> {
const inlineLevel = level ?? resolveInlineLevel();
const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
const summaryPath = resolveSliceFile(base, mid, completedSliceId, "SUMMARY");
@ -789,12 +817,14 @@ export async function buildReassessRoadmapPrompt(
const inlined: string[] = [];
inlined.push(await inlineFile(roadmapPath, roadmapRel, "Current Roadmap"));
inlined.push(await inlineFile(summaryPath, summaryRel, `${completedSliceId} Summary`));
const projectInline = await inlineGsdRootFile(base, "project.md", "Project");
if (projectInline) inlined.push(projectInline);
const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
if (decisionsInline) inlined.push(decisionsInline);
if (inlineLevel !== "minimal") {
const projectInline = await inlineGsdRootFile(base, "project.md", "Project");
if (projectInline) inlined.push(projectInline);
const requirementsInline = await inlineGsdRootFile(base, "requirements.md", "Requirements");
if (requirementsInline) inlined.push(requirementsInline);
const decisionsInline = await inlineGsdRootFile(base, "decisions.md", "Decisions");
if (decisionsInline) inlined.push(decisionsInline);
}
const inlinedContext = `## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`;

View file

@ -0,0 +1,236 @@
/**
* GSD Task Complexity Classification
*
* Classifies task plans and unit types by complexity to enable model routing.
* Pure heuristics + adaptive learning — no LLM calls, sub-millisecond.
*
* Combined approach:
* - Task plan analysis (step count, file count, description length, signal words)
* - Unit type defaults (complete-slice → light, replan → heavy, etc.)
* - Budget pressure thresholds (50/75/90% graduated downgrade)
* - Adaptive learning via routing-history (optional)
*
* Classification output uses our TokenProfile-aligned TaskComplexity type
* for the simple classifier, and ComplexityTier for the full unit classifier.
*/
import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
import type { ComplexityTier, ClassificationResult, TaskMetadata } from "./types.js";
// Re-export for convenience
export type { ComplexityTier, ClassificationResult, TaskMetadata };
// ─── Simple Task Complexity (for task plan analysis) ──────────────────────
/** Coarse complexity bucket assigned to a task plan by `classifyTaskComplexity`. */
export type TaskComplexity = "simple" | "standard" | "complex";

/**
 * Words that signal non-trivial work requiring full reasoning capacity.
 * Each entry is a regex fragment (note `backward.?compat`); the combined
 * pattern is case-insensitive and has no word boundaries, so it also
 * matches inside longer words.
 */
const COMPLEXITY_SIGNALS = [
  "research", "investigate", "refactor", "migrate", "integrate",
  "complex", "architect", "redesign", "security", "performance",
  "concurrent", "parallel", "distributed", "backward.?compat",
  "migration", "architecture", "concurrency", "compatibility",
];
const COMPLEXITY_PATTERN = new RegExp(COMPLEXITY_SIGNALS.join("|"), "i");

/**
 * Classify a task plan by its structural complexity.
 * Used by dispatch to select execution_simple vs execution model.
 *
 * Heuristics, evaluated in this order:
 * - empty plan or no `## Steps` section → "standard" (not enough structure to judge)
 * - ≥ 8 numbered steps, ≥ 8 distinct backticked file names, > 2000 characters
 *   before the steps header, or ≥ 5 fenced code blocks → "complex"
 * - ≤ 3 steps, < 500 characters before the steps header, ≤ 3 files and no
 *   signal word anywhere in the plan → "simple"
 * - anything else → "standard"
 *
 * @param planContent Raw markdown of the task plan
 * @returns The complexity bucket for the plan
 */
export function classifyTaskComplexity(planContent: string): TaskComplexity {
  if (!planContent || planContent.trim().length === 0) return "standard";

  const stepsMatch = planContent.match(/##\s*Steps\s*\n([\s\S]*?)(?=\n##|\n---|$)/i);
  if (!stepsMatch) return "standard";

  const stepsSection = stepsMatch[1] ?? "";
  const stepCount = (stepsSection.match(/^\s*\d+\.\s/gm) ?? []).length;

  // The description is everything before the first steps header. A header
  // exists here (stepsMatch succeeded), so index 0 legitimately means "no
  // description" — it must not fall back to the full plan length.
  const descriptionLength = Math.max(planContent.search(/##\s*Steps/i), 0);

  // Distinct backticked names that look like `path/file.ext` (1-4 letter extension).
  const filePatterns = planContent.match(/`[a-zA-Z0-9_/.-]+\.[a-z]{1,4}`/g) ?? [];
  const uniqueFiles = new Set(filePatterns.map(f => f.replace(/`/g, "")));
  const fileCount = uniqueFiles.size;

  const hasComplexitySignals = COMPLEXITY_PATTERN.test(planContent);

  // Count fenced code blocks (from #579 Phase 4): every two fence lines make one block.
  const codeBlockCount = (planContent.match(/^```/gm) ?? []).length / 2;

  if (stepCount >= 8 || fileCount >= 8 || descriptionLength > 2000 || codeBlockCount >= 5) {
    return "complex";
  }
  if (stepCount <= 3 && descriptionLength < 500 && fileCount <= 3 && !hasComplexitySignals) {
    return "simple";
  }
  return "standard";
}
// ─── Unit Type → Default Tier Mapping (from #579) ─────────────────────────
/**
* Default complexity tier per unit type.
* classifyUnitComplexity falls back to "standard" for unit types not listed here.
*/
const UNIT_TYPE_TIERS: Record<string, ComplexityTier> = {
// Light: structured summaries, completion, UAT
"complete-slice": "light",
"run-uat": "light",
// Standard: research, routine planning
"research-milestone": "standard",
"research-slice": "standard",
"plan-milestone": "standard",
"plan-slice": "standard",
// Standard: execution default; classifyUnitComplexity replaces it with the task-plan analysis result when one is available
"execute-task": "standard",
// Heavy: replanning and roadmap reassessment
"replan-slice": "heavy",
"reassess-roadmap": "heavy",
// Standard: milestone completion
"complete-milestone": "standard",
};
/**
 * Pick a complexity tier for a unit so the dispatcher can route it to a model.
 *
 * Resolution order:
 * 1. `hook/*`, `triage-captures` and `quick-task*` units are always "light".
 * 2. `execute-task` units use the task-plan analysis when one is available.
 * 3. Everything else uses the unit-type default, or "standard" when unlisted.
 * The chosen tier is then passed through budget pressure, which may lower it.
 *
 * @param unitType The type of unit being dispatched
 * @param unitId The unit ID (e.g. "M001/S01/T01")
 * @param basePath Project base path (for reading task plans)
 * @param budgetPct Current budget usage as fraction (0.0-1.0+), or undefined
 * @param metadata Optional pre-parsed task metadata, forwarded to the plan analysis
 * @returns The tier, a human-readable reason, and whether budget pressure lowered it
 */
export function classifyUnitComplexity(
  unitType: string,
  unitId: string,
  basePath: string,
  budgetPct?: number,
  metadata?: TaskMetadata,
): ClassificationResult {
  const isHookUnit = unitType.startsWith("hook/");
  if (isHookUnit || unitType === "triage-captures" || unitType.startsWith("quick-task")) {
    const lightReason = isHookUnit ? "hook unit" : `${unitType} unit`;
    return applyBudgetPressure({ tier: "light", reason: lightReason, downgraded: false }, budgetPct);
  }

  // Only execute-task units have a task plan worth analysing.
  const planAnalysis = unitType === "execute-task"
    ? analyzeTaskFromPlan(unitId, basePath, metadata)
    : null;

  const classified: ClassificationResult = planAnalysis
    ? { tier: planAnalysis.tier, reason: planAnalysis.reason, downgraded: false }
    : { tier: UNIT_TYPE_TIERS[unitType] ?? "standard", reason: `unit type: ${unitType}`, downgraded: false };

  return applyBudgetPressure(classified, budgetPct);
}
// ─── Tier Helpers ─────────────────────────────────────────────────────────
/**
* Single-letter label for a tier: "L" (light), "S" (standard) or "H" (heavy).
*
* @param tier The complexity tier to label
* @returns The one-character label
*/
export function tierLabel(tier: ComplexityTier): string {
switch (tier) {
case "light": return "L";
case "standard": return "S";
case "heavy": return "H";
}
}
/**
* Numeric rank of a tier, so tiers can be compared: light = 0, standard = 1, heavy = 2.
*
* @param tier The complexity tier to rank
* @returns The rank of the tier
*/
export function tierOrdinal(tier: ComplexityTier): number {
switch (tier) {
case "light": return 0;
case "standard": return 1;
case "heavy": return 2;
}
}
/**
* Next tier up from the given one: light → standard → heavy.
*
* @param currentTier The tier to escalate from
* @returns The next higher tier, or null when already at "heavy"
*/
export function escalateTier(currentTier: ComplexityTier): ComplexityTier | null {
switch (currentTier) {
case "light": return "standard";
case "standard": return "heavy";
case "heavy": return null; // nothing above heavy
}
}
// ─── Budget Pressure (from #579 — graduated thresholds) ───────────────────
/**
 * Lower a classification's tier as budget usage grows.
 *
 * - below 50% (or budget unknown): returned untouched
 * - 50% up to 90%: standard → light; light and heavy are kept
 * - 90% and above: heavy → standard, everything else → light
 *
 * NOTE(review): the 50-75% and 75-90% bands currently behave identically —
 * confirm whether the 75% threshold was meant to do something stricter.
 *
 * The `result` object is modified in place and returned. When the tier
 * changes, `downgraded` is set and the rounded budget percentage is
 * appended to `reason`.
 *
 * @param result Classification to adjust (mutated)
 * @param budgetPct Budget usage as a fraction, or undefined when unknown
 * @returns The same `result` object
 */
function applyBudgetPressure(
  result: ClassificationResult,
  budgetPct?: number,
): ClassificationResult {
  if (budgetPct === undefined || budgetPct < 0.5) return result;

  const tierBefore = result.tier;
  const nearlyExhausted = budgetPct >= 0.9;

  if (tierBefore === "standard") {
    // Standard work drops to light in every pressure band.
    result.tier = "light";
  } else if (nearlyExhausted) {
    // Past 90% even heavy work steps down one tier; the rest goes to light.
    result.tier = tierBefore === "heavy" ? "standard" : "light";
  }

  if (result.tier !== tierBefore) {
    result.downgraded = true;
    result.reason = `${result.reason} (budget pressure: ${Math.round(budgetPct * 100)}%)`;
  }
  return result;
}
// ─── Task Plan Analysis ───────────────────────────────────────────────────
/** Tier and explanation derived from a task plan file. */
interface TaskAnalysis {
  tier: ComplexityTier;
  reason: string;
}

/**
 * Read a task's plan file and translate its complexity into a tier.
 *
 * The plan is looked up at
 * `<basePath>/.gsd/milestones/<mid>/slices/<sid>/tasks/<tid>-PLAN.md`, where
 * mid/sid/tid are the first three "/"-separated segments of `unitId`.
 *
 * @param unitId Unit ID such as "M001/S01/T01"
 * @param basePath Project base path
 * @param metadata Accepted for interface parity; not consulted by this function
 * @returns The analysis, or null when the ID has fewer than three segments,
 *          the plan is missing or empty, or reading it throws
 */
function analyzeTaskFromPlan(
  unitId: string,
  basePath: string,
  metadata?: TaskMetadata,
): TaskAnalysis | null {
  const segments = unitId.split("/");
  if (segments.length < 3) return null;

  const planPath = join(
    basePath, ".gsd", "milestones", segments[0],
    "slices", segments[1], "tasks", `${segments[2]}-PLAN.md`,
  );

  let planText: string;
  try {
    planText = existsSync(planPath) ? readFileSync(planPath, "utf-8") : "";
  } catch {
    // Unreadable plan: let the caller fall back to the unit-type default.
    return null;
  }
  if (planText.length === 0) return null;

  const complexity = classifyTaskComplexity(planText);
  if (complexity === "simple") {
    return { tier: "light", reason: "task plan: simple (few steps, small scope)" };
  }
  if (complexity === "complex") {
    return { tier: "heavy", reason: "task plan: complex (many steps/files or signal words)" };
  }
  return { tier: "standard", reason: "task plan: standard complexity" };
}

View file

@ -303,6 +303,50 @@ export function formatCost(cost: number): string {
return `$${n.toFixed(2)}`;
}
// ─── Budget Prediction ────────────────────────────────────────────────────────
/**
 * Average the cost of the given units, grouped by unit type.
 *
 * @param units Unit metrics to aggregate; each contributes its `type` and `cost`
 * @returns Map from unit type to mean cost, in first-seen order of the types.
 *          Empty when `units` is empty.
 */
export function getAverageCostPerUnitType(units: UnitMetrics[]): Map<string, number> {
  // unit type -> [running cost total, number of units seen]
  const tallies = new Map<string, [number, number]>();
  for (const unit of units) {
    let tally = tallies.get(unit.type);
    if (!tally) {
      tally = [0, 0];
      tallies.set(unit.type, tally);
    }
    tally[0] += unit.cost;
    tally[1] += 1;
  }

  const averages = new Map<string, number>();
  tallies.forEach(([costTotal, unitCount], unitType) => {
    averages.set(unitType, costTotal / unitCount);
  });
  return averages;
}
/**
 * Estimate the cost of the units that are still to be dispatched.
 *
 * Each remaining unit is priced at the average for its type. Unit types
 * with no recorded average are priced at `fallbackAvg` when given, otherwise
 * at the unweighted mean of all known per-type averages (0 when there are none).
 *
 * @param avgCosts - Average cost per unit type
 * @param remainingUnits - Array of unit types still to dispatch
 * @param fallbackAvg - Price for unit types not present in `avgCosts`
 * @returns Estimated remaining cost in USD
 */
export function predictRemainingCost(
  avgCosts: Map<string, number>,
  remainingUnits: string[],
  fallbackAvg?: number,
): number {
  const knownAverages = Array.from(avgCosts.values());
  let meanOfAverages = 0;
  if (knownAverages.length > 0) {
    let sum = 0;
    for (const avg of knownAverages) sum += avg;
    meanOfAverages = sum / knownAverages.length;
  }
  const unseenTypeCost = fallbackAvg ?? meanOfAverages;

  return remainingUnits.reduce(
    (runningTotal, unitType) => runningTotal + (avgCosts.get(unitType) ?? unseenTypeCost),
    0,
  );
}
/**
* Compute a projected remaining cost based on completed slice averages.
*

View file

@ -3,7 +3,7 @@ import { homedir } from "node:os";
import { isAbsolute, join } from "node:path";
import { getAgentDir } from "@gsd/pi-coding-agent";
import type { GitPreferences } from "./git-service.js";
import type { PostUnitHookConfig, PreDispatchHookConfig, BudgetEnforcementMode, NotificationPreferences } from "./types.js";
import type { PostUnitHookConfig, PreDispatchHookConfig, BudgetEnforcementMode, NotificationPreferences, TokenProfile, InlineLevel, PhaseSkipPreferences } from "./types.js";
import { VALID_BRANCH_NAME } from "./git-service.js";
const GLOBAL_PREFERENCES_PATH = join(homedir(), ".gsd", "preferences.md");
@ -36,6 +36,8 @@ const KNOWN_PREFERENCE_KEYS = new Set<string>([
"git",
"post_unit_hooks",
"pre_dispatch_hooks",
"token_profile",
"phases",
]);
export interface GSDSkillRule {
@ -66,7 +68,9 @@ export interface GSDModelConfig {
research?: string;
planning?: string;
execution?: string;
execution_simple?: string;
completion?: string;
subagent?: string;
}
/**
@ -77,7 +81,9 @@ export interface GSDModelConfigV2 {
research?: string | GSDPhaseModelConfig;
planning?: string | GSDPhaseModelConfig;
execution?: string | GSDPhaseModelConfig;
execution_simple?: string | GSDPhaseModelConfig;
completion?: string | GSDPhaseModelConfig;
subagent?: string | GSDPhaseModelConfig;
}
/** Normalized model selection with resolved fallbacks */
@ -122,6 +128,8 @@ export interface GSDPreferences {
git?: GitPreferences;
post_unit_hooks?: PostUnitHookConfig[];
pre_dispatch_hooks?: PreDispatchHookConfig[];
token_profile?: TokenProfile;
phases?: PhaseSkipPreferences;
}
export interface LoadedGSDPreferences {
@ -631,11 +639,19 @@ export function resolveModelWithFallbacksForUnit(unitType: string): ResolvedMode
case "execute-task":
phaseConfig = m.execution;
break;
case "execute-task-simple":
phaseConfig = m.execution_simple ?? m.execution;
break;
case "complete-slice":
case "run-uat":
phaseConfig = m.completion;
break;
default:
// Subagent unit types (e.g., "subagent", "subagent/scout")
if (unitType === "subagent" || unitType.startsWith("subagent/")) {
phaseConfig = m.subagent;
break;
}
return undefined;
}
@ -670,6 +686,73 @@ export function resolveAutoSupervisorConfig(): AutoSupervisorConfig {
};
}
// ─── Token Profile Resolution ─────────────────────────────────────────────
const VALID_TOKEN_PROFILES = new Set<TokenProfile>(["budget", "balanced", "quality"]);
/**
 * Build the default preferences implied by a token profile.
 *
 * The result is meant as a base layer — explicit user preferences always
 * override these defaults. A fresh object is returned on every call.
 *
 * - budget: cheaper models for planning/execution/completion/subagents and
 *   all research + reassess phases skipped
 * - balanced: subagent model pinned, slice research skipped
 * - quality: no model or phase overrides
 *
 * NOTE(review): the dated model IDs below are not validated in this file —
 * confirm they match identifiers the model provider actually accepts.
 *
 * @param profile The token profile tier
 * @returns Partial preferences containing `models` and `phases` defaults
 */
export function resolveProfileDefaults(profile: TokenProfile): Partial<GSDPreferences> {
  const sonnetModel = "claude-sonnet-4-5-20250514";
  const haikuModel = "claude-haiku-4-5-20250414";

  switch (profile) {
    case "budget": {
      const models = {
        planning: sonnetModel,
        execution: sonnetModel,
        execution_simple: haikuModel,
        completion: haikuModel,
        subagent: haikuModel,
      };
      const phases = {
        skip_research: true,
        skip_reassess: true,
        skip_slice_research: true,
      };
      return { models, phases };
    }
    case "balanced": {
      const models = { subagent: sonnetModel };
      const phases = { skip_slice_research: true };
      return { models, phases };
    }
    case "quality":
      return { models: {}, phases: {} };
  }
}
/**
 * Determine which token profile is in effect.
 *
 * Reads `token_profile` from the effective preferences and accepts it only
 * when it is one of the known profiles.
 *
 * @returns The configured profile, or "balanced" when none is set or the
 *          value is not a valid profile (D046)
 */
export function resolveEffectiveProfile(): TokenProfile {
  const configured = loadEffectiveGSDPreferences()?.preferences.token_profile;
  return configured && VALID_TOKEN_PROFILES.has(configured) ? configured : "balanced";
}
/**
 * Map the active token profile to a prompt inline level.
 * budget → minimal, balanced → standard, quality → full.
 *
 * @returns The inline level for the effective profile
 */
export function resolveInlineLevel(): InlineLevel {
  // Record keyed by TokenProfile keeps the mapping exhaustive at compile time.
  const levelByProfile: Record<TokenProfile, InlineLevel> = {
    budget: "minimal",
    balanced: "standard",
    quality: "full",
  };
  return levelByProfile[resolveEffectiveProfile()];
}
function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPreferences {
return {
version: override.version ?? base.version,
@ -697,6 +780,10 @@ function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPr
: undefined,
post_unit_hooks: mergePostUnitHooks(base.post_unit_hooks, override.post_unit_hooks),
pre_dispatch_hooks: mergePreDispatchHooks(base.pre_dispatch_hooks, override.pre_dispatch_hooks),
token_profile: override.token_profile ?? base.token_profile,
phases: (base.phases || override.phases)
? { ...(base.phases ?? {}), ...(override.phases ?? {}) }
: undefined,
};
}
@ -803,6 +890,36 @@ export function validatePreferences(preferences: GSDPreferences): {
}
}
// ─── Token Profile ─────────────────────────────────────────────────
if (preferences.token_profile !== undefined) {
if (typeof preferences.token_profile === "string" && VALID_TOKEN_PROFILES.has(preferences.token_profile as TokenProfile)) {
validated.token_profile = preferences.token_profile as TokenProfile;
} else {
errors.push(`token_profile must be one of: budget, balanced, quality`);
}
}
// ─── Phase Skip Preferences ─────────────────────────────────────────
if (preferences.phases !== undefined) {
if (typeof preferences.phases === "object" && preferences.phases !== null) {
const validatedPhases: PhaseSkipPreferences = {};
const p = preferences.phases as Record<string, unknown>;
if (p.skip_research !== undefined) validatedPhases.skip_research = !!p.skip_research;
if (p.skip_reassess !== undefined) validatedPhases.skip_reassess = !!p.skip_reassess;
if (p.skip_slice_research !== undefined) validatedPhases.skip_slice_research = !!p.skip_slice_research;
// Warn on unknown phase keys
const knownPhaseKeys = new Set(["skip_research", "skip_reassess", "skip_slice_research"]);
for (const key of Object.keys(p)) {
if (!knownPhaseKeys.has(key)) {
warnings.push(`unknown phases key "${key}" — ignored`);
}
}
validated.phases = validatedPhases;
} else {
errors.push(`phases must be an object`);
}
}
// ─── Context Pause Threshold ────────────────────────────────────────
if (preferences.context_pause_threshold !== undefined) {
const raw = preferences.context_pause_threshold;

View file

@ -0,0 +1,290 @@
// GSD Extension — Routing History (Adaptive Learning)
// Tracks success/failure per tier per unit-type pattern to improve
// classification accuracy over time.
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
import { join } from "node:path";
import { gsdRoot } from "./paths.js";
import type { ComplexityTier } from "./types.js";
// ─── Types ───────────────────────────────────────────────────────────────────
/** Success/failure counters for one complexity tier of one pattern. */
export interface TierOutcome {
/** Number of outcomes counted as successful. */
success: number;
/** Number of outcomes counted as failed. */
fail: number;
}
/** Outcome counters for a single pattern, broken down by complexity tier. */
export interface PatternHistory {
/** Outcomes of units run at the "light" tier. */
light: TierOutcome;
/** Outcomes of units run at the "standard" tier. */
standard: TierOutcome;
/** Outcomes of units run at the "heavy" tier. */
heavy: TierOutcome;
}
/** Complete routing-history state held in memory and handed to saveHistory. */
export interface RoutingHistoryData {
/** Schema version of this structure. */
version: 1;
/** Keyed by pattern string, e.g. "execute-task:docs" or "complete-slice" */
patterns: Record<string, PatternHistory>;
/** User feedback entries (from /gsd:rate-unit) */
feedback: FeedbackEntry[];
/** Last updated timestamp (ISO 8601 string, set by recordOutcome) */
updatedAt: string;
}
/** One piece of user feedback about the tier chosen for a unit. */
export interface FeedbackEntry {
/** Unit type the feedback refers to (e.g. "execute-task"). */
unitType: string;
/** ID of the unit the feedback refers to. */
unitId: string;
/** Tier the unit ran at. */
tier: ComplexityTier;
/** User's rating; presumably "over"/"under" mean the tier was too high/too low — confirm against the consumer of this field. */
rating: "over" | "under" | "ok";
/** When the feedback was recorded. */
timestamp: string;
}
// ─── Constants ───────────────────────────────────────────────────────────────
// File name of the routing-history store (the code that reads/writes it is not shown in this section).
const HISTORY_FILE = "routing-history.json";
// Cap on success + fail per tier per pattern. recordOutcome enforces it by scaling
// both counters down proportionally, so this is an approximate window, not a literal list of the last N entries.
const ROLLING_WINDOW = 50;
const FAILURE_THRESHOLD = 0.20; // >20% failure rate triggers tier bump
const FEEDBACK_WEIGHT = 2; // feedback signals count 2x vs automatic
// ─── In-Memory State ─────────────────────────────────────────────────────────
let history: RoutingHistoryData | null = null;
let historyBasePath = "";
// ─── Public API ──────────────────────────────────────────────────────────────
/**
 * Load the persisted routing history for a project into module state.
 *
 * @param base Project base path; also remembered as the target for later saves.
 */
export function initRoutingHistory(base: string): void {
  const loaded = loadHistory(base);
  history = loaded;
  historyBasePath = base;
}
/**
 * Drop the in-memory routing history and forget the project base path.
 * Nothing is written to or removed from disk.
 */
export function resetRoutingHistory(): void {
  historyBasePath = "";
  history = null;
}
/**
 * Record the outcome of a unit dispatch and persist the updated history.
 *
 * The outcome is tallied against the base pattern (`unitType`) and against one
 * `"<unitType>:<tag>"` pattern per tag. Does nothing when history has not been
 * initialized.
 *
 * @param unitType The unit type (e.g. "execute-task")
 * @param tier     The tier that was used
 * @param success  Whether the unit completed successfully
 * @param tags     Optional tags from task metadata (e.g. ["docs", "test"])
 */
export function recordOutcome(
  unitType: string,
  tier: ComplexityTier,
  success: boolean,
  tags?: string[],
): void {
  const data = history;
  if (!data) return;

  // Base pattern first, then the tag-specific patterns in the order given.
  const patternKeys = [unitType, ...(tags ?? []).map((tag) => `${unitType}:${tag}`)];
  for (const key of patternKeys) {
    ensurePattern(key);
    const counts = data.patterns[key][tier];
    if (success) {
      counts.success += 1;
    } else {
      counts.fail += 1;
    }
  }

  // Enforce the window on every pattern/tier: tallies are aggregates, so an
  // over-full bucket is scaled down proportionally rather than trimmed.
  for (const entry of Object.values(data.patterns)) {
    for (const level of ["light", "standard", "heavy"] as const) {
      const bucket = entry[level];
      const total = bucket.success + bucket.fail;
      if (total > ROLLING_WINDOW) {
        const scale = ROLLING_WINDOW / total;
        bucket.success = Math.round(bucket.success * scale);
        bucket.fail = Math.round(bucket.fail * scale);
      }
    }
  }

  data.updatedAt = new Date().toISOString();
  saveHistory(historyBasePath, data);
}
/**
 * Record user feedback for a completed unit and persist the updated history.
 *
 * The entry is appended to the feedback log (capped at the newest 200) and
 * folded into the base pattern's tallies with FEEDBACK_WEIGHT:
 *  - "over":  credits a success to the tier below `tier` (nothing if already "light")
 *  - "under": charges a failure to `tier`
 *  - "ok":    leaves the tallies untouched
 *
 * Does nothing when history has not been initialized.
 */
export function recordFeedback(
  unitType: string,
  unitId: string,
  tier: ComplexityTier,
  rating: "over" | "under" | "ok",
): void {
  const data = history;
  if (!data) return;

  data.feedback.push({
    unitType,
    unitId,
    tier,
    rating,
    timestamp: new Date().toISOString(),
  });
  // Keep only the most recent 200 feedback entries.
  if (data.feedback.length > 200) {
    data.feedback = data.feedback.slice(-200);
  }

  ensurePattern(unitType);
  const tiers = data.patterns[unitType];
  switch (rating) {
    case "over": {
      // A simpler model would have sufficed: make the next tier down look better.
      const lower = tierBelow(tier);
      if (lower) {
        tiers[lower].success += FEEDBACK_WEIGHT;
      }
      break;
    }
    case "under":
      // A stronger model was needed: count it against the tier that was used.
      tiers[tier].fail += FEEDBACK_WEIGHT;
      break;
    default:
      // "ok" — no adjustment needed
      break;
  }

  data.updatedAt = new Date().toISOString();
  saveHistory(historyBasePath, data);
}
/**
 * Look up whether recorded failures suggest running a unit at a higher tier.
 *
 * Tag-specific patterns (`"<unitType>:<tag>"`) are consulted first, in order;
 * the first one that yields a bump wins. Otherwise the base `unitType` pattern
 * decides.
 *
 * @returns The tier to bump to, or null when no adjustment applies or history
 *          is not initialized.
 */
export function getAdaptiveTierAdjustment(
  unitType: string,
  currentTier: ComplexityTier,
  tags?: string[],
): ComplexityTier | null {
  if (!history) return null;

  for (const tag of tags ?? []) {
    const bumped = checkPatternFailureRate(`${unitType}:${tag}`, currentTier);
    if (bumped) return bumped;
  }

  return checkPatternFailureRate(unitType, currentTier);
}
/**
 * Clear all routing history (user-triggered reset).
 *
 * Replaces the in-memory history with an empty one, writes it to disk under
 * `base`, and remembers `base` as the save location. Recording the base keeps
 * later recordOutcome()/recordFeedback() saves pointed at the same project
 * even when this is called before initRoutingHistory() or with a different
 * base than the one initialized.
 *
 * @param base Project base path whose history file is overwritten.
 */
export function clearRoutingHistory(base: string): void {
  history = createEmptyHistory();
  historyBasePath = base;
  saveHistory(base, history);
}
/**
 * Get current history data (for display/debugging).
 *
 * Returns the module's own object, not a copy, so mutations by the caller
 * change the live state. Returns null when history has not been initialized.
 */
export function getRoutingHistory(): RoutingHistoryData | null {
return history;
}
// ─── Internal ────────────────────────────────────────────────────────────────
/**
 * Decide whether one pattern's record at `tier` warrants a bump.
 *
 * @returns The next tier up when at least 3 outcomes are recorded and the
 *          failure rate exceeds FAILURE_THRESHOLD; otherwise null (also null
 *          when the pattern is unknown or `tier` is already the top tier).
 */
function checkPatternFailureRate(
  pattern: string,
  tier: ComplexityTier,
): ComplexityTier | null {
  const entry = history?.patterns[pattern];
  if (!entry) return null;

  const { success, fail } = entry[tier];
  const samples = success + fail;
  // Too few samples to draw a conclusion.
  if (samples < 3) return null;

  return fail / samples > FAILURE_THRESHOLD ? tierAbove(tier) : null;
}
/** Next tier up from `tier` (light → standard → heavy), or null at the top. */
function tierAbove(tier: ComplexityTier): ComplexityTier | null {
  if (tier === "light") return "standard";
  if (tier === "standard") return "heavy";
  return null;
}
/** Next tier down from `tier` (heavy → standard → light), or null at the bottom. */
function tierBelow(tier: ComplexityTier): ComplexityTier | null {
  if (tier === "heavy") return "standard";
  if (tier === "standard") return "light";
  return null;
}
/**
 * Make sure `pattern` has an entry in the in-memory history, creating one with
 * all-zero tallies for every tier when absent. No-op if history is not
 * initialized or the pattern already exists.
 */
function ensurePattern(pattern: string): void {
  if (!history || history.patterns[pattern]) return;

  const zeroed = () => ({ success: 0, fail: 0 });
  history.patterns[pattern] = {
    light: zeroed(),
    standard: zeroed(),
    heavy: zeroed(),
  };
}
/** Build a fresh version-1 history with no patterns or feedback, stamped with the current time. */
function createEmptyHistory(): RoutingHistoryData {
  const createdAt = new Date().toISOString();
  const empty: RoutingHistoryData = {
    version: 1,
    patterns: {},
    feedback: [],
    updatedAt: createdAt,
  };
  return empty;
}
/** Full path of the history file: HISTORY_FILE joined onto gsdRoot(base). */
function historyPath(base: string): string {
return join(gsdRoot(base), HISTORY_FILE);
}
/**
 * Read the persisted routing history for `base`.
 *
 * Never throws: a missing, unreadable, unparseable, or structurally invalid
 * file yields a fresh empty history. A readable file is normalized so that the
 * rest of the module can rely on `feedback` being an array and on every
 * pattern carrying numeric tallies for all three tiers.
 */
function loadHistory(base: string): RoutingHistoryData {
  try {
    const raw = readFileSync(historyPath(base), "utf-8");
    const restored = normalizeHistory(JSON.parse(raw));
    if (restored) return restored;
  } catch {
    // File doesn't exist or is corrupt — start fresh
  }
  return createEmptyHistory();
}

/**
 * Validate parsed JSON and rebuild it as a well-formed history.
 * Returns null when the value is not a version-1 history object at all.
 */
function normalizeHistory(parsed: unknown): RoutingHistoryData | null {
  if (!isPlainObject(parsed) || parsed.version !== 1 || !isPlainObject(parsed.patterns)) {
    return null;
  }

  const patterns: RoutingHistoryData["patterns"] = {};
  for (const [key, value] of Object.entries(parsed.patterns)) {
    // Skip entries that cannot be a pattern record; missing tiers are zero-filled.
    if (!isPlainObject(value)) continue;
    patterns[key] = {
      light: normalizeOutcome(value.light),
      standard: normalizeOutcome(value.standard),
      heavy: normalizeOutcome(value.heavy),
    };
  }

  return {
    version: 1,
    patterns,
    // Older or hand-edited files may lack the feedback log; entries themselves are not inspected.
    feedback: Array.isArray(parsed.feedback) ? parsed.feedback : [],
    updatedAt: typeof parsed.updatedAt === "string" ? parsed.updatedAt : new Date().toISOString(),
  };
}

/** Coerce an arbitrary value into a { success, fail } tally, defaulting bad fields to 0. */
function normalizeOutcome(value: unknown): { success: number; fail: number } {
  const source: Record<string, unknown> = isPlainObject(value) ? value : {};
  return { success: toCount(source.success), fail: toCount(source.fail) };
}

/** Accept only finite, positive numbers as tallies; anything else counts as 0. */
function toCount(value: unknown): number {
  return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0;
}

/** True for non-null, non-array objects. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
/**
 * Write `data` to the history file under `base` as pretty-printed JSON with a
 * trailing newline, creating the directory first if needed.
 *
 * Best-effort: any error is swallowed on purpose so that a failed save never
 * interrupts auto-mode.
 */
function saveHistory(base: string, data: RoutingHistoryData): void {
  try {
    mkdirSync(gsdRoot(base), { recursive: true });
    const target = historyPath(base);
    const payload = `${JSON.stringify(data, null, 2)}\n`;
    writeFileSync(target, payload, "utf-8");
  } catch {
    // Non-fatal — don't let history failures break auto-mode
  }
}

View file

@ -0,0 +1,220 @@
/**
* Budget Prediction unit tests for M004/S04.
*
* Tests prediction math, auto-downgrade logic, and dashboard integration.
* Uses extracted pure functions (avoiding module import chain) and
* source-level structural checks for dashboard/auto.ts integration.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
// Directory of this test file, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Raw text of the modules under test (one directory up), used for the string-level checks below.
const metricsSrc = readFileSync(join(__dirname, "..", "metrics.ts"), "utf-8");
const dashboardSrc = readFileSync(join(__dirname, "..", "auto-dashboard.ts"), "utf-8");
// ─── Extract pure functions from metrics.ts source ────────────────────────
// Can't import directly due to paths.js → @gsd/pi-coding-agent import chain.
// Extract and evaluate the pure math functions.
/** Minimal stand-in for a unit metrics record: only the fields the local helpers read. */
interface MockUnitMetrics {
/** Unit type used as the grouping key, e.g. "execute-task" */
type: string;
/** Cost attributed to this unit */
cost: number;
}
// Re-implement the functions under test (verified against source below)
/**
 * Local re-implementation of the metrics helper: mean cost per unit type.
 *
 * @param units Completed units; only `type` and `cost` are read.
 * @returns Map from unit type to its average cost, in first-seen order. Empty for empty input.
 */
function getAverageCostPerUnitType(units: MockUnitMetrics[]): Map<string, number> {
  // type → [running total, number of units]
  const tallies = new Map<string, [number, number]>();
  for (const { type, cost } of units) {
    const [sum, seen] = tallies.get(type) ?? [0, 0];
    tallies.set(type, [sum + cost, seen + 1]);
  }

  return new Map(
    Array.from(tallies, ([type, [sum, seen]]): [string, number] => [type, sum / seen]),
  );
}
/**
 * Local re-implementation of the metrics helper: projected cost of the units still to run.
 *
 * @param avgCosts       Average cost per unit type.
 * @param remainingUnits Unit types still to be dispatched (one entry per unit).
 * @param fallbackAvg    Cost to assume for types missing from `avgCosts`; when
 *                       omitted, the mean of the known averages is used (0 if none).
 * @returns Sum of the per-unit estimates; 0 when nothing remains.
 */
function predictRemainingCost(
  avgCosts: Map<string, number>,
  remainingUnits: string[],
  fallbackAvg?: number,
): number {
  const meanOfAverages = (): number => {
    if (avgCosts.size === 0) return 0;
    let sum = 0;
    for (const avg of avgCosts.values()) sum += avg;
    return sum / avgCosts.size;
  };
  const defaultCost = fallbackAvg ?? meanOfAverages();

  return remainingUnits.reduce(
    (running, unitType) => running + (avgCosts.get(unitType) ?? defaultCost),
    0,
  );
}
// ═══════════════════════════════════════════════════════════════════════════
// Source Verification — confirm our re-implementation matches
// ═══════════════════════════════════════════════════════════════════════════
// String-presence checks: they confirm the exports exist in metrics.ts, not that
// their bodies match the local re-implementations above.
test("source: metrics.ts exports getAverageCostPerUnitType", () => {
assert.ok(metricsSrc.includes("export function getAverageCostPerUnitType"), "should be exported");
});
test("source: metrics.ts exports predictRemainingCost", () => {
assert.ok(metricsSrc.includes("export function predictRemainingCost"), "should be exported");
});
// NOTE(review): the two includes() are independent — this passes when both strings
// appear anywhere in metrics.ts, even if unrelated to each other.
test("source: getAverageCostPerUnitType uses Map<string, number>", () => {
assert.ok(
metricsSrc.includes("Map<string, number>") && metricsSrc.includes("getAverageCostPerUnitType"),
"should return Map<string, number>",
);
});
// ═══════════════════════════════════════════════════════════════════════════
// Average Cost Per Unit Type
// ═══════════════════════════════════════════════════════════════════════════
// Averages are compared with a 0.001 tolerance because the costs are floating-point sums.
test("avgCost: returns correct averages per unit type", () => {
const units: MockUnitMetrics[] = [
{ type: "execute-task", cost: 0.10 },
{ type: "execute-task", cost: 0.20 },
{ type: "plan-slice", cost: 0.05 },
{ type: "plan-slice", cost: 0.15 },
{ type: "complete-slice", cost: 0.08 },
];
const avgs = getAverageCostPerUnitType(units);
assert.ok(Math.abs(avgs.get("execute-task")! - 0.15) < 0.001, "execute-task avg should be 0.15");
assert.ok(Math.abs(avgs.get("plan-slice")! - 0.10) < 0.001, "plan-slice avg should be 0.10");
assert.ok(Math.abs(avgs.get("complete-slice")! - 0.08) < 0.001, "complete-slice avg should be 0.08");
});
// No units means no unit types, hence an empty map.
test("avgCost: returns empty map for empty input", () => {
const avgs = getAverageCostPerUnitType([]);
assert.equal(avgs.size, 0);
});
test("avgCost: single unit per type returns exact cost", () => {
const avgs = getAverageCostPerUnitType([{ type: "execute-task", cost: 0.42 }]);
assert.ok(Math.abs(avgs.get("execute-task")! - 0.42) < 0.001);
});
// ═══════════════════════════════════════════════════════════════════════════
// Predict Remaining Cost
// ═══════════════════════════════════════════════════════════════════════════
// Expected: 0.15 + 0.15 + 0.08 = 0.38.
test("predict: calculates remaining cost from averages", () => {
const avgs = new Map([
["execute-task", 0.15],
["plan-slice", 0.10],
["complete-slice", 0.08],
]);
const remaining = ["execute-task", "execute-task", "complete-slice"];
const cost = predictRemainingCost(avgs, remaining);
assert.ok(Math.abs(cost - 0.38) < 0.001);
});
test("predict: uses overall average for unknown unit types", () => {
const avgs = new Map([
["execute-task", 0.10],
["plan-slice", 0.20],
]);
const remaining = ["execute-task", "unknown-type"];
const cost = predictRemainingCost(avgs, remaining);
// unknown: (0.10 + 0.20) / 2 = 0.15 → total 0.10 + 0.15 = 0.25
assert.ok(Math.abs(cost - 0.25) < 0.001);
});
test("predict: returns 0 for empty remaining", () => {
const avgs = new Map([["execute-task", 0.15]]);
assert.equal(predictRemainingCost(avgs, []), 0);
});
// With no per-type averages, every unit is priced at the explicit fallback: 2 × 0.10.
test("predict: handles no averages with fallback", () => {
const avgs = new Map<string, number>();
const cost = predictRemainingCost(avgs, ["execute-task", "plan-slice"], 0.10);
assert.ok(Math.abs(cost - 0.20) < 0.001);
});
// With neither averages nor a fallback, the estimate per unit is 0.
test("predict: handles no averages and no fallback", () => {
const avgs = new Map<string, number>();
const cost = predictRemainingCost(avgs, ["execute-task"]);
assert.equal(cost, 0);
});
// ═══════════════════════════════════════════════════════════════════════════
// Dashboard Integration
// ═══════════════════════════════════════════════════════════════════════════
// String-presence checks against auto-dashboard.ts: they pass if the identifier
// appears anywhere in the file, not specifically inside AutoDashboardData.
test("dashboard: AutoDashboardData includes projectedRemainingCost field", () => {
assert.ok(
dashboardSrc.includes("projectedRemainingCost"),
"AutoDashboardData should have projectedRemainingCost field",
);
});
test("dashboard: AutoDashboardData includes profileDowngraded field", () => {
assert.ok(
dashboardSrc.includes("profileDowngraded"),
"AutoDashboardData should have profileDowngraded field",
);
});
// ═══════════════════════════════════════════════════════════════════════════
// Budget Prediction — End-to-End Math
// ═══════════════════════════════════════════════════════════════════════════
// Spent 1.60; execute-task averages 0.55, so three more add 1.65 → 3.25 > 2.50.
test("e2e: budget ceiling exceeded triggers downgrade prediction", () => {
const units: MockUnitMetrics[] = [
{ type: "execute-task", cost: 0.50 },
{ type: "execute-task", cost: 0.60 },
{ type: "plan-slice", cost: 0.30 },
{ type: "complete-slice", cost: 0.20 },
];
const totalSpent = units.reduce((sum, u) => sum + u.cost, 0); // 1.60
const avgs = getAverageCostPerUnitType(units);
const remaining = ["execute-task", "execute-task", "execute-task"];
const predictedRemaining = predictRemainingCost(avgs, remaining);
const predictedTotal = totalSpent + predictedRemaining;
const budgetCeiling = 2.50;
assert.ok(predictedTotal > budgetCeiling, "should predict budget exhaustion");
});
// "complete-slice" has no recorded average here, so it is priced at the mean of the known averages.
test("e2e: budget ceiling not exceeded does not trigger", () => {
const units: MockUnitMetrics[] = [
{ type: "execute-task", cost: 0.10 },
{ type: "plan-slice", cost: 0.05 },
];
const totalSpent = units.reduce((sum, u) => sum + u.cost, 0); // 0.15
const avgs = getAverageCostPerUnitType(units);
const remaining = ["execute-task", "complete-slice"];
const predictedRemaining = predictRemainingCost(avgs, remaining);
const predictedTotal = totalSpent + predictedRemaining;
const budgetCeiling = 5.00;
assert.ok(predictedTotal <= budgetCeiling, "should not predict budget exhaustion");
});
// ═══════════════════════════════════════════════════════════════════════════
// Downgrade Logic
// ═══════════════════════════════════════════════════════════════════════════
// NOTE(review): this exercises the local checkDowngrade stand-in defined inside the
// test, not the production downgrade code — it documents the intended one-way rule only.
test("downgrade: one-way per D048 — downgrade should not be reversible", () => {
// Simulate: first prediction triggers downgrade, second doesn't reverse it
let downgraded = false;
function checkDowngrade(predictedTotal: number, ceiling: number) {
if (!downgraded && predictedTotal > ceiling) {
downgraded = true;
}
// Never reverse — per D048
}
checkDowngrade(3.00, 2.50); // triggers
assert.ok(downgraded, "should downgrade when prediction exceeds ceiling");
checkDowngrade(1.50, 2.50); // doesn't reverse
assert.ok(downgraded, "should stay downgraded (one-way per D048)");
});

View file

@ -0,0 +1,294 @@
/**
* Complexity Routing unit tests for M004/S03.
*
* Tests task complexity classification accuracy and dispatch integration.
* Uses direct imports for the classifier (pure function, no heavy deps)
* and source-level checks for dispatch/preference wiring.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import { classifyTaskComplexity } from "../complexity.ts";
// Directory of this test file, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Raw source text (one directory up) for the string-level wiring checks further down.
const preferencesSrc = readFileSync(join(__dirname, "..", "preferences.ts"), "utf-8");
const complexitySrc = readFileSync(join(__dirname, "..", "complexity.ts"), "utf-8");
// ═══════════════════════════════════════════════════════════════════════════
// Classification: Simple Tasks
// ═══════════════════════════════════════════════════════════════════════════
// Fixtures are markdown task plans; the template-literal text is the classifier input verbatim.
test("classify: minimal task plan (2 steps, 1 file) → simple", () => {
const plan = `# T01: Add config key
## Steps
1. Add key to interface
2. Update validation
## Files
- \`config.ts\`
`;
assert.equal(classifyTaskComplexity(plan), "simple");
});
test("classify: 3 steps, 2 files, short description → simple", () => {
const plan = `# T01: Update types
Short description.
## Steps
1. Add type
2. Export it
3. Update imports
## Files
- \`types.ts\`
- \`index.ts\`
`;
assert.equal(classifyTaskComplexity(plan), "simple");
});
// ═══════════════════════════════════════════════════════════════════════════
// Classification: Standard Tasks
// ═══════════════════════════════════════════════════════════════════════════
// Plans that are expected to land in the middle tier, each for a different reason
// named in the test title (size, a signal word, or file count).
test("classify: medium task plan (5 steps, 4 files) → standard", () => {
const plan = `# T02: Implement auth middleware
Add JWT verification middleware.
## Steps
1. Create middleware file
2. Add token verification
3. Wire into router
4. Add error handling
5. Update types
## Files
- \`middleware.ts\`
- \`auth.ts\`
- \`router.ts\`
- \`types.ts\`
`;
assert.equal(classifyTaskComplexity(plan), "standard");
});
// Small by step/file count, but the title contains "Refactor".
test("classify: 3 steps but complexity signal word → standard (not simple)", () => {
const plan = `# T01: Refactor auth
## Steps
1. Extract helper
2. Update callers
3. Test
## Files
- \`auth.ts\`
`;
assert.equal(classifyTaskComplexity(plan), "standard");
});
test("classify: 4 steps, short but 4 files → standard", () => {
const plan = `# T01: Wire up
Short.
## Steps
1. Step one
2. Step two
3. Step three
4. Step four
## Files
- \`a.ts\`
- \`b.ts\`
- \`c.ts\`
- \`d.ts\`
`;
assert.equal(classifyTaskComplexity(plan), "standard");
});
// ═══════════════════════════════════════════════════════════════════════════
// Classification: Complex Tasks
// ═══════════════════════════════════════════════════════════════════════════
// Large plan: 10 numbered steps and 8 listed files.
test("classify: large task plan (10 steps, 8 files) → complex", () => {
const plan = `# T03: Migrate database schema
Full database migration with backward compatibility.
## Steps
1. Create migration file
2. Add new columns
3. Migrate existing data
4. Update ORM models
5. Update API handlers
6. Update tests
7. Run migration locally
8. Verify rollback
9. Update docs
10. Deploy staging
## Files
- \`migrations/001.ts\`
- \`models/user.ts\`
- \`models/session.ts\`
- \`api/users.ts\`
- \`api/sessions.ts\`
- \`tests/user.test.ts\`
- \`tests/session.test.ts\`
- \`docs/schema.md\`
`;
assert.equal(classifyTaskComplexity(plan), "complex");
});
// Only two steps, but the description is padded to 2100 characters.
test("classify: long description (>2000 chars) → complex", () => {
const longDesc = "A".repeat(2100);
const plan = `# T01: Complex task
${longDesc}
## Steps
1. Do it
2. Done
`;
assert.equal(classifyTaskComplexity(plan), "complex");
});
// ═══════════════════════════════════════════════════════════════════════════
// Classification: Edge Cases
// ═══════════════════════════════════════════════════════════════════════════
// Degenerate inputs must fall back to "standard" rather than "simple".
test("classify: empty plan → standard (conservative default)", () => {
assert.equal(classifyTaskComplexity(""), "standard");
});
test("classify: plan with no Steps section → standard", () => {
const plan = `# T01: Something\n\nJust a description with no structure.\n`;
assert.equal(classifyTaskComplexity(plan), "standard");
});
// "null-ish" here means a whitespace-only string; null/undefined are not passed.
test("classify: null-ish input → standard", () => {
assert.equal(classifyTaskComplexity(" "), "standard");
});
// ═══════════════════════════════════════════════════════════════════════════
// Complexity Signal Words
// ═══════════════════════════════════════════════════════════════════════════
// Two-step plans that would otherwise be small; the signal word is in the title.
test("classify: 'investigate' signal prevents simple classification", () => {
const plan = `# T01: Investigate auth bug\n\n## Steps\n1. Check logs\n2. Fix\n`;
assert.equal(classifyTaskComplexity(plan), "standard");
});
test("classify: 'security' signal prevents simple classification", () => {
const plan = `# T01: Security audit\n\n## Steps\n1. Review\n2. Fix\n`;
assert.equal(classifyTaskComplexity(plan), "standard");
});
// ═══════════════════════════════════════════════════════════════════════════
// Model Config — execution_simple
// ═══════════════════════════════════════════════════════════════════════════
// Regex check on preferences.ts text: `[^}]*` stops at the first closing brace, so
// this relies on execution_simple appearing before any nested `}` in each interface.
test("preferences: GSDModelConfig includes execution_simple field", () => {
const v1Match = preferencesSrc.match(/interface GSDModelConfig\s*\{[^}]*execution_simple/);
assert.ok(v1Match, "GSDModelConfig should have execution_simple field");
const v2Match = preferencesSrc.match(/interface GSDModelConfigV2\s*\{[^}]*execution_simple/);
assert.ok(v2Match, "GSDModelConfigV2 should have execution_simple field");
});
// Looks at the preferences.ts text between the first `case "budget":` and the first
// `case "balanced":`. The markers are asserted up front so that a renamed or
// reordered case fails with a message about the marker, not about execution_simple.
test("preferences: budget profile sets execution_simple model", () => {
  const budgetIdx = preferencesSrc.indexOf('case "budget":');
  const balancedIdx = preferencesSrc.indexOf('case "balanced":');
  assert.ok(budgetIdx !== -1, 'preferences.ts should contain case "budget":');
  assert.ok(balancedIdx > budgetIdx, 'case "balanced": should follow case "budget":');
  const budgetBlock = preferencesSrc.slice(budgetIdx, balancedIdx);
  assert.ok(budgetBlock.includes("execution_simple:"), "budget profile should set execution_simple");
});
// String-presence check: passes if the quoted literal appears anywhere in preferences.ts.
test("preferences: resolveModelWithFallbacksForUnit handles execute-task-simple", () => {
assert.ok(
preferencesSrc.includes('"execute-task-simple"'),
"should have execute-task-simple case in model resolution",
);
});
// ═══════════════════════════════════════════════════════════════════════════
// Classifier Module Structure
// ═══════════════════════════════════════════════════════════════════════════
// Structural checks on complexity.ts text; behaviour is covered by the classify tests.
test("complexity: module exports classifyTaskComplexity function", () => {
assert.ok(
complexitySrc.includes("export function classifyTaskComplexity"),
"should export classifyTaskComplexity",
);
});
test("complexity: module exports TaskComplexity type", () => {
assert.ok(
complexitySrc.includes("export type TaskComplexity"),
"should export TaskComplexity type",
);
});
// NOTE(review): only proves that some `return "standard"` exists in the file.
test("complexity: classifier uses conservative defaults", () => {
// Verify empty/missing input returns standard
assert.ok(
complexitySrc.includes('return "standard"'),
"should have standard as default return",
);
});
// ═══════════════════════════════════════════════════════════════════════════
// Unit Complexity Classification (from #579 — combined)
// ═══════════════════════════════════════════════════════════════════════════
// NOTE(review): reads the same file (complexity.ts) that complexitySrc already holds;
// the second read is redundant.
const complexitySrcFull = readFileSync(join(__dirname, "..", "complexity.ts"), "utf-8");
// All checks below are string-presence checks on the source text; none call the functions.
test("unit-classify: classifyUnitComplexity is exported", () => {
assert.ok(
complexitySrcFull.includes("export function classifyUnitComplexity"),
"should export classifyUnitComplexity",
);
});
test("unit-classify: unit type tier mapping exists", () => {
assert.ok(complexitySrcFull.includes("UNIT_TYPE_TIERS"), "should have unit type tier mapping");
assert.ok(complexitySrcFull.includes('"complete-slice": "light"'), "complete-slice should be light");
assert.ok(complexitySrcFull.includes('"replan-slice": "heavy"'), "replan-slice should be heavy");
});
// NOTE(review): the two includes() are independent; `"light"` may match anywhere in the file.
test("unit-classify: hook units default to light", () => {
assert.ok(
complexitySrcFull.includes('startsWith("hook/")') && complexitySrcFull.includes('"light"'),
"hook units should default to light tier",
);
});
// Matches the exact comparison text, so reformatting the thresholds would fail this test.
test("unit-classify: budget pressure has graduated thresholds", () => {
assert.ok(complexitySrcFull.includes("budgetPct >= 0.9"), "should have 90% threshold");
assert.ok(complexitySrcFull.includes("budgetPct >= 0.75"), "should have 75% threshold");
assert.ok(complexitySrcFull.includes("budgetPct < 0.5"), "should skip below 50%");
});
test("unit-classify: escalateTier function exists", () => {
assert.ok(
complexitySrcFull.includes("export function escalateTier"),
"should export escalateTier for failure recovery",
);
});
test("unit-classify: tierLabel function exists", () => {
assert.ok(
complexitySrcFull.includes("export function tierLabel"),
"should export tierLabel for dashboard display",
);
});
test("unit-classify: ComplexityTier imported from types.ts", () => {
assert.ok(
complexitySrcFull.includes('from "./types.js"') && complexitySrcFull.includes("ComplexityTier"),
"should import ComplexityTier from types",
);
});

View file

@ -0,0 +1,180 @@
/**
* Context Compression unit tests for M004/S02.
*
* Verifies that prompt builders respect inlineLevel parameter by
* inspecting the auto-prompts.ts source for level-aware gating.
* Cannot call builders directly due to @gsd/pi-coding-agent import
* resolution, so this uses source-level structural verification instead.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
// Directory of this test file, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Raw text of auto-prompts.ts (one directory up); every test below inspects this string.
const promptsSrc = readFileSync(join(__dirname, "..", "auto-prompts.ts"), "utf-8");
// ═══════════════════════════════════════════════════════════════════════════
// inlineLevel Parameter Presence
// ═══════════════════════════════════════════════════════════════════════════
// Prompt builders expected to take an optional `level?: InlineLevel` parameter.
const BUILDERS_WITH_LEVEL = [
"buildPlanMilestonePrompt",
"buildPlanSlicePrompt",
"buildExecuteTaskPrompt",
"buildCompleteSlicePrompt",
"buildCompleteMilestonePrompt",
"buildReassessRoadmapPrompt",
];
// Registers one test per builder.
for (const builder of BUILDERS_WITH_LEVEL) {
test(`compression: ${builder} accepts inlineLevel parameter`, () => {
// Find the function signature
// `[^)]*` stops at the first ")", so the parameter must appear before any
// parenthesis inside the parameter list.
const sigRegex = new RegExp(`export async function ${builder}\\([^)]*level\\?: InlineLevel`);
assert.ok(
sigRegex.test(promptsSrc),
`${builder} should have level?: InlineLevel parameter`,
);
});
}
// ═══════════════════════════════════════════════════════════════════════════
// Default Level Resolution
// ═══════════════════════════════════════════════════════════════════════════
// Counts occurrences of the exact fallback statement across the whole file and
// expects at least one per builder; it does not tie each occurrence to a builder.
test("compression: builders default to resolveInlineLevel() when no level passed", () => {
const defaultPattern = /const inlineLevel = level \?\? resolveInlineLevel\(\)/g;
const matches = promptsSrc.match(defaultPattern);
assert.ok(matches, "should have resolveInlineLevel() fallback");
assert.ok(
matches.length >= BUILDERS_WITH_LEVEL.length,
`should have ${BUILDERS_WITH_LEVEL.length} fallback instances, found ${matches?.length}`,
);
});
// ═══════════════════════════════════════════════════════════════════════════
// Minimal Level — Template Reduction
// ═══════════════════════════════════════════════════════════════════════════
// NOTE(review): each test below ANDs independent includes() over the whole file, so
// it proves both strings exist somewhere, not that one gates the other.
test("compression: buildExecuteTaskPrompt minimal drops decisions template", () => {
// In the execute-task builder, minimal should only inline task-summary, not decisions
assert.ok(
promptsSrc.includes('inlineLevel === "minimal"') &&
promptsSrc.includes('inlineTemplate("task-summary"'),
"execute-task should conditionally include decisions template based on level",
);
});
// Matches the exact condition text, including the `priorSummaries.length > 1` guard.
test("compression: buildExecuteTaskPrompt minimal truncates prior summaries", () => {
assert.ok(
promptsSrc.includes('inlineLevel === "minimal" && priorSummaries.length > 1'),
"execute-task should limit prior summaries for minimal level",
);
});
test("compression: buildPlanMilestonePrompt minimal drops project/requirements/decisions files", () => {
// The plan-milestone builder should gate root file inlining on inlineLevel
assert.ok(
promptsSrc.includes('inlineLevel !== "minimal"') &&
promptsSrc.includes('inlineGsdRootFile(base, "project.md"'),
"plan-milestone should conditionally include project.md based on level",
);
});
test("compression: buildPlanMilestonePrompt minimal drops extra templates", () => {
// Full inlines 5 templates, minimal should inline fewer
assert.ok(
promptsSrc.includes('if (inlineLevel === "full")') &&
promptsSrc.includes('inlineTemplate("secrets-manifest"'),
"plan-milestone should only include secrets-manifest template at full level",
);
});
// ═══════════════════════════════════════════════════════════════════════════
// Complete-Slice Level Gating
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Returns the auto-prompts.ts text from the first mention of
 * buildCompleteSlicePrompt up to the first mention of
 * buildCompleteMilestonePrompt.
 *
 * Fails with a marker-specific message when either name is missing or out of
 * order, instead of silently slicing with -1 and producing a misleading
 * "should gate …" failure.
 */
function completeSliceSection(): string {
  const start = promptsSrc.indexOf("buildCompleteSlicePrompt");
  const end = promptsSrc.indexOf("buildCompleteMilestonePrompt");
  assert.ok(start !== -1, "auto-prompts.ts should mention buildCompleteSlicePrompt");
  assert.ok(end > start, "buildCompleteMilestonePrompt should appear after buildCompleteSlicePrompt");
  return promptsSrc.slice(start, end);
}

test("compression: buildCompleteSlicePrompt minimal drops requirements", () => {
  // Find the complete-slice section and verify requirements gating
  const completeSliceBlock = completeSliceSection();
  assert.ok(
    completeSliceBlock.includes('inlineLevel !== "minimal"'),
    "complete-slice should gate requirements inlining on level",
  );
});
test("compression: buildCompleteSlicePrompt minimal drops UAT template", () => {
  const completeSliceBlock = completeSliceSection();
  assert.ok(
    completeSliceBlock.includes('inlineLevel !== "minimal"') &&
      completeSliceBlock.includes('inlineTemplate("uat"'),
    "complete-slice should conditionally include UAT template based on level",
  );
});
// ═══════════════════════════════════════════════════════════════════════════
// Complete-Milestone Level Gating
// ═══════════════════════════════════════════════════════════════════════════
// Inspects the auto-prompts.ts text from the first mention of
// buildCompleteMilestonePrompt up to the first mention of buildReplanSlicePrompt
// (or to end of file when the latter is absent).
test("compression: buildCompleteMilestonePrompt minimal drops root GSD files", () => {
  const completeMilestoneIdx = promptsSrc.indexOf("buildCompleteMilestonePrompt");
  assert.ok(completeMilestoneIdx !== -1, "auto-prompts.ts should mention buildCompleteMilestonePrompt");
  const nextBuilder = promptsSrc.indexOf("buildReplanSlicePrompt");
  // indexOf returns -1 when the end marker is missing; make "rest of file" explicit
  // rather than relying on slice()'s negative-index behaviour.
  const end = nextBuilder === -1 ? promptsSrc.length : nextBuilder;
  assert.ok(end > completeMilestoneIdx, "buildReplanSlicePrompt should appear after buildCompleteMilestonePrompt");
  const block = promptsSrc.slice(completeMilestoneIdx, end);
  assert.ok(
    block.includes('inlineLevel !== "minimal"') &&
      block.includes('inlineGsdRootFile(base, "requirements.md"'),
    "complete-milestone should gate root file inlining on level",
  );
});
// ═══════════════════════════════════════════════════════════════════════════
// Reassess-Roadmap Level Gating
// ═══════════════════════════════════════════════════════════════════════════
// Inspects the 1500 characters of auto-prompts.ts that follow the first mention of
// buildReassessRoadmapPrompt. The marker is asserted first so a missing builder is
// reported as such instead of as a gating failure on an unrelated slice.
test("compression: buildReassessRoadmapPrompt minimal drops project/requirements/decisions", () => {
  const reassessIdx = promptsSrc.indexOf("buildReassessRoadmapPrompt");
  assert.ok(reassessIdx !== -1, "auto-prompts.ts should mention buildReassessRoadmapPrompt");
  const block = promptsSrc.slice(reassessIdx, reassessIdx + 1500);
  assert.ok(
    block.includes('inlineLevel !== "minimal"'),
    "reassess-roadmap should gate file inlining on level",
  );
});
// ═══════════════════════════════════════════════════════════════════════════
// Full Level — No Regression
// ═══════════════════════════════════════════════════════════════════════════
// Guards against a template call being deleted outright: each name must still appear
// as an inlineTemplate("…") call somewhere in auto-prompts.ts. It does not check at
// which inline level the call is made.
test("compression: full level preserves all templates and files (no regression)", () => {
// Verify the key template names are still present in the source
const expectedTemplates = [
"roadmap", "decisions", "plan", "task-plan", "secrets-manifest",
"task-summary", "slice-summary", "uat", "milestone-summary",
];
for (const tpl of expectedTemplates) {
assert.ok(
promptsSrc.includes(`inlineTemplate("${tpl}"`),
`template "${tpl}" should still be present in auto-prompts.ts`,
);
}
});
// ═══════════════════════════════════════════════════════════════════════════
// Import Verification
// ═══════════════════════════════════════════════════════════════════════════
// NOTE(review): these check that the identifiers occur anywhere in the file, not that
// they appear in an import statement; "InlineLevel" is also a substring of
// "resolveInlineLevel", so the second assertion is implied by the first.
test("compression: auto-prompts.ts imports resolveInlineLevel and InlineLevel", () => {
assert.ok(
promptsSrc.includes("resolveInlineLevel"),
"should import resolveInlineLevel from preferences",
);
assert.ok(
promptsSrc.includes("InlineLevel"),
"should import InlineLevel type from types",
);
});

View file

@ -0,0 +1,87 @@
/**
* Routing History structural tests for adaptive learning module.
*
* Verifies routing-history.ts exports and structure from #579.
* Uses source-level checks to avoid @gsd/pi-coding-agent import chain.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
// Directory of this test file, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Raw text of routing-history.ts (one directory up); every test below inspects this string.
const historySrc = readFileSync(join(__dirname, "..", "routing-history.ts"), "utf-8");
// ═══════════════════════════════════════════════════════════════════════════
// Module Exports
// ═══════════════════════════════════════════════════════════════════════════
// String-presence checks for the public API; none of the functions are called.
test("routing-history: exports initRoutingHistory", () => {
assert.ok(historySrc.includes("export function initRoutingHistory"), "should export initRoutingHistory");
});
test("routing-history: exports recordOutcome", () => {
assert.ok(historySrc.includes("export function recordOutcome"), "should export recordOutcome");
});
test("routing-history: exports recordFeedback", () => {
assert.ok(historySrc.includes("export function recordFeedback"), "should export recordFeedback");
});
test("routing-history: exports getAdaptiveTierAdjustment", () => {
assert.ok(historySrc.includes("export function getAdaptiveTierAdjustment"), "should export getAdaptiveTierAdjustment");
});
test("routing-history: exports resetRoutingHistory", () => {
assert.ok(historySrc.includes("export function resetRoutingHistory"), "should export resetRoutingHistory");
});
// ═══════════════════════════════════════════════════════════════════════════
// Design Constants
// ═══════════════════════════════════════════════════════════════════════════
// Table of [test title, literal that must appear in the source, failure message].
const designConstantChecks: ReadonlyArray<readonly [string, string, string]> = [
  [
    "routing-history: uses rolling window of 50 entries",
    "ROLLING_WINDOW = 50",
    "should use 50-entry rolling window",
  ],
  [
    "routing-history: failure threshold is 20%",
    "FAILURE_THRESHOLD = 0.20",
    "should use 20% failure threshold",
  ],
  [
    "routing-history: feedback weight is 2x",
    "FEEDBACK_WEIGHT = 2",
    "feedback should count 2x",
  ],
];
for (const [title, literal, failureMessage] of designConstantChecks) {
  test(title, () => {
    assert.ok(historySrc.includes(literal), failureMessage);
  });
}
// ═══════════════════════════════════════════════════════════════════════════
// Type Structure
// ═══════════════════════════════════════════════════════════════════════════
// The module must pull from ./types.js and mention ComplexityTier somewhere.
test("routing-history: imports ComplexityTier from types.ts", () => {
  const importsFromTypes = historySrc.includes('from "./types.js"');
  const mentionsTier = historySrc.includes("ComplexityTier");
  assert.ok(importsFromTypes && mentionsTier, "should import ComplexityTier from types.ts");
});
// Each listed interface must be declared in the module source.
for (const interfaceName of ["RoutingHistoryData", "FeedbackEntry"]) {
  test(`routing-history: defines ${interfaceName} interface`, () => {
    const declared = historySrc.includes(`interface ${interfaceName}`);
    assert.ok(declared, `should define ${interfaceName}`);
  });
}
// ═══════════════════════════════════════════════════════════════════════════
// Persistence
// ═══════════════════════════════════════════════════════════════════════════
// The persistence file name must be mentioned in the module source.
test("routing-history: persists to routing-history.json", () => {
  const mentionsFile = historySrc.indexOf("routing-history.json") !== -1;
  assert.ok(mentionsFile, "should persist to routing-history.json");
});
// Save/load helpers may be named either `saveHistory`/`loadHistory` or
// start with `function save` / `function load`.
test("routing-history: has save and load functions", () => {
  const sourceHasAny = (...needles: string[]): boolean =>
    needles.some((needle) => historySrc.includes(needle));
  assert.ok(sourceHasAny("saveHistory", "function save"), "should have save");
  assert.ok(sourceHasAny("loadHistory", "function load"), "should have load");
});

View file

@ -0,0 +1,263 @@
/**
* Token Profile unit tests for M004/S01.
*
* Tests profile resolution, preference merging, phase skip defaults,
* subagent model routing, default-to-balanced behavior, and dispatch
* table guard clauses (source-level structural verification).
*
* Uses source-level checks (readFileSync + string matching) to avoid
* @gsd/pi-coding-agent import resolution issues in dev environments.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// ─── Source files for structural checks ───────────────────────────────────
/** Reads a file located one directory above this test file as UTF-8 text. */
const readParentSource = (fileName: string): string =>
  readFileSync(join(__dirname, "..", fileName), "utf-8");
const dispatchSrc = readParentSource("auto-dispatch.ts");
const preferencesSrc = readParentSource("preferences.ts");
const typesSrc = readParentSource("types.ts");
// ═══════════════════════════════════════════════════════════════════════════
// Type Definitions
// ═══════════════════════════════════════════════════════════════════════════
// TokenProfile must be exported and its declaration must list all three
// presets. The literals are looked up inside the captured declaration
// (text between `=` and the next `;`) rather than anywhere in the file, so an
// unrelated occurrence of the same literal cannot satisfy the check.
test("types: TokenProfile type exported with budget/balanced/quality", () => {
  const declaration = typesSrc.match(/export type TokenProfile\s*=([^;]*)/);
  assert.ok(declaration !== null, "TokenProfile should be exported");
  const members = declaration?.[1] ?? "";
  assert.ok(members.includes("'budget'"), "should include budget");
  assert.ok(members.includes("'balanced'"), "should include balanced");
  assert.ok(members.includes("'quality'"), "should include quality");
});
// InlineLevel must be exported and its declaration must list all three
// levels. The literals are looked up inside the captured declaration (text
// between `=` and the next `;`): a file-wide search for 'standard' would also
// be satisfied by other unions in types.ts that contain the same literal.
test("types: InlineLevel type exported with full/standard/minimal", () => {
  const declaration = typesSrc.match(/export type InlineLevel\s*=([^;]*)/);
  assert.ok(declaration !== null, "InlineLevel should be exported");
  const members = declaration?.[1] ?? "";
  assert.ok(members.includes("'full'"), "should include full");
  assert.ok(members.includes("'standard'"), "should include standard");
  assert.ok(members.includes("'minimal'"), "should include minimal");
});
// The interface must be exported, and each skip flag name must appear
// somewhere in types.ts.
test("types: PhaseSkipPreferences interface exported", () => {
  const exported = typesSrc.includes("export interface PhaseSkipPreferences");
  assert.ok(exported, "PhaseSkipPreferences should be exported");
  for (const flag of ["skip_research", "skip_reassess", "skip_slice_research"]) {
    assert.ok(typesSrc.includes(flag), `should include ${flag}`);
  }
});
// ═══════════════════════════════════════════════════════════════════════════
// GSDPreferences Interface
// ═══════════════════════════════════════════════════════════════════════════
// Each [field, type] pair must appear in preferences.ts as an optional
// property declaration `<field>?: <type>`.
const preferenceFieldChecks: ReadonlyArray<readonly [string, string]> = [
  ["token_profile", "TokenProfile"],
  ["phases", "PhaseSkipPreferences"],
];
for (const [field, typeName] of preferenceFieldChecks) {
  test(`preferences: GSDPreferences includes ${field} field`, () => {
    const declared = preferencesSrc.includes(`${field}?: ${typeName}`);
    assert.ok(declared, `GSDPreferences should have ${field} field`);
  });
}
// Both model-config interfaces (v1 and v2) must mention `subagent` before
// the first closing brace that follows the interface header.
test("preferences: GSDModelConfig includes subagent field", () => {
  const v1Pattern = /interface GSDModelConfig\s*\{[^}]*subagent/;
  const v2Pattern = /interface GSDModelConfigV2\s*\{[^}]*subagent/;
  assert.ok(v1Pattern.test(preferencesSrc), "GSDModelConfig should have subagent field");
  assert.ok(v2Pattern.test(preferencesSrc), "GSDModelConfigV2 should have subagent field");
});
// Both keys must appear in preferences.ts as double-quoted string literals.
test("preferences: KNOWN_PREFERENCE_KEYS includes token_profile and phases", () => {
  for (const key of ["token_profile", "phases"]) {
    const quotedKeyPresent = preferencesSrc.includes(`"${key}"`);
    assert.ok(quotedKeyPresent, `KNOWN_PREFERENCE_KEYS should include ${key}`);
  }
});
// ═══════════════════════════════════════════════════════════════════════════
// Profile Resolution
// ═══════════════════════════════════════════════════════════════════════════
// resolveProfileDefaults must be exported, and preferences.ts must contain
// a `case` label for each of the three profile tiers.
test("profile: resolveProfileDefaults exists and handles all 3 tiers", () => {
  const exported = preferencesSrc.includes("export function resolveProfileDefaults");
  assert.ok(exported, "resolveProfileDefaults should be exported");
  const allTiersHandled = ["budget", "balanced", "quality"].every((tier) =>
    preferencesSrc.includes(`case "${tier}"`),
  );
  assert.ok(allTiersHandled, "resolveProfileDefaults should handle all 3 tiers");
});
// The budget branch (text between `case "budget":` and `case "balanced":`)
// must enable all three phase skips.
test("profile: budget profile sets phase skips to true", () => {
  const budgetIdx = preferencesSrc.indexOf('case "budget":');
  const balancedIdx = preferencesSrc.indexOf('case "balanced":');
  // Guard the slice bounds: a missing end marker (-1) would make slice() span
  // almost the whole rest of the file and let the checks below pass by accident.
  assert.ok(budgetIdx > -1, 'should have a case "budget": branch');
  assert.ok(balancedIdx > budgetIdx, 'case "balanced": should follow case "budget":');
  const budgetBlock = preferencesSrc.slice(budgetIdx, balancedIdx);
  assert.ok(budgetBlock.includes("skip_research: true"), "budget should skip research");
  assert.ok(budgetBlock.includes("skip_reassess: true"), "budget should skip reassess");
  assert.ok(budgetBlock.includes("skip_slice_research: true"), "budget should skip slice research");
});
// The balanced branch (text between `case "balanced":` and `case "quality":`)
// must enable only the slice-research skip.
test("profile: balanced profile skips only slice research", () => {
  const balancedIdx = preferencesSrc.indexOf('case "balanced":');
  const qualityIdx = preferencesSrc.indexOf('case "quality":');
  // Guard the slice bounds: a missing end marker (-1) would make slice() span
  // almost the whole rest of the file, so the block would no longer be the
  // balanced branch alone.
  assert.ok(balancedIdx > -1, 'should have a case "balanced": branch');
  assert.ok(qualityIdx > balancedIdx, 'case "quality": should follow case "balanced":');
  const balancedBlock = preferencesSrc.slice(balancedIdx, qualityIdx);
  assert.ok(balancedBlock.includes("skip_slice_research: true"), "balanced should skip slice research");
  assert.ok(!balancedBlock.includes("skip_research: true"), "balanced should NOT skip milestone research");
  assert.ok(!balancedBlock.includes("skip_reassess: true"), "balanced should NOT skip reassess");
});
// The quality branch must return an empty phases object (no skips).
test("profile: quality profile has empty phases (no skips)", () => {
  const qualityIdx = preferencesSrc.indexOf('case "quality":');
  assert.ok(qualityIdx > -1, 'should have a case "quality": branch');
  // Inspect a fixed 200-character window after the case label, which is
  // expected to contain the branch's return block.
  const qualityReturn = preferencesSrc.slice(qualityIdx, qualityIdx + 200);
  assert.ok(
    qualityReturn.includes("phases: {}"),
    "quality should have empty phases object (no skips)",
  );
});
// ═══════════════════════════════════════════════════════════════════════════
// Default Behavior (D046)
// ═══════════════════════════════════════════════════════════════════════════
// resolveEffectiveProfile must be exported, and preferences.ts must contain a
// `return "balanced"` statement (checked file-wide, not within the function).
test("profile: resolveEffectiveProfile defaults to balanced (D046)", () => {
  const exported = preferencesSrc.includes("export function resolveEffectiveProfile");
  const returnsBalanced = preferencesSrc.includes('return "balanced"');
  assert.ok(exported, "resolveEffectiveProfile should be exported");
  assert.ok(returnsBalanced, "resolveEffectiveProfile should default to balanced");
});
// ═══════════════════════════════════════════════════════════════════════════
// Inline Level Mapping
// ═══════════════════════════════════════════════════════════════════════════
// resolveInlineLevel must be exported and contain a one-line
// `case "<profile>": return "<level>"` mapping for every profile.
test("profile: resolveInlineLevel maps profile to inline level", () => {
  const exported = preferencesSrc.includes("export function resolveInlineLevel");
  assert.ok(exported, "resolveInlineLevel should be exported");
  const expectedMapping: ReadonlyArray<readonly [string, string]> = [
    ["budget", "minimal"],
    ["balanced", "standard"],
    ["quality", "full"],
  ];
  for (const [profile, level] of expectedMapping) {
    assert.ok(
      preferencesSrc.includes(`case "${profile}": return "${level}"`),
      `${profile} → ${level}`,
    );
  }
});
// ═══════════════════════════════════════════════════════════════════════════
// Validation
// ═══════════════════════════════════════════════════════════════════════════
// preferences.ts must reference preferences.token_profile and list the
// allowed values as "budget, balanced, quality".
test("validate: validatePreferences handles token_profile", () => {
  const required = ["preferences.token_profile", "budget, balanced, quality"];
  assert.ok(
    required.every((needle) => preferencesSrc.includes(needle)),
    "validatePreferences should validate token_profile enum values",
  );
});
// preferences.ts must reference preferences.phases and all three skip flags.
test("validate: validatePreferences handles phases object", () => {
  const required = [
    "preferences.phases",
    "skip_research",
    "skip_reassess",
    "skip_slice_research",
  ];
  assert.ok(
    required.every((needle) => preferencesSrc.includes(needle)),
    "validatePreferences should validate phases fields",
  );
});
// preferences.ts must contain a knownPhaseKeys identifier and the
// "unknown phases key" text.
test("validate: phases warns on unknown keys", () => {
  const required = ["knownPhaseKeys", "unknown phases key"];
  assert.ok(
    required.every((needle) => preferencesSrc.includes(needle)),
    "validatePreferences should warn on unknown phase keys",
  );
});
// ═══════════════════════════════════════════════════════════════════════════
// Merge
// ═══════════════════════════════════════════════════════════════════════════
// token_profile must be merged with `override ?? base` precedence.
test("merge: mergePreferences handles token_profile with nullish coalescing", () => {
  const mergeExpression = "token_profile: override.token_profile ?? base.token_profile";
  assert.ok(
    preferencesSrc.includes(mergeExpression),
    "mergePreferences should use nullish coalescing for token_profile",
  );
});
// phases must be merged by spreading both the base and the override objects.
test("merge: mergePreferences handles phases with spread", () => {
  const spreadsBase = preferencesSrc.includes("...(base.phases");
  const spreadsOverride = preferencesSrc.includes("...(override.phases");
  assert.ok(spreadsBase && spreadsOverride, "mergePreferences should spread phases objects");
});
// ═══════════════════════════════════════════════════════════════════════════
// Subagent Model Routing
// ═══════════════════════════════════════════════════════════════════════════
// The budget branch (text between `case "budget":` and `case "balanced":`)
// must assign a subagent model.
test("subagent: budget profile sets subagent model", () => {
  const budgetIdx = preferencesSrc.indexOf('case "budget":');
  const balancedIdx = preferencesSrc.indexOf('case "balanced":');
  // Guard the slice bounds: a missing end marker (-1) would make slice() span
  // almost the whole rest of the file and let the check below pass by accident.
  assert.ok(budgetIdx > -1, 'should have a case "budget": branch');
  assert.ok(balancedIdx > budgetIdx, 'case "balanced": should follow case "budget":');
  const budgetBlock = preferencesSrc.slice(budgetIdx, balancedIdx);
  assert.ok(budgetBlock.includes("subagent:"), "budget profile should set subagent model");
});
// preferences.ts must contain both the "subagent" literal and a
// startsWith("subagent/") prefix check (searched file-wide).
test("subagent: resolveModelWithFallbacksForUnit handles subagent unit types", () => {
  const hasExactLiteral = preferencesSrc.includes('"subagent"');
  const hasPrefixCheck = preferencesSrc.includes('startsWith("subagent/")');
  assert.ok(
    hasExactLiteral && hasPrefixCheck,
    "resolveModelWithFallbacksForUnit should handle subagent and subagent/* unit types",
  );
});
// ═══════════════════════════════════════════════════════════════════════════
// Dispatch Table — Phase Skip Guards
// ═══════════════════════════════════════════════════════════════════════════
// auto-dispatch.ts must mention the research-milestone rule and the
// skip_research flag. Both are searched file-wide; this does not verify that
// the guard sits inside that particular rule.
test("dispatch: research-milestone rule has skip_research guard", () => {
  const mentionsRule = dispatchSrc.includes("research-milestone");
  assert.ok(mentionsRule, "should have research-milestone rule");
  const mentionsGuard = dispatchSrc.includes("skip_research");
  assert.ok(
    mentionsGuard && mentionsRule,
    "research-milestone dispatch rule should check phases.skip_research",
  );
});
// Starting at the first mention of research-slice, the remainder of
// auto-dispatch.ts must reference at least one of the research skip flags.
test("dispatch: research-slice rule has skip guards", () => {
  const ruleStart = dispatchSrc.indexOf("research-slice");
  assert.ok(ruleStart >= 0, "should have research-slice rule");
  const remainder = dispatchSrc.slice(ruleStart);
  const guarded = ["skip_research", "skip_slice_research"].some((flag) =>
    remainder.includes(flag),
  );
  assert.ok(guarded, "research-slice rule should check skip_research or skip_slice_research");
});
// auto-dispatch.ts must mention both the skip_reassess flag and the
// reassess-roadmap rule (searched file-wide).
test("dispatch: reassess-roadmap rule has skip_reassess guard", () => {
  const mentionsGuard = dispatchSrc.includes("skip_reassess");
  const mentionsRule = dispatchSrc.includes("reassess-roadmap");
  assert.ok(
    mentionsGuard && mentionsRule,
    "reassess-roadmap dispatch rule should check phases.skip_reassess",
  );
});
// Each skip guard, i.e. `skip_…)` closing a condition, must be followed
// somewhere later in the file by `return null`. The `s` flag lets `.` cross
// line breaks, so the return may sit on a following line.
test("dispatch: phase skip guards return null (not stop)", () => {
  const researchPattern = /skip_research\).*?return null/s;
  const reassessPattern = /skip_reassess\).*?return null/s;
  assert.ok(
    researchPattern.test(dispatchSrc),
    "skip_research guard should return null (fall-through)",
  );
  assert.ok(
    reassessPattern.test(dispatchSrc),
    "skip_reassess guard should return null (fall-through)",
  );
});

View file

@ -238,6 +238,34 @@ export interface HookDispatchResult {
/** Budget enforcement mode: one of 'warn', 'pause', or 'halt'. */
export type BudgetEnforcementMode = 'warn' | 'pause' | 'halt';
/** Token profile preset name: 'budget', 'balanced', or 'quality'. */
export type TokenProfile = 'budget' | 'balanced' | 'quality';
/**
 * Inline level for prompt context: 'full', 'standard', or 'minimal'.
 * NOTE(review): presumably ordered from most to least inlined context — confirm against the prompt builders.
 */
export type InlineLevel = 'full' | 'standard' | 'minimal';
/** Complexity tier: 'light', 'standard', or 'heavy'. */
export type ComplexityTier = 'light' | 'standard' | 'heavy';
/**
 * A complexity tier paired with a textual reason and a downgrade flag.
 * NOTE(review): presumably the return shape of the complexity classifiers — confirm against callers.
 */
export interface ClassificationResult {
// The tier that was selected.
tier: ComplexityTier;
// Free-form explanation string accompanying the tier.
reason: string;
// NOTE(review): presumably true when the tier was lowered from what it would otherwise be — exact trigger not visible here; confirm.
downgraded: boolean;
}
/**
 * Optional descriptive facts about a task; every field may be omitted.
 * NOTE(review): presumably consumed as classification signals — confirm against the classifier.
 */
export interface TaskMetadata {
fileCount?: number;
dependencyCount?: number;
isNewFile?: boolean;
tags?: string[];
estimatedLines?: number;
codeBlockCount?: number;
complexityKeywords?: string[];
}
/**
 * Optional boolean flags for skipping phases; every flag may be omitted.
 * NOTE(review): which dispatch rule honors which flag is not visible in this file — confirm in auto-dispatch.ts.
 */
export interface PhaseSkipPreferences {
// NOTE(review): presumably skips research phases — confirm scope (milestone only vs. milestone and slice).
skip_research?: boolean;
// NOTE(review): presumably skips the roadmap reassessment phase — confirm.
skip_reassess?: boolean;
// NOTE(review): presumably skips slice-level research only — confirm.
skip_slice_research?: boolean;
}
export interface NotificationPreferences {
enabled?: boolean; // default true
on_complete?: boolean; // notify on each unit completion