diff --git a/docs/configuration.md b/docs/configuration.md index a647524a7..8f4f9830a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -231,11 +231,14 @@ phases: skip_research: false # skip milestone-level research skip_reassess: false # skip roadmap reassessment after each slice skip_slice_research: true # skip per-slice research + reassess_after_slice: true # enable roadmap reassessment after each slice (required for reassessment) require_slice_discussion: false # pause auto-mode before each slice for discussion ``` These are usually set automatically by `token_profile`, but can be overridden explicitly. +> **Note:** Roadmap reassessment requires `reassess_after_slice: true` to be set explicitly. Without it, reassessment is skipped regardless of `skip_reassess`. + ### `skill_discovery` Controls how GSD finds and applies skills during auto mode. diff --git a/docs/token-optimization.md b/docs/token-optimization.md index d2fb38bfe..a622869d1 100644 --- a/docs/token-optimization.md +++ b/docs/token-optimization.md @@ -105,7 +105,11 @@ Explicit `phases` settings always override the profile defaults. ## Complexity-Based Task Routing -GSD automatically classifies each task by complexity and routes it to an appropriate model tier. This means simple documentation fixes don't burn expensive Opus tokens, while complex architectural work gets the reasoning power it needs. +GSD classifies each task by complexity and routes it to an appropriate model tier when dynamic routing is enabled. Simple documentation fixes use cheaper models while complex architectural work gets the reasoning power it needs. + +> **Prerequisite:** Dynamic routing requires explicit `models` in your preferences. Without a `models` section, routing is skipped and the session's launch model is used for all phases. Token profiles set `models` automatically. 
+ +> **Ceiling behavior:** When dynamic routing is active, the model configured for each phase acts as a **ceiling**, not a fixed assignment. The router may downgrade to a cheaper model for simpler tasks but never upgrades beyond the configured model. ### How Classification Works @@ -172,13 +176,15 @@ GSD tracks the success and failure of each tier assignment over time and adjusts ### User Feedback -GSD accepts manual feedback to accelerate learning: +Use `/gsd rate` to submit feedback on the last completed unit's model tier: -- **"over"** — the model was overpowered for this task (encourages downgrading) -- **"under"** — the model wasn't capable enough (encourages upgrading) -- **"ok"** — correct assignment (no adjustment) +``` +/gsd rate over # model was overpowered — encourage cheaper next time +/gsd rate ok # model was appropriate — no adjustment +/gsd rate under # model was too weak — encourage stronger next time +``` -Feedback signals are weighted 2× compared to automatic outcomes. +Feedback signals are weighted 2× compared to automatic outcomes. Requires dynamic routing to be active (the last unit must have tier data). ### Data Management diff --git a/src/resources/extensions/gsd/commands-rate.ts b/src/resources/extensions/gsd/commands-rate.ts new file mode 100644 index 000000000..39eedace1 --- /dev/null +++ b/src/resources/extensions/gsd/commands-rate.ts @@ -0,0 +1,55 @@ +/** + * /gsd rate — Submit feedback on the last unit's model tier assignment. + * Feeds into the adaptive routing history so future dispatches improve. 
+ */ + +import type { ExtensionCommandContext } from "@gsd/pi-coding-agent"; +import { loadLedgerFromDisk } from "./metrics.js"; +import { recordFeedback, initRoutingHistory } from "./routing-history.js"; +import type { ComplexityTier } from "./complexity-classifier.js"; + +const VALID_RATINGS = new Set(["over", "under", "ok"]); + +export async function handleRate( + args: string, + ctx: ExtensionCommandContext, + basePath: string, +): Promise<void> { + const rating = args.trim().toLowerCase(); + + if (!rating || !VALID_RATINGS.has(rating)) { + ctx.ui.notify( + "Usage: /gsd rate <over|ok|under>\n" + + " over — model was overpowered for that task (encourage cheaper)\n" + + " ok — model was appropriate\n" + + " under — model was too weak (encourage stronger)", + "info", + ); + return; + } + + const ledger = loadLedgerFromDisk(basePath); + if (!ledger || ledger.units.length === 0) { + ctx.ui.notify("No completed units found — nothing to rate.", "warning"); + return; + } + + const lastUnit = ledger.units[ledger.units.length - 1]; + const tier = lastUnit.tier as ComplexityTier | undefined; + + if (!tier) { + ctx.ui.notify( + "Last unit has no tier data (dynamic routing was not active). 
Rating skipped.", + "warning", + ); + return; + } + + initRoutingHistory(basePath); + recordFeedback(lastUnit.type, lastUnit.id, tier, rating as "over" | "under" | "ok"); + + ctx.ui.notify( + `Recorded "${rating}" for ${lastUnit.type}/${lastUnit.id} at tier ${tier}.`, + "info", + ); +} diff --git a/src/resources/extensions/gsd/commands.ts b/src/resources/extensions/gsd/commands.ts index dc47e9bf0..adbb47e3d 100644 --- a/src/resources/extensions/gsd/commands.ts +++ b/src/resources/extensions/gsd/commands.ts @@ -89,6 +89,7 @@ export function registerGSDCommand(pi: ExtensionAPI): void { { cmd: "triage", desc: "Manually trigger triage of pending captures" }, { cmd: "dispatch", desc: "Dispatch a specific phase directly" }, { cmd: "history", desc: "View execution history" }, + { cmd: "rate", desc: "Rate last unit's model tier (over/ok/under) — improves adaptive routing" }, { cmd: "undo", desc: "Revert last completed unit" }, { cmd: "skip", desc: "Prevent a unit from auto-mode dispatch" }, { cmd: "export", desc: "Export milestone/slice results" }, @@ -566,6 +567,12 @@ export async function handleGSDCommand( return; } + if (trimmed === "rate" || trimmed.startsWith("rate ")) { + const { handleRate } = await import("./commands-rate.js"); + await handleRate(trimmed.replace(/^rate\s*/, "").trim(), ctx, projectRoot()); + return; + } + if (trimmed.startsWith("skip ")) { await handleSkip(trimmed.replace(/^skip\s*/, "").trim(), ctx, projectRoot()); return; diff --git a/src/resources/extensions/gsd/preferences.ts b/src/resources/extensions/gsd/preferences.ts index e83f3f400..1ea1a037a 100644 --- a/src/resources/extensions/gsd/preferences.ts +++ b/src/resources/extensions/gsd/preferences.ts @@ -15,9 +15,10 @@ import { homedir } from "node:os"; import { join } from "node:path"; import { gsdRoot } from "./paths.js"; import { parse as parseYaml } from "yaml"; -import type { PostUnitHookConfig, PreDispatchHookConfig } from "./types.js"; +import type { PostUnitHookConfig, 
PreDispatchHookConfig, TokenProfile } from "./types.js"; import type { DynamicRoutingConfig } from "./model-router.js"; import { normalizeStringArray } from "../shared/mod.js"; +import { resolveProfileDefaults as _resolveProfileDefaults } from "./preferences-models.js"; import { MODE_DEFAULTS, @@ -141,6 +142,18 @@ export function loadEffectiveGSDPreferences(): LoadedGSDPreferences | null { }; } + // Apply token-profile defaults as the lowest-priority layer so that + // `token_profile: budget` sets models and phase-skips automatically. + // Explicit user preferences always override profile defaults. + const profile = result.preferences.token_profile as TokenProfile | undefined; + if (profile) { + const profileDefaults = _resolveProfileDefaults(profile); + result = { + ...result, + preferences: mergePreferences(profileDefaults as GSDPreferences, result.preferences), + }; + } + // Apply mode defaults as the lowest-priority layer if (result.preferences.mode) { result = {