feat: stop/backtrack capture classifications for milestone regression (#3488)
* feat: add stop/backtrack capture classifications for milestone regression (#3487) Adds 4-layer methodology for halting auto-mode and backtracking to previous milestones when captures indicate the user wants to stop or that a milestone missed critical features: 1. Type layer: "stop" and "backtrack" classification types in captures.ts 2. Guard layer: pre-dispatch stop check in runGuards() pauses auto-mode before the next unit dispatches 3. Resolution layer: executeBacktrack() writes BACKTRACK-TRIGGER.md and milestone regression markers for state machine detection 4. Protection layer: revertExecutorResolvedCaptures() detects and reverts captures silenced by non-triage agents (resolved without classification) Also adds fast-path stop detection in auto-post-unit.ts that pattern-matches pending capture text for stop keywords without waiting for triage. Closes #3487 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: add slice-level skip with gsd_skip_slice tool (#3477) Adds "skipped" as a closed status alongside "complete" and "done": - status-guards.ts: isClosedStatus() recognizes "skipped" - state.ts: isStatusDone() recognizes "skipped" - gsd-db.ts: getActiveSliceFromDb() skips slices with status "skipped" - db-tools.ts: new gsd_skip_slice tool for rethink and manual use - rethink.md: added "Skip a slice" operation to rethink prompt - rethink.ts: buildRethinkData shows skipped slice counts Skipped slices satisfy dependencies for downstream slices, allowing auto-mode to advance past them. Slice data is preserved for reference. Relates to #3477 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: resolve 4 issues found in adversarial review of PR #3488 1. triage-ui.ts: Restore stop/backtrack entries in CLASSIFICATION_LABELS and ALL_CLASSIFICATIONS — the Record<Classification, ...> type requires all union members, and runtime lookups would crash on stop/backtrack. 
Also auto-confirm stop/backtrack in the triage confirmation flow (matching the triage-captures.md prompt directive). 2. triage-resolution.ts: Replace require("node:fs") in clearBacktrackTrigger with ESM import of unlinkSync — consistent with the rest of the codebase. 3. auto-post-unit.ts: Anchor STOP_PATTERN regex to start-of-string (^) to prevent false positives on captures like "add a pause button" or "stop the timer from re-rendering" which are feature descriptions, not halt directives. 4. status-guards.test.ts: Add missing test case for isClosedStatus("skipped") to cover the new status value. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: update tool-naming test count for gsd_skip_slice The new gsd_skip_slice tool (no alias) brings the total from 29 to 30. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
7d5bf63b2d
commit
a061e3c276
15 changed files with 633 additions and 17 deletions
|
|
@ -46,7 +46,7 @@ import {
|
|||
persistHookState,
|
||||
resolveHookArtifactPath,
|
||||
} from "./post-unit-hooks.js";
|
||||
import { hasPendingCaptures, loadPendingCaptures } from "./captures.js";
|
||||
import { hasPendingCaptures, loadPendingCaptures, revertExecutorResolvedCaptures } from "./captures.js";
|
||||
import { debugLog } from "./debug-logger.js";
|
||||
import { runSafely } from "./auto-utils.js";
|
||||
import type { AutoSession, SidecarItem } from "./auto/session.js";
|
||||
|
|
@ -594,6 +594,53 @@ export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"
|
|||
}
|
||||
}
|
||||
|
||||
// ── Fast-path stop detection (#3487) ──
|
||||
// Before waiting for triage, check if any PENDING captures contain explicit
|
||||
// stop/halt language. If so, pause immediately — don't wait for triage.
|
||||
if (s.currentUnit && s.currentUnit.type !== "triage-captures") {
|
||||
try {
|
||||
const pending = loadPendingCaptures(s.basePath);
|
||||
// Match only when the capture text starts with a stop/halt directive word,
|
||||
// or the entire text is short and dominated by such a word. This avoids
|
||||
// false positives on captures like "add a pause button" or "stop the timer
|
||||
// from re-rendering" — those are feature descriptions, not halt directives.
|
||||
const STOP_PATTERN = /^(stop|halt|abort|don'?t continue|pause|cease)\b/i;
|
||||
const stopCapture = pending.find(c => STOP_PATTERN.test(c.text.trim()));
|
||||
if (stopCapture) {
|
||||
ctx.ui.notify(
|
||||
`Stop directive detected in pending capture ${stopCapture.id}: "${stopCapture.text}" — pausing auto-mode.`,
|
||||
"warning",
|
||||
);
|
||||
debugLog("postUnit", { phase: "fast-stop", captureId: stopCapture.id });
|
||||
await pauseAuto(ctx, pi);
|
||||
return "stopped";
|
||||
}
|
||||
} catch (e) {
|
||||
debugLog("postUnit", { phase: "fast-stop-error", error: String(e) });
|
||||
}
|
||||
}
|
||||
|
||||
// ── Capture protection: revert executor-silenced captures (#3487) ──
|
||||
// Non-triage agents can write **Status:** resolved to CAPTURES.md, bypassing
|
||||
// the triage pipeline. Revert those to pending before the triage check.
|
||||
if (
|
||||
s.currentUnit &&
|
||||
s.currentUnit.type !== "triage-captures"
|
||||
) {
|
||||
try {
|
||||
const reverted = revertExecutorResolvedCaptures(s.basePath);
|
||||
if (reverted > 0) {
|
||||
debugLog("postUnit", { phase: "capture-protection", reverted });
|
||||
ctx.ui.notify(
|
||||
`Reverted ${reverted} capture${reverted === 1 ? "" : "s"} silenced by executor — re-queuing for triage.`,
|
||||
"warning",
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
debugLog("postUnit", { phase: "capture-protection-error", error: String(e) });
|
||||
}
|
||||
}
|
||||
|
||||
// ── Triage check ──
|
||||
if (
|
||||
!s.stepMode &&
|
||||
|
|
|
|||
|
|
@ -709,7 +709,7 @@ export async function runDispatch(
|
|||
// ─── runGuards ────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Phase 2: Guards — budget ceiling, context window, secrets re-check.
|
||||
* Phase 2: Guards — stop directives, budget ceiling, context window, secrets re-check.
|
||||
* Returns break to exit the loop, or next to proceed to dispatch.
|
||||
*/
|
||||
export async function runGuards(
|
||||
|
|
@ -718,6 +718,48 @@ export async function runGuards(
|
|||
): Promise<PhaseResult> {
|
||||
const { ctx, pi, s, deps, prefs } = ic;
|
||||
|
||||
// ── Stop/Backtrack directive guard (#3487) ──
|
||||
// Check for unexecuted stop or backtrack captures BEFORE dispatching any unit.
|
||||
// This ensures user "halt" directives are honored immediately.
|
||||
try {
|
||||
const { loadStopCaptures, markCaptureExecuted } = await import("../captures.js");
|
||||
const stopCaptures = loadStopCaptures(s.basePath);
|
||||
if (stopCaptures.length > 0) {
|
||||
const first = stopCaptures[0];
|
||||
const isBacktrack = first.classification === "backtrack";
|
||||
const label = isBacktrack
|
||||
? `Backtrack directive: ${first.text}`
|
||||
: `Stop directive: ${first.text}`;
|
||||
|
||||
ctx.ui.notify(label, "warning");
|
||||
deps.sendDesktopNotification(
|
||||
"GSD", label, "warning", "stop-directive",
|
||||
basename(s.originalBasePath || s.basePath),
|
||||
);
|
||||
|
||||
// Mark all stop/backtrack captures as executed so they don't re-fire
|
||||
for (const cap of stopCaptures) {
|
||||
markCaptureExecuted(s.basePath, cap.id);
|
||||
}
|
||||
|
||||
// For backtrack captures, write the backtrack trigger before pausing
|
||||
if (isBacktrack) {
|
||||
try {
|
||||
const { executeBacktrack } = await import("../triage-resolution.js");
|
||||
executeBacktrack(s.basePath, mid, first);
|
||||
} catch (e) {
|
||||
debugLog("guards", { phase: "backtrack-execution-error", error: String(e) });
|
||||
}
|
||||
}
|
||||
|
||||
await deps.pauseAuto(ctx, pi);
|
||||
debugLog("autoLoop", { phase: "exit", reason: isBacktrack ? "user-backtrack" : "user-stop" });
|
||||
return { action: "break", reason: isBacktrack ? "user-backtrack" : "user-stop" };
|
||||
}
|
||||
} catch (e) {
|
||||
debugLog("guards", { phase: "stop-guard-error", error: String(e) });
|
||||
}
|
||||
|
||||
// Budget ceiling guard
|
||||
const budgetCeiling = prefs?.budget_ceiling;
|
||||
if (budgetCeiling !== undefined && budgetCeiling > 0) {
|
||||
|
|
|
|||
|
|
@ -883,6 +883,84 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|||
pi.registerTool(sliceCompleteTool);
|
||||
registerAlias(pi, sliceCompleteTool, "gsd_complete_slice", "gsd_slice_complete");
|
||||
|
||||
// ─── gsd_skip_slice (#3477 / #3487) ───────────────────────────────────
|
||||
|
||||
/**
 * Executor for the `gsd_skip_slice` tool.
 *
 * Looks up the slice identified by `params.milestoneId` / `params.sliceId`
 * and sets its status to "skipped". Slices that are already complete/done are
 * rejected, and an already-skipped slice is reported without touching the DB.
 * Every outcome — success or failure — is returned as a tool result (one text
 * item plus a `details` record); errors raised while working are logged and
 * converted into an error result rather than propagated.
 */
const skipSliceExecute = async (_toolCallId: string, params: any, _signal: AbortSignal | undefined, _onUpdate: unknown, _ctx: unknown) => {
  // All results share one envelope: a single text item and a details payload.
  const respond = (text: string, details: Record<string, unknown>) => ({
    content: [{ type: "text" as const, text }],
    details: details as any,
  });

  if (!(await ensureDbOpen())) {
    return respond(
      "Error: GSD database is not available. Cannot skip slice.",
      { operation: "skip_slice", error: "db_unavailable" },
    );
  }

  try {
    const { getSlice, updateSliceStatus } = await import("../gsd-db.js");
    const { invalidateStateCache } = await import("../state.js");

    const existing = getSlice(params.milestoneId, params.sliceId);
    if (!existing) {
      return respond(
        `Error: Slice ${params.sliceId} not found in milestone ${params.milestoneId}`,
        { operation: "skip_slice", error: "slice_not_found" },
      );
    }

    switch (existing.status) {
      case "complete":
      case "done":
        // Finished work is never retroactively marked as skipped.
        return respond(
          `Error: Slice ${params.sliceId} is already complete — cannot skip.`,
          { operation: "skip_slice", error: "already_complete" },
        );
      case "skipped":
        // Nothing to do: report the current state without writing.
        return respond(
          `Slice ${params.sliceId} is already skipped.`,
          { operation: "skip_slice", sliceId: params.sliceId, milestoneId: params.milestoneId },
        );
    }

    updateSliceStatus(params.milestoneId, params.sliceId, "skipped");
    // Drop cached state so the new status is picked up on the next read.
    invalidateStateCache();

    return respond(
      `Skipped slice ${params.sliceId} (${params.milestoneId}). Reason: ${params.reason ?? "User-directed skip"}. Auto-mode will advance past this slice.`,
      {
        operation: "skip_slice",
        sliceId: params.sliceId,
        milestoneId: params.milestoneId,
        reason: params.reason,
      },
    );
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    logError("tool", `skip_slice tool failed: ${msg}`, { tool: "gsd_skip_slice", error: String(err) });
    return respond(`Error skipping slice: ${msg}`, { operation: "skip_slice", error: msg });
  }
};
|
||||
|
||||
pi.registerTool({
|
||||
name: "gsd_skip_slice",
|
||||
label: "Skip Slice",
|
||||
description:
|
||||
"Mark a slice as skipped so auto-mode advances past it without executing. " +
|
||||
"The slice data is preserved for reference. The state machine treats skipped slices like completed ones for dependency satisfaction.",
|
||||
promptSnippet: "Skip a GSD slice (mark as skipped, auto-mode will advance past it)",
|
||||
promptGuidelines: [
|
||||
"Use gsd_skip_slice when a slice should be bypassed — descoped, superseded, or no longer relevant.",
|
||||
"Cannot skip a slice that is already complete.",
|
||||
"Skipped slices satisfy downstream dependencies just like completed slices.",
|
||||
],
|
||||
parameters: Type.Object({
|
||||
sliceId: Type.String({ description: "Slice ID (e.g. S02)" }),
|
||||
milestoneId: Type.String({ description: "Milestone ID (e.g. M003)" }),
|
||||
reason: Type.Optional(Type.String({ description: "Reason for skipping this slice" })),
|
||||
}),
|
||||
execute: skipSliceExecute,
|
||||
});
|
||||
|
||||
// ─── gsd_complete_milestone ────────────────────────────────────────────
|
||||
|
||||
const milestoneCompleteExecute = async (_toolCallId: string, params: any, _signal: AbortSignal | undefined, _onUpdate: unknown, _ctx: unknown) => {
|
||||
|
|
|
|||
|
|
@ -285,6 +285,75 @@ export function loadActionableCaptures(basePath: string, currentMilestoneId?: st
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Collect the halt directives that still have to be acted on.
 *
 * A capture qualifies when it is resolved, has not been marked executed, and
 * is classified as either "stop" or "backtrack". The pre-dispatch guard
 * pipeline (runGuards) uses this list to pause auto-mode before the next
 * unit is dispatched.
 */
export function loadStopCaptures(basePath: string): CaptureEntry[] {
  const outstanding: CaptureEntry[] = [];
  for (const entry of loadAllCaptures(basePath)) {
    // Only resolved captures that nobody has executed yet are candidates.
    if (entry.status !== "resolved" || entry.executed) continue;
    if (entry.classification === "stop" || entry.classification === "backtrack") {
      outstanding.push(entry);
    }
  }
  return outstanding;
}
|
||||
|
||||
/**
 * Collect the backtrack directives that still have to be acted on: captures
 * classified "backtrack" that are resolved but not yet marked executed.
 * These direct auto-mode to abandon the current milestone and return to a
 * previous one.
 */
export function loadBacktrackCaptures(basePath: string): CaptureEntry[] {
  const isOutstandingBacktrack = (entry: CaptureEntry): boolean =>
    entry.classification === "backtrack" && entry.status === "resolved" && !entry.executed;

  return loadAllCaptures(basePath).filter(isOutstandingBacktrack);
}
|
||||
|
||||
/**
 * Revert captures that were silenced by non-triage agents.
 *
 * When an execute-task or other non-triage agent writes `**Status:** resolved`
 * to CAPTURES.md, it bypasses the triage pipeline entirely. This function
 * detects such captures (resolved but missing the Classification field that
 * triage always writes) and flips their status line back to pending so the
 * triage sidecar picks them up properly.
 *
 * The file is rewritten only when at least one section actually changed.
 *
 * @param basePath Project root used to locate the captures file.
 * @returns The number of captures whose status line was reverted.
 */
export function revertExecutorResolvedCaptures(basePath: string): number {
  const filePath = resolveCapturesPath(basePath);
  if (!existsSync(filePath)) return 0;

  let content = readFileSync(filePath, "utf-8");
  let reverted = 0;

  for (const capture of loadAllCaptures(basePath)) {
    // A properly triaged capture has both resolved status AND a classification.
    // An executor-silenced capture has resolved status but NO classification.
    if (capture.status !== "resolved" || capture.classification) continue;

    // Matches from this capture's heading up to the next "### " heading or EOF.
    const sectionRegex = new RegExp(
      `(### ${escapeRegex(capture.id)}\\n(?:(?!### ).)*?)(?=### |$)`,
      "s",
    );
    const match = sectionRegex.exec(content);
    if (!match) continue;

    // The trailing lookahead is zero-width, so the full match is the section.
    const original = match[0];
    const updated = original.replace(
      /\*\*Status:\*\*\s*resolved/i,
      "**Status:** pending",
    );
    // Nothing to revert in the file text — don't count it or force a rewrite.
    if (updated === original) continue;

    // Splice by index instead of String.replace(regex, string): the section
    // contains user-written capture text, and sequences such as "$1", "$&" or
    // "$100" would otherwise be expanded as replacement patterns and corrupt
    // the file.
    content =
      content.slice(0, match.index) +
      updated +
      content.slice(match.index + original.length);
    reverted++;
  }

  if (reverted > 0) {
    writeFileSync(filePath, content, "utf-8");
  }

  return reverted;
}
|
||||
|
||||
/**
|
||||
* Retroactively stamp a capture with a milestone ID.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1661,11 +1661,11 @@ export function getActiveSliceFromDb(milestoneId: string): SliceRow | null {
|
|||
const row = currentDb.prepare(
|
||||
`SELECT s.* FROM slices s
|
||||
WHERE s.milestone_id = :mid
|
||||
AND s.status NOT IN ('complete', 'done')
|
||||
AND s.status NOT IN ('complete', 'done', 'skipped')
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM json_each(s.depends) AS dep
|
||||
WHERE dep.value NOT IN (
|
||||
SELECT id FROM slices WHERE milestone_id = :mid AND status IN ('complete', 'done')
|
||||
SELECT id FROM slices WHERE milestone_id = :mid AND status IN ('complete', 'done', 'skipped')
|
||||
)
|
||||
)
|
||||
ORDER BY s.sequence, s.id
|
||||
|
|
|
|||
|
|
@ -45,6 +45,13 @@ reason: "<reason>"
|
|||
### Unpark a milestone
|
||||
Remove the `{ID}-PARKED.md` file from the milestone directory to reactivate it.
|
||||
|
||||
### Skip a slice
|
||||
Mark a slice as skipped so auto-mode advances past it without executing. Use the `gsd_skip_slice` tool:
|
||||
```
|
||||
gsd_skip_slice({ milestoneId: "M003", sliceId: "S02", reason: "Descoped — feature moved to M005" })
|
||||
```
|
||||
Skipped slices are treated as closed by the state machine (like "complete" but distinct). Use when a slice is no longer needed or has been superseded. The slice data is preserved for reference.
|
||||
|
||||
### Discard a milestone
|
||||
**Permanently** delete a milestone directory and prune it from QUEUE-ORDER.json. **Always confirm with the user before discarding.** Warn explicitly if the milestone has completed work.
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ The user captured thoughts during execution using `/gsd capture`. Your job is to
|
|||
|
||||
For each capture, classify it as one of:
|
||||
|
||||
- **stop**: User directive to halt auto-mode immediately. Use when the user says "stop", "halt", "abort", "don't continue", "pause", or otherwise wants execution to cease. Auto-mode will pause after the current unit completes. Examples: "stop running", "halt execution", "don't continue".
|
||||
- **backtrack**: User directive to abandon the current milestone and return to a previous one. The user believes earlier milestones missed critical features or need rework. Include the target milestone ID (e.g., M003) in the Resolution field. Auto-mode will pause and write a regression marker. Examples: "restart from M003", "go back to milestone 3", "M004 and M005 failed, restart from M003".
|
||||
- **quick-task**: Small, self-contained, no downstream impact. Can be done in minutes without modifying the plan. Examples: fix a typo, add a missing import, tweak a config value.
|
||||
- **inject**: Belongs in the current slice but wasn't planned. Needs a new task added to the slice plan. Examples: add error handling to a module being built, add a missing test case for current work.
|
||||
- **defer**: Belongs in a future slice or milestone. Not urgent for current work. Examples: performance optimization, feature that depends on unbuilt infrastructure, nice-to-have enhancement.
|
||||
|
|
@ -28,10 +30,12 @@ For each capture, classify it as one of:
|
|||
|
||||
## Decision Guidelines
|
||||
|
||||
- **ALWAYS classify as stop** when the user explicitly says "stop", "halt", "abort", or "don't continue". Never shoe-horn a stop directive into "replan" or "note".
|
||||
- **ALWAYS classify as backtrack** when the user references returning to a previous milestone, restarting from an earlier point, or abandoning current milestone work. Include the target milestone ID in the Resolution field (e.g., "Backtrack to M003").
|
||||
- Prefer **quick-task** when the work is clearly small and self-contained.
|
||||
- Prefer **inject** over **replan** when only a new task is needed, not rewriting existing ones.
|
||||
- Prefer **defer** over **inject** when the work doesn't belong in the current slice's scope.
|
||||
- Use **replan** only when remaining incomplete tasks in the *current slice* need to change — not just for adding work, and not for cross-milestone issues.
|
||||
- Use **note** for observations that don't require action.
|
||||
- When unsure between quick-task and inject, consider: will this take more than 10 minutes? If yes, inject.
|
||||
|
||||
|
|
@ -46,6 +50,7 @@ For each capture, classify it as one of:
|
|||
- If applicable, which files would be affected
|
||||
|
||||
For captures classified as **note** or **defer**, auto-confirm without asking — these are low-impact.
|
||||
For captures classified as **stop** or **backtrack**, auto-confirm without asking — these are urgent user directives that must be honored immediately.
|
||||
For captures classified as **quick-task**, **inject**, or **replan**, ask the user to confirm or choose a different classification.
|
||||
|
||||
3. **Update** `.gsd/CAPTURES.md` — for each capture, update its section with the confirmed classification:
|
||||
|
|
|
|||
|
|
@ -112,8 +112,11 @@ function buildRethinkData(
|
|||
if (dbAvailable && status !== "complete") {
|
||||
const slices = getMilestoneSlices(mid);
|
||||
if (slices.length > 0) {
|
||||
const done = slices.filter(s => s.status === "complete").length;
|
||||
sliceInfo = `${done}/${slices.length} complete`;
|
||||
const done = slices.filter(s => s.status === "complete" || s.status === "done").length;
|
||||
const skipped = slices.filter(s => s.status === "skipped").length;
|
||||
sliceInfo = skipped > 0
|
||||
? `${done}/${slices.length} complete, ${skipped} skipped`
|
||||
: `${done}/${slices.length} complete`;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -295,7 +295,7 @@ function extractContextTitle(content: string | null, fallback: string): string {
|
|||
* Helper: check if a DB status counts as "done" (handles K002 ambiguity).
|
||||
*/
|
||||
function isStatusDone(status: string): boolean {
  // Three stored values count as "done": "complete", "done", and "skipped".
  // A single return keeps "skipped" reachable — an earlier two-value check
  // placed before it would report skipped slices as unfinished.
  return status === 'complete' || status === 'done' || status === 'skipped';
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,13 +1,14 @@
|
|||
/**
|
||||
* Status predicates for GSD state-machine guards.
|
||||
*
|
||||
* The DB stores status as free-form strings. Three values indicate
* "closed": "complete" (canonical), "done" (legacy / alias), and
* "skipped" (user-directed skip via rethink or backtrack).
|
||||
* Every inline `status === "complete" || status === "done"` should
|
||||
* use isClosedStatus() instead.
|
||||
*/
|
||||
|
||||
/**
 * Returns true when a milestone, slice, or task status indicates closure.
 *
 * Closed statuses are "complete", "done", and "skipped"; every other string
 * (including the empty string) is treated as still open.
 */
export function isClosedStatus(status: string): boolean {
  // Single return: a stale two-value check ahead of this one would make
  // "skipped" unreachable and report it as open.
  return status === "complete" || status === "done" || status === "skipped";
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,10 @@ test('isClosedStatus: "done" returns true', () => {
|
|||
assert.equal(isClosedStatus('done'), true);
|
||||
});
|
||||
|
||||
test('isClosedStatus: "skipped" returns true', () => {
|
||||
assert.equal(isClosedStatus('skipped'), true);
|
||||
});
|
||||
|
||||
test('isClosedStatus: "pending" returns false', () => {
|
||||
assert.equal(isClosedStatus('pending'), false);
|
||||
});
|
||||
|
|
|
|||
216
src/resources/extensions/gsd/tests/stop-backtrack.test.ts
Normal file
216
src/resources/extensions/gsd/tests/stop-backtrack.test.ts
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
/**
|
||||
* Unit tests for stop/backtrack capture classifications and milestone regression (#3487).
|
||||
*
|
||||
* Tests:
|
||||
* - "stop" and "backtrack" are valid classification types
|
||||
* - loadStopCaptures returns unexecuted stop+backtrack captures
|
||||
* - loadBacktrackCaptures returns only backtrack captures
|
||||
* - revertExecutorResolvedCaptures reverts silenced captures
|
||||
* - executeBacktrack writes trigger and regression markers
|
||||
* - readBacktrackTrigger parses trigger file
|
||||
*/
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdirSync, readFileSync, writeFileSync, rmSync, existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { isClosedStatus } from "../status-guards.ts";
|
||||
import {
|
||||
appendCapture,
|
||||
loadAllCaptures,
|
||||
loadStopCaptures,
|
||||
loadBacktrackCaptures,
|
||||
markCaptureResolved,
|
||||
revertExecutorResolvedCaptures,
|
||||
hasPendingCaptures,
|
||||
} from "../captures.ts";
|
||||
import {
|
||||
executeBacktrack,
|
||||
readBacktrackTrigger,
|
||||
} from "../triage-resolution.ts";
|
||||
|
||||
/**
 * Create a fresh scratch directory under the OS temp dir and return its path.
 * The name is `<prefix>-<epoch millis>-<random base-36 suffix>`.
 */
function makeTempDir(prefix: string): string {
  const stamp = Date.now();
  const nonce = Math.random().toString(36).slice(2);
  const dir = join(tmpdir(), `${prefix}-${stamp}-${nonce}`);
  mkdirSync(dir, { recursive: true });
  return dir;
}
|
||||
|
||||
/** Ensure the `.gsd` directory exists inside the given scratch directory. */
function setupGsdDir(tmp: string): void {
  const gsdDir = join(tmp, ".gsd");
  mkdirSync(gsdDir, { recursive: true });
}
|
||||
|
||||
// ─── Classification Types ─────────────────────────────────────────────────────
|
||||
|
||||
// A capture resolved with classification "stop" reads back as "stop".
test("stop is a valid classification", () => {
  const tmp = makeTempDir("stop-class");
  try {
    setupGsdDir(tmp);
    const id = appendCapture(tmp, "stop running immediately");
    markCaptureResolved(tmp, id, "stop", "Halt auto-mode", "User said stop", "M005");
    const cap = loadAllCaptures(tmp).find(c => c.id === id);
    assert.equal(cap?.classification, "stop");
  } finally {
    // Clean up even when an assertion throws, so failed runs don't leak dirs.
    rmSync(tmp, { recursive: true, force: true });
  }
});

// A capture resolved with classification "backtrack" reads back as "backtrack".
test("backtrack is a valid classification", () => {
  const tmp = makeTempDir("bt-class");
  try {
    setupGsdDir(tmp);
    const id = appendCapture(tmp, "restart from M003");
    markCaptureResolved(tmp, id, "backtrack", "Backtrack to M003", "User wants to restart", "M005");
    const cap = loadAllCaptures(tmp).find(c => c.id === id);
    assert.equal(cap?.classification, "backtrack");
  } finally {
    // Clean up even when an assertion throws, so failed runs don't leak dirs.
    rmSync(tmp, { recursive: true, force: true });
  }
});
|
||||
|
||||
// ─── loadStopCaptures ─────────────────────────────────────────────────────────
|
||||
|
||||
// loadStopCaptures includes both stop and backtrack captures, but not notes.
test("loadStopCaptures returns unexecuted stop and backtrack captures", () => {
  const tmp = makeTempDir("load-stop");
  try {
    setupGsdDir(tmp);
    const stopId = appendCapture(tmp, "halt execution");
    const btId = appendCapture(tmp, "go back to M003");
    const noteId = appendCapture(tmp, "just a note");
    markCaptureResolved(tmp, stopId, "stop", "Halt", "User stop", "M005");
    markCaptureResolved(tmp, btId, "backtrack", "Backtrack to M003", "User backtrack", "M005");
    markCaptureResolved(tmp, noteId, "note", "Info only", "Not actionable", "M005");

    const stops = loadStopCaptures(tmp);
    assert.equal(stops.length, 2);
    assert.ok(stops.some(c => c.classification === "stop"));
    assert.ok(stops.some(c => c.classification === "backtrack"));
  } finally {
    // Clean up even when an assertion throws, so failed runs don't leak dirs.
    rmSync(tmp, { recursive: true, force: true });
  }
});

// loadBacktrackCaptures excludes plain stop captures.
test("loadBacktrackCaptures returns only backtrack captures", () => {
  const tmp = makeTempDir("load-bt");
  try {
    setupGsdDir(tmp);
    const stopId = appendCapture(tmp, "halt execution");
    const btId = appendCapture(tmp, "go back to M003");
    markCaptureResolved(tmp, stopId, "stop", "Halt", "User stop", "M005");
    markCaptureResolved(tmp, btId, "backtrack", "Backtrack to M003", "User backtrack", "M005");

    const bts = loadBacktrackCaptures(tmp);
    assert.equal(bts.length, 1);
    assert.equal(bts[0].classification, "backtrack");
  } finally {
    // Clean up even when an assertion throws, so failed runs don't leak dirs.
    rmSync(tmp, { recursive: true, force: true });
  }
});
|
||||
|
||||
// ─── revertExecutorResolvedCaptures ───────────────────────────────────────────
|
||||
|
||||
// A capture flipped to "resolved" by hand (no classification) goes back to pending.
test("revertExecutorResolvedCaptures reverts captures resolved without classification", () => {
  const tmp = makeTempDir("revert-exec");
  try {
    setupGsdDir(tmp);
    appendCapture(tmp, "stop everything");

    // Simulate an executor writing Status: resolved directly (no classification)
    const capPath = join(tmp, ".gsd", "CAPTURES.md");
    const silenced = readFileSync(capPath, "utf-8")
      .replace("**Status:** pending", "**Status:** resolved");
    writeFileSync(capPath, silenced, "utf-8");

    // Verify it's now "resolved" without classification
    assert.equal(hasPendingCaptures(tmp), false);

    // Revert should detect and fix it
    const reverted = revertExecutorResolvedCaptures(tmp);
    assert.equal(reverted, 1);

    // Should be pending again
    assert.equal(hasPendingCaptures(tmp), true);
  } finally {
    // Clean up even when an assertion throws, so failed runs don't leak dirs.
    rmSync(tmp, { recursive: true, force: true });
  }
});

// A capture resolved through markCaptureResolved (with classification) is left alone.
test("revertExecutorResolvedCaptures does NOT revert properly triaged captures", () => {
  const tmp = makeTempDir("revert-skip");
  try {
    setupGsdDir(tmp);
    const id = appendCapture(tmp, "restart from M003");
    markCaptureResolved(tmp, id, "backtrack", "Backtrack to M003", "User wants restart", "M005");

    // This capture was properly triaged — should NOT be reverted
    const reverted = revertExecutorResolvedCaptures(tmp);
    assert.equal(reverted, 0);
  } finally {
    // Clean up even when an assertion throws, so failed runs don't leak dirs.
    rmSync(tmp, { recursive: true, force: true });
  }
});
|
||||
|
||||
// ─── executeBacktrack ─────────────────────────────────────────────────────────
|
||||
|
||||
// executeBacktrack returns the target milestone and writes both marker files.
test("executeBacktrack writes trigger and regression markers", () => {
  const tmp = makeTempDir("exec-bt");
  try {
    setupGsdDir(tmp);

    // Create target milestone directory
    mkdirSync(join(tmp, ".gsd", "milestones", "M003"), { recursive: true });

    const targetMid = executeBacktrack(tmp, "M005", {
      id: "CAP-test123",
      text: "restart from M003 — milestones after 2 failed",
      timestamp: new Date().toISOString(),
      status: "resolved",
      classification: "backtrack",
      resolution: "Backtrack to M003",
      rationale: "User directive",
    });

    assert.equal(targetMid, "M003");

    // Check trigger file exists and names both milestones
    const triggerPath = join(tmp, ".gsd", "BACKTRACK-TRIGGER.md");
    assert.ok(existsSync(triggerPath));
    const triggerContent = readFileSync(triggerPath, "utf-8");
    assert.ok(triggerContent.includes("M005"));
    assert.ok(triggerContent.includes("M003"));

    // Check regression marker exists on target milestone
    const regressionPath = join(tmp, ".gsd", "milestones", "M003", "M003-REGRESSION.md");
    assert.ok(existsSync(regressionPath));
    const regressionContent = readFileSync(regressionPath, "utf-8");
    assert.ok(regressionContent.includes("M005"));
  } finally {
    // Clean up even when an assertion throws, so failed runs don't leak dirs.
    rmSync(tmp, { recursive: true, force: true });
  }
});
|
||||
|
||||
// ─── readBacktrackTrigger ─────────────────────────────────────────────────────
|
||||
|
||||
// The trigger written by executeBacktrack parses back into target/from IDs.
test("readBacktrackTrigger parses trigger file", () => {
  const tmp = makeTempDir("read-bt");
  try {
    setupGsdDir(tmp);
    mkdirSync(join(tmp, ".gsd", "milestones", "M003"), { recursive: true });

    executeBacktrack(tmp, "M005", {
      id: "CAP-abc",
      text: "go back to M003",
      timestamp: new Date().toISOString(),
      status: "resolved",
      classification: "backtrack",
      resolution: "Backtrack to M003",
      rationale: "Regression",
    });

    const trigger = readBacktrackTrigger(tmp);
    assert.ok(trigger);
    assert.equal(trigger.target, "M003");
    assert.equal(trigger.from, "M005");
  } finally {
    // Clean up even when an assertion throws, so failed runs don't leak dirs.
    rmSync(tmp, { recursive: true, force: true });
  }
});

// With no trigger file on disk the reader reports null.
test("readBacktrackTrigger returns null when no trigger exists", () => {
  const tmp = makeTempDir("no-bt");
  try {
    setupGsdDir(tmp);
    const trigger = readBacktrackTrigger(tmp);
    assert.equal(trigger, null);
  } finally {
    // Clean up even when an assertion throws, so failed runs don't leak dirs.
    rmSync(tmp, { recursive: true, force: true });
  }
});
|
||||
|
||||
// ─── Slice Skip Status (#3477) ──────────────────────────────────────────────
|
||||
|
||||
// Table-driven check: "skipped" joins "complete" and "done" as closed statuses,
// while in-progress statuses stay open.
test("isClosedStatus treats 'skipped' as closed", () => {
  for (const closed of ["skipped", "complete", "done"]) {
    assert.equal(isClosedStatus(closed), true);
  }
  for (const open of ["pending", "active"]) {
    assert.equal(isClosedStatus(open), false);
  }
});
|
||||
|
|
@ -45,7 +45,7 @@ console.log('\n── Tool naming: registration count ──');
|
|||
const pi = makeMockPi();
|
||||
registerDbTools(pi);
|
||||
|
||||
assert.deepStrictEqual(pi.tools.length, 29, 'Should register exactly 29 tools (14 canonical + 14 aliases + 1 gate tool)');
|
||||
assert.deepStrictEqual(pi.tools.length, 30, 'Should register exactly 30 tools (14 canonical + 14 aliases + 1 gate tool + 1 gsd_skip_slice)');
|
||||
|
||||
// ─── Both names exist for each pair ──────────────────────────────────────────
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
* Also provides detectFileOverlap() for surfacing downstream impact on quick tasks.
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync, unlinkSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { createRequire } from "node:module";
|
||||
import { gsdRoot, milestonesDir } from "./paths.js";
|
||||
|
|
@ -129,6 +129,129 @@ export function executeReplan(
|
|||
}
|
||||
}
|
||||
|
||||
// ─── Backtrack (Milestone Regression) ────────────────────────────────────────
|
||||
|
||||
/**
 * Execute a backtrack directive — the user wants to abandon the current
 * milestone and return to a previous one (milestone regression).
 *
 * Writes `BACKTRACK-TRIGGER.md` under the GSD root with the capture, rationale,
 * originating milestone, target milestone, and timestamp. When a target is
 * identified and its milestone directory exists, a `<target>-REGRESSION.md`
 * marker is also written inside that directory (best-effort).
 *
 * Target extraction looks for milestone IDs (`M###` or `M###-xxxxxx`) in the
 * capture's resolution first and then in its text. The first ID that differs
 * from `currentMilestoneId` wins; if every ID found is the current milestone,
 * the first ID found is used.
 *
 * @param basePath Project root passed to `gsdRoot()` / `milestonesDir()`.
 * @param currentMilestoneId Milestone being abandoned.
 * @param capture The capture that carries the backtrack directive.
 * @returns The extracted target milestone ID, or null when no ID could be
 *   extracted or the trigger file could not be written.
 */
export function executeBacktrack(
  basePath: string,
  currentMilestoneId: string,
  capture: CaptureEntry,
): string | null {
  try {
    const targetMilestoneId = extractBacktrackTarget(capture, currentMilestoneId);

    const ts = new Date().toISOString();
    const triggerPath = join(gsdRoot(basePath), "BACKTRACK-TRIGGER.md");
    // The trigger file is parsed one `**Label:** value` line at a time, so
    // free-form fields are flattened to a single line before being written.
    const captureLine = toSingleLine(capture.text);
    const rationaleLine = toSingleLine(capture.rationale ?? "User-initiated milestone backtrack");

    const content = [
      `# Backtrack Trigger`,
      ``,
      `**Source:** Capture ${capture.id}`,
      `**Capture:** ${captureLine}`,
      `**Rationale:** ${rationaleLine}`,
      `**From:** ${currentMilestoneId}`,
      `**Target:** ${targetMilestoneId ?? "(user to specify)"}`,
      `**Triggered:** ${ts}`,
      ``,
      `Auto-mode was paused by this backtrack directive. The user directed`,
      `that the current milestone (${currentMilestoneId}) be abandoned and work`,
      `should return to ${targetMilestoneId ?? "a previous milestone"}.`,
      ``,
      `## Recovery Steps`,
      ``,
      `1. Review what went wrong in ${currentMilestoneId}`,
      `2. Identify missing features/requirements from the target milestone`,
      `3. Resume auto-mode — the state machine will re-enter discussion for the target`,
    ].join("\n");

    writeFileSync(triggerPath, content, "utf-8");

    // With a valid target, also drop a regression marker into that milestone's
    // directory so it is flagged as needing re-discussion, not just re-execution.
    if (targetMilestoneId) {
      try {
        const targetDir = join(milestonesDir(basePath), targetMilestoneId);
        if (existsSync(targetDir)) {
          const regressionPath = join(targetDir, `${targetMilestoneId}-REGRESSION.md`);
          writeFileSync(regressionPath, [
            `# Milestone Regression`,
            ``,
            `**From:** ${currentMilestoneId}`,
            `**Reason:** ${capture.text}`,
            `**Triggered:** ${ts}`,
            ``,
            `This milestone is being revisited because downstream milestone`,
            `${currentMilestoneId} failed or missed critical features that should`,
            `have been part of this milestone's scope.`,
            ``,
            `The discuss phase should re-evaluate requirements and identify gaps.`,
          ].join("\n"), "utf-8");
        }
      } catch { /* best-effort: the trigger file above is already in place */ }
    }

    return targetMilestoneId;
  } catch {
    // Writing the trigger failed; report "no target" rather than throwing.
    return null;
  }
}

/** Collect every milestone ID (`M###` or `M###-xxxxxx`) in `source`, in order. */
function collectMilestoneIds(source: string | null | undefined): string[] {
  return source?.match(/\bM\d{3}(?:-[a-z0-9]{6})?\b/g) ?? [];
}

/**
 * Pick the backtrack target from a capture: IDs from the resolution are
 * considered before IDs from the text, and the milestone being abandoned is
 * only chosen when no other ID is mentioned.
 */
function extractBacktrackTarget(capture: CaptureEntry, currentMilestoneId: string): string | null {
  const candidates = [
    ...collectMilestoneIds(capture.resolution),
    ...collectMilestoneIds(capture.text),
  ];
  return candidates.find((id) => id !== currentMilestoneId) ?? candidates[0] ?? null;
}

/** Collapse line breaks (and the whitespace around them) into single spaces. */
function toSingleLine(value: string): string {
  return value.replace(/\s*[\r\n]+\s*/g, " ").trim();
}
|
||||
|
||||
/**
 * Read the backtrack trigger file (`BACKTRACK-TRIGGER.md` under the GSD root)
 * if it exists.
 *
 * Each field is read from a line of the form `**Label:** value`. Matching is
 * anchored to the start of a line and never crosses a line break, so an empty
 * field yields an empty result instead of swallowing the following line, and a
 * label that merely appears inside another field's value is ignored.
 *
 * @param basePath Project root passed to `gsdRoot()`.
 * @returns The parsed trigger, or null when the file is missing or unreadable.
 *   `target` is null when the file holds the "(user to specify)" placeholder
 *   or no target value; `from` is null when absent; `capture` and
 *   `triggeredAt` default to the empty string.
 */
export function readBacktrackTrigger(basePath: string): {
  target: string | null;
  from: string | null;
  capture: string;
  triggeredAt: string;
} | null {
  const triggerPath = join(gsdRoot(basePath), "BACKTRACK-TRIGGER.md");
  if (!existsSync(triggerPath)) return null;

  try {
    const content = readFileSync(triggerPath, "utf-8");

    // Labels are fixed literals, so they need no regex escaping.
    const readField = (label: string): string | null => {
      // `m` flag: ^/$ match at line boundaries; [ \t]* keeps the match on one line.
      const pattern = new RegExp(`^\\*\\*${label}:\\*\\*[ \\t]*(.*)$`, "m");
      const value = content.match(pattern)?.[1]?.trim();
      return value ? value : null;
    };

    const target = readField("Target");
    return {
      target: target === "(user to specify)" ? null : target,
      from: readField("From"),
      capture: readField("Capture") ?? "",
      triggeredAt: readField("Triggered") ?? "",
    };
  } catch {
    // The file vanished or could not be read between the check and the read.
    return null;
  }
}
|
||||
|
||||
/**
 * Delete `BACKTRACK-TRIGGER.md` from the GSD root once the trigger has been
 * processed. A missing file is a no-op and filesystem errors are swallowed.
 */
export function clearBacktrackTrigger(basePath: string): void {
  const markerFile = join(gsdRoot(basePath), "BACKTRACK-TRIGGER.md");

  try {
    // Nothing to clear when the marker was never written or is already gone.
    if (!existsSync(markerFile)) return;
    unlinkSync(markerFile);
  } catch {
    /* best-effort */
  }
}
|
||||
|
||||
// ─── File Overlap Detection ───────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
|
|
@ -298,6 +421,10 @@ export interface TriageExecutionResult {
|
|||
deferredMilestones: number;
|
||||
/** Captures classified as quick-task that need dispatch */
|
||||
quickTasks: CaptureEntry[];
|
||||
/** Number of stop directives (will pause auto-mode via guard) */
|
||||
stopped: number;
|
||||
/** Backtrack captures (will trigger milestone regression via guard) */
|
||||
backtracks: CaptureEntry[];
|
||||
/** Details of each action taken, for logging */
|
||||
actions: string[];
|
||||
}
|
||||
|
|
@ -326,6 +453,8 @@ export function executeTriageResolutions(
|
|||
replanned: 0,
|
||||
deferredMilestones: 0,
|
||||
quickTasks: [],
|
||||
stopped: 0,
|
||||
backtracks: [],
|
||||
actions: [],
|
||||
};
|
||||
|
||||
|
|
@ -409,5 +538,19 @@ export function executeTriageResolutions(
|
|||
}
|
||||
}
|
||||
|
||||
// Count stop/backtrack captures — these are handled by the pre-dispatch guard
|
||||
// in runGuards(), not here. We just report them for logging purposes.
|
||||
const allCaptures = loadAllCaptures(basePath);
|
||||
for (const cap of allCaptures) {
|
||||
if (cap.status !== "resolved" || cap.executed) continue;
|
||||
if (cap.classification === "stop") {
|
||||
result.stopped++;
|
||||
result.actions.push(`Stop directive from ${cap.id}: "${cap.text}" — will pause on next dispatch`);
|
||||
} else if (cap.classification === "backtrack") {
|
||||
result.backtracks.push(cap);
|
||||
result.actions.push(`Backtrack directive from ${cap.id}: "${cap.text}" — will trigger milestone regression on next dispatch`);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,11 +51,11 @@ const CLASSIFICATION_LABELS: Record<Classification, { label: string; description
|
|||
},
|
||||
"stop": {
|
||||
label: "Stop",
|
||||
description: "Halt current execution — a blocking issue requires resolution.",
|
||||
description: "Halt auto-mode immediately — user directive to cease execution.",
|
||||
},
|
||||
"backtrack": {
|
||||
label: "Backtrack",
|
||||
description: "Undo recent steps and retry from an earlier checkpoint.",
|
||||
description: "Abandon current milestone and return to a previous one.",
|
||||
},
|
||||
};
|
||||
|
||||
|
|
@ -91,8 +91,9 @@ export async function showTriageConfirmation(
|
|||
const capture = captureMap.get(result.captureId);
|
||||
if (!capture) continue;
|
||||
|
||||
// Auto-confirm note and defer — low-impact, no plan modification
|
||||
if (result.classification === "note" || result.classification === "defer") {
|
||||
// Auto-confirm note, defer, stop, and backtrack — low-impact or urgent directives
|
||||
if (result.classification === "note" || result.classification === "defer"
|
||||
|| result.classification === "stop" || result.classification === "backtrack") {
|
||||
const resolution = result.classification === "note"
|
||||
? "acknowledged as note"
|
||||
: `deferred${result.targetSlice ? ` to ${result.targetSlice}` : ""}`;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue