fix(gsd): add intent + phase guards to resume context fallback (#3615)

Tighten the deriveState fallback per adversarial review:
- Intent-gated: only fire for low-entropy resume prompts via
  RESUME_INTENT_PATTERNS (continue, ok, go ahead, resume, etc.)
- Phase-gated: only during state.phase === "executing"
- Non-resume prompts (help, status, abort, diagnostics) are not
  hijacked with execution context

Add behavioral tests: 24 positive matches + 17 negative rejections
for the intent pattern, alongside the 5 structural tests.
This commit is contained in:
Jeremy 2026-04-06 12:14:36 -05:00
parent 5b104897c8
commit 04dc4a988b
2 changed files with 123 additions and 50 deletions

View file

@ -264,6 +264,13 @@ function buildWorktreeContextBlock(): string {
return "";
}
/**
* Low-entropy resume intent patterns short phrases a user types to
* continue work after a pause, rate limit, or context reset (#3615).
* Tested against the trimmed, lowercased prompt with trailing punctuation stripped.
*/
const RESUME_INTENT_PATTERNS = /^(continue|resume|ok|go|go ahead|proceed|keep going|carry on|next|yes|yeah|yep|sure|do it|let's go|pick up where you left off)$/;
async function buildGuidedExecuteContextInjection(prompt: string, basePath: string): Promise<string | null> {
const executeMatch = prompt.match(/Execute the next task:\s+(T\d+)\s+\("([^"]+)"\)\s+in slice\s+(S\d+)\s+of milestone\s+(M\d+(?:-[a-z0-9]{6})?)/i);
if (executeMatch) {
@ -280,21 +287,25 @@ async function buildGuidedExecuteContextInjection(prompt: string, basePath: stri
}
}
// Fallback: unstructured prompt (e.g., "continue", "ok", "go ahead") but
// state shows an active executing task — inject task context so the agent
// Fallback: low-entropy resume prompt (e.g., "continue", "ok", "go ahead")
// during an active executing task — inject task context so the agent
// doesn't rebuild from scratch (#3615).
// Intent-gated: only fire for short, resume-like prompts to avoid hijacking
// control/help/diagnostic prompts with unrelated execution context.
// Phase-gated: only fire during "executing" to avoid misrouting during
// replanning, gate evaluation, or other non-execution phases where
// activeTask may still be set.
const state = await deriveState(basePath);
if (state.phase === "executing" && state.activeTask && state.activeMilestone && state.activeSlice) {
return buildTaskExecutionContextInjection(
basePath,
state.activeMilestone.id,
state.activeSlice.id,
state.activeTask.id,
state.activeTask.title,
);
// replanning, gate evaluation, or other non-execution phases.
const trimmed = prompt.trim().toLowerCase().replace(/[.!?,]+$/g, "");
if (RESUME_INTENT_PATTERNS.test(trimmed)) {
const state = await deriveState(basePath);
if (state.phase === "executing" && state.activeTask && state.activeMilestone && state.activeSlice) {
return buildTaskExecutionContextInjection(
basePath,
state.activeMilestone.id,
state.activeSlice.id,
state.activeTask.id,
state.activeTask.title,
);
}
}
return null;

View file

@ -9,12 +9,10 @@
* injected causing the agent to rebuild everything from scratch and
* burn ~86k tokens.
*
* This test verifies the structural properties of the fix:
* 1. buildGuidedExecuteContextInjection has a deriveState fallback
* that fires when neither regex matches
* 2. The fallback is phase-gated to "executing" only
* 3. The fallback checks for activeTask + activeMilestone + activeSlice
* 4. The fallback calls buildTaskExecutionContextInjection with derived state
* This test verifies:
* 1. Structural: the fallback exists with phase + intent guards
* 2. Behavioral: RESUME_INTENT_PATTERNS matches expected prompts and
* rejects non-resume prompts (control, help, diagnostic, etc.)
*/
import { describe, test } from "node:test";
@ -29,69 +27,56 @@ const systemContextSource = readFileSync(
"utf-8",
);
describe("#3615 — unstructured prompts must inject task context when work is active", () => {
// Extract the buildGuidedExecuteContextInjection function body
// ── Structural tests ────────────────────────────────────────────────
describe("#3615 — structural: fallback exists with correct guards", () => {
const fnStart = systemContextSource.indexOf("async function buildGuidedExecuteContextInjection(");
assert.ok(fnStart >= 0, "should find buildGuidedExecuteContextInjection");
// Find the closing brace — scan for the next top-level "async function" or end of file
const fnEnd = systemContextSource.indexOf("\nasync function ", fnStart + 1);
const fnBody = fnEnd >= 0
? systemContextSource.slice(fnStart, fnEnd)
: systemContextSource.slice(fnStart);
test("has a deriveState fallback after the two regex branches", () => {
// The function should call deriveState outside of the resumeMatch block
// Count deriveState calls — should be at least 2 (one in resumeMatch, one in fallback)
const deriveStateCalls = fnBody.match(/deriveState\(basePath\)/g);
assert.ok(
deriveStateCalls && deriveStateCalls.length >= 2,
`expected >=2 deriveState(basePath) calls in buildGuidedExecuteContextInjection, got ${deriveStateCalls?.length ?? 0}`,
`expected >=2 deriveState(basePath) calls, got ${deriveStateCalls?.length ?? 0}`,
);
});
test("fallback is phase-gated to executing only", () => {
// The fallback must check state.phase === "executing" to avoid misrouting
// during replanning, gate evaluation, or other non-execution phases
const afterFallbackComment = fnBody.indexOf("// Fallback:");
assert.ok(afterFallbackComment >= 0, "should have a fallback comment after the regex branches");
const fallbackSection = fnBody.slice(afterFallbackComment);
const afterFallback = fnBody.indexOf("// Fallback:");
assert.ok(afterFallback >= 0, "should have a fallback comment");
const fallbackSection = fnBody.slice(afterFallback);
assert.ok(
fallbackSection.includes('state.phase === "executing"'),
'fallback must be gated on state.phase === "executing" to prevent misrouting in non-execution phases',
'fallback must be gated on state.phase === "executing"',
);
});
test("fallback checks for activeTask, activeMilestone, and activeSlice", () => {
const afterFallbackComment = fnBody.indexOf("// Fallback:");
const fallbackSection = fnBody.slice(afterFallbackComment);
test("fallback is intent-gated via RESUME_INTENT_PATTERNS", () => {
const afterFallback = fnBody.indexOf("// Fallback:");
const fallbackSection = fnBody.slice(afterFallback);
assert.ok(
fallbackSection.includes("state.activeTask") &&
fallbackSection.includes("state.activeMilestone") &&
fallbackSection.includes("state.activeSlice"),
"fallback must check all three state properties before injecting context",
fallbackSection.includes("RESUME_INTENT_PATTERNS"),
"fallback must check RESUME_INTENT_PATTERNS before deriveState",
);
});
test("fallback calls buildTaskExecutionContextInjection with derived state", () => {
const afterFallbackComment = fnBody.indexOf("// Fallback:");
assert.ok(afterFallbackComment >= 0, "should have a fallback comment after the regex branches");
const fallbackSection = fnBody.slice(afterFallbackComment);
assert.ok(
fallbackSection.includes("buildTaskExecutionContextInjection"),
"fallback section must call buildTaskExecutionContextInjection",
);
const afterFallback = fnBody.indexOf("// Fallback:");
const fallbackSection = fnBody.slice(afterFallback);
assert.ok(
fallbackSection.includes("buildTaskExecutionContextInjection") &&
fallbackSection.includes("state.activeMilestone.id") &&
fallbackSection.includes("state.activeSlice.id") &&
fallbackSection.includes("state.activeTask.id"),
"fallback must pass state-derived IDs to buildTaskExecutionContextInjection",
"fallback must call buildTaskExecutionContextInjection with state-derived IDs",
);
});
test("function does NOT return null before the fallback", () => {
// There should be exactly one "return null" at the very end, after the fallback
test("only one return null at the end", () => {
const returnNulls = fnBody.match(/return null;/g);
assert.ok(
returnNulls && returnNulls.length === 1,
@ -99,3 +84,80 @@ describe("#3615 — unstructured prompts must inject task context when work is a
);
});
});
// ── Behavioral tests: RESUME_INTENT_PATTERNS ────────────────────────
describe("#3615 — behavioral: RESUME_INTENT_PATTERNS matches resume prompts", () => {
// Extract the regex from source so the test stays in sync
const patternMatch = systemContextSource.match(/const RESUME_INTENT_PATTERNS\s*=\s*\/(.+)\/;/);
assert.ok(patternMatch, "should find RESUME_INTENT_PATTERNS definition");
const pattern = new RegExp(patternMatch[1]);
// Helper: normalize prompt the same way the production code does
const normalize = (s: string) => s.trim().toLowerCase().replace(/[.!?,]+$/g, "");
const shouldMatch = [
"continue",
"Continue",
"CONTINUE",
"continue.",
"continue!",
"resume",
"ok",
"OK",
"Ok!",
"go",
"go ahead",
"Go ahead.",
"proceed",
"keep going",
"carry on",
"next",
"yes",
"yeah",
"yep",
"sure",
"do it",
"let's go",
"pick up where you left off",
" continue ", // whitespace padded
];
const shouldNotMatch = [
"help",
"status",
"/gsd auto",
"/gsd stats",
"what's the plan?",
"show me the logs",
"abort",
"stop",
"cancel",
"replan this slice",
"I think we should change the approach",
"can you explain what you just did?",
"run the tests",
"check the build",
"Execute the next task: T01",
"what files were changed",
"",
];
for (const prompt of shouldMatch) {
test(`matches resume prompt: "${prompt}"`, () => {
assert.ok(
pattern.test(normalize(prompt)),
`expected RESUME_INTENT_PATTERNS to match "${prompt}" (normalized: "${normalize(prompt)}")`,
);
});
}
for (const prompt of shouldNotMatch) {
test(`rejects non-resume prompt: "${prompt}"`, () => {
assert.ok(
!pattern.test(normalize(prompt)),
`expected RESUME_INTENT_PATTERNS to NOT match "${prompt}" (normalized: "${normalize(prompt)}")`,
);
});
}
});