fix(gsd): add intent + phase guards to resume context fallback (#3615)
Tighten the deriveState fallback per adversarial review: - Intent-gated: only fire for low-entropy resume prompts via RESUME_INTENT_PATTERNS (continue, ok, go ahead, resume, etc.) - Phase-gated: only during state.phase === "executing" - Non-resume prompts (help, status, abort, diagnostics) are not hijacked with execution context Add behavioral tests: 24 positive matches + 17 negative rejections for the intent pattern, alongside the 5 structural tests.
This commit is contained in:
parent
5b104897c8
commit
04dc4a988b
2 changed files with 123 additions and 50 deletions
|
|
@ -264,6 +264,13 @@ function buildWorktreeContextBlock(): string {
|
|||
return "";
|
||||
}
|
||||
|
||||
/**
|
||||
* Low-entropy resume intent patterns — short phrases a user types to
|
||||
* continue work after a pause, rate limit, or context reset (#3615).
|
||||
* Tested against the trimmed, lowercased prompt with trailing punctuation stripped.
|
||||
*/
|
||||
const RESUME_INTENT_PATTERNS = /^(continue|resume|ok|go|go ahead|proceed|keep going|carry on|next|yes|yeah|yep|sure|do it|let's go|pick up where you left off)$/;
|
||||
|
||||
async function buildGuidedExecuteContextInjection(prompt: string, basePath: string): Promise<string | null> {
|
||||
const executeMatch = prompt.match(/Execute the next task:\s+(T\d+)\s+\("([^"]+)"\)\s+in slice\s+(S\d+)\s+of milestone\s+(M\d+(?:-[a-z0-9]{6})?)/i);
|
||||
if (executeMatch) {
|
||||
|
|
@ -280,21 +287,25 @@ async function buildGuidedExecuteContextInjection(prompt: string, basePath: stri
|
|||
}
|
||||
}
|
||||
|
||||
// Fallback: unstructured prompt (e.g., "continue", "ok", "go ahead") but
|
||||
// state shows an active executing task — inject task context so the agent
|
||||
// Fallback: low-entropy resume prompt (e.g., "continue", "ok", "go ahead")
|
||||
// during an active executing task — inject task context so the agent
|
||||
// doesn't rebuild from scratch (#3615).
|
||||
// Intent-gated: only fire for short, resume-like prompts to avoid hijacking
|
||||
// control/help/diagnostic prompts with unrelated execution context.
|
||||
// Phase-gated: only fire during "executing" to avoid misrouting during
|
||||
// replanning, gate evaluation, or other non-execution phases where
|
||||
// activeTask may still be set.
|
||||
const state = await deriveState(basePath);
|
||||
if (state.phase === "executing" && state.activeTask && state.activeMilestone && state.activeSlice) {
|
||||
return buildTaskExecutionContextInjection(
|
||||
basePath,
|
||||
state.activeMilestone.id,
|
||||
state.activeSlice.id,
|
||||
state.activeTask.id,
|
||||
state.activeTask.title,
|
||||
);
|
||||
// replanning, gate evaluation, or other non-execution phases.
|
||||
const trimmed = prompt.trim().toLowerCase().replace(/[.!?,]+$/g, "");
|
||||
if (RESUME_INTENT_PATTERNS.test(trimmed)) {
|
||||
const state = await deriveState(basePath);
|
||||
if (state.phase === "executing" && state.activeTask && state.activeMilestone && state.activeSlice) {
|
||||
return buildTaskExecutionContextInjection(
|
||||
basePath,
|
||||
state.activeMilestone.id,
|
||||
state.activeSlice.id,
|
||||
state.activeTask.id,
|
||||
state.activeTask.title,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -9,12 +9,10 @@
|
|||
* injected — causing the agent to rebuild everything from scratch and
|
||||
* burn ~86k tokens.
|
||||
*
|
||||
* This test verifies the structural properties of the fix:
|
||||
* 1. buildGuidedExecuteContextInjection has a deriveState fallback
|
||||
* that fires when neither regex matches
|
||||
* 2. The fallback is phase-gated to "executing" only
|
||||
* 3. The fallback checks for activeTask + activeMilestone + activeSlice
|
||||
* 4. The fallback calls buildTaskExecutionContextInjection with derived state
|
||||
* This test verifies:
|
||||
* 1. Structural: the fallback exists with phase + intent guards
|
||||
* 2. Behavioral: RESUME_INTENT_PATTERNS matches expected prompts and
|
||||
* rejects non-resume prompts (control, help, diagnostic, etc.)
|
||||
*/
|
||||
|
||||
import { describe, test } from "node:test";
|
||||
|
|
@ -29,69 +27,56 @@ const systemContextSource = readFileSync(
|
|||
"utf-8",
|
||||
);
|
||||
|
||||
describe("#3615 — unstructured prompts must inject task context when work is active", () => {
|
||||
// Extract the buildGuidedExecuteContextInjection function body
|
||||
// ── Structural tests ────────────────────────────────────────────────
|
||||
|
||||
describe("#3615 — structural: fallback exists with correct guards", () => {
|
||||
const fnStart = systemContextSource.indexOf("async function buildGuidedExecuteContextInjection(");
|
||||
assert.ok(fnStart >= 0, "should find buildGuidedExecuteContextInjection");
|
||||
// Find the closing brace — scan for the next top-level "async function" or end of file
|
||||
const fnEnd = systemContextSource.indexOf("\nasync function ", fnStart + 1);
|
||||
const fnBody = fnEnd >= 0
|
||||
? systemContextSource.slice(fnStart, fnEnd)
|
||||
: systemContextSource.slice(fnStart);
|
||||
|
||||
test("has a deriveState fallback after the two regex branches", () => {
|
||||
// The function should call deriveState outside of the resumeMatch block
|
||||
// Count deriveState calls — should be at least 2 (one in resumeMatch, one in fallback)
|
||||
const deriveStateCalls = fnBody.match(/deriveState\(basePath\)/g);
|
||||
assert.ok(
|
||||
deriveStateCalls && deriveStateCalls.length >= 2,
|
||||
`expected >=2 deriveState(basePath) calls in buildGuidedExecuteContextInjection, got ${deriveStateCalls?.length ?? 0}`,
|
||||
`expected >=2 deriveState(basePath) calls, got ${deriveStateCalls?.length ?? 0}`,
|
||||
);
|
||||
});
|
||||
|
||||
test("fallback is phase-gated to executing only", () => {
|
||||
// The fallback must check state.phase === "executing" to avoid misrouting
|
||||
// during replanning, gate evaluation, or other non-execution phases
|
||||
const afterFallbackComment = fnBody.indexOf("// Fallback:");
|
||||
assert.ok(afterFallbackComment >= 0, "should have a fallback comment after the regex branches");
|
||||
|
||||
const fallbackSection = fnBody.slice(afterFallbackComment);
|
||||
const afterFallback = fnBody.indexOf("// Fallback:");
|
||||
assert.ok(afterFallback >= 0, "should have a fallback comment");
|
||||
const fallbackSection = fnBody.slice(afterFallback);
|
||||
assert.ok(
|
||||
fallbackSection.includes('state.phase === "executing"'),
|
||||
'fallback must be gated on state.phase === "executing" to prevent misrouting in non-execution phases',
|
||||
'fallback must be gated on state.phase === "executing"',
|
||||
);
|
||||
});
|
||||
|
||||
test("fallback checks for activeTask, activeMilestone, and activeSlice", () => {
|
||||
const afterFallbackComment = fnBody.indexOf("// Fallback:");
|
||||
const fallbackSection = fnBody.slice(afterFallbackComment);
|
||||
test("fallback is intent-gated via RESUME_INTENT_PATTERNS", () => {
|
||||
const afterFallback = fnBody.indexOf("// Fallback:");
|
||||
const fallbackSection = fnBody.slice(afterFallback);
|
||||
assert.ok(
|
||||
fallbackSection.includes("state.activeTask") &&
|
||||
fallbackSection.includes("state.activeMilestone") &&
|
||||
fallbackSection.includes("state.activeSlice"),
|
||||
"fallback must check all three state properties before injecting context",
|
||||
fallbackSection.includes("RESUME_INTENT_PATTERNS"),
|
||||
"fallback must check RESUME_INTENT_PATTERNS before deriveState",
|
||||
);
|
||||
});
|
||||
|
||||
test("fallback calls buildTaskExecutionContextInjection with derived state", () => {
|
||||
const afterFallbackComment = fnBody.indexOf("// Fallback:");
|
||||
assert.ok(afterFallbackComment >= 0, "should have a fallback comment after the regex branches");
|
||||
|
||||
const fallbackSection = fnBody.slice(afterFallbackComment);
|
||||
assert.ok(
|
||||
fallbackSection.includes("buildTaskExecutionContextInjection"),
|
||||
"fallback section must call buildTaskExecutionContextInjection",
|
||||
);
|
||||
const afterFallback = fnBody.indexOf("// Fallback:");
|
||||
const fallbackSection = fnBody.slice(afterFallback);
|
||||
assert.ok(
|
||||
fallbackSection.includes("buildTaskExecutionContextInjection") &&
|
||||
fallbackSection.includes("state.activeMilestone.id") &&
|
||||
fallbackSection.includes("state.activeSlice.id") &&
|
||||
fallbackSection.includes("state.activeTask.id"),
|
||||
"fallback must pass state-derived IDs to buildTaskExecutionContextInjection",
|
||||
"fallback must call buildTaskExecutionContextInjection with state-derived IDs",
|
||||
);
|
||||
});
|
||||
|
||||
test("function does NOT return null before the fallback", () => {
|
||||
// There should be exactly one "return null" at the very end, after the fallback
|
||||
test("only one return null at the end", () => {
|
||||
const returnNulls = fnBody.match(/return null;/g);
|
||||
assert.ok(
|
||||
returnNulls && returnNulls.length === 1,
|
||||
|
|
@ -99,3 +84,80 @@ describe("#3615 — unstructured prompts must inject task context when work is a
|
|||
);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Behavioral tests: RESUME_INTENT_PATTERNS ────────────────────────
|
||||
|
||||
describe("#3615 — behavioral: RESUME_INTENT_PATTERNS matches resume prompts", () => {
|
||||
// Extract the regex from source so the test stays in sync
|
||||
const patternMatch = systemContextSource.match(/const RESUME_INTENT_PATTERNS\s*=\s*\/(.+)\/;/);
|
||||
assert.ok(patternMatch, "should find RESUME_INTENT_PATTERNS definition");
|
||||
const pattern = new RegExp(patternMatch[1]);
|
||||
|
||||
// Helper: normalize prompt the same way the production code does
|
||||
const normalize = (s: string) => s.trim().toLowerCase().replace(/[.!?,]+$/g, "");
|
||||
|
||||
const shouldMatch = [
|
||||
"continue",
|
||||
"Continue",
|
||||
"CONTINUE",
|
||||
"continue.",
|
||||
"continue!",
|
||||
"resume",
|
||||
"ok",
|
||||
"OK",
|
||||
"Ok!",
|
||||
"go",
|
||||
"go ahead",
|
||||
"Go ahead.",
|
||||
"proceed",
|
||||
"keep going",
|
||||
"carry on",
|
||||
"next",
|
||||
"yes",
|
||||
"yeah",
|
||||
"yep",
|
||||
"sure",
|
||||
"do it",
|
||||
"let's go",
|
||||
"pick up where you left off",
|
||||
" continue ", // whitespace padded
|
||||
];
|
||||
|
||||
const shouldNotMatch = [
|
||||
"help",
|
||||
"status",
|
||||
"/gsd auto",
|
||||
"/gsd stats",
|
||||
"what's the plan?",
|
||||
"show me the logs",
|
||||
"abort",
|
||||
"stop",
|
||||
"cancel",
|
||||
"replan this slice",
|
||||
"I think we should change the approach",
|
||||
"can you explain what you just did?",
|
||||
"run the tests",
|
||||
"check the build",
|
||||
"Execute the next task: T01",
|
||||
"what files were changed",
|
||||
"",
|
||||
];
|
||||
|
||||
for (const prompt of shouldMatch) {
|
||||
test(`matches resume prompt: "${prompt}"`, () => {
|
||||
assert.ok(
|
||||
pattern.test(normalize(prompt)),
|
||||
`expected RESUME_INTENT_PATTERNS to match "${prompt}" (normalized: "${normalize(prompt)}")`,
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
for (const prompt of shouldNotMatch) {
|
||||
test(`rejects non-resume prompt: "${prompt}"`, () => {
|
||||
assert.ok(
|
||||
!pattern.test(normalize(prompt)),
|
||||
`expected RESUME_INTENT_PATTERNS to NOT match "${prompt}" (normalized: "${normalize(prompt)}")`,
|
||||
);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue