When /gsd queue was active, the agent had unrestricted access to all tools and would execute described work instead of creating milestones. The queue prompt instructed milestone-only behavior, but the system prompt's "execute with full commitment" directive dominated. Add a mechanical tool gate (shouldBlockQueueExecution) that blocks write/edit to non-.gsd/ paths and mutating bash commands when queue phase is active. Read-only tools, discussion tools, and .gsd/ artifact writes remain allowed. This enforces the queue contract at the tool layer rather than relying solely on prompt compliance. Closes #2545 Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
eb40f74cfe
commit
a09ae1de26
4 changed files with 251 additions and 1 deletions
|
|
@ -6,7 +6,7 @@ import { isToolCallEventType } from "@gsd/pi-coding-agent";
|
|||
import { buildMilestoneFileName, resolveMilestonePath, resolveSliceFile, resolveSlicePath } from "../paths.js";
|
||||
import { buildBeforeAgentStartResult } from "./system-context.js";
|
||||
import { handleAgentEnd } from "./agent-end-recovery.js";
|
||||
import { clearDiscussionFlowState, isDepthVerified, isQueuePhaseActive, markDepthVerified, resetWriteGateState, shouldBlockContextWrite } from "./write-gate.js";
|
||||
import { clearDiscussionFlowState, isDepthVerified, isQueuePhaseActive, markDepthVerified, resetWriteGateState, shouldBlockContextWrite, shouldBlockQueueExecution } from "./write-gate.js";
|
||||
import { isBlockedStateFile, isBashWriteToStateFile, BLOCKED_WRITE_ERROR } from "../write-intercept.js";
|
||||
import { getDiscussionMilestoneId } from "../guided-flow.js";
|
||||
import { loadToolApiKeys } from "../commands-config.js";
|
||||
|
|
@ -144,6 +144,23 @@ export function registerHooks(pi: ExtensionAPI): void {
|
|||
return { block: true, reason: loopCheck.reason };
|
||||
}
|
||||
|
||||
// ── Queue-mode execution guard (#2545): block source-code mutations ──
|
||||
// When /gsd queue is active, the agent should only create milestones,
|
||||
// not execute work. Block write/edit to non-.gsd/ paths and bash commands
|
||||
// that would modify files.
|
||||
if (isQueuePhaseActive()) {
|
||||
let queueInput = "";
|
||||
if (isToolCallEventType("write", event)) {
|
||||
queueInput = event.input.path;
|
||||
} else if (isToolCallEventType("edit", event)) {
|
||||
queueInput = event.input.path;
|
||||
} else if (isToolCallEventType("bash", event)) {
|
||||
queueInput = event.input.command;
|
||||
}
|
||||
const queueGuard = shouldBlockQueueExecution(event.toolName, queueInput, true);
|
||||
if (queueGuard.block) return queueGuard;
|
||||
}
|
||||
|
||||
// ── Single-writer engine: block direct writes to STATE.md ──────────
|
||||
// Covers write, edit, and bash tools to prevent bypass vectors.
|
||||
if (isToolCallEventType("write", event)) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,31 @@
|
|||
// Matches a path that ends in a milestone context file name: `M<digits>-CONTEXT.md`,
// optionally with a 6-character lowercase-alphanumeric suffix after the number
// (e.g. `M001-CONTEXT.md` or `M001-abc123-CONTEXT.md`).
const MILESTONE_CONTEXT_RE = /M\d+(?:-[a-z0-9]{6})?-CONTEXT\.md$/;
|
||||
|
||||
/**
|
||||
* Path segment that identifies .gsd/ planning artifacts.
|
||||
* Writes to these paths are allowed during queue mode.
|
||||
*/
|
||||
// `.gsd` must be a whole path segment: at the start or after `/` or `\`, and
// followed by `/`, `\` or end of string — so `foo.gsd` and `.gsdx` do not match.
const GSD_DIR_RE = /(^|[/\\])\.gsd([/\\]|$)/;
|
||||
|
||||
/**
 * Tool names the queue-mode guard lets through without inspecting their input.
 * Grouped by purpose; membership is checked by exact tool name.
 */
const QUEUE_SAFE_TOOLS = new Set<string>([
  // File inspection
  ...["read", "grep", "find", "ls", "glob"],
  // Discussion & planning
  ...["ask_user_questions", "gsd_milestone_generate_id", "gsd_summary_save"],
  // Web research used during queue discussion
  ...["search-the-web", "resolve_library", "get_library_docs", "fetch_page", "search_and_read"],
]);
|
||||
|
||||
/**
 * Bash commands that are read-only / investigative — safe during queue mode.
 * Matches the leading command of a single bash invocation.
 *
 * Plain command names must be followed by whitespace or end of input, so a
 * listed name is never accepted as a prefix of another program (`tr` vs
 * `truncate`, `comm` vs `command`, `ls` vs `lsof`).
 * `find` is rejected when it carries an action that deletes, executes, or
 * writes files; `sed` is rejected when `-i` appears anywhere in the command
 * (including after a newline). `mkdir -p` is only accepted for `.gsd` itself
 * or paths beneath it.
 */
const BASH_READ_ONLY_RE = /^\s*(?:(?:cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|grep|rg|awk|sort|uniq|diff|comm|tr|cut)(?=\s|$)|find(?=\s|$)(?![\s\S]*\s-(?:delete|exec|execdir|ok|okdir|fprint\w*|fls)\b)|sed\b(?![\s\S]*-i)|tee\s+-a\s+\/dev\/null|git\s+(?:log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(?:issue|pr|api|repo|release)\s+(?:view|list|diff|status|checks)|mkdir\s+-p\s+\.gsd(?=[/\\\s]|$)|rtk\s)/;
|
||||
|
||||
let depthVerificationDone = false;
|
||||
let activeQueuePhase = false;
|
||||
|
||||
|
|
@ -49,3 +75,52 @@ export function shouldBlockContextWrite(
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Output redirections that cannot modify a file: fd duplication/closing
 * (`2>&1`, `>&-`) and writes to `/dev/null`. Anchored at the `>` character.
 */
const QUEUE_SAFE_REDIRECT_RE = /^>>?(?:&\d+|&-|\s*\/dev\/null(?=[\s;|&]|$))/;

/**
 * Lexically collapses `.` and `..` segments of a path (no filesystem access)
 * so that a path such as `.gsd/../src/index.ts` is judged by where it really
 * points. Leading `..` segments of a relative path are preserved.
 */
function normalizeQueuePath(rawPath: string): string {
  const isAbsolute = /^[/\\]/.test(rawPath);
  const resolved: string[] = [];
  for (const segment of rawPath.split(/[/\\]+/)) {
    if (segment === "" || segment === ".") continue;
    if (segment === "..") {
      const last = resolved[resolved.length - 1];
      if (last !== undefined && last !== "..") {
        resolved.pop();
      } else if (!isAbsolute) {
        // Cannot climb above the root of an absolute path; keep it for relative ones.
        resolved.push("..");
      }
      continue;
    }
    resolved.push(segment);
  }
  return (isAbsolute ? "/" : "") + resolved.join("/");
}

/**
 * Splits a bash command line into its top-level simple commands (separated by
 * `;`, newline, `|`, `||`, `&`, `&&`), honouring quotes and backslash escapes.
 *
 * Returns `null` when the command uses a construct that can run or write
 * something the per-command allowlist cannot see: command substitution
 * (`$(…)`, backticks), process substitution, an output redirection to
 * anything other than `/dev/null` or a file descriptor, or an unterminated quote.
 */
function splitQueueShellCommands(command: string): string[] | null {
  const segments: string[] = [];
  let current = "";
  let openQuote = "";

  for (let i = 0; i < command.length; i++) {
    const ch = command.charAt(i);
    const next = command.charAt(i + 1);

    // Inside single quotes everything is literal.
    if (openQuote === "'") {
      if (ch === "'") openQuote = "";
      current += ch;
      continue;
    }
    // Backslash escapes the next character (outside single quotes).
    if (ch === "\\") {
      current += ch + next;
      i++;
      continue;
    }
    // Command substitution still executes inside double quotes.
    if (ch === "`" || (ch === "$" && next === "(")) return null;

    if (openQuote === '"') {
      if (ch === '"') openQuote = "";
      current += ch;
      continue;
    }
    if (ch === "'" || ch === '"') {
      openQuote = ch;
      current += ch;
      continue;
    }
    // Process substitution: <(cmd)
    if (ch === "<" && next === "(") return null;

    if (ch === ">") {
      const redirect = QUEUE_SAFE_REDIRECT_RE.exec(command.slice(i));
      if (redirect === null) return null;
      current += redirect[0];
      i += redirect[0].length - 1;
      continue;
    }
    if (ch === ";" || ch === "\n" || ch === "|" || ch === "&") {
      // `&&` / `||` simply yield an empty segment, which is dropped below.
      segments.push(current);
      current = "";
      continue;
    }
    current += ch;
  }

  if (openQuote !== "") return null;
  segments.push(current);
  return segments.map((s) => s.trim()).filter((s) => s.length > 0);
}

/**
 * Queue-mode execution guard (#2545).
 *
 * When the queue phase is active, the agent should only create planning
 * artifacts (milestones, CONTEXT.md, QUEUE.md, etc.) — never execute work.
 * This function blocks write/edit/bash tool calls that would modify source
 * code outside of .gsd/.
 *
 * Decision order:
 *  1. Queue phase inactive → allow.
 *  2. Tool is in QUEUE_SAFE_TOOLS → allow.
 *  3. write/edit → allow only if the path, after collapsing `.`/`..`
 *     segments, still contains a `.gsd` segment.
 *  4. bash → allow only if every top-level command in the line matches
 *     BASH_READ_ONLY_RE and the line contains no file-writing redirection
 *     or command/process substitution.
 *  5. Any other tool → allow (custom extension tools, etc.).
 *
 * @param toolName The tool being called (write, edit, bash, etc.)
 * @param input For write/edit: the file path. For bash: the command string.
 * @param queuePhaseActive Whether the queue phase is currently active.
 * @returns `{ block, reason }` — `block` is true if the call should be
 *   rejected; `reason` is set only when blocking.
 */
export function shouldBlockQueueExecution(
  toolName: string,
  input: string,
  queuePhaseActive: boolean,
): { block: boolean; reason?: string } {
  if (!queuePhaseActive) return { block: false };

  // Always-safe tools (read-only, discussion, planning)
  if (QUEUE_SAFE_TOOLS.has(toolName)) return { block: false };

  // write/edit — allow if targeting .gsd/ planning artifacts
  if (toolName === "write" || toolName === "edit") {
    // Normalise first so `.gsd/../src/x` is not mistaken for a planning artifact.
    if (GSD_DIR_RE.test(normalizeQueuePath(input))) return { block: false };
    return {
      block: true,
      reason: `Blocked: /gsd queue is a planning tool — it creates milestones, not executes work. ` +
        `Cannot ${toolName} to "${input}" during queue mode. ` +
        `Write CONTEXT.md files and update PROJECT.md/QUEUE.md instead.`,
    };
  }

  // bash — allow read-only/investigative commands, block everything else
  if (toolName === "bash") {
    // Every chained/piped command must be read-only, not just the first one.
    const commands = splitQueueShellCommands(input);
    const isReadOnly =
      commands !== null &&
      commands.length > 0 &&
      commands.every((cmd) => BASH_READ_ONLY_RE.test(cmd));
    if (isReadOnly) return { block: false };
    return {
      block: true,
      reason: `Blocked: /gsd queue is a planning tool — it creates milestones, not executes work. ` +
        `Cannot run "${input.slice(0, 80)}${input.length > 80 ? "…" : ""}" during queue mode. ` +
        `Use read-only commands (cat, grep, git log, etc.) to investigate, then write planning artifacts.`,
    };
  }

  // Unknown tools — allow by default (custom extension tools, etc.)
  return { block: false };
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ export {
|
|||
isQueuePhaseActive,
|
||||
setQueuePhaseActive,
|
||||
shouldBlockContextWrite,
|
||||
shouldBlockQueueExecution,
|
||||
} from "./bootstrap/write-gate.js";
|
||||
|
||||
export default async function registerExtension(pi: ExtensionAPI) {
|
||||
|
|
|
|||
157
src/resources/extensions/gsd/tests/queue-execution-guard.test.ts
Normal file
157
src/resources/extensions/gsd/tests/queue-execution-guard.test.ts
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
/**
|
||||
* Unit tests for the queue-mode execution guard (#2545).
|
||||
*
|
||||
* When queue phase is active, the agent should only create milestones —
|
||||
* not execute work. This guard blocks write/edit/bash tool calls that
|
||||
* target source code (non-.gsd/ paths) during queue mode.
|
||||
*
|
||||
* Exercises shouldBlockQueueExecution() — a pure function that checks:
|
||||
* (a) queuePhaseActive false → pass (not in queue mode)
|
||||
 *   (b) toolName is read-only (read, grep, find, ls, glob) → pass
|
||||
* (c) toolName is ask_user_questions → pass (discussion tool)
|
||||
* (d) write/edit to .gsd/ path → pass (planning artifacts)
|
||||
* (e) write/edit to source path → block
|
||||
 *   (f) bash command that is not read-only → block (could execute work);
 *       read-only bash (cat, ls, git log, …) and `mkdir -p .gsd/…` → pass
|
||||
* (g) registered GSD tools (gsd_milestone_generate_id, gsd_summary_save) → pass
|
||||
*/
|
||||
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { shouldBlockQueueExecution } from '../bootstrap/write-gate.ts';
|
||||
|
||||
// ─── Scenario 1: Not in queue mode — all tools pass ──
|
||||
|
||||
test('queue-guard: allows all tools when queue phase is not active', () => {
  // With the queue phase off, the guard must let every mutating tool through.
  const cases: Array<[string, string, string]> = [
    ['write', '/src/index.ts', 'write should pass outside queue mode'],
    ['bash', 'npm test', 'bash should pass outside queue mode'],
    ['edit', '/src/index.ts', 'edit should pass outside queue mode'],
  ];
  for (const [tool, input, message] of cases) {
    const verdict = shouldBlockQueueExecution(tool, input, false);
    assert.strictEqual(verdict.block, false, message);
  }
});
|
||||
|
||||
// ─── Scenario 2: Read-only tools always pass in queue mode ──
|
||||
|
||||
test('queue-guard: allows read-only tools during queue mode', () => {
  // Inspection tools are allowed regardless of the path they target.
  ['read', 'grep', 'find', 'ls', 'glob'].forEach((tool) => {
    const { block } = shouldBlockQueueExecution(tool, '/src/index.ts', true);
    assert.strictEqual(block, false, `${tool} should pass in queue mode`);
  });
});
|
||||
|
||||
// ─── Scenario 3: Discussion/planning tools pass in queue mode ──
|
||||
|
||||
test('queue-guard: allows discussion and planning tools during queue mode', () => {
  // These tools take no path/command input, so the guard sees an empty string.
  const planningTools = ['ask_user_questions', 'gsd_milestone_generate_id', 'gsd_summary_save'];
  for (const tool of planningTools) {
    const { block } = shouldBlockQueueExecution(tool, '', true);
    assert.strictEqual(block, false, `${tool} should pass`);
  }
});
|
||||
|
||||
// ─── Scenario 4: Write to .gsd/ paths passes (planning artifacts) ──
|
||||
|
||||
test('queue-guard: allows writes to .gsd/ paths during queue mode', () => {
  // Planning artifacts: relative and absolute paths, via both write and edit.
  const cases: Array<[string, string, string]> = [
    ['write', '.gsd/milestones/M001/M001-CONTEXT.md', 'write to .gsd/ should pass'],
    ['write', '/project/.gsd/PROJECT.md', 'write to .gsd/PROJECT.md should pass'],
    ['edit', '.gsd/QUEUE.md', 'edit to .gsd/QUEUE.md should pass'],
    ['write', '.gsd/REQUIREMENTS.md', 'write to .gsd/REQUIREMENTS.md should pass'],
    ['write', '.gsd/DECISIONS.md', 'write to .gsd/DECISIONS.md should pass'],
  ];
  for (const [tool, path, message] of cases) {
    assert.strictEqual(shouldBlockQueueExecution(tool, path, true).block, false, message);
  }
});
|
||||
|
||||
// ─── Scenario 5: Write/edit to source code paths blocked ──
|
||||
|
||||
test('queue-guard: blocks writes to source code during queue mode', () => {
  // The first case also checks that a human-readable reason is returned.
  const first = shouldBlockQueueExecution('write', 'src/index.ts', true);
  assert.strictEqual(first.block, true, 'write to src/ should be blocked');
  assert.ok(first.reason, 'should provide a reason');
  assert.ok(first.reason!.includes('queue'), 'reason should mention queue');

  const remaining: Array<[string, string, string]> = [
    ['write', '/project/src/components/App.tsx', 'write to component file should be blocked'],
    ['edit', 'package.json', 'edit to package.json should be blocked'],
    ['edit', '/project/lib/utils.ts', 'edit to lib/ should be blocked'],
  ];
  for (const [tool, path, message] of remaining) {
    assert.strictEqual(shouldBlockQueueExecution(tool, path, true).block, true, message);
  }
});
|
||||
|
||||
// ─── Scenario 6: Bash commands blocked during queue mode ──
|
||||
|
||||
test('queue-guard: blocks bash commands during queue mode', () => {
  // Commands that install packages or run code are not investigative.
  const install = shouldBlockQueueExecution('bash', 'npm install some-package', true);
  assert.strictEqual(install.block, true, 'npm install should be blocked');
  assert.ok(install.reason, 'should provide a reason');

  const { block: nodeBlocked } = shouldBlockQueueExecution('bash', 'node src/index.ts', true);
  assert.strictEqual(nodeBlocked, true, 'running node should be blocked');
});
|
||||
|
||||
// ─── Scenario 7: Bash read-only commands pass during queue mode ──
|
||||
|
||||
test('queue-guard: allows read-only bash commands during queue mode', () => {
  // Each entry: [command, assertion message].
  const readOnlyCommands: Array<[string, string]> = [
    ['cat src/index.ts', 'cat should pass'],
    ['ls -la src/', 'ls should pass'],
    ['git log --oneline -10', 'git log should pass'],
    ['find . -name "*.ts"', 'find should pass'],
    ['grep -rn "TODO" src/', 'grep should pass'],
    ['head -20 src/index.ts', 'head should pass'],
    ['wc -l src/index.ts', 'wc should pass'],
    ['git diff HEAD~1', 'git diff should pass'],
    ['gh issue view 42', 'gh issue view should pass'],
  ];
  for (const [command, message] of readOnlyCommands) {
    assert.strictEqual(shouldBlockQueueExecution('bash', command, true).block, false, message);
  }
});
|
||||
|
||||
// ─── Scenario 8: mkdir for .gsd/ milestone directories passes ──
|
||||
|
||||
test('queue-guard: allows mkdir for .gsd/ milestone directories', () => {
  // Creating milestone directories is part of writing planning artifacts.
  const { block } = shouldBlockQueueExecution('bash', 'mkdir -p .gsd/milestones/M010/slices', true);
  assert.strictEqual(block, false, 'mkdir -p .gsd/ should pass');
});
|
||||
|
||||
// ─── Scenario 9: Web search and library tools pass ──
|
||||
|
||||
test('queue-guard: allows web search and library tools during queue mode', () => {
  // Research tools used while discussing what to queue.
  const researchTools = ['search-the-web', 'resolve_library', 'get_library_docs', 'fetch_page'];
  for (const tool of researchTools) {
    const { block } = shouldBlockQueueExecution(tool, '', true);
    assert.strictEqual(block, false, `${tool} should pass`);
  }
});
|
||||
Loading…
Add table
Reference in a new issue