505 lines
19 KiB
TypeScript
505 lines
19 KiB
TypeScript
import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from "node:fs";
|
|
import { join } from "node:path";
|
|
|
|
/**
 * Pattern for a milestone CONTEXT.md file name at the end of a path.
 *
 * Accepts the legacy id form (`M001-CONTEXT.md`) as well as the unique id
 * form (`M001-abc123-CONTEXT.md`). The pattern is exported so that
 * regex-hardening tests exercise this exact expression instead of an inline
 * copy that could drift from it.
 */
export const MILESTONE_CONTEXT_RE = /M\d+(?:-[a-z0-9]{6})?-CONTEXT\.md$/;
|
|
/**
 * Captures the milestone id (group 1) from a milestone CONTEXT.md path.
 * The id must begin the final path segment: it is anchored to the start of the
 * string or to a preceding `/` or `\`. Matching is case-insensitive.
 */
const CONTEXT_MILESTONE_RE = /(?:^|[/\\])(M\d+(?:-[a-z0-9]{6})?)-CONTEXT\.md$/i;

/**
 * Captures the milestone id (group 1) that follows `depth_verification_` or
 * `depth_verification-` inside a question id. Matching is case-insensitive.
 *
 * The trailing negative lookahead forces the id to end at a non-alphanumeric
 * boundary. Without it, the optional six-character suffix would absorb the
 * start of a hyphen-delimited tail, e.g. `depth_verification-M001-confirm`
 * would yield `M001-confir` instead of `M001`.
 */
const DEPTH_VERIFICATION_MILESTONE_RE = /depth_verification[_-](M\d+(?:-[a-z0-9]{6})?)(?![a-z0-9])/i;
|
|
|
|
/**
 * Detects a `.sf` path segment, i.e. a path that names a .sf/ planning
 * artifact. The segment may sit at the start of the path or after a `/` or
 * `\`, and must be followed by a separator or the end of the string.
 * Queue mode permits writes to paths that match.
 */
const SF_DIR_RE = /(^|[/\\])\.sf([/\\]|$)/;
|
|
|
|
/**
 * Tool names that queue mode never blocks: read-only inspection tools,
 * discussion/planning tools, and web research tools.
 */
const QUEUE_SAFE_TOOLS = new Set([
  // Read-only inspection
  "read",
  "grep",
  "find",
  "ls",
  "glob",
  // Discussion & planning
  "ask_user_questions",
  "sf_milestone_generate_id",
  "sf_summary_save",
  // Web research used while discussing the queue
  "search-the-web",
  "resolve_library",
  "get_library_docs",
  "fetch_page",
  "search_and_read",
]);
|
|
|
|
/**
 * Bash commands that are read-only / investigative — safe during queue mode
 * and while a discussion gate is pending.
 *
 * The pattern is anchored at the start of the invocation, so it classifies the
 * leading command only.
 *
 * Bare command names are terminated with `\b`. Without the boundary a short
 * entry acts as a prefix match and lets unrelated, mutating programs through
 * (`tr` admitted `truncate`, `comm` admitted `command`, `tail` admitted
 * `tailscale`). A look-alike tool that should be allowed (e.g. `tree`, `lsof`)
 * needs its own explicit entry.
 *
 * NOTE(review): apart from the curl alternative (which rejects `|` and `>`),
 * nothing after the leading command is inspected, so chained commands and
 * redirects such as `cat a; rm b` or `echo x > file` still match. Confirm
 * whether callers split or sanitize commands before relying on this as a
 * hard guarantee.
 *
 * Extension policy: add commands here when they are read-only / diagnostic.
 * Never add commands that mutate project state (write files, run builds that
 * emit artifacts, install packages, etc.).
 *
 * Current read-only additions:
 *   npm run <diagnostic>       — read-only diagnostic scripts: test, lint, typecheck, etc.
 *                                NOT: build, install, compile, generate, deploy (artifact-producing)
 *   npm ls/list/info           — inspect installed packages (read-only)
 *   npm outdated/audit         — security/update checks (read-only)
 *   npx <pkg>                  — run a package binary without installing globally
 *   tsx                        — TypeScript runner used for dry-run / inspection scripts
 *   node --print               — evaluate and print an expression, no side effects
 *   python / python3           — script inspection, version checks
 *   pip / pip3 show            — show installed package info (read-only)
 *   jq                         — read-only JSON query
 *   yq                         — read-only YAML query
 *   curl -s / curl --silent    — fetch for inspection (no -o / no output redirect)
 *   openssl version            — version / certificate inspection
 *   env / printenv             — print environment variables
 *   true / false               — shell no-ops / test exit codes
 */
const BASH_READ_ONLY_RE = /^\s*((?:cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sort|uniq|diff|comm|tr|cut)\b|sed\b(?!.*-i)|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.sf|rtk\s|npm\s+run\s+(test|test:\w+|lint|lint:\w+|typecheck|type-check|type-check:\w+|check|verify|audit|outdated|format:check|ci|validate)\b|npm\s+(ls|list|info|view|show|outdated|audit|explain|doctor|ping|--version|-v)\b|npx\s|tsx\s|node\s+(--print|--version|-v\b)|python[23]?\s+(-c\s+'[^']*'|--version|-V\b|-m\s+(pip\s+show|pip\s+list|site))|pip[23]?\s+(show|list|freeze|check|index\s+versions)\b|jq\s|yq\s|curl\s+(-s\b|--silent\b)(?!\s+[^|>]*\s-[oO]\b)(?!\s+[^|>]*\s--output\b)[^|>]*$|openssl\s+(version|x509|s_client)|env\b|printenv\b|true\b|false\b)/;
|
|
|
|
/** Milestone ids that have been marked depth-verified in this process. */
const verifiedDepthMilestones: Set<string> = new Set();

/** In-memory flag recording whether the queue phase is currently active. */
let activeQueuePhase = false;
|
|
|
|
/**
 * Discussion gate enforcement state.
 *
 * Holds the id of the gate question that is awaiting confirmation, or `null`
 * when no gate is pending. A gate becomes pending when ask_user_questions is
 * invoked with a recognized gate question id. Until the user confirms it by
 * selecting the first/recommended option, every non-read-only tool call is
 * blocked, so the model cannot rationalize its way past a failed or cancelled
 * gate question.
 */
let pendingGateId: string | null = null;
|
|
|
|
/**
 * Substrings that mark a question id as a discussion gate.
 * These ids appear in discuss.md.
 *
 * NOTE(review): earlier wording listed depth/requirements/roadmap gates, but
 * only the depth-verification pattern is registered here — confirm whether
 * the other gates are intentionally absent.
 */
const GATE_QUESTION_PATTERNS = ["depth_verification"] as const;
|
|
|
|
/**
 * Tool names that stay callable while a discussion gate is pending:
 * ask_user_questions itself (so the model can re-ask the gate question),
 * the read-only inspection tools, and the web research tools.
 */
const GATE_SAFE_TOOLS = new Set([
  // Re-asking the gate question must always be possible
  "ask_user_questions",
  // Read-only inspection
  "read",
  "grep",
  "find",
  "ls",
  "glob",
  // Web research
  "search-the-web",
  "resolve_library",
  "get_library_docs",
  "fetch_page",
  "search_and_read",
]);
|
|
|
|
/** Serializable view of the write-gate state. */
export interface WriteGateSnapshot {
  /** Ids of the milestones that have passed depth verification. */
  verifiedDepthMilestones: string[];
  /** Whether the queue phase is active. */
  activeQueuePhase: boolean;
  /** Id of the gate question awaiting confirmation, or `null` if none. */
  pendingGateId: string | null;
}
|
|
|
|
/**
 * Report whether write-gate state should be mirrored to disk.
 *
 * @param env Environment to inspect; defaults to `process.env`.
 * @returns `true` only when `SF_PERSIST_WRITE_GATE_STATE` is exactly `"1"`.
 */
function shouldPersistWriteGateSnapshot(env: NodeJS.ProcessEnv = process.env): boolean {
  const flag = env.SF_PERSIST_WRITE_GATE_STATE;
  return flag === "1";
}
|
|
|
|
/**
 * Build the location of the persisted write-gate snapshot.
 *
 * @param basePath Project root; defaults to the current working directory.
 * @returns `<basePath>/.sf/runtime/write-gate-state.json`.
 */
function writeGateSnapshotPath(basePath: string = process.cwd()): string {
  const runtimeDir = join(basePath, ".sf", "runtime");
  return join(runtimeDir, "write-gate-state.json");
}
|
|
|
|
/**
 * Capture the in-memory write-gate state as a snapshot object.
 *
 * @returns A new snapshot whose milestone list is a sorted copy of the
 *          verified set, plus the current queue-phase flag and pending gate id.
 */
function currentWriteGateSnapshot(): WriteGateSnapshot {
  const milestoneIds = Array.from(verifiedDepthMilestones);
  milestoneIds.sort();
  return {
    verifiedDepthMilestones: milestoneIds,
    activeQueuePhase,
    pendingGateId,
  };
}
|
|
|
|
/**
 * Write the current in-memory write-gate state to disk.
 *
 * Does nothing unless persistence is enabled (see
 * `shouldPersistWriteGateSnapshot`). The snapshot is written to a sibling
 * `.tmp` file and then renamed over the target, so a reader does not observe
 * a half-written file.
 *
 * @param basePath Project root; defaults to the current working directory.
 * @throws Re-throws any filesystem error from creating the directory, writing
 *         the temp file, or renaming it.
 */
function persistWriteGateSnapshot(basePath: string = process.cwd()): void {
  if (!shouldPersistWriteGateSnapshot()) return;
  const path = writeGateSnapshotPath(basePath);
  mkdirSync(join(basePath, ".sf", "runtime"), { recursive: true });
  const tempPath = `${path}.tmp`;
  try {
    writeFileSync(tempPath, JSON.stringify(currentWriteGateSnapshot(), null, 2), "utf-8");
    renameSync(tempPath, path);
  } catch (error) {
    // Do not leave a stray (possibly partial) temp file behind on failure.
    try {
      unlinkSync(tempPath);
    } catch {
      // The temp file was never created or is already gone; nothing to clean up.
    }
    throw error;
  }
}
|
|
|
|
/**
 * Remove the persisted write-gate snapshot file, if persistence is enabled.
 * Removal is best-effort: any error from the unlink is ignored.
 *
 * @param basePath Project root; defaults to the current working directory.
 */
function clearPersistedWriteGateSnapshot(basePath: string = process.cwd()): void {
  const persistenceEnabled = shouldPersistWriteGateSnapshot();
  if (!persistenceEnabled) {
    return;
  }
  const snapshotFile = writeGateSnapshotPath(basePath);
  try {
    unlinkSync(snapshotFile);
  } catch {
    // Best-effort cleanup: failures to delete are deliberately ignored.
  }
}
|
|
|
|
/**
 * Coerce an arbitrary value (typically parsed JSON) into a well-formed snapshot.
 *
 * - Non-object input is treated as an empty record.
 * - `verifiedDepthMilestones` keeps only string entries, de-duplicated and sorted.
 * - `activeQueuePhase` is `true` only for a literal `true`.
 * - `pendingGateId` is kept only when it is a string; otherwise `null`.
 *
 * @param value Untrusted value to normalize.
 * @returns A new snapshot object.
 */
function normalizeWriteGateSnapshot(value: unknown): WriteGateSnapshot {
  let record: Record<string, unknown> = {};
  if (value && typeof value === "object") {
    record = value as Record<string, unknown>;
  }

  const milestoneIds: string[] = [];
  const rawMilestones = record.verifiedDepthMilestones;
  if (Array.isArray(rawMilestones)) {
    for (const entry of rawMilestones) {
      if (typeof entry === "string") milestoneIds.push(entry);
    }
  }

  return {
    verifiedDepthMilestones: Array.from(new Set(milestoneIds)).sort(),
    activeQueuePhase: record.activeQueuePhase === true,
    pendingGateId: typeof record.pendingGateId === "string" ? record.pendingGateId : null,
  };
}
|
|
|
|
/**
 * Snapshot describing a fully reset gate: no verified milestones, queue phase
 * inactive, and no pending gate.
 */
const EMPTY_SNAPSHOT: WriteGateSnapshot = {
  verifiedDepthMilestones: [],
  activeQueuePhase: false,
  pendingGateId: null,
};
|
|
|
|
/**
 * Load the write-gate snapshot for a project.
 *
 * - Snapshot file missing, persistence enabled: returns an empty snapshot, so
 *   deleting the file acts as a full state reset that clears the HARD BLOCK gate.
 * - Snapshot file missing, persistence disabled: the file is never written in
 *   this mode, so the in-memory state is returned.
 * - Snapshot file present: its JSON is parsed and normalized. If reading or
 *   parsing throws, the in-memory state is returned instead.
 *
 * The empty result is always a fresh object. Returning the shared
 * `EMPTY_SNAPSHOT` constant would let a caller that mutates the result
 * (e.g. pushes onto `verifiedDepthMilestones`) silently change what every
 * later "empty" load returns.
 *
 * @param basePath Project root; defaults to the current working directory.
 * @returns The loaded, reset, or in-memory snapshot.
 */
export function loadWriteGateSnapshot(basePath: string = process.cwd()): WriteGateSnapshot {
  const path = writeGateSnapshotPath(basePath);
  if (!existsSync(path)) {
    if (shouldPersistWriteGateSnapshot()) {
      // Copy both the object and its array so the shared constant stays pristine.
      return {
        ...EMPTY_SNAPSHOT,
        verifiedDepthMilestones: [...EMPTY_SNAPSHOT.verifiedDepthMilestones],
      };
    }
    return currentWriteGateSnapshot();
  }
  try {
    return normalizeWriteGateSnapshot(JSON.parse(readFileSync(path, "utf-8")));
  } catch {
    // Unreadable or malformed file: fall back to what this process knows.
    return currentWriteGateSnapshot();
  }
}
|
|
|
|
/**
 * Report whether any milestone at all has been marked depth-verified.
 *
 * @returns `true` when the verified-milestone set is non-empty.
 */
export function isDepthVerified(): boolean {
  const verifiedCount = verifiedDepthMilestones.size;
  return verifiedCount > 0;
}
|
|
|
|
/**
 * Report whether the given milestone has been marked depth-verified in the
 * in-memory state.
 *
 * @param milestoneId Milestone id to look up; empty, `null`, or `undefined`
 *                    always yields `false`.
 */
export function isMilestoneDepthVerified(milestoneId: string | null | undefined): boolean {
  return milestoneId ? verifiedDepthMilestones.has(milestoneId) : false;
}
|
|
|
|
/**
 * Report whether the given milestone is listed as depth-verified in a snapshot.
 *
 * @param snapshot    Snapshot to consult.
 * @param milestoneId Milestone id to look up; empty, `null`, or `undefined`
 *                    always yields `false`.
 */
export function isMilestoneDepthVerifiedInSnapshot(
  snapshot: WriteGateSnapshot,
  milestoneId: string | null | undefined,
): boolean {
  return milestoneId ? snapshot.verifiedDepthMilestones.includes(milestoneId) : false;
}
|
|
|
|
/** Return the in-memory queue-phase flag. */
export function isQueuePhaseActive(): boolean {
  const queueActive = activeQueuePhase;
  return queueActive;
}
|
|
|
|
/**
 * Set the queue-phase flag and persist the updated state (when persistence
 * is enabled) under the current working directory.
 *
 * @param active New value for the flag.
 */
export function setQueuePhaseActive(active: boolean): void {
  activeQueuePhase = active;
  // Keep the on-disk snapshot in step with the in-memory flag.
  persistWriteGateSnapshot();
}
|
|
|
|
/**
 * Forget every verified milestone and any pending gate, then persist the
 * result (when persistence is enabled). The queue-phase flag is left as is.
 */
export function resetWriteGateState(): void {
  pendingGateId = null;
  verifiedDepthMilestones.clear();
  persistWriteGateSnapshot();
}
|
|
|
|
/**
 * Reset all in-memory gate state — verified milestones, queue-phase flag, and
 * pending gate — and delete the persisted snapshot file (when persistence is
 * enabled) instead of rewriting it.
 */
export function clearDiscussionFlowState(): void {
  pendingGateId = null;
  activeQueuePhase = false;
  verifiedDepthMilestones.clear();
  clearPersistedWriteGateSnapshot();
}
|
|
|
|
/**
 * Record a milestone as depth-verified and persist the updated state (when
 * persistence is enabled).
 *
 * @param milestoneId Milestone to mark; the call is a no-op when it is empty,
 *                    `null`, or `undefined`.
 * @param basePath    Project root used for persistence; defaults to the
 *                    current working directory.
 */
export function markDepthVerified(milestoneId?: string | null, basePath: string = process.cwd()): void {
  if (milestoneId) {
    verifiedDepthMilestones.add(milestoneId);
    persistWriteGateSnapshot(basePath);
  }
}
|
|
|
|
/**
 * Report whether a question id contains one of the recognized gate patterns.
 *
 * @param questionId Question id to test.
 * @returns `true` when any entry of `GATE_QUESTION_PATTERNS` is a substring.
 */
export function isGateQuestionId(questionId: string): boolean {
  for (const gatePattern of GATE_QUESTION_PATTERNS) {
    if (questionId.includes(gatePattern)) return true;
  }
  return false;
}
|
|
|
|
/**
 * Pull the milestone id out of a depth-verification question id.
 * Prompts are expected to use ids like `depth_verification_M001_confirm`.
 *
 * @param questionId Question id to inspect.
 * @returns The captured milestone id, or `null` when the id does not match.
 */
export function extractDepthVerificationMilestoneId(questionId: string): string | null {
  const found = questionId.match(DEPTH_VERIFICATION_MILESTONE_RE);
  if (!found) return null;
  return found[1] ?? null;
}
|
|
|
|
/**
 * Pull the milestone id out of a milestone CONTEXT file path.
 *
 * @param inputPath Path to inspect.
 * @returns The captured milestone id, or `null` when the path does not match.
 */
function extractContextMilestoneId(inputPath: string): string | null {
  const found = inputPath.match(CONTEXT_MILESTONE_RE);
  if (!found) return null;
  return found[1] ?? null;
}
|
|
|
|
/**
 * Register a gate as awaiting confirmation (called when ask_user_questions is
 * invoked with a gate id) and persist the updated state when persistence is
 * enabled.
 *
 * @param gateId Id of the gate question that is now pending.
 */
export function setPendingGate(gateId: string): void {
  pendingGateId = gateId;
  // Mirror the new pending gate to the on-disk snapshot.
  persistWriteGateSnapshot();
}
|
|
|
|
/**
 * Drop the pending gate (called once the user has confirmed) and persist the
 * updated state when persistence is enabled.
 */
export function clearPendingGate(): void {
  pendingGateId = null;
  // Mirror the cleared gate to the on-disk snapshot.
  persistWriteGateSnapshot();
}
|
|
|
|
/**
 * Return the id of the gate that is currently awaiting confirmation.
 *
 * @returns The pending gate id, or `null` when no gate is pending.
 */
export function getPendingGate(): string | null {
  const gateId = pendingGateId;
  return gateId;
}
|
|
|
|
/**
 * Decide whether a tool call must be rejected because a discussion gate is
 * pending (ask_user_questions was called but not confirmed), judged against
 * the current in-memory state.
 *
 * Read-only tools and ask_user_questions itself are always allowed.
 *
 * @param toolName         Name of the tool being called.
 * @param milestoneId      Forwarded to the snapshot-based check.
 * @param queuePhaseActive Forwarded to the snapshot-based check.
 * @returns `{ block: true, reason }` when the call should be rejected,
 *          otherwise `{ block: false }`.
 */
export function shouldBlockPendingGate(
  toolName: string,
  milestoneId: string | null,
  queuePhaseActive?: boolean,
): { block: boolean; reason?: string } {
  const liveSnapshot = currentWriteGateSnapshot();
  return shouldBlockPendingGateInSnapshot(liveSnapshot, toolName, milestoneId, queuePhaseActive);
}
|
|
|
|
/**
 * Decide whether a tool call must be rejected because the given snapshot has
 * a pending discussion gate.
 *
 * Nothing is blocked when no gate is pending. With a pending gate, tools in
 * `GATE_SAFE_TOOLS` pass, and so does `bash` — bash is not judged here;
 * command-level checks live in `shouldBlockPendingGateBashInSnapshot`.
 * Every other tool is blocked.
 *
 * @param snapshot          State to evaluate against.
 * @param toolName          Name of the tool being called.
 * @param _milestoneId      Unused by this check.
 * @param _queuePhaseActive Unused by this check.
 * @returns `{ block: true, reason }` when the call should be rejected,
 *          otherwise `{ block: false }`.
 */
export function shouldBlockPendingGateInSnapshot(
  snapshot: WriteGateSnapshot,
  toolName: string,
  _milestoneId: string | null,
  _queuePhaseActive?: boolean,
): { block: boolean; reason?: string } {
  const gateId = snapshot.pendingGateId;
  if (!gateId) {
    return { block: false };
  }

  // Safe tools pass outright; bash is deferred to the bash-specific gate check.
  const exempt = GATE_SAFE_TOOLS.has(toolName) || toolName === "bash";
  if (exempt) {
    return { block: false };
  }

  const sentences = [
    `HARD BLOCK: Discussion gate "${gateId}" has not been confirmed by the user.`,
    `You MUST re-call ask_user_questions with the gate question before making any other tool calls.`,
    `If the previous ask_user_questions call failed, errored, was cancelled, or the user's response`,
    `did not match a provided option, you MUST re-ask — never rationalize past the block.`,
    `Do NOT proceed, do NOT use alternative approaches, do NOT skip the gate.`,
  ];
  return { block: true, reason: sentences.join(" ") };
}
|
|
|
|
/**
 * Decide whether a bash command must be rejected because a discussion gate is
 * pending, judged against the current in-memory state. Read-only commands are
 * allowed; everything else is blocked while a gate is pending.
 *
 * @param command          The bash command string.
 * @param milestoneId      Forwarded to the snapshot-based check.
 * @param queuePhaseActive Forwarded to the snapshot-based check.
 * @returns `{ block: true, reason }` when the command should be rejected,
 *          otherwise `{ block: false }`.
 */
export function shouldBlockPendingGateBash(
  command: string,
  milestoneId: string | null,
  queuePhaseActive?: boolean,
): { block: boolean; reason?: string } {
  const liveSnapshot = currentWriteGateSnapshot();
  return shouldBlockPendingGateBashInSnapshot(liveSnapshot, command, milestoneId, queuePhaseActive);
}
|
|
|
|
/**
 * Decide whether a bash command must be rejected because the given snapshot
 * has a pending discussion gate.
 *
 * Nothing is blocked when no gate is pending. With a pending gate, commands
 * matching `BASH_READ_ONLY_RE` pass and all others are blocked.
 *
 * @param snapshot          State to evaluate against.
 * @param command           The bash command string.
 * @param _milestoneId      Unused by this check.
 * @param _queuePhaseActive Unused by this check.
 * @returns `{ block: true, reason }` when the command should be rejected,
 *          otherwise `{ block: false }`.
 */
export function shouldBlockPendingGateBashInSnapshot(
  snapshot: WriteGateSnapshot,
  command: string,
  _milestoneId: string | null,
  _queuePhaseActive?: boolean,
): { block: boolean; reason?: string } {
  const gateId = snapshot.pendingGateId;
  if (!gateId) {
    return { block: false };
  }

  // Investigative commands remain available while the gate is open.
  if (BASH_READ_ONLY_RE.test(command)) {
    return { block: false };
  }

  const sentences = [
    `HARD BLOCK: Discussion gate "${gateId}" has not been confirmed by the user.`,
    `You MUST re-call ask_user_questions with the gate question before running mutating commands.`,
    `If the previous ask_user_questions call failed, errored, was cancelled, or the user's response`,
    `did not match a provided option, you MUST re-ask — never rationalize past the block.`,
  ];
  return { block: true, reason: sentences.join(" ") };
}
|
|
|
|
/**
 * Decide whether a depth_verification answer confirms that the discussion is
 * complete.
 *
 * Validation is structural: when the question's options are known, the
 * selected answer must be exactly the label of the first option (the
 * confirmation option by convention). Free-form "Other" text, decline
 * options, and garbage input are therefore rejected without tying the check
 * to any particular label wording.
 *
 * @param selected The answer's selected value from details.response.answers[id].selected.
 *                 When it is an array, only its first element is considered.
 * @param options  The question's options array from event.input.questions[n].options.
 * @returns `true` only for a confirming answer.
 */
export function isDepthConfirmationAnswer(
  selected: unknown,
  options?: Array<{ label?: string }>,
): boolean {
  const candidate: unknown = Array.isArray(selected) ? selected[0] : selected;
  if (typeof candidate !== "string" || candidate.length === 0) {
    return false;
  }

  // No usable options (e.g. older call sites): fall back to the prompt
  // convention and accept only answers carrying the "(Recommended)" suffix.
  if (!Array.isArray(options) || options.length === 0) {
    return candidate.includes("(Recommended)");
  }

  // Options known: the answer must be the first (confirmation) option verbatim.
  const expectedLabel = options[0]?.label;
  if (typeof expectedLabel !== "string") {
    return false;
  }
  return candidate === expectedLabel;
}
|
|
|
|
/**
 * Decide whether a `write` to a milestone CONTEXT.md must be rejected because
 * the milestone has not passed depth verification (in-memory state).
 *
 * Only the `write` tool and paths matching `MILESTONE_CONTEXT_RE` are
 * considered; anything else is allowed. The milestone id is taken from the
 * path when it can be extracted, otherwise from `milestoneId`.
 *
 * @param toolName          Name of the tool being called.
 * @param inputPath         Target file path.
 * @param milestoneId       Fallback milestone id when the path does not yield one.
 * @param _queuePhaseActive Unused by this check.
 * @returns `{ block: true, reason }` when the write should be rejected,
 *          otherwise `{ block: false }`.
 */
export function shouldBlockContextWrite(
  toolName: string,
  inputPath: string,
  milestoneId: string | null,
  _queuePhaseActive?: boolean,
): { block: boolean; reason?: string } {
  const isMilestoneContextWrite = toolName === "write" && MILESTONE_CONTEXT_RE.test(inputPath);
  if (!isMilestoneContextWrite) {
    return { block: false };
  }

  const mechanicalGate = `This is a mechanical gate — you MUST NOT proceed, retry, or rationalize past this block.`;

  const targetMilestoneId = extractContextMilestoneId(inputPath) ?? milestoneId;
  if (!targetMilestoneId) {
    const unknownMilestone = [
      `HARD BLOCK: Cannot write milestone CONTEXT.md without knowing which milestone it belongs to.`,
      mechanicalGate,
      `Required action: call ask_user_questions with question id containing "depth_verification" and the milestone id.`,
    ];
    return { block: true, reason: unknownMilestone.join(" ") };
  }

  if (isMilestoneDepthVerified(targetMilestoneId)) {
    return { block: false };
  }

  const unverified = [
    `HARD BLOCK: Cannot write to milestone CONTEXT.md without depth verification.`,
    mechanicalGate,
    `Required action: call ask_user_questions with question id containing "depth_verification".`,
    `The user MUST select the "(Recommended)" confirmation option to unlock this gate.`,
    `If the user declines, cancels, or the tool fails, you must re-ask — not bypass.`,
  ];
  return { block: true, reason: unverified.join(" ") };
}
|
|
|
|
/**
 * Decide whether an sf_summary_save CONTEXT artifact must be rejected, judged
 * against the current in-memory state.
 *
 * Slice-level CONTEXT artifacts are allowed; a milestone-level CONTEXT save
 * requires the milestone to be depth-verified first.
 *
 * @param artifactType Artifact type being saved.
 * @param milestoneId  Milestone the artifact belongs to.
 * @param sliceId      Slice id, when the artifact is slice-level.
 * @returns `{ block: true, reason }` when the save should be rejected,
 *          otherwise `{ block: false }`.
 */
export function shouldBlockContextArtifactSave(
  artifactType: string,
  milestoneId: string | null,
  sliceId?: string | null,
): { block: boolean; reason?: string } {
  const liveSnapshot = currentWriteGateSnapshot();
  return shouldBlockContextArtifactSaveInSnapshot(liveSnapshot, artifactType, milestoneId, sliceId);
}
|
|
|
|
/**
 * Decide whether a CONTEXT artifact save must be rejected, judged against the
 * given snapshot.
 *
 * Non-CONTEXT artifacts and slice-level artifacts are always allowed. A
 * milestone-level CONTEXT save is blocked when no milestone id is supplied or
 * when that milestone is not listed as depth-verified in the snapshot.
 *
 * @param snapshot     State to evaluate against.
 * @param artifactType Artifact type being saved.
 * @param milestoneId  Milestone the artifact belongs to.
 * @param sliceId      Slice id, when the artifact is slice-level.
 * @returns `{ block: true, reason }` when the save should be rejected,
 *          otherwise `{ block: false }`.
 */
export function shouldBlockContextArtifactSaveInSnapshot(
  snapshot: WriteGateSnapshot,
  artifactType: string,
  milestoneId: string | null,
  sliceId?: string | null,
): { block: boolean; reason?: string } {
  // Only milestone-level CONTEXT artifacts are gated.
  const isMilestoneContext = artifactType === "CONTEXT" && !sliceId;
  if (!isMilestoneContext) {
    return { block: false };
  }

  const mechanicalGate = `This is a mechanical gate — you MUST NOT proceed, retry, or rationalize past this block.`;

  if (!milestoneId) {
    const missingId = [
      `HARD BLOCK: Cannot save milestone CONTEXT without a milestone_id.`,
      mechanicalGate,
    ];
    return { block: true, reason: missingId.join(" ") };
  }

  if (isMilestoneDepthVerifiedInSnapshot(snapshot, milestoneId)) {
    return { block: false };
  }

  const unverified = [
    `HARD BLOCK: Cannot save milestone CONTEXT without depth verification for ${milestoneId}.`,
    mechanicalGate,
    `Required action: call ask_user_questions with question id containing "depth_verification_${milestoneId}".`,
    `The user MUST select the "(Recommended)" confirmation option to unlock this gate.`,
  ];
  return { block: true, reason: unverified.join(" ") };
}
|
|
|
|
/**
 * Queue-mode execution guard (#2545), judged against the current in-memory
 * state.
 *
 * While the queue phase is active the agent should only create planning
 * artifacts (milestones, CONTEXT.md, QUEUE.md, etc.) and never execute work.
 * This guard rejects write/edit/bash tool calls that would modify source code
 * outside of .sf/.
 *
 * @param toolName         The tool being called (write, edit, bash, etc.)
 * @param input            For write/edit: the file path. For bash: the command string.
 * @param queuePhaseActive Whether the queue phase is currently active.
 * @returns `{ block, reason }` — `block` is `true` if the call should be rejected.
 */
export function shouldBlockQueueExecution(
  toolName: string,
  input: string,
  queuePhaseActive: boolean,
): { block: boolean; reason?: string } {
  const liveSnapshot = currentWriteGateSnapshot();
  return shouldBlockQueueExecutionInSnapshot(liveSnapshot, toolName, input, queuePhaseActive);
}
|
|
|
|
/**
 * Report whether a path still contains a `.sf` segment after `.` and `..`
 * segments have been resolved lexically (both `/` and `\` count as separators).
 */
function resolvesInsideSfDir(inputPath: string): boolean {
  const resolved: string[] = [];
  for (const segment of inputPath.split(/[/\\]+/)) {
    if (segment === "" || segment === ".") continue;
    if (segment === "..") {
      // Step back out of the previous segment; a leading `..` has nothing to pop.
      resolved.pop();
      continue;
    }
    resolved.push(segment);
  }
  return resolved.includes(".sf");
}

/**
 * Queue-mode execution guard, judged against the given snapshot.
 *
 * Decision order while the queue phase is active:
 * 1. Tools in `QUEUE_SAFE_TOOLS` are allowed.
 * 2. `write` / `edit` are allowed only when the target is a .sf/ planning
 *    artifact. The path must match `SF_DIR_RE` and must still contain the
 *    `.sf` segment once `.`/`..` are resolved, so a traversal such as
 *    `.sf/../src/index.ts` cannot be used to write source code.
 * 3. `bash` is allowed only for commands matching `BASH_READ_ONLY_RE`.
 * 4. Any other tool is blocked, so custom tools cannot bypass the restriction.
 *
 * @param snapshot         State to evaluate against.
 * @param toolName         The tool being called (write, edit, bash, etc.)
 * @param input            For write/edit: the file path. For bash: the command string.
 * @param queuePhaseActive Whether the queue phase is active; defaults to
 *                         `snapshot.activeQueuePhase`.
 * @returns `{ block, reason }` — `block` is `true` if the call should be rejected.
 */
export function shouldBlockQueueExecutionInSnapshot(
  snapshot: WriteGateSnapshot,
  toolName: string,
  input: string,
  queuePhaseActive: boolean = snapshot.activeQueuePhase,
): { block: boolean; reason?: string } {
  if (!queuePhaseActive) return { block: false };

  // Always-safe tools (read-only, discussion, planning)
  if (QUEUE_SAFE_TOOLS.has(toolName)) return { block: false };

  // write/edit — allow only if the resolved target is a .sf/ planning artifact
  if (toolName === "write" || toolName === "edit") {
    if (SF_DIR_RE.test(input) && resolvesInsideSfDir(input)) return { block: false };
    return {
      block: true,
      reason: `Blocked: /sf queue is a planning tool — it creates milestones, not executes work. ` +
        `Cannot ${toolName} to "${input}" during queue mode. ` +
        `Write CONTEXT.md files and update PROJECT.md/QUEUE.md instead.`,
    };
  }

  // bash — allow read-only/investigative commands, block everything else
  if (toolName === "bash") {
    if (BASH_READ_ONLY_RE.test(input)) return { block: false };
    return {
      block: true,
      reason: `Blocked: /sf queue is a planning tool — it creates milestones, not executes work. ` +
        `Cannot run "${input.slice(0, 80)}${input.length > 80 ? "…" : ""}" during queue mode. ` +
        `Use read-only commands (cat, grep, git log, etc.) to investigate, then write planning artifacts.`,
    };
  }

  // Unknown tools — block by default in queue mode so custom tools cannot
  // bypass execution restrictions.
  return {
    block: true,
    reason: `Blocked: /sf queue is a planning tool — it creates milestones, not executes work. Unknown tools are not permitted during queue mode.`,
  };
}
|