chore(sf): residual session work — final consolidation

Last batch from the parallel swarm session: docstring tweaks,
verification-gate doc additions, workflow-reconcile and worktree-command
follow-ups, doctor-environment cleanup. Typecheck clean.

Most of the session work landed in earlier commits (8be8f4774, 3045538cb,
038938f2a, ed85252fc, 4f4b584e5, etc.); this commit is the residual
working-tree state after all swarms reported.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-02 02:17:03 +02:00
parent f1cef7c476
commit 9db94ed77e
9 changed files with 125 additions and 8 deletions

View file

@ -1033,6 +1033,7 @@ async function runHeadlessOnce(
process.stderr.write(
`[headless] Timeout after ${options.timeout / 1000}s\n`,
);
timedOut = true;
exitCode = EXIT_ERROR;
resolveCompletion();
}, options.timeout)

View file

@ -137,7 +137,7 @@ export function saveActivityLog(
const entries = ctx.sessionManager.getEntries();
if (!entries || entries.length === 0) return null;
const activityDir = join(sfRoot(basePath), "activity");
const activityDir = join(sfRuntimeRoot(basePath), "activity");
mkdirSync(activityDir, { recursive: true });
const safeUnitId = unitId.replace(/\//g, "-");

View file

@ -130,21 +130,54 @@ export function getToolCallCountSnapshot(): Record<string, number> {
// ─── Tool invocation error classification (#2883) ────────────────────────
/**
 * Patterns that indicate a tool invocation failed because its JSON arguments
 * were malformed or truncated — as opposed to a normal business-logic error
 * from the tool handler. When these errors occur, retrying the same unit will
 * produce the same failure, so the retry loop must be broken.
 *
 * Matching is case-insensitive and the regex carries no `g`/`y` flag, so
 * `.test()` is stateless and safe to call repeatedly.
 */
const TOOL_INVOCATION_ERROR_RE =
  /Validation failed for tool|Expected ',' or '\}'(?: after property value)?(?: in JSON)?|Unexpected end of JSON|Unexpected token.*in JSON/i;

/**
 * Fixed phrases that mark a deterministic policy-gate rejection.
 * Matched case-insensitively anywhere in the error text.
 */
const DETERMINISTIC_POLICY_ERROR_RE =
  /(?:^|\b)(?:HARD BLOCK:|Blocked: \/sf queue is a planning tool|Direct writes to \.sf\/STATE\.md and \.sf\/sf\.db are blocked|This is a mechanical gate)/i;

/**
 * Known deterministic policy error substrings. Each entry is a stable string
 * that will appear in the tool error text content when the corresponding
 * policy gate fires. Retrying these errors will always produce the same outcome.
 *
 * Entries are matched with `String.prototype.includes`, i.e. case-sensitively.
 *
 * Add new entries here as new deterministic gates are introduced. Do NOT use
 * regex — explicit substrings keep the list auditable.
 */
export const DETERMINISTIC_POLICY_ERROR_STRINGS = [
  // sf_summary_save write-gate: CONTEXT artifact blocked pending depth verification (#4973).
  "context write blocked",
  "CONTEXT without depth verification",
  // Raw write tool gate (#4973): shouldBlockContextWrite emits this for direct
  // write tool calls to *-CONTEXT.md paths.
  "CONTEXT.md without depth verification",
] as const;

/**
 * Returns true if the error message indicates a deterministic policy gate
 * blocked the tool call before execution. Retrying the same unit without
 * changing behavior will hit the same gate, so auto-mode should write a
 * blocker placeholder instead of re-dispatching (#4973).
 *
 * @param errorMsg - Text content of the tool error; empty input yields false.
 * @returns true when the text matches a known policy-gate phrase or substring.
 */
export function isDeterministicPolicyError(errorMsg: string): boolean {
  if (!errorMsg) return false;
  return (
    DETERMINISTIC_POLICY_ERROR_RE.test(errorMsg) ||
    DETERMINISTIC_POLICY_ERROR_STRINGS.some((s) => errorMsg.includes(s))
  );
}

/**
 * Returns true if the error message indicates a deterministic invocation or
 * policy failure (as opposed to a normal tool execution error).
 *
 * @param errorMsg - Text content of the tool error; empty input yields false.
 * @returns true for malformed/truncated-argument errors and for policy-gate
 *   rejections.
 */
export function isToolInvocationError(errorMsg: string): boolean {
  if (!errorMsg) return false;
  // A single return: an earlier unconditional return on the JSON-argument
  // regex alone would make the policy-gate check unreachable.
  return (
    TOOL_INVOCATION_ERROR_RE.test(errorMsg) ||
    isDeterministicPolicyError(errorMsg)
  );
}
/**

View file

@ -88,6 +88,9 @@ export function saveJsonFile<T>(filePath: string, data: T): void {
try {
const dir = dirname(filePath);
mkdirSync(dir, { recursive: true });
// Remove orphaned .tmp.* files from prior crashed writes before creating
// a new one. On Windows a locked stale tmp file causes renameSync to fail.
cleanOrphanTmpFiles(filePath);
// Use randomized tmp suffix to prevent concurrent-write data loss
const tmp = `${filePath}.tmp.${randomBytes(4).toString("hex")}`;
writeFileSync(tmp, JSON.stringify(data, null, 2) + "\n", "utf-8");

View file

@ -120,6 +120,9 @@ export function estimateTokensForProvider(
return Math.ceil(text.length / ratio);
}
/**
* Parse Google Gemini CLI API key JSON to extract token and project ID.
*/
export function parseGoogleGeminiCliApiKey(
apiKeyRaw: string,
): GeminiCliCredentials | undefined {

View file

@ -244,6 +244,12 @@ const KNOWN_COMMAND_PREFIXES = new Set([
* Heuristics (any true → prose-like):
* 1. First token starts with an uppercase letter and the string has 4+ words
* 2. String contains commas followed by spaces (prose clause structure)
* 3. First token is an English article, demonstrative, or pronoun (a, an,
* the, this, that, …) and the string has 2 or more words — short prose
* fragments otherwise look like commands (e.g. "the verify step").
* 4. String has fewer than 2 tokens AND the single token is not a known
* command prefix and does not start with a path character — single
* non-command words are prose, not commands.
*/
export function isLikelyCommand(cmd: string): boolean {
const trimmed = cmd.trim();
@ -266,6 +272,24 @@ export function isLikelyCommand(cmd: string): boolean {
// Has flag-like tokens → command
if (tokens.some((t) => t.startsWith("-"))) return true;
// Prose-article first token with 2+ words → prose
const PROSE_ARTICLES = new Set([
"a",
"an",
"the",
"this",
"that",
"these",
"those",
"it",
"its",
]);
if (PROSE_ARTICLES.has(firstToken.toLowerCase()) && tokens.length >= 2)
return false;
// Single token that is not a known command prefix or path → prose
if (tokens.length === 1) return false;
// First token starts with uppercase + 4 or more words → prose
if (/^[A-Z]/.test(firstToken) && tokens.length >= 4) return false;

View file

@ -47,6 +47,33 @@ export function replaySliceComplete(
sliceId: string,
ts: string,
): void {
// Milestone-level guard: the milestone itself must not be in a terminal state
// that would make accepting further slice completions nonsensical, and any
// depends_on milestones must already be complete before we close this slice.
const milestone = getMilestone(milestoneId);
if (milestone) {
if (milestone.status === "complete") {
process.stderr.write(
`[forge] reconcile: skipping complete_slice replay for ${sliceId}` +
`milestone ${milestoneId} is already complete\n`,
);
return;
}
if (milestone.depends_on.length > 0) {
const blockedBy = milestone.depends_on.filter((depId) => {
const dep = getMilestone(depId);
return !dep || dep.status !== "complete";
});
if (blockedBy.length > 0) {
process.stderr.write(
`[forge] reconcile: skipping complete_slice replay for ${sliceId}` +
`milestone ${milestoneId} depends on incomplete milestones: ${blockedBy.join(", ")}\n`,
);
return;
}
}
}
const tasks = getSliceTasks(milestoneId, sliceId);
// If there are tasks and any are not closed, skip the status update
if (tasks.length > 0) {

View file

@ -57,6 +57,10 @@ export interface TemplateRegistry {
templates: Record<string, TemplateEntry>;
}
/**
* Result of template matching against user input.
* Contains template ID, entry, and confidence level.
*/
export interface TemplateMatch {
id: string;
template: TemplateEntry;

View file

@ -296,6 +296,28 @@ export function registerWorktreeCommand(pi: ExtensionAPI): void {
}
}
// Orphaned-worktree recovery: a crash or hang between the pre-merge chdir and
// merge completion may leave a worktree registered in git but not tracked by
// originalCwd (because the old code cleared it prematurely). Detect such
// worktrees on reload and warn — so the user knows to run /worktree list and
// merge or remove them manually.
if (!originalCwd) {
try {
const cwd = process.cwd();
const worktrees = listWorktrees(cwd);
const orphaned = worktrees.filter((wt) => wt.exists);
if (orphaned.length > 0) {
const names = orphaned.map((wt) => wt.name).join(", ");
console.warn(
`[SF] Orphaned worktree(s) detected on reload: ${names}. ` +
`Run /worktree list to review, then /worktree merge or /worktree remove to clean up.`,
);
}
} catch {
/* non-fatal: listWorktrees may fail if not in a git repo */
}
}
pi.registerCommand("worktree", {
description:
"Git worktrees (also /wt): /worktree <name> | list | merge | remove",