feat(sf): worktree hardening, skip-slice handler, cwd anchoring + docstrings

- new worktree-root.ts / worktree-session-state.ts: track and restore
  original project root after /worktree merge or /worktree return
- new tools/skip-slice.ts: cascade skip to tasks in the slice so milestone
  completion isn't blocked by pending tasks (#4375)
- auto/run-unit.ts: anchor cwd to basePath before newSession() captures it
  (GAP-10) — prevents tool runtime / system prompt from rooting on drifted
  cwd from async_bash, background jobs, or prior unit cleanup
- safety/git-checkpoint.ts: harden HEAD-rev-parse against execFileSync
  errors, surface stderr properly
- broad JSDoc / docstring pass across the rest of the SF extension surface

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-02 01:48:37 +02:00
parent ed47951960
commit 4f4b584e53
33 changed files with 913 additions and 197 deletions

View file

@ -11,6 +11,12 @@ import {
import { migrateLegacyScaffold } from "./scaffold-drift.js";
import { logWarning } from "./workflow-logger.js";
/**
* Single scaffold file template with path and template content.
*
* @property path canonical file path relative to project root (e.g., "AGENTS.md", "docs/PLANS.md")
* @property content template body to write when file is missing or pending-upgrade
*/
export interface ScaffoldFile {
path: string;
content: string;
@ -23,6 +29,14 @@ export interface ScaffoldFile {
*/
const NO_MARKER_PATHS = new Set<string>([".siftignore"]);
/**
* Canonical scaffold file templates SF manages for agent legibility.
*
* Includes AGENTS.md (routing map), ARCHITECTURE.md (system overview), and docs
* tree structure (product specs, design docs, execution plans, records, generated).
* Phase C syncs these to disk, stamps them with version markers, and records manifest
* entries (ADR-021).
*/
export const SCAFFOLD_FILES: ScaffoldFile[] = [
{
path: ".siftignore",

View file

@ -10,6 +10,7 @@
import { randomUUID } from "node:crypto";
import { mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { atomicWriteSync } from "../atomic-write.js";
import type {
ExtensionAPI,
ExtensionContext,
@ -636,9 +637,8 @@ export async function autoLoop(
const unitPhaseResult = await runUnitPhaseViaContract(dispatchContract, ic, iterData, loopState);
if (unitPhaseResult.action === "next") {
const d = unitPhaseResult.data as { unitStartedAt: number; requestDispatchedAt?: number };
if (d?.requestDispatchedAt) {
s.lastRequestTimestamp = d.requestDispatchedAt;
}
const requestTimestamp = d?.requestDispatchedAt ?? d?.unitStartedAt;
if (typeof requestTimestamp === "number") s.lastRequestTimestamp = requestTimestamp;
}
deps.uokObserver?.onPhaseResult("unit", unitPhaseResult.action, {
unitType: iterData.unitType,
@ -945,9 +945,8 @@ export async function autoLoop(
);
if (unitPhaseResult.action === "next") {
const d = unitPhaseResult.data as { unitStartedAt: number; requestDispatchedAt?: number };
if (d?.requestDispatchedAt) {
s.lastRequestTimestamp = d.requestDispatchedAt;
}
const requestTimestamp = d?.requestDispatchedAt ?? d?.unitStartedAt;
if (typeof requestTimestamp === "number") s.lastRequestTimestamp = requestTimestamp;
}
deps.uokObserver?.onPhaseResult("unit", unitPhaseResult.action, {
unitType: iterData.unitType,

View file

@ -96,6 +96,7 @@ import {
getRequiredWorkflowToolsForAutoUnit,
getWorkflowTransportSupportError,
} from "../workflow-mcp.js";
import { resolveWorktreeProjectRoot } from "../worktree-root.js";
import { detectStuck } from "./detect-stuck.js";
import {
FINALIZE_POST_TIMEOUT_MS,
@ -233,7 +234,7 @@ export function requiresHumanProductionMutationApproval(text: string): boolean {
export function _resolveDispatchGuardBasePath(
s: Pick<AutoSession, "originalBasePath" | "basePath">,
): string {
return s.originalBasePath || s.basePath;
return resolveWorktreeProjectRoot(s.basePath, s.originalBasePath);
}
const PLANNING_FLOW_GATE_PHASES: ReadonlySet<Phase> = new Set([
@ -2784,6 +2785,20 @@ export async function runFinalize(
});
} else {
// s.pendingVerificationRetry was set by postUnitPreVerification.
// Emit a dedicated journal event so forensics can distinguish bounded
// verification retries from genuine stuck-loop dispatch repetitions (#4540).
const retryInfo = s.pendingVerificationRetry;
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "artifact-verification-retry",
data: {
unitType: _preUnitSnapshot?.type,
unitId: retryInfo?.unitId,
attempt: retryInfo?.attempt,
},
});
// Continue the loop — next iteration will inject the retry context into the prompt.
debugLog("autoLoop", {
phase: "artifact-verification-retry",
@ -2995,3 +3010,6 @@ export async function runFinalize(
return { action: "next", data: undefined as undefined };
}
// ─── GAP-12: exported alias ───────────────────────────────────────────────────
export const resetSessionTimeoutState = resetConsecutiveSessionTimeouts;

View file

@ -14,7 +14,7 @@ import {
resolvePersistModelChanges,
} from "../preferences.js";
import { logWarning } from "../workflow-logger.js";
import { _setCurrentResolve, _setSessionSwitchInFlight } from "./resolve.js";
import { _clearCurrentResolve, _setCurrentResolve, _setSessionSwitchInFlight } from "./resolve.js";
import type { AutoSession } from "./session.js";
import { NEW_SESSION_TIMEOUT_MS } from "./session.js";
import {
@ -45,12 +45,42 @@ export async function runUnit(
): Promise<UnitResult> {
debugLog("runUnit", { phase: "start", unitType, unitId });
// GAP-10: Ensure cwd matches basePath BEFORE newSession() captures it. The
// new session reads process.cwd() during construction to anchor its tool
// runtime and system prompt; if cwd has drifted (async_bash, background
// jobs, prior unit cleanup), the session would otherwise be rooted to the
// wrong directory. Must be synchronous — no awaits between chdir and
// newSession (#1389, #4762 follow-up).
try {
if (process.cwd() !== s.basePath) {
process.chdir(s.basePath);
}
} catch (e) {
const msg = `Failed to chdir to basePath before newSession (basePath: ${s.basePath}): ${String(e)}`;
logWarning("engine", msg, { basePath: s.basePath, error: String(e) });
return {
status: "cancelled",
errorContext: {
message: msg,
category: "session-failed",
isTransient: false,
},
};
}
// ── Session creation with timeout ──
debugLog("runUnit", { phase: "session-create", unitType, unitId });
let sessionResult: { cancelled: boolean };
let sessionTimeoutHandle: ReturnType<typeof setTimeout> | undefined;
const mySessionSwitchGeneration = ++sessionSwitchGeneration;
// GAP-07: Cancellation controller for newSession(). When the session-creation
// timeout fires, we abort this controller so that any still-in-flight
// newSession() work (which may clobber process.cwd()) is signalled to stop.
// Note: SF's newSession() does not currently accept abortSignal in its
// options type, so we cannot pass it directly — but aborting the controller
// documents the intent clearly and is a no-op call site when the API adds it.
const sessionAbortController = new AbortController();
_setSessionSwitchInFlight(true);
try {
const sessionPromise = s.cmdCtx!.newSession().finally(() => {
@ -60,7 +90,10 @@ export async function runUnit(
});
const timeoutPromise = new Promise<{ cancelled: true }>((resolve) => {
sessionTimeoutHandle = setTimeout(
() => resolve({ cancelled: true }),
() => {
sessionAbortController.abort();
resolve({ cancelled: true });
},
NEW_SESSION_TIMEOUT_MS,
);
});
@ -102,15 +135,23 @@ export async function runUnit(
return { status: "cancelled" };
}
// GAP-09: Hard-cancel if setModel fails rather than continuing with the
// wrong model. Running with an unexpected model wastes the unit and can
// cause quota / pricing surprises.
if (s.currentUnitModel && typeof pi.setModel === "function") {
const restored = await pi.setModel(s.currentUnitModel, {
const modelId = s.currentUnitModel;
const restored = await pi.setModel(modelId, {
persist: resolvePersistModelChanges(),
});
if (!restored) {
ctx.ui.notify(
`Failed to restore ${s.currentUnitModel.provider}/${s.currentUnitModel.id} after session creation. Using session default.`,
"warning",
);
return {
status: "cancelled",
errorContext: {
message: `setModel failed for ${modelId.provider}/${modelId.id}`,
category: "session-failed",
isTransient: false,
},
};
}
}
@ -122,18 +163,33 @@ export async function runUnit(
_setCurrentResolve(resolve);
});
// Ensure cwd matches basePath before dispatch (#1389).
// async_bash and background jobs can drift cwd away from the worktree.
// Realigning here prevents commits from landing on the wrong branch.
try {
if (process.cwd() !== s.basePath) {
process.chdir(s.basePath);
// GAP-08: Provider request-readiness pre-check (#4555).
// Verify the provider can accept requests before dispatching. If the token
// has expired since bootstrap, return cancelled immediately so the unit is
// not wasted on a guaranteed 401.
{
const provider = s.currentUnitModel?.provider ?? ctx.model?.provider;
if (provider != null && typeof ctx.modelRegistry.isProviderRequestReady === "function") {
let ready = false;
try {
ready = ctx.modelRegistry.isProviderRequestReady(provider);
} catch {
ready = false;
}
if (!ready) {
_clearCurrentResolve();
return {
status: "cancelled",
errorContext: {
message: `Provider ${provider} is not request-ready (login/token expired)`,
category: "provider",
isTransient: false,
},
};
}
}
} catch (e) {
logWarning("engine", "Failed to chdir to basePath before dispatch", {
basePath: s.basePath,
error: String(e),
});
}
// ── Send the prompt ──

View file

@ -502,6 +502,7 @@ export function registerHooks(
pi.on("tool_call", async (event, ctx) => {
if (!isAutoActive()) return;
safetyRecordToolCall(
event.toolCallId,
event.toolName,
event.input as Record<string, unknown>,
);

View file

@ -644,6 +644,10 @@ async function buildCarryForwardLines(
);
}
/**
* Build resume state section from CONTINUE.md or legacy continue.md.
* Returns progress, completed work, and next action if available.
*/
async function buildResumeSection(
basePath: string,
milestoneId: string,
@ -694,6 +698,10 @@ async function buildResumeSection(
return lines.join("\n");
}
/**
* Extract slice plan excerpt with goal, demo, verification, and observability.
* Returns formatted section for task execution context.
*/
function extractSliceExecutionExcerpt(
content: string | null,
relPath: string,
@ -726,6 +734,10 @@ function extractSliceExecutionExcerpt(
return parts.join("\n");
}
/**
* Extract a markdown section by heading name from content.
* Returns section content until next heading or null if not found.
*/
function extractMarkdownSection(
content: string,
heading: string,
@ -741,10 +753,16 @@ function extractMarkdownSection(
return rest.slice(0, end).trim();
}
/**
 * Escape every regex metacharacter in `value` so the result can be embedded
 * verbatim inside a RegExp pattern without changing its meaning.
 */
function escapeRegExp(value: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaChars, (match) => `\\${match}`);
}
/**
 * Collapse all runs of whitespace (spaces, tabs, newlines) into single
 * spaces and strip leading/trailing whitespace, yielding a one-line string.
 */
function oneLine(text: string): string {
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}

View file

@ -112,6 +112,10 @@ const GATE_SAFE_TOOLS = new Set([
"search_and_read",
]);
/**
* Snapshot of write gate state: depth verification, queue phase, and pending gates.
* Persisted to .sf/runtime/write-gate-state.json for cross-process coordination.
*/
export interface WriteGateSnapshot {
verifiedDepthMilestones: string[];
activeQueuePhase: boolean;

View file

@ -22,6 +22,9 @@ const sfHome = process.env.SF_HOME || join(homedir(), ".sf");
// ─── Types (mirrored from extension-registry.ts) ────────────────────────────
/**
* Extension manifest metadata including capabilities and dependencies.
*/
interface ExtensionManifest {
id: string;
name: string;
@ -41,6 +44,9 @@ interface ExtensionManifest {
};
}
/**
* Extension registry entry tracking enabled/disabled status and source.
*/
interface ExtensionRegistryEntry {
id: string;
enabled: boolean;
@ -49,6 +55,9 @@ interface ExtensionRegistryEntry {
disabledReason?: string;
}
/**
* Extension registry mapping extension IDs to their status entries.
*/
interface ExtensionRegistry {
version: 1;
entries: Record<string, ExtensionRegistryEntry>;
@ -56,14 +65,23 @@ interface ExtensionRegistry {
// ─── Registry I/O ───────────────────────────────────────────────────────────
/**
 * Absolute path of the extension registry file:
 * <sfHome>/extensions/registry.json.
 */
function getRegistryPath(): string {
  const extensionsDir = join(sfHome, "extensions");
  return join(extensionsDir, "registry.json");
}
/**
 * Absolute path of the directory that holds agent extensions:
 * <sfHome>/agent/extensions.
 */
function getAgentExtensionsDir(): string {
  const agentDir = join(sfHome, "agent");
  return join(agentDir, "extensions");
}
/**
* Load the extension registry, defaulting to an empty registry on error.
*/
function loadRegistry(): ExtensionRegistry {
const filePath = getRegistryPath();
try {
@ -84,6 +102,9 @@ function loadRegistry(): ExtensionRegistry {
}
}
/**
* Save the extension registry to disk (atomic via temp file).
*/
function saveRegistry(registry: ExtensionRegistry): void {
const filePath = getRegistryPath();
try {
@ -96,12 +117,18 @@ function saveRegistry(registry: ExtensionRegistry): void {
}
}
/**
 * Whether the given extension is enabled. Extensions with no registry
 * entry default to enabled (opt-out model).
 */
function isEnabled(registry: ExtensionRegistry, id: string): boolean {
  return registry.entries[id]?.enabled ?? true;
}
/**
* Load extension manifest from a directory, or null if not found/invalid.
*/
function readManifest(dir: string): ExtensionManifest | null {
const mPath = join(dir, "extension-manifest.json");
if (!existsSync(mPath)) return null;
@ -115,6 +142,9 @@ function readManifest(dir: string): ExtensionManifest | null {
}
}
/**
* Discover all extension manifests from the agent extensions directory.
*/
function discoverManifests(): Map<string, ExtensionManifest> {
const extDir = getAgentExtensionsDir();
const manifests = new Map<string, ExtensionManifest>();
@ -129,6 +159,9 @@ function discoverManifests(): Map<string, ExtensionManifest> {
// ─── Command Handler ────────────────────────────────────────────────────────
/**
* Handler for /sf extensions subcommands (list, enable, disable, info).
*/
export async function handleExtensions(
args: string,
ctx: ExtensionCommandContext,
@ -162,6 +195,9 @@ export async function handleExtensions(
);
}
/**
* List all discovered extensions with their status and capabilities.
*/
function handleList(ctx: ExtensionCommandContext): void {
const manifests = discoverManifests();
const registry = loadRegistry();
@ -211,6 +247,9 @@ function handleList(ctx: ExtensionCommandContext): void {
ctx.ui.notify(lines.join("\n"), "info");
}
/**
* Enable a disabled extension in the registry.
*/
function handleEnable(
id: string | undefined,
ctx: ExtensionCommandContext,

View file

@ -48,6 +48,9 @@ const TASK_COUNT_TIERS: [number, number][] = [
// ─── Types ───────────────────────────────────────────────────────────────────
/**
* Result of truncation at section boundaries (content + dropped section count).
*/
export interface TruncationResult {
/** The (possibly truncated) content string */
content: string;
@ -55,6 +58,9 @@ export interface TruncationResult {
droppedSections: number;
}
/**
* Proportional character budget allocation for a context window.
*/
export interface BudgetAllocation {
/** Character budget for dependency/prior-task summaries */
summaryBudgetChars: number;
@ -71,6 +77,9 @@ export interface BudgetAllocation {
// ─── Minimal interface slices for dependency injection ───────────────────────
// These avoid coupling to full ModelRegistry/SFPreferences types in tests.
/**
* Minimal model interface for context window resolution.
*/
export interface MinimalModel {
id: string;
provider: string;

View file

@ -9,11 +9,17 @@ import type { Decision, Requirement } from "./types.js";
// ─── Query Functions ───────────────────────────────────────────────────────
/**
* Options for filtering decisions by milestone and scope.
*/
export interface DecisionQueryOpts {
milestoneId?: string;
scope?: string;
}
/**
* Options for filtering requirements by milestone, slice, and status.
*/
export interface RequirementQueryOpts {
milestoneId?: string;
sliceId?: string;

View file

@ -8,6 +8,13 @@ import { deriveState } from "./state.js";
import { readEvents } from "./workflow-events.js";
import { renderAllProjections } from "./workflow-projections.js";
/**
* Check SF engine health: database constraints, projection drift, and corruption.
*
* Verifies orphaned tasks/slices, duplicate IDs, and missing task summaries.
* Re-renders stale markdown projections when event log is newer than cached files.
* Non-fatal: issues are reported but never auto-fixed.
*/
export async function checkEngineHealth(
basePath: string,
issues: DoctorIssue[],

View file

@ -50,6 +50,7 @@ export type JournalEventType =
| "worktree-skip"
| "worktree-merge-start"
| "worktree-merge-failed"
| "artifact-verification-retry"
// #4764 — worktree lifespan / divergence telemetry
| "worktree-created"
| "worktree-merged"
@ -59,7 +60,10 @@ export type JournalEventType =
| "canonical-root-redirect"
// #4765 — slice-cadence collapse
| "slice-merged"
| "milestone-resquash";
| "milestone-resquash"
// dispatch telemetry — measure agent/subagent invocation frequency and shape
| "subagent-invoked"
| "subagent-completed";
/** A single structured event in the journal. */
export interface JournalEntry {

View file

@ -20,9 +20,19 @@ import {
} from "./notification-store.js";
import { formattedShortcutPair } from "./shortcut-defs.js";
/**
* Filter mode for notification display. Controls which severity levels are shown.
*/
type FilterMode = "all" | "error" | "warning" | "info";
/**
* Cycle of filter modes used when cycling through filter states with 'f' key.
*/
const FILTER_CYCLE: FilterMode[] = ["all", "error", "warning", "info"];
/**
* Returns a single-character icon representing the notification severity.
*/
function severityIcon(severity: NotifySeverity): string {
switch (severity) {
case "error":
@ -36,7 +46,10 @@ function severityIcon(severity: NotifySeverity): string {
}
}
/**
* Word-wrap plain text to fit within maxWidth columns. Splits on whitespace,
* handles single words longer than maxWidth by truncating with ellipsis.
*/
function wrapText(text: string, maxWidth: number): string[] {
if (text.length <= maxWidth) return [text];
const words = text.split(/\s+/);
@ -59,6 +72,10 @@ function wrapText(text: string, maxWidth: number): string[] {
);
}
/**
* Format an ISO timestamp for display as relative time ("just now", "5m ago", etc.).
* Falls back to HH:MM:SS if parsing fails.
*/
function formatTimestamp(ts: string): string {
try {
const d = new Date(ts);

View file

@ -19,9 +19,21 @@ import { join } from "node:path";
// ─── Types ──────────────────────────────────────────────────────────────
/**
* Severity level for notifications. Indicates the urgency and importance of the notification.
*/
export type NotifySeverity = "info" | "success" | "warning" | "error";
/**
* Source origin for notifications. Indicates whether the notification came from explicit
* notify() calls or workflow-logger warnings.
*/
export type NotificationSource = "notify" | "workflow-logger";
/**
* Optional metadata attached to a notification. Provides context about
* notification classification, blocking behavior, and deduplication strategy.
*/
export interface NotificationMetadata {
kind?: "notice" | "approval_request" | "progress" | "terminal";
blocking?: boolean;
@ -29,6 +41,10 @@ export interface NotificationMetadata {
source?: string;
}
/**
* A persisted notification entry in the store. Contains all metadata and
* content necessary to reconstruct the notification UI state across sessions.
*/
export interface NotificationEntry {
id: string;
ts: string;

View file

@ -93,6 +93,9 @@ function detectFileOverlaps(
* 3. It does not have file overlap with other eligible milestones
* (overlaps are flagged as warnings but do not disqualify)
*
* Analyzes which milestones are eligible for parallel execution based on
* deps and file overlaps.
*/
export async function analyzeParallelEligibility(
basePath: string,
): Promise<ParallelCandidates> {
@ -214,6 +217,9 @@ export async function analyzeParallelEligibility(
/**
* Format parallel-eligibility analysis results into a human-readable markdown report.
*/
export function formatEligibilityReport(
candidates: ParallelCandidates,
): string {

View file

@ -37,6 +37,9 @@ export type MergeOrder = "sequential" | "by-completion";
* Uses a subprocess to avoid disrupting the global DB singleton.
* Returns true when milestones.status = 'complete' in the worktree's sf.db.
*/
export function isMilestoneCompleteInWorktreeDb(
basePath: string,
mid: string,
@ -95,6 +98,9 @@ function discoverDbCompletedMilestones(basePath: string): Set<string> {
* are included if their worktree DB shows status='complete'.
* See: https://github.com/singularity-forge/sf-run/issues/2812
*
* Determines the merge order for completed milestones (sequential or by-completion).
*/
export function determineMergeOrder(
workers: WorkerInfo[],
order: MergeOrder = "sequential",
@ -151,6 +157,9 @@ export function determineMergeOrder(
* Attempt to merge a single milestone's worktree back to main.
* Wraps mergeMilestoneToMain with error handling for parallel context.
*
* Returns success on merge, or conflict details on failure.
*/
export async function mergeCompletedMilestone(
basePath: string,
milestoneId: string,
@ -208,6 +217,9 @@ export async function mergeCompletedMilestone(
* Merge all completed milestones in sequence.
* Stops on first conflict and returns results so far.
*/
export async function mergeAllCompleted(
basePath: string,
workers: WorkerInfo[],
@ -232,6 +244,9 @@ export async function mergeAllCompleted(
/**
* Format merge operation results into a markdown report with conflicts and errors.
*/
export function formatMergeResults(results: MergeResult[]): string {
if (results.length === 0) return "No completed milestones to merge.";

View file

@ -49,7 +49,9 @@ export type {
SFPhaseModelConfig,
} from "./preferences-types.js";
/** Map of provider ID to allowed model ID patterns. */
export type ProviderModelAllowList = Record<string, readonly string[]>;
/** Map of provider ID to blocked model ID patterns. */
export type ProviderModelBlockList = Record<string, readonly string[]>;
type ProviderPolicyModel = {
@ -160,7 +162,10 @@ function isModelAllowedByBuiltInProviderPolicy(
return true;
}
export function isProviderModelAllowed(
export /**
* Check if a provider/model pair is allowed by built-in and user-configured policies.
*/
function isProviderModelAllowed(
provider: string,
modelId: string,
providerModelAllow: ProviderModelAllowList | undefined,
@ -189,6 +194,9 @@ export function isProviderModelAllowed(
);
}
/**
* Filter models by provider/model allow and block lists from user preferences.
*/
export function filterModelsByProviderModelAllow<
T extends { provider: string; id: string },
>(
@ -216,6 +224,9 @@ export function filterModelsByProviderModelAllow<
);
}
/**
* Check if a provider is in the allowed list and not in the blocked list.
*/
export function isProviderAllowedByLists(
provider: string,
allowedProviders: readonly string[] | undefined,
@ -247,17 +258,9 @@ export function resolveModelForUnit(unitType: string): string | undefined {
* - Extended: `planning: { model: claude-opus-4-6, fallbacks: [glm-5, minimax-m2.5] }`
*/
/**
* Fallback resolver used when the user hasn't pinned `models.<unit>`:
* iterate every model the pi-ai catalog knows about whose provider is in
* `allowed_providers` (or every provider, if the allow-list is unset),
* score them with the unit-type-specific benchmark profile, and return
* the top pick plus diversified fallbacks.
*
* Pulls the candidate pool from `models.generated.js` rather than a live
* registry lookup so it works during preference resolution (before the
* registry is populated). The dispatch-time availability check happens
* downstream in auto-model-selection.ts and filters unavailable
* candidates naturally (expired keys, providers without auth, etc.).
* Auto-benchmark model picker when user hasn't pinned `models.<unit>`.
* Scores candidates using unit-type-specific benchmark profiles and returns top pick plus fallbacks.
* Works during preference resolution by pulling from pi-ai catalog rather than live registry.
*/
function resolveAutoBenchmarkPickForUnit(
unitType: string,

View file

@ -106,6 +106,10 @@ function normalizedTask(unit: ProductionMutationUnit): string {
return [unit.taskTitle, unit.taskText].join("\n").toLowerCase();
}
/**
* Assess whether LLM auto-approval criteria are met for a production mutation.
* Returns approved=true only if all safety constraints are present in task text.
*/
export function assessLlmProductionMutationApproval(
unit: ProductionMutationUnit,
): { approved: boolean; reasons: string[] } {
@ -149,6 +153,10 @@ export function assessLlmProductionMutationApproval(
return { approved: reasons.length === 0, reasons };
}
/**
* Build a fully approved production mutation approval using LLM policy.
* Caller must verify assessLlmProductionMutationApproval() returned approved=true first.
*/
export function buildLlmProductionMutationApproval(
unit: ProductionMutationUnit,
approvedAt: Date = new Date(),
@ -181,6 +189,10 @@ export function buildLlmProductionMutationApproval(
};
}
/**
* Assess and atomically write an LLM-approved production mutation approval if criteria match.
* Returns approved=false with reasons if any safety constraint is missing.
*/
export function approveProductionMutationWithLlmPolicy(
basePath: string,
unit: ProductionMutationUnit,
@ -207,6 +219,9 @@ export function approveProductionMutationWithLlmPolicy(
return { path, approved: true, reasons: [], wrote: true };
}
/**
* Create an empty approval template if it doesn't exist. No-op if already present.
*/
export function ensureProductionMutationApprovalTemplate(
basePath: string,
unit: ProductionMutationUnit,
@ -237,6 +252,10 @@ function nonEmptyStringArray(value: unknown): value is string[] {
);
}
/**
* Validate a parsed approval JSON against schema and safety constraints.
* Returns approved=false with reasons if any field is missing or invalid.
*/
export function validateProductionMutationApproval(
data: unknown,
unit: ProductionMutationUnit,
@ -300,6 +319,9 @@ export function validateProductionMutationApproval(
return { approved: reasons.length === 0, reasons };
}
/**
* Read and validate approval status from disk. Returns rejected reasons if validation fails.
*/
export function readProductionMutationApprovalStatus(
basePath: string,
unit: ProductionMutationUnit,

View file

@ -3,12 +3,25 @@
* Tracks every bash command, file write, and file edit during a unit execution.
* Evidence is compared against LLM completion claims in evidence-cross-ref.ts.
*
* Evidence is persisted to .sf/safety/evidence-<mid>-<sid>-<tid>.json so it
* survives session restarts (pause/resume, crash recovery). On unit start,
* call resetEvidence() then loadEvidenceFromDisk(). On every new tool call,
* saveEvidenceToDisk() is called automatically by recordToolCall/recordToolResult.
*
* Follows the same module-level Map pattern as auto-tool-tracking.ts.
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
import { join } from "node:path";
import {
existsSync,
mkdirSync,
readFileSync,
writeFileSync,
renameSync,
unlinkSync,
} from "node:fs";
import { join, dirname } from "node:path";
import { randomBytes } from "node:crypto";
// ─── Types ──────────────────────────────────────────────────────────────────
@ -41,22 +54,11 @@ export type EvidenceEntry = BashEvidence | FileWriteEvidence | FileEditEvidence;
let unitEvidence: EvidenceEntry[] = [];
// Disk persistence: unit context set by resetEvidence() at unit start.
// Guarded by presence of currentUnitId — if absent, disk write is skipped.
let currentUnitId: string | undefined = undefined;
let currentBasePath: string | undefined = undefined;
// ─── Public API ─────────────────────────────────────────────────────────────
/**
* Reset all evidence for a new unit. Call at unit start.
* @param unitId - The active unit ID (e.g. M006/S02/T03)
* @param basePath - Project root path for computing the evidence file path
*/
export function resetEvidence(unitId?: string, basePath?: string): void {
/** Reset all evidence for a new unit. Call at unit start. */
export function resetEvidence(): void {
unitEvidence = [];
currentUnitId = unitId;
currentBasePath = basePath;
}
/** Get a read-only view of all evidence collected for the current unit. */
@ -79,123 +81,78 @@ export function getFilePaths(): string[] {
.map((e) => e.path);
}
// ─── Disk Persistence ──────────────────────────────────────────────────────
// ─── Persistence (Bug #4385 — evidence must survive session restarts) ────────
/**
* Append an evidence entry to .sf/active/{unitId}/evidence.jsonl.
*
* Purpose: Evidence collected at tool_call time must survive a mid-unit re-dispatch
* race where runUnitPhase re-fires between tool_call and tool_execution_end.
* Without disk persistence, in-memory evidence is lost on re-dispatch.
*
* Consumer: phases.ts runUnitPhase calls resetEvidence(unitId, basePath) at unit start.
* Best-effort: disk write failures are silently swallowed so they never crash the agent.
* Build the path for the evidence JSON file for a given unit.
* Lives under .sf/safety/ which is gitignored and session-scoped.
*/
function saveEvidenceToDisk(entry: EvidenceEntry): void {
if (!currentUnitId || !currentBasePath) return;
try {
const dir = join(currentBasePath, ".sf", "active", currentUnitId);
if (!existsSync(dir)) {
mkdirSync(dir, { recursive: true });
}
const line = JSON.stringify(entry);
appendFileSync(join(dir, "evidence.jsonl"), line + "\n");
} catch {
// Best-effort: disk write failures must not crash the agent.
}
}
// ─── Recording (called from register-hooks.ts) ─────────────────────────────
/**
* Record a tool call at dispatch time (before execution).
* Exit codes and output are filled in by recordToolResult after execution.
*/
export function recordToolCall(
toolName: string,
input: Record<string, unknown>,
): void {
let entry: EvidenceEntry | undefined;
if (toolName === "bash" || toolName === "Bash") {
entry = {
kind: "bash",
toolCallId: "",
command: String(input.command ?? ""),
exitCode: -1,
outputSnippet: "",
timestamp: Date.now(),
};
} else if (toolName === "write" || toolName === "Write") {
entry = {
kind: "write",
toolCallId: "",
path: String(input.file_path ?? input.path ?? ""),
timestamp: Date.now(),
};
} else if (toolName === "edit" || toolName === "Edit") {
entry = {
kind: "edit",
toolCallId: "",
path: String(input.file_path ?? input.path ?? ""),
timestamp: Date.now(),
};
}
if (entry) {
unitEvidence.push(entry);
saveEvidenceToDisk(entry);
}
/**
 * Build the on-disk path for a unit's persisted evidence JSON file,
 * stored under .sf/safety/ and keyed by milestone/slice/task IDs.
 */
function evidencePath(
  basePath: string,
  milestoneId: string,
  sliceId: string,
  taskId: string,
): string {
  const fileName = `evidence-${milestoneId}-${sliceId}-${taskId}.json`;
  const safetyDir = join(basePath, ".sf", "safety");
  return join(safetyDir, fileName);
}
/**
 * Record a tool execution result. Matches the most recent unresolved entry
 * of the same kind (toolCallId still empty) and fills in the toolCallId,
 * exit code, and output snippet, then persists the updated entry to disk.
 */
export function recordToolResult(
  toolCallId: string,
  toolName: string,
  result: unknown,
  isError: boolean,
): void {
  const normalizedName = toolName.toLowerCase();
  if (normalizedName === "bash") {
    const entry = findLastUnresolved("bash") as BashEvidence | undefined;
    if (entry) {
      entry.toolCallId = toolCallId;
      const text = extractResultText(result);
      // First 500 chars are enough to audit what the command produced.
      entry.outputSnippet = text.slice(0, 500);
      // Prefer the explicit exit-code marker in the output; fall back to the
      // tool-level error flag when the marker is absent.
      const exitMatch = text.match(/Command exited with code (\d+)/);
      entry.exitCode = exitMatch ? Number(exitMatch[1]) : isError ? 1 : 0;
      saveEvidenceToDisk(entry);
    }
  } else if (normalizedName === "write" || normalizedName === "edit") {
    const entry = findLastUnresolved(normalizedName as "write" | "edit");
    if (entry) {
      entry.toolCallId = toolCallId;
      saveEvidenceToDisk(entry);
    }
  }
}
// ─── Disk Load (session resume) ────────────────────────────────────────────
/** Build the evidence.jsonl path for a unit under <basePath>/.sf/active. */
function evidencePath(
  basePath: string,
  milestoneId: string,
  sliceId: string,
  taskId: string,
): string {
  const unitSegment = `${milestoneId}/${sliceId}/${taskId}`;
  return join(basePath, ".sf", "active", unitSegment, "evidence.jsonl");
}
/**
 * Validate that a parsed value is an array of EvidenceEntry objects.
 * Rejects corrupt / schema-mismatch data rather than letting it poison state.
 *
 * Bug fix: a stray unconditional `return` on the common-field check made all
 * of the per-kind field validation below it unreachable, so entries missing
 * timestamp/command/path were accepted.
 */
function isEvidenceArray(data: unknown): data is EvidenceEntry[] {
  if (!Array.isArray(data)) return false;
  return data.every((e) => {
    if (e === null || typeof e !== "object") return false;
    const rec = e as Record<string, unknown>;
    // Fields required on every entry kind.
    if (typeof rec.toolCallId !== "string") return false;
    if (typeof rec.timestamp !== "number") return false;
    // Kind-specific required fields.
    if (rec.kind === "bash") {
      return (
        typeof rec.command === "string" &&
        typeof rec.exitCode === "number" &&
        typeof rec.outputSnippet === "string"
      );
    }
    if (rec.kind === "write" || rec.kind === "edit") {
      return typeof rec.path === "string";
    }
    return false;
  });
}
/**
 * Persist the current in-memory evidence to disk so it survives a session
 * restart. Called after recordToolCall/recordToolResult mutate the array.
 * Non-fatal: persistence failures must never break unit execution.
 *
 * Writes atomically (random-suffixed temp file + rename) so a crash
 * mid-write cannot leave a truncated evidence file behind.
 */
export function saveEvidenceToDisk(
  basePath: string,
  milestoneId: string,
  sliceId: string,
  taskId: string,
): void {
  try {
    const path = evidencePath(basePath, milestoneId, sliceId, taskId);
    mkdirSync(dirname(path), { recursive: true });
    const tmp = `${path}.tmp.${randomBytes(4).toString("hex")}`;
    writeFileSync(tmp, JSON.stringify(unitEvidence, null, 2) + "\n", "utf-8");
    renameSync(tmp, path);
  } catch {
    // Non-fatal — don't let persistence failures break unit execution
  }
}
/**
* Load persisted evidence from disk into the in-memory array.
* Call after resetEvidence() on session resume to restore context for a
* partially-executed unit. If the file does not exist (fresh unit), this
* is a no-op getEvidence() will return [] which is correct.
*/
export function loadEvidenceFromDisk(
basePath: string,
@ -206,37 +163,98 @@ export function loadEvidenceFromDisk(
try {
const path = evidencePath(basePath, milestoneId, sliceId, taskId);
if (!existsSync(path)) return;
const lines = readFileSync(path, "utf-8")
.split("\n")
.filter((l) => l.trim().length > 0);
const entries: EvidenceEntry[] = [];
for (const line of lines) {
try {
const parsed = JSON.parse(line);
entries.push(parsed as EvidenceEntry);
} catch {
// Skip malformed lines
}
}
if (isEvidenceArray(entries)) {
unitEvidence = entries;
const raw = readFileSync(path, "utf-8");
const parsed = JSON.parse(raw);
if (isEvidenceArray(parsed)) {
unitEvidence = parsed;
}
} catch {
// Non-fatal — corrupt / missing file is treated as empty evidence
}
}
// ─── Internals ──────────────────────────────────────────────────────────────
function findLastUnresolved(kind: string): EvidenceEntry | undefined {
for (let i = unitEvidence.length - 1; i >= 0; i--) {
if (unitEvidence[i].kind === kind && unitEvidence[i].toolCallId === "") {
return unitEvidence[i];
/**
* Delete the persisted evidence file for a unit after it has been fully
* processed. Prevents stale evidence from affecting future retries of
* the same unit ID.
*/
export function clearEvidenceFromDisk(
basePath: string,
milestoneId: string,
sliceId: string,
taskId: string,
): void {
try {
const path = evidencePath(basePath, milestoneId, sliceId, taskId);
if (existsSync(path)) {
unlinkSync(path);
}
} catch {
// Non-fatal
}
return undefined;
}
// ─── Recording (called from register-hooks.ts) ─────────────────────────────
/**
 * Record a tool call at dispatch time (before execution).
 * Exit codes and output are filled in by recordToolResult after execution,
 * matched via the toolCallId assigned here.
 */
export function recordToolCall(
  toolCallId: string,
  toolName: string,
  input: Record<string, unknown>,
): void {
  const timestamp = Date.now();
  switch (toolName) {
    case "bash":
    case "Bash":
      unitEvidence.push({
        kind: "bash",
        toolCallId,
        command: String(input.command ?? ""),
        exitCode: -1,
        outputSnippet: "",
        timestamp,
      });
      break;
    case "write":
    case "Write":
      unitEvidence.push({
        kind: "write",
        toolCallId,
        path: String(input.file_path ?? input.path ?? ""),
        timestamp,
      });
      break;
    case "edit":
    case "Edit":
      unitEvidence.push({
        kind: "edit",
        toolCallId,
        path: String(input.file_path ?? input.path ?? ""),
        timestamp,
      });
      break;
  }
}
/**
 * Record a tool execution result. Matches the entry by toolCallId (assigned
 * at dispatch time) and fills in exit code + output. Prior versions matched
 * by `kind + empty-string` which corrupted parallel tool calls.
 */
export function recordToolResult(
  toolCallId: string,
  toolName: string,
  result: unknown,
  isError: boolean,
): void {
  const entry = unitEvidence.find((e) => e.toolCallId === toolCallId);
  // Only bash entries carry result data; write/edit entries are complete at
  // dispatch time.
  if (entry === undefined || entry.kind !== "bash") return;
  const text = extractResultText(result);
  entry.outputSnippet = text.slice(0, 500);
  const exitMatch = text.match(/Command exited with code (\d+)/);
  if (exitMatch) {
    entry.exitCode = Number(exitMatch[1]);
  } else {
    entry.exitCode = isError ? 1 : 0;
  }
}
// ─── Internals ──────────────────────────────────────────────────────────────
function extractResultText(result: unknown): string {
if (typeof result === "string") return result;
if (result && typeof result === "object") {

View file

@ -4,7 +4,9 @@
*
* Uses tasks.expected_output (DB column, populated from per-task ## Expected Output)
* and tasks.files (from slice PLAN.md - Files: subline) as the expected set.
* Defaults to git diff HEAD~1 --name-only after auto-commit. Deferred-commit
* Compares against `git diff-tree --root --no-commit-id -r --name-only HEAD` after auto-commit.
* Using diff-tree --root handles initial commits, shallow clones, and merge commits correctly
* (Bug git diff HEAD~1 failed on initial commits). Deferred-commit
* flows can instead validate the staged diff before the commit is created.
*
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
@ -159,15 +161,12 @@ function getChangedFilesFromLastCommit(basePath: string): string[] | null {
try {
const result = execFileSync(
"git",
["diff", "--name-only", "HEAD~1", "HEAD"],
["diff-tree", "--root", "--no-commit-id", "-r", "--name-only", "HEAD"],
{ cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
).trim();
return result ? result.split("\n").filter(Boolean) : [];
} catch (e) {
logWarning(
"safety",
`git diff failed in file-change-validator: ${(e as Error).message}`,
);
logWarning("safety", `git diff failed in file-change-validator: ${(e as Error).message}`);
return null;
}
}

View file

@ -26,7 +26,7 @@ export function createCheckpoint(
unitId: string,
): string | null {
try {
const sha = execFileSync("git", ["rev-parse", "HEAD"], {
const sha = execFileSync("git", ["rev-parse", "--verify", "HEAD"], {
cwd: basePath,
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf-8",
@ -48,6 +48,17 @@ export function createCheckpoint(
return sha;
} catch (e) {
const stderr = (e as { stderr?: Buffer | string }).stderr;
const stderrText = Buffer.isBuffer(stderr)
? stderr.toString("utf-8")
: String(stderr ?? "");
if (
stderrText.includes("Needed a single revision") ||
stderrText.includes("unknown revision") ||
stderrText.includes("ambiguous argument 'HEAD'")
) {
return null;
}
logWarning("safety", `checkpoint creation failed: ${(e as Error).message}`);
return null;
}

View file

@ -114,12 +114,15 @@ export type {
FileWriteEvidence,
} from "./evidence-collector.js";
export {
clearEvidenceFromDisk,
getBashEvidence,
getEvidence,
getFilePaths,
loadEvidenceFromDisk,
recordToolCall,
recordToolResult,
resetEvidence,
saveEvidenceToDisk,
} from "./evidence-collector.js";
export type {
ClaimedEvidence,

View file

@ -53,6 +53,9 @@ const BACKLOG_HEADER =
export type SelfFeedbackSeverity = "critical" | "high" | "medium" | "low";
/**
* Context where a feedback entry occurred (milestone/slice/task/unit).
*/
export interface SelfFeedbackOccurredIn {
milestone?: string;
slice?: string;
@ -60,6 +63,9 @@ export interface SelfFeedbackOccurredIn {
unitType?: string;
}
/**
* A self-feedback entry reporting an anomaly or issue detected during auto-mode execution.
*/
export interface SelfFeedbackEntry {
kind: string;
severity: SelfFeedbackSeverity;
@ -105,6 +111,9 @@ export type ResolutionEvidence =
| { kind: "human-clear" }
| { kind: "promoted-to-requirement"; requirementId: string };
/**
* Persisted feedback entry with metadata: ID, timestamp, version, and resolution state.
*/
export interface PersistedSelfFeedbackEntry extends SelfFeedbackEntry {
id: string;
ts: string;
@ -120,6 +129,9 @@ export interface PersistedSelfFeedbackEntry extends SelfFeedbackEntry {
resolvedCriteriaMet?: string[];
}
/**
* Result of recording a self-feedback entry. Contains the persisted entry and blocking status.
*/
export interface RecordResult {
entry: PersistedSelfFeedbackEntry;
/** True when callers should treat the originating unit as blocked. */
@ -308,6 +320,9 @@ export function getBlockedEntries(
);
}
/**
* Input for marking a feedback entry as resolved with evidence and reasoning.
*/
export interface ResolutionInput {
reason: string;
evidence: ResolutionEvidence;
@ -407,6 +422,9 @@ function compareSemver(a: string, b: string): number {
*
* Returns the entries split by retry-eligibility for the dispatcher to act on.
*/
/**
* Result of triaging blocked entries by their version eligibility for retry.
*/
export interface BlockedTriage {
retry: PersistedSelfFeedbackEntry[];
stillBlocked: PersistedSelfFeedbackEntry[];

View file

@ -27,7 +27,8 @@ const activelyLoadedSkills = new Set<string>();
/**
* Capture the list of available skill names at dispatch time.
* Called before each unit starts.
*
* Called before each unit starts to establish the baseline for telemetry.
*/
export function captureAvailableSkills(): void {
const skillsDir = join(homedir(), ".agents", "skills");
@ -44,18 +45,19 @@ export function captureAvailableSkills(): void {
}
/**
 * Record that a skill was actively loaded during execution (its SKILL.md
 * file was read). Call this when the agent reads a SKILL.md file.
 *
 * @param skillName - Name of the skill file that was read.
 */
export function recordSkillRead(skillName: string): void {
  activelyLoadedSkills.add(skillName);
}
/**
* Get the skill names for the current unit and clear state.
* Returns actively loaded skills if any, otherwise available skills.
* This gives the most useful signal: if the agent read specific skills,
* report those; otherwise report what was available.
* Retrieve captured skills for the current unit and reset state.
*
* Returns actively loaded skills if any were read, otherwise returns available skills.
* Clears all telemetry state after retrieval.
*/
export function getAndClearSkills(): string[] {
const result =
@ -68,7 +70,7 @@ export function getAndClearSkills(): string[] {
}
/**
* Reset all telemetry state. Called when auto-mode stops.
* Clear all telemetry state. Called when auto-mode stops.
*/
export function resetSkillTelemetry(): void {
availableSkills = [];
@ -76,8 +78,10 @@ export function resetSkillTelemetry(): void {
}
/**
* Get last-used timestamps for all skills from metrics data.
* Returns a Map from skill name to most recent ms timestamp.
* Extract last-used timestamp for each skill from unit metrics.
*
* @param units - Array of unit metrics with skill lists and timestamps.
* @returns Map from skill name to most recent finishedAt timestamp.
*/
export function getSkillLastUsed(
units: Array<{ finishedAt: number; skills?: string[] }>,
@ -96,8 +100,11 @@ export function getSkillLastUsed(
}
/**
* Detect stale skills those not used within the given threshold (in days).
* Returns skill names that should be deprioritized.
* Identify skills unused since a given threshold, including untracked installs.
*
* @param units - Unit metrics containing skill usage.
* @param thresholdDays - Days of inactivity to consider stale.
* @returns All installed skills exceeding the staleness threshold.
*/
export function detectStaleSkills(
units: Array<{ finishedAt: number; skills?: string[] }>,

View file

@ -37,6 +37,9 @@ export interface EligibleSlice {
* @param completedSliceIds Set of slice IDs that are already complete.
* @returns Array of eligible slice descriptors.
*/
/**
* Determine which slices are eligible to run based on explicit dependencies or positional order.
*/
export function getEligibleSlices(
slices: SliceInput[],
completedSliceIds: Set<string>,

View file

@ -72,6 +72,9 @@ let sliceState: SliceOrchestratorState | null = null;
/**
 * Check whether slice-level parallel execution is currently active.
 */
export function isSliceParallelActive(): boolean {
  return sliceState?.active === true;
}
@ -79,6 +82,9 @@ export function isSliceParallelActive(): boolean {
/**
 * Get the current slice orchestrator state (read-only snapshot), including
 * active workers and cost tracking, or null when none has been created.
 */
export function getSliceOrchestratorState(): SliceOrchestratorState | null {
  return sliceState;
}
@ -89,6 +95,9 @@ export function getSliceOrchestratorState(): SliceOrchestratorState | null {
* For each eligible slice: create a worktree, spawn `sf --mode json --print "/sf autonomous"`
* with env SF_SLICE_LOCK=<SID> + SF_MILESTONE_LOCK=<MID> + SF_PARALLEL_WORKER=1.
*/
/**
* Start parallel execution of eligible slices with worker processes and budget tracking.
*/
export async function startSliceParallel(
basePath: string,
milestoneId: string,
@ -201,6 +210,9 @@ export async function startSliceParallel(
/**
* Stop all slice-parallel workers and deactivate.
*/
/**
* Terminate all active slice workers and clean up their worktrees.
*/
export function stopSliceParallel(): void {
if (!sliceState) return;
@ -235,6 +247,9 @@ export function stopSliceParallel(): void {
/**
* Get aggregate cost across all slice workers.
*/
/**
* Calculate total cost across all active slice workers.
*/
export function getSliceAggregateCost(): number {
if (!sliceState) return 0;
let total = 0;
@ -247,6 +262,9 @@ export function getSliceAggregateCost(): number {
/**
 * Check whether the aggregate slice cost has met or exceeded the budget
 * ceiling. Always false when no ceiling is configured.
 */
export function isSliceBudgetExceeded(): boolean {
  if (!sliceState?.budgetCeiling) return false;
  return getSliceAggregateCost() >= sliceState.budgetCeiling;
}
@ -255,6 +273,9 @@ export function isSliceBudgetExceeded(): boolean {
/**
* Reset module state (for testing).
*/
/**
* Reset orchestrator state and clear all worker cleanup handlers.
*/
export function resetSliceOrchestrator(): void {
if (sliceState) {
for (const w of sliceState.workers.values()) {

View file

@ -0,0 +1,133 @@
/**
* skip-slice handler the core operation behind sf_skip_slice.
*
* Marks a slice as skipped and cascades the skip to every non-closed task in
* that slice. Without the task cascade the deep-check in
* executeCompleteMilestone reports pending tasks inside the skipped slice and
* blocks milestone completion (see #4375).
*
* This function performs DB writes only. The MCP wrapper in
* bootstrap/db-tools.ts handles state-cache invalidation and STATE.md rebuild.
*/
import {
getSlice,
getSliceTasks,
isDbAvailable,
transaction,
updateSliceStatus,
updateTaskStatus,
} from "../sf-db.js";
import { isClosedStatus } from "../status-guards.js";
/**
 * Input parameters for {@link handleSkipSlice}.
 *
 * - `milestoneId` / `sliceId` identify the target slice.
 * - `reason` is a free-form note surfaced in the MCP response; optional
 *   because the caller (e.g. rethink flow) may not have a structured reason.
 */
export interface SkipSliceParams {
  /** Milestone that owns the target slice. */
  milestoneId: string;
  /** Slice to mark as skipped. */
  sliceId: string;
  /** Optional free-form skip reason, echoed back in the result. */
  reason?: string;
}
/**
 * Stable machine-readable error codes for {@link SkipSliceResult.errorCode}.
 * Keep in sync with the wrapper in bootstrap/db-tools.ts.
 */
export type SkipSliceErrorCode = "slice_not_found" | "already_complete";
/**
 * Result of a {@link handleSkipSlice} call.
 *
 * - `tasksSkipped` — count of tasks whose status was cascaded to "skipped".
 *   Zero is a valid success (slice had no non-closed tasks).
 * - `wasAlreadySkipped` — true when the slice was in "skipped" status on
 *   entry; callers can use this to distinguish first-skip from re-skip.
 * - `error` / `errorCode` — set together for recoverable validation failures
 *   (unknown slice, slice already complete). Both absent on success. DB
 *   errors propagate as thrown exceptions and should be caught by the caller.
 */
export interface SkipSliceResult {
  /** Echo of the requested milestone ID. */
  milestoneId: string;
  /** Echo of the requested slice ID. */
  sliceId: string;
  /** Number of tasks cascaded to "skipped" (zero is a valid success). */
  tasksSkipped: number;
  /** True when the slice was already "skipped" on entry. */
  wasAlreadySkipped: boolean;
  /** Echo of the caller-supplied reason, if any. */
  reason?: string;
  /** Human-readable failure message; absent on success. */
  error?: string;
  /** Machine-readable failure code; set together with `error`. */
  errorCode?: SkipSliceErrorCode;
}
/**
 * Mark a slice as "skipped" and cascade the skip to every non-closed task in
 * that slice. Runs as a single transaction so slice status and task statuses
 * are always consistent.
 *
 * Behaviour summary:
 * - Unknown slice → returns {@link SkipSliceResult} with `error`.
 * - Slice already complete/done → returns `error` (cannot un-complete).
 * - Slice already skipped → still cascades leftover non-closed tasks
 *   (heals inconsistent historical state from projects that ran older
 *   versions before the #4375 cascade fix).
 * - Tasks in closed status (complete/done/skipped) are never downgraded.
 */
export function handleSkipSlice(params: SkipSliceParams): SkipSliceResult {
  const { milestoneId, sliceId, reason } = params;
  const base: SkipSliceResult = {
    milestoneId,
    sliceId,
    tasksSkipped: 0,
    wasAlreadySkipped: false,
    reason,
  };
  // Fail loudly on a closed DB so a `null` from getSlice() inside the
  // transaction unambiguously means "slice not found", never "DB unavailable".
  // The MCP wrapper in bootstrap/db-tools.ts runs ensureDbOpen() before calling
  // this helper; this guard protects direct callers (tests, future code).
  if (!isDbAvailable()) {
    throw new Error("handleSkipSlice: SF database is not available");
  }
  // ── Guards + DB writes inside a single transaction (prevents TOCTOU) ────
  let failure: { message: string; code: SkipSliceErrorCode } | null = null;
  let alreadySkipped = false;
  let cascaded = 0;
  transaction(() => {
    const slice = getSlice(milestoneId, sliceId);
    if (!slice) {
      failure = {
        message: `Slice ${sliceId} not found in milestone ${milestoneId}`,
        code: "slice_not_found",
      };
      return;
    }
    if (slice.status === "complete" || slice.status === "done") {
      failure = {
        message: `Slice ${sliceId} is already complete — cannot skip.`,
        code: "already_complete",
      };
      return;
    }
    alreadySkipped = slice.status === "skipped";
    if (!alreadySkipped) {
      updateSliceStatus(milestoneId, sliceId, "skipped");
    }
    // Cascade: mark every non-closed task as skipped so milestone completion
    // doesn't trip the deep-task guard (#4375). Closed tasks (complete/done/
    // skipped) are left untouched — we never downgrade.
    for (const task of getSliceTasks(milestoneId, sliceId)) {
      if (isClosedStatus(task.status)) continue;
      updateTaskStatus(milestoneId, sliceId, task.id, "skipped");
      cascaded++;
    }
  });
  if (failure) {
    return { ...base, error: failure.message, errorCode: failure.code };
  }
  return { ...base, tasksSkipped: cascaded, wasAlreadySkipped: alreadySkipped };
}

View file

@ -345,6 +345,9 @@ function renderRiskHeatmap(
// ─── Dependencies View ───────────────────────────────────────────────────────
/**
* Render milestone/slice dependencies and critical path with data flow.
*/
export function renderDepsView(
data: VisualizerData,
th: Theme,
@ -506,6 +509,9 @@ function renderCriticalPath(
// ─── Metrics View ────────────────────────────────────────────────────────────
/**
* Render cost and usage metrics by phase, model, tier, and projections.
*/
export function renderMetricsView(
data: VisualizerData,
th: Theme,
@ -687,6 +693,9 @@ function renderCostProjections(
// ─── Timeline View (Gantt) ──────────────────────────────────────────────────
/**
* Render execution timeline as Gantt view (wide) or list view (narrow).
*/
export function renderTimelineView(
data: VisualizerData,
th: Theme,
@ -850,6 +859,9 @@ function formatTimeLabel(ts: number): string {
// ─── Agent View ──────────────────────────────────────────────────────────────
/**
* Render current agent status, completion progress, budget pressure, and recent units.
*/
export function renderAgentView(
data: VisualizerData,
th: Theme,
@ -976,6 +988,9 @@ export function renderAgentView(
// ─── Changelog View ──────────────────────────────────────────────────────────
/**
* Render completed slices with one-liners, files modified, decisions, and patterns.
*/
export function renderChangelogView(
data: VisualizerData,
th: Theme,
@ -1041,6 +1056,9 @@ export function renderChangelogView(
// ─── Export View ─────────────────────────────────────────────────────────────
/**
* Render export options (markdown, JSON, snapshot) with last export path.
*/
export function renderExportView(
_data: VisualizerData,
th: Theme,

View file

@ -43,6 +43,7 @@ function getWorktreeCompletions(prefix: string) {
return null;
}
/** Register a lazy-loaded worktree command alias via dynamic import. */
function registerLazyWorktreeAlias(
pi: ExtensionAPI,
name: "worktree" | "wt",
@ -60,6 +61,7 @@ function registerLazyWorktreeAlias(
});
}
/** Register /worktree and /wt commands with lazy loading via dynamic import. */
export function registerLazyWorktreeCommands(pi: ExtensionAPI): void {
registerLazyWorktreeAlias(
pi,

View file

@ -54,12 +54,18 @@ import {
*/
let originalCwd: string | null = null;
/**
 * Get the original project root if currently in a worktree, or null.
 * Used to restore context after `/worktree merge` or `/worktree return`.
 */
export function getWorktreeOriginalCwd(): string | null {
  return originalCwd;
}
/** Get the name of the active worktree, or null if not in one. */
/**
* Get the name of the active worktree, or null if not in one.
* Extracts from .sf/worktrees/ path segment.
*/
export function getActiveWorktreeName(): string | null {
if (!originalCwd) return null;
const cwd = process.cwd();
@ -263,6 +269,10 @@ async function worktreeHandler(
}
}
/**
* Main handler for /worktree and /wt commands routes to subcommand handlers.
* Manages worktree state tracking (originalCwd) across chdir calls.
*/
export async function handleWorktreeCommand(
args: string,
ctx: ExtensionCommandContext,
@ -272,6 +282,7 @@ export async function handleWorktreeCommand(
await worktreeHandler(args, ctx, pi, alias);
}
/** Register /worktree and /wt commands with completion support. */
export function registerWorktreeCommand(pi: ExtensionAPI): void {
// Restore worktree state after /reload.
// The module-level originalCwd resets to null when extensions are re-loaded,

View file

@ -20,6 +20,10 @@ import { listWorktrees, type WorktreeInfo } from "./worktree-manager.js";
// ─── Types ─────────────────────────────────────────────────────────────────
/**
* Health and lifecycle status of a single worktree.
* Used for audits, health checks, and the `/worktree list` status display.
*/
export interface WorktreeHealthStatus {
/** The worktree info from worktree-manager */
worktree: WorktreeInfo;

View file

@ -0,0 +1,179 @@
import { existsSync, readFileSync, realpathSync, statSync } from "node:fs";
import { homedir } from "node:os";
import { join, resolve } from "node:path";
/** Resolve the SF home directory: SF_HOME env override, else ~/.sf. */
function sfHome(): string {
  const override = process.env.SF_HOME;
  return override ? override : join(homedir(), ".sf");
}
/**
 * Index positions of the SF worktree marker within a normalized
 * (forward-slash) path, as returned by findWorktreeSegment().
 */
export interface WorktreeSegment {
  /** Index where the `/.sf/.../worktrees/` segment begins. */
  sfIdx: number;
  /** Index just past the `worktrees/` marker (start of the worktree name). */
  afterWorktrees: number;
}
/**
 * Normalize a path for cross-platform comparison: resolve symlinks when the
 * path exists (falling back to resolve() when it does not), convert to
 * forward slashes, strip trailing slashes, and lowercase on Windows.
 */
export function normalizeWorktreePathForCompare(path: string): string {
  let canonical: string;
  try {
    canonical = realpathSync(path);
  } catch {
    canonical = resolve(path);
  }
  const forwardSlashed =
    canonical.replaceAll("\\", "/").replace(/\/+$/, "") || "/";
  return process.platform === "win32"
    ? forwardSlashed.toLowerCase()
    : forwardSlashed;
}
/**
 * Find the SF worktree segment in both direct project layout and the
 * symlink-resolved external-state layout used by ~/.sf/projects/<hash>.
 * Expects a forward-slash-normalized path; returns null when the path is
 * not inside an SF worktree.
 */
export function findWorktreeSegment(
  normalizedPath: string,
): WorktreeSegment | null {
  const DIRECT_MARKER = "/.sf/worktrees/";
  const directIdx = normalizedPath.indexOf(DIRECT_MARKER);
  if (directIdx >= 0) {
    return {
      sfIdx: directIdx,
      afterWorktrees: directIdx + DIRECT_MARKER.length,
    };
  }
  const externalMatch = /\/\.sf\/projects\/[^/]+\/worktrees\//.exec(
    normalizedPath,
  );
  if (externalMatch) {
    const start = externalMatch.index;
    return {
      sfIdx: start,
      afterWorktrees: start + externalMatch[0].length,
    };
  }
  return null;
}

/** True when the given path (either separator style) is inside an SF worktree. */
export function isSfWorktreePath(path: string): boolean {
  return findWorktreeSegment(path.replaceAll("\\", "/")) !== null;
}
/**
 * Resolve the canonical project root for worktree operations.
 *
 * Priority order: `originalBasePath` wins when available because session
 * state already knows the root; `SF_PROJECT_ROOT` is the next strongest
 * signal for worker processes; otherwise `basePath` itself. The winning
 * path is then canonicalized by resolveProjectRootFromPath().
 */
export function resolveWorktreeProjectRoot(
  basePath: string,
  originalBasePath?: string | null,
): string {
  const sessionRoot = originalBasePath?.trim();
  const envRoot = process.env.SF_PROJECT_ROOT?.trim();
  return resolveProjectRootFromPath(sessionRoot || envRoot || basePath);
}
/**
 * Canonicalize a path to its project root.
 *
 * Non-worktree paths fall back to the nearest bootstrapped .sf root, then
 * the nearest git working-tree root, then the path itself. Worktree paths
 * are cut at the `.sf` marker; when the cut lands inside SF_HOME (the
 * external-state layout under ~/.sf), the real root is recovered from the
 * worktree's `.git` file instead.
 */
function resolveProjectRootFromPath(path: string): string {
  const normalizedPath = path.replaceAll("\\", "/");
  const segment = findWorktreeSegment(normalizedPath);
  if (!segment) {
    return resolveNearestBootstrappedSfRoot(path) ?? resolveGitWorkingTreeRoot(path) ?? path;
  }
  // Cut at the `.sf` marker using the path's native separator; fall back to
  // the index found on the normalized form when the native marker is absent.
  const sepChar = path.includes("\\") ? "\\" : "/";
  const sfMarker = `${sepChar}.sf${sepChar}`;
  const markerIdx = path.indexOf(sfMarker);
  const candidate = markerIdx !== -1
    ? path.slice(0, markerIdx)
    : path.slice(0, segment.sfIdx);
  // If the candidate's .sf directory resolves inside SF_HOME, this is the
  // external-state layout (~/.sf/projects/<hash>/worktrees/...): recover the
  // real project root from the worktree's .git file instead.
  const sfHomeNorm = normalizeWorktreePathForCompare(sfHome());
  const candidateSfPath = normalizeWorktreePathForCompare(join(candidate, ".sf"));
  if (candidateSfPath === sfHomeNorm || candidateSfPath.startsWith(`${sfHomeNorm}/`)) {
    const realRoot = resolveProjectRootFromGitFile(path);
    return realRoot ?? path;
  }
  return candidate;
}
/**
 * Walk upward from `path` (or its parent when `path` is a file) looking for a
 * directory whose `.sf` folder carries bootstrap artifacts. Stops at the
 * first `.git` boundary — a git root without SF artifacts returns null so the
 * caller can fall back to plain git-root resolution. Walks at most 30 levels.
 */
function resolveNearestBootstrappedSfRoot(path: string): string | null {
  try {
    let dir = path;
    if (existsSync(path) && !statSync(path).isDirectory()) {
      dir = resolve(path, "..");
    }
    for (let depth = 0; depth < 30; depth++) {
      if (hasSfBootstrapArtifacts(join(dir, ".sf"))) return dir;
      if (existsSync(join(dir, ".git"))) return null;
      const parent = resolve(dir, "..");
      if (parent === dir) break;
      dir = parent;
    }
  } catch {
    // Non-fatal: callers fall back to git root resolution.
  }
  return null;
}

/** True when an .sf directory contains recognizable bootstrap artifacts. */
function hasSfBootstrapArtifacts(sfPath: string): boolean {
  if (!existsSync(sfPath)) return false;
  return (
    existsSync(join(sfPath, "PREFERENCES.md")) ||
    existsSync(join(sfPath, "preferences.md")) ||
    existsSync(join(sfPath, "milestones"))
  );
}
/**
 * Walk upward from `path` (or its parent when `path` is a file) to the
 * nearest directory containing a `.git` entry. Walks at most 30 levels;
 * returns null when no git root is found.
 */
function resolveGitWorkingTreeRoot(path: string): string | null {
  try {
    let dir = path;
    if (existsSync(path) && !statSync(path).isDirectory()) {
      dir = resolve(path, "..");
    }
    for (let depth = 0; depth < 30; depth++) {
      if (existsSync(join(dir, ".git"))) return dir;
      const parent = resolve(dir, "..");
      if (parent === dir) break;
      dir = parent;
    }
  } catch {
    // Non-fatal: callers either keep the original path or fail closed.
  }
  return null;
}
/**
 * Recover the main project root from a linked worktree's `.git` file.
 *
 * Walks up from `worktreePath` to the first `.git` entry. A linked worktree
 * has a `.git` *file* containing `gitdir: <path>`; that gitdir normally lives
 * at `<root>/.git/worktrees/<name>`, so two levels up yields `<root>/.git`
 * and its parent is the project root. When that shape doesn't hold, the
 * `commondir` file inside the gitdir is used instead. Returns null when no
 * root can be derived.
 */
function resolveProjectRootFromGitFile(worktreePath: string): string | null {
  try {
    let dir = worktreePath;
    for (let i = 0; i < 30; i++) {
      const gitPath = join(dir, ".git");
      if (existsSync(gitPath)) {
        const content = readFileSync(gitPath, "utf8").trim();
        if (content.startsWith("gitdir: ")) {
          // Strip the "gitdir: " prefix; the path may be relative to `dir`.
          const gitDir = resolve(dir, content.slice(8));
          // <root>/.git/worktrees/<name> → two levels up is <root>/.git.
          const dotGitDir = resolve(gitDir, "..", "..");
          if (dotGitDir.endsWith(".git") || dotGitDir.endsWith(".git/") || dotGitDir.endsWith(".git\\")) {
            return resolve(dotGitDir, "..");
          }
          // Fallback: the gitdir's `commondir` file points at the shared
          // .git directory; its parent is the project root.
          const commonDirPath = join(gitDir, "commondir");
          if (existsSync(commonDirPath)) {
            const commonDir = readFileSync(commonDirPath, "utf8").trim();
            const resolvedCommonDir = resolve(gitDir, commonDir);
            return resolve(resolvedCommonDir, "..");
          }
        }
        // Found a .git directory (not a worktree file) — nothing to derive.
        break;
      }
      const parent = resolve(dir, "..");
      if (parent === dir) break;
      dir = parent;
    }
  } catch {
    // Non-fatal: callers either keep the original path or fail closed.
  }
  return null;
}

View file

@ -0,0 +1,35 @@
// SF worktree session state — tracks the original project root while the
// session's cwd is inside an SF worktree so /worktree merge|return can
// restore it. Module-level state: it resets whenever extensions reload.

// Original project root captured before chdir into a worktree, or null.
let originalCwd: string | null = null;

/** Get the stored original project root, or null when not in a worktree. */
export function getWorktreeOriginalCwd(): string | null {
  return originalCwd;
}

/** Remember the project root before the session enters a worktree. */
export function setWorktreeOriginalCwd(cwd: string): void {
  originalCwd = cwd;
}

/** Forget the stored project root (after leaving the worktree). */
export function clearWorktreeOriginalCwd(): void {
  originalCwd = null;
}

/**
 * Recover the original project root from a worktree-shaped path when no root
 * is stored (e.g. after /reload wiped module state). Splits `cwd` on the
 * `.sf/worktrees/` marker using the path's native separator and caches the
 * prefix. Returns the stored or recovered root, or null when `cwd` is not
 * inside a worktree.
 */
export function ensureWorktreeOriginalCwdFromPath(cwd: string = process.cwd()): string | null {
  if (originalCwd) return originalCwd;
  // Probe the separator once — the previous version rebuilt the same
  // regex test three times inline while assembling the marker.
  const sep = cwd.includes("\\") ? "\\" : "/";
  const marker = `${sep}.sf${sep}worktrees${sep}`;
  const markerIdx = cwd.indexOf(marker);
  if (markerIdx !== -1) {
    originalCwd = cwd.slice(0, markerIdx);
  }
  return originalCwd;
}

/**
 * Get the name of the active worktree — the first path segment of the
 * current cwd under `<root>/.sf/worktrees/` — or null when no root is
 * stored or the cwd is outside the worktrees directory.
 */
export function getActiveWorktreeName(): string | null {
  if (!originalCwd) return null;
  const cwd = process.cwd();
  const wtDir = `${originalCwd.replace(/[\\/]+$/, "")}/.sf/worktrees`.replaceAll("\\", "/");
  const normalizedCwd = cwd.replaceAll("\\", "/");
  if (!normalizedCwd.startsWith(`${wtDir}/`)) return null;
  const rel = normalizedCwd.slice(wtDir.length + 1);
  const name = rel.split("/")[0];
  return name || null;
}