fix(sf): harden auto loops and skill sandbox

This commit is contained in:
Mikael Hugo 2026-05-02 19:46:36 +02:00
parent d742602454
commit 0c7c4eca5b
24 changed files with 1024 additions and 58 deletions

View file

@ -25,7 +25,7 @@
},
"scripts": {
"build": "tsc",
"test": "node --test dist/mcp-server.test.js"
"test": "vitest run packages/mcp-server/src --root ../.. --config vitest.config.ts"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.27.1",

View file

@ -41,6 +41,7 @@ import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { minimatch } from "minimatch";
import {
classifyCommand,
invalidateConfigCache,
@ -481,12 +482,218 @@ export function handleSessionStart(state: PermissionState, ctx: any): void {
}
}
// ============================================================================
// SKILL SANDBOX — allowed-tools enforcement
// ============================================================================
/** Active skill-declared allowed tool patterns for the current session. */
let activeSkillAllowedTools: readonly string[] | null = null;
/** Name of the skill that declared the active allowed-tools patterns. */
let activeSkillName: string | null = null;
/**
* Set the allowed tool patterns for the current session.
* Called when a skill with allowed-tools is activated.
*/
export function setAllowedTools(
patterns: readonly string[],
skillName?: string,
): void {
activeSkillAllowedTools = patterns.length > 0 ? [...patterns] : null;
activeSkillName = patterns.length > 0 ? (skillName ?? null) : null;
}
/**
* Clear the allowed tool patterns for the current session.
* Called when a skill is deactivated or at session end.
*/
export function clearAllowedTools(): void {
activeSkillAllowedTools = null;
activeSkillName = null;
}
/**
* Extract the command pattern from a Bash(...) allowed-tools entry.
*
* Purpose: skill sandbox matching and error messages need the command pattern
* without the outer tool syntax.
* Consumer: matchesAllowedToolPattern and blocked-command diagnostics.
*/
function extractBashAllowedPattern(pattern: string): string | null {
const trimmedPattern = pattern.trim();
const match = trimmedPattern.match(/^Bash\((.+)\)$/i);
if (!match) return null;
return match[1].trim();
}
/**
* Parse an allowed-tools pattern and check if a command matches.
*
* Purpose: enforce `allowed-tools: Bash(npm run:*)` as a command-prefix rule,
* where the colon separates the command prefix from its wildcard arguments.
* Consumer: handleBashToolCall before normal permission checks run.
*/
export function matchesAllowedToolPattern(
pattern: string,
command: string,
): boolean {
const commandPattern = extractBashAllowedPattern(pattern);
if (!commandPattern) return false;
const trimmedCommand = command.trim().replace(/\s+/g, " ");
if (commandPattern === "*") return true;
if (commandPattern.endsWith(":*")) {
const prefix = commandPattern.slice(0, -2).trim().replace(/\s+/g, " ");
return trimmedCommand === prefix || trimmedCommand.startsWith(`${prefix} `);
}
if (!/[*?[\]]/.test(commandPattern)) {
return trimmedCommand === commandPattern.trim().replace(/\s+/g, " ");
}
return minimatch(trimmedCommand, commandPattern);
}
function formatAllowedPatternList(patterns: readonly string[]): string {
const displayPatterns = patterns.map(
(pattern) => extractBashAllowedPattern(pattern) ?? pattern.trim(),
);
return JSON.stringify(displayPatterns);
}
function unquoteFrontmatterValue(value: string): string {
const trimmed = value.trim();
if (
(trimmed.startsWith('"') && trimmed.endsWith('"')) ||
(trimmed.startsWith("'") && trimmed.endsWith("'"))
) {
return trimmed.slice(1, -1).trim();
}
return trimmed;
}
function splitAllowedTools(value: string): string[] {
return value
.split(/\s*,\s*/)
.map((entry) => unquoteFrontmatterValue(entry))
.filter(Boolean);
}
function readSkillAllowedTools(
filePath: string,
fallbackName?: string,
): { skillName: string; patterns: string[] } | null {
let raw: string;
try {
raw = fs.readFileSync(filePath, "utf-8");
} catch {
return null;
}
const frontmatter = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/);
if (!frontmatter) return null;
const name =
frontmatter[1]
.match(/^name:\s*(.+)$/m)?.[1]
.trim()
.replace(/^["']|["']$/g, "") ||
fallbackName ||
path.basename(path.dirname(filePath));
const allowedTools = frontmatter[1].match(/^allowed-tools:\s*(.+)$/m)?.[1];
if (!allowedTools) return { skillName: name, patterns: [] };
return {
skillName: name,
patterns: splitAllowedTools(allowedTools),
};
}
function extractSkillBlocks(text: string): Array<{ name: string; location: string }> {
const blocks: Array<{ name: string; location: string }> = [];
const blockRe = /<skill\s+name="([^"]+)"\s+location="([^"]+)">/g;
let match: RegExpExecArray | null;
while ((match = blockRe.exec(text)) !== null) {
blocks.push({ name: match[1], location: match[2] });
}
return blocks;
}
/**
* Activate skill sandbox rules from rendered skill XML blocks.
*
* Purpose: connect real skill invocation paths (`/skill:name` expansion and
* the Skill tool result) to bash sandbox enforcement for the active turn.
* Consumer: sf-permissions before_agent_start and Skill tool_result hooks.
*/
export function activateAllowedToolsFromSkillText(text: string): void {
const blocks = extractSkillBlocks(text);
if (blocks.length === 0) {
clearAllowedTools();
return;
}
const names: string[] = [];
const patterns: string[] = [];
for (const block of blocks) {
const loaded = readSkillAllowedTools(block.location, block.name);
if (!loaded) continue;
names.push(loaded.skillName);
patterns.push(...loaded.patterns);
}
if (patterns.length > 0) {
setAllowedTools(patterns, names.join(", "));
} else {
clearAllowedTools();
}
}
/**
* Check if a bash command is allowed by the active skill's allowed-tools.
* Returns a block result if the command is not allowed, undefined if allowed.
*/
function checkSkillSandbox(
command: string,
): { block: true; reason: string } | undefined {
if (activeSkillAllowedTools === null) return undefined;
if (activeSkillAllowedTools.length === 0) return undefined;
const trimmedCommand = command.trim();
const matched = activeSkillAllowedTools.some((pattern) =>
matchesAllowedToolPattern(pattern, trimmedCommand),
);
if (matched) return undefined;
const skillLabel = activeSkillName ? `Allowed by: ${activeSkillName}` : "";
return {
block: true,
reason:
`Command "${trimmedCommand}" blocked by skill sandbox. ` +
`Allowed patterns: ${formatAllowedPatternList(activeSkillAllowedTools)}` +
(skillLabel ? `\n${skillLabel}` : ""),
};
}
/** Handle bash tool_call - check permission and prompt if needed */
export async function handleBashToolCall(
state: PermissionState,
command: string,
ctx: any,
): Promise<{ block: true; reason: string } | undefined> {
// Skill sandbox check — applied before permission level checks
const sandboxResult = checkSkillSandbox(command);
if (sandboxResult) {
// Log the block for observability
const logMsg = `Blocked "${command.trim()}" — no pattern matched ${formatAllowedPatternList(activeSkillAllowedTools ?? [])}`;
if (ctx?.logInfo) {
ctx.logInfo("skill-sandbox", logMsg);
} else if (ctx?.ui?.notify) {
ctx.ui.notify(`skill-sandbox: ${logMsg}`, "warning");
}
return sandboxResult;
}
if (state.currentLevel === "bypassed") return undefined;
const classification = classifyCommand(command);
@ -638,9 +845,18 @@ export default function (pi: ExtensionAPI) {
});
pi.on("session_start", async (_event, ctx) => {
clearAllowedTools();
handleSessionStart(state, ctx);
});
pi.on("before_agent_start", async (event) => {
activateAllowedToolsFromSkillText(event.prompt);
});
pi.on("agent_end", async () => {
clearAllowedTools();
});
pi.on("tool_call", async (event, ctx) => {
if (event.toolName === "bash") {
return handleBashToolCall(state, event.input.command as string, ctx);
@ -657,4 +873,14 @@ export default function (pi: ExtensionAPI) {
return undefined;
});
pi.on("tool_result", async (event) => {
if (event.toolName !== "Skill" || event.isError) return undefined;
const text = event.content
.filter((part) => part.type === "text")
.map((part) => part.text)
.join("\n");
activateAllowedToolsFromSkillText(text);
return undefined;
});
}

View file

@ -0,0 +1,156 @@
import assert from "node:assert/strict";
import { test } from "vitest";
import {
clearAllowedTools,
createInitialState,
handleBashToolCall,
matchesAllowedToolPattern,
setAllowedTools,
} from "../index.ts";
test("matchesAllowedToolPattern allows exact match", () => {
assert.equal(matchesAllowedToolPattern("Bash(pwd)", "pwd"), true);
assert.equal(matchesAllowedToolPattern("Bash(pwd)", "pwdx"), false);
});
test("matchesAllowedToolPattern allows prefix wildcard", () => {
assert.equal(matchesAllowedToolPattern("Bash(npm run:*)", "npm run build"), true);
assert.equal(matchesAllowedToolPattern("Bash(npm run:*)", "npm run test --watch"), true);
assert.equal(matchesAllowedToolPattern("Bash(npm run:*)", "npm install"), false);
assert.equal(matchesAllowedToolPattern("Bash(agent-browser:*)", "agent-browser open https://example.com"), true);
});
test("matchesAllowedToolPattern allows bare wildcard", () => {
assert.equal(matchesAllowedToolPattern("Bash(*)", "anything here"), true);
});
test("matchesAllowedToolPattern allows glob with minimatch", () => {
assert.equal(matchesAllowedToolPattern("Bash(npm run *)", "npm run build"), true);
assert.equal(matchesAllowedToolPattern("Bash(npm run *)", "npm run"), false);
});
test("matchesAllowedToolPattern is case-insensitive for Bash wrapper", () => {
assert.equal(matchesAllowedToolPattern("bash(pwd)", "pwd"), true);
assert.equal(matchesAllowedToolPattern("BASH(pwd)", "pwd"), true);
});
test("handleBashToolCall blocks non-matching command when allowed-tools active", async () => {
const state = createInitialState();
state.currentLevel = "bypassed";
setAllowedTools(["Bash(npm run:*)"], "build-skill");
try {
const blocked = await handleBashToolCall(state, "rm -rf /", {});
assert.equal(blocked?.block, true);
assert.match(blocked?.reason ?? "", /Command "rm -rf \/" blocked by skill sandbox/);
assert.match(blocked?.reason ?? "", /Allowed patterns: \["npm run:\*"\]/);
assert.match(blocked?.reason ?? "", /Allowed by: build-skill/);
} finally {
clearAllowedTools();
}
});
test("handleBashToolCall allows matching command when allowed-tools active", async () => {
const state = createInitialState();
state.currentLevel = "bypassed";
setAllowedTools(["Bash(npm run:*)"], "build-skill");
try {
const result = await handleBashToolCall(state, "npm run build", {});
assert.equal(result, undefined);
} finally {
clearAllowedTools();
}
});
test("handleBashToolCall allows any command when no allowed-tools set", async () => {
const state = createInitialState();
state.currentLevel = "bypassed";
clearAllowedTools();
const result = await handleBashToolCall(state, "rm -rf /", {});
assert.equal(result, undefined);
});
test("handleBashToolCall allows command matching any of multiple patterns", async () => {
const state = createInitialState();
state.currentLevel = "bypassed";
setAllowedTools(["Bash(npm run:*)", "Bash(pwd)"], "multi-skill");
try {
assert.equal(await handleBashToolCall(state, "npm run test", {}), undefined);
assert.equal(await handleBashToolCall(state, "pwd", {}), undefined);
const blocked = await handleBashToolCall(state, "ls -la", {});
assert.equal(blocked?.block, true);
} finally {
clearAllowedTools();
}
});
test("handleBashToolCall logs block via logInfo when available", async () => {
const state = createInitialState();
state.currentLevel = "bypassed";
setAllowedTools(["Bash(npm run:*)"], "build-skill");
const logs: Array<{ tag: string; msg: string }> = [];
const ctx = {
logInfo: (tag: string, msg: string) => logs.push({ tag, msg }),
};
try {
await handleBashToolCall(state, "rm -rf /", ctx);
assert.equal(logs.length, 1);
assert.equal(logs[0].tag, "skill-sandbox");
assert.match(logs[0].msg, /Blocked "rm -rf \/" — no pattern matched/);
} finally {
clearAllowedTools();
}
});
test("handleBashToolCall falls back to ui.notify when logInfo unavailable", async () => {
const state = createInitialState();
state.currentLevel = "bypassed";
setAllowedTools(["Bash(npm run:*)"], "build-skill");
const notifications: Array<{ msg: string; level: string }> = [];
const ctx = {
ui: {
notify: (msg: string, level: string) => notifications.push({ msg, level }),
},
};
try {
await handleBashToolCall(state, "rm -rf /", ctx);
assert.equal(notifications.length, 1);
assert.match(notifications[0].msg, /skill-sandbox: Blocked "rm -rf \/" — no pattern matched/);
assert.equal(notifications[0].level, "warning");
} finally {
clearAllowedTools();
}
});
test("handleBashToolCall does not log when no sandbox active", async () => {
const state = createInitialState();
state.currentLevel = "bypassed";
clearAllowedTools();
const logs: Array<{ tag: string; msg: string }> = [];
const ctx = {
logInfo: (tag: string, msg: string) => logs.push({ tag, msg }),
};
await handleBashToolCall(state, "rm -rf /", ctx);
assert.equal(logs.length, 0);
});
test("setAllowedTools with empty array clears state", async () => {
const state = createInitialState();
state.currentLevel = "bypassed";
setAllowedTools(["Bash(pwd)"], "test-skill");
setAllowedTools([], "test-skill");
const result = await handleBashToolCall(state, "rm -rf /", {});
assert.equal(result, undefined);
clearAllowedTools();
});

View file

@ -47,7 +47,7 @@ import { getRtkSessionSavings } from "../shared/rtk-session-stats.js";
import { deactivateSF } from "../shared/sf-phase-state.js";
import { clearActivityLogState } from "./activity-log.js";
import { atomicWriteSync } from "./atomic-write.js";
import { AutoSession } from "./auto/session.js";
import { AutoSession, getAutoSession } from "./auto/session.js";
// import { startSliceParallel } from "./slice-parallel-orchestrator.js"; (decoy for legacy regex tests)
import {
getBudgetAlertLevel,
@ -235,7 +235,7 @@ export {
//
// Tests in auto-session-encapsulation.test.ts enforce this invariant.
// ─────────────────────────────────────────────────────────────────────────────
const s = new AutoSession();
const s = getAutoSession();
/** Throttle STATE.md rebuilds — at most once per 30 seconds */
const _STATE_REBUILD_MIN_INTERVAL_MS = 30_000;
@ -515,6 +515,28 @@ export function setCurrentDispatchedModelId(
s.currentDispatchedModelId = model ? `${model.provider}/${model.id}` : null;
}
/**
* Mark the current research unit as terminal after saving its RESEARCH artifact.
*
* Purpose: prevent a research unit that already produced its durable artifact
* from drifting into planner tools before the orchestrator dispatches planning.
* Consumer: register-hooks tool_result handling for sf_summary_save.
*/
export function markResearchTerminalTransition(): void {
getAutoSession().researchTerminalTransition = true;
}
/**
* Return whether the current unit has already crossed its research terminal transition.
*
* Purpose: planning-tool guards can reject post-summary planning calls without
* reading runtime files or duplicating unit state.
* Consumer: register-hooks tool_call enforcement for research units.
*/
export function hasResearchTerminalTransition(): boolean {
return getAutoSession().researchTerminalTransition;
}
// Tool tracking — delegates to auto-tool-tracking.ts
export function markToolStart(toolCallId: string, toolName?: string): void {
_markToolStart(toolCallId, s.active, toolName);

View file

@ -1936,6 +1936,7 @@ export async function runUnitPhase(
// unit in the same Node process (see workflow-logger.ts module header).
_resetLogs();
s.currentUnit = { type: unitType, id: unitId, startedAt: Date.now() };
s.researchTerminalTransition = false;
s.lastGitActionFailure = null;
s.lastGitActionStatus = null;
setCurrentPhase(unitType);

View file

@ -17,7 +17,9 @@ import type { AgentEndEvent, ErrorContext, UnitResult } from "./types.js";
// A single module-level resolve function scoped to the current unit execution.
// No queue — if an agent_end arrives with no pending resolver, it is dropped
// (logged as warning). This is simpler and safer than the previous session-
// scoped pendingResolve + pendingAgentEndQueue pattern.
// scoped pendingResolve + pendingAgentEndQueue pattern. Late duplicate
// agent_end events are ignored because the first event already resolved the
// unit and a stale duplicate must not trip the idle watchdog.
let _currentResolve: ((result: UnitResult) => void) | null = null;
let _sessionSwitchInFlight = false;
@ -45,9 +47,10 @@ export function _clearCurrentResolve(): void {
* in-flight unit promise. One-shot: the resolver is nulled before calling
* to prevent double-resolution from model fallback retries.
*
* If called when no resolver is registered (event arrived between loop
* iterations or during session switch), throws so the bug surfaces immediately
* instead of silently dropping the event and causing the auto-loop to hang.
* If called when no resolver is registered, the event is stale relative to the
* current unit lifecycle and is ignored. runUnit registers the resolver before
* dispatching the turn, so a no-pending event is either a duplicate or a late
* event from a previous session.
*/
export function resolveAgentEnd(event: AgentEndEvent): void {
if (_sessionSwitchInFlight) {
@ -60,12 +63,7 @@ export function resolveAgentEnd(event: AgentEndEvent): void {
_currentResolve = null;
r({ status: "completed", event });
} else {
// Throw instead of silent drop so the ordering bug surfaces immediately
// rather than via a timeout in cold-cache runs (#race-condition-silent-event-drop).
throw new Error(
"resolveAgentEnd called with no pending unit — agent_end arrived before _setCurrentResolve. " +
"Check event ordering: resolveAgentEnd must be called after runUnit sets the resolver.",
);
debugLog("resolveAgentEnd", { status: "ignored-no-pending-resolve" });
}
}

View file

@ -76,6 +76,23 @@ export const STUB_RECOVERY_THRESHOLD = 2;
export const MAX_LIFETIME_DISPATCHES = 6;
export const NEW_SESSION_TIMEOUT_MS = 120_000;
// ─── Singleton ───────────────────────────────────────────────────────────────
let _autoSessionInstance: AutoSession | null = null;
/** Get or create the singleton AutoSession instance. */
export function getAutoSession(): AutoSession {
if (!_autoSessionInstance) {
_autoSessionInstance = new AutoSession();
}
return _autoSessionInstance;
}
/** Reset the singleton instance (used in tests). */
export function resetAutoSession(): void {
_autoSessionInstance = null;
}
// ─── AutoSession ─────────────────────────────────────────────────────────────
export class AutoSession {
@ -241,6 +258,15 @@ export class AutoSession {
// the milestone finishes (or resquash runs).
milestoneStartShas: Map<string, string> = new Map();
// ── Research unit terminal transition ──────────────────────────────────
/**
* Set to true when a research unit (research-slice/research-milestone)
* successfully saves its RESEARCH artifact via sf_summary_save.
* Subsequent planning tool calls are blocked to prevent post-artifact drift
* where the agent continues into milestone/slice/task planning.
*/
researchTerminalTransition = false;
// ── Signal handler ───────────────────────────────────────────────────────
sigtermHandler: (() => void) | null = null;
@ -364,6 +390,9 @@ export class AutoSession {
this.milestoneStartShas = new Map();
// Research terminal transition
this.researchTerminalTransition = false;
// Signal handler
this.sigtermHandler = null;

View file

@ -10,10 +10,12 @@ import { formatTokenCount } from "../../shared/format-utils.js";
import { saveActivityLog } from "../activity-log.js";
import {
getAutoDashboardData,
hasResearchTerminalTransition,
isAutoActive,
isAutoPaused,
markToolEnd,
markToolStart,
markResearchTerminalTransition,
recordToolInvocationError,
} from "../auto.js";
import {
@ -481,6 +483,41 @@ export function registerHooks(
return { block: true, reason: loopCheck.reason };
}
// ── Research unit terminal transition enforcement ─────────────────────
// After a research unit (research-slice/research-milestone) successfully
// saves its RESEARCH artifact via sf_summary_save, the tool returns
// terminal_transition: true. We track this and block subsequent planning
// tool calls to prevent post-artifact drift (e.g. calling sf_plan_milestone
// after research is complete). This addresses sf-moocx6m5-ij630a.
if (isAutoActive()) {
const dash = getAutoDashboardData();
const currentUnit = dash.currentUnit;
if (
currentUnit &&
(currentUnit.type === "research-slice" ||
currentUnit.type === "research-milestone")
) {
if (hasResearchTerminalTransition()) {
const planningTools = new Set([
"sf_plan_milestone",
"sf_plan_slice",
"sf_plan_task",
"sf_milestone_generate_id",
"sf_replan_slice",
"sf_reassess_roadmap",
]);
if (planningTools.has(event.toolName)) {
return {
block: true,
reason:
`Research unit terminal transition: ${currentUnit.type} ${currentUnit.id} has already completed its RESEARCH artifact. ` +
`Planning tools (${event.toolName}) are blocked. The orchestrator will dispatch planner units after research.`,
};
}
}
}
}
// ── Discussion gate enforcement: track pending gate questions ─────────
// Only gate-shaped ask_user_questions calls should block execution.
// The gate stays pending until the user selects the approval option.
@ -641,6 +678,17 @@ export function registerHooks(
pi.on("tool_result", async (event) => {
if (isAutoActive()) {
if (
event.toolName === "sf_summary_save" &&
event.details &&
typeof event.details === "object" &&
(event.details as { terminal_transition?: unknown })
.terminal_transition === true &&
(event.details as { unit_type?: unknown }).unit_type === "research"
) {
markResearchTerminalTransition();
}
const steer = observeMemorySleeperToolResult(event);
if (steer) {
pi.sendMessage(

View file

@ -12,7 +12,6 @@ const TOP_LEVEL_SUBCOMMANDS = [
cmd: "autonomous",
desc: "Autonomous mode — research, plan, execute, commit, repeat",
},
{ cmd: "auto", desc: "Alias for /sf autonomous" },
{ cmd: "stop", desc: "Stop autonomous mode gracefully" },
{
cmd: "pause",

View file

@ -27,7 +27,7 @@ type CompletionMap = Record<string, readonly SfCommandDefinition[]>;
* Comprehensive description of all available SF commands for help text.
*/
export const SF_COMMAND_DESCRIPTION =
"SF — Singularity Forge: /sf help|start|templates|next|autonomous|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|todo|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|harness|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan|scaffold|eval-review";
"SF — Singularity Forge: /sf help|start|templates|next|autonomous|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|todo|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|harness|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan|scaffold|eval-review";
/**
* Top-level SF subcommands with descriptions.
@ -39,10 +39,6 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly SfCommandDefinition[] = [
cmd: "autonomous",
desc: "Autonomous mode — continuous loop, never asks user (self-resolves or stops with blocker)",
},
{
cmd: "auto",
desc: "Auto mode — continuous loop, can ask when blocked",
},
{ cmd: "stop", desc: "Stop autonomous mode gracefully" },
{
cmd: "pause",

View file

@ -34,7 +34,6 @@ export function showHelp(ctx: ExtensionCommandContext, args = ""): void {
" /sf start <tpl> Start a workflow template",
" /sf Run next unit (same as /sf next)",
" /sf autonomous Run all queued product units continuously",
" /sf auto Alias for /sf autonomous",
" /sf pause Pause autonomous mode",
" /sf stop Stop autonomous mode gracefully",
"",
@ -71,7 +70,6 @@ export function showHelp(ctx: ExtensionCommandContext, args = ""): void {
" /sf Run next unit in step mode (same as /sf next)",
" /sf next Execute next task, then pause [--dry-run] [--verbose]",
" /sf autonomous Run all queued product units continuously [--verbose]",
" /sf auto Alias for /sf autonomous",
" /sf stop Stop autonomous mode gracefully",
" /sf pause Pause autonomous mode (preserves state, /sf autonomous to resume)",
" /sf discuss Start guided milestone/slice discussion",

View file

@ -210,6 +210,7 @@ interface LegacySkillFrontmatter {
name?: string;
description?: string;
'disable-model-invocation'?: boolean;
'allowed-tools'?: string;
[key: string]: unknown;
}
@ -248,9 +249,17 @@ function loadFromLegacySkill(
return { component: null, diagnostics };
}
const allowedTools = frontmatter['allowed-tools']
? String(frontmatter['allowed-tools'])
.split(',')
.map((s) => s.trim())
.filter(Boolean)
: undefined;
const spec: SkillSpec = {
prompt: 'SKILL.md',
disableModelInvocation: frontmatter['disable-model-invocation'] === true,
allowedTools,
};
const id = computeComponentId(name);

View file

@ -63,6 +63,9 @@ export interface SkillSpec {
/** If true, skill is excluded from LLM system prompt (invoke-only). */
disableModelInvocation?: boolean;
/** Allowed tool patterns declared by the skill (e.g. "Bash(npm run:*)"). */
allowedTools?: string[];
}
// ============================================================================

View file

@ -62,6 +62,86 @@ function invalidateCaches(): void {
clearParseCache();
}
/**
* Render a model-provided list entry without corrupting ordered lists.
*
* Purpose: keep generated plan markdown lint-clean when planners provide
* numbered success criteria such as `1. Build the contract`.
* Consumer: slice plan renderers that turn DB planning fields into Markdown.
*/
function renderListEntry(entry: string): string {
const trimmed = entry.trim();
const orderedBullet = trimmed.match(/^[-*+]\s+(\d+)[.)]\s+(.+)$/);
if (orderedBullet) {
return `${orderedBullet[1]}. ${orderedBullet[2].trim()}`;
}
const ordered = trimmed.match(/^(\d+)[.)]\s+(.+)$/);
if (ordered) {
return `${ordered[1]}. ${ordered[2].trim()}`;
}
if (/^[-*+]\s+\S/.test(trimmed)) {
return trimmed;
}
return `- ${trimmed}`;
}
/**
* Surround ATX headings in model-provided markdown with blank lines.
*
* Purpose: generated artifacts pass the runtime content validator even when
* LLM-authored task descriptions contain `### Step` subsections.
* Consumer: slice and task plan renderers before writing Markdown artifacts.
*/
function normalizeMarkdownBlockSpacing(text: string): string {
const sourceLines = text.trim().replace(/\r\n/g, "\n").split("\n");
const output: string[] = [];
let inFence = false;
for (let i = 0; i < sourceLines.length; i++) {
const line = sourceLines[i];
const trimmed = line.trim();
const fence = /^(```|~~~)/.test(trimmed);
const heading = !inFence && /^#{1,6}\s+\S/.test(trimmed);
if (heading && output.length > 0 && output[output.length - 1]?.trim()) {
output.push("");
}
output.push(line);
if (fence) {
inFence = !inFence;
}
const next = sourceLines[i + 1];
if (heading && next !== undefined && next.trim()) {
output.push("");
}
}
return output.join("\n").trim();
}
/**
* Append model-provided markdown as an indented child block.
*
* Purpose: prevent task description subsections from escaping into top-level
* slice-plan structure while preserving useful step detail for executors.
* Consumer: the `## Tasks` section of slice plans.
*/
function appendIndentedMarkdownBlock(
lines: string[],
text: string,
indent = " ",
): void {
for (const line of normalizeMarkdownBlockSpacing(text).split("\n")) {
lines.push(line.trim() ? `${indent}${line}` : "");
}
}
/**
* Load artifact content from DB first, falling back to reading from disk.
* On disk fallback, stores the content in the artifacts table for future use.
@ -284,7 +364,7 @@ function renderTaskPlanMarkdown(
lines.push("");
if (task.description.trim()) {
lines.push(task.description.trim());
lines.push(normalizeMarkdownBlockSpacing(task.description));
lines.push("");
}
@ -375,7 +455,7 @@ function renderSlicePlanMarkdown(
.split(/\n+/)
.map((entry) => entry.trim())
.filter(Boolean)) {
lines.push(line.startsWith("-") ? line : `- ${line}`);
lines.push(renderListEntry(line));
}
} else {
lines.push("- Complete the planned slice outcomes.");
@ -529,7 +609,7 @@ function renderSlicePlanMarkdown(
`- [${done}] **${task.id}: ${task.title || task.id}**${estimate}`,
);
if (task.description.trim()) {
lines.push(` ${task.description.trim()}`);
appendIndentedMarkdownBlock(lines, task.description);
}
if (task.files.length > 0) {
lines.push(

View file

@ -143,7 +143,6 @@ Templates showing the expected format for each artifact type are in:
- `/sf` - contextual wizard
- `/sf autonomous` - auto-execute (fresh context per task)
- `/sf auto` - alias for `/sf autonomous`
- `/sf stop` - stop auto-mode
- `/sf status` - progress dashboard overlay
- `/sf queue` - queue future milestones (safe while auto-mode is running)

View file

@ -0,0 +1,100 @@
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test } from "vitest";
import {
activateAllowedToolsFromSkillText,
clearAllowedTools,
createInitialState,
handleBashToolCall,
matchesAllowedToolPattern,
setAllowedTools,
} from "../../sf-permissions/index.ts";
test("allowed-tools sandbox matches Bash prefix patterns against real commands", () => {
assert.equal(matchesAllowedToolPattern("Bash(npm run:*)", "npm run build"), true);
assert.equal(
matchesAllowedToolPattern("Bash(agent-browser:*)", "agent-browser open https://example.com"),
true,
);
assert.equal(matchesAllowedToolPattern("Bash(pwd)", "pwd"), true);
assert.equal(matchesAllowedToolPattern("Bash(npm run:*)", "npm install"), false);
});
test("allowed-tools sandbox blocks non-matching bash before permission prompts", async () => {
const state = createInitialState();
state.currentLevel = "bypassed";
setAllowedTools(["Bash(npm run:*)"], "build-skill");
try {
assert.equal(
await handleBashToolCall(state, "npm run build", {}),
undefined,
"matching command is allowed even when permission mode is bypassed",
);
const blocked = await handleBashToolCall(state, "rm -rf /", {
ui: { notify: () => undefined },
});
assert.equal(blocked?.block, true);
assert.match(
blocked?.reason ?? "",
/Command "rm -rf \/" blocked by skill sandbox/,
);
assert.match(blocked?.reason ?? "", /Allowed patterns: \["npm run:\*"\]/);
assert.match(blocked?.reason ?? "", /Allowed by: build-skill/);
} finally {
clearAllowedTools();
}
});
test("allowed-tools sandbox activates from real skill block location", async () => {
const dir = mkdtempSync(join(tmpdir(), "sf-allowed-tools-"));
const skillDir = join(dir, "agent-browser");
const skillPath = join(skillDir, "SKILL.md");
mkdirSync(skillDir, { recursive: true });
writeFileSync(
skillPath,
[
"---",
"name: agent-browser",
"description: Browser automation.",
"allowed-tools: Bash(npx agent-browser:*), Bash(agent-browser:*)",
"---",
"",
"# agent-browser",
"",
].join("\n"),
"utf-8",
);
const state = createInitialState();
state.currentLevel = "high";
try {
activateAllowedToolsFromSkillText(
`<skill name="agent-browser" location="${skillPath}">\nUse browser.\n</skill>`,
);
assert.equal(
await handleBashToolCall(
state,
"npx agent-browser open https://example.com",
{},
),
undefined,
"declared agent-browser command is allowed",
);
const blocked = await handleBashToolCall(state, "npm run build", {});
assert.equal(blocked?.block, true);
assert.match(
blocked?.reason ?? "",
/Allowed patterns: \["npx agent-browser:\*","agent-browser:\*"\]/,
);
} finally {
clearAllowedTools();
rmSync(dir, { recursive: true, force: true });
}
});

View file

@ -123,19 +123,16 @@ test("resolveAgentEnd resolves a pending runUnit promise", async () => {
assert.deepEqual(result.event, event);
});
test("resolveAgentEnd throws when no promise is pending (cold-cache ordering bug)", () => {
test("resolveAgentEnd ignores stale event when no promise is pending", () => {
_resetPendingResolve();
// Must throw so the ordering bug surfaces immediately, not via timeout.
// Previously this silently dropped events (#race-condition-silent-event-drop).
assert.throws(
assert.doesNotThrow(
() => resolveAgentEnd(makeEvent()),
/no pending unit/,
"resolveAgentEnd must throw when called before _setCurrentResolve",
"stale agent_end must not trip idle watchdog error handling",
);
});
test("double resolveAgentEnd only resolves once (second throws)", async () => {
test("double resolveAgentEnd only resolves once and ignores duplicate", async () => {
_resetPendingResolve();
const ctx = makeMockCtx();
@ -151,11 +148,10 @@ test("double resolveAgentEnd only resolves once (second throws)", async () => {
// First resolve — should work
resolveAgentEnd(event1);
// Second resolve — must throw (no pending resolver after first resolution)
assert.throws(
// Second resolve — should be ignored after the resolver is consumed
assert.doesNotThrow(
() => resolveAgentEnd(event2),
/no pending unit/,
"second resolveAgentEnd must throw after first consumed the resolver",
"duplicate agent_end after completion must be ignored",
);
const result = await resultPromise;

View file

@ -145,15 +145,14 @@ describe("workflow catalog registration", () => {
assert.ok(labels.includes("model"), "should include model completion");
});
it("autonomous appears in TOP_LEVEL_SUBCOMMANDS and auto remains an alias", () => {
it("autonomous appears in TOP_LEVEL_SUBCOMMANDS and auto remains hidden", () => {
const autonomous = TOP_LEVEL_SUBCOMMANDS.find(
(c) => c.cmd === "autonomous",
);
const auto = TOP_LEVEL_SUBCOMMANDS.find((c) => c.cmd === "auto");
assert.ok(autonomous, "autonomous should be in TOP_LEVEL_SUBCOMMANDS");
assert.match(autonomous!.desc, /Autonomous mode/i);
assert.ok(auto, "auto alias should remain in TOP_LEVEL_SUBCOMMANDS");
assert.match(auto!.desc, /Auto mode/i);
assert.equal(auto, undefined, "auto alias should remain hidden from listings");
});
it("getSfArgumentCompletions supports autonomous flags", () => {

View file

@ -708,20 +708,17 @@ describe("Custom engine loop integration", () => {
);
});
// ── Cold-cache ordering: resolveAgentEnd before _setCurrentResolve must throw ──
// Previously, resolveAgentEnd silently dropped events when called before
// _setCurrentResolve (cold vitest cache exposes the ordering window).
// The fix makes it throw so the bug surfaces immediately instead of via
// timeout (#race-condition-silent-event-drop).
it("resolveAgentEnd before _setCurrentResolve throws (cold-cache ordering)", async () => {
// ── Cold-cache ordering: stale agent_end before _setCurrentResolve ──
// Late/duplicate agent_end events can arrive between unit lifecycles. They
// must not throw into watchdog handlers after the current unit is already
// resolved or paused.
it("resolveAgentEnd before _setCurrentResolve is ignored as stale", async () => {
_resetPendingResolve();
// Simulate the cold-cache race: resolveAgentEnd fires before runUnit registers
// its resolver. Must throw so the bug is caught immediately.
assert.throws(
// Simulate a stale event before runUnit registers its resolver.
assert.doesNotThrow(
() => resolveAgentEnd({ messages: [{ role: "assistant" }] }),
/no pending unit/,
"resolveAgentEnd must throw when no resolver is registered yet",
"stale agent_end with no resolver should not break auto-mode",
);
});
});

View file

@ -30,6 +30,7 @@ import {
openDatabase,
} from "../sf-db.ts";
import { invalidateStateCache } from "../state.ts";
import { validateContent } from "../safety/content-validator.ts";
// ═══════════════════════════════════════════════════════════════════════════
// Helpers
@ -596,7 +597,7 @@ test("── markdown-renderer: renderPlanFromDb creates parse-compatible slice
planning: {
goal: "Render slice plans from DB state.",
successCriteria:
"- Slice plan stays parse-compatible\n- Task plan files are regenerated",
"1. Slice plan stays parse-compatible\n2. Task plan files are regenerated",
proofLevel: "integration",
integrationClosure: "Wires DB planning rows to markdown artifacts.",
observabilityImpact:
@ -610,7 +611,8 @@ test("── markdown-renderer: renderPlanFromDb creates parse-compatible slice
title: "Render slice plan",
status: "pending",
planning: {
description: "Implement the DB-backed slice plan renderer.",
description:
"Implement the DB-backed slice plan renderer.\n\n### 1. Add regression coverage\nWrite the failing contract.\n\n### 2. Patch rendering\nKeep generated markdown lint-clean.",
estimate: "45m",
files: ["src/resources/extensions/sf/markdown-renderer.ts"],
verify: "node --test markdown-renderer.test.ts",
@ -654,6 +656,27 @@ test("── markdown-renderer: renderPlanFromDb creates parse-compatible slice
);
const planContent = fs.readFileSync(rendered.planPath, "utf-8");
assert.ok(
planContent.includes("1. Slice plan stays parse-compatible"),
"ordered must-have keeps ordered-list syntax",
);
assert.ok(
!planContent.includes("- 1. Slice plan stays parse-compatible"),
"ordered must-have is not wrapped in a bullet",
);
assert.ok(
planContent.includes(
" ### 1. Add regression coverage\n\n Write the failing contract.",
),
"multi-line task description is indented and blank-lined in slice plan",
);
assert.deepStrictEqual(
validateContent("plan-slice", rendered.planPath).filter((v) =>
/Markdown lint MD0(?:22|29)\//.test(v.reason),
),
[],
"rendered slice plan has no heading/list lint warnings",
);
clearAllCaches();
const parsedPlan = parsePlan(planContent);
assert.strictEqual(
@ -719,11 +742,24 @@ test("── markdown-renderer: renderPlanFromDb creates parse-compatible slice
"T01-PLAN.md",
);
const taskPlanContent = fs.readFileSync(taskPlanPath, "utf-8");
assert.ok(
taskPlanContent.includes(
"### 1. Add regression coverage\n\nWrite the failing contract.",
),
"task plan task-step headings are surrounded by blank lines",
);
assert.deepStrictEqual(
validateContent("plan-task", taskPlanPath).filter((v) =>
v.reason.includes("Markdown lint MD022"),
),
[],
"rendered task plan has no blanks-around-headings lint warnings",
);
const taskPlanFile = parseTaskPlanFile(taskPlanContent);
assert.strictEqual(
taskPlanFile.frontmatter.estimated_steps,
1,
"task plan frontmatter exposes estimated_steps",
5,
"task plan frontmatter exposes estimated_steps for multiline detail",
);
assert.strictEqual(
taskPlanFile.frontmatter.estimated_files,

View file

@ -0,0 +1,164 @@
import assert from "node:assert/strict";
import { randomUUID } from "node:crypto";
import { mkdirSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test } from "vitest";
import {
getAutoSession,
resetAutoSession,
} from "../auto/session.js";
import {
hasResearchTerminalTransition,
markResearchTerminalTransition,
} from "../auto.js";
function makeTmpBase(): string {
const base = join(tmpdir(), `sf-research-terminal-${randomUUID()}`);
mkdirSync(join(base, ".sf"), { recursive: true });
return base;
}
function cleanup(base: string): void {
try {
rmSync(base, { recursive: true, force: true });
} catch {
/* swallow */
}
}
test("markResearchTerminalTransition sets researchTerminalTransition true", () => {
const session = getAutoSession();
// Reset to known state
session.reset();
assert.equal(session.researchTerminalTransition, false);
markResearchTerminalTransition();
assert.equal(session.researchTerminalTransition, true);
assert.equal(hasResearchTerminalTransition(), true);
// Cleanup
session.reset();
});
test("hasResearchTerminalTransition returns false after reset", () => {
const session = getAutoSession();
// Reset to known state
session.reset();
markResearchTerminalTransition();
assert.equal(hasResearchTerminalTransition(), true);
session.reset();
assert.equal(hasResearchTerminalTransition(), false);
});
test("research terminal transition blocks planning tools", async () => {
// This test verifies the integration between:
// 1. executeSummarySave returning terminal_transition for RESEARCH
// 2. register-hooks tool_call blocking planning tools after terminal transition
//
// The actual blocking logic lives in register-hooks.ts and is tested here
// by simulating the conditions that trigger it.
const base = makeTmpBase();
try {
const session = getAutoSession();
// Reset to known state
session.reset();
// Simulate a research unit being active
session.active = true;
session.currentUnit = {
type: "research-slice",
id: "M001/S01",
startedAt: Date.now(),
};
// Before terminal transition, planning should be allowed
assert.equal(hasResearchTerminalTransition(), false);
// Mark terminal transition (as would happen after sf_summary_save RESEARCH)
markResearchTerminalTransition();
// After terminal transition, planning should be blocked
assert.equal(hasResearchTerminalTransition(), true);
// Verify the session state reflects the terminal transition
assert.equal(session.researchTerminalTransition, true);
assert.equal(session.currentUnit?.type, "research-slice");
// Cleanup
session.reset();
} finally {
cleanup(base);
}
});
test("research terminal transition does not block non-planning tools", () => {
const session = getAutoSession();
// Reset to known state
session.reset();
// Simulate a research unit being active
session.active = true;
session.currentUnit = {
type: "research-slice",
id: "M001/S01",
startedAt: Date.now(),
};
markResearchTerminalTransition();
// Non-planning tools should not be blocked by the research terminal transition
// (the actual blocking logic only checks planning tools)
const nonPlanningTools = [
"read",
"write",
"edit",
"bash",
"grep",
"find",
"sf_summary_save",
"sf_self_report",
];
// The hasResearchTerminalTransition function just returns state;
// the actual tool filtering happens in register-hooks.ts
assert.equal(hasResearchTerminalTransition(), true);
// Cleanup
session.reset();
});
test("research terminal transition resets on new unit dispatch", () => {
const session = getAutoSession();
// Reset to known state
session.reset();
// Simulate first research unit completing
session.active = true;
session.currentUnit = {
type: "research-slice",
id: "M001/S01",
startedAt: Date.now(),
};
markResearchTerminalTransition();
assert.equal(hasResearchTerminalTransition(), true);
// Simulate new unit dispatch (research terminal should be reset)
// In production, this happens in the auto-loop when dispatching a new unit
session.researchTerminalTransition = false;
session.currentUnit = {
type: "plan-slice",
id: "M001/S01",
startedAt: Date.now(),
};
assert.equal(hasResearchTerminalTransition(), false);
assert.equal(session.currentUnit?.type, "plan-slice");
// Cleanup
session.reset();
});

View file

@ -138,6 +138,20 @@ test("workflow-projections: renderPlanContent includes ## Tasks section", () =>
assert.ok(content.includes("## Tasks"));
});
test("workflow-projections: renderPlanContent preserves ordered must-haves", () => {
const content = renderPlanContent(
makeSlice({
success_criteria:
"1. First ordered outcome\n2. Second ordered outcome",
}),
[],
);
assert.ok(content.includes("1. First ordered outcome"));
assert.ok(content.includes("2. Second ordered outcome"));
assert.ok(!content.includes("- 1. First ordered outcome"));
});
test("workflow-projections: renderPlanContent includes top-level verification from task commands", () => {
const content = renderPlanContent(makeSlice(), [
makeTask({ verify: "npm test src/middleware/auth.test.ts" }),
@ -264,6 +278,20 @@ test("workflow-projections: task with verify renders Verify subline", () => {
assert.ok(content.includes(" - Verify: npm test"));
});
test("workflow-projections: task markdown headings stay nested and lint-spaced", () => {
const task = makeTask({
description:
"Implement the contract.\n\n### 1. Add coverage\nWrite the failing test.\n\n### 2. Patch renderer\nKeep markdown valid.",
});
const content = renderPlanContent(makeSlice(), [task]);
assert.ok(content.includes("- [ ] **T01: Create JWT middleware**"));
assert.ok(
content.includes(" ### 1. Add coverage\n\n Write the failing test."),
);
assert.ok(!content.includes("\n### 1. Add coverage\nWrite"));
});
test("workflow-projections: task with no verify omits Verify subline", () => {
const task = makeTask({ verify: "" });
const content = renderPlanContent(makeSlice(), [task]);

View file

@ -39,6 +39,86 @@ export function stripIdPrefix(title: string, id: string): string {
return result.trim() || title;
}
/**
* Render a model-provided list entry without corrupting ordered lists.
*
* Purpose: projection fallback output remains valid Markdown when planning
* rows contain numbered success criteria from the LLM.
* Consumer: renderPlanContent when writing PLAN.md projections.
*/
function renderListEntry(entry: string): string {
const trimmed = entry.trim();
const orderedBullet = trimmed.match(/^[-*+]\s+(\d+)[.)]\s+(.+)$/);
if (orderedBullet) {
return `${orderedBullet[1]}. ${orderedBullet[2].trim()}`;
}
const ordered = trimmed.match(/^(\d+)[.)]\s+(.+)$/);
if (ordered) {
return `${ordered[1]}. ${ordered[2].trim()}`;
}
if (/^[-*+]\s+\S/.test(trimmed)) {
return trimmed;
}
return `- ${trimmed}`;
}
/**
* Surround ATX headings in model-provided markdown with blank lines.
*
* Purpose: generated PLAN.md projections pass content validation even when
* task descriptions contain LLM-authored step headings.
* Consumer: renderPlanContent task entries.
*/
function normalizeMarkdownBlockSpacing(text: string): string {
const sourceLines = text.trim().replace(/\r\n/g, "\n").split("\n");
const output: string[] = [];
let inFence = false;
for (let i = 0; i < sourceLines.length; i++) {
const line = sourceLines[i];
const trimmed = line.trim();
const fence = /^(```|~~~)/.test(trimmed);
const heading = !inFence && /^#{1,6}\s+\S/.test(trimmed);
if (heading && output.length > 0 && output[output.length - 1]?.trim()) {
output.push("");
}
output.push(line);
if (fence) {
inFence = !inFence;
}
const next = sourceLines[i + 1];
if (heading && next !== undefined && next.trim()) {
output.push("");
}
}
return output.join("\n").trim();
}
/**
* Append model-provided markdown as an indented child block.
*
* Purpose: task description subsections stay nested under their task instead
* of becoming top-level PLAN.md headings.
* Consumer: renderPlanContent task entries.
*/
function appendIndentedMarkdownBlock(
lines: string[],
text: string,
indent = " ",
): void {
for (const line of normalizeMarkdownBlockSpacing(text).split("\n")) {
lines.push(line.trim() ? `${indent}${line}` : "");
}
}
// ─── PLAN.md Projection ──────────────────────────────────────────────────
/**
@ -70,7 +150,7 @@ export function renderPlanContent(
.split(/\n+/)
.map((entry) => entry.trim())
.filter(Boolean)) {
lines.push(line.startsWith("-") ? line : `- ${line}`);
lines.push(renderListEntry(line));
}
} else {
lines.push("- Complete the planned slice outcomes.");
@ -198,9 +278,10 @@ export function renderPlanContent(
for (const task of taskRows) {
const checkbox = isClosedStatus(task.status) ? "[x]" : "[ ]";
lines.push(
`- ${checkbox} **${task.id}: ${task.title}** \u2014 ${task.description}`,
);
lines.push(`- ${checkbox} **${task.id}: ${task.title}**`);
if (task.description.trim()) {
appendIndentedMarkdownBlock(lines, task.description);
}
// Estimate subline (always present if non-empty)
if (task.estimate) {

View file

@ -143,6 +143,7 @@ export default defineConfig({
"src/tests/**/*.test.mjs",
"src/resources/extensions/sf/tests/**/*.test.ts",
"src/resources/extensions/sf/tests/**/*.test.mjs",
"src/resources/extensions/sf-permissions/tests/**/*.test.ts",
"src/resources/extensions/shared/tests/**/*.test.ts",
"src/resources/extensions/claude-code-cli/tests/**/*.test.ts",
"src/resources/extensions/github-sync/tests/**/*.test.ts",