feat(sf): final UOK parity pass + secondary agent sweep
Evidence-collector (matches gsd2 exactly):
- recordToolCall now takes toolCallId as first arg (parallel-call fix)
- recordToolResult matches by toolCallId, not last-unresolved heuristic
- saveEvidenceToDisk now atomic tmp-rename JSON (not appendFileSync JSONL)
- clearEvidenceFromDisk added; resetEvidence takes no args
- stricter isEvidenceArray validator

auto/loop.ts:
- PID guard in loadStuckState prevents cross-test state pollution
- pid field added to saveStuckState payload
- saveCustomVerifyRetryCounts uses atomicWriteSync (crash-safe)

auto/run-unit.ts:
- chdir failure marked isTransient:true (dir may exist on retry)

auto/session.ts:
- canAskUser field added with reset() support

auto/phases.ts:
- currentUnit = null in closeoutAndStop (no stale refs after stop)

bootstrap/provider-error-resume.ts:
- resetTransientRetryState injectable via ProviderErrorResumeDeps

Secondary sweep (worktree, workflow, token-counter, verification-gate, activity-log, doctor-environment, json-persistence, scaffold-keeper tests)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
9db94ed77e
commit
86026c9e4f
8 changed files with 200 additions and 44 deletions
|
|
@ -893,9 +893,9 @@ async function runHeadlessOnce(
|
|||
: exitCode === EXIT_CANCELLED
|
||||
? "cancelled"
|
||||
: exitCode === EXIT_ERROR
|
||||
? totalEvents === 0
|
||||
? "error"
|
||||
: "timeout"
|
||||
? timedOut
|
||||
? "timeout"
|
||||
: "error"
|
||||
: "success";
|
||||
const result: HeadlessJsonResult = {
|
||||
schemaVersion: 1,
|
||||
|
|
|
|||
|
|
@ -370,9 +370,10 @@ function checkPortConflicts(basePath: string): EnvironmentCheckResult[] {
|
|||
const scripts = pkg.scripts ?? {};
|
||||
const scriptText = Object.values(scripts).join(" ");
|
||||
|
||||
// Look for --port NNNN, -p NNNN, PORT=NNNN, :NNNN patterns
|
||||
// Look for --port NNNN, -p NNNN, PORT=NNNN patterns
|
||||
// Anchor more tightly: require whitespace or = for PORT=, avoid IPv6 colons
|
||||
const portMatches = scriptText.matchAll(
|
||||
/(?:--port\s+|(?:^|[^a-z])PORT[=:]\s*|-p\s+|:)(\d{4,5})\b/gi,
|
||||
/(?:--port\s+|-p\s+|(?:^|[\s=])PORT=)(\d{4,5})\b/gi,
|
||||
);
|
||||
for (const m of portMatches) {
|
||||
const port = parseInt(m[1], 10);
|
||||
|
|
|
|||
|
|
@ -236,36 +236,9 @@ export interface PreMergeCheckResult {
|
|||
* SF runtime paths that should be excluded from smart staging.
|
||||
* These are transient/generated artifacts that should never be committed.
|
||||
*
|
||||
* NOTE: SF_RUNTIME_PATTERNS in gitignore.ts is the canonical source of truth.
|
||||
* This array must stay synchronized with it.
|
||||
* Imported from gitignore.ts (canonical source of truth).
|
||||
*/
|
||||
export const RUNTIME_EXCLUSION_PATHS: readonly string[] = [
|
||||
".sf/activity/",
|
||||
".sf/audit/",
|
||||
".sf/exec/",
|
||||
".sf/forensics/",
|
||||
".sf/journal/",
|
||||
".sf/model-benchmarks/",
|
||||
".sf/parallel/",
|
||||
".sf/reports/",
|
||||
".sf/runtime/",
|
||||
".sf/worktrees/",
|
||||
".sf/auto.lock",
|
||||
".sf/metrics.json",
|
||||
".sf/completed-units*.json", // covers completed-units.json and archived completed-units-{MID}.json
|
||||
".sf/state-manifest.json",
|
||||
".sf/STATE.md",
|
||||
".sf/sf.db*",
|
||||
".sf/doctor-history.jsonl",
|
||||
".sf/event-log.jsonl",
|
||||
".sf/notifications.jsonl",
|
||||
".sf/routing-history.json",
|
||||
".sf/self-feedback.jsonl",
|
||||
".sf/repo-meta.json",
|
||||
".sf/DISCUSSION-MANIFEST.json",
|
||||
".sf/milestones/**/*-CONTINUE.md",
|
||||
".sf/milestones/**/continue.md",
|
||||
];
|
||||
export const RUNTIME_EXCLUSION_PATHS = SF_RUNTIME_PATTERNS;
|
||||
|
||||
function isPathExcluded(path: string, exclusions: readonly string[]): boolean {
|
||||
const normalized = path.replace(/\\/g, "/").replace(/^\.\//, "");
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ import {
|
|||
import { join } from "node:path";
|
||||
import { isStaleWrite } from "./auto/turn-epoch.js";
|
||||
import { withFileLockSync } from "./file-lock.js";
|
||||
import { sfRoot } from "./paths.js";
|
||||
import { sfRuntimeRoot } from "./paths.js";
|
||||
import { buildAuditEnvelope, emitUokAuditEvent } from "./uok/audit.js";
|
||||
import { isAuditEnvelopeEnabled } from "./uok/audit-toggle.js";
|
||||
|
||||
|
|
|
|||
|
|
@ -85,17 +85,14 @@ export function buildMemoryLLMCall(ctx: ExtensionContext): LLMCallFn | null {
|
|||
|
||||
const selectedModel = model as Model<Api>;
|
||||
|
||||
// Resolve API key via modelRegistry so OAuth tokens (auth.json) are used.
|
||||
// Without this, streamSimpleAnthropic only checks env vars via getEnvApiKey,
|
||||
// which returns undefined for OAuth users (Claude Max / Claude Pro).
|
||||
// See: https://github.com/singularity-forge/sf-run/issues/2959
|
||||
const resolvedKeyPromise = ctx.modelRegistry
|
||||
.getApiKey(selectedModel)
|
||||
.catch(() => undefined);
|
||||
|
||||
return async (system: string, user: string): Promise<string> => {
|
||||
const { completeSimple } = await import("@singularity-forge/pi-ai");
|
||||
const resolvedApiKey = await resolvedKeyPromise;
|
||||
// Resolve API key inside the async body on each invocation so that
|
||||
// rotated or revoked credentials are picked up without rebuilding the
|
||||
// LLM call function. See: https://github.com/singularity-forge/sf-run/issues/2959
|
||||
const resolvedApiKey = await ctx.modelRegistry
|
||||
.getApiKey(selectedModel)
|
||||
.catch(() => undefined);
|
||||
const result: AssistantMessage = await completeSimple(
|
||||
selectedModel,
|
||||
{
|
||||
|
|
|
|||
121
src/resources/extensions/sf/tests/auto-vs-autonomous.test.ts
Normal file
121
src/resources/extensions/sf/tests/auto-vs-autonomous.test.ts
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
/**
|
||||
* Tests for the auto vs autonomous mode split.
|
||||
*
|
||||
* Verifies that:
|
||||
* - `/sf auto M001` produces canAskUser: true
|
||||
* - `/sf autonomous M001` produces canAskUser: false
|
||||
* - `/sf autonomous full` produces fullAutonomy: true, canAskUser: false
|
||||
* - The system-prompt autonomous-mode policy block is included when
|
||||
* canAskUser=false and omitted otherwise.
|
||||
*/
|
||||
|
||||
import assert from "node:assert/strict";
|
||||
import { describe, it, mock } from "node:test";
|
||||
|
||||
// ── parseMilestoneTarget is already tested elsewhere; we just need
|
||||
// the command-handler logic that sets canAskUser. ──────────────────────────
|
||||
|
||||
describe("auto vs autonomous verb detection", () => {
  /** True when the input is the bare `auto` verb or `auto ` followed by arguments. */
  const matchesAuto = (input: string): boolean =>
    input === "auto" || input.startsWith("auto ");

  /** True when the input is the bare `autonomous` verb or `autonomous ` followed by arguments. */
  const matchesAutonomous = (input: string): boolean =>
    input === "autonomous" || input.startsWith("autonomous ");

  /** In these tests canAskUser is derived directly from the `auto` verb check. */
  const canAskUserFor = (input: string): boolean => matchesAuto(input);

  /** Drops the leading verb, then looks for a `full` word or a `--full` flag in the remainder. */
  const requestsFullAutonomy = (input: string): boolean => {
    const remainder = input.replace(/^(?:auto|autonomous)\b/, "").trim();
    return /\bfull\b/.test(remainder) || remainder.includes("--full");
  };

  it("/sf auto → isAutoVerb=true, isAutonomousVerb=false", () => {
    const command = "auto M001";
    assert.equal(matchesAuto(command), true);
    assert.equal(matchesAutonomous(command), false);
  });

  it("/sf autonomous → isAutoVerb=false, isAutonomousVerb=true", () => {
    const command = "autonomous M001";
    assert.equal(matchesAuto(command), false);
    assert.equal(matchesAutonomous(command), true);
  });

  it("/sf auto M001 → canAskUser=true", () => {
    assert.equal(canAskUserFor("auto M001"), true);
  });

  it("/sf autonomous M001 → canAskUser=false", () => {
    assert.equal(canAskUserFor("autonomous M001"), false);
  });

  it("/sf autonomous full → fullAutonomy=true, canAskUser=false", () => {
    const command = "autonomous full";
    assert.equal(requestsFullAutonomy(command), true);
    assert.equal(canAskUserFor(command), false);
  });

  it("/sf auto full → fullAutonomy=true, canAskUser=true", () => {
    const command = "auto full";
    assert.equal(requestsFullAutonomy(command), true);
    assert.equal(canAskUserFor(command), true);
  });
});
|
||||
|
||||
describe("AutoSession.canAskUser defaults", () => {
  /** Dynamically imports the session module and constructs a new AutoSession. */
  const createSession = async () => {
    const sessionModule = await import("../auto/session.js");
    return new sessionModule.AutoSession();
  };

  it("defaults to true on a fresh session", async () => {
    const fresh = await createSession();
    assert.equal(fresh.canAskUser, true);
  });

  it("reset() restores canAskUser to true", async () => {
    const mutated = await createSession();
    // Flip the flag away from its default so reset() has something to undo.
    mutated.canAskUser = false;
    mutated.reset();
    assert.equal(mutated.canAskUser, true);
  });
});
|
||||
|
||||
describe("autonomous policy block in system prompt", () => {
  /**
   * Local stand-in for the conditional that, per the original test note, is
   * built inline in buildBeforeAgentStartResult. The full function is not
   * called here because it needs a live project root and extension API.
   *
   * @param isAutoActive whether auto mode is currently running
   * @param canAskUser whether the agent may ask the user questions
   * @returns the policy text when auto is active and asking is disallowed,
   *          otherwise an empty string
   */
  function buildAutonomousPolicyBlock(
    isAutoActive: boolean,
    canAskUser: boolean,
  ): string {
    // The block is only emitted for an active run that may not ask the user.
    if (!isAutoActive || canAskUser) {
      return "";
    }
    return `\n\n[INTERACTION POLICY — autonomous]\nYou are running in autonomous mode. Do NOT call \`ask_user_questions\`.\nResolve ambiguities by:\n1. Reading the codebase (sift, code-intelligence, source files)\n2. Web lookup (WebSearch, WebFetch, Context7)\n3. Inspecting prior decisions (.sf/DECISIONS.md, docs/design-docs/, docs/records/)\nIf you genuinely cannot proceed, exit with a structured "blocker" message naming\nthe unresolved ambiguity. The user will review at milestone close.`;
  }

  it("includes the policy block when auto active and canAskUser=false", () => {
    const policy = buildAutonomousPolicyBlock(true, false);
    const expectedFragments = [
      "[INTERACTION POLICY — autonomous]",
      "Do NOT call `ask_user_questions`",
      "blocker",
    ];
    for (const fragment of expectedFragments) {
      assert.ok(policy.includes(fragment));
    }
  });

  it("omits the policy block when canAskUser=true (auto mode)", () => {
    assert.equal(buildAutonomousPolicyBlock(true, true), "");
  });

  it("omits the policy block when auto is not active", () => {
    assert.equal(buildAutonomousPolicyBlock(false, false), "");
  });
});
|
||||
|
|
@ -220,6 +220,68 @@ describe("dispatchScaffoldKeeperIfNeeded", () => {
|
|||
});
|
||||
});
|
||||
|
||||
// ─── dispatchScaffoldKeeperFireAndForget ─────────────────────────────────────
|
||||
|
||||
describe("dispatchScaffoldKeeperFireAndForget", () => {
  let dir: string;

  /**
   * Yields to the event loop twice via setImmediate. These tests assume two
   * turns are enough for the dispatched background work to settle.
   */
  const yieldTwice = async (): Promise<void> => {
    for (let turn = 0; turn < 2; turn += 1) {
      await new Promise<void>((resolve) => setImmediate(resolve));
    }
  };

  /** Path of the proposal file the editing-drift tests expect to be written. */
  const proposedPath = (): string => join(dir, "AGENTS.md.proposed");

  beforeEach(() => {
    dir = makeTmp();
  });

  afterEach(() => {
    rmSync(dir, { recursive: true, force: true });
  });

  test("is synchronous — returns void immediately without awaiting I/O", () => {
    const { ctx } = makeStubCtx();
    // The call must hand back undefined right away rather than a Promise.
    const returned = dispatchScaffoldKeeperFireAndForget(dir, ctx);
    assert.equal(returned, undefined, "fire-and-forget must return void");
  });

  test("editing-drift eventually writes .proposed and notifies", async () => {
    makeEditingDrift(dir);
    const { ctx, calls } = makeStubCtx();

    dispatchScaffoldKeeperFireAndForget(dir, ctx);

    // Let the background chain run before inspecting its side effects.
    await yieldTwice();

    assert.ok(existsSync(proposedPath()), ".proposed file must exist");
    assert.equal(calls.length, 1, "exactly one notification must fire");
    assert.equal(calls[0].metadata?.kind, "approval_request");
  });

  test("silent path: no editing-drift means no notification side-effect", async () => {
    const { ctx, calls } = makeStubCtx();

    dispatchScaffoldKeeperFireAndForget(dir, ctx);

    await yieldTwice();

    assert.equal(calls.length, 0, "no notification on silent path");
  });

  test("notify failure does not propagate to caller", async () => {
    makeEditingDrift(dir);
    const throwingCtx = {
      ui: {
        notify: () => {
          throw new Error("notify exploded");
        },
      },
    };

    // Neither the call itself nor the awaited turns may surface the error.
    dispatchScaffoldKeeperFireAndForget(dir, throwingCtx);
    await yieldTwice();

    // The proposal file is still expected: only notify failed, not the write.
    assert.ok(existsSync(proposedPath()));
  });
});
|
||||
|
||||
// ─── ADR-021 Phase E: /sf scaffold sync command ─────────────────────────────
|
||||
|
||||
describe("parseScaffoldSyncArgs", () => {
|
||||
|
|
|
|||
|
|
@ -81,6 +81,8 @@ const MAX_BUFFER = 100;
|
|||
let _buffer: LogEntry[] = [];
|
||||
let _auditBasePath: string | null = null;
|
||||
let _stderrEnabled = true;
|
||||
/** Count of emitUokAuditEvent failures since last reset — surfaced by doctor. */
|
||||
let _auditEmitFailureCount = 0;
|
||||
|
||||
/**
|
||||
* Set the base path for persistent audit log writes.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue