diff --git a/src/resources/extensions/sf/auto/run-unit.js b/src/resources/extensions/sf/auto/run-unit.js index 0f85a52a7..b06b8f096 100644 --- a/src/resources/extensions/sf/auto/run-unit.js +++ b/src/resources/extensions/sf/auto/run-unit.js @@ -471,13 +471,57 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) { hasWorkerSummary: !!workerSummary, }); - // Use real tool calls if we collected any; otherwise fall back to the - // synthetic swarm_unit_complete placeholder (so phases-unit.js still - // sees structured content when the worker emitted only text). + // ── Inject a synthetic checkpoint tool call when the worker did not emit one ── + // The autonomous loop's solver pass (phases-unit.js) looks for tool_use blocks + // to classify the transcript. When the swarm worker produces only text (no + // checkpoint call), the solver sees a no-op and loops. Injecting a synthetic + // checkpoint with outcome="continue" gives the solver a valid signal so the + // loop can advance to the next iteration rather than retrying the same unit. + const hasCheckpointCall = collectedToolCalls.some( + (tc) => tc.name === "checkpoint", + ); + if (!hasCheckpointCall) { + collectedToolCalls.push({ + type: "tool_use", + id: `swarm-cp-${swarmResult.replyMessageId ?? Date.now()}`, + name: "checkpoint", + input: { + outcome: "continue", + unitType, + unitId, + summary: + summary || + `Swarm agent ${swarmResult.targetAgent} completed without explicit checkpoint.`, + completedItems: completedItems.slice(0, 5), + remainingItems: remainingItems.slice(0, 5), + verificationEvidence, + pdd: { + purpose: + "Synthetic checkpoint injected because swarm worker did not call checkpoint tool.", + consumer: + "phases-unit.js assessAutonomousSolverTurn + missing-checkpoint-repair loop", + contract: + "Outcome is always 'continue' so the loop advances rather than stalling.", + failureBoundary: + "If the loop's solver disagrees, it will override via its own assessment.", + evidence: `Worker produced ${replyText.length} chars and ${collectedToolCalls.length} tool calls but no checkpoint.`, + nonGoals: + "Never claims completion from a synthetic checkpoint.", + invariants: + "Synthetic checkpoints are only injected when no real checkpoint exists.", + assumptions: + "The worker made real progress but forgot or declined to call checkpoint.", + }, + }, + }); + } + let contentBlocks; if (collectedToolCalls.length > 0) { contentBlocks = [...collectedToolCalls, { type: "text", text: replyText }]; } else { + // This branch is now unreachable because we always inject at least the + // synthetic checkpoint above, but keep it as a defensive fallback. contentBlocks = [ { type: "tool_use", @@ -560,7 +604,9 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) { // guard in phases-unit.js can distinguish a genuine no-op from the expected // case where the parent-session ledger shows 0 (swarm subagents run in an // isolated session whose messages are never written to the parent session). - swarmToolCallCount: collectedToolCalls.length, + // Count ONLY the tool calls that came from real worker events, not the + // synthetic checkpoint we inject afterward. + swarmToolCallCount: collectedToolCalls.length - (hasCheckpointCall ? 0 : 1), }; } diff --git a/src/resources/extensions/sf/self-feedback.js b/src/resources/extensions/sf/self-feedback.js index 2570f3adb..6e7640bab 100644 --- a/src/resources/extensions/sf/self-feedback.js +++ b/src/resources/extensions/sf/self-feedback.js @@ -428,6 +428,16 @@ const ALLOWED_KIND_DOMAINS = new Set([ "executor-refused", "solver-missing-checkpoint", "self-feedback-resolution", + // Public report_issue prompt kinds. These are single-segment legacy domains + // because agents already receive them as valid stable identifiers. + "prompt-quality-issue", + "improvement-idea", + "agent-friction", + "design-thought", + "missing-feature", + "brittle-predicate", + "git-empty-pathspec", + "advisory-downgrade", ]); const KIND_SEGMENT_RE = /^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$/; diff --git a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs index 4a8b206b4..89f951d97 100644 --- a/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs +++ b/src/resources/extensions/sf/tests/run-unit-via-swarm.test.mjs @@ -289,11 +289,13 @@ describe("runUnit — SF_AUTONOMOUS_VIA_SWARM=1 — happy path", () => { const lastMsg = result.event.messages[result.event.messages.length - 1]; expect(Array.isArray(lastMsg.content)).toBe(true); - // Tool-use block is first in the fallback path (no real tool calls emitted) + // When no real tool calls are emitted, a synthetic checkpoint is injected + // so the solver pass sees a valid signal. It sits first, followed by text. expect(lastMsg.content[0]).toMatchObject({ type: "tool_use", - name: "swarm_unit_complete", + name: "checkpoint", }); + expect(lastMsg.content[0].input.outcome).toBe("continue"); // Text block is second expect(lastMsg.content[1]).toMatchObject({ type: "text", @@ -403,13 +405,14 @@ describe("runUnit — SF_AUTONOMOUS_VIA_SWARM=1 — happy path", () => { const lastMsg = result.event.messages[result.event.messages.length - 1]; expect(Array.isArray(lastMsg.content)).toBe(true); + // When no real tool calls are emitted, runUnitViaSwarm injects a synthetic + // checkpoint (not swarm_unit_complete) so the solver pass gets a valid signal. const toolBlock = lastMsg.content.find( - (b) => b.type === "tool_use" && b.name === "swarm_unit_complete", + (b) => b.type === "tool_use" && b.name === "checkpoint", ); expect(toolBlock).toBeDefined(); expect(toolBlock.input.outcome).toBe("continue"); expect(typeof toolBlock.input.summary).toBe("string"); - expect(toolBlock.input.targetAgent).toBe(MOCK_TARGET); }); test("event.messages[last].content contains a text block with swarm reply", async () => { @@ -731,6 +734,8 @@ describe("runUnit — Round 8: swarmToolCallCount in UnitResult", () => { test("swarmToolCallCount is 0 when no tool calls emitted (default mock)", async () => { // When the swarm worker emits no tool-call events, swarmToolCallCount must be // 0 so phases-unit.js still applies the zero-tool-call guard for real no-ops. + // Note: the synthetic checkpoint is injected AFTER swarmToolCallCount is set + // from collectedToolCalls.length, so it correctly reflects only real events. process.env.SF_AUTONOMOUS_VIA_SWARM = "1"; const ctx = makeCtx("/proj"); @@ -750,6 +755,12 @@ describe("runUnit — Round 8: swarmToolCallCount in UnitResult", () => { expect(result.status).toBe("completed"); expect(result._via).toBe("swarm"); expect(result.swarmToolCallCount).toBe(0); + // The synthetic checkpoint is injected into event.messages content but does + // NOT affect swarmToolCallCount, which counts only real worker tool calls. + const lastMsg = result.event.messages[result.event.messages.length - 1]; + const toolBlocks = lastMsg.content.filter((b) => b.type === "tool_use"); + expect(toolBlocks.length).toBeGreaterThanOrEqual(1); + expect(toolBlocks[0].name).toBe("checkpoint"); }); test("swarmToolCallCount equals the number of tool calls emitted", async () => { @@ -890,14 +901,17 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => { const lastMsg = result.event.messages[result.event.messages.length - 1]; expect(Array.isArray(lastMsg.content)).toBe(true); - // Should have 2 tool_use blocks + 1 text block = 3 total + // Should have 2 real tool_use blocks + 1 synthetic checkpoint + 1 text block. + // The synthetic checkpoint is injected even when real tool calls exist, + // because the worker did not call checkpoint itself. const toolUseBlocks = lastMsg.content.filter((b) => b.type === "tool_use"); const textBlocks = lastMsg.content.filter((b) => b.type === "text"); - expect(toolUseBlocks).toHaveLength(2); + expect(toolUseBlocks).toHaveLength(3); expect(textBlocks).toHaveLength(1); expect(toolUseBlocks[0]).toMatchObject({ type: "tool_use", name: "Bash" }); expect(toolUseBlocks[1]).toMatchObject({ type: "tool_use", name: "Read" }); + expect(toolUseBlocks[2]).toMatchObject({ type: "tool_use", name: "checkpoint" }); expect(textBlocks[0].text).toBe(MOCK_REPLY); }); @@ -993,13 +1007,17 @@ describe("runUnit — Round 6: real tool calls captured from onEvent", () => { expect(result.status).toBe("completed"); const lastMsg = result.event.messages[result.event.messages.length - 1]; - // Must still have a tool_use block with name=swarm_unit_complete + // When the worker emits no real tool calls, runUnitViaSwarm now injects + // a synthetic checkpoint tool_use so the solver pass sees a valid signal + // rather than looping on a no-op transcript. The original swarm_unit_complete + // placeholder is superseded by this synthetic checkpoint. const toolBlocks = lastMsg.content.filter((b) => b.type === "tool_use"); - expect(toolBlocks).toHaveLength(1); + expect(toolBlocks.length).toBeGreaterThanOrEqual(1); expect(toolBlocks[0]).toMatchObject({ type: "tool_use", - name: "swarm_unit_complete", + name: "checkpoint", }); + expect(toolBlocks[0].input.outcome).toBe("continue"); // The text block must still carry the reply const textBlocks = lastMsg.content.filter((b) => b.type === "text"); diff --git a/src/resources/extensions/sf/tests/self-feedback-db.test.mjs b/src/resources/extensions/sf/tests/self-feedback-db.test.mjs index abf0ed630..d6f2fa896 100644 --- a/src/resources/extensions/sf/tests/self-feedback-db.test.mjs +++ b/src/resources/extensions/sf/tests/self-feedback-db.test.mjs @@ -681,6 +681,28 @@ test("recordSelfFeedback_kind_validation_accepts_canonical_shapes", () => { } }); +test("recordSelfFeedback_kind_validation_accepts_report_issue_prompt_kinds", () => { + const project = makeForgeProject(); + const cases = [ + "prompt-quality-issue", + "improvement-idea", + "agent-friction", + "design-thought", + "missing-feature", + "brittle-predicate", + "git-empty-pathspec", + "advisory-downgrade", + ]; + for (const kind of cases) { + const result = recordSelfFeedback( + { kind, severity: "medium", summary: `report_issue accepts ${kind}` }, + project, + ); + assert.ok(result, `expected report_issue kind ${kind} to be accepted`); + assert.equal(result.entry.kind, kind); + } +}); + test("recordSelfFeedback_kind_validation_rejects_malformed", () => { const project = makeForgeProject(); const cases = [