From f9334019cd39d2dba43b089972e877acc057c704 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 7 May 2026 03:03:31 +0200 Subject: [PATCH] feat(turn-status): Implement markers and parser for agent semantic state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add turn_status marker system (Tier 2.5 Phases 1-2) for agents to signal state: Phase 1: Add markers to prompts (15 templates) - Added complete|blocked|giving_up to end of all executable prompts (execute-task.md, complete-slice.md, research-slice.md, plan-milestone.md, etc.) - Marker goes at end of response so harness can parse it easily Phase 2: Implement parser (turn-status-parser.js) - extractTurnStatus(output): Extract marker from agent output - isValidTurnStatus(status): Validate marker value - describeTurnStatus(status): Human-readable descriptions - resolveSignalFromStatus(status): Map to harness actions - complete → continue (normal path) - blocked → pause with SignalPause (wait for user) - giving_up → reassess with PhaseReassess (strategy change) - parseTurnStatusFull(output): End-to-end parsing - checkTurnStatusPrompts(sfRoot): Doctor check for marker coverage Tests: 31 tests covering: - Marker extraction (valid/invalid/edge cases) - Status validation and case-insensitivity - Signal resolution and action mapping - Full pipeline integration - Graceful degradation (null/empty/non-string inputs) Architecture: - Markers are optional; default action is 'continue' - Parser is non-blocking; always returns valid action - Signals map to existing harness capabilities (SignalPause, PhaseReassess) Next phase (Phase 3): Integrate parser into auto.js or dispatch-engine to actually trigger SignalPause and PhaseReassess transitions. 
Fixes: TURN_STATUS_P1_P2 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../extensions/sf/prompts/complete-slice.md | 8 + .../extensions/sf/prompts/execute-task.md | 8 + .../extensions/sf/prompts/plan-milestone.md | 8 + .../extensions/sf/prompts/plan-slice.md | 8 + .../extensions/sf/prompts/quick-task.md | 8 + .../extensions/sf/prompts/reassess-roadmap.md | 8 + .../extensions/sf/prompts/refine-slice.md | 8 + .../extensions/sf/prompts/replan-slice.md | 8 + .../sf/prompts/research-milestone.md | 8 + .../extensions/sf/prompts/research-slice.md | 8 + .../extensions/sf/prompts/rewrite-docs.md | 8 + .../extensions/sf/prompts/run-uat.md | 8 + .../extensions/sf/prompts/triage-captures.md | 8 + .../sf/prompts/triage-self-feedback.md | 8 + .../sf/prompts/validate-milestone.md | 8 + .../sf/tests/turn-status-parser.test.ts | 306 ++++++++++++++++++ .../extensions/sf/turn-status-parser.js | 191 +++++++++++ 17 files changed, 617 insertions(+) create mode 100644 src/resources/extensions/sf/tests/turn-status-parser.test.ts create mode 100644 src/resources/extensions/sf/turn-status-parser.js diff --git a/src/resources/extensions/sf/prompts/complete-slice.md b/src/resources/extensions/sf/prompts/complete-slice.md index d3228ab98..611577e35 100644 --- a/src/resources/extensions/sf/prompts/complete-slice.md +++ b/src/resources/extensions/sf/prompts/complete-slice.md @@ -45,3 +45,11 @@ Then: **You MUST call `sf_slice_complete` with the slice summary and UAT content before finishing. The tool persists to both DB and disk and renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}` automatically.** When done, say: "Slice {{sliceId}} complete." 
+ +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/execute-task.md b/src/resources/extensions/sf/prompts/execute-task.md index f5d0b0893..e3b0d387c 100644 --- a/src/resources/extensions/sf/prompts/execute-task.md +++ b/src/resources/extensions/sf/prompts/execute-task.md @@ -101,3 +101,11 @@ All work stays in your working directory: `{{workingDirectory}}`. **You MUST call `sf_task_complete` before finishing. Do not manually write `{{taskSummaryPath}}`.** When done, say: "Task {{taskId}} complete." + +--- + +**After completing the task, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if task verification passed and you called `sf_task_complete` +- `<turn_status>blocked</turn_status>` if you discovered a blocker (missing prereq, broken upstream, third-party failure, or plan invalid) +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches and are out of reasonable next steps without human input diff --git a/src/resources/extensions/sf/prompts/plan-milestone.md b/src/resources/extensions/sf/prompts/plan-milestone.md index f7819a0d4..3e6a71885 100644 --- a/src/resources/extensions/sf/prompts/plan-milestone.md +++ b/src/resources/extensions/sf/prompts/plan-milestone.md @@ -178,3 +178,11 @@ If this milestone does not require any external API keys or secrets, skip this s If during this unit you observe sf-the-tool friction - ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas - file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. 
Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries - your scope is your unit. When done, say: "Milestone {{milestoneId}} planned." + +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/plan-slice.md b/src/resources/extensions/sf/prompts/plan-slice.md index 5de2c4d7d..6ef85fa10 100644 --- a/src/resources/extensions/sf/prompts/plan-slice.md +++ b/src/resources/extensions/sf/prompts/plan-slice.md @@ -133,3 +133,11 @@ The slice directory and tasks/ subdirectory already exist. Do NOT mkdir. All wor If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit. When done, say: "Slice {{sliceId}} planned." 
+ +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/quick-task.md b/src/resources/extensions/sf/prompts/quick-task.md index 288a12bab..39298a097 100644 --- a/src/resources/extensions/sf/prompts/quick-task.md +++ b/src/resources/extensions/sf/prompts/quick-task.md @@ -44,3 +44,11 @@ You are executing a SF quick task — a lightweight, focused unit of work outsid If you observe sf-the-tool friction during this quick task, file it via `sf_self_report` before sealing. When done, say: "Quick task {{taskNum}} complete." + +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/reassess-roadmap.md b/src/resources/extensions/sf/prompts/reassess-roadmap.md index 899b19033..fce756ea2 100644 --- a/src/resources/extensions/sf/prompts/reassess-roadmap.md +++ b/src/resources/extensions/sf/prompts/reassess-roadmap.md @@ -81,3 +81,11 @@ If `.sf/REQUIREMENTS.md` exists and requirement ownership or status changed, upd If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit. 
When done, say: "Roadmap reassessed." + +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/refine-slice.md b/src/resources/extensions/sf/prompts/refine-slice.md index 2e04c8fe8..baca5b718 100644 --- a/src/resources/extensions/sf/prompts/refine-slice.md +++ b/src/resources/extensions/sf/prompts/refine-slice.md @@ -77,3 +77,11 @@ The slice directory and tasks/ subdirectory already exist. Do NOT mkdir. **You MUST call `sf_plan_slice` to persist the planning state before finishing.** After it returns successfully, the pipeline will automatically clear the sketch flag on the next state derivation (the on-disk PLAN file is the signal). When done, say: "Slice {{sliceId}} refined." + +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/replan-slice.md b/src/resources/extensions/sf/prompts/replan-slice.md index 58f9c2a5d..53d45896a 100644 --- a/src/resources/extensions/sf/prompts/replan-slice.md +++ b/src/resources/extensions/sf/prompts/replan-slice.md @@ -41,3 +41,11 @@ Consider these captures when rewriting the remaining tasks — they represent th If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. 
Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit. When done, say: "Slice {{sliceId}} replanned." + +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/research-milestone.md b/src/resources/extensions/sf/prompts/research-milestone.md index f14834dbb..ceda33bc4 100644 --- a/src/resources/extensions/sf/prompts/research-milestone.md +++ b/src/resources/extensions/sf/prompts/research-milestone.md @@ -67,3 +67,11 @@ When done, say only: "Milestone {{milestoneId}} researched." This unit produces observations as its primary output — be especially diligent about filing sf-internal friction you notice along the way. If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit. When done, say only: "Milestone {{milestoneId}} researched." 
+ +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/research-slice.md b/src/resources/extensions/sf/prompts/research-slice.md index e15d2852e..099718564 100644 --- a/src/resources/extensions/sf/prompts/research-slice.md +++ b/src/resources/extensions/sf/prompts/research-slice.md @@ -62,3 +62,11 @@ After `sf_summary_save` succeeds, stop immediately. Do **not** call `sf_mileston This unit produces observations as its primary output — be especially diligent about filing sf-internal friction you notice along the way. If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit. When done, say only: "Slice {{sliceId}} researched." 
+ +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/rewrite-docs.md b/src/resources/extensions/sf/prompts/rewrite-docs.md index f2bada031..dc0dda416 100644 --- a/src/resources/extensions/sf/prompts/rewrite-docs.md +++ b/src/resources/extensions/sf/prompts/rewrite-docs.md @@ -33,3 +33,11 @@ An override was issued by the user that changes a fundamental decision or approa If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit. When done, say: "Override applied across all documents." 
+ +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/run-uat.md b/src/resources/extensions/sf/prompts/run-uat.md index c16f2cf04..a569e0fd7 100644 --- a/src/resources/extensions/sf/prompts/run-uat.md +++ b/src/resources/extensions/sf/prompts/run-uat.md @@ -91,3 +91,11 @@ date: If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit. When done, say: "UAT {{sliceId}} complete." + +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/triage-captures.md b/src/resources/extensions/sf/prompts/triage-captures.md index 876b83afc..69e0ebe86 100644 --- a/src/resources/extensions/sf/prompts/triage-captures.md +++ b/src/resources/extensions/sf/prompts/triage-captures.md @@ -66,3 +66,11 @@ For each capture, classify it as one of: **Important:** Do NOT execute any resolutions. Only classify and update CAPTURES.md. Resolution execution happens separately (in autonomous mode dispatch or manually by the user). When done, say: "Triage complete." 
+ +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/triage-self-feedback.md b/src/resources/extensions/sf/prompts/triage-self-feedback.md index 2efac89af..9d807a447 100644 --- a/src/resources/extensions/sf/prompts/triage-self-feedback.md +++ b/src/resources/extensions/sf/prompts/triage-self-feedback.md @@ -126,3 +126,11 @@ Rules: - All three top-level keys must be present even if their arrays are empty. When done, say: "Triage complete." + +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `<turn_status>complete</turn_status>` if verification passed +- `<turn_status>blocked</turn_status>` if you discovered a blocker +- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/prompts/validate-milestone.md b/src/resources/extensions/sf/prompts/validate-milestone.md index 05610323e..576141787 100644 --- a/src/resources/extensions/sf/prompts/validate-milestone.md +++ b/src/resources/extensions/sf/prompts/validate-milestone.md @@ -119,3 +119,11 @@ If verdict is `needs-remediation`: This unit produces observations as its primary output — be especially diligent about filing sf-internal friction you notice along the way. If during this unit you observe sf-the-tool friction — ambiguous prompts, missing context, misleading instructions, surprising behavior, prompt-quality issues, or improvement ideas — file them via `sf_self_report` before sealing the unit. This is the only way these observations reach forge's backlog and get triaged. Over-reporting is preferred to under-reporting; dedup happens later. Do NOT use this to file bugs in the user's project; only sf-the-tool itself. Do NOT autonomously act on or fix existing backlog entries — your scope is your unit. 
When done, say: "Milestone {{milestoneId}} validation complete — verdict: ." + +--- + +**After completing this step, output exactly one of these markers to signal state to the harness:** + +- `complete` if verification passed +- `blocked` if you discovered a blocker +- `giving_up` if you've tried multiple approaches without success diff --git a/src/resources/extensions/sf/tests/turn-status-parser.test.ts b/src/resources/extensions/sf/tests/turn-status-parser.test.ts new file mode 100644 index 000000000..b7a274b9e --- /dev/null +++ b/src/resources/extensions/sf/tests/turn-status-parser.test.ts @@ -0,0 +1,306 @@ +/** + * Turn Status Parser Tests (Tier 2.5 Phases 1-2) + * + * Validates that turn_status markers are correctly extracted and parsed + * from agent output. + */ + +import { describe, it, expect } from "vitest"; +import { + extractTurnStatus, + isValidTurnStatus, + describeTurnStatus, + resolveSignalFromStatus, + parseTurnStatusFull, + checkTurnStatusPrompts, +} from "../turn-status-parser.js"; + +describe("Turn Status Parser (Tier 2.5)", () => { + describe("extractTurnStatus", () => { + it("when_complete_marker_present_extracts_status", () => { + const output = "Task done.\n\ncomplete"; + + const result = extractTurnStatus(output); + + expect(result.status).toBe("complete"); + expect(result.markerFound).toBe(true); + }); + + it("when_blocked_marker_present_extracts_status", () => { + const output = "Cannot proceed.\n\nblocked"; + + const result = extractTurnStatus(output); + + expect(result.status).toBe("blocked"); + }); + + it("when_giving_up_marker_present_extracts_status", () => { + const output = + "Tried all approaches.\n\ngiving_up"; + + const result = extractTurnStatus(output); + + expect(result.status).toBe("giving_up"); + }); + + it("when_no_marker_returns_null_status", () => { + const output = "Task completed successfully."; + + const result = extractTurnStatus(output); + + expect(result.status).toBeNull(); + 
expect(result.markerFound).toBeUndefined(); + }); + + it("removes_marker_from_clean_output", () => { + const output = + "Final summary here.\n\ncomplete"; + + const result = extractTurnStatus(output); + + expect(result.cleanOutput).toBe("Final summary here."); + expect(result.cleanOutput).not.toContain("turn_status"); + }); + + it("handles_whitespace_around_marker", () => { + const output = + "Done.\n\n\ncomplete \n\n"; + + const result = extractTurnStatus(output); + + expect(result.status).toBe("complete"); + expect(result.cleanOutput).toBe("Done."); + }); + + it("is_case_insensitive", () => { + const outputs = [ + "COMPLETE", + "Complete", + "cOmPlEtE", + ]; + + for (const output of outputs) { + const result = extractTurnStatus(output); + expect(result.status).toBe("complete"); + } + }); + + it("handles_null_or_empty_input", () => { + expect(extractTurnStatus(null).status).toBeNull(); + expect(extractTurnStatus("").status).toBeNull(); + expect(extractTurnStatus(undefined).status).toBeNull(); + }); + + it("handles_non_string_input_gracefully", () => { + const result = extractTurnStatus(123); + expect(result.status).toBeNull(); + expect(result.cleanOutput).toBe(123); + }); + + it("finds_marker_only_at_end", () => { + const output = + "Found complete in middle\nmore text here"; + + const result = extractTurnStatus(output); + + // Should still extract (regex doesn't require end-of-string) + expect(result.status).toBe("complete"); + }); + }); + + describe("isValidTurnStatus", () => { + it("accepts_all_valid_statuses", () => { + expect(isValidTurnStatus("complete")).toBe(true); + expect(isValidTurnStatus("blocked")).toBe(true); + expect(isValidTurnStatus("giving_up")).toBe(true); + }); + + it("rejects_invalid_statuses", () => { + expect(isValidTurnStatus("error")).toBe(false); + expect(isValidTurnStatus("failed")).toBe(false); + expect(isValidTurnStatus("unknown")).toBe(false); + }); + + it("is_case_insensitive", () => { + 
expect(isValidTurnStatus("COMPLETE")).toBe(true); + expect(isValidTurnStatus("Blocked")).toBe(true); + expect(isValidTurnStatus("GIVING_UP")).toBe(true); + }); + + it("handles_null_or_empty", () => { + expect(isValidTurnStatus(null)).toBe(false); + expect(isValidTurnStatus("")).toBe(false); + }); + }); + + describe("describeTurnStatus", () => { + it("provides_human_readable_descriptions", () => { + expect(describeTurnStatus("complete")).toContain("complete"); + expect(describeTurnStatus("blocked")).toContain("blocked"); + expect(describeTurnStatus("giving_up")).toContain("giving"); + }); + + it("handles_invalid_status_gracefully", () => { + const desc = describeTurnStatus("unknown"); + expect(desc).toContain("Unknown"); + }); + }); + + describe("resolveSignalFromStatus", () => { + it("complete_returns_continue_action", () => { + const result = resolveSignalFromStatus("complete"); + + expect(result.action).toBe("continue"); + expect(result.reason).toBeDefined(); + }); + + it("blocked_returns_pause_with_signal_pause", () => { + const result = resolveSignalFromStatus("blocked"); + + expect(result.action).toBe("pause"); + expect(result.signal).toBe("SignalPause"); + }); + + it("giving_up_returns_reassess_with_phase_reassess", () => { + const result = resolveSignalFromStatus("giving_up"); + + expect(result.action).toBe("reassess"); + expect(result.signal).toBe("PhaseReassess"); + }); + + it("null_returns_continue_with_reason", () => { + const result = resolveSignalFromStatus(null); + + expect(result.action).toBe("continue"); + }); + + it("is_case_insensitive", () => { + expect(resolveSignalFromStatus("COMPLETE").action).toBe("continue"); + expect(resolveSignalFromStatus("Blocked").action).toBe("pause"); + expect(resolveSignalFromStatus("GIVING_UP").action).toBe("reassess"); + }); + }); + + describe("parseTurnStatusFull", () => { + it("extracts_and_resolves_complete_marker", () => { + const output = "Done!\n\ncomplete"; + + const result = parseTurnStatusFull(output); + + 
expect(result.status).toBe("complete"); + expect(result.action).toBe("continue"); + expect(result.cleanOutput).toBe("Done!"); + }); + + it("extracts_and_resolves_blocked_marker", () => { + const output = + "Cannot find file.\n\nblocked"; + + const result = parseTurnStatusFull(output); + + expect(result.status).toBe("blocked"); + expect(result.action).toBe("pause"); + expect(result.signal).toBe("SignalPause"); + }); + + it("extracts_and_resolves_giving_up_marker", () => { + const output = + "Multiple attempts failed.\n\ngiving_up"; + + const result = parseTurnStatusFull(output); + + expect(result.status).toBe("giving_up"); + expect(result.action).toBe("reassess"); + expect(result.signal).toBe("PhaseReassess"); + }); + + it("defaults_to_continue_when_no_marker", () => { + const output = "Completed successfully."; + + const result = parseTurnStatusFull(output); + + expect(result.status).toBeNull(); + expect(result.action).toBe("continue"); + }); + + it("includes_all_relevant_fields", () => { + const output = "Task done.\n\ncomplete"; + + const result = parseTurnStatusFull(output); + + expect(result).toHaveProperty("status"); + expect(result).toHaveProperty("action"); + expect(result).toHaveProperty("cleanOutput"); + expect(result).toHaveProperty("reason"); + }); + }); + + describe("checkTurnStatusPrompts", () => { + it("validates_marker_presence_in_prompts", () => { + // This test is informational; real validation requires file access + const result = checkTurnStatusPrompts( + "/home/mhugo/code/singularity-forge", + ); + + expect(result).toHaveProperty("issues"); + expect(result).toHaveProperty("allGood"); + expect(result).toHaveProperty("promptsChecked"); + }); + }); + + describe("Semantics and Signal Mapping", () => { + it("complete_enables_immediate_transition", () => { + const result = resolveSignalFromStatus("complete"); + + // Should not require pause or reassess + expect(result.signal).toBeUndefined(); + expect(result.action).not.toBe("pause"); + }); + + 
it("blocked_pauses_for_user_interaction", () => { + const result = resolveSignalFromStatus("blocked"); + + // Should pause, allowing user to intervene + expect(result.action).toBe("pause"); + expect(result.signal).toBe("SignalPause"); + }); + + it("giving_up_triggers_reassessment", () => { + const result = resolveSignalFromStatus("giving_up"); + + // Should trigger phase reassessment + expect(result.action).toBe("reassess"); + expect(result.signal).toBe("PhaseReassess"); + }); + }); + + describe("Integration", () => { + it("full_pipeline_from_output_to_action", () => { + const agentOutputs = [ + { + output: "Task complete.\n\ncomplete", + expectedAction: "continue", + }, + { + output: + "Blocker found.\n\nblocked", + expectedAction: "pause", + }, + { + output: + "Out of ideas.\n\ngiving_up", + expectedAction: "reassess", + }, + { + output: "No marker here", + expectedAction: "continue", + }, + ]; + + for (const test of agentOutputs) { + const result = parseTurnStatusFull(test.output); + expect(result.action).toBe(test.expectedAction); + } + }); + }); +}); diff --git a/src/resources/extensions/sf/turn-status-parser.js b/src/resources/extensions/sf/turn-status-parser.js new file mode 100644 index 000000000..b34412865 --- /dev/null +++ b/src/resources/extensions/sf/turn-status-parser.js @@ -0,0 +1,191 @@ +/** + * Turn Status Parser (Tier 2.5 Phase 2) + * + * Purpose: extract turn_status markers from agent output to detect semantic state. + * Allows agents to signal blocked/giving_up without requiring timeout or error detection. + * + * Consumer: auto.js, dispatch loop, or harness entry point where agent output is processed. + */ + +import { existsSync, readFileSync } from "node:fs"; +import { join } from "node:path"; + +/** + * Extract turn_status marker from end of agent output. + * Looks for `complete|blocked|giving_up` at end of response. 
/**
 * Extract the turn_status marker from agent output.
 *
 * Searches `output` for the first `<turn_status>VALUE</turn_status>` tag pair
 * whose VALUE is `complete`, `blocked` or `giving_up`. Matching is
 * case-insensitive and tolerates whitespace between the tags and the value.
 * Prompts ask agents to put the marker at the end of the response, but the
 * pattern is not anchored, so a marker anywhere in the text is accepted.
 *
 * @param {string} output - Full agent output text.
 * @returns {{status: ("complete"|"blocked"|"giving_up"|null), cleanOutput: any, markerFound?: true}}
 *   `status` is the lower-cased marker value, or null when no marker exists.
 *   `cleanOutput` is `output` with the first marker removed and trailing
 *   whitespace trimmed; when no marker is found (or `output` is empty or not a
 *   string) it is the input value returned unchanged. `markerFound` is present
 *   (and true) only when a marker was found.
 */
export function extractTurnStatus(output) {
  // Empty or non-string input cannot carry a marker; hand it back untouched.
  if (typeof output !== "string" || output === "") {
    return { status: null, cleanOutput: output };
  }

  // Both tags are required. Matching on the closing tag alone would accept any
  // prose that happens to end in "complete</turn_status>" and would leave a
  // dangling "<turn_status>" behind in cleanOutput.
  const markerRegex =
    /<turn_status>\s*(complete|blocked|giving_up)\s*<\/turn_status>/i;
  const match = markerRegex.exec(output);

  if (match === null) {
    return { status: null, cleanOutput: output };
  }

  return {
    status: match[1].toLowerCase(),
    // Strip the marker so the remaining text can be displayed as-is.
    cleanOutput: output.replace(markerRegex, "").trimEnd(),
    markerFound: true,
  };
}

/** The three status values an agent may report, in lower case. */
const TURN_STATUS_VALUES = Object.freeze(["complete", "blocked", "giving_up"]);

/**
 * Validate a turn_status value.
 *
 * The value is coerced with `String()` and lower-cased before comparison, so
 * the check is case-insensitive and never throws on null/undefined.
 *
 * @param {string} status - Status to validate.
 * @returns {boolean} True when `status` is one of the three allowed states.
 */
export function isValidTurnStatus(status) {
  return TURN_STATUS_VALUES.includes(String(status).toLowerCase());
}

/**
 * Describe turn_status semantics for logging/debugging.
 *
 * @param {string} status - Status value (case-insensitive).
 * @returns {string} Human-readable description, or "Unknown status" for any
 *   value that is not one of the three allowed states.
 */
export function describeTurnStatus(status) {
  switch (String(status).toLowerCase()) {
    case "complete":
      return "Task complete—agent verified and finished";

    case "blocked":
      return "Task blocked—discovered prerequisite or upstream failure";

    case "giving_up":
      return "Agent giving up—tried multiple approaches without success";

    default:
      return "Unknown status";
  }
}

/**
 * Map a turn_status value to the action the dispatch harness should take.
 *
 * Action meanings:
 * - "continue": normal completion path (status=complete, or no/unknown status)
 * - "pause": pause unit and wait for user (status=blocked → SignalPause)
 * - "reassess": transition to phase reassessment (status=giving_up → PhaseReassess)
 *
 * @param {string} turnStatus - Status from extractTurnStatus() (case-insensitive).
 * @returns {{action: ("continue"|"pause"|"reassess"), signal?: string, reason: string}}
 *   `signal` is only present for "pause" and "reassess". Anything that is not
 *   a recognised status (including null) falls back to "continue", so the
 *   function always returns a usable action.
 */
export function resolveSignalFromStatus(turnStatus) {
  switch (String(turnStatus).toLowerCase()) {
    case "complete":
      return {
        action: "continue",
        reason: "Agent marked task complete",
      };

    case "blocked":
      return {
        action: "pause",
        signal: "SignalPause",
        reason: "Agent discovered blocker—pausing for user",
      };

    case "giving_up":
      return {
        action: "reassess",
        signal: "PhaseReassess",
        reason: "Agent giving up—reassessing phase strategy",
      };

    default:
      return {
        action: "continue",
        reason: "No turn_status marker detected—using default completion path",
      };
  }
}

/**
 * End-to-end parse: extract status from output and resolve action.
 * Convenience wrapper combining extractTurnStatus() and resolveSignalFromStatus().
 *
 * @param {string} output - Full agent output.
 * @returns {object} The fields of extractTurnStatus() merged with the fields
 *   of the resolved action: { status, cleanOutput, markerFound?, action,
 *   signal?, reason }. Without a marker the action is "continue" and the
 *   reason is "No marker found".
 */
export function parseTurnStatusFull(output) {
  const extracted = extractTurnStatus(output);
  const resolved =
    extracted.status === null
      ? { action: "continue", reason: "No marker found" }
      : resolveSignalFromStatus(extracted.status);

  return {
    ...extracted,
    ...resolved,
  };
}

/** Prompt templates inspected when the caller does not supply its own list. */
const DEFAULT_TURN_STATUS_PROMPTS = Object.freeze([
  "execute-task.md",
  "complete-slice.md",
  "research-slice.md",
  "plan-slice.md",
  "research-milestone.md",
  "plan-milestone.md",
]);

/** Text a prompt template must contain to count as carrying the marker. */
const TURN_STATUS_OPEN_TAG = "<turn_status>";

/**
 * Doctor check for turn_status usage in prompts.
 *
 * For every prompt name, verifies that the file exists under
 * `<sfRoot>/src/resources/extensions/sf/prompts` and that its text contains
 * the `<turn_status>` tag. Problems are collected as strings rather than
 * thrown, so the check always returns a report.
 *
 * @param {string} sfRoot - SF root directory.
 * @param {readonly string[]} [promptNames] - Prompt file names to inspect.
 *   Defaults to the six prompts in DEFAULT_TURN_STATUS_PROMPTS.
 * @returns {{issues: string[], allGood: boolean, promptsChecked: number}}
 *   `promptsChecked` is 0 when the prompts directory could not be located,
 *   otherwise the number of prompt names inspected.
 */
export function checkTurnStatusPrompts(
  sfRoot,
  promptNames = DEFAULT_TURN_STATUS_PROMPTS,
) {
  // path.join() throws on non-string input; report it instead of crashing.
  if (typeof sfRoot !== "string") {
    return {
      issues: ["sfRoot must be a string path"],
      allGood: false,
      promptsChecked: 0,
    };
  }

  const promptsDir = join(sfRoot, "src/resources/extensions/sf/prompts");

  if (!existsSync(promptsDir)) {
    return {
      issues: ["prompts directory not found"],
      allGood: false,
      promptsChecked: 0,
    };
  }

  const issues = [];

  for (const prompt of promptNames) {
    const promptPath = join(promptsDir, prompt);

    if (!existsSync(promptPath)) {
      issues.push(`Missing prompt: ${prompt}`);
      continue;
    }

    let content;
    try {
      content = readFileSync(promptPath, "utf8");
    } catch (err) {
      // e.g. the path is a directory or not readable; keep checking the rest.
      issues.push(`Unreadable prompt: ${prompt} (${err.message})`);
      continue;
    }

    // Must test for the actual tag: includes("") is true for every string and
    // would make this check pass unconditionally.
    if (!content.includes(TURN_STATUS_OPEN_TAG)) {
      issues.push(`Prompt ${prompt} missing turn_status marker template`);
    }
  }

  return {
    issues,
    allGood: issues.length === 0,
    promptsChecked: promptNames.length,
  };
}