test: cover adaptive uok circuit breaker
This commit is contained in:
parent
856ce4d530
commit
8f5f33611a
5 changed files with 435 additions and 293 deletions
|
|
@ -7,8 +7,8 @@
|
|||
* Unpark: Removes the PARKED.md marker. The milestone resumes normal state
|
||||
* derivation (active/pending depending on position and dependencies).
|
||||
*
|
||||
* Discard: Permanently removes the milestone directory. Also prunes
|
||||
* QUEUE-ORDER.json if the discarded milestone was in it.
|
||||
* Discard: Permanently removes the milestone directory. Also prunes it from the
|
||||
* DB-backed queue order, with legacy JSON fallback when SQLite is unavailable.
|
||||
*/
|
||||
import {
|
||||
existsSync,
|
||||
|
|
@ -108,7 +108,7 @@ export function unparkMilestone(basePath, milestoneId) {
|
|||
// ─── Discard ───────────────────────────────────────────────────────────────
|
||||
/**
|
||||
* Discard a milestone — permanently removes the milestone directory and
|
||||
* prunes it from QUEUE-ORDER.json if present.
|
||||
* prunes it from the queue order if present.
|
||||
* Returns true if successfully discarded, false if milestone not found.
|
||||
*/
|
||||
export function discardMilestone(basePath, milestoneId) {
|
||||
|
|
@ -126,7 +126,7 @@ export function discardMilestone(basePath, milestoneId) {
|
|||
);
|
||||
}
|
||||
rmSync(mDir, { recursive: true, force: true });
|
||||
// Prune from queue order if present
|
||||
// Prune from queue order if present.
|
||||
const order = loadQueueOrder(basePath);
|
||||
if (order && order.includes(milestoneId)) {
|
||||
saveQueueOrder(
|
||||
|
|
|
|||
|
|
@ -16,17 +16,17 @@ You are a project reorganization assistant for a SF (Singularity Forge) project.
|
|||
|
||||
## Supported Operations
|
||||
|
||||
<!-- NOTE: Park, unpark, reorder, discard, and dependency-update operations are intentionally
|
||||
file-based. No sf_* tool API exists for these milestone-lifecycle mutations yet.
|
||||
The single-writer DB tools (sf_plan_milestone, sf_complete_milestone, etc.) own
|
||||
create and complete; queue management is file-driven until tool support is added. -->
|
||||
<!-- NOTE: Park, unpark, discard, and dependency-update operations still use
|
||||
milestone lifecycle files until dedicated tool APIs exist. Queue order is
|
||||
DB-backed through SF's queue-order helper when SQLite is available; the
|
||||
legacy `.sf/QUEUE-ORDER.json` file is fallback-only. -->
|
||||
|
||||
### Reorder milestones
|
||||
Change execution order of pending/active milestones. Write `.sf/QUEUE-ORDER.json`:
|
||||
```json
|
||||
{ "order": ["M003", "M001", "M002"], "updatedAt": "<ISO timestamp>" }
|
||||
```
|
||||
Only include non-complete milestone IDs. Validate dependency constraints before saving.
|
||||
Change execution order of pending/active milestones through SF's queue-order
|
||||
mechanism. The runtime stores the order in SQLite `milestones.sequence` when the DB is
|
||||
available. Only use `.sf/QUEUE-ORDER.json` for explicit legacy fallback recovery.
|
||||
Only include non-complete milestone IDs. Validate dependency constraints before
|
||||
saving.
|
||||
|
||||
### Park a milestone
|
||||
Temporarily shelve a milestone (reversible). Create a `{ID}-PARKED.md` file in the milestone directory:
|
||||
|
|
@ -56,12 +56,13 @@ Skipped slices are treated as closed by the state machine (like "complete" but d
|
|||
**CRITICAL — Non-bypassable gate:** Skipping a slice is a permanent DB operation. You MUST confirm with the user before calling `sf_skip_slice`. If the user does not respond or gives an ambiguous answer, you MUST re-ask — never proceed without explicit approval.
|
||||
|
||||
### Discard a milestone
|
||||
**Permanently** delete a milestone directory and prune it from QUEUE-ORDER.json.
|
||||
**Permanently** delete a milestone directory and prune it from the DB-backed
|
||||
queue order.
|
||||
|
||||
**CRITICAL — Non-bypassable gate:** Discarding is irreversible. You MUST confirm with the user before discarding. Warn explicitly if the milestone has completed work. If the user does not respond or gives an ambiguous answer, you MUST re-ask — never rationalize past the block. A missing confirmation is a "do not discard."
|
||||
|
||||
### Add a new milestone
|
||||
Use the `sf_milestone_generate_id` tool to get the next ID, then call `sf_summary_save` with `milestone_id: {ID}`, `artifact_type: "CONTEXT"`, and the scope/goals/success criteria as `content` — the tool writes the context file to disk and persists to DB. Update QUEUE-ORDER.json to place it at the desired position.
|
||||
Use the `sf_milestone_generate_id` tool to get the next ID, then call `sf_summary_save` with `milestone_id: {ID}`, `artifact_type: "CONTEXT"`, and the scope/goals/success criteria as `content` — the tool writes the context file to disk and persists to DB. Update the DB-backed queue order to place it at the desired position.
|
||||
|
||||
### Update dependencies
|
||||
Edit `depends_on` in the YAML frontmatter of a milestone's `{ID}-CONTEXT.md` file. For example:
|
||||
|
|
@ -81,7 +82,7 @@ If a proposed order would violate constraints, explain the issue and suggest alt
|
|||
|
||||
## After Each Change
|
||||
|
||||
1. Execute the change (write/delete files, update QUEUE-ORDER.json)
|
||||
1. Execute the change (write/delete files, update DB-backed queue order when order changes)
|
||||
2. Show the updated milestone order
|
||||
3. Note if the active milestone changed as a result
|
||||
4. Ask if there's anything else to adjust
|
||||
|
|
@ -91,7 +92,7 @@ If a proposed order would violate constraints, explain the issue and suggest alt
|
|||
- Do NOT modify completed milestones — they're done
|
||||
- Do NOT park completed milestones — it would corrupt dependency satisfaction
|
||||
- Park is preferred over discard when a milestone has any completed work
|
||||
- Always persist queue order changes to `.sf/QUEUE-ORDER.json`
|
||||
- Always persist queue order changes through SF's queue-order mechanism; do not hand-edit `.sf/QUEUE-ORDER.json` unless explicitly operating in DB-unavailable legacy fallback
|
||||
- {{commitInstruction}}
|
||||
|
||||
### Report sf-internal observations
|
||||
|
|
|
|||
|
|
@ -7,53 +7,53 @@
|
|||
* Consumer: QA and developers verifying turn_status system behavior.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
extractTurnStatus,
|
||||
resolveSignalFromStatus,
|
||||
parseTurnStatusFull,
|
||||
isValidTurnStatus,
|
||||
describeTurnStatus,
|
||||
checkTurnStatusPrompts,
|
||||
checkTurnStatusPrompts,
|
||||
describeTurnStatus,
|
||||
extractTurnStatus,
|
||||
isValidTurnStatus,
|
||||
parseTurnStatusFull,
|
||||
resolveSignalFromStatus,
|
||||
} from "../turn-status-parser.js";
|
||||
|
||||
describe("Turn Status Integration Tests (Tier 2.5)", () => {
|
||||
describe("End-to-End Signal Pipeline", () => {
|
||||
it("complete_marker_produces_continue_action", () => {
|
||||
const agentOutput = `
|
||||
describe("End-to-End Signal Pipeline", () => {
|
||||
it("complete_marker_produces_continue_action", () => {
|
||||
const agentOutput = `
|
||||
I have successfully completed the task.
|
||||
All tests pass, code is reviewed, ready to merge.
|
||||
|
||||
<turn_status>complete</turn_status>
|
||||
`;
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.action).toBe("continue");
|
||||
expect(result.signal).toBeUndefined();
|
||||
expect(result.markerFound).toBe(true);
|
||||
expect(result.cleanOutput).not.toContain("<turn_status>");
|
||||
});
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.action).toBe("continue");
|
||||
expect(result.signal).toBeUndefined();
|
||||
expect(result.markerFound).toBe(true);
|
||||
expect(result.cleanOutput).not.toContain("<turn_status>");
|
||||
});
|
||||
|
||||
it("blocked_marker_produces_pause_signal", () => {
|
||||
const agentOutput = `
|
||||
it("blocked_marker_produces_pause_signal", () => {
|
||||
const agentOutput = `
|
||||
I discovered that the database schema is not documented.
|
||||
I need this information to proceed with the implementation.
|
||||
Pausing here pending user input.
|
||||
|
||||
<turn_status>blocked</turn_status>
|
||||
`;
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
|
||||
expect(result.status).toBe("blocked");
|
||||
expect(result.action).toBe("pause");
|
||||
expect(result.signal).toBe("SignalPause");
|
||||
expect(result.markerFound).toBe(true);
|
||||
expect(result.reason).toContain("blocker");
|
||||
});
|
||||
expect(result.status).toBe("blocked");
|
||||
expect(result.action).toBe("pause");
|
||||
expect(result.signal).toBe("SignalPause");
|
||||
expect(result.markerFound).toBe(true);
|
||||
expect(result.reason).toContain("blocker");
|
||||
});
|
||||
|
||||
it("giving_up_marker_produces_reassess_signal", () => {
|
||||
const agentOutput = `
|
||||
it("giving_up_marker_produces_reassess_signal", () => {
|
||||
const agentOutput = `
|
||||
I have tried multiple approaches:
|
||||
1. Optimization A - didn't work
|
||||
2. Optimization B - made it worse
|
||||
|
|
@ -64,90 +64,90 @@ Recommending phase reassessment.
|
|||
|
||||
<turn_status>giving_up</turn_status>
|
||||
`;
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
|
||||
expect(result.status).toBe("giving_up");
|
||||
expect(result.action).toBe("reassess");
|
||||
expect(result.signal).toBe("PhaseReassess");
|
||||
expect(result.markerFound).toBe(true);
|
||||
expect(result.reason).toContain("giving up");
|
||||
});
|
||||
expect(result.status).toBe("giving_up");
|
||||
expect(result.action).toBe("reassess");
|
||||
expect(result.signal).toBe("PhaseReassess");
|
||||
expect(result.markerFound).toBe(true);
|
||||
expect(result.reason).toContain("giving up");
|
||||
});
|
||||
|
||||
it("no_marker_defaults_to_continue", () => {
|
||||
const agentOutput = `
|
||||
it("no_marker_defaults_to_continue", () => {
|
||||
const agentOutput = `
|
||||
I have successfully completed the task.
|
||||
All tests pass, code is reviewed, ready to merge.
|
||||
`;
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
|
||||
expect(result.status).toBeNull();
|
||||
expect(result.action).toBe("continue");
|
||||
expect(result.markerFound).toBeUndefined();
|
||||
expect(result.cleanOutput).toBe(agentOutput);
|
||||
});
|
||||
});
|
||||
expect(result.status).toBeNull();
|
||||
expect(result.action).toBe("continue");
|
||||
expect(result.markerFound).toBeUndefined();
|
||||
expect(result.cleanOutput).toBe(agentOutput);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Marker Placement and Format", () => {
|
||||
it("marker_on_separate_line_at_end", () => {
|
||||
const output = `Task complete.
|
||||
describe("Marker Placement and Format", () => {
|
||||
it("marker_on_separate_line_at_end", () => {
|
||||
const output = `Task complete.
|
||||
|
||||
<turn_status>complete</turn_status>`;
|
||||
const result = extractTurnStatus(output);
|
||||
const result = extractTurnStatus(output);
|
||||
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.cleanOutput).toBe("Task complete.");
|
||||
});
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.cleanOutput).toBe("Task complete.");
|
||||
});
|
||||
|
||||
it("marker_with_trailing_whitespace", () => {
|
||||
const output = `Task complete.
|
||||
it("marker_with_trailing_whitespace", () => {
|
||||
const output = `Task complete.
|
||||
<turn_status>complete</turn_status>
|
||||
`;
|
||||
const result = extractTurnStatus(output);
|
||||
const result = extractTurnStatus(output);
|
||||
|
||||
expect(result.status).toBe("complete");
|
||||
});
|
||||
expect(result.status).toBe("complete");
|
||||
});
|
||||
|
||||
it("marker_case_insensitive", () => {
|
||||
const outputs = [
|
||||
"<turn_status>COMPLETE</turn_status>",
|
||||
"<turn_status>Complete</turn_status>",
|
||||
"<turn_status>CoMpLeTe</turn_status>",
|
||||
];
|
||||
it("marker_case_insensitive", () => {
|
||||
const outputs = [
|
||||
"<turn_status>COMPLETE</turn_status>",
|
||||
"<turn_status>Complete</turn_status>",
|
||||
"<turn_status>CoMpLeTe</turn_status>",
|
||||
];
|
||||
|
||||
for (const output of outputs) {
|
||||
const result = extractTurnStatus(output);
|
||||
expect(result.status).toBe("complete");
|
||||
}
|
||||
});
|
||||
for (const output of outputs) {
|
||||
const result = extractTurnStatus(output);
|
||||
expect(result.status).toBe("complete");
|
||||
}
|
||||
});
|
||||
|
||||
it("marker_not_at_end_ignored", () => {
|
||||
const output = `<turn_status>complete</turn_status>
|
||||
it("marker_not_at_end_ignored", () => {
|
||||
const output = `<turn_status>complete</turn_status>
|
||||
|
||||
Additional notes here that come after marker.`;
|
||||
const result = extractTurnStatus(output);
|
||||
const result = extractTurnStatus(output);
|
||||
|
||||
// Marker not at end, so should be null
|
||||
expect(result.status).toBeNull();
|
||||
});
|
||||
// Marker not at end, so should be null
|
||||
expect(result.status).toBeNull();
|
||||
});
|
||||
|
||||
it("malformed_marker_ignored", () => {
|
||||
const malformed = [
|
||||
"<turn_status>complete",
|
||||
"turn_status>complete</turn_status>",
|
||||
"<turn_status>complete></turn_status>",
|
||||
"<turn_status>invalid_status</turn_status>",
|
||||
];
|
||||
it("malformed_marker_ignored", () => {
|
||||
const malformed = [
|
||||
"<turn_status>complete",
|
||||
"turn_status>complete</turn_status>",
|
||||
"<turn_status>complete></turn_status>",
|
||||
"<turn_status>invalid_status</turn_status>",
|
||||
];
|
||||
|
||||
for (const output of malformed) {
|
||||
const result = extractTurnStatus(output);
|
||||
expect(result.status).toBeNull();
|
||||
}
|
||||
});
|
||||
});
|
||||
for (const output of malformed) {
|
||||
const result = extractTurnStatus(output);
|
||||
expect(result.status).toBeNull();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("Multi-Block Agent Output", () => {
|
||||
it("marker_with_code_blocks_and_messages", () => {
|
||||
const output = `
|
||||
describe("Multi-Block Agent Output", () => {
|
||||
it("marker_with_code_blocks_and_messages", () => {
|
||||
const output = `
|
||||
I implemented the feature. Here's the code:
|
||||
|
||||
\`\`\`typescript
|
||||
|
|
@ -160,15 +160,15 @@ Testing completed successfully. Ready for review.
|
|||
|
||||
<turn_status>complete</turn_status>
|
||||
`;
|
||||
const result = parseTurnStatusFull(output);
|
||||
const result = parseTurnStatusFull(output);
|
||||
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.cleanOutput).toContain("function example");
|
||||
expect(result.cleanOutput).not.toContain("<turn_status>");
|
||||
});
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.cleanOutput).toContain("function example");
|
||||
expect(result.cleanOutput).not.toContain("<turn_status>");
|
||||
});
|
||||
|
||||
it("marker_with_json_output", () => {
|
||||
const output = `
|
||||
it("marker_with_json_output", () => {
|
||||
const output = `
|
||||
Analysis results:
|
||||
\`\`\`json
|
||||
{"status": "ok", "findings": []}
|
||||
|
|
@ -178,14 +178,14 @@ Analysis completed. No issues found.
|
|||
|
||||
<turn_status>complete</turn_status>
|
||||
`;
|
||||
const result = parseTurnStatusFull(output);
|
||||
const result = parseTurnStatusFull(output);
|
||||
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.cleanOutput).toContain('"status": "ok"');
|
||||
});
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.cleanOutput).toContain('"status": "ok"');
|
||||
});
|
||||
|
||||
it("marker_with_multiline_tool_output", () => {
|
||||
const output = `
|
||||
it("marker_with_multiline_tool_output", () => {
|
||||
const output = `
|
||||
Tool execution results:
|
||||
===== OUTPUT START =====
|
||||
Line 1
|
||||
|
|
@ -197,167 +197,165 @@ Execution successful.
|
|||
|
||||
<turn_status>complete</turn_status>
|
||||
`;
|
||||
const result = parseTurnStatusFull(output);
|
||||
const result = parseTurnStatusFull(output);
|
||||
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.cleanOutput).toContain("Line 1");
|
||||
});
|
||||
});
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.cleanOutput).toContain("Line 1");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Error Handling and Edge Cases", () => {
|
||||
it("null_or_empty_input", () => {
|
||||
const inputs = [null, undefined, "", " "];
|
||||
describe("Error Handling and Edge Cases", () => {
|
||||
it("null_or_empty_input", () => {
|
||||
const inputs = [null, undefined, "", " "];
|
||||
|
||||
for (const input of inputs) {
|
||||
const result = extractTurnStatus(input as any);
|
||||
expect(result.status).toBeNull();
|
||||
}
|
||||
});
|
||||
for (const input of inputs) {
|
||||
const result = extractTurnStatus(input as any);
|
||||
expect(result.status).toBeNull();
|
||||
}
|
||||
});
|
||||
|
||||
it("very_long_output_with_marker", () => {
|
||||
const longOutput = "x".repeat(100000);
|
||||
const output = `${longOutput}
|
||||
it("very_long_output_with_marker", () => {
|
||||
const longOutput = "x".repeat(100000);
|
||||
const output = `${longOutput}
|
||||
|
||||
<turn_status>complete</turn_status>`;
|
||||
const result = extractTurnStatus(output);
|
||||
const result = extractTurnStatus(output);
|
||||
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.cleanOutput.length).toBe(100000); // long string + newline
|
||||
});
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.cleanOutput.length).toBe(100000); // only the long string remains; the separator newlines and marker are gone
|
||||
});
|
||||
|
||||
it("multiple_markers_uses_last_one", () => {
|
||||
// Regex matches last occurrence, so first marker is in content, last is at end
|
||||
const output = `First attempt: <turn_status>blocked</turn_status> (old)
|
||||
it("multiple_markers_uses_last_one", () => {
|
||||
// Regex matches last occurrence, so first marker is in content, last is at end
|
||||
const output = `First attempt: <turn_status>blocked</turn_status> (old)
|
||||
|
||||
Second attempt completed.
|
||||
|
||||
<turn_status>complete</turn_status>`;
|
||||
const result = extractTurnStatus(output);
|
||||
const result = extractTurnStatus(output);
|
||||
|
||||
expect(result.status).toBe("complete");
|
||||
});
|
||||
expect(result.status).toBe("complete");
|
||||
});
|
||||
|
||||
it("non_string_input_graceful", () => {
|
||||
const inputs = [123, { text: "hello" }, ["array"], true];
|
||||
it("non_string_input_graceful", () => {
|
||||
const inputs = [123, { text: "hello" }, ["array"], true];
|
||||
|
||||
for (const input of inputs) {
|
||||
const result = extractTurnStatus(input as any);
|
||||
expect(result.status).toBeNull();
|
||||
expect(result.cleanOutput).toBe(input);
|
||||
}
|
||||
});
|
||||
});
|
||||
for (const input of inputs) {
|
||||
const result = extractTurnStatus(input as any);
|
||||
expect(result.status).toBeNull();
|
||||
expect(result.cleanOutput).toBe(input);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("Signal Resolution Semantics", () => {
|
||||
it("complete_has_no_special_signal", () => {
|
||||
const result = resolveSignalFromStatus("complete");
|
||||
describe("Signal Resolution Semantics", () => {
|
||||
it("complete_has_no_special_signal", () => {
|
||||
const result = resolveSignalFromStatus("complete");
|
||||
|
||||
expect(result.action).toBe("continue");
|
||||
expect(result.signal).toBeUndefined();
|
||||
});
|
||||
expect(result.action).toBe("continue");
|
||||
expect(result.signal).toBeUndefined();
|
||||
});
|
||||
|
||||
it("blocked_sets_signal_pause", () => {
|
||||
const result = resolveSignalFromStatus("blocked");
|
||||
it("blocked_sets_signal_pause", () => {
|
||||
const result = resolveSignalFromStatus("blocked");
|
||||
|
||||
expect(result.action).toBe("pause");
|
||||
expect(result.signal).toBe("SignalPause");
|
||||
expect(result.reason).toContain("blocker");
|
||||
});
|
||||
expect(result.action).toBe("pause");
|
||||
expect(result.signal).toBe("SignalPause");
|
||||
expect(result.reason).toContain("blocker");
|
||||
});
|
||||
|
||||
it("giving_up_sets_signal_reassess", () => {
|
||||
const result = resolveSignalFromStatus("giving_up");
|
||||
it("giving_up_sets_signal_reassess", () => {
|
||||
const result = resolveSignalFromStatus("giving_up");
|
||||
|
||||
expect(result.action).toBe("reassess");
|
||||
expect(result.signal).toBe("PhaseReassess");
|
||||
expect(result.reason).toContain("giving up");
|
||||
});
|
||||
expect(result.action).toBe("reassess");
|
||||
expect(result.signal).toBe("PhaseReassess");
|
||||
expect(result.reason).toContain("giving up");
|
||||
});
|
||||
|
||||
it("null_status_defaults_to_continue", () => {
|
||||
const result = resolveSignalFromStatus(null);
|
||||
it("null_status_defaults_to_continue", () => {
|
||||
const result = resolveSignalFromStatus(null);
|
||||
|
||||
expect(result.action).toBe("continue");
|
||||
});
|
||||
expect(result.action).toBe("continue");
|
||||
});
|
||||
|
||||
it("unknown_status_defaults_to_continue", () => {
|
||||
const result = resolveSignalFromStatus("unknown_status");
|
||||
it("unknown_status_defaults_to_continue", () => {
|
||||
const result = resolveSignalFromStatus("unknown_status");
|
||||
|
||||
expect(result.action).toBe("continue");
|
||||
});
|
||||
});
|
||||
expect(result.action).toBe("continue");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Validation and Introspection", () => {
|
||||
it("isValidTurnStatus_accepts_all_three", () => {
|
||||
expect(isValidTurnStatus("complete")).toBe(true);
|
||||
expect(isValidTurnStatus("blocked")).toBe(true);
|
||||
expect(isValidTurnStatus("giving_up")).toBe(true);
|
||||
});
|
||||
describe("Validation and Introspection", () => {
|
||||
it("isValidTurnStatus_accepts_all_three", () => {
|
||||
expect(isValidTurnStatus("complete")).toBe(true);
|
||||
expect(isValidTurnStatus("blocked")).toBe(true);
|
||||
expect(isValidTurnStatus("giving_up")).toBe(true);
|
||||
});
|
||||
|
||||
it("isValidTurnStatus_case_insensitive", () => {
|
||||
expect(isValidTurnStatus("COMPLETE")).toBe(true);
|
||||
expect(isValidTurnStatus("Blocked")).toBe(true);
|
||||
expect(isValidTurnStatus("GIVING_UP")).toBe(true);
|
||||
});
|
||||
it("isValidTurnStatus_case_insensitive", () => {
|
||||
expect(isValidTurnStatus("COMPLETE")).toBe(true);
|
||||
expect(isValidTurnStatus("Blocked")).toBe(true);
|
||||
expect(isValidTurnStatus("GIVING_UP")).toBe(true);
|
||||
});
|
||||
|
||||
it("isValidTurnStatus_rejects_invalid", () => {
|
||||
const invalid = [
|
||||
"pending",
|
||||
"running",
|
||||
"error",
|
||||
"paused",
|
||||
"unknown",
|
||||
"",
|
||||
null,
|
||||
undefined,
|
||||
];
|
||||
it("isValidTurnStatus_rejects_invalid", () => {
|
||||
const invalid = [
|
||||
"pending",
|
||||
"running",
|
||||
"error",
|
||||
"paused",
|
||||
"unknown",
|
||||
"",
|
||||
null,
|
||||
undefined,
|
||||
];
|
||||
|
||||
for (const status of invalid) {
|
||||
expect(isValidTurnStatus(status)).toBe(false);
|
||||
}
|
||||
});
|
||||
for (const status of invalid) {
|
||||
expect(isValidTurnStatus(status)).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it("describeTurnStatus_provides_human_readable", () => {
|
||||
expect(describeTurnStatus("complete")).toContain(
|
||||
"Task complete",
|
||||
);
|
||||
expect(describeTurnStatus("blocked")).toContain("blocked");
|
||||
expect(describeTurnStatus("giving_up")).toContain("giving up");
|
||||
});
|
||||
it("describeTurnStatus_provides_human_readable", () => {
|
||||
expect(describeTurnStatus("complete")).toContain("Task complete");
|
||||
expect(describeTurnStatus("blocked")).toContain("blocked");
|
||||
expect(describeTurnStatus("giving_up")).toContain("giving up");
|
||||
});
|
||||
|
||||
it("describeTurnStatus_handles_invalid", () => {
|
||||
const desc = describeTurnStatus("unknown");
|
||||
expect(desc).toContain("Unknown");
|
||||
});
|
||||
});
|
||||
it("describeTurnStatus_handles_invalid", () => {
|
||||
const desc = describeTurnStatus("unknown");
|
||||
expect(desc).toContain("Unknown");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Doctor Check Integration", () => {
|
||||
it("checkTurnStatusPrompts_validates_marker_coverage", () => {
|
||||
// This test uses a real prompt directory from the repo
|
||||
const result = checkTurnStatusPrompts(process.cwd());
|
||||
describe("Doctor Check Integration", () => {
|
||||
it("checkTurnStatusPrompts_validates_marker_coverage", () => {
|
||||
// This test uses a real prompt directory from the repo
|
||||
const result = checkTurnStatusPrompts(process.cwd());
|
||||
|
||||
expect(result).toHaveProperty("issues");
|
||||
expect(result).toHaveProperty("allGood");
|
||||
expect(result).toHaveProperty("promptsChecked");
|
||||
expect(result).toHaveProperty("issues");
|
||||
expect(result).toHaveProperty("allGood");
|
||||
expect(result).toHaveProperty("promptsChecked");
|
||||
|
||||
// If prompts are in place, this should pass
|
||||
if (result.allGood) {
|
||||
expect(result.issues.length).toBe(0);
|
||||
expect(result.promptsChecked).toBeGreaterThan(0);
|
||||
}
|
||||
});
|
||||
// If prompts are in place, this should pass
|
||||
if (result.allGood) {
|
||||
expect(result.issues.length).toBe(0);
|
||||
expect(result.promptsChecked).toBeGreaterThan(0);
|
||||
}
|
||||
});
|
||||
|
||||
it("checkTurnStatusPrompts_detects_missing_markers", () => {
|
||||
// Create a temporary directory without markers
|
||||
// (This would require filesystem operations; simplified for illustration)
|
||||
const result = checkTurnStatusPrompts(process.cwd());
|
||||
it("checkTurnStatusPrompts_detects_missing_markers", () => {
|
||||
// Create a temporary directory without markers
|
||||
// (This would require filesystem operations; simplified for illustration)
|
||||
const result = checkTurnStatusPrompts(process.cwd());
|
||||
|
||||
expect(result).toHaveProperty("promptsChecked");
|
||||
expect(result.promptsChecked).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
});
|
||||
expect(result).toHaveProperty("promptsChecked");
|
||||
expect(result.promptsChecked).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Real-World Scenarios", () => {
|
||||
it("research_slice_complete_scenario", () => {
|
||||
const agentOutput = `
|
||||
describe("Real-World Scenarios", () => {
|
||||
it("research_slice_complete_scenario", () => {
|
||||
const agentOutput = `
|
||||
I researched the topic and found:
|
||||
1. Component architecture: React functional components recommended
|
||||
2. Performance: Memoization for large lists
|
||||
|
|
@ -367,15 +365,15 @@ All research documented in RESEARCH.md.
|
|||
|
||||
<turn_status>complete</turn_status>
|
||||
`;
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.action).toBe("continue");
|
||||
expect(result.cleanOutput).toContain("Component architecture");
|
||||
});
|
||||
expect(result.status).toBe("complete");
|
||||
expect(result.action).toBe("continue");
|
||||
expect(result.cleanOutput).toContain("Component architecture");
|
||||
});
|
||||
|
||||
it("execute_task_blocked_scenario", () => {
|
||||
const agentOutput = `
|
||||
it("execute_task_blocked_scenario", () => {
|
||||
const agentOutput = `
|
||||
I need to implement the auth system but:
|
||||
- The OAuth app credentials are not configured
|
||||
- The callback URL is not set in the provider dashboard
|
||||
|
|
@ -386,16 +384,16 @@ and provide the API documentation.
|
|||
|
||||
<turn_status>blocked</turn_status>
|
||||
`;
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
|
||||
expect(result.status).toBe("blocked");
|
||||
expect(result.action).toBe("pause");
|
||||
expect(result.signal).toBe("SignalPause");
|
||||
expect(result.cleanOutput).toContain("OAuth app credentials");
|
||||
});
|
||||
expect(result.status).toBe("blocked");
|
||||
expect(result.action).toBe("pause");
|
||||
expect(result.signal).toBe("SignalPause");
|
||||
expect(result.cleanOutput).toContain("OAuth app credentials");
|
||||
});
|
||||
|
||||
it("complete_slice_giving_up_scenario", () => {
|
||||
const agentOutput = `
|
||||
it("complete_slice_giving_up_scenario", () => {
|
||||
const agentOutput = `
|
||||
I attempted to optimize the query performance but:
|
||||
|
||||
Attempt 1: Index on user_id
|
||||
|
|
@ -416,51 +414,51 @@ I recommend we either accept current performance or expand scope for infrastruct
|
|||
|
||||
<turn_status>giving_up</turn_status>
|
||||
`;
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
const result = parseTurnStatusFull(agentOutput);
|
||||
|
||||
expect(result.status).toBe("giving_up");
|
||||
expect(result.action).toBe("reassess");
|
||||
expect(result.signal).toBe("PhaseReassess");
|
||||
expect(result.reason).toContain("giving up");
|
||||
});
|
||||
});
|
||||
expect(result.status).toBe("giving_up");
|
||||
expect(result.action).toBe("reassess");
|
||||
expect(result.signal).toBe("PhaseReassess");
|
||||
expect(result.reason).toContain("giving up");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Cross-Cutting Concerns", () => {
|
||||
it("parser_is_idempotent", () => {
|
||||
const output = `Task done.
|
||||
describe("Cross-Cutting Concerns", () => {
|
||||
it("parser_is_idempotent", () => {
|
||||
const output = `Task done.
|
||||
|
||||
<turn_status>complete</turn_status>`;
|
||||
const result1 = parseTurnStatusFull(output);
|
||||
const result2 = parseTurnStatusFull(output);
|
||||
const result1 = parseTurnStatusFull(output);
|
||||
const result2 = parseTurnStatusFull(output);
|
||||
|
||||
expect(result1).toEqual(result2);
|
||||
});
|
||||
expect(result1).toEqual(result2);
|
||||
});
|
||||
|
||||
it("signal_resolution_independent_of_output_content", () => {
|
||||
// Both should resolve to the same signal regardless of output content
|
||||
const outputs = [
|
||||
"Error: failed\n<turn_status>blocked</turn_status>",
|
||||
"Success: completed\n<turn_status>blocked</turn_status>",
|
||||
"\n<turn_status>blocked</turn_status>",
|
||||
];
|
||||
it("signal_resolution_independent_of_output_content", () => {
|
||||
// All outputs should resolve to the same signal regardless of output content
|
||||
const outputs = [
|
||||
"Error: failed\n<turn_status>blocked</turn_status>",
|
||||
"Success: completed\n<turn_status>blocked</turn_status>",
|
||||
"\n<turn_status>blocked</turn_status>",
|
||||
];
|
||||
|
||||
const results = outputs.map(parseTurnStatusFull);
|
||||
const results = outputs.map(parseTurnStatusFull);
|
||||
|
||||
for (const result of results) {
|
||||
expect(result.signal).toBe("SignalPause");
|
||||
expect(result.action).toBe("pause");
|
||||
}
|
||||
});
|
||||
for (const result of results) {
|
||||
expect(result.signal).toBe("SignalPause");
|
||||
expect(result.action).toBe("pause");
|
||||
}
|
||||
});
|
||||
|
||||
it("no_side_effects_on_input", () => {
|
||||
const output = `Task done.
|
||||
it("no_side_effects_on_input", () => {
|
||||
const output = `Task done.
|
||||
|
||||
<turn_status>complete</turn_status>`;
|
||||
const originalOutput = output;
|
||||
const originalOutput = output;
|
||||
|
||||
parseTurnStatusFull(output);
|
||||
parseTurnStatusFull(output);
|
||||
|
||||
expect(output).toBe(originalOutput);
|
||||
});
|
||||
});
|
||||
expect(output).toBe(originalOutput);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -13,9 +13,21 @@ import {
|
|||
} from "../sf-db.js";
|
||||
import { validateGate } from "../uok/contracts.js";
|
||||
import { UokGateRunner } from "../uok/gate-runner.js";
|
||||
import {
|
||||
buildMetricsText,
|
||||
invalidateMetricsCache,
|
||||
} from "../uok/metrics-exposition.js";
|
||||
|
||||
const tmpRoots = [];
|
||||
|
||||
/**
 * Put an environment variable back to a previously captured value.
 *
 * @param {string} name - Environment variable to restore.
 * @param {string | undefined} value - Captured value; `undefined` means the
 *   variable should be removed from `process.env` rather than assigned.
 */
function restoreEnv(name, value) {
	if (value !== undefined) {
		process.env[name] = value;
		return;
	}
	// Assigning `undefined` would store the string "undefined", so delete instead.
	delete process.env[name];
}
|
||||
|
||||
afterEach(() => {
|
||||
closeDatabase();
|
||||
for (const dir of tmpRoots.splice(0)) {
|
||||
|
|
@ -475,3 +487,119 @@ test("runner_register_when_invalid_gate_throws", () => {
|
|||
assert.throws(() => runner.register({ id: "x", type: "policy" }), /execute/);
|
||||
assert.throws(() => runner.register(null), /object/);
|
||||
});
|
||||
|
||||
// ─── Adaptive cooldown ─────────────────────────────────────────────────────
|
||||
|
||||
test("circuitBreaker_adaptive_cooldown_doubles_on_reopen", async () => {
	openDatabase(":memory:");

	// Breaker settings for this test: threshold 1, base cooldown 50ms, cap 200ms.
	const envOverrides = {
		SF_CIRCUIT_BREAKER_FAILURE_THRESHOLD: "1",
		SF_CIRCUIT_BREAKER_OPEN_DURATION_MS: "50",
		SF_CIRCUIT_BREAKER_MAX_OPEN_DURATION_MS: "200",
	};
	// Capture the prior values before overriding so `finally` can restore them.
	const savedEnv = Object.keys(envOverrides).map((name) => [
		name,
		process.env[name],
	]);
	Object.assign(process.env, envOverrides);

	const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
	const expectCooldown = (result, needle, label) =>
		assert.ok(
			result.rationale.includes(needle),
			`Expected ${label} in rationale, got: ${result.rationale}`,
		);

	try {
		const runner = new UokGateRunner();
		runner.register({
			id: "adaptive-cb",
			type: "verification",
			execute: async () => ({
				outcome: "fail",
				failureClass: "execution",
				rationale: "no",
			}),
		});

		// First failing run, then a second run whose rationale should report
		// the base 50ms cooldown.
		await runner.run("adaptive-cb", makeCtx());
		expectCooldown(await runner.run("adaptive-cb", makeCtx()), "50ms", "50ms");

		// Each step waits slightly longer than the previously reported cooldown,
		// fails once more, then checks the cooldown reported by the next run.
		// Expected progression: 100ms, 200ms, then 200ms again (capped).
		const reopenSteps = [
			{ waitMs: 60, needle: "100ms", label: "100ms" },
			{ waitMs: 110, needle: "200ms", label: "200ms" },
			{ waitMs: 210, needle: "200ms", label: "200ms capped" },
		];
		for (const step of reopenSteps) {
			await sleep(step.waitMs);
			await runner.run("adaptive-cb", makeCtx());
			const blocked = await runner.run("adaptive-cb", makeCtx());
			expectCooldown(blocked, step.needle, step.label);
		}
	} finally {
		for (const [name, previous] of savedEnv) {
			restoreEnv(name, previous);
		}
	}
});
|
||||
|
||||
test("circuitBreaker_per_gate_thresholds_respected", async () => {
	openDatabase(":memory:");

	// Per-gate threshold overrides: gate-a at 2 failures, gate-b at 5.
	const thresholdVars = [
		["SF_CIRCUIT_BREAKER_GATE_A_THRESHOLD", "2"],
		["SF_CIRCUIT_BREAKER_GATE_B_THRESHOLD", "5"],
	];
	// Capture the prior values before overriding so `finally` can restore them.
	const previousValues = thresholdVars.map(([name]) => [
		name,
		process.env[name],
	]);
	for (const [name, value] of thresholdVars) {
		process.env[name] = value;
	}

	// Builds a verification gate that fails on every execution.
	const failingGate = (id) => ({
		id,
		type: "verification",
		execute: async () => ({
			outcome: "fail",
			failureClass: "execution",
			rationale: "no",
		}),
	});
	const failTwice = async (runner, gateId) => {
		await runner.run(gateId, makeCtx());
		await runner.run(gateId, makeCtx());
	};

	try {
		const runner = new UokGateRunner();
		runner.register(failingGate("gate-a"));
		runner.register(failingGate("gate-b"));

		// gate-a threshold=2: two failures are expected to open the breaker.
		await failTwice(runner, "gate-a");
		const breakerA = getGateCircuitBreaker("gate-a");
		assert.equal(breakerA.state, "open");

		// gate-b threshold=5: the same two failures should leave it closed.
		await failTwice(runner, "gate-b");
		const breakerB = getGateCircuitBreaker("gate-b");
		assert.equal(breakerB.state, "closed");
	} finally {
		for (const [name, previous] of previousValues) {
			restoreEnv(name, previous);
		}
	}
});
|
||||
|
||||
// ─── Metrics caching ───────────────────────────────────────────────────────
|
||||
|
||||
test("metrics_cache_returns_same_text_within_ttl", async () => {
	openDatabase(":memory:");
	// Drop any cached text first so the first build below is not served from
	// a cache populated elsewhere.
	invalidateMetricsCache();

	// Two back-to-back builds must yield identical exposition text.
	const initialText = buildMetricsText();
	const repeatedText = buildMetricsText();

	assert.equal(initialText, repeatedText);
});
|
||||
|
|
|
|||
|
|
@ -60,6 +60,10 @@ function computeCooldownMs(breaker, thresholds) {
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Format a cooldown duration for circuit-breaker messages.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Whole seconds with an `s` suffix (rounded to nearest) when
 *   `ms` is at least 1000; otherwise the raw value with an `ms` suffix.
 */
function formatCooldownMs(ms) {
	if (ms >= 1000) {
		const wholeSeconds = Math.round(ms / 1000);
		return `${wholeSeconds}s`;
	}
	// Sub-second values are shown unrounded.
	return `${ms}ms`;
}
|
||||
|
||||
/**
 * Current wall-clock time as an ISO-8601 string.
 *
 * @returns {string} Result of `Date.prototype.toISOString()` for the moment
 *   of the call.
 */
function nowIso() {
	const current = new Date();
	return current.toISOString();
}
|
||||
|
|
@ -184,7 +188,7 @@ export class UokGateRunner {
|
|||
}
|
||||
return {
|
||||
blocked: true,
|
||||
reason: `Circuit breaker OPEN for ${gateId} (failure streak ${breaker.failureStreak}, cooldown ${Math.round(cooldownMs / 1000)}s). Cooldown until ${new Date(openedAt + cooldownMs).toISOString()}.`,
|
||||
reason: `Circuit breaker OPEN for ${gateId} (failure streak ${breaker.failureStreak}, cooldown ${formatCooldownMs(cooldownMs)}). Cooldown until ${new Date(openedAt + cooldownMs).toISOString()}.`,
|
||||
};
|
||||
}
|
||||
if (breaker.state === "half-open") {
|
||||
|
|
@ -450,6 +454,17 @@ export class UokGateRunner {
|
|||
// Update circuit breaker based on final outcome
|
||||
const succeeded = final?.outcome === "pass";
|
||||
this._updateCircuitBreaker(id, succeeded);
|
||||
if (final && !succeeded && breaker.state === "half-open") {
|
||||
const reopened = getGateCircuitBreaker(id);
|
||||
const cooldownMs = computeCooldownMs(
|
||||
reopened,
|
||||
resolveCircuitBreakerThresholds(id),
|
||||
);
|
||||
final = {
|
||||
...final,
|
||||
rationale: `${final.rationale}; Circuit breaker reopened for ${id} (failure streak ${reopened.failureStreak}, cooldown ${formatCooldownMs(cooldownMs)}).`,
|
||||
};
|
||||
}
|
||||
|
||||
return (
|
||||
final ?? {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue