merge: resolve conflict with main's assert.equal fix in doctor tests
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
020b4a876e
11 changed files with 266 additions and 65 deletions
6
package-lock.json
generated
6
package-lock.json
generated
|
|
@ -1,12 +1,12 @@
|
|||
{
|
||||
"name": "gsd-pi",
|
||||
"version": "2.46.1",
|
||||
"version": "2.49.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "gsd-pi",
|
||||
"version": "2.46.1",
|
||||
"version": "2.49.0",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"workspaces": [
|
||||
|
|
@ -9191,7 +9191,7 @@
|
|||
},
|
||||
"packages/pi-coding-agent": {
|
||||
"name": "@gsd/pi-coding-agent",
|
||||
"version": "2.46.1",
|
||||
"version": "2.49.0",
|
||||
"dependencies": {
|
||||
"@mariozechner/jiti": "^2.6.2",
|
||||
"@silvia-odwyer/photon-node": "^0.3.4",
|
||||
|
|
|
|||
|
|
@ -40,9 +40,18 @@ export function handleExtensionUIRequest(
|
|||
let response: Record<string, unknown>
|
||||
|
||||
switch (method) {
|
||||
case 'select':
|
||||
response = { type: 'extension_ui_response', id, value: event.options?.[0] ?? '' }
|
||||
case 'select': {
|
||||
// Lock-guard prompts list "View status" first, but headless needs "Force start"
|
||||
// to proceed. Detect by title and pick the force option.
|
||||
const title = String(event.title ?? '')
|
||||
let selected = event.options?.[0] ?? ''
|
||||
if (title.includes('Auto-mode is running') && event.options) {
|
||||
const forceOption = event.options.find(o => o.toLowerCase().includes('force start'))
|
||||
if (forceOption) selected = forceOption
|
||||
}
|
||||
response = { type: 'extension_ui_response', id, value: selected }
|
||||
break
|
||||
}
|
||||
case 'confirm':
|
||||
response = { type: 'extension_ui_response', id, confirmed: true }
|
||||
break
|
||||
|
|
|
|||
|
|
@ -90,8 +90,8 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions {
|
|||
if (!positionalStarted && arg.startsWith('--')) {
|
||||
if (arg === '--timeout' && i + 1 < args.length) {
|
||||
options.timeout = parseInt(args[++i], 10)
|
||||
if (Number.isNaN(options.timeout) || options.timeout <= 0) {
|
||||
process.stderr.write('[headless] Error: --timeout must be a positive integer (milliseconds)\n')
|
||||
if (Number.isNaN(options.timeout) || options.timeout < 0) {
|
||||
process.stderr.write('[headless] Error: --timeout must be a non-negative integer (milliseconds, 0 to disable)\n')
|
||||
process.exit(1)
|
||||
}
|
||||
} else if (arg === '--json') {
|
||||
|
|
@ -183,6 +183,14 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
|
|||
options.timeout = 600_000 // 10 minutes
|
||||
}
|
||||
|
||||
// auto-mode sessions are long-running (minutes to hours) with their own internal
|
||||
// per-unit timeout via auto-supervisor. Disable the overall timeout unless the
|
||||
// user explicitly set --timeout.
|
||||
const isAutoMode = options.command === 'auto'
|
||||
if (isAutoMode && options.timeout === 300_000) {
|
||||
options.timeout = 0
|
||||
}
|
||||
|
||||
// Supervised mode cannot share stdin with --context -
|
||||
if (options.supervised && options.context === '-') {
|
||||
process.stderr.write('[headless] Error: --supervised cannot be used with --context - (both require stdin)\n')
|
||||
|
|
@ -337,12 +345,14 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
|
|||
// Precompute supervised response timeout
|
||||
const responseTimeout = options.responseTimeout ?? 30_000
|
||||
|
||||
// Overall timeout
|
||||
const timeoutTimer = setTimeout(() => {
|
||||
process.stderr.write(`[headless] Timeout after ${options.timeout / 1000}s\n`)
|
||||
exitCode = 1
|
||||
resolveCompletion()
|
||||
}, options.timeout)
|
||||
// Overall timeout (disabled when options.timeout === 0, e.g. auto-mode)
|
||||
const timeoutTimer = options.timeout > 0
|
||||
? setTimeout(() => {
|
||||
process.stderr.write(`[headless] Timeout after ${options.timeout / 1000}s\n`)
|
||||
exitCode = 1
|
||||
resolveCompletion()
|
||||
}, options.timeout)
|
||||
: null
|
||||
|
||||
// Event handler
|
||||
client.onEvent((event) => {
|
||||
|
|
@ -434,7 +444,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
|
|||
interrupted = true
|
||||
exitCode = 1
|
||||
client.stop().finally(() => {
|
||||
clearTimeout(timeoutTimer)
|
||||
if (timeoutTimer) clearTimeout(timeoutTimer)
|
||||
if (idleTimer) clearTimeout(idleTimer)
|
||||
process.exit(exitCode)
|
||||
})
|
||||
|
|
@ -447,7 +457,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
|
|||
await client.start()
|
||||
} catch (err) {
|
||||
process.stderr.write(`[headless] Error: Failed to start RPC session: ${err instanceof Error ? err.message : String(err)}\n`)
|
||||
clearTimeout(timeoutTimer)
|
||||
if (timeoutTimer) clearTimeout(timeoutTimer)
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
|
|
@ -456,7 +466,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
|
|||
if (!internalProcess?.stdin) {
|
||||
process.stderr.write('[headless] Error: Cannot access child process stdin\n')
|
||||
await client.stop()
|
||||
clearTimeout(timeoutTimer)
|
||||
if (timeoutTimer) clearTimeout(timeoutTimer)
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
|
|
@ -511,7 +521,9 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
|
|||
process.stderr.write('[headless] Milestone ready — chaining into auto-mode...\n')
|
||||
}
|
||||
|
||||
// Reset completion state for the auto-mode phase
|
||||
// Reset completion state for the auto-mode phase.
|
||||
// Disable the overall timeout — auto-mode has its own internal supervisor.
|
||||
if (timeoutTimer) clearTimeout(timeoutTimer)
|
||||
completed = false
|
||||
milestoneReady = false
|
||||
blocked = false
|
||||
|
|
@ -532,7 +544,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
|
|||
}
|
||||
|
||||
// Cleanup
|
||||
clearTimeout(timeoutTimer)
|
||||
if (timeoutTimer) clearTimeout(timeoutTimer)
|
||||
if (idleTimer) clearTimeout(idleTimer)
|
||||
pendingResponseTimers.forEach((timer) => clearTimeout(timer))
|
||||
pendingResponseTimers.clear()
|
||||
|
|
|
|||
|
|
@ -35,7 +35,24 @@ Then:
|
|||
|
||||
**Success path** (all verifications passed — continue with steps 7–11):
|
||||
|
||||
7. **Persist completion through `gsd_complete_milestone`.** Call it with: `milestoneId`, `title`, `oneLiner`, `narrative`, `successCriteriaResults`, `definitionOfDoneResults`, `requirementOutcomes`, `keyDecisions`, `keyFiles`, `lessonsLearned`, `followUps`, `deviations`, `verificationPassed: true`. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
|
||||
7. **Persist completion through `gsd_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
|
||||
|
||||
**Required parameters:**
|
||||
- `milestoneId` (string) — Milestone ID (e.g. M001)
|
||||
- `title` (string) — Milestone title
|
||||
- `oneLiner` (string) — One-sentence summary of what the milestone achieved
|
||||
- `narrative` (string) — Detailed narrative of what happened during the milestone
|
||||
- `successCriteriaResults` (string) — Markdown detailing how each success criterion was met or not met
|
||||
- `definitionOfDoneResults` (string) — Markdown detailing how each definition-of-done item was met
|
||||
- `requirementOutcomes` (string) — Markdown detailing requirement status transitions with evidence
|
||||
- `keyDecisions` (array of strings) — Key architectural/pattern decisions made during the milestone
|
||||
- `keyFiles` (array of strings) — Key files created or modified during the milestone
|
||||
- `lessonsLearned` (array of strings) — Lessons learned during the milestone
|
||||
- `verificationPassed` (boolean) — Must be `true` — confirms that code change verification, success criteria, and definition of done checks all passed before completion
|
||||
|
||||
**Optional parameters:**
|
||||
- `followUps` (string) — Follow-up items for future milestones
|
||||
- `deviations` (string) — Deviations from the original plan
|
||||
8. For each requirement whose status changed in step 6, call `gsd_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.gsd/REQUIREMENTS.md` automatically.
|
||||
9. Update `.gsd/PROJECT.md` to reflect milestone completion and current project state.
|
||||
10. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.gsd/KNOWLEDGE.md`.
|
||||
|
|
|
|||
|
|
@ -142,9 +142,10 @@ Then **offer GitHub issue creation**: "Would you like me to create a GitHub issu
|
|||
If yes, create using the `bash` tool:
|
||||
|
||||
```bash
|
||||
gh issue create --repo gsd-build/gsd-2 \
|
||||
# Step 1: Create issue (use labels for metadata, NOT for classification — type is set via GraphQL)
|
||||
ISSUE_URL=$(gh issue create --repo gsd-build/gsd-2 \
|
||||
--title "..." \
|
||||
--label "bug" --label "auto-generated" \
|
||||
--label "auto-generated" \
|
||||
--body "$(cat <<'EOF'
|
||||
## Problem
|
||||
[1-2 sentence summary]
|
||||
|
|
@ -169,7 +170,13 @@ gh issue create --repo gsd-build/gsd-2 \
|
|||
---
|
||||
*Auto-generated by `/gsd forensics`*
|
||||
EOF
|
||||
)"
|
||||
)")
|
||||
|
||||
# Step 2: Set issue type via GraphQL (gh issue create has no --type flag)
|
||||
ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$')
|
||||
ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id')
|
||||
TYPE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id')
|
||||
gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }'
|
||||
```
|
||||
|
||||
### Redaction Rules (CRITICAL)
|
||||
|
|
|
|||
|
|
@ -144,7 +144,7 @@ describe('doctor-environment', async () => {
|
|||
const results = runEnvironmentChecks(dir);
|
||||
const depsCheck = results.find(r => r.name === "dependencies");
|
||||
assert.ok(depsCheck !== undefined, "dependencies check runs");
|
||||
assert.deepStrictEqual(depsCheck!.status, "ok", "npm marker newer than lockfile → not stale");
|
||||
assert.equal(depsCheck!.status, "ok", "npm marker newer than lockfile → not stale");
|
||||
}
|
||||
|
||||
console.log("\n=== env: yarn marker file newer than lockfile → ok (#1974) ===");
|
||||
|
|
@ -168,7 +168,7 @@ describe('doctor-environment', async () => {
|
|||
const results = runEnvironmentChecks(dir);
|
||||
const depsCheck = results.find(r => r.name === "dependencies");
|
||||
assert.ok(depsCheck !== undefined, "dependencies check runs");
|
||||
assert.deepStrictEqual(depsCheck!.status, "ok", "yarn marker newer than lockfile → not stale");
|
||||
assert.equal(depsCheck!.status, "ok", "yarn marker newer than lockfile → not stale");
|
||||
}
|
||||
|
||||
console.log("\n=== env: pnpm marker file newer than lockfile → ok (#1974) ===");
|
||||
|
|
@ -192,7 +192,7 @@ describe('doctor-environment', async () => {
|
|||
const results = runEnvironmentChecks(dir);
|
||||
const depsCheck = results.find(r => r.name === "dependencies");
|
||||
assert.ok(depsCheck !== undefined, "dependencies check runs");
|
||||
assert.deepStrictEqual(depsCheck!.status, "ok", "pnpm marker newer than lockfile → not stale");
|
||||
assert.equal(depsCheck!.status, "ok", "pnpm marker newer than lockfile → not stale");
|
||||
}
|
||||
|
||||
console.log("\n=== env: no marker file falls back to dir mtime → stale warning (#1974) ===");
|
||||
|
|
@ -213,7 +213,7 @@ describe('doctor-environment', async () => {
|
|||
const results = runEnvironmentChecks(dir);
|
||||
const depsCheck = results.find(r => r.name === "dependencies");
|
||||
assert.ok(depsCheck !== undefined, "dependencies check runs");
|
||||
assert.deepStrictEqual(depsCheck!.status, "warning", "no marker + lockfile newer → stale warning");
|
||||
assert.equal(depsCheck!.status, "warning", "no marker + lockfile newer → stale warning");
|
||||
}
|
||||
|
||||
// ── Env File Check ─────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -176,11 +176,15 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
|
|||
);
|
||||
payload.tools = tools;
|
||||
|
||||
// ── Session-level search budget (#1309) ──────────────────────────────
|
||||
// ── Session-level search budget (#1309, #compaction-safe) ─────────────
|
||||
// Count web_search_tool_result blocks in the conversation history to
|
||||
// determine how many native searches have already been used this session.
|
||||
// The Anthropic API's max_uses resets per request, so without this guard,
|
||||
// pause_turn → resubmit cycles allow unlimited total searches.
|
||||
//
|
||||
// Use the monotonic high-water mark: take the max of the history count
|
||||
// and the running counter. This prevents budget resets when context
|
||||
// compaction removes web_search_tool_result blocks from history.
|
||||
if (Array.isArray(messages)) {
|
||||
let historySearchCount = 0;
|
||||
for (const msg of messages) {
|
||||
|
|
@ -192,8 +196,9 @@ export function registerNativeSearchHooks(pi: NativeSearchPI): { getIsAnthropic:
|
|||
}
|
||||
}
|
||||
}
|
||||
// Sync counter from history (handles session restore / context replay)
|
||||
sessionSearchCount = historySearchCount;
|
||||
// High-water mark: never decrease the counter, even if compaction
|
||||
// removes web_search_tool_result blocks from the visible history.
|
||||
sessionSearchCount = Math.max(sessionSearchCount, historySearchCount);
|
||||
}
|
||||
|
||||
const remaining = Math.max(0, MAX_NATIVE_SEARCHES_PER_SESSION - sessionSearchCount);
|
||||
|
|
|
|||
|
|
@ -106,14 +106,20 @@ searchCache.startPurgeInterval(60_000);
|
|||
|
||||
// Consecutive duplicate search guard (#949)
|
||||
// Tracks recent query keys to detect and break search loops.
|
||||
const MAX_CONSECUTIVE_DUPES = 3;
|
||||
const MAX_CONSECUTIVE_DUPES = 1;
|
||||
let lastSearchKey = "";
|
||||
let consecutiveDupeCount = 0;
|
||||
|
||||
/** Reset session-scoped duplicate-search guard state. */
|
||||
// Session-level total search budget (all queries, not just duplicates).
|
||||
// Prevents unbounded search accumulation across varied queries.
|
||||
const MAX_SEARCHES_PER_SESSION = 15;
|
||||
let sessionTotalSearches = 0;
|
||||
|
||||
/** Reset session-scoped search guard state (both duplicate and budget). */
|
||||
export function resetSearchLoopGuardState(): void {
|
||||
lastSearchKey = "";
|
||||
consecutiveDupeCount = 0;
|
||||
sessionTotalSearches = 0;
|
||||
}
|
||||
|
||||
// Summarizer responses: max 50 entries, 15-minute TTL
|
||||
|
|
@ -357,6 +363,17 @@ export function registerSearchTool(pi: ExtensionAPI) {
|
|||
};
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Session-level search budget
|
||||
// ------------------------------------------------------------------
|
||||
if (sessionTotalSearches >= MAX_SEARCHES_PER_SESSION) {
|
||||
return {
|
||||
content: [{ type: "text" as const, text: `⚠️ Search budget exhausted: ${sessionTotalSearches}/${MAX_SEARCHES_PER_SESSION} searches used this session. The information you need should already be in previous search results. Stop searching and use those results to proceed with your task.` }],
|
||||
isError: true,
|
||||
details: { errorKind: "budget_exhausted", error: `Session search budget exhausted (${MAX_SEARCHES_PER_SESSION})` } satisfies Partial<SearchDetails>,
|
||||
};
|
||||
}
|
||||
|
||||
const count = params.count ?? 5;
|
||||
const wantSummary = params.summary ?? false;
|
||||
|
||||
|
|
@ -410,6 +427,9 @@ export function registerSearchTool(pi: ExtensionAPI) {
|
|||
consecutiveDupeCount = 1;
|
||||
}
|
||||
|
||||
// Count every search that passes the guards toward the session budget.
|
||||
sessionTotalSearches++;
|
||||
|
||||
const cached = searchCache.get(cacheKey);
|
||||
|
||||
if (cached) {
|
||||
|
|
|
|||
|
|
@ -103,9 +103,12 @@ gh issue list -R gsd-build/gsd-2
|
|||
gh issue list -R gsd-build/gsd-2 --label "priority:p1" --state open
|
||||
|
||||
# Create issue with labels and milestone
|
||||
# NOTE: Do NOT use labels for issue classification (bug, feature, etc.)
|
||||
# Use labels for metadata (priority, status, auto-generated) only.
|
||||
# Issue classification uses GitHub Issue Types, set via GraphQL after creation.
|
||||
gh issue create -R gsd-build/gsd-2 \
|
||||
--title "feat: add feature X" \
|
||||
--label "priority:p1" --label "type:feature" \
|
||||
--label "priority:p1" \
|
||||
--milestone "v1.0"
|
||||
|
||||
# View issue
|
||||
|
|
@ -120,6 +123,24 @@ gh issue edit <number> -R gsd-build/gsd-2 \
|
|||
--remove-label "status:needs-grooming"
|
||||
```
|
||||
|
||||
### Issue Types (Classification)
|
||||
|
||||
`gh issue create` has no `--type` flag. Issue types (Bug, Feature Request, etc.) are set via GraphQL after creation:
|
||||
|
||||
```bash
|
||||
# Step 1: Create the issue (returns URL)
|
||||
ISSUE_URL=$(gh issue create -R gsd-build/gsd-2 \
|
||||
--title "..." --body "...")
|
||||
|
||||
# Step 2: Set the issue type via GraphQL
|
||||
ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$')
|
||||
ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id')
|
||||
TYPE_ID=$(gh api graphql -f query='{ repository(owner:"gsd-build",name:"gsd-2") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id')
|
||||
gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }'
|
||||
```
|
||||
|
||||
Replace `"Bug"` with the appropriate type name (`"Feature Request"`, `"Task"`, etc.).
|
||||
|
||||
### Labels
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -855,6 +855,51 @@ test("MAX_NATIVE_SEARCHES_PER_SESSION is exported and equals 15", () => {
|
|||
assert.equal(MAX_NATIVE_SEARCHES_PER_SESSION, 15, "Session budget should be 15 (#1309)");
|
||||
});
|
||||
|
||||
test("session search budget: survives context compaction (high-water mark)", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi);
|
||||
|
||||
await pi.fire("model_select", {
|
||||
type: "model_select",
|
||||
model: { provider: "anthropic", name: "claude-sonnet-4-6" },
|
||||
previousModel: undefined,
|
||||
source: "set",
|
||||
});
|
||||
|
||||
// First request: history has 12 web_search_tool_result blocks
|
||||
const searchBlocks = Array.from({ length: 12 }, (_, i) => ({
|
||||
type: "web_search_tool_result",
|
||||
tool_use_id: `ws${i}`,
|
||||
content: [],
|
||||
}));
|
||||
|
||||
let payload: Record<string, unknown> = {
|
||||
model: "claude-sonnet-4-6-20250514",
|
||||
tools: [{ name: "bash", type: "custom" }],
|
||||
messages: [{ role: "user", content: [{ type: "text", text: "search" }, ...searchBlocks] }],
|
||||
};
|
||||
|
||||
await pi.fire("before_provider_request", { type: "before_provider_request", payload });
|
||||
let tools = payload.tools as any[];
|
||||
let nativeTool = tools.find((t: any) => t.type === "web_search_20250305");
|
||||
assert.ok(nativeTool, "Should still inject web_search with 12/15 used");
|
||||
assert.equal(nativeTool.max_uses, 3, "Should have 3 remaining (15 - 12)");
|
||||
|
||||
// Second request: context was compacted — search blocks gone from history.
|
||||
// Without high-water mark, the budget would reset to 15.
|
||||
payload = {
|
||||
model: "claude-sonnet-4-6-20250514",
|
||||
tools: [{ name: "bash", type: "custom" }],
|
||||
messages: [{ role: "user", content: "compacted context — no search blocks" }],
|
||||
};
|
||||
|
||||
await pi.fire("before_provider_request", { type: "before_provider_request", payload });
|
||||
tools = payload.tools as any[];
|
||||
nativeTool = tools.find((t: any) => t.type === "web_search_20250305");
|
||||
assert.ok(nativeTool, "Should still inject web_search with 12/15 used (high-water mark)");
|
||||
assert.equal(nativeTool.max_uses, 3, "High-water mark should preserve 12 — only 3 remaining");
|
||||
});
|
||||
|
||||
// ─── stripThinkingFromHistory tests ─────────────────────────────────────────
|
||||
|
||||
test("stripThinkingFromHistory removes thinking from earlier assistant messages", () => {
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { registerSearchTool } from "../resources/extensions/search-the-web/tool-search.ts";
|
||||
import { registerSearchTool, resetSearchLoopGuardState } from "../resources/extensions/search-the-web/tool-search.ts";
|
||||
import searchExtension from "../resources/extensions/search-the-web/index.ts";
|
||||
|
||||
const ORIGINAL_ENV = {
|
||||
|
|
@ -72,6 +72,8 @@ function createMockPI() {
|
|||
const toolsByName = new Map<string, any>();
|
||||
let registeredTool: any = null;
|
||||
|
||||
let activeTools: string[] = [];
|
||||
|
||||
const pi = {
|
||||
on(event: string, handler: (...args: any[]) => unknown) {
|
||||
handlers.push({ event, handler });
|
||||
|
|
@ -91,6 +93,8 @@ function createMockPI() {
|
|||
getRegisteredTool(name = "search-the-web") {
|
||||
return toolsByName.get(name) ?? registeredTool;
|
||||
},
|
||||
getActiveTools() { return activeTools; },
|
||||
setActiveTools(tools: string[]) { activeTools = tools; },
|
||||
writeTempFile: async (_content: string, _opts?: unknown) => "/tmp/search-out.txt",
|
||||
};
|
||||
|
||||
|
|
@ -134,18 +138,16 @@ test("search loop guard fires after MAX_CONSECUTIVE_DUPES duplicates", async (t)
|
|||
|
||||
const execute = tool.execute.bind(tool);
|
||||
|
||||
// Calls 1–3: below threshold, should return search results (not an error)
|
||||
for (let i = 1; i <= 3; i++) {
|
||||
const result = await callSearch(execute, "loop test query", `call-${i}`);
|
||||
assert.notEqual(result.isError, true, `call ${i} should not trigger loop guard`);
|
||||
}
|
||||
// Call 1: first call should succeed (MAX_CONSECUTIVE_DUPES = 1)
|
||||
const result1 = await callSearch(execute, "loop test query", "call-1");
|
||||
assert.notEqual(result1.isError, true, "call 1 should not trigger loop guard");
|
||||
|
||||
// Call 4: hits the threshold — guard fires
|
||||
const result4 = await callSearch(execute, "loop test query", "call-4");
|
||||
assert.equal(result4.isError, true, "call 4 should trigger the loop guard");
|
||||
assert.equal(result4.details?.errorKind, "search_loop");
|
||||
// Call 2: identical query — guard fires immediately (threshold = 1)
|
||||
const result2 = await callSearch(execute, "loop test query", "call-2");
|
||||
assert.equal(result2.isError, true, "call 2 should trigger the loop guard");
|
||||
assert.equal(result2.details?.errorKind, "search_loop");
|
||||
assert.ok(
|
||||
result4.content[0].text.includes("Search loop detected"),
|
||||
result2.content[0].text.includes("Search loop detected"),
|
||||
"error message should mention search loop"
|
||||
);
|
||||
});
|
||||
|
|
@ -174,11 +176,9 @@ test("search loop guard resets at session_start boundary", async (t) => {
|
|||
assert.ok(tool, "search tool should be registered");
|
||||
const execute = tool.execute.bind(tool);
|
||||
|
||||
// Trigger guard in session 1
|
||||
for (let i = 1; i <= 4; i++) {
|
||||
await callSearch(execute, query, `s1-call-${i}`);
|
||||
}
|
||||
const guardResult = await callSearch(execute, query, "s1-call-5");
|
||||
// Trigger guard in session 1 (call 1 succeeds, call 2 fires guard)
|
||||
await callSearch(execute, query, "s1-call-1");
|
||||
const guardResult = await callSearch(execute, query, "s1-call-2");
|
||||
assert.equal(guardResult.isError, true, "session 1 should be guarded");
|
||||
assert.equal(guardResult.details?.errorKind, "search_loop");
|
||||
|
||||
|
|
@ -211,28 +211,26 @@ test("search loop guard stays armed after firing — subsequent duplicates immed
|
|||
const tool = pi.getRegisteredTool();
|
||||
const execute = tool.execute.bind(tool);
|
||||
|
||||
// Exhaust the initial window (calls 1–3 succeed, call 4 fires guard)
|
||||
for (let i = 1; i <= 3; i++) {
|
||||
await callSearch(execute, query, `call-${i}`);
|
||||
}
|
||||
const guardFirst = await callSearch(execute, query, "call-4");
|
||||
assert.equal(guardFirst.isError, true, "call 4 should trigger the loop guard");
|
||||
// Call 1 succeeds, call 2 fires guard (MAX_CONSECUTIVE_DUPES = 1)
|
||||
await callSearch(execute, query, "call-1");
|
||||
const guardFirst = await callSearch(execute, query, "call-2");
|
||||
assert.equal(guardFirst.isError, true, "call 2 should trigger the loop guard");
|
||||
|
||||
// Key regression test: call 5 (and beyond) must ALSO trigger the guard.
|
||||
// The original bug reset state on trigger, so call 5 was treated as a fresh
|
||||
// Key regression test: call 3 (and beyond) must ALSO trigger the guard.
|
||||
// The original bug reset state on trigger, so call 3 was treated as a fresh
|
||||
// first search and the loop restarted.
|
||||
const guardSecond = await callSearch(execute, query, "call-5");
|
||||
const guardSecond = await callSearch(execute, query, "call-3");
|
||||
assert.equal(
|
||||
guardSecond.isError, true,
|
||||
"call 5 should STILL trigger the loop guard (guard must stay armed after firing)"
|
||||
"call 3 should STILL trigger the loop guard (guard must stay armed after firing)"
|
||||
);
|
||||
assert.equal(guardSecond.details?.errorKind, "search_loop");
|
||||
|
||||
// Call 6 as well — guard should keep firing
|
||||
const guardThird = await callSearch(execute, query, "call-6");
|
||||
// Call 4 as well — guard should keep firing
|
||||
const guardThird = await callSearch(execute, query, "call-4");
|
||||
assert.equal(
|
||||
guardThird.isError, true,
|
||||
"call 6 should STILL trigger the loop guard"
|
||||
"call 4 should STILL trigger the loop guard"
|
||||
);
|
||||
});
|
||||
|
||||
|
|
@ -255,10 +253,9 @@ test("search loop guard resets cleanly when a different query is issued", async
|
|||
const tool = pi.getRegisteredTool();
|
||||
const execute = tool.execute.bind(tool);
|
||||
|
||||
// Trigger guard for queryA
|
||||
for (let i = 1; i <= 4; i++) {
|
||||
await callSearch(execute, queryA, `call-a-${i}`);
|
||||
}
|
||||
// Trigger guard for queryA (call 1 succeeds, call 2 fires guard)
|
||||
await callSearch(execute, queryA, "call-a-1");
|
||||
await callSearch(execute, queryA, "call-a-2");
|
||||
|
||||
// Issue a different query — should succeed (resets the duplicate counter)
|
||||
const resultB = await callSearch(execute, queryB, "call-b-1");
|
||||
|
|
@ -267,3 +264,71 @@ test("search loop guard resets cleanly when a different query is issued", async
|
|||
"a different query after guard should not be treated as a loop"
|
||||
);
|
||||
});
|
||||
|
||||
test("session search budget blocks after MAX_SEARCHES_PER_SESSION varied queries", async (t) => {
|
||||
process.env.BRAVE_API_KEY = "test-key-budget";
|
||||
delete process.env.TAVILY_API_KEY;
|
||||
delete process.env.OLLAMA_API_KEY;
|
||||
const restoreFetch = mockFetch(makeBraveResponse());
|
||||
|
||||
t.after(() => {
|
||||
restoreFetch();
|
||||
restoreSearchEnv();
|
||||
});
|
||||
|
||||
// Reset guard state (including session budget) and register directly
|
||||
resetSearchLoopGuardState();
|
||||
const pi = createMockPI();
|
||||
registerSearchTool(pi as any);
|
||||
|
||||
const tool = pi.getRegisteredTool();
|
||||
assert.ok(tool, "search tool should be registered");
|
||||
const execute = tool.execute.bind(tool);
|
||||
|
||||
// Issue 15 unique queries — all should succeed (budget = 15)
|
||||
for (let i = 1; i <= 15; i++) {
|
||||
const result = await callSearch(execute, `unique budget query ${i}`, `budget-${i}`);
|
||||
assert.notEqual(result.isError, true, `query ${i} should succeed within budget`);
|
||||
}
|
||||
|
||||
// Query 16: budget exhausted — should be blocked
|
||||
const blocked = await callSearch(execute, "one more query", "budget-16");
|
||||
assert.equal(blocked.isError, true, "query 16 should be blocked by budget");
|
||||
assert.equal(blocked.details?.errorKind, "budget_exhausted");
|
||||
assert.ok(
|
||||
blocked.content[0].text.includes("Search budget exhausted"),
|
||||
"error message should mention budget"
|
||||
);
|
||||
});
|
||||
|
||||
test("session search budget resets via resetSearchLoopGuardState", async (t) => {
|
||||
process.env.BRAVE_API_KEY = "test-key-budget-reset";
|
||||
delete process.env.TAVILY_API_KEY;
|
||||
delete process.env.OLLAMA_API_KEY;
|
||||
const restoreFetch = mockFetch(makeBraveResponse());
|
||||
|
||||
t.after(() => {
|
||||
restoreFetch();
|
||||
restoreSearchEnv();
|
||||
});
|
||||
|
||||
// Reset and register directly
|
||||
resetSearchLoopGuardState();
|
||||
const pi = createMockPI();
|
||||
registerSearchTool(pi as any);
|
||||
|
||||
const tool = pi.getRegisteredTool();
|
||||
const execute = tool.execute.bind(tool);
|
||||
|
||||
// Exhaust budget
|
||||
for (let i = 1; i <= 15; i++) {
|
||||
await callSearch(execute, `budget reset query ${i}`, `br-${i}`);
|
||||
}
|
||||
const exhausted = await callSearch(execute, "exhausted query", "br-exhausted");
|
||||
assert.equal(exhausted.isError, true, "budget should be exhausted");
|
||||
|
||||
// Reset simulates new session
|
||||
resetSearchLoopGuardState();
|
||||
const fresh = await callSearch(execute, "fresh session query", "br-fresh");
|
||||
assert.notEqual(fresh.isError, true, "first query after reset should succeed");
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue