sf snapshot: pre-dispatch, uncommitted changes after 83m inactivity
This commit is contained in:
parent
8c66c11131
commit
0037f44677
5 changed files with 200 additions and 403 deletions
|
|
@ -40,6 +40,7 @@ function createResolver(overrides?: {
|
|||
hasAuth?: (provider: string) => boolean;
|
||||
isProviderRequestReady?: (provider: string) => boolean;
|
||||
find?: (provider: string, modelId: string) => Model<Api> | undefined;
|
||||
getAvailable?: () => Model<Api>[];
|
||||
}) {
|
||||
const settingsManager = {
|
||||
getFallbackSettings: () => ({
|
||||
|
|
@ -62,6 +63,7 @@ function createResolver(overrides?: {
|
|||
return undefined;
|
||||
}),
|
||||
isProviderRequestReady: overrides?.isProviderRequestReady ?? overrides?.hasAuth ?? (() => true),
|
||||
getAvailable: overrides?.getAvailable ?? (() => [zaiModel, alibabaModel, openaiModel]),
|
||||
} as unknown as ModelRegistry;
|
||||
|
||||
return { resolver: new FallbackResolver(settingsManager, authStorage, modelRegistry), authStorage };
|
||||
|
|
@ -105,6 +107,7 @@ describe("FallbackResolver — findFallback", () => {
|
|||
it("returns null when all providers are backed off", async () => {
|
||||
const { resolver } = createResolver({
|
||||
isProviderAvailable: () => false,
|
||||
getAvailable: () => [zaiModel, alibabaModel, openaiModel],
|
||||
});
|
||||
|
||||
const result = await resolver.findFallback(zaiModel, "quota_exhausted");
|
||||
|
|
@ -117,11 +120,46 @@ describe("FallbackResolver — findFallback", () => {
|
|||
assert.equal(result, null);
|
||||
});
|
||||
|
||||
it("returns null when model is not in any chain", async () => {
|
||||
it("falls back to free selection when model is not in any chain", async () => {
|
||||
const { resolver } = createResolver();
|
||||
const unknownModel = createMockModel("unknown", "some-model");
|
||||
const result = await resolver.findFallback(unknownModel, "quota_exhausted");
|
||||
assert.equal(result, null);
|
||||
assert.notEqual(result, null);
|
||||
assert.equal(result!.chainName, "free-selection");
|
||||
// Should pick an available model with different provider
|
||||
assert.notEqual(result!.model.provider, "unknown");
|
||||
});
|
||||
|
||||
it("free selection prefers models with matching reasoning capability", async () => {
|
||||
const reasoningModel = createMockModel("openai", "gpt-4.1");
|
||||
reasoningModel.reasoning = true;
|
||||
const nonReasoningModel = createMockModel("alibaba", "glm-5");
|
||||
nonReasoningModel.reasoning = false;
|
||||
|
||||
const { resolver } = createResolver({
|
||||
getAvailable: () => [nonReasoningModel, reasoningModel],
|
||||
});
|
||||
|
||||
const currentModel = createMockModel("unknown", "some-model");
|
||||
currentModel.reasoning = true;
|
||||
|
||||
const result = await resolver.findFallback(currentModel, "quota_exhausted");
|
||||
assert.notEqual(result, null);
|
||||
assert.equal(result!.model.provider, "openai");
|
||||
assert.equal(result!.model.reasoning, true);
|
||||
});
|
||||
|
||||
it("free selection excludes same provider", async () => {
|
||||
const sameProviderModel = createMockModel("zai", "glm-5-other");
|
||||
const differentProviderModel = createMockModel("alibaba", "glm-5");
|
||||
|
||||
const { resolver } = createResolver({
|
||||
getAvailable: () => [sameProviderModel, differentProviderModel],
|
||||
});
|
||||
|
||||
const result = await resolver.findFallback(zaiModel, "quota_exhausted");
|
||||
assert.notEqual(result, null);
|
||||
assert.equal(result!.model.provider, "alibaba");
|
||||
});
|
||||
|
||||
it("skips providers that are not request-ready", async () => {
|
||||
|
|
|
|||
|
|
@ -32,6 +32,9 @@ export class FallbackResolver {
|
|||
* Searches all chains for entries matching the current model's provider+id,
|
||||
* then returns the next available entry with lower priority (higher number).
|
||||
*
|
||||
* If no chain contains the current model, falls through to free selection:
|
||||
* picks any available model from the registry with a different provider.
|
||||
*
|
||||
* @returns FallbackResult if a fallback is available, null otherwise
|
||||
*/
|
||||
async findFallback(
|
||||
|
|
@ -61,7 +64,9 @@ export class FallbackResolver {
|
|||
if (wrapResult) return wrapResult;
|
||||
}
|
||||
|
||||
return null;
|
||||
// No chain contained the current model — fall through to free selection
|
||||
// from any available model in the registry with a different provider.
|
||||
return this._findAnyAvailableFallback(currentModel);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -161,4 +166,43 @@ export class FallbackResolver {
|
|||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Free-selection fallback when no chain contains the current model.
|
||||
* Picks any available model from the registry with a different provider.
|
||||
* Prefers models with reasoning capability if the current model has it.
|
||||
*/
|
||||
private _findAnyAvailableFallback(
|
||||
currentModel: Model<Api>,
|
||||
): FallbackResult | null {
|
||||
const allModels = this.modelRegistry.getAvailable();
|
||||
const candidates = allModels.filter((m) => {
|
||||
// Exclude same provider — credential rotation was already tried
|
||||
if (m.provider === currentModel.provider) return false;
|
||||
// Exclude exhausted providers
|
||||
if (!this.authStorage.isProviderAvailable(m.provider)) return false;
|
||||
// Exclude models without auth
|
||||
if (!this.modelRegistry.isProviderRequestReady(m.provider)) return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
if (candidates.length === 0) return null;
|
||||
|
||||
// Sort: prefer models with matching reasoning capability, then by context window
|
||||
candidates.sort((a, b) => {
|
||||
const aReasoningMatch = a.reasoning === currentModel.reasoning ? 1 : 0;
|
||||
const bReasoningMatch = b.reasoning === currentModel.reasoning ? 1 : 0;
|
||||
if (aReasoningMatch !== bReasoningMatch) {
|
||||
return bReasoningMatch - aReasoningMatch;
|
||||
}
|
||||
return (b.contextWindow ?? 0) - (a.contextWindow ?? 0);
|
||||
});
|
||||
|
||||
const chosen = candidates[0];
|
||||
return {
|
||||
model: chosen,
|
||||
chainName: "free-selection",
|
||||
reason: `free fallback to ${chosen.provider}/${chosen.id} (no chain configured)`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -607,6 +607,79 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe("quota wait before fallback", () => {
|
||||
it("waits for retryAfterMs before retrying same provider on quota error", async () => {
|
||||
const { deps, emittedEvents, continueFn } = createMockDeps({
|
||||
model: createMockModel("google-gemini-cli", "gemini-2.5-pro"),
|
||||
fallbackResult: null,
|
||||
retrySettings: { maxRetries: 5, baseDelayMs: 1000, maxDelayMs: 60000 },
|
||||
});
|
||||
|
||||
const handler = new RetryHandler(deps);
|
||||
const msg = errorMessage(
|
||||
"You have exhausted your capacity on this model. Your quota will reset after 59s.",
|
||||
);
|
||||
(msg as any).retryAfterMs = 59000;
|
||||
|
||||
const result = await handler.handleRetryableError(msg);
|
||||
|
||||
// Should wait and retry, not immediately fail
|
||||
assert.equal(result, true, "should wait and retry on quota reset");
|
||||
|
||||
const retryStart = emittedEvents.find(
|
||||
(e) => e.type === "auto_retry_start",
|
||||
);
|
||||
assert.ok(retryStart, "should emit auto_retry_start with wait");
|
||||
assert.equal(
|
||||
retryStart!.delayMs,
|
||||
59000,
|
||||
"should use provider's retry-after delay",
|
||||
);
|
||||
assert.ok(
|
||||
retryStart!.errorMessage.includes("waiting for quota reset"),
|
||||
"error message should indicate quota wait",
|
||||
);
|
||||
|
||||
// Should NOT have emitted fallback_chain_exhausted
|
||||
const chainExhausted = emittedEvents.find(
|
||||
(e) => e.type === "fallback_chain_exhausted",
|
||||
);
|
||||
assert.equal(
|
||||
chainExhausted,
|
||||
undefined,
|
||||
"should not emit fallback_chain_exhausted when waiting for quota reset",
|
||||
);
|
||||
});
|
||||
|
||||
it("falls through to fallback when retryAfterMs exceeds maxDelayMs", async () => {
|
||||
const fallbackModel = createMockModel("openai", "gpt-4o");
|
||||
const { deps, emittedEvents } = createMockDeps({
|
||||
model: createMockModel("google-gemini-cli", "gemini-2.5-pro"),
|
||||
fallbackResult: {
|
||||
model: fallbackModel,
|
||||
reason: "cross-provider fallback",
|
||||
},
|
||||
retrySettings: { maxRetries: 5, baseDelayMs: 1000, maxDelayMs: 30000 },
|
||||
});
|
||||
|
||||
const handler = new RetryHandler(deps);
|
||||
const msg = errorMessage(
|
||||
"You have exhausted your capacity on this model. Your quota will reset after 5m.",
|
||||
);
|
||||
(msg as any).retryAfterMs = 300000; // 5 minutes, exceeds maxDelayMs
|
||||
|
||||
const result = await handler.handleRetryableError(msg);
|
||||
|
||||
// Should fall through to fallback since wait is too long
|
||||
assert.equal(result, true, "should fallback when quota reset is too long");
|
||||
|
||||
const switchEvent = emittedEvents.find(
|
||||
(e) => e.type === "fallback_provider_switch",
|
||||
);
|
||||
assert.ok(switchEvent, "should emit fallback_provider_switch");
|
||||
});
|
||||
});
|
||||
|
||||
describe("quota_exhausted credential backoff (#3430)", () => {
|
||||
it("does NOT call markUsageLimitReached for quota_exhausted errors", async () => {
|
||||
// "Extra usage is required" is an account-level billing gate.
|
||||
|
|
|
|||
|
|
@ -210,6 +210,48 @@ export class RetryHandler {
|
|||
// quota errors, or auth errors (invalid/expired key — no point retrying).
|
||||
const isAuthError = errorType === "auth_error";
|
||||
if (isRateLimit || isQuotaError || isAuthError) {
|
||||
// For quota errors with a retry-after hint, wait before switching providers.
|
||||
// The quota may reset quickly (e.g. 59s), so waiting is often better than
|
||||
// switching to a potentially worse model.
|
||||
if (isQuotaError && message.retryAfterMs !== undefined && message.retryAfterMs > 0) {
|
||||
const cap = settings.maxDelayMs > 0 ? settings.maxDelayMs : Infinity;
|
||||
if (message.retryAfterMs <= cap) {
|
||||
this._deps.emit({
|
||||
type: "auto_retry_start",
|
||||
attempt: this._retryAttempt + 1,
|
||||
maxAttempts: settings.maxRetries,
|
||||
delayMs: message.retryAfterMs,
|
||||
errorMessage: `${message.errorMessage} (waiting for quota reset)`,
|
||||
});
|
||||
|
||||
this._removeLastAssistantError();
|
||||
this._retryAbortController = new AbortController();
|
||||
try {
|
||||
await sleep(message.retryAfterMs, this._retryAbortController.signal);
|
||||
} catch {
|
||||
// Aborted during sleep
|
||||
if (retryGeneration !== this._retryGeneration) {
|
||||
this._retryAbortController = undefined;
|
||||
return false;
|
||||
}
|
||||
const attempt = this._retryAttempt;
|
||||
this._retryAttempt = 0;
|
||||
this._retryAbortController = undefined;
|
||||
this._deps.emit({
|
||||
type: "auto_retry_end",
|
||||
success: false,
|
||||
attempt,
|
||||
finalError: "Retry cancelled",
|
||||
});
|
||||
this._resolveRetry();
|
||||
return false;
|
||||
}
|
||||
this._retryAbortController = undefined;
|
||||
this._scheduleContinue(retryGeneration);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
const fallbackResult = await this._deps.fallbackResolver.findFallback(
|
||||
this._deps.getModel()!,
|
||||
errorType,
|
||||
|
|
|
|||
|
|
@ -1,400 +0,0 @@
|
|||
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import {
|
||||
evaluateRunawayGuard,
|
||||
resetRunawayGuardState,
|
||||
} from "../auto-runaway-guard.js";
|
||||
import { runCustomVerification } from "../custom-verification.js";
|
||||
import { getPriorSliceCompletionBlocker } from "../dispatch-guard.js";
|
||||
import { checkCrossTaskSignatures } from "../post-execution-checks.js";
|
||||
import {
|
||||
extractPackageReferences,
|
||||
normalizeFilePath,
|
||||
} from "../pre-execution-checks.js";
|
||||
import { formatFailureContext, isLikelyCommand } from "../verification-gate.js";
|
||||
|
||||
// ─── Bug 1: formatFailureContext double-truncation ─────────────────────────
|
||||
|
||||
describe("formatFailureContext", () => {
|
||||
test("does not double-truncate a single check's stderr", () => {
|
||||
const longStderr = "x".repeat(3000);
|
||||
const result = {
|
||||
passed: false,
|
||||
checks: [
|
||||
{
|
||||
command: "npm test",
|
||||
exitCode: 1,
|
||||
stdout: "",
|
||||
stderr: longStderr,
|
||||
durationMs: 100,
|
||||
},
|
||||
],
|
||||
discoverySource: "preference" as const,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
const context = formatFailureContext(result);
|
||||
// Per-check budget = 10000/1 = 10000, capped to MAX_STDERR_PER_CHECK=2000
|
||||
expect(context).toContain("x".repeat(2000));
|
||||
expect(context).toContain("…[truncated]");
|
||||
// Must NOT contain the total-body truncation marker
|
||||
expect(context).not.toContain("…[remaining failures truncated]");
|
||||
});
|
||||
|
||||
test("fairly distributes budget across multiple failures", () => {
|
||||
const stderrA = "a".repeat(3000);
|
||||
const stderrB = "b".repeat(3000);
|
||||
const result = {
|
||||
passed: false,
|
||||
checks: [
|
||||
{
|
||||
command: "npm run lint",
|
||||
exitCode: 1,
|
||||
stdout: "",
|
||||
stderr: stderrA,
|
||||
durationMs: 100,
|
||||
},
|
||||
{
|
||||
command: "npm run test",
|
||||
exitCode: 1,
|
||||
stdout: "",
|
||||
stderr: stderrB,
|
||||
durationMs: 100,
|
||||
},
|
||||
],
|
||||
discoverySource: "preference" as const,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
const context = formatFailureContext(result);
|
||||
// Per-check budget = 10000/2 = 5000, capped to 2000
|
||||
expect(context).toContain("a".repeat(2000));
|
||||
expect(context).toContain("b".repeat(2000));
|
||||
expect(context).not.toContain("…[remaining failures truncated]");
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Bug 2: checkCrossTaskSignatures partial-signature-drift ───────────────
|
||||
|
||||
describe("checkCrossTaskSignatures", () => {
|
||||
test("detects drift against ALL prior definitions, not just the most recent", () => {
|
||||
const base = mkdtempSync(join(tmpdir(), "sf-sig-drift-"));
|
||||
try {
|
||||
// Create prior task files with different signatures for same function
|
||||
mkdirSync(join(base, "first"), { recursive: true });
|
||||
mkdirSync(join(base, "second"), { recursive: true });
|
||||
mkdirSync(join(base, "current"), { recursive: true });
|
||||
|
||||
writeFileSync(
|
||||
join(base, "first", "first.ts"),
|
||||
"export function foo(a: string): void {}",
|
||||
);
|
||||
writeFileSync(
|
||||
join(base, "second", "second.ts"),
|
||||
"export function foo(a: string, b: number): void {}",
|
||||
);
|
||||
writeFileSync(
|
||||
join(base, "current", "current.ts"),
|
||||
"export function foo(a: string): number { return 1; }",
|
||||
);
|
||||
|
||||
const taskRow = {
|
||||
id: "T03",
|
||||
key_files: ["current/current.ts"],
|
||||
};
|
||||
const priorTasks = [
|
||||
{
|
||||
id: "T01",
|
||||
key_files: ["first/first.ts"],
|
||||
},
|
||||
{
|
||||
id: "T02",
|
||||
key_files: ["second/second.ts"],
|
||||
},
|
||||
];
|
||||
|
||||
const results = checkCrossTaskSignatures(taskRow, priorTasks, base);
|
||||
|
||||
// Should detect drift against BOTH prior definitions
|
||||
const fooResults = results.filter((r) => r.target === "foo");
|
||||
expect(fooResults.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// Should report mismatch with T01 (params match but return type differs)
|
||||
const returnTypeMismatches = fooResults.filter((r) =>
|
||||
r.message.includes("returns"),
|
||||
);
|
||||
expect(returnTypeMismatches.length).toBeGreaterThanOrEqual(1);
|
||||
|
||||
// Should report mismatch with T02 (params differ)
|
||||
const paramMismatches = fooResults.filter((r) =>
|
||||
r.message.includes("parameters"),
|
||||
);
|
||||
expect(paramMismatches.length).toBeGreaterThanOrEqual(1);
|
||||
} finally {
|
||||
rmSync(base, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Bug 3: dispatch-guard global-positional-fallback ──────────────────────
|
||||
|
||||
describe("getPriorSliceCompletionBlocker", () => {
|
||||
test("does not block zero-dep slice when earlier incomplete slice has unsatisfied deps", () => {
|
||||
const base = mkdtempSync(join(tmpdir(), "sf-dispatch-"));
|
||||
try {
|
||||
// Create a milestone structure with:
|
||||
// S01 depends on S02 (circular-ish, but S02 is incomplete)
|
||||
// S03 has no dependencies
|
||||
// When dispatching S03, it should NOT be blocked by S01's incompleteness
|
||||
// because S01 is itself blocked by unsatisfied deps.
|
||||
mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true });
|
||||
writeFileSync(
|
||||
join(base, ".sf", "milestones", "M001", "ROADMAP.md"),
|
||||
"# M001\n\n" +
|
||||
"- [ ] S01 — first slice\n" +
|
||||
" - depends: S02\n" +
|
||||
"- [ ] S02 — second slice\n" +
|
||||
"- [ ] S03 — third slice\n",
|
||||
);
|
||||
|
||||
const blocker = getPriorSliceCompletionBlocker(
|
||||
base,
|
||||
"main",
|
||||
"execute-task",
|
||||
"M001/S03/T01",
|
||||
);
|
||||
// S03 should NOT be blocked because S01 has unsatisfied deps (S02 incomplete)
|
||||
expect(blocker).toBeNull();
|
||||
} finally {
|
||||
rmSync(base, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Bug 4: extractPackageReferences stopword collision ────────────────────
|
||||
|
||||
describe("extractPackageReferences", () => {
|
||||
test("includes 'test' after -D flag", () => {
|
||||
const packages = extractPackageReferences("npm install -D test");
|
||||
expect(packages).toContain("test");
|
||||
});
|
||||
|
||||
test("includes 'test' without flag when it looks like a package name", () => {
|
||||
// "test" by itself is a stopword, but with package-name chars it should pass
|
||||
const packages = extractPackageReferences("npm install test-utils");
|
||||
expect(packages).toContain("test-utils");
|
||||
});
|
||||
|
||||
test("stops on prose stopwords", () => {
|
||||
const packages = extractPackageReferences("npm install and then run build");
|
||||
// "and" is a stopword and should terminate parsing
|
||||
expect(packages).not.toContain("then");
|
||||
expect(packages).not.toContain("run");
|
||||
});
|
||||
|
||||
test("includes scoped packages regardless of stopwords", () => {
|
||||
const packages = extractPackageReferences("npm install @types/test");
|
||||
expect(packages).toContain("@types/test");
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Bug 5: runaway-guard token false-positive ─────────────────────────────
|
||||
|
||||
describe("evaluateRunawayGuard", () => {
|
||||
test("does not warn on token count alone when other signals are healthy", () => {
|
||||
// Simulate a unit that has burned 1.2M tokens but only made 25 tool calls
|
||||
// in 5 minutes with 1 changed file — this is healthy progress, not runaway.
|
||||
resetRunawayGuardState("execute-task", "M001/S01/T01", {
|
||||
sessionTokens: 100_000_000, // baseline: session already had 100M tokens
|
||||
changedFiles: 0,
|
||||
worktreeFingerprint: "abc123",
|
||||
});
|
||||
|
||||
const config = {
|
||||
enabled: true,
|
||||
toolCallWarning: 60,
|
||||
tokenWarning: 1_000_000,
|
||||
elapsedMs: 20 * 60 * 1000,
|
||||
changedFilesWarning: 75,
|
||||
diagnosticTurns: 2,
|
||||
hardPause: true,
|
||||
minIntervalMs: 120_000,
|
||||
};
|
||||
|
||||
const decision = evaluateRunawayGuard(
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
{
|
||||
toolCalls: 25,
|
||||
sessionTokens: 101_200_000, // delta = 1.2M tokens
|
||||
elapsedMs: 5 * 60 * 1000, // 5 minutes
|
||||
changedFiles: 1,
|
||||
worktreeFingerprint: "def456",
|
||||
topTools: { read: 10, edit: 5, bash: 5 },
|
||||
},
|
||||
config,
|
||||
);
|
||||
|
||||
// Should NOT warn because token count alone is not enough —
|
||||
// tool calls (25 < 60), elapsed (5min < 20min), changed files (1 < 75)
|
||||
// are all well below thresholds.
|
||||
expect(decision.action).toBe("none");
|
||||
});
|
||||
|
||||
test("warns when token count is accompanied by a primary signal", () => {
|
||||
resetRunawayGuardState("execute-task", "M001/S01/T01", {
|
||||
sessionTokens: 100_000_000,
|
||||
changedFiles: 0,
|
||||
worktreeFingerprint: "abc123",
|
||||
});
|
||||
|
||||
const config = {
|
||||
enabled: true,
|
||||
toolCallWarning: 60,
|
||||
tokenWarning: 1_000_000,
|
||||
elapsedMs: 20 * 60 * 1000,
|
||||
changedFilesWarning: 75,
|
||||
diagnosticTurns: 2,
|
||||
hardPause: true,
|
||||
minIntervalMs: 120_000,
|
||||
};
|
||||
|
||||
const decision = evaluateRunawayGuard(
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
{
|
||||
toolCalls: 65, // exceeds toolCallWarning (60)
|
||||
sessionTokens: 101_200_000, // delta = 1.2M tokens
|
||||
elapsedMs: 5 * 60 * 1000,
|
||||
changedFiles: 1,
|
||||
worktreeFingerprint: "def456",
|
||||
topTools: { read: 30, edit: 20, bash: 15 },
|
||||
},
|
||||
config,
|
||||
);
|
||||
|
||||
// Should warn because tool calls (65 >= 60) is a primary signal,
|
||||
// and tokens (1.2M >= 1M) corroborate.
|
||||
expect(decision.action).toBe("warn");
|
||||
});
|
||||
|
||||
test("warns on no-progress heuristic regardless of primary signals", () => {
|
||||
resetRunawayGuardState("execute-task", "M001/S01/T01", {
|
||||
sessionTokens: 100_000_000,
|
||||
changedFiles: 0,
|
||||
worktreeFingerprint: "abc123",
|
||||
});
|
||||
|
||||
const config = {
|
||||
enabled: true,
|
||||
toolCallWarning: 60,
|
||||
tokenWarning: 1_000_000,
|
||||
elapsedMs: 20 * 60 * 1000,
|
||||
changedFilesWarning: 75,
|
||||
diagnosticTurns: 2,
|
||||
hardPause: true,
|
||||
minIntervalMs: 120_000,
|
||||
};
|
||||
|
||||
const decision = evaluateRunawayGuard(
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
{
|
||||
toolCalls: 30, // exceeds EXECUTE_NO_PROGRESS_TOOL_WARNING (25)
|
||||
sessionTokens: 100_600_000, // delta = 600K tokens
|
||||
elapsedMs: 10 * 60 * 1000,
|
||||
changedFiles: 0,
|
||||
worktreeFingerprint: "abc123", // unchanged
|
||||
topTools: { read: 15, edit: 10, bash: 5 },
|
||||
},
|
||||
config,
|
||||
);
|
||||
|
||||
// Should warn because no-progress heuristic fires:
|
||||
// 0 changed files, worktree unchanged, 30 tool calls, 600K tokens
|
||||
expect(decision.action).toBe("warn");
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Bug 6: isLikelyCommand prose guard gaps ───────────────────────────────
|
||||
|
||||
describe("isLikelyCommand", () => {
|
||||
test("treats single non-command token as prose", () => {
|
||||
expect(isLikelyCommand("hello")).toBe(false);
|
||||
expect(isLikelyCommand("world")).toBe(false);
|
||||
});
|
||||
|
||||
test("treats single known command token as command", () => {
|
||||
expect(isLikelyCommand("npm")).toBe(true);
|
||||
expect(isLikelyCommand("node")).toBe(true);
|
||||
});
|
||||
|
||||
test("treats single-letter first token with 2+ words as prose", () => {
|
||||
expect(isLikelyCommand("a quick test")).toBe(false);
|
||||
expect(isLikelyCommand("it works now")).toBe(false);
|
||||
expect(isLikelyCommand("i am here")).toBe(false);
|
||||
});
|
||||
|
||||
test("still treats short command-like strings as commands", () => {
|
||||
expect(isLikelyCommand("npm test")).toBe(true);
|
||||
expect(isLikelyCommand("./script.sh")).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Bug 7: custom-verification standalone shell operators ─────────────────
|
||||
|
||||
describe("runCustomVerification shell-command policy", () => {
|
||||
test("pauses on standalone pipe operator", () => {
|
||||
const base = mkdtempSync(join(tmpdir(), "sf-cv-pipe-"));
|
||||
try {
|
||||
writeFileSync(
|
||||
join(base, "DEFINITION.yaml"),
|
||||
`steps:\n - id: step-1\n produces: []\n verify:\n policy: shell-command\n command: echo hello | cat\n`,
|
||||
);
|
||||
const result = runCustomVerification(base, "step-1");
|
||||
expect(result).toBe("pause");
|
||||
} finally {
|
||||
rmSync(base, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("pauses on background operator", () => {
|
||||
const base = mkdtempSync(join(tmpdir(), "sf-cv-bg-"));
|
||||
try {
|
||||
writeFileSync(
|
||||
join(base, "DEFINITION.yaml"),
|
||||
`steps:\n - id: step-1\n produces: []\n verify:\n policy: shell-command\n command: echo hello \u0026\n`,
|
||||
);
|
||||
const result = runCustomVerification(base, "step-1");
|
||||
expect(result).toBe("pause");
|
||||
} finally {
|
||||
rmSync(base, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("allows safe command without pipe or background", () => {
|
||||
const base = mkdtempSync(join(tmpdir(), "sf-cv-safe-"));
|
||||
try {
|
||||
writeFileSync(
|
||||
join(base, "DEFINITION.yaml"),
|
||||
`steps:\n - id: step-1\n produces: []\n verify:\n policy: shell-command\n command: echo hello\n`,
|
||||
);
|
||||
const result = runCustomVerification(base, "step-1");
|
||||
expect(result).toBe("continue");
|
||||
} finally {
|
||||
rmSync(base, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Bug 8: normalizeFilePath JSDoc filesystem warning ─────────────────────
|
||||
|
||||
describe("normalizeFilePath", () => {
|
||||
test("does NOT make path safe for direct filesystem use", () => {
|
||||
// The function normalizes for comparison only; traversal sequences remain
|
||||
const normalized = normalizeFilePath("../../etc/passwd");
|
||||
expect(normalized).toBe("../../etc/passwd");
|
||||
// A consumer that passed this straight to readFileSync would escape
|
||||
// the intended directory. The JSDoc warns about this explicitly.
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue