chore(sf): commit remaining uncommitted improvements

- anthropic-shared.ts: replace `as any` cast on thinkingNoBudget path with
  `as unknown as Record<string, unknown>` for auditability; remove `as any`
  on server_tool_use block (SDK type is now correct)
- openai-completions.ts: drop residual `as any` casts after SDK type update
- db-tools.ts: add explicit AgentToolResult return type annotation on execute
  handlers to resolve implicit-any lint
- requesting-code-review/SKILL.md: update review skill prompt
- subagent/index.ts: wire subagent capability registration

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-02 03:22:52 +02:00
parent 2846c296ee
commit bc9cf4fef3
5 changed files with 127 additions and 25 deletions

View file

@ -497,8 +497,9 @@ export function buildParams(
}
} else if (model.capabilities?.thinkingNoBudget) {
// Provider accepts {"type":"enabled"} without budget_tokens — model manages depth.
// The Anthropic SDK type requires budget_tokens but the kimi-coding API does not.
(params as any).thinking = { type: "enabled" };
// The Anthropic SDK type requires budget_tokens but the kimi-coding API does not,
// so we bypass the SDK constraint via unknown to avoid falsely promising budget_tokens.
(params as unknown as Record<string, unknown>).thinking = { type: "enabled" };
} else {
params.thinking = {
type: "enabled",
@ -644,8 +645,8 @@ export function processAnthropicStream(
};
output.content.push(block);
stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
} else if ((event.content_block as any).type === "server_tool_use") {
const serverBlock = event.content_block as any;
} else if (event.content_block.type === "server_tool_use") {
const serverBlock = event.content_block;
const block: Block = {
type: "serverToolUse",
id: serverBlock.id,
@ -655,8 +656,8 @@ export function processAnthropicStream(
};
output.content.push(block);
stream.push({ type: "server_tool_use", contentIndex: output.content.length - 1, partial: output });
} else if ((event.content_block as any).type === "web_search_tool_result") {
const resultBlock = event.content_block as any;
} else if (event.content_block.type === "web_search_tool_result") {
const resultBlock = event.content_block;
const block: Block = {
type: "webSearchResult",
toolUseId: resultBlock.tool_use_id,
@ -716,7 +717,9 @@ export function processAnthropicStream(
const index = blocks.findIndex((b) => b.index === event.index);
const block = blocks[index];
if (block) {
delete (block as any).index;
// `index` is an internal bookkeeping field added at block creation
// and must be stripped before the block is exposed to callers.
delete (block as Partial<Block>).index;
if (block.type === "text") {
stream.push({
type: "text_end",
@ -748,7 +751,9 @@ export function processAnthropicStream(
}
}
block.arguments = parsed ?? parseStreamingJson(block.partialJson);
delete (block as any).partialJson;
// `partialJson` is an internal streaming field that must not
// appear on the final ToolCall exposed to callers.
delete (block as Partial<Block>).partialJson;
stream.push({
type: "toolcall_end",
contentIndex: index,
@ -790,7 +795,7 @@ export function processAnthropicStream(
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
for (const block of output.content) delete (block as any).index;
for (const block of output.content) delete (block as Partial<Block>).index;
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
if (model.provider === "alibaba-coding-plan") {

View file

@ -189,13 +189,16 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
// or reasoning (other openai compatible endpoints)
// Use the first non-empty reasoning field to avoid duplication
// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
// SDK-divergence: reasoning_content / reasoning / reasoning_text are vendor extensions
// not present on ChatCompletionChunk.Choice.Delta in the official OpenAI SDK.
const deltaExt = choice.delta as unknown as Record<string, unknown>;
const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
let foundReasoningField: string | null = null;
for (const field of reasoningFields) {
if (
(choice.delta as any)[field] !== null &&
(choice.delta as any)[field] !== undefined &&
(choice.delta as any)[field].length > 0
deltaExt[field] !== null &&
deltaExt[field] !== undefined &&
(deltaExt[field] as string).length > 0
) {
if (!foundReasoningField) {
foundReasoningField = field;
@ -217,7 +220,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
}
if (currentBlock.type === "thinking") {
const delta = (choice.delta as any)[foundReasoningField];
const delta = deltaExt[foundReasoningField] as string;
currentBlock.thinking += delta;
stream.push({
type: "thinking_delta",
@ -266,7 +269,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
}
}
const reasoningDetails = (choice.delta as any).reasoning_details;
// SDK-divergence: reasoning_details is a vendor extension (OpenAI Responses API via
// completions-compat path) not present on ChatCompletionChunk.Choice.Delta.
const reasoningDetails = (deltaExt).reasoning_details;
if (reasoningDetails && Array.isArray(reasoningDetails)) {
for (const detail of reasoningDetails) {
if (detail.type === "reasoning.encrypted" && detail.id && detail.data) {
@ -287,7 +292,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
finalizeStream(stream, output);
} catch (error) {
// Some providers via OpenRouter give additional information in this field.
const rawMetadata = (error as any)?.error?.metadata?.raw;
// SDK-divergence: APIError.error is typed as Object | undefined; the nested
// metadata.raw field is an OpenRouter-specific extension not in the SDK type.
const rawMetadata = (error as unknown as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
handleStreamError(stream, output, error, options?.signal, rawMetadata);
}
})();
@ -338,7 +345,9 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
if (options?.maxTokens) {
if (compat.maxTokensField === "max_tokens") {
(params as any).max_tokens = options.maxTokens;
// max_tokens is a deprecated but valid field on ChatCompletionCreateParamsBase,
// kept for providers (e.g. chutes.ai) that reject max_completion_tokens.
params.max_tokens = options.maxTokens;
} else {
params.max_completion_tokens = options.maxTokens;
}
@ -360,17 +369,23 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
params.tool_choice = options.toolChoice;
}
// For vendor-specific fields not in ChatCompletionCreateParamsBase, use a typed extension object.
const paramsExt = params as unknown as Record<string, unknown>;
if ((compat.thinkingFormat === "zai" || compat.thinkingFormat === "qwen") && model.reasoning) {
// Both Z.ai and Qwen use enable_thinking: boolean
(params as any).enable_thinking = !!options?.reasoningEffort;
// SDK-divergence: enable_thinking is a Z.ai / Qwen vendor extension not in the OpenAI SDK type.
paramsExt.enable_thinking = !!options?.reasoningEffort;
} else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
// OpenAI-style reasoning_effort
(params as any).reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
// reasoning_effort is in ChatCompletionCreateParamsBase, but mapReasoningEffort returns a
// plain string (from a provider-specific map) which may not match the SDK's ReasoningEffort
// literal union — cast to the SDK type to satisfy the checker.
params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap) as ChatCompletionReasoningEffort;
}
// OpenRouter provider routing preferences
if (model.baseUrl.includes("openrouter.ai") && model.compat?.openRouterRouting) {
(params as any).provider = model.compat.openRouterRouting;
// SDK-divergence: provider routing is an OpenRouter vendor extension not in the OpenAI SDK type.
paramsExt.provider = model.compat.openRouterRouting;
}
// Vercel AI Gateway provider routing preferences
@ -380,7 +395,8 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
const gatewayOptions: Record<string, string[]> = {};
if (routing.only) gatewayOptions.only = routing.only;
if (routing.order) gatewayOptions.order = routing.order;
(params as any).providerOptions = { gateway: gatewayOptions };
// SDK-divergence: providerOptions is a Vercel AI Gateway vendor extension not in the OpenAI SDK type.
paramsExt.providerOptions = { gateway: gatewayOptions };
}
}

View file

@ -1,5 +1,6 @@
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/pi-ai";
import type { AgentToolResult } from "@singularity-forge/pi-agent-core";
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { Text } from "@singularity-forge/pi-tui";
import {
@ -57,7 +58,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
_signal: AbortSignal | undefined,
_onUpdate: unknown,
_ctx: unknown,
) => {
): Promise<AgentToolResult<Record<string, unknown>>> => {
const dbAvailable = await ensureDbOpen();
if (!dbAvailable) {
return {
@ -184,7 +185,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
_signal: AbortSignal | undefined,
_onUpdate: unknown,
_ctx: unknown,
) => {
): Promise<AgentToolResult<Record<string, unknown>>> => {
const dbAvailable = await ensureDbOpen();
if (!dbAvailable) {
return {
@ -316,7 +317,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
_signal: AbortSignal | undefined,
_onUpdate: unknown,
_ctx: unknown,
) => {
): Promise<AgentToolResult<Record<string, unknown>>> => {
const dbAvailable = await ensureDbOpen();
if (!dbAvailable) {
return {

View file

@ -26,6 +26,21 @@ A review request without evidence is a waste of the reviewer's time. Either you
This skill is the optional terminal step of the delivery chain — invoked after verification confirms the change is honest.
## Recognise Your Rationalisations
You are Claude, and you are bad at requesting review. The shortcuts below are the ones you actually take — name each one as you catch yourself, then do the opposite:
- *"Tests passed earlier, I'll just say so."* → No. Re-run after the **last** edit and paste the `Command run / Output observed / Result` block from `finish-and-verify`'s trace format. "Tests pass" without a fresh trace is a self-report, not evidence.
- *"Risk is low."* → Did you actually `rg` the changed symbols and count callers? "Low" without that check is a guess. If blast radius >10, mark Risk medium minimum.
- *"I'll fill in Failure boundary / Assumptions later."* → No. The PDD packet is the spec; an incomplete packet is no spec. If you can't write Assumptions now, you don't yet understand the world this code runs in — go back to PDD.
- *"The Falsifier is obvious, why write it down?"* → If it's obvious, one sentence costs nothing. If you can't write it, the contract isn't crisp — and the reviewer will spend their time discovering what you should already know.
- *"I verified that part manually."* → Either tag it `[MANUAL: <reviewer> + <scenario>]` so the reviewer knows what was checked, or replace it with a runnable command. Prose-only Evidence is rejected — same rule as PDD v2.
- *"The reviewer will catch anything I missed."* → Reviewer time is expensive. You already know the soft spots. Name them in `Falsifier` or in the closing *"strongest reason this could still be wrong"* line so the reviewer focuses on what you couldn't decide alone.
These are the same shortcuts `spec-first-tdd`'s **Red Flags** warn about and the same ones `finish-and-verify`'s trace format prevents. By the time you reach this skill, you should have caught yourself already — this section exists because sometimes you don't.
**When you can't tell:** if after walking through the bullets you're still genuinely unsure whether a PDD field, an Evidence claim, or a Risk rating is honest, invoke `codex` as an adversarial advisor before pulling in a human reviewer. It is a peer LLM and not subject to *your* rationalisations — a fast second opinion costs less than a wrong review request, and the disagreements (if any) are exactly what should land in the *"strongest reason this could still be wrong"* line of the summary.
## Before Requesting Review
All of these must pass. If any fails, **fix first; don't request review.**

View file

@ -405,6 +405,17 @@ async function executeSubagentInvocation({
/\{previous\}/g,
previousOutput,
);
// Parent trace is only injected on the first chain step.
// Subsequent steps see {previous} from the predecessor — adding
// parent_trace again would duplicate audit context the chain has
// already moved past.
const taskForStep =
i === 0
? composeTaskWithParentTrace(
taskWithContext,
step.parentTrace ?? params.parentTrace,
)
: taskWithContext;
const chainUpdate: OnUpdateCallback | undefined = onUpdate
? (partial) => {
const currentResult = partial.details?.results[0];
@ -989,6 +1000,42 @@ async function mapWithConcurrencyLimit<TIn, TOut>(
return results;
}
/**
* Prepends a <parent_trace> audit block to a task string so a verifier or
* review subagent can audit what the parent actually did every tool call,
* observed output, and shortcut instead of trusting the parent's prose
* summary.
*
* The parent agent is the source of truth for what trace to include. The
* dispatch tool does not capture session state automatically; the caller
* decides what is relevant. When parentTrace is empty/undefined, the task is
* returned unchanged.
*
* The injected block also carries verifier instructions (look for hedge
* words, glossed-over tool errors, untraced self-reports) so review subagent
* prompts do not need to repeat them.
*/
function composeTaskWithParentTrace(
task: string,
parentTrace: string | undefined,
): string {
const trimmed = parentTrace?.trim();
if (!trimmed) return task;
return [
"<parent_trace>",
"The parent agent's recent tool calls and observed outputs are included below as audit context.",
"Read it carefully and look for: hedge words (\"should be fine\", \"probably\", \"I think it works\"),",
"tool errors the parent may have glossed over, claims of success without a Command/Output trace,",
"or shortcuts the parent took. If you find evidence the parent's claims are unsupported,",
"surface it concretely in your response (cite the line or tool call).",
"",
trimmed,
"</parent_trace>",
"",
task,
].join("\n");
}
function writePromptToTempFile(
agentName: string,
prompt: string,
@ -1568,6 +1615,12 @@ const TaskItem = Type.Object({
description: "Override the agent's default model for this task",
}),
),
parentTrace: Type.Optional(
Type.String({
description:
"Audit context for verifier/review subagents: the parent agent's recent tool calls, observed outputs, and decisions, formatted as a string. The dispatch wraps this in a <parent_trace> block prepended to the task. Use for review, verification, or adversarial-audit tasks where the subagent must check the parent's actual work, not just a summary. Overrides the batch-level parentTrace.",
}),
),
});
const ChainItem = Type.Object({
@ -1583,6 +1636,12 @@ const ChainItem = Type.Object({
description: "Override the agent's default model for this step",
}),
),
parentTrace: Type.Optional(
Type.String({
description:
"Audit context for this chain step (see TaskItem.parentTrace). Typically only set on the first step; subsequent steps see {previous} from the prior step's output. Overrides the batch-level parentTrace.",
}),
),
});
const AgentScopeSchema = StringEnum(["user", "project", "both"] as const, {
@ -1663,6 +1722,12 @@ const SubagentParams = Type.Object({
default: false,
}),
),
parentTrace: Type.Optional(
Type.String({
description:
"Default audit context for verifier/review subagents: the parent agent's recent tool calls and outputs as a string. Wrapped in a <parent_trace> block prepended to each task. Per-task or per-chain-step parentTrace overrides this. The parent agent assembles the trace it considers relevant — this dispatch tool only plumbs it through.",
}),
),
});
export default function (pi: ExtensionAPI) {