chore(sf): commit remaining uncommitted improvements

- anthropic-shared.ts: replace `as any` cast on thinkingNoBudget path with `as unknown as Record<string, unknown>` for auditability; remove `as any` on server_tool_use block (SDK type is now correct) - openai-completions.ts: drop residual `as any` casts after SDK type update - db-tools.ts: add explicit AgentToolResult return type annotation on execute handlers to resolve implicit-any lint - requesting-code-review/SKILL.md: update review skill prompt - subagent/index.ts: wire subagent capability registration Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 03:22:52 +02:00 · 2026-05-02 03:22:52 +02:00 · bc9cf4fef3
commit bc9cf4fef3
parent 2846c296ee
5 changed files with 127 additions and 25 deletions
--- a/packages/pi-ai/src/providers/anthropic-shared.ts
+++ b/packages/pi-ai/src/providers/anthropic-shared.ts
@ -497,8 +497,9 @@ export function buildParams(
 			}
 		} else if (model.capabilities?.thinkingNoBudget) {
 			// Provider accepts {"type":"enabled"} without budget_tokens — model manages depth.
-			// The Anthropic SDK type requires budget_tokens but the kimi-coding API does not.
-			(params as any).thinking = { type: "enabled" };
+			// The Anthropic SDK type requires budget_tokens but the kimi-coding API does not,
+			// so we bypass the SDK constraint via unknown to avoid falsely promising budget_tokens.
+			(params as unknown as Record<string, unknown>).thinking = { type: "enabled" };
 		} else {
 			params.thinking = {
 				type: "enabled",
@ -644,8 +645,8 @@ export function processAnthropicStream(
 						};
 						output.content.push(block);
 						stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
-					} else if ((event.content_block as any).type === "server_tool_use") {
-						const serverBlock = event.content_block as any;
+					} else if (event.content_block.type === "server_tool_use") {
+						const serverBlock = event.content_block;
 						const block: Block = {
 							type: "serverToolUse",
 							id: serverBlock.id,
@ -655,8 +656,8 @@ export function processAnthropicStream(
 						};
 						output.content.push(block);
 						stream.push({ type: "server_tool_use", contentIndex: output.content.length - 1, partial: output });
-					} else if ((event.content_block as any).type === "web_search_tool_result") {
-						const resultBlock = event.content_block as any;
+					} else if (event.content_block.type === "web_search_tool_result") {
+						const resultBlock = event.content_block;
 						const block: Block = {
 							type: "webSearchResult",
 							toolUseId: resultBlock.tool_use_id,
@ -716,7 +717,9 @@ export function processAnthropicStream(
 					const index = blocks.findIndex((b) => b.index === event.index);
 					const block = blocks[index];
 					if (block) {
-						delete (block as any).index;
+						// `index` is an internal bookkeeping field added at block creation
+						// and must be stripped before the block is exposed to callers.
+						delete (block as Partial<Block>).index;
 						if (block.type === "text") {
 							stream.push({
 								type: "text_end",
@ -748,7 +751,9 @@ export function processAnthropicStream(
 								}
 							}
 							block.arguments = parsed ?? parseStreamingJson(block.partialJson);
-							delete (block as any).partialJson;
+							// `partialJson` is an internal streaming field that must not
+							// appear on the final ToolCall exposed to callers.
+							delete (block as Partial<Block>).partialJson;
 							stream.push({
 								type: "toolcall_end",
 								contentIndex: index,
@ -790,7 +795,7 @@ export function processAnthropicStream(
 			stream.push({ type: "done", reason: output.stopReason, message: output });
 			stream.end();
 		} catch (error) {
-			for (const block of output.content) delete (block as any).index;
+			for (const block of output.content) delete (block as Partial<Block>).index;
 			output.stopReason = options?.signal?.aborted ? "aborted" : "error";
 			output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
 			if (model.provider === "alibaba-coding-plan") {
--- a/packages/pi-ai/src/providers/openai-completions.ts
+++ b/packages/pi-ai/src/providers/openai-completions.ts
@ -189,13 +189,16 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
 					// or reasoning (other openai compatible endpoints)
 					// Use the first non-empty reasoning field to avoid duplication
 					// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
+					// SDK-divergence: reasoning_content / reasoning / reasoning_text are vendor extensions
+					// not present on ChatCompletionChunk.Choice.Delta in the official OpenAI SDK.
+					const deltaExt = choice.delta as unknown as Record<string, unknown>;
 					const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
 					let foundReasoningField: string | null = null;
 					for (const field of reasoningFields) {
 						if (
-							(choice.delta as any)[field] !== null &&
-							(choice.delta as any)[field] !== undefined &&
-							(choice.delta as any)[field].length > 0
+							deltaExt[field] !== null &&
+							deltaExt[field] !== undefined &&
+							(deltaExt[field] as string).length > 0
 						) {
 							if (!foundReasoningField) {
 								foundReasoningField = field;
@ -217,7 +220,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
 						}

 						if (currentBlock.type === "thinking") {
-							const delta = (choice.delta as any)[foundReasoningField];
+							const delta = deltaExt[foundReasoningField] as string;
 							currentBlock.thinking += delta;
 							stream.push({
 								type: "thinking_delta",
@ -266,7 +269,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
 						}
 					}

-					const reasoningDetails = (choice.delta as any).reasoning_details;
+					// SDK-divergence: reasoning_details is a vendor extension (OpenAI Responses API via
+					// completions-compat path) not present on ChatCompletionChunk.Choice.Delta.
+					const reasoningDetails = (deltaExt).reasoning_details;
 					if (reasoningDetails && Array.isArray(reasoningDetails)) {
 						for (const detail of reasoningDetails) {
 							if (detail.type === "reasoning.encrypted" && detail.id && detail.data) {
@ -287,7 +292,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
 			finalizeStream(stream, output);
 		} catch (error) {
 			// Some providers via OpenRouter give additional information in this field.
-			const rawMetadata = (error as any)?.error?.metadata?.raw;
+			// SDK-divergence: APIError.error is typed as Object | undefined; the nested
+			// metadata.raw field is an OpenRouter-specific extension not in the SDK type.
+			const rawMetadata = (error as unknown as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
 			handleStreamError(stream, output, error, options?.signal, rawMetadata);
 		}
 	})();
@ -338,7 +345,9 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio

 	if (options?.maxTokens) {
 		if (compat.maxTokensField === "max_tokens") {
-			(params as any).max_tokens = options.maxTokens;
+			// max_tokens is a deprecated but valid field on ChatCompletionCreateParamsBase,
+			// kept for providers (e.g. chutes.ai) that reject max_completion_tokens.
+			params.max_tokens = options.maxTokens;
 		} else {
 			params.max_completion_tokens = options.maxTokens;
 		}
@ -360,17 +369,23 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
 		params.tool_choice = options.toolChoice;
 	}

+	// For vendor-specific fields not in ChatCompletionCreateParamsBase, use a typed extension object.
+	const paramsExt = params as unknown as Record<string, unknown>;
+
 	if ((compat.thinkingFormat === "zai" || compat.thinkingFormat === "qwen") && model.reasoning) {
-		// Both Z.ai and Qwen use enable_thinking: boolean
-		(params as any).enable_thinking = !!options?.reasoningEffort;
+		// SDK-divergence: enable_thinking is a Z.ai / Qwen vendor extension not in the OpenAI SDK type.
+		paramsExt.enable_thinking = !!options?.reasoningEffort;
 	} else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
-		// OpenAI-style reasoning_effort
-		(params as any).reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
+		// reasoning_effort is in ChatCompletionCreateParamsBase, but mapReasoningEffort returns a
+		// plain string (from a provider-specific map) which may not match the SDK's ReasoningEffort
+		// literal union — cast to the SDK type to satisfy the checker.
+		params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap) as ChatCompletionReasoningEffort;
 	}

 	// OpenRouter provider routing preferences
 	if (model.baseUrl.includes("openrouter.ai") && model.compat?.openRouterRouting) {
-		(params as any).provider = model.compat.openRouterRouting;
+		// SDK-divergence: provider routing is an OpenRouter vendor extension not in the OpenAI SDK type.
+		paramsExt.provider = model.compat.openRouterRouting;
 	}

 	// Vercel AI Gateway provider routing preferences
@ -380,7 +395,8 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
 			const gatewayOptions: Record<string, string[]> = {};
 			if (routing.only) gatewayOptions.only = routing.only;
 			if (routing.order) gatewayOptions.order = routing.order;
-			(params as any).providerOptions = { gateway: gatewayOptions };
+			// SDK-divergence: providerOptions is a Vercel AI Gateway vendor extension not in the OpenAI SDK type.
+			paramsExt.providerOptions = { gateway: gatewayOptions };
 		}
 	}

--- a/src/resources/extensions/sf/bootstrap/db-tools.ts
+++ b/src/resources/extensions/sf/bootstrap/db-tools.ts
@ -1,5 +1,6 @@
 import { Type } from "@sinclair/typebox";
 import { StringEnum } from "@singularity-forge/pi-ai";
+import type { AgentToolResult } from "@singularity-forge/pi-agent-core";
 import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
 import { Text } from "@singularity-forge/pi-tui";
 import {
@ -57,7 +58,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
 		_signal: AbortSignal | undefined,
 		_onUpdate: unknown,
 		_ctx: unknown,
-	) => {
+	): Promise<AgentToolResult<Record<string, unknown>>> => {
 		const dbAvailable = await ensureDbOpen();
 		if (!dbAvailable) {
 			return {
@ -184,7 +185,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
 		_signal: AbortSignal | undefined,
 		_onUpdate: unknown,
 		_ctx: unknown,
-	) => {
+	): Promise<AgentToolResult<Record<string, unknown>>> => {
 		const dbAvailable = await ensureDbOpen();
 		if (!dbAvailable) {
 			return {
@ -316,7 +317,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
 		_signal: AbortSignal | undefined,
 		_onUpdate: unknown,
 		_ctx: unknown,
-	) => {
+	): Promise<AgentToolResult<Record<string, unknown>>> => {
 		const dbAvailable = await ensureDbOpen();
 		if (!dbAvailable) {
 			return {
--- a/src/resources/extensions/sf/skills/requesting-code-review/SKILL.md
+++ b/src/resources/extensions/sf/skills/requesting-code-review/SKILL.md
@ -26,6 +26,21 @@ A review request without evidence is a waste of the reviewer's time. Either you

 This skill is the optional terminal step of the delivery chain — invoked after verification confirms the change is honest.

+## Recognise Your Rationalisations
+
+You are Claude, and you are bad at requesting review. The shortcuts below are the ones you actually take — name each one as you catch yourself, then do the opposite:
+
+- *"Tests passed earlier, I'll just say so."* → No. Re-run after the **last** edit and paste the `Command run / Output observed / Result` block from `finish-and-verify`'s trace format. "Tests pass" without a fresh trace is a self-report, not evidence.
+- *"Risk is low."* → Did you actually `rg` the changed symbols and count callers? "Low" without that check is a guess. If blast radius >10, mark Risk medium minimum.
+- *"I'll fill in Failure boundary / Assumptions later."* → No. The PDD packet is the spec; an incomplete packet is no spec. If you can't write Assumptions now, you don't yet understand the world this code runs in — go back to PDD.
+- *"The Falsifier is obvious, why write it down?"* → If it's obvious, one sentence costs nothing. If you can't write it, the contract isn't crisp — and the reviewer will spend their time discovering what you should already know.
+- *"I verified that part manually."* → Either tag it `[MANUAL: <reviewer> + <scenario>]` so the reviewer knows what was checked, or replace it with a runnable command. Prose-only Evidence is rejected — same rule as PDD v2.
+- *"The reviewer will catch anything I missed."* → Reviewer time is expensive. You already know the soft spots. Name them in `Falsifier` or in the closing *"strongest reason this could still be wrong"* line so the reviewer focuses on what you couldn't decide alone.
+
+These are the same shortcuts `spec-first-tdd`'s **Red Flags** warn about and the same ones `finish-and-verify`'s trace format prevents. By the time you reach this skill, you should have caught yourself already — this section exists because sometimes you don't.
+
+**When you can't tell:** if after walking through the bullets you're still genuinely unsure whether a PDD field, an Evidence claim, or a Risk rating is honest, invoke `codex` as an adversarial advisor before pulling in a human reviewer. It is a peer LLM and not subject to *your* rationalisations — a fast second opinion costs less than a wrong review request, and the disagreements (if any) are exactly what should land in the *"strongest reason this could still be wrong"* line of the summary.
+
 ## Before Requesting Review

 All of these must pass. If any fails, **fix first; don't request review.**
--- a/src/resources/extensions/subagent/index.ts
+++ b/src/resources/extensions/subagent/index.ts
@ -405,6 +405,17 @@ async function executeSubagentInvocation({
 				/\{previous\}/g,
 				previousOutput,
 			);
+			// Parent trace is only injected on the first chain step.
+			// Subsequent steps see {previous} from the predecessor — adding
+			// parent_trace again would duplicate audit context the chain has
+			// already moved past.
+			const taskForStep =
+				i === 0
+					? composeTaskWithParentTrace(
+							taskWithContext,
+							step.parentTrace ?? params.parentTrace,
+						)
+					: taskWithContext;
 			const chainUpdate: OnUpdateCallback | undefined = onUpdate
 				? (partial) => {
 						const currentResult = partial.details?.results[0];
@ -989,6 +1000,42 @@ async function mapWithConcurrencyLimit<TIn, TOut>(
 	return results;
 }

+/**
+ * Prepends a <parent_trace> audit block to a task string so a verifier or
+ * review subagent can audit what the parent actually did — every tool call,
+ * observed output, and shortcut — instead of trusting the parent's prose
+ * summary.
+ *
+ * The parent agent is the source of truth for what trace to include. The
+ * dispatch tool does not capture session state automatically; the caller
+ * decides what is relevant. When parentTrace is empty/undefined, the task is
+ * returned unchanged.
+ *
+ * The injected block also carries verifier instructions (look for hedge
+ * words, glossed-over tool errors, untraced self-reports) so review subagent
+ * prompts do not need to repeat them.
+ */
+function composeTaskWithParentTrace(
+	task: string,
+	parentTrace: string | undefined,
+): string {
+	const trimmed = parentTrace?.trim();
+	if (!trimmed) return task;
+	return [
+		"<parent_trace>",
+		"The parent agent's recent tool calls and observed outputs are included below as audit context.",
+		"Read it carefully and look for: hedge words (\"should be fine\", \"probably\", \"I think it works\"),",
+		"tool errors the parent may have glossed over, claims of success without a Command/Output trace,",
+		"or shortcuts the parent took. If you find evidence the parent's claims are unsupported,",
+		"surface it concretely in your response (cite the line or tool call).",
+		"",
+		trimmed,
+		"</parent_trace>",
+		"",
+		task,
+	].join("\n");
+}
+
 function writePromptToTempFile(
 	agentName: string,
 	prompt: string,
@ -1568,6 +1615,12 @@ const TaskItem = Type.Object({
 			description: "Override the agent's default model for this task",
 		}),
 	),
+	parentTrace: Type.Optional(
+		Type.String({
+			description:
+				"Audit context for verifier/review subagents: the parent agent's recent tool calls, observed outputs, and decisions, formatted as a string. The dispatch wraps this in a <parent_trace> block prepended to the task. Use for review, verification, or adversarial-audit tasks where the subagent must check the parent's actual work, not just a summary. Overrides the batch-level parentTrace.",
+		}),
+	),
 });

 const ChainItem = Type.Object({
@ -1583,6 +1636,12 @@ const ChainItem = Type.Object({
 			description: "Override the agent's default model for this step",
 		}),
 	),
+	parentTrace: Type.Optional(
+		Type.String({
+			description:
+				"Audit context for this chain step (see TaskItem.parentTrace). Typically only set on the first step; subsequent steps see {previous} from the prior step's output. Overrides the batch-level parentTrace.",
+		}),
+	),
 });

 const AgentScopeSchema = StringEnum(["user", "project", "both"] as const, {
@ -1663,6 +1722,12 @@ const SubagentParams = Type.Object({
 			default: false,
 		}),
 	),
+	parentTrace: Type.Optional(
+		Type.String({
+			description:
+				"Default audit context for verifier/review subagents: the parent agent's recent tool calls and outputs as a string. Wrapped in a <parent_trace> block prepended to each task. Per-task or per-chain-step parentTrace overrides this. The parent agent assembles the trace it considers relevant — this dispatch tool only plumbs it through.",
+		}),
+	),
 });

 export default function (pi: ExtensionAPI) {