diff --git a/src/resources/extensions/browser-tools/capture.ts b/src/resources/extensions/browser-tools/capture.ts index 895ffc13b..24dd890df 100644 --- a/src/resources/extensions/browser-tools/capture.ts +++ b/src/resources/extensions/browser-tools/capture.ts @@ -10,9 +10,11 @@ import sharp from "sharp"; import type { CompactPageState, CompactSelectorState } from "./state.js"; import { formatCompactStateSummary } from "./utils.js"; -// Anthropic API rejects images > 2000px in multi-image requests. -// Cap at 1568px (recommended optimal size) to stay well within limits. -const MAX_SCREENSHOT_DIM = 1568; +// Anthropic vision: 1568px is the recommended optimal width. Height is capped +// generously at 8000px so tall full-page screenshots remain readable rather +// than being squished into a square constraint. +const MAX_SCREENSHOT_WIDTH = 1568; +const MAX_SCREENSHOT_HEIGHT = 8000; // --------------------------------------------------------------------------- // Compact page state capture @@ -120,9 +122,10 @@ export async function postActionSummary(p: Page, target?: Page | Frame): Promise // --------------------------------------------------------------------------- /** - * If either dimension of the image buffer exceeds MAX_SCREENSHOT_DIM, - * downscale proportionally using sharp. Returns the original buffer - * unchanged if already within limits. + * Constrain screenshot dimensions for the Anthropic vision API. + * The image is scaled to fit within a 1568px-wide by 8000px-tall box + * (sharp `fit: "inside"`), so both caps hold and aspect ratio is preserved. + * Small images are never upscaled. * * `page` parameter is retained for ToolDeps signature stability (D008) * but is no longer used — all processing is server-side via sharp. 
@@ -133,18 +136,17 @@ export async function constrainScreenshot( mimeType: string, quality: number, ): Promise { - const { width, height } = await sharp(buffer).metadata(); + const meta = await sharp(buffer).metadata(); + const width = meta.width; + const height = meta.height; - if ( - width !== undefined && - height !== undefined && - width <= MAX_SCREENSHOT_DIM && - height <= MAX_SCREENSHOT_DIM - ) { - return buffer; - } + if (width === undefined || height === undefined) return buffer; + if (width <= MAX_SCREENSHOT_WIDTH && height <= MAX_SCREENSHOT_HEIGHT) return buffer; - const resizer = sharp(buffer).resize(MAX_SCREENSHOT_DIM, MAX_SCREENSHOT_DIM, { fit: "inside" }); + const resizer = sharp(buffer).resize(MAX_SCREENSHOT_WIDTH, MAX_SCREENSHOT_HEIGHT, { + fit: "inside", + withoutEnlargement: true, + }); if (mimeType === "image/png") { return Buffer.from(await resizer.png().toBuffer());