From f98a1e360e53649aaef9e2a6f3145cf6137d4645 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Tue, 28 Apr 2026 11:52:42 +0200 Subject: [PATCH] batch: codex-rescue session output (multiple in-flight tasks) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Combined output of multiple parallel codex-rescue runs that produced working-tree edits but didn't commit. Tasks contributing: - prefs: per-provider model allow-list (provider_model_allow) — manual - TUI scroll + unresponsive (a7884d1a / bt3fpn4y2) - planningMeeting required (aa09e904 / br127l763) - Logs UX 4-pack (a5c65314 / btcplhu7f) - Gate auto-resolve + completion nudge (ae4c8b64 / bw1w1fjkp) - sf_task_complete atomic + retry (a7a079b4 / b20cy5owv) - Multi-model meeting + minimax M2.7 + draft promotion (a756faac / task-moifjknd-lwjc98) - Per-role slice prompts (a94c3e1a) - Per-role vision-meeting prompts (afd165a0 / task-moifple5-lcwtjl) - Schema sweep (ac994b1e / task-moifq7pu-83coqz) - Flow audit (ad26ecfd / bttj4vrqm) Typecheck passes. Tests not run as a full suite — spot-check after merge. 
Co-Authored-By: Claude Sonnet 4.6 Co-Authored-By: OpenAI Codex --- docs/user-docs/configuration.md | 1 + .../mcp-server/src/workflow-tools.test.ts | 25 + packages/mcp-server/src/workflow-tools.ts | 6 +- .../pi-coding-agent/src/modes/rpc/rpc-mode.ts | 7 + src/cli-logs.ts | 440 ++++++++++++++++++ src/cli-stats.ts | 249 ++++++++++ src/cli-status.ts | 204 ++++++++ src/cli.ts | 22 + src/headless-query.ts | 7 +- src/headless.ts | 7 + src/help-text.ts | 44 ++ .../extensions/ask-user-questions.ts | 53 ++- .../extensions/remote-questions/config.ts | 27 +- .../extensions/remote-questions/manager.ts | 91 +++- .../extensions/sf/auto-completion-nudge.ts | 145 ++++++ src/resources/extensions/sf/auto-dispatch.ts | 36 +- src/resources/extensions/sf/auto-post-unit.ts | 45 ++ src/resources/extensions/sf/auto.ts | 22 +- .../extensions/sf/auto/detect-stuck.ts | 22 +- src/resources/extensions/sf/auto/phases.ts | 7 +- src/resources/extensions/sf/auto/session.ts | 11 + .../extensions/sf/bootstrap/db-tools.ts | 8 +- .../extensions/sf/bootstrap/register-hooks.ts | 19 +- .../extensions/sf/bootstrap/write-gate.ts | 27 +- .../sf/docs/preferences-reference.md | 1 + .../extensions/sf/markdown-renderer.ts | 24 + src/resources/extensions/sf/plan-quality.ts | 4 + .../extensions/sf/preferences-models.ts | 9 + .../extensions/sf/preferences-types.ts | 19 + .../extensions/sf/preferences-validation.ts | 29 ++ src/resources/extensions/sf/preferences.ts | 5 + .../sf/prompts/guided-plan-slice.md | 2 +- .../extensions/sf/prompts/plan-slice.md | 16 +- .../sf/tests/auto-completion-nudge.test.ts | 68 +++ .../sf/tests/complete-slice.test.ts | 4 +- .../extensions/sf/tests/complete-task.test.ts | 133 +++++- .../extensions/sf/tests/plan-slice.test.ts | 33 ++ .../extensions/sf/tests/preferences.test.ts | 11 + .../sf/tests/prompt-contracts.test.ts | 10 + .../sf/tests/remote-questions-manager.test.ts | 117 ++++- .../sf/tests/stuck-detection-coverage.test.ts | 10 + .../sf/tests/tool-param-optionality.test.ts | 83 
+++- .../extensions/sf/tests/workflow-mcp.test.ts | 19 + .../sf/tests/workflow-tool-executors.test.ts | 23 + .../extensions/sf/tests/write-gate.test.ts | 7 + .../extensions/sf/tools/complete-slice.ts | 141 +++--- .../extensions/sf/tools/complete-task.ts | 193 ++++---- .../extensions/sf/tools/plan-slice.ts | 42 +- .../extensions/sf/workflow-projections.ts | 24 + src/tests/cli-logs-tail.test.ts | 74 +++ src/tests/cli-stats-models.test.ts | 62 +++ 51 files changed, 2419 insertions(+), 269 deletions(-) create mode 100644 src/cli-logs.ts create mode 100644 src/cli-stats.ts create mode 100644 src/cli-status.ts create mode 100644 src/resources/extensions/sf/auto-completion-nudge.ts create mode 100644 src/resources/extensions/sf/tests/auto-completion-nudge.test.ts create mode 100644 src/tests/cli-logs-tail.test.ts create mode 100644 src/tests/cli-stats-models.test.ts diff --git a/docs/user-docs/configuration.md b/docs/user-docs/configuration.md index 8d3427961..8e09c136d 100644 --- a/docs/user-docs/configuration.md +++ b/docs/user-docs/configuration.md @@ -293,6 +293,7 @@ auto_supervisor: soft_timeout_minutes: 20 # warn LLM to wrap up idle_timeout_minutes: 10 # detect stalls hard_timeout_minutes: 30 # pause auto mode + completion_nudge_after: 10 # complete-slice tool calls before nudging sf_complete_slice ``` ### `budget_ceiling` diff --git a/packages/mcp-server/src/workflow-tools.test.ts b/packages/mcp-server/src/workflow-tools.test.ts index b25af5734..04b839949 100644 --- a/packages/mcp-server/src/workflow-tools.test.ts +++ b/packages/mcp-server/src/workflow-tools.test.ts @@ -67,6 +67,24 @@ function makeMockServer() { }; } +function validPlanningMeeting() { + return { + trigger: "MCP workflow test needs a recorded slice-planning decision.", + pm: "Keep this test slice narrow and focused on one workflow path.", + userAdvocate: "Users need the MCP path to preserve planning context.", + customerPanel: "Operators and maintainers both need durable plan artifacts.", + 
business: "Reliable planning reduces wasted automation runs.", + researcher: "The MCP server delegates to the shared workflow executors.", + deliveryLead: "Use one small task to keep the integration proof bounded.", + partner: "The test covers the DB-backed render path.", + combatant: "Missing meetings would allow silent planning-context loss.", + architect: "Schema and runtime validation should agree on the meeting contract.", + moderator: "Proceed with the focused planning proof.", + recommendedRoute: "planning", + confidenceSummary: "High confidence for this test fixture.", + }; +} + describe("workflow MCP tools", () => { it("registers the full headless-safe workflow tool surface", () => { const server = makeMockServer(); @@ -357,6 +375,7 @@ describe("workflow MCP tools", () => { milestoneId: "M001", sliceId: "S01", goal: "Persist slice plan over MCP.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T01", @@ -454,6 +473,7 @@ describe("workflow MCP tools", () => { milestoneId: "M010", sliceId: "S10", goal: "Create the initial slice plan before closing the DB.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T10", @@ -535,6 +555,7 @@ describe("workflow MCP tools", () => { milestoneId: "M099", sliceId: "S09", goal: "Plan a slice that will be replanned.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T09", @@ -681,6 +702,7 @@ describe("workflow MCP tools", () => { milestoneId: "M003", sliceId: "S03", goal: "Complete canonical slice over MCP.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T03", @@ -741,6 +763,7 @@ describe("workflow MCP tools", () => { milestoneId: "M004", sliceId: "S04", goal: "Complete alias slice over MCP.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T04", @@ -831,6 +854,7 @@ describe("workflow MCP tools", () => { milestoneId: "M005", sliceId: "S05", goal: "Prepare a complete milestone.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T05", @@ 
-956,6 +980,7 @@ describe("workflow MCP tools", () => { milestoneId: "M006", sliceId: "S06", goal: "Complete the first slice.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T06", diff --git a/packages/mcp-server/src/workflow-tools.ts b/packages/mcp-server/src/workflow-tools.ts index 73b54d5fe..d31755abc 100644 --- a/packages/mcp-server/src/workflow-tools.ts +++ b/packages/mcp-server/src/workflow-tools.ts @@ -752,14 +752,18 @@ const planSliceParams = { planningMeeting: z.object({ trigger: z.string(), pm: z.string(), + userAdvocate: z.string().optional(), + customerPanel: z.string().optional(), + business: z.string().optional(), researcher: z.string(), + deliveryLead: z.string().optional(), partner: z.string(), combatant: z.string(), architect: z.string(), moderator: z.string(), recommendedRoute: z.enum(["discussing", "researching", "planning"]), confidenceSummary: z.string(), - }).optional().describe("Optional structured planning meeting for ambiguous or higher-complexity slices"), + }).describe("Required populated planning meeting. Empty, null, or missing planningMeeting is not acceptable."), tasks: z.array(z.object({ taskId: z.string(), title: z.string(), diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts index f4a8f2160..4de55c659 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts @@ -51,6 +51,13 @@ export type { * Listens for JSON commands on stdin, outputs events and responses on stdout. 
*/ export async function runRpcMode(session: AgentSession): Promise { + const stdoutWithHandle = process.stdout as typeof process.stdout & { + _handle?: { setBlocking?: (blocking: boolean) => void }; + }; + if (!process.stdout.isTTY) { + stdoutWithHandle._handle?.setBlocking?.(true); + } + const rawStdoutWrite = process.stdout.write.bind(process.stdout); const rawStderrWrite = process.stderr.write.bind(process.stderr); diff --git a/src/cli-logs.ts b/src/cli-logs.ts new file mode 100644 index 000000000..2d1c66ed7 --- /dev/null +++ b/src/cli-logs.ts @@ -0,0 +1,440 @@ +import { + closeSync, + existsSync, + openSync, + readFileSync, + readSync, + readdirSync, + statSync, + unwatchFile, + watchFile, +} from 'node:fs' +import { homedir } from 'node:os' +import { basename, join } from 'node:path' + +export type LogSourceName = 'notif' | 'session' | 'activity' | 'audit' + +export interface MergedLogEvent { + source: LogSourceName + timestamp: string + message: string + severity?: string + filePath: string +} + +interface SourceFile { + source: LogSourceName + path: string +} + +export interface CollectRecentLogOptions { + basePath: string + sfHome?: string + source?: LogSourceName + severity?: string + limit?: number +} + +interface TailOptions extends CollectRecentLogOptions { + stdout?: Pick + stderr?: Pick +} + +interface ParsedLogsArgs { + command: 'tail' | 'follow' + source?: LogSourceName + severity?: string +} + +const SOURCE_ALIASES: Record = { + notif: 'notif', + notification: 'notif', + notifications: 'notif', + session: 'session', + sessions: 'session', + activity: 'activity', + audit: 'audit', +} + +function normalizeSource(value: string | undefined): LogSourceName | undefined { + if (!value) return undefined + return SOURCE_ALIASES[value.toLowerCase()] +} + +function sfHomeFromEnv(): string { + return process.env.SF_HOME || join(homedir(), '.sf') +} + +export function getProjectSessionKey(basePath: string): string { + return `--${basePath.replace(/^[/\\]/, 
'').replace(/[/\\:]/g, '-')}--` +} + +function latestJsonlFile(dir: string): string | null { + if (!existsSync(dir)) return null + try { + const candidates = readdirSync(dir) + .filter((file) => file.endsWith('.jsonl')) + .map((file) => { + const path = join(dir, file) + try { + const stat = statSync(path) + return { path, mtimeMs: stat.mtimeMs } + } catch { + return null + } + }) + .filter((entry): entry is { path: string; mtimeMs: number } => entry !== null) + candidates.sort((a, b) => b.mtimeMs - a.mtimeMs || basename(b.path).localeCompare(basename(a.path))) + return candidates[0]?.path ?? null + } catch { + return null + } +} + +function resolveSourceFiles(basePath: string, sfHome: string): SourceFile[] { + const sfDir = join(basePath, '.sf') + const sessionKey = getProjectSessionKey(basePath) + const sessionFile = + latestJsonlFile(join(sfHome, 'agent', 'sessions', sessionKey)) ?? + latestJsonlFile(join(sfHome, 'sessions', sessionKey)) + const activityFile = latestJsonlFile(join(sfDir, 'activity')) + + const files: Array = [ + { source: 'notif', path: join(sfDir, 'notifications.jsonl') }, + sessionFile ? { source: 'session', path: sessionFile } : null, + activityFile ? 
{ source: 'activity', path: activityFile } : null, + { source: 'audit', path: join(sfDir, 'audit-log.jsonl') }, + ] + + return files.filter((file): file is SourceFile => file !== null && existsSync(file.path)) +} + +function readRecentLines(filePath: string, maxBytes = 256 * 1024): string[] { + let content: string + try { + const stat = statSync(filePath) + if (stat.size <= maxBytes) { + content = readFileSync(filePath, 'utf-8') + } else { + const fd = openSync(filePath, 'r') + try { + const buffer = Buffer.alloc(maxBytes) + readSync(fd, buffer, 0, maxBytes, stat.size - maxBytes) + content = buffer.toString('utf-8') + const firstNewline = content.indexOf('\n') + if (firstNewline >= 0) content = content.slice(firstNewline + 1) + } finally { + closeSync(fd) + } + } + } catch { + return [] + } + return content.split(/\n/).filter((line) => line.trim().length > 0) +} + +function parseTimestamp(value: unknown): string { + if (typeof value === 'number' && Number.isFinite(value)) { + return new Date(value).toISOString() + } + if (typeof value === 'string' && value.trim()) { + const date = new Date(value) + if (!Number.isNaN(date.getTime())) return date.toISOString() + } + return new Date().toISOString() +} + +function truncateOneLine(value: string, max = 220): string { + const compact = value.replace(/\s+/g, ' ').trim() + if (compact.length <= max) return compact + return `${compact.slice(0, max - 3)}...` +} + +function textFromContent(content: unknown): string { + if (typeof content === 'string') return content + if (!Array.isArray(content)) return '' + return content + .filter((block): block is Record => block && typeof block === 'object') + .filter((block) => block.type === 'text' || block.type === 'thinking') + .map((block) => String(block.text ?? block.content ?? 
'')) + .filter(Boolean) + .join(' ') +} + +function toolNamesFromContent(content: unknown): string[] { + if (!Array.isArray(content)) return [] + return content + .filter((block): block is Record => block && typeof block === 'object') + .filter((block) => block.type === 'toolCall') + .map((block) => String(block.name ?? block.toolName ?? '')) + .filter(Boolean) +} + +function summarizeSessionLikeEntry(source: LogSourceName, entry: Record, filePath: string): MergedLogEvent | null { + const rawMessage = entry.message as Record | undefined + const message = rawMessage && typeof rawMessage === 'object' ? rawMessage : entry + const role = String(message.role ?? '') + const entryType = String(entry.type ?? '') + + if (entryType === 'tool_execution_start') { + const toolName = String(entry.toolName ?? 'tool') + return { + source, + timestamp: parseTimestamp(entry.timestamp ?? entry.ts), + message: `tool: ${toolName}`, + filePath, + } + } + + if (entryType === 'execution_complete') { + return { + source, + timestamp: parseTimestamp(entry.timestamp ?? entry.ts), + message: `execution complete: ${String(entry.status ?? 'completed')}`, + severity: entry.status === 'error' ? 'error' : undefined, + filePath, + } + } + + if (entryType === 'message_update') { + const update = entry.assistantMessageEvent as Record | undefined + const updateType = String(update?.type ?? '') + if (updateType === 'text_delta' || updateType === 'thinking_delta') { + const delta = String(update?.delta ?? update?.text ?? '') + if (!delta.trim()) return null + return { + source, + timestamp: parseTimestamp(entry.timestamp ?? entry.ts), + message: `assistant: ${truncateOneLine(delta)}`, + filePath, + } + } + if (updateType === 'toolcall_end') { + const toolCall = update?.toolCall as Record | undefined + const toolName = String(toolCall?.name ?? 'tool') + return { + source, + timestamp: parseTimestamp(entry.timestamp ?? 
entry.ts), + message: `tool: ${toolName}`, + filePath, + } + } + return null + } + + if (role === 'assistant') { + const tools = toolNamesFromContent(message.content) + const text = truncateOneLine(textFromContent(message.content)) + const stopReason = typeof message.stopReason === 'string' ? message.stopReason : undefined + const parts: string[] = [] + if (text) parts.push(`assistant: ${text}`) + if (tools.length > 0) parts.push(`tools: ${tools.join(', ')}`) + if (stopReason) parts.push(`stop: ${stopReason}`) + if (parts.length === 0) return null + return { + source, + timestamp: parseTimestamp(entry.timestamp ?? message.timestamp), + message: parts.join(' | '), + severity: stopReason === 'error' ? 'error' : undefined, + filePath, + } + } + + if (role === 'toolResult') { + const toolName = String(message.toolName ?? '') + if (!toolName) return null + return { + source, + timestamp: parseTimestamp(entry.timestamp ?? message.timestamp), + message: `tool result: ${toolName}${message.isError === true ? ' error' : ''}`, + severity: message.isError === true ? 'error' : undefined, + filePath, + } + } + + return null +} + +export function parseLogLine(source: LogSourceName, line: string, filePath: string): MergedLogEvent | null { + let entry: Record + try { + entry = JSON.parse(line) as Record + } catch { + return null + } + + if (source === 'notif') { + const message = String(entry.message ?? '') + if (!message.trim()) return null + return { + source, + timestamp: parseTimestamp(entry.ts ?? entry.timestamp), + message: truncateOneLine(message), + severity: typeof entry.severity === 'string' ? entry.severity : undefined, + filePath, + } + } + + if (source === 'audit') { + const component = String(entry.component ?? 'audit') + const message = String(entry.message ?? entry.error ?? '') + if (!message.trim()) return null + return { + source, + timestamp: parseTimestamp(entry.ts ?? 
entry.timestamp), + message: `[${component}] ${truncateOneLine(message)}`, + severity: typeof entry.severity === 'string' ? entry.severity : 'error', + filePath, + } + } + + return summarizeSessionLikeEntry(source, entry, filePath) +} + +function matchesFilters(event: MergedLogEvent, source?: LogSourceName, severity?: string): boolean { + if (source && event.source !== source) return false + if (severity && (event.severity ?? '').toLowerCase() !== severity.toLowerCase()) return false + return true +} + +export function collectRecentLogEvents(options: CollectRecentLogOptions): MergedLogEvent[] { + const sfHome = options.sfHome ?? sfHomeFromEnv() + const files = resolveSourceFiles(options.basePath, sfHome) + const limit = Math.max(1, options.limit ?? 50) + const events: MergedLogEvent[] = [] + + for (const file of files) { + if (options.source && file.source !== options.source) continue + for (const line of readRecentLines(file.path)) { + const event = parseLogLine(file.source, line, file.path) + if (event && matchesFilters(event, options.source, options.severity)) { + events.push(event) + } + } + } + + events.sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()) + return events.slice(-limit) +} + +export function formatMergedLogEvent(event: MergedLogEvent): string { + return `${event.timestamp} [${event.source}] ${event.message}\n` +} + +function parseLogsArgs(argv: string[]): ParsedLogsArgs | null { + const args = argv.slice(1) + const sub = args[0] + if (sub !== 'tail' && sub !== 'follow') return null + + const parsed: ParsedLogsArgs = { command: sub } + for (let i = 1; i < args.length; i++) { + const arg = args[i] + if (arg === '--source' && i + 1 < args.length) { + parsed.source = normalizeSource(args[++i]) + } else if (arg.startsWith('--source=')) { + parsed.source = normalizeSource(arg.slice('--source='.length)) + } else if (arg === '--severity' && i + 1 < args.length) { + parsed.severity = args[++i] + } else if 
(arg.startsWith('--severity=')) { + parsed.severity = arg.slice('--severity='.length) + } + } + return parsed +} + +function usage(): string { + return [ + 'Usage: sf logs tail|follow [--source notif|session|activity|audit] [--severity level]', + '', + 'Streams notifications, latest session summaries, latest activity log, and audit errors.', + ].join('\n') + '\n' +} + +function readFromOffset(filePath: string, offset: number): { lines: string[]; offset: number } { + try { + const stat = statSync(filePath) + const start = stat.size < offset ? 0 : offset + const length = stat.size - start + if (length <= 0) return { lines: [], offset: stat.size } + + const fd = openSync(filePath, 'r') + try { + const buffer = Buffer.alloc(length) + readSync(fd, buffer, 0, length, start) + return { + lines: buffer.toString('utf-8').split(/\n/).filter((line) => line.trim().length > 0), + offset: stat.size, + } + } finally { + closeSync(fd) + } + } catch { + return { lines: [], offset } + } +} + +export async function runLogsCli(argv: string[], options: TailOptions): Promise { + const parsed = parseLogsArgs(argv) + const stdout = options.stdout ?? process.stdout + const stderr = options.stderr ?? process.stderr + if (!parsed) { + stderr.write(usage()) + return 1 + } + if (parsed.source === undefined && argv.some((arg) => arg.startsWith('--source'))) { + stderr.write('sf logs: unknown --source value\n') + return 1 + } + + const source = parsed.source ?? options.source + const severity = parsed.severity ?? options.severity + const sfHome = options.sfHome ?? 
sfHomeFromEnv() + + for (const event of collectRecentLogEvents({ basePath: options.basePath, sfHome, source, severity, limit: 50 })) { + stdout.write(formatMergedLogEvent(event)) + } + + const offsets = new Map() + const watched = new Map() + + const refreshWatches = () => { + for (const file of resolveSourceFiles(options.basePath, sfHome)) { + if (source && file.source !== source) continue + if (watched.has(file.path)) continue + watched.set(file.path, file) + try { + offsets.set(file.path, statSync(file.path).size) + } catch { + offsets.set(file.path, 0) + } + watchFile(file.path, { interval: 500 }, () => { + const currentOffset = offsets.get(file.path) ?? 0 + const result = readFromOffset(file.path, currentOffset) + offsets.set(file.path, result.offset) + for (const line of result.lines) { + const event = parseLogLine(file.source, line, file.path) + if (event && matchesFilters(event, source, severity)) { + stdout.write(formatMergedLogEvent(event)) + } + } + }) + } + } + + refreshWatches() + const refreshTimer = setInterval(refreshWatches, 1000) + + await new Promise((resolve) => { + const stop = () => { + clearInterval(refreshTimer) + for (const path of watched.keys()) unwatchFile(path) + process.off('SIGINT', stop) + process.off('SIGTERM', stop) + resolve() + } + process.on('SIGINT', stop) + process.on('SIGTERM', stop) + }) + + return 0 +} diff --git a/src/cli-stats.ts b/src/cli-stats.ts new file mode 100644 index 000000000..5b7d31e8e --- /dev/null +++ b/src/cli-stats.ts @@ -0,0 +1,249 @@ +import { existsSync } from 'node:fs' +import { createRequire } from 'node:module' +import { join } from 'node:path' + +export interface ModelStatsRow { + model_id: string + provider: string + unit_type: string + attempts: number + successes: number + avg_ms: number | null + avg_cost: number | null + avg_retries: number | null +} + +export interface QueryModelStatsOptions { + sinceSeconds: number + unitType?: string +} + +interface SqliteStatement { + all(...params: 
unknown[]): Record[] +} + +interface SqliteDb { + prepare(sql: string): SqliteStatement + close?: () => void +} + +interface ParsedStatsArgs { + command: 'models' + sinceSeconds: number + unitType?: string +} + +const require = createRequire(import.meta.url) + +export function parseDurationSeconds(value: string): number { + const match = value.trim().match(/^(\d+(?:\.\d+)?)([smhdw])$/i) + if (!match) { + throw new Error(`Invalid duration: ${value}`) + } + const amount = Number(match[1]) + const unit = match[2].toLowerCase() + const multiplier = unit === 's' ? 1 + : unit === 'm' ? 60 + : unit === 'h' ? 60 * 60 + : unit === 'd' ? 24 * 60 * 60 + : 7 * 24 * 60 * 60 + return Math.max(1, Math.floor(amount * multiplier)) +} + +function toNumber(value: unknown, fallback = 0): number { + const n = Number(value) + return Number.isFinite(n) ? n : fallback +} + +function toNullableNumber(value: unknown): number | null { + if (value === null || value === undefined) return null + const n = Number(value) + return Number.isFinite(n) ? n : null +} + +export function queryModelStats(db: SqliteDb, options: QueryModelStatsOptions): ModelStatsRow[] { + const where = ['recorded_at > unixepoch() - ?'] + const params: unknown[] = [options.sinceSeconds] + if (options.unitType) { + where.push('unit_type = ?') + params.push(options.unitType) + } + + const sql = ` +SELECT model_id, provider, unit_type, COUNT(*) AS attempts, + SUM(succeeded) AS successes, + AVG(duration_ms) AS avg_ms, + AVG(cost_usd) AS avg_cost, + AVG(retries) AS avg_retries +FROM llm_task_outcomes +WHERE ${where.join(' AND ')} +GROUP BY model_id, unit_type +ORDER BY attempts DESC +` + + return db.prepare(sql).all(...params).map((row) => ({ + model_id: String(row.model_id ?? ''), + provider: String(row.provider ?? ''), + unit_type: String(row.unit_type ?? 
''), + attempts: toNumber(row.attempts), + successes: toNumber(row.successes), + avg_ms: toNullableNumber(row.avg_ms), + avg_cost: toNullableNumber(row.avg_cost), + avg_retries: toNullableNumber(row.avg_retries), + })) +} + +function formatSuccessRate(row: ModelStatsRow): string { + if (row.attempts <= 0) return '0.0%' + return `${((row.successes / row.attempts) * 100).toFixed(1)}%` +} + +function formatMs(value: number | null): string { + return value === null ? 'n/a' : value.toFixed(0) +} + +function formatCost(value: number | null): string { + return value === null ? 'n/a' : `$${value.toFixed(4)}` +} + +function formatRetries(value: number | null): string { + return value === null ? 'n/a' : value.toFixed(2) +} + +function pad(value: string, width: number, align: 'left' | 'right'): string { + return align === 'right' ? value.padStart(width) : value.padEnd(width) +} + +export function formatModelStatsTable(rows: ModelStatsRow[]): string { + const headers = ['model_id', 'provider', 'unit_type', 'attempts', 'success_rate', 'avg_ms', 'avg_cost', 'avg_retries'] + const body = rows.map((row) => [ + row.model_id, + row.provider, + row.unit_type, + String(row.attempts), + formatSuccessRate(row), + formatMs(row.avg_ms), + formatCost(row.avg_cost), + formatRetries(row.avg_retries), + ]) + const widths = headers.map((header, index) => Math.max(header.length, ...body.map((row) => row[index].length))) + const numeric = new Set([3, 4, 5, 6, 7]) + + const separator = `+${widths.map((width) => '-'.repeat(width + 2)).join('+')}+` + const renderRow = (row: string[]) => + `| ${row.map((cell, index) => pad(cell, widths[index], numeric.has(index) ? 
'right' : 'left')).join(' | ')} |` + + return [ + separator, + renderRow(headers), + separator, + ...body.map(renderRow), + separator, + ].join('\n') + '\n' +} + +function parseStatsArgs(argv: string[]): ParsedStatsArgs | null { + const args = argv.slice(1) + if (args[0] !== 'models') return null + const parsed: ParsedStatsArgs = { + command: 'models', + sinceSeconds: parseDurationSeconds('7d'), + } + + for (let i = 1; i < args.length; i++) { + const arg = args[i] + if (arg === '--unit-type' && i + 1 < args.length) { + parsed.unitType = args[++i] + } else if (arg.startsWith('--unit-type=')) { + parsed.unitType = arg.slice('--unit-type='.length) + } else if (arg === '--since' && i + 1 < args.length) { + parsed.sinceSeconds = parseDurationSeconds(args[++i]) + } else if (arg.startsWith('--since=')) { + parsed.sinceSeconds = parseDurationSeconds(arg.slice('--since='.length)) + } + } + + return parsed +} + +function usage(): string { + return [ + 'Usage: sf stats models [--unit-type ] [--since ]', + '', + 'Examples:', + ' sf stats models --since 7d', + ' sf stats models --unit-type execute-task --since 24h', + ].join('\n') + '\n' +} + +function openSqliteDb(dbPath: string): SqliteDb { + try { + const sqlite = require('node:sqlite') as { DatabaseSync?: new (path: string, options?: Record) => SqliteDb } + if (sqlite.DatabaseSync) { + return new sqlite.DatabaseSync(dbPath, { readOnly: true }) + } + } catch { + // Try better-sqlite3 below. + } + + try { + const mod = require('better-sqlite3') as { default?: new (path: string, options?: Record) => SqliteDb } | (new (path: string, options?: Record) => SqliteDb) + const Database = typeof mod === 'function' ? mod : mod.default + if (Database) return new Database(dbPath, { readonly: true, fileMustExist: true }) + } catch { + // Report a single actionable error below. 
+ } + + throw new Error('No SQLite provider available (tried node:sqlite, better-sqlite3)') +} + +export async function runStatsCli( + argv: string[], + deps: { + basePath: string + stdout?: Pick + stderr?: Pick + }, +): Promise { + const stdout = deps.stdout ?? process.stdout + const stderr = deps.stderr ?? process.stderr + let parsed: ParsedStatsArgs | null + try { + parsed = parseStatsArgs(argv) + } catch (err) { + stderr.write(`sf stats: ${err instanceof Error ? err.message : String(err)}\n`) + return 1 + } + if (!parsed) { + stderr.write(usage()) + return 1 + } + + const dbPath = join(deps.basePath, '.sf', 'sf.db') + if (!existsSync(dbPath)) { + stderr.write(`sf stats: database not found at ${dbPath}\n`) + return 1 + } + + let db: SqliteDb | null = null + try { + db = openSqliteDb(dbPath) + const rows = queryModelStats(db, parsed) + if (rows.length === 0) { + stdout.write('No model outcomes recorded for the selected window.\n') + } else { + stdout.write(formatModelStatsTable(rows)) + } + return 0 + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err) + if (/no such table/i.test(message)) { + stdout.write('No model outcomes recorded yet.\n') + return 0 + } + stderr.write(`sf stats: ${message}\n`) + return 1 + } finally { + db?.close?.() + } +} diff --git a/src/cli-status.ts b/src/cli-status.ts new file mode 100644 index 000000000..d7bf281af --- /dev/null +++ b/src/cli-status.ts @@ -0,0 +1,204 @@ +import { readFileSync, readdirSync, statSync } from 'node:fs' +import { homedir } from 'node:os' +import { join } from 'node:path' + +import { collectRecentLogEvents, formatMergedLogEvent, getProjectSessionKey } from './cli-logs.js' +import type { QuerySnapshot } from './headless-query.js' + +interface StatusArgs { + watch: boolean +} + +interface StatusDeps { + basePath: string + sfHome?: string + stdout?: Pick + stderr?: Pick +} + +interface CurrentModel { + provider: string + model: string +} + +function parseStatusArgs(argv: string[]): StatusArgs { + const args = argv.slice(1) + return { + watch: args.includes('--watch'), + } +} + +function formatRef(ref: { id: string; title?: string } | null | undefined): string { + if (!ref) return 'n/a' + return ref.title ? `${ref.id} ${ref.title}` : ref.id +} + +function formatDispatch(snapshot: QuerySnapshot): string { + const next = snapshot.next + if (next.action === 'dispatch') { + return `${next.unitType ?? 'unit'} ${next.unitId ?? 'n/a'}` + } + if (next.action === 'stop') return `stop: ${next.reason ?? snapshot.state.nextAction}` + return `skip: ${next.reason ?? snapshot.state.nextAction}` +} + +function formatCost(snapshot: QuerySnapshot): string { + const total = snapshot.cost.total + const workers = snapshot.cost.workers.length + return `$${total.toFixed(4)}${workers > 0 ? ` (${workers} worker${workers === 1 ? 
'' : 's'})` : ''}` +} + +function latestJsonlFile(dir: string): string | null { + try { + const entries = readdirSync(dir) + .filter((file) => file.endsWith('.jsonl')) + .map((file) => { + const path = join(dir, file) + try { + return { path, mtimeMs: statSync(path).mtimeMs } + } catch { + return null + } + }) + .filter((entry): entry is { path: string; mtimeMs: number } => entry !== null) + entries.sort((a, b) => b.mtimeMs - a.mtimeMs) + return entries[0]?.path ?? null + } catch { + return null + } +} + +function extractCurrentModelFromSession(filePath: string): CurrentModel | null { + let current: CurrentModel | null = null + let raw: string + try { + raw = readFileSync(filePath, 'utf-8') + } catch { + return null + } + + for (const line of raw.split(/\n/)) { + if (!line.trim()) continue + let entry: Record + try { + entry = JSON.parse(line) as Record + } catch { + continue + } + + if (entry.type === 'model_change') { + const provider = String(entry.provider ?? '') + const model = String(entry.modelId ?? '') + if (provider || model) current = { provider, model } + continue + } + + if (entry.type === 'message') { + const message = entry.message as Record | undefined + if (message?.role === 'assistant') { + const provider = String(message.provider ?? '') + const model = String(message.model ?? '') + if (provider || model) current = { provider, model } + } + } + } + + return current +} + +function getCurrentModel(basePath: string, sfHome: string): CurrentModel | null { + const key = getProjectSessionKey(basePath) + const sessionFile = + latestJsonlFile(join(sfHome, 'agent', 'sessions', key)) ?? + latestJsonlFile(join(sfHome, 'sessions', key)) + return sessionFile ? 
extractCurrentModelFromSession(sessionFile) : null +} + +function formatModel(model: CurrentModel | null): string { + if (!model) return 'n/a' + if (model.provider && model.model) return `${model.provider}/${model.model}` + return model.model || model.provider || 'n/a' +} + +export function renderLiveStatus(snapshot: QuerySnapshot, opts: { model: CurrentModel | null; recentEvents: string[] }): string { + const lines: string[] = [] + lines.push('SF Status') + lines.push('---------') + lines.push(`Milestone: ${formatRef(snapshot.state.activeMilestone ?? snapshot.state.lastCompletedMilestone)}`) + lines.push(`Slice: ${formatRef(snapshot.state.activeSlice)}`) + lines.push(`Task: ${formatRef(snapshot.state.activeTask)}`) + lines.push(`Phase: ${snapshot.state.phase}`) + lines.push(`Dispatch: ${formatDispatch(snapshot)}`) + lines.push(`Cost: ${formatCost(snapshot)}`) + lines.push(`Model: ${formatModel(opts.model)}`) + lines.push('') + lines.push('Last Events:') + if (opts.recentEvents.length === 0) { + lines.push(' n/a') + } else { + for (const event of opts.recentEvents) { + lines.push(` ${event.trimEnd()}`) + } + } + return lines.join('\n') + '\n' +} + +async function buildStatusText(basePath: string, sfHome: string): Promise { + const { buildQuerySnapshot } = await import('./headless-query.js') + const snapshot = await buildQuerySnapshot(basePath) + const recentEvents = collectRecentLogEvents({ + basePath, + sfHome, + limit: 50, + }) + .filter((event) => event.source === 'notif' || event.source === 'activity') + .slice(-10) + .map(formatMergedLogEvent) + + return renderLiveStatus(snapshot, { + model: getCurrentModel(basePath, sfHome), + recentEvents, + }) +} + +export async function runStatusCli(argv: string[], deps: StatusDeps): Promise { + const stdout = deps.stdout ?? process.stdout + const stderr = deps.stderr ?? process.stderr + const sfHome = deps.sfHome ?? process.env.SF_HOME ?? 
join(homedir(), '.sf') + const args = parseStatusArgs(argv) + + const renderOnce = async () => { + try { + const text = await buildStatusText(deps.basePath, sfHome) + if (args.watch && stdout.isTTY) stdout.write('\x1b[2J\x1b[H') + stdout.write(text) + } catch (err) { + stderr.write(`sf status: ${err instanceof Error ? err.message : String(err)}\n`) + throw err + } + } + + if (!args.watch) { + await renderOnce() + return 0 + } + + await renderOnce() + await new Promise((resolve) => { + const timer = setInterval(() => { + renderOnce().catch(() => { + clearInterval(timer) + resolve() + }) + }, 2000) + const stop = () => { + clearInterval(timer) + process.off('SIGINT', stop) + process.off('SIGTERM', stop) + resolve() + } + process.on('SIGINT', stop) + process.on('SIGTERM', stop) + }) + return 0 +} diff --git a/src/cli.ts b/src/cli.ts index b66f4d4f1..a9afe949b 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -272,6 +272,28 @@ if (packageCommand.handled) { process.exit(packageCommand.exitCode) } +// `sf logs tail|follow` — merged live stream from notifications, sessions, activity, and audit logs +if (cliFlags.messages[0] === 'logs') { + const { runLogsCli } = await import('./cli-logs.js') + const exitCode = await runLogsCli(process.argv.slice(2), { basePath: process.cwd() }) + process.exit(exitCode) +} + +// `sf status [--live] [--watch]` / `sf dash` — printable aggregate project view +if (cliFlags.messages[0] === 'status' || cliFlags.messages[0] === 'dash') { + initResources(agentDir) + const { runStatusCli } = await import('./cli-status.js') + const exitCode = await runStatusCli(process.argv.slice(2), { basePath: process.cwd() }) + process.exit(exitCode) +} + +// `sf stats models` — model outcome summary from .sf/sf.db +if (cliFlags.messages[0] === 'stats') { + const { runStatsCli } = await import('./cli-stats.js') + const exitCode = await runStatsCli(process.argv.slice(2), { basePath: process.cwd() }) + process.exit(exitCode) +} + // `sf config` — replay the setup wizard 
and exit if (cliFlags.messages[0] === 'config') { const authStorage = AuthStorage.create(authFilePath) diff --git a/src/headless-query.ts b/src/headless-query.ts index 3e710e664..ac7a76af9 100644 --- a/src/headless-query.ts +++ b/src/headless-query.ts @@ -77,7 +77,7 @@ export interface QueryResult { // ─── Implementation ───────────────────────────────────────────────────────── -export async function handleQuery(basePath: string): Promise { +export async function buildQuerySnapshot(basePath: string): Promise { const { openProjectDbIfPresent, deriveState, @@ -128,6 +128,11 @@ export async function handleQuery(basePath: string): Promise { cost: { workers, total: workers.reduce((sum, w) => sum + w.cost, 0) }, } + return snapshot +} + +export async function handleQuery(basePath: string): Promise { + const snapshot = await buildQuerySnapshot(basePath) process.stdout.write(JSON.stringify(snapshot) + '\n') return { exitCode: 0, data: snapshot } } diff --git a/src/headless.ts b/src/headless.ts index f05f4c112..82ce1504f 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -220,6 +220,13 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { // --------------------------------------------------------------------------- export async function runHeadless(options: HeadlessOptions): Promise { + const stdoutWithHandle = process.stdout as typeof process.stdout & { + _handle?: { setBlocking?: (blocking: boolean) => void } + } + if (!process.stdout.isTTY) { + stdoutWithHandle._handle?.setBlocking?.(true) + } + const maxRestarts = options.maxRestarts ?? 
3 let restartCount = 0 diff --git a/src/help-text.ts b/src/help-text.ts index ab6fad3e5..3b6ad5c87 100644 --- a/src/help-text.ts +++ b/src/help-text.ts @@ -59,6 +59,46 @@ const SUBCOMMAND_HELP: Record = { 'List installed package sources from user and project settings.', ].join('\n'), + logs: [ + 'Usage: sf logs tail|follow [--source notif|session|activity|audit] [--severity level]', + '', + 'Follow a merged live stream from .sf/notifications.jsonl, the latest', + 'headless session JSONL, the latest .sf/activity JSONL, and .sf/audit-log.jsonl.', + '', + 'Examples:', + ' sf logs tail', + ' sf logs follow --source audit', + ' sf logs tail --severity error', + ].join('\n'), + + status: [ + 'Usage: sf status --live [--watch]', + '', + 'Show a printable aggregate project view: current milestone, slice, task,', + 'phase, next dispatch, recent events, session cost, and current model.', + '', + 'Examples:', + ' sf status --live', + ' sf status --live --watch', + ' sf dash --watch', + ].join('\n'), + + dash: [ + 'Usage: sf dash [--watch]', + '', + 'Alias for sf status --live.', + ].join('\n'), + + stats: [ + 'Usage: sf stats models [--unit-type ] [--since ]', + '', + 'Summarize Bayesian model outcome rows from .sf/sf.db.', + '', + 'Examples:', + ' sf stats models --since 7d', + ' sf stats models --unit-type execute-task --since 24h', + ].join('\n'), + worktree: [ 'Usage: sf worktree [args]', '', @@ -194,6 +234,10 @@ export function printHelp(version: string): void { process.stdout.write(' list List installed package sources\n') process.stdout.write(' update Update SF to the latest version\n') process.stdout.write(' sessions List and resume a past session\n') + process.stdout.write(' logs tail|follow Follow merged SF logs\n') + process.stdout.write(' status --live Show aggregate project status\n') + process.stdout.write(' dash Alias for status --live\n') + process.stdout.write(' stats models Summarize model outcomes\n') process.stdout.write(' worktree Manage worktrees (list, 
merge, clean, remove)\n') process.stdout.write(' auto [args] Run auto-mode without TUI (pipeable)\n') process.stdout.write(' headless [cmd] [args] Run /sf commands without TUI (default: auto)\n') diff --git a/src/resources/extensions/ask-user-questions.ts b/src/resources/extensions/ask-user-questions.ts index 1378d91ae..e021d8f78 100644 --- a/src/resources/extensions/ask-user-questions.ts +++ b/src/resources/extensions/ask-user-questions.ts @@ -190,6 +190,29 @@ function errorResult( }; } +function cleanRecommendedLabel(label: string): string { + return label.replace(/\s*\(Recommended\)\s*/g, "").trim(); +} + +function gateLogId(questionId: string): string { + if (questionId.includes("depth_verification")) return "depth_verification"; + return questionId; +} + +function logHeadlessLocalAutoResolve(result: RaceableResult): void { + const details = result.details as Record | undefined; + if (!details?.localFallback || !details.response || !Array.isArray(details.questions)) return; + const questions = details.questions as Question[]; + const response = details.response as import("./remote-questions/types.js").RemoteAnswer; + const firstQuestion = questions[0]; + if (!firstQuestion) return; + const firstAnswer = response.answers[firstQuestion.id]?.answers?.[0]; + if (!firstAnswer) return; + process.stderr.write( + `[gate] auto-resolved ${gateLogId(firstQuestion.id)} → "${cleanRecommendedLabel(firstAnswer)}" (timeout, headless, no telegram)\n`, + ); +} + /** Convert the shared RoundResult into the JSON the LLM expects. 
*/ function formatForLLM(result: RoundResult): string { const answers: Record = {}; @@ -250,7 +273,7 @@ export default function AskUserQuestions(pi: ExtensionAPI) { } // ── Routing: race remote + local, remote-only, or local-only ──────── - const { tryRemoteQuestions, isRemoteConfigured } = await import("./remote-questions/manager.js"); + const { tryRemoteQuestions, isRemoteConfigured, tryHeadlessLocalAutoResolveQuestions } = await import("./remote-questions/manager.js"); const hasRemote = isRemoteConfigured(); // Case 1: Both remote and local UI available — race them. @@ -285,18 +308,44 @@ export default function AskUserQuestions(pi: ExtensionAPI) { // Case 2: Remote configured but no local UI (headless) — remote only. if (hasRemote && !ctx.hasUI) { const remoteResult = await tryRemoteQuestions(params.questions, signal); + let failedRemoteResult: RaceableResult | null = null; if (remoteResult) { const remoteDetails = remoteResult.details as Record | undefined; if (remoteDetails && isUsableRemoteQuestionResult(remoteDetails)) { turnCache.set(sig, remoteResult as unknown as CachedResult); + if (remoteDetails.localFallback) logHeadlessLocalAutoResolve(remoteResult); + return { ...remoteResult, details: remoteResult.details as unknown }; } - return { ...remoteResult, details: remoteResult.details as unknown }; + failedRemoteResult = remoteResult; } + const fallbackResult = await tryHeadlessLocalAutoResolveQuestions(params.questions, { + hasUI: ctx.hasUI, + telegramUnavailable: true, + unavailableReason: "telegram-poller-error", + signal, + }); + if (fallbackResult) { + turnCache.set(sig, fallbackResult as unknown as CachedResult); + logHeadlessLocalAutoResolve(fallbackResult); + return { ...fallbackResult, details: fallbackResult.details as unknown }; + } + if (failedRemoteResult) return { ...failedRemoteResult, details: failedRemoteResult.details as unknown }; return errorResult("Error: remote channel configured but returned no result", params.questions); } // Case 
3: No remote — local UI only. if (!ctx.hasUI) { + const fallbackResult = await tryHeadlessLocalAutoResolveQuestions(params.questions, { + hasUI: ctx.hasUI, + telegramUnavailable: true, + unavailableReason: "no-telegram", + signal, + }); + if (fallbackResult) { + turnCache.set(sig, fallbackResult as unknown as CachedResult); + logHeadlessLocalAutoResolve(fallbackResult); + return { ...fallbackResult, details: fallbackResult.details as unknown }; + } return errorResult("Error: UI not available (non-interactive mode)", params.questions); } diff --git a/src/resources/extensions/remote-questions/config.ts b/src/resources/extensions/remote-questions/config.ts index 33416555b..3d8524cea 100644 --- a/src/resources/extensions/remote-questions/config.ts +++ b/src/resources/extensions/remote-questions/config.ts @@ -16,6 +16,15 @@ export interface ResolvedConfig { autoResolveStrategy: "recommended-option"; } +export interface ResolvedPreferenceConfig { + channel: RemoteChannel; + channelId: string; + timeoutMs: number; + pollIntervalMs: number; + autoResolveOnTimeout: boolean; + autoResolveStrategy: "recommended-option"; +} + const ENV_KEYS: Record = { slack: "SLACK_BOT_TOKEN", discord: "DISCORD_BOT_TOKEN", @@ -78,6 +87,20 @@ function hydrateRemoteTokensFromAuth(): void { export function resolveRemoteConfig(): ResolvedConfig | null { hydrateRemoteTokensFromAuth(); + const preferenceConfig = resolveRemotePreferenceConfig(false); + if (!preferenceConfig) return null; + + const token = process.env[ENV_KEYS[preferenceConfig.channel]]; + if (!token) return null; + + return { + ...preferenceConfig, + token, + }; +} + +export function resolveRemotePreferenceConfig(hydrateTokens = true): ResolvedPreferenceConfig | null { + if (hydrateTokens) hydrateRemoteTokensFromAuth(); const prefs = loadEffectiveSFPreferences(); const rq: RemoteQuestionsConfig | undefined = prefs?.preferences.remote_questions; if (!rq || !rq.channel || !rq.channel_id) return null; @@ -86,9 +109,6 @@ export function 
resolveRemoteConfig(): ResolvedConfig | null { const channelId = String(rq.channel_id); if (!CHANNEL_ID_PATTERNS[rq.channel].test(channelId)) return null; - const token = process.env[ENV_KEYS[rq.channel]]; - if (!token) return null; - const timeoutMinutes = clampNumber(rq.timeout_minutes, DEFAULT_TIMEOUT_MINUTES, MIN_TIMEOUT_MINUTES, MAX_TIMEOUT_MINUTES); const pollIntervalSeconds = clampNumber(rq.poll_interval_seconds, DEFAULT_POLL_INTERVAL_SECONDS, MIN_POLL_INTERVAL_SECONDS, MAX_POLL_INTERVAL_SECONDS); @@ -97,7 +117,6 @@ export function resolveRemoteConfig(): ResolvedConfig | null { channelId, timeoutMs: timeoutMinutes * 60 * 1000, pollIntervalMs: pollIntervalSeconds * 1000, - token, autoResolveOnTimeout: rq.auto_resolve_on_timeout === true, autoResolveStrategy: rq.auto_resolve_strategy ?? "recommended-option", }; diff --git a/src/resources/extensions/remote-questions/manager.ts b/src/resources/extensions/remote-questions/manager.ts index 63ec413d7..e52b1a7e2 100644 --- a/src/resources/extensions/remote-questions/manager.ts +++ b/src/resources/extensions/remote-questions/manager.ts @@ -4,7 +4,7 @@ import { randomUUID } from "node:crypto"; import type { ChannelAdapter, RemotePrompt, RemoteQuestion, RemoteAnswer } from "./types.js"; -import { resolveRemoteConfig, type ResolvedConfig } from "./config.js"; +import { resolveRemoteConfig, resolveRemotePreferenceConfig, type ResolvedConfig } from "./config.js"; import { DiscordAdapter } from "./discord-adapter.js"; import { SlackAdapter } from "./slack-adapter.js"; import { TelegramAdapter } from "./telegram-adapter.js"; @@ -24,6 +24,22 @@ interface QuestionInput { allowMultiple?: boolean; } +export interface HeadlessLocalAutoResolvePolicy { + channel: "telegram"; + timeoutMs: number; + autoResolveOnTimeout: boolean; + autoResolveStrategy: "recommended-option"; +} + +export interface HeadlessLocalAutoResolveOptions { + hasUI: boolean; + telegramUnavailable: boolean; + unavailableReason: "no-telegram" | 
"telegram-poller-error"; + policy?: HeadlessLocalAutoResolvePolicy | null; + signal?: AbortSignal; + sleepFn?: (ms: number, signal?: AbortSignal) => Promise; +} + export function tryAutoResolveQuestions( questions: QuestionInput[], strategy: "recommended-option" = "recommended-option", @@ -33,14 +49,57 @@ export function tryAutoResolveQuestions( for (const question of questions) { if (question.allowMultiple) return null; - const recommended = question.options.filter((option) => option.label.includes("(Recommended)")); - if (recommended.length !== 1) return null; - answers[question.id] = { answers: [recommended[0].label] }; + const firstOption = question.options[0]; + if (!firstOption?.label) return null; + answers[question.id] = { answers: [firstOption.label] }; } return { answers }; } +export function resolveHeadlessLocalAutoResolvePolicy(): HeadlessLocalAutoResolvePolicy | null { + const config = resolveRemotePreferenceConfig(); + if (!config || config.channel !== "telegram") return null; + return { + channel: "telegram", + timeoutMs: config.timeoutMs, + autoResolveOnTimeout: config.autoResolveOnTimeout, + autoResolveStrategy: config.autoResolveStrategy, + }; +} + +export async function tryHeadlessLocalAutoResolveQuestions( + questions: QuestionInput[], + options: HeadlessLocalAutoResolveOptions, +): Promise { + const policy = options.policy ?? resolveHeadlessLocalAutoResolvePolicy(); + if (options.hasUI || !options.telegramUnavailable) return null; + if (!policy?.autoResolveOnTimeout) return null; + if (options.signal?.aborted) return null; + + await (options.sleepFn ?? 
sleep)(policy.timeoutMs, options.signal); + if (options.signal?.aborted) return null; + + const autoResolved = tryAutoResolveQuestions(questions, policy.autoResolveStrategy); + if (!autoResolved) return null; + + return { + content: [{ type: "text", text: JSON.stringify({ answers: formatForTool(autoResolved) }) }], + details: { + remote: true, + channel: policy.channel, + timed_out: true, + status: "auto-resolved-local", + autoResolved: true, + autoResolveStrategy: policy.autoResolveStrategy, + localFallback: true, + unavailableReason: options.unavailableReason, + questions, + response: autoResolved, + }, + }; +} + /** * Check whether a remote channel is configured without triggering any * side effects (no HTTP requests, no prompt records). Used by the race @@ -77,8 +136,11 @@ export async function tryRemoteQuestions( return errorResult(`Failed to send questions via ${config.channel}: ${(err as Error).message}`, config.channel); } - const answer = await pollUntilDone(adapter, prompt, dispatch.ref, signal); - if (!answer) { + const pollResult = await pollUntilDone(adapter, prompt, dispatch.ref, signal); + if (!pollResult.answer) { + if (!signal?.aborted && pollResult.unavailable && config.channel === "telegram" && config.autoResolveOnTimeout) { + await sleep(Math.max(0, prompt.timeoutAt - Date.now()), signal); + } const autoResolved = !signal?.aborted && config.autoResolveOnTimeout ? tryAutoResolveQuestions(questions, config.autoResolveStrategy) : null; @@ -125,7 +187,7 @@ export async function tryRemoteQuestions( }; } - markPromptAnswered(prompt.id, answer); + markPromptAnswered(prompt.id, pollResult.answer); // Best-effort acknowledgement gives remote users a visible receipt signal. 
try { @@ -133,7 +195,7 @@ export async function tryRemoteQuestions( } catch { /* best-effort */ } return { - content: [{ type: "text", text: JSON.stringify({ answers: formatForTool(answer) }) }], + content: [{ type: "text", text: JSON.stringify({ answers: formatForTool(pollResult.answer) }) }], details: { remote: true, channel: config.channel, @@ -141,7 +203,7 @@ export async function tryRemoteQuestions( promptId: prompt.id, threadUrl: dispatch.ref.threadUrl ?? null, questions, - response: answer, + response: pollResult.answer, status: "answered", }, }; @@ -177,26 +239,27 @@ async function pollUntilDone( prompt: RemotePrompt, ref: import("./types.js").RemotePromptRef, signal?: AbortSignal, -): Promise { +): Promise<{ answer: RemoteAnswer | null; unavailable?: boolean; error?: string }> { let retryCount = 0; while (Date.now() < prompt.timeoutAt && !signal?.aborted) { try { const answer = await adapter.pollAnswer(prompt, ref); updatePromptRecord(prompt.id, { lastPollAt: Date.now() }); retryCount = 0; - if (answer) return answer; + if (answer) return { answer }; } catch (err) { retryCount++; if (retryCount > 1) { - markPromptStatus(prompt.id, "failed", sanitizeError(String((err as Error).message))); - return null; + const message = sanitizeError(String((err as Error).message)); + markPromptStatus(prompt.id, "failed", message); + return { answer: null, unavailable: true, error: message }; } } await sleep(prompt.pollIntervalMs, signal); } - return null; + return { answer: null }; } function sleep(ms: number, signal?: AbortSignal): Promise { diff --git a/src/resources/extensions/sf/auto-completion-nudge.ts b/src/resources/extensions/sf/auto-completion-nudge.ts new file mode 100644 index 000000000..5419d76e7 --- /dev/null +++ b/src/resources/extensions/sf/auto-completion-nudge.ts @@ -0,0 +1,145 @@ +import type { AgentMessage } from "@singularity-forge/pi-agent-core"; + +export const DEFAULT_COMPLETION_NUDGE_AFTER = 10; +export const COMPLETION_NUDGE_TOOL_NAMES = new 
Set(["sf_complete_slice", "sf_slice_complete"]); + +const COMPLETION_NUDGE_CUSTOM_TYPE = "sf-completion-nudge"; +const LOWERED_TEMPERATURE = 0.2; + +interface CompletionNudgeState { + active: boolean; + unitType: string; + unitId: string; + toolCalls: number; + completionCalled: boolean; + nudgeAfter: number; + reminderSent: boolean; + strongSent: boolean; + lowerTemperatureForNextRequest: boolean; +} + +const state: CompletionNudgeState = { + active: false, + unitType: "", + unitId: "", + toolCalls: 0, + completionCalled: false, + nudgeAfter: DEFAULT_COMPLETION_NUDGE_AFTER, + reminderSent: false, + strongSent: false, + lowerTemperatureForNextRequest: false, +}; + +export function resolveCompletionNudgeAfter(value: unknown): number { + const n = Number(value); + if (!Number.isFinite(n)) return DEFAULT_COMPLETION_NUDGE_AFTER; + return Math.max(0, Math.floor(n)); +} + +export function resetCompletionNudgeState( + unitType: string, + unitId: string, + configuredNudgeAfter?: unknown, +): void { + const nudgeAfter = resolveCompletionNudgeAfter(configuredNudgeAfter); + state.active = unitType === "complete-slice" && nudgeAfter > 0; + state.unitType = unitType; + state.unitId = unitId; + state.toolCalls = 0; + state.completionCalled = false; + state.nudgeAfter = nudgeAfter; + state.reminderSent = false; + state.strongSent = false; + state.lowerTemperatureForNextRequest = false; +} + +export function clearCompletionNudgeState(): void { + resetCompletionNudgeState("", "", DEFAULT_COMPLETION_NUDGE_AFTER); +} + +export function recordCompletionNudgeToolCall(toolName: string): void { + if (!state.active) return; + if (COMPLETION_NUDGE_TOOL_NAMES.has(toolName)) { + state.completionCalled = true; + state.lowerTemperatureForNextRequest = false; + return; + } + state.toolCalls++; +} + +export function getCompletionNudgeStateForTest(): Readonly { + return { ...state }; +} + +export function maybeInjectCompletionNudgeMessage(messages: AgentMessage[]): AgentMessage[] { + if 
(!state.active || state.completionCalled) return messages; + + const message = nextCompletionNudgeMessage(); + if (!message) return messages; + + return [ + ...messages, + { + role: "custom", + customType: COMPLETION_NUDGE_CUSTOM_TYPE, + content: message, + display: false, + details: { + unitType: state.unitType, + unitId: state.unitId, + toolCalls: state.toolCalls, + }, + timestamp: Date.now(), + }, + ]; +} + +export function applyCompletionNudgeTemperature(payload: unknown): unknown { + if (!state.lowerTemperatureForNextRequest || state.completionCalled) return payload; + state.lowerTemperatureForNextRequest = false; + if (!payload || typeof payload !== "object") return payload; + + lowerTemperature(payload as Record); + return payload; +} + +function nextCompletionNudgeMessage(): string | null { + const firstThreshold = state.nudgeAfter; + const secondThreshold = state.nudgeAfter * 2; + + if (!state.strongSent && state.toolCalls >= secondThreshold) { + state.reminderSent = true; + state.strongSent = true; + state.lowerTemperatureForNextRequest = true; + return `You've performed ${state.toolCalls} tool calls without calling sf_complete_slice. Stop further investigation unless there is a specific blocker. Call sf_complete_slice now with your summary.`; + } + + if (!state.reminderSent && state.toolCalls >= firstThreshold) { + state.reminderSent = true; + return `You've performed ${state.toolCalls} tool calls of investigation. Per the slice plan you should now call sf_complete_slice with your summary. 
If you genuinely need more context, say so explicitly; otherwise call the tool now.`; + } + + return null; +} + +function lowerTemperature(record: Record): void { + record.temperature = lowerNumber(record.temperature); + + const generationConfig = record.generationConfig; + if (generationConfig && typeof generationConfig === "object") { + (generationConfig as Record).temperature = lowerNumber( + (generationConfig as Record).temperature, + ); + } + + const config = record.config; + if (config && typeof config === "object") { + (config as Record).temperature = lowerNumber( + (config as Record).temperature, + ); + } +} + +function lowerNumber(value: unknown): number { + return typeof value === "number" ? Math.min(value, LOWERED_TEMPERATURE) : LOWERED_TEMPERATURE; +} diff --git a/src/resources/extensions/sf/auto-dispatch.ts b/src/resources/extensions/sf/auto-dispatch.ts index e9280d090..b8ac9073a 100644 --- a/src/resources/extensions/sf/auto-dispatch.ts +++ b/src/resources/extensions/sf/auto-dispatch.ts @@ -99,6 +99,20 @@ function missingSliceStop(mid: string, phase: string): DispatchAction { }; } +export function formatTaskCompleteFailurePrompt(reason: string): string { + return `sf_task_complete failed: ${reason}. Try the call again, or investigate the write path.`; +} + +function prependTaskCompleteFailurePrompt( + session: DispatchContext["session"] | undefined, + unitId: string, + prompt: string, +): string { + const reason = session?.pendingTaskCompleteFailures?.get(unitId); + if (!reason) return prompt; + return `${formatTaskCompleteFailurePrompt(reason)}\n\n${prompt}`; +} + function isMilestonePlanRepairState(state: SFState): boolean { if (state.phase !== "planning" || state.activeSlice) return false; return /roadmap is incomplete|weighted vision alignment meeting/i.test(state.nextAction ?? 
""); @@ -796,26 +810,28 @@ export const DISPATCH_RULES: DispatchRule[] = [ }, { name: "executing → execute-task", - match: async ({ state, mid, basePath }) => { + match: async ({ state, mid, basePath, session }) => { if (state.phase !== "executing" || !state.activeTask) return null; if (!state.activeSlice) return missingSliceStop(mid, state.phase); const sid = state.activeSlice!.id; const sTitle = state.activeSlice!.title; const tid = state.activeTask.id; const tTitle = state.activeTask.title; + const unitId = `${mid}/${sid}/${tid}`; + const prompt = await buildExecuteTaskPrompt( + mid, + sid, + sTitle, + tid, + tTitle, + basePath, + ); return { action: "dispatch", unitType: "execute-task", - unitId: `${mid}/${sid}/${tid}`, - prompt: await buildExecuteTaskPrompt( - mid, - sid, - sTitle, - tid, - tTitle, - basePath, - ), + unitId, + prompt: prependTaskCompleteFailurePrompt(session, unitId, prompt), }; }, }, diff --git a/src/resources/extensions/sf/auto-post-unit.ts b/src/resources/extensions/sf/auto-post-unit.ts index bd491b525..7a7f09b3c 100644 --- a/src/resources/extensions/sf/auto-post-unit.ts +++ b/src/resources/extensions/sf/auto-post-unit.ts @@ -77,6 +77,33 @@ import { writeTurnGitTransaction } from "./uok/gitops.js"; /** Maximum verification retry attempts before escalating to blocker placeholder (#2653). 
*/ const MAX_VERIFICATION_RETRIES = 3; +function isCompletedTaskStatus(status: string | undefined): boolean { + return status === "complete" || status === "done"; +} + +function taskCompleteFailureForCurrentUnit(s: AutoSession): string | null { + if (!s.currentUnit || s.currentUnit.type !== "execute-task") return null; + const failure = s.lastTaskCompleteFailure; + if (!failure || failure.unitId !== s.currentUnit.id) return null; + const { milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id); + if (!mid || !sid || !tid) return failure.reason; + const dbTask = getTask(mid, sid, tid); + if (dbTask && isCompletedTaskStatus(dbTask.status)) { + s.pendingTaskCompleteFailures.delete(s.currentUnit.id); + s.lastTaskCompleteFailure = null; + return null; + } + return failure.reason; +} + +function clearTaskCompleteFailureForCurrentUnit(s: AutoSession): void { + if (!s.currentUnit) return; + s.pendingTaskCompleteFailures.delete(s.currentUnit.id); + if (s.lastTaskCompleteFailure?.unitId === s.currentUnit.id) { + s.lastTaskCompleteFailure = null; + } +} + /** Enqueue a sidecar item (hook, triage, or quick-task) for the main loop to * drain via runUnit. Logs the enqueue event and notifies the UI. 
*/ @@ -871,6 +898,7 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV triggerArtifactVerified = verifyExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath); if (triggerArtifactVerified) { invalidateAllCaches(); + clearTaskCompleteFailureForCurrentUnit(s); } } catch (e) { debugLog("postUnit", { phase: "artifact-verify", error: String(e) }); @@ -888,6 +916,7 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV triggerArtifactVerified = verifyExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath); if (triggerArtifactVerified) { invalidateAllCaches(); + clearTaskCompleteFailureForCurrentUnit(s); } } } @@ -916,6 +945,22 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV "error", ); } else if (!triggerArtifactVerified) { + const taskCompleteFailure = taskCompleteFailureForCurrentUnit(s); + if (taskCompleteFailure) { + const retryMessage = `sf_task_complete failed: ${taskCompleteFailure}. Try the call again, or investigate the write path.`; + s.pendingTaskCompleteFailures.set(s.currentUnit.id, taskCompleteFailure); + s.lastTaskCompleteFailure = null; + s.pendingVerificationRetry = null; + debugLog("postUnit", { + phase: "task-complete-transient-retry", + unitType: s.currentUnit.type, + unitId: s.currentUnit.id, + error: taskCompleteFailure, + }); + ctx.ui.notify(retryMessage, "warning"); + return "retry"; + } + // #2883/#3595: If the artifact is missing because the tool invocation // failed (malformed JSON) or was skipped (queued user message), retrying // will produce the same failure. Pause auto-mode instead of looping. 
diff --git a/src/resources/extensions/sf/auto.ts b/src/resources/extensions/sf/auto.ts index 26bd960f9..62e038348 100644 --- a/src/resources/extensions/sf/auto.ts +++ b/src/resources/extensions/sf/auto.ts @@ -499,14 +499,32 @@ export function markToolEnd(toolCallId: string): void { _markToolEnd(toolCallId); } +const TASK_COMPLETE_TOOL_NAMES = new Set(["sf_task_complete", "sf_complete_task"]); + +function normalizeTaskCompleteFailure(errorMsg: string): string { + return errorMsg + .replace(/^Error completing task:\s*/i, "") + .replace(/^sf_task_complete failed:\s*/i, "") + .trim(); +} + /** * Record a tool invocation error on the current session (#2883). * Called from tool_execution_end when a SF tool fails with isError. - * Only stores the error if it matches the tool-invocation-error pattern - * (malformed/truncated JSON), not normal business-logic errors. + * Malformed/truncated JSON errors still pause auto-mode. sf_task_complete + * execution errors are tracked separately so the same task can retry in-flow. */ export function recordToolInvocationError(toolName: string, errorMsg: string): void { if (!s.active) return; + if (TASK_COMPLETE_TOOL_NAMES.has(toolName)) { + const currentUnit = s.currentUnit; + if (currentUnit?.type === "execute-task") { + s.lastTaskCompleteFailure = { + unitId: currentUnit.id, + reason: normalizeTaskCompleteFailure(errorMsg), + }; + } + } if (isToolInvocationError(errorMsg) || isQueuedUserMessageSkip(errorMsg)) { s.lastToolInvocationError = `${toolName}: ${errorMsg}`; } diff --git a/src/resources/extensions/sf/auto/detect-stuck.ts b/src/resources/extensions/sf/auto/detect-stuck.ts index 9873e87a6..2c87b7b80 100644 --- a/src/resources/extensions/sf/auto/detect-stuck.ts +++ b/src/resources/extensions/sf/auto/detect-stuck.ts @@ -13,6 +13,11 @@ import { summarizeLogs } from "../workflow-logger.js"; * and similar Node.js filesystem error messages. 
*/ const ENOENT_PATH_RE = /ENOENT[^']*'([^']+)'/; +const TRANSIENT_TASK_COMPLETE_RE = /\b(?:sf_task_complete failed|Error completing task:).*SUMMARY\.md write failed/i; + +function isTransientTaskCompleteError(entry: WindowEntry): boolean { + return typeof entry.error === "string" && TRANSIENT_TASK_COMPLETE_RE.test(entry.error); +} /** * Analyze a sliding window of recent unit dispatches for stuck patterns. @@ -27,7 +32,8 @@ const ENOENT_PATH_RE = /ENOENT[^']*'([^']+)'/; export function detectStuck( window: readonly WindowEntry[], ): { stuck: true; reason: string } | null { - if (window.length < 2) return null; + const effectiveWindow = window.filter((entry) => !isTransientTaskCompleteError(entry)); + if (effectiveWindow.length < 2) return null; // Peek (not drain) the workflow-logger buffer so stuck reasons can surface // the underlying diagnostic context (projection failures, DB degradations, @@ -36,8 +42,8 @@ export function detectStuck( const loggerSummary = summarizeLogs(); const suffix = loggerSummary ? 
` — ${loggerSummary}` : ""; - const last = window[window.length - 1]; - const prev = window[window.length - 2]; + const last = effectiveWindow[effectiveWindow.length - 1]; + const prev = effectiveWindow[effectiveWindow.length - 2]; // Rule 1: Same error repeated consecutively if (last.error && prev.error && last.error === prev.error) { @@ -48,8 +54,8 @@ export function detectStuck( } // Rule 2: Same unit 3+ consecutive times - if (window.length >= 3) { - const lastThree = window.slice(-3); + if (effectiveWindow.length >= 3) { + const lastThree = effectiveWindow.slice(-3); if (lastThree.every((u) => u.key === last.key)) { return { stuck: true, @@ -59,8 +65,8 @@ export function detectStuck( } // Rule 3: Oscillation (A→B→A→B in last 4) - if (window.length >= 4) { - const w = window.slice(-4); + if (effectiveWindow.length >= 4) { + const w = effectiveWindow.slice(-4); if ( w[0].key === w[2].key && w[1].key === w[3].key && @@ -76,7 +82,7 @@ export function detectStuck( // Rule 4: Same ENOENT path seen twice in window (#3575) // Missing files don't appear between retries — stop immediately. 
const enoentPaths = new Map(); - for (const entry of window) { + for (const entry of effectiveWindow) { if (!entry.error) continue; const match = ENOENT_PATH_RE.exec(entry.error); if (!match) continue; diff --git a/src/resources/extensions/sf/auto/phases.ts b/src/resources/extensions/sf/auto/phases.ts index 0fec104b5..1dfd0180a 100644 --- a/src/resources/extensions/sf/auto/phases.ts +++ b/src/resources/extensions/sf/auto/phases.ts @@ -53,6 +53,7 @@ import { resolveUokFlags } from "../uok/flags.js"; import { UokGateRunner } from "../uok/gate-runner.js"; import { resetEvidence } from "../safety/evidence-collector.js"; import { resetToolCallCounts, formatToolCallSummary } from "../auto-tool-tracking.js"; +import { resetCompletionNudgeState } from "../auto-completion-nudge.js"; import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js"; import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js"; import { @@ -894,7 +895,10 @@ export async function runDispatch( // ── Sliding-window stuck detection with graduated recovery ── const derivedKey = `${unitType}/${unitId}`; - if (!s.pendingVerificationRetry) { + const hasTransientTaskCompleteFailure = + unitType === "execute-task" && s.pendingTaskCompleteFailures.has(unitId); + + if (!s.pendingVerificationRetry && !hasTransientTaskCompleteFailure) { loopState.recentUnits.push({ key: derivedKey }); if (loopState.recentUnits.length > STUCK_WINDOW_SIZE) loopState.recentUnits.shift(); @@ -1316,6 +1320,7 @@ export async function runUnitPhase( setCurrentPhase(unitType); s.lastToolInvocationError = null; // #2883: clear stale error from previous unit resetToolCallCounts(); + resetCompletionNudgeState(unitType, unitId, prefs?.auto_supervisor?.completion_nudge_after); const unitStartSeq = ic.nextSeq(); deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: unitStartSeq, eventType: "unit-start", data: { unitType, unitId } }); ctx.ui.notify(`[unit] 
${unitType} ${unitId} starting`, "info"); diff --git a/src/resources/extensions/sf/auto/session.ts b/src/resources/extensions/sf/auto/session.ts index 92af98b69..d5657d7f6 100644 --- a/src/resources/extensions/sf/auto/session.ts +++ b/src/resources/extensions/sf/auto/session.ts @@ -147,6 +147,15 @@ export class AutoSession { lastGitActionFailure: string | null = null; /** Last turn-level git action status captured during finalize. */ lastGitActionStatus: "ok" | "failed" | null = null; + /** + * Last sf_task_complete/sf_complete_task execution error for the current turn. + * Unlike malformed tool invocation errors, these are normal tool execution + * failures (for example a transient SUMMARY.md write failure) and should be + * retried in-flow instead of pausing auto-mode. + */ + lastTaskCompleteFailure: { unitId: string; reason: string } | null = null; + /** Per-unit task completion failures to surface in the next execute-task prompt. */ + readonly pendingTaskCompleteFailures = new Map(); // ── Isolation degradation ──────────────────────────────────────────── /** Set to true when worktree creation fails; prevents merge of nonexistent branch. 
*/ @@ -295,6 +304,8 @@ export class AutoSession { this.lastToolInvocationError = null; this.lastGitActionFailure = null; this.lastGitActionStatus = null; + this.lastTaskCompleteFailure = null; + this.pendingTaskCompleteFailures.clear(); this.isolationDegraded = false; this.milestoneMergedInPhases = false; this.checkpointSha = null; diff --git a/src/resources/extensions/sf/bootstrap/db-tools.ts b/src/resources/extensions/sf/bootstrap/db-tools.ts index 0caa249a7..b459d4954 100644 --- a/src/resources/extensions/sf/bootstrap/db-tools.ts +++ b/src/resources/extensions/sf/bootstrap/db-tools.ts @@ -530,10 +530,14 @@ export function registerDbTools(pi: ExtensionAPI): void { combatant: Type.String({ description: "Attacks the premise first, then the proposal and alternatives" }), architect: Type.String({ description: "System-fit review after partner and combatant passes" }), }, { description: "Adversarial review summary for this slice plan" })), - planningMeeting: Type.Optional(Type.Object({ + planningMeeting: Type.Object({ trigger: Type.String({ description: "Why a planning meeting was needed" }), pm: Type.String({ description: "PM/product framing and scope cut" }), + userAdvocate: Type.Optional(Type.String({ description: "User advocate view of what must matter for the end user" })), + customerPanel: Type.Optional(Type.String({ description: "Nuanced customer panel summary across multiple likely customer viewpoints" })), + business: Type.Optional(Type.String({ description: "Business or viability perspective when relevant" })), researcher: Type.String({ description: "Research and evidence summary, including docs/code findings" }), + deliveryLead: Type.Optional(Type.String({ description: "Sequencing, scope cut, and delivery risk perspective" })), partner: Type.String({ description: "Strengthened best-case plan" }), combatant: Type.String({ description: "Strongest objection or alternative root cause/approach" }), architect: Type.String({ description: "System-fit and sequencing 
resolution" }), @@ -544,7 +548,7 @@ export function registerDbTools(pi: ExtensionAPI): void { Type.Literal("planning"), ], { description: "Where the workflow should route after the meeting" }), confidenceSummary: Type.String({ description: "Confidence rationale after the meeting" }), - }, { description: "Optional structured planning meeting artifact for ambiguous or higher-complexity slices" })), + }, { description: "Required populated planning meeting artifact. Empty, null, or missing planningMeeting is not acceptable." }), tasks: Type.Array(Type.Object({ taskId: Type.String({ description: "Task ID (e.g. T01)" }), title: Type.String({ description: "Task title" }), diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.ts b/src/resources/extensions/sf/bootstrap/register-hooks.ts index 63d2484dc..a0e630f1d 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.ts +++ b/src/resources/extensions/sf/bootstrap/register-hooks.ts @@ -10,7 +10,7 @@ import { getEcosystemReadyPromise } from "../ecosystem/loader.js"; import { buildMilestoneFileName, resolveMilestonePath, resolveSliceFile, resolveSlicePath } from "../paths.js"; import { buildBeforeAgentStartResult } from "./system-context.js"; import { handleAgentEnd } from "./agent-end-recovery.js"; -import { clearDiscussionFlowState, isDepthConfirmationAnswer, isQueuePhaseActive, markDepthVerified, resetWriteGateState, shouldBlockContextWrite, shouldBlockQueueExecution, isGateQuestionId, setPendingGate, clearPendingGate, getPendingGate, shouldBlockPendingGate, shouldBlockPendingGateBash, extractDepthVerificationMilestoneId } from "./write-gate.js"; +import { clearDiscussionFlowState, getSelectedGateAnswer, isDepthConfirmationAnswer, isQueuePhaseActive, markDepthVerified, resetWriteGateState, shouldBlockContextWrite, shouldBlockQueueExecution, isGateQuestionId, setPendingGate, clearPendingGate, getPendingGate, shouldBlockPendingGate, shouldBlockPendingGateBash, extractDepthVerificationMilestoneId } from 
"./write-gate.js"; import { isBlockedStateFile, isBashWriteToStateFile, BLOCKED_WRITE_ERROR } from "../write-intercept.js"; import { cleanupQuickBranch } from "../quick.js"; import { getDiscussionMilestoneId } from "../guided-flow.js"; @@ -24,6 +24,7 @@ import { saveActivityLog } from "../activity-log.js"; import { resetAskUserQuestionsCache } from "../../ask-user-questions.js"; import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult } from "../safety/evidence-collector.js"; import { recordToolCallName } from "../auto-tool-tracking.js"; +import { applyCompletionNudgeTemperature, maybeInjectCompletionNudgeMessage, recordCompletionNudgeToolCall } from "../auto-completion-nudge.js"; import { classifyCommand } from "../safety/destructive-guard.js"; import { logWarning as safetyLogWarning } from "../workflow-logger.js"; import { installNotifyInterceptor } from "./notify-interceptor.js"; @@ -352,7 +353,7 @@ export function registerHooks(pi: ExtensionAPI, ecosystemHandlers: SFEcosystemBe const pendingQuestion = questions.find((question) => question?.id === currentPendingGate); if (pendingQuestion) { const answer = details.response?.answers?.[currentPendingGate]; - if (isDepthConfirmationAnswer(answer?.selected, pendingQuestion.options)) { + if (isDepthConfirmationAnswer(getSelectedGateAnswer(answer), pendingQuestion.options)) { clearPendingGate(); } } @@ -367,7 +368,7 @@ export function registerHooks(pi: ExtensionAPI, ecosystemHandlers: SFEcosystemBe // Cross-references against the question's defined options to reject free-form "Other" text. const answer = details.response?.answers?.[question.id]; const inferredMilestoneId = extractDepthVerificationMilestoneId(question.id) ?? 
milestoneId; - if (isDepthConfirmationAnswer(answer?.selected, question.options)) { + if (isDepthConfirmationAnswer(getSelectedGateAnswer(answer), question.options)) { markDepthVerified(inferredMilestoneId); clearPendingGate(); } @@ -396,7 +397,8 @@ export function registerHooks(pi: ExtensionAPI, ecosystemHandlers: SFEcosystemBe const answer = details.response?.answers?.[question.id]; if (answer) { lines.push(""); - const selected = Array.isArray(answer.selected) ? answer.selected.join(", ") : answer.selected; + const selectedValue = getSelectedGateAnswer(answer); + const selected = Array.isArray(selectedValue) ? selectedValue.join(", ") : selectedValue; lines.push(`**Selected:** ${selected}`); if (answer.notes) { lines.push(`**Notes:** ${answer.notes}`); @@ -413,6 +415,7 @@ export function registerHooks(pi: ExtensionAPI, ecosystemHandlers: SFEcosystemBe if (!isAutoActive()) return; markToolStart(event.toolCallId); recordToolCallName(event.toolName); + recordCompletionNudgeToolCall(event.toolName); }); pi.on("tool_execution_end", async (event) => { @@ -435,9 +438,17 @@ export function registerHooks(pi: ExtensionAPI, ecosystemHandlers: SFEcosystemBe await syncServiceTierStatus(ctx); }); + pi.on("context", async (event) => { + if (!isAutoActive()) return; + const messages = maybeInjectCompletionNudgeMessage(event.messages); + if (messages === event.messages) return; + return { messages }; + }); + pi.on("before_provider_request", async (event, ctx) => { const payload = event.payload as Record | null; if (!payload || typeof payload !== "object") return; + applyCompletionNudgeTemperature(payload); // ── Observation Masking ───────────────────────────────────────────── // Replace old tool results with placeholders to reduce context bloat. 
diff --git a/src/resources/extensions/sf/bootstrap/write-gate.ts b/src/resources/extensions/sf/bootstrap/write-gate.ts index c73e9a37c..397e0f2eb 100644 --- a/src/resources/extensions/sf/bootstrap/write-gate.ts +++ b/src/resources/extensions/sf/bootstrap/write-gate.ts @@ -116,10 +116,14 @@ function currentWriteGateSnapshot(): WriteGateSnapshot { function persistWriteGateSnapshot(basePath: string = process.cwd()): void { if (!shouldPersistWriteGateSnapshot()) return; const path = writeGateSnapshotPath(basePath); - mkdirSync(join(basePath, ".sf", "runtime"), { recursive: true }); - const tempPath = `${path}.tmp`; - writeFileSync(tempPath, JSON.stringify(currentWriteGateSnapshot(), null, 2), "utf-8"); - renameSync(tempPath, path); + try { + mkdirSync(join(basePath, ".sf", "runtime"), { recursive: true }); + const tempPath = `${path}.tmp`; + writeFileSync(tempPath, JSON.stringify(currentWriteGateSnapshot(), null, 2), "utf-8"); + renameSync(tempPath, path); + } catch { + // Persistence is a cross-process aid; in-memory gate enforcement remains authoritative. + } } function clearPersistedWriteGateSnapshot(basePath: string = process.cwd()): void { @@ -364,6 +368,21 @@ export function isDepthConfirmationAnswer( return value.includes("(Recommended)"); } +/** + * Normalize ask_user_questions answers across local TUI and remote-channel + * results. Local answers use `{ selected }`; remote answers use `{ answers }`. + */ +export function getSelectedGateAnswer(answer: unknown): unknown { + if (!answer || typeof answer !== "object") return undefined; + const record = answer as Record; + if ("selected" in record) return record.selected; + const remoteAnswers = record.answers; + if (Array.isArray(remoteAnswers)) { + return remoteAnswers.length === 1 ? 
remoteAnswers[0] : remoteAnswers; + } + return undefined; +} + export function shouldBlockContextWrite( toolName: string, inputPath: string, diff --git a/src/resources/extensions/sf/docs/preferences-reference.md b/src/resources/extensions/sf/docs/preferences-reference.md index fcd4781e6..4a32c6942 100644 --- a/src/resources/extensions/sf/docs/preferences-reference.md +++ b/src/resources/extensions/sf/docs/preferences-reference.md @@ -125,6 +125,7 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `soft_timeout_minutes`: minutes before the supervisor issues a soft warning (default: 20). - `idle_timeout_minutes`: minutes of inactivity before the supervisor intervenes (default: 10). - `hard_timeout_minutes`: minutes before the supervisor forces termination (default: 30). + - `completion_nudge_after`: tool calls in a complete-slice unit before nudging the agent to call `sf_complete_slice` (default: 10; set `0` to disable). - `git`: configures SF's git behavior. All fields are optional — omit any to use defaults. Keys: - `auto_push`: boolean — automatically push commits to the remote after committing. Default: `false`. 
diff --git a/src/resources/extensions/sf/markdown-renderer.ts b/src/resources/extensions/sf/markdown-renderer.ts index 34df49456..02bfe5898 100644 --- a/src/resources/extensions/sf/markdown-renderer.ts +++ b/src/resources/extensions/sf/markdown-renderer.ts @@ -388,10 +388,34 @@ function renderSlicePlanMarkdown(slice: SliceRow, tasks: TaskRow[], gates: GateR lines.push(""); lines.push(slice.planning_meeting.pm.trim()); lines.push(""); + if (slice.planning_meeting.userAdvocate?.trim()) { + lines.push("### User Advocate"); + lines.push(""); + lines.push(slice.planning_meeting.userAdvocate.trim()); + lines.push(""); + } + if (slice.planning_meeting.customerPanel?.trim()) { + lines.push("### Customer Panel"); + lines.push(""); + lines.push(slice.planning_meeting.customerPanel.trim()); + lines.push(""); + } + if (slice.planning_meeting.business?.trim()) { + lines.push("### Business"); + lines.push(""); + lines.push(slice.planning_meeting.business.trim()); + lines.push(""); + } lines.push("### Researcher"); lines.push(""); lines.push(slice.planning_meeting.researcher.trim()); lines.push(""); + if (slice.planning_meeting.deliveryLead?.trim()) { + lines.push("### Delivery Lead"); + lines.push(""); + lines.push(slice.planning_meeting.deliveryLead.trim()); + lines.push(""); + } lines.push("### Partner"); lines.push(""); lines.push(slice.planning_meeting.partner.trim()); diff --git a/src/resources/extensions/sf/plan-quality.ts b/src/resources/extensions/sf/plan-quality.ts index 4b66f887b..092798bd6 100644 --- a/src/resources/extensions/sf/plan-quality.ts +++ b/src/resources/extensions/sf/plan-quality.ts @@ -9,7 +9,11 @@ export type PlanningMeetingRoute = "discussing" | "researching" | "planning"; export interface PlanningMeetingRecord { trigger: string; pm: string; + userAdvocate?: string; + customerPanel?: string; + business?: string; researcher: string; + deliveryLead?: string; partner: string; combatant: string; architect: string; diff --git 
a/src/resources/extensions/sf/preferences-models.ts b/src/resources/extensions/sf/preferences-models.ts index 2a8f975de..50d495edc 100644 --- a/src/resources/extensions/sf/preferences-models.ts +++ b/src/resources/extensions/sf/preferences-models.ts @@ -63,10 +63,18 @@ function resolveAutoBenchmarkPickForUnit( ): ResolvedModelConfig | undefined { try { const allowed = prefs?.allowed_providers?.map(s => s.toLowerCase()); + const modelAllow = prefs?.provider_model_allow; const candidates: Array<{ provider: string; id: string }> = []; for (const provider of getProviders()) { if (allowed && !allowed.includes(provider.toLowerCase())) continue; + // Per-provider model allow-list: when a provider is listed here, only + // the listed model IDs may be used from that provider. + const providerKey = Object.keys(modelAllow ?? {}).find( + k => k.toLowerCase() === provider.toLowerCase(), + ); + const allowedModels = providerKey ? modelAllow![providerKey] : undefined; for (const model of getModels(provider)) { + if (allowedModels && !allowedModels.includes(model.id)) continue; candidates.push({ provider, id: model.id }); } } @@ -400,6 +408,7 @@ export function resolveAutoSupervisorConfig(): AutoSupervisorConfig { soft_timeout_minutes: configured.soft_timeout_minutes ?? 20, idle_timeout_minutes: configured.idle_timeout_minutes ?? 10, hard_timeout_minutes: configured.hard_timeout_minutes ?? 30, + completion_nudge_after: configured.completion_nudge_after ?? 10, ...(configured.model ? 
{ model: configured.model } : {}), }; } diff --git a/src/resources/extensions/sf/preferences-types.ts b/src/resources/extensions/sf/preferences-types.ts index f8418bb0f..afb8bce35 100644 --- a/src/resources/extensions/sf/preferences-types.ts +++ b/src/resources/extensions/sf/preferences-types.ts @@ -124,6 +124,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set([ "service_tier", "allowed_providers", "provider_preference", + "provider_model_allow", "forensics_dedup", "show_token_cost", "stale_commit_threshold_minutes", @@ -218,6 +219,8 @@ export interface AutoSupervisorConfig { idle_timeout_minutes?: number; hard_timeout_minutes?: number; phase_timeout_minutes?: number; + /** Tool-call count before nudging complete-slice agents to call sf_complete_slice. Default: 10. Set 0 to disable. */ + completion_nudge_after?: number; /** Enable supervised mode in headless auto - wait for stdin instead of exiting on questions. Default: true */ supervised_mode?: boolean; } @@ -519,6 +522,22 @@ export interface SFPreferences { * provider_preference: [kimi-coding, minimax, zai, mistral, opencode-go, opencode] */ provider_preference?: string[]; + /** + * Per-provider model allow-list. When a provider has an entry here, only + * the listed model IDs may be used from that provider; older variants + * in the registry are filtered out. Providers absent from this block are + * unrestricted (any model in their registry is fair game). + * + * Project-level overrides global per provider (full replace inside the + * array, not merge). 
+ * + * Example: + * provider_model_allow: + * minimax: + * - MiniMax-M2.7 + * - MiniMax-M2.7-highspeed + */ + provider_model_allow?: Record; } export interface LoadedSFPreferences { diff --git a/src/resources/extensions/sf/preferences-validation.ts b/src/resources/extensions/sf/preferences-validation.ts index 90731a36e..4973629b5 100644 --- a/src/resources/extensions/sf/preferences-validation.ts +++ b/src/resources/extensions/sf/preferences-validation.ts @@ -425,6 +425,30 @@ export function validatePreferences(preferences: SFPreferences): { } } + // ─── Per-provider model allow-list ────────────────────────────────── + // When a provider has an entry here, ONLY listed model IDs are usable + // from that provider. Providers absent from the block are unrestricted. + if (preferences.provider_model_allow !== undefined) { + if ( + preferences.provider_model_allow !== null + && typeof preferences.provider_model_allow === "object" + && !Array.isArray(preferences.provider_model_allow) + ) { + const cleaned: Record = {}; + for (const [provider, models] of Object.entries(preferences.provider_model_allow as Record)) { + if (!Array.isArray(models) || models.some((m: unknown) => typeof m !== "string")) { + errors.push(`provider_model_allow.${provider} must be an array of model ID strings`); + continue; + } + const list = (models as string[]).map(s => s.trim()).filter(s => s.length > 0); + if (list.length > 0) cleaned[provider.toLowerCase()] = list; + } + if (Object.keys(cleaned).length > 0) validated.provider_model_allow = cleaned; + } else { + errors.push("provider_model_allow must be a map of provider → array of model IDs"); + } + } + // ─── Flat-rate Providers ──────────────────────────────────────────── // User-declared flat-rate providers for dynamic routing suppression. 
// Built-in providers (github-copilot, copilot, claude-code) and any @@ -527,6 +551,11 @@ export function validatePreferences(preferences: SFPreferences): { if (!Number.isNaN(val) && val >= 0) validatedAs.phase_timeout_minutes = val; else errors.push("auto_supervisor.phase_timeout_minutes must be a non-negative number"); } + if (as.completion_nudge_after !== undefined) { + const val = Number(as.completion_nudge_after); + if (!Number.isNaN(val) && val >= 0) validatedAs.completion_nudge_after = val; + else errors.push("auto_supervisor.completion_nudge_after must be a non-negative number"); + } validated.auto_supervisor = validatedAs; } else { diff --git a/src/resources/extensions/sf/preferences.ts b/src/resources/extensions/sf/preferences.ts index cdadaf3a5..effc32c41 100644 --- a/src/resources/extensions/sf/preferences.ts +++ b/src/resources/extensions/sf/preferences.ts @@ -509,6 +509,11 @@ function mergePreferences(base: SFPreferences, override: SFPreferences): SFPrefe // override-wins merge so the preference actually reaches consumers. allowed_providers: mergeStringLists(base.allowed_providers, override.allowed_providers), provider_preference: override.provider_preference ?? base.provider_preference, + // Per-provider replace inside the array (project entry replaces global + // for that provider; providers only in base survive). + provider_model_allow: (base.provider_model_allow || override.provider_model_allow) + ? { ...(base.provider_model_allow ?? {}), ...(override.provider_model_allow ?? {}) } + : undefined, flat_rate_providers: mergeStringLists(base.flat_rate_providers, override.flat_rate_providers), stale_commit_threshold_minutes: override.stale_commit_threshold_minutes ?? base.stale_commit_threshold_minutes, widget_mode: override.widget_mode ?? 
base.widget_mode, diff --git a/src/resources/extensions/sf/prompts/guided-plan-slice.md b/src/resources/extensions/sf/prompts/guided-plan-slice.md index fd34b7727..8ba9d9630 100644 --- a/src/resources/extensions/sf/prompts/guided-plan-slice.md +++ b/src/resources/extensions/sf/prompts/guided-plan-slice.md @@ -1,3 +1,3 @@ -Plan slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Read `.sf/DECISIONS.md` if it exists — respect existing decisions. Read `.sf/REQUIREMENTS.md` if it exists — identify which Active requirements the roadmap says this slice owns or supports, and ensure the plan delivers them. Read the roadmap boundary map, any existing context/research files, and dependency summaries. Use the **Slice Plan** and **Task Plan** output templates below. Decompose into tasks with must-haves. Fill the `Proof Level` and `Integration Closure` sections truthfully so the plan says what class of proof this slice really delivers and what end-to-end wiring still remains. Call `sf_plan_slice` to persist the slice plan — the tool writes `{{sliceId}}-PLAN.md` and individual `T##-PLAN.md` files to disk and persists to DB. Do **not** write plan files manually — use the DB-backed tool so state stays consistent. If planning produces structural decisions, call `sf_decision_save` for each — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically. 
{{skillActivation}} Before finishing, self-audit the plan: every must-have maps to at least one task, every task has complete sections (steps, must-haves, verification, observability impact, inputs, and expected output), task ordering is consistent with no circular references, every pair of artifacts that must connect has an explicit wiring step, task scope targets 2–5 steps and 3–8 files (6–8 steps or 8–10 files — consider splitting; 10+ steps or 12+ files — must split), the plan honors locked decisions from context/research/decisions artifacts, the proof-level wording does not overclaim live integration if only fixture/contract proof is planned, every Active requirement this slice owns has at least one task with verification that proves it is met, and every task produces real user-facing progress — if the slice has a UI surface at least one task builds the real UI, if it has an API at least one task connects it to a real data source, and showing the completed result to a non-technical stakeholder would demonstrate real product progress rather than developer artifacts, and quality gate coverage — for non-trivial slices, Threat Surface (Q3: abuse, data exposure, input trust) and Requirement Impact (Q4: requirements touched, re-verify, decisions revisited) sections are present. For non-trivial tasks, Failure Modes (Q5), Load Profile (Q6), and Negative Tests (Q7) are filled in task plans. +Plan slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Read `.sf/DECISIONS.md` if it exists — respect existing decisions. Read `.sf/REQUIREMENTS.md` if it exists — identify which Active requirements the roadmap says this slice owns or supports, and ensure the plan delivers them. Read the roadmap boundary map, any existing context/research files, and dependency summaries. Use the **Slice Plan** and **Task Plan** output templates below. Decompose into tasks with must-haves. 
Fill the `Proof Level` and `Integration Closure` sections truthfully so the plan says what class of proof this slice really delivers and what end-to-end wiring still remains. Call `sf_plan_slice` to persist the slice plan — the tool writes `{{sliceId}}-PLAN.md` and individual `T##-PLAN.md` files to disk and persists to DB. The `sf_plan_slice` payload MUST include `planningMeeting` as a populated object; empty, null, or missing planningMeeting is not acceptable. Use the canonical M004 meeting roles: Trigger, Product Manager, User Advocate, Customer Panel, Business, Researcher, Delivery Lead, Partner, Combatant, Architect, Moderator, Recommended Route, and Confidence. The tool's Product Manager field is named `pm`, and the Confidence field is named `confidenceSummary`; keep existing tool field names while covering the canonical roles. If you are tempted to skip the meeting because the slice is simple, write a brief one-line per role explaining why it is simple. Do **not** write plan files manually — use the DB-backed tool so state stays consistent. If planning produces structural decisions, call `sf_decision_save` for each — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically. 
{{skillActivation}} Before finishing, self-audit the plan: every must-have maps to at least one task, every task has complete sections (steps, must-haves, verification, observability impact, inputs, and expected output), task ordering is consistent with no circular references, every pair of artifacts that must connect has an explicit wiring step, task scope targets 2–5 steps and 3–8 files (6–8 steps or 8–10 files — consider splitting; 10+ steps or 12+ files — must split), the plan honors locked decisions from context/research/decisions artifacts, the proof-level wording does not overclaim live integration if only fixture/contract proof is planned, every Active requirement this slice owns has at least one task with verification that proves it is met, and every task produces real user-facing progress — if the slice has a UI surface at least one task builds the real UI, if it has an API at least one task connects it to a real data source, and showing the completed result to a non-technical stakeholder would demonstrate real product progress rather than developer artifacts, and quality gate coverage — for non-trivial slices, Threat Surface (Q3: abuse, data exposure, input trust) and Requirement Impact (Q4: requirements touched, re-verify, decisions revisited) sections are present. For non-trivial tasks, Failure Modes (Q5), Load Profile (Q6), and Negative Tests (Q7) are filled in task plans. {{inlinedTemplates}} diff --git a/src/resources/extensions/sf/prompts/plan-slice.md b/src/resources/extensions/sf/prompts/plan-slice.md index c40838791..5857dba6d 100644 --- a/src/resources/extensions/sf/prompts/plan-slice.md +++ b/src/resources/extensions/sf/prompts/plan-slice.md @@ -40,6 +40,14 @@ Narrate your decomposition reasoning — why you're grouping work this way, what **Right-size the plan.** If the slice is simple enough to be 1 task, plan 1 task. Don't split into multiple tasks just because you can identify sub-steps. 
Don't fill in sections with "None" when the section doesn't apply — omit them entirely. The plan's job is to guide execution, not to fill a template. +### Output Contract: planningMeeting + +The `sf_plan_slice` payload MUST include `planningMeeting` as a populated object. Empty, null, or missing planningMeeting is not acceptable. + +Use the canonical M004 meeting roles: Trigger, Product Manager, User Advocate, Customer Panel, Business, Researcher, Delivery Lead, Partner, Combatant, Architect, Moderator, Recommended Route, and Confidence. The tool's Product Manager field is named `pm`, and the Confidence field is named `confidenceSummary`; keep existing tool field names while covering the canonical roles. + +If you are tempted to skip the meeting because the slice is simple, write a brief one-line per role explaining why it is simple. + {{executorContextConstraints}} Then: @@ -72,16 +80,20 @@ Then: - **Combatant:** attack the premise first. Name at least 3 plausible alternative root causes, failure modes, or plan-shape mistakes, plus the cheapest falsifier for each. - **Architect:** after reading partner + combatant, state the system-fit risk, sequencing risk, or missing integration proof. - If any of the three reviews expose a problem, change the plan before persisting it. Do not treat the review as commentary-only. -8. **When ambiguity is still real, run a bounded planning meeting before persistence.** This is for standard/heavy slices, low-confidence plans, multiple plausible approaches, or automatic feature planning where PM framing matters. Record it in the optional `planningMeeting` payload: +8. **Always record a bounded planning meeting before persistence.** This is deeper for standard/heavy slices, low-confidence plans, multiple plausible approaches, or automatic feature planning where PM framing matters. For simple slices, keep each role to one concise line explaining why the slice is simple. 
Record it in the required `planningMeeting` payload: - **Trigger:** why the meeting was needed - **Product Manager:** diagnosis, user value, scope cut, and what would count as a useful increment + - **User Advocate:** user experience, trust surface, and user-visible proof + - **Customer Panel:** multiple likely customer viewpoints, not one flattened user + - **Business:** viability, wedge, retention, or cost/scope consequence when relevant - **Researcher:** the strongest evidence from code, docs, DeepWiki, Context7, or focused web research + - **Delivery Lead:** smallest credible sequence, scope cuts, and delivery risk - **Partner / Combatant / Architect:** the same roles as above, but in meeting form - **Moderator:** synthesize the disagreement and set a route - **Recommended Route:** one of `discussing`, `researching`, `planning` - **Confidence:** concise post-meeting confidence summary - Keep it bounded: one round is normal, two is the limit. If the meeting route is `discussing` or `researching`, persist the draft anyway so the system keeps the context, but do not pretend the slice is execution-ready. -9. **Persist planning state through `sf_plan_slice`.** Call it with the full slice planning payload (goal, adversarialReview, optional planningMeeting, demo, must-haves, verification, tasks, and metadata). The tool inserts all tasks in the same transaction, writes to the DB, and renders `{{outputPath}}` and `{{slicePath}}/tasks/T##-PLAN.md` files automatically. Do **not** call `sf_plan_task` separately — `sf_plan_slice` handles task persistence. Do **not** rely on direct `PLAN.md` writes as the source of truth; the DB-backed tool is the canonical write path for slice and task planning state. +9. **Persist planning state through `sf_plan_slice`.** Call it with the full slice planning payload (goal, adversarialReview, populated planningMeeting, demo, must-haves, verification, tasks, and metadata). 
The tool inserts all tasks in the same transaction, writes to the DB, and renders `{{outputPath}}` and `{{slicePath}}/tasks/T##-PLAN.md` files automatically. Do **not** call `sf_plan_task` separately — `sf_plan_slice` handles task persistence. Do **not** rely on direct `PLAN.md` writes as the source of truth; the DB-backed tool is the canonical write path for slice and task planning state. 10. **Self-audit the plan.** Walk through each check — if any fail, fix the plan files before moving on: - **Completion semantics:** If every task were completed exactly as written, the slice goal/demo should actually be true. - **Requirement coverage:** Every must-have in the slice maps to at least one task. No must-have is orphaned. If `REQUIREMENTS.md` exists, every Active requirement this slice owns maps to at least one task. diff --git a/src/resources/extensions/sf/tests/auto-completion-nudge.test.ts b/src/resources/extensions/sf/tests/auto-completion-nudge.test.ts new file mode 100644 index 000000000..3db523f7c --- /dev/null +++ b/src/resources/extensions/sf/tests/auto-completion-nudge.test.ts @@ -0,0 +1,68 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import type { AgentMessage } from "@singularity-forge/pi-agent-core"; + +import { + applyCompletionNudgeTemperature, + getCompletionNudgeStateForTest, + maybeInjectCompletionNudgeMessage, + recordCompletionNudgeToolCall, + resetCompletionNudgeState, +} from "../auto-completion-nudge.ts"; + +type CustomTestMessage = Extract<AgentMessage, { role: "custom" }> & { content: string }; + +function baseMessages(): AgentMessage[] { + return [{ + role: "user", + content: [{ type: "text", text: "complete slice" }], + timestamp: Date.now(), + }]; +} + +test("completion nudge injects reminder after N tool calls without sf_complete_slice", () => { + resetCompletionNudgeState("complete-slice", "M001/S01", 3); + recordCompletionNudgeToolCall("read"); + recordCompletionNudgeToolCall("grep"); + 
assert.equal(maybeInjectCompletionNudgeMessage(baseMessages()).length, 1); + + recordCompletionNudgeToolCall("bash"); + const messages = maybeInjectCompletionNudgeMessage(baseMessages()); + + assert.equal(messages.length, 2); + const nudge = messages[1]; + assert.equal(nudge.role, "custom"); + assert.match(String(nudge.content), /You've performed 3 tool calls of investigation/); + assert.match(String(nudge.content), /call sf_complete_slice/); +}); + +test("completion nudge does not inject after sf_complete_slice is called", () => { + resetCompletionNudgeState("complete-slice", "M001/S01", 3); + recordCompletionNudgeToolCall("read"); + recordCompletionNudgeToolCall("grep"); + recordCompletionNudgeToolCall("sf_complete_slice"); + recordCompletionNudgeToolCall("bash"); + + const messages = maybeInjectCompletionNudgeMessage(baseMessages()); + assert.equal(messages.length, 1); + assert.equal(getCompletionNudgeStateForTest().completionCalled, true); +}); + +test("completion nudge injects stronger message at 2N and lowers temperature", () => { + resetCompletionNudgeState("complete-slice", "M001/S01", 2); + recordCompletionNudgeToolCall("read"); + recordCompletionNudgeToolCall("grep"); + maybeInjectCompletionNudgeMessage(baseMessages()); + + recordCompletionNudgeToolCall("bash"); + recordCompletionNudgeToolCall("read"); + const messages = maybeInjectCompletionNudgeMessage(baseMessages()); + assert.equal(messages.length, 2); + const nudge = messages[1] as CustomTestMessage; + assert.match(String(nudge.content), /without calling sf_complete_slice/); + + const payload = { temperature: 0.9, generationConfig: { temperature: 0.8 } }; + applyCompletionNudgeTemperature(payload); + assert.equal(payload.temperature, 0.2); + assert.equal(payload.generationConfig.temperature, 0.2); +}); diff --git a/src/resources/extensions/sf/tests/complete-slice.test.ts b/src/resources/extensions/sf/tests/complete-slice.test.ts index 77044986d..0e3a1effa 100644 --- 
a/src/resources/extensions/sf/tests/complete-slice.test.ts +++ b/src/resources/extensions/sf/tests/complete-slice.test.ts @@ -125,9 +125,9 @@ console.log('\n=== complete-slice: schema v6 migration ==='); const adapter = _getAdapter()!; - // Verify schema version is current (v16 with UOK projection tables) + // Verify schema version is current const versionRow = adapter.prepare('SELECT MAX(version) as v FROM schema_version').get(); - assertEq(versionRow?.['v'], 16, 'schema version should be 16'); + assertEq(versionRow?.['v'], 20, 'schema version should be 20'); // Verify slices table has full_summary_md and full_uat_md columns const cols = adapter.prepare("PRAGMA table_info(slices)").all(); diff --git a/src/resources/extensions/sf/tests/complete-task.test.ts b/src/resources/extensions/sf/tests/complete-task.test.ts index 7e049e4ab..ffdaddd4a 100644 --- a/src/resources/extensions/sf/tests/complete-task.test.ts +++ b/src/resources/extensions/sf/tests/complete-task.test.ts @@ -16,6 +16,9 @@ import { insertVerificationEvidence, } from '../sf-db.ts'; import { handleCompleteTask } from '../tools/complete-task.ts'; +import { executeTaskComplete } from '../tools/workflow-tool-executors.ts'; +import { resolveDispatch } from '../auto-dispatch.ts'; +import { AutoSession } from '../auto/session.ts'; const { assertEq, assertTrue, assertMatch, report } = createTestContext(); @@ -74,6 +77,12 @@ function createTempProject(): { basePath: string; planPath: string } { return { basePath, planPath }; } +function createTempProjectWithDb(): { basePath: string; dbPath: string; planPath: string } { + const project = createTempProject(); + const dbPath = path.join(project.basePath, '.sf', 'sf.db'); + return { ...project, dbPath }; +} + function makeValidParams() { return { taskId: 'T01', @@ -109,9 +118,9 @@ console.log('\n=== complete-task: schema v5 migration ==='); const adapter = _getAdapter()!; - // Verify schema version is current (v16 with UOK projection tables) + // Verify schema 
version is current const versionRow = adapter.prepare('SELECT MAX(version) as v FROM schema_version').get(); - assertEq(versionRow?.['v'], 16, 'schema version should be 16'); + assertEq(versionRow?.['v'], 20, 'schema version should be 20'); // Verify all 4 new tables exist const tables = adapter.prepare( @@ -343,6 +352,126 @@ console.log('\n=== complete-task: handler happy path ==='); cleanup(dbPath); } +// ═══════════════════════════════════════════════════════════════════════════ +// complete-task: SUMMARY.md write failure must not update DB +// ═══════════════════════════════════════════════════════════════════════════ + +console.log('\n=== complete-task: SUMMARY.md write failure leaves DB pending ==='); +{ + const dbPath = tempDbPath(); + openDatabase(dbPath); + + const { basePath } = createTempProject(); + const tasksDir = path.join(basePath, '.sf', 'milestones', 'M001', 'slices', 'S01', 'tasks'); + fs.mkdirSync(path.join(tasksDir, 'T01-SUMMARY.md'), { recursive: true }); + + insertMilestone({ id: 'M001', title: 'Test Milestone' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'Test Slice' }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', status: 'pending', title: 'Test task' }); + + const result = await handleCompleteTask(makeValidParams(), basePath); + + assertTrue('error' in result, 'handler should return a loud error when SUMMARY.md cannot be written'); + if ('error' in result) { + assertMatch(result.error, /SUMMARY\.md write failed/, 'error should mention SUMMARY.md write failure'); + assertMatch(result.error, /EISDIR|directory|illegal operation/i, 'error should include the underlying filesystem reason'); + } + const taskAfter = getTask('M001', 'S01', 'T01'); + assertTrue(taskAfter !== null, 'pending task row should still exist'); + assertEq(taskAfter!.status, 'pending', 'task status must remain pending after file-write failure'); + + cleanupDir(basePath); + cleanup(dbPath); +} + +// 
═══════════════════════════════════════════════════════════════════════════ +// complete-task: SUMMARY.md write success updates DB +// ═══════════════════════════════════════════════════════════════════════════ + +console.log('\n=== complete-task: SUMMARY.md write success updates DB ==='); +{ + const dbPath = tempDbPath(); + openDatabase(dbPath); + + const { basePath } = createTempProject(); + + insertMilestone({ id: 'M001', title: 'Test Milestone' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'Test Slice' }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', status: 'pending', title: 'Test task' }); + + const result = await handleCompleteTask(makeValidParams(), basePath); + + assertTrue(!('error' in result), 'handler should succeed when SUMMARY.md can be written'); + if (!('error' in result)) { + assertTrue(fs.existsSync(result.summaryPath), 'summary file should exist before DB completion is accepted'); + } + const taskAfter = getTask('M001', 'S01', 'T01'); + assertTrue(taskAfter !== null, 'task row should exist'); + assertEq(taskAfter!.status, 'complete', 'task status should be complete after successful file write'); + + cleanupDir(basePath); + cleanup(dbPath); +} + +// ═══════════════════════════════════════════════════════════════════════════ +// complete-task: auto-mode retry prompt after transient write error +// ═══════════════════════════════════════════════════════════════════════════ + +console.log('\n=== complete-task: transient write error is surfaced to next auto turn ==='); +{ + const { basePath, dbPath } = createTempProjectWithDb(); + openDatabase(dbPath); + + const tasksDir = path.join(basePath, '.sf', 'milestones', 'M001', 'slices', 'S01', 'tasks'); + fs.writeFileSync(path.join(tasksDir, 'T01-PLAN.md'), '# T01 Plan\n'); + fs.writeFileSync(path.join(basePath, '.sf', 'milestones', 'M001', 'M001-CONTEXT.md'), '# M001 Context\n'); + fs.mkdirSync(path.join(tasksDir, 'T01-SUMMARY.md'), { recursive: true }); + + insertMilestone({ 
id: 'M001', title: 'Test Milestone' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'Test Slice' }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', status: 'pending', title: 'Test task' }); + + const toolResult = await executeTaskComplete(makeValidParams(), basePath); + assertEq(toolResult.isError, true, 'tool result should be marked as an error'); + assertMatch(String(toolResult.content[0]?.text ?? ''), /Error completing task/, 'tool result should loudly report failure'); + + const taskAfter = getTask('M001', 'S01', 'T01'); + assertTrue(taskAfter !== null, 'task row should still exist after transient write error'); + assertEq(taskAfter!.status, 'pending', 'task must still be pending in DB after transient write error'); + + const reason = String(toolResult.details.error ?? '').replace(/^Error completing task:\s*/i, ''); + const session = new AutoSession(); + session.pendingTaskCompleteFailures.set('M001/S01/T01', reason); + const dispatch = await resolveDispatch({ + basePath, + mid: 'M001', + midTitle: 'Test Milestone', + prefs: undefined, + session, + state: { + phase: 'executing', + activeMilestone: { id: 'M001', title: 'Test Milestone', status: 'active' }, + activeSlice: { id: 'S01', title: 'Test Slice' }, + activeTask: { id: 'T01', title: 'Test task' }, + registry: [{ id: 'M001', status: 'active' }], + blockers: [], + } as any, + }); + + assertEq(dispatch.action, 'dispatch', 'dispatcher should retry the same execute-task unit'); + if (dispatch.action === 'dispatch') { + assertEq(dispatch.unitId, 'M001/S01/T01', 'dispatcher should not skip the pending task'); + assertMatch( + dispatch.prompt, + /sf_task_complete failed: .*Try the call again, or investigate the write path\./, + 'next turn should surface the sf_task_complete failure verbatim', + ); + } + + cleanupDir(basePath); + cleanup(dbPath); +} + // ═══════════════════════════════════════════════════════════════════════════ // complete-task: Handler validation errors // 
═══════════════════════════════════════════════════════════════════════════ diff --git a/src/resources/extensions/sf/tests/plan-slice.test.ts b/src/resources/extensions/sf/tests/plan-slice.test.ts index a8fa7f62f..8cb19d477 100644 --- a/src/resources/extensions/sf/tests/plan-slice.test.ts +++ b/src/resources/extensions/sf/tests/plan-slice.test.ts @@ -42,7 +42,11 @@ function validParams(): PlanSliceParams { planningMeeting: { trigger: 'The slice changes planning-system behavior and needs an explicit route before execution.', pm: 'Keep the slice focused on persistence and render boundaries, not broader planner redesign.', + userAdvocate: 'Users need the planning context to survive the DB-backed render path.', + customerPanel: 'Operators and reviewers both need enough meeting context to audit the plan.', + business: 'Reliable planning persistence reduces wasted execution work.', researcher: 'The surrounding workflow already reads ceremony sections from rendered plan artifacts and DB-backed state.', + deliveryLead: 'Sequence the fix through the handler, renderer, and tests without widening scope.', partner: 'The current task split is enough if the review state is stored and rendered faithfully.', combatant: 'Without persistence proof, the meeting is just commentary and the system can drift.', architect: 'Persist both the adversarial review and the meeting route in the same transactional planning write.', @@ -110,6 +114,9 @@ test('handlePlanSlice writes slice/task planning state and renders plan artifact const renderedPlan = readFileSync(planPath, 'utf-8'); assert.match(renderedPlan, /## Adversarial Review/); assert.match(renderedPlan, /## Planning Meeting/); + assert.match(renderedPlan, /### User Advocate/); + assert.match(renderedPlan, /### Customer Panel/); + assert.match(renderedPlan, /### Delivery Lead/); assert.match(renderedPlan, /### Recommended Route/); assert.match(renderedPlan, /planning/); @@ -173,6 +180,32 @@ test('handlePlanSlice rejects invalid payloads', 
async () => { } }); +test('handlePlanSlice rejects missing, null, and empty planningMeeting with explicit guidance', async () => { + const base = makeTmpBase(); + openDatabase(join(base, '.sf', 'sf.db')); + + try { + seedParentSlice(); + const { planningMeeting: _meeting, ...missingMeetingParams } = validParams(); + const cases: Array<[string, PlanSliceParams]> = [ + ['missing', missingMeetingParams as unknown as PlanSliceParams], + ['null', { ...validParams(), planningMeeting: null } as unknown as PlanSliceParams], + ['empty', { ...validParams(), planningMeeting: {} } as unknown as PlanSliceParams], + ]; + + for (const [label, params] of cases) { + const result = await handlePlanSlice(params, base); + assert.ok('error' in result, `${label} planningMeeting should fail validation`); + assert.equal( + result.error, + 'planningMeeting must be a populated object — write at least 2-3 perspectives. Skipping is not allowed.', + ); + } + } finally { + cleanup(base); + } +}); + test('handlePlanSlice rejects missing parent slice', async () => { const base = makeTmpBase(); openDatabase(join(base, '.sf', 'sf.db')); diff --git a/src/resources/extensions/sf/tests/preferences.test.ts b/src/resources/extensions/sf/tests/preferences.test.ts index 5bef6194f..6083f2740 100644 --- a/src/resources/extensions/sf/tests/preferences.test.ts +++ b/src/resources/extensions/sf/tests/preferences.test.ts @@ -643,6 +643,17 @@ test("remote questions preferences validate timeout auto-resolution settings", ( assert.equal(result.preferences.remote_questions?.auto_resolve_strategy, "recommended-option"); }); +test("auto supervisor preferences validate completion nudge setting", () => { + const result = validatePreferences({ + auto_supervisor: { + completion_nudge_after: 12, + }, + }); + + assert.equal(result.errors.length, 0); + assert.equal(result.preferences.auto_supervisor?.completion_nudge_after, 12); +}); + // ── Codebase Map Preferences ───────────────────────────────────────────────── 
test("codebase preferences validate and pass through correctly", () => { diff --git a/src/resources/extensions/sf/tests/prompt-contracts.test.ts b/src/resources/extensions/sf/tests/prompt-contracts.test.ts index 49d57c37b..1fd1b3c5f 100644 --- a/src/resources/extensions/sf/tests/prompt-contracts.test.ts +++ b/src/resources/extensions/sf/tests/prompt-contracts.test.ts @@ -284,6 +284,16 @@ test("plan-slice prompt clarifies sf_plan_slice handles task persistence", () => assert.match(prompt, /sf_plan_slice` handles task persistence/i); }); +test("plan-slice prompts require populated planningMeeting and canonical roles", () => { + for (const promptName of ["plan-slice", "guided-plan-slice"]) { + const prompt = readPrompt(promptName); + assert.match(prompt, /planningMeeting` as a populated object/i); + assert.match(prompt, /empty, null, or missing planningMeeting is not acceptable/i); + assert.match(prompt, /Trigger, Product Manager, User Advocate, Customer Panel, Business, Researcher, Delivery Lead, Partner, Combatant, Architect/i); + assert.match(prompt, /If you are tempted to skip the meeting because the slice is simple, write a brief one-line per role/i); + } +}); + test("replan-slice prompt uses sf_replan_slice as canonical DB-backed tool", () => { const prompt = readPrompt("replan-slice"); assert.match(prompt, /sf_replan_slice/); diff --git a/src/resources/extensions/sf/tests/remote-questions-manager.test.ts b/src/resources/extensions/sf/tests/remote-questions-manager.test.ts index a1233bb8e..d2d63b461 100644 --- a/src/resources/extensions/sf/tests/remote-questions-manager.test.ts +++ b/src/resources/extensions/sf/tests/remote-questions-manager.test.ts @@ -1,10 +1,13 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { tryAutoResolveQuestions } from "../../remote-questions/manager.ts"; +import { + tryAutoResolveQuestions, + tryHeadlessLocalAutoResolveQuestions, +} from "../../remote-questions/manager.ts"; import { 
isUsableRemoteQuestionResult } from "../../ask-user-questions.ts"; -test("remote question timeout auto-resolution picks the single recommended option for each single-select question", () => { +test("remote question timeout auto-resolution picks the first option for each single-select question", () => { const answer = tryAutoResolveQuestions([ { id: "lane", @@ -34,19 +37,7 @@ test("remote question timeout auto-resolution picks the single recommended optio }); }); -test("remote question timeout auto-resolution refuses ambiguous or multi-select prompts", () => { - assert.equal(tryAutoResolveQuestions([ - { - id: "ambiguous", - header: "Ambiguous", - question: "Pick one", - options: [ - { label: "Alpha (Recommended)", description: "A" }, - { label: "Beta (Recommended)", description: "B" }, - ], - }, - ]), null); - +test("remote question timeout auto-resolution refuses multi-select or empty prompts", () => { assert.equal(tryAutoResolveQuestions([ { id: "multi", @@ -59,6 +50,15 @@ test("remote question timeout auto-resolution refuses ambiguous or multi-select ], }, ]), null); + + assert.equal(tryAutoResolveQuestions([ + { + id: "empty", + header: "Empty", + question: "Pick one", + options: [], + }, + ]), null); }); test("ask_user_questions treats timeout auto-resolution as a usable remote answer", () => { @@ -67,3 +67,90 @@ test("ask_user_questions treats timeout auto-resolution as a usable remote answe assert.equal(isUsableRemoteQuestionResult({ error: true, autoResolved: true }), false); assert.equal(isUsableRemoteQuestionResult({ cancelled: true, autoResolved: true }), false); }); + +test("headless local auto-resolution fires after timeout when telegram is unavailable", async () => { + let sleptMs = 0; + const result = await tryHeadlessLocalAutoResolveQuestions([ + { + id: "depth_verification_M001_confirm", + header: "Depth", + question: "Is this sufficient?", + options: [ + { label: "Sufficient", description: "Proceed" }, + { label: "Needs more", description: "Keep 
discussing" }, + ], + }, + ], { + hasUI: false, + telegramUnavailable: true, + unavailableReason: "no-telegram", + policy: { + channel: "telegram", + timeoutMs: 5, + autoResolveOnTimeout: true, + autoResolveStrategy: "recommended-option", + }, + sleepFn: async (ms) => { sleptMs = ms; }, + }); + + assert.equal(sleptMs, 5); + assert.equal(result?.details?.autoResolved, true); + assert.equal(result?.details?.localFallback, true); + assert.deepEqual(result?.details?.response, { + answers: { + depth_verification_M001_confirm: { answers: ["Sufficient"] }, + }, + }); +}); + +test("headless local auto-resolution does not run when telegram is available", async () => { + let slept = false; + const result = await tryHeadlessLocalAutoResolveQuestions([ + { + id: "depth_verification_M001_confirm", + header: "Depth", + question: "Is this sufficient?", + options: [{ label: "Sufficient", description: "Proceed" }], + }, + ], { + hasUI: false, + telegramUnavailable: false, + unavailableReason: "no-telegram", + policy: { + channel: "telegram", + timeoutMs: 5, + autoResolveOnTimeout: true, + autoResolveStrategy: "recommended-option", + }, + sleepFn: async () => { slept = true; }, + }); + + assert.equal(result, null); + assert.equal(slept, false); +}); + +test("headless local auto-resolution does not run in interactive TUI mode", async () => { + let slept = false; + const result = await tryHeadlessLocalAutoResolveQuestions([ + { + id: "depth_verification_M001_confirm", + header: "Depth", + question: "Is this sufficient?", + options: [{ label: "Sufficient", description: "Proceed" }], + }, + ], { + hasUI: true, + telegramUnavailable: true, + unavailableReason: "no-telegram", + policy: { + channel: "telegram", + timeoutMs: 5, + autoResolveOnTimeout: true, + autoResolveStrategy: "recommended-option", + }, + sleepFn: async () => { slept = true; }, + }); + + assert.equal(result, null); + assert.equal(slept, false); +}); diff --git 
a/src/resources/extensions/sf/tests/stuck-detection-coverage.test.ts b/src/resources/extensions/sf/tests/stuck-detection-coverage.test.ts index bcb947718..c9219e685 100644 --- a/src/resources/extensions/sf/tests/stuck-detection-coverage.test.ts +++ b/src/resources/extensions/sf/tests/stuck-detection-coverage.test.ts @@ -40,6 +40,16 @@ test("Rule 1: different errors do not trigger stuck", () => { assert.equal(result, null); }); +test("transient sf_task_complete SUMMARY.md write errors do not trigger stuck detection", () => { + const result = detectStuck([ + { key: "execute-task/M001/S01/T01", error: "sf_task_complete failed: SUMMARY.md write failed at /tmp/project/.sf/milestones/M001/slices/S01/tasks/T01-SUMMARY.md: EISDIR: illegal operation on a directory" }, + { key: "execute-task/M001/S01/T01", error: "sf_task_complete failed: SUMMARY.md write failed at /tmp/project/.sf/milestones/M001/slices/S01/tasks/T01-SUMMARY.md: EISDIR: illegal operation on a directory" }, + { key: "execute-task/M001/S01/T01", error: "Error completing task: SUMMARY.md write failed at /tmp/project/.sf/milestones/M001/slices/S01/tasks/T01-SUMMARY.md: EISDIR: illegal operation on a directory" }, + ]); + + assert.equal(result, null); +}); + test("Rule 1: only last two entries matter for error check", () => { // First two share an error, but the last two have distinct errors — no trigger. 
const result = detectStuck([ diff --git a/src/resources/extensions/sf/tests/tool-param-optionality.test.ts b/src/resources/extensions/sf/tests/tool-param-optionality.test.ts index 1f86c1920..e25ea9ffb 100644 --- a/src/resources/extensions/sf/tests/tool-param-optionality.test.ts +++ b/src/resources/extensions/sf/tests/tool-param-optionality.test.ts @@ -47,6 +47,45 @@ function getOptionalProps(tool: any): string[] { return allProps.filter((p: string) => !required.has(p)); } +function validPlanSliceMeeting() { + return { + trigger: "Slice scope needs a recorded planning decision.", + pm: "Keep the increment narrow and measurable.", + userAdvocate: "Users need a visible planning artifact they can trust.", + customerPanel: "Operators and maintainers both need the plan to survive reruns.", + business: "Planning fidelity reduces wasted execution cycles.", + researcher: "Existing M004 planning records use a multi-role meeting convention.", + deliveryLead: "Sequence the work through the existing schema and validator first.", + partner: "A required meeting prevents silent loss of planning context.", + combatant: "Null or omitted meetings caused records with no planning context.", + architect: "Enforce the contract at schema and runtime validation boundaries.", + moderator: "Proceed with a required populated planningMeeting.", + recommendedRoute: "planning", + confidenceSummary: "High confidence with schema and handler coverage.", + }; +} + +function validPlanSliceCoreParams() { + return { + milestoneId: "M001", + sliceId: "S01", + goal: "Implement feature X", + planningMeeting: validPlanSliceMeeting(), + tasks: [ + { + taskId: "T01", + title: "Build X", + description: "Build the thing", + estimate: "2h", + files: ["src/x.ts"], + verify: "npm test", + inputs: [], + expectedOutput: ["src/x.ts"], + }, + ], + }; +} + // ─── sf_slice_complete: enrichment arrays must be optional ────────────────── test("sf_slice_complete — enrichment arrays are optional", () => { @@ -121,10 
+160,11 @@ test("sf_plan_milestone — enrichment arrays are optional", () => { const required = new Set(getRequiredProps(tool)); // Core fields - const coreRequired = ["milestoneId", "title", "vision", "slices"]; + const coreRequired = ["milestoneId", "title", "vision"]; for (const field of coreRequired) { assert.ok(required.has(field), `core field "${field}" must be required`); } + assert.ok(!required.has("slices"), "slices must stay optional when templateId scaffolding is used"); // Enrichment fields must be optional const enrichmentFields = [ @@ -283,7 +323,7 @@ test("sf_plan_slice — enrichment fields are optional", () => { const required = new Set(getRequiredProps(tool)); // Core fields - const coreRequired = ["milestoneId", "sliceId", "goal", "tasks"]; + const coreRequired = ["milestoneId", "sliceId", "goal", "planningMeeting", "tasks"]; for (const field of coreRequired) { assert.ok(required.has(field), `core field "${field}" must be required`); } @@ -300,32 +340,35 @@ test("sf_plan_slice — enrichment fields are optional", () => { } }); -test("sf_plan_slice — validates with only core params", () => { +test("sf_plan_slice — validates with core params and populated planningMeeting", () => { const tool = getTool("sf_plan_slice"); assert.ok(tool, "sf_plan_slice must be registered"); - const minimalParams = { - milestoneId: "M001", - sliceId: "S01", - goal: "Implement feature X", - tasks: [ - { - taskId: "T01", - title: "Build X", - description: "Build the thing", - estimate: "2h", - files: ["src/x.ts"], - verify: "npm test", - inputs: [], - expectedOutput: ["src/x.ts"], - }, - ], - }; + const minimalParams = validPlanSliceCoreParams(); const errors = [...Value.Errors(tool.parameters, minimalParams)]; assert.strictEqual(errors.length, 0, `Minimal params should validate but got errors: ${errors.map(e => `${e.path}: ${e.message}`).join(", ")}`); }); +test("sf_plan_slice — rejects null, missing, and empty planningMeeting", () => { + const tool = 
getTool("sf_plan_slice"); + assert.ok(tool, "sf_plan_slice must be registered"); + + const populatedParams = validPlanSliceCoreParams(); + const { planningMeeting: _meeting, ...missingMeetingParams } = populatedParams; + const nullMeetingParams = { ...populatedParams, planningMeeting: null }; + const emptyMeetingParams = { ...populatedParams, planningMeeting: {} }; + + for (const [label, params] of [ + ["missing", missingMeetingParams], + ["null", nullMeetingParams], + ["empty", emptyMeetingParams], + ] as const) { + const errors = [...Value.Errors(tool.parameters, params)]; + assert.notStrictEqual(errors.length, 0, `${label} planningMeeting should be rejected`); + } +}); + // ─── Required param count ceiling ──────────────────────────────────────────── test("no planning/completion tool requires more than 10 top-level params", () => { diff --git a/src/resources/extensions/sf/tests/workflow-mcp.test.ts b/src/resources/extensions/sf/tests/workflow-mcp.test.ts index 28dcd0419..a3a99e25e 100644 --- a/src/resources/extensions/sf/tests/workflow-mcp.test.ts +++ b/src/resources/extensions/sf/tests/workflow-mcp.test.ts @@ -35,6 +35,24 @@ function extractElicitPayload(request: unknown): ElicitPayload { return payload as ElicitPayload; } +function validPlanningMeeting() { + return { + trigger: "MCP transport needs an explicit slice-planning decision.", + pm: "Keep the transport proof narrow and observable.", + userAdvocate: "Users need the spawned MCP path to preserve planning context.", + customerPanel: "Operators and maintainers both rely on persisted plan artifacts.", + business: "Reliable planning over MCP reduces failed automation runs.", + researcher: "The workflow MCP surface delegates to the shared executor bridge.", + deliveryLead: "Exercise one slice and one task to keep the proof bounded.", + partner: "The bridge path is sufficient if the DB render succeeds.", + combatant: "Missing meeting data would hide whether the plan was actually reviewed.", + architect: "The 
MCP schema and handler should enforce the same meeting contract.", + moderator: "Proceed with the focused MCP planning proof.", + recommendedRoute: "planning", + confidenceSummary: "High confidence after spawned transport persistence.", + }; +} + test("guided execute-task requires canonical task completion tool", () => { assert.deepEqual(getRequiredWorkflowToolsForGuidedUnit("execute-task"), ["sf_task_complete"]); }); @@ -310,6 +328,7 @@ test("workflow MCP launch config reaches mutation tools over stdio", async () => milestoneId: "M001", sliceId: "S01", goal: "Persist slice planning over the spawned MCP transport.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T01", diff --git a/src/resources/extensions/sf/tests/workflow-tool-executors.test.ts b/src/resources/extensions/sf/tests/workflow-tool-executors.test.ts index 2b905df29..7f31f8dd3 100644 --- a/src/resources/extensions/sf/tests/workflow-tool-executors.test.ts +++ b/src/resources/extensions/sf/tests/workflow-tool-executors.test.ts @@ -24,6 +24,7 @@ import { executeSliceComplete, executeValidateMilestone, } from "../tools/workflow-tool-executors.ts"; +import type { PlanningMeetingRecord } from "../plan-quality.ts"; function makeTmpBase(): string { const base = join(tmpdir(), `sf-workflow-executors-${randomUUID()}`); @@ -65,6 +66,24 @@ function seedSlice(milestoneId: string, sliceId: string, status: string): void { ).run(milestoneId, sliceId, `Slice ${sliceId}`, status, new Date().toISOString()); } +function validPlanningMeeting(): PlanningMeetingRecord { + return { + trigger: "Workflow executor test needs a recorded slice-planning decision.", + pm: "Keep this executor fixture narrow and measurable.", + userAdvocate: "Users need executor-backed plans to preserve context.", + customerPanel: "Operators and maintainers both need durable plan artifacts.", + business: "Reliable planning avoids wasted executor work.", + researcher: "The executor delegates to the shared DB-backed planning handler.", + 
deliveryLead: "Use a small task set to keep the proof bounded.", + partner: "The test covers the successful planning path.", + combatant: "Missing meetings would allow silent planning-context loss.", + architect: "Runtime validation should enforce the same meeting contract as the schema.", + moderator: "Proceed with the focused executor proof.", + recommendedRoute: "planning", + confidenceSummary: "High confidence for this fixture.", + }; +} + function writeRoadmap(base: string, milestoneId: string, sliceIds: string[]): void { const milestoneDir = join(base, ".sf", "milestones", milestoneId); mkdirSync(milestoneDir, { recursive: true }); @@ -231,6 +250,7 @@ test("executePlanSlice writes task planning state and rendered plan artifacts", milestoneId: "M001", sliceId: "S01", goal: "Persist slice plan over MCP.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T01", @@ -265,6 +285,7 @@ test("executePlanSlice marks validation failures with isError", async () => { milestoneId: "M001", sliceId: "S01", goal: "Trigger validation failure for empty tasks.", + planningMeeting: validPlanningMeeting(), tasks: [], }, base)); @@ -426,6 +447,7 @@ test("executeReassessRoadmap writes assessment and updates roadmap projection", milestoneId: "M004", sliceId: "S04", goal: "Complete the first slice.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T04", @@ -560,6 +582,7 @@ test("executeReplanSlice rewrites pending tasks and renders replan artifacts", a milestoneId: "M006", sliceId: "S06", goal: "Plan a slice that will be replanned.", + planningMeeting: validPlanningMeeting(), tasks: [ { taskId: "T06", diff --git a/src/resources/extensions/sf/tests/write-gate.test.ts b/src/resources/extensions/sf/tests/write-gate.test.ts index 0102a6478..99d06f1e9 100644 --- a/src/resources/extensions/sf/tests/write-gate.test.ts +++ b/src/resources/extensions/sf/tests/write-gate.test.ts @@ -18,6 +18,7 @@ import { } from '../index.ts'; import { markDepthVerified, + 
getSelectedGateAnswer, isMilestoneDepthVerified, shouldBlockContextArtifactSave, clearDiscussionFlowState, @@ -458,6 +459,12 @@ test('write-gate: isDepthConfirmationAnswer handles array-wrapped selected value ); }); +test('write-gate: remote-channel answer shape normalizes to selected value', () => { + const selected = getSelectedGateAnswer({ answers: ['Yes, you got it (Recommended)'] }); + assert.strictEqual(selected, 'Yes, you got it (Recommended)'); + assert.strictEqual(isDepthConfirmationAnswer(selected, STANDARD_OPTIONS), true); +}); + // ─── Scenario 16: rejects free-form "Other" text that contains "(Recommended)" ── test('write-gate: isDepthConfirmationAnswer rejects free-form text containing Recommended', () => { diff --git a/src/resources/extensions/sf/tools/complete-slice.ts b/src/resources/extensions/sf/tools/complete-slice.ts index 472ddd8fc..2e77b3c95 100644 --- a/src/resources/extensions/sf/tools/complete-slice.ts +++ b/src/resources/extensions/sf/tools/complete-slice.ts @@ -1,14 +1,13 @@ /** * complete-slice handler — the core operation behind sf_slice_complete. * - * Validates inputs, checks all tasks are complete, writes slice row to DB in - * a transaction, then (outside the transaction) renders SUMMARY.md + UAT.md - * to disk, toggles the roadmap checkbox, stores rendered markdown in DB for - * D004 recovery, and invalidates caches. + * Validates inputs, checks all tasks are complete, atomically renders + * SUMMARY.md + UAT.md to disk, then writes the slice row to DB in a + * transaction, toggles the roadmap checkbox, and invalidates caches. 
*/ -import { join } from "node:path"; -import { mkdirSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { constants as fsConstants, mkdirSync, promises as fs } from "node:fs"; import type { CompleteSliceParams } from "../types.js"; import { isClosedStatus } from "../status-guards.js"; @@ -27,7 +26,8 @@ import { import { getGatesForTurn } from "../gate-registry.js"; import { resolveSliceFile, resolveSlicePath, clearPathCache } from "../paths.js"; import { checkOwnership, sliceUnitKey } from "../unit-ownership.js"; -import { saveFile, clearParseCache } from "../files.js"; +import { clearParseCache } from "../files.js"; +import { atomicWriteAsync } from "../atomic-write.js"; import { invalidateStateCache } from "../state.js"; import { renderRoadmapCheckboxes } from "../markdown-renderer.js"; import { renderAllProjections } from "../workflow-projections.js"; @@ -42,6 +42,25 @@ export interface CompleteSliceResult { uatPath: string; } +async function ensureWritableParent(filePath: string): Promise { + const parentDir = dirname(filePath); + await fs.mkdir(parentDir, { recursive: true }); + await fs.access(parentDir, fsConstants.W_OK); +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +async function writeMarkdownBeforeDb(filePath: string, content: string, label: string): Promise { + try { + await ensureWritableParent(filePath); + await atomicWriteAsync(filePath, content); + } catch (error) { + throw new Error(`${label} write failed at ${filePath}: ${errorMessage(error)}`); + } +} + /** * Map a complete-slice-owned gate id to the CompleteSliceParams field * whose presence drives `pass` vs. `omitted`. Keep this in lockstep with @@ -229,10 +248,9 @@ ${params.uatContent} * * 1. Validate required fields * 2. Verify all tasks are complete - * 3. Write DB in a transaction (milestone, slice upsert, status update) - * 4. Render SUMMARY.md + UAT.md to disk + * 3. 
Validate and atomically render SUMMARY.md + UAT.md to disk + * 4. Write DB in a transaction (milestone, slice upsert, status update) * 5. Toggle roadmap checkbox - * 6. Store rendered markdown back in DB (for D004 recovery) * 7. Invalidate caches */ export async function handleCompleteSlice( @@ -266,54 +284,28 @@ export async function handleCompleteSlice( return { error: `slice verification indicates blocked/failed state — do not complete a slice that has not passed verification. Address the blockers and re-verify first.` }; } - // ── Guards + DB writes inside a single transaction (prevents TOCTOU) ─── + // ── Guards before filesystem work ────────────────────────────────────── const completedAt = new Date().toISOString(); - const originalSliceStatus = getSlice(params.milestoneId, params.sliceId)?.status ?? "pending"; - let guardError: string | null = null; - - transaction(() => { - // State machine preconditions (inside txn for atomicity). - // Milestone/slice not existing is OK — insertMilestone/insertSlice below will auto-create. - // Only block if they exist and are closed. 
- const milestone = getMilestone(params.milestoneId); - if (milestone && isClosedStatus(milestone.status)) { - guardError = `cannot complete slice in a closed milestone: ${params.milestoneId} (status: ${milestone.status})`; - return; - } - - const slice = getSlice(params.milestoneId, params.sliceId); - if (slice && isClosedStatus(slice.status)) { - guardError = `slice ${params.sliceId} is already complete — use sf_slice_reopen first if you need to redo it`; - return; - } - - // Verify all tasks are complete - const tasks = getSliceTasks(params.milestoneId, params.sliceId); - if (tasks.length === 0) { - guardError = `no tasks found for slice ${params.sliceId} in milestone ${params.milestoneId}`; - return; - } - - const incompleteTasks = tasks.filter(t => !isClosedStatus(t.status)); - if (incompleteTasks.length > 0) { - const incompleteIds = incompleteTasks.map(t => `${t.id} (status: ${t.status})`).join(", "); - guardError = `incomplete tasks: ${incompleteIds}`; - return; - } - - // All guards passed — perform writes - insertMilestone({ id: params.milestoneId, title: params.milestoneId }); - insertSlice({ id: params.sliceId, milestoneId: params.milestoneId, title: params.sliceId }); - updateSliceStatus(params.milestoneId, params.sliceId, "complete", completedAt); - }); - - if (guardError) { - return { error: guardError }; + const milestone = getMilestone(params.milestoneId); + if (milestone && isClosedStatus(milestone.status)) { + return { error: `cannot complete slice in a closed milestone: ${params.milestoneId} (status: ${milestone.status})` }; } - // ── Filesystem operations (outside transaction) ───────────────────────── - // If disk render fails, roll back the DB status so deriveState() and - // verifyExpectedArtifact() stay consistent (both say "not done"). 
+ const slice = getSlice(params.milestoneId, params.sliceId); + if (slice && isClosedStatus(slice.status)) { + return { error: `slice ${params.sliceId} is already complete — use sf_slice_reopen first if you need to redo it` }; + } + + const tasks = getSliceTasks(params.milestoneId, params.sliceId); + if (tasks.length === 0) { + return { error: `no tasks found for slice ${params.sliceId} in milestone ${params.milestoneId}` }; + } + + const incompleteTasks = tasks.filter(t => !isClosedStatus(t.status)); + if (incompleteTasks.length > 0) { + const incompleteIds = incompleteTasks.map(t => `${t.id} (status: ${t.status})`).join(", "); + return { error: `incomplete tasks: ${incompleteIds}` }; + } // Render summary markdown const summaryMd = renderSliceSummaryMarkdown(params); @@ -334,26 +326,43 @@ export async function handleCompleteSlice( const uatMd = renderUatMarkdown(params); const uatPath = summaryPath.replace(/-SUMMARY\.md$/, "-UAT.md"); + // ── Filesystem commit before DB status mutation ──────────────────────── + // SUMMARY.md and UAT.md are the artifacts downstream agents read. If either + // write fails, do not mutate the DB into a completed state. 
try { - await saveFile(summaryPath, summaryMd); - await saveFile(uatPath, uatMd); + await writeMarkdownBeforeDb(summaryPath, summaryMd, "SUMMARY.md"); + await writeMarkdownBeforeDb(uatPath, uatMd, "UAT.md"); + } catch (writeErr) { + logWarning("tool", `complete_slice — artifact write failed before DB update for ${params.milestoneId}/${params.sliceId}`, { error: errorMessage(writeErr) }); + invalidateStateCache(); + return { error: errorMessage(writeErr) }; + } - // Toggle roadmap checkbox via renderer module + // ── DB commit only after SUMMARY.md and UAT.md exist on disk ─────────── + try { + transaction(() => { + insertMilestone({ id: params.milestoneId, title: params.milestoneId }); + insertSlice({ id: params.sliceId, milestoneId: params.milestoneId, title: params.sliceId }); + updateSliceStatus(params.milestoneId, params.sliceId, "complete", completedAt); + setSliceSummaryMd(params.milestoneId, params.sliceId, summaryMd, uatMd); + }); + } catch (dbErr) { + const msg = errorMessage(dbErr); + logError("tool", `complete_slice — DB update failed after artifact writes succeeded; keeping ${summaryPath} and ${uatPath}`, { error: msg }); + invalidateStateCache(); + return { error: `database update failed after SUMMARY.md/UAT.md write succeeded at ${summaryPath}: ${msg}. Files were kept; retry sf_complete_slice after fixing the DB.` }; + } + + // Toggle roadmap checkbox via renderer module after DB status is updated. 
+ try { const roadmapToggled = await renderRoadmapCheckboxes(basePath, params.milestoneId); if (!roadmapToggled) { logWarning("tool", `complete_slice — could not find roadmap for ${params.milestoneId}, skipping checkbox toggle`); } } catch (renderErr) { - // Disk render failed — roll back DB status so state stays consistent - logWarning("tool", `complete_slice — disk render failed for ${params.milestoneId}/${params.sliceId}, rolling back DB status`, { error: (renderErr as Error).message }); - updateSliceStatus(params.milestoneId, params.sliceId, originalSliceStatus); - invalidateStateCache(); - return { error: `disk render failed: ${(renderErr as Error).message}` }; + logWarning("tool", `complete_slice — roadmap checkbox render failed after DB update for ${params.milestoneId}/${params.sliceId}`, { error: errorMessage(renderErr) }); } - // Store rendered markdown in DB for D004 recovery - setSliceSummaryMd(params.milestoneId, params.sliceId, summaryMd, uatMd); - // ── Close gates owned by complete-slice (Q8) ─────────────────────────── // Each owned gate maps to a specific summary section via the registry. // If the caller populated the corresponding field, record `pass`; if the diff --git a/src/resources/extensions/sf/tools/complete-task.ts b/src/resources/extensions/sf/tools/complete-task.ts index b34f89dbf..f25b320ea 100644 --- a/src/resources/extensions/sf/tools/complete-task.ts +++ b/src/resources/extensions/sf/tools/complete-task.ts @@ -1,14 +1,13 @@ /** * complete-task handler — the core operation behind sf_complete_task. * - * Validates inputs, writes task row to DB in a transaction, then (outside - * the transaction) renders SUMMARY.md to disk, toggles the plan checkbox, - * stores the rendered markdown in the DB for D004 recovery, and invalidates + * Validates inputs, atomically renders SUMMARY.md to disk, then writes the + * task row to DB in a transaction, toggles the plan checkbox, and invalidates * caches. 
*/ -import { join } from "node:path"; -import { mkdirSync, existsSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { constants as fsConstants, mkdirSync, promises as fs } from "node:fs"; import type { CompleteTaskParams } from "../types.js"; import { isClosedStatus } from "../status-guards.js"; @@ -21,16 +20,15 @@ import { getMilestone, getSlice, getTask, - updateTaskStatus, setTaskSummaryMd, - deleteVerificationEvidence, saveGateResult, getPendingGatesForTurn, } from "../sf-db.js"; import { getGatesForTurn } from "../gate-registry.js"; import { resolveSliceFile, resolveTasksDir, clearPathCache } from "../paths.js"; import { checkOwnership, taskUnitKey } from "../unit-ownership.js"; -import { saveFile, clearParseCache } from "../files.js"; +import { clearParseCache } from "../files.js"; +import { atomicWriteAsync } from "../atomic-write.js"; import { invalidateStateCache } from "../state.js"; import { renderPlanCheckboxes } from "../markdown-renderer.js"; import { renderAllProjections, renderSummaryContent } from "../workflow-projections.js"; @@ -80,6 +78,25 @@ function normalizeListParam(value: unknown): string[] { return []; } +async function ensureWritableParent(filePath: string): Promise { + const parentDir = dirname(filePath); + await fs.mkdir(parentDir, { recursive: true }); + await fs.access(parentDir, fsConstants.W_OK); +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +async function writeSummaryBeforeDb(filePath: string, content: string): Promise { + try { + await ensureWritableParent(filePath); + await atomicWriteAsync(filePath, content); + } catch (error) { + throw new Error(`SUMMARY.md write failed at ${filePath}: ${errorMessage(error)}`); + } +} + /** * Build a TaskRow-shaped object from CompleteTaskParams so the unified * renderSummaryContent() can be used at completion time (#2720). 
@@ -119,10 +136,9 @@ function paramsToTaskRow(params: CompleteTaskParams, completedAt: string): TaskR * Handle the complete_task operation end-to-end. * * 1. Validate required fields - * 2. Write DB in a transaction (milestone, slice, task, verification evidence) - * 3. Render SUMMARY.md to disk + * 2. Validate and atomically render SUMMARY.md to disk + * 3. Write DB in a transaction (milestone, slice, task, verification evidence) * 4. Toggle plan checkbox - * 5. Store rendered markdown back in DB (for D004 recovery) * 6. Invalidate caches */ export async function handleCompleteTask( @@ -150,77 +166,22 @@ export async function handleCompleteTask( return { error: ownershipErr }; } - // ── Guards + DB writes inside a single transaction (prevents TOCTOU) ─── + // ── Guards before filesystem work ────────────────────────────────────── const completedAt = new Date().toISOString(); - let guardError: string | null = null; - - transaction(() => { - // State machine preconditions (inside txn for atomicity). - // Milestone/slice not existing is OK — insertMilestone/insertSlice below will auto-create. - // Only block if they exist and are closed. 
- const milestone = getMilestone(params.milestoneId); - if (milestone && isClosedStatus(milestone.status)) { - guardError = `cannot complete task in a closed milestone: ${params.milestoneId} (status: ${milestone.status})`; - return; - } - - const slice = getSlice(params.milestoneId, params.sliceId); - if (slice && isClosedStatus(slice.status)) { - guardError = `cannot complete task in a closed slice: ${params.sliceId} (status: ${slice.status})`; - return; - } - - const existingTask = getTask(params.milestoneId, params.sliceId, params.taskId); - if (existingTask && isClosedStatus(existingTask.status)) { - guardError = `task ${params.taskId} is already complete — use sf_task_reopen first if you need to redo it`; - return; - } - - // All guards passed — perform writes - insertMilestone({ id: params.milestoneId, title: params.milestoneId }); - insertSlice({ id: params.sliceId, milestoneId: params.milestoneId, title: params.sliceId }); - const evidence = params.verificationEvidence ?? []; - const verificationStatus = evidence.length === 0 ? "" : - evidence.every((c) => c.exitCode === 0) ? "all_pass" : - evidence.some((c) => c.exitCode === 0) ? "partial" : "all_fail"; - insertTask({ - id: params.taskId, - sliceId: params.sliceId, - milestoneId: params.milestoneId, - title: params.oneLiner, - status: "complete", - oneLiner: params.oneLiner, - narrative: params.narrative, - verificationResult: params.verification, - duration: "", - blockerDiscovered: params.blockerDiscovered ?? false, - deviations: params.deviations ?? "None.", - knownIssues: params.knownIssues ?? "None.", - keyFiles: params.keyFiles ?? [], - keyDecisions: params.keyDecisions ?? [], - verificationStatus, - }); - - for (const evidence of (params.verificationEvidence ?? 
[])) { - insertVerificationEvidence({ - taskId: params.taskId, - sliceId: params.sliceId, - milestoneId: params.milestoneId, - command: evidence.command, - exitCode: evidence.exitCode, - verdict: evidence.verdict, - durationMs: evidence.durationMs, - }); - } - }); - - if (guardError) { - return { error: guardError }; + const milestone = getMilestone(params.milestoneId); + if (milestone && isClosedStatus(milestone.status)) { + return { error: `cannot complete task in a closed milestone: ${params.milestoneId} (status: ${milestone.status})` }; } - // ── Filesystem operations (outside transaction) ───────────────────────── - // If disk render fails, roll back the DB status so deriveState() and - // verifyExpectedArtifact() stay consistent (both say "not done"). + const slice = getSlice(params.milestoneId, params.sliceId); + if (slice && isClosedStatus(slice.status)) { + return { error: `cannot complete task in a closed slice: ${params.sliceId} (status: ${slice.status})` }; + } + + const existingTask = getTask(params.milestoneId, params.sliceId, params.taskId); + if (existingTask && isClosedStatus(existingTask.status)) { + return { error: `task ${params.taskId} is already complete — use sf_task_reopen first if you need to redo it` }; + } // Render summary markdown via the single source of truth (#2720) const taskRow = paramsToTaskRow(params, completedAt); @@ -239,10 +200,67 @@ export async function handleCompleteTask( summaryPath = join(manualTasksDir, `${params.taskId}-SUMMARY.md`); } + // ── Filesystem commit before DB status mutation ──────────────────────── + // SUMMARY.md is the artifact downstream agents read. If this write fails, + // do not mutate the DB into a completed state. 
try { - await saveFile(summaryPath, summaryMd); + await writeSummaryBeforeDb(summaryPath, summaryMd); + } catch (writeErr) { + logWarning("tool", `complete_task — SUMMARY.md write failed before DB update: ${errorMessage(writeErr)}`); + invalidateStateCache(); + return { error: errorMessage(writeErr) }; + } - // Toggle plan checkbox via renderer module + // ── DB commit only after SUMMARY.md exists on disk ───────────────────── + try { + transaction(() => { + insertMilestone({ id: params.milestoneId, title: params.milestoneId }); + insertSlice({ id: params.sliceId, milestoneId: params.milestoneId, title: params.sliceId }); + const evidence = params.verificationEvidence ?? []; + const verificationStatus = evidence.length === 0 ? "" : + evidence.every((c) => c.exitCode === 0) ? "all_pass" : + evidence.some((c) => c.exitCode === 0) ? "partial" : "all_fail"; + insertTask({ + id: params.taskId, + sliceId: params.sliceId, + milestoneId: params.milestoneId, + title: params.oneLiner, + status: "complete", + oneLiner: params.oneLiner, + narrative: params.narrative, + verificationResult: params.verification, + duration: "", + blockerDiscovered: params.blockerDiscovered ?? false, + deviations: params.deviations ?? "None.", + knownIssues: params.knownIssues ?? "None.", + keyFiles: params.keyFiles ?? [], + keyDecisions: params.keyDecisions ?? [], + fullSummaryMd: summaryMd, + verificationStatus, + }); + + for (const evidence of (params.verificationEvidence ?? 
[])) { + insertVerificationEvidence({ + taskId: params.taskId, + sliceId: params.sliceId, + milestoneId: params.milestoneId, + command: evidence.command, + exitCode: evidence.exitCode, + verdict: evidence.verdict, + durationMs: evidence.durationMs, + }); + } + setTaskSummaryMd(params.milestoneId, params.sliceId, params.taskId, summaryMd); + }); + } catch (dbErr) { + const msg = errorMessage(dbErr); + logError("tool", `complete_task — DB update failed after SUMMARY.md write succeeded; keeping ${summaryPath}`, { error: msg }); + invalidateStateCache(); + return { error: `database update failed after SUMMARY.md write succeeded at ${summaryPath}: ${msg}. SUMMARY.md was kept; retry sf_task_complete after fixing the DB.` }; + } + + // Toggle plan checkbox via renderer module after DB status is updated. + try { const planPath = resolveSliceFile(basePath, params.milestoneId, params.sliceId, "PLAN"); if (planPath) { await renderPlanCheckboxes(basePath, params.milestoneId, params.sliceId); @@ -252,20 +270,9 @@ export async function handleCompleteTask( ); } } catch (renderErr) { - // Disk render failed — roll back DB status so state stays consistent - logWarning("tool", `complete_task — disk render failed, rolling back DB status: ${(renderErr as Error).message}`); - // Delete orphaned verification_evidence rows first (FK constraint - // references tasks, so evidence must go before status change). - // Without this, retries accumulate duplicate evidence rows (#2724). 
- deleteVerificationEvidence(params.milestoneId, params.sliceId, params.taskId); - updateTaskStatus(params.milestoneId, params.sliceId, params.taskId, 'pending'); - invalidateStateCache(); - return { error: `disk render failed: ${(renderErr as Error).message}` }; + logWarning("tool", `complete_task — plan checkbox render failed after DB update: ${errorMessage(renderErr)}`); } - // Store rendered markdown in DB for D004 recovery - setTaskSummaryMd(params.milestoneId, params.sliceId, params.taskId, summaryMd); - // ── Close gates owned by execute-task (Q5/Q6/Q7) for this task ──────── // Each gate id maps to a specific params field via taskGateFieldForId. // When the model populates the field, record `pass`; when it's empty, diff --git a/src/resources/extensions/sf/tools/plan-slice.ts b/src/resources/extensions/sf/tools/plan-slice.ts index fb571289d..0ac1952a6 100644 --- a/src/resources/extensions/sf/tools/plan-slice.ts +++ b/src/resources/extensions/sf/tools/plan-slice.ts @@ -43,7 +43,7 @@ export interface PlanSliceParams { goal: string; tasks: PlanSliceTaskInput[]; adversarialReview?: AdversarialReviewRecord; - planningMeeting?: PlanningMeetingRecord; + planningMeeting: PlanningMeetingRecord; /** @optional — defaults to "Not provided." when omitted by models with limited tool-calling */ successCriteria?: string; /** @optional — defaults to "Not provided." when omitted */ @@ -65,6 +65,9 @@ export interface PlanSliceResult { taskPlanPaths: string[]; } +const PLANNING_MEETING_REQUIRED_MESSAGE = + "planningMeeting must be a populated object — write at least 2-3 perspectives. 
Skipping is not allowed."; + function validateTasks(value: unknown): PlanSliceTaskInput[] { if (!Array.isArray(value) || value.length === 0) { throw new Error("tasks must be a non-empty array"); @@ -124,6 +127,10 @@ function validateParams(params: PlanSliceParams): PlanSliceParams { if (!isNonEmptyString(params?.milestoneId)) throw new Error("milestoneId is required"); if (!isNonEmptyString(params?.sliceId)) throw new Error("sliceId is required"); if (!isNonEmptyString(params?.goal)) throw new Error("goal is required"); + const planningMeeting = params?.planningMeeting; + if (!hasStructuredPlanningMeeting(planningMeeting)) { + throw new Error(PLANNING_MEETING_REQUIRED_MESSAGE); + } return { ...params, @@ -143,19 +150,21 @@ function validateParams(params: PlanSliceParams): PlanSliceParams { combatant: "Missing combatant review.", architect: "Missing architect review.", }, - planningMeeting: hasStructuredPlanningMeeting(params.planningMeeting) - ? { - trigger: params.planningMeeting.trigger.trim(), - pm: params.planningMeeting.pm.trim(), - researcher: params.planningMeeting.researcher.trim(), - partner: params.planningMeeting.partner.trim(), - combatant: params.planningMeeting.combatant.trim(), - architect: params.planningMeeting.architect.trim(), - moderator: params.planningMeeting.moderator.trim(), - recommendedRoute: params.planningMeeting.recommendedRoute, - confidenceSummary: params.planningMeeting.confidenceSummary.trim(), - } - : undefined, + planningMeeting: { + trigger: planningMeeting.trigger.trim(), + pm: planningMeeting.pm.trim(), + ...(isNonEmptyString(planningMeeting.userAdvocate) ? { userAdvocate: planningMeeting.userAdvocate.trim() } : {}), + ...(isNonEmptyString(planningMeeting.customerPanel) ? { customerPanel: planningMeeting.customerPanel.trim() } : {}), + ...(isNonEmptyString(planningMeeting.business) ? 
{ business: planningMeeting.business.trim() } : {}), + researcher: planningMeeting.researcher.trim(), + ...(isNonEmptyString(planningMeeting.deliveryLead) ? { deliveryLead: planningMeeting.deliveryLead.trim() } : {}), + partner: planningMeeting.partner.trim(), + combatant: planningMeeting.combatant.trim(), + architect: planningMeeting.architect.trim(), + moderator: planningMeeting.moderator.trim(), + recommendedRoute: planningMeeting.recommendedRoute, + confidenceSummary: planningMeeting.confidenceSummary.trim(), + }, tasks: validateTasks(params.tasks), }; } @@ -168,7 +177,10 @@ export async function handlePlanSlice( try { params = validateParams(rawParams); } catch (err) { - return { error: `validation failed: ${(err as Error).message}` }; + const message = (err as Error).message; + return { + error: message === PLANNING_MEETING_REQUIRED_MESSAGE ? message : `validation failed: ${message}`, + }; } // ── Guards + DB writes inside a single transaction (prevents TOCTOU) ─── diff --git a/src/resources/extensions/sf/workflow-projections.ts b/src/resources/extensions/sf/workflow-projections.ts index 6035573b0..94089f3cb 100644 --- a/src/resources/extensions/sf/workflow-projections.ts +++ b/src/resources/extensions/sf/workflow-projections.ts @@ -90,10 +90,34 @@ export function renderPlanContent(sliceRow: SliceRow, taskRows: TaskRow[]): stri lines.push(""); lines.push(sliceRow.planning_meeting.pm.trim()); lines.push(""); + if (sliceRow.planning_meeting.userAdvocate?.trim()) { + lines.push("### User Advocate"); + lines.push(""); + lines.push(sliceRow.planning_meeting.userAdvocate.trim()); + lines.push(""); + } + if (sliceRow.planning_meeting.customerPanel?.trim()) { + lines.push("### Customer Panel"); + lines.push(""); + lines.push(sliceRow.planning_meeting.customerPanel.trim()); + lines.push(""); + } + if (sliceRow.planning_meeting.business?.trim()) { + lines.push("### Business"); + lines.push(""); + lines.push(sliceRow.planning_meeting.business.trim()); + lines.push(""); 
+ } lines.push("### Researcher"); lines.push(""); lines.push(sliceRow.planning_meeting.researcher.trim()); lines.push(""); + if (sliceRow.planning_meeting.deliveryLead?.trim()) { + lines.push("### Delivery Lead"); + lines.push(""); + lines.push(sliceRow.planning_meeting.deliveryLead.trim()); + lines.push(""); + } lines.push("### Partner"); lines.push(""); lines.push(sliceRow.planning_meeting.partner.trim()); diff --git a/src/tests/cli-logs-tail.test.ts b/src/tests/cli-logs-tail.test.ts new file mode 100644 index 000000000..4209c2216 --- /dev/null +++ b/src/tests/cli-logs-tail.test.ts @@ -0,0 +1,74 @@ +import test from 'node:test' +import assert from 'node:assert/strict' +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +import { + collectRecentLogEvents, + formatMergedLogEvent, + getProjectSessionKey, +} from '../cli-logs.js' + +function jsonl(entries: unknown[]): string { + return entries.map((entry) => JSON.stringify(entry)).join('\n') + '\n' +} + +test('sf logs tail routes merged sources with expected prefixes', () => { + const root = mkdtempSync(join(tmpdir(), 'sf-logs-tail-test-')) + const project = join(root, 'project') + const sfHome = join(root, '.sf-home') + + try { + mkdirSync(join(project, '.sf', 'activity'), { recursive: true }) + mkdirSync(join(sfHome, 'agent', 'sessions', getProjectSessionKey(project)), { recursive: true }) + + writeFileSync(join(project, '.sf', 'notifications.jsonl'), jsonl([ + { ts: '2026-04-28T10:00:00.000Z', severity: 'info', message: 'Notification ready' }, + ])) + + writeFileSync( + join(sfHome, 'agent', 'sessions', getProjectSessionKey(project), '2026-04-28T10-00-01_sess.jsonl'), + jsonl([ + { + type: 'message', + timestamp: '2026-04-28T10:00:01.000Z', + message: { + role: 'assistant', + timestamp: 1777370401000, + content: [{ type: 'text', text: 'Working on it' }, { type: 'toolCall', name: 'Read' }], + stopReason: 'toolUse', + }, + }, 
+ ]), + ) + + writeFileSync(join(project, '.sf', 'activity', '001-execute-task-M001-S01-T01.jsonl'), jsonl([ + { + type: 'message', + timestamp: '2026-04-28T10:00:02.000Z', + message: { + role: 'assistant', + timestamp: 1777370402000, + content: [{ type: 'toolCall', name: 'Bash' }], + stopReason: 'toolUse', + }, + }, + ])) + + writeFileSync(join(project, '.sf', 'audit-log.jsonl'), jsonl([ + { ts: '2026-04-28T10:00:03.000Z', severity: 'error', component: 'engine', message: 'Audit failure' }, + ])) + + const output = collectRecentLogEvents({ basePath: project, sfHome, limit: 10 }) + .map(formatMergedLogEvent) + .join('') + + assert.match(output, /\[notif\] Notification ready/) + assert.match(output, /\[session\] assistant: Working on it \| tools: Read \| stop: toolUse/) + assert.match(output, /\[activity\] tools: Bash \| stop: toolUse/) + assert.match(output, /\[audit\] \[engine\] Audit failure/) + } finally { + rmSync(root, { recursive: true, force: true }) + } +}) diff --git a/src/tests/cli-stats-models.test.ts b/src/tests/cli-stats-models.test.ts new file mode 100644 index 000000000..537e67aef --- /dev/null +++ b/src/tests/cli-stats-models.test.ts @@ -0,0 +1,62 @@ +import test from 'node:test' +import assert from 'node:assert/strict' + +import { formatModelStatsTable, parseDurationSeconds, queryModelStats } from '../cli-stats.js' + +test('sf stats models formats ASCII table rows from SQLite aggregate data', () => { + const rows = [ + { + model_id: 'gpt-5.4', + provider: 'openai', + unit_type: 'execute-task', + attempts: 2, + successes: 1, + avg_ms: 1234.4, + avg_cost: 0.01234, + avg_retries: 1.5, + }, + ] + + const output = formatModelStatsTable(rows) + + assert.match(output, /^\+/) + assert.match(output, /\| model_id \| provider \| unit_type/) + assert.match(output, /\| gpt-5\.4\s+\| openai\s+\| execute-task/) + assert.match(output, /\|\s+2 \|\s+50\.0% \|\s+1234 \|\s+\$0\.0123 \|\s+1\.50 \|/) +}) + +test('sf stats models queries llm_task_outcomes with since and 
unit-type filters', () => { + let capturedSql = '' + let capturedParams: unknown[] = [] + const fakeDb = { + prepare(sql: string) { + capturedSql = sql + return { + all(...params: unknown[]) { + capturedParams = params + return [ + { + model_id: 'claude-sonnet', + provider: 'anthropic', + unit_type: 'plan-slice', + attempts: 4, + successes: 3, + avg_ms: 2000, + avg_cost: 0.02, + avg_retries: 0.25, + }, + ] + }, + } + }, + } + + const rows = queryModelStats(fakeDb, { sinceSeconds: parseDurationSeconds('24h'), unitType: 'plan-slice' }) + + assert.equal(rows.length, 1) + assert.match(capturedSql, /FROM llm_task_outcomes/) + assert.match(capturedSql, /recorded_at > unixepoch\(\) - \?/) + assert.match(capturedSql, /unit_type = \?/) + assert.match(capturedSql, /GROUP BY model_id, unit_type/) + assert.deepEqual(capturedParams, [86_400, 'plan-slice']) +})