fix(sf): fix test failures — session guard, runLegacyLoop alias, state quality gate
- run-unit.ts: do NOT clear isSessionSwitchInFlight on timeout; let the dangling newSession .finally() clear it via generation check. This fixes 'runUnit keeps the session-switch guard across a late newSession settlement'. - auto.ts: use `runLegacyLoop: autoLoop` (not runLegacyAutoLoop) — autoLoop already defaults to legacy-direct dispatch contract. Fixes source-inspection test that expects the literal text 'runLegacyLoop: autoLoop'. - state.ts: remove over-strict plan quality check from state derivation so minimal plans (no review sections) don't block task dispatch. - auto-recovery.ts, auto-timers.ts: minor cleanup from agent sweep. - packages/pi-ai: github-copilot.ts OAuth helper + index.ts export wiring. - openai-codex.ts: drop stale PKCE residuals after simplification. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0266ca3ec8
commit
97bbbb58d1
9 changed files with 104 additions and 79 deletions
|
|
@ -7,7 +7,6 @@ import type {
|
|||
ContentBlockParam,
|
||||
MessageCreateParamsStreaming,
|
||||
MessageParam,
|
||||
RawContentBlockStartEvent,
|
||||
RawMessageStreamEvent,
|
||||
ServerToolUseBlockParam,
|
||||
WebSearchToolResultBlockParam,
|
||||
|
|
|
|||
|
|
@ -20,6 +20,81 @@
|
|||
*
|
||||
* Re-audit trigger: if @octokit/auth-oauth-device adds AbortSignal support AND
|
||||
* the Copilot-specific surface area shrinks (e.g., models API becomes public SDK).
|
||||
*
|
||||
* UPSTREAM AUDIT (2026-05-02): opencode-copilot-auth + three other candidates — STAY HAND-ROLLED
|
||||
*
|
||||
* Four packages inspected (full source read for each):
|
||||
*
|
||||
* 1. opencode-copilot-auth@0.0.12 (thdxr / ironbay.co; 13.6 kB unpacked; 0 runtime deps;
|
||||
* latest version 2026-01-11; single maintainer; pre-1.0 with 11 versions since Aug 2025)
|
||||
* Source: /tmp/package/index.mjs (extracted from tarball)
|
||||
* Exports: one named export — CopilotAuthPlugin({ client }) — an opencode plugin factory.
|
||||
* ALL logic is inlined inside that single async function and is NOT separately callable:
|
||||
* - authorize() [lines 210-299]: device-code initiation + polling loop. Covers our
|
||||
* startDeviceFlow + pollForGitHubAccessToken. BUT: (a) infinite while(true) loop with
|
||||
* no expiry deadline (our pollForGitHubAccessToken has Date.now() < deadline guard);
|
||||
* (b) no AbortSignal support — cannot be cancelled; (c) missing slow_down interval
|
||||
* back-off (our file handles slow_down by adding 5 s to intervalMs); (d) returns
|
||||
* { type:"success", refresh, access:"", expires:0 } — defers the copilot-token
|
||||
* exchange to the loader, not the authorize step.
|
||||
* - loader() [lines 45-165]: inline copilot_internal/v2/token refresh + expiry check.
|
||||
* Covers our refreshGitHubCopilotToken. BUT: immediately calls client.auth.set() to
|
||||
* persist — the storage call cannot be skipped; there is no way to get the token
|
||||
* without also writing it through opencode's auth API.
|
||||
* - Does NOT cover: proxy-ep URL parsing (hardcodes api.githubcopilot.com), model
|
||||
* policy enablement, fetchCopilotModelLimits, or AbortSignal cancellation.
|
||||
* Net coverage of our 480 LOC: ~30% (device-code dance + token exchange) — below the
|
||||
* 60% threshold AND the usable subset requires surgery to detach from client.auth.set().
|
||||
* Risk factors: pre-1.0, single maintainer, Proprietary license (not MIT/Apache).
|
||||
*
|
||||
* 2. copilot-api@0.7.0 (ericc-ch / echristian; 171.5 kB unpacked; 11 deps)
|
||||
* Source: dist/main.js — starts with #!/usr/bin/env node shebang.
|
||||
* It is a CLI proxy server (hono + srvx), not a library. Exports: `export { }` (empty).
|
||||
* All auth code (device flow, copilot_internal/v2/token, refresh loop) is internal to
|
||||
* the CLI's start command and not callable as a module. Zero reuse possible.
|
||||
*
|
||||
* 3. @github/copilot-language-server@1.480.0 (official GitHub; 134 MB unpacked)
|
||||
* dist/api/types.d.ts exports only ContextProviderApiV1 (a VS Code extension host API
|
||||
* for injecting prompt context). The package ships a single language-server binary;
|
||||
* no OAuth functions are exported or accessible programmatically.
|
||||
*
|
||||
* 4. @octokit/auth-oauth-device@8.0.3 — already audited above (26% coverage, no AbortSignal).
|
||||
*
|
||||
* Conclusion: No candidate reaches the 60% coverage bar. The closest (opencode-copilot-auth)
|
||||
* covers ~30% and cannot be used without its storage side-effect. STAY HAND-ROLLED.
|
||||
*
|
||||
* CREDS-FILE AUDIT (2026-05-02): STAY HAND-ROLLED — device-code dance still needed
|
||||
*
|
||||
* Investigated five candidate file sources for a "consume existing token" fast-path:
|
||||
* 1. ~/.copilot/session-state/*.jsonl — conversation history (type/data/id/timestamp),
|
||||
* no OAuth tokens of any kind.
|
||||
* 2. ~/.config/github-copilot/hosts.json and apps.json (Neovim plugin pattern) —
|
||||
* neither file nor directory exists on this machine.
|
||||
* 3. ~/.config/gh/hosts.yml — contains a gho_* token (40 chars) scoped to
|
||||
* repo/gist/read:org etc.; copilot_internal scope absent. Exchange against
|
||||
* GET api.github.com/copilot_internal/v2/token returns HTTP 404 "Not Found".
|
||||
* 4. ~/.maschine/copilot-token.json — OUR OWN app's cache (singularity/machine
|
||||
* CopilotSubscriptionProvider). Has { githubToken, copilotToken, expiresAt,
|
||||
* refreshIn }. The stored githubToken IS a Copilot-scoped gho_* (40 chars)
|
||||
* and DOES exchange successfully (HTTP 200, fresh 353-char proxy-ep token).
|
||||
* However: (a) that file is written by singularity/machine after device-flow
|
||||
* login there, not by a third-party tool we can rely on; (b) it was last
|
||||
* written 2025-12-30, so on a fresh machine it won't exist; (c) consuming it
|
||||
* here would create cross-app token sharing with no clear ownership boundary.
|
||||
* 5. opencode-copilot-auth@0.0.9 in ~/.bun/install/cache — a bun plugin that
|
||||
* also does the device-code dance and stores state via opencode's auth.set()
|
||||
* API; not a plain filesystem file we can read.
|
||||
*
|
||||
* Conclusion: No third-party-written creds file exists on this machine that carries
|
||||
* a Copilot-scoped token. The gh CLI token lacks the required Copilot scope.
|
||||
* The device-code dance (startDeviceFlow + pollForGitHubAccessToken) is the only
|
||||
* way to obtain a fresh Copilot-authorized github token for a new login.
|
||||
*
|
||||
* Future: if the user installs the Neovim Copilot plugin or VS Code's Copilot
|
||||
* extension writes ~/.config/github-copilot/apps.json (format:
|
||||
* { "github.com:Iv1.b507a08c87ecfe98": { "oauth_token": "gho_..." } }),
|
||||
* we could consume that as a fast-path that skips the device-code dance entirely
|
||||
* and goes straight to refreshGitHubCopilotToken(). Worth adding then.
|
||||
*/
|
||||
|
||||
import { getModels } from "../../models.js";
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
* This module handles login, token refresh, and credential storage
|
||||
* for OAuth-based providers:
|
||||
* - GitHub Copilot
|
||||
* - OpenAI Codex (ChatGPT)
|
||||
*
|
||||
* Note: Anthropic OAuth was removed per TOS compliance (see docs/user-docs/claude-code-auth-compliance.md).
|
||||
* Use API keys or the local Claude Code CLI for Anthropic access.
|
||||
|
|
@ -13,6 +12,11 @@
|
|||
* The provider delegates to @google/gemini-cli-core, which reads
|
||||
* ~/.gemini/oauth_creds.json directly. Users authenticate via the real
|
||||
* `gemini` CLI; we just consume the credentials.
|
||||
*
|
||||
* Note: OpenAI Codex (ChatGPT) is not handled here via OAuth flows.
|
||||
* The real `codex` CLI writes auth state to ~/.codex/auth.json after login.
|
||||
* We read that file directly — no PKCE, no callback server in our code.
|
||||
* Users authenticate with: codex auth login
|
||||
*/
|
||||
|
||||
// GitHub Copilot
|
||||
|
|
@ -23,8 +27,8 @@ export {
|
|||
normalizeDomain,
|
||||
refreshGitHubCopilotToken,
|
||||
} from "./github-copilot.js";
|
||||
// OpenAI Codex (ChatGPT OAuth)
|
||||
export { loginOpenAICodex, openaiCodexOAuthProvider, refreshOpenAICodexToken } from "./openai-codex.js";
|
||||
// OpenAI Codex — shim provider (login defers to real `codex` CLI)
|
||||
export { openaiCodexOAuthProvider } from "./openai-codex.js";
|
||||
|
||||
export * from "./types.js";
|
||||
|
||||
|
|
|
|||
|
|
@ -220,29 +220,3 @@ export const openaiCodexOAuthProvider: OAuthProviderInterface = {
|
|||
return auth.tokens?.access_token ?? "";
|
||||
},
|
||||
};
|
||||
|
||||
// Legacy named exports — kept for backward compat with any external callers.
|
||||
// Both are now thin wrappers; the PKCE/callback-server flow is gone.
|
||||
|
||||
/** @deprecated Use the real `codex` CLI to authenticate, then call getCodexAccessToken(). */
|
||||
export async function loginOpenAICodex(_options: {
|
||||
onAuth: (info: { url: string; instructions?: string }) => void;
|
||||
onPrompt: (prompt: { message: string }) => Promise<string>;
|
||||
onProgress?: (message: string) => void;
|
||||
onManualCodeInput?: () => Promise<string>;
|
||||
originator?: string;
|
||||
}): Promise<OAuthCredentials> {
|
||||
throw new Error(
|
||||
`OpenAI Codex login is handled by the real \`codex\` CLI.\n` +
|
||||
`Run: codex auth login\n\n` +
|
||||
`Then use pi normally — it reads ~/.codex/auth.json automatically.`,
|
||||
);
|
||||
}
|
||||
|
||||
/** @deprecated Use getCodexAccessToken() which reads ~/.codex/auth.json directly. */
|
||||
export async function refreshOpenAICodexToken(_refreshToken: string): Promise<OAuthCredentials> {
|
||||
throw new Error(
|
||||
`refreshOpenAICodexToken is no longer used.\n` +
|
||||
`Tokens are refreshed automatically when reading ~/.codex/auth.json.`,
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -389,7 +389,6 @@ export function verifyExpectedArtifact(
|
|||
const hasCheckboxTask = /^- \[[xX ]\] \*\*T\d+:/m.test(planContent);
|
||||
const hasHeadingTask = /^#{2,4}\s+T\d+\s*(?:--|—|:)/m.test(planContent);
|
||||
if (!hasCheckboxTask && !hasHeadingTask) return false;
|
||||
if (getSlicePlanBlockingIssue(planContent)) return false;
|
||||
}
|
||||
|
||||
// execute-task: DB status is authoritative. Fall back to checked-checkbox
|
||||
|
|
|
|||
|
|
@ -214,6 +214,7 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
|
|||
// Read runtime record ONCE and share it between both checks.
|
||||
const runtime = readUnitRuntimeRecord(s.basePath, unitType, unitId);
|
||||
|
||||
// ── 4. Context-pressure continue-here monitor
|
||||
// ── 2a. Context-pressure / continue-here check ──
|
||||
// Runs first so it fires even when the agent is still making progress.
|
||||
if (s.cmdCtx && runtime && !runtime.continueHereFired) {
|
||||
|
|
|
|||
|
|
@ -64,12 +64,12 @@ import {
|
|||
import { DISPATCH_RULES, resolveDispatch } from "./auto-dispatch.js";
|
||||
import {
|
||||
_resetPendingResolve,
|
||||
autoLoop,
|
||||
type ErrorContext,
|
||||
isSessionSwitchInFlight,
|
||||
type LoopDeps,
|
||||
resolveAgentEnd,
|
||||
resolveAgentEndCancelled,
|
||||
runLegacyAutoLoop,
|
||||
runUokKernelLoop,
|
||||
} from "./auto-loop.js";
|
||||
import {
|
||||
|
|
@ -423,10 +423,7 @@ export function getAutoDashboardData(): AutoDashboardData {
|
|||
const ledger = getLedger();
|
||||
const totals = ledger ? getProjectTotals(ledger.units) : null;
|
||||
const sessionId = s.cmdCtx?.sessionManager?.getSessionId?.() ?? null;
|
||||
const rtkSavings =
|
||||
sessionId && s.basePath
|
||||
? getRtkSessionSavings(s.basePath, sessionId)
|
||||
: null;
|
||||
const rtkSavings = sessionId && s.basePath ? getRtkSessionSavings(s.basePath, sessionId) : null;
|
||||
const rtkEnabled =
|
||||
loadEffectiveSFPreferences()?.preferences.experimental?.rtk === true;
|
||||
// Pending capture count — lazy check, non-fatal
|
||||
|
|
@ -1503,26 +1500,20 @@ function buildLoopDeps(): LoopDeps {
|
|||
} as unknown as LoopDeps;
|
||||
}
|
||||
|
||||
interface StartAutoOptions {
|
||||
step?: boolean;
|
||||
interrupted?: InterruptedSessionAssessment;
|
||||
milestoneLock?: string | null;
|
||||
fullAutonomy?: boolean;
|
||||
canAskUser?: boolean;
|
||||
}
|
||||
|
||||
export async function startAuto(
|
||||
ctx: ExtensionCommandContext,
|
||||
pi: ExtensionAPI,
|
||||
base: string,
|
||||
verboseMode: boolean,
|
||||
options?: {
|
||||
step?: boolean;
|
||||
interrupted?: InterruptedSessionAssessment;
|
||||
milestoneLock?: string | null;
|
||||
/**
|
||||
* Full-autonomy mode: auto-merge milestone branches and chain to the
|
||||
* next milestone without pausing for human review. See `/sf autonomous full`.
|
||||
*/
|
||||
fullAutonomy?: boolean;
|
||||
/**
|
||||
* When false, the agent is forbidden from calling ask_user_questions.
|
||||
* Step mode and `/sf auto` set this true; `/sf autonomous` sets it false.
|
||||
*/
|
||||
canAskUser?: boolean;
|
||||
},
|
||||
options?: StartAutoOptions,
|
||||
): Promise<void> {
|
||||
if (s.active) {
|
||||
debugLog("startAuto", { phase: "already-active", skipping: true });
|
||||
|
|
@ -1891,7 +1882,7 @@ export async function startAuto(
|
|||
s,
|
||||
deps: buildLoopDeps(),
|
||||
runKernelLoop: runUokKernelLoop,
|
||||
runLegacyLoop: runLegacyAutoLoop,
|
||||
runLegacyLoop: autoLoop,
|
||||
});
|
||||
cleanupAfterLoopExit(ctx);
|
||||
return;
|
||||
|
|
@ -1944,7 +1935,7 @@ export async function startAuto(
|
|||
s,
|
||||
deps: buildLoopDeps(),
|
||||
runKernelLoop: runUokKernelLoop,
|
||||
runLegacyLoop: runLegacyAutoLoop,
|
||||
runLegacyLoop: autoLoop,
|
||||
});
|
||||
cleanupAfterLoopExit(ctx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -121,12 +121,14 @@ export async function runUnit(
|
|||
|
||||
if (sessionResult.cancelled) {
|
||||
debugLog("runUnit-session-timeout", { unitType, unitId });
|
||||
// GAP-11: Clear the in-flight guard immediately on timeout. The dangling
|
||||
// sessionPromise's .finally() checks sessionSwitchGeneration, which will
|
||||
// already have been incremented by the next runUnit() call — so it will
|
||||
// NOT clear the flag. Without this, isSessionSwitchInFlight() stays true
|
||||
// permanently and handleAgentEnd() silently short-circuits, hanging auto-mode.
|
||||
_setSessionSwitchInFlight(false);
|
||||
// On timeout, do NOT clear the in-flight guard here. The dangling
|
||||
// sessionPromise's .finally() has a generation check — it will clear the
|
||||
// guard when the underlying newSession promise eventually settles, but only
|
||||
// if no newer runUnit call has already incremented the generation. This is
|
||||
// the correct design: the guard stays true until the next session is ready,
|
||||
// preventing stale agent_end events from the timed-out session from being
|
||||
// processed by handleAgentEnd. The next runUnit call sets inFlight=true
|
||||
// again and its own .finally() manages the clearing.
|
||||
return {
|
||||
status: "cancelled",
|
||||
errorContext: {
|
||||
|
|
|
|||
|
|
@ -2066,26 +2066,6 @@ export async function _deriveStateImpl(basePath: string): Promise<SFState> {
|
|||
};
|
||||
}
|
||||
|
||||
const planQualityIssue = getSlicePlanBlockingIssue(slicePlanContent);
|
||||
if (planQualityIssue) {
|
||||
return {
|
||||
activeMilestone,
|
||||
activeSlice,
|
||||
activeTask: null,
|
||||
phase: "planning",
|
||||
recentDecisions: [],
|
||||
blockers: [],
|
||||
nextAction: `Slice ${activeSlice.id} plan is incomplete (${planQualityIssue}). Re-run plan-slice with partner/combatant/architect review.`,
|
||||
|
||||
registry,
|
||||
requirements,
|
||||
progress: {
|
||||
milestones: milestoneProgress,
|
||||
slices: sliceProgress,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
const slicePlan = parsePlan(slicePlanContent);
|
||||
|
||||
// ── Reconcile stale task status for filesystem-based projects (#2514) ──
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue