// singularity-forge/src/tests/integration/e2e-headless.test.ts

/**
 * E2E integration tests for `sf headless` runtime behavior.
 *
 * Spawns real `sf headless` child processes and asserts on
 * stdout/stderr/exit-code for: JSON batch mode, SIGINT exit code,
 * stream-json NDJSON output, --resume error path, and invalid
 * --output-format handling.
 *
 * These tests are structural — they do NOT require API keys.
 *
 * Prerequisite: npm run build must be run first.
 *
 * Run with:
 *   node --import ./src/resources/extensions/sf/tests/resolve-ts.mjs \
 *        --experimental-strip-types --test \
 *        src/tests/integration/e2e-headless.test.ts
 */

import assert from "node:assert/strict";
import { spawn } from "node:child_process";
import { existsSync, mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import test from "node:test";

// The working directory the test runner was launched from is treated as the
// project root; the built CLI entry point is resolved relative to it.
const projectRoot = process.cwd();
const loaderPath = join(projectRoot, "dist", "loader.js");

// Fail fast at module load when the build output is missing, so every test
// does not fail individually with a confusing spawn error.
if (existsSync(loaderPath) === false) {
	throw new Error("dist/loader.js not found — run: npm run build");
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Outcome of one spawned child process, as collected by the helpers below. */
interface RunResult {
	/** Everything the child wrote to stdout. */
	stdout: string;
	/** Everything the child wrote to stderr. */
	stderr: string;
	/** Exit code delivered with the child's `close` event; `null` when no numeric code was reported. */
	code: number | null;
	/** True when the harness timeout elapsed and the child was signalled to stop. */
	timedOut: boolean;
}

/**
 * Spawn `node dist/loader.js ...args`, wait for it to finish, and collect
 * its output.
 *
 * The child's stdin is closed immediately. The returned promise never
 * rejects: if the process cannot be spawned (or another child-process error
 * is reported) it resolves with `code: null` and a `[spawn error]` note
 * appended to `stderr`, so callers fail on a normal assertion instead of an
 * unhandled `'error'` event.
 *
 * @param args - CLI arguments placed after the loader path.
 * @param timeoutMs - Harness deadline in milliseconds. When it elapses the
 *   child is sent SIGTERM (and SIGKILL if it is still alive 5s later) and
 *   `timedOut` is set on the result.
 * @param env - Variables merged over `process.env` for the child.
 * @param cwd - Working directory for the child; defaults to the project root.
 * @returns Captured stdout/stderr, the exit code, and whether the harness
 *   deadline fired.
 */
function runGsd(
	args: string[],
	timeoutMs = 30_000,
	env: NodeJS.ProcessEnv = {},
	cwd: string = projectRoot,
): Promise<RunResult> {
	return new Promise((resolve) => {
		// Raw chunks are kept and decoded once at the end: decoding each chunk
		// separately would corrupt a multi-byte UTF-8 character that happens to
		// be split across two chunks.
		const stdoutChunks: Buffer[] = [];
		const stderrChunks: Buffer[] = [];
		let spawnError = "";
		let timedOut = false;
		let settled = false;
		let forceKillTimer: ReturnType<typeof setTimeout> | undefined;

		const child = spawn("node", [loaderPath, ...args], {
			cwd,
			env: { ...process.env, ...env },
			stdio: ["pipe", "pipe", "pipe"],
		});

		child.stdout.on("data", (chunk: Buffer) => {
			stdoutChunks.push(chunk);
		});
		child.stderr.on("data", (chunk: Buffer) => {
			stderrChunks.push(chunk);
		});

		child.stdin.end();

		const timer = setTimeout(() => {
			timedOut = true;
			child.kill("SIGTERM");
			// A child that traps or ignores SIGTERM would otherwise keep the
			// test waiting forever; escalate after a short grace period.
			forceKillTimer = setTimeout(() => {
				child.kill("SIGKILL");
			}, 5_000);
		}, timeoutMs);

		// Resolve exactly once, whichever of 'error' / 'close' arrives first.
		const finish = (code: number | null): void => {
			if (settled) {
				return;
			}
			settled = true;
			clearTimeout(timer);
			clearTimeout(forceKillTimer);
			resolve({
				stdout: Buffer.concat(stdoutChunks).toString("utf8"),
				stderr: Buffer.concat(stderrChunks).toString("utf8") + spawnError,
				code,
				timedOut,
			});
		};

		child.on("error", (err: Error) => {
			spawnError = `\n[spawn error] ${err.message}`;
			finish(null);
		});
		child.on("close", (code) => {
			finish(code);
		});
	});
}

/**
 * Spawn `node dist/loader.js ...args` and hand back the live child so the
 * caller can send signals mid-flight, together with a promise for the final
 * result.
 *
 * The child's stdin is closed immediately. The `result` promise never
 * rejects: if the process cannot be spawned (or another child-process error
 * is reported) it resolves with `code: null` and a `[spawn error]` note
 * appended to `stderr`. The child's streams are left in Buffer mode, so
 * callers may attach their own `data` listeners.
 *
 * @param args - CLI arguments placed after the loader path.
 * @param timeoutMs - Harness deadline in milliseconds. When it elapses the
 *   child is sent SIGTERM (and SIGKILL if it is still alive 5s later) and
 *   `timedOut` is set on the result.
 * @param env - Variables merged over `process.env` for the child.
 * @param cwd - Working directory for the child; defaults to the project root.
 * @returns The child process handle and a promise resolving to the captured
 *   stdout/stderr, exit code, and timeout flag.
 */
function spawnGsd(
	args: string[],
	timeoutMs = 30_000,
	env: NodeJS.ProcessEnv = {},
	cwd: string = projectRoot,
): { child: ReturnType<typeof spawn>; result: Promise<RunResult> } {
	// Raw chunks are kept and decoded once at the end: decoding each chunk
	// separately would corrupt a multi-byte UTF-8 character that happens to
	// be split across two chunks.
	const stdoutChunks: Buffer[] = [];
	const stderrChunks: Buffer[] = [];
	let spawnError = "";
	let timedOut = false;
	let forceKillTimer: ReturnType<typeof setTimeout> | undefined;

	const child = spawn("node", [loaderPath, ...args], {
		cwd,
		env: { ...process.env, ...env },
		stdio: ["pipe", "pipe", "pipe"],
	});

	child.stdout!.on("data", (chunk: Buffer) => {
		stdoutChunks.push(chunk);
	});
	child.stderr!.on("data", (chunk: Buffer) => {
		stderrChunks.push(chunk);
	});

	child.stdin!.end();

	const timer = setTimeout(() => {
		timedOut = true;
		child.kill("SIGTERM");
		// A child that traps or ignores SIGTERM would otherwise keep the test
		// waiting forever; escalate after a short grace period.
		forceKillTimer = setTimeout(() => {
			child.kill("SIGKILL");
		}, 5_000);
	}, timeoutMs);

	const result = new Promise<RunResult>((resolve) => {
		let settled = false;

		// Resolve exactly once, whichever of 'error' / 'close' arrives first.
		const finish = (code: number | null): void => {
			if (settled) {
				return;
			}
			settled = true;
			clearTimeout(timer);
			clearTimeout(forceKillTimer);
			resolve({
				stdout: Buffer.concat(stdoutChunks).toString("utf8"),
				stderr: Buffer.concat(stderrChunks).toString("utf8") + spawnError,
				code,
				timedOut,
			});
		};

		child.on("error", (err: Error) => {
			spawnError = `\n[spawn error] ${err.message}`;
			finish(null);
		});
		child.on("close", (code) => {
			finish(code);
		});
	});

	return { child, result };
}

/**
 * Strip ANSI CSI escape sequences (colors, cursor movement, line erase,
 * private-mode toggles such as `ESC[?25l`) from a string.
 *
 * Matches the ECMA-48 control-sequence shape: `ESC [`, any parameter bytes
 * (`0`–`?`), any intermediate bytes (space–`/`), then one final byte
 * (`@`–`~`). Text without escape sequences is returned unchanged.
 *
 * @param s - Raw process output that may contain terminal escape codes.
 * @returns `s` with every CSI sequence removed.
 */
function stripAnsi(s: string): string {
	// biome-ignore lint/suspicious/noControlCharactersInRegex: ANSI escape sequence
	return s.replace(/\x1b\[[0-?]*[ -\/]*[@-~]/g, "");
}

/**
 * Create a fresh temporary directory pre-populated with the `.sf/` layout
 * the tests rely on (`.sf/milestones` and `.sf/runtime`).
 *
 * @param prefix - Name prefix for the directory created under the OS temp dir.
 * @returns Absolute path of the new directory; the caller removes it.
 */
function createTempWithGsd(prefix: string): string {
	const root = mkdtempSync(join(tmpdir(), prefix));
	for (const subdir of ["milestones", "runtime"]) {
		mkdirSync(join(root, ".sf", subdir), { recursive: true });
	}
	return root;
}

/**
 * Fail the current test if the output contains any known crash signature
 * (uncaught JS error prefixes, module-resolution failures, fatal signals).
 *
 * Markers are checked in list order and the first one present is reported,
 * along with the first 500 characters of the output.
 *
 * @param output - Combined process output, normally already ANSI-stripped.
 */
function assertNoCrashMarkers(output: string): void {
	const firstHit = [
		"SyntaxError:",
		"ReferenceError:",
		"TypeError: Cannot read",
		"FATAL ERROR",
		"ERR_MODULE_NOT_FOUND",
		"Error: Cannot find module",
		"SIGSEGV",
		"SIGABRT",
	].find((marker) => output.includes(marker));

	if (firstHit !== undefined) {
		assert.ok(
			false,
			`output should not contain crash marker '${firstHit}':\n${output.slice(0, 500)}`,
		);
	}
}

// ===========================================================================
// 1. JSON batch mode suppresses streaming — stdout is a single JSON result
// ===========================================================================

test("headless --output-format json emits a single HeadlessJsonResult on stdout", async (t) => {
	const workDir = createTempWithGsd("sf-e2e-json-batch-");
	t.after(() => rmSync(workDir, { recursive: true, force: true }));

	// "--max-restarts 0" is passed so retry loops cannot emit several JSON
	// results, and "--timeout 2000" so the run ends quickly. The run is
	// expected to time out or error (no API key), yet batch mode should
	// still print one HeadlessJsonResult.
	const cliArgs = [
		"headless",
		"--output-format",
		"json",
		"--timeout",
		"2000",
		"--max-restarts",
		"0",
		"auto",
	];
	// Generous harness deadline: the process itself should need only a few
	// seconds (2s timeout plus startup and cleanup).
	const run = await runGsd(cliArgs, 45_000, {}, workDir);

	assert.ok(!run.timedOut, "test harness should not time out");
	// A non-zero exit is the usual outcome, but 0 is tolerated (e.g. when
	// auto-mode finishes immediately); only "no exit code at all" is a failure.
	assert.ok(run.code !== null, "process should exit with a code");

	const body = run.stdout.trim();
	assert.ok(
		body.length > 0,
		`stdout should contain the JSON result, got empty. stderr: ${stripAnsi(run.stderr).slice(0, 300)}`,
	);

	// The whole of stdout has to parse as one JSON document.
	let payload: Record<string, unknown>;
	try {
		payload = JSON.parse(body);
	} catch (err) {
		assert.fail(
			`stdout should be valid JSON, got parse error: ${(err as Error).message}\nstdout: ${body.slice(0, 500)}`,
		);
	}

	// HeadlessJsonResult shape: a fixed schema version plus typed fields,
	// checked in a fixed order.
	assert.equal(payload.schemaVersion, 1, "result should have schemaVersion 1");
	const fieldChecks: Array<[string, string, string]> = [
		["status", "string", "result should have a string 'status' field"],
		["exitCode", "number", "result should have a number 'exitCode' field"],
		["duration", "number", "result should have a number 'duration' field"],
		["cost", "object", "result should have a 'cost' object"],
		["toolCalls", "number", "result should have a number 'toolCalls' field"],
		["events", "number", "result should have a number 'events' field"],
	];
	for (const [field, expectedType, message] of fieldChecks) {
		assert.equal(typeof payload[field], expectedType, message);
	}

	// Batch output must be one line, not newline-separated NDJSON.
	const nonEmptyLines = body.split("\n").filter((line) => line.trim() !== "");
	assert.equal(
		nonEmptyLines.length,
		1,
		`expected exactly one JSON line in stdout, got ${nonEmptyLines.length}`,
	);

	assertNoCrashMarkers(stripAnsi(run.stdout + run.stderr));
});

// ===========================================================================
// 2. SIGINT produces exit code 11 (EXIT_CANCELLED)
// ===========================================================================

test("headless exits with code 11 after SIGINT", async (t) => {
	const tmpDir = createTempWithGsd("sf-e2e-sigint-");
	t.after(() => {
		rmSync(tmpDir, { recursive: true, force: true });
	});

	// Spawn with a long --timeout and --max-restarts 0 so the process stays
	// alive waiting for completion while we send SIGINT.
	const { child, result: resultPromise } = spawnGsd(
		[
			"headless",
			"--timeout",
			"60000",
			"--max-restarts",
			"0",
			"--context-text",
			"Test context for SIGINT",
			"new-milestone",
		],
		30_000,
		{},
		tmpDir,
	);

	// Wait for the first stderr output as the signal that the process has
	// started (the original author notes the SIGINT handler is registered
	// before client.start in runHeadlessOnce). Also stop waiting if the child
	// has already exited, or after a 4s fallback when nothing is written.
	// Whichever fires first, the timer and both listeners are released so
	// nothing lingers after the test.
	await new Promise<void>((resolve) => {
		const stderrStream = child.stderr!;
		let fallback: ReturnType<typeof setTimeout> | undefined;
		const proceed = (): void => {
			clearTimeout(fallback);
			stderrStream.off("data", proceed);
			child.off("close", proceed);
			resolve();
		};
		stderrStream.on("data", proceed);
		child.on("close", proceed);
		fallback = setTimeout(proceed, 4000);
	});

	// Send SIGINT (a no-op if the child has already exited).
	child.kill("SIGINT");

	const result = await resultPromise;
	assert.ok(!result.timedOut, "test harness should not time out");

	const stderr = stripAnsi(result.stderr);

	// In environments where the process completes before SIGINT arrives
	// (e.g., an existing auto-mode session causes an immediate conflict exit),
	// the exit code may be 0 or 1 instead of 11. The test verifies the
	// handler's behavior when it can be observed.
	if (stderr.includes("Interrupted")) {
		// SIGINT handler fired — verify exit code 11
		assert.strictEqual(
			result.code,
			11,
			`SIGINT handler fired but exit code was ${result.code}, expected 11 (EXIT_CANCELLED)`,
		);
	} else {
		// Process exited before SIGINT arrived — acceptable in environments
		// with running sf sessions that cause auto-mode conflict.
		// Verify it at least didn't crash.
		const combined = stripAnsi(result.stdout + result.stderr);
		assertNoCrashMarkers(combined);
		assert.ok(
			result.code === 0 || result.code === 1 || result.code === 11,
			`expected clean exit (0, 1, or 11), got ${result.code}`,
		);
	}
});

// ===========================================================================
// 3. stream-json emits NDJSON on stdout (each line is valid JSON)
// ===========================================================================

test("headless --output-format stream-json emits NDJSON on stdout", async (t) => {
	const workDir = createTempWithGsd("sf-e2e-stream-json-");
	t.after(() => rmSync(workDir, { recursive: true, force: true }));

	// "--max-restarts 0" is passed so retries cannot stretch the run.
	const cliArgs = [
		"headless",
		"--output-format",
		"stream-json",
		"--timeout",
		"2000",
		"--max-restarts",
		"0",
		"auto",
	];
	// The harness deadline is deliberately generous.
	const run = await runGsd(cliArgs, 45_000, {}, workDir);

	assert.ok(!run.timedOut, "test harness should not time out");
	// A non-zero exit is the usual outcome (no API key / timeout); 0 is also
	// tolerated. Only a missing exit code is treated as a failure.
	assert.ok(run.code !== null, "process should exit with a code");

	const body = run.stdout.trim();

	// Empty stdout is allowed: the process may fail before emitting any
	// event. When there is output, though, every non-blank line has to be a
	// standalone JSON document (NDJSON).
	if (body.length > 0) {
		const ndjsonLines = body.split("\n").filter((line) => line.trim() !== "");
		assert.ok(
			ndjsonLines.length > 0,
			"if stdout has content, it should have at least one line",
		);

		for (const [index, line] of ndjsonLines.entries()) {
			try {
				JSON.parse(line);
			} catch (err) {
				assert.fail(
					`stdout line ${index + 1} is not valid JSON: ${(err as Error).message}\nline: ${line.slice(0, 300)}`,
				);
			}
		}
		// Several lines (rather than one batch object) are the expected shape
		// in stream-json mode whenever events fire; no count is asserted.
	}

	assertNoCrashMarkers(stripAnsi(run.stdout + run.stderr));
});

// ===========================================================================
// 4. --resume with nonexistent ID exits 1 with clean error
// ===========================================================================

test("headless --resume with nonexistent ID exits 1 with descriptive error", async (t) => {
	const workDir = createTempWithGsd("sf-e2e-resume-bad-");
	t.after(() => rmSync(workDir, { recursive: true, force: true }));

	const cliArgs = [
		"headless",
		"--resume",
		"nonexistent-id-xyz",
		"--max-restarts",
		"0",
		"auto",
	];
	const run = await runGsd(cliArgs, 30_000, {}, workDir);

	assert.ok(!run.timedOut, "test harness should not time out");
	assert.strictEqual(run.code, 1, `expected exit 1, got ${run.code}`);

	// The error text must either echo the unknown ID or carry the
	// "No session matching" phrase.
	const errText = stripAnsi(run.stderr);
	const acceptedHints = ["nonexistent-id-xyz", "No session matching"];
	assert.ok(
		acceptedHints.some((hint) => errText.includes(hint)),
		`stderr should mention the bad session ID or 'No session matching', got:\n${errText.slice(0, 500)}`,
	);

	assertNoCrashMarkers(stripAnsi(run.stdout + run.stderr));
});

// ===========================================================================
// 5. --output-format with invalid value exits 1 with helpful message
// ===========================================================================

test("headless --output-format with invalid value exits 1", async (t) => {
	const workDir = createTempWithGsd("sf-e2e-bad-format-");
	t.after(() => rmSync(workDir, { recursive: true, force: true }));

	const cliArgs = ["headless", "--output-format", "invalid-format", "auto"];
	const run = await runGsd(cliArgs, 15_000, {}, workDir);

	assert.ok(!run.timedOut, "test harness should not time out");
	assert.strictEqual(run.code, 1, `expected exit 1, got ${run.code}`);

	const errText = stripAnsi(run.stderr);

	// The message has to name every supported format...
	const supportedFormats = ["text", "json", "stream-json"];
	assert.ok(
		supportedFormats.every((format) => errText.includes(format)),
		`stderr should list valid output formats, got:\n${errText.slice(0, 500)}`,
	);

	// ...and repeat the rejected value back to the user.
	assert.ok(
		errText.includes("invalid-format"),
		`stderr should echo the invalid value, got:\n${errText.slice(0, 500)}`,
	);

	assertNoCrashMarkers(stripAnsi(run.stdout + run.stderr));
});