singularity-forge/bin/sf-from-source
2026-05-17 20:07:36 +02:00

189 lines
7.2 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# sf-from-source — run SF directly from this source checkout via node.
#
# Purpose: every local commit in this repo is live immediately without
# rebuilding dist/. Human CLI invocations use this bash shim for better
# shell integration (set -e, pipefail, etc.).
#
# Subagents: SF_BIN_PATH is exported as dist/loader.js (not this shim), so
# all child pi processes spawned by the subagent extension use dist/loader.js
# directly as their entry point. dist/loader.js is a proper Node.js shebang
# entry point, avoiding the bash-script-vs-node parsing issue.
#
# Why node, not bun:
# - bun doesn't ship node:sqlite (sf-db.ts falls back to filesystem-
# derivation degraded mode under bun).
# - bun's native-addon loader doesn't inherit the system library
# search path under Nix (libz.so.1 not found for forge_engine.node).
# - node 26.1+ has stable enough node:sqlite coverage for SF's database-first
# runtime and supports --experimental-strip-types, so .ts runs directly.
# - The src/resources/extensions/sf/tests/resolve-ts.mjs loader hook
# already handles .js → .ts import-specifier remapping for runtime
# resolution.
#
# Contract:
# - Executable shim; human CLI entry point with full shell features.
# - Exports SF_BIN_PATH=dist/loader.js so all child processes (including
# subagent pi instances) use the Node.js entry point directly.
#
# Requirements: node >= 26.1 (via SF_NODE_BIN, mise, or PATH),
# node_modules populated, flock(1) available for the project writer lock.
# Strict mode: stop on command failure, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Subagent dispatch goes through the swarm/messagebus path by default
# instead of subprocess spawn. The opt-in flag has been stable since the
# tests/subagent-via-swarm.test.mjs harness landed; defaulting it here keeps
# subagent traffic on the uok message-bus substrate (table uok_messages)
# rather than spawning child sf processes. A caller-provided non-empty value
# wins: set SF_SUBAGENT_VIA_SWARM=0 (or =false) before invoking sf to opt out.
export SF_SUBAGENT_VIA_SWARM="${SF_SUBAGENT_VIA_SWARM:-1}"
# Directory containing the real script file (symlinks resolved with
# readlink -f), and its parent, which is the source checkout root.
SCRIPT_DIR=$(
  cd -- "$(dirname -- "$(readlink -f "${BASH_SOURCE[0]}")")" >/dev/null 2>&1 && pwd
)
SF_SOURCE_ROOT=$(
  cd -- "${SCRIPT_DIR}/.." >/dev/null 2>&1 && pwd
)

# Pick the node binary, in priority order:
#   1. SF_NODE_BIN, when set and non-empty;
#   2. whatever `mise which node` reports from inside the checkout, when
#      ~/.local/bin/mise is executable and prints a path;
#   3. plain `node`, resolved through PATH at launch time.
NODE_BIN=${SF_NODE_BIN:-}
if [[ -z "$NODE_BIN" && -x "$HOME/.local/bin/mise" ]]; then
  # A failing or silent mise must not abort the launcher; fall through to
  # the PATH default below.
  NODE_BIN=$(cd -- "$SF_SOURCE_ROOT" && "$HOME/.local/bin/mise" which node 2>/dev/null || true)
fi
NODE_BIN=${NODE_BIN:-node}
# IS_HEADLESS is 1 only when the first CLI argument is exactly "headless";
# in that case a progress line is printed before any further setup.
case "${1:-}" in
  headless)
    IS_HEADLESS=1
    echo "[forge] Preparing source runtime for headless command..."
    ;;
  *)
    IS_HEADLESS=0
    ;;
esac

# The project lock lives under .sf/ in the directory sf was invoked from.
SF_PROJECT_LOCK_DIR="$(pwd)/.sf"
SF_PROJECT_LOCK_FILE="${SF_PROJECT_LOCK_DIR}/sf.lock"
#######################################
# Print the pid recorded in a lock file's holder metadata.
# The metadata is expected to contain a "pid=<digits>" token at the start
# of the text or after whitespace; the first such token wins.
# Arguments: $1 - path to the lock file
# Outputs:   the pid on stdout, followed by a newline
# Returns:   non-zero when the file is missing, unreadable, or has no
#            pid token
#######################################
sf_lock_holder_pid() {
  local path="$1"
  local contents
  local pid_pattern='(^|[[:space:]])pid=([0-9]+)'

  # An unreadable or missing file is treated the same as an empty one.
  contents=$(cat "$path" 2>/dev/null || true)

  if [[ "$contents" =~ $pid_pattern ]]; then
    printf '%s\n' "${BASH_REMATCH[2]}"
  else
    return 1
  fi
}
#######################################
# Remove a project lock file left behind by a holder that is gone.
#
# The pid recorded in the file is only a hint: the actual mutual exclusion
# is the flock taken on this file. A process that inherited the lock
# descriptor can outlive the recorded pid, and `kill -0` also fails for a
# live process owned by another user. Unlinking a file that is still
# flocked would let the next invocation lock a fresh inode and run as a
# second writer, so the file is removed only while this function itself
# holds the flock.
#
# Arguments: $1 - path to the lock file
# Outputs:   a notice on stderr when a stale lock file is removed
# Returns:   0 when there is nothing to clean up or the lock is still held
#######################################
sf_cleanup_dead_lock_holder() {
  local lock_file="$1"
  [[ -f "$lock_file" ]] || return 0

  local holder_pid
  holder_pid=$(sf_lock_holder_pid "$lock_file" || true)
  [[ -n "$holder_pid" ]] || return 0

  # Recorded holder is demonstrably alive: leave the lock alone.
  if kill -0 "$holder_pid" 2>/dev/null; then
    return 0
  fi

  if command -v flock >/dev/null 2>&1; then
    # Probe in a subshell so fd 9 and the probe lock vanish when it exits.
    # Any failure (file disappeared, lock still held) means "do not touch".
    (
      exec 9<"$lock_file" || exit 1
      flock -n 9 || exit 1
      rm -f -- "$lock_file" || true
    ) 2>/dev/null || return 0
  else
    # Without flock(1) there is nothing to probe; fall back to the
    # pid-only check performed above.
    rm -f -- "$lock_file" 2>/dev/null || true
  fi

  echo "[forge] Removed stale SF project lock for dead pid $holder_pid: $lock_file" >&2
}
#######################################
# Delete the project lock file, but only when its recorded holder pid is
# this shell's own pid ($$). Removal errors are ignored.
# Globals:   SF_PROJECT_LOCK_FILE (read)
# Arguments: none
# Returns:   0
#######################################
sf_cleanup_own_lock_holder() {
  local recorded_pid
  recorded_pid=$(sf_lock_holder_pid "$SF_PROJECT_LOCK_FILE" || true)

  # Someone else's metadata (or none at all): not ours to remove.
  if [[ "$recorded_pid" != "$$" ]]; then
    return 0
  fi

  rm -f -- "$SF_PROJECT_LOCK_FILE" 2>/dev/null || true
}
# Runs on every invocation, before the lock decision: clear a lock file
# whose recorded holder pid is no longer running.
sf_cleanup_dead_lock_holder "${SF_PROJECT_LOCK_FILE}"
# Single-writer project lock. Two SF processes writing to the same
# .sf/sf.db over WAL cause torn pages and "database disk image is
# malformed" corruption (observed 2026-05-17 in dogfood-5 — see
# .sf/self-feedback for the recurrence pattern). A flock is taken on
# .sf/sf.lock before launching node; it is released automatically when
# this process exits. Known read-only modes (logs, status, dash, sessions,
# list, version, help) skip the lock because concurrent reads are safe and
# useful.
#
# Read-only headless subcommands skip it too — otherwise the operator
# can't even check SF's state while autonomous mode is running (regression
# observed 2026-05-17 when `sf headless query` / `feedback list` / --help
# were rejected with "Another sf is already running" despite being pure
# reads).

#######################################
# Decide whether an invocation must hold the project writer lock.
# Arguments: the CLI arguments exactly as passed to sf ("$@")
# Returns:   0 when the lock is required, 1 when the command is exempt
#######################################
sf_needs_project_lock() {
  local arg
  case "${1:-} ${2:-}" in
    "server "* | "serve "* | "web "*)
      # server owns its own lifecycle; do not hold the project writer
      # lock forever
      return 1
      ;;
    "logs "* | "status "* | "dash "* | "sessions "* | "list "* \
      | "--version "* | "-v "* | "--help "* | "-h "*)
      # top-level read-only — no lock needed
      return 1
      ;;
    "headless --help"* | "headless -h"* | "headless --version"* \
      | "headless -v"* | "headless query"* | "headless status"* \
      | "headless usage"* | "headless reflect"* | "headless ")
      # headless read-only subcommand — no lock needed
      return 1
      ;;
    "headless feedback"* | "headless triage"*)
      # `feedback list` and `triage --list` are read-only; `feedback
      # add/resolve` and `triage --run/--apply` are writes. Inspect each
      # argument after the subcommand on its own, so that free text inside
      # a quoted argument (e.g. `feedback add "please list items"`) cannot
      # pass for a read-only flag and let a write run unlocked.
      for arg in "${@:3}"; do
        case "$arg" in
          list* | --list* | --json*)
            return 1 # read-only inspect
            ;;
        esac
      done
      return 0
      ;;
  esac
  return 0
}

# Start from a clean slate so a __SF_NEEDS_LOCK value inherited from the
# environment cannot force or fake the decision; the flag ends up set to 1
# only when the lock is required and stays unset otherwise.
unset -v __SF_NEEDS_LOCK
if sf_needs_project_lock "$@"; then
  __SF_NEEDS_LOCK=1
fi
# Take the single-writer project lock when the invocation needs it
# (__SF_NEEDS_LOCK=1) and the operator has not bypassed it with a non-empty
# SF_SKIP_LOCK. On contention, print the current holder and exit 75.
if [[ "${__SF_NEEDS_LOCK:-}" == "1" && -z "${SF_SKIP_LOCK:-}" ]]; then
  mkdir -p "$SF_PROJECT_LOCK_DIR" 2>/dev/null || true
  # Open read+write WITHOUT truncating so the collision branch can still
  # read the current holder before failing. Truncation happens AFTER flock
  # succeeds (below) so two racers don't clobber each other's holder
  # metadata.
  exec 200<>"$SF_PROJECT_LOCK_FILE"
  if ! flock -n 200; then
    # Best-effort read: the holder may remove the file between the failed
    # flock and this read. Without `|| true`, errexit would abort here with
    # status 1 and skip both the diagnostics and the exit code below.
    holder=$(cat "$SF_PROJECT_LOCK_FILE" 2>/dev/null || true)
    [[ -n "$holder" ]] || holder="(no metadata)"
    echo "[forge] Another sf is already running in $(pwd)" >&2
    echo "[forge] Lock holder: $holder" >&2
    echo "[forge] If you're sure no other sf is running, remove $SF_PROJECT_LOCK_FILE and retry." >&2
    echo "[forge] To bypass (NOT RECOMMENDED — risks WAL corruption): SF_SKIP_LOCK=1 sf ..." >&2
    exit 75 # EX_TEMPFAIL
  fi
  # Truncate + write our holder metadata AFTER acquiring the lock.
  : > "$SF_PROJECT_LOCK_FILE"
  echo "pid=$$ args=$* cwd=$(pwd) started=$(date -Iseconds)" >&200
  # Remove our own lock file on any exit path.
  trap sf_cleanup_own_lock_holder EXIT
fi
# SF_BIN_PATH is the absolute path to dist/loader.js, NOT this shim. The
# subagent extension spawns it for child pi processes: dist/loader.js is a
# proper Node.js entry point, whereas a bash script spawned by Node.js as an
# executable gets parsed as JS and dies with a SyntaxError.
SF_BIN_PATH="${SF_SOURCE_ROOT}/dist/loader.js"
export SF_BIN_PATH

# SF_CLI_PATH keeps a caller-supplied non-empty value; otherwise it points
# at this shim.
: "${SF_CLI_PATH:=${SCRIPT_DIR}/sf-from-source}"
export SF_CLI_PATH

# Run the resource preparation script; under errexit a failure here stops
# the launch.
"$NODE_BIN" "${SF_SOURCE_ROOT}/scripts/ensure-source-resources.cjs"

case "$IS_HEADLESS" in
  1) echo "[forge] Launching source CLI..." ;;
esac
# Run src/loader.ts under node and propagate its exit status. Exit status
# 12 means the runtime asked to be reloaded: in an interactive,
# non-headless session (stdin and stdout are both terminals) the CLI is
# restarted with --continue instead of exiting.
NEXT_ARGS=("$@")
while true; do
  # Capture the status without tripping errexit.
  status=0
  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array; a bare
  # "${arr[@]}" aborts with "unbound variable" under `set -u` on bash older
  # than 4.4 when sf is started with no arguments.
  "$NODE_BIN" \
    --import "$SF_SOURCE_ROOT/src/resources/extensions/sf/tests/resolve-ts.mjs" \
    --experimental-strip-types \
    --no-warnings \
    "$SF_SOURCE_ROOT/src/loader.ts" ${NEXT_ARGS[@]+"${NEXT_ARGS[@]}"} || status=$?
  if [[ "$status" == "12" && "$IS_HEADLESS" != "1" && -t 0 && -t 1 ]]; then
    echo "[forge] Runtime reload requested — restarting source CLI with --continue..."
    NEXT_ARGS=("--continue")
    continue
  fi
  exit "$status"
done