feat: add repo harness profiler

Mikael Hugo 2026-04-29 17:39:52 +02:00
parent a611db9032
commit d02d33aa70
6 changed files with 1061 additions and 2 deletions

@@ -0,0 +1,79 @@
/**
* commands-harness.ts - repo-native harness evolution commands.
*
* Purpose: expose the read-only profiler so operators can seed harness
* evolution state without changing prompts or claiming untracked files.
*/
import { join } from "node:path";
import type { ExtensionCommandContext } from "@singularity-forge/pi-coding-agent";
import { ensureDbOpen } from "./bootstrap/dynamic-tools.js";
import { projectRoot } from "./commands/context.js";
import { profileRepository } from "./repo-profiler.js";
import { recordRepoProfile } from "./sf-db.js";
function formatProfileSummary(
profile: ReturnType<typeof profileRepository>,
): string {
const untracked = profile.git.changedFiles.filter(
(file) => file.gitStatus === "untracked",
).length;
const modified = profile.git.changedFiles.filter(
(file) => file.gitStatus === "modified",
).length;
const stacks =
profile.stacks
.map((stack) => stack.kind)
.filter(Boolean)
.join(", ") || "none detected";
const risks =
profile.riskHints
.map((hint) => hint.family)
.filter(Boolean)
.join(", ") || "none detected";
return [
"Repo harness profile recorded",
`Profile: ${profile.profileId}`,
`State: ${join(profile.projectRoot, ".sf", "sf.db")}`,
`Branch: ${profile.git.branch ?? "unknown"}`,
`Changed files: ${profile.git.changedFiles.length} (${modified} modified, ${untracked} untracked)`,
`Stacks: ${stacks}`,
`Risk hints: ${risks}`,
"",
"Untracked files were recorded as observations only; SF did not stage or adopt them.",
].join("\n");
}
/**
* Run repo harness profiling and persist the resulting snapshot.
*
* Purpose: give users and future auto-flow slices an explicit entry point for
* harness evolution's read-only observation phase.
*
* Consumer: `/sf harness profile` command.
*/
export async function handleHarness(
args: string,
ctx: ExtensionCommandContext,
): Promise<void> {
const subcommand = args.trim() || "profile";
if (!["profile", "snapshot", "status"].includes(subcommand)) {
ctx.ui.notify(
"Usage: /sf harness profile\nRecords a read-only repo profile for harness evolution.",
"warning",
);
return;
}
const basePath = projectRoot();
const opened = await ensureDbOpen(basePath);
if (!opened) {
ctx.ui.notify("No SF database available. Run /sf init first.", "warning");
return;
}
const profile = profileRepository(basePath);
recordRepoProfile(profile);
ctx.ui.notify(formatProfileSummary(profile), "info");
}
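// Illustrative notify output for a small dirty repo (every value below is
// hypothetical; the shape follows formatProfileSummary above):
//
//   Repo harness profile recorded
//   Profile: rp_3f9c2a1b07de4c5aa1b2c3d4
//   State: /home/dev/acme/.sf/sf.db
//   Branch: main
//   Changed files: 3 (2 modified, 1 untracked)
//   Stacks: node, nix
//   Risk hints: infrastructure, harness-drift, verification-gap
//
//   Untracked files were recorded as observations only; SF did not stage or adopt them.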

@@ -15,7 +15,7 @@ export interface GsdCommandDefinition {
type CompletionMap = Record<string, readonly GsdCommandDefinition[]>;
export const SF_COMMAND_DESCRIPTION =
"SF — Singularity Forge: /sf help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan";
"SF — Singularity Forge: /sf help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|harness|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan";
export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [
{ cmd: "help", desc: "Categorized command reference with descriptions" },
@@ -93,6 +93,10 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [
cmd: "knowledge",
desc: "Add persistent project knowledge (rule, pattern, or lesson)",
},
{
cmd: "harness",
desc: "Repo-native harness evolution (profile, status)",
},
{
cmd: "new-milestone",
desc: "Create a milestone from a specification document (headless)",
@@ -245,6 +249,13 @@ const NESTED_COMPLETIONS: CompletionMap = {
{ cmd: "pattern", desc: "Add a code pattern to follow" },
{ cmd: "lesson", desc: "Record a lesson learned" },
],
harness: [
{
cmd: "profile",
desc: "Record a read-only repo profile for harness evolution",
},
{ cmd: "status", desc: "Alias for profile in the first implementation" },
],
start: [
{ cmd: "bugfix", desc: "Triage, fix, test, and ship a bug fix" },
{

@@ -239,6 +239,11 @@ Examples:
);
return true;
}
if (trimmed === "harness" || trimmed.startsWith("harness ")) {
const { handleHarness } = await import("../../commands-harness.js");
await handleHarness(trimmed.replace(/^harness\s*/, "").trim(), ctx);
return true;
}
if (trimmed === "migrate" || trimmed.startsWith("migrate ")) {
const { handleMigrate } = await import("../../migrate/command.js");
await handleMigrate(trimmed.replace(/^migrate\s*/, "").trim(), ctx, pi);

@@ -0,0 +1,560 @@
/**
* repo-profiler.ts - read-only repository shape profiler.
*
* Purpose: give harness evolution a factual repo snapshot without staging,
* deleting, or claiming ownership of user files.
*/
import { execFileSync } from "node:child_process";
import { createHash } from "node:crypto";
import { existsSync, readFileSync, realpathSync, statSync } from "node:fs";
import { basename, extname, join, sep } from "node:path";
import { GIT_NO_PROMPT_ENV } from "./git-constants.js";
/**
* Classifies a path's git state in the repo profile.
*
* Purpose: let harness planning distinguish user changes from untracked
* observations without inferring ownership from raw porcelain codes.
*
* Consumer: RepoFileObservation rows stored by sf-db and `/sf harness profile`.
*/
export type RepoGitStatus =
| "tracked"
| "modified"
| "deleted"
| "renamed"
| "untracked"
| "ignored";
/**
* Describes who may mutate an observed path by default.
*
* Purpose: prevent SF from staging or rewriting files that were only observed
* during profiling.
*
* Consumer: RepoFileObservation rows and future harness adoption checks.
*/
export type RepoFileOwnership =
| "sf_generated"
| "user_owned"
| "observed_only"
| "candidate_harness";
/**
* Describes one changed or untracked file seen during repository profiling.
*
* Purpose: preserve path-level repo reality for harness planning while keeping
* untracked files explicitly read-only.
*
* Consumer: RepoProfile.git.changedFiles and repo_file_observations.
*/
export interface RepoFileObservation {
path: string;
gitStatus: RepoGitStatus;
ownership: RepoFileOwnership;
language: string | null;
sizeBytes: number;
contentHash: string | null;
summary: string | null;
firstSeenAt: string;
lastSeenAt: string;
adoptedAt: string | null;
adoptionUnitId: string | null;
}
/**
* Records a detected language or platform stack.
*
* Purpose: drive harness template selection from concrete repo files.
*
* Consumer: risk classification and future template-kit matching.
*/
export interface StackSignal {
kind: string;
sourcePath: string;
confidence: number;
}
/**
* Records a likely application or service entry point.
*
* Purpose: guide harness generation toward runnable surfaces.
*
* Consumer: future harness planner and smoke-test proposal generation.
*/
export interface EntrypointSignal {
kind: string;
path: string;
confidence: number;
}
/**
* Records detected test infrastructure.
*
* Purpose: let harness planning distinguish missing coverage from existing
* repo-native checks.
*
* Consumer: risk hints and future harness inventory comparison.
*/
export interface TestSignal {
kind: string;
path: string;
confidence: number;
}
/**
* Records detected CI configuration.
*
* Purpose: determine whether generated harness gates can be wired into an
* existing automation surface.
*
* Consumer: risk hints and future CI snippet proposals.
*/
export interface CiSignal {
kind: string;
path: string;
confidence: number;
}
/**
* Records detected project documentation.
*
* Purpose: seed harness planning from existing specs and architecture docs.
*
* Consumer: future prompt/context selection for harness proposals.
*/
export interface DocumentSignal {
kind: string;
path: string;
confidence: number;
}
/**
* Records detected database or migration surfaces.
*
* Purpose: trigger database-specific harness requirements.
*
* Consumer: risk hints and future migration-gate proposals.
*/
export interface DataStoreSignal {
kind: string;
path: string;
confidence: number;
}
/**
* Records detected network-facing application surfaces.
*
* Purpose: trigger web/API/gateway smoke-test harness requirements.
*
* Consumer: risk hints and future smoke-test proposal generation.
*/
export interface NetworkSurfaceSignal {
kind: string;
path: string;
confidence: number;
}
/**
* Records one risk family inferred from the repo profile.
*
* Purpose: make harness gaps explainable before SF proposes generated files.
*
* Consumer: `/sf harness profile` summaries and future harness planner input.
*/
export interface RiskHint {
family: string;
reason: string;
confidence: number;
}
/**
* Captures a read-only snapshot of repository shape.
*
* Purpose: provide the factual base for repo-native harness evolution without
* mutating the working tree.
*
* Consumer: sf-db profile persistence and `/sf harness profile`.
*/
export interface RepoProfile {
profileId: string;
projectHash: string;
projectRoot: string;
git: {
head: string | null;
branch: string | null;
remoteHash: string | null;
dirty: boolean;
changedFiles: RepoFileObservation[];
};
stacks: StackSignal[];
entrypoints: EntrypointSignal[];
tests: TestSignal[];
ci: CiSignal[];
docs: DocumentSignal[];
dataStores: DataStoreSignal[];
networkSurfaces: NetworkSurfaceSignal[];
riskHints: RiskHint[];
createdAt: string;
}
/**
* Configures deterministic profile generation.
*
* Purpose: allow tests and future scheduled snapshots to pin timestamps.
*
* Consumer: profileRepository callers.
*/
export interface RepoProfileOptions {
now?: () => string;
}
const HASH_READ_LIMIT_BYTES = 1024 * 1024;
function git(args: string[], cwd: string, allowFailure = true): string {
try {
return execFileSync("git", args, {
cwd,
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf-8",
env: GIT_NO_PROMPT_ENV,
}).trimEnd();
} catch {
if (allowFailure) return "";
throw new Error(`git ${args.join(" ")} failed in ${cwd}`);
}
}
function sha256(input: string): string {
return createHash("sha256").update(input).digest("hex");
}
function normalizePath(path: string): string {
return path.split(sep).join("/");
}
function pathExists(basePath: string, relPath: string): boolean {
return existsSync(join(basePath, relPath));
}
function signal<T extends { kind: string; path?: string; sourcePath?: string }>(
items: T[],
item: T,
): void {
items.push(item);
}
function languageForPath(path: string): string | null {
const ext = extname(path).toLowerCase();
const file = basename(path).toLowerCase();
if (file === "go.mod" || ext === ".go") return "go";
if (file === "package.json" || ext === ".ts" || ext === ".tsx")
return "typescript";
if (ext === ".js" || ext === ".jsx" || ext === ".mjs" || ext === ".cjs")
return "javascript";
if (file === "cargo.toml" || ext === ".rs") return "rust";
if (file === "pyproject.toml" || ext === ".py") return "python";
if (file === "flake.nix" || file === "shell.nix" || ext === ".nix")
return "nix";
if (ext === ".md" || ext === ".markdown") return "markdown";
if (ext === ".sql") return "sql";
if (file.includes("dockerfile")) return "dockerfile";
if (ext === ".yml" || ext === ".yaml") return "yaml";
if (ext === ".json") return "json";
return null;
}
function summarizeFile(
basePath: string,
relPath: string,
): {
sizeBytes: number;
contentHash: string | null;
summary: string | null;
} {
const fullPath = join(basePath, relPath);
try {
const stat = statSync(fullPath);
if (!stat.isFile()) {
return { sizeBytes: stat.size, contentHash: null, summary: null };
}
if (stat.size > HASH_READ_LIMIT_BYTES) {
return { sizeBytes: stat.size, contentHash: null, summary: null };
}
const content = readFileSync(fullPath);
const contentHash = `sha256:${createHash("sha256").update(content).digest("hex")}`;
const asText = content.toString("utf8");
const firstLine = asText
.split(/\r?\n/)
.map((line) => line.trim())
.find((line) => line.length > 0);
return {
sizeBytes: stat.size,
contentHash,
summary: firstLine ? firstLine.slice(0, 160) : null,
};
} catch {
return { sizeBytes: 0, contentHash: null, summary: null };
}
}
function parsePorcelainStatus(
line: string,
basePath: string,
now: string,
): RepoFileObservation | null {
if (line.length < 4) return null;
const code = line.slice(0, 2);
const rawPath = line.slice(3);
const normalizedRawPath = normalizePath(rawPath);
const path = normalizedRawPath.includes(" -> ")
? normalizedRawPath.split(" -> ").at(-1)!
: normalizedRawPath;
let gitStatus: RepoGitStatus = "modified";
if (code === "??") gitStatus = "untracked";
else if (code.includes("D")) gitStatus = "deleted";
else if (code.includes("R")) gitStatus = "renamed";
const ownership: RepoFileOwnership =
gitStatus === "untracked" ? "observed_only" : "user_owned";
const fileSummary =
gitStatus === "deleted"
? { sizeBytes: 0, contentHash: null, summary: null }
: summarizeFile(basePath, path);
return {
path,
gitStatus,
ownership,
language: languageForPath(path),
sizeBytes: fileSummary.sizeBytes,
contentHash: fileSummary.contentHash,
summary: fileSummary.summary,
firstSeenAt: now,
lastSeenAt: now,
adoptedAt: null,
adoptionUnitId: null,
};
}
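// Illustrative porcelain v1 lines and how parsePorcelainStatus classifies them
// (paths are hypothetical):
//
//   "?? docs/scratch.md"    -> gitStatus "untracked", ownership "observed_only"
//   " M src/index.ts"       -> gitStatus "modified",  ownership "user_owned"
//   " D legacy/old.ts"      -> gitStatus "deleted",   ownership "user_owned", no hash or summary
//   "R  old.ts -> new.ts"   -> gitStatus "renamed",   path "new.ts"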
function detectStacks(basePath: string): StackSignal[] {
const stacks: StackSignal[] = [];
if (pathExists(basePath, "package.json"))
signal(stacks, { kind: "node", sourcePath: "package.json", confidence: 1 });
if (pathExists(basePath, "go.mod"))
signal(stacks, { kind: "go", sourcePath: "go.mod", confidence: 1 });
if (pathExists(basePath, "Cargo.toml"))
signal(stacks, { kind: "rust", sourcePath: "Cargo.toml", confidence: 1 });
if (pathExists(basePath, "pyproject.toml"))
signal(stacks, {
kind: "python",
sourcePath: "pyproject.toml",
confidence: 1,
});
if (pathExists(basePath, "flake.nix"))
signal(stacks, { kind: "nix", sourcePath: "flake.nix", confidence: 1 });
if (pathExists(basePath, "shell.nix"))
signal(stacks, { kind: "nix", sourcePath: "shell.nix", confidence: 0.9 });
return stacks;
}
function detectEntrypoints(basePath: string): EntrypointSignal[] {
const entrypoints: EntrypointSignal[] = [];
for (const path of [
"src/index.ts",
"src/main.ts",
"src/cli.ts",
"cmd",
"main.go",
"portal/main.go",
"dr-agent/main.go",
]) {
if (pathExists(basePath, path))
signal(entrypoints, { kind: "entrypoint", path, confidence: 0.8 });
}
return entrypoints;
}
function detectTests(basePath: string): TestSignal[] {
const tests: TestSignal[] = [];
for (const path of ["tests", "test", "__tests__", "src/tests"]) {
if (pathExists(basePath, path))
signal(tests, { kind: "test-directory", path, confidence: 0.8 });
}
return tests;
}
function detectCi(basePath: string): CiSignal[] {
const ci: CiSignal[] = [];
for (const path of [".github/workflows", ".gitlab-ci.yml", "Jenkinsfile"]) {
if (pathExists(basePath, path))
signal(ci, { kind: "ci", path, confidence: 0.9 });
}
return ci;
}
function detectDocs(basePath: string): DocumentSignal[] {
const docs: DocumentSignal[] = [];
for (const path of [
"SPEC.md",
"ARCHITECTURE.md",
"AGENTS.md",
"README.md",
"docs",
]) {
if (pathExists(basePath, path))
signal(docs, { kind: "doc", path, confidence: 0.9 });
}
return docs;
}
function detectDataStores(basePath: string): DataStoreSignal[] {
const stores: DataStoreSignal[] = [];
for (const path of ["migrations", "prisma", "db", "sql"]) {
if (pathExists(basePath, path))
signal(stores, { kind: "database", path, confidence: 0.8 });
}
return stores;
}
function detectNetworkSurfaces(basePath: string): NetworkSurfaceSignal[] {
const surfaces: NetworkSurfaceSignal[] = [];
for (const path of ["portal", "gateway", "api", "web", "server"]) {
if (pathExists(basePath, path))
signal(surfaces, { kind: "network", path, confidence: 0.7 });
}
return surfaces;
}
function buildRiskHints(args: {
stacks: StackSignal[];
tests: TestSignal[];
ci: CiSignal[];
dataStores: DataStoreSignal[];
networkSurfaces: NetworkSurfaceSignal[];
changedFiles: RepoFileObservation[];
}): RiskHint[] {
const hints: RiskHint[] = [];
if (args.networkSurfaces.length > 0) {
hints.push({
family: "web",
reason: "network-facing directories detected",
confidence: 0.7,
});
}
if (args.dataStores.length > 0) {
hints.push({
family: "database",
reason: "database or migration directories detected",
confidence: 0.8,
});
}
if (args.stacks.some((stack) => stack.kind === "nix")) {
hints.push({
family: "infrastructure",
reason: "Nix project files detected",
confidence: 0.8,
});
}
if (args.changedFiles.some((file) => file.gitStatus === "untracked")) {
hints.push({
family: "harness-drift",
reason: "untracked files observed in working tree",
confidence: 0.6,
});
}
if (args.tests.length === 0 || args.ci.length === 0) {
hints.push({
family: "verification-gap",
reason: "tests or CI signals are missing from the repo profile",
confidence: 0.5,
});
}
return hints;
}
function canonicalRemote(remote: string): string {
return remote
.trim()
.replace(/^https?:\/\/([^/@]+@)?/i, "https://")
.replace(/\.git$/i, "")
.toLowerCase();
}
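// Example (illustrative): both remotes below canonicalize to
// "https://github.com/org/repo", so the derived projectHash stays stable across
// credentialed and plain HTTPS URLs:
//
//   canonicalRemote("https://ci-bot@GitHub.com/Org/Repo.git")
//   canonicalRemote("https://github.com/org/repo")
//
// SSH remotes such as "git@github.com:org/repo.git" only get lowercased and
// ".git"-stripped, so they hash differently from their HTTPS equivalents.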
/**
* Build a read-only repository profile from git status and well-known files.
*
* Purpose: seed harness evolution with observable repo facts while preserving
* user ownership of untracked and modified files.
*
* Consumer: `/sf harness profile` and future pre-plan harness snapshots.
*/
export function profileRepository(
basePath: string,
options: RepoProfileOptions = {},
): RepoProfile {
const createdAt = options.now?.() ?? new Date().toISOString();
const projectRoot = realpathSync(basePath);
const head = git(["rev-parse", "HEAD"], projectRoot) || null;
const branch = git(["branch", "--show-current"], projectRoot) || null;
const remote = git(["remote", "get-url", "origin"], projectRoot) || "";
const remoteHash = remote
? `sha256:${sha256(canonicalRemote(remote))}`
: null;
const projectHash = remote
? sha256(canonicalRemote(remote)).slice(0, 16)
: sha256(projectRoot).slice(0, 16);
const status = git(["status", "--porcelain=v1", "-uall"], projectRoot);
const changedFiles = status
.split(/\r?\n/)
.map((line) => line.trimEnd())
.filter(Boolean)
.map((line) => parsePorcelainStatus(line, projectRoot, createdAt))
.filter((obs): obs is RepoFileObservation => obs !== null)
.sort((a, b) => a.path.localeCompare(b.path));
const stacks = detectStacks(projectRoot);
const entrypoints = detectEntrypoints(projectRoot);
const tests = detectTests(projectRoot);
const ci = detectCi(projectRoot);
const docs = detectDocs(projectRoot);
const dataStores = detectDataStores(projectRoot);
const networkSurfaces = detectNetworkSurfaces(projectRoot);
const riskHints = buildRiskHints({
stacks,
tests,
ci,
dataStores,
networkSurfaces,
changedFiles,
});
const profileId = `rp_${sha256(
`${projectHash}:${head ?? ""}:${branch ?? ""}:${createdAt}:${changedFiles
.map((file) => `${file.gitStatus}:${file.path}:${file.contentHash ?? ""}`)
.join("|")}`,
).slice(0, 24)}`;
return {
profileId,
projectHash,
projectRoot,
git: {
head,
branch,
remoteHash,
dirty: changedFiles.length > 0,
changedFiles,
},
stacks,
entrypoints,
tests,
ci,
docs,
dataStores,
networkSurfaces,
riskHints,
createdAt,
};
}
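// Minimal usage sketch (path hypothetical); mirrors what `/sf harness profile`
// does, with `now` pinned so repeated runs over an unchanged tree yield the same
// profileId:
//
//   const profile = profileRepository("/home/dev/acme", {
//     now: () => "2026-04-29T10:00:00.000Z",
//   });
//   recordRepoProfile(profile); // from ./sf-db.js; requires an open SF database
//   console.log(profile.riskHints.map((hint) => hint.family).join(", "));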

@@ -197,7 +197,7 @@ function openRawDb(path: string): unknown {
return new Database(path);
}
-const SCHEMA_VERSION = 20;
+const SCHEMA_VERSION = 21;
function indexExists(db: DbAdapter, name: string): boolean {
return !!db
@@ -226,6 +226,44 @@ function ensureVerificationEvidenceDedupIndex(db: DbAdapter): void {
);
}
function ensureRepoProfileTables(db: DbAdapter): void {
db.exec(`
CREATE TABLE IF NOT EXISTS repo_profiles (
profile_id TEXT PRIMARY KEY,
project_hash TEXT NOT NULL,
project_root TEXT NOT NULL DEFAULT '',
head TEXT DEFAULT NULL,
branch TEXT DEFAULT NULL,
remote_hash TEXT DEFAULT NULL,
dirty INTEGER NOT NULL DEFAULT 0,
profile_json TEXT NOT NULL DEFAULT '{}',
created_at TEXT NOT NULL
)
`);
db.exec(`
CREATE TABLE IF NOT EXISTS repo_file_observations (
path TEXT PRIMARY KEY,
latest_profile_id TEXT NOT NULL,
git_status TEXT NOT NULL,
ownership TEXT NOT NULL,
language TEXT DEFAULT NULL,
size_bytes INTEGER NOT NULL DEFAULT 0,
content_hash TEXT DEFAULT NULL,
summary TEXT DEFAULT NULL,
first_seen_at TEXT NOT NULL,
last_seen_at TEXT NOT NULL,
adopted_at TEXT DEFAULT NULL,
adoption_unit_id TEXT DEFAULT NULL
)
`);
db.exec(
"CREATE INDEX IF NOT EXISTS idx_repo_profiles_created ON repo_profiles(created_at DESC)",
);
db.exec(
"CREATE INDEX IF NOT EXISTS idx_repo_file_observations_status ON repo_file_observations(git_status, ownership)",
);
}
function initSchema(db: DbAdapter, fileBacked: boolean): void {
if (fileBacked) db.exec("PRAGMA journal_mode=WAL");
if (fileBacked) db.exec("PRAGMA busy_timeout = 5000");
@@ -609,6 +647,7 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void {
db.exec(
"CREATE INDEX IF NOT EXISTS idx_llm_task_outcomes_provider ON llm_task_outcomes(provider, recorded_at DESC)",
);
ensureRepoProfileTables(db);
db.exec(
`CREATE VIEW IF NOT EXISTS active_decisions AS SELECT * FROM decisions WHERE superseded_by IS NULL`,
@@ -1414,6 +1453,16 @@ function migrateSchema(db: DbAdapter): void {
});
}
if (currentVersion < 21) {
ensureRepoProfileTables(db);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 21,
":applied_at": new Date().toISOString(),
});
}
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");
@@ -4019,6 +4068,236 @@ export function insertLlmTaskOutcome(input: LlmTaskOutcomeInput): boolean {
}
}
/**
* Input shape for persisting a repository profile snapshot.
*
* Purpose: keep sf-db decoupled from repo-profiler runtime imports while
* enforcing the storage contract at compile time.
*
* Consumer: recordRepoProfile callers.
*/
export interface RepoProfileStorageInput {
profileId: string;
projectHash: string;
projectRoot: string;
git: {
head: string | null;
branch: string | null;
remoteHash: string | null;
dirty: boolean;
changedFiles: Array<{
path: string;
gitStatus: string;
ownership: string;
language: string | null;
sizeBytes: number;
contentHash: string | null;
summary: string | null;
firstSeenAt: string;
lastSeenAt: string;
adoptedAt: string | null;
adoptionUnitId: string | null;
}>;
};
createdAt: string;
}
/**
* Database row returned for a stored repository profile.
*
* Purpose: expose profile metadata without forcing callers to parse JSON.
*
* Consumer: harness status and future drift diagnostics.
*/
export interface RepoProfileRow {
profileId: string;
projectHash: string;
projectRoot: string;
head: string | null;
branch: string | null;
remoteHash: string | null;
dirty: boolean;
profileJson: string;
createdAt: string;
}
/**
* Database row returned for the current path observation ledger.
*
* Purpose: let harness planning inspect file ownership and first/last-seen
* timestamps without touching raw SQL.
*
* Consumer: future harness planner and diagnostics.
*/
export interface RepoFileObservationRow {
path: string;
latestProfileId: string;
gitStatus: string;
ownership: string;
language: string | null;
sizeBytes: number;
contentHash: string | null;
summary: string | null;
firstSeenAt: string;
lastSeenAt: string;
adoptedAt: string | null;
adoptionUnitId: string | null;
}
function asStringOrNull(value: unknown): string | null {
return typeof value === "string" && value.length > 0 ? value : null;
}
/**
* Persist a repository profile snapshot and update current file observations.
*
* Purpose: make harness evolution's read-only repo facts queryable across
* sessions while preserving first-seen timestamps for untracked observations.
*
* Consumer: `/sf harness profile` and future pre-plan profile snapshots.
*/
export function recordRepoProfile(profile: RepoProfileStorageInput): void {
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
transaction(() => {
currentDb!
.prepare(
`INSERT OR REPLACE INTO repo_profiles (
profile_id, project_hash, project_root, head, branch, remote_hash,
dirty, profile_json, created_at
) VALUES (
:profile_id, :project_hash, :project_root, :head, :branch, :remote_hash,
:dirty, :profile_json, :created_at
)`,
)
.run({
":profile_id": profile.profileId,
":project_hash": profile.projectHash,
":project_root": profile.projectRoot,
":head": profile.git.head,
":branch": profile.git.branch,
":remote_hash": profile.git.remoteHash,
":dirty": profile.git.dirty ? 1 : 0,
":profile_json": JSON.stringify(profile),
":created_at": profile.createdAt,
});
const stmt = currentDb!.prepare(
`INSERT INTO repo_file_observations (
path, latest_profile_id, git_status, ownership, language, size_bytes,
content_hash, summary, first_seen_at, last_seen_at, adopted_at,
adoption_unit_id
) VALUES (
:path, :latest_profile_id, :git_status, :ownership, :language, :size_bytes,
:content_hash, :summary, :first_seen_at, :last_seen_at, :adopted_at,
:adoption_unit_id
)
ON CONFLICT(path) DO UPDATE SET
latest_profile_id = excluded.latest_profile_id,
git_status = excluded.git_status,
ownership = CASE
WHEN repo_file_observations.ownership = 'sf_generated'
THEN repo_file_observations.ownership
WHEN repo_file_observations.ownership = 'candidate_harness'
THEN repo_file_observations.ownership
ELSE excluded.ownership
END,
language = excluded.language,
size_bytes = excluded.size_bytes,
content_hash = excluded.content_hash,
summary = excluded.summary,
first_seen_at = repo_file_observations.first_seen_at,
last_seen_at = excluded.last_seen_at,
adopted_at = COALESCE(repo_file_observations.adopted_at, excluded.adopted_at),
adoption_unit_id = COALESCE(repo_file_observations.adoption_unit_id, excluded.adoption_unit_id)`,
);
for (const file of profile.git.changedFiles) {
stmt.run({
":path": file.path,
":latest_profile_id": profile.profileId,
":git_status": file.gitStatus,
":ownership": file.ownership,
":language": file.language,
":size_bytes": file.sizeBytes,
":content_hash": file.contentHash,
":summary": file.summary,
":first_seen_at": file.firstSeenAt,
":last_seen_at": file.lastSeenAt,
":adopted_at": file.adoptedAt,
":adoption_unit_id": file.adoptionUnitId,
});
}
});
}
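// Upsert semantics worth noting: re-observing a path refreshes latest_profile_id,
// git_status, language, size_bytes, content_hash, summary, and last_seen_at, but
// keeps the original first_seen_at, keeps 'sf_generated' / 'candidate_harness'
// ownership once set, and only fills adopted_at / adoption_unit_id when they are
// still NULL.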
/**
* Return the most recently recorded repository profile.
*
* Purpose: let harness planning and diagnostics inspect the latest factual
* repo snapshot without re-running the profiler.
*
* Consumer: harness status commands and future plan-phase coverage checks.
*/
export function getLatestRepoProfile(): RepoProfileRow | null {
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
const row = currentDb
.prepare(
`SELECT profile_id, project_hash, project_root, head, branch, remote_hash,
dirty, profile_json, created_at
FROM repo_profiles
ORDER BY created_at DESC, profile_id DESC
LIMIT 1`,
)
.get();
if (!row) return null;
return {
profileId: row["profile_id"] as string,
projectHash: row["project_hash"] as string,
projectRoot: row["project_root"] as string,
head: asStringOrNull(row["head"]),
branch: asStringOrNull(row["branch"]),
remoteHash: asStringOrNull(row["remote_hash"]),
dirty: row["dirty"] === 1,
profileJson: (row["profile_json"] as string) ?? "{}",
createdAt: row["created_at"] as string,
};
}
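// Hedged read-back sketch: profile_json carries the full snapshot, so callers can
// rehydrate it without re-running the profiler (the cast below assumes the stored
// JSON still matches RepoProfileStorageInput):
//
//   const latest = getLatestRepoProfile();
//   const snapshot = latest
//     ? (JSON.parse(latest.profileJson) as RepoProfileStorageInput)
//     : null;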
/**
* Return the current file observations accumulated by repo profiling.
*
* Purpose: keep untracked and modified file awareness queryable without
* treating those paths as SF-owned artifacts.
*
* Consumer: harness planning, diagnostics, and future drift detection.
*/
export function getRepoFileObservations(): RepoFileObservationRow[] {
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
return currentDb
.prepare(
`SELECT path, latest_profile_id, git_status, ownership, language,
size_bytes, content_hash, summary, first_seen_at, last_seen_at,
adopted_at, adoption_unit_id
FROM repo_file_observations
ORDER BY path ASC`,
)
.all()
.map((row) => ({
path: row["path"] as string,
latestProfileId: row["latest_profile_id"] as string,
gitStatus: row["git_status"] as string,
ownership: row["ownership"] as string,
language: asStringOrNull(row["language"]),
sizeBytes: (row["size_bytes"] as number) ?? 0,
contentHash: asStringOrNull(row["content_hash"]),
summary: asStringOrNull(row["summary"]),
firstSeenAt: row["first_seen_at"] as string,
lastSeenAt: row["last_seen_at"] as string,
adoptedAt: asStringOrNull(row["adopted_at"]),
adoptionUnitId: asStringOrNull(row["adoption_unit_id"]),
}));
}
/**
* INSERT OR REPLACE a quality_gates row. Used by milestone-validation-gates.ts
* to persist milestone-level (MV*) gate outcomes after validate-milestone runs.

@@ -0,0 +1,125 @@
import assert from "node:assert/strict";
import { execFileSync } from "node:child_process";
import {
mkdirSync,
mkdtempSync,
rmSync,
statSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import test from "node:test";
import { profileRepository } from "../repo-profiler.ts";
import {
closeDatabase,
getLatestRepoProfile,
getRepoFileObservations,
openDatabase,
recordRepoProfile,
} from "../sf-db.ts";
function run(args: string[], cwd: string): string {
return execFileSync("git", args, {
cwd,
stdio: ["ignore", "pipe", "pipe"],
encoding: "utf-8",
}).trim();
}
function makeRepo(): string {
const dir = mkdtempSync(join(tmpdir(), "sf-repo-profiler-"));
run(["init", "-b", "main"], dir);
run(["config", "user.email", "test@example.com"], dir);
run(["config", "user.name", "SF Test"], dir);
writeFileSync(join(dir, "README.md"), "# Repo\n");
writeFileSync(
join(dir, "package.json"),
'{"scripts":{"test":"node --test"}}\n',
);
run(["add", "README.md", "package.json"], dir);
run(["commit", "-m", "init"], dir);
return dir;
}
test("profileRepository_when_untracked_file_exists_marks_observed_only", () => {
const repo = makeRepo();
try {
writeFileSync(join(repo, "README.md"), "# Repo\n\nchanged\n");
mkdirSync(join(repo, "docs"));
writeFileSync(join(repo, "docs", "scratch.md"), "# Scratch\nlocal note\n");
const beforeStatus = run(["status", "--short"], repo);
const profile = profileRepository(repo, {
now: () => "2026-04-29T10:00:00.000Z",
});
const afterStatus = run(["status", "--short"], repo);
assert.equal(afterStatus, beforeStatus, "profiling must be read-only");
assert.equal(profile.git.branch, "main");
assert.equal(profile.git.dirty, true);
assert.ok(profile.git.head, "profile records HEAD when available");
assert.ok(
profile.stacks.some(
(signal) =>
signal.kind === "node" && signal.sourcePath === "package.json",
),
"package.json produces a node stack signal",
);
const modified = profile.git.changedFiles.find(
(file) => file.path === "README.md",
);
assert.equal(modified?.gitStatus, "modified");
assert.equal(modified?.ownership, "user_owned");
const untracked = profile.git.changedFiles.find(
(file) => file.path === "docs/scratch.md",
);
assert.equal(untracked?.gitStatus, "untracked");
assert.equal(untracked?.ownership, "observed_only");
assert.equal(
untracked?.sizeBytes,
statSync(join(repo, "docs/scratch.md")).size,
);
assert.match(untracked?.contentHash ?? "", /^sha256:[a-f0-9]{64}$/);
} finally {
rmSync(repo, { recursive: true, force: true });
}
});
test("recordRepoProfile_when_file_observed_again_preserves_first_seen", () => {
const repo = makeRepo();
try {
mkdirSync(join(repo, "docs"));
writeFileSync(join(repo, "docs", "scratch.md"), "# Scratch\n");
closeDatabase();
assert.equal(openDatabase(":memory:"), true);
const first = profileRepository(repo, {
now: () => "2026-04-29T10:00:00.000Z",
});
recordRepoProfile(first);
writeFileSync(join(repo, "docs", "scratch.md"), "# Scratch\nupdated\n");
const second = profileRepository(repo, {
now: () => "2026-04-29T10:05:00.000Z",
});
recordRepoProfile(second);
const latest = getLatestRepoProfile();
assert.equal(latest?.profileId, second.profileId);
const observations = getRepoFileObservations();
const scratch = observations.find((obs) => obs.path === "docs/scratch.md");
assert.equal(scratch?.ownership, "observed_only");
assert.equal(scratch?.gitStatus, "untracked");
assert.equal(scratch?.firstSeenAt, "2026-04-29T10:00:00.000Z");
assert.equal(scratch?.lastSeenAt, "2026-04-29T10:05:00.000Z");
assert.equal(scratch?.latestProfileId, second.profileId);
} finally {
closeDatabase();
rmSync(repo, { recursive: true, force: true });
}
});