From d02d33aa70e94a0669c85beb681d391bbc681fc3 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Wed, 29 Apr 2026 17:39:52 +0200 Subject: [PATCH] feat: add repo harness profiler --- .../extensions/sf/commands-harness.ts | 79 +++ .../extensions/sf/commands/catalog.ts | 13 +- .../extensions/sf/commands/handlers/ops.ts | 5 + src/resources/extensions/sf/repo-profiler.ts | 560 ++++++++++++++++++ src/resources/extensions/sf/sf-db.ts | 281 ++++++++- .../extensions/sf/tests/repo-profiler.test.ts | 125 ++++ 6 files changed, 1061 insertions(+), 2 deletions(-) create mode 100644 src/resources/extensions/sf/commands-harness.ts create mode 100644 src/resources/extensions/sf/repo-profiler.ts create mode 100644 src/resources/extensions/sf/tests/repo-profiler.test.ts diff --git a/src/resources/extensions/sf/commands-harness.ts b/src/resources/extensions/sf/commands-harness.ts new file mode 100644 index 000000000..92f2b6d94 --- /dev/null +++ b/src/resources/extensions/sf/commands-harness.ts @@ -0,0 +1,79 @@ +/** + * commands-harness.ts - repo-native harness evolution commands. + * + * Purpose: expose the read-only profiler so operators can seed harness + * evolution state without changing prompts or claiming untracked files. 
+ */ + +import { join } from "node:path"; +import type { ExtensionCommandContext } from "@singularity-forge/pi-coding-agent"; +import { ensureDbOpen } from "./bootstrap/dynamic-tools.js"; +import { projectRoot } from "./commands/context.js"; +import { profileRepository } from "./repo-profiler.js"; +import { recordRepoProfile } from "./sf-db.js"; + +function formatProfileSummary( + profile: ReturnType<typeof profileRepository>, +): string { + const untracked = profile.git.changedFiles.filter( + (file) => file.gitStatus === "untracked", + ).length; + const modified = profile.git.changedFiles.filter( + (file) => file.gitStatus === "modified", + ).length; + const stacks = + profile.stacks + .map((stack) => stack.kind) + .filter(Boolean) + .join(", ") || "none detected"; + const risks = + profile.riskHints + .map((hint) => hint.family) + .filter(Boolean) + .join(", ") || "none detected"; + + return [ + "Repo harness profile recorded", + `Profile: ${profile.profileId}`, + `State: ${join(profile.projectRoot, ".sf", "sf.db")}`, + `Branch: ${profile.git.branch ?? "unknown"}`, + `Changed files: ${profile.git.changedFiles.length} (${modified} modified, ${untracked} untracked)`, + `Stacks: ${stacks}`, + `Risk hints: ${risks}`, + "", + "Untracked files were recorded as observations only; SF did not stage or adopt them.", + ].join("\n"); +} + +/** + * Run repo harness profiling and persist the resulting snapshot. + * + * Purpose: give users and future auto-flow slices an explicit entry point for + * harness evolution's read-only observation phase. + * + * Consumer: `/sf harness profile` command. 
+ */ +export async function handleHarness( + args: string, + ctx: ExtensionCommandContext, +): Promise<void> { + const subcommand = args.trim() || "profile"; + if (!["profile", "snapshot", "status"].includes(subcommand)) { + ctx.ui.notify( + "Usage: /sf harness profile\nRecords a read-only repo profile for harness evolution.", + "warning", + ); + return; + } + + const basePath = projectRoot(); + const opened = await ensureDbOpen(basePath); + if (!opened) { + ctx.ui.notify("No SF database available. Run /sf init first.", "warning"); + return; + } + + const profile = profileRepository(basePath); + recordRepoProfile(profile); + ctx.ui.notify(formatProfileSummary(profile), "info"); +} diff --git a/src/resources/extensions/sf/commands/catalog.ts b/src/resources/extensions/sf/commands/catalog.ts index e27f3dfa6..7543a6523 100644 --- a/src/resources/extensions/sf/commands/catalog.ts +++ b/src/resources/extensions/sf/commands/catalog.ts @@ -15,7 +15,7 @@ export interface GsdCommandDefinition { type CompletionMap = Record<string, GsdCommandDefinition[]>; export const SF_COMMAND_DESCRIPTION = - "SF — Singularity Forge: /sf help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan"; + "SF — Singularity Forge: /sf 
help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|harness|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan"; export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "help", desc: "Categorized command reference with descriptions" }, @@ -93,6 +93,10 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ cmd: "knowledge", desc: "Add persistent project knowledge (rule, pattern, or lesson)", }, + { + cmd: "harness", + desc: "Repo-native harness evolution (profile, status)", + }, { cmd: "new-milestone", desc: "Create a milestone from a specification document (headless)", @@ -245,6 +249,13 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "pattern", desc: "Add a code pattern to follow" }, { cmd: "lesson", desc: "Record a lesson learned" }, ], + harness: [ + { + cmd: "profile", + desc: "Record a read-only repo profile for harness evolution", + }, + { cmd: "status", desc: "Alias for profile in the first implementation" }, + ], start: [ { cmd: "bugfix", desc: "Triage, fix, test, and ship a bug fix" }, { diff --git a/src/resources/extensions/sf/commands/handlers/ops.ts b/src/resources/extensions/sf/commands/handlers/ops.ts index 0b434443a..23cce6bad 100644 --- a/src/resources/extensions/sf/commands/handlers/ops.ts +++ b/src/resources/extensions/sf/commands/handlers/ops.ts @@ -239,6 +239,11 @@ Examples: ); return true; } + if (trimmed === "harness" || trimmed.startsWith("harness ")) { + const { handleHarness } = await import("../../commands-harness.js"); + await handleHarness(trimmed.replace(/^harness\s*/, "").trim(), ctx); + return true; + } if (trimmed === "migrate" || 
trimmed.startsWith("migrate ")) { const { handleMigrate } = await import("../../migrate/command.js"); await handleMigrate(trimmed.replace(/^migrate\s*/, "").trim(), ctx, pi); diff --git a/src/resources/extensions/sf/repo-profiler.ts b/src/resources/extensions/sf/repo-profiler.ts new file mode 100644 index 000000000..b4c127810 --- /dev/null +++ b/src/resources/extensions/sf/repo-profiler.ts @@ -0,0 +1,560 @@ +/** + * repo-profiler.ts - read-only repository shape profiler. + * + * Purpose: give harness evolution a factual repo snapshot without staging, + * deleting, or claiming ownership of user files. + */ + +import { execFileSync } from "node:child_process"; +import { createHash } from "node:crypto"; +import { existsSync, readFileSync, realpathSync, statSync } from "node:fs"; +import { basename, extname, join, sep } from "node:path"; +import { GIT_NO_PROMPT_ENV } from "./git-constants.js"; + +/** + * Classifies a path's git state in the repo profile. + * + * Purpose: let harness planning distinguish user changes from untracked + * observations without inferring ownership from raw porcelain codes. + * + * Consumer: RepoFileObservation rows stored by sf-db and `/sf harness profile`. + */ +export type RepoGitStatus = + | "tracked" + | "modified" + | "deleted" + | "renamed" + | "untracked" + | "ignored"; + +/** + * Describes who may mutate an observed path by default. + * + * Purpose: prevent SF from staging or rewriting files that were only observed + * during profiling. + * + * Consumer: RepoFileObservation rows and future harness adoption checks. + */ +export type RepoFileOwnership = + | "sf_generated" + | "user_owned" + | "observed_only" + | "candidate_harness"; + +/** + * Describes one changed or untracked file seen during repository profiling. + * + * Purpose: preserve path-level repo reality for harness planning while keeping + * untracked files explicitly read-only. + * + * Consumer: RepoProfile.git.changedFiles and repo_file_observations. 
+ */ +export interface RepoFileObservation { + path: string; + gitStatus: RepoGitStatus; + ownership: RepoFileOwnership; + language: string | null; + sizeBytes: number; + contentHash: string | null; + summary: string | null; + firstSeenAt: string; + lastSeenAt: string; + adoptedAt: string | null; + adoptionUnitId: string | null; +} + +/** + * Records a detected language or platform stack. + * + * Purpose: drive harness template selection from concrete repo files. + * + * Consumer: risk classification and future template-kit matching. + */ +export interface StackSignal { + kind: string; + sourcePath: string; + confidence: number; +} + +/** + * Records a likely application or service entry point. + * + * Purpose: guide harness generation toward runnable surfaces. + * + * Consumer: future harness planner and smoke-test proposal generation. + */ +export interface EntrypointSignal { + kind: string; + path: string; + confidence: number; +} + +/** + * Records detected test infrastructure. + * + * Purpose: let harness planning distinguish missing coverage from existing + * repo-native checks. + * + * Consumer: risk hints and future harness inventory comparison. + */ +export interface TestSignal { + kind: string; + path: string; + confidence: number; +} + +/** + * Records detected CI configuration. + * + * Purpose: determine whether generated harness gates can be wired into an + * existing automation surface. + * + * Consumer: risk hints and future CI snippet proposals. + */ +export interface CiSignal { + kind: string; + path: string; + confidence: number; +} + +/** + * Records detected project documentation. + * + * Purpose: seed harness planning from existing specs and architecture docs. + * + * Consumer: future prompt/context selection for harness proposals. + */ +export interface DocumentSignal { + kind: string; + path: string; + confidence: number; +} + +/** + * Records detected database or migration surfaces. 
+ * + * Purpose: trigger database-specific harness requirements. + * + * Consumer: risk hints and future migration-gate proposals. + */ +export interface DataStoreSignal { + kind: string; + path: string; + confidence: number; +} + +/** + * Records detected network-facing application surfaces. + * + * Purpose: trigger web/API/gateway smoke-test harness requirements. + * + * Consumer: risk hints and future smoke-test proposal generation. + */ +export interface NetworkSurfaceSignal { + kind: string; + path: string; + confidence: number; +} + +/** + * Records one risk family inferred from the repo profile. + * + * Purpose: make harness gaps explainable before SF proposes generated files. + * + * Consumer: `/sf harness profile` summaries and future harness planner input. + */ +export interface RiskHint { + family: string; + reason: string; + confidence: number; +} + +/** + * Captures a read-only snapshot of repository shape. + * + * Purpose: provide the factual base for repo-native harness evolution without + * mutating the working tree. + * + * Consumer: sf-db profile persistence and `/sf harness profile`. + */ +export interface RepoProfile { + profileId: string; + projectHash: string; + projectRoot: string; + git: { + head: string | null; + branch: string | null; + remoteHash: string | null; + dirty: boolean; + changedFiles: RepoFileObservation[]; + }; + stacks: StackSignal[]; + entrypoints: EntrypointSignal[]; + tests: TestSignal[]; + ci: CiSignal[]; + docs: DocumentSignal[]; + dataStores: DataStoreSignal[]; + networkSurfaces: NetworkSurfaceSignal[]; + riskHints: RiskHint[]; + createdAt: string; +} + +/** + * Configures deterministic profile generation. + * + * Purpose: allow tests and future scheduled snapshots to pin timestamps. + * + * Consumer: profileRepository callers. 
+ */ +export interface RepoProfileOptions { + now?: () => string; +} + +const HASH_READ_LIMIT_BYTES = 1024 * 1024; + +function git(args: string[], cwd: string, allowFailure = true): string { + try { + return execFileSync("git", args, { + cwd, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + env: GIT_NO_PROMPT_ENV, + }).trimEnd(); + } catch { + if (allowFailure) return ""; + throw new Error(`git ${args.join(" ")} failed in ${cwd}`); + } +} + +function sha256(input: string): string { + return createHash("sha256").update(input).digest("hex"); +} + +function normalizePath(path: string): string { + return path.split(sep).join("/"); +} + +function pathExists(basePath: string, relPath: string): boolean { + return existsSync(join(basePath, relPath)); +} + +function signal<T>( + items: T[], + item: T, +): void { + items.push(item); +} + +function languageForPath(path: string): string | null { + const ext = extname(path).toLowerCase(); + const file = basename(path).toLowerCase(); + if (file === "go.mod" || ext === ".go") return "go"; + if (file === "package.json" || ext === ".ts" || ext === ".tsx") + return "typescript"; + if (ext === ".js" || ext === ".jsx" || ext === ".mjs" || ext === ".cjs") + return "javascript"; + if (file === "cargo.toml" || ext === ".rs") return "rust"; + if (file === "pyproject.toml" || ext === ".py") return "python"; + if (file === "flake.nix" || file === "shell.nix" || ext === ".nix") + return "nix"; + if (ext === ".md" || ext === ".markdown") return "markdown"; + if (ext === ".sql") return "sql"; + if (file.includes("dockerfile")) return "dockerfile"; + if (ext === ".yml" || ext === ".yaml") return "yaml"; + if (ext === ".json") return "json"; + return null; +} + +function summarizeFile( + basePath: string, + relPath: string, +): { + sizeBytes: number; + contentHash: string | null; + summary: string | null; +} { + const fullPath = join(basePath, relPath); + try { + const stat = statSync(fullPath); + if (!stat.isFile()) { + return { 
sizeBytes: stat.size, contentHash: null, summary: null }; + } + if (stat.size > HASH_READ_LIMIT_BYTES) { + return { sizeBytes: stat.size, contentHash: null, summary: null }; + } + const content = readFileSync(fullPath); + const contentHash = `sha256:${createHash("sha256").update(content).digest("hex")}`; + const asText = content.toString("utf8"); + const firstLine = asText + .split(/\r?\n/) + .map((line) => line.trim()) + .find((line) => line.length > 0); + return { + sizeBytes: stat.size, + contentHash, + summary: firstLine ? firstLine.slice(0, 160) : null, + }; + } catch { + return { sizeBytes: 0, contentHash: null, summary: null }; + } +} + +function parsePorcelainStatus( + line: string, + basePath: string, + now: string, +): RepoFileObservation | null { + if (line.length < 4) return null; + const code = line.slice(0, 2); + const rawPath = line.slice(3); + const normalizedRawPath = normalizePath(rawPath); + const path = normalizedRawPath.includes(" -> ") + ? normalizedRawPath.split(" -> ").at(-1)! + : normalizedRawPath; + + let gitStatus: RepoGitStatus = "modified"; + if (code === "??") gitStatus = "untracked"; + else if (code.includes("D")) gitStatus = "deleted"; + else if (code.includes("R")) gitStatus = "renamed"; + + const ownership: RepoFileOwnership = + gitStatus === "untracked" ? "observed_only" : "user_owned"; + const fileSummary = + gitStatus === "deleted" + ? 
{ sizeBytes: 0, contentHash: null, summary: null } + : summarizeFile(basePath, path); + + return { + path, + gitStatus, + ownership, + language: languageForPath(path), + sizeBytes: fileSummary.sizeBytes, + contentHash: fileSummary.contentHash, + summary: fileSummary.summary, + firstSeenAt: now, + lastSeenAt: now, + adoptedAt: null, + adoptionUnitId: null, + }; +} + +function detectStacks(basePath: string): StackSignal[] { + const stacks: StackSignal[] = []; + if (pathExists(basePath, "package.json")) + signal(stacks, { kind: "node", sourcePath: "package.json", confidence: 1 }); + if (pathExists(basePath, "go.mod")) + signal(stacks, { kind: "go", sourcePath: "go.mod", confidence: 1 }); + if (pathExists(basePath, "Cargo.toml")) + signal(stacks, { kind: "rust", sourcePath: "Cargo.toml", confidence: 1 }); + if (pathExists(basePath, "pyproject.toml")) + signal(stacks, { + kind: "python", + sourcePath: "pyproject.toml", + confidence: 1, + }); + if (pathExists(basePath, "flake.nix")) + signal(stacks, { kind: "nix", sourcePath: "flake.nix", confidence: 1 }); + if (pathExists(basePath, "shell.nix")) + signal(stacks, { kind: "nix", sourcePath: "shell.nix", confidence: 0.9 }); + return stacks; +} + +function detectEntrypoints(basePath: string): EntrypointSignal[] { + const entrypoints: EntrypointSignal[] = []; + for (const path of [ + "src/index.ts", + "src/main.ts", + "src/cli.ts", + "cmd", + "main.go", + "portal/main.go", + "dr-agent/main.go", + ]) { + if (pathExists(basePath, path)) + signal(entrypoints, { kind: "entrypoint", path, confidence: 0.8 }); + } + return entrypoints; +} + +function detectTests(basePath: string): TestSignal[] { + const tests: TestSignal[] = []; + for (const path of ["tests", "test", "__tests__", "src/tests"]) { + if (pathExists(basePath, path)) + signal(tests, { kind: "test-directory", path, confidence: 0.8 }); + } + return tests; +} + +function detectCi(basePath: string): CiSignal[] { + const ci: CiSignal[] = []; + for (const path of 
[".github/workflows", ".gitlab-ci.yml", "Jenkinsfile"]) { + if (pathExists(basePath, path)) + signal(ci, { kind: "ci", path, confidence: 0.9 }); + } + return ci; +} + +function detectDocs(basePath: string): DocumentSignal[] { + const docs: DocumentSignal[] = []; + for (const path of [ + "SPEC.md", + "ARCHITECTURE.md", + "AGENTS.md", + "README.md", + "docs", + ]) { + if (pathExists(basePath, path)) + signal(docs, { kind: "doc", path, confidence: 0.9 }); + } + return docs; +} + +function detectDataStores(basePath: string): DataStoreSignal[] { + const stores: DataStoreSignal[] = []; + for (const path of ["migrations", "prisma", "db", "sql"]) { + if (pathExists(basePath, path)) + signal(stores, { kind: "database", path, confidence: 0.8 }); + } + return stores; +} + +function detectNetworkSurfaces(basePath: string): NetworkSurfaceSignal[] { + const surfaces: NetworkSurfaceSignal[] = []; + for (const path of ["portal", "gateway", "api", "web", "server"]) { + if (pathExists(basePath, path)) + signal(surfaces, { kind: "network", path, confidence: 0.7 }); + } + return surfaces; +} + +function buildRiskHints(args: { + stacks: StackSignal[]; + tests: TestSignal[]; + ci: CiSignal[]; + dataStores: DataStoreSignal[]; + networkSurfaces: NetworkSurfaceSignal[]; + changedFiles: RepoFileObservation[]; +}): RiskHint[] { + const hints: RiskHint[] = []; + if (args.networkSurfaces.length > 0) { + hints.push({ + family: "web", + reason: "network-facing directories detected", + confidence: 0.7, + }); + } + if (args.dataStores.length > 0) { + hints.push({ + family: "database", + reason: "database or migration directories detected", + confidence: 0.8, + }); + } + if (args.stacks.some((stack) => stack.kind === "nix")) { + hints.push({ + family: "infrastructure", + reason: "Nix project files detected", + confidence: 0.8, + }); + } + if (args.changedFiles.some((file) => file.gitStatus === "untracked")) { + hints.push({ + family: "harness-drift", + reason: "untracked files observed in working 
tree", + confidence: 0.6, + }); + } + if (args.tests.length === 0 || args.ci.length === 0) { + hints.push({ + family: "verification-gap", + reason: "tests or CI signals are missing from the repo profile", + confidence: 0.5, + }); + } + return hints; +} + +function canonicalRemote(remote: string): string { + return remote + .trim() + .replace(/^https?:\/\/([^/@]+@)?/i, "https://") + .replace(/\.git$/i, "") + .toLowerCase(); +} + +/** + * Build a read-only repository profile from git status and well-known files. + * + * Purpose: seed harness evolution with observable repo facts while preserving + * user ownership of untracked and modified files. + * + * Consumer: `/sf harness profile` and future pre-plan harness snapshots. + */ +export function profileRepository( + basePath: string, + options: RepoProfileOptions = {}, +): RepoProfile { + const createdAt = options.now?.() ?? new Date().toISOString(); + const projectRoot = realpathSync(basePath); + const head = git(["rev-parse", "HEAD"], projectRoot) || null; + const branch = git(["branch", "--show-current"], projectRoot) || null; + const remote = git(["remote", "get-url", "origin"], projectRoot) || ""; + const remoteHash = remote + ? `sha256:${sha256(canonicalRemote(remote))}` + : null; + const projectHash = remote + ? 
sha256(canonicalRemote(remote)).slice(0, 16) + : sha256(projectRoot).slice(0, 16); + const status = git(["status", "--porcelain=v1", "-uall"], projectRoot); + const changedFiles = status + .split(/\r?\n/) + .map((line) => line.trimEnd()) + .filter(Boolean) + .map((line) => parsePorcelainStatus(line, projectRoot, createdAt)) + .filter((obs): obs is RepoFileObservation => obs !== null) + .sort((a, b) => a.path.localeCompare(b.path)); + + const stacks = detectStacks(projectRoot); + const entrypoints = detectEntrypoints(projectRoot); + const tests = detectTests(projectRoot); + const ci = detectCi(projectRoot); + const docs = detectDocs(projectRoot); + const dataStores = detectDataStores(projectRoot); + const networkSurfaces = detectNetworkSurfaces(projectRoot); + const riskHints = buildRiskHints({ + stacks, + tests, + ci, + dataStores, + networkSurfaces, + changedFiles, + }); + const profileId = `rp_${sha256( + `${projectHash}:${head ?? ""}:${branch ?? ""}:${createdAt}:${changedFiles + .map((file) => `${file.gitStatus}:${file.path}:${file.contentHash ?? 
""}`) + .join("|")}`, + ).slice(0, 24)}`; + + return { + profileId, + projectHash, + projectRoot, + git: { + head, + branch, + remoteHash, + dirty: changedFiles.length > 0, + changedFiles, + }, + stacks, + entrypoints, + tests, + ci, + docs, + dataStores, + networkSurfaces, + riskHints, + createdAt, + }; +} diff --git a/src/resources/extensions/sf/sf-db.ts b/src/resources/extensions/sf/sf-db.ts index 2de7d720f..23eb8a424 100644 --- a/src/resources/extensions/sf/sf-db.ts +++ b/src/resources/extensions/sf/sf-db.ts @@ -197,7 +197,7 @@ function openRawDb(path: string): unknown { return new Database(path); } -const SCHEMA_VERSION = 20; +const SCHEMA_VERSION = 21; function indexExists(db: DbAdapter, name: string): boolean { return !!db @@ -226,6 +226,44 @@ function ensureVerificationEvidenceDedupIndex(db: DbAdapter): void { ); } +function ensureRepoProfileTables(db: DbAdapter): void { + db.exec(` + CREATE TABLE IF NOT EXISTS repo_profiles ( + profile_id TEXT PRIMARY KEY, + project_hash TEXT NOT NULL, + project_root TEXT NOT NULL DEFAULT '', + head TEXT DEFAULT NULL, + branch TEXT DEFAULT NULL, + remote_hash TEXT DEFAULT NULL, + dirty INTEGER NOT NULL DEFAULT 0, + profile_json TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL + ) + `); + db.exec(` + CREATE TABLE IF NOT EXISTS repo_file_observations ( + path TEXT PRIMARY KEY, + latest_profile_id TEXT NOT NULL, + git_status TEXT NOT NULL, + ownership TEXT NOT NULL, + language TEXT DEFAULT NULL, + size_bytes INTEGER NOT NULL DEFAULT 0, + content_hash TEXT DEFAULT NULL, + summary TEXT DEFAULT NULL, + first_seen_at TEXT NOT NULL, + last_seen_at TEXT NOT NULL, + adopted_at TEXT DEFAULT NULL, + adoption_unit_id TEXT DEFAULT NULL + ) + `); + db.exec( + "CREATE INDEX IF NOT EXISTS idx_repo_profiles_created ON repo_profiles(created_at DESC)", + ); + db.exec( + "CREATE INDEX IF NOT EXISTS idx_repo_file_observations_status ON repo_file_observations(git_status, ownership)", + ); +} + function initSchema(db: DbAdapter, fileBacked: 
boolean): void { if (fileBacked) db.exec("PRAGMA journal_mode=WAL"); if (fileBacked) db.exec("PRAGMA busy_timeout = 5000"); @@ -609,6 +647,7 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void { db.exec( "CREATE INDEX IF NOT EXISTS idx_llm_task_outcomes_provider ON llm_task_outcomes(provider, recorded_at DESC)", ); + ensureRepoProfileTables(db); db.exec( `CREATE VIEW IF NOT EXISTS active_decisions AS SELECT * FROM decisions WHERE superseded_by IS NULL`, @@ -1414,6 +1453,16 @@ function migrateSchema(db: DbAdapter): void { }); } + if (currentVersion < 21) { + ensureRepoProfileTables(db); + db.prepare( + "INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)", + ).run({ + ":version": 21, + ":applied_at": new Date().toISOString(), + }); + } + db.exec("COMMIT"); } catch (err) { db.exec("ROLLBACK"); @@ -4019,6 +4068,236 @@ export function insertLlmTaskOutcome(input: LlmTaskOutcomeInput): boolean { } } +/** + * Input shape for persisting a repository profile snapshot. + * + * Purpose: keep sf-db decoupled from repo-profiler runtime imports while + * enforcing the storage contract at compile time. + * + * Consumer: recordRepoProfile callers. + */ +export interface RepoProfileStorageInput { + profileId: string; + projectHash: string; + projectRoot: string; + git: { + head: string | null; + branch: string | null; + remoteHash: string | null; + dirty: boolean; + changedFiles: Array<{ + path: string; + gitStatus: string; + ownership: string; + language: string | null; + sizeBytes: number; + contentHash: string | null; + summary: string | null; + firstSeenAt: string; + lastSeenAt: string; + adoptedAt: string | null; + adoptionUnitId: string | null; + }>; + }; + createdAt: string; +} + +/** + * Database row returned for a stored repository profile. + * + * Purpose: expose profile metadata without forcing callers to parse JSON. + * + * Consumer: harness status and future drift diagnostics. 
+ */ +export interface RepoProfileRow { + profileId: string; + projectHash: string; + projectRoot: string; + head: string | null; + branch: string | null; + remoteHash: string | null; + dirty: boolean; + profileJson: string; + createdAt: string; +} + +/** + * Database row returned for the current path observation ledger. + * + * Purpose: let harness planning inspect file ownership and first/last-seen + * timestamps without touching raw SQL. + * + * Consumer: future harness planner and diagnostics. + */ +export interface RepoFileObservationRow { + path: string; + latestProfileId: string; + gitStatus: string; + ownership: string; + language: string | null; + sizeBytes: number; + contentHash: string | null; + summary: string | null; + firstSeenAt: string; + lastSeenAt: string; + adoptedAt: string | null; + adoptionUnitId: string | null; +} + +function asStringOrNull(value: unknown): string | null { + return typeof value === "string" && value.length > 0 ? value : null; +} + +/** + * Persist a repository profile snapshot and update current file observations. + * + * Purpose: make harness evolution's read-only repo facts queryable across + * sessions while preserving first-seen timestamps for untracked observations. + * + * Consumer: `/sf harness profile` and future pre-plan profile snapshots. + */ +export function recordRepoProfile(profile: RepoProfileStorageInput): void { + if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open"); + transaction(() => { + currentDb! 
+ .prepare( + `INSERT OR REPLACE INTO repo_profiles ( + profile_id, project_hash, project_root, head, branch, remote_hash, + dirty, profile_json, created_at + ) VALUES ( + :profile_id, :project_hash, :project_root, :head, :branch, :remote_hash, + :dirty, :profile_json, :created_at + )`, + ) + .run({ + ":profile_id": profile.profileId, + ":project_hash": profile.projectHash, + ":project_root": profile.projectRoot, + ":head": profile.git.head, + ":branch": profile.git.branch, + ":remote_hash": profile.git.remoteHash, + ":dirty": profile.git.dirty ? 1 : 0, + ":profile_json": JSON.stringify(profile), + ":created_at": profile.createdAt, + }); + + const stmt = currentDb!.prepare( + `INSERT INTO repo_file_observations ( + path, latest_profile_id, git_status, ownership, language, size_bytes, + content_hash, summary, first_seen_at, last_seen_at, adopted_at, + adoption_unit_id + ) VALUES ( + :path, :latest_profile_id, :git_status, :ownership, :language, :size_bytes, + :content_hash, :summary, :first_seen_at, :last_seen_at, :adopted_at, + :adoption_unit_id + ) + ON CONFLICT(path) DO UPDATE SET + latest_profile_id = excluded.latest_profile_id, + git_status = excluded.git_status, + ownership = CASE + WHEN repo_file_observations.ownership = 'sf_generated' + THEN repo_file_observations.ownership + WHEN repo_file_observations.ownership = 'candidate_harness' + THEN repo_file_observations.ownership + ELSE excluded.ownership + END, + language = excluded.language, + size_bytes = excluded.size_bytes, + content_hash = excluded.content_hash, + summary = excluded.summary, + first_seen_at = repo_file_observations.first_seen_at, + last_seen_at = excluded.last_seen_at, + adopted_at = COALESCE(repo_file_observations.adopted_at, excluded.adopted_at), + adoption_unit_id = COALESCE(repo_file_observations.adoption_unit_id, excluded.adoption_unit_id)`, + ); + + for (const file of profile.git.changedFiles) { + stmt.run({ + ":path": file.path, + ":latest_profile_id": profile.profileId, + 
":git_status": file.gitStatus, + ":ownership": file.ownership, + ":language": file.language, + ":size_bytes": file.sizeBytes, + ":content_hash": file.contentHash, + ":summary": file.summary, + ":first_seen_at": file.firstSeenAt, + ":last_seen_at": file.lastSeenAt, + ":adopted_at": file.adoptedAt, + ":adoption_unit_id": file.adoptionUnitId, + }); + } + }); +} + +/** + * Return the most recently recorded repository profile. + * + * Purpose: let harness planning and diagnostics inspect the latest factual + * repo snapshot without re-running the profiler. + * + * Consumer: harness status commands and future plan-phase coverage checks. + */ +export function getLatestRepoProfile(): RepoProfileRow | null { + if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open"); + const row = currentDb + .prepare( + `SELECT profile_id, project_hash, project_root, head, branch, remote_hash, + dirty, profile_json, created_at + FROM repo_profiles + ORDER BY created_at DESC, profile_id DESC + LIMIT 1`, + ) + .get(); + if (!row) return null; + return { + profileId: row["profile_id"] as string, + projectHash: row["project_hash"] as string, + projectRoot: row["project_root"] as string, + head: asStringOrNull(row["head"]), + branch: asStringOrNull(row["branch"]), + remoteHash: asStringOrNull(row["remote_hash"]), + dirty: row["dirty"] === 1, + profileJson: (row["profile_json"] as string) ?? "{}", + createdAt: row["created_at"] as string, + }; +} + +/** + * Return the current file observations accumulated by repo profiling. + * + * Purpose: keep untracked and modified file awareness queryable without + * treating those paths as SF-owned artifacts. + * + * Consumer: harness planning, diagnostics, and future drift detection. 
+ */ +export function getRepoFileObservations(): RepoFileObservationRow[] { + if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open"); + return currentDb + .prepare( + `SELECT path, latest_profile_id, git_status, ownership, language, + size_bytes, content_hash, summary, first_seen_at, last_seen_at, + adopted_at, adoption_unit_id + FROM repo_file_observations + ORDER BY path ASC`, + ) + .all() + .map((row) => ({ + path: row["path"] as string, + latestProfileId: row["latest_profile_id"] as string, + gitStatus: row["git_status"] as string, + ownership: row["ownership"] as string, + language: asStringOrNull(row["language"]), + sizeBytes: (row["size_bytes"] as number) ?? 0, + contentHash: asStringOrNull(row["content_hash"]), + summary: asStringOrNull(row["summary"]), + firstSeenAt: row["first_seen_at"] as string, + lastSeenAt: row["last_seen_at"] as string, + adoptedAt: asStringOrNull(row["adopted_at"]), + adoptionUnitId: asStringOrNull(row["adoption_unit_id"]), + })); +} + /** * INSERT OR REPLACE a quality_gates row. Used by milestone-validation-gates.ts * to persist milestone-level (MV*) gate outcomes after validate-milestone runs. 
diff --git a/src/resources/extensions/sf/tests/repo-profiler.test.ts b/src/resources/extensions/sf/tests/repo-profiler.test.ts
new file mode 100644
index 000000000..44b65b15b
--- /dev/null
+++ b/src/resources/extensions/sf/tests/repo-profiler.test.ts
@@ -0,0 +1,125 @@
+import assert from "node:assert/strict";
+import { execFileSync } from "node:child_process";
+import {
+  mkdirSync,
+  mkdtempSync,
+  rmSync,
+  statSync,
+  writeFileSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import test from "node:test";
+import { profileRepository } from "../repo-profiler.ts";
+import {
+  closeDatabase,
+  getLatestRepoProfile,
+  getRepoFileObservations,
+  openDatabase,
+  recordRepoProfile,
+} from "../sf-db.ts";
+
+/**
+ * Run a git command inside the fixture repo and return trimmed stdout.
+ * stdin is ignored; stdout/stderr are piped so a failing command throws
+ * with its diagnostics attached.
+ */
+function run(args: string[], cwd: string): string {
+  return execFileSync("git", args, {
+    cwd,
+    stdio: ["ignore", "pipe", "pipe"],
+    encoding: "utf-8",
+  }).trim();
+}
+
+/**
+ * Create a throwaway git repo under the OS temp dir with one commit
+ * containing README.md and package.json. Callers must rmSync the returned
+ * directory themselves.
+ */
+function makeRepo(): string {
+  const dir = mkdtempSync(join(tmpdir(), "sf-repo-profiler-"));
+  run(["init", "-b", "main"], dir);
+  run(["config", "user.email", "test@example.com"], dir);
+  run(["config", "user.name", "SF Test"], dir);
+  writeFileSync(join(dir, "README.md"), "# Repo\n");
+  writeFileSync(
+    join(dir, "package.json"),
+    '{"scripts":{"test":"node --test"}}\n',
+  );
+  run(["add", "README.md", "package.json"], dir);
+  run(["commit", "-m", "init"], dir);
+  return dir;
+}
+
+test("profileRepository_when_untracked_file_exists_marks_observed_only", () => {
+  const repo = makeRepo();
+  try {
+    // One tracked-but-modified file and one untracked file exercise both
+    // ownership classifications in a single profile run.
+    writeFileSync(join(repo, "README.md"), "# Repo\n\nchanged\n");
+    mkdirSync(join(repo, "docs"));
+    writeFileSync(join(repo, "docs", "scratch.md"), "# Scratch\nlocal note\n");
+
+    // Capture short status before and after profiling to prove the profiler
+    // never mutates the worktree or the index.
+    const beforeStatus = run(["status", "--short"], repo);
+    // Injected fixed clock makes recorded timestamps deterministic.
+    const profile = profileRepository(repo, {
+      now: () => "2026-04-29T10:00:00.000Z",
+    });
+    const afterStatus = run(["status", "--short"], repo);
+
+    assert.equal(afterStatus, beforeStatus, "profiling must be read-only");
+
+    assert.equal(profile.git.branch, "main");
+    assert.equal(profile.git.dirty, true);
+    assert.ok(profile.git.head, "profile records HEAD when available");
+    assert.ok(
+      profile.stacks.some(
+        (signal) =>
+          signal.kind === "node" && signal.sourcePath === "package.json",
+      ),
+      "package.json produces a node stack signal",
+    );
+
+    const modified = profile.git.changedFiles.find(
+      (file) => file.path === "README.md",
+    );
+    assert.equal(modified?.gitStatus, "modified");
+    assert.equal(modified?.ownership, "user_owned");
+
+    const untracked = profile.git.changedFiles.find(
+      (file) => file.path === "docs/scratch.md",
+    );
+    assert.equal(untracked?.gitStatus, "untracked");
+    assert.equal(untracked?.ownership, "observed_only");
+    assert.equal(
+      untracked?.sizeBytes,
+      statSync(join(repo, "docs/scratch.md")).size,
+    );
+    assert.match(untracked?.contentHash ?? "", /^sha256:[a-f0-9]{64}$/);
+  } finally {
+    rmSync(repo, { recursive: true, force: true });
+  }
+});
+
+test("recordRepoProfile_when_file_observed_again_preserves_first_seen", () => {
+  const repo = makeRepo();
+  try {
+    mkdirSync(join(repo, "docs"));
+    writeFileSync(join(repo, "docs", "scratch.md"), "# Scratch\n");
+
+    // Close any database left open by other tests, then use an in-memory DB
+    // so this test never touches on-disk state.
+    closeDatabase();
+    assert.equal(openDatabase(":memory:"), true);
+
+    const first = profileRepository(repo, {
+      now: () => "2026-04-29T10:00:00.000Z",
+    });
+    recordRepoProfile(first);
+
+    // Re-profile after editing the file: the upsert must refresh
+    // last_seen_at while leaving first_seen_at untouched.
+    writeFileSync(join(repo, "docs", "scratch.md"), "# Scratch\nupdated\n");
+    const second = profileRepository(repo, {
+      now: () => "2026-04-29T10:05:00.000Z",
+    });
+    recordRepoProfile(second);
+
+    const latest = getLatestRepoProfile();
+    assert.equal(latest?.profileId, second.profileId);
+
+    const observations = getRepoFileObservations();
+    const scratch = observations.find((obs) => obs.path === "docs/scratch.md");
+    assert.equal(scratch?.ownership, "observed_only");
+    assert.equal(scratch?.gitStatus, "untracked");
+    assert.equal(scratch?.firstSeenAt, "2026-04-29T10:00:00.000Z");
+    assert.equal(scratch?.lastSeenAt, "2026-04-29T10:05:00.000Z");
+    // latest_profile_id tracks the newest profile even though first_seen_at
+    // is preserved.
+    assert.equal(scratch?.latestProfileId, second.profileId);
+  } finally {
+    closeDatabase();
+    rmSync(repo, { recursive: true, force: true });
+  }
+});