feat: add repo harness profiler
This commit is contained in:
parent
a611db9032
commit
d02d33aa70
6 changed files with 1061 additions and 2 deletions
79
src/resources/extensions/sf/commands-harness.ts
Normal file
79
src/resources/extensions/sf/commands-harness.ts
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
/**
|
||||
* commands-harness.ts - repo-native harness evolution commands.
|
||||
*
|
||||
* Purpose: expose the read-only profiler so operators can seed harness
|
||||
* evolution state without changing prompts or claiming untracked files.
|
||||
*/
|
||||
|
||||
import { join } from "node:path";
|
||||
import type { ExtensionCommandContext } from "@singularity-forge/pi-coding-agent";
|
||||
import { ensureDbOpen } from "./bootstrap/dynamic-tools.js";
|
||||
import { projectRoot } from "./commands/context.js";
|
||||
import { profileRepository } from "./repo-profiler.js";
|
||||
import { recordRepoProfile } from "./sf-db.js";
|
||||
|
||||
function formatProfileSummary(
|
||||
profile: ReturnType<typeof profileRepository>,
|
||||
): string {
|
||||
const untracked = profile.git.changedFiles.filter(
|
||||
(file) => file.gitStatus === "untracked",
|
||||
).length;
|
||||
const modified = profile.git.changedFiles.filter(
|
||||
(file) => file.gitStatus === "modified",
|
||||
).length;
|
||||
const stacks =
|
||||
profile.stacks
|
||||
.map((stack) => stack.kind)
|
||||
.filter(Boolean)
|
||||
.join(", ") || "none detected";
|
||||
const risks =
|
||||
profile.riskHints
|
||||
.map((hint) => hint.family)
|
||||
.filter(Boolean)
|
||||
.join(", ") || "none detected";
|
||||
|
||||
return [
|
||||
"Repo harness profile recorded",
|
||||
`Profile: ${profile.profileId}`,
|
||||
`State: ${join(profile.projectRoot, ".sf", "sf.db")}`,
|
||||
`Branch: ${profile.git.branch ?? "unknown"}`,
|
||||
`Changed files: ${profile.git.changedFiles.length} (${modified} modified, ${untracked} untracked)`,
|
||||
`Stacks: ${stacks}`,
|
||||
`Risk hints: ${risks}`,
|
||||
"",
|
||||
"Untracked files were recorded as observations only; SF did not stage or adopt them.",
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* Run repo harness profiling and persist the resulting snapshot.
|
||||
*
|
||||
* Purpose: give users and future auto-flow slices an explicit entry point for
|
||||
* harness evolution's read-only observation phase.
|
||||
*
|
||||
* Consumer: `/sf harness profile` command.
|
||||
*/
|
||||
export async function handleHarness(
|
||||
args: string,
|
||||
ctx: ExtensionCommandContext,
|
||||
): Promise<void> {
|
||||
const subcommand = args.trim() || "profile";
|
||||
if (!["profile", "snapshot", "status"].includes(subcommand)) {
|
||||
ctx.ui.notify(
|
||||
"Usage: /sf harness profile\nRecords a read-only repo profile for harness evolution.",
|
||||
"warning",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const basePath = projectRoot();
|
||||
const opened = await ensureDbOpen(basePath);
|
||||
if (!opened) {
|
||||
ctx.ui.notify("No SF database available. Run /sf init first.", "warning");
|
||||
return;
|
||||
}
|
||||
|
||||
const profile = profileRepository(basePath);
|
||||
recordRepoProfile(profile);
|
||||
ctx.ui.notify(formatProfileSummary(profile), "info");
|
||||
}
|
||||
|
|
@ -15,7 +15,7 @@ export interface GsdCommandDefinition {
|
|||
type CompletionMap = Record<string, readonly GsdCommandDefinition[]>;
|
||||
|
||||
export const SF_COMMAND_DESCRIPTION =
|
||||
"SF — Singularity Forge: /sf help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan";
|
||||
"SF — Singularity Forge: /sf help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|harness|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications|ship|do|session-report|backlog|pr-branch|add-tests|scan";
|
||||
|
||||
export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [
|
||||
{ cmd: "help", desc: "Categorized command reference with descriptions" },
|
||||
|
|
@ -93,6 +93,10 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [
|
|||
cmd: "knowledge",
|
||||
desc: "Add persistent project knowledge (rule, pattern, or lesson)",
|
||||
},
|
||||
{
|
||||
cmd: "harness",
|
||||
desc: "Repo-native harness evolution (profile, status)",
|
||||
},
|
||||
{
|
||||
cmd: "new-milestone",
|
||||
desc: "Create a milestone from a specification document (headless)",
|
||||
|
|
@ -245,6 +249,13 @@ const NESTED_COMPLETIONS: CompletionMap = {
|
|||
{ cmd: "pattern", desc: "Add a code pattern to follow" },
|
||||
{ cmd: "lesson", desc: "Record a lesson learned" },
|
||||
],
|
||||
harness: [
|
||||
{
|
||||
cmd: "profile",
|
||||
desc: "Record a read-only repo profile for harness evolution",
|
||||
},
|
||||
{ cmd: "status", desc: "Alias for profile in the first implementation" },
|
||||
],
|
||||
start: [
|
||||
{ cmd: "bugfix", desc: "Triage, fix, test, and ship a bug fix" },
|
||||
{
|
||||
|
|
|
|||
|
|
@ -239,6 +239,11 @@ Examples:
|
|||
);
|
||||
return true;
|
||||
}
|
||||
if (trimmed === "harness" || trimmed.startsWith("harness ")) {
|
||||
const { handleHarness } = await import("../../commands-harness.js");
|
||||
await handleHarness(trimmed.replace(/^harness\s*/, "").trim(), ctx);
|
||||
return true;
|
||||
}
|
||||
if (trimmed === "migrate" || trimmed.startsWith("migrate ")) {
|
||||
const { handleMigrate } = await import("../../migrate/command.js");
|
||||
await handleMigrate(trimmed.replace(/^migrate\s*/, "").trim(), ctx, pi);
|
||||
|
|
|
|||
560
src/resources/extensions/sf/repo-profiler.ts
Normal file
560
src/resources/extensions/sf/repo-profiler.ts
Normal file
|
|
@ -0,0 +1,560 @@
|
|||
/**
|
||||
* repo-profiler.ts - read-only repository shape profiler.
|
||||
*
|
||||
* Purpose: give harness evolution a factual repo snapshot without staging,
|
||||
* deleting, or claiming ownership of user files.
|
||||
*/
|
||||
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { createHash } from "node:crypto";
|
||||
import { existsSync, readFileSync, realpathSync, statSync } from "node:fs";
|
||||
import { basename, extname, join, sep } from "node:path";
|
||||
import { GIT_NO_PROMPT_ENV } from "./git-constants.js";
|
||||
|
||||
/**
|
||||
* Classifies a path's git state in the repo profile.
|
||||
*
|
||||
* Purpose: let harness planning distinguish user changes from untracked
|
||||
* observations without inferring ownership from raw porcelain codes.
|
||||
*
|
||||
* Consumer: RepoFileObservation rows stored by sf-db and `/sf harness profile`.
|
||||
*/
|
||||
export type RepoGitStatus =
|
||||
| "tracked"
|
||||
| "modified"
|
||||
| "deleted"
|
||||
| "renamed"
|
||||
| "untracked"
|
||||
| "ignored";
|
||||
|
||||
/**
|
||||
* Describes who may mutate an observed path by default.
|
||||
*
|
||||
* Purpose: prevent SF from staging or rewriting files that were only observed
|
||||
* during profiling.
|
||||
*
|
||||
* Consumer: RepoFileObservation rows and future harness adoption checks.
|
||||
*/
|
||||
export type RepoFileOwnership =
|
||||
| "sf_generated"
|
||||
| "user_owned"
|
||||
| "observed_only"
|
||||
| "candidate_harness";
|
||||
|
||||
/**
|
||||
* Describes one changed or untracked file seen during repository profiling.
|
||||
*
|
||||
* Purpose: preserve path-level repo reality for harness planning while keeping
|
||||
* untracked files explicitly read-only.
|
||||
*
|
||||
* Consumer: RepoProfile.git.changedFiles and repo_file_observations.
|
||||
*/
|
||||
export interface RepoFileObservation {
|
||||
path: string;
|
||||
gitStatus: RepoGitStatus;
|
||||
ownership: RepoFileOwnership;
|
||||
language: string | null;
|
||||
sizeBytes: number;
|
||||
contentHash: string | null;
|
||||
summary: string | null;
|
||||
firstSeenAt: string;
|
||||
lastSeenAt: string;
|
||||
adoptedAt: string | null;
|
||||
adoptionUnitId: string | null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records a detected language or platform stack.
|
||||
*
|
||||
* Purpose: drive harness template selection from concrete repo files.
|
||||
*
|
||||
* Consumer: risk classification and future template-kit matching.
|
||||
*/
|
||||
export interface StackSignal {
|
||||
kind: string;
|
||||
sourcePath: string;
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records a likely application or service entry point.
|
||||
*
|
||||
* Purpose: guide harness generation toward runnable surfaces.
|
||||
*
|
||||
* Consumer: future harness planner and smoke-test proposal generation.
|
||||
*/
|
||||
export interface EntrypointSignal {
|
||||
kind: string;
|
||||
path: string;
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records detected test infrastructure.
|
||||
*
|
||||
* Purpose: let harness planning distinguish missing coverage from existing
|
||||
* repo-native checks.
|
||||
*
|
||||
* Consumer: risk hints and future harness inventory comparison.
|
||||
*/
|
||||
export interface TestSignal {
|
||||
kind: string;
|
||||
path: string;
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records detected CI configuration.
|
||||
*
|
||||
* Purpose: determine whether generated harness gates can be wired into an
|
||||
* existing automation surface.
|
||||
*
|
||||
* Consumer: risk hints and future CI snippet proposals.
|
||||
*/
|
||||
export interface CiSignal {
|
||||
kind: string;
|
||||
path: string;
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records detected project documentation.
|
||||
*
|
||||
* Purpose: seed harness planning from existing specs and architecture docs.
|
||||
*
|
||||
* Consumer: future prompt/context selection for harness proposals.
|
||||
*/
|
||||
export interface DocumentSignal {
|
||||
kind: string;
|
||||
path: string;
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records detected database or migration surfaces.
|
||||
*
|
||||
* Purpose: trigger database-specific harness requirements.
|
||||
*
|
||||
* Consumer: risk hints and future migration-gate proposals.
|
||||
*/
|
||||
export interface DataStoreSignal {
|
||||
kind: string;
|
||||
path: string;
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records detected network-facing application surfaces.
|
||||
*
|
||||
* Purpose: trigger web/API/gateway smoke-test harness requirements.
|
||||
*
|
||||
* Consumer: risk hints and future smoke-test proposal generation.
|
||||
*/
|
||||
export interface NetworkSurfaceSignal {
|
||||
kind: string;
|
||||
path: string;
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records one risk family inferred from the repo profile.
|
||||
*
|
||||
* Purpose: make harness gaps explainable before SF proposes generated files.
|
||||
*
|
||||
* Consumer: `/sf harness profile` summaries and future harness planner input.
|
||||
*/
|
||||
export interface RiskHint {
|
||||
family: string;
|
||||
reason: string;
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Captures a read-only snapshot of repository shape.
|
||||
*
|
||||
* Purpose: provide the factual base for repo-native harness evolution without
|
||||
* mutating the working tree.
|
||||
*
|
||||
* Consumer: sf-db profile persistence and `/sf harness profile`.
|
||||
*/
|
||||
export interface RepoProfile {
|
||||
profileId: string;
|
||||
projectHash: string;
|
||||
projectRoot: string;
|
||||
git: {
|
||||
head: string | null;
|
||||
branch: string | null;
|
||||
remoteHash: string | null;
|
||||
dirty: boolean;
|
||||
changedFiles: RepoFileObservation[];
|
||||
};
|
||||
stacks: StackSignal[];
|
||||
entrypoints: EntrypointSignal[];
|
||||
tests: TestSignal[];
|
||||
ci: CiSignal[];
|
||||
docs: DocumentSignal[];
|
||||
dataStores: DataStoreSignal[];
|
||||
networkSurfaces: NetworkSurfaceSignal[];
|
||||
riskHints: RiskHint[];
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configures deterministic profile generation.
|
||||
*
|
||||
* Purpose: allow tests and future scheduled snapshots to pin timestamps.
|
||||
*
|
||||
* Consumer: profileRepository callers.
|
||||
*/
|
||||
export interface RepoProfileOptions {
|
||||
now?: () => string;
|
||||
}
|
||||
|
||||
const HASH_READ_LIMIT_BYTES = 1024 * 1024;
|
||||
|
||||
function git(args: string[], cwd: string, allowFailure = true): string {
|
||||
try {
|
||||
return execFileSync("git", args, {
|
||||
cwd,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
encoding: "utf-8",
|
||||
env: GIT_NO_PROMPT_ENV,
|
||||
}).trimEnd();
|
||||
} catch {
|
||||
if (allowFailure) return "";
|
||||
throw new Error(`git ${args.join(" ")} failed in ${cwd}`);
|
||||
}
|
||||
}
|
||||
|
||||
function sha256(input: string): string {
|
||||
return createHash("sha256").update(input).digest("hex");
|
||||
}
|
||||
|
||||
function normalizePath(path: string): string {
|
||||
return path.split(sep).join("/");
|
||||
}
|
||||
|
||||
function pathExists(basePath: string, relPath: string): boolean {
|
||||
return existsSync(join(basePath, relPath));
|
||||
}
|
||||
|
||||
function signal<T extends { kind: string; path?: string; sourcePath?: string }>(
|
||||
items: T[],
|
||||
item: T,
|
||||
): void {
|
||||
items.push(item);
|
||||
}
|
||||
|
||||
function languageForPath(path: string): string | null {
|
||||
const ext = extname(path).toLowerCase();
|
||||
const file = basename(path).toLowerCase();
|
||||
if (file === "go.mod" || ext === ".go") return "go";
|
||||
if (file === "package.json" || ext === ".ts" || ext === ".tsx")
|
||||
return "typescript";
|
||||
if (ext === ".js" || ext === ".jsx" || ext === ".mjs" || ext === ".cjs")
|
||||
return "javascript";
|
||||
if (file === "cargo.toml" || ext === ".rs") return "rust";
|
||||
if (file === "pyproject.toml" || ext === ".py") return "python";
|
||||
if (file === "flake.nix" || file === "shell.nix" || ext === ".nix")
|
||||
return "nix";
|
||||
if (ext === ".md" || ext === ".markdown") return "markdown";
|
||||
if (ext === ".sql") return "sql";
|
||||
if (file.includes("dockerfile")) return "dockerfile";
|
||||
if (ext === ".yml" || ext === ".yaml") return "yaml";
|
||||
if (ext === ".json") return "json";
|
||||
return null;
|
||||
}
|
||||
|
||||
function summarizeFile(
|
||||
basePath: string,
|
||||
relPath: string,
|
||||
): {
|
||||
sizeBytes: number;
|
||||
contentHash: string | null;
|
||||
summary: string | null;
|
||||
} {
|
||||
const fullPath = join(basePath, relPath);
|
||||
try {
|
||||
const stat = statSync(fullPath);
|
||||
if (!stat.isFile()) {
|
||||
return { sizeBytes: stat.size, contentHash: null, summary: null };
|
||||
}
|
||||
if (stat.size > HASH_READ_LIMIT_BYTES) {
|
||||
return { sizeBytes: stat.size, contentHash: null, summary: null };
|
||||
}
|
||||
const content = readFileSync(fullPath);
|
||||
const contentHash = `sha256:${createHash("sha256").update(content).digest("hex")}`;
|
||||
const asText = content.toString("utf8");
|
||||
const firstLine = asText
|
||||
.split(/\r?\n/)
|
||||
.map((line) => line.trim())
|
||||
.find((line) => line.length > 0);
|
||||
return {
|
||||
sizeBytes: stat.size,
|
||||
contentHash,
|
||||
summary: firstLine ? firstLine.slice(0, 160) : null,
|
||||
};
|
||||
} catch {
|
||||
return { sizeBytes: 0, contentHash: null, summary: null };
|
||||
}
|
||||
}
|
||||
|
||||
function parsePorcelainStatus(
|
||||
line: string,
|
||||
basePath: string,
|
||||
now: string,
|
||||
): RepoFileObservation | null {
|
||||
if (line.length < 4) return null;
|
||||
const code = line.slice(0, 2);
|
||||
const rawPath = line.slice(3);
|
||||
const normalizedRawPath = normalizePath(rawPath);
|
||||
const path = normalizedRawPath.includes(" -> ")
|
||||
? normalizedRawPath.split(" -> ").at(-1)!
|
||||
: normalizedRawPath;
|
||||
|
||||
let gitStatus: RepoGitStatus = "modified";
|
||||
if (code === "??") gitStatus = "untracked";
|
||||
else if (code.includes("D")) gitStatus = "deleted";
|
||||
else if (code.includes("R")) gitStatus = "renamed";
|
||||
|
||||
const ownership: RepoFileOwnership =
|
||||
gitStatus === "untracked" ? "observed_only" : "user_owned";
|
||||
const fileSummary =
|
||||
gitStatus === "deleted"
|
||||
? { sizeBytes: 0, contentHash: null, summary: null }
|
||||
: summarizeFile(basePath, path);
|
||||
|
||||
return {
|
||||
path,
|
||||
gitStatus,
|
||||
ownership,
|
||||
language: languageForPath(path),
|
||||
sizeBytes: fileSummary.sizeBytes,
|
||||
contentHash: fileSummary.contentHash,
|
||||
summary: fileSummary.summary,
|
||||
firstSeenAt: now,
|
||||
lastSeenAt: now,
|
||||
adoptedAt: null,
|
||||
adoptionUnitId: null,
|
||||
};
|
||||
}
|
||||
|
||||
function detectStacks(basePath: string): StackSignal[] {
|
||||
const stacks: StackSignal[] = [];
|
||||
if (pathExists(basePath, "package.json"))
|
||||
signal(stacks, { kind: "node", sourcePath: "package.json", confidence: 1 });
|
||||
if (pathExists(basePath, "go.mod"))
|
||||
signal(stacks, { kind: "go", sourcePath: "go.mod", confidence: 1 });
|
||||
if (pathExists(basePath, "Cargo.toml"))
|
||||
signal(stacks, { kind: "rust", sourcePath: "Cargo.toml", confidence: 1 });
|
||||
if (pathExists(basePath, "pyproject.toml"))
|
||||
signal(stacks, {
|
||||
kind: "python",
|
||||
sourcePath: "pyproject.toml",
|
||||
confidence: 1,
|
||||
});
|
||||
if (pathExists(basePath, "flake.nix"))
|
||||
signal(stacks, { kind: "nix", sourcePath: "flake.nix", confidence: 1 });
|
||||
if (pathExists(basePath, "shell.nix"))
|
||||
signal(stacks, { kind: "nix", sourcePath: "shell.nix", confidence: 0.9 });
|
||||
return stacks;
|
||||
}
|
||||
|
||||
function detectEntrypoints(basePath: string): EntrypointSignal[] {
|
||||
const entrypoints: EntrypointSignal[] = [];
|
||||
for (const path of [
|
||||
"src/index.ts",
|
||||
"src/main.ts",
|
||||
"src/cli.ts",
|
||||
"cmd",
|
||||
"main.go",
|
||||
"portal/main.go",
|
||||
"dr-agent/main.go",
|
||||
]) {
|
||||
if (pathExists(basePath, path))
|
||||
signal(entrypoints, { kind: "entrypoint", path, confidence: 0.8 });
|
||||
}
|
||||
return entrypoints;
|
||||
}
|
||||
|
||||
function detectTests(basePath: string): TestSignal[] {
|
||||
const tests: TestSignal[] = [];
|
||||
for (const path of ["tests", "test", "__tests__", "src/tests"]) {
|
||||
if (pathExists(basePath, path))
|
||||
signal(tests, { kind: "test-directory", path, confidence: 0.8 });
|
||||
}
|
||||
return tests;
|
||||
}
|
||||
|
||||
function detectCi(basePath: string): CiSignal[] {
|
||||
const ci: CiSignal[] = [];
|
||||
for (const path of [".github/workflows", ".gitlab-ci.yml", "Jenkinsfile"]) {
|
||||
if (pathExists(basePath, path))
|
||||
signal(ci, { kind: "ci", path, confidence: 0.9 });
|
||||
}
|
||||
return ci;
|
||||
}
|
||||
|
||||
function detectDocs(basePath: string): DocumentSignal[] {
|
||||
const docs: DocumentSignal[] = [];
|
||||
for (const path of [
|
||||
"SPEC.md",
|
||||
"ARCHITECTURE.md",
|
||||
"AGENTS.md",
|
||||
"README.md",
|
||||
"docs",
|
||||
]) {
|
||||
if (pathExists(basePath, path))
|
||||
signal(docs, { kind: "doc", path, confidence: 0.9 });
|
||||
}
|
||||
return docs;
|
||||
}
|
||||
|
||||
function detectDataStores(basePath: string): DataStoreSignal[] {
|
||||
const stores: DataStoreSignal[] = [];
|
||||
for (const path of ["migrations", "prisma", "db", "sql"]) {
|
||||
if (pathExists(basePath, path))
|
||||
signal(stores, { kind: "database", path, confidence: 0.8 });
|
||||
}
|
||||
return stores;
|
||||
}
|
||||
|
||||
function detectNetworkSurfaces(basePath: string): NetworkSurfaceSignal[] {
|
||||
const surfaces: NetworkSurfaceSignal[] = [];
|
||||
for (const path of ["portal", "gateway", "api", "web", "server"]) {
|
||||
if (pathExists(basePath, path))
|
||||
signal(surfaces, { kind: "network", path, confidence: 0.7 });
|
||||
}
|
||||
return surfaces;
|
||||
}
|
||||
|
||||
function buildRiskHints(args: {
|
||||
stacks: StackSignal[];
|
||||
tests: TestSignal[];
|
||||
ci: CiSignal[];
|
||||
dataStores: DataStoreSignal[];
|
||||
networkSurfaces: NetworkSurfaceSignal[];
|
||||
changedFiles: RepoFileObservation[];
|
||||
}): RiskHint[] {
|
||||
const hints: RiskHint[] = [];
|
||||
if (args.networkSurfaces.length > 0) {
|
||||
hints.push({
|
||||
family: "web",
|
||||
reason: "network-facing directories detected",
|
||||
confidence: 0.7,
|
||||
});
|
||||
}
|
||||
if (args.dataStores.length > 0) {
|
||||
hints.push({
|
||||
family: "database",
|
||||
reason: "database or migration directories detected",
|
||||
confidence: 0.8,
|
||||
});
|
||||
}
|
||||
if (args.stacks.some((stack) => stack.kind === "nix")) {
|
||||
hints.push({
|
||||
family: "infrastructure",
|
||||
reason: "Nix project files detected",
|
||||
confidence: 0.8,
|
||||
});
|
||||
}
|
||||
if (args.changedFiles.some((file) => file.gitStatus === "untracked")) {
|
||||
hints.push({
|
||||
family: "harness-drift",
|
||||
reason: "untracked files observed in working tree",
|
||||
confidence: 0.6,
|
||||
});
|
||||
}
|
||||
if (args.tests.length === 0 || args.ci.length === 0) {
|
||||
hints.push({
|
||||
family: "verification-gap",
|
||||
reason: "tests or CI signals are missing from the repo profile",
|
||||
confidence: 0.5,
|
||||
});
|
||||
}
|
||||
return hints;
|
||||
}
|
||||
|
||||
function canonicalRemote(remote: string): string {
|
||||
return remote
|
||||
.trim()
|
||||
.replace(/^https?:\/\/([^/@]+@)?/i, "https://")
|
||||
.replace(/\.git$/i, "")
|
||||
.toLowerCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a read-only repository profile from git status and well-known files.
|
||||
*
|
||||
* Purpose: seed harness evolution with observable repo facts while preserving
|
||||
* user ownership of untracked and modified files.
|
||||
*
|
||||
* Consumer: `/sf harness profile` and future pre-plan harness snapshots.
|
||||
*/
|
||||
export function profileRepository(
|
||||
basePath: string,
|
||||
options: RepoProfileOptions = {},
|
||||
): RepoProfile {
|
||||
const createdAt = options.now?.() ?? new Date().toISOString();
|
||||
const projectRoot = realpathSync(basePath);
|
||||
const head = git(["rev-parse", "HEAD"], projectRoot) || null;
|
||||
const branch = git(["branch", "--show-current"], projectRoot) || null;
|
||||
const remote = git(["remote", "get-url", "origin"], projectRoot) || "";
|
||||
const remoteHash = remote
|
||||
? `sha256:${sha256(canonicalRemote(remote))}`
|
||||
: null;
|
||||
const projectHash = remote
|
||||
? sha256(canonicalRemote(remote)).slice(0, 16)
|
||||
: sha256(projectRoot).slice(0, 16);
|
||||
const status = git(["status", "--porcelain=v1", "-uall"], projectRoot);
|
||||
const changedFiles = status
|
||||
.split(/\r?\n/)
|
||||
.map((line) => line.trimEnd())
|
||||
.filter(Boolean)
|
||||
.map((line) => parsePorcelainStatus(line, projectRoot, createdAt))
|
||||
.filter((obs): obs is RepoFileObservation => obs !== null)
|
||||
.sort((a, b) => a.path.localeCompare(b.path));
|
||||
|
||||
const stacks = detectStacks(projectRoot);
|
||||
const entrypoints = detectEntrypoints(projectRoot);
|
||||
const tests = detectTests(projectRoot);
|
||||
const ci = detectCi(projectRoot);
|
||||
const docs = detectDocs(projectRoot);
|
||||
const dataStores = detectDataStores(projectRoot);
|
||||
const networkSurfaces = detectNetworkSurfaces(projectRoot);
|
||||
const riskHints = buildRiskHints({
|
||||
stacks,
|
||||
tests,
|
||||
ci,
|
||||
dataStores,
|
||||
networkSurfaces,
|
||||
changedFiles,
|
||||
});
|
||||
const profileId = `rp_${sha256(
|
||||
`${projectHash}:${head ?? ""}:${branch ?? ""}:${createdAt}:${changedFiles
|
||||
.map((file) => `${file.gitStatus}:${file.path}:${file.contentHash ?? ""}`)
|
||||
.join("|")}`,
|
||||
).slice(0, 24)}`;
|
||||
|
||||
return {
|
||||
profileId,
|
||||
projectHash,
|
||||
projectRoot,
|
||||
git: {
|
||||
head,
|
||||
branch,
|
||||
remoteHash,
|
||||
dirty: changedFiles.length > 0,
|
||||
changedFiles,
|
||||
},
|
||||
stacks,
|
||||
entrypoints,
|
||||
tests,
|
||||
ci,
|
||||
docs,
|
||||
dataStores,
|
||||
networkSurfaces,
|
||||
riskHints,
|
||||
createdAt,
|
||||
};
|
||||
}
|
||||
|
|
@ -197,7 +197,7 @@ function openRawDb(path: string): unknown {
|
|||
return new Database(path);
|
||||
}
|
||||
|
||||
const SCHEMA_VERSION = 20;
|
||||
const SCHEMA_VERSION = 21;
|
||||
|
||||
function indexExists(db: DbAdapter, name: string): boolean {
|
||||
return !!db
|
||||
|
|
@ -226,6 +226,44 @@ function ensureVerificationEvidenceDedupIndex(db: DbAdapter): void {
|
|||
);
|
||||
}
|
||||
|
||||
function ensureRepoProfileTables(db: DbAdapter): void {
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS repo_profiles (
|
||||
profile_id TEXT PRIMARY KEY,
|
||||
project_hash TEXT NOT NULL,
|
||||
project_root TEXT NOT NULL DEFAULT '',
|
||||
head TEXT DEFAULT NULL,
|
||||
branch TEXT DEFAULT NULL,
|
||||
remote_hash TEXT DEFAULT NULL,
|
||||
dirty INTEGER NOT NULL DEFAULT 0,
|
||||
profile_json TEXT NOT NULL DEFAULT '{}',
|
||||
created_at TEXT NOT NULL
|
||||
)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS repo_file_observations (
|
||||
path TEXT PRIMARY KEY,
|
||||
latest_profile_id TEXT NOT NULL,
|
||||
git_status TEXT NOT NULL,
|
||||
ownership TEXT NOT NULL,
|
||||
language TEXT DEFAULT NULL,
|
||||
size_bytes INTEGER NOT NULL DEFAULT 0,
|
||||
content_hash TEXT DEFAULT NULL,
|
||||
summary TEXT DEFAULT NULL,
|
||||
first_seen_at TEXT NOT NULL,
|
||||
last_seen_at TEXT NOT NULL,
|
||||
adopted_at TEXT DEFAULT NULL,
|
||||
adoption_unit_id TEXT DEFAULT NULL
|
||||
)
|
||||
`);
|
||||
db.exec(
|
||||
"CREATE INDEX IF NOT EXISTS idx_repo_profiles_created ON repo_profiles(created_at DESC)",
|
||||
);
|
||||
db.exec(
|
||||
"CREATE INDEX IF NOT EXISTS idx_repo_file_observations_status ON repo_file_observations(git_status, ownership)",
|
||||
);
|
||||
}
|
||||
|
||||
function initSchema(db: DbAdapter, fileBacked: boolean): void {
|
||||
if (fileBacked) db.exec("PRAGMA journal_mode=WAL");
|
||||
if (fileBacked) db.exec("PRAGMA busy_timeout = 5000");
|
||||
|
|
@ -609,6 +647,7 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void {
|
|||
db.exec(
|
||||
"CREATE INDEX IF NOT EXISTS idx_llm_task_outcomes_provider ON llm_task_outcomes(provider, recorded_at DESC)",
|
||||
);
|
||||
ensureRepoProfileTables(db);
|
||||
|
||||
db.exec(
|
||||
`CREATE VIEW IF NOT EXISTS active_decisions AS SELECT * FROM decisions WHERE superseded_by IS NULL`,
|
||||
|
|
@ -1414,6 +1453,16 @@ function migrateSchema(db: DbAdapter): void {
|
|||
});
|
||||
}
|
||||
|
||||
if (currentVersion < 21) {
|
||||
ensureRepoProfileTables(db);
|
||||
db.prepare(
|
||||
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
|
||||
).run({
|
||||
":version": 21,
|
||||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
|
||||
db.exec("COMMIT");
|
||||
} catch (err) {
|
||||
db.exec("ROLLBACK");
|
||||
|
|
@ -4019,6 +4068,236 @@ export function insertLlmTaskOutcome(input: LlmTaskOutcomeInput): boolean {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Input shape for persisting a repository profile snapshot.
|
||||
*
|
||||
* Purpose: keep sf-db decoupled from repo-profiler runtime imports while
|
||||
* enforcing the storage contract at compile time.
|
||||
*
|
||||
* Consumer: recordRepoProfile callers.
|
||||
*/
|
||||
export interface RepoProfileStorageInput {
|
||||
profileId: string;
|
||||
projectHash: string;
|
||||
projectRoot: string;
|
||||
git: {
|
||||
head: string | null;
|
||||
branch: string | null;
|
||||
remoteHash: string | null;
|
||||
dirty: boolean;
|
||||
changedFiles: Array<{
|
||||
path: string;
|
||||
gitStatus: string;
|
||||
ownership: string;
|
||||
language: string | null;
|
||||
sizeBytes: number;
|
||||
contentHash: string | null;
|
||||
summary: string | null;
|
||||
firstSeenAt: string;
|
||||
lastSeenAt: string;
|
||||
adoptedAt: string | null;
|
||||
adoptionUnitId: string | null;
|
||||
}>;
|
||||
};
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Database row returned for a stored repository profile.
|
||||
*
|
||||
* Purpose: expose profile metadata without forcing callers to parse JSON.
|
||||
*
|
||||
* Consumer: harness status and future drift diagnostics.
|
||||
*/
|
||||
export interface RepoProfileRow {
|
||||
profileId: string;
|
||||
projectHash: string;
|
||||
projectRoot: string;
|
||||
head: string | null;
|
||||
branch: string | null;
|
||||
remoteHash: string | null;
|
||||
dirty: boolean;
|
||||
profileJson: string;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Database row returned for the current path observation ledger.
|
||||
*
|
||||
* Purpose: let harness planning inspect file ownership and first/last-seen
|
||||
* timestamps without touching raw SQL.
|
||||
*
|
||||
* Consumer: future harness planner and diagnostics.
|
||||
*/
|
||||
export interface RepoFileObservationRow {
|
||||
path: string;
|
||||
latestProfileId: string;
|
||||
gitStatus: string;
|
||||
ownership: string;
|
||||
language: string | null;
|
||||
sizeBytes: number;
|
||||
contentHash: string | null;
|
||||
summary: string | null;
|
||||
firstSeenAt: string;
|
||||
lastSeenAt: string;
|
||||
adoptedAt: string | null;
|
||||
adoptionUnitId: string | null;
|
||||
}
|
||||
|
||||
function asStringOrNull(value: unknown): string | null {
|
||||
return typeof value === "string" && value.length > 0 ? value : null;
|
||||
}
|
||||
|
||||
/**
 * Persist a repository profile snapshot and update current file observations.
 *
 * Purpose: make harness evolution's read-only repo facts queryable across
 * sessions while preserving first-seen timestamps for untracked observations.
 *
 * Both writes run inside a single transaction so the snapshot row and its
 * per-file observations land atomically.
 *
 * @param profile - Profiler output plus identity and timestamps to store.
 * @throws SFError (SF_STALE_STATE) when no database is currently open.
 *
 * Consumer: `/sf harness profile` and future pre-plan profile snapshots.
 */
export function recordRepoProfile(profile: RepoProfileStorageInput): void {
  if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
  transaction(() => {
    // Snapshot row: INSERT OR REPLACE means re-recording an existing
    // profile_id overwrites that snapshot instead of duplicating it.
    currentDb!
      .prepare(
        `INSERT OR REPLACE INTO repo_profiles (
        profile_id, project_hash, project_root, head, branch, remote_hash,
        dirty, profile_json, created_at
      ) VALUES (
        :profile_id, :project_hash, :project_root, :head, :branch, :remote_hash,
        :dirty, :profile_json, :created_at
      )`,
      )
      .run({
        ":profile_id": profile.profileId,
        ":project_hash": profile.projectHash,
        ":project_root": profile.projectRoot,
        ":head": profile.git.head,
        ":branch": profile.git.branch,
        ":remote_hash": profile.git.remoteHash,
        // SQLite has no boolean type; store dirty as 0/1.
        ":dirty": profile.git.dirty ? 1 : 0,
        // Full profile kept as JSON so new fields survive without a schema
        // migration.
        ":profile_json": JSON.stringify(profile),
        ":created_at": profile.createdAt,
      });

    // Per-file upsert keyed on path. The DO UPDATE clauses encode the merge
    // policy:
    //  - ownership is sticky for 'sf_generated' / 'candidate_harness' rows,
    //    so re-profiling cannot demote files SF already claims;
    //  - first_seen_at keeps the original observation time;
    //  - adopted_at / adoption_unit_id keep their first non-null value.
    const stmt = currentDb!.prepare(
      `INSERT INTO repo_file_observations (
      path, latest_profile_id, git_status, ownership, language, size_bytes,
      content_hash, summary, first_seen_at, last_seen_at, adopted_at,
      adoption_unit_id
    ) VALUES (
      :path, :latest_profile_id, :git_status, :ownership, :language, :size_bytes,
      :content_hash, :summary, :first_seen_at, :last_seen_at, :adopted_at,
      :adoption_unit_id
    )
    ON CONFLICT(path) DO UPDATE SET
      latest_profile_id = excluded.latest_profile_id,
      git_status = excluded.git_status,
      ownership = CASE
        WHEN repo_file_observations.ownership = 'sf_generated'
          THEN repo_file_observations.ownership
        WHEN repo_file_observations.ownership = 'candidate_harness'
          THEN repo_file_observations.ownership
        ELSE excluded.ownership
      END,
      language = excluded.language,
      size_bytes = excluded.size_bytes,
      content_hash = excluded.content_hash,
      summary = excluded.summary,
      first_seen_at = repo_file_observations.first_seen_at,
      last_seen_at = excluded.last_seen_at,
      adopted_at = COALESCE(repo_file_observations.adopted_at, excluded.adopted_at),
      adoption_unit_id = COALESCE(repo_file_observations.adoption_unit_id, excluded.adoption_unit_id)`,
    );

    // One upsert per changed file observed by this profile run.
    for (const file of profile.git.changedFiles) {
      stmt.run({
        ":path": file.path,
        ":latest_profile_id": profile.profileId,
        ":git_status": file.gitStatus,
        ":ownership": file.ownership,
        ":language": file.language,
        ":size_bytes": file.sizeBytes,
        ":content_hash": file.contentHash,
        ":summary": file.summary,
        ":first_seen_at": file.firstSeenAt,
        ":last_seen_at": file.lastSeenAt,
        ":adopted_at": file.adoptedAt,
        ":adoption_unit_id": file.adoptionUnitId,
      });
    }
  });
}
|
||||
|
||||
/**
|
||||
* Return the most recently recorded repository profile.
|
||||
*
|
||||
* Purpose: let harness planning and diagnostics inspect the latest factual
|
||||
* repo snapshot without re-running the profiler.
|
||||
*
|
||||
* Consumer: harness status commands and future plan-phase coverage checks.
|
||||
*/
|
||||
export function getLatestRepoProfile(): RepoProfileRow | null {
|
||||
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
|
||||
const row = currentDb
|
||||
.prepare(
|
||||
`SELECT profile_id, project_hash, project_root, head, branch, remote_hash,
|
||||
dirty, profile_json, created_at
|
||||
FROM repo_profiles
|
||||
ORDER BY created_at DESC, profile_id DESC
|
||||
LIMIT 1`,
|
||||
)
|
||||
.get();
|
||||
if (!row) return null;
|
||||
return {
|
||||
profileId: row["profile_id"] as string,
|
||||
projectHash: row["project_hash"] as string,
|
||||
projectRoot: row["project_root"] as string,
|
||||
head: asStringOrNull(row["head"]),
|
||||
branch: asStringOrNull(row["branch"]),
|
||||
remoteHash: asStringOrNull(row["remote_hash"]),
|
||||
dirty: row["dirty"] === 1,
|
||||
profileJson: (row["profile_json"] as string) ?? "{}",
|
||||
createdAt: row["created_at"] as string,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the current file observations accumulated by repo profiling.
|
||||
*
|
||||
* Purpose: keep untracked and modified file awareness queryable without
|
||||
* treating those paths as SF-owned artifacts.
|
||||
*
|
||||
* Consumer: harness planning, diagnostics, and future drift detection.
|
||||
*/
|
||||
export function getRepoFileObservations(): RepoFileObservationRow[] {
|
||||
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
|
||||
return currentDb
|
||||
.prepare(
|
||||
`SELECT path, latest_profile_id, git_status, ownership, language,
|
||||
size_bytes, content_hash, summary, first_seen_at, last_seen_at,
|
||||
adopted_at, adoption_unit_id
|
||||
FROM repo_file_observations
|
||||
ORDER BY path ASC`,
|
||||
)
|
||||
.all()
|
||||
.map((row) => ({
|
||||
path: row["path"] as string,
|
||||
latestProfileId: row["latest_profile_id"] as string,
|
||||
gitStatus: row["git_status"] as string,
|
||||
ownership: row["ownership"] as string,
|
||||
language: asStringOrNull(row["language"]),
|
||||
sizeBytes: (row["size_bytes"] as number) ?? 0,
|
||||
contentHash: asStringOrNull(row["content_hash"]),
|
||||
summary: asStringOrNull(row["summary"]),
|
||||
firstSeenAt: row["first_seen_at"] as string,
|
||||
lastSeenAt: row["last_seen_at"] as string,
|
||||
adoptedAt: asStringOrNull(row["adopted_at"]),
|
||||
adoptionUnitId: asStringOrNull(row["adoption_unit_id"]),
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
* INSERT OR REPLACE a quality_gates row. Used by milestone-validation-gates.ts
|
||||
* to persist milestone-level (MV*) gate outcomes after validate-milestone runs.
|
||||
|
|
|
|||
125
src/resources/extensions/sf/tests/repo-profiler.test.ts
Normal file
125
src/resources/extensions/sf/tests/repo-profiler.test.ts
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { execFileSync } from "node:child_process";
|
||||
import {
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
rmSync,
|
||||
statSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import test from "node:test";
|
||||
import { profileRepository } from "../repo-profiler.ts";
|
||||
import {
|
||||
closeDatabase,
|
||||
getLatestRepoProfile,
|
||||
getRepoFileObservations,
|
||||
openDatabase,
|
||||
recordRepoProfile,
|
||||
} from "../sf-db.ts";
|
||||
|
||||
function run(args: string[], cwd: string): string {
|
||||
return execFileSync("git", args, {
|
||||
cwd,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
encoding: "utf-8",
|
||||
}).trim();
|
||||
}
|
||||
|
||||
function makeRepo(): string {
|
||||
const dir = mkdtempSync(join(tmpdir(), "sf-repo-profiler-"));
|
||||
run(["init", "-b", "main"], dir);
|
||||
run(["config", "user.email", "test@example.com"], dir);
|
||||
run(["config", "user.name", "SF Test"], dir);
|
||||
writeFileSync(join(dir, "README.md"), "# Repo\n");
|
||||
writeFileSync(
|
||||
join(dir, "package.json"),
|
||||
'{"scripts":{"test":"node --test"}}\n',
|
||||
);
|
||||
run(["add", "README.md", "package.json"], dir);
|
||||
run(["commit", "-m", "init"], dir);
|
||||
return dir;
|
||||
}
|
||||
|
||||
test("profileRepository_when_untracked_file_exists_marks_observed_only", () => {
|
||||
const repo = makeRepo();
|
||||
try {
|
||||
writeFileSync(join(repo, "README.md"), "# Repo\n\nchanged\n");
|
||||
mkdirSync(join(repo, "docs"));
|
||||
writeFileSync(join(repo, "docs", "scratch.md"), "# Scratch\nlocal note\n");
|
||||
|
||||
const beforeStatus = run(["status", "--short"], repo);
|
||||
const profile = profileRepository(repo, {
|
||||
now: () => "2026-04-29T10:00:00.000Z",
|
||||
});
|
||||
const afterStatus = run(["status", "--short"], repo);
|
||||
|
||||
assert.equal(afterStatus, beforeStatus, "profiling must be read-only");
|
||||
assert.equal(profile.git.branch, "main");
|
||||
assert.equal(profile.git.dirty, true);
|
||||
assert.ok(profile.git.head, "profile records HEAD when available");
|
||||
assert.ok(
|
||||
profile.stacks.some(
|
||||
(signal) =>
|
||||
signal.kind === "node" && signal.sourcePath === "package.json",
|
||||
),
|
||||
"package.json produces a node stack signal",
|
||||
);
|
||||
|
||||
const modified = profile.git.changedFiles.find(
|
||||
(file) => file.path === "README.md",
|
||||
);
|
||||
assert.equal(modified?.gitStatus, "modified");
|
||||
assert.equal(modified?.ownership, "user_owned");
|
||||
|
||||
const untracked = profile.git.changedFiles.find(
|
||||
(file) => file.path === "docs/scratch.md",
|
||||
);
|
||||
assert.equal(untracked?.gitStatus, "untracked");
|
||||
assert.equal(untracked?.ownership, "observed_only");
|
||||
assert.equal(
|
||||
untracked?.sizeBytes,
|
||||
statSync(join(repo, "docs/scratch.md")).size,
|
||||
);
|
||||
assert.match(untracked?.contentHash ?? "", /^sha256:[a-f0-9]{64}$/);
|
||||
} finally {
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("recordRepoProfile_when_file_observed_again_preserves_first_seen", () => {
|
||||
const repo = makeRepo();
|
||||
try {
|
||||
mkdirSync(join(repo, "docs"));
|
||||
writeFileSync(join(repo, "docs", "scratch.md"), "# Scratch\n");
|
||||
|
||||
closeDatabase();
|
||||
assert.equal(openDatabase(":memory:"), true);
|
||||
|
||||
const first = profileRepository(repo, {
|
||||
now: () => "2026-04-29T10:00:00.000Z",
|
||||
});
|
||||
recordRepoProfile(first);
|
||||
|
||||
writeFileSync(join(repo, "docs", "scratch.md"), "# Scratch\nupdated\n");
|
||||
const second = profileRepository(repo, {
|
||||
now: () => "2026-04-29T10:05:00.000Z",
|
||||
});
|
||||
recordRepoProfile(second);
|
||||
|
||||
const latest = getLatestRepoProfile();
|
||||
assert.equal(latest?.profileId, second.profileId);
|
||||
|
||||
const observations = getRepoFileObservations();
|
||||
const scratch = observations.find((obs) => obs.path === "docs/scratch.md");
|
||||
assert.equal(scratch?.ownership, "observed_only");
|
||||
assert.equal(scratch?.gitStatus, "untracked");
|
||||
assert.equal(scratch?.firstSeenAt, "2026-04-29T10:00:00.000Z");
|
||||
assert.equal(scratch?.lastSeenAt, "2026-04-29T10:05:00.000Z");
|
||||
assert.equal(scratch?.latestProfileId, second.profileId);
|
||||
} finally {
|
||||
closeDatabase();
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue