feat(self-feedback): enforce kind taxonomy at recordSelfFeedback
Addresses sf-mp4rxkwt-sfthez (gap:self-feedback-kind-vocabulary-unbounded).
The reflection report identified this as part of the deepest architectural
concern (4 entries clustered under data-plane isolation), and the
threshold-promoter was structurally unable to fire because every entry's
kind was a unique string (clusters by exact match).
Add a `domain:family[:specific]` taxonomy validated at recordSelfFeedback
write time:
ALLOWED_KIND_DOMAINS enum of allowed top-level domains (gap,
architecture-defect, architectural-risk,
inconsistency, runaway-loop, schema-drift,
janitor-gap, upstream-rollup, reflection,
copilot-parity-gaps, gap-audit-orphan-prompt,
gap-audit-orphan-command, flow-audit,
executor-refused, solver-missing-checkpoint,
runaway-guard-hard-pause,
self-feedback-resolution)
KIND_SEGMENT_RE /^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$/ — kebab-case
per segment
validateKind(kind) accepts:
domain (1-segment legacy)
domain:family (2-segment canonical)
domain👪specific (3-segment specific)
rejects: empty, non-string, >3 segments,
unknown domain, non-kebab segments
recordSelfFeedback now returns null when validateKind fails, with a
warning logged via workflow-logger. Existing rows in the ledger are
grandfathered (validation only fires on NEW writes through this entry
point) so the migration is non-destructive.
This unblocks the threshold-promoter to cluster by domain:family
prefix once the requirement-promoter is updated to do so (separate
follow-up). Detectors and reflection passes can now reason about
domains rather than handfuls of unique strings.
Tests: 3 new (canonical-shapes / malformed-rejected / non-string-rejected).
8 existing test fixtures updated to use canonical kinds (gap:test-feedback
etc.) — they were using bare slugs that the new validation correctly
rejects.
1596/1596 SF extension tests pass; typecheck clean.
Note on prior dispatch: this work was first attempted via "sf headless -p"
to dogfood the new memory rule (drive SF work through sf headless, not
parallel Claude Code agents). The dispatch ran 49s with 8 tool calls but
landed nothing — the same fragility documented in sf-mp4rxkwb-l4baga
(triage-not-a-first-class-unit-type). Hand-coding fallback applied;
fragility data point added to the open entry's evidence trail.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e2f631901f
commit
83c28b756c
2 changed files with 151 additions and 10 deletions
|
|
@ -46,6 +46,7 @@ import {
|
|||
resolveSelfFeedbackEntry,
|
||||
} from "./sf-db.js";
|
||||
import { createMemory } from "./memory-store.js";
|
||||
import { logWarning } from "./workflow-logger.js";
|
||||
|
||||
const SELF_FEEDBACK_HEADER =
|
||||
"# SF Self-Feedback\n\n" +
|
||||
|
|
@ -394,12 +395,96 @@ function readActiveUnit(basePath) {
|
|||
return undefined;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Allowed top-level domains for self-feedback `kind` strings (sf-mp4rxkwt-sfthez).
|
||||
*
|
||||
* Why an enum + structured shape: free-form `kind` made the threshold-promoter
|
||||
* cluster by exact-string and almost never fire — every entry has a unique
|
||||
* suffix. With a `domain:family[:specific]` shape, the promoter can cluster
|
||||
* by `domain:family` prefix and detectors gain a stable taxonomy. Reflection
|
||||
* passes can also reason about whole domains rather than handfuls of unique
|
||||
* strings. Existing entries with single-segment kinds remain readable
|
||||
* (we only validate NEW writes via recordSelfFeedback) so the migration is
|
||||
* non-destructive.
|
||||
*
|
||||
* Adding a new domain: add it here AND name the family/specific convention
|
||||
* you're introducing in the entry's PDD so future operators can find it.
|
||||
*/
|
||||
const ALLOWED_KIND_DOMAINS = new Set([
|
||||
"gap",
|
||||
"architecture-defect",
|
||||
"architectural-risk",
|
||||
"inconsistency",
|
||||
"runaway-loop",
|
||||
"runaway-guard-hard-pause",
|
||||
"schema-drift",
|
||||
"janitor-gap",
|
||||
"upstream-rollup",
|
||||
"reflection",
|
||||
"copilot-parity-gaps",
|
||||
"gap-audit-orphan-prompt",
|
||||
"gap-audit-orphan-command",
|
||||
"flow-audit",
|
||||
"executor-refused",
|
||||
"solver-missing-checkpoint",
|
||||
"self-feedback-resolution",
|
||||
]);
|
||||
|
||||
const KIND_SEGMENT_RE = /^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$/;
|
||||
|
||||
/**
|
||||
* Validate a kind string. Accepts:
|
||||
* `domain` — single-segment legacy shape, requires the
|
||||
* domain to be in the allow-list. We accept this
|
||||
* so existing detectors that file e.g.
|
||||
* `runaway-guard-hard-pause` keep working until
|
||||
* they're migrated to a `domain:family` form.
|
||||
* `domain:family` — two-segment canonical shape. Both segments
|
||||
* kebab-case. Domain must be in the allow-list.
|
||||
* `domain:family:specific` — three-segment specific shape, all kebab.
|
||||
*
|
||||
* Returns `null` when valid; an error message string when not. The string
|
||||
* surface is so recordSelfFeedback callers (and tests) can see WHY a write
|
||||
* was refused instead of guessing.
|
||||
*/
|
||||
function validateKind(kind) {
|
||||
if (typeof kind !== "string" || kind.length === 0) {
|
||||
return "kind must be a non-empty string";
|
||||
}
|
||||
const segments = kind.split(":");
|
||||
if (segments.length < 1 || segments.length > 3) {
|
||||
return `kind "${kind}" must have 1, 2, or 3 colon-separated segments (got ${segments.length})`;
|
||||
}
|
||||
for (const seg of segments) {
|
||||
if (!KIND_SEGMENT_RE.test(seg)) {
|
||||
return `kind "${kind}" segment "${seg}" must be kebab-case (lowercase letters/digits, internal hyphens)`;
|
||||
}
|
||||
}
|
||||
if (!ALLOWED_KIND_DOMAINS.has(segments[0])) {
|
||||
return `kind "${kind}" uses unknown domain "${segments[0]}"; valid domains: ${Array.from(ALLOWED_KIND_DOMAINS).sort().join(", ")}`;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// ─── Public API ────────────────────────────────────────────────────────────
|
||||
/**
|
||||
* Record a self-feedback entry to the appropriate channel (project or upstream).
|
||||
* Non-fatal — write errors are swallowed and null is returned on failure.
|
||||
*
|
||||
* Kind validation (sf-mp4rxkwt-sfthez): the entry's `kind` MUST satisfy
|
||||
* validateKind — see ALLOWED_KIND_DOMAINS + KIND_SEGMENT_RE. Writes with
|
||||
* malformed kinds return null and emit a warning to workflow-logger.
|
||||
*/
|
||||
export function recordSelfFeedback(entry, basePath = process.cwd()) {
|
||||
const kindError = validateKind(entry?.kind);
|
||||
if (kindError) {
|
||||
try {
|
||||
logWarning("self-feedback", `recordSelfFeedback rejected: ${kindError}`);
|
||||
} catch {
|
||||
// best-effort — never block on logger failure
|
||||
}
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
const occurredIn = entry.occurredIn ?? readActiveUnit(basePath);
|
||||
const persisted = {
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ test("recordSelfFeedback_when_db_available_writes_sqlite_and_versioned_projectio
|
|||
|
||||
const result = recordSelfFeedback(
|
||||
{
|
||||
kind: "test-feedback",
|
||||
kind: "gap:test-feedback",
|
||||
severity: "medium",
|
||||
summary: "DB backed feedback",
|
||||
evidence: "unit test",
|
||||
|
|
@ -83,7 +83,7 @@ test("readAllSelfFeedback_imports_legacy_jsonl_once_when_db_available", () => {
|
|||
const legacyEntry = {
|
||||
id: "legacy-1",
|
||||
ts: "2026-05-07T00:00:00.000Z",
|
||||
kind: "legacy-feedback",
|
||||
kind: "gap:legacy-feedback",
|
||||
severity: "high",
|
||||
blocking: true,
|
||||
summary: "Legacy JSONL entry",
|
||||
|
|
@ -109,7 +109,7 @@ test("markResolved_when_db_available_updates_sqlite_and_markdown_projection", ()
|
|||
const project = makeForgeProject();
|
||||
const result = recordSelfFeedback(
|
||||
{
|
||||
kind: "resolvable-feedback",
|
||||
kind: "gap:resolvable-feedback",
|
||||
severity: "high",
|
||||
summary: "Resolve through DB",
|
||||
},
|
||||
|
|
@ -144,7 +144,7 @@ test("markResolved_rejects_resolution_with_non_canonical_evidence_kind", () => {
|
|||
const project = makeForgeProject();
|
||||
const result = recordSelfFeedback(
|
||||
{
|
||||
kind: "writer-layer-test",
|
||||
kind: "gap:writer-layer-test",
|
||||
severity: "medium",
|
||||
summary: "Must be rejected when evidence kind is non-canonical",
|
||||
},
|
||||
|
|
@ -195,7 +195,7 @@ test("markResolved_accepts_each_canonical_evidence_kind", () => {
|
|||
];
|
||||
for (const evidence of kinds) {
|
||||
const filed = recordSelfFeedback(
|
||||
{ kind: `accepts-${evidence.kind}`, severity: "low", summary: "x" },
|
||||
{ kind: `gap:accepts-${evidence.kind}`, severity: "low", summary: "x" },
|
||||
project,
|
||||
);
|
||||
const ok = markResolved(
|
||||
|
|
@ -223,7 +223,7 @@ test("markResolved_rejects_agent_fix_with_nonexistent_commit_sha", async () => {
|
|||
});
|
||||
|
||||
const filed = recordSelfFeedback(
|
||||
{ kind: "commit-verif-test", severity: "high", summary: "x" },
|
||||
{ kind: "gap:commit-verif-test", severity: "high", summary: "x" },
|
||||
project,
|
||||
);
|
||||
|
||||
|
|
@ -261,7 +261,7 @@ test("markResolved_accepts_agent_fix_with_no_commit_sha_or_ungrokable_path", ()
|
|||
const project = makeForgeProject();
|
||||
// No git init → ungrokable → carve-out allows the resolution
|
||||
const filed = recordSelfFeedback(
|
||||
{ kind: "no-git-test", severity: "low", summary: "x" },
|
||||
{ kind: "gap:no-git-test", severity: "low", summary: "x" },
|
||||
project,
|
||||
);
|
||||
const ok = markResolved(
|
||||
|
|
@ -277,7 +277,7 @@ test("markResolved_accepts_agent_fix_with_no_commit_sha_or_ungrokable_path", ()
|
|||
// agent-fix without any commitSha is also accepted (commit not always
|
||||
// the right artifact — summaryNarrative or testPath may be)
|
||||
const filed2 = recordSelfFeedback(
|
||||
{ kind: "no-sha-test", severity: "low", summary: "x" },
|
||||
{ kind: "gap:no-sha-test", severity: "low", summary: "x" },
|
||||
project,
|
||||
);
|
||||
const ok2 = markResolved(
|
||||
|
|
@ -369,11 +369,67 @@ test("markResolved_memory_mirror_handles_human_clear_without_commit_tags", async
|
|||
);
|
||||
});
|
||||
|
||||
test("recordSelfFeedback_kind_validation_accepts_canonical_shapes", () => {
|
||||
const project = makeForgeProject();
|
||||
const cases = [
|
||||
"gap", // 1-segment legacy (allowed-domain)
|
||||
"gap:routing", // 2-segment domain:family
|
||||
"gap:routing:tiebreak-cost-only", // 3-segment domain:family:specific
|
||||
"architecture-defect:foo", // hyphenated domain works
|
||||
"architecture-defect:solver:executor-conflation",
|
||||
"runaway-loop", // 1-segment legacy
|
||||
"upstream-rollup:gap-audit-orphan-prompt",
|
||||
];
|
||||
for (const kind of cases) {
|
||||
const result = recordSelfFeedback(
|
||||
{ kind, severity: "low", summary: `test ${kind}` },
|
||||
project,
|
||||
);
|
||||
assert.ok(result, `expected ${kind} to be accepted`);
|
||||
assert.equal(result.entry.kind, kind);
|
||||
}
|
||||
});
|
||||
|
||||
test("recordSelfFeedback_kind_validation_rejects_malformed", () => {
|
||||
const project = makeForgeProject();
|
||||
const cases = [
|
||||
{ kind: "" }, // empty
|
||||
{ kind: "Unknown" }, // not in domain allow-list
|
||||
{ kind: "gap:" }, // empty family
|
||||
{ kind: ":family" }, // empty domain
|
||||
{ kind: "gap:routing:tiebreak:extra" },// 4 segments
|
||||
{ kind: "gap:Routing" }, // uppercase in family (not kebab)
|
||||
{ kind: "gap:routing_cost" }, // underscore (not kebab)
|
||||
{ kind: "gap:-routing" }, // leading hyphen
|
||||
{ kind: "gap:routing-" }, // trailing hyphen
|
||||
{ kind: "9gap:routing" }, // domain starts with digit
|
||||
{ kind: "weird-domain:family" }, // unknown domain
|
||||
];
|
||||
for (const { kind } of cases) {
|
||||
const result = recordSelfFeedback(
|
||||
{ kind, severity: "low", summary: `should reject ${kind}` },
|
||||
project,
|
||||
);
|
||||
assert.equal(result, null, `expected ${JSON.stringify(kind)} to be rejected`);
|
||||
}
|
||||
});
|
||||
|
||||
test("recordSelfFeedback_kind_validation_rejects_non_string", () => {
|
||||
const project = makeForgeProject();
|
||||
for (const kind of [undefined, null, 42, ["gap"], { domain: "gap" }]) {
|
||||
const result = recordSelfFeedback(
|
||||
{ kind, severity: "low", summary: "non-string kind" },
|
||||
project,
|
||||
);
|
||||
assert.equal(result, null, `expected non-string ${JSON.stringify(kind)} to be rejected`);
|
||||
}
|
||||
});
|
||||
|
||||
test("markResolved_appends_resolution_event_to_jsonl_audit_log", () => {
|
||||
const project = makeForgeProject();
|
||||
const result = recordSelfFeedback(
|
||||
{
|
||||
kind: "audit-trail",
|
||||
kind: "gap:audit-trail",
|
||||
severity: "high",
|
||||
summary: "Resolution must land in JSONL too",
|
||||
},
|
||||
|
|
@ -479,7 +535,7 @@ test("compactSelfFeedbackMarkdown_when_projection_stale_rewrites_from_sqlite", (
|
|||
const project = makeForgeProject();
|
||||
const result = recordSelfFeedback(
|
||||
{
|
||||
kind: "stale-projection",
|
||||
kind: "gap:stale-projection",
|
||||
severity: "medium",
|
||||
summary: "Projection should be repaired",
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue