fix(auto): harden recovery — checkbox verification, atomic writes, roadmap checks (#547)
Four fixes to auto-recovery logic that caused silent failures or inconsistent state: 1. skipExecuteTask: return false when checkbox regex doesn't match the plan format, so callers fall through to other recovery strategies instead of assuming success (lines 252-255) 2. verifyExpectedArtifact: fail verification on corrupt/unparseable roadmap instead of silently passing. Prevents advancing past an incomplete complete-slice when the roadmap file is malformed (line 152) 3. removePersistedKey: use atomic tmp+rename write (matching persistCompletedKey) to prevent completed-units.json corruption on crash mid-write (line 293) 4. selfHealRuntimeRecords: use verifyExpectedArtifact instead of bare existsSync for execute-task healing, so tasks with summary but unchecked plan checkbox aren't incorrectly marked complete (line 374) Co-authored-by: TÂCHES <afromanguy@me.com>
This commit is contained in:
parent
3101469b4d
commit
ecef348a95
1 changed files with 21 additions and 4 deletions
|
|
@ -149,7 +149,12 @@ export function verifyExpectedArtifact(unitType: string, unitId: string, base: s
|
|||
const roadmap = parseRoadmap(roadmapContent);
|
||||
const slice = roadmap.slices.find(s => s.id === sid);
|
||||
if (slice && !slice.done) return false;
|
||||
} catch (e) { /* corrupt roadmap — be lenient and treat as verified */ void e; }
|
||||
} catch {
|
||||
// Corrupt/unparseable roadmap — fail verification so the unit
|
||||
// re-runs and has a chance to fix the roadmap. Silently passing
|
||||
// here could advance past an incomplete slice.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -251,6 +256,11 @@ export function skipExecuteTask(
|
|||
const re = new RegExp(`^(- \\[) \\] (\\*\\*${escapedTid}:)`, "m");
|
||||
if (re.test(planContent)) {
|
||||
writeFileSync(planAbs, planContent.replace(re, "$1x] $2"), "utf-8");
|
||||
} else {
|
||||
// Regex didn't match — checkbox format differs from expected pattern.
|
||||
// Return false so callers know the plan was NOT updated and can
|
||||
// fall through to other recovery strategies instead of assuming success.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -290,7 +300,10 @@ export function removePersistedKey(base: string, key: string): void {
|
|||
if (existsSync(file)) {
|
||||
let keys: string[] = JSON.parse(readFileSync(file, "utf-8"));
|
||||
keys = keys.filter(k => k !== key);
|
||||
writeFileSync(file, JSON.stringify(keys), "utf-8");
|
||||
// Atomic write: tmp file + rename prevents partial writes on crash
|
||||
const tmpFile = file + ".tmp";
|
||||
writeFileSync(tmpFile, JSON.stringify(keys), "utf-8");
|
||||
renameSync(tmpFile, file);
|
||||
}
|
||||
} catch (e) { /* non-fatal: removePersistedKey failure */ void e; }
|
||||
}
|
||||
|
|
@ -412,8 +425,12 @@ export async function selfHealRuntimeRecords(
|
|||
const { unitType, unitId } = record;
|
||||
const artifactPath = resolveExpectedArtifactPath(unitType, unitId, base);
|
||||
|
||||
// Case 1: Artifact exists — unit completed but closeout didn't finish
|
||||
if (artifactPath && existsSync(artifactPath)) {
|
||||
// Case 1: Artifact exists — unit completed but closeout didn't finish.
|
||||
// Use verifyExpectedArtifact (not just existsSync) so that execute-task
|
||||
// also checks the plan checkbox is marked [x]. Without this, a task
|
||||
// whose summary exists but checkbox is unchecked would be incorrectly
|
||||
// marked as completed, causing deriveState to re-dispatch it endlessly.
|
||||
if (artifactPath && existsSync(artifactPath) && verifyExpectedArtifact(unitType, unitId, base)) {
|
||||
clearUnitRuntimeRecord(base, unitType, unitId);
|
||||
// Also persist completion key if missing
|
||||
const key = `${unitType}/${unitId}`;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue