feat(ci): implement three-stage promotion pipeline (Dev → Test → Prod) (#1098)
* feat(ci): add version stamp script for dev publishes Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(ci): add CLI smoke tests for pipeline test stage Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(ci): add FixtureProvider for LLM conversation recording and replay Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(ci): add fixture test runner and sample recordings Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(ci): add live test stubs and pipeline npm scripts Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(ci): add three-stage promotion pipeline workflow Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(ci): add weekly cleanup workflow for stale dev versions Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(ci): add fixture recording helper stub Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f28feaed36
commit
6f410a0041
18 changed files with 936 additions and 0 deletions
57
.github/workflows/cleanup-dev-versions.yml
vendored
Normal file
57
.github/workflows/cleanup-dev-versions.yml
vendored
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Weekly housekeeping: remove gsd-pi "-dev." prerelease versions older than
# 30 days from the npm registry.
name: Cleanup Dev Versions

on:
  schedule:
    - cron: "0 6 * * 1" # Monday 06:00 UTC
  workflow_dispatch: {} # allow manual runs

permissions:
  contents: read

jobs:
  cleanup:
    name: Remove stale -dev versions
    runs-on: ubuntu-latest
    steps:
      - uses: actions/setup-node@v6
        with:
          node-version: 22
          registry-url: https://registry.npmjs.org

      - name: Unpublish old dev versions
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
        run: |
          set -euo pipefail

          PACKAGE="gsd-pi"
          MAX_AGE_DAYS=30
          # Cutoff as a unix epoch: GNU date syntax first, BSD (-v) fallback.
          CUTOFF=$(date -u -d "-${MAX_AGE_DAYS} days" +%s 2>/dev/null || date -u -v-${MAX_AGE_DAYS}d +%s)

          echo "Fetching all versions of ${PACKAGE}..."
          # `npm view … versions --json` prints a JSON array (or a bare string
          # when only one version exists); keep only "-dev." prereleases.
          VERSIONS=$(npm view "${PACKAGE}" versions --json 2>/dev/null | node -e "
            const data = JSON.parse(require('fs').readFileSync('/dev/stdin', 'utf8'));
            const versions = Array.isArray(data) ? data : [data];
            versions.filter(v => v.includes('-dev.')).forEach(v => console.log(v));
          ")

          if [ -z "${VERSIONS}" ]; then
            echo "No dev versions found."
            exit 0
          fi

          REMOVED=0
          while IFS= read -r VERSION; do
            # Publish timestamp in epoch seconds; "0" on lookup failure so the
            # version is skipped rather than unpublished blindly.
            # NOTE(review): `npm view pkg@ver time` may return the full time
            # map (an object) rather than a single date string, in which case
            # `new Date(data)` yields NaN — confirm the payload shape.
            PUBLISH_TIME=$(npm view "${PACKAGE}@${VERSION}" time --json 2>/dev/null | node -e "
              const data = JSON.parse(require('fs').readFileSync('/dev/stdin', 'utf8'));
              console.log(Math.floor(new Date(data).getTime() / 1000));
            " 2>/dev/null || echo "0")

            if [ "${PUBLISH_TIME}" -gt 0 ] && [ "${PUBLISH_TIME}" -lt "${CUTOFF}" ]; then
              echo "Unpublishing ${PACKAGE}@${VERSION} (published $(date -u -d @${PUBLISH_TIME} +%Y-%m-%d 2>/dev/null || date -u -r ${PUBLISH_TIME} +%Y-%m-%d))"
              # NOTE(review): the public npm registry restricts unpublishing
              # versions older than 72 hours in many cases — confirm this
              # succeeds for gsd-pi, or switch to `npm deprecate`.
              npm unpublish "${PACKAGE}@${VERSION}" || echo " Warning: failed to unpublish ${VERSION}"
              REMOVED=$((REMOVED + 1))
            fi
          done <<< "${VERSIONS}"

          echo "Removed ${REMOVED} stale dev version(s)."
|
||||
186
.github/workflows/pipeline.yml
vendored
Normal file
186
.github/workflows/pipeline.yml
vendored
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
name: Pipeline

# Three-stage promotion pipeline (Dev -> Test -> Prod), triggered by a
# successful CI run on main.
on:
  workflow_run:
    workflows: ["CI"]
    types: [completed]
    branches: [main]

concurrency:
  # For workflow_run events github.sha is the default-branch head at trigger
  # time, not necessarily the commit CI tested — key on the triggering run's
  # head_sha so one concurrency group corresponds to one tested commit.
  group: pipeline-${{ github.event.workflow_run.head_sha }}
  cancel-in-progress: false

permissions:
  contents: write
  packages: write

jobs:
  dev-publish:
    name: Dev Publish
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    container:
      image: ghcr.io/gsd-build/gsd-ci-builder:latest
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    outputs:
      dev-version: ${{ steps.stamp.outputs.version }}
    steps:
      # Check out the exact commit CI validated; a bare checkout on a
      # workflow_run event gives the current default-branch head instead.
      - uses: actions/checkout@v6
        with:
          ref: ${{ github.event.workflow_run.head_sha }}

      - uses: actions/setup-node@v6
        with:
          node-version: 22
          registry-url: https://registry.npmjs.org

      - name: Install dependencies
        run: npm ci

      - name: Build
        run: npm run build

      - name: Stamp dev version
        id: stamp
        run: |
          npm run pipeline:version-stamp
          # The -p program must use double quotes: in the previous
          # '...\"./package.json\"...' form the backslashes were literal
          # inside single quotes, so node received \" and raised a
          # SyntaxError instead of printing the version.
          echo "version=$(node -p "require('./package.json').version")" >> "$GITHUB_OUTPUT"

      - name: Publish @dev
        run: npm publish --tag dev
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

      - name: Smoke test (local)
        run: npm run test:smoke

  test-verify:
    name: Test & Verify
    needs: dev-publish
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          ref: ${{ github.event.workflow_run.head_sha }}

      - uses: actions/setup-node@v6
        with:
          node-version: 22
          registry-url: https://registry.npmjs.org

      - name: Install gsd-pi@dev globally
        run: npm install -g gsd-pi@dev

      - name: Run smoke tests (against installed binary)
        run: |
          export GSD_SMOKE_BINARY=$(which gsd)
          npm run test:smoke
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

      - name: Install dependencies
        run: npm ci

      - name: Run fixture tests
        run: npm run test:fixtures

      - name: Promote to @next
        run: npm dist-tag add gsd-pi@${{ needs.dev-publish.outputs.dev-version }} next
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push runtime Docker image
        run: |
          docker build --target runtime \
            -t ghcr.io/gsd-build/gsd-pi:next \
            -t ghcr.io/gsd-build/gsd-pi:${{ needs.dev-publish.outputs.dev-version }} \
            .
          docker push ghcr.io/gsd-build/gsd-pi:next
          docker push ghcr.io/gsd-build/gsd-pi:${{ needs.dev-publish.outputs.dev-version }}

  prod-release:
    name: Production Release
    needs: [dev-publish, test-verify]
    runs-on: ubuntu-latest
    # Prod stage is gated behind the "prod" environment (approvals, secrets).
    environment: prod
    steps:
      - uses: actions/checkout@v6
        with:
          ref: ${{ github.event.workflow_run.head_sha }}

      - uses: actions/setup-node@v6
        with:
          node-version: 22
          registry-url: https://registry.npmjs.org

      - name: Run live LLM tests (optional)
        # Best-effort: missing API keys must not block a release.
        continue-on-error: true
        run: npm run test:live
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GSD_LIVE_TESTS: "1"

      - name: Promote to @latest
        run: npm dist-tag add gsd-pi@${{ needs.dev-publish.outputs.dev-version }} latest
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Tag runtime Docker image as latest
        run: |
          docker pull ghcr.io/gsd-build/gsd-pi:${{ needs.dev-publish.outputs.dev-version }}
          docker tag ghcr.io/gsd-build/gsd-pi:${{ needs.dev-publish.outputs.dev-version }} ghcr.io/gsd-build/gsd-pi:latest
          docker push ghcr.io/gsd-build/gsd-pi:latest

      - name: Extract base version
        id: base-version
        run: |
          # 1.2.3-dev.abc1234 -> 1.2.3
          echo "version=$(echo '${{ needs.dev-publish.outputs.dev-version }}' | sed 's/-dev\..*//')" >> "$GITHUB_OUTPUT"

      - name: Create GitHub Release
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh release create "v${{ steps.base-version.outputs.version }}" \
            --title "v${{ steps.base-version.outputs.version }}" \
            --generate-notes \
            --latest

  update-builder:
    name: Update CI Builder Image
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          ref: ${{ github.event.workflow_run.head_sha }}
          # The diff below needs the parent commit; the default fetch-depth
          # of 1 makes "<sha>~1" unresolvable, so `changed` was always false.
          fetch-depth: 2

      - name: Check for Dockerfile changes
        id: check
        run: |
          CHANGED=$(git diff --name-only ${{ github.event.workflow_run.head_sha }}~1 ${{ github.event.workflow_run.head_sha }} -- Dockerfile || echo "")
          echo "changed=$([[ -n \"$CHANGED\" ]] && echo 'true' || echo 'false')" >> "$GITHUB_OUTPUT"

      - name: Log in to GHCR
        if: steps.check.outputs.changed == 'true'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push CI builder image
        if: steps.check.outputs.changed == 'true'
        run: |
          docker build --target builder \
            -t ghcr.io/gsd-build/gsd-ci-builder:latest \
            .
          docker push ghcr.io/gsd-build/gsd-ci-builder:latest
|
||||
|
|
@ -52,6 +52,10 @@
|
|||
"test:coverage": "c8 --reporter=text --reporter=lcov --exclude='src/resources/extensions/gsd/tests/**' --exclude='src/tests/**' --exclude='scripts/**' --exclude='native/**' --exclude='node_modules/**' --check-coverage --statements=50 --lines=50 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts",
|
||||
"test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/*integration*.test.ts src/tests/integration/*.test.ts",
|
||||
"test": "npm run test:unit && npm run test:integration",
|
||||
"test:smoke": "node --experimental-strip-types tests/smoke/run.ts",
|
||||
"test:fixtures": "node --experimental-strip-types tests/fixtures/run.ts",
|
||||
"test:fixtures:record": "GSD_FIXTURE_MODE=record node --experimental-strip-types tests/fixtures/record.ts",
|
||||
"test:live": "GSD_LIVE_TESTS=1 node --experimental-strip-types tests/live/run.ts",
|
||||
"test:browser-tools": "node --test src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs",
|
||||
"test:native": "node --test packages/native/src/__tests__/grep.test.mjs",
|
||||
"build:native": "node native/scripts/build.js",
|
||||
|
|
@ -64,6 +68,9 @@
|
|||
"sync-platform-versions": "node native/scripts/sync-platform-versions.cjs",
|
||||
"validate-pack": "node scripts/validate-pack.js",
|
||||
"typecheck:extensions": "tsc --noEmit --project tsconfig.extensions.json",
|
||||
"pipeline:version-stamp": "node scripts/version-stamp.mjs",
|
||||
"docker:build-runtime": "docker build --target runtime -t ghcr.io/gsd-build/gsd-pi .",
|
||||
"docker:build-builder": "docker build --target builder -t ghcr.io/gsd-build/gsd-ci-builder .",
|
||||
"prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && git diff --exit-code || (echo 'ERROR: version sync changed files — commit them before publishing' && exit 1) && npm run build && npm run typecheck:extensions && npm run validate-pack"
|
||||
},
|
||||
"dependencies": {
|
||||
|
|
|
|||
13
scripts/version-stamp.mjs
Normal file
13
scripts/version-stamp.mjs
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
import { readFileSync, writeFileSync } from "fs";
import { execFileSync } from "child_process";

// Stamps package.json with a prerelease version "<base>-dev.<short-sha>"
// derived from the current git HEAD, for publishing to the @dev dist-tag.

const pkgPath = new URL("../package.json", import.meta.url);
const pkg = JSON.parse(readFileSync(pkgPath, "utf8"));

const shortSha = execFileSync("git", ["rev-parse", "--short", "HEAD"], { encoding: "utf8" }).trim();

// Idempotence fix: strip any previous "-dev.<sha>" suffix first, so re-running
// the stamp never stacks suffixes ("1.2.3-dev.aaa-dev.bbb").
const baseVersion = pkg.version.replace(/-dev\..*$/, "");
const devVersion = `${baseVersion}-dev.${shortSha}`;

pkg.version = devVersion;
// Preserve the file's 2-space indentation and trailing newline.
writeFileSync(pkgPath, JSON.stringify(pkg, null, 2) + "\n");

console.log(`Stamped version: ${devVersion}`);
|
||||
142
tests/fixtures/provider.ts
vendored
Normal file
142
tests/fixtures/provider.ts
vendored
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
import { readFileSync, writeFileSync, mkdirSync } from "fs";
import { dirname } from "path";
import { fileURLToPath } from "url";
|
||||
|
||||
/**
 * A single tool invocation recorded within a conversation turn.
 */
export interface ToolUse {
  /** Tool name as the agent invoked it (recordings use names like "Write", "Read", "Edit"). */
  name: string;
  /** Arguments the tool was called with. */
  input: Record<string, unknown>;
  /** Tool result when the call succeeded. */
  output?: string;
  /** Error message when the call failed. */
  error?: string;
}

/**
 * A file referenced in a fixture (for setup or assertions).
 */
export interface FixtureFile {
  /** Path of the file. */
  path: string;
  /** Full file content. */
  content: string;
}

/**
 * A single turn in a recorded LLM conversation.
 */
export interface FixtureTurn {
  /** Who produced the turn. */
  role: "user" | "assistant";
  /** Message text for the turn. */
  content: string;
  /** Tool invocations made during this turn, in order. */
  toolUses?: ToolUse[];
}

/**
 * A complete fixture recording, as persisted to JSON under recordings/.
 */
export interface FixtureRecording {
  /** Descriptive fixture name. */
  name: string;
  /** Human-readable summary of what the fixture exercises. */
  description?: string;
  /** Ordered conversation turns (the test runner rejects an empty array). */
  turns: FixtureTurn[];
  /** Optional files used for setup or assertions. */
  files?: FixtureFile[];
}
|
||||
|
||||
/**
|
||||
* Returns the current fixture mode from the environment.
|
||||
*/
|
||||
export function getFixtureMode(): "record" | "replay" | "off" {
|
||||
const mode = process.env.GSD_FIXTURE_MODE?.toLowerCase();
|
||||
if (mode === "record") return "record";
|
||||
if (mode === "replay") return "replay";
|
||||
return "off";
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the fixture recordings directory path.
|
||||
*/
|
||||
export function getFixtureDir(): string {
|
||||
return process.env.GSD_FIXTURE_DIR || new URL("recordings", import.meta.url).pathname;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a fixture recording from a JSON file.
|
||||
*/
|
||||
export function loadFixture(filePath: string): FixtureRecording {
|
||||
const raw = readFileSync(filePath, "utf8");
|
||||
return JSON.parse(raw) as FixtureRecording;
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves a fixture recording to a JSON file.
|
||||
*/
|
||||
export function saveFixture(filePath: string, recording: FixtureRecording): void {
|
||||
mkdirSync(dirname(filePath), { recursive: true });
|
||||
writeFileSync(filePath, JSON.stringify(recording, null, 2) + "\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a readable stream of responses from a fixture recording,
|
||||
* returning one assistant turn at a time.
|
||||
*/
|
||||
export function createReplayStream(recording: FixtureRecording): Iterator<FixtureTurn> {
|
||||
const assistantTurns = recording.turns.filter((t) => t.role === "assistant");
|
||||
let index = 0;
|
||||
return {
|
||||
next(): IteratorResult<FixtureTurn> {
|
||||
if (index < assistantTurns.length) {
|
||||
return { value: assistantTurns[index++], done: false };
|
||||
}
|
||||
return { value: undefined as any, done: true };
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Records conversation turns and saves them as a fixture.
|
||||
*/
|
||||
export class FixtureRecorder {
|
||||
private turns: FixtureTurn[] = [];
|
||||
private files: FixtureFile[] = [];
|
||||
private name: string;
|
||||
private description?: string;
|
||||
|
||||
constructor(name: string, description?: string) {
|
||||
this.name = name;
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
addTurn(turn: FixtureTurn): void {
|
||||
this.turns.push(turn);
|
||||
}
|
||||
|
||||
addFile(file: FixtureFile): void {
|
||||
this.files.push(file);
|
||||
}
|
||||
|
||||
save(filePath: string): void {
|
||||
const recording: FixtureRecording = {
|
||||
name: this.name,
|
||||
...(this.description ? { description: this.description } : {}),
|
||||
turns: this.turns,
|
||||
...(this.files.length > 0 ? { files: this.files } : {}),
|
||||
};
|
||||
saveFixture(filePath, recording);
|
||||
}
|
||||
|
||||
getTurns(): FixtureTurn[] {
|
||||
return [...this.turns];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Replays saved fixture responses by turn index.
|
||||
*/
|
||||
export class FixtureReplayer {
|
||||
private stream: Iterator<FixtureTurn>;
|
||||
|
||||
constructor(recording: FixtureRecording) {
|
||||
this.stream = createReplayStream(recording);
|
||||
}
|
||||
|
||||
nextResponse(): FixtureTurn | null {
|
||||
const result = this.stream.next();
|
||||
return result.done ? null : result.value;
|
||||
}
|
||||
}
|
||||
50
tests/fixtures/record.ts
vendored
Normal file
50
tests/fixtures/record.ts
vendored
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/**
|
||||
* Fixture Recording Helper
|
||||
*
|
||||
* Records LLM conversations as fixture files for replay in CI.
|
||||
*
|
||||
* Usage:
|
||||
* GSD_FIXTURE_MODE=record npm run test:fixtures:record
|
||||
*
|
||||
* This script is a placeholder for the full recording workflow.
|
||||
* To create new fixture recordings:
|
||||
*
|
||||
* 1. Set GSD_FIXTURE_MODE=record in your environment
|
||||
* 2. Run an agent conversation that you want to capture
|
||||
* 3. The FixtureRecorder (from provider.ts) collects turns automatically
|
||||
* 4. Recordings are saved as JSON to tests/fixtures/recordings/
|
||||
*
|
||||
* For manual fixture creation, create a JSON file in recordings/ matching
|
||||
* the FixtureRecording interface from provider.ts:
|
||||
*
|
||||
* {
|
||||
* "name": "descriptive-name",
|
||||
* "description": "What this fixture tests",
|
||||
* "turns": [
|
||||
* { "role": "user", "content": "..." },
|
||||
* { "role": "assistant", "content": "...", "toolUses": [...] }
|
||||
* ]
|
||||
* }
|
||||
*
|
||||
* Then run `npm run test:fixtures` to validate the recording.
|
||||
*/
|
||||
|
||||
import { getFixtureMode, getFixtureDir } from "./provider.ts";
|
||||
|
||||
const mode = getFixtureMode();
|
||||
const dir = getFixtureDir();
|
||||
|
||||
if (mode !== "record") {
|
||||
console.log("Fixture recording is not active.");
|
||||
console.log("Set GSD_FIXTURE_MODE=record to enable recording.");
|
||||
console.log("");
|
||||
console.log("Usage:");
|
||||
console.log(" npm run test:fixtures:record # Start recording");
|
||||
console.log(" npm run test:fixtures # Replay and verify recordings");
|
||||
console.log("");
|
||||
console.log(`Recordings directory: ${dir}`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
console.log(`Recording mode active. Fixture directory: ${dir}`);
|
||||
console.log("Recording integration is pending full agent hookup.");
|
||||
24
tests/fixtures/recordings/agent-creates-file.json
vendored
Normal file
24
tests/fixtures/recordings/agent-creates-file.json
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"name": "agent-creates-file",
|
||||
"description": "Single-turn fixture where the agent creates a file using the Write tool",
|
||||
"turns": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Create a file called hello.txt with the content 'Hello, world!'"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "I'll create the file for you.",
|
||||
"toolUses": [
|
||||
{
|
||||
"name": "Write",
|
||||
"input": {
|
||||
"file_path": "/tmp/hello.txt",
|
||||
"content": "Hello, world!"
|
||||
},
|
||||
"output": "File created successfully at /tmp/hello.txt"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
41
tests/fixtures/recordings/agent-handles-error.json
vendored
Normal file
41
tests/fixtures/recordings/agent-handles-error.json
vendored
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
{
|
||||
"name": "agent-handles-error",
|
||||
"description": "Two-turn fixture where the agent encounters and handles an error",
|
||||
"turns": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Read the file /nonexistent/path.txt"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "Let me try to read that file.",
|
||||
"toolUses": [
|
||||
{
|
||||
"name": "Read",
|
||||
"input": {
|
||||
"file_path": "/nonexistent/path.txt"
|
||||
},
|
||||
"error": "ENOENT: no such file or directory, open '/nonexistent/path.txt'"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "That file doesn't exist. Can you create it instead?"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "The file doesn't exist. I'll create it for you.",
|
||||
"toolUses": [
|
||||
{
|
||||
"name": "Write",
|
||||
"input": {
|
||||
"file_path": "/nonexistent/path.txt",
|
||||
"content": ""
|
||||
},
|
||||
"output": "File created successfully"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
50
tests/fixtures/recordings/agent-multi-turn-tools.json
vendored
Normal file
50
tests/fixtures/recordings/agent-multi-turn-tools.json
vendored
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
{
|
||||
"name": "agent-multi-turn-tools",
|
||||
"description": "Two-turn fixture where the agent uses multiple Write tools across turns",
|
||||
"turns": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Create two files: hello.txt and world.txt"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "I'll create both files.",
|
||||
"toolUses": [
|
||||
{
|
||||
"name": "Write",
|
||||
"input": {
|
||||
"file_path": "/tmp/hello.txt",
|
||||
"content": "Hello"
|
||||
},
|
||||
"output": "File created successfully at /tmp/hello.txt"
|
||||
},
|
||||
{
|
||||
"name": "Write",
|
||||
"input": {
|
||||
"file_path": "/tmp/world.txt",
|
||||
"content": "World"
|
||||
},
|
||||
"output": "File created successfully at /tmp/world.txt"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Now create a combined file"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "I'll create the combined file.",
|
||||
"toolUses": [
|
||||
{
|
||||
"name": "Write",
|
||||
"input": {
|
||||
"file_path": "/tmp/combined.txt",
|
||||
"content": "Hello World"
|
||||
},
|
||||
"output": "File created successfully at /tmp/combined.txt"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
42
tests/fixtures/recordings/agent-reads-and-edits.json
vendored
Normal file
42
tests/fixtures/recordings/agent-reads-and-edits.json
vendored
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"name": "agent-reads-and-edits",
|
||||
"description": "Two-turn fixture where the agent reads a file then edits it",
|
||||
"turns": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Read the file config.json and change the port to 8080"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "Let me read the file first.",
|
||||
"toolUses": [
|
||||
{
|
||||
"name": "Read",
|
||||
"input": {
|
||||
"file_path": "/tmp/config.json"
|
||||
},
|
||||
"output": "{ \"port\": 3000, \"host\": \"localhost\" }"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Go ahead and make the edit."
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "I'll update the port to 8080.",
|
||||
"toolUses": [
|
||||
{
|
||||
"name": "Edit",
|
||||
"input": {
|
||||
"file_path": "/tmp/config.json",
|
||||
"old_string": "\"port\": 3000",
|
||||
"new_string": "\"port\": 8080"
|
||||
},
|
||||
"output": "File updated successfully"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
100
tests/fixtures/run.ts
vendored
Normal file
100
tests/fixtures/run.ts
vendored
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
// Fixture test runner: loads every JSON recording under recordings/,
// validates its shape, replays it through FixtureReplayer, and checks the
// replayed assistant turns match the recording exactly.
import { readdirSync } from "fs";
import { join, dirname } from "path";
import { fileURLToPath } from "url";
import { loadFixture, FixtureReplayer } from "./provider.ts";
import type { FixtureTurn, FixtureRecording } from "./provider.ts";

const __dirname = dirname(fileURLToPath(import.meta.url));
const recordingsDir = join(__dirname, "recordings");

// Sorted for a deterministic run order across machines.
const files = readdirSync(recordingsDir)
  .filter((f) => f.endsWith(".json"))
  .sort();

if (files.length === 0) {
  console.error("No fixture recordings found");
  process.exit(1);
}

let passed = 0;
let failed = 0;

for (const file of files) {
  const filePath = join(recordingsDir, file);
  const label = file.replace(/\.json$/, "");

  try {
    const recording = loadFixture(filePath);

    // Validate recording structure
    assertRecordingShape(recording, label);

    // Replay through FixtureReplayer and verify responses
    const replayer = new FixtureReplayer(recording);
    const assistantTurns = recording.turns.filter((t) => t.role === "assistant");

    for (let i = 0; i < assistantTurns.length; i++) {
      const response = replayer.nextResponse();
      if (!response) {
        throw new Error(`Replayer exhausted at turn ${i}, expected ${assistantTurns.length} assistant turns`);
      }
      assertTurnShape(response, `${label} turn ${i}`);

      // Verify response matches the original
      if (response.content !== assistantTurns[i].content) {
        throw new Error(
          `Turn ${i} content mismatch: "${response.content}" !== "${assistantTurns[i].content}"`,
        );
      }
    }

    // Verify replayer is exhausted
    const extra = replayer.nextResponse();
    if (extra !== null) {
      throw new Error("Replayer returned extra responses beyond expected count");
    }

    console.log(` PASS ${label}`);
    passed++;
  } catch (err: any) {
    // Any structural or replay failure marks this fixture FAIL and moves on.
    console.error(` FAIL ${label}: ${err.message}`);
    failed++;
  }
}

console.log(`\nFixture tests: ${passed} passed, ${failed} failed`);
if (failed > 0) process.exit(1);
|
||||
function assertRecordingShape(recording: FixtureRecording, label: string): void {
|
||||
if (!recording.name || typeof recording.name !== "string") {
|
||||
throw new Error(`${label}: missing or invalid 'name'`);
|
||||
}
|
||||
if (!Array.isArray(recording.turns) || recording.turns.length === 0) {
|
||||
throw new Error(`${label}: 'turns' must be a non-empty array`);
|
||||
}
|
||||
for (const turn of recording.turns) {
|
||||
assertTurnShape(turn, label);
|
||||
}
|
||||
}
|
||||
|
||||
function assertTurnShape(turn: FixtureTurn, label: string): void {
|
||||
if (turn.role !== "user" && turn.role !== "assistant") {
|
||||
throw new Error(`${label}: invalid role "${turn.role}"`);
|
||||
}
|
||||
if (typeof turn.content !== "string") {
|
||||
throw new Error(`${label}: turn content must be a string`);
|
||||
}
|
||||
if (turn.toolUses) {
|
||||
if (!Array.isArray(turn.toolUses)) {
|
||||
throw new Error(`${label}: toolUses must be an array`);
|
||||
}
|
||||
for (const tool of turn.toolUses) {
|
||||
if (!tool.name || typeof tool.name !== "string") {
|
||||
throw new Error(`${label}: tool use missing 'name'`);
|
||||
}
|
||||
if (!tool.input || typeof tool.input !== "object") {
|
||||
throw new Error(`${label}: tool use missing 'input'`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
52
tests/live/run.ts
Normal file
52
tests/live/run.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
// Live LLM test runner. Opt-in only: exits immediately unless
// GSD_LIVE_TESTS=1, since each test-*.ts file makes real API calls.
import { readdirSync } from "fs";
import { execFileSync } from "child_process";
import { join, dirname } from "path";
import { fileURLToPath } from "url";

const __dirname = dirname(fileURLToPath(import.meta.url));

if (process.env.GSD_LIVE_TESTS !== "1") {
  console.log("Skipping live tests (set GSD_LIVE_TESTS=1 to enable)");
  process.exit(0);
}

// Sorted for a deterministic run order.
const testFiles = readdirSync(__dirname)
  .filter((f) => f.startsWith("test-") && f.endsWith(".ts"))
  .sort();

if (testFiles.length === 0) {
  console.error("No live test files found");
  process.exit(1);
}

let passed = 0;
let failed = 0;
let skipped = 0;

for (const file of testFiles) {
  const filePath = join(__dirname, file);
  const label = file.replace(/\.ts$/, "");
  try {
    // Each test runs in its own node process; a non-zero exit makes
    // execFileSync throw and lands us in the catch block below.
    execFileSync("node", ["--experimental-strip-types", filePath], {
      encoding: "utf8",
      stdio: "pipe",
      timeout: 60_000,
    });
    console.log(` PASS ${label}`);
    passed++;
  } catch (err: any) {
    // Protocol with the test files: a test that cannot run (e.g. its API
    // key is not configured) prints "SKIPPED" and exits non-zero.
    const output = (err.stdout || "") + (err.stderr || "");
    if (output.includes("SKIPPED")) {
      console.log(` SKIP ${label}`);
      skipped++;
    } else {
      console.error(` FAIL ${label}`);
      if (err.stdout) console.error(err.stdout);
      if (err.stderr) console.error(err.stderr);
      failed++;
    }
  }
}

console.log(`\nLive tests: ${passed} passed, ${failed} failed, ${skipped} skipped`);
if (failed > 0) process.exit(1);
|
||||
33
tests/live/test-anthropic-roundtrip.ts
Normal file
33
tests/live/test-anthropic-roundtrip.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
// Live round-trip against the Anthropic Messages API: ask for an exact
// marker string and verify it comes back. Printing "SKIPPED" and exiting
// non-zero is the protocol tests/live/run.ts uses to report a skip rather
// than a failure.
const apiKey = process.env.ANTHROPIC_API_KEY;
if (!apiKey) {
  console.log("SKIPPED: ANTHROPIC_API_KEY not set");
  process.exit(1);
}

const response = await fetch("https://api.anthropic.com/v1/messages", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "x-api-key": apiKey,
    "anthropic-version": "2023-06-01",
  },
  body: JSON.stringify({
    model: "claude-sonnet-4-20250514",
    max_tokens: 32,
    messages: [{ role: "user", content: "Reply with exactly: LIVE_TEST_OK" }],
  }),
});

if (!response.ok) {
  const body = await response.text();
  console.error(`Anthropic API error ${response.status}: ${body}`);
  process.exit(1);
}

// The Messages API returns an array of content blocks; the marker is
// expected in the first text block.
const data = (await response.json()) as { content: Array<{ text: string }> };
const text = data.content?.[0]?.text || "";

if (!text.includes("LIVE_TEST_OK")) {
  console.error(`Unexpected response: "${text}"`);
  process.exit(1);
}
|
||||
32
tests/live/test-openai-roundtrip.ts
Normal file
32
tests/live/test-openai-roundtrip.ts
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
// Live round-trip against the OpenAI Chat Completions API: ask for an exact
// marker string and verify it comes back. Printing "SKIPPED" and exiting
// non-zero is the protocol tests/live/run.ts uses to report a skip rather
// than a failure.
const apiKey = process.env.OPENAI_API_KEY;
if (!apiKey) {
  console.log("SKIPPED: OPENAI_API_KEY not set");
  process.exit(1);
}

const response = await fetch("https://api.openai.com/v1/chat/completions", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
  },
  body: JSON.stringify({
    model: "gpt-4o-mini",
    max_tokens: 32,
    messages: [{ role: "user", content: "Reply with exactly: LIVE_TEST_OK" }],
  }),
});

if (!response.ok) {
  const body = await response.text();
  console.error(`OpenAI API error ${response.status}: ${body}`);
  process.exit(1);
}

// Chat Completions returns choices; the marker is expected in the first
// choice's message content.
const data = (await response.json()) as { choices: Array<{ message: { content: string } }> };
const text = data.choices?.[0]?.message?.content || "";

if (!text.includes("LIVE_TEST_OK")) {
  console.error(`Unexpected response: "${text}"`);
  process.exit(1);
}
|
||||
40
tests/smoke/run.ts
Normal file
40
tests/smoke/run.ts
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
// Smoke test runner: executes every tests/smoke/test-*.ts file in its own
// node process and reports PASS/FAIL per file.
import { readdirSync } from "fs";
import { execFileSync } from "child_process";
import { join, dirname } from "path";
import { fileURLToPath } from "url";

const __dirname = dirname(fileURLToPath(import.meta.url));

// Sorted for a deterministic run order.
const testFiles = readdirSync(__dirname)
  .filter((f) => f.startsWith("test-") && f.endsWith(".ts"))
  .sort();

if (testFiles.length === 0) {
  console.error("No smoke test files found");
  process.exit(1);
}

let passed = 0;
let failed = 0;

for (const file of testFiles) {
  const filePath = join(__dirname, file);
  const label = file.replace(/\.ts$/, "");
  try {
    // A failing test exits non-zero, which makes execFileSync throw; its
    // captured stdout/stderr are replayed below for diagnosis.
    execFileSync("node", ["--experimental-strip-types", filePath], {
      encoding: "utf8",
      stdio: "pipe",
      timeout: 30_000,
    });
    console.log(` PASS ${label}`);
    passed++;
  } catch (err: any) {
    console.error(` FAIL ${label}`);
    if (err.stdout) console.error(err.stdout);
    if (err.stderr) console.error(err.stderr);
    failed++;
  }
}

console.log(`\nSmoke tests: ${passed} passed, ${failed} failed`);
if (failed > 0) process.exit(1);
||||
23
tests/smoke/test-help.ts
Normal file
23
tests/smoke/test-help.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import { execFileSync } from "child_process";
|
||||
|
||||
const binary = process.env.GSD_SMOKE_BINARY || "npx";
|
||||
const args = process.env.GSD_SMOKE_BINARY
|
||||
? ["--help"]
|
||||
: ["gsd-pi", "--help"];
|
||||
|
||||
const output = execFileSync(binary, args, {
|
||||
encoding: "utf8",
|
||||
timeout: 30_000,
|
||||
});
|
||||
|
||||
const lower = output.toLowerCase();
|
||||
|
||||
if (!lower.includes("gsd")) {
|
||||
console.error(`Help output does not contain "gsd": "${output}"`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
if (!lower.includes("usage")) {
|
||||
console.error(`Help output does not contain "usage": "${output}"`);
|
||||
process.exit(1);
|
||||
}
|
||||
28
tests/smoke/test-init.ts
Normal file
28
tests/smoke/test-init.ts
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
import { execFileSync } from "child_process";
|
||||
import { mkdtempSync, existsSync, rmSync } from "fs";
|
||||
import { join } from "path";
|
||||
import { tmpdir } from "os";
|
||||
|
||||
const tmpDir = mkdtempSync(join(tmpdir(), "gsd-smoke-init-"));
|
||||
|
||||
try {
|
||||
const binary = process.env.GSD_SMOKE_BINARY || "npx";
|
||||
const args = process.env.GSD_SMOKE_BINARY
|
||||
? ["init"]
|
||||
: ["gsd-pi", "init"];
|
||||
|
||||
execFileSync(binary, args, {
|
||||
encoding: "utf8",
|
||||
timeout: 30_000,
|
||||
cwd: tmpDir,
|
||||
env: { ...process.env, GSD_NON_INTERACTIVE: "1" },
|
||||
});
|
||||
|
||||
const gsdDir = join(tmpDir, ".gsd");
|
||||
if (!existsSync(gsdDir)) {
|
||||
console.error(`.gsd directory not created in ${tmpDir}`);
|
||||
process.exit(1);
|
||||
}
|
||||
} finally {
|
||||
rmSync(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
16
tests/smoke/test-version.ts
Normal file
16
tests/smoke/test-version.ts
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
import { execFileSync } from "child_process";
|
||||
|
||||
const binary = process.env.GSD_SMOKE_BINARY || "npx";
|
||||
const args = process.env.GSD_SMOKE_BINARY
|
||||
? ["--version"]
|
||||
: ["gsd-pi", "--version"];
|
||||
|
||||
const output = execFileSync(binary, args, {
|
||||
encoding: "utf8",
|
||||
timeout: 30_000,
|
||||
}).trim();
|
||||
|
||||
if (!/^\d+\.\d+\.\d+/.test(output)) {
|
||||
console.error(`Version output does not match expected pattern: "${output}"`);
|
||||
process.exit(1);
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue