diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 47a78c5f7..f93d671dd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,6 +7,15 @@ on:
     branches: [main]
 
 jobs:
+  secret-scan:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+      - name: Scan for hardcoded secrets
+        run: bash scripts/secret-scan.sh --diff origin/main
+
   no-gsd-dir:
     runs-on: ubuntu-latest
     steps:
diff --git a/.secretscanignore b/.secretscanignore
new file mode 100644
index 000000000..94cd201c7
--- /dev/null
+++ b/.secretscanignore
@@ -0,0 +1,35 @@
+# .secretscanignore — patterns to exclude from secret scanning
+#
+# Format (one entry per line; regexes are matched case-insensitively):
+#   fileglob:regex — ignore flagged lines matching regex in files matching fileglob
+#   regex — ignore flagged lines matching regex in all files (must not contain ":")
+#
+# Examples:
+#   tests/fixtures/fake-creds.json:AKIA.*
+#   EXAMPLE_KEY_DO_NOT_USE
+#   src/config.example.ts:password\s*=\s*"changeme"
+
+# Secret scanner test file (contains intentional fake secrets as test inputs)
+src/tests/secret-scan.test.ts:.*
+
+# Test fixtures with dummy credentials
+tests/*:AKIA_EXAMPLE
+tests/*:test-secret-value
+tests/*:fake[-_]?(password|secret|token|key)
+
+# Documentation examples
+*.md:AKIA[0-9A-Z]{16}
+*.md:sk_(live|test)_
+
+# Environment variable references (not actual values)
+process\.env\.\w+
+\$\{?\w+_KEY\}?
+\$\{?\w+_SECRET\}?
+\$\{?\w+_TOKEN\}?
+
+# Placeholder/example values
+changeme
+your[-_]?api[-_]?key[-_]?here
+REPLACE_ME
+xxx+
+TODO.*secret
diff --git a/package.json b/package.json
index c0785d90a..1004a62f4 100644
--- a/package.json
+++ b/package.json
@@ -59,6 +59,9 @@
     "test:live": "GSD_LIVE_TESTS=1 node --experimental-strip-types tests/live/run.ts",
     "test:browser-tools": "node --test src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs",
     "test:native": "node --test packages/native/src/__tests__/grep.test.mjs",
+    "test:secret-scan": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/tests/secret-scan.test.ts",
+    "secret-scan": "bash scripts/secret-scan.sh",
+    "secret-scan:install-hook": "bash scripts/install-hooks.sh",
     "build:native": "node native/scripts/build.js",
     "build:native:dev": "node native/scripts/build.js --dev",
     "dev": "node scripts/dev.js",
diff --git a/scripts/install-hooks.sh b/scripts/install-hooks.sh
new file mode 100755
index 000000000..30bfd629e
--- /dev/null
+++ b/scripts/install-hooks.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Installs the git pre-commit hook for secret scanning.
+# Safe to run multiple times — only installs if not already present.
+
+set -euo pipefail
+
+# --git-path (unlike "$(git rev-parse --git-dir)/hooks") honors core.hooksPath.
+HOOK_DIR="$(git rev-parse --git-path hooks)"
+HOOK_FILE="$HOOK_DIR/pre-commit"
+MARKER="# gsd-secret-scan"
+
+mkdir -p "$HOOK_DIR"
+
+# Check if our hook is already installed
+if [[ -f "$HOOK_FILE" ]] && grep -q "$MARKER" "$HOOK_FILE" 2>/dev/null; then
+  echo "secret-scan pre-commit hook already installed."
+  exit 0
+fi
+
+# If a pre-commit hook already exists, append; otherwise create
+if [[ -f "$HOOK_FILE" ]]; then
+  echo "" >> "$HOOK_FILE"
+  echo "$MARKER" >> "$HOOK_FILE"
+  echo 'bash "$(git rev-parse --show-toplevel)/scripts/secret-scan.sh" || exit $?' >> "$HOOK_FILE"
+  echo "secret-scan appended to existing pre-commit hook."
+else
+  cat > "$HOOK_FILE" << 'EOF'
+#!/usr/bin/env bash
+# gsd-secret-scan
+# Pre-commit hook: scan staged files for hardcoded secrets
+bash "$(git rev-parse --show-toplevel)/scripts/secret-scan.sh"
+EOF
+  echo "secret-scan pre-commit hook installed."
+fi
+
+# Git skips hooks that are not executable, so set the bit in both branches.
+chmod +x "$HOOK_FILE"
diff --git a/scripts/secret-scan.sh b/scripts/secret-scan.sh
new file mode 100755
index 000000000..5b8bc6283
--- /dev/null
+++ b/scripts/secret-scan.sh
@@ -0,0 +1,274 @@
+#!/usr/bin/env bash
+# Secret scanner — detects hardcoded credentials in staged/changed files.
+# Usage:
+#   scripts/secret-scan.sh              # scan staged files (pre-commit mode)
+#   scripts/secret-scan.sh --diff HEAD  # scan diff against HEAD (CI mode)
+#   scripts/secret-scan.sh --file path  # scan a specific file
+#
+# Exit status: 0 = clean, 1 = potential secrets found, 2 = usage error.
+#
+# Works on macOS (BSD grep) and Linux (GNU grep) — uses only ERE patterns.
+
+set -euo pipefail
+
+RED='\033[0;31m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+IGNOREFILE=".secretscanignore"
+EXIT_CODE=0
+
+# ── Pattern definitions ────────────────────────────────────────────────
+# Each entry: "LABEL:::FLAGS:::REGEX"
+# FLAGS: "" for default, "i" for case-insensitive (-i flag)
+# All patterns use POSIX ERE (grep -E), no PCRE required.
+PATTERNS=(
+  # AWS
+  "AWS Access Key::::::AKIA[0-9A-Z]{16}"
+
+  # Generic API keys / tokens (quoted strings that look like keys)
+  "Generic API Key:::i:::(api[_-]?key|apikey|api[_-]?secret)[[:space:]]*[:=][[:space:]]*['\"][0-9a-zA-Z_./-]{20,}['\"]"
+  "Generic Secret:::i:::(secret|token|password|passwd|pwd|credential)[[:space:]]*[:=][[:space:]]*['\"][^[:space:]'\"]{8,}['\"]"
+  "Authorization Header:::i:::(authorization|bearer)[[:space:]]*[:=][[:space:]]*['\"][^[:space:]'\"]{8,}['\"]"
+
+  # Private keys. The algorithm word is optional so the PKCS#8 header
+  # "BEGIN PRIVATE KEY" is caught; PGP armor ends in "PRIVATE KEY BLOCK".
+  "Private Key::::::-----BEGIN[[:space:]]+((RSA|DSA|EC|OPENSSH|PGP|ENCRYPTED)[[:space:]]+)?PRIVATE[[:space:]]+KEY([[:space:]]+BLOCK)?-----"
+
+  # Connection strings
+  "Database URL:::i:::(mysql|postgres|postgresql|mongodb|redis|amqp|mssql)://[^[:space:]'\"]{8,}"
+
+  # GitHub / GitLab tokens
+  "GitHub Token::::::gh[pousr]_[0-9a-zA-Z]{36,}"
+  "GitLab Token::::::glpat-[0-9a-zA-Z-]{20,}"
+
+  # Slack
+  "Slack Token::::::xox[baprs]-[0-9a-zA-Z-]{10,}"
+  "Slack Webhook::::::hooks\.slack\.com/services/T[0-9A-Z]{8,}/B[0-9A-Z]{8,}/[0-9a-zA-Z]{20,}"
+
+  # Google
+  "Google API Key::::::AIza[0-9A-Za-z_-]{35}"
+
+  # Stripe
+  "Stripe Key::::::[sr]k_(live|test)_[0-9a-zA-Z]{20,}"
+
+  # npm token
+  "npm Token::::::npm_[0-9a-zA-Z]{36,}"
+
+  # Hex-encoded secrets (high-entropy, 32+ hex chars assigned to a variable)
+  "Hex Secret:::i:::(secret|key|token|password)[[:space:]]*[:=][[:space:]]*['\"]?[0-9a-f]{32,}['\"]?"
+
+  # Hardcoded passwords in config-like files
+  "Hardcoded Password:::i:::password[[:space:]]*[:=][[:space:]]*['\"][^'\"]{4,}['\"]"
+)
+
+# ── Load ignorefile ────────────────────────────────────────────────────
+# Entries are read once, one per line, into IGNORE_PATTERNS.
+IGNORE_PATTERNS=()
+
+load_ignore_patterns() {
+  local line
+  IGNORE_PATTERNS=()
+  [[ -f "$IGNOREFILE" ]] || return 0
+  while IFS= read -r line || [[ -n "$line" ]]; do
+    line=${line%$'\r'} # tolerate CRLF line endings
+    # skip blank lines and comments
+    [[ -z "$line" || "$line" =~ ^# ]] && continue
+    IGNORE_PATTERNS+=("$line")
+  done < "$IGNOREFILE"
+}
+
+# Succeeds when the flagged line is covered by an ignorefile entry.
+# NOTE: entries are split at the first ":", so a global regex cannot contain one.
+is_ignored() {
+  local file="$1" line_content="$2"
+  local pattern ignore_file ignore_regex
+
+  for pattern in ${IGNORE_PATTERNS[@]+"${IGNORE_PATTERNS[@]}"}; do
+    # Pattern can be "filepath:pattern" or just "pattern"
+    if [[ "$pattern" == *:* ]]; then
+      ignore_file="${pattern%%:*}"
+      ignore_regex="${pattern#*:}"
+      # $ignore_file is deliberately unquoted so it is matched as a glob.
+      if [[ "$file" == $ignore_file ]] && grep -qiE -e "$ignore_regex" <<< "$line_content" 2>/dev/null; then
+        return 0
+      fi
+    elif grep -qiE -e "$pattern" <<< "$line_content" 2>/dev/null; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+# ── Determine files to scan ───────────────────────────────────────────
+get_files() {
+  if [[ "${1:-}" == "--diff" ]]; then
+    local ref="${2:-HEAD}"
+    git diff --name-only --diff-filter=ACMR "$ref" -- 2>/dev/null || true
+  elif [[ "${1:-}" == "--file" ]]; then
+    echo "${2:-}"
+  else
+    # Pre-commit mode: staged files only
+    git diff --cached --name-only --diff-filter=ACMR 2>/dev/null || true
+  fi
+}
+
+# ── File-type filter (skip binaries and known safe files) ─────────────
+should_scan() {
+  local file="$1"
+  # Skip binary extensions
+  case "$file" in
+    *.png|*.jpg|*.jpeg|*.gif|*.ico|*.svg|*.woff|*.woff2|*.ttf|*.eot|\
+    *.zip|*.tar|*.gz|*.tgz|*.bz2|*.7z|*.rar|\
+    *.exe|*.dll|*.so|*.dylib|*.o|*.a|\
+    *.pdf|*.doc|*.docx|*.xls|*.xlsx|\
+    *.lock|package-lock.json|pnpm-lock.yaml|bun.lock|\
+    *.min.js|*.min.css|*.map|\
+    *.node|*.wasm)
+      return 1 ;;
+  esac
+  # Skip known non-secret files
+  case "$file" in
+    .secretscanignore|.gitignore|.gitattributes|LICENSE*|CHANGELOG*|*.md)
+      return 1 ;;
+  esac
+  # Skip node_modules, dist, coverage
+  case "$file" in
+    node_modules/*|dist/*|coverage/*|.gsd/*)
+      return 1 ;;
+  esac
+  return 0
+}
+
+# ── Get content to scan ───────────────────────────────────────────────
+get_content() {
+  local file="$1"
+  if [[ "${SCAN_MODE:-staged}" == "staged" ]]; then
+    # For pre-commit, scan the staged version
+    git show ":$file" 2>/dev/null || cat "$file" 2>/dev/null || true
+  else
+    cat "$file" 2>/dev/null || true
+  fi
+}
+
+# ── Redact matches before printing ────────────────────────────────────
+# Prints $1 with every match of regex $3 (grep flags in $2, e.g. -oiE)
+# replaced by [REDACTED], so findings never copy the secret into logs.
+redact_line() {
+  local text="$1" only_flags="$2" regex="$3"
+  local hits hit
+  hits=$(grep $only_flags -e "$regex" <<< "$text" 2>/dev/null || true)
+  if [[ -z "$hits" ]]; then
+    # Fail closed: if the match cannot be isolated, hide the whole line.
+    echo "[REDACTED]"
+    return 0
+  fi
+  while IFS= read -r hit; do
+    [[ -z "$hit" ]] && continue
+    text=${text//"$hit"/[REDACTED]}
+  done <<< "$hits"
+  printf '%s\n' "$text"
+}
+
+# ── Main scan ─────────────────────────────────────────────────────────
+SCAN_MODE="staged"
+FILES_ARG=()
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --diff)
+      SCAN_MODE="diff"
+      FILES_ARG=("--diff" "${2:-HEAD}")
+      # The ref is optional, so only consume it when one was given;
+      # an unconditional `shift 2` fails under `set -e` for a bare --diff.
+      shift
+      if [[ $# -gt 0 ]]; then shift; fi
+      ;;
+    --file)
+      if [[ $# -lt 2 || -z "$2" ]]; then
+        echo "secret-scan: --file requires a path" >&2
+        exit 2
+      fi
+      SCAN_MODE="file"
+      FILES_ARG=("--file" "$2")
+      shift 2
+      ;;
+    *) shift ;;
+  esac
+done
+
+# A ref that does not resolve must fail loudly; otherwise the scan would
+# report "no files to scan" and pass without checking anything.
+if [[ "$SCAN_MODE" == "diff" ]] && ! git rev-parse --verify --quiet "${FILES_ARG[1]}^{commit}" >/dev/null 2>&1; then
+  echo "secret-scan: cannot resolve ref '${FILES_ARG[1]}'" >&2
+  exit 2
+fi
+
+FILES=$(get_files ${FILES_ARG[@]+"${FILES_ARG[@]}"})
+FINDINGS=0
+
+if [[ -z "$FILES" ]]; then
+  echo "secret-scan: no files to scan"
+  exit 0
+fi
+
+load_ignore_patterns
+
+while IFS= read -r file; do
+  [[ -z "$file" ]] && continue
+  should_scan "$file" || continue
+
+  content=$(get_content "$file")
+  [[ -z "$content" ]] && continue
+
+  for entry in "${PATTERNS[@]}"; do
+    label="${entry%%:::*}"
+    rest="${entry#*:::}"
+    flags="${rest%%:::*}"
+    regex="${rest#*:::}"
+
+    # Build grep flags (-n to locate lines, -o to isolate the match for redaction)
+    grep_flags="-nE"
+    only_flags="-oE"
+    if [[ "$flags" == *i* ]]; then
+      grep_flags="-niE"
+      only_flags="-oiE"
+    fi
+
+    matches=$(grep $grep_flags -e "$regex" <<< "$content" 2>/dev/null || true)
+
+    if [[ -n "$matches" ]]; then
+      while IFS= read -r match_line; do
+        [[ -z "$match_line" ]] && continue
+        line_num="${match_line%%:*}"
+        line_content="${match_line#*:}"
+
+        # Check ignorefile
+        if is_ignored "$file" "$line_content"; then
+          continue
+        fi
+
+        # Mask the actual secret value in output
+        safe_line=$(redact_line "$line_content" "$only_flags" "$regex")
+        echo -e "${RED}[SECRET DETECTED]${NC} ${YELLOW}${label}${NC}"
+        echo "  File: $file:$line_num"
+        echo "  Line: ${safe_line:0:120}..."
+        echo ""
+        FINDINGS=$((FINDINGS + 1))
+        EXIT_CODE=1
+      done <<< "$matches"
+    fi
+  done
+done <<< "$FILES"
+
+if [[ $FINDINGS -gt 0 ]]; then
+  echo -e "${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+  echo -e "${RED}Found $FINDINGS potential secret(s) in staged files.${NC}"
+  echo -e "${RED}Commit blocked. Remove the secrets or add exceptions${NC}"
+  echo -e "${RED}to .secretscanignore if these are false positives.${NC}"
+  echo -e "${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+else
+  echo "secret-scan: no secrets detected ✓"
+fi
+
+exit $EXIT_CODE
diff --git a/src/tests/secret-scan.test.ts b/src/tests/secret-scan.test.ts
new file mode 100644
index 000000000..c4b446cd5
--- /dev/null
+++ b/src/tests/secret-scan.test.ts
@@ -0,0 +1,276 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { spawnSync } from "node:child_process";
+import { writeFileSync, mkdtempSync, rmSync, mkdirSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir, platform } from "node:os";
+import { fileURLToPath } from "node:url";
+
+// Secret scanner requires bash + POSIX grep — skip on Windows
+const isWindows = platform() === "win32";
+
+// fileURLToPath (not URL.pathname) so spaces and other percent-encoded
+// characters in the checkout path are decoded.
+const projectRoot = join(
+  fileURLToPath(new URL(".", import.meta.url)),
+  "..",
+  "..",
+);
+const scanScript = join(projectRoot, "scripts", "secret-scan.sh");
+
+/**
+ * Helper: create a temp git repo, stage a file with given content,
+ * then run the secret scanner in pre-commit mode.
+ */
+function scanContent(
+  content: string,
+  filename = "test-file.ts",
+): { status: number; stdout: string; stderr: string } {
+  const dir = mkdtempSync(join(tmpdir(), "secret-scan-test-"));
+  try {
+    // Initialize a git repo so `git diff --cached` works
+    spawnSync("git", ["init"], { cwd: dir });
+    spawnSync("git", ["config", "user.email", "test@test.com"], { cwd: dir });
+    spawnSync("git", ["config", "user.name", "Test"], { cwd: dir });
+
+    // Write and stage the file
+    const filePath = join(dir, filename);
+    const parentDir = join(dir, ...filename.split("/").slice(0, -1));
+    if (filename.includes("/")) {
+      mkdirSync(parentDir, { recursive: true });
+    }
+    writeFileSync(filePath, content);
+    spawnSync("git", ["add", filename], { cwd: dir });
+
+    const result = spawnSync("bash", [scanScript], {
+      cwd: dir,
+      encoding: "utf-8",
+      env: { ...process.env, TERM: "dumb" },
+    });
+
+    return {
+      status: result.status ?? 1,
+      stdout: result.stdout ?? "",
+      stderr: result.stderr ?? "",
+    };
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+}
+
+// ── Detection tests ──────────────────────────────────────────────────
+
+test("detects AWS access key", { skip: isWindows }, () => {
+  const result = scanContent('const key = "AKIAIOSFODNN7EXAMPLE";');
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /AWS Access Key/);
+});
+
+test("detects generic API key assignment", { skip: isWindows }, () => {
+  const result = scanContent(
+    'const api_key = "sk-abc123def456ghi789jkl012mno345pqr678";',
+  );
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /Generic API Key/i);
+});
+
+test("detects generic secret/password assignment", { skip: isWindows }, () => {
+  const result = scanContent('password = "SuperSecretP@ssw0rd!2024"');
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /SECRET DETECTED/);
+});
+
+test("detects private key header", { skip: isWindows }, () => {
+  const result = scanContent("-----BEGIN RSA PRIVATE KEY-----\nMIIE...");
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /Private Key/);
+});
+
+test("detects PKCS#8 private key header", { skip: isWindows }, () => {
+  const result = scanContent("-----BEGIN PRIVATE KEY-----\nMIIE...");
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /Private Key/);
+});
+
+test("detects GitHub personal access token", { skip: isWindows }, () => {
+  const result = scanContent(
+    'const token = "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm";',
+  );
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /GitHub Token/);
+});
+
+test("detects Stripe test key", { skip: isWindows }, () => {
+  // Use sk_test_ prefix to avoid GitHub push protection on sk_live_
+  const stripeKey = ["sk", "test", "aAbBcCdDeFgHiJkLmNoPqRsT"].join("_");
+  const result = scanContent(`const stripe = "${stripeKey}";`);
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /Stripe Key/);
+});
+
+test("detects database connection string", { skip: isWindows }, () => {
+  const result = scanContent(
+    'const db = "postgres://user:pass@host:5432/mydb";',
+  );
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /Database URL/);
+});
+
+test("detects Slack token", { skip: isWindows }, () => {
+  // Build token dynamically to avoid GitHub push protection
+  const slackToken = ["xoxb", "000000000000", "0000000000000", "testfakevalue000"].join("-");
+  const result = scanContent(`const token = "${slackToken}";`);
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /Slack Token/);
+});
+
+test("detects Google API key", { skip: isWindows }, () => {
+  const result = scanContent(
+    'const key = "AIzaSyA1234567890abcdefghijklmnopqrstuvwx";',
+  );
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /Google API Key|SECRET DETECTED/);
+});
+
+// ── Non-detection tests (should pass clean) ──────────────────────────
+
+test("allows environment variable references", { skip: isWindows }, () => {
+  const result = scanContent("const key = process.env.API_KEY;");
+  assert.equal(result.status, 0, `should pass: ${result.stdout}`);
+});
+
+test("allows empty strings", { skip: isWindows }, () => {
+  const result = scanContent('const password = "";');
+  assert.equal(result.status, 0, `should pass: ${result.stdout}`);
+});
+
+test("allows placeholder values", { skip: isWindows }, () => {
+  const result = scanContent('const api_key = "your-api-key-here";');
+  assert.equal(result.status, 0, `should pass: ${result.stdout}`);
+});
+
+test("skips binary file extensions", { skip: isWindows }, () => {
+  const result = scanContent("AKIAIOSFODNN7EXAMPLE", "image.png");
+  assert.equal(result.status, 0, `should pass (binary skip): ${result.stdout}`);
+});
+
+test("skips package-lock.json", { skip: isWindows }, () => {
+  const result = scanContent(
+    '{"integrity": "sha512-AKIAIOSFODNN7EXAMPLE"}',
+    "package-lock.json",
+  );
+  assert.equal(result.status, 0, `should pass (lockfile skip): ${result.stdout}`);
+});
+
+test("reports no files cleanly", { skip: isWindows }, () => {
+  const dir = mkdtempSync(join(tmpdir(), "secret-scan-empty-"));
+  try {
+    spawnSync("git", ["init"], { cwd: dir });
+    const result = spawnSync("bash", [scanScript], {
+      cwd: dir,
+      encoding: "utf-8",
+    });
+    assert.equal(result.status, 0);
+    assert.match(result.stdout, /no files to scan/);
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+// ── Multiple findings ────────────────────────────────────────────────
+
+test("reports multiple secrets in one file", { skip: isWindows }, () => {
+  const stripeKey = ["sk", "test", "aAbBcCdDeFgHiJkLmNoPqRsT"].join("_");
+  const content = [
+    'const aws = "AKIAIOSFODNN7EXAMPLE";',
+    `const stripe = "${stripeKey}";`,
+    'const db = "postgres://admin:secret@db.prod:5432/app";',
+  ].join("\n");
+  const result = scanContent(content);
+  assert.equal(result.status, 1);
+  // Should find at least 3 findings
+  const count = (result.stdout.match(/SECRET DETECTED/g) || []).length;
+  assert.ok(count >= 3, `expected >=3 findings, got ${count}`);
+});
+
+// ── Output redaction ─────────────────────────────────────────────────
+
+test("does not echo the matched secret", { skip: isWindows }, () => {
+  const result = scanContent('const key = "AKIAIOSFODNN7EXAMPLE";');
+  assert.equal(result.status, 1, `should fail: ${result.stdout}`);
+  assert.match(result.stdout, /\[REDACTED\]/);
+  assert.doesNotMatch(result.stdout, /AKIAIOSFODNN7EXAMPLE/);
+});
+
+// ── CI mode (--diff) ─────────────────────────────────────────────────
+
+test("CI mode scans diff against ref", { skip: isWindows }, () => {
+  const dir = mkdtempSync(join(tmpdir(), "secret-scan-ci-"));
+  try {
+    spawnSync("git", ["init"], { cwd: dir });
+    spawnSync("git", ["config", "user.email", "test@test.com"], { cwd: dir });
+    spawnSync("git", ["config", "user.name", "Test"], { cwd: dir });
+
+    // Create initial commit
+    writeFileSync(join(dir, "clean.ts"), "const x = 1;");
+    spawnSync("git", ["add", "."], { cwd: dir });
+    spawnSync("git", ["commit", "-m", "init"], { cwd: dir });
+
+    // Add a file with a secret on a new commit
+    writeFileSync(
+      join(dir, "leaked.ts"),
+      'const key = "AKIAIOSFODNN7EXAMPLE";',
+    );
+    spawnSync("git", ["add", "."], { cwd: dir });
+    spawnSync("git", ["commit", "-m", "add leak"], { cwd: dir });
+
+    const result = spawnSync("bash", [scanScript, "--diff", "HEAD~1"], {
+      cwd: dir,
+      encoding: "utf-8",
+    });
+
+    assert.equal(result.status, 1, `CI mode should detect: ${result.stdout}`);
+    assert.match(result.stdout, /AWS Access Key/);
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+test("CI mode defaults to HEAD", { skip: isWindows }, () => {
+  const dir = mkdtempSync(join(tmpdir(), "secret-scan-ci-"));
+  try {
+    spawnSync("git", ["init"], { cwd: dir });
+    spawnSync("git", ["config", "user.email", "test@test.com"], { cwd: dir });
+    spawnSync("git", ["config", "user.name", "Test"], { cwd: dir });
+    writeFileSync(join(dir, "app.ts"), "const x = 1;");
+    spawnSync("git", ["add", "."], { cwd: dir });
+    spawnSync("git", ["commit", "-m", "init"], { cwd: dir });
+
+    // Leave the leak uncommitted so the working tree differs from HEAD
+    writeFileSync(join(dir, "app.ts"), 'const key = "AKIAIOSFODNN7EXAMPLE";');
+
+    const result = spawnSync("bash", [scanScript, "--diff"], {
+      cwd: dir,
+      encoding: "utf-8",
+    });
+
+    assert.equal(result.status, 1, `should detect: ${result.stdout}`);
+    assert.match(result.stdout, /AWS Access Key/);
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+test("CI mode fails on an unresolvable ref", { skip: isWindows }, () => {
+  const dir = mkdtempSync(join(tmpdir(), "secret-scan-ci-"));
+  try {
+    spawnSync("git", ["init"], { cwd: dir });
+    const result = spawnSync("bash", [scanScript, "--diff", "no-such-ref"], {
+      cwd: dir,
+      encoding: "utf-8",
+    });
+    assert.equal(result.status, 2, `should error: ${result.stderr}`);
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});