feat: add pre-commit secret scanner and CI secret detection (#1148)

* feat: add pre-commit secret scanner and CI secret detection

Add a comprehensive secret scanning system to prevent accidental
credential leaks in commits and pull requests:

- scripts/secret-scan.sh: ERE-based scanner (macOS/Linux compatible)
  that detects AWS keys, API tokens, private keys, database URLs,
  GitHub/GitLab/Slack/Stripe/Google/npm tokens, and hardcoded passwords
- scripts/install-hooks.sh: one-command git pre-commit hook installer
- .secretscanignore: allowlist for known false positives (test fixtures,
  env var references, placeholder values)
- CI job: secret-scan step in ci.yml scans PR diffs against origin/main
- npm scripts: test:secret-scan, secret-scan, secret-scan:install-hook
- 17 tests covering detection, non-detection, binary skipping, CI mode

* fix: exclude secret-scan test file from CI scanning

The test file contains intentional fake secrets as test inputs.
Add it to .secretscanignore so CI doesn't flag them.

* fix: skip secret-scan tests on Windows (requires bash/POSIX grep)
This commit is contained in:
Jeremy McSpadden 2026-03-18 09:33:17 -05:00 committed by GitHub
parent d834d7be41
commit d24095971c
6 changed files with 522 additions and 0 deletions

View file

@ -7,6 +7,15 @@ on:
branches: [main]
jobs:
# Runs scripts/secret-scan.sh against files that differ from origin/main.
# The script exits non-zero when it reports a finding, which fails this job.
secret-scan:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
with:
# NOTE(review): presumably full history is fetched so that origin/main is
# available for the diff below — confirm against the actions/checkout docs.
fetch-depth: 0
- name: Scan for hardcoded secrets
run: bash scripts/secret-scan.sh --diff origin/main
no-gsd-dir:
runs-on: ubuntu-latest
steps:

35
.secretscanignore Normal file
View file

@ -0,0 +1,35 @@
# .secretscanignore — patterns to exclude from secret scanning
#
# Format:
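# fileglob:regex — ignore lines matching regex, only in files whose path
#                  matches the shell glob before the first ':' (e.g. tests/*)
# regex — ignore lines matching regex in all files (the regex must not
#         contain ':', or the entry is read as fileglob:regex)
# Regexes are POSIX ERE, matched case-insensitively against the whole line.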
#
# Examples:
# tests/fixtures/fake-creds.json:AKIA.*
# EXAMPLE_KEY_DO_NOT_USE
# src/config.example.ts:password\s*=\s*"changeme"
# Secret scanner test file (contains intentional fake secrets as test inputs)
src/tests/secret-scan.test.ts:.*
# Test fixtures with dummy credentials
tests/*:AKIA_EXAMPLE
tests/*:test-secret-value
tests/*:fake[-_]?(password|secret|token|key)
# Documentation examples
*.md:AKIA[0-9A-Z]{16}
*.md:sk_(live|test)_
# Environment variable references (not actual values)
process\.env\.\w+
\$\{?\w+_KEY\}?
\$\{?\w+_SECRET\}?
\$\{?\w+_TOKEN\}?
# Placeholder/example values
changeme
your[-_]?api[-_]?key[-_]?here
REPLACE_ME
xxx+
TODO.*secret

View file

@ -59,6 +59,9 @@
"test:live": "GSD_LIVE_TESTS=1 node --experimental-strip-types tests/live/run.ts",
"test:browser-tools": "node --test src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs",
"test:native": "node --test packages/native/src/__tests__/grep.test.mjs",
"test:secret-scan": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/tests/secret-scan.test.ts",
"secret-scan": "bash scripts/secret-scan.sh",
"secret-scan:install-hook": "bash scripts/install-hooks.sh",
"build:native": "node native/scripts/build.js",
"build:native:dev": "node native/scripts/build.js --dev",
"dev": "node scripts/dev.js",

34
scripts/install-hooks.sh Executable file
View file

@ -0,0 +1,34 @@
#!/usr/bin/env bash
# Installs the git pre-commit hook for secret scanning.
# Safe to run multiple times — only installs if not already present.
#
# Behaviour:
#   - no pre-commit hook yet        → create one that runs scripts/secret-scan.sh
#   - hook exists without our marker → append the scanner invocation to it
#   - hook exists with our marker    → do nothing
set -euo pipefail

HOOK_DIR="$(git rev-parse --git-dir)/hooks"
HOOK_FILE="$HOOK_DIR/pre-commit"
MARKER="# gsd-secret-scan"

# When core.hooksPath is configured, git does not run hooks from
# $GIT_DIR/hooks. Installing there would "succeed" without ever taking
# effect, so tell the user instead of staying silent.
custom_hooks_path="$(git config --get core.hooksPath || true)"
if [[ -n "$custom_hooks_path" ]]; then
  echo "warning: core.hooksPath is set to '$custom_hooks_path'; git will not run hooks installed in $HOOK_DIR." >&2
fi

mkdir -p "$HOOK_DIR"

# Check if our hook is already installed.
# -F: the marker is a literal string, not a regular expression.
if [[ -f "$HOOK_FILE" ]] && grep -qF -- "$MARKER" "$HOOK_FILE" 2>/dev/null; then
  echo "secret-scan pre-commit hook already installed."
  exit 0
fi

# If a pre-commit hook already exists, append; otherwise create
if [[ -f "$HOOK_FILE" ]]; then
  {
    echo ""
    echo "$MARKER"
    echo 'bash "$(git rev-parse --show-toplevel)/scripts/secret-scan.sh"'
  } >> "$HOOK_FILE"
  # git skips hooks that are not executable, so make sure the hook we just
  # extended is actually active.
  chmod +x "$HOOK_FILE"
  echo "secret-scan appended to existing pre-commit hook."
else
  cat > "$HOOK_FILE" << 'EOF'
#!/usr/bin/env bash
# gsd-secret-scan
# Pre-commit hook: scan staged files for hardcoded secrets
bash "$(git rev-parse --show-toplevel)/scripts/secret-scan.sh"
EOF
  chmod +x "$HOOK_FILE"
  echo "secret-scan pre-commit hook installed."
fi

222
scripts/secret-scan.sh Executable file
View file

@ -0,0 +1,222 @@
#!/usr/bin/env bash
# Secret scanner — detects hardcoded credentials in staged/changed files.
# Usage:
# scripts/secret-scan.sh # scan staged files (pre-commit mode)
# scripts/secret-scan.sh --diff HEAD # scan diff against HEAD (CI mode)
# scripts/secret-scan.sh --file path # scan a specific file
#
# In --diff mode the ref is only used to pick WHICH files to scan; each
# selected file is then scanned in full, not just its changed lines.
#
# Exits non-zero when at least one finding is reported.
#
# Works on macOS (BSD grep) and Linux (GNU grep) — uses only ERE patterns.
set -euo pipefail
# ANSI colour escapes; they are emitted with `echo -e`.
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Allowlist file. The path is relative, so it is looked up in the directory
# the script is run from.
IGNOREFILE=".secretscanignore"
# Process exit status; set to 1 once a finding is reported.
EXIT_CODE=0
# ── Pattern definitions ────────────────────────────────────────────────
# Each entry: "LABEL:::FLAGS:::REGEX"
# FLAGS: "" for default, "i" for case-insensitive (-i flag)
# An entry with empty FLAGS therefore reads "LABEL::::::REGEX".
# All patterns use POSIX ERE (grep -E), no PCRE required.
PATTERNS=(
  # AWS
  "AWS Access Key::::::AKIA[0-9A-Z]{16}"

  # Generic API keys / tokens (quoted strings that look like keys)
  "Generic API Key:::i:::(api[_-]?key|apikey|api[_-]?secret)[[:space:]]*[:=][[:space:]]*['\"][0-9a-zA-Z_./-]{20,}['\"]"
  "Generic Secret:::i:::(secret|token|password|passwd|pwd|credential)[[:space:]]*[:=][[:space:]]*['\"][^[:space:]'\"]{8,}['\"]"
  "Authorization Header:::i:::(authorization|bearer)[[:space:]]*[:=][[:space:]]*['\"][^[:space:]'\"]{8,}['\"]"

  # Private keys. The algorithm word is optional so PKCS#8 headers
  # ("BEGIN PRIVATE KEY", "BEGIN ENCRYPTED PRIVATE KEY") are caught, and an
  # optional trailing "BLOCK" covers PGP ("BEGIN PGP PRIVATE KEY BLOCK").
  "Private Key::::::-----BEGIN[[:space:]]+((RSA|DSA|EC|OPENSSH|PGP|ENCRYPTED)[[:space:]]+)?PRIVATE[[:space:]]+KEY([[:space:]]+BLOCK)?-----"

  # Connection strings
  "Database URL:::i:::(mysql|postgres|postgresql|mongodb|redis|amqp|mssql)://[^[:space:]'\"]{8,}"

  # GitHub / GitLab tokens
  "GitHub Token::::::gh[pousr]_[0-9a-zA-Z]{36,}"
  "GitLab Token::::::glpat-[0-9a-zA-Z-]{20,}"

  # Slack
  "Slack Token::::::xox[baprs]-[0-9a-zA-Z-]{10,}"
  "Slack Webhook::::::hooks\.slack\.com/services/T[0-9A-Z]{8,}/B[0-9A-Z]{8,}/[0-9a-zA-Z]{20,}"

  # Google
  "Google API Key::::::AIza[0-9A-Za-z_-]{35}"

  # Stripe
  "Stripe Key::::::[sr]k_(live|test)_[0-9a-zA-Z]{20,}"

  # npm token
  "npm Token::::::npm_[0-9a-zA-Z]{36,}"

  # Hex-encoded secrets (high-entropy, 32+ hex chars assigned to a variable)
  "Hex Secret:::i:::(secret|key|token|password)[[:space:]]*[:=][[:space:]]*['\"]?[0-9a-f]{32,}['\"]?"

  # Hardcoded passwords in config-like files
  "Hardcoded Password:::i:::password[[:space:]]*[:=][[:space:]]*['\"][^'\"]{4,}['\"]"
)
# ── Load ignorefile ────────────────────────────────────────────────────
# Prints every active entry of $IGNOREFILE on its own line, skipping blank
# lines and lines starting with '#'. Prints nothing when the file is absent.
#
# Entries are emitted one per line (not space-joined) so that an entry
# containing whitespace stays a single regex instead of being split into
# several broader ones.
load_ignore_patterns() {
  [[ -f "$IGNOREFILE" ]] || return 0
  local line
  # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # skip blank lines and comments
    if [[ -z "$line" || "$line" == \#* ]]; then
      continue
    fi
    printf '%s\n' "$line"
  done < "$IGNOREFILE"
}

# Succeeds (returns 0) when a finding should be suppressed.
#   $1  path of the file being scanned
#   $2  text of the matching line
# An ignore entry is either "fileglob:regex" (split at the first ':'; the
# regex applies only to files whose path matches the glob) or a bare "regex"
# applied to every file. Regexes are ERE and matched case-insensitively.
is_ignored() {
  local file="$1" line_content="$2"
  local entry ignore_file ignore_regex
  while IFS= read -r entry; do
    if [[ -z "$entry" ]]; then
      continue
    fi
    if [[ "$entry" == *:* ]]; then
      ignore_file="${entry%%:*}"
      ignore_regex="${entry#*:}"
      # $ignore_file is intentionally unquoted: it is a glob, not a literal.
      # The line is fed via a here-string (no pipeline, so `pipefail` cannot
      # turn a successful `grep -q` into a failure) and the regex is passed
      # with -e so a leading '-' is not parsed as an option.
      if [[ "$file" == $ignore_file ]] && grep -qiE -e "$ignore_regex" 2>/dev/null <<< "$line_content"; then
        return 0
      fi
    elif grep -qiE -e "$entry" 2>/dev/null <<< "$line_content"; then
      return 0
    fi
  done <<< "$(load_ignore_patterns)"
  return 1
}
# ── Determine files to scan ───────────────────────────────────────────
# Prints the paths to scan, one per line.
#   get_files --diff [REF]  files added/copied/modified/renamed relative to
#                           REF (default: HEAD)
#   get_files --file PATH   PATH itself
#   get_files               staged files (pre-commit mode)
# Returns 2 when REF cannot be resolved: treating that as "nothing changed"
# would let the scan pass without having looked at anything.
get_files() {
  case "${1:-}" in
    --diff)
      local ref="${2:-HEAD}"
      if ! git rev-parse --verify --quiet "${ref}^{commit}" >/dev/null 2>&1; then
        echo "secret-scan: cannot resolve git ref '$ref'; refusing to report a clean scan" >&2
        return 2
      fi
      # core.quotepath=off: print non-ASCII paths verbatim rather than as
      # C-quoted octal escapes, which could not be opened afterwards.
      git -c core.quotepath=off diff --name-only --diff-filter=ACMR "$ref" --
      ;;
    --file)
      printf '%s\n' "${2:-}"
      ;;
    *)
      # Pre-commit mode: staged files only
      git -c core.quotepath=off diff --cached --name-only --diff-filter=ACMR 2>/dev/null || true
      ;;
  esac
}
# ── File-type filter (skip binaries and known safe files) ─────────────
# Returns 0 when the path in $1 should be scanned and 1 when it should be
# skipped. The decision is made purely on the path string.
should_scan() {
  local path="$1"
  case "$path" in
    # Binary and generated artefacts
    *.png|*.jpg|*.jpeg|*.gif|*.ico|*.svg|*.woff|*.woff2|*.ttf|*.eot) return 1 ;;
    *.zip|*.tar|*.gz|*.tgz|*.bz2|*.7z|*.rar) return 1 ;;
    *.exe|*.dll|*.so|*.dylib|*.o|*.a) return 1 ;;
    *.pdf|*.doc|*.docx|*.xls|*.xlsx) return 1 ;;
    *.lock|package-lock.json|pnpm-lock.yaml|bun.lock) return 1 ;;
    *.min.js|*.min.css|*.map) return 1 ;;
    *.node|*.wasm) return 1 ;;
    # Known non-secret files
    .secretscanignore|.gitignore|.gitattributes|LICENSE*|CHANGELOG*|*.md) return 1 ;;
    # Dependency, build and tool output directories
    node_modules/*|dist/*|coverage/*|.gsd/*) return 1 ;;
    *) return 0 ;;
  esac
}
# ── Get content to scan ───────────────────────────────────────────────
# Prints the text to scan for the path in $1. Never fails: an unreadable
# path simply yields no output.
get_content() {
  local path="$1"
  if [[ "${SCAN_MODE:-staged}" != "staged" ]]; then
    # diff / file modes read the working-tree copy
    cat "$path" 2>/dev/null || true
    return
  fi
  # Pre-commit mode: prefer the staged (index) version and fall back to the
  # working-tree copy when git cannot provide it.
  git show ":$path" 2>/dev/null || cat "$path" 2>/dev/null || true
}
# ── Main scan ─────────────────────────────────────────────────────────
# Parses the command line, collects the files to scan, runs every entry of
# PATTERNS over each file and reports the findings that are not allowlisted.
SCAN_MODE="staged"
FILES_ARG=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    --diff)
      # REF is optional and defaults to HEAD. Shift only what is actually
      # there: `shift 2` with one argument left fails and, under `set -e`,
      # would abort the script without any message.
      SCAN_MODE="diff"
      FILES_ARG=("--diff" "${2:-HEAD}")
      shift
      if [[ $# -gt 0 ]]; then
        shift
      fi
      ;;
    --file)
      if [[ $# -lt 2 ]]; then
        echo "secret-scan: --file requires a path" >&2
        exit 2
      fi
      SCAN_MODE="file"
      FILES_ARG=("--file" "$2")
      shift 2
      ;;
    *)
      # Still ignored for backward compatibility, but no longer silently:
      # a mistyped option would otherwise fall back to staged mode unnoticed.
      echo "secret-scan: ignoring unknown argument '$1'" >&2
      shift
      ;;
  esac
done

FILES=$(get_files "${FILES_ARG[@]+"${FILES_ARG[@]}"}")
FINDINGS=0
if [[ -z "$FILES" ]]; then
  echo "secret-scan: no files to scan"
  exit 0
fi

while IFS= read -r file; do
  if [[ -z "$file" ]]; then
    continue
  fi
  should_scan "$file" || continue
  content=$(get_content "$file")
  if [[ -z "$content" ]]; then
    continue
  fi

  for entry in "${PATTERNS[@]}"; do
    # Split "LABEL:::FLAGS:::REGEX"
    label="${entry%%:::*}"
    rest="${entry#*:::}"
    flags="${rest%%:::*}"
    regex="${rest#*:::}"

    # -n: "LINE:TEXT" output for locating findings; -o: matched text only,
    # used below to redact the secret from the preview.
    grep_flags="-nE"
    only_flags="-oE"
    if [[ "$flags" == *i* ]]; then
      grep_flags="-niE"
      only_flags="-oiE"
    fi

    # printf instead of echo: echo would swallow content such as "-n".
    matches=$(printf '%s\n' "$content" | grep "$grep_flags" -e "$regex" 2>/dev/null || true)
    if [[ -z "$matches" ]]; then
      continue
    fi

    while IFS= read -r match_line; do
      if [[ -z "$match_line" ]]; then
        continue
      fi
      line_num="${match_line%%:*}"
      line_content="${match_line#*:}"

      # Check ignorefile
      if is_ignored "$file" "$line_content"; then
        continue
      fi

      # Mask the actual secret value in output: replace every substring the
      # pattern matched so the credential does not end up in terminal
      # scrollback or CI logs.
      preview="$line_content"
      hits=$(printf '%s\n' "$line_content" | grep "$only_flags" -e "$regex" 2>/dev/null || true)
      while IFS= read -r hit; do
        if [[ -n "$hit" ]]; then
          preview=${preview//"$hit"/[REDACTED]}
        fi
      done <<< "$hits"

      echo -e "${RED}[SECRET DETECTED]${NC} ${YELLOW}${label}${NC}"
      echo " File: $file:$line_num"
      echo " Line: ${preview:0:120}..."
      echo ""
      FINDINGS=$((FINDINGS + 1))
      EXIT_CODE=1
    done <<< "$matches"
  done
done <<< "$FILES"

if [[ $FINDINGS -gt 0 ]]; then
  # Only pre-commit mode actually blocks a commit; word the summary to match
  # the mode that ran.
  if [[ "$SCAN_MODE" == "staged" ]]; then
    scope="staged files"
    verdict="Commit blocked."
  else
    scope="scanned files"
    verdict="Scan failed."
  fi
  echo -e "${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  echo -e "${RED}Found $FINDINGS potential secret(s) in ${scope}.${NC}"
  echo -e "${RED}${verdict} Remove the secrets or add exceptions${NC}"
  echo -e "${RED}to .secretscanignore if these are false positives.${NC}"
  echo -e "${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
else
  echo "secret-scan: no secrets detected ✓"
fi
exit $EXIT_CODE

View file

@ -0,0 +1,219 @@
import assert from "node:assert/strict";
import { spawnSync } from "node:child_process";
import { writeFileSync, mkdtempSync, rmSync, mkdirSync } from "node:fs";
import { tmpdir, platform } from "node:os";
import { join } from "node:path";
import test from "node:test";
import { fileURLToPath } from "node:url";
// Secret scanner requires bash + POSIX grep — skip on Windows
const isWindows = platform() === "win32";

/**
 * Repository root, two directories above the one containing this file.
 *
 * `fileURLToPath` is used instead of `URL#pathname` because `pathname` stays
 * percent-encoded: a checkout under e.g. "/home/me/my projects" would resolve
 * to ".../my%20projects/..." and the scanner script would not be found.
 */
const projectRoot = join(fileURLToPath(new URL(".", import.meta.url)), "..", "..");

/** Absolute path of the scanner script under test. */
const scanScript = join(projectRoot, "scripts", "secret-scan.sh");
/**
 * Stages `content` as `filename` inside a throwaway git repository and runs
 * the secret scanner there with no arguments (its pre-commit mode).
 *
 * @param content  Text written to the staged file.
 * @param filename Path of the file relative to the repo root; may contain
 *                 "/" separators, in which case parent directories are created.
 * @returns The scanner's exit status (1 when the process reported none) and
 *          its captured stdout / stderr.
 */
function scanContent(
  content: string,
  filename = "test-file.ts",
): { status: number; stdout: string; stderr: string } {
  const repoDir = mkdtempSync(join(tmpdir(), "secret-scan-test-"));
  const git = (...args: string[]) => spawnSync("git", args, { cwd: repoDir });

  try {
    // A real repository is needed so `git diff --cached` works
    git("init");
    git("config", "user.email", "test@test.com");
    git("config", "user.name", "Test");

    // Write and stage the file
    if (filename.includes("/")) {
      const parentSegments = filename.split("/").slice(0, -1);
      mkdirSync(join(repoDir, ...parentSegments), { recursive: true });
    }
    writeFileSync(join(repoDir, filename), content);
    git("add", filename);

    const { status, stdout, stderr } = spawnSync("bash", [scanScript], {
      cwd: repoDir,
      encoding: "utf-8",
      env: { ...process.env, TERM: "dumb" },
    });
    return { status: status ?? 1, stdout: stdout ?? "", stderr: stderr ?? "" };
  } finally {
    rmSync(repoDir, { recursive: true, force: true });
  }
}
// ── Detection tests ──────────────────────────────────────────────────
// Each case stages `content` and expects the scanner to exit 1 AND to name
// the specific rule in `label`. Asserting on the rule label (rather than the
// generic "SECRET DETECTED" banner) keeps a test from passing just because
// some unrelated pattern happened to fire.

// Use sk_test_ prefix to avoid GitHub push protection on sk_live_
const stripeTestKey = ["sk", "test", "aAbBcCdDeFgHiJkLmNoPqRsT"].join("_");
// Build token dynamically to avoid GitHub push protection
const slackBotToken = ["xoxb", "000000000000", "0000000000000", "testfakevalue000"].join("-");

const detectionCases: ReadonlyArray<{ name: string; content: string; label: RegExp }> = [
  {
    name: "detects AWS access key",
    content: 'const key = "AKIAIOSFODNN7EXAMPLE";',
    label: /AWS Access Key/,
  },
  {
    name: "detects generic API key assignment",
    content: 'const api_key = "sk-abc123def456ghi789jkl012mno345pqr678";',
    label: /Generic API Key/i,
  },
  {
    name: "detects generic secret/password assignment",
    content: 'password = "SuperSecretP@ssw0rd!2024"',
    label: /Generic Secret/,
  },
  {
    name: "detects private key header",
    content: "-----BEGIN RSA PRIVATE KEY-----\nMIIE...",
    label: /Private Key/,
  },
  {
    name: "detects GitHub personal access token",
    content: 'const token = "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm";',
    label: /GitHub Token/,
  },
  {
    name: "detects Stripe test key",
    content: `const stripe = "${stripeTestKey}";`,
    label: /Stripe Key/,
  },
  {
    name: "detects database connection string",
    content: 'const db = "postgres://user:pass@host:5432/mydb";',
    label: /Database URL/,
  },
  {
    name: "detects Slack token",
    content: `const token = "${slackBotToken}";`,
    label: /Slack Token/,
  },
  {
    name: "detects Google API key",
    content: 'const key = "AIzaSyA1234567890abcdefghijklmnopqrstuvwx";',
    label: /Google API Key/,
  },
];

for (const { name, content, label } of detectionCases) {
  test(name, { skip: isWindows }, () => {
    const result = scanContent(content);
    assert.equal(result.status, 1, `should fail: ${result.stdout}`);
    assert.match(result.stdout, label);
  });
}
// ── Non-detection tests (should pass clean) ──────────────────────────
// Each case stages `content` (optionally under a specific `filename`) and
// expects the scanner to exit 0; `note` prefixes the failure message.
const cleanCases: ReadonlyArray<{
  name: string;
  content: string;
  filename?: string;
  note: string;
}> = [
  {
    name: "allows environment variable references",
    content: "const key = process.env.API_KEY;",
    note: "should pass",
  },
  {
    name: "allows empty strings",
    content: 'const password = "";',
    note: "should pass",
  },
  {
    name: "allows placeholder values",
    content: 'const api_key = "your-api-key-here";',
    note: "should pass",
  },
  {
    name: "skips binary file extensions",
    content: "AKIAIOSFODNN7EXAMPLE",
    filename: "image.png",
    note: "should pass (binary skip)",
  },
  {
    name: "skips package-lock.json",
    content: '{"integrity": "sha512-AKIAIOSFODNN7EXAMPLE"}',
    filename: "package-lock.json",
    note: "should pass (lockfile skip)",
  },
];

for (const { name, content, filename, note } of cleanCases) {
  test(name, { skip: isWindows }, () => {
    const result = scanContent(content, filename);
    assert.equal(result.status, 0, `${note}: ${result.stdout}`);
  });
}

test("reports no files cleanly", { skip: isWindows }, () => {
  // A freshly initialised repository has nothing staged.
  const emptyRepo = mkdtempSync(join(tmpdir(), "secret-scan-empty-"));
  try {
    spawnSync("git", ["init"], { cwd: emptyRepo });
    const { status, stdout } = spawnSync("bash", [scanScript], {
      cwd: emptyRepo,
      encoding: "utf-8",
    });
    assert.equal(status, 0);
    assert.match(stdout, /no files to scan/);
  } finally {
    rmSync(emptyRepo, { recursive: true, force: true });
  }
});
// ── Multiple findings ────────────────────────────────────────────────
test("reports multiple secrets in one file", { skip: isWindows }, () => {
  // Assembled at runtime so the literal never appears in this source file.
  const stripeKey = ["sk", "test", "aAbBcCdDeFgHiJkLmNoPqRsT"].join("_");
  const lines = [
    'const aws = "AKIAIOSFODNN7EXAMPLE";',
    `const stripe = "${stripeKey}";`,
    'const db = "postgres://admin:secret@db.prod:5432/app";',
  ];

  const { status, stdout } = scanContent(lines.join("\n"));
  assert.equal(status, 1);

  // One banner is printed per finding; expect at least one per planted secret.
  const count = stdout.split("SECRET DETECTED").length - 1;
  assert.ok(count >= 3, `expected >=3 findings, got ${count}`);
});
// ── CI mode (--diff) ─────────────────────────────────────────────────
test("CI mode scans diff against ref", { skip: isWindows }, () => {
  const dir = mkdtempSync(join(tmpdir(), "secret-scan-ci-"));

  // Run git in the temp repo and fail right away when a setup step breaks.
  // Otherwise a failed commit only surfaces later as a confusing scanner
  // assertion, because HEAD~1 would not exist.
  const git = (...args: string[]): void => {
    const res = spawnSync("git", args, { cwd: dir, encoding: "utf-8" });
    assert.equal(res.status, 0, `git ${args.join(" ")} failed: ${res.stderr}`);
  };

  try {
    git("init");
    git("config", "user.email", "test@test.com");
    git("config", "user.name", "Test");
    // A developer's global commit.gpgsign=true would make the commits below
    // fail (or prompt) inside this throwaway repository.
    git("config", "commit.gpgsign", "false");

    // Create initial commit
    writeFileSync(join(dir, "clean.ts"), "const x = 1;");
    git("add", ".");
    git("commit", "-m", "init");

    // Add a file with a secret on a new commit
    writeFileSync(
      join(dir, "leaked.ts"),
      'const key = "AKIAIOSFODNN7EXAMPLE";',
    );
    git("add", ".");
    git("commit", "-m", "add leak");

    const result = spawnSync("bash", [scanScript, "--diff", "HEAD~1"], {
      cwd: dir,
      encoding: "utf-8",
    });
    assert.equal(result.status, 1, `CI mode should detect: ${result.stdout}`);
    assert.match(result.stdout, /AWS Access Key/);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});