feat(ci): skip build/test for docs-only PRs and add prompt injection scan (#1699)

Docs-only PRs (only .md files and docs/ changes) now skip the expensive build, typecheck, and test jobs while still running lint and a new docs-check job. The docs-check job runs a prompt injection scanner that detects hidden directives, role overrides, system prompt markers, tool call injection, and invisible Unicode in markdown prose (excluding fenced code blocks and inline code spans). Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 10:39:03 -04:00 · 2026-03-21 10:39:03 -04:00 · 55d6c7d9f1
commit 55d6c7d9f1
parent 7385cf4bb8
3 changed files with 285 additions and 5 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -4,8 +4,6 @@ on:
  push:
    branches: [main]
    paths-ignore:
-      - '**.md'
-      - 'docs/**'
      - '.github/workflows/ai-triage.yml'
      - '.github/workflows/build-native.yml'
      - '.github/workflows/cleanup-dev-versions.yml'
@ -14,8 +12,6 @@ on:
  pull_request:
    branches: [main]
    paths-ignore:
-      - '**.md'
-      - 'docs/**'
      - '.github/workflows/ai-triage.yml'
      - '.github/workflows/build-native.yml'
      - '.github/workflows/cleanup-dev-versions.yml'
@ -27,7 +23,54 @@ concurrency:
  cancel-in-progress: true

 jobs:
+  # Classifies the change as docs-only or not, so later jobs can skip
+  # build/test work. Exposes the result as the `docs-only` output
+  # ('true' or 'false').
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      docs-only: ${{ steps.check.outputs.docs-only }}
+    steps:
+      # Full history is fetched so the base commit is available to diff against.
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - name: Check if only documentation changed
+        id: check
+        env:
+          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
+          PUSH_BEFORE_SHA: ${{ github.event.before }}
+          EVENT_NAME: ${{ github.event_name }}
+          HEAD_SHA: ${{ github.sha }}
+        run: |
+          # Pick the commit to compare against: the PR base for pull requests,
+          # the pre-push commit otherwise.
+          if [ "$EVENT_NAME" = "pull_request" ]; then
+            BASE="$PR_BASE_SHA"
+          else
+            BASE="$PUSH_BEFORE_SHA"
+          fi
+          # Fall back to the previous commit when the base cannot be diffed.
+          FILES=$(git diff --name-only "$BASE" "$HEAD_SHA" 2>/dev/null || git diff --name-only HEAD~1)
+          echo "Changed files:"
+          echo "$FILES"
+          # An empty list tells us nothing about the change, so never treat it
+          # as docs-only: skipping build/test must require positive evidence.
+          if [ -z "$FILES" ]; then
+            echo "docs-only=false" >> "$GITHUB_OUTPUT"
+            echo "::notice::No changed files detected — running full build/test"
+            exit 0
+          fi
+          # Drop everything that counts as documentation; whatever is left is code.
+          NON_DOCS=$(printf '%s\n' "$FILES" | grep -vE '\.(md|markdown)$|^docs/|^LICENSE$' || true)
+          if [ -z "$NON_DOCS" ]; then
+            echo "docs-only=true" >> "$GITHUB_OUTPUT"
+            echo "::notice::Only documentation files changed — skipping build/test"
+          else
+            echo "docs-only=false" >> "$GITHUB_OUTPUT"
+            echo "Non-docs files changed:"
+            echo "$NON_DOCS"
+          fi
+
+  # Scans changed markdown files for prompt injection patterns.
+  docs-check:
+    runs-on: ubuntu-latest
+    needs: detect-changes
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      # Diff against the event's own base commit rather than origin/main:
+      # on a push to main, origin/main already equals HEAD, so that diff
+      # lists no files and nothing would be scanned.
+      - name: Scan documentation for prompt injection
+        env:
+          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
+          PUSH_BEFORE_SHA: ${{ github.event.before }}
+          EVENT_NAME: ${{ github.event_name }}
+        run: |
+          if [ "$EVENT_NAME" = "pull_request" ]; then
+            BASE="$PR_BASE_SHA"
+          else
+            BASE="$PUSH_BEFORE_SHA"
+          fi
+          bash scripts/docs-prompt-injection-scan.sh --diff "$BASE"
+
  lint:
+    needs: detect-changes
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
@ -53,6 +96,8 @@ jobs:
        run: node scripts/check-skill-references.mjs

  build:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.docs-only != 'true'
    runs-on: ubuntu-latest

    steps:
@ -86,7 +131,10 @@ jobs:
        run: npm run test:integration

  windows-portability:
-    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+    needs: detect-changes
+    if: >-
+      needs.detect-changes.outputs.docs-only != 'true' &&
+      github.event_name == 'push' && github.ref == 'refs/heads/main'
    runs-on: windows-latest

    steps:
--- a/docs/ci-cd-pipeline.md
+++ b/docs/ci-cd-pipeline.md
@ -70,6 +70,29 @@ docker run --rm -v $(pwd):/workspace ghcr.io/gsd-build/gsd-pi:latest --version

 **CI optimization (v2.38):** GitHub Actions minutes were reduced ~60-70% (~10k → ~3-4k/month) through workflow consolidation and caching improvements.

+### Docs-Only PR Detection (v2.41)
+
+CI automatically detects when a PR (or a push to `main`) contains only documentation changes (`.md`/`.markdown` files, anything under `docs/`, and the top-level `LICENSE` file). When docs-only:
+
+- **Skipped:** `build`, `windows-portability` (no code to compile or test)
+- **Still runs:** `lint` (secret scanning, `.gsd/` check), `docs-check` (prompt injection scan)
+
+This saves CI minutes on documentation PRs while still enforcing security checks.
+
+### Prompt Injection Scan (v2.41)
+
+The `docs-check` job runs `scripts/docs-prompt-injection-scan.sh` on every CI run and scans the changed markdown files. It checks documentation prose (excluding fenced code blocks and inline code spans) for patterns that could manipulate LLM behavior when docs are ingested as context:
+
+- **System prompt markers** — `<system-prompt>`, `<|im_start|>system`, `[SYSTEM]:`
+- **Role/instruction overrides** — `ignore previous instructions`, `you are now`, `new instructions:`
+- **Hidden HTML directives** — `<!-- PROMPT:`, `<!-- INSTRUCTION:`
+- **Tool call injection** — `<tool_call>`, `<function_call>`, `<invoke`
+- **Invisible Unicode** — zero-width character sequences that hide directives
+
+Content inside fenced code blocks (` ``` `) and inline code spans is excluded — patterns in code examples are expected and legitimate.
+
+**False positives:** Add exceptions to `.prompt-injection-scanignore` using the same format as `.secretscanignore` (one pattern per line, `file:regex` for file-scoped exceptions).
+
 ### Gating Tests

 The pipeline only triggers after `ci.yml` passes. Key gating tests include:
--- a/scripts/docs-prompt-injection-scan.sh
+++ b/scripts/docs-prompt-injection-scan.sh
@ -0,0 +1,209 @@
+#!/usr/bin/env bash
+# Scan markdown documentation for prompt injection patterns.
+# Designed to catch hidden directives, role overrides, and system prompt
+# markers that could influence LLM behavior when docs are ingested as context.
+#
+# Usage:
+#   bash scripts/docs-prompt-injection-scan.sh                  # scan staged .md files
+#   bash scripts/docs-prompt-injection-scan.sh --diff origin/main  # scan changed .md files vs branch
+#   bash scripts/docs-prompt-injection-scan.sh --file README.md    # scan a single file
+
+set -euo pipefail
+
+RED='\033[0;31m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+IGNOREFILE=".prompt-injection-scanignore"
+EXIT_CODE=0
+FINDINGS=0
+
+# ── Patterns ──────────────────────────────────────────────────────────
+# Each entry is one detection rule; the label is what gets printed when the
+# regex matches a line of prose.
+# Format: "Label:::flags:::regex"
+# Flags: i = case-insensitive
+#        P = Perl-compatible regex (grep -P) instead of extended regex (grep -E)
+#        (empty) = case-sensitive extended regex
+PATTERNS=(
+  # System prompt markers
+  "System prompt marker:::i:::<system-prompt>"
+  "System prompt marker:::i:::<\|im_start\|>system"
+  "System prompt marker:::i:::\[SYSTEM\][[:space:]]*:"
+
+  # Role injection / override
+  "Role injection:::i:::you are now [a-z]"
+  "Instruction override:::i:::ignore (all )?previous instructions"
+  "Instruction override:::i:::ignore (all )?prior instructions"
+  "Instruction override:::i:::disregard (all )?(above|previous|prior)"
+  "Instruction override:::i:::forget (all )?(above|previous|prior) (instructions|context|rules)"
+  "Instruction override:::i:::new instructions:"
+  "Instruction override:::i:::override (all )?instructions"
+  "Instruction override:::i:::your new role is"
+  "Instruction override:::i:::from now on,? (you (are|will|must|should)|act as)"
+
+  # Hidden HTML directives (no flags: these two rules are case-sensitive)
+  "Hidden HTML directive::::::<!--[[:space:]]*(PROMPT|INSTRUCTION|SYSTEM|OVERRIDE|INJECT)[[:space:]]*:"
+  "Hidden HTML directive::::::<!--[[:space:]]*(ignore|disregard|forget|override)"
+
+  # Tool / function call injection (no flags: case-sensitive)
+  "Tool call injection::::::(<tool_call>|<function_call>|<tool_use>)"
+  "Tool call injection::::::(<invoke|<function_calls>)"
+
+  # Encoded payload markers
+  "Encoded payload:::i:::(eval|exec|decode)\((base64|atob|btoa)"
+
+  # Invisible Unicode tricks (zero-width chars used to hide directives)
+  # Match specific zero-width codepoints: U+200B (ZWSP), U+200C (ZWNJ), U+200D (ZWJ), U+FEFF (BOM)
+  # Use Perl-compatible Unicode escapes to avoid matching em-dash (U+2014) and similar
+  "Invisible Unicode:::P:::\\x{200B}|\\x{200C}|\\x{200D}|\\x{FEFF}"
+)
+
+# ── Helpers ───────────────────────────────────────────────────────────
+
+# Print the active entries of $IGNOREFILE, one per line, skipping blank lines
+# and lines starting with '#'. Prints nothing when the file does not exist.
+# One entry per line (rather than a space-joined list) keeps entries that
+# contain spaces intact for the caller.
+load_ignore_patterns() {
+  [[ -f "$IGNOREFILE" ]] || return 0
+  local line
+  # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
+  while IFS= read -r line || [[ -n "$line" ]]; do
+    [[ -z "$line" || "$line" =~ ^# ]] && continue
+    printf '%s\n' "$line"
+  done < "$IGNOREFILE"
+}
+
+# Decide whether a flagged line is covered by an ignore entry.
+#   $1  path of the file being scanned
+#   $2  text of the flagged line
+# Returns 0 when an entry matches, 1 otherwise.
+# An entry containing ':' is treated as "file-glob:regex" and applies only
+# when the path matches the glob; any other entry is a regex applied to every
+# file. Regexes are matched case-insensitively as extended regex.
+is_ignored() {
+  local file="$1" line_content="$2"
+  local pattern ignore_file ignore_regex
+
+  # Read whole lines so that a regex such as "you are now" stays one entry
+  # instead of being split into separate single-word patterns.
+  while IFS= read -r pattern; do
+    [[ -z "$pattern" ]] && continue
+    if [[ "$pattern" == *:* ]]; then
+      ignore_file="${pattern%%:*}"
+      ignore_regex="${pattern#*:}"
+      # $ignore_file is deliberately unquoted so it is matched as a glob.
+      if [[ "$file" == $ignore_file ]] && grep -qiE -e "$ignore_regex" <<< "$line_content" 2>/dev/null; then
+        return 0
+      fi
+    elif grep -qiE -e "$pattern" <<< "$line_content" 2>/dev/null; then
+      return 0
+    fi
+  done < <(load_ignore_patterns)
+  return 1
+}
+
+# Strip fenced code blocks and inline code from content so we don't flag
+# examples/docs. Returns only the prose portions of the markdown.
+strip_code_blocks() {
+  awk '
+    /^```/ { in_code = !in_code; print ""; next }
+    in_code { print ""; next }
+    {
+      # Replace inline backtick spans with empty string
+      gsub(/`[^`]+`/, "")
+      print
+    }
+  '
+}
+
+get_files() {
+  if [[ "${1:-}" == "--diff" ]]; then
+    local ref="${2:-HEAD}"
+    git diff --name-only --diff-filter=ACMR "$ref" 2>/dev/null | grep -E '\.(md|markdown)$' || true
+  elif [[ "${1:-}" == "--file" ]]; then
+    echo "${2:-}"
+  else
+    git diff --cached --name-only --diff-filter=ACMR 2>/dev/null | grep -E '\.(md|markdown)$' || true
+  fi
+}
+
+get_content() {
+  local file="$1"
+  if [[ "${SCAN_MODE:-staged}" == "staged" ]]; then
+    git show ":$file" 2>/dev/null || cat "$file" 2>/dev/null || true
+  else
+    cat "$file" 2>/dev/null || true
+  fi
+}
+
+# ── Parse arguments ───────────────────────────────────────────────────
+
+SCAN_MODE="staged"
+FILES_ARG=()
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --diff) SCAN_MODE="diff"; FILES_ARG=("--diff" "${2:-HEAD}"); shift 2 ;;
+    --file) SCAN_MODE="file"; FILES_ARG=("--file" "$2"); shift 2 ;;
+    *) shift ;;
+  esac
+done
+
+FILES=$(get_files "${FILES_ARG[@]+"${FILES_ARG[@]}"}")
+
+if [[ -z "$FILES" ]]; then
+  echo "prompt-injection-scan: no documentation files to scan"
+  exit 0
+fi
+
+# ── Scan ──────────────────────────────────────────────────────────────
+# For each candidate file: blank out code, run every pattern over the
+# remaining prose, and report each matching line that the ignore file does
+# not cover. Each report increments FINDINGS and sets EXIT_CODE to 1.
+
+# Set once a grep invocation has failed, so the warning is printed only once.
+grep_error_reported=0
+
+while IFS= read -r file; do
+  [[ -z "$file" ]] && continue
+
+  raw_content=$(get_content "$file")
+  [[ -z "$raw_content" ]] && continue
+
+  # Strip code blocks so we only scan prose
+  content=$(echo "$raw_content" | strip_code_blocks)
+
+  for entry in "${PATTERNS[@]}"; do
+    # Split "Label:::flags:::regex" into its three fields.
+    label="${entry%%:::*}"
+    rest="${entry#*:::}"
+    flags="${rest%%:::*}"
+    regex="${rest#*:::}"
+
+    # P selects Perl-compatible matching, otherwise extended regex; -n
+    # prefixes every match with its line number.
+    if [[ "$flags" == *P* ]]; then
+      grep_flags="-nP"
+    else
+      grep_flags="-nE"
+    fi
+    if [[ "$flags" == *i* ]]; then
+      grep_flags="${grep_flags}i"
+    fi
+
+    # grep exits 1 for "no match" and above 1 for a real error (for example
+    # when -P is unavailable). Warn about errors instead of silently treating
+    # them as "nothing found", but keep scanning with the other patterns.
+    grep_status=0
+    matches=$(grep "$grep_flags" -e "$regex" <<< "$content" 2>/dev/null) || grep_status=$?
+    if [[ $grep_status -gt 1 ]]; then
+      if [[ $grep_error_reported -eq 0 ]]; then
+        echo -e "${YELLOW}prompt-injection-scan: warning: grep could not evaluate the '${label}' pattern (exit ${grep_status}); that check was skipped${NC}" >&2
+        grep_error_reported=1
+      fi
+      continue
+    fi
+
+    if [[ -n "$matches" ]]; then
+      while IFS= read -r match_line; do
+        [[ -z "$match_line" ]] && continue
+        # grep -n output is "<line number>:<line text>".
+        line_num="${match_line%%:*}"
+        line_content="${match_line#*:}"
+
+        if is_ignored "$file" "$line_content"; then
+          continue
+        fi
+
+        # Show at most 120 characters and add an ellipsis only when the
+        # line was actually cut.
+        preview="${line_content:0:120}"
+        if [[ ${#line_content} -gt 120 ]]; then
+          preview="${preview}..."
+        fi
+
+        echo -e "${RED}[PROMPT INJECTION]${NC} ${YELLOW}${label}${NC}"
+        echo -e "  File: ${CYAN}${file}:${line_num}${NC}"
+        echo "  Line: ${preview}"
+        echo ""
+        FINDINGS=$((FINDINGS + 1))
+        EXIT_CODE=1
+      done <<< "$matches"
+    fi
+  done
+done <<< "$FILES"
+
+# ── Report ────────────────────────────────────────────────────────────
+# Print a red summary banner when anything was flagged, otherwise a one-line
+# all-clear, then exit with the status accumulated during the scan.
+
+if [[ $FINDINGS -le 0 ]]; then
+  echo "prompt-injection-scan: no prompt injection detected ✓"
+else
+  banner_rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+  banner_lines=(
+    "$banner_rule"
+    "Found $FINDINGS potential prompt injection(s) in docs."
+    "Review flagged lines and remove or move to code blocks."
+    "Add exceptions to .prompt-injection-scanignore if these"
+    "are false positives."
+    "$banner_rule"
+  )
+  for banner_line in "${banner_lines[@]}"; do
+    echo -e "${RED}${banner_line}${NC}"
+  done
+fi
+
+exit $EXIT_CODE