nas-burnin/scripts/security-scan.sh

#!/usr/bin/env bash
# Daily security scan of the deployed truenas-burnin source on maple.
# Mirrors the .forgejo/workflows/security-scan.yml CI pipeline so a finding
# the runner-less forge would have flagged still surfaces here.
#
# Tools all run in containers — nothing installed on the host.
#   pip-audit  — known CVEs in the pinned requirements.txt (throwaway container)
#   bandit     — Python static security analysis on the deployed source tree
#   mypy       — type check of the deployed source tree (informational only)
#   gitleaks   — secrets across the full git history
#
# Output:
#   ~/security-scans/scan-YYYY-MM-DD/{summary,pip-audit,bandit,mypy,gitleaks}.txt
#   ~/security-scans/findings.log     — appended one line per scan with findings
#
# Wiring:
#   Daily systemd user timer at 03:30 local (after the in-app retention job
#   so backups are fresh). See scripts/security-scan.{service,timer}.

# Strict-ish mode: unset variables are errors and a pipeline fails if any
# stage fails. errexit is not enabled; later steps read `$?` from each
# scanner instead of aborting on the first non-zero status.
set -u
set -o pipefail

# Caller-overridable settings: an exported, non-empty value wins, otherwise
# the default on the right-hand side is assigned.
: "${REPO_URL:=https://git.hellocomputer.xyz/brandon/truenas-burnin.git}"
: "${REPO:=$HOME/scan-checkouts/truenas-burnin}"
: "${OUT_BASE:=$HOME/security-scans}"
: "${GITLEAKS_VERSION:=8.21.2}"

# Derived paths: one output directory per calendar day plus a shared,
# append-only findings log next to the daily directories.
DATE=$(date '+%Y-%m-%d')
OUT_DIR="${OUT_BASE}/scan-${DATE}"
SUMMARY="${OUT_BASE}/findings.log"

# Create today's output directory and the parent of the scan checkout.
# errexit is off, so check explicitly: without these directories every
# later redirect fails and the run would be misreported as "findings".
if ! mkdir -p "$OUT_DIR" "$(dirname "$REPO")"; then
  echo "fatal: cannot create $OUT_DIR or the parent of $REPO" >&2
  exit 65
fi

# Maintain a dedicated checkout for scanning. The deploy at
# ~/docker/stacks/truenas-burnin/ is just the bind-mounted source — no
# .git, no history — so gitleaks can't scan there. We keep a separate
# clone, fast-forward it to origin/main each run.
if [ ! -d "$REPO/.git" ]; then
  echo "Cloning $REPO_URL to $REPO ..."
  git clone --quiet "$REPO_URL" "$REPO" || {
    echo "fatal: git clone failed" >&2
    exit 65
  }
fi

# The git commands below act on the current directory, so a failed cd must
# stop the run — otherwise `git reset --hard` would hit whatever repository
# the script happened to be started from.
if ! cd "$REPO"; then
  echo "fatal: cannot cd to $REPO" >&2
  exit 65
fi

# Refresh the scan checkout. Failures here mean we'd be scanning stale
# code without knowing — fail loudly instead of soldiering on silently.
if ! git fetch --quiet --prune origin; then
  echo "fatal: git fetch failed in $REPO" >&2
  exit 65
fi
# Best-effort branch switch: the hard reset below pins the work tree to
# origin/main whichever branch ends up checked out, so a failure here is
# tolerated rather than fatal.
git checkout --quiet main || true
if ! git reset --hard --quiet origin/main; then
  echo "fatal: git reset --hard failed in $REPO" >&2
  exit 65
fi

# Start this run's summary.txt from scratch (the first write truncates any
# earlier file from the same day): banner line, ISO-8601 timestamp, blank line.
summary_file="$OUT_DIR/summary.txt"
printf '=== Security scan %s ===\n' "$DATE" > "$summary_file"
date -Iseconds >> "$summary_file"
printf '\n' >> "$summary_file"

# --- pip-audit against the lockfile in a throwaway container ------------
# Previously we did `docker exec truenas-burnin pip install pip-audit`
# which mutated the live production container with a transient package.
# Now scan the lockfile in an ephemeral container — same coverage of
# pinned versions + their transitives, no side effects on prod.
#
# The lockfile is attached with --mount instead of -v: with -v, a missing
# host path is created by Docker as an empty directory, whereas --mount
# makes `docker run` fail with a clear error that lands in pip-audit.txt.
#
# pip's stderr is kept (the whole run is captured to the report anyway) so
# a failed install explains itself instead of leaving an empty report;
# --root-user-action=ignore silences only the "running pip as root" notice.
#
# Sets PIPS to the container's exit status (non-zero on findings or on
# any tool/docker failure).
echo "--- pip-audit (requirements.txt in throwaway container) ---" | tee -a "$OUT_DIR/summary.txt"
docker run --rm \
  --mount "type=bind,source=$REPO/requirements.txt,target=/work/requirements.txt,readonly" \
  -w /work \
  python:3.12-slim sh -c \
    "pip install --quiet --no-cache-dir --disable-pip-version-check --root-user-action=ignore pip-audit && pip-audit --requirement requirements.txt --strict --format=columns" \
  > "$OUT_DIR/pip-audit.txt" 2>&1
PIPS=$?
echo "  exit=$PIPS  ($OUT_DIR/pip-audit.txt)" | tee -a "$OUT_DIR/summary.txt"

# --- bandit against the LIVE deploy dir ---------------------------------
# Scan what's actually running, not what's in git — catches drift between
# forge HEAD and maple. B608 (SQL injection via dynamic strings) is
# skipped globally: every dynamic SQL build in this codebase uses
# bound parameters for data and structural placeholders only.
#
# The source tree is attached with --mount instead of -v: with -v, a
# missing $DEPLOY_DIR/app is created by Docker as an empty directory and
# bandit would then report a clean scan of nothing. With --mount the
# `docker run` fails and the error is captured in bandit.txt.
#
# pip's stderr is kept so a failed install is visible in the report;
# --root-user-action=ignore silences only the "running pip as root" notice.
#
# Sets BANDITS to the container's exit status. DEPLOY_DIR may be overridden
# from the environment.
DEPLOY_DIR="${DEPLOY_DIR:-$HOME/docker/stacks/truenas-burnin}"
echo "--- bandit (deploy: $DEPLOY_DIR) ---" | tee -a "$OUT_DIR/summary.txt"
docker run --rm \
  --mount "type=bind,source=$DEPLOY_DIR/app,target=/src,readonly" \
  python:3.12-slim sh -c \
    "pip install --quiet --no-cache-dir --disable-pip-version-check --root-user-action=ignore bandit && bandit -r /src -ll -ii --skip B608" \
  > "$OUT_DIR/bandit.txt" 2>&1
BANDITS=$?
echo "  exit=$BANDITS  ($OUT_DIR/bandit.txt)" | tee -a "$OUT_DIR/summary.txt"

# --- mypy against the deploy dir (informational only) -------------------
# Type checker — surfaces None-handling bugs and missing-attribute errors
# the runtime would have caught at the worst possible moment. Doesn't
# count toward the failure exit-code sum until the codebase is annotated
# enough to make findings actionable.
#
# The install is no longer piped through `tail -3`: inside `sh -c` the
# pipeline's status was tail's, so a failed `pip install` was masked and
# mypy was attempted anyway. With --quiet, pip prints only warnings and
# errors, so trimming its output is unnecessary.
#
# --mount (instead of -v) makes a missing $DEPLOY_DIR/app an explicit
# docker error rather than a silently created empty directory.
#
# Sets MYPY to the container's exit status; it is reported but not summed.
echo "--- mypy (informational) ---" | tee -a "$OUT_DIR/summary.txt"
docker run --rm \
  --mount "type=bind,source=$DEPLOY_DIR/app,target=/src,readonly" \
  python:3.12-slim sh -c \
    "pip install --quiet --no-cache-dir --disable-pip-version-check --root-user-action=ignore mypy && mypy --ignore-missing-imports --no-strict-optional /src" \
  > "$OUT_DIR/mypy.txt" 2>&1
MYPY=$?
echo "  exit=$MYPY  ($OUT_DIR/mypy.txt) — informational only" | tee -a "$OUT_DIR/summary.txt"

# --- gitleaks against the full git history ------------------------------
# Runs the pinned gitleaks image over the scan checkout, mounted read-only
# at /repo. Combined stdout/stderr goes to gitleaks.txt and the container's
# exit status is kept in LEAKS for the summary.
gitleaks_report="$OUT_DIR/gitleaks.txt"
gitleaks_cmd=(
  docker run --rm
  -v "$REPO:/repo:ro"
  "zricethezav/gitleaks:v$GITLEAKS_VERSION"
  detect --source /repo --no-banner --redact --verbose
)

printf '%s\n' "--- gitleaks ---" | tee -a "$OUT_DIR/summary.txt"
"${gitleaks_cmd[@]}" > "$gitleaks_report" 2>&1
LEAKS=$?
printf '  exit=%s  (%s)\n' "$LEAKS" "$gitleaks_report" | tee -a "$OUT_DIR/summary.txt"

# --- summary + notification --------------------------------------------
# Sum the exit statuses of the scanners that gate the run (pip-audit,
# bandit, gitleaks — mypy is deliberately left out) and append the
# breakdown to today's summary.txt.
TOTAL_EXIT=$(( PIPS + BANDITS + LEAKS ))
{
  echo
  echo "Total findings exit-code sum: $TOTAL_EXIT"
  echo "  pip-audit: $PIPS"
  echo "  bandit:    $BANDITS"
  echo "  gitleaks:  $LEAKS"
} >> "$OUT_DIR/summary.txt"

# Any non-zero sum gets one line in the long-lived findings log and, when
# configured, a webhook POST carrying the full summary.
if [ "$TOTAL_EXIT" -ne 0 ]; then
  printf '%s — findings (pip-audit=%d bandit=%d gitleaks=%d) — see %s\n' \
    "$DATE" "$PIPS" "$BANDITS" "$LEAKS" "$OUT_DIR" >> "$SUMMARY"
  # Hook for downstream notification — wire to your existing Mattermost
  # / Fastmail / webhook chain. Stays a no-op until SECURITY_SCAN_WEBHOOK
  # is set in the systemd unit's Environment=.
  if [ -n "${SECURITY_SCAN_WEBHOOK:-}" ]; then
    # Bounded by timeouts so an unresponsive endpoint cannot hang the run
    # before retention pruning. Delivery is best-effort: a failed POST
    # must not change the scan's own exit status, hence `|| true`.
    curl -fsS --connect-timeout 10 --max-time 60 \
      -X POST -H 'Content-Type: text/plain' \
      --data-binary "@$OUT_DIR/summary.txt" \
      "$SECURITY_SCAN_WEBHOOK" || true
  fi
fi

# Retention — prune scan-* directories directly under $OUT_BASE whose
# mtime is more than 30 days old. `--` guards rm against odd path names and
# `+` batches all matches into a single rm invocation.
find "$OUT_BASE" -maxdepth 1 -type d -name "scan-*" -mtime +30 \
  -exec rm -rf -- {} +

# Exit statuses are 8-bit: a raw sum above 255 would wrap modulo 256, and a
# sum of exactly 256 would report success. Clamp so "something failed"
# always stays non-zero; sums up to 255 are passed through unchanged.
if (( TOTAL_EXIT > 255 )); then
  exit 255
fi
exit "$TOTAL_EXIT"