diff --git a/app/auth.py b/app/auth.py
index 010ab4e..5c70d8c 100644
--- a/app/auth.py
+++ b/app/auth.py
@@ -136,8 +136,15 @@ async def get_user_by_id(user_id: int) -> User | None:
 
 async def create_user(username: str, password: str,
                       full_name: str | None = None,
-                      is_admin: bool = False) -> User:
-    """Insert a new user. Raises ValueError if the username collides."""
+                      is_admin: bool = False,
+                      bootstrap_only: bool = False) -> User:
+    """Insert a new user. Raises ValueError if the username collides.
+
+    bootstrap_only=True: serializes the insert with a check that the
+    users table is empty inside an IMMEDIATE transaction. Used for the
+    /api/v1/auth/setup first-user flow so two concurrent requests can't
+    both create admin accounts during the bootstrap window.
+    """
     username = (username or "").strip()
     if not username:
         raise ValueError("Username is required.")
@@ -146,6 +153,16 @@ async def create_user(username: str, password: str,
     h = hash_password(password)
     try:
         async with aiosqlite.connect(settings.db_path) as db:
+            if bootstrap_only:
+                # IMMEDIATE acquires the write lock up-front so a parallel
+                # setup request waits or fails — no two-step race.
+                await db.execute("BEGIN IMMEDIATE")
+                cur = await db.execute("SELECT COUNT(*) FROM users")
+                if (await cur.fetchone())[0] != 0:
+                    await db.execute("ROLLBACK")
+                    raise ValueError(
+                        "Users already exist — first-user setup is closed."
+                    )
             cur = await db.execute(
                 """INSERT INTO users
                        (username, password_hash, full_name, is_admin, created_at)
@@ -237,23 +254,48 @@ def login_locked_until(username: str, ip: str) -> float | None:
     return soonest
 
 
-def record_login_failure(username: str, ip: str) -> bool:
-    """Returns True if this failure tripped a lockout."""
-    tripped = False
+def register_login_attempt(username: str, ip: str) -> str:
+    """Atomic check-then-increment for a login attempt.
+
+    Returns:
+      "ok"              — allowed, counter incremented
+      "locked_out"      — an unexpired lockout exists; nothing is counted
+      "now_locked_out"  — THIS attempt is what tripped the lockout
+
+    Both the ("user", username.lower()) and ("ip", ip) keys are checked
+    and counted, with no awaits in between, so coroutines on one asyncio
+    loop can't interleave. On successful auth the caller must invoke
+    clear_login_failures(), which wipes both keys' counters entirely.
+    """
     now = _time.time()
+    # Pass 1: honour existing lockouts. Expired entries are dropped here;
+    # a live one short-circuits before any counter is touched.
+    for key in (("user", username.lower()), ("ip", ip)):
+        exp = _login_lockouts.get(key)
+        if exp is None:
+            continue
+        if now >= exp:
+            del _login_lockouts[key]
+            continue
+        return "locked_out"
+    # Pass 2: record the attempt; arm a lockout when a key hits the threshold.
+    tripped = False
     for key in (("user", username.lower()), ("ip", ip)):
         _gc_failures(key)
         _login_failures.setdefault(key, []).append(now)
         if len(_login_failures[key]) >= LOGIN_FAILURE_THRESHOLD:
             _login_lockouts[key] = now + LOGIN_LOCKOUT_SECONDS
-            _login_failures[key] = []  # reset counter once lockout armed
+            _login_failures[key] = []
             tripped = True
-    return tripped
+    return "now_locked_out" if tripped else "ok"
 
 
 def clear_login_failures(username: str, ip: str) -> None:
+    """Erase counters AND any lockout for both the user and IP keys after a
+    successful auth — the caller proved they have credentials (no-op if unset)."""
     for key in (("user", username.lower()), ("ip", ip)):
         _login_failures.pop(key, None)
+        _login_lockouts.pop(key, None)
 
 
 # ---------------------------------------------------------------------------
@@ -309,6 +351,24 @@ async def get_current_user_optional(request: Request) -> User | None:
     return await get_user_by_id(int(sess_user_id))
 
 
+def require_admin(request: Request) -> User:
+    """Return the current user if they are an admin, else raise.
+
+    Reads request.state.current_user (expected to be populated by the
+    AuthGate middleware). Raises HTTPException 401 when it is missing
+    or falsy and 403 when the user lacks is_admin. Meant for
+    settings-mutating endpoints."""
+    current = getattr(request.state, "current_user", None)
+    if current and current.is_admin:
+        return current
+    # Authenticated but not an admin -> 403; no user at all -> 401.
+    if current:
+        code, reason = status.HTTP_403_FORBIDDEN, "Admin only"
+    else:
+        code, reason = status.HTTP_401_UNAUTHORIZED, "Authentication required"
+    raise HTTPException(status_code=code, detail=reason)
+
+
 async def get_current_user(request: Request) -> User:
     """Strict version — for routes. 401 (or redirect for HTML) if missing."""
     user = await get_current_user_optional(request)
diff --git a/app/burnin.py b/app/burnin.py
index 8b019a3..e210dd1 100644
--- a/app/burnin.py
+++ b/app/burnin.py
@@ -952,17 +952,49 @@ async def _stage_surface_validate_nvme(job_id: int, devname: str,
         )
         return False
 
-    # Sanity-check post-format SMART health.
+    # Sanity-check post-format SMART health. Mirrors the surface_validate
+    # SSH path's check parity — fail on FAILED health, fail on real
+    # SMART attribute failures, log warnings but don't fail. A transport
+    # error here is treated as a soft pass (log + continue) so a single
+    # SSH blip after a successful format doesn't undo the work.
     try:
         attrs = await ssh_client.get_smart_attributes(devname)
+        ssh_only_failures = [
+            f for f in (attrs.get("failures") or []) if f.startswith("SSH error:")
+        ]
+        real_failures = [
+            f for f in (attrs.get("failures") or []) if not f.startswith("SSH error:")
+        ]
         if attrs.get("health") == "FAILED":
             await _set_stage_error(
                 job_id, "surface_validate",
-                "NVMe SMART health FAILED after format"
+                "NVMe SMART health FAILED after format",
             )
             return False
+        if real_failures:
+            await _set_stage_error(
+                job_id, "surface_validate",
+                "NVMe SMART attribute failures after format: "
+                + "; ".join(real_failures),
+            )
+            return False
+        if ssh_only_failures:
+            await _append_stage_log(
+                job_id, "surface_validate",
+                "[WARN] post-format SMART check had SSH errors "
+                "(soft-passing): " + "; ".join(ssh_only_failures) + "\n",
+            )
+        if attrs.get("warnings"):
+            await _append_stage_log(
+                job_id, "surface_validate",
+                "[WARN] " + "; ".join(attrs["warnings"]) + "\n",
+            )
     except Exception as exc:
         log.warning("Post-format SMART check error on %s: %s", devname, exc)
+        await _append_stage_log(
+            job_id, "surface_validate",
+            f"[WARN] post-format SMART check raised: {exc}\n",
+        )
 
     await _update_stage_percent(job_id, "surface_validate", 100)
     await _recalculate_progress(job_id)
@@ -1116,11 +1148,16 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
                         job_id,
                     )
 
-        # Flush remaining output
-        remainder = "".join(output_lines)
-        await _append_stage_log(job_id, "surface_validate", remainder)
+        # Flush only lines we haven't already written in 20-line chunks.
+        # Previously we appended the FULL accumulated output here too,
+        # doubling the stored log_text size for every surface_validate
+        # stage and pushing app.db into hundreds of MB.
+        flushed_count = (len(output_lines) // 20) * 20
+        tail = "".join(output_lines[flushed_count:])
+        if tail:
+            await _append_stage_log(job_id, "surface_validate", tail)
         result["bad_blocks"] = bad_blocks_total
-        result["output"] = remainder
+        result["output"] = "".join(output_lines)  # in-memory only, not re-stored
         result["aborted"] = bad_blocks_total > settings.bad_block_threshold
 
     except asyncio.CancelledError:
diff --git a/app/config.py b/app/config.py
index e48ba4a..0a3737b 100644
--- a/app/config.py
+++ b/app/config.py
@@ -83,7 +83,7 @@ class Settings(BaseSettings):
     ssh_key: str = ""             # PEM private key content (paste full key including headers)
 
     # Application version — used by the /api/v1/updates/check endpoint
-    app_version: str = "1.0.0-27"
+    app_version: str = "1.0.0-28"
 
     # ---- Authentication (1.0.0-22) ----
     # session_secret: HMAC key for signing session cookies. Empty = generate
@@ -92,6 +92,11 @@ class Settings(BaseSettings):
     # SESSION_SECRET env var if you want to share secrets across replicas.
     session_secret: str = ""
     session_max_age_seconds: int = 60 * 60 * 24 * 7  # 7 days
+    # Set to True when the dashboard is exclusively reachable over HTTPS
+    # (typical when fronted by nginx-proxy-manager with TLS). Marks the
+    # session cookie Secure so browsers never send it over plain HTTP.
+    # Leave False for initial deploy + LAN testing without TLS.
+    session_cookie_secure: bool = False
     # Initial admin bootstrap. If both env vars are set AND the users table
     # is empty at startup, create that account immediately. After that the
     # env vars are ignored — change passwords via the UI / database, not
diff --git a/app/database.py b/app/database.py
index 0f13298..d39bdd1 100644
--- a/app/database.py
+++ b/app/database.py
@@ -109,6 +109,11 @@ _MIGRATIONS = [
         created_at    TEXT    NOT NULL,
         last_login_at TEXT
     )""",
+    # 1.0.0-28: case-insensitive uniqueness. The base UNIQUE on username
+    # is case-sensitive but login does NOCASE — without this index two
+    # users `Admin` and `admin` could coexist and shadow each other.
+    """CREATE UNIQUE INDEX IF NOT EXISTS uniq_users_username_nocase
+       ON users (username COLLATE NOCASE)""",
 ]
 
 
diff --git a/app/main.py b/app/main.py
index 01c16c3..f782a07 100644
--- a/app/main.py
+++ b/app/main.py
@@ -213,7 +213,10 @@ app.add_middleware(
     secret_key=auth.get_session_secret(),
     session_cookie="burnin_session",
     max_age=settings.session_max_age_seconds,
-    https_only=False,  # we sit behind nginx-proxy-manager; trust upstream
+    # session_cookie_secure flips the cookie's Secure flag. Set to True
+    # in production behind HTTPS (nginx-proxy-manager) so the auth cookie
+    # is never sent on plain HTTP.
+    https_only=settings.session_cookie_secure,
     # SameSite=strict is the primary CSRF mitigation: the browser never
     # sends the session cookie on cross-site requests, so an attacker
     # page can't trigger any state-changing endpoint even if it knows
diff --git a/app/poller.py b/app/poller.py
index b30672b..25ecc3f 100644
--- a/app/poller.py
+++ b/app/poller.py
@@ -375,52 +375,54 @@ async def poll_cycle(client: TrueNASClient) -> int:
     # locked, and previously-unlocked drives stay unlocked, until detection
     # recovers. Treating a transient SSH blip as "no pool members" would
     # silently unlock every drive on the next poll.
-    detection_ok = True
+    # Each detection probe (pool / exported / mounted) succeeds or fails
+    # INDEPENDENTLY. Previously a single None blew away the whole map,
+    # so a fresh DB on a host where lsblk lacks zfs_member info but
+    # zpool works would never lock pool members. Now we apply each
+    # successful probe and only fail-closed for the ones that actually
+    # errored.
     pool_map: dict = {}
-    zfs_member_set: set = set()
-    mounted_set: set = set()
+    pool_probe_ok = True       # zpool list -vHP succeeded
+    zfs_probe_ok = True        # lsblk zfs_member succeeded
+    mounted_probe_ok = True    # findmnt succeeded
     try:
         from app import ssh_client as _ssh
         if _ssh.is_configured():
             pm = await _ssh.get_pool_membership()
             zs = await _ssh.get_zfs_member_drives()
             ms = await _ssh.get_mounted_drives()
-            if pm is None or zs is None or ms is None:
-                detection_ok = False
-            else:
-                pool_map = pm
-                zfs_member_set = zs
-                mounted_set = ms
-        # SSH unconfigured (mock/dev mode) — detection_ok stays True with
+            pool_probe_ok    = pm is not None
+            zfs_probe_ok     = zs is not None
+            mounted_probe_ok = ms is not None
+            if pool_probe_ok:
+                pool_map.update(pm)
+            if zfs_probe_ok:
+                for devname in zs:
+                    if devname not in pool_map:
+                        pool_map[devname] = {"pool": "(exported)", "role": "exported"}
+            if mounted_probe_ok:
+                for devname in ms:
+                    if devname not in pool_map:
+                        pool_map[devname] = {"pool": "(mounted)", "role": "mounted"}
+        # SSH unconfigured (mock/dev mode) — all probes "succeed" with
         # empty maps, so dev mode never artificially locks drives.
     except Exception:
-        detection_ok = False
+        pool_probe_ok = zfs_probe_ok = mounted_probe_ok = False
+        pool_map = {}
 
-    if not detection_ok:
+    # If ALL probes failed we have no fresh data at all — preserve the
+    # existing pool columns to keep locks honest. If at least one probe
+    # succeeded the new pool_map is a partial truth: we apply it and
+    # only refuse to clear locks coming from a probe that failed.
+    detection_ok = pool_probe_ok or zfs_probe_ok or mounted_probe_ok
+
+    if not (pool_probe_ok and zfs_probe_ok and mounted_probe_ok):
         log.warning(
-            "Pool detection failed this cycle — preserving existing "
-            "pool_name/pool_role columns. Locked drives stay locked, "
-            "unlocked drives stay unlocked, until SSH recovers."
+            "Pool detection partial: pool=%s zfs=%s mounted=%s — preserving "
+            "stale lock state from any probe that failed.",
+            pool_probe_ok, zfs_probe_ok, mounted_probe_ok,
         )
 
-    if detection_ok:
-        # Drives carrying ZFS labels but not in any active pool are
-        # "exported" — same hazard as an active pool member, so lock them
-        # too. We don't know the original pool name without
-        # `zpool import`-style scanning (slow + blocks); display
-        # "(exported)" and use a special token.
-        for devname in zfs_member_set:
-            if devname not in pool_map:
-                pool_map[devname] = {"pool": "(exported)", "role": "exported"}
-        # Drives with a non-ZFS mount somewhere (XFS/ext4/scratch/etc.)
-        # also lock — wiping a mounted FS is just as catastrophic. Lower
-        # precedence than active pool membership, since a drive in `tank`
-        # would also show under findmnt for the pool's mountpoint via
-        # /dev/zd* or zvol — but those are filtered in the parser.
-        for devname in mounted_set:
-            if devname not in pool_map:
-                pool_map[devname] = {"pool": "(mounted)", "role": "mounted"}
-
     # Index running jobs by (devname, test_type)
     active: dict[tuple[str, str], dict] = {}
     for job in running_jobs:
diff --git a/app/retention.py b/app/retention.py
index c821c91..a2c0be7 100644
--- a/app/retention.py
+++ b/app/retention.py
@@ -74,19 +74,36 @@ def _backup_dir() -> Path:
 
 async def backup_db(keep_count: int) -> Path | None:
     """Online-backup the live DB to backups/app-YYYY-MM-DD.db. Returns
-    the new file's path. Old backups beyond keep_count are deleted."""
+    the new file's path. Old backups beyond keep_count are deleted.
+
+    Atomicity: writes to a sibling tmp file first and renames into the
+    canonical daily slot only after backup succeeds. An interrupted
+    backup leaves the tmp file (cleaned up on next run); the previous
+    day's snapshot stays intact. os.replace is atomic within the same
+    filesystem on POSIX.
+    """
+    import os as _os
     bdir = _backup_dir()
     bdir.mkdir(parents=True, exist_ok=True)
     today = datetime.now().strftime("%Y-%m-%d")
     out = bdir / f"app-{today}.db"
+    tmp = bdir / f"app-{today}.db.tmp"
+
+    # Drop any leftover tmp from a previous interrupted run.
+    if tmp.exists():
+        try:
+            tmp.unlink()
+        except OSError:
+            pass
 
     # aiosqlite.Connection.backup() is an async wrapper around
     # sqlite3.Connection.backup — atomic online snapshot that doesn't
     # block writers (it copies pages in batches and yields between).
     async with aiosqlite.connect(settings.db_path) as src:
-        async with aiosqlite.connect(str(out)) as dst:
+        async with aiosqlite.connect(str(tmp)) as dst:
             await src.backup(dst)
 
+    _os.replace(tmp, out)
     log.info("Retention: DB backed up to %s (%d bytes)", out, out.stat().st_size)
 
     # Keep the N most recent backups; delete older.
@@ -124,17 +141,26 @@ async def run() -> None:
             now = datetime.now()
             today = now.strftime("%Y-%m-%d")
             if now.hour == _RUN_HOUR and _state["last_run_date"] != today:
-                _state["last_run_date"] = today
+                # Track prune + backup success independently. Mark the
+                # day "done" only when BOTH succeed so a transient
+                # failure gets retried on the next 5-min tick (still
+                # within the 03:00 hour).
+                prune_ok = False
+                backup_ok = False
                 try:
                     pruned = await prune_stage_logs(settings.retention_log_days)
                     if pruned:
                         await vacuum_db()
+                    prune_ok = True
                 except Exception as exc:
                     log.exception("Retention: pruning failed: %s", exc)
                 try:
                     await backup_db(settings.retention_backup_keep)
+                    backup_ok = True
                 except Exception as exc:
                     log.exception("Retention: backup failed: %s", exc)
+                if prune_ok and backup_ok:
+                    _state["last_run_date"] = today
         except asyncio.CancelledError:
             raise
         except Exception as exc:
diff --git a/app/routes.py b/app/routes.py
index bff5e4c..858a2de 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -2,6 +2,7 @@ import asyncio
 import csv
 import io
 import json
+import time as _time
 from datetime import datetime, timezone
 
 import aiosqlite
@@ -263,14 +264,22 @@ async def login_submit(request: Request):
         next_url = "/"
     ip = _client_ip(request)
 
-    # Rate-limit gate — checked BEFORE bcrypt so an attacker can't burn CPU.
-    locked_until = auth.login_locked_until(username, ip)
-    if locked_until is not None:
-        remaining = int(locked_until - __import__("time").time())
+    # Atomic register-and-check: increments the counter NOW (before any
+    # await), so a parallel burst of guesses can't all slip past the
+    # threshold. Cleared on successful auth via clear_login_failures.
+    attempt = auth.register_login_attempt(username, ip)
+    if attempt != "ok":
+        if attempt == "now_locked_out":
+            await auth.audit_auth_event(
+                "user_login_locked_out", username,
+                f"Failed login from {ip} — IP/user locked out for {auth.LOGIN_LOCKOUT_SECONDS // 60} min",
+            )
+        locked_until = auth.login_locked_until(username, ip)
+        remaining = int((locked_until or _time.time()) - _time.time())
         return templates.TemplateResponse(request, "login.html", {
             "request":     request,
             "needs_setup": False,
-            "error":       f"Too many failed attempts. Try again in {remaining // 60} min.",
+            "error":       f"Too many failed attempts. Try again in {remaining // 60 + 1} min.",
             "next":        next_url,
         }, status_code=429)
 
@@ -280,14 +289,8 @@ async def login_submit(request: Request):
         # so the timing of "user not found" matches "wrong password."
         if not found:
             auth.verify_password(password, "$2b$12$" + "x" * 53)
-        tripped = auth.record_login_failure(username, ip)
         await auth.audit_auth_event(
-            "user_login_locked_out" if tripped else "user_login_failed",
-            username,
-            f"Failed login from {ip}" + (
-                f" — IP/user locked out for {auth.LOGIN_LOCKOUT_SECONDS // 60} min"
-                if tripped else ""
-            ),
+            "user_login_failed", username, f"Failed login from {ip}",
         )
         return templates.TemplateResponse(request, "login.html", {
             "request":     request,
@@ -323,7 +326,12 @@ async def auth_first_user_setup(request: Request):
     password = form.get("password") or ""
     full_name = (form.get("full_name") or "").strip() or None
     try:
-        user = await auth.create_user(username, password, full_name, is_admin=True)
+        # bootstrap_only=True wraps the existence check + insert in an
+        # IMMEDIATE transaction so two concurrent setup requests can't
+        # both create admin accounts during the bootstrap window.
+        user = await auth.create_user(
+            username, password, full_name, is_admin=True, bootstrap_only=True
+        )
     except ValueError as exc:
         raise HTTPException(status_code=400, detail=str(exc))
     # Same fixation defense as the login flow — discard any pre-existing
@@ -466,12 +474,20 @@ async def health(db: aiosqlite.Connection = Depends(get_db)):
 
     checks: dict[str, dict] = {}
 
-    # DB probe — confirm the journal is healthy (PRAGMA reads journal_mode
-    # and would fail loudly if WAL is wedged or the file is unreadable).
+    # DB probe — start (then roll back) a write transaction on the main DB.
+    # A TEMP table lives in SQLite's separate temp database, so writing to
+    # one never touches the main file. BEGIN IMMEDIATE must take the real
+    # write lock; it writes no pages, so a full disk is NOT detected here.
     try:
-        cur = await db.execute("PRAGMA journal_mode")
-        await cur.fetchone()
-        checks["db"] = {"ok": True}
+        await db.execute("BEGIN IMMEDIATE")
+        try:
+            # Read the schema table too, so the main file is actually read.
+            cur = await db.execute("SELECT COUNT(*) FROM sqlite_master")
+            row = await cur.fetchone()
+        finally:
+            # Nothing was written; rollback just releases the write lock.
+            await db.rollback()
+        checks["db"] = {"ok": row is not None}
     except Exception as exc:
         checks["db"] = {"ok": False, "error": str(exc)}
 
@@ -781,14 +797,25 @@ def _row_to_burnin(row: aiosqlite.Row, stages: list[aiosqlite.Row]) -> BurninJob
     )
 
 
+def _operator_for(request: Request, _ignored_body_value: str | None = None) -> str:
+    """Resolve the audit "operator" from the session, never from the client.
+    Yields full_name (username as fallback) of request.state.current_user and
+    raises 401 when it is unset; the body-supplied value is discarded."""
+    current = getattr(request.state, "current_user", None)
+    if current:
+        return current.full_name or current.username
+    raise HTTPException(status_code=401, detail="Authentication required")
+
+
 @router.post("/api/v1/burnin/start")
-async def burnin_start(req: StartBurninRequest):
+async def burnin_start(request: Request, req: StartBurninRequest):
+    operator = _operator_for(request, req.operator)
     results = []
     errors = []
     for drive_id in req.drive_ids:
         try:
             job_id = await burnin.start_job(
-                drive_id, req.profile, req.operator, stage_order=req.stage_order
+                drive_id, req.profile, operator, stage_order=req.stage_order
             )
             results.append({"drive_id": drive_id, "job_id": job_id})
         except burnin.PoolMemberError as exc:
@@ -809,10 +836,11 @@ async def burnin_start(req: StartBurninRequest):
 
 
 @router.post("/api/v1/drives/{drive_id}/unlock")
-async def unlock_pool_drive(drive_id: int, req: UnlockPoolDriveRequest):
+async def unlock_pool_drive(drive_id: int, request: Request, req: UnlockPoolDriveRequest):
+    operator = _operator_for(request, req.operator)
     try:
         expiry = await burnin.grant_pool_unlock(
-            drive_id, req.confirm_token, req.operator, req.reason,
+            drive_id, req.confirm_token, operator, req.reason,
         )
     except ValueError as exc:
         raise HTTPException(status_code=400, detail=str(exc))
@@ -821,8 +849,9 @@ async def unlock_pool_drive(drive_id: int, req: UnlockPoolDriveRequest):
 
 
 @router.post("/api/v1/burnin/{job_id}/cancel")
-async def burnin_cancel(job_id: int, req: CancelBurninRequest):
-    ok = await burnin.cancel_job(job_id, req.operator)
+async def burnin_cancel(job_id: int, request: Request, req: CancelBurninRequest):
+    operator = _operator_for(request, req.operator)
+    ok = await burnin.cancel_job(job_id, operator)
     if not ok:
         raise HTTPException(status_code=409, detail="Job not found or not cancellable")
     return {"cancelled": True}
@@ -1044,6 +1073,7 @@ async def update_drive(
 @router.post("/api/v1/drives/{drive_id}/reset")
 async def reset_drive(
     drive_id: int,
+    request: Request,
     body: dict,
     db: aiosqlite.Connection = Depends(get_db),
 ):
@@ -1064,7 +1094,9 @@ async def reset_drive(
     if (await cur.fetchone())[0] > 0:
         raise HTTPException(status_code=409, detail="Cannot reset while a burn-in is active")
 
-    operator = body.get("operator", "operator")
+    # Trust the logged-in user, not the body (the JS used to send a
+    # literal "operator" because window._operator was never set).
+    operator = _operator_for(request, body.get("operator"))
 
     # Reset SMART test state to idle
     await db.execute(
@@ -1243,6 +1275,7 @@ async def settings_page(
     request: Request,
     db: aiosqlite.Connection = Depends(get_db),
 ):
+    auth.require_admin(request)
     # Editable values — real values for form fields (secrets excluded)
     editable = {
         # SMTP
@@ -1359,7 +1392,7 @@ async def get_settings_redacted(request: Request):
 @router.post("/api/v1/settings")
 async def save_settings(request: Request, body: dict):
     """Save editable runtime settings.  Secrets are only updated if non-empty."""
-    user = request.state.current_user
+    user = auth.require_admin(request)
     # Don't overwrite secrets if client sent empty string. Track which
     # ones DID get a real change so we can audit the rotation.
     rotated: list[str] = []
@@ -1389,8 +1422,9 @@ async def save_settings(request: Request, body: dict):
 
 
 @router.post("/api/v1/settings/test-smtp")
-async def test_smtp():
+async def test_smtp(request: Request):
     """Test the current SMTP configuration without sending an email."""
+    auth.require_admin(request)
     result = await mailer.test_smtp_connection()
     if not result["ok"]:
         raise HTTPException(status_code=502, detail=result["error"])
@@ -1398,8 +1432,9 @@ async def test_smtp():
 
 
 @router.post("/api/v1/settings/test-ssh")
-async def test_ssh():
+async def test_ssh(request: Request):
     """Test the current SSH configuration."""
+    auth.require_admin(request)
     from app import ssh_client
     result = await ssh_client.test_connection()
     if not result["ok"]:
diff --git a/app/ssh_client.py b/app/ssh_client.py
index 612e74e..75711c0 100644
--- a/app/ssh_client.py
+++ b/app/ssh_client.py
@@ -388,7 +388,16 @@ async def get_mounted_drives() -> set | None:
 
 def _parse_findmnt_sources(stdout: str) -> set:
     """Pure parser for findmnt output. Strips partitions; ignores tmpfs,
-    overlay, zfs (zfs is handled by pool detection)."""
+    overlay, zfs (zfs is handled by pool detection).
+
+    Recognised devnames (covers TrueNAS SCALE + CORE + LVM/MD stacks):
+      sd[a-z]+         — Linux SCSI/SATA   (sda, sdb, ..., sdaa)
+      nvmeXnY[pZ]      — Linux NVMe namespaces
+      mapper/<name>    — LVM logical volumes (/dev/mapper/vg-lv)
+      dm-N             — devicemapper short names
+      mdN              — Linux MD RAID arrays
+      ada[0-9]+, da[0-9]+  — TrueNAS CORE (FreeBSD) SATA/SAS
+    """
     import re as _re
     out: set = set()
     for raw in stdout.splitlines():
@@ -400,14 +409,22 @@ def _parse_findmnt_sources(stdout: str) -> set:
         if "/dev/zd" in s or "/dev/zvol" in s:
             continue
         name = s[len("/dev/"):].split("[")[0]  # bind mounts can have [subdir]
-        if name.startswith("nvme"):
-            m = _re.match(r"^(nvme\d+n\d+)", name)
-            if m:
-                out.add(m.group(1))
-        else:
-            m = _re.match(r"^(sd[a-z]+)", name)
+        # Try each recognised devname pattern in order. Mapper/dm-/md
+        # entries are kept whole because they represent a stack the
+        # operator should resolve manually before burn-in.
+        for pat in (
+            r"^(nvme\d+n\d+)",      # NVMe (strip pN)
+            r"^(sd[a-z]+)",         # Linux SCSI/SATA (strip number)
+            r"^(mapper/[^/]+)",     # LVM logical volume
+            r"^(dm-\d+)",           # devicemapper short name
+            r"^(md\d+)",            # MD RAID
+            r"^(ada\d+)",           # FreeBSD SATA
+            r"^(da\d+)",            # FreeBSD SAS/SCSI
+        ):
+            m = _re.match(pat, name)
             if m:
                 out.add(m.group(1))
+                break
     return out
 
 
diff --git a/scripts/security-scan.sh b/scripts/security-scan.sh
index ed1ab0c..2185aab 100644
--- a/scripts/security-scan.sh
+++ b/scripts/security-scan.sh
@@ -41,19 +41,33 @@ if [ ! -d "$REPO/.git" ]; then
 fi
 
 cd "$REPO"
-git fetch --quiet --prune origin 2>&1 || true
-git checkout --quiet main 2>&1 || true
-git reset --hard --quiet origin/main 2>&1 || true
+# Refresh the scan checkout. Failures here mean we'd be scanning stale
+# code without knowing — fail loudly instead of soldiering on silently.
+if ! git fetch --quiet --prune origin; then
+  echo "fatal: git fetch failed in $REPO" >&2
+  exit 65
+fi
+git checkout --quiet main || true   # best-effort; the hard reset below pins the tree to origin/main
+if ! git reset --hard --quiet origin/main; then
+  echo "fatal: git reset --hard failed in $REPO" >&2
+  exit 65
+fi
 
 echo "=== Security scan $DATE ===" > "$OUT_DIR/summary.txt"
 date -Iseconds >> "$OUT_DIR/summary.txt"
 echo >> "$OUT_DIR/summary.txt"
 
-# --- pip-audit against the LIVE container's installed packages ----------
-# Catches CVEs that hit a transitive dep we don't pin in requirements.txt.
-echo "--- pip-audit (live container) ---" | tee -a "$OUT_DIR/summary.txt"
-docker exec truenas-burnin sh -c \
-  "pip install --quiet --no-cache-dir --disable-pip-version-check pip-audit 2>/dev/null && pip-audit --strict --format=columns" \
+# --- pip-audit against the lockfile in a throwaway container ------------
+# Previously we did `docker exec truenas-burnin pip install pip-audit`
+# which mutated the live production container with a transient package.
+# Now scan the lockfile in an ephemeral container — same coverage of
+# pinned versions + their transitives, no side effects on prod.
+echo "--- pip-audit (requirements.txt in throwaway container) ---" | tee -a "$OUT_DIR/summary.txt"
+docker run --rm \
+  -v "$REPO/requirements.txt:/work/requirements.txt:ro" \
+  -w /work \
+  python:3.12-slim sh -c \
+    "pip install --quiet --no-cache-dir --disable-pip-version-check pip-audit 2>/dev/null && pip-audit --requirement requirements.txt --strict --format=columns" \
   > "$OUT_DIR/pip-audit.txt" 2>&1
 PIPS=$?
 echo "  exit=$PIPS  ($OUT_DIR/pip-audit.txt)" | tee -a "$OUT_DIR/summary.txt"