diff --git a/app/burnin.py b/app/burnin.py
index d1cb87a..0a1ec13 100644
--- a/app/burnin.py
+++ b/app/burnin.py
@@ -19,6 +19,7 @@ Cancellation:
 import asyncio
 import logging
 import time
+from contextlib import asynccontextmanager
 from datetime import datetime, timezone
 
 import aiosqlite
@@ -66,14 +67,29 @@ POLL_INTERVAL = 5.0  # seconds between progress checks during active stages
 _semaphore: asyncio.Semaphore | None = None
 _client: TrueNASClient | None = None
 
+# Live job tracking — keeps a strong reference to every _run_job task so it
+# isn't garbage-collected (asyncio.create_task only keeps a weak ref) and so
+# cancel_job / check_stuck_jobs can actually unwedge a stuck task.
+_active_tasks: dict[int, "asyncio.Task"] = {}
+
+# Remote PID of any long-running SSH child process (currently only badblocks)
+# so we can kill it via a fresh SSH session — proc.kill() over asyncssh sends
+# a "signal" channel request that OpenSSH sshd ignores by default, leaving
+# the remote process running and proc.wait() hanging forever.
+_remote_pids: dict[int, int] = {}
+
 
 def _now() -> str:
     return datetime.now(timezone.utc).isoformat()
 
 
-def _db():
-    """Open a fresh WAL-mode connection. Caller must use 'async with'."""
-    return aiosqlite.connect(settings.db_path)
+@asynccontextmanager
+async def _db():
+    """Open a WAL-mode connection with busy_timeout so writers wait for the lock
+    instead of immediately raising 'database is locked' under contention."""
+    async with aiosqlite.connect(settings.db_path) as db:
+        await db.execute("PRAGMA busy_timeout=10000")
+        yield db
 
 
 # ---------------------------------------------------------------------------
@@ -104,11 +120,228 @@ async def init(client: TrueNASClient) -> None:
         await db.commit()
 
     for job_id in queued:
-        asyncio.create_task(_run_job(job_id))
+        _spawn_run_job(job_id)
 
     log.info("Burn-in orchestrator ready (max_concurrent=%d)", settings.max_parallel_burnins)
 
 
+def _spawn_run_job(job_id: int) -> "asyncio.Task":
+    """Schedule a _run_job task and keep a strong reference to it.
+
+    Plain asyncio.create_task() only leaves a weak reference behind, so the
+    task can be GC'd before it ever runs. Storing it in _active_tasks also
+    lets cancel_job / check_stuck_jobs cancel it directly.
+    """
+    task = asyncio.create_task(_run_job(job_id))
+    _active_tasks[job_id] = task
+
+    def _cleanup(t: "asyncio.Task") -> None:
+        # Remove only if it's still us — avoid clobbering a re-enqueued task.
+        if _active_tasks.get(job_id) is t:
+            _active_tasks.pop(job_id, None)
+        _remote_pids.pop(job_id, None)
+
+    task.add_done_callback(_cleanup)
+    return task
+
+
+async def _kill_remote_process(job_id: int) -> None:
+    """Send kill -9 to the remote PID associated with this job, if any.
+
+    asyncssh's proc.kill() sends an SSH 'signal' channel request which
+    OpenSSH's sshd does not honor by default. Opening a fresh session and
+    running /bin/kill is the reliable way to actually terminate the process.
+    """
+    pid = _remote_pids.pop(job_id, None)
+    if not pid:
+        return
+    try:
+        from app import ssh_client
+        async with await ssh_client._connect() as conn:
+            await asyncio.wait_for(
+                conn.run(f"kill -9 {pid} 2>/dev/null || true", check=False),
+                timeout=10,
+            )
+        log.info("Remote-killed PID %d for job %d", pid, job_id)
+    except Exception as exc:
+        log.warning("Failed to remote-kill PID %d for job %d: %s", pid, job_id, exc)
+
+
+# ---------------------------------------------------------------------------
+# Pool-drive unlock state
+# ---------------------------------------------------------------------------
+#
+# Drives that ZFS reports as belonging to an active zpool (including the
+# boot pool) are locked from burn-in until the operator explicitly unlocks
+# them via POST /api/v1/drives/{id}/unlock. Grants live in memory only —
+# a container restart wipes them, which is the right default for "this is
+# very dangerous." TTL is bounded so an unlock you forgot about can't sit
+# armed indefinitely.
+
+import time as _time
+from dataclasses import dataclass
+
+UNLOCK_TTL_SECONDS = 600  # 10 minutes
+BOOT_POOL_NAME = "boot-pool"
+BOOT_POOL_CONFIRM_TOKEN = "DESTROY BOOT POOL"
+EXPORTED_POOL_ROLE = "exported"
+EXPORTED_CONFIRM_TOKEN = "DESTROY EXPORTED POOL"
+
+
+@dataclass
+class _UnlockGrant:
+    """An operator-issued, time-bounded permission to burn-in a pool drive.
+
+    The grant is BOUND to the (pool_name, pool_role) observed at unlock
+    time. If a subsequent poll reclassifies the drive — e.g. it was
+    "(exported)" when unlocked but is now in active pool "tank", or it
+    used to be a cache vdev and now shows as data — the grant is
+    invalidated. Otherwise the operator's "I confirm this exported drive
+    is decommissioned" judgement would silently authorise destruction
+    of a live pool.
+    """
+    expiry: float
+    pool_name: str
+    pool_role: str | None
+
+
+_unlock_grants: dict[int, _UnlockGrant] = {}
+
+
+class PoolMemberError(Exception):
+    """Raised by start_job when a drive is in a zpool and not unlocked."""
+    def __init__(self, drive_id: int, pool_name: str, pool_role: str | None):
+        self.drive_id = drive_id
+        self.pool_name = pool_name
+        self.pool_role = pool_role
+        is_boot = pool_name == BOOT_POOL_NAME
+        super().__init__(
+            f"Drive is part of {'BOOT POOL' if is_boot else 'pool'} "
+            f"'{pool_name}'{' (' + pool_role + ')' if pool_role else ''}. "
+            f"Unlock required before burn-in."
+        )
+
+
+def _is_unlocked(drive_id: int, current_pool_name: str | None,
+                 current_pool_role: str | None) -> bool:
+    """True iff a non-expired grant exists AND the drive's pool identity
+    matches what was observed at unlock time."""
+    grant = _unlock_grants.get(drive_id)
+    if grant is None:
+        return False
+    if _time.time() >= grant.expiry:
+        _unlock_grants.pop(drive_id, None)
+        return False
+    if grant.pool_name != current_pool_name or grant.pool_role != current_pool_role:
+        # Pool identity changed since unlock — drive may now belong to a
+        # different (or live) pool. Invalidate the grant; operator must
+        # re-unlock with eyes-open against the current state.
+        _unlock_grants.pop(drive_id, None)
+        log.warning(
+            "Invalidating unlock grant for drive_id=%d: pool changed from "
+            "(%s, %s) to (%s, %s)",
+            drive_id, grant.pool_name, grant.pool_role,
+            current_pool_name, current_pool_role,
+        )
+        return False
+    return True
+
+
+def unlock_expiry(drive_id: int, current_pool_name: str | None,
+                  current_pool_role: str | None) -> float | None:
+    """Return the absolute expiry of an active grant, or None.
+
+    Same identity-binding semantics as _is_unlocked: a grant whose stored
+    pool identity no longer matches the current row is treated as expired
+    and reaped. This is what the dashboard reads to decide whether to show
+    the unlocked-Burn-In affordance vs the locked-Unlock affordance.
+    """
+    grant = _unlock_grants.get(drive_id)
+    if grant is None:
+        return None
+    if _time.time() >= grant.expiry:
+        _unlock_grants.pop(drive_id, None)
+        return None
+    if grant.pool_name != current_pool_name or grant.pool_role != current_pool_role:
+        _unlock_grants.pop(drive_id, None)
+        return None
+    return grant.expiry
+
+
+async def grant_pool_unlock(drive_id: int, confirm_token: str,
+                            operator: str, reason: str) -> float:
+    """Validate confirmation token + reason and grant a time-limited unlock.
+
+    Raises ValueError on bad confirm_token, missing reason, or drive not
+    actually in a pool. Returns the unix expiry timestamp on success.
+    """
+    if not reason or len(reason.strip()) < 5:
+        raise ValueError("A reason of at least 5 characters is required.")
+    if not operator or not operator.strip():
+        raise ValueError("Operator name is required.")
+
+    async with _db() as db:
+        db.row_factory = aiosqlite.Row
+        cur = await db.execute(
+            "SELECT pool_name, pool_role, devname FROM drives WHERE id=?",
+            (drive_id,),
+        )
+        row = await cur.fetchone()
+        if not row:
+            raise ValueError("Drive not found.")
+        pool_name = row["pool_name"]
+        pool_role = row["pool_role"]
+        if not pool_name:
+            raise ValueError(
+                "This drive is not part of any pool — no unlock needed."
+            )
+
+        # Boot-pool and exported pools both get dedicated, harder-to-fat-
+        # finger tokens. Active data pools just need their pool name typed.
+        if pool_name == BOOT_POOL_NAME:
+            expected = BOOT_POOL_CONFIRM_TOKEN
+        elif pool_role == EXPORTED_POOL_ROLE:
+            expected = EXPORTED_CONFIRM_TOKEN
+        else:
+            expected = pool_name
+        if (confirm_token or "").strip() != expected:
+            raise ValueError("Confirmation token does not match.")
+
+        if pool_name == BOOT_POOL_NAME:
+            evt = "boot_pool_drive_unlocked"
+        elif pool_role == EXPORTED_POOL_ROLE:
+            evt = "exported_pool_drive_unlocked"
+        else:
+            evt = "pool_drive_unlocked"
+        await db.execute(
+            """INSERT INTO audit_events
+                   (event_type, drive_id, burnin_job_id, operator, message)
+               VALUES (?,?,?,?,?)""",
+            (evt, drive_id, None, operator.strip(),
+             f"Unlocked {pool_name} drive {row['devname']} for burn-in: {reason.strip()}"),
+        )
+        await db.commit()
+
+    # Arm the in-memory grant ONLY after the audit row is durable. If the
+    # commit above raises, we exit without writing _unlock_grants — no
+    # unaudited active unlocks. The grant is bound to the (pool_name,
+    # pool_role) we observed under the open transaction so a later poll
+    # that reclassifies the drive invalidates it (see _is_unlocked).
+    expiry = _time.time() + UNLOCK_TTL_SECONDS
+    _unlock_grants[drive_id] = _UnlockGrant(
+        expiry=expiry,
+        pool_name=pool_name,
+        pool_role=pool_role,
+    )
+
+    log.warning(
+        "Pool-drive unlock granted: drive_id=%d pool=%s role=%s "
+        "operator=%s reason=%r",
+        drive_id, pool_name, pool_role, operator, reason,
+    )
+    return expiry
+
+
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------
@@ -142,13 +375,35 @@ async def start_job(drive_id: int, profile: str, operator: str,
         if (await cur.fetchone())[0] > 0:
             raise ValueError("Drive already has an active burn-in job")
 
-        # Create job
+        # Pool-membership gate: locked unless the operator explicitly
+        # unlocked this drive via /api/v1/drives/{id}/unlock recently.
+        # _is_unlocked also checks that the grant's stored (pool_name,
+        # pool_role) still matches the live row — a grant issued for an
+        # exported drive doesn't carry over if the drive turns out to be
+        # in an active pool on the next poll.
         cur = await db.execute(
-            """INSERT INTO burnin_jobs (drive_id, profile, state, percent, operator, created_at)
-               VALUES (?,?,?,?,?,?) RETURNING id""",
-            (drive_id, profile, "queued", 0, operator, now),
+            "SELECT pool_name, pool_role FROM drives WHERE id=?", (drive_id,)
         )
-        job_id = (await cur.fetchone())["id"]
+        drow = await cur.fetchone()
+        if drow and drow["pool_name"] and not _is_unlocked(
+            drive_id, drow["pool_name"], drow["pool_role"]
+        ):
+            raise PoolMemberError(drive_id, drow["pool_name"], drow["pool_role"])
+
+        # Create job. The partial unique index uniq_active_burnin_per_drive
+        # (database.py) is the actual race-stopper here: if two concurrent
+        # /api/v1/burnin/start calls both pass the SELECT-COUNT check above,
+        # only one INSERT can win; the loser raises IntegrityError, which
+        # we surface with the same ValueError as the inline duplicate check.
+        try:
+            cur = await db.execute(
+                """INSERT INTO burnin_jobs (drive_id, profile, state, percent, operator, created_at)
+                   VALUES (?,?,?,?,?,?) RETURNING id""",
+                (drive_id, profile, "queued", 0, operator, now),
+            )
+            job_id = (await cur.fetchone())["id"]
+        except aiosqlite.IntegrityError:
+            raise ValueError("Drive already has an active burn-in job")
 
         # Create stage rows in the desired execution order
         for stage_name in stages:
@@ -164,7 +419,7 @@ async def start_job(drive_id: int, profile: str, operator: str,
         )
         await db.commit()
 
-    asyncio.create_task(_run_job(job_id))
+    _spawn_run_job(job_id)
     log.info("Burn-in job %d queued (drive_id=%d profile=%s operator=%s)",
              job_id, drive_id, profile, operator)
     return job_id
@@ -198,6 +453,13 @@ async def cancel_job(job_id: int, operator: str) -> bool:
         )
         await db.commit()
 
+    # Kill the remote child process FIRST (so proc.wait() in the running task
+    # can return), then cancel the task so any other awaits unblock.
+    await _kill_remote_process(job_id)
+    task = _active_tasks.get(job_id)
+    if task and not task.done():
+        task.cancel()
+
     log.info("Burn-in job %d cancelled by %s", job_id, operator)
     return True
 
@@ -206,10 +468,45 @@ async def cancel_job(job_id: int, operator: str) -> bool:
 # Job runner
 # ---------------------------------------------------------------------------
 
+async def _thermal_gate_ok() -> bool:
+    """True if it's thermally safe to start a new burn-in.
+    Checks the peak temperature of drives currently under active burn-in.
+    """
+    try:
+        async with _db() as db:
+            cur = await db.execute("""
+                SELECT MAX(d.temperature_c)
+                FROM drives d
+                JOIN burnin_jobs bj ON bj.drive_id = d.id
+                WHERE bj.state = 'running' AND d.temperature_c IS NOT NULL
+            """)
+            row = await cur.fetchone()
+            max_temp = row[0] if row and row[0] is not None else None
+        return max_temp is None or max_temp < settings.temp_warn_c
+    except Exception:
+        return True  # Never block on error
+
+
 async def _run_job(job_id: int) -> None:
     """Acquire semaphore slot, execute all stages, persist final state."""
     assert _semaphore is not None, "burnin.init() not called"
 
+    # Adaptive thermal gate: wait before competing for a slot if running drives
+    # are already at or above the warning threshold.  This prevents layering a
+    # new burn-in on top of a thermally-stressed system.  Gives up after 3 min
+    # and proceeds anyway so jobs don't queue indefinitely.
+    for _attempt in range(18):  # 18 × 10 s = 3 min max
+        if await _thermal_gate_ok():
+            break
+        if _attempt == 0:
+            log.info(
+                "Thermal gate: job %d waiting — running drive temps at or above %d°C",
+                job_id, settings.temp_warn_c,
+            )
+        await asyncio.sleep(10)
+    else:
+        log.warning("Thermal gate timed out for job %d — proceeding anyway", job_id)
+
     async with _semaphore:
         if await _is_cancelled(job_id):
             return
@@ -254,18 +551,35 @@ async def _run_job(job_id: int) -> None:
 
         success = False
         error_text = None
+        was_cancelled = False
         try:
             success = await _execute_stages(job_id, job_stages, devname, drive_id)
         except asyncio.CancelledError:
-            pass
+            was_cancelled = True
         except Exception as exc:
             error_text = str(exc)
             log.exception("Burn-in raised exception", extra={"job_id": job_id, "devname": devname})
 
-        if await _is_cancelled(job_id):
+        # If the job has already moved to a terminal state — by cancel_job
+        # ('cancelled') or check_stuck_jobs ('unknown') — leave it alone. The
+        # task may have been cancelled mid-stage; finalizing as 'failed' would
+        # clobber that audit-meaningful terminal state.
+        async with _db() as db:
+            cur = await db.execute("SELECT state FROM burnin_jobs WHERE id=?", (job_id,))
+            cur_row = await cur.fetchone()
+        if cur_row and cur_row[0] != "running":
             return
 
-        final_state = "passed" if success else "failed"
+        # Cancellation arriving here means the asyncio task was cancelled
+        # by something other than cancel_job/check_stuck_jobs (shutdown,
+        # uvicorn reload, future code paths). The DB still says 'running',
+        # so we have to write *some* terminal state, but classifying the
+        # interrupted job as 'failed' would lie — we don't actually know
+        # whether the underlying SMART/badblocks work passed or not.
+        if was_cancelled:
+            final_state = "unknown"
+        else:
+            final_state = "passed" if success else "failed"
         async with _db() as db:
             await db.execute("PRAGMA journal_mode=WAL")
             await db.execute(
@@ -464,6 +778,14 @@ async def _stage_smart_test_ssh(job_id: int, devname: str, test_type: str, stage
     # Brief pause to let the test register in smartctl output
     await asyncio.sleep(3)
 
+    # Throttle log_text appends — every poll on a multi-hour long_smart bloated
+    # log_text to 50+ MB and triggered SQLite "database is locked" because each
+    # COALESCE-then-append rewrites the whole column. Append every ~60s, on the
+    # first poll, and on any state change.
+    LOG_EVERY_N_POLLS = 12
+    poll_count = 0
+    last_state: str | None = None
+
     # Poll until complete
     while True:
         if await _is_cancelled(job_id):
@@ -482,7 +804,11 @@ async def _stage_smart_test_ssh(job_id: int, devname: str, test_type: str, stage
             await _append_stage_log(job_id, stage_name, f"[poll error] {exc}\n")
             continue
 
-        await _append_stage_log(job_id, stage_name, progress["output"] + "\n---\n")
+        poll_count += 1
+        state_changed = progress["state"] != last_state
+        last_state = progress["state"]
+        if poll_count == 1 or poll_count % LOG_EVERY_N_POLLS == 0 or state_changed:
+            await _append_stage_log(job_id, stage_name, progress["output"] + "\n---\n")
 
         if progress["state"] == "running":
             pct = max(0, 100 - progress["percent_remaining"])
@@ -519,15 +845,39 @@ async def _stage_smart_test_ssh(job_id: int, devname: str, test_type: str, stage
         # "unknown" → keep polling
 
 
+async def _badblocks_available() -> bool:
+    """Check if badblocks is installed on the remote host (Linux/SCALE only)."""
+    from app import ssh_client
+    try:
+        async with await ssh_client._connect() as conn:
+            result = await conn.run("which badblocks", check=False)
+            return result.returncode == 0
+    except Exception:
+        return False
+
+
 async def _stage_surface_validate(job_id: int, devname: str, drive_id: int) -> bool:
     """
-    Surface validation stage.
-    SSH mode: runs badblocks -wsv -b 4096 -p 1 /dev/{devname}.
-    Mock mode: simulated timed progress (no real I/O).
+    Surface validation stage — auto-routes to the right implementation:
+
+    1. SSH configured + badblocks available (TrueNAS SCALE / Linux):
+       → runs badblocks -wsv -b 4096 -p 1 /dev/{devname} directly over SSH.
+    2. SSH configured + badblocks NOT available (TrueNAS CORE / FreeBSD):
+       → uses TrueNAS REST API disk.wipe FULL job + post-wipe SMART check.
+    3. No SSH:
+       → simulated timed progress (dev/mock mode).
     """
     from app import ssh_client
     if ssh_client.is_configured():
-        return await _stage_surface_validate_ssh(job_id, devname, drive_id)
+        if await _badblocks_available():
+            return await _stage_surface_validate_ssh(job_id, devname, drive_id)
+        # TrueNAS CORE/FreeBSD: badblocks not available — use native wipe API
+        await _append_stage_log(
+            job_id, "surface_validate",
+            "[INFO] badblocks not found on host (TrueNAS CORE/FreeBSD) — "
+            "using TrueNAS disk.wipe API (FULL write pass).\n\n"
+        )
+        return await _stage_surface_validate_truenas(job_id, devname, drive_id)
     return await _stage_timed_simulate(job_id, "surface_validate", settings.surface_validate_seconds)
 
 
@@ -537,8 +887,11 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
 
     await _append_stage_log(
         job_id, "surface_validate",
-        f"[START] badblocks -wsv -b 4096 -p 1 /dev/{devname}\n"
-        f"[NOTE]  This is a DESTRUCTIVE write test. All data on /dev/{devname} will be overwritten.\n\n"
+        f"[START] badblocks -wsv -b {settings.surface_validate_block_size} "
+        f"-c {settings.surface_validate_block_buffer} "
+        f"-p {settings.surface_validate_passes} /dev/{devname}\n"
+        f"[NOTE]  This is a DESTRUCTIVE write test. "
+        f"All data on /dev/{devname} will be overwritten.\n\n"
     )
 
     def _is_cancelled_sync() -> bool:
@@ -580,14 +933,50 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
         output_lines: list[str] = []
 
         async with await ssh_client._connect() as conn:
-            cmd = f"badblocks -wsv -b 4096 -p 1 /dev/{devname}"
+            # Wrap in `sh -c 'echo PID:$$; exec ...'` so we get the remote
+            # PID on the first stdout line. asyncssh's proc.kill() sends an
+            # SSH signal request that OpenSSH's sshd ignores by default, so
+            # we need the PID to issue an out-of-band `kill -9` over a fresh
+            # session when we want to abort.
+            #
+            # Block geometry is operator-tunable (Settings → Burn-in):
+            #   -b N   block size in bytes  (settings.surface_validate_block_size)
+            #   -c N   blocks held per IO   (settings.surface_validate_block_buffer)
+            #   -p N   pass count           (settings.surface_validate_passes)
+            # Defaults preserve original behavior (-b 4096 -c 64 -p 1).
+            bb_args = (
+                f"-wsv "
+                f"-b {settings.surface_validate_block_size} "
+                f"-c {settings.surface_validate_block_buffer} "
+                f"-p {settings.surface_validate_passes}"
+            )
+            cmd = (
+                f"sh -c 'echo PID:$$; exec badblocks {bb_args} /dev/{devname}'"
+            )
             async with conn.create_process(cmd) as proc:
                 import re as _re
 
+                pid_seen = False
+
                 async def _drain(stream, is_stderr: bool):
-                    nonlocal bad_blocks_total
+                    nonlocal bad_blocks_total, pid_seen
                     async for raw in stream:
                         line = raw if isinstance(raw, str) else raw.decode("utf-8", errors="replace")
+
+                        # First stdout line is "PID:<n>" from the wrapping shell.
+                        # Capture it and don't append it to the user-visible log.
+                        if not is_stderr and not pid_seen and line.startswith("PID:"):
+                            pid_seen = True
+                            try:
+                                _remote_pids[job_id] = int(line[4:].strip())
+                                log.info(
+                                    "Captured remote PID %d for job %d (badblocks)",
+                                    _remote_pids[job_id], job_id,
+                                )
+                            except ValueError:
+                                pass
+                            continue
+
                         output_lines.append(line)
 
                         if is_stderr:
@@ -610,7 +999,7 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
 
                         # Abort on bad block threshold
                         if bad_blocks_total > settings.bad_block_threshold:
-                            proc.kill()
+                            await _kill_remote_process(job_id)
                             output_lines.append(
                                 f"\n[ABORTED] {bad_blocks_total} bad block(s) exceeded "
                                 f"threshold ({settings.bad_block_threshold})\n"
@@ -618,7 +1007,7 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
                             return
 
                         if await _is_cancelled(job_id):
-                            proc.kill()
+                            await _kill_remote_process(job_id)
                             return
 
                 await asyncio.gather(
@@ -626,7 +1015,17 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
                     _drain(proc.stderr, True),
                     return_exceptions=True,
                 )
-                await proc.wait()
+                # Bound proc.wait so a remote process that ignored our kill
+                # signal (or that we never managed to kill) can't pin this
+                # task in the semaphore forever. Closing the connection on
+                # exit will deliver SIGPIPE to the remote on its next write.
+                try:
+                    await asyncio.wait_for(proc.wait(), timeout=15)
+                except asyncio.TimeoutError:
+                    log.warning(
+                        "proc.wait() timed out for job %d — abandoning channel",
+                        job_id,
+                    )
 
         # Flush remaining output
         remainder = "".join(output_lines)
@@ -655,6 +1054,116 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
     return True
 
 
+async def _stage_surface_validate_truenas(job_id: int, devname: str, drive_id: int) -> bool:
+    """
+    Surface validation via TrueNAS CORE disk.wipe REST API.
+    Used on FreeBSD (TrueNAS CORE) where badblocks is unavailable.
+
+    Sends a FULL write-zero pass across the entire disk, polls progress,
+    then runs a post-wipe SMART attribute check to catch reallocated sectors.
+    """
+    from app import ssh_client
+
+    await _append_stage_log(
+        job_id, "surface_validate",
+        f"[START] TrueNAS disk.wipe FULL — {devname}\n"
+        f"[NOTE]  DESTRUCTIVE: all data on {devname} will be overwritten.\n\n"
+    )
+
+    # Start the wipe job
+    try:
+        tn_job_id = await _client.wipe_disk(devname, "FULL")
+    except Exception as exc:
+        await _set_stage_error(job_id, "surface_validate", f"Failed to start disk.wipe: {exc}")
+        return False
+
+    await _append_stage_log(
+        job_id, "surface_validate",
+        f"[JOB] TrueNAS wipe job started (job_id={tn_job_id})\n"
+    )
+
+    # Poll until complete
+    log_flush_counter = 0
+    while True:
+        if await _is_cancelled(job_id):
+            try:
+                await _client.abort_job(tn_job_id)
+            except Exception:
+                pass
+            return False
+
+        await asyncio.sleep(POLL_INTERVAL)
+
+        try:
+            job = await _client.get_job(tn_job_id)
+        except Exception as exc:
+            log.warning("Wipe job poll failed: %s", exc, extra={"job_id": job_id})
+            await _append_stage_log(job_id, "surface_validate", f"[poll error] {exc}\n")
+            continue
+
+        if not job:
+            await _set_stage_error(job_id, "surface_validate", f"Wipe job {tn_job_id} not found")
+            return False
+
+        state = job.get("state", "")
+        pct = int(job.get("progress", {}).get("percent", 0) or 0)
+        desc = job.get("progress", {}).get("description", "")
+
+        await _update_stage_percent(job_id, "surface_validate", min(pct, 99))
+        await _recalculate_progress(job_id)
+        _push_update()
+
+        # Log progress description every ~5 polls to avoid DB spam
+        log_flush_counter += 1
+        if desc and log_flush_counter % 5 == 0:
+            await _append_stage_log(job_id, "surface_validate", f"[{pct}%] {desc}\n")
+
+        if state == "SUCCESS":
+            await _update_stage_percent(job_id, "surface_validate", 100)
+            await _append_stage_log(
+                job_id, "surface_validate",
+                f"\n[DONE] Wipe job {tn_job_id} completed successfully.\n"
+            )
+            # Post-wipe SMART check — catch any sectors that failed under write stress
+            if ssh_client.is_configured() and drive_id is not None:
+                await _append_stage_log(
+                    job_id, "surface_validate",
+                    "[CHECK] Running post-wipe SMART attribute check...\n"
+                )
+                try:
+                    attrs = await ssh_client.get_smart_attributes(devname)
+                    await _store_smart_attrs(drive_id, attrs)
+                    if attrs["failures"]:
+                        error = "Post-wipe SMART check: " + "; ".join(attrs["failures"])
+                        await _set_stage_error(job_id, "surface_validate", error)
+                        return False
+                    if attrs["warnings"]:
+                        await _append_stage_log(
+                            job_id, "surface_validate",
+                            "[WARNING] " + "; ".join(attrs["warnings"]) + "\n"
+                        )
+                    await _append_stage_log(
+                        job_id, "surface_validate",
+                        f"[CHECK] SMART health: {attrs['health']} — no critical attributes.\n"
+                    )
+                except Exception as exc:
+                    log.warning("Post-wipe SMART check failed: %s", exc)
+                    await _append_stage_log(
+                        job_id, "surface_validate",
+                        f"[WARN] Post-wipe SMART check failed (non-fatal): {exc}\n"
+                    )
+            return True
+
+        elif state in ("FAILED", "ABORTED", "ERROR"):
+            error_msg = job.get("error") or f"Disk wipe failed (state={state})"
+            await _set_stage_error(
+                job_id, "surface_validate",
+                f"TrueNAS disk.wipe FAILED: {error_msg}"
+            )
+            return False
+        # RUNNING or WAITING — keep polling
+
+
 async def _stage_timed_simulate(job_id: int, stage_name: str, duration_seconds: int) -> bool:
     """Simulate a timed stage with progress updates (mock / dev mode)."""
     start = time.monotonic()
@@ -681,21 +1190,47 @@ async def _stage_final_check(job_id: int, devname: str, drive_id: int | None = N
     Verify drive passed all tests.
     SSH mode: run smartctl -a and check critical attributes.
     Mock mode: check SMART health field in DB.
+
+    A transient SSH connectivity failure here must NOT invalidate a prior
+    multi-day surface_validate. Retry SSH-only failures, then soft-pass.
     """
     await asyncio.sleep(1)
     from app import ssh_client
+
+    def _ssh_only(failures: list[str]) -> bool:
+        return bool(failures) and all(f.startswith("SSH error:") for f in failures)
+
     if ssh_client.is_configured() and drive_id is not None:
         try:
             attrs = await ssh_client.get_smart_attributes(devname)
+            for attempt in range(2):
+                if not _ssh_only(attrs.get("failures") or []):
+                    break
+                log.warning(
+                    "final_check SSH unreachable (attempt %d/3); retrying in 30s",
+                    attempt + 1,
+                    extra={"job_id": job_id, "devname": devname},
+                )
+                await asyncio.sleep(30)
+                attrs = await ssh_client.get_smart_attributes(devname)
+
+            failures = attrs.get("failures") or []
+            if _ssh_only(failures):
+                log.warning(
+                    "final_check soft-pass: SSH unreachable after retries; prior stages stand",
+                    extra={"job_id": job_id, "devname": devname, "ssh_error": failures},
+                )
+                return True
+
             await _store_smart_attrs(drive_id, attrs)
-            if attrs["health"] == "FAILED" or attrs["failures"]:
-                failures = attrs["failures"] or [f"SMART health: {attrs['health']}"]
+            if attrs["health"] == "FAILED" or failures:
+                msg = failures or [f"SMART health: {attrs['health']}"]
                 await _set_stage_error(job_id, "final_check",
-                                       "Final check failed: " + "; ".join(failures))
+                                       "Final check failed: " + "; ".join(msg))
                 return False
             return True
         except Exception as exc:
-            log.warning("SSH final_check failed, falling back to DB check: %s", exc)
+            log.warning("SSH final_check raised, falling back to DB check: %s", exc)
 
     # DB check (mock mode fallback)
     async with _db() as db:
@@ -942,6 +1477,11 @@ async def check_stuck_jobs() -> None:
                 "UPDATE burnin_jobs SET state='unknown', finished_at=? WHERE id=?",
                 (now, job_id),
             )
+            await db.execute(
+                """UPDATE burnin_stages SET state='unknown', finished_at=?
+                   WHERE burnin_job_id=? AND state='running'""",
+                (now, job_id),
+            )
             await db.execute(
                 """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
                    VALUES (?,?,?,?,?)""",
@@ -951,5 +1491,16 @@ async def check_stuck_jobs() -> None:
 
         await db.commit()
 
+    # Actually unstick the running tasks so they release their semaphore slot.
+    # Without this the DB state becomes 'unknown' but the asyncio task keeps
+    # holding the slot forever — which is the bug that left subsequent jobs
+    # permanently 'queued' until container restart.
+    for row in stuck:
+        job_id = row[0]
+        await _kill_remote_process(job_id)
+        task = _active_tasks.get(job_id)
+        if task and not task.done():
+            task.cancel()
+
     _push_update()
     log.warning("Marked %d stuck job(s) as unknown", len(stuck))
diff --git a/app/config.py b/app/config.py
index 7c1f3cf..6250bde 100644
--- a/app/config.py
+++ b/app/config.py
@@ -58,6 +58,21 @@ class Settings(BaseSettings):
     # Bad-block tolerance — surface_validate fails if bad blocks exceed this
     bad_block_threshold: int = 0
 
+    # Surface-validate (badblocks) tunables — defaults match the Spearfoot
+    # disk-burnin.sh community script's recommended geometry for large HDDs.
+    # block_size      : -b in bytes; aligned to AF (4 KiB) sectors. Bumping
+    #                   to 8192 roughly halves badblocks runtime on multi-TB
+    #                   drives at the cost of ~2x RAM in the test buffer.
+    # block_buffer    : -c blocks held in memory per IO. 64 = badblocks
+    #                   default. Higher values = larger buffer, faster IO,
+    #                   more RAM (block_size * block_buffer bytes per pass).
+    # passes          : -p value. 1 = repeat until one consecutive clean
+    #                   scan (current behavior). 2-3 for paranoid burn-in
+    #                   that re-confirms after finding errors.
+    surface_validate_block_size:   int = 4096
+    surface_validate_block_buffer: int = 64
+    surface_validate_passes:       int = 1
+
     # SSH credentials for direct TrueNAS command execution (Stage 7)
     # When ssh_host is set, burn-in stages use SSH for smartctl/badblocks instead of REST API.
     # Leave ssh_host empty to use the mock/REST API (development mode).
@@ -68,7 +83,7 @@ class Settings(BaseSettings):
     ssh_key: str = ""             # PEM private key content (paste full key including headers)
 
     # Application version — used by the /api/v1/updates/check endpoint
-    app_version: str = "1.0.0-7"
+    app_version: str = "1.0.0-21"
 
 
 settings = Settings()
diff --git a/app/database.py b/app/database.py
index 477b60a..ce02a64 100644
--- a/app/database.py
+++ b/app/database.py
@@ -89,6 +89,16 @@ _MIGRATIONS = [
     "ALTER TABLE smart_tests ADD COLUMN raw_output TEXT",
     # Stage 8: track last reset time so dashboard burn-in col clears after reset
     "ALTER TABLE drives ADD COLUMN last_reset_at TEXT",
+    # 1.0.0-15: pool-membership lock
+    "ALTER TABLE drives ADD COLUMN pool_name TEXT",
+    "ALTER TABLE drives ADD COLUMN pool_role TEXT",
+    "ALTER TABLE drives ADD COLUMN pool_seen_at TEXT",
+    # 1.0.0-19: enforce one active burn-in per drive at the storage layer.
+    # Closes the read-then-insert race in burnin.start_job — without this,
+    # two concurrent /api/v1/burnin/start requests for the same drive could
+    # both observe zero active jobs and both insert queued rows.
+    """CREATE UNIQUE INDEX IF NOT EXISTS uniq_active_burnin_per_drive
+       ON burnin_jobs (drive_id) WHERE state IN ('queued', 'running')""",
 ]
 
 
diff --git a/app/mailer.py b/app/mailer.py
index 7800d60..642cc8e 100644
--- a/app/mailer.py
+++ b/app/mailer.py
@@ -5,6 +5,7 @@ Disabled when SMTP_HOST is not set.
 """
 
 import asyncio
+import html
 import logging
 import smtplib
 import ssl
@@ -109,17 +110,61 @@ def _drive_rows_html(drives: list[dict]) -> str:
     return "\n".join(rows)
 
 
-def _build_html(drives: list[dict], generated_at: str) -> str:
+def _build_unlock_banner_html(events: list[dict]) -> str:
+    """Banner listing every pool-drive unlock granted in the last 24h.
+
+    Every interpolated DB field is run through html.escape — operator and
+    reason are free-text from the unlock modal and otherwise inject into
+    the email body verbatim.
+    """
+    if not events:
+        return ""
+    rows = []
+    for e in events:
+        evt = e.get("event_type") or ""
+        is_boot = evt == "boot_pool_drive_unlocked"
+        is_exported = evt == "exported_pool_drive_unlocked"
+        kind = (
+            "BOOT POOL" if is_boot
+            else "EXPORTED ZFS" if is_exported
+            else "pool"
+        )
+        when = html.escape((e.get("created_at") or "")[:19])
+        operator = html.escape(e.get("operator") or "?")
+        devname = html.escape(e.get("devname") or "?")
+        # `message` already includes pool name, devname, and the operator's
+        # reason — surface it verbatim so the audit trail is faithful.
+        message = html.escape(e.get("message") or "")
+        rows.append(
+            f"<li style='margin:4px 0'><strong>{when}</strong> &middot; "
+            f"<strong>{operator}</strong> unlocked a {kind} drive "
+            f"({devname}): "
+            f"<span style='color:#c9d1d9'>{message}</span></li>"
+        )
+    return f"""
+        <div style="background:#4b1113;border:1px solid #f85149;border-radius:6px;
+                    padding:14px 18px;margin-bottom:20px;color:#f85149">
+          <div style="font-weight:600;font-size:14px;margin-bottom:6px">
+            &#x26A0; {len(events)} pool-drive unlock(s) in the last 24h
+          </div>
+          <ul style="margin:0;padding-left:18px;font-size:12.5px;color:#f0a0a0">
+            {''.join(rows)}
+          </ul>
+        </div>"""
+
+
+def _build_html(drives: list[dict], generated_at: str,
+                unlock_events: list[dict] | None = None) -> str:
     total = len(drives)
     failed_drives = [d for d in drives if d.get("smart_health") == "FAILED"]
     running_burnin = [d for d in drives if (d.get("burnin") or {}).get("state") == "running"]
     passed_burnin = [d for d in drives if (d.get("burnin") or {}).get("state") == "passed"]
 
-    # Alert banner
-    alert_html = ""
+    # Alert banners (unlock events first — the audit-grade signal)
+    alert_html = _build_unlock_banner_html(unlock_events or [])
     if failed_drives:
         names = ", ".join(d["devname"] for d in failed_drives)
-        alert_html = f"""
+        alert_html += f"""
         <div style="background:#4b1113;border:1px solid #f85149;border-radius:6px;padding:14px 18px;margin-bottom:20px;color:#f85149;font-weight:500">
           ⚠ SMART health FAILED on {len(failed_drives)} drive(s): {names}
         </div>"""
@@ -287,6 +332,36 @@ async def _fetch_report_data() -> list[dict]:
         return await _fetch_drives_for_template(db)
 
 
+async def _fetch_unlock_events_24h() -> list[dict]:
+    """Return pool-drive unlock audit events from the last 24 hours.
+
+    These are operator overrides of the pool-membership lock — every entry
+    represents a deliberate decision to risk a pool, so the daily report
+    surfaces them as an audit-grade banner.
+    """
+    async with aiosqlite.connect(settings.db_path) as db:
+        db.row_factory = aiosqlite.Row
+        await db.execute("PRAGMA journal_mode=WAL")
+        # julianday() handles the 'YYYY-MM-DDTHH:MM:SS.fff+00:00' format
+        # we write from Python; comparing the raw string against
+        # datetime('now','-1 day') (which formats as 'YYYY-MM-DD HH:MM:SS')
+        # produces subtle off-by-up-to-a-day errors because of the
+        # 'T' vs ' ' separator and the '+00:00' suffix.
+        cur = await db.execute("""
+            SELECT ae.event_type, ae.operator, ae.message, ae.created_at,
+                   d.devname, d.pool_name, d.pool_role
+            FROM audit_events ae
+            LEFT JOIN drives d ON d.id = ae.drive_id
+            WHERE ae.event_type IN (
+                    'pool_drive_unlocked',
+                    'boot_pool_drive_unlocked',
+                    'exported_pool_drive_unlocked')
+              AND julianday(ae.created_at) >= julianday('now', '-1 day')
+            ORDER BY ae.created_at DESC
+        """)
+        return [dict(r) for r in await cur.fetchall()]
+
+
 # ---------------------------------------------------------------------------
 # Scheduler
 # ---------------------------------------------------------------------------
@@ -411,9 +486,16 @@ async def test_smtp_connection() -> dict:
 async def send_report_now() -> None:
     """Send a report immediately (used by on-demand API endpoint)."""
     drives = await _fetch_report_data()
+    unlock_events = await _fetch_unlock_events_24h()
     now_str = datetime.now().strftime("%Y-%m-%d %H:%M")
-    html = _build_html(drives, now_str)
-    subject = f"Burn-In Report — {datetime.now().strftime('%Y-%m-%d')} ({len(drives)} drives)"
+    html = _build_html(drives, now_str, unlock_events)
+    suffix = ""
+    if unlock_events:
+        suffix = f" — {len(unlock_events)} pool unlock(s)"
+    subject = (
+        f"Burn-In Report — {datetime.now().strftime('%Y-%m-%d')} "
+        f"({len(drives)} drives){suffix}"
+    )
     await asyncio.to_thread(_send_email, subject, html)
 
 
diff --git a/app/models.py b/app/models.py
index 1748c7b..d9195a4 100644
--- a/app/models.py
+++ b/app/models.py
@@ -97,8 +97,17 @@ class DriveResponse(BaseModel):
     smart_long: SmartTestState
     notes: str | None = None
     location: str | None = None
+    pool_name: str | None = None
+    pool_role: str | None = None
+    pool_unlocked_until: float | None = None  # unix epoch; null = locked
 
 
 class UpdateDriveRequest(BaseModel):
     notes: str | None = None
     location: str | None = None
+
+
+class UnlockPoolDriveRequest(BaseModel):
+    confirm_token: str
+    operator: str
+    reason: str
diff --git a/app/poller.py b/app/poller.py
index 67f625c..4c013d7 100644
--- a/app/poller.py
+++ b/app/poller.py
@@ -20,13 +20,15 @@ from app.truenas import TrueNASClient
 
 log = logging.getLogger(__name__)
 
-# Shared state read by the /health endpoint
+# Shared state read by the /health endpoint and dashboard template
 _state: dict[str, Any] = {
     "last_poll_at": None,
     "last_error": None,
     "healthy": False,
     "drives_seen": 0,
     "consecutive_failures": 0,
+    "system_temps": {},        # {"cpu_c": int|None, "pch_c": int|None}
+    "thermal_pressure": "ok",  # "ok" | "warn" | "crit" — based on running burn-in drive temps
 }
 
 # SSE subscriber queues — notified after each successful poll
@@ -87,18 +89,60 @@ def _map_history_state(status: str) -> str:
 # DB helpers
 # ---------------------------------------------------------------------------
 
-async def _upsert_drive(db: aiosqlite.Connection, disk: dict, now: str) -> int:
-    await db.execute(
+async def _upsert_drive(db: aiosqlite.Connection, disk: dict, now: str,
+                        pool_info: dict | None = None,
+                        update_pool: bool = True) -> int:
+    """Insert/update a drive row.
+
+    pool_info: {"pool": str, "role": str} if this drive is currently in a
+    zpool, else None. None values clear pool columns so a removed-from-pool
+    drive doesn't stay locked.
+
+    update_pool: when False, pool columns are preserved on conflict and
+    initialised to NULL on insert. Callers pass False on detection failure
+    so a transient SSH outage doesn't silently unlock every drive.
+    """
+    pool_name = pool_info["pool"] if pool_info else None
+    pool_role = pool_info["role"] if pool_info else None
+    pool_seen_at = now if pool_info else None
+
+    if update_pool:
+        update_clause = """
+            devname        = excluded.devname,
+            serial         = excluded.serial,
+            model          = excluded.model,
+            size_bytes     = excluded.size_bytes,
+            temperature_c  = excluded.temperature_c,
+            smart_health   = excluded.smart_health,
+            last_seen_at   = excluded.last_seen_at,
+            last_polled_at = excluded.last_polled_at,
+            pool_name      = excluded.pool_name,
+            pool_role      = excluded.pool_role,
+            pool_seen_at   = excluded.pool_seen_at
         """
-        INSERT INTO drives
-            (truenas_disk_id, devname, serial, model, size_bytes,
-             temperature_c, smart_health, last_seen_at, last_polled_at)
-        VALUES (?,?,?,?,?,?,?,?,?)
-        ON CONFLICT(truenas_disk_id) DO UPDATE SET
+    else:
+        # Preserve pool_name / pool_role / pool_seen_at — detection failed
+        # this cycle, so we have no fresh data and must not overwrite.
+        update_clause = """
+            devname        = excluded.devname,
+            serial         = excluded.serial,
+            model          = excluded.model,
+            size_bytes     = excluded.size_bytes,
             temperature_c  = excluded.temperature_c,
             smart_health   = excluded.smart_health,
             last_seen_at   = excluded.last_seen_at,
             last_polled_at = excluded.last_polled_at
+        """
+
+    await db.execute(
+        f"""
+        INSERT INTO drives
+            (truenas_disk_id, devname, serial, model, size_bytes,
+             temperature_c, smart_health, last_seen_at, last_polled_at,
+             pool_name, pool_role, pool_seen_at)
+        VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
+        ON CONFLICT(truenas_disk_id) DO UPDATE SET
+        {update_clause}
         """,
         (
             disk["identifier"],
@@ -110,6 +154,9 @@ async def _upsert_drive(db: aiosqlite.Connection, disk: dict, now: str) -> int:
             disk.get("smart_health", "UNKNOWN"),
             now,
             now,
+            pool_name,
+            pool_role,
+            pool_seen_at,
         ),
     )
     cur = await db.execute(
@@ -208,6 +255,67 @@ async def _sync_history(
 # Poll cycle
 # ---------------------------------------------------------------------------
 
+async def _poll_smart_via_ssh(db: aiosqlite.Connection, now: str) -> None:
+    """
+    Poll progress for SMART tests started via SSH (truenas_job_id IS NULL).
+    Used on TrueNAS SCALE 25.10+ where the REST smart/test API no longer exists.
+    """
+    from app import ssh_client
+    if not ssh_client.is_configured():
+        return
+
+    cur = await db.execute(
+        """SELECT st.id, st.test_type, st.drive_id, d.devname, st.started_at
+           FROM smart_tests st
+           JOIN drives d ON d.id = st.drive_id
+           WHERE st.state = 'running' AND st.truenas_job_id IS NULL"""
+    )
+    rows = await cur.fetchall()
+    if not rows:
+        return
+
+    for row in rows:
+        test_id, ttype, drive_id, devname, started_at = row[0], row[1], row[2], row[3], row[4]
+        try:
+            progress = await ssh_client.poll_smart_progress(devname)
+        except Exception as exc:
+            log.warning("SSH SMART poll failed for %s: %s", devname, exc)
+            continue
+
+        state = progress["state"]
+        pct_remaining = progress.get("percent_remaining")  # None = not yet in output
+        raw_output = progress.get("output", "")
+
+        if state == "running":
+            # pct_remaining=None means smartctl output doesn't have the % line yet
+            # (test just started) — keep percent at 0 rather than jumping to 100
+            if pct_remaining is None:
+                pct = 0
+            else:
+                pct = max(0, 100 - pct_remaining)
+            eta = _eta_from_progress(pct, started_at)
+            await db.execute(
+                "UPDATE smart_tests SET percent=?, eta_at=?, raw_output=? WHERE id=?",
+                (pct, eta, raw_output, test_id),
+            )
+        elif state == "passed":
+            await db.execute(
+                "UPDATE smart_tests SET state='passed', percent=100, finished_at=?, raw_output=? WHERE id=?",
+                (now, raw_output, test_id),
+            )
+            log.info("SSH SMART %s passed on %s", ttype, devname)
+        elif state == "failed":
+            await db.execute(
+                "UPDATE smart_tests SET state='failed', percent=0, finished_at=?, "
+                "error_text=?, raw_output=? WHERE id=?",
+                (now, f"SMART {ttype.upper()} test failed", raw_output, test_id),
+            )
+            log.warning("SSH SMART %s FAILED on %s", ttype, devname)
+        # state == "unknown" → keep polling, no update
+
+    await db.commit()
+
+
 async def poll_cycle(client: TrueNASClient) -> int:
     """Run one full poll. Returns number of drives seen."""
     now = _now()
@@ -215,6 +323,88 @@ async def poll_cycle(client: TrueNASClient) -> int:
     disks = await client.get_disks()
     running_jobs = await client.get_smart_jobs(state="RUNNING")
 
+    # Fetch temperatures via SCALE-specific endpoint.
+    # CORE doesn't have this endpoint — silently skip on any error.
+    try:
+        temps = await client.get_disk_temperatures()
+    except Exception:
+        temps = {}
+
+    # Inject temperature into each disk dict (SCALE 25.10 has no temp in /disk)
+    for disk in disks:
+        devname = disk.get("devname", "")
+        t = temps.get(devname)
+        if t is not None:
+            disk["temperature"] = int(round(t))
+
+    # SMART health — TrueNAS /api/v2.0/disk doesn't expose smart_health,
+    # so without this every drive defaults to UNKNOWN forever (only burn-in
+    # stages used to populate it). Run `smartctl -H` over a single SSH
+    # session for every drive every Nth cycle. Cache between cycles via
+    # _state so the dashboard always renders the most recent answer.
+    SMART_HEALTH_EVERY_N_CYCLES = 5  # ~1 minute at default 12s interval
+    _state.setdefault("smart_health_cache", {})
+    cycle_n = _state.get("cycle", 0) + 1
+    _state["cycle"] = cycle_n
+    try:
+        from app import ssh_client as _ssh
+        if _ssh.is_configured() and (cycle_n % SMART_HEALTH_EVERY_N_CYCLES == 1):
+            health_map = await _ssh.get_smart_health_map(
+                [d["devname"] for d in disks if d.get("devname")]
+            )
+            if health_map is not None:
+                _state["smart_health_cache"] = health_map
+    except Exception as exc:
+        log.warning("smart_health refresh failed: %s", exc)
+    health_cache = _state.get("smart_health_cache") or {}
+    for disk in disks:
+        devname = disk.get("devname", "")
+        h = health_cache.get(devname)
+        if h:
+            disk["smart_health"] = h
+
+    # Pool membership map — drives in any zpool are locked from burn-in.
+    # ssh_client returns None on failure (distinct from {} which means "no
+    # pools"). If EITHER detection call fails we fail-closed: leave
+    # pool_name / pool_role columns alone so previously-locked drives stay
+    # locked, and previously-unlocked drives stay unlocked, until detection
+    # recovers. Treating a transient SSH blip as "no pool members" would
+    # silently unlock every drive on the next poll.
+    detection_ok = True
+    pool_map: dict = {}
+    zfs_member_set: set = set()
+    try:
+        from app import ssh_client as _ssh
+        if _ssh.is_configured():
+            pm = await _ssh.get_pool_membership()
+            zs = await _ssh.get_zfs_member_drives()
+            if pm is None or zs is None:
+                detection_ok = False
+            else:
+                pool_map = pm
+                zfs_member_set = zs
+        # SSH unconfigured (mock/dev mode) — detection_ok stays True with
+        # empty maps, so dev mode never artificially locks drives.
+    except Exception:
+        detection_ok = False
+
+    if not detection_ok:
+        log.warning(
+            "Pool detection failed this cycle — preserving existing "
+            "pool_name/pool_role columns. Locked drives stay locked, "
+            "unlocked drives stay unlocked, until SSH recovers."
+        )
+
+    if detection_ok:
+        # Drives carrying ZFS labels but not in any active pool are
+        # "exported" — same hazard as an active pool member, so lock them
+        # too. We don't know the original pool name without
+        # `zpool import`-style scanning (slow + blocks); display
+        # "(exported)" and use a special token.
+        for devname in zfs_member_set:
+            if devname not in pool_map:
+                pool_map[devname] = {"pool": "(exported)", "role": "exported"}
+
     # Index running jobs by (devname, test_type)
     active: dict[tuple[str, str], dict] = {}
     for job in running_jobs:
@@ -233,7 +423,11 @@ async def poll_cycle(client: TrueNASClient) -> int:
 
         for disk in disks:
             devname = disk["devname"]
-            drive_id = await _upsert_drive(db, disk, now)
+            drive_id = await _upsert_drive(
+                db, disk, now,
+                pool_map.get(devname) if detection_ok else None,
+                update_pool=detection_ok,
+            )
 
             for ttype in ("short", "long"):
                 if (devname, ttype) in active:
@@ -243,6 +437,9 @@ async def poll_cycle(client: TrueNASClient) -> int:
 
         await db.commit()
 
+        # SSH SMART polling — for tests started via smartctl (no TrueNAS REST job)
+        await _poll_smart_via_ssh(db, now)
+
     return len(disks)
 
 
@@ -263,6 +460,39 @@ async def run(client: TrueNASClient) -> None:
             _state["drives_seen"] = count
             _state["consecutive_failures"] = 0
             log.debug("Poll OK", extra={"drives": count})
+
+            # System sensor temps via SSH (non-fatal)
+            from app import ssh_client as _ssh
+            if _ssh.is_configured():
+                try:
+                    _state["system_temps"] = await _ssh.get_system_sensors()
+                except Exception:
+                    pass
+
+            # Thermal pressure: max temp of drives currently under burn-in
+            try:
+                async with aiosqlite.connect(settings.db_path) as _tdb:
+                    _tdb.row_factory = aiosqlite.Row
+                    await _tdb.execute("PRAGMA journal_mode=WAL")
+                    _cur = await _tdb.execute("""
+                        SELECT MAX(d.temperature_c)
+                        FROM drives d
+                        JOIN burnin_jobs bj ON bj.drive_id = d.id
+                        WHERE bj.state = 'running' AND d.temperature_c IS NOT NULL
+                    """)
+                    _row = await _cur.fetchone()
+                    _max_t = _row[0] if _row and _row[0] is not None else None
+                if _max_t is None:
+                    _state["thermal_pressure"] = "ok"
+                elif _max_t >= settings.temp_crit_c:
+                    _state["thermal_pressure"] = "crit"
+                elif _max_t >= settings.temp_warn_c:
+                    _state["thermal_pressure"] = "warn"
+                else:
+                    _state["thermal_pressure"] = "ok"
+            except Exception:
+                _state["thermal_pressure"] = "ok"
+
             _notify_subscribers()
 
             # Check for stuck jobs every 5 cycles (~1 min at default 12s interval)
diff --git a/app/routes.py b/app/routes.py
index ddff766..1cfadc2 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -15,7 +15,8 @@ from app.database import get_db
 from app.models import (
     BurninJobResponse, BurninStageResponse,
     CancelBurninRequest, DriveResponse,
-    SmartTestState, StartBurninRequest, UpdateDriveRequest,
+    SmartTestState, StartBurninRequest, UnlockPoolDriveRequest,
+    UpdateDriveRequest,
 )
 from app.renderer import templates
 
@@ -48,6 +49,22 @@ def _is_stale(last_polled_at: str) -> bool:
         return True
 
 
+def _compute_eta_seconds(started_at: str | None, percent: int) -> int | None:
+    """Linear ETA extrapolation from started_at and percent complete."""
+    if not started_at or percent <= 0:
+        return None
+    try:
+        start = datetime.fromisoformat(started_at)
+        if start.tzinfo is None:
+            start = start.replace(tzinfo=timezone.utc)
+        elapsed = (datetime.now(timezone.utc) - start).total_seconds()
+        total_est = elapsed / (percent / 100)
+        remaining = max(0, int(total_est - elapsed))
+        return remaining
+    except Exception:
+        return None
+
+
 def _build_smart(row: aiosqlite.Row, prefix: str) -> SmartTestState:
     eta_at = row[f"{prefix}_eta_at"]
     return SmartTestState(
@@ -76,6 +93,11 @@ def _row_to_drive(row: aiosqlite.Row) -> DriveResponse:
         smart_long=_build_smart(row, "long"),
         notes=row["notes"],
         location=row["location"],
+        pool_name=row["pool_name"],
+        pool_role=row["pool_role"],
+        pool_unlocked_until=burnin.unlock_expiry(
+            row["id"], row["pool_name"], row["pool_role"],
+        ),
     )
 
 
@@ -96,7 +118,7 @@ _DRIVES_QUERY = """
     SELECT
         d.id, d.devname, d.serial, d.model, d.size_bytes,
         d.temperature_c, d.smart_health, d.last_polled_at,
-        d.notes, d.location,
+        d.notes, d.location, d.pool_name, d.pool_role,
         s.state       AS short_state,
         s.percent     AS short_percent,
         s.started_at  AS short_started_at,
@@ -112,6 +134,7 @@ _DRIVES_QUERY = """
     FROM drives d
     LEFT JOIN smart_tests s ON s.drive_id = d.id AND s.test_type = 'short'
     LEFT JOIN smart_tests l ON l.drive_id = d.id AND l.test_type = 'long'
+    WHERE d.last_seen_at >= datetime('now', '-7 days')
     {where}
     ORDER BY d.devname
 """
@@ -138,11 +161,55 @@ async def _fetch_drives_for_template(db: aiosqlite.Connection) -> list[dict]:
     cur = await db.execute(_DRIVES_QUERY.format(where=""))
     rows = await cur.fetchall()
     burnin_by_drive = await _fetch_burnin_by_drive(db)
+
+    # For burn-ins that include SMART stages, fetch those stages so we can
+    # mirror their progress/result in the Short/Long SMART columns.
+    # This covers both running stages (showing live progress) and completed
+    # stages (showing passed/failed after the burn-in moves to the next stage).
+    bi_smart_stages: dict[int, dict[str, dict]] = {}  # job_id -> {stage_name: row}
+    bi_ids_with_smart = [
+        bi["id"] for bi in burnin_by_drive.values()
+        if bi["state"] in ("running", "queued")
+    ]
+    if bi_ids_with_smart:
+        placeholders = ",".join("?" * len(bi_ids_with_smart))
+        cur = await db.execute(f"""
+            SELECT bs.burnin_job_id, bs.stage_name, bs.state, bs.percent,
+                   bs.started_at, bs.finished_at, bs.error_text
+            FROM burnin_stages bs
+            WHERE bs.burnin_job_id IN ({placeholders})
+              AND bs.stage_name IN ('short_smart', 'long_smart')
+              AND bs.state IN ('running', 'passed', 'failed')
+        """, bi_ids_with_smart)
+        for r in await cur.fetchall():
+            bi_smart_stages.setdefault(r["burnin_job_id"], {})[r["stage_name"]] = dict(r)
+
     drives = []
     for row in rows:
         d = _row_to_drive(row).model_dump()
         d["status"] = _compute_status(d)
-        d["burnin"] = burnin_by_drive.get(d["id"])
+        bi = burnin_by_drive.get(d["id"])
+        d["burnin"] = bi
+
+        # Overlay burn-in SMART stage progress/results onto the SMART columns
+        if bi and bi["id"] in bi_smart_stages:
+            for stage_name, stage in bi_smart_stages[bi["id"]].items():
+                target = "smart_short" if stage_name == "short_smart" else "smart_long"
+                # Only overlay if the standalone SMART column is idle/empty
+                existing = d.get(target) or {}
+                if existing.get("state") not in (None, "idle"):
+                    continue
+                pct = stage["percent"] or 0
+                d[target] = {
+                    "state": stage["state"],
+                    "percent": pct if stage["state"] == "running" else (100 if stage["state"] == "passed" else 0),
+                    "eta_seconds": _compute_eta_seconds(stage["started_at"], pct) if stage["state"] == "running" else None,
+                    "eta_timestamp": None,
+                    "started_at": stage["started_at"],
+                    "finished_at": stage["finished_at"],
+                    "error_text": stage["error_text"],
+                }
+
         drives.append(d)
     return drives
 
@@ -170,7 +237,7 @@ def _stale_context(poller_state: dict) -> dict:
 async def dashboard(request: Request, db: aiosqlite.Connection = Depends(get_db)):
     drives = await _fetch_drives_for_template(db)
     ps = poller.get_state()
-    return templates.TemplateResponse("dashboard.html", {
+    return templates.TemplateResponse(request, "dashboard.html", {
         "request": request,
         "drives": drives,
         "poller": ps,
@@ -218,6 +285,18 @@ async def sse_drives(request: Request):
 
                 yield {"event": "drives-update", "data": html}
 
+                # Push system sensor state so JS can update temp chips live
+                ps = poller.get_state()
+                yield {
+                    "event": "system-sensors",
+                    "data": json.dumps({
+                        "system_temps":    ps.get("system_temps", {}),
+                        "thermal_pressure": ps.get("thermal_pressure", "ok"),
+                        "temp_warn_c":     settings.temp_warn_c,
+                        "temp_crit_c":     settings.temp_crit_c,
+                    }),
+                }
+
                 # Push browser notification event if this was a job completion
                 if alert:
                     yield {"event": "job-alert", "data": json.dumps(alert)}
@@ -258,7 +337,7 @@ async def list_drives(db: aiosqlite.Connection = Depends(get_db)):
 @router.get("/api/v1/drives/{drive_id}/drawer")
 async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db)):
     """Data for the log drawer — latest burn-in job + stages, SMART tests, audit events."""
-    cur = await db.execute(_DRIVES_QUERY.format(where="WHERE d.id = ?"), (drive_id,))
+    cur = await db.execute(_DRIVES_QUERY.format(where="AND d.id = ?"), (drive_id,))
     row = await cur.fetchone()
     if not row:
         raise HTTPException(status_code=404, detail="Drive not found")
@@ -339,7 +418,7 @@ async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db)
 @router.get("/api/v1/drives/{drive_id}", response_model=DriveResponse)
 async def get_drive(drive_id: int, db: aiosqlite.Connection = Depends(get_db)):
     cur = await db.execute(
-        _DRIVES_QUERY.format(where="WHERE d.id = ?"), (drive_id,)
+        _DRIVES_QUERY.format(where="AND d.id = ?"), (drive_id,)
     )
     row = await cur.fetchone()
     if not row:
@@ -353,9 +432,13 @@ async def smart_start(
     body: dict,
     db: aiosqlite.Connection = Depends(get_db),
 ):
-    """Start a standalone SHORT or LONG SMART test on a single drive."""
-    from app.truenas import TrueNASClient
-    from app import burnin as _burnin
+    """Start a standalone SHORT or LONG SMART test on a single drive.
+
+    Uses SSH (smartctl) when configured — required for TrueNAS SCALE 25.10+
+    where the REST smart/test endpoint no longer exists.
+    Falls back to TrueNAS REST API for older versions.
+    """
+    from app import burnin as _burnin, ssh_client
 
     test_type = (body.get("type") or "").upper()
     if test_type not in ("SHORT", "LONG"):
@@ -367,17 +450,42 @@ async def smart_start(
         raise HTTPException(status_code=404, detail="Drive not found")
     devname = row[0]
 
-    # Use the shared TrueNAS client held by the burnin module
-    client = _burnin._client
-    if client is None:
-        raise HTTPException(status_code=503, detail="TrueNAS client not ready")
+    now = datetime.now(timezone.utc).isoformat()
+    ttype_lower = test_type.lower()
 
-    try:
-        tn_job_id = await client.start_smart_test([devname], test_type)
-    except Exception as exc:
-        raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}")
+    if ssh_client.is_configured():
+        # SSH path — works on TrueNAS SCALE 25.10+ and CORE
+        try:
+            output = await ssh_client.start_smart_test(devname, test_type)
+        except Exception as exc:
+            raise HTTPException(status_code=502, detail=f"SSH error: {exc}")
 
-    return {"job_id": tn_job_id, "devname": devname, "type": test_type}
+        # Mark as running in DB (truenas_job_id=NULL signals SSH-managed test)
+        # Store smartctl start output as proof the test was initiated
+        await db.execute(
+            """INSERT INTO smart_tests (drive_id, test_type, state, percent, started_at, raw_output)
+               VALUES (?,?,?,?,?,?)
+               ON CONFLICT(drive_id, test_type) DO UPDATE SET
+                   state='running', percent=0, truenas_job_id=NULL,
+                   started_at=excluded.started_at, finished_at=NULL, error_text=NULL,
+                   raw_output=excluded.raw_output""",
+            (drive_id, ttype_lower, "running", 0, now, output),
+        )
+        await db.commit()
+        from app import poller as _poller
+        _poller._notify_subscribers()
+        return {"devname": devname, "type": test_type, "message": output[:200]}
+
+    else:
+        # REST path — older TrueNAS CORE / SCALE versions
+        client = _burnin._client
+        if client is None:
+            raise HTTPException(status_code=503, detail="TrueNAS client not ready")
+        try:
+            tn_job_id = await client.start_smart_test([devname], test_type)
+        except Exception as exc:
+            raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}")
+        return {"job_id": tn_job_id, "devname": devname, "type": test_type}
 
 
 @router.post("/api/v1/drives/{drive_id}/smart/cancel")
@@ -403,28 +511,37 @@ async def smart_cancel(
     if client is None:
         raise HTTPException(status_code=503, detail="TrueNAS client not ready")
 
-    # Find the running TrueNAS job for this drive/test-type
-    try:
-        jobs = await client.get_smart_jobs()
-        tn_job_id = None
-        for j in jobs:
-            if j.get("state") != "RUNNING":
-                continue
-            args = j.get("arguments", [])
-            if not args or not isinstance(args[0], dict):
-                continue
-            if devname in args[0].get("disks", []):
-                tn_job_id = j["id"]
-                break
+    from app import ssh_client
 
-        if tn_job_id is None:
-            raise HTTPException(status_code=404, detail="No running SMART test found for this drive")
+    if ssh_client.is_configured():
+        # SSH path — abort via smartctl -X
+        try:
+            await ssh_client.abort_smart_test(devname)
+        except Exception as exc:
+            raise HTTPException(status_code=502, detail=f"SSH abort error: {exc}")
+    else:
+        # REST path — find TrueNAS job and abort it
+        try:
+            jobs = await client.get_smart_jobs()
+            tn_job_id = None
+            for j in jobs:
+                if j.get("state") != "RUNNING":
+                    continue
+                args = j.get("arguments", [])
+                if not args or not isinstance(args[0], dict):
+                    continue
+                if devname in args[0].get("disks", []):
+                    tn_job_id = j["id"]
+                    break
 
-        await client.abort_job(tn_job_id)
-    except HTTPException:
-        raise
-    except Exception as exc:
-        raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}")
+            if tn_job_id is None:
+                raise HTTPException(status_code=404, detail="No running SMART test found for this drive")
+
+            await client.abort_job(tn_job_id)
+        except HTTPException:
+            raise
+        except Exception as exc:
+            raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}")
 
     # Update local DB state
     now = datetime.now(timezone.utc).isoformat()
@@ -479,13 +596,35 @@ async def burnin_start(req: StartBurninRequest):
                 drive_id, req.profile, req.operator, stage_order=req.stage_order
             )
             results.append({"drive_id": drive_id, "job_id": job_id})
+        except burnin.PoolMemberError as exc:
+            errors.append({
+                "drive_id": drive_id,
+                "error": str(exc),
+                "pool_name": exc.pool_name,
+                "pool_role": exc.pool_role,
+                "pool_locked": True,
+            })
         except ValueError as exc:
             errors.append({"drive_id": drive_id, "error": str(exc)})
     if errors and not results:
-        raise HTTPException(status_code=409, detail=errors[0]["error"])
+        # Surface the first error's structured fields so the UI can render
+        # an unlock affordance instead of a generic toast.
+        raise HTTPException(status_code=409, detail=errors[0])
     return {"queued": results, "errors": errors}
 
 
+@router.post("/api/v1/drives/{drive_id}/unlock")
+async def unlock_pool_drive(drive_id: int, req: UnlockPoolDriveRequest):
+    try:
+        expiry = await burnin.grant_pool_unlock(
+            drive_id, req.confirm_token, req.operator, req.reason,
+        )
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
+    return {"unlocked": True, "expires_at": expiry,
+            "ttl_seconds": burnin.UNLOCK_TTL_SECONDS}
+
+
 @router.post("/api/v1/burnin/{job_id}/cancel")
 async def burnin_cancel(job_id: int, req: CancelBurninRequest):
     ok = await burnin.cancel_job(job_id, req.operator)
@@ -562,7 +701,7 @@ async def history_list(
     jobs = [dict(r) for r in rows]
 
     ps = poller.get_state()
-    return templates.TemplateResponse("history.html", {
+    return templates.TemplateResponse(request, "history.html", {
         "request": request,
         "jobs": jobs,
         "active_state": state,
@@ -612,7 +751,7 @@ async def history_detail(
     job["stages"] = [dict(r) for r in await cur.fetchall()]
 
     ps = poller.get_state()
-    return templates.TemplateResponse("job_detail.html", {
+    return templates.TemplateResponse(request, "job_detail.html", {
         "request": request,
         "job": job,
         "poller": ps,
@@ -791,7 +930,7 @@ async def audit_log(
     cur = await db.execute(_AUDIT_QUERY)
     rows = [dict(r) for r in await cur.fetchall()]
     ps = poller.get_state()
-    return templates.TemplateResponse("audit.html", {
+    return templates.TemplateResponse(request, "audit.html", {
         "request":      request,
         "events":       rows,
         "event_colors": _AUDIT_EVENT_COLORS,
@@ -887,7 +1026,7 @@ async def stats_page(
     drives_total = (await cur.fetchone())[0]
 
     ps = poller.get_state()
-    return templates.TemplateResponse("stats.html", {
+    return templates.TemplateResponse(request, "stats.html", {
         "request":          request,
         "overall":          overall,
         "by_model":         by_model,
@@ -931,6 +1070,9 @@ async def settings_page(
         "temp_warn_c":               settings.temp_warn_c,
         "temp_crit_c":               settings.temp_crit_c,
         "bad_block_threshold":       settings.bad_block_threshold,
+        "surface_validate_block_size":   settings.surface_validate_block_size,
+        "surface_validate_block_buffer": settings.surface_validate_block_buffer,
+        "surface_validate_passes":       settings.surface_validate_passes,
         # SSH credentials (take effect immediately — each SSH call reads live settings)
         "ssh_host":                  settings.ssh_host,
         "ssh_port":                  settings.ssh_port,
@@ -948,7 +1090,7 @@ async def settings_page(
 
     from app import ssh_client as _ssh
     ps = poller.get_state()
-    return templates.TemplateResponse("settings.html", {
+    return templates.TemplateResponse(request, "settings.html", {
         "request":        request,
         "editable":       editable,
         "smtp_enabled":   bool(settings.smtp_host),
@@ -1069,7 +1211,7 @@ async def history_print(
     """, (job_id,))
     job["stages"] = [dict(r) for r in await cur.fetchall()]
 
-    return templates.TemplateResponse("job_print.html", {
+    return templates.TemplateResponse(request, "job_print.html", {
         "request": request,
         "job":     job,
     })
diff --git a/app/settings_store.py b/app/settings_store.py
index ace86df..f1bbac8 100644
--- a/app/settings_store.py
+++ b/app/settings_store.py
@@ -38,6 +38,9 @@ _EDITABLE: dict[str, type] = {
     "temp_warn_c":               int,
     "temp_crit_c":               int,
     "bad_block_threshold":       int,
+    "surface_validate_block_size":   int,
+    "surface_validate_block_buffer": int,
+    "surface_validate_passes":       int,
     # SSH credentials — take effect immediately (each connection reads live settings)
     "ssh_host":                  str,
     "ssh_port":                  int,
@@ -96,6 +99,26 @@ def _apply(data: dict) -> None:
             if key == "bad_block_threshold" and int(val) < 0:
                 log.warning("settings_store: bad_block_threshold must be >= 0 — ignoring")
                 continue
+            if key == "surface_validate_block_size":
+                # badblocks accepts any positive int but in practice the
+                # useful range is 512..1048576 and it should be a power of 2.
+                v = int(val)
+                if v < 512 or v > 1048576 or (v & (v - 1)) != 0:
+                    log.warning(
+                        "settings_store: surface_validate_block_size must be "
+                        "a power of 2 between 512 and 1048576 — ignoring %r", val
+                    )
+                    continue
+            if key == "surface_validate_block_buffer" and not (1 <= int(val) <= 4096):
+                log.warning(
+                    "settings_store: surface_validate_block_buffer must be 1..4096 — ignoring"
+                )
+                continue
+            if key == "surface_validate_passes" and not (0 <= int(val) <= 16):
+                log.warning(
+                    "settings_store: surface_validate_passes must be 0..16 — ignoring"
+                )
+                continue
             if key == "ssh_port" and not (1 <= int(val) <= 65535):
                 log.warning("settings_store: ssh_port out of range — ignoring")
                 continue
diff --git a/app/ssh_client.py b/app/ssh_client.py
index 183c650..f84ca9c 100644
--- a/app/ssh_client.py
+++ b/app/ssh_client.py
@@ -38,15 +38,26 @@ SMART_ATTRS: dict[int, tuple[str, bool]] = {
 # ---------------------------------------------------------------------------
 
 def is_configured() -> bool:
-    """Returns True when SSH credentials are present and usable."""
+    """Returns True when SSH host + at least one auth method is available."""
+    import os
     from app.config import settings
-    return bool(settings.ssh_host and (settings.ssh_password or settings.ssh_key))
+    if not settings.ssh_host:
+        return False
+    has_creds = bool(
+        settings.ssh_key
+        or settings.ssh_password
+        or os.path.exists(os.environ.get("SSH_KEY_FILE", _MOUNTED_KEY_PATH))
+    )
+    return has_creds
 
 
 # ---------------------------------------------------------------------------
 # Low-level connection
 # ---------------------------------------------------------------------------
 
+_MOUNTED_KEY_PATH = "/run/secrets/ssh_key"
+
+
 async def _connect():
     """Open a single-use SSH connection. Caller must use `async with`."""
     import asyncssh
@@ -59,9 +70,17 @@ async def _connect():
         "known_hosts": None,          # trust all hosts (same spirit as TRUENAS_VERIFY_TLS=false)
     }
     if settings.ssh_key:
+        # Key material provided via env var (base case)
         kwargs["client_keys"] = [asyncssh.import_private_key(settings.ssh_key)]
-    if settings.ssh_password:
+    elif settings.ssh_password:
         kwargs["password"] = settings.ssh_password
+    else:
+        # Fall back to mounted key file (preferred for production — no key in env vars)
+        import os
+        key_path = os.environ.get("SSH_KEY_FILE", _MOUNTED_KEY_PATH)
+        if os.path.exists(key_path):
+            kwargs["client_keys"] = [key_path]
+        # If nothing is configured, asyncssh will attempt agent/default key lookup
 
     return asyncssh.connect(**kwargs)
 
@@ -228,6 +247,294 @@ async def run_badblocks(
     }
 
 
+def _parse_zpool_list_output(stdout: str) -> dict:
+    """Pure parser for `zpool list -vHP` stdout. Exposed for unit tests.
+
+    See get_pool_membership() for output semantics. This function never
+    raises — malformed lines are silently skipped.
+    """
+    import re as _re
+
+    def _strip_partition(name: str) -> str:
+        m = _re.match(r"^(nvme\d+n\d+)", name)
+        if m:
+            return m.group(1)
+        m = _re.match(r"^(sd[a-z]+)", name)
+        if m:
+            return m.group(1)
+        return name
+
+    SECTION_MARKERS = {"cache", "log", "logs", "spare", "spares",
+                       "special", "dedup"}
+    SECTION_NORMALIZE = {"logs": "log", "spares": "spare"}
+
+    out: dict = {}
+    current_pool: str | None = None
+    current_role: str = "data"
+
+    for raw in stdout.splitlines():
+        if not raw.strip():
+            continue
+        depth = 0
+        while depth < len(raw) and raw[depth] == "\t":
+            depth += 1
+        first = raw[depth:].split("\t", 1)[0].strip()
+
+        if depth == 0:
+            current_pool = first
+            current_role = "data"
+            continue
+
+        if depth == 1:
+            if first in SECTION_MARKERS:
+                current_role = SECTION_NORMALIZE.get(first, first)
+                continue
+            if first.startswith(("mirror", "raidz", "draid")):
+                continue
+            if first.startswith("/dev/") and current_pool:
+                dn = _strip_partition(first[len("/dev/"):])
+                out[dn] = {"pool": current_pool, "role": current_role}
+            continue
+
+        if first.startswith("/dev/") and current_pool:
+            dn = _strip_partition(first[len("/dev/"):])
+            out[dn] = {"pool": current_pool, "role": current_role}
+
+    return out
+
+
+def _parse_lsblk_zfs_output(stdout: str) -> set:
+    """Pure parser for `lsblk -no NAME,FSTYPE -l` stdout. Returns base
+    devnames carrying ZFS labels (whole-disk OR via partition). Exposed
+    for unit tests."""
+    import re as _re
+    out: set = set()
+    for line in stdout.splitlines():
+        parts = line.split()
+        if len(parts) < 2:
+            continue
+        name, fstype = parts[0], parts[1]
+        if fstype != "zfs_member":
+            continue
+        if name.startswith("nvme"):
+            m = _re.match(r"^(nvme\d+n\d+)", name)
+            if m:
+                out.add(m.group(1))
+        else:
+            m = _re.match(r"^(sd[a-z]+)", name)
+            if m:
+                out.add(m.group(1))
+    return out
+
+
+async def get_pool_membership() -> dict | None:
+    """Return {devname: {"pool": str, "role": str}} for every drive in any zpool.
+
+    Parses `zpool list -vHP` output. Tab-indent depth tells us structure:
+        depth 0  pool name line
+        depth 1  vdev type line (mirror-N, raidz*N, draid*) OR section
+                 marker (cache/log/spare/special/dedup/logs) OR a single-disk
+                 vdev that is itself a /dev/... entry
+        depth 2  device line within a vdev — '/dev/sdX', '/dev/nvmeXnY', etc.
+                 may have a partition suffix that we strip back to the
+                 base devname so it matches what TrueNAS reports.
+    Roles: data | cache | log | spare | special | dedup
+    Returns:
+      - {}     when the SSH call succeeded and there are genuinely no pools
+      - None   on any failure (SSH down, parse error, non-zero exit, no
+               stdout). Callers MUST treat None differently from {}: an
+               empty dict is "definitely no pool members," None is "we
+               couldn't tell." Treating None as "no pool members" is a
+               fail-open security regression.
+    """
+    import re as _re
+    if not is_configured():
+        return {}
+    cmd = "zpool list -vHP 2>/dev/null"
+    try:
+        async with await _connect() as conn:
+            r = await conn.run(cmd, check=False)
+        if r.returncode != 0:
+            return None
+    except Exception:
+        return None
+    if not r.stdout:
+        # rc==0 with empty output = host has no pools. (`zpool list -H`
+        # returns no rows when zero pools are imported.) That's a real
+        # answer, not a failure.
+        return {}
+    return _parse_zpool_list_output(r.stdout)
+
+
+async def get_smart_health_map(devnames: list[str]) -> dict | None:
+    """Return {devname: 'PASSED'|'FAILED'|'UNKNOWN'} for every devname.
+
+    Runs `smartctl -H` for each disk in a single SSH session — much faster
+    than one connection per disk. Returns None on any SSH failure so the
+    poller can fall back to the previously-stored health value rather than
+    silently overwriting everything as 'UNKNOWN'.
+
+    `smartctl -H` is the cheap SMART self-assessment lookup (no full
+    attribute scan) — milliseconds per drive. The output format is stable:
+        SMART overall-health self-assessment test result: PASSED
+        SMART overall-health self-assessment test result: FAILED!
+    For drives that don't support the command at all, smartctl exits
+    non-zero and we record UNKNOWN for that device specifically.
+    """
+    if not is_configured() or not devnames:
+        return {} if devnames else None
+    # Build one shell pipeline that prefixes each result with "@@DEVNAME@@"
+    # so we can split the combined stdout deterministically.
+    parts = []
+    for d in devnames:
+        # Reject anything that doesn't look like a basic devname so we
+        # never inject shell metacharacters into the remote command.
+        if not d.replace("nvme", "").replace("n", "").replace("p", "").replace("sd", "").isalnum():
+            continue
+        parts.append(f"echo '@@{d}@@'; smartctl -H /dev/{d} 2>&1; echo '@@END@@'")
+    if not parts:
+        return {}
+    cmd = "; ".join(parts)
+    try:
+        async with await _connect() as conn:
+            r = await asyncio.wait_for(conn.run(cmd, check=False), timeout=30)
+    except Exception:
+        return None
+    if not r.stdout:
+        return None
+    return _parse_smart_health_batch(r.stdout)
+
+
+def _parse_smart_health_batch(stdout: str) -> dict:
+    """Pure parser for the batched smartctl -H output. Exposed for tests."""
+    result: dict[str, str] = {}
+    current: str | None = None
+    buf: list[str] = []
+
+    def _flush():
+        if current is None:
+            return
+        text = "\n".join(buf)
+        if "PASSED" in text:
+            result[current] = "PASSED"
+        elif "FAILED" in text or "FAILURE" in text:
+            result[current] = "FAILED"
+        else:
+            result[current] = "UNKNOWN"
+
+    for raw in stdout.splitlines():
+        line = raw.strip()
+        if line.startswith("@@") and line.endswith("@@"):
+            inner = line[2:-2]
+            if inner == "END":
+                _flush()
+                current = None
+                buf = []
+            else:
+                _flush()
+                current = inner
+                buf = []
+        else:
+            buf.append(line)
+    _flush()
+    return result
+
+
+async def get_zfs_member_drives() -> set | None:
+    """Return devnames of every drive whose partitions carry a ZFS label.
+
+    Combined with get_pool_membership(): a drive in this set but NOT in the
+    active-pool map carries ZFS data from a previously-imported pool that
+    was exported (or imported on a different system). We treat those as
+    locked too — wiping them would silently destroy a pool.
+
+    Returns:
+      - set()  when lsblk succeeded and no drives carry ZFS labels
+      - None   on any failure. Same fail-closed semantics as
+               get_pool_membership() — callers must NOT treat None as
+               "no exported drives," that's a security regression.
+    """
+    if not is_configured():
+        return set()
+    cmd = "lsblk -no NAME,FSTYPE -l 2>/dev/null"
+    try:
+        async with await _connect() as conn:
+            r = await conn.run(cmd, check=False)
+        if r.returncode != 0:
+            return None
+    except Exception:
+        return None
+    if not r.stdout:
+        # lsblk with rc==0 and no output is impossible on a normal Linux
+        # host; treat as failure rather than "no drives at all."
+        return None
+    return _parse_lsblk_zfs_output(r.stdout)
+
+
+async def get_system_sensors() -> dict:
+    """
+    Run `sensors -j` on TrueNAS and extract system-level temperatures.
+    Returns {"cpu_c": int|None, "pch_c": int|None}.
+    cpu_c  = CPU package temp (coretemp chip)
+    pch_c  = PCH/chipset temp (pch_* chip) — proxy for storage I/O lane thermals
+    Falls back gracefully if SSH is not configured or lm-sensors is unavailable.
+    """
+    if not is_configured():
+        return {}
+    try:
+        async with await _connect() as conn:
+            result = await conn.run("sensors -j 2>/dev/null", check=False)
+            output = result.stdout.strip()
+            if not output:
+                return {}
+            return _parse_sensors_json(output)
+    except Exception as exc:
+        log.debug("get_system_sensors failed: %s", exc)
+        return {}
+
+
+def _parse_sensors_json(output: str) -> dict:
+    import json as _json
+    try:
+        data = _json.loads(output)
+    except Exception:
+        return {}
+
+    cpu_c: int | None = None
+    pch_c: int | None = None
+
+    for chip_name, chip_data in data.items():
+        if not isinstance(chip_data, dict):
+            continue
+
+        # CPU package temp — coretemp chip, "Package id N" sensor
+        if chip_name.startswith("coretemp") and cpu_c is None:
+            for sensor_name, sensor_vals in chip_data.items():
+                if not isinstance(sensor_vals, dict):
+                    continue
+                if "package" in sensor_name.lower():
+                    for k, v in sensor_vals.items():
+                        if k.endswith("_input") and isinstance(v, (int, float)):
+                            cpu_c = int(round(v))
+                            break
+                if cpu_c is not None:
+                    break
+
+        # PCH / chipset temp — manages PCIe lanes including HBA / storage I/O
+        elif chip_name.startswith("pch_") and pch_c is None:
+            for sensor_name, sensor_vals in chip_data.items():
+                if not isinstance(sensor_vals, dict):
+                    continue
+                for k, v in sensor_vals.items():
+                    if k.endswith("_input") and isinstance(v, (int, float)):
+                        pch_c = int(round(v))
+                        break
+                if pch_c is not None:
+                    break
+
+    return {"cpu_c": cpu_c, "pch_c": pch_c}
+
+
 # ---------------------------------------------------------------------------
 # Parsers
 # ---------------------------------------------------------------------------
@@ -275,7 +582,7 @@ def _parse_smartctl(output: str) -> dict:
 
 def _parse_smart_progress(output: str) -> dict:
     state = "unknown"
-    percent_remaining = 0
+    percent_remaining = None  # None = "in progress but no % line parsed yet"
 
     lower = output.lower()
 
diff --git a/app/static/app.css b/app/static/app.css
index 94dafc4..094f58f 100644
--- a/app/static/app.css
+++ b/app/static/app.css
@@ -281,7 +281,11 @@ tr:hover td {
 .col-size    { min-width: 70px; text-align: right; }
 .col-temp    { min-width: 75px; text-align: right; }
 .col-health  { min-width: 85px; }
-.col-smart   { min-width: 150px; }
+.col-smart   { min-width: 95px; }
+/* Tighter horizontal padding on the SMART columns — they hold short
+   pills ("Passed"/"—") or a progress bar, so the default 14px gutter
+   wastes space on 13" laptops. */
+th.col-smart, td.col-smart { padding-left: 6px; padding-right: 6px; }
 .col-actions { min-width: 170px; }
 
 /* -----------------------------------------------------------------------
@@ -1076,6 +1080,56 @@ a.stat-card:hover {
 .stat-passed  .stat-value { color: var(--green); }
 .stat-idle    .stat-value { color: var(--text-muted); }
 
+/* Vertical separator between drive-count cards and sensor chips */
+.stats-bar-sep {
+  width: 1px;
+  height: 36px;
+  background: var(--border);
+  align-self: center;
+  flex-shrink: 0;
+}
+
+/* Compact sensor chip — CPU / PCH / Thermal */
+.stat-sensor {
+  background: var(--bg-card);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  padding: 6px 12px;
+  text-align: center;
+  min-width: 52px;
+  display: flex;
+  flex-direction: column;
+  gap: 2px;
+}
+
+.stat-sensor-val {
+  font-size: 16px;
+  font-weight: 700;
+  font-variant-numeric: tabular-nums;
+  line-height: 1.1;
+}
+
+.stat-sensor-label {
+  font-size: 9px;
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+  color: var(--text-muted);
+  line-height: 1.2;
+}
+
+/* Thermal pressure states */
+.stat-sensor-thermal-warn {
+  border-color: var(--yellow-bd);
+  background: var(--yellow-bg);
+}
+.stat-sensor-thermal-warn .stat-sensor-val { color: var(--yellow); }
+
+.stat-sensor-thermal-crit {
+  border-color: var(--red-bd);
+  background: var(--red-bg);
+}
+.stat-sensor-thermal-crit .stat-sensor-val { color: var(--red); }
+
 /* -----------------------------------------------------------------------
    Batch action bar (inside filter-bar)
 ----------------------------------------------------------------------- */
@@ -2372,6 +2426,85 @@ tr.drawer-row-active {
   color: var(--yellow);
 }
 
+/* -----------------------------------------------------------------------
+   Pool-membership lock indicators
+----------------------------------------------------------------------- */
+.pool-lock-icon {
+  display: inline-block;
+  margin-right: 4px;
+  font-size: 12px;
+  color: var(--yellow);
+  vertical-align: baseline;
+}
+.pool-lock-icon.pool-lock-boot {
+  color: var(--red, #e25555);
+}
+.pool-pill {
+  display: inline-block;
+  margin-top: 3px;
+  padding: 1px 7px;
+  font-size: 10.5px;
+  font-weight: 600;
+  letter-spacing: 0.3px;
+  text-transform: uppercase;
+  border-radius: 4px;
+  background: color-mix(in srgb, var(--yellow) 14%, transparent);
+  color: var(--yellow);
+  border: 1px solid color-mix(in srgb, var(--yellow) 35%, transparent);
+}
+.pool-pill.pool-pill-boot {
+  background: color-mix(in srgb, var(--red, #e25555) 16%, transparent);
+  color: var(--red, #e25555);
+  border-color: color-mix(in srgb, var(--red, #e25555) 45%, transparent);
+}
+.pool-pill.pool-pill-exported {
+  background: color-mix(in srgb, #e07a3f 16%, transparent);
+  color: #e07a3f;
+  border-color: color-mix(in srgb, #e07a3f 45%, transparent);
+}
+.pool-lock-icon.pool-lock-exported {
+  color: #e07a3f;
+}
+.btn-unlock {
+  background: transparent;
+  border: 1px solid color-mix(in srgb, var(--yellow) 50%, transparent);
+  color: var(--yellow);
+  border-radius: 5px;
+  padding: 3px 9px;
+  font-size: 12px;
+  cursor: pointer;
+  transition: background .15s, color .15s, border-color .15s;
+}
+.btn-unlock:hover {
+  background: color-mix(in srgb, var(--yellow) 14%, transparent);
+}
+.btn-unlock-boot {
+  border-color: color-mix(in srgb, var(--red, #e25555) 55%, transparent);
+  color: var(--red, #e25555);
+}
+.btn-unlock-boot:hover {
+  background: color-mix(in srgb, var(--red, #e25555) 14%, transparent);
+}
+.btn-unlock-exported {
+  border-color: color-mix(in srgb, #e07a3f 55%, transparent);
+  color: #e07a3f;
+}
+.btn-unlock-exported:hover {
+  background: color-mix(in srgb, #e07a3f 14%, transparent);
+}
+.unlock-countdown {
+  margin-left: 4px;
+  font-size: 11px;
+  color: var(--green, #39c179);
+  font-variant-numeric: tabular-nums;
+}
+.unlock-countdown-expired {
+  color: var(--yellow);
+}
+.modal.modal-danger {
+  border-top: 3px solid var(--red, #e25555);
+}
+
 /* -----------------------------------------------------------------------
    Parallel burn-in inline warning
 ----------------------------------------------------------------------- */
@@ -2409,41 +2542,3 @@ tr.drawer-row-active {
   font-variant-numeric: tabular-nums;
 }
 
-/* -----------------------------------------------------------------------
-   Live Terminal drawer panel (xterm.js)
------------------------------------------------------------------------ */
-.drawer-panel-terminal {
-  padding: 0 !important;
-  overflow: hidden !important;
-  position: relative;
-  background: #0d1117;
-}
-
-/* Let xterm fill the full panel height */
-.drawer-panel-terminal .xterm {
-  height: 100%;
-}
-.drawer-panel-terminal .xterm-viewport {
-  overflow-y: auto !important;
-}
-
-/* Reconnect bar — floats over the terminal when disconnected */
-.term-reconnect-bar {
-  position: absolute;
-  bottom: 12px;
-  right: 12px;
-  z-index: 20;
-  display: flex;
-  align-items: center;
-  gap: 8px;
-  background: rgba(13,17,23,0.85);
-  border: 1px solid var(--border);
-  border-radius: 6px;
-  padding: 6px 10px;
-  font-size: 12px;
-  color: var(--text-muted);
-}
-.term-reconnect-bar .btn-secondary {
-  padding: 3px 10px;
-  font-size: 11px;
-}
diff --git a/app/static/app.js b/app/static/app.js
index ca59f68..c840936 100644
--- a/app/static/app.js
+++ b/app/static/app.js
@@ -68,6 +68,7 @@
     applyFilter(activeFilter);
     restoreCheckboxes();
     initElapsedTimers();
+    initUnlockCountdowns();
     initLocationEdits();
     if (_drawerDriveId) {
       _drawerHighlightRow(_drawerDriveId);
@@ -135,14 +136,59 @@
     if (nb) nb.style.display = 'none';
   }
 
-  // Handle job-alert SSE events for browser notifications
+  // Handle SSE events
   document.addEventListener('htmx:sseMessage', function (e) {
-    if (!e.detail || e.detail.type !== 'job-alert') return;
-    try {
-      handleJobAlert(JSON.parse(e.detail.data));
-    } catch (_) {}
+    if (!e.detail) return;
+    if (e.detail.type === 'job-alert') {
+      try { handleJobAlert(JSON.parse(e.detail.data)); } catch (_) {}
+    } else if (e.detail.type === 'system-sensors') {
+      try { handleSystemSensors(JSON.parse(e.detail.data)); } catch (_) {}
+    }
   });
 
+  function handleSystemSensors(data) {
+    var st   = data.system_temps  || {};
+    var tp   = data.thermal_pressure || 'ok';
+    var warn = data.temp_warn_c   || 46;
+    var crit = data.temp_crit_c   || 55;
+
+    function tempClass(c) {
+      if (c == null) return '';
+      return c >= crit ? 'temp-hot' : c >= warn ? 'temp-warm' : 'temp-cool';
+    }
+
+    // CPU chip
+    var cpuChip = document.getElementById('sensor-cpu');
+    var cpuVal  = document.getElementById('sensor-cpu-val');
+    if (cpuVal && st.cpu_c != null) {
+      if (cpuChip) cpuChip.hidden = false;
+      cpuVal.textContent = st.cpu_c + '°';
+      cpuVal.className   = 'stat-sensor-val ' + tempClass(st.cpu_c);
+    }
+
+    // PCH chip
+    var pchChip = document.getElementById('sensor-pch');
+    var pchVal  = document.getElementById('sensor-pch-val');
+    if (pchVal && st.pch_c != null) {
+      if (pchChip) pchChip.hidden = false;
+      pchVal.textContent = st.pch_c + '°';
+      pchVal.className   = 'stat-sensor-val ' + tempClass(st.pch_c);
+    }
+
+    // Thermal pressure chip
+    var tChip = document.getElementById('sensor-thermal');
+    var tVal  = document.getElementById('sensor-thermal-val');
+    if (tChip && tVal) {
+      if (tp === 'warn' || tp === 'crit') {
+        tChip.hidden = false;
+        tChip.className = 'stat-sensor stat-sensor-thermal stat-sensor-thermal-' + tp;
+        tVal.textContent = tp === 'warn' ? 'WARM' : 'HOT';
+      } else {
+        tChip.hidden = true;
+      }
+    }
+  }
+
   function handleJobAlert(data) {
     var isPass   = data.state === 'passed';
     var icon     = isPass ? '✓' : '✕';
@@ -203,6 +249,41 @@
 
   initElapsedTimers();
 
+  // Live countdown for pool-drive unlock TTL — runs once per second; ticker
+  // self-stops when no .unlock-countdown spans remain on the page.
+  var _unlockTickInterval = null;
+  function tickUnlockCountdowns() {
+    var spans = document.querySelectorAll('.unlock-countdown[data-expires]');
+    if (spans.length === 0) {
+      if (_unlockTickInterval) {
+        clearInterval(_unlockTickInterval);
+        _unlockTickInterval = null;
+      }
+      return;
+    }
+    var nowSec = Date.now() / 1000;
+    spans.forEach(function (el) {
+      var exp = parseFloat(el.dataset.expires);
+      if (!exp || isNaN(exp)) return;
+      var rem = Math.max(0, exp - nowSec);
+      if (rem <= 0) {
+        el.textContent = 'expired';
+        el.className = 'unlock-countdown unlock-countdown-expired';
+        return;
+      }
+      var m = Math.floor(rem / 60);
+      var s = Math.floor(rem % 60);
+      el.textContent = '\u{1F513} ' + m + ':' + (s < 10 ? '0' : '') + s;
+    });
+  }
+  function initUnlockCountdowns() {
+    if (_unlockTickInterval) return;
+    if (document.querySelectorAll('.unlock-countdown[data-expires]').length === 0) return;
+    _unlockTickInterval = setInterval(tickUnlockCountdowns, 1000);
+    tickUnlockCountdowns();
+  }
+  initUnlockCountdowns();
+
   // -----------------------------------------------------------------------
   // Inline location / notes edit
   // -----------------------------------------------------------------------
@@ -538,7 +619,16 @@
 
       var data = await resp.json();
       if (!resp.ok) {
-        showToast(data.detail || 'Failed to start burn-in', 'error');
+        // detail may be the structured pool-locked object {drive_id,
+        // pool_name, pool_role, pool_locked: true, error: "..."}.
+        // The user already opened the start modal, so the unlock TTL must
+        // have just expired between modal-open and submit. Auto-flip to
+        // the unlock modal for that drive.
+        if (_handlePoolLockedError(data.detail)) {
+          closeModal();
+          return;
+        }
+        showToast(_extractErrorMessage(data.detail) || 'Failed to start burn-in', 'error');
         return;
       }
 
@@ -549,6 +639,161 @@
     }
   }
 
+  // Helpers shared between single-drive and batch start error paths.
+  // Backend returns either a string (legacy errors) or, for pool-locked
+  // drives, an object: {drive_id, error, pool_name, pool_role, pool_locked}.
+  function _extractErrorMessage(detail) {
+    if (!detail) return null;
+    if (typeof detail === 'string') return detail;
+    if (typeof detail === 'object' && detail.error) return detail.error;
+    return null;
+  }
+  // Returns true if it handled a pool-locked error by opening the unlock
+  // modal for the offending drive. Caller should bail out.
+  function _handlePoolLockedError(detail) {
+    if (!detail || typeof detail !== 'object' || !detail.pool_locked) return false;
+    var driveId = detail.drive_id;
+    if (driveId == null) return false;
+    var btn = document.querySelector('.btn-unlock[data-drive-id="' + driveId + '"]');
+    if (btn) {
+      // openUnlockModal closes any other open modals as a side effect of
+      // calling its own close handlers; we still need to close the
+      // start/batch modal explicitly in the caller, since openUnlockModal
+      // doesn't know which one is open.
+      openUnlockModal(btn);
+      return true;
+    }
+    // Unlock button not in the DOM (drive row may have refreshed).
+    // Surface a descriptive toast instead of [object Object].
+    showToast(
+      (detail.error || 'Drive is pool-locked') +
+      ' Reload the page and click Unlock on the drive row.',
+      'error',
+    );
+    return true;
+  }
+
+  // -----------------------------------------------------------------------
+  // Pool-drive Unlock modal
+  // -----------------------------------------------------------------------
+
+  var unlockDriveId = null;
+  var unlockExpectedToken = null;
+
+  function openUnlockModal(btn) {
+    unlockDriveId       = btn.dataset.driveId;
+    var poolName        = btn.dataset.poolName  || '';
+    var poolRole        = btn.dataset.poolRole  || 'data';
+    var isBoot          = btn.dataset.isBootPool === '1';
+    var isExported      = btn.dataset.isExported === '1';
+    if (isBoot)            unlockExpectedToken = 'DESTROY BOOT POOL';
+    else if (isExported)   unlockExpectedToken = 'DESTROY EXPORTED POOL';
+    else                   unlockExpectedToken = poolName;
+
+    document.getElementById('unlock-devname').textContent = btn.dataset.devname || '—';
+    document.getElementById('unlock-model').textContent   = btn.dataset.model   || '—';
+    document.getElementById('unlock-serial').textContent  = btn.dataset.serial  || '—';
+    document.getElementById('unlock-size').textContent    = btn.dataset.size    || '—';
+
+    var chip = document.getElementById('unlock-pool-chip');
+    if (isExported) {
+      chip.textContent = 'exported ZFS';
+      chip.className   = 'chip chip-aborted';
+    } else {
+      chip.textContent = poolName + ' · ' + poolRole;
+      chip.className   = 'chip ' + (isBoot ? 'chip-failed' : 'chip-aborted');
+    }
+
+    var titleEl = document.getElementById('unlock-modal-title');
+    var warnTitle = document.getElementById('unlock-warning-title');
+    var warnBody  = document.getElementById('unlock-warning-body');
+    if (isBoot) {
+      titleEl.textContent = 'Unlock BOOT POOL drive';
+      warnTitle.textContent = 'This is a TrueNAS BOOT drive.';
+      warnBody.textContent =
+        'Running burn-in on this drive will destroy the operating system on it. ' +
+        'If this drive is half of a mirrored boot pool, the system will continue running on the other mirror, ' +
+        'but you must already have a replacement plan. Proceeding without one bricks the host.';
+    } else if (isExported) {
+      titleEl.textContent = 'Unlock drive with EXPORTED ZFS data';
+      warnTitle.textContent = 'This drive carries ZFS data from a previously-imported pool.';
+      warnBody.textContent =
+        "TrueNAS isn't using this pool right now, but the drive still holds the labels and data. " +
+        'Burning it in will silently destroy whatever pool that data belongs to — including ' +
+        'pools that another system may be relying on. Confirm you have already evacuated or ' +
+        'reassigned the pool before continuing.';
+    } else {
+      titleEl.textContent = 'Unlock pool drive';
+      warnTitle.textContent = "This drive belongs to zpool '" + poolName + "'.";
+      warnBody.textContent =
+        'Running a destructive burn-in stage will overwrite all data on this drive ' +
+        'and almost certainly destroy the pool. Only proceed if you have already ' +
+        'removed this drive from the pool, or if you are intentionally decommissioning the pool.';
+    }
+    document.getElementById('unlock-confirm-token').textContent = unlockExpectedToken;
+    document.getElementById('unlock-confirm-hint').textContent  = 'Expected: ' + unlockExpectedToken;
+
+    document.getElementById('unlock-confirm-input').value = '';
+    document.getElementById('unlock-reason-input').value  = '';
+    var savedOp = localStorage.getItem('burnin_operator') || '';
+    document.getElementById('unlock-operator-input').value = savedOp;
+    validateUnlockModal();
+
+    document.getElementById('unlock-modal').removeAttribute('hidden');
+    setTimeout(function () {
+      document.getElementById('unlock-operator-input').focus();
+    }, 50);
+  }
+
+  function closeUnlockModal() {
+    document.getElementById('unlock-modal').setAttribute('hidden', '');
+    unlockDriveId = null;
+    unlockExpectedToken = null;
+  }
+
+  function validateUnlockModal() {
+    var op  = (document.getElementById('unlock-operator-input').value || '').trim();
+    var rsn = (document.getElementById('unlock-reason-input').value   || '').trim();
+    var tok = (document.getElementById('unlock-confirm-input').value  || '').trim();
+    var ok  = op.length > 0 && rsn.length >= 5 && tok === unlockExpectedToken;
+    document.getElementById('unlock-modal-submit-btn').disabled = !ok;
+  }
+
+  async function submitUnlock() {
+    var op  = (document.getElementById('unlock-operator-input').value || '').trim();
+    var rsn = (document.getElementById('unlock-reason-input').value   || '').trim();
+    var tok = (document.getElementById('unlock-confirm-input').value  || '').trim();
+    localStorage.setItem('burnin_operator', op);
+
+    var btn = document.getElementById('unlock-modal-submit-btn');
+    btn.disabled = true;
+
+    try {
+      var resp = await fetch('/api/v1/drives/' + unlockDriveId + '/unlock', {
+        method:  'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body:    JSON.stringify({
+          confirm_token: tok,
+          operator:      op,
+          reason:        rsn,
+        }),
+      });
+      var data = await resp.json();
+      if (!resp.ok) {
+        showToast(data.detail || 'Unlock failed', 'error');
+        btn.disabled = false;
+        return;
+      }
+      closeUnlockModal();
+      showToast('Unlocked for 10 minutes — start burn-in now to use it.', 'success');
+      // Force a drive list refresh so the row flips from Unlock → Burn-In
+      if (typeof refreshDrives === 'function') refreshDrives();
+    } catch (err) {
+      showToast('Network error', 'error');
+      btn.disabled = false;
+    }
+  }
+
   // -----------------------------------------------------------------------
   // Batch Burn-In
   // -----------------------------------------------------------------------
@@ -729,7 +974,11 @@
       });
       var data = await resp.json();
       if (!resp.ok) {
-        showToast(data.detail || 'Failed to queue batch', 'error');
+        if (_handlePoolLockedError(data.detail)) {
+          closeBatchModal();
+          return;
+        }
+        showToast(_extractErrorMessage(data.detail) || 'Failed to queue batch', 'error');
         if (btn) btn.disabled = false;
         return;
       }
@@ -737,11 +986,18 @@
       closeBatchModal();
       checkedDriveIds.clear();
       updateBatchBar();
-      var queued  = (data.queued  || []).length;
-      var errors  = (data.errors  || []).length;
-      var msg = queued + ' burn-in(s) queued';
-      if (errors) msg += ', ' + errors + ' skipped (already active)';
-      showToast(msg, errors && !queued ? 'error' : 'success');
+      var queued     = (data.queued || []).length;
+      var allErrors  = data.errors || [];
+      var poolLocked = allErrors.filter(function (e) { return e && e.pool_locked; });
+      var alreadyActive = allErrors.length - poolLocked.length;
+
+      var parts = [queued + ' burn-in(s) queued'];
+      if (alreadyActive) parts.push(alreadyActive + ' skipped (already active)');
+      if (poolLocked.length) {
+        parts.push(poolLocked.length + ' pool-locked (use Unlock on each row)');
+      }
+      var tone = (queued === 0 && allErrors.length) ? 'error' : 'success';
+      showToast(parts.join(', '), tone);
     } catch (err) {
       showToast('Network error', 'error');
       if (btn) btn.disabled = false;
@@ -792,6 +1048,10 @@
     var cancelSmartBtn = e.target.closest('.btn-cancel-smart');
     if (cancelSmartBtn && !cancelSmartBtn.disabled) { cancelSmartTest(cancelSmartBtn); return; }
 
+    // Pool-drive unlock button (single drive)
+    var unlockBtn = e.target.closest('.btn-unlock');
+    if (unlockBtn && !unlockBtn.disabled) { openUnlockModal(unlockBtn); return; }
+
     // Burn-in start button (single drive)
     var startBtn = e.target.closest('.btn-start');
     if (startBtn && !startBtn.disabled) { openModal(startBtn); return; }
@@ -820,6 +1080,14 @@
       return;
     }
 
+    // Unlock modal
+    if (e.target.closest('#unlock-modal-close-btn') || e.target.closest('#unlock-modal-cancel-btn')) {
+      closeUnlockModal();
+      return;
+    }
+    if (e.target.id === 'unlock-modal') { closeUnlockModal(); return; }
+    if (e.target.id === 'unlock-modal-submit-btn') { submitUnlock(); return; }
+
     // Batch modal close
     if (e.target.closest('#batch-modal-close-btn') || e.target.closest('#batch-modal-cancel-btn')) {
       closeBatchModal();
@@ -837,11 +1105,15 @@
 
   document.addEventListener('input', function (e) {
     var id = e.target.id;
+    if (id === 'unlock-operator-input' || id === 'unlock-reason-input' ||
+        id === 'unlock-confirm-input') validateUnlockModal();
     if (id === 'operator-input' || id === 'confirm-serial') validateModal();
   });
 
   document.addEventListener('keydown', function (e) {
     if (e.key === 'Escape') {
+      var uModal = document.getElementById('unlock-modal');
+      if (uModal && !uModal.hidden) { closeUnlockModal(); return; }
       var modal  = document.getElementById('start-modal');
       if (modal && !modal.hidden) { closeModal(); return; }
       var bModal = document.getElementById('batch-modal');
@@ -1117,14 +1389,6 @@
     document.querySelectorAll('.drawer-panel').forEach(function (p) {
       p.classList.toggle('active', p.id === 'drawer-panel-' + _drawerTab);
     });
-    // Terminal tab: init/fit on activation; hide autoscroll (N/A for terminal)
-    var asl = document.querySelector('.autoscroll-label');
-    if (_drawerTab === 'terminal') {
-      if (asl) asl.style.visibility = 'hidden';
-      openTerminalTab();
-    } else {
-      if (asl) asl.style.visibility = '';
-    }
   });
 
   // Close button
@@ -1149,155 +1413,4 @@
     }).catch(function () { showToast('Network error', 'error'); });
   });
 
-  // -----------------------------------------------------------------------
-  // Live Terminal  (xterm.js + SSH WebSocket)
-  // -----------------------------------------------------------------------
-
-  var _xtermReady = false;   // xterm.js + FitAddon libraries loaded
-  var _terminal   = null;    // xterm.js Terminal instance
-  var _termFit    = null;    // FitAddon instance
-  var _termWs     = null;    // active WebSocket (null = disconnected)
-
-  function _loadXtermLibs(cb) {
-    var link = document.createElement('link');
-    link.rel  = 'stylesheet';
-    link.href = 'https://cdn.jsdelivr.net/npm/xterm@5.3.0/css/xterm.css';
-    document.head.appendChild(link);
-
-    var s1 = document.createElement('script');
-    s1.src = 'https://cdn.jsdelivr.net/npm/xterm@5.3.0/lib/xterm.js';
-    s1.onload = function () {
-      var s2 = document.createElement('script');
-      s2.src = 'https://cdn.jsdelivr.net/npm/xterm-addon-fit@0.8.0/lib/xterm-addon-fit.js';
-      s2.onload = cb;
-      document.head.appendChild(s2);
-    };
-    document.head.appendChild(s1);
-  }
-
-  function openTerminalTab() {
-    var panel = document.getElementById('drawer-panel-terminal');
-    if (!panel) return;
-
-    if (!_xtermReady) {
-      panel.innerHTML = '<div class="drawer-loading">Loading terminal\u2026</div>';
-      _loadXtermLibs(function () {
-        _xtermReady = true;
-        _termInit(panel);
-      });
-      return;
-    }
-
-    if (!_terminal) {
-      _termInit(panel);
-      return;
-    }
-
-    // Already initialised — refit to current panel dimensions
-    setTimeout(function () {
-      if (_termFit) try { _termFit.fit(); } catch (_) {}
-    }, 30);
-  }
-
-  function _termInit(panel) {
-    panel.innerHTML = '';
-
-    var term = new Terminal({
-      cursorBlink: true,
-      fontSize: 13,
-      fontFamily: '"SF Mono","Fira Code",Consolas,"DejaVu Sans Mono",monospace',
-      theme: {
-        background:          '#0d1117',
-        foreground:          '#e6edf3',
-        cursor:              '#58a6ff',
-        cursorAccent:        '#0d1117',
-        selectionBackground: 'rgba(88,166,255,0.25)',
-        black:         '#484f58', red:     '#ff7b72', green:   '#3fb950', yellow:  '#d29922',
-        blue:          '#58a6ff', magenta: '#bc8cff', cyan:    '#39c5cf', white:   '#b1bac4',
-        brightBlack:   '#6e7681', brightRed: '#ffa198', brightGreen: '#56d364',
-        brightYellow:  '#e3b341', brightBlue: '#79c0ff', brightMagenta: '#d2a8ff',
-        brightCyan:    '#56d4dd', brightWhite: '#f0f6fc',
-      },
-      scrollback: 2000,
-      allowProposedApi: true,
-    });
-
-    var fit = new FitAddon.FitAddon();
-    term.loadAddon(fit);
-    term.open(panel);
-
-    _terminal = term;
-    _termFit  = fit;
-
-    // Initial fit after the panel is visible
-    setTimeout(function () {
-      if (_termFit) try { _termFit.fit(); } catch (_) {}
-    }, 30);
-
-    // Forward all keystrokes → SSH (onData registered once here)
-    term.onData(function (data) {
-      if (_termWs && _termWs.readyState === 1) {
-        _termWs.send(new TextEncoder().encode(data));
-      }
-    });
-
-    // Refit + notify server on resize
-    new ResizeObserver(function () {
-      if (!_termFit) return;
-      try { _termFit.fit(); } catch (_) {}
-      if (_termWs && _termWs.readyState === 1 && _terminal) {
-        _termWs.send(JSON.stringify({ type: 'resize', cols: _terminal.cols, rows: _terminal.rows }));
-      }
-    }).observe(panel);
-
-    _termConnect();
-  }
-
-  function _termConnect() {
-    if (_termWs && _termWs.readyState <= 1) return; // already open or connecting
-
-    var proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
-    var ws    = new WebSocket(proto + '//' + location.host + '/ws/terminal');
-    ws.binaryType = 'arraybuffer';
-    _termWs = ws;
-
-    ws.onopen = function () {
-      _termHideReconnect();
-      if (_terminal && ws.readyState === 1) {
-        ws.send(JSON.stringify({ type: 'resize', cols: _terminal.cols, rows: _terminal.rows }));
-      }
-    };
-
-    ws.onmessage = function (e) {
-      if (!_terminal) return;
-      _terminal.write(e.data instanceof ArrayBuffer ? new Uint8Array(e.data) : e.data);
-    };
-
-    ws.onclose = function () {
-      if (_terminal) _terminal.write('\r\n\x1b[33m\u2500\u2500 disconnected \u2500\u2500\x1b[0m\r\n');
-      _termShowReconnect();
-    };
-
-    ws.onerror = function () { /* onclose fires too */ };
-  }
-
-  function _termShowReconnect() {
-    var panel = document.getElementById('drawer-panel-terminal');
-    if (!panel || panel.querySelector('.term-reconnect-bar')) return;
-    var bar = document.createElement('div');
-    bar.className = 'term-reconnect-bar';
-    bar.innerHTML = '<span>Connection closed</span>'
-                  + '<button class="btn-secondary">\u21ba Reconnect</button>';
-    bar.querySelector('button').onclick = function () {
-      bar.remove();
-      _termConnect();
-    };
-    panel.appendChild(bar);
-  }
-
-  function _termHideReconnect() {
-    var bar = document.querySelector('.term-reconnect-bar');
-    if (bar) bar.remove();
-  }
-
 }());
diff --git a/app/templates/components/drives_table.html b/app/templates/components/drives_table.html
index b9c7de2..77ea2ff 100644
--- a/app/templates/components/drives_table.html
+++ b/app/templates/components/drives_table.html
@@ -80,11 +80,14 @@
       {%- set bi_active = drive.burnin and drive.burnin.state in ('queued', 'running') %}
       {%- set short_busy = drive.smart_short and drive.smart_short.state == 'running' %}
       {%- set long_busy  = drive.smart_long  and drive.smart_long.state  == 'running' %}
-      {%- set selectable = not bi_active and not short_busy and not long_busy %}
+      {%- set pool_locked = drive.pool_name and not drive.pool_unlocked_until %}
+      {%- set is_boot_pool = drive.pool_name == 'boot-pool' %}
+      {%- set is_exported  = drive.pool_role == 'exported' %}
+      {%- set selectable = not bi_active and not short_busy and not long_busy and not pool_locked %}
       {%- set bi_done = drive.burnin and drive.burnin.state in ('passed', 'failed', 'cancelled', 'unknown') %}
       {%- set smart_done = (drive.smart_short and drive.smart_short.state in ('passed','failed','aborted'))
                         or (drive.smart_long  and drive.smart_long.state  in ('passed','failed','aborted')) %}
-      {%- set can_reset = (bi_done or smart_done) and not bi_active and not short_busy and not long_busy %}
+      {%- set can_reset = (bi_done or smart_done) and not bi_active and not short_busy and not long_busy and not pool_locked %}
       <tr data-status="{{ drive.status }}" id="drive-{{ drive.id }}">
         <td class="col-check">
           {%- if selectable %}
@@ -92,8 +95,18 @@
           {%- endif %}
         </td>
         <td class="col-drive">
-          <span class="drive-name">{{ drive.devname }}</span>
+          <span class="drive-name">
+            {%- if drive.pool_name -%}
+            <span class="pool-lock-icon{% if is_boot_pool %} pool-lock-boot{% elif is_exported %} pool-lock-exported{% endif %}"
+                  title="{% if is_boot_pool %}In BOOT POOL '{{ drive.pool_name }}'{% elif is_exported %}Carries ZFS data from a previously-imported pool{% else %}In pool '{{ drive.pool_name }}' ({{ drive.pool_role or 'data' }}){% endif %}">&#x1F512;</span>
+            {%- endif -%}
+            {{ drive.devname }}
+          </span>
           <span class="drive-model">{{ drive.model or "Unknown" }}</span>
+          {%- if drive.pool_name %}
+          <span class="pool-pill{% if is_boot_pool %} pool-pill-boot{% elif is_exported %} pool-pill-exported{% endif %}"
+                title="ZFS pool membership">{% if is_exported %}exported ZFS{% else %}{{ drive.pool_name }} &middot; {{ drive.pool_role or 'data' }}{% endif %}</span>
+          {%- endif %}
           {%- if drive.location %}
           <span class="drive-location"
                 data-drive-id="{{ drive.id }}"
@@ -154,6 +167,20 @@
                     {% if short_busy %}disabled{% endif %}
                     title="Start Long SMART test (~several hours)">Long</button>
             {%- endif %}
+            {%- if pool_locked %}
+            <!-- Drive is in a zpool — replace Burn-In with Unlock affordance -->
+            <button class="btn-action btn-unlock{% if is_boot_pool %} btn-unlock-boot{% elif is_exported %} btn-unlock-exported{% endif %}"
+                    data-drive-id="{{ drive.id }}"
+                    data-devname="{{ drive.devname }}"
+                    data-serial="{{ drive.serial or '' }}"
+                    data-model="{{ drive.model or 'Unknown' }}"
+                    data-size="{{ drive.size_bytes | format_bytes }}"
+                    data-pool-name="{{ drive.pool_name }}"
+                    data-pool-role="{{ drive.pool_role or 'data' }}"
+                    data-is-boot-pool="{{ '1' if is_boot_pool else '0' }}"
+                    data-is-exported="{{ '1' if is_exported else '0' }}"
+                    title="{% if is_boot_pool %}Drive is in BOOT POOL '{{ drive.pool_name }}' — click to unlock{% elif is_exported %}Drive carries ZFS data from a previously-imported pool — click to unlock{% else %}Drive is in pool '{{ drive.pool_name }}' — click to unlock{% endif %}">&#x1F512; Unlock</button>
+            {%- else %}
             <!-- Burn-In -->
             <button class="btn-action btn-start{% if short_busy or long_busy %} btn-disabled{% endif %}"
                     data-drive-id="{{ drive.id }}"
@@ -162,8 +189,10 @@
                     data-model="{{ drive.model or 'Unknown' }}"
                     data-size="{{ drive.size_bytes | format_bytes }}"
                     data-health="{{ drive.smart_health }}"
+                    data-pool-name="{{ drive.pool_name or '' }}"
+                    data-pool-unlocked-until="{{ drive.pool_unlocked_until or '' }}"
                     {% if short_busy or long_busy %}disabled{% endif %}
-                    title="Start Burn-In">Burn-In</button>
+                    title="Start Burn-In{% if drive.pool_name %} (UNLOCKED — pool drive){% endif %}">Burn-In{% if drive.pool_name %} <span class="unlock-countdown" data-expires="{{ drive.pool_unlocked_until }}">&#x1F513;</span>{% endif %}</button>
             <!-- Reset — clears SMART state so drive can be re-tested from scratch -->
             {%- if can_reset %}
             <button class="btn-action btn-reset"
@@ -171,6 +200,7 @@
                     title="Reset SMART state — clears test results so drive shows as fresh">Reset</button>
             {%- endif %}
             {%- endif %}
+            {%- endif %}
           </div>
         </td>
       </tr>
diff --git a/app/templates/components/modal_unlock.html b/app/templates/components/modal_unlock.html
new file mode 100644
index 0000000..5d6c761
--- /dev/null
+++ b/app/templates/components/modal_unlock.html
@@ -0,0 +1,69 @@
+<div id="unlock-modal" class="modal-overlay" hidden aria-modal="true" role="dialog">
+  <div class="modal modal-danger">
+    <div class="modal-header">
+      <h2 class="modal-title" id="unlock-modal-title">Unlock pool drive</h2>
+      <button class="modal-close" id="unlock-modal-close-btn" aria-label="Close">&#x2715;</button>
+    </div>
+
+    <div class="modal-body">
+
+      <div class="modal-drive-info">
+        <div class="modal-drive-row">
+          <span class="modal-devname" id="unlock-devname">&mdash;</span>
+          <span class="chip" id="unlock-pool-chip">&mdash;</span>
+        </div>
+        <div class="modal-drive-sub">
+          <span id="unlock-model">&mdash;</span>
+          &middot;
+          <span id="unlock-size">&mdash;</span>
+          &middot;
+          <span class="mono" id="unlock-serial">&mdash;</span>
+        </div>
+      </div>
+
+      <div id="unlock-warning" class="confirm-warning">
+        <strong id="unlock-warning-title">This drive belongs to a zpool.</strong>
+        <p id="unlock-warning-body">
+          Running a destructive burn-in stage will overwrite all data on this drive
+          and almost certainly destroy the pool. Only proceed if you have already
+          removed this drive from the pool, or if you are intentionally
+          decommissioning the pool.
+        </p>
+      </div>
+
+      <div class="form-group">
+        <label class="form-label" for="unlock-operator-input">Operator</label>
+        <input class="form-input" type="text" id="unlock-operator-input"
+               placeholder="Your name" autocomplete="name" maxlength="64">
+      </div>
+
+      <div class="form-group">
+        <label class="form-label" for="unlock-reason-input">
+          Reason (recorded to audit log, minimum 5 characters)
+        </label>
+        <input class="form-input" type="text" id="unlock-reason-input"
+               placeholder="e.g. replacing failed drive in tank/raidz2-0"
+               autocomplete="off" maxlength="200">
+      </div>
+
+      <div class="form-group">
+        <label class="form-label" for="unlock-confirm-input" id="unlock-confirm-label">
+          Type <code id="unlock-confirm-token">&mdash;</code> to confirm
+        </label>
+        <input class="form-input form-input-confirm" type="text" id="unlock-confirm-input"
+               placeholder="" autocomplete="off" spellcheck="false">
+        <div class="confirm-hint" id="unlock-confirm-hint"></div>
+      </div>
+
+      <div class="stage-always-note">
+        Unlock lasts 10 minutes. After that, this drive locks again automatically.
+      </div>
+
+    </div>
+
+    <div class="modal-footer">
+      <button class="btn-secondary" id="unlock-modal-cancel-btn">Cancel</button>
+      <button class="btn-danger" id="unlock-modal-submit-btn" disabled>Unlock</button>
+    </div>
+  </div>
+</div>
diff --git a/app/templates/dashboard.html b/app/templates/dashboard.html
index dd7ec38..b9c9cdf 100644
--- a/app/templates/dashboard.html
+++ b/app/templates/dashboard.html
@@ -5,8 +5,9 @@
 {% block content %}
 {% include "components/modal_start.html" %}
 {% include "components/modal_batch.html" %}
+{% include "components/modal_unlock.html" %}
 
-<!-- Stats bar — counts are updated live by app.js updateCounts() -->
+<!-- Stats bar — drive counts updated live by app.js updateCounts(); sensor chips updated by SSE system-sensors event -->
 <div class="stats-bar">
   <div class="stat-card" data-stat-filter="all">
     <span class="stat-value" id="stat-all">{{ drives | length }}</span>
@@ -28,6 +29,33 @@
     <span class="stat-value" id="stat-idle">0</span>
     <span class="stat-label">Idle</span>
   </div>
+
+  {%- set st = poller.system_temps if (poller and poller.system_temps) else {} %}
+  {%- if st.get('cpu_c') is not none or st.get('pch_c') is not none %}
+  <div class="stats-bar-sep"></div>
+  {%- if st.get('cpu_c') is not none %}
+  <div class="stat-sensor" id="sensor-cpu">
+    <span class="stat-sensor-val {{ st.get('cpu_c') | temp_class }}" id="sensor-cpu-val">{{ st.get('cpu_c') }}°</span>
+    <span class="stat-sensor-label">CPU</span>
+  </div>
+  {%- endif %}
+  {%- if st.get('pch_c') is not none %}
+  <div class="stat-sensor" id="sensor-pch">
+    <span class="stat-sensor-val {{ st.get('pch_c') | temp_class }}" id="sensor-pch-val">{{ st.get('pch_c') }}°</span>
+    <span class="stat-sensor-label">PCH</span>
+  </div>
+  {%- endif %}
+  {%- endif %}
+
+  {%- set tp = poller.thermal_pressure if poller else 'ok' %}
+  <div class="stat-sensor stat-sensor-thermal stat-sensor-thermal-{{ tp }}"
+       id="sensor-thermal"
+       {% if not tp or tp == 'ok' %}hidden{% endif %}>
+    <span class="stat-sensor-val" id="sensor-thermal-val">
+      {%- if tp == 'warn' %}WARM{%- elif tp == 'crit' %}HOT{%- else %}OK{%- endif %}
+    </span>
+    <span class="stat-sensor-label">Thermal</span>
+  </div>
 </div>
 
 <!-- Failed drive banner — shown/hidden by JS when failed count > 0 -->
@@ -83,7 +111,6 @@
       <button class="drawer-tab active" data-tab="burnin">Burn-In</button>
       <button class="drawer-tab" data-tab="smart">SMART</button>
       <button class="drawer-tab" data-tab="events">Events</button>
-      <button class="drawer-tab" data-tab="terminal">Terminal</button>
     </nav>
     <div class="drawer-controls">
       <label class="autoscroll-label">
@@ -97,7 +124,6 @@
     <div class="drawer-panel active" id="drawer-panel-burnin"></div>
     <div class="drawer-panel" id="drawer-panel-smart"></div>
     <div class="drawer-panel" id="drawer-panel-events"></div>
-    <div class="drawer-panel drawer-panel-terminal" id="drawer-panel-terminal"></div>
   </div>
 </div>
 {% endblock %}
diff --git a/app/templates/settings.html b/app/templates/settings.html
index 2578e06..17671aa 100644
--- a/app/templates/settings.html
+++ b/app/templates/settings.html
@@ -248,6 +248,30 @@
                  type="number" min="0" max="9999" value="{{ editable.bad_block_threshold }}">
           <span class="sf-hint">Max bad blocks before surface validate fails (Stage 7)</span>
         </div>
+
+        <div class="sf-row">
+          <label class="sf-label" for="surface_validate_block_size">Badblocks Block Size (bytes)</label>
+          <input class="sf-input sf-input-xs" id="surface_validate_block_size"
+                 name="surface_validate_block_size" type="number" min="512" max="1048576" step="512"
+                 value="{{ editable.surface_validate_block_size }}">
+          <span class="sf-hint">badblocks -b. 4096 (default) is conservative; 8192 is faster on multi-TB HDDs (~2x RAM, ~half the runtime). Power of 2.</span>
+        </div>
+
+        <div class="sf-row">
+          <label class="sf-label" for="surface_validate_block_buffer">Badblocks Block Buffer</label>
+          <input class="sf-input sf-input-xs" id="surface_validate_block_buffer"
+                 name="surface_validate_block_buffer" type="number" min="1" max="4096"
+                 value="{{ editable.surface_validate_block_buffer }}">
+          <span class="sf-hint">badblocks -c. 64 (default) matches the upstream tool. Buffer = block_size × this many blocks per IO.</span>
+        </div>
+
+        <div class="sf-row">
+          <label class="sf-label" for="surface_validate_passes">Badblocks Passes</label>
+          <input class="sf-input sf-input-xs" id="surface_validate_passes"
+                 name="surface_validate_passes" type="number" min="0" max="16"
+                 value="{{ editable.surface_validate_passes }}">
+          <span class="sf-hint">badblocks -p. 1 = repeat until one consecutive clean scan (default). 2-3 for paranoid burn-in that re-confirms after errors.</span>
+        </div>
       </div>
 
     </div><!-- /right col -->
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_pool_parser.py b/tests/test_pool_parser.py
new file mode 100644
index 0000000..95e0947
--- /dev/null
+++ b/tests/test_pool_parser.py
@@ -0,0 +1,283 @@
+"""Unit tests for the zpool-list and lsblk parsers in ssh_client.
+
+These cover the structural cases that drive the pool-membership lock:
+mirror/raidz/draid container vdevs, single-disk vdevs at depth 1, the
+flattened-indentation behaviour of `zpool list -vHP` on TrueNAS, partition
+suffix stripping for NVMe and SCSI, and the cache/log/spare/special
+section markers (including plural variants).
+
+Run with:  python -m unittest discover tests/ -v
+"""
+
+import unittest
+
+from app.ssh_client import (
+    _parse_zpool_list_output,
+    _parse_lsblk_zfs_output,
+    _parse_smart_health_batch,
+)
+
+
+class TestParseZpoolList(unittest.TestCase):
+
+    def test_empty_output_returns_empty(self):
+        self.assertEqual(_parse_zpool_list_output(""), {})
+
+    def test_single_pool_with_mirror(self):
+        # TrueNAS-flattened output: pool at depth 0, vdev type and devices
+        # all at depth 1.
+        out = _parse_zpool_list_output(
+            "boot-pool\t232G\t8.4G\t224G\t-\t-\t17%\t3%\t1.00x\tONLINE\t-\n"
+            "\tmirror-0\t232G\t8.4G\t224G\t-\t-\t17%\t3.6%\t-\tONLINE\n"
+            "\t/dev/nvme0n1p3\t232G\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sdd3\t232G\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertEqual(out, {
+            "nvme0n1": {"pool": "boot-pool", "role": "data"},
+            "sdd":     {"pool": "boot-pool", "role": "data"},
+        })
+
+    def test_raidz2_pool(self):
+        out = _parse_zpool_list_output(
+            "tank\t127T\t4.5T\t122T\t-\t-\t0%\t3%\t1.00x\tONLINE\t-\n"
+            "\traidz2-0\t127T\t4.5T\t122T\t-\t-\t0%\t3%\t-\tONLINE\n"
+            "\t/dev/sdc\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sde\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sdf\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertEqual(set(out.keys()), {"sdc", "sde", "sdf"})
+        for v in out.values():
+            self.assertEqual(v, {"pool": "tank", "role": "data"})
+
+    def test_draid_pool(self):
+        out = _parse_zpool_list_output(
+            "warm\t100T\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\tdraid2:8d:10c:1s-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sdg\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sdh\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertEqual(out["sdg"], {"pool": "warm", "role": "data"})
+        self.assertEqual(out["sdh"], {"pool": "warm", "role": "data"})
+
+    def test_single_disk_vdev_at_depth_1(self):
+        # No mirror/raidz wrapper — a `/dev/...` line itself sits at depth 1.
+        out = _parse_zpool_list_output(
+            "scratch\t1T\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\t/dev/sdi\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertEqual(out, {"sdi": {"pool": "scratch", "role": "data"}})
+
+    def test_section_markers_switch_role(self):
+        # cache / log / spare / special / dedup all at depth 1; subsequent
+        # /dev/... lines (also at depth 1) inherit that role.
+        out = _parse_zpool_list_output(
+            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\tmirror-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sdb\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\tcache\n"
+            "\t/dev/nvme1n1\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\tlog\n"
+            "\t/dev/nvme2n1\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\tspare\n"
+            "\t/dev/sdz\t-\t-\t-\t-\t-\t-\t-\t-\tAVAIL\n"
+        )
+        self.assertEqual(out["sda"],     {"pool": "tank", "role": "data"})
+        self.assertEqual(out["sdb"],     {"pool": "tank", "role": "data"})
+        self.assertEqual(out["nvme1n1"], {"pool": "tank", "role": "cache"})
+        self.assertEqual(out["nvme2n1"], {"pool": "tank", "role": "log"})
+        self.assertEqual(out["sdz"],     {"pool": "tank", "role": "spare"})
+
+    def test_section_markers_plurals_normalize(self):
+        # ZFS sometimes emits 'logs'/'spares' instead of 'log'/'spare'.
+        out = _parse_zpool_list_output(
+            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\tlogs\n"
+            "\t/dev/nvme0n1\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\tspares\n"
+            "\t/dev/sdz\t-\t-\t-\t-\t-\t-\t-\t-\tAVAIL\n"
+        )
+        self.assertEqual(out["nvme0n1"]["role"], "log")
+        self.assertEqual(out["sdz"]["role"],     "spare")
+
+    def test_special_and_dedup_section(self):
+        out = _parse_zpool_list_output(
+            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\tspecial\n"
+            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\tdedup\n"
+            "\t/dev/sdb\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertEqual(out["sda"]["role"], "special")
+        self.assertEqual(out["sdb"]["role"], "dedup")
+
+    def test_partition_suffix_stripped(self):
+        out = _parse_zpool_list_output(
+            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\tmirror-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sda3\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/nvme0n1p3\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertIn("sda", out)
+        self.assertNotIn("sda3", out)
+        self.assertIn("nvme0n1", out)
+        self.assertNotIn("nvme0n1p3", out)
+
+    def test_long_scsi_devname(self):
+        # Past sdz: sdaa, sdab, ...
+        out = _parse_zpool_list_output(
+            "big\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\traidz3-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sdaa\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sdab1\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertEqual(out["sdaa"]["pool"], "big")
+        self.assertEqual(out["sdab"]["pool"], "big")  # partition stripped
+
+    def test_pool_name_with_dashes_dots_underscores(self):
+        out = _parse_zpool_list_output(
+            "my-cool_pool.v2\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertEqual(out["sda"]["pool"], "my-cool_pool.v2")
+
+    def test_multiple_pools(self):
+        out = _parse_zpool_list_output(
+            "boot-pool\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\tmirror-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/nvme0n1p3\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sdd3\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\traidz2-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "\t/dev/sdb\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertEqual(out["nvme0n1"]["pool"], "boot-pool")
+        self.assertEqual(out["sdd"]["pool"],     "boot-pool")
+        self.assertEqual(out["sda"]["pool"],     "tank")
+        self.assertEqual(out["sdb"]["pool"],     "tank")
+
+    def test_pool_role_resets_between_pools(self):
+        # Section marker in pool A must not carry into pool B.
+        out = _parse_zpool_list_output(
+            "a\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\tcache\n"
+            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+            "b\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\t/dev/sdb\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertEqual(out["sda"]["role"], "cache")
+        self.assertEqual(out["sdb"]["role"], "data")
+
+    def test_blank_lines_skipped(self):
+        out = _parse_zpool_list_output(
+            "\n"
+            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
+            "\n"
+            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
+        )
+        self.assertEqual(out, {"sda": {"pool": "tank", "role": "data"}})
+
+
+class TestParseLsblkZfs(unittest.TestCase):
+
+    def test_empty_returns_empty_set(self):
+        self.assertEqual(_parse_lsblk_zfs_output(""), set())
+
+    def test_partition_zfs_member(self):
+        # Typical TrueNAS layout: zpool members are partitions.
+        out = _parse_lsblk_zfs_output(
+            "sda      \n"
+            "sda1     \n"
+            "sda3     zfs_member\n"
+            "sdb      \n"
+            "sdb3     zfs_member\n"
+        )
+        self.assertEqual(out, {"sda", "sdb"})
+
+    def test_whole_disk_zfs_member(self):
+        # Some configurations put zfs_member on the whole disk.
+        out = _parse_lsblk_zfs_output(
+            "sdc      zfs_member\n"
+        )
+        self.assertEqual(out, {"sdc"})
+
+    def test_nvme_partitioned_and_whole(self):
+        out = _parse_lsblk_zfs_output(
+            "nvme0n1     \n"
+            "nvme0n1p3   zfs_member\n"
+            "nvme1n1     zfs_member\n"
+        )
+        self.assertEqual(out, {"nvme0n1", "nvme1n1"})
+
+    def test_non_zfs_fstypes_ignored(self):
+        out = _parse_lsblk_zfs_output(
+            "sda1     ext4\n"
+            "sda2     swap\n"
+            "sdb1     btrfs\n"
+        )
+        self.assertEqual(out, set())
+
+    def test_long_scsi_devnames(self):
+        out = _parse_lsblk_zfs_output(
+            "sdaa     zfs_member\n"
+            "sdab1    zfs_member\n"
+        )
+        self.assertEqual(out, {"sdaa", "sdab"})
+
+    def test_short_lines_skipped(self):
+        out = _parse_lsblk_zfs_output(
+            "sda\n"
+            "\n"
+            "sdb1     zfs_member\n"
+        )
+        self.assertEqual(out, {"sdb"})
+
+
+class TestParseSmartHealthBatch(unittest.TestCase):
+
+    def test_passed_drive(self):
+        out = _parse_smart_health_batch(
+            "@@sda@@\n"
+            "smartctl 7.4 2023-08-01 r5530 [x86_64-linux-6.6]\n"
+            "SMART overall-health self-assessment test result: PASSED\n"
+            "@@END@@\n"
+        )
+        self.assertEqual(out, {"sda": "PASSED"})
+
+    def test_failed_drive(self):
+        out = _parse_smart_health_batch(
+            "@@sdb@@\n"
+            "SMART overall-health self-assessment test result: FAILED!\n"
+            "@@END@@\n"
+        )
+        self.assertEqual(out, {"sdb": "FAILED"})
+
+    def test_unknown_when_no_marker(self):
+        out = _parse_smart_health_batch(
+            "@@sdc@@\n"
+            "/dev/sdc: Unknown USB bridge\n"
+            "@@END@@\n"
+        )
+        self.assertEqual(out, {"sdc": "UNKNOWN"})
+
+    def test_multiple_drives_mixed_states(self):
+        out = _parse_smart_health_batch(
+            "@@sda@@\n"
+            "SMART overall-health self-assessment test result: PASSED\n"
+            "@@END@@\n"
+            "@@sdb@@\n"
+            "SMART overall-health self-assessment test result: FAILED!\n"
+            "@@END@@\n"
+            "@@nvme0n1@@\n"
+            "SMART overall-health self-assessment test result: PASSED\n"
+            "@@END@@\n"
+        )
+        self.assertEqual(out, {"sda": "PASSED", "sdb": "FAILED", "nvme0n1": "PASSED"})
+
+    def test_empty_returns_empty(self):
+        self.assertEqual(_parse_smart_health_batch(""), {})
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_unlock_flow.py b/tests/test_unlock_flow.py
new file mode 100644
index 0000000..626a3fd
--- /dev/null
+++ b/tests/test_unlock_flow.py
@@ -0,0 +1,303 @@
+"""Unit tests for the pool-drive unlock state machine in burnin.py.
+
+Covers: token validation per pool kind, identity-binding (grant
+invalidated when pool_name/pool_role changes), TTL expiry, the
+audit-commit-then-arm ordering (a failing audit insert leaves no
+in-memory grant), and the unique-active-burnin partial index that
+prevents duplicate queued rows for the same drive.
+
+Uses an in-memory SQLite DB and monkeypatches app.config.settings.db_path.
+No SSH, no network, no FastAPI.
+
+Run with:  python -m unittest discover tests/ -v
+"""
+
+import os
+import tempfile
+import time
+import unittest
+
+import aiosqlite
+
+
+async def _setup_temp_db() -> str:
+    """Create a temp SQLite file, point app.config at it, init schema.
+    Async-callable from IsolatedAsyncioTestCase.asyncSetUp."""
+    fd, path = tempfile.mkstemp(suffix=".db")
+    os.close(fd)
+    from app.config import settings
+    settings.db_path = path
+
+    from app.database import init_db
+    await init_db()
+    # Seed pool drives so unlock_flow tests have something to grant on.
+    async with aiosqlite.connect(path) as db:
+        await db.execute("""
+            INSERT INTO drives
+                (truenas_disk_id, devname, serial, model, size_bytes,
+                 temperature_c, smart_health, last_seen_at, last_polled_at,
+                 pool_name, pool_role, pool_seen_at)
+            VALUES ('test-id-1', 'sda', 'TESTSER1', 'TestModel', 1000,
+                    30, 'PASSED', '2026-05-02T00:00:00+00:00',
+                    '2026-05-02T00:00:00+00:00',
+                    'tank', 'data', '2026-05-02T00:00:00+00:00')
+        """)
+        await db.execute("""
+            INSERT INTO drives
+                (truenas_disk_id, devname, serial, model, size_bytes,
+                 temperature_c, smart_health, last_seen_at, last_polled_at,
+                 pool_name, pool_role, pool_seen_at)
+            VALUES ('test-id-2', 'sdb', 'TESTSER2', 'TestModel', 1000,
+                    30, 'PASSED', '2026-05-02T00:00:00+00:00',
+                    '2026-05-02T00:00:00+00:00',
+                    'boot-pool', 'data', '2026-05-02T00:00:00+00:00')
+        """)
+        await db.execute("""
+            INSERT INTO drives
+                (truenas_disk_id, devname, serial, model, size_bytes,
+                 temperature_c, smart_health, last_seen_at, last_polled_at,
+                 pool_name, pool_role, pool_seen_at)
+            VALUES ('test-id-3', 'sdc', 'TESTSER3', 'TestModel', 1000,
+                    30, 'PASSED', '2026-05-02T00:00:00+00:00',
+                    '2026-05-02T00:00:00+00:00',
+                    '(exported)', 'exported', '2026-05-02T00:00:00+00:00')
+        """)
+        await db.commit()
+    return path
+
+
+class TestUnlockFlow(unittest.IsolatedAsyncioTestCase):
+
+    async def asyncSetUp(self):
+        self.db_path = await _setup_temp_db()
+        # Reset module state so previous test runs don't bleed in.
+        from app import burnin
+        burnin._unlock_grants.clear()
+
+    async def asyncTearDown(self):
+        try:
+            os.unlink(self.db_path)
+        except OSError:
+            pass
+
+    # ----- token validation per pool kind -----
+
+    async def test_active_pool_token_is_pool_name(self):
+        from app import burnin
+        # Drive 1 = tank/data
+        with self.assertRaises(ValueError):
+            await burnin.grant_pool_unlock(1, "wrong", "op", "valid reason")
+        expiry = await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
+        self.assertGreater(expiry, time.time())
+
+    async def test_boot_pool_token_is_destroy_phrase(self):
+        from app import burnin
+        # Drive 2 = boot-pool — typing the pool name must NOT work.
+        with self.assertRaises(ValueError):
+            await burnin.grant_pool_unlock(2, "boot-pool", "op", "valid reason")
+        expiry = await burnin.grant_pool_unlock(
+            2, "DESTROY BOOT POOL", "op", "valid reason"
+        )
+        self.assertGreater(expiry, time.time())
+
+    async def test_exported_token_is_destroy_phrase(self):
+        from app import burnin
+        # Drive 3 = (exported)/exported
+        with self.assertRaises(ValueError):
+            await burnin.grant_pool_unlock(3, "(exported)", "op", "valid reason")
+        expiry = await burnin.grant_pool_unlock(
+            3, "DESTROY EXPORTED POOL", "op", "valid reason"
+        )
+        self.assertGreater(expiry, time.time())
+
+    # ----- input validation -----
+
+    async def test_empty_reason_rejected(self):
+        from app import burnin
+        with self.assertRaises(ValueError):
+            await burnin.grant_pool_unlock(1, "tank", "op", "")
+
+    async def test_short_reason_rejected(self):
+        from app import burnin
+        with self.assertRaises(ValueError):
+            await burnin.grant_pool_unlock(1, "tank", "op", "hi")
+
+    async def test_empty_operator_rejected(self):
+        from app import burnin
+        with self.assertRaises(ValueError):
+            await burnin.grant_pool_unlock(1, "tank", "", "valid reason")
+
+    async def test_unknown_drive_rejected(self):
+        from app import burnin
+        with self.assertRaises(ValueError):
+            await burnin.grant_pool_unlock(99999, "anything", "op", "valid reason")
+
+    async def test_drive_not_in_pool_rejected(self):
+        from app import burnin
+        # Manually clear pool fields on drive 1
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute("UPDATE drives SET pool_name=NULL, pool_role=NULL WHERE id=1")
+            await db.commit()
+        with self.assertRaises(ValueError):
+            await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
+
+    # ----- identity binding (Codex finding #2) -----
+
+    async def test_grant_invalidated_when_pool_name_changes(self):
+        from app import burnin
+        await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
+        # Operator's grant references tank/data; pool detection now reports tank2.
+        self.assertTrue(burnin._is_unlocked(1, "tank", "data"))
+        self.assertFalse(burnin._is_unlocked(1, "tank2", "data"))
+        # And the side effect: the grant is reaped, not just temporarily denied.
+        self.assertNotIn(1, burnin._unlock_grants)
+
+    async def test_grant_invalidated_when_pool_role_changes(self):
+        from app import burnin
+        await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
+        # Same pool, different role (data -> cache).
+        self.assertFalse(burnin._is_unlocked(1, "tank", "cache"))
+        self.assertNotIn(1, burnin._unlock_grants)
+
+    async def test_unlock_expiry_returns_none_for_mismatched_identity(self):
+        from app import burnin
+        await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
+        self.assertIsNotNone(burnin.unlock_expiry(1, "tank", "data"))
+        self.assertIsNone(burnin.unlock_expiry(1, "tank2", "data"))
+
+    # ----- TTL expiry -----
+
+    async def test_expired_grant_returns_false(self):
+        from app import burnin
+        # Drop TTL to 0 so the grant is born expired.
+        original = burnin.UNLOCK_TTL_SECONDS
+        burnin.UNLOCK_TTL_SECONDS = 0
+        try:
+            await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
+            self.assertFalse(burnin._is_unlocked(1, "tank", "data"))
+            self.assertNotIn(1, burnin._unlock_grants)
+        finally:
+            burnin.UNLOCK_TTL_SECONDS = original
+
+    # ----- audit commit ordering (Codex finding #3) -----
+
+    async def test_audit_event_recorded_for_active_pool(self):
+        from app import burnin
+        await burnin.grant_pool_unlock(1, "tank", "alice", "swapping out drive")
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cur = await db.execute(
+                "SELECT event_type, operator, message FROM audit_events "
+                "WHERE drive_id=? ORDER BY id DESC LIMIT 1", (1,)
+            )
+            row = await cur.fetchone()
+        self.assertEqual(row["event_type"], "pool_drive_unlocked")
+        self.assertEqual(row["operator"], "alice")
+        self.assertIn("swapping out drive", row["message"])
+
+    async def test_audit_event_for_boot_pool_uses_distinct_type(self):
+        from app import burnin
+        await burnin.grant_pool_unlock(
+            2, "DESTROY BOOT POOL", "alice", "replacing failed mirror"
+        )
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cur = await db.execute(
+                "SELECT event_type FROM audit_events WHERE drive_id=? ORDER BY id DESC LIMIT 1",
+                (2,),
+            )
+            row = await cur.fetchone()
+        self.assertEqual(row["event_type"], "boot_pool_drive_unlocked")
+
+    async def test_audit_event_for_exported_uses_distinct_type(self):
+        from app import burnin
+        await burnin.grant_pool_unlock(
+            3, "DESTROY EXPORTED POOL", "alice", "decommissioned pool"
+        )
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cur = await db.execute(
+                "SELECT event_type FROM audit_events WHERE drive_id=? ORDER BY id DESC LIMIT 1",
+                (3,),
+            )
+            row = await cur.fetchone()
+        self.assertEqual(row["event_type"], "exported_pool_drive_unlocked")
+
+    async def test_failed_token_does_not_record_audit_event(self):
+        from app import burnin
+        try:
+            await burnin.grant_pool_unlock(1, "wrong-token", "op", "valid reason")
+        except ValueError:
+            pass
+        async with aiosqlite.connect(self.db_path) as db:
+            cur = await db.execute(
+                "SELECT COUNT(*) FROM audit_events WHERE drive_id=?", (1,)
+            )
+            self.assertEqual((await cur.fetchone())[0], 0)
+        # And no in-memory grant was armed.
+        self.assertNotIn(1, burnin._unlock_grants)
+
+
+class TestActiveJobUniqueIndex(unittest.IsolatedAsyncioTestCase):
+    """Codex finding #4 — the partial unique index on burnin_jobs(drive_id)
+    WHERE state IN ('queued','running') must reject a second active row even
+    when two requests pass the SELECT-COUNT check concurrently."""
+
+    async def asyncSetUp(self):
+        self.db_path = await _setup_temp_db()
+        from app import burnin
+        burnin._unlock_grants.clear()
+        # Need to clear the pool field on drive 1 so unlock isn't required
+        # for these race tests.
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute("UPDATE drives SET pool_name=NULL, pool_role=NULL WHERE id=1")
+            await db.commit()
+        # Burnin orchestrator init for the semaphore
+        from app import burnin as b
+        import asyncio as _a
+        b._semaphore = _a.Semaphore(4)
+
+    async def asyncTearDown(self):
+        try:
+            os.unlink(self.db_path)
+        except OSError:
+            pass
+
+    async def test_index_blocks_second_active_insert(self):
+        # Insert a queued row by hand, then try a second one — index fires.
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                """INSERT INTO burnin_jobs (drive_id, profile, state, percent, operator, created_at)
+                   VALUES (?,?,?,?,?,?)""",
+                (1, "surface", "queued", 0, "op", "2026-05-02T00:00:00+00:00"),
+            )
+            await db.commit()
+            with self.assertRaises(aiosqlite.IntegrityError):
+                await db.execute(
+                    """INSERT INTO burnin_jobs (drive_id, profile, state, percent, operator, created_at)
+                       VALUES (?,?,?,?,?,?)""",
+                    (1, "surface", "queued", 0, "op", "2026-05-02T00:00:01+00:00"),
+                )
+                await db.commit()
+
+    async def test_index_allows_terminal_state_then_new_job(self):
+        # passed/failed/cancelled/unknown rows must not block a fresh queue.
+        async with aiosqlite.connect(self.db_path) as db:
+            for state in ("passed", "failed", "cancelled", "unknown"):
+                await db.execute(
+                    """INSERT INTO burnin_jobs (drive_id, profile, state, percent, operator, created_at)
+                       VALUES (?,?,?,?,?,?)""",
+                    (1, "surface", state, 100, "op", "2026-05-02T00:00:00+00:00"),
+                )
+            await db.commit()
+            # Should succeed — no other queued/running row exists.
+            await db.execute(
+                """INSERT INTO burnin_jobs (drive_id, profile, state, percent, operator, created_at)
+                   VALUES (?,?,?,?,?,?)""",
+                (1, "surface", "queued", 0, "op", "2026-05-02T00:00:00+00:00"),
+            )
+            await db.commit()
+
+
+if __name__ == "__main__":
+    unittest.main()