39 changed files with 78 additions and 676 deletions
--- a/claude-sandbox/truenas-burnin/.env.example
+++ b/claude-sandbox/truenas-burnin/.env.example
--- a/claude-sandbox/truenas-burnin/.gitignore
+++ b/claude-sandbox/truenas-burnin/.gitignore
--- a/claude-sandbox/truenas-burnin/CLAUDE.md
+++ b/claude-sandbox/truenas-burnin/CLAUDE.md
@@ -209,7 +209,7 @@ All read from `.env` via `pydantic-settings`. See `.env.example` for full list.
 | `TEMP_WARN_C` | `46` | Temperature warning threshold (°C) |
 | `TEMP_CRIT_C` | `55` | Temperature critical threshold — precheck fails above this |
 | `BAD_BLOCK_THRESHOLD` | `0` | Max bad blocks allowed before surface_validate fails (0 = any bad = fail) |
-| `APP_VERSION` | `1.0.0-8` | Displayed in header version badge |
+| `APP_VERSION` | `1.0.0-7` | Displayed in header version badge |
 | `SSH_HOST` | `` | TrueNAS SSH hostname/IP — empty disables SSH mode (uses mock/REST) |
 | `SSH_PORT` | `22` | TrueNAS SSH port |
 | `SSH_USER` | `root` | TrueNAS SSH username |
--- a/claude-sandbox/truenas-burnin/Dockerfile
+++ b/claude-sandbox/truenas-burnin/Dockerfile
--- a/claude-sandbox/truenas-burnin/SPEC.md
+++ b/claude-sandbox/truenas-burnin/SPEC.md
--- a/claude-sandbox/truenas-burnin/app/__init__.py
+++ b/claude-sandbox/truenas-burnin/app/__init__.py
--- a/claude-sandbox/truenas-burnin/app/burnin.py
+++ b/claude-sandbox/truenas-burnin/app/burnin.py
@@ -206,45 +206,10 @@ async def cancel_job(job_id: int, operator: str) -> bool:
 # Job runner
 # ---------------------------------------------------------------------------

-async def _thermal_gate_ok() -> bool:
-    """True if it's thermally safe to start a new burn-in.
-    Checks the peak temperature of drives currently under active burn-in.
-    """
-    try:
-        async with _db() as db:
-            cur = await db.execute("""
-                SELECT MAX(d.temperature_c)
-                FROM drives d
-                JOIN burnin_jobs bj ON bj.drive_id = d.id
-                WHERE bj.state = 'running' AND d.temperature_c IS NOT NULL
-            """)
-            row = await cur.fetchone()
-            max_temp = row[0] if row and row[0] is not None else None
-        return max_temp is None or max_temp < settings.temp_warn_c
-    except Exception:
-        return True  # Never block on error
-
-
 async def _run_job(job_id: int) -> None:
    """Acquire semaphore slot, execute all stages, persist final state."""
    assert _semaphore is not None, "burnin.init() not called"

-    # Adaptive thermal gate: wait before competing for a slot if running drives
-    # are already at or above the warning threshold.  This prevents layering a
-    # new burn-in on top of a thermally-stressed system.  Gives up after 3 min
-    # and proceeds anyway so jobs don't queue indefinitely.
-    for _attempt in range(18):  # 18 × 10 s = 3 min max
-        if await _thermal_gate_ok():
-            break
-        if _attempt == 0:
-            log.info(
-                "Thermal gate: job %d waiting — running drive temps at or above %d°C",
-                job_id, settings.temp_warn_c,
-            )
-        await asyncio.sleep(10)
-    else:
-        log.warning("Thermal gate timed out for job %d — proceeding anyway", job_id)
-
    async with _semaphore:
        if await _is_cancelled(job_id):
            return
@@ -554,39 +519,15 @@ async def _stage_smart_test_ssh(job_id: int, devname: str, test_type: str, stage
        # "unknown" → keep polling


-async def _badblocks_available() -> bool:
-    """Check if badblocks is installed on the remote host (Linux/SCALE only)."""
-    from app import ssh_client
-    try:
-        async with await ssh_client._connect() as conn:
-            result = await conn.run("which badblocks", check=False)
-            return result.returncode == 0
-    except Exception:
-        return False
-
-
 async def _stage_surface_validate(job_id: int, devname: str, drive_id: int) -> bool:
    """
-    Surface validation stage — auto-routes to the right implementation:
-
-    1. SSH configured + badblocks available (TrueNAS SCALE / Linux):
-       → runs badblocks -wsv -b 4096 -p 1 /dev/{devname} directly over SSH.
-    2. SSH configured + badblocks NOT available (TrueNAS CORE / FreeBSD):
-       → uses TrueNAS REST API disk.wipe FULL job + post-wipe SMART check.
-    3. No SSH:
-       → simulated timed progress (dev/mock mode).
+    Surface validation stage.
+    SSH mode: runs badblocks -wsv -b 4096 -p 1 /dev/{devname}.
+    Mock mode: simulated timed progress (no real I/O).
    """
    from app import ssh_client
    if ssh_client.is_configured():
-        if await _badblocks_available():
-            return await _stage_surface_validate_ssh(job_id, devname, drive_id)
-        # TrueNAS CORE/FreeBSD: badblocks not available — use native wipe API
-        await _append_stage_log(
-            job_id, "surface_validate",
-            "[INFO] badblocks not found on host (TrueNAS CORE/FreeBSD) — "
-            "using TrueNAS disk.wipe API (FULL write pass).\n\n"
-        )
-        return await _stage_surface_validate_truenas(job_id, devname, drive_id)
+        return await _stage_surface_validate_ssh(job_id, devname, drive_id)
    return await _stage_timed_simulate(job_id, "surface_validate", settings.surface_validate_seconds)


@@ -714,116 +655,6 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
    return True


-async def _stage_surface_validate_truenas(job_id: int, devname: str, drive_id: int) -> bool:
-    """
-    Surface validation via TrueNAS CORE disk.wipe REST API.
-    Used on FreeBSD (TrueNAS CORE) where badblocks is unavailable.
-
-    Sends a FULL write-zero pass across the entire disk, polls progress,
-    then runs a post-wipe SMART attribute check to catch reallocated sectors.
-    """
-    from app import ssh_client
-
-    await _append_stage_log(
-        job_id, "surface_validate",
-        f"[START] TrueNAS disk.wipe FULL — {devname}\n"
-        f"[NOTE]  DESTRUCTIVE: all data on {devname} will be overwritten.\n\n"
-    )
-
-    # Start the wipe job
-    try:
-        tn_job_id = await _client.wipe_disk(devname, "FULL")
-    except Exception as exc:
-        await _set_stage_error(job_id, "surface_validate", f"Failed to start disk.wipe: {exc}")
-        return False
-
-    await _append_stage_log(
-        job_id, "surface_validate",
-        f"[JOB] TrueNAS wipe job started (job_id={tn_job_id})\n"
-    )
-
-    # Poll until complete
-    log_flush_counter = 0
-    while True:
-        if await _is_cancelled(job_id):
-            try:
-                await _client.abort_job(tn_job_id)
-            except Exception:
-                pass
-            return False
-
-        await asyncio.sleep(POLL_INTERVAL)
-
-        try:
-            job = await _client.get_job(tn_job_id)
-        except Exception as exc:
-            log.warning("Wipe job poll failed: %s", exc, extra={"job_id": job_id})
-            await _append_stage_log(job_id, "surface_validate", f"[poll error] {exc}\n")
-            continue
-
-        if not job:
-            await _set_stage_error(job_id, "surface_validate", f"Wipe job {tn_job_id} not found")
-            return False
-
-        state = job.get("state", "")
-        pct = int(job.get("progress", {}).get("percent", 0) or 0)
-        desc = job.get("progress", {}).get("description", "")
-
-        await _update_stage_percent(job_id, "surface_validate", min(pct, 99))
-        await _recalculate_progress(job_id)
-        _push_update()
-
-        # Log progress description every ~5 polls to avoid DB spam
-        log_flush_counter += 1
-        if desc and log_flush_counter % 5 == 0:
-            await _append_stage_log(job_id, "surface_validate", f"[{pct}%] {desc}\n")
-
-        if state == "SUCCESS":
-            await _update_stage_percent(job_id, "surface_validate", 100)
-            await _append_stage_log(
-                job_id, "surface_validate",
-                f"\n[DONE] Wipe job {tn_job_id} completed successfully.\n"
-            )
-            # Post-wipe SMART check — catch any sectors that failed under write stress
-            if ssh_client.is_configured() and drive_id is not None:
-                await _append_stage_log(
-                    job_id, "surface_validate",
-                    "[CHECK] Running post-wipe SMART attribute check...\n"
-                )
-                try:
-                    attrs = await ssh_client.get_smart_attributes(devname)
-                    await _store_smart_attrs(drive_id, attrs)
-                    if attrs["failures"]:
-                        error = "Post-wipe SMART check: " + "; ".join(attrs["failures"])
-                        await _set_stage_error(job_id, "surface_validate", error)
-                        return False
-                    if attrs["warnings"]:
-                        await _append_stage_log(
-                            job_id, "surface_validate",
-                            "[WARNING] " + "; ".join(attrs["warnings"]) + "\n"
-                        )
-                    await _append_stage_log(
-                        job_id, "surface_validate",
-                        f"[CHECK] SMART health: {attrs['health']} — no critical attributes.\n"
-                    )
-                except Exception as exc:
-                    log.warning("Post-wipe SMART check failed: %s", exc)
-                    await _append_stage_log(
-                        job_id, "surface_validate",
-                        f"[WARN] Post-wipe SMART check failed (non-fatal): {exc}\n"
-                    )
-            return True
-
-        elif state in ("FAILED", "ABORTED", "ERROR"):
-            error_msg = job.get("error") or f"Disk wipe failed (state={state})"
-            await _set_stage_error(
-                job_id, "surface_validate",
-                f"TrueNAS disk.wipe FAILED: {error_msg}"
-            )
-            return False
-        # RUNNING or WAITING — keep polling
-
-
 async def _stage_timed_simulate(job_id: int, stage_name: str, duration_seconds: int) -> bool:
    """Simulate a timed stage with progress updates (mock / dev mode)."""
    start = time.monotonic()
--- a/claude-sandbox/truenas-burnin/app/config.py
+++ b/claude-sandbox/truenas-burnin/app/config.py
@@ -68,7 +68,7 @@ class Settings(BaseSettings):
    ssh_key: str = ""             # PEM private key content (paste full key including headers)

    # Application version — used by the /api/v1/updates/check endpoint
-    app_version: str = "1.0.0-8"
+    app_version: str = "1.0.0-7"


 settings = Settings()
--- a/claude-sandbox/truenas-burnin/app/database.py
+++ b/claude-sandbox/truenas-burnin/app/database.py
--- a/claude-sandbox/truenas-burnin/app/logging_config.py
+++ b/claude-sandbox/truenas-burnin/app/logging_config.py
--- a/claude-sandbox/truenas-burnin/app/mailer.py
+++ b/claude-sandbox/truenas-burnin/app/mailer.py
--- a/claude-sandbox/truenas-burnin/app/main.py
+++ b/claude-sandbox/truenas-burnin/app/main.py
--- a/claude-sandbox/truenas-burnin/app/models.py
+++ b/claude-sandbox/truenas-burnin/app/models.py
--- a/claude-sandbox/truenas-burnin/app/notifier.py
+++ b/claude-sandbox/truenas-burnin/app/notifier.py
--- a/claude-sandbox/truenas-burnin/app/poller.py
+++ b/claude-sandbox/truenas-burnin/app/poller.py
@@ -20,15 +20,13 @@ from app.truenas import TrueNASClient

 log = logging.getLogger(__name__)

-# Shared state read by the /health endpoint and dashboard template
+# Shared state read by the /health endpoint
 _state: dict[str, Any] = {
    "last_poll_at": None,
    "last_error": None,
    "healthy": False,
    "drives_seen": 0,
    "consecutive_failures": 0,
-    "system_temps": {},        # {"cpu_c": int|None, "pch_c": int|None}
-    "thermal_pressure": "ok",  # "ok" | "warn" | "crit" — based on running burn-in drive temps
 }

 # SSE subscriber queues — notified after each successful poll
@@ -210,67 +208,6 @@ async def _sync_history(
 # Poll cycle
 # ---------------------------------------------------------------------------

-async def _poll_smart_via_ssh(db: aiosqlite.Connection, now: str) -> None:
-    """
-    Poll progress for SMART tests started via SSH (truenas_job_id IS NULL).
-    Used on TrueNAS SCALE 25.10+ where the REST smart/test API no longer exists.
-    """
-    from app import ssh_client
-    if not ssh_client.is_configured():
-        return
-
-    cur = await db.execute(
-        """SELECT st.id, st.test_type, st.drive_id, d.devname, st.started_at
-           FROM smart_tests st
-           JOIN drives d ON d.id = st.drive_id
-           WHERE st.state = 'running' AND st.truenas_job_id IS NULL"""
-    )
-    rows = await cur.fetchall()
-    if not rows:
-        return
-
-    for row in rows:
-        test_id, ttype, drive_id, devname, started_at = row[0], row[1], row[2], row[3], row[4]
-        try:
-            progress = await ssh_client.poll_smart_progress(devname)
-        except Exception as exc:
-            log.warning("SSH SMART poll failed for %s: %s", devname, exc)
-            continue
-
-        state = progress["state"]
-        pct_remaining = progress.get("percent_remaining")  # None = not yet in output
-        raw_output = progress.get("output", "")
-
-        if state == "running":
-            # pct_remaining=None means smartctl output doesn't have the % line yet
-            # (test just started) — keep percent at 0 rather than jumping to 100
-            if pct_remaining is None:
-                pct = 0
-            else:
-                pct = max(0, 100 - pct_remaining)
-            eta = _eta_from_progress(pct, started_at)
-            await db.execute(
-                "UPDATE smart_tests SET percent=?, eta_at=?, raw_output=? WHERE id=?",
-                (pct, eta, raw_output, test_id),
-            )
-        elif state == "passed":
-            await db.execute(
-                "UPDATE smart_tests SET state='passed', percent=100, finished_at=?, raw_output=? WHERE id=?",
-                (now, raw_output, test_id),
-            )
-            log.info("SSH SMART %s passed on %s", ttype, devname)
-        elif state == "failed":
-            await db.execute(
-                "UPDATE smart_tests SET state='failed', percent=0, finished_at=?, "
-                "error_text=?, raw_output=? WHERE id=?",
-                (now, f"SMART {ttype.upper()} test failed", raw_output, test_id),
-            )
-            log.warning("SSH SMART %s FAILED on %s", ttype, devname)
-        # state == "unknown" → keep polling, no update
-
-    await db.commit()
-
-
 async def poll_cycle(client: TrueNASClient) -> int:
    """Run one full poll. Returns number of drives seen."""
    now = _now()
@@ -278,20 +215,6 @@ async def poll_cycle(client: TrueNASClient) -> int:
    disks = await client.get_disks()
    running_jobs = await client.get_smart_jobs(state="RUNNING")

-    # Fetch temperatures via SCALE-specific endpoint.
-    # CORE doesn't have this endpoint — silently skip on any error.
-    try:
-        temps = await client.get_disk_temperatures()
-    except Exception:
-        temps = {}
-
-    # Inject temperature into each disk dict (SCALE 25.10 has no temp in /disk)
-    for disk in disks:
-        devname = disk.get("devname", "")
-        t = temps.get(devname)
-        if t is not None:
-            disk["temperature"] = int(round(t))
-
    # Index running jobs by (devname, test_type)
    active: dict[tuple[str, str], dict] = {}
    for job in running_jobs:
@@ -320,9 +243,6 @@ async def poll_cycle(client: TrueNASClient) -> int:

        await db.commit()

-        # SSH SMART polling — for tests started via smartctl (no TrueNAS REST job)
-        await _poll_smart_via_ssh(db, now)
-
    return len(disks)


@@ -343,39 +263,6 @@ async def run(client: TrueNASClient) -> None:
            _state["drives_seen"] = count
            _state["consecutive_failures"] = 0
            log.debug("Poll OK", extra={"drives": count})
-
-            # System sensor temps via SSH (non-fatal)
-            from app import ssh_client as _ssh
-            if _ssh.is_configured():
-                try:
-                    _state["system_temps"] = await _ssh.get_system_sensors()
-                except Exception:
-                    pass
-
-            # Thermal pressure: max temp of drives currently under burn-in
-            try:
-                async with aiosqlite.connect(settings.db_path) as _tdb:
-                    _tdb.row_factory = aiosqlite.Row
-                    await _tdb.execute("PRAGMA journal_mode=WAL")
-                    _cur = await _tdb.execute("""
-                        SELECT MAX(d.temperature_c)
-                        FROM drives d
-                        JOIN burnin_jobs bj ON bj.drive_id = d.id
-                        WHERE bj.state = 'running' AND d.temperature_c IS NOT NULL
-                    """)
-                    _row = await _cur.fetchone()
-                    _max_t = _row[0] if _row and _row[0] is not None else None
-                if _max_t is None:
-                    _state["thermal_pressure"] = "ok"
-                elif _max_t >= settings.temp_crit_c:
-                    _state["thermal_pressure"] = "crit"
-                elif _max_t >= settings.temp_warn_c:
-                    _state["thermal_pressure"] = "warn"
-                else:
-                    _state["thermal_pressure"] = "ok"
-            except Exception:
-                _state["thermal_pressure"] = "ok"
-
            _notify_subscribers()

            # Check for stuck jobs every 5 cycles (~1 min at default 12s interval)
--- a/claude-sandbox/truenas-burnin/app/renderer.py
+++ b/claude-sandbox/truenas-burnin/app/renderer.py
--- a/claude-sandbox/truenas-burnin/app/routes.py
+++ b/claude-sandbox/truenas-burnin/app/routes.py
@@ -218,18 +218,6 @@ async def sse_drives(request: Request):

                yield {"event": "drives-update", "data": html}

-                # Push system sensor state so JS can update temp chips live
-                ps = poller.get_state()
-                yield {
-                    "event": "system-sensors",
-                    "data": json.dumps({
-                        "system_temps":    ps.get("system_temps", {}),
-                        "thermal_pressure": ps.get("thermal_pressure", "ok"),
-                        "temp_warn_c":     settings.temp_warn_c,
-                        "temp_crit_c":     settings.temp_crit_c,
-                    }),
-                }
-
                # Push browser notification event if this was a job completion
                if alert:
                    yield {"event": "job-alert", "data": json.dumps(alert)}
@@ -365,13 +353,9 @@ async def smart_start(
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
 ):
-    """Start a standalone SHORT or LONG SMART test on a single drive.
-
-    Uses SSH (smartctl) when configured — required for TrueNAS SCALE 25.10+
-    where the REST smart/test endpoint no longer exists.
-    Falls back to TrueNAS REST API for older versions.
-    """
-    from app import burnin as _burnin, ssh_client
+    """Start a standalone SHORT or LONG SMART test on a single drive."""
+    from app.truenas import TrueNASClient
+    from app import burnin as _burnin

    test_type = (body.get("type") or "").upper()
    if test_type not in ("SHORT", "LONG"):
@@ -383,42 +367,17 @@ async def smart_start(
        raise HTTPException(status_code=404, detail="Drive not found")
    devname = row[0]

-    now = datetime.now(timezone.utc).isoformat()
-    ttype_lower = test_type.lower()
+    # Use the shared TrueNAS client held by the burnin module
+    client = _burnin._client
+    if client is None:
+        raise HTTPException(status_code=503, detail="TrueNAS client not ready")

-    if ssh_client.is_configured():
-        # SSH path — works on TrueNAS SCALE 25.10+ and CORE
-        try:
-            output = await ssh_client.start_smart_test(devname, test_type)
-        except Exception as exc:
-            raise HTTPException(status_code=502, detail=f"SSH error: {exc}")
+    try:
+        tn_job_id = await client.start_smart_test([devname], test_type)
+    except Exception as exc:
+        raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}")

-        # Mark as running in DB (truenas_job_id=NULL signals SSH-managed test)
-        # Store smartctl start output as proof the test was initiated
-        await db.execute(
-            """INSERT INTO smart_tests (drive_id, test_type, state, percent, started_at, raw_output)
-               VALUES (?,?,?,?,?,?)
-               ON CONFLICT(drive_id, test_type) DO UPDATE SET
-                   state='running', percent=0, truenas_job_id=NULL,
-                   started_at=excluded.started_at, finished_at=NULL, error_text=NULL,
-                   raw_output=excluded.raw_output""",
-            (drive_id, ttype_lower, "running", 0, now, output),
-        )
-        await db.commit()
-        from app import poller as _poller
-        _poller._notify_subscribers()
-        return {"devname": devname, "type": test_type, "message": output[:200]}
-
-    else:
-        # REST path — older TrueNAS CORE / SCALE versions
-        client = _burnin._client
-        if client is None:
-            raise HTTPException(status_code=503, detail="TrueNAS client not ready")
-        try:
-            tn_job_id = await client.start_smart_test([devname], test_type)
-        except Exception as exc:
-            raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}")
-        return {"job_id": tn_job_id, "devname": devname, "type": test_type}
+    return {"job_id": tn_job_id, "devname": devname, "type": test_type}


@router.post("/api/v1/drives/{drive_id}/smart/cancel")
@@ -444,37 +403,28 @@ async def smart_cancel(
    if client is None:
        raise HTTPException(status_code=503, detail="TrueNAS client not ready")

-    from app import ssh_client
+    # Find the running TrueNAS job for this drive/test-type
+    try:
+        jobs = await client.get_smart_jobs()
+        tn_job_id = None
+        for j in jobs:
+            if j.get("state") != "RUNNING":
+                continue
+            args = j.get("arguments", [])
+            if not args or not isinstance(args[0], dict):
+                continue
+            if devname in args[0].get("disks", []):
+                tn_job_id = j["id"]
+                break

-    if ssh_client.is_configured():
-        # SSH path — abort via smartctl -X
-        try:
-            await ssh_client.abort_smart_test(devname)
-        except Exception as exc:
-            raise HTTPException(status_code=502, detail=f"SSH abort error: {exc}")
-    else:
-        # REST path — find TrueNAS job and abort it
-        try:
-            jobs = await client.get_smart_jobs()
-            tn_job_id = None
-            for j in jobs:
-                if j.get("state") != "RUNNING":
-                    continue
-                args = j.get("arguments", [])
-                if not args or not isinstance(args[0], dict):
-                    continue
-                if devname in args[0].get("disks", []):
-                    tn_job_id = j["id"]
-                    break
+        if tn_job_id is None:
+            raise HTTPException(status_code=404, detail="No running SMART test found for this drive")

-            if tn_job_id is None:
-                raise HTTPException(status_code=404, detail="No running SMART test found for this drive")
-
-            await client.abort_job(tn_job_id)
-        except HTTPException:
-            raise
-        except Exception as exc:
-            raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}")
+        await client.abort_job(tn_job_id)
+    except HTTPException:
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}")

    # Update local DB state
    now = datetime.now(timezone.utc).isoformat()
--- a/claude-sandbox/truenas-burnin/app/settings_store.py
+++ b/claude-sandbox/truenas-burnin/app/settings_store.py
--- a/claude-sandbox/truenas-burnin/app/ssh_client.py
+++ b/claude-sandbox/truenas-burnin/app/ssh_client.py
@@ -38,26 +38,15 @@ SMART_ATTRS: dict[int, tuple[str, bool]] = {
 # ---------------------------------------------------------------------------

 def is_configured() -> bool:
-    """Returns True when SSH host + at least one auth method is available."""
-    import os
+    """Returns True when SSH credentials are present and usable."""
    from app.config import settings
-    if not settings.ssh_host:
-        return False
-    has_creds = bool(
-        settings.ssh_key
-        or settings.ssh_password
-        or os.path.exists(os.environ.get("SSH_KEY_FILE", _MOUNTED_KEY_PATH))
-    )
-    return has_creds
+    return bool(settings.ssh_host and (settings.ssh_password or settings.ssh_key))


 # ---------------------------------------------------------------------------
 # Low-level connection
 # ---------------------------------------------------------------------------

-_MOUNTED_KEY_PATH = "/run/secrets/ssh_key"
-
-
 async def _connect():
    """Open a single-use SSH connection. Caller must use `async with`."""
    import asyncssh
@@ -70,17 +59,9 @@ async def _connect():
        "known_hosts": None,          # trust all hosts (same spirit as TRUENAS_VERIFY_TLS=false)
    }
    if settings.ssh_key:
-        # Key material provided via env var (base case)
        kwargs["client_keys"] = [asyncssh.import_private_key(settings.ssh_key)]
-    elif settings.ssh_password:
+    if settings.ssh_password:
        kwargs["password"] = settings.ssh_password
-    else:
-        # Fall back to mounted key file (preferred for production — no key in env vars)
-        import os
-        key_path = os.environ.get("SSH_KEY_FILE", _MOUNTED_KEY_PATH)
-        if os.path.exists(key_path):
-            kwargs["client_keys"] = [key_path]
-        # If nothing is configured, asyncssh will attempt agent/default key lookup

    return asyncssh.connect(**kwargs)

@@ -247,70 +228,6 @@ async def run_badblocks(
    }


-async def get_system_sensors() -> dict:
-    """
-    Run `sensors -j` on TrueNAS and extract system-level temperatures.
-    Returns {"cpu_c": int|None, "pch_c": int|None}.
-    cpu_c  = CPU package temp (coretemp chip)
-    pch_c  = PCH/chipset temp (pch_* chip) — proxy for storage I/O lane thermals
-    Falls back gracefully if SSH is not configured or lm-sensors is unavailable.
-    """
-    if not is_configured():
-        return {}
-    try:
-        async with await _connect() as conn:
-            result = await conn.run("sensors -j 2>/dev/null", check=False)
-            output = result.stdout.strip()
-            if not output:
-                return {}
-            return _parse_sensors_json(output)
-    except Exception as exc:
-        log.debug("get_system_sensors failed: %s", exc)
-        return {}
-
-
-def _parse_sensors_json(output: str) -> dict:
-    import json as _json
-    try:
-        data = _json.loads(output)
-    except Exception:
-        return {}
-
-    cpu_c: int | None = None
-    pch_c: int | None = None
-
-    for chip_name, chip_data in data.items():
-        if not isinstance(chip_data, dict):
-            continue
-
-        # CPU package temp — coretemp chip, "Package id N" sensor
-        if chip_name.startswith("coretemp") and cpu_c is None:
-            for sensor_name, sensor_vals in chip_data.items():
-                if not isinstance(sensor_vals, dict):
-                    continue
-                if "package" in sensor_name.lower():
-                    for k, v in sensor_vals.items():
-                        if k.endswith("_input") and isinstance(v, (int, float)):
-                            cpu_c = int(round(v))
-                            break
-                if cpu_c is not None:
-                    break
-
-        # PCH / chipset temp — manages PCIe lanes including HBA / storage I/O
-        elif chip_name.startswith("pch_") and pch_c is None:
-            for sensor_name, sensor_vals in chip_data.items():
-                if not isinstance(sensor_vals, dict):
-                    continue
-                for k, v in sensor_vals.items():
-                    if k.endswith("_input") and isinstance(v, (int, float)):
-                        pch_c = int(round(v))
-                        break
-                if pch_c is not None:
-                    break
-
-    return {"cpu_c": cpu_c, "pch_c": pch_c}
-
-
 # ---------------------------------------------------------------------------
 # Parsers
 # ---------------------------------------------------------------------------
@@ -358,7 +275,7 @@ def _parse_smartctl(output: str) -> dict:

 def _parse_smart_progress(output: str) -> dict:
    state = "unknown"
-    percent_remaining = None  # None = "in progress but no % line parsed yet"
+    percent_remaining = 0

    lower = output.lower()

--- a/claude-sandbox/truenas-burnin/app/static/app.css
+++ b/claude-sandbox/truenas-burnin/app/static/app.css
@@ -1076,56 +1076,6 @@ a.stat-card:hover {
 .stat-passed  .stat-value { color: var(--green); }
 .stat-idle    .stat-value { color: var(--text-muted); }

-/* Vertical separator between drive-count cards and sensor chips */
-.stats-bar-sep {
-  width: 1px;
-  height: 36px;
-  background: var(--border);
-  align-self: center;
-  flex-shrink: 0;
-}
-
-/* Compact sensor chip — CPU / PCH / Thermal */
-.stat-sensor {
-  background: var(--bg-card);
-  border: 1px solid var(--border);
-  border-radius: 8px;
-  padding: 6px 12px;
-  text-align: center;
-  min-width: 52px;
-  display: flex;
-  flex-direction: column;
-  gap: 2px;
-}
-
-.stat-sensor-val {
-  font-size: 16px;
-  font-weight: 700;
-  font-variant-numeric: tabular-nums;
-  line-height: 1.1;
-}
-
-.stat-sensor-label {
-  font-size: 9px;
-  text-transform: uppercase;
-  letter-spacing: 0.08em;
-  color: var(--text-muted);
-  line-height: 1.2;
-}
-
-/* Thermal pressure states */
-.stat-sensor-thermal-warn {
-  border-color: var(--yellow-bd);
-  background: var(--yellow-bg);
-}
-.stat-sensor-thermal-warn .stat-sensor-val { color: var(--yellow); }
-
-.stat-sensor-thermal-crit {
-  border-color: var(--red-bd);
-  background: var(--red-bg);
-}
-.stat-sensor-thermal-crit .stat-sensor-val { color: var(--red); }
-
 /* -----------------------------------------------------------------------
   Batch action bar (inside filter-bar)
 ----------------------------------------------------------------------- */
--- a/claude-sandbox/truenas-burnin/app/static/app.js
+++ b/claude-sandbox/truenas-burnin/app/static/app.js
@@ -135,59 +135,14 @@
    if (nb) nb.style.display = 'none';
  }

-  // Handle SSE events
+  // Handle job-alert SSE events for browser notifications
  document.addEventListener('htmx:sseMessage', function (e) {
-    if (!e.detail) return;
-    if (e.detail.type === 'job-alert') {
-      try { handleJobAlert(JSON.parse(e.detail.data)); } catch (_) {}
-    } else if (e.detail.type === 'system-sensors') {
-      try { handleSystemSensors(JSON.parse(e.detail.data)); } catch (_) {}
-    }
+    if (!e.detail || e.detail.type !== 'job-alert') return;
+    try {
+      handleJobAlert(JSON.parse(e.detail.data));
+    } catch (_) {}
  });

-  function handleSystemSensors(data) {
-    var st   = data.system_temps  || {};
-    var tp   = data.thermal_pressure || 'ok';
-    var warn = data.temp_warn_c   || 46;
-    var crit = data.temp_crit_c   || 55;
-
-    function tempClass(c) {
-      if (c == null) return '';
-      return c >= crit ? 'temp-hot' : c >= warn ? 'temp-warm' : 'temp-cool';
-    }
-
-    // CPU chip
-    var cpuChip = document.getElementById('sensor-cpu');
-    var cpuVal  = document.getElementById('sensor-cpu-val');
-    if (cpuVal && st.cpu_c != null) {
-      if (cpuChip) cpuChip.hidden = false;
-      cpuVal.textContent = st.cpu_c + '°';
-      cpuVal.className   = 'stat-sensor-val ' + tempClass(st.cpu_c);
-    }
-
-    // PCH chip
-    var pchChip = document.getElementById('sensor-pch');
-    var pchVal  = document.getElementById('sensor-pch-val');
-    if (pchVal && st.pch_c != null) {
-      if (pchChip) pchChip.hidden = false;
-      pchVal.textContent = st.pch_c + '°';
-      pchVal.className   = 'stat-sensor-val ' + tempClass(st.pch_c);
-    }
-
-    // Thermal pressure chip
-    var tChip = document.getElementById('sensor-thermal');
-    var tVal  = document.getElementById('sensor-thermal-val');
-    if (tChip && tVal) {
-      if (tp === 'warn' || tp === 'crit') {
-        tChip.hidden = false;
-        tChip.className = 'stat-sensor stat-sensor-thermal stat-sensor-thermal-' + tp;
-        tVal.textContent = tp === 'warn' ? 'WARM' : 'HOT';
-      } else {
-        tChip.hidden = true;
-      }
-    }
-  }
-
  function handleJobAlert(data) {
    var isPass   = data.state === 'passed';
    var icon     = isPass ? '✓' : '✕';
--- a/claude-sandbox/truenas-burnin/app/templates/audit.html
+++ b/claude-sandbox/truenas-burnin/app/templates/audit.html
--- a/claude-sandbox/truenas-burnin/app/templates/components/drives_table.html
+++ b/claude-sandbox/truenas-burnin/app/templates/components/drives_table.html
--- a/claude-sandbox/truenas-burnin/app/templates/components/modal_batch.html
+++ b/claude-sandbox/truenas-burnin/app/templates/components/modal_batch.html
--- a/claude-sandbox/truenas-burnin/app/templates/components/modal_start.html
+++ b/claude-sandbox/truenas-burnin/app/templates/components/modal_start.html
--- a/claude-sandbox/truenas-burnin/app/templates/dashboard.html
+++ b/claude-sandbox/truenas-burnin/app/templates/dashboard.html
@ -6,7 +6,7 @@
 {% include "components/modal_start.html" %}
 {% include "components/modal_batch.html" %}

-<!-- Stats bar — drive counts updated live by app.js updateCounts(); sensor chips updated by SSE system-sensors event -->
+<!-- Stats bar — counts are updated live by app.js updateCounts() -->
 <div class="stats-bar">
  <div class="stat-card" data-stat-filter="all">
    <span class="stat-value" id="stat-all">{{ drives | length }}</span>
@ -28,33 +28,6 @@
    <span class="stat-value" id="stat-idle">0</span>
    <span class="stat-label">Idle</span>
  </div>
-
-  {%- set st = poller.system_temps if (poller and poller.system_temps) else {} %}
-  {%- if st.get('cpu_c') is not none or st.get('pch_c') is not none %}
-  <div class="stats-bar-sep"></div>
-  {%- if st.get('cpu_c') is not none %}
-  <div class="stat-sensor" id="sensor-cpu">
-    <span class="stat-sensor-val {{ st.get('cpu_c') | temp_class }}" id="sensor-cpu-val">{{ st.get('cpu_c') }}°</span>
-    <span class="stat-sensor-label">CPU</span>
-  </div>
-  {%- endif %}
-  {%- if st.get('pch_c') is not none %}
-  <div class="stat-sensor" id="sensor-pch">
-    <span class="stat-sensor-val {{ st.get('pch_c') | temp_class }}" id="sensor-pch-val">{{ st.get('pch_c') }}°</span>
-    <span class="stat-sensor-label">PCH</span>
-  </div>
-  {%- endif %}
-  {%- endif %}
-
-  {%- set tp = poller.thermal_pressure if poller else 'ok' %}
-  <div class="stat-sensor stat-sensor-thermal stat-sensor-thermal-{{ tp }}"
-       id="sensor-thermal"
-       {% if not tp or tp == 'ok' %}hidden{% endif %}>
-    <span class="stat-sensor-val" id="sensor-thermal-val">
-      {%- if tp == 'warn' %}WARM{%- elif tp == 'crit' %}HOT{%- else %}OK{%- endif %}
-    </span>
-    <span class="stat-sensor-label">Thermal</span>
-  </div>
 </div>

 <!-- Failed drive banner — shown/hidden by JS when failed count > 0 -->
--- a/claude-sandbox/truenas-burnin/app/templates/history.html
+++ b/claude-sandbox/truenas-burnin/app/templates/history.html
--- a/claude-sandbox/truenas-burnin/app/templates/job_detail.html
+++ b/claude-sandbox/truenas-burnin/app/templates/job_detail.html
--- a/claude-sandbox/truenas-burnin/app/templates/job_print.html
+++ b/claude-sandbox/truenas-burnin/app/templates/job_print.html
--- a/claude-sandbox/truenas-burnin/app/templates/layout.html
+++ b/claude-sandbox/truenas-burnin/app/templates/layout.html
--- a/claude-sandbox/truenas-burnin/app/templates/settings.html
+++ b/claude-sandbox/truenas-burnin/app/templates/settings.html
--- a/claude-sandbox/truenas-burnin/app/templates/stats.html
+++ b/claude-sandbox/truenas-burnin/app/templates/stats.html
--- a/claude-sandbox/truenas-burnin/app/terminal.py
+++ b/claude-sandbox/truenas-burnin/app/terminal.py
@ -50,19 +50,12 @@ async def handle(ws: WebSocket) -> None:
    elif settings.ssh_password:
        connect_kw["password"] = settings.ssh_password
    else:
-        # Fall back to mounted key file (same logic as ssh_client._connect)
-        import os
-        from app import ssh_client as _sc
-        key_path = os.environ.get("SSH_KEY_FILE", _sc._MOUNTED_KEY_PATH)
-        if os.path.exists(key_path):
-            connect_kw["client_keys"] = [key_path]
-        else:
-            await _send(ws,
-                b"\r\n\x1b[33mNo SSH credentials configured.\x1b[0m "
-                b"Set a password or private key in Settings.\r\n"
-            )
-            await ws.close(1008)
-            return
+        await _send(ws,
+            b"\r\n\x1b[33mNo SSH credentials configured.\x1b[0m "
+            b"Set a password or private key in Settings.\r\n"
+        )
+        await ws.close(1008)
+        return

    await _send(ws,
        f"\r\n\x1b[36mConnecting to {settings.ssh_host}\u2026\x1b[0m\r\n".encode()
--- a/claude-sandbox/truenas-burnin/app/truenas.py
+++ b/claude-sandbox/truenas-burnin/app/truenas.py
@ -65,13 +65,7 @@ class TrueNASClient:
            "get_disks",
        )
        r.raise_for_status()
-        disks = r.json()
-        # Filter out expired records — TrueNAS keeps historical entries for removed
-        # disks with expiretime set. Only return currently-present drives.
-        active = [d for d in disks if not d.get("expiretime")]
-        if len(active) < len(disks):
-            log.debug("get_disks: filtered %d expired record(s)", len(disks) - len(active))
-        return active
+        return r.json()

    async def get_smart_jobs(self, state: str | None = None) -> list[dict]:
        params: dict = {"method": "smart.test"}
@ -116,49 +110,3 @@ class TrueNASClient:
        )
        r.raise_for_status()
        return r.json()
-
-    async def get_disk_temperatures(self) -> dict[str, float | None]:
-        """
-        Returns {devname: celsius | None}.
-        Uses POST /api/v2.0/disk/temperatures — available on TrueNAS SCALE 25.10+.
-        CORE compatibility: raises on 404/405, caller should catch and skip.
-        """
-        r = await _with_retry(
-            lambda: self._client.post("/api/v2.0/disk/temperatures", json={}),
-            "get_disk_temperatures",
-        )
-        r.raise_for_status()
-        return r.json()
-
-    async def wipe_disk(self, devname: str, mode: str = "FULL") -> int:
-        """
-        Start a disk wipe job. Not retried — duplicate starts would launch a second wipe.
-        mode: "QUICK" (wipe MBR/partitions only), "FULL" (write zeros), "FULL_RANDOM" (write random)
-        devname: basename only, e.g. "ada0" (not "/dev/ada0")
-        Returns the TrueNAS job ID.
-        """
-        r = await self._client.post(
-            "/api/v2.0/disk/wipe",
-            json={"dev": devname, "mode": mode},
-        )
-        r.raise_for_status()
-        return r.json()
-
-    async def get_job(self, job_id: int) -> dict | None:
-        """
-        Fetch a single TrueNAS job by ID.
-        Returns the job dict, or None if not found.
-        """
-        import json as _json
-        r = await _with_retry(
-            lambda: self._client.get(
-                "/api/v2.0/core/get_jobs",
-                params={"filters": _json.dumps([["id", "=", job_id]])},
-            ),
-            f"get_job({job_id})",
-        )
-        r.raise_for_status()
-        jobs = r.json()
-        if isinstance(jobs, list) and jobs:
-            return jobs[0]
-        return None
--- a/claude-sandbox/truenas-burnin/docker-compose.yml
+++ b/claude-sandbox/truenas-burnin/docker-compose.yml
@ -1,23 +0,0 @@
-services:
-  # mock-truenas is kept for local dev — not started in production
-  # To use mock mode: docker compose --profile mock up
-  # mock-truenas:
-  #   build: ./mock-truenas
-  #   container_name: mock-truenas
-  #   ports:
-  #     - "8000:8000"
-  #   profiles: [mock]
-  #   restart: unless-stopped
-
-  app:
-    build: .
-    container_name: truenas-burnin
-    ports:
-      - "8084:8084"
-    env_file: .env
-    volumes:
-      - ./data:/data
-      - ./app/templates:/opt/app/app/templates
-      - ./app/static:/opt/app/app/static
-      - /home/brandon/.ssh/id_ed25519:/run/secrets/ssh_key:ro
-    restart: unless-stopped
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,21 @@
+services:
+  mock-truenas:  # image is built from ./mock-truenas; the app service below depends on it
+    build: ./mock-truenas
+    container_name: mock-truenas
+    ports:
+      - "8000:8000"  # host:container
+    restart: unless-stopped
+
+  app:
+    build: .  # build context is the directory that holds this compose file
+    container_name: truenas-burnin
+    ports:
+      - "8084:8084"  # host:container
+    env_file: .env  # container environment is read from .env next to this file
+    volumes:
+      - ./data:/data
+      - ./app/templates:/opt/app/app/templates  # bind mount: host templates shadow the copy in the image
+      - ./app/static:/opt/app/app/static  # bind mount: host static files shadow the copy in the image
+    depends_on:
+      - mock-truenas  # short form: controls start order only, does not wait for readiness
+    restart: unless-stopped
--- a/claude-sandbox/truenas-burnin/mock-truenas/Dockerfile
+++ b/claude-sandbox/truenas-burnin/mock-truenas/Dockerfile
--- a/claude-sandbox/truenas-burnin/mock-truenas/app.py
+++ b/claude-sandbox/truenas-burnin/mock-truenas/app.py
--- a/claude-sandbox/truenas-burnin/requirements.txt
+++ b/claude-sandbox/truenas-burnin/requirements.txt
@ -1,5 +1,5 @@
 fastapi
-uvicorn[standard]
+uvicorn  # NOTE(review): the [standard] extra is dropped here, but app/terminal.py still handles a WebSocket; confirm a WebSocket library is installed some other way
 aiosqlite
 httpx
 pydantic-settings