nas-burnin/app/routes/__init__.py

import asyncio
import csv
import io
import json
from datetime import datetime, timezone

import aiosqlite
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from fastapi.responses import HTMLResponse, StreamingResponse
from sse_starlette.sse import EventSourceResponse

from app import auth, burnin, mailer, poller, settings_store
from app.config import settings
from app.database import get_db
from app.models import (
    BurninJobResponse, BurninStageResponse,
    CancelBurninRequest, DriveResponse,
    SmartTestState, StartBurninRequest, UnlockPoolDriveRequest,
    UpdateDriveRequest,
)
from app.renderer import templates

# Helpers shared with the extracted sub-routers — keep the underscore-
# prefixed local names that existing in-file callers reach for.
from ._helpers import (
    client_ip as _client_ip,
    is_stale as _is_stale,
    operator_for as _operator_for,
    secret_status as _secret_status,
    stale_context as _stale_context,
    SECRET_FIELDS as _SECRET_FIELDS,
)

# The single router this package exports; sub-routers and the endpoints
# defined below are all registered on it.
router = APIRouter()

# Sub-routers extracted as part of the routes/ package split (1.0.0-34).
# Their endpoints get registered against the same APIRouter, so the
# external `from app.routes import router` import in app/main.py keeps
# working unchanged. Future slices can extract more — drives, burnin,
# settings, history — using the same pattern.
#
# Absolute imports (vs `from . import auth`) because the earlier
# top-of-file `from app import auth` binds `auth` as an attribute on this
# package's namespace, which would shadow the relative-submodule lookup
# and yield `app.auth` instead of `app.routes.auth`.
import app.routes.auth as _auth_routes        # noqa: E402
import app.routes.system as _system_routes    # noqa: E402

# Attach the extracted endpoints to the shared router.
router.include_router(_auth_routes.router)
router.include_router(_system_routes.router)

# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _eta_seconds(eta_at: str | None) -> int | None:
    if not eta_at:
        return None
    try:
        eta_ts = datetime.fromisoformat(eta_at)
        if eta_ts.tzinfo is None:
            eta_ts = eta_ts.replace(tzinfo=timezone.utc)
        remaining = (eta_ts - datetime.now(timezone.utc)).total_seconds()
        return max(0, int(remaining))
    except Exception:
        return None


# _is_stale is now imported from ._helpers above.


def _compute_eta_seconds(started_at: str | None, percent: int) -> int | None:
    """Linear ETA extrapolation from started_at and percent complete."""
    if not started_at or percent <= 0:
        return None
    try:
        start = datetime.fromisoformat(started_at)
        if start.tzinfo is None:
            start = start.replace(tzinfo=timezone.utc)
        elapsed = (datetime.now(timezone.utc) - start).total_seconds()
        total_est = elapsed / (percent / 100)
        remaining = max(0, int(total_est - elapsed))
        return remaining
    except Exception:
        return None


def _build_smart(row: aiosqlite.Row, prefix: str) -> SmartTestState:
    """Build a ``SmartTestState`` from the ``<prefix>_*`` columns of ``row``.

    ``prefix`` selects the column family (the drives query aliases them as
    ``short_*`` and ``long_*``). A missing state falls back to ``"idle"``.
    """
    def col(suffix: str):
        # Read one prefixed column, e.g. "short_state".
        return row[f"{prefix}_{suffix}"]

    eta_at = col("eta_at")
    return SmartTestState(
        state=col("state") or "idle",
        percent=col("percent"),
        eta_seconds=_eta_seconds(eta_at),
        eta_timestamp=eta_at,
        started_at=col("started_at"),
        finished_at=col("finished_at"),
        error_text=col("error"),
    )


def _row_to_drive(row: aiosqlite.Row) -> DriveResponse:
    """Convert one row of the drives query into a ``DriveResponse``.

    Besides copying columns, this derives the staleness flag from the last
    poll time, builds the short/long SMART sub-objects, and asks the
    burn-in module for the drive's pool-unlock expiry.
    """
    drive_id = row["id"]
    polled_at = row["last_polled_at"]
    pool_name = row["pool_name"]
    pool_role = row["pool_role"]
    return DriveResponse(
        id=drive_id,
        devname=row["devname"],
        serial=row["serial"],
        model=row["model"],
        size_bytes=row["size_bytes"],
        temperature_c=row["temperature_c"],
        # NULL health is reported as UNKNOWN rather than passed through.
        smart_health=row["smart_health"] or "UNKNOWN",
        last_polled_at=polled_at,
        is_stale=_is_stale(polled_at),
        smart_short=_build_smart(row, "short"),
        smart_long=_build_smart(row, "long"),
        notes=row["notes"],
        location=row["location"],
        pool_name=pool_name,
        pool_role=pool_role,
        pool_unlocked_until=burnin.unlock_expiry(drive_id, pool_name, pool_role),
    )


def _compute_status(drive: dict) -> str:
    """Collapse the short/long SMART states and health into one status.

    Precedence is ``running`` > ``failed`` > ``passed`` > ``idle``. A drive
    whose ``smart_health`` is ``"FAILED"`` counts as failed even when
    neither test failed. Missing SMART entries are treated as idle.
    """
    states = tuple(
        (drive.get(key) or {}).get("state", "idle")
        for key in ("smart_short", "smart_long")
    )
    if "running" in states:
        return "running"
    if "failed" in states or drive.get("smart_health", "UNKNOWN") == "FAILED":
        return "failed"
    return "passed" if "passed" in states else "idle"


# Base SELECT for drive listings. Returns one row per drive seen within the
# last 7 days, ordered by devname, with the drive's short and long SMART
# test columns joined in under `short_*` / `long_*` aliases (NULL when the
# drive has no matching smart_tests row, because of the LEFT JOINs).
#
# `{where}` is a str.format placeholder for an optional extra clause that
# must start with `AND` (it follows the existing WHERE). Callers in this
# file pass either "" or "AND d.id = ?"; values are bound as parameters.
_DRIVES_QUERY = """
    SELECT
        d.id, d.devname, d.serial, d.model, d.size_bytes,
        d.temperature_c, d.smart_health, d.last_polled_at,
        d.notes, d.location, d.pool_name, d.pool_role,
        s.state       AS short_state,
        s.percent     AS short_percent,
        s.started_at  AS short_started_at,
        s.eta_at      AS short_eta_at,
        s.finished_at AS short_finished_at,
        s.error_text  AS short_error,
        l.state       AS long_state,
        l.percent     AS long_percent,
        l.started_at  AS long_started_at,
        l.eta_at      AS long_eta_at,
        l.finished_at AS long_finished_at,
        l.error_text  AS long_error
    FROM drives d
    LEFT JOIN smart_tests s ON s.drive_id = d.id AND s.test_type = 'short'
    LEFT JOIN smart_tests l ON l.drive_id = d.id AND l.test_type = 'long'
    WHERE d.last_seen_at >= datetime('now', '-7 days')
    {where}
    ORDER BY d.devname
"""


async def _fetch_burnin_by_drive(db: aiosqlite.Connection) -> dict[int, dict]:
    """Map each drive_id to its most recent burn-in job, whatever its state.

    A drive's newest job is left out when it was created at or before the
    drive's ``last_reset_at``; that is what makes the dashboard burn-in
    column go blank after a reset while the job rows themselves remain.
    """
    sql = """
        SELECT bj.*
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        WHERE bj.id IN (SELECT MAX(id) FROM burnin_jobs GROUP BY drive_id)
          AND (d.last_reset_at IS NULL OR bj.created_at > d.last_reset_at)
    """
    cursor = await db.execute(sql)
    latest: dict[int, dict] = {}
    for job_row in await cursor.fetchall():
        latest[job_row["drive_id"]] = dict(job_row)
    return latest


async def _fetch_drives_for_template(db: aiosqlite.Connection) -> list[dict]:
    """Load every listed drive as a template-ready dict.

    Each dict is the dumped ``DriveResponse`` plus two extra keys:
    ``status`` (from ``_compute_status``, computed before any overlay) and
    ``burnin`` (the drive's latest burn-in job dict, or ``None``).

    When a drive's latest burn-in is queued or running, the progress or
    result of its ``short_smart`` / ``long_smart`` stages is copied into
    the matching SMART column, but only where the standalone SMART state
    is empty or idle.
    """
    cursor = await db.execute(_DRIVES_QUERY.format(where=""))
    drive_rows = await cursor.fetchall()
    latest_jobs = await _fetch_burnin_by_drive(db)

    # Only active burn-ins can contribute SMART stage data to the table.
    active_job_ids = [
        job["id"]
        for job in latest_jobs.values()
        if job["state"] in ("running", "queued")
    ]

    stages_by_job: dict[int, dict[str, dict]] = {}  # job_id -> {stage_name: row}
    if active_job_ids:
        marks = ",".join("?" * len(active_job_ids))
        # `marks` is only "?,?,?" scaffolding; the ids travel as bound
        # parameters. Plain concatenation (not an f-string) keeps bandit's
        # B608 check quiet.
        sql = (
            "SELECT bs.burnin_job_id, bs.stage_name, bs.state, bs.percent, "
            "       bs.started_at, bs.finished_at, bs.error_text "
            "FROM burnin_stages bs "
            "WHERE bs.burnin_job_id IN (" + marks + ") "
            "  AND bs.stage_name IN ('short_smart', 'long_smart') "
            "  AND bs.state IN ('running', 'passed', 'failed')"
        )
        cursor = await db.execute(sql, active_job_ids)
        for stage_row in await cursor.fetchall():
            per_job = stages_by_job.setdefault(stage_row["burnin_job_id"], {})
            per_job[stage_row["stage_name"]] = dict(stage_row)

    result = []
    for drive_row in drive_rows:
        entry = _row_to_drive(drive_row).model_dump()
        entry["status"] = _compute_status(entry)
        job = latest_jobs.get(entry["id"])
        entry["burnin"] = job

        overlay = stages_by_job.get(job["id"], {}) if job else {}
        for stage_name, stage in overlay.items():
            column = "smart_short" if stage_name == "short_smart" else "smart_long"
            # A standalone SMART result takes priority over the burn-in stage.
            current_state = (entry.get(column) or {}).get("state")
            if current_state not in (None, "idle"):
                continue

            stage_state = stage["state"]
            pct = stage["percent"] or 0
            if stage_state == "running":
                shown_percent = pct
                eta = _compute_eta_seconds(stage["started_at"], pct)
            elif stage_state == "passed":
                shown_percent, eta = 100, None
            else:
                shown_percent, eta = 0, None

            entry[column] = {
                "state": stage_state,
                "percent": shown_percent,
                "eta_seconds": eta,
                "eta_timestamp": None,
                "started_at": stage["started_at"],
                "finished_at": stage["finished_at"],
                "error_text": stage["error_text"],
            }

        result.append(entry)
    return result


# _stale_context is now imported from ._helpers above.


# ---------------------------------------------------------------------------
# Dashboard
# ---------------------------------------------------------------------------

@router.get("/", response_class=HTMLResponse)
async def dashboard(request: Request, db: aiosqlite.Connection = Depends(get_db)):
    """Render the dashboard page with the drive table and poller state."""
    drives = await _fetch_drives_for_template(db)
    poller_state = poller.get_state()
    context = {
        "request": request,
        "drives": drives,
        "poller": poller_state,
    }
    # Staleness keys are merged last, as extra top-level template variables.
    context.update(_stale_context(poller_state))
    return templates.TemplateResponse(request, "dashboard.html", context)


# ---------------------------------------------------------------------------
# SSE — live drive table updates
# ---------------------------------------------------------------------------

@router.get("/sse/drives")
async def sse_drives(request: Request):
    """Server-sent-events stream that pushes live dashboard updates.

    Subscribes to the poller and, for every notification received, emits:

    * ``drives-update`` — freshly rendered drives-table HTML,
    * ``system-sensors`` — JSON with system temps, thermal pressure and the
      configured warn/crit thresholds,
    * ``job-alert`` — JSON alert, only when the notification payload is a
      dict carrying a truthy ``"alert"`` entry.

    If no notification arrives within 25 seconds a ``keepalive`` event is
    sent instead. The stream ends once the client is detected as
    disconnected; the poller subscription is released when the generator
    finishes.
    """
    # Subscription is taken before the generator starts running.
    q = poller.subscribe()

    async def generate():
        """Yield SSE event dicts until the client disconnects."""
        try:
            while True:
                # Wait for next poll notification or keepalive timeout
                try:
                    payload = await asyncio.wait_for(q.get(), timeout=25.0)
                except asyncio.TimeoutError:
                    # Quiet period: use the timeout as a chance to notice a
                    # closed connection before sending the keepalive.
                    if await request.is_disconnected():
                        break
                    yield {"event": "keepalive", "data": ""}
                    continue

                # Skip the DB work and rendering if nobody is listening.
                if await request.is_disconnected():
                    break

                # Extract alert from payload (may be None for regular polls)
                alert = None
                if isinstance(payload, dict):
                    alert = payload.get("alert")

                # Render fresh table HTML
                # A dedicated connection is opened per event rather than
                # using the request-scoped get_db dependency.
                async with aiosqlite.connect(settings.db_path) as db:
                    db.row_factory = aiosqlite.Row
                    await db.execute("PRAGMA journal_mode=WAL")
                    drives = await _fetch_drives_for_template(db)

                html = templates.env.get_template(
                    "components/drives_table.html"
                ).render(drives=drives)

                yield {"event": "drives-update", "data": html}

                # Push system sensor state so JS can update temp chips live
                ps = poller.get_state()
                yield {
                    "event": "system-sensors",
                    "data": json.dumps({
                        "system_temps":    ps.get("system_temps", {}),
                        "thermal_pressure": ps.get("thermal_pressure", "ok"),
                        "temp_warn_c":     settings.temp_warn_c,
                        "temp_crit_c":     settings.temp_crit_c,
                    }),
                }

                # Push browser notification event if this was a job completion
                if alert:
                    yield {"event": "job-alert", "data": json.dumps(alert)}

        finally:
            # Always drop the subscription once the generator exits.
            poller.unsubscribe(q)

    return EventSourceResponse(generate())


# ---------------------------------------------------------------------------
# JSON API
# ---------------------------------------------------------------------------


@router.get("/api/v1/drives", response_model=list[DriveResponse])
async def list_drives(db: aiosqlite.Connection = Depends(get_db)):
    """Return every drive matched by the base drives query as JSON."""
    cursor = await db.execute(_DRIVES_QUERY.format(where=""))
    drives = []
    for record in await cursor.fetchall():
        drives.append(_row_to_drive(record))
    return drives


@router.get("/api/v1/drives/{drive_id}/drawer")
async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db)):
    """Data for the log drawer — latest burn-in job + stages, SMART tests, audit events.

    Returns a dict with four keys:

    * ``drive``  — id, devname, serial, model, size_bytes,
    * ``burnin`` — the drive's newest burn-in job (with a ``stages`` list
      that includes ``log_text`` and ``bad_blocks``) or ``None``,
    * ``smart``  — ``short`` / ``long`` cards (SMART state plus
      ``raw_output``) and ``attrs`` (decoded cached attributes or ``None``),
    * ``events`` — up to 50 audit events for the drive, newest first.

    Raises:
        HTTPException: 404 when the drive is not returned by the base
            drives query (which also excludes drives not seen for 7 days).
    """
    cur = await db.execute(_DRIVES_QUERY.format(where="AND d.id = ?"), (drive_id,))
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    drive = _row_to_drive(row)

    # Latest burn-in job + its stages (include log_text and bad_blocks).
    # Named `latest_job` rather than `burnin` so the imported `burnin`
    # module is not shadowed inside this function.
    cur = await db.execute(
        "SELECT * FROM burnin_jobs WHERE drive_id=? ORDER BY id DESC LIMIT 1",
        (drive_id,),
    )
    job_row = await cur.fetchone()
    latest_job = None
    if job_row:
        latest_job = dict(job_row)
        cur = await db.execute(
            "SELECT id, stage_name, state, percent, started_at, finished_at, "
            "duration_seconds, error_text, log_text, bad_blocks "
            "FROM burnin_stages WHERE burnin_job_id=? ORDER BY id",
            (job_row["id"],),
        )
        latest_job["stages"] = [dict(r) for r in await cur.fetchall()]

    # SMART raw output from smart_tests table
    cur = await db.execute(
        "SELECT test_type, state, percent, started_at, finished_at, error_text, raw_output "
        "FROM smart_tests WHERE drive_id=?",
        (drive_id,),
    )
    smart_rows = {r["test_type"]: dict(r) for r in await cur.fetchall()}

    # Cached SMART attributes (JSON blob on drives table)
    smart_attrs = None
    cur = await db.execute("SELECT smart_attrs FROM drives WHERE id=?", (drive_id,))
    attrs_row = await cur.fetchone()
    if attrs_row and attrs_row["smart_attrs"]:
        try:
            smart_attrs = json.loads(attrs_row["smart_attrs"])
        except (ValueError, TypeError):
            # Best effort: an undecodable cache blob is reported as "no
            # attributes" instead of failing the whole drawer.
            # json.JSONDecodeError is a ValueError subclass.
            smart_attrs = None

    # Last 50 audit events for this drive (newest first)
    cur = await db.execute("""
        SELECT id, event_type, operator, message, created_at
        FROM audit_events
        WHERE drive_id = ?
        ORDER BY id DESC
        LIMIT 50
    """, (drive_id,))
    events = [dict(r) for r in await cur.fetchall()]

    def _smart_card(test_type: str) -> dict:
        """Merge the SMART state model with the stored raw smartctl output."""
        smart_obj = drive.smart_short if test_type == "short" else drive.smart_long
        card = smart_obj.model_dump() if smart_obj else {}
        card["raw_output"] = smart_rows.get(test_type, {}).get("raw_output")
        return card

    return {
        "drive": {
            "id":         drive.id,
            "devname":    drive.devname,
            "serial":     drive.serial,
            "model":      drive.model,
            "size_bytes": drive.size_bytes,
        },
        "burnin":      latest_job,
        "smart": {
            "short":       _smart_card("short"),
            "long":        _smart_card("long"),
            "attrs":       smart_attrs,
        },
        "events":      events,
    }


@router.get("/api/v1/drives/{drive_id}", response_model=DriveResponse)
async def get_drive(drive_id: int, db: aiosqlite.Connection = Depends(get_db)):
    """Return a single drive by id, or 404 when the drives query finds none."""
    single_drive_sql = _DRIVES_QUERY.format(where="AND d.id = ?")
    cursor = await db.execute(single_drive_sql, (drive_id,))
    record = await cursor.fetchone()
    if record:
        return _row_to_drive(record)
    raise HTTPException(status_code=404, detail="Drive not found")


@router.post("/api/v1/drives/{drive_id}/smart/start")
async def smart_start(
    drive_id: int,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Start a standalone SHORT or LONG SMART test on a single drive.

    Uses SSH (smartctl) when configured — required for TrueNAS SCALE 25.10+
    where the REST smart/test endpoint no longer exists.
    Falls back to TrueNAS REST API for older versions.

    ``body`` must carry ``{"type": "SHORT" | "LONG"}`` (case-insensitive).

    Returns:
        SSH path: ``{"devname", "type", "message"}`` where ``message`` is
        the first 200 characters of the smartctl output.
        REST path: ``{"job_id", "devname", "type"}``.

    Raises:
        HTTPException: 422 for a missing, non-string or unknown ``type``;
            404 for an unknown drive; 502 when the SSH or TrueNAS call
            fails; 503 when the REST client is not available.
    """
    # ssh_client is not imported at module level, so it is pulled in here.
    from app import ssh_client

    # Only strings can name a test type; anything else (number, list, …)
    # is a validation error rather than an AttributeError / HTTP 500.
    requested = body.get("type")
    test_type = requested.upper() if isinstance(requested, str) else ""
    if test_type not in ("SHORT", "LONG"):
        raise HTTPException(status_code=422, detail="type must be SHORT or LONG")

    cur = await db.execute("SELECT devname FROM drives WHERE id=?", (drive_id,))
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    devname = row[0]

    if ssh_client.is_configured():
        # SSH path — works on TrueNAS SCALE 25.10+ and CORE
        started_at = datetime.now(timezone.utc).isoformat()
        try:
            output = await ssh_client.start_smart_test(devname, test_type)
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"SSH error: {exc}") from exc

        # Mark as running in DB (truenas_job_id=NULL signals SSH-managed test)
        # Store smartctl start output as proof the test was initiated
        await db.execute(
            """INSERT INTO smart_tests (drive_id, test_type, state, percent, started_at, raw_output)
               VALUES (?,?,?,?,?,?)
               ON CONFLICT(drive_id, test_type) DO UPDATE SET
                   state='running', percent=0, truenas_job_id=NULL,
                   started_at=excluded.started_at, finished_at=NULL, error_text=NULL,
                   raw_output=excluded.raw_output""",
            (drive_id, test_type.lower(), "running", 0, started_at, output),
        )
        await db.commit()
        # Wake SSE subscribers so the dashboard shows the running test.
        poller._notify_subscribers()
        return {"devname": devname, "type": test_type, "message": output[:200]}

    # REST path — older TrueNAS CORE / SCALE versions
    client = burnin._client
    if client is None:
        raise HTTPException(status_code=503, detail="TrueNAS client not ready")
    try:
        tn_job_id = await client.start_smart_test([devname], test_type)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}") from exc
    return {"job_id": tn_job_id, "devname": devname, "type": test_type}


@router.post("/api/v1/drives/{drive_id}/smart/cancel")
async def smart_cancel(
    drive_id: int,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Cancel a running standalone SMART test on a drive.

    ``body`` must carry ``{"type": "short" | "long"}`` (case-insensitive).
    With SSH configured the test is aborted over SSH; otherwise the
    matching RUNNING TrueNAS SMART job is looked up and aborted via REST.
    On success the local ``smart_tests`` row, if running, is marked
    ``aborted``.

    Returns:
        ``{"cancelled": True, "devname": ..., "type": ...}``.

    Raises:
        HTTPException: 422 for a missing, non-string or unknown ``type``;
            404 for an unknown drive or (REST path) no running test;
            502 when the SSH or TrueNAS call fails; 503 when the REST
            path is needed but the TrueNAS client is not available.
    """
    # ssh_client is not imported at module level, so it is pulled in here.
    from app import ssh_client

    # Non-string values are a validation error, not an HTTP 500.
    requested = body.get("type")
    test_type = requested.lower() if isinstance(requested, str) else ""
    if test_type not in ("short", "long"):
        raise HTTPException(status_code=422, detail="type must be 'short' or 'long'")

    cur = await db.execute("SELECT devname FROM drives WHERE id=?", (drive_id,))
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    devname = row[0]

    if ssh_client.is_configured():
        # SSH path — abort via smartctl -X. The REST client is not used
        # here, so its availability is deliberately not checked.
        try:
            await ssh_client.abort_smart_test(devname)
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"SSH abort error: {exc}") from exc
    else:
        # REST path — find TrueNAS job and abort it
        client = burnin._client
        if client is None:
            raise HTTPException(status_code=503, detail="TrueNAS client not ready")
        try:
            jobs = await client.get_smart_jobs()
            tn_job_id = None
            for j in jobs:
                if j.get("state") != "RUNNING":
                    continue
                args = j.get("arguments", [])
                if not args or not isinstance(args[0], dict):
                    continue
                if devname in args[0].get("disks", []):
                    tn_job_id = j["id"]
                    break

            if tn_job_id is None:
                raise HTTPException(status_code=404, detail="No running SMART test found for this drive")

            await client.abort_job(tn_job_id)
        except HTTPException:
            # Let the 404 above through instead of rewrapping it as a 502.
            raise
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}") from exc

    # Update local DB state
    now = datetime.now(timezone.utc).isoformat()
    await db.execute(
        "UPDATE smart_tests SET state='aborted', finished_at=? WHERE drive_id=? AND test_type=? AND state='running'",
        (now, drive_id, test_type),
    )
    await db.commit()

    return {"cancelled": True, "devname": devname, "type": test_type}


# ---------------------------------------------------------------------------
# Burn-in API
# ---------------------------------------------------------------------------

def _row_to_burnin(row: aiosqlite.Row, stages: list[aiosqlite.Row]) -> BurninJobResponse:
    """Build a ``BurninJobResponse`` from a job row and its stage rows.

    NULL ``percent`` values, on the job or on any stage, are reported as 0.
    """
    stage_models = []
    for stage in stages:
        stage_models.append(
            BurninStageResponse(
                id=stage["id"],
                stage_name=stage["stage_name"],
                state=stage["state"],
                percent=stage["percent"] or 0,
                started_at=stage["started_at"],
                finished_at=stage["finished_at"],
                error_text=stage["error_text"],
            )
        )

    return BurninJobResponse(
        id=row["id"],
        drive_id=row["drive_id"],
        profile=row["profile"],
        state=row["state"],
        percent=row["percent"] or 0,
        stage_name=row["stage_name"],
        operator=row["operator"],
        created_at=row["created_at"],
        started_at=row["started_at"],
        finished_at=row["finished_at"],
        error_text=row["error_text"],
        stages=stage_models,
    )


# _operator_for is now imported from ._helpers above.


@router.post("/api/v1/burnin/start")
async def burnin_start(request: Request, req: StartBurninRequest):
    """Queue a burn-in job for each requested drive.

    Drives are processed independently: successes are collected under
    ``queued`` and per-drive failures under ``errors``. If every drive
    failed, the request is rejected with 409 and the first error as detail.
    """
    operator = _operator_for(request, req.operator)
    queued = []
    failed = []
    for drive_id in req.drive_ids:
        try:
            job_id = await burnin.start_job(
                drive_id, req.profile, operator, stage_order=req.stage_order
            )
        except burnin.PoolMemberError as exc:
            # Pool-locked drives carry extra fields for the unlock prompt.
            failed.append({
                "drive_id": drive_id,
                "error": str(exc),
                "pool_name": exc.pool_name,
                "pool_role": exc.pool_role,
                "pool_locked": True,
            })
        except ValueError as exc:
            failed.append({"drive_id": drive_id, "error": str(exc)})
        else:
            queued.append({"drive_id": drive_id, "job_id": job_id})

    if failed and not queued:
        # Nothing was queued: expose the first structured error so the UI
        # can offer an unlock action rather than a generic toast.
        raise HTTPException(status_code=409, detail=failed[0])
    return {"queued": queued, "errors": failed}


@router.post("/api/v1/drives/{drive_id}/unlock")
async def unlock_pool_drive(drive_id: int, request: Request, req: UnlockPoolDriveRequest):
    """Grant a temporary pool unlock for a drive, subject to rate limiting.

    Responds 429 when the limiter rejects the attempt and 400 when the
    burn-in module refuses the unlock with a ``ValueError``. On success the
    limiter entries are cleared and the expiry plus TTL are returned.
    """
    operator = _operator_for(request, req.operator)
    # Attempts are counted per drive and per source IP: typos on the
    # confirm token are expected, but token guessing still runs into the
    # per-IP limit.
    limiter_keys = (("drive", drive_id), ("ip", _client_ip(request)))
    if auth.unlock_limiter.register(*limiter_keys) != "ok":
        raise HTTPException(
            status_code=429,
            detail="Too many unlock attempts on this drive. Try again later.",
        )

    try:
        expires_at = await burnin.grant_pool_unlock(
            drive_id, req.confirm_token, operator, req.reason,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))

    auth.unlock_limiter.clear(*limiter_keys)
    # The TTL is read from the `unlock` submodule rather than a
    # package-root alias so tests that monkey-patch UNLOCK_TTL_SECONDS in
    # app.burnin.unlock see their value reflected in the response.
    ttl_seconds = burnin.unlock.UNLOCK_TTL_SECONDS
    return {
        "unlocked": True,
        "expires_at": expires_at,
        "ttl_seconds": ttl_seconds,
    }


@router.post("/api/v1/burnin/{job_id}/cancel")
async def burnin_cancel(job_id: int, request: Request, req: CancelBurninRequest):
    """Cancel a burn-in job; 409 when the burn-in module reports failure."""
    cancelled = await burnin.cancel_job(job_id, _operator_for(request, req.operator))
    if cancelled:
        return {"cancelled": True}
    raise HTTPException(status_code=409, detail="Job not found or not cancellable")


# ---------------------------------------------------------------------------
# History pages
# ---------------------------------------------------------------------------

# Number of burn-in jobs shown per history page.
_PAGE_SIZE = 50

# Job states accepted by the history page's `state` filter (in addition
# to the special value "all").
_ALL_STATES = ("queued", "running", "passed", "failed", "cancelled", "unknown")

# Base SELECT for the history list: burn-in jobs joined with their drive,
# newest job first. `duration_seconds` is computed in SQL from started_at
# and finished_at, so it is NULL when either timestamp is NULL.
# `{where}` is a str.format placeholder for a complete optional WHERE
# clause (see _state_where); LIMIT/OFFSET are appended by the caller.
_HISTORY_QUERY = """
    SELECT
        bj.id, bj.drive_id, bj.profile, bj.state, bj.operator,
        bj.created_at, bj.started_at, bj.finished_at, bj.error_text,
        d.devname, d.serial, d.model, d.size_bytes,
        CAST(
            (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400
            AS INTEGER
        ) AS duration_seconds
    FROM burnin_jobs bj
    JOIN drives d ON d.id = bj.drive_id
    {where}
    ORDER BY bj.id DESC
"""


def _state_where(state: str) -> tuple[str, list]:
    """Return the WHERE clause and bound parameters for a state filter.

    ``"all"`` means no filtering: an empty clause and no parameters.
    """
    if state != "all":
        return "WHERE bj.state = ?", [state]
    return "", []


@router.get("/history", response_class=HTMLResponse)
async def history_list(
    request: Request,
    state: str = Query(default="all"),
    page: int = Query(default=1, ge=1),
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render the paginated burn-in history page.

    An unrecognised ``state`` falls back to ``"all"`` and a ``page`` beyond
    the last page is clamped to the last page. The template also receives
    per-state job counts for the filter badges.
    """
    if state != "all" and state not in _ALL_STATES:
        state = "all"

    where_clause, params = _state_where(state)

    # How many jobs match the active filter, and therefore how many pages.
    count_sql = (
        "SELECT COUNT(*) FROM burnin_jobs bj JOIN drives d ON d.id = bj.drive_id "
        + where_clause
    )
    cur = await db.execute(count_sql, params)
    total_count = (await cur.fetchone())[0]
    total_pages = max(1, -(-total_count // _PAGE_SIZE))  # ceiling division
    page = min(page, total_pages)
    offset = (page - 1) * _PAGE_SIZE

    # Badge counts: one entry per state present in the table, plus "all".
    cur = await db.execute(
        "SELECT state, COUNT(*) FROM burnin_jobs GROUP BY state"
    )
    counts = {"all": total_count if state == "all" else 0}
    counts.update({r[0]: r[1] for r in await cur.fetchall()})
    if state != "all":
        # total_count is filtered here, so the overall total needs its own query.
        all_cur = await db.execute("SELECT COUNT(*) FROM burnin_jobs")
        counts["all"] = (await all_cur.fetchone())[0]

    # The requested page of jobs.
    page_sql = _HISTORY_QUERY.format(where=where_clause) + " LIMIT ? OFFSET ?"
    cur = await db.execute(page_sql, [*params, _PAGE_SIZE, offset])
    jobs = [dict(r) for r in await cur.fetchall()]

    poller_state = poller.get_state()
    context = {
        "request": request,
        "jobs": jobs,
        "active_state": state,
        "counts": counts,
        "page": page,
        "total_pages": total_pages,
        "total_count": total_count,
        "poller": poller_state,
    }
    context.update(_stale_context(poller_state))
    return templates.TemplateResponse(request, "history.html", context)


@router.get("/history/{job_id}", response_class=HTMLResponse)
async def history_detail(
    request: Request,
    job_id: int,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render the detail page for one burn-in job and its stages.

    Durations for the job and for each stage are computed in SQL from the
    started/finished timestamps. Responds 404 when the job does not exist.
    """
    # The job row together with the owning drive's identity columns.
    job_sql = """
        SELECT
            bj.*, d.devname, d.serial, d.model, d.size_bytes,
            CAST(
                (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400
                AS INTEGER
            ) AS duration_seconds
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        WHERE bj.id = ?
    """
    cursor = await db.execute(job_sql, (job_id,))
    job_row = await cursor.fetchone()
    if not job_row:
        raise HTTPException(status_code=404, detail="Burn-in job not found")
    job = dict(job_row)

    # All stages of the job in creation order, each with its duration.
    stages_sql = """
        SELECT *,
            CAST(
                (julianday(finished_at) - julianday(started_at)) * 86400
                AS INTEGER
            ) AS duration_seconds
        FROM burnin_stages
        WHERE burnin_job_id = ?
        ORDER BY id
    """
    cursor = await db.execute(stages_sql, (job_id,))
    job["stages"] = [dict(stage_row) for stage_row in await cursor.fetchall()]

    poller_state = poller.get_state()
    context = {
        "request": request,
        "job": job,
        "poller": poller_state,
    }
    context.update(_stale_context(poller_state))
    return templates.TemplateResponse(request, "job_detail.html", context)


# ---------------------------------------------------------------------------
# CSV export
# ---------------------------------------------------------------------------

@router.get("/api/v1/burnin/export.csv")
async def burnin_export_csv(db: aiosqlite.Connection = Depends(get_db)):
    """Export the whole burn-in history as a CSV attachment.

    One line per job, newest first, preceded by a header row. The CSV is
    built fully in memory and sent as ``burnin_history.csv``.
    """
    cursor = await db.execute("""
        SELECT
            bj.id          AS job_id,
            bj.drive_id,
            d.devname,
            d.serial,
            d.model,
            bj.profile,
            bj.state,
            bj.operator,
            bj.created_at,
            bj.started_at,
            bj.finished_at,
            CAST(
                (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400
                AS INTEGER
            ) AS duration_seconds,
            bj.error_text
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        ORDER BY bj.id DESC
    """)
    job_rows = await cursor.fetchall()

    # Header names follow the SELECT column order above.
    header = [
        "job_id", "drive_id", "devname", "serial", "model",
        "profile", "state", "operator",
        "created_at", "started_at", "finished_at", "duration_seconds",
        "error_text",
    ]
    out = io.StringIO()
    csv_writer = csv.writer(out)
    csv_writer.writerow(header)
    csv_writer.writerows(list(job_row) for job_row in job_rows)

    # getvalue() returns the full buffer regardless of the stream position.
    payload = out.getvalue()
    return StreamingResponse(
        iter([payload]),
        media_type="text/csv",
        headers={"Content-Disposition": "attachment; filename=burnin_history.csv"},
    )


# ---------------------------------------------------------------------------
# On-demand email report
# ---------------------------------------------------------------------------

@router.post("/api/v1/report/send")
async def send_report_now():
    """Send the status report email right away (useful to test SMTP setup).

    Responds 503 when no SMTP host is configured and 502 when the mailer
    raises while sending.
    """
    if settings.smtp_host:
        try:
            await mailer.send_report_now()
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"Mail send failed: {exc}")
        return {"sent": True, "to": settings.smtp_to}
    raise HTTPException(status_code=503, detail="SMTP not configured (SMTP_HOST is empty)")


# ---------------------------------------------------------------------------
# Drive notes / location update
# ---------------------------------------------------------------------------

@router.patch("/api/v1/drives/{drive_id}")
async def update_drive(
    drive_id: int,
    req: UpdateDriveRequest,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Store the notes and location of a drive; 404 if the drive is unknown.

    Both columns are written from the request on every call.
    """
    lookup = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,))
    existing = await lookup.fetchone()
    if not existing:
        raise HTTPException(status_code=404, detail="Drive not found")

    new_values = (req.notes, req.location, drive_id)
    await db.execute("UPDATE drives SET notes=?, location=? WHERE id=?", new_values)
    await db.commit()
    return {"updated": True}


@router.post("/api/v1/drives/{drive_id}/reset")
async def reset_drive(
    drive_id: int,
    request: Request,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """
    Wipe a drive's SMART test results so it is displayed as fresh.

    Refused with 409 while the drive has a queued or running burn-in, and
    with 404 for an unknown drive. Job history is kept: the reset only
    clears SMART state and the cached attributes, stamps ``last_reset_at``,
    records an audit event, and notifies poller subscribers.
    """
    lookup = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,))
    if not await lookup.fetchone():
        raise HTTPException(status_code=404, detail="Drive not found")

    # A reset must not race with a burn-in that is queued or in progress.
    active = await db.execute(
        "SELECT COUNT(*) FROM burnin_jobs WHERE drive_id=? AND state IN ('queued','running')",
        (drive_id,),
    )
    active_jobs = (await active.fetchone())[0]
    if active_jobs > 0:
        raise HTTPException(status_code=409, detail="Cannot reset while a burn-in is active")

    # The operator is resolved through _operator_for with the request, so
    # the logged-in user is trusted over the body (the JS used to send a
    # literal "operator" because window._operator was never set).
    operator = _operator_for(request, body.get("operator"))

    # Put every SMART test row of the drive back to idle.
    await db.execute(
        """UPDATE smart_tests SET state='idle', percent=0, started_at=NULL,
           eta_at=NULL, finished_at=NULL, error_text=NULL, raw_output=NULL
           WHERE drive_id=?""",
        (drive_id,),
    )
    # Drop the cached SMART attributes and stamp the reset time, which is
    # what hides earlier burn-in jobs from the dashboard.
    reset_at = datetime.now(timezone.utc).isoformat()
    await db.execute(
        "UPDATE drives SET smart_attrs=NULL, last_reset_at=? WHERE id=?",
        (reset_at, drive_id),
    )

    # Leave an audit trail entry for the reset.
    audit_values = ("drive_reset", drive_id, operator, "Drive reset — SMART state cleared")
    await db.execute(
        """INSERT INTO audit_events (event_type, drive_id, operator, message)
           VALUES (?,?,?,?)""",
        audit_values,
    )
    await db.commit()

    poller._notify_subscribers()
    return {"reset": True}


# ---------------------------------------------------------------------------
# Audit log page
# ---------------------------------------------------------------------------

# Query behind the /audit page: the 200 audit_events rows with the highest
# ids, newest first. The LEFT JOIN keeps events whose drive_id matches no
# drives row; devname and serial come back NULL for those.
_AUDIT_QUERY = """
    SELECT
        ae.id, ae.event_type, ae.operator, ae.message, ae.created_at,
        d.devname, d.serial
    FROM audit_events ae
    LEFT JOIN drives d ON d.id = ae.drive_id
    ORDER BY ae.id DESC
    LIMIT 200
"""

# Maps an audit event_type to a color/style name; passed to audit.html as
# "event_colors". Event types without an entry (e.g. "drive_reset", which
# reset_drive writes) presumably get a template-side default — TODO confirm.
_AUDIT_EVENT_COLORS = {
    "burnin_queued":    "yellow",
    "burnin_started":   "blue",
    "burnin_passed":    "passed",
    "burnin_failed":    "failed",
    "burnin_cancelled": "cancelled",
    "burnin_stuck":     "failed",
    "burnin_unknown":   "unknown",
}


@router.get("/audit", response_class=HTMLResponse)
async def audit_log(
    request: Request,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render audit.html with the rows returned by ``_AUDIT_QUERY``.

    The template receives the events as plain dicts, the event-type color
    map, the current poller state and the keys from ``_stale_context``.
    """
    cursor = await db.execute(_AUDIT_QUERY)
    fetched = await cursor.fetchall()
    events = [dict(record) for record in fetched]

    poller_state = poller.get_state()
    context = {
        "request":      request,
        "events":       events,
        "event_colors": _AUDIT_EVENT_COLORS,
        "poller":       poller_state,
    }
    # Merged last so staleness keys win over the base keys on a clash.
    context.update(_stale_context(poller_state))
    return templates.TemplateResponse(request, "audit.html", context)


# ---------------------------------------------------------------------------
# Stats / analytics page
# ---------------------------------------------------------------------------

@router.get("/stats", response_class=HTMLResponse)
async def stats_page(
    request: Request,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render stats.html from aggregate queries over burnin_jobs and drives.

    Context keys handed to the template: ``overall`` (one dict of job
    totals), ``by_model``, ``by_day``, ``by_size``, ``by_failure_stage``
    (lists of row dicts), ``drives_total`` (int), ``poller`` and the keys
    from ``_stale_context``.
    """

    async def fetch_dicts(sql: str) -> list[dict]:
        """Run a parameterless query and return every row as a plain dict."""
        cursor = await db.execute(sql)
        return [dict(record) for record in await cursor.fetchall()]

    # Job totals, broken down by a few states
    totals_cursor = await db.execute("""
        SELECT
            COUNT(*) as total,
            SUM(CASE WHEN state='passed'    THEN 1 ELSE 0 END) as passed,
            SUM(CASE WHEN state='failed'    THEN 1 ELSE 0 END) as failed,
            SUM(CASE WHEN state='running'   THEN 1 ELSE 0 END) as running,
            SUM(CASE WHEN state='cancelled' THEN 1 ELSE 0 END) as cancelled
        FROM burnin_jobs
    """)
    overall = dict(await totals_cursor.fetchone())

    # Pass/fail tallies and pass rate per drive model (passed/failed jobs
    # only), limited to the 20 models with the most jobs
    by_model = await fetch_dicts("""
        SELECT
            COALESCE(d.model, 'Unknown') AS model,
            COUNT(*) AS total,
            SUM(CASE WHEN bj.state='passed' THEN 1 ELSE 0 END) AS passed,
            SUM(CASE WHEN bj.state='failed' THEN 1 ELSE 0 END) AS failed,
            ROUND(100.0 * SUM(CASE WHEN bj.state='passed' THEN 1 ELSE 0 END) / COUNT(*), 1) AS pass_rate
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        WHERE bj.state IN ('passed', 'failed')
        GROUP BY COALESCE(d.model, 'Unknown')
        ORDER BY total DESC
        LIMIT 20
    """)

    # Jobs created per day over the last 14 days
    by_day = await fetch_dicts("""
        SELECT
            date(created_at) AS day,
            COUNT(*) AS total,
            SUM(CASE WHEN state='passed' THEN 1 ELSE 0 END) AS passed,
            SUM(CASE WHEN state='failed' THEN 1 ELSE 0 END) AS failed
        FROM burnin_jobs
        WHERE created_at >= date('now', '-14 days')
        GROUP BY date(created_at)
        ORDER BY day DESC
    """)

    # Mean job duration in hours, bucketed by drive size rounded to whole
    # units of 1e12 bytes
    by_size = await fetch_dicts("""
        SELECT
            CAST(ROUND(CAST(d.size_bytes AS REAL) / 1e12) AS INTEGER) AS size_tb,
            COUNT(*)  AS total,
            ROUND(AVG(
                (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400 / 3600.0
            ), 1)     AS avg_hours
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        WHERE bj.state IN ('passed', 'failed')
          AND bj.started_at IS NOT NULL
          AND bj.finished_at IS NOT NULL
        GROUP BY size_tb
        ORDER BY size_tb
    """)

    # Failed jobs grouped by the stage_name recorded on the job
    by_failure_stage = await fetch_dicts("""
        SELECT
            COALESCE(bj.stage_name, 'unknown') AS failed_stage,
            COUNT(*) AS count
        FROM burnin_jobs bj
        WHERE bj.state = 'failed'
        GROUP BY failed_stage
        ORDER BY count DESC
    """)

    # Number of drive rows
    count_cursor = await db.execute("SELECT COUNT(*) FROM drives")
    count_row = await count_cursor.fetchone()
    drives_total = count_row[0]

    poller_state = poller.get_state()
    context = {
        "request":          request,
        "overall":          overall,
        "by_model":         by_model,
        "by_day":           by_day,
        "by_size":          by_size,
        "by_failure_stage": by_failure_stage,
        "drives_total":     drives_total,
        "poller":           poller_state,
    }
    context.update(_stale_context(poller_state))
    return templates.TemplateResponse(request, "stats.html", context)


# ---------------------------------------------------------------------------
# Settings page
# ---------------------------------------------------------------------------

@router.get("/settings", response_class=HTMLResponse)
async def settings_page(
    request: Request,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render settings.html with the current values of the editable settings.

    ``auth.require_admin`` is called before anything else. Secret values
    (ssh_password, ssh_key, truenas_api_key, ...) are deliberately left out
    of the form values; the template gets ``_secret_status()`` instead.
    """
    auth.require_admin(request)

    # Names of the settings shown with their real value, in form order.
    form_fields = (
        # SMTP
        "smtp_host",
        "smtp_port",
        "smtp_ssl_mode",
        "smtp_timeout",
        "smtp_user",
        "smtp_from",
        "smtp_to",
        "smtp_report_hour",
        "smtp_daily_report_enabled",
        "smtp_alert_on_fail",
        "smtp_alert_on_pass",
        # Webhook
        "webhook_url",
        # Burn-in behaviour
        "stuck_job_hours",
        "max_parallel_burnins",
        "temp_warn_c",
        "temp_crit_c",
        "bad_block_threshold",
        "surface_validate_block_size",
        "surface_validate_block_buffer",
        "surface_validate_passes",
        # SSH credentials (take effect immediately — each SSH call reads
        # live settings); ssh_password and ssh_key are omitted (sensitive)
        "ssh_host",
        "ssh_port",
        "ssh_user",
        # System settings (restart required to fully apply);
        # truenas_api_key is omitted (sensitive)
        "truenas_base_url",
        "truenas_verify_tls",
        "poll_interval_seconds",
        "stale_threshold_seconds",
        "allowed_ips",
        "log_level",
    )
    editable = {name: getattr(settings, name) for name in form_fields}
    # A falsy SSL mode is presented as "starttls"; reassigning an existing
    # key keeps its position in the dict.
    editable["smtp_ssl_mode"] = settings.smtp_ssl_mode or "starttls"

    from app import ssh_client as ssh_module
    poller_state = poller.get_state()
    context = {
        "request":        request,
        "editable":       editable,
        "secret_status":  _secret_status(),
        "smtp_enabled":   bool(settings.smtp_host),
        "ssh_configured": ssh_module.is_configured(),
        "app_version":    settings.app_version,
        "poller":         poller_state,
    }
    context.update(_stale_context(poller_state))
    return templates.TemplateResponse(request, "settings.html", context)


# _SECRET_FIELDS and _secret_status are now imported from ._helpers above.


@router.get("/api/v1/settings/redacted")
async def get_settings_redacted(request: Request):
    """Return every editable setting with secret values masked.

    Admin-only diagnostic for answering "what is loaded right now?" without
    exposing real secrets: a secret that is set is reported as '***', one
    that is unset (falsy) as None. The result also carries the output of
    ``_secret_status()`` under the ``_secret_status`` key.

    Raises:
        HTTPException: 401 when ``request.state.current_user`` is falsy,
            403 when that user is not an admin.
    """
    current = request.state.current_user
    if not current:
        raise HTTPException(status_code=401, detail="Authentication required")
    if not current.is_admin:
        raise HTTPException(status_code=403, detail="Admin only")

    def displayed(name: str) -> object:
        """Live value of a setting, masked when the field is a secret."""
        value = getattr(settings, name, None)
        if name not in _SECRET_FIELDS:
            return value
        return "***" if value else None

    redacted: dict[str, object] = {
        name: displayed(name) for name in settings_store._EDITABLE
    }
    redacted["_secret_status"] = _secret_status()
    return redacted


@router.post("/api/v1/settings")
async def save_settings(request: Request, body: dict):
    """Save editable runtime settings.  Secrets are only updated if non-empty.

    Args:
        request: Incoming request; passed to ``auth.require_admin`` and used
            for the client IP in the audit message.
        body: Mapping of setting name to new value. Secret fields sent as an
            empty string are removed from it before saving.

    Returns:
        ``{"saved": True, "keys": <result of settings_store.save>,
        "rotated_secrets": [<secret field names sent non-empty>]}``.

    Raises:
        HTTPException: 422 when ``settings_store.save`` raises ValueError;
            the detail carries the error text.
    """
    user = auth.require_admin(request)
    # Don't overwrite secrets if the client sent an empty string. Track the
    # ones that arrived with a real value so the rotation can be audited.
    rotated: list[str] = []
    for secret_field in _SECRET_FIELDS:
        if secret_field not in body:
            continue
        if body[secret_field] == "":
            del body[secret_field]
        else:
            rotated.append(secret_field)

    try:
        saved = settings_store.save(body)
    except ValueError as exc:
        # Chain the cause so the validation error stays attached to the 422
        # in tracebacks and logs.
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    # Audit secret rotations — never log the value, only the field name +
    # operator + source IP. Lets the `audit` page answer "who rotated the
    # SMTP password last week?"
    if rotated and user:
        await auth.audit_auth_event(
            "settings_secret_changed",
            user.username,
            f"Rotated secrets from {_client_ip(request)}: {', '.join(sorted(rotated))}",
        )

    return {"saved": True, "keys": saved, "rotated_secrets": rotated}


@router.post("/api/v1/settings/test-smtp")
async def test_smtp(request: Request):
    """Probe the current SMTP configuration (connection test, no email sent).

    Calls ``auth.require_admin`` first. Responds ``{"ok": True}`` on
    success and 502 with the reported error otherwise.
    """
    auth.require_admin(request)
    outcome = await mailer.test_smtp_connection()
    if outcome["ok"]:
        return {"ok": True}
    raise HTTPException(status_code=502, detail=outcome["error"])


@router.post("/api/v1/settings/test-ssh")
async def test_ssh(request: Request):
    """Probe the current SSH configuration.

    Calls ``auth.require_admin`` first. Responds ``{"ok": True}`` on
    success; otherwise 502 with the reported error, or "Connection failed"
    when the result carries no error text.
    """
    auth.require_admin(request)
    from app import ssh_client
    outcome = await ssh_client.test_connection()
    if outcome["ok"]:
        return {"ok": True}
    failure_detail = outcome.get("error", "Connection failed")
    raise HTTPException(status_code=502, detail=failure_detail)


# ---------------------------------------------------------------------------
# Print view  (must be BEFORE /{job_id} int route)
# ---------------------------------------------------------------------------

@router.get("/history/{job_id}/print", response_class=HTMLResponse)
async def history_print(
    request: Request,
    job_id: int,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render job_print.html for one burn-in job.

    The job dict holds every burnin_jobs column plus the drive's devname,
    serial, model and size_bytes, a computed ``duration_seconds``, and a
    ``stages`` list (burnin_stages rows in id order, each with its own
    ``duration_seconds``).

    Raises:
        HTTPException: 404 when the job id does not exist.
    """
    job_cursor = await db.execute("""
        SELECT
            bj.*, d.devname, d.serial, d.model, d.size_bytes,
            CAST(
                (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400
                AS INTEGER
            ) AS duration_seconds
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        WHERE bj.id = ?
    """, (job_id,))
    job_row = await job_cursor.fetchone()
    if not job_row:
        raise HTTPException(status_code=404, detail="Job not found")
    job = dict(job_row)

    stage_cursor = await db.execute("""
        SELECT *,
            CAST(
                (julianday(finished_at) - julianday(started_at)) * 86400
                AS INTEGER
            ) AS duration_seconds
        FROM burnin_stages WHERE burnin_job_id=? ORDER BY id
    """, (job_id,))
    stage_rows = await stage_cursor.fetchall()
    job["stages"] = [dict(stage) for stage in stage_rows]

    context = {
        "request": request,
        "job":     job,
    }
    return templates.TemplateResponse(request, "job_print.html", context)


# ---------------------------------------------------------------------------
# Burn-in job detail API  (must be after export.csv to avoid int coercion)
# ---------------------------------------------------------------------------

@router.get("/api/v1/burnin/{job_id}", response_model=BurninJobResponse)
async def burnin_get(job_id: int, db: aiosqlite.Connection = Depends(get_db)):
    """Return one burn-in job together with its stages.

    The job row and its burnin_stages rows (in id order) are handed to
    ``_row_to_burnin`` to build the response.

    Raises:
        HTTPException: 404 when the job id does not exist.
    """
    # Rows must support access by column name for the conversion below.
    db.row_factory = aiosqlite.Row

    job_cursor = await db.execute("SELECT * FROM burnin_jobs WHERE id=?", (job_id,))
    job_row = await job_cursor.fetchone()
    if not job_row:
        raise HTTPException(status_code=404, detail="Burn-in job not found")

    stage_cursor = await db.execute(
        "SELECT * FROM burnin_stages WHERE burnin_job_id=? ORDER BY id", (job_id,)
    )
    stage_rows = await stage_cursor.fetchall()
    return _row_to_burnin(job_row, stage_rows)