The drawer's surface_validate area now leads with a row of operator vitals computed from data already in the response:

- Temp: drive temperature with cool/warm/hot colour (≥48 red, ≥42 yellow)
- Speed: live MB/s, NULL until the second progress sample arrives
- Elapsed: time since the stage's started_at
- ETA: extrapolated from overall progress; suppressed under 0.5% to avoid the "47 days remaining" artefact early in pattern 1

Live MB/s comes from a new bb_mbps column on burnin_stages, computed in the badblocks parser as (delta_overall_pct / 800) * size_bytes / dt. The computation is skipped on phase transitions (the per-phase pct resets) and on sub-second samples (too noisy).

The drawer endpoint now passes drive.temperature_c through; the JS stashes the latest drive object in _DRAWER_LAST_DRIVE so the burn-in renderer can pull it for the vitals row without changing call signatures.

The table CSS tightening done in this same session is unrelated and already shipped in earlier rounds via the bind-mounted app.css.
378 lines
14 KiB
Python
378 lines
14 KiB
Python
"""Drive endpoints — list, drawer, edit, SMART start/cancel, reset, unlock.
|
|
|
|
GET /api/v1/drives
|
|
GET /api/v1/drives/{id}/drawer
|
|
GET /api/v1/drives/{id}
|
|
PATCH /api/v1/drives/{id} — notes / location update
|
|
POST /api/v1/drives/{id}/smart/start
|
|
POST /api/v1/drives/{id}/smart/cancel
|
|
POST /api/v1/drives/{id}/reset
|
|
POST /api/v1/drives/{id}/unlock — pool-membership lock override
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json as _json
|
|
from datetime import datetime, timezone
|
|
|
|
import aiosqlite
|
|
from fastapi import APIRouter, Depends, HTTPException, Request
|
|
|
|
from app import auth, burnin, poller
|
|
from app.database import get_db
|
|
from app.models import (
|
|
DriveResponse, UnlockPoolDriveRequest, UpdateDriveRequest,
|
|
)
|
|
|
|
from ._drives_helpers import _DRIVES_QUERY, _row_to_drive
|
|
from ._helpers import client_ip, operator_for
|
|
|
|
# Router on which the drive endpoints in this module register themselves.
router = APIRouter()
|
|
|
|
|
|
@router.get("/api/v1/drives", response_model=list[DriveResponse])
async def list_drives(db: aiosqlite.Connection = Depends(get_db)):
    """Return all drives.

    Runs the shared drives query with an empty ``where`` fragment (no extra
    filter) and converts every fetched row with ``_row_to_drive``.
    """
    unfiltered_sql = _DRIVES_QUERY.format(where="")
    cursor = await db.execute(unfiltered_sql)
    return list(map(_row_to_drive, await cursor.fetchall()))
|
|
|
|
|
|
@router.get("/api/v1/drives/{drive_id}/drawer")
async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db)):
    """Assemble the payload for a drive's log drawer.

    The response bundles four sections: a short drive summary (including
    ``temperature_c``), the most recent burn-in job with its stages, the
    short/long SMART cards plus the cached SMART attributes, and the 50
    newest audit events.

    Raises:
        HTTPException: 404 when no drive row matches ``drive_id``.
    """
    cursor = await db.execute(
        _DRIVES_QUERY.format(where="AND d.id = ?"), (drive_id,)
    )
    drive_row = await cursor.fetchone()
    if not drive_row:
        raise HTTPException(status_code=404, detail="Drive not found")
    drive = _row_to_drive(drive_row)

    # Newest burn-in job (highest id) together with its stages in id order;
    # the stage rows carry log_text, bad_blocks and the bb_* progress columns.
    cursor = await db.execute(
        "SELECT * FROM burnin_jobs WHERE drive_id=? ORDER BY id DESC LIMIT 1",
        (drive_id,),
    )
    latest_job = await cursor.fetchone()
    burnin_job = None
    if latest_job:
        burnin_job = dict(latest_job)
        cursor = await db.execute(
            "SELECT id, stage_name, state, percent, started_at, finished_at, "
            "duration_seconds, error_text, log_text, bad_blocks, "
            "bb_phase, bb_phase_pct, bb_mbps "
            "FROM burnin_stages WHERE burnin_job_id=? ORDER BY id",
            (latest_job["id"],),
        )
        burnin_job["stages"] = [dict(stage) for stage in await cursor.fetchall()]

    # smart_tests rows keyed by test_type, used for their raw_output below.
    cursor = await db.execute(
        "SELECT test_type, state, percent, started_at, finished_at, error_text, raw_output "
        "FROM smart_tests WHERE drive_id=?",
        (drive_id,),
    )
    raw_by_type = {}
    for test_row in await cursor.fetchall():
        raw_by_type[test_row["test_type"]] = dict(test_row)

    # SMART attributes are cached as a JSON blob on the drives table.
    cursor = await db.execute("SELECT smart_attrs FROM drives WHERE id=?", (drive_id,))
    attrs_row = await cursor.fetchone()
    cached_blob = attrs_row["smart_attrs"] if attrs_row else None
    smart_attrs = None
    if cached_blob:
        try:
            smart_attrs = _json.loads(cached_blob)
        except Exception:
            # Best effort: an unparsable cache is reported as "no attributes".
            smart_attrs = None

    # Last 50 audit events for this drive, newest first.
    cursor = await db.execute("""
        SELECT id, event_type, operator, message, created_at
        FROM audit_events
        WHERE drive_id = ?
        ORDER BY id DESC
        LIMIT 50
    """, (drive_id,))
    events = [dict(event) for event in await cursor.fetchall()]

    # One card per SMART test type: the drive's summary object (when present)
    # dumped to a dict, extended with the raw output stored in smart_tests.
    smart_section = {}
    for kind, summary in (("short", drive.smart_short), ("long", drive.smart_long)):
        card = summary.model_dump() if summary else {}
        card["raw_output"] = raw_by_type.get(kind, {}).get("raw_output")
        smart_section[kind] = card
    smart_section["attrs"] = smart_attrs

    drive_summary = {
        "id": drive.id,
        "devname": drive.devname,
        "serial": drive.serial,
        "model": drive.model,
        "size_bytes": drive.size_bytes,
        "temperature_c": drive.temperature_c,
    }
    return {
        "drive": drive_summary,
        "burnin": burnin_job,
        "smart": smart_section,
        "events": events,
    }
|
|
|
|
|
|
@router.get("/api/v1/drives/{drive_id}", response_model=DriveResponse)
async def get_drive(drive_id: int, db: aiosqlite.Connection = Depends(get_db)):
    """Return a single drive looked up by its id.

    Raises:
        HTTPException: 404 when the drives query yields no row for ``drive_id``.
    """
    single_drive_sql = _DRIVES_QUERY.format(where="AND d.id = ?")
    cursor = await db.execute(single_drive_sql, (drive_id,))
    match = await cursor.fetchone()
    if match:
        return _row_to_drive(match)
    raise HTTPException(status_code=404, detail="Drive not found")
|
|
|
|
|
|
@router.post("/api/v1/drives/{drive_id}/smart/start")
async def smart_start(
    drive_id: int,
    request: Request,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Start a standalone SHORT or LONG SMART test on a single drive.

    Uses SSH (smartctl) when configured — required for TrueNAS SCALE 25.10+
    where the REST smart/test endpoint no longer exists.
    Falls back to TrueNAS REST API for older versions.

    Body keys read here: ``type`` ("SHORT" or "LONG", case-insensitive) and
    an optional ``operator`` that is handed to ``operator_for``.

    Returns:
        SSH path: ``{"devname", "type", "message"}`` where ``message`` is the
        first 200 characters of the smartctl start output.
        REST path: ``{"job_id", "devname", "type"}``.

    Raises:
        HTTPException: 422 if ``type`` is missing, not a string, or not
            SHORT/LONG; 404 if the drive does not exist; 502 if the SSH or
            TrueNAS call fails; 503 if the REST client is not ready.
    """
    from app import ssh_client

    # ``body`` is an untyped dict, so ``type`` may be any JSON value. Anything
    # that is not a string is rejected with 422 instead of crashing on
    # ``.upper()``.
    requested = body.get("type")
    test_type = requested.upper() if isinstance(requested, str) else ""
    if test_type not in ("SHORT", "LONG"):
        raise HTTPException(status_code=422, detail="type must be SHORT or LONG")

    cur = await db.execute("SELECT devname FROM drives WHERE id=?", (drive_id,))
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    devname = row[0]

    operator = operator_for(request, body.get("operator"))
    now = datetime.now(timezone.utc).isoformat()
    ttype_lower = test_type.lower()
    audit_message = f"{test_type} SMART test started on {devname}"

    if ssh_client.is_configured():
        # SSH path — works on TrueNAS SCALE 25.10+ and CORE
        try:
            output = await ssh_client.start_smart_test(devname, test_type)
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"SSH error: {exc}") from exc

        # Mark as running in DB (truenas_job_id=NULL signals SSH-managed test)
        # Store smartctl start output as proof the test was initiated
        await db.execute(
            """INSERT INTO smart_tests (drive_id, test_type, state, percent, started_at, raw_output)
               VALUES (?,?,?,?,?,?)
               ON CONFLICT(drive_id, test_type) DO UPDATE SET
                 state='running', percent=0, truenas_job_id=NULL,
                 started_at=excluded.started_at, finished_at=NULL, error_text=NULL,
                 raw_output=excluded.raw_output""",
            (drive_id, ttype_lower, "running", 0, now, output),
        )
        await db.execute(
            """INSERT INTO audit_events (event_type, drive_id, operator, message)
               VALUES (?,?,?,?)""",
            ("smart_test_start", drive_id, operator, audit_message),
        )
        await db.commit()
        poller._notify_subscribers()
        return {"devname": devname, "type": test_type, "message": output[:200]}

    # REST path — older TrueNAS CORE / SCALE versions
    client = burnin._client
    if client is None:
        raise HTTPException(status_code=503, detail="TrueNAS client not ready")
    try:
        tn_job_id = await client.start_smart_test([devname], test_type)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}") from exc
    await db.execute(
        """INSERT INTO audit_events (event_type, drive_id, operator, message)
           VALUES (?,?,?,?)""",
        ("smart_test_start", drive_id, operator, audit_message),
    )
    await db.commit()
    return {"job_id": tn_job_id, "devname": devname, "type": test_type}
|
|
|
|
|
|
@router.post("/api/v1/drives/{drive_id}/smart/cancel")
async def smart_cancel(
    drive_id: int,
    request: Request,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Cancel a running standalone SMART test on a drive.

    When SSH is configured the test is aborted through
    ``ssh_client.abort_smart_test``; otherwise the running TrueNAS SMART job
    whose ``disks`` argument contains the drive is looked up and aborted.
    Afterwards the matching running ``smart_tests`` row is marked
    ``aborted`` and an audit event is written.

    Body keys read here: ``type`` ("short" or "long", case-insensitive) and
    an optional ``operator`` that is handed to ``operator_for``.

    Returns:
        ``{"cancelled": True, "devname": ..., "type": ...}`` with ``type`` in
        lower case.

    Raises:
        HTTPException: 422 if ``type`` is missing, not a string, or not
            short/long; 404 if the drive does not exist or (REST path) no
            running SMART job references it; 502 if the SSH or TrueNAS call
            fails; 503 if the REST path is needed but the client is not ready.
    """
    # ``body`` is an untyped dict, so ``type`` may be any JSON value. Anything
    # that is not a string is rejected with 422 instead of crashing on
    # ``.lower()``.
    requested = body.get("type")
    test_type = requested.lower() if isinstance(requested, str) else ""
    if test_type not in ("short", "long"):
        raise HTTPException(status_code=422, detail="type must be 'short' or 'long'")

    cur = await db.execute("SELECT devname FROM drives WHERE id=?", (drive_id,))
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    devname = row[0]
    operator = operator_for(request, body.get("operator"))

    from app import ssh_client

    if ssh_client.is_configured():
        # SSH path — abort via smartctl -X. The TrueNAS REST client is not
        # used here, so its readiness is deliberately not checked.
        try:
            await ssh_client.abort_smart_test(devname)
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"SSH abort error: {exc}") from exc
    else:
        # REST path — find TrueNAS job and abort it
        client = burnin._client
        if client is None:
            raise HTTPException(status_code=503, detail="TrueNAS client not ready")
        try:
            jobs = await client.get_smart_jobs()
            tn_job_id = None
            for job in jobs:
                if job.get("state") != "RUNNING":
                    continue
                args = job.get("arguments", [])
                if not args or not isinstance(args[0], dict):
                    continue
                if devname in args[0].get("disks", []):
                    tn_job_id = job["id"]
                    break

            if tn_job_id is None:
                raise HTTPException(status_code=404, detail="No running SMART test found for this drive")

            await client.abort_job(tn_job_id)
        except HTTPException:
            # Let the 404 above (and any other HTTP error) through untouched.
            raise
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}") from exc

    # Update local DB state
    now = datetime.now(timezone.utc).isoformat()
    await db.execute(
        "UPDATE smart_tests SET state='aborted', finished_at=? "
        "WHERE drive_id=? AND test_type=? AND state='running'",
        (now, drive_id, test_type),
    )
    await db.execute(
        """INSERT INTO audit_events (event_type, drive_id, operator, message)
           VALUES (?,?,?,?)""",
        ("smart_test_cancel", drive_id, operator,
         f"{test_type.upper()} SMART test cancelled on {devname}"),
    )
    await db.commit()

    return {"cancelled": True, "devname": devname, "type": test_type}
|
|
|
|
|
|
@router.patch("/api/v1/drives/{drive_id}")
async def update_drive(
    drive_id: int,
    req: UpdateDriveRequest,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Store the notes and location supplied for one drive.

    NOTE(review): both columns are written from the request on every call,
    so whatever ``req.notes`` / ``req.location`` hold replaces the stored
    values — confirm that is the intended behaviour when a client sends only
    one of the two fields.

    Returns:
        ``{"updated": True}`` once the change is committed.

    Raises:
        HTTPException: 404 when the drive does not exist.
    """
    lookup = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,))
    existing = await lookup.fetchone()
    if not existing:
        raise HTTPException(status_code=404, detail="Drive not found")

    new_values = (req.notes, req.location, drive_id)
    await db.execute("UPDATE drives SET notes=?, location=? WHERE id=?", new_values)
    await db.commit()
    return {"updated": True}
|
|
|
|
|
|
@router.post("/api/v1/drives/{drive_id}/reset")
async def reset_drive(
    drive_id: int,
    request: Request,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Reset a drive's SMART display state so it shows as fresh.

    Sets every ``smart_tests`` row of the drive back to ``idle``, clears the
    cached SMART attributes, stamps ``last_reset_at`` and records a
    ``drive_reset`` audit event. Burn-in job rows are not modified here, so
    job history is preserved.

    Returns:
        ``{"reset": True}`` after the commit and subscriber notification.

    Raises:
        HTTPException: 404 when the drive does not exist; 409 while a
            burn-in job for it is queued or running.
    """
    lookup = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,))
    known_drive = await lookup.fetchone()
    if not known_drive:
        raise HTTPException(status_code=404, detail="Drive not found")

    # A reset is refused while any burn-in for this drive is queued or running.
    lookup = await db.execute(
        "SELECT COUNT(*) FROM burnin_jobs WHERE drive_id=? AND state IN ('queued','running')",
        (drive_id,),
    )
    count_row = await lookup.fetchone()
    active_jobs = count_row[0]
    if active_jobs > 0:
        raise HTTPException(status_code=409, detail="Cannot reset while a burn-in is active")

    # The operator is resolved through operator_for rather than taken from
    # the body verbatim: the JS used to send a literal "operator" because
    # window._operator was never set.
    operator = operator_for(request, body.get("operator"))

    # Put every SMART test row of the drive back to idle.
    await db.execute(
        """UPDATE smart_tests SET state='idle', percent=0, started_at=NULL,
           eta_at=NULL, finished_at=NULL, error_text=NULL, raw_output=NULL
           WHERE drive_id=?""",
        (drive_id,),
    )

    # Drop the SMART attrs cache and stamp the reset time (hides the prior
    # burn-in from the dashboard).
    reset_stamp = datetime.now(timezone.utc).isoformat()
    await db.execute(
        "UPDATE drives SET smart_attrs=NULL, last_reset_at=? WHERE id=?",
        (reset_stamp, drive_id),
    )

    audit_values = ("drive_reset", drive_id, operator, "Drive reset — SMART state cleared")
    await db.execute(
        """INSERT INTO audit_events (event_type, drive_id, operator, message)
           VALUES (?,?,?,?)""",
        audit_values,
    )
    await db.commit()

    poller._notify_subscribers()
    return {"reset": True}
|
|
|
|
|
|
@router.post("/api/v1/drives/{drive_id}/unlock")
async def unlock_pool_drive(drive_id: int, request: Request, req: UnlockPoolDriveRequest):
    """Grant a pool-membership lock override for a drive.

    Every call is first registered with ``auth.unlock_limiter`` under two
    keys, the drive and the caller's IP. A successful grant clears both keys
    again.

    Returns:
        ``{"unlocked": True, "expires_at": ..., "ttl_seconds": ...}``.

    Raises:
        HTTPException: 429 when the limiter does not answer ``"ok"``; 400
            when ``burnin.grant_pool_unlock`` raises ``ValueError``.
    """
    operator = operator_for(request, req.operator)
    source_ip = client_ip(request)

    # Rate-limit by drive AND by source IP. A typo on the confirm token is
    # the common case so the threshold is loose, but a brute-force attempt
    # to guess the token still hits the IP cap.
    limiter_keys = (("drive", drive_id), ("ip", source_ip))
    verdict = auth.unlock_limiter.register(*limiter_keys)
    if verdict != "ok":
        raise HTTPException(
            status_code=429,
            detail="Too many unlock attempts on this drive. Try again later.",
        )

    try:
        expires_at = await burnin.grant_pool_unlock(
            drive_id, req.confirm_token, operator, req.reason,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    else:
        auth.unlock_limiter.clear(*limiter_keys)

    # Read from the submodule, not the package-root snapshot alias — keeps
    # tests that monkey-patch UNLOCK_TTL_SECONDS in app.burnin.unlock
    # observable from the API response.
    ttl_seconds = burnin.unlock.UNLOCK_TTL_SECONDS
    return {"unlocked": True, "expires_at": expires_at, "ttl_seconds": ttl_seconds}
|