"""Drive endpoints — list, drawer, edit, SMART start/cancel, reset, unlock.

GET    /api/v1/drives
GET    /api/v1/drives/{id}/drawer
GET    /api/v1/drives/{id}
PATCH  /api/v1/drives/{id}              — notes / location update
POST   /api/v1/drives/{id}/smart/start
POST   /api/v1/drives/{id}/smart/cancel
POST   /api/v1/drives/{id}/reset
POST   /api/v1/drives/{id}/unlock       — pool-membership lock override
"""
from __future__ import annotations

import json as _json
from datetime import datetime, timezone

import aiosqlite
from fastapi import APIRouter, Depends, HTTPException, Request

from app import auth, burnin, poller
from app.database import get_db
from app.models import (
    DriveResponse,
    UnlockPoolDriveRequest,
    UpdateDriveRequest,
)

from ._drives_helpers import _DRIVES_QUERY, _row_to_drive
from ._helpers import client_ip, operator_for

router = APIRouter()


def _smart_type_from(body: dict) -> str:
    """Return the raw ``type`` value of a SMART request body, or ``""``.

    The handlers take the body as a plain ``dict``, so ``type`` may be
    missing, null, or not a string at all. Anything that is not a string
    collapses to ``""`` so the caller's membership check answers 422 instead
    of ``.upper()`` / ``.lower()`` blowing up with an AttributeError (500).
    """
    value = body.get("type")
    return value if isinstance(value, str) else ""


async def _devname_or_404(db: aiosqlite.Connection, drive_id: int) -> str:
    """Return the ``devname`` column of drive *drive_id*; raise 404 if absent."""
    cur = await db.execute("SELECT devname FROM drives WHERE id=?", (drive_id,))
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    return row[0]


@router.get("/api/v1/drives", response_model=list[DriveResponse])
async def list_drives(db: aiosqlite.Connection = Depends(get_db)):
    """Return every drive produced by the shared drives query (no filter)."""
    cur = await db.execute(_DRIVES_QUERY.format(where=""))
    rows = await cur.fetchall()
    return [_row_to_drive(r) for r in rows]


@router.get("/api/v1/drives/{drive_id}/drawer")
async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db)):
    """Data for the log drawer — latest burn-in job + stages, SMART tests, audit events.

    Raises:
        HTTPException: 404 when no drive has id *drive_id*.
    """
    cur = await db.execute(_DRIVES_QUERY.format(where="AND d.id = ?"), (drive_id,))
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    drive = _row_to_drive(row)

    # Latest burn-in job + its stages (include log_text and bad_blocks)
    cur = await db.execute(
        "SELECT * FROM burnin_jobs WHERE drive_id=? ORDER BY id DESC LIMIT 1",
        (drive_id,),
    )
    job_row = await cur.fetchone()
    burnin_job = None
    if job_row:
        job = dict(job_row)
        cur = await db.execute(
            "SELECT id, stage_name, state, percent, started_at, finished_at, "
            "duration_seconds, error_text, log_text, bad_blocks "
            "FROM burnin_stages WHERE burnin_job_id=? ORDER BY id",
            (job_row["id"],),
        )
        job["stages"] = [dict(r) for r in await cur.fetchall()]
        burnin_job = job

    # SMART raw output from smart_tests table, keyed by test_type
    cur = await db.execute(
        "SELECT test_type, state, percent, started_at, finished_at, error_text, raw_output "
        "FROM smart_tests WHERE drive_id=?",
        (drive_id,),
    )
    smart_rows = {r["test_type"]: dict(r) for r in await cur.fetchall()}

    # Cached SMART attributes (JSON blob on drives table)
    smart_attrs = None
    cur = await db.execute("SELECT smart_attrs FROM drives WHERE id=?", (drive_id,))
    attrs_row = await cur.fetchone()
    if attrs_row and attrs_row["smart_attrs"]:
        try:
            smart_attrs = _json.loads(attrs_row["smart_attrs"])
        except Exception:
            # Deliberate best-effort: an unparseable cached blob is reported
            # as "no attributes" rather than failing the whole drawer.
            smart_attrs = None

    # Last 50 audit events for this drive (newest first)
    cur = await db.execute(
        """
        SELECT id, event_type, operator, message, created_at
        FROM audit_events
        WHERE drive_id = ?
        ORDER BY id DESC
        LIMIT 50
        """,
        (drive_id,),
    )
    events = [dict(r) for r in await cur.fetchall()]

    def _smart_card(test_type: str) -> dict:
        """Merge the drive's SMART summary for *test_type* with its raw output."""
        smart_obj = drive.smart_short if test_type == "short" else drive.smart_long
        base = smart_obj.model_dump() if smart_obj else {}
        row = smart_rows.get(test_type, {})
        base["raw_output"] = row.get("raw_output")
        return base

    return {
        "drive": {
            "id": drive.id,
            "devname": drive.devname,
            "serial": drive.serial,
            "model": drive.model,
            "size_bytes": drive.size_bytes,
        },
        "burnin": burnin_job,
        "smart": {
            "short": _smart_card("short"),
            "long": _smart_card("long"),
            "attrs": smart_attrs,
        },
        "events": events,
    }


@router.get("/api/v1/drives/{drive_id}", response_model=DriveResponse)
async def get_drive(drive_id: int, db: aiosqlite.Connection = Depends(get_db)):
    """Return a single drive, or 404 when *drive_id* does not exist."""
    cur = await db.execute(
        _DRIVES_QUERY.format(where="AND d.id = ?"), (drive_id,)
    )
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    return _row_to_drive(row)


@router.post("/api/v1/drives/{drive_id}/smart/start")
async def smart_start(
    drive_id: int,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Start a standalone SHORT or LONG SMART test on a single drive.

    Uses SSH (smartctl) when configured — required for TrueNAS SCALE 25.10+
    where the REST smart/test endpoint no longer exists. Falls back to
    TrueNAS REST API for older versions.

    Raises:
        HTTPException: 422 when ``type`` is not SHORT/LONG (case-insensitive),
            404 when the drive does not exist, 503 when the REST path is
            needed but the TrueNAS client is not ready, 502 when the SSH or
            TrueNAS call fails.
    """
    from app import ssh_client

    test_type = _smart_type_from(body).upper()
    if test_type not in ("SHORT", "LONG"):
        raise HTTPException(status_code=422, detail="type must be SHORT or LONG")

    devname = await _devname_or_404(db, drive_id)

    if ssh_client.is_configured():
        # SSH path — works on TrueNAS SCALE 25.10+ and CORE
        try:
            output = await ssh_client.start_smart_test(devname, test_type)
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"SSH error: {exc}") from exc

        # Mark as running in DB (truenas_job_id=NULL signals SSH-managed test)
        # Store smartctl start output as proof the test was initiated
        now = datetime.now(timezone.utc).isoformat()
        await db.execute(
            """INSERT INTO smart_tests (drive_id, test_type, state, percent, started_at, raw_output)
               VALUES (?,?,?,?,?,?)
               ON CONFLICT(drive_id, test_type) DO UPDATE SET
                 state='running', percent=0, truenas_job_id=NULL,
                 started_at=excluded.started_at, finished_at=NULL, error_text=NULL,
                 raw_output=excluded.raw_output""",
            (drive_id, test_type.lower(), "running", 0, now, output),
        )
        await db.commit()
        poller._notify_subscribers()
        return {"devname": devname, "type": test_type, "message": output[:200]}

    # REST path — older TrueNAS CORE / SCALE versions
    client = burnin._client
    if client is None:
        raise HTTPException(status_code=503, detail="TrueNAS client not ready")
    try:
        tn_job_id = await client.start_smart_test([devname], test_type)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}") from exc
    return {"job_id": tn_job_id, "devname": devname, "type": test_type}


@router.post("/api/v1/drives/{drive_id}/smart/cancel")
async def smart_cancel(
    drive_id: int,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Cancel a running standalone SMART test on a drive.

    Aborts over SSH when SSH is configured; otherwise looks up the running
    TrueNAS SMART job that lists this drive and aborts that job. On success
    the local ``smart_tests`` row still marked running is flagged aborted.

    Raises:
        HTTPException: 422 when ``type`` is not short/long (case-insensitive),
            404 when the drive or (REST path) a running job is not found,
            503 when the REST path is needed but the TrueNAS client is not
            ready, 502 when the SSH or TrueNAS call fails.
    """
    from app import ssh_client

    test_type = _smart_type_from(body).lower()
    if test_type not in ("short", "long"):
        raise HTTPException(status_code=422, detail="type must be 'short' or 'long'")

    devname = await _devname_or_404(db, drive_id)

    if ssh_client.is_configured():
        # SSH path — abort via smartctl -X. The REST client is not involved,
        # so its readiness must not gate this branch.
        try:
            await ssh_client.abort_smart_test(devname)
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"SSH abort error: {exc}") from exc
    else:
        # REST path — find TrueNAS job and abort it
        client = burnin._client
        if client is None:
            raise HTTPException(status_code=503, detail="TrueNAS client not ready")
        try:
            jobs = await client.get_smart_jobs()
            tn_job_id = None
            for j in jobs:
                if j.get("state") != "RUNNING":
                    continue
                args = j.get("arguments", [])
                if not args or not isinstance(args[0], dict):
                    continue
                if devname in args[0].get("disks", []):
                    tn_job_id = j["id"]
                    break

            if tn_job_id is None:
                raise HTTPException(status_code=404, detail="No running SMART test found for this drive")

            await client.abort_job(tn_job_id)
        except HTTPException:
            # Let the 404 above through untouched instead of wrapping it as 502.
            raise
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}") from exc

    # Update local DB state
    now = datetime.now(timezone.utc).isoformat()
    await db.execute(
        "UPDATE smart_tests SET state='aborted', finished_at=? WHERE drive_id=? AND test_type=? AND state='running'",
        (now, drive_id, test_type),
    )
    await db.commit()

    return {"cancelled": True, "devname": devname, "type": test_type}


@router.patch("/api/v1/drives/{drive_id}")
async def update_drive(
    drive_id: int,
    req: UpdateDriveRequest,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Overwrite the ``notes`` and ``location`` columns of a drive.

    Raises:
        HTTPException: 404 when the drive does not exist.
    """
    cur = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,))
    if not await cur.fetchone():
        raise HTTPException(status_code=404, detail="Drive not found")

    # Both columns are always written from the request.
    # NOTE(review): a field omitted by the client is stored as whatever value
    # UpdateDriveRequest gives it — confirm that suits partial PATCHes.
    await db.execute(
        "UPDATE drives SET notes=?, location=? WHERE id=?",
        (req.notes, req.location, drive_id),
    )
    await db.commit()
    return {"updated": True}


@router.post("/api/v1/drives/{drive_id}/reset")
async def reset_drive(
    drive_id: int,
    request: Request,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """
    Clear SMART test results for a drive so it shows as fresh.
    Only allowed when no burn-in job is active (queued or running).
    Preserves all job history — just resets the display state.

    Raises:
        HTTPException: 404 when the drive does not exist, 409 when a burn-in
            job for it is queued or running.
    """
    cur = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,))
    if not await cur.fetchone():
        raise HTTPException(status_code=404, detail="Drive not found")

    # Reject if any active burn-in
    cur = await db.execute(
        "SELECT COUNT(*) FROM burnin_jobs WHERE drive_id=? AND state IN ('queued','running')",
        (drive_id,),
    )
    if (await cur.fetchone())[0] > 0:
        raise HTTPException(status_code=409, detail="Cannot reset while a burn-in is active")

    # Trust the logged-in user, not the body (the JS used to send a
    # literal "operator" because window._operator was never set).
    operator = operator_for(request, body.get("operator"))

    # Reset SMART test state to idle
    await db.execute(
        """UPDATE smart_tests
           SET state='idle', percent=0, started_at=NULL, eta_at=NULL,
               finished_at=NULL, error_text=NULL, raw_output=NULL
           WHERE drive_id=?""",
        (drive_id,),
    )

    # Clear SMART attrs cache + stamp reset time (hides prior burn-in from dashboard)
    now = datetime.now(timezone.utc).isoformat()
    await db.execute(
        "UPDATE drives SET smart_attrs=NULL, last_reset_at=? WHERE id=?",
        (now, drive_id),
    )

    # Audit event
    await db.execute(
        """INSERT INTO audit_events (event_type, drive_id, operator, message)
           VALUES (?,?,?,?)""",
        ("drive_reset", drive_id, operator, "Drive reset — SMART state cleared"),
    )

    # All three writes are committed together, then listeners are notified.
    await db.commit()
    poller._notify_subscribers()
    return {"reset": True}


@router.post("/api/v1/drives/{drive_id}/unlock")
async def unlock_pool_drive(drive_id: int, request: Request, req: UnlockPoolDriveRequest):
    """Grant a pool-membership lock override for a drive.

    Every call is first registered with the unlock rate limiter; a successful
    grant clears the limiter entries again.

    Raises:
        HTTPException: 429 when the limiter refuses the attempt, 400 when
            ``burnin.grant_pool_unlock`` rejects the request with ValueError.
    """
    operator = operator_for(request, req.operator)
    ip = client_ip(request)

    # Rate-limit by drive AND by source IP. A typo on the confirm token
    # is the common case so the threshold is loose, but a brute-force
    # attempt to guess the token still hits the IP cap.
    keys = (("drive", drive_id), ("ip", ip))
    attempt = auth.unlock_limiter.register(*keys)
    if attempt != "ok":
        raise HTTPException(
            status_code=429,
            detail="Too many unlock attempts on this drive. Try again later.",
        )

    try:
        expiry = await burnin.grant_pool_unlock(
            drive_id, req.confirm_token, operator, req.reason,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    auth.unlock_limiter.clear(*keys)
    # Read from the submodule, not the package-root snapshot alias —
    # keeps tests that monkey-patch UNLOCK_TTL_SECONDS in
    # app.burnin.unlock observable from the API response.
    return {"unlocked": True, "expires_at": expiry, "ttl_seconds": burnin.unlock.UNLOCK_TTL_SECONDS}