truenas-burnin/app/truenas.py
Brandon Walter b73b5251ae Initial commit — TrueNAS Burn-In Dashboard v0.5.0
Full-stack burn-in orchestration dashboard (Stages 1–6d complete):
FastAPI backend, SQLite/WAL, SSE live dashboard, mock TrueNAS server,
SMTP/webhook notifications, batch burn-in, settings UI, audit log,
stats page, cancel SMART/burn-in, drag-to-reorder stages.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 00:08:29 -05:00

112 lines
3.3 KiB
Python

import asyncio
import logging
from collections.abc import Callable, Coroutine
from typing import Any, TypeVar
import httpx
from app.config import settings
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Generic result type: lets _with_retry be annotated as returning whatever
# the coroutine produced by its factory returns.
T = TypeVar("T")
# Exceptions that are safe to retry (transient network issues).
# _with_retry catches only these types; any other exception propagates
# on the first failure without a retry.
_RETRYABLE = (
    httpx.ConnectError,
    httpx.TimeoutException,
    httpx.RemoteProtocolError,
    httpx.ReadError,
)
async def _with_retry(
    factory: Callable[[], Coroutine[Any, Any, T]],
    label: str,
    max_attempts: int = 3,
) -> T:
    """
    Call factory() to get a fresh coroutine and await it, retrying with
    exponential backoff on transient failures.

    A factory (not a bare coroutine) is required so each attempt gets a
    new coroutine object — an already-awaited coroutine cannot be reused.

    Only exceptions listed in ``_RETRYABLE`` are retried; anything else
    propagates immediately. The delay before the first retry is one second
    and it doubles after every failed attempt.

    Args:
        factory: Zero-argument callable returning a new coroutine per call.
        label: Short operation name included in the retry warning log line.
        max_attempts: Total number of attempts, including the first one.
            Must be at least 1.

    Returns:
        The value produced by the first attempt that completes.

    Raises:
        ValueError: If ``max_attempts`` is less than 1.
        Exception: The last ``_RETRYABLE`` error once all attempts are used
            up, or any non-retryable error as soon as it occurs.
    """
    if max_attempts < 1:
        # Without this guard the loop body never runs and the function
        # silently returns None without calling factory() at all.
        raise ValueError(f"max_attempts must be >= 1, got {max_attempts}")

    backoff = 1.0
    for attempt in range(1, max_attempts + 1):
        try:
            return await factory()
        except _RETRYABLE as exc:
            if attempt == max_attempts:
                # Out of attempts: surface the original transient error.
                raise
            log.warning(
                "TrueNAS %s transient error (attempt %d/%d): %s — retrying in %.0fs",
                label, attempt, max_attempts, exc, backoff,
            )
            await asyncio.sleep(backoff)
            backoff *= 2

    # The final iteration always returns or re-raises, so control cannot
    # reach this point; the explicit raise keeps the "-> T" contract honest.
    raise AssertionError("unreachable: retry loop exited without a result")
class TrueNASClient:
def __init__(self) -> None:
self._client = httpx.AsyncClient(
base_url=settings.truenas_base_url,
headers={"Authorization": f"Bearer {settings.truenas_api_key}"},
verify=settings.truenas_verify_tls,
timeout=10.0,
)
async def close(self) -> None:
await self._client.aclose()
async def get_disks(self) -> list[dict]:
r = await _with_retry(
lambda: self._client.get("/api/v2.0/disk"),
"get_disks",
)
r.raise_for_status()
return r.json()
async def get_smart_jobs(self, state: str | None = None) -> list[dict]:
params: dict = {"method": "smart.test"}
if state:
params["state"] = state
r = await _with_retry(
lambda: self._client.get("/api/v2.0/core/get_jobs", params=params),
"get_smart_jobs",
)
r.raise_for_status()
return r.json()
async def get_smart_results(self, devname: str) -> list[dict]:
r = await _with_retry(
lambda: self._client.get(f"/api/v2.0/smart/test/results/{devname}"),
f"get_smart_results({devname})",
)
r.raise_for_status()
return r.json()
async def start_smart_test(self, disks: list[str], test_type: str) -> int:
"""Start a SMART test. Not retried — a duplicate start would launch a second job."""
r = await self._client.post(
"/api/v2.0/smart/test",
json={"disks": disks, "type": test_type},
)
r.raise_for_status()
return r.json()
async def abort_job(self, job_id: int) -> None:
"""Abort a TrueNAS job. Not retried — best-effort cancel."""
r = await self._client.post(
"/api/v2.0/core/job_abort",
json={"id": job_id},
)
r.raise_for_status()
async def get_system_info(self) -> dict:
r = await _with_retry(
lambda: self._client.get("/api/v2.0/system/info"),
"get_system_info",
)
r.raise_for_status()
return r.json()