These files have been live on maple for a while via direct scp/edit but were never committed back to the forge. Restoring parity so the repo matches the running container's source tree before the new feature work lands on top.

- app/terminal.py: NEW. xterm.js <-> asyncssh PTY bridge wired into the log drawer's Terminal tab. Was added on the deploy host only.
- app/truenas.py: misc REST client tweaks deployed but not committed.
- CLAUDE.md / SPEC.md: documentation drift — Stage 8 terminal section, updated file map.
- docker-compose.yml / requirements.txt: minor infra deltas already active on maple.

No behaviour change vs the running container.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
164 lines
5.3 KiB
Python
164 lines
5.3 KiB
Python
import asyncio
|
|
import logging
|
|
from collections.abc import Callable, Coroutine
|
|
from typing import Any, TypeVar
|
|
|
|
import httpx
|
|
|
|
from app.config import settings
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Result type of the coroutine produced by a retry factory.
T = TypeVar("T")

# Transport-level httpx failures treated as transient: only these exception
# types trigger another attempt in _with_retry; anything else propagates
# immediately.
_RETRYABLE: tuple[type[Exception], ...] = (
    httpx.ConnectError,
    httpx.TimeoutException,
    httpx.RemoteProtocolError,
    httpx.ReadError,
)
|
|
|
|
|
|
async def _with_retry(
    factory: Callable[[], Coroutine[Any, Any, T]],
    label: str,
    max_attempts: int = 3,
) -> T:
    """
    Await ``factory()`` and return its result, retrying transient failures.

    A factory (not a bare coroutine) is required so each attempt gets a
    new coroutine object — an already-awaited coroutine cannot be reused.

    Only exceptions listed in ``_RETRYABLE`` are retried; any other exception
    propagates on the first occurrence. Between attempts the function sleeps
    1s, 2s, 4s, ... (doubling each time). When the final attempt also fails,
    that attempt's exception is re-raised unchanged.

    Args:
        factory: Zero-argument callable returning a fresh coroutine per call.
        label: Short operation name included in the warning log line.
        max_attempts: Total number of attempts (not retries); must be >= 1.

    Raises:
        ValueError: If ``max_attempts`` is less than 1. Without this check the
            loop body would never run and the function would silently return
            ``None`` without ever calling ``factory``.
    """
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be >= 1, got {max_attempts}")

    backoff = 1.0
    for attempt in range(1, max_attempts + 1):
        try:
            return await factory()
        except _RETRYABLE as exc:
            # Out of attempts: surface the last transient error to the caller.
            if attempt == max_attempts:
                raise
            log.warning(
                "TrueNAS %s transient error (attempt %d/%d): %s — retrying in %.0fs",
                label, attempt, max_attempts, exc, backoff,
            )
            await asyncio.sleep(backoff)
            backoff *= 2

    # Every loop iteration either returns or raises on the last attempt, so
    # this line is never reached; it makes the "no implicit None" contract explicit.
    raise AssertionError("_with_retry exhausted attempts without returning or raising")
|
|
|
|
|
|
class TrueNASClient:
|
|
def __init__(self) -> None:
|
|
self._client = httpx.AsyncClient(
|
|
base_url=settings.truenas_base_url,
|
|
headers={"Authorization": f"Bearer {settings.truenas_api_key}"},
|
|
verify=settings.truenas_verify_tls,
|
|
timeout=10.0,
|
|
)
|
|
|
|
async def close(self) -> None:
|
|
await self._client.aclose()
|
|
|
|
async def get_disks(self) -> list[dict]:
|
|
r = await _with_retry(
|
|
lambda: self._client.get("/api/v2.0/disk"),
|
|
"get_disks",
|
|
)
|
|
r.raise_for_status()
|
|
disks = r.json()
|
|
# Filter out expired records — TrueNAS keeps historical entries for removed
|
|
# disks with expiretime set. Only return currently-present drives.
|
|
active = [d for d in disks if not d.get("expiretime")]
|
|
if len(active) < len(disks):
|
|
log.debug("get_disks: filtered %d expired record(s)", len(disks) - len(active))
|
|
return active
|
|
|
|
async def get_smart_jobs(self, state: str | None = None) -> list[dict]:
|
|
params: dict = {"method": "smart.test"}
|
|
if state:
|
|
params["state"] = state
|
|
r = await _with_retry(
|
|
lambda: self._client.get("/api/v2.0/core/get_jobs", params=params),
|
|
"get_smart_jobs",
|
|
)
|
|
r.raise_for_status()
|
|
return r.json()
|
|
|
|
async def get_smart_results(self, devname: str) -> list[dict]:
|
|
r = await _with_retry(
|
|
lambda: self._client.get(f"/api/v2.0/smart/test/results/{devname}"),
|
|
f"get_smart_results({devname})",
|
|
)
|
|
r.raise_for_status()
|
|
return r.json()
|
|
|
|
async def start_smart_test(self, disks: list[str], test_type: str) -> int:
|
|
"""Start a SMART test. Not retried — a duplicate start would launch a second job."""
|
|
r = await self._client.post(
|
|
"/api/v2.0/smart/test",
|
|
json={"disks": disks, "type": test_type},
|
|
)
|
|
r.raise_for_status()
|
|
return r.json()
|
|
|
|
async def abort_job(self, job_id: int) -> None:
|
|
"""Abort a TrueNAS job. Not retried — best-effort cancel."""
|
|
r = await self._client.post(
|
|
"/api/v2.0/core/job_abort",
|
|
json={"id": job_id},
|
|
)
|
|
r.raise_for_status()
|
|
|
|
async def get_system_info(self) -> dict:
|
|
r = await _with_retry(
|
|
lambda: self._client.get("/api/v2.0/system/info"),
|
|
"get_system_info",
|
|
)
|
|
r.raise_for_status()
|
|
return r.json()
|
|
|
|
async def get_disk_temperatures(self) -> dict[str, float | None]:
|
|
"""
|
|
Returns {devname: celsius | None}.
|
|
Uses POST /api/v2.0/disk/temperatures — available on TrueNAS SCALE 25.10+.
|
|
CORE compatibility: raises on 404/405, caller should catch and skip.
|
|
"""
|
|
r = await _with_retry(
|
|
lambda: self._client.post("/api/v2.0/disk/temperatures", json={}),
|
|
"get_disk_temperatures",
|
|
)
|
|
r.raise_for_status()
|
|
return r.json()
|
|
|
|
async def wipe_disk(self, devname: str, mode: str = "FULL") -> int:
|
|
"""
|
|
Start a disk wipe job. Not retried — duplicate starts would launch a second wipe.
|
|
mode: "QUICK" (wipe MBR/partitions only), "FULL" (write zeros), "FULL_RANDOM" (write random)
|
|
devname: basename only, e.g. "ada0" (not "/dev/ada0")
|
|
Returns the TrueNAS job ID.
|
|
"""
|
|
r = await self._client.post(
|
|
"/api/v2.0/disk/wipe",
|
|
json={"dev": devname, "mode": mode},
|
|
)
|
|
r.raise_for_status()
|
|
return r.json()
|
|
|
|
async def get_job(self, job_id: int) -> dict | None:
|
|
"""
|
|
Fetch a single TrueNAS job by ID.
|
|
Returns the job dict, or None if not found.
|
|
"""
|
|
import json as _json
|
|
r = await _with_retry(
|
|
lambda: self._client.get(
|
|
"/api/v2.0/core/get_jobs",
|
|
params={"filters": _json.dumps([["id", "=", job_id]])},
|
|
),
|
|
f"get_job({job_id})",
|
|
)
|
|
r.raise_for_status()
|
|
jobs = r.json()
|
|
if isinstance(jobs, list) and jobs:
|
|
return jobs[0]
|
|
return None
|