nas-burnin/claude-sandbox/truenas-burnin/app/settings_store.py
echoparkbaby 3e0000528f TrueNAS Burn-In Dashboard v0.9.0 — Live mode, thermal monitoring, adaptive concurrency
Go live against real TrueNAS SCALE 25.10:
- Remove mock-truenas dependency; mount SSH key as Docker secret
- Filter expired disk records from /api/v2.0/disk (expiretime field)
- Route all SMART operations through SSH (SCALE 25.10 removed REST smart/test endpoint)
- Poll drive temperatures via POST /api/v2.0/disk/temperatures (SCALE-specific)
- Store raw smartctl output in smart_tests.raw_output for proof of test execution
- Fix percent-remaining=0 false jump to 100% on test start
- Fix terminal WebSocket: add mounted key file fallback (/run/secrets/ssh_key)
- Fix WebSocket support: uvicorn → uvicorn[standard] (installs websockets)

HBA/system sensor temps on dashboard:
- SSH to TrueNAS and run sensors -j each poll cycle
- Parse coretemp (CPU package) and pch_* (PCH/chipset — storage I/O proxy)
- Render as compact chips in stats bar, color-coded green/yellow/red
- Live updates via new SSE system-sensors event every 12s

Adaptive concurrency signal:
- Thermal pressure indicator in stats bar: hidden when OK, WARM/HOT when running
  burn-in drives hit temp_warn_c / temp_crit_c thresholds
- Thermal gate in burn-in queue: jobs wait up to 3 min before acquiring semaphore
  slot if running drives are already at warning temp; times out and proceeds

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 06:33:36 -05:00

140 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Runtime settings store — persists editable settings to /data/settings_overrides.json.
Changes take effect immediately (in-memory setattr on the global Settings object)
and survive restarts (JSON file is loaded in main.py lifespan).
System settings (TrueNAS URL, poll interval, etc.) are saved to JSON but require
a container restart to fully take effect (clients/middleware are initialized at boot).
"""
import json
import logging
from pathlib import Path
from app.config import settings
log = logging.getLogger(__name__)
# Field name → coerce function. Only fields listed here are accepted by save().
_EDITABLE: dict[str, type] = {
# Email / SMTP
"smtp_host": str,
"smtp_ssl_mode": str,
"smtp_timeout": int,
"smtp_user": str,
"smtp_password": str,
"smtp_from": str,
"smtp_to": str,
"smtp_daily_report_enabled": bool,
"smtp_report_hour": int,
"smtp_alert_on_fail": bool,
"smtp_alert_on_pass": bool,
# Webhook
"webhook_url": str,
# Burn-in behaviour
"stuck_job_hours": int,
"max_parallel_burnins": int,
"temp_warn_c": int,
"temp_crit_c": int,
"bad_block_threshold": int,
# SSH credentials — take effect immediately (each connection reads live settings)
"ssh_host": str,
"ssh_port": int,
"ssh_user": str,
"ssh_password": str,
"ssh_key": str,
# System settings — saved to JSON; require container restart to fully apply
"truenas_base_url": str,
"truenas_api_key": str,
"truenas_verify_tls": bool,
"poll_interval_seconds": int,
"stale_threshold_seconds": int,
"allowed_ips": str,
"log_level": str,
}
_VALID_SSL_MODES = {"starttls", "ssl", "plain"}
_VALID_LOG_LEVELS = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
def _overrides_path() -> Path:
    """Return the overrides JSON path, which sits in the same directory as settings.db_path."""
    data_dir = Path(settings.db_path).parent
    return data_dir.joinpath("settings_overrides.json")
def _coerce(key: str, raw) -> object:
    """
    Convert *raw* to the type registered for *key* in _EDITABLE.

    For bool fields a real bool is returned unchanged; any other value is
    stringified and counts as True only when it is one of "1", "true", "yes"
    or "on" (case-insensitive, surrounding whitespace ignored).

    Raises:
        KeyError: *key* is not listed in _EDITABLE.
        TypeError: *raw* is None for a non-bool field, or the target type
            cannot convert it.
        ValueError: the target type rejects the value (e.g. int("abc")).
    """
    target = _EDITABLE[key]
    if target is bool:
        if isinstance(raw, bool):
            return raw
        # Strip first so values such as " true " are not read as False.
        return str(raw).strip().lower() in ("1", "true", "yes", "on")
    if raw is None:
        # str(None) would otherwise store the literal text "None" as the
        # setting's value; reject a null instead of persisting garbage.
        raise TypeError(f"{key} may not be null")
    return target(raw)
def _rejection_reason(key: str, val) -> str:
    """Return why the already-coerced *val* is unacceptable for *key*, or "" when it is fine."""
    if key == "smtp_ssl_mode" and val not in _VALID_SSL_MODES:
        return f"invalid smtp_ssl_mode {val!r}"
    if key == "smtp_report_hour" and not 0 <= val <= 23:
        return "smtp_report_hour out of range"
    if key == "log_level" and val not in _VALID_LOG_LEVELS:
        return f"invalid log_level {val!r}"
    if key in ("poll_interval_seconds", "stale_threshold_seconds") and val < 1:
        return f"{key} must be >= 1"
    if key in ("temp_warn_c", "temp_crit_c") and not 20 <= val <= 80:
        return f"{key} out of range (20-80)"
    if key == "bad_block_threshold" and val < 0:
        return "bad_block_threshold must be >= 0"
    if key == "ssh_port" and not 1 <= val <= 65535:
        return "ssh_port out of range"
    return ""


def _apply(data: dict) -> None:
    """
    Apply a dict of updates to the live settings object.

    Keys that are not listed in _EDITABLE are skipped silently. Each remaining
    value is coerced with _coerce() and range/enum-checked; a value that fails
    either step is logged at WARNING level and skipped, leaving the current
    setting untouched. This function never raises for a bad value.
    """
    for key, raw in data.items():
        if key not in _EDITABLE:
            continue
        try:
            val = _coerce(key, raw)
            reason = _rejection_reason(key, val)
            if reason:
                log.warning("settings_store: %s — ignoring", reason)
                continue
            # Kept inside the try so a ValueError/TypeError raised by the
            # settings object on assignment is logged rather than propagated.
            setattr(settings, key, val)
        except (ValueError, TypeError) as exc:
            log.warning("settings_store: invalid value for %s: %s", key, exc)
def init() -> None:
    """
    Load persisted overrides into the live settings at startup.

    Intended to be called once from the application lifespan. A missing
    overrides file is a no-op; any failure while reading, parsing or applying
    it is logged as a warning and otherwise ignored.
    """
    overrides_file = _overrides_path()
    if overrides_file.exists():
        try:
            raw_text = overrides_file.read_text()
            overrides = json.loads(raw_text)
            _apply(overrides)
            log.info(
                "settings_store: loaded %d override(s) from %s",
                len(overrides),
                overrides_file,
            )
        except Exception as err:
            # Best-effort: a bad overrides file must not prevent startup.
            log.warning(
                "settings_store: could not load overrides from %s: %s",
                overrides_file,
                err,
            )
def save(updates: dict) -> list[str]:
    """
    Apply a dict of settings updates and persist the resulting configuration.

    Every key must be listed in _EDITABLE. That check runs before anything is
    applied, so an unknown key leaves both the live settings and the overrides
    file untouched. The values are then handed to _apply(), and the overrides
    file is rewritten with the current value of every editable setting (not
    just the delta).

    Returns:
        The keys of *updates*, all of which passed the editable-key check.
        NOTE(review): _apply() logs and skips values it rejects instead of
        raising, so a returned key does not prove its value was changed.

    Raises:
        ValueError: a key in *updates* is unknown or not editable.
    """
    for key in updates:
        if key not in _EDITABLE:
            raise ValueError(f"Unknown or non-editable setting: {key!r}")
    accepted = dict(updates)
    _apply(accepted)

    # Persist ALL currently-applied editable values (not just the delta).
    # NOTE(review): the snapshot includes credentials (passwords, API key,
    # SSH key) in plain text — confirm the data directory is access-restricted.
    snapshot = {k: getattr(settings, k) for k in _EDITABLE}
    payload = json.dumps(snapshot, indent=2)

    path = _overrides_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename it over the target: an
    # interrupted write then cannot leave a truncated JSON file that init()
    # would fail to load on the next start.
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(payload)
    tmp_path.replace(path)
    log.info("settings_store: saved %d key(s) — snapshot written to %s", len(accepted), path)
    return list(accepted)