Stage 7: SSH architecture, SMART attribute monitoring, drive reset, and polish

SSH (app/ssh_client.py — new):
- asyncssh-based client: start_smart_test, poll_smart_progress, abort_smart_test,
  get_smart_attributes, run_badblocks with streaming progress callbacks
- SMART attribute table: monitors attrs 5/10/188/197/198/199 for warn/fail thresholds
- Falls back to REST API / mock simulation when ssh_host is not configured

Burn-in stages updated (burnin.py):
- _stage_smart_test: SSH path polls smartctl -a, stores raw output + parsed attributes
- _stage_surface_validate: SSH path streams badblocks, counts bad blocks vs configurable threshold
- _stage_final_check: SSH path checks smartctl attributes; DB fallback for mock mode
- New DB helpers: _append_stage_log, _update_stage_bad_blocks, _store_smart_attrs,
  _store_smart_raw_output

Database (database.py):
- Migrations: burnin_stages.log_text, burnin_stages.bad_blocks,
  drives.smart_attrs (JSON), smart_tests.raw_output

Settings (config.py + settings_store.py):
- ssh_host, ssh_port, ssh_user, ssh_password, ssh_key — all runtime-editable
- SSH section in Settings UI with Test SSH Connection button

Webhook (notifier.py):
- Added bad_blocks and timestamp fields to payload per SPEC

Drive reset (routes.py + drives_table.html):
- POST /api/v1/drives/{id}/reset — clears SMART state, smart_attrs; audit logged
- Reset button visible on drives with completed test state (no active burn-in)

Log drawer (app.js):
- Burn-In tab: shows raw stage log_text (SSH output) with bad block highlighting
- SMART tab: shows SMART attribute table with warn/fail colouring + raw smartctl output

Polish:
- Version badge (v1.0.0-6d) in header via Jinja2 global
- Parallel burn-in warning when max_parallel_burnins > 8 in Settings
- Stats page: avg duration by drive size + failure breakdown by stage
- settings.html: SSH section with key textarea, parallel warn div

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Brandon Walter 2026-02-24 08:09:30 -05:00
parent 4ab54d7ed8
commit 2dff58bd52
15 changed files with 1141 additions and 44 deletions

View file

@ -303,6 +303,16 @@ async def _run_job(job_id: int) -> None:
)
job_row = await cur2.fetchone()
if job_row:
# Get bad_blocks count from surface_validate stage if present
bad_blocks = 0
async with _db() as db3:
cur3 = await db3.execute(
"SELECT bad_blocks FROM burnin_stages WHERE burnin_job_id=? AND stage_name='surface_validate'",
(job_id,)
)
bb_row = await cur3.fetchone()
if bb_row and bb_row[0]:
bad_blocks = bb_row[0]
asyncio.create_task(notifier.notify_job_complete(
job_id=job_id,
devname=devname,
@ -312,6 +322,7 @@ async def _run_job(job_id: int) -> None:
profile=job_row["profile"],
operator=job_row["operator"],
error_text=error_text,
bad_blocks=bad_blocks,
))
except Exception as exc:
log.error("Failed to schedule notifications: %s", exc)
@ -352,15 +363,15 @@ async def _dispatch_stage(job_id: int, stage_name: str, devname: str, drive_id:
if stage_name == "precheck":
return await _stage_precheck(job_id, drive_id)
elif stage_name == "short_smart":
return await _stage_smart_test(job_id, devname, "SHORT", "short_smart")
return await _stage_smart_test(job_id, devname, "SHORT", "short_smart", drive_id)
elif stage_name == "long_smart":
return await _stage_smart_test(job_id, devname, "LONG", "long_smart")
return await _stage_smart_test(job_id, devname, "LONG", "long_smart", drive_id)
elif stage_name == "surface_validate":
return await _stage_timed_simulate(job_id, "surface_validate", settings.surface_validate_seconds)
return await _stage_surface_validate(job_id, devname, drive_id)
elif stage_name == "io_validate":
return await _stage_timed_simulate(job_id, "io_validate", settings.io_validate_seconds)
elif stage_name == "final_check":
return await _stage_final_check(job_id, devname)
return await _stage_final_check(job_id, devname, drive_id)
return True
@ -393,8 +404,17 @@ async def _stage_precheck(job_id: int, drive_id: int) -> bool:
return True
async def _stage_smart_test(job_id: int, devname: str, test_type: str, stage_name: str) -> bool:
"""Start a TrueNAS SMART test and poll until complete."""
async def _stage_smart_test(job_id: int, devname: str, test_type: str, stage_name: str,
drive_id: int | None = None) -> bool:
"""Start a SMART test. Uses SSH if configured, TrueNAS REST API otherwise."""
from app import ssh_client
if ssh_client.is_configured():
return await _stage_smart_test_ssh(job_id, devname, test_type, stage_name, drive_id)
return await _stage_smart_test_api(job_id, devname, test_type, stage_name)
async def _stage_smart_test_api(job_id: int, devname: str, test_type: str, stage_name: str) -> bool:
"""TrueNAS REST API path for SMART test (mock / dev mode)."""
tn_job_id = await _client.start_smart_test([devname], test_type)
while True:
@ -428,8 +448,215 @@ async def _stage_smart_test(job_id: int, devname: str, test_type: str, stage_nam
await asyncio.sleep(POLL_INTERVAL)
async def _stage_smart_test_ssh(job_id: int, devname: str, test_type: str, stage_name: str,
                                drive_id: int | None) -> bool:
    """SSH path for SMART test — runs smartctl directly on TrueNAS.

    Starts the self-test, then polls `smartctl -a` every POLL_INTERVAL seconds
    until the drive reports passed/failed, honouring job cancellation between
    polls. On pass, fetches and stores the SMART attribute table (when
    drive_id is known) and fails the stage if any critical attribute fired.
    Returns True when the stage passed, False otherwise.
    """
    from app import ssh_client
    # Start the test
    try:
        startup = await ssh_client.start_smart_test(devname, test_type)
        await _append_stage_log(job_id, stage_name, startup + "\n")
    except Exception as exc:
        await _set_stage_error(job_id, stage_name, f"Failed to start SMART test via SSH: {exc}")
        return False
    # Brief pause to let the test register in smartctl output
    await asyncio.sleep(3)
    # Poll until complete
    while True:
        if await _is_cancelled(job_id):
            # Best-effort abort on the drive; the cancellation itself already happened.
            try:
                await ssh_client.abort_smart_test(devname)
            except Exception:
                pass
            return False
        await asyncio.sleep(POLL_INTERVAL)
        try:
            progress = await ssh_client.poll_smart_progress(devname)
        except Exception as exc:
            # Transient SSH failure — log it and keep polling rather than failing the stage.
            log.warning("SSH SMART poll failed: %s", exc, extra={"job_id": job_id})
            await _append_stage_log(job_id, stage_name, f"[poll error] {exc}\n")
            continue
        await _append_stage_log(job_id, stage_name, progress["output"] + "\n---\n")
        if progress["state"] == "running":
            # smartctl reports percent *remaining*; convert to percent complete.
            pct = max(0, 100 - progress["percent_remaining"])
            await _update_stage_percent(job_id, stage_name, pct)
            await _recalculate_progress(job_id)
            _push_update()
        elif progress["state"] == "passed":
            await _update_stage_percent(job_id, stage_name, 100)
            # Run attribute check
            if drive_id is not None:
                try:
                    attrs = await ssh_client.get_smart_attributes(devname)
                    await _store_smart_attrs(drive_id, attrs)
                    await _store_smart_raw_output(drive_id, test_type, attrs["raw_output"])
                    if attrs["failures"]:
                        # A critical attribute (e.g. reallocated sectors) is non-zero: fail the stage.
                        error = "SMART attribute failures: " + "; ".join(attrs["failures"])
                        await _set_stage_error(job_id, stage_name, error)
                        return False
                    if attrs["warnings"]:
                        # Non-critical attributes are logged but do not fail the stage.
                        await _append_stage_log(
                            job_id, stage_name,
                            "[WARNING] " + "; ".join(attrs["warnings"]) + "\n"
                        )
                except Exception as exc:
                    # Attribute retrieval is best-effort; the self-test itself still passed.
                    log.warning("Failed to retrieve SMART attributes: %s", exc)
            await _recalculate_progress(job_id)
            _push_update()
            return True
        elif progress["state"] == "failed":
            await _set_stage_error(job_id, stage_name, f"SMART {test_type} test failed")
            return False
        # "unknown" → keep polling
async def _stage_surface_validate(job_id: int, devname: str, drive_id: int) -> bool:
    """
    Surface validation stage dispatcher.

    SSH mode: destructive badblocks pass on the real device.
    Mock mode: simulated timed progress (no real I/O).
    """
    from app import ssh_client
    if not ssh_client.is_configured():
        # Development / mock mode: just tick a timer.
        return await _stage_timed_simulate(job_id, "surface_validate", settings.surface_validate_seconds)
    return await _stage_surface_validate_ssh(job_id, devname, drive_id)
async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int) -> bool:
    """Run badblocks over SSH, streaming output to the stage log.

    Executes `badblocks -wsv -b 4096 -p 1 /dev/{devname}` (a DESTRUCTIVE write
    test), parses percent-done lines from stderr and bad-block numbers from
    stdout, and fails the stage when the bad-block count exceeds
    settings.bad_block_threshold. Job cancellation is checked per output line
    so a cancel takes effect mid-run, not only at end of stream.

    Fixes over the previous revision:
      * removed dead helpers (_is_cancelled_sync called run_until_complete
        inside a running event loop and would always raise; on_progress /
        on_progress_async / accumulated_lines were never invoked)
      * the stage log no longer receives the output twice (previously chunks
        were flushed every 20 lines AND the full buffer was appended again at
        the end) — only the un-flushed tail is written on completion
      * cancellation is checked inside the read loop instead of after EOF
    """
    from app import ssh_client
    await _append_stage_log(
        job_id, "surface_validate",
        f"[START] badblocks -wsv -b 4096 -p 1 /dev/{devname}\n"
        f"[NOTE] This is a DESTRUCTIVE write test. All data on /dev/{devname} will be overwritten.\n\n"
    )
    bad_blocks_total = 0
    output_lines: list[str] = []
    flushed = 0              # number of output_lines already written to the stage log
    threshold_exceeded = False
    cancelled = False
    try:
        async with await ssh_client._connect() as conn:
            cmd = f"badblocks -wsv -b 4096 -p 1 /dev/{devname}"
            async with conn.create_process(cmd) as proc:
                import re as _re

                async def _drain(stream, is_stderr: bool) -> None:
                    """Consume one output stream, updating progress and bad-block state."""
                    nonlocal bad_blocks_total, flushed, threshold_exceeded, cancelled
                    async for raw in stream:
                        line = raw if isinstance(raw, str) else raw.decode("utf-8", errors="replace")
                        output_lines.append(line)
                        if is_stderr:
                            # badblocks reports progress like "12.34% done" on stderr
                            m = _re.search(r"([\d.]+)%\s+done", line)
                            if m:
                                pct = min(99, int(float(m.group(1))))
                                await _update_stage_percent(job_id, "surface_validate", pct)
                                await _update_stage_bad_blocks(job_id, "surface_validate", bad_blocks_total)
                                await _recalculate_progress(job_id)
                                _push_update()
                        else:
                            # Each non-empty stdout line during badblocks is a bad block number
                            stripped = line.strip()
                            if stripped and stripped.isdigit():
                                bad_blocks_total += 1
                        # Append to DB log in chunks to avoid excessive writes
                        if len(output_lines) - flushed >= 20:
                            chunk = "".join(output_lines[flushed:])
                            flushed = len(output_lines)
                            await _append_stage_log(job_id, "surface_validate", chunk)
                        # Abort on bad block threshold
                        if bad_blocks_total > settings.bad_block_threshold:
                            threshold_exceeded = True
                            proc.kill()
                            output_lines.append(
                                f"\n[ABORTED] {bad_blocks_total} bad block(s) exceeded "
                                f"threshold ({settings.bad_block_threshold})\n"
                            )
                            return
                        # Abort promptly on job cancellation (checked per line, not after EOF)
                        if await _is_cancelled(job_id):
                            cancelled = True
                            proc.kill()
                            return

                await asyncio.gather(
                    _drain(proc.stdout, False),
                    _drain(proc.stderr, True),
                    return_exceptions=True,
                )
                await proc.wait()
    except asyncio.CancelledError:
        return False
    except Exception as exc:
        await _append_stage_log(job_id, "surface_validate", f"\n[SSH error] {exc}\n")
        await _set_stage_error(job_id, "surface_validate", f"SSH badblocks error: {exc}")
        return False
    # Flush only the tail that hasn't been written yet (avoids duplicating the log)
    remainder = "".join(output_lines[flushed:])
    if remainder:
        await _append_stage_log(job_id, "surface_validate", remainder)
    await _update_stage_bad_blocks(job_id, "surface_validate", bad_blocks_total)
    if cancelled:
        return False
    if threshold_exceeded or bad_blocks_total > settings.bad_block_threshold:
        await _set_stage_error(
            job_id, "surface_validate",
            f"Surface validate FAILED: {bad_blocks_total} bad block(s) found "
            f"(threshold: {settings.bad_block_threshold})"
        )
        return False
    return True
async def _stage_timed_simulate(job_id: int, stage_name: str, duration_seconds: int) -> bool:
"""Simulate a timed stage (surface validation / IO validation) with progress updates."""
"""Simulate a timed stage with progress updates (mock / dev mode)."""
start = time.monotonic()
while True:
@ -449,9 +676,28 @@ async def _stage_timed_simulate(job_id: int, stage_name: str, duration_seconds:
await asyncio.sleep(POLL_INTERVAL)
async def _stage_final_check(job_id: int, devname: str) -> bool:
"""Verify drive passed all tests by checking current SMART health in DB."""
async def _stage_final_check(job_id: int, devname: str, drive_id: int | None = None) -> bool:
"""
Verify drive passed all tests.
SSH mode: run smartctl -a and check critical attributes.
Mock mode: check SMART health field in DB.
"""
await asyncio.sleep(1)
from app import ssh_client
if ssh_client.is_configured() and drive_id is not None:
try:
attrs = await ssh_client.get_smart_attributes(devname)
await _store_smart_attrs(drive_id, attrs)
if attrs["health"] == "FAILED" or attrs["failures"]:
failures = attrs["failures"] or [f"SMART health: {attrs['health']}"]
await _set_stage_error(job_id, "final_check",
"Final check failed: " + "; ".join(failures))
return False
return True
except Exception as exc:
log.warning("SSH final_check failed, falling back to DB check: %s", exc)
# DB check (mock mode fallback)
async with _db() as db:
cur = await db.execute(
"SELECT smart_health FROM drives WHERE devname=?", (devname,)
@ -549,6 +795,57 @@ async def _cancel_stage(job_id: int, stage_name: str) -> None:
await db.commit()
async def _append_stage_log(job_id: int, stage_name: str, text: str) -> None:
    """Concatenate *text* onto the stage row's log_text column (NULL-safe)."""
    params = (text, job_id, stage_name)
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute(
            """UPDATE burnin_stages
               SET log_text = COALESCE(log_text, '') || ?
               WHERE burnin_job_id=? AND stage_name=?""",
            params,
        )
        await db.commit()
async def _update_stage_bad_blocks(job_id: int, stage_name: str, count: int) -> None:
    """Persist the running bad-block count for one burn-in stage row."""
    query = "UPDATE burnin_stages SET bad_blocks=? WHERE burnin_job_id=? AND stage_name=?"
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute(query, (count, job_id, stage_name))
        await db.commit()
async def _store_smart_attrs(drive_id: int, attrs: dict) -> None:
    """Persist latest SMART attribute dict to drives.smart_attrs (JSON)."""
    import json
    # Stringify the int attribute IDs so the blob round-trips through JSON cleanly
    payload = {
        "health": attrs.get("health", "UNKNOWN"),
        "attrs": {str(k): v for k, v in attrs.get("attributes", {}).items()},
        "warnings": attrs.get("warnings", []),
        "failures": attrs.get("failures", []),
    }
    blob = json.dumps(payload)
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute("UPDATE drives SET smart_attrs=? WHERE id=?", (blob, drive_id))
        await db.commit()
async def _store_smart_raw_output(drive_id: int, test_type: str, raw: str) -> None:
    """Store raw smartctl output in smart_tests.raw_output.

    test_type arrives upper-cased from the burn-in stages ("SHORT"/"LONG")
    but smart_tests rows are keyed lower-case, hence the .lower().
    """
    params = (raw, drive_id, test_type.lower())
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute(
            "UPDATE smart_tests SET raw_output=? WHERE drive_id=? AND test_type=?",
            params,
        )
        await db.commit()
async def _set_stage_error(job_id: int, stage_name: str, error_text: str) -> None:
async with _db() as db:
await db.execute("PRAGMA journal_mode=WAL")

View file

@ -56,9 +56,17 @@ class Settings(BaseSettings):
temp_crit_c: int = 55 # red critical (precheck refuses to start above this)
# Bad-block tolerance — surface_validate fails if bad blocks exceed this
# (applies to real badblocks in Stage 7; ignored by mock simulation)
bad_block_threshold: int = 0
# SSH credentials for direct TrueNAS command execution (Stage 7)
# When ssh_host is set, burn-in stages use SSH for smartctl/badblocks instead of REST API.
# Leave ssh_host empty to use the mock/REST API (development mode).
ssh_host: str = ""
ssh_port: int = 22
ssh_user: str = "root" # TrueNAS CORE default is root
ssh_password: str = "" # Password auth (leave blank if using key)
ssh_key: str = "" # PEM private key content (paste full key including headers)
# Application version — used by the /api/v1/updates/check endpoint
app_version: str = "1.0.0-6d"

View file

@ -82,6 +82,11 @@ CREATE INDEX IF NOT EXISTS idx_audit_events_job ON audit_events(burnin_job_id)
_MIGRATIONS = [
"ALTER TABLE drives ADD COLUMN notes TEXT",
"ALTER TABLE drives ADD COLUMN location TEXT",
# Stage 7: SSH command output + SMART attribute storage
"ALTER TABLE burnin_stages ADD COLUMN log_text TEXT",
"ALTER TABLE burnin_stages ADD COLUMN bad_blocks INTEGER DEFAULT 0",
"ALTER TABLE drives ADD COLUMN smart_attrs TEXT",
"ALTER TABLE smart_tests ADD COLUMN raw_output TEXT",
]

View file

@ -23,8 +23,10 @@ async def notify_job_complete(
profile: str,
operator: str,
error_text: str | None,
bad_blocks: int = 0,
) -> None:
"""Fire all configured notifications for a completed burn-in job."""
from datetime import datetime, timezone
tasks = []
if settings.webhook_url:
@ -38,6 +40,8 @@ async def notify_job_complete(
"profile": profile,
"operator": operator,
"error_text": error_text,
"bad_blocks": bad_blocks,
"timestamp": datetime.now(timezone.utc).isoformat(),
}))
if settings.smtp_host:

View file

@ -126,7 +126,7 @@ def _format_elapsed(iso: str | None) -> str:
return ""
# Register
# Register filters
templates.env.filters["format_bytes"] = _format_bytes
templates.env.filters["format_eta"] = _format_eta
templates.env.filters["temp_class"] = _temp_class
@ -135,3 +135,7 @@ templates.env.filters["format_dt_full"] = _format_dt_full
templates.env.filters["format_duration"] = _format_duration
templates.env.filters["format_elapsed"] = _format_elapsed
templates.env.globals["drive_status"] = _drive_status
from app.config import settings as _settings
templates.env.globals["app_version"] = _settings.app_version

View file

@ -258,7 +258,7 @@ async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db)
raise HTTPException(status_code=404, detail="Drive not found")
drive = _row_to_drive(row)
# Latest burn-in job + its stages
# Latest burn-in job + its stages (include log_text and bad_blocks)
cur = await db.execute(
"SELECT * FROM burnin_jobs WHERE drive_id=? ORDER BY id DESC LIMIT 1",
(drive_id,),
@ -268,12 +268,33 @@ async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db)
if job_row:
job = dict(job_row)
cur = await db.execute(
"SELECT * FROM burnin_stages WHERE burnin_job_id=? ORDER BY id",
"SELECT id, stage_name, state, percent, started_at, finished_at, "
"duration_seconds, error_text, log_text, bad_blocks "
"FROM burnin_stages WHERE burnin_job_id=? ORDER BY id",
(job_row["id"],),
)
job["stages"] = [dict(r) for r in await cur.fetchall()]
burnin = job
# SMART raw output from smart_tests table
cur = await db.execute(
"SELECT test_type, state, percent, started_at, finished_at, error_text, raw_output "
"FROM smart_tests WHERE drive_id=?",
(drive_id,),
)
smart_rows = {r["test_type"]: dict(r) for r in await cur.fetchall()}
# Cached SMART attributes (JSON blob on drives table)
import json as _json
smart_attrs = None
cur = await db.execute("SELECT smart_attrs FROM drives WHERE id=?", (drive_id,))
attrs_row = await cur.fetchone()
if attrs_row and attrs_row["smart_attrs"]:
try:
smart_attrs = _json.loads(attrs_row["smart_attrs"])
except Exception:
pass
# Last 50 audit events for this drive (newest first)
cur = await db.execute("""
SELECT id, event_type, operator, message, created_at
@ -284,6 +305,13 @@ async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db)
""", (drive_id,))
events = [dict(r) for r in await cur.fetchall()]
def _smart_card(test_type: str) -> dict:
smart_obj = drive.smart_short if test_type == "short" else drive.smart_long
base = smart_obj.model_dump() if smart_obj else {}
row = smart_rows.get(test_type, {})
base["raw_output"] = row.get("raw_output")
return base
return {
"drive": {
"id": drive.id,
@ -294,8 +322,9 @@ async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db)
},
"burnin": burnin,
"smart": {
"short": drive.smart_short.model_dump() if drive.smart_short else None,
"long": drive.smart_long.model_dump() if drive.smart_long else None,
"short": _smart_card("short"),
"long": _smart_card("long"),
"attrs": smart_attrs,
},
"events": events,
}
@ -672,6 +701,53 @@ async def update_drive(
return {"updated": True}
@router.post("/api/v1/drives/{drive_id}/reset")
async def reset_drive(
    drive_id: int,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """
    Clear SMART test results for a drive so it shows as fresh.

    Only allowed when no burn-in job is active (queued or running).
    All job history is preserved — this only resets the display state.
    """
    # 404 on unknown drive
    cur = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,))
    if await cur.fetchone() is None:
        raise HTTPException(status_code=404, detail="Drive not found")
    # Reject if any active burn-in
    cur = await db.execute(
        "SELECT COUNT(*) FROM burnin_jobs WHERE drive_id=? AND state IN ('queued','running')",
        (drive_id,),
    )
    active_jobs = (await cur.fetchone())[0]
    if active_jobs > 0:
        raise HTTPException(status_code=409, detail="Cannot reset while a burn-in is active")
    operator = body.get("operator", "operator")
    # Reset SMART test state to idle
    await db.execute(
        """UPDATE smart_tests SET state='idle', percent=0, started_at=NULL,
           eta_at=NULL, finished_at=NULL, error_text=NULL, raw_output=NULL
           WHERE drive_id=?""",
        (drive_id,),
    )
    # Clear cached SMART attributes
    await db.execute("UPDATE drives SET smart_attrs=NULL WHERE id=?", (drive_id,))
    # Audit event
    await db.execute(
        """INSERT INTO audit_events (event_type, drive_id, operator, message)
           VALUES (?,?,?,?)""",
        ("drive_reset", drive_id, operator, "Drive reset — SMART state cleared"),
    )
    await db.commit()
    poller._notify_subscribers()
    return {"reset": True}
# ---------------------------------------------------------------------------
# Audit log page
# ---------------------------------------------------------------------------
@ -766,6 +842,36 @@ async def stats_page(
""")
by_day = [dict(r) for r in await cur.fetchall()]
# Average test duration by drive size (rounded to nearest TB)
cur = await db.execute("""
SELECT
CAST(ROUND(CAST(d.size_bytes AS REAL) / 1e12) AS INTEGER) AS size_tb,
COUNT(*) AS total,
ROUND(AVG(
(julianday(bj.finished_at) - julianday(bj.started_at)) * 86400 / 3600.0
), 1) AS avg_hours
FROM burnin_jobs bj
JOIN drives d ON d.id = bj.drive_id
WHERE bj.state IN ('passed', 'failed')
AND bj.started_at IS NOT NULL
AND bj.finished_at IS NOT NULL
GROUP BY size_tb
ORDER BY size_tb
""")
by_size = [dict(r) for r in await cur.fetchall()]
# Failure breakdown by stage (which stage caused the failure)
cur = await db.execute("""
SELECT
COALESCE(bj.stage_name, 'unknown') AS failed_stage,
COUNT(*) AS count
FROM burnin_jobs bj
WHERE bj.state = 'failed'
GROUP BY failed_stage
ORDER BY count DESC
""")
by_failure_stage = [dict(r) for r in await cur.fetchall()]
# Drives tracked
cur = await db.execute("SELECT COUNT(*) FROM drives")
drives_total = (await cur.fetchone())[0]
@ -776,6 +882,8 @@ async def stats_page(
"overall": overall,
"by_model": by_model,
"by_day": by_day,
"by_size": by_size,
"by_failure_stage": by_failure_stage,
"drives_total": drives_total,
"poller": ps,
**_stale_context(ps),
@ -813,6 +921,11 @@ async def settings_page(
"temp_warn_c": settings.temp_warn_c,
"temp_crit_c": settings.temp_crit_c,
"bad_block_threshold": settings.bad_block_threshold,
# SSH credentials (take effect immediately — each SSH call reads live settings)
"ssh_host": settings.ssh_host,
"ssh_port": settings.ssh_port,
"ssh_user": settings.ssh_user,
# Note: ssh_password and ssh_key intentionally omitted from display (sensitive)
# System settings (restart required to fully apply)
"truenas_base_url": settings.truenas_base_url,
"truenas_verify_tls": settings.truenas_verify_tls,
@ -823,11 +936,13 @@ async def settings_page(
# Note: truenas_api_key intentionally omitted from display (sensitive)
}
from app import ssh_client as _ssh
ps = poller.get_state()
return templates.TemplateResponse("settings.html", {
"request": request,
"editable": editable,
"smtp_enabled": bool(settings.smtp_host),
"ssh_configured": _ssh.is_configured(),
"app_version": settings.app_version,
"poller": ps,
**_stale_context(ps),
@ -838,7 +953,7 @@ async def settings_page(
async def save_settings(body: dict):
"""Save editable runtime settings. Secrets are only updated if non-empty."""
# Don't overwrite secrets if client sent empty string
for secret_field in ("smtp_password", "truenas_api_key"):
for secret_field in ("smtp_password", "truenas_api_key", "ssh_password", "ssh_key"):
if secret_field in body and body[secret_field] == "":
del body[secret_field]
@ -859,6 +974,16 @@ async def test_smtp():
return {"ok": True}
@router.post("/api/v1/settings/test-ssh")
async def test_ssh():
"""Test the current SSH configuration."""
from app import ssh_client
result = await ssh_client.test_connection()
if not result["ok"]:
raise HTTPException(status_code=502, detail=result.get("error", "Connection failed"))
return {"ok": True}
@router.get("/api/v1/updates/check")
async def check_updates():
"""Check for a newer release on Forgejo."""

View file

@ -38,6 +38,12 @@ _EDITABLE: dict[str, type] = {
"temp_warn_c": int,
"temp_crit_c": int,
"bad_block_threshold": int,
# SSH credentials — take effect immediately (each connection reads live settings)
"ssh_host": str,
"ssh_port": int,
"ssh_user": str,
"ssh_password": str,
"ssh_key": str,
# System settings — saved to JSON; require container restart to fully apply
"truenas_base_url": str,
"truenas_api_key": str,
@ -90,6 +96,9 @@ def _apply(data: dict) -> None:
if key == "bad_block_threshold" and int(val) < 0:
log.warning("settings_store: bad_block_threshold must be >= 0 — ignoring")
continue
if key == "ssh_port" and not (1 <= int(val) <= 65535):
log.warning("settings_store: ssh_port out of range — ignoring")
continue
setattr(settings, key, val)
except (ValueError, TypeError) as exc:
log.warning("settings_store: invalid value for %s: %s", key, exc)

303
app/ssh_client.py Normal file
View file

@ -0,0 +1,303 @@
"""
SSH client for direct TrueNAS command execution (Stage 7).
When ssh_host is configured, burn-in stages use SSH to run smartctl and
badblocks directly on the TrueNAS host instead of going through the REST API.
Falls back to REST API / simulation when SSH is not configured (dev/mock mode).
TrueNAS CORE (FreeBSD) device paths: /dev/ada0, /dev/da0, etc.
TrueNAS SCALE (Linux) device paths: /dev/sda, /dev/sdb, etc.
The devname from the TrueNAS API is used as-is in /dev/{devname}.
"""
import asyncio
import logging
import re
from typing import Callable
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Monitored SMART attributes
# True → any non-zero raw value is a hard failure (drive rejected)
# False → non-zero is a warning (flagged but test continues)
# ---------------------------------------------------------------------------
SMART_ATTRS: dict[int, tuple[str, bool]] = {
5: ("Reallocated_Sector_Ct", True), # reallocation = FAIL
10: ("Spin_Retry_Count", False), # mechanical stress = WARN
188: ("Command_Timeout", False), # drive not responding = WARN
197: ("Current_Pending_Sector", True), # pending reallocation = FAIL
198: ("Offline_Uncorrectable", True), # unrecoverable read error = FAIL
199: ("UDMA_CRC_Error_Count", False), # cable/controller issue = WARN
}
# ---------------------------------------------------------------------------
# Configuration check
# ---------------------------------------------------------------------------
def is_configured() -> bool:
    """Returns True when SSH credentials are present and usable."""
    from app.config import settings
    # A host alone is not enough — we need at least one auth method.
    has_auth = bool(settings.ssh_password) or bool(settings.ssh_key)
    return bool(settings.ssh_host) and has_auth
# ---------------------------------------------------------------------------
# Low-level connection
# ---------------------------------------------------------------------------
async def _connect():
    """Open a single-use SSH connection. Caller must use `async with`."""
    import asyncssh
    from app.config import settings
    # Host-key verification is disabled deliberately — same spirit as
    # TRUENAS_VERIFY_TLS=false in the REST path.
    options: dict = dict(
        host=settings.ssh_host,
        port=settings.ssh_port,
        username=settings.ssh_user,
        known_hosts=None,
    )
    if settings.ssh_key:
        options["client_keys"] = [asyncssh.import_private_key(settings.ssh_key)]
    if settings.ssh_password:
        options["password"] = settings.ssh_password
    return asyncssh.connect(**options)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
async def test_connection() -> dict:
    """Test SSH connectivity. Returns {"ok": True} or {"ok": False, "error": str}."""
    if not is_configured():
        return {"ok": False, "error": "SSH not configured (ssh_host is empty)"}
    try:
        async with await _connect() as conn:
            result = await conn.run("echo ok", check=False)
    except Exception as exc:
        return {"ok": False, "error": str(exc)}
    if "ok" in result.stdout:
        return {"ok": True}
    return {"ok": False, "error": result.stderr.strip() or "unexpected output"}
async def get_smart_attributes(devname: str) -> dict:
    """
    Run `smartctl -a /dev/{devname}` and parse the output.

    Returns:
        health: str          "PASSED" | "FAILED" | "UNKNOWN"
        raw_output: str      full smartctl output
        attributes: dict[int, {"name": str, "raw": int}]
        warnings: list[str]  monitored non-critical attrs with non-zero raw
        failures: list[str]  monitored critical attrs with non-zero raw

    Raises:
        Any connection/transport error from the underlying SSH call.
        Previously such errors were swallowed and reported as a SMART
        "failure", which made a transient network problem look like a failing
        drive and left burnin.py's documented fall-back-to-DB path on
        exception unreachable. Both call sites already wrap this call in
        try/except, so propagating is safe and restores that behavior.
    """
    cmd = f"smartctl -a /dev/{devname}"
    async with await _connect() as conn:
        result = await conn.run(cmd, check=False)
    # smartctl writes some diagnostics to stderr; keep everything for the log
    output = result.stdout + result.stderr
    return _parse_smartctl(output)
async def start_smart_test(devname: str, test_type: str) -> str:
    """
    Kick off a drive self-test via `smartctl -t short|long /dev/{devname}`.

    test_type: "SHORT" or "LONG"
    Returns the raw smartctl output; raises RuntimeError when the test
    could not be started.
    """
    arg = "short" if test_type.upper() == "SHORT" else "long"
    cmd = f"smartctl -t {arg} /dev/{devname}"
    async with await _connect() as conn:
        result = await conn.run(cmd, check=False)
    output = result.stdout + result.stderr
    # smartctl exits 0 or 4 when the test is successfully started on most drives
    if ("Testing has begun" in output
            or "test has begun" in output.lower()
            or result.returncode in (0, 4)):
        return output
    raise RuntimeError(f"smartctl returned exit {result.returncode}: {output[:400]}")
async def poll_smart_progress(devname: str) -> dict:
    """
    Query self-test status via `smartctl -a /dev/{devname}`.

    Returns:
        state: "running" | "passed" | "failed" | "unknown"
        percent_remaining: int (0 = complete when state != "running")
        output: str
    """
    cmd = f"smartctl -a /dev/{devname}"
    async with await _connect() as conn:
        result = await conn.run(cmd, check=False)
    combined = result.stdout + result.stderr
    return _parse_smart_progress(combined)
async def abort_smart_test(devname: str) -> None:
    """Send `smartctl -X /dev/{devname}` to abort an in-progress test."""
    async with await _connect() as conn:
        # Fire-and-forget: smartctl's exit code is not meaningful here.
        await conn.run(f"smartctl -X /dev/{devname}", check=False)
async def run_badblocks(
    devname: str,
    on_progress: Callable[[int, int, str], None],
    cancelled_fn: Callable[[], bool] | None = None,
) -> dict:
    """
    Run `badblocks -wsv -b 4096 -p 1 /dev/{devname}` and stream output.

    on_progress(percent, bad_blocks, line) is called for every line of output
    (progress percent parsed from stderr, bad-block numbers counted from
    stdout). cancelled_fn() is polled once per line to support mid-test
    cancellation; the badblocks process is killed when it returns True or
    when the count exceeds settings.bad_block_threshold.

    Returns: {"bad_blocks": int, "output": str, "aborted": bool}
    SSH/transport errors are appended to the output rather than raised, so the
    caller always receives the partial log.
    """
    from app.config import settings
    cmd = f"badblocks -wsv -b 4096 -p 1 /dev/{devname}"
    lines: list[str] = []
    bad_blocks = 0
    aborted = False
    last_pct = 0
    try:
        async with await _connect() as conn:
            async with conn.create_process(cmd) as proc:
                # badblocks writes progress to stderr, bad block numbers to stdout
                async def _read_stream(stream, is_stderr: bool):
                    nonlocal bad_blocks, last_pct, aborted
                    async for raw_line in stream:
                        line = raw_line if isinstance(raw_line, str) else raw_line.decode("utf-8", errors="replace")
                        lines.append(line)
                        if is_stderr:
                            # Progress lines look like "12.34% done"; cap at 99 until completion
                            m = re.search(r"([\d.]+)%\s+done", line)
                            if m:
                                last_pct = min(99, int(float(m.group(1))))
                        else:
                            # Each non-empty stdout line during badblocks is a bad block number
                            stripped = line.strip()
                            if stripped and stripped.isdigit():
                                bad_blocks += 1
                        on_progress(last_pct, bad_blocks, line)
                        # Abort if threshold exceeded
                        if bad_blocks > settings.bad_block_threshold:
                            aborted = True
                            proc.kill()
                            lines.append(
                                f"\n[ABORTED] Bad block count ({bad_blocks}) exceeded "
                                f"threshold ({settings.bad_block_threshold})\n"
                            )
                            return
                        # Abort on cancellation
                        if cancelled_fn and cancelled_fn():
                            aborted = True
                            proc.kill()
                            return

                stdout_task = asyncio.create_task(_read_stream(proc.stdout, False))
                stderr_task = asyncio.create_task(_read_stream(proc.stderr, True))
                await asyncio.gather(stdout_task, stderr_task, return_exceptions=True)
                await proc.wait()
    except Exception as exc:
        lines.append(f"\n[SSH error] {exc}\n")
    # NOTE: a previous revision set last_pct = 100 here, but nothing reads
    # last_pct after the streams close — removed as dead code.
    return {
        "bad_blocks": bad_blocks,
        "output": "".join(lines),
        "aborted": aborted,
    }
# ---------------------------------------------------------------------------
# Parsers
# ---------------------------------------------------------------------------
def _parse_smartctl(output: str) -> dict:
    """
    Parse full `smartctl -a` output into a health verdict plus attribute table.

    Returns a dict with keys: health, raw_output, attributes (id -> {name, raw}),
    warnings, failures. Monitored attributes (SMART_ATTRS) with a non-zero raw
    value are reported as failures when critical, otherwise as warnings.
    """
    result: dict = {
        "health": "UNKNOWN",
        "raw_output": output,
        "attributes": {},
        "warnings": [],
        "failures": [],
    }
    health_m = re.search(r"self-assessment test result:\s+(\w+)", output, re.IGNORECASE)
    if health_m:
        result["health"] = health_m.group(1).upper()
    # Attribute table: ID# NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
    row_re = re.compile(
        r"\s*(\d+)\s+(\S+)\s+\S+\s+\d+\s+\d+\s+\d+\s+\S+\s+\S+\s+\S+\s+(\d+)"
    )
    for text_line in output.splitlines():
        row = row_re.match(text_line)
        if row is None:
            continue
        attr_id = int(row.group(1))
        attr_name = row.group(2)
        raw_val = int(row.group(3))
        result["attributes"][attr_id] = {"name": attr_name, "raw": raw_val}
        monitored = SMART_ATTRS.get(attr_id)
        if monitored is not None and raw_val > 0:
            _, is_critical = monitored
            bucket = result["failures"] if is_critical else result["warnings"]
            bucket.append(f"{attr_name} = {raw_val}")
    return result
def _parse_smart_progress(output: str) -> dict:
state = "unknown"
percent_remaining = 0
lower = output.lower()
if "self-test routine in progress" in lower or "self-test routine in progress" in output:
state = "running"
m = re.search(r"(\d+)%\s+of\s+test\s+remaining", output, re.IGNORECASE)
if m:
percent_remaining = int(m.group(1))
elif "completed without error" in lower:
state = "passed"
elif (
"completed: read failure" in lower
or "completed: write failure" in lower
or "aborted by host" in lower
or ("completed" in lower and "failure" in lower)
):
state = "failed"
elif "in progress" in lower:
state = "running"
return {
"state": state,
"percent_remaining": percent_remaining,
"output": output,
}

View file

@ -2283,3 +2283,125 @@ tr.drawer-row-active {
.drawer-smart-grid { grid-template-columns: 1fr; }
.drawer-drive-meta { display: none; }
}
/* -----------------------------------------------------------------------
   Stage raw log output (SSH mode)
   ----------------------------------------------------------------------- */
.stage-log {
    font-family: "SF Mono", "Consolas", "Monaco", monospace;
    font-size: 11px;
    line-height: 1.5;
    color: var(--text-muted);
    background: var(--bg);
    border-left: 2px solid var(--border);
    margin: 6px 0 2px 28px;
    padding: 6px 10px;
    white-space: pre-wrap;
    word-break: break-all;
    max-height: 200px; /* long badblocks logs scroll instead of growing the drawer */
    overflow-y: auto;
}
/* Bad block numbers highlighted by app.js log rendering */
.stage-log .log-bad-block {
    color: var(--red);
    font-weight: 600;
}
.stage-log .log-warn {
    color: var(--yellow);
}
/* -----------------------------------------------------------------------
   SMART attributes table in drawer
   ----------------------------------------------------------------------- */
.smart-attrs {
    margin-top: 12px;
    border-top: 1px solid var(--border);
    padding-top: 10px;
}
.smart-attrs-title {
    font-size: 11px;
    font-weight: 600;
    color: var(--text-muted);
    text-transform: uppercase;
    letter-spacing: .05em;
    margin-bottom: 6px;
}
.smart-attr-row {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 3px 0;
    border-bottom: 1px solid color-mix(in srgb, var(--border) 50%, transparent);
    font-size: 12px;
}
.smart-attr-row:last-child { border-bottom: none; }
.smart-attr-name { color: var(--text-muted); }
.smart-attr-val { font-family: "SF Mono", monospace; font-size: 12px; }
/* ok/warn/fail colouring mirrors the warn/fail attribute thresholds */
.smart-attr-val.attr-ok { color: var(--green); }
.smart-attr-val.attr-warn { color: var(--yellow); font-weight: 600; }
.smart-attr-val.attr-fail { color: var(--red); font-weight: 600; }
.smart-attr-raw-output {
    font-family: "SF Mono", "Consolas", monospace;
    font-size: 10.5px;
    line-height: 1.45;
    color: var(--text-muted);
    background: var(--bg);
    border: 1px solid var(--border);
    border-radius: 4px;
    padding: 8px 10px;
    margin-top: 10px;
    white-space: pre; /* smartctl tables rely on column alignment */
    overflow: auto;
    max-height: 240px;
}
/* -----------------------------------------------------------------------
   Reset button
   ----------------------------------------------------------------------- */
.btn-reset {
    background: transparent;
    border: 1px solid color-mix(in srgb, var(--text-muted) 40%, transparent);
    color: var(--text-muted);
    border-radius: 5px;
    padding: 3px 8px;
    font-size: 12px;
    cursor: pointer;
    transition: border-color .15s, color .15s;
}
.btn-reset:hover {
    border-color: var(--yellow);
    color: var(--yellow);
}
/* -----------------------------------------------------------------------
   Parallel burn-in inline warning
   ----------------------------------------------------------------------- */
.sf-inline-warn {
    background: color-mix(in srgb, var(--yellow) 12%, transparent);
    border: 1px solid color-mix(in srgb, var(--yellow) 40%, transparent);
    border-radius: 5px;
    color: var(--yellow);
    font-size: 12px;
    padding: 7px 10px;
    margin: 4px 0 8px 0;
}
/* -----------------------------------------------------------------------
   SSH textarea (private-key paste field in Settings)
   ----------------------------------------------------------------------- */
.sf-textarea {
    resize: vertical;
    min-height: 90px;
    font-family: "SF Mono", "Consolas", monospace;
    font-size: 11px;
}
/* -----------------------------------------------------------------------
   Version badge in header
   ----------------------------------------------------------------------- */
.header-version {
    font-size: 11px;
    color: var(--text-muted);
    opacity: .6;
    padding: 0 2px;
    font-variant-numeric: tabular-nums;
}

View file

@ -957,8 +957,18 @@
if (s.error_text) {
html += '<div class="stage-error-line">' + _esc(s.error_text) + '</div>';
}
// Raw SSH log output (if available)
if (s.log_text) {
var logHtml = _esc(s.log_text)
.replace(/^(\d+)\s*$/gm, '<span class="log-bad-block">$1 ← BAD BLOCK</span>')
.replace(/\[WARNING\][^\n]*/g, '<span class="log-warn">$&</span>');
html += '<pre class="stage-log">' + logHtml + '</pre>';
}
// Bad block count badge
if (s.bad_blocks && s.bad_blocks > 0) {
html += '<div class="stage-error-line">' + s.bad_blocks + ' bad block(s) found</div>';
}
html += '</div>';
});
} else {
html += '<div class="drawer-empty">No stage data yet.</div>';
}
@ -973,6 +983,10 @@
}
}
// Monitored SMART attributes for inline colouring
var _SMART_CRITICAL = {5: true, 197: true, 198: true};
var _SMART_WARN = {10: true, 188: true, 199: true};
function _drawerRenderSmart(smart) {
var panel = document.getElementById('drawer-panel-smart');
if (!panel) return;
@ -994,10 +1008,41 @@
if (t.started_at) html += '<div class="smart-detail">Started: ' + _drawerFmtDt(t.started_at) + '</div>';
if (t.finished_at) html += '<div class="smart-detail">Finished: ' + _drawerFmtDt(t.finished_at) + '</div>';
if (t.error_text) html += '<div class="stage-error-line">' + _esc(t.error_text) + '</div>';
// Raw smartctl output (SSH mode)
if (t.raw_output) {
html += '<pre class="smart-attr-raw-output">' + _esc(t.raw_output) + '</pre>';
}
}
html += '</div>';
});
html += '</div>';
// SMART attribute table (from SSH attribute parse)
var attrs = smart && smart.attrs;
if (attrs) {
html += '<div class="smart-attrs">';
html += '<div class="smart-attrs-title">SMART Attributes</div>';
if (attrs.failures && attrs.failures.length) {
html += '<div class="stage-error-line" style="margin-bottom:6px">✕ Failures: ' + _esc(attrs.failures.join('; ')) + '</div>';
}
if (attrs.warnings && attrs.warnings.length) {
html += '<div class="stage-error-line" style="color:var(--yellow);margin-bottom:6px">⚠ Warnings: ' + _esc(attrs.warnings.join('; ')) + '</div>';
}
var attrMap = attrs.attrs || {};
var monitoredIds = [5, 10, 188, 197, 198, 199];
monitoredIds.forEach(function (id) {
var entry = attrMap[String(id)];
if (!entry) return;
var raw = entry.raw;
var cls = raw > 0 ? (_SMART_CRITICAL[id] ? 'attr-fail' : 'attr-warn') : 'attr-ok';
html += '<div class="smart-attr-row">';
html += '<span class="smart-attr-name">' + id + ' ' + _esc(entry.name) + '</span>';
html += '<span class="smart-attr-val ' + cls + '">' + raw + '</span>';
html += '</div>';
});
html += '</div>';
}
panel.innerHTML = html;
}
@ -1078,4 +1123,21 @@
if (e.target.closest('#drawer-close-btn')) closeDrawer();
});
// Reset button — clears SMART state for a drive
document.addEventListener('click', function (e) {
    var resetBtn = e.target.closest('.btn-reset');
    if (!resetBtn) return;
    var driveId = resetBtn.dataset.driveId;
    if (!driveId) return;
    fetch('/api/v1/drives/' + driveId + '/reset', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ operator: window._operator || 'operator' }),
    }).then(function (resp) {
        if (!resp.ok) {
            // Surface the API's error detail when the reset is rejected.
            return resp.json().then(function (body) {
                showToast(body.detail || 'Reset failed', 'error');
            });
        }
        showToast('Drive reset — state cleared', 'success');
    }).catch(function () {
        showToast('Network error', 'error');
    });
});
}());

View file

@ -81,6 +81,10 @@
{%- set short_busy = drive.smart_short and drive.smart_short.state == 'running' %}
{%- set long_busy = drive.smart_long and drive.smart_long.state == 'running' %}
{%- set selectable = not bi_active and not short_busy and not long_busy %}
{%- set bi_done = drive.burnin and drive.burnin.state in ('passed', 'failed', 'cancelled', 'unknown') %}
{%- set smart_done = (drive.smart_short and drive.smart_short.state in ('passed','failed','aborted'))
or (drive.smart_long and drive.smart_long.state in ('passed','failed','aborted')) %}
{%- set can_reset = (bi_done or smart_done) and not bi_active and not short_busy and not long_busy %}
<tr data-status="{{ drive.status }}" id="drive-{{ drive.id }}">
<td class="col-check">
{%- if selectable %}
@ -160,6 +164,12 @@
data-health="{{ drive.smart_health }}"
{% if short_busy or long_busy %}disabled{% endif %}
title="Start Burn-In">Burn-In</button>
<!-- Reset — clears SMART state so drive can be re-tested from scratch -->
{%- if can_reset %}
<button class="btn-action btn-reset"
data-drive-id="{{ drive.id }}"
title="Reset SMART state — clears test results so drive shows as fresh">Reset</button>
{%- endif %}
{%- endif %}
</div>
</td>

View file

@ -37,6 +37,7 @@
<a class="header-link" href="/audit">Audit</a>
<a class="header-link" href="/settings">Settings</a>
<a class="header-link" href="/docs" target="_blank" rel="noopener">API</a>
<span class="header-version">v{{ app_version if app_version is defined else '—' }}</span>
</div>
</header>

View file

@ -91,6 +91,57 @@
</div>
</div>
<!-- SSH -->
<div class="settings-card">
<div class="settings-card-header">
<span class="settings-card-title">SSH (TrueNAS Direct)</span>
{% if ssh_configured %}
<span class="chip chip-passed" style="font-size:10px">Configured</span>
{% else %}
<span class="chip chip-unknown" style="font-size:10px">Not configured — using REST API / mock</span>
{% endif %}
</div>
<p class="sf-hint" style="margin-bottom:8px">
When configured, burn-in stages run smartctl and badblocks directly on TrueNAS over SSH,
enabling SMART attribute monitoring and real bad-block detection. Leave Host empty to use
the TrueNAS REST API (mock / dev mode).
</p>
<div class="sf-fields">
<div class="sf-full sf-row-test" style="margin-bottom:4px">
<button type="button" id="test-ssh-btn" class="btn-secondary">Test SSH Connection</button>
<span id="ssh-test-result" class="settings-test-result" style="display:none"></span>
</div>
<label for="ssh_host">Host / IP</label>
<input class="sf-input" id="ssh_host" name="ssh_host" type="text"
value="{{ editable.ssh_host }}" placeholder="10.0.0.x (same as TrueNAS IP)">
<label for="ssh_port">Port</label>
<input class="sf-input sf-input-xs" id="ssh_port" name="ssh_port"
type="number" min="1" max="65535" value="{{ editable.ssh_port }}" style="width:70px">
<label for="ssh_user">Username</label>
<input class="sf-input" id="ssh_user" name="ssh_user" type="text"
value="{{ editable.ssh_user }}" placeholder="root">
<label for="ssh_password">Password</label>
<input class="sf-input" id="ssh_password" name="ssh_password" type="password"
placeholder="leave blank to keep existing" autocomplete="new-password">
<label for="ssh_key">Private Key</label>
<div>
<textarea class="sf-input sf-textarea" id="ssh_key" name="ssh_key"
rows="6" placeholder="Paste PEM private key here (-----BEGIN ... KEY-----). Leave blank to keep existing." autocomplete="off"></textarea>
<span class="sf-hint" style="margin-top:3px">
Either password or key auth. Key takes precedence if both are set.
The key is stored in <code>/data/settings_overrides.json</code> on the server.
</span>
</div>
</div>
</div>
</div><!-- /left col -->
<!-- RIGHT column: Notifications + Behavior -->
@ -159,9 +210,14 @@
<div class="sf-row">
<label class="sf-label" for="max_parallel_burnins">Max Parallel Burn-Ins</label>
<input class="sf-input sf-input-xs" id="max_parallel_burnins" name="max_parallel_burnins"
type="number" min="1" max="16" value="{{ editable.max_parallel_burnins }}">
type="number" min="1" max="60" value="{{ editable.max_parallel_burnins }}">
<span class="sf-hint">How many jobs can run at the same time</span>
</div>
<div id="parallel-warn" class="sf-inline-warn"
{% if editable.max_parallel_burnins <= 8 %}style="display:none"{% endif %}>
⚠ Running many simultaneous surface scans may saturate your storage controller
and produce unreliable results. Recommended: 2–4.
</div>
<div class="sf-row">
<label class="sf-label" for="stuck_job_hours">Stuck Job Threshold (hours)</label>
@ -348,6 +404,36 @@
}
});
// Parallel burn-in warning
var parallelInput = document.getElementById('max_parallel_burnins');
var parallelWarn = document.getElementById('parallel-warn');
if (parallelInput && parallelWarn) {
parallelInput.addEventListener('input', function () {
parallelWarn.style.display = parseInt(parallelInput.value, 10) > 8 ? '' : 'none';
});
}
// Test SSH
var sshBtn = document.getElementById('test-ssh-btn');
var sshResult = document.getElementById('ssh-test-result');
if (sshBtn) {
sshBtn.addEventListener('click', async function () {
sshBtn.disabled = true;
sshBtn.textContent = 'Testing…';
sshResult.style.display = 'none';
try {
var resp = await fetch('/api/v1/settings/test-ssh', { method: 'POST' });
var data = await resp.json();
showResult(sshResult, resp.ok, resp.ok ? 'Connection OK' : (data.detail || 'Failed'));
} catch (e) {
showResult(sshResult, false, 'Network error');
} finally {
sshBtn.disabled = false;
sshBtn.textContent = 'Test SSH Connection';
}
});
}
// Check for Updates
var updBtn = document.getElementById('check-updates-btn');
var updResult = document.getElementById('update-result');

View file

@ -119,5 +119,65 @@
{% endif %}
</div>
</div>
<div class="stats-grid" style="margin-top:24px">
<!-- Average duration by drive size -->
<div class="stats-section">
<h2 class="section-title">Avg. Test Duration by Drive Size</h2>
{% if by_size %}
<div class="table-wrap" style="max-height:none">
<table>
<thead>
<tr>
<th>Size</th>
<th style="text-align:right">Jobs</th>
<th style="text-align:right">Avg Duration</th>
</tr>
</thead>
<tbody>
{% for s in by_size %}
<tr>
<td style="font-weight:500;color:var(--text-strong)">{{ s.size_tb }} TB</td>
<td class="mono text-muted" style="text-align:right">{{ s.total }}</td>
<td class="mono" style="text-align:right;color:var(--text-strong)">{{ s.avg_hours }}h</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="empty-state" style="border:1px solid var(--border);border-radius:8px;padding:32px">No completed jobs yet.</div>
{% endif %}
</div>
<!-- Failure breakdown by stage -->
<div class="stats-section">
<h2 class="section-title">Failures by Stage</h2>
{% if by_failure_stage %}
<div class="table-wrap" style="max-height:none">
<table>
<thead>
<tr>
<th>Stage</th>
<th style="text-align:right">Count</th>
</tr>
</thead>
<tbody>
{% for f in by_failure_stage %}
<tr>
<td style="font-weight:500;color:var(--red)">{{ f.failed_stage | replace('_',' ') | title }}</td>
<td class="mono" style="text-align:right;color:var(--red)">{{ f.count }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="empty-state" style="border:1px solid var(--border);border-radius:8px;padding:32px">No failures recorded.</div>
{% endif %}
</div>
</div>
{% endblock %}

View file

@ -5,3 +5,4 @@ httpx
pydantic-settings
jinja2
sse-starlette
asyncssh