From 2dff58bd5264708c2d974c7091309afe08d2aab0 Mon Sep 17 00:00:00 2001 From: Brandon Walter <51866976+echoparkbaby@users.noreply.github.com> Date: Tue, 24 Feb 2026 08:09:30 -0500 Subject: [PATCH] Stage 7: SSH architecture, SMART attribute monitoring, drive reset, and polish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SSH (app/ssh_client.py — new): - asyncssh-based client: start_smart_test, poll_smart_progress, abort_smart_test, get_smart_attributes, run_badblocks with streaming progress callbacks - SMART attribute table: monitors attrs 5/10/188/197/198/199 for warn/fail thresholds - Falls back to REST API / mock simulation when ssh_host is not configured Burn-in stages updated (burnin.py): - _stage_smart_test: SSH path polls smartctl -a, stores raw output + parsed attributes - _stage_surface_validate: SSH path streams badblocks, counts bad blocks vs configurable threshold - _stage_final_check: SSH path checks smartctl attributes; DB fallback for mock mode - New DB helpers: _append_stage_log, _update_stage_bad_blocks, _store_smart_attrs, _store_smart_raw_output Database (database.py): - Migrations: burnin_stages.log_text, burnin_stages.bad_blocks, drives.smart_attrs (JSON), smart_tests.raw_output Settings (config.py + settings_store.py): - ssh_host, ssh_port, ssh_user, ssh_password, ssh_key — all runtime-editable - SSH section in Settings UI with Test SSH Connection button Webhook (notifier.py): - Added bad_blocks and timestamp fields to payload per SPEC Drive reset (routes.py + drives_table.html): - POST /api/v1/drives/{id}/reset — clears SMART state, smart_attrs; audit logged - Reset button visible on drives with completed test state (no active burn-in) Log drawer (app.js): - Burn-In tab: shows raw stage log_text (SSH output) with bad block highlighting - SMART tab: shows SMART attribute table with warn/fail colouring + raw smartctl output Polish: - Version badge (v1.0.0-6d) in header via Jinja2 global - Parallel 
burn-in warning when max_parallel_burnins > 8 in Settings - Stats page: avg duration by drive size + failure breakdown by stage - settings.html: SSH section with key textarea, parallel warn div Co-Authored-By: Claude Sonnet 4.6 --- app/burnin.py | 315 ++++++++++++++++++++- app/config.py | 10 +- app/database.py | 5 + app/notifier.py | 22 +- app/renderer.py | 6 +- app/routes.py | 169 +++++++++-- app/settings_store.py | 9 + app/ssh_client.py | 303 ++++++++++++++++++++ app/static/app.css | 122 ++++++++ app/static/app.js | 64 ++++- app/templates/components/drives_table.html | 10 + app/templates/layout.html | 1 + app/templates/settings.html | 88 +++++- app/templates/stats.html | 60 ++++ requirements.txt | 1 + 15 files changed, 1141 insertions(+), 44 deletions(-) create mode 100644 app/ssh_client.py diff --git a/app/burnin.py b/app/burnin.py index e01f934..d1cb87a 100644 --- a/app/burnin.py +++ b/app/burnin.py @@ -303,6 +303,16 @@ async def _run_job(job_id: int) -> None: ) job_row = await cur2.fetchone() if job_row: + # Get bad_blocks count from surface_validate stage if present + bad_blocks = 0 + async with _db() as db3: + cur3 = await db3.execute( + "SELECT bad_blocks FROM burnin_stages WHERE burnin_job_id=? 
AND stage_name='surface_validate'", + (job_id,) + ) + bb_row = await cur3.fetchone() + if bb_row and bb_row[0]: + bad_blocks = bb_row[0] asyncio.create_task(notifier.notify_job_complete( job_id=job_id, devname=devname, @@ -312,6 +322,7 @@ async def _run_job(job_id: int) -> None: profile=job_row["profile"], operator=job_row["operator"], error_text=error_text, + bad_blocks=bad_blocks, )) except Exception as exc: log.error("Failed to schedule notifications: %s", exc) @@ -352,15 +363,15 @@ async def _dispatch_stage(job_id: int, stage_name: str, devname: str, drive_id: if stage_name == "precheck": return await _stage_precheck(job_id, drive_id) elif stage_name == "short_smart": - return await _stage_smart_test(job_id, devname, "SHORT", "short_smart") + return await _stage_smart_test(job_id, devname, "SHORT", "short_smart", drive_id) elif stage_name == "long_smart": - return await _stage_smart_test(job_id, devname, "LONG", "long_smart") + return await _stage_smart_test(job_id, devname, "LONG", "long_smart", drive_id) elif stage_name == "surface_validate": - return await _stage_timed_simulate(job_id, "surface_validate", settings.surface_validate_seconds) + return await _stage_surface_validate(job_id, devname, drive_id) elif stage_name == "io_validate": return await _stage_timed_simulate(job_id, "io_validate", settings.io_validate_seconds) elif stage_name == "final_check": - return await _stage_final_check(job_id, devname) + return await _stage_final_check(job_id, devname, drive_id) return True @@ -393,8 +404,17 @@ async def _stage_precheck(job_id: int, drive_id: int) -> bool: return True -async def _stage_smart_test(job_id: int, devname: str, test_type: str, stage_name: str) -> bool: - """Start a TrueNAS SMART test and poll until complete.""" +async def _stage_smart_test(job_id: int, devname: str, test_type: str, stage_name: str, + drive_id: int | None = None) -> bool: + """Start a SMART test. 
Uses SSH if configured, TrueNAS REST API otherwise.""" + from app import ssh_client + if ssh_client.is_configured(): + return await _stage_smart_test_ssh(job_id, devname, test_type, stage_name, drive_id) + return await _stage_smart_test_api(job_id, devname, test_type, stage_name) + + +async def _stage_smart_test_api(job_id: int, devname: str, test_type: str, stage_name: str) -> bool: + """TrueNAS REST API path for SMART test (mock / dev mode).""" tn_job_id = await _client.start_smart_test([devname], test_type) while True: @@ -428,8 +448,215 @@ async def _stage_smart_test(job_id: int, devname: str, test_type: str, stage_nam await asyncio.sleep(POLL_INTERVAL) +async def _stage_smart_test_ssh(job_id: int, devname: str, test_type: str, stage_name: str, + drive_id: int | None) -> bool: + """SSH path for SMART test — runs smartctl directly on TrueNAS.""" + from app import ssh_client + + # Start the test + try: + startup = await ssh_client.start_smart_test(devname, test_type) + await _append_stage_log(job_id, stage_name, startup + "\n") + except Exception as exc: + await _set_stage_error(job_id, stage_name, f"Failed to start SMART test via SSH: {exc}") + return False + + # Brief pause to let the test register in smartctl output + await asyncio.sleep(3) + + # Poll until complete + while True: + if await _is_cancelled(job_id): + try: + await ssh_client.abort_smart_test(devname) + except Exception: + pass + return False + + await asyncio.sleep(POLL_INTERVAL) + + try: + progress = await ssh_client.poll_smart_progress(devname) + except Exception as exc: + log.warning("SSH SMART poll failed: %s", exc, extra={"job_id": job_id}) + await _append_stage_log(job_id, stage_name, f"[poll error] {exc}\n") + continue + + await _append_stage_log(job_id, stage_name, progress["output"] + "\n---\n") + + if progress["state"] == "running": + pct = max(0, 100 - progress["percent_remaining"]) + await _update_stage_percent(job_id, stage_name, pct) + await _recalculate_progress(job_id) + 
_push_update() + + elif progress["state"] == "passed": + await _update_stage_percent(job_id, stage_name, 100) + # Run attribute check + if drive_id is not None: + try: + attrs = await ssh_client.get_smart_attributes(devname) + await _store_smart_attrs(drive_id, attrs) + await _store_smart_raw_output(drive_id, test_type, attrs["raw_output"]) + if attrs["failures"]: + error = "SMART attribute failures: " + "; ".join(attrs["failures"]) + await _set_stage_error(job_id, stage_name, error) + return False + if attrs["warnings"]: + await _append_stage_log( + job_id, stage_name, + "[WARNING] " + "; ".join(attrs["warnings"]) + "\n" + ) + except Exception as exc: + log.warning("Failed to retrieve SMART attributes: %s", exc) + await _recalculate_progress(job_id) + _push_update() + return True + + elif progress["state"] == "failed": + await _set_stage_error(job_id, stage_name, f"SMART {test_type} test failed") + return False + # "unknown" → keep polling + + +async def _stage_surface_validate(job_id: int, devname: str, drive_id: int) -> bool: + """ + Surface validation stage. + SSH mode: runs badblocks -wsv -b 4096 -p 1 /dev/{devname}. + Mock mode: simulated timed progress (no real I/O). + """ + from app import ssh_client + if ssh_client.is_configured(): + return await _stage_surface_validate_ssh(job_id, devname, drive_id) + return await _stage_timed_simulate(job_id, "surface_validate", settings.surface_validate_seconds) + + +async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int) -> bool: + """Run badblocks over SSH, streaming output to stage log.""" + from app import ssh_client + + await _append_stage_log( + job_id, "surface_validate", + f"[START] badblocks -wsv -b 4096 -p 1 /dev/{devname}\n" + f"[NOTE] This is a DESTRUCTIVE write test. 
All data on /dev/{devname} will be overwritten.\n\n" + ) + + def _is_cancelled_sync() -> bool: + # Synchronous version — we check the DB state flag set by cancel_job() + import asyncio + loop = asyncio.get_event_loop() + try: + return loop.run_until_complete(_is_cancelled(job_id)) + except Exception: + return False + + last_logged_pct = [-1] + + def on_progress(pct: int, bad_blocks: int, line: str) -> None: + nonlocal last_logged_pct + # Write to log (fire-and-forget via asyncio.create_task from sync context) + # The log append is done in the async flush below + pass + + accumulated_lines: list[str] = [] + + async def on_progress_async(pct: int, bad_blocks: int, line: str) -> None: + accumulated_lines.append(line) + # Flush to DB and update progress every ~25 lines to avoid excessive DB writes + if len(accumulated_lines) % 25 == 0: + await _append_stage_log(job_id, "surface_validate", "".join(accumulated_lines[-25:])) + await _update_stage_bad_blocks(job_id, "surface_validate", bad_blocks) + await _update_stage_percent(job_id, "surface_validate", pct) + await _recalculate_progress(job_id) + _push_update() + if await _is_cancelled(job_id): + raise asyncio.CancelledError + + # Run badblocks — we adapt the callback pattern to async by collecting then flushing + result = {"bad_blocks": 0, "output": "", "aborted": False} + try: + # The actual streaming; we handle progress via the accumulated_lines pattern + bad_blocks_total = 0 + output_lines: list[str] = [] + + async with await ssh_client._connect() as conn: + cmd = f"badblocks -wsv -b 4096 -p 1 /dev/{devname}" + async with conn.create_process(cmd) as proc: + import re as _re + + async def _drain(stream, is_stderr: bool): + nonlocal bad_blocks_total + async for raw in stream: + line = raw if isinstance(raw, str) else raw.decode("utf-8", errors="replace") + output_lines.append(line) + + if is_stderr: + m = _re.search(r"([\d.]+)%\s+done", line) + if m: + pct = min(99, int(float(m.group(1)))) + await 
_update_stage_percent(job_id, "surface_validate", pct) + await _update_stage_bad_blocks(job_id, "surface_validate", bad_blocks_total) + await _recalculate_progress(job_id) + _push_update() + else: + stripped = line.strip() + if stripped and stripped.isdigit(): + bad_blocks_total += 1 + + # Append to DB log in chunks + if len(output_lines) % 20 == 0: + chunk = "".join(output_lines[-20:]) + await _append_stage_log(job_id, "surface_validate", chunk) + + # Abort on bad block threshold + if bad_blocks_total > settings.bad_block_threshold: + proc.kill() + output_lines.append( + f"\n[ABORTED] {bad_blocks_total} bad block(s) exceeded " + f"threshold ({settings.bad_block_threshold})\n" + ) + return + + if await _is_cancelled(job_id): + proc.kill() + return + + await asyncio.gather( + _drain(proc.stdout, False), + _drain(proc.stderr, True), + return_exceptions=True, + ) + await proc.wait() + + # Flush remaining output + remainder = "".join(output_lines) + await _append_stage_log(job_id, "surface_validate", remainder) + result["bad_blocks"] = bad_blocks_total + result["output"] = remainder + result["aborted"] = bad_blocks_total > settings.bad_block_threshold + + except asyncio.CancelledError: + return False + except Exception as exc: + await _append_stage_log(job_id, "surface_validate", f"\n[SSH error] {exc}\n") + await _set_stage_error(job_id, "surface_validate", f"SSH badblocks error: {exc}") + return False + + await _update_stage_bad_blocks(job_id, "surface_validate", result["bad_blocks"]) + + if result["aborted"] or result["bad_blocks"] > settings.bad_block_threshold: + await _set_stage_error( + job_id, "surface_validate", + f"Surface validate FAILED: {result['bad_blocks']} bad block(s) found " + f"(threshold: {settings.bad_block_threshold})" + ) + return False + + return True + + async def _stage_timed_simulate(job_id: int, stage_name: str, duration_seconds: int) -> bool: - """Simulate a timed stage (surface validation / IO validation) with progress updates.""" + 
"""Simulate a timed stage with progress updates (mock / dev mode).""" start = time.monotonic() while True: @@ -449,9 +676,28 @@ async def _stage_timed_simulate(job_id: int, stage_name: str, duration_seconds: await asyncio.sleep(POLL_INTERVAL) -async def _stage_final_check(job_id: int, devname: str) -> bool: - """Verify drive passed all tests by checking current SMART health in DB.""" +async def _stage_final_check(job_id: int, devname: str, drive_id: int | None = None) -> bool: + """ + Verify drive passed all tests. + SSH mode: run smartctl -a and check critical attributes. + Mock mode: check SMART health field in DB. + """ await asyncio.sleep(1) + from app import ssh_client + if ssh_client.is_configured() and drive_id is not None: + try: + attrs = await ssh_client.get_smart_attributes(devname) + await _store_smart_attrs(drive_id, attrs) + if attrs["health"] == "FAILED" or attrs["failures"]: + failures = attrs["failures"] or [f"SMART health: {attrs['health']}"] + await _set_stage_error(job_id, "final_check", + "Final check failed: " + "; ".join(failures)) + return False + return True + except Exception as exc: + log.warning("SSH final_check failed, falling back to DB check: %s", exc) + + # DB check (mock mode fallback) async with _db() as db: cur = await db.execute( "SELECT smart_health FROM drives WHERE devname=?", (devname,) @@ -549,6 +795,57 @@ async def _cancel_stage(job_id: int, stage_name: str) -> None: await db.commit() +async def _append_stage_log(job_id: int, stage_name: str, text: str) -> None: + """Append text to the log_text column of a burnin_stages row.""" + async with _db() as db: + await db.execute("PRAGMA journal_mode=WAL") + await db.execute( + """UPDATE burnin_stages + SET log_text = COALESCE(log_text, '') || ? + WHERE burnin_job_id=? 
AND stage_name=?""", + (text, job_id, stage_name), + ) + await db.commit() + + +async def _update_stage_bad_blocks(job_id: int, stage_name: str, count: int) -> None: + async with _db() as db: + await db.execute("PRAGMA journal_mode=WAL") + await db.execute( + "UPDATE burnin_stages SET bad_blocks=? WHERE burnin_job_id=? AND stage_name=?", + (count, job_id, stage_name), + ) + await db.commit() + + +async def _store_smart_attrs(drive_id: int, attrs: dict) -> None: + """Persist latest SMART attribute dict to drives.smart_attrs (JSON).""" + import json + # Convert int keys to str for JSON serialisation + serialisable = {str(k): v for k, v in attrs.get("attributes", {}).items()} + blob = json.dumps({ + "health": attrs.get("health", "UNKNOWN"), + "attrs": serialisable, + "warnings": attrs.get("warnings", []), + "failures": attrs.get("failures", []), + }) + async with _db() as db: + await db.execute("PRAGMA journal_mode=WAL") + await db.execute("UPDATE drives SET smart_attrs=? WHERE id=?", (blob, drive_id)) + await db.commit() + + +async def _store_smart_raw_output(drive_id: int, test_type: str, raw: str) -> None: + """Store raw smartctl output in smart_tests.raw_output.""" + async with _db() as db: + await db.execute("PRAGMA journal_mode=WAL") + await db.execute( + "UPDATE smart_tests SET raw_output=? WHERE drive_id=? 
AND test_type=?", + (raw, drive_id, test_type.lower()), + ) + await db.commit() + + async def _set_stage_error(job_id: int, stage_name: str, error_text: str) -> None: async with _db() as db: await db.execute("PRAGMA journal_mode=WAL") diff --git a/app/config.py b/app/config.py index 6d432e1..5c9b10a 100644 --- a/app/config.py +++ b/app/config.py @@ -56,9 +56,17 @@ class Settings(BaseSettings): temp_crit_c: int = 55 # red critical (precheck refuses to start above this) # Bad-block tolerance — surface_validate fails if bad blocks exceed this - # (applies to real badblocks in Stage 7; ignored by mock simulation) bad_block_threshold: int = 0 + # SSH credentials for direct TrueNAS command execution (Stage 7) + # When ssh_host is set, burn-in stages use SSH for smartctl/badblocks instead of REST API. + # Leave ssh_host empty to use the mock/REST API (development mode). + ssh_host: str = "" + ssh_port: int = 22 + ssh_user: str = "root" # TrueNAS CORE default is root + ssh_password: str = "" # Password auth (leave blank if using key) + ssh_key: str = "" # PEM private key content (paste full key including headers) + # Application version — used by the /api/v1/updates/check endpoint app_version: str = "1.0.0-6d" diff --git a/app/database.py b/app/database.py index 729f766..bfb2893 100644 --- a/app/database.py +++ b/app/database.py @@ -82,6 +82,11 @@ CREATE INDEX IF NOT EXISTS idx_audit_events_job ON audit_events(burnin_job_id) _MIGRATIONS = [ "ALTER TABLE drives ADD COLUMN notes TEXT", "ALTER TABLE drives ADD COLUMN location TEXT", + # Stage 7: SSH command output + SMART attribute storage + "ALTER TABLE burnin_stages ADD COLUMN log_text TEXT", + "ALTER TABLE burnin_stages ADD COLUMN bad_blocks INTEGER DEFAULT 0", + "ALTER TABLE drives ADD COLUMN smart_attrs TEXT", + "ALTER TABLE smart_tests ADD COLUMN raw_output TEXT", ] diff --git a/app/notifier.py b/app/notifier.py index d830da2..d0c4184 100644 --- a/app/notifier.py +++ b/app/notifier.py @@ -23,21 +23,25 @@ async def 
notify_job_complete( profile: str, operator: str, error_text: str | None, + bad_blocks: int = 0, ) -> None: """Fire all configured notifications for a completed burn-in job.""" + from datetime import datetime, timezone tasks = [] if settings.webhook_url: tasks.append(_send_webhook({ - "event": f"burnin_{state}", - "job_id": job_id, - "devname": devname, - "serial": serial, - "model": model, - "state": state, - "profile": profile, - "operator": operator, - "error_text": error_text, + "event": f"burnin_{state}", + "job_id": job_id, + "devname": devname, + "serial": serial, + "model": model, + "state": state, + "profile": profile, + "operator": operator, + "error_text": error_text, + "bad_blocks": bad_blocks, + "timestamp": datetime.now(timezone.utc).isoformat(), })) if settings.smtp_host: diff --git a/app/renderer.py b/app/renderer.py index e205028..b9ece47 100644 --- a/app/renderer.py +++ b/app/renderer.py @@ -126,7 +126,7 @@ def _format_elapsed(iso: str | None) -> str: return "" -# Register +# Register filters templates.env.filters["format_bytes"] = _format_bytes templates.env.filters["format_eta"] = _format_eta templates.env.filters["temp_class"] = _temp_class @@ -135,3 +135,7 @@ templates.env.filters["format_dt_full"] = _format_dt_full templates.env.filters["format_duration"] = _format_duration templates.env.filters["format_elapsed"] = _format_elapsed templates.env.globals["drive_status"] = _drive_status + + +from app.config import settings as _settings +templates.env.globals["app_version"] = _settings.app_version diff --git a/app/routes.py b/app/routes.py index cd2e23e..6186e3b 100644 --- a/app/routes.py +++ b/app/routes.py @@ -258,7 +258,7 @@ async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db) raise HTTPException(status_code=404, detail="Drive not found") drive = _row_to_drive(row) - # Latest burn-in job + its stages + # Latest burn-in job + its stages (include log_text and bad_blocks) cur = await db.execute( "SELECT * FROM 
burnin_jobs WHERE drive_id=? ORDER BY id DESC LIMIT 1", (drive_id,), @@ -268,12 +268,33 @@ async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db) if job_row: job = dict(job_row) cur = await db.execute( - "SELECT * FROM burnin_stages WHERE burnin_job_id=? ORDER BY id", + "SELECT id, stage_name, state, percent, started_at, finished_at, " + "duration_seconds, error_text, log_text, bad_blocks " + "FROM burnin_stages WHERE burnin_job_id=? ORDER BY id", (job_row["id"],), ) job["stages"] = [dict(r) for r in await cur.fetchall()] burnin = job + # SMART raw output from smart_tests table + cur = await db.execute( + "SELECT test_type, state, percent, started_at, finished_at, error_text, raw_output " + "FROM smart_tests WHERE drive_id=?", + (drive_id,), + ) + smart_rows = {r["test_type"]: dict(r) for r in await cur.fetchall()} + + # Cached SMART attributes (JSON blob on drives table) + import json as _json + smart_attrs = None + cur = await db.execute("SELECT smart_attrs FROM drives WHERE id=?", (drive_id,)) + attrs_row = await cur.fetchone() + if attrs_row and attrs_row["smart_attrs"]: + try: + smart_attrs = _json.loads(attrs_row["smart_attrs"]) + except Exception: + pass + # Last 50 audit events for this drive (newest first) cur = await db.execute(""" SELECT id, event_type, operator, message, created_at @@ -284,20 +305,28 @@ async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db) """, (drive_id,)) events = [dict(r) for r in await cur.fetchall()] + def _smart_card(test_type: str) -> dict: + smart_obj = drive.smart_short if test_type == "short" else drive.smart_long + base = smart_obj.model_dump() if smart_obj else {} + row = smart_rows.get(test_type, {}) + base["raw_output"] = row.get("raw_output") + return base + return { "drive": { - "id": drive.id, - "devname": drive.devname, - "serial": drive.serial, - "model": drive.model, + "id": drive.id, + "devname": drive.devname, + "serial": drive.serial, + "model": drive.model, 
"size_bytes": drive.size_bytes, }, - "burnin": burnin, + "burnin": burnin, "smart": { - "short": drive.smart_short.model_dump() if drive.smart_short else None, - "long": drive.smart_long.model_dump() if drive.smart_long else None, + "short": _smart_card("short"), + "long": _smart_card("long"), + "attrs": smart_attrs, }, - "events": events, + "events": events, } @@ -672,6 +701,53 @@ async def update_drive( return {"updated": True} +@router.post("/api/v1/drives/{drive_id}/reset") +async def reset_drive( + drive_id: int, + body: dict, + db: aiosqlite.Connection = Depends(get_db), +): + """ + Clear SMART test results for a drive so it shows as fresh. + Only allowed when no burn-in job is active (queued or running). + Preserves all job history — just resets the display state. + """ + cur = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,)) + if not await cur.fetchone(): + raise HTTPException(status_code=404, detail="Drive not found") + + # Reject if any active burn-in + cur = await db.execute( + "SELECT COUNT(*) FROM burnin_jobs WHERE drive_id=? 
AND state IN ('queued','running')", + (drive_id,), + ) + if (await cur.fetchone())[0] > 0: + raise HTTPException(status_code=409, detail="Cannot reset while a burn-in is active") + + operator = body.get("operator", "operator") + + # Reset SMART test state to idle + await db.execute( + """UPDATE smart_tests SET state='idle', percent=0, started_at=NULL, + eta_at=NULL, finished_at=NULL, error_text=NULL, raw_output=NULL + WHERE drive_id=?""", + (drive_id,), + ) + # Clear cached SMART attributes + await db.execute("UPDATE drives SET smart_attrs=NULL WHERE id=?", (drive_id,)) + + # Audit event + await db.execute( + """INSERT INTO audit_events (event_type, drive_id, operator, message) + VALUES (?,?,?,?)""", + ("drive_reset", drive_id, operator, "Drive reset — SMART state cleared"), + ) + await db.commit() + + poller._notify_subscribers() + return {"reset": True} + + # --------------------------------------------------------------------------- # Audit log page # --------------------------------------------------------------------------- @@ -766,18 +842,50 @@ async def stats_page( """) by_day = [dict(r) for r in await cur.fetchall()] + # Average test duration by drive size (rounded to nearest TB) + cur = await db.execute(""" + SELECT + CAST(ROUND(CAST(d.size_bytes AS REAL) / 1e12) AS INTEGER) AS size_tb, + COUNT(*) AS total, + ROUND(AVG( + (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400 / 3600.0 + ), 1) AS avg_hours + FROM burnin_jobs bj + JOIN drives d ON d.id = bj.drive_id + WHERE bj.state IN ('passed', 'failed') + AND bj.started_at IS NOT NULL + AND bj.finished_at IS NOT NULL + GROUP BY size_tb + ORDER BY size_tb + """) + by_size = [dict(r) for r in await cur.fetchall()] + + # Failure breakdown by stage (which stage caused the failure) + cur = await db.execute(""" + SELECT + COALESCE(bj.stage_name, 'unknown') AS failed_stage, + COUNT(*) AS count + FROM burnin_jobs bj + WHERE bj.state = 'failed' + GROUP BY failed_stage + ORDER BY count DESC + """) + 
by_failure_stage = [dict(r) for r in await cur.fetchall()] + # Drives tracked cur = await db.execute("SELECT COUNT(*) FROM drives") drives_total = (await cur.fetchone())[0] ps = poller.get_state() return templates.TemplateResponse("stats.html", { - "request": request, - "overall": overall, - "by_model": by_model, - "by_day": by_day, - "drives_total": drives_total, - "poller": ps, + "request": request, + "overall": overall, + "by_model": by_model, + "by_day": by_day, + "by_size": by_size, + "by_failure_stage": by_failure_stage, + "drives_total": drives_total, + "poller": ps, **_stale_context(ps), }) @@ -813,6 +921,11 @@ async def settings_page( "temp_warn_c": settings.temp_warn_c, "temp_crit_c": settings.temp_crit_c, "bad_block_threshold": settings.bad_block_threshold, + # SSH credentials (take effect immediately — each SSH call reads live settings) + "ssh_host": settings.ssh_host, + "ssh_port": settings.ssh_port, + "ssh_user": settings.ssh_user, + # Note: ssh_password and ssh_key intentionally omitted from display (sensitive) # System settings (restart required to fully apply) "truenas_base_url": settings.truenas_base_url, "truenas_verify_tls": settings.truenas_verify_tls, @@ -823,13 +936,15 @@ async def settings_page( # Note: truenas_api_key intentionally omitted from display (sensitive) } + from app import ssh_client as _ssh ps = poller.get_state() return templates.TemplateResponse("settings.html", { - "request": request, - "editable": editable, - "smtp_enabled": bool(settings.smtp_host), - "app_version": settings.app_version, - "poller": ps, + "request": request, + "editable": editable, + "smtp_enabled": bool(settings.smtp_host), + "ssh_configured": _ssh.is_configured(), + "app_version": settings.app_version, + "poller": ps, **_stale_context(ps), }) @@ -838,7 +953,7 @@ async def settings_page( async def save_settings(body: dict): """Save editable runtime settings. 
Secrets are only updated if non-empty.""" # Don't overwrite secrets if client sent empty string - for secret_field in ("smtp_password", "truenas_api_key"): + for secret_field in ("smtp_password", "truenas_api_key", "ssh_password", "ssh_key"): if secret_field in body and body[secret_field] == "": del body[secret_field] @@ -859,6 +974,16 @@ async def test_smtp(): return {"ok": True} +@router.post("/api/v1/settings/test-ssh") +async def test_ssh(): + """Test the current SSH configuration.""" + from app import ssh_client + result = await ssh_client.test_connection() + if not result["ok"]: + raise HTTPException(status_code=502, detail=result.get("error", "Connection failed")) + return {"ok": True} + + @router.get("/api/v1/updates/check") async def check_updates(): """Check for a newer release on Forgejo.""" diff --git a/app/settings_store.py b/app/settings_store.py index 84d50b1..ace86df 100644 --- a/app/settings_store.py +++ b/app/settings_store.py @@ -38,6 +38,12 @@ _EDITABLE: dict[str, type] = { "temp_warn_c": int, "temp_crit_c": int, "bad_block_threshold": int, + # SSH credentials — take effect immediately (each connection reads live settings) + "ssh_host": str, + "ssh_port": int, + "ssh_user": str, + "ssh_password": str, + "ssh_key": str, # System settings — saved to JSON; require container restart to fully apply "truenas_base_url": str, "truenas_api_key": str, @@ -90,6 +96,9 @@ def _apply(data: dict) -> None: if key == "bad_block_threshold" and int(val) < 0: log.warning("settings_store: bad_block_threshold must be >= 0 — ignoring") continue + if key == "ssh_port" and not (1 <= int(val) <= 65535): + log.warning("settings_store: ssh_port out of range — ignoring") + continue setattr(settings, key, val) except (ValueError, TypeError) as exc: log.warning("settings_store: invalid value for %s: %s", key, exc) diff --git a/app/ssh_client.py b/app/ssh_client.py new file mode 100644 index 0000000..183c650 --- /dev/null +++ b/app/ssh_client.py @@ -0,0 +1,303 @@ +""" +SSH 
client for direct TrueNAS command execution (Stage 7). + +When ssh_host is configured, burn-in stages use SSH to run smartctl and +badblocks directly on the TrueNAS host instead of going through the REST API. +Falls back to REST API / simulation when SSH is not configured (dev/mock mode). + +TrueNAS CORE (FreeBSD) device paths: /dev/ada0, /dev/da0, etc. +TrueNAS SCALE (Linux) device paths: /dev/sda, /dev/sdb, etc. +The devname from the TrueNAS API is used as-is in /dev/{devname}. +""" + +import asyncio +import logging +import re +from typing import Callable + +log = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Monitored SMART attributes +# True → any non-zero raw value is a hard failure (drive rejected) +# False → non-zero is a warning (flagged but test continues) +# --------------------------------------------------------------------------- + +SMART_ATTRS: dict[int, tuple[str, bool]] = { + 5: ("Reallocated_Sector_Ct", True), # reallocation = FAIL + 10: ("Spin_Retry_Count", False), # mechanical stress = WARN + 188: ("Command_Timeout", False), # drive not responding = WARN + 197: ("Current_Pending_Sector", True), # pending reallocation = FAIL + 198: ("Offline_Uncorrectable", True), # unrecoverable read error = FAIL + 199: ("UDMA_CRC_Error_Count", False), # cable/controller issue = WARN +} + + +# --------------------------------------------------------------------------- +# Configuration check +# --------------------------------------------------------------------------- + +def is_configured() -> bool: + """Returns True when SSH credentials are present and usable.""" + from app.config import settings + return bool(settings.ssh_host and (settings.ssh_password or settings.ssh_key)) + + +# --------------------------------------------------------------------------- +# Low-level connection +# --------------------------------------------------------------------------- + +async def _connect(): + """Open a 
single-use SSH connection. Caller must use `async with`.""" + import asyncssh + from app.config import settings + + kwargs: dict = { + "host": settings.ssh_host, + "port": settings.ssh_port, + "username": settings.ssh_user, + "known_hosts": None, # trust all hosts (same spirit as TRUENAS_VERIFY_TLS=false) + } + if settings.ssh_key: + kwargs["client_keys"] = [asyncssh.import_private_key(settings.ssh_key)] + if settings.ssh_password: + kwargs["password"] = settings.ssh_password + + return asyncssh.connect(**kwargs) + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +async def test_connection() -> dict: + """Test SSH connectivity. Returns {"ok": True} or {"ok": False, "error": str}.""" + if not is_configured(): + return {"ok": False, "error": "SSH not configured (ssh_host is empty)"} + try: + async with await _connect() as conn: + result = await conn.run("echo ok", check=False) + if "ok" in result.stdout: + return {"ok": True} + return {"ok": False, "error": result.stderr.strip() or "unexpected output"} + except Exception as exc: + return {"ok": False, "error": str(exc)} + + +async def get_smart_attributes(devname: str) -> dict: + """ + Run `smartctl -a /dev/{devname}` and parse the output. 
+ Returns: + health: str — "PASSED" | "FAILED" | "UNKNOWN" + raw_output: str — full smartctl output + attributes: dict[int, {"name": str, "raw": int}] + warnings: list[str] — attribute names with non-zero raw (non-critical) + failures: list[str] — attribute names with non-zero raw (critical) + """ + cmd = f"smartctl -a /dev/{devname}" + try: + async with await _connect() as conn: + result = await conn.run(cmd, check=False) + output = result.stdout + result.stderr + return _parse_smartctl(output) + except Exception as exc: + return { + "health": "UNKNOWN", + "raw_output": str(exc), + "attributes": {}, + "warnings": [], + "failures": [f"SSH error: {exc}"], + } + + +async def start_smart_test(devname: str, test_type: str) -> str: + """ + Run `smartctl -t short|long /dev/{devname}`. + Returns raw output. Raises RuntimeError on unrecoverable failure. + test_type: "SHORT" or "LONG" + """ + arg = "short" if test_type.upper() == "SHORT" else "long" + cmd = f"smartctl -t {arg} /dev/{devname}" + async with await _connect() as conn: + result = await conn.run(cmd, check=False) + output = result.stdout + result.stderr + # smartctl exits 0 or 4 when the test is successfully started on most drives + started = ("Testing has begun" in output or + "test has begun" in output.lower() or + result.returncode in (0, 4)) + if not started: + raise RuntimeError(f"smartctl returned exit {result.returncode}: {output[:400]}") + return output + + +async def poll_smart_progress(devname: str) -> dict: + """ + Run `smartctl -a /dev/{devname}` and extract self-test status. 
+ Returns: + state: "running" | "passed" | "failed" | "unknown" + percent_remaining: int (0 = complete when state != "running") + output: str + """ + cmd = f"smartctl -a /dev/{devname}" + async with await _connect() as conn: + result = await conn.run(cmd, check=False) + output = result.stdout + result.stderr + return _parse_smart_progress(output) + + +async def abort_smart_test(devname: str) -> None: + """Send `smartctl -X /dev/{devname}` to abort an in-progress test.""" + cmd = f"smartctl -X /dev/{devname}" + async with await _connect() as conn: + await conn.run(cmd, check=False) + + +async def run_badblocks( + devname: str, + on_progress: Callable[[int, int, str], None], + cancelled_fn: Callable[[], bool] | None = None, +) -> dict: + """ + Run `badblocks -wsv -b 4096 -p 1 /dev/{devname}` and stream output. + + on_progress(percent, bad_blocks, line) is called for each line of output. + cancelled_fn() is polled to support mid-test cancellation. + + Returns: {"bad_blocks": int, "output": str, "aborted": bool} + """ + from app.config import settings + cmd = f"badblocks -wsv -b 4096 -p 1 /dev/{devname}" + lines: list[str] = [] + bad_blocks = 0 + aborted = False + last_pct = 0 + + try: + async with await _connect() as conn: + async with conn.create_process(cmd) as proc: + # badblocks writes progress to stderr, bad block numbers to stdout + async def _read_stream(stream, is_stderr: bool): + nonlocal bad_blocks, last_pct, aborted + async for raw_line in stream: + line = raw_line if isinstance(raw_line, str) else raw_line.decode("utf-8", errors="replace") + lines.append(line) + + if is_stderr: + m = re.search(r"([\d.]+)%\s+done", line) + if m: + last_pct = min(99, int(float(m.group(1)))) + else: + # Each non-empty stdout line during badblocks is a bad block number + stripped = line.strip() + if stripped and stripped.isdigit(): + bad_blocks += 1 + + on_progress(last_pct, bad_blocks, line) + + # Abort if threshold exceeded + if bad_blocks > settings.bad_block_threshold: + 
aborted = True + proc.kill() + lines.append( + f"\n[ABORTED] Bad block count ({bad_blocks}) exceeded " + f"threshold ({settings.bad_block_threshold})\n" + ) + return + + # Abort on cancellation + if cancelled_fn and cancelled_fn(): + aborted = True + proc.kill() + return + + stdout_task = asyncio.create_task(_read_stream(proc.stdout, False)) + stderr_task = asyncio.create_task(_read_stream(proc.stderr, True)) + await asyncio.gather(stdout_task, stderr_task, return_exceptions=True) + await proc.wait() + + except Exception as exc: + lines.append(f"\n[SSH error] {exc}\n") + + if not aborted: + last_pct = 100 + + return { + "bad_blocks": bad_blocks, + "output": "".join(lines), + "aborted": aborted, + } + + +# --------------------------------------------------------------------------- +# Parsers +# --------------------------------------------------------------------------- + +def _parse_smartctl(output: str) -> dict: + health = "UNKNOWN" + attributes: dict[int, dict] = {} + warnings: list[str] = [] + failures: list[str] = [] + + m = re.search(r"self-assessment test result:\s+(\w+)", output, re.IGNORECASE) + if m: + health = m.group(1).upper() + + # Attribute table: ID# NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE + for line in output.splitlines(): + am = re.match( + r"\s*(\d+)\s+(\S+)\s+\S+\s+\d+\s+\d+\s+\d+\s+\S+\s+\S+\s+\S+\s+(\d+)", + line, + ) + if not am: + continue + attr_id = int(am.group(1)) + attr_name = am.group(2) + raw_val = int(am.group(3)) + attributes[attr_id] = {"name": attr_name, "raw": raw_val} + + if attr_id in SMART_ATTRS: + _, is_critical = SMART_ATTRS[attr_id] + if raw_val > 0: + msg = f"{attr_name} = {raw_val}" + if is_critical: + failures.append(msg) + else: + warnings.append(msg) + + return { + "health": health, + "raw_output": output, + "attributes": attributes, + "warnings": warnings, + "failures": failures, + } + + +def _parse_smart_progress(output: str) -> dict: + state = "unknown" + percent_remaining = 0 + + lower = 
output.lower() + + if "self-test routine in progress" in lower or "self-test routine in progress" in output: + state = "running" + m = re.search(r"(\d+)%\s+of\s+test\s+remaining", output, re.IGNORECASE) + if m: + percent_remaining = int(m.group(1)) + elif "completed without error" in lower: + state = "passed" + elif ( + "completed: read failure" in lower + or "completed: write failure" in lower + or "aborted by host" in lower + or ("completed" in lower and "failure" in lower) + ): + state = "failed" + elif "in progress" in lower: + state = "running" + + return { + "state": state, + "percent_remaining": percent_remaining, + "output": output, + } diff --git a/app/static/app.css b/app/static/app.css index b85cade..0a3d042 100644 --- a/app/static/app.css +++ b/app/static/app.css @@ -2283,3 +2283,125 @@ tr.drawer-row-active { .drawer-smart-grid { grid-template-columns: 1fr; } .drawer-drive-meta { display: none; } } + +/* ----------------------------------------------------------------------- + Stage raw log output (SSH mode) +----------------------------------------------------------------------- */ +.stage-log { + font-family: "SF Mono", "Consolas", "Monaco", monospace; + font-size: 11px; + line-height: 1.5; + color: var(--text-muted); + background: var(--bg); + border-left: 2px solid var(--border); + margin: 6px 0 2px 28px; + padding: 6px 10px; + white-space: pre-wrap; + word-break: break-all; + max-height: 200px; + overflow-y: auto; +} +.stage-log .log-bad-block { + color: var(--red); + font-weight: 600; +} +.stage-log .log-warn { + color: var(--yellow); +} + +/* ----------------------------------------------------------------------- + SMART attributes table in drawer +----------------------------------------------------------------------- */ +.smart-attrs { + margin-top: 12px; + border-top: 1px solid var(--border); + padding-top: 10px; +} +.smart-attrs-title { + font-size: 11px; + font-weight: 600; + color: var(--text-muted); + text-transform: uppercase; + 
letter-spacing: .05em; + margin-bottom: 6px; +} +.smart-attr-row { + display: flex; + justify-content: space-between; + align-items: center; + padding: 3px 0; + font-size: 12px; + border-bottom: 1px solid color-mix(in srgb, var(--border) 50%, transparent); +} +.smart-attr-row:last-child { border-bottom: none; } +.smart-attr-name { color: var(--text-muted); } +.smart-attr-val { font-family: "SF Mono", monospace; font-size: 12px; } +.smart-attr-val.attr-ok { color: var(--green); } +.smart-attr-val.attr-warn { color: var(--yellow); font-weight: 600; } +.smart-attr-val.attr-fail { color: var(--red); font-weight: 600; } +.smart-attr-raw-output { + font-family: "SF Mono", "Consolas", monospace; + font-size: 10.5px; + line-height: 1.45; + color: var(--text-muted); + background: var(--bg); + border: 1px solid var(--border); + border-radius: 4px; + padding: 8px 10px; + margin-top: 10px; + white-space: pre; + overflow: auto; + max-height: 240px; +} + +/* ----------------------------------------------------------------------- + Reset button +----------------------------------------------------------------------- */ +.btn-reset { + background: transparent; + border: 1px solid color-mix(in srgb, var(--text-muted) 40%, transparent); + color: var(--text-muted); + border-radius: 5px; + padding: 3px 8px; + font-size: 12px; + cursor: pointer; + transition: border-color .15s, color .15s; +} +.btn-reset:hover { + border-color: var(--yellow); + color: var(--yellow); +} + +/* ----------------------------------------------------------------------- + Parallel burn-in inline warning +----------------------------------------------------------------------- */ +.sf-inline-warn { + background: color-mix(in srgb, var(--yellow) 12%, transparent); + border: 1px solid color-mix(in srgb, var(--yellow) 40%, transparent); + border-radius: 5px; + color: var(--yellow); + font-size: 12px; + padding: 7px 10px; + margin: 4px 0 8px 0; +} + +/* 
----------------------------------------------------------------------- + SSH textarea +----------------------------------------------------------------------- */ +.sf-textarea { + resize: vertical; + min-height: 90px; + font-family: "SF Mono", "Consolas", monospace; + font-size: 11px; +} + +/* ----------------------------------------------------------------------- + Version badge in header +----------------------------------------------------------------------- */ +.header-version { + font-size: 11px; + color: var(--text-muted); + opacity: .6; + padding: 0 2px; + font-variant-numeric: tabular-nums; +} diff --git a/app/static/app.js b/app/static/app.js index 3d5a921..8b80216 100644 --- a/app/static/app.js +++ b/app/static/app.js @@ -957,8 +957,18 @@ if (s.error_text) { html += '
' + _esc(s.error_text) + '
'; } + // Raw SSH log output (if available) + if (s.log_text) { + var logHtml = _esc(s.log_text) + .replace(/^(\d+)\s*$/gm, '<span class="log-bad-block">$1 ← BAD BLOCK</span>') + .replace(/\[WARNING\][^\n]*/g, '<span class="log-warn">$&</span>'); + html += '
' + logHtml + '
'; + } + // Bad block count badge + if (s.bad_blocks && s.bad_blocks > 0) { + html += '
' + s.bad_blocks + ' bad block(s) found
'; + } html += ''; - }); } else { html += '
No stage data yet.
'; } @@ -973,6 +983,10 @@ } } + // Monitored SMART attributes for inline colouring + var _SMART_CRITICAL = {5: true, 197: true, 198: true}; + var _SMART_WARN = {10: true, 188: true, 199: true}; + function _drawerRenderSmart(smart) { var panel = document.getElementById('drawer-panel-smart'); if (!panel) return; @@ -994,10 +1008,41 @@ if (t.started_at) html += '
Started: ' + _drawerFmtDt(t.started_at) + '
'; if (t.finished_at) html += '
Finished: ' + _drawerFmtDt(t.finished_at) + '
'; if (t.error_text) html += '
' + _esc(t.error_text) + '
'; + // Raw smartctl output (SSH mode) + if (t.raw_output) { + html += '
' + _esc(t.raw_output) + '
'; + } } html += ''; }); html += ''; + + // SMART attribute table (from SSH attribute parse) + var attrs = smart && smart.attrs; + if (attrs) { + html += '
'; + html += '
SMART Attributes
'; + if (attrs.failures && attrs.failures.length) { + html += '
✕ Failures: ' + _esc(attrs.failures.join('; ')) + '
'; + } + if (attrs.warnings && attrs.warnings.length) { + html += '
⚠ Warnings: ' + _esc(attrs.warnings.join('; ')) + '
'; + } + var attrMap = attrs.attrs || {}; + var monitoredIds = [5, 10, 188, 197, 198, 199]; + monitoredIds.forEach(function (id) { + var entry = attrMap[String(id)]; + if (!entry) return; + var raw = entry.raw; + var cls = raw > 0 ? (_SMART_CRITICAL[id] ? 'attr-fail' : 'attr-warn') : 'attr-ok'; + html += '
'; + html += '' + id + ' ' + _esc(entry.name) + ''; + html += '' + raw + ''; + html += '
'; + }); + html += '
'; + } + panel.innerHTML = html; } @@ -1078,4 +1123,21 @@ if (e.target.closest('#drawer-close-btn')) closeDrawer(); }); + // Reset button — clears SMART state for a drive + document.addEventListener('click', function (e) { + var btn = e.target.closest('.btn-reset'); + if (!btn) return; + var driveId = btn.dataset.driveId; + if (!driveId) return; + var operator = (window._operator || 'operator'); + fetch('/api/v1/drives/' + driveId + '/reset', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ operator: operator }), + }).then(function (r) { + if (!r.ok) return r.json().then(function (d) { showToast(d.detail || 'Reset failed', 'error'); }); + showToast('Drive reset — state cleared', 'success'); + }).catch(function () { showToast('Network error', 'error'); }); + }); + }()); diff --git a/app/templates/components/drives_table.html b/app/templates/components/drives_table.html index 1701b5b..b9c7de2 100644 --- a/app/templates/components/drives_table.html +++ b/app/templates/components/drives_table.html @@ -81,6 +81,10 @@ {%- set short_busy = drive.smart_short and drive.smart_short.state == 'running' %} {%- set long_busy = drive.smart_long and drive.smart_long.state == 'running' %} {%- set selectable = not bi_active and not short_busy and not long_busy %} + {%- set bi_done = drive.burnin and drive.burnin.state in ('passed', 'failed', 'cancelled', 'unknown') %} + {%- set smart_done = (drive.smart_short and drive.smart_short.state in ('passed','failed','aborted')) + or (drive.smart_long and drive.smart_long.state in ('passed','failed','aborted')) %} + {%- set can_reset = (bi_done or smart_done) and not bi_active and not short_busy and not long_busy %} {%- if selectable %} @@ -160,6 +164,12 @@ data-health="{{ drive.smart_health }}" {% if short_busy or long_busy %}disabled{% endif %} title="Start Burn-In">Burn-In + + {%- if can_reset %} + + {%- endif %} {%- endif %} diff --git a/app/templates/layout.html b/app/templates/layout.html 
index 94fb300..457074a 100644 --- a/app/templates/layout.html +++ b/app/templates/layout.html @@ -37,6 +37,7 @@ Audit Settings API + v{{ app_version if app_version is defined else '—' }} diff --git a/app/templates/settings.html b/app/templates/settings.html index ac38790..55f555b 100644 --- a/app/templates/settings.html +++ b/app/templates/settings.html @@ -91,6 +91,57 @@ + +
+
+ SSH (TrueNAS Direct) + {% if ssh_configured %} + Configured + {% else %} + Not configured — using REST API / mock + {% endif %} +
+

+ When configured, burn-in stages run smartctl and badblocks directly on TrueNAS over SSH, + enabling SMART attribute monitoring and real bad-block detection. Leave Host empty to use + the TrueNAS REST API (mock / dev mode). +

+
+ +
+ + +
+ + + + + + + + + + + + + + +
+ + + Either password or key auth. Key takes precedence if both are set. + Key is stored securely in /data/settings_overrides.json. + +
+ +
+
+ @@ -159,9 +210,14 @@
+ type="number" min="1" max="60" value="{{ editable.max_parallel_burnins }}"> How many jobs can run at the same time
+
+ ⚠ Running many simultaneous surface scans may saturate your storage controller + and produce unreliable results. Recommended: 2–4. +
@@ -348,6 +404,36 @@ } }); + // Parallel burn-in warning + var parallelInput = document.getElementById('max_parallel_burnins'); + var parallelWarn = document.getElementById('parallel-warn'); + if (parallelInput && parallelWarn) { + parallelInput.addEventListener('input', function () { + parallelWarn.style.display = parseInt(parallelInput.value, 10) > 8 ? '' : 'none'; + }); + } + + // Test SSH + var sshBtn = document.getElementById('test-ssh-btn'); + var sshResult = document.getElementById('ssh-test-result'); + if (sshBtn) { + sshBtn.addEventListener('click', async function () { + sshBtn.disabled = true; + sshBtn.textContent = 'Testing…'; + sshResult.style.display = 'none'; + try { + var resp = await fetch('/api/v1/settings/test-ssh', { method: 'POST' }); + var data = await resp.json(); + showResult(sshResult, resp.ok, resp.ok ? 'Connection OK' : (data.detail || 'Failed')); + } catch (e) { + showResult(sshResult, false, 'Network error'); + } finally { + sshBtn.disabled = false; + sshBtn.textContent = 'Test SSH Connection'; + } + }); + } + // Check for Updates var updBtn = document.getElementById('check-updates-btn'); var updResult = document.getElementById('update-result'); diff --git a/app/templates/stats.html b/app/templates/stats.html index 5417966..6f6f6cb 100644 --- a/app/templates/stats.html +++ b/app/templates/stats.html @@ -119,5 +119,65 @@ {% endif %}
+ + +
+ + +
+

Avg. Test Duration by Drive Size

+ {% if by_size %} +
+ + + + + + + + + + {% for s in by_size %} + + + + + + {% endfor %} + +
SizeJobsAvg Duration
{{ s.size_tb }} TB{{ s.total }}{{ s.avg_hours }}h
+
+ {% else %} +
No completed jobs yet.
+ {% endif %} +
+ + +
+

Failures by Stage

+ {% if by_failure_stage %} +
+ + + + + + + + + {% for f in by_failure_stage %} + + + + + {% endfor %} + +
StageCount
{{ f.failed_stage | replace('_',' ') | title }}{{ f.count }}
+
+ {% else %} +
No failures recorded.
+ {% endif %} +
+
{% endblock %} diff --git a/requirements.txt b/requirements.txt index ba5b5fa..b48d926 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ httpx pydantic-settings jinja2 sse-starlette +asyncssh