From b73b5251ae5e5a789c4f20a30621c9fdd9859cbd Mon Sep 17 00:00:00 2001 From: Brandon Walter <51866976+echoparkbaby@users.noreply.github.com> Date: Tue, 24 Feb 2026 00:08:29 -0500 Subject: [PATCH] =?UTF-8?q?Initial=20commit=20=E2=80=94=20TrueNAS=20Burn-I?= =?UTF-8?q?n=20Dashboard=20v0.5.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full-stack burn-in orchestration dashboard (Stages 1–6d complete): FastAPI backend, SQLite/WAL, SSE live dashboard, mock TrueNAS server, SMTP/webhook notifications, batch burn-in, settings UI, audit log, stats page, cancel SMART/burn-in, drag-to-reorder stages. Co-Authored-By: Claude Sonnet 4.6 --- .env.example | 39 + .gitignore | 31 + CLAUDE.md | 449 +++++ Dockerfile | 10 + SPEC.md | 296 +++ app/__init__.py | 0 app/burnin.py | 658 +++++++ app/config.py | 55 + app/database.py | 143 ++ app/logging_config.py | 50 + app/mailer.py | 453 +++++ app/main.py | 123 ++ app/models.py | 104 ++ app/notifier.py | 80 + app/poller.py | 290 +++ app/renderer.py | 136 ++ app/routes.py | 862 +++++++++ app/settings_store.py | 104 ++ app/static/app.css | 1939 ++++++++++++++++++++ app/static/app.js | 848 +++++++++ app/templates/audit.html | 55 + app/templates/components/drives_table.html | 174 ++ app/templates/components/modal_batch.html | 73 + app/templates/components/modal_start.html | 87 + app/templates/dashboard.html | 74 + app/templates/history.html | 93 + app/templates/job_detail.html | 122 ++ app/templates/job_print.html | 304 +++ app/templates/layout.html | 64 + app/templates/settings.html | 303 +++ app/templates/stats.html | 123 ++ app/truenas.py | 112 ++ docker-compose.yml | 21 + mock-truenas/Dockerfile | 9 + mock-truenas/app.py | 345 ++++ requirements.txt | 7 + 36 files changed, 8636 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 Dockerfile create mode 100644 SPEC.md create mode 100644 app/__init__.py create mode 100644 
app/burnin.py create mode 100644 app/config.py create mode 100644 app/database.py create mode 100644 app/logging_config.py create mode 100644 app/mailer.py create mode 100644 app/main.py create mode 100644 app/models.py create mode 100644 app/notifier.py create mode 100644 app/poller.py create mode 100644 app/renderer.py create mode 100644 app/routes.py create mode 100644 app/settings_store.py create mode 100644 app/static/app.css create mode 100644 app/static/app.js create mode 100644 app/templates/audit.html create mode 100644 app/templates/components/drives_table.html create mode 100644 app/templates/components/modal_batch.html create mode 100644 app/templates/components/modal_start.html create mode 100644 app/templates/dashboard.html create mode 100644 app/templates/history.html create mode 100644 app/templates/job_detail.html create mode 100644 app/templates/job_print.html create mode 100644 app/templates/layout.html create mode 100644 app/templates/settings.html create mode 100644 app/templates/stats.html create mode 100644 app/truenas.py create mode 100644 docker-compose.yml create mode 100644 mock-truenas/Dockerfile create mode 100644 mock-truenas/app.py create mode 100644 requirements.txt diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..77043cc --- /dev/null +++ b/.env.example @@ -0,0 +1,39 @@ +APP_HOST=0.0.0.0 +APP_PORT=8084 +DB_PATH=/data/app.db + +# Point at mock-truenas for dev, real TrueNAS for production +TRUENAS_BASE_URL=http://mock-truenas:8000 +TRUENAS_API_KEY=your-api-key-here +TRUENAS_VERIFY_TLS=false + +POLL_INTERVAL_SECONDS=12 +STALE_THRESHOLD_SECONDS=45 +MAX_PARALLEL_BURNINS=2 + +# Stuck job detection — jobs running longer than this are marked unknown +STUCK_JOB_HOURS=24 + +# Security — comma-separated IPs or CIDRs, empty = allow all +# ALLOWED_IPS=10.0.0.0/24,127.0.0.1 + +LOG_LEVEL=INFO + +# SMTP — daily digest at SMTP_REPORT_HOUR, immediate alerts on fail/pass +# Leave SMTP_HOST empty to disable all email. 
+# SMTP_HOST=smtp.duocircle.com +# SMTP_PORT=587 +# SMTP_SSL_MODE=starttls # starttls (default) | ssl | plain +# SMTP_TIMEOUT=60 # connection timeout in seconds +# SMTP_USER=you@domain.com +# SMTP_PASSWORD=yourpassword +# SMTP_FROM=burnin@domain.com +# SMTP_TO=brandon@domain.com +# SMTP_REPORT_HOUR=8 +# SMTP_DAILY_REPORT_ENABLED=true # set false to skip daily report without disabling alerts +# SMTP_ALERT_ON_FAIL=true +# SMTP_ALERT_ON_PASS=false + +# Webhook — POST JSON on burnin_passed / burnin_failed +# Works with Slack, Discord, ntfy.sh, Gotify, n8n, Home Assistant, etc. +# WEBHOOK_URL=https://ntfy.sh/your-topic diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ee84963 --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Python +__pycache__/ +*.py[cod] +*.pyo +*.pyd +.Python +*.egg-info/ +dist/ +build/ +.venv/ +venv/ +env/ + +# Environment +.env +.env.* +!.env.example + +# App data (SQLite DB, settings overrides, uploads) +data/ + +# macOS +.DS_Store +.AppleDouble +.LSOverride + +# Editors +.vscode/ +.idea/ +*.swp +*.swo diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6ba9618 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,449 @@ +# TrueNAS Burn-In Dashboard — Project Context + +> Drop this file in any new Claude session to resume work with full context. +> Last updated: 2026-02-22 (Stage 6d) + +--- + +## What This Is + +A self-hosted web dashboard for running and tracking hard-drive burn-in tests +against a TrueNAS CORE instance. Deployed on **maple.local** (10.0.0.138). 
+ +- **App URL**: http://10.0.0.138:8084 (or http://burnin.hellocomputer.xyz) +- **Stack path on maple.local**: `~/docker/stacks/truenas-burnin/` +- **Source (local mac)**: `~/Desktop/claude-sandbox/truenas-burnin/` +- **Compose synced to maple.local** via `scp` or manual copy + +### Stages completed + +| Stage | Description | Status | +|-------|-------------|--------| +| 1 | Mock TrueNAS CORE v2.0 API (15 drives, sda–sdo) | ✅ | +| 2 | Backend core (FastAPI, SQLite/WAL, poller, TrueNAS client) | ✅ | +| 3 | Dashboard UI (Jinja2, SSE live updates, dark theme) | ✅ | +| 4 | Burn-in orchestrator (queue, concurrency, start/cancel) | ✅ | +| 5 | History page, job detail page, CSV export | ✅ | +| 6 | Hardening (retries, JSON logging, IP allowlist, poller watchdog) | ✅ | +| 6b | UX overhaul (stats bar, alerts, batch, notifications, location, print, analytics) | ✅ | +| 6c | Settings overhaul (editable form, runtime store, SMTP fix, stage selection) | ✅ | +| 6d | Cancel SMART tests, Cancel All burn-ins, drag-to-reorder stages in modals | ✅ | +| 7 | Cut to real TrueNAS | 🔲 future | + +--- + +## File Map + +``` +truenas-burnin/ +├── docker-compose.yml # two services: mock-truenas + app +├── Dockerfile # app container +├── requirements.txt +├── .env.example +├── data/ # SQLite DB lives here (gitignored, created on deploy) +│ +├── mock-truenas/ +│ ├── Dockerfile +│ └── app.py # FastAPI mock of TrueNAS CORE v2.0 REST API +│ +└── app/ + ├── __init__.py + ├── config.py # pydantic-settings; reads .env + ├── database.py # schema, migrations, init_db(), get_db() + ├── models.py # Pydantic v2 models; StartBurninRequest has run_surface/run_short/run_long + profile property + ├── settings_store.py # runtime settings store — persists to /data/settings_overrides.json + ├── truenas.py # httpx async client with retry (lambda factory pattern) + ├── poller.py # poll loop, SSE pub/sub, stale detection, stuck-job check + ├── burnin.py # orchestrator, semaphore, stages, check_stuck_jobs() + ├── 
notifier.py # webhook + immediate email alerts on job completion + ├── mailer.py # daily HTML email + per-job alert email + ├── logging_config.py # structured JSON logging + ├── renderer.py # Jinja2 + filters (format_bytes, format_eta, format_elapsed, …) + ├── routes.py # all FastAPI route handlers + ├── main.py # app factory, IP allowlist middleware, lifespan + │ + ├── static/ + │ ├── app.css # full dark theme + mobile responsive + │ └── app.js # push notifications, batch, elapsed timers, inline edit + │ + └── templates/ + ├── layout.html # header nav: History, Stats, Audit, Settings, bell button + ├── dashboard.html # stats bar, failed banner, batch bar + ├── history.html + ├── job_detail.html # + Print/Export button + ├── audit.html # audit event log + ├── stats.html # analytics: pass rate by model, daily activity + ├── settings.html # editable 2-col form: SMTP (left) + Notifications/Behavior/Webhook (right) + ├── job_print.html # print view with client-side QR code (qrcodejs CDN) + └── components/ + ├── drives_table.html # checkboxes, elapsed time, location inline edit + ├── modal_start.html # single-drive burn-in modal + └── modal_batch.html # batch burn-in modal +``` + +--- + +## Architecture Overview + +``` +Browser ──HTMX SSE──▶ GET /sse/drives + │ + poller.subscribe() + │ + asyncio.Queue ◀─── poller.run() notifies after each poll + │ & after each burnin stage update + render drives_table.html + yield SSE "drives-update" event +``` + +- **Poller** (`poller.py`): runs every `POLL_INTERVAL_SECONDS` (default 12s), calls + TrueNAS `/api/v2.0/disk` and `/api/v2.0/core/get_jobs`, writes to SQLite, + notifies SSE subscribers +- **Burn-in** (`burnin.py`): `asyncio.Semaphore(max_parallel_burnins)` gates + concurrency. Jobs are created immediately (queued state), semaphore gates + actual execution. On startup, any interrupted running jobs → state=unknown; + queued jobs are re-enqueued. +- **SSE** (`routes.py /sse/drives`): one persistent connection per browser tab. 
+ Renders fresh `drives_table.html` HTML fragment on every notification. +- **HTMX** (`dashboard.html`): `hx-ext="sse"` + `sse-swap="drives-update"` + replaces `#drives-tbody` content without page reload. + +--- + +## Database Schema (SQLite WAL mode) + +```sql +-- drives: upsert by truenas_disk_id (the TrueNAS internal disk identifier) +drives (id, truenas_disk_id UNIQUE, devname, serial, model, size_bytes, + temperature_c, smart_health, last_polled_at) + +-- smart_tests: one row per drive+test_type combination (UNIQUE constraint) +smart_tests (id, drive_id FK, test_type CHECK('short','long'), + state, percent, started_at, eta_at, finished_at, error_text, + UNIQUE(drive_id, test_type)) + +-- burnin_jobs: one row per burn-in run (multiple per drive over time) +burnin_jobs (id, drive_id FK, profile, state CHECK(queued/running/passed/ + failed/cancelled/unknown), percent, stage_name, operator, + created_at, started_at, finished_at, error_text) + +-- burnin_stages: one row per stage per job +burnin_stages (id, burnin_job_id FK, stage_name, state, percent, + started_at, finished_at, error_text) + +-- audit_events: append-only log +audit_events (id, event_type, drive_id, job_id, operator, note, created_at) +``` + +--- + +## Burn-In Stage Definitions + +```python +STAGE_ORDER = { + "quick": ["precheck", "short_smart", "io_validate", "final_check"], + "full": ["precheck", "surface_validate", "short_smart", "long_smart", "final_check"], +} +``` + +The UI only exposes **full** profile (destructive). Quick profile exists for dev/testing. 
+ +--- + +## TrueNAS API Contracts Used + +| Method | Endpoint | Notes | +|--------|----------|-------| +| GET | `/api/v2.0/disk` | List all disks | +| POST | `/api/v2.0/smart/test` | Start SMART test `{disks:[name], type:"SHORT"\|"LONG"}` | +| GET | `/api/v2.0/core/get_jobs` | Filter `[["method","=","smart.test"]]` | +| POST | `/api/v2.0/core/job_abort` | `job_id` positional arg | +| GET | `/api/v2.0/smart/test/results/{disk}` | Per-disk SMART results | + +Auth: `Authorization: Bearer {TRUENAS_API_KEY}` header. + +--- + +## Config / Environment Variables + +All read from `.env` via `pydantic-settings`. See `.env.example` for full list. + +| Variable | Default | Notes | +|----------|---------|-------| +| `APP_HOST` | `0.0.0.0` | | +| `APP_PORT` | `8080` | | +| `DB_PATH` | `/data/app.db` | Inside container | +| `TRUENAS_BASE_URL` | `http://localhost:8000` | Point at mock or real TrueNAS | +| `TRUENAS_API_KEY` | `mock-key` | Real API key for prod | +| `TRUENAS_VERIFY_TLS` | `false` | Set true for prod with valid cert | +| `POLL_INTERVAL_SECONDS` | `12` | | +| `STALE_THRESHOLD_SECONDS` | `45` | UI shows warning if data older than this | +| `MAX_PARALLEL_BURNINS` | `2` | asyncio.Semaphore limit | +| `SURFACE_VALIDATE_SECONDS` | `45` | Mock only — duration of surface stage | +| `IO_VALIDATE_SECONDS` | `25` | Mock only — duration of I/O stage | +| `STUCK_JOB_HOURS` | `24` | Hours before a running job is auto-marked unknown | +| `LOG_LEVEL` | `INFO` | | +| `ALLOWED_IPS` | `` | Empty = allow all. 
Comma-sep IPs/CIDRs | +| `SMTP_HOST` | `` | Empty = email disabled | +| `SMTP_PORT` | `587` | | +| `SMTP_USER` | `` | | +| `SMTP_PASSWORD` | `` | | +| `SMTP_FROM` | `` | | +| `SMTP_TO` | `` | Comma-separated | +| `SMTP_REPORT_HOUR` | `8` | Local hour (0-23) to send daily report | +| `SMTP_ALERT_ON_FAIL` | `true` | Immediate email when a job fails | +| `SMTP_ALERT_ON_PASS` | `false` | Immediate email when a job passes | +| `WEBHOOK_URL` | `` | POST JSON on burnin_passed/burnin_failed. Works with ntfy, Slack, Discord, n8n | + +--- + +## Deploy Workflow + +### First deploy (already done) +```bash +# On maple.local +cd ~/docker/stacks/truenas-burnin +docker compose up -d --build +``` + +### Redeploy after code changes +```bash +# Copy changed files from mac to maple.local first, e.g.: +scp -P 2225 -r app/ brandon@10.0.0.138:~/docker/stacks/truenas-burnin/ + +# Then on maple.local: +ssh brandon@10.0.0.138 -p 2225 +cd ~/docker/stacks/truenas-burnin +docker compose up -d --build +``` + +### Reset the database (e.g. 
after schema changes) +```bash +# On maple.local — stop containers first +docker compose stop app +# Delete DB using alpine (container owns the file, sudo not available) +docker run --rm -v ~/docker/stacks/truenas-burnin/data:/data alpine rm -f /data/app.db +docker compose start app +``` + +### Check logs +```bash +docker compose logs -f app +docker compose logs -f mock-truenas +``` + +--- + +## Mock TrueNAS Server (`mock-truenas/app.py`) + +- 15 drives: `sda`–`sdo` +- Drive mix: 3× ST12000NM0008 12TB, 3× WD80EFAX 8TB, 2× ST16000NM001G 16TB, + 2× ST4000VN008 4TB, 2× TOSHIBA MG06ACA10TE 10TB, 1× HGST HUS728T8TAL5200 8TB, + 1× Seagate Barracuda ST6000DM003 6TB, 1× **FAIL001** (sdn) — always fails at ~30% +- SHORT test: 90s simulated; LONG test: 480s simulated; tick every 5s +- Debug endpoints: + - `POST /debug/reset` — reset all jobs/state + - `GET /debug/state` — dump current state + - `POST /debug/complete-all-jobs` — instantly complete all running tests + +--- + +## Key Implementation Patterns + +### Retry pattern — lambda factory (NOT coroutine object) +```python +# CORRECT: pass a factory so each retry creates a fresh coroutine +r = await _with_retry(lambda: self._client.get("/api/v2.0/disk"), "get_disks") + +# WRONG: coroutine is exhausted after first await, retry silently fails +r = await _with_retry(self._client.get("/api/v2.0/disk"), "get_disks") +``` + +### SSE template rendering +```python +# Use templates.env.get_template().render() — not TemplateResponse (that's a Response object) +html = templates.env.get_template("components/drives_table.html").render(drives=drives) +yield {"event": "drives-update", "data": html} +``` + +### Sticky thead scroll fix +```css +/* BOTH axes required on table-wrap for position:sticky to work on thead */ +.table-wrap { + overflow: auto; /* NOT overflow-x: auto */ + max-height: calc(100vh - 130px); +} +thead { position: sticky; top: 0; z-index: 10; } +``` + +### export.csv route ordering +```python +# MUST register export.csv 
BEFORE /{job_id} — FastAPI tries int() on "export.csv" +@router.get("/api/v1/burnin/export.csv") # first +async def burnin_export_csv(...): ... + +@router.get("/api/v1/burnin/{job_id}") # second +async def burnin_get(job_id: int, ...): ... +``` + +--- + +## Known Issues / Past Bugs Fixed + +| Bug | Root Cause | Fix | +|-----|-----------|-----| +| `_execute_stages` used `STAGE_ORDER[profile]` ignoring custom order | Stage order stored in DB but not read back | `_run_job` reads stages from `burnin_stages ORDER BY id`; `_execute_stages` accepts `stages: list[str]` | +| Poller stuck at 'running' after completion | `_sync_history()` had early-return guard when state=running | Removed guard — `_sync_history` only called when job not in active dict | +| DB schema tables missing after edit | Tables split into separate variable never passed to `executescript()` | Put all tables in single `SCHEMA` string | +| Retry not retrying | `_with_retry(coro)` — coroutine exhausted after first fail | Changed to `_with_retry(factory: Callable[[], Coroutine])` | +| `error_text` overwritten | `_finish_stage(success=False)` overwrote error set by stage handler | `_finish_stage` omits `error_text` column in SQL when param is None | +| Cancelled stage showed 'failed' | `_execute_stages` called `_finish_stage(success=False)` on cancel | Check `_is_cancelled()`, call `_cancel_stage()` instead | +| export.csv returns 422 | Route registered after `/{job_id}`, FastAPI tries `int("export.csv")` | Move export route before parameterized route | +| Old drive names persist after mock rename | Poller upserts by `truenas_disk_id`, old rows stay | Delete `app.db` and restart | +| First row clipped behind sticky thead | `overflow-x: auto` only creates partial stacking context | Use `overflow: auto` (both axes) on `.table-wrap` | +| `rm data/app.db` permission denied | Container owns the file | Use `docker run --rm -v .../data:/data alpine rm -f /data/app.db` | +| First row clipped after Stage 6b | Stats 
bar added 70px but max-height not updated | `max-height: calc(100vh - 205px)` | +| SMTP "Connection unexpectedly closed" | `_send_email` used `settings.smtp_port` (587 default) even in SSL mode | Derive port from mode via `_MODE_PORTS` dict; SSL→465, STARTTLS→587, Plain→25 | +| SSL mode missing EHLO | `smtplib.SMTP_SSL` was created without calling `ehlo()` | Added `server.ehlo()` after both SSL and STARTTLS connections | + +--- + +## Stage 7 — Cutting to Real TrueNAS (TODO) + +When ready to test against a real TrueNAS CORE box: + +1. In `.env` on maple.local, set: + ```env + TRUENAS_BASE_URL=https://10.0.0.203 # or whatever your TrueNAS IP is + TRUENAS_API_KEY=your-real-key-here + TRUENAS_VERIFY_TLS=false # unless you have a valid cert + ``` +2. Comment out `mock-truenas` service in `docker-compose.yml` (or leave it running — harmless) +3. Verify TrueNAS CORE v2.0 API contract matches what `truenas.py` expects: + - `GET /api/v2.0/disk` returns list with `name`, `serial`, `model`, `size`, `temperature` + - `GET /api/v2.0/core/get_jobs` with filter `[["method","=","smart.test"]]` + - `POST /api/v2.0/smart/test` accepts `{disks: [devname], type: "SHORT"|"LONG"}` +4. Check that disk names match expected format (TrueNAS CORE uses `ada0`, `da0`, etc. — not `sda`) + - You may need to update mock drive names back or adjust poller logic +5. 
Delete `app.db` to clear mock drive rows before first real poll + +--- + +## Feature Reference (Stage 6b) + +### New Pages +| URL | Description | +|-----|-------------| +| `/stats` | Analytics — pass rate by model, daily activity last 14 days | +| `/audit` | Audit log — last 200 events with drive/operator context | +| `/settings` | Editable 2-col settings form (SMTP, Notifications, Behavior, Webhook) | +| `/history/{id}/print` | Print-friendly job report with QR code | + +### New API Routes (6b + 6c) +| Method | Path | Description | +|--------|------|-------------| +| `PATCH` | `/api/v1/drives/{id}` | Update `notes` and/or `location` | +| `POST` | `/api/v1/settings` | Save runtime settings to `/data/settings_overrides.json` | +| `POST` | `/api/v1/settings/test-smtp` | Test SMTP connection without sending email | + +### Notifications +- **Browser push**: Bell icon in header → `Notification.requestPermission()`. Fires on `job-alert` SSE event (burnin pass/fail). +- **SSE alert event**: `job-alert` event type on `/sse/drives`. JS listens via `htmx:sseMessage`. +- **Immediate email**: `send_job_alert()` in mailer.py. Triggered by `notifier.notify_job_complete()` from burnin.py. +- **Webhook**: `notifier._send_webhook()` — POST JSON to `WEBHOOK_URL`. Payload includes event, job_id, devname, serial, model, state, operator, error_text. 
+ +### Stuck Job Detection +- `burnin.check_stuck_jobs()` runs every 5 poll cycles (~1 min) +- Jobs running longer than `STUCK_JOB_HOURS` (default 24h) → state=unknown +- Logged at CRITICAL level; audit event written + +### Batch Burn-In +- Checkboxes on each idle/selectable drive row +- Batch bar appears in filter row when any drives selected +- Uses existing `POST /api/v1/burnin/start` with multiple `drive_ids` +- Requires operator name + explicit confirmation checkbox (no serial required) +- JS `checkedDriveIds` Set persists across SSE swaps via `restoreCheckboxes()` + +### Drive Location +- `location` and `notes` fields added to drives table via ALTER TABLE migration +- Inline click-to-edit on location field in drive name cell +- Saves via `PATCH /api/v1/drives/{id}` on blur/Enter; restores on Escape + +## Feature Reference (Stage 6c) + +### Settings Page +- Two-column layout: SMTP card (left, wider) + Notifications / Behavior / Webhook stacked (right) +- Read-only system card at bottom (TrueNAS URL, poll interval, etc.) — restart required badge +- All changes save instantly via `POST /api/v1/settings` → `settings_store.save()` → `/data/settings_overrides.json` +- Overrides loaded on startup in `main.py` lifespan via `settings_store.init()` +- Connection mode dropdown auto-sets port: STARTTLS→587, SSL/TLS→465, Plain→25 +- Test Connection button at top of SMTP card — tests live settings without sending email +- Brand logo in header is now a clickable `` home link + +### SMTP Port Derivation +```python +# mailer.py — port is derived from mode, NOT from settings.smtp_port +_MODE_PORTS = {"starttls": 587, "ssl": 465, "plain": 25} +port = _MODE_PORTS.get(mode, 587) +``` +Never use `settings.smtp_port` in mailer — it's kept in config for `.env` backward compat only. + +### Burn-In Stage Selection +`StartBurninRequest` no longer takes `profile: str`. 
Instead takes: +- `run_surface: bool = True` — surface validate (destructive write test) +- `run_short: bool = True` — Short SMART (non-destructive) +- `run_long: bool = True` — Long SMART (non-destructive) + +Profile string is computed as a property. Profiles: `full`, `surface_short`, `surface_long`, +`surface`, `short_long`, `short`, `long`. Precheck and final_check always run. + +`STAGE_ORDER` in `burnin.py` has all 7 profile combinations. + +`_recalculate_progress()` uses `_STAGE_BASE_WEIGHTS` dict (per-stage weights) and computes +overall % dynamically from actual `burnin_stages` rows — no profile lookup needed. + +In the UI, both single-drive and batch modals show 3 checkboxes. If surface is unchecked: +- Destructive warning is hidden +- Serial confirmation field is hidden (single modal) +- Confirmation checkbox is hidden (batch modal) + +### Table Scroll Fix +```css +.table-wrap { + max-height: calc(100vh - 205px); /* header(44) + main-pad(20) + stats-bar(70) + filter-bar(46) + buffer */ +} +``` +If stats bar or other content height changes, update this offset. + +## Feature Reference (Stage 6d) + +### Cancel Functionality +| What | How | +|------|-----| +| Cancel running Short SMART | `✕ Short` button appears in action col when `short_busy`; calls `POST /api/v1/drives/{id}/smart/cancel` with `{type:"short"}` | +| Cancel running Long SMART | `✕ Long` button appears when `long_busy`; same route with `{type:"long"}` | +| Cancel individual burn-in | `✕ Burn-In` button (was "Cancel") shown when `bi_active`; calls `POST /api/v1/burnin/{id}/cancel` | +| Cancel All Running | Red `✕ Cancel All Burn-Ins` button appears in filter bar when any burn-in jobs are active; JS collects all `.btn-cancel[data-job-id]` and cancels each | + +**SMART cancel route** (`POST /api/v1/drives/{drive_id}/smart/cancel`): +1. Fetches all running TrueNAS jobs via `client.get_smart_jobs()` +2. Finds job where `arguments[0].disks` contains the drive's devname +3. 
Calls `client.abort_job(tn_job_id)`
+4. Updates `smart_tests` table row to `state='aborted'`
+
+### Stage Reordering
+- Default order changed to: **Short SMART → Long SMART → Surface Validate** (non-destructive first)
+- Drag handles (⠿) on each stage row in both single and batch modals
+- HTML5 drag-and-drop, no external library
+- `getStageOrder(listId)` reads current DOM order of checked stages
+- `stage_order: ["short_smart","long_smart","surface_validate"]` sent in API body
+- `StartBurninRequest.stage_order: list[str] | None` — validated against allowed stage names
+- `burnin.start_job()` accepts `stage_order` param; builds: `["precheck"] + stage_order + ["final_check"]`
+- `_run_job()` reads stage names back from `burnin_stages ORDER BY id` — so custom order is honoured
+- Destructive warning / serial confirmation still triggered by `stage-surface` checkbox ID (order-independent)
+
+## NPM / DNS Setup
+
+- Proxy host: `burnin.hellocomputer.xyz` → `http://10.0.0.138:8084`
+- Authelia protection: recommended (no built-in auth in app)
+- DNS: `burnin.hellocomputer.xyz` CNAME → `sandon.hellocomputer.xyz` (proxied: false)
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..80b90ab
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.12-slim
+
+WORKDIR /opt/app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY app/ ./app/
+
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8084"]
diff --git a/SPEC.md b/SPEC.md
new file mode 100644
index 0000000..5d01af1
--- /dev/null
+++ b/SPEC.md
@@ -0,0 +1,296 @@
+# TrueNAS Burn-In — Project Specification
+
+**Version:** 0.5.0
+**Status:** Active Development
+**Audience:** Public / Open Source
+
+---
+
+## Overview
+
+TrueNAS Burn-In is a self-hosted web dashboard that runs on a separate machine or VM and connects to a TrueNAS system via SSH to automate and monitor the drive burn-in process.
It is designed for users who want to validate new hard drives before adding them to a ZFS pool — where reliability is non-negotiable. + +The app is not a TrueNAS plugin and does not run on TrueNAS itself. It connects remotely over SSH to issue smartctl and badblocks commands, polls results, and presents everything through a dark-themed real-time dashboard. It is deployed via Docker Compose and configured through a Settings UI and `.env` file. + +--- + +## Core Philosophy + +- Drives going into a ZFS pool must be rock solid. The app's job is to surface any doubt about a drive before it earns a slot in the pool. +- Burn-in is always triggered manually. There is no scheduling or automation. +- Simplicity over features. The README and Settings UI should be sufficient for any technically capable user to be up and running without hand-holding. +- Recommend safe defaults. Warn loudly when users push limits (too many parallel jobs, destructive operations, high temperatures). + +--- + +## Test Sequence + +Every drive goes through the following stages in order. A failure at any stage stops that drive immediately. + +### Stage 1 — Short SMART Test +``` +smartctl -t short -d sat /dev/sdX +``` +Polls for completion via: +``` +smartctl -a -d sat /dev/sdX | grep -i remaining +``` +Expected duration: ~2 minutes. If the test fails or reports any critical attribute violation, the drive is marked FAILED and no further tests run. + +### Stage 2 — Long SMART Test +``` +smartctl -t long -d sat /dev/sdX +``` +Expected duration: varies by drive size (typically 3–6 hours for 8–12TB drives). Same polling approach. Same failure behavior. + +### Stage 3 — Surface Scan (Badblocks, Destructive) +``` +badblocks -wsv -b 4096 -p 1 /dev/sdX +``` +This is a **destructive write test**. The UI must display a prominent warning before this stage begins, and again in the Settings page where the behavior is documented. The `-w` flag overwrites all data on the drive. 
This is intentional — these are new drives being validated before pool use. + +**Failure threshold:** 2 or more bad blocks found triggers immediate abort and FAILED status. The threshold should be configurable in Settings (default: 2). + +--- + +## SMART Attributes to Monitor + +The following attributes are checked after each SMART test and continuously during the burn-in run. Any non-zero value on pre-fail attributes is treated as a warning; crossing defined thresholds triggers failure. + +| ID | Attribute | Threshold | Notes | +|-----|----------------------------|--------------|--------------------------------------------| +| 5 | Reallocated_Sector_Ct | > 0 = FAIL | Any reallocation is disqualifying for ZFS | +| 10 | Spin_Retry_Count | > 0 = WARN | Mechanical stress indicator | +| 188 | Command_Timeout | > 0 = WARN | Drive not responding to commands | +| 197 | Current_Pending_Sector | > 0 = FAIL | Sectors waiting to be reallocated | +| 198 | Offline_Uncorrectable | > 0 = FAIL | Unrecoverable read errors | +| 199 | UDMA_CRC_Error_Count | > 0 = WARN | Likely cable/controller, flag for review | + +--- + +## Failure Behavior + +When a drive fails at any stage: + +1. All remaining tests for that drive are immediately cancelled. +2. The drive is marked `FAILED` in the UI with the specific failure reason (e.g., `FAILED (SURFACE VALIDATE)`, `FAILED (REALLOCATED SECTORS)`). +3. An alert is fired immediately via whichever notification channels are enabled in Settings (email and/or webhook — both can fire simultaneously). +4. The failed drive's row is visually distinct in the dashboard and cannot be accidentally re-queued without an explicit reset action. + +A **Reset** action clears the test state for a drive so it can be re-queued. It does not cancel in-progress tests — the Cancel button does that. Reset is only available on completed drives (passed, failed, or interrupted). 
+ +--- + +## UI + +### Dashboard (Main View) + +- **Stats bar:** Total drives, Running, Failed, Passed, Idle counts. +- **Filter chips:** All / Running / Failed / Passed / Idle — filters the table below. +- **Drive table columns:** Drive (device name + model), Serial, Size, Temp, Health, Short SMART, Long SMART, Burn-In, Actions. +- **Temperature display:** Color-coded. Green ≤ 45°C, Yellow 46–54°C, Red ≥ 55°C. Thresholds configurable in Settings. +- **Running tests:** Show an animated progress bar with percentage and elapsed time instead of a static badge. +- **Actions per drive:** Short, Long, Burn-In buttons. Cancel button replaces Start when a test is running. +- **Row click:** Opens the Log Drawer for that drive. + +### Log Drawer + +Slides up from the bottom of the page when a drive row is clicked. Does not navigate away — the table remains visible and scrollable above. + +Three tabs: +- **badblocks** — live tail of badblocks stdout, including error lines with sector numbers highlighted in red. +- **SMART** — output of the last smartctl run for this drive, with monitored attribute values highlighted. +- **Events** — chronological timeline of everything that happened to this drive (test started, test passed, failure detected, alert sent, etc.). + +Features: +- Auto-scroll toggle (on by default). +- Blinking cursor on the active output line of a running test. +- Close button or click the same row again to dismiss. +- Failed drives show error lines in red with exact bad block sector numbers. + +### History Page + +Per-drive history. Each drive (identified by serial number) has a log of every burn-in run ever performed, with timestamps, results, and duration. Not per-session — per individual drive. + +### Audit Page + +Application-level event log. Records: test started, test cancelled, settings changed, alert sent, container restarted, SSH connection lost/restored. Useful for debugging and for open source users troubleshooting their setup. 
+ +### Stats Page + +Aggregate statistics across all drives and all time. Pass rate, average test duration by drive size, failure breakdown by failure type. + +### Settings Page + +Divided into sections: + +**EMAIL (SMTP)** +- Host, Mode (STARTTLS/SSL/plain), Port, Timeout, Username, Password, From, To. +- Test Connection button. +- Enable/disable toggle. + +**WEBHOOK** +- Single URL field. POST JSON payload on `burnin_passed` and `burnin_failed` events. +- Compatible with ntfy.sh, Slack, Discord, n8n, and any generic HTTP POST receiver. +- Leave blank to disable. + +**NOTIFICATIONS** +- Daily Report toggle (sends full drive status email at a configured hour). +- Alert on Failure toggle (immediate — fires both email and webhook if both configured). +- Alert on Pass toggle. + +**BURN-IN BEHAVIOR** +- Max Parallel Burn-Ins (default: 2, max: 60). +- Warning displayed inline when set above 8: "Running many simultaneous surface scans may saturate your storage controller and produce unreliable results. Recommended: 2–4." +- Bad block failure threshold (default: 2). +- Stuck job threshold in hours (default: 24 — jobs running longer than this are auto-marked Unknown). + +**TEMPERATURE** +- Warning threshold (default: 46°C). +- Critical threshold (default: 55°C). + +**SSH** +- TrueNAS host/IP. +- Port (default: 22). +- Username. +- Authentication: SSH key (paste or upload) or password. +- Test Connection button. + +**SYSTEM** *(restart required to change — set in .env)* +- TrueNAS API URL. +- Verify TLS toggle. +- Poll interval (default: 12s). +- Stale threshold (default: 45s). +- IP allowlist. +- Log level (DEBUG / INFO / WARN / ERROR). + +**VERSION & UPDATES** +- Displays current version (starting at 0.5.0). +- "Check for Updates" button — queries GitHub releases API and shows latest version with a link if an update is available. + +--- + +## Data Persistence + +**SQLite** — single file, zero config, atomic writes. No data loss on container restart. 
+ +On restart, any drive that was in a `running` state is automatically transitioned to `interrupted`. The user sees "INTERRUPTED" in the burn-in column and must manually reset and re-queue the drive. The partial log up to the point of interruption is preserved and viewable in the Log Drawer. + +Drive location labels persist in SQLite tied to serial number, so a drive's label survives container restarts and reappears automatically when the drive is detected again. + +--- + +## Notifications + +### Email +Standard SMTP. Fires on: burn-in failure (immediate), burn-in pass (if enabled), daily report (scheduled). + +Failure email includes: drive name, serial number, size, failure stage, failure reason, bad block count (if applicable), SMART attribute snapshot, timestamp. + +### Webhook +Single HTTP POST to configured URL with JSON body: +```json +{ + "event": "burnin_failed", + "drive": "sda", + "serial": "WDZ1A002", + "size": "12 TB", + "failure_reason": "SURFACE VALIDATE", + "bad_blocks": 2, + "timestamp": "2025-01-15T03:21:04Z" +} +``` +Compatible with ntfy.sh, Slack incoming webhooks, Discord webhooks, n8n HTTP trigger nodes. + +Both email and webhook fire simultaneously when both are configured and enabled. User controls each independently via Settings toggles. + +--- + +## SSH Architecture + +The app connects to TrueNAS over SSH from the host running the Docker container. It does not use the TrueNAS web API for drive operations — all smartctl and badblocks commands are issued directly over SSH. + +Connection details are configured in Settings (not `.env`). Supports: +- Password authentication. +- SSH key authentication (key pasted or uploaded in Settings UI). +- Custom port. +- Test Connection button validates credentials before saving. + +On SSH disconnection mid-test: the test process on TrueNAS may continue running (SSH disconnection does not kill the remote process if launched correctly with nohup or similar). 
The app marks the drive as `interrupted` in its own state, attempts to reconnect, and resumes polling if the process is still running. If the remote process is gone, the drive stays `interrupted`. + +--- + +## API + +A REST API is available at `/api/v1/`. It is documented via OpenAPI at `/openapi.json` and browsable at `/api` in the dashboard. Version displayed: 0.1.0 (API version tracked independently from app version). + +Key endpoints: +- `GET /api/v1/drives` — list all drives with current status. +- `GET /api/v1/drives/{drive_id}` — single drive detail. +- `PATCH /api/v1/drives/{drive_id}` — update drive metadata (e.g., location label). +- `POST /api/v1/drives/{drive_id}/smart/start` — start a SMART test. +- `POST /api/v1/drives/{drive_id}/smart/cancel` — cancel a SMART test. +- `POST /api/v1/burnin/start` — start a burn-in job. +- `POST /api/v1/burnin/{job_id}/cancel` — cancel a burn-in job. +- `GET /sse/drives` — Server-Sent Events stream powering the real-time dashboard UI. +- `GET /health` — health check endpoint. + +The API makes this app a strong candidate for MCP server integration, allowing an AI assistant to query drive status, start tests, or receive alerts conversationally. + +--- + +## Deployment + +Docker Compose. Minimum viable setup: + +```bash +git clone https://github.com/yourusername/truenas-burnin +cd truenas-burnin +cp .env.example .env +# Edit .env for system-level settings (TrueNAS URL, poll interval, etc.) +docker compose up -d +``` + +Navigate to `http://your-vm-ip:port` and complete SSH and SMTP configuration in Settings. + +All other configuration is done through the Settings UI — no manual file editing required beyond `.env` for system-level values. + +--- + +## mock-truenas + +A companion Docker service (`mock-truenas`) that simulates the TrueNAS API for UI development and testing without real hardware. It mocks drive discovery, SMART test responses, and badblocks progress. 
Used exclusively for development — not deployed in production. + +### Testing on Real TrueNAS (v1.0 Milestone Plan) + +To validate against real hardware: + +1. Switch `TRUENAS_URL` in `.env` from `http://mock-truenas:8000` to your real TrueNAS IP/hostname. +2. Ensure SSH is enabled on TrueNAS (System → Services → SSH). +3. Configure SSH credentials in Settings and use Test Connection to verify. +4. Start with a single idle drive — run Short SMART only first. +5. Verify the log drawer shows real smartctl output. +6. If successful, proceed to Long SMART, then a full burn-in on a drive you're comfortable wiping. +7. Confirm an alert email is received on completion. +8. Scale to 2–4 drives simultaneously and monitor system resource warnings. + +**v1.0 is considered production-ready when:** the app runs reliably on a real TrueNAS system with 10 simultaneous drives, a failure alert email is received correctly, and a passing drive's history is preserved across a container restart. + +--- + +## Version + +- App version starts at **0.5.0** +- Displayed on the dashboard landing page header and in Settings. +- Update check in Settings queries GitHub releases API. +- API version tracked separately, currently **0.1.0**. + +--- + +## Out of Scope (v1.0) + +- Scheduled or automated burn-in triggering. +- Non-destructive badblocks mode (read-only surface scan). +- Multi-TrueNAS support (single host only). +- User authentication / login wall (single-user, self-hosted, IP allowlist is sufficient). +- Mobile-optimized UI (desktop dashboard only). diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/burnin.py b/app/burnin.py new file mode 100644 index 0000000..def9a55 --- /dev/null +++ b/app/burnin.py @@ -0,0 +1,658 @@ +""" +Burn-in orchestrator. + +Manages a FIFO queue of burn-in jobs capped at MAX_PARALLEL_BURNINS concurrent +executions. Each job runs stages sequentially; a failed stage aborts the job. 
+ +State is persisted to SQLite throughout — DB is source of truth. + +On startup: + - Any 'running' jobs from a previous run are marked 'unknown' (interrupted). + - Any 'queued' jobs are re-enqueued automatically. + +Cancellation: + - cancel_job() sets DB state to 'cancelled'. + - Running stage coroutines check _is_cancelled() at POLL_INTERVAL boundaries + and abort within a few seconds of the cancel request. +""" + +import asyncio +import logging +import time +from datetime import datetime, timezone + +import aiosqlite + +from app.config import settings +from app.truenas import TrueNASClient + +log = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Stage definitions +# --------------------------------------------------------------------------- + +STAGE_ORDER: dict[str, list[str]] = { + # Legacy + "quick": ["precheck", "short_smart", "io_validate", "final_check"], + # Single-stage selectable profiles + "surface": ["precheck", "surface_validate", "final_check"], + "short": ["precheck", "short_smart", "final_check"], + "long": ["precheck", "long_smart", "final_check"], + # Two-stage combos + "surface_short": ["precheck", "surface_validate", "short_smart", "final_check"], + "surface_long": ["precheck", "surface_validate", "long_smart", "final_check"], + "short_long": ["precheck", "short_smart", "long_smart", "final_check"], + # All three + "full": ["precheck", "surface_validate", "short_smart", "long_smart", "final_check"], +} + +# Per-stage base weights used to compute overall job % progress dynamically +_STAGE_BASE_WEIGHTS: dict[str, int] = { + "precheck": 5, + "surface_validate": 65, + "short_smart": 12, + "long_smart": 13, + "io_validate": 10, + "final_check": 5, +} + +POLL_INTERVAL = 5.0 # seconds between progress checks during active stages + +# --------------------------------------------------------------------------- +# Module-level state (initialized in init()) +# 
# Module-level state (populated by init()).
_semaphore: asyncio.Semaphore | None = None
_client: TrueNASClient | None = None


def _now() -> str:
    """Current UTC time as an ISO-8601 string (stored as TEXT in SQLite)."""
    return datetime.now(timezone.utc).isoformat()


def _db():
    """Open a fresh WAL-mode connection. Caller must use 'async with'."""
    return aiosqlite.connect(settings.db_path)


# ---------------------------------------------------------------------------
# Init + startup reconciliation
# ---------------------------------------------------------------------------

async def init(client: TrueNASClient) -> None:
    """Initialise the orchestrator and reconcile jobs left over from a previous run.

    - Jobs still marked 'running' were interrupted by a restart: mark 'unknown'.
    - Jobs marked 'queued' are re-enqueued as fresh asyncio tasks.
    """
    global _semaphore, _client
    _semaphore = asyncio.Semaphore(settings.max_parallel_burnins)
    _client = client

    async with _db() as db:
        db.row_factory = aiosqlite.Row
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute("PRAGMA foreign_keys=ON")

        # Mark interrupted running jobs as unknown
        await db.execute(
            "UPDATE burnin_jobs SET state='unknown', finished_at=? WHERE state='running'",
            (_now(),),
        )

        # Re-enqueue previously queued jobs
        cur = await db.execute(
            "SELECT id FROM burnin_jobs WHERE state='queued' ORDER BY created_at"
        )
        queued = [r["id"] for r in await cur.fetchall()]
        await db.commit()

    for job_id in queued:
        asyncio.create_task(_run_job(job_id))

    log.info("Burn-in orchestrator ready (max_concurrent=%d)", settings.max_parallel_burnins)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

async def start_job(drive_id: int, profile: str, operator: str,
                    stage_order: list[str] | None = None) -> int:
    """Create and enqueue a burn-in job. Returns the new job ID.

    If stage_order is provided (e.g. ["short_smart","long_smart","surface_validate"]),
    the job runs those stages in that order (precheck and final_check are always
    prepended/appended). Otherwise the preset STAGE_ORDER[profile] is used.

    Raises:
        ValueError: if the profile name is unknown, or the drive already has
            an active (queued/running) burn-in job.
    """
    now = _now()

    # Build the actual stage list
    if stage_order is not None:
        stages = ["precheck"] + list(stage_order) + ["final_check"]
    else:
        # FIX: was a bare STAGE_ORDER[profile] lookup — an unknown profile name
        # surfaced as an opaque KeyError. Raise ValueError for consistency with
        # the duplicate-job rejection below, which callers already handle.
        try:
            stages = STAGE_ORDER[profile]
        except KeyError:
            raise ValueError(f"Unknown burn-in profile: {profile!r}") from None

    async with _db() as db:
        db.row_factory = aiosqlite.Row
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute("PRAGMA foreign_keys=ON")

        # Reject duplicate active burn-in for same drive
        cur = await db.execute(
            "SELECT COUNT(*) FROM burnin_jobs WHERE drive_id=? AND state IN ('queued','running')",
            (drive_id,),
        )
        if (await cur.fetchone())[0] > 0:
            raise ValueError("Drive already has an active burn-in job")

        # Create job (RETURNING requires SQLite >= 3.35)
        cur = await db.execute(
            """INSERT INTO burnin_jobs (drive_id, profile, state, percent, operator, created_at)
               VALUES (?,?,?,?,?,?) RETURNING id""",
            (drive_id, profile, "queued", 0, operator, now),
        )
        job_id = (await cur.fetchone())["id"]

        # Create stage rows in the desired execution order
        for stage_name in stages:
            await db.execute(
                "INSERT INTO burnin_stages (burnin_job_id, stage_name, state) VALUES (?,?,?)",
                (job_id, stage_name, "pending"),
            )

        await db.execute(
            """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
               VALUES (?,?,?,?,?)""",
            ("burnin_queued", drive_id, job_id, operator, f"Queued {profile} burn-in"),
        )
        await db.commit()

    asyncio.create_task(_run_job(job_id))
    log.info("Burn-in job %d queued (drive_id=%d profile=%s operator=%s)",
             job_id, drive_id, profile, operator)
    return job_id
async def cancel_job(job_id: int, operator: str) -> bool:
    """Cancel a queued or running job. Returns True if state was changed."""
    async with _db() as db:
        db.row_factory = aiosqlite.Row
        await db.execute("PRAGMA journal_mode=WAL")

        cur = await db.execute(
            "SELECT state, drive_id FROM burnin_jobs WHERE id=?", (job_id,)
        )
        row = await cur.fetchone()
        if not row or row["state"] not in ("queued", "running"):
            return False

        await db.execute(
            "UPDATE burnin_jobs SET state='cancelled', finished_at=? WHERE id=?",
            (_now(), job_id),
        )
        await db.execute(
            "UPDATE burnin_stages SET state='cancelled' WHERE burnin_job_id=? AND state IN ('pending','running')",
            (job_id,),
        )
        await db.execute(
            """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
               VALUES (?,?,?,?,?)""",
            ("burnin_cancelled", row["drive_id"], job_id, operator, "Cancelled by operator"),
        )
        await db.commit()

    log.info("Burn-in job %d cancelled by %s", job_id, operator)
    return True


# ---------------------------------------------------------------------------
# Job runner
# ---------------------------------------------------------------------------

async def _run_job(job_id: int) -> None:
    """Acquire semaphore slot, execute all stages, persist final state.

    Runs as a detached asyncio task. The burnin_jobs row is the source of
    truth throughout; SSE subscribers are poked after each state change.
    """
    assert _semaphore is not None, "burnin.init() not called"

    async with _semaphore:
        # The job may have been cancelled while waiting in the queue.
        if await _is_cancelled(job_id):
            return

        # Transition queued → running and snapshot drive info + stage plan.
        async with _db() as db:
            await db.execute("PRAGMA journal_mode=WAL")
            row = await (await db.execute(
                "SELECT drive_id, profile FROM burnin_jobs WHERE id=?", (job_id,)
            )).fetchone()
            if not row:
                return
            drive_id, profile = row[0], row[1]

            cur = await db.execute("SELECT devname, serial, model FROM drives WHERE id=?", (drive_id,))
            devname_row = await cur.fetchone()
            if not devname_row:
                return
            devname = devname_row[0]
            drive_serial = devname_row[1]
            drive_model = devname_row[2]

            await db.execute(
                "UPDATE burnin_jobs SET state='running', started_at=? WHERE id=?",
                (_now(), job_id),
            )
            await db.execute(
                """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
                   VALUES (?,?,?,(SELECT operator FROM burnin_jobs WHERE id=?),?)""",
                ("burnin_started", drive_id, job_id, job_id, f"Started {profile} burn-in on {devname}"),
            )
            # Read stage order from DB (respects any custom order set at job creation)
            stage_cur = await db.execute(
                "SELECT stage_name FROM burnin_stages WHERE burnin_job_id=? ORDER BY id",
                (job_id,),
            )
            job_stages = [r[0] for r in await stage_cur.fetchall()]
            await db.commit()

        _push_update()
        log.info("Burn-in started", extra={"job_id": job_id, "devname": devname, "profile": profile})

        success = False
        error_text = None
        try:
            success = await _execute_stages(job_id, job_stages, devname, drive_id)
        except asyncio.CancelledError:
            pass  # deliberate: cancellation check below decides final handling
        except Exception as exc:
            error_text = str(exc)
            log.exception("Burn-in raised exception", extra={"job_id": job_id, "devname": devname})

        # cancel_job() already wrote the terminal state — do not overwrite it.
        if await _is_cancelled(job_id):
            return

        final_state = "passed" if success else "failed"
        async with _db() as db:
            await db.execute("PRAGMA journal_mode=WAL")
            await db.execute(
                "UPDATE burnin_jobs SET state=?, percent=?, finished_at=?, error_text=? WHERE id=?",
                (final_state, 100 if success else None, _now(), error_text, job_id),
            )
            await db.execute(
                """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
                   VALUES (?,?,?,(SELECT operator FROM burnin_jobs WHERE id=?),?)""",
                (f"burnin_{final_state}", drive_id, job_id, job_id,
                 f"Burn-in {final_state} on {devname}"),
            )
            await db.commit()

        # Build SSE alert for browser notifications
        alert = {
            "state": final_state,
            "job_id": job_id,
            "devname": devname,
            "serial": drive_serial,
            "model": drive_model,
            "error_text": error_text,
        }
        _push_update(alert=alert)
        log.info("Burn-in finished", extra={"job_id": job_id, "devname": devname, "state": final_state})

        # Fire webhook + immediate email in background (non-blocking)
        try:
            from app import notifier
            # FIX: dropped a dead `cur2 = None` pre-assignment — the cursor is
            # only ever used inside the connection context below.
            async with _db() as db2:
                db2.row_factory = aiosqlite.Row
                cur2 = await db2.execute(
                    "SELECT profile, operator FROM burnin_jobs WHERE id=?", (job_id,)
                )
                job_row = await cur2.fetchone()
            if job_row:
                asyncio.create_task(notifier.notify_job_complete(
                    job_id=job_id,
                    devname=devname,
                    serial=drive_serial,
                    model=drive_model,
                    state=final_state,
                    profile=job_row["profile"],
                    operator=job_row["operator"],
                    error_text=error_text,
                ))
        except Exception as exc:
            log.error("Failed to schedule notifications: %s", exc)


async def _execute_stages(job_id: int, stages: list[str], devname: str, drive_id: int) -> bool:
    """Run each stage in order; return True only if every stage passes."""
    for stage_name in stages:
        if await _is_cancelled(job_id):
            return False

        await _start_stage(job_id, stage_name)
        _push_update()

        try:
            ok = await _dispatch_stage(job_id, stage_name, devname, drive_id)
        except Exception as exc:
            log.error("Stage raised exception: %s", exc, extra={"job_id": job_id, "devname": devname, "stage": stage_name})
            ok = False
            await _finish_stage(job_id, stage_name, success=False, error_text=str(exc))
            _push_update()
            return False

        if not ok and await _is_cancelled(job_id):
            # Stage was aborted due to cancellation — mark it cancelled, not failed
            await _cancel_stage(job_id, stage_name)
        else:
            await _finish_stage(job_id, stage_name, success=ok)
            # BUG FIX: the original called _recalculate_progress(job_id, profile),
            # but `profile` is not defined in this function's scope — a NameError
            # that aborted every job on its first completed stage. The profile
            # argument is unused by _recalculate_progress anyway (kept for compat).
            await _recalculate_progress(job_id)
            _push_update()

        if not ok:
            return False

    return True


async def _dispatch_stage(job_id: int, stage_name: str, devname: str, drive_id: int) -> bool:
    """Route a stage name to its implementation. Unknown stages pass trivially."""
    if stage_name == "precheck":
        return await _stage_precheck(job_id, drive_id)
    elif stage_name == "short_smart":
        return await _stage_smart_test(job_id, devname, "SHORT", "short_smart")
    elif stage_name == "long_smart":
        return await _stage_smart_test(job_id, devname, "LONG", "long_smart")
    elif stage_name == "surface_validate":
        return await _stage_timed_simulate(job_id, "surface_validate", settings.surface_validate_seconds)
    elif stage_name == "io_validate":
        return await _stage_timed_simulate(job_id, "io_validate", settings.io_validate_seconds)
    elif stage_name == "final_check":
        return await _stage_final_check(job_id, devname)
    return True


# ---------------------------------------------------------------------------
# Individual stage implementations
# ---------------------------------------------------------------------------

async def _stage_precheck(job_id: int, drive_id: int) -> bool:
    """Check SMART health and temperature before starting destructive work."""
    async with _db() as db:
        cur = await db.execute(
            "SELECT smart_health, temperature_c FROM drives WHERE id=?", (drive_id,)
        )
        row = await cur.fetchone()

    if not row:
        return False

    health, temp = row[0], row[1]

    if health == "FAILED":
        await _set_stage_error(job_id, "precheck", "Drive SMART health is FAILED — refusing to burn in")
        return False

    if temp and temp > 60:
        await _set_stage_error(job_id, "precheck", f"Drive temperature {temp}°C exceeds 60°C limit")
        return False

    await asyncio.sleep(1)  # Simulate brief check
    return True
async def _stage_smart_test(job_id: int, devname: str, test_type: str, stage_name: str) -> bool:
    """Start a TrueNAS SMART test and poll until complete.

    Polls the TrueNAS job list every POLL_INTERVAL seconds, mirroring progress
    into the stage row. Aborts the remote job (best-effort) on cancellation.
    """
    tn_job_id = await _client.start_smart_test([devname], test_type)

    while True:
        if await _is_cancelled(job_id):
            try:
                await _client.abort_job(tn_job_id)
            except Exception:
                pass  # best-effort abort; job is already terminal on our side
            return False

        jobs = await _client.get_smart_jobs()
        job = next((j for j in jobs if j["id"] == tn_job_id), None)

        # Remote job vanished — treat as failure rather than poll forever.
        if not job:
            return False

        state = job["state"]
        pct = job["progress"]["percent"]

        await _update_stage_percent(job_id, stage_name, pct)
        await _recalculate_progress(job_id, None)
        _push_update()

        if state == "SUCCESS":
            return True
        elif state in ("FAILED", "ABORTED"):
            await _set_stage_error(job_id, stage_name,
                                   job.get("error") or f"SMART {test_type} test failed")
            return False

        await asyncio.sleep(POLL_INTERVAL)


async def _stage_timed_simulate(job_id: int, stage_name: str, duration_seconds: int) -> bool:
    """Simulate a timed stage (surface validation / IO validation) with progress updates."""
    start = time.monotonic()

    while True:
        if await _is_cancelled(job_id):
            return False

        elapsed = time.monotonic() - start
        # FIX: guard against a zero/negative configured duration, which would
        # previously raise ZeroDivisionError. Treat it as instantly complete.
        if duration_seconds <= 0:
            pct = 100
        else:
            pct = min(100, int(elapsed / duration_seconds * 100))

        await _update_stage_percent(job_id, stage_name, pct)
        await _recalculate_progress(job_id, None)
        _push_update()

        if pct >= 100:
            return True

        await asyncio.sleep(POLL_INTERVAL)


async def _stage_final_check(job_id: int, devname: str) -> bool:
    """Verify drive passed all tests by checking current SMART health in DB."""
    await asyncio.sleep(1)
    async with _db() as db:
        cur = await db.execute(
            "SELECT smart_health FROM drives WHERE devname=?", (devname,)
        )
        row = await cur.fetchone()

    if not row or row[0] == "FAILED":
        await _set_stage_error(job_id, "final_check", "Drive SMART health is FAILED after burn-in")
        return False

    return True
# ---------------------------------------------------------------------------
# DB helpers
# ---------------------------------------------------------------------------

async def _is_cancelled(job_id: int) -> bool:
    """True when the job row exists and its state is 'cancelled'."""
    async with _db() as db:
        cur = await db.execute("SELECT state FROM burnin_jobs WHERE id=?", (job_id,))
        row = await cur.fetchone()
    return row is not None and row[0] == "cancelled"


async def _start_stage(job_id: int, stage_name: str) -> None:
    """Mark a stage 'running' and mirror its name onto the parent job row."""
    stamp = _now()
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute(
            "UPDATE burnin_stages SET state='running', started_at=? WHERE burnin_job_id=? AND stage_name=?",
            (stamp, job_id, stage_name),
        )
        await db.execute(
            "UPDATE burnin_jobs SET stage_name=? WHERE id=?",
            (stage_name, job_id),
        )
        await db.commit()


async def _finish_stage(job_id: int, stage_name: str, success: bool, error_text: str | None = None) -> None:
    """Record a stage's terminal state, wall-clock duration, and optional error.

    error_text is only written when explicitly passed; otherwise any message the
    stage implementation already stored (via _set_stage_error) is preserved.
    """
    now = _now()
    state = "passed" if success else "failed"
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")

        # Derive duration from the persisted start timestamp, tolerating rows
        # with missing/garbled timestamps (duration stays NULL in that case).
        duration = None
        cur = await db.execute(
            "SELECT started_at FROM burnin_stages WHERE burnin_job_id=? AND stage_name=?",
            (job_id, stage_name),
        )
        row = await cur.fetchone()
        if row and row[0]:
            try:
                begun = datetime.fromisoformat(row[0])
                if begun.tzinfo is None:
                    begun = begun.replace(tzinfo=timezone.utc)
                duration = (datetime.now(timezone.utc) - begun).total_seconds()
            except Exception:
                pass

        assignments = "state=?, percent=?, finished_at=?, duration_seconds=?"
        params: list = [state, 100 if success else None, now, duration]
        if error_text is not None:
            assignments += ", error_text=?"
            params.append(error_text)
        params += [job_id, stage_name]

        await db.execute(
            f"UPDATE burnin_stages SET {assignments} WHERE burnin_job_id=? AND stage_name=?",
            params,
        )
        await db.commit()


async def _update_stage_percent(job_id: int, stage_name: str, pct: int) -> None:
    """Persist a stage's progress percentage."""
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute(
            "UPDATE burnin_stages SET percent=? WHERE burnin_job_id=? AND stage_name=?",
            (pct, job_id, stage_name),
        )
        await db.commit()


async def _cancel_stage(job_id: int, stage_name: str) -> None:
    """Mark a stage 'cancelled' with a finish timestamp."""
    stamp = _now()
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute(
            "UPDATE burnin_stages SET state='cancelled', finished_at=? WHERE burnin_job_id=? AND stage_name=?",
            (stamp, job_id, stage_name),
        )
        await db.commit()


async def _set_stage_error(job_id: int, stage_name: str, error_text: str) -> None:
    """Attach an error message to a stage without touching its state."""
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute(
            "UPDATE burnin_stages SET error_text=? WHERE burnin_job_id=? AND stage_name=?",
            (error_text, job_id, stage_name),
        )
        await db.commit()
async def _recalculate_progress(job_id: int, profile: str | None = None) -> None:
    """Recompute overall job % from actual stage rows. profile param is unused (kept for compat)."""
    async with _db() as db:
        db.row_factory = aiosqlite.Row
        await db.execute("PRAGMA journal_mode=WAL")

        cur = await db.execute(
            "SELECT stage_name, state, percent FROM burnin_stages WHERE burnin_job_id=? ORDER BY id",
            (job_id,),
        )
        stages = await cur.fetchall()
        if not stages:
            return

        # Weighted average: each stage contributes its base weight (default 5).
        weights = {s["stage_name"]: _STAGE_BASE_WEIGHTS.get(s["stage_name"], 5) for s in stages}
        total_weight = sum(weights.values())
        if total_weight == 0:
            return

        earned = 0.0
        active_stage = None
        for s in stages:
            weight = weights[s["stage_name"]]
            state = s["state"]
            if state == "passed":
                earned += weight
            elif state == "running":
                earned += weight * (s["percent"] or 0) / 100
                active_stage = s["stage_name"]

        overall = int(earned / total_weight * 100)
        await db.execute(
            "UPDATE burnin_jobs SET percent=?, stage_name=? WHERE id=?",
            (overall, active_stage, job_id),
        )
        await db.commit()


# ---------------------------------------------------------------------------
# SSE push
# ---------------------------------------------------------------------------

def _push_update(alert: dict | None = None) -> None:
    """Notify SSE subscribers that data has changed, with optional browser notification payload."""
    try:
        from app import poller
        poller._notify_subscribers(alert=alert)
    except Exception:
        # Best-effort: the poller may not be initialised (e.g. during tests).
        pass
# ---------------------------------------------------------------------------
# Stuck-job detection (called by poller every ~5 cycles)
# ---------------------------------------------------------------------------

async def check_stuck_jobs() -> None:
    """Mark jobs that have been 'running' beyond stuck_job_hours as 'unknown'."""
    limit_seconds = settings.stuck_job_hours * 3600

    async with _db() as db:
        db.row_factory = aiosqlite.Row
        await db.execute("PRAGMA journal_mode=WAL")

        # julianday() difference is in days; × 86400 converts to seconds.
        cur = await db.execute("""
            SELECT bj.id, bj.drive_id, d.devname, bj.started_at
            FROM burnin_jobs bj
            JOIN drives d ON d.id = bj.drive_id
            WHERE bj.state = 'running'
              AND bj.started_at IS NOT NULL
              AND (julianday('now') - julianday(bj.started_at)) * 86400 > ?
        """, (limit_seconds,))
        stuck = await cur.fetchall()

        if not stuck:
            return

        stamp = _now()
        for job_id, drive_id, devname, started_at in stuck:
            log.critical(
                "Stuck burn-in detected — marking unknown",
                extra={"job_id": job_id, "devname": devname, "started_at": started_at},
            )
            await db.execute(
                "UPDATE burnin_jobs SET state='unknown', finished_at=? WHERE id=?",
                (stamp, job_id),
            )
            await db.execute(
                """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
                   VALUES (?,?,?,?,?)""",
                ("burnin_stuck", drive_id, job_id, "system",
                 f"Job stuck for >{settings.stuck_job_hours}h — automatically marked unknown"),
            )

        await db.commit()

    _push_update()
    log.warning("Marked %d stuck job(s) as unknown", len(stuck))
class Settings(BaseSettings):
    """Application configuration, loaded from environment variables / .env.

    Field names map case-insensitively to env vars (e.g. APP_PORT → app_port).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
    )

    # HTTP server bind address / port and SQLite database location.
    app_host: str = "0.0.0.0"
    app_port: int = 8080
    db_path: str = "/data/app.db"

    # TrueNAS API endpoint (mock server in dev, real box in production).
    truenas_base_url: str = "http://localhost:8000"
    truenas_api_key: str = "mock-key"
    truenas_verify_tls: bool = False

    # Polling cadence and concurrency limits.
    poll_interval_seconds: int = 12
    stale_threshold_seconds: int = 45
    max_parallel_burnins: int = 2
    surface_validate_seconds: int = 45  # mock simulation duration
    io_validate_seconds: int = 25       # mock simulation duration

    # Logging
    log_level: str = "INFO"

    # Security — comma-separated IPs or CIDRs, e.g. "10.0.0.0/24,127.0.0.1"
    # Empty string means allow all (default).
    allowed_ips: str = ""

    # SMTP — daily status email at smtp_report_hour local time.
    # Leave smtp_host empty to disable email entirely.
    smtp_host: str = ""
    smtp_port: int = 587
    smtp_user: str = ""
    smtp_password: str = ""
    smtp_from: str = ""
    smtp_to: str = ""                        # comma-separated recipients
    smtp_report_hour: int = 8                # local hour to send (0-23)
    smtp_daily_report_enabled: bool = True   # set False to skip daily report without disabling alerts
    smtp_alert_on_fail: bool = True          # immediate email when a job fails
    smtp_alert_on_pass: bool = False         # immediate email when a job passes
    smtp_ssl_mode: str = "starttls"          # "starttls" | "ssl" | "plain"
    smtp_timeout: int = 60                   # connection + read timeout in seconds

    # Webhook — POST JSON payload on every job state change (pass/fail).
    # Leave empty to disable. Works with Slack, Discord, ntfy, n8n, etc.
    webhook_url: str = ""

    # Stuck-job detection: jobs running longer than this are marked 'unknown'
    stuck_job_hours: int = 24


settings = Settings()
# Migrations for existing databases that predate schema additions.
# Each entry is tried with try/except — SQLite raises OperationalError
# ("duplicate column name") if the column already exists, which is safe to ignore.
_MIGRATIONS = [
    "ALTER TABLE drives ADD COLUMN notes TEXT",
    "ALTER TABLE drives ADD COLUMN location TEXT",
]


async def _run_migrations(db: aiosqlite.Connection) -> None:
    """Apply idempotent schema migrations to an already-open connection."""
    for statement in _MIGRATIONS:
        try:
            await db.execute(statement)
        except Exception:
            pass  # Column already exists — harmless

    # Remove the old CHECK(profile IN ('quick','full')) constraint if present.
    # SQLite can't ALTER a CHECK — requires a full table rebuild.
    cur = await db.execute(
        "SELECT sql FROM sqlite_master WHERE type='table' AND name='burnin_jobs'"
    )
    row = await cur.fetchone()
    table_ddl = (row[0] or "") if row else ""
    if "CHECK" in table_ddl:
        await db.executescript("""
            PRAGMA foreign_keys=OFF;
            CREATE TABLE burnin_jobs_new (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                drive_id INTEGER NOT NULL REFERENCES drives(id),
                profile TEXT NOT NULL,
                state TEXT NOT NULL DEFAULT 'queued',
                percent INTEGER DEFAULT 0,
                stage_name TEXT,
                operator TEXT NOT NULL,
                created_at TEXT NOT NULL,
                started_at TEXT,
                finished_at TEXT,
                error_text TEXT
            );
            INSERT INTO burnin_jobs_new SELECT * FROM burnin_jobs;
            DROP TABLE burnin_jobs;
            ALTER TABLE burnin_jobs_new RENAME TO burnin_jobs;
            CREATE INDEX IF NOT EXISTS idx_burnin_jobs_drive ON burnin_jobs(drive_id, state);
            PRAGMA foreign_keys=ON;
        """)


async def init_db() -> None:
    """Create the database file, apply schema and migrations. Called once at startup."""
    Path(settings.db_path).parent.mkdir(parents=True, exist_ok=True)
    async with aiosqlite.connect(settings.db_path) as db:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute("PRAGMA foreign_keys=ON")
        await db.executescript(SCHEMA)
        await _run_migrations(db)
        await db.commit()


async def get_db():
    """FastAPI dependency: yield a WAL-mode connection with Row factory, closing it afterwards."""
    db = await aiosqlite.connect(settings.db_path)
    db.row_factory = aiosqlite.Row
    try:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute("PRAGMA foreign_keys=ON")
        yield db
    finally:
        await db.close()
+""" + +import json +import logging +import traceback + +from app.config import settings + +# Standard LogRecord attributes to exclude from the "extra" dump +_STDLIB_ATTRS = frozenset(logging.LogRecord("", 0, "", 0, "", (), None).__dict__) + + +class _JsonFormatter(logging.Formatter): + def format(self, record: logging.LogRecord) -> str: + data: dict = { + "ts": self.formatTime(record, "%Y-%m-%dT%H:%M:%S"), + "level": record.levelname, + "logger": record.name, + "msg": record.getMessage(), + } + # Include any non-standard fields passed via extra={} + for key, val in record.__dict__.items(): + if key not in _STDLIB_ATTRS and not key.startswith("_"): + data[key] = val + if record.exc_info: + data["exc"] = "".join(traceback.format_exception(*record.exc_info)).strip() + return json.dumps(data) + + +def configure() -> None: + handler = logging.StreamHandler() + handler.setFormatter(_JsonFormatter()) + + level = getattr(logging, settings.log_level.upper(), logging.INFO) + root = logging.getLogger() + root.setLevel(level) + root.handlers = [handler] + + # Quiet chatty third-party loggers + logging.getLogger("httpx").setLevel(logging.WARNING) + logging.getLogger("httpcore").setLevel(logging.WARNING) + logging.getLogger("uvicorn.access").setLevel(logging.WARNING) diff --git a/app/mailer.py b/app/mailer.py new file mode 100644 index 0000000..7800d60 --- /dev/null +++ b/app/mailer.py @@ -0,0 +1,453 @@ +""" +Daily status email — sent at smtp_report_hour (local time) every day. + +Disabled when SMTP_HOST is not set. 
+""" + +import asyncio +import logging +import smtplib +import ssl +from datetime import datetime, timedelta, timezone +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText + +import aiosqlite + +from app.config import settings + +log = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# HTML email template +# --------------------------------------------------------------------------- + +def _chip(state: str) -> str: + colours = { + "PASSED": ("#1a4731", "#3fb950", "#3fb950"), + "passed": ("#1a4731", "#3fb950", "#3fb950"), + "FAILED": ("#4b1113", "#f85149", "#f85149"), + "failed": ("#4b1113", "#f85149", "#f85149"), + "running": ("#0d2d6b", "#58a6ff", "#58a6ff"), + "queued": ("#4b3800", "#d29922", "#d29922"), + "cancelled": ("#222", "#8b949e", "#8b949e"), + "unknown": ("#222", "#8b949e", "#8b949e"), + "idle": ("#222", "#8b949e", "#8b949e"), + "UNKNOWN": ("#222", "#8b949e", "#8b949e"), + } + bg, fg, bd = colours.get(state, ("#222", "#8b949e", "#8b949e")) + label = state.upper() + return ( + f'{label}' + ) + + +def _temp_colour(c) -> str: + if c is None: + return "#8b949e" + if c < 40: + return "#3fb950" + if c < 50: + return "#d29922" + return "#f85149" + + +def _fmt_bytes(b) -> str: + if b is None: + return "—" + tb = b / 1_000_000_000_000 + if tb >= 1: + return f"{tb:.0f} TB" + return f"{b / 1_000_000_000:.0f} GB" + + +def _fmt_dt(iso: str | None) -> str: + if not iso: + return "—" + try: + dt = datetime.fromisoformat(iso) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + return dt.astimezone().strftime("%Y-%m-%d %H:%M") + except Exception: + return iso or "—" + + +def _drive_rows_html(drives: list[dict]) -> str: + if not drives: + return 'No drives found' + + rows = [] + for d in drives: + health = d.get("smart_health") or "UNKNOWN" + temp = d.get("temperature_c") + bi = d.get("burnin") or {} + bi_state = bi.get("state", "—") if bi else "—" + + short = 
d.get("smart_short") or {} + long_ = d.get("smart_long") or {} + short_state = short.get("state", "idle") + long_state = long_.get("state", "idle") + + row_bg = "#1c0a0a" if health == "FAILED" else "#0d1117" + + rows.append(f""" + + {d.get('devname','—')} + {d.get('model','—')} + {d.get('serial','—')} + {_fmt_bytes(d.get('size_bytes'))} + {f'{temp}°C' if temp is not None else '—'} + {_chip(health)} + {_chip(short_state)} + {_chip(long_state)} + {_chip(bi_state) if bi else '—'} + """) + return "\n".join(rows) + + +def _build_html(drives: list[dict], generated_at: str) -> str: + total = len(drives) + failed_drives = [d for d in drives if d.get("smart_health") == "FAILED"] + running_burnin = [d for d in drives if (d.get("burnin") or {}).get("state") == "running"] + passed_burnin = [d for d in drives if (d.get("burnin") or {}).get("state") == "passed"] + + # Alert banner + alert_html = "" + if failed_drives: + names = ", ".join(d["devname"] for d in failed_drives) + alert_html = f""" +
+ ⚠ SMART health FAILED on {len(failed_drives)} drive(s): {names} +
""" + + drive_rows = _drive_rows_html(drives) + + return f""" + + + + + TrueNAS Burn-In — Daily Report + + + + +
+ + + + + + + + + + + + + + + + + + +
+ + + + + +
TrueNAS Burn-In + Daily Status Report{generated_at}
+
+ + {alert_html} + + + + + + + + + +
+
+
{total}
+
Drives
+
+
+
+
{len(failed_drives)}
+
Failed
+
+
+
+
{len(running_burnin)}
+
Running
+
+
+
+
{len(passed_burnin)}
+
Passed
+
+
+ + + + + + + + + + + + + + + + + + {drive_rows} + +
DriveModelSerialSizeTempHealthShortLongBurn-In
+ +
+ Generated by TrueNAS Burn-In Dashboard · {generated_at} +
+
+ +""" + + +# --------------------------------------------------------------------------- +# Send +# --------------------------------------------------------------------------- + +# Standard ports for each SSL mode — used when smtp_port is not overridden +_MODE_PORTS: dict[str, int] = {"starttls": 587, "ssl": 465, "plain": 25} + + +def _smtp_port() -> int: + """Derive port from ssl_mode; fall back to settings.smtp_port if explicitly set.""" + mode = (settings.smtp_ssl_mode or "starttls").lower() + return _MODE_PORTS.get(mode, 587) + + +def _send_email(subject: str, html: str) -> None: + recipients = [r.strip() for r in settings.smtp_to.split(",") if r.strip()] + if not recipients: + log.warning("SMTP_TO is empty — skipping send") + return + + msg = MIMEMultipart("alternative") + msg["Subject"] = subject + msg["From"] = settings.smtp_from or settings.smtp_user + msg["To"] = ", ".join(recipients) + msg.attach(MIMEText(html, "html", "utf-8")) + + ctx = ssl.create_default_context() + mode = (settings.smtp_ssl_mode or "starttls").lower() + timeout = int(settings.smtp_timeout or 60) + port = _smtp_port() + + if mode == "ssl": + server = smtplib.SMTP_SSL(settings.smtp_host, port, context=ctx, timeout=timeout) + server.ehlo() + server.login(settings.smtp_user, settings.smtp_password) + server.sendmail(msg["From"], recipients, msg.as_string()) + server.quit() + else: + with smtplib.SMTP(settings.smtp_host, port, timeout=timeout) as server: + server.ehlo() + if mode == "starttls": + server.starttls(context=ctx) + server.ehlo() + server.login(settings.smtp_user, settings.smtp_password) + server.sendmail(msg["From"], recipients, msg.as_string()) + + log.info("Email sent to %s", recipients) + + +# --------------------------------------------------------------------------- +# Data fetch +# --------------------------------------------------------------------------- + +async def _fetch_report_data() -> list[dict]: + """Pull drives + latest burnin state from DB.""" + from 
app.routes import _fetch_drives_for_template # local import avoids circular + async with aiosqlite.connect(settings.db_path) as db: + db.row_factory = aiosqlite.Row + await db.execute("PRAGMA journal_mode=WAL") + return await _fetch_drives_for_template(db) + + +# --------------------------------------------------------------------------- +# Scheduler +# --------------------------------------------------------------------------- + +def _build_alert_html( + job_id: int, + devname: str, + serial: str | None, + model: str | None, + state: str, + error_text: str | None, + generated_at: str, +) -> str: + is_fail = state == "failed" + color = "#f85149" if is_fail else "#3fb950" + bg = "#4b1113" if is_fail else "#1a4731" + icon = "✕" if is_fail else "✓" + + error_section = "" + if error_text: + error_section = f""" +
+ Error: {error_text} +
""" + + return f""" + +Burn-In {state.title()} Alert + + + +
+ + + + +
+
+ {icon} Burn-In {state.upper()} +
+ + + + + + + + + + + + + + + + + +
Device{devname}
Model{model or '—'}
Serial{serial or '—'}
Job #{job_id}
+ {error_section} +
{generated_at}
+
+
+ +""" + + +async def send_job_alert( + job_id: int, + devname: str, + serial: str | None, + model: str | None, + state: str, + error_text: str | None, +) -> None: + """Send an immediate per-job alert email (pass or fail).""" + icon = "✕" if state == "failed" else "✓" + subject = f"{icon} Burn-In {state.upper()}: {devname} ({serial or 'no serial'})" + now_str = datetime.now().strftime("%Y-%m-%d %H:%M") + html = _build_alert_html(job_id, devname, serial, model, state, error_text, now_str) + await asyncio.to_thread(_send_email, subject, html) + + +async def test_smtp_connection() -> dict: + """ + Try to establish an SMTP connection using current settings. + Returns {"ok": True/False, "error": str|None}. + Does NOT send any email. + """ + if not settings.smtp_host: + return {"ok": False, "error": "SMTP_HOST is not configured"} + + def _test() -> dict: + try: + ctx = ssl.create_default_context() + mode = (settings.smtp_ssl_mode or "starttls").lower() + timeout = int(settings.smtp_timeout or 60) + port = _smtp_port() + + if mode == "ssl": + server = smtplib.SMTP_SSL(settings.smtp_host, port, + context=ctx, timeout=timeout) + server.ehlo() + else: + server = smtplib.SMTP(settings.smtp_host, port, timeout=timeout) + server.ehlo() + if mode == "starttls": + server.starttls(context=ctx) + server.ehlo() + + if settings.smtp_user: + server.login(settings.smtp_user, settings.smtp_password) + server.quit() + return {"ok": True, "error": None} + except Exception as exc: + return {"ok": False, "error": str(exc)} + + return await asyncio.to_thread(_test) + + +async def send_report_now() -> None: + """Send a report immediately (used by on-demand API endpoint).""" + drives = await _fetch_report_data() + now_str = datetime.now().strftime("%Y-%m-%d %H:%M") + html = _build_html(drives, now_str) + subject = f"Burn-In Report — {datetime.now().strftime('%Y-%m-%d')} ({len(drives)} drives)" + await asyncio.to_thread(_send_email, subject, html) + + +async def run() -> None: + """Background 
loop: send daily report at smtp_report_hour local time.""" + if not settings.smtp_host: + log.info("SMTP not configured — daily email disabled") + return + + log.info( + "Mailer started — daily report at %02d:00 local time", + settings.smtp_report_hour, + ) + + while True: + now = datetime.now() + target = now.replace( + hour=settings.smtp_report_hour, + minute=0, second=0, microsecond=0, + ) + if target <= now: + target += timedelta(days=1) + + wait = (target - now).total_seconds() + log.info("Next report in %.0f seconds (%s)", wait, target.strftime("%Y-%m-%d %H:%M")) + await asyncio.sleep(wait) + + if settings.smtp_daily_report_enabled: + try: + await send_report_now() + except Exception as exc: + log.error("Failed to send daily report: %s", exc) + else: + log.info("Daily report skipped — smtp_daily_report_enabled is False") + + # Sleep briefly past the hour to avoid drift from re-triggering immediately + await asyncio.sleep(60) diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..97d9f75 --- /dev/null +++ b/app/main.py @@ -0,0 +1,123 @@ +import asyncio +import ipaddress +import logging +from contextlib import asynccontextmanager + +from fastapi import FastAPI +from fastapi.staticfiles import StaticFiles +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +from starlette.responses import PlainTextResponse + +from app import burnin, mailer, poller, settings_store +from app.config import settings +from app.database import init_db +from app.logging_config import configure as configure_logging +from app.renderer import templates # noqa: F401 — registers filters as side-effect +from app.routes import router +from app.truenas import TrueNASClient + +# Configure structured JSON logging before anything else logs +configure_logging() +log = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# IP allowlist middleware +# 
--------------------------------------------------------------------------- + +class _IPAllowlistMiddleware(BaseHTTPMiddleware): + """ + Block requests from IPs not in ALLOWED_IPS. + + When ALLOWED_IPS is empty the middleware is a no-op. + Checks X-Forwarded-For first (trusts the leftmost address), then the + direct client IP. + """ + + def __init__(self, app, allowed_ips: str) -> None: + super().__init__(app) + self._networks: list[ipaddress.IPv4Network | ipaddress.IPv6Network] = [] + for entry in (s.strip() for s in allowed_ips.split(",") if s.strip()): + try: + self._networks.append(ipaddress.ip_network(entry, strict=False)) + except ValueError: + log.warning("Invalid ALLOWED_IPS entry ignored: %r", entry) + + def _is_allowed(self, ip_str: str) -> bool: + try: + addr = ipaddress.ip_address(ip_str) + return any(addr in net for net in self._networks) + except ValueError: + return False + + async def dispatch(self, request: Request, call_next): + if not self._networks: + return await call_next(request) + + # Prefer X-Forwarded-For (leftmost = original client) + forwarded = request.headers.get("X-Forwarded-For", "").split(",")[0].strip() + client_ip = forwarded or (request.client.host if request.client else "") + + if self._is_allowed(client_ip): + return await call_next(request) + + log.warning("Request blocked by IP allowlist", extra={"client_ip": client_ip}) + return PlainTextResponse("Forbidden", status_code=403) + + +# --------------------------------------------------------------------------- +# Poller supervisor — restarts run() if it ever exits unexpectedly +# --------------------------------------------------------------------------- + +async def _supervised_poller(client: TrueNASClient) -> None: + while True: + try: + await poller.run(client) + except asyncio.CancelledError: + raise # Propagate shutdown signal cleanly + except Exception as exc: + log.critical("Poller crashed unexpectedly — restarting in 5s: %s", exc) + await asyncio.sleep(5) + + +# 
--------------------------------------------------------------------------- +# Lifespan +# --------------------------------------------------------------------------- + +_client: TrueNASClient | None = None + + +@asynccontextmanager +async def lifespan(app: FastAPI): + global _client + log.info("Starting up") + await init_db() + settings_store.init() + _client = TrueNASClient() + await burnin.init(_client) + poll_task = asyncio.create_task(_supervised_poller(_client)) + mailer_task = asyncio.create_task(mailer.run()) + yield + log.info("Shutting down") + poll_task.cancel() + mailer_task.cancel() + try: + await asyncio.gather(poll_task, mailer_task, return_exceptions=True) + except asyncio.CancelledError: + pass + await _client.close() + + +# --------------------------------------------------------------------------- +# App +# --------------------------------------------------------------------------- + +app = FastAPI(title="TrueNAS Burn-In Dashboard", lifespan=lifespan) + +if settings.allowed_ips: + app.add_middleware(_IPAllowlistMiddleware, allowed_ips=settings.allowed_ips) + log.info("IP allowlist active: %s", settings.allowed_ips) + +app.mount("/static", StaticFiles(directory="app/static"), name="static") +app.include_router(router) diff --git a/app/models.py b/app/models.py new file mode 100644 index 0000000..1748c7b --- /dev/null +++ b/app/models.py @@ -0,0 +1,104 @@ +from pydantic import BaseModel, field_validator, model_validator + + +class SmartTestState(BaseModel): + state: str = "idle" + percent: int | None = None + eta_seconds: int | None = None + eta_timestamp: str | None = None + started_at: str | None = None + finished_at: str | None = None + error_text: str | None = None + + +_VALID_STAGE_NAMES = frozenset({"surface_validate", "short_smart", "long_smart"}) + + +class StartBurninRequest(BaseModel): + drive_ids: list[int] + operator: str + run_surface: bool = True + run_short: bool = True + run_long: bool = True + stage_order: list[str] | None = None # 
custom execution order, e.g. ["short_smart","long_smart","surface_validate"] + + @field_validator("operator") + @classmethod + def validate_operator(cls, v: str) -> str: + v = v.strip() + if not v: + raise ValueError("operator must not be empty") + return v + + @model_validator(mode="after") + def validate_stages(self) -> "StartBurninRequest": + if not (self.run_surface or self.run_short or self.run_long): + raise ValueError("At least one stage must be selected") + if self.stage_order is not None: + for s in self.stage_order: + if s not in _VALID_STAGE_NAMES: + raise ValueError(f"Invalid stage name in stage_order: {s!r}") + return self + + @property + def profile(self) -> str: + _MAP = { + (True, True, True): "full", + (True, True, False): "surface_short", + (True, False, True): "surface_long", + (True, False, False): "surface", + (False, True, True): "short_long", + (False, True, False): "short", + (False, False, True): "long", + } + return _MAP[(self.run_surface, self.run_short, self.run_long)] + + +class CancelBurninRequest(BaseModel): + operator: str = "unknown" + + +class BurninStageResponse(BaseModel): + id: int + stage_name: str + state: str + percent: int = 0 + started_at: str | None = None + finished_at: str | None = None + error_text: str | None = None + + +class BurninJobResponse(BaseModel): + id: int + drive_id: int + profile: str + state: str + percent: int = 0 + stage_name: str | None = None + operator: str + created_at: str + started_at: str | None = None + finished_at: str | None = None + error_text: str | None = None + stages: list[BurninStageResponse] = [] + + +class DriveResponse(BaseModel): + id: int + devname: str + serial: str | None = None + model: str | None = None + size_bytes: int | None = None + temperature_c: int | None = None + smart_health: str = "UNKNOWN" + last_polled_at: str + is_stale: bool + smart_short: SmartTestState + smart_long: SmartTestState + notes: str | None = None + location: str | None = None + + +class 
UpdateDriveRequest(BaseModel): + notes: str | None = None + location: str | None = None diff --git a/app/notifier.py b/app/notifier.py new file mode 100644 index 0000000..d830da2 --- /dev/null +++ b/app/notifier.py @@ -0,0 +1,80 @@ +""" +Notification dispatcher — webhooks and immediate email alerts. + +Called from burnin.py when a job reaches a terminal state (passed/failed). +Webhook fires unconditionally when WEBHOOK_URL is set. +Email alerts fire based on smtp_alert_on_fail / smtp_alert_on_pass settings. +""" + +import asyncio +import logging + +from app.config import settings + +log = logging.getLogger(__name__) + + +async def notify_job_complete( + job_id: int, + devname: str, + serial: str | None, + model: str | None, + state: str, + profile: str, + operator: str, + error_text: str | None, +) -> None: + """Fire all configured notifications for a completed burn-in job.""" + tasks = [] + + if settings.webhook_url: + tasks.append(_send_webhook({ + "event": f"burnin_{state}", + "job_id": job_id, + "devname": devname, + "serial": serial, + "model": model, + "state": state, + "profile": profile, + "operator": operator, + "error_text": error_text, + })) + + if settings.smtp_host: + should_alert = ( + (state == "failed" and settings.smtp_alert_on_fail) or + (state == "passed" and settings.smtp_alert_on_pass) + ) + if should_alert: + tasks.append(_send_alert_email(job_id, devname, serial, model, state, error_text)) + + if not tasks: + return + + results = await asyncio.gather(*tasks, return_exceptions=True) + for r in results: + if isinstance(r, Exception): + log.error("Notification failed: %s", r, extra={"job_id": job_id, "devname": devname}) + + +async def _send_webhook(payload: dict) -> None: + import httpx + async with httpx.AsyncClient(timeout=10.0) as client: + r = await client.post(settings.webhook_url, json=payload) + r.raise_for_status() + log.info( + "Webhook sent", + extra={"event": payload.get("event"), "job_id": payload.get("job_id"), "url": 
settings.webhook_url}, + ) + + +async def _send_alert_email( + job_id: int, + devname: str, + serial: str | None, + model: str | None, + state: str, + error_text: str | None, +) -> None: + from app import mailer + await mailer.send_job_alert(job_id, devname, serial, model, state, error_text) diff --git a/app/poller.py b/app/poller.py new file mode 100644 index 0000000..67f625c --- /dev/null +++ b/app/poller.py @@ -0,0 +1,290 @@ +""" +Polling loop — fetches TrueNAS state every POLL_INTERVAL_SECONDS and +normalizes it into SQLite. + +Design notes: + - Opens its own DB connection per cycle (WAL allows concurrent readers). + - Skips a cycle if TrueNAS is unreachable; marks poller unhealthy. + - Never overwrites a 'running' state with stale history. +""" + +import asyncio +import logging +from datetime import datetime, timezone, timedelta +from typing import Any + +import aiosqlite + +from app.config import settings +from app.truenas import TrueNASClient + +log = logging.getLogger(__name__) + +# Shared state read by the /health endpoint +_state: dict[str, Any] = { + "last_poll_at": None, + "last_error": None, + "healthy": False, + "drives_seen": 0, + "consecutive_failures": 0, +} + +# SSE subscriber queues — notified after each successful poll +_subscribers: list[asyncio.Queue] = [] + + +def get_state() -> dict: + return _state.copy() + + +def subscribe() -> asyncio.Queue: + q: asyncio.Queue = asyncio.Queue(maxsize=1) + _subscribers.append(q) + return q + + +def unsubscribe(q: asyncio.Queue) -> None: + try: + _subscribers.remove(q) + except ValueError: + pass + + +def _notify_subscribers(alert: dict | None = None) -> None: + payload = {"alert": alert} + for q in list(_subscribers): + try: + q.put_nowait(payload) + except asyncio.QueueFull: + pass # Client is behind; skip this update + + +def _now() -> str: + return datetime.now(timezone.utc).isoformat() + + +def _eta_from_progress(percent: int, started_iso: str | None) -> str | None: + """Linear ETA extrapolation from 
elapsed time and percent complete.""" + if not started_iso or percent <= 0: + return None + try: + start = datetime.fromisoformat(started_iso) + if start.tzinfo is None: + start = start.replace(tzinfo=timezone.utc) + elapsed = (datetime.now(timezone.utc) - start).total_seconds() + total_est = elapsed / (percent / 100) + remaining = max(0.0, total_est - elapsed) + return (datetime.now(timezone.utc) + timedelta(seconds=remaining)).isoformat() + except Exception: + return None + + +def _map_history_state(status: str) -> str: + return "passed" if "without error" in status.lower() else "failed" + + +# --------------------------------------------------------------------------- +# DB helpers +# --------------------------------------------------------------------------- + +async def _upsert_drive(db: aiosqlite.Connection, disk: dict, now: str) -> int: + await db.execute( + """ + INSERT INTO drives + (truenas_disk_id, devname, serial, model, size_bytes, + temperature_c, smart_health, last_seen_at, last_polled_at) + VALUES (?,?,?,?,?,?,?,?,?) + ON CONFLICT(truenas_disk_id) DO UPDATE SET + temperature_c = excluded.temperature_c, + smart_health = excluded.smart_health, + last_seen_at = excluded.last_seen_at, + last_polled_at = excluded.last_polled_at + """, + ( + disk["identifier"], + disk["devname"], + disk.get("serial"), + disk.get("model"), + disk.get("size"), + disk.get("temperature"), + disk.get("smart_health", "UNKNOWN"), + now, + now, + ), + ) + cur = await db.execute( + "SELECT id FROM drives WHERE truenas_disk_id = ?", (disk["identifier"],) + ) + row = await cur.fetchone() + return row["id"] + + +async def _upsert_test(db: aiosqlite.Connection, drive_id: int, ttype: str, data: dict) -> None: + await db.execute( + """ + INSERT INTO smart_tests + (drive_id, test_type, state, percent, truenas_job_id, + started_at, eta_at, finished_at, error_text) + VALUES (?,?,?,?,?,?,?,?,?) 
+ ON CONFLICT(drive_id, test_type) DO UPDATE SET + state = excluded.state, + percent = excluded.percent, + truenas_job_id = excluded.truenas_job_id, + started_at = COALESCE(excluded.started_at, smart_tests.started_at), + eta_at = excluded.eta_at, + finished_at = excluded.finished_at, + error_text = excluded.error_text + """, + ( + drive_id, + ttype, + data["state"], + data.get("percent", 0), + data.get("truenas_job_id"), + data.get("started_at"), + data.get("eta_at"), + data.get("finished_at"), + data.get("error_text"), + ), + ) + + +async def _apply_running_job( + db: aiosqlite.Connection, drive_id: int, ttype: str, job: dict +) -> None: + pct = job["progress"]["percent"] + await _upsert_test(db, drive_id, ttype, { + "state": "running", + "percent": pct, + "truenas_job_id": job["id"], + "started_at": job.get("time_started"), + "eta_at": _eta_from_progress(pct, job.get("time_started")), + "finished_at": None, + "error_text": None, + }) + + +async def _sync_history( + db: aiosqlite.Connection, + client: TrueNASClient, + drive_id: int, + devname: str, + ttype: str, +) -> None: + """Pull most recent completed test from history. + + This is only called when the drive+type is NOT in the active running-jobs + dict, so it's safe to overwrite any previous 'running' state — the job + has finished (or was never started). 
+ """ + try: + results = await client.get_smart_results(devname) + except Exception: + return # History fetch failure is non-fatal + + if not results: + return + + for test in results[0].get("tests", []): + t_name = test.get("type", "").lower() + is_short = "short" in t_name + if (ttype == "short") != is_short: + continue # Wrong test type + + state = _map_history_state(test.get("status", "")) + await _upsert_test(db, drive_id, ttype, { + "state": state, + "percent": 100 if state == "passed" else 0, + "truenas_job_id": None, + "started_at": None, + "eta_at": None, + "finished_at": None, + "error_text": test.get("status_verbose") if state == "failed" else None, + }) + break # Most recent only + + +# --------------------------------------------------------------------------- +# Poll cycle +# --------------------------------------------------------------------------- + +async def poll_cycle(client: TrueNASClient) -> int: + """Run one full poll. Returns number of drives seen.""" + now = _now() + + disks = await client.get_disks() + running_jobs = await client.get_smart_jobs(state="RUNNING") + + # Index running jobs by (devname, test_type) + active: dict[tuple[str, str], dict] = {} + for job in running_jobs: + try: + args = job["arguments"][0] + devname = args["disks"][0] + ttype = args["type"].lower() + active[(devname, ttype)] = job + except (KeyError, IndexError, TypeError): + pass + + async with aiosqlite.connect(settings.db_path) as db: + db.row_factory = aiosqlite.Row + await db.execute("PRAGMA journal_mode=WAL") + await db.execute("PRAGMA foreign_keys=ON") + + for disk in disks: + devname = disk["devname"] + drive_id = await _upsert_drive(db, disk, now) + + for ttype in ("short", "long"): + if (devname, ttype) in active: + await _apply_running_job(db, drive_id, ttype, active[(devname, ttype)]) + else: + await _sync_history(db, client, drive_id, devname, ttype) + + await db.commit() + + return len(disks) + + +# 
--------------------------------------------------------------------------- +# Background loop +# --------------------------------------------------------------------------- + +async def run(client: TrueNASClient) -> None: + log.info("Poller started", extra={"poll_interval": settings.poll_interval_seconds}) + cycle = 0 + while True: + try: + count = await poll_cycle(client) + cycle += 1 + _state["last_poll_at"] = _now() + _state["last_error"] = None + _state["healthy"] = True + _state["drives_seen"] = count + _state["consecutive_failures"] = 0 + log.debug("Poll OK", extra={"drives": count}) + _notify_subscribers() + + # Check for stuck jobs every 5 cycles (~1 min at default 12s interval) + if cycle % 5 == 0: + try: + from app import burnin as _burnin + await _burnin.check_stuck_jobs() + except Exception as exc: + log.error("Stuck-job check failed: %s", exc) + + except Exception as exc: + failures = _state["consecutive_failures"] + 1 + _state["consecutive_failures"] = failures + _state["last_error"] = str(exc) + _state["healthy"] = False + if failures >= 5: + log.critical( + "Poller has failed %d consecutive times: %s", + failures, exc, + extra={"consecutive_failures": failures}, + ) + else: + log.error("Poll failed: %s", exc, extra={"consecutive_failures": failures}) + + await asyncio.sleep(settings.poll_interval_seconds) diff --git a/app/renderer.py b/app/renderer.py new file mode 100644 index 0000000..324ff6d --- /dev/null +++ b/app/renderer.py @@ -0,0 +1,136 @@ +""" +Jinja2 template engine + filter/helper registration. +Import `templates` from here — do not create additional Jinja2 instances. 
+""" + +from datetime import datetime, timezone + +from fastapi.templating import Jinja2Templates + +templates = Jinja2Templates(directory="app/templates") + + +# --------------------------------------------------------------------------- +# Template filters +# --------------------------------------------------------------------------- + +def _format_bytes(value: int | None) -> str: + if value is None: + return "—" + tb = value / 1_000_000_000_000 + if tb >= 1: + return f"{tb:.0f} TB" + gb = value / 1_000_000_000 + return f"{gb:.0f} GB" + + +def _format_eta(seconds: int | None) -> str: + if not seconds or seconds <= 0: + return "" + h = seconds // 3600 + m = (seconds % 3600) // 60 + if h > 0: + return f"~{h}h {m}m" if m else f"~{h}h" + return f"~{m}m" if m else "<1m" + + +def _temp_class(celsius: int | None) -> str: + if celsius is None: + return "" + if celsius < 40: + return "temp-cool" + if celsius < 50: + return "temp-warm" + return "temp-hot" + + +def _format_dt(iso: str | None) -> str: + if not iso: + return "—" + try: + dt = datetime.fromisoformat(iso) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + local = dt.astimezone() + return local.strftime("%H:%M:%S") + except Exception: + return iso + + +def _format_dt_full(iso: str | None) -> str: + """Date + time for history tables.""" + if not iso: + return "—" + try: + dt = datetime.fromisoformat(iso) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + local = dt.astimezone() + return local.strftime("%Y-%m-%d %H:%M:%S") + except Exception: + return iso + + +def _format_duration(seconds: float | int | None) -> str: + if seconds is None or seconds < 0: + return "—" + seconds = int(seconds) + h = seconds // 3600 + m = (seconds % 3600) // 60 + s = seconds % 60 + if h > 0: + return f"{h}h {m}m {s}s" + if m > 0: + return f"{m}m {s}s" + return f"{s}s" + + +# --------------------------------------------------------------------------- +# Template globals +# 
--------------------------------------------------------------------------- + +def _drive_status(drive: dict) -> str: + short = (drive.get("smart_short") or {}).get("state", "idle") + long_ = (drive.get("smart_long") or {}).get("state", "idle") + health = drive.get("smart_health", "UNKNOWN") + if "running" in (short, long_): + return "running" + if short == "failed" or long_ == "failed" or health == "FAILED": + return "failed" + if "passed" in (short, long_): + return "passed" + return "idle" + + +def _format_elapsed(iso: str | None) -> str: + """Human-readable elapsed time since an ISO timestamp (e.g. '2h 34m').""" + if not iso: + return "" + try: + start = datetime.fromisoformat(iso) + if start.tzinfo is None: + start = start.replace(tzinfo=timezone.utc) + elapsed = int((datetime.now(timezone.utc) - start).total_seconds()) + if elapsed < 0: + return "" + h = elapsed // 3600 + m = (elapsed % 3600) // 60 + s = elapsed % 60 + if h > 0: + return f"{h}h {m}m" + if m > 0: + return f"{m}m {s}s" + return f"{s}s" + except Exception: + return "" + + +# Register +templates.env.filters["format_bytes"] = _format_bytes +templates.env.filters["format_eta"] = _format_eta +templates.env.filters["temp_class"] = _temp_class +templates.env.filters["format_dt"] = _format_dt +templates.env.filters["format_dt_full"] = _format_dt_full +templates.env.filters["format_duration"] = _format_duration +templates.env.filters["format_elapsed"] = _format_elapsed +templates.env.globals["drive_status"] = _drive_status diff --git a/app/routes.py b/app/routes.py new file mode 100644 index 0000000..61cc559 --- /dev/null +++ b/app/routes.py @@ -0,0 +1,862 @@ +import asyncio +import csv +import io +import json +from datetime import datetime, timezone + +import aiosqlite +from fastapi import APIRouter, Depends, HTTPException, Query, Request +from fastapi.responses import HTMLResponse, StreamingResponse +from sse_starlette.sse import EventSourceResponse + +from app import burnin, mailer, poller, 
settings_store +from app.config import settings +from app.database import get_db +from app.models import ( + BurninJobResponse, BurninStageResponse, + CancelBurninRequest, DriveResponse, + SmartTestState, StartBurninRequest, UpdateDriveRequest, +) +from app.renderer import templates + +router = APIRouter() + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + +def _eta_seconds(eta_at: str | None) -> int | None: + if not eta_at: + return None + try: + eta_ts = datetime.fromisoformat(eta_at) + if eta_ts.tzinfo is None: + eta_ts = eta_ts.replace(tzinfo=timezone.utc) + remaining = (eta_ts - datetime.now(timezone.utc)).total_seconds() + return max(0, int(remaining)) + except Exception: + return None + + +def _is_stale(last_polled_at: str) -> bool: + try: + last = datetime.fromisoformat(last_polled_at) + if last.tzinfo is None: + last = last.replace(tzinfo=timezone.utc) + return (datetime.now(timezone.utc) - last).total_seconds() > settings.stale_threshold_seconds + except Exception: + return True + + +def _build_smart(row: aiosqlite.Row, prefix: str) -> SmartTestState: + eta_at = row[f"{prefix}_eta_at"] + return SmartTestState( + state=row[f"{prefix}_state"] or "idle", + percent=row[f"{prefix}_percent"], + eta_seconds=_eta_seconds(eta_at), + eta_timestamp=eta_at, + started_at=row[f"{prefix}_started_at"], + finished_at=row[f"{prefix}_finished_at"], + error_text=row[f"{prefix}_error"], + ) + + +def _row_to_drive(row: aiosqlite.Row) -> DriveResponse: + return DriveResponse( + id=row["id"], + devname=row["devname"], + serial=row["serial"], + model=row["model"], + size_bytes=row["size_bytes"], + temperature_c=row["temperature_c"], + smart_health=row["smart_health"] or "UNKNOWN", + last_polled_at=row["last_polled_at"], + is_stale=_is_stale(row["last_polled_at"]), + smart_short=_build_smart(row, "short"), + smart_long=_build_smart(row, "long"), + 
notes=row["notes"], + location=row["location"], + ) + + +def _compute_status(drive: dict) -> str: + short = (drive.get("smart_short") or {}).get("state", "idle") + long_ = (drive.get("smart_long") or {}).get("state", "idle") + health = drive.get("smart_health", "UNKNOWN") + if "running" in (short, long_): + return "running" + if short == "failed" or long_ == "failed" or health == "FAILED": + return "failed" + if "passed" in (short, long_): + return "passed" + return "idle" + + +_DRIVES_QUERY = """ + SELECT + d.id, d.devname, d.serial, d.model, d.size_bytes, + d.temperature_c, d.smart_health, d.last_polled_at, + d.notes, d.location, + s.state AS short_state, + s.percent AS short_percent, + s.started_at AS short_started_at, + s.eta_at AS short_eta_at, + s.finished_at AS short_finished_at, + s.error_text AS short_error, + l.state AS long_state, + l.percent AS long_percent, + l.started_at AS long_started_at, + l.eta_at AS long_eta_at, + l.finished_at AS long_finished_at, + l.error_text AS long_error + FROM drives d + LEFT JOIN smart_tests s ON s.drive_id = d.id AND s.test_type = 'short' + LEFT JOIN smart_tests l ON l.drive_id = d.id AND l.test_type = 'long' + {where} + ORDER BY d.devname +""" + + +async def _fetch_burnin_by_drive(db: aiosqlite.Connection) -> dict[int, dict]: + """Return latest burn-in job (any state) keyed by drive_id.""" + cur = await db.execute(""" + SELECT bj.* + FROM burnin_jobs bj + WHERE bj.id IN (SELECT MAX(id) FROM burnin_jobs GROUP BY drive_id) + """) + rows = await cur.fetchall() + return {r["drive_id"]: dict(r) for r in rows} + + +async def _fetch_drives_for_template(db: aiosqlite.Connection) -> list[dict]: + cur = await db.execute(_DRIVES_QUERY.format(where="")) + rows = await cur.fetchall() + burnin_by_drive = await _fetch_burnin_by_drive(db) + drives = [] + for row in rows: + d = _row_to_drive(row).model_dump() + d["status"] = _compute_status(d) + d["burnin"] = burnin_by_drive.get(d["id"]) + drives.append(d) + return drives + + +def 
_stale_context(poller_state: dict) -> dict: + last = poller_state.get("last_poll_at") + if not last: + return {"stale": False, "stale_seconds": 0} + try: + dt = datetime.fromisoformat(last) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + elapsed = int((datetime.now(timezone.utc) - dt).total_seconds()) + stale = elapsed > settings.stale_threshold_seconds + return {"stale": stale, "stale_seconds": elapsed} + except Exception: + return {"stale": False, "stale_seconds": 0} + + +# --------------------------------------------------------------------------- +# Dashboard +# --------------------------------------------------------------------------- + +@router.get("/", response_class=HTMLResponse) +async def dashboard(request: Request, db: aiosqlite.Connection = Depends(get_db)): + drives = await _fetch_drives_for_template(db) + ps = poller.get_state() + return templates.TemplateResponse("dashboard.html", { + "request": request, + "drives": drives, + "poller": ps, + **_stale_context(ps), + }) + + +# --------------------------------------------------------------------------- +# SSE — live drive table updates +# --------------------------------------------------------------------------- + +@router.get("/sse/drives") +async def sse_drives(request: Request): + q = poller.subscribe() + + async def generate(): + try: + while True: + # Wait for next poll notification or keepalive timeout + try: + payload = await asyncio.wait_for(q.get(), timeout=25.0) + except asyncio.TimeoutError: + if await request.is_disconnected(): + break + yield {"event": "keepalive", "data": ""} + continue + + if await request.is_disconnected(): + break + + # Extract alert from payload (may be None for regular polls) + alert = None + if isinstance(payload, dict): + alert = payload.get("alert") + + # Render fresh table HTML + async with aiosqlite.connect(settings.db_path) as db: + db.row_factory = aiosqlite.Row + await db.execute("PRAGMA journal_mode=WAL") + drives = await 
_fetch_drives_for_template(db) + + html = templates.env.get_template( + "components/drives_table.html" + ).render(drives=drives) + + yield {"event": "drives-update", "data": html} + + # Push browser notification event if this was a job completion + if alert: + yield {"event": "job-alert", "data": json.dumps(alert)} + + finally: + poller.unsubscribe(q) + + return EventSourceResponse(generate()) + + +# --------------------------------------------------------------------------- +# JSON API +# --------------------------------------------------------------------------- + +@router.get("/health") +async def health(db: aiosqlite.Connection = Depends(get_db)): + ps = poller.get_state() + cur = await db.execute("SELECT COUNT(*) FROM drives") + row = await cur.fetchone() + drives_tracked = row[0] if row else 0 + return { + "status": "ok" if ps["healthy"] else "degraded", + "last_poll_at": ps["last_poll_at"], + "last_error": ps["last_error"], + "consecutive_failures": ps.get("consecutive_failures", 0), + "poll_interval_seconds": settings.poll_interval_seconds, + "drives_tracked": drives_tracked, + } + + +@router.get("/api/v1/drives", response_model=list[DriveResponse]) +async def list_drives(db: aiosqlite.Connection = Depends(get_db)): + cur = await db.execute(_DRIVES_QUERY.format(where="")) + rows = await cur.fetchall() + return [_row_to_drive(r) for r in rows] + + +@router.get("/api/v1/drives/{drive_id}", response_model=DriveResponse) +async def get_drive(drive_id: int, db: aiosqlite.Connection = Depends(get_db)): + cur = await db.execute( + _DRIVES_QUERY.format(where="WHERE d.id = ?"), (drive_id,) + ) + row = await cur.fetchone() + if not row: + raise HTTPException(status_code=404, detail="Drive not found") + return _row_to_drive(row) + + +@router.post("/api/v1/drives/{drive_id}/smart/start") +async def smart_start( + drive_id: int, + body: dict, + db: aiosqlite.Connection = Depends(get_db), +): + """Start a standalone SHORT or LONG SMART test on a single drive.""" + from 
app.truenas import TrueNASClient + from app import burnin as _burnin + + test_type = (body.get("type") or "").upper() + if test_type not in ("SHORT", "LONG"): + raise HTTPException(status_code=422, detail="type must be SHORT or LONG") + + cur = await db.execute("SELECT devname FROM drives WHERE id=?", (drive_id,)) + row = await cur.fetchone() + if not row: + raise HTTPException(status_code=404, detail="Drive not found") + devname = row[0] + + # Use the shared TrueNAS client held by the burnin module + client = _burnin._client + if client is None: + raise HTTPException(status_code=503, detail="TrueNAS client not ready") + + try: + tn_job_id = await client.start_smart_test([devname], test_type) + except Exception as exc: + raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}") + + return {"job_id": tn_job_id, "devname": devname, "type": test_type} + + +@router.post("/api/v1/drives/{drive_id}/smart/cancel") +async def smart_cancel( + drive_id: int, + body: dict, + db: aiosqlite.Connection = Depends(get_db), +): + """Cancel a running standalone SMART test on a drive.""" + from app import burnin as _burnin + + test_type = (body.get("type") or "").lower() + if test_type not in ("short", "long"): + raise HTTPException(status_code=422, detail="type must be 'short' or 'long'") + + cur = await db.execute("SELECT devname FROM drives WHERE id=?", (drive_id,)) + row = await cur.fetchone() + if not row: + raise HTTPException(status_code=404, detail="Drive not found") + devname = row[0] + + client = _burnin._client + if client is None: + raise HTTPException(status_code=503, detail="TrueNAS client not ready") + + # Find the running TrueNAS job for this drive/test-type + try: + jobs = await client.get_smart_jobs() + tn_job_id = None + for j in jobs: + if j.get("state") != "RUNNING": + continue + args = j.get("arguments", []) + if not args or not isinstance(args[0], dict): + continue + if devname in args[0].get("disks", []): + tn_job_id = j["id"] + break + + if 
tn_job_id is None: + raise HTTPException(status_code=404, detail="No running SMART test found for this drive") + + await client.abort_job(tn_job_id) + except HTTPException: + raise + except Exception as exc: + raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}") + + # Update local DB state + now = datetime.now(timezone.utc).isoformat() + await db.execute( + "UPDATE smart_tests SET state='aborted', finished_at=? WHERE drive_id=? AND test_type=? AND state='running'", + (now, drive_id, test_type), + ) + await db.commit() + + return {"cancelled": True, "devname": devname, "type": test_type} + + +# --------------------------------------------------------------------------- +# Burn-in API +# --------------------------------------------------------------------------- + +def _row_to_burnin(row: aiosqlite.Row, stages: list[aiosqlite.Row]) -> BurninJobResponse: + return BurninJobResponse( + id=row["id"], + drive_id=row["drive_id"], + profile=row["profile"], + state=row["state"], + percent=row["percent"] or 0, + stage_name=row["stage_name"], + operator=row["operator"], + created_at=row["created_at"], + started_at=row["started_at"], + finished_at=row["finished_at"], + error_text=row["error_text"], + stages=[ + BurninStageResponse( + id=s["id"], + stage_name=s["stage_name"], + state=s["state"], + percent=s["percent"] or 0, + started_at=s["started_at"], + finished_at=s["finished_at"], + error_text=s["error_text"], + ) + for s in stages + ], + ) + + +@router.post("/api/v1/burnin/start") +async def burnin_start(req: StartBurninRequest): + results = [] + errors = [] + for drive_id in req.drive_ids: + try: + job_id = await burnin.start_job( + drive_id, req.profile, req.operator, stage_order=req.stage_order + ) + results.append({"drive_id": drive_id, "job_id": job_id}) + except ValueError as exc: + errors.append({"drive_id": drive_id, "error": str(exc)}) + if errors and not results: + raise HTTPException(status_code=409, detail=errors[0]["error"]) + return {"queued": 
results, "errors": errors} + + +@router.post("/api/v1/burnin/{job_id}/cancel") +async def burnin_cancel(job_id: int, req: CancelBurninRequest): + ok = await burnin.cancel_job(job_id, req.operator) + if not ok: + raise HTTPException(status_code=409, detail="Job not found or not cancellable") + return {"cancelled": True} + + +# --------------------------------------------------------------------------- +# History pages +# --------------------------------------------------------------------------- + +_PAGE_SIZE = 50 + +_ALL_STATES = ("queued", "running", "passed", "failed", "cancelled", "unknown") + +_HISTORY_QUERY = """ + SELECT + bj.id, bj.drive_id, bj.profile, bj.state, bj.operator, + bj.created_at, bj.started_at, bj.finished_at, bj.error_text, + d.devname, d.serial, d.model, d.size_bytes, + CAST( + (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400 + AS INTEGER + ) AS duration_seconds + FROM burnin_jobs bj + JOIN drives d ON d.id = bj.drive_id + {where} + ORDER BY bj.id DESC +""" + + +def _state_where(state: str) -> tuple[str, list]: + if state == "all": + return "", [] + return "WHERE bj.state = ?", [state] + + +@router.get("/history", response_class=HTMLResponse) +async def history_list( + request: Request, + state: str = Query(default="all"), + page: int = Query(default=1, ge=1), + db: aiosqlite.Connection = Depends(get_db), +): + if state not in ("all",) + _ALL_STATES: + state = "all" + + where_clause, params = _state_where(state) + + # Total count + count_sql = f"SELECT COUNT(*) FROM burnin_jobs bj JOIN drives d ON d.id = bj.drive_id {where_clause}" + cur = await db.execute(count_sql, params) + total_count = (await cur.fetchone())[0] + total_pages = max(1, (total_count + _PAGE_SIZE - 1) // _PAGE_SIZE) + page = min(page, total_pages) + offset = (page - 1) * _PAGE_SIZE + + # Per-state counts for badges + cur = await db.execute( + "SELECT state, COUNT(*) FROM burnin_jobs GROUP BY state" + ) + counts = {"all": total_count if state == "all" else 0} + 
for r in await cur.fetchall(): + counts[r[0]] = r[1] + if state != "all": + cur2 = await db.execute("SELECT COUNT(*) FROM burnin_jobs") + counts["all"] = (await cur2.fetchone())[0] + + # Job rows + sql = _HISTORY_QUERY.format(where=where_clause) + " LIMIT ? OFFSET ?" + cur = await db.execute(sql, params + [_PAGE_SIZE, offset]) + rows = await cur.fetchall() + jobs = [dict(r) for r in rows] + + ps = poller.get_state() + return templates.TemplateResponse("history.html", { + "request": request, + "jobs": jobs, + "active_state": state, + "counts": counts, + "page": page, + "total_pages": total_pages, + "total_count": total_count, + "poller": ps, + **_stale_context(ps), + }) + + +@router.get("/history/{job_id}", response_class=HTMLResponse) +async def history_detail( + request: Request, + job_id: int, + db: aiosqlite.Connection = Depends(get_db), +): + # Job + drive info + cur = await db.execute(""" + SELECT + bj.*, d.devname, d.serial, d.model, d.size_bytes, + CAST( + (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400 + AS INTEGER + ) AS duration_seconds + FROM burnin_jobs bj + JOIN drives d ON d.id = bj.drive_id + WHERE bj.id = ? + """, (job_id,)) + row = await cur.fetchone() + if not row: + raise HTTPException(status_code=404, detail="Burn-in job not found") + job = dict(row) + + # Stages (with duration) + cur = await db.execute(""" + SELECT *, + CAST( + (julianday(finished_at) - julianday(started_at)) * 86400 + AS INTEGER + ) AS duration_seconds + FROM burnin_stages + WHERE burnin_job_id = ? 
+ ORDER BY id + """, (job_id,)) + job["stages"] = [dict(r) for r in await cur.fetchall()] + + ps = poller.get_state() + return templates.TemplateResponse("job_detail.html", { + "request": request, + "job": job, + "poller": ps, + **_stale_context(ps), + }) + + +# --------------------------------------------------------------------------- +# CSV export +# --------------------------------------------------------------------------- + +@router.get("/api/v1/burnin/export.csv") +async def burnin_export_csv(db: aiosqlite.Connection = Depends(get_db)): + cur = await db.execute(""" + SELECT + bj.id AS job_id, + bj.drive_id, + d.devname, + d.serial, + d.model, + bj.profile, + bj.state, + bj.operator, + bj.created_at, + bj.started_at, + bj.finished_at, + CAST( + (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400 + AS INTEGER + ) AS duration_seconds, + bj.error_text + FROM burnin_jobs bj + JOIN drives d ON d.id = bj.drive_id + ORDER BY bj.id DESC + """) + rows = await cur.fetchall() + + buf = io.StringIO() + writer = csv.writer(buf) + writer.writerow([ + "job_id", "drive_id", "devname", "serial", "model", + "profile", "state", "operator", + "created_at", "started_at", "finished_at", "duration_seconds", + "error_text", + ]) + for r in rows: + writer.writerow(list(r)) + + buf.seek(0) + return StreamingResponse( + iter([buf.getvalue()]), + media_type="text/csv", + headers={"Content-Disposition": "attachment; filename=burnin_history.csv"}, + ) + + +# --------------------------------------------------------------------------- +# On-demand email report +# --------------------------------------------------------------------------- + +@router.post("/api/v1/report/send") +async def send_report_now(): + """Trigger the daily status email immediately (for testing SMTP config).""" + if not settings.smtp_host: + raise HTTPException(status_code=503, detail="SMTP not configured (SMTP_HOST is empty)") + try: + await mailer.send_report_now() + except Exception as exc: + raise 
HTTPException(status_code=502, detail=f"Mail send failed: {exc}") + return {"sent": True, "to": settings.smtp_to} + + +# --------------------------------------------------------------------------- +# Drive notes / location update +# --------------------------------------------------------------------------- + +@router.patch("/api/v1/drives/{drive_id}") +async def update_drive( + drive_id: int, + req: UpdateDriveRequest, + db: aiosqlite.Connection = Depends(get_db), +): + cur = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,)) + if not await cur.fetchone(): + raise HTTPException(status_code=404, detail="Drive not found") + + await db.execute( + "UPDATE drives SET notes=?, location=? WHERE id=?", + (req.notes, req.location, drive_id), + ) + await db.commit() + return {"updated": True} + + +# --------------------------------------------------------------------------- +# Audit log page +# --------------------------------------------------------------------------- + +_AUDIT_QUERY = """ + SELECT + ae.id, ae.event_type, ae.operator, ae.message, ae.created_at, + d.devname, d.serial + FROM audit_events ae + LEFT JOIN drives d ON d.id = ae.drive_id + ORDER BY ae.id DESC + LIMIT 200 +""" + +_AUDIT_EVENT_COLORS = { + "burnin_queued": "yellow", + "burnin_started": "blue", + "burnin_passed": "passed", + "burnin_failed": "failed", + "burnin_cancelled": "cancelled", + "burnin_stuck": "failed", + "burnin_unknown": "unknown", +} + + +@router.get("/audit", response_class=HTMLResponse) +async def audit_log( + request: Request, + db: aiosqlite.Connection = Depends(get_db), +): + cur = await db.execute(_AUDIT_QUERY) + rows = [dict(r) for r in await cur.fetchall()] + ps = poller.get_state() + return templates.TemplateResponse("audit.html", { + "request": request, + "events": rows, + "event_colors": _AUDIT_EVENT_COLORS, + "poller": ps, + **_stale_context(ps), + }) + + +# --------------------------------------------------------------------------- +# Stats / analytics page 
+# --------------------------------------------------------------------------- + +@router.get("/stats", response_class=HTMLResponse) +async def stats_page( + request: Request, + db: aiosqlite.Connection = Depends(get_db), +): + # Overall counts + cur = await db.execute(""" + SELECT + COUNT(*) as total, + SUM(CASE WHEN state='passed' THEN 1 ELSE 0 END) as passed, + SUM(CASE WHEN state='failed' THEN 1 ELSE 0 END) as failed, + SUM(CASE WHEN state='running' THEN 1 ELSE 0 END) as running, + SUM(CASE WHEN state='cancelled' THEN 1 ELSE 0 END) as cancelled + FROM burnin_jobs + """) + overall = dict(await cur.fetchone()) + + # Failure rate by drive model (only completed jobs) + cur = await db.execute(""" + SELECT + COALESCE(d.model, 'Unknown') AS model, + COUNT(*) AS total, + SUM(CASE WHEN bj.state='passed' THEN 1 ELSE 0 END) AS passed, + SUM(CASE WHEN bj.state='failed' THEN 1 ELSE 0 END) AS failed, + ROUND(100.0 * SUM(CASE WHEN bj.state='passed' THEN 1 ELSE 0 END) / COUNT(*), 1) AS pass_rate + FROM burnin_jobs bj + JOIN drives d ON d.id = bj.drive_id + WHERE bj.state IN ('passed', 'failed') + GROUP BY COALESCE(d.model, 'Unknown') + ORDER BY total DESC + LIMIT 20 + """) + by_model = [dict(r) for r in await cur.fetchall()] + + # Activity last 14 days + cur = await db.execute(""" + SELECT + date(created_at) AS day, + COUNT(*) AS total, + SUM(CASE WHEN state='passed' THEN 1 ELSE 0 END) AS passed, + SUM(CASE WHEN state='failed' THEN 1 ELSE 0 END) AS failed + FROM burnin_jobs + WHERE created_at >= date('now', '-14 days') + GROUP BY date(created_at) + ORDER BY day DESC + """) + by_day = [dict(r) for r in await cur.fetchall()] + + # Drives tracked + cur = await db.execute("SELECT COUNT(*) FROM drives") + drives_total = (await cur.fetchone())[0] + + ps = poller.get_state() + return templates.TemplateResponse("stats.html", { + "request": request, + "overall": overall, + "by_model": by_model, + "by_day": by_day, + "drives_total": drives_total, + "poller": ps, + **_stale_context(ps), 
+ }) + + +# --------------------------------------------------------------------------- +# Settings page +# --------------------------------------------------------------------------- + +@router.get("/settings", response_class=HTMLResponse) +async def settings_page( + request: Request, + db: aiosqlite.Connection = Depends(get_db), +): + # Read-only display values (require container restart to change) + readonly = { + "truenas_base_url": settings.truenas_base_url, + "truenas_verify_tls": settings.truenas_verify_tls, + "poll_interval_seconds": settings.poll_interval_seconds, + "stale_threshold_seconds": settings.stale_threshold_seconds, + "allowed_ips": settings.allowed_ips or "(allow all)", + "log_level": settings.log_level, + } + + # Editable values — real values for form fields (password excluded) + editable = { + "smtp_host": settings.smtp_host, + "smtp_port": settings.smtp_port, + "smtp_ssl_mode": settings.smtp_ssl_mode or "starttls", + "smtp_timeout": settings.smtp_timeout, + "smtp_user": settings.smtp_user, + "smtp_from": settings.smtp_from, + "smtp_to": settings.smtp_to, + "smtp_report_hour": settings.smtp_report_hour, + "smtp_daily_report_enabled": settings.smtp_daily_report_enabled, + "smtp_alert_on_fail": settings.smtp_alert_on_fail, + "smtp_alert_on_pass": settings.smtp_alert_on_pass, + "webhook_url": settings.webhook_url, + "stuck_job_hours": settings.stuck_job_hours, + "max_parallel_burnins": settings.max_parallel_burnins, + } + + ps = poller.get_state() + return templates.TemplateResponse("settings.html", { + "request": request, + "readonly": readonly, + "editable": editable, + "smtp_enabled": bool(settings.smtp_host), + "poller": ps, + **_stale_context(ps), + }) + + +@router.post("/api/v1/settings") +async def save_settings(body: dict): + """Save editable runtime settings. 
Password is only updated if non-empty."""
+    # Don't overwrite password if client sent empty string
+    if "smtp_password" in body and body["smtp_password"] == "":
+        del body["smtp_password"]
+
+    try:
+        saved = settings_store.save(body)
+    except ValueError as exc:
+        raise HTTPException(status_code=422, detail=str(exc))
+
+    return {"saved": True, "keys": saved}
+
+
+@router.post("/api/v1/settings/test-smtp")
+async def test_smtp():
+    """Test the current SMTP configuration without sending an email."""
+    result = await mailer.test_smtp_connection()
+    if not result["ok"]:
+        raise HTTPException(status_code=502, detail=result["error"])
+    return {"ok": True}
+
+
+# ---------------------------------------------------------------------------
+# Print view — path shape differs from /history/{job_id}, so registration order is irrelevant
+# ---------------------------------------------------------------------------
+
+@router.get("/history/{job_id}/print", response_class=HTMLResponse)
+async def history_print(
+    request: Request,
+    job_id: int,
+    db: aiosqlite.Connection = Depends(get_db),
+):
+    cur = await db.execute("""
+        SELECT
+            bj.*, d.devname, d.serial, d.model, d.size_bytes,
+            CAST(
+                (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400
+                AS INTEGER
+            ) AS duration_seconds
+        FROM burnin_jobs bj
+        JOIN drives d ON d.id = bj.drive_id
+        WHERE bj.id = ?
+    """, (job_id,))
+    row = await cur.fetchone()
+    if not row:
+        raise HTTPException(status_code=404, detail="Job not found")
+    job = dict(row)
+
+    cur = await db.execute("""
+        SELECT *,
+            CAST(
+                (julianday(finished_at) - julianday(started_at)) * 86400
+                AS INTEGER
+            ) AS duration_seconds
+        FROM burnin_stages WHERE burnin_job_id=?
ORDER BY id + """, (job_id,)) + job["stages"] = [dict(r) for r in await cur.fetchall()] + + return templates.TemplateResponse("job_print.html", { + "request": request, + "job": job, + }) + + +# --------------------------------------------------------------------------- +# Burn-in job detail API (must be after export.csv to avoid int coercion) +# --------------------------------------------------------------------------- + +@router.get("/api/v1/burnin/{job_id}", response_model=BurninJobResponse) +async def burnin_get(job_id: int, db: aiosqlite.Connection = Depends(get_db)): + db.row_factory = aiosqlite.Row + cur = await db.execute("SELECT * FROM burnin_jobs WHERE id=?", (job_id,)) + row = await cur.fetchone() + if not row: + raise HTTPException(status_code=404, detail="Burn-in job not found") + cur = await db.execute( + "SELECT * FROM burnin_stages WHERE burnin_job_id=? ORDER BY id", (job_id,) + ) + stages = await cur.fetchall() + return _row_to_burnin(row, stages) diff --git a/app/settings_store.py b/app/settings_store.py new file mode 100644 index 0000000..33ec439 --- /dev/null +++ b/app/settings_store.py @@ -0,0 +1,104 @@ +""" +Runtime settings store — persists editable settings to /data/settings_overrides.json. + +Changes take effect immediately (in-memory setattr on the global Settings object) +and survive restarts (JSON file is loaded in main.py lifespan). + +Settings that require a container restart (TrueNAS URL, poll interval, allowed IPs, etc.) +are NOT included here and are display-only on the settings page. +""" + +import json +import logging +from pathlib import Path + +from app.config import settings + +log = logging.getLogger(__name__) + +# Field name → coerce function. Only fields listed here are accepted by save(). 
+_EDITABLE: dict[str, type] = {
+    "smtp_host": str,
+    "smtp_port": int,  # exposed as editable by settings_page; save() rejects unknown keys
+    "smtp_ssl_mode": str,
+    "smtp_timeout": int,
+    "smtp_user": str,
+    "smtp_password": str,
+    "smtp_from": str,
+    "smtp_to": str,
+    "smtp_daily_report_enabled": bool,
+    "smtp_report_hour": int,
+    "smtp_alert_on_fail": bool,
+    "smtp_alert_on_pass": bool,
+    "webhook_url": str,
+    "stuck_job_hours": int,
+    "max_parallel_burnins": int,
+}
+_VALID_SSL_MODES = {"starttls", "ssl", "plain"}
+
+
+def _overrides_path() -> Path:
+    return Path(settings.db_path).parent / "settings_overrides.json"
+
+
+def _coerce(key: str, raw) -> object:
+    coerce = _EDITABLE[key]
+    if coerce is bool:
+        if isinstance(raw, bool):
+            return raw
+        return str(raw).lower() in ("1", "true", "yes", "on")
+    return coerce(raw)
+
+
+def _apply(data: dict) -> None:
+    """Apply a dict of updates to the live settings object."""
+    for key, raw in data.items():
+        if key not in _EDITABLE:
+            continue
+        try:
+            val = _coerce(key, raw)
+            if key == "smtp_ssl_mode" and val not in _VALID_SSL_MODES:
+                log.warning("settings_store: invalid smtp_ssl_mode %r — ignoring", val)
+                continue
+            if key == "smtp_report_hour" and not (0 <= int(val) <= 23):
+                log.warning("settings_store: smtp_report_hour out of range — ignoring")
+                continue
+            setattr(settings, key, val)
+        except (ValueError, TypeError) as exc:
+            log.warning("settings_store: invalid value for %s: %s", key, exc)
+
+
+def init() -> None:
+    """Load persisted overrides at startup. Call once from lifespan."""
+    path = _overrides_path()
+    if not path.exists():
+        return
+    try:
+        data = json.loads(path.read_text())
+        _apply(data)
+        log.info("settings_store: loaded %d override(s) from %s", len(data), path)
+    except Exception as exc:
+        log.warning("settings_store: could not load overrides from %s: %s", path, exc)
+
+
+def save(updates: dict) -> list[str]:
+    """
+    Validate, apply, and persist a dict of settings updates.
+    Returns list of keys that were actually saved.
+    Raises ValueError for unknown or invalid fields.
+ """ + accepted: dict = {} + for key, raw in updates.items(): + if key not in _EDITABLE: + raise ValueError(f"Unknown or non-editable setting: {key!r}") + accepted[key] = raw + + _apply(accepted) + + # Persist ALL currently-applied editable values (not just the delta) + snapshot = {k: getattr(settings, k) for k in _EDITABLE} + path = _overrides_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(snapshot, indent=2)) + log.info("settings_store: saved %d key(s) — snapshot written to %s", len(accepted), path) + return list(accepted.keys()) diff --git a/app/static/app.css b/app/static/app.css new file mode 100644 index 0000000..00d0b26 --- /dev/null +++ b/app/static/app.css @@ -0,0 +1,1939 @@ +/* ----------------------------------------------------------------------- + Variables +----------------------------------------------------------------------- */ +:root { + --bg: #0d1117; + --bg-card: #161b22; + --bg-row-alt: #0f1319; + --border: #30363d; + --text: #c9d1d9; + --text-muted: #8b949e; + --text-strong: #f0f6fc; + + --green: #3fb950; + --green-bg: rgba(63, 185, 80, 0.12); + --green-bd: rgba(63, 185, 80, 0.35); + + --yellow: #d29922; + --yellow-bg: rgba(210, 153, 34, 0.12); + --yellow-bd: rgba(210, 153, 34, 0.35); + + --red: #f85149; + --red-bg: rgba(248, 81, 73, 0.12); + --red-bd: rgba(248, 81, 73, 0.35); + + --blue: #58a6ff; + --blue-bg: rgba(88, 166, 255, 0.12); + --blue-bd: rgba(88, 166, 255, 0.35); + + --gray: #6e7681; + --gray-bg: rgba(110, 118, 129, 0.12); + --gray-bd: rgba(110, 118, 129, 0.35); +} + +/* ----------------------------------------------------------------------- + Reset +----------------------------------------------------------------------- */ +*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } + +body { + background: var(--bg); + color: var(--text); + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif; + font-size: 14px; + line-height: 1.5; + min-height: 
100vh; +} + +a { color: var(--blue); text-decoration: none; } +a:hover { text-decoration: underline; } + +/* ----------------------------------------------------------------------- + Header +----------------------------------------------------------------------- */ +header { + background: var(--bg-card); + border-bottom: 1px solid var(--border); + padding: 10px 24px; + display: flex; + align-items: center; + justify-content: space-between; + position: sticky; + top: 0; + z-index: 100; +} + +.header-brand { + display: flex; + align-items: center; + gap: 10px; + color: var(--text-strong); +} + +.header-brand svg { + color: var(--blue); + flex-shrink: 0; +} + +.header-title { + font-size: 15px; + font-weight: 600; + letter-spacing: -0.01em; +} + +.header-meta { + display: flex; + align-items: center; + gap: 18px; + font-size: 12px; + color: var(--text-muted); +} + +.live-indicator { + display: flex; + align-items: center; + gap: 6px; + font-weight: 500; +} + +.live-dot { + width: 7px; + height: 7px; + border-radius: 50%; + background: var(--green); + box-shadow: 0 0 0 2px var(--green-bg); + animation: pulse-dot 2.5s ease-in-out infinite; + flex-shrink: 0; +} + +.live-dot.degraded { + background: var(--red); + box-shadow: 0 0 0 2px var(--red-bg); + animation: none; +} + +@keyframes pulse-dot { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.35; } +} + +.poll-time { + font-variant-numeric: tabular-nums; +} + +.header-link { + font-size: 11px; + color: var(--text-muted); + border: 1px solid var(--border); + border-radius: 4px; + padding: 2px 8px; +} + +.header-link:hover { + color: var(--text); + border-color: var(--text-muted); + text-decoration: none; +} + +/* ----------------------------------------------------------------------- + Banners +----------------------------------------------------------------------- */ +.banner { + padding: 8px 24px; + font-size: 13px; + border-bottom: 1px solid transparent; +} + +.banner-warn { + background: var(--yellow-bg); + 
border-color: var(--yellow-bd); + color: var(--yellow); +} + +.banner-error { + background: var(--red-bg); + border-color: var(--red-bd); + color: var(--red); +} + +/* ----------------------------------------------------------------------- + Main layout +----------------------------------------------------------------------- */ +main { + padding: 20px 24px 40px; +} + +/* ----------------------------------------------------------------------- + Filter bar +----------------------------------------------------------------------- */ +.filter-bar { + display: flex; + gap: 6px; + margin-bottom: 14px; + flex-wrap: wrap; +} + +.filter-btn { + display: inline-flex; + align-items: center; + gap: 7px; + background: none; + border: 1px solid var(--border); + color: var(--text-muted); + border-radius: 6px; + padding: 5px 12px; + font-size: 13px; + font-family: inherit; + cursor: pointer; + transition: color 0.12s, border-color 0.12s, background 0.12s; +} + +.filter-btn:hover { + color: var(--text); + border-color: #58a6ff55; +} + +.filter-btn.active { + color: var(--blue); + background: var(--blue-bg); + border-color: var(--blue-bd); +} + +.filter-btn .badge { + background: var(--bg); + border-radius: 10px; + padding: 0 6px; + font-size: 11px; + font-variant-numeric: tabular-nums; + min-width: 20px; + text-align: center; + color: var(--text-muted); + border: 1px solid var(--border); +} + +.filter-btn.active .badge { + background: var(--blue-bg); + color: var(--blue); + border-color: var(--blue-bd); +} + +/* ----------------------------------------------------------------------- + Table wrapper + overflow: auto on BOTH axes is required for position:sticky on thead + to work correctly. overflow-x:auto alone creates a stacking context + that causes tbody to render behind the sticky header. 
+----------------------------------------------------------------------- */ +.table-wrap { + overflow: auto; + max-height: calc(100vh - 205px); /* header(44) + main-pad(20) + stats-bar(70) + filter-bar(46) + buffer */ + border: 1px solid var(--border); + border-radius: 8px; +} + +table { + width: 100%; + border-collapse: collapse; + white-space: nowrap; +} + +thead { + background: var(--bg-card); + position: sticky; + top: 0; + z-index: 10; +} + +th { + padding: 9px 14px; + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.06em; + color: var(--text-muted); + text-align: left; + border-bottom: 1px solid var(--border); + white-space: nowrap; +} + +td { + padding: 10px 14px; + border-bottom: 1px solid var(--border); + vertical-align: middle; +} + +tr:last-child td { + border-bottom: none; +} + +tr:nth-child(even) td { + background: var(--bg-row-alt); +} + +tr:hover td { + background: rgba(88, 166, 255, 0.04); +} + +/* ----------------------------------------------------------------------- + Column widths +----------------------------------------------------------------------- */ +.col-drive { min-width: 180px; } +.col-serial { min-width: 110px; } +.col-size { min-width: 70px; text-align: right; } +.col-temp { min-width: 75px; text-align: right; } +.col-health { min-width: 85px; } +.col-smart { min-width: 150px; } +.col-actions { min-width: 170px; } + +/* ----------------------------------------------------------------------- + Drive cell +----------------------------------------------------------------------- */ +.drive-name { + display: block; + font-weight: 500; + color: var(--text-strong); + font-size: 14px; +} + +.drive-model { + display: block; + font-size: 11px; + color: var(--text-muted); + margin-top: 1px; +} + +/* ----------------------------------------------------------------------- + Misc cell types +----------------------------------------------------------------------- */ +.mono { + font-family: "SF Mono", "Cascadia 
Code", "Fira Mono", "Consolas", monospace; + font-size: 12px; + color: var(--text-muted); +} + +.cell-empty { + color: var(--border); + font-size: 13px; +} + +/* ----------------------------------------------------------------------- + Temperature +----------------------------------------------------------------------- */ +.temp { + font-weight: 500; + font-size: 13px; + font-variant-numeric: tabular-nums; +} + +.temp-cool { color: var(--green); } +.temp-warm { color: var(--yellow); } +.temp-hot { color: var(--red); } + +/* ----------------------------------------------------------------------- + Status chips +----------------------------------------------------------------------- */ +.chip { + display: inline-flex; + align-items: center; + border-radius: 4px; + padding: 2px 8px; + font-size: 11px; + font-weight: 600; + letter-spacing: 0.04em; + text-transform: uppercase; + border: 1px solid transparent; + white-space: nowrap; +} + +.chip-passed { + color: var(--green); + background: var(--green-bg); + border-color: var(--green-bd); +} + +.chip-failed { + color: var(--red); + background: var(--red-bg); + border-color: var(--red-bd); + cursor: help; +} + +.chip-running { + color: var(--blue); + background: var(--blue-bg); + border-color: var(--blue-bd); +} + +.chip-aborted { + color: var(--yellow); + background: var(--yellow-bg); + border-color: var(--yellow-bd); +} + +.chip-unknown { + color: var(--gray); + background: var(--gray-bg); + border-color: var(--gray-bd); +} + +/* ----------------------------------------------------------------------- + SMART cell — progress + ETA +----------------------------------------------------------------------- */ +.smart-cell { + min-width: 140px; +} + +.progress-wrap { + display: flex; + align-items: center; + gap: 8px; +} + +.progress-bar { + flex: 1; + height: 5px; + background: var(--border); + border-radius: 3px; + overflow: hidden; + min-width: 80px; +} + +.progress-fill { + height: 100%; + background: var(--blue); + 
border-radius: 3px; + transition: width 0.6s ease; +} + +.progress-pct { + font-size: 12px; + font-variant-numeric: tabular-nums; + color: var(--text-muted); + min-width: 32px; + text-align: right; +} + +.eta-text { + font-size: 11px; + color: var(--text-muted); + margin-top: 3px; +} + +/* ----------------------------------------------------------------------- + Burn-in column +----------------------------------------------------------------------- */ +.col-burnin { min-width: 160px; } + +.burnin-cell { min-width: 140px; } + +.stage-name { + font-size: 11px; + color: var(--text-muted); + margin-top: 3px; + text-transform: capitalize; +} + +.chip-queued { + color: var(--yellow); + background: var(--yellow-bg); + border-color: var(--yellow-bd); +} + +.progress-fill-green { + background: var(--green); +} + +/* ----------------------------------------------------------------------- + Action buttons +----------------------------------------------------------------------- */ +.action-group { + display: flex; + gap: 5px; + flex-wrap: nowrap; + align-items: center; +} + +.btn-action { + border-radius: 5px; + padding: 4px 9px; + font-size: 11px; + font-family: inherit; + font-weight: 600; + cursor: pointer; + border: 1px solid transparent; + transition: opacity 0.12s; + white-space: nowrap; + letter-spacing: 0.02em; +} +.btn-action:hover:not(:disabled) { opacity: 0.8; } +.btn-action:disabled, +.btn-disabled { + opacity: 0.3; + cursor: not-allowed; +} + +.btn-smart-short { + background: var(--blue-bg); + color: var(--blue); + border-color: var(--blue-bd); +} + +.btn-smart-long { + background: var(--yellow-bg); + color: var(--yellow); + border-color: var(--yellow-bd); +} + +.btn-start { + background: var(--red-bg); + color: var(--red); + border-color: var(--red-bd); +} + +.btn-cancel { + background: var(--red-bg); + color: var(--red); + border-color: var(--red-bd); +} + +.btn-cancel-smart { + background: var(--yellow-bg); + color: var(--yellow); + border-color: 
var(--yellow-bd); +} + +/* Cancel All Running Burn-Ins button in the filter bar */ +.btn-cancel-all { + margin-left: auto; + padding: 4px 12px; + font-size: 12px; + font-weight: 500; + border: 1px solid var(--red-bd); + border-radius: 6px; + background: var(--red-bg); + color: var(--red); + cursor: pointer; + transition: background .15s, border-color .15s; + white-space: nowrap; +} +.btn-cancel-all:hover { + background: rgba(248, 81, 73, 0.22); + border-color: var(--red); +} + +/* ----------------------------------------------------------------------- + Modal overlay + dialog +----------------------------------------------------------------------- */ +.modal-overlay { + position: fixed; + inset: 0; + background: rgba(0, 0, 0, 0.65); + z-index: 500; + display: flex; + align-items: center; + justify-content: center; + padding: 20px; +} + +.modal-overlay[hidden] { display: none; } + +.modal { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 10px; + width: 100%; + max-width: 480px; + box-shadow: 0 20px 60px rgba(0, 0, 0, 0.5); +} + +.modal-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 16px 20px 12px; + border-bottom: 1px solid var(--border); +} + +.modal-title { + font-size: 15px; + font-weight: 600; + color: var(--text-strong); +} + +.modal-close { + background: none; + border: none; + color: var(--text-muted); + font-size: 16px; + cursor: pointer; + padding: 2px 6px; + border-radius: 4px; + line-height: 1; +} +.modal-close:hover { background: var(--border); color: var(--text); } + +.modal-body { + padding: 18px 20px; + display: flex; + flex-direction: column; + gap: 18px; +} + +.modal-footer { + display: flex; + justify-content: flex-end; + gap: 10px; + padding: 14px 20px; + border-top: 1px solid var(--border); +} + +/* Drive info block */ +.modal-drive-info { + background: var(--bg); + border: 1px solid var(--border); + border-radius: 6px; + padding: 12px 14px; +} + +.modal-drive-row { 
+ display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 4px; +} + +.modal-devname { + font-size: 15px; + font-weight: 600; + color: var(--text-strong); +} + +.modal-drive-sub { + font-size: 12px; + color: var(--text-muted); +} + +/* Form elements */ +.form-group { display: flex; flex-direction: column; gap: 6px; } + +.form-label { + font-size: 12px; + font-weight: 500; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.form-input { + background: var(--bg); + border: 1px solid var(--border); + border-radius: 6px; + color: var(--text); + font-size: 14px; + font-family: inherit; + padding: 8px 12px; + outline: none; + transition: border-color 0.12s; +} +.form-input:focus { border-color: var(--blue); } + +.form-input-confirm { + font-family: "SF Mono", "Cascadia Code", monospace; + letter-spacing: 0.05em; +} + +/* Profile cards */ +.profile-options { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 8px; +} + +.profile-option { + cursor: pointer; +} + +.profile-option input[type="radio"] { + position: absolute; + opacity: 0; + width: 0; + height: 0; +} + +.profile-card { + border: 1px solid var(--border); + border-radius: 6px; + padding: 10px 12px; + transition: border-color 0.12s, background 0.12s; +} + +.profile-option input:checked + .profile-card { + border-color: var(--blue); + background: var(--blue-bg); +} + +.profile-card-title { + font-size: 13px; + font-weight: 600; + color: var(--text-strong); + margin-bottom: 3px; +} + +.profile-card-desc { + font-size: 11px; + color: var(--text-muted); + line-height: 1.4; +} + +/* Confirmation warning */ +.confirm-warning { + background: var(--red-bg); + border: 1px solid var(--red-bd); + border-radius: 6px; + color: var(--red); + font-size: 13px; + padding: 10px 12px; + line-height: 1.5; +} + +.confirm-hint { + font-size: 11px; + color: var(--text-muted); +} + +/* Modal buttons */ +.btn-primary { + background: var(--blue); + color: #fff; + 
border: 1px solid var(--blue); + border-radius: 6px; + padding: 7px 16px; + font-size: 13px; + font-family: inherit; + font-weight: 500; + cursor: pointer; + transition: opacity 0.12s; +} +.btn-primary:hover:not(:disabled) { opacity: 0.85; } +.btn-primary:disabled { opacity: 0.35; cursor: not-allowed; } + +.btn-secondary { + background: none; + color: var(--text-muted); + border: 1px solid var(--border); + border-radius: 6px; + padding: 7px 16px; + font-size: 13px; + font-family: inherit; + cursor: pointer; +} +.btn-secondary:hover { color: var(--text); border-color: var(--text-muted); } + +.btn-danger { + background: var(--red); + color: #fff; + border: 1px solid var(--red); + border-radius: 6px; + padding: 7px 16px; + font-size: 13px; + font-family: inherit; + font-weight: 600; + cursor: pointer; + transition: opacity 0.12s; +} +.btn-danger:hover:not(:disabled) { opacity: 0.85; } +.btn-danger:disabled { opacity: 0.35; cursor: not-allowed; } + +/* ----------------------------------------------------------------------- + Toast notifications +----------------------------------------------------------------------- */ +#toast-container { + position: fixed; + bottom: 24px; + right: 24px; + z-index: 600; + display: flex; + flex-direction: column; + gap: 8px; + pointer-events: none; +} + +.toast { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 8px; + padding: 10px 16px; + font-size: 13px; + color: var(--text); + box-shadow: 0 4px 16px rgba(0, 0, 0, 0.4); + animation: toast-in 0.2s ease; + max-width: 300px; +} + +.toast-success { border-left: 3px solid var(--green); color: var(--green); } +.toast-error { border-left: 3px solid var(--red); color: var(--red); } +.toast-info { border-left: 3px solid var(--blue); color: var(--blue); } + +@keyframes toast-in { + from { opacity: 0; transform: translateY(8px); } + to { opacity: 1; transform: translateY(0); } +} + +/* ----------------------------------------------------------------------- + 
Empty state +----------------------------------------------------------------------- */ +.empty-state { + padding: 48px; + text-align: center; + color: var(--text-muted); + font-size: 13px; +} + +/* ----------------------------------------------------------------------- + History / detail page chrome +----------------------------------------------------------------------- */ +.page-toolbar { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 16px; +} + +.page-title { + font-size: 18px; + font-weight: 600; + color: var(--text-strong); +} + +.toolbar-right { + display: flex; + gap: 8px; + align-items: center; +} + +.btn-export { + display: inline-block; + background: var(--green-bg); + color: var(--green); + border: 1px solid var(--green-bd); + border-radius: 6px; + padding: 5px 14px; + font-size: 12px; + font-weight: 500; + text-decoration: none; + transition: opacity 0.12s; +} +.btn-export:hover { opacity: 0.8; text-decoration: none; } + +.btn-detail { + display: inline-block; + background: var(--bg-card); + color: var(--text-muted); + border: 1px solid var(--border); + border-radius: 5px; + padding: 3px 10px; + font-size: 12px; + text-decoration: none; + transition: color 0.12s, border-color 0.12s; +} +.btn-detail:hover { color: var(--text); border-color: var(--text-muted); text-decoration: none; } + +.text-muted { color: var(--text-muted); } + +.col-job { min-width: 48px; } + +.error-cell { max-width: 260px; } +.error-snippet { + color: var(--red); + font-size: 12px; + cursor: help; + display: block; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: 240px; +} + +.error-full { + color: var(--red); + font-size: 12px; +} + +/* ----------------------------------------------------------------------- + Pagination +----------------------------------------------------------------------- */ +.pagination { + display: flex; + align-items: center; + gap: 12px; + margin-top: 14px; + font-size: 13px; + 
color: var(--text-muted); +} + +.page-btn { + border: 1px solid var(--border); + border-radius: 5px; + padding: 4px 12px; + font-size: 12px; + color: var(--text-muted); + text-decoration: none; + transition: color 0.12s, border-color 0.12s; +} +.page-btn:hover { color: var(--text); border-color: var(--text-muted); text-decoration: none; } + +.page-info { color: var(--text-muted); font-size: 12px; } + +/* ----------------------------------------------------------------------- + Breadcrumb +----------------------------------------------------------------------- */ +.breadcrumb { + display: flex; + align-items: center; + gap: 6px; + font-size: 13px; + color: var(--text-muted); +} + +.breadcrumb a { color: var(--blue); } +.breadcrumb-sep { color: var(--border); } + +/* ----------------------------------------------------------------------- + Detail page — summary grid +----------------------------------------------------------------------- */ +.detail-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 14px; + margin-bottom: 20px; +} + +@media (max-width: 680px) { + .detail-grid { grid-template-columns: 1fr; } +} + +.detail-card { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 8px; + overflow: hidden; +} + +.detail-card-title { + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.06em; + color: var(--text-muted); + padding: 10px 14px 8px; + border-bottom: 1px solid var(--border); +} + +.detail-rows { padding: 4px 0; } + +.detail-row { + display: flex; + align-items: center; + justify-content: space-between; + padding: 7px 14px; + border-bottom: 1px solid var(--border); +} +.detail-row:last-child { border-bottom: none; } + +.detail-label { + font-size: 12px; + color: var(--text-muted); + flex-shrink: 0; + margin-right: 12px; +} + +.detail-value { + font-size: 13px; + color: var(--text); + text-align: right; +} + +.section-title { + font-size: 14px; + font-weight: 600; + color: 
var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.05em; + margin-bottom: 10px; +} + +.stage-label { + font-size: 13px; + color: var(--text); + font-weight: 500; +} + +/* chip-cancelled reuses gray */ +.chip-cancelled { + color: var(--gray); + background: var(--gray-bg); + border-color: var(--gray-bd); +} + +/* chip-red for full profile label */ +.chip-red { + color: var(--red); + background: var(--red-bg); + border-color: var(--red-bd); +} + +/* chip-gray for quick profile label */ +.chip-gray { + color: var(--gray); + background: var(--gray-bg); + border-color: var(--gray-bd); +} + +/* ----------------------------------------------------------------------- + Notification bell button +----------------------------------------------------------------------- */ +.notif-btn { + background: none; + border: 1px solid var(--border); + border-radius: 5px; + color: var(--text-muted); + cursor: pointer; + display: inline-flex; + align-items: center; + justify-content: center; + width: 26px; + height: 26px; + padding: 0; + transition: color 0.12s, border-color 0.12s, background 0.12s; +} + +.notif-btn:hover { + color: var(--text); + border-color: var(--text-muted); +} + +.notif-btn.notif-active { + color: var(--green); + background: var(--green-bg); + border-color: var(--green-bd); +} + +.notif-btn.notif-denied { + opacity: 0.35; + cursor: not-allowed; +} + +/* ----------------------------------------------------------------------- + Stats bar — top of dashboard +----------------------------------------------------------------------- */ +.stats-bar { + display: flex; + gap: 10px; + margin-bottom: 14px; + flex-wrap: wrap; +} + +.stat-card { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 8px; + padding: 10px 18px; + text-align: center; + min-width: 80px; + display: flex; + flex-direction: column; + gap: 2px; + text-decoration: none; + transition: border-color 0.12s; +} + +a.stat-card:hover { + border-color: var(--text-muted); 
+ text-decoration: none; +} + +.stat-value { + font-size: 22px; + font-weight: 700; + color: var(--text-strong); + font-variant-numeric: tabular-nums; + line-height: 1; +} + +.stat-label { + font-size: 10px; + text-transform: uppercase; + letter-spacing: 0.07em; + color: var(--text-muted); +} + +.stat-running .stat-value { color: var(--blue); } +.stat-failed .stat-value { color: var(--red); } +.stat-passed .stat-value { color: var(--green); } +.stat-idle .stat-value { color: var(--text-muted); } + +/* ----------------------------------------------------------------------- + Batch action bar (inside filter-bar) +----------------------------------------------------------------------- */ +.batch-bar { + display: flex; + align-items: center; + gap: 8px; + margin-left: auto; + background: var(--blue-bg); + border: 1px solid var(--blue-bd); + border-radius: 6px; + padding: 4px 10px; +} + +.batch-count-label { + font-size: 12px; + color: var(--blue); + font-weight: 500; +} + +.btn-batch-start { + background: var(--blue); + color: #fff; + border: none; + border-radius: 5px; + padding: 4px 12px; + font-size: 12px; + font-family: inherit; + font-weight: 600; + cursor: pointer; + transition: opacity 0.12s; +} +.btn-batch-start:hover { opacity: 0.85; } + +.btn-batch-clear { + background: none; + color: var(--blue); + border: 1px solid var(--blue-bd); + border-radius: 5px; + padding: 3px 8px; + font-size: 11px; + font-family: inherit; + cursor: pointer; +} +.btn-batch-clear:hover { background: var(--blue-bg); } + +/* ----------------------------------------------------------------------- + Checkbox column +----------------------------------------------------------------------- */ +.col-check { + width: 36px; + min-width: 36px; + padding: 10px 8px 10px 14px; +} + +.drive-checkbox, #select-all-cb { + width: 15px; + height: 15px; + cursor: pointer; + accent-color: var(--blue); +} + +/* ----------------------------------------------------------------------- + Drive location inline 
edit +----------------------------------------------------------------------- */ +.drive-location { + display: block; + font-size: 10px; + color: var(--text-muted); + margin-top: 2px; + cursor: pointer; + border-radius: 3px; + padding: 1px 3px; + transition: background 0.1s; + max-width: 160px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.drive-location:hover { background: var(--border); color: var(--text); } + +.drive-location-empty { + color: var(--border); + font-style: italic; +} +.drive-location-empty:hover { color: var(--text-muted); background: var(--border); } + +.drive-location-input { + display: block; + background: var(--bg); + border: 1px solid var(--blue); + border-radius: 3px; + color: var(--text); + font-size: 10px; + font-family: inherit; + padding: 1px 4px; + margin-top: 2px; + width: 140px; + outline: none; +} + +/* ----------------------------------------------------------------------- + Burn-in meta row (stage + elapsed) +----------------------------------------------------------------------- */ +.burnin-meta { + display: flex; + align-items: center; + gap: 8px; + margin-top: 3px; +} + +.elapsed-timer { + font-size: 10px; + color: var(--text-muted); + font-variant-numeric: tabular-nums; +} + +/* ----------------------------------------------------------------------- + Confirm checkbox (batch modal) +----------------------------------------------------------------------- */ +.confirm-check-label { + display: flex; + align-items: flex-start; + gap: 10px; + cursor: pointer; + font-size: 13px; + color: var(--text); + line-height: 1.5; +} +.confirm-check-label input[type="checkbox"] { + margin-top: 2px; + width: 15px; + height: 15px; + flex-shrink: 0; + accent-color: var(--red); +} + +/* ----------------------------------------------------------------------- + Settings page — two-column layout +----------------------------------------------------------------------- */ + +/* Outer two-column grid: SMTP left (wider), right 
col stacks */ +.settings-two-col { + display: grid; + grid-template-columns: 2fr 1fr; + gap: 12px; + align-items: start; + margin-bottom: 12px; +} + +.settings-left-col, +.settings-right-col { + display: flex; + flex-direction: column; + gap: 10px; +} + +/* Card */ +.settings-card { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 8px; + padding: 12px 14px; +} + +.settings-card-readonly { + opacity: .75; + margin-bottom: 24px; +} + +.settings-card-header { + display: flex; + align-items: center; + gap: 8px; + margin-bottom: 10px; + padding-bottom: 8px; + border-bottom: 1px solid var(--border); +} + +.settings-card-title { + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: .06em; + color: var(--text-strong); +} + +/* Compact 2-col grid for horizontal label|input layout in email card */ +.sf-fields { + display: grid; + grid-template-columns: 84px 1fr; + align-items: center; + gap: 4px 8px; +} +.sf-fields > label { + font-size: 10px; + font-weight: 600; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: .04em; + text-align: right; + white-space: nowrap; +} +/* Full-width rows that span both columns */ +.sf-full { grid-column: 1 / -1; } +/* Inline sub-group (mode + timeout on one row) */ +.sf-inline-group { + display: flex; + align-items: center; + gap: 6px; +} +.sf-inline-group .sf-label-sm { + font-size: 10px; + font-weight: 600; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: .04em; + white-space: nowrap; + flex-shrink: 0; +} + +/* Field rows */ +.sf-row { + margin-bottom: 8px; +} +.sf-row:last-child { margin-bottom: 0; } + +.sf-label { + display: block; + font-size: 11px; + font-weight: 500; + color: var(--text-muted); + margin-bottom: 3px; + text-transform: uppercase; + letter-spacing: .04em; +} + +.sf-hint { + display: block; + font-size: 11px; + color: var(--text-muted); + margin-top: 2px; + line-height: 1.4; +} + +.sf-input { + width: 100%; + 
padding: 4px 8px; + font-size: 13px; + color: var(--text); + background: var(--bg); + border: 1px solid var(--border); + border-radius: 5px; + outline: none; + font-family: inherit; + transition: border-color .15s; +} +.sf-input:focus { border-color: var(--blue); } +.sf-input.sf-input-xs { width: 80px; } + +.sf-select { + padding: 4px 8px; + font-size: 13px; + color: var(--text); + background: var(--bg); + border: 1px solid var(--border); + border-radius: 5px; + outline: none; + cursor: pointer; + font-family: inherit; + transition: border-color .15s; +} +.sf-select:focus { border-color: var(--blue); } + +/* Inline group of small fields */ +.sf-row-inline { + display: flex; + gap: 12px; + flex-wrap: wrap; + align-items: flex-end; +} +.sf-row-inline > div { display: flex; flex-direction: column; } + +/* Test button inline row */ +.sf-row-test { + display: flex; + align-items: center; + gap: 10px; + margin-bottom: 0; +} + +/* Toggle row (label + toggle side by side) */ +.sf-toggle-row { + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + padding: 7px 0; + border-top: 1px solid var(--border); +} +.sf-toggle-row:first-of-type { border-top: none; padding-top: 0; } +.sf-toggle-row .sf-label { margin-bottom: 0; text-transform: none; font-size: 13px; color: var(--text); font-weight: 500; letter-spacing: 0; } +.sf-toggle-row .sf-hint { margin-top: 1px; } + +/* Thin divider inside card */ +.sf-divider { + height: 1px; + background: var(--border); + margin: 8px 0; +} + +/* Read-only grid inside system card */ +.sf-readonly-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 0; +} + +.sf-ro-row { + display: flex; + flex-direction: column; + gap: 2px; + padding: 8px 0; + border-bottom: 1px solid var(--border); +} +.sf-ro-row:nth-last-child(-n+2) { border-bottom: none; } + +.sf-ro-label { + font-size: 11px; + font-weight: 500; + text-transform: uppercase; + letter-spacing: .04em; + color: var(--text-muted); +} + +.sf-ro-value { + 
font-size: 13px; + color: var(--text); +} + +/* Save action bar */ +.settings-save-bar { + display: flex; + align-items: center; + gap: 14px; + margin-bottom: 24px; +} + +@media (max-width: 860px) { + .settings-two-col { + grid-template-columns: 1fr; + } + .sf-readonly-grid { + grid-template-columns: 1fr; + } + .sf-ro-row:nth-last-child(-n+2) { border-bottom: 1px solid var(--border); } + .sf-ro-row:last-child { border-bottom: none; } +} + +.page-subtitle { + font-size: 12px; + color: var(--text-muted); +} + +/* ----------------------------------------------------------------------- + Stats / analytics page +----------------------------------------------------------------------- */ +.stats-row { + display: flex; + gap: 10px; + flex-wrap: wrap; +} + +.overview-card { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 8px; + padding: 14px 20px; + text-align: center; + min-width: 90px; + display: flex; + flex-direction: column; + gap: 4px; +} + +.ov-value { + font-size: 26px; + font-weight: 700; + color: var(--text-strong); + font-variant-numeric: tabular-nums; + line-height: 1; +} + +.ov-label { + font-size: 10px; + text-transform: uppercase; + letter-spacing: 0.07em; + color: var(--text-muted); +} + +.ov-green .ov-value { color: var(--green); } +.ov-red .ov-value { color: var(--red); } +.ov-blue .ov-value { color: var(--blue); } +.ov-gray .ov-value { color: var(--text-muted); } + +.stats-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 20px; +} + +.stats-section {} + +.rate-bar-wrap { + display: flex; + height: 6px; + border-radius: 3px; + overflow: hidden; + background: var(--border); + min-width: 80px; +} + +.rate-bar-fill { height: 100%; } +.rate-pass { background: var(--green); } +.rate-fail { background: var(--red); } + +/* ----------------------------------------------------------------------- + Mobile responsive +----------------------------------------------------------------------- */ +@media (max-width: 900px) 
{ + .stats-grid { + grid-template-columns: 1fr; + } + .settings-grid { + grid-template-columns: 1fr; + } +} + +@media (max-width: 700px) { + header { + padding: 8px 14px; + flex-wrap: wrap; + gap: 8px; + } + + .header-meta { + gap: 10px; + } + + main { + padding: 12px 10px 32px; + } + + /* Hide less critical table columns on small screens */ + .col-size, .col-temp { display: none; } + + /* Make drive name wrap */ + .col-drive { min-width: 130px; } + .col-serial { min-width: 80px; } + + .stats-bar { + gap: 6px; + } + + .stat-card { + padding: 8px 12px; + min-width: 64px; + } + + .stat-value { font-size: 18px; } + + .detail-grid { + grid-template-columns: 1fr; + } + + .table-wrap { + max-height: calc(100vh - 240px); + } + + .batch-bar { + margin-left: 0; + width: 100%; + justify-content: space-between; + } +} + +@media (max-width: 480px) { + .header-link { display: none; } + .notif-btn { display: none; } + .col-serial { display: none; } + .col-health { display: none; } + + /* Settings page — iPhone */ + .settings-card { padding: 10px 10px; } + .settings-save-bar { flex-wrap: wrap; gap: 8px; } + .settings-save-bar .btn-primary, + .settings-save-bar .btn-secondary { flex: 1 1 auto; text-align: center; } + .sf-row-inline { flex-direction: column; gap: 8px; } + .sf-input.sf-input-xs { width: 100%; } + .sf-fields { grid-template-columns: 72px 1fr; } + .page-subtitle { font-size: 11px; } + + /* Filter bar — keep cancel-all from overflowing */ + .btn-cancel-all { margin-left: 0; width: 100%; } + .filter-bar { flex-wrap: wrap; } +} + +/* ----------------------------------------------------------------------- + Header brand — now an
anchor (&lt;a&gt;) tag, kill link styling
+----------------------------------------------------------------------- */
+a.header-brand {
+  text-decoration: none;
+  color: var(--text-strong);
+}
+a.header-brand:hover { text-decoration: none; }
+a.header-brand:hover .header-title {
+  color: var(--blue);
+  transition: color .15s;
+}
+
+/* -----------------------------------------------------------------------
+   Buttons — secondary + primary variants
+----------------------------------------------------------------------- */
+.btn-primary {
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  padding: 7px 18px;
+  font-size: 13px;
+  font-weight: 600;
+  color: #fff;
+  background: var(--blue);
+  border: 1px solid var(--blue-bd);
+  border-radius: 6px;
+  cursor: pointer;
+  transition: opacity .15s;
+}
+.btn-primary:hover { opacity: .85; }
+.btn-primary:disabled { opacity: .45; cursor: default; }
+
+.btn-secondary {
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  padding: 5px 14px;
+  font-size: 12px;
+  font-weight: 500;
+  color: var(--text);
+  background: var(--bg-card);
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  cursor: pointer;
+  transition: border-color .15s, color .15s;
+}
+.btn-secondary:hover { border-color: var(--blue); color: var(--blue); }
+.btn-secondary:disabled { opacity: .45; cursor: default; }
+
+/* -----------------------------------------------------------------------
+   Settings form — sections and fields
+----------------------------------------------------------------------- */
+.page-subtitle {
+  font-size: 13px;
+  color: var(--text-muted);
+  margin: -4px 0 20px;
+  line-height: 1.55;
+}
+
+.page-subtitle code {
+  font-family: "SF Mono", "Cascadia Code", monospace;
+  font-size: 11px;
+  background: var(--bg-card);
+  border: 1px solid var(--border);
+  border-radius: 3px;
+  padding: 1px 5px;
+  color: var(--text);
+}
+
+.badge-restart {
+  font-size: 10px;
+  font-weight: 600;
+  letter-spacing: .04em;
+  color: var(--yellow);
+  background: 
var(--yellow-bg); + border: 1px solid var(--yellow-bd); + border-radius: 4px; + padding: 1px 7px; + white-space: nowrap; +} + +/* Section card */ +.settings-section { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 8px; + padding: 20px 22px; + margin-bottom: 16px; +} + +.settings-section-readonly { + opacity: .8; +} + +.settings-section-header { + display: flex; + align-items: center; + gap: 10px; + margin-bottom: 18px; +} + +.settings-section-title { + font-size: 13px; + font-weight: 600; + color: var(--text-strong); + text-transform: uppercase; + letter-spacing: .06em; +} + +/* Individual field row */ +.settings-field { + margin-bottom: 14px; +} +.settings-field:last-child { margin-bottom: 0; } + +.settings-label { + display: block; + font-size: 12px; + font-weight: 500; + color: var(--text-muted); + margin-bottom: 5px; +} + +.settings-input { + width: 100%; + max-width: 520px; + padding: 7px 11px; + font-size: 13px; + color: var(--text); + background: var(--bg); + border: 1px solid var(--border); + border-radius: 6px; + outline: none; + transition: border-color .15s; + font-family: inherit; +} +.settings-input:focus { border-color: var(--blue); } +.settings-input.settings-input-sm { max-width: 120px; } + +.settings-select { + padding: 7px 11px; + font-size: 13px; + color: var(--text); + background: var(--bg); + border: 1px solid var(--border); + border-radius: 6px; + outline: none; + cursor: pointer; + transition: border-color .15s; + font-family: inherit; +} +.settings-select:focus { border-color: var(--blue); } + +.settings-hint { + display: block; + font-size: 11px; + color: var(--text-muted); + margin-top: 4px; +} + +/* Row with multiple fields side by side */ +.settings-field-row { + display: flex; + flex-wrap: wrap; + gap: 20px; + align-items: flex-end; +} +.settings-field-group { display: flex; flex-direction: column; } +.settings-field-group .settings-label { margin-bottom: 5px; } + +/* Toggle row */ 
+.settings-field-toggle { + display: flex; + align-items: center; + justify-content: space-between; + gap: 20px; + padding: 10px 0; + border-top: 1px solid var(--border); +} +.settings-field-toggle:first-of-type { border-top: none; } + +.settings-toggle-info { + display: flex; + flex-direction: column; + gap: 2px; +} +.settings-toggle-info .settings-label { margin-bottom: 0; } + +/* Thin divider inside section */ +.settings-divider { + height: 1px; + background: var(--border); + margin: 14px 0; +} + +/* Inline test/action row */ +.settings-actions-inline { + display: flex; + align-items: center; + gap: 14px; + margin-top: 16px; + padding-top: 14px; + border-top: 1px solid var(--border); +} + +/* Save row */ +.settings-save-row { + display: flex; + align-items: center; + gap: 16px; + margin-bottom: 28px; +} + +/* Inline result text */ +.settings-test-result { + font-size: 13px; + font-weight: 500; +} +.result-ok { color: var(--green); } +.result-err { color: var(--red); } + +/* ----------------------------------------------------------------------- + Stage selection (burn-in modals) +----------------------------------------------------------------------- */ +.stage-checks { + display: flex; + flex-direction: column; + gap: 6px; + margin-top: 4px; +} + +.stage-check { + display: flex; + align-items: flex-start; + gap: 6px; + cursor: pointer; + padding: 7px 10px; + border-radius: 6px; + border: 1px solid var(--border); + background: var(--bg); + transition: border-color .15s; +} + +.stage-check:hover { border-color: var(--blue); } + +.stage-check.dragging { + opacity: 0.4; + border-style: dashed; +} + +.drag-handle { + color: var(--text-muted); + cursor: grab; + font-size: 16px; + line-height: 1; + padding-top: 1px; + flex-shrink: 0; + user-select: none; + opacity: 0.5; +} +.drag-handle:active { cursor: grabbing; } +.stage-check:hover .drag-handle { opacity: 1; } + +.stage-drag-hint { + font-size: 11px; + font-weight: 400; + color: var(--text-muted); +} + 
+.stage-check input[type="checkbox"] { + margin-top: 2px; + flex-shrink: 0; + accent-color: var(--blue); + width: 15px; + height: 15px; + cursor: pointer; +} + +.stage-check span { font-size: 13px; line-height: 1.4; } + +.stage-tag { + display: inline-block; + font-size: 10px; + font-weight: 600; + border-radius: 3px; + padding: 1px 5px; + margin-left: 5px; + vertical-align: middle; +} + +.stage-tag-destructive { + background: var(--red-bg); + color: var(--red); + border: 1px solid var(--red-bd); +} + +.stage-note-inline { + color: var(--text-muted); + font-weight: 400; +} + +.stage-always-note { + font-size: 11px; + color: var(--text-muted); + margin-top: 6px; +} + +/* ----------------------------------------------------------------------- + Toggle switch +----------------------------------------------------------------------- */ +.toggle { + position: relative; + display: inline-block; + width: 38px; + height: 22px; + flex-shrink: 0; +} + +.toggle input { + opacity: 0; + width: 0; + height: 0; + position: absolute; +} + +.toggle-slider { + position: absolute; + inset: 0; + background: var(--border); + border-radius: 22px; + cursor: pointer; + transition: background .2s; +} + +.toggle-slider::before { + content: ""; + position: absolute; + width: 16px; + height: 16px; + left: 3px; + bottom: 3px; + background: #fff; + border-radius: 50%; + transition: transform .2s; +} + +.toggle input:checked + .toggle-slider { + background: var(--blue); +} + +.toggle input:checked + .toggle-slider::before { + transform: translateX(16px); +} + +.toggle input:focus-visible + .toggle-slider { + outline: 2px solid var(--blue); + outline-offset: 2px; +} diff --git a/app/static/app.js b/app/static/app.js new file mode 100644 index 0000000..b857cf4 --- /dev/null +++ b/app/static/app.js @@ -0,0 +1,848 @@ +(function () { + 'use strict'; + + // ----------------------------------------------------------------------- + // Filter bar + stats bar + // 
----------------------------------------------------------------------- + + var activeFilter = 'all'; + + function getRows() { + return Array.from(document.querySelectorAll('#drives-tbody tr[data-status]')); + } + + function updateCounts() { + var rows = getRows(); + var counts = { all: rows.length, running: 0, failed: 0, passed: 0, idle: 0 }; + rows.forEach(function (r) { + var s = r.dataset.status; + if (s && Object.prototype.hasOwnProperty.call(counts, s)) counts[s]++; + }); + + // Update filter bar badges + document.querySelectorAll('.filter-btn[data-filter]').forEach(function (btn) { + var badge = btn.querySelector('.badge'); + if (badge) badge.textContent = counts[btn.dataset.filter] != null ? counts[btn.dataset.filter] : 0; + }); + + // Update stats bar + ['all', 'running', 'failed', 'passed', 'idle'].forEach(function (s) { + var el = document.getElementById('stat-' + s); + if (el) el.textContent = counts[s] != null ? counts[s] : 0; + }); + + // Show/hide failed banner + var banner = document.getElementById('failed-banner'); + if (banner) { + var failedCount = counts.failed || 0; + banner.hidden = failedCount === 0; + var fc = banner.querySelector('.failed-count'); + if (fc) fc.textContent = failedCount; + } + + // Show/hide "Cancel All Burn-Ins" button based on whether any .btn-cancel exist + var cancelAllBtn = document.getElementById('cancel-all-btn'); + if (cancelAllBtn) { + var hasCancelable = document.querySelectorAll('.btn-cancel[data-job-id]').length > 0; + cancelAllBtn.hidden = !hasCancelable; + } + } + + function applyFilter(filter) { + activeFilter = filter; + getRows().forEach(function (row) { + row.style.display = (filter === 'all' || row.dataset.status === filter) ? 
'' : 'none'; + }); + document.querySelectorAll('.filter-btn[data-filter]').forEach(function (btn) { + btn.classList.toggle('active', btn.dataset.filter === filter); + }); + updateCounts(); + } + + document.addEventListener('click', function (e) { + var btn = e.target.closest('.filter-btn[data-filter]'); + if (btn) applyFilter(btn.dataset.filter); + }); + + document.addEventListener('htmx:afterSwap', function () { + applyFilter(activeFilter); + restoreCheckboxes(); + initElapsedTimers(); + initLocationEdits(); + }); + + updateCounts(); + + // ----------------------------------------------------------------------- + // Toast notifications + // ----------------------------------------------------------------------- + + function showToast(msg, type) { + type = type || 'info'; + var container = document.getElementById('toast-container'); + if (!container) return; + var el = document.createElement('div'); + el.className = 'toast toast-' + type; + el.textContent = msg; + container.appendChild(el); + setTimeout(function () { el.remove(); }, 5000); + } + + // ----------------------------------------------------------------------- + // Browser push notifications + // ----------------------------------------------------------------------- + + function updateNotifBtn() { + var btn = document.getElementById('notif-btn'); + if (!btn) return; + var perm = Notification.permission; + btn.classList.remove('notif-active', 'notif-denied'); + if (perm === 'granted') { + btn.classList.add('notif-active'); + btn.title = 'Notifications enabled'; + } else if (perm === 'denied') { + btn.classList.add('notif-denied'); + btn.title = 'Notifications blocked — allow in browser settings'; + } else { + btn.title = 'Enable browser notifications'; + } + } + + if ('Notification' in window) { + updateNotifBtn(); + document.addEventListener('click', function (e) { + if (!e.target.closest('#notif-btn')) return; + if (Notification.permission === 'denied') { + showToast('Notifications blocked — allow in 
browser settings', 'error'); + return; + } + Notification.requestPermission().then(function (perm) { + updateNotifBtn(); + if (perm === 'granted') { + showToast('Browser notifications enabled', 'success'); + new Notification('TrueNAS Burn-In', { + body: 'You will be notified when burn-in jobs complete.', + }); + } + }); + }); + } else { + var nb = document.getElementById('notif-btn'); + if (nb) nb.style.display = 'none'; + } + + // Handle job-alert SSE events for browser notifications + document.addEventListener('htmx:sseMessage', function (e) { + if (!e.detail || e.detail.type !== 'job-alert') return; + try { + handleJobAlert(JSON.parse(e.detail.data)); + } catch (_) {} + }); + + function handleJobAlert(data) { + var isPass = data.state === 'passed'; + var icon = isPass ? '✓' : '✕'; + var title = icon + ' ' + (data.devname || 'Drive') + ' — Burn-In ' + (data.state || '').toUpperCase(); + var bodyText = (data.model || '') + (data.serial ? ' · ' + data.serial : ''); + if (!isPass && data.error_text) bodyText += '\n' + data.error_text; + + showToast(title + (data.error_text ? ' · ' + data.error_text : ''), isPass ? 
'success' : 'error'); + + if (Notification.permission === 'granted') { + try { + new Notification(title, { body: bodyText || undefined }); + } catch (_) {} + } + } + + // ----------------------------------------------------------------------- + // Elapsed time timers + // ----------------------------------------------------------------------- + + var _elapsedInterval = null; + + function formatElapsed(seconds) { + if (seconds < 0) return ''; + var h = Math.floor(seconds / 3600); + var m = Math.floor((seconds % 3600) / 60); + var s = seconds % 60; + if (h > 0) return h + 'h ' + m + 'm'; + if (m > 0) return m + 'm ' + s + 's'; + return s + 's'; + } + + function tickElapsedTimers() { + var now = Date.now(); + document.querySelectorAll('.elapsed-timer[data-started]').forEach(function (el) { + var started = new Date(el.dataset.started).getTime(); + if (isNaN(started)) return; + var elapsed = Math.floor((now - started) / 1000); + el.textContent = formatElapsed(elapsed); + }); + } + + function initElapsedTimers() { + if (_elapsedInterval) return; // Already running + var timers = document.querySelectorAll('.elapsed-timer[data-started]'); + if (timers.length === 0) return; + _elapsedInterval = setInterval(function () { + var remaining = document.querySelectorAll('.elapsed-timer[data-started]'); + if (remaining.length === 0) { + clearInterval(_elapsedInterval); + _elapsedInterval = null; + return; + } + tickElapsedTimers(); + }, 1000); + tickElapsedTimers(); + } + + initElapsedTimers(); + + // ----------------------------------------------------------------------- + // Inline location / notes edit + // ----------------------------------------------------------------------- + + function initLocationEdits() { + document.querySelectorAll('.drive-location').forEach(function (el) { + if (el._locationInited) return; + el._locationInited = true; + + el.addEventListener('click', function (evt) { + evt.stopPropagation(); + var driveId = el.dataset.driveId; + var current = 
el.classList.contains('drive-location-empty') ? '' : el.textContent.trim(); + + var input = document.createElement('input'); + input.type = 'text'; + input.className = 'drive-location-input'; + input.value = current; + input.placeholder = 'e.g. Bay 3 Shelf 2'; + input.maxLength = 64; + + el.replaceWith(input); + input.focus(); + input.select(); + + async function save() { + var newVal = input.value.trim(); + try { + var resp = await fetch('/api/v1/drives/' + driveId, { + method: 'PATCH', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ location: newVal || null }), + }); + if (!resp.ok) throw new Error('save failed'); + } catch (_) { + showToast('Failed to save location', 'error'); + } + // The SSE update will replace the whole row; nothing more needed + } + + function cancel() { + var span = document.createElement('span'); + span.className = 'drive-location' + (current ? '' : ' drive-location-empty'); + span.dataset.driveId = driveId; + span.dataset.field = 'location'; + span.title = current ? 
'Click to edit location' : 'Click to set location'; + span.textContent = current || '+ location'; + input.replaceWith(span); + initLocationEdits(); // re-attach listener + } + + input.addEventListener('blur', function () { save(); }); + input.addEventListener('keydown', function (e) { + if (e.key === 'Enter') { input.blur(); } + if (e.key === 'Escape') { cancel(); } + }); + }); + }); + } + + initLocationEdits(); + + // ----------------------------------------------------------------------- + // Stage drag-and-drop reordering + // ----------------------------------------------------------------------- + + function initStageDrag(listId) { + var list = document.getElementById(listId); + if (!list || list._dragInited) return; + list._dragInited = true; + + var draggingEl = null; + + list.addEventListener('dragstart', function (e) { + draggingEl = e.target.closest('.stage-check'); + if (!draggingEl) return; + draggingEl.classList.add('dragging'); + e.dataTransfer.effectAllowed = 'move'; + }); + + list.addEventListener('dragend', function () { + if (draggingEl) draggingEl.classList.remove('dragging'); + list.querySelectorAll('.stage-check.drag-over').forEach(function (el) { + el.classList.remove('drag-over'); + }); + draggingEl = null; + }); + + list.addEventListener('dragover', function (e) { + e.preventDefault(); + if (!draggingEl) return; + var target = e.target.closest('.stage-check'); + if (!target || target === draggingEl) return; + var rect = target.getBoundingClientRect(); + var midY = rect.top + rect.height / 2; + if (e.clientY < midY) { + list.insertBefore(draggingEl, target); + } else { + list.insertBefore(draggingEl, target.nextSibling); + } + }); + } + + // Map checkbox id → backend stage name + var _STAGE_ID_MAP = { + 'stage-surface': 'surface_validate', + 'stage-short': 'short_smart', + 'stage-long': 'long_smart', + 'batch-stage-surface': 'surface_validate', + 'batch-stage-short': 'short_smart', + 'batch-stage-long': 'long_smart', + }; + + // Read DOM 
order of checked stages from the given list element + function getStageOrder(listId) { + var items = Array.from(document.querySelectorAll('#' + listId + ' .stage-check')); + var order = []; + items.forEach(function (item) { + var cb = item.querySelector('input[type=checkbox]'); + if (cb && cb.checked && _STAGE_ID_MAP[cb.id]) { + order.push(_STAGE_ID_MAP[cb.id]); + } + }); + return order; + } + + // ----------------------------------------------------------------------- + // Standalone SMART test + // ----------------------------------------------------------------------- + + async function startSmartTest(btn) { + var driveId = btn.dataset.driveId; + var testType = btn.dataset.testType; + var operator = localStorage.getItem('burnin_operator') || 'unknown'; + + btn.disabled = true; + try { + var resp = await fetch('/api/v1/drives/' + driveId + '/smart/start', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ type: testType, operator: operator }), + }); + var data = await resp.json(); + if (!resp.ok) { + showToast(data.detail || 'Failed to start test', 'error'); + btn.disabled = false; + } else { + var label = testType === 'SHORT' ? 'Short' : 'Long'; + showToast(label + ' SMART test started on ' + data.devname, 'success'); + } + } catch (err) { + showToast('Network error', 'error'); + btn.disabled = false; + } + } + + // ----------------------------------------------------------------------- + // Cancel standalone SMART test + // ----------------------------------------------------------------------- + + async function cancelSmartTest(btn) { + var driveId = btn.dataset.driveId; + var testType = btn.dataset.testType; // 'short' or 'long' + var label = testType === 'short' ? 'Short' : 'Long'; + + if (!confirm('Cancel the ' + label + ' SMART test? 
This cannot be undone.')) return; + + btn.disabled = true; + try { + var resp = await fetch('/api/v1/drives/' + driveId + '/smart/cancel', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ type: testType }), + }); + var data = await resp.json(); + if (!resp.ok) { + showToast(data.detail || 'Failed to cancel test', 'error'); + btn.disabled = false; + } else { + var label = testType === 'short' ? 'Short' : 'Long'; + showToast(label + ' SMART test cancelled on ' + (data.devname || ''), 'info'); + } + } catch (err) { + showToast('Network error', 'error'); + btn.disabled = false; + } + } + + // ----------------------------------------------------------------------- + // Cancel ALL running/queued burn-in jobs + // ----------------------------------------------------------------------- + + async function cancelAllBurnins() { + var cancelBtns = Array.from(document.querySelectorAll('.btn-cancel[data-job-id]')); + if (cancelBtns.length === 0) { + showToast('No active burn-in jobs to cancel', 'info'); + return; + } + if (!confirm('Cancel ALL ' + cancelBtns.length + ' active burn-in job(s)? 
This cannot be undone.')) return; + var operator = localStorage.getItem('burnin_operator') || 'unknown'; + var count = 0; + for (var i = 0; i < cancelBtns.length; i++) { + var jobId = cancelBtns[i].dataset.jobId; + try { + var resp = await fetch('/api/v1/burnin/' + jobId + '/cancel', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ operator: operator }), + }); + if (resp.ok) count++; + } catch (_) {} + } + showToast(count + ' burn-in job(s) cancelled', 'info'); + } + + // ----------------------------------------------------------------------- + // Single drive Burn-In modal + // ----------------------------------------------------------------------- + + var modalDriveId = null; + var modalSerial = null; + + function _stageLabel() { + // Read labels in DOM order from stage-order-list + var items = Array.from(document.querySelectorAll('#stage-order-list .stage-check')); + var labelMap = { + 'stage-surface': 'Surface', + 'stage-short': 'Short SMART', + 'stage-long': 'Long SMART', + }; + var parts = []; + items.forEach(function (item) { + var cb = item.querySelector('input[type=checkbox]'); + if (cb && cb.checked && labelMap[cb.id]) parts.push(labelMap[cb.id]); + }); + return parts.length ? parts.join(' + ') : 'No stages'; + } + + function handleStageChange() { + var surfaceChecked = document.getElementById('stage-surface') && document.getElementById('stage-surface').checked; + var warning = document.getElementById('surface-warning'); + var serialField = document.getElementById('serial-field'); + if (warning) warning.style.display = surfaceChecked ? '' : 'none'; + if (serialField) serialField.style.display = surfaceChecked ? 
'' : 'none'; + // Update title + var title = document.getElementById('modal-title'); + if (title) title.textContent = 'Burn-In — ' + _stageLabel(); + validateModal(); + } + + function openModal(btn) { + modalDriveId = btn.dataset.driveId; + modalSerial = btn.dataset.serial || ''; + + document.getElementById('modal-devname').textContent = btn.dataset.devname || '—'; + document.getElementById('modal-model').textContent = btn.dataset.model || '—'; + document.getElementById('modal-serial-display').textContent = btn.dataset.serial || '—'; + document.getElementById('modal-size').textContent = btn.dataset.size || '—'; + + var healthEl = document.getElementById('modal-health'); + var health = btn.dataset.health || 'UNKNOWN'; + healthEl.textContent = health; + healthEl.className = 'chip chip-' + health.toLowerCase(); + + // Reset stage checkboxes to all-on (keep user's drag order) + ['stage-surface', 'stage-short', 'stage-long'].forEach(function (id) { + var el = document.getElementById(id); + if (el) el.checked = true; + }); + + document.getElementById('confirm-serial').value = ''; + document.getElementById('confirm-hint').textContent = 'Expected: ' + modalSerial; + + var savedOp = localStorage.getItem('burnin_operator') || ''; + document.getElementById('operator-input').value = savedOp; + + // Init drag on first open (list is in static DOM) + initStageDrag('stage-order-list'); + + handleStageChange(); // sets warning visibility + title + validates + + document.getElementById('start-modal').removeAttribute('hidden'); + setTimeout(function () { + document.getElementById('operator-input').focus(); + }, 50); + } + + function closeModal() { + document.getElementById('start-modal').setAttribute('hidden', ''); + modalDriveId = null; + modalSerial = null; + } + + function validateModal() { + var operator = (document.getElementById('operator-input').value || '').trim(); + var surfaceChecked = document.getElementById('stage-surface') && 
document.getElementById('stage-surface').checked; + var shortChecked = document.getElementById('stage-short') && document.getElementById('stage-short').checked; + var longChecked = document.getElementById('stage-long') && document.getElementById('stage-long').checked; + var anyStage = surfaceChecked || shortChecked || longChecked; + + var valid; + if (surfaceChecked) { + var typed = (document.getElementById('confirm-serial').value || '').trim(); + valid = operator.length > 0 && typed === modalSerial && modalSerial !== '' && anyStage; + } else { + valid = operator.length > 0 && anyStage; + } + document.getElementById('modal-start-btn').disabled = !valid; + } + + async function submitStart() { + var operator = (document.getElementById('operator-input').value || '').trim(); + localStorage.setItem('burnin_operator', operator); + + var runSurface = document.getElementById('stage-surface').checked; + var runShort = document.getElementById('stage-short').checked; + var runLong = document.getElementById('stage-long').checked; + var stageOrder = getStageOrder('stage-order-list'); + + try { + var resp = await fetch('/api/v1/burnin/start', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + drive_ids: [parseInt(modalDriveId, 10)], + operator: operator, + run_surface: runSurface, + run_short: runShort, + run_long: runLong, + stage_order: stageOrder, + }), + }); + + var data = await resp.json(); + if (!resp.ok) { + showToast(data.detail || 'Failed to start burn-in', 'error'); + return; + } + + closeModal(); + showToast('Burn-in queued: ' + _stageLabel(), 'success'); + } catch (err) { + showToast('Network error', 'error'); + } + } + + // ----------------------------------------------------------------------- + // Batch Burn-In + // ----------------------------------------------------------------------- + + var checkedDriveIds = new Set(); + + function updateBatchBar() { + var bar = document.getElementById('batch-bar'); + if (!bar) 
return; + var count = checkedDriveIds.size; + bar.hidden = count === 0; + var countEl = document.getElementById('batch-count'); + if (countEl) countEl.textContent = count; + } + + function restoreCheckboxes() { + // Re-check boxes that were checked before the SSE swap + document.querySelectorAll('.drive-checkbox').forEach(function (cb) { + cb.checked = checkedDriveIds.has(cb.dataset.driveId); + }); + + // Update select-all state + var selectAll = document.getElementById('select-all-cb'); + if (selectAll) { + var allBoxes = document.querySelectorAll('.drive-checkbox'); + selectAll.checked = allBoxes.length > 0 && Array.from(allBoxes).every(function (c) { return c.checked; }); + selectAll.indeterminate = checkedDriveIds.size > 0 && !selectAll.checked; + } + + updateBatchBar(); + } + + // Toggle individual checkbox + document.addEventListener('change', function (e) { + if (e.target.classList.contains('drive-checkbox')) { + var id = e.target.dataset.driveId; + if (e.target.checked) { + checkedDriveIds.add(id); + } else { + checkedDriveIds.delete(id); + } + updateBatchBar(); + // Update select-all indeterminate state + var selectAll = document.getElementById('select-all-cb'); + if (selectAll) { + var allBoxes = Array.from(document.querySelectorAll('.drive-checkbox')); + selectAll.checked = allBoxes.length > 0 && allBoxes.every(function (c) { return c.checked; }); + selectAll.indeterminate = checkedDriveIds.size > 0 && !selectAll.checked; + } + return; + } + + // Select-all checkbox + if (e.target.id === 'select-all-cb') { + var boxes = document.querySelectorAll('.drive-checkbox'); + boxes.forEach(function (cb) { + cb.checked = e.target.checked; + if (e.target.checked) { + checkedDriveIds.add(cb.dataset.driveId); + } else { + checkedDriveIds.delete(cb.dataset.driveId); + } + }); + updateBatchBar(); + return; + } + + // Batch modal inputs validation + if (['batch-confirm-cb', 'batch-stage-surface', 'batch-stage-short', 'batch-stage-long'].indexOf(e.target.id) !== -1) { + 
validateBatchModal(); + } + }); + + // Batch bar buttons + document.addEventListener('click', function (e) { + if (e.target.id === 'batch-start-btn' || e.target.closest('#batch-start-btn')) { + openBatchModal(); + return; + } + if (e.target.id === 'batch-clear-btn') { + checkedDriveIds.clear(); + document.querySelectorAll('.drive-checkbox').forEach(function (cb) { cb.checked = false; }); + var sa = document.getElementById('select-all-cb'); + if (sa) { sa.checked = false; sa.indeterminate = false; } + updateBatchBar(); + return; + } + }); + + function openBatchModal() { + var modal = document.getElementById('batch-modal'); + if (!modal) return; + var savedOp = localStorage.getItem('burnin_operator') || ''; + document.getElementById('batch-operator-input').value = savedOp; + document.getElementById('batch-confirm-cb').checked = false; + // Reset stages to all-on (keep user's drag order) + ['batch-stage-surface', 'batch-stage-short', 'batch-stage-long'].forEach(function (id) { + var el = document.getElementById(id); + if (el) el.checked = true; + }); + var countEls = document.querySelectorAll('#batch-modal-count, #batch-modal-count-btn'); + countEls.forEach(function (el) { el.textContent = checkedDriveIds.size; }); + // Init drag on first open + initStageDrag('batch-stage-order-list'); + validateBatchModal(); + modal.removeAttribute('hidden'); + setTimeout(function () { + document.getElementById('batch-operator-input').focus(); + }, 50); + } + + function closeBatchModal() { + var modal = document.getElementById('batch-modal'); + if (modal) modal.setAttribute('hidden', ''); + } + + function validateBatchModal() { + var operator = (document.getElementById('batch-operator-input').value || '').trim(); + var surfaceEl = document.getElementById('batch-stage-surface'); + var surfaceChecked = surfaceEl && surfaceEl.checked; + + // Show/hide destructive warning and confirm checkbox based on surface selection + var warning = document.getElementById('batch-surface-warning'); + 
var confirmWrap = document.getElementById('batch-confirm-wrap'); + if (warning) warning.style.display = surfaceChecked ? '' : 'none'; + if (confirmWrap) confirmWrap.style.display = surfaceChecked ? '' : 'none'; + + var shortEl = document.getElementById('batch-stage-short'); + var longEl = document.getElementById('batch-stage-long'); + var anyStage = surfaceChecked || + (shortEl && shortEl.checked) || + (longEl && longEl.checked); + + var valid; + if (surfaceChecked) { + var confirmed = document.getElementById('batch-confirm-cb').checked; + valid = operator.length > 0 && confirmed && anyStage; + } else { + valid = operator.length > 0 && anyStage; + } + + var btn = document.getElementById('batch-modal-start-btn'); + if (btn) btn.disabled = !valid; + } + + document.addEventListener('input', function (e) { + if (e.target.id === 'operator-input' || e.target.id === 'confirm-serial') validateModal(); + if (e.target.id === 'batch-operator-input') validateBatchModal(); + }); + + async function submitBatchStart() { + var operator = (document.getElementById('batch-operator-input').value || '').trim(); + localStorage.setItem('burnin_operator', operator); + + var ids = Array.from(checkedDriveIds).map(function (id) { return parseInt(id, 10); }); + if (ids.length === 0) return; + + var btn = document.getElementById('batch-modal-start-btn'); + if (btn) btn.disabled = true; + + var runSurface = document.getElementById('batch-stage-surface').checked; + var runShort = document.getElementById('batch-stage-short').checked; + var runLong = document.getElementById('batch-stage-long').checked; + var stageOrder = getStageOrder('batch-stage-order-list'); + + try { + var resp = await fetch('/api/v1/burnin/start', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + drive_ids: ids, + operator: operator, + run_surface: runSurface, + run_short: runShort, + run_long: runLong, + stage_order: stageOrder, + }), + }); + var data = await resp.json(); + if 
(!resp.ok) { + showToast(data.detail || 'Failed to queue batch', 'error'); + if (btn) btn.disabled = false; + return; + } + + closeBatchModal(); + checkedDriveIds.clear(); + updateBatchBar(); + var queued = (data.queued || []).length; + var errors = (data.errors || []).length; + var msg = queued + ' burn-in(s) queued'; + if (errors) msg += ', ' + errors + ' skipped (already active)'; + showToast(msg, errors && !queued ? 'error' : 'success'); + } catch (err) { + showToast('Network error', 'error'); + if (btn) btn.disabled = false; + } + } + + // ----------------------------------------------------------------------- + // Cancel burn-in (individual) + // ----------------------------------------------------------------------- + + async function cancelBurnin(btn) { + var jobId = btn.dataset.jobId; + var operator = localStorage.getItem('burnin_operator') || 'unknown'; + + if (!confirm('Cancel this burn-in job? This cannot be undone.')) return; + + btn.disabled = true; + try { + var resp = await fetch('/api/v1/burnin/' + jobId + '/cancel', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ operator: operator }), + }); + + if (resp.ok) { + showToast('Burn-in cancelled', 'info'); + } else { + var data = await resp.json(); + showToast(data.detail || 'Failed to cancel', 'error'); + btn.disabled = false; + } + } catch (err) { + showToast('Network error', 'error'); + btn.disabled = false; + } + } + + // ----------------------------------------------------------------------- + // Delegated event handlers (work after SSE swaps) + // ----------------------------------------------------------------------- + + document.addEventListener('click', function (e) { + // Short / Long SMART start buttons + var smartBtn = e.target.closest('.btn-smart-short, .btn-smart-long'); + if (smartBtn && !smartBtn.disabled) { startSmartTest(smartBtn); return; } + + // Cancel SMART test buttons + var cancelSmartBtn = e.target.closest('.btn-cancel-smart'); + 
if (cancelSmartBtn && !cancelSmartBtn.disabled) { cancelSmartTest(cancelSmartBtn); return; } + + // Burn-in start button (single drive) + var startBtn = e.target.closest('.btn-start'); + if (startBtn && !startBtn.disabled) { openModal(startBtn); return; } + + // Cancel burn-in button (individual) + var cancelBtn = e.target.closest('.btn-cancel'); + if (cancelBtn) { cancelBurnin(cancelBtn); return; } + + // Cancel ALL running burn-ins + if (e.target.id === 'cancel-all-btn' || e.target.closest('#cancel-all-btn')) { + cancelAllBurnins(); + return; + } + + // Single-drive modal close + if (e.target.closest('#modal-close-btn') || e.target.closest('#modal-cancel-btn')) { + closeModal(); + return; + } + if (e.target.id === 'start-modal') { + closeModal(); + return; + } + if (e.target.id === 'modal-start-btn') { + submitStart(); + return; + } + + // Batch modal close + if (e.target.closest('#batch-modal-close-btn') || e.target.closest('#batch-modal-cancel-btn')) { + closeBatchModal(); + return; + } + if (e.target.id === 'batch-modal') { + closeBatchModal(); + return; + } + if (e.target.id === 'batch-modal-start-btn') { + submitBatchStart(); + return; + } + }); + + document.addEventListener('input', function (e) { + var id = e.target.id; + if (id === 'operator-input' || id === 'confirm-serial') validateModal(); + }); + + document.addEventListener('keydown', function (e) { + if (e.key === 'Escape') { + var modal = document.getElementById('start-modal'); + if (modal && !modal.hidden) { closeModal(); return; } + var bModal = document.getElementById('batch-modal'); + if (bModal && !bModal.hidden) { closeBatchModal(); return; } + } + }); + +}()); diff --git a/app/templates/audit.html b/app/templates/audit.html new file mode 100644 index 0000000..7679a03 --- /dev/null +++ b/app/templates/audit.html @@ -0,0 +1,55 @@ +{% extends "layout.html" %} + +{% block title %}TrueNAS Burn-In — Audit Log{% endblock %} + +{% block content %} +
+

Audit Log

+
+ Last 200 events +
+
+ +
+ + + + + + + + + + + + + {% if events %} + {% for e in events %} + {% set color = event_colors.get(e.event_type, 'unknown') %} + + + + + + + + + {% endfor %} + {% else %} + + + + {% endif %} + +
#TimeEventDriveOperatorMessage
{{ e.id }}{{ e.created_at | format_dt_full }} + {{ e.event_type | replace('_', ' ') }} + + {% if e.devname %} + {{ e.devname }} + {% if e.serial %}{{ e.serial }}{% endif %} + {% else %} + + {% endif %} + {{ e.operator or '—' }}{{ e.message }}
No audit events yet.
+
+{% endblock %} diff --git a/app/templates/components/drives_table.html b/app/templates/components/drives_table.html new file mode 100644 index 0000000..1701b5b --- /dev/null +++ b/app/templates/components/drives_table.html @@ -0,0 +1,174 @@ +{%- macro smart_cell(smart) -%} +
+ {%- if smart.state == 'running' -%} +
+
+
+
+ {{ smart.percent or 0 }}% +
+ {%- if smart.eta_seconds %} +
{{ smart.eta_seconds | format_eta }}
+ {%- endif %} + {%- elif smart.state == 'passed' -%} + Passed + {%- elif smart.state == 'failed' -%} + Failed + {%- elif smart.state == 'aborted' -%} + Aborted + {%- else -%} + + {%- endif -%} +
+{%- endmacro -%} + +{%- macro burnin_cell(bi) -%} +
+ {%- if bi is none -%} + + {%- elif bi.state == 'queued' -%} + Queued + {%- elif bi.state == 'running' -%} +
+
+
+
+ {{ bi.percent or 0 }}% +
+
+ {%- if bi.stage_name %} + {{ bi.stage_name | replace('_', ' ') | title }} + {%- endif %} + {%- if bi.started_at %} + {{ bi.started_at | format_elapsed }} + {%- endif %} +
+ {%- elif bi.state == 'passed' -%} + Passed + {%- elif bi.state == 'failed' -%} + Failed{% if bi.stage_name %} ({{ bi.stage_name | replace('_',' ') }}){% endif %} + {%- elif bi.state == 'cancelled' -%} + Cancelled + {%- elif bi.state == 'unknown' -%} + Unknown + {%- else -%} + + {%- endif -%} +
+{%- endmacro -%} + + + + + + + + + + + + + + + + + + {%- if drives %} + {%- for drive in drives %} + {%- set bi_active = drive.burnin and drive.burnin.state in ('queued', 'running') %} + {%- set short_busy = drive.smart_short and drive.smart_short.state == 'running' %} + {%- set long_busy = drive.smart_long and drive.smart_long.state == 'running' %} + {%- set selectable = not bi_active and not short_busy and not long_busy %} + + + + + + + + + + + + + {%- endfor %} + {%- else %} + + + + {%- endif %} + +
+ + DriveSerialSizeTempHealthShort SMARTLong SMARTBurn-InActions
+ {%- if selectable %} + + {%- endif %} + + {{ drive.devname }} + {{ drive.model or "Unknown" }} + {%- if drive.location %} + {{ drive.location }} + {%- else %} + + location + {%- endif %} + {{ drive.serial or "—" }}{{ drive.size_bytes | format_bytes }} + {%- if drive.temperature_c is not none %} + {{ drive.temperature_c }}°C + {%- else %} + + {%- endif %} + + {{ drive.smart_health }} + {{ smart_cell(drive.smart_short) }}{{ smart_cell(drive.smart_long) }}{{ burnin_cell(drive.burnin) }} +
+ {%- if bi_active %} + + + {%- else %} + + {%- if short_busy %} + + {%- else %} + + {%- endif %} + + {%- if long_busy %} + + {%- else %} + + {%- endif %} + + + {%- endif %} +
+
No drives found. Waiting for first poll…
diff --git a/app/templates/components/modal_batch.html b/app/templates/components/modal_batch.html new file mode 100644 index 0000000..b241c85 --- /dev/null +++ b/app/templates/components/modal_batch.html @@ -0,0 +1,73 @@ + diff --git a/app/templates/components/modal_start.html b/app/templates/components/modal_start.html new file mode 100644 index 0000000..a59d732 --- /dev/null +++ b/app/templates/components/modal_start.html @@ -0,0 +1,87 @@ + diff --git a/app/templates/dashboard.html b/app/templates/dashboard.html new file mode 100644 index 0000000..06ec62e --- /dev/null +++ b/app/templates/dashboard.html @@ -0,0 +1,74 @@ +{% extends "layout.html" %} + +{% block title %}TrueNAS Burn-In — Dashboard{% endblock %} + +{% block content %} +{% include "components/modal_start.html" %} +{% include "components/modal_batch.html" %} + + +
+
+ {{ drives | length }} + Drives +
+
+ 0 + Running +
+
+ 0 + Failed + +
+ 0 + Passed +
+
+ 0 + Idle +
+
+ + + + +
+ + + + + + + + + + + +
+ +
+
+
+ {% include "components/drives_table.html" %} +
+
+
+{% endblock %} diff --git a/app/templates/history.html b/app/templates/history.html new file mode 100644 index 0000000..0f8ca6d --- /dev/null +++ b/app/templates/history.html @@ -0,0 +1,93 @@ +{% extends "layout.html" %} + +{% block title %}TrueNAS Burn-In — History{% endblock %} + +{% block content %} +
+

Burn-In History

+ +
+ + +
+ {% set states = [('all','All'), ('passed','Passed'), ('failed','Failed'), ('cancelled','Cancelled'), ('running','Running'), ('unknown','Unknown')] %} + {% for val, label in states %} + + {{ label }} + {% if val in counts %}{{ counts[val] }}{% endif %} + + {% endfor %} +
+ +
+ + + + + + + + + + + + + + + + {% if jobs %} + {% for j in jobs %} + + + + + + + + + + + + {% endfor %} + {% else %} + + + + {% endif %} + +
#DriveProfileStateOperatorStartedDurationError
{{ j.id }} + {{ j.devname }} + {{ j.serial }} + + {{ j.profile }} + + {{ j.state }} + {{ j.operator or '—' }}{{ j.started_at | format_dt_full }}{{ j.duration_seconds | format_duration }} + {% if j.error_text %} + {{ j.error_text[:60] }}{% if j.error_text | length > 60 %}…{% endif %} + {% else %}—{% endif %} + + Detail +
No burn-in jobs found.
+
+ + +{% if total_pages > 1 %} + +{% else %} + +{% endif %} +{% endblock %} diff --git a/app/templates/job_detail.html b/app/templates/job_detail.html new file mode 100644 index 0000000..a91f33b --- /dev/null +++ b/app/templates/job_detail.html @@ -0,0 +1,122 @@ +{% extends "layout.html" %} + +{% block title %}TrueNAS Burn-In — Job #{{ job.id }}{% endblock %} + +{% block content %} +
+ + +
+ + +
+ + +
+
Drive
+
+
+ Device + {{ job.devname }} +
+
+ Model + {{ job.model or '—' }} +
+
+ Serial + {{ job.serial or '—' }} +
+
+ Size + {{ job.size_bytes | format_bytes }} +
+
+
+ + +
+
Job
+
+
+ Profile + {{ job.profile }} +
+
+ State + {{ job.state }} +
+
+ Operator + {{ job.operator or '—' }} +
+
+ Created + {{ job.created_at | format_dt_full }} +
+
+ Started + {{ job.started_at | format_dt_full }} +
+
+ Finished + {{ job.finished_at | format_dt_full }} +
+
+ Duration + {{ job.duration_seconds | format_duration }} +
+
+
+ +
+ +{% if job.error_text %} + +{% endif %} + + +

Stages

+
+ + + + + + + + + + + + {% for s in job.stages %} + + + + + + + + {% endfor %} + +
StageStateStartedDurationError
+ {{ s.stage_name.replace('_', ' ').title() }} + + {{ s.state }} + {{ s.started_at | format_dt_full }}{{ s.duration_seconds | format_duration }} + {% if s.error_text %} + {{ s.error_text }} + {% else %}—{% endif %} +
+
+{% endblock %} diff --git a/app/templates/job_print.html b/app/templates/job_print.html new file mode 100644 index 0000000..b411d81 --- /dev/null +++ b/app/templates/job_print.html @@ -0,0 +1,304 @@ + + + + + + Burn-In Report — Job #{{ job.id }} + + + + + + +
+ +
+
Drive
+
+ Device + {{ job.devname }} +
+
+ Model + {{ job.model or '—' }} +
+
+ Serial + {{ job.serial or '—' }} +
+
+ Size + {{ job.size_bytes | format_bytes }} +
+
+ +
+
Job
+
+ Profile + {{ job.profile | title }} +
+
+ Operator + {{ job.operator or '—' }} +
+
+ Started + {{ job.started_at | format_dt_full }} +
+
+ Finished + {{ job.finished_at | format_dt_full }} +
+
+ Duration + {{ job.duration_seconds | format_duration }} +
+
+ +
+ +{% if job.error_text %} +
✕ {{ job.error_text }}
+{% endif %} + +

Stages

+ + + + + + + + + + + {% for s in job.stages %} + + + + + + + {% endfor %} + +
StageResultDurationNotes
{{ s.stage_name.replace('_', ' ').title() }} + {{ s.state | upper }} + {{ s.duration_seconds | format_duration }}{{ s.error_text or '—' }}
+ + + + + + + diff --git a/app/templates/layout.html b/app/templates/layout.html new file mode 100644 index 0000000..94fb300 --- /dev/null +++ b/app/templates/layout.html @@ -0,0 +1,64 @@ + + + + + + {% block title %}TrueNAS Burn-In{% endblock %} + + + + +
+ + + TrueNAS Burn-In + +
+ + + {% if poller and poller.healthy %}Live{% else %}Polling error{% endif %} + + {% if poller and poller.last_poll_at %} + Last poll {{ poller.last_poll_at | format_dt }} + {% endif %} + + History + Stats + Audit + Settings + API +
+
+ +{% if stale %} + +{% endif %} + +{% if poller and poller.last_error %} + +{% endif %} + +
+ {% block content %}{% endblock %} +
+ +
+ + + + + diff --git a/app/templates/settings.html b/app/templates/settings.html new file mode 100644 index 0000000..aa9d0d0 --- /dev/null +++ b/app/templates/settings.html @@ -0,0 +1,303 @@ +{% extends "layout.html" %} + +{% block title %}TrueNAS Burn-In — Settings{% endblock %} + +{% block content %} +
+

Settings

+
+ API Docs +
+
+

+ Changes take effect immediately. Settings marked + restart required must be changed in .env. +

+ +
+
+ + +
+ +
+
+ Email (SMTP) + {% if smtp_enabled %} + Enabled + {% else %} + Disabled — set Host to enable + {% endif %} +
+ + +
+ + +
+ + +
+ + + + + +
+ + Timeout + +
+ + + + + + + + + + + + + +
+
+ + +
+
+ Webhook +
+
+ +
+ + POST JSON on burnin_passed / burnin_failed. ntfy.sh, Slack, Discord, n8n. Leave blank to disable. +
+
+
+ +
+ + +
+ + +
+
+ Notifications +
+ +
+
+
Daily Report
+
Full drive status email each day
+
+ +
+ +
+
+ + +
+
+ +
+ +
+
+
Alert on Failure
+
Immediate email when a burn-in fails
+
+ +
+ +
+
+
Alert on Pass
+
Immediate email when a burn-in passes
+
+ +
+
+ + +
+
+ Burn-In Behavior +
+ +
+ + + How many jobs can run at the same time +
+ +
+ + + Jobs running longer than this → auto-marked unknown +
+
+ +
+
+ + +
+ + + +
+
+ + +
+
+ System + restart required to change +
+
+
+ TrueNAS URL + {{ readonly.truenas_base_url }} +
+
+ Verify TLS + {{ 'Yes' if readonly.truenas_verify_tls else 'No' }} +
+
+ Poll Interval + {{ readonly.poll_interval_seconds }}s +
+
+ Stale Threshold + {{ readonly.stale_threshold_seconds }}s +
+
+ IP Allowlist + {{ readonly.allowed_ips }} +
+
+ Log Level + {{ readonly.log_level }} +
+
+
+ + +{% endblock %} diff --git a/app/templates/stats.html b/app/templates/stats.html new file mode 100644 index 0000000..5417966 --- /dev/null +++ b/app/templates/stats.html @@ -0,0 +1,123 @@ +{% extends "layout.html" %} + +{% block title %}TrueNAS Burn-In — Stats{% endblock %} + +{% block content %} +
+

Analytics

+
+ {{ drives_total }} drives tracked +
+
+ + +
+
+ {{ overall.total or 0 }} + Total Jobs +
+
+ {{ overall.passed or 0 }} + Passed +
+
+ {{ overall.failed or 0 }} + Failed +
+
+ {{ overall.running or 0 }} + Running +
+
+ {{ overall.cancelled or 0 }} + Cancelled +
+ {% if overall.total and overall.total > 0 %} +
+ {{ "%.0f" | format(100 * (overall.passed or 0) / overall.total) }}% + Pass Rate +
+ {% endif %} +
+ +
+ + +
+

Results by Drive Model

+ {% if by_model %} +
+ + + + + + + + + + + + + {% for m in by_model %} + + + + + + + + + {% endfor %} + +
ModelTotalPassedFailedPass RateRate Bar
{{ m.model }}{{ m.total }}{{ m.passed }}{{ m.failed }} + {{ m.pass_rate or 0 }}% + +
+
+
+
+
+
+ {% else %} +
+ No completed burn-in jobs yet. +
+ {% endif %} +
+ + +
+

Activity — Last 14 Days

+ {% if by_day %} +
+ + + + + + + + + + + {% for d in by_day %} + + + + + + + {% endfor %} + +
DateTotalPassedFailed
{{ d.day }}{{ d.total }}{{ d.passed }}{{ d.failed }}
+
+ {% else %} +
+ No activity in the last 14 days. +
+ {% endif %} +
+ +
+{% endblock %} diff --git a/app/truenas.py b/app/truenas.py new file mode 100644 index 0000000..1c9bf30 --- /dev/null +++ b/app/truenas.py @@ -0,0 +1,112 @@ +import asyncio +import logging +from collections.abc import Callable, Coroutine +from typing import Any, TypeVar + +import httpx + +from app.config import settings + +log = logging.getLogger(__name__) + +T = TypeVar("T") + +# Exceptions that are safe to retry (transient network issues) +_RETRYABLE = ( + httpx.ConnectError, + httpx.TimeoutException, + httpx.RemoteProtocolError, + httpx.ReadError, +) + + +async def _with_retry( + factory: Callable[[], Coroutine[Any, Any, T]], + label: str, + max_attempts: int = 3, +) -> T: + """ + Call factory() to get a fresh coroutine and await it, retrying with + exponential backoff on transient failures. + + A factory (not a bare coroutine) is required so each attempt gets a + new coroutine object — an already-awaited coroutine cannot be reused. + """ + backoff = 1.0 + for attempt in range(1, max_attempts + 1): + try: + return await factory() + except _RETRYABLE as exc: + if attempt == max_attempts: + raise + log.warning( + "TrueNAS %s transient error (attempt %d/%d): %s — retrying in %.0fs", + label, attempt, max_attempts, exc, backoff, + ) + await asyncio.sleep(backoff) + backoff *= 2 + + +class TrueNASClient: + def __init__(self) -> None: + self._client = httpx.AsyncClient( + base_url=settings.truenas_base_url, + headers={"Authorization": f"Bearer {settings.truenas_api_key}"}, + verify=settings.truenas_verify_tls, + timeout=10.0, + ) + + async def close(self) -> None: + await self._client.aclose() + + async def get_disks(self) -> list[dict]: + r = await _with_retry( + lambda: self._client.get("/api/v2.0/disk"), + "get_disks", + ) + r.raise_for_status() + return r.json() + + async def get_smart_jobs(self, state: str | None = None) -> list[dict]: + params: dict = {"method": "smart.test"} + if state: + params["state"] = state + r = await _with_retry( + lambda: 
self._client.get("/api/v2.0/core/get_jobs", params=params), + "get_smart_jobs", + ) + r.raise_for_status() + return r.json() + + async def get_smart_results(self, devname: str) -> list[dict]: + r = await _with_retry( + lambda: self._client.get(f"/api/v2.0/smart/test/results/{devname}"), + f"get_smart_results({devname})", + ) + r.raise_for_status() + return r.json() + + async def start_smart_test(self, disks: list[str], test_type: str) -> int: + """Start a SMART test. Not retried — a duplicate start would launch a second job.""" + r = await self._client.post( + "/api/v2.0/smart/test", + json={"disks": disks, "type": test_type}, + ) + r.raise_for_status() + return r.json() + + async def abort_job(self, job_id: int) -> None: + """Abort a TrueNAS job. Not retried — best-effort cancel.""" + r = await self._client.post( + "/api/v2.0/core/job_abort", + json={"id": job_id}, + ) + r.raise_for_status() + + async def get_system_info(self) -> dict: + r = await _with_retry( + lambda: self._client.get("/api/v2.0/system/info"), + "get_system_info", + ) + r.raise_for_status() + return r.json() diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..ac77725 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,21 @@ +services: + mock-truenas: + build: ./mock-truenas + container_name: mock-truenas + ports: + - "8000:8000" + restart: unless-stopped + + app: + build: . + container_name: truenas-burnin + ports: + - "8084:8084" + env_file: .env + volumes: + - ./data:/data + - ./app/templates:/opt/app/app/templates + - ./app/static:/opt/app/app/static + depends_on: + - mock-truenas + restart: unless-stopped diff --git a/mock-truenas/Dockerfile b/mock-truenas/Dockerfile new file mode 100644 index 0000000..596b574 --- /dev/null +++ b/mock-truenas/Dockerfile @@ -0,0 +1,9 @@ +FROM python:3.12-slim + +WORKDIR /app + +RUN pip install --no-cache-dir fastapi uvicorn + +COPY app.py . 
+ +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/mock-truenas/app.py b/mock-truenas/app.py new file mode 100644 index 0000000..c64a75c --- /dev/null +++ b/mock-truenas/app.py @@ -0,0 +1,345 @@ +""" +Mock TrueNAS CORE v2.0 API Server + +Simulates the TrueNAS CORE REST API for development and testing. +All state is in-memory. Restart resets everything. + +Simulation behavior: + - SHORT test completes in ~90 seconds real-time + - LONG test completes in ~8 minutes real-time + - Drive 'sdn' (serial FAIL001) always fails SMART at ~30% + - Temperatures drift slightly on each tick + - Debug endpoints at /debug/* for test control +""" + +import asyncio +import random +import time +from datetime import datetime, timezone +from typing import Optional + +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel + +app = FastAPI(title="Mock TrueNAS CORE API", version="13.0-U6.1") + +# --------------------------------------------------------------------------- +# Simulation constants +# --------------------------------------------------------------------------- +SHORT_DURATION_SECONDS = 90 +LONG_DURATION_SECONDS = 480 +TICK_SECONDS = 5 + +# --------------------------------------------------------------------------- +# Static drive inventory — 15 drives, sda-sdo, mixed capacities +# --------------------------------------------------------------------------- +_BASE_DRIVES = [ + # 12TB Seagate Exos — sda, sdb, sdc + {"identifier": "3500151795937c001", "name": "sda", "devname": "sda", "serial": "WDZ1A001", "model": "ST12000NM0008", "size": 12000138625024, "rotationrate": 7200, "_base_temp": 36}, + {"identifier": "3500151795937c002", "name": "sdb", "devname": "sdb", "serial": "WDZ1A002", "model": "ST12000NM0008", "size": 12000138625024, "rotationrate": 7200, "_base_temp": 34}, + {"identifier": "3500151795937c003", "name": "sdc", "devname": "sdc", "serial": "WDZ1A003", "model": "ST12000NM0008", "size": 12000138625024, "rotationrate": 
7200, "_base_temp": 37}, + # 8TB WD Red — sdd, sde, sdf + {"identifier": "3500151795937c004", "name": "sdd", "devname": "sdd", "serial": "WDZ1A004", "model": "WD80EFAX", "size": 8001563222016, "rotationrate": 5400, "_base_temp": 32}, + {"identifier": "3500151795937c005", "name": "sde", "devname": "sde", "serial": "WDZ1A005", "model": "WD80EFAX", "size": 8001563222016, "rotationrate": 5400, "_base_temp": 33}, + {"identifier": "3500151795937c006", "name": "sdf", "devname": "sdf", "serial": "WDZ1A006", "model": "WD80EFAX", "size": 8001563222016, "rotationrate": 5400, "_base_temp": 31}, + # 16TB Seagate Exos — sdg, sdh + {"identifier": "3500151795937c007", "name": "sdg", "devname": "sdg", "serial": "WDZ1A007", "model": "ST16000NM001G", "size": 16000900661248, "rotationrate": 7200, "_base_temp": 38}, + {"identifier": "3500151795937c008", "name": "sdh", "devname": "sdh", "serial": "WDZ1A008", "model": "ST16000NM001G", "size": 16000900661248, "rotationrate": 7200, "_base_temp": 39}, + # 4TB Seagate IronWolf — sdi, sdj + {"identifier": "3500151795937c009", "name": "sdi", "devname": "sdi", "serial": "WDZ1A009", "model": "ST4000VN008", "size": 4000787030016, "rotationrate": 5900, "_base_temp": 30}, + {"identifier": "3500151795937c00a", "name": "sdj", "devname": "sdj", "serial": "WDZ1A010", "model": "ST4000VN008", "size": 4000787030016, "rotationrate": 5900, "_base_temp": 29}, + # 10TB Toshiba — sdk, sdl + {"identifier": "3500151795937c00b", "name": "sdk", "devname": "sdk", "serial": "WDZ1A011", "model": "TOSHIBA MG06ACA10TE", "size": 10000831348736, "rotationrate": 7200, "_base_temp": 41}, + {"identifier": "3500151795937c00c", "name": "sdl", "devname": "sdl", "serial": "WDZ1A012", "model": "TOSHIBA MG06ACA10TE", "size": 10000831348736, "rotationrate": 7200, "_base_temp": 40}, + # 8TB HGST — sdm + {"identifier": "3500151795937c00d", "name": "sdm", "devname": "sdm", "serial": "WDZ1A013", "model": "HGST HUH728080ALE604", "size": 8001563222016, "rotationrate": 7200, 
"_base_temp": 35}, + # Always-fail drive — sdn + {"identifier": "3500151795937c00e", "name": "sdn", "devname": "sdn", "serial": "FAIL001", "model": "TOSHIBA MG06ACA10TE", "size": 10000831348736, "rotationrate": 7200, "_base_temp": 45, "_always_fail": True}, + # 6TB Seagate Archive — sdo + {"identifier": "3500151795937c00f", "name": "sdo", "devname": "sdo", "serial": "WDZ1A015", "model": "ST6000DM003", "size": 6001175126016, "rotationrate": 5900, "_base_temp": 33}, +] + +# Shared fields for every drive +_DRIVE_DEFAULTS = { + "type": "HDD", + "bus": "SCSI", + "togglesmart": True, + "pool": None, + "enclosure": None, +} + +# --------------------------------------------------------------------------- +# Mutable in-memory state +# --------------------------------------------------------------------------- +_state: dict = { + "drives": {}, + "jobs": {}, + "smart_history": {}, + "job_counter": 1000, +} + + +def _init_state() -> None: + for d in _BASE_DRIVES: + devname = d["devname"] + _state["drives"][devname] = { + **_DRIVE_DEFAULTS, + **{k: v for k, v in d.items() if not k.startswith("_")}, + "zfs_guid": f"1234{int(d['identifier'], 16):016x}", + "temperature": d["_base_temp"], + "smart_health": "PASSED", + "_base_temp": d["_base_temp"], + "_always_fail": d.get("_always_fail", False), + } + _state["smart_history"][devname] = [] + + +_init_state() + + +def _public_drive(d: dict) -> dict: + return {k: v for k, v in d.items() if not k.startswith("_")} + + +def _public_job(j: dict) -> dict: + return {k: v for k, v in j.items() if not k.startswith("_")} + + +# --------------------------------------------------------------------------- +# Simulation loop +# --------------------------------------------------------------------------- +async def _simulation_loop() -> None: + while True: + await asyncio.sleep(TICK_SECONDS) + _tick() + + +def _tick() -> None: + for drive in _state["drives"].values(): + drift = random.randint(-1, 2) + drive["temperature"] = max(20, min(70, 
drive["_base_temp"] + drift)) + + now_iso = datetime.now(timezone.utc).isoformat() + for job_id, job in list(_state["jobs"].items()): + if job["state"] != "RUNNING": + continue + + elapsed = time.monotonic() - job["_started_mono"] + duration = job["_duration_seconds"] + + if job["_always_fail"] and elapsed / duration >= 0.30: + job["state"] = "FAILED" + job["error"] = "SMART test aborted: uncorrectable read error at LBA 0x1234567" + job["progress"]["percent"] = 30 + job["progress"]["description"] = "Test failed" + job["time_finished"] = now_iso + _record_smart_result(job, failed=True) + continue + + pct = min(100, int(elapsed / duration * 100)) + job["progress"]["percent"] = pct + job["progress"]["description"] = ( + f"Running SMART {job['_test_type'].lower()} test on {job['_disk']} ({pct}%)" + ) + + if pct >= 100: + job["state"] = "SUCCESS" + job["result"] = True + job["time_finished"] = now_iso + job["progress"]["percent"] = 100 + job["progress"]["description"] = "Completed without error" + _record_smart_result(job, failed=False) + + +def _record_smart_result(job: dict, failed: bool) -> None: + devname = job["_disk"] + test_type = job["_test_type"] + history = _state["smart_history"].get(devname, []) + num = len(history) + 1 + + history.insert(0, { + "num": num, + "type": "Short offline" if test_type == "SHORT" else "Extended offline", + "status": "Read failure" if failed else "Completed without error", + "status_verbose": ( + "Read failure - error in segment #1" if failed + else "Completed without error" + ), + "remaining": 0, + "lifetime": random.randint(10000, 50000), + "lba_of_first_error": "0x1234567" if failed else None, + }) + + drive = _state["drives"].get(devname) + if drive: + drive["smart_health"] = "FAILED" if failed else "PASSED" + + +# --------------------------------------------------------------------------- +# Request models +# --------------------------------------------------------------------------- +class SmartTestRequest(BaseModel): + disks: 
list[str] + type: str # SHORT | LONG + + +class AbortRequest(BaseModel): + id: int + + +# --------------------------------------------------------------------------- +# API Routes — mirrors TrueNAS CORE v2.0 +# --------------------------------------------------------------------------- + +@app.get("/api/v2.0/disk") +async def list_disks(): + return [_public_drive(d) for d in _state["drives"].values()] + + +@app.get("/api/v2.0/disk/{identifier}") +async def get_disk(identifier: str): + for d in _state["drives"].values(): + if d["identifier"] == identifier or d["devname"] == identifier: + return _public_drive(d) + raise HTTPException(status_code=404, detail="Disk not found") + + +@app.get("/api/v2.0/smart/test/results/{disk_name}") +async def smart_test_results(disk_name: str): + if disk_name not in _state["smart_history"]: + raise HTTPException(status_code=404, detail="Disk not found") + return [{"disk": disk_name, "tests": _state["smart_history"][disk_name]}] + + +@app.post("/api/v2.0/smart/test") +async def start_smart_test(req: SmartTestRequest): + if req.type not in ("SHORT", "LONG"): + raise HTTPException(status_code=422, detail="type must be SHORT or LONG") + + job_ids = [] + for disk_name in req.disks: + if disk_name not in _state["drives"]: + raise HTTPException(status_code=404, detail=f"Disk {disk_name} not found") + + _state["job_counter"] += 1 + job_id = _state["job_counter"] + drive = _state["drives"][disk_name] + duration = SHORT_DURATION_SECONDS if req.type == "SHORT" else LONG_DURATION_SECONDS + + _state["jobs"][job_id] = { + "id": job_id, + "method": "smart.test", + "arguments": [{"disks": [disk_name], "type": req.type}], + "state": "RUNNING", + "progress": { + "percent": 0, + "description": f"Running SMART {req.type.lower()} test on {disk_name}", + "extra": None, + }, + "result": None, + "error": None, + "exception": None, + "time_started": datetime.now(timezone.utc).isoformat(), + "time_finished": None, + "_started_mono": time.monotonic(), + 
"_duration_seconds": duration, + "_disk": disk_name, + "_test_type": req.type, + "_always_fail": drive["_always_fail"], + } + job_ids.append(job_id) + + return job_ids[0] if len(job_ids) == 1 else job_ids + + +@app.get("/api/v2.0/core/get_jobs") +async def get_jobs(method: Optional[str] = None, state: Optional[str] = None): + results = [] + for job in _state["jobs"].values(): + if method and job["method"] != method: + continue + if state and job["state"] != state: + continue + results.append(_public_job(job)) + return results + + +@app.get("/api/v2.0/core/get_jobs/{job_id}") +async def get_job(job_id: int): + job = _state["jobs"].get(job_id) + if not job: + raise HTTPException(status_code=404, detail="Job not found") + return _public_job(job) + + +@app.post("/api/v2.0/core/job_abort") +async def abort_job(req: AbortRequest): + job = _state["jobs"].get(req.id) + if not job: + raise HTTPException(status_code=404, detail="Job not found") + if job["state"] != "RUNNING": + raise HTTPException(status_code=400, detail=f"Job is not running (state={job['state']})") + job["state"] = "ABORTED" + job["time_finished"] = datetime.now(timezone.utc).isoformat() + job["progress"]["description"] = "Aborted by user" + return True + + +@app.get("/api/v2.0/system/info") +async def system_info(): + return { + "version": "TrueNAS-13.0-U6.1", + "hostname": "mock-truenas", + "uptime_seconds": 86400, + "system_serial": "MOCK-SN-001", + "system_product": "MOCK SERVER", + "cores": 4, + "physmem": 17179869184, + } + + +@app.get("/health") +async def health(): + return {"status": "ok", "mock": True, "drives": len(_state["drives"]), "jobs": len(_state["jobs"])} + + +# --------------------------------------------------------------------------- +# Debug endpoints +# --------------------------------------------------------------------------- + +@app.post("/debug/reset") +async def debug_reset(): + _state["drives"].clear() + _state["jobs"].clear() + _state["smart_history"].clear() + 
_state["job_counter"] = 1000 + _init_state() + return {"reset": True} + + +@app.get("/debug/state") +async def debug_state(): + return { + "drives": {k: _public_drive(v) for k, v in _state["drives"].items()}, + "jobs": {str(k): _public_job(v) for k, v in _state["jobs"].items()}, + "smart_history": _state["smart_history"], + "job_counter": _state["job_counter"], + } + + +@app.post("/debug/complete-all-jobs") +async def debug_complete_all(): + completed = [] + for job_id, job in _state["jobs"].items(): + if job["state"] == "RUNNING": + job["_started_mono"] -= job["_duration_seconds"] + completed.append(job_id) + return {"fast_forwarded": completed} + + +# --------------------------------------------------------------------------- +# Startup +# --------------------------------------------------------------------------- +@app.on_event("startup") +async def startup(): + asyncio.create_task(_simulation_loop()) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ba5b5fa --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +fastapi +uvicorn +aiosqlite +httpx +pydantic-settings +jinja2 +sse-starlette