Compare commits

..

No commits in common. "25d4622aa4cb877de079a3bdf5825b20359701ca" and "main" have entirely different histories.

82 changed files with 10502 additions and 3197 deletions

View file

@ -0,0 +1,76 @@
name: Security scan
# Runs on every push to main, every PR, and nightly at 07:00 UTC (~03:00 EDT).
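# Three security jobs run in parallel — failure of any one fails the workflow,
# making findings visible in the forge UI. A fourth job (mypy) is informational
# only: it uses `continue-on-error` and never fails the workflow.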
#
# Tools:
# pip-audit — known CVEs in pinned dependencies (PyPI advisory DB)
# bandit — Python static security analysis (subprocess, eval, etc.)
# gitleaks — secrets in git history (full repo scan)
on:
push:
branches: [main]
pull_request:
schedule:
- cron: "0 7 * * *"
workflow_dispatch:
jobs:
pip-audit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install pip-audit
run: pip install --upgrade pip-audit
- name: Audit requirements.txt
run: pip-audit --requirement requirements.txt --strict --format=columns
bandit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install bandit
run: pip install --upgrade bandit
- name: Static security analysis
# B608: SQL string construction. All dynamic SQL in this repo uses
# bound parameters for data; the dynamic part is structural
# (column lists / IN-clause '?,?,?' placeholders). Reviewed.
run: bandit -r app -ll -ii --skip B608 -x app/__pycache__,tests
gitleaks:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install gitleaks
run: |
curl -sSfL https://github.com/gitleaks/gitleaks/releases/download/v8.21.2/gitleaks_8.21.2_linux_x64.tar.gz \
| tar -xz gitleaks
chmod +x gitleaks
- name: Scan git history for secrets
run: ./gitleaks detect --source . --no-banner --redact --verbose
mypy:
runs-on: ubuntu-latest
# Informational — does not fail the workflow. Use `continue-on-error`
# so the build stays green while we work down the type-debt baseline.
continue-on-error: true
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install mypy
run: pip install --upgrade mypy
- name: Type check
run: mypy --ignore-missing-imports --no-strict-optional app

View file

@ -1,18 +1,18 @@
# TrueNAS Burn-In Dashboard — Project Context
# NAS Burn-In Dashboard — Project Context
> Drop this file in any new Claude session to resume work with full context.
> Last updated: 2026-02-24 (Stage 8)
> Last updated: 2026-05-03 (v1.0.0-39 — live against TrueNAS SCALE 25.10)
---
## What This Is
A self-hosted web dashboard for running and tracking hard-drive burn-in tests
against a TrueNAS CORE instance. Deployed on **maple.local** (10.0.0.138).
against a TrueNAS SCALE 25.10 instance. Deployed on **maple.local** (10.0.0.138).
- **App URL**: http://10.0.0.138:8084 (or http://burnin.hellocomputer.xyz)
- **Stack path on maple.local**: `~/docker/stacks/truenas-burnin/`
- **Source (local mac)**: `~/Desktop/claude-sandbox/truenas-burnin/`
- **Stack path on maple.local**: `~/docker/stacks/nas-burnin/`
- **Source (local mac)**: `~/Desktop/claudesandbox/nas-burnin/`
- **Compose synced to maple.local** via `scp` or manual copy
### Stages completed
@ -29,14 +29,14 @@ against a TrueNAS CORE instance. Deployed on **maple.local** (10.0.0.138).
| 6c | Settings overhaul (editable form, runtime store, SMTP fix, stage selection) | ✅ |
| 6d | Cancel SMART tests, Cancel All burn-ins, drag-to-reorder stages in modals | ✅ |
| 7 | SSH burn-in execution, SMART attr monitoring, drive reset, version badge, stats polish | ✅ |
| 8 | Live SSH terminal in drawer (xterm.js + asyncssh WebSocket PTY bridge) | ✅ |
| 8 | Live against TrueNAS SCALE 25.10: SSH SMART, disk temps, CPU/PCH sensors, thermal gate | ✅ |
---
## File Map
```
truenas-burnin/
nas-burnin/
├── docker-compose.yml # two services: mock-truenas + app
├── Dockerfile # app container
├── requirements.txt
@ -53,8 +53,7 @@ truenas-burnin/
├── database.py # schema, migrations, init_db(), get_db()
├── models.py # Pydantic v2 models; StartBurninRequest has run_surface/run_short/run_long + profile property
├── settings_store.py # runtime settings store — persists to /data/settings_overrides.json
├── ssh_client.py # asyncssh client: smartctl parsing, badblocks streaming, test_connection
├── terminal.py # WebSocket ↔ asyncssh PTY bridge for live terminal tab
├── ssh_client.py # asyncssh client: smartctl parsing, badblocks streaming, sensors, test_connection
├── truenas.py # httpx async client with retry (lambda factory pattern)
├── poller.py # poll loop, SSE pub/sub, stale detection, stuck-job check
├── burnin.py # orchestrator, semaphore, stages, check_stuck_jobs()
@ -71,7 +70,7 @@ truenas-burnin/
└── templates/
├── layout.html # header nav: History, Stats, Audit, Settings, bell button
├── dashboard.html # stats bar, failed banner, batch bar, log drawer (4 tabs: Burn-In/SMART/Events/Terminal)
├── dashboard.html # stats bar (+ CPU/PCH sensors, thermal chip), failed banner, batch bar, log drawer (3 tabs: Burn-In/SMART/Events)
├── history.html
├── job_detail.html # + Print/Export button
├── audit.html # audit event log
@ -209,7 +208,7 @@ All read from `.env` via `pydantic-settings`. See `.env.example` for full list.
| `TEMP_WARN_C` | `46` | Temperature warning threshold (°C) |
| `TEMP_CRIT_C` | `55` | Temperature critical threshold — precheck fails above this |
| `BAD_BLOCK_THRESHOLD` | `0` | Max bad blocks allowed before surface_validate fails (0 = any bad = fail) |
| `APP_VERSION` | `1.0.0-8` | Displayed in header version badge |
| `APP_VERSION` | `1.0.0-9` | Displayed in header version badge |
| `SSH_HOST` | `` | TrueNAS SSH hostname/IP — empty disables SSH mode (uses mock/REST) |
| `SSH_PORT` | `22` | TrueNAS SSH port |
| `SSH_USER` | `root` | TrueNAS SSH username |
@ -223,18 +222,18 @@ All read from `.env` via `pydantic-settings`. See `.env.example` for full list.
### First deploy (already done)
```bash
# On maple.local
cd ~/docker/stacks/truenas-burnin
cd ~/docker/stacks/nas-burnin
docker compose up -d --build
```
### Redeploy after code changes
```bash
# Copy changed files from mac to maple.local first, e.g.:
scp -P 2225 -r app/ brandon@10.0.0.138:~/docker/stacks/truenas-burnin/
scp -P 2225 -r app/ brandon@10.0.0.138:~/docker/stacks/nas-burnin/
# Then on maple.local:
ssh brandon@10.0.0.138 -p 2225
cd ~/docker/stacks/truenas-burnin
cd ~/docker/stacks/nas-burnin
docker compose up -d --build
```
@ -243,7 +242,7 @@ docker compose up -d --build
# On maple.local — stop containers first
docker compose stop app
# Delete DB using alpine (container owns the file, sudo not available)
docker run --rm -v ~/docker/stacks/truenas-burnin/data:/data alpine rm -f /data/app.db
docker run --rm -v ~/docker/stacks/nas-burnin/data:/data alpine rm -f /data/app.db
docker compose start app
```
@ -297,6 +296,15 @@ yield {"event": "drives-update", "data": html}
thead { position: sticky; top: 0; z-index: 10; }
```
### Burn-in SMART column overlay
```python
# When a burn-in runs a short_smart or long_smart stage, its progress must be
# mirrored in the Short/Long SMART columns (which normally read from smart_tests table).
# _fetch_drives_for_template() queries burnin_stages for running/completed SMART stages
# and overlays them onto the drive dict. Only overlays if standalone SMART column is idle.
# Helper: _compute_eta_seconds(started_at, percent) for linear ETA extrapolation.
```
### export.csv route ordering
```python
# MUST register export.csv BEFORE /{job_id} — FastAPI tries int() on "export.csv"
@ -329,6 +337,31 @@ async def burnin_get(job_id: int, ...): ...
| `profile` NameError in `_execute_stages` | `_execute_stages` called `_recalculate_progress(job_id, profile)` but `profile` not in scope | Changed to `_recalculate_progress(job_id)` — profile param was unused |
| `app_version` Jinja2 global rendered as function | Set `templates.env.globals["app_version"] = _get_app_version` (callable) | Set to the static string value directly: `= _settings.app_version` |
| All buttons broken (Short/Long/Burn-In/Cancel) | `stages.forEach(function(s){` in `_drawerRenderBurnin` missing closing `});` — JS syntax error prevented entire IIFE from loading | Added missing `});` before `} else {` |
| Burn-in SMART stage shows in wrong column | Burn-in orchestrator tracks SMART progress in `burnin_stages` table, but SMART columns read from `smart_tests` table only | `_fetch_drives_for_template` now queries `burnin_stages` for active burn-ins and overlays SMART stage progress/results onto the Short/Long SMART columns |
| 14TB surface jobs marked `failed` after 6-day clean run (1.0.0-10) | `_stage_final_check` treated `ssh_client.get_smart_attributes` failures as drive failures, but that helper swallows transport errors and returns `failures: ["SSH error: ..."]`. A 1-second SSH blip invalidated multi-day surface scans. | `_stage_final_check` now distinguishes pure SSH-only failures (every entry starts with `"SSH error:"`) from real SMART failures; retries 3× with 30s gaps; soft-passes on persistent SSH-only — surface stages stand. |
| `database is locked` during long_smart (1.0.0-11) | `_stage_smart_test_ssh` appended full smartctl output to `log_text` every 5s poll. SQLite's `COALESCE(log_text,'')||?` rewrites the whole column, and over 6+ hours `log_text` grew to 50 MB → contention against poller/orchestrator/settings writers. | (a) `_db()` is now an `@asynccontextmanager` setting `PRAGMA busy_timeout=10000` per connection. (b) log_text appends throttled to every 12 polls (~60s) or on state change. |
| Stuck stage rows linger as `running` after `check_stuck_jobs` (1.0.0-11) | Stuck-job detector updated `burnin_jobs.state='unknown'` but didn't touch stage rows. | Added `UPDATE burnin_stages SET state='unknown', finished_at=? WHERE burnin_job_id=? AND state='running'` to the same transaction. |
| Dashboard 500 — `TypeError: unhashable type: 'dict'` from Jinja (1.0.0-12) | Starlette 1.0.0 (released 2026-04) removed the legacy `TemplateResponse(name, context)` signature. With the old call style, the context dict ended up where `name` was expected, → Jinja `cache_key` was unhashable. | Migrated all 7 calls to new signature: `TemplateResponse(request, name, context)`. **Root enabler**: `requirements.txt` is unpinned, so `--build` pulled the latest breaking release. |
---
## Operational Gotchas
### `requirements.txt` is unpinned
Every `docker compose up -d --build` pulls latest of fastapi, starlette, jinja2, asyncssh, etc. The Starlette 1.0 regression on 2026-04-27 is a direct consequence. **Either pin to known-good versions, or audit installed versions immediately after each rebuild** with:
```bash
docker exec nas-burnin python3 -c "import fastapi, starlette, jinja2; print(fastapi.__version__, starlette.__version__, jinja2.__version__)"
```
### Local source ↔ maple host can drift
The deploy convention is `scp -r app/` from mac to maple, but if you ever edit on maple directly (or skip an `scp` after local changes), the two trees diverge. As of 2026-04-27 the local `routes.py` had unsynced SMART-overlay work but was missing the deployed `/ws/terminal` Stage 8 endpoint — neither side a superset.
**Always `diff -u` before bulk scp:**
```bash
ssh -p 2225 brandon@10.0.0.138 'cat ~/docker/stacks/nas-burnin/app/routes.py' > /tmp/deployed_routes.py
diff -u /tmp/deployed_routes.py ~/Desktop/claudesandbox/nas-burnin/app/routes.py
```
When sides have conflicting edits, prefer **patching the host file in place + rebuild** over a destructive scp.
---
@ -394,7 +427,7 @@ SMART attrs stored as JSON blob in `drives.smart_attrs`. Updated by `final_check
Settings page has a "Check for Updates" button that fetches:
```
GET https://git.hellocomputer.xyz/api/v1/repos/brandon/truenas-burnin/releases/latest
GET https://git.hellocomputer.xyz/api/v1/repos/brandon/nas-burnin/releases/latest
```
Compares tag name against `settings.app_version`; shows "up to date" or "v{tag} available".

20
Dockerfile Normal file
View file

@ -0,0 +1,20 @@
FROM python:3.12-slim
WORKDIR /opt/app
# Bump pip to a version with no known CVEs before installing anything.
# Without this, pip-audit flags CVE-2025-8869, CVE-2026-1703, CVE-2026-3219
# in pip itself. Pinned floor; pip is forward-compatible across 26.x.
RUN pip install --no-cache-dir --upgrade "pip>=26.0"
# requirements.txt is a fully-pinned lockfile generated from
# requirements.in via pip-compile (see scripts/regenerate-lockfile.sh).
# --require-hashes refuses to install any package whose sha256 doesn't
# match a hash in the file — defends against compromised upstream
# mirrors and accidental version drift.
COPY requirements.txt .
RUN pip install --no-cache-dir --require-hashes -r requirements.txt
COPY app/ ./app/
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8084"]

361
README.md Normal file
View file

@ -0,0 +1,361 @@
# NAS Burn-In Dashboard
Web dashboard for running disciplined burn-in tests on TrueNAS drives.
Sits next to the NAS, not on it — orchestrates `smartctl`, `badblocks`, and
`nvme-cli` over SSH and tracks every job in SQLite.
Inspired by the community `disk-burnin.sh` script (Spearfoot et al.) but
adds: concurrent burn-ins, pool-membership safety locks, login + audit,
live progress UI, daily email reports, and resumable state.
## Stack
FastAPI + HTMX (SSE) + asyncssh + SQLite, in one Docker container. No
external services beyond your TrueNAS host. Templates and static assets
are bind-mounted; Python source is baked into the image.
---
## Quick start
```bash
# 1. Configure
cp .env.example .env
# edit SSH_HOST / SSH_USER / SSH_KEY (see .env.example) and, optionally,
# INITIAL_ADMIN_USERNAME / INITIAL_ADMIN_PASSWORD for first-run setup.
# 2. Build + run
docker compose up -d --build
# 3. Open the dashboard
open http://localhost:8084 # or your host's IP
# 4. First time: the login page renders a "Create initial admin" form.
# Pick a username + password (>= 8 chars). Done.
```
If you set `INITIAL_ADMIN_*` env vars *and* the users table is empty, that
account is created on startup automatically. After that the env vars are
ignored — change passwords from the UI ("Change password" header link) or
the CLI (`docker exec -it nas-burnin python -m app.auth_cli reset
<username>`).
---
## Burning in many drives at once
The dashboard runs **up to `max_parallel_burnins`** burn-ins concurrently
(configurable in Settings, default 4) and queues the rest. Submitting 14
drives doesn't take 14 separate clicks — you submit once and the queue
drains automatically as slots free up.
### The workflow
1. **Select all idle drives** — click the checkbox in the table header
(next to "DRIVE"). It auto-checks every drive that's currently
selectable: idle, no active SMART test, not pool-locked. Pool-locked
drives are intentionally excluded; if you really want to burn one of
them in, unlock it individually first (see [Drive locks](#drive-locks)
below).
2. **Click the Burn-In button** in the batch action bar that slides up
from the bottom — it shows the count of selected drives.
3. **In the batch modal**: pick the stages to run (Short SMART, Long
SMART, Surface Validate — drag to reorder), confirm your operator
name, and click Start.
4. **All selected drives are queued** in one POST. Up to
`max_parallel_burnins` enter `running`; the rest sit `queued`. As each
running job finishes, the next queued job picks up the freed slot
automatically — no operator action between batches.
5. The toast shows e.g. "12 burn-in(s) queued, 0 skipped, 0 pool-locked."
### Time estimate
| Drive size | Profile | Per-drive runtime (default block size) |
|-----------|-------------|----------------------------------------|
| 250 GB SSD | Short + Long SMART + Surface | ~1 hour |
| 14 TB HDD | Short + Long SMART + Surface | ~24 hours |
| 14 TB HDD | Short + Long SMART (no surface) | ~6–8 hours |
For 12× 14 TB drives at default 4-parallel: roughly **3–4 days** end-to-end.
Bumping `surface_validate_block_size` from 4096 to 8192 in Settings cuts
runtime roughly in half at ~2× RAM cost — matches the upstream
`disk-burnin.sh` recommendation.
### Watch out
- **Stuck-job timeout** — `stuck_job_hours` (default 168 = 7 days)
marks any job past that threshold as `unknown` and kills the remote
process. The default covers `-w` surface_validate on 14 TB+ HDDs with
margin. If you're running short SSDs and want faster detection of
genuinely stuck jobs, drop it. (Earlier versions defaulted to 24h
which false-positived on multi-TB drives.)
- **Thermal gate** — if drives currently under burn-in hit the
temperature warning threshold, new jobs wait up to 3 minutes before
acquiring a slot. Increase `temp_warn_c` if your chassis runs hot but
is otherwise fine.
### Cancelling
Click the red ✕ next to a running job. The orchestrator:
1. Marks the job `cancelled` in the DB.
2. Issues `kill -9 <remote_pid>` over a fresh SSH session (the badblocks
PID is captured at launch via `sh -c 'echo PID:$$; exec ...'`).
3. Cancels the asyncio task, releasing the semaphore slot for the next
queued job.
Cancellations are durable — restart the container and queued jobs resume,
cancelled jobs stay cancelled.
### Job states explained
| State | When it's set |
|-------------|-------------------------------------------------------------------------------|
| `queued` | Submitted, waiting for a `max_parallel_burnins` slot |
| `running` | Actively executing some stage |
| `passed` | All stages finished green |
| `failed` | A stage failed deterministically (bad blocks > threshold, SMART failure, etc.) |
| `cancelled` | Operator clicked ✕ |
| `unknown` | Job was alive but its outcome is indeterminate — see below |
`unknown` fires in two situations:
1. The stuck-job detector (`stuck_job_hours`, default 7 days) trips because
the job has been running too long without finishing.
2. The asyncio task got cancelled mid-stage by something *other* than an
operator click — usually a container restart (`docker compose up -d`,
`--build`, or the host rebooting). Burn-in source code goes through
the Dockerfile `COPY`, so any source-code deploy recreates the
container, drops the SSH connection to TrueNAS, and would orphan the
running burn-in. Avoid `--build` while burn-ins are active.
When `unknown` fires the drawer's per-stage Reason block shows
*"Task cancelled mid-run — likely container restart or shutdown"* so the
classification is explicit, not silent.
---
## Drive drawer
Click any drive row to slide a detail drawer down from the top. Three tabs:
- **Burn-In** — per-stage breakdown of the latest job
- **SMART** — short/long test states + cached SMART attributes
- **Events** — last 50 audit events for the drive
### Surface-validate visualization
For drives in a `surface_validate` stage (running or finished), the Burn-In
tab renders:
1. **Vital-signs strip** — `Start` (with date) · `Elapsed` · `ETA` (duration
remaining) · `Finish` (wall-clock estimate, browser-local timezone) ·
`Temp` (cool/warm/hot colour). Computed from data in the drawer payload;
ETA + Finish suppressed below 0.5% so you don't see a "Finish: Jun 22"
stutter at the very start.
2. **Four pattern meters** — `0xaa` / `0x55` / `0xff` / `0x00`. Each meter
is split into a left half (write phase, blue) and a right half (verify
phase, green). Current pattern's label glows blue; completed patterns'
labels go green. This translates badblocks's per-phase percent into
monotonic 0-99% overall progress, so the bar never appears to "rewind"
when a new phase starts.
3. **Phase caption** — explicit text: *"Pattern 2 of 4 · Verify 0x55 · 47%
within phase"*. Makes the visual grammar unambiguous.
4. **Completed-pattern history** — once pattern 1 finishes, a chip appears
showing `0xaa: 14h 22m`. Lets you predict the rest of the run from the
first pattern's elapsed time.
### Failure reason block
Stages that ended `failed` / `cancelled` / `unknown` show a coloured Reason
pill at the top of the stage section. Sources, in order of preference:
1. The stage's own `error_text`
2. The parent job's `error_text` (backfilled by the drawer when the stage's
own is empty — catches orphan rows from hard crashes)
3. A heuristic: if the log is tiny and no real progress was recorded,
*"Stopped without recording an error — likely cause: SSH connection drop
or container restart while this stage was running"*
Otherwise: *"No error message recorded."* — there's never a blank where you
expect to see why something broke.
### Column sorting
Click any column header (Drive, Serial, Size, Temp, Health, Short SMART,
Long SMART, Burn-In) to sort. Cycle: ascending → descending → cleared. Sort
state persists in `localStorage` so it survives page reload AND every
SSE-driven tbody refresh (~12 s poll cycle). Empty values always sink to
the bottom regardless of direction.
Sortable values are emitted as `data-sort-*` attributes on each `<tr>`,
with numeric priority maps for SMART states (e.g. `running` always sorts
ahead of `idle`).
---
## Drive locks
To prevent destroying live data, the dashboard refuses to start
destructive burn-in on drives ZFS or the kernel reports as in-use.
Detected lock states (with the typed-confirmation token required to
override):
| State | Detected via | Confirm token |
|---------------|---------------------------|------------------------------|
| Active pool | `zpool list -vHP` | the pool name (e.g. `tank`) |
| Boot pool | pool name = `boot-pool` | `DESTROY BOOT POOL` |
| Exported ZFS | `lsblk` `zfs_member` partitions not in any active pool | `DESTROY EXPORTED POOL` |
| Mounted FS | `findmnt -no SOURCE` | `DESTROY MOUNTED FILESYSTEM` |
Detection runs every poll cycle (~12 s). On any SSH or parser failure the
poller fails *closed*: previously-locked drives stay locked, previously-
unlocked drives stay unlocked, until detection recovers.
Unlock is in-memory only with a 10-minute TTL — bound to the
`(pool_name, pool_role)` observed at unlock time. If a subsequent poll
reclassifies the drive (e.g. `(exported)` → `tank` because someone
imported the pool), the grant is invalidated automatically.
Every unlock writes an audit event and surfaces in the next daily report
in a red banner.
---
## Settings highlights
All settings live under `/settings` (header link). Key knobs:
- **`max_parallel_burnins`** (default 4) — semaphore cap. Restart container
for changes to take effect.
- **`surface_validate_block_size` / `_block_buffer` / `_passes`** —
badblocks `-b` / `-c` / `-p`. Defaults preserve original behaviour;
tune for speed vs paranoia.
- **`stuck_job_hours`** (default 168 = 7 days) — covers 14 TB+ HDDs;
drop for faster detection on small fast drives.
- **`temp_warn_c` / `temp_crit_c`** — thermal gating thresholds.
- **`bad_block_threshold`** (default 0) — number of bad blocks
surface_validate tolerates before failing the stage.
- **`retention_log_days`** (default 35) — when to NULL out
`burnin_stages.log_text`. Nightly job at 03:00 local.
- **`retention_backup_keep`** (default 14) — how many nightly DB
snapshots to keep in `/data/backups/`.
---
## Notifications
- **Daily SMTP report** at `smtp_report_hour` (default 08:00 local) with
drive-level summary, failed-health banner, and a red banner listing
every pool-drive unlock from the last 24 h.
- **Per-job email alerts** on pass/fail (configurable).
- **Webhook URL** posts JSON on every job state change.
Configure SMTP in Settings → Email. Includes a "Test SMTP" button.
---
## Operations
### Logs
```bash
docker logs -f nas-burnin
# JSON-structured. Filter with jq:
docker logs nas-burnin 2>&1 | jq -rR 'fromjson? | "\(.ts) \(.level) \(.msg)"'
```
### User management
```bash
docker exec -it nas-burnin python -m app.auth_cli list
docker exec -it nas-burnin python -m app.auth_cli add <username>
docker exec -it nas-burnin python -m app.auth_cli reset <username>
```
Passwords are read from a TTY prompt and are never accepted on the command
line.
### Backups
Automated nightly to `/data/backups/app-YYYY-MM-DD.db` (online
`sqlite3.backup`, doesn't lock writers). To restore:
```bash
docker compose down
cp data/backups/app-2026-05-01.db data/app.db
docker compose up -d
```
### Health probe
`/health` is unauthenticated and returns 200 only when DB, poller, and
SSH (when configured) all check green; 503 otherwise. Use it for
container/orchestrator health checks.
```bash
curl -sf http://localhost:8084/health | jq
```
### Resetting the DB
If you need to start over:
```bash
docker compose down
sudo rm -f data/app.db data/session_secret
# keep data/settings_overrides.json if you want to preserve UI settings
docker compose up -d
```
---
## Updating dependencies
`requirements.in` is the human-edited list. `requirements.txt` is a
fully-pinned lockfile generated from it (with sha256 hashes), consumed
at build time with `pip install --require-hashes`. **Never edit
`requirements.txt` by hand.**
```bash
# 1. Add or change a constraint in requirements.in
$EDITOR requirements.in
# 2. Regenerate the lockfile (runs pip-compile in a clean container)
./scripts/regenerate-lockfile.sh
# 3. Review the diff — transitive bumps may be CVE fixes or breaking changes
git diff requirements.txt
# 4. Rebuild + smoke-test
docker compose up -d --build app
curl -sf http://localhost:8084/health | jq
# 5. Commit BOTH files together
git add requirements.in requirements.txt
git commit -m "deps: bump <package> for <reason>"
```
This + the daily security scan (`scripts/security-scan.sh`) gives
defense-in-depth: pinning prevents accidental breakage from upstream
releases (Starlette 1.0 broke us once), `--require-hashes` defends
against compromised mirrors, and `pip-audit` catches new CVEs in any
pinned version after the fact.
## See also
- `CLAUDE.md` — full architecture, file map, deploy workflow, and the
rationale behind every non-obvious design decision.
- `SPEC.md` — canonical feature reference per version.
- `tests/` — `python -m unittest discover tests/` (65 tests, stdlib-only). Or run inside the deployed container with `scripts/run-tests.sh`.
---
## Known gaps / not-yet-built
- No multi-user RBAC — every user is effectively admin.
- No per-drive SMART attribute trend graphs (snapshots only).
- No scheduled burn-ins — jobs run immediately when queued.
- No CSRF tokens on state-changing endpoints (relies on
`SameSite=Strict` session cookie).
PRs welcome.

View file

@ -1,6 +1,6 @@
# TrueNAS Burn-In — Project Specification
# NAS Burn-In — Project Specification
**Version:** 1.0.0-8
**Version:** 1.0.0-39
**Status:** Active Development
**Audience:** Public / Open Source
@ -8,7 +8,7 @@
## Overview
TrueNAS Burn-In is a self-hosted web dashboard that runs on a separate machine or VM and connects to a TrueNAS system via SSH to automate and monitor the drive burn-in process. It is designed for users who want to validate new hard drives before adding them to a ZFS pool — where reliability is non-negotiable.
NAS Burn-In is a self-hosted web dashboard that runs on a separate machine or VM and connects to a TrueNAS system via SSH to automate and monitor the drive burn-in process. It is designed for users who want to validate new hard drives before adding them to a ZFS pool — where reliability is non-negotiable.
The app is not a TrueNAS plugin and does not run on TrueNAS itself. It connects remotely over SSH to issue smartctl and badblocks commands, polls results, and presents everything through a dark-themed real-time dashboard. It is deployed via Docker Compose and configured through a Settings UI and `.env` file.
@ -85,7 +85,7 @@ A **Reset** action clears the test state for a drive so it can be re-queued. It
### Dashboard (Main View)
- **Stats bar:** Total drives, Running, Failed, Passed, Idle counts.
- **Stats bar:** Total drives, Running, Failed, Passed, Idle counts. When SSH is active, also shows CPU and PCH temperature chips (live via SSE) and a thermal pressure indicator (WARM/HOT) that appears when running drives exceed the warning threshold.
- **Filter chips:** All / Running / Failed / Passed / Idle — filters the table below.
- **Drive table columns:** Drive (device name + model), Serial, Size, Temp, Health, Short SMART, Long SMART, Burn-In, Actions.
- **Temperature display:** Color-coded. Green ≤ 45°C, Yellow 46–54°C, Red ≥ 55°C. Thresholds configurable in Settings.
@ -97,11 +97,10 @@ A **Reset** action clears the test state for a drive so it can be re-queued. It
Slides up from the bottom of the page when a drive row is clicked. Does not navigate away — the table remains visible and scrollable above.
Four tabs:
Three tabs:
- **Burn-In** — stage-by-stage progress for the latest burn-in job; shows live elapsed time, raw SSH log output (smartctl / badblocks), and bad block count.
- **SMART** — output of the last smartctl run for this drive, with monitored attribute values highlighted (green/yellow/red). Raw `smartctl -a` output also shown when SSH mode is active.
- **Events** — chronological timeline of everything that happened to this drive (test started, test passed, failure detected, alert sent, reset, etc.).
- **Terminal** — live SSH PTY session (xterm.js). Opens an interactive shell on the TrueNAS host. Requires SSH to be configured in Settings. Supports full colour, resize, paste, and reconnect. xterm.js is loaded lazily on first use.
Features:
- Auto-scroll toggle (on by default).
@ -143,8 +142,9 @@ Divided into sections:
**BURN-IN BEHAVIOR**
- Max Parallel Burn-Ins (default: 2, max: 60).
- Warning displayed inline when set above 8: "Running many simultaneous surface scans may saturate your storage controller and produce unreliable results. Recommended: 2–4."
- Bad block failure threshold (default: 2).
- Bad block failure threshold (default: 0 — any bad sector = fail).
- Stuck job threshold in hours (default: 168 = 7 days — jobs running longer than this are auto-marked Unknown).
- **Adaptive thermal gate:** When drive temperatures are at or above the warning threshold, new burn-in jobs wait up to 3 minutes before acquiring a semaphore slot. This reduces thermal pile-up when drives are already running hot.
**TEMPERATURE**
- Warning threshold (default: 46°C).
@ -166,8 +166,8 @@ Divided into sections:
- Log level (DEBUG / INFO / WARN / ERROR).
**VERSION & UPDATES**
- Displays current version (starting at 0.5.0).
- "Check for Updates" button — queries GitHub releases API and shows latest version with a link if an update is available.
- Displays current version.
- "Check for Updates" button — queries Forgejo releases API at `git.hellocomputer.xyz` and shows latest version if an update is available.
---
@ -209,15 +209,21 @@ Both email and webhook fire simultaneously when both are configured and enabled.
## SSH Architecture
The app connects to TrueNAS over SSH from the host running the Docker container. It does not use the TrueNAS web API for drive operations — all smartctl and badblocks commands are issued directly over SSH.
The app connects to TrueNAS over SSH from the host running the Docker container. It does not use the TrueNAS web API for SMART or badblocks operations — all commands are issued directly over SSH using `asyncssh`.
This is required for TrueNAS SCALE 25.10 (Goldeye), which removed the `POST /api/v2.0/smart/test` REST endpoint. SSH is also the only way to run `badblocks`. The TrueNAS REST API is still used for drive discovery (`GET /api/v2.0/disk`) and temperature polling (`POST /api/v2.0/disk/temperatures`).
Connection details are configured in Settings (not `.env`). Supports:
- Password authentication.
- SSH key authentication (key pasted or uploaded in Settings UI).
- Custom port.
- SSH key authentication — key pasted into Settings UI or mounted as a Docker volume at `/run/secrets/ssh_key` (recommended for production).
- Custom port (default: 22).
- Test Connection button validates credentials before saving.
On SSH disconnection mid-test: the test process on TrueNAS may continue running (SSH disconnection does not kill the remote process if launched correctly with nohup or similar). The app marks the drive as `interrupted` in its own state, attempts to reconnect, and resumes polling if the process is still running. If the remote process is gone, the drive stays `interrupted`.
In addition to burn-in commands, the SSH connection is used to:
- Run `sensors -j` (lm-sensors) each poll cycle to read CPU and PCH/chipset temperatures, displayed live in the dashboard stats bar.
- Poll `smartctl -a` progress during standalone SMART tests.
On SSH disconnection mid-test: the app marks the drive as `interrupted`. The remote process may or may not still be running. The user must reset the drive and re-queue.
---
@ -233,8 +239,7 @@ Key endpoints:
- `POST /api/v1/drives/{drive_id}/smart/cancel` — cancel a SMART test.
- `POST /api/v1/burnin/start` — start a burn-in job.
- `POST /api/v1/burnin/{job_id}/cancel` — cancel a burn-in job.
- `GET /sse/drives` — Server-Sent Events stream powering the real-time dashboard UI.
- `WS /ws/terminal` — WebSocket endpoint bridging xterm.js to an asyncssh PTY on TrueNAS.
- `GET /sse/drives` — Server-Sent Events stream powering the real-time dashboard UI. Also emits `system-sensors` (CPU/PCH temps, thermal pressure) and `job-alert` (browser push notification) events.
- `GET /health` — health check endpoint.
The API makes this app a strong candidate for MCP server integration, allowing an AI assistant to query drive status, start tests, or receive alerts conversationally.
@ -246,8 +251,8 @@ The API makes this app a strong candidate for MCP server integration, allowing a
Docker Compose. Minimum viable setup:
```bash
git clone https://github.com/yourusername/truenas-burnin
cd truenas-burnin
git clone https://github.com/yourusername/nas-burnin
cd nas-burnin
cp .env.example .env
# Edit .env for system-level settings (TrueNAS URL, poll interval, etc.)
docker compose up -d
@ -259,24 +264,19 @@ All other configuration is done through the Settings UI — no manual file editi
---
## TrueNAS Compatibility
Tested and confirmed working against **TrueNAS SCALE 25.10.2.1 (Goldeye)**. Key compatibility notes:
- SCALE 25.10 removed `POST /api/v2.0/smart/test` — SSH is required for all SMART operations.
- Drive temperatures are not included in `GET /api/v2.0/disk` on SCALE — use `POST /api/v2.0/disk/temperatures` instead.
- TrueNAS SCALE is Linux/Debian-based. Device names are `sda`, `sdb`, etc. (not `ada0`/`da0` as on CORE/FreeBSD).
- `lm-sensors` is available on SCALE — `sensors -j` returns CPU (`coretemp`) and PCH (`pch_*`) temperatures.
- `badblocks` and `smartctl` are present at standard paths.
## mock-truenas
A companion Docker service (`mock-truenas`) that simulates the TrueNAS API for UI development and testing without real hardware. It mocks drive discovery, SMART test responses, and badblocks progress. Used exclusively for development — not deployed in production.
### Testing on Real TrueNAS (v1.0 Milestone Plan)
To validate against real hardware:
1. Switch `TRUENAS_URL` in `.env` from `http://mock-truenas:8000` to your real TrueNAS IP/hostname.
2. Ensure SSH is enabled on TrueNAS (System → Services → SSH).
3. Configure SSH credentials in Settings and use Test Connection to verify.
4. Start with a single idle drive — run Short SMART only first.
5. Verify the log drawer shows real smartctl output.
6. If successful, proceed to Long SMART, then a full burn-in on a drive you're comfortable wiping.
7. Confirm an alert email is received on completion.
8. Scale to 24 drives simultaneously and monitor system resource warnings.
**v1.0 is considered production-ready when:** the app runs reliably on a real TrueNAS system with 10 simultaneous drives, a failure alert email is received correctly, and a passing drive's history is preserved across a container restart.
A companion Docker service (`mock-truenas`) that simulates the TrueNAS API for UI development and testing without real hardware. It mocks drive discovery, SMART test responses, and badblocks progress. Used exclusively for development — not deployed in production. Disabled (commented out) in the production `docker-compose.yml`.
---

426
app/auth.py Normal file
View file

@ -0,0 +1,426 @@
"""
App-level username/password auth for the burn-in dashboard.
Sessions are signed cookies (Starlette SessionMiddleware) that carry
{user_id, username}. Every request goes through `get_current_user_optional`
via the auth middleware in main.py; routes that need an authenticated user
import `get_current_user` instead, which raises 401 (or redirects to
/login for HTML requests) when there's no session.
Passwords are bcrypt with the library's default 12-round cost. We never
store plaintext.
Bootstrap: if the users table is empty AND `initial_admin_username` /
`initial_admin_password` are set, the lifespan creates that admin once at
startup. Otherwise, the login template renders the "first user" form when
visited and zero users exist.
"""
from __future__ import annotations
import logging
import secrets
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
import aiosqlite
import bcrypt
from fastapi import HTTPException, Request, status
from starlette.responses import RedirectResponse
from app.config import settings
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Session secret — env var > persisted file > generated
# ---------------------------------------------------------------------------
_SESSION_SECRET_FILE = "session_secret"
def get_session_secret() -> str:
    """Return the HMAC key for SessionMiddleware. env var beats disk.

    Resolution order:
      1. ``settings.session_secret`` when it is set.
      2. The secret persisted next to the database file.
      3. A freshly generated secret, which is persisted and then returned.

    The secret file is created with owner-only permissions *before* the
    secret is written, so the key is never on disk with a wider mode. A
    persisted file that is empty (or whitespace only) is treated as missing
    and regenerated instead of yielding an empty signing key.

    Returns:
        The signing secret as a non-empty string.
    """
    if settings.session_secret:
        return settings.session_secret
    path = Path(settings.db_path).parent / _SESSION_SECRET_FILE
    if path.exists():
        persisted = path.read_text().strip()
        if persisted:
            return persisted
    path.parent.mkdir(parents=True, exist_ok=True)
    # Create (or keep) the file with 0600 first; the mode only applies when
    # the file is newly created, hence the best-effort chmod afterwards.
    path.touch(mode=0o600, exist_ok=True)
    try:
        path.chmod(0o600)
    except OSError:
        # Best effort: some filesystems (e.g. certain bind mounts) reject chmod.
        pass
    secret = secrets.token_urlsafe(64)
    path.write_bytes(secret.encode())
    log.warning(
        "Generated and persisted session secret to %s. "
        "Set SESSION_SECRET in env to override.", path,
    )
    return secret
# ---------------------------------------------------------------------------
# User model + storage
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class User:
    """Immutable view of one account as passed around by the auth layer.

    Holds no password material. Instances are built from `users` table rows
    by the lookup helpers in this module, or synthesized by LoopbackUser.
    """
    # Value of users.id; LoopbackUser uses 0 for its synthetic account.
    id: int
    # Login name as stored in the users table.
    username: str
    # Optional display name; None when not provided.
    full_name: str | None
    # True when the account may pass admin-only gates (see require_admin).
    is_admin: bool
def LoopbackUser(username: str = "monitor", full_name: str = "Autonomous Monitor") -> User:
    """Build the synthetic admin used by the loopback bypass in _AuthGateMiddleware.

    The account has id=0 (it has no real DB row) and is_admin=True so that
    admin-gated routes accept it. According to the bypass's design it is
    only reachable when request.client.host is 127.0.0.1 / ::1, i.e. from a
    process inside the container's network namespace (docker exec).
    """
    synthetic_id = 0
    grants_admin = True
    return User(synthetic_id, username, full_name, grants_admin)
def _now() -> str:
    """Return the current UTC time as an ISO-8601 string (with +00:00 offset)."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
def hash_password(plain: str) -> str:
    """Hash `plain` with bcrypt (fresh salt, library-default cost) and return it as text."""
    secret_bytes = plain.encode("utf-8")
    digest = bcrypt.hashpw(secret_bytes, bcrypt.gensalt())
    return digest.decode("utf-8")
def verify_password(plain: str, hashed: str) -> bool:
    """Check `plain` against a stored bcrypt hash.

    Returns False rather than raising when bcrypt rejects the inputs with
    ValueError or TypeError (for example a malformed stored hash).
    """
    try:
        candidate = plain.encode("utf-8")
        stored = hashed.encode("utf-8")
        matches = bcrypt.checkpw(candidate, stored)
    except (ValueError, TypeError):
        return False
    return matches
async def user_count() -> int:
    """Return the number of rows in the users table."""
    async with aiosqlite.connect(settings.db_path) as conn:
        cursor = await conn.execute("SELECT COUNT(*) FROM users")
        count_row = await cursor.fetchone()
        total = count_row[0]
    return total
async def get_user_by_username(username: str) -> tuple[User, str] | None:
    """Look a user up by name (case-insensitive via COLLATE NOCASE).

    Returns:
        ``(user, password_hash)`` when a row matches, otherwise None. The
        hash is handed out only so callers can feed it to verify_password.
    """
    query = (
        "SELECT id, username, password_hash, full_name, is_admin "
        "FROM users WHERE username = ? COLLATE NOCASE"
    )
    async with aiosqlite.connect(settings.db_path) as conn:
        conn.row_factory = aiosqlite.Row
        cursor = await conn.execute(query, (username,))
        record = await cursor.fetchone()
    if not record:
        return None
    account = User(
        id=record["id"],
        username=record["username"],
        full_name=record["full_name"],
        is_admin=bool(record["is_admin"]),
    )
    return account, record["password_hash"]
async def get_user_by_id(user_id: int) -> User | None:
    """Fetch the user whose primary key is `user_id`, or None if there is no such row."""
    query = (
        "SELECT id, username, full_name, is_admin "
        "FROM users WHERE id = ?"
    )
    async with aiosqlite.connect(settings.db_path) as conn:
        conn.row_factory = aiosqlite.Row
        cursor = await conn.execute(query, (user_id,))
        record = await cursor.fetchone()
    if record:
        return User(
            id=record["id"],
            username=record["username"],
            full_name=record["full_name"],
            is_admin=bool(record["is_admin"]),
        )
    return None
async def create_user(username: str, password: str,
                      full_name: str | None = None,
                      is_admin: bool = False,
                      bootstrap_only: bool = False) -> User:
    """Insert a new user and return it.

    Args:
        username: Login name; surrounding whitespace is stripped.
        password: Plaintext password, at least 8 characters. Only its bcrypt
            hash is stored.
        full_name: Optional display name. An empty string is stored as NULL
            and the returned User reports None to match the stored row.
        is_admin: Whether the account gets admin rights.
        bootstrap_only: Serializes the insert with a check that the users
            table is empty inside an IMMEDIATE transaction. Used for the
            /api/v1/auth/setup first-user flow so two concurrent requests
            can't both create admin accounts during the bootstrap window.

    Returns:
        The newly created User, carrying the id assigned by the database.

    Raises:
        ValueError: If the username is empty, the password is too short,
            the username collides, or ``bootstrap_only`` is set and users
            already exist.
    """
    username = (username or "").strip()
    if not username:
        raise ValueError("Username is required.")
    if len(password) < 8:
        raise ValueError("Password must be at least 8 characters.")
    # Normalize once so the stored value and the returned User agree.
    stored_full_name = full_name or None
    h = hash_password(password)
    try:
        async with aiosqlite.connect(settings.db_path) as db:
            if bootstrap_only:
                # IMMEDIATE acquires the write lock up-front so a parallel
                # setup request waits or fails — no two-step race.
                await db.execute("BEGIN IMMEDIATE")
                cur = await db.execute("SELECT COUNT(*) FROM users")
                if (await cur.fetchone())[0] != 0:
                    await db.execute("ROLLBACK")
                    raise ValueError(
                        "Users already exist — first-user setup is closed."
                    )
            cur = await db.execute(
                """INSERT INTO users
                (username, password_hash, full_name, is_admin, created_at)
                VALUES (?, ?, ?, ?, ?)
                RETURNING id""",
                (username, h, stored_full_name, 1 if is_admin else 0, _now()),
            )
            row = await cur.fetchone()
            await db.commit()
    except aiosqlite.IntegrityError as exc:
        # Keep the DB error as __cause__ so logs show which constraint fired.
        raise ValueError(f"Username {username!r} already exists.") from exc
    return User(
        id=row[0],
        username=username,
        full_name=stored_full_name,
        is_admin=bool(is_admin),
    )
async def touch_last_login(user_id: int) -> None:
    """Stamp users.last_login_at for `user_id` with the current UTC time."""
    statement = "UPDATE users SET last_login_at = ? WHERE id = ?"
    async with aiosqlite.connect(settings.db_path) as conn:
        params = (_now(), user_id)
        await conn.execute(statement, params)
        await conn.commit()
async def change_password(user_id: int, current_password: str,
                          new_password: str) -> None:
    """Rotate a user's password after re-checking the current one.

    Raises:
        ValueError: If the new password is shorter than 8 characters, the
            user does not exist, or `current_password` does not verify.
    """
    if len(new_password) < 8:
        raise ValueError("New password must be at least 8 characters.")
    async with aiosqlite.connect(settings.db_path) as conn:
        conn.row_factory = aiosqlite.Row
        cursor = await conn.execute(
            "SELECT username, password_hash FROM users WHERE id = ?", (user_id,)
        )
        record = await cursor.fetchone()
        # Same message for "no such user" and "wrong password".
        verified = bool(record) and verify_password(
            current_password, record["password_hash"]
        )
        if not verified:
            raise ValueError("Current password is incorrect.")
        replacement_hash = hash_password(new_password)
        await conn.execute(
            "UPDATE users SET password_hash = ? WHERE id = ?",
            (replacement_hash, user_id),
        )
        await conn.commit()
# ---------------------------------------------------------------------------
# Generic rate limiting (in-memory, multi-key per category)
# ---------------------------------------------------------------------------
#
# Each instance is a self-contained limiter for one category (login,
# unlock, password change). The atomicity guarantee is "no awaits between
# check and increment" — CPython's asyncio loop is single-threaded so
# concurrent requests cannot interleave the synchronous register() call.
import time as _time
class _RateLimiter:
    """In-memory attempt limiter for one category (login, unlock, ...).

    Each call to register() records one attempt against every supplied key.
    Once a key accumulates `threshold` attempts within `window_s` seconds it
    is locked out for `lockout_s` seconds. Keys are arbitrary hashables.

    State for keys that are never seen again (for example random usernames
    sent by an attacker) is purged by a sweep that runs at most once per
    min(window_s, lockout_s) seconds, so the tables cannot grow without
    bound. The sweep only drops entries that the per-key checks would have
    discarded anyway, so results are unaffected.
    """

    def __init__(self, name: str, threshold: int, window_s: int, lockout_s: int):
        self.name = name
        self.threshold = threshold
        self.window_s = window_s
        self.lockout_s = lockout_s
        self._failures: dict = {}  # key -> [unix timestamps within window]
        self._lockouts: dict = {}  # key -> unix expiry
        self._next_sweep: float = 0.0  # unix time of the next full sweep

    def _gc(self, key) -> None:
        """Drop attempts for `key` that have fallen out of the window."""
        cutoff = _time.time() - self.window_s
        arr = self._failures.get(key, [])
        fresh = [t for t in arr if t >= cutoff]
        if fresh:
            self._failures[key] = fresh
        elif key in self._failures:
            del self._failures[key]

    def _sweep(self, now: float) -> None:
        """Purge stale state for all keys; throttled via `_next_sweep`."""
        if now < self._next_sweep:
            return
        self._next_sweep = now + min(self.window_s, self.lockout_s)
        cutoff = now - self.window_s
        # Iterate over snapshots because entries are deleted while scanning.
        for key in list(self._failures):
            fresh = [t for t in self._failures[key] if t >= cutoff]
            if fresh:
                self._failures[key] = fresh
            else:
                del self._failures[key]
        expired = [k for k, exp in self._lockouts.items() if now >= exp]
        for key in expired:
            del self._lockouts[key]

    def locked_until(self, *keys) -> float | None:
        """Soonest active lockout expiry across `keys`, or None."""
        now = _time.time()
        soonest = None
        for k in keys:
            exp = self._lockouts.get(k)
            if exp is None:
                continue
            if now >= exp:
                # Lockout has lapsed; forget it.
                del self._lockouts[k]
                continue
            soonest = exp if soonest is None else min(soonest, exp)
        return soonest

    def register(self, *keys) -> str:
        """Record one attempt against every key.

        Returns "ok" | "locked_out" | "now_locked_out":
          - "locked_out": some key is already locked; nothing is recorded.
          - "now_locked_out": this attempt pushed at least one key to the
            threshold and started its lockout.
          - "ok": recorded, no key is locked.
        """
        now = _time.time()
        self._sweep(now)
        for k in keys:
            exp = self._lockouts.get(k)
            if exp is None:
                continue
            if now >= exp:
                del self._lockouts[k]
                continue
            return "locked_out"
        tripped = False
        for k in keys:
            self._gc(k)
            self._failures.setdefault(k, []).append(now)
            if len(self._failures[k]) >= self.threshold:
                self._lockouts[k] = now + self.lockout_s
                # Start counting afresh once the lockout ends.
                self._failures[k] = []
                tripped = True
        return "now_locked_out" if tripped else "ok"

    def clear(self, *keys) -> None:
        """Forget recorded attempts and any lockout for every key."""
        for k in keys:
            self._failures.pop(k, None)
            self._lockouts.pop(k, None)
# Login: 10 failures in 10 min → 15 min lockout.
LOGIN_FAILURE_WINDOW_SECONDS = 600
LOGIN_FAILURE_THRESHOLD = 10
LOGIN_LOCKOUT_SECONDS = 900
# Unlock + password change: tighter caps; both are post-auth so a
# legitimate operator typoing a token shouldn't be locked out for long.
# The counting windows are named like the login one instead of being a
# bare 600 at the call sites.
UNLOCK_FAILURE_WINDOW_SECONDS = 600
UNLOCK_FAILURE_THRESHOLD = 5
UNLOCK_LOCKOUT_SECONDS = 600
PWCHANGE_FAILURE_WINDOW_SECONDS = 600
PWCHANGE_FAILURE_THRESHOLD = 5
PWCHANGE_LOCKOUT_SECONDS = 900
login_limiter = _RateLimiter(
    "login", LOGIN_FAILURE_THRESHOLD, LOGIN_FAILURE_WINDOW_SECONDS,
    LOGIN_LOCKOUT_SECONDS,
)
unlock_limiter = _RateLimiter(
    "unlock", UNLOCK_FAILURE_THRESHOLD, UNLOCK_FAILURE_WINDOW_SECONDS,
    UNLOCK_LOCKOUT_SECONDS,
)
pwchange_limiter = _RateLimiter(
    "pwchange", PWCHANGE_FAILURE_THRESHOLD, PWCHANGE_FAILURE_WINDOW_SECONDS,
    PWCHANGE_LOCKOUT_SECONDS,
)
# Backward-compat facades — preserve the names existing routes.py reaches for.
def login_locked_until(username: str, ip: str) -> float | None:
    """Return the soonest active login lockout expiry for this username or IP, else None.

    The username is lower-cased before being used as a limiter key.
    """
    user_key = ("user", username.lower())
    ip_key = ("ip", ip)
    return login_limiter.locked_until(user_key, ip_key)
def register_login_attempt(username: str, ip: str) -> str:
    """Record one login attempt against both the username and the IP.

    Returns the limiter verdict: "ok", "locked_out" or "now_locked_out".
    """
    user_key = ("user", username.lower())
    ip_key = ("ip", ip)
    return login_limiter.register(user_key, ip_key)
def clear_login_failures(username: str, ip: str) -> None:
    """Reset recorded login attempts and lockouts for this username and IP."""
    user_key = ("user", username.lower())
    ip_key = ("ip", ip)
    login_limiter.clear(user_key, ip_key)
# ---------------------------------------------------------------------------
# Audit events for auth flows
# ---------------------------------------------------------------------------
async def audit_auth_event(event_type: str, username: str | None,
                           message: str) -> None:
    """Append an auth-related row to audit_events.

    `event_type` is one of: user_login / user_login_failed / user_logout /
    user_password_changed / user_login_locked_out. The drive and burn-in job
    columns are stored as NULL; a missing username is recorded as "?".
    """
    operator = username or "?"
    params = (event_type, None, None, operator, message)
    async with aiosqlite.connect(settings.db_path) as conn:
        await conn.execute(
            """INSERT INTO audit_events
            (event_type, drive_id, burnin_job_id, operator, message)
            VALUES (?,?,?,?,?)""",
            params,
        )
        await conn.commit()
async def bootstrap_admin_if_empty() -> None:
    """Create the admin configured via settings when no users exist yet.

    Does nothing if the users table already has rows or if either
    `initial_admin_username` or `initial_admin_password` is unset. A
    ValueError from create_user is logged, not raised.
    """
    if await user_count() > 0:
        return
    if not settings.initial_admin_username or not settings.initial_admin_password:
        return
    try:
        await create_user(
            settings.initial_admin_username,
            settings.initial_admin_password,
            full_name=None,
            is_admin=True,
        )
    except ValueError as exc:
        log.error("Failed to bootstrap initial admin: %s", exc)
    else:
        log.warning(
            "Bootstrapped initial admin user %r from env. "
            "Change the password via the UI and remove the env vars from compose.",
            settings.initial_admin_username,
        )
# ---------------------------------------------------------------------------
# FastAPI dependencies
# ---------------------------------------------------------------------------
async def get_current_user_optional(request: Request) -> User | None:
    """Return the logged-in user, or None. Doesn't raise — for templates.

    The session is read from ``request.scope`` rather than through
    ``request.session``: Starlette's property asserts that SessionMiddleware
    is installed, and ``hasattr`` does not swallow AssertionError, so the
    previous guard could not protect against a missing middleware.

    Returns:
        The User for the session's ``user_id``; None when there is no
        session, no ``user_id``, the id is not an integer, or no such user
        exists.
    """
    session = request.scope.get("session")
    if not session:
        return None
    sess_user_id = session.get("user_id")
    if not sess_user_id:
        return None
    try:
        user_id = int(sess_user_id)
    except (TypeError, ValueError):
        # A malformed id is treated as "not logged in", not as a server error.
        return None
    return await get_user_by_id(user_id)
def require_admin(request: Request) -> User:
    """Strict admin gate for settings-mutating endpoints.

    Reads the user from ``request.state.current_user`` (expected to have
    been populated earlier by the AuthGate middleware) and enforces
    is_admin on top.

    Raises:
        HTTPException: 401 when no user is attached to the request, 403
            when the user is not an admin.
    """
    current = getattr(request.state, "current_user", None)
    if current and current.is_admin:
        return current
    if not current:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required",
        )
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Admin only",
    )
async def get_current_user(request: Request) -> User:
    """Strict variant of get_current_user_optional, for routes.

    Raises:
        _RedirectToLogin: For GET requests whose Accept header contains
            text/html, so a page can bounce to /login.
        HTTPException: 401 for every other unauthenticated request.
    """
    user = await get_current_user_optional(request)
    if user is not None:
        return user
    # HTML clients prefer a redirect; API clients need a clean 401.
    wants_html = "text/html" in request.headers.get("accept", "")
    if wants_html and request.method == "GET":
        raise _RedirectToLogin(request.url.path)
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Authentication required",
    )
class _RedirectToLogin(Exception):
    """Raised by get_current_user when an HTML page needs to bounce to /login.

    Attributes:
        next_path: Path the client originally requested.
    """

    def __init__(self, next_path: str):
        # Initialize the base class so str(exc) and exc.args carry the path
        # instead of being empty in logs and tracebacks.
        super().__init__(next_path)
        self.next_path = next_path
def login_redirect(next_path: str = "/") -> RedirectResponse:
    """Build a 303 redirect to /login that remembers where to return.

    Args:
        next_path: Local path to come back to after login. Anything that is
            not a same-site absolute path falls back to "/". That includes
            ``//host`` and ``/\\host``, which browsers treat as
            protocol-relative URLs.

    Returns:
        A RedirectResponse to ``/login`` (when the target is "/") or to
        ``/login?next=<percent-encoded path>``.
    """
    from urllib.parse import quote  # local import: only needed here

    is_local = next_path.startswith("/") and not next_path.startswith(("//", "/\\"))
    safe_next = next_path if is_local else "/"
    if safe_next == "/":
        target = "/login"
    else:
        # Encode so characters such as ?, & or # in the path cannot leak
        # into (or truncate) the query string.
        target = f"/login?next={quote(safe_next, safe='/')}"
    return RedirectResponse(url=target, status_code=303)

99
app/auth_cli.py Normal file
View file

@ -0,0 +1,99 @@
"""Password reset / user management CLI.
Run inside the container:
docker exec -it nas-burnin python -m app.auth_cli reset <username>
docker exec -it nas-burnin python -m app.auth_cli list
docker exec -it nas-burnin python -m app.auth_cli add <username>
Reads the password from a TTY prompt — never accepts it on the command
line, so it doesn't leak into shell history.
"""
from __future__ import annotations
import asyncio
import getpass
import sys
import aiosqlite
from app import auth
from app.config import settings
async def _reset(username: str) -> int:
    """Interactively set a new password for an existing user.

    Returns a process exit code: 0 on success, 1 if the user does not
    exist, 2 if the two prompts differ, 3 if the password is too short.
    """
    if not await auth.get_user_by_username(username):
        print(f"No such user: {username}", file=sys.stderr)
        return 1
    entered = getpass.getpass(f"New password for {username}: ")
    confirmed = getpass.getpass("Confirm: ")
    if entered != confirmed:
        print("Passwords don't match.", file=sys.stderr)
        return 2
    if len(entered) < 8:
        print("Password must be at least 8 characters.", file=sys.stderr)
        return 3
    params = (auth.hash_password(entered), username)
    async with aiosqlite.connect(settings.db_path) as conn:
        await conn.execute(
            "UPDATE users SET password_hash = ? WHERE username = ? COLLATE NOCASE",
            params,
        )
        await conn.commit()
    print(f"Password updated for {username}.")
    return 0
async def _list() -> int:
    """Print every user (role, name, creation and last-login time), ordered by username.

    Always returns exit code 0; prints "(no users)" when the table is empty.
    """
    query = (
        "SELECT id, username, full_name, is_admin, created_at, last_login_at "
        "FROM users ORDER BY username"
    )
    async with aiosqlite.connect(settings.db_path) as conn:
        conn.row_factory = aiosqlite.Row
        cursor = await conn.execute(query)
        rows = list(await cursor.fetchall())
    if rows:
        for r in rows:
            flag = "admin" if r["is_admin"] else "user "
            print(f" [{flag}] {r['username']:24s} created={r['created_at'][:19]} "
                  f"last_login={(r['last_login_at'] or '-')[:19]}")
    else:
        print("(no users)")
    return 0
async def _add(username: str) -> int:
    """Interactively create a new user.

    Returns a process exit code: 0 on success, 2 if the two password
    prompts differ, 1 if auth.create_user rejects the input.
    """
    entered = getpass.getpass(f"Password for new user {username}: ")
    confirmed = getpass.getpass("Confirm: ")
    if entered != confirmed:
        print("Passwords don't match.", file=sys.stderr)
        return 2
    display_name = input("Full name (optional, press enter to skip): ").strip() or None
    answer = input("Admin? [y/N]: ").strip().lower()
    wants_admin = answer == "y"
    try:
        u = await auth.create_user(username, entered, display_name, is_admin=wants_admin)
    except ValueError as exc:
        print(f"Failed: {exc}", file=sys.stderr)
        return 1
    print(f"Created user {u.username} (admin={u.is_admin}).")
    return 0
def main() -> int:
    """Dispatch the CLI sub-command given in sys.argv.

    Supported forms: ``list``, ``reset <username>``, ``add <username>``.
    Anything else prints the module docstring to stderr and returns 64;
    otherwise the sub-command's exit code is returned.
    """
    argv = sys.argv
    if len(argv) >= 2:
        command = argv[1]
        if command == "list":
            return asyncio.run(_list())
        if len(argv) == 3:
            # Sub-commands that take exactly one <username> argument.
            if command == "reset":
                return asyncio.run(_reset(argv[2]))
            if command == "add":
                return asyncio.run(_add(argv[2]))
    print(__doc__, file=sys.stderr)
    return 64
if __name__ == "__main__":
sys.exit(main())

623
app/burnin/__init__.py Normal file
View file

@ -0,0 +1,623 @@
"""
Burn-in orchestrator.
Manages a FIFO queue of burn-in jobs capped at MAX_PARALLEL_BURNINS concurrent
executions. Each job runs stages sequentially; a failed stage aborts the job.
State is persisted to SQLite throughout — the DB is the source of truth.
On startup:
- Any 'running' jobs from a previous run are marked 'unknown' (interrupted).
- Any 'queued' jobs are re-enqueued automatically.
Cancellation:
- cancel_job() sets DB state to 'cancelled'.
- Running stage coroutines check _is_cancelled() at POLL_INTERVAL boundaries
and abort within a few seconds of the cancel request.
"""
import asyncio
import logging
import time
from contextlib import asynccontextmanager
from datetime import datetime, timezone
import aiosqlite
from app.config import settings
from app.truenas import TrueNASClient
log = logging.getLogger(__name__)
# Stage configuration + DB helpers extracted to _common.py in 1.0.0-31.
from ._common import ( # noqa: E402
STAGE_ORDER, _STAGE_BASE_WEIGHTS, POLL_INTERVAL,
_now, _db,
_is_cancelled,
_start_stage, _finish_stage, _cancel_stage, _set_stage_error,
_update_stage_percent, _update_stage_bad_blocks, _append_stage_log,
_store_smart_attrs, _store_smart_raw_output,
_recalculate_progress, _push_update,
)
# ---------------------------------------------------------------------------
# Module-level state (initialized in init())
# ---------------------------------------------------------------------------
# Both are None until init() assigns them.
_semaphore: asyncio.Semaphore | None = None
_client: TrueNASClient | None = None
# Live job tracking — keeps a strong reference to every _run_job task so it
# isn't garbage-collected (asyncio.create_task only keeps a weak ref) and so
# cancel_job / check_stuck_jobs can actually unwedge a stuck task.
# Keyed by burn-in job id.
_active_tasks: dict[int, "asyncio.Task"] = {}
# Remote-PID kill machinery + pool-drive unlock state both live in their
# own submodules. We re-export the names the rest of the app reaches for
# (and keep the _kill_remote_process / _is_unlocked aliases for callers
# that grew up before the split).
from . import kill as _kill # noqa: E402
from . import unlock as _unlock # noqa: E402
# Aliases of the submodules' own objects (plain name bindings, not copies).
_remote_pids = _kill._remote_pids
_unlock_grants = _unlock._unlock_grants
PoolMemberError = _unlock.PoolMemberError
UNLOCK_TTL_SECONDS = _unlock.UNLOCK_TTL_SECONDS
BOOT_POOL_NAME = _unlock.BOOT_POOL_NAME
BOOT_POOL_CONFIRM_TOKEN = _unlock.BOOT_POOL_CONFIRM_TOKEN
EXPORTED_POOL_ROLE = _unlock.EXPORTED_POOL_ROLE
EXPORTED_CONFIRM_TOKEN = _unlock.EXPORTED_CONFIRM_TOKEN
MOUNTED_ROLE = _unlock.MOUNTED_ROLE
MOUNTED_CONFIRM_TOKEN = _unlock.MOUNTED_CONFIRM_TOKEN
unlock_expiry = _unlock.unlock_expiry
grant_pool_unlock = _unlock.grant_pool_unlock
_is_unlocked = _unlock.is_unlocked # legacy private name
_kill_remote_process = _kill.kill_remote_process  # legacy private name
# _now() and _db() are re-exported from _common above.
# ---------------------------------------------------------------------------
# Init + startup reconciliation
# ---------------------------------------------------------------------------
async def init(client: TrueNASClient) -> None:
    """Initialise module state and reconcile jobs left over from a previous run.

    Sets the concurrency semaphore (sized by settings.max_parallel_burnins)
    and the TrueNAS client, marks jobs still recorded as 'running' as
    'unknown', and re-spawns every job still in state 'queued', oldest first.
    """
    global _semaphore, _client
    _semaphore = asyncio.Semaphore(settings.max_parallel_burnins)
    _client = client
    pragmas = (
        "PRAGMA journal_mode=WAL",
        "PRAGMA busy_timeout=60000",
        "PRAGMA foreign_keys=ON",
    )
    async with _db() as conn:
        conn.row_factory = aiosqlite.Row
        for pragma in pragmas:
            await conn.execute(pragma)
        # Jobs still recorded as running were interrupted by the restart.
        await conn.execute(
            "UPDATE burnin_jobs SET state='unknown', finished_at=? WHERE state='running'",
            (_now(),),
        )
        # Collect previously queued jobs so they can be re-enqueued.
        cursor = await conn.execute(
            "SELECT id FROM burnin_jobs WHERE state='queued' ORDER BY created_at"
        )
        pending_ids = [record["id"] for record in await cursor.fetchall()]
        await conn.commit()
    for pending_id in pending_ids:
        _spawn_run_job(pending_id)
    log.info("Burn-in orchestrator ready (max_concurrent=%d)", settings.max_parallel_burnins)
def _spawn_run_job(job_id: int) -> "asyncio.Task":
    """Schedule a _run_job task and keep a strong reference to it.

    Plain asyncio.create_task() only leaves a weak reference behind, so the
    task can be GC'd before it ever runs. Storing it in _active_tasks also
    lets cancel_job / check_stuck_jobs cancel it directly.

    Args:
        job_id: Burn-in job id; used as the key in _active_tasks.

    Returns:
        The created task. A done-callback removes it from _active_tasks
        and clears the job's remote-PID record when it finishes.
    """
    task = asyncio.create_task(_run_job(job_id))
    _active_tasks[job_id] = task

    def _cleanup(t: "asyncio.Task") -> None:
        # Remove only if it's still us — avoid clobbering a re-enqueued task.
        if _active_tasks.get(job_id) is t:
            _active_tasks.pop(job_id, None)
        # NOTE(review): confirm whether clearing the remote PID is meant to
        # be unconditional or tied to the identity check above.
        _kill.clear_remote_pid(job_id)

    task.add_done_callback(_cleanup)
    return task
# _kill_remote_process is re-exported above from .kill — the original
# definition was extracted to app/burnin/kill.py in 1.0.0-30.
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
async def start_job(drive_id: int, profile: str, operator: str,
                    stage_order: list[str] | None = None) -> int:
    """Create and enqueue a burn-in job. Returns the new job ID.

    If stage_order is provided (e.g. ["short_smart","long_smart","surface_validate"]),
    the job runs those stages in that order (precheck and final_check are always prepended/appended).
    Otherwise the preset STAGE_ORDER[profile] is used.

    Args:
        drive_id: Row id of the target drive in the drives table.
        profile: Profile name stored on the job; also the STAGE_ORDER key
            when no custom stage_order is given.
        operator: Name recorded on the job and in the audit event.
        stage_order: Optional custom middle stages.

    Raises:
        ValueError: The drive already has a queued or running burn-in job
            (from the pre-check, or from an IntegrityError on insert).
        PoolMemberError: The drive has a pool_name and is not unlocked, or
            the live SSH check reports it as a pool member.
    """
    now = _now()
    # Build the actual stage list
    if stage_order is not None:
        stages = ["precheck"] + list(stage_order) + ["final_check"]
    else:
        # NOTE(review): presumably raises KeyError for an unknown profile — confirm callers validate it.
        stages = STAGE_ORDER[profile]
    async with _db() as db:
        db.row_factory = aiosqlite.Row
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute("PRAGMA busy_timeout=60000")
        await db.execute("PRAGMA foreign_keys=ON")
        # Reject duplicate active burn-in for same drive
        cur = await db.execute(
            "SELECT COUNT(*) FROM burnin_jobs WHERE drive_id=? AND state IN ('queued','running')",
            (drive_id,),
        )
        if (await cur.fetchone())[0] > 0:
            raise ValueError("Drive already has an active burn-in job")
        # Pool-membership gate: locked unless the operator explicitly
        # unlocked this drive via /api/v1/drives/{id}/unlock recently.
        # _is_unlocked also checks that the grant's stored (pool_name,
        # pool_role) still matches the live row — a grant issued for an
        # exported drive doesn't carry over if the drive turns out to be
        # in an active pool on the next poll.
        cur = await db.execute(
            "SELECT pool_name, pool_role, devname FROM drives WHERE id=?", (drive_id,)
        )
        drow = await cur.fetchone()
        if drow and drow["pool_name"] and not _is_unlocked(
            drive_id, drow["pool_name"], drow["pool_role"]
        ):
            raise PoolMemberError(drive_id, drow["pool_name"], drow["pool_role"])
        # Closes Codex finding #5: re-check pool state OVER SSH right now,
        # not against cached row. Defends against the 12s poll window
        # where a drive could have been imported into a pool, mounted, or
        # had ZFS labels written between when the operator unlocked it
        # and when they clicked Start. Adds ~200ms per start; cheap
        # against the cost of destroying a freshly-imported pool.
        if drow:
            # Imported here rather than at module top (reason not visible in this file).
            from app import ssh_client as _ssh
            if _ssh.is_configured():
                fresh = await _ssh.fresh_pool_check_for_drive(drow["devname"])
                # Shape the cached row like the live result so the two can
                # be compared directly; None means "not in any pool".
                cached = (
                    {"pool": drow["pool_name"], "role": drow["pool_role"]}
                    if drow["pool_name"] else None
                )
                if fresh != cached:
                    # State changed since the last poll. Invalidate any
                    # unlock grant (it was bound to stale identity) and
                    # refuse with a descriptive error so the operator
                    # knows to wait for the next poll cycle.
                    _unlock.invalidate_grant(drive_id)
                    fresh_pool = fresh["pool"] if fresh else None
                    fresh_role = fresh["role"] if fresh else None
                    if fresh_pool:
                        raise PoolMemberError(drive_id, fresh_pool, fresh_role)
                    # If the FRESH check shows free but cached said
                    # locked, the drive was just removed from a pool —
                    # safe to start, but invalidate any stale grant so
                    # the operator doesn't reuse old confirmations.
                    log.warning(
                        "Live pool check for drive_id=%d (%s): cached=%s "
                        "fresh=None — drive came free since last poll, "
                        "allowing burn-in",
                        drive_id, drow["devname"], cached,
                    )
        # Create job. The partial unique index uniq_active_burnin_per_drive
        # (database.py) is the actual race-stopper here: if two concurrent
        # /api/v1/burnin/start calls both pass the SELECT-COUNT check above,
        # only one INSERT can win; the loser raises IntegrityError, which
        # we surface with the same ValueError as the inline duplicate check.
        try:
            cur = await db.execute(
                """INSERT INTO burnin_jobs (drive_id, profile, state, percent, operator, created_at)
                VALUES (?,?,?,?,?,?) RETURNING id""",
                (drive_id, profile, "queued", 0, operator, now),
            )
            job_id = (await cur.fetchone())["id"]
        except aiosqlite.IntegrityError:
            raise ValueError("Drive already has an active burn-in job")
        # Create stage rows in the desired execution order
        for stage_name in stages:
            await db.execute(
                "INSERT INTO burnin_stages (burnin_job_id, stage_name, state) VALUES (?,?,?)",
                (job_id, stage_name, "pending"),
            )
        await db.execute(
            """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
            VALUES (?,?,?,?,?)""",
            ("burnin_queued", drive_id, job_id, operator, f"Queued {profile} burn-in"),
        )
        # Job row, stage rows and audit event are committed together.
        await db.commit()
    # Spawn the runner only after the job has been committed.
    _spawn_run_job(job_id)
    log.info("Burn-in job %d queued (drive_id=%d profile=%s operator=%s)",
             job_id, drive_id, profile, operator)
    return job_id
async def cancel_job(job_id: int, operator: str) -> bool:
    """Cancel a queued or running job.

    Marks the job and its pending/running stages as 'cancelled', writes a
    'burnin_cancelled' audit event, kills the remote process and cancels
    the local task.

    Returns:
        True if the state was changed; False when the job does not exist
        or is not in state 'queued' or 'running'.
    """
    async with _db() as conn:
        conn.row_factory = aiosqlite.Row
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute("PRAGMA busy_timeout=60000")
        lookup = await conn.execute(
            "SELECT state, drive_id FROM burnin_jobs WHERE id=?", (job_id,)
        )
        job = await lookup.fetchone()
        cancellable = bool(job) and job["state"] in ("queued", "running")
        if not cancellable:
            return False
        await conn.execute(
            "UPDATE burnin_jobs SET state='cancelled', finished_at=? WHERE id=?",
            (_now(), job_id),
        )
        await conn.execute(
            "UPDATE burnin_stages SET state='cancelled' WHERE burnin_job_id=? AND state IN ('pending','running')",
            (job_id,),
        )
        audit_params = (
            "burnin_cancelled", job["drive_id"], job_id, operator, "Cancelled by operator",
        )
        await conn.execute(
            """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
            VALUES (?,?,?,?,?)""",
            audit_params,
        )
        await conn.commit()
    # Order matters: kill the remote child first so proc.wait() in the
    # running task can return, then cancel the task to unblock other awaits.
    await _kill_remote_process(job_id)
    runner = _active_tasks.get(job_id)
    if runner and not runner.done():
        runner.cancel()
    log.info("Burn-in job %d cancelled by %s", job_id, operator)
    return True
# ---------------------------------------------------------------------------
# Job runner
# ---------------------------------------------------------------------------
async def _thermal_gate_ok() -> bool:
    """True if it's thermally safe to start a new burn-in.

    Checks the peak temperature of drives currently under active burn-in
    against settings.temp_warn_c.

    Returns:
        True when no running drive reports a temperature, or the hottest
        one is below the warning threshold. Also True when the check
        itself fails: the gate never blocks on an error, but the failure
        is logged instead of being swallowed silently.
    """
    try:
        async with _db() as db:
            cur = await db.execute("""
                SELECT MAX(d.temperature_c)
                FROM drives d
                JOIN burnin_jobs bj ON bj.drive_id = d.id
                WHERE bj.state = 'running' AND d.temperature_c IS NOT NULL
            """)
            row = await cur.fetchone()
        max_temp = row[0] if row and row[0] is not None else None
        return max_temp is None or max_temp < settings.temp_warn_c
    except Exception:
        # Never block on error — but leave a trace so a broken gate is visible.
        log.warning("Thermal gate check failed; not blocking burn-in start", exc_info=True)
        return True
# Strong references to fire-and-forget notification tasks. The event loop
# only keeps weak references to tasks, so a task nobody references can be
# garbage-collected before it finishes.
_notify_tasks: "set[asyncio.Task]" = set()


async def _run_job(job_id: int) -> None:
    """Acquire a semaphore slot, execute all stages, persist the final state.

    Steps:
      1. Wait (up to 3 minutes) for the thermal gate to open.
      2. Take a semaphore slot and move the job from queued to running.
      3. Run the job's stages in the order stored in ``burnin_stages``.
      4. Write the terminal state, but only if the job is still 'running',
         so a state written by cancel_job or check_stuck_jobs is kept.
      5. Push an SSE alert and schedule the webhook/email notification.

    The function returns early without writing anything when the job is
    already cancelled or when its job/drive row cannot be found.
    """
    assert _semaphore is not None, "burnin.init() not called"

    # Adaptive thermal gate: wait before competing for a slot if running drives
    # are already at or above the warning threshold. This prevents layering a
    # new burn-in on top of a thermally-stressed system. Gives up after 3 min
    # and proceeds anyway so jobs don't queue indefinitely.
    for _attempt in range(18):  # 18 × 10 s = 3 min max
        if await _thermal_gate_ok():
            break
        if _attempt == 0:
            log.info(
                "Thermal gate: job %d waiting — running drive temps at or above %d°C",
                job_id, settings.temp_warn_c,
            )
        await asyncio.sleep(10)
    else:
        log.warning("Thermal gate timed out for job %d — proceeding anyway", job_id)

    async with _semaphore:
        if await _is_cancelled(job_id):
            return

        # Transition queued → running
        async with _db() as db:
            await db.execute("PRAGMA journal_mode=WAL")
            await db.execute("PRAGMA busy_timeout=60000")
            row = await (await db.execute(
                "SELECT drive_id, profile FROM burnin_jobs WHERE id=?", (job_id,)
            )).fetchone()
            if not row:
                return
            drive_id, profile = row[0], row[1]
            cur = await db.execute("SELECT devname, serial, model FROM drives WHERE id=?", (drive_id,))
            devname_row = await cur.fetchone()
            if not devname_row:
                return
            devname = devname_row[0]
            drive_serial = devname_row[1]
            drive_model = devname_row[2]
            await db.execute(
                "UPDATE burnin_jobs SET state='running', started_at=? WHERE id=?",
                (_now(), job_id),
            )
            await db.execute(
                """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
                   VALUES (?,?,?,(SELECT operator FROM burnin_jobs WHERE id=?),?)""",
                ("burnin_started", drive_id, job_id, job_id, f"Started {profile} burn-in on {devname}"),
            )
            # Read stage order from DB (respects any custom order set at job creation)
            stage_cur = await db.execute(
                "SELECT stage_name FROM burnin_stages WHERE burnin_job_id=? ORDER BY id",
                (job_id,),
            )
            job_stages = [r[0] for r in await stage_cur.fetchall()]
            await db.commit()
        _push_update()
        log.info("Burn-in started", extra={"job_id": job_id, "devname": devname, "profile": profile})

        success = False
        error_text = None
        was_cancelled = False
        try:
            success = await _execute_stages(job_id, job_stages, devname, drive_id)
        except asyncio.CancelledError:
            was_cancelled = True
        except Exception as exc:
            error_text = str(exc)
            log.exception("Burn-in raised exception", extra={"job_id": job_id, "devname": devname})

        # A cancellation that reaches this point with the DB still saying
        # 'running' came from something other than cancel_job/check_stuck_jobs
        # (shutdown, uvicorn reload, future code paths). We have to write
        # *some* terminal state, but 'failed' would lie — we don't know
        # whether the underlying SMART/badblocks work passed or not.
        if was_cancelled:
            final_state = "unknown"
        else:
            final_state = "passed" if success else "failed"
        cancel_err = (
            "Task cancelled mid-run — likely container restart or shutdown"
            if was_cancelled else None
        )
        # One reason string for the DB row, the SSE alert and the notifier.
        final_error = error_text or cancel_err

        async with _db() as db:
            await db.execute("PRAGMA journal_mode=WAL")
            await db.execute("PRAGMA busy_timeout=60000")
            # If the job has already moved to a terminal state — by cancel_job
            # ('cancelled') or check_stuck_jobs ('unknown') — leave it alone;
            # finalizing would clobber that audit-meaningful state. The guard
            # lives in the UPDATE itself so check and write are one atomic
            # statement rather than a racy SELECT followed by an UPDATE.
            cur = await db.execute(
                "UPDATE burnin_jobs SET state=?, percent=?, finished_at=?, error_text=? "
                "WHERE id=? AND state='running'",
                (final_state, 100 if success else None, _now(), final_error, job_id),
            )
            if cur.rowcount == 0:
                return
            if was_cancelled:
                # CancelledError propagated past _execute_stages, so the
                # interrupted stage row is still 'running'. Reconcile it;
                # the write only touches rows still 'running', so repeating
                # it is harmless.
                await db.execute(
                    """UPDATE burnin_stages
                       SET state='unknown', finished_at=?,
                           error_text=COALESCE(error_text, ?)
                       WHERE burnin_job_id=? AND state='running'""",
                    (_now(), cancel_err, job_id),
                )
            await db.execute(
                """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
                   VALUES (?,?,?,(SELECT operator FROM burnin_jobs WHERE id=?),?)""",
                (f"burnin_{final_state}", drive_id, job_id, job_id,
                 f"Burn-in {final_state} on {devname}"),
            )
            await db.commit()

        # Build SSE alert for browser notifications
        alert = {
            "state": final_state,
            "job_id": job_id,
            "devname": devname,
            "serial": drive_serial,
            "model": drive_model,
            "error_text": final_error,
        }
        _push_update(alert=alert)
        log.info("Burn-in finished", extra={"job_id": job_id, "devname": devname, "state": final_state})

        # Fire webhook + immediate email in background (non-blocking)
        try:
            from app import notifier
            bad_blocks = 0
            async with _db() as db:
                db.row_factory = aiosqlite.Row
                cur = await db.execute(
                    "SELECT profile, operator FROM burnin_jobs WHERE id=?", (job_id,)
                )
                job_row = await cur.fetchone()
                if job_row:
                    # Get bad_blocks count from surface_validate stage if present
                    cur = await db.execute(
                        "SELECT bad_blocks FROM burnin_stages WHERE burnin_job_id=? AND stage_name='surface_validate'",
                        (job_id,),
                    )
                    bb_row = await cur.fetchone()
                    if bb_row and bb_row[0]:
                        bad_blocks = bb_row[0]
            if job_row:
                notify_task = asyncio.create_task(notifier.notify_job_complete(
                    job_id=job_id,
                    devname=devname,
                    serial=drive_serial,
                    model=drive_model,
                    state=final_state,
                    profile=job_row["profile"],
                    operator=job_row["operator"],
                    error_text=final_error,
                    bad_blocks=bad_blocks,
                ))
                # Hold a reference until the task completes, then drop it.
                _notify_tasks.add(notify_task)
                notify_task.add_done_callback(_notify_tasks.discard)
        except Exception as exc:
            log.error("Failed to schedule notifications: %s", exc)
async def _execute_stages(job_id: int, stages: list[str], devname: str, drive_id: int) -> bool:
    """Run each stage in order; return True only if every stage passed.

    Stops at the first stage that fails, raises, or is interrupted by a
    cancellation, and returns False. A stage that returned False while the
    job is in state 'cancelled' is recorded as cancelled rather than failed.
    """
    for stage_name in stages:
        if await _is_cancelled(job_id):
            return False
        await _start_stage(job_id, stage_name)
        _push_update()
        try:
            ok = await _dispatch_stage(job_id, stage_name, devname, drive_id)
        except Exception as exc:
            log.error(
                "Stage raised exception: %s", exc,
                extra={"job_id": job_id, "devname": devname, "stage": stage_name},
                exc_info=True,
            )
            # Some exceptions (e.g. TimeoutError) stringify to "", which
            # would store an empty reason; fall back to the class name.
            reason = str(exc) or type(exc).__name__
            await _finish_stage(job_id, stage_name, success=False, error_text=reason)
            _push_update()
            return False
        if not ok and await _is_cancelled(job_id):
            # Stage was aborted due to cancellation — mark it cancelled, not failed
            await _cancel_stage(job_id, stage_name)
        else:
            await _finish_stage(job_id, stage_name, success=ok)
        await _recalculate_progress(job_id)
        _push_update()
        if not ok:
            return False
    return True
# Per-stage implementations and the dispatch router live in stages.py.
from .stages import ( # noqa: E402
_dispatch_stage,
_badblocks_available,
_nvme_cli_available,
_stage_precheck,
_stage_smart_test,
_stage_smart_test_api,
_stage_smart_test_ssh,
_stage_surface_validate,
_stage_surface_validate_nvme,
_stage_surface_validate_ssh,
_stage_surface_validate_truenas,
_stage_timed_simulate,
_stage_final_check,
)
# ---------------------------------------------------------------------------
# DB helpers
# ---------------------------------------------------------------------------
# DB helpers / progress / SSE re-exported from _common above.
# ---------------------------------------------------------------------------
# Stuck-job detection (called by poller every ~5 cycles)
# ---------------------------------------------------------------------------
async def check_stuck_jobs() -> None:
    """Flag jobs 'running' for longer than ``settings.stuck_job_hours`` as 'unknown'.

    For every such job the job row and its still-running stage rows are set
    to 'unknown', a ``burnin_stuck`` audit event is recorded, and then the
    remote process is killed and the asyncio task cancelled so the job
    releases its semaphore slot.
    """
    max_age_seconds = settings.stuck_job_hours * 3600
    find_stuck_sql = """
        SELECT bj.id, bj.drive_id, d.devname, bj.started_at
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        WHERE bj.state = 'running'
          AND bj.started_at IS NOT NULL
          AND (julianday('now') - julianday(bj.started_at)) * 86400 > ?
    """
    async with _db() as conn:
        conn.row_factory = aiosqlite.Row
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute("PRAGMA busy_timeout=60000")
        cursor = await conn.execute(find_stuck_sql, (max_age_seconds,))
        stuck_rows = await cursor.fetchall()
        if not stuck_rows:
            return

        stamp = _now()
        for job_id, drive_id, devname, started_at in stuck_rows:
            log.critical(
                "Stuck burn-in detected — marking unknown",
                extra={"job_id": job_id, "devname": devname, "started_at": started_at},
            )
            await conn.execute(
                "UPDATE burnin_jobs SET state='unknown', finished_at=? WHERE id=?",
                (stamp, job_id),
            )
            await conn.execute(
                """UPDATE burnin_stages SET state='unknown', finished_at=?
                   WHERE burnin_job_id=? AND state='running'""",
                (stamp, job_id),
            )
            await conn.execute(
                """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message)
                   VALUES (?,?,?,?,?)""",
                ("burnin_stuck", drive_id, job_id, "system",
                 f"Job stuck for >{settings.stuck_job_hours}h — automatically marked unknown"),
            )
        await conn.commit()

    # Actually unstick the running tasks so they release their semaphore slot.
    # Without this the DB state becomes 'unknown' but the asyncio task keeps
    # holding the slot forever, leaving later jobs 'queued' until a restart.
    for stuck_row in stuck_rows:
        stuck_id = stuck_row[0]
        await _kill_remote_process(stuck_id)
        running_task = _active_tasks.get(stuck_id)
        if running_task and not running_task.done():
            running_task.cancel()

    _push_update()
    log.warning("Marked %d stuck job(s) as unknown", len(stuck_rows))

347
app/burnin/_common.py Normal file
View file

@ -0,0 +1,347 @@
"""Shared helpers for the burnin package.
Lives below stages.py / task.py / __init__.py — these all import from
here. _common itself imports nothing from sibling burnin modules, so we
stay free of circular-import landmines.
Owns:
* Stage configuration constants (STAGE_ORDER, _STAGE_BASE_WEIGHTS,
POLL_INTERVAL).
* The connection-helper context manager `_db()` and the `_now()` ISO
timestamp helper used everywhere.
* Per-stage DB mutators called by stage implementations and by the
job orchestrator (`_start_stage`, `_finish_stage`, `_cancel_stage`,
`_set_stage_error`, `_update_stage_percent`,
`_update_stage_bad_blocks`, `_append_stage_log`).
* Drive-row mutators for SMART caches
(`_store_smart_attrs`, `_store_smart_raw_output`).
* The job-state read (`_is_cancelled`) + progress aggregator
(`_recalculate_progress`).
* SSE notifier (`_push_update`).
"""
from __future__ import annotations
import json
import logging
from contextlib import asynccontextmanager
from datetime import datetime, timezone
import aiosqlite
from app.config import settings
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Stage configuration
# ---------------------------------------------------------------------------
# Maps a burn-in profile name to the ordered list of stage names it runs.
# Every profile listed here starts with "precheck" and ends with "final_check".
STAGE_ORDER: dict[str, list[str]] = {
    # Legacy
    "quick": ["precheck", "short_smart", "io_validate", "final_check"],
    # Single-stage selectable profiles
    "surface": ["precheck", "surface_validate", "final_check"],
    "short": ["precheck", "short_smart", "final_check"],
    "long": ["precheck", "long_smart", "final_check"],
    # Two-stage combos
    "surface_short": ["precheck", "surface_validate", "short_smart", "final_check"],
    "surface_long": ["precheck", "surface_validate", "long_smart", "final_check"],
    "short_long": ["precheck", "short_smart", "long_smart", "final_check"],
    # All three
    "full": ["precheck", "surface_validate", "short_smart", "long_smart", "final_check"],
}
# Per-stage base weights used to compute overall job % progress dynamically.
# _recalculate_progress normalizes by the sum of the weights of the stages a
# job actually has, and falls back to a weight of 5 for names not listed here.
_STAGE_BASE_WEIGHTS: dict[str, int] = {
    "precheck": 5,
    "surface_validate": 65,
    "short_smart": 12,
    "long_smart": 13,
    "io_validate": 10,
    "final_check": 5,
}
POLL_INTERVAL = 5.0 # seconds between progress checks during active stages
# ---------------------------------------------------------------------------
# Connection helpers
# ---------------------------------------------------------------------------
def _now() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()
@asynccontextmanager
async def _db():
    """Open a WAL-mode connection with busy_timeout so writers wait for the lock
    instead of immediately raising 'database is locked' under contention.

    60s timeout is intentionally generous: with 4 concurrent burn-in drains
    + the poller + retention + auth all writing, brief contention spikes
    are normal and waiting is the right behavior. 10s was too tight.

    Yields the open ``aiosqlite`` connection; it is closed when the
    ``async with`` block exits.
    """
    async with aiosqlite.connect(settings.db_path) as db:
        # busy_timeout goes first so that a journal-mode switch on a busy
        # database waits for the lock instead of failing immediately.
        await db.execute("PRAGMA busy_timeout=60000")
        # Make the "WAL-mode" promise above true for every caller, including
        # read-only helpers that never issue the PRAGMA themselves.
        await db.execute("PRAGMA journal_mode=WAL")
        yield db
# ---------------------------------------------------------------------------
# Job / stage DB mutators
# ---------------------------------------------------------------------------
async def _is_cancelled(job_id: int) -> bool:
    """Return True when the job row exists and its state is 'cancelled'."""
    async with _db() as conn:
        cursor = await conn.execute("SELECT state FROM burnin_jobs WHERE id=?", (job_id,))
        record = await cursor.fetchone()
    if record is None:
        return False
    return record[0] == "cancelled"
async def _start_stage(job_id: int, stage_name: str) -> None:
    """Mark a stage as running and record it as the job's current stage."""
    mark_stage_sql = (
        "UPDATE burnin_stages SET state='running', started_at=? WHERE burnin_job_id=? AND stage_name=?"
    )
    mark_job_sql = "UPDATE burnin_jobs SET stage_name=? WHERE id=?"
    async with _db() as conn:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute(mark_stage_sql, (_now(), job_id, stage_name))
        await conn.execute(mark_job_sql, (stage_name, job_id))
        await conn.commit()
async def _finish_stage(job_id: int, stage_name: str, success: bool, error_text: str | None = None) -> None:
    """Record a stage's terminal state ('passed' or 'failed').

    Sets ``percent`` to 100 on success (NULL otherwise), stamps
    ``finished_at`` and stores ``duration_seconds`` computed from the row's
    ``started_at`` when that value can be parsed.

    ``error_text`` overwrites the stored value only when it is not None;
    passing None preserves whatever the stage implementation already wrote.
    """
    # One timestamp feeds both finished_at and the duration so they agree.
    finished = datetime.now(timezone.utc)
    state = "passed" if success else "failed"
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")
        cur = await db.execute(
            "SELECT started_at FROM burnin_stages WHERE burnin_job_id=? AND stage_name=?",
            (job_id, stage_name),
        )
        row = await cur.fetchone()
        duration = None
        if row and row[0]:
            try:
                start = datetime.fromisoformat(row[0])
            except (TypeError, ValueError):
                # Unparseable started_at: leave duration NULL rather than fail.
                log.debug("Could not parse started_at %r for job %s stage %s",
                          row[0], job_id, stage_name)
            else:
                if start.tzinfo is None:
                    # Naive timestamps are treated as UTC.
                    start = start.replace(tzinfo=timezone.utc)
                duration = (finished - start).total_seconds()
        # COALESCE(?, error_text) keeps the existing error_text when the
        # bound parameter is NULL, so one statement covers both cases.
        await db.execute(
            """UPDATE burnin_stages
               SET state=?, percent=?, finished_at=?, duration_seconds=?,
                   error_text=COALESCE(?, error_text)
               WHERE burnin_job_id=? AND stage_name=?""",
            (state, 100 if success else None, finished.isoformat(), duration,
             error_text, job_id, stage_name),
        )
        await db.commit()
async def _update_stage_percent(job_id: int, stage_name: str, pct: int) -> None:
    """Store the in-stage completion percentage on the matching stage row."""
    sql = "UPDATE burnin_stages SET percent=? WHERE burnin_job_id=? AND stage_name=?"
    async with _db() as conn:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute(sql, (pct, job_id, stage_name))
        await conn.commit()
async def _cancel_stage(job_id: int, stage_name: str) -> None:
    """Mark the stage row as 'cancelled' and stamp its finish time."""
    sql = "UPDATE burnin_stages SET state='cancelled', finished_at=? WHERE burnin_job_id=? AND stage_name=?"
    finished_at = _now()
    async with _db() as conn:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute(sql, (finished_at, job_id, stage_name))
        await conn.commit()
async def _append_stage_log(job_id: int, stage_name: str, text: str) -> None:
    """Append ``text`` to the stage row's log_text column (NULL counts as empty)."""
    append_sql = """UPDATE burnin_stages
                    SET log_text = COALESCE(log_text, '') || ?
                    WHERE burnin_job_id=? AND stage_name=?"""
    params = (text, job_id, stage_name)
    async with _db() as conn:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute(append_sql, params)
        await conn.commit()
async def _update_stage_bad_blocks(job_id: int, stage_name: str, count: int) -> None:
    """Store the bad-block count on the matching stage row."""
    sql = "UPDATE burnin_stages SET bad_blocks=? WHERE burnin_job_id=? AND stage_name=?"
    async with _db() as conn:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute(sql, (count, job_id, stage_name))
        await conn.commit()
async def _update_stage_bb_phase(
    job_id: int, stage_name: str, phase: int, phase_pct: float,
) -> None:
    """Persist per-pattern badblocks progress (phase number and in-phase
    percent) so the drive-drawer UI can render 4 meters with separate
    write/verify halves."""
    sql = "UPDATE burnin_stages SET bb_phase=?, bb_phase_pct=? WHERE burnin_job_id=? AND stage_name=?"
    async with _db() as conn:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute(sql, (phase, phase_pct, job_id, stage_name))
        await conn.commit()
async def _update_stage_bb_mbps(
    job_id: int, stage_name: str, mbps: float,
) -> None:
    """Persist live throughput for the surface_validate meter strip.

    The value is computed by the caller from delta_overall_pct between
    successive badblocks progress lines, scaled by drive
    size_bytes / 800 (8 phases × 100).
    """
    sql = "UPDATE burnin_stages SET bb_mbps=? WHERE burnin_job_id=? AND stage_name=?"
    async with _db() as conn:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute(sql, (mbps, job_id, stage_name))
        await conn.commit()
async def _record_bb_phase_start(
    job_id: int, stage_name: str, phase: int, ts: str,
) -> None:
    """Record the moment a phase first becomes current.

    Idempotent: re-entry of the same phase keeps the original timestamp so
    a transient parser reset doesn't blow away history.

    Stored in ``bb_phase_history`` as a JSON object keyed by phase number
    (as a string). The drawer reads it to compute per-pattern elapsed times.
    A stored value that is not a JSON object is treated like missing history.
    """
    async with _db() as db:
        await db.execute("PRAGMA journal_mode=WAL")
        cur = await db.execute(
            "SELECT bb_phase_history FROM burnin_stages "
            "WHERE burnin_job_id=? AND stage_name=?",
            (job_id, stage_name),
        )
        row = await cur.fetchone()
        existing: dict = {}
        if row and row[0]:
            try:
                decoded = json.loads(row[0])
            except (json.JSONDecodeError, TypeError):
                decoded = None
            # Valid JSON that is not an object (e.g. a list or a number)
            # would break the membership test / item assignment below.
            if isinstance(decoded, dict):
                existing = decoded
        key = str(phase)
        if key not in existing:
            existing[key] = ts
            await db.execute(
                "UPDATE burnin_stages SET bb_phase_history=? "
                "WHERE burnin_job_id=? AND stage_name=?",
                (json.dumps(existing), job_id, stage_name),
            )
            await db.commit()
async def _store_smart_attrs(drive_id: int, attrs: dict) -> None:
    """Persist the latest SMART attribute dict to drives.smart_attrs as JSON.

    The stored object has the keys ``health``, ``attrs``, ``warnings`` and
    ``failures``; missing inputs default to "UNKNOWN", {}, [] and [].
    """
    # JSON object keys must be strings, so attribute ids are stringified.
    by_id = {str(attr_id): value for attr_id, value in attrs.get("attributes", {}).items()}
    payload = {
        "health": attrs.get("health", "UNKNOWN"),
        "attrs": by_id,
        "warnings": attrs.get("warnings", []),
        "failures": attrs.get("failures", []),
    }
    encoded = json.dumps(payload)
    async with _db() as conn:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute("UPDATE drives SET smart_attrs=? WHERE id=?", (encoded, drive_id))
        await conn.commit()
async def _store_smart_raw_output(drive_id: int, test_type: str, raw: str) -> None:
    """Store raw smartctl output in smart_tests.raw_output.

    ``test_type`` is lower-cased before matching against the test_type column.
    """
    sql = "UPDATE smart_tests SET raw_output=? WHERE drive_id=? AND test_type=?"
    normalized_type = test_type.lower()
    async with _db() as conn:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute(sql, (raw, drive_id, normalized_type))
        await conn.commit()
async def _set_stage_error(job_id: int, stage_name: str, error_text: str) -> None:
    """Overwrite the error_text column of the matching stage row."""
    sql = "UPDATE burnin_stages SET error_text=? WHERE burnin_job_id=? AND stage_name=?"
    async with _db() as conn:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.execute(sql, (error_text, job_id, stage_name))
        await conn.commit()
async def _recalculate_progress(job_id: int, profile: str | None = None) -> None:
    """Recompute the overall job percentage from the job's stage rows.

    Each stage contributes its base weight (5 when the name is not in
    ``_STAGE_BASE_WEIGHTS``): in full when 'passed', proportionally to its
    ``percent`` when 'running', and nothing otherwise. The job row's
    ``stage_name`` is set to the last running stage, or NULL if none is.

    ``profile`` is unused and kept only for call compatibility.
    """
    async with _db() as conn:
        conn.row_factory = aiosqlite.Row
        await conn.execute("PRAGMA journal_mode=WAL")
        cursor = await conn.execute(
            "SELECT stage_name, state, percent FROM burnin_stages WHERE burnin_job_id=? ORDER BY id",
            (job_id,),
        )
        stage_rows = await cursor.fetchall()
        if not stage_rows:
            return

        weights = [_STAGE_BASE_WEIGHTS.get(r["stage_name"], 5) for r in stage_rows]
        weight_sum = sum(weights)
        if weight_sum == 0:
            return

        earned = 0.0
        running_stage = None
        for stage, weight in zip(stage_rows, weights):
            state = stage["state"]
            if state == "passed":
                earned += weight
            elif state == "running":
                earned += weight * (stage["percent"] or 0) / 100
                running_stage = stage["stage_name"]

        overall = int(earned / weight_sum * 100)
        await conn.execute(
            "UPDATE burnin_jobs SET percent=?, stage_name=? WHERE id=?",
            (overall, running_stage, job_id),
        )
        await conn.commit()
# ---------------------------------------------------------------------------
# SSE notifier
# ---------------------------------------------------------------------------
def _push_update(alert: dict | None = None) -> None:
    """Notify SSE subscribers that data has changed.

    ``alert`` is an optional browser-notification payload forwarded to the
    poller. Best-effort: a failure here never propagates to the caller, but
    it is logged at debug level so broken SSE delivery can be diagnosed.
    """
    try:
        # Imported lazily, at call time rather than at module load.
        from app import poller
        poller._notify_subscribers(alert=alert)
    except Exception:
        log.debug("SSE subscriber notification failed", exc_info=True)

71
app/burnin/kill.py Normal file
View file

@ -0,0 +1,71 @@
"""Remote process kill machinery.
asyncssh's ``proc.kill()`` sends an SSH "signal" channel request that
OpenSSH's sshd ignores by default — the remote process keeps running and
``proc.wait()`` hangs forever, pinning the asyncio.Semaphore slot.
The fix: capture the remote PID at command launch (via the
``sh -c 'echo PID:$$; exec ...'`` wrapper) and issue ``kill -9 <pid>``
over a fresh SSH session when we need to abort. This module owns that
state and the kill helper.
Public surface (used by the rest of the burnin package):
set_remote_pid(job_id, pid)   -- call from the stage when launch succeeds
clear_remote_pid(job_id)      -- call from the cleanup callback
kill_remote_process(job_id)   -- fire-and-clear; safe to call repeatedly
"""
from __future__ import annotations
import asyncio
import logging
log = logging.getLogger(__name__)
# job_id -> remote PID. Kept at module level so the stage / task /
# __init__ modules can share it without threading it through function
# signatures.
_remote_pids: dict[int, int] = {}


def set_remote_pid(job_id: int, pid: int) -> None:
    """Remember the remote PID that the running stage captured for this job."""
    _remote_pids.update({job_id: pid})


def clear_remote_pid(job_id: int) -> None:
    """Forget the job's PID without attempting a kill; a missing entry is
    fine. Used by the task cleanup callback so a normally-completed job
    doesn't carry stale state."""
    if job_id in _remote_pids:
        del _remote_pids[job_id]


def get_remote_pid(job_id: int) -> int | None:
    """Return the PID recorded for the job, or None when there is none."""
    try:
        return _remote_pids[job_id]
    except KeyError:
        return None
async def kill_remote_process(job_id: int) -> None:
    """Send ``kill -9`` to the remote PID associated with this job, if any.

    Idempotent: the PID is popped before the kill is attempted, so a second
    call is a no-op. Any failure (SSH connection error, timeout) is logged
    but never raised — a best-effort kill is preferred over blocking the
    cancel path.
    """
    pid = _remote_pids.pop(job_id, None)
    if not pid:
        return
    try:
        # Local import to avoid pulling asyncssh into module load if
        # this helper is never used (tests, mock mode).
        from app import ssh_client
        async with await ssh_client._connect() as conn:
            await asyncio.wait_for(
                conn.run(
                    f"kill -9 {pid} 2>/dev/null || true", check=False,
                ),
                timeout=10,
            )
        log.info("Remote-killed PID %d for job %d", pid, job_id)
    except Exception as exc:
        # %r rather than %s: the timeout raised by wait_for stringifies to
        # "", which would leave the failure reason blank in the log.
        log.warning(
            "Failed to remote-kill PID %d for job %d: %r", pid, job_id, exc,
        )

920
app/burnin/stages.py Normal file
View file

@ -0,0 +1,920 @@
"""Per-stage burn-in implementations.
Each ``_stage_*`` function runs to completion or returns False. They share
state (DB, helpers, configuration) via ``app.burnin._common`` and pull
the live ``TrueNASClient`` instance lazily from the package root so the
extraction stays free of circular imports at module load.
``_dispatch_stage`` is the per-stage_name router used by the orchestrator
in ``app.burnin.__init__._execute_stages``.
"""
from __future__ import annotations
import asyncio
import logging
import time
from typing import TypedDict
from app.config import settings
class _BadblocksResult(TypedDict):
    """Typed dict describing the outcome of a badblocks run.

    NOTE(review): the code that builds and reads this dict is outside the
    visible chunk; the field notes below are inferred from the names and
    should be confirmed against it.
    """
    bad_blocks: int  # presumably the number of bad blocks reported — confirm
    output: str  # presumably the captured badblocks output text — confirm
    aborted: bool  # presumably True when the run was stopped early — confirm
# `badblocks -w` cycles through 4 patterns (0xaa, 0x55, 0xff, 0x00),
# each with a write phase followed by a read-back/verify phase = 8 phases.
# badblocks reports percent per phase (`XX% done`); without translation the
# dashboard appears to "rewind" every ~2 hours when a new phase starts.
_BB_PATTERN_PHASE = {"0xaa": 1, "0x55": 3, "0xff": 5, "0x00": 7}
_BB_TOTAL_PHASES = 8
# Throttle DB writes from the badblocks parser. Each progress line used
# to trigger 4-6 transactions; with 4 concurrent burn-ins emitting sub-
# second progress lines, the asyncssh drain couldn't keep up — the
# stdout pipe on TrueNAS filled, badblocks blocked on pipe_write,
# disk I/O effectively stopped. 5 seconds is fine for the UI (drawer
# polls every ~12s anyway) and cuts DB load 60-80x.
BB_DB_MIN_SECONDS = 5.0
import re as _re_pre  # noqa: E402
_BB_PATTERN_RE = _re_pre.compile(r"Testing with pattern\s+(0x[0-9a-fA-F]+)")
_BB_VERIFY_RE = _re_pre.compile(r"Reading and comparing")
_BB_PERCENT_RE = _re_pre.compile(r"([\d.]+)%\s+done")


class _BadblocksProgress:
    """Translate per-phase badblocks progress into overall progress.

    A pure state machine with no I/O: pass each output line to
    :meth:`update`, then read :attr:`overall_pct`.

    Rules:
      - Before any header is seen the tracker sits in phase 1 (write 0xaa).
      - "Testing with pattern 0xXX" selects that pattern's write phase
        (1, 3, 5 or 7); unrecognized patterns leave the state untouched.
      - "Reading and comparing" moves to the verify phase that follows the
        most recent write phase.
      - "XX% done" sets the percent within the current phase.
      - overall_pct = ((phase - 1) * 100 + phase_pct) / 8, capped at 99 so
        "100%" is only ever written by the stage's success path.
    """

    __slots__ = ("phase", "phase_pct", "_last_write_phase")

    def __init__(self) -> None:
        self.phase: int = 1
        self.phase_pct: float = 0.0
        self._last_write_phase: int = 1

    def update(self, line: str) -> None:
        """Advance the tracker using one line of badblocks output."""
        header = _BB_PATTERN_RE.search(line)
        if header is not None:
            write_phase = _BB_PATTERN_PHASE.get(header.group(1).lower())
            if write_phase is not None:
                self.phase = write_phase
                self._last_write_phase = write_phase
                self.phase_pct = 0.0
            return

        if _BB_VERIFY_RE.search(line) is not None:
            self.phase = self._last_write_phase + 1
            self.phase_pct = 0.0
            return

        progress = _BB_PERCENT_RE.search(line)
        if progress is None:
            return
        try:
            self.phase_pct = float(progress.group(1))
        except ValueError:
            # e.g. "1.2.3% done" — matches the regex but is not a number.
            pass

    @property
    def overall_pct(self) -> int:
        """Whole-run percent (0-99) derived from phase and in-phase percent."""
        finished_phases = self.phase - 1
        scaled = (finished_phases * 100.0 + self.phase_pct) / _BB_TOTAL_PHASES
        return min(99, int(scaled))
def _build_badblocks_cmd(devname: str) -> str:
    """Construct the wrapped badblocks command for a given device.

    badblocks's progress output uses '\\b' backspace characters to
    overwrite the previous "XX% done" line; there's no '\\n' between
    updates until a phase transition. asyncssh's line-buffered reader
    needs a real '\\n' to yield a line, so we pipe the output through
    `tr '\\b' '\\n'` at the shell level. After this, every progress
    update is a normal newline-terminated line.

    Inner shell does `echo PID:$$; exec badblocks ...` so $$ is the
    badblocks PID after exec (needed for out-of-band kill -9; asyncssh's
    signal channel is ignored by sshd). 2>&1 merges stderr into stdout
    so tr sees the progress lines (badblocks emits them on stderr).

    Geometry (-b -c -p) is operator-tunable via Settings / Burn-in;
    defaults match the Spearfoot disk-burnin.sh recommendation.

    The device path and the inner script are shell-quoted and the geometry
    values are coerced to int, so neither an unusual device name nor a
    malformed setting can inject extra shell syntax. For ordinary inputs
    the resulting string is the same as plain interpolation would give.

    Raises:
        ValueError / TypeError: if a geometry setting is not an integer.
    """
    import shlex

    device = shlex.quote(f"/dev/{devname}")
    inner = (
        "echo PID:$$; exec badblocks "
        "-wsv "
        f"-b {int(settings.surface_validate_block_size)} "
        f"-c {int(settings.surface_validate_block_buffer)} "
        f"-p {int(settings.surface_validate_passes)} "
        f"{device} 2>&1"
    )
    # The outer pipeline lets tr translate \\b → \\n. stdbuf -oL forces
    # tr's stdout to line-buffered mode; without it tr's stdout is
    # block-buffered (4 KB chunks) when its destination is a pipe,
    # which delays each progress line by ~6 minutes at our throughput.
    return f"sh -c {shlex.quote(inner)} | stdbuf -oL tr '\\b' '\\n'"
from . import kill
from ._common import (
POLL_INTERVAL,
_append_stage_log,
_db,
_is_cancelled,
_now,
_push_update,
_recalculate_progress,
_record_bb_phase_start,
_set_stage_error,
_store_smart_attrs,
_store_smart_raw_output,
_update_stage_bad_blocks,
_update_stage_bb_mbps,
_update_stage_bb_phase,
_update_stage_percent,
)
log = logging.getLogger(__name__)
def _get_client():
    """Return the TrueNASClient that ``burnin.init()`` stored on the package.

    The client lives on the package root (``burnin._client``) for backward
    compatibility with routes.py, which reads that attribute directly. The
    package is imported lazily, at call time.
    """
    from app import burnin

    client = burnin._client
    assert client is not None, (
        "burnin._client is None — burnin.init() must be called before any "
        "stage that reaches the TrueNAS REST API."
    )
    return client
async def _dispatch_stage(job_id: int, stage_name: str, devname: str, drive_id: int) -> bool:
    """Run the implementation registered for ``stage_name`` and return its result.

    A stage name with no implementation is treated as passed (returns True).
    """
    if stage_name == "precheck":
        return await _stage_precheck(job_id, drive_id)

    smart_test_types = {"short_smart": "SHORT", "long_smart": "LONG"}
    if stage_name in smart_test_types:
        return await _stage_smart_test(
            job_id, devname, smart_test_types[stage_name], stage_name, drive_id,
        )

    if stage_name == "surface_validate":
        return await _stage_surface_validate(job_id, devname, drive_id)
    if stage_name == "io_validate":
        return await _stage_timed_simulate(job_id, "io_validate", settings.io_validate_seconds)
    if stage_name == "final_check":
        return await _stage_final_check(job_id, devname, drive_id)
    return True
# ---------------------------------------------------------------------------
# Individual stage implementations
# ---------------------------------------------------------------------------
async def _stage_precheck(job_id: int, drive_id: int) -> bool:
    """Check SMART health and temperature before starting destructive work.

    Returns False, with a reason stored on the precheck stage, when the
    drive row is missing, its cached SMART health is "FAILED", or its
    cached temperature exceeds ``settings.temp_crit_c``. Otherwise returns
    True after a one-second pause.
    """
    async with _db() as db:
        cur = await db.execute(
            "SELECT smart_health, temperature_c FROM drives WHERE id=?", (drive_id,)
        )
        row = await cur.fetchone()
    if not row:
        # Record a reason so the failed stage isn't shown without one.
        await _set_stage_error(job_id, "precheck", f"Drive record {drive_id} not found — cannot run precheck")
        return False
    health, temp = row[0], row[1]
    if health == "FAILED":
        await _set_stage_error(job_id, "precheck", "Drive SMART health is FAILED — refusing to burn in")
        return False
    if temp and temp > settings.temp_crit_c:
        await _set_stage_error(job_id, "precheck", f"Drive temperature {temp}°C exceeds {settings.temp_crit_c}°C limit")
        return False
    await asyncio.sleep(1)  # Simulate brief check
    return True
async def _stage_smart_test(job_id: int, devname: str, test_type: str, stage_name: str,
                            drive_id: int | None = None) -> bool:
    """Run a SMART test via SSH when SSH is configured, else via the TrueNAS REST API."""
    from app import ssh_client

    if not ssh_client.is_configured():
        return await _stage_smart_test_api(job_id, devname, test_type, stage_name)
    return await _stage_smart_test_ssh(job_id, devname, test_type, stage_name, drive_id)
async def _stage_smart_test_api(job_id: int, devname: str, test_type: str, stage_name: str) -> bool:
    """TrueNAS REST API path for SMART test (mock / dev mode).

    Starts the test, then polls the SMART job list every ``POLL_INTERVAL``
    seconds, mirroring the reported percent onto the stage row.

    Returns True when the TrueNAS job reaches SUCCESS. Returns False when
    the burn-in job is cancelled (after a best-effort abort), when the
    TrueNAS job reaches FAILED/ABORTED, or when it disappears from the job
    list; the last two cases store a reason on the stage.
    """
    tn_job_id = await _get_client().start_smart_test([devname], test_type)
    while True:
        if await _is_cancelled(job_id):
            try:
                await _get_client().abort_job(tn_job_id)
            except Exception as exc:
                # Best-effort: cancellation proceeds even if the abort fails.
                log.debug("Failed to abort TrueNAS job %s: %r", tn_job_id, exc)
            return False
        jobs = await _get_client().get_smart_jobs()
        job = next((j for j in jobs if j["id"] == tn_job_id), None)
        if not job:
            # Record a reason so the failed stage isn't shown without one.
            await _set_stage_error(
                job_id, stage_name,
                f"SMART {test_type} test job {tn_job_id} no longer reported by TrueNAS",
            )
            return False
        state = job["state"]
        pct = job["progress"]["percent"]
        await _update_stage_percent(job_id, stage_name, pct)
        await _recalculate_progress(job_id, None)
        _push_update()
        if state == "SUCCESS":
            return True
        elif state in ("FAILED", "ABORTED"):
            await _set_stage_error(job_id, stage_name,
                                   job.get("error") or f"SMART {test_type} test failed")
            return False
        await asyncio.sleep(POLL_INTERVAL)
async def _stage_smart_test_ssh(job_id: int, devname: str, test_type: str, stage_name: str,
                                drive_id: int | None) -> bool:
    """SSH path for SMART test — runs smartctl directly on TrueNAS.

    Starts the test, then polls progress every ``POLL_INTERVAL`` seconds
    until the test reports 'passed' or 'failed' or the job is cancelled.
    When the test passes and ``drive_id`` is given, the SMART attributes
    are fetched and cached; attribute failures fail the stage, warnings
    are only logged to the stage log.

    Returns True on pass; False on start failure, cancellation, a failed
    test, or SMART attribute failures.
    """
    from app import ssh_client
    # Start the test
    try:
        startup = await ssh_client.start_smart_test(devname, test_type)
        await _append_stage_log(job_id, stage_name, startup + "\n")
    except Exception as exc:
        await _set_stage_error(job_id, stage_name, f"Failed to start SMART test via SSH: {exc}")
        return False
    # Brief pause to let the test register in smartctl output
    await asyncio.sleep(3)
    # Throttle log_text appends — every poll on a multi-hour long_smart bloated
    # log_text to 50+ MB and triggered SQLite "database is locked" because each
    # COALESCE-then-append rewrites the whole column. Append every ~60s, on the
    # first poll, and on any state change.
    LOG_EVERY_N_POLLS = 12
    poll_count = 0
    last_state: str | None = None
    # Consecutive poll failures. Error lines get the same throttle as normal
    # output; otherwise an SSH outage appends a line every poll and recreates
    # the log bloat described above.
    poll_errors = 0
    # Poll until complete
    while True:
        if await _is_cancelled(job_id):
            try:
                await ssh_client.abort_smart_test(devname)
            except Exception:
                pass
            return False
        await asyncio.sleep(POLL_INTERVAL)
        try:
            progress = await ssh_client.poll_smart_progress(devname)
        except Exception as exc:
            poll_errors += 1
            log.warning("SSH SMART poll failed: %s", exc, extra={"job_id": job_id})
            if poll_errors == 1 or poll_errors % LOG_EVERY_N_POLLS == 0:
                await _append_stage_log(job_id, stage_name, f"[poll error] {exc}\n")
            continue
        poll_errors = 0
        poll_count += 1
        state_changed = progress["state"] != last_state
        last_state = progress["state"]
        if poll_count == 1 or poll_count % LOG_EVERY_N_POLLS == 0 or state_changed:
            await _append_stage_log(job_id, stage_name, progress["output"] + "\n---\n")
        if progress["state"] == "running":
            pct = max(0, 100 - progress["percent_remaining"])
            await _update_stage_percent(job_id, stage_name, pct)
            await _recalculate_progress(job_id)
            _push_update()
        elif progress["state"] == "passed":
            await _update_stage_percent(job_id, stage_name, 100)
            # Run attribute check
            if drive_id is not None:
                try:
                    attrs = await ssh_client.get_smart_attributes(devname)
                    await _store_smart_attrs(drive_id, attrs)
                    await _store_smart_raw_output(drive_id, test_type, attrs["raw_output"])
                    if attrs["failures"]:
                        error = "SMART attribute failures: " + "; ".join(attrs["failures"])
                        await _set_stage_error(job_id, stage_name, error)
                        return False
                    if attrs["warnings"]:
                        await _append_stage_log(
                            job_id, stage_name,
                            "[WARNING] " + "; ".join(attrs["warnings"]) + "\n"
                        )
                except Exception as exc:
                    # A failed attribute lookup does not fail a passed test.
                    log.warning("Failed to retrieve SMART attributes: %s", exc)
            await _recalculate_progress(job_id)
            _push_update()
            return True
        elif progress["state"] == "failed":
            await _set_stage_error(job_id, stage_name, f"SMART {test_type} test failed")
            return False
        # "unknown" → keep polling
async def _badblocks_available() -> bool:
    """Report whether the SSH host has a ``badblocks`` binary on its PATH.

    Opens an SSH session and runs ``which badblocks``; a zero exit status
    means the tool is present. Any exception raised while connecting or
    running the probe is treated as "not available".
    """
    from app import ssh_client

    try:
        async with await ssh_client._connect() as conn:
            probe = await conn.run("which badblocks", check=False)
            found = probe.returncode == 0
    except Exception:
        found = False
    return found
async def _stage_surface_validate(job_id: int, devname: str, drive_id: int) -> bool:
    """Dispatch the surface-validation stage to the right implementation.

    Routing, first match wins:

    1. No SSH configured: simulated timed progress (dev/mock mode),
       lasting ``settings.surface_validate_seconds``.
    2. SSH + device name starts with ``nvme`` + nvme-cli present on the
       host: ``nvme format -s 1`` secure-erase format.
    3. SSH + badblocks present on the host: destructive badblocks run
       over SSH.
    4. SSH but no badblocks (TrueNAS CORE / FreeBSD): TrueNAS ``disk.wipe``
       FULL job followed by a post-wipe SMART check.

    Returns whatever the selected implementation returns.
    """
    from app import ssh_client

    if not ssh_client.is_configured():
        return await _stage_timed_simulate(
            job_id, "surface_validate", settings.surface_validate_seconds
        )

    is_nvme = devname.startswith("nvme")
    if is_nvme and await _nvme_cli_available():
        return await _stage_surface_validate_nvme(job_id, devname, drive_id)

    if await _badblocks_available():
        return await _stage_surface_validate_ssh(job_id, devname, drive_id)

    # Neither tool is usable on the host — fall back to the native wipe API
    # and tell the operator why.
    fallback_notice = (
        "[INFO] badblocks not found on host (TrueNAS CORE/FreeBSD) — "
        "using TrueNAS disk.wipe API (FULL write pass).\n\n"
    )
    await _append_stage_log(job_id, "surface_validate", fallback_notice)
    return await _stage_surface_validate_truenas(job_id, devname, drive_id)
async def _nvme_cli_available() -> bool:
    """Report whether the SSH host has the ``nvme`` CLI on its PATH.

    Runs ``which nvme`` over a fresh SSH session. Returns True on exit
    status 0, and False on a non-zero status or on any exception.
    """
    from app import ssh_client

    present = False
    try:
        async with await ssh_client._connect() as conn:
            outcome = await conn.run("which nvme", check=False)
            present = outcome.returncode == 0
    except Exception:
        present = False
    return present
async def _stage_surface_validate_nvme(job_id: int, devname: str,
                                       drive_id: int) -> bool:
    """Destructive NVMe surface stage via ``nvme format -s 1`` over SSH.

    ``-s 1`` requests a *User Data Erase* from the controller (in nvme-cli,
    Cryptographic Erase is ``-s 2``; a controller may still implement a User
    Data Erase cryptographically when all user data is encrypted). Either
    way the namespace contents are destroyed. This is used for NVMe instead
    of badblocks, which would write the entire LBA space — slower, and it
    wears the flash unnecessarily.

    After a successful format the SMART attributes are re-read:

    * health ``FAILED`` or any failure not prefixed ``"SSH error:"`` fails
      the stage;
    * ``"SSH error:"`` failures, warnings, and an exception from the SMART
      read itself are logged to the stage log only (soft pass), so a single
      SSH blip after a successful format does not undo the work.

    ``drive_id`` is accepted for signature parity with the other
    surface-validate implementations; this function does not use it.

    Returns True on success, False after recording a stage error.
    """
    from app import ssh_client

    stage = "surface_validate"
    await _append_stage_log(
        job_id, stage,
        f"[START] nvme format -s 1 /dev/{devname}\n"
        f"[NOTE] Secure erase (User Data Erase) — destroys all data on /dev/{devname}.\n\n"
    )
    cmd = f"nvme format -s 1 --force /dev/{devname}"
    try:
        async with await ssh_client._connect() as conn:
            r = await asyncio.wait_for(
                conn.run(cmd, check=False), timeout=600
            )
    except Exception as exc:
        # asyncio.TimeoutError (raised by wait_for after 600 s) stringifies
        # to "", which would leave the operator with a blank error. Fall
        # back to the exception class name so the cause is always visible.
        detail = str(exc) or type(exc).__name__
        await _append_stage_log(job_id, stage, f"\n[SSH error] {detail}\n")
        await _set_stage_error(job_id, stage, f"NVMe format SSH error: {detail}")
        return False

    output = (r.stdout or "") + (r.stderr or "")
    await _append_stage_log(job_id, stage, output + "\n")
    if r.returncode != 0:
        await _set_stage_error(
            job_id, stage,
            f"nvme format exited {r.returncode}: {output.strip()[:200]}"
        )
        return False

    # Post-format SMART sanity check. Transport problems are a soft pass;
    # only a FAILED health or a real attribute failure fails the stage.
    try:
        attrs = await ssh_client.get_smart_attributes(devname)
        failures = attrs.get("failures") or []
        ssh_only_failures = [f for f in failures if f.startswith("SSH error:")]
        real_failures = [f for f in failures if not f.startswith("SSH error:")]
        if attrs.get("health") == "FAILED":
            await _set_stage_error(
                job_id, stage,
                "NVMe SMART health FAILED after format",
            )
            return False
        if real_failures:
            await _set_stage_error(
                job_id, stage,
                "NVMe SMART attribute failures after format: "
                + "; ".join(real_failures),
            )
            return False
        if ssh_only_failures:
            await _append_stage_log(
                job_id, stage,
                "[WARN] post-format SMART check had SSH errors "
                "(soft-passing): " + "; ".join(ssh_only_failures) + "\n",
            )
        if attrs.get("warnings"):
            await _append_stage_log(
                job_id, stage,
                "[WARN] " + "; ".join(attrs["warnings"]) + "\n",
            )
    except Exception as exc:
        log.warning("Post-format SMART check error on %s: %s", devname, exc)
        await _append_stage_log(
            job_id, stage,
            f"[WARN] post-format SMART check raised: {exc}\n",
        )

    await _update_stage_percent(job_id, stage, 100)
    await _recalculate_progress(job_id)
    _push_update()
    return True
async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int) -> bool:
    """Run badblocks over SSH, streaming its output into the stage log.

    Starts the command built by ``_build_badblocks_cmd`` on the SSH host and
    reads its output line by line. Progress percent, badblocks phase,
    bad-block count, throughput and non-progress log lines are written to
    the database inside one throttled window (on a phase change, or once
    ``BB_DB_MIN_SECONDS`` have passed since the previous write).

    Returns:
        True when the run ends with no more than
        ``settings.bad_block_threshold`` bad blocks.
        False when the job was cancelled by the operator (no stage error is
        recorded, matching the other stages), when the bad-block threshold
        is exceeded, or on an SSH/processing error (stage error recorded).

    Raises:
        asyncio.CancelledError: re-raised after a best-effort kill of the
            remote process, so the caller can decide how to mark the job.
    """
    from app import ssh_client

    # Drive size feeds the MB/s estimate only. If the lookup yields nothing
    # the throughput update is simply skipped.
    drive_size_bytes: int | None = None
    async with _db() as db:
        cur = await db.execute("SELECT size_bytes FROM drives WHERE id=?", (drive_id,))
        row = await cur.fetchone()
        if row and row[0]:
            drive_size_bytes = int(row[0])

    await _append_stage_log(
        job_id, "surface_validate",
        f"[START] badblocks -wsv -b {settings.surface_validate_block_size} "
        f"-c {settings.surface_validate_block_buffer} "
        f"-p {settings.surface_validate_passes} /dev/{devname}\n"
        f"[NOTE] This is a DESTRUCTIVE write test. "
        f"All data on /dev/{devname} will be overwritten.\n\n"
    )

    # Streaming + progress is handled by the inline _drain coroutine below.
    # The result dict only carries the final tallies.
    result: _BadblocksResult = {"bad_blocks": 0, "output": "", "aborted": False}
    # Set by _drain when the operator cancelled the job mid-run.
    cancelled = False
    try:
        bad_blocks_total = 0
        async with await ssh_client._connect() as conn:
            # The first stdout line is expected to be "PID:<n>" (see the
            # parsing in _drain). The PID is stored via kill.set_remote_pid
            # so kill.kill_remote_process can target the remote process
            # when we need to abort.
            cmd = _build_badblocks_cmd(devname)
            async with conn.create_process(cmd) as proc:
                pid_seen = False
                progress = _BadblocksProgress()
                # Previous progress sample and the time of the previous DB
                # write; MB/s is computed from the delta between samples.
                last_pct_sample: float = progress.overall_pct
                last_db_write_ts: float = time.monotonic()
                # Non-progress lines accumulated since the last log flush.
                pending_log_chunks: list[str] = []

                # Seed the phase columns immediately so the UI has something
                # to render before badblocks prints its first progress line.
                await _update_stage_bb_phase(
                    job_id, "surface_validate",
                    progress.phase, progress.phase_pct,
                )
                # Stamp the start of the initial phase for duration history.
                await _record_bb_phase_start(
                    job_id, "surface_validate", progress.phase, _now(),
                )
                _push_update()

                async def _drain(stream, is_stderr: bool):
                    """Consume *stream* line by line until EOF, cancel, or abort."""
                    nonlocal bad_blocks_total, pid_seen, last_db_write_ts
                    nonlocal last_pct_sample, cancelled
                    async for raw in stream:
                        line = raw if isinstance(raw, str) else raw.decode("utf-8", errors="replace")
                        if not line.strip():
                            continue

                        # First stdout line is "PID:<n>" from the wrapping
                        # shell. Capture and skip.
                        if not is_stderr and not pid_seen and line.startswith("PID:"):
                            pid_seen = True
                            try:
                                kill.set_remote_pid(job_id, int(line[4:].strip()))
                                log.info(
                                    "Captured remote PID %d for job %d (badblocks)",
                                    kill.get_remote_pid(job_id), job_id,
                                )
                            except ValueError:
                                pass
                            continue

                        # Every non-PID line may carry progress or a
                        # bad-block number; feed it to the phase parser.
                        prev_phase = progress.phase
                        progress.update(line)
                        phase_changed = progress.phase != prev_phase
                        is_progress_line = bool(_BB_PERCENT_RE.search(line))

                        # Bare-number lines are counted as bad blocks.
                        stripped = line.strip()
                        if stripped and stripped.isdigit() and not is_progress_line:
                            bad_blocks_total += 1

                        # Single throttle gate covering EVERY DB touch, so
                        # DB load cannot starve the stream reader.
                        now_ts = time.monotonic()
                        time_since_last_db = now_ts - last_db_write_ts
                        should_write = phase_changed or time_since_last_db >= BB_DB_MIN_SECONDS
                        if should_write:
                            if await _is_cancelled(job_id):
                                await kill.kill_remote_process(job_id)
                                cancelled = True
                                return
                            if phase_changed:
                                await _record_bb_phase_start(
                                    job_id, "surface_validate",
                                    progress.phase, _now(),
                                )
                            await _update_stage_percent(
                                job_id, "surface_validate", progress.overall_pct,
                            )
                            await _update_stage_bb_phase(
                                job_id, "surface_validate",
                                progress.phase, progress.phase_pct,
                            )
                            await _update_stage_bad_blocks(
                                job_id, "surface_validate", bad_blocks_total,
                            )
                            if (
                                drive_size_bytes
                                and not phase_changed
                                and progress.overall_pct > last_pct_sample
                                and time_since_last_db >= 1.0
                            ):
                                d_pct = progress.overall_pct - last_pct_sample
                                # NOTE(review): the /800 factor assumes a
                                # particular scale for overall_pct, which is
                                # defined by _BadblocksProgress (not visible
                                # here). If overall_pct is 0-100 across 8
                                # full-disk phases, 1% would be 8% of the
                                # disk — confirm the reported MB/s is sane.
                                bytes_done = (d_pct / 800.0) * drive_size_bytes
                                mbps = bytes_done / time_since_last_db / 1_000_000
                                await _update_stage_bb_mbps(
                                    job_id, "surface_validate", mbps,
                                )
                            if pending_log_chunks:
                                chunk = "".join(pending_log_chunks)
                                pending_log_chunks.clear()
                                await _append_stage_log(
                                    job_id, "surface_validate", chunk,
                                )
                            last_pct_sample = progress.overall_pct
                            last_db_write_ts = now_ts
                            await _recalculate_progress(job_id)
                            _push_update()

                        # Progress lines stay out of the stored log: they
                        # are high-volume and carry no lasting information.
                        if not is_progress_line:
                            pending_log_chunks.append(line)

                        # Abort on bad block threshold — immediate.
                        if bad_blocks_total > settings.bad_block_threshold:
                            await kill.kill_remote_process(job_id)
                            pending_log_chunks.append(
                                f"\n[ABORTED] {bad_blocks_total} bad block(s) exceeded "
                                f"threshold ({settings.bad_block_threshold})\n"
                            )
                            return

                await _drain(proc.stdout, False)

                # Bound proc.wait so a remote process that ignored our kill
                # (or that we never managed to kill) can't pin this task
                # forever.
                try:
                    await asyncio.wait_for(proc.wait(), timeout=15)
                except asyncio.TimeoutError:
                    log.warning(
                        "proc.wait() timed out for job %d — abandoning channel",
                        job_id,
                    )

                # Final flush: write exactly the lines that the throttled
                # window has not stored yet. (The earlier modulo-20 tail
                # computation predated pending_log_chunks and could both
                # duplicate already-written lines and drop unwritten ones,
                # including the [ABORTED] notice.)
                if pending_log_chunks:
                    tail = "".join(pending_log_chunks)
                    pending_log_chunks.clear()
                    await _append_stage_log(job_id, "surface_validate", tail)

        result["bad_blocks"] = bad_blocks_total
        result["aborted"] = bad_blocks_total > settings.bad_block_threshold
    except asyncio.CancelledError:
        # Best-effort kill of the remote badblocks process before
        # propagating the cancel. asyncio.shield() keeps the kill attempt
        # from being interrupted by the same cancellation. Re-raise so the
        # caller sees the cancellation rather than a stage failure.
        try:
            await asyncio.shield(kill.kill_remote_process(job_id))
        except Exception:
            pass
        await _append_stage_log(
            job_id, "surface_validate",
            "\n[ABORTED] task cancelled (likely container restart or shutdown)\n",
        )
        raise
    except Exception as exc:
        await _append_stage_log(job_id, "surface_validate", f"\n[SSH error] {exc}\n")
        await _set_stage_error(job_id, "surface_validate", f"SSH badblocks error: {exc}")
        return False

    await _update_stage_bad_blocks(job_id, "surface_validate", result["bad_blocks"])
    if cancelled:
        # Operator cancel: report "not passed" without recording a stage
        # error, the same way the polling stages do.
        return False
    if result["aborted"] or result["bad_blocks"] > settings.bad_block_threshold:
        await _set_stage_error(
            job_id, "surface_validate",
            f"Surface validate FAILED: {result['bad_blocks']} bad block(s) found "
            f"(threshold: {settings.bad_block_threshold})"
        )
        return False
    return True
async def _stage_surface_validate_truenas(job_id: int, devname: str, drive_id: int) -> bool:
    """Surface validation via the TrueNAS ``disk.wipe`` REST API.

    Used when SSH is configured but badblocks is unavailable on the host
    (TrueNAS CORE / FreeBSD). Starts a FULL wipe job, polls it every
    ``POLL_INTERVAL`` seconds, and on success runs a post-wipe SMART
    attribute check over SSH.

    Post-wipe SMART handling:

    * failures that are all prefixed ``"SSH error:"`` are a soft pass —
      a transport blip must not invalidate a completed wipe (the same
      policy the NVMe path and the final check apply);
    * any other failure fails the stage;
    * an exception from the SMART read is logged as non-fatal.

    Returns True when the wipe succeeded and no real SMART failure was
    found. Returns False on cancel, or after recording a stage error.
    """
    from app import ssh_client

    stage = "surface_validate"
    await _append_stage_log(
        job_id, stage,
        f"[START] TrueNAS disk.wipe FULL — {devname}\n"
        f"[NOTE] DESTRUCTIVE: all data on {devname} will be overwritten.\n\n"
    )

    # Start the wipe job
    try:
        tn_job_id = await _get_client().wipe_disk(devname, "FULL")
    except Exception as exc:
        await _set_stage_error(job_id, stage, f"Failed to start disk.wipe: {exc}")
        return False
    await _append_stage_log(
        job_id, stage,
        f"[JOB] TrueNAS wipe job started (job_id={tn_job_id})\n"
    )

    # Poll until complete
    log_flush_counter = 0
    while True:
        if await _is_cancelled(job_id):
            try:
                await _get_client().abort_job(tn_job_id)
            except Exception as exc:
                # Best-effort abort: keep going, but leave a trace so a wipe
                # that keeps running on the NAS can be diagnosed.
                log.warning(
                    "Failed to abort TrueNAS wipe job %s: %s", tn_job_id, exc,
                    extra={"job_id": job_id},
                )
            return False

        await asyncio.sleep(POLL_INTERVAL)
        try:
            job = await _get_client().get_job(tn_job_id)
        except Exception as exc:
            log.warning("Wipe job poll failed: %s", exc, extra={"job_id": job_id})
            await _append_stage_log(job_id, stage, f"[poll error] {exc}\n")
            continue
        if not job:
            await _set_stage_error(job_id, stage, f"Wipe job {tn_job_id} not found")
            return False

        state = job.get("state", "")
        # "progress" may be present but null; treat that like an empty dict.
        progress = job.get("progress") or {}
        pct = int(progress.get("percent", 0) or 0)
        desc = progress.get("description", "")
        # Cap at 99 while polling; 100 is only written once SUCCESS is seen.
        await _update_stage_percent(job_id, stage, min(pct, 99))
        await _recalculate_progress(job_id)
        _push_update()

        # Log progress description every ~5 polls to avoid DB spam
        log_flush_counter += 1
        if desc and log_flush_counter % 5 == 0:
            await _append_stage_log(job_id, stage, f"[{pct}%] {desc}\n")

        if state == "SUCCESS":
            await _update_stage_percent(job_id, stage, 100)
            await _append_stage_log(
                job_id, stage,
                f"\n[DONE] Wipe job {tn_job_id} completed successfully.\n"
            )
            # Post-wipe SMART check — catch sectors that failed under write stress
            if ssh_client.is_configured() and drive_id is not None:
                await _append_stage_log(
                    job_id, stage,
                    "[CHECK] Running post-wipe SMART attribute check...\n"
                )
                try:
                    attrs = await ssh_client.get_smart_attributes(devname)
                    failures = attrs["failures"] or []
                    ssh_only = bool(failures) and all(
                        f.startswith("SSH error:") for f in failures
                    )
                    if ssh_only:
                        # Transport problem, not a drive problem: don't store
                        # the result and don't fail the completed wipe.
                        log.warning(
                            "Post-wipe SMART check soft-pass (SSH errors): %s",
                            failures, extra={"job_id": job_id},
                        )
                        await _append_stage_log(
                            job_id, stage,
                            "[WARN] post-wipe SMART check had SSH errors "
                            "(soft-passing): " + "; ".join(failures) + "\n"
                        )
                    else:
                        await _store_smart_attrs(drive_id, attrs)
                        if failures:
                            error = "Post-wipe SMART check: " + "; ".join(failures)
                            await _set_stage_error(job_id, stage, error)
                            return False
                        if attrs["warnings"]:
                            await _append_stage_log(
                                job_id, stage,
                                "[WARNING] " + "; ".join(attrs["warnings"]) + "\n"
                            )
                        await _append_stage_log(
                            job_id, stage,
                            f"[CHECK] SMART health: {attrs['health']} — no critical attributes.\n"
                        )
                except Exception as exc:
                    log.warning("Post-wipe SMART check failed: %s", exc)
                    await _append_stage_log(
                        job_id, stage,
                        f"[WARN] Post-wipe SMART check failed (non-fatal): {exc}\n"
                    )
            # Publish the final 100% so the UI does not sit at 99.
            await _recalculate_progress(job_id)
            _push_update()
            return True
        elif state in ("FAILED", "ABORTED", "ERROR"):
            error_msg = job.get("error") or f"Disk wipe failed (state={state})"
            await _set_stage_error(
                job_id, stage,
                f"TrueNAS disk.wipe FAILED: {error_msg}"
            )
            return False
        # RUNNING or WAITING — keep polling
async def _stage_timed_simulate(job_id: int, stage_name: str, duration_seconds: int) -> bool:
    """Simulate a timed stage with progress updates (mock / dev mode).

    Progress is the elapsed fraction of ``duration_seconds``, capped at 100,
    refreshed every ``POLL_INTERVAL`` seconds. A non-positive duration is
    treated as "already finished" instead of dividing by zero.

    Returns True once progress reaches 100, False if the job is cancelled.
    """
    start = time.monotonic()
    while True:
        if await _is_cancelled(job_id):
            return False
        elapsed = time.monotonic() - start
        if duration_seconds > 0:
            pct = min(100, int(elapsed / duration_seconds * 100))
        else:
            # Configured duration of 0 (or less): complete immediately.
            pct = 100
        await _update_stage_percent(job_id, stage_name, pct)
        await _recalculate_progress(job_id, None)
        _push_update()
        if pct >= 100:
            return True
        await asyncio.sleep(POLL_INTERVAL)
async def _stage_final_check(job_id: int, devname: str, drive_id: int | None = None) -> bool:
    """Verify the drive's SMART state after all other stages.

    SSH mode (SSH configured and ``drive_id`` given): read SMART attributes
    over SSH. If every reported failure is an ``"SSH error:"`` entry the
    read is retried twice, 30 s apart; if it is still SSH-only the stage
    soft-passes, because a transient connectivity failure must not
    invalidate a prior multi-day surface_validate. Otherwise the attributes
    are stored and the stage fails on FAILED health or any failure.

    Fallback (no SSH, no ``drive_id``, or the SSH branch raised): check the
    ``smart_health`` column of the drive row looked up by ``devname``.

    Returns True on pass / soft-pass, False after recording a stage error.
    """
    await asyncio.sleep(1)
    from app import ssh_client

    def _ssh_only(failures: list[str]) -> bool:
        """True when there are failures and all of them are SSH transport errors."""
        return bool(failures) and all(f.startswith("SSH error:") for f in failures)

    if ssh_client.is_configured() and drive_id is not None:
        try:
            attrs = await ssh_client.get_smart_attributes(devname)
            # Up to two retries after the initial read (three reads total).
            for attempt in range(2):
                if not _ssh_only(attrs.get("failures") or []):
                    break
                log.warning(
                    "final_check SSH unreachable (attempt %d/3); retrying in 30s",
                    attempt + 1,
                    extra={"job_id": job_id, "devname": devname},
                )
                await asyncio.sleep(30)
                attrs = await ssh_client.get_smart_attributes(devname)

            failures = attrs.get("failures") or []
            if _ssh_only(failures):
                log.warning(
                    "final_check soft-pass: SSH unreachable after retries; prior stages stand",
                    extra={"job_id": job_id, "devname": devname, "ssh_error": failures},
                )
                # Make the soft-pass visible to the operator as well, not
                # only in the process log.
                await _append_stage_log(
                    job_id, "final_check",
                    "[WARN] final SMART check had SSH errors "
                    "(soft-passing): " + "; ".join(failures) + "\n",
                )
                return True

            await _store_smart_attrs(drive_id, attrs)
            if attrs["health"] == "FAILED" or failures:
                msg = failures or [f"SMART health: {attrs['health']}"]
                await _set_stage_error(job_id, "final_check",
                                       "Final check failed: " + "; ".join(msg))
                return False
            return True
        except Exception as exc:
            log.warning("SSH final_check raised, falling back to DB check: %s", exc)

    # DB check (mock mode fallback)
    async with _db() as db:
        cur = await db.execute(
            "SELECT smart_health FROM drives WHERE devname=?", (devname,)
        )
        row = await cur.fetchone()
    if not row:
        # A missing row is not the same as a FAILED health reading; say so.
        await _set_stage_error(
            job_id, "final_check",
            f"Drive {devname} not found in database during final check",
        )
        return False
    if row[0] == "FAILED":
        await _set_stage_error(job_id, "final_check", "Drive SMART health is FAILED after burn-in")
        return False
    return True

209
app/burnin/unlock.py Normal file
View file

@ -0,0 +1,209 @@
"""Pool-drive unlock state.
Drives that ZFS reports as belonging to an active zpool (including the
boot pool), drives carrying ZFS labels from a previously-imported pool
("exported"), and drives with a non-ZFS mount somewhere ("mounted") are
all locked from burn-in until the operator explicitly unlocks them via
``POST /api/v1/drives/{id}/unlock``. Grants live in memory only — a
container restart wipes them, which is the right default for "this is
very dangerous." TTL is bounded so an unlock you forgot about can't sit
armed indefinitely.
Each lock kind has its own confirm token to make the override
deliberate; see grant_pool_unlock for the matching logic.
Public surface:
is_unlocked(drive_id, current_pool_name, current_pool_role) -> bool
unlock_expiry(drive_id, current_pool_name, current_pool_role) -> float|None
grant_pool_unlock(drive_id, confirm_token, operator, reason) -> float
PoolMemberError — raised by start_job
UNLOCK_TTL_SECONDS — for the unlock endpoint response
BOOT_POOL_NAME / *_TOKEN consts — for the UI / audit
"""
from __future__ import annotations
import logging
import time as _time
from dataclasses import dataclass
import aiosqlite
from app.config import settings
log = logging.getLogger(__name__)
# Lifetime of an unlock grant in seconds; grant_pool_unlock adds this to the
# current time to compute the grant's expiry.
UNLOCK_TTL_SECONDS = 600  # 10 minutes
# Pool name that identifies the boot pool. Drives in it need the dedicated
# confirm token below, and PoolMemberError words its message differently.
BOOT_POOL_NAME = "boot-pool"
BOOT_POOL_CONFIRM_TOKEN = "DESTROY BOOT POOL"
# pool_role value for drives classified as "exported", and the confirm
# token required to unlock them.
EXPORTED_POOL_ROLE = "exported"
EXPORTED_CONFIRM_TOKEN = "DESTROY EXPORTED POOL"
# pool_role value for drives classified as "mounted", and the confirm token
# required to unlock them.
MOUNTED_ROLE = "mounted"
MOUNTED_CONFIRM_TOKEN = "DESTROY MOUNTED FILESYSTEM"
@dataclass
class _UnlockGrant:
    """An operator-issued, time-bounded permission to burn-in a pool drive.

    The grant is BOUND to the (pool_name, pool_role) observed at unlock
    time. If a subsequent poll reclassifies the drive — e.g. it was
    "(exported)" when unlocked but is now in active pool "tank", or it
    used to be a cache vdev and now shows as data — the grant is
    invalidated. Otherwise the operator's "I confirm this exported drive
    is decommissioned" judgement would silently authorise destruction
    of a live pool.
    """

    # Absolute expiry as a time.time() timestamp (seconds since the epoch).
    expiry: float
    # Pool name recorded when the grant was issued.
    pool_name: str
    # Pool role recorded when the grant was issued; may be None.
    pool_role: str | None
# In-memory registry of active grants, keyed by drive id. Nothing here is
# persisted. Entries are written by grant_pool_unlock and removed when a
# lookup finds them expired or mismatched, or via invalidate_grant.
_unlock_grants: dict[int, _UnlockGrant] = {}
class PoolMemberError(Exception):
    """Raised by start_job when a drive is in a zpool and not unlocked.

    The offending drive id, pool name and pool role are kept as attributes
    so callers can build their own response; the exception message is a
    ready-made human-readable summary.
    """

    def __init__(self, drive_id: int, pool_name: str, pool_role: str | None):
        self.drive_id = drive_id
        self.pool_name = pool_name
        self.pool_role = pool_role

        # Boot-pool membership is called out in capitals in the message.
        if pool_name == BOOT_POOL_NAME:
            pool_label = "BOOT POOL"
        else:
            pool_label = "pool"
        role_suffix = " (" + pool_role + ")" if pool_role else ""

        message = (
            f"Drive is part of {pool_label} '{pool_name}'{role_suffix}. "
            "Unlock required before burn-in."
        )
        super().__init__(message)
def is_unlocked(drive_id: int, current_pool_name: str | None,
                current_pool_role: str | None) -> bool:
    """Tell whether ``drive_id`` currently holds a usable unlock grant.

    A grant is usable only while it has not expired AND the pool identity
    passed in equals the identity recorded when the grant was issued.
    An expired or mismatched grant is removed from the registry as a side
    effect; a mismatch is additionally logged at WARNING level.
    """
    grant = _unlock_grants.get(drive_id)
    if grant is None:
        return False

    now = _time.time()
    if now >= grant.expiry:
        _unlock_grants.pop(drive_id, None)
        return False

    recorded_identity = (grant.pool_name, grant.pool_role)
    current_identity = (current_pool_name, current_pool_role)
    if recorded_identity == current_identity:
        return True

    # The drive has been reclassified since the unlock — it may now belong
    # to a different (or live) pool. Drop the grant so the operator has to
    # unlock again against the current state.
    _unlock_grants.pop(drive_id, None)
    log.warning(
        "Invalidating unlock grant for drive_id=%d: pool changed from "
        "(%s, %s) to (%s, %s)",
        drive_id, grant.pool_name, grant.pool_role,
        current_pool_name, current_pool_role,
    )
    return False
def unlock_expiry(drive_id: int, current_pool_name: str | None,
                  current_pool_role: str | None) -> float | None:
    """Return the absolute expiry timestamp of an active grant, or None.

    Applies the same identity binding as ``is_unlocked``: a grant that has
    expired, or whose recorded (pool_name, pool_role) differs from the
    values passed in, is removed from the registry and reported as None.
    Unlike ``is_unlocked`` no log record is emitted on a mismatch.
    """
    grant = _unlock_grants.get(drive_id)
    if grant is None:
        return None

    still_valid = _time.time() < grant.expiry
    if still_valid:
        same_pool = grant.pool_name == current_pool_name
        same_role = grant.pool_role == current_pool_role
        if same_pool and same_role:
            return grant.expiry

    # Expired or bound to a different pool identity: reap it.
    _unlock_grants.pop(drive_id, None)
    return None
def invalidate_grant(drive_id: int) -> None:
    """Remove any grant held for ``drive_id``; a no-op when none exists.

    Used by start_job when a fresh SSH-side pool check shows the drive's
    identity has shifted.
    """
    try:
        del _unlock_grants[drive_id]
    except KeyError:
        # Nothing to invalidate.
        pass
async def grant_pool_unlock(drive_id: int, confirm_token: str,
                            operator: str, reason: str) -> float:
    """Validate the operator's confirmation and arm a time-limited unlock.

    Steps, in order: validate ``reason`` and ``operator``; load the drive's
    pool identity from the database; compare ``confirm_token`` (stripped)
    with the token required for that kind of lock; write and commit an
    audit event; only then record the in-memory grant.

    Returns:
        The unix timestamp at which the grant expires
        (now + ``UNLOCK_TTL_SECONDS``).

    Raises:
        ValueError: reason shorter than 5 characters, blank operator,
            unknown drive, drive not in any pool, or wrong confirm token.
    """
    if not reason or len(reason.strip()) < 5:
        raise ValueError("A reason of at least 5 characters is required.")
    if not operator or not operator.strip():
        raise ValueError("Operator name is required.")

    async with aiosqlite.connect(settings.db_path) as db:
        db.row_factory = aiosqlite.Row
        await db.execute("PRAGMA busy_timeout=10000")
        cursor = await db.execute(
            "SELECT pool_name, pool_role, devname FROM drives WHERE id=?",
            (drive_id,),
        )
        drive = await cursor.fetchone()
        if not drive:
            raise ValueError("Drive not found.")

        pool_name = drive["pool_name"]
        pool_role = drive["pool_role"]
        if not pool_name:
            raise ValueError(
                "This drive is not part of any pool — no unlock needed."
            )

        # Boot-pool / exported / mounted-fs each get a dedicated, harder-to-
        # fat-finger token and their own audit event type. Active data
        # pools just need their pool name typed.
        if pool_name == BOOT_POOL_NAME:
            expected, evt = BOOT_POOL_CONFIRM_TOKEN, "boot_pool_drive_unlocked"
        elif pool_role == EXPORTED_POOL_ROLE:
            expected, evt = EXPORTED_CONFIRM_TOKEN, "exported_pool_drive_unlocked"
        elif pool_role == MOUNTED_ROLE:
            expected, evt = MOUNTED_CONFIRM_TOKEN, "mounted_drive_unlocked"
        else:
            expected, evt = pool_name, "pool_drive_unlocked"

        supplied = (confirm_token or "").strip()
        if supplied != expected:
            raise ValueError("Confirmation token does not match.")

        audit_message = (
            f"Unlocked {pool_name} drive {drive['devname']} for burn-in: {reason.strip()}"
        )
        await db.execute(
            """INSERT INTO audit_events
            (event_type, drive_id, burnin_job_id, operator, message)
            VALUES (?,?,?,?,?)""",
            (evt, drive_id, None, operator.strip(), audit_message),
        )
        await db.commit()

    # Arm the in-memory grant ONLY after the audit row is durable. If the
    # commit raised we never get here, so there are no unaudited active
    # unlocks. The grant is bound to the (pool_name, pool_role) read above
    # so that a later reclassification of the drive invalidates it.
    expiry = _time.time() + UNLOCK_TTL_SECONDS
    _unlock_grants[drive_id] = _UnlockGrant(
        expiry=expiry,
        pool_name=pool_name,
        pool_role=pool_role,
    )
    log.warning(
        "Pool-drive unlock granted: drive_id=%d pool=%s role=%s "
        "operator=%s reason=%r",
        drive_id, pool_name, pool_role, operator, reason,
    )
    return expiry

119
app/config.py Normal file
View file

@ -0,0 +1,119 @@
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration with built-in defaults.

    ``model_config`` points pydantic-settings at a UTF-8 ``.env`` file and
    makes setting names case-insensitive. Every field below carries the
    default used when no override is supplied.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
    )

    app_host: str = "0.0.0.0"  # nosec B104 — container deliberately binds all interfaces; nginx-proxy-manager fronts it.
    app_port: int = 8080
    db_path: str = "/data/app.db"

    truenas_base_url: str = "http://localhost:8000"
    truenas_api_key: str = "mock-key"
    truenas_verify_tls: bool = False

    poll_interval_seconds: int = 12
    stale_threshold_seconds: int = 45
    max_parallel_burnins: int = 2
    surface_validate_seconds: int = 45  # mock simulation duration
    io_validate_seconds: int = 25  # mock simulation duration

    # Logging
    log_level: str = "INFO"

    # Security — comma-separated IPs or CIDRs, e.g. "10.0.0.0/24,127.0.0.1"
    # Empty string means allow all (default).
    allowed_ips: str = ""

    # SMTP — daily status email at 8am local time
    # Leave smtp_host empty to disable email.
    smtp_host: str = ""
    smtp_port: int = 587
    smtp_user: str = ""
    smtp_password: str = ""
    smtp_from: str = ""
    smtp_to: str = ""  # comma-separated recipients
    smtp_report_hour: int = 8  # local hour to send (0-23)
    smtp_daily_report_enabled: bool = True  # set False to skip daily report without disabling alerts
    smtp_alert_on_fail: bool = True  # immediate email when a job fails
    smtp_alert_on_pass: bool = False  # immediate email when a job passes
    smtp_ssl_mode: str = "starttls"  # "starttls" | "ssl" | "plain"
    smtp_timeout: int = 60  # connection + read timeout in seconds

    # Webhook — POST JSON payload on every job state change (pass/fail)
    # Leave empty to disable. Works with Slack, Discord, ntfy, n8n, etc.
    webhook_url: str = ""

    # Stuck-job detection: jobs running longer than this are marked 'unknown'
    # and the remote badblocks/smartctl is killed. 168h (7 days) covers a
    # full -w surface_validate on a 14 TB+ HDD with margin. Older default
    # was 24h which false-positived on multi-TB drives almost every time.
    stuck_job_hours: int = 168

    # Temperature thresholds (°C) — drives table colouring + precheck gate
    temp_warn_c: int = 46  # orange warning
    temp_crit_c: int = 55  # red critical (precheck refuses to start above this)

    # Bad-block tolerance — surface_validate fails if bad blocks exceed this
    bad_block_threshold: int = 0

    # Surface-validate (badblocks) tunables — defaults match the Spearfoot
    # disk-burnin.sh community script's recommended geometry for large HDDs.
    # block_size : -b in bytes; aligned to AF (4 KiB) sectors. Bumping
    # to 8192 roughly halves badblocks runtime on multi-TB
    # drives at the cost of ~2x RAM in the test buffer.
    # block_buffer : -c blocks held in memory per IO. 64 = badblocks
    # default. Higher values = larger buffer, faster IO,
    # more RAM (block_size * block_buffer bytes per pass).
    # passes : -p value. 1 = repeat until one consecutive clean
    # scan (current behavior). 2-3 for paranoid burn-in
    # that re-confirms after finding errors.
    surface_validate_block_size: int = 4096
    surface_validate_block_buffer: int = 64
    surface_validate_passes: int = 1

    # SSH credentials for direct TrueNAS command execution (Stage 7)
    # When ssh_host is set, burn-in stages use SSH for smartctl/badblocks instead of REST API.
    # Leave ssh_host empty to use the mock/REST API (development mode).
    ssh_host: str = ""
    ssh_port: int = 22
    ssh_user: str = "root"  # TrueNAS CORE default is root
    ssh_password: str = ""  # Password auth (leave blank if using key)
    ssh_key: str = ""  # PEM private key content (paste full key including headers)

    # Application version — used by the /api/v1/updates/check endpoint
    app_version: str = "1.0.0-60"

    # ---- Authentication (1.0.0-22) ----
    # session_secret: HMAC key for signing session cookies. Empty = generate
    # one and persist to /data/session_secret on first run (sessions survive
    # restarts but rotate if the file is deleted). Set explicitly via
    # SESSION_SECRET env var if you want to share secrets across replicas.
    session_secret: str = ""
    session_max_age_seconds: int = 60 * 60 * 24 * 7  # 7 days
    # Set to True when the dashboard is exclusively reachable over HTTPS
    # (typical when fronted by nginx-proxy-manager with TLS). Refuses to
    # send the session cookie on plain HTTP, eliminating the on-the-wire
    # exposure surface. Leaving False allows initial deploy + LAN testing.
    session_cookie_secure: bool = False
    # Initial admin bootstrap. If both env vars are set AND the users table
    # is empty at startup, create that account immediately. After that the
    # env vars are ignored — change passwords via the UI / database, not
    # by editing compose.yml.
    initial_admin_username: str = ""
    initial_admin_password: str = ""

    # ---- Retention + backup (1.0.0-23) ----
    # log_days : burnin_stages.log_text NULLed out after this many days
    # (history rows themselves are preserved). Default keeps
    # ~5 weeks; long-soak burn-ins typically finish in <2.
    # backup_keep: number of nightly DB snapshots to keep in /data/backups.
    retention_log_days: int = 35
    retention_backup_keep: int = 14
# Shared settings instance, built once when this module is imported.
settings = Settings()

View file

@ -89,6 +89,49 @@ _MIGRATIONS = [
"ALTER TABLE smart_tests ADD COLUMN raw_output TEXT",
# Stage 8: track last reset time so dashboard burn-in col clears after reset
"ALTER TABLE drives ADD COLUMN last_reset_at TEXT",
# 1.0.0-15: pool-membership lock
"ALTER TABLE drives ADD COLUMN pool_name TEXT",
"ALTER TABLE drives ADD COLUMN pool_role TEXT",
"ALTER TABLE drives ADD COLUMN pool_seen_at TEXT",
# 1.0.0-44: per-pattern badblocks progress for the drive drawer's
# 4-meter UI. bb_phase is 1-8 (1=write 0xaa, 2=verify 0xaa, 3=write
# 0x55, 4=verify 0x55, 5=write 0xff, 6=verify 0xff, 7=write 0x00,
# 8=verify 0x00). bb_phase_pct is 0-100 within the current phase.
"ALTER TABLE burnin_stages ADD COLUMN bb_phase INTEGER",
"ALTER TABLE burnin_stages ADD COLUMN bb_phase_pct REAL",
# 1.0.0-46: live write/read throughput for the per-pattern meters.
# Computed from successive `XX% done` lines in badblocks output:
# delta_bytes = (overall_pct_delta / 800) * drive_size_bytes.
# Updated on every progress line; NULL until the second progress
# line arrives (need two samples to compute a rate).
"ALTER TABLE burnin_stages ADD COLUMN bb_mbps REAL",
# 1.0.0-47: per-pattern duration history. JSON map of
# {"1": "2026-05-09T05:39:44+00:00", "2": ..., ...} where each key
# is the phase number (1-8) and the value is when the parser first
# observed that phase. Drawer derives "0xaa: 14h 22m" by diffing
# consecutive phase-1 keys.
"ALTER TABLE burnin_stages ADD COLUMN bb_phase_history TEXT",
# 1.0.0-19: enforce one active burn-in per drive at the storage layer.
# Closes the read-then-insert race in burnin.start_job — without this,
# two concurrent /api/v1/burnin/start requests for the same drive could
# both observe zero active jobs and both insert queued rows.
"""CREATE UNIQUE INDEX IF NOT EXISTS uniq_active_burnin_per_drive
ON burnin_jobs (drive_id) WHERE state IN ('queued', 'running')""",
# 1.0.0-22: app-level login (username + bcrypt password)
"""CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
username TEXT UNIQUE NOT NULL,
password_hash TEXT NOT NULL,
full_name TEXT,
is_admin INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL,
last_login_at TEXT
)""",
# 1.0.0-28: case-insensitive uniqueness. The base UNIQUE on username
# is case-sensitive but login does NOCASE — without this index two
# users `Admin` and `admin` could coexist and shadow each other.
"""CREATE UNIQUE INDEX IF NOT EXISTS uniq_users_username_nocase
ON users (username COLLATE NOCASE)""",
]
@ -133,6 +176,7 @@ async def init_db() -> None:
Path(settings.db_path).parent.mkdir(parents=True, exist_ok=True)
async with aiosqlite.connect(settings.db_path) as db:
await db.execute("PRAGMA journal_mode=WAL")
await db.execute("PRAGMA busy_timeout=60000")
await db.execute("PRAGMA foreign_keys=ON")
await db.executescript(SCHEMA)
await _run_migrations(db)
@ -144,6 +188,7 @@ async def get_db():
db.row_factory = aiosqlite.Row
try:
await db.execute("PRAGMA journal_mode=WAL")
await db.execute("PRAGMA busy_timeout=60000")
await db.execute("PRAGMA foreign_keys=ON")
yield db
finally:

View file

@ -5,6 +5,7 @@ Disabled when SMTP_HOST is not set.
"""
import asyncio
import html
import logging
import smtplib
import ssl
@ -109,17 +110,63 @@ def _drive_rows_html(drives: list[dict]) -> str:
return "\n".join(rows)
def _build_html(drives: list[dict], generated_at: str) -> str:
def _build_unlock_banner_html(events: list[dict]) -> str:
"""Banner listing every pool-drive unlock granted in the last 24h.
Every interpolated DB field is run through html.escape operator and
reason are free-text from the unlock modal and otherwise inject into
the email body verbatim.
"""
if not events:
return ""
rows = []
for e in events:
evt = e.get("event_type") or ""
is_boot = evt == "boot_pool_drive_unlocked"
is_exported = evt == "exported_pool_drive_unlocked"
is_mounted = evt == "mounted_drive_unlocked"
kind = (
"BOOT POOL" if is_boot
else "EXPORTED ZFS" if is_exported
else "MOUNTED FILESYSTEM" if is_mounted
else "pool"
)
when = html.escape((e.get("created_at") or "")[:19])
operator = html.escape(e.get("operator") or "?")
devname = html.escape(e.get("devname") or "?")
# `message` already includes pool name, devname, and the operator's
# reason — surface it verbatim so the audit trail is faithful.
message = html.escape(e.get("message") or "")
rows.append(
f"<li style='margin:4px 0'><strong>{when}</strong> &middot; "
f"<strong>{operator}</strong> unlocked a {kind} drive "
f"({devname}): "
f"<span style='color:#c9d1d9'>{message}</span></li>"
)
return f"""
<div style="background:#4b1113;border:1px solid #f85149;border-radius:6px;
padding:14px 18px;margin-bottom:20px;color:#f85149">
<div style="font-weight:600;font-size:14px;margin-bottom:6px">
&#x26A0; {len(events)} pool-drive unlock(s) in the last 24h
</div>
<ul style="margin:0;padding-left:18px;font-size:12.5px;color:#f0a0a0">
{''.join(rows)}
</ul>
</div>"""
def _build_html(drives: list[dict], generated_at: str,
unlock_events: list[dict] | None = None) -> str:
total = len(drives)
failed_drives = [d for d in drives if d.get("smart_health") == "FAILED"]
running_burnin = [d for d in drives if (d.get("burnin") or {}).get("state") == "running"]
passed_burnin = [d for d in drives if (d.get("burnin") or {}).get("state") == "passed"]
# Alert banner
alert_html = ""
# Alert banners (unlock events first — the audit-grade signal)
alert_html = _build_unlock_banner_html(unlock_events or [])
if failed_drives:
names = ", ".join(d["devname"] for d in failed_drives)
alert_html = f"""
alert_html += f"""
<div style="background:#4b1113;border:1px solid #f85149;border-radius:6px;padding:14px 18px;margin-bottom:20px;color:#f85149;font-weight:500">
SMART health FAILED on {len(failed_drives)} drive(s): {names}
</div>"""
@ -131,7 +178,7 @@ def _build_html(drives: list[dict], generated_at: str) -> str:
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>TrueNAS Burn-In Daily Report</title>
<title>NAS Burn-In Daily Report</title>
</head>
<body style="margin:0;padding:0;background:#0d1117;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',system-ui,sans-serif;font-size:14px;color:#c9d1d9">
<table width="100%" cellpadding="0" cellspacing="0" style="background:#0d1117;min-height:100vh">
@ -144,7 +191,7 @@ def _build_html(drives: list[dict], generated_at: str) -> str:
<td style="background:#161b22;border:1px solid #30363d;border-radius:10px 10px 0 0;padding:20px 24px;border-bottom:none">
<table width="100%" cellpadding="0" cellspacing="0">
<tr>
<td><span style="font-size:18px;font-weight:700;color:#f0f6fc">TrueNAS Burn-In</span>
<td><span style="font-size:18px;font-weight:700;color:#f0f6fc">NAS Burn-In</span>
<span style="color:#8b949e;font-size:13px;margin-left:10px">Daily Status Report</span></td>
<td align="right" style="color:#8b949e;font-size:12px">{generated_at}</td>
</tr>
@ -214,7 +261,7 @@ def _build_html(drives: list[dict], generated_at: str) -> str:
<!-- Footer -->
<tr>
<td style="background:#161b22;border:1px solid #30363d;border-top:none;border-radius:0 0 10px 10px;padding:14px 24px;text-align:center">
<span style="font-size:12px;color:#8b949e">Generated by TrueNAS Burn-In Dashboard · {generated_at}</span>
<span style="font-size:12px;color:#8b949e">Generated by NAS Burn-In Dashboard · {generated_at}</span>
</td>
</tr>
@ -256,6 +303,9 @@ def _send_email(subject: str, html: str) -> None:
timeout = int(settings.smtp_timeout or 60)
port = _smtp_port()
# SMTP_SSL is a subclass of SMTP. Annotating the binding with the base
# class lets mypy accept the SMTP_SSL assignment below as well as a plain
# SMTP one, instead of inferring the type from the first assignment only.
server: smtplib.SMTP
if mode == "ssl":
server = smtplib.SMTP_SSL(settings.smtp_host, port, context=ctx, timeout=timeout)
server.ehlo()
@ -284,9 +334,42 @@ async def _fetch_report_data() -> list[dict]:
async with aiosqlite.connect(settings.db_path) as db:
db.row_factory = aiosqlite.Row
await db.execute("PRAGMA journal_mode=WAL")
await db.execute("PRAGMA busy_timeout=60000")
return await _fetch_drives_for_template(db)
async def _fetch_unlock_events_24h() -> list[dict]:
    """Return the unlock audit events recorded during the last 24 hours.

    Each row is an operator override of the pool-membership lock — a
    deliberate decision to risk a pool — which is why the daily report
    surfaces them as an audit-grade banner. Rows are returned newest
    first as plain dicts, joined with the drive's devname, pool_name and
    pool_role when the drive row exists.
    """
    # julianday() handles the 'YYYY-MM-DDTHH:MM:SS.fff+00:00' format we
    # write from Python. Comparing the raw string against
    # datetime('now','-1 day') (formatted 'YYYY-MM-DD HH:MM:SS') produces
    # subtle off-by-up-to-a-day errors because of the 'T' vs ' ' separator
    # and the '+00:00' suffix.
    query = """
SELECT ae.event_type, ae.operator, ae.message, ae.created_at,
d.devname, d.pool_name, d.pool_role
FROM audit_events ae
LEFT JOIN drives d ON d.id = ae.drive_id
WHERE ae.event_type IN (
'pool_drive_unlocked',
'boot_pool_drive_unlocked',
'exported_pool_drive_unlocked',
'mounted_drive_unlocked')
AND julianday(ae.created_at) >= julianday('now', '-1 day')
ORDER BY ae.created_at DESC
"""
    async with aiosqlite.connect(settings.db_path) as conn:
        conn.row_factory = aiosqlite.Row
        for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA busy_timeout=60000"):
            await conn.execute(pragma)
        cursor = await conn.execute(query)
        fetched = await cursor.fetchall()
        return [dict(row) for row in fetched]
# ---------------------------------------------------------------------------
# Scheduler
# ---------------------------------------------------------------------------
@ -387,6 +470,7 @@ async def test_smtp_connection() -> dict:
timeout = int(settings.smtp_timeout or 60)
port = _smtp_port()
server: smtplib.SMTP
if mode == "ssl":
server = smtplib.SMTP_SSL(settings.smtp_host, port,
context=ctx, timeout=timeout)
@ -411,9 +495,16 @@ async def test_smtp_connection() -> dict:
async def send_report_now() -> None:
    """Send a report immediately (used by on-demand API endpoint).

    Fetches the current drive list and the last 24 hours of pool-drive
    unlock events, renders the HTML body, and sends it through
    ``_send_email`` on a worker thread so the blocking SMTP call does not
    stall the event loop. When unlock events exist, their count is
    appended to the subject line.
    """
    drives = await _fetch_report_data()
    unlock_events = await _fetch_unlock_events_24h()

    # Sample the clock once so the body timestamp and the subject date
    # cannot disagree across a midnight boundary.
    now = datetime.now()

    # Named body_html (not html) so the module-level `html` import is not
    # shadowed inside this function.
    body_html = _build_html(drives, now.strftime("%Y-%m-%d %H:%M"), unlock_events)

    # The suffix carries its own separator; without it the subject reads
    # "(5 drives)2 pool unlock(s)".
    suffix = f" — {len(unlock_events)} pool unlock(s)" if unlock_events else ""
    subject = (
        f"Burn-In Report — {now.strftime('%Y-%m-%d')} "
        f"({len(drives)} drives){suffix}"
    )
    await asyncio.to_thread(_send_email, subject, body_html)

250
app/main.py Normal file
View file

@ -0,0 +1,250 @@
import asyncio
import ipaddress
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.middleware.sessions import SessionMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse, PlainTextResponse
from app import auth, burnin, mailer, poller, retention, settings_store
from app.config import settings
from app.database import init_db
from app.logging_config import configure as configure_logging
from app.renderer import templates # noqa: F401 — registers filters as side-effect
from app.routes import router
from app.truenas import TrueNASClient
# Configure structured JSON logging before anything else logs
configure_logging()
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# IP allowlist middleware
# ---------------------------------------------------------------------------
class _IPAllowlistMiddleware(BaseHTTPMiddleware):
    """
    Block requests from IPs not in ALLOWED_IPS.

    When ALLOWED_IPS is empty the middleware is a no-op.
    Checks X-Forwarded-For first (trusts the leftmost address), then the
    direct client IP.

    NOTE(review): X-Forwarded-For is client-controlled unless a trusted
    reverse proxy overwrites it. If the app port is reachable directly, a
    caller can pass the allowlist by sending a forged header. Construct
    the middleware with ``trust_forwarded_for=False`` when no proxy sits
    in front of the app.
    """

    def __init__(self, app, allowed_ips: str, *,
                 trust_forwarded_for: bool = True) -> None:
        """Parse the comma-separated ``allowed_ips`` into networks.

        Entries may be single addresses or CIDR ranges; invalid entries
        are logged and ignored. ``trust_forwarded_for`` defaults to True,
        which preserves the existing header-first behaviour.
        """
        super().__init__(app)
        self._trust_forwarded_for = trust_forwarded_for
        self._networks: list[ipaddress.IPv4Network | ipaddress.IPv6Network] = []
        for entry in (s.strip() for s in allowed_ips.split(",") if s.strip()):
            try:
                self._networks.append(ipaddress.ip_network(entry, strict=False))
            except ValueError:
                log.warning("Invalid ALLOWED_IPS entry ignored: %r", entry)

    def _is_allowed(self, ip_str: str) -> bool:
        """Return True when ``ip_str`` parses and falls inside an allowed network."""
        try:
            addr = ipaddress.ip_address(ip_str)
        except ValueError:
            return False
        candidates = [addr]
        # A dual-stack listener reports IPv4 peers as ::ffff:a.b.c.d, which
        # never matches an IPv4 network. Also test the embedded IPv4 address.
        mapped = getattr(addr, "ipv4_mapped", None)
        if mapped is not None:
            candidates.append(mapped)
        return any(c in net for c in candidates for net in self._networks)

    async def dispatch(self, request: Request, call_next):
        """Forward allowed requests; answer everything else with 403."""
        if not self._networks:
            return await call_next(request)
        forwarded = ""
        if self._trust_forwarded_for:
            # Prefer X-Forwarded-For (leftmost = original client)
            forwarded = request.headers.get("X-Forwarded-For", "").split(",")[0].strip()
        client_ip = forwarded or (request.client.host if request.client else "")
        if self._is_allowed(client_ip):
            return await call_next(request)
        log.warning("Request blocked by IP allowlist", extra={"client_ip": client_ip})
        return PlainTextResponse("Forbidden", status_code=403)
# ---------------------------------------------------------------------------
# Poller supervisor — restarts run() if it ever exits unexpectedly
# ---------------------------------------------------------------------------
async def _supervised_poller(client: TrueNASClient) -> None:
    """Run ``poller.run`` forever, restarting it 5 seconds after any exit.

    Cancellation is propagated so shutdown stays clean. Both a crash and
    a plain return count as an unexpected exit: each is logged and
    followed by the same 5-second back-off, so a poller that returns
    immediately cannot spin the event loop.
    """
    while True:
        try:
            await poller.run(client)
        except asyncio.CancelledError:
            raise  # Propagate shutdown signal cleanly
        except Exception as exc:
            # exc_info keeps the traceback in the log record.
            log.critical(
                "Poller crashed unexpectedly — restarting in 5s: %s",
                exc,
                exc_info=True,
            )
        else:
            log.critical("Poller exited unexpectedly — restarting in 5s")
        await asyncio.sleep(5)
# ---------------------------------------------------------------------------
# Lifespan
# ---------------------------------------------------------------------------
_client: TrueNASClient | None = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: start background work, then tear it down.

    Startup initialises the database, the settings store, the bootstrap
    admin account and the burn-in module, then launches the poller,
    mailer and retention loops as tasks. Shutdown cancels those tasks,
    waits for them to finish, and closes the TrueNAS client.
    """
    global _client
    log.info("Starting up")
    await init_db()
    settings_store.init()
    await auth.bootstrap_admin_if_empty()
    _client = TrueNASClient()
    await burnin.init(_client)

    background = [
        asyncio.create_task(_supervised_poller(_client)),
        asyncio.create_task(mailer.run()),
        asyncio.create_task(retention.run()),
    ]

    yield

    log.info("Shutting down")
    for task in background:
        task.cancel()
    try:
        # return_exceptions=True collects each task's CancelledError
        # instead of raising it here.
        await asyncio.gather(*background, return_exceptions=True)
    except asyncio.CancelledError:
        pass
    await _client.close()
# ---------------------------------------------------------------------------
# App
# ---------------------------------------------------------------------------
# The ASGI application object. Startup and shutdown work is handled by the
# `lifespan` context manager passed here.
app = FastAPI(title="NAS Burn-In Dashboard", lifespan=lifespan)
# ---------------------------------------------------------------------------
# Defense-in-depth security headers
# ---------------------------------------------------------------------------
# CSP allows the CDNs we actively load:
# unpkg.com — htmx + htmx-sse-extension
# cdnjs.cloudflare.com — qrcodejs (history print page)
# cdn.jsdelivr.net — xterm.js (terminal tab, lazy-loaded)
# 'unsafe-inline' is needed for inline <script> in settings.html and
# inline <style> in job_print.html. Tighten via nonces later if you
# care about CSP-level XSS hardening; for now relies on Jinja2's
# autoescape + html.escape on all user-controlled fields.
_CSP = " ".join([
"default-src 'self';",
"script-src 'self' 'unsafe-inline' https://unpkg.com https://cdnjs.cloudflare.com https://cdn.jsdelivr.net;",
"style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net;",
"img-src 'self' data:;",
"font-src 'self' data:;",
"connect-src 'self' ws: wss:;",
"object-src 'none';",
"base-uri 'self';",
"form-action 'self';",
"frame-ancestors 'none';",
])
class _SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Add defensive response headers without overriding ones already set.

    These headers are cheap and never break the page as long as it stays
    same-origin. CSP is the meaningful one; the others close small XSS /
    clickjacking / referrer-leak surfaces.
    """

    async def dispatch(self, request: Request, call_next):
        response = await call_next(request)
        header_defaults = (
            ("Content-Security-Policy", _CSP),
            ("X-Content-Type-Options", "nosniff"),
            ("Referrer-Policy", "same-origin"),
            ("X-Frame-Options", "DENY"),
            # Permissions-Policy disables every feature we don't use. The
            # empty allowlist syntax `()` = block for all origins.
            (
                "Permissions-Policy",
                "camera=(), microphone=(), geolocation=(), interest-cohort=()",
            ),
        )
        # setdefault leaves any value a handler set explicitly untouched.
        for header_name, header_value in header_defaults:
            response.headers.setdefault(header_name, header_value)
        return response
# ---------------------------------------------------------------------------
# Auth gate — must be added BEFORE include_router so it runs first.
# Path-prefix allowlist below covers anything we want reachable without
# a session cookie. SSE streams + WebSockets fall through to the dependency
# in their handler so they 401 cleanly.
# ---------------------------------------------------------------------------
# Paths reachable without a session cookie: exact matches, then prefixes.
_PUBLIC_PATHS = {"/login", "/logout", "/health", "/auth/setup"}
_PUBLIC_PREFIXES = ("/static/", "/api/v1/auth/")


class _AuthGateMiddleware(BaseHTTPMiddleware):
    """Require an authenticated session for everything that is not public.

    Always sets ``request.state.current_user`` (None when unauthenticated)
    so templates and route handlers can rely on it. Unauthenticated HTML
    GETs are redirected to the login page; every other unauthenticated
    request gets a JSON 401.
    """

    _LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1"})
    # Headers a reverse proxy adds when relaying a request. Their presence
    # on a loopback connection means the real client is somewhere else.
    _PROXY_HEADERS = ("x-forwarded-for", "x-real-ip", "forwarded")

    @staticmethod
    async def _load_session_user(request: Request):
        """Resolve the session's user_id to a user, or None."""
        raw_id = request.session.get("user_id")
        if not raw_id:
            return None
        try:
            user_id = int(raw_id)
        except (TypeError, ValueError):
            # A malformed id is treated as "not logged in", not as a 500.
            return None
        return await auth.get_user_by_id(user_id)

    def _is_direct_loopback(self, request: Request) -> bool:
        """True for a loopback peer that was NOT relayed by a proxy.

        The peer address comes from request.client (the raw TCP socket),
        never from a header, so it cannot be spoofed remotely. Requiring
        the proxy headers to be absent keeps the bypass from applying to
        external traffic relayed by a reverse proxy that reaches the app
        over loopback (host networking, sidecar proxy). Sending such a
        header can only remove the bypass, never grant it.
        """
        if not request.client or request.client.host not in self._LOOPBACK_HOSTS:
            return False
        return not any(name in request.headers for name in self._PROXY_HEADERS)

    async def dispatch(self, request: Request, call_next):
        path = request.url.path
        request.state.current_user = await self._load_session_user(request)

        # Loopback bypass (1.0.0-56): direct requests from 127.0.0.1 / ::1
        # skip the auth gate so the in-container monitor can POST
        # /api/v1/burnin/start without provisioning a session cookie.
        if self._is_direct_loopback(request):
            if request.state.current_user is None:
                request.state.current_user = auth.LoopbackUser()
            return await call_next(request)

        if path in _PUBLIC_PATHS or path.startswith(_PUBLIC_PREFIXES):
            return await call_next(request)
        if request.state.current_user is not None:
            return await call_next(request)

        # Unauthenticated. HTML GETs bounce to /login with a `next` query
        # arg so the user lands back where they tried to go after logging
        # in. Anything else (API calls, SSE, POSTs) gets a 401.
        accept = request.headers.get("accept", "")
        if request.method == "GET" and "text/html" in accept:
            return auth.login_redirect(path)
        return JSONResponse(
            {"detail": "Authentication required"}, status_code=401
        )
# Middleware ordering: Starlette wraps the app so that the middleware added
# LAST is the OUTERMOST one and sees the request first. The resulting chain,
# outermost to innermost, is:
#   security headers -> IP allowlist (optional) -> session -> auth gate -> app
app.add_middleware(_AuthGateMiddleware)
# SessionMiddleware is added after the auth gate so it wraps it: that way
# request.session is populated before AuthGate runs.
app.add_middleware(
    SessionMiddleware,
    secret_key=auth.get_session_secret(),
    session_cookie="burnin_session",
    max_age=settings.session_max_age_seconds,
    # session_cookie_secure flips the cookie's Secure flag. Set to True
    # in production behind HTTPS (nginx-proxy-manager) so the auth cookie
    # is never sent on plain HTTP.
    https_only=settings.session_cookie_secure,
    # SameSite=strict is the primary CSRF mitigation: the browser never
    # sends the session cookie on cross-site requests, so an attacker
    # page can't trigger any state-changing endpoint even if it knows
    # the URL. Trade-off: an external link (email, chat) into the app
    # won't carry the session — user has to re-auth via /login. For an
    # internal-only tool that's the right default.
    same_site="strict",
)
if settings.allowed_ips:
    app.add_middleware(_IPAllowlistMiddleware, allowed_ips=settings.allowed_ips)
    log.info("IP allowlist active: %s", settings.allowed_ips)
# Security headers go on last, i.e. outermost, so they are also applied to
# responses that an inner middleware short-circuits (the auth gate's 401 and
# login redirect, the allowlist's 403) and not only to responses that reach
# the router.
app.add_middleware(_SecurityHeadersMiddleware)
app.mount("/static", StaticFiles(directory="app/static"), name="static")
app.include_router(router)

View file

@ -97,8 +97,17 @@ class DriveResponse(BaseModel):
smart_long: SmartTestState
notes: str | None = None
location: str | None = None
pool_name: str | None = None
pool_role: str | None = None
pool_unlocked_until: float | None = None # unix epoch; null = locked
class UpdateDriveRequest(BaseModel):
    """Payload carrying the editable free-text fields of a drive."""
    # Both fields are optional and default to None when omitted.
    notes: str | None = None
    location: str | None = None
class UnlockPoolDriveRequest(BaseModel):
    """Payload for a pool-drive unlock request; all three fields are required."""
    # NOTE(review): presumably a confirmation value the caller must echo
    # back; its expected content is not visible here — confirm in the handler.
    confirm_token: str
    # Free text identifying who performs the unlock.
    operator: str
    # Free text explaining why the unlock is needed.
    reason: str

View file

@ -89,19 +89,66 @@ def _map_history_state(status: str) -> str:
# DB helpers
# ---------------------------------------------------------------------------
async def _upsert_drive(db: aiosqlite.Connection, disk: dict, now: str) -> int:
await db.execute(
async def _upsert_drive(db: aiosqlite.Connection, disk: dict, now: str,
pool_info: dict | None = None,
update_pool: bool = True) -> int:
"""Insert/update a drive row.
pool_info: {"pool": str, "role": str} if this drive is currently in a
zpool, else None. None values clear pool columns so a removed-from-pool
drive doesn't stay locked.
update_pool: when False, pool columns are preserved on conflict and
initialised to NULL on insert. Callers pass False on detection failure
so a transient SSH outage doesn't silently unlock every drive.
"""
pool_name = pool_info["pool"] if pool_info else None
pool_role = pool_info["role"] if pool_info else None
pool_seen_at = now if pool_info else None
if update_pool:
update_clause = """
devname = excluded.devname,
serial = excluded.serial,
model = excluded.model,
size_bytes = excluded.size_bytes,
temperature_c = excluded.temperature_c,
smart_health = excluded.smart_health,
last_seen_at = excluded.last_seen_at,
last_polled_at = excluded.last_polled_at,
pool_name = excluded.pool_name,
pool_role = excluded.pool_role,
pool_seen_at = excluded.pool_seen_at
"""
INSERT INTO drives
(truenas_disk_id, devname, serial, model, size_bytes,
temperature_c, smart_health, last_seen_at, last_polled_at)
VALUES (?,?,?,?,?,?,?,?,?)
ON CONFLICT(truenas_disk_id) DO UPDATE SET
else:
# Preserve pool_name / pool_role / pool_seen_at — detection failed
# this cycle, so we have no fresh data and must not overwrite.
update_clause = """
devname = excluded.devname,
serial = excluded.serial,
model = excluded.model,
size_bytes = excluded.size_bytes,
temperature_c = excluded.temperature_c,
smart_health = excluded.smart_health,
last_seen_at = excluded.last_seen_at,
last_polled_at = excluded.last_polled_at
""",
"""
# SQL is built by concatenation rather than f-string so bandit's B608
# heuristic (which fires on f-string SQL regardless of source) doesn't
# flag it. update_clause is one of two hardcoded literal strings
# selected above; never carries user input.
sql = (
"INSERT INTO drives "
"(truenas_disk_id, devname, serial, model, size_bytes, "
" temperature_c, smart_health, last_seen_at, last_polled_at, "
" pool_name, pool_role, pool_seen_at) "
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?) "
"ON CONFLICT(truenas_disk_id) DO UPDATE SET "
+ update_clause
)
await db.execute(
sql,
(
disk["identifier"],
disk["devname"],
@ -112,6 +159,9 @@ async def _upsert_drive(db: aiosqlite.Connection, disk: dict, now: str) -> int:
disk.get("smart_health", "UNKNOWN"),
now,
now,
pool_name,
pool_role,
pool_seen_at,
),
)
cur = await db.execute(
@ -292,6 +342,87 @@ async def poll_cycle(client: TrueNASClient) -> int:
if t is not None:
disk["temperature"] = int(round(t))
# SMART health — TrueNAS /api/v2.0/disk doesn't expose smart_health,
# so without this every drive defaults to UNKNOWN forever (only burn-in
# stages used to populate it). Run `smartctl -H` over a single SSH
# session for every drive every Nth cycle. Cache between cycles via
# _state so the dashboard always renders the most recent answer.
SMART_HEALTH_EVERY_N_CYCLES = 5 # ~1 minute at default 12s interval
_state.setdefault("smart_health_cache", {})
cycle_n = _state.get("cycle", 0) + 1
_state["cycle"] = cycle_n
try:
from app import ssh_client as _ssh
if _ssh.is_configured() and (cycle_n % SMART_HEALTH_EVERY_N_CYCLES == 1):
health_map = await _ssh.get_smart_health_map(
[d["devname"] for d in disks if d.get("devname")]
)
if health_map is not None:
_state["smart_health_cache"] = health_map
except Exception as exc:
log.warning("smart_health refresh failed: %s", exc)
health_cache = _state.get("smart_health_cache") or {}
for disk in disks:
devname = disk.get("devname", "")
h = health_cache.get(devname)
if h:
disk["smart_health"] = h
# Pool membership map — drives in any zpool are locked from burn-in.
# ssh_client returns None on failure (distinct from {} which means "no
# pools"). If EITHER detection call fails we fail-closed: leave
# pool_name / pool_role columns alone so previously-locked drives stay
# locked, and previously-unlocked drives stay unlocked, until detection
# recovers. Treating a transient SSH blip as "no pool members" would
# silently unlock every drive on the next poll.
# Each detection probe (pool / exported / mounted) succeeds or fails
# INDEPENDENTLY. Previously a single None blew away the whole map,
# so a fresh DB on a host where lsblk lacks zfs_member info but
# zpool works would never lock pool members. Now we apply each
# successful probe and only fail-closed for the ones that actually
# errored.
pool_map: dict = {}
pool_probe_ok = True # zpool list -vHP succeeded
zfs_probe_ok = True # lsblk zfs_member succeeded
mounted_probe_ok = True # findmnt succeeded
try:
from app import ssh_client as _ssh
if _ssh.is_configured():
pm = await _ssh.get_pool_membership()
zs = await _ssh.get_zfs_member_drives()
ms = await _ssh.get_mounted_drives()
pool_probe_ok = pm is not None
zfs_probe_ok = zs is not None
mounted_probe_ok = ms is not None
if pool_probe_ok:
pool_map.update(pm)
if zfs_probe_ok:
for devname in zs:
if devname not in pool_map:
pool_map[devname] = {"pool": "(exported)", "role": "exported"}
if mounted_probe_ok:
for devname in ms:
if devname not in pool_map:
pool_map[devname] = {"pool": "(mounted)", "role": "mounted"}
# SSH unconfigured (mock/dev mode) — all probes "succeed" with
# empty maps, so dev mode never artificially locks drives.
except Exception:
pool_probe_ok = zfs_probe_ok = mounted_probe_ok = False
pool_map = {}
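# If ALL probes failed we have no fresh data at all — preserve the
# existing pool columns to keep locks honest. If at least one probe
# succeeded the new pool_map is a partial truth and is applied as-is.
# NOTE(review): applying a partial map CLEARS the pool columns of every
# drive it does not list, including locks that only a failed probe could
# have confirmed this cycle. To fail closed per probe, update the pool
# columns only for drives present in pool_map unless all three probes
# succeeded.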
detection_ok = pool_probe_ok or zfs_probe_ok or mounted_probe_ok
if not (pool_probe_ok and zfs_probe_ok and mounted_probe_ok):
log.warning(
"Pool detection partial: pool=%s zfs=%s mounted=%s — preserving "
"stale lock state from any probe that failed.",
pool_probe_ok, zfs_probe_ok, mounted_probe_ok,
)
# Index running jobs by (devname, test_type)
active: dict[tuple[str, str], dict] = {}
for job in running_jobs:
@ -306,11 +437,16 @@ async def poll_cycle(client: TrueNASClient) -> int:
async with aiosqlite.connect(settings.db_path) as db:
db.row_factory = aiosqlite.Row
await db.execute("PRAGMA journal_mode=WAL")
await db.execute("PRAGMA busy_timeout=60000")
await db.execute("PRAGMA foreign_keys=ON")
for disk in disks:
devname = disk["devname"]
drive_id = await _upsert_drive(db, disk, now)
drive_id = await _upsert_drive(
db, disk, now,
pool_map.get(devname) if detection_ok else None,
update_pool=detection_ok,
)
for ttype in ("short", "long"):
if (devname, ttype) in active:
@ -357,6 +493,7 @@ async def run(client: TrueNASClient) -> None:
async with aiosqlite.connect(settings.db_path) as _tdb:
_tdb.row_factory = aiosqlite.Row
await _tdb.execute("PRAGMA journal_mode=WAL")
await _tdb.execute("PRAGMA busy_timeout=60000")
_cur = await _tdb.execute("""
SELECT MAX(d.temperature_c)
FROM drives d

168
app/retention.py Normal file
View file

@ -0,0 +1,168 @@
"""
Background retention + backup tasks.

* Stage-log pruning: each surface_validate burn-in stage can write tens of
  MB of badblocks output to burnin_stages.log_text. Without retention the
  DB grows unbounded — we observed 447 MB on the live host after a few
  weeks of use. The nightly job NULLs log_text on stages that finished more
  than `settings.retention_log_days` days ago, then VACUUMs to reclaim
  pages.
* Automated DB backup: nightly online backup (the equivalent of
  `sqlite3 .backup`) to `backups/app-YYYY-MM-DD.db` inside the data dir.
  Retains the most recent `settings.retention_backup_keep` files. Uses the
  online-backup API so the live DB isn't locked.

Both tasks share a single 5-minute tick that fires the daily work once per
day — cheap, and it fits the existing mailer-style background-loop
pattern. Failures are logged but never crash the loop.
"""
from __future__ import annotations
import asyncio
import logging
from datetime import datetime, timedelta, timezone
from pathlib import Path
import aiosqlite
from app.config import settings
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Stage-log pruning
# ---------------------------------------------------------------------------
async def prune_stage_logs(retention_days: int) -> int:
    """NULL out log_text on burnin_stages older than retention_days.

    Only rows that still have a log and a non-NULL ``finished_at`` earlier
    than the cutoff are touched; the rows themselves are kept.

    Returns the number of rows updated.
    """
    cutoff = (datetime.now(timezone.utc) - timedelta(days=retention_days)).isoformat()
    async with aiosqlite.connect(settings.db_path) as db:
        # Same busy timeout as every other connection in the app, so a
        # concurrent writer makes this UPDATE wait instead of failing
        # with "database is locked".
        await db.execute("PRAGMA busy_timeout=60000")
        cur = await db.execute(
            """UPDATE burnin_stages
SET log_text = NULL
WHERE log_text IS NOT NULL
AND finished_at IS NOT NULL
AND finished_at < ?""",
            (cutoff,),
        )
        n = cur.rowcount or 0
        await db.commit()
    if n > 0:
        log.info("Retention: pruned log_text on %d stage row(s) older than %d days",
                 n, retention_days)
    return n
async def vacuum_db() -> None:
    """Reclaim pages freed by the prune.

    SQLite VACUUM rewrites the file, so it must run outside any
    transaction; ``isolation_level=None`` puts the connection in
    autocommit mode for that reason.
    """
    async with aiosqlite.connect(settings.db_path, isolation_level=None) as db:
        # VACUUM needs exclusive access. Wait for other writers for as
        # long as the rest of the app does rather than failing early.
        await db.execute("PRAGMA busy_timeout=60000")
        await db.execute("VACUUM")
    log.info("Retention: VACUUM completed")
# ---------------------------------------------------------------------------
# Backup
# ---------------------------------------------------------------------------
def _backup_dir() -> Path:
    """Return the ``backups`` directory located next to the database file."""
    data_dir = Path(settings.db_path).parent
    return data_dir.joinpath("backups")
async def backup_db(keep_count: int) -> Path | None:
    """Online-backup the live DB to backups/app-YYYY-MM-DD.db.

    Returns the new file's path. Old backups beyond ``keep_count`` are
    deleted, newest (by mtime) kept first. ``keep_count`` values below 1
    are treated as 1 so the snapshot just written is never removed.

    Atomicity: writes to a sibling tmp file first and renames into the
    canonical daily slot only after the backup succeeds. An interrupted
    backup leaves the tmp file (cleaned up on the next run); the previous
    snapshot stays intact. os.replace is atomic within the same
    filesystem on POSIX.
    """
    import os as _os

    bdir = _backup_dir()
    bdir.mkdir(parents=True, exist_ok=True)
    today = datetime.now().strftime("%Y-%m-%d")
    out = bdir / f"app-{today}.db"
    tmp = bdir / f"app-{today}.db.tmp"

    # Drop leftover tmp files from ANY previous interrupted run, not only
    # today's. A run interrupted on an earlier date leaves a differently
    # named file that would otherwise stay behind forever.
    for stale in bdir.glob("app-*.db.tmp*"):
        try:
            stale.unlink()
        except OSError as exc:
            log.warning("Retention: could not remove stale tmp %s: %s", stale, exc)

    # aiosqlite.Connection.backup() is an async wrapper around
    # sqlite3.Connection.backup — an online snapshot of the source DB.
    async with aiosqlite.connect(settings.db_path) as src:
        await src.execute("PRAGMA busy_timeout=60000")
        async with aiosqlite.connect(str(tmp)) as dst:
            await src.backup(dst)
    _os.replace(tmp, out)
    log.info("Retention: DB backed up to %s (%d bytes)", out, out.stat().st_size)

    # Keep the N most recent backups; delete older. The pattern does not
    # match "*.db.tmp", so an in-progress file is never counted here.
    keep = max(1, keep_count)
    snapshots = sorted(bdir.glob("app-*.db"), key=lambda p: p.stat().st_mtime,
                       reverse=True)
    for old in snapshots[keep:]:
        try:
            old.unlink()
            log.info("Retention: removed old backup %s", old.name)
        except OSError as exc:
            log.warning("Retention: could not remove %s: %s", old, exc)
    return out
# ---------------------------------------------------------------------------
# Scheduler — a single 5-minute tick fires the daily-grain work
# ---------------------------------------------------------------------------
_RUN_HOUR = 3 # 03:00 local time — quiet for most homelabs
_state: dict[str, str | None] = {"last_run_date": None}
async def run() -> None:
    """Run the retention loop until cancelled.

    Every 5 minutes the loop checks the local clock. While the hour
    equals ``_RUN_HOUR`` and today's date has not been recorded in
    ``_state``, it prunes stage logs (vacuuming when anything was
    pruned) and takes a DB backup. The day is recorded as done only when
    both steps succeed, so a failed step is retried on later ticks that
    still fall inside the run hour.
    """
    log.info(
        "Retention loop started (run at %02d:00 local; prune>%d days; keep %d backups)",
        _RUN_HOUR,
        settings.retention_log_days,
        settings.retention_backup_keep,
    )
    while True:
        try:
            moment = datetime.now()
            today = moment.strftime("%Y-%m-%d")
            due = moment.hour == _RUN_HOUR and _state["last_run_date"] != today
            if due:
                # The two steps are tracked separately: a failure in one
                # must not prevent the other from running on this tick.
                try:
                    removed = await prune_stage_logs(settings.retention_log_days)
                    if removed:
                        await vacuum_db()
                except Exception as exc:
                    log.exception("Retention: pruning failed: %s", exc)
                    prune_ok = False
                else:
                    prune_ok = True

                try:
                    await backup_db(settings.retention_backup_keep)
                except Exception as exc:
                    log.exception("Retention: backup failed: %s", exc)
                    backup_ok = False
                else:
                    backup_ok = True

                if prune_ok and backup_ok:
                    _state["last_run_date"] = today
        except asyncio.CancelledError:
            # Let task cancellation propagate so the loop can be stopped.
            raise
        except Exception as exc:
            log.exception("Retention loop iteration failed: %s", exc)
        await asyncio.sleep(300)  # 5 min

166
app/routes/__init__.py Normal file
View file

@ -0,0 +1,166 @@
import asyncio
import csv
import io
import json
from datetime import datetime, timezone
import aiosqlite
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from fastapi.responses import HTMLResponse, StreamingResponse
from sse_starlette.sse import EventSourceResponse
from app import poller
from app.config import settings
from app.database import get_db
from app.models import (
BurninJobResponse, BurninStageResponse,
CancelBurninRequest, DriveResponse,
SmartTestState, StartBurninRequest, UnlockPoolDriveRequest,
UpdateDriveRequest,
)
from app.renderer import templates
# Helpers shared with the extracted sub-routers — keep the underscore-
# prefixed local names that existing in-file callers reach for.
from ._helpers import (
client_ip as _client_ip,
is_stale as _is_stale,
operator_for as _operator_for,
secret_status as _secret_status,
stale_context as _stale_context,
SECRET_FIELDS as _SECRET_FIELDS,
)
# Package-level router: the dashboard and SSE endpoints defined in this file
# and every sub-router included further down are registered on it.
router = APIRouter()
# Sub-routers extracted as part of the routes/ package split (1.0.0-34).
# Their endpoints get registered against the same APIRouter, so the
# external `from app.routes import router` import in app/main.py keeps
# working unchanged. Future slices can extract more — drives, burnin,
# settings, history — using the same pattern.
#
# Absolute imports (`import app.routes.X as _Y`) instead of relative
# (`from . import X as _Y`) so we stay safe even if a future top-level
# `from app import X` is reintroduced here — `from app import auth`
# would bind `auth` on the `app.routes` package namespace and shadow
# any relative-submodule lookup. Absolute imports always resolve to
# `app.routes.X` regardless of what's already bound on the package.
import app.routes.auth as _auth_routes # noqa: E402
import app.routes.system as _system_routes # noqa: E402
import app.routes.history as _history_routes # noqa: E402
import app.routes.audit as _audit_routes # noqa: E402
import app.routes.stats as _stats_routes # noqa: E402
import app.routes.report as _report_routes # noqa: E402
import app.routes.settings as _settings_routes # noqa: E402
import app.routes.drives as _drives_routes # noqa: E402
import app.routes.burnin as _burnin_routes # noqa: E402
# Attach each extracted sub-router to the package router, in the same
# order as before so route registration order is unchanged.
for _sub_module in (
    _auth_routes,
    _system_routes,
    _history_routes,
    _audit_routes,
    _stats_routes,
    _report_routes,
    _settings_routes,
    _drives_routes,
    _burnin_routes,
):
    router.include_router(_sub_module.router)
del _sub_module  # do not leave the loop variable on the package namespace
# Drives helpers — re-exported for the dashboard + SSE handlers in this
# file AND for `from app.routes import _fetch_drives_for_template`
# from mailer.py (existing back-compat shim).
from ._drives_helpers import ( # noqa: E402
_DRIVES_QUERY, _row_to_drive, _build_smart, _compute_status,
_compute_eta_seconds, _eta_seconds,
_fetch_burnin_by_drive, _fetch_drives_for_template,
)
# _stale_context is now imported from ._helpers above.
# ---------------------------------------------------------------------------
# Dashboard
# ---------------------------------------------------------------------------
@router.get("/", response_class=HTMLResponse)
async def dashboard(request: Request, db: aiosqlite.Connection = Depends(get_db)):
    """Render ``dashboard.html`` with the drive list and poller state.

    The template context also carries the stale-banner keys produced by
    ``_stale_context`` for the current poller state.
    """
    drive_rows = await _fetch_drives_for_template(db)
    poller_state = poller.get_state()
    context = {
        "request": request,
        "drives": drive_rows,
        "poller": poller_state,
    }
    context.update(_stale_context(poller_state))
    return templates.TemplateResponse(request, "dashboard.html", context)
# ---------------------------------------------------------------------------
# SSE — live drive table updates
# ---------------------------------------------------------------------------
@router.get("/sse/drives")
async def sse_drives(request: Request):
    """Stream live dashboard updates as server-sent events.

    Subscribes to the poller and, for every item received on the
    subscription queue, emits:
      * ``drives-update``  — freshly rendered drives-table HTML
      * ``system-sensors`` — JSON with system temps and thresholds
      * ``job-alert``      — only when the payload is a dict with a truthy
        ``alert`` entry
    When nothing arrives within 25 seconds a ``keepalive`` event is sent
    instead. The subscription is released when the generator finishes.
    """
    q = poller.subscribe()

    async def generate():
        try:
            while True:
                # Wait for next poll notification or keepalive timeout
                try:
                    payload = await asyncio.wait_for(q.get(), timeout=25.0)
                except asyncio.TimeoutError:
                    # Idle period: stop if the client went away, otherwise
                    # send a keepalive and go back to waiting.
                    if await request.is_disconnected():
                        break
                    yield {"event": "keepalive", "data": ""}
                    continue
                # Skip the DB work entirely when nobody is listening any more.
                if await request.is_disconnected():
                    break
                # Extract alert from payload (may be None for regular polls)
                alert = None
                if isinstance(payload, dict):
                    alert = payload.get("alert")
                # Render fresh table HTML. A dedicated connection is opened
                # per update rather than using the request-scoped dependency.
                async with aiosqlite.connect(settings.db_path) as db:
                    db.row_factory = aiosqlite.Row
                    await db.execute("PRAGMA journal_mode=WAL")
                    # busy_timeout is in milliseconds: wait up to 60 s on a lock.
                    await db.execute("PRAGMA busy_timeout=60000")
                    drives = await _fetch_drives_for_template(db)
                html = templates.env.get_template(
                    "components/drives_table.html"
                ).render(drives=drives)
                yield {"event": "drives-update", "data": html}
                # Push system sensor state so JS can update temp chips live
                ps = poller.get_state()
                yield {
                    "event": "system-sensors",
                    "data": json.dumps({
                        "system_temps": ps.get("system_temps", {}),
                        "thermal_pressure": ps.get("thermal_pressure", "ok"),
                        "temp_warn_c": settings.temp_warn_c,
                        "temp_crit_c": settings.temp_crit_c,
                    }),
                }
                # Push browser notification event if this was a job completion
                if alert:
                    yield {"event": "job-alert", "data": json.dumps(alert)}
        finally:
            # Always drop the subscription, including on client disconnect
            # or generator cancellation.
            poller.unsubscribe(q)

    return EventSourceResponse(generate())
# ---------------------------------------------------------------------------
# JSON API
# ---------------------------------------------------------------------------

View file

@ -0,0 +1,212 @@
"""Shared drives helpers — used by routes/drives.py, routes/__init__.py
(for the dashboard + SSE), AND mailer.py (for the daily report).
This module exists so the drives endpoints can be extracted to their
own file without breaking mailer's
`from app.routes import _fetch_drives_for_template` import. The package
re-exports `_fetch_drives_for_template` on its `app.routes` namespace as
that backward-compat shim.
"""
from __future__ import annotations
from datetime import datetime, timezone
import aiosqlite
from app import burnin
from app.models import DriveResponse, SmartTestState
from ._helpers import is_stale
def _eta_seconds(eta_at: str | None) -> int | None:
if not eta_at:
return None
try:
eta_ts = datetime.fromisoformat(eta_at)
if eta_ts.tzinfo is None:
eta_ts = eta_ts.replace(tzinfo=timezone.utc)
remaining = (eta_ts - datetime.now(timezone.utc)).total_seconds()
return max(0, int(remaining))
except Exception:
return None
def _compute_eta_seconds(started_at: str | None, percent: int) -> int | None:
"""Linear ETA extrapolation from started_at and percent complete."""
if not started_at or percent <= 0:
return None
try:
start = datetime.fromisoformat(started_at)
if start.tzinfo is None:
start = start.replace(tzinfo=timezone.utc)
elapsed = (datetime.now(timezone.utc) - start).total_seconds()
total_est = elapsed / (percent / 100)
remaining = max(0, int(total_est - elapsed))
return remaining
except Exception:
return None
def _build_smart(row: aiosqlite.Row, prefix: str) -> SmartTestState:
    """Build a ``SmartTestState`` from the ``<prefix>_*`` columns of *row*.

    *prefix* selects the column family (the callers in this module pass
    ``"short"`` or ``"long"``). A missing state falls back to ``"idle"``
    and ``eta_seconds`` is derived from the ``<prefix>_eta_at`` column.
    """
    def column(suffix: str):
        return row[f"{prefix}_{suffix}"]

    eta_at = column("eta_at")
    return SmartTestState(
        state=column("state") or "idle",
        percent=column("percent"),
        eta_seconds=_eta_seconds(eta_at),
        eta_timestamp=eta_at,
        started_at=column("started_at"),
        finished_at=column("finished_at"),
        error_text=column("error"),
    )
def _row_to_drive(row: aiosqlite.Row) -> DriveResponse:
    """Convert one ``_DRIVES_QUERY`` result row into a ``DriveResponse``.

    Most columns are copied as-is. ``smart_health`` defaults to
    ``"UNKNOWN"``, staleness is computed from ``last_polled_at``, the two
    SMART sub-objects come from the ``short_*`` / ``long_*`` columns, and
    ``pool_unlocked_until`` is looked up through ``burnin.unlock_expiry``.
    """
    copied_columns = (
        "id", "devname", "serial", "model", "size_bytes", "temperature_c",
        "last_polled_at", "notes", "location", "pool_name", "pool_role",
    )
    fields = {name: row[name] for name in copied_columns}
    return DriveResponse(
        **fields,
        smart_health=row["smart_health"] or "UNKNOWN",
        is_stale=is_stale(row["last_polled_at"]),
        smart_short=_build_smart(row, "short"),
        smart_long=_build_smart(row, "long"),
        pool_unlocked_until=burnin.unlock_expiry(
            row["id"], row["pool_name"], row["pool_role"],
        ),
    )
def _compute_status(drive: dict) -> str:
    """Collapse a drive's SMART state into one of four status strings.

    Precedence: ``"running"`` if either test is running; ``"failed"`` if
    either test failed or ``smart_health`` is ``"FAILED"``; ``"passed"``
    if either test passed; otherwise ``"idle"``.
    """
    test_states = tuple(
        (drive.get(key) or {}).get("state", "idle")
        for key in ("smart_short", "smart_long")
    )
    unhealthy = drive.get("smart_health", "UNKNOWN") == "FAILED"
    if "running" in test_states:
        return "running"
    if "failed" in test_states or unhealthy:
        return "failed"
    if "passed" in test_states:
        return "passed"
    return "idle"
# Base SELECT for drive rows together with their short/long SMART test
# state, exposed as short_* / long_* columns. Only drives whose
# last_seen_at falls within the past 7 days are returned.
# `{where}` is filled through str.format(): callers pass either "" or an
# extra "AND ..." clause, and still bind the values as query parameters.
_DRIVES_QUERY = """
SELECT
d.id, d.devname, d.serial, d.model, d.size_bytes,
d.temperature_c, d.smart_health, d.last_polled_at,
d.notes, d.location, d.pool_name, d.pool_role,
s.state AS short_state,
s.percent AS short_percent,
s.started_at AS short_started_at,
s.eta_at AS short_eta_at,
s.finished_at AS short_finished_at,
s.error_text AS short_error,
l.state AS long_state,
l.percent AS long_percent,
l.started_at AS long_started_at,
l.eta_at AS long_eta_at,
l.finished_at AS long_finished_at,
l.error_text AS long_error
FROM drives d
LEFT JOIN smart_tests s ON s.drive_id = d.id AND s.test_type = 'short'
LEFT JOIN smart_tests l ON l.drive_id = d.id AND l.test_type = 'long'
WHERE d.last_seen_at >= datetime('now', '-7 days')
{where}
ORDER BY d.devname
"""
async def _fetch_burnin_by_drive(db: aiosqlite.Connection) -> dict[int, dict]:
    """Map each drive_id to its most recent burn-in job, whatever its state.

    A job is left out when the drive has a ``last_reset_at`` and the job
    was created at or before it, so the dashboard burn-in column clears
    after a reset while the job history itself is kept.
    """
    cursor = await db.execute("""
SELECT bj.*
FROM burnin_jobs bj
JOIN drives d ON d.id = bj.drive_id
WHERE bj.id IN (SELECT MAX(id) FROM burnin_jobs GROUP BY drive_id)
AND (d.last_reset_at IS NULL OR bj.created_at > d.last_reset_at)
""")
    latest_by_drive: dict[int, dict] = {}
    for record in await cursor.fetchall():
        latest_by_drive[record["drive_id"]] = dict(record)
    return latest_by_drive
async def _fetch_drives_for_template(db: aiosqlite.Connection) -> list[dict]:
    """Return one template-ready dict per drive.

    Each dict is the dumped ``DriveResponse`` plus two extra keys:
    ``status`` (from ``_compute_status``) and ``burnin`` (the drive's
    latest burn-in job dict, or ``None``). When the standalone SMART
    column is idle/empty, the matching SMART stage of that burn-in job is
    overlaid onto ``smart_short`` / ``smart_long``.
    """
    cur = await db.execute(_DRIVES_QUERY.format(where=""))
    rows = await cur.fetchall()
    burnin_by_drive = await _fetch_burnin_by_drive(db)
    # For burn-ins that include SMART stages, fetch those stages so we can
    # mirror their progress/result in the Short/Long SMART columns.
    # We include burn-ins in ANY state — including failed/passed/cancelled —
    # so the SMART columns don't go blank when the burn-in finishes. Without
    # this, "FAILED (LONG SMART)" appears in the Burn-In column while the
    # Long SMART column shows "—", which contradicts itself.
    bi_smart_stages: dict[int, dict[str, dict]] = {} # job_id -> {stage_name: row}
    # Despite the name, this holds the id of every latest job; the query
    # below is what narrows the result to the SMART stages.
    bi_ids_with_smart = [bi["id"] for bi in burnin_by_drive.values()]
    if bi_ids_with_smart:
        placeholders = ",".join("?" * len(bi_ids_with_smart))
        # placeholders is purely structural ("?,?,?"); IDs themselves are
        # bound via the parameter tuple. SQL built via concatenation so
        # bandit's B608 (which fires on any f-string SQL) doesn't flag it.
        sql = (
            "SELECT bs.burnin_job_id, bs.stage_name, bs.state, bs.percent, "
            " bs.started_at, bs.finished_at, bs.error_text "
            "FROM burnin_stages bs "
            "WHERE bs.burnin_job_id IN (" + placeholders + ") "
            " AND bs.stage_name IN ('short_smart', 'long_smart') "
            " AND bs.state IN ('running', 'passed', 'failed', 'aborted')"
        )
        cur = await db.execute(sql, bi_ids_with_smart)
        for r in await cur.fetchall():
            bi_smart_stages.setdefault(r["burnin_job_id"], {})[r["stage_name"]] = dict(r)
    drives = []
    for row in rows:
        d = _row_to_drive(row).model_dump()
        d["status"] = _compute_status(d)
        bi = burnin_by_drive.get(d["id"])
        d["burnin"] = bi
        # Overlay burn-in SMART stage progress/results onto the SMART columns
        if bi and bi["id"] in bi_smart_stages:
            for stage_name, stage in bi_smart_stages[bi["id"]].items():
                target = "smart_short" if stage_name == "short_smart" else "smart_long"
                # Only overlay if the standalone SMART column is idle/empty
                existing = d.get(target) or {}
                if existing.get("state") not in (None, "idle"):
                    continue
                pct = stage["percent"] or 0
                stage_state = stage["state"]
                # If the parent burn-in ended in failure but this SMART
                # stage is still recorded as "running", that's an
                # orphaned stage row from a hard crash (e.g. the old
                # `database is locked` failure mode). Surface as failed
                # so the column matches the Burn-In column.
                if stage_state == "running" and bi.get("state") in (
                    "failed", "cancelled", "unknown"
                ):
                    # "unknown" is shown as "failed"; "failed" and
                    # "cancelled" are passed through unchanged.
                    stage_state = bi["state"] if bi["state"] != "unknown" else "failed"
                d[target] = {
                    "state": stage_state,
                    # Live percent only while running; finished stages show
                    # 100 when passed and 0 otherwise.
                    "percent": pct if stage_state == "running" else (100 if stage_state == "passed" else 0),
                    "eta_seconds": _compute_eta_seconds(stage["started_at"], pct) if stage_state == "running" else None,
                    "eta_timestamp": None,
                    "started_at": stage["started_at"],
                    "finished_at": stage["finished_at"],
                    # Fall back to the job-level error only for failed stages.
                    "error_text": stage["error_text"] or (
                        bi.get("error_text") if stage_state == "failed" else None
                    ),
                }
        drives.append(d)
    return drives

97
app/routes/_helpers.py Normal file
View file

@ -0,0 +1,97 @@
"""Shared helpers used across multiple route modules.
Anything more than one route file needs lives here. Single-use helpers
stay in their owning route module.
"""
from __future__ import annotations
from datetime import datetime, timezone
from fastapi import HTTPException, Request
from app.config import settings
def client_ip(request: Request) -> str:
    """Return a best-effort source IP for *request*.

    The first entry of ``X-Forwarded-For`` wins when the header is
    present and non-empty (the app sits behind nginx-proxy-manager);
    otherwise the direct peer address is used, or ``"unknown"`` when the
    request has no client information.
    """
    forwarded_header = request.headers.get("X-Forwarded-For") or ""
    first_hop = forwarded_header.split(",")[0].strip()
    if first_hop:
        return first_hop
    peer = request.client
    return peer.host if peer else "unknown"
def operator_for(request: Request, _ignored_body_value: str | None = None) -> str:
    """Return the name to record as operator for an audited action.

    The name always comes from the logged-in user on ``request.state``
    (full name if set, otherwise username). The second argument, which
    is the ``operator`` value from a request body, is accepted for
    call-site compatibility and never used, so clients cannot spoof it.

    Raises:
        HTTPException: 401 when the request has no current user.
    """
    current = getattr(request.state, "current_user", None)
    if current:
        return current.full_name or current.username
    raise HTTPException(status_code=401, detail="Authentication required")
def is_stale(last_polled_at: str) -> bool:
    """Tell whether the poll timestamp is older than the stale threshold.

    *last_polled_at* is an ISO timestamp; one without tzinfo is treated
    as UTC. Any failure while evaluating it (for example an unparsable
    or missing value) is reported as stale.
    """
    try:
        polled = datetime.fromisoformat(last_polled_at)
        if polled.tzinfo is None:
            polled = polled.replace(tzinfo=timezone.utc)
        age_seconds = (datetime.now(timezone.utc) - polled).total_seconds()
        return age_seconds > settings.stale_threshold_seconds
    except Exception:
        return True
def stale_context(ps: dict) -> dict:
    """Build the ``{stale, stale_seconds}`` keys for the warning banner.

    *ps* is the poller state dict; its ``last_poll_at`` ISO timestamp
    (UTC when it carries no tzinfo) is compared with the stale threshold.
    With no timestamp, or when it cannot be evaluated, the result is
    "not stale" with an age of 0.
    """
    not_stale = {"stale": False, "stale_seconds": 0}
    last_poll = ps.get("last_poll_at")
    if not last_poll:
        return not_stale
    try:
        polled = datetime.fromisoformat(last_poll)
        if polled.tzinfo is None:
            polled = polled.replace(tzinfo=timezone.utc)
        age = (datetime.now(timezone.utc) - polled).total_seconds()
        is_over = age > settings.stale_threshold_seconds
        return {"stale": is_over, "stale_seconds": int(age)}
    except Exception:
        return {"stale": False, "stale_seconds": 0}
# Field names that hold secrets and must never be rendered to the UI
# verbatim or included in the redacted-settings dump.
# The same four names are the keys of the dict returned by secret_status().
SECRET_FIELDS = ("smtp_password", "ssh_password", "ssh_key", "truenas_api_key")
def secret_status() -> dict[str, str]:
    """Return a display string per secret for the settings page.

    The strings say whether each secret is configured without ever
    exposing its value. For ``ssh_key`` the source is reported as well:
    environment variable, settings DB, or mounted secret file, checked
    in that order. The other secrets are read from the live settings
    object only and report plain ``"set"`` / ``"unset"``.
    """
    import os as _os
    from app.ssh_client import _MOUNTED_KEY_PATH

    def _label(field: str) -> str:
        return "set" if getattr(settings, field, "") else "unset"

    if _os.environ.get("SSH_KEY"):
        ssh_key_status = "set (environment variable)"
    elif getattr(settings, "ssh_key", ""):
        ssh_key_status = "set (stored in settings DB — prefer a mounted secret in production)"
    else:
        # SSH_KEY_FILE overrides the default mount location when set.
        key_file = _os.environ.get("SSH_KEY_FILE", _MOUNTED_KEY_PATH)
        ssh_key_status = "set (mounted secret)" if _os.path.exists(key_file) else "unset"

    statuses = {
        "smtp_password": _label("smtp_password"),
        "ssh_password": _label("ssh_password"),
        "ssh_key": ssh_key_status,
        "truenas_api_key": _label("truenas_api_key"),
    }
    return statuses

53
app/routes/audit.py Normal file
View file

@ -0,0 +1,53 @@
"""Audit log page — shows the last 200 entries from `audit_events`."""
from __future__ import annotations
import aiosqlite
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from app import poller
from app.database import get_db
from app.renderer import templates
from ._helpers import stale_context
# Router for the audit page; the app.routes package router includes it.
router = APIRouter()
# Newest-first audit events with the device name and serial of the drive
# they refer to. The LEFT JOIN keeps events that match no drives row, and
# LIMIT 200 caps the page.
_AUDIT_QUERY = """
SELECT
ae.id, ae.event_type, ae.operator, ae.message, ae.created_at,
d.devname, d.serial
FROM audit_events ae
LEFT JOIN drives d ON d.id = ae.drive_id
ORDER BY ae.id DESC
LIMIT 200
"""
# event_type -> color key, handed to the template as `event_colors`.
_AUDIT_EVENT_COLORS = {
    "burnin_queued": "yellow",
    "burnin_started": "blue",
    "burnin_passed": "passed",
    "burnin_failed": "failed",
    "burnin_cancelled": "cancelled",
    "burnin_stuck": "failed",
    "burnin_unknown": "unknown",
}
@router.get("/audit", response_class=HTMLResponse)
async def audit_log(
    request: Request,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render ``audit.html`` with the rows returned by ``_AUDIT_QUERY``.

    The context also carries the event color map, the poller state and
    the stale-banner keys derived from it.
    """
    cursor = await db.execute(_AUDIT_QUERY)
    events = []
    for record in await cursor.fetchall():
        events.append(dict(record))
    poller_state = poller.get_state()
    context = {
        "request": request,
        "events": events,
        "event_colors": _AUDIT_EVENT_COLORS,
        "poller": poller_state,
    }
    context.update(stale_context(poller_state))
    return templates.TemplateResponse(request, "audit.html", context)

170
app/routes/auth.py Normal file
View file

@ -0,0 +1,170 @@
"""Login / logout / first-user setup / password change routes.
Public path mounting:
GET /login render login or first-user setup form
POST /login credential check + session bootstrap
POST /api/v1/auth/setup first-user creation (only when zero users)
GET /logout clear session, redirect
POST /logout same, for explicit POST clients
POST /api/v1/auth/change-password rotate password + audit
"""
from __future__ import annotations
import time as _time
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from app import auth
from app.renderer import templates
from ._helpers import client_ip
# Router for the login/logout/setup/password endpoints; the app.routes
# package router includes it.
router = APIRouter()
@router.get("/login", response_class=HTMLResponse)
async def login_page(request: Request, next: str = "/", error: str | None = None):
    """Render the login form, or the first-user setup form when no users exist.

    Args:
        request: Incoming request.
        next: Path to return to after login. Only same-site absolute
            paths are passed on to the template; anything else becomes "/".
        error: Optional message to show on the form.
    """
    needs_setup = (await auth.user_count()) == 0
    # "//host" and "/\host" start with "/" too, but browsers resolve them
    # as scheme-relative URLs pointing at another origin. Rejecting them
    # here keeps the form from carrying an open-redirect target.
    is_local_path = next.startswith("/") and not next.startswith(("//", "/\\"))
    return templates.TemplateResponse(request, "login.html", {
        "request": request,
        "needs_setup": needs_setup,
        "error": error,
        "next": next if is_local_path else "/",
    })
@router.post("/login")
async def login_submit(request: Request):
    """Check submitted credentials and start an authenticated session.

    Reads ``username``, ``password`` and ``next`` from the form.

    Returns:
        * 303 redirect to the validated ``next`` path on success
        * the login page with status 401 on bad credentials
        * the login page with status 429 while the user/IP is locked out
    """
    form = await request.form()
    username = (form.get("username") or "").strip()
    password = form.get("password") or ""
    next_url = form.get("next") or "/"
    # Only same-site absolute paths may be used as the redirect target.
    # "//host" and "/\host" also start with "/" but browsers resolve them
    # as scheme-relative URLs to another origin (open redirect). A
    # non-string form part (e.g. a file upload) is rejected as well.
    if (
        not isinstance(next_url, str)
        or not next_url.startswith("/")
        or next_url.startswith(("//", "/\\"))
    ):
        next_url = "/"
    ip = client_ip(request)
    # Atomic register-and-check: increments the counter NOW (before any
    # await), so a parallel burst of guesses can't all slip past the
    # threshold. Cleared on successful auth via clear_login_failures.
    attempt = auth.register_login_attempt(username, ip)
    if attempt != "ok":
        if attempt == "now_locked_out":
            await auth.audit_auth_event(
                "user_login_locked_out", username,
                f"Failed login from {ip} — IP/user locked out for {auth.LOGIN_LOCKOUT_SECONDS // 60} min",
            )
        locked_until = auth.login_locked_until(username, ip)
        # With no recorded lock expiry the remaining time evaluates to 0,
        # which the message below rounds up to "1 min".
        remaining = int((locked_until or _time.time()) - _time.time())
        return templates.TemplateResponse(request, "login.html", {
            "request": request,
            "needs_setup": False,
            "error": f"Too many failed attempts. Try again in {remaining // 60 + 1} min.",
            "next": next_url,
        }, status_code=429)
    found = await auth.get_user_by_username(username)
    if not found or not auth.verify_password(password, found[1]):
        # Constant-ish-time: still call verify on a junk hash if user missing
        # so the timing of "user not found" matches "wrong password."
        if not found:
            auth.verify_password(password, "$2b$12$" + "x" * 53)
        await auth.audit_auth_event(
            "user_login_failed", username, f"Failed login from {ip}",
        )
        return templates.TemplateResponse(request, "login.html", {
            "request": request,
            "needs_setup": False,
            "error": "Invalid username or password.",
            "next": next_url,
        }, status_code=401)
    user = found[0]
    auth.clear_login_failures(username, ip)
    # Clear any pre-login session keys before populating the new identity.
    # Closes session-fixation: if an attacker had somehow seeded the
    # browser with a session cookie, this discards everything in it
    # before issuing the new authenticated payload.
    request.session.clear()
    request.session["user_id"] = user.id
    request.session["username"] = user.username
    await auth.touch_last_login(user.id)
    await auth.audit_auth_event(
        "user_login", user.username, f"Signed in from {ip}",
    )
    return RedirectResponse(url=next_url, status_code=303)
@router.post("/api/v1/auth/setup")
async def auth_first_user_setup(request: Request):
    """Create the first admin account from the login page.

    The endpoint is public but only acts while the users table is empty.
    On success the new user is signed in and redirected to "/".

    Raises:
        HTTPException: 409 when at least one user already exists, 400
            when ``auth.create_user`` rejects the input with ValueError.
    """
    existing_users = await auth.user_count()
    if existing_users > 0:
        raise HTTPException(status_code=409, detail="Users already exist.")

    form = await request.form()
    username = (form.get("username") or "").strip()
    password = form.get("password") or ""
    full_name = (form.get("full_name") or "").strip() or None

    # bootstrap_only=True wraps the existence check + insert in an
    # IMMEDIATE transaction so two concurrent setup requests can't
    # both create admin accounts during the bootstrap window.
    try:
        user = await auth.create_user(
            username, password, full_name, is_admin=True, bootstrap_only=True
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))

    # Same fixation defense as the login flow: drop whatever the session
    # held before the authenticated identity is written into it.
    session = request.session
    session.clear()
    session["user_id"] = user.id
    session["username"] = user.username
    await auth.touch_last_login(user.id)
    return RedirectResponse(url="/", status_code=303)
@router.get("/logout")
@router.post("/logout")
async def logout(request: Request):
    """Clear the session and redirect to ``/login``.

    A ``user_logout`` audit event is written first when the request
    carries a current user.
    """
    current = getattr(request.state, "current_user", None)
    if current:
        source_ip = client_ip(request)
        await auth.audit_auth_event(
            "user_logout", current.username, f"Signed out from {source_ip}",
        )
    request.session.clear()
    return RedirectResponse(url="/login", status_code=303)
@router.post("/api/v1/auth/change-password")
async def change_password(request: Request):
    """Change the logged-in user's password and audit the change.

    Reads ``current_password``, ``new_password`` and ``confirm_password``
    from the form.

    Returns:
        ``{"ok": True}`` on success.

    Raises:
        HTTPException: 401 without a current user, 429 when the
            rate limiter refuses the attempt, 400 when the new passwords
            differ or ``auth.change_password`` raises ValueError.
    """
    user = getattr(request.state, "current_user", None)
    if not user:
        raise HTTPException(status_code=401, detail="Authentication required")

    ip = client_ip(request)
    # Rate-limit before bcrypt to keep an attacker-controlled session
    # from burning CPU brute-forcing the current_password field.
    keys = (("user", user.username.lower()), ("ip", ip))
    if auth.pwchange_limiter.register(*keys) != "ok":
        raise HTTPException(
            status_code=429,
            detail="Too many password-change attempts. Try again later.",
        )

    form = await request.form()
    current = form.get("current_password") or ""
    new_pw = form.get("new_password") or ""
    confirm = form.get("confirm_password") or ""
    if confirm != new_pw:
        raise HTTPException(status_code=400, detail="New passwords do not match.")

    try:
        await auth.change_password(user.id, current, new_pw)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))

    # A successful change resets the limiter for this user/IP pair.
    auth.pwchange_limiter.clear(*keys)
    await auth.audit_auth_event(
        "user_password_changed", user.username,
        f"Password changed from {ip}",
    )
    return {"ok": True}

156
app/routes/burnin.py Normal file
View file

@ -0,0 +1,156 @@
"""Burn-in endpoints — start, cancel, CSV export, job detail.
POST /api/v1/burnin/start
POST /api/v1/burnin/{job_id}/cancel
GET /api/v1/burnin/export.csv must register before /{job_id}
so int("export.csv") doesn't 422
GET /api/v1/burnin/{job_id}
"""
from __future__ import annotations
import csv
import io
import aiosqlite
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import StreamingResponse
from app import burnin
from app.database import get_db
from app.models import (
BurninJobResponse, BurninStageResponse,
CancelBurninRequest, StartBurninRequest,
)
from ._helpers import operator_for
# Router for the burn-in endpoints; the app.routes package router includes it.
router = APIRouter()
def _row_to_burnin(row: aiosqlite.Row, stages: list[aiosqlite.Row]) -> BurninJobResponse:
    """Build a ``BurninJobResponse`` from a job row and its stage rows.

    A NULL ``percent`` on the job or on a stage is reported as 0.
    """
    stage_models = []
    for stage in stages:
        stage_models.append(
            BurninStageResponse(
                id=stage["id"],
                stage_name=stage["stage_name"],
                state=stage["state"],
                percent=stage["percent"] or 0,
                started_at=stage["started_at"],
                finished_at=stage["finished_at"],
                error_text=stage["error_text"],
            )
        )
    return BurninJobResponse(
        id=row["id"],
        drive_id=row["drive_id"],
        profile=row["profile"],
        state=row["state"],
        percent=row["percent"] or 0,
        stage_name=row["stage_name"],
        operator=row["operator"],
        created_at=row["created_at"],
        started_at=row["started_at"],
        finished_at=row["finished_at"],
        error_text=row["error_text"],
        stages=stage_models,
    )
@router.post("/api/v1/burnin/start")
async def burnin_start(request: Request, req: StartBurninRequest):
    """Queue a burn-in job for every drive id in the request.

    Each drive is attempted independently; failures are collected rather
    than aborting the batch.

    Returns:
        ``{"queued": [...], "errors": [...]}``.

    Raises:
        HTTPException: 409 carrying the first error when at least one
            drive failed and none could be queued.
    """
    operator = operator_for(request, req.operator)
    queued: list[dict] = []
    failures: list[dict] = []
    for drive_id in req.drive_ids:
        try:
            job_id = await burnin.start_job(
                drive_id, req.profile, operator, stage_order=req.stage_order
            )
        except burnin.PoolMemberError as exc:
            # Structured fields let the UI offer an unlock action.
            failures.append({
                "drive_id": drive_id,
                "error": str(exc),
                "pool_name": exc.pool_name,
                "pool_role": exc.pool_role,
                "pool_locked": True,
            })
        except ValueError as exc:
            failures.append({"drive_id": drive_id, "error": str(exc)})
        else:
            queued.append({"drive_id": drive_id, "job_id": job_id})
    if failures and not queued:
        # Surface the first error's structured fields so the UI can render
        # an unlock affordance instead of a generic toast.
        raise HTTPException(status_code=409, detail=failures[0])
    return {"queued": queued, "errors": failures}
@router.post("/api/v1/burnin/{job_id}/cancel")
async def burnin_cancel(job_id: int, request: Request, req: CancelBurninRequest):
    """Cancel burn-in job *job_id* on behalf of the logged-in operator.

    Raises:
        HTTPException: 409 when ``burnin.cancel_job`` reports that the
            job was not cancelled.
    """
    operator = operator_for(request, req.operator)
    was_cancelled = await burnin.cancel_job(job_id, operator)
    if was_cancelled:
        return {"cancelled": True}
    raise HTTPException(status_code=409, detail="Job not found or not cancellable")
# /api/v1/burnin/export.csv MUST be declared BEFORE /api/v1/burnin/{job_id}
# so FastAPI's path matching tries the literal first; otherwise the int
# coercion fires int("export.csv") and 422s.
@router.get("/api/v1/burnin/export.csv")
async def burnin_export_csv(db: aiosqlite.Connection = Depends(get_db)):
    """Download every burn-in job, newest first, as ``burnin_history.csv``.

    Each line carries the job and drive identity, profile, state,
    operator, the three timestamps, a duration in seconds computed from
    ``started_at`` / ``finished_at``, and the error text.
    """
    cursor = await db.execute("""
SELECT
bj.id AS job_id,
bj.drive_id,
d.devname,
d.serial,
d.model,
bj.profile,
bj.state,
bj.operator,
bj.created_at,
bj.started_at,
bj.finished_at,
CAST(
(julianday(bj.finished_at) - julianday(bj.started_at)) * 86400
AS INTEGER
) AS duration_seconds,
bj.error_text
FROM burnin_jobs bj
JOIN drives d ON d.id = bj.drive_id
ORDER BY bj.id DESC
""")
    records = await cursor.fetchall()

    # Header order mirrors the SELECT column order above.
    header = [
        "job_id", "drive_id", "devname", "serial", "model",
        "profile", "state", "operator",
        "created_at", "started_at", "finished_at", "duration_seconds",
        "error_text",
    ]
    buffer = io.StringIO()
    out = csv.writer(buffer)
    out.writerow(header)
    out.writerows(list(record) for record in records)

    return StreamingResponse(
        iter([buffer.getvalue()]),
        media_type="text/csv",
        headers={"Content-Disposition": "attachment; filename=burnin_history.csv"},
    )
@router.get("/api/v1/burnin/{job_id}", response_model=BurninJobResponse)
async def burnin_get(job_id: int, db: aiosqlite.Connection = Depends(get_db)):
    """Return burn-in job *job_id* together with its stages, ordered by id.

    Raises:
        HTTPException: 404 when no such job exists.
    """
    db.row_factory = aiosqlite.Row
    job_cursor = await db.execute("SELECT * FROM burnin_jobs WHERE id=?", (job_id,))
    job_row = await job_cursor.fetchone()
    if job_row is None:
        raise HTTPException(status_code=404, detail="Burn-in job not found")
    stage_cursor = await db.execute(
        "SELECT * FROM burnin_stages WHERE burnin_job_id=? ORDER BY id", (job_id,)
    )
    stage_rows = await stage_cursor.fetchall()
    return _row_to_burnin(job_row, stage_rows)

392
app/routes/drives.py Normal file
View file

@ -0,0 +1,392 @@
"""Drive endpoints — list, drawer, edit, SMART start/cancel, reset, unlock.
GET /api/v1/drives
GET /api/v1/drives/{id}/drawer
GET /api/v1/drives/{id}
PATCH /api/v1/drives/{id} notes / location update
POST /api/v1/drives/{id}/smart/start
POST /api/v1/drives/{id}/smart/cancel
POST /api/v1/drives/{id}/reset
POST /api/v1/drives/{id}/unlock pool-membership lock override
"""
from __future__ import annotations
import json as _json
from datetime import datetime, timezone
import aiosqlite
from fastapi import APIRouter, Depends, HTTPException, Request
from app import auth, burnin, poller
from app.database import get_db
from app.models import (
DriveResponse, UnlockPoolDriveRequest, UpdateDriveRequest,
)
from ._drives_helpers import _DRIVES_QUERY, _row_to_drive
from ._helpers import client_ip, operator_for
# Router for the drive endpoints; the app.routes package router includes it.
router = APIRouter()
@router.get("/api/v1/drives", response_model=list[DriveResponse])
async def list_drives(db: aiosqlite.Connection = Depends(get_db)):
    """Return every drive selected by ``_DRIVES_QUERY`` with no extra filter."""
    cursor = await db.execute(_DRIVES_QUERY.format(where=""))
    drives = []
    for record in await cursor.fetchall():
        drives.append(_row_to_drive(record))
    return drives
@router.get("/api/v1/drives/{drive_id}/drawer")
async def drive_drawer(drive_id: int, db: aiosqlite.Connection = Depends(get_db)):
    """Data for the log drawer — latest burn-in job + stages, SMART tests, audit events.

    Returns a dict with four keys:
      * ``drive``  — basic identity fields of the drive
      * ``burnin`` — the latest burn-in job dict with a ``stages`` list,
        or ``None`` when the drive has no job
      * ``smart``  — ``short`` / ``long`` test cards (each with
        ``raw_output``) plus the cached ``attrs``
      * ``events`` — up to 50 audit events for the drive, newest first

    Raises:
        HTTPException: 404 when the drive is not returned by the drives query.
    """
    cur = await db.execute(_DRIVES_QUERY.format(where="AND d.id = ?"), (drive_id,))
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    drive = _row_to_drive(row)
    # Latest burn-in job + its stages (include log_text and bad_blocks)
    cur = await db.execute(
        "SELECT * FROM burnin_jobs WHERE drive_id=? ORDER BY id DESC LIMIT 1",
        (drive_id,),
    )
    job_row = await cur.fetchone()
    burnin_job = None
    if job_row:
        job = dict(job_row)
        cur = await db.execute(
            "SELECT id, stage_name, state, percent, started_at, finished_at, "
            "duration_seconds, error_text, log_text, bad_blocks, "
            "bb_phase, bb_phase_pct, bb_mbps, bb_phase_history "
            "FROM burnin_stages WHERE burnin_job_id=? ORDER BY id",
            (job_row["id"],),
        )
        stages = [dict(r) for r in await cur.fetchall()]
        # Backfill empty stage.error_text from the parent job's error_text
        # for any stage that ended in a terminal state without recording
        # an error of its own. This catches the orphan pattern from hard
        # crashes (DB-locked, SSH disconnect, container restart) where
        # the failure didn't get to write a per-stage explanation.
        job_err = job.get("error_text")
        for s in stages:
            if (
                s.get("state") in ("failed", "cancelled", "unknown")
                and not s.get("error_text")
                and job_err
            ):
                s["error_text"] = job_err
        job["stages"] = stages
        burnin_job = job
    # SMART raw output from smart_tests table
    cur = await db.execute(
        "SELECT test_type, state, percent, started_at, finished_at, error_text, raw_output "
        "FROM smart_tests WHERE drive_id=?",
        (drive_id,),
    )
    # Keyed by test_type so the cards below can look up their raw output.
    smart_rows = {r["test_type"]: dict(r) for r in await cur.fetchall()}
    # Cached SMART attributes (JSON blob on drives table)
    smart_attrs = None
    cur = await db.execute("SELECT smart_attrs FROM drives WHERE id=?", (drive_id,))
    attrs_row = await cur.fetchone()
    if attrs_row and attrs_row["smart_attrs"]:
        try:
            smart_attrs = _json.loads(attrs_row["smart_attrs"])
        except Exception:
            # An undecodable blob is treated as "no attributes cached".
            pass
    # Last 50 audit events for this drive (newest first)
    cur = await db.execute("""
SELECT id, event_type, operator, message, created_at
FROM audit_events
WHERE drive_id = ?
ORDER BY id DESC
LIMIT 50
""", (drive_id,))
    events = [dict(r) for r in await cur.fetchall()]

    def _smart_card(test_type: str) -> dict:
        # Start from the SMART state already on the drive model and add the
        # raw output of the matching smart_tests row (None when absent).
        smart_obj = drive.smart_short if test_type == "short" else drive.smart_long
        base = smart_obj.model_dump() if smart_obj else {}
        row = smart_rows.get(test_type, {})
        base["raw_output"] = row.get("raw_output")
        return base

    return {
        "drive": {
            "id": drive.id,
            "devname": drive.devname,
            "serial": drive.serial,
            "model": drive.model,
            "size_bytes": drive.size_bytes,
            "temperature_c": drive.temperature_c,
        },
        "burnin": burnin_job,
        "smart": {
            "short": _smart_card("short"),
            "long": _smart_card("long"),
            "attrs": smart_attrs,
        },
        "events": events,
    }
@router.get("/api/v1/drives/{drive_id}", response_model=DriveResponse)
async def get_drive(drive_id: int, db: aiosqlite.Connection = Depends(get_db)):
    """Return a single drive looked up by its primary key.

    Runs the shared ``_DRIVES_QUERY`` narrowed with ``AND d.id = ?`` and
    converts the resulting row with ``_row_to_drive``.

    Raises:
        HTTPException: 404 when no row matches ``drive_id``.
    """
    query = _DRIVES_QUERY.format(where="AND d.id = ?")
    cursor = await db.execute(query, (drive_id,))
    match = await cursor.fetchone()
    if match:
        return _row_to_drive(match)
    raise HTTPException(status_code=404, detail="Drive not found")
@router.post("/api/v1/drives/{drive_id}/smart/start")
async def smart_start(
    drive_id: int,
    request: Request,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Start a standalone SHORT or LONG SMART test on a single drive.

    Uses SSH (smartctl) when configured — required for TrueNAS SCALE 25.10+
    where the REST smart/test endpoint no longer exists.
    Falls back to TrueNAS REST API for older versions.

    Args:
        drive_id: Primary key of the drive in the ``drives`` table.
        request: Incoming request, passed to ``operator_for``.
        body: JSON body; reads ``type`` ("SHORT"/"LONG", case-insensitive)
            and an optional ``operator``.
        db: Request-scoped database connection.

    Returns:
        SSH path: ``{"devname", "type", "message"}`` where ``message`` is the
        first 200 characters of the smartctl output.
        REST path: ``{"job_id", "devname", "type"}``.

    Raises:
        HTTPException: 422 for a missing/invalid ``type``, 404 for an unknown
            drive, 502 when the SSH or TrueNAS call fails, 503 when the REST
            path is needed but the TrueNAS client is not available.
    """
    from app import ssh_client

    # The body is arbitrary JSON, so "type" may be a number, list, etc.
    # Anything that is not a string is treated as invalid (422) rather than
    # blowing up on .upper() and surfacing as a 500.
    raw_type = body.get("type")
    test_type = raw_type.upper() if isinstance(raw_type, str) else ""
    if test_type not in ("SHORT", "LONG"):
        raise HTTPException(status_code=422, detail="type must be SHORT or LONG")

    cur = await db.execute("SELECT devname FROM drives WHERE id=?", (drive_id,))
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    devname = row[0]
    operator = operator_for(request, body.get("operator"))

    audit_sql = """INSERT INTO audit_events (event_type, drive_id, operator, message)
                   VALUES (?,?,?,?)"""
    audit_args = (
        "smart_test_start",
        drive_id,
        operator,
        f"{test_type} SMART test started on {devname}",
    )

    if ssh_client.is_configured():
        # SSH path — works on TrueNAS SCALE 25.10+ and CORE
        try:
            output = await ssh_client.start_smart_test(devname, test_type)
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"SSH error: {exc}") from exc

        now = datetime.now(timezone.utc).isoformat()
        # Mark as running in DB (truenas_job_id=NULL signals SSH-managed test).
        # Store smartctl start output as proof the test was initiated.
        await db.execute(
            """INSERT INTO smart_tests (drive_id, test_type, state, percent, started_at, raw_output)
               VALUES (?,?,?,?,?,?)
               ON CONFLICT(drive_id, test_type) DO UPDATE SET
                 state='running', percent=0, truenas_job_id=NULL,
                 started_at=excluded.started_at, finished_at=NULL, error_text=NULL,
                 raw_output=excluded.raw_output""",
            (drive_id, test_type.lower(), "running", 0, now, output),
        )
        await db.execute(audit_sql, audit_args)
        await db.commit()
        poller._notify_subscribers()
        return {"devname": devname, "type": test_type, "message": output[:200]}

    # REST path — older TrueNAS CORE / SCALE versions
    client = burnin._client
    if client is None:
        raise HTTPException(status_code=503, detail="TrueNAS client not ready")
    try:
        tn_job_id = await client.start_smart_test([devname], test_type)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}") from exc
    await db.execute(audit_sql, audit_args)
    await db.commit()
    return {"job_id": tn_job_id, "devname": devname, "type": test_type}
@router.post("/api/v1/drives/{drive_id}/smart/cancel")
async def smart_cancel(
    drive_id: int,
    request: Request,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Cancel a running standalone SMART test on a drive.

    When SSH is configured the test is aborted through
    ``ssh_client.abort_smart_test``; otherwise the matching RUNNING TrueNAS
    job is looked up and aborted through the REST client. In both cases the
    local ``smart_tests`` row is marked ``aborted`` and an audit event is
    written.

    Args:
        drive_id: Primary key of the drive in the ``drives`` table.
        request: Incoming request, passed to ``operator_for``.
        body: JSON body; reads ``type`` ("short"/"long", case-insensitive)
            and an optional ``operator``.
        db: Request-scoped database connection.

    Returns:
        ``{"cancelled": True, "devname": ..., "type": ...}``.

    Raises:
        HTTPException: 422 for a missing/invalid ``type``, 404 for an unknown
            drive or (REST path) no running job for it, 502 when the SSH or
            TrueNAS call fails, 503 when the REST path is needed but the
            TrueNAS client is not available.
    """
    from app import ssh_client

    # Non-string "type" values (numbers, lists, ...) are invalid input and
    # must yield 422, not an AttributeError from .lower().
    raw_type = body.get("type")
    test_type = raw_type.lower() if isinstance(raw_type, str) else ""
    if test_type not in ("short", "long"):
        raise HTTPException(status_code=422, detail="type must be 'short' or 'long'")

    cur = await db.execute("SELECT devname FROM drives WHERE id=?", (drive_id,))
    row = await cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Drive not found")
    devname = row[0]
    operator = operator_for(request, body.get("operator"))

    if ssh_client.is_configured():
        # SSH path — abort via smartctl -X. The REST client is not used
        # here, so its availability must not block the cancel.
        try:
            await ssh_client.abort_smart_test(devname)
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"SSH abort error: {exc}") from exc
    else:
        # REST path — find TrueNAS job and abort it
        client = burnin._client
        if client is None:
            raise HTTPException(status_code=503, detail="TrueNAS client not ready")
        try:
            jobs = await client.get_smart_jobs()
            tn_job_id = None
            for j in jobs:
                if j.get("state") != "RUNNING":
                    continue
                args = j.get("arguments", [])
                if not args or not isinstance(args[0], dict):
                    continue
                if devname in args[0].get("disks", []):
                    tn_job_id = j["id"]
                    break
            if tn_job_id is None:
                raise HTTPException(
                    status_code=404,
                    detail="No running SMART test found for this drive",
                )
            await client.abort_job(tn_job_id)
        except HTTPException:
            # Let the 404 above (or any HTTPException from the client)
            # through untouched instead of rewrapping it as a 502.
            raise
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"TrueNAS error: {exc}") from exc

    # Update local DB state
    now = datetime.now(timezone.utc).isoformat()
    await db.execute(
        "UPDATE smart_tests SET state='aborted', finished_at=? WHERE drive_id=? AND test_type=? AND state='running'",
        (now, drive_id, test_type),
    )
    await db.execute(
        """INSERT INTO audit_events (event_type, drive_id, operator, message)
           VALUES (?,?,?,?)""",
        ("smart_test_cancel", drive_id, operator,
         f"{test_type.upper()} SMART test cancelled on {devname}"),
    )
    await db.commit()
    return {"cancelled": True, "devname": devname, "type": test_type}
@router.patch("/api/v1/drives/{drive_id}")
async def update_drive(
    drive_id: int,
    req: UpdateDriveRequest,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Write the request's ``notes`` and ``location`` onto an existing drive.

    Both columns are set from the request on every call.

    Raises:
        HTTPException: 404 when ``drive_id`` does not exist.
    """
    lookup = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,))
    existing = await lookup.fetchone()
    if not existing:
        raise HTTPException(status_code=404, detail="Drive not found")

    new_values = (req.notes, req.location, drive_id)
    await db.execute("UPDATE drives SET notes=?, location=? WHERE id=?", new_values)
    await db.commit()
    return {"updated": True}
@router.post("/api/v1/drives/{drive_id}/reset")
async def reset_drive(
    drive_id: int,
    request: Request,
    body: dict,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Reset a drive's SMART display state so it shows as fresh.

    Sets every ``smart_tests`` row for the drive back to ``idle``, clears the
    cached ``smart_attrs`` and stamps ``last_reset_at`` on the drive, then
    records a ``drive_reset`` audit event. ``burnin_jobs`` rows are not
    touched, so job history is preserved.

    Raises:
        HTTPException: 404 when the drive does not exist; 409 when a burn-in
            job for it is still ``queued`` or ``running``.
    """
    exists_cur = await db.execute("SELECT id FROM drives WHERE id=?", (drive_id,))
    found = await exists_cur.fetchone()
    if not found:
        raise HTTPException(status_code=404, detail="Drive not found")

    # An active burn-in blocks the reset.
    active_cur = await db.execute(
        "SELECT COUNT(*) FROM burnin_jobs WHERE drive_id=? AND state IN ('queued','running')",
        (drive_id,),
    )
    active_jobs = (await active_cur.fetchone())[0]
    if active_jobs > 0:
        raise HTTPException(status_code=409, detail="Cannot reset while a burn-in is active")

    # The operator is resolved via operator_for rather than taken straight
    # from the body: the JS used to send a literal "operator" because
    # window._operator was never set.
    operator = operator_for(request, body.get("operator"))

    # SMART test rows go back to idle.
    await db.execute(
        """UPDATE smart_tests SET state='idle', percent=0, started_at=NULL,
           eta_at=NULL, finished_at=NULL, error_text=NULL, raw_output=NULL
           WHERE drive_id=?""",
        (drive_id,),
    )

    # Drop the SMART attrs cache and stamp the reset time (hides the prior
    # burn-in from the dashboard).
    reset_stamp = datetime.now(timezone.utc).isoformat()
    await db.execute(
        "UPDATE drives SET smart_attrs=NULL, last_reset_at=? WHERE id=?",
        (reset_stamp, drive_id),
    )

    audit_row = ("drive_reset", drive_id, operator, "Drive reset — SMART state cleared")
    await db.execute(
        """INSERT INTO audit_events (event_type, drive_id, operator, message)
           VALUES (?,?,?,?)""",
        audit_row,
    )
    await db.commit()
    poller._notify_subscribers()
    return {"reset": True}
@router.post("/api/v1/drives/{drive_id}/unlock")
async def unlock_pool_drive(drive_id: int, request: Request, req: UnlockPoolDriveRequest):
    """Grant a pool unlock for a drive after validating the confirm token.

    Each attempt is registered with ``auth.unlock_limiter`` under two keys
    (the drive id and the caller's IP); the limiter entries are cleared once
    ``burnin.grant_pool_unlock`` succeeds.

    Returns:
        ``{"unlocked": True, "expires_at": ..., "ttl_seconds": ...}``.

    Raises:
        HTTPException: 429 when the limiter does not answer ``"ok"``; 400
            when ``grant_pool_unlock`` raises ``ValueError``.
    """
    operator = operator_for(request, req.operator)
    source_ip = client_ip(request)

    # Rate-limit by drive AND by source IP. A typo on the confirm token
    # is the common case so the threshold is loose, but a brute-force
    # attempt to guess the token still hits the IP cap.
    limiter_keys = (("drive", drive_id), ("ip", source_ip))
    verdict = auth.unlock_limiter.register(*limiter_keys)
    if verdict != "ok":
        raise HTTPException(
            status_code=429,
            detail="Too many unlock attempts on this drive. Try again later.",
        )

    try:
        expiry = await burnin.grant_pool_unlock(
            drive_id, req.confirm_token, operator, req.reason,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    else:
        auth.unlock_limiter.clear(*limiter_keys)

    # Read from the submodule, not the package-root snapshot alias —
    # keeps tests that monkey-patch UNLOCK_TTL_SECONDS in
    # app.burnin.unlock observable from the API response.
    ttl = burnin.unlock.UNLOCK_TTL_SECONDS
    return {"unlocked": True, "expires_at": expiry, "ttl_seconds": ttl}

184
app/routes/history.py Normal file
View file

@ -0,0 +1,184 @@
"""Burn-in history pages: paginated list + per-job detail + print view.
GET /history filterable + paginated list
GET /history/{job_id} per-job detail with stages
GET /history/{job_id}/print clean print-friendly variant
"""
from __future__ import annotations
import aiosqlite
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from fastapi.responses import HTMLResponse
from app import poller
from app.database import get_db
from app.renderer import templates
from ._helpers import stale_context
router = APIRouter()
# Number of job rows per page of the /history list; used for both the
# LIMIT and the total-page arithmetic in history_list.
_PAGE_SIZE = 50
# Job states accepted by the ?state= filter (in addition to "all").
_ALL_STATES = ("queued", "running", "passed", "failed", "cancelled", "unknown")
# Listing query for the history page. The {where} placeholder is filled
# via str.format with the clause from _state_where; duration_seconds is
# derived in SQL from finished_at - started_at.
_HISTORY_QUERY = """
SELECT
bj.id, bj.drive_id, bj.profile, bj.state, bj.operator,
bj.created_at, bj.started_at, bj.finished_at, bj.error_text,
d.devname, d.serial, d.model, d.size_bytes,
CAST(
(julianday(bj.finished_at) - julianday(bj.started_at)) * 86400
AS INTEGER
) AS duration_seconds
FROM burnin_jobs bj
JOIN drives d ON d.id = bj.drive_id
{where}
ORDER BY bj.id DESC
"""
def _state_where(state: str) -> tuple[str, list]:
if state == "all":
return "", []
return "WHERE bj.state = ?", [state]
@router.get("/history", response_class=HTMLResponse)
async def history_list(
    request: Request,
    state: str = Query(default="all"),
    page: int = Query(default=1, ge=1),
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render the paginated, state-filterable burn-in history list.

    An unrecognised ``state`` falls back to ``"all"``; a ``page`` beyond the
    last page is clamped to the last page. The template also receives
    per-state job counts for the filter badges.
    """
    if state != "all" and state not in _ALL_STATES:
        state = "all"
    where_clause, params = _state_where(state)

    # Size of the filtered result set drives the pagination maths.
    count_sql = f"SELECT COUNT(*) FROM burnin_jobs bj JOIN drives d ON d.id = bj.drive_id {where_clause}"  # nosec B608 — `where_clause` is one of two hardcoded literals from _state_where; user input goes through bound params.
    count_cur = await db.execute(count_sql, params)
    total_count = (await count_cur.fetchone())[0]

    full_pages, remainder = divmod(total_count, _PAGE_SIZE)
    total_pages = max(1, full_pages + (1 if remainder else 0))
    page = min(page, total_pages)
    offset = (page - 1) * _PAGE_SIZE

    # Badge counts: one entry per state plus an "all" total.
    counts = {"all": total_count if state == "all" else 0}
    badge_cur = await db.execute(
        "SELECT state, COUNT(*) FROM burnin_jobs GROUP BY state"
    )
    counts.update({record[0]: record[1] for record in await badge_cur.fetchall()})
    if state != "all":
        # total_count is filtered here, so "all" needs its own query.
        all_cur = await db.execute("SELECT COUNT(*) FROM burnin_jobs")
        counts["all"] = (await all_cur.fetchone())[0]

    # The page of job rows itself.
    page_sql = _HISTORY_QUERY.format(where=where_clause) + " LIMIT ? OFFSET ?"
    jobs_cur = await db.execute(page_sql, [*params, _PAGE_SIZE, offset])
    jobs = [dict(record) for record in await jobs_cur.fetchall()]

    ps = poller.get_state()
    context = {
        "request": request,
        "jobs": jobs,
        "active_state": state,
        "counts": counts,
        "page": page,
        "total_pages": total_pages,
        "total_count": total_count,
        "poller": ps,
        **stale_context(ps),
    }
    return templates.TemplateResponse(request, "history.html", context)
# /history/{job_id}/print MUST be registered before /history/{job_id} so
# FastAPI's route matching tries the literal "print" before the int
# coercion would attempt int("print") and 422.
@router.get("/history/{job_id}/print", response_class=HTMLResponse)
async def history_print(
    request: Request,
    job_id: int,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render the print-friendly view of one burn-in job and its stages.

    Raises:
        HTTPException: 404 when ``job_id`` does not exist.
    """
    job_cur = await db.execute(
        """
        SELECT
            bj.*, d.devname, d.serial, d.model, d.size_bytes,
            CAST(
                (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400
                AS INTEGER
            ) AS duration_seconds
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        WHERE bj.id = ?
        """,
        (job_id,),
    )
    job_row = await job_cur.fetchone()
    if not job_row:
        raise HTTPException(status_code=404, detail="Job not found")

    stage_cur = await db.execute(
        """
        SELECT *,
            CAST(
                (julianday(finished_at) - julianday(started_at)) * 86400
                AS INTEGER
            ) AS duration_seconds
        FROM burnin_stages WHERE burnin_job_id=? ORDER BY id
        """,
        (job_id,),
    )
    job = dict(job_row)
    job["stages"] = [dict(stage) for stage in await stage_cur.fetchall()]

    context = {"request": request, "job": job}
    return templates.TemplateResponse(request, "job_print.html", context)
@router.get("/history/{job_id}", response_class=HTMLResponse)
async def history_detail(
    request: Request,
    job_id: int,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render the detail page for one burn-in job, including its stages.

    Raises:
        HTTPException: 404 when ``job_id`` does not exist.
    """
    # Job row joined with the drive it ran on.
    job_cur = await db.execute(
        """
        SELECT
            bj.*, d.devname, d.serial, d.model, d.size_bytes,
            CAST(
                (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400
                AS INTEGER
            ) AS duration_seconds
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        WHERE bj.id = ?
        """,
        (job_id,),
    )
    job_row = await job_cur.fetchone()
    if not job_row:
        raise HTTPException(status_code=404, detail="Burn-in job not found")

    # Stages in creation order, each with a computed duration.
    stage_cur = await db.execute(
        """
        SELECT *,
            CAST(
                (julianday(finished_at) - julianday(started_at)) * 86400
                AS INTEGER
            ) AS duration_seconds
        FROM burnin_stages
        WHERE burnin_job_id = ?
        ORDER BY id
        """,
        (job_id,),
    )
    job = dict(job_row)
    job["stages"] = [dict(stage) for stage in await stage_cur.fetchall()]

    ps = poller.get_state()
    context = {
        "request": request,
        "job": job,
        "poller": ps,
        **stale_context(ps),
    }
    return templates.TemplateResponse(request, "job_detail.html", context)

24
app/routes/report.py Normal file
View file

@ -0,0 +1,24 @@
"""On-demand email report trigger — useful for testing SMTP config."""
from __future__ import annotations
from fastapi import APIRouter, HTTPException, Request
from app import auth, mailer
from app.config import settings
router = APIRouter()
@router.post("/api/v1/report/send")
async def send_report_now(request: Request):
    """Send the daily status email right now.

    Admin-only, because sending mail is a side effect non-admins should not
    be able to trigger.

    Raises:
        HTTPException: 503 when ``settings.smtp_host`` is empty; 502 when
            ``mailer.send_report_now`` raises.
    """
    auth.require_admin(request)
    if settings.smtp_host:
        try:
            await mailer.send_report_now()
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"Mail send failed: {exc}")
        return {"sent": True, "to": settings.smtp_to}
    raise HTTPException(status_code=503, detail="SMTP not configured (SMTP_HOST is empty)")

153
app/routes/settings.py Normal file
View file

@ -0,0 +1,153 @@
"""Settings page + settings API.
GET /settings admin-only HTML form
GET /api/v1/settings/redacted admin-only diagnostic dump
POST /api/v1/settings save (admin) + audit secret rotations
POST /api/v1/settings/test-smtp admin-only SMTP probe
POST /api/v1/settings/test-ssh admin-only SSH probe
"""
from __future__ import annotations
import aiosqlite
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse
from app import auth, mailer, poller, settings_store
from app.config import settings
from app.database import get_db
from app.renderer import templates
from ._helpers import client_ip, secret_status, stale_context, SECRET_FIELDS
router = APIRouter()
@router.get("/settings", response_class=HTMLResponse)
async def settings_page(
    request: Request,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render the admin-only settings form.

    The form is pre-filled with the live values of every non-secret setting;
    secrets are represented only through ``secret_status()``.
    """
    auth.require_admin(request)
    from app import ssh_client as _ssh

    # Settings shown with their real values, in form order. Sensitive fields
    # (ssh_password, ssh_key, truenas_api_key) are deliberately not listed.
    shown_fields = (
        # SMTP
        "smtp_host",
        "smtp_port",
        "smtp_ssl_mode",
        "smtp_timeout",
        "smtp_user",
        "smtp_from",
        "smtp_to",
        "smtp_report_hour",
        "smtp_daily_report_enabled",
        "smtp_alert_on_fail",
        "smtp_alert_on_pass",
        # Webhook
        "webhook_url",
        # Burn-in behaviour
        "stuck_job_hours",
        "max_parallel_burnins",
        "temp_warn_c",
        "temp_crit_c",
        "bad_block_threshold",
        "surface_validate_block_size",
        "surface_validate_block_buffer",
        "surface_validate_passes",
        # SSH credentials (take effect immediately — each SSH call reads live settings)
        "ssh_host",
        "ssh_port",
        "ssh_user",
        # System settings (restart required to fully apply)
        "truenas_base_url",
        "truenas_verify_tls",
        "poll_interval_seconds",
        "stale_threshold_seconds",
        "allowed_ips",
        "log_level",
    )
    editable = {name: getattr(settings, name) for name in shown_fields}
    # An unset SSL mode is presented as the "starttls" default.
    editable["smtp_ssl_mode"] = editable["smtp_ssl_mode"] or "starttls"

    ps = poller.get_state()
    context = {
        "request": request,
        "editable": editable,
        "secret_status": secret_status(),
        "smtp_enabled": bool(settings.smtp_host),
        "ssh_configured": _ssh.is_configured(),
        "app_version": settings.app_version,
        "poller": ps,
        **stale_context(ps),
    }
    return templates.TemplateResponse(request, "settings.html", context)
@router.get("/api/v1/settings/redacted")
async def get_settings_redacted(request: Request):
    """Return every editable setting with secret values masked (admin-only).

    Fields listed in ``SECRET_FIELDS`` are reported as ``"***"`` when set and
    ``None`` when empty, so operators can see what is currently loaded
    without the real values appearing in the response. The output also
    carries ``secret_status()`` under ``_secret_status``.
    """
    auth.require_admin(request)
    redacted: dict[str, object] = {}
    for name in settings_store._EDITABLE:
        current = getattr(settings, name, None)
        if name not in SECRET_FIELDS:
            redacted[name] = current
        elif current:
            redacted[name] = "***"
        else:
            redacted[name] = None
    redacted["_secret_status"] = secret_status()
    return redacted
@router.post("/api/v1/settings")
async def save_settings(request: Request, body: dict):
    """Persist editable runtime settings (admin-only).

    A secret field sent as an empty string is removed from ``body`` so the
    stored secret is left alone; any other value for a secret field counts
    as a rotation and is audited by field name only.

    Returns:
        ``{"saved": True, "keys": ..., "rotated_secrets": [...]}``.

    Raises:
        HTTPException: 422 when ``settings_store.save`` raises ``ValueError``.
    """
    user = auth.require_admin(request)

    # Separate "left blank" secrets from ones that actually changed.
    rotated: list[str] = []
    for name in SECRET_FIELDS:
        if name not in body:
            continue
        if body[name] == "":
            body.pop(name)
        else:
            rotated.append(name)

    try:
        saved = settings_store.save(body)
    except ValueError as exc:
        raise HTTPException(status_code=422, detail=str(exc))

    # Audit secret rotations — never log the value, only the field name +
    # operator + source IP. Lets the audit page answer "who rotated the
    # SMTP password last week?"
    if rotated and user:
        changed = ", ".join(sorted(rotated))
        await auth.audit_auth_event(
            "settings_secret_changed",
            user.username,
            f"Rotated secrets from {client_ip(request)}: {changed}",
        )
    return {"saved": True, "keys": saved, "rotated_secrets": rotated}
@router.post("/api/v1/settings/test-smtp")
async def test_smtp(request: Request):
    """Probe the current SMTP configuration without sending an email (admin-only).

    Raises:
        HTTPException: 502 carrying the probe's error when it reports failure.
    """
    auth.require_admin(request)
    outcome = await mailer.test_smtp_connection()
    if outcome["ok"]:
        return {"ok": True}
    raise HTTPException(status_code=502, detail=outcome["error"])
@router.post("/api/v1/settings/test-ssh")
async def test_ssh(request: Request):
    """Probe the current SSH configuration (admin-only).

    Raises:
        HTTPException: 502 with the probe's error (or "Connection failed"
            when none is given) if the probe reports failure.
    """
    auth.require_admin(request)
    from app import ssh_client

    outcome = await ssh_client.test_connection()
    if outcome["ok"]:
        return {"ok": True}
    raise HTTPException(
        status_code=502,
        detail=outcome.get("error", "Connection failed"),
    )

111
app/routes/stats.py Normal file
View file

@ -0,0 +1,111 @@
"""Stats / analytics page — aggregates over `burnin_jobs` for dashboards."""
from __future__ import annotations
import aiosqlite
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from app import poller
from app.database import get_db
from app.renderer import templates
from ._helpers import stale_context
router = APIRouter()
@router.get("/stats", response_class=HTMLResponse)
async def stats_page(
    request: Request,
    db: aiosqlite.Connection = Depends(get_db),
):
    """Render the analytics page from aggregate queries over ``burnin_jobs``.

    Collects overall state counts, pass rate per drive model, activity over
    the last 14 days, average duration per drive size, failures per stage,
    and the number of tracked drives.
    """

    async def _all_as_dicts(sql: str) -> list:
        """Run a parameterless query and return every row as a dict."""
        cursor = await db.execute(sql)
        return [dict(record) for record in await cursor.fetchall()]

    # Overall counts
    totals_cur = await db.execute("""
        SELECT
            COUNT(*) as total,
            SUM(CASE WHEN state='passed' THEN 1 ELSE 0 END) as passed,
            SUM(CASE WHEN state='failed' THEN 1 ELSE 0 END) as failed,
            SUM(CASE WHEN state='running' THEN 1 ELSE 0 END) as running,
            SUM(CASE WHEN state='cancelled' THEN 1 ELSE 0 END) as cancelled
        FROM burnin_jobs
    """)
    overall = dict(await totals_cur.fetchone())

    # Failure rate by drive model (only completed jobs)
    by_model = await _all_as_dicts("""
        SELECT
            COALESCE(d.model, 'Unknown') AS model,
            COUNT(*) AS total,
            SUM(CASE WHEN bj.state='passed' THEN 1 ELSE 0 END) AS passed,
            SUM(CASE WHEN bj.state='failed' THEN 1 ELSE 0 END) AS failed,
            ROUND(100.0 * SUM(CASE WHEN bj.state='passed' THEN 1 ELSE 0 END) / COUNT(*), 1) AS pass_rate
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        WHERE bj.state IN ('passed', 'failed')
        GROUP BY COALESCE(d.model, 'Unknown')
        ORDER BY total DESC
        LIMIT 20
    """)

    # Activity last 14 days
    by_day = await _all_as_dicts("""
        SELECT
            date(created_at) AS day,
            COUNT(*) AS total,
            SUM(CASE WHEN state='passed' THEN 1 ELSE 0 END) AS passed,
            SUM(CASE WHEN state='failed' THEN 1 ELSE 0 END) AS failed
        FROM burnin_jobs
        WHERE created_at >= date('now', '-14 days')
        GROUP BY date(created_at)
        ORDER BY day DESC
    """)

    # Average test duration by drive size (rounded to nearest TB)
    by_size = await _all_as_dicts("""
        SELECT
            CAST(ROUND(CAST(d.size_bytes AS REAL) / 1e12) AS INTEGER) AS size_tb,
            COUNT(*) AS total,
            ROUND(AVG(
                (julianday(bj.finished_at) - julianday(bj.started_at)) * 86400 / 3600.0
            ), 1) AS avg_hours
        FROM burnin_jobs bj
        JOIN drives d ON d.id = bj.drive_id
        WHERE bj.state IN ('passed', 'failed')
          AND bj.started_at IS NOT NULL
          AND bj.finished_at IS NOT NULL
        GROUP BY size_tb
        ORDER BY size_tb
    """)

    # Failure breakdown by stage (which stage caused the failure)
    by_failure_stage = await _all_as_dicts("""
        SELECT
            COALESCE(bj.stage_name, 'unknown') AS failed_stage,
            COUNT(*) AS count
        FROM burnin_jobs bj
        WHERE bj.state = 'failed'
        GROUP BY failed_stage
        ORDER BY count DESC
    """)

    # Drives tracked
    drives_cur = await db.execute("SELECT COUNT(*) FROM drives")
    drives_total = (await drives_cur.fetchone())[0]

    ps = poller.get_state()
    context = {
        "request": request,
        "overall": overall,
        "by_model": by_model,
        "by_day": by_day,
        "by_size": by_size,
        "by_failure_stage": by_failure_stage,
        "drives_total": drives_total,
        "poller": ps,
        **stale_context(ps),
    }
    return templates.TemplateResponse(request, "stats.html", context)

136
app/routes/system.py Normal file
View file

@ -0,0 +1,136 @@
"""System-level endpoints with no business-logic dependencies.
GET /health readiness probe (DB write + poller + SSH)
GET /api/v1/updates/check check Forgejo for newer release
WS /ws/terminal xterm.js bridge to TrueNAS SSH PTY
"""
from __future__ import annotations
from datetime import datetime, timezone
import aiosqlite
from fastapi import APIRouter, Depends, WebSocket
from fastapi.responses import JSONResponse
from app import poller
from app.config import settings
from app.database import get_db
router = APIRouter()
@router.get("/health")
async def health(db: aiosqlite.Connection = Depends(get_db)):
    """Real readiness check, not just process-is-running.

    Verifies (a) DB writable, (b) poller has succeeded recently relative
    to the configured stale_threshold_seconds, (c) SSH reachable when
    configured. Returns 503 when any check fails so a proxy/orchestrator
    health probe can take the container out of rotation.

    Returns:
        JSONResponse with ``status`` ("ok"/"degraded"), the per-check
        results under ``checks``, ``drives_tracked`` (``None`` when the
        count could not be read), ``poll_interval_s`` and ``version``.
        HTTP status is 200 when every check passed, otherwise 503.
    """
    from app import ssh_client as _ssh

    checks: dict[str, dict] = {}

    # DB probe — actually exercise the write path (read-only mounts,
    # full disks, broken WAL all silently pass a journal_mode read).
    # Uses a temp table that lives only inside the connection so the
    # round-trip touches the writer without polluting real data.
    try:
        await db.execute(
            "CREATE TEMP TABLE IF NOT EXISTS _hc (k INTEGER PRIMARY KEY, v TEXT)"
        )
        await db.execute(
            "INSERT OR REPLACE INTO _hc (k, v) VALUES (1, ?)",
            (datetime.now(timezone.utc).isoformat(),),
        )
        cur = await db.execute("SELECT v FROM _hc WHERE k=1")
        row = await cur.fetchone()
        await db.commit()
        checks["db"] = {"ok": bool(row)}
    except Exception as exc:
        checks["db"] = {"ok": False, "error": str(exc)}

    ps = poller.get_state()
    last = ps.get("last_poll_at")
    poll_age = None
    if last:
        try:
            t = datetime.fromisoformat(last)
            if t.tzinfo is None:
                # Naive timestamps are treated as UTC.
                t = t.replace(tzinfo=timezone.utc)
            poll_age = (datetime.now(timezone.utc) - t).total_seconds()
        except Exception:
            poll_age = None
    # An unknown age (no poll yet, or unparsable timestamp) does not by
    # itself fail the check; the poller's own "healthy" flag still must hold.
    poll_ok = ps.get("healthy") and (
        poll_age is None or poll_age <= settings.stale_threshold_seconds * 3
    )
    checks["poller"] = {
        "ok": bool(poll_ok),
        "last_error": ps.get("last_error"),
        "last_poll_at": last,
        "age_seconds": int(poll_age) if poll_age is not None else None,
    }

    # SSH probe — only when configured. Cheap (one short remote command).
    if _ssh.is_configured():
        try:
            r = await _ssh.test_connection()
            checks["ssh"] = {"ok": bool(r.get("ok")), "error": r.get("error")}
        except Exception as exc:
            checks["ssh"] = {"ok": False, "error": str(exc)}
    else:
        checks["ssh"] = {"ok": True, "skipped": True}

    # The drive count is informational, but it hits the same database the
    # probe above may have just found broken. Guard it so a DB failure is
    # reported as a 503 "degraded" body instead of an unhandled 500.
    drives_tracked = None
    try:
        cur = await db.execute("SELECT COUNT(*) FROM drives")
        row = await cur.fetchone()
        drives_tracked = row[0] if row else 0
    except Exception as exc:
        if checks["db"]["ok"]:
            checks["db"] = {"ok": False, "error": str(exc)}

    status_ok = all(c["ok"] for c in checks.values())
    body = {
        "status": "ok" if status_ok else "degraded",
        "checks": checks,
        "drives_tracked": drives_tracked,
        "poll_interval_s": settings.poll_interval_seconds,
        "version": settings.app_version,
    }
    return JSONResponse(body, status_code=200 if status_ok else 503)
@router.websocket("/ws/terminal")
async def terminal_ws(websocket: WebSocket):
    """Hand the browser terminal WebSocket over to ``app.terminal.handle``.

    The terminal module is imported at call time and owns the whole session.
    """
    from app import terminal as bridge

    await bridge.handle(websocket)
@router.get("/api/v1/updates/check")
async def check_updates():
    """Ask the Forgejo releases API whether a newer release exists.

    Never raises: any failure is reported through the ``message`` field with
    ``update_available`` set to ``False``.

    Returns:
        ``{"current", "latest", "update_available", "message"}``.
    """
    import httpx

    current = settings.app_version

    def _reply(latest, available, message):
        # Every outcome shares the same response shape.
        return {
            "current": current,
            "latest": latest,
            "update_available": available,
            "message": message,
        }

    releases_url = (
        "https://git.hellocomputer.xyz/api/v1/repos/brandon/nas-burnin/releases/latest"
    )
    try:
        async with httpx.AsyncClient(timeout=8.0) as client:
            resp = await client.get(
                releases_url,
                headers={"Accept": "application/json"},
            )
            if resp.status_code == 404:
                return _reply(None, False, "No releases published yet")
            if resp.status_code != 200:
                return _reply(None, False, f"Forgejo API returned {resp.status_code}")

            payload = resp.json()
            latest = payload.get("tag_name", "").lstrip("v")
            # A missing tag, or one equal to the running version, means
            # there is nothing to update to.
            is_newer = bool(latest) and latest != current
            return _reply(latest or None, is_newer, None)
    except Exception as exc:
        return _reply(None, False, f"Could not reach update server: {exc}")

View file

@ -11,6 +11,7 @@ a container restart to fully take effect (clients/middleware are initialized at
import json
import logging
from pathlib import Path
from typing import Any
from app.config import settings
@ -38,6 +39,9 @@ _EDITABLE: dict[str, type] = {
"temp_warn_c": int,
"temp_crit_c": int,
"bad_block_threshold": int,
"surface_validate_block_size": int,
"surface_validate_block_buffer": int,
"surface_validate_passes": int,
# SSH credentials — take effect immediately (each connection reads live settings)
"ssh_host": str,
"ssh_port": int,
@ -62,7 +66,14 @@ def _overrides_path() -> Path:
return Path(settings.db_path).parent / "settings_overrides.json"
def _coerce(key: str, raw) -> object:
def _coerce(key: str, raw: Any) -> Any:
"""Coerce a raw value to the type registered in _EDITABLE.
Return type is Any because the concrete return type depends on
the key int/str/bool and there's no narrowing path mypy can
follow from the dict lookup. Callers know which type to expect
based on the field they're reading.
"""
coerce = _EDITABLE[key]
if coerce is bool:
if isinstance(raw, bool):
@ -96,6 +107,26 @@ def _apply(data: dict) -> None:
if key == "bad_block_threshold" and int(val) < 0:
log.warning("settings_store: bad_block_threshold must be >= 0 — ignoring")
continue
if key == "surface_validate_block_size":
# badblocks accepts any positive int but in practice the
# useful range is 512..1048576 and it should be a power of 2.
v = int(val)
if v < 512 or v > 1048576 or (v & (v - 1)) != 0:
log.warning(
"settings_store: surface_validate_block_size must be "
"a power of 2 between 512 and 1048576 — ignoring %r", val
)
continue
if key == "surface_validate_block_buffer" and not (1 <= int(val) <= 4096):
log.warning(
"settings_store: surface_validate_block_buffer must be 1..4096 — ignoring"
)
continue
if key == "surface_validate_passes" and not (0 <= int(val) <= 16):
log.warning(
"settings_store: surface_validate_passes must be 0..16 — ignoring"
)
continue
if key == "ssh_port" and not (1 <= int(val) <= 65535):
log.warning("settings_store: ssh_port out of range — ignoring")
continue

627
app/ssh_client.py Normal file
View file

@ -0,0 +1,627 @@
"""
SSH client for direct TrueNAS command execution (Stage 7).
When ssh_host is configured, burn-in stages use SSH to run smartctl and
badblocks directly on the TrueNAS host instead of going through the REST API.
Falls back to REST API / simulation when SSH is not configured (dev/mock mode).
TrueNAS CORE (FreeBSD) device paths: /dev/ada0, /dev/da0, etc.
TrueNAS SCALE (Linux) device paths: /dev/sda, /dev/sdb, etc.
The devname from the TrueNAS API is used as-is in /dev/{devname}.
"""
import asyncio
import logging
import re
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Monitored SMART attributes
# Maps SMART attribute ID -> (attribute name, is_critical):
# True → any non-zero raw value is a hard failure (drive rejected)
# False → non-zero is a warning (flagged but test continues)
# ---------------------------------------------------------------------------
SMART_ATTRS: dict[int, tuple[str, bool]] = {
    5: ("Reallocated_Sector_Ct", True), # reallocation = FAIL
    10: ("Spin_Retry_Count", False), # mechanical stress = WARN
    188: ("Command_Timeout", False), # drive not responding = WARN
    197: ("Current_Pending_Sector", True), # pending reallocation = FAIL
    198: ("Offline_Uncorrectable", True), # unrecoverable read error = FAIL
    199: ("UDMA_CRC_Error_Count", False), # cable/controller issue = WARN
}
# ---------------------------------------------------------------------------
# Configuration check
# ---------------------------------------------------------------------------
def is_configured() -> bool:
    """Report whether SSH can be used: a host plus at least one credential.

    A credential is an inline key (``settings.ssh_key``), a password
    (``settings.ssh_password``), or an existing key file at ``$SSH_KEY_FILE``
    (falling back to ``_MOUNTED_KEY_PATH``).
    """
    import os
    from app.config import settings

    if not settings.ssh_host:
        return False
    if settings.ssh_key or settings.ssh_password:
        return True
    key_file = os.environ.get("SSH_KEY_FILE", _MOUNTED_KEY_PATH)
    return os.path.exists(key_file)
# ---------------------------------------------------------------------------
# Low-level connection
# ---------------------------------------------------------------------------
_MOUNTED_KEY_PATH = "/run/secrets/ssh_key"


async def _connect():
    """Build a single-use SSH connection to the configured host.

    Returns the result of ``asyncssh.connect(...)`` without entering it;
    callers in this module use ``async with await _connect() as conn``.

    Credential precedence: inline key material from settings, then the
    password, then a key file at ``$SSH_KEY_FILE`` / ``_MOUNTED_KEY_PATH`` if
    it exists. With none of those, no explicit credential is passed.
    """
    import os

    import asyncssh
    from app.config import settings

    options: dict = dict(
        host=settings.ssh_host,
        port=settings.ssh_port,
        username=settings.ssh_user,
        # Host keys are not verified (same spirit as TRUENAS_VERIFY_TLS=false).
        known_hosts=None,
    )

    if settings.ssh_key:
        # Key material supplied directly in settings.
        options["client_keys"] = [asyncssh.import_private_key(settings.ssh_key)]
    elif settings.ssh_password:
        options["password"] = settings.ssh_password
    else:
        # Mounted key file (preferred for production — no key in env vars).
        key_path = os.environ.get("SSH_KEY_FILE", _MOUNTED_KEY_PATH)
        if os.path.exists(key_path):
            options["client_keys"] = [key_path]
        # Otherwise asyncssh is left to its agent/default key lookup.

    return asyncssh.connect(**options)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
async def test_connection() -> dict:
    """Test SSH connectivity by running ``echo ok`` on the remote host.

    Returns:
        {"ok": True} when the command's stdout contains "ok";
        {"ok": False, "error": str} when SSH is not configured, the command
        produced something else, or any exception was raised while connecting
        or running (the exception text becomes the error).
    """
    if not is_configured():
        # is_configured() is False for a missing host AND for missing
        # credentials, so the message must not blame the host alone.
        return {
            "ok": False,
            "error": "SSH not configured (ssh_host or credentials missing)",
        }
    try:
        async with await _connect() as conn:
            result = await conn.run("echo ok", check=False)
            if "ok" in result.stdout:
                return {"ok": True}
            return {"ok": False, "error": result.stderr.strip() or "unexpected output"}
    except Exception as exc:
        # Best-effort probe: report the failure instead of raising.
        return {"ok": False, "error": str(exc)}
async def get_smart_attributes(devname: str) -> dict:
    """
    Run `smartctl -a /dev/{devname}` over SSH and parse the output.

    The device path is shell-quoted before it is sent to the remote host, so
    a devname containing shell metacharacters cannot change the command.
    Ordinary names such as ``sda`` or ``nvme0n1`` are passed through unchanged.

    Returns a dict with:
        health:     str  "PASSED" | "FAILED" | "UNKNOWN"
        raw_output: str  full smartctl output (stdout followed by stderr)
        attributes: dict[int, {"name": str, "raw": int}]
        warnings:   list[str]  attribute names with non-zero raw (non-critical)
        failures:   list[str]  attribute names with non-zero raw (critical)

    Exceptions raised while connecting or running the command are caught and
    reported as an "UNKNOWN" result whose ``failures`` list holds one
    "SSH error: ..." entry.
    """
    import shlex

    device = shlex.quote(f"/dev/{devname}")
    cmd = f"smartctl -a {device}"
    try:
        async with await _connect() as conn:
            result = await conn.run(cmd, check=False)
            output = result.stdout + result.stderr
            return _parse_smartctl(output)
    except Exception as exc:
        return {
            "health": "UNKNOWN",
            "raw_output": str(exc),
            "attributes": {},
            "warnings": [],
            "failures": [f"SSH error: {exc}"],
        }
async def start_smart_test(devname: str, test_type: str) -> str:
    """
    Run `smartctl -t short|long /dev/{devname}` over SSH.

    The device path is shell-quoted before it is sent to the remote host, so
    a devname containing shell metacharacters cannot change the command.

    Args:
        devname: base device name, e.g. "sda".
        test_type: "SHORT" (case-insensitive) selects the short test; any
            other value selects the long test.

    Returns:
        The raw smartctl output (stdout followed by stderr).

    Raises:
        RuntimeError: the output does not announce that the test began and
            the exit status is neither 0 nor 4.
        Exception: connection errors from ``_connect`` / ``conn.run`` are not
            caught here and propagate to the caller.
    """
    import shlex

    arg = "short" if test_type.upper() == "SHORT" else "long"
    device = shlex.quote(f"/dev/{devname}")
    cmd = f"smartctl -t {arg} {device}"
    async with await _connect() as conn:
        result = await conn.run(cmd, check=False)
        output = result.stdout + result.stderr
        # smartctl exits 0 or 4 when the test is successfully started on most drives
        started = (
            "Testing has begun" in output
            or "test has begun" in output.lower()
            or result.returncode in (0, 4)
        )
        if not started:
            raise RuntimeError(f"smartctl returned exit {result.returncode}: {output[:400]}")
        return output
async def poll_smart_progress(devname: str) -> dict:
    """
    Run `smartctl -a /dev/{devname}` over SSH and extract self-test status.

    The device path is shell-quoted before it is sent to the remote host, so
    a devname containing shell metacharacters cannot change the command.

    Returns the dict produced by ``_parse_smart_progress``:
        state:             "running" | "passed" | "failed" | "unknown"
        percent_remaining: int | None
        output:            str  (stdout followed by stderr)

    Connection errors are not caught here and propagate to the caller.
    """
    import shlex

    device = shlex.quote(f"/dev/{devname}")
    cmd = f"smartctl -a {device}"
    async with await _connect() as conn:
        result = await conn.run(cmd, check=False)
        output = result.stdout + result.stderr
        return _parse_smart_progress(output)
async def abort_smart_test(devname: str) -> None:
    """Send `smartctl -X /dev/{devname}` over SSH to abort an in-progress test.

    The device path is shell-quoted before it is sent to the remote host, so
    a devname containing shell metacharacters cannot change the command.
    The command's exit status and output are ignored; connection errors
    propagate to the caller.
    """
    import shlex

    device = shlex.quote(f"/dev/{devname}")
    cmd = f"smartctl -X {device}"
    async with await _connect() as conn:
        await conn.run(cmd, check=False)
def _parse_zpool_list_output(stdout: str) -> dict:
"""Pure parser for `zpool list -vHP` stdout. Exposed for unit tests.
See get_pool_membership() for output semantics. This function never
raises malformed lines are silently skipped.
"""
import re as _re
def _strip_partition(name: str) -> str:
m = _re.match(r"^(nvme\d+n\d+)", name)
if m:
return m.group(1)
m = _re.match(r"^(sd[a-z]+)", name)
if m:
return m.group(1)
return name
SECTION_MARKERS = {"cache", "log", "logs", "spare", "spares",
"special", "dedup"}
SECTION_NORMALIZE = {"logs": "log", "spares": "spare"}
out: dict = {}
current_pool: str | None = None
current_role: str = "data"
for raw in stdout.splitlines():
if not raw.strip():
continue
depth = 0
while depth < len(raw) and raw[depth] == "\t":
depth += 1
first = raw[depth:].split("\t", 1)[0].strip()
if depth == 0:
current_pool = first
current_role = "data"
continue
if depth == 1:
if first in SECTION_MARKERS:
current_role = SECTION_NORMALIZE.get(first, first)
continue
if first.startswith(("mirror", "raidz", "draid")):
continue
if first.startswith("/dev/") and current_pool:
dn = _strip_partition(first[len("/dev/"):])
out[dn] = {"pool": current_pool, "role": current_role}
continue
if first.startswith("/dev/") and current_pool:
dn = _strip_partition(first[len("/dev/"):])
out[dn] = {"pool": current_pool, "role": current_role}
return out
def _parse_lsblk_zfs_output(stdout: str) -> set:
"""Pure parser for `lsblk -no NAME,FSTYPE -l` stdout. Returns base
devnames carrying ZFS labels (whole-disk OR via partition). Exposed
for unit tests."""
import re as _re
out: set = set()
for line in stdout.splitlines():
parts = line.split()
if len(parts) < 2:
continue
name, fstype = parts[0], parts[1]
if fstype != "zfs_member":
continue
if name.startswith("nvme"):
m = _re.match(r"^(nvme\d+n\d+)", name)
if m:
out.add(m.group(1))
else:
m = _re.match(r"^(sd[a-z]+)", name)
if m:
out.add(m.group(1))
return out
async def get_pool_membership() -> dict | None:
    """Return {devname: {"pool": str, "role": str}} for every drive in any zpool.

    Runs `zpool list -vHP` over SSH and hands stdout to
    ``_parse_zpool_list_output``. Tab-indent depth gives the structure:

      depth 0 — pool name line
      depth 1 — vdev type line (mirror-N, raidz*N, draid*) OR section
                marker (cache/log/spare/special/dedup/logs) OR a single-disk
                vdev that is itself a /dev/... entry
      depth 2 — device line within a vdev: '/dev/sdX', '/dev/nvmeXnY', etc.
                It may carry a partition suffix, which is stripped back to
                the base devname so it matches what TrueNAS reports.

    Roles: data | cache | log | spare | special | dedup

    Returns:
      - {} when SSH is not configured, or when the call succeeded with exit
        status 0 and empty stdout (no pools imported).
      - None on any failure (exception while connecting/running, or a
        non-zero exit status). Callers MUST treat None differently from {}:
        an empty dict is "definitely no pool members", None is "we couldn't
        tell". Treating None as "no pool members" is a fail-open security
        regression.
    """
    if not is_configured():
        return {}
    cmd = "zpool list -vHP 2>/dev/null"
    try:
        async with await _connect() as conn:
            r = await conn.run(cmd, check=False)
            if r.returncode != 0:
                return None
    except Exception:
        return None
    if not r.stdout:
        # rc==0 with empty output = host has no pools. (`zpool list -H`
        # returns no rows when zero pools are imported.) That's a real
        # answer, not a failure.
        return {}
    return _parse_zpool_list_output(r.stdout)
async def get_mounted_drives() -> set | None:
    """Return base devnames of every drive with something mounted right now.

    Defense-in-depth on top of pool detection — catches XFS/ext4/etc. scratch
    disks the operator forgot about. Runs `findmnt -no SOURCE` over SSH and
    parses the result with ``_parse_findmnt_sources``.

    Returns:
      - set() when SSH is not configured.
      - None on any failure: an exception, a non-zero exit status, or empty
        stdout (the caller treats that as 'preserve previous state').
    """
    if not is_configured():
        return set()
    try:
        async with await _connect() as conn:
            proc = await conn.run("findmnt -no SOURCE 2>/dev/null", check=False)
            # findmnt always has at least / mounted on a Linux host, so an
            # empty listing is itself suspicious and counts as a failure.
            command_failed = proc.returncode != 0 or not proc.stdout
            if command_failed:
                return None
    except Exception:
        return None
    return _parse_findmnt_sources(proc.stdout)
def _parse_findmnt_sources(stdout: str) -> set:
"""Pure parser for findmnt output. Strips partitions; ignores tmpfs,
overlay, zfs (zfs is handled by pool detection).
Recognised devnames (covers TrueNAS SCALE + CORE + LVM/MD stacks):
sd[a-z]+ Linux SCSI/SATA (sda, sdb, ..., sdaa)
nvmeXnY[pZ] Linux NVMe namespaces
mapper/<name> LVM logical volumes (/dev/mapper/vg-lv)
dm-N devicemapper short names
mdN Linux MD RAID arrays
ada[0-9]+, da[0-9]+ TrueNAS CORE (FreeBSD) SATA/SAS
"""
import re as _re
out: set = set()
for raw in stdout.splitlines():
s = raw.strip()
if not s.startswith("/dev/"):
continue
# Skip ZFS filesystems (those are pool/exported drives, handled
# separately and shouldn't double-lock as 'mounted').
if "/dev/zd" in s or "/dev/zvol" in s:
continue
name = s[len("/dev/"):].split("[")[0] # bind mounts can have [subdir]
# Try each recognised devname pattern in order. Mapper/dm-/md
# entries are kept whole because they represent a stack the
# operator should resolve manually before burn-in.
for pat in (
r"^(nvme\d+n\d+)", # NVMe (strip pN)
r"^(sd[a-z]+)", # Linux SCSI/SATA (strip number)
r"^(mapper/[^/]+)", # LVM logical volume
r"^(dm-\d+)", # devicemapper short name
r"^(md\d+)", # MD RAID
r"^(ada\d+)", # FreeBSD SATA
r"^(da\d+)", # FreeBSD SAS/SCSI
):
m = _re.match(pat, name)
if m:
out.add(m.group(1))
break
return out
async def fresh_pool_check_for_drive(devname: str) -> dict | None:
    """Live, on-demand re-detection of one drive's pool/mounted state.

    Consults, in order, ``get_pool_membership`` (`zpool list -vHP`),
    ``get_zfs_member_drives`` (`lsblk`, zfs_member labels) and
    ``get_mounted_drives`` (`findmnt`), stopping at the first that claims
    `devname`. Each helper opens its own SSH connection.

    Closes the poll-window gap between an operator unlock and the next
    cached state refresh — used as a final gate inside burnin.start_job
    so a drive that was imported into a pool after unlock but before the
    next poll can't slip through.

    Return shape: {"pool": str, "role": str} | None.
      - the pool-membership entry when the drive is in an active pool
      - {"pool": "(exported)", "role": "exported"} for a ZFS-labelled drive
      - {"pool": "(mounted)", "role": "mounted"} for a mounted drive
      - None when nothing claims the drive

    None is ALSO returned when SSH is not configured, `devname` is empty, or
    the pool-membership lookup failed — so the caller should treat None
    skeptically and only act on it if cached state ALSO says None. A failed
    lsblk or findmnt lookup is skipped rather than treated as a match.
    """
    if not devname or not is_configured():
        return None

    pools = await get_pool_membership()
    if pools is None:
        return None
    if devname in pools:
        return pools[devname]

    labelled = await get_zfs_member_drives()
    if labelled is not None and devname in labelled:
        return {"pool": "(exported)", "role": "exported"}

    mounted = await get_mounted_drives()
    if mounted is not None and devname in mounted:
        return {"pool": "(mounted)", "role": "mounted"}

    return None
async def get_smart_health_map(devnames: list[str]) -> dict | None:
    """Return {devname: 'PASSED'|'FAILED'|'UNKNOWN'} for every devname.

    Runs `smartctl -H` for each disk in a single SSH session — much faster
    than one connection per disk. Each result is wrapped between an
    "@@<devname>@@" line and an "@@END@@" line so the combined stdout can be
    split deterministically by ``_parse_smart_health_batch``.

    `smartctl -H` is the cheap SMART self-assessment lookup (no full
    attribute scan). The expected output lines are:
        SMART overall-health self-assessment test result: PASSED
        SMART overall-health self-assessment test result: FAILED!
    A device whose output contains neither verdict is recorded as UNKNOWN.

    Only devnames made purely of ASCII letters and digits are sent to the
    remote shell; anything else is dropped so shell metacharacters can never
    be injected. (The previous chained-``replace`` check also dropped the
    valid names "sdn" and "sdp", whose health was then never collected.)

    Returns:
      - None when `devnames` is empty, on any exception (including the 30 s
        timeout), or when stdout is empty — so the poller can fall back to
        the previously stored value instead of overwriting it.
      - {} when SSH is not configured, or when no devname passed validation.
      - the parsed health map otherwise.
    """
    # NOTE(review): an empty `devnames` yields None (the "failure" value)
    # while "not configured" yields {} — kept as-is; confirm this is intended.
    if not is_configured() or not devnames:
        return {} if devnames else None

    parts = [
        f"echo '@@{d}@@'; smartctl -H /dev/{d} 2>&1; echo '@@END@@'"
        for d in devnames
        if d.isascii() and d.isalnum()
    ]
    if not parts:
        return {}

    cmd = "; ".join(parts)
    try:
        async with await _connect() as conn:
            r = await asyncio.wait_for(conn.run(cmd, check=False), timeout=30)
    except Exception:
        return None
    if not r.stdout:
        return None
    return _parse_smart_health_batch(r.stdout)
def _parse_smart_health_batch(stdout: str) -> dict:
"""Pure parser for the batched smartctl -H output. Exposed for tests."""
result: dict[str, str] = {}
current: str | None = None
buf: list[str] = []
def _flush():
if current is None:
return
text = "\n".join(buf)
if "PASSED" in text:
result[current] = "PASSED"
elif "FAILED" in text or "FAILURE" in text:
result[current] = "FAILED"
else:
result[current] = "UNKNOWN"
for raw in stdout.splitlines():
line = raw.strip()
if line.startswith("@@") and line.endswith("@@"):
inner = line[2:-2]
if inner == "END":
_flush()
current = None
buf = []
else:
_flush()
current = inner
buf = []
else:
buf.append(line)
_flush()
return result
async def get_zfs_member_drives() -> set | None:
    """Return devnames of every drive whose partitions carry a ZFS label.

    Combined with get_pool_membership(): a drive in this set but NOT in the
    active-pool map carries ZFS data from a previously-imported pool that
    was exported (or imported on a different system). Those are treated as
    locked too — wiping them would silently destroy a pool.

    Runs `lsblk -no NAME,FSTYPE -l` over SSH and parses the result with
    ``_parse_lsblk_zfs_output``.

    Returns:
      - set() when SSH is not configured, or when lsblk succeeded and no
        drive carries a ZFS label.
      - None on any failure (exception, non-zero exit status, or empty
        stdout). Same fail-closed semantics as get_pool_membership() —
        callers must NOT treat None as "no exported drives"; that is a
        security regression.
    """
    if not is_configured():
        return set()
    try:
        async with await _connect() as conn:
            proc = await conn.run("lsblk -no NAME,FSTYPE -l 2>/dev/null", check=False)
            if proc.returncode != 0:
                return None
    except Exception:
        return None
    # lsblk with rc==0 and no output is impossible on a normal Linux host;
    # treat it as a failure rather than "no drives at all."
    return _parse_lsblk_zfs_output(proc.stdout) if proc.stdout else None
async def get_system_sensors() -> dict:
    """
    Run `sensors -j` on the remote host and extract system-level temperatures.

    On success returns {"cpu_c": int|None, "pch_c": int|None}:
        cpu_c = CPU package temp (coretemp chip)
        pch_c = PCH/chipset temp (pch_* chip) — proxy for storage I/O lane thermals

    Returns {} when SSH is not configured, when the command prints nothing
    (e.g. lm-sensors unavailable), or when any exception is raised; the
    exception is logged at debug level.
    """
    if not is_configured():
        return {}
    try:
        async with await _connect() as conn:
            proc = await conn.run("sensors -j 2>/dev/null", check=False)
            payload = proc.stdout.strip()
            return _parse_sensors_json(payload) if payload else {}
    except Exception as exc:
        log.debug("get_system_sensors failed: %s", exc)
        return {}
def _parse_sensors_json(output: str) -> dict:
import json as _json
try:
data = _json.loads(output)
except Exception:
return {}
cpu_c: int | None = None
pch_c: int | None = None
for chip_name, chip_data in data.items():
if not isinstance(chip_data, dict):
continue
# CPU package temp — coretemp chip, "Package id N" sensor
if chip_name.startswith("coretemp") and cpu_c is None:
for sensor_name, sensor_vals in chip_data.items():
if not isinstance(sensor_vals, dict):
continue
if "package" in sensor_name.lower():
for k, v in sensor_vals.items():
if k.endswith("_input") and isinstance(v, (int, float)):
cpu_c = int(round(v))
break
if cpu_c is not None:
break
# PCH / chipset temp — manages PCIe lanes including HBA / storage I/O
elif chip_name.startswith("pch_") and pch_c is None:
for sensor_name, sensor_vals in chip_data.items():
if not isinstance(sensor_vals, dict):
continue
for k, v in sensor_vals.items():
if k.endswith("_input") and isinstance(v, (int, float)):
pch_c = int(round(v))
break
if pch_c is not None:
break
return {"cpu_c": cpu_c, "pch_c": pch_c}
# ---------------------------------------------------------------------------
# Parsers
# ---------------------------------------------------------------------------
def _parse_smartctl(output: str) -> dict:
health = "UNKNOWN"
attributes: dict[int, dict] = {}
warnings: list[str] = []
failures: list[str] = []
m = re.search(r"self-assessment test result:\s+(\w+)", output, re.IGNORECASE)
if m:
health = m.group(1).upper()
# Attribute table: ID# NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
for line in output.splitlines():
am = re.match(
r"\s*(\d+)\s+(\S+)\s+\S+\s+\d+\s+\d+\s+\d+\s+\S+\s+\S+\s+\S+\s+(\d+)",
line,
)
if not am:
continue
attr_id = int(am.group(1))
attr_name = am.group(2)
raw_val = int(am.group(3))
attributes[attr_id] = {"name": attr_name, "raw": raw_val}
if attr_id in SMART_ATTRS:
_, is_critical = SMART_ATTRS[attr_id]
if raw_val > 0:
msg = f"{attr_name} = {raw_val}"
if is_critical:
failures.append(msg)
else:
warnings.append(msg)
return {
"health": health,
"raw_output": output,
"attributes": attributes,
"warnings": warnings,
"failures": failures,
}
def _parse_smart_progress(output: str) -> dict:
state = "unknown"
percent_remaining = None # None = "in progress but no % line parsed yet"
lower = output.lower()
if "self-test routine in progress" in lower or "self-test routine in progress" in output:
state = "running"
m = re.search(r"(\d+)%\s+of\s+test\s+remaining", output, re.IGNORECASE)
if m:
percent_remaining = int(m.group(1))
elif "completed without error" in lower:
state = "passed"
elif (
"completed: read failure" in lower
or "completed: write failure" in lower
or "aborted by host" in lower
or ("completed" in lower and "failure" in lower)
):
state = "failed"
elif "in progress" in lower:
state = "running"
return {
"state": state,
"percent_remaining": percent_remaining,
"output": output,
}

View file

@ -244,7 +244,7 @@ thead {
}
th {
padding: 9px 14px;
padding: 6px 8px;
font-size: 11px;
font-weight: 600;
text-transform: uppercase;
@ -256,9 +256,10 @@ th {
}
td {
padding: 10px 14px;
padding: 7px 8px;
border-bottom: 1px solid var(--border);
vertical-align: middle;
line-height: 1.3;
}
tr:last-child td {
@ -276,13 +277,15 @@ tr:hover td {
/* -----------------------------------------------------------------------
Column widths
----------------------------------------------------------------------- */
.col-drive { min-width: 180px; }
.col-serial { min-width: 110px; }
.col-size { min-width: 70px; text-align: right; }
.col-temp { min-width: 75px; text-align: right; }
.col-health { min-width: 85px; }
.col-smart { min-width: 150px; }
.col-actions { min-width: 170px; }
.col-drive { min-width: 160px; }
.col-serial { min-width: 95px; }
.col-size { min-width: 60px; text-align: right; }
.col-temp { min-width: 60px; text-align: right; }
.col-health { min-width: 70px; }
.col-smart { min-width: 80px; }
/* Tighter SMART columns — they hold short pills or a progress bar. */
th.col-smart, td.col-smart { padding-left: 5px; padding-right: 5px; }
.col-actions { min-width: 150px; }
/* -----------------------------------------------------------------------
Drive cell
@ -291,14 +294,23 @@ tr:hover td {
display: block;
font-weight: 500;
color: var(--text-strong);
font-size: 14px;
font-size: 13px;
line-height: 1.25;
}
.drive-model {
display: block;
font-size: 11px;
display: inline;
font-size: 10px;
color: var(--text-muted);
margin-top: 1px;
margin-top: 0;
line-height: 1.25;
}
/* Separator between model and location when both are present on the
same line. ::before on a .drive-location that directly follows a
.drive-model puts a thin dot between them. */
.drive-model + .drive-location::before {
content: " · ";
color: var(--border);
margin: 0 2px;
}
/* -----------------------------------------------------------------------
@ -421,7 +433,7 @@ tr:hover td {
/* -----------------------------------------------------------------------
Burn-in column
----------------------------------------------------------------------- */
.col-burnin { min-width: 160px; }
.col-burnin { min-width: 130px; }
.burnin-cell { min-width: 140px; }
@ -1176,9 +1188,9 @@ a.stat-card:hover {
Checkbox column
----------------------------------------------------------------------- */
.col-check {
width: 36px;
min-width: 36px;
padding: 10px 8px 10px 14px;
width: 32px;
min-width: 32px;
padding: 7px 4px 7px 8px;
}
.drive-checkbox, #select-all-cb {
@ -1192,18 +1204,15 @@ a.stat-card:hover {
Drive location inline edit
----------------------------------------------------------------------- */
.drive-location {
display: block;
display: inline;
font-size: 10px;
color: var(--text-muted);
margin-top: 2px;
margin-top: 0;
cursor: pointer;
border-radius: 3px;
padding: 1px 3px;
padding: 0 3px;
line-height: 1.1;
transition: background 0.1s;
max-width: 160px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.drive-location:hover { background: var(--border); color: var(--text); }
@ -2422,6 +2431,237 @@ tr.drawer-row-active {
color: var(--yellow);
}
/* -----------------------------------------------------------------------
Settings: secret-status pills next to password/key labels
----------------------------------------------------------------------- */
.secret-status {
display: inline-block;
margin-left: 6px;
padding: 1px 6px;
font-size: 10.5px;
font-weight: 500;
letter-spacing: 0.04em;
text-transform: uppercase;
border-radius: 3px;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
}
.secret-status.secret-set {
background: color-mix(in srgb, var(--green, #39c179) 14%, transparent);
color: var(--green, #39c179);
border: 1px solid color-mix(in srgb, var(--green, #39c179) 35%, transparent);
}
.secret-status.secret-unset {
background: color-mix(in srgb, var(--text-muted) 14%, transparent);
color: var(--text-muted);
border: 1px solid color-mix(in srgb, var(--text-muted) 35%, transparent);
}
/* -----------------------------------------------------------------------
Login screen
----------------------------------------------------------------------- */
.login-body {
background: var(--bg);
color: var(--text);
display: flex;
align-items: center;
justify-content: center;
min-height: 100vh;
margin: 0;
}
.login-card {
width: min(420px, 92vw);
background: var(--bg-card, #161b22);
border: 1px solid var(--border);
border-radius: 10px;
padding: 28px 30px;
box-shadow: 0 8px 28px rgba(0, 0, 0, 0.35);
}
.login-header { margin-bottom: 18px; }
.login-title {
font-size: 20px;
font-weight: 700;
letter-spacing: -0.01em;
}
.login-sub {
margin-top: 2px;
color: var(--text-muted);
font-size: 13px;
text-transform: uppercase;
letter-spacing: 0.08em;
}
.login-blurb {
font-size: 13px;
color: var(--text-muted);
line-height: 1.5;
margin: 0 0 18px;
}
.login-error {
background: color-mix(in srgb, var(--red, #e25555) 16%, transparent);
border: 1px solid color-mix(in srgb, var(--red, #e25555) 50%, transparent);
color: var(--red, #e25555);
padding: 10px 12px;
border-radius: 6px;
font-size: 13px;
margin-bottom: 14px;
}
.login-form { display: flex; flex-direction: column; gap: 4px; }
.login-label {
font-size: 12px;
color: var(--text-muted);
margin-top: 8px;
margin-bottom: 4px;
text-transform: uppercase;
letter-spacing: 0.06em;
}
.login-optional { text-transform: none; opacity: 0.7; }
.login-input {
background: var(--bg);
color: var(--text);
border: 1px solid var(--border);
border-radius: 6px;
padding: 9px 12px;
font-size: 14px;
font-family: inherit;
transition: border-color .15s;
}
.login-input:focus {
border-color: var(--accent, #3b82f6);
outline: none;
}
.login-submit {
margin-top: 18px;
background: var(--accent, #3b82f6);
color: #fff;
border: none;
border-radius: 6px;
padding: 11px 14px;
font-size: 14px;
font-weight: 600;
cursor: pointer;
transition: opacity .15s;
}
.login-submit:hover { opacity: 0.9; }
.login-footer {
margin-top: 22px;
padding-top: 16px;
border-top: 1px solid var(--border);
font-size: 11.5px;
color: var(--text-muted);
line-height: 1.55;
}
.login-code {
display: inline-block;
margin-top: 4px;
padding: 2px 6px;
background: var(--bg);
border: 1px solid var(--border);
border-radius: 4px;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
font-size: 11px;
}
.header-user {
color: var(--text-muted);
font-size: 12px;
margin-left: 8px;
padding-left: 12px;
border-left: 1px solid var(--border);
}
.header-logout { font-size: 12px; }
/* -----------------------------------------------------------------------
Pool-membership lock indicators
----------------------------------------------------------------------- */
.pool-lock-icon {
display: inline-block;
margin-right: 4px;
font-size: 12px;
color: var(--yellow);
vertical-align: baseline;
}
.pool-lock-icon.pool-lock-boot {
color: var(--red, #e25555);
}
.pool-pill {
display: inline-block;
margin-top: 3px;
padding: 1px 7px;
font-size: 10.5px;
font-weight: 600;
letter-spacing: 0.3px;
text-transform: uppercase;
border-radius: 4px;
background: color-mix(in srgb, var(--yellow) 14%, transparent);
color: var(--yellow);
border: 1px solid color-mix(in srgb, var(--yellow) 35%, transparent);
}
.pool-pill.pool-pill-boot {
background: color-mix(in srgb, var(--red, #e25555) 16%, transparent);
color: var(--red, #e25555);
border-color: color-mix(in srgb, var(--red, #e25555) 45%, transparent);
}
.pool-pill.pool-pill-exported {
background: color-mix(in srgb, #e07a3f 16%, transparent);
color: #e07a3f;
border-color: color-mix(in srgb, #e07a3f 45%, transparent);
}
.pool-lock-icon.pool-lock-exported {
color: #e07a3f;
}
.pool-pill.pool-pill-mounted {
background: color-mix(in srgb, #c477e0 16%, transparent);
color: #c477e0;
border-color: color-mix(in srgb, #c477e0 45%, transparent);
}
.pool-lock-icon.pool-lock-mounted {
color: #c477e0;
}
.btn-unlock-mounted {
border-color: color-mix(in srgb, #c477e0 55%, transparent);
color: #c477e0;
}
.btn-unlock-mounted:hover {
background: color-mix(in srgb, #c477e0 14%, transparent);
}
.btn-unlock {
background: transparent;
border: 1px solid color-mix(in srgb, var(--yellow) 50%, transparent);
color: var(--yellow);
border-radius: 5px;
padding: 3px 9px;
font-size: 12px;
cursor: pointer;
transition: background .15s, color .15s, border-color .15s;
}
.btn-unlock:hover {
background: color-mix(in srgb, var(--yellow) 14%, transparent);
}
.btn-unlock-boot {
border-color: color-mix(in srgb, var(--red, #e25555) 55%, transparent);
color: var(--red, #e25555);
}
.btn-unlock-boot:hover {
background: color-mix(in srgb, var(--red, #e25555) 14%, transparent);
}
.btn-unlock-exported {
border-color: color-mix(in srgb, #e07a3f 55%, transparent);
color: #e07a3f;
}
.btn-unlock-exported:hover {
background: color-mix(in srgb, #e07a3f 14%, transparent);
}
.unlock-countdown {
margin-left: 4px;
font-size: 11px;
color: var(--green, #39c179);
font-variant-numeric: tabular-nums;
}
.unlock-countdown-expired {
color: var(--yellow);
}
.modal.modal-danger {
border-top: 3px solid var(--red, #e25555);
}
/* -----------------------------------------------------------------------
Parallel burn-in inline warning
----------------------------------------------------------------------- */
@ -2459,41 +2699,276 @@ tr.drawer-row-active {
font-variant-numeric: tabular-nums;
}
/* -----------------------------------------------------------------------
Live Terminal drawer panel (xterm.js)
----------------------------------------------------------------------- */
.drawer-panel-terminal {
padding: 0 !important;
overflow: hidden !important;
position: relative;
background: #0d1117;
}
/* Let xterm fill the full panel height */
.drawer-panel-terminal .xterm {
/* -----------------------------------------------------------------------
Per-pattern badblocks meters in the drive drawer (1.0.0-44).
Four meters, one per pattern (0xaa / 0x55 / 0xff / 0x00). Each meter
has two halves: write (left) and verify (right), so a glance shows
both which pattern is running and which sub-phase within it.
----------------------------------------------------------------------- */
.bb-meters {
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: 8px;
padding: 10px 12px;
background: var(--bg-soft, #161b22);
border-radius: 6px;
margin: 6px 0 8px 0;
}
.bb-meter {
display: flex;
flex-direction: column;
gap: 4px;
}
.bb-meter-label {
font-family: "SF Mono", "Consolas", monospace;
font-size: 10px;
color: var(--text-muted);
text-transform: uppercase;
letter-spacing: .04em;
}
.bb-meter-current .bb-meter-label {
color: var(--blue, #58a6ff);
font-weight: 600;
}
.bb-meter-done .bb-meter-label {
color: var(--green, #3fb950);
}
.bb-meter-bar {
display: flex;
height: 10px;
background: var(--bg, #0d1117);
border: 1px solid var(--border, #30363d);
border-radius: 3px;
overflow: hidden;
position: relative;
}
.bb-meter-half {
height: 100%;
transition: width .3s ease;
}
.bb-write {
background: var(--blue, #58a6ff);
flex: 0 0 auto;
max-width: 50%;
}
.bb-verify {
background: var(--green, #3fb950);
flex: 0 0 auto;
max-width: 50%;
}
.bb-meter-half-spacer {
flex: 0 0 auto;
width: 1px;
background: var(--border, #30363d);
height: 100%;
}
.drawer-panel-terminal .xterm-viewport {
overflow-y: auto !important;
.bb-meter-done .bb-write,
.bb-meter-done .bb-verify {
opacity: .55;
}
/* Reconnect bar — floats over the terminal when disconnected */
.term-reconnect-bar {
position: absolute;
bottom: 12px;
right: 12px;
z-index: 20;
.bb-meter-sub {
display: flex;
align-items: center;
gap: 8px;
background: rgba(13,17,23,0.85);
border: 1px solid var(--border);
border-radius: 6px;
padding: 6px 10px;
font-size: 12px;
justify-content: space-between;
font-family: "SF Mono", "Consolas", monospace;
font-size: 9px;
color: var(--text-muted);
}
.term-reconnect-bar .btn-secondary {
padding: 3px 10px;
font-size: 11px;
.bb-sub-write { color: color-mix(in srgb, var(--blue) 80%, var(--text-muted)); }
.bb-sub-verify { color: color-mix(in srgb, var(--green) 80%, var(--text-muted)); }
/* -----------------------------------------------------------------------
Surface-scan vital-signs row in the drawer (1.0.0-46).
Sits directly above the per-pattern meters. Temperature with
green/yellow/red colour, live MB/s, elapsed, ETA — all derived
from data already in the drawer payload.
----------------------------------------------------------------------- */
.bb-vitals {
display: flex;
gap: 14px;
flex-wrap: wrap;
padding: 8px 12px 4px 12px;
background: var(--bg-soft, #161b22);
border-radius: 6px 6px 0 0;
margin: 6px 0 0 0;
border-bottom: 1px solid var(--border, #30363d);
}
/* When vitals lead, suppress the meter strip's top radius + margin so
they read as one stacked unit. */
.bb-vitals + .bb-meters {
border-radius: 0 0 6px 6px;
margin-top: 0;
}
.bb-vital {
display: flex;
flex-direction: column;
gap: 1px;
font-family: "SF Mono", "Consolas", monospace;
}
.bb-vital-label {
font-size: 9px;
color: var(--text-muted);
text-transform: uppercase;
letter-spacing: .04em;
}
.bb-vital-value {
font-size: 13px;
color: var(--text-strong, #f0f6fc);
font-weight: 500;
font-variant-numeric: tabular-nums;
}
/* -----------------------------------------------------------------------
Phase caption + per-pattern history (1.0.0-47).
----------------------------------------------------------------------- */
.bb-caption {
font-family: "SF Mono", "Consolas", monospace;
font-size: 11px;
color: var(--text-muted);
padding: 6px 12px 0 12px;
letter-spacing: .02em;
}
.bb-history {
display: flex;
flex-wrap: wrap;
align-items: center;
gap: 10px;
padding: 6px 12px 8px 12px;
font-family: "SF Mono", "Consolas", monospace;
font-size: 10px;
color: var(--text-muted);
}
.bb-hist-title {
text-transform: uppercase;
letter-spacing: .04em;
font-size: 9px;
margin-right: 4px;
}
.bb-hist-row {
display: inline-flex;
align-items: baseline;
gap: 4px;
background: var(--bg, #0d1117);
border: 1px solid var(--border, #30363d);
border-radius: 3px;
padding: 1px 6px;
}
.bb-hist-label {
color: var(--green, #3fb950);
font-weight: 600;
}
.bb-hist-dur {
color: var(--text-strong, #f0f6fc);
font-variant-numeric: tabular-nums;
}
/* Bad-block counter colour states inside the vitals row */
.bb-vital-good { color: var(--green, #3fb950); }
.bb-vital-bad { color: var(--red, #f85149); }
/* -----------------------------------------------------------------------
Column sort (1.0.0-48). Click a sortable TH to cycle asc → desc →
cleared. Indicator arrow appears next to the column label.
----------------------------------------------------------------------- */
th.sortable {
cursor: pointer;
user-select: none;
position: relative;
}
th.sortable:hover { color: var(--text); }
th.sortable::after {
content: "";
display: inline-block;
width: 0;
height: 0;
margin-left: 4px;
border-left: 4px solid transparent;
border-right: 4px solid transparent;
vertical-align: middle;
opacity: 0;
}
th.sortable:hover::after { opacity: 0.4; border-bottom: 5px solid currentColor; }
th.sort-asc::after {
opacity: 1;
border-bottom: 5px solid var(--blue, #58a6ff);
}
th.sort-desc::after {
opacity: 1;
border-top: 5px solid var(--blue, #58a6ff);
}
/* -----------------------------------------------------------------------
Stage "Reason" block — explains why a stage ended in a terminal
state. Replaces the old single-line stage-error-line for
failed/cancelled/unknown stages so the operator gets a clear,
prominent explanation at the top.
----------------------------------------------------------------------- */
.stage-reason {
display: flex;
gap: 10px;
align-items: baseline;
padding: 8px 12px;
margin: 6px 0;
border-radius: 5px;
font-size: 12px;
border: 1px solid;
}
.stage-reason-failed {
background: var(--red-bg, color-mix(in srgb, var(--red) 12%, transparent));
border-color: var(--red-bd, color-mix(in srgb, var(--red) 40%, transparent));
}
.stage-reason-cancelled,
.stage-reason-unknown {
background: var(--yellow-bg, color-mix(in srgb, var(--yellow) 12%, transparent));
border-color: var(--yellow-bd, color-mix(in srgb, var(--yellow) 40%, transparent));
}
.stage-reason-label {
font-size: 10px;
text-transform: uppercase;
letter-spacing: .06em;
font-weight: 600;
color: var(--text-muted);
flex-shrink: 0;
}
.stage-reason-text {
flex: 1;
color: var(--text-strong, #f0f6fc);
line-height: 1.4;
word-wrap: break-word;
}
.stage-reason-failed .stage-reason-text { color: var(--red, #f85149); }
.stage-reason-cancelled .stage-reason-text,
.stage-reason-unknown .stage-reason-text { color: var(--yellow, #d29922); }
/* -----------------------------------------------------------------------
Drawer job-level estimated completion (right-aligned in the header,
so it doesn't compete with the state chip + operator info).
----------------------------------------------------------------------- */
.drawer-job-header {
display: flex;
align-items: center;
gap: 10px;
flex-wrap: wrap;
}
.drawer-job-finish {
display: inline-flex;
align-items: baseline;
gap: 8px;
padding: 4px 10px;
background: var(--bg-soft, #161b22);
border: 1px solid var(--border, #30363d);
border-radius: 5px;
font-family: "SF Mono", "Consolas", monospace;
}
.drawer-job-finish-label {
font-size: 9px;
color: var(--text-muted);
text-transform: uppercase;
letter-spacing: .04em;
}
.drawer-job-finish-value {
font-size: 12px;
color: var(--text-strong, #f0f6fc);
font-weight: 500;
font-variant-numeric: tabular-nums;
}

View file

@ -1,6 +1,15 @@
(function () {
'use strict';
// Default operator name — prefer the logged-in user (rendered into a
// <meta> by layout.html), fall back to the localStorage memory of the
// last-typed value, and empty string as last resort.
// Resolve the operator name to pre-fill / submit.
// Order of preference:
//   1. <meta name="default-operator" content="..."> when present and non-empty
//   2. the 'burnin_operator' value remembered in localStorage
//   3. '' (empty string)
// Returns a string; never throws.
function defaultOperator() {
  var meta = document.querySelector('meta[name="default-operator"]');
  if (meta && meta.content) return meta.content;
  try {
    return localStorage.getItem('burnin_operator') || '';
  } catch (e) {
    // Storage access can throw (e.g. SecurityError when site data is
    // blocked). The remembered name is only a convenience, so degrade to
    // "no default" instead of breaking the caller's click handler.
    return '';
  }
}
// -----------------------------------------------------------------------
// Filter bar + stats bar
// -----------------------------------------------------------------------
@ -68,13 +77,88 @@
applyFilter(activeFilter);
restoreCheckboxes();
initElapsedTimers();
initUnlockCountdowns();
initLocationEdits();
applySort(); // SSE swap replaces #drives-tbody — re-apply persisted sort
paintSortIndicators();
if (_drawerDriveId) {
_drawerHighlightRow(_drawerDriveId);
drawerFetch(_drawerDriveId);
}
});
// ---------------------------------------------------------------
// Column sorting (client-side, persisted in localStorage so it
// survives reload AND survives every SSE-driven tbody refresh).
// ---------------------------------------------------------------
var SORT_KEY = 'nasburnin.sort';
// Read the persisted sort state from localStorage.
// Returns {col, dir} (dir is 'asc' or 'desc') or null when nothing valid is
// stored. Any storage or JSON error is treated as "no sort".
function getSort() {
  var saved;
  try {
    var stored = localStorage.getItem(SORT_KEY);
    if (!stored) return null;
    saved = JSON.parse(stored);
  } catch (e) {
    return null;
  }
  if (!saved || !saved.col) return null;
  return (saved.dir === 'asc' || saved.dir === 'desc') ? saved : null;
}
// Persist the sort state under SORT_KEY; a falsy `col` clears it.
function setSort(col, dir) {
  if (col) {
    localStorage.setItem(SORT_KEY, JSON.stringify({col: col, dir: dir}));
    return;
  }
  localStorage.removeItem(SORT_KEY);
}
// Re-order the drive rows inside #drives-tbody according to the persisted
// sort state. Rows are compared on their data-sort-<col> attribute. Does
// nothing when there is no tbody, no stored sort, or no drive rows.
function applySort() {
  var sortState = getSort();
  var tbody = document.getElementById('drives-tbody');
  if (!tbody || !sortState) return;

  var driveRows = Array.from(tbody.querySelectorAll('tr[id^="drive-"]'));
  if (driveRows.length === 0) return;

  var attrName = 'data-sort-' + sortState.col;
  var sign = sortState.dir === 'asc' ? 1 : -1;

  function isBlank(v) {
    return v === null || v === '';
  }

  function compareRows(rowA, rowB) {
    var left = rowA.getAttribute(attrName);
    var right = rowB.getAttribute(attrName);

    // Blank values sink to the bottom in both directions, so the direction
    // multiplier is deliberately not applied here.
    var leftBlank = isBlank(left);
    var rightBlank = isBlank(right);
    if (leftBlank || rightBlank) {
      if (leftBlank && rightBlank) return 0;
      return leftBlank ? 1 : -1;
    }

    // Compare numerically only when both values round-trip through
    // parseFloat unchanged; otherwise fall back to a string comparison.
    var leftNum = parseFloat(left);
    var rightNum = parseFloat(right);
    var bothNumeric = !isNaN(leftNum) && !isNaN(rightNum) &&
      String(leftNum) === left && String(rightNum) === right;
    if (bothNumeric) return (leftNum - rightNum) * sign;
    return left.localeCompare(right) * sign;
  }

  driveRows.sort(compareRows);
  // appendChild moves an attached node, so this re-orders in place.
  for (var i = 0; i < driveRows.length; i++) {
    tbody.appendChild(driveRows[i]);
  }
}
// Sync the sort-asc / sort-desc classes on every th.sortable with the
// persisted sort state (all indicators are cleared when no sort is stored).
function paintSortIndicators() {
  var current = getSort();
  var headers = document.querySelectorAll('th.sortable');
  for (var i = 0; i < headers.length; i++) {
    var cell = headers[i];
    cell.classList.remove('sort-asc', 'sort-desc');
    if (!current || cell.dataset.sortKey !== current.col) continue;
    cell.classList.add(current.dir === 'asc' ? 'sort-asc' : 'sort-desc');
  }
}
// Delegated click handler for sortable column headers.
// Clicking the same column cycles asc -> desc -> cleared; clicking a
// different column starts at asc.
document.addEventListener('click', function (evt) {
  var header = evt.target.closest('th.sortable');
  if (!header) return;

  var key = header.dataset.sortKey;
  var current = getSort();
  var sameColumn = current && current.col === key;

  if (sameColumn && current.dir !== 'asc') {
    // Third click on the same column: drop the stored sort.
    setSort(null);
  } else {
    setSort(key, sameColumn ? 'desc' : 'asc');
  }
  applySort();
  paintSortIndicators();
});
// Initial paint on page load (HTML is already rendered server-side).
applySort();
paintSortIndicators();
updateCounts();
// -----------------------------------------------------------------------
@ -124,7 +208,7 @@
updateNotifBtn();
if (perm === 'granted') {
showToast('Browser notifications enabled', 'success');
new Notification('TrueNAS Burn-In', {
new Notification('NAS Burn-In', {
body: 'You will be notified when burn-in jobs complete.',
});
}
@ -248,6 +332,41 @@
initElapsedTimers();
// Live countdown for pool-drive unlock TTL — runs once per second; ticker
// self-stops when no .unlock-countdown spans remain on the page.
// Handle of the 1 s countdown interval, or null while no ticker is running.
var _unlockTickInterval = null;

// Repaint every .unlock-countdown[data-expires] span with the remaining
// time (m:ss) until its data-expires value (epoch seconds).
// When no such spans are left, the interval is cleared.
function tickUnlockCountdowns() {
  var spans = document.querySelectorAll('.unlock-countdown[data-expires]');
  if (spans.length === 0) {
    if (_unlockTickInterval) {
      clearInterval(_unlockTickInterval);
      _unlockTickInterval = null;
    }
    return;
  }
  var nowSec = Date.now() / 1000;
  spans.forEach(function (el) {
    var exp = parseFloat(el.dataset.expires);
    if (!exp || isNaN(exp)) return;
    var rem = Math.max(0, exp - nowSec);
    if (rem <= 0) {
      el.textContent = 'expired';
      el.className = 'unlock-countdown unlock-countdown-expired';
      // Drop the attribute so this span no longer matches the selector
      // above. Otherwise expired spans keep the interval alive forever and
      // the ticker never self-stops.
      el.removeAttribute('data-expires');
      return;
    }
    var m = Math.floor(rem / 60);
    var s = Math.floor(rem % 60);
    el.textContent = '\u{1F513} ' + m + ':' + (s < 10 ? '0' : '') + s;
  });
}
// Start the once-per-second unlock countdown ticker, unless it is already
// running or the page has no countdown spans. Paints once immediately.
function initUnlockCountdowns() {
  var alreadyRunning = Boolean(_unlockTickInterval);
  if (alreadyRunning) return;
  var pending = document.querySelectorAll('.unlock-countdown[data-expires]');
  if (!pending.length) return;
  _unlockTickInterval = setInterval(tickUnlockCountdowns, 1000);
  tickUnlockCountdowns();
}
initUnlockCountdowns();
// -----------------------------------------------------------------------
// Inline location / notes edit
// -----------------------------------------------------------------------
@ -381,7 +500,7 @@
async function startSmartTest(btn) {
var driveId = btn.dataset.driveId;
var testType = btn.dataset.testType;
var operator = localStorage.getItem('burnin_operator') || 'unknown';
var operator = defaultOperator() || 'unknown';
btn.disabled = true;
try {
@ -447,7 +566,7 @@
return;
}
if (!confirm('Cancel ALL ' + cancelBtns.length + ' active burn-in job(s)? This cannot be undone.')) return;
var operator = localStorage.getItem('burnin_operator') || 'unknown';
var operator = defaultOperator() || 'unknown';
var count = 0;
for (var i = 0; i < cancelBtns.length; i++) {
var jobId = cancelBtns[i].dataset.jobId;
@ -521,7 +640,7 @@
document.getElementById('confirm-serial').value = '';
document.getElementById('confirm-hint').textContent = 'Expected: ' + modalSerial;
var savedOp = localStorage.getItem('burnin_operator') || '';
var savedOp = defaultOperator();
document.getElementById('operator-input').value = savedOp;
// Init drag on first open (list is in static DOM)
@ -583,7 +702,16 @@
var data = await resp.json();
if (!resp.ok) {
showToast(data.detail || 'Failed to start burn-in', 'error');
// detail may be the structured pool-locked object {drive_id,
// pool_name, pool_role, pool_locked: true, error: "..."}.
// The user already opened the start modal, so the unlock TTL must
// have just expired between modal-open and submit. Auto-flip to
// the unlock modal for that drive.
if (_handlePoolLockedError(data.detail)) {
closeModal();
return;
}
showToast(_extractErrorMessage(data.detail) || 'Failed to start burn-in', 'error');
return;
}
@ -594,6 +722,229 @@
}
}
// Helpers shared between single-drive and batch start error paths.
// Backend returns either a string (legacy errors) or, for pool-locked
// drives, an object: {drive_id, error, pool_name, pool_role, pool_locked}.
// Turn an API error `detail` payload into a displayable message.
//   - string            -> returned as-is
//   - array             -> the string items / `msg` fields of its entries,
//                          joined with '; ' (validation-error style lists)
//   - object with error -> detail.error
// Returns null when nothing displayable is found, so callers can fall
// back to their own default with `|| '...'`.
function _extractErrorMessage(detail) {
  if (!detail) return null;
  if (typeof detail === 'string') return detail;
  if (Array.isArray(detail)) {
    var messages = [];
    for (var i = 0; i < detail.length; i++) {
      var item = detail[i];
      if (typeof item === 'string' && item) messages.push(item);
      else if (item && typeof item.msg === 'string' && item.msg) messages.push(item.msg);
    }
    return messages.length ? messages.join('; ') : null;
  }
  if (typeof detail === 'object' && detail.error) return detail.error;
  return null;
}
// Returns true if it handled a pool-locked error by opening the unlock
// modal for the offending drive. Caller should bail out.
// Handle a structured pool-locked error payload.
// Returns false (and does nothing) unless `detail` is an object with a
// truthy pool_locked flag and a non-null drive_id. Otherwise it either
// opens the unlock modal for that drive or, when no matching Unlock button
// is in the DOM, shows an explanatory toast; both cases return true and the
// caller should bail out.
function _handlePoolLockedError(detail) {
  if (!detail || typeof detail !== 'object' || !detail.pool_locked) return false;
  var driveId = detail.drive_id;
  if (driveId == null) return false;

  // Match on the attribute value instead of splicing drive_id into a
  // selector string: a value containing quotes or brackets would make
  // querySelector throw (or match the wrong element).
  var wanted = String(driveId);
  var candidates = document.querySelectorAll('.btn-unlock[data-drive-id]');
  var btn = null;
  for (var i = 0; i < candidates.length; i++) {
    if (candidates[i].getAttribute('data-drive-id') === wanted) {
      btn = candidates[i];
      break;
    }
  }

  if (btn) {
    // The caller still has to close its own start/batch modal; this
    // function does not know which one is open.
    openUnlockModal(btn);
    return true;
  }
  // Unlock button not in the DOM (drive row may have refreshed).
  // Surface a descriptive toast instead of [object Object].
  showToast(
    (detail.error || 'Drive is pool-locked') +
    ' Reload the page and click Unlock on the drive row.',
    'error',
  );
  return true;
}
// -----------------------------------------------------------------------
// Change-password modal
// -----------------------------------------------------------------------
// Show the change-password modal with all fields and the hint cleared and
// the submit button disabled, then focus the current-password field.
// No-op when the modal element is absent from the page.
function openPasswordModal() {
  var modal = document.getElementById('password-modal');
  if (!modal) return;

  ['pw-current', 'pw-new', 'pw-confirm'].forEach(function (fieldId) {
    document.getElementById(fieldId).value = '';
  });
  document.getElementById('pw-hint').textContent = '';
  document.getElementById('password-modal-submit-btn').disabled = true;

  modal.removeAttribute('hidden');
  // Focus shortly after un-hiding the modal.
  setTimeout(function () {
    document.getElementById('pw-current').focus();
  }, 50);
}
// Hide the change-password modal (no-op when it is not on the page).
function closePasswordModal() {
  var modal = document.getElementById('password-modal');
  if (!modal) return;
  modal.setAttribute('hidden', '');
}
// Live validation for the change-password form: updates the hint text and
// enables submit only when a current password is given, the new password
// has at least 8 characters, and the confirmation matches it.
function validatePasswordModal() {
  var current = document.getElementById('pw-current').value;
  var next = document.getElementById('pw-new').value;
  var repeat = document.getElementById('pw-confirm').value;
  var hintEl = document.getElementById('pw-hint');

  var longEnough = next.length >= 8;
  var matches = next === repeat;

  var message = '';
  if (next.length > 0 && !longEnough) {
    message = 'Min 8 characters.';
  } else if (longEnough && repeat.length > 0 && !matches) {
    message = "Passwords don't match.";
  }
  hintEl.textContent = message;

  var valid = current.length > 0 && longEnough && matches;
  document.getElementById('password-modal-submit-btn').disabled = !valid;
}
// POST the change-password form (current / new / confirm) as form data to
// /api/v1/auth/change-password. The submit button is disabled for the
// duration and re-enabled on any failure. On success the modal is closed
// and a success toast is shown.
async function submitPasswordChange() {
  var btn = document.getElementById('password-modal-submit-btn');
  btn.disabled = true;
  var fd = new FormData();
  fd.append('current_password', document.getElementById('pw-current').value);
  fd.append('new_password', document.getElementById('pw-new').value);
  fd.append('confirm_password', document.getElementById('pw-confirm').value);
  try {
    var resp = await fetch('/api/v1/auth/change-password', {
      method: 'POST',
      body: fd,
    });
    // Tolerate empty / non-JSON bodies: fall back to an empty object.
    var data = await resp.json().catch(function () { return {}; });
    if (!resp.ok) {
      // `detail` is not guaranteed to be a string; route it through the
      // shared extractor so an object or list never reaches the toast as
      // "[object Object]".
      var message = _extractErrorMessage(data && data.detail);
      showToast(message || 'Password change failed', 'error');
      btn.disabled = false;
      return;
    }
    closePasswordModal();
    showToast('Password updated.', 'success');
  } catch (err) {
    showToast('Network error', 'error');
    btn.disabled = false;
  }
}
// -----------------------------------------------------------------------
// Pool-drive Unlock modal
// -----------------------------------------------------------------------
var unlockDriveId = null;
var unlockExpectedToken = null;
// Populate and show the pool-drive unlock modal for the drive described by
// `btn`'s data-* attributes (driveId, poolName, poolRole, isBootPool,
// isExported, isMounted, devname, model, serial, size).
// Side effects: sets the module-level unlockDriveId / unlockExpectedToken,
// rewrites the modal's text nodes, clears the confirm + reason inputs,
// pre-fills the operator input, re-runs validateUnlockModal(), un-hides the
// modal and focuses the operator input.
function openUnlockModal(btn) {
unlockDriveId = btn.dataset.driveId;
var poolName = btn.dataset.poolName || '';
var poolRole = btn.dataset.poolRole || 'data';
var isBoot = btn.dataset.isBootPool === '1';
var isExported = btn.dataset.isExported === '1';
var isMounted = btn.dataset.isMounted === '1';
// Token the operator must type to confirm. Precedence when several flags
// are set: boot > exported > mounted > plain pool name.
// NOTE(review): with no flag set and an empty poolName the expected token
// is '', which an empty confirm input satisfies in validateUnlockModal —
// confirm the server rejects that case.
if (isBoot) unlockExpectedToken = 'DESTROY BOOT POOL';
else if (isExported) unlockExpectedToken = 'DESTROY EXPORTED POOL';
else if (isMounted) unlockExpectedToken = 'DESTROY MOUNTED FILESYSTEM';
else unlockExpectedToken = poolName;
// Drive identity summary; an em dash stands in for missing values.
document.getElementById('unlock-devname').textContent = btn.dataset.devname || '—';
document.getElementById('unlock-model').textContent = btn.dataset.model || '—';
document.getElementById('unlock-serial').textContent = btn.dataset.serial || '—';
document.getElementById('unlock-size').textContent = btn.dataset.size || '—';
// Status chip. Note the precedence differs from the token above: here
// exported/mounted are checked first and boot only affects the chip colour
// in the fallback branch.
var chip = document.getElementById('unlock-pool-chip');
if (isExported) {
chip.textContent = 'exported ZFS';
chip.className = 'chip chip-aborted';
} else if (isMounted) {
chip.textContent = 'mounted FS';
chip.className = 'chip chip-aborted';
} else {
chip.textContent = poolName + ' · ' + poolRole;
chip.className = 'chip ' + (isBoot ? 'chip-failed' : 'chip-aborted');
}
// Title + warning copy, same precedence as the token (boot first).
var titleEl = document.getElementById('unlock-modal-title');
var warnTitle = document.getElementById('unlock-warning-title');
var warnBody = document.getElementById('unlock-warning-body');
if (isBoot) {
titleEl.textContent = 'Unlock BOOT POOL drive';
warnTitle.textContent = 'This is a TrueNAS BOOT drive.';
warnBody.textContent =
'Running burn-in on this drive will destroy the operating system on it. ' +
'If this drive is half of a mirrored boot pool, the system will continue running on the other mirror, ' +
'but you must already have a replacement plan. Proceeding without one bricks the host.';
} else if (isExported) {
titleEl.textContent = 'Unlock drive with EXPORTED ZFS data';
warnTitle.textContent = 'This drive carries ZFS data from a previously-imported pool.';
warnBody.textContent =
"TrueNAS isn't using this pool right now, but the drive still holds the labels and data. " +
'Burning it in will silently destroy whatever pool that data belongs to — including ' +
'pools that another system may be relying on. Confirm you have already evacuated or ' +
'reassigned the pool before continuing.';
} else if (isMounted) {
titleEl.textContent = 'Unlock drive with MOUNTED filesystem';
warnTitle.textContent = 'This drive has a non-ZFS filesystem currently mounted.';
warnBody.textContent =
'findmnt reports a partition on this drive is mounted right now. Burning it in will ' +
'destroy whatever data is on that filesystem and almost certainly leave the mount ' +
'point in a broken state. Unmount it first, or confirm you really mean to wipe it.';
} else {
titleEl.textContent = 'Unlock pool drive';
warnTitle.textContent = "This drive belongs to zpool '" + poolName + "'.";
warnBody.textContent =
'Running a destructive burn-in stage will overwrite all data on this drive ' +
'and almost certainly destroy the pool. Only proceed if you have already ' +
'removed this drive from the pool, or if you are intentionally decommissioning the pool.';
}
// Show the expected token and reset the form inputs.
document.getElementById('unlock-confirm-token').textContent = unlockExpectedToken;
document.getElementById('unlock-confirm-hint').textContent = 'Expected: ' + unlockExpectedToken;
document.getElementById('unlock-confirm-input').value = '';
document.getElementById('unlock-reason-input').value = '';
var savedOp = defaultOperator();
document.getElementById('unlock-operator-input').value = savedOp;
// Recompute the submit button's disabled state for the freshly reset form.
validateUnlockModal();
document.getElementById('unlock-modal').removeAttribute('hidden');
// Focus shortly after un-hiding the modal.
setTimeout(function () {
document.getElementById('unlock-operator-input').focus();
}, 50);
}
// Hide the unlock modal and forget which drive / token it was opened for.
function closeUnlockModal() {
  var modal = document.getElementById('unlock-modal');
  modal.setAttribute('hidden', '');
  unlockExpectedToken = null;
  unlockDriveId = null;
}
// Enable the unlock submit button only when an operator is given, the
// reason has at least 5 characters, and the typed token equals the
// expected one (all compared after trimming).
function validateUnlockModal() {
  function trimmedValue(id) {
    return (document.getElementById(id).value || '').trim();
  }
  var operatorName = trimmedValue('unlock-operator-input');
  var reasonText = trimmedValue('unlock-reason-input');
  var typedToken = trimmedValue('unlock-confirm-input');

  var incomplete = operatorName.length === 0 ||
    reasonText.length < 5 ||
    typedToken !== unlockExpectedToken;
  document.getElementById('unlock-modal-submit-btn').disabled = incomplete;
}
// Submit the unlock form for the drive held in unlockDriveId.
// POSTs {confirm_token, operator, reason} as JSON to
// /api/v1/drives/<id>/unlock. The submit button is disabled for the
// duration and re-enabled on failure. On success the modal is closed, a
// toast is shown and, when available, refreshDrives() is called.
async function submitUnlock() {
  var op = (document.getElementById('unlock-operator-input').value || '').trim();
  var rsn = (document.getElementById('unlock-reason-input').value || '').trim();
  var tok = (document.getElementById('unlock-confirm-input').value || '').trim();
  var btn = document.getElementById('unlock-modal-submit-btn');
  btn.disabled = true;
  try {
    localStorage.setItem('burnin_operator', op);
  } catch (e) {
    // Remembering the operator name is a convenience only; a blocked or
    // full storage must not abort the unlock itself.
  }
  try {
    var resp = await fetch('/api/v1/drives/' + unlockDriveId + '/unlock', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        confirm_token: tok,
        operator: op,
        reason: rsn,
      }),
    });
    // Tolerate empty / non-JSON bodies (e.g. a proxy error page) so they
    // are reported via the HTTP status instead of as a "Network error".
    var data = await resp.json().catch(function () { return {}; });
    if (!resp.ok) {
      // `detail` may be a structured object; never hand it to the toast
      // raw, or it renders as "[object Object]".
      var message = _extractErrorMessage(data && data.detail);
      showToast(message || 'Unlock failed', 'error');
      btn.disabled = false;
      return;
    }
    closeUnlockModal();
    showToast('Unlocked for 10 minutes — start burn-in now to use it.', 'success');
    // Force a drive list refresh so the row flips from Unlock → Burn-In
    if (typeof refreshDrives === 'function') refreshDrives();
  } catch (err) {
    showToast('Network error', 'error');
    btn.disabled = false;
  }
}
// -----------------------------------------------------------------------
// Batch Burn-In
// -----------------------------------------------------------------------
@ -686,7 +1037,7 @@
function openBatchModal() {
var modal = document.getElementById('batch-modal');
if (!modal) return;
var savedOp = localStorage.getItem('burnin_operator') || '';
var savedOp = defaultOperator();
document.getElementById('batch-operator-input').value = savedOp;
document.getElementById('batch-confirm-cb').checked = false;
// Reset stages to all-on (keep user's drag order)
@ -774,7 +1125,11 @@
});
var data = await resp.json();
if (!resp.ok) {
showToast(data.detail || 'Failed to queue batch', 'error');
if (_handlePoolLockedError(data.detail)) {
closeBatchModal();
return;
}
showToast(_extractErrorMessage(data.detail) || 'Failed to queue batch', 'error');
if (btn) btn.disabled = false;
return;
}
@ -782,11 +1137,18 @@
closeBatchModal();
checkedDriveIds.clear();
updateBatchBar();
var queued = (data.queued || []).length;
var errors = (data.errors || []).length;
var msg = queued + ' burn-in(s) queued';
if (errors) msg += ', ' + errors + ' skipped (already active)';
showToast(msg, errors && !queued ? 'error' : 'success');
var queued = (data.queued || []).length;
var allErrors = data.errors || [];
var poolLocked = allErrors.filter(function (e) { return e && e.pool_locked; });
var alreadyActive = allErrors.length - poolLocked.length;
var parts = [queued + ' burn-in(s) queued'];
if (alreadyActive) parts.push(alreadyActive + ' skipped (already active)');
if (poolLocked.length) {
parts.push(poolLocked.length + ' pool-locked (use Unlock on each row)');
}
var tone = (queued === 0 && allErrors.length) ? 'error' : 'success';
showToast(parts.join(', '), tone);
} catch (err) {
showToast('Network error', 'error');
if (btn) btn.disabled = false;
@ -799,7 +1161,7 @@
async function cancelBurnin(btn) {
var jobId = btn.dataset.jobId;
var operator = localStorage.getItem('burnin_operator') || 'unknown';
var operator = defaultOperator() || 'unknown';
if (!confirm('Cancel this burn-in job? This cannot be undone.')) return;
@ -837,6 +1199,24 @@
var cancelSmartBtn = e.target.closest('.btn-cancel-smart');
if (cancelSmartBtn && !cancelSmartBtn.disabled) { cancelSmartTest(cancelSmartBtn); return; }
// Change password header link
if (e.target.id === 'open-password-modal' || e.target.closest('#open-password-modal')) {
e.preventDefault();
openPasswordModal();
return;
}
if (e.target.closest('#password-modal-close-btn') ||
e.target.closest('#password-modal-cancel-btn')) {
closePasswordModal();
return;
}
if (e.target.id === 'password-modal') { closePasswordModal(); return; }
if (e.target.id === 'password-modal-submit-btn') { submitPasswordChange(); return; }
// Pool-drive unlock button (single drive)
var unlockBtn = e.target.closest('.btn-unlock');
if (unlockBtn && !unlockBtn.disabled) { openUnlockModal(unlockBtn); return; }
// Burn-in start button (single drive)
var startBtn = e.target.closest('.btn-start');
if (startBtn && !startBtn.disabled) { openModal(startBtn); return; }
@ -865,6 +1245,14 @@
return;
}
// Unlock modal
if (e.target.closest('#unlock-modal-close-btn') || e.target.closest('#unlock-modal-cancel-btn')) {
closeUnlockModal();
return;
}
if (e.target.id === 'unlock-modal') { closeUnlockModal(); return; }
if (e.target.id === 'unlock-modal-submit-btn') { submitUnlock(); return; }
// Batch modal close
if (e.target.closest('#batch-modal-close-btn') || e.target.closest('#batch-modal-cancel-btn')) {
closeBatchModal();
@ -882,11 +1270,18 @@
document.addEventListener('input', function (e) {
var id = e.target.id;
if (id === 'pw-current' || id === 'pw-new' || id === 'pw-confirm') validatePasswordModal();
if (id === 'unlock-operator-input' || id === 'unlock-reason-input' ||
id === 'unlock-confirm-input') validateUnlockModal();
if (id === 'operator-input' || id === 'confirm-serial') validateModal();
});
document.addEventListener('keydown', function (e) {
if (e.key === 'Escape') {
var pwModal = document.getElementById('password-modal');
if (pwModal && !pwModal.hidden) { closePasswordModal(); return; }
var uModal = document.getElementById('unlock-modal');
if (uModal && !uModal.hidden) { closeUnlockModal(); return; }
var modal = document.getElementById('start-modal');
if (modal && !modal.hidden) { closeModal(); return; }
var bModal = document.getElementById('batch-modal');
@ -950,8 +1345,14 @@
}
}
// Stash the last drive object so the burn-in panel renderer can
// pull temperature_c into the vital-signs row without having to
// pass it through the Burn-In renderer's signature.
var _DRAWER_LAST_DRIVE = null;
function _drawerRender(data) {
var drive = data.drive || {};
_DRAWER_LAST_DRIVE = drive;
var devnameEl = document.getElementById('drawer-devname');
var metaEl = document.getElementById('drawer-drive-meta');
if (devnameEl) devnameEl.textContent = drive.devname || '\u2014';
@ -965,6 +1366,170 @@
_drawerRenderEvents(data.events);
}
// Vital-signs row above the meters: start time, elapsed time, ETA with
// projected finish, and drive temperature.
//   stage: reads bb_phase (1-8), bb_phase_pct (0-100 within the phase) and
//          started_at (a Date.parse-able timestamp)
//   drive: reads temperature_c; may be null
// Returns an HTML string. Sections whose inputs are missing or unparseable
// are omitted rather than rendered as "Invalid Date" / "NaNm".
function _drawerRenderBadblocksVitals(stage, drive) {
  var phase = parseInt(stage.bb_phase, 10) || 1;
  var phasePct = parseFloat(stage.bb_phase_pct || 0);
  // 8 equally weighted phases (4 patterns x write/verify).
  var overallPct = ((phase - 1) * 100 + phasePct) / 8; // 0..100
  var html = '<div class="bb-vitals">';
  var dateOpts = {
    weekday: 'short', month: 'short', day: 'numeric',
    hour: 'numeric', minute: '2-digit',
  };

  // Start (wall-clock, with date). Date.parse yields NaN for anything it
  // cannot read; skip the whole time section in that case.
  var startMs = stage.started_at ? Date.parse(stage.started_at) : NaN;
  if (!isNaN(startMs)) {
    var startStr = new Date(startMs).toLocaleString(undefined, dateOpts);
    html += '<div class="bb-vital">';
    html += '<span class="bb-vital-label">Start</span>';
    html += '<span class="bb-vital-value">' + startStr + '</span>';
    html += '</div>';

    // Elapsed
    var elapsedSec = Math.max(0, (Date.now() - startMs) / 1000);
    html += '<div class="bb-vital">';
    html += '<span class="bb-vital-label">Elapsed</span>';
    html += '<span class="bb-vital-value">' + _bbFmtDuration(elapsedSec) + '</span>';
    html += '</div>';

    // ETA + Finish — only once we have measurable progress, so the
    // first samples don't paint a "47 days" estimate.
    if (overallPct >= 0.5) {
      // Linear extrapolation from elapsed time and overall progress.
      var totalSec = elapsedSec * (100 / overallPct);
      var remainingSec = Math.max(0, totalSec - elapsedSec);
      html += '<div class="bb-vital">';
      html += '<span class="bb-vital-label">ETA</span>';
      html += '<span class="bb-vital-value">' + _bbFmtDuration(remainingSec) + '</span>';
      html += '</div>';
      var finishStr = new Date(Date.now() + remainingSec * 1000)
        .toLocaleString(undefined, dateOpts);
      html += '<div class="bb-vital">';
      html += '<span class="bb-vital-label">Finish</span>';
      html += '<span class="bb-vital-value">' + finishStr + '</span>';
      html += '</div>';
    }
  }

  // Temp with hot/warm/cool colour (>= 48 hot, >= 42 warm, else cool).
  if (drive && typeof drive.temperature_c === 'number') {
    var tc = drive.temperature_c;
    var tClass = 'temp-cool';
    if (tc >= 48) tClass = 'temp-hot';
    else if (tc >= 42) tClass = 'temp-warm';
    html += '<div class="bb-vital">';
    html += '<span class="bb-vital-label">Temp</span>';
    html += '<span class="bb-vital-value temp ' + tClass + '">' + tc + '°C</span>';
    html += '</div>';
  }
  html += '</div>';
  return html;
}
// Format a duration in seconds using its two most significant units:
// "Nd Nh" once it reaches a day, "Nh Nm" once it reaches an hour,
// otherwise "Nm". Fractions of a second are dropped.
function _bbFmtDuration(sec) {
  var total = Math.floor(sec);
  var days = Math.floor(total / 86400);
  var hours = Math.floor((total % 86400) / 3600);
  var minutes = Math.floor((total % 3600) / 60);
  if (days > 0) {
    return days + 'd ' + hours + 'h';
  }
  return hours > 0 ? hours + 'h ' + minutes + 'm' : minutes + 'm';
}
// Phase caption — explicit text below the meters, e.g.
// "Pattern 2 of 4 · Verify 0x55 · 47.0% within phase".
//   phase:    1-8 (odd = write, even = verify; two phases per pattern)
//   phasePct: progress within that phase; non-numeric input is shown as 0
// Returns an HTML string, or '' when the phase is missing or outside 1-8
// (rather than rendering "Pattern NaN of 4" / "Write undefined").
function _drawerRenderBadblocksCaption(phase, phasePct) {
  if (!phase) return '';
  var p = parseInt(phase, 10);
  if (isNaN(p) || p < 1 || p > 8) return '';
  var pct = parseFloat(phasePct || 0);
  if (isNaN(pct)) pct = 0;
  var labels = ['0xaa', '0x55', '0xff', '0x00'];
  var pattern = Math.ceil(p / 2);
  var subPhase = (p % 2 === 1) ? 'Write' : 'Verify';
  var label = labels[pattern - 1];
  var html = '<div class="bb-caption">';
  html += 'Pattern ' + pattern + ' of 4 · ';
  html += subPhase + ' ' + label + ' · ';
  html += pct.toFixed(1) + '% within phase';
  html += '</div>';
  return html;
}
// Per-pattern duration history. Reads bb_phase_history (JSON) and
// emits "0xaa: 14h 22m" rows for completed patterns. Pattern N is
// "complete" when its verify-phase end timestamp is known (= the
// next pattern's write-phase start, or stage.finished_at for the
// final one).
// Render the "Completed patterns" duration list from
// stage.bb_phase_history, a map of phase number ("1".."8") to the
// timestamp at which that phase started. It is normally a JSON string; an
// already-parsed object is accepted too.
// Pattern n runs from the start of its write phase (2n-1) to the start of
// the next pattern's write phase (2n+1), or to stage.finished_at for the
// last pattern. Patterns with a missing, unparseable or non-positive span
// are skipped. Returns '' when there is nothing to show.
function _drawerRenderBadblocksHistory(stage) {
  if (!stage.bb_phase_history) return '';
  var hist = stage.bb_phase_history;
  if (typeof hist === 'string') {
    try { hist = JSON.parse(hist); }
    catch (e) { return ''; }
  }
  if (!hist || typeof hist !== 'object') return '';
  var labels = ['0xaa', '0x55', '0xff', '0x00'];
  var rows = [];
  for (var n = 1; n <= 4; n++) {
    var writeStart = hist[String(2 * n - 1)];
    if (!writeStart) continue;
    var endTs = (n < 4) ? hist[String(2 * n + 1)] : stage.finished_at;
    if (!endTs) continue;
    var elapsedSec = (Date.parse(endTs) - Date.parse(writeStart)) / 1000;
    // `!(x > 0)` also rejects NaN from unparseable timestamps, which a
    // plain `x <= 0` check lets through (and would render as "NaNm").
    if (!(elapsedSec > 0)) continue;
    rows.push('<span class="bb-hist-row">' +
      '<span class="bb-hist-label">' + labels[n - 1] + '</span>' +
      '<span class="bb-hist-dur">' + _bbFmtDuration(elapsedSec) + '</span>' +
      '</span>');
  }
  if (!rows.length) return '';
  return '<div class="bb-history"><span class="bb-hist-title">Completed patterns</span>' +
    rows.join('') + '</div>';
}
// Render 4 pattern meters for badblocks -w surface_validate. Each
// meter splits write/verify halves so you can see at a glance which
// pattern is current AND whether you're writing or verifying within
// it. phase: 1-8 (1=write 0xaa, 2=verify 0xaa, 3=write 0x55, ...).
// Render the four pattern meters (0xaa, 0x55, 0xff, 0x00), each split into
// a write half and a verify half.
//   phase:    1-8 (1 = write 0xaa, 2 = verify 0xaa, 3 = write 0x55, ...)
//   phasePct: progress within the current phase; clamped to 0..100, and
//             non-numeric input is treated as 0
// Patterns before the current one are drawn full and tagged bb-meter-done,
// the current one is tagged bb-meter-current, later ones are empty.
// Returns an HTML string, or '' when `phase` is falsy.
function _drawerRenderBadblocksMeters(phase, phasePct) {
  if (!phase) return '';
  var p = parseInt(phase, 10);
  var pct = parseFloat(phasePct || 0);
  // Keep bar widths valid CSS: no "NaN%", nothing below 0 or above 100.
  if (isNaN(pct)) pct = 0;
  pct = Math.min(100, Math.max(0, pct));
  var labels = ['0xaa', '0x55', '0xff', '0x00'];
  var html = '<div class="bb-meters">';
  for (var i = 0; i < 4; i++) {
    var writePhase = i * 2 + 1;
    var verifyPhase = writePhase + 1;
    var writeFill, verifyFill;
    if (p > verifyPhase) {
      writeFill = 100; verifyFill = 100;
    } else if (p === verifyPhase) {
      writeFill = 100; verifyFill = pct;
    } else if (p === writePhase) {
      writeFill = pct; verifyFill = 0;
    } else {
      writeFill = 0; verifyFill = 0;
    }
    var classes = 'bb-meter';
    if (p === writePhase || p === verifyPhase) classes += ' bb-meter-current';
    if (p > verifyPhase) classes += ' bb-meter-done';
    html += '<div class="' + classes + '">';
    html += '<div class="bb-meter-label">' + labels[i] + '</div>';
    html += '<div class="bb-meter-bar">';
    html += '<div class="bb-meter-half bb-write" style="width:' + writeFill.toFixed(1) + '%"></div>';
    html += '<div class="bb-meter-half-spacer"></div>';
    html += '<div class="bb-meter-half bb-verify" style="width:' + verifyFill.toFixed(1) + '%"></div>';
    html += '</div>';
    html += '<div class="bb-meter-sub">';
    html += '<span class="bb-sub-write">W ' + Math.round(writeFill) + '%</span>';
    html += '<span class="bb-sub-verify">V ' + Math.round(verifyFill) + '%</span>';
    html += '</div>';
    html += '</div>';
  }
  html += '</div>';
  return html;
}
function _drawerRenderBurnin(burnin) {
var panel = document.getElementById('drawer-panel-burnin');
if (!panel) return;
@ -979,7 +1544,30 @@
html += '<span class="drawer-job-meta">';
if (burnin.operator) html += 'by ' + _esc(burnin.operator);
if (burnin.started_at) html += ' \u00b7 ' + _drawerFmtDt(burnin.started_at);
html += '</span></div>';
html += '</span>';
// Job-level estimated completion. Uses the weighted overall job %
// (recalculated server-side from stage progress) so it reflects
// every stage, not just the current one. Suppressed under 0.5%
// so the early sample doesn't paint a "Finish: Sep 22" stutter.
if (burnin.state === 'running' && burnin.started_at) {
var jobPct = parseFloat(burnin.percent || 0);
if (jobPct >= 0.5) {
var jobStartMs = Date.parse(burnin.started_at);
var jobElapsedSec = Math.max(0, (Date.now() - jobStartMs) / 1000);
var jobTotalSec = jobElapsedSec * (100 / jobPct);
var jobRemainSec = Math.max(0, jobTotalSec - jobElapsedSec);
var jobFinish = new Date(Date.now() + jobRemainSec * 1000);
var jobFinishStr = jobFinish.toLocaleString(undefined, {
weekday: 'short', month: 'short', day: 'numeric',
hour: 'numeric', minute: '2-digit',
});
html += '<span class="drawer-job-finish" title="Estimated completion of the entire burn-in (all stages)">';
html += '<span class="drawer-job-finish-label">Est. completion</span>';
html += '<span class="drawer-job-finish-value">' + jobFinishStr + '</span>';
html += '</span>';
}
}
html += '</div>';
html += '<div class="drawer-stages">';
var stages = burnin.stages || [];
@ -999,9 +1587,37 @@
html += '<span class="stage-duration">' + _drawerFmtDuration(s.started_at, s.finished_at) + '</span>';
}
html += '</div>';
if (s.error_text) {
// Prominent "Why it failed" block at the top of failed/cancelled/
// unknown stages. Falls back to a heuristic when no error was
// recorded — e.g. a tiny log + no badblocks progress + terminal
// state means the stage was killed externally (SSH disconnect or
// container restart) before it could record an error.
if (s.state === 'failed' || s.state === 'cancelled' || s.state === 'unknown') {
var reason = s.error_text;
if (!reason) {
var logLen = (s.log_text || '').length;
var noBbProgress = !s.bb_phase || (s.bb_phase === 1 && (parseFloat(s.bb_phase_pct || 0) < 0.1));
if (logLen < 500 && noBbProgress) {
reason = 'Stopped without recording an error — likely cause: SSH connection drop or container restart while this stage was running.';
} else {
reason = 'No error message recorded.';
}
}
html += '<div class="stage-reason stage-reason-' + _esc(s.state) + '">';
html += '<span class="stage-reason-label">Reason</span>';
html += '<span class="stage-reason-text">' + _esc(reason) + '</span>';
html += '</div>';
} else if (s.error_text) {
html += '<div class="stage-error-line">' + _esc(s.error_text) + '</div>';
}
// Per-pattern meters for badblocks surface_validate, plus the
// vital-signs row above (temp / speed / elapsed / ETA).
if (s.stage_name === 'surface_validate' && s.bb_phase) {
html += _drawerRenderBadblocksVitals(s, _DRAWER_LAST_DRIVE);
html += _drawerRenderBadblocksMeters(s.bb_phase, s.bb_phase_pct);
html += _drawerRenderBadblocksCaption(s.bb_phase, s.bb_phase_pct);
html += _drawerRenderBadblocksHistory(s);
}
// Raw SSH log output (if available)
if (s.log_text) {
var logHtml = _esc(s.log_text)
@ -1162,14 +1778,6 @@
document.querySelectorAll('.drawer-panel').forEach(function (p) {
p.classList.toggle('active', p.id === 'drawer-panel-' + _drawerTab);
});
// Terminal tab: init/fit on activation; hide autoscroll (N/A for terminal)
var asl = document.querySelector('.autoscroll-label');
if (_drawerTab === 'terminal') {
if (asl) asl.style.visibility = 'hidden';
openTerminalTab();
} else {
if (asl) asl.style.visibility = '';
}
});
// Close button
@ -1194,155 +1802,4 @@
}).catch(function () { showToast('Network error', 'error'); });
});
// -----------------------------------------------------------------------
// Live Terminal (xterm.js + SSH WebSocket)
// -----------------------------------------------------------------------
var _xtermReady = false; // xterm.js + FitAddon libraries loaded
var _terminal = null; // xterm.js Terminal instance
var _termFit = null; // FitAddon instance
var _termWs = null; // active WebSocket (null = disconnected)
// Inject the xterm.js stylesheet and scripts from the jsDelivr CDN.
// xterm.js is requested first; the fit addon is requested from its onload
// handler, and `cb` is installed as the addon script's onload handler.
function _loadXtermLibs(cb) {
  function addScript(url, onLoaded) {
    var tag = document.createElement('script');
    tag.src = url;
    tag.onload = onLoaded;
    document.head.appendChild(tag);
  }

  var stylesheet = document.createElement('link');
  stylesheet.rel = 'stylesheet';
  stylesheet.href = 'https://cdn.jsdelivr.net/npm/xterm@5.3.0/css/xterm.css';
  document.head.appendChild(stylesheet);

  addScript('https://cdn.jsdelivr.net/npm/xterm@5.3.0/lib/xterm.js', function () {
    addScript('https://cdn.jsdelivr.net/npm/xterm-addon-fit@0.8.0/lib/xterm-addon-fit.js', cb);
  });
}
// Called when the terminal drawer tab is activated.
// First activation: shows a loading placeholder, loads the xterm libraries,
// then initialises the terminal. If the libraries are loaded but there is
// no terminal yet, initialises it. Otherwise just refits the existing
// terminal to the panel.
function openTerminalTab() {
  var host = document.getElementById('drawer-panel-terminal');
  if (!host) return;

  if (_xtermReady && _terminal) {
    // Already initialised — refit to current panel dimensions
    setTimeout(function () {
      if (_termFit) {
        try { _termFit.fit(); } catch (_) {}
      }
    }, 30);
    return;
  }

  if (_xtermReady) {
    _termInit(host);
    return;
  }

  host.innerHTML = '<div class="drawer-loading">Loading terminal\u2026</div>';
  _loadXtermLibs(function () {
    _xtermReady = true;
    _termInit(host);
  });
}
// Create the xterm.js terminal inside `panel` and wire it up.
// Side effects: empties the panel, stores the Terminal and FitAddon in the
// module-level _terminal / _termFit, registers the keystroke forwarder and
// a ResizeObserver on the panel, then calls _termConnect().
// Requires the Terminal and FitAddon globals to be loaded already.
function _termInit(panel) {
panel.innerHTML = '';
var term = new Terminal({
cursorBlink: true,
fontSize: 13,
fontFamily: '"SF Mono","Fira Code",Consolas,"DejaVu Sans Mono",monospace',
theme: {
background: '#0d1117',
foreground: '#e6edf3',
cursor: '#58a6ff',
cursorAccent: '#0d1117',
selectionBackground: 'rgba(88,166,255,0.25)',
black: '#484f58', red: '#ff7b72', green: '#3fb950', yellow: '#d29922',
blue: '#58a6ff', magenta: '#bc8cff', cyan: '#39c5cf', white: '#b1bac4',
brightBlack: '#6e7681', brightRed: '#ffa198', brightGreen: '#56d364',
brightYellow: '#e3b341', brightBlue: '#79c0ff', brightMagenta: '#d2a8ff',
brightCyan: '#56d4dd', brightWhite: '#f0f6fc',
},
scrollback: 2000,
allowProposedApi: true,
});
var fit = new FitAddon.FitAddon();
term.loadAddon(fit);
term.open(panel);
_terminal = term;
_termFit = fit;
// Initial fit after the panel is visible
setTimeout(function () {
if (_termFit) try { _termFit.fit(); } catch (_) {}
}, 30);
// Forward all keystrokes → SSH (onData registered once here)
// Input is sent as UTF-8 bytes, and only while the socket is OPEN
// (readyState 1); keystrokes typed while disconnected are dropped.
term.onData(function (data) {
if (_termWs && _termWs.readyState === 1) {
_termWs.send(new TextEncoder().encode(data));
}
});
// Refit + notify server on resize
// Resize notifications are sent as a JSON text frame, unlike keystrokes.
new ResizeObserver(function () {
if (!_termFit) return;
try { _termFit.fit(); } catch (_) {}
if (_termWs && _termWs.readyState === 1 && _terminal) {
_termWs.send(JSON.stringify({ type: 'resize', cols: _terminal.cols, rows: _terminal.rows }));
}
}).observe(panel);
_termConnect();
}
// Open the terminal WebSocket unless one is already connecting or open.
// Incoming frames are written to the terminal; a close shows the reconnect bar.
function _termConnect() {
  var existing = _termWs;
  if (existing && existing.readyState <= 1) {
    return; // already open or connecting
  }

  var scheme = 'ws:';
  if (location.protocol === 'https:') {
    scheme = 'wss:';
  }

  var sock = new WebSocket(scheme + '//' + location.host + '/ws/terminal');
  sock.binaryType = 'arraybuffer';
  _termWs = sock;

  sock.onopen = function () {
    _termHideReconnect();
    // Send the current geometry as soon as the socket is usable.
    if (_terminal && sock.readyState === 1) {
      var dims = { type: 'resize', cols: _terminal.cols, rows: _terminal.rows };
      sock.send(JSON.stringify(dims));
    }
  };

  sock.onmessage = function (evt) {
    if (!_terminal) return;
    var payload = evt.data;
    if (payload instanceof ArrayBuffer) {
      payload = new Uint8Array(payload);
    }
    _terminal.write(payload);
  };

  sock.onclose = function () {
    if (_terminal) {
      _terminal.write('\r\n\x1b[33m\u2500\u2500 disconnected \u2500\u2500\x1b[0m\r\n');
    }
    _termShowReconnect();
  };

  // Nothing to do here: the close handler runs as well and does the reporting.
  sock.onerror = function () {};
}
// Append a "Connection closed / Reconnect" bar to the terminal panel.
// Does nothing when the panel is missing or a bar is already present.
function _termShowReconnect() {
  var host = document.getElementById('drawer-panel-terminal');
  if (!host) return;
  if (host.querySelector('.term-reconnect-bar')) return;

  var notice = document.createElement('span');
  notice.textContent = 'Connection closed';

  var retryBtn = document.createElement('button');
  retryBtn.className = 'btn-secondary';
  retryBtn.textContent = '\u21ba Reconnect';

  var strip = document.createElement('div');
  strip.className = 'term-reconnect-bar';
  strip.appendChild(notice);
  strip.appendChild(retryBtn);

  // Clicking removes the bar first, then starts a fresh connection attempt.
  retryBtn.onclick = function () {
    strip.remove();
    _termConnect();
  };

  host.appendChild(strip);
}
// Remove the reconnect bar, if one is currently in the document.
function _termHideReconnect() {
  var stale = document.querySelector('.term-reconnect-bar');
  if (!stale) return;
  stale.remove();
}
}());

View file

@ -1,6 +1,6 @@
{% extends "layout.html" %}
{% block title %}TrueNAS Burn-In — Audit Log{% endblock %}
{% block title %}NAS Burn-In — Audit Log{% endblock %}
{% block content %}
<div class="page-toolbar">

View file

@ -46,7 +46,13 @@
{%- elif bi.state == 'passed' -%}
<span class="chip chip-passed">Passed</span>
{%- elif bi.state == 'failed' -%}
<span class="chip chip-failed">Failed{% if bi.stage_name %} ({{ bi.stage_name | replace('_',' ') }}){% endif %}</span>
{# Suppress the stage suffix for SMART + surface_validate stages.
SMART has its own columns, and surface_validate is the dominant
case so a redundant suffix just adds visual noise. The drawer
shows the per-stage Reason for any digging. Keep the suffix for
precheck / final_check since those are rare enough that the hint
is helpful. #}
<span class="chip chip-failed">Failed{% if bi.stage_name and bi.stage_name not in ('short_smart', 'long_smart', 'surface_validate') %} ({{ bi.stage_name | replace('_',' ') }}){% endif %}</span>
{%- elif bi.state == 'cancelled' -%}
<span class="chip chip-aborted">Cancelled</span>
{%- elif bi.state == 'unknown' -%}
@ -63,14 +69,14 @@
<th class="col-check">
<input type="checkbox" id="select-all-cb" class="drive-cb" title="Select all idle drives">
</th>
<th class="col-drive">Drive</th>
<th class="col-serial">Serial</th>
<th class="col-size">Size</th>
<th class="col-temp">Temp</th>
<th class="col-health">Health</th>
<th class="col-smart">Short SMART</th>
<th class="col-smart">Long SMART</th>
<th class="col-burnin">Burn-In</th>
<th class="col-drive sortable" data-sort-key="drive">Drive</th>
<th class="col-serial sortable" data-sort-key="serial">Serial</th>
<th class="col-size sortable" data-sort-key="size">Size</th>
<th class="col-temp sortable" data-sort-key="temp">Temp</th>
<th class="col-health sortable" data-sort-key="health">Health</th>
<th class="col-smart sortable" data-sort-key="short">Short SMART</th>
<th class="col-smart sortable" data-sort-key="long">Long SMART</th>
<th class="col-burnin sortable" data-sort-key="burnin">Burn-In</th>
<th class="col-actions">Actions</th>
</tr>
</thead>
@ -80,20 +86,46 @@
{%- set bi_active = drive.burnin and drive.burnin.state in ('queued', 'running') %}
{%- set short_busy = drive.smart_short and drive.smart_short.state == 'running' %}
{%- set long_busy = drive.smart_long and drive.smart_long.state == 'running' %}
{%- set selectable = not bi_active and not short_busy and not long_busy %}
{%- set pool_locked = drive.pool_name and not drive.pool_unlocked_until %}
{%- set is_boot_pool = drive.pool_name == 'boot-pool' %}
{%- set is_exported = drive.pool_role == 'exported' %}
{%- set is_mounted = drive.pool_role == 'mounted' %}
{%- set selectable = not bi_active and not short_busy and not long_busy and not pool_locked %}
{%- set bi_done = drive.burnin and drive.burnin.state in ('passed', 'failed', 'cancelled', 'unknown') %}
{%- set smart_done = (drive.smart_short and drive.smart_short.state in ('passed','failed','aborted'))
or (drive.smart_long and drive.smart_long.state in ('passed','failed','aborted')) %}
{%- set can_reset = (bi_done or smart_done) and not bi_active and not short_busy and not long_busy %}
<tr data-status="{{ drive.status }}" id="drive-{{ drive.id }}">
{%- set can_reset = (bi_done or smart_done) and not bi_active and not short_busy and not long_busy and not pool_locked %}
{%- set short_state = drive.smart_short.state if drive.smart_short else 'idle' %}
{%- set long_state = drive.smart_long.state if drive.smart_long else 'idle' %}
{%- set burnin_state = drive.burnin.state if drive.burnin else '' %}
<tr data-status="{{ drive.status }}" id="drive-{{ drive.id }}"
data-sort-drive="{{ drive.devname }}"
data-sort-serial="{{ (drive.serial or '') | lower }}"
data-sort-size="{{ drive.size_bytes or 0 }}"
data-sort-temp="{{ drive.temperature_c if drive.temperature_c is not none else '' }}"
data-sort-health="{{ {'PASSED': 1, 'WARNING': 2, 'FAILED': 3, 'UNKNOWN': 4}.get(drive.smart_health, 9) }}"
data-sort-short="{{ {'running': 1, 'failed': 2, 'aborted': 3, 'passed': 4, 'idle': 5}.get(short_state, 9) }}"
data-sort-long="{{ {'running': 1, 'failed': 2, 'aborted': 3, 'passed': 4, 'idle': 5}.get(long_state, 9) }}"
data-sort-burnin="{{ {'running': 1, 'queued': 2, 'failed': 3, 'unknown': 4, 'cancelled': 5, 'passed': 6}.get(burnin_state, 9) }}"
>
<td class="col-check">
{%- if selectable %}
<input type="checkbox" class="drive-checkbox" data-drive-id="{{ drive.id }}">
{%- endif %}
</td>
<td class="col-drive">
<span class="drive-name">{{ drive.devname }}</span>
<span class="drive-name">
{%- if drive.pool_name -%}
<span class="pool-lock-icon{% if is_boot_pool %} pool-lock-boot{% elif is_exported %} pool-lock-exported{% elif is_mounted %} pool-lock-mounted{% endif %}"
title="{% if is_boot_pool %}In BOOT POOL '{{ drive.pool_name }}'{% elif is_exported %}Carries ZFS data from a previously-imported pool{% elif is_mounted %}Has a mounted (non-ZFS) filesystem{% else %}In pool '{{ drive.pool_name }}' ({{ drive.pool_role or 'data' }}){% endif %}">&#x1F512;</span>
{%- endif -%}
{{ drive.devname }}
</span>
<span class="drive-model">{{ drive.model or "Unknown" }}</span>
{%- if drive.pool_name %}
<span class="pool-pill{% if is_boot_pool %} pool-pill-boot{% elif is_exported %} pool-pill-exported{% elif is_mounted %} pool-pill-mounted{% endif %}"
title="Drive lock reason">{% if is_exported %}exported ZFS{% elif is_mounted %}mounted FS{% else %}{{ drive.pool_name }} &middot; {{ drive.pool_role or 'data' }}{% endif %}</span>
{%- endif %}
{%- if drive.location %}
<span class="drive-location"
data-drive-id="{{ drive.id }}"
@ -154,6 +186,21 @@
{% if short_busy %}disabled{% endif %}
title="Start Long SMART test (~several hours)">Long</button>
{%- endif %}
{%- if pool_locked %}
<!-- Drive is in a zpool — replace Burn-In with Unlock affordance -->
<button class="btn-action btn-unlock{% if is_boot_pool %} btn-unlock-boot{% elif is_exported %} btn-unlock-exported{% elif is_mounted %} btn-unlock-mounted{% endif %}"
data-drive-id="{{ drive.id }}"
data-devname="{{ drive.devname }}"
data-serial="{{ drive.serial or '' }}"
data-model="{{ drive.model or 'Unknown' }}"
data-size="{{ drive.size_bytes | format_bytes }}"
data-pool-name="{{ drive.pool_name }}"
data-pool-role="{{ drive.pool_role or 'data' }}"
data-is-boot-pool="{{ '1' if is_boot_pool else '0' }}"
data-is-exported="{{ '1' if is_exported else '0' }}"
data-is-mounted="{{ '1' if is_mounted else '0' }}"
title="{% if is_boot_pool %}Drive is in BOOT POOL '{{ drive.pool_name }}' — click to unlock{% elif is_exported %}Drive carries ZFS data from a previously-imported pool — click to unlock{% elif is_mounted %}Drive has a mounted filesystem — click to unlock{% else %}Drive is in pool '{{ drive.pool_name }}' — click to unlock{% endif %}">&#x1F512; Unlock</button>
{%- else %}
<!-- Burn-In -->
<button class="btn-action btn-start{% if short_busy or long_busy %} btn-disabled{% endif %}"
data-drive-id="{{ drive.id }}"
@ -162,8 +209,10 @@
data-model="{{ drive.model or 'Unknown' }}"
data-size="{{ drive.size_bytes | format_bytes }}"
data-health="{{ drive.smart_health }}"
data-pool-name="{{ drive.pool_name or '' }}"
data-pool-unlocked-until="{{ drive.pool_unlocked_until or '' }}"
{% if short_busy or long_busy %}disabled{% endif %}
title="Start Burn-In">Burn-In</button>
title="Start Burn-In{% if drive.pool_name %} (UNLOCKED — pool drive){% endif %}">Burn-In{% if drive.pool_name %} <span class="unlock-countdown" data-expires="{{ drive.pool_unlocked_until }}">&#x1F513;</span>{% endif %}</button>
<!-- Reset — clears SMART state so drive can be re-tested from scratch -->
{%- if can_reset %}
<button class="btn-action btn-reset"
@ -171,6 +220,7 @@
title="Reset SMART state — clears test results so drive shows as fresh">Reset</button>
{%- endif %}
{%- endif %}
{%- endif %}
</div>
</td>
</tr>

View file

@ -0,0 +1,32 @@
<div id="password-modal" class="modal-overlay" hidden aria-modal="true" role="dialog">
<div class="modal">
<div class="modal-header">
<h2 class="modal-title">Change password</h2>
<button class="modal-close" id="password-modal-close-btn" aria-label="Close">&#x2715;</button>
</div>
<div class="modal-body">
<div class="form-group">
<label class="form-label" for="pw-current">Current password</label>
<input class="form-input" type="password" id="pw-current"
autocomplete="current-password" required>
</div>
<div class="form-group">
<label class="form-label" for="pw-new">New password (8+ characters)</label>
<input class="form-input" type="password" id="pw-new"
autocomplete="new-password" required minlength="8">
</div>
<div class="form-group">
<label class="form-label" for="pw-confirm">Confirm new password</label>
<input class="form-input" type="password" id="pw-confirm"
autocomplete="new-password" required minlength="8">
<div class="confirm-hint" id="pw-hint"></div>
</div>
</div>
<div class="modal-footer">
<button class="btn-secondary" id="password-modal-cancel-btn">Cancel</button>
<button class="btn-danger" id="password-modal-submit-btn" disabled>Change password</button>
</div>
</div>
</div>

View file

@ -0,0 +1,69 @@
<div id="unlock-modal" class="modal-overlay" hidden aria-modal="true" role="dialog">
<div class="modal modal-danger">
<div class="modal-header">
<h2 class="modal-title" id="unlock-modal-title">Unlock pool drive</h2>
<button class="modal-close" id="unlock-modal-close-btn" aria-label="Close">&#x2715;</button>
</div>
<div class="modal-body">
<div class="modal-drive-info">
<div class="modal-drive-row">
<span class="modal-devname" id="unlock-devname">&mdash;</span>
<span class="chip" id="unlock-pool-chip">&mdash;</span>
</div>
<div class="modal-drive-sub">
<span id="unlock-model">&mdash;</span>
&middot;
<span id="unlock-size">&mdash;</span>
&middot;
<span class="mono" id="unlock-serial">&mdash;</span>
</div>
</div>
<div id="unlock-warning" class="confirm-warning">
<strong id="unlock-warning-title">This drive belongs to a zpool.</strong>
<p id="unlock-warning-body">
Running a destructive burn-in stage will overwrite all data on this drive
and almost certainly destroy the pool. Only proceed if you have already
removed this drive from the pool, or if you are intentionally
decommissioning the pool.
</p>
</div>
<div class="form-group">
<label class="form-label" for="unlock-operator-input">Operator</label>
<input class="form-input" type="text" id="unlock-operator-input"
placeholder="Your name" autocomplete="name" maxlength="64">
</div>
<div class="form-group">
<label class="form-label" for="unlock-reason-input">
Reason (recorded to audit log, minimum 5 characters)
</label>
<input class="form-input" type="text" id="unlock-reason-input"
placeholder="e.g. replacing failed drive in tank/raidz2-0"
autocomplete="off" maxlength="200">
</div>
<div class="form-group">
<label class="form-label" for="unlock-confirm-input" id="unlock-confirm-label">
Type <code id="unlock-confirm-token">&mdash;</code> to confirm
</label>
<input class="form-input form-input-confirm" type="text" id="unlock-confirm-input"
placeholder="" autocomplete="off" spellcheck="false">
<div class="confirm-hint" id="unlock-confirm-hint"></div>
</div>
<div class="stage-always-note">
Unlock lasts 10 minutes. After that, this drive locks again automatically.
</div>
</div>
<div class="modal-footer">
<button class="btn-secondary" id="unlock-modal-cancel-btn">Cancel</button>
<button class="btn-danger" id="unlock-modal-submit-btn" disabled>Unlock</button>
</div>
</div>
</div>

View file

@ -1,10 +1,11 @@
{% extends "layout.html" %}
{% block title %}TrueNAS Burn-In — Dashboard{% endblock %}
{% block title %}NAS Burn-In — Dashboard{% endblock %}
{% block content %}
{% include "components/modal_start.html" %}
{% include "components/modal_batch.html" %}
{% include "components/modal_unlock.html" %}
<!-- Stats bar — drive counts updated live by app.js updateCounts(); sensor chips updated by SSE system-sensors event -->
<div class="stats-bar">
@ -110,7 +111,6 @@
<button class="drawer-tab active" data-tab="burnin">Burn-In</button>
<button class="drawer-tab" data-tab="smart">SMART</button>
<button class="drawer-tab" data-tab="events">Events</button>
<button class="drawer-tab" data-tab="terminal">Terminal</button>
</nav>
<div class="drawer-controls">
<label class="autoscroll-label">
@ -124,7 +124,6 @@
<div class="drawer-panel active" id="drawer-panel-burnin"></div>
<div class="drawer-panel" id="drawer-panel-smart"></div>
<div class="drawer-panel" id="drawer-panel-events"></div>
<div class="drawer-panel drawer-panel-terminal" id="drawer-panel-terminal"></div>
</div>
</div>
{% endblock %}

View file

@ -1,6 +1,6 @@
{% extends "layout.html" %}
{% block title %}TrueNAS Burn-In — History{% endblock %}
{% block title %}NAS Burn-In — History{% endblock %}
{% block content %}
<div class="page-toolbar">

View file

@ -1,6 +1,6 @@
{% extends "layout.html" %}
{% block title %}TrueNAS Burn-In — Job #{{ job.id }}{% endblock %}
{% block title %}NAS Burn-In — Job #{{ job.id }}{% endblock %}
{% block content %}
<div class="page-toolbar">

View file

@ -196,7 +196,7 @@
<div class="print-header">
<div class="print-brand">
<strong>TrueNAS Burn-In Dashboard</strong>
<strong>NAS Burn-In Dashboard</strong>
Job #{{ job.id }} &nbsp;·&nbsp; {{ job.created_at | format_dt_full }}
</div>
<div class="result-badge {{ job.state }}">
@ -282,7 +282,7 @@
<div class="print-footer">
<div class="print-footer-note">
Generated by TrueNAS Burn-In Dashboard<br>
Generated by NAS Burn-In Dashboard<br>
{{ job.finished_at | format_dt_full }}<br>
Scan QR code to view full job details online
</div>

View file

@ -3,8 +3,11 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{% block title %}TrueNAS Burn-In{% endblock %}</title>
<title>{% block title %}NAS Burn-In{% endblock %}</title>
<link rel="stylesheet" href="/static/app.css">
{% if request.state.current_user %}
<meta name="default-operator" content="{{ request.state.current_user.full_name or request.state.current_user.username }}">
{% endif %}
</head>
<body>
@ -16,7 +19,7 @@
<line x1="6" y1="6" x2="6.01" y2="6"></line>
<line x1="6" y1="18" x2="6.01" y2="18"></line>
</svg>
<span class="header-title">TrueNAS Burn-In</span>
<span class="header-title">NAS Burn-In</span>
<span class="header-version">v{{ app_version if app_version is defined else '—' }}</span>
</a>
<div class="header-meta">
@ -38,6 +41,11 @@
<a class="header-link" href="/audit">Audit</a>
<a class="header-link" href="/settings">Settings</a>
<a class="header-link" href="/docs" target="_blank" rel="noopener">API</a>
{% if request.state.current_user %}
<span class="header-user" title="Signed in">{{ request.state.current_user.full_name or request.state.current_user.username }}</span>
<a class="header-link header-pw" href="#" id="open-password-modal">Change password</a>
<a class="header-link header-logout" href="/logout">Logout</a>
{% endif %}
</div>
</header>
@ -57,6 +65,10 @@
{% block content %}{% endblock %}
</main>
{% if request.state.current_user %}
{% include "components/modal_password.html" %}
{% endif %}
<div id="toast-container" aria-live="polite"></div>
<script src="https://unpkg.com/htmx.org@2.0.3/dist/htmx.min.js"></script>
<script src="https://unpkg.com/htmx-ext-sse@2.2.2/sse.js"></script>

67
app/templates/login.html Normal file
View file

@ -0,0 +1,67 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>Sign in &mdash; NAS Burn-In</title>
<link rel="stylesheet" href="/static/app.css">
</head>
<body class="login-body">
<main class="login-card">
<div class="login-header">
<div class="login-title">NAS Burn-In</div>
<div class="login-sub">{% if needs_setup %}First-time setup{% else %}Sign in{% endif %}</div>
</div>
{% if error %}
<div class="login-error">{{ error }}</div>
{% endif %}
{% if needs_setup %}
<p class="login-blurb">
No users exist yet. Create the initial administrator account.
Username and password go straight into the burn-in DB &mdash; no email,
no recovery flow. Pick something memorable.
</p>
<form method="POST" action="/api/v1/auth/setup" class="login-form">
<label class="login-label" for="username">Username</label>
<input class="login-input" type="text" id="username" name="username"
autocomplete="username" required minlength="2" maxlength="64"
autofocus>
<label class="login-label" for="full_name">Full name <span class="login-optional">(optional)</span></label>
<input class="login-input" type="text" id="full_name" name="full_name"
autocomplete="name" maxlength="128">
<label class="login-label" for="password">Password</label>
<input class="login-input" type="password" id="password" name="password"
autocomplete="new-password" required minlength="8" maxlength="128">
<button class="login-submit" type="submit">Create account &amp; sign in</button>
</form>
{% else %}
<form method="POST" action="/login" class="login-form">
<input type="hidden" name="next" value="{{ next }}">
<label class="login-label" for="username">Username</label>
<input class="login-input" type="text" id="username" name="username"
autocomplete="username" required maxlength="64" autofocus>
<label class="login-label" for="password">Password</label>
<input class="login-input" type="password" id="password" name="password"
autocomplete="current-password" required maxlength="128">
<button class="login-submit" type="submit">Sign in</button>
</form>
{% endif %}
<div class="login-footer">
Authentication is local to this dashboard. Forgot your password?
Reset it via the container DB:<br>
<code class="login-code">docker exec nas-burnin python -m app.auth_cli reset &lt;user&gt;</code>
</div>
</main>
</body>
</html>

View file

@ -1,6 +1,6 @@
{% extends "layout.html" %}
{% block title %}TrueNAS Burn-In — Settings{% endblock %}
{% block title %}NAS Burn-In — Settings{% endblock %}
{% block content %}
<div class="page-toolbar">
@ -61,7 +61,7 @@
<input class="sf-input" id="smtp_user" name="smtp_user" type="text"
value="{{ editable.smtp_user }}" autocomplete="off">
<label for="smtp_password">Password</label>
<label for="smtp_password">Password <span class="secret-status secret-{{ 'set' if secret_status.smtp_password == 'set' else 'unset' }}">[{{ secret_status.smtp_password }}]</span></label>
<input class="sf-input" id="smtp_password" name="smtp_password" type="password"
placeholder="leave blank to keep existing" autocomplete="new-password">
@ -125,17 +125,19 @@
<input class="sf-input" id="ssh_user" name="ssh_user" type="text"
value="{{ editable.ssh_user }}" placeholder="root">
<label for="ssh_password">Password</label>
<label for="ssh_password">Password <span class="secret-status secret-{{ 'set' if secret_status.ssh_password == 'set' else 'unset' }}">[{{ secret_status.ssh_password }}]</span></label>
<input class="sf-input" id="ssh_password" name="ssh_password" type="password"
placeholder="leave blank to keep existing" autocomplete="new-password">
<label for="ssh_key">Private Key</label>
<label for="ssh_key">Private Key <span class="secret-status secret-{{ 'set' if 'set' in secret_status.ssh_key else 'unset' }}">[{{ secret_status.ssh_key }}]</span></label>
<div>
<textarea class="sf-input sf-textarea" id="ssh_key" name="ssh_key"
rows="6" placeholder="Paste PEM private key here (-----BEGIN ... KEY-----). Leave blank to keep existing." autocomplete="off"></textarea>
<span class="sf-hint" style="margin-top:3px">
Either password or key auth. Key takes precedence if both are set.
Key is stored securely in <code>/data/settings_overrides.json</code>.
<strong>For production, mount the key as a Docker secret at
<code>/run/secrets/ssh_key</code> instead of pasting it here</strong>
— that path is checked automatically when no key is in settings.
</span>
</div>
@ -248,6 +250,30 @@
type="number" min="0" max="9999" value="{{ editable.bad_block_threshold }}">
<span class="sf-hint">Max bad blocks before surface validate fails (Stage 7)</span>
</div>
<div class="sf-row">
<label class="sf-label" for="surface_validate_block_size">Badblocks Block Size (bytes)</label>
<input class="sf-input sf-input-xs" id="surface_validate_block_size"
name="surface_validate_block_size" type="number" min="512" max="1048576" step="512"
value="{{ editable.surface_validate_block_size }}">
<span class="sf-hint">badblocks -b. 4096 (default) is conservative; 8192 is faster on multi-TB HDDs (~2x RAM, ~half the runtime). Power of 2.</span>
</div>
<div class="sf-row">
<label class="sf-label" for="surface_validate_block_buffer">Badblocks Block Buffer</label>
<input class="sf-input sf-input-xs" id="surface_validate_block_buffer"
name="surface_validate_block_buffer" type="number" min="1" max="4096"
value="{{ editable.surface_validate_block_buffer }}">
<span class="sf-hint">badblocks -c. 64 (default) matches the upstream tool. Buffer = block_size × this many blocks per IO.</span>
</div>
<div class="sf-row">
<label class="sf-label" for="surface_validate_passes">Badblocks Passes</label>
<input class="sf-input sf-input-xs" id="surface_validate_passes"
name="surface_validate_passes" type="number" min="0" max="16"
value="{{ editable.surface_validate_passes }}">
<span class="sf-hint">badblocks -p. 1 = repeat until one consecutive clean scan (default). 2-3 for paranoid burn-in that re-confirms after errors.</span>
</div>
</div>
</div><!-- /right col -->
@ -266,7 +292,7 @@
<input class="sf-input" id="truenas_base_url" name="truenas_base_url" type="text"
value="{{ editable.truenas_base_url }}" placeholder="http://10.0.0.x">
<label for="truenas_api_key">API Key</label>
<label for="truenas_api_key">API Key <span class="secret-status secret-{{ 'set' if secret_status.truenas_api_key == 'set' else 'unset' }}">[{{ secret_status.truenas_api_key }}]</span></label>
<input class="sf-input" id="truenas_api_key" name="truenas_api_key" type="password"
placeholder="leave blank to keep existing" autocomplete="new-password">
@ -317,7 +343,7 @@
<div id="restart-banner" style="display:none;margin-top:12px;padding:12px 16px;background:rgba(255,170,0,0.12);border:1px solid var(--yellow);border-radius:8px;color:var(--text-strong)">
<strong>&#9888; Container restart required</strong> — system settings are saved but won't take effect until you restart the app container:
<pre style="margin:8px 0 0;padding:8px 10px;background:var(--bg-card);border-radius:5px;font-size:12px;color:var(--text-strong);user-select:all">docker compose restart app</pre>
<span style="font-size:11px;color:var(--text-muted)">Run this on <strong>maple.local</strong> from <code>~/docker/stacks/truenas-burnin/</code></span>
<span style="font-size:11px;color:var(--text-muted)">Run this on <strong>maple.local</strong> from <code>~/docker/stacks/nas-burnin/</code></span>
</div>
</form>

View file

@ -1,6 +1,6 @@
{% extends "layout.html" %}
{% block title %}TrueNAS Burn-In — Stats{% endblock %}
{% block title %}NAS Burn-In — Stats{% endblock %}
{% block content %}
<div class="page-toolbar">

View file

@ -45,6 +45,10 @@ async def _with_retry(
)
await asyncio.sleep(backoff)
backoff *= 2
# Unreachable: the loop either returns on success or re-raises on the
# final attempt. The explicit raise makes that obvious to type-checkers
# and to anyone reading top-down without tracing the control flow.
raise RuntimeError("unreachable: _with_retry exhausted without returning")
class TrueNASClient:

View file

@ -1,10 +0,0 @@
FROM python:3.12-slim
WORKDIR /opt/app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY app/ ./app/
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8084"]

File diff suppressed because it is too large Load diff

View file

@ -1,74 +0,0 @@
from pydantic_settings import BaseSettings, SettingsConfigDict
# Application configuration.
# A pydantic-settings model: each field below can be overridden by an
# environment variable of the same name or by an entry in the `.env` file
# named in model_config (names are matched case-insensitively). The literal
# on each line is the fallback used when no override is present.
class Settings(BaseSettings):
model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",
case_sensitive=False,
)
# HTTP listener.
# NOTE(review): the Dockerfile CMD passes --port 8084 to uvicorn directly;
# confirm whether app_host/app_port are consulted anywhere at runtime.
app_host: str = "0.0.0.0"
app_port: int = 8080
db_path: str = "/data/app.db"
# TrueNAS API connection. The defaults ("localhost:8000", "mock-key") look
# like a local mock server — TODO confirm.
# NOTE(review): TLS verification is disabled by default.
truenas_base_url: str = "http://localhost:8000"
truenas_api_key: str = "mock-key"
truenas_verify_tls: bool = False
# Poller / burn-in scheduling knobs (units are given in the field names).
poll_interval_seconds: int = 12
stale_threshold_seconds: int = 45
max_parallel_burnins: int = 2
surface_validate_seconds: int = 45 # mock simulation duration
io_validate_seconds: int = 25 # mock simulation duration
# Logging
log_level: str = "INFO"
# Security — comma-separated IPs or CIDRs, e.g. "10.0.0.0/24,127.0.0.1"
# Empty string means allow all (default).
allowed_ips: str = ""
# SMTP — daily status email at 8am local time
# Leave smtp_host empty to disable email.
smtp_host: str = ""
smtp_port: int = 587
smtp_user: str = ""
smtp_password: str = ""
smtp_from: str = ""
smtp_to: str = "" # comma-separated recipients
smtp_report_hour: int = 8 # local hour to send (0-23)
smtp_daily_report_enabled: bool = True # set False to skip daily report without disabling alerts
smtp_alert_on_fail: bool = True # immediate email when a job fails
smtp_alert_on_pass: bool = False # immediate email when a job passes
smtp_ssl_mode: str = "starttls" # "starttls" | "ssl" | "plain"
smtp_timeout: int = 60 # connection + read timeout in seconds
# Webhook — POST JSON payload on every job state change (pass/fail)
# Leave empty to disable. Works with Slack, Discord, ntfy, n8n, etc.
webhook_url: str = ""
# Stuck-job detection: jobs running longer than this are marked 'unknown'
stuck_job_hours: int = 24
# Temperature thresholds (°C) — drives table colouring + precheck gate
temp_warn_c: int = 46 # orange warning
temp_crit_c: int = 55 # red critical (precheck refuses to start above this)
# Bad-block tolerance — surface_validate fails if bad blocks exceed this
bad_block_threshold: int = 0
# SSH credentials for direct TrueNAS command execution (Stage 7)
# When ssh_host is set, burn-in stages use SSH for smartctl/badblocks instead of REST API.
# Leave ssh_host empty to use the mock/REST API (development mode).
ssh_host: str = ""
ssh_port: int = 22
ssh_user: str = "root" # TrueNAS CORE default is root
ssh_password: str = "" # Password auth (leave blank if using key)
ssh_key: str = "" # PEM private key content (paste full key including headers)
# Application version — used by the /api/v1/updates/check endpoint
app_version: str = "1.0.0-8"
settings = Settings()

View file

@ -1,123 +0,0 @@
import asyncio
import ipaddress
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import PlainTextResponse
from app import burnin, mailer, poller, settings_store
from app.config import settings
from app.database import init_db
from app.logging_config import configure as configure_logging
from app.renderer import templates # noqa: F401 — registers filters as side-effect
from app.routes import router
from app.truenas import TrueNASClient
# Configure structured JSON logging before anything else logs
configure_logging()
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# IP allowlist middleware
# ---------------------------------------------------------------------------
class _IPAllowlistMiddleware(BaseHTTPMiddleware):
    """Reject requests that do not originate from ``ALLOWED_IPS``.

    ``allowed_ips`` is a comma-separated list of addresses or CIDR ranges.
    Entries that fail to parse are logged and ignored. When no valid entry
    remains the middleware is a no-op and every request passes.

    A request is admitted only when the direct TCP peer *and* every address
    listed in ``X-Forwarded-For`` fall inside the allowlist. The header is
    supplied by the client, so it can never be used on its own to grant
    access: a caller outside the allowlist could otherwise send
    ``X-Forwarded-For: 127.0.0.1`` and walk straight through.

    Deployment note: when the app sits behind a reverse proxy, the proxy's
    own address must be included in ``ALLOWED_IPS``, because it is the TCP
    peer the app sees.
    """

    def __init__(self, app, allowed_ips: str) -> None:
        super().__init__(app)
        self._networks: list[ipaddress.IPv4Network | ipaddress.IPv6Network] = []
        for entry in (s.strip() for s in allowed_ips.split(",") if s.strip()):
            try:
                # strict=False accepts host bits, e.g. "10.0.0.5/24".
                self._networks.append(ipaddress.ip_network(entry, strict=False))
            except ValueError:
                log.warning("Invalid ALLOWED_IPS entry ignored: %r", entry)

    def _is_allowed(self, ip_str: str) -> bool:
        """Return True when ``ip_str`` parses and lies in an allowed network."""
        try:
            addr = ipaddress.ip_address(ip_str)
        except ValueError:
            # Empty or malformed address: never allowed.
            return False
        return any(addr in net for net in self._networks)

    async def dispatch(self, request: Request, call_next):
        """Forward the request if every hop is allow-listed, else answer 403."""
        if not self._networks:
            return await call_next(request)

        # The TCP peer is the only address the caller cannot forge.
        peer_ip = request.client.host if request.client else ""
        forwarded_hops = [
            hop.strip()
            for hop in request.headers.get("X-Forwarded-For", "").split(",")
            if hop.strip()
        ]

        # First address in the chain that is not allow-listed, if any.
        offender = next(
            (ip for ip in (peer_ip, *forwarded_hops) if not self._is_allowed(ip)),
            None,
        )
        if offender is None:
            return await call_next(request)

        log.warning("Request blocked by IP allowlist", extra={"client_ip": offender})
        return PlainTextResponse("Forbidden", status_code=403)
# ---------------------------------------------------------------------------
# Poller supervisor — restarts run() if it ever exits unexpectedly
# ---------------------------------------------------------------------------
async def _supervised_poller(client: TrueNASClient) -> None:
    """Run ``poller.run(client)`` forever, restarting it after any crash.

    Cancellation is re-raised so shutdown can stop the task. Any other
    exception is logged with its traceback and the poller is restarted after
    a 5-second pause. If ``poller.run`` returns normally it is simply called
    again on the next loop iteration.
    """
    while True:
        try:
            await poller.run(client)
        except asyncio.CancelledError:
            raise  # Propagate shutdown signal cleanly
        except Exception as exc:
            # exc_info=True keeps the traceback; the message alone rarely
            # says where the poller actually died.
            log.critical(
                "Poller crashed unexpectedly — restarting in 5s: %s",
                exc,
                exc_info=True,
            )
            await asyncio.sleep(5)
# ---------------------------------------------------------------------------
# Lifespan
# ---------------------------------------------------------------------------
# Shared TrueNAS client; created during startup by lifespan().
_client: TrueNASClient | None = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan: initialise storage and settings, create the shared
    client, start the background tasks, and tear everything down on exit.

    Teardown runs in a finally block so the client is closed and any started
    tasks are cancelled even if startup fails after the client was created or
    an exception is thrown into the context at the yield point.
    """
    global _client
    log.info("Starting up")
    await init_db()
    settings_store.init()
    client = TrueNASClient()
    _client = client
    tasks: list[asyncio.Task] = []
    try:
        await burnin.init(client)
        tasks.append(asyncio.create_task(_supervised_poller(client)))
        tasks.append(asyncio.create_task(mailer.run()))
        yield
    finally:
        log.info("Shutting down")
        for task in tasks:
            task.cancel()
        try:
            # return_exceptions=True collects each task's CancelledError
            # instead of re-raising it here.
            await asyncio.gather(*tasks, return_exceptions=True)
        except asyncio.CancelledError:
            pass
        await client.close()
# ---------------------------------------------------------------------------
# App
# ---------------------------------------------------------------------------
app = FastAPI(
    lifespan=lifespan,
    title="TrueNAS Burn-In Dashboard",
)

# The allowlist middleware is only installed when a non-empty value is configured.
if settings.allowed_ips:
    app.add_middleware(_IPAllowlistMiddleware, allowed_ips=settings.allowed_ips)
    log.info("IP allowlist active: %s", settings.allowed_ips)

# Static assets are mounted before the router is included.
app.mount(path="/static", app=StaticFiles(directory="app/static"), name="static")
app.include_router(router)

File diff suppressed because it is too large Load diff

View file

@ -1,386 +0,0 @@
"""
SSH client for direct TrueNAS command execution (Stage 7).
When ssh_host is configured, burn-in stages use SSH to run smartctl and
badblocks directly on the TrueNAS host instead of going through the REST API.
Falls back to REST API / simulation when SSH is not configured (dev/mock mode).
TrueNAS CORE (FreeBSD) device paths: /dev/ada0, /dev/da0, etc.
TrueNAS SCALE (Linux) device paths: /dev/sda, /dev/sdb, etc.
The devname from the TrueNAS API is used as-is in /dev/{devname}.
"""
import asyncio
import logging
import re
from typing import Callable
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Monitored SMART attributes
# True → any non-zero raw value is a hard failure (drive rejected)
# False → non-zero is a warning (flagged but test continues)
# ---------------------------------------------------------------------------
# Maps SMART attribute ID -> (attribute name, is_critical).
SMART_ATTRS: dict[int, tuple[str, bool]] = {
    # Critical: any non-zero raw value is reported as a failure.
    5: ("Reallocated_Sector_Ct", True),
    197: ("Current_Pending_Sector", True),
    198: ("Offline_Uncorrectable", True),
    # Non-critical: a non-zero raw value is reported as a warning.
    10: ("Spin_Retry_Count", False),
    188: ("Command_Timeout", False),
    199: ("UDMA_CRC_Error_Count", False),
}
# ---------------------------------------------------------------------------
# Configuration check
# ---------------------------------------------------------------------------
def is_configured() -> bool:
    """
    Report whether SSH can be attempted.

    True requires a non-empty ssh_host plus at least one credential source:
    an inline key, a password, or an existing key file at $SSH_KEY_FILE
    (falling back to the default mounted-secret path).
    """
    import os

    from app.config import settings

    if not settings.ssh_host:
        return False
    if settings.ssh_key or settings.ssh_password:
        return True
    key_file = os.environ.get("SSH_KEY_FILE", _MOUNTED_KEY_PATH)
    return os.path.exists(key_file)
# ---------------------------------------------------------------------------
# Low-level connection
# ---------------------------------------------------------------------------
# Default location of a mounted private key; overridable through $SSH_KEY_FILE.
_MOUNTED_KEY_PATH = "/run/secrets/ssh_key"


async def _connect():
    """
    Build the asyncssh connection object for one command.

    The returned object is not awaited here; callers use
    `async with await _connect() as conn:`.

    Credential precedence: inline key from settings, then password, then a
    key file on disk. With none of these present no explicit credential is
    passed and asyncssh falls back to its own agent/default key lookup.
    """
    import asyncssh

    from app.config import settings

    options: dict = dict(
        host=settings.ssh_host,
        port=settings.ssh_port,
        username=settings.ssh_user,
        # trust all hosts (same spirit as TRUENAS_VERIFY_TLS=false)
        known_hosts=None,
    )

    if settings.ssh_key:
        # Key material supplied directly through configuration
        options["client_keys"] = [asyncssh.import_private_key(settings.ssh_key)]
    elif settings.ssh_password:
        options["password"] = settings.ssh_password
    else:
        # Mounted key file (preferred for production — no key in env vars)
        import os

        key_file = os.environ.get("SSH_KEY_FILE", _MOUNTED_KEY_PATH)
        if os.path.exists(key_file):
            options["client_keys"] = [key_file]

    return asyncssh.connect(**options)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
async def test_connection() -> dict:
    """
    Test SSH connectivity by running `echo ok` on the remote host.

    Returns {"ok": True} on success, otherwise {"ok": False, "error": str}.
    Never raises: connection and execution errors are reported in "error".
    """
    if not is_configured():
        # is_configured() is also False when the host is set but no key,
        # password or key file is available, so the message covers both cases.
        return {
            "ok": False,
            "error": "SSH not configured (ssh_host or credentials missing)",
        }
    try:
        async with await _connect() as conn:
            result = await conn.run("echo ok", check=False)
            if "ok" in result.stdout:
                return {"ok": True}
            return {"ok": False, "error": result.stderr.strip() or "unexpected output"}
    except Exception as exc:
        return {"ok": False, "error": str(exc)}
async def get_smart_attributes(devname: str) -> dict:
    """
    Run `smartctl -a /dev/{devname}` and parse the output.

    The device path is shell-quoted before being placed in the remote
    command, so an unexpected devname cannot inject extra shell syntax.

    Returns:
        health: str "PASSED" | "FAILED" | "UNKNOWN"
        raw_output: str full smartctl output
        attributes: dict[int, {"name": str, "raw": int}]
        warnings: list[str] attribute names with non-zero raw (non-critical)
        failures: list[str] attribute names with non-zero raw (critical)

    Never raises: on any SSH error the result has health "UNKNOWN" and a
    single "SSH error: ..." entry in failures.
    """
    import shlex

    device = shlex.quote(f"/dev/{devname}")
    cmd = f"smartctl -a {device}"
    try:
        async with await _connect() as conn:
            result = await conn.run(cmd, check=False)
            output = result.stdout + result.stderr
            return _parse_smartctl(output)
    except Exception as exc:
        return {
            "health": "UNKNOWN",
            "raw_output": str(exc),
            "attributes": {},
            "warnings": [],
            "failures": [f"SSH error: {exc}"],
        }
async def start_smart_test(devname: str, test_type: str) -> str:
    """
    Run `smartctl -t short|long /dev/{devname}`.

    The device path is shell-quoted before being placed in the remote command.

    Args:
        devname: device name, used as /dev/{devname}.
        test_type: "SHORT" (case-insensitive) selects the short test; any
            other value selects the long test.

    Returns:
        Combined stdout + stderr of smartctl.

    Raises:
        RuntimeError: when the output does not announce a started test and
            the exit status is neither 0 nor 4.
    """
    import shlex

    arg = "short" if test_type.upper() == "SHORT" else "long"
    device = shlex.quote(f"/dev/{devname}")
    cmd = f"smartctl -t {arg} {device}"
    async with await _connect() as conn:
        result = await conn.run(cmd, check=False)
        output = result.stdout + result.stderr
        # smartctl exits 0 or 4 when the test is successfully started on most drives
        started = (
            "Testing has begun" in output
            or "test has begun" in output.lower()
            or result.returncode in (0, 4)
        )
        if not started:
            raise RuntimeError(
                f"smartctl returned exit {result.returncode}: {output[:400]}"
            )
        return output
async def poll_smart_progress(devname: str) -> dict:
    """
    Run `smartctl -a /dev/{devname}` and extract self-test status.

    The device path is shell-quoted before being placed in the remote command.

    Returns:
        state: "running" | "passed" | "failed" | "unknown"
        percent_remaining: int | None (set only while running, when a
            "% of test remaining" line is present; otherwise None)
        output: str

    SSH errors are not caught here and propagate to the caller.
    """
    import shlex

    device = shlex.quote(f"/dev/{devname}")
    cmd = f"smartctl -a {device}"
    async with await _connect() as conn:
        result = await conn.run(cmd, check=False)
        output = result.stdout + result.stderr
        return _parse_smart_progress(output)
async def abort_smart_test(devname: str) -> None:
    """
    Send `smartctl -X /dev/{devname}` to abort an in-progress test.

    The device path is shell-quoted before being placed in the remote
    command. The command's exit status and output are ignored; SSH errors
    propagate to the caller.
    """
    import shlex

    device = shlex.quote(f"/dev/{devname}")
    cmd = f"smartctl -X {device}"
    async with await _connect() as conn:
        await conn.run(cmd, check=False)
async def run_badblocks(
    devname: str,
    on_progress: Callable[[int, int, str], None],
    cancelled_fn: Callable[[], bool] | None = None,
) -> dict:
    """
    Run `badblocks -wsv -b 4096 -p 1 /dev/{devname}` and stream output.

    The device path is shell-quoted before being placed in the remote command.

    on_progress(percent, bad_blocks, line) is called for each line of output;
    the reported percent is capped at 99.
    cancelled_fn() is polled after each line to support mid-test cancellation.
    The run is also aborted (process killed) as soon as the bad block count
    exceeds settings.bad_block_threshold.

    Returns: {"bad_blocks": int, "output": str, "aborted": bool}

    SSH/process errors do not raise: they are logged and appended to "output"
    as an "[SSH error] ..." line.
    """
    import shlex

    from app.config import settings

    device = shlex.quote(f"/dev/{devname}")
    cmd = f"badblocks -wsv -b 4096 -p 1 {device}"
    lines: list[str] = []
    bad_blocks = 0
    aborted = False
    last_pct = 0
    try:
        async with await _connect() as conn:
            async with conn.create_process(cmd) as proc:
                # badblocks writes progress to stderr, bad block numbers to stdout
                async def _read_stream(stream, is_stderr: bool):
                    nonlocal bad_blocks, last_pct, aborted
                    async for raw_line in stream:
                        line = (
                            raw_line
                            if isinstance(raw_line, str)
                            else raw_line.decode("utf-8", errors="replace")
                        )
                        lines.append(line)
                        if is_stderr:
                            m = re.search(r"([\d.]+)%\s+done", line)
                            if m:
                                last_pct = min(99, int(float(m.group(1))))
                        else:
                            # Each all-digit stdout line is a bad block number
                            stripped = line.strip()
                            if stripped and stripped.isdigit():
                                bad_blocks += 1
                        on_progress(last_pct, bad_blocks, line)
                        # Abort if threshold exceeded
                        if bad_blocks > settings.bad_block_threshold:
                            aborted = True
                            proc.kill()
                            lines.append(
                                f"\n[ABORTED] Bad block count ({bad_blocks}) exceeded "
                                f"threshold ({settings.bad_block_threshold})\n"
                            )
                            return
                        # Abort on cancellation
                        if cancelled_fn and cancelled_fn():
                            aborted = True
                            proc.kill()
                            return

                stdout_task = asyncio.create_task(_read_stream(proc.stdout, False))
                stderr_task = asyncio.create_task(_read_stream(proc.stderr, True))
                await asyncio.gather(stdout_task, stderr_task, return_exceptions=True)
                await proc.wait()
    except Exception as exc:
        log.warning("badblocks over SSH failed for %s: %s", devname, exc)
        lines.append(f"\n[SSH error] {exc}\n")
    return {
        "bad_blocks": bad_blocks,
        "output": "".join(lines),
        "aborted": aborted,
    }
async def get_system_sensors() -> dict:
    """
    Fetch system temperatures by running `sensors -j` over SSH.

    On success returns {"cpu_c": int|None, "pch_c": int|None}:
        cpu_c  CPU package temperature (coretemp chip)
        pch_c  PCH/chipset temperature (pch_* chip)

    Returns {} when SSH is not configured, when the command prints nothing,
    or when any error occurs (the error is logged at debug level).
    """
    if not is_configured():
        return {}
    try:
        async with await _connect() as conn:
            completed = await conn.run("sensors -j 2>/dev/null", check=False)
            payload = completed.stdout.strip()
            return _parse_sensors_json(payload) if payload else {}
    except Exception as exc:
        log.debug("get_system_sensors failed: %s", exc)
        return {}
def _parse_sensors_json(output: str) -> dict:
import json as _json
try:
data = _json.loads(output)
except Exception:
return {}
cpu_c: int | None = None
pch_c: int | None = None
for chip_name, chip_data in data.items():
if not isinstance(chip_data, dict):
continue
# CPU package temp — coretemp chip, "Package id N" sensor
if chip_name.startswith("coretemp") and cpu_c is None:
for sensor_name, sensor_vals in chip_data.items():
if not isinstance(sensor_vals, dict):
continue
if "package" in sensor_name.lower():
for k, v in sensor_vals.items():
if k.endswith("_input") and isinstance(v, (int, float)):
cpu_c = int(round(v))
break
if cpu_c is not None:
break
# PCH / chipset temp — manages PCIe lanes including HBA / storage I/O
elif chip_name.startswith("pch_") and pch_c is None:
for sensor_name, sensor_vals in chip_data.items():
if not isinstance(sensor_vals, dict):
continue
for k, v in sensor_vals.items():
if k.endswith("_input") and isinstance(v, (int, float)):
pch_c = int(round(v))
break
if pch_c is not None:
break
return {"cpu_c": cpu_c, "pch_c": pch_c}
# ---------------------------------------------------------------------------
# Parsers
# ---------------------------------------------------------------------------
def _parse_smartctl(output: str) -> dict:
    """
    Parse `smartctl -a` text into health, attributes, warnings and failures.

    health is the upper-cased word after "self-assessment test result:", or
    "UNKNOWN" when that line is absent. Every attribute-table row whose raw
    value starts with digits is recorded in attributes. For IDs listed in
    SMART_ATTRS, a raw value above zero adds "<name> = <raw>" to failures
    (critical) or warnings (non-critical).
    """
    health_match = re.search(
        r"self-assessment test result:\s+(\w+)", output, re.IGNORECASE
    )
    health = health_match.group(1).upper() if health_match else "UNKNOWN"

    # Attribute table: ID# NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
    row_pattern = re.compile(
        r"\s*(\d+)\s+(\S+)\s+\S+\s+\d+\s+\d+\s+\d+\s+\S+\s+\S+\s+\S+\s+(\d+)"
    )

    attributes: dict[int, dict] = {}
    warnings: list[str] = []
    failures: list[str] = []
    for row in output.splitlines():
        found = row_pattern.match(row)
        if found is None:
            continue
        attr_id = int(found.group(1))
        attr_name = found.group(2)
        raw_val = int(found.group(3))
        attributes[attr_id] = {"name": attr_name, "raw": raw_val}
        if attr_id not in SMART_ATTRS or raw_val <= 0:
            continue
        is_critical = SMART_ATTRS[attr_id][1]
        bucket = failures if is_critical else warnings
        bucket.append(f"{attr_name} = {raw_val}")

    return {
        "health": health,
        "raw_output": output,
        "attributes": attributes,
        "warnings": warnings,
        "failures": failures,
    }
def _parse_smart_progress(output: str) -> dict:
    """
    Derive the self-test state from `smartctl -a` text.

    Returns:
        state: "running" | "passed" | "failed" | "unknown"
        percent_remaining: int | None (set only while running, when a
            "% of test remaining" line is present)
        output: the input text, unchanged

    The self-test log in `smartctl -a` lists previous runs as well as the
    latest one. When a "# 1" row is present the pass/fail verdict is taken
    from that row only, so an older "Completed without error" entry cannot
    hide a newer failure. Without such a row the whole text is searched.
    """
    state = "unknown"
    percent_remaining = None  # None = "in progress but no % line parsed yet"
    lower = output.lower()

    if "self-test routine in progress" in lower:
        state = "running"
        m = re.search(r"(\d+)%\s+of\s+test\s+remaining", output, re.IGNORECASE)
        if m:
            percent_remaining = int(m.group(1))
    else:
        # The trailing \s+ after "1" keeps rows "#10".."#19" from matching.
        latest = re.search(r"^#\s*1\s+(.*)$", lower, re.MULTILINE)
        verdict_text = latest.group(1) if latest else lower
        if "completed without error" in verdict_text:
            state = "passed"
        elif (
            "completed: read failure" in verdict_text
            or "completed: write failure" in verdict_text
            or "aborted by host" in verdict_text
            or ("completed" in verdict_text and "failure" in verdict_text)
        ):
            state = "failed"
        elif "in progress" in lower:
            state = "running"

    return {
        "state": state,
        "percent_remaining": percent_remaining,
        "output": output,
    }

View file

@ -1,8 +0,0 @@
fastapi
uvicorn[standard]
aiosqlite
httpx
pydantic-settings
jinja2
sse-starlette
asyncssh

View file

@ -11,7 +11,7 @@ services:
app:
build: .
container_name: truenas-burnin
container_name: nas-burnin
ports:
- "8084:8084"
env_file: .env

22
requirements.in Normal file
View file

@ -0,0 +1,22 @@
# Human-edited dependency input.
# requirements.txt is a fully-pinned lockfile generated from THIS file
# via scripts/regenerate-lockfile.sh — never edit requirements.txt by
# hand. Add/remove deps here, then re-run the script.
#
# Why a lockfile: starlette 1.0 shipping in 2026-04 broke the dashboard
# because requirements.txt had no upper bounds and the next rebuild
# pulled the breaking version (TemplateResponse signature change). The
# lockfile + --require-hashes in the Dockerfile makes the build fully
# reproducible AND defends against compromised upstream mirrors.
fastapi
uvicorn[standard]
aiosqlite
httpx
pydantic-settings
jinja2
sse-starlette
asyncssh
itsdangerous>=2.1
bcrypt>=4.0,<5.0
python-multipart>=0.0.7

876
requirements.txt Normal file
View file

@ -0,0 +1,876 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile --generate-hashes --output-file=req.txt req.in
#
aiosqlite==0.22.1 \
--hash=sha256:043e0bd78d32888c0a9ca90fc788b38796843360c855a7262a532813133a0650 \
--hash=sha256:21c002eb13823fad740196c5a2e9d8e62f6243bd9e7e4a1f87fb5e44ecb4fceb
# via -r req.in
annotated-doc==0.0.4 \
--hash=sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320 \
--hash=sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4
# via fastapi
annotated-types==0.7.0 \
--hash=sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53 \
--hash=sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89
# via pydantic
anyio==4.13.0 \
--hash=sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708 \
--hash=sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc
# via
# httpx
# sse-starlette
# starlette
# watchfiles
asyncssh==2.22.0 \
--hash=sha256:c3ce72b01be4f97b40e62844dd384227e5ff5a401a3793007c42f86a5c8eb537 \
--hash=sha256:d16465ccdf1ed20eba1131b14415b155e047f6f5be0d19f39c2e0b61331ee0e7
# via -r req.in
bcrypt==4.3.0 \
--hash=sha256:0042b2e342e9ae3d2ed22727c1262f76cc4f345683b5c1715f0250cf4277294f \
--hash=sha256:0142b2cb84a009f8452c8c5a33ace5e3dfec4159e7735f5afe9a4d50a8ea722d \
--hash=sha256:08bacc884fd302b611226c01014eca277d48f0a05187666bca23aac0dad6fe24 \
--hash=sha256:0d3efb1157edebfd9128e4e46e2ac1a64e0c1fe46fb023158a407c7892b0f8c3 \
--hash=sha256:0e30e5e67aed0187a1764911af023043b4542e70a7461ad20e837e94d23e1d6c \
--hash=sha256:107d53b5c67e0bbc3f03ebf5b030e0403d24dda980f8e244795335ba7b4a027d \
--hash=sha256:12fa6ce40cde3f0b899729dbd7d5e8811cb892d31b6f7d0334a1f37748b789fd \
--hash=sha256:17a854d9a7a476a89dcef6c8bd119ad23e0f82557afbd2c442777a16408e614f \
--hash=sha256:191354ebfe305e84f344c5964c7cd5f924a3bfc5d405c75ad07f232b6dffb49f \
--hash=sha256:2ef6630e0ec01376f59a006dc72918b1bf436c3b571b80fa1968d775fa02fe7d \
--hash=sha256:3004df1b323d10021fda07a813fd33e0fd57bef0e9a480bb143877f6cba996fe \
--hash=sha256:335a420cfd63fc5bc27308e929bee231c15c85cc4c496610ffb17923abf7f231 \
--hash=sha256:33752b1ba962ee793fa2b6321404bf20011fe45b9afd2a842139de3011898fef \
--hash=sha256:3a3fd2204178b6d2adcf09cb4f6426ffef54762577a7c9b54c159008cb288c18 \
--hash=sha256:3b8d62290ebefd49ee0b3ce7500f5dbdcf13b81402c05f6dafab9a1e1b27212f \
--hash=sha256:3e36506d001e93bffe59754397572f21bb5dc7c83f54454c990c74a468cd589e \
--hash=sha256:41261d64150858eeb5ff43c753c4b216991e0ae16614a308a15d909503617732 \
--hash=sha256:50e6e80a4bfd23a25f5c05b90167c19030cf9f87930f7cb2eacb99f45d1c3304 \
--hash=sha256:531457e5c839d8caea9b589a1bcfe3756b0547d7814e9ce3d437f17da75c32b0 \
--hash=sha256:55a935b8e9a1d2def0626c4269db3fcd26728cbff1e84f0341465c31c4ee56d8 \
--hash=sha256:57967b7a28d855313a963aaea51bf6df89f833db4320da458e5b3c5ab6d4c938 \
--hash=sha256:584027857bc2843772114717a7490a37f68da563b3620f78a849bcb54dc11e62 \
--hash=sha256:59e1aa0e2cd871b08ca146ed08445038f42ff75968c7ae50d2fdd7860ade2180 \
--hash=sha256:5bd3cca1f2aa5dbcf39e2aa13dd094ea181f48959e1071265de49cc2b82525af \
--hash=sha256:5c1949bf259a388863ced887c7861da1df681cb2388645766c89fdfd9004c669 \
--hash=sha256:62f26585e8b219cdc909b6a0069efc5e4267e25d4a3770a364ac58024f62a761 \
--hash=sha256:67a561c4d9fb9465ec866177e7aebcad08fe23aaf6fbd692a6fab69088abfc51 \
--hash=sha256:6fb1fd3ab08c0cbc6826a2e0447610c6f09e983a281b919ed721ad32236b8b23 \
--hash=sha256:74a8d21a09f5e025a9a23e7c0fd2c7fe8e7503e4d356c0a2c1486ba010619f09 \
--hash=sha256:79e70b8342a33b52b55d93b3a59223a844962bef479f6a0ea318ebbcadf71505 \
--hash=sha256:7a4be4cbf241afee43f1c3969b9103a41b40bcb3a3f467ab19f891d9bc4642e4 \
--hash=sha256:7c03296b85cb87db865d91da79bf63d5609284fc0cab9472fdd8367bbd830753 \
--hash=sha256:842d08d75d9fe9fb94b18b071090220697f9f184d4547179b60734846461ed59 \
--hash=sha256:864f8f19adbe13b7de11ba15d85d4a428c7e2f344bac110f667676a0ff84924b \
--hash=sha256:97eea7408db3a5bcce4a55d13245ab3fa566e23b4c67cd227062bb49e26c585d \
--hash=sha256:a839320bf27d474e52ef8cb16449bb2ce0ba03ca9f44daba6d93fa1d8828e48a \
--hash=sha256:afe327968aaf13fc143a56a3360cb27d4ad0345e34da12c7290f1b00b8fe9a8b \
--hash=sha256:b4d4e57f0a63fd0b358eb765063ff661328f69a04494427265950c71b992a39a \
--hash=sha256:b6354d3760fcd31994a14c89659dee887f1351a06e5dac3c1142307172a79f90 \
--hash=sha256:b693dbb82b3c27a1604a3dff5bfc5418a7e6a781bb795288141e5f80cf3a3492 \
--hash=sha256:bdc6a24e754a555d7316fa4774e64c6c3997d27ed2d1964d55920c7c227bc4ce \
--hash=sha256:beeefe437218a65322fbd0069eb437e7c98137e08f22c4660ac2dc795c31f8bb \
--hash=sha256:c5eeac541cefd0bb887a371ef73c62c3cd78535e4887b310626036a7c0a817bb \
--hash=sha256:c950d682f0952bafcceaf709761da0a32a942272fad381081b51096ffa46cea1 \
--hash=sha256:d9af79d322e735b1fc33404b5765108ae0ff232d4b54666d46730f8ac1a43676 \
--hash=sha256:e53e074b120f2877a35cc6c736b8eb161377caae8925c17688bd46ba56daaa5b \
--hash=sha256:e965a9c1e9a393b8005031ff52583cedc15b7884fce7deb8b0346388837d6cfe \
--hash=sha256:f01e060f14b6b57bbb72fc5b4a83ac21c443c9a2ee708e04a10e9192f90a6281 \
--hash=sha256:f1e3ffa1365e8702dc48c8b360fef8d7afeca482809c5e45e653af82ccd088c1 \
--hash=sha256:f6746e6fec103fcd509b96bacdfdaa2fbde9a553245dbada284435173a6f1aef \
--hash=sha256:f81b0ed2639568bf14749112298f9e4e2b28853dab50a8b357e31798686a036d
# via -r req.in
certifi==2026.4.22 \
--hash=sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a \
--hash=sha256:8d455352a37b71bf76a79caa83a3d6c25afee4a385d632127b6afb3963f1c580
# via
# httpcore
# httpx
cffi==2.0.0 \
--hash=sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb \
--hash=sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b \
--hash=sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f \
--hash=sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9 \
--hash=sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44 \
--hash=sha256:0f6084a0ea23d05d20c3edcda20c3d006f9b6f3fefeac38f59262e10cef47ee2 \
--hash=sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c \
--hash=sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75 \
--hash=sha256:1cd13c99ce269b3ed80b417dcd591415d3372bcac067009b6e0f59c7d4015e65 \
--hash=sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e \
--hash=sha256:1f72fb8906754ac8a2cc3f9f5aaa298070652a0ffae577e0ea9bd480dc3c931a \
--hash=sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e \
--hash=sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25 \
--hash=sha256:2081580ebb843f759b9f617314a24ed5738c51d2aee65d31e02f6f7a2b97707a \
--hash=sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe \
--hash=sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b \
--hash=sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91 \
--hash=sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592 \
--hash=sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187 \
--hash=sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c \
--hash=sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1 \
--hash=sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94 \
--hash=sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba \
--hash=sha256:3e837e369566884707ddaf85fc1744b47575005c0a229de3327f8f9a20f4efeb \
--hash=sha256:3f4d46d8b35698056ec29bca21546e1551a205058ae1a181d871e278b0b28165 \
--hash=sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529 \
--hash=sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca \
--hash=sha256:4647afc2f90d1ddd33441e5b0e85b16b12ddec4fca55f0d9671fef036ecca27c \
--hash=sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6 \
--hash=sha256:53f77cbe57044e88bbd5ed26ac1d0514d2acf0591dd6bb02a3ae37f76811b80c \
--hash=sha256:5eda85d6d1879e692d546a078b44251cdd08dd1cfb98dfb77b670c97cee49ea0 \
--hash=sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743 \
--hash=sha256:61d028e90346df14fedc3d1e5441df818d095f3b87d286825dfcbd6459b7ef63 \
--hash=sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5 \
--hash=sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5 \
--hash=sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4 \
--hash=sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d \
--hash=sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b \
--hash=sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93 \
--hash=sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205 \
--hash=sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27 \
--hash=sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512 \
--hash=sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d \
--hash=sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c \
--hash=sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037 \
--hash=sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26 \
--hash=sha256:89472c9762729b5ae1ad974b777416bfda4ac5642423fa93bd57a09204712322 \
--hash=sha256:8ea985900c5c95ce9db1745f7933eeef5d314f0565b27625d9a10ec9881e1bfb \
--hash=sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c \
--hash=sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8 \
--hash=sha256:9332088d75dc3241c702d852d4671613136d90fa6881da7d770a483fd05248b4 \
--hash=sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414 \
--hash=sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9 \
--hash=sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664 \
--hash=sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9 \
--hash=sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775 \
--hash=sha256:b18a3ed7d5b3bd8d9ef7a8cb226502c6bf8308df1525e1cc676c3680e7176739 \
--hash=sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc \
--hash=sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062 \
--hash=sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe \
--hash=sha256:b882b3df248017dba09d6b16defe9b5c407fe32fc7c65a9c69798e6175601be9 \
--hash=sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92 \
--hash=sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5 \
--hash=sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13 \
--hash=sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d \
--hash=sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26 \
--hash=sha256:cb527a79772e5ef98fb1d700678fe031e353e765d1ca2d409c92263c6d43e09f \
--hash=sha256:cf364028c016c03078a23b503f02058f1814320a56ad535686f90565636a9495 \
--hash=sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b \
--hash=sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6 \
--hash=sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c \
--hash=sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef \
--hash=sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5 \
--hash=sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18 \
--hash=sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad \
--hash=sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3 \
--hash=sha256:de8dad4425a6ca6e4e5e297b27b5c824ecc7581910bf9aee86cb6835e6812aa7 \
--hash=sha256:e11e82b744887154b182fd3e7e8512418446501191994dbf9c9fc1f32cc8efd5 \
--hash=sha256:e6e73b9e02893c764e7e8d5bb5ce277f1a009cd5243f8228f75f842bf937c534 \
--hash=sha256:f73b96c41e3b2adedc34a7356e64c8eb96e03a3782b535e043a986276ce12a49 \
--hash=sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2 \
--hash=sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5 \
--hash=sha256:fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453 \
--hash=sha256:fe562eb1a64e67dd297ccc4f5addea2501664954f2692b69a76449ec7913ecbf
# via cryptography
click==8.3.3 \
--hash=sha256:398329ad4837b2ff7cbe1dd166a4c0f8900c3ca3a218de04466f38f6497f18a2 \
--hash=sha256:a2bf429bb3033c89fa4936ffb35d5cb471e3719e1f3c8a7c3fff0b8314305613
# via uvicorn
cryptography==47.0.0 \
--hash=sha256:0024b87d47ae2399165a6bfb20d24888881eeab83ae2566d62467c5ff0030ce7 \
--hash=sha256:07efe86201817e7d3c18781ca9770bc0db04e1e48c994be384e4602bc38f8f27 \
--hash=sha256:09f6d7bf6724f8db8b32f11eccf23efc8e759924bc5603800335cf8859a3ddbd \
--hash=sha256:11438c7518132d95f354fa01a4aa2f806d172a061a7bed18cf18cbdacdb204d7 \
--hash=sha256:11dbb9f50a0f1bb9757b3d8c27c1101780efb8f0bdecfb12439c22a74d64c001 \
--hash=sha256:14432c8a9bcb37009784f9594a62fae211a2ae9543e96c92b2a8e4c3cd5cd0c4 \
--hash=sha256:1581aef4219f7ca2849d0250edaa3866212fb74bf5667284f46aa92f9e65c1ca \
--hash=sha256:160ad728f128972d362e714054f6ba0067cab7fb350c5202a9ae8ae4ce3ef1a0 \
--hash=sha256:1a405c08857258c11016777e11c02bacbe7ef596faf259305d282272a3a05cbe \
--hash=sha256:1e47422b5557bb82d3fff997e8d92cff4e28b9789576984f08c248d2b3535d93 \
--hash=sha256:20fdbe3e38fb67c385d233c89371fa27f9909f6ebca1cecc20c13518dae65475 \
--hash=sha256:2207a498b03275d0051589e326b79d4cf59985c99031b05bb292ac52631c37fe \
--hash=sha256:256d07c78a04d6b276f5df935a9923275f53bd1522f214447fdf365494e2d515 \
--hash=sha256:2b45761c6ec22b7c726d6a829558777e32d0f1c8be7c3f3480f9c912d5ee8a10 \
--hash=sha256:2ebd84adf0728c039a3be2700289378e1c164afc6748df1a5ed456767bef9ba7 \
--hash=sha256:34b4358b925a5ea3e14384ca781a2c0ef7ac219b57bb9eacc4457078e2b19f92 \
--hash=sha256:3fb8fa48075fad7193f2e5496135c6a76ac4b2aa5a38433df0a539296b377829 \
--hash=sha256:4e1de79e047e25d6e9f8cea71c86b4a53aced64134f0f003bbcbf3655fd172c8 \
--hash=sha256:4f7722c97826770bab8ae92959a2e7b20a5e9e9bf4deae68fd86c3ca457bab52 \
--hash=sha256:51c9313e90bd1690ec5a75ed047c27c0b8e6c570029712943d6116ef9a90620b \
--hash=sha256:5d0e362ff51041b0c0d219cc7d6924d7b8996f57ce5712bdcef71eb3c65a59cc \
--hash=sha256:6651d32eff255423503aa276739da98c30f26c40cbeffcc6048e0d54ef704c0c \
--hash=sha256:6eebcaf0df1d21ce1f90605c9b432dd2c4f4ab665ac29a40d5e3fc68f51b5e63 \
--hash=sha256:6f29f36582e6151d9686235e586dd35bb67491f024767d10b842e520dc6a07ac \
--hash=sha256:7a02675e2fabd0c0fc04c868b8781863cbf1967691543c22f5470500ff840b31 \
--hash=sha256:7f1207974a904e005f762869996cf620e9bf79ecb4622f148550bb48e0eb35a7 \
--hash=sha256:7f68d6fbc7fbbcfb0939fea72c3b96a9f9a6edfc0e1b1d29778a2066030418b1 \
--hash=sha256:7fda2f02c9015db3f42bb8a22324a454516ed10a8c29ca6ece6cdbb5efe2a203 \
--hash=sha256:80887c5cbd1774683cb126f0ab4184567f080071d5acf62205acb354b4b753b7 \
--hash=sha256:835d2d7f47cdc53b3224e90810fb1d36ca94ea29cc1801fb4c1bc43876735769 \
--hash=sha256:8c1a736bbb3288005796c3f7ccb9453360d7fed483b13b9f468aea5171432923 \
--hash=sha256:9af828c0d5a65c70ec729cd7495a4bf1a67ecb66417b8f02ff125ab8a6326a74 \
--hash=sha256:9c59ab0e0fa3a180a5a9c59f3a5abe3ef90d474bc56d7fadfbe80359491b615b \
--hash=sha256:9f8e55fe4e63613a5e1cc5819030f27b97742d720203a087802ce4ce9ceb52bb \
--hash=sha256:9fe6b7c64926c765f9dff301f9c1b867febcda5768868ca084e18589113732ab \
--hash=sha256:a49a3eb5341b9503fa3000a9a0db033161db90d47285291f53c2a9d2cd1b7f76 \
--hash=sha256:a9b761f012a943b7de0e828843c5688d0de94a0578d44d6c85a1bae32f87791f \
--hash=sha256:b1c76fca783aa7698eb21eb14f9c4aa09452248ee54a627d125025a43f83e7a7 \
--hash=sha256:b9a8943e359b7615db1a3ba587994618e094ff3d6fa5a390c73d079ce18b3973 \
--hash=sha256:be12cb6a204f77ed968bcefe68086eb061695b540a3dd05edac507a3111b25f0 \
--hash=sha256:cffbba3392df0fa8629bb7f43454ee2925059ee158e23c54620b9063912b86c8 \
--hash=sha256:ed67ea4e0cfb5faa5bc7ecb6e2b8838f3807a03758eec239d6c21c8769355310 \
--hash=sha256:edd4da498015da5b9f26d38d3bfc2e90257bfa9cbed1f6767c282a0025ae649b \
--hash=sha256:ef6b3634087f18d2155b1e8ce264e5345a753da2c5fa9815e7d41315c90f8318 \
--hash=sha256:f1557695e5c2b86e204f6ce9470497848634100787935ab7adc5397c54abd7ab \
--hash=sha256:f5c15764f261394b22aef6b00252f5195f46f2ca300bec57149474e2538b31f8 \
--hash=sha256:f5c3296dab66202f1b18a91fa266be93d6aa0c2806ea3d67762c69f60adc71aa \
--hash=sha256:f7db373287273d8af1414cf95dc4118b13ffdc62be521997b0f2b270771fef50 \
--hash=sha256:f9a034b642b960767fb343766ae5ba6ad653f2e890ddd82955aef288ffea8736
# via asyncssh
fastapi==0.136.1 \
--hash=sha256:7af665ad7acfa0a3baf8983d393b6b471b9da10ede59c60045f49fbc89a0fa7f \
--hash=sha256:a6e9d7eeada96c93a4d69cb03836b44fa34e2854accb7244a1ece36cd4781c3f
# via -r req.in
h11==0.16.0 \
--hash=sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1 \
--hash=sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86
# via
# httpcore
# uvicorn
httpcore==1.0.9 \
--hash=sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55 \
--hash=sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8
# via httpx
httptools==0.7.1 \
--hash=sha256:04c6c0e6c5fb0739c5b8a9eb046d298650a0ff38cf42537fc372b28dc7e4472c \
--hash=sha256:0d92b10dbf0b3da4823cde6a96d18e6ae358a9daa741c71448975f6a2c339cad \
--hash=sha256:0e68b8582f4ea9166be62926077a3334064d422cf08ab87d8b74664f8e9058e1 \
--hash=sha256:11d01b0ff1fe02c4c32d60af61a4d613b74fad069e47e06e9067758c01e9ac78 \
--hash=sha256:135fbe974b3718eada677229312e97f3b31f8a9c8ffa3ae6f565bf808d5b6bcb \
--hash=sha256:2c15f37ef679ab9ecc06bfc4e6e8628c32a8e4b305459de7cf6785acd57e4d03 \
--hash=sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6 \
--hash=sha256:379b479408b8747f47f3b253326183d7c009a3936518cdb70db58cffd369d9df \
--hash=sha256:38e0c83a2ea9746ebbd643bdfb521b9aa4a91703e2cd705c20443405d2fd16a5 \
--hash=sha256:3e14f530fefa7499334a79b0cf7e7cd2992870eb893526fb097d51b4f2d0f321 \
--hash=sha256:44c8f4347d4b31269c8a9205d8a5ee2df5322b09bbbd30f8f862185bb6b05346 \
--hash=sha256:465275d76db4d554918aba40bf1cbebe324670f3dfc979eaffaa5d108e2ed650 \
--hash=sha256:474d3b7ab469fefcca3697a10d11a32ee2b9573250206ba1e50d5980910da657 \
--hash=sha256:49794f9250188a57fa73c706b46cb21a313edb00d337ca4ce1a011fe3c760b28 \
--hash=sha256:5ddbd045cfcb073db2449563dd479057f2c2b681ebc232380e63ef15edc9c023 \
--hash=sha256:601b7628de7504077dd3dcb3791c6b8694bbd967148a6d1f01806509254fb1ca \
--hash=sha256:654968cb6b6c77e37b832a9be3d3ecabb243bbe7a0b8f65fbc5b6b04c8fcabed \
--hash=sha256:69d4f9705c405ae3ee83d6a12283dc9feba8cc6aaec671b412917e644ab4fa66 \
--hash=sha256:6babce6cfa2a99545c60bfef8bee0cc0545413cb0018f617c8059a30ad985de3 \
--hash=sha256:7347714368fb2b335e9063bc2b96f2f87a9ceffcd9758ac295f8bbcd3ffbc0ca \
--hash=sha256:7aea2e3c3953521c3c51106ee11487a910d45586e351202474d45472db7d72d3 \
--hash=sha256:7fe6e96090df46b36ccfaf746f03034e5ab723162bc51b0a4cf58305324036f2 \
--hash=sha256:84d86c1e5afdc479a6fdabf570be0d3eb791df0ae727e8dbc0259ed1249998d4 \
--hash=sha256:a3c3b7366bb6c7b96bd72d0dbe7f7d5eead261361f013be5f6d9590465ea1c70 \
--hash=sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9 \
--hash=sha256:ac50afa68945df63ec7a2707c506bd02239272288add34539a2ef527254626a4 \
--hash=sha256:aeefa0648362bb97a7d6b5ff770bfb774930a327d7f65f8208394856862de517 \
--hash=sha256:b580968316348b474b020edf3988eecd5d6eec4634ee6561e72ae3a2a0e00a8a \
--hash=sha256:c08fe65728b8d70b6923ce31e3956f859d5e1e8548e6f22ec520a962c6757270 \
--hash=sha256:c8c751014e13d88d2be5f5f14fc8b89612fcfa92a9cc480f2bc1598357a23a05 \
--hash=sha256:cad6b591a682dcc6cf1397c3900527f9affef1e55a06c4547264796bbd17cf5e \
--hash=sha256:cbf8317bfccf0fed3b5680c559d3459cccf1abe9039bfa159e62e391c7270568 \
--hash=sha256:cfabda2a5bb85aa2a904ce06d974a3f30fb36cc63d7feaddec05d2050acede96 \
--hash=sha256:d169162803a24425eb5e4d51d79cbf429fd7a491b9e570a55f495ea55b26f0bf \
--hash=sha256:d496e2f5245319da9d764296e86c5bb6fcf0cf7a8806d3d000717a889c8c0b7b \
--hash=sha256:de987bb4e7ac95b99b805b99e0aae0ad51ae61df4263459d36e07cf4052d8b3a \
--hash=sha256:df091cf961a3be783d6aebae963cc9b71e00d57fa6f149025075217bc6a55a7b \
--hash=sha256:e99c7b90a29fd82fea9ef57943d501a16f3404d7b9ee81799d41639bdaae412c \
--hash=sha256:eb844698d11433d2139bbeeb56499102143beb582bd6c194e3ba69c22f25c274 \
--hash=sha256:f084813239e1eb403ddacd06a30de3d3e09a9b76e7894dcda2b22f8a726e9c60 \
--hash=sha256:f25bbaf1235e27704f1a7b86cd3304eabc04f569c828101d94a0e605ef7205a5 \
--hash=sha256:f65744d7a8bdb4bda5e1fa23e4ba16832860606fcc09d674d56e425e991539ec \
--hash=sha256:f72fdbae2dbc6e68b8239defb48e6a5937b12218e6ffc2c7846cc37befa84362
# via uvicorn
httpx==0.28.1 \
--hash=sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc \
--hash=sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad
# via -r req.in
idna==3.13 \
--hash=sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242 \
--hash=sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3
# via
# anyio
# httpx
itsdangerous==2.2.0 \
--hash=sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef \
--hash=sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173
# via -r req.in
jinja2==3.1.6 \
--hash=sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d \
--hash=sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67
# via -r req.in
markupsafe==3.0.3 \
--hash=sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f \
--hash=sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a \
--hash=sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf \
--hash=sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19 \
--hash=sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf \
--hash=sha256:0f4b68347f8c5eab4a13419215bdfd7f8c9b19f2b25520968adfad23eb0ce60c \
--hash=sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175 \
--hash=sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219 \
--hash=sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb \
--hash=sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6 \
--hash=sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab \
--hash=sha256:15d939a21d546304880945ca1ecb8a039db6b4dc49b2c5a400387cdae6a62e26 \
--hash=sha256:177b5253b2834fe3678cb4a5f0059808258584c559193998be2601324fdeafb1 \
--hash=sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce \
--hash=sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218 \
--hash=sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634 \
--hash=sha256:1ba88449deb3de88bd40044603fafffb7bc2b055d626a330323a9ed736661695 \
--hash=sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad \
--hash=sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73 \
--hash=sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c \
--hash=sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe \
--hash=sha256:2a15a08b17dd94c53a1da0438822d70ebcd13f8c3a95abe3a9ef9f11a94830aa \
--hash=sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559 \
--hash=sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa \
--hash=sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37 \
--hash=sha256:3537e01efc9d4dccdf77221fb1cb3b8e1a38d5428920e0657ce299b20324d758 \
--hash=sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f \
--hash=sha256:38664109c14ffc9e7437e86b4dceb442b0096dfe3541d7864d9cbe1da4cf36c8 \
--hash=sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d \
--hash=sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c \
--hash=sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97 \
--hash=sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a \
--hash=sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19 \
--hash=sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9 \
--hash=sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9 \
--hash=sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc \
--hash=sha256:591ae9f2a647529ca990bc681daebdd52c8791ff06c2bfa05b65163e28102ef2 \
--hash=sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4 \
--hash=sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354 \
--hash=sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50 \
--hash=sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698 \
--hash=sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9 \
--hash=sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b \
--hash=sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc \
--hash=sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115 \
--hash=sha256:7c3fb7d25180895632e5d3148dbdc29ea38ccb7fd210aa27acbd1201a1902c6e \
--hash=sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485 \
--hash=sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f \
--hash=sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12 \
--hash=sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025 \
--hash=sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009 \
--hash=sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d \
--hash=sha256:949b8d66bc381ee8b007cd945914c721d9aba8e27f71959d750a46f7c282b20b \
--hash=sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a \
--hash=sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5 \
--hash=sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f \
--hash=sha256:a320721ab5a1aba0a233739394eb907f8c8da5c98c9181d1161e77a0c8e36f2d \
--hash=sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1 \
--hash=sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287 \
--hash=sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6 \
--hash=sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f \
--hash=sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581 \
--hash=sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed \
--hash=sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b \
--hash=sha256:c0c0b3ade1c0b13b936d7970b1d37a57acde9199dc2aecc4c336773e1d86049c \
--hash=sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026 \
--hash=sha256:c4ffb7ebf07cfe8931028e3e4c85f0357459a3f9f9490886198848f4fa002ec8 \
--hash=sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676 \
--hash=sha256:d2ee202e79d8ed691ceebae8e0486bd9a2cd4794cec4824e1c99b6f5009502f6 \
--hash=sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e \
--hash=sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d \
--hash=sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d \
--hash=sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01 \
--hash=sha256:df2449253ef108a379b8b5d6b43f4b1a8e81a061d6537becd5582fba5f9196d7 \
--hash=sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419 \
--hash=sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795 \
--hash=sha256:e2103a929dfa2fcaf9bb4e7c091983a49c9ac3b19c9061b6d5427dd7d14d81a1 \
--hash=sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5 \
--hash=sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d \
--hash=sha256:e8fc20152abba6b83724d7ff268c249fa196d8259ff481f3b1476383f8f24e42 \
--hash=sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe \
--hash=sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda \
--hash=sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e \
--hash=sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737 \
--hash=sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523 \
--hash=sha256:f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591 \
--hash=sha256:f71a396b3bf33ecaa1626c255855702aca4d3d9fea5e051b41ac59a9c1c41edc \
--hash=sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a \
--hash=sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50
# via jinja2
pycparser==3.0 \
--hash=sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29 \
--hash=sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992
# via cffi
pydantic==2.13.3 \
--hash=sha256:6db14ac8dfc9a1e57f87ea2c0de670c251240f43cb0c30a5130e9720dc612927 \
--hash=sha256:af09e9d1d09f4e7fe37145c1f577e1d61ceb9a41924bf0094a36506285d0a84d
# via
# fastapi
# pydantic-settings
pydantic-core==2.46.3 \
--hash=sha256:0087084960f209a9a4af50ecd1fb063d9ad3658c07bb81a7a53f452dacbfb2ba \
--hash=sha256:031bb17f4885a43773c8c763089499f242aee2ea85cf17154168775dccdecf35 \
--hash=sha256:06d5d8820cbbdb4147578c1fe7ffcd5b83f34508cb9f9ab76e807be7db6ff0a4 \
--hash=sha256:07bc6d2a28c3adb4f7c6ae46aa4f2d2929af127f587ed44057af50bf1ce0f505 \
--hash=sha256:0c9ff69140423eea8ed2d5477df3ba037f671f5e897d206d921bc9fdc39613e7 \
--hash=sha256:1105677a6df914b1fb71a81b96c8cce7726857e1717d86001f29be06a25ee6f8 \
--hash=sha256:1108da631e602e5b3c38d6d04fe5bb3bfa54349e6918e3ca6cf570b2e2b2f9d4 \
--hash=sha256:12bc98de041458b80c86c56b24df1d23832f3e166cbaff011f25d187f5c62c37 \
--hash=sha256:13afdd885f3d71280cf286b13b310ee0f7ccfefd1dbbb661514a474b726e2f25 \
--hash=sha256:17eaface65d9fc5abb940003020309c1bf7a211f5f608d7870297c367e6f9022 \
--hash=sha256:1da3786b8018e60349680720158cc19161cc3b4bdd815beb0a321cd5ce1ad5b1 \
--hash=sha256:23cbdb3aaa74dfe0837975dbf69b469753bbde8eacace524519ffdb6b6e89eb7 \
--hash=sha256:2798b6ba041b9d70acfb9071a2ea13c8456dd1e6a5555798e41ba7b0790e329c \
--hash=sha256:27f9067c3bfadd04c55484b89c0d267981b2f3512850f6f66e1e74204a4e4ce3 \
--hash=sha256:28b5f2ef03416facccb1c6ef744c69793175fd27e44ef15669201601cf423acb \
--hash=sha256:28e8cf2f52d72ced402a137145923a762cbb5081e48b34312f7a0c8f55928ec3 \
--hash=sha256:28ed528c45446062ee66edb1d33df5d88828ae167de76e773a3c7f64bd14e976 \
--hash=sha256:2b8e4f2bbdf71415c544b4b1138b8060db7b6611bc927e8064c769f64bed651c \
--hash=sha256:2f40e4246676beb31c5ce77c38a55ca4e465c6b38d11ea1bd935420568e0b1ab \
--hash=sha256:3481bd1341dc85779ee506bc8e1196a277ace359d89d28588a9468c3ecbe63fa \
--hash=sha256:3612edf65c8ea67ac13616c4d23af12faef1ae435a8a93e5934c2a0cbbdd1fd6 \
--hash=sha256:367508faa4973b992b271ba1494acaab36eb7e8739d1e47be5035fb1ea225396 \
--hash=sha256:3861f1731b90c50a3266316b9044f5c9b405eecb8e299b0a7120596334e4fe9c \
--hash=sha256:3d08782c4045f90724b44c95d35ebec0d67edb8a957a2ac81d5a8e4b8a200495 \
--hash=sha256:41c178f65b8c29807239d47e6050262eb6bf84eb695e41101e62e38df4a5bc2c \
--hash=sha256:4335e87c7afa436a0dfa899e138d57a72f8aad542e2cf19c36fb428461caabd0 \
--hash=sha256:4b068543bdb707f5d935dab765d99227aa2545ef2820935f2e5dd801795c7dbd \
--hash=sha256:4de88889d7e88d50d40ee5b39d5dac0bcaef9ba91f7e536ac064e6b2834ecccf \
--hash=sha256:4e9d76736da5f362fabfeea6a69b13b7f2be405c6d6966f06b2f6bfff7e64531 \
--hash=sha256:57697d7c056aca4bbb680200f96563e841a6386ac1129370a0102592f4dddff5 \
--hash=sha256:57a973eae4665352a47cf1a99b4ee864620f2fe663a217d7a8da68a1f3a5bfda \
--hash=sha256:5ad3c826fe523e4becf4fe39baa44286cff85ef137c729a2c5e269afbfd0905d \
--hash=sha256:5c024e08c0ba23e6fd68c771a521e9d6a792f2ebb0fa734296b36394dc30390e \
--hash=sha256:5dcbbcf4d22210ced8f837c96db941bdb078f419543472aca5d9a0bb7cddc7df \
--hash=sha256:5dfd51cf457482f04ec49491811a2b8fd5b843b64b11eecd2d7a1ee596ea78a6 \
--hash=sha256:60e5f66e12c4f5212d08522963380eaaeac5ebd795826cfd19b2dfb0c7a52b9c \
--hash=sha256:610eda2e3838f401105e6326ca304f5da1e15393ae25dacae5c5c63f2c275b13 \
--hash=sha256:6529d1d128321a58d30afcc97b49e98836542f68dd41b33c2e972bb9e5290536 \
--hash=sha256:6645ce7eec4928e29a1e3b3d5c946621d105d3e79f0c9cddf07c2a9770949287 \
--hash=sha256:68cc7866ed863db34351294187f9b729964c371ba33e31c26f478471c52e1ed0 \
--hash=sha256:68ef2f623dda6d5a9067ac014e406c020c780b2a358930a7e5c1b73702900720 \
--hash=sha256:69a868ef3ff206343579021c40faf3b1edc64b1cc508ff243a28b0a514ccb050 \
--hash=sha256:6dff8cc884679df229ebc6d8eb2321ea6f8e091bc7d4886d4dc2e0e71452843c \
--hash=sha256:6e42d83d1c6b87fa56b521479cff237e626a292f3b31b6345c15a99121b454c1 \
--hash=sha256:706d9d0ce9cf4593d07270d8e9f53b161f90c57d315aeec4fb4fd7a8b10240d8 \
--hash=sha256:75a519dab6d63c514f3a81053e5266c549679e4aa88f6ec57f2b7b854aceb1b0 \
--hash=sha256:77706aeb41df6a76568434701e0917da10692da28cb69d5fb6919ce5fdb07374 \
--hash=sha256:79f561438481f28681584b89e2effb22855e2179880314bcddbf5968e935e807 \
--hash=sha256:830d1247d77ad23852314f069e9d7ddafeec5f684baf9d7e7065ed46a049c4e6 \
--hash=sha256:831eb19aa789a97356979e94c981e5667759301fb708d1c0d5adf1bc0098b873 \
--hash=sha256:83d002b97072a53ea150d63e0a3adfae5670cef5aa8a6e490240e482d3b22e57 \
--hash=sha256:85348b8f89d2c3508b65b16c3c33a4da22b8215138d8b996912bb1532868885f \
--hash=sha256:8690eba565c6d68ffd3a8655525cbdd5246510b44a637ee2c6c03a7ebfe64d3c \
--hash=sha256:87082cd65669a33adeba5470769e9704c7cf026cc30afb9cc77fd865578ebaad \
--hash=sha256:8940562319bc621da30714617e6a7eaa6b98c84e8c685bcdc02d7ed5e7c7c44e \
--hash=sha256:91249bcb7c165c2fb2a2f852dbc5c91636e2e218e75d96dfdd517e4078e173dd \
--hash=sha256:93fd339f23408a07e98950a89644f92c54d8729719a40b30c0a30bb9ebc55d23 \
--hash=sha256:9715525891ed524a0a1eb6d053c74d4d4ad5017677fb00af0b7c2644a31bae46 \
--hash=sha256:975c267cff4f7e7272eacbe50f6cc03ca9a3da4c4fbd66fffd89c94c1e311aa1 \
--hash=sha256:99421e7684a60f7f3550a1d159ade5fdff1954baedb6bdd407cba6a307c9f27d \
--hash=sha256:9be3e221bdc6d69abf294dcf7aff6af19c31a5cdcc8f0aa3b14be29df4bd03b1 \
--hash=sha256:9ce92e58abc722dac1bf835a6798a60b294e48eb0e625ec9fd994b932ac5feee \
--hash=sha256:9d2e32edcc143bc01e95300671915d9ca052d4f745aa0a49c48d4803f8a85f2c \
--hash=sha256:9d2f400712a99a013aff420ef1eb9be077f8189a36c1e3ef87660b4e1088a874 \
--hash=sha256:9f247596366f4221af52beddd65af1218797771d6989bc891a0b86ccaa019168 \
--hash=sha256:a03e6467f0f5ab796a486146d1b887b2dc5e5f9b3288898c1b1c3ad974e53e4a \
--hash=sha256:a35cc284c8dd7edae8a31533713b4d2467dfe7c4f1b5587dd4031f28f90d1d13 \
--hash=sha256:a3b11c812f61b3129c4905781a2601dfdfdea5fe1e6c1cfb696b55d14e9c054f \
--hash=sha256:a642ac886ecf6402d9882d10c405dcf4b902abeb2972cd5fb4a48c83cd59279a \
--hash=sha256:a6cd87cb1575b1ad05ba98894c5b5c96411ef678fa2f6ed2576607095b8d9789 \
--hash=sha256:a712c7118e6c5ea96562f7b488435172abb94a3c53c22c9efc1412264a45cbbe \
--hash=sha256:a7610b6a5242a6c736d8ad47fd5fff87fcfe8f833b281b1c409c3d6835d9227f \
--hash=sha256:ab124d49d0459b2373ecf54118a45c28a1e6d4192a533fbc915e70f556feb8e5 \
--hash=sha256:ac5ec7fb9b87f04ee839af2d53bcadea57ded7d229719f56c0ed895bff987943 \
--hash=sha256:aed19d0c783886d5bd86d80ae5030006b45e28464218747dcf83dabfdd092c7b \
--hash=sha256:af8653713055ea18a3abc1537fe2ebc42f5b0bbb768d1eb79fd74eb47c0ac089 \
--hash=sha256:afa3aa644f74e290cdede48a7b0bee37d1c35e71b05105f6b340d484af536d9b \
--hash=sha256:b00b76f7142fc60c762ce579bd29c8fa44aaa56592dd3c54fab3928d0d4ca6ff \
--hash=sha256:b11b59b3eee90a80a36701ddb4576d9ae31f93f05cb9e277ceaa09e6bf074a67 \
--hash=sha256:b12dd51f1187c2eb489af8e20f880362db98e954b54ab792fa5d92e8bcc6b803 \
--hash=sha256:b40ddd51e7c44b28cfaef746c9d3c506d658885e0a46f9eeef2ee815cbf8e045 \
--hash=sha256:b504bda01bafc69b6d3c7a0c7f039dcf60f47fab70e06fe23f57b5c75bdc82b8 \
--hash=sha256:b5b9c6cf08a8a5e502698f5e153056d12c34b8fb30317e0c5fd06f45162a6346 \
--hash=sha256:b675ab0a0d5b1c8fdb81195dc5bcefea3f3c240871cdd7ff9a2de8aa50772eb2 \
--hash=sha256:b6cdf19bf84128d5e7c37e8a73a0c5c10d51103a650ac585d42dd6ae233f2b7f \
--hash=sha256:bcf2a8b2982a6673693eae7348ef3d8cf3979c1d63b54fca7c397a635cc68687 \
--hash=sha256:bd2aab0e2e9dc2daf36bd2686c982535d5e7b1d930a1344a7bb6e82baab42a76 \
--hash=sha256:c3212fda0ee959c1dd04c60b601ec31097aaa893573a3a1abd0a47bcac2968c1 \
--hash=sha256:cc0988cb29d21bf4a9d5cf2ef970b5c0e38d8d8e107a493278c05dc6c1dda69f \
--hash=sha256:cc7e8c32db809aa0f6ea1d6869ebc8518a65d5150fdfad8bcae6a49ae32a22e2 \
--hash=sha256:cca67d52a5c7a16aed2b3999e719c4bcf644074eac304a5d3d62dd70ae7d4b2c \
--hash=sha256:ced3310e51aa425f7f77da8bbbb5212616655bedbe82c70944320bc1dbe5e018 \
--hash=sha256:cf489cf8986c543939aeee17a09c04d6ffb43bfef8ca16fcbcc5cfdcbed24dba \
--hash=sha256:d0793c90c1a3c74966e7975eaef3ed30ebdff3260a0f815a62a22adc17e4c01c \
--hash=sha256:d0fe3dce1e836e418f912c1ad91c73357d03e556a4d286f441bf34fed2dbeecf \
--hash=sha256:d11058e3201527d41bc6b545c79187c9e4bf85e15a236a6007f0e991518882b7 \
--hash=sha256:d2d0aead851b66f5245ec0c4fb2612ef457f8bbafefdf65a2bf9d6bac6140f47 \
--hash=sha256:d56bdb4af1767cc15b0386b3c581fdfe659bb9ee4a4f776e92c1cd9d074000d6 \
--hash=sha256:dcda6583921c05a40533f982321532f2d8db29326c7b95c4026941fa5074bd79 \
--hash=sha256:dd81f6907932ebac3abbe41378dac64b2380db1287e2aa64d8d88f78d170f51a \
--hash=sha256:de3a5c376f8cd94da9a1b8fd3dd1c16c7a7b216ed31dc8ce9fd7a22bf13b836e \
--hash=sha256:de885175515bcfa98ae618c1df7a072f13d179f81376c8007112af20567fd08a \
--hash=sha256:e29908922ce9da1a30b4da490bd1d3d82c01dcfdf864d2a74aacee674d0bfa34 \
--hash=sha256:e480080975c1ef7f780b8f99ed72337e7cc5efea2e518a20a692e8e7b278eb8b \
--hash=sha256:e61ea8e9fff9606d09178f577ff8ccdd7206ff73d6552bcec18e1033c4254b85 \
--hash=sha256:ec638c5d194ef8af27db69f16c954a09797c0dc25015ad6123eb2c73a4d271ca \
--hash=sha256:ed42e6cc8e1b0e2b9b96e2276bad70ae625d10d6d524aed0c93de974ae029f9f \
--hash=sha256:f00a0961b125f1a47af7bcc17f00782e12f4cd056f83416006b30111d941dfa3 \
--hash=sha256:f13936129ce841f2a5ddf6f126fea3c43cd128807b5a59588c37cf10178c2e64 \
--hash=sha256:f1771ce258afb3e4201e67d154edbbae712a76a6081079fe247c2f53c6322c22 \
--hash=sha256:f1f8338dd7a7f31761f1f1a3c47503a9a3b34eea3c8b01fa6ee96408affb5e72 \
--hash=sha256:f64b5537ac62b231572879cd08ec05600308636a5d63bcbdb15063a466977bec \
--hash=sha256:f80a55484b8d843c8ada81ebf70a682f3f00a3d40e378c06cf17ecb44d280d7d \
--hash=sha256:f91c0aff3e3ee0928edd1232c57f643a7a003e6edf1860bc3afcdc749cb513f3 \
--hash=sha256:fa3eb7c2995aa443687a825bc30395c8521b7c6ec201966e55debfd1128bcceb \
--hash=sha256:fb528e295ed31570ac3dcc9bfdd6e0150bc11ce6168ac87a8082055cf1a67395 \
--hash=sha256:fc331a5314ffddd5385b9ee9d0d2fee0b13c27e0e02dad71b1ae5d6561f51eeb \
--hash=sha256:fd35aa21299def8db7ef4fe5c4ff862941a9a158ca7b63d61e66fe67d30416b4 \
--hash=sha256:ff5e7783bcc5476e1db448bf268f11cb257b1c276d3e89f00b5727be86dd0127 \
--hash=sha256:ffe0883b56cfc05798bf994164d2b2ff03efe2d22022a2bb080f3b626176dd56
# via pydantic
pydantic-settings==2.14.0 \
--hash=sha256:24285fd4b0e0c06507dd9fdfd331ee23794305352aaec8fc4eb92d4047aeb67d \
--hash=sha256:fc8d5d692eb7092e43c8647c1c35a3ecd00e040fcf02ed86f4cb5458ca62182e
# via -r req.in
python-dotenv==1.2.2 \
--hash=sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a \
--hash=sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3
# via
# pydantic-settings
# uvicorn
python-multipart==0.0.27 \
--hash=sha256:6fccfad17a27334bd0193681b369f476eda3409f17381a2d65aa7df3f7275645 \
--hash=sha256:9870a6a8c5a20a5bf4f07c017bd1489006ff8836cff097b6933355ee2b49b602
# via -r req.in
pyyaml==6.0.3 \
--hash=sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c \
--hash=sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a \
--hash=sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3 \
--hash=sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956 \
--hash=sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6 \
--hash=sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c \
--hash=sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65 \
--hash=sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a \
--hash=sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0 \
--hash=sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b \
--hash=sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1 \
--hash=sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6 \
--hash=sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7 \
--hash=sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e \
--hash=sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007 \
--hash=sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310 \
--hash=sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4 \
--hash=sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9 \
--hash=sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295 \
--hash=sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea \
--hash=sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0 \
--hash=sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e \
--hash=sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac \
--hash=sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9 \
--hash=sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7 \
--hash=sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35 \
--hash=sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb \
--hash=sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b \
--hash=sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69 \
--hash=sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5 \
--hash=sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b \
--hash=sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c \
--hash=sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369 \
--hash=sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd \
--hash=sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824 \
--hash=sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198 \
--hash=sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065 \
--hash=sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c \
--hash=sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c \
--hash=sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764 \
--hash=sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196 \
--hash=sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b \
--hash=sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00 \
--hash=sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac \
--hash=sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8 \
--hash=sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e \
--hash=sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28 \
--hash=sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3 \
--hash=sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5 \
--hash=sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4 \
--hash=sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b \
--hash=sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf \
--hash=sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5 \
--hash=sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702 \
--hash=sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8 \
--hash=sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788 \
--hash=sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da \
--hash=sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d \
--hash=sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc \
--hash=sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c \
--hash=sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba \
--hash=sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f \
--hash=sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917 \
--hash=sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5 \
--hash=sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26 \
--hash=sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f \
--hash=sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b \
--hash=sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be \
--hash=sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c \
--hash=sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3 \
--hash=sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6 \
--hash=sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926 \
--hash=sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0
# via uvicorn
sse-starlette==3.4.1 \
--hash=sha256:6b43cf21f1d574d582a6e1b0cfbde1c94dc86a32a701a7168c99c4475c6bd1d0 \
--hash=sha256:f780bebcf6c8997fe514e3bd8e8c648d8284976b391c8bed0bcb1f611632b555
# via -r req.in
starlette==1.0.0 \
--hash=sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149 \
--hash=sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b
# via
# fastapi
# sse-starlette
typing-extensions==4.15.0 \
--hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
--hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
# via
# anyio
# asyncssh
# fastapi
# pydantic
# pydantic-core
# starlette
# typing-inspection
typing-inspection==0.4.2 \
--hash=sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7 \
--hash=sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464
# via
# fastapi
# pydantic
# pydantic-settings
uvicorn[standard]==0.46.0 \
--hash=sha256:bbebbcbed972d162afca128605223022bedd345b7bc7855ce66deb31487a9048 \
--hash=sha256:fb9da0926999cc6cb22dc7cd71a94a632f078e6ae47ff683c5c420750fb7413d
# via -r req.in
uvloop==0.22.1 \
--hash=sha256:017bd46f9e7b78e81606329d07141d3da446f8798c6baeec124260e22c262772 \
--hash=sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e \
--hash=sha256:05e4b5f86e621cf3927631789999e697e58f0d2d32675b67d9ca9eb0bca55743 \
--hash=sha256:0ae676de143db2b2f60a9696d7eca5bb9d0dd6cc3ac3dad59a8ae7e95f9e1b54 \
--hash=sha256:1489cf791aa7b6e8c8be1c5a080bae3a672791fcb4e9e12249b05862a2ca9cec \
--hash=sha256:17d4e97258b0172dfa107b89aa1eeba3016f4b1974ce85ca3ef6a66b35cbf659 \
--hash=sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8 \
--hash=sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad \
--hash=sha256:286322a90bea1f9422a470d5d2ad82d38080be0a29c4dd9b3e6384320a4d11e7 \
--hash=sha256:297c27d8003520596236bdb2335e6b3f649480bd09e00d1e3a99144b691d2a35 \
--hash=sha256:37554f70528f60cad66945b885eb01f1bb514f132d92b6eeed1c90fd54ed6289 \
--hash=sha256:3879b88423ec7e97cd4eba2a443aa26ed4e59b45e6b76aabf13fe2f27023a142 \
--hash=sha256:3b7f102bf3cb1995cfeaee9321105e8f5da76fdb104cdad8986f85461a1b7b77 \
--hash=sha256:40631b049d5972c6755b06d0bfe8233b1bd9a8a6392d9d1c45c10b6f9e9b2733 \
--hash=sha256:481c990a7abe2c6f4fc3d98781cc9426ebd7f03a9aaa7eb03d3bfc68ac2a46bd \
--hash=sha256:4a968a72422a097b09042d5fa2c5c590251ad484acf910a651b4b620acd7f193 \
--hash=sha256:4baa86acedf1d62115c1dc6ad1e17134476688f08c6efd8a2ab076e815665c74 \
--hash=sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0 \
--hash=sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6 \
--hash=sha256:535cc37b3a04f6cd2c1ef65fa1d370c9a35b6695df735fcff5427323f2cd5473 \
--hash=sha256:53c85520781d84a4b8b230e24a5af5b0778efdb39142b424990ff1ef7c48ba21 \
--hash=sha256:55502bc2c653ed2e9692e8c55cb95b397d33f9f2911e929dc97c4d6b26d04242 \
--hash=sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705 \
--hash=sha256:56a2d1fae65fd82197cb8c53c367310b3eabe1bbb9fb5a04d28e3e3520e4f702 \
--hash=sha256:57df59d8b48feb0e613d9b1f5e57b7532e97cbaf0d61f7aa9aa32221e84bc4b6 \
--hash=sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f \
--hash=sha256:6cde23eeda1a25c75b2e07d39970f3374105d5eafbaab2a4482be82f272d5a5e \
--hash=sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d \
--hash=sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370 \
--hash=sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4 \
--hash=sha256:7cd375a12b71d33d46af85a3343b35d98e8116134ba404bd657b3b1d15988792 \
--hash=sha256:80eee091fe128e425177fbd82f8635769e2f32ec9daf6468286ec57ec0313efa \
--hash=sha256:93f617675b2d03af4e72a5333ef89450dfaa5321303ede6e67ba9c9d26878079 \
--hash=sha256:a592b043a47ad17911add5fbd087c76716d7c9ccc1d64ec9249ceafd735f03c2 \
--hash=sha256:ac33ed96229b7790eb729702751c0e93ac5bc3bcf52ae9eccbff30da09194b86 \
--hash=sha256:b31dc2fccbd42adc73bc4e7cdbae4fc5086cf378979e53ca5d0301838c5682c6 \
--hash=sha256:b45649628d816c030dba3c80f8e2689bab1c89518ed10d426036cdc47874dfc4 \
--hash=sha256:b76324e2dc033a0b2f435f33eb88ff9913c156ef78e153fb210e03c13da746b3 \
--hash=sha256:b91328c72635f6f9e0282e4a57da7470c7350ab1c9f48546c0f2866205349d21 \
--hash=sha256:badb4d8e58ee08dad957002027830d5c3b06aea446a6a3744483c2b3b745345c \
--hash=sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e \
--hash=sha256:c1955d5a1dd43198244d47664a5858082a3239766a839b2102a269aaff7a4e25 \
--hash=sha256:c3e5c6727a57cb6558592a95019e504f605d1c54eb86463ee9f7a2dbd411c820 \
--hash=sha256:c60ebcd36f7b240b30788554b6f0782454826a0ed765d8430652621b5de674b9 \
--hash=sha256:daf620c2995d193449393d6c62131b3fbd40a63bf7b307a1527856ace637fe88 \
--hash=sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2 \
--hash=sha256:ea721dd3203b809039fcc2983f14608dae82b212288b346e0bfe46ec2fab0b7c \
--hash=sha256:ef6f0d4cc8a9fa1f6a910230cd53545d9a14479311e87e3cb225495952eb672c \
--hash=sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42
# via uvicorn
watchfiles==1.1.1 \
--hash=sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c \
--hash=sha256:03fa0f5237118a0c5e496185cafa92878568b652a2e9a9382a5151b1a0380a43 \
--hash=sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510 \
--hash=sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0 \
--hash=sha256:08af70fd77eee58549cd69c25055dc344f918d992ff626068242259f98d598a2 \
--hash=sha256:0b495de0bb386df6a12b18335a0285dda90260f51bdb505503c02bcd1ce27a8b \
--hash=sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18 \
--hash=sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219 \
--hash=sha256:17ef139237dfced9da49fb7f2232c86ca9421f666d78c264c7ffca6601d154c3 \
--hash=sha256:1a0bb430adb19ef49389e1ad368450193a90038b5b752f4ac089ec6942c4dff4 \
--hash=sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803 \
--hash=sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94 \
--hash=sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6 \
--hash=sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce \
--hash=sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099 \
--hash=sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae \
--hash=sha256:35c53bd62a0b885bf653ebf6b700d1bf05debb78ad9292cf2a942b23513dc4c4 \
--hash=sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43 \
--hash=sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd \
--hash=sha256:399600947b170270e80134ac854e21b3ccdefa11a9529a3decc1327088180f10 \
--hash=sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374 \
--hash=sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051 \
--hash=sha256:3bc570d6c01c206c46deb6e935a260be44f186a2f05179f52f7fcd2be086a94d \
--hash=sha256:3dbd8cbadd46984f802f6d479b7e3afa86c42d13e8f0f322d669d79722c8ec34 \
--hash=sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49 \
--hash=sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7 \
--hash=sha256:3f6d37644155fb5beca5378feb8c1708d5783145f2a0f1c4d5a061a210254844 \
--hash=sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77 \
--hash=sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b \
--hash=sha256:421e29339983e1bebc281fab40d812742268ad057db4aee8c4d2bce0af43b741 \
--hash=sha256:4b943d3668d61cfa528eb949577479d3b077fd25fb83c641235437bc0b5bc60e \
--hash=sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33 \
--hash=sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42 \
--hash=sha256:544364b2b51a9b0c7000a4b4b02f90e9423d97fbbf7e06689236443ebcad81ab \
--hash=sha256:5524298e3827105b61951a29c3512deb9578586abf3a7c5da4a8069df247cccc \
--hash=sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5 \
--hash=sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da \
--hash=sha256:57ca5281a8b5e27593cb7d82c2ac927ad88a96ed406aa446f6344e4328208e9e \
--hash=sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05 \
--hash=sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a \
--hash=sha256:5f3f58818dc0b07f7d9aa7fe9eb1037aecb9700e63e1f6acfed13e9fef648f5d \
--hash=sha256:5fac835b4ab3c6487b5dbad78c4b3724e26bcc468e886f8ba8cc4306f68f6701 \
--hash=sha256:620bae625f4cb18427b1bb1a2d9426dc0dd5a5ba74c7c2cdb9de405f7b129863 \
--hash=sha256:672b8adf25b1a0d35c96b5888b7b18699d27d4194bac8beeae75be4b7a3fc9b2 \
--hash=sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101 \
--hash=sha256:6c3631058c37e4a0ec440bf583bc53cdbd13e5661bb6f465bc1d88ee9a0a4d02 \
--hash=sha256:6c9c9262f454d1c4d8aaa7050121eb4f3aea197360553699520767daebf2180b \
--hash=sha256:6e43d39a741e972bab5d8100b5cdacf69db64e34eb19b6e9af162bccf63c5cc6 \
--hash=sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb \
--hash=sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620 \
--hash=sha256:74472234c8370669850e1c312490f6026d132ca2d396abfad8830b4f1c096957 \
--hash=sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6 \
--hash=sha256:77a13aea58bc2b90173bc69f2a90de8e282648939a00a602e1dc4ee23e26b66d \
--hash=sha256:79ff6c6eadf2e3fc0d7786331362e6ef1e51125892c75f1004bd6b52155fb956 \
--hash=sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef \
--hash=sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261 \
--hash=sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02 \
--hash=sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af \
--hash=sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9 \
--hash=sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21 \
--hash=sha256:89eef07eee5e9d1fda06e38822ad167a044153457e6fd997f8a858ab7564a336 \
--hash=sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d \
--hash=sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c \
--hash=sha256:8ca65483439f9c791897f7db49202301deb6e15fe9f8fe2fed555bf986d10c31 \
--hash=sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81 \
--hash=sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9 \
--hash=sha256:9bb9f66367023ae783551042d31b1d7fd422e8289eedd91f26754a66f44d5cff \
--hash=sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2 \
--hash=sha256:a36d8efe0f290835fd0f33da35042a1bb5dc0e83cbc092dcf69bce442579e88e \
--hash=sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc \
--hash=sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404 \
--hash=sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01 \
--hash=sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18 \
--hash=sha256:acb08650863767cbc58bca4813b92df4d6c648459dcaa3d4155681962b2aa2d3 \
--hash=sha256:aebfd0861a83e6c3d1110b78ad54704486555246e542be3e2bb94195eabb2606 \
--hash=sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04 \
--hash=sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3 \
--hash=sha256:b2cd9e04277e756a2e2d2543d65d1e2166d6fd4c9b183f8808634fda23f17b14 \
--hash=sha256:b9c4702f29ca48e023ffd9b7ff6b822acdf47cb1ff44cb490a3f1d5ec8987e9c \
--hash=sha256:bbe1ef33d45bc71cf21364df962af171f96ecaeca06bd9e3d0b583efb12aec82 \
--hash=sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610 \
--hash=sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0 \
--hash=sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150 \
--hash=sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5 \
--hash=sha256:c1f5210f1b8fc91ead1283c6fd89f70e76fb07283ec738056cf34d51e9c1d62c \
--hash=sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a \
--hash=sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b \
--hash=sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d \
--hash=sha256:c882d69f6903ef6092bedfb7be973d9319940d56b8427ab9187d1ecd73438a70 \
--hash=sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70 \
--hash=sha256:cdab464fee731e0884c35ae3588514a9bcf718d0e2c82169c1c4a85cc19c3c7f \
--hash=sha256:ce19e06cbda693e9e7686358af9cd6f5d61312ab8b00488bc36f5aabbaf77e24 \
--hash=sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e \
--hash=sha256:cf57a27fb986c6243d2ee78392c503826056ffe0287e8794503b10fb51b881be \
--hash=sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5 \
--hash=sha256:d6ff426a7cb54f310d51bfe83fe9f2bbe40d540c741dc974ebc30e6aa238f52e \
--hash=sha256:d7e7067c98040d646982daa1f37a33d3544138ea155536c2e0e63e07ff8a7e0f \
--hash=sha256:db476ab59b6765134de1d4fe96a1a9c96ddf091683599be0f26147ea1b2e4b88 \
--hash=sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb \
--hash=sha256:de6da501c883f58ad50db3a32ad397b09ad29865b5f26f64c24d3e3281685849 \
--hash=sha256:e84087b432b6ac94778de547e08611266f1f8ffad28c0ee4c82e028b0fc5966d \
--hash=sha256:eef58232d32daf2ac67f42dea51a2c80f0d03379075d44a587051e63cc2e368c \
--hash=sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44 \
--hash=sha256:f0ab1c1af0cb38e3f598244c17919fb1a84d1629cc08355b0074b6d7f53138ac \
--hash=sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428 \
--hash=sha256:f537afb3276d12814082a2e9b242bdcf416c2e8fd9f799a737990a1dbe906e5b \
--hash=sha256:f57b396167a2565a4e8b5e56a5a1c537571733992b226f4f1197d79e94cf0ae5 \
--hash=sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa \
--hash=sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf
# via uvicorn
websockets==16.0 \
--hash=sha256:0298d07ee155e2e9fda5be8a9042200dd2e3bb0b8a38482156576f863a9d457c \
--hash=sha256:04cdd5d2d1dacbad0a7bf36ccbcd3ccd5a30ee188f2560b7a62a30d14107b31a \
--hash=sha256:08d7af67b64d29823fed316505a89b86705f2b7981c07848fb5e3ea3020c1abe \
--hash=sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e \
--hash=sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec \
--hash=sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1 \
--hash=sha256:1c1b30e4f497b0b354057f3467f56244c603a79c0d1dafce1d16c283c25f6e64 \
--hash=sha256:2b9f1e0d69bc60a4a87349d50c09a037a2607918746f07de04df9e43252c77a3 \
--hash=sha256:31a52addea25187bde0797a97d6fc3d2f92b6f72a9370792d65a6e84615ac8a8 \
--hash=sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206 \
--hash=sha256:335c23addf3d5e6a8633f9f8eda77efad001671e80b95c491dd0924587ece0b3 \
--hash=sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156 \
--hash=sha256:349f83cd6c9a415428ee1005cadb5c2c56f4389bc06a9af16103c3bc3dcc8b7d \
--hash=sha256:37b31c1623c6605e4c00d466c9d633f9b812ea430c11c8a278774a1fde1acfa9 \
--hash=sha256:417b28978cdccab24f46400586d128366313e8a96312e4b9362a4af504f3bbad \
--hash=sha256:485c49116d0af10ac698623c513c1cc01c9446c058a4e61e3bf6c19dff7335a2 \
--hash=sha256:4a1aba3340a8dca8db6eb5a7986157f52eb9e436b74813764241981ca4888f03 \
--hash=sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8 \
--hash=sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230 \
--hash=sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8 \
--hash=sha256:5569417dc80977fc8c2d43a86f78e0a5a22fee17565d78621b6bb264a115d4ea \
--hash=sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641 \
--hash=sha256:583b7c42688636f930688d712885cf1531326ee05effd982028212ccc13e5957 \
--hash=sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6 \
--hash=sha256:5f451484aeb5cafee1ccf789b1b66f535409d038c56966d6101740c1614b86c6 \
--hash=sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5 \
--hash=sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f \
--hash=sha256:71c989cbf3254fbd5e84d3bff31e4da39c43f884e64f2551d14bb3c186230f00 \
--hash=sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e \
--hash=sha256:7be95cfb0a4dae143eaed2bcba8ac23f4892d8971311f1b06f3c6b78952ee70b \
--hash=sha256:7d837379b647c0c4c2355c2499723f82f1635fd2c26510e1f587d89bc2199e72 \
--hash=sha256:86890e837d61574c92a97496d590968b23c2ef0aeb8a9bc9421d174cd378ae39 \
--hash=sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9 \
--hash=sha256:8b6e209ffee39ff1b6d0fa7bfef6de950c60dfb91b8fcead17da4ee539121a79 \
--hash=sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0 \
--hash=sha256:8d7f0659570eefb578dacde98e24fb60af35350193e4f56e11190787bee77dac \
--hash=sha256:8e1dab317b6e77424356e11e99a432b7cb2f3ec8c5ab4dabbcee6add48f72b35 \
--hash=sha256:8ff32bb86522a9e5e31439a58addbb0166f0204d64066fb955265c4e214160f0 \
--hash=sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5 \
--hash=sha256:9b5aca38b67492ef518a8ab76851862488a478602229112c4b0d58d63a7a4d5c \
--hash=sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8 \
--hash=sha256:a0b31e0b424cc6b5a04b8838bbaec1688834b2383256688cf47eb97412531da1 \
--hash=sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244 \
--hash=sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3 \
--hash=sha256:a653aea902e0324b52f1613332ddf50b00c06fdaf7e92624fbf8c77c78fa5767 \
--hash=sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a \
--hash=sha256:af80d74d4edfa3cb9ed973a0a5ba2b2a549371f8a741e0800cb07becdd20f23d \
--hash=sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd \
--hash=sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e \
--hash=sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944 \
--hash=sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82 \
--hash=sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d \
--hash=sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4 \
--hash=sha256:d6297ce39ce5c2e6feb13c1a996a2ded3b6832155fcfc920265c76f24c7cceb5 \
--hash=sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904 \
--hash=sha256:df57afc692e517a85e65b72e165356ed1df12386ecb879ad5693be08fac65dde \
--hash=sha256:e0334872c0a37b606418ac52f6ab9cfd17317ac26365f7f65e203e2d0d0d359f \
--hash=sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c \
--hash=sha256:eaded469f5e5b7294e2bdca0ab06becb6756ea86894a47806456089298813c89 \
--hash=sha256:f4a32d1bd841d4bcbffdcb3d2ce50c09c3909fbead375ab28d0181af89fd04da \
--hash=sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4
# via uvicorn

45
scripts/regenerate-lockfile.sh Executable file
View file

@ -0,0 +1,45 @@
#!/usr/bin/env bash
# Regenerate requirements.txt from requirements.in.
#
# Run this whenever you add, remove, or change a constraint in
# requirements.in — never edit requirements.txt by hand. The output is
# a fully-pinned lockfile with sha256 hashes, consumed at image-build
# time with `pip install --require-hashes`.
#
# Runs pip-compile in a clean python:3.12-slim container so this script
# has no host dependencies — Docker is enough.
#
# Usage:
#   ./scripts/regenerate-lockfile.sh
#
# After it runs:
#   - Review the diff (`git diff requirements.txt`) — bumps to
#     transitive deps may be CVE fixes or breaking changes
#   - Rebuild the container locally to confirm install + boot
#   - Commit requirements.in AND requirements.txt together
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$REPO_ROOT"

if [ ! -f requirements.in ]; then
    echo "fatal: requirements.in not found in $REPO_ROOT" >&2
    exit 1
fi

echo "Regenerating requirements.txt from requirements.in ..."

# The container script is double-quoted on purpose: $(id -u)/$(id -g) are
# expanded by the HOST shell, so the lockfile written inside the container
# is handed back to the invoking user.
#
# `set -euo pipefail` inside the container is what makes failures visible:
# without it the `pip install | tail` pipeline and pip-compile could both
# fail while the final chown succeeded, so the container exited 0 and this
# script reported "Done" over an unchanged lockfile.
docker run --rm \
    -v "$REPO_ROOT:/work" \
    -w /work \
    python:3.12-slim \
    bash -c "
        set -euo pipefail
        pip install --quiet --no-cache-dir --disable-pip-version-check pip-tools 2>&1 | tail -3
        pip-compile --quiet --generate-hashes --strip-extras \
            --output-file=requirements.txt requirements.in
        chown $(id -u):$(id -g) requirements.txt
    "

echo "Done. New lockfile is $(wc -l < requirements.txt) lines."
echo "Review: git diff requirements.txt"
echo "Verify: docker compose build app && docker compose up -d app"

44
scripts/run-tests.sh Executable file
View file

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Run the test suite against the deployed container on maple.
#
# Tests aren't shipped in the prod image (Dockerfile only COPYs app/),
# so this tars them, copies them in, and runs unittest discover. Cleans
# up after itself so the running container doesn't accrue test files.
#
# Usage:
#   scripts/run-tests.sh                  # run full suite
#   scripts/run-tests.sh test_lifecycle   # run a specific module
#
# Environment overrides: REMOTE_HOST (default: maple), CONTAINER
# (default: nas-burnin).
#
# Requires: ssh access to maple (configured in ~/.ssh/config).
set -euo pipefail

REMOTE_HOST="${REMOTE_HOST:-maple}"
CONTAINER="${CONTAINER:-nas-burnin}"
# $$ (this script's PID) keeps concurrent invocations from sharing a tarball
# path on the remote host.
REMOTE_TMP="/tmp/tnb-tests-$$.tgz"
CONTAINER_TMP="/tmp/tnb-tests.tgz"
PATTERN="${1:-}"

# Resolve repo root so this works whether invoked from the root or scripts/
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

echo "→ Packing tests/ from $REPO_ROOT"
cd "$REPO_ROOT"
tar cz tests | ssh "$REMOTE_HOST" "cat > $REMOTE_TMP"

echo "→ Copying into container $CONTAINER"
# Remove the host-side tarball whether or not `docker cp` succeeds, then
# propagate docker cp's status. The escaped \$ defers expansion to the
# remote shell. (Previously `&& rm -f` left the tarball behind on failure.)
ssh "$REMOTE_HOST" "docker cp $REMOTE_TMP $CONTAINER:$CONTAINER_TMP; rc=\$?; rm -f $REMOTE_TMP; exit \$rc"

if [ -n "$PATTERN" ]; then
    echo "→ Running tests matching: $PATTERN"
    RUN_CMD="cd /opt/app && tar xzf $CONTAINER_TMP && python -m unittest tests.$PATTERN -v"
else
    echo "→ Running full suite"
    RUN_CMD="cd /opt/app && tar xzf $CONTAINER_TMP && python -m unittest discover -s tests"
fi

# Always try to clean tests/ out of the container after the run, even on failure.
CLEANUP="rm -rf /opt/app/tests $CONTAINER_TMP"
# rc is captured inside the container shell so the test result, not the
# cleanup's status, becomes this script's exit code.
ssh "$REMOTE_HOST" "docker exec $CONTAINER sh -c '$RUN_CMD; rc=\$?; $CLEANUP; exit \$rc'"

View file

@ -0,0 +1,17 @@
[Unit]
Description=Security scan of nas-burnin (pip-audit + bandit + mypy + gitleaks)
# NOTE(review): %h and WantedBy=default.target suggest this is installed as a
# user unit. If so, ordering against system units such as docker.service
# likely has no effect — confirm, and rely on the script's own failure
# handling when Docker is unavailable.
After=network-online.target docker.service
Wants=network-online.target

[Service]
Type=oneshot
# Wire SECURITY_SCAN_WEBHOOK here if you want findings POSTed somewhere.
# Environment=SECURITY_SCAN_WEBHOOK=https://chat.example/hooks/abc
ExecStart=%h/docker/stacks/nas-burnin/scripts/security-scan.sh
# Tools cache + container pulls — give them headroom.
TimeoutStartSec=600
StandardOutput=journal
StandardError=journal

[Install]
WantedBy=default.target

149
scripts/security-scan.sh Normal file
View file

@ -0,0 +1,149 @@
#!/usr/bin/env bash
# Daily security scan of the deployed nas-burnin source on maple.
# Mirrors the .forgejo/workflows/security-scan.yml CI pipeline so a finding
# the runner-less forge would have flagged still surfaces here.
#
# Tools all run in containers — nothing installed on the host.
#   pip-audit — known CVEs in the pinned lockfile (requirements.txt from the
#               scan checkout, audited in a throwaway container)
#   bandit    — Python static security analysis on the deploy dir's app/ tree
#   mypy      — type check of the deploy dir's app/ tree (gating)
#   gitleaks  — secrets across the full git history of the scan checkout
#
# Output:
#   ~/security-scans/scan-YYYY-MM-DD/{summary,pip-audit,bandit,mypy,gitleaks}.txt
#   ~/security-scans/findings.log — appended one line per scan with findings
#
# Wiring:
#   Daily systemd user timer at 03:30 local (after the in-app retention job
#   so backups are fresh). See scripts/security-scan.{service,timer}.

# Deliberately no `-e`: each scanner's non-zero exit status is collected and
# reported at the end instead of aborting the run at the first finding.
set -uo pipefail

REPO_URL="${REPO_URL:-https://git.hellocomputer.xyz/brandon/nas-burnin.git}"
REPO="${REPO:-$HOME/scan-checkouts/nas-burnin}"
OUT_BASE="${OUT_BASE:-$HOME/security-scans}"
DATE="$(date +%Y-%m-%d)"
OUT_DIR="$OUT_BASE/scan-$DATE"
SUMMARY="$OUT_BASE/findings.log"
GITLEAKS_VERSION="${GITLEAKS_VERSION:-8.21.2}"

mkdir -p "$OUT_DIR" "$(dirname "$REPO")"

# Maintain a dedicated checkout for scanning. The deploy at
# ~/docker/stacks/nas-burnin/ is just the bind-mounted source — no
# .git, no history — so gitleaks can't scan there. We keep a separate
# clone, fast-forward it to origin/main each run.
if [ ! -d "$REPO/.git" ]; then
    echo "Cloning $REPO_URL to $REPO ..."
    git clone --quiet "$REPO_URL" "$REPO" || {
        echo "fatal: git clone failed" >&2
        exit 65
    }
fi

# Without `set -e` a failed cd would let the fetch and `git reset --hard`
# below run in whatever directory the service started in — bail out instead.
cd "$REPO" || {
    echo "fatal: cannot cd to $REPO" >&2
    exit 65
}

# Refresh the scan checkout. Failures here mean we'd be scanning stale
# code without knowing — fail loudly instead of soldiering on silently.
if ! git fetch --quiet --prune origin; then
    echo "fatal: git fetch failed in $REPO" >&2
    exit 65
fi
git checkout --quiet main || true  # ok if already on main
if ! git reset --hard --quiet origin/main; then
    echo "fatal: git reset --hard failed in $REPO" >&2
    exit 65
fi
# Start a fresh per-run summary. `>` truncates, so a second run on the same
# day replaces that day's summary.txt rather than appending to it.
echo "=== Security scan $DATE ===" > "$OUT_DIR/summary.txt"
date -Iseconds >> "$OUT_DIR/summary.txt"
echo >> "$OUT_DIR/summary.txt"
# --- pip-audit against the lockfile in a throwaway container ------------
# Previously we did `docker exec nas-burnin pip install pip-audit`
# which mutated the live production container with a transient package.
# Now scan the lockfile in an ephemeral container — same coverage of
# pinned versions + their transitives, no side effects on prod.
echo "--- pip-audit (requirements.txt in throwaway container) ---" | tee -a "$OUT_DIR/summary.txt"
docker run --rm \
-v "$REPO/requirements.txt:/work/requirements.txt:ro" \
-w /work \
python:3.12-slim sh -c \
"pip install --quiet --no-cache-dir --disable-pip-version-check pip-audit 2>/dev/null && pip-audit --requirement requirements.txt --strict --format=columns" \
> "$OUT_DIR/pip-audit.txt" 2>&1
# Capture the container's exit status immediately, before any other command
# overwrites $?. A failed tool install also lands here as a non-zero status.
PIPS=$?
echo " exit=$PIPS ($OUT_DIR/pip-audit.txt)" | tee -a "$OUT_DIR/summary.txt"
# --- bandit against the LIVE deploy dir ---------------------------------
# Scan what's actually running, not what's in git — catches drift between
# forge HEAD and maple. B608 (SQL injection via dynamic strings) is
# skipped globally: every dynamic SQL build in this codebase uses
# bound parameters for data and structural placeholders only.
DEPLOY_DIR="${DEPLOY_DIR:-$HOME/docker/stacks/nas-burnin}"
echo "--- bandit (deploy: $DEPLOY_DIR) ---" | tee -a "$OUT_DIR/summary.txt"
docker run --rm \
-v "$DEPLOY_DIR/app:/src:ro" \
python:3.12-slim sh -c \
"pip install --quiet --no-cache-dir --disable-pip-version-check bandit 2>/dev/null && bandit -r /src -ll -ii --skip B608" \
> "$OUT_DIR/bandit.txt" 2>&1
# Must directly follow the docker run so $? is bandit's container status.
BANDITS=$?
echo " exit=$BANDITS ($OUT_DIR/bandit.txt)" | tee -a "$OUT_DIR/summary.txt"
# --- mypy against the deploy dir (gating as of 1.0.0-40) ----------------
# Type checker — surfaces None-handling bugs and missing-attribute errors
# the runtime would have caught at the worst possible moment.
#
# Mount at /opt/app/app so internal `from . import X` resolves through
# the `app` package (not `src`). Without this the relative imports inside
# subpackages like burnin/ produce spurious "Module 'src' has no
# attribute 'X'" errors that look like real bugs but are scan-env noise.
#
# Now counted toward TOTAL_EXIT — the codebase is fully clean under
# `--ignore-missing-imports --no-strict-optional`. New errors fail the scan.
echo "--- mypy ---" | tee -a "$OUT_DIR/summary.txt"
docker run --rm \
-v "$DEPLOY_DIR/app:/opt/app/app:ro" \
-w /opt/app \
python:3.12-slim sh -c \
"pip install --quiet --no-cache-dir --disable-pip-version-check mypy 2>&1 | tail -3 && mypy --ignore-missing-imports --no-strict-optional app" \
> "$OUT_DIR/mypy.txt" 2>&1
# Must directly follow the docker run so $? is mypy's container status.
MYPY=$?
echo " exit=$MYPY ($OUT_DIR/mypy.txt)" | tee -a "$OUT_DIR/summary.txt"
# --- gitleaks against the full git history ------------------------------
# Scans the dedicated checkout in $REPO (read-only mount), not the deploy
# dir. The image tag is pinned via GITLEAKS_VERSION.
echo "--- gitleaks ---" | tee -a "$OUT_DIR/summary.txt"
docker run --rm \
-v "$REPO:/repo:ro" \
"zricethezav/gitleaks:v$GITLEAKS_VERSION" \
detect --source /repo --no-banner --redact --verbose \
> "$OUT_DIR/gitleaks.txt" 2>&1
# Must directly follow the docker run so $? is gitleaks' container status.
LEAKS=$?
echo " exit=$LEAKS ($OUT_DIR/gitleaks.txt)" | tee -a "$OUT_DIR/summary.txt"
# --- summary + notification --------------------------------------------
# Sum of the four scanners' exit statuses: zero only when every tool passed.
TOTAL_EXIT=$(( PIPS + BANDITS + MYPY + LEAKS ))
{
    echo
    echo "Total findings exit-code sum: $TOTAL_EXIT"
    echo " pip-audit: $PIPS"
    echo " bandit: $BANDITS"
    echo " mypy: $MYPY"
    echo " gitleaks: $LEAKS"
} >> "$OUT_DIR/summary.txt"

if [ "$TOTAL_EXIT" -ne 0 ]; then
    printf '%s — findings (pip-audit=%d bandit=%d mypy=%d gitleaks=%d) — see %s\n' \
        "$DATE" "$PIPS" "$BANDITS" "$MYPY" "$LEAKS" "$OUT_DIR" >> "$SUMMARY"
    # Hook for downstream notification — wire to your existing Mattermost
    # / Fastmail / webhook chain. Stays a no-op until SECURITY_SCAN_WEBHOOK
    # is set in the systemd unit's Environment=.
    if [ -n "${SECURITY_SCAN_WEBHOOK:-}" ]; then
        # Best-effort: a dead or slow webhook must not change the scan
        # result, and --max-time keeps it from stalling the run.
        curl -fsS --max-time 30 -X POST -H 'Content-Type: text/plain' \
            --data-binary "@$OUT_DIR/summary.txt" \
            "$SECURITY_SCAN_WEBHOOK" || true
    fi
fi

# Retention — prune daily scan directories last modified more than 30 days ago.
find "$OUT_BASE" -maxdepth 1 -type d -name "scan-*" -mtime +30 \
    -exec rm -rf {} \;

# Exit statuses are 8 bits wide: an unclamped sum such as 127 + 129 = 256
# would wrap to 0 and report a failing scan as clean. Sums up to 255 pass
# through unchanged.
if [ "$TOTAL_EXIT" -gt 255 ]; then
    exit 255
fi
exit "$TOTAL_EXIT"

View file

@ -0,0 +1,15 @@
[Unit]
Description=Daily security scan of nas-burnin
# No Requires=security-scan.service here. A Requires= on the timer pulls the
# service in whenever the timer unit itself is started (boot, login, enable
# --now), running the scan outside the schedule. With no Unit= set, the
# timer already activates the service of the same name when it elapses.

[Timer]
# 03:30 local — runs after the in-app retention/backup job (03:00) so the
# nightly DB snapshot has already landed.
OnCalendar=*-*-* 03:30:00
# If maple was off at 03:30, fire on next boot — we'd rather have a late
# scan than miss a day entirely.
Persistent=true
RandomizedDelaySec=10m

[Install]
WantedBy=timers.target

0
tests/__init__.py Normal file
View file

View file

@ -0,0 +1,77 @@
"""Verifies the Spearfoot tunables (block_size, block_buffer, passes)
actually thread through to the badblocks command line.
These three settings are exposed in Settings → Burn-in. Without a test,
nothing catches if a future refactor drops one of the flags or reads
from the wrong attribute. The defaults match the Spearfoot disk-burnin.sh
community script; non-defaults can roughly halve runtime on multi-TB
drives at the cost of more RAM.
Run inside the container image so app deps are present.
"""
from __future__ import annotations
import unittest
from app.burnin.stages import _build_badblocks_cmd
from app.config import settings
class TestBadblocksCmd(unittest.TestCase):
    """Checks that the surface-validate tunables reach the badblocks command."""

    # Settings attributes each test may mutate; snapshotted and restored
    # around every test.
    _TUNABLES = (
        "surface_validate_block_size",
        "surface_validate_block_buffer",
        "surface_validate_passes",
    )

    def setUp(self):
        # Remember the current values so a test can change them freely
        # without leaking into sibling tests or the running process.
        self._snap = tuple(getattr(settings, name) for name in self._TUNABLES)

    def tearDown(self):
        for name, value in zip(self._TUNABLES, self._snap):
            setattr(settings, name, value)

    def test_defaults_match_spearfoot(self):
        """With untouched settings the command carries -b 4096 -c 64 -p 1,
        the disk-burnin.sh community script's recommendation for HDDs."""
        cmd = _build_badblocks_cmd("sda")
        for fragment in ("-b 4096", "-c 64", "-p 1", "/dev/sda"):
            self.assertIn(fragment, cmd)
        # Destructive write+verify mode must always be present — anything
        # else (read-only, non-destructive) defeats the purpose of burn-in.
        self.assertIn("-wsv", cmd)

    def test_tunables_propagate_to_cmd(self):
        """Operator-chosen values (say a paranoid 3-pass burn-in on a
        suspect drive, or 8 KiB blocks for a faster scan of a 24 TB HDD)
        must show up in the shell command."""
        settings.surface_validate_block_size = 8192
        settings.surface_validate_block_buffer = 128
        settings.surface_validate_passes = 3
        cmd = _build_badblocks_cmd("sdb")
        for fragment in ("-b 8192", "-c 128", "-p 3"):
            self.assertIn(fragment, cmd)
        # The defaults must not leak through alongside the overrides.
        for stale in ("-b 4096", "-c 64"):
            self.assertNotIn(stale, cmd)
        self.assertIn("/dev/sdb", cmd)

    def test_pid_capture_wrapper_intact(self):
        """The `sh -c 'echo PID:$$; exec ...'` wrapper is what makes
        out-of-band kill -9 work over a fresh SSH session — asyncssh's
        signal channel is silently ignored by sshd. If a future refactor
        drops the wrapper, a cancel won't actually stop the test."""
        cmd = _build_badblocks_cmd("sda")
        expected_prefix = "sh -c 'echo PID:$$; exec badblocks"
        self.assertTrue(cmd.startswith(expected_prefix))
        self.assertTrue(cmd.endswith("'"))
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1,125 @@
"""Verifies _BadblocksProgress translates per-phase badblocks output
into a monotonic 0-99% overall progress.
`badblocks -w` cycles through 4 patterns × {write, verify} = 8 phases.
Each phase prints "XX% done" relative to its own 0-100 range. Without
this translation the dashboard appeared to "rewind" every ~2 hours
when a new phase started and two drives racing each other could
look 4× apart in displayed progress despite identical hardware.
Run inside the container image so app deps are present.
"""
from __future__ import annotations
import unittest
from app.burnin.stages import _BadblocksProgress
class TestBadblocksProgress(unittest.TestCase):
    """Exercises _BadblocksProgress: phase tracking from badblocks output
    lines and the per-phase to overall percentage translation."""

    def test_default_phase_one(self):
        """Before any header, treat as start of pattern-1 write."""
        p = _BadblocksProgress()
        self.assertEqual(p.phase, 1)
        self.assertEqual(p.overall_pct, 0)

    def test_pattern_headers_set_phase(self):
        """0xaa→1, 0x55→3, 0xff→5, 0x00→7 (write phases)."""
        p = _BadblocksProgress()
        cases = [
            ("Testing with pattern 0xaa: ", 1),
            ("Testing with pattern 0x55: ", 3),
            ("Testing with pattern 0xff: ", 5),
            ("Testing with pattern 0x00: ", 7),
        ]
        # One parser instance is fed every header in order, as in a real run.
        # subTest reports each failing header instead of stopping at the first.
        for header, want in cases:
            with self.subTest(header=header):
                p.update(header)
                self.assertEqual(p.phase, want, f"after {header!r}")

    def test_verify_advances_to_next_phase(self):
        """`Reading and comparing` after `Testing with pattern 0x55`
        (phase 3) advances to phase 4."""
        p = _BadblocksProgress()
        p.update("Testing with pattern 0x55: 100.00% done")
        self.assertEqual(p.phase, 3)
        p.update("Reading and comparing: 0.00% done")
        self.assertEqual(p.phase, 4)

    def test_overall_pct_at_phase_boundaries(self):
        """Verify the math at each phase boundary: phase N at 100% =
        N * 12.5% overall (clipped to 99 at the end)."""
        cases = [
            (1, 0.0, 0),     # start of run
            (1, 100.0, 12),  # 100/800 = 12.5
            (2, 100.0, 25),  # 200/800
            (4, 100.0, 50),  # 400/800
            (7, 100.0, 87),  # 700/800
            (8, 100.0, 99),  # 800/800 → clipped to 99
        ]
        for phase, phase_pct, want in cases:
            # A fresh instance per case; subTest keeps the remaining
            # boundaries running when one of them fails.
            with self.subTest(phase=phase, phase_pct=phase_pct):
                p = _BadblocksProgress()
                p.phase = phase
                p.phase_pct = phase_pct
                self.assertEqual(
                    p.overall_pct, want,
                    f"phase={phase} phase_pct={phase_pct}",
                )

    def test_realistic_sequence(self):
        """End-to-end: feed a synthetic badblocks output stream and
        check the overall percent stays monotonically non-decreasing."""
        lines = [
            "Testing with pattern 0xaa: ",
            "10.00% done, 1:00:00 elapsed. (0/0/0 errors)",
            "50.00% done, 5:00:00 elapsed. (0/0/0 errors)",
            "99.99% done, 10:00:00 elapsed. (0/0/0 errors)",
            "Reading and comparing: ",
            "0.00% done, 10:00:01 elapsed. (0/0/0 errors)",
            "50.00% done, 12:30:00 elapsed. (0/0/0 errors)",
            "Testing with pattern 0x55: ",
            "0.00% done, 15:00:00 elapsed. (0/0/0 errors)",
            "50.00% done, 17:30:00 elapsed. (0/0/0 errors)",
        ]
        p = _BadblocksProgress()
        seen = []
        for line in lines:
            p.update(line)
            seen.append(p.overall_pct)
        self.assertEqual(
            seen, sorted(seen),
            f"progress went backwards: {seen}",
        )
        # Sanity: by the time we're halfway through pattern-2 write
        # (phase 3, 50%), we should report ((3-1)*100 + 50) / 8 = 31%.
        self.assertEqual(seen[-1], 31)

    def test_drives_at_different_phases_show_different_overall(self):
        """The original bug: two drives at the same per-phase 60%
        but different phases used to look identical (both '60%').
        Now they correctly diverge."""
        slow = _BadblocksProgress()
        slow.update("Testing with pattern 0xaa: ")
        slow.update("60.00% done")

        fast = _BadblocksProgress()
        fast.update("Testing with pattern 0xaa: ")
        fast.update("99.99% done")
        fast.update("Reading and comparing: ")
        fast.update("60.00% done")

        # slow: 60/800 = 7%; fast: (1*100 + 60)/800 = 20%
        self.assertEqual(slow.overall_pct, 7)
        self.assertEqual(fast.overall_pct, 20)

    def test_unknown_pattern_does_not_crash(self):
        """An unrecognized pattern (e.g. badblocks future versions or
        custom patterns) just leaves phase unchanged."""
        p = _BadblocksProgress()
        p.update("Testing with pattern 0xab: ")
        # phase stays at the default 1
        self.assertEqual(p.phase, 1)
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1,100 @@
"""Verifies _update_stage_bb_phase actually writes to burnin_stages
and the migration adds the columns idempotently.
The drive-drawer's 4-meter UI depends on these columns being populated
on every parser tick. If a future refactor drops the call or breaks
the migration, this test catches it before users see the meters
go blank.
Run inside the container image so app deps are present.
"""
from __future__ import annotations
import os
import tempfile
import unittest
import aiosqlite
async def _setup_db_with_stage() -> str:
    """Create a temp SQLite database seeded with one drive, one running
    burn-in job and one running ``surface_validate`` stage.

    Points ``settings.db_path`` at the new file, runs ``init_db()``, inserts
    the three rows, and returns the file's path.
    """
    from app.config import settings

    handle, db_file = tempfile.mkstemp(suffix=".db")
    os.close(handle)  # only the path is needed; aiosqlite opens it itself
    settings.db_path = db_file

    # Imported only after db_path points at the temp file, matching the
    # original statement order. NOTE(review): presumably this matters if
    # app.database reads the setting at import time — confirm before hoisting.
    from app.database import init_db

    await init_db()

    seed_statements = (
        "INSERT INTO drives "
        "(truenas_disk_id, devname, serial, model, size_bytes, "
        " temperature_c, smart_health, last_seen_at, last_polled_at) "
        "VALUES ('id-1', 'sda', 'SER1', 'TestModel', 14000000000000, "
        " 30, 'PASSED', '2026-05-09T00:00:00+00:00', "
        " '2026-05-09T00:00:00+00:00')",
        "INSERT INTO burnin_jobs "
        "(drive_id, profile, state, operator, created_at) "
        "VALUES (1, 'surface', 'running', 'op', "
        " '2026-05-09T00:00:00+00:00')",
        "INSERT INTO burnin_stages "
        "(burnin_job_id, stage_name, state) "
        "VALUES (1, 'surface_validate', 'running')",
    )
    async with aiosqlite.connect(db_file) as conn:
        # Order matters: drive, then the job referencing it, then the stage.
        for statement in seed_statements:
            await conn.execute(statement)
        await conn.commit()
    return db_file
class TestBBPhasePersistence(unittest.IsolatedAsyncioTestCase):
    """Checks the bb_phase / bb_phase_pct columns and the helper that
    writes them on the seeded surface_validate stage."""

    async def asyncSetUp(self):
        self.path = await _setup_db_with_stage()

    async def asyncTearDown(self):
        # Best-effort cleanup: a missing temp file is not a test failure.
        try:
            os.unlink(self.path)
        except OSError:
            pass

    async def _read_phase_row(self):
        """Return the (bb_phase, bb_phase_pct) row of the seeded stage."""
        async with aiosqlite.connect(self.path) as conn:
            cursor = await conn.execute(
                "SELECT bb_phase, bb_phase_pct FROM burnin_stages "
                "WHERE burnin_job_id=1 AND stage_name='surface_validate'"
            )
            return await cursor.fetchone()

    async def test_columns_exist_after_init(self):
        async with aiosqlite.connect(self.path) as conn:
            cursor = await conn.execute("PRAGMA table_info(burnin_stages)")
            # The column name is the second field of each table_info row.
            column_names = {info[1] for info in await cursor.fetchall()}
        for expected in ("bb_phase", "bb_phase_pct"):
            self.assertIn(expected, column_names)

    async def test_update_writes_phase_and_pct(self):
        from app.burnin._common import _update_stage_bb_phase
        await _update_stage_bb_phase(1, "surface_validate", 3, 47.5)
        phase, pct = await self._read_phase_row()
        self.assertEqual(phase, 3)
        self.assertAlmostEqual(pct, 47.5)

    async def test_update_overwrites(self):
        """Each tick should replace the previous value, not accumulate."""
        from app.burnin._common import _update_stage_bb_phase
        for tick_phase, tick_pct in ((1, 10.0), (2, 80.0)):
            await _update_stage_bb_phase(
                1, "surface_validate", tick_phase, tick_pct
            )
        phase, pct = await self._read_phase_row()
        # Only the values from the last tick may remain.
        self.assertEqual(phase, 2)
        self.assertAlmostEqual(pct, 80.0)
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

328
tests/test_lifecycle.py Normal file
View file

@ -0,0 +1,328 @@
"""Burn-in lifecycle tests covering the DB helpers in burnin._common,
plus the public surface of start_job + cancel_job that doesn't require
spinning up _run_job (which would need a mocked TrueNASClient + SSH).
These are the safety net Codex flagged as missing — the orchestration
paths were entirely untested before this. Run inside the container
image so app deps (aiosqlite, pydantic-settings, bcrypt) are present.
"""
from __future__ import annotations
import os
import tempfile
import unittest
import aiosqlite
async def _setup_temp_db() -> str:
    """Create and seed a temp database, returning its path.

    Same pattern as test_unlock_flow.py: make a temp file, point
    ``settings.db_path`` at it and run ``init_db()``. Two drives (sda and
    sdb) are then inserted so start_job has something to attach to.
    The caller must unlink the returned path in tearDown.
    """
    handle, db_file = tempfile.mkstemp(suffix=".db")
    os.close(handle)

    from app.config import settings
    settings.db_path = db_file
    from app.database import init_db
    await init_db()

    first_drive = """
        INSERT INTO drives
        (truenas_disk_id, devname, serial, model, size_bytes,
        temperature_c, smart_health, last_seen_at, last_polled_at)
        VALUES ('id-1', 'sda', 'SER1', 'TestModel', 1000, 30, 'PASSED',
        '2026-05-03T00:00:00+00:00', '2026-05-03T00:00:00+00:00')
        """
    second_drive = """
        INSERT INTO drives
        (truenas_disk_id, devname, serial, model, size_bytes,
        temperature_c, smart_health, last_seen_at, last_polled_at)
        VALUES ('id-2', 'sdb', 'SER2', 'TestModel', 1000, 30, 'PASSED',
        '2026-05-03T00:00:00+00:00', '2026-05-03T00:00:00+00:00')
        """

    async with aiosqlite.connect(db_file) as conn:
        for statement in (first_drive, second_drive):
            await conn.execute(statement)
        await conn.commit()
    return db_file
class TestCommonHelpers(unittest.IsolatedAsyncioTestCase):
    """Regression tests for the per-stage DB mutators in app.burnin._common.

    The helpers are pure SQLite writes (no asyncssh, no orchestration), so
    each test calls one and reads the affected row straight back.
    """

    async def _fetch_one(self, sql, params, named=False):
        """Run a single-row query against the temp DB and return the row.

        With ``named=True`` the connection uses ``aiosqlite.Row`` so columns
        can be read by name; otherwise the default row type comes back.
        """
        async with aiosqlite.connect(self.db_path) as conn:
            if named:
                conn.row_factory = aiosqlite.Row
            cursor = await conn.execute(sql, params)
            return await cursor.fetchone()

    async def asyncSetUp(self):
        self.db_path = await _setup_temp_db()
        # Seed one running job plus three pending stages we can mutate.
        job_values = (1, "full", "running", 0, "test", "2026-05-03T00:00:00+00:00")
        async with aiosqlite.connect(self.db_path) as conn:
            cursor = await conn.execute(
                """INSERT INTO burnin_jobs
                (drive_id, profile, state, percent, operator, created_at)
                VALUES (?,?,?,?,?,?) RETURNING id""",
                job_values,
            )
            inserted = await cursor.fetchone()
            self.job_id = inserted[0]
            for stage_name in ("precheck", "surface_validate", "final_check"):
                stage_values = (self.job_id, stage_name, "pending")
                await conn.execute(
                    "INSERT INTO burnin_stages (burnin_job_id, stage_name, state) VALUES (?,?,?)",
                    stage_values,
                )
            await conn.commit()

    async def asyncTearDown(self):
        # Best-effort cleanup of the temp database file.
        try:
            os.unlink(self.db_path)
        except OSError:
            pass

    async def test_start_stage_marks_running(self):
        from app.burnin import _common
        await _common._start_stage(self.job_id, "precheck")
        stage = await self._fetch_one(
            "SELECT state, started_at FROM burnin_stages "
            "WHERE burnin_job_id=? AND stage_name='precheck'",
            (self.job_id,),
            named=True,
        )
        self.assertEqual(stage["state"], "running")
        self.assertIsNotNone(stage["started_at"])

    async def test_finish_stage_success_records_duration(self):
        from app.burnin import _common
        await _common._start_stage(self.job_id, "precheck")
        await _common._finish_stage(self.job_id, "precheck", success=True)
        stage = await self._fetch_one(
            "SELECT state, percent, duration_seconds FROM burnin_stages "
            "WHERE burnin_job_id=? AND stage_name='precheck'",
            (self.job_id,),
            named=True,
        )
        self.assertEqual(stage["state"], "passed")
        self.assertEqual(stage["percent"], 100)
        # Duration is seconds since started_at: tiny here, but present
        # and never negative.
        elapsed = stage["duration_seconds"]
        self.assertIsNotNone(elapsed)
        self.assertGreaterEqual(elapsed, 0)

    async def test_finish_stage_failure_carries_error_text(self):
        from app.burnin import _common
        await _common._start_stage(self.job_id, "surface_validate")
        await _common._finish_stage(
            self.job_id,
            "surface_validate",
            success=False,
            error_text="mock failure",
        )
        stage = await self._fetch_one(
            "SELECT state, percent, error_text FROM burnin_stages "
            "WHERE burnin_job_id=? AND stage_name='surface_validate'",
            (self.job_id,),
            named=True,
        )
        self.assertEqual(stage["state"], "failed")
        self.assertIsNone(stage["percent"])
        self.assertEqual(stage["error_text"], "mock failure")

    async def test_finish_stage_preserves_existing_error(self):
        """Finishing with error_text=None must keep the text that
        _set_stage_error stored earlier instead of overwriting it with
        NULL. This is the bug that 1.0.0-12-ish fixed."""
        from app.burnin import _common
        stage_name = "surface_validate"
        await _common._start_stage(self.job_id, stage_name)
        await _common._set_stage_error(self.job_id, stage_name, "set by stage")
        await _common._finish_stage(
            self.job_id, stage_name, success=False, error_text=None,
        )
        stage = await self._fetch_one(
            "SELECT error_text FROM burnin_stages "
            "WHERE burnin_job_id=? AND stage_name='surface_validate'",
            (self.job_id,),
        )
        self.assertEqual(stage[0], "set by stage")

    async def test_recalculate_progress_weights_correctly(self):
        from app.burnin import _common
        # precheck fully passed, surface_validate running at 50%.
        await _common._start_stage(self.job_id, "precheck")
        await _common._finish_stage(self.job_id, "precheck", success=True)
        await _common._start_stage(self.job_id, "surface_validate")
        await _common._update_stage_percent(self.job_id, "surface_validate", 50)
        await _common._recalculate_progress(self.job_id)
        job = await self._fetch_one(
            "SELECT percent, stage_name FROM burnin_jobs WHERE id=?",
            (self.job_id,),
            named=True,
        )
        # Weights: precheck=5, surface=65, final=5, so 75 in total for
        # these three stages. Done so far: 5 (precheck) + 32.5 (half of
        # 65) = 37.5, and 37.5 / 75 = 50%.
        self.assertEqual(job["percent"], 50)
        self.assertEqual(job["stage_name"], "surface_validate")

    async def test_is_cancelled_reads_job_state(self):
        from app.burnin import _common
        before = await _common._is_cancelled(self.job_id)
        self.assertFalse(before)
        # Flip the job state directly in the DB, bypassing cancel_job.
        async with aiosqlite.connect(self.db_path) as conn:
            await conn.execute(
                "UPDATE burnin_jobs SET state='cancelled' WHERE id=?",
                (self.job_id,),
            )
            await conn.commit()
        after = await _common._is_cancelled(self.job_id)
        self.assertTrue(after)

    async def test_append_stage_log_concatenates(self):
        from app.burnin import _common
        for chunk in ("alpha\n", "beta\n"):
            await _common._append_stage_log(self.job_id, "precheck", chunk)
        stage = await self._fetch_one(
            "SELECT log_text FROM burnin_stages "
            "WHERE burnin_job_id=? AND stage_name='precheck'",
            (self.job_id,),
        )
        self.assertEqual(stage[0], "alpha\nbeta\n")
class TestStartCancelJob(unittest.IsolatedAsyncioTestCase):
    """start_job + cancel_job touch the burnin orchestrator state.

    start_job spawns _run_job tasks that try to acquire the semaphore; the
    tests cancel them right away so no real burn-in stage ever runs. The
    real test value here is "did start_job create the right DB rows" and
    "does cancel_job mark them correctly".
    """

    async def asyncSetUp(self):
        self.db_path = await _setup_temp_db()
        # Initialise burnin without a real TrueNASClient — pass None.
        # _run_job will hit the assert at top, but the test cancels
        # before _run_job's first await actually runs.
        from app import burnin
        burnin._unlock_grants.clear()
        burnin._active_tasks.clear()
        import asyncio
        burnin._semaphore = asyncio.Semaphore(2)
        burnin._client = None  # unused by start_job itself

    async def asyncTearDown(self):
        # Cancel any outstanding tasks so they don't bleed into later tests.
        import asyncio
        from app import burnin
        outstanding = list(burnin._active_tasks.values())
        for task in outstanding:
            task.cancel()
        # cancel() only *requests* cancellation. Wait for the tasks to
        # actually unwind so none of them is still using the database file
        # when it is removed below. return_exceptions=True swallows the
        # resulting CancelledError (or any error the task died with).
        if outstanding:
            await asyncio.gather(*outstanding, return_exceptions=True)
        try:
            os.unlink(self.db_path)
        except OSError:
            pass

    async def test_start_job_inserts_queued_row_and_stages(self):
        from app import burnin
        job_id = await burnin.start_job(1, "surface", "test")
        async with aiosqlite.connect(self.db_path) as db:
            db.row_factory = aiosqlite.Row
            cur = await db.execute(
                "SELECT state, profile, operator FROM burnin_jobs WHERE id=?",
                (job_id,),
            )
            row = await cur.fetchone()
            cur = await db.execute(
                "SELECT stage_name FROM burnin_stages "
                "WHERE burnin_job_id=? ORDER BY id",
                (job_id,),
            )
            stages = [r[0] for r in await cur.fetchall()]
        # State should be queued OR running (the spawned _run_job may
        # have raced into the semaphore by now).
        self.assertIn(row["state"], ("queued", "running"))
        self.assertEqual(row["profile"], "surface")
        self.assertEqual(row["operator"], "test")
        # surface profile = precheck + surface_validate + final_check.
        self.assertEqual(stages, ["precheck", "surface_validate", "final_check"])

    async def test_start_job_rejects_duplicate_active(self):
        from app import burnin
        await burnin.start_job(1, "surface", "test")
        # Second start on the same drive should be refused at the
        # ValueError level (caught by the inline duplicate check or by
        # the partial unique index).
        with self.assertRaises(ValueError):
            await burnin.start_job(1, "surface", "test")

    async def test_cancel_job_marks_state(self):
        from app import burnin
        job_id = await burnin.start_job(1, "surface", "test")
        ok = await burnin.cancel_job(job_id, "test")
        self.assertTrue(ok)
        async with aiosqlite.connect(self.db_path) as db:
            cur = await db.execute(
                "SELECT state FROM burnin_jobs WHERE id=?", (job_id,)
            )
            row = await cur.fetchone()
        self.assertEqual(row[0], "cancelled")

    async def test_cancel_job_returns_false_for_terminal_state(self):
        from app import burnin
        # Create an already-passed job by hand; there is nothing to cancel.
        async with aiosqlite.connect(self.db_path) as db:
            cur = await db.execute(
                """INSERT INTO burnin_jobs
                (drive_id, profile, state, operator, created_at)
                VALUES (?,?,?,?,?) RETURNING id""",
                (2, "surface", "passed", "x", "2026-05-03T00:00:00+00:00"),
            )
            job_id = (await cur.fetchone())[0]
            await db.commit()
        ok = await burnin.cancel_job(job_id, "test")
        self.assertFalse(ok)
class TestRateLimiter(unittest.TestCase):
    """Exercises app.auth._RateLimiter, the generic rate-limit class
    added in 1.0.0-33 for the unlock + password-change endpoints."""

    def test_register_allows_under_threshold(self):
        from app.auth import _RateLimiter
        limiter = _RateLimiter("test", threshold=3, window_s=60, lockout_s=60)
        alice = ("k", "alice")
        # Two attempts stay below the threshold of three.
        for _ in range(2):
            self.assertEqual(limiter.register(alice), "ok")

    def test_register_trips_at_threshold(self):
        from app.auth import _RateLimiter
        limiter = _RateLimiter("test", threshold=3, window_s=60, lockout_s=60)
        alice = ("k", "alice")
        # Attempts 1 and 2 pass, attempt 3 reaches the threshold and is
        # the one that trips, attempt 4 sees the lockout from the prior call.
        expected_outcomes = ["ok", "ok", "now_locked_out", "locked_out"]
        for expected in expected_outcomes:
            self.assertEqual(limiter.register(alice), expected)

    def test_clear_removes_counter_and_lockout(self):
        from app.auth import _RateLimiter
        limiter = _RateLimiter("test", threshold=2, window_s=60, lockout_s=60)
        alice = ("k", "alice")
        limiter.register(alice)
        limiter.register(alice)  # second attempt trips the lockout
        self.assertIsNotNone(limiter.locked_until(alice))
        limiter.clear(alice)
        self.assertIsNone(limiter.locked_until(alice))
        # After clear() the key starts counting from scratch.
        self.assertEqual(limiter.register(alice), "ok")

    def test_separate_keys_dont_interfere(self):
        from app.auth import _RateLimiter
        limiter = _RateLimiter("test", threshold=2, window_s=60, lockout_s=60)
        alice = ("k", "alice")
        bob = ("k", "bob")
        limiter.register(alice)
        limiter.register(alice)  # trips alice
        # Bob's attempt is allowed and untouched by alice's lockout.
        self.assertEqual(limiter.register(bob), "ok")
        self.assertIsNone(limiter.locked_until(bob))
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

283
tests/test_pool_parser.py Normal file
View file

@ -0,0 +1,283 @@
"""Unit tests for the zpool-list and lsblk parsers in ssh_client.
These cover the structural cases that drive the pool-membership lock:
mirror/raidz/draid container vdevs, single-disk vdevs at depth 1, the
flattened-indentation behaviour of `zpool list -vHP` on TrueNAS, partition
suffix stripping for NVMe and SCSI, and the cache/log/spare/special
section markers (including plural variants).
Run with: python -m unittest discover tests/ -v
"""
import unittest
from app.ssh_client import (
_parse_zpool_list_output,
_parse_lsblk_zfs_output,
_parse_smart_health_batch,
)
class TestParseZpoolList(unittest.TestCase):
    """Cases for _parse_zpool_list_output.

    Every test feeds tab-separated listing text to the parser and checks
    the returned mapping of base device name to its pool and role.
    """

    def test_empty_output_returns_empty(self):
        parsed = _parse_zpool_list_output("")
        self.assertEqual(parsed, {})

    def test_single_pool_with_mirror(self):
        # TrueNAS-flattened output: the pool sits at depth 0, while the
        # vdev type and its devices all sit at depth 1.
        listing = (
            "boot-pool\t232G\t8.4G\t224G\t-\t-\t17%\t3%\t1.00x\tONLINE\t-\n"
            "\tmirror-0\t232G\t8.4G\t224G\t-\t-\t17%\t3.6%\t-\tONLINE\n"
            "\t/dev/nvme0n1p3\t232G\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sdd3\t232G\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        expected = {
            "nvme0n1": {"pool": "boot-pool", "role": "data"},
            "sdd": {"pool": "boot-pool", "role": "data"},
        }
        self.assertEqual(_parse_zpool_list_output(listing), expected)

    def test_raidz2_pool(self):
        listing = (
            "tank\t127T\t4.5T\t122T\t-\t-\t0%\t3%\t1.00x\tONLINE\t-\n"
            "\traidz2-0\t127T\t4.5T\t122T\t-\t-\t0%\t3%\t-\tONLINE\n"
            "\t/dev/sdc\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sde\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sdf\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        members = _parse_zpool_list_output(listing)
        self.assertEqual(set(members), {"sdc", "sde", "sdf"})
        expected_entry = {"pool": "tank", "role": "data"}
        for entry in members.values():
            self.assertEqual(entry, expected_entry)

    def test_draid_pool(self):
        listing = (
            "warm\t100T\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\tdraid2:8d:10c:1s-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sdg\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sdh\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        members = _parse_zpool_list_output(listing)
        for devname in ("sdg", "sdh"):
            self.assertEqual(members[devname], {"pool": "warm", "role": "data"})

    def test_single_disk_vdev_at_depth_1(self):
        # No mirror/raidz wrapper: the `/dev/...` line itself is at depth 1.
        listing = (
            "scratch\t1T\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\t/dev/sdi\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        expected = {"sdi": {"pool": "scratch", "role": "data"}}
        self.assertEqual(_parse_zpool_list_output(listing), expected)

    def test_section_markers_switch_role(self):
        # cache / log / spare / special / dedup markers sit at depth 1;
        # the /dev/... lines that follow (also depth 1) take that role.
        listing = (
            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\tmirror-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sdb\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\tcache\n"
            "\t/dev/nvme1n1\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\tlog\n"
            "\t/dev/nvme2n1\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\tspare\n"
            "\t/dev/sdz\t-\t-\t-\t-\t-\t-\t-\t-\tAVAIL\n"
        )
        members = _parse_zpool_list_output(listing)
        expected_roles = (
            ("sda", "data"),
            ("sdb", "data"),
            ("nvme1n1", "cache"),
            ("nvme2n1", "log"),
            ("sdz", "spare"),
        )
        for devname, role in expected_roles:
            self.assertEqual(members[devname], {"pool": "tank", "role": role})

    def test_section_markers_plurals_normalize(self):
        # ZFS sometimes emits 'logs'/'spares' instead of 'log'/'spare'.
        listing = (
            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\tlogs\n"
            "\t/dev/nvme0n1\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\tspares\n"
            "\t/dev/sdz\t-\t-\t-\t-\t-\t-\t-\t-\tAVAIL\n"
        )
        members = _parse_zpool_list_output(listing)
        self.assertEqual(members["nvme0n1"]["role"], "log")
        self.assertEqual(members["sdz"]["role"], "spare")

    def test_special_and_dedup_section(self):
        listing = (
            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\tspecial\n"
            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\tdedup\n"
            "\t/dev/sdb\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        members = _parse_zpool_list_output(listing)
        self.assertEqual(members["sda"]["role"], "special")
        self.assertEqual(members["sdb"]["role"], "dedup")

    def test_partition_suffix_stripped(self):
        listing = (
            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\tmirror-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sda3\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/nvme0n1p3\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        members = _parse_zpool_list_output(listing)
        # Keys are whole-disk names; the partition names must not appear.
        self.assertIn("sda", members)
        self.assertNotIn("sda3", members)
        self.assertIn("nvme0n1", members)
        self.assertNotIn("nvme0n1p3", members)

    def test_long_scsi_devname(self):
        # Names past sdz: sdaa, sdab, ...
        listing = (
            "big\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\traidz3-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sdaa\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sdab1\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        members = _parse_zpool_list_output(listing)
        self.assertEqual(members["sdaa"]["pool"], "big")
        # sdab1 is reported under its disk name, partition stripped.
        self.assertEqual(members["sdab"]["pool"], "big")

    def test_pool_name_with_dashes_dots_underscores(self):
        listing = (
            "my-cool_pool.v2\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        members = _parse_zpool_list_output(listing)
        self.assertEqual(members["sda"]["pool"], "my-cool_pool.v2")

    def test_multiple_pools(self):
        listing = (
            "boot-pool\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\tmirror-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/nvme0n1p3\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sdd3\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\traidz2-0\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "\t/dev/sdb\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        members = _parse_zpool_list_output(listing)
        expected_pools = (
            ("nvme0n1", "boot-pool"),
            ("sdd", "boot-pool"),
            ("sda", "tank"),
            ("sdb", "tank"),
        )
        for devname, pool in expected_pools:
            self.assertEqual(members[devname]["pool"], pool)

    def test_pool_role_resets_between_pools(self):
        # A section marker in pool A must not carry over into pool B.
        listing = (
            "a\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\tcache\n"
            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
            "b\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\t/dev/sdb\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        members = _parse_zpool_list_output(listing)
        self.assertEqual(members["sda"]["role"], "cache")
        self.assertEqual(members["sdb"]["role"], "data")

    def test_blank_lines_skipped(self):
        listing = (
            "\n"
            "tank\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\t-\n"
            "\n"
            "\t/dev/sda\t-\t-\t-\t-\t-\t-\t-\t-\tONLINE\n"
        )
        expected = {"sda": {"pool": "tank", "role": "data"}}
        self.assertEqual(_parse_zpool_list_output(listing), expected)
class TestParseLsblkZfs(unittest.TestCase):
    """Cases for _parse_lsblk_zfs_output.

    Input is "name fstype" listing text; the expected result is the set
    of whole-disk names that carry a zfs_member entry.
    """

    def test_empty_returns_empty_set(self):
        disks = _parse_lsblk_zfs_output("")
        self.assertEqual(disks, set())

    def test_partition_zfs_member(self):
        # Typical TrueNAS layout: the zpool members are partitions.
        listing = (
            "sda \n"
            "sda1 \n"
            "sda3 zfs_member\n"
            "sdb \n"
            "sdb3 zfs_member\n"
        )
        self.assertEqual(_parse_lsblk_zfs_output(listing), {"sda", "sdb"})

    def test_whole_disk_zfs_member(self):
        # Some configurations put zfs_member on the whole disk.
        listing = (
            "sdc zfs_member\n"
        )
        self.assertEqual(_parse_lsblk_zfs_output(listing), {"sdc"})

    def test_nvme_partitioned_and_whole(self):
        listing = (
            "nvme0n1 \n"
            "nvme0n1p3 zfs_member\n"
            "nvme1n1 zfs_member\n"
        )
        disks = _parse_lsblk_zfs_output(listing)
        self.assertEqual(disks, {"nvme0n1", "nvme1n1"})

    def test_non_zfs_fstypes_ignored(self):
        listing = (
            "sda1 ext4\n"
            "sda2 swap\n"
            "sdb1 btrfs\n"
        )
        self.assertEqual(_parse_lsblk_zfs_output(listing), set())

    def test_long_scsi_devnames(self):
        listing = (
            "sdaa zfs_member\n"
            "sdab1 zfs_member\n"
        )
        self.assertEqual(_parse_lsblk_zfs_output(listing), {"sdaa", "sdab"})

    def test_short_lines_skipped(self):
        # Lines without an fstype column and blank lines contribute nothing.
        listing = (
            "sda\n"
            "\n"
            "sdb1 zfs_member\n"
        )
        self.assertEqual(_parse_lsblk_zfs_output(listing), {"sdb"})
class TestParseSmartHealthBatch(unittest.TestCase):
    """Cases for _parse_smart_health_batch.

    The batch text holds one ``@@<devname>@@`` ... ``@@END@@`` section per
    drive; the expected result maps each devname to a health string.
    """

    def test_passed_drive(self):
        batch = (
            "@@sda@@\n"
            "smartctl 7.4 2023-08-01 r5530 [x86_64-linux-6.6]\n"
            "SMART overall-health self-assessment test result: PASSED\n"
            "@@END@@\n"
        )
        self.assertEqual(_parse_smart_health_batch(batch), {"sda": "PASSED"})

    def test_failed_drive(self):
        batch = (
            "@@sdb@@\n"
            "SMART overall-health self-assessment test result: FAILED!\n"
            "@@END@@\n"
        )
        # The trailing "!" is not part of the reported state.
        self.assertEqual(_parse_smart_health_batch(batch), {"sdb": "FAILED"})

    def test_unknown_when_no_marker(self):
        batch = (
            "@@sdc@@\n"
            "/dev/sdc: Unknown USB bridge\n"
            "@@END@@\n"
        )
        self.assertEqual(_parse_smart_health_batch(batch), {"sdc": "UNKNOWN"})

    def test_multiple_drives_mixed_states(self):
        batch = (
            "@@sda@@\n"
            "SMART overall-health self-assessment test result: PASSED\n"
            "@@END@@\n"
            "@@sdb@@\n"
            "SMART overall-health self-assessment test result: FAILED!\n"
            "@@END@@\n"
            "@@nvme0n1@@\n"
            "SMART overall-health self-assessment test result: PASSED\n"
            "@@END@@\n"
        )
        expected = {"sda": "PASSED", "sdb": "FAILED", "nvme0n1": "PASSED"}
        self.assertEqual(_parse_smart_health_batch(batch), expected)

    def test_empty_returns_empty(self):
        health = _parse_smart_health_batch("")
        self.assertEqual(health, {})
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

View file

@ -0,0 +1,79 @@
"""Route-resolution invariants for the routes/ package.
Guards two historical regressions Codex flagged were untested:
1. /api/v1/burnin/export.csv must resolve to the CSV export, not to
/api/v1/burnin/{job_id} with int("export.csv") failing as a 422. FastAPI's
path matching tries declarations in registration order, so the
literal must be declared before the parameterized route.
2. app.mailer reaches into app.routes for _fetch_drives_for_template
(back-compat from before the routes/ split). The shim re-export
in app/routes/__init__.py must remain importable.
Run inside the container image so app deps are present.
"""
from __future__ import annotations
import unittest
class TestRouteResolution(unittest.TestCase):
    """Invariants on how the routes/ package registers its paths."""

    def test_export_csv_declared_before_job_id(self):
        """Route order in burnin.py: /export.csv must come before
        /{job_id} or FastAPI will int-coerce 'export.csv' and 422.
        """
        from app.routes import burnin as burnin_routes
        literal_path = "/api/v1/burnin/export.csv"
        templated_path = "/api/v1/burnin/{job_id}"
        declared = [route.path for route in burnin_routes.router.routes]
        self.assertIn(literal_path, declared)
        self.assertIn(templated_path, declared)
        literal_pos = declared.index(literal_path)
        templated_pos = declared.index(templated_path)
        self.assertLess(
            literal_pos,
            templated_pos,
            "/export.csv must be registered before /{job_id} or FastAPI "
            "will try to int-coerce 'export.csv' and return 422",
        )

    def test_mailer_backcompat_shim(self):
        """app.mailer imports _fetch_drives_for_template from app.routes
        (NOT app.routes._drives_helpers); the shim re-export in
        routes/__init__.py keeps that working post-split.
        """
        from app.routes import _fetch_drives_for_template
        shim_is_callable = callable(_fetch_drives_for_template)
        self.assertTrue(shim_is_callable)

    def test_all_subrouters_included(self):
        """Sanity check: every path of every public sub-router module in
        app.routes.* also appears on the package-level router. If a
        future split adds a new file but forgets the include, this
        catches it.
        """
        import importlib
        import pkgutil
        import app.routes as routes_pkg

        public_modules = []
        for _finder, name, _is_pkg in pkgutil.iter_modules(routes_pkg.__path__):
            # Underscore-prefixed modules (_helpers, _drives_helpers) are
            # not routers, so they are left out.
            if name.startswith("_"):
                continue
            public_modules.append(name)

        registered_paths = {route.path for route in routes_pkg.router.routes}
        for mod_name in public_modules:
            module = importlib.import_module(f"app.routes.{mod_name}")
            sub_router = getattr(module, "router", None)
            self.assertIsNotNone(
                sub_router,
                f"app.routes.{mod_name} has no `router` attribute",
            )
            for r in sub_router.routes:
                self.assertIn(
                    r.path,
                    registered_paths,
                    f"{mod_name}.router has {r.path} but the package "
                    "router didn't include it",
                )
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

306
tests/test_unlock_flow.py Normal file
View file

@ -0,0 +1,306 @@
"""Unit tests for the pool-drive unlock state machine in burnin.py.
Covers: token validation per pool kind, identity-binding (grant
invalidated when pool_name/pool_role changes), TTL expiry, the
audit-commit-then-arm ordering (a failing audit insert leaves no
in-memory grant), and the unique-active-burnin partial index that
prevents duplicate queued rows for the same drive.
Uses a temporary on-disk SQLite DB and monkeypatches app.config.settings.db_path.
No SSH, no network, no FastAPI.
Run with: python -m unittest discover tests/ -v
"""
import os
import tempfile
import time
import unittest
import aiosqlite
async def _setup_temp_db() -> str:
    """Create a temp SQLite file, point app.config at it and init the schema.

    Three pool-member drives are seeded so the unlock tests have something
    to grant on: sda in tank/data, sdb in boot-pool/data and sdc in
    (exported)/exported. Async-callable from
    IsolatedAsyncioTestCase.asyncSetUp.

    Returns:
        Path of the temp database file.
    """
    handle, db_file = tempfile.mkstemp(suffix=".db")
    os.close(handle)

    from app.config import settings
    settings.db_path = db_file
    from app.database import init_db
    await init_db()

    tank_drive = """
        INSERT INTO drives
        (truenas_disk_id, devname, serial, model, size_bytes,
        temperature_c, smart_health, last_seen_at, last_polled_at,
        pool_name, pool_role, pool_seen_at)
        VALUES ('test-id-1', 'sda', 'TESTSER1', 'TestModel', 1000,
        30, 'PASSED', '2026-05-02T00:00:00+00:00',
        '2026-05-02T00:00:00+00:00',
        'tank', 'data', '2026-05-02T00:00:00+00:00')
        """
    boot_pool_drive = """
        INSERT INTO drives
        (truenas_disk_id, devname, serial, model, size_bytes,
        temperature_c, smart_health, last_seen_at, last_polled_at,
        pool_name, pool_role, pool_seen_at)
        VALUES ('test-id-2', 'sdb', 'TESTSER2', 'TestModel', 1000,
        30, 'PASSED', '2026-05-02T00:00:00+00:00',
        '2026-05-02T00:00:00+00:00',
        'boot-pool', 'data', '2026-05-02T00:00:00+00:00')
        """
    exported_drive = """
        INSERT INTO drives
        (truenas_disk_id, devname, serial, model, size_bytes,
        temperature_c, smart_health, last_seen_at, last_polled_at,
        pool_name, pool_role, pool_seen_at)
        VALUES ('test-id-3', 'sdc', 'TESTSER3', 'TestModel', 1000,
        30, 'PASSED', '2026-05-02T00:00:00+00:00',
        '2026-05-02T00:00:00+00:00',
        '(exported)', 'exported', '2026-05-02T00:00:00+00:00')
        """

    async with aiosqlite.connect(db_file) as conn:
        # Insertion order is fixed; the tests refer to these drives as 1-3.
        for statement in (tank_drive, boot_pool_drive, exported_drive):
            await conn.execute(statement)
        await conn.commit()
    return db_file
class TestUnlockFlow(unittest.IsolatedAsyncioTestCase):
async def asyncSetUp(self):
self.db_path = await _setup_temp_db()
# Reset module state so previous test runs don't bleed in.
from app import burnin
burnin._unlock_grants.clear()
async def asyncTearDown(self):
try:
os.unlink(self.db_path)
except OSError:
pass
# ----- token validation per pool kind -----
async def test_active_pool_token_is_pool_name(self):
from app import burnin
# Drive 1 = tank/data
with self.assertRaises(ValueError):
await burnin.grant_pool_unlock(1, "wrong", "op", "valid reason")
expiry = await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
self.assertGreater(expiry, time.time())
async def test_boot_pool_token_is_destroy_phrase(self):
from app import burnin
# Drive 2 = boot-pool — typing the pool name must NOT work.
with self.assertRaises(ValueError):
await burnin.grant_pool_unlock(2, "boot-pool", "op", "valid reason")
expiry = await burnin.grant_pool_unlock(
2, "DESTROY BOOT POOL", "op", "valid reason"
)
self.assertGreater(expiry, time.time())
async def test_exported_token_is_destroy_phrase(self):
from app import burnin
# Drive 3 = (exported)/exported
with self.assertRaises(ValueError):
await burnin.grant_pool_unlock(3, "(exported)", "op", "valid reason")
expiry = await burnin.grant_pool_unlock(
3, "DESTROY EXPORTED POOL", "op", "valid reason"
)
self.assertGreater(expiry, time.time())
# ----- input validation -----
async def test_empty_reason_rejected(self):
from app import burnin
with self.assertRaises(ValueError):
await burnin.grant_pool_unlock(1, "tank", "op", "")
async def test_short_reason_rejected(self):
from app import burnin
with self.assertRaises(ValueError):
await burnin.grant_pool_unlock(1, "tank", "op", "hi")
async def test_empty_operator_rejected(self):
from app import burnin
with self.assertRaises(ValueError):
await burnin.grant_pool_unlock(1, "tank", "", "valid reason")
async def test_unknown_drive_rejected(self):
from app import burnin
with self.assertRaises(ValueError):
await burnin.grant_pool_unlock(99999, "anything", "op", "valid reason")
async def test_drive_not_in_pool_rejected(self):
from app import burnin
# Manually clear pool fields on drive 1
async with aiosqlite.connect(self.db_path) as db:
await db.execute("UPDATE drives SET pool_name=NULL, pool_role=NULL WHERE id=1")
await db.commit()
with self.assertRaises(ValueError):
await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
# ----- identity binding (Codex finding #2) -----
async def test_grant_invalidated_when_pool_name_changes(self):
from app import burnin
await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
# Operator's grant references tank/data; pool detection now reports tank2.
self.assertTrue(burnin._is_unlocked(1, "tank", "data"))
self.assertFalse(burnin._is_unlocked(1, "tank2", "data"))
# And the side effect: the grant is reaped, not just temporarily denied.
self.assertNotIn(1, burnin._unlock_grants)
async def test_grant_invalidated_when_pool_role_changes(self):
from app import burnin
await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
# Same pool, different role (data -> cache).
self.assertFalse(burnin._is_unlocked(1, "tank", "cache"))
self.assertNotIn(1, burnin._unlock_grants)
async def test_unlock_expiry_returns_none_for_mismatched_identity(self):
from app import burnin
await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
self.assertIsNotNone(burnin.unlock_expiry(1, "tank", "data"))
self.assertIsNone(burnin.unlock_expiry(1, "tank2", "data"))
# ----- TTL expiry -----
async def test_expired_grant_returns_false(self):
from app import burnin
from app.burnin import unlock as _unlock
# Drop TTL to 0 so the grant is born expired. Monkey-patch the
# real source-of-truth in app.burnin.unlock — the alias on the
# package root is bound at import time and won't propagate back.
original = _unlock.UNLOCK_TTL_SECONDS
_unlock.UNLOCK_TTL_SECONDS = 0
try:
await burnin.grant_pool_unlock(1, "tank", "op", "valid reason")
self.assertFalse(burnin._is_unlocked(1, "tank", "data"))
self.assertNotIn(1, burnin._unlock_grants)
finally:
_unlock.UNLOCK_TTL_SECONDS = original
# ----- audit commit ordering (Codex finding #3) -----
async def test_audit_event_recorded_for_active_pool(self):
from app import burnin
await burnin.grant_pool_unlock(1, "tank", "alice", "swapping out drive")
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
cur = await db.execute(
"SELECT event_type, operator, message FROM audit_events "
"WHERE drive_id=? ORDER BY id DESC LIMIT 1", (1,)
)
row = await cur.fetchone()
self.assertEqual(row["event_type"], "pool_drive_unlocked")
self.assertEqual(row["operator"], "alice")
self.assertIn("swapping out drive", row["message"])
async def test_audit_event_for_boot_pool_uses_distinct_type(self):
from app import burnin
await burnin.grant_pool_unlock(
2, "DESTROY BOOT POOL", "alice", "replacing failed mirror"
)
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
cur = await db.execute(
"SELECT event_type FROM audit_events WHERE drive_id=? ORDER BY id DESC LIMIT 1",
(2,),
)
row = await cur.fetchone()
self.assertEqual(row["event_type"], "boot_pool_drive_unlocked")
async def test_audit_event_for_exported_uses_distinct_type(self):
    # A grant made with the "DESTROY EXPORTED POOL" token must be audited
    # under its own event type, "exported_pool_drive_unlocked".
    from app import burnin

    await burnin.grant_pool_unlock(
        3, "DESTROY EXPORTED POOL", "alice", "decommissioned pool"
    )

    async with aiosqlite.connect(self.db_path) as db:
        # Row factory gives name-based column access in the assertion below.
        db.row_factory = aiosqlite.Row
        cur = await db.execute(
            "SELECT event_type FROM audit_events WHERE drive_id=? ORDER BY id DESC LIMIT 1",
            (3,),
        )
        row = await cur.fetchone()

    # fetchone() returns None when nothing matched. Check that explicitly so
    # a missing audit row is reported as a readable assertion failure rather
    # than a TypeError from subscripting None.
    self.assertIsNotNone(row, "no audit_events row found for drive_id=3")
    self.assertEqual(row["event_type"], "exported_pool_drive_unlocked")
async def test_failed_token_does_not_record_audit_event(self):
    from app import burnin

    # A ValueError from the call is tolerated on purpose: this test is about
    # the absence of side effects afterwards, not about how the bad token is
    # reported.
    try:
        await burnin.grant_pool_unlock(1, "wrong-token", "op", "valid reason")
    except ValueError:
        pass

    count_sql = "SELECT COUNT(*) FROM audit_events WHERE drive_id=?"
    async with aiosqlite.connect(self.db_path) as conn:
        cursor = await conn.execute(count_sql, (1,))
        count_row = await cursor.fetchone()
        self.assertEqual(count_row[0], 0)

    # And no in-memory grant was armed.
    self.assertNotIn(1, burnin._unlock_grants)
class TestActiveJobUniqueIndex(unittest.IsolatedAsyncioTestCase):
    """Codex finding #4: exercise the partial unique index on
    burnin_jobs(drive_id) WHERE state IN ('queued','running').

    The index has to reject a second active row for a drive even when two
    requests both get past the SELECT-COUNT check concurrently.
    """

    # The one INSERT statement every test here issues; only the bound
    # values differ between calls.
    _INSERT_JOB_SQL = (
        "INSERT INTO burnin_jobs (drive_id, profile, state, percent, operator, created_at)\n"
        "VALUES (?,?,?,?,?,?)"
    )

    async def asyncSetUp(self):
        import asyncio

        from app import burnin

        self.db_path = await _setup_temp_db()
        burnin._unlock_grants.clear()

        # Blank the pool fields on drive 1 so these race tests do not need
        # an unlock grant first.
        async with aiosqlite.connect(self.db_path) as conn:
            await conn.execute(
                "UPDATE drives SET pool_name=NULL, pool_role=NULL WHERE id=1"
            )
            await conn.commit()

        # Give the burn-in orchestrator a fresh semaphore for this test.
        burnin._semaphore = asyncio.Semaphore(4)

    async def asyncTearDown(self):
        # Best-effort removal of the temporary database file.
        try:
            os.unlink(self.db_path)
        except OSError:
            pass

    async def test_index_blocks_second_active_insert(self):
        # Seed one queued row by hand; a second active row for the same
        # drive must then trip the index.
        first_job = (1, "surface", "queued", 0, "op", "2026-05-02T00:00:00+00:00")
        second_job = (1, "surface", "queued", 0, "op", "2026-05-02T00:00:01+00:00")

        async with aiosqlite.connect(self.db_path) as conn:
            await conn.execute(self._INSERT_JOB_SQL, first_job)
            await conn.commit()

            with self.assertRaises(aiosqlite.IntegrityError):
                await conn.execute(self._INSERT_JOB_SQL, second_job)
                await conn.commit()

    async def test_index_allows_terminal_state_then_new_job(self):
        # Rows in passed/failed/cancelled/unknown state must not stop a
        # fresh job from being queued for the same drive.
        finished_jobs = [
            (1, "surface", state, 100, "op", "2026-05-02T00:00:00+00:00")
            for state in ("passed", "failed", "cancelled", "unknown")
        ]
        fresh_job = (1, "surface", "queued", 0, "op", "2026-05-02T00:00:00+00:00")

        async with aiosqlite.connect(self.db_path) as conn:
            for job in finished_jobs:
                await conn.execute(self._INSERT_JOB_SQL, job)
            await conn.commit()

            # Expected to go through: no other queued/running row exists.
            await conn.execute(self._INSERT_JOB_SQL, fresh_job)
            await conn.commit()
# Run the test classes in this module with unittest's default runner when
# the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()