Addresses 12 of 13 findings from the Codex tech-debt + security review of versions 1.0.0-22 through 1.0.0-27. Item #5 (live pool re-check before start_job) deferred — would add an SSH round-trip per start. #1 Pool detection now treats zpool / lsblk / findmnt failures INDEPENDENTLY. Previously a single None blew away the whole map, so a host where lsblk lacks zfs_member info but zpool works would never lock pool members. Extended findmnt parser to recognise /dev/mapper/*, /dev/dm-*, /dev/md*, /dev/da*, /dev/ada* (LVM, devicemapper, MD RAID, FreeBSD CORE devnames). #2 Admin role enforced on every settings mutation. New auth.require_admin() helper applied to GET /settings, POST /api/v1/settings, /test-smtp, /test-ssh. Previously any authenticated user (the CLI explicitly supports non-admin accounts) could rewrite SMTP/SSH/API secrets. #3 First-user setup race closed. auth.create_user() now accepts bootstrap_only=True which wraps the existence check + insert in BEGIN IMMEDIATE so two concurrent /api/v1/auth/setup requests can't both create admin accounts during the bootstrap window. #4 Case-insensitive uniqueness enforced via new `uniq_users_username_nocase` index. Login does NOCASE lookup so without this `Admin` and `admin` could coexist as distinct rows. #6 New `session_cookie_secure` setting (default False for LAN/dev deploys, set True in production behind HTTPS) flips the session cookie's Secure flag. Defends against on-the-wire exposure when the dashboard is reachable over plain HTTP. #7 Audit trail bound to authenticated identity. Burn-in start / cancel / unlock / drive reset all now use `_operator_for(request)` which reads `request.state.current_user.full_name|username` instead of the body's operator field. Logged-in users can no longer spoof attribution. Drive reset's literal-"operator" fallback (window._operator was never set) is also fixed by this. #8 Login rate-limit race fixed. 
New `register_login_attempt()` is atomic check-AND-increment in synchronous code (no awaits inside), so a parallel burst can't slip past the threshold. `record_login_failure()` removed; `clear_login_failures()` now also drops any active lockout for a successful auth. Pre-existing bug where `tripped` was always False (so user_login_locked_out audit events never fired) also fixed. #9 NVMe surface_validate post-format check now mirrors the SSH path: fails on FAILED health AND on real SMART attribute failures, soft-passes SSH-only failures (logged), surfaces warnings to the stage log without failing. #10 retention.backup_db() now writes to `.tmp` then atomic-renames into the canonical daily slot — an interrupted backup leaves the tmp behind but doesn't corrupt the real snapshot. Scheduler marks last_run_date only on (prune AND backup) success so a transient failure gets retried within the 03:00 hour. #11 /health DB probe now exercises the WRITE path via a temp-table INSERT/SELECT/COMMIT round-trip. Previously only read PRAGMA journal_mode + a row count, which silently passes on read-only mounts and broken-WAL conditions. #12 security-scan.sh now fails loudly if `git fetch` or `git reset --hard origin/main` errors (was `|| true`, scanning stale code silently). pip-audit now runs in a throwaway python:3.12-slim container against requirements.txt instead of `docker exec`-ing into the live truenas-burnin container — cleaner separation, no transient package install on prod. #13 Badblocks SSH stage no longer doubles its log_text. Previously appended every 20-line chunk during streaming AND the full accumulated output at end. Now only flushes the un-flushed tail (typically <20 lines). `result["output"]` stays in-memory only. Verification: all 44 unit tests pass in container; /health 200; security scan returns 0 findings; deployed maple build is green. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
175 lines
6.8 KiB
Python
175 lines
6.8 KiB
Python
import aiosqlite
|
|
from pathlib import Path
|
|
|
|
from app.config import settings
|
|
|
|
# Baseline schema, executed as one script by init_db().  Every statement uses
# IF NOT EXISTS, so running it against an already-initialised database is a
# no-op.  Columns added after the first release are handled by _MIGRATIONS.
SCHEMA = """
CREATE TABLE IF NOT EXISTS drives (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    truenas_disk_id TEXT UNIQUE NOT NULL,
    devname         TEXT NOT NULL,
    serial          TEXT,
    model           TEXT,
    size_bytes      INTEGER,
    temperature_c   INTEGER,
    smart_health    TEXT DEFAULT 'UNKNOWN',
    last_seen_at    TEXT NOT NULL,
    last_polled_at  TEXT NOT NULL,
    notes           TEXT,
    location        TEXT
);

CREATE TABLE IF NOT EXISTS smart_tests (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    drive_id        INTEGER NOT NULL REFERENCES drives(id) ON DELETE CASCADE,
    test_type       TEXT NOT NULL CHECK(test_type IN ('short', 'long')),
    state           TEXT NOT NULL DEFAULT 'idle',
    percent         INTEGER DEFAULT 0,
    truenas_job_id  INTEGER,
    started_at      TEXT,
    eta_at          TEXT,
    finished_at     TEXT,
    error_text      TEXT,
    UNIQUE(drive_id, test_type)
);

CREATE TABLE IF NOT EXISTS burnin_jobs (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    drive_id    INTEGER NOT NULL REFERENCES drives(id),
    profile     TEXT NOT NULL,
    state       TEXT NOT NULL DEFAULT 'queued',
    percent     INTEGER DEFAULT 0,
    stage_name  TEXT,
    operator    TEXT NOT NULL,
    created_at  TEXT NOT NULL,
    started_at  TEXT,
    finished_at TEXT,
    error_text  TEXT
);

CREATE TABLE IF NOT EXISTS burnin_stages (
    id               INTEGER PRIMARY KEY AUTOINCREMENT,
    burnin_job_id    INTEGER NOT NULL REFERENCES burnin_jobs(id) ON DELETE CASCADE,
    stage_name       TEXT NOT NULL,
    state            TEXT NOT NULL DEFAULT 'pending',
    percent          INTEGER DEFAULT 0,
    started_at       TEXT,
    finished_at      TEXT,
    duration_seconds REAL,
    error_text       TEXT
);

CREATE TABLE IF NOT EXISTS audit_events (
    id            INTEGER PRIMARY KEY AUTOINCREMENT,
    event_type    TEXT NOT NULL,
    drive_id      INTEGER REFERENCES drives(id),
    burnin_job_id INTEGER REFERENCES burnin_jobs(id),
    operator      TEXT,
    message       TEXT NOT NULL,
    created_at    TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
);

CREATE INDEX IF NOT EXISTS idx_smart_drive_type  ON smart_tests(drive_id, test_type);
CREATE INDEX IF NOT EXISTS idx_burnin_jobs_drive ON burnin_jobs(drive_id, state);
CREATE INDEX IF NOT EXISTS idx_burnin_stages_job ON burnin_stages(burnin_job_id);
CREATE INDEX IF NOT EXISTS idx_audit_events_job  ON audit_events(burnin_job_id);
"""
|
|
|
|
|
|
# Migrations for existing databases that predate schema additions.
|
|
# Each entry is tried with try/except — SQLite raises OperationalError
|
|
# ("duplicate column name") if the column already exists, which is safe to ignore.
|
|
_MIGRATIONS = [
|
|
"ALTER TABLE drives ADD COLUMN notes TEXT",
|
|
"ALTER TABLE drives ADD COLUMN location TEXT",
|
|
# Stage 7: SSH command output + SMART attribute storage
|
|
"ALTER TABLE burnin_stages ADD COLUMN log_text TEXT",
|
|
"ALTER TABLE burnin_stages ADD COLUMN bad_blocks INTEGER DEFAULT 0",
|
|
"ALTER TABLE drives ADD COLUMN smart_attrs TEXT",
|
|
"ALTER TABLE smart_tests ADD COLUMN raw_output TEXT",
|
|
# Stage 8: track last reset time so dashboard burn-in col clears after reset
|
|
"ALTER TABLE drives ADD COLUMN last_reset_at TEXT",
|
|
# 1.0.0-15: pool-membership lock
|
|
"ALTER TABLE drives ADD COLUMN pool_name TEXT",
|
|
"ALTER TABLE drives ADD COLUMN pool_role TEXT",
|
|
"ALTER TABLE drives ADD COLUMN pool_seen_at TEXT",
|
|
# 1.0.0-19: enforce one active burn-in per drive at the storage layer.
|
|
# Closes the read-then-insert race in burnin.start_job — without this,
|
|
# two concurrent /api/v1/burnin/start requests for the same drive could
|
|
# both observe zero active jobs and both insert queued rows.
|
|
"""CREATE UNIQUE INDEX IF NOT EXISTS uniq_active_burnin_per_drive
|
|
ON burnin_jobs (drive_id) WHERE state IN ('queued', 'running')""",
|
|
# 1.0.0-22: app-level login (username + bcrypt password)
|
|
"""CREATE TABLE IF NOT EXISTS users (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
username TEXT UNIQUE NOT NULL,
|
|
password_hash TEXT NOT NULL,
|
|
full_name TEXT,
|
|
is_admin INTEGER NOT NULL DEFAULT 0,
|
|
created_at TEXT NOT NULL,
|
|
last_login_at TEXT
|
|
)""",
|
|
# 1.0.0-28: case-insensitive uniqueness. The base UNIQUE on username
|
|
# is case-sensitive but login does NOCASE — without this index two
|
|
# users `Admin` and `admin` could coexist and shadow each other.
|
|
"""CREATE UNIQUE INDEX IF NOT EXISTS uniq_users_username_nocase
|
|
ON users (username COLLATE NOCASE)""",
|
|
]
|
|
|
|
|
|
async def _drop_burnin_jobs_profile_check(db: aiosqlite.Connection) -> None:
    """Rebuild ``burnin_jobs`` without its legacy CHECK constraint, if present.

    Older databases declared ``CHECK(profile IN ('quick','full'))``.  SQLite
    cannot ALTER a CHECK away, so the table is copied into a fresh one and
    swapped in.  Does nothing when the stored table definition contains no
    CHECK clause.
    """
    cur = await db.execute(
        "SELECT sql FROM sqlite_master WHERE type='table' AND name='burnin_jobs'"
    )
    row = await cur.fetchone()
    if not (row and "CHECK" in (row[0] or "")):
        return

    # PRAGMA foreign_keys is a no-op inside a transaction, so it is toggled
    # outside the BEGIN/COMMIT pair.  The transaction keeps the swap atomic:
    # an interruption between DROP and RENAME rolls back instead of leaving
    # the database without a burnin_jobs table.
    await db.executescript("""
        PRAGMA foreign_keys=OFF;
        BEGIN;
        DROP TABLE IF EXISTS burnin_jobs_new;
        CREATE TABLE burnin_jobs_new (
            id          INTEGER PRIMARY KEY AUTOINCREMENT,
            drive_id    INTEGER NOT NULL REFERENCES drives(id),
            profile     TEXT NOT NULL,
            state       TEXT NOT NULL DEFAULT 'queued',
            percent     INTEGER DEFAULT 0,
            stage_name  TEXT,
            operator    TEXT NOT NULL,
            created_at  TEXT NOT NULL,
            started_at  TEXT,
            finished_at TEXT,
            error_text  TEXT
        );
        INSERT INTO burnin_jobs_new SELECT * FROM burnin_jobs;
        DROP TABLE burnin_jobs;
        ALTER TABLE burnin_jobs_new RENAME TO burnin_jobs;
        CREATE INDEX IF NOT EXISTS idx_burnin_jobs_drive ON burnin_jobs(drive_id, state);
        COMMIT;
        PRAGMA foreign_keys=ON;
    """)


async def _run_migrations(db: aiosqlite.Connection) -> None:
    """Bring an existing database up to the current schema.

    Two steps, in this order:

    1. Rebuild ``burnin_jobs`` if it still carries the legacy CHECK
       constraint.  This must happen *before* the statement list runs:
       dropping the old table also drops every index on it, so any index
       that ``_MIGRATIONS`` creates on ``burnin_jobs`` (notably the
       one-active-job-per-drive unique index) would otherwise be lost until
       the next start-up.
    2. Execute every statement in ``_MIGRATIONS`` on a best-effort basis.
       "duplicate column name" is the expected outcome of re-running an
       ADD COLUMN and is ignored silently; any other SQLite error is logged
       as a warning and the remaining statements still run.

    The caller is responsible for committing.
    """
    import logging
    import sqlite3

    log = logging.getLogger(__name__)

    await _drop_burnin_jobs_profile_check(db)

    for sql in _MIGRATIONS:
        try:
            await db.execute(sql)
        except sqlite3.Error as exc:
            if "duplicate column name" in str(exc).lower():
                continue  # Column already exists — harmless
            # Still best-effort, but no longer invisible: e.g. a unique
            # index that cannot be built because of pre-existing duplicates.
            log.warning(
                "Schema migration skipped (%s): %s", exc, " ".join(sql.split())
            )
|
|
|
|
|
|
async def init_db() -> None:
    """Create the database file if needed and apply schema plus migrations.

    Ensures the parent directory of ``settings.db_path`` exists, opens a
    connection, switches it to WAL journaling with foreign keys enabled,
    runs the baseline ``SCHEMA`` script, applies ``_run_migrations`` and
    commits before the connection is closed.
    """
    db_dir = Path(settings.db_path).parent
    db_dir.mkdir(parents=True, exist_ok=True)

    async with aiosqlite.connect(settings.db_path) as conn:
        for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
            await conn.execute(pragma)
        await conn.executescript(SCHEMA)
        await _run_migrations(conn)
        await conn.commit()
|
|
|
|
|
|
async def get_db():
    """Yield a configured database connection and close it afterwards.

    Async generator: opens a connection to ``settings.db_path`` with
    ``aiosqlite.Row`` as the row factory, enables WAL journaling and
    foreign-key enforcement, yields the connection once, and closes it when
    the consumer is done (including when setup or the consumer raises).
    """
    conn = await aiosqlite.connect(settings.db_path)
    conn.row_factory = aiosqlite.Row
    try:
        for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
            await conn.execute(pragma)
        yield conn
    finally:
        await conn.close()
|