Addresses 12 of 13 findings from the Codex tech-debt + security review of versions 1.0.0-22 through 1.0.0-27. Item #5 (live pool re-check before start_job) deferred — would add an SSH round-trip per start. #1 Pool detection now treats zpool / lsblk / findmnt failures INDEPENDENTLY. Previously a single None blew away the whole map, so a host where lsblk lacks zfs_member info but zpool works would never lock pool members. Extended findmnt parser to recognise /dev/mapper/*, /dev/dm-*, /dev/md*, /dev/da*, /dev/ada* (LVM, devicemapper, MD RAID, FreeBSD CORE devnames). #2 Admin role enforced on every settings mutation. New auth.require_admin() helper applied to GET /settings, POST /api/v1/settings, /test-smtp, /test-ssh. Previously any authenticated user (the CLI explicitly supports non-admin accounts) could rewrite SMTP/SSH/API secrets. #3 First-user setup race closed. auth.create_user() now accepts bootstrap_only=True which wraps the existence check + insert in BEGIN IMMEDIATE so two concurrent /api/v1/auth/setup requests can't both create admin accounts during the bootstrap window. #4 Case-insensitive uniqueness enforced via new `uniq_users_username_nocase` index. Login does NOCASE lookup so without this `Admin` and `admin` could coexist as distinct rows. #6 New `session_cookie_secure` setting (default False for LAN/dev deploys, set True in production behind HTTPS) flips the session cookie's Secure flag. Defends against on-the-wire exposure when the dashboard is reachable over plain HTTP. #7 Audit trail bound to authenticated identity. Burn-in start / cancel / unlock / drive reset all now use `_operator_for(request)` which reads `request.state.current_user.full_name|username` instead of the body's operator field. Logged-in users can no longer spoof attribution. Drive reset's literal-"operator" fallback (window._operator was never set) is also fixed by this. #8 Login rate-limit race fixed. 
New `register_login_attempt()` is atomic check-AND-increment in synchronous code (no awaits inside), so a parallel burst can't slip past the threshold. `record_login_failure()` removed; `clear_login_failures()` now also drops any active lockout for a successful auth. Pre-existing bug where `tripped` was always False (so user_login_locked_out audit events never fired) also fixed. #9 NVMe surface_validate post-format check now mirrors the SSH path: fails on FAILED health AND on real SMART attribute failures, soft-passes SSH-only failures (logged), surfaces warnings to the stage log without failing. #10 retention.backup_db() now writes to `.tmp` then atomic-renames into the canonical daily slot — an interrupted backup leaves the tmp behind but doesn't corrupt the real snapshot. Scheduler marks last_run_date only on (prune AND backup) success so a transient failure gets retried within the 03:00 hour. #11 /health DB probe now exercises the WRITE path via a temp-table INSERT/SELECT/COMMIT round-trip. Previously only read PRAGMA journal_mode + a row count, which silently passes on read-only mounts and broken-WAL conditions. #12 security-scan.sh now fails loudly if `git fetch` or `git reset --hard origin/main` errors (was `|| true`, scanning stale code silently). pip-audit now runs in a throwaway python:3.12-slim container against requirements.txt instead of `docker exec`-ing into the live truenas-burnin container — cleaner separation, no transient package install on prod. #13 Badblocks SSH stage no longer doubles its log_text. Previously appended every 20-line chunk during streaming AND the full accumulated output at end. Now only flushes the un-flushed tail (typically <20 lines). `result["output"]` stays in-memory only. Verification: all 44 unit tests pass in container; /health 200; security scan returns 0 findings; deployed maple build is green. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
396 lines
14 KiB
Python
396 lines
14 KiB
Python
"""
|
|
App-level username/password auth for the burn-in dashboard.
|
|
|
|
Sessions are signed cookies (Starlette SessionMiddleware) that carry
|
|
{user_id, username}. Every request goes through `get_current_user_optional`
|
|
via the auth middleware in main.py; routes that need an authenticated user
|
|
import `get_current_user` instead, which raises 401 (or redirects to
|
|
/login for HTML requests) when there's no session.
|
|
|
|
Passwords are bcrypt with the library's default 12-round cost. We never
|
|
store plaintext.
|
|
|
|
Bootstrap: if the users table is empty AND `initial_admin_username` /
|
|
`initial_admin_password` are set, the lifespan creates that admin once at
|
|
startup. Otherwise, the login template renders the "first user" form when
|
|
visited and zero users exist.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import secrets
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
import aiosqlite
|
|
import bcrypt
|
|
from fastapi import HTTPException, Request, status
|
|
from starlette.responses import RedirectResponse
|
|
|
|
from app.config import settings
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Session secret — env var > persisted file > generated
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_SESSION_SECRET_FILE = "session_secret"


def get_session_secret() -> str:
    """Return the HMAC key for SessionMiddleware. env var beats disk.

    Resolution order:
      1. ``settings.session_secret`` when it is set (non-empty).
      2. The ``session_secret`` file that lives next to the database file.
      3. A freshly generated ``secrets.token_urlsafe(64)`` value, which is
         written to that file so later calls return the same key.

    The file is created exclusively and with mode 0600 in a single step, so
    the secret is never on disk with wider permissions and a concurrent
    worker that wins the creation race is not overwritten — the loser simply
    reads the winner's file.
    """
    import os  # local import: only the first-run creation path needs it

    if settings.session_secret:
        return settings.session_secret
    path = Path(settings.db_path).parent / _SESSION_SECRET_FILE
    if not path.exists():
        path.parent.mkdir(parents=True, exist_ok=True)
        try:
            # O_EXCL makes creation atomic; the 0o600 mode applies from the
            # moment the file exists (no write-then-chmod window).
            fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
        except FileExistsError:
            # Someone else created it between exists() and open(); use theirs.
            pass
        else:
            with os.fdopen(fd, "wb") as fh:
                fh.write(secrets.token_urlsafe(64).encode())
            log.warning(
                "Generated and persisted session secret to %s. "
                "Set SESSION_SECRET in env to override.", path,
            )
    return path.read_text().strip()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# User model + storage
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@dataclass(frozen=True)
class User:
    """Read-only view of one row of the ``users`` table, without the password hash.

    Instances are built by the lookup/creation helpers in this module from the
    ``id``, ``username``, ``full_name`` and ``is_admin`` columns.
    """

    # Value of the users.id column.
    id: int
    # Login name as stored in the users.username column.
    username: str
    # Optional display name; None when the column is NULL.
    full_name: str | None
    # True when the is_admin column is truthy; checked by require_admin().
    is_admin: bool
|
|
|
|
|
|
def _now() -> str:
    """Return the current UTC time as an ISO-8601 string (offset ``+00:00``)."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
|
|
|
|
|
def hash_password(plain: str) -> str:
    """Hash *plain* with bcrypt under a freshly generated salt.

    The password is UTF-8 encoded before hashing and the resulting bcrypt
    string is returned decoded as UTF-8 text.
    """
    encoded = plain.encode("utf-8")
    digest = bcrypt.hashpw(encoded, bcrypt.gensalt())
    return digest.decode("utf-8")
|
|
|
|
|
|
def verify_password(plain: str, hashed: str) -> bool:
    """Check *plain* against the bcrypt string *hashed*.

    Returns False instead of raising when bcrypt rejects the inputs with
    ValueError or TypeError.
    """
    try:
        matches = bcrypt.checkpw(plain.encode("utf-8"), hashed.encode("utf-8"))
    except (ValueError, TypeError):
        return False
    return matches
|
|
|
|
|
|
async def user_count() -> int:
    """Return the number of rows currently in the ``users`` table."""
    async with aiosqlite.connect(settings.db_path) as conn:
        cursor = await conn.execute("SELECT COUNT(*) FROM users")
        (total,) = await cursor.fetchone()
    return total
|
|
|
|
|
|
async def get_user_by_username(username: str) -> tuple[User, str] | None:
    """Look a user up by name, ignoring case (``COLLATE NOCASE``).

    Returns ``(user, password_hash)``, or None when no row matches. The hash
    is returned alongside the user only so the caller can hand it to
    verify_password(); it is deliberately not part of User.
    """
    query = (
        "SELECT id, username, password_hash, full_name, is_admin "
        "FROM users WHERE username = ? COLLATE NOCASE"
    )
    async with aiosqlite.connect(settings.db_path) as conn:
        conn.row_factory = aiosqlite.Row
        cursor = await conn.execute(query, (username,))
        record = await cursor.fetchone()
    if not record:
        return None
    found = User(
        id=record["id"],
        username=record["username"],
        full_name=record["full_name"],
        is_admin=bool(record["is_admin"]),
    )
    return found, record["password_hash"]
|
|
|
|
|
|
async def get_user_by_id(user_id: int) -> User | None:
    """Fetch the user whose ``id`` column equals *user_id*, or None if absent."""
    query = (
        "SELECT id, username, full_name, is_admin "
        "FROM users WHERE id = ?"
    )
    async with aiosqlite.connect(settings.db_path) as conn:
        conn.row_factory = aiosqlite.Row
        cursor = await conn.execute(query, (user_id,))
        record = await cursor.fetchone()
    if not record:
        return None
    return User(
        id=record["id"],
        username=record["username"],
        full_name=record["full_name"],
        is_admin=bool(record["is_admin"]),
    )
|
|
|
|
|
|
async def create_user(username: str, password: str,
                      full_name: str | None = None,
                      is_admin: bool = False,
                      bootstrap_only: bool = False) -> User:
    """Insert a new user and return it.

    Args:
        username: Login name; surrounding whitespace is stripped.
        password: Plaintext password, at least 8 characters. Only its bcrypt
            hash is stored.
        full_name: Optional display name. An empty string is stored (and
            returned) as None.
        is_admin: Whether the account gets the admin flag.
        bootstrap_only: When True, the insert is serialized with a check that
            the users table is empty inside an IMMEDIATE transaction. Used
            for the /api/v1/auth/setup first-user flow so two concurrent
            requests can't both create admin accounts during the bootstrap
            window.

    Returns:
        The created User, carrying exactly the values written to the row.

    Raises:
        ValueError: the username is empty, the password is too short,
            ``bootstrap_only`` is set but users already exist, or the insert
            violates an integrity constraint (reported as a username
            collision).
    """
    username = (username or "").strip()
    if not username:
        raise ValueError("Username is required.")
    if len(password) < 8:
        raise ValueError("Password must be at least 8 characters.")
    # Normalise once so the returned User mirrors what is actually stored
    # (previously "" was written as NULL but handed back as "").
    stored_full_name = full_name or None
    admin_flag = bool(is_admin)
    h = hash_password(password)
    try:
        async with aiosqlite.connect(settings.db_path) as db:
            if bootstrap_only:
                # IMMEDIATE acquires the write lock up-front so a parallel
                # setup request waits or fails — no two-step race.
                await db.execute("BEGIN IMMEDIATE")
                cur = await db.execute("SELECT COUNT(*) FROM users")
                if (await cur.fetchone())[0] != 0:
                    await db.execute("ROLLBACK")
                    raise ValueError(
                        "Users already exist — first-user setup is closed."
                    )
            cur = await db.execute(
                """INSERT INTO users
                   (username, password_hash, full_name, is_admin, created_at)
                   VALUES (?, ?, ?, ?, ?)
                   RETURNING id""",
                (username, h, stored_full_name, 1 if admin_flag else 0, _now()),
            )
            row = await cur.fetchone()
            await db.commit()
    except aiosqlite.IntegrityError as exc:
        # Keep the original constraint error attached as the cause.
        raise ValueError(f"Username {username!r} already exists.") from exc
    return User(
        id=row[0],
        username=username,
        full_name=stored_full_name,
        is_admin=admin_flag,
    )
|
|
|
|
|
|
async def touch_last_login(user_id: int) -> None:
    """Set ``last_login_at`` to the current UTC timestamp for *user_id*."""
    statement = "UPDATE users SET last_login_at = ? WHERE id = ?"
    async with aiosqlite.connect(settings.db_path) as conn:
        params = (_now(), user_id)
        await conn.execute(statement, params)
        await conn.commit()
|
|
|
|
|
|
async def change_password(user_id: int, current_password: str,
                          new_password: str) -> None:
    """Replace a user's password after re-checking the current one.

    Raises ValueError when the new password is shorter than 8 characters,
    when no user has *user_id*, or when *current_password* does not match the
    stored hash.
    """
    if len(new_password) < 8:
        raise ValueError("New password must be at least 8 characters.")
    async with aiosqlite.connect(settings.db_path) as conn:
        conn.row_factory = aiosqlite.Row
        cursor = await conn.execute(
            "SELECT username, password_hash FROM users WHERE id = ?", (user_id,)
        )
        record = await cursor.fetchone()
        # A missing row and a wrong password are reported identically.
        password_ok = bool(record) and verify_password(
            current_password, record["password_hash"]
        )
        if not password_ok:
            raise ValueError("Current password is incorrect.")
        replacement = hash_password(new_password)
        await conn.execute(
            "UPDATE users SET password_hash = ? WHERE id = ?",
            (replacement, user_id),
        )
        await conn.commit()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Login rate limiting (in-memory, per-username + per-source-IP)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
import time as _time
|
|
|
|
LOGIN_FAILURE_WINDOW_SECONDS = 600 # 10 min
|
|
LOGIN_FAILURE_THRESHOLD = 10 # this many failures within the window
|
|
LOGIN_LOCKOUT_SECONDS = 900 # then block for 15 min
|
|
|
|
# {(key,): [(timestamp, ...), ...]} key = (kind, value), kind in {"user","ip"}
|
|
_login_failures: dict = {}
|
|
_login_lockouts: dict = {} # key -> unix expiry
|
|
|
|
|
|
def _gc_failures(key) -> None:
|
|
"""Drop failure timestamps older than the window."""
|
|
arr = _login_failures.get(key, [])
|
|
cutoff = _time.time() - LOGIN_FAILURE_WINDOW_SECONDS
|
|
fresh = [t for t in arr if t >= cutoff]
|
|
if fresh:
|
|
_login_failures[key] = fresh
|
|
elif key in _login_failures:
|
|
del _login_failures[key]
|
|
|
|
|
|
def login_locked_until(username: str, ip: str) -> float | None:
|
|
"""Returns the lockout expiry (unix ts) if either dimension is locked,
|
|
else None. Lazily reaps expired lockouts."""
|
|
now = _time.time()
|
|
soonest = None
|
|
for key in (("user", username.lower()), ("ip", ip)):
|
|
exp = _login_lockouts.get(key)
|
|
if exp is None:
|
|
continue
|
|
if now >= exp:
|
|
del _login_lockouts[key]
|
|
continue
|
|
soonest = exp if soonest is None else min(soonest, exp)
|
|
return soonest
|
|
|
|
|
|
def register_login_attempt(username: str, ip: str) -> str:
|
|
"""Atomic check-then-increment for a login attempt.
|
|
|
|
Returns:
|
|
"ok" — allowed, counter incremented
|
|
"locked_out" — already locked from a prior attempt
|
|
"now_locked_out" — THIS attempt is what tripped the lockout
|
|
|
|
The increment runs synchronously (no awaits) so concurrent requests
|
|
can't slip past the threshold in CPython's single-threaded asyncio
|
|
loop. Caller must invoke clear_login_failures() on successful auth
|
|
to roll back this attempt's contribution.
|
|
"""
|
|
now = _time.time()
|
|
# Check existing lockouts first; if already locked, don't even
|
|
# increment — the lockout window absorbs everything.
|
|
for key in (("user", username.lower()), ("ip", ip)):
|
|
exp = _login_lockouts.get(key)
|
|
if exp is None:
|
|
continue
|
|
if now >= exp:
|
|
del _login_lockouts[key]
|
|
continue
|
|
return "locked_out"
|
|
# Increment + arm lockout if this push crosses the threshold.
|
|
tripped = False
|
|
for key in (("user", username.lower()), ("ip", ip)):
|
|
_gc_failures(key)
|
|
_login_failures.setdefault(key, []).append(now)
|
|
if len(_login_failures[key]) >= LOGIN_FAILURE_THRESHOLD:
|
|
_login_lockouts[key] = now + LOGIN_LOCKOUT_SECONDS
|
|
_login_failures[key] = []
|
|
tripped = True
|
|
return "now_locked_out" if tripped else "ok"
|
|
|
|
|
|
def clear_login_failures(username: str, ip: str) -> None:
|
|
"""Erase counters AND any lockout for a successful auth — caller
|
|
proved they have credentials, so the brute-force ladder resets."""
|
|
for key in (("user", username.lower()), ("ip", ip)):
|
|
_login_failures.pop(key, None)
|
|
_login_lockouts.pop(key, None)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Audit events for auth flows
|
|
# ---------------------------------------------------------------------------
|
|
|
|
async def audit_auth_event(event_type: str, username: str | None,
                           message: str) -> None:
    """Append one auth-related row to ``audit_events``.

    event_type is one of:
      user_login / user_login_failed / user_logout / user_password_changed /
      user_login_locked_out.

    The drive and burn-in job columns are written as NULL, and a missing
    username is recorded as "?" in the operator column.
    """
    operator = username or "?"
    params = (event_type, None, None, operator, message)
    async with aiosqlite.connect(settings.db_path) as conn:
        await conn.execute(
            """INSERT INTO audit_events
               (event_type, drive_id, burnin_job_id, operator, message)
               VALUES (?,?,?,?,?)""",
            params,
        )
        await conn.commit()
|
|
|
|
|
|
async def bootstrap_admin_if_empty() -> None:
    """Create the env-supplied admin if the users table is empty.

    Does nothing when users already exist or when either of
    ``settings.initial_admin_username`` / ``settings.initial_admin_password``
    is unset. Failures from create_user() are logged, never raised.

    The insert is issued with ``bootstrap_only=True`` so the "table is empty"
    condition is re-checked inside create_user()'s IMMEDIATE transaction; the
    user_count() call below is only a cheap early exit and cannot on its own
    rule out another writer creating a user in between.
    """
    if await user_count() > 0:
        return
    if not (settings.initial_admin_username and settings.initial_admin_password):
        return
    try:
        await create_user(
            settings.initial_admin_username,
            settings.initial_admin_password,
            full_name=None,
            is_admin=True,
            bootstrap_only=True,
        )
    except ValueError as exc:
        log.error("Failed to bootstrap initial admin: %s", exc)
        return
    log.warning(
        "Bootstrapped initial admin user %r from env. "
        "Change the password via the UI and remove the env vars from compose.",
        settings.initial_admin_username,
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# FastAPI dependencies
|
|
# ---------------------------------------------------------------------------
|
|
|
|
async def get_current_user_optional(request: Request) -> User | None:
    """Return the logged-in user, or None. Doesn't raise — for templates.

    None is returned when the request has no session, when the session has no
    ``user_id``, when that value is not an integer, or when no user with that
    id exists.
    """
    # Read the session from the ASGI scope: the `request.session` property
    # asserts that SessionMiddleware is installed, and hasattr() does not
    # swallow AssertionError, so it cannot be used as a presence check.
    session = request.scope.get("session") or {}
    sess_user_id = session.get("user_id")
    if not sess_user_id:
        return None
    try:
        user_id = int(sess_user_id)
    except (TypeError, ValueError):
        # Malformed session payload: treat as "not logged in".
        return None
    return await get_user_by_id(user_id)
|
|
|
|
|
|
def require_admin(request: Request) -> User:
    """Admin-only gate for settings-mutating endpoints.

    Reads ``request.state.current_user`` (expected to have been populated by
    the AuthGate middleware) and returns it when it is an admin.

    Raises:
        HTTPException: 401 when no current user is attached to the request,
            403 when the user is not an admin.
    """
    current = getattr(request.state, "current_user", None)
    if current and current.is_admin:
        return current
    if not current:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required",
        )
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Admin only",
    )
|
|
|
|
|
|
async def get_current_user(request: Request) -> User:
    """Strict counterpart of get_current_user_optional, for routes.

    Returns the logged-in user. Without one, a GET request whose Accept
    header mentions ``text/html`` raises _RedirectToLogin carrying the
    request path; every other request raises a 401 HTTPException.
    """
    user = await get_current_user_optional(request)
    if user is not None:
        return user
    # HTML clients prefer a redirect; API clients need a clean 401.
    wants_html = "text/html" in request.headers.get("accept", "")
    if wants_html and request.method == "GET":
        raise _RedirectToLogin(request.url.path)
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Authentication required",
    )
|
|
|
|
|
|
class _RedirectToLogin(Exception):
    """Raised by get_current_user when an HTML page needs to bounce to /login.

    ``next_path`` is the path the client originally requested.
    """

    def __init__(self, next_path: str):
        # Pass the path to Exception so str()/repr() are informative and the
        # exception can be rebuilt from ``args`` (copy / pickle).
        super().__init__(next_path)
        self.next_path = next_path
|
|
|
|
|
|
def login_redirect(next_path: str = "/") -> RedirectResponse:
    """Build a 303 redirect to the login page, remembering where to return.

    Args:
        next_path: Path to come back to after login. Only same-site absolute
            paths are honoured; anything else falls back to "/".

    Returns:
        A redirect to ``/login`` when the target is "/", otherwise to
        ``/login?next=<percent-encoded path>``.
    """
    from urllib.parse import quote  # local import: only used here

    # "//host" and "/\host" are treated by browsers as pointing at another
    # host, so starting with "/" alone is not enough to call a path local.
    is_local = next_path.startswith("/") and not next_path.startswith(("//", "/\\"))
    safe_next = next_path if is_local else "/"
    if safe_next == "/":
        return RedirectResponse(url="/login", status_code=303)
    # Encode so characters such as "&", "#" or "?" inside the path cannot
    # break out of the `next` query parameter.
    encoded_next = quote(safe_next, safe="/")
    return RedirectResponse(url=f"/login?next={encoded_next}", status_code=303)
|