"""Pool-drive unlock state. Drives that ZFS reports as belonging to an active zpool (including the boot pool), drives carrying ZFS labels from a previously-imported pool ("exported"), and drives with a non-ZFS mount somewhere ("mounted") are all locked from burn-in until the operator explicitly unlocks them via ``POST /api/v1/drives/{id}/unlock``. Grants live in memory only — a container restart wipes them, which is the right default for "this is very dangerous." TTL is bounded so an unlock you forgot about can't sit armed indefinitely. Each lock kind has its own confirm token to make the override deliberate; see grant_pool_unlock for the matching logic. Public surface: is_unlocked(drive_id, current_pool_name, current_pool_role) -> bool unlock_expiry(drive_id, current_pool_name, current_pool_role) -> float|None grant_pool_unlock(drive_id, confirm_token, operator, reason) -> float PoolMemberError — raised by start_job UNLOCK_TTL_SECONDS — for the unlock endpoint response BOOT_POOL_NAME / *_TOKEN consts — for the UI / audit """ from __future__ import annotations import logging import time as _time from dataclasses import dataclass import aiosqlite from app.config import settings log = logging.getLogger(__name__) UNLOCK_TTL_SECONDS = 600 # 10 minutes BOOT_POOL_NAME = "boot-pool" BOOT_POOL_CONFIRM_TOKEN = "DESTROY BOOT POOL" EXPORTED_POOL_ROLE = "exported" EXPORTED_CONFIRM_TOKEN = "DESTROY EXPORTED POOL" MOUNTED_ROLE = "mounted" MOUNTED_CONFIRM_TOKEN = "DESTROY MOUNTED FILESYSTEM" @dataclass class _UnlockGrant: """An operator-issued, time-bounded permission to burn-in a pool drive. The grant is BOUND to the (pool_name, pool_role) observed at unlock time. If a subsequent poll reclassifies the drive — e.g. it was "(exported)" when unlocked but is now in active pool "tank", or it used to be a cache vdev and now shows as data — the grant is invalidated. Otherwise the operator's "I confirm this exported drive is decommissioned" judgement would silently authorise destruction of a live pool. """ expiry: float pool_name: str pool_role: str | None _unlock_grants: dict[int, _UnlockGrant] = {} class PoolMemberError(Exception): """Raised by start_job when a drive is in a zpool and not unlocked.""" def __init__(self, drive_id: int, pool_name: str, pool_role: str | None): self.drive_id = drive_id self.pool_name = pool_name self.pool_role = pool_role is_boot = pool_name == BOOT_POOL_NAME super().__init__( f"Drive is part of {'BOOT POOL' if is_boot else 'pool'} " f"'{pool_name}'{' (' + pool_role + ')' if pool_role else ''}. " f"Unlock required before burn-in." ) def is_unlocked(drive_id: int, current_pool_name: str | None, current_pool_role: str | None) -> bool: """True iff a non-expired grant exists AND the drive's pool identity matches what was observed at unlock time.""" grant = _unlock_grants.get(drive_id) if grant is None: return False if _time.time() >= grant.expiry: _unlock_grants.pop(drive_id, None) return False if grant.pool_name != current_pool_name or grant.pool_role != current_pool_role: # Pool identity changed since unlock — drive may now belong to a # different (or live) pool. Invalidate the grant; operator must # re-unlock with eyes-open against the current state. _unlock_grants.pop(drive_id, None) log.warning( "Invalidating unlock grant for drive_id=%d: pool changed from " "(%s, %s) to (%s, %s)", drive_id, grant.pool_name, grant.pool_role, current_pool_name, current_pool_role, ) return False return True def unlock_expiry(drive_id: int, current_pool_name: str | None, current_pool_role: str | None) -> float | None: """Return the absolute expiry of an active grant, or None. Same identity-binding semantics as is_unlocked: a grant whose stored pool identity no longer matches the current row is treated as expired and reaped. This is what the dashboard reads to decide whether to show the unlocked-Burn-In affordance vs the locked-Unlock affordance. """ grant = _unlock_grants.get(drive_id) if grant is None: return None if _time.time() >= grant.expiry: _unlock_grants.pop(drive_id, None) return None if grant.pool_name != current_pool_name or grant.pool_role != current_pool_role: _unlock_grants.pop(drive_id, None) return None return grant.expiry def invalidate_grant(drive_id: int) -> None: """Drop a grant unconditionally — used by start_job when a fresh SSH-side pool check shows the drive's identity has shifted.""" _unlock_grants.pop(drive_id, None) async def grant_pool_unlock(drive_id: int, confirm_token: str, operator: str, reason: str) -> float: """Validate confirmation token + reason and grant a time-limited unlock. Raises ValueError on bad confirm_token, missing reason, or drive not actually in a pool. Returns the unix expiry timestamp on success. """ if not reason or len(reason.strip()) < 5: raise ValueError("A reason of at least 5 characters is required.") if not operator or not operator.strip(): raise ValueError("Operator name is required.") async with aiosqlite.connect(settings.db_path) as db: db.row_factory = aiosqlite.Row await db.execute("PRAGMA busy_timeout=10000") cur = await db.execute( "SELECT pool_name, pool_role, devname FROM drives WHERE id=?", (drive_id,), ) row = await cur.fetchone() if not row: raise ValueError("Drive not found.") pool_name = row["pool_name"] pool_role = row["pool_role"] if not pool_name: raise ValueError( "This drive is not part of any pool — no unlock needed." ) # Boot-pool / exported / mounted-fs all get dedicated, harder-to- # fat-finger tokens. Active data pools just need their pool name # typed. if pool_name == BOOT_POOL_NAME: expected = BOOT_POOL_CONFIRM_TOKEN elif pool_role == EXPORTED_POOL_ROLE: expected = EXPORTED_CONFIRM_TOKEN elif pool_role == MOUNTED_ROLE: expected = MOUNTED_CONFIRM_TOKEN else: expected = pool_name if (confirm_token or "").strip() != expected: raise ValueError("Confirmation token does not match.") if pool_name == BOOT_POOL_NAME: evt = "boot_pool_drive_unlocked" elif pool_role == EXPORTED_POOL_ROLE: evt = "exported_pool_drive_unlocked" elif pool_role == MOUNTED_ROLE: evt = "mounted_drive_unlocked" else: evt = "pool_drive_unlocked" await db.execute( """INSERT INTO audit_events (event_type, drive_id, burnin_job_id, operator, message) VALUES (?,?,?,?,?)""", (evt, drive_id, None, operator.strip(), f"Unlocked {pool_name} drive {row['devname']} for burn-in: {reason.strip()}"), ) await db.commit() # Arm the in-memory grant ONLY after the audit row is durable. If the # commit above raises, we exit without writing _unlock_grants — no # unaudited active unlocks. The grant is bound to the (pool_name, # pool_role) we observed under the open transaction so a later poll # that reclassifies the drive invalidates it (see is_unlocked). expiry = _time.time() + UNLOCK_TTL_SECONDS _unlock_grants[drive_id] = _UnlockGrant( expiry=expiry, pool_name=pool_name, pool_role=pool_role, ) log.warning( "Pool-drive unlock granted: drive_id=%d pool=%s role=%s " "operator=%s reason=%r", drive_id, pool_name, pool_role, operator, reason, ) return expiry