# nas-burnin/app/config.py

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application settings for the NAS burn-in service.

    Each attribute is one setting together with its built-in default.
    ``model_config`` additionally loads values from a UTF-8 ``.env`` file
    and matches setting names case-insensitively.
    """

    model_config = SettingsConfigDict(
        case_sensitive=False,
        env_file=".env",
        env_file_encoding="utf-8",
    )

    # ---- Bind address and database path ----
    # Binding every interface is deliberate: the app runs in a container
    # that is fronted by nginx-proxy-manager.
    app_host: str = "0.0.0.0"  # nosec B104
    app_port: int = 8080
    db_path: str = "/data/app.db"

    # ---- TrueNAS API ----
    truenas_base_url: str = "http://localhost:8000"
    truenas_api_key: str = "mock-key"
    truenas_verify_tls: bool = False

    # ---- Polling and scheduling ----
    poll_interval_seconds: int = 12
    stale_threshold_seconds: int = 45
    max_parallel_burnins: int = 2
    # The next two are mock simulation durations.
    surface_validate_seconds: int = 45
    io_validate_seconds: int = 25

    # ---- Logging ----
    log_level: str = "INFO"

    # ---- Security ----
    # Comma-separated IPs and/or CIDRs, for example "10.0.0.0/24,127.0.0.1".
    # The empty default means allow all.
    allowed_ips: str = ""

    # ---- SMTP ----
    # Daily status email, sent at 8am local time by default.
    # An empty smtp_host disables email.
    smtp_host: str = ""
    smtp_port: int = 587
    smtp_user: str = ""
    smtp_password: str = ""
    smtp_from: str = ""
    # Recipients, comma-separated.
    smtp_to: str = ""
    # Local hour (0-23) at which the report is sent.
    smtp_report_hour: int = 8
    # Set False to skip the daily report without disabling alerts.
    smtp_daily_report_enabled: bool = True
    # Immediate email when a job fails.
    smtp_alert_on_fail: bool = True
    # Immediate email when a job passes.
    smtp_alert_on_pass: bool = False
    # One of "starttls", "ssl" or "plain".
    smtp_ssl_mode: str = "starttls"
    # Connection + read timeout, in seconds.
    smtp_timeout: int = 60

    # ---- Webhook ----
    # A JSON payload is POSTed on every job state change (pass/fail).
    # Leave empty to disable. Works with Slack, Discord, ntfy, n8n, etc.
    webhook_url: str = ""

    # ---- Stuck-job detection ----
    # A job running longer than this many hours is marked 'unknown' and the
    # remote badblocks/smartctl is killed. 168h (7 days) leaves margin for a
    # full -w surface_validate on a 14 TB+ HDD; the older 24h default
    # false-positived on multi-TB drives almost every time.
    stuck_job_hours: int = 168

    # ---- Temperature thresholds (°C) ----
    # Drive the drives-table colouring and the precheck gate.
    # Orange warning level.
    temp_warn_c: int = 46
    # Red critical level; precheck refuses to start above this.
    temp_crit_c: int = 55

    # ---- Bad-block tolerance ----
    # surface_validate fails if the bad-block count exceeds this.
    bad_block_threshold: int = 0

    # ---- Surface-validate (badblocks) tunables ----
    # Defaults match the geometry the Spearfoot disk-burnin.sh community
    # script recommends for large HDDs.
    #
    # block_size: badblocks -b, in bytes, aligned to AF (4 KiB) sectors.
    # Raising it to 8192 roughly halves badblocks runtime on multi-TB
    # drives, at the cost of ~2x RAM in the test buffer.
    surface_validate_block_size: int = 4096
    # block_buffer: badblocks -c, the number of blocks held in memory per
    # IO. 64 is the badblocks default. Larger values mean a bigger buffer,
    # faster IO and more RAM (block_size * block_buffer bytes per pass).
    surface_validate_block_buffer: int = 64
    # passes: badblocks -p. 1 repeats until one consecutive clean scan
    # (current behavior); 2-3 gives a paranoid burn-in that re-confirms
    # after finding errors.
    surface_validate_passes: int = 1

    # ---- SSH: direct TrueNAS command execution (Stage 7) ----
    # With ssh_host set, burn-in stages run smartctl/badblocks over SSH
    # instead of the REST API. An empty ssh_host keeps the mock/REST API
    # (development mode).
    ssh_host: str = ""
    ssh_port: int = 22
    # TrueNAS CORE default is root.
    ssh_user: str = "root"
    # Password auth; leave blank if using a key.
    ssh_password: str = ""
    # PEM private key content; paste the full key including headers.
    ssh_key: str = ""

    # ---- Application version ----
    # Used by the /api/v1/updates/check endpoint.
    app_version: str = "1.0.0-54"

    # ---- Authentication (1.0.0-22) ----
    # session_secret is the HMAC key for signing session cookies. When it
    # is empty, one is generated and persisted to /data/session_secret on
    # first run, so sessions survive restarts but rotate if that file is
    # deleted. Set it explicitly through the SESSION_SECRET env var to
    # share secrets across replicas.
    session_secret: str = ""
    # 7 days, expressed in seconds.
    session_max_age_seconds: int = 7 * 24 * 60 * 60
    # Set True when the dashboard is reachable exclusively over HTTPS
    # (typical behind nginx-proxy-manager with TLS). The session cookie is
    # then never sent over plain HTTP, which removes the on-the-wire
    # exposure surface. False allows initial deploy + LAN testing.
    session_cookie_secure: bool = False
    # Initial admin bootstrap: if both values are set AND the users table
    # is empty at startup, that account is created immediately. Afterwards
    # the env vars are ignored; change passwords via the UI / database
    # rather than by editing compose.yml.
    initial_admin_username: str = ""
    initial_admin_password: str = ""

    # ---- Retention + backup (1.0.0-23) ----
    # log_days: burnin_stages.log_text is NULLed out after this many days
    # while the history rows themselves are preserved. The default keeps
    # ~5 weeks; long-soak burn-ins typically finish in <2.
    retention_log_days: int = 35
    # backup_keep: number of nightly DB snapshots kept in /data/backups.
    retention_backup_keep: int = 14


# Shared module-level Settings instance, constructed once when this module
# is imported.
settings = Settings()