After the chunk-read refactor, the inner _drain coroutine assigns to last_db_write_ts and last_pct_sample. Without nonlocal, Python compiles these as locals of _drain, so any READ before the first assignment raises UnboundLocalError. In 1.0.0-55 / -57 the bug was hidden by gather(return_exceptions=True), which silently swallowed the exception — the drain coroutine ended immediately, the asyncssh channel buffer filled up, and the remote badblocks blocked on pipe_write. THAT was the actual cause of the "parser silently never works" symptom, not anything to do with the chunk-read or tr-pipe logic itself. 1.0.0-57 dropped the gather (single drain after merging 2>&1), which made the next deploy surface the bug as an explicit error_text on the surface_validate stage: "cannot access local variable 'last_db_write_ts' where it is not associated with a value". Fix: add both vars to the nonlocal declaration. pending_log_chunks only gets .append/.clear (no reassignment) so it doesn't need nonlocal. This is the bug that's been hiding behind all the recent parser work. Sorry for the round trips.
119 lines
5.5 KiB
Python
119 lines
5.5 KiB
Python
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    """Central application configuration.

    Declared as a pydantic-settings ``BaseSettings`` model: ``model_config``
    points it at a UTF-8 ``.env`` file and turns off case-sensitive name
    matching. Every field below carries the default that applies when no
    override is supplied.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
    )

    # ---- HTTP listener and database ----
    app_host: str = "0.0.0.0"  # nosec B104 — container deliberately binds all interfaces; nginx-proxy-manager fronts it.
    app_port: int = 8080
    db_path: str = "/data/app.db"

    # ---- TrueNAS REST API ----
    # NOTE(review): the defaults look like a local mock endpoint with a
    # placeholder key and TLS verification off — confirm before real use.
    truenas_base_url: str = "http://localhost:8000"
    truenas_api_key: str = "mock-key"
    truenas_verify_tls: bool = False

    # ---- Polling and burn-in tunables ----
    poll_interval_seconds: int = 12
    stale_threshold_seconds: int = 45
    max_parallel_burnins: int = 2
    # The next two are durations of the mock simulation.
    surface_validate_seconds: int = 45
    io_validate_seconds: int = 25

    # ---- Logging ----
    log_level: str = "INFO"

    # ---- Security ----
    # Comma-separated IPs or CIDRs, e.g. "10.0.0.0/24,127.0.0.1".
    # The default empty string allows everyone.
    allowed_ips: str = ""

    # ---- SMTP: daily status email at 8am local time ----
    # Email is disabled while smtp_host is empty.
    smtp_host: str = ""
    smtp_port: int = 587
    smtp_user: str = ""
    smtp_password: str = ""
    smtp_from: str = ""
    # Comma-separated recipients.
    smtp_to: str = ""
    # Local hour at which the report is sent (0-23).
    smtp_report_hour: int = 8
    # Set False to skip the daily report without disabling alerts.
    smtp_daily_report_enabled: bool = True
    # Immediate email when a job fails.
    smtp_alert_on_fail: bool = True
    # Immediate email when a job passes.
    smtp_alert_on_pass: bool = False
    # One of "starttls" | "ssl" | "plain".
    smtp_ssl_mode: str = "starttls"
    # Connection + read timeout in seconds.
    smtp_timeout: int = 60

    # ---- Webhook ----
    # JSON payload is POSTed on every job state change (pass/fail).
    # Empty disables it. Works with Slack, Discord, ntfy, n8n, etc.
    webhook_url: str = ""

    # ---- Stuck-job detection ----
    # Jobs running longer than this many hours are marked 'unknown' and the
    # remote badblocks/smartctl is killed. 168h (7 days) leaves margin for a
    # full -w surface_validate on a 14 TB+ HDD; the previous 24h default
    # false-positived on multi-TB drives almost every time.
    stuck_job_hours: int = 168

    # ---- Temperature thresholds (°C) ----
    # Used for drives-table colouring and the precheck gate.
    # Orange warning level.
    temp_warn_c: int = 46
    # Red critical level; precheck refuses to start above this.
    temp_crit_c: int = 55

    # ---- Bad-block tolerance ----
    # surface_validate fails when the bad-block count exceeds this.
    bad_block_threshold: int = 0

    # ---- Surface-validate (badblocks) tunables ----
    # Defaults follow the geometry recommended for large HDDs by the
    # Spearfoot disk-burnin.sh community script.
    #   block_size   : -b in bytes, aligned to AF (4 KiB) sectors. Going to
    #                  8192 roughly halves badblocks runtime on multi-TB
    #                  drives at the cost of ~2x RAM in the test buffer.
    #   block_buffer : -c blocks held in memory per IO; 64 is the badblocks
    #                  default. Larger = bigger buffer, faster IO, more RAM
    #                  (block_size * block_buffer bytes per pass).
    #   passes       : -p value. 1 = repeat until one consecutive clean scan
    #                  (current behavior); 2-3 for a paranoid burn-in that
    #                  re-confirms after finding errors.
    surface_validate_block_size: int = 4096
    surface_validate_block_buffer: int = 64
    surface_validate_passes: int = 1

    # ---- SSH for direct TrueNAS command execution (Stage 7) ----
    # With ssh_host set, burn-in stages run smartctl/badblocks over SSH
    # instead of the REST API. An empty ssh_host selects the mock/REST API
    # (development mode).
    ssh_host: str = ""
    ssh_port: int = 22
    # TrueNAS CORE default is root.
    ssh_user: str = "root"
    # Password auth; leave blank when using a key.
    ssh_password: str = ""
    # PEM private key content (paste the full key including headers).
    ssh_key: str = ""

    # ---- Application version ----
    # Consumed by the /api/v1/updates/check endpoint.
    app_version: str = "1.0.0-59"

    # ---- Authentication (1.0.0-22) ----
    # session_secret is the HMAC key for signing session cookies. Empty means
    # one is generated and persisted to /data/session_secret on first run
    # (sessions survive restarts but rotate if that file is deleted). Set the
    # SESSION_SECRET env var explicitly to share a secret across replicas.
    session_secret: str = ""
    # 7 days, expressed in seconds.
    session_max_age_seconds: int = 7 * 24 * 60 * 60
    # Turn on when the dashboard is reachable only over HTTPS (typical behind
    # nginx-proxy-manager with TLS): the session cookie is then refused on
    # plain HTTP, removing the on-the-wire exposure. False permits initial
    # deploy + LAN testing.
    session_cookie_secure: bool = False
    # Initial admin bootstrap: when both values are set AND the users table
    # is empty at startup, that account is created immediately. Afterwards
    # the env vars are ignored — change passwords via the UI / database, not
    # by editing compose.yml.
    initial_admin_username: str = ""
    initial_admin_password: str = ""

    # ---- Retention + backup (1.0.0-23) ----
    #   log_days    : burnin_stages.log_text is NULLed out after this many
    #                 days (the history rows themselves are preserved). The
    #                 default keeps ~5 weeks; long-soak burn-ins typically
    #                 finish in <2.
    #   backup_keep : number of nightly DB snapshots kept in /data/backups.
    retention_log_days: int = 35
    retention_backup_keep: int = 14
|
|
|
|
|
|
# Shared module-level instance, constructed once when this module is imported.
settings = Settings()
|