fix: backport stages.py \b-parser fix + drawer-finish inline (uncommitted from 1.0.0-55)
Some checks are pending
Security scan / pip-audit (push) Waiting to run
Security scan / bandit (push) Waiting to run
Security scan / gitleaks (push) Waiting to run
Security scan / mypy (push) Waiting to run

The chunk-read parser fix that ships as part of 1.0.0-55 in the
running container was scp'd to maple but never committed to git. The
same is true of the removal of `margin-left: auto` from
.drawer-job-finish (requested so that the pill sits inline next to
operator/date instead of flush right).

This commit reconciles the source with the deployed state. No new
behaviour — git now matches what has been live on maple since 1.0.0-55.
This commit is contained in:
Brandon Walter 2026-05-12 07:53:33 -07:00
parent 71eac9cba0
commit f71ae341f5
2 changed files with 133 additions and 116 deletions

View file

@ -543,11 +543,37 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
async def _drain(stream, is_stderr: bool): async def _drain(stream, is_stderr: bool):
nonlocal bad_blocks_total, pid_seen nonlocal bad_blocks_total, pid_seen
async for raw in stream: # Chunk-read instead of line-iterate. badblocks emits
line = raw if isinstance(raw, str) else raw.decode("utf-8", errors="replace") # progress with '\b' backspaces (and sometimes '\r')
# to overwrite the previous progress line in-place —
# there's no '\n' between updates until a phase
# transition. async-for-line would buffer the entire
# phase's output as ONE line, so the parser never
# sees mid-phase percent updates. We read raw chunks,
# normalize \b runs + \r to \n, then process each
# resulting fragment as a line. Keep the partial
# trailing fragment in buf for the next chunk.
buf = ""
while True:
chunk = await stream.read(4096)
if not chunk:
break
if isinstance(chunk, bytes):
chunk = chunk.decode("utf-8", errors="replace")
buf += chunk
# Normalize \b runs + lone \r to \n so split() works.
normalized = _re_pre.sub(r"[\b]+", "\n", buf)
normalized = normalized.replace("\r", "\n")
fragments = normalized.split("\n")
# The tail is incomplete — keep it for the next chunk.
buf = fragments[-1]
for fragment in fragments[:-1]:
line = fragment + "\n"
if not line.strip():
continue
# First stdout line is "PID:<n>" from the wrapping shell. # First stdout line is "PID:<n>" from the
# Capture it and don't append it to the user-visible log. # wrapping shell. Capture and skip.
if not is_stderr and not pid_seen and line.startswith("PID:"): if not is_stderr and not pid_seen and line.startswith("PID:"):
pid_seen = True pid_seen = True
try: try:
@ -560,11 +586,10 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
pass pass
continue continue
# Drive progress.update from EVERY stderr line so it # Drive progress.update from EVERY stderr line
# picks up the "Testing with pattern 0xXX" + "Reading # so it picks up "Testing with pattern 0xXX" +
# and comparing" headers (which advance the phase # "Reading and comparing" headers, not just
# counter), not just the percent-done lines. CPU- # the percent-done lines. CPU-bound regex.
# bound regex work, no I/O, safe to do unconditionally.
prev_phase = progress.phase prev_phase = progress.phase
phase_changed = False phase_changed = False
is_progress_line = False is_progress_line = False
@ -578,22 +603,18 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
bad_blocks_total += 1 bad_blocks_total += 1
# Keep the "XX% done" lines OUT of output_lines. # Keep the "XX% done" lines OUT of output_lines.
# They're the dominant volume (sub-second cadence # They're the dominant volume; log_text concat
# for hours) and the log_text concat is quadratic # is quadratic.
# in length. Headers, errors, bad-block numbers,
# and other diagnostic output still get logged.
if not is_progress_line: if not is_progress_line:
output_lines.append(line) output_lines.append(line)
# Single throttle gate covering EVERY DB touch in # Single throttle gate covering EVERY DB touch.
# this loop. Without this, _is_cancelled + percent # Without this, the cumulative DB load makes
# writes + chunk-log appends collectively overwhelm # the asyncssh drain fall behind, the SSH
# the asyncio loop, the asyncssh drain falls behind, # window stops advancing, sshd stops reading
# the SSH window stops advancing, sshd stops reading # the pipe, badblocks blocks on pipe_write
# the pipe, badblocks blocks on pipe_write and no # and no disk I/O happens (sectors_written
# disk I/O happens. Symptom: badblocks pid in # delta of 0 confirmed the symptom).
# wchan=pipe_write with /sys/block sectors_written
# delta of 0.
now_ts = time.monotonic() now_ts = time.monotonic()
time_since_last_db = now_ts - last_db_write_ts time_since_last_db = now_ts - last_db_write_ts
should_write = phase_changed or time_since_last_db >= BB_DB_MIN_SECONDS should_write = phase_changed or time_since_last_db >= BB_DB_MIN_SECONDS
@ -621,8 +642,8 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
job_id, "surface_validate", bad_blocks_total, job_id, "surface_validate", bad_blocks_total,
) )
# 3) Throughput. Skip phase transitions (per- # 3) Throughput. Skip phase transitions
# phase pct resets would yield a negative delta). # (per-phase pct resets → negative delta).
if ( if (
drive_size_bytes drive_size_bytes
and not phase_changed and not phase_changed
@ -636,10 +657,7 @@ async def _stage_surface_validate_ssh(job_id: int, devname: str, drive_id: int)
job_id, "surface_validate", mbps, job_id, "surface_validate", mbps,
) )
# 4) Log flush. Chunk what's accumulated since # 4) Log flush.
# the last write. log_text concat is quadratic
# so the volume reduction from skipping "% done"
# lines above matters MORE than the throttle.
if pending_log_chunks: if pending_log_chunks:
chunk = "".join(pending_log_chunks) chunk = "".join(pending_log_chunks)
pending_log_chunks.clear() pending_log_chunks.clear()

View file

@ -2951,7 +2951,6 @@ th.sort-desc::after {
flex-wrap: wrap; flex-wrap: wrap;
} }
.drawer-job-finish { .drawer-job-finish {
margin-left: auto;
display: inline-flex; display: inline-flex;
align-items: baseline; align-items: baseline;
gap: 8px; gap: 8px;