Source code for exlab_wizard.cache.log_writer

"""Append-only writer for ``wizard.<hostname>.log`` files. Backend Spec §11.5, §16.2.4.

Low-level companion to the logger handler chain in ``exlab_wizard/logging/``.
This module owns the canonical line shape and the on-disk append semantics
so that both the high-level :class:`StructuredTagFormatter` (§16.4) and any
direct callers (e.g. the equipment-scoped file handler in §16.2.4) write
identical bytes.

The line format follows §11.5 verbatim:

::

    <UTC ISO 8601 timestamp> [<LEVEL:5>] [host:..] [equip:..] [proj:..] [kind:..] [run:..] <message>

Two public surfaces:

- :func:`format_log_line` -- pure function. Given a timestamp, level,
  message, and optional context tags, returns a single line WITHOUT a
  trailing newline. Truncates messages whose UTF-8 length exceeds
  ``LOG_LINE_MAX_BYTES`` with a literal ``...[truncated]`` marker
  (Backend §4.5).
- :func:`append_log_line` -- side-effecting. Appends a single line to a
  ``wizard.<hostname>.log`` file, creating the parent ``.exlab-wizard``
  directory if missing. Atomic up to ``PIPE_BUF`` on POSIX via
  ``O_APPEND``; on Windows the ``mode="a"`` open flag passes
  ``FILE_APPEND_DATA`` to the OS, which makes a single short append
  serializable against any other ``mode="a"`` writer on the same file
  (Backend §4.5 same-equipment concurrency rule).

The writer here is intentionally synchronous and side-effecting; the
non-blocking emit pipeline (``QueueHandler`` + ``QueueListener``;
§16.2.5) wraps it so the asyncio event loop is not blocked on filesystem
writes.
"""

from __future__ import annotations

from datetime import datetime
from pathlib import Path

from exlab_wizard.constants import LOG_LINE_MAX_BYTES
from exlab_wizard.utils.time import dt_to_iso

__all__ = [
    "append_log_line",
    "format_log_line",
]


# ---------------------------------------------------------------------------
# Format
# ---------------------------------------------------------------------------


# Mapping from kwarg name to the tag prefix that appears in the rendered
# line. Order matches §11.5 / §16.4: host, equip, proj, kind, run.
_TAG_ORDER: tuple[tuple[str, str], ...] = (
    ("host", "host"),
    ("equipment_id", "equip"),
    ("project_short_id", "proj"),
    ("run_kind", "kind"),
    ("run_id", "run"),
)


# Marker appended when a message must be truncated to satisfy
# ``LOG_LINE_MAX_BYTES``. The literal text is parsed by downstream tooling,
# so changing it is a deliberate spec change.
_TRUNCATION_MARKER: str = "...[truncated]"


[docs] def format_log_line( *, timestamp_utc: datetime, level: str, message: str, host: str | None = None, equipment_id: str | None = None, project_short_id: str | None = None, run_kind: str | None = None, run_id: str | None = None, ) -> str: """Render a structured log line per Backend Spec §11.5 / §16.4. Tags are omitted entirely when their argument is ``None``. The ``level`` field is left-padded to 5 characters so columns line up across ``INFO`` / ``WARN`` / ``DEBUG`` / ``ERROR`` lines. The timestamp is rendered in UTC with a trailing ``Z`` (e.g. ``2026-04-17T14:32:00Z``) matching the example shapes in §11.5. Lines whose UTF-8 length exceeds :data:`LOG_LINE_MAX_BYTES` are truncated by trimming the message tail and appending the literal ``...[truncated]`` marker. The truncation budget is computed against the full line length (timestamp + level + tags + message), so the marker is guaranteed to fit. Messages that are already short enough are returned untouched. Returns the line WITHOUT a trailing newline; the writer adds the newline at append time. """ timestamp = dt_to_iso(timestamp_utc) level_padded = f"{level:<5}" tags = _render_tags( host=host, equipment_id=equipment_id, project_short_id=project_short_id, run_kind=run_kind, run_id=run_id, ) prefix = f"{timestamp} [{level_padded}] {tags} " if tags else f"{timestamp} [{level_padded}] " line = f"{prefix}{message}" return _truncate_if_needed(line, prefix=prefix)
def _render_tags( *, host: str | None, equipment_id: str | None, project_short_id: str | None, run_kind: str | None, run_id: str | None, ) -> str: """Render the supplied context as ``[host:..] [equip:..] ...``. Returns the empty string when every argument is ``None`` so the caller can emit a tagless line cleanly. """ values: dict[str, str | None] = { "host": host, "equipment_id": equipment_id, "project_short_id": project_short_id, "run_kind": run_kind, "run_id": run_id, } parts: list[str] = [] for key, prefix in _TAG_ORDER: value = values[key] if value is not None: parts.append(f"[{prefix}:{value}]") return " ".join(parts) def _truncate_if_needed(line: str, *, prefix: str) -> str: """Trim ``line`` so its UTF-8 byte length is at most ``LOG_LINE_MAX_BYTES``. Truncation strips bytes from the end of the message body and appends the literal ``...[truncated]`` marker. The marker is guaranteed to fit by reserving its byte length up front. If even the prefix + marker alone exceeds the cap (a pathological case for callers that pass enormous tag values), the function still returns a best-effort truncated line rather than raising. """ encoded = line.encode("utf-8") if len(encoded) <= LOG_LINE_MAX_BYTES: return line marker_bytes = _TRUNCATION_MARKER.encode("utf-8") # Reserve room for the marker; the message body is shrunk to fit. budget = LOG_LINE_MAX_BYTES - len(marker_bytes) if budget <= 0: # Pathological case: even the marker alone exceeds the cap. Return # just the marker so the line is at least self-describing. return _TRUNCATION_MARKER prefix_bytes = prefix.encode("utf-8") if len(prefix_bytes) >= budget: # Prefix alone exhausts the budget. Emit prefix-truncated form so # downstream tools still see the marker and know to expect a tail # being missing. truncated_prefix = prefix_bytes[:budget].decode("utf-8", errors="ignore") return f"{truncated_prefix}{_TRUNCATION_MARKER}" # Normal path: keep the full prefix, trim the message tail to fit. body_budget = budget - len(prefix_bytes) body_bytes = encoded[len(prefix_bytes) : len(prefix_bytes) + body_budget] body = body_bytes.decode("utf-8", errors="ignore") return f"{prefix}{body}{_TRUNCATION_MARKER}" # --------------------------------------------------------------------------- # Append # ---------------------------------------------------------------------------
[docs] def append_log_line(path: Path, line: str) -> None: """Append a single ``line`` to a ``wizard.<hostname>.log`` file. Creates the parent ``.exlab-wizard`` directory if missing (the cache directory is allowed to not yet exist on first equipment-folder initialization). The file is opened in text-append mode with line buffering so each call ends up as one ``write()`` syscall. Concurrency: the file is opened with ``mode="a"``. On POSIX this maps to ``O_APPEND``, which makes a single ``write()`` of bytes ≤ ``PIPE_BUF`` (4096 on Linux) atomic against concurrent appenders. Lines longer than this cap are truncated upstream by :func:`format_log_line` to ``LOG_LINE_MAX_BYTES`` (1024) which is well under ``PIPE_BUF``. On Windows ``mode="a"`` opens with ``FILE_APPEND_DATA``; the OS serializes the actual append at the syscall boundary. See Backend §4.5 same-equipment concurrency rule. The line is appended verbatim; this function adds a single trailing newline. Callers SHOULD pass a line already truncated to :data:`LOG_LINE_MAX_BYTES` via :func:`format_log_line`. """ path.parent.mkdir(parents=True, exist_ok=True) # Open and close per-call. The persistent-handle optimization belongs # in the high-level handler (§16.2.4); this function is the # one-shot writer used by tests, fallback paths, and the default # handler when no equipment context is set. with path.open("a", encoding="utf-8") as fh: fh.write(line) fh.write("\n")