Source code for exlab_wizard.readme.generator

"""README generator. Backend Spec §10 / §11.4.

Renders ``README.md`` (YAML front matter + Markdown prose body) and the
``readme_fields.json`` cache file from a fully-resolved
:class:`ReadmeContext`.

The generator is the single producer for both files. The four field
layers (core / template / config / custom) are merged here and validated
before any bytes hit the disk: an out-of-bound or missing required field
raises before a partial README can be written.

Validation gates (User Interaction Spec §2 + Backend Spec §10.3):

* ``label`` non-empty after trim, ``<= LABEL_MAX_LENGTH``.
* ``operator`` non-empty after trim.
* ``objective`` non-empty after trim, ``<= OBJECTIVE_MAX_LENGTH``.
* every ``required: true`` template field has a value.
* every ``required: true`` config field has a value.
* field ids are unique across the template + config layers.
* custom field labels do not collide with the four-layer set's ids.
* core field ids (``label``, ``operator``, ``objective``) are not
  redeclared by the template or config layer (raises
  :class:`~exlab_wizard.errors.TemplateCoreFieldRedeclaredError`).
* every typed field value matches its declared type (string / text /
  choice / date / boolean).

Output format follows §10.7: a YAML front matter block delimited by
``---`` lines at the top of the file followed by a Markdown prose body.
The front matter is emitted via ``yaml.safe_dump(..., sort_keys=False)``
so the document order matches the spec example exactly.

The companion ``readme_fields.json`` is written at
``<dst>/.exlab-wizard/readme_fields.json`` using the typed
:class:`~exlab_wizard.api.schemas.ReadmeFieldsJson` Struct via
``msgspec.json.encode`` (§11.4 contract: every cache file goes through
msgspec).
"""

from __future__ import annotations

import asyncio
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any

import yaml
from msgspec import json as msgspec_json

from exlab_wizard import paths
from exlab_wizard.api.schemas import ReadmeFieldsJson
from exlab_wizard.constants import (
    LABEL_MAX_LENGTH,
    OBJECTIVE_MAX_LENGTH,
    README_FIELDS_JSON_VERSION,
    README_FILE_NAME,
    README_FRONT_MATTER_SCHEMA_VERSION,
    CreationLevel,
    FieldType,
)
from exlab_wizard.errors import TemplateCoreFieldRedeclaredError
from exlab_wizard.io import atomic_write_bytes
from exlab_wizard.logging import get_logger
from exlab_wizard.paths import readme_fields_json_path
from exlab_wizard.utils.time import dt_to_iso, parse_utc_iso

__all__ = [
    "CORE_FIELD_IDS",
    "CoreFields",
    "CustomField",
    "ReadmeContext",
    "ReadmeGenerator",
    "SystemFields",
    "TemplateFieldDecl",
]

_log = get_logger(__name__)

# The three core README field IDs. Templates and config may NOT redeclare
# these (Backend Spec §10.3). Mirrors
# ``exlab_wizard.template.copier_driver.CORE_README_FIELD_IDS`` --
# duplicated here to avoid a controller -> readme dependency cycle when
# the controller composes this generator.
CORE_FIELD_IDS: frozenset[str] = frozenset({"label", "operator", "objective"})

# Allowed values for ``TemplateFieldDecl.type``. Backend Spec §10.3.
_FIELD_TYPES: frozenset[str] = frozenset(m.value for m in FieldType)


# ---------------------------------------------------------------------------
# Public dataclasses
# ---------------------------------------------------------------------------


@dataclass(frozen=True)
class CoreFields:
    """Mandatory core fields (User Interaction Spec §2).

    All three are non-empty (after trim) when the controller hands the
    context to :class:`ReadmeGenerator`; the generator re-validates so a
    misuse never lets a malformed README onto disk.
    """

    # Short title; also rendered as the README's top-level heading.
    label: str
    # Who is performing the work; must be non-empty after trim.
    operator: str
    # Free-text objective; also rendered as the README body paragraph.
    objective: str
@dataclass(frozen=True)
class TemplateFieldDecl:
    """Field declaration from a template's ``_exlab_readme.fields`` list or
    from ``config.yaml`` ``readme.defaults``. Backend Spec §10.3.

    The same shape covers both layers: the controller knows which list it
    came from and packs the matching :class:`ReadmeContext` slot.
    """

    # Key under which the field's value is stored in the layer's value dict.
    id: str
    # Human-readable label for the field.
    label: str
    # Declared value type; drives the per-type validation in the generator.
    type: FieldType
    # When True, a value must be supplied and must be non-empty.
    required: bool = False
    default: Any = ""
    # Allowed values; must be non-empty when ``type`` is the choice type.
    options: list[str] | None = None
    hint: str | None = None
@dataclass(frozen=True)
class CustomField:
    """An ad-hoc user-added field. Backend Spec §10.4.

    Custom fields are plain string key-value pairs (no type selection) and
    their order in the output mirrors the order the user added them.
    """

    # User-chosen key; must not collide with any declared field id.
    label: str
    value: str
@dataclass(frozen=True)
class SystemFields:
    """Auto-populated, non-editable system fields. Backend Spec §10.6."""

    # Creation timestamp; the generator also reuses it as ``generated_at``
    # so both output files carry the same instant.
    created: datetime
    created_by: str
    # Copied into the output as plain dicts.
    equipment: dict[str, str]
    template: dict[str, str]
    project: str
    # ``None`` is allowed and is emitted as-is.
    run: str | None
    run_kind: str
@dataclass(frozen=True)
class ReadmeContext:
    """Inputs to :class:`ReadmeGenerator`. Composed by the controller.

    The controller pre-merges the four layers into the dicts below so the
    generator does not need to know about the merge order; the generator's
    job is to validate, render, and persist.
    """

    level: CreationLevel
    core: CoreFields
    # Resolved values keyed by field id, one dict per declaring layer.
    template_fields: dict[str, Any]
    config_fields: dict[str, Any]
    # Kept as a list so the user's insertion order survives into the output.
    custom_fields: list[CustomField]
    system: SystemFields
    # Declarations used to validate the value dicts above. They default to
    # empty lists, in which case no required/type checks apply to that layer.
    template_field_decls: list[TemplateFieldDecl] = field(default_factory=list)
    config_field_decls: list[TemplateFieldDecl] = field(default_factory=list)
# ---------------------------------------------------------------------------
# Generator
# ---------------------------------------------------------------------------
class ReadmeGenerator:
    """Renders ``README.md`` + ``readme_fields.json``. Backend Spec §10."""

    async def generate(self, dst: Path, ctx: ReadmeContext) -> tuple[Path, Path]:
        """Validate ``ctx``, write both files, return ``(readme, cache)``.

        The destination directory ``dst`` must already exist (the
        controller creates it during the directory-render phase). The
        ``.exlab-wizard/`` cache directory is created on demand.

        Both files are written via ``asyncio.to_thread`` so the asyncio
        event loop is never blocked on disk syscalls. The two writes
        share a single timestamp (``ctx.system.created``) so the
        ``generated_at`` and ``created`` fields agree.

        Raises whatever validation raises (``ValueError`` or
        :class:`~exlab_wizard.errors.TemplateCoreFieldRedeclaredError`),
        plus any error from the underlying writes.
        """
        return await asyncio.to_thread(self._generate_sync, dst, ctx)

    # ------------------------------------------------------------------
    # Sync core (runs on the worker thread)
    # ------------------------------------------------------------------

    def _generate_sync(self, dst: Path, ctx: ReadmeContext) -> tuple[Path, Path]:
        """Blocking implementation of :meth:`generate`.

        Validation, rendering and encoding all happen before the first
        filesystem call in this method, so a failure in any of them does
        not create the cache directory or either file.
        """
        _validate(ctx)

        readme_path = dst / README_FILE_NAME
        cache_path = readme_fields_json_path(dst)

        # One timestamp feeds both payloads so they can never disagree.
        generated_at = dt_to_iso(ctx.system.created)
        front_matter = _build_front_matter(ctx, generated_at=generated_at)
        readme_bytes = _render_readme_bytes(ctx, front_matter)
        cache_payload = _build_readme_fields(ctx, generated_at=generated_at)
        cache_bytes = msgspec_json.encode(cache_payload)

        # Only touch the disk once both payloads are fully built.
        paths.cache_dir(dst).mkdir(parents=True, exist_ok=True)
        atomic_write_bytes(readme_path, readme_bytes)
        atomic_write_bytes(cache_path, cache_bytes)

        _log.info(
            "README written: %s (level=%s, run_kind=%s)",
            readme_path,
            ctx.level,
            ctx.system.run_kind,
        )
        return readme_path, cache_path
# ---------------------------------------------------------------------------
# Validation (User Interaction Spec §2 + Backend Spec §10.3)
# ---------------------------------------------------------------------------


def _validate(ctx: ReadmeContext) -> None:
    """Run every validation gate. Raises on the first failure.

    Order: core fields first, then the redeclaration check, then
    required-field presence, then field-id uniqueness, then per-field
    type validation, then the custom-field label collision check.
    """
    _validate_core_fields(ctx.core)
    _reject_core_redeclaration(ctx.template_field_decls, layer="template")
    _reject_core_redeclaration(ctx.config_field_decls, layer="config")
    _validate_required_fields(ctx.template_field_decls, ctx.template_fields, layer="template")
    _validate_required_fields(ctx.config_field_decls, ctx.config_fields, layer="config")
    _validate_field_id_uniqueness(ctx.template_field_decls, ctx.config_field_decls)
    _validate_typed_fields(ctx.template_field_decls, ctx.template_fields, layer="template")
    _validate_typed_fields(ctx.config_field_decls, ctx.config_fields, layer="config")
    _validate_custom_field_labels(ctx)


def _validate_core_fields(core: CoreFields) -> None:
    """Check the three core fields for presence and length.

    All checks run on the whitespace-trimmed values. Emptiness is checked
    for all three fields before any length limit is checked.

    Raises:
        ValueError: a core field is empty after trim, or ``label`` /
            ``objective`` exceeds its maximum length.
    """
    label = core.label.strip()
    operator = core.operator.strip()
    objective = core.objective.strip()
    if not label:
        raise ValueError("core_fields.label must be non-empty after trim")
    if not operator:
        raise ValueError("core_fields.operator must be non-empty after trim")
    if not objective:
        raise ValueError("core_fields.objective must be non-empty after trim")
    if len(label) > LABEL_MAX_LENGTH:
        raise ValueError(
            f"core_fields.label exceeds {LABEL_MAX_LENGTH} characters (got {len(label)})"
        )
    if len(objective) > OBJECTIVE_MAX_LENGTH:
        raise ValueError(
            f"core_fields.objective exceeds {OBJECTIVE_MAX_LENGTH} characters "
            f"(got {len(objective)})"
        )


def _reject_core_redeclaration(
    decls: list[TemplateFieldDecl],
    *,
    layer: str,
) -> None:
    """Reject any declaration whose id is one of the core field ids.

    Raises:
        TemplateCoreFieldRedeclaredError: on the first offending declaration.
    """
    for decl in decls:
        if decl.id in CORE_FIELD_IDS:
            raise TemplateCoreFieldRedeclaredError(
                f"{layer} layer redeclares core field {decl.id!r}; "
                "core fields are backend-managed and cannot be redeclared (Backend §10.3)"
            )


def _validate_required_fields(
    decls: list[TemplateFieldDecl],
    values: dict[str, Any],
    *,
    layer: str,
) -> None:
    """Ensure every ``required`` declaration has a non-empty value.

    Raises:
        ValueError: a required field id is absent from ``values`` or its
            value is empty according to :func:`_is_present`.
    """
    for decl in decls:
        if not decl.required:
            continue
        if decl.id not in values:
            raise ValueError(f"{layer}_fields[{decl.id!r}] is required but missing")
        if not _is_present(values[decl.id]):
            raise ValueError(f"{layer}_fields[{decl.id!r}] is required but empty")


def _validate_field_id_uniqueness(
    template_decls: list[TemplateFieldDecl],
    config_decls: list[TemplateFieldDecl],
) -> None:
    """Ensure no field id is declared twice, within or across layers.

    The template layer is scanned first, then the config layer. A repeat
    inside one layer and a repeat across the two layers produce different
    messages so the caller can tell which file to fix.

    Raises:
        ValueError: on the first duplicate id encountered.
    """
    seen: dict[str, str] = {}  # field id -> layer that first declared it
    for layer, decls in (("template", template_decls), ("config", config_decls)):
        for decl in decls:
            owner = seen.get(decl.id)
            if owner is None:
                seen[decl.id] = layer
            elif owner == layer:
                raise ValueError(
                    f"field id {decl.id!r} declared more than once in the {layer} layer"
                )
            else:
                # Only reachable while scanning config with a template owner.
                raise ValueError(
                    f"field id {decl.id!r} declared in both {owner} and config layers"
                )


def _validate_typed_fields(
    decls: list[TemplateFieldDecl],
    values: dict[str, Any],
    *,
    layer: str,
) -> None:
    """Per-type validation. Spec §10.3 type semantics.

    Skips fields whose value is missing or empty unless required (the
    required-field check handles those upstream).
    """
    by_id = {d.id: d for d in decls}
    for fid, value in values.items():
        decl = by_id.get(fid)
        if decl is None:
            # Field has no declaration -- nothing to type-check against.
            continue
        if not _is_present(value) and not decl.required:
            continue
        _check_value_type(decl, value, layer=layer)


def _check_value_type(decl: TemplateFieldDecl, value: Any, *, layer: str) -> None:
    """Check a single value against its declaration's type.

    Raises:
        ValueError: the declared type is unknown, the value has the wrong
            Python type, a choice value is not among the declared options
            (or no options are declared), or a date string does not parse.
    """
    if decl.type not in _FIELD_TYPES:
        raise ValueError(
            f"{layer}_fields[{decl.id!r}] has unknown type {decl.type!r}; "
            f"allowed: {sorted(_FIELD_TYPES)}"
        )

    if decl.type in {FieldType.STRING, FieldType.TEXT}:
        if not isinstance(value, str):
            raise ValueError(
                f"{layer}_fields[{decl.id!r}] expects {decl.type}, got {type(value).__name__}"
            )
        return

    if decl.type == FieldType.CHOICE:
        if not isinstance(value, str):
            raise ValueError(
                f"{layer}_fields[{decl.id!r}] expects choice (string), got {type(value).__name__}"
            )
        if not decl.options:
            raise ValueError(f"{layer}_fields[{decl.id!r}] is type=choice but declares no options")
        if value not in decl.options:
            raise ValueError(
                f"{layer}_fields[{decl.id!r}] value {value!r} is not in options {decl.options!r}"
            )
        return

    if decl.type == FieldType.DATE:
        if not isinstance(value, str):
            raise ValueError(
                f"{layer}_fields[{decl.id!r}] expects ISO 8601 date string, "
                f"got {type(value).__name__}"
            )
        try:
            parse_utc_iso(value)
        except ValueError as exc:
            raise ValueError(
                f"{layer}_fields[{decl.id!r}] is not a valid ISO 8601 date: {value!r}"
            ) from exc
        return

    # ``isinstance(True, int)`` is True in Python, so reject ints/strings
    # explicitly. ``bool`` is the only accepted shape for type=boolean.
    if decl.type == FieldType.BOOLEAN and not isinstance(value, bool):
        raise ValueError(f"{layer}_fields[{decl.id!r}] expects bool, got {type(value).__name__}")


def _validate_custom_field_labels(ctx: ReadmeContext) -> None:
    """Ensure custom labels do not shadow any layer's field id.

    The four-layer set (Backend §10.2) reserves the core ids plus every
    template- and config-declared id. A custom label that collides would
    silently overwrite the typed value when readers ingest the front
    matter as a flat dict.

    Raises:
        ValueError: on the first custom label found in the reserved set.
    """
    reserved: set[str] = set(CORE_FIELD_IDS)
    reserved.update(d.id for d in ctx.template_field_decls)
    reserved.update(d.id for d in ctx.config_field_decls)
    for cf in ctx.custom_fields:
        if cf.label in reserved:
            raise ValueError(
                f"custom field label {cf.label!r} collides with a "
                f"declared field id (reserved: {sorted(reserved)})"
            )


def _is_present(value: Any) -> bool:
    """Return True iff ``value`` carries a non-empty piece of content.

    ``None`` is always absent. Booleans are always present, even when
    ``False`` -- the user explicitly chose a value. Strings are present
    when non-empty after trim. Everything else (list, dict, number) falls
    back to ordinary truthiness.
    """
    if value is None:
        return False
    if isinstance(value, bool):
        return True
    if isinstance(value, str):
        return bool(value.strip())
    return bool(value)


# ---------------------------------------------------------------------------
# Front matter + body rendering (Backend Spec §10.7)
# ---------------------------------------------------------------------------


def _core_fields_dict(core: CoreFields) -> dict[str, str]:
    """Return the core fields as an ordered plain dict (values untrimmed)."""
    return {"label": core.label, "operator": core.operator, "objective": core.objective}


def _custom_fields_list(custom: list[CustomField]) -> list[dict[str, str]]:
    """Return custom fields as ``{"label", "value"}`` dicts, order preserved."""
    return [{"label": cf.label, "value": cf.value} for cf in custom]


def _system_fields_dict(system: SystemFields) -> dict[str, Any]:
    """Return the system fields as a plain dict.

    ``created`` is converted with ``dt_to_iso``; the two mapping fields are
    shallow-copied so the output does not alias the context's dicts.
    """
    return {
        "created": dt_to_iso(system.created),
        "created_by": system.created_by,
        "equipment": dict(system.equipment),
        "template": dict(system.template),
        "project": system.project,
        "run": system.run,
        "run_kind": system.run_kind,
    }


def _build_front_matter(ctx: ReadmeContext, *, generated_at: str) -> dict[str, Any]:
    """Build the ordered front matter dict for ``yaml.safe_dump``.

    Order matches the spec example at §10.7: ``schema_version``,
    ``generated_at``, ``core_fields``, ``template_fields``,
    ``config_fields``, ``custom_fields``, ``system_fields``. Empty layers
    still appear so downstream consumers can rely on the keys being
    present.
    """
    return {
        "schema_version": README_FRONT_MATTER_SCHEMA_VERSION,
        "generated_at": generated_at,
        "core_fields": _core_fields_dict(ctx.core),
        "template_fields": dict(ctx.template_fields),
        "config_fields": dict(ctx.config_fields),
        "custom_fields": _custom_fields_list(ctx.custom_fields),
        "system_fields": _system_fields_dict(ctx.system),
    }


def _render_readme_bytes(ctx: ReadmeContext, front_matter: dict[str, Any]) -> bytes:
    """Render the full README markdown into UTF-8 bytes.

    ``yaml.safe_dump`` is configured with ``sort_keys=False`` so the
    declaration order in :func:`_build_front_matter` is the on-disk order
    (the spec pins both at §10.7).
    """
    fm_text = yaml.safe_dump(
        front_matter,
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )
    body = f"# {ctx.core.label}\n\n{ctx.core.objective}\n"
    document = f"---\n{fm_text}---\n\n{body}"
    return document.encode("utf-8")


def _build_readme_fields(
    ctx: ReadmeContext,
    *,
    generated_at: str,
) -> ReadmeFieldsJson:
    """Build the typed :class:`ReadmeFieldsJson` payload for the cache file."""
    return ReadmeFieldsJson(
        schema_version=README_FIELDS_JSON_VERSION,
        generated_at=generated_at,
        core_fields=_core_fields_dict(ctx.core),
        template_fields=dict(ctx.template_fields),
        config_fields=dict(ctx.config_fields),
        custom_fields=_custom_fields_list(ctx.custom_fields),
        system_fields=_system_fields_dict(ctx.system),
    )


# ---------------------------------------------------------------------------
# I/O helpers
# ---------------------------------------------------------------------------