feat(catalog): stdlib-only Python validator + regenerator for templates/

Adds the catalog pipeline without introducing any external dependencies. tools/build-catalog.py walks templates/<author>/<name>/, validates every shipped .scarftemplate against its manifest (same invariants Swift's ProjectTemplateService.verifyClaims enforces at install time), and emits templates/catalog.json for the frontend to read. Validator invariants: - Required bundle files: template.json, README.md, AGENTS.md, dashboard.json - contents claim cross-checked against actual zip entries (instructions, skills, cron count, memory appendix) - dashboard.json widget types restricted to the vocabulary the Swift renderer knows - Manifest id author component must match the template directory - 5 MB bundle-size cap on submissions (installer's own cap is 50 MB) - High-confidence secret patterns (private keys, GitHub PATs, Slack tokens, AWS access keys, OpenAI/Anthropic keys) block the bundle - staging/ source tree must match the built bundle byte-for-byte — catches the common failure mode of editing staging/ but forgetting to rebuild. scripts/catalog.sh wraps the Python script with check/build/preview/serve/publish subcommands, mirroring the scripts/wiki.sh shape. publish adds a second-pass hard-pattern secret scan on the rendered gh-pages output so template prose can't leak credentials even if the Python scan missed them. tools/test_build_catalog.py has 14 unit tests covering the main validator paths (minimal-valid, missing-AGENTS, content-claim mismatch, author mismatch, oversized bundle, unknown widget type, secret detection, staging-drift detection, missing bundle, catalog.json shape, and a real-bundle end-to-end check against templates/awizemann/site-status-checker). Python 3.9 compatible (Xcode's bundled python3), so no runtime needs installing. templates/catalog.json committed as the first generated aggregate index; maintainers regenerate on merge by running `./scripts/catalog.sh build`. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 10:36:35 +00:00 · 2026-04-23 00:02:09 +02:00
parent d8a0a89db2
commit 11732baa3c
4 changed files with 1200 additions and 0 deletions
@@ -0,0 +1,135 @@
 #!/usr/bin/env bash
 #
 # Scarf templates catalog helper — runs the Python validator, renders the
 # static site into .gh-pages-worktree/templates/, and (on `publish`)
 # commits + pushes that subdir on the gh-pages branch.
 #
 # Usage:
 #   ./scripts/catalog.sh check                # validate every template; no output
 #   ./scripts/catalog.sh build                # validate + write templates/catalog.json + .gh-pages-worktree/templates/
 #   ./scripts/catalog.sh preview [DIR]        # render self-contained preview; DIR defaults to /tmp/scarf-catalog-preview
 #   ./scripts/catalog.sh publish              # secret-scan + commit + push gh-pages (templates subdir only)
 #   ./scripts/catalog.sh serve  [PORT]        # serve .gh-pages-worktree/ on localhost:PORT (default 8000)
 #   ./scripts/catalog.sh --help               # this help
 #
 # The secret-scan runs BEFORE publish and inspects the generated
 # .gh-pages-worktree/templates/ tree — same hard-pattern regex as
 # scripts/wiki.sh so template README/AGENTS content that accidentally
 # leaks credentials gets blocked before it reaches the public site.
 #
 # Bootstrap (one-time): requires a .gh-pages-worktree/ clone of the
 # gh-pages branch. The release script (scripts/release.sh) creates it on
 # first use. If it's missing:
 #     git worktree add .gh-pages-worktree gh-pages
 #
 # Recovery: if .gh-pages-worktree/ is deleted, re-run the command above.
 # Abort on any failed command, unset variable, or failed pipeline stage.
 set -euo pipefail
 # ---------- config ----------
 # Repo root = parent of the directory that holds this script.
 REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
 # Checkout of the gh-pages branch that receives the rendered site.
 GHPAGES_DIR="$REPO_ROOT/.gh-pages-worktree"
 # Subdirectory of the gh-pages checkout that this script scans and commits.
 CATALOG_SUBDIR="templates"
 # Interpreter used to run the builder; override by exporting $PYTHON.
 PY="${PYTHON:-python3}"
 # The Python validator/renderer every subcommand delegates to.
 BUILDER="$REPO_ROOT/tools/build-catalog.py"
 # ---------- helpers (same shape as scripts/wiki.sh so a reader doesn't
 # have to learn two conventions) ----------
 log() {
  # Progress line on stdout: bold blue "==> <message>".
  printf '\033[1;34m==> %s\033[0m\n' "$*"
 }
 warn() {
  # Non-fatal notice on stderr: bold yellow "[WARN] <message>".
  printf '\033[1;33m[WARN] %s\033[0m\n' "$*" >&2
 }
 die() {
  # Fatal error on stderr (bold red "[ERR] <message>"), then exit status 1.
  printf '\033[1;31m[ERR] %s\033[0m\n' "$*" >&2
  exit 1
 }
 need_builder() {
  # Precondition for every builder-backed subcommand: the builder script
  # must exist and the configured interpreter must be on PATH.
  if [[ ! -f "$BUILDER" ]]; then
    die "missing $BUILDER"
  fi
  if ! command -v "$PY" >/dev/null 2>&1; then
    die "python3 not found (set \$PYTHON if needed)"
  fi
 }
 need_ghpages() {
  # Precondition for serve/publish: a gh-pages checkout must exist.
  #
  # `git worktree add` (the documented bootstrap) leaves a `.git` *file*
  # in the linked worktree (a "gitdir: ..." pointer), whereas a plain clone
  # has a `.git` *directory*. Test for existence (-e) so both layouts are
  # accepted; a directory-only test (-d) rejects real worktrees.
  [[ -e "$GHPAGES_DIR/.git" ]] || die "no gh-pages worktree at $GHPAGES_DIR
  Run: git worktree add .gh-pages-worktree gh-pages"
 }
 # ---------- secret-scan (mirrors scripts/wiki.sh hard-pattern set) ----------
 # One extended-regex alternation (used with `grep -E`) of credential-shaped
 # strings: `sk-…`, `ghp_/ghs_/ghu_/gho_/ghr_…`, `github_pat_…`, `xox?-…`,
 # `AKIA…`, `AIza…`, and PEM / OpenSSH "PRIVATE KEY" headers.
 hard_regex='(sk-[A-Za-z0-9_-]{20,}|ghp_[A-Za-z0-9]{30,}|ghs_[A-Za-z0-9]{30,}|ghu_[A-Za-z0-9]{30,}|gho_[A-Za-z0-9]{30,}|ghr_[A-Za-z0-9]{30,}|github_pat_[A-Za-z0-9_]{20,}|xox[baprs]-[A-Za-z0-9-]{10,}|AKIA[0-9A-Z]{16}|AIza[0-9A-Za-z_-]{35}|-----BEGIN [A-Z ]*PRIVATE KEY-----|BEGIN OPENSSH PRIVATE KEY)'
 scan_hard_ghpages() {
  # Scan the generated output, NOT the repo source — the validator
  # already scans bundle contents. This pass catches anything that leaked
  # through template.json fields or README prose.
  #
  # The scan fails CLOSED: a missing output directory or a grep error
  # aborts the publish instead of being mistaken for "no secrets found".
  local target="$GHPAGES_DIR/$CATALOG_SUBDIR"
  local hits status=0
  [[ -d "$target" ]] || die "nothing to scan: $target does not exist (run build first)"
  # grep exit status: 0 = match found, 1 = no match, >1 = error.
  # `-e` keeps the pattern from ever being parsed as an option.
  hits="$(grep -rInE --exclude-dir=.git -e "$hard_regex" -- "$target")" || status=$?
  if [[ -n "$hits" ]]; then
    printf '%s\n' "$hits" >&2
    die "hard-pattern secret match in rendered site — refusing to publish."
  fi
  if (( status > 1 )); then
    die "secret scan could not complete (grep exit $status) — refusing to publish."
  fi
 }
 # ---------- commands ----------
 cmd_check() {
  # Validation only: run the builder in --check mode against this repo.
  need_builder
  local -a builder_args=(--check --repo "$REPO_ROOT")
  "$PY" "$BUILDER" "${builder_args[@]}"
 }
 cmd_build() {
  # Validate, then let the builder write its outputs (--build mode).
  need_builder
  local -a builder_args=(--build --repo "$REPO_ROOT")
  "$PY" "$BUILDER" "${builder_args[@]}"
 }
 cmd_preview() {
  # Render a self-contained preview into DIR (default
  # /tmp/scarf-catalog-preview). DIR is wiped and recreated first, so it
  # must be a directory dedicated to the preview.
  need_builder
  local dir="${1:-/tmp/scarf-catalog-preview}"
  # Guard the rm -rf below: refuse targets whose removal would destroy the
  # filesystem root, the current directory, the repo, the gh-pages
  # checkout, or the user's home directory.
  case "$dir" in
    /|.|..|"$REPO_ROOT"|"$GHPAGES_DIR"|"${HOME:-}")
      die "refusing to wipe $dir — pass a dedicated preview directory"
      ;;
  esac
  # `--` stops a DIR that begins with '-' from being read as an option.
  rm -rf -- "$dir"
  mkdir -p -- "$dir"
  "$PY" "$BUILDER" --preview "$dir" --repo "$REPO_ROOT"
  log "Preview rendered to $dir"
  log "Serve with:  (cd $dir && python3 -m http.server 8000)  then open http://localhost:8000/"
 }
 cmd_serve() {
  # Serve the gh-pages checkout over HTTP on localhost:PORT (default 8000).
  need_ghpages
  local port="${1:-8000}"
  log "Serving $GHPAGES_DIR on http://localhost:$port/"
  # http.server binds to every interface unless told otherwise; pin it to
  # loopback so the checkout is only reachable from this machine, which is
  # what the usage text and the log line above promise.
  (cd "$GHPAGES_DIR" && "$PY" -m http.server --bind 127.0.0.1 "$port")
 }
 cmd_publish() {
  # Validate, build, secret-scan, then commit + push ONLY the templates
  # subdirectory of the gh-pages checkout.
  need_builder
  need_ghpages
  log "Validating"
  "$PY" "$BUILDER" --check --repo "$REPO_ROOT"
  log "Building"
  "$PY" "$BUILDER" --build --repo "$REPO_ROOT"
  log "Secret-scanning rendered site"
  scan_hard_ghpages
  log "Staging + committing gh-pages"
  (cd "$GHPAGES_DIR" && git add -- "$CATALOG_SUBDIR")
  # Limit both the "anything staged?" test and the commit to the catalog
  # subdir. Without the pathspec, unrelated changes already staged in the
  # checkout would trigger — and be swept into — a "catalog: rebuild" commit.
  if (cd "$GHPAGES_DIR" && git diff --cached --quiet -- "$CATALOG_SUBDIR"); then
    log "No changes to publish."
    return 0
  fi
  local msg
  msg="catalog: rebuild at $(date -u +%Y-%m-%dT%H:%M:%SZ)"
  (cd "$GHPAGES_DIR" && git commit -m "$msg" -- "$CATALOG_SUBDIR")
  log "Pushing gh-pages"
  (cd "$GHPAGES_DIR" && git push origin gh-pages)
  log "Published."
 }
 cmd_help() {
  # Print the usage paragraph from this script's own header comment:
  # take the first 30 lines, then keep everything from the "# Usage" line
  # through the next bare "#" line.
  head -n 30 "$0" | sed -n '/^# Usage/,/^#$/p'
 }
 # ---------- dispatch ----------
 # First argument selects the subcommand; a missing (or empty) one means help.
 sub="${1:-help}"
 # Drop the subcommand so "$@" holds only its own arguments. Guarded because
 # `shift` with no positional parameters fails under `set -e`.
 if (( $# > 0 )); then
  shift
 fi
 case "$sub" in
  help|--help|-h)
    cmd_help
    ;;
  check)
    cmd_check "$@"
    ;;
  build)
    cmd_build "$@"
    ;;
  preview)
    cmd_preview "$@"
    ;;
  serve)
    cmd_serve "$@"
    ;;
  publish)
    cmd_publish "$@"
    ;;
  *)
    die "unknown command: $sub  (try --help)"
    ;;
 esac
@@ -0,0 +1,34 @@
 {
  "generated": true,
  "schemaVersion": 1,
  "templates": [
    {
      "author": {
        "name": "Alan Wizemann",
        "url": "https://github.com/awizemann/scarf"
      },
      "bundleSha256": "32b8c12706de8596be63dcdda32d46fc5bf478d5b9f7c1fc4c6d96ced251186a",
      "bundleSize": 5410,
      "category": "monitoring",
      "contents": {
        "agentsMd": true,
        "cron": 1,
        "dashboard": true
      },
      "description": "A daily uptime check for a short list of URLs. Writes status to status-log.md and updates the dashboard with current counts.",
      "detailSlug": "awizemann-site-status-checker",
      "id": "awizemann/site-status-checker",
      "installUrl": "https://raw.githubusercontent.com/awizemann/scarf/main/templates/awizemann/site-status-checker/site-status-checker.scarftemplate",
      "minHermesVersion": "0.9.0",
      "minScarfVersion": "2.2.0",
      "name": "Site Status Checker",
      "tags": [
        "monitoring",
        "uptime",
        "cron",
        "starter"
      ],
      "version": "1.0.0"
    }
  ]
 }
@@ -0,0 +1,646 @@
 #!/usr/bin/env python3
 """Scarf template catalog builder + validator.
 Walks every `templates/<author>/<name>/` in this repo, validates the
 `.scarftemplate` bundle against its manifest claim (same invariants the
 Swift `ProjectTemplateService.verifyClaims` enforces at install time), and
 produces:
  templates/catalog.json                aggregate index for the site
  .gh-pages-worktree/templates/...      per-template HTML + dashboard.json
                                        (only produced by --build / --publish)
 This is stdlib-only Python so it runs in a GitHub Action with zero
 dependencies and in under a second even when the catalog has thousands of
 templates. Schema drift between this validator and the Swift installer
 breaks one of two contracts — add a failing test in both places when you
 change anything here.
 Usage:
  tools/build-catalog.py --check           validate; no output written
  tools/build-catalog.py --build           validate + write catalog.json + site
  tools/build-catalog.py --preview DIR     render a self-contained preview
                                           site into DIR (for local viewing)
 Exit codes:
  0  success
  1  validation failure (one or more templates rejected)
  2  IO / usage error
 """
 from __future__ import annotations
 import argparse
 import hashlib
 import json
 import os
 import re
 import shutil
 import sys
 import zipfile
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Iterable
 # ---------------------------------------------------------------------------
 # Schema + invariants
 # ---------------------------------------------------------------------------
 # Manifest / dashboard schema version this validator understands.
 SCHEMA_VERSION = 1
 MAX_BUNDLE_BYTES = 5 * 1024 * 1024  # 5 MB cap on submissions; installer is 50 MB
 # Files every bundle must contain at its top level.
 REQUIRED_BUNDLE_FILES = ("template.json", "README.md", "AGENTS.md", "dashboard.json")
 # Widget `type` values accepted in dashboard.json.
 SUPPORTED_WIDGET_TYPES = {"stat", "progress", "text", "table", "chart", "list", "webview"}
 # Common secret patterns — keep in sync with `scripts/wiki.sh` and reuse a
 # conservative subset. The validator rejects hard matches; the site's
 # CONTRIBUTING guide covers the rest.
 #
 # Each entry is (compiled pattern, human-readable label). Every pattern is a
 # superset of what earlier revisions matched, widened only where the old
 # form could not match the token shape its label names:
 #   * private keys: any upper-case label before "PRIVATE KEY" (DSA,
 #     ENCRYPTED, ...), mirroring the shell scanner's `[A-Z ]*`;
 #   * GitHub: also the `github_pat_` prefix the shell scanner checks;
 #   * Slack: also `xoxr-`, which the shell scanner checks;
 #   * `sk-` keys: `-` and `_` are allowed inside the token body, so keys
 #     with hyphenated segments (e.g. `sk-ant-...`, `sk-proj-...`) match.
 SECRET_PATTERNS = [
    (re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----"), "private key block"),
    (
        re.compile(r"(?i)\b(?:gh[pousr]_[A-Za-z0-9]{36,}|github_pat_[A-Za-z0-9_]{20,})"),
        "github personal access token",
    ),
    (re.compile(r"(?i)\bxox[abprso]-[A-Za-z0-9-]{10,}"), "slack token"),
    (re.compile(r"(?i)\bAKIA[0-9A-Z]{16}"), "aws access key id"),
    (re.compile(r"(?i)\bsk-[A-Za-z0-9_-]{32,}"), "openai/anthropic api key"),
 ]
 # Repo root = parent of the directory that holds this script.
 REPO_ROOT = Path(__file__).resolve().parent.parent
 # ---------------------------------------------------------------------------
 # Data classes
 # ---------------------------------------------------------------------------
@dataclass
class ValidationError:
    """One validation failure, tied to the template directory it concerns."""
    template_path: Path
    message: str
    def __str__(self) -> str:
        # Prefer a repo-relative path for concise CLI output. Templates that
        # live outside the repo tree (unit tests use temp dirs) make
        # relative_to() raise ValueError; keep the path as given then.
        shown: Path | str = self.template_path
        try:
            shown = self.template_path.relative_to(REPO_ROOT)
        except ValueError:
            pass
        return f"{shown}: {self.message}"
@dataclass
class TemplateRecord:
    """A validated template plus the derived fields the catalog publishes.

    `manifest` is the parsed template.json; the remaining fields describe
    the bundle file on disk and where the site links to it.
    """
    path: Path
    manifest: dict
    bundle_path: Path
    bundle_sha256: str
    bundle_size: int
    install_url: str
    detail_slug: str
    def to_catalog_entry(self) -> dict:
        """Return this record's catalog.json entry.

        Keep the key set stable — the site's widgets.js reads this shape.
        `id`, `name`, `version`, `description` and `contents` are required
        (KeyError when absent); the other manifest fields are optional.
        """
        manifest = self.manifest
        entry = {key: manifest[key] for key in ("id", "name", "version", "description")}
        entry["author"] = manifest.get("author")
        entry["category"] = manifest.get("category")
        # A missing or null tag list is published as an empty list.
        entry["tags"] = manifest.get("tags") or []
        entry["contents"] = manifest["contents"]
        entry["installUrl"] = self.install_url
        entry["detailSlug"] = self.detail_slug
        entry["bundleSha256"] = self.bundle_sha256
        entry["bundleSize"] = self.bundle_size
        entry["minScarfVersion"] = manifest.get("minScarfVersion")
        entry["minHermesVersion"] = manifest.get("minHermesVersion")
        return entry
 # ---------------------------------------------------------------------------
 # Validation
 # ---------------------------------------------------------------------------
 def manifest_slug(manifest_id: str) -> str:
    """Mirror of Swift `ProjectTemplateManifest.slug`.

    Each run of characters outside `[A-Za-z0-9_-]` becomes a single hyphen,
    leading/trailing hyphens are stripped, and an empty result falls back
    to 'template'.
    """
    slug = re.sub(r"[^A-Za-z0-9_-]+", "-", manifest_id).strip("-")
    if slug:
        return slug
    return "template"
 def _iter_templates(repo_root: Path) -> Iterable[Path]:
    """Yield every `templates/<author>/<name>/` directory worth validating.

    A directory qualifies when it holds a `staging/` source tree or a built
    `*.scarftemplate` bundle. Directories with neither (e.g. an author dir
    that only holds a README) are silently skipped, as are hidden author
    directories. Yields nothing when `templates/` does not exist. Results
    are sorted by author, then by template name.
    """
    root = repo_root / "templates"
    if not root.is_dir():
        return
    for author_dir in sorted(root.iterdir()):
        if not author_dir.is_dir() or author_dir.name.startswith("."):
            continue
        for template_dir in sorted(author_dir.iterdir()):
            if not template_dir.is_dir():
                continue
            has_staging = (template_dir / "staging").is_dir()
            # A shipped bundle with no staging/ tree must still be validated;
            # keying on staging/ alone would drop it from the catalog without
            # any error being reported.
            has_bundle = any(template_dir.glob("*.scarftemplate"))
            if has_staging or has_bundle:
                yield template_dir
 def _validate_manifest(manifest: dict, template_dir: Path, errors: list[ValidationError]) -> None:
    """Check the manifest's required fields, schema version and id.

    Appends one ValidationError per problem to `errors`; returns nothing.
    The id must be a string of the form `owner/name` with a non-empty name,
    and its owner component must equal the name of `template_dir`'s parent
    directory.
    """
    required = ["schemaVersion", "id", "name", "version", "description", "contents"]
    for field in required:
        if field not in manifest:
            errors.append(ValidationError(template_dir, f"manifest missing required field: {field}"))
    if manifest.get("schemaVersion") != SCHEMA_VERSION:
        errors.append(ValidationError(template_dir, f"unsupported schemaVersion: {manifest.get('schemaVersion')}"))
    # Manifest id must match the directory layout.
    mid = manifest.get("id", "")
    # A non-string id (number, list, null) is reported like any other
    # malformed id rather than crashing the `in` test below.
    if not isinstance(mid, str) or "/" not in mid:
        errors.append(ValidationError(template_dir, f"manifest id must be owner/name, got {mid!r}"))
        return
    expected_author = template_dir.parent.name
    author_part, _, name_part = mid.partition("/")
    if author_part != expected_author:
        errors.append(ValidationError(
            template_dir,
            f"manifest id {mid!r} author component does not match directory "
            f"({expected_author!r})"
        ))
    if not name_part:
        # "owner/" contains a slash but names no template.
        errors.append(ValidationError(template_dir, f"manifest id must be owner/name, got {mid!r}"))
 def _validate_contents_claim(
    manifest: dict,
    bundle_files: set[str],
    cron_job_count: int,
    template_dir: Path,
    errors: list[ValidationError],
 ) -> None:
    """Mirrors Swift `ProjectTemplateService.verifyClaims`. Rejects any
    mismatch between what the manifest says and what's actually in the
    bundle so the catalog site can't misrepresent a template.

    Appends one ValidationError per mismatch to `errors`. Malformed claim
    values (non-object `contents` or `memory`, non-numeric `cron`) are
    reported as validation errors instead of raising. Errors within each
    category are emitted in sorted order so the output is stable from run
    to run.
    """
    contents = manifest.get("contents", {})
    if not isinstance(contents, dict):
        errors.append(ValidationError(template_dir, "manifest contents must be an object"))
        contents = {}
    for required in REQUIRED_BUNDLE_FILES:
        if required not in bundle_files:
            errors.append(ValidationError(template_dir, f"bundle missing required file: {required}"))
    # Optional instructions/ dir — claim must match presence exactly.
    claimed_instructions = contents.get("instructions") or []
    claimed_full = {f"instructions/{p}" for p in claimed_instructions}
    present_instructions = {f for f in bundle_files if f.startswith("instructions/")}
    for claim in sorted(claimed_full - bundle_files):
        errors.append(ValidationError(template_dir, f"contents.instructions claims {claim} but file is missing"))
    for present in sorted(present_instructions - claimed_full):
        errors.append(ValidationError(
            template_dir,
            f"bundle has {present} but it's not listed in contents.instructions"
        ))
    # Skills — each claimed skill name must exist as a subdir with at least
    # one file; extra skill dirs not listed are rejected.
    claimed_skills = set(contents.get("skills") or [])
    present_skills = set()
    for f in bundle_files:
        if f.startswith("skills/"):
            rest = f[len("skills/"):]
            # Only files nested under skills/<name>/ define a skill; a file
            # directly inside skills/ does not.
            if "/" in rest:
                present_skills.add(rest.split("/", 1)[0])
    for skill in sorted(claimed_skills):
        if not any(f.startswith(f"skills/{skill}/") for f in bundle_files):
            errors.append(ValidationError(template_dir, f"contents.skills claims {skill!r} but skills/{skill}/ is empty"))
    for extra in sorted(present_skills - claimed_skills):
        errors.append(ValidationError(template_dir, f"bundle has skills/{extra}/ not listed in contents.skills"))
    # Cron — numeric count must match bundle. A missing/null/0 claim means 0.
    raw_cron = contents.get("cron") or 0
    try:
        claimed_cron = int(raw_cron)
    except (TypeError, ValueError):
        errors.append(ValidationError(template_dir, f"contents.cron must be an integer, got {raw_cron!r}"))
    else:
        if claimed_cron != cron_job_count:
            errors.append(ValidationError(
                template_dir,
                f"contents.cron={claimed_cron} but bundle contains {cron_job_count} cron jobs"
            ))
    # Memory appendix — claim must match file presence.
    memory = contents.get("memory") or {}
    if not isinstance(memory, dict):
        errors.append(ValidationError(template_dir, "contents.memory must be an object"))
        memory = {}
    claimed_memory = bool(memory.get("append"))
    has_memory_file = "memory/append.md" in bundle_files
    if claimed_memory != has_memory_file:
        errors.append(ValidationError(
            template_dir,
            f"contents.memory.append={claimed_memory} disagrees with memory/append.md presence={has_memory_file}"
        ))
 def _validate_dashboard(zf: zipfile.ZipFile, template_dir: Path, errors: list[ValidationError]) -> None:
    """Decode dashboard.json against the widget-type vocabulary the Swift
    renderer knows. An unknown widget type means the app will render an
    'unknown widget' placeholder — that's a bad catalog experience.

    Appends one ValidationError per problem to `errors`. Structurally wrong
    JSON (a non-object dashboard, section or widget; a non-list `widgets`)
    is reported as a validation error rather than raising.
    """
    try:
        dashboard = json.loads(zf.read("dashboard.json"))
    except Exception as e:
        # Covers a missing member (KeyError), undecodable bytes and invalid
        # JSON alike; each is reported as a parse failure for this template.
        errors.append(ValidationError(template_dir, f"dashboard.json failed to parse: {e}"))
        return
    if not isinstance(dashboard, dict):
        errors.append(ValidationError(template_dir, "dashboard.json must be a JSON object"))
        return
    if dashboard.get("version") != 1:
        errors.append(ValidationError(template_dir, f"dashboard.version must be 1, got {dashboard.get('version')}"))
    sections = dashboard.get("sections") or []
    if not isinstance(sections, list):
        errors.append(ValidationError(template_dir, "dashboard.sections must be a list"))
        return
    for section_index, section in enumerate(sections):
        if not isinstance(section, dict):
            errors.append(ValidationError(template_dir, f"dashboard.sections[{section_index}] must be an object"))
            continue
        widgets = section.get("widgets") or []
        if not isinstance(widgets, list):
            errors.append(ValidationError(template_dir, f"dashboard.sections[{section_index}].widgets must be a list"))
            continue
        for widget_index, widget in enumerate(widgets):
            if not isinstance(widget, dict):
                errors.append(ValidationError(
                    template_dir,
                    f"dashboard.sections[{section_index}].widgets[{widget_index}] must be an object"
                ))
                continue
            widget_type = widget.get("type")
            # The isinstance test keeps an unhashable type value (list/dict)
            # from raising TypeError in the set-membership test.
            if not isinstance(widget_type, str) or widget_type not in SUPPORTED_WIDGET_TYPES:
                errors.append(ValidationError(
                    template_dir,
                    f"dashboard widget {widget.get('title')!r} has unknown type {widget_type!r}"
                ))
 def _scan_for_secrets(zf: zipfile.ZipFile, template_dir: Path, errors: list[ValidationError]) -> None:
    """Refuse bundles containing obvious secret patterns. Conservative —
    matches only high-confidence substrings (no keyword-only warnings).

    Every regular member of at most 256 KiB (uncompressed) is decoded as
    UTF-8, with undecodable bytes replaced, and searched with each entry of
    SECRET_PATTERNS. At most one error is appended per file: the first
    pattern that matches. A member that cannot be read is reported as an
    error too, so an unreadable file can never pass the scan unexamined.
    """
    # NOTE(review): members above the size cap are skipped entirely, so a
    # large text file is never scanned — confirm that is acceptable.
    for info in zf.infolist():
        if info.is_dir() or info.file_size > 256 * 1024:
            continue  # skip big binaries
        try:
            data = zf.read(info.filename).decode("utf-8", errors="replace")
        except Exception as e:
            # Fail closed: "could not scan" must not look like "clean".
            errors.append(ValidationError(
                template_dir,
                f"bundle file {info.filename} could not be read for secret scan: {e}"
            ))
            continue
        for pattern, label in SECRET_PATTERNS:
            if pattern.search(data):
                errors.append(ValidationError(
                    template_dir,
                    f"bundle file {info.filename} matches {label} pattern — refusing"
                ))
                break
 def _parse_cron_jobs(zf: zipfile.ZipFile, template_dir: Path, errors: list[ValidationError]) -> int:
    """Return the number of entries in the bundle's cron/jobs.json.

    Returns 0 when the file is absent, fails to parse, or is not a JSON
    array (the latter two also append a ValidationError). Each array entry
    must be an object carrying both `name` and `schedule`; entries that
    break that rule are reported but still counted.
    """
    member = "cron/jobs.json"
    if member not in zf.namelist():
        return 0
    try:
        jobs = json.loads(zf.read(member))
    except Exception as exc:
        errors.append(ValidationError(template_dir, f"cron/jobs.json failed to parse: {exc}"))
        return 0
    if not isinstance(jobs, list):
        errors.append(ValidationError(template_dir, "cron/jobs.json must be a JSON array"))
        return 0
    for index, entry in enumerate(jobs):
        if isinstance(entry, dict):
            if not ("name" in entry and "schedule" in entry):
                errors.append(ValidationError(
                    template_dir,
                    f"cron/jobs.json[{index}] missing required field (name, schedule)"
                ))
        else:
            errors.append(ValidationError(template_dir, f"cron/jobs.json[{index}] must be an object"))
    return len(jobs)
 def _bundle_files(zf: zipfile.ZipFile) -> set[str]:
    """Return the set of regular-file member names in the bundle.

    Directory entries and anything under macOS `__MACOSX/` metadata are
    left out.
    """
    names: set[str] = set()
    for member in zf.infolist():
        if member.is_dir():
            continue
        if member.filename.startswith("__MACOSX/"):
            continue
        names.add(member.filename)
    return names
 def validate_template(template_dir: Path) -> tuple[TemplateRecord | None, list[ValidationError]]:
    """Validate one template dir and return a (record, errors) pair.

    record is None when errors are fatal enough that we can't build a
    catalog entry at all: no bundle, an unreadable zip, or a template.json
    that is missing, unparseable, or not a JSON object. In every other
    case a record is returned alongside whatever non-fatal errors were
    collected, so callers must check `errors` before trusting the record.
    """
    errors: list[ValidationError] = []
    # Find the bundle. By convention it's `<dir>/<dir-basename>.scarftemplate`
    # or any single .scarftemplate in the dir.
    bundles = sorted(template_dir.glob("*.scarftemplate"))
    if not bundles:
        errors.append(ValidationError(template_dir, "no .scarftemplate found in template directory"))
        return None, errors
    if len(bundles) > 1:
        errors.append(ValidationError(
            template_dir,
            f"more than one .scarftemplate present: {[b.name for b in bundles]}"
        ))
    # With several bundles present, keep validating the first (sorted by
    # name) so its own problems are reported in the same run.
    bundle_path = bundles[0]
    bundle_size = bundle_path.stat().st_size
    if bundle_size > MAX_BUNDLE_BYTES:
        errors.append(ValidationError(
            template_dir,
            f"bundle size {bundle_size} exceeds catalog cap of {MAX_BUNDLE_BYTES} bytes"
        ))
    try:
        with zipfile.ZipFile(bundle_path, "r") as zf:
            bundle_files = _bundle_files(zf)
            if "template.json" not in bundle_files:
                errors.append(ValidationError(template_dir, "bundle is missing template.json"))
                return None, errors
            try:
                manifest = json.loads(zf.read("template.json"))
            except Exception as e:
                errors.append(ValidationError(template_dir, f"template.json failed to parse: {e}"))
                return None, errors
            # Valid JSON need not be an object; every later check calls
            # manifest.get(), so reject arrays/scalars here instead of
            # letting them raise AttributeError further down.
            if not isinstance(manifest, dict):
                errors.append(ValidationError(template_dir, "template.json must contain a JSON object"))
                return None, errors
            _validate_manifest(manifest, template_dir, errors)
            cron_count = _parse_cron_jobs(zf, template_dir, errors)
            _validate_contents_claim(manifest, bundle_files, cron_count, template_dir, errors)
            _validate_dashboard(zf, template_dir, errors)
            _scan_for_secrets(zf, template_dir, errors)
    except zipfile.BadZipFile:
        errors.append(ValidationError(template_dir, "bundle is not a valid zip archive"))
        return None, errors
    # Compute the catalog-ready record.
    sha = hashlib.sha256(bundle_path.read_bytes()).hexdigest()
    author = template_dir.parent.name
    short_name = template_dir.name
    install_url = (
        "https://raw.githubusercontent.com/awizemann/scarf/main/"
        f"templates/{author}/{short_name}/{bundle_path.name}"
    )
    # Fall back to the directory layout when the manifest carries no id.
    detail_slug = manifest_slug(manifest.get("id", f"{author}/{short_name}"))
    record = TemplateRecord(
        path=template_dir,
        manifest=manifest,
        bundle_path=bundle_path,
        bundle_sha256=sha,
        bundle_size=bundle_size,
        install_url=install_url,
        detail_slug=detail_slug,
    )
    return record, errors
 # ---------------------------------------------------------------------------
 # Staging/bundle drift check — keeps authors honest
 # ---------------------------------------------------------------------------
 def _check_staging_matches_bundle(record: TemplateRecord) -> list[ValidationError]:
    """If the template dir has a staging/ source tree, compare it with the
    committed bundle file by file. Catches the common failure mode of an
    author editing staging/ but forgetting to regenerate the
    .scarftemplate.

    Returns an empty list when there is no staging/ directory or when both
    sides agree. Otherwise returns up to three errors: files only in
    staging, files only in the bundle, and files whose bytes differ. Each
    file list is sorted so the message is stable across filesystems.
    """
    errors: list[ValidationError] = []
    staging = record.path / "staging"
    if not staging.is_dir():
        return errors
    committed = {}
    with zipfile.ZipFile(record.bundle_path, "r") as zf:
        for info in zf.infolist():
            if info.is_dir() or info.filename.startswith("__MACOSX/"):
                continue
            committed[info.filename] = zf.read(info.filename)
    source = {}
    for path in staging.rglob("*"):
        if not path.is_file():
            continue
        rel = path.relative_to(staging).as_posix()
        # Ignore anything whose staging-relative path starts with a dot
        # (top-level dotfiles and dot-directories) and nested .DS_Store
        # entries. These two tests cover every case the longer original
        # condition did.
        if rel.startswith(".") or "/.DS_Store" in rel:
            continue
        source[rel] = path.read_bytes()
    missing_in_bundle = sorted(set(source) - set(committed))
    if missing_in_bundle:
        errors.append(ValidationError(
            record.path,
            f"staging has files not in the built bundle: {missing_in_bundle} "
            "(rebuild with `zip -qq -r <name>.scarftemplate .` from staging/)"
        ))
    missing_in_source = sorted(set(committed) - set(source))
    if missing_in_source:
        errors.append(ValidationError(
            record.path,
            f"bundle has files not in staging/: {missing_in_source} "
            "(commit them to staging/ or rebuild the bundle from staging/)"
        ))
    # Sorted like the two lists above; rglob() order depends on the filesystem.
    diff = sorted(name for name, data in source.items() if name in committed and committed[name] != data)
    if diff:
        errors.append(ValidationError(
            record.path,
            f"staging content differs from built bundle: {diff} "
            "(rebuild the bundle from staging/)"
        ))
    return errors
 # ---------------------------------------------------------------------------
 # Build: write catalog.json (site rendering comes in a later commit)
 # ---------------------------------------------------------------------------
 def write_catalog_json(records: list[TemplateRecord], out_path: Path) -> None:
    """Serialize `records` as the aggregate catalog and write it to `out_path`.

    Parent directories are created as needed. The file is UTF-8, 2-space
    indented, key-sorted JSON followed by a trailing newline.
    """
    entries = [record.to_catalog_entry() for record in records]
    payload = {
        "schemaVersion": SCHEMA_VERSION,
        # Human reminder only; a timestamp would churn the diff every run.
        "generated": True,
        "templates": entries,
    }
    out_path.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps(payload, indent=2, sort_keys=True) + "\n"
    out_path.write_text(text, encoding="utf-8")
 # ---------------------------------------------------------------------------
 # CLI
 # ---------------------------------------------------------------------------
 def main(argv: list[str] | None = None) -> int:
    """Command-line entry point; returns the process exit code.

    0 on success (including "no templates found"), 1 when any template
    fails validation, 2 when an --only filter matches nothing. Exactly one
    of --check / --build / --preview is required.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument("--check", action="store_true", help="validate every template; don't write output")
    mode.add_argument("--build", action="store_true", help="validate + write catalog.json")
    mode.add_argument("--preview", metavar="DIR", help="render a self-contained site preview into DIR")
    cli.add_argument(
        "--only",
        metavar="PATH",
        action="append",
        default=[],
        help="validate only the given template dir (may repeat); useful for PR-diff runs",
    )
    cli.add_argument(
        "--repo",
        metavar="PATH",
        default=str(REPO_ROOT),
        help="repo root to operate on (default: auto-detect)",
    )
    opts = cli.parse_args(argv)

    root = Path(opts.repo).resolve()
    candidates = list(_iter_templates(root))
    if opts.only:
        wanted = {Path(p).resolve() for p in opts.only}
        candidates = [c for c in candidates if c.resolve() in wanted]
    if not candidates:
        if opts.only:
            print("no templates matched --only filter", file=sys.stderr)
            return 2
        print("no templates found under templates/ — nothing to do", file=sys.stderr)
        return 0

    # Validate everything first; nothing is written unless the whole set is clean.
    accepted: list[TemplateRecord] = []
    problems: list[ValidationError] = []
    for candidate in candidates:
        record, errors = validate_template(candidate)
        problems.extend(errors)
        if record is None:
            continue
        problems.extend(_check_staging_matches_bundle(record))
        accepted.append(record)
    if problems:
        print(f"✗ {len(problems)} validation error(s):", file=sys.stderr)
        for problem in problems:
            print(f"  {problem}", file=sys.stderr)
        return 1

    print(f"✓ {len(accepted)} template(s) validated", file=sys.stderr)
    # The per-template listing goes to stdout; status lines go to stderr.
    for record in accepted:
        shown = record.path.relative_to(root)
        print(f"  {shown} — {record.manifest['id']} v{record.manifest['version']}")
    if opts.check:
        return 0

    # Both --build and --preview refresh templates/catalog.json in the repo.
    catalog_path = root / "templates" / "catalog.json"
    write_catalog_json(accepted, catalog_path)
    print(f"wrote {catalog_path.relative_to(root)}", file=sys.stderr)
    if opts.preview:
        preview_dir = Path(opts.preview).resolve()
        render_site(accepted, preview_dir, root)
        print(f"preview site rendered to {preview_dir}", file=sys.stderr)
    if opts.build:
        # --build renders into .gh-pages-worktree/templates/ so the
        # maintainer's publish step just has to commit + push gh-pages.
        gh_pages = root / ".gh-pages-worktree" / "templates"
        render_site(accepted, gh_pages, root)
        print(f"site rendered to {gh_pages.relative_to(root)}", file=sys.stderr)
    return 0
 def render_site(records: list[TemplateRecord], out_dir: Path, repo_root: Path) -> None:
    """Render the catalog site for `records` into `out_dir`.

    When `<repo_root>/site` does not exist, only catalog.json is written,
    which keeps --build and --preview usable before the HTML templates
    ship. Otherwise this writes index.html, copies the static assets,
    writes one `<detail_slug>/` directory per template (index.html plus the
    bundle's dashboard.json and, when present, README.md), and finally
    writes catalog.json.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    site_src = repo_root / "site"
    if not site_src.is_dir():
        # Phase 2: no site/ yet — catalog.json alone is the whole output.
        write_catalog_json(records, out_dir / "catalog.json")
        return

    index_tmpl = (site_src / "index.html.tmpl").read_text(encoding="utf-8")
    template_tmpl = (site_src / "template.html.tmpl").read_text(encoding="utf-8")

    # Static assets: two optional top-level files plus the assets/ tree,
    # which is replaced wholesale so removed files do not linger.
    for asset_name in ("widgets.js", "styles.css"):
        asset = site_src / asset_name
        if asset.exists():
            shutil.copy2(asset, out_dir / asset_name)
    assets_src = site_src / "assets"
    if assets_src.is_dir():
        assets_dst = out_dir / "assets"
        if assets_dst.exists():
            shutil.rmtree(assets_dst)
        shutil.copytree(assets_src, assets_dst)

    # Catalog index.
    index_html = render_index(index_tmpl, records)
    (out_dir / "index.html").write_text(index_html, encoding="utf-8")

    # Per-template detail pages + dashboard.json copies.
    for record in records:
        page_dir = out_dir / record.detail_slug
        page_dir.mkdir(parents=True, exist_ok=True)
        detail_html = render_detail(template_tmpl, record)
        (page_dir / "index.html").write_text(detail_html, encoding="utf-8")
        # Unpack dashboard.json next to the page so widgets.js can fetch it
        # without cross-directory relative paths.
        with zipfile.ZipFile(record.bundle_path, "r") as bundle:
            (page_dir / "dashboard.json").write_bytes(bundle.read("dashboard.json"))
            if "README.md" in bundle.namelist():
                (page_dir / "README.md").write_bytes(bundle.read("README.md"))

    # The aggregate catalog.json is copied in so the frontend can fetch
    # /templates/catalog.json without reaching back into the repo.
    write_catalog_json(records, out_dir / "catalog.json")
 def render_index(tmpl: str, records: list[TemplateRecord]) -> str:
    """Very light string substitution — the site's JS does most of the
    rendering from catalog.json at page load.

    Replaces `{{COUNT}}` with the number of records and `{{CARDS}}` with
    one `<a class="card">` element per record. Every manifest-derived value
    — tags included — is HTML-escaped, because manifests come from
    submitted templates and must not be able to inject markup.
    """
    cards = []
    for r in records:
        m = r.manifest
        author = (m.get("author") or {}).get("name", "")
        tags_html = "".join(
            f'<span class="tag">{_html_escape(t)}</span>' for t in (m.get("tags") or [])
        )
        cards.append(
            '<a class="card" href="{slug}/">'
            '<h3>{name}</h3>'
            '<p class="desc">{desc}</p>'
            '<div class="meta"><span class="author">{author}</span>'
            '<span class="version">v{version}</span></div>'
            '<div class="tags">{tags}</div>'
            '</a>'.format(
                slug=_html_escape(r.detail_slug),
                name=_html_escape(m["name"]),
                desc=_html_escape(m["description"]),
                author=_html_escape(author),
                version=_html_escape(m["version"]),
                tags=tags_html,
            )
        )
    # Fill {{COUNT}} before {{CARDS}}: card text comes from manifests, and a
    # description that happens to contain "{{COUNT}}" must stay literal
    # rather than being rewritten by a later replace pass.
    return tmpl.replace("{{COUNT}}", str(len(records))).replace("{{CARDS}}", "\n".join(cards))
 def render_detail(tmpl: str, record: TemplateRecord) -> str:
    """Fill the per-template detail page template for `record`.

    Substitutes `{{ID}}`, `{{NAME}}`, `{{VERSION}}`, `{{DESC}}`,
    `{{AUTHOR_HTML}}`, `{{CATEGORY}}`, `{{TAGS_HTML}}`,
    `{{INSTALL_URL_ENCODED}}` and `{{SCARF_INSTALL_URL}}`. All values are
    HTML-escaped except the two `*_HTML` tokens, which are assembled here
    from already-escaped pieces. Placeholders this function does not know
    are left untouched.
    """
    m = record.manifest
    author = m.get("author") or {}
    author_html = _html_escape(author.get("name", ""))
    author_url = author.get("url") or ""
    # Link the author only for http(s) URLs. Escaping alone does not make an
    # href safe: a manifest-supplied "javascript:" URL would still execute
    # when clicked on the public catalog page.
    if isinstance(author_url, str) and author_url.lower().startswith(("http://", "https://")):
        author_html = f'<a href="{_html_escape(author_url)}">{author_html}</a>'
    tags_html = "".join(f'<span class="tag">{_html_escape(t)}</span>' for t in (m.get("tags") or []))
    install_url = record.install_url
    tokens = {
        "ID": m["id"],
        "NAME": m["name"],
        "VERSION": m["version"],
        "DESC": m["description"],
        "AUTHOR_HTML": author_html,
        "CATEGORY": m.get("category") or "",
        "TAGS_HTML": tags_html,
        # NOTE(review): despite the token name, this value is HTML-escaped
        # but not percent-encoded — confirm what the page template expects.
        "INSTALL_URL_ENCODED": install_url,
        "SCARF_INSTALL_URL": f"scarf://install?url={install_url}",
    }
    raw_html_tokens = {"TAGS_HTML", "AUTHOR_HTML"}
    rendered = {
        key: value if key in raw_html_tokens else _html_escape(value)
        for key, value in tokens.items()
    }
    # Single pass over the template: a substituted value is never re-scanned,
    # so manifest text containing "{{...}}" cannot pull in another token.
    return re.sub(
        r"\{\{([A-Z_]+)\}\}",
        lambda match: rendered.get(match.group(1), match.group(0)),
        tmpl,
    )
 def _html_escape(s: str) -> str:
    """Escape `&`, `<`, `>`, `"` and `'` for use in HTML text or attributes."""
    # One translate() pass; each source character is mapped exactly once, so
    # the ampersands introduced by the entities are never escaped again.
    entities = str.maketrans({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#39;",
    })
    return s.translate(entities)
 # Run the CLI only when executed as a script (importing the module has no
 # side effects); main()'s return value becomes the process exit status.
 if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,385 @@
 """Unit tests for tools/build-catalog.py.
 Run with:  python3 -m unittest tools.test_build_catalog
 Or just:   python3 tools/test_build_catalog.py
 Covers the validator's invariants against synthetic template directories
 created under a temp dir — no network, no global state, no dependency on
 the repo's actual templates/. A separate test at the bottom exercises the
 real shipped `templates/awizemann/site-status-checker` bundle to catch
 drift between validator + installer.
 """
 from __future__ import annotations
 import importlib.util
 import io
 import json
 import os
 import shutil
 import sys
 import tempfile
 import unittest
 import zipfile
 from pathlib import Path
 # tools/build-catalog.py has a dash in its filename, so a plain `import`
 # would be awkward; load it from its path with a spec loader instead.
 # The module must be registered in sys.modules BEFORE it is executed:
 # Python 3.9's dataclass inspection reads
 # `sys.modules[cls.__module__].__dict__` and blows up when the entry is
 # missing (fixed in 3.10+, but it still matters on Macs running the
 # system Python).
 _SPEC_PATH = Path(__file__).resolve().with_name("build-catalog.py")
 _spec = importlib.util.spec_from_file_location("build_catalog", _SPEC_PATH)
 build_catalog = importlib.util.module_from_spec(_spec)
 sys.modules[_spec.name] = build_catalog
 _spec.loader.exec_module(build_catalog)
 # ---------------------------------------------------------------------------
 # Fixture builders
 # ---------------------------------------------------------------------------
 # Dashboard payload that the fixtures below serialize as `dashboard.json`
 # unless a test supplies its own: version 1, one three-column section
 # holding a single `stat` widget.
 MINIMAL_DASHBOARD = {
    "version": 1,
    "title": "Test",
    "description": "test",
    "sections": [
        {
            "title": "Current Status",
            "columns": 3,
            "widgets": [
                {"type": "stat", "title": "Sites Up", "value": 0},
            ],
        },
    ],
 }
 def make_fake_repo(tmp_root: Path) -> Path:
    """Lay out a bare fake repository rooted at ``tmp_root``.

    Only the ``templates/`` directory is created (together with any
    missing parents). ``tmp_root`` itself is returned as the repo root.
    """
    templates_root = tmp_root / "templates"
    templates_root.mkdir(parents=True)
    return tmp_root
 def make_template_dir(
    repo: Path,
    author: str,
    name: str,
    manifest: dict | None = None,
    bundle_files: dict[str, bytes] | None = None,
    include_staging: bool = True,
    bundle_name: str | None = None,
 ) -> Path:
    """Create ``<repo>/templates/<author>/<name>/`` with a built bundle.

    Args:
        repo: Fake repo root.
        author: Author directory name.
        name: Template directory name; also the default bundle stem.
        manifest: Manifest dict. ``None`` selects a minimal manifest with
            id ``<author>/<name>``. It is only serialized into the bundle
            when ``bundle_files`` is ``None``.
        bundle_files: Mapping of archive path to file bytes. ``None``
            selects the four-file default set (template.json, README.md,
            AGENTS.md, dashboard.json). An explicitly passed empty dict
            produces an empty bundle.
        include_staging: When true, mirror every bundle file into
            ``staging/`` byte-for-byte so the drift check passes by
            default. The ``staging/`` directory itself is always created.
        bundle_name: Bundle filename; defaults to ``<name>.scarftemplate``.

    Returns:
        The template directory.
    """
    template_dir = repo / "templates" / author / name
    staging_dir = template_dir / "staging"
    staging_dir.mkdir(parents=True, exist_ok=True)

    # Compare against None rather than relying on truthiness: a caller
    # that passes an empty dict on purpose (e.g. to build an empty
    # bundle) must not silently get the defaults back.
    if manifest is None:
        manifest = {
            "schemaVersion": 1,
            "id": f"{author}/{name}",
            "name": name.replace("-", " ").title(),
            "version": "1.0.0",
            "description": "test description",
            "contents": {
                "dashboard": True,
                "agentsMd": True,
            },
        }
    if bundle_files is None:
        files = {
            "template.json": json.dumps(manifest).encode("utf-8"),
            "README.md": b"# readme\n",
            "AGENTS.md": b"# agents\n",
            "dashboard.json": json.dumps(MINIMAL_DASHBOARD).encode("utf-8"),
        }
    else:
        files = bundle_files

    # Write staging/ source tree so the drift check passes by default.
    if include_staging:
        for path, data in files.items():
            full = staging_dir / path
            full.parent.mkdir(parents=True, exist_ok=True)
            full.write_bytes(data)

    # Write the zipped bundle. An empty bundle_name is meaningless, so it
    # falls back to the default just like None does.
    bundle_name = bundle_name or f"{name}.scarftemplate"
    with zipfile.ZipFile(template_dir / bundle_name, "w", zipfile.ZIP_DEFLATED) as zf:
        for path, data in files.items():
            zf.writestr(path, data)
    return template_dir
 # ---------------------------------------------------------------------------
 # Tests
 # ---------------------------------------------------------------------------
 class ManifestSlugTests(unittest.TestCase):
    """Slug cases kept in step with the Swift test of the same name, so
    the two implementations stay in sync."""

    def test_sanitizes_punctuation(self):
        slug = build_catalog.manifest_slug("alan@w/focus dashboard!")
        self.assertEqual(slug, "alan-w-focus-dashboard")

    def test_falls_back_to_placeholder(self):
        # An id made only of separators leaves nothing usable behind.
        slug = build_catalog.manifest_slug("////")
        self.assertEqual(slug, "template")

    def test_preserves_letters_numbers_dash_underscore(self):
        slug = build_catalog.manifest_slug("user_1/name-2")
        self.assertEqual(slug, "user_1-name-2")
 class ValidationTests(unittest.TestCase):
    """Validator invariants, each exercised against a synthetic template
    built inside a throwaway repo under a temp dir."""

    def setUp(self):
        self._dir = tempfile.TemporaryDirectory()
        # Register cleanup before doing anything that can fail, so a
        # broken fixture never leaves the temp dir behind.
        self.addCleanup(self._dir.cleanup)
        self.repo = make_fake_repo(Path(self._dir.name))

    # --- fixture helpers ------------------------------------------------

    @staticmethod
    def _manifest(template_id, name, description, **extra_contents):
        """Return a schemaVersion-1 manifest claiming a dashboard and an
        AGENTS.md, plus any extra ``contents`` claims given as keywords."""
        contents = {"dashboard": True, "agentsMd": True}
        contents.update(extra_contents)
        return {
            "schemaVersion": 1,
            "id": template_id,
            "name": name,
            "version": "1.0.0",
            "description": description,
            "contents": contents,
        }

    @staticmethod
    def _bundle(manifest, readme=b"# readme", agents=b"# agents", dashboard=None, extra=None):
        """Return the bundle file mapping for ``manifest``.

        ``agents=None`` leaves AGENTS.md out of the bundle; ``dashboard``
        defaults to MINIMAL_DASHBOARD; ``extra`` entries are appended
        after the standard files.
        """
        files = {
            "template.json": json.dumps(manifest).encode("utf-8"),
            "README.md": readme,
        }
        if agents is not None:
            files["AGENTS.md"] = agents
        chosen_dashboard = MINIMAL_DASHBOARD if dashboard is None else dashboard
        files["dashboard.json"] = json.dumps(chosen_dashboard).encode("utf-8")
        if extra:
            files.update(extra)
        return files

    # --- tests ----------------------------------------------------------

    def test_accepts_minimal_valid_template(self):
        make_template_dir(self.repo, "tester", "minimal")
        records, errors = self._validate_all()
        self.assertEqual(errors, [])
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].manifest["id"], "tester/minimal")

    def test_rejects_missing_agents_md(self):
        # Build a bundle that lacks AGENTS.md.
        manifest = self._manifest("tester/bad", "Bad", "missing AGENTS.md")
        make_template_dir(
            self.repo, "tester", "bad",
            manifest=manifest,
            bundle_files=self._bundle(manifest, agents=None),
        )
        _, errors = self._validate_all()
        self.assertTrue(any("AGENTS.md" in str(e) for e in errors), errors)

    def test_rejects_content_claim_mismatch(self):
        # Manifest claims cron: 2, bundle ships zero cron jobs.
        manifest = self._manifest("tester/claims", "Claims", "claim mismatch", cron=2)
        make_template_dir(
            self.repo, "tester", "claims",
            manifest=manifest,
            bundle_files=self._bundle(manifest),
        )
        _, errors = self._validate_all()
        self.assertTrue(any("contents.cron=2" in str(e) for e in errors), errors)

    def test_rejects_manifest_author_mismatch(self):
        # Template lives under /tester/ but manifest id says /other/.
        manifest = self._manifest("other/name", "Mismatch", "author mismatch")
        make_template_dir(
            self.repo, "tester", "name",
            manifest=manifest,
            bundle_files=self._bundle(manifest),
        )
        _, errors = self._validate_all()
        self.assertTrue(any("author component" in str(e) for e in errors), errors)

    def test_rejects_oversized_bundle(self):
        # Synthetic bundle > 5MB cap. Built by hand rather than through
        # make_template_dir so the archive can use ZIP_STORED: deflating
        # 6 MiB of one repeated byte would shrink it far below the cap.
        template_dir = self.repo / "templates" / "tester" / "huge"
        (template_dir / "staging").mkdir(parents=True)
        manifest = self._manifest("tester/huge", "Huge", "oversized")
        payload = b"x" * (6 * 1024 * 1024)
        files = self._bundle(manifest, extra={"ballast.bin": payload})
        with zipfile.ZipFile(template_dir / "huge.scarftemplate", "w", zipfile.ZIP_STORED) as zf:
            for p, data in files.items():
                zf.writestr(p, data)
        _, errors = self._validate_all()
        self.assertTrue(any("exceeds catalog cap" in str(e) for e in errors), errors)

    def test_rejects_unknown_widget_type(self):
        bad_dashboard = {
            "version": 1,
            "title": "Bad",
            "sections": [{"title": "x", "columns": 1, "widgets": [{"type": "hologram", "title": "huh"}]}],
        }
        manifest = self._manifest("tester/weird", "Weird", "unknown widget")
        make_template_dir(
            self.repo, "tester", "weird",
            manifest=manifest,
            bundle_files=self._bundle(manifest, dashboard=bad_dashboard),
        )
        _, errors = self._validate_all()
        self.assertTrue(any("unknown type" in str(e) for e in errors), errors)

    def test_rejects_secret_in_bundle(self):
        # Assembled at runtime so no token-shaped literal sits in this file.
        leaky = b"config:\n  github_token: ghp_" + b"A" * 40 + b"\n"
        manifest = self._manifest("tester/leaky", "Leaky", "has a secret")
        make_template_dir(
            self.repo, "tester", "leaky",
            manifest=manifest,
            bundle_files=self._bundle(manifest, readme=leaky),
        )
        _, errors = self._validate_all()
        self.assertTrue(any("github" in str(e).lower() for e in errors), errors)

    def test_detects_staging_vs_bundle_drift(self):
        # Bundle ships an old README; staging/ has an edited one — should fail.
        manifest = self._manifest("tester/drift", "Drift", "staging ahead of bundle")
        template_dir = make_template_dir(
            self.repo, "tester", "drift",
            manifest=manifest,
            bundle_files=self._bundle(manifest, readme=b"# old"),
        )
        # Edit staging/ AFTER building the bundle.
        (template_dir / "staging" / "README.md").write_bytes(b"# new")
        _, errors = self._validate_all()
        self.assertTrue(any("differs from built bundle" in str(e) for e in errors), errors)

    def test_rejects_missing_bundle(self):
        template_dir = self.repo / "templates" / "tester" / "bare"
        (template_dir / "staging").mkdir(parents=True)
        # No .scarftemplate in the dir.
        _, errors = self._validate_all()
        self.assertTrue(any("no .scarftemplate found" in str(e) for e in errors), errors)

    # --- helpers --------------------------------------------------------

    def _validate_all(self) -> tuple[list, list]:
        """Validate every template in the fake repo.

        Returns ``(records, errors)``. ``errors`` holds each template's
        validation errors followed, for templates that produced a
        record, by its staging-vs-bundle drift errors.
        """
        records = []
        errors = []
        for tdir in build_catalog._iter_templates(self.repo):
            record, errs = build_catalog.validate_template(tdir)
            errors.extend(errs)
            if record is not None:
                errors.extend(build_catalog._check_staging_matches_bundle(record))
                records.append(record)
        return records, errors
 class CatalogJsonTests(unittest.TestCase):
    """Pins the field names of the emitted catalog.json; they must stay
    stable because the site's widgets.js reads them by name."""

    def test_catalog_json_shape(self):
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            repo = make_fake_repo(tmp_path)
            make_template_dir(repo, "tester", "shape")

            # Collect the validated records, insisting each one is clean.
            records = []
            for template_dir in build_catalog._iter_templates(repo):
                record, errors = build_catalog.validate_template(template_dir)
                self.assertEqual(errors, [])
                records.append(record)

            catalog_path = tmp_path / "catalog.json"
            build_catalog.write_catalog_json(records, catalog_path)
            data = json.loads(catalog_path.read_text())

            self.assertEqual(data["schemaVersion"], 1)
            templates = data["templates"]
            self.assertEqual(len(templates), 1)
            entry = templates[0]
            expected_keys = [
                "id", "name", "version", "description", "contents",
                "installUrl", "detailSlug", "bundleSha256", "bundleSize",
            ]
            for required in expected_keys:
                self.assertIn(required, entry)
            install_url = entry["installUrl"]
            self.assertTrue(install_url.startswith("https://raw.githubusercontent.com/"))
            self.assertEqual(entry["detailSlug"], "tester-shape")
 class RealBundleTest(unittest.TestCase):
    """End-to-end run of the validator over the shipped Site Status
    Checker bundle, to catch drift between the validator and real-world
    author conventions. Skipped when the bundle is not on disk (i.e.
    when run outside the repo tree)."""

    def test_site_status_checker_passes(self):
        # This file sits one directory below the repo root.
        tools_dir = Path(__file__).resolve().parent
        repo_root = tools_dir.parent
        template = repo_root.joinpath("templates", "awizemann", "site-status-checker")
        if not template.exists():
            self.skipTest("site-status-checker not present (running outside repo?)")

        record, errors = build_catalog.validate_template(template)
        self.assertIsNotNone(record)
        drift = build_catalog._check_staging_matches_bundle(record)
        problems = errors + drift
        self.assertEqual(problems, [], f"errors: {errors}, drift: {drift}")
        self.assertEqual(record.manifest["id"], "awizemann/site-status-checker")
 # Lets the suite run directly: `python3 tools/test_build_catalog.py`.
 if __name__ == "__main__":
    unittest.main()