feat(catalog): stdlib-only Python validator + regenerator for templates/

Adds the catalog pipeline without introducing any external dependencies.
tools/build-catalog.py walks templates/<author>/<name>/, validates every
shipped .scarftemplate against its manifest (same invariants Swift's
ProjectTemplateService.verifyClaims enforces at install time), and emits
templates/catalog.json for the frontend to read.

Validator invariants:
- Required bundle files: template.json, README.md, AGENTS.md, dashboard.json
- contents claim cross-checked against actual zip entries (instructions, skills, cron count, memory appendix)
- dashboard.json widget types restricted to the vocabulary the Swift renderer knows
- Manifest id author component must match the template directory
- 5 MB bundle-size cap on submissions (installer's own cap is 50 MB)
- High-confidence secret patterns (private keys, GitHub PATs, Slack tokens, AWS access keys, OpenAI/Anthropic keys) block the bundle
- staging/ source tree must match the built bundle byte-for-byte — catches the common failure mode of editing staging/ but forgetting to rebuild

scripts/catalog.sh wraps the Python script with check/build/preview/serve/publish subcommands, mirroring the scripts/wiki.sh shape. publish adds a second-pass hard-pattern secret scan on the rendered gh-pages output so template prose can't leak credentials even if the Python scan missed them.

tools/test_build_catalog.py has 14 unit tests covering the main validator paths (minimal-valid, missing-AGENTS, content-claim mismatch, author mismatch, oversized bundle, unknown widget type, secret detection, staging-drift detection, missing bundle, catalog.json shape, and a real-bundle end-to-end check against templates/awizemann/site-status-checker). Python 3.9 compatible (Xcode's bundled python3), so no runtime needs installing.

templates/catalog.json committed as the first generated aggregate index; maintainers regenerate on merge by running `./scripts/catalog.sh build`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 10:36:35 +00:00 · 2026-04-23 00:02:09 +02:00
parent d8a0a89db2
commit 11732baa3c
4 changed files with 1200 additions and 0 deletions
@@ -0,0 +1,385 @@
+"""Unit tests for tools/build-catalog.py.
+
+Run with:  python3 -m unittest tools.test_build_catalog
+Or just:   python3 tools/test_build_catalog.py
+
+Covers the validator's invariants against synthetic template directories
+created under a temp dir — no network, no global state, no dependency on
+the repo's actual templates/. A separate test at the bottom exercises the
+real shipped `templates/awizemann/site-status-checker` bundle to catch
+drift between validator + installer.
+"""
+from __future__ import annotations
+
+import importlib.util
+import io
+import json
+import os
+import shutil
+import sys
+import tempfile
+import unittest
+import zipfile
+from pathlib import Path
+
+
+# Import tools/build-catalog.py via spec-loader: the dash in the filename
+# is not a valid identifier character, so a plain `import` statement cannot
+# name the module. Register the module in sys.modules BEFORE exec —
+# Python 3.9's dataclass inspection reads
+# `sys.modules[cls.__module__].__dict__` and blows up if the module isn't
+# there yet (fixed in 3.10+, still matters on system-Python Macs).
+_SPEC_PATH = Path(__file__).resolve().parent / "build-catalog.py"
+_spec = importlib.util.spec_from_file_location("build_catalog", _SPEC_PATH)
+build_catalog = importlib.util.module_from_spec(_spec)
+# Order matters: this registration has to happen before exec_module below.
+sys.modules["build_catalog"] = build_catalog
+_spec.loader.exec_module(build_catalog)
+
+
+# ---------------------------------------------------------------------------
+# Fixture builders
+# ---------------------------------------------------------------------------
+
+
+# Smallest dashboard.json payload shared by the fixtures: a single section
+# holding one `stat` widget. Tests serialise it with json.dumps() when they
+# want a dashboard the validator is expected to accept.
+MINIMAL_DASHBOARD = {
+    "version": 1,
+    "title": "Test",
+    "description": "test",
+    "sections": [
+        {
+            "title": "Current Status",
+            "columns": 3,
+            "widgets": [
+                {"type": "stat", "title": "Sites Up", "value": 0},
+            ],
+        },
+    ],
+}
+
+
+def make_fake_repo(tmp_root: Path) -> Path:
+    """Lay out an empty fake repository rooted at *tmp_root*.
+
+    Only the ``templates/`` directory is created (along with any missing
+    parents); tests populate it themselves. Returns the repo root, which
+    is *tmp_root* itself.
+    """
+    templates_root = tmp_root / "templates"
+    templates_root.mkdir(parents=True)
+    return tmp_root
+
+
+def make_template_dir(
+    repo: Path,
+    author: str,
+    name: str,
+    manifest: dict | None = None,
+    bundle_files: dict[str, bytes] | None = None,
+    include_staging: bool = True,
+    bundle_name: str | None = None,
+) -> Path:
+    """Create ``<repo>/templates/<author>/<name>/`` containing a built bundle.
+
+    Args:
+        repo: Fake repository root; the template is placed under its
+            ``templates/`` directory.
+        author: Author directory name.
+        name: Template directory name; also the default bundle stem.
+        manifest: Manifest dict. When ``None`` a minimal manifest with id
+            ``"<author>/<name>"`` is generated.
+        bundle_files: Mapping of archive path -> file bytes to ship. When
+            ``None`` the four required files (template.json, README.md,
+            AGENTS.md, dashboard.json) are generated from ``manifest``.
+        include_staging: When true, mirror ``bundle_files`` into a
+            ``staging/`` directory so staging and bundle match
+            byte-for-byte. When false, no ``staging/`` directory is made.
+        bundle_name: Archive file name; defaults to
+            ``"<name>.scarftemplate"``.
+
+    Returns:
+        The template directory path.
+    """
+    template_dir = repo / "templates" / author / name
+    template_dir.mkdir(parents=True, exist_ok=True)
+
+    # Compare against None instead of relying on truthiness: an explicitly
+    # empty dict is a legitimate fixture (e.g. an empty bundle) and must not
+    # be silently swapped for the defaults.
+    if manifest is None:
+        manifest = {
+            "schemaVersion": 1,
+            "id": f"{author}/{name}",
+            "name": name.replace("-", " ").title(),
+            "version": "1.0.0",
+            "description": "test description",
+            "contents": {
+                "dashboard": True,
+                "agentsMd": True,
+            },
+        }
+    if bundle_files is None:
+        bundle_files = {
+            "template.json": json.dumps(manifest).encode("utf-8"),
+            "README.md": b"# readme\n",
+            "AGENTS.md": b"# agents\n",
+            "dashboard.json": json.dumps(MINIMAL_DASHBOARD).encode("utf-8"),
+        }
+
+    # Write the staging/ source tree so the drift check passes by default.
+    # The directory is only created on request, so include_staging=False
+    # really yields a template without staging/.
+    if include_staging:
+        staging_dir = template_dir / "staging"
+        staging_dir.mkdir(exist_ok=True)
+        for rel_path, data in bundle_files.items():
+            target = staging_dir / rel_path
+            target.parent.mkdir(parents=True, exist_ok=True)
+            target.write_bytes(data)
+
+    # Write the zipped bundle. An empty bundle_name is never a usable file
+    # name, so truthiness is the right test here.
+    bundle_name = bundle_name or f"{name}.scarftemplate"
+    with zipfile.ZipFile(template_dir / bundle_name, "w", zipfile.ZIP_DEFLATED) as zf:
+        for rel_path, data in bundle_files.items():
+            zf.writestr(rel_path, data)
+
+    return template_dir
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class ManifestSlugTests(unittest.TestCase):
+    """Pins the output of ``build_catalog.manifest_slug``.
+
+    Mirrors the Swift test of the same name so the two implementations
+    stay in sync.
+    """
+
+    def test_sanitizes_punctuation(self):
+        slug = build_catalog.manifest_slug("alan@w/focus dashboard!")
+        self.assertEqual(slug, "alan-w-focus-dashboard")
+
+    def test_falls_back_to_placeholder(self):
+        slug = build_catalog.manifest_slug("////")
+        self.assertEqual(slug, "template")
+
+    def test_preserves_letters_numbers_dash_underscore(self):
+        slug = build_catalog.manifest_slug("user_1/name-2")
+        self.assertEqual(slug, "user_1-name-2")
+
+
+class ValidationTests(unittest.TestCase):
+    """Validator invariants, each exercised against a synthetic template.
+
+    Every test builds its fixture inside a fresh temporary repo, runs the
+    validator plus the staging-drift check over that repo, and looks for
+    the expected error text.
+    """
+
+    def setUp(self):
+        self._dir = tempfile.TemporaryDirectory()
+        # Register the cleanup first so the temp dir is removed even if the
+        # rest of setUp raises (tearDown is not run when setUp fails, but
+        # registered cleanups are).
+        self.addCleanup(self._dir.cleanup)
+        self.repo = make_fake_repo(Path(self._dir.name))
+
+    def test_accepts_minimal_valid_template(self):
+        make_template_dir(self.repo, "tester", "minimal")
+        records, errors = self._validate_all()
+        self.assertEqual(errors, [])
+        self.assertEqual(len(records), 1)
+        self.assertEqual(records[0].manifest["id"], "tester/minimal")
+
+    def test_rejects_missing_agents_md(self):
+        # Build a bundle that lacks AGENTS.md.
+        manifest = self._manifest("tester/bad", "Bad", "missing AGENTS.md")
+        self._make_template("bad", manifest, agents=None)
+        _, errors = self._validate_all()
+        self._assert_error_containing(errors, "AGENTS.md")
+
+    def test_rejects_content_claim_mismatch(self):
+        # Manifest claims cron: 2, bundle ships zero cron jobs.
+        manifest = self._manifest("tester/claims", "Claims", "claim mismatch", cron=2)
+        self._make_template("claims", manifest)
+        _, errors = self._validate_all()
+        self._assert_error_containing(errors, "contents.cron=2")
+
+    def test_rejects_manifest_author_mismatch(self):
+        # Template lives under /tester/ but manifest id says /other/.
+        manifest = self._manifest("other/name", "Mismatch", "author mismatch")
+        self._make_template("name", manifest)
+        _, errors = self._validate_all()
+        self._assert_error_containing(errors, "author component")
+
+    def test_rejects_oversized_bundle(self):
+        # Synthetic bundle > 5MB cap. Built by hand (not via
+        # make_template_dir) so the ballast is stored uncompressed and is
+        # not mirrored into staging/.
+        template_dir = self.repo / "templates" / "tester" / "huge"
+        (template_dir / "staging").mkdir(parents=True)
+        manifest = self._manifest("tester/huge", "Huge", "oversized")
+        files = self._bundle_files(manifest)
+        files["ballast.bin"] = b"x" * (6 * 1024 * 1024)
+        with zipfile.ZipFile(template_dir / "huge.scarftemplate", "w", zipfile.ZIP_STORED) as zf:
+            for p, data in files.items():
+                zf.writestr(p, data)
+        _, errors = self._validate_all()
+        self._assert_error_containing(errors, "exceeds catalog cap")
+
+    def test_rejects_unknown_widget_type(self):
+        bad_dashboard = {
+            "version": 1,
+            "title": "Bad",
+            "sections": [{"title": "x", "columns": 1, "widgets": [{"type": "hologram", "title": "huh"}]}],
+        }
+        manifest = self._manifest("tester/weird", "Weird", "unknown widget")
+        self._make_template("weird", manifest, dashboard=bad_dashboard)
+        _, errors = self._validate_all()
+        self._assert_error_containing(errors, "unknown type")
+
+    def test_rejects_secret_in_bundle(self):
+        leaky = b"config:\n  github_token: ghp_" + b"A" * 40 + b"\n"
+        manifest = self._manifest("tester/leaky", "Leaky", "has a secret")
+        self._make_template("leaky", manifest, readme=leaky)
+        _, errors = self._validate_all()
+        # Case-insensitive on purpose: only the word "github" is pinned.
+        self.assertTrue(any("github" in str(e).lower() for e in errors), errors)
+
+    def test_detects_staging_vs_bundle_drift(self):
+        # Bundle ships an old README; staging/ has an edited one — should fail.
+        manifest = self._manifest("tester/drift", "Drift", "staging ahead of bundle")
+        template_dir = self._make_template("drift", manifest, readme=b"# old")
+        # Edit staging/ AFTER building the bundle.
+        (template_dir / "staging" / "README.md").write_bytes(b"# new")
+        _, errors = self._validate_all()
+        self._assert_error_containing(errors, "differs from built bundle")
+
+    def test_rejects_missing_bundle(self):
+        template_dir = self.repo / "templates" / "tester" / "bare"
+        (template_dir / "staging").mkdir(parents=True)
+        # No .scarftemplate in the dir.
+        _, errors = self._validate_all()
+        self._assert_error_containing(errors, "no .scarftemplate found")
+
+    # --- helpers --------------------------------------------------------
+
+    @staticmethod
+    def _manifest(template_id: str, name: str, description: str, **extra_contents) -> dict:
+        """Return a schema-1 manifest; *extra_contents* extends ``contents``."""
+        contents = {"dashboard": True, "agentsMd": True}
+        contents.update(extra_contents)
+        return {
+            "schemaVersion": 1,
+            "id": template_id,
+            "name": name,
+            "version": "1.0.0",
+            "description": description,
+            "contents": contents,
+        }
+
+    @staticmethod
+    def _bundle_files(
+        manifest: dict,
+        readme: bytes = b"# readme",
+        agents: bytes | None = b"# agents",
+        dashboard: dict | None = None,
+    ) -> dict[str, bytes]:
+        """Return the bundle file mapping for *manifest*.
+
+        Pass ``agents=None`` to leave AGENTS.md out of the bundle;
+        ``dashboard=None`` ships ``MINIMAL_DASHBOARD``.
+        """
+        files = {
+            "template.json": json.dumps(manifest).encode("utf-8"),
+            "README.md": readme,
+        }
+        if agents is not None:
+            files["AGENTS.md"] = agents
+        chosen_dashboard = MINIMAL_DASHBOARD if dashboard is None else dashboard
+        files["dashboard.json"] = json.dumps(chosen_dashboard).encode("utf-8")
+        return files
+
+    def _make_template(self, name: str, manifest: dict, **bundle_kwargs) -> Path:
+        """Build ``templates/tester/<name>/`` in the temp repo and return it.
+
+        *bundle_kwargs* are forwarded to ``_bundle_files``.
+        """
+        return make_template_dir(
+            self.repo, "tester", name,
+            manifest=manifest,
+            bundle_files=self._bundle_files(manifest, **bundle_kwargs),
+        )
+
+    def _assert_error_containing(self, errors: list, needle: str) -> None:
+        """Fail unless at least one error's text contains *needle*."""
+        self.assertTrue(any(needle in str(e) for e in errors), errors)
+
+    def _validate_all(self) -> tuple[list, list]:
+        """Validate every template in the temp repo.
+
+        Returns ``(records, errors)``; errors include both validation
+        errors and staging-drift errors for templates that produced a
+        record.
+        """
+        records = []
+        errors = []
+        for tdir in build_catalog._iter_templates(self.repo):
+            record, errs = build_catalog.validate_template(tdir)
+            errors.extend(errs)
+            if record is not None:
+                errors.extend(build_catalog._check_staging_matches_bundle(record))
+                records.append(record)
+        return records, errors
+
+
+class CatalogJsonTests(unittest.TestCase):
+    """Guards the field names of the emitted catalog.json — the site's
+    widgets.js reads these fields by name, so the shape must stay stable."""
+
+    def test_catalog_json_shape(self):
+        expected_keys = (
+            "id", "name", "version", "description", "contents",
+            "installUrl", "detailSlug", "bundleSha256", "bundleSize",
+        )
+        with tempfile.TemporaryDirectory() as scratch:
+            scratch_root = Path(scratch)
+            repo = make_fake_repo(scratch_root)
+            make_template_dir(repo, "tester", "shape")
+
+            # Validate every template; the fixture must be error-free.
+            validated = []
+            for template_path in build_catalog._iter_templates(repo):
+                record, errors = build_catalog.validate_template(template_path)
+                self.assertEqual(errors, [])
+                validated.append(record)
+
+            # Emit the catalog and read it back as plain JSON.
+            catalog_path = scratch_root / "catalog.json"
+            build_catalog.write_catalog_json(validated, catalog_path)
+            catalog = json.loads(catalog_path.read_text())
+
+            self.assertEqual(catalog["schemaVersion"], 1)
+            templates = catalog["templates"]
+            self.assertEqual(len(templates), 1)
+            entry = templates[0]
+            for key in expected_keys:
+                self.assertIn(key, entry)
+            install_url = entry["installUrl"]
+            self.assertTrue(install_url.startswith("https://raw.githubusercontent.com/"))
+            self.assertEqual(entry["detailSlug"], "tester-shape")
+
+
+class RealBundleTest(unittest.TestCase):
+    """End-to-end check of the validator against the shipped Site Status
+    Checker bundle, to catch drift between the validator and real-world
+    author conventions. Skipped when the template directory is absent."""
+
+    def test_site_status_checker_passes(self):
+        # This file lives in <repo>/tools/, so the repo root is two levels up.
+        repo_root = Path(__file__).resolve().parents[1]
+        template = repo_root / "templates" / "awizemann" / "site-status-checker"
+        if not template.exists():
+            self.skipTest("site-status-checker not present (running outside repo?)")
+
+        record, errors = build_catalog.validate_template(template)
+        self.assertIsNotNone(record)
+
+        drift = build_catalog._check_staging_matches_bundle(record)
+        problems = errors + drift
+        self.assertEqual(problems, [], f"errors: {errors}, drift: {drift}")
+        self.assertEqual(record.manifest["id"], "awizemann/site-status-checker")
+
+
+# Allow running the file directly (python3 tools/test_build_catalog.py) in
+# addition to `python3 -m unittest`.
+if __name__ == "__main__":
+    unittest.main()