feat(catalog): stdlib-only Python validator + regenerator for templates/

Adds the catalog pipeline without introducing any external dependencies.
tools/build-catalog.py walks templates/<author>/<name>/, validates every
shipped .scarftemplate against its manifest (same invariants Swift's
ProjectTemplateService.verifyClaims enforces at install time), and emits
templates/catalog.json for the frontend to read.

Validator invariants:
- Required bundle files: template.json, README.md, AGENTS.md, dashboard.json
- contents claim cross-checked against actual zip entries (instructions,
  skills, cron count, memory appendix)
- dashboard.json widget types restricted to the vocabulary the Swift
  renderer knows
- Manifest id author component must match the template directory
- 5 MB bundle-size cap on submissions (installer's own cap is 50 MB)
- High-confidence secret patterns (private keys, GitHub PATs, Slack tokens,
  AWS access keys, OpenAI/Anthropic keys) block the bundle
- staging/ source tree must match the built bundle byte-for-byte — catches
  the common failure mode of editing staging/ but forgetting to rebuild

scripts/catalog.sh wraps the Python script with check/build/preview/serve/
publish subcommands, mirroring the scripts/wiki.sh shape. publish adds a
second-pass hard-pattern secret scan on the rendered gh-pages output so
template prose can't leak credentials even if the Python scan missed them.

tools/test_build_catalog.py has 14 unit tests covering the main validator
paths (minimal-valid, missing-AGENTS, content-claim mismatch, author
mismatch, oversized bundle, unknown widget type, secret detection,
staging-drift detection, missing bundle, catalog.json shape, and a real-
bundle end-to-end check against templates/awizemann/site-status-checker).
Python 3.9 compatible (Xcode's bundled python3), so no runtime needs
installing.

templates/catalog.json committed as the first generated aggregate index;
maintainers regenerate on merge by running `./scripts/catalog.sh build`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-04-23 00:02:09 +02:00
parent d8a0a89db2
commit 11732baa3c
4 changed files with 1200 additions and 0 deletions
+135
View File
@@ -0,0 +1,135 @@
#!/usr/bin/env bash
#
# Scarf templates catalog helper — runs the Python validator, renders the
# static site into .gh-pages-worktree/templates/, and (on `publish`)
# commits + pushes that subdir on the gh-pages branch.
#
# Usage:
# ./scripts/catalog.sh check # validate every template; no output
# ./scripts/catalog.sh build # validate + write templates/catalog.json + .gh-pages-worktree/templates/
# ./scripts/catalog.sh preview [DIR] # render self-contained preview; DIR defaults to /tmp/scarf-catalog-preview
# ./scripts/catalog.sh publish # secret-scan + commit + push gh-pages (templates subdir only)
# ./scripts/catalog.sh serve [PORT] # serve .gh-pages-worktree/ on localhost:PORT (default 8000)
# ./scripts/catalog.sh --help # this help
#
# The secret-scan runs BEFORE publish and inspects the generated
# .gh-pages-worktree/templates/ tree — same hard-pattern regex as
# scripts/wiki.sh so template README/AGENTS content that accidentally
# leaks credentials gets blocked before it reaches the public site.
#
# Bootstrap (one-time): requires .gh-pages-worktree/ to be a git worktree
# checked out on the gh-pages branch. The release script
# (scripts/release.sh) creates it on first use. If it's missing:
# git worktree add .gh-pages-worktree gh-pages
#
# Recovery: if .gh-pages-worktree/ is deleted, re-run the command above.
# Strict mode: abort on command failure (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# ---------- config ----------
# Repo root = parent of the directory holding this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Checkout of the gh-pages branch that the rendered site is written into.
GHPAGES_DIR="$REPO_ROOT/.gh-pages-worktree"
# Subdirectory of the gh-pages checkout that this script scans, stages and commits.
CATALOG_SUBDIR="templates"
# Python interpreter; override with the PYTHON environment variable.
PY="${PYTHON:-python3}"
# The validator / catalog generator that every subcommand delegates to.
BUILDER="$REPO_ROOT/tools/build-catalog.py"
# ---------- helpers (same shape as scripts/wiki.sh so a reader doesn't
# have to learn two conventions) ----------
# Print a bold-blue step banner to stdout.
log() {
  printf '\033[1;34m==> %s\033[0m\n' "$*"
}

# Print a bold-yellow warning to stderr; does not exit.
warn() {
  printf '\033[1;33m[WARN] %s\033[0m\n' "$*" >&2
}

# Print a bold-red error to stderr and terminate the script with status 1.
die() {
  printf '\033[1;31m[ERR] %s\033[0m\n' "$*" >&2
  exit 1
}
# Abort unless the builder script exists and the configured Python
# interpreter can be found on PATH.
need_builder() {
  if [[ ! -f "$BUILDER" ]]; then
    die "missing $BUILDER"
  fi
  if ! command -v "$PY" >/dev/null 2>&1; then
    die "python3 not found (set \$PYTHON if needed)"
  fi
}
# Abort unless $GHPAGES_DIR looks like a git checkout.
#
# `git worktree add` (the documented bootstrap) writes .git as a regular
# *file* pointing at the main repository's gitdir, while a plain clone has
# a .git *directory*. Test for existence (-e) so both layouts are accepted;
# a directory-only test rejects every linked worktree.
need_ghpages() {
  [[ -e "$GHPAGES_DIR/.git" ]] || die "no gh-pages worktree at $GHPAGES_DIR
Run: git worktree add .gh-pages-worktree gh-pages"
}
# ---------- secret-scan (mirrors scripts/wiki.sh hard-pattern set) ----------
# Extended-regex (grep -E) alternation of high-confidence credential
# shapes: sk- keys, GitHub tokens (ghp_/ghs_/ghu_/gho_/ghr_/github_pat_),
# Slack xox?- tokens, AWS AKIA ids, Google AIza keys, and PEM/OpenSSH
# private-key headers.
hard_regex='(sk-[A-Za-z0-9_-]{20,}|ghp_[A-Za-z0-9]{30,}|ghs_[A-Za-z0-9]{30,}|ghu_[A-Za-z0-9]{30,}|gho_[A-Za-z0-9]{30,}|ghr_[A-Za-z0-9]{30,}|github_pat_[A-Za-z0-9_]{20,}|xox[baprs]-[A-Za-z0-9-]{10,}|AKIA[0-9A-Z]{16}|AIza[0-9A-Za-z_-]{35}|-----BEGIN [A-Z ]*PRIVATE KEY-----|BEGIN OPENSSH PRIVATE KEY)'
# Grep the rendered catalog subtree for hard secret patterns and refuse to
# publish on any hit.
#
# Scans the generated output, NOT the repo source — the validator already
# scans bundle contents. This pass catches anything that leaked through
# template.json fields or README prose.
#
# The scan fails closed: a missing target directory or a grep error
# (exit status > 1) aborts the publish instead of being read as "clean".
scan_hard_ghpages() {
  local target="$GHPAGES_DIR/$CATALOG_SUBDIR"
  local hits status=0
  [[ -d "$target" ]] || die "rendered site not found at $target — run build first."
  # grep exit status: 0 = match found, 1 = no match, >1 = error.
  hits="$(grep -rInE --exclude-dir=.git "$hard_regex" "$target")" || status=$?
  if (( status > 1 )); then
    die "secret scan could not run (grep exit $status) — refusing to publish."
  fi
  if [[ -n "$hits" ]]; then
    printf '%s\n' "$hits" >&2
    die "hard-pattern secret match in rendered site — refusing to publish."
  fi
}
# ---------- commands ----------
# check: run the validator over every template without writing output.
cmd_check() {
  need_builder
  local -a builder_args=(--check --repo "$REPO_ROOT")
  "$PY" "$BUILDER" "${builder_args[@]}"
}
# build: run the builder in --build mode against this repo.
cmd_build() {
  need_builder
  local -a builder_args=(--build --repo "$REPO_ROOT")
  "$PY" "$BUILDER" "${builder_args[@]}"
}
# preview [DIR]: render a preview into DIR (default
# /tmp/scarf-catalog-preview). DIR is deleted and recreated first, so any
# previous contents are lost.
cmd_preview() {
  need_builder
  local out_dir="/tmp/scarf-catalog-preview"
  if [[ -n "${1:-}" ]]; then
    out_dir="$1"
  fi
  # Start from an empty directory so stale files never linger in a preview.
  rm -rf "$out_dir"
  mkdir -p "$out_dir"
  "$PY" "$BUILDER" --preview "$out_dir" --repo "$REPO_ROOT"
  log "Preview rendered to $out_dir"
  log "Serve with: (cd $out_dir && python3 -m http.server 8000) then open http://localhost:8000/"
}
# serve [PORT]: serve the gh-pages checkout over HTTP on localhost
# (default port 8000) using Python's built-in http.server.
cmd_serve() {
  need_ghpages
  local port=8000
  if [[ -n "${1:-}" ]]; then
    port="$1"
  fi
  log "Serving $GHPAGES_DIR on http://localhost:$port/"
  (
    cd "$GHPAGES_DIR"
    "$PY" -m http.server "$port"
  )
}
# publish: validate, build, secret-scan the rendered site, then commit and
# push the catalog subdirectory on the gh-pages branch.
#
# Every git step is limited to $CATALOG_SUBDIR via a pathspec so that
# unrelated changes already staged in the gh-pages checkout are neither
# mistaken for catalog changes nor swept into the catalog commit.
cmd_publish() {
  need_builder
  need_ghpages
  log "Validating"
  "$PY" "$BUILDER" --check --repo "$REPO_ROOT"
  log "Building"
  "$PY" "$BUILDER" --build --repo "$REPO_ROOT"
  log "Secret-scanning rendered site"
  scan_hard_ghpages
  log "Staging + committing gh-pages"
  (cd "$GHPAGES_DIR" && git add -- "$CATALOG_SUBDIR")
  # --quiet exits 0 when the staged tree for this path equals HEAD.
  if (cd "$GHPAGES_DIR" && git diff --cached --quiet -- "$CATALOG_SUBDIR"); then
    log "No changes to publish."
    return 0
  fi
  local msg
  msg="catalog: rebuild at $(date -u +%Y-%m-%dT%H:%M:%SZ)"
  # A pathspec on `git commit` records only those paths, leaving anything
  # else that happens to be staged untouched.
  (cd "$GHPAGES_DIR" && git commit -m "$msg" -- "$CATALOG_SUBDIR")
  log "Pushing gh-pages"
  (cd "$GHPAGES_DIR" && git push origin gh-pages)
  log "Published."
}
# help: print the "# Usage" section of this script's header comment
# (searched within the first 30 lines of the file).
cmd_help() {
  head -n 30 "$0" | sed -n '/^# Usage/,/^#$/p'
}
# ---------- dispatch ----------
# First argument selects the subcommand; a missing or empty one means help.
sub="${1:-help}"
# Drop the subcommand from "$@" only when one was actually supplied.
if (( $# > 0 )); then
  shift
fi

case "$sub" in
  check)
    cmd_check "$@"
    ;;
  build)
    cmd_build "$@"
    ;;
  preview)
    cmd_preview "$@"
    ;;
  serve)
    cmd_serve "$@"
    ;;
  publish)
    cmd_publish "$@"
    ;;
  help | --help | -h)
    cmd_help
    ;;
  *)
    die "unknown command: $sub (try --help)"
    ;;
esac
+34
View File
@@ -0,0 +1,34 @@
{
"generated": true,
"schemaVersion": 1,
"templates": [
{
"author": {
"name": "Alan Wizemann",
"url": "https://github.com/awizemann/scarf"
},
"bundleSha256": "32b8c12706de8596be63dcdda32d46fc5bf478d5b9f7c1fc4c6d96ced251186a",
"bundleSize": 5410,
"category": "monitoring",
"contents": {
"agentsMd": true,
"cron": 1,
"dashboard": true
},
"description": "A daily uptime check for a short list of URLs. Writes status to status-log.md and updates the dashboard with current counts.",
"detailSlug": "awizemann-site-status-checker",
"id": "awizemann/site-status-checker",
"installUrl": "https://raw.githubusercontent.com/awizemann/scarf/main/templates/awizemann/site-status-checker/site-status-checker.scarftemplate",
"minHermesVersion": "0.9.0",
"minScarfVersion": "2.2.0",
"name": "Site Status Checker",
"tags": [
"monitoring",
"uptime",
"cron",
"starter"
],
"version": "1.0.0"
}
]
}
+646
View File
@@ -0,0 +1,646 @@
#!/usr/bin/env python3
"""Scarf template catalog builder + validator.
Walks every `templates/<author>/<name>/` in this repo, validates the
`.scarftemplate` bundle against its manifest claim (same invariants the
Swift `ProjectTemplateService.verifyClaims` enforces at install time), and
produces:
templates/catalog.json aggregate index for the site
.gh-pages-worktree/templates/... per-template HTML + dashboard.json
(only produced by --build / --publish)
This is stdlib-only Python so it runs in a GitHub Action with zero
dependencies and in under a second even when the catalog has thousands of
templates. Schema drift between this validator and the Swift installer
breaks one of two contracts — add a failing test in both places when you
change anything here.
Usage:
tools/build-catalog.py --check validate; no output written
tools/build-catalog.py --build validate + write catalog.json + site
tools/build-catalog.py --preview DIR render a self-contained preview
site into DIR (for local viewing)
Exit codes:
0 success
1 validation failure (one or more templates rejected)
2 IO / usage error
"""
from __future__ import annotations
import argparse
import hashlib
import json
import os
import re
import shutil
import sys
import zipfile
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
# ---------------------------------------------------------------------------
# Schema + invariants
# ---------------------------------------------------------------------------
SCHEMA_VERSION = 1
MAX_BUNDLE_BYTES = 5 * 1024 * 1024  # 5 MB cap on submissions; installer is 50 MB
REQUIRED_BUNDLE_FILES = ("template.json", "README.md", "AGENTS.md", "dashboard.json")
SUPPORTED_WIDGET_TYPES = {"stat", "progress", "text", "table", "chart", "list", "webview"}

# Common secret patterns — keep in sync with `scripts/wiki.sh` and reuse a
# conservative subset. The validator rejects hard matches; the site's
# CONTRIBUTING guide covers the rest.
#
# Each entry is (compiled pattern, human-readable label). Notes on shape:
# - The private-key header accepts any upper-case key-type prefix (RSA, EC,
#   DSA, OPENSSH, ENCRYPTED, or none), matching the shell scanner.
# - Fine-grained GitHub PATs use the `github_pat_` prefix rather than
#   `ghp_`, so they need their own alternative.
# - `sk-` keys may contain `-` and `_` after the prefix (e.g. `sk-ant-...`,
#   `sk-proj-...`); an alphanumeric-only body can never match those.
SECRET_PATTERNS = [
    (re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----"), "private key block"),
    (re.compile(r"(?i)\bgh[pousr]_[A-Za-z0-9]{36,}"), "github personal access token"),
    (re.compile(r"\bgithub_pat_[A-Za-z0-9_]{20,}"), "github personal access token"),
    (re.compile(r"(?i)\bxox[abposr]-[A-Za-z0-9-]{10,}"), "slack token"),
    (re.compile(r"(?i)\bAKIA[0-9A-Z]{16}"), "aws access key id"),
    (re.compile(r"(?i)\bsk-[A-Za-z0-9_-]{32,}"), "openai/anthropic api key"),
]

# Two levels up from this file: tools/build-catalog.py -> repo root.
REPO_ROOT = Path(__file__).resolve().parent.parent
# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------
@dataclass
class ValidationError:
    """A single validation failure attributed to one template directory."""

    # Directory of the template the failure belongs to.
    template_path: Path
    # Human-readable description of what is wrong.
    message: str

    def __str__(self) -> str:
        """Format as ``<path>: <message>``.

        The path is shown relative to REPO_ROOT when the template lives
        inside the repo (concise CLI output) and unchanged otherwise, e.g.
        for unit tests working in temp dirs.
        """
        shown: Path | str = self.template_path
        try:
            shown = self.template_path.relative_to(REPO_ROOT)
        except ValueError:
            # Not under the repo root — keep the path exactly as given.
            pass
        return f"{shown}: {self.message}"
@dataclass
class TemplateRecord:
    """One validated template, ready to be listed in catalog.json.

    Mirrors the Swift ProjectTemplateManifest but with a few derived
    fields added.
    """

    path: Path            # template directory
    manifest: dict        # parsed template.json taken from the bundle
    bundle_path: Path     # the .scarftemplate archive on disk
    bundle_sha256: str    # hex SHA-256 digest of the archive
    bundle_size: int      # archive size in bytes
    install_url: str      # URL the bundle is downloaded from
    detail_slug: str      # slug used for the per-template detail page

    def to_catalog_entry(self) -> dict:
        """Return the subset of fields published in catalog.json.

        Keep fields stable — the site's widgets.js reads this shape.
        Raises KeyError when the manifest lacks ``id``, ``name``,
        ``version``, ``description`` or ``contents``.
        """
        manifest = self.manifest
        entry = {key: manifest[key] for key in ("id", "name", "version", "description")}
        entry["author"] = manifest.get("author")
        entry["category"] = manifest.get("category")
        entry["tags"] = manifest.get("tags") or []
        entry["contents"] = manifest["contents"]
        entry["installUrl"] = self.install_url
        entry["detailSlug"] = self.detail_slug
        entry["bundleSha256"] = self.bundle_sha256
        entry["bundleSize"] = self.bundle_size
        entry["minScarfVersion"] = manifest.get("minScarfVersion")
        entry["minHermesVersion"] = manifest.get("minHermesVersion")
        return entry
# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------
def manifest_slug(manifest_id: str) -> str:
    """Turn a manifest id into a URL/filesystem-friendly slug.

    Mirror of Swift `ProjectTemplateManifest.slug`. Every run of characters
    other than ASCII letters, digits, ``_`` and ``-`` becomes one hyphen,
    and leading/trailing hyphens are trimmed. If nothing is left the slug
    is the literal 'template'.
    """
    slug = re.sub(r"[^A-Za-z0-9_-]+", "-", manifest_id).strip("-")
    if not slug:
        return "template"
    return slug
def _iter_templates(repo_root: Path) -> Iterable[Path]:
    """Yield every `templates/<author>/<name>/` directory that looks like a
    template, in sorted order.

    A directory counts as a template when it holds a `staging/` source
    tree, a `template.json`, or a built `*.scarftemplate`. Recognising a
    bare bundle matters: otherwise an archive committed without `staging/`
    would never be validated or secret-scanned. Authors whose dirs only
    hold a README are silently skipped, as are author directories whose
    name starts with a dot. Yields nothing when `templates/` is missing.
    """
    root = repo_root / "templates"
    if not root.is_dir():
        return
    for author_dir in sorted(root.iterdir()):
        if not author_dir.is_dir() or author_dir.name.startswith("."):
            continue
        for template_dir in sorted(author_dir.iterdir()):
            if not template_dir.is_dir():
                continue
            looks_like_template = (
                (template_dir / "staging").is_dir()
                or (template_dir / "template.json").is_file()
                or any(template_dir.glob("*.scarftemplate"))
            )
            if looks_like_template:
                yield template_dir
def _validate_manifest(manifest: dict, template_dir: Path, errors: list[ValidationError]) -> None:
    """Check the manifest's top-level fields and append problems to `errors`.

    Verifies that the required fields are present, that `schemaVersion`
    equals SCHEMA_VERSION, and that `id` is an `owner/name` string whose
    owner part equals the name of the template's parent directory.
    Nothing is returned or raised; every finding is appended to `errors`.
    """
    required = ("schemaVersion", "id", "name", "version", "description", "contents")
    for field in required:
        if field not in manifest:
            errors.append(ValidationError(template_dir, f"manifest missing required field: {field}"))
    if manifest.get("schemaVersion") != SCHEMA_VERSION:
        errors.append(ValidationError(template_dir, f"unsupported schemaVersion: {manifest.get('schemaVersion')}"))

    # Manifest id must match the directory layout. The manifest comes from
    # a submitted bundle, so `id` may be null or a number; report that as a
    # validation error rather than letting the `in` test raise TypeError.
    mid = manifest.get("id", "")
    if not isinstance(mid, str) or "/" not in mid:
        errors.append(ValidationError(template_dir, f"manifest id must be owner/name, got {mid!r}"))
        return
    expected_author = template_dir.parent.name
    author_part, _, _ = mid.partition("/")
    if author_part != expected_author:
        errors.append(ValidationError(
            template_dir,
            f"manifest id {mid!r} author component does not match directory "
            f"({expected_author!r})"
        ))
def _validate_contents_claim(
    manifest: dict,
    bundle_files: set[str],
    cron_job_count: int,
    template_dir: Path,
    errors: list[ValidationError],
) -> None:
    """Cross-check the manifest's `contents` claim against the bundle.

    Mirrors Swift `ProjectTemplateService.verifyClaims`. Rejects any
    mismatch between what the manifest says and what's actually in the
    bundle so the catalog site can't misrepresent a template.

    `bundle_files` is the set of regular-file paths in the archive and
    `cron_job_count` the number of jobs parsed from it. Findings are
    appended to `errors` in a deterministic (sorted) order. Claims with
    the wrong JSON type are reported as validation errors instead of
    raising, because the manifest is untrusted submitted input.
    """

    def fail(message: str) -> None:
        errors.append(ValidationError(template_dir, message))

    contents = manifest.get("contents", {})
    if not isinstance(contents, dict):
        fail(f"manifest contents must be an object, got {type(contents).__name__}")
        contents = {}

    def claimed_names(key: str) -> list[str]:
        """Return contents[key] as a list of strings ([] when absent or malformed)."""
        value = contents.get(key) or []
        if not isinstance(value, list) or not all(isinstance(item, str) for item in value):
            fail(f"contents.{key} must be a list of strings")
            return []
        return value

    for required in REQUIRED_BUNDLE_FILES:
        if required not in bundle_files:
            fail(f"bundle missing required file: {required}")

    # Optional instructions/ dir — claim must match presence exactly.
    claimed_full = {f"instructions/{p}" for p in claimed_names("instructions")}
    present_instructions = {f for f in bundle_files if f.startswith("instructions/")}
    for claim in sorted(claimed_full):
        if claim not in bundle_files:
            fail(f"contents.instructions claims {claim} but file is missing")
    for present in sorted(present_instructions - claimed_full):
        fail(f"bundle has {present} but it's not listed in contents.instructions")

    # Skills — each claimed skill name must exist as a subdir with at least
    # one file; extra skill dirs not listed are rejected.
    claimed_skills = set(claimed_names("skills"))
    present_skills = set()
    for f in bundle_files:
        if f.startswith("skills/"):
            rest = f[len("skills/"):]
            if "/" in rest:
                present_skills.add(rest.split("/", 1)[0])
    for skill in sorted(claimed_skills):
        if not any(f.startswith(f"skills/{skill}/") for f in bundle_files):
            fail(f"contents.skills claims {skill!r} but skills/{skill}/ is empty")
    for extra in sorted(present_skills - claimed_skills):
        fail(f"bundle has skills/{extra}/ not listed in contents.skills")

    # Cron — numeric count must match bundle.
    raw_cron = contents.get("cron")
    try:
        claimed_cron = int(raw_cron or 0)
    except (TypeError, ValueError):
        fail(f"contents.cron must be an integer, got {raw_cron!r}")
    else:
        if claimed_cron != cron_job_count:
            fail(f"contents.cron={claimed_cron} but bundle contains {cron_job_count} cron jobs")

    # Memory appendix — claim must match file presence.
    memory_claim = contents.get("memory") or {}
    if not isinstance(memory_claim, dict):
        fail("contents.memory must be an object")
        memory_claim = {}
    claimed_memory = bool(memory_claim.get("append"))
    has_memory_file = "memory/append.md" in bundle_files
    if claimed_memory != has_memory_file:
        fail(f"contents.memory.append={claimed_memory} disagrees with memory/append.md presence={has_memory_file}")
def _validate_dashboard(zf: zipfile.ZipFile, template_dir: Path, errors: list[ValidationError]) -> None:
    """Check the bundle's dashboard.json against the widget vocabulary.

    Decodes dashboard.json against the widget-type vocabulary the Swift
    renderer knows. An unknown widget type means the app will render an
    'unknown widget' placeholder — that's a bad catalog experience.

    Findings are appended to `errors`. Structural surprises (a top-level
    value, section or widget that is not a JSON object) are reported as
    validation errors rather than raising, since the file is untrusted.
    """
    try:
        dashboard = json.loads(zf.read("dashboard.json"))
    except Exception as e:
        # Covers a missing entry (KeyError), unreadable data and bad JSON.
        errors.append(ValidationError(template_dir, f"dashboard.json failed to parse: {e}"))
        return
    if not isinstance(dashboard, dict):
        errors.append(ValidationError(template_dir, "dashboard.json must be a JSON object"))
        return
    if dashboard.get("version") != 1:
        errors.append(ValidationError(template_dir, f"dashboard.version must be 1, got {dashboard.get('version')}"))
    sections = dashboard.get("sections") or []
    if not isinstance(sections, list):
        errors.append(ValidationError(template_dir, "dashboard.sections must be a list"))
        return
    for section in sections:
        if not isinstance(section, dict):
            errors.append(ValidationError(template_dir, "dashboard section must be an object"))
            continue
        widgets = section.get("widgets") or []
        if not isinstance(widgets, list):
            errors.append(ValidationError(template_dir, "dashboard section widgets must be a list"))
            continue
        for widget in widgets:
            if not isinstance(widget, dict):
                errors.append(ValidationError(template_dir, "dashboard widget must be an object"))
                continue
            widget_type = widget.get("type")
            # The isinstance test also keeps unhashable values (lists,
            # objects) away from the set-membership check.
            if not isinstance(widget_type, str) or widget_type not in SUPPORTED_WIDGET_TYPES:
                errors.append(ValidationError(
                    template_dir,
                    f"dashboard widget {widget.get('title')!r} has unknown type {widget_type!r}"
                ))
def _scan_for_secrets(zf: zipfile.ZipFile, template_dir: Path, errors: list[ValidationError]) -> None:
    """Refuse bundles containing obvious secret patterns.

    Conservative — matches only high-confidence substrings (no
    keyword-only warnings). Each archive entry is decoded as UTF-8 (with
    replacement) and searched with SECRET_PATTERNS; at most one finding is
    recorded per file. Directory entries and entries larger than 256 KiB
    are skipped.

    An entry that cannot be read is reported as a validation error: the
    scan cannot vouch for content it never saw, so skipping it silently
    would let the bundle through unscanned.
    """
    for info in zf.infolist():
        if info.is_dir() or info.file_size > 256 * 1024:
            continue  # skip big binaries
        try:
            data = zf.read(info.filename).decode("utf-8", errors="replace")
        except Exception as e:
            errors.append(ValidationError(
                template_dir,
                f"bundle file {info.filename} could not be read for secret scanning: {e}"
            ))
            continue
        for pattern, label in SECRET_PATTERNS:
            if pattern.search(data):
                errors.append(ValidationError(
                    template_dir,
                    f"bundle file {info.filename} matches {label} pattern — refusing"
                ))
                break
def _parse_cron_jobs(zf: zipfile.ZipFile, template_dir: Path, errors: list[ValidationError]) -> int:
    """Return the number of jobs declared in the bundle's cron/jobs.json.

    Returns 0 when the file is absent, is not valid JSON, or is not a JSON
    array (the latter two also append a validation error). Otherwise every
    element is checked to be an object carrying `name` and `schedule`;
    offenders are reported in `errors` but still counted in the result.
    """
    member = "cron/jobs.json"
    if member not in zf.namelist():
        return 0

    try:
        jobs = json.loads(zf.read(member))
    except Exception as exc:
        errors.append(ValidationError(template_dir, f"cron/jobs.json failed to parse: {exc}"))
        return 0

    if not isinstance(jobs, list):
        errors.append(ValidationError(template_dir, "cron/jobs.json must be a JSON array"))
        return 0

    for index, job in enumerate(jobs):
        if isinstance(job, dict):
            if not ("name" in job and "schedule" in job):
                errors.append(ValidationError(
                    template_dir,
                    f"cron/jobs.json[{index}] missing required field (name, schedule)"
                ))
        else:
            errors.append(ValidationError(template_dir, f"cron/jobs.json[{index}] must be an object"))
    return len(jobs)
def _bundle_files(zf: zipfile.ZipFile) -> set[str]:
    """Return the set of regular-file paths stored in the bundle.

    Directory entries and anything under the macOS `__MACOSX/` metadata
    folder are left out.
    """
    names: set[str] = set()
    for entry in zf.infolist():
        if entry.is_dir():
            continue
        if entry.filename.startswith("__MACOSX/"):
            continue
        names.add(entry.filename)
    return names
def validate_template(template_dir: Path) -> tuple[TemplateRecord | None, list[ValidationError]]:
    """Validate one template dir and return a (record, errors) pair.

    record is None when errors are fatal enough that we can't build a
    catalog entry at all: no bundle, an unreadable archive, or a
    template.json that is missing, unparseable, or not a JSON object.
    A non-None record may still come with errors; callers must treat any
    error as a rejection.
    """
    errors: list[ValidationError] = []

    # Find the bundle. By convention it's `<dir>/<dir-basename>.scarftemplate`
    # or any single .scarftemplate in the dir.
    bundles = sorted(template_dir.glob("*.scarftemplate"))
    if not bundles:
        errors.append(ValidationError(template_dir, "no .scarftemplate found in template directory"))
        return None, errors
    if len(bundles) > 1:
        errors.append(ValidationError(
            template_dir,
            f"more than one .scarftemplate present: {[b.name for b in bundles]}"
        ))
    # With several bundles the first (sorted) one is still validated so the
    # author sees all problems in a single run.
    bundle_path = bundles[0]
    bundle_size = bundle_path.stat().st_size
    if bundle_size > MAX_BUNDLE_BYTES:
        errors.append(ValidationError(
            template_dir,
            f"bundle size {bundle_size} exceeds catalog cap of {MAX_BUNDLE_BYTES} bytes"
        ))

    try:
        with zipfile.ZipFile(bundle_path, "r") as zf:
            bundle_files = _bundle_files(zf)
            if "template.json" not in bundle_files:
                errors.append(ValidationError(template_dir, "bundle is missing template.json"))
                return None, errors
            try:
                manifest = json.loads(zf.read("template.json"))
            except Exception as e:
                errors.append(ValidationError(template_dir, f"template.json failed to parse: {e}"))
                return None, errors
            # Valid JSON is not necessarily an object; every later check
            # treats the manifest as a mapping, so stop here otherwise.
            if not isinstance(manifest, dict):
                errors.append(ValidationError(template_dir, "template.json must contain a JSON object"))
                return None, errors
            _validate_manifest(manifest, template_dir, errors)
            cron_count = _parse_cron_jobs(zf, template_dir, errors)
            _validate_contents_claim(manifest, bundle_files, cron_count, template_dir, errors)
            _validate_dashboard(zf, template_dir, errors)
            _scan_for_secrets(zf, template_dir, errors)
    except zipfile.BadZipFile:
        errors.append(ValidationError(template_dir, "bundle is not a valid zip archive"))
        return None, errors

    # Compute the catalog-ready record.
    sha = hashlib.sha256(bundle_path.read_bytes()).hexdigest()
    author = template_dir.parent.name
    short_name = template_dir.name
    install_url = (
        "https://raw.githubusercontent.com/awizemann/scarf/main/"
        f"templates/{author}/{short_name}/{bundle_path.name}"
    )
    # Fall back to the directory layout when the id is absent or not a
    # string; slugging a non-string would raise.
    manifest_id = manifest.get("id")
    if not isinstance(manifest_id, str):
        manifest_id = f"{author}/{short_name}"
    detail_slug = manifest_slug(manifest_id)
    record = TemplateRecord(
        path=template_dir,
        manifest=manifest,
        bundle_path=bundle_path,
        bundle_sha256=sha,
        bundle_size=bundle_size,
        install_url=install_url,
        detail_slug=detail_slug,
    )
    return record, errors
# ---------------------------------------------------------------------------
# Staging/bundle drift check — keeps authors honest
# ---------------------------------------------------------------------------
def _check_staging_matches_bundle(record: TemplateRecord) -> list[ValidationError]:
    """Compare the template's staging/ tree with the committed bundle.

    Catches the common failure mode of an author editing staging/ but
    forgetting to regenerate the .scarftemplate. Returns an empty list when
    there is no staging/ directory or when both sides hold the same files
    with the same bytes; otherwise up to three errors: files only in
    staging, files only in the bundle, and files whose content differs.

    Ignored on the bundle side: directory entries and `__MACOSX/` entries.
    Ignored on the staging side: paths whose relative name starts with a
    dot and any path containing `/.DS_Store`.
    """
    staging_root = record.path / "staging"
    if not staging_root.is_dir():
        return []

    with zipfile.ZipFile(record.bundle_path, "r") as archive:
        committed = {
            entry.filename: archive.read(entry.filename)
            for entry in archive.infolist()
            if not (entry.is_dir() or entry.filename.startswith("__MACOSX/"))
        }

    source = {}
    for candidate in staging_root.rglob("*"):
        if not candidate.is_file():
            continue
        rel = candidate.relative_to(staging_root).as_posix()
        # A leading dot covers a top-level .DS_Store; the substring test
        # covers nested ones.
        if rel.startswith(".") or "/.DS_Store" in rel:
            continue
        source[rel] = candidate.read_bytes()

    problems: list[ValidationError] = []

    only_in_staging = sorted(source.keys() - committed.keys())
    if only_in_staging:
        problems.append(ValidationError(
            record.path,
            f"staging has files not in the built bundle: {only_in_staging} "
            "(rebuild with `zip -qq -r <name>.scarftemplate .` from staging/)"
        ))

    only_in_bundle = sorted(committed.keys() - source.keys())
    if only_in_bundle:
        problems.append(ValidationError(
            record.path,
            f"bundle has files not in staging/: {only_in_bundle} "
            "(commit them to staging/ or rebuild the bundle from staging/)"
        ))

    changed = [
        name
        for name, payload in source.items()
        if name in committed and committed[name] != payload
    ]
    if changed:
        problems.append(ValidationError(
            record.path,
            f"staging content differs from built bundle: {changed} "
            "(rebuild the bundle from staging/)"
        ))
    return problems
# ---------------------------------------------------------------------------
# Build: write catalog.json (site rendering comes in a later commit)
# ---------------------------------------------------------------------------
def write_catalog_json(records: list[TemplateRecord], out_path: Path) -> None:
    """Serialise `records` as the aggregate catalog and write it to `out_path`.

    The parent directory is created when missing. Output is UTF-8 JSON
    with 2-space indentation, sorted keys and a trailing newline, so
    re-running on unchanged input yields an identical file.
    """
    entries = [record.to_catalog_entry() for record in records]
    payload = {
        "schemaVersion": SCHEMA_VERSION,
        # Human reminder that the file is generated; a timestamp would
        # churn the diff every run.
        "generated": True,
        "templates": entries,
    }
    out_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    out_path.write_text(serialized + "\n", encoding="utf-8")
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Parses `argv` (defaults to the process arguments), validates the
    selected templates, and — unless `--check` was given — writes
    templates/catalog.json and renders the site for `--build` or
    `--preview`.

    Returns the process exit code: 0 on success (including "no templates
    found"), 1 when any template failed validation, 2 when the repo root
    does not exist or `--only` matched nothing.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--check", action="store_true", help="validate every template; don't write output")
    group.add_argument("--build", action="store_true", help="validate + write catalog.json")
    group.add_argument("--preview", metavar="DIR", help="render a self-contained site preview into DIR")
    parser.add_argument("--only", metavar="PATH", action="append", default=[],
                        help="validate only the given template dir (may repeat); useful for PR-diff runs")
    parser.add_argument("--repo", metavar="PATH", default=str(REPO_ROOT),
                        help="repo root to operate on (default: auto-detect)")
    args = parser.parse_args(argv)

    repo_root = Path(args.repo).resolve()
    # A mistyped --repo must not look like a clean "nothing to do" run.
    if not repo_root.is_dir():
        print(f"repo root is not a directory: {repo_root}", file=sys.stderr)
        return 2

    template_dirs = list(_iter_templates(repo_root))
    if args.only:
        only = {Path(p).resolve() for p in args.only}
        template_dirs = [t for t in template_dirs if t.resolve() in only]
    if not template_dirs:
        if args.only:
            print("no templates matched --only filter", file=sys.stderr)
            return 2
        print("no templates found under templates/ — nothing to do", file=sys.stderr)
        return 0

    records: list[TemplateRecord] = []
    all_errors: list[ValidationError] = []
    for tdir in template_dirs:
        record, errors = validate_template(tdir)
        all_errors.extend(errors)
        if record is not None:
            all_errors.extend(_check_staging_matches_bundle(record))
            records.append(record)

    # Any error rejects the whole run before anything is written.
    if all_errors:
        print(f"{len(all_errors)} validation error(s):", file=sys.stderr)
        for err in all_errors:
            print(f" {err}", file=sys.stderr)
        return 1

    print(f"{len(records)} template(s) validated", file=sys.stderr)
    for r in records:
        rel = r.path.relative_to(repo_root)
        # Separate directory and manifest id so the line is readable.
        print(f" {rel}: {r.manifest['id']} v{r.manifest['version']}")
    if args.check:
        return 0

    catalog_path = repo_root / "templates" / "catalog.json"
    write_catalog_json(records, catalog_path)
    print(f"wrote {catalog_path.relative_to(repo_root)}", file=sys.stderr)

    if args.preview:
        preview_dir = Path(args.preview).resolve()
        render_site(records, preview_dir, repo_root)
        print(f"preview site rendered to {preview_dir}", file=sys.stderr)
    if args.build:
        # --build renders into .gh-pages-worktree/templates/ so the
        # maintainer's publish step just has to commit + push gh-pages.
        gh_pages = repo_root / ".gh-pages-worktree" / "templates"
        render_site(records, gh_pages, repo_root)
        print(f"site rendered to {gh_pages.relative_to(repo_root)}", file=sys.stderr)
    return 0
def render_site(records: list[TemplateRecord], out_dir: Path, repo_root: Path) -> None:
    """Render the catalog site for `records` into `out_dir`.

    When `<repo_root>/site` does not exist, only `catalog.json` is written
    (Phase 2 behaviour, which keeps --build idempotent and --preview
    demonstrable). Otherwise the static assets are copied, `index.html` is
    rendered, and each record gets a `<detail_slug>/` directory holding its
    `index.html`, the bundle's `dashboard.json`, and `README.md` when the
    bundle has one. `catalog.json` is written last.
    """
    site_src = repo_root / "site"
    catalog_copy = out_dir / "catalog.json"
    out_dir.mkdir(parents=True, exist_ok=True)

    if not site_src.is_dir():
        write_catalog_json(records, catalog_copy)
        return

    index_tmpl = (site_src / "index.html.tmpl").read_text(encoding="utf-8")
    template_tmpl = (site_src / "template.html.tmpl").read_text(encoding="utf-8")

    # Static single-file assets are optional; copy whichever exist.
    for asset_name in ("widgets.js", "styles.css"):
        asset = site_src / asset_name
        if asset.exists():
            shutil.copy2(asset, out_dir / asset_name)

    # assets/ is replaced wholesale so files removed from the source do
    # not linger in the output.
    assets_src = site_src / "assets"
    if assets_src.is_dir():
        assets_dst = out_dir / "assets"
        if assets_dst.exists():
            shutil.rmtree(assets_dst)
        shutil.copytree(assets_src, assets_dst)

    # Catalog index
    index_html = render_index(index_tmpl, records)
    (out_dir / "index.html").write_text(index_html, encoding="utf-8")

    # Per-template detail pages + dashboard.json copies
    for record in records:
        detail_dir = out_dir / record.detail_slug
        detail_dir.mkdir(parents=True, exist_ok=True)
        detail_html = render_detail(template_tmpl, record)
        (detail_dir / "index.html").write_text(detail_html, encoding="utf-8")
        # Copy the unpacked dashboard.json so widgets.js can fetch it
        # without cross-directory relative paths.
        with zipfile.ZipFile(record.bundle_path, "r") as bundle:
            (detail_dir / "dashboard.json").write_bytes(bundle.read("dashboard.json"))
            if "README.md" in bundle.namelist():
                (detail_dir / "README.md").write_bytes(bundle.read("README.md"))

    # The aggregate catalog.json is copied in so the frontend can fetch
    # /templates/catalog.json without reaching back into the repo.
    write_catalog_json(records, catalog_copy)
def render_index(tmpl: str, records: list[TemplateRecord]) -> str:
    """Fill the index template with one card per record.

    Very light string substitution — the site's JS does most of the
    rendering from catalog.json at page load. `{{CARDS}}` is replaced by
    the newline-joined card markup and `{{COUNT}}` by the record count.

    Every manifest-derived value, including each tag, is HTML-escaped:
    manifests come from submitted templates and must not be able to
    inject markup into the public page.
    """
    cards = []
    for r in records:
        m = r.manifest
        author = (m.get("author") or {}).get("name", "")
        tags_html = "".join(
            f'<span class="tag">{_html_escape(str(t))}</span>' for t in (m.get("tags") or [])
        )
        cards.append(
            '<a class="card" href="{slug}/">'
            '<h3>{name}</h3>'
            '<p class="desc">{desc}</p>'
            '<div class="meta"><span class="author">{author}</span>'
            '<span class="version">v{version}</span></div>'
            '<div class="tags">{tags}</div>'
            '</a>'.format(
                slug=_html_escape(r.detail_slug),
                name=_html_escape(m["name"]),
                desc=_html_escape(m["description"]),
                author=_html_escape(author),
                version=_html_escape(m["version"]),
                tags=tags_html,  # already escaped per tag above
            )
        )
    return tmpl.replace("{{CARDS}}", "\n".join(cards)).replace("{{COUNT}}", str(len(records)))
def render_detail(tmpl: str, record: TemplateRecord) -> str:
    """Fill the per-template detail page template for `record`.

    Replaces the `{{ID}}`, `{{NAME}}`, `{{VERSION}}`, `{{DESC}}`,
    `{{AUTHOR_HTML}}`, `{{CATEGORY}}`, `{{TAGS_HTML}}`,
    `{{INSTALL_URL_ENCODED}}` and `{{SCARF_INSTALL_URL}}` tokens. All
    values are HTML-escaped; the two `*_HTML` tokens are assembled from
    escaped parts and inserted as markup.

    Security notes (manifests are untrusted submissions):
    - The author name is only turned into a link when the URL is http(s);
      escaping alone would not neutralise a `javascript:` URL in `href`.
    - Substitution is a single pass over the template, so token-looking
      text inside a manifest value is never expanded a second time.
    """
    m = record.manifest
    author = m.get("author") or {}
    author_html = _html_escape(author.get("name", ""))
    author_url = author.get("url") or ""
    if isinstance(author_url, str) and author_url.lower().startswith(("http://", "https://")):
        author_html = f'<a href="{_html_escape(author_url)}">{author_html}</a>'
    tags_html = "".join(f'<span class="tag">{_html_escape(t)}</span>' for t in (m.get("tags") or []))
    install_url = record.install_url

    # Token name -> final HTML text to insert.
    rendered = {
        "ID": _html_escape(m["id"]),
        "NAME": _html_escape(m["name"]),
        "VERSION": _html_escape(m["version"]),
        "DESC": _html_escape(m["description"]),
        "AUTHOR_HTML": author_html,
        "CATEGORY": _html_escape(m.get("category") or ""),
        "TAGS_HTML": tags_html,
        "INSTALL_URL_ENCODED": _html_escape(install_url),
        "SCARF_INSTALL_URL": _html_escape(f"scarf://install?url={install_url}"),
    }
    token_re = re.compile(r"\{\{(" + "|".join(rendered) + r")\}\}")
    return token_re.sub(lambda match: rendered[match.group(1)], tmpl)
def _html_escape(s: str) -> str:
    """Escape the five HTML-significant characters in `s`.

    `&`, `<`, `>`, `"` and `'` become `&amp;`, `&lt;`, `&gt;`, `&quot;`
    and `&#39;`. The translation is a single pass, so ampersands that
    belong to a freshly inserted entity are never escaped again.
    """
    entities = str.maketrans({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#39;",
    })
    return s.translate(entities)
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())
+385
View File
@@ -0,0 +1,385 @@
"""Unit tests for tools/build-catalog.py.
Run with: python3 -m unittest tools.test_build_catalog
Or just: python3 tools/test_build_catalog.py
Covers the validator's invariants against synthetic template directories
created under a temp dir — no network, no global state, no dependency on
the repo's actual templates/. A separate test at the bottom exercises the
real shipped `templates/awizemann/site-status-checker` bundle to catch
drift between validator + installer.
"""
from __future__ import annotations
import importlib.util
import io
import json
import os
import shutil
import sys
import tempfile
import unittest
import zipfile
from pathlib import Path
# Import tools/build-catalog.py via spec-loader (the dash in the filename
# would otherwise make a plain `import` ugly). Register the module in
# sys.modules BEFORE exec — Python 3.9's dataclass inspection reads
# `sys.modules[cls.__module__].__dict__` and blows up if the module isn't
# there yet (fixed in 3.10+, still matters on system-Python Macs).
# The script under test sits next to this test file.
_SPEC_PATH = Path(__file__).resolve().parent / "build-catalog.py"
# Load it under the importable module name "build_catalog".
_spec = importlib.util.spec_from_file_location("build_catalog", _SPEC_PATH)
build_catalog = importlib.util.module_from_spec(_spec)
# Registration must happen before exec_module() so the module is already
# present in sys.modules while its @dataclass definitions are evaluated.
sys.modules["build_catalog"] = build_catalog
_spec.loader.exec_module(build_catalog)
# ---------------------------------------------------------------------------
# Fixture builders
# ---------------------------------------------------------------------------
# Smallest dashboard payload the fixtures ship as dashboard.json:
# version 1 with a single section that holds one "stat" widget.
MINIMAL_DASHBOARD = {
    "version": 1,
    "title": "Test",
    "description": "test",
    "sections": [
        {
            "title": "Current Status",
            "columns": 3,
            "widgets": [
                {"type": "stat", "title": "Sites Up", "value": 0},
            ],
        },
    ],
}
def make_fake_repo(tmp_root: Path) -> Path:
    """Turn *tmp_root* into a bare repo skeleton for the validator.

    Creates ``<tmp_root>/templates/`` (and any missing parents) and returns
    *tmp_root* itself as the repo root. Raises ``FileExistsError`` if the
    ``templates`` directory is already present.
    """
    templates_dir = tmp_root / "templates"
    templates_dir.mkdir(parents=True)
    return tmp_root
def make_template_dir(
    repo: Path,
    author: str,
    name: str,
    manifest: dict | None = None,
    bundle_files: dict[str, bytes] | None = None,
    include_staging: bool = True,
    bundle_name: str | None = None,
) -> Path:
    """Create ``<repo>/templates/<author>/<name>/`` with a built bundle.

    Args:
        repo: Repo root; the template lands under ``repo / "templates"``.
        author: Author directory name, also used in the default manifest id.
        name: Template directory name, also used for the default bundle
            filename and the default manifest id/name.
        manifest: Manifest dict serialized into the default
            ``template.json``. ``None`` selects a minimal valid manifest;
            any dict that is passed (including an empty one) is used as-is.
        bundle_files: Mapping of archive path -> bytes to ship. ``None``
            selects the four required files; any dict that is passed
            (including an empty one) is used as-is, in which case
            *manifest* is not consulted.
        include_staging: When true, mirror the bundle files into
            ``staging/`` so staging and bundle hold identical bytes. The
            ``staging/`` directory itself is always created; this flag only
            controls whether it is populated.
        bundle_name: Bundle filename; defaults to ``<name>.scarftemplate``.

    Returns:
        The template directory.
    """
    template_dir = repo / "templates" / author / name
    staging_dir = template_dir / "staging"
    staging_dir.mkdir(parents=True, exist_ok=True)

    # Use identity checks, not truthiness: a caller passing {} wants an
    # empty manifest/bundle, not a silent swap to the valid defaults.
    if manifest is None:
        manifest = {
            "schemaVersion": 1,
            "id": f"{author}/{name}",
            "name": name.replace("-", " ").title(),
            "version": "1.0.0",
            "description": "test description",
            "contents": {
                "dashboard": True,
                "agentsMd": True,
            },
        }
    if bundle_files is None:
        bundle_files = {
            "template.json": json.dumps(manifest).encode("utf-8"),
            "README.md": b"# readme\n",
            "AGENTS.md": b"# agents\n",
            "dashboard.json": json.dumps(MINIMAL_DASHBOARD).encode("utf-8"),
        }

    # Write staging/ source tree so the drift check passes by default.
    if include_staging:
        for rel_path, data in bundle_files.items():
            target = staging_dir / rel_path
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(data)

    # Write the zipped bundle. An empty bundle_name is never a usable
    # filename, so it falls back to the default as well.
    bundle_path = template_dir / (bundle_name or f"{name}.scarftemplate")
    with zipfile.ZipFile(bundle_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for rel_path, data in bundle_files.items():
            zf.writestr(rel_path, data)
    return template_dir
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class ManifestSlugTests(unittest.TestCase):
    """Slug derivation cases for ``build_catalog.manifest_slug``.

    Mirrors the Swift test of the same name so the two implementations
    stay in sync.
    """

    def test_sanitizes_punctuation(self):
        slug = build_catalog.manifest_slug("alan@w/focus dashboard!")
        self.assertEqual(slug, "alan-w-focus-dashboard")

    def test_falls_back_to_placeholder(self):
        # An id made only of separators is expected to yield the placeholder.
        slug = build_catalog.manifest_slug("////")
        self.assertEqual(slug, "template")

    def test_preserves_letters_numbers_dash_underscore(self):
        slug = build_catalog.manifest_slug("user_1/name-2")
        self.assertEqual(slug, "user_1-name-2")
class ValidationTests(unittest.TestCase):
    """Validator invariants, each exercised against a synthetic template.

    Every test builds its fixture inside a fresh temporary repo, runs the
    validator plus the staging-drift check over it, and looks for the
    expected error text.
    """

    def setUp(self):
        self._dir = tempfile.TemporaryDirectory()
        # Register cleanup before building the fixture so the temp dir is
        # removed even when the rest of setUp raises.
        self.addCleanup(self._dir.cleanup)
        self.repo = make_fake_repo(Path(self._dir.name))

    def test_accepts_minimal_valid_template(self):
        make_template_dir(self.repo, "tester", "minimal")
        records, errors = self._validate_all()
        self.assertEqual(errors, [])
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].manifest["id"], "tester/minimal")

    def test_rejects_missing_agents_md(self):
        # Build a bundle that lacks AGENTS.md.
        manifest = self._manifest("tester/bad", "Bad", "missing AGENTS.md")
        make_template_dir(
            self.repo, "tester", "bad",
            manifest=manifest,
            bundle_files=self._bundle_files(manifest, omit=("AGENTS.md",)),
        )
        _, errors = self._validate_all()
        self._assert_error_mentions(errors, "AGENTS.md")

    def test_rejects_content_claim_mismatch(self):
        # Manifest claims cron: 2, bundle ships zero cron jobs.
        manifest = self._manifest("tester/claims", "Claims", "claim mismatch", cron=2)
        make_template_dir(
            self.repo, "tester", "claims",
            manifest=manifest,
            bundle_files=self._bundle_files(manifest),
        )
        _, errors = self._validate_all()
        self._assert_error_mentions(errors, "contents.cron=2")

    def test_rejects_manifest_author_mismatch(self):
        # Template lives under /tester/ but manifest id says /other/.
        manifest = self._manifest("other/name", "Mismatch", "author mismatch")
        make_template_dir(
            self.repo, "tester", "name",
            manifest=manifest,
            bundle_files=self._bundle_files(manifest),
        )
        _, errors = self._validate_all()
        self._assert_error_mentions(errors, "author component")

    def test_rejects_oversized_bundle(self):
        # Synthetic bundle > 5MB cap.
        template_dir = self.repo / "templates" / "tester" / "huge"
        (template_dir / "staging").mkdir(parents=True)
        manifest = self._manifest("tester/huge", "Huge", "oversized")
        files = self._bundle_files(
            manifest,
            overrides={"ballast.bin": b"x" * (6 * 1024 * 1024)},
        )
        # Built by hand with ZIP_STORED: the ballast is one repeated byte,
        # so the entries are stored uncompressed to keep the archive large.
        with zipfile.ZipFile(template_dir / "huge.scarftemplate", "w", zipfile.ZIP_STORED) as zf:
            for path, data in files.items():
                zf.writestr(path, data)
        _, errors = self._validate_all()
        self._assert_error_mentions(errors, "exceeds catalog cap")

    def test_rejects_unknown_widget_type(self):
        bad_dashboard = {
            "version": 1,
            "title": "Bad",
            "sections": [{"title": "x", "columns": 1, "widgets": [{"type": "hologram", "title": "huh"}]}],
        }
        manifest = self._manifest("tester/weird", "Weird", "unknown widget")
        make_template_dir(
            self.repo, "tester", "weird",
            manifest=manifest,
            bundle_files=self._bundle_files(
                manifest,
                overrides={"dashboard.json": json.dumps(bad_dashboard).encode("utf-8")},
            ),
        )
        _, errors = self._validate_all()
        self._assert_error_mentions(errors, "unknown type")

    def test_rejects_secret_in_bundle(self):
        leaky = b"config:\n github_token: ghp_" + b"A" * 40 + b"\n"
        manifest = self._manifest("tester/leaky", "Leaky", "has a secret")
        make_template_dir(
            self.repo, "tester", "leaky",
            manifest=manifest,
            bundle_files=self._bundle_files(manifest, overrides={"README.md": leaky}),
        )
        _, errors = self._validate_all()
        # Case-insensitive match, unlike the other tests.
        self.assertTrue(any("github" in str(e).lower() for e in errors), errors)

    def test_detects_staging_vs_bundle_drift(self):
        # Bundle ships an old README; staging/ has an edited one — should fail.
        manifest = self._manifest("tester/drift", "Drift", "staging ahead of bundle")
        template_dir = make_template_dir(
            self.repo, "tester", "drift",
            manifest=manifest,
            bundle_files=self._bundle_files(manifest, overrides={"README.md": b"# old"}),
        )
        # Edit staging/ AFTER building the bundle.
        (template_dir / "staging" / "README.md").write_bytes(b"# new")
        _, errors = self._validate_all()
        self._assert_error_mentions(errors, "differs from built bundle")

    def test_rejects_missing_bundle(self):
        template_dir = self.repo / "templates" / "tester" / "bare"
        (template_dir / "staging").mkdir(parents=True)
        # No .scarftemplate in the dir.
        _, errors = self._validate_all()
        self._assert_error_mentions(errors, "no .scarftemplate found")

    # --- helpers --------------------------------------------------------

    @staticmethod
    def _manifest(template_id: str, name: str, description: str, **extra_contents) -> dict:
        """Return a schema-1 manifest; *extra_contents* extends ``contents``."""
        contents = {"dashboard": True, "agentsMd": True}
        contents.update(extra_contents)
        return {
            "schemaVersion": 1,
            "id": template_id,
            "name": name,
            "version": "1.0.0",
            "description": description,
            "contents": contents,
        }

    @staticmethod
    def _bundle_files(manifest: dict, overrides=None, omit=()) -> dict:
        """Return the four standard bundle files for *manifest*.

        *overrides* replaces or appends entries (path -> bytes); *omit*
        lists paths to drop afterwards. Entry order is otherwise stable.
        """
        files = {
            "template.json": json.dumps(manifest).encode("utf-8"),
            "README.md": b"# readme",
            "AGENTS.md": b"# agents",
            "dashboard.json": json.dumps(MINIMAL_DASHBOARD).encode("utf-8"),
        }
        if overrides:
            files.update(overrides)
        for path in omit:
            files.pop(path, None)
        return files

    def _assert_error_mentions(self, errors: list, needle: str) -> None:
        """Fail unless at least one error's text contains *needle*."""
        self.assertTrue(any(needle in str(e) for e in errors), errors)

    def _validate_all(self) -> tuple[list, list]:
        """Validate every template in the fake repo.

        Returns ``(records, errors)``; drift-check errors are folded into
        *errors* for each template that produced a record.
        """
        records = []
        errors = []
        for tdir in build_catalog._iter_templates(self.repo):
            record, errs = build_catalog.validate_template(tdir)
            errors.extend(errs)
            if record is not None:
                errors.extend(build_catalog._check_staging_matches_bundle(record))
                records.append(record)
        return records, errors
class CatalogJsonTests(unittest.TestCase):
    """Guards the field names of the generated catalog.json.

    The shape must stay stable — the site's widgets.js reads these fields
    by name.
    """

    def test_catalog_json_shape(self):
        required_fields = (
            "id", "name", "version", "description", "contents",
            "installUrl", "detailSlug", "bundleSha256", "bundleSize",
        )
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            repo = make_fake_repo(tmp_path)
            make_template_dir(repo, "tester", "shape")

            # Every template must validate cleanly before it is aggregated.
            records = []
            for template_dir in build_catalog._iter_templates(repo):
                record, errors = build_catalog.validate_template(template_dir)
                self.assertEqual(errors, [])
                records.append(record)

            catalog_path = tmp_path / "catalog.json"
            build_catalog.write_catalog_json(records, catalog_path)
            data = json.loads(catalog_path.read_text())

            self.assertEqual(data["schemaVersion"], 1)
            templates = data["templates"]
            self.assertEqual(len(templates), 1)
            entry = templates[0]
            for required in required_fields:
                self.assertIn(required, entry)
            install_url = entry["installUrl"]
            self.assertTrue(install_url.startswith("https://raw.githubusercontent.com/"))
            self.assertEqual(entry["detailSlug"], "tester-shape")
class RealBundleTest(unittest.TestCase):
    """End-to-end check against the shipped Site Status Checker bundle.

    Catches drift between the validator and real-world author conventions.
    Skipped when the template directory is absent (e.g. run outside the
    repo tree).
    """

    def test_site_status_checker_passes(self):
        # This file lives one level below the repo root.
        tools_dir = Path(__file__).resolve().parent
        bundle_dir = tools_dir.parent / "templates" / "awizemann" / "site-status-checker"
        if not bundle_dir.exists():
            self.skipTest("site-status-checker not present (running outside repo?)")

        record, errors = build_catalog.validate_template(bundle_dir)
        self.assertIsNotNone(record)
        drift = build_catalog._check_staging_matches_bundle(record)
        problems = errors + drift
        self.assertEqual(problems, [], f"errors: {errors}, drift: {drift}")
        self.assertEqual(record.manifest["id"], "awizemann/site-status-checker")
if __name__ == "__main__":
    # Supports `python3 tools/test_build_catalog.py` in addition to
    # `python3 -m unittest`.
    unittest.main()