From 52c802676fe640140323f208e1bb997df2747fc5 Mon Sep 17 00:00:00 2001 From: Alan Wizemann Date: Sat, 9 May 2026 17:31:51 +0200 Subject: [PATCH] feat(capabilities): add Hermes v0.13 capability flags + version bump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 23 new capability flags grouped under a v0.13 (v2026.5.7) MARK section in HermesCapabilities, covering Persistent Goals, ACP /queue + /steer-on-idle, Kanban diagnostics + recovery UX, Curator archive + prune, Google Chat (20th platform), cross-platform allowlists, MCP SSE transport, Cron --no-agent, Web Tools backend split, Profiles --no-skills, context compression count, /new <name>, OpenRouter cache, image_gen.model, display.language, xAI voice cloning, video_analyze, and the transform_llm_output plugin hook. Each flag gates on >= 0.13.0 so v0.13 patch releases (0.13.4 etc.) still light up every flag. Existing v0.12 flags unchanged. Test suite extends with v0.13.0/2026.5.7 fixtures, a v0.13.4 patch-release case, explicit "v0.13 flags off on v0.12 host" coverage, and updates the future-version test to v0.14.0. CLAUDE.md target line bumps to v2026.5.7 (v0.13.0); a new v2026.5.7 section mirrors the v0.12 / v0.11 scaffolding describing the Scarf-relevant subset. The v0.12 + v0.11 historical sections remain intact since pre-v0.13 hosts still consume those flags. Foundation for the v2.8.0 Scarf release — every subsequent work-stream (WS-2 through WS-9) consumes flags added here. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 24 +++- .../Services/HermesCapabilities.swift | 123 +++++++++++++++++- .../HermesCapabilitiesTests.swift | 73 ++++++++++- 3 files changed, 210 insertions(+), 10 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 99510c7..d123223 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -113,9 +113,29 @@ Public documentation lives in the GitHub wiki at https://github.com/awizemann/sc ## Hermes Version -Targets Hermes v2026.4.30 (v0.12.0). 
Log lines may carry an optional `[session_id]` tag between the level and logger name — `HermesLogService.parseLine` treats the session tag as an optional capture group, so older untagged lines still parse. +Targets Hermes v2026.5.7 (v0.13.0). Log lines may carry an optional `[session_id]` tag between the level and logger name — `HermesLogService.parseLine` treats the session tag as an optional capture group, so older untagged lines still parse. -**Capability gating.** Scarf detects the target's Hermes version once per server connection via [HermesCapabilities](scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesCapabilities.swift) (`hermes --version` → semver + `YYYY.M.D` parse). The resulting `HermesCapabilitiesStore` is injected on `ContextBoundRoot` (Mac) and `ScarfGoTabRoot` (iOS) via `.environment(_:)` and `.hermesCapabilities(_:)`; UI that depends on a v0.12+ surface (Curator, Kanban, ACP image input, `auxiliary.curator`, `prompt_caching.cache_ttl`, Piper TTS, Vercel terminal) reads it through the typed environment key. Pre-v0.12 hosts gracefully hide the new affordances rather than throwing on unknown CLI subcommands. Add a new flag at the top of `HermesCapabilities` whenever Scarf gains a release-gated UI surface. +**Capability gating.** Scarf detects the target's Hermes version once per server connection via [HermesCapabilities](scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesCapabilities.swift) (`hermes --version` → semver + `YYYY.M.D` parse). The resulting `HermesCapabilitiesStore` is injected on `ContextBoundRoot` (Mac) and `ScarfGoTabRoot` (iOS) via `.environment(_:)` and `.hermesCapabilities(_:)`; UI that depends on a release-gated surface reads it through the typed environment key. Pre-target hosts gracefully hide the new affordances rather than throwing on unknown CLI subcommands. 
Add a new flag at the top of `HermesCapabilities` whenever Scarf gains a release-gated UI surface — group flags by the Hermes release that introduced them (`MARK: v0.13 (v2026.5.7) flags`, etc.). + +**v2026.5.7 (v0.13.0)** added (Scarf-relevant subset; full v2.8.0 implementation lands across WS-2 through WS-9): + +- **Persistent Goals** — `/goal <goal>` slash command locks the agent onto a target across turns. Checkpoints v2 single-store rewrite + auto-resume after gateway restart. Surfaced in Scarf chat as a non-interruptive command + a "🎯 Goal locked: <goal>" pill in the chat header. Gated on `HermesCapabilities.hasGoals`. +- **ACP `/queue` slash command** — queues a prompt to run after the current turn completes. Joins `/steer` in `RichChatViewModel.nonInterruptiveCommands` with a transient "Queued" toast. Gated on `hasACPQueue`. `/steer` now also runs as a regular prompt on idle sessions (`hasACPSteerOnIdle`). +- **Kanban v0.13 reliability + recovery UX** — hallucination gate on worker-created cards, generic diagnostics engine (per-task distress signals), per-task `max_retries` override, multiline title/body create, `auto_blocked_reason` rendered in the inspector banner, darwin zombie detection, unified failure counter across spawn/timeout/crash. New fields decode through tolerant `HermesKanbanRun` / `HermesKanbanTaskDetail` extensions; pre-v0.13 hosts ignore unknown keys. Gated on `hasKanbanDiagnostics`. +- **Curator archive + prune** — `hermes curator archive <skill>` + `prune` + `list-archived` subcommands. Manual `hermes curator run` is now synchronous and blocks until done (pre-v0.13 returned immediately). Surfaced as an "Archived" tab in CuratorView with per-row Restore + Prune actions and a destructive prune-confirm sheet. Gated on `hasCuratorArchive`. 
+- **Messaging Gateway expansion** — Google Chat (20th platform; `hasGoogleChatPlatform`), cross-platform allowlists (`allowed_channels` / `allowed_chats` / `allowed_rooms` per platform; `hasGatewayAllowlists`), per-platform `gateway_restart_notification` (`hasGatewayRestartNotification`), `busy_ack_enabled` toggle (`hasGatewayBusyAckToggle`), slash-command auto-delete TTL, `[[as_document]]` skill media routing directive, `hermes gateway list` cross-profile status verb (`hasGatewayList`). +- **Provider catalog refresh** — new models on Nous Portal + OpenRouter: `deepseek/deepseek-v4-pro`, `x-ai/grok-4.3`, `openrouter/owl-alpha` (free), `tencent/hy3-preview`, `arcee/trinity-large-thinking` (with temperature + compression overrides). `x-ai/grok-4.20-beta` renamed to `x-ai/grok-4.20` — keep alias map. Vercel AI Gateway demoted to bottom of the picker. `image_gen.model` from `config.yaml` now honored by Hermes (was advertised but ignored pre-v0.13); surfaced in `Settings → Auxiliary` (`hasImageGenModel`). OpenRouter response caching toggle (`hasOpenRouterResponseCache`). +- **MCP SSE transport** — MCP servers can be configured with SSE transport + `sse_read_timeout`. Surfaced in MCPServersView add-server flow alongside stdio/pipe. Gated on `hasMCPSSETransport`. +- **Cron `--no-agent` mode** — script-only watchdog jobs that skip the AI call. Surfaced in CronView edit sheet. Gated on `hasCronNoAgent`. +- **Web Tools per-capability backends** — `web_search` and `web_extract` can use distinct backends; SearXNG joined as a search-only backend. Surfaced in the Web Tools settings tab. Gated on `hasWebToolsBackendSplit`. +- **Profiles `--no-skills`** — `hermes profile create --no-skills` for empty-profile creation. Surfaced as a toggle in the create-profile flow. Gated on `hasProfileNoSkills`. 
+- **CLI / UX additions** — context compression count in the status feed (rendered next to the token count in the chat status bar; `hasContextCompressionCount`), `/new <name>` slash-command argument (`hasNewWithSessionName`), `hermes update --yes` non-interactive (`hasUpdateNonInteractive`), `display.language` static-message translation (zh / ja / de / es / fr / uk / tr; `hasDisplayLanguage`), xAI Custom Voices (voice-cloning badge next to xAI TTS provider; `hasXAIVoiceCloning`). +- **Server-side defaults flipped** — secret redaction defaults back to ON in v0.13 (was off by default in v0.12). The Settings redaction toggle remains for opt-out; the default-state hint reflects the v0.13 semantics when the host advertises v0.13+. +- **`video_analyze` tool** — native video understanding on Gemini-class models. Hermes handles transparently inside the agent loop; Scarf has no UI surface yet but `hasVideoAnalyze` is reserved for future widget gating. +- **`transform_llm_output` plugin hook** — plugin-author concern; surfaced indirectly through PluginsView when a plugin advertises the hook. `hasTransformLLMOutputHook` gates the metadata badge. +- **Schema is unchanged from v0.11/v0.12** — same state.db columns. No migration needed. + +**v2026.4.30 (v0.12.0)** added (Scarf-relevant subset): **v2026.4.30 (v0.12.0)** added (Scarf-relevant subset): diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesCapabilities.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesCapabilities.swift index 303acb5..9c408fb 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesCapabilities.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesCapabilities.swift @@ -8,9 +8,13 @@ import os /// /// Scarf tracks Hermes feature releases by date-version + semver. v0.12 added /// a dozen surfaces (Curator, Kanban, multimodal ACP, ...) and removed a few -/// (`flush_memories` aux task). 
UI that branches on these surfaces calls -/// the boolean accessors here so older Hermes installs degrade silently -/// instead of throwing on an unknown CLI subcommand. +/// (`flush_memories` aux task); v0.13 added Persistent Goals, ACP `/queue`, +/// Kanban diagnostics + recovery UX, Curator archive/prune, Google Chat (20th +/// platform), cross-platform allowlists, MCP SSE transport, Cron `no_agent` +/// mode, Web Tools per-capability backends, Profiles `--no-skills`, and a +/// handful of UX additions. UI that branches on these surfaces calls the +/// boolean accessors here so older Hermes installs degrade silently instead +/// of throwing on an unknown CLI subcommand. /// /// Pure value type — no side effects. The async detection lives in /// `HermesCapabilitiesStore`. @@ -45,8 +49,11 @@ public struct HermesCapabilities: Sendable, Equatable { // MARK: - Capability flags // // Add a new flag here when Scarf gains UI that conditionally branches on - // a Hermes capability. Keep the comparison conservative: `>= 0.12.0` - // covers users still on the 0.12 line who haven't upgraded to 0.13 yet. + // a Hermes capability. Keep the comparison conservative: a flag introduced + // in v0.13.0 should gate on `>= 0.13.0`, not `>= 0.13.5`, so users on + // an early 0.13 patch still see the surface. + + // MARK: v0.12 (v2026.4.30) flags /// `hermes curator` autonomous skill maintenance (v0.12+). public var hasCurator: Bool { atLeastSemver(0, 12, 0) } @@ -96,9 +103,113 @@ public struct HermesCapabilities: Sendable, Equatable { public var hasPromptCacheTTL: Bool { atLeastSemver(0, 12, 0) } /// `redaction.enabled` is now off by default in v0.12 — Scarf surfaces - /// the toggle so users can flip it back on. + /// the toggle so users can flip it back on. v0.13 flips the server-side + /// default back to ON; the toggle remains so users on v0.13 can opt out. 
public var hasRedactionToggle: Bool { atLeastSemver(0, 12, 0) } + // MARK: v0.13 (v2026.5.7) flags + + /// `/goal` slash command + Persistent Goals + Checkpoints v2 single-store + /// (v0.13+). Used by RichChatViewModel to add `/goal` to the + /// non-interruptive command list and to render the "Goal locked" pill in + /// the chat header. + public var hasGoals: Bool { atLeastSemver(0, 13, 0) } + + /// `/queue` slash command in the ACP adapter (v0.13+). Queues a prompt + /// to run after the current turn completes without interrupting. + public var hasACPQueue: Bool { atLeastSemver(0, 13, 0) } + + /// `/steer` runs as a regular prompt on idle ACP sessions (v0.13+). Pre- + /// v0.13 hosts silently no-op `/steer` when no turn is in flight; with + /// this flag on, Scarf can surface `/steer` even when the agent isn't + /// mid-turn without confusing UX. + public var hasACPSteerOnIdle: Bool { atLeastSemver(0, 13, 0) } + + /// Kanban v0.13 reliability surface: hallucination gate on worker-created + /// cards, generic diagnostics engine, per-task `max_retries`, multiline + /// title/body create, `auto_blocked_reason` on blocked tasks, darwin + /// zombie detection. All read through the `kanban show` JSON surface. + public var hasKanbanDiagnostics: Bool { atLeastSemver(0, 13, 0) } + + /// `hermes curator archive`, `prune`, and `list-archived` subcommands + /// (v0.13+). The synchronous manual `hermes curator run` lives behind + /// this flag too — pre-v0.13 `run` returns immediately and the work + /// happens in the background. + public var hasCuratorArchive: Bool { atLeastSemver(0, 13, 0) } + + /// Google Chat — 20th messaging-gateway platform (v0.13+). + public var hasGoogleChatPlatform: Bool { atLeastSemver(0, 13, 0) } + + /// Cross-platform allowlist keys: `allowed_channels` (Slack / Mattermost + /// / Google Chat), `allowed_chats` (Telegram / WhatsApp), `allowed_rooms` + /// (Matrix / DingTalk). Settable per platform in `config.yaml` (v0.13+). 
+ public var hasGatewayAllowlists: Bool { atLeastSemver(0, 13, 0) } + + /// `busy_ack_enabled` config to suppress per-message "agent is working…" + /// acks across platforms (v0.13+). + public var hasGatewayBusyAckToggle: Bool { atLeastSemver(0, 13, 0) } + + /// Per-platform `gateway_restart_notification` flag controls whether the + /// platform posts a "Gateway restarted" notice on boot (v0.13+). + public var hasGatewayRestartNotification: Bool { atLeastSemver(0, 13, 0) } + + /// `hermes gateway list` cross-profile status verb (v0.13+). Lets Scarf + /// show which profile is currently running which platform. + public var hasGatewayList: Bool { atLeastSemver(0, 13, 0) } + + /// MCP servers can use SSE transport (v0.13+). Adds an `sse_read_timeout` + /// knob alongside the existing stdio/pipe transports. + public var hasMCPSSETransport: Bool { atLeastSemver(0, 13, 0) } + + /// Cron `--no-agent` mode for script-only watchdog jobs (v0.13+). Skips + /// the AI call entirely — useful for keep-alive / periodic-check jobs. + public var hasCronNoAgent: Bool { atLeastSemver(0, 13, 0) } + + /// Web Tools split into per-capability backend selection: `web_search` + /// and `web_extract` can now use distinct backends (v0.13+). SearXNG + /// joined as a search-only backend. + public var hasWebToolsBackendSplit: Bool { atLeastSemver(0, 13, 0) } + + /// `hermes profile create --no-skills` flag for empty profiles (v0.13+). + public var hasProfileNoSkills: Bool { atLeastSemver(0, 13, 0) } + + /// Context compression count surfaced in the status feed (v0.13+). Scarf + /// renders it next to the token count in the chat status bar. + public var hasContextCompressionCount: Bool { atLeastSemver(0, 13, 0) } + + /// `/new` slash command accepts an optional session-name argument (v0.13+). + public var hasNewWithSessionName: Bool { atLeastSemver(0, 13, 0) } + + /// `hermes update --yes` / `-y` skips interactive prompts (v0.13+). 
Used + /// by Scarf's "Update Hermes" affordance to run unattended. + public var hasUpdateNonInteractive: Bool { atLeastSemver(0, 13, 0) } + + /// OpenRouter response caching toggle in `config.yaml` (v0.13+). + public var hasOpenRouterResponseCache: Bool { atLeastSemver(0, 13, 0) } + + /// `image_gen.model` honored from `config.yaml` (v0.13+). Pre-v0.13 the + /// value was advertised but ignored at runtime. + public var hasImageGenModel: Bool { atLeastSemver(0, 13, 0) } + + /// `display.language` config key for static-message translation: zh / ja / + /// de / es / fr / uk / tr (v0.13+). + public var hasDisplayLanguage: Bool { atLeastSemver(0, 13, 0) } + + /// xAI Custom Voices — voice cloning support (v0.13+). Exposed in Scarf + /// as a "Cloning supported" badge next to the xAI TTS provider entry. + public var hasXAIVoiceCloning: Bool { atLeastSemver(0, 13, 0) } + + /// `video_analyze` tool — native video understanding on Gemini and + /// compatible models (v0.13+). Hermes handles this transparently inside + /// the agent loop; Scarf has no UI surface yet, but the flag lets future + /// dashboards / activity views light up video-tool annotations. + public var hasVideoAnalyze: Bool { atLeastSemver(0, 13, 0) } + + /// `transform_llm_output` plugin hook for shaping LLM output before the + /// conversation receives it (v0.13+). Plugin-author concern; Scarf's + /// PluginsView surfaces it as a documented hook in plugin metadata. 
+ public var hasTransformLLMOutputHook: Bool { atLeastSemver(0, 13, 0) } + private func atLeastSemver(_ major: Int, _ minor: Int, _ patch: Int) -> Bool { guard let s = semver else { return false } return s >= SemVer(major: major, minor: minor, patch: patch) diff --git a/scarf/Packages/ScarfCore/Tests/ScarfCoreTests/HermesCapabilitiesTests.swift b/scarf/Packages/ScarfCore/Tests/ScarfCoreTests/HermesCapabilitiesTests.swift index 2dda691..ab2824a 100644 --- a/scarf/Packages/ScarfCore/Tests/ScarfCoreTests/HermesCapabilitiesTests.swift +++ b/scarf/Packages/ScarfCore/Tests/ScarfCoreTests/HermesCapabilitiesTests.swift @@ -9,6 +9,13 @@ import Foundation // MARK: - Version line parsing + @Test func parseV013ReleaseLine() { + let caps = HermesCapabilities.parseLine("Hermes Agent v0.13.0 (2026.5.7)") + #expect(caps.semver == HermesCapabilities.SemVer(major: 0, minor: 13, patch: 0)) + #expect(caps.dateVersion == HermesCapabilities.DateVersion(year: 2026, month: 5, day: 7)) + #expect(caps.detected) + } + @Test func parseV012ReleaseLine() { let caps = HermesCapabilities.parseLine("Hermes Agent v0.12.0 (2026.4.30)") #expect(caps.semver == HermesCapabilities.SemVer(major: 0, minor: 12, patch: 0)) @@ -75,8 +82,42 @@ import Foundation // MARK: - Capability flags + @Test func v013FlagsAllOn() { + let caps = HermesCapabilities.parseLine("Hermes Agent v0.13.0 (2026.5.7)") + // v0.12 surfaces remain on. + #expect(caps.hasCurator) + #expect(caps.hasKanban) + #expect(caps.hasACPImagePrompts) + #expect(!caps.hasFlushMemoriesAux) + // v0.13 surfaces light up. 
+ #expect(caps.hasGoals) + #expect(caps.hasACPQueue) + #expect(caps.hasACPSteerOnIdle) + #expect(caps.hasKanbanDiagnostics) + #expect(caps.hasCuratorArchive) + #expect(caps.hasGoogleChatPlatform) + #expect(caps.hasGatewayAllowlists) + #expect(caps.hasGatewayBusyAckToggle) + #expect(caps.hasGatewayRestartNotification) + #expect(caps.hasGatewayList) + #expect(caps.hasMCPSSETransport) + #expect(caps.hasCronNoAgent) + #expect(caps.hasWebToolsBackendSplit) + #expect(caps.hasProfileNoSkills) + #expect(caps.hasContextCompressionCount) + #expect(caps.hasNewWithSessionName) + #expect(caps.hasUpdateNonInteractive) + #expect(caps.hasOpenRouterResponseCache) + #expect(caps.hasImageGenModel) + #expect(caps.hasDisplayLanguage) + #expect(caps.hasXAIVoiceCloning) + #expect(caps.hasVideoAnalyze) + #expect(caps.hasTransformLLMOutputHook) + } + @Test func v012FlagsAllOn() { let caps = HermesCapabilities.parseLine("Hermes Agent v0.12.0 (2026.4.30)") + // v0.12 surfaces on. #expect(caps.hasCurator) #expect(caps.hasFallbackCommand) #expect(caps.hasKanban) @@ -94,6 +135,22 @@ import Foundation #expect(caps.hasRedactionToggle) // flush_memories was REMOVED in v0.12 — flag inverts. #expect(!caps.hasFlushMemoriesAux) + // v0.13 surfaces stay off on a v0.12 host. + #expect(!caps.hasGoals) + #expect(!caps.hasACPQueue) + #expect(!caps.hasKanbanDiagnostics) + #expect(!caps.hasCuratorArchive) + #expect(!caps.hasGoogleChatPlatform) + #expect(!caps.hasGatewayAllowlists) + #expect(!caps.hasMCPSSETransport) + #expect(!caps.hasCronNoAgent) + #expect(!caps.hasWebToolsBackendSplit) + #expect(!caps.hasProfileNoSkills) + #expect(!caps.hasContextCompressionCount) + #expect(!caps.hasOpenRouterResponseCache) + #expect(!caps.hasImageGenModel) + #expect(!caps.hasDisplayLanguage) + #expect(!caps.hasXAIVoiceCloning) } @Test func v011FlagsAllOff() { @@ -126,11 +183,23 @@ import Foundation } @Test func futureVersionRetainsCapabilities() { - // A v0.13 (hypothetical) should still see all v0.12 capabilities on. 
- let caps = HermesCapabilities.parseLine("Hermes Agent v0.13.0 (2026.6.1)") + // A v0.14 (hypothetical) should still see all v0.12 + v0.13 capabilities on. + let caps = HermesCapabilities.parseLine("Hermes Agent v0.14.0 (2026.7.1)") #expect(caps.hasCurator) #expect(caps.hasACPImagePrompts) + #expect(caps.hasGoals) + #expect(caps.hasKanbanDiagnostics) + #expect(caps.hasCuratorArchive) // And flush_memories stays gone. #expect(!caps.hasFlushMemoriesAux) } + + @Test func v0_13_patchReleaseStillEnablesAllFlags() { + // A v0.13.4 patch release should still enable every v0.13 flag. + let caps = HermesCapabilities.parseLine("Hermes Agent v0.13.4 (2026.5.20)") + #expect(caps.hasGoals) + #expect(caps.hasACPQueue) + #expect(caps.hasKanbanDiagnostics) + #expect(caps.hasGoogleChatPlatform) + } }