feat(hermes-v12): provider catalog + auxiliary swap (Phase B)

Adds the five v0.12 inference providers to ModelCatalogService.overlayOnlyProviders
so the model picker reaches them. IDs match HERMES_OVERLAYS verbatim:

- gmi → GMI Cloud (api_key)
- azure-foundry → Azure AI Foundry (api_key)
- lmstudio → LM Studio (api_key, promoted from custom-endpoint alias)
- minimax-oauth → MiniMax (OAuth, oauth_external)
- tencent-tokenhub → Tencent TokenHub (api_key)

Auxiliary tasks: drop the `flush_memories` row (Hermes removed it
entirely in v0.12) and add `auxiliary.curator` so users can configure
the model the autonomous curator's review fork uses. The Curator row is
gated on HermesCapabilities.hasCuratorAux, so v0.11 hosts don't see a
control that writes a key Hermes ignores. AuxiliarySettings, the YAML
parser, and HealthViewModel's Tool Gateway breakdown are all updated.

Side fixes:

- CredentialPoolsGatingTests was missing `import ScarfCore` after
  ModelCatalogService moved to the package (broke the test target's
  compile against pure-Mac scarf).
- Promoted `ModelCatalogService.overlayOnlyProviders` to public so the
  new `v012OverlayProvidersCarryCorrectAuthTypes` lock-in test can
  reach it.

Tests: 14 ToolGateway tests pass; 209 ScarfCore tests pass; both Mac
and iOS schemes build clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-05-01 12:16:37 +02:00
parent a90a29add8
commit da721fa276
9 changed files with 118 additions and 19 deletions
@@ -258,7 +258,13 @@ public struct VoiceSettings: Sendable, Equatable {
)
}
/// Eight sub-models that share the same provider/model/base_url/api_key/timeout shape.
/// Per-task auxiliary model overrides.
///
/// `flush_memories` was removed entirely in Hermes v0.12 (the underlying
/// task no longer exists), so the corresponding field was dropped here.
/// `curator` was added in v0.12 — Curator's review fork uses its own
/// model so users can keep main-model spend separate from background
/// maintenance.
public struct AuxiliarySettings: Sendable, Equatable {
public var vision: AuxiliaryModel
public var webExtract: AuxiliaryModel
@@ -267,7 +273,8 @@ public struct AuxiliarySettings: Sendable, Equatable {
public var skillsHub: AuxiliaryModel
public var approval: AuxiliaryModel
public var mcp: AuxiliaryModel
public var flushMemories: AuxiliaryModel
/// v0.12+; pre-v0.12 Hermes installs ignore this slot.
public var curator: AuxiliaryModel
public init(
@@ -278,7 +285,7 @@ public struct AuxiliarySettings: Sendable, Equatable {
skillsHub: AuxiliaryModel,
approval: AuxiliaryModel,
mcp: AuxiliaryModel,
flushMemories: AuxiliaryModel
curator: AuxiliaryModel
) {
self.vision = vision
self.webExtract = webExtract
@@ -287,7 +294,7 @@ public struct AuxiliarySettings: Sendable, Equatable {
self.skillsHub = skillsHub
self.approval = approval
self.mcp = mcp
self.flushMemories = flushMemories
self.curator = curator
}
public nonisolated static let empty = AuxiliarySettings(
vision: .empty,
@@ -297,7 +304,7 @@ public struct AuxiliarySettings: Sendable, Equatable {
skillsHub: .empty,
approval: .empty,
mcp: .empty,
flushMemories: .empty
curator: .empty
)
}
@@ -122,7 +122,7 @@ public extension HermesConfig {
skillsHub: aux("skills_hub"),
approval: aux("approval"),
mcp: aux("mcp"),
flushMemories: aux("flush_memories")
curator: aux("curator")
)
let security = SecuritySettings(
@@ -425,15 +425,17 @@ public struct ModelCatalogService: Sendable {
// MARK: - Hermes overlay providers
/// The six providers Hermes surfaces via `hermes model` that have no
/// The 11 providers Hermes surfaces via `hermes model` that have no
/// entry in `models_dev_cache.json` (models.dev doesn't mirror them).
/// Mirrors the overlay-only subset of `HERMES_OVERLAYS` in
/// `hermes-agent/hermes_cli/providers.py`. The other ~19 overlay entries
/// `hermes-agent/hermes_cli/providers.py`. The other overlay entries
/// already ship in the cache and only add augmentation (base-URL
/// override, extra env vars) that Scarf doesn't currently display.
///
/// Keep this in sync with the Python side on Hermes version bumps.
static let overlayOnlyProviders: [String: HermesProviderOverlay] = [
/// Keep this in sync with the Python side on Hermes version bumps —
/// see `ToolGatewayTests.v012OverlayProvidersCarryCorrectAuthTypes`
/// for the auth-type lock-in.
public static let overlayOnlyProviders: [String: HermesProviderOverlay] = [
"nous": HermesProviderOverlay(
displayName: "Nous Portal",
baseURL: "https://inference-api.nousresearch.com/v1",
@@ -476,6 +478,53 @@ public struct ModelCatalogService: Sendable {
subscriptionGated: false,
docURL: nil
),
// -- v0.12 additions ---------------------------------------------
// Hermes v2026.4.30 added five overlay-only providers that
// models.dev doesn't mirror. Provider IDs match HERMES_OVERLAYS
// verbatim — drift here means the picker can't reach them.
"gmi": HermesProviderOverlay(
displayName: "GMI Cloud",
baseURL: "https://api.gmi-serving.com/v1",
authType: .apiKey,
subscriptionGated: false,
docURL: nil
),
"azure-foundry": HermesProviderOverlay(
displayName: "Azure AI Foundry",
// Base URL is per-tenant — Hermes resolves it from the
// AZURE_FOUNDRY_BASE_URL env var at runtime. Leave nil so the
// settings UI shows "Tenant URL set via env" instead of a
// misleading default.
baseURL: nil,
authType: .apiKey,
subscriptionGated: false,
docURL: nil
),
"lmstudio": HermesProviderOverlay(
displayName: "LM Studio",
// v0.12 promotes LM Studio from custom-endpoint alias to a
// first-class provider. 1234 is the LM Studio default port;
// users with a non-default port set LM_BASE_URL.
baseURL: "http://127.0.0.1:1234/v1",
authType: .apiKey,
subscriptionGated: false,
docURL: nil
),
"minimax-oauth": HermesProviderOverlay(
displayName: "MiniMax (OAuth)",
baseURL: "https://api.minimax.io/anthropic",
authType: .oauthExternal,
subscriptionGated: false,
docURL: nil
),
"tencent-tokenhub": HermesProviderOverlay(
displayName: "Tencent TokenHub",
// Resolved from TOKENHUB_BASE_URL at runtime.
baseURL: nil,
authType: .apiKey,
subscriptionGated: false,
docURL: nil
),
]
}
@@ -129,7 +129,7 @@ struct HermesFileService: Sendable {
skillsHub: aux("skills_hub"),
approval: aux("approval"),
mcp: aux("mcp"),
flushMemories: aux("flush_memories")
curator: aux("curator")
)
let security = SecuritySettings(
@@ -180,7 +180,7 @@ final class HealthViewModel {
("skills_hub", config.auxiliary.skillsHub.provider),
("approval", config.auxiliary.approval.provider),
("mcp", config.auxiliary.mcp.provider),
("flush_memories", config.auxiliary.flushMemories.provider),
("curator", config.auxiliary.curator.provider),
].filter { $0.1 == "nous" }.map(\.0)
if !auxOnNous.isEmpty {
checks.append(HealthCheck(
@@ -9,25 +9,41 @@ import ScarfCore
/// (subscription-routed) and `auto` (inherit main provider) — Hermes derives
/// the gateway routing from that single field; there is no separate
/// `use_gateway` key to write.
///
/// v0.12 dropped the `flush_memories` aux task (the underlying memory
/// pipeline was rewritten upstream) and added `curator` (the autonomous
/// skill-maintenance review fork). The Curator row only appears when
/// `HermesCapabilities.hasCuratorAux` is set so v0.11 hosts don't see a
/// row that writes a key Hermes ignores.
struct AuxiliaryTab: View {
@Bindable var viewModel: SettingsViewModel
@Environment(\.serverContext) private var serverContext
@Environment(\.hermesCapabilities) private var capabilitiesStore
@State private var subscription: NousSubscriptionState = .absent
@State private var showNousSignIn: Bool = false
// Keyed by the config path name — matches `auxiliary.<task>.*` in config.yaml.
private let tasks: [(key: String, title: LocalizedStringKey, icon: String)] = [
// Static base list; the v0.12-only `curator` row is appended at render
// time when the target Hermes supports it.
private let baseTasks: [(key: String, title: LocalizedStringKey, icon: String)] = [
("vision", "Vision", "eye"),
("web_extract", "Web Extract", "doc.richtext"),
("compression", "Compression", "arrow.down.right.and.arrow.up.left.circle"),
("session_search", "Session Search", "magnifyingglass"),
("skills_hub", "Skills Hub", "books.vertical"),
("approval", "Approval", "checkmark.seal"),
("mcp", "MCP", "puzzlepiece"),
("flush_memories", "Flush Memories", "trash.slash")
("mcp", "MCP", "puzzlepiece")
]
/// Rows shown by this tab: every entry of `baseTasks`, followed by the
/// `curator` row when the capabilities store reports `hasCuratorAux`.
/// A missing store is treated the same as "not supported".
private var tasks: [(key: String, title: LocalizedStringKey, icon: String)] {
    let supportsCurator = capabilitiesStore?.capabilities.hasCuratorAux ?? false
    guard supportsCurator else { return baseTasks }

    // Spelled out with the labelled tuple type so the concatenation below
    // type-checks without relying on literal inference.
    let curatorRow: (key: String, title: LocalizedStringKey, icon: String) =
        ("curator", "Curator", "sparkles")
    return baseTasks + [curatorRow]
}
var body: some View {
Text("Auxiliary tasks use separate, typically cheaper models. Leave Provider as `auto` to inherit the main provider.")
.font(.caption)
@@ -94,7 +110,7 @@ struct AuxiliaryTab: View {
case "skills_hub": return viewModel.config.auxiliary.skillsHub
case "approval": return viewModel.config.auxiliary.approval
case "mcp": return viewModel.config.auxiliary.mcp
case "flush_memories": return viewModel.config.auxiliary.flushMemories
case "curator": return viewModel.config.auxiliary.curator
default: return .empty
}
}
+3 -3
View File
@@ -198,9 +198,9 @@ private struct ContextBoundRoot: View {
@State private var chatViewModel: ChatViewModel
/// Per-window snapshot of the target Hermes installation's capability
/// flags. Drives sidebar visibility (Curator, Kanban only on v0.12+),
/// settings rows (flush_memories aux dropped on v0.12), and version
/// banners. Refreshes once on init; explicit `refresh()` call rerun
/// after a `hermes update`.
/// settings rows (curator aux added on v0.12), and version banners.
/// Refreshes once on init; an explicit `refresh()` call reruns it after a
/// `hermes update`.
@State private var capabilities: HermesCapabilitiesStore
init(context: ServerContext) {
@@ -1,5 +1,6 @@
import Testing
import Foundation
import ScarfCore
@testable import scarf
/// Tests that ``CredentialPoolsOAuthGate`` steers each known provider to
+26
View File
@@ -55,11 +55,37 @@ import ScarfCore
#expect(ids.contains("nous"), "Nous Portal must appear after overlay merge")
#expect(ids.contains("openai-codex"), "OpenAI Codex overlay must appear")
#expect(ids.contains("qwen-oauth"), "Qwen OAuth overlay must appear")
// v0.12 additions — IDs must match HERMES_OVERLAYS in
// hermes-agent/hermes_cli/providers.py exactly. Drift here
// means the picker can't reach the new providers.
#expect(ids.contains("gmi"), "GMI Cloud overlay must appear (v0.12)")
#expect(ids.contains("azure-foundry"), "Azure AI Foundry overlay must appear (v0.12)")
#expect(ids.contains("lmstudio"), "LM Studio overlay must appear (v0.12)")
#expect(ids.contains("minimax-oauth"), "MiniMax OAuth overlay must appear (v0.12)")
#expect(ids.contains("tencent-tokenhub"), "Tencent TokenHub overlay must appear (v0.12)")
// Cached providers still present.
#expect(ids.contains("anthropic"))
#expect(ids.contains("openai"))
}
/// Pins the auth type and subscription gating of the five v0.12 overlay
/// providers so a mistyped value in the catalog fails loudly here.
@Test func v012OverlayProvidersCarryCorrectAuthTypes() throws {
    // Settings chooses between an API-key field, an OAuth flow, or
    // external-process wiring based on the overlay's auth type, so a
    // wrong value would send users through the wrong setup flow.
    let catalog = ModelCatalogService.overlayOnlyProviders

    #expect(catalog["gmi"]?.authType == .apiKey)
    #expect(catalog["azure-foundry"]?.authType == .apiKey)
    #expect(catalog["lmstudio"]?.authType == .apiKey)
    #expect(catalog["minimax-oauth"]?.authType == .oauthExternal)
    #expect(catalog["tencent-tokenhub"]?.authType == .apiKey)

    // None of the v0.12 additions should be subscription-gated.
    let v012ProviderIDs = ["gmi", "azure-foundry", "lmstudio", "minimax-oauth", "tencent-tokenhub"]
    v012ProviderIDs.forEach { id in
        #expect(catalog[id]?.subscriptionGated == false, "\(id) shouldn't be subscription-gated")
    }
}
@Test func nousPortalSortsFirst() throws {
let path = try writeCacheFixture()
let service = ModelCatalogService(path: path)