test(model-preflight): cover detect-mismatch + fix newline-trim bug

* New ModelPreflightTests suite (19 tests) covering both `check(_:)`
  and the v2.8 `detectMismatch(_:)` paths. Pins the dogfooding
  scenario (anthropic-prefixed model + nous active provider after
  Credential Pools OAuth swap), the case-insensitive prefix match,
  empty-prefix / empty-bare-model edge cases, and multi-slash model
  ids (OpenRouter style).

* Bug fix surfaced by the tests: `ModelPreflight` was using
  `trimmingCharacters(in: .whitespaces)` which doesn't strip
  newlines. A stray `\n` in a hand-edited config.yaml would either
  miss the missing-fields classifier OR false-positive the mismatch
  banner (showing "anthropic" vs "anthropic\n"). Switched both
  trims to `.whitespacesAndNewlines`.

perf(observability): instrument Tier C load paths + fetchSessionPreviews

No behavior change — adds ScarfMon coverage so future captures show
how often Memory/Skills/Cron/Curator/SessionPreviews load paths fire
and what they cost on remote (each is multiple sequential SFTP RTTs
that pre-fix were invisible). New events:

* `mac.fetchSessionPreviews` / `.rows` / `.transportError`
* `memory.load` / `.bytes`
* `cron.load` / `.jobs`
* `skills.load` / `.count`
* `curator.load` / `.bytes`

All 321 ScarfCore tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-05-05 20:03:35 +02:00
parent 09e33b2999
commit 5e23b59697
7 changed files with 300 additions and 74 deletions
@@ -637,27 +637,42 @@ public actor HermesDataService {
}
public func fetchSessionPreviews(limit: Int = QueryDefaults.sessionPreviewLimit) async -> [String: String] {
let sql = """
SELECT m.session_id, substr(m.content, 1, \(QueryDefaults.previewContentLength))
FROM messages m
INNER JOIN (
SELECT session_id, MIN(id) as min_id
FROM messages
WHERE role = 'user' AND content <> ''
GROUP BY session_id
) first ON m.id = first.min_id
ORDER BY m.timestamp DESC
LIMIT ?
"""
do {
let rows = try await backend.query(sql, params: [.integer(Int64(limit))])
var previews: [String: String] = [:]
for row in rows {
previews[row.string(at: 0)] = row.string(at: 1)
// Already bounded by `substr(content, 1, previewContentLength)`
// wire payload caps at ~limit × 100 bytes. v2.8 added
// ScarfMon instrumentation + transport-error logging for
// parity with `fetchRecentToolCallsOutcome`; if this query
// ever does start timing out on a slow remote we'll see it
// in captures rather than swallowing the error and returning
// an empty preview map.
await ScarfMon.measureAsync(.sessionLoad, "mac.fetchSessionPreviews") {
let sql = """
SELECT m.session_id, substr(m.content, 1, \(QueryDefaults.previewContentLength))
FROM messages m
INNER JOIN (
SELECT session_id, MIN(id) as min_id
FROM messages
WHERE role = 'user' AND content <> ''
GROUP BY session_id
) first ON m.id = first.min_id
ORDER BY m.timestamp DESC
LIMIT ?
"""
do {
let rows = try await backend.query(sql, params: [.integer(Int64(limit))])
var previews: [String: String] = [:]
for row in rows {
previews[row.string(at: 0)] = row.string(at: 1)
}
ScarfMon.event(.sessionLoad, "mac.fetchSessionPreviews.rows", count: previews.count)
return previews
} catch let BackendError.transport(reason) {
ScarfMon.event(.sessionLoad, "mac.fetchSessionPreviews.transportError", count: 1)
Self.logger.warning("fetchSessionPreviews transport error: \(reason, privacy: .public)")
return [:]
} catch {
Self.logger.warning("fetchSessionPreviews failed: \(error.localizedDescription, privacy: .public)")
return [:]
}
return previews
} catch {
return [:]
}
}
@@ -50,7 +50,7 @@ public enum ModelPreflight: Sendable {
}
private static func isUnset(_ value: String) -> Bool {
let trimmed = value.trimmingCharacters(in: .whitespaces).lowercased()
let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
return trimmed.isEmpty || trimmed == "unknown"
}
@@ -79,8 +79,8 @@ public enum ModelPreflight: Sendable {
/// Uses case-insensitive comparison Hermes accepts both
/// `Anthropic/...` and `anthropic/...` casings in the wild.
public static func detectMismatch(_ config: HermesConfig) -> Mismatch? {
let modelDefault = config.model.trimmingCharacters(in: .whitespaces)
let activeProvider = config.provider.trimmingCharacters(in: .whitespaces)
let modelDefault = config.model.trimmingCharacters(in: .whitespacesAndNewlines)
let activeProvider = config.provider.trimmingCharacters(in: .whitespacesAndNewlines)
guard !isUnset(modelDefault), !isUnset(activeProvider) else { return nil }
guard let slash = modelDefault.firstIndex(of: "/") else { return nil }
let prefix = String(modelDefault[..<slash])
@@ -37,22 +37,34 @@ public final class CuratorViewModel {
isLoading = true
defer { isLoading = false }
let context = self.context
let parsed = await Task.detached(priority: .userInitiated) { () -> (HermesCuratorStatus, String?) in
let textResult = Self.runCuratorStatus(context: context)
let stateData = context.readData(context.paths.curatorStateFile)
let parsed = HermesCuratorStatusParser.parse(text: textResult, stateFileJSON: stateData)
// Best-effort markdown report: the state file points at the
// most recent <YYYYMMDD-HHMMSS>/ dir; load REPORT.md from
// there. Missing on first run, which is fine.
var report: String?
if let reportDir = parsed.lastReportPath {
let reportPath = reportDir.hasSuffix("/")
? "\(reportDir)REPORT.md"
: "\(reportDir)/REPORT.md"
report = context.readText(reportPath)
}
return (parsed, report)
}.value
// v2.8 instrumented. Curator load fires `hermes curator
// status` (CLI subprocess) plus 1-2 file reads; on remote
// each is a separate SSH RTT. Visibility lets future captures
// show how often the report file is missing or oversized.
let parsed = await ScarfMon.measureAsync(.diskIO, "curator.load") {
await Task.detached(priority: .userInitiated) { () -> (HermesCuratorStatus, String?) in
let textResult = Self.runCuratorStatus(context: context)
let stateData = context.readData(context.paths.curatorStateFile)
let parsed = HermesCuratorStatusParser.parse(text: textResult, stateFileJSON: stateData)
// Best-effort markdown report: the state file points at the
// most recent <YYYYMMDD-HHMMSS>/ dir; load REPORT.md from
// there. Missing on first run, which is fine.
var report: String?
if let reportDir = parsed.lastReportPath {
let reportPath = reportDir.hasSuffix("/")
? "\(reportDir)REPORT.md"
: "\(reportDir)/REPORT.md"
report = context.readText(reportPath)
}
return (parsed, report)
}.value
}
ScarfMon.event(
.diskIO,
"curator.load.bytes",
count: 0,
bytes: parsed.1?.utf8.count ?? 0
)
self.status = parsed.0
self.lastReportMarkdown = parsed.1
}
@@ -82,16 +82,23 @@ public final class SkillsViewModel {
let ctx = context
let xport = transport
let pins = pinnedNames
let cats: [HermesSkillCategory] = await Task.detached {
let disabled = Self.readDisabledSkillNames(context: ctx)
let pinned = pins ?? Self.readPinnedSkillNames(context: ctx)
return SkillsScanner.scan(
context: ctx,
transport: xport,
disabledNames: disabled,
pinnedNames: pinned
)
}.value
// v2.8 instrumented so future captures show how many SSH
// RTTs the SkillsScanner walk costs on remote (it stats
// every ~/.hermes/skills/* directory + reads SKILL.md per).
let cats: [HermesSkillCategory] = await ScarfMon.measureAsync(.diskIO, "skills.load") {
await Task.detached {
let disabled = Self.readDisabledSkillNames(context: ctx)
let pinned = pins ?? Self.readPinnedSkillNames(context: ctx)
return SkillsScanner.scan(
context: ctx,
transport: xport,
disabledNames: disabled,
pinnedNames: pinned
)
}.value
}
let totalSkills = cats.reduce(0) { $0 + $1.skills.count }
ScarfMon.event(.diskIO, "skills.load.count", count: totalSkills)
categories = cats
isLoading = false
}
@@ -0,0 +1,182 @@
import Testing
import Foundation
@testable import ScarfCore
/// Pure tests for `ModelPreflight` both the `check(_:)` configured-vs-
/// missing classifier and the v2.8 `detectMismatch(_:)` provider/prefix
/// reconciliation. The mismatch path is what surfaces the orange
/// "Model/provider mismatch in config.yaml" banner in ChatView when the
/// user switches OAuth providers via Credential Pools and `model.default`
/// is left carrying the old provider's prefix.
@Suite struct ModelPreflightTests {
// MARK: - check(_:) missing-field classifier
@Test func bothModelAndProviderEmptyReportsMissingBoth() {
var cfg = HermesConfig.empty
cfg.model = ""
cfg.provider = ""
#expect(ModelPreflight.check(cfg) == .missingBoth)
}
@Test func bothModelAndProviderUnknownReportsMissingBoth() {
// `HermesConfig.empty` defaults model/provider to the literal
// "unknown" the classifier must treat that the same as "".
let cfg = HermesConfig.empty
#expect(ModelPreflight.check(cfg) == .missingBoth)
}
@Test func providerSetButModelEmptyReportsMissingModel() {
var cfg = HermesConfig.empty
cfg.model = ""
cfg.provider = "anthropic"
#expect(ModelPreflight.check(cfg) == .missingModel)
}
@Test func modelSetButProviderEmptyReportsMissingProvider() {
var cfg = HermesConfig.empty
cfg.model = "claude-sonnet-4.6"
cfg.provider = ""
#expect(ModelPreflight.check(cfg) == .missingProvider)
}
@Test func bothSetReportsConfigured() {
var cfg = HermesConfig.empty
cfg.model = "claude-sonnet-4.6"
cfg.provider = "anthropic"
#expect(ModelPreflight.check(cfg) == .configured)
}
@Test func whitespaceTreatedAsUnsetForBothFields() {
var cfg = HermesConfig.empty
cfg.model = " "
cfg.provider = "\n"
#expect(ModelPreflight.check(cfg) == .missingBoth)
}
@Test func resultIsConfiguredOnlyForConfiguredCase() {
#expect(ModelPreflight.Result.configured.isConfigured)
#expect(!ModelPreflight.Result.missingBoth.isConfigured)
#expect(!ModelPreflight.Result.missingModel.isConfigured)
#expect(!ModelPreflight.Result.missingProvider.isConfigured)
}
// MARK: - detectMismatch(_:)
@Test func detectMismatchReturnsNilWhenNoPrefixOnModelDefault() {
var cfg = HermesConfig.empty
cfg.model = "claude-sonnet-4.6"
cfg.provider = "anthropic"
#expect(ModelPreflight.detectMismatch(cfg) == nil)
}
@Test func detectMismatchReturnsNilWhenPrefixMatchesProvider() {
var cfg = HermesConfig.empty
cfg.model = "anthropic/claude-sonnet-4.6"
cfg.provider = "anthropic"
#expect(ModelPreflight.detectMismatch(cfg) == nil)
}
@Test func detectMismatchReturnsNilWhenModelDefaultIsUnset() {
var cfg = HermesConfig.empty
cfg.model = ""
cfg.provider = "nous"
#expect(ModelPreflight.detectMismatch(cfg) == nil)
}
@Test func detectMismatchReturnsNilWhenProviderIsUnset() {
var cfg = HermesConfig.empty
cfg.model = "anthropic/claude-sonnet-4.6"
cfg.provider = ""
#expect(ModelPreflight.detectMismatch(cfg) == nil)
}
@Test func detectMismatchReturnsNilWhenBothUnknown() {
// The literal "unknown" sentinel from the YAML parser fallback
// counts as unset on both sides no mismatch to report.
let cfg = HermesConfig.empty // model + provider both "unknown"
#expect(ModelPreflight.detectMismatch(cfg) == nil)
}
@Test func detectMismatchSurfacesPrefixVsActiveProvider() {
// The dogfooding scenario: Anthropic-prefixed model still sitting
// in config.yaml after the user OAuth'd into Nous via Credential
// Pools. Hermes can't reconcile and chats die with -32603 at
// first prompt. The banner offers a one-click fix in either
// direction; this test pins the data the banner reads.
var cfg = HermesConfig.empty
cfg.model = "anthropic/claude-sonnet-4.6"
cfg.provider = "nous"
let mismatch = ModelPreflight.detectMismatch(cfg)
#expect(mismatch != nil)
#expect(mismatch?.prefixProvider == "anthropic")
#expect(mismatch?.activeProvider == "nous")
#expect(mismatch?.modelDefault == "anthropic/claude-sonnet-4.6")
#expect(mismatch?.bareModel == "claude-sonnet-4.6")
}
@Test func detectMismatchIsCaseInsensitiveOnPrefixMatch() {
// Hermes accepts both `Anthropic/...` and `anthropic/...` casings
// in the wild case-only differences must NOT surface as a
// mismatch (would be a false-positive banner).
var cfg = HermesConfig.empty
cfg.model = "Anthropic/claude-sonnet-4.6"
cfg.provider = "anthropic"
#expect(ModelPreflight.detectMismatch(cfg) == nil)
}
@Test func detectMismatchHandlesNonAnthropicProviders() {
// The mismatch banner needs to work for any provider pair
// not just the dogfooding case. Pin the openai+nous shape.
var cfg = HermesConfig.empty
cfg.model = "openai/gpt-5"
cfg.provider = "nous"
let mismatch = ModelPreflight.detectMismatch(cfg)
#expect(mismatch?.prefixProvider == "openai")
#expect(mismatch?.activeProvider == "nous")
#expect(mismatch?.bareModel == "gpt-5")
}
@Test func detectMismatchReturnsNilForEmptyBareModel() {
// A pathological "anthropic/" with no model name after the
// slash isn't a valid mismatch caller has no bare model to
// write back. The classifier should refuse to surface it
// rather than emit a useless fix button.
var cfg = HermesConfig.empty
cfg.model = "anthropic/"
cfg.provider = "nous"
#expect(ModelPreflight.detectMismatch(cfg) == nil)
}
@Test func detectMismatchReturnsNilForEmptyPrefix() {
// Symmetric pathological case leading slash, no provider
// prefix. Don't fire.
var cfg = HermesConfig.empty
cfg.model = "/claude-sonnet-4.6"
cfg.provider = "nous"
#expect(ModelPreflight.detectMismatch(cfg) == nil)
}
@Test func detectMismatchHandlesModelsWithMultipleSlashes() {
// Some provider/model strings carry path-style segments after
// the first slash (e.g. an OpenRouter style path). The first
// slash separates prefix from bare model; the rest of the
// string is the bare model verbatim.
var cfg = HermesConfig.empty
cfg.model = "openrouter/anthropic/claude-sonnet-4.6"
cfg.provider = "anthropic"
let mismatch = ModelPreflight.detectMismatch(cfg)
#expect(mismatch?.prefixProvider == "openrouter")
#expect(mismatch?.activeProvider == "anthropic")
#expect(mismatch?.bareModel == "anthropic/claude-sonnet-4.6")
}
@Test func detectMismatchTrimsWhitespaceBeforeComparing() {
// A stray newline in a hand-edited config.yaml shouldn't read
// as a mismatch when the trimmed values agree.
var cfg = HermesConfig.empty
cfg.model = "anthropic/claude-sonnet-4.6 "
cfg.provider = " anthropic\n"
#expect(ModelPreflight.detectMismatch(cfg) == nil)
}
}
@@ -40,17 +40,22 @@ final class CronViewModel {
let selectedID = selectedJob?.id
Task.detached { [weak self] in
// Three sync transport ops on remote keep them off main.
let jobs = svc.loadCronJobs()
let skills = svc.loadSkills().flatMap { $0.skills.map(\.id) }.sorted()
let refreshed = selectedID.flatMap { id in jobs.first(where: { $0.id == id }) }
let output = refreshed.flatMap { svc.loadCronOutput(jobId: $0.id) }
await MainActor.run { [weak self] in
guard let self else { return }
self.jobs = jobs
self.availableSkills = skills
if let refreshed { self.selectedJob = refreshed }
if output != nil { self.jobOutput = output }
self.isLoading = false
// v2.8: instrumented so we can see how many SSH RTTs the
// Cron tab actually costs in captures.
await ScarfMon.measureAsync(.diskIO, "cron.load") {
let jobs = svc.loadCronJobs()
let skills = svc.loadSkills().flatMap { $0.skills.map(\.id) }.sorted()
let refreshed = selectedID.flatMap { id in jobs.first(where: { $0.id == id }) }
let output = refreshed.flatMap { svc.loadCronOutput(jobId: $0.id) }
ScarfMon.event(.diskIO, "cron.load.jobs", count: jobs.count)
await MainActor.run { [weak self] in
guard let self else { return }
self.jobs = jobs
self.availableSkills = skills
if let refreshed { self.selectedJob = refreshed }
if output != nil { self.jobOutput = output }
self.isLoading = false
}
}
}
}
@@ -43,21 +43,26 @@ final class MemoryViewModel {
let svc = fileService
let currentProfile = activeProfile
// Sync transport calls would beach-ball the UI on remote dispatch
// off main, then commit results back on MainActor.
// off main, then commit results back on MainActor. v2.8: wrapped
// in ScarfMon so we can see how many SSH RTTs this load actually
// costs (4 sequential SFTP reads on the slow path).
Task.detached { [weak self] in
let config = svc.loadConfig()
let profiles = svc.loadMemoryProfiles()
let profile = currentProfile.isEmpty ? config.memoryProfile : currentProfile
let memory = svc.loadMemory(profile: profile)
let user = svc.loadUserProfile(profile: profile)
await MainActor.run { [weak self] in
guard let self else { return }
self.memoryProvider = config.memoryProvider
self.profiles = profiles
self.activeProfile = profile
self.memoryContent = memory
self.userContent = user
self.isLoading = false
await ScarfMon.measureAsync(.diskIO, "memory.load") {
let config = svc.loadConfig()
let profiles = svc.loadMemoryProfiles()
let profile = currentProfile.isEmpty ? config.memoryProfile : currentProfile
let memory = svc.loadMemory(profile: profile)
let user = svc.loadUserProfile(profile: profile)
ScarfMon.event(.diskIO, "memory.load.bytes", count: 0, bytes: memory.utf8.count + user.utf8.count)
await MainActor.run { [weak self] in
guard let self else { return }
self.memoryProvider = config.memoryProvider
self.profiles = profiles
self.activeProfile = profile
self.memoryContent = memory
self.userContent = user
self.isLoading = false
}
}
}
}