test(model-preflight): cover detect-mismatch + fix newline-trim bug

* New ModelPreflightTests suite (19 tests) covering both `check(_:)` and the v2.8 `detectMismatch(_:)` paths. Pins the dogfooding scenario (anthropic-prefixed model + nous active provider after a Credential Pools OAuth swap), the case-insensitive prefix match, the empty-prefix / empty-bare-model edge cases, and multi-slash model ids (OpenRouter style).
* Bug fix surfaced by the tests: `ModelPreflight` was using `trimmingCharacters(in: .whitespaces)`, which doesn't strip newlines. A stray `\n` in a hand-edited config.yaml would either slip past the missing-fields classifier OR trigger a false-positive mismatch banner (showing "anthropic" vs "anthropic\n"). Switched the trims in both `isUnset` and `detectMismatch` to `.whitespacesAndNewlines`.

perf(observability): instrument Tier C load paths + fetchSessionPreviews

No behavior change — adds ScarfMon coverage so future captures show how often the Memory/Skills/Cron/Curator/SessionPreviews load paths fire and what they cost on remote (each is multiple sequential SFTP RTTs that were invisible before this change).

New events:
* `mac.fetchSessionPreviews` / `.rows` / `.transportError`
* `memory.load` / `.bytes`
* `cron.load` / `.jobs`
* `skills.load` / `.count`
* `curator.load` / `.bytes`

All 321 ScarfCore tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 02:14:37 +00:00 · 2026-05-05 20:03:35 +02:00
parent 09e33b2999
commit 5e23b59697
7 changed files with 300 additions and 74 deletions
@@ -637,27 +637,42 @@ public actor HermesDataService {
    }

    public func fetchSessionPreviews(limit: Int = QueryDefaults.sessionPreviewLimit) async -> [String: String] {
-        let sql = """
-            SELECT m.session_id, substr(m.content, 1, \(QueryDefaults.previewContentLength))
-            FROM messages m
-            INNER JOIN (
-                SELECT session_id, MIN(id) as min_id
-                FROM messages
-                WHERE role = 'user' AND content <> ''
-                GROUP BY session_id
-            ) first ON m.id = first.min_id
-            ORDER BY m.timestamp DESC
-            LIMIT ?
-            """
-        do {
-            let rows = try await backend.query(sql, params: [.integer(Int64(limit))])
-            var previews: [String: String] = [:]
-            for row in rows {
-                previews[row.string(at: 0)] = row.string(at: 1)
+        // Already bounded: each preview is cut by
+        // `substr(content, 1, previewContentLength)` (characters, not
+        // bytes, on TEXT) and at most `limit` rows come back. v2.8
+        // added ScarfMon instrumentation + transport-error logging for
+        // parity with `fetchRecentToolCallsOutcome`; if this query ever
+        // starts timing out on a slow remote we'll see it in captures
+        // rather than silently returning an empty preview map.
+        await ScarfMon.measureAsync(.sessionLoad, "mac.fetchSessionPreviews") {
+            let sql = """
+                SELECT m.session_id, substr(m.content, 1, \(QueryDefaults.previewContentLength))
+                FROM messages m
+                INNER JOIN (
+                    SELECT session_id, MIN(id) as min_id
+                    FROM messages
+                    WHERE role = 'user' AND content <> ''
+                    GROUP BY session_id
+                ) first ON m.id = first.min_id
+                ORDER BY m.timestamp DESC
+                LIMIT ?
+                """
+            do {
+                let rows = try await backend.query(sql, params: [.integer(Int64(limit))])
+                var previews: [String: String] = [:]
+                for row in rows {
+                    previews[row.string(at: 0)] = row.string(at: 1)
+                }
+                ScarfMon.event(.sessionLoad, "mac.fetchSessionPreviews.rows", count: previews.count)
+                return previews
+            } catch let BackendError.transport(reason) {
+                ScarfMon.event(.sessionLoad, "mac.fetchSessionPreviews.transportError", count: 1)
+                Self.logger.warning("fetchSessionPreviews transport error: \(reason, privacy: .public)")
+                return [:]
+            } catch {
+                Self.logger.warning("fetchSessionPreviews failed: \(error.localizedDescription, privacy: .public)")
+                return [:]
            }
-            return previews
-        } catch {
-            return [:]
        }
    }

@@ -50,7 +50,7 @@ public enum ModelPreflight: Sendable {
    }

    private static func isUnset(_ value: String) -> Bool {
-        let trimmed = value.trimmingCharacters(in: .whitespaces).lowercased()
+        let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
        return trimmed.isEmpty || trimmed == "unknown"
    }

@@ -79,8 +79,8 @@ public enum ModelPreflight: Sendable {
    /// Uses case-insensitive comparison — Hermes accepts both
    /// `Anthropic/...` and `anthropic/...` casings in the wild.
    public static func detectMismatch(_ config: HermesConfig) -> Mismatch? {
-        let modelDefault = config.model.trimmingCharacters(in: .whitespaces)
-        let activeProvider = config.provider.trimmingCharacters(in: .whitespaces)
+        let modelDefault = config.model.trimmingCharacters(in: .whitespacesAndNewlines)
+        let activeProvider = config.provider.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !isUnset(modelDefault), !isUnset(activeProvider) else { return nil }
        guard let slash = modelDefault.firstIndex(of: "/") else { return nil }
        let prefix = String(modelDefault[..<slash])
@@ -37,22 +37,34 @@ public final class CuratorViewModel {
        isLoading = true
        defer { isLoading = false }
        let context = self.context
-        let parsed = await Task.detached(priority: .userInitiated) { () -> (HermesCuratorStatus, String?) in
-            let textResult = Self.runCuratorStatus(context: context)
-            let stateData = context.readData(context.paths.curatorStateFile)
-            let parsed = HermesCuratorStatusParser.parse(text: textResult, stateFileJSON: stateData)
-            // Best-effort markdown report: the state file points at the
-            // most recent <YYYYMMDD-HHMMSS>/ dir; load REPORT.md from
-            // there. Missing on first run, which is fine.
-            var report: String?
-            if let reportDir = parsed.lastReportPath {
-                let reportPath = reportDir.hasSuffix("/")
-                    ? "\(reportDir)REPORT.md"
-                    : "\(reportDir)/REPORT.md"
-                report = context.readText(reportPath)
-            }
-            return (parsed, report)
-        }.value
+        // v2.8 — instrumented. Curator load fires `hermes curator
+        // status` (CLI subprocess) plus 1-2 file reads; on remote
+        // each is a separate SSH RTT. Visibility lets future captures
+        // show how often the report file is missing or oversized.
+        let parsed = await ScarfMon.measureAsync(.diskIO, "curator.load") {
+            await Task.detached(priority: .userInitiated) { () -> (HermesCuratorStatus, String?) in
+                let textResult = Self.runCuratorStatus(context: context)
+                let stateData = context.readData(context.paths.curatorStateFile)
+                let parsed = HermesCuratorStatusParser.parse(text: textResult, stateFileJSON: stateData)
+                // Best-effort markdown report: the state file points at the
+                // most recent <YYYYMMDD-HHMMSS>/ dir; load REPORT.md from
+                // there. Missing on first run, which is fine.
+                var report: String?
+                if let reportDir = parsed.lastReportPath {
+                    let reportPath = reportDir.hasSuffix("/")
+                        ? "\(reportDir)REPORT.md"
+                        : "\(reportDir)/REPORT.md"
+                    report = context.readText(reportPath)
+                }
+                return (parsed, report)
+            }.value
+        }
+        ScarfMon.event(
+            .diskIO,
+            "curator.load.bytes",
+            count: 0,
+            bytes: parsed.1?.utf8.count ?? 0
+        )
        self.status = parsed.0
        self.lastReportMarkdown = parsed.1
    }
@@ -82,16 +82,23 @@ public final class SkillsViewModel {
        let ctx = context
        let xport = transport
        let pins = pinnedNames
-        let cats: [HermesSkillCategory] = await Task.detached {
-            let disabled = Self.readDisabledSkillNames(context: ctx)
-            let pinned = pins ?? Self.readPinnedSkillNames(context: ctx)
-            return SkillsScanner.scan(
-                context: ctx,
-                transport: xport,
-                disabledNames: disabled,
-                pinnedNames: pinned
-            )
-        }.value
+        // v2.8 — instrumented so future captures show how many SSH
+        // RTTs the SkillsScanner walk costs on remote (it stats
+        // every ~/.hermes/skills/* directory + reads SKILL.md per).
+        let cats: [HermesSkillCategory] = await ScarfMon.measureAsync(.diskIO, "skills.load") {
+            await Task.detached {
+                let disabled = Self.readDisabledSkillNames(context: ctx)
+                let pinned = pins ?? Self.readPinnedSkillNames(context: ctx)
+                return SkillsScanner.scan(
+                    context: ctx,
+                    transport: xport,
+                    disabledNames: disabled,
+                    pinnedNames: pinned
+                )
+            }.value
+        }
+        let totalSkills = cats.reduce(0) { $0 + $1.skills.count }
+        ScarfMon.event(.diskIO, "skills.load.count", count: totalSkills)
        categories = cats
        isLoading = false
    }
@@ -0,0 +1,182 @@
+import Testing
+import Foundation
+@testable import ScarfCore
+
+/// Pure tests for `ModelPreflight` — both the `check(_:)` configured-vs-
+/// missing classifier and the v2.8 `detectMismatch(_:)` provider/prefix
+/// reconciliation. The mismatch path is what surfaces the orange
+/// "Model/provider mismatch in config.yaml" banner in ChatView when the
+/// user switches OAuth providers via Credential Pools and `model.default`
+/// is left carrying the old provider's prefix.
+@Suite struct ModelPreflightTests {
+
+    // MARK: - check(_:) — missing-field classifier
+
+    @Test func bothModelAndProviderEmptyReportsMissingBoth() {
+        var cfg = HermesConfig.empty
+        cfg.model = ""
+        cfg.provider = ""
+        #expect(ModelPreflight.check(cfg) == .missingBoth)
+    }
+
+    @Test func bothModelAndProviderUnknownReportsMissingBoth() {
+        // `HermesConfig.empty` defaults model/provider to the literal
+        // "unknown" — the classifier must treat that the same as "".
+        let cfg = HermesConfig.empty
+        #expect(ModelPreflight.check(cfg) == .missingBoth)
+    }
+
+    @Test func providerSetButModelEmptyReportsMissingModel() {
+        var cfg = HermesConfig.empty
+        cfg.model = ""
+        cfg.provider = "anthropic"
+        #expect(ModelPreflight.check(cfg) == .missingModel)
+    }
+
+    @Test func modelSetButProviderEmptyReportsMissingProvider() {
+        var cfg = HermesConfig.empty
+        cfg.model = "claude-sonnet-4.6"
+        cfg.provider = ""
+        #expect(ModelPreflight.check(cfg) == .missingProvider)
+    }
+
+    @Test func bothSetReportsConfigured() {
+        var cfg = HermesConfig.empty
+        cfg.model = "claude-sonnet-4.6"
+        cfg.provider = "anthropic"
+        #expect(ModelPreflight.check(cfg) == .configured)
+    }
+
+    @Test func whitespaceTreatedAsUnsetForBothFields() {
+        var cfg = HermesConfig.empty
+        cfg.model = "  "
+        cfg.provider = "\n"
+        #expect(ModelPreflight.check(cfg) == .missingBoth)
+    }
+
+    @Test func resultIsConfiguredOnlyForConfiguredCase() {
+        #expect(ModelPreflight.Result.configured.isConfigured)
+        #expect(!ModelPreflight.Result.missingBoth.isConfigured)
+        #expect(!ModelPreflight.Result.missingModel.isConfigured)
+        #expect(!ModelPreflight.Result.missingProvider.isConfigured)
+    }
+
+    // MARK: - detectMismatch(_:)
+
+    @Test func detectMismatchReturnsNilWhenNoPrefixOnModelDefault() {
+        var cfg = HermesConfig.empty
+        cfg.model = "claude-sonnet-4.6"
+        cfg.provider = "anthropic"
+        #expect(ModelPreflight.detectMismatch(cfg) == nil)
+    }
+
+    @Test func detectMismatchReturnsNilWhenPrefixMatchesProvider() {
+        var cfg = HermesConfig.empty
+        cfg.model = "anthropic/claude-sonnet-4.6"
+        cfg.provider = "anthropic"
+        #expect(ModelPreflight.detectMismatch(cfg) == nil)
+    }
+
+    @Test func detectMismatchReturnsNilWhenModelDefaultIsUnset() {
+        var cfg = HermesConfig.empty
+        cfg.model = ""
+        cfg.provider = "nous"
+        #expect(ModelPreflight.detectMismatch(cfg) == nil)
+    }
+
+    @Test func detectMismatchReturnsNilWhenProviderIsUnset() {
+        var cfg = HermesConfig.empty
+        cfg.model = "anthropic/claude-sonnet-4.6"
+        cfg.provider = ""
+        #expect(ModelPreflight.detectMismatch(cfg) == nil)
+    }
+
+    @Test func detectMismatchReturnsNilWhenBothUnknown() {
+        // The literal "unknown" sentinel from the YAML parser fallback
+        // counts as unset on both sides — no mismatch to report.
+        let cfg = HermesConfig.empty // model + provider both "unknown"
+        #expect(ModelPreflight.detectMismatch(cfg) == nil)
+    }
+
+    @Test func detectMismatchSurfacesPrefixVsActiveProvider() {
+        // The dogfooding scenario: Anthropic-prefixed model still sitting
+        // in config.yaml after the user OAuth'd into Nous via Credential
+        // Pools. Hermes can't reconcile and chats die with -32603 at
+        // first prompt. The banner offers a one-click fix in either
+        // direction; this test pins the data the banner reads.
+        var cfg = HermesConfig.empty
+        cfg.model = "anthropic/claude-sonnet-4.6"
+        cfg.provider = "nous"
+        let mismatch = ModelPreflight.detectMismatch(cfg)
+        #expect(mismatch != nil)
+        #expect(mismatch?.prefixProvider == "anthropic")
+        #expect(mismatch?.activeProvider == "nous")
+        #expect(mismatch?.modelDefault == "anthropic/claude-sonnet-4.6")
+        #expect(mismatch?.bareModel == "claude-sonnet-4.6")
+    }
+
+    @Test func detectMismatchIsCaseInsensitiveOnPrefixMatch() {
+        // Hermes accepts both `Anthropic/...` and `anthropic/...` casings
+        // in the wild — case-only differences must NOT surface as a
+        // mismatch (would be a false-positive banner).
+        var cfg = HermesConfig.empty
+        cfg.model = "Anthropic/claude-sonnet-4.6"
+        cfg.provider = "anthropic"
+        #expect(ModelPreflight.detectMismatch(cfg) == nil)
+    }
+
+    @Test func detectMismatchHandlesNonAnthropicProviders() {
+        // The mismatch banner needs to work for any provider pair —
+        // not just the dogfooding case. Pin the openai+nous shape.
+        var cfg = HermesConfig.empty
+        cfg.model = "openai/gpt-5"
+        cfg.provider = "nous"
+        let mismatch = ModelPreflight.detectMismatch(cfg)
+        #expect(mismatch?.prefixProvider == "openai")
+        #expect(mismatch?.activeProvider == "nous")
+        #expect(mismatch?.bareModel == "gpt-5")
+    }
+
+    @Test func detectMismatchReturnsNilForEmptyBareModel() {
+        // A pathological "anthropic/" with no model name after the
+        // slash isn't a valid mismatch — caller has no bare model to
+        // write back. The classifier should refuse to surface it
+        // rather than emit a useless fix button.
+        var cfg = HermesConfig.empty
+        cfg.model = "anthropic/"
+        cfg.provider = "nous"
+        #expect(ModelPreflight.detectMismatch(cfg) == nil)
+    }
+
+    @Test func detectMismatchReturnsNilForEmptyPrefix() {
+        // Symmetric pathological case — leading slash, no provider
+        // prefix. Don't fire.
+        var cfg = HermesConfig.empty
+        cfg.model = "/claude-sonnet-4.6"
+        cfg.provider = "nous"
+        #expect(ModelPreflight.detectMismatch(cfg) == nil)
+    }
+
+    @Test func detectMismatchHandlesModelsWithMultipleSlashes() {
+        // Some provider/model strings carry path-style segments after
+        // the first slash (e.g. an OpenRouter style path). The first
+        // slash separates prefix from bare model; the rest of the
+        // string is the bare model verbatim.
+        var cfg = HermesConfig.empty
+        cfg.model = "openrouter/anthropic/claude-sonnet-4.6"
+        cfg.provider = "anthropic"
+        let mismatch = ModelPreflight.detectMismatch(cfg)
+        #expect(mismatch?.prefixProvider == "openrouter")
+        #expect(mismatch?.activeProvider == "anthropic")
+        #expect(mismatch?.bareModel == "anthropic/claude-sonnet-4.6")
+    }
+
+    @Test func detectMismatchTrimsWhitespaceBeforeComparing() {
+        // Stray spaces or a trailing newline in a hand-edited config.yaml
+        // shouldn't read as a mismatch when the trimmed values agree.
+        var cfg = HermesConfig.empty
+        cfg.model = "anthropic/claude-sonnet-4.6  "
+        cfg.provider = " anthropic\n"
+        #expect(ModelPreflight.detectMismatch(cfg) == nil)
+    }
+}
@@ -40,17 +40,22 @@ final class CronViewModel {
        let selectedID = selectedJob?.id
        Task.detached { [weak self] in
            // Three sync transport ops on remote — keep them off main.
-            let jobs = svc.loadCronJobs()
-            let skills = svc.loadSkills().flatMap { $0.skills.map(\.id) }.sorted()
-            let refreshed = selectedID.flatMap { id in jobs.first(where: { $0.id == id }) }
-            let output = refreshed.flatMap { svc.loadCronOutput(jobId: $0.id) }
-            await MainActor.run { [weak self] in
-                guard let self else { return }
-                self.jobs = jobs
-                self.availableSkills = skills
-                if let refreshed { self.selectedJob = refreshed }
-                if output != nil { self.jobOutput = output }
-                self.isLoading = false
+            // v2.8: instrumented so we can see how many SSH RTTs the
+            // Cron tab actually costs in captures.
+            await ScarfMon.measureAsync(.diskIO, "cron.load") {
+                let jobs = svc.loadCronJobs()
+                let skills = svc.loadSkills().flatMap { $0.skills.map(\.id) }.sorted()
+                let refreshed = selectedID.flatMap { id in jobs.first(where: { $0.id == id }) }
+                let output = refreshed.flatMap { svc.loadCronOutput(jobId: $0.id) }
+                ScarfMon.event(.diskIO, "cron.load.jobs", count: jobs.count)
+                await MainActor.run { [weak self] in
+                    guard let self else { return }
+                    self.jobs = jobs
+                    self.availableSkills = skills
+                    if let refreshed { self.selectedJob = refreshed }
+                    if output != nil { self.jobOutput = output }
+                    self.isLoading = false
+                }
            }
        }
    }
@@ -43,21 +43,26 @@ final class MemoryViewModel {
        let svc = fileService
        let currentProfile = activeProfile
        // Sync transport calls would beach-ball the UI on remote — dispatch
-        // off main, then commit results back on MainActor.
+        // off main, then commit results back on MainActor. v2.8: wrapped
+        // in ScarfMon so we can see how many SSH RTTs this load actually
+        // costs (4 sequential SFTP reads on the slow path).
        Task.detached { [weak self] in
-            let config = svc.loadConfig()
-            let profiles = svc.loadMemoryProfiles()
-            let profile = currentProfile.isEmpty ? config.memoryProfile : currentProfile
-            let memory = svc.loadMemory(profile: profile)
-            let user = svc.loadUserProfile(profile: profile)
-            await MainActor.run { [weak self] in
-                guard let self else { return }
-                self.memoryProvider = config.memoryProvider
-                self.profiles = profiles
-                self.activeProfile = profile
-                self.memoryContent = memory
-                self.userContent = user
-                self.isLoading = false
+            await ScarfMon.measureAsync(.diskIO, "memory.load") {
+                let config = svc.loadConfig()
+                let profiles = svc.loadMemoryProfiles()
+                let profile = currentProfile.isEmpty ? config.memoryProfile : currentProfile
+                let memory = svc.loadMemory(profile: profile)
+                let user = svc.loadUserProfile(profile: profile)
+                ScarfMon.event(.diskIO, "memory.load.bytes", count: 0, bytes: memory.utf8.count + user.utf8.count)
+                await MainActor.run { [weak self] in
+                    guard let self else { return }
+                    self.memoryProvider = config.memoryProvider
+                    self.profiles = profiles
+                    self.activeProfile = profile
+                    self.memoryContent = memory
+                    self.userContent = user
+                    self.isLoading = false
+                }
            }
        }
    }