mirror of
https://github.com/awizemann/scarf.git
synced 2026-05-08 02:14:37 +00:00
test(model-preflight): cover detect-mismatch + fix newline-trim bug
* New ModelPreflightTests suite (19 tests) covering both `check(_:)` and the v2.8 `detectMismatch(_:)` paths. Pins the dogfooding scenario (anthropic-prefixed model + nous active provider after Credential Pools OAuth swap), the case-insensitive prefix match, empty-prefix / empty-bare-model edge cases, and multi-slash model ids (OpenRouter style). * Bug fix surfaced by the tests: `ModelPreflight` was using `trimmingCharacters(in: .whitespaces)` which doesn't strip newlines. A stray `\n` in a hand-edited config.yaml would either miss the missing-fields classifier OR false-positive the mismatch banner (showing "anthropic" vs "anthropic\n"). Switched both trims to `.whitespacesAndNewlines`. perf(observability): instrument Tier C load paths + fetchSessionPreviews No behavior change — adds ScarfMon coverage so future captures show how often Memory/Skills/Cron/Curator/SessionPreviews load paths fire and what they cost on remote (each is multiple sequential SFTP RTTs that pre-fix were invisible). New events: * `mac.fetchSessionPreviews` / `.rows` / `.transportError` * `memory.load` / `.bytes` * `cron.load` / `.jobs` * `skills.load` / `.count` * `curator.load` / `.bytes` All 321 ScarfCore tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -637,27 +637,42 @@ public actor HermesDataService {
|
||||
}
|
||||
|
||||
public func fetchSessionPreviews(limit: Int = QueryDefaults.sessionPreviewLimit) async -> [String: String] {
|
||||
let sql = """
|
||||
SELECT m.session_id, substr(m.content, 1, \(QueryDefaults.previewContentLength))
|
||||
FROM messages m
|
||||
INNER JOIN (
|
||||
SELECT session_id, MIN(id) as min_id
|
||||
FROM messages
|
||||
WHERE role = 'user' AND content <> ''
|
||||
GROUP BY session_id
|
||||
) first ON m.id = first.min_id
|
||||
ORDER BY m.timestamp DESC
|
||||
LIMIT ?
|
||||
"""
|
||||
do {
|
||||
let rows = try await backend.query(sql, params: [.integer(Int64(limit))])
|
||||
var previews: [String: String] = [:]
|
||||
for row in rows {
|
||||
previews[row.string(at: 0)] = row.string(at: 1)
|
||||
// Already bounded by `substr(content, 1, previewContentLength)`
|
||||
// — wire payload caps at ~limit × 100 bytes. v2.8 added
|
||||
// ScarfMon instrumentation + transport-error logging for
|
||||
// parity with `fetchRecentToolCallsOutcome`; if this query
|
||||
// ever does start timing out on a slow remote we'll see it
|
||||
// in captures rather than swallowing the error and returning
|
||||
// an empty preview map.
|
||||
await ScarfMon.measureAsync(.sessionLoad, "mac.fetchSessionPreviews") {
|
||||
let sql = """
|
||||
SELECT m.session_id, substr(m.content, 1, \(QueryDefaults.previewContentLength))
|
||||
FROM messages m
|
||||
INNER JOIN (
|
||||
SELECT session_id, MIN(id) as min_id
|
||||
FROM messages
|
||||
WHERE role = 'user' AND content <> ''
|
||||
GROUP BY session_id
|
||||
) first ON m.id = first.min_id
|
||||
ORDER BY m.timestamp DESC
|
||||
LIMIT ?
|
||||
"""
|
||||
do {
|
||||
let rows = try await backend.query(sql, params: [.integer(Int64(limit))])
|
||||
var previews: [String: String] = [:]
|
||||
for row in rows {
|
||||
previews[row.string(at: 0)] = row.string(at: 1)
|
||||
}
|
||||
ScarfMon.event(.sessionLoad, "mac.fetchSessionPreviews.rows", count: previews.count)
|
||||
return previews
|
||||
} catch let BackendError.transport(reason) {
|
||||
ScarfMon.event(.sessionLoad, "mac.fetchSessionPreviews.transportError", count: 1)
|
||||
Self.logger.warning("fetchSessionPreviews transport error: \(reason, privacy: .public)")
|
||||
return [:]
|
||||
} catch {
|
||||
Self.logger.warning("fetchSessionPreviews failed: \(error.localizedDescription, privacy: .public)")
|
||||
return [:]
|
||||
}
|
||||
return previews
|
||||
} catch {
|
||||
return [:]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ public enum ModelPreflight: Sendable {
|
||||
}
|
||||
|
||||
private static func isUnset(_ value: String) -> Bool {
|
||||
let trimmed = value.trimmingCharacters(in: .whitespaces).lowercased()
|
||||
let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
|
||||
return trimmed.isEmpty || trimmed == "unknown"
|
||||
}
|
||||
|
||||
@@ -79,8 +79,8 @@ public enum ModelPreflight: Sendable {
|
||||
/// Uses case-insensitive comparison — Hermes accepts both
|
||||
/// `Anthropic/...` and `anthropic/...` casings in the wild.
|
||||
public static func detectMismatch(_ config: HermesConfig) -> Mismatch? {
|
||||
let modelDefault = config.model.trimmingCharacters(in: .whitespaces)
|
||||
let activeProvider = config.provider.trimmingCharacters(in: .whitespaces)
|
||||
let modelDefault = config.model.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let activeProvider = config.provider.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
guard !isUnset(modelDefault), !isUnset(activeProvider) else { return nil }
|
||||
guard let slash = modelDefault.firstIndex(of: "/") else { return nil }
|
||||
let prefix = String(modelDefault[..<slash])
|
||||
|
||||
@@ -37,22 +37,34 @@ public final class CuratorViewModel {
|
||||
isLoading = true
|
||||
defer { isLoading = false }
|
||||
let context = self.context
|
||||
let parsed = await Task.detached(priority: .userInitiated) { () -> (HermesCuratorStatus, String?) in
|
||||
let textResult = Self.runCuratorStatus(context: context)
|
||||
let stateData = context.readData(context.paths.curatorStateFile)
|
||||
let parsed = HermesCuratorStatusParser.parse(text: textResult, stateFileJSON: stateData)
|
||||
// Best-effort markdown report: the state file points at the
|
||||
// most recent <YYYYMMDD-HHMMSS>/ dir; load REPORT.md from
|
||||
// there. Missing on first run, which is fine.
|
||||
var report: String?
|
||||
if let reportDir = parsed.lastReportPath {
|
||||
let reportPath = reportDir.hasSuffix("/")
|
||||
? "\(reportDir)REPORT.md"
|
||||
: "\(reportDir)/REPORT.md"
|
||||
report = context.readText(reportPath)
|
||||
}
|
||||
return (parsed, report)
|
||||
}.value
|
||||
// v2.8 — instrumented. Curator load fires `hermes curator
|
||||
// status` (CLI subprocess) plus 1-2 file reads; on remote
|
||||
// each is a separate SSH RTT. Visibility lets future captures
|
||||
// show how often the report file is missing or oversized.
|
||||
let parsed = await ScarfMon.measureAsync(.diskIO, "curator.load") {
|
||||
await Task.detached(priority: .userInitiated) { () -> (HermesCuratorStatus, String?) in
|
||||
let textResult = Self.runCuratorStatus(context: context)
|
||||
let stateData = context.readData(context.paths.curatorStateFile)
|
||||
let parsed = HermesCuratorStatusParser.parse(text: textResult, stateFileJSON: stateData)
|
||||
// Best-effort markdown report: the state file points at the
|
||||
// most recent <YYYYMMDD-HHMMSS>/ dir; load REPORT.md from
|
||||
// there. Missing on first run, which is fine.
|
||||
var report: String?
|
||||
if let reportDir = parsed.lastReportPath {
|
||||
let reportPath = reportDir.hasSuffix("/")
|
||||
? "\(reportDir)REPORT.md"
|
||||
: "\(reportDir)/REPORT.md"
|
||||
report = context.readText(reportPath)
|
||||
}
|
||||
return (parsed, report)
|
||||
}.value
|
||||
}
|
||||
ScarfMon.event(
|
||||
.diskIO,
|
||||
"curator.load.bytes",
|
||||
count: 0,
|
||||
bytes: parsed.1?.utf8.count ?? 0
|
||||
)
|
||||
self.status = parsed.0
|
||||
self.lastReportMarkdown = parsed.1
|
||||
}
|
||||
|
||||
@@ -82,16 +82,23 @@ public final class SkillsViewModel {
|
||||
let ctx = context
|
||||
let xport = transport
|
||||
let pins = pinnedNames
|
||||
let cats: [HermesSkillCategory] = await Task.detached {
|
||||
let disabled = Self.readDisabledSkillNames(context: ctx)
|
||||
let pinned = pins ?? Self.readPinnedSkillNames(context: ctx)
|
||||
return SkillsScanner.scan(
|
||||
context: ctx,
|
||||
transport: xport,
|
||||
disabledNames: disabled,
|
||||
pinnedNames: pinned
|
||||
)
|
||||
}.value
|
||||
// v2.8 — instrumented so future captures show how many SSH
|
||||
// RTTs the SkillsScanner walk costs on remote (it stats
|
||||
// every ~/.hermes/skills/* directory + reads SKILL.md per).
|
||||
let cats: [HermesSkillCategory] = await ScarfMon.measureAsync(.diskIO, "skills.load") {
|
||||
await Task.detached {
|
||||
let disabled = Self.readDisabledSkillNames(context: ctx)
|
||||
let pinned = pins ?? Self.readPinnedSkillNames(context: ctx)
|
||||
return SkillsScanner.scan(
|
||||
context: ctx,
|
||||
transport: xport,
|
||||
disabledNames: disabled,
|
||||
pinnedNames: pinned
|
||||
)
|
||||
}.value
|
||||
}
|
||||
let totalSkills = cats.reduce(0) { $0 + $1.skills.count }
|
||||
ScarfMon.event(.diskIO, "skills.load.count", count: totalSkills)
|
||||
categories = cats
|
||||
isLoading = false
|
||||
}
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
import Testing
|
||||
import Foundation
|
||||
@testable import ScarfCore
|
||||
|
||||
/// Pure tests for `ModelPreflight` — both the `check(_:)` configured-vs-
|
||||
/// missing classifier and the v2.8 `detectMismatch(_:)` provider/prefix
|
||||
/// reconciliation. The mismatch path is what surfaces the orange
|
||||
/// "Model/provider mismatch in config.yaml" banner in ChatView when the
|
||||
/// user switches OAuth providers via Credential Pools and `model.default`
|
||||
/// is left carrying the old provider's prefix.
|
||||
@Suite struct ModelPreflightTests {
|
||||
|
||||
// MARK: - check(_:) — missing-field classifier
|
||||
|
||||
@Test func bothModelAndProviderEmptyReportsMissingBoth() {
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = ""
|
||||
cfg.provider = ""
|
||||
#expect(ModelPreflight.check(cfg) == .missingBoth)
|
||||
}
|
||||
|
||||
@Test func bothModelAndProviderUnknownReportsMissingBoth() {
|
||||
// `HermesConfig.empty` defaults model/provider to the literal
|
||||
// "unknown" — the classifier must treat that the same as "".
|
||||
let cfg = HermesConfig.empty
|
||||
#expect(ModelPreflight.check(cfg) == .missingBoth)
|
||||
}
|
||||
|
||||
@Test func providerSetButModelEmptyReportsMissingModel() {
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = ""
|
||||
cfg.provider = "anthropic"
|
||||
#expect(ModelPreflight.check(cfg) == .missingModel)
|
||||
}
|
||||
|
||||
@Test func modelSetButProviderEmptyReportsMissingProvider() {
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "claude-sonnet-4.6"
|
||||
cfg.provider = ""
|
||||
#expect(ModelPreflight.check(cfg) == .missingProvider)
|
||||
}
|
||||
|
||||
@Test func bothSetReportsConfigured() {
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "claude-sonnet-4.6"
|
||||
cfg.provider = "anthropic"
|
||||
#expect(ModelPreflight.check(cfg) == .configured)
|
||||
}
|
||||
|
||||
@Test func whitespaceTreatedAsUnsetForBothFields() {
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = " "
|
||||
cfg.provider = "\n"
|
||||
#expect(ModelPreflight.check(cfg) == .missingBoth)
|
||||
}
|
||||
|
||||
@Test func resultIsConfiguredOnlyForConfiguredCase() {
|
||||
#expect(ModelPreflight.Result.configured.isConfigured)
|
||||
#expect(!ModelPreflight.Result.missingBoth.isConfigured)
|
||||
#expect(!ModelPreflight.Result.missingModel.isConfigured)
|
||||
#expect(!ModelPreflight.Result.missingProvider.isConfigured)
|
||||
}
|
||||
|
||||
// MARK: - detectMismatch(_:)
|
||||
|
||||
@Test func detectMismatchReturnsNilWhenNoPrefixOnModelDefault() {
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "claude-sonnet-4.6"
|
||||
cfg.provider = "anthropic"
|
||||
#expect(ModelPreflight.detectMismatch(cfg) == nil)
|
||||
}
|
||||
|
||||
@Test func detectMismatchReturnsNilWhenPrefixMatchesProvider() {
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "anthropic/claude-sonnet-4.6"
|
||||
cfg.provider = "anthropic"
|
||||
#expect(ModelPreflight.detectMismatch(cfg) == nil)
|
||||
}
|
||||
|
||||
@Test func detectMismatchReturnsNilWhenModelDefaultIsUnset() {
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = ""
|
||||
cfg.provider = "nous"
|
||||
#expect(ModelPreflight.detectMismatch(cfg) == nil)
|
||||
}
|
||||
|
||||
@Test func detectMismatchReturnsNilWhenProviderIsUnset() {
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "anthropic/claude-sonnet-4.6"
|
||||
cfg.provider = ""
|
||||
#expect(ModelPreflight.detectMismatch(cfg) == nil)
|
||||
}
|
||||
|
||||
@Test func detectMismatchReturnsNilWhenBothUnknown() {
|
||||
// The literal "unknown" sentinel from the YAML parser fallback
|
||||
// counts as unset on both sides — no mismatch to report.
|
||||
let cfg = HermesConfig.empty // model + provider both "unknown"
|
||||
#expect(ModelPreflight.detectMismatch(cfg) == nil)
|
||||
}
|
||||
|
||||
@Test func detectMismatchSurfacesPrefixVsActiveProvider() {
|
||||
// The dogfooding scenario: Anthropic-prefixed model still sitting
|
||||
// in config.yaml after the user OAuth'd into Nous via Credential
|
||||
// Pools. Hermes can't reconcile and chats die with -32603 at
|
||||
// first prompt. The banner offers a one-click fix in either
|
||||
// direction; this test pins the data the banner reads.
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "anthropic/claude-sonnet-4.6"
|
||||
cfg.provider = "nous"
|
||||
let mismatch = ModelPreflight.detectMismatch(cfg)
|
||||
#expect(mismatch != nil)
|
||||
#expect(mismatch?.prefixProvider == "anthropic")
|
||||
#expect(mismatch?.activeProvider == "nous")
|
||||
#expect(mismatch?.modelDefault == "anthropic/claude-sonnet-4.6")
|
||||
#expect(mismatch?.bareModel == "claude-sonnet-4.6")
|
||||
}
|
||||
|
||||
@Test func detectMismatchIsCaseInsensitiveOnPrefixMatch() {
|
||||
// Hermes accepts both `Anthropic/...` and `anthropic/...` casings
|
||||
// in the wild — case-only differences must NOT surface as a
|
||||
// mismatch (would be a false-positive banner).
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "Anthropic/claude-sonnet-4.6"
|
||||
cfg.provider = "anthropic"
|
||||
#expect(ModelPreflight.detectMismatch(cfg) == nil)
|
||||
}
|
||||
|
||||
@Test func detectMismatchHandlesNonAnthropicProviders() {
|
||||
// The mismatch banner needs to work for any provider pair —
|
||||
// not just the dogfooding case. Pin the openai+nous shape.
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "openai/gpt-5"
|
||||
cfg.provider = "nous"
|
||||
let mismatch = ModelPreflight.detectMismatch(cfg)
|
||||
#expect(mismatch?.prefixProvider == "openai")
|
||||
#expect(mismatch?.activeProvider == "nous")
|
||||
#expect(mismatch?.bareModel == "gpt-5")
|
||||
}
|
||||
|
||||
@Test func detectMismatchReturnsNilForEmptyBareModel() {
|
||||
// A pathological "anthropic/" with no model name after the
|
||||
// slash isn't a valid mismatch — caller has no bare model to
|
||||
// write back. The classifier should refuse to surface it
|
||||
// rather than emit a useless fix button.
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "anthropic/"
|
||||
cfg.provider = "nous"
|
||||
#expect(ModelPreflight.detectMismatch(cfg) == nil)
|
||||
}
|
||||
|
||||
@Test func detectMismatchReturnsNilForEmptyPrefix() {
|
||||
// Symmetric pathological case — leading slash, no provider
|
||||
// prefix. Don't fire.
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "/claude-sonnet-4.6"
|
||||
cfg.provider = "nous"
|
||||
#expect(ModelPreflight.detectMismatch(cfg) == nil)
|
||||
}
|
||||
|
||||
@Test func detectMismatchHandlesModelsWithMultipleSlashes() {
|
||||
// Some provider/model strings carry path-style segments after
|
||||
// the first slash (e.g. an OpenRouter style path). The first
|
||||
// slash separates prefix from bare model; the rest of the
|
||||
// string is the bare model verbatim.
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "openrouter/anthropic/claude-sonnet-4.6"
|
||||
cfg.provider = "anthropic"
|
||||
let mismatch = ModelPreflight.detectMismatch(cfg)
|
||||
#expect(mismatch?.prefixProvider == "openrouter")
|
||||
#expect(mismatch?.activeProvider == "anthropic")
|
||||
#expect(mismatch?.bareModel == "anthropic/claude-sonnet-4.6")
|
||||
}
|
||||
|
||||
@Test func detectMismatchTrimsWhitespaceBeforeComparing() {
|
||||
// A stray newline in a hand-edited config.yaml shouldn't read
|
||||
// as a mismatch when the trimmed values agree.
|
||||
var cfg = HermesConfig.empty
|
||||
cfg.model = "anthropic/claude-sonnet-4.6 "
|
||||
cfg.provider = " anthropic\n"
|
||||
#expect(ModelPreflight.detectMismatch(cfg) == nil)
|
||||
}
|
||||
}
|
||||
@@ -40,17 +40,22 @@ final class CronViewModel {
|
||||
let selectedID = selectedJob?.id
|
||||
Task.detached { [weak self] in
|
||||
// Three sync transport ops on remote — keep them off main.
|
||||
let jobs = svc.loadCronJobs()
|
||||
let skills = svc.loadSkills().flatMap { $0.skills.map(\.id) }.sorted()
|
||||
let refreshed = selectedID.flatMap { id in jobs.first(where: { $0.id == id }) }
|
||||
let output = refreshed.flatMap { svc.loadCronOutput(jobId: $0.id) }
|
||||
await MainActor.run { [weak self] in
|
||||
guard let self else { return }
|
||||
self.jobs = jobs
|
||||
self.availableSkills = skills
|
||||
if let refreshed { self.selectedJob = refreshed }
|
||||
if output != nil { self.jobOutput = output }
|
||||
self.isLoading = false
|
||||
// v2.8: instrumented so we can see how many SSH RTTs the
|
||||
// Cron tab actually costs in captures.
|
||||
await ScarfMon.measureAsync(.diskIO, "cron.load") {
|
||||
let jobs = svc.loadCronJobs()
|
||||
let skills = svc.loadSkills().flatMap { $0.skills.map(\.id) }.sorted()
|
||||
let refreshed = selectedID.flatMap { id in jobs.first(where: { $0.id == id }) }
|
||||
let output = refreshed.flatMap { svc.loadCronOutput(jobId: $0.id) }
|
||||
ScarfMon.event(.diskIO, "cron.load.jobs", count: jobs.count)
|
||||
await MainActor.run { [weak self] in
|
||||
guard let self else { return }
|
||||
self.jobs = jobs
|
||||
self.availableSkills = skills
|
||||
if let refreshed { self.selectedJob = refreshed }
|
||||
if output != nil { self.jobOutput = output }
|
||||
self.isLoading = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,21 +43,26 @@ final class MemoryViewModel {
|
||||
let svc = fileService
|
||||
let currentProfile = activeProfile
|
||||
// Sync transport calls would beach-ball the UI on remote — dispatch
|
||||
// off main, then commit results back on MainActor.
|
||||
// off main, then commit results back on MainActor. v2.8: wrapped
|
||||
// in ScarfMon so we can see how many SSH RTTs this load actually
|
||||
// costs (4 sequential SFTP reads on the slow path).
|
||||
Task.detached { [weak self] in
|
||||
let config = svc.loadConfig()
|
||||
let profiles = svc.loadMemoryProfiles()
|
||||
let profile = currentProfile.isEmpty ? config.memoryProfile : currentProfile
|
||||
let memory = svc.loadMemory(profile: profile)
|
||||
let user = svc.loadUserProfile(profile: profile)
|
||||
await MainActor.run { [weak self] in
|
||||
guard let self else { return }
|
||||
self.memoryProvider = config.memoryProvider
|
||||
self.profiles = profiles
|
||||
self.activeProfile = profile
|
||||
self.memoryContent = memory
|
||||
self.userContent = user
|
||||
self.isLoading = false
|
||||
await ScarfMon.measureAsync(.diskIO, "memory.load") {
|
||||
let config = svc.loadConfig()
|
||||
let profiles = svc.loadMemoryProfiles()
|
||||
let profile = currentProfile.isEmpty ? config.memoryProfile : currentProfile
|
||||
let memory = svc.loadMemory(profile: profile)
|
||||
let user = svc.loadUserProfile(profile: profile)
|
||||
ScarfMon.event(.diskIO, "memory.load.bytes", count: 0, bytes: memory.utf8.count + user.utf8.count)
|
||||
await MainActor.run { [weak self] in
|
||||
guard let self else { return }
|
||||
self.memoryProvider = config.memoryProvider
|
||||
self.profiles = profiles
|
||||
self.activeProfile = profile
|
||||
self.memoryContent = memory
|
||||
self.userContent = user
|
||||
self.isLoading = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user