test(scarfcore): fix cross-suite races + overlay-aware catalog tests

Pre-release verification surfaced 9 failures in `swift test` driven by two
issues — both fixed without changing production behaviour.

1. M3TransportTests + M5FeatureVMTests both held `.serialized` internally
   but ran in parallel with each other, racing on
   `ServerContext.sshTransportFactory` (a `nonisolated(unsafe)` static).
   Tried `@TaskLocal` first; reverted because production hot paths
   dispatch through `Task.detached` which severs TaskLocal inheritance.
   Final fix: move M3's three factory-injection tests + two
   HermesLogService tests + the `ScriptedTransport` test double into
   M5FeatureVMTests, the canonical factory-touching suite. M3 keeps its
   `.serialized` suite trait for the remaining (non-factory) tests, but
   the cross-suite race is gone because there's now exactly one suite
   that mutates the static.

2. `loadProviders()` returns the 6 hardcoded Hermes overlays (Nous Portal,
   Codex, Qwen, Gemini CLI, Copilot ACP, Arcee) on top of any models.dev
   catalog hits — added in v2.3 so the picker doesn't go dark when the
   cache is missing. `modelCatalogHandlesMissingAndMalformedFiles`
   asserted `.isEmpty`, which had been correct before that change.
   `modelCatalogLoadsSyntheticJSON` asserted `count == 2`, which was the
   catalog-only count. Both updated: the missing/malformed test now
   asserts the result is non-empty + every entry is `isOverlay`; the
   synthetic-JSON test filters `!isOverlay` before counting.

Verified: 163 tests across 12 suites pass on three consecutive runs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-04-25 07:44:42 +02:00
parent 54a0797334
commit 293e8341f5
4 changed files with 204 additions and 167 deletions
@@ -141,6 +141,15 @@ public struct ServerContext: Sendable, Hashable, Identifiable {
/// `nonisolated(unsafe)` annotation mirrors the same pattern
/// `SSHTransport.environmentEnricher` uses single-write at app
/// startup, many-read afterwards.
///
/// **Test usage.** Production sets this once at launch. Tests that need
/// to inject a fake transport must run inside `M5FeatureVMTests` (the
/// canonical `.serialized` suite that owns this static) running
/// factory-touching tests across multiple parallel suites races on this
/// var. `@TaskLocal` would scope cleanly, but the production hot paths
/// dispatch DB/SFTP reads through `Task.detached` which severs
/// TaskLocal inheritance, so the static-write pattern is the only one
/// that survives the call stack.
public typealias SSHTransportFactory = @Sendable (
_ id: ServerID,
_ config: SSHConfig,
@@ -166,7 +166,11 @@ import Foundation
let svc = ModelCatalogService(path: tmp.path)
let providers = svc.loadProviders()
// `loadProviders()` returns the merged result of the parsed catalog
// + the hardcoded overlay providers (Nous Portal, Codex, …). Filter
// to the catalog half here so the assertion is independent of the
// overlay table evolving.
let providers = svc.loadProviders().filter { !$0.isOverlay }
#expect(providers.count == 2)
// Alphabetical by display name → Anthropic, OpenAI.
#expect(providers[0].providerID == "anthropic")
@@ -193,18 +197,29 @@ import Foundation
}
@Test func modelCatalogHandlesMissingAndMalformedFiles() {
// Missing file → empty arrays, no crash.
// Missing or malformed catalog → no crash, no models.dev entries,
// but the hardcoded overlay providers (Nous Portal, Codex, Qwen,
// Gemini CLI, Copilot ACP, Arcee) still surface so the picker
// doesn't go dark when the cache is missing. Assert on the overlay
// shape rather than `.isEmpty` (which was correct before v2.3
// baked the overlays into `loadProviders()`).
let svc = ModelCatalogService(path: "/tmp/scarf-nonexistent-\(UUID().uuidString).json")
#expect(svc.loadProviders().isEmpty)
let missing = svc.loadProviders()
#expect(!missing.isEmpty)
#expect(missing.allSatisfy { $0.isOverlay })
#expect(missing.contains { $0.providerID == "nous" && $0.subscriptionGated })
#expect(svc.loadModels(for: "anthropic").isEmpty)
#expect(svc.provider(for: "anything") == nil)
// Malformed JSON → empty arrays, no crash, logger.error path exercised.
// Malformed JSON → same overlay-only result, no crash, logger.error
// path exercised.
let tmp = FileManager.default.temporaryDirectory.appendingPathComponent("scarf-bad-\(UUID().uuidString).json")
try? "not json".write(to: tmp, atomically: true, encoding: .utf8)
defer { try? FileManager.default.removeItem(at: tmp) }
let bad = ModelCatalogService(path: tmp.path)
#expect(bad.loadProviders().isEmpty)
let badProviders = bad.loadProviders()
#expect(!badProviders.isEmpty)
#expect(badProviders.allSatisfy { $0.isOverlay })
}
@Test func validateModelAcceptsCatalogHit() throws {
@@ -81,164 +81,13 @@ import Foundation
// MARK: - sshTransportFactory injection
@Test func sshTransportFactoryOverridesDefault() {
// Set up a mock factory that returns a `LocalTransport` regardless
// of the ServerKind easy way to prove the injection point
// routes to our override.
final class CountingBox: @unchecked Sendable {
var count = 0
func bump() { count += 1 }
}
let box = CountingBox()
let previous = ServerContext.sshTransportFactory
defer { ServerContext.sshTransportFactory = previous }
ServerContext.sshTransportFactory = { id, _, _ in
box.bump()
return LocalTransport(contextID: id)
}
let ctx = ServerContext(
id: UUID(),
displayName: "test",
kind: .ssh(SSHConfig(host: "h"))
)
let transport = ctx.makeTransport()
#expect(transport is LocalTransport)
#expect(box.count == 1)
}
@Test func sshTransportFactoryNilFallsBackToSSHTransport() {
let previous = ServerContext.sshTransportFactory
defer { ServerContext.sshTransportFactory = previous }
ServerContext.sshTransportFactory = nil
let ctx = ServerContext(
id: UUID(),
displayName: "test",
kind: .ssh(SSHConfig(host: "h"))
)
let transport = ctx.makeTransport()
#expect(transport is SSHTransport)
}
@Test func sshTransportFactoryIgnoredForLocalContext() {
let previous = ServerContext.sshTransportFactory
defer { ServerContext.sshTransportFactory = previous }
// Even if set, the factory is ONLY consulted for `.ssh` kinds
// `.local` always gets a `LocalTransport` directly.
ServerContext.sshTransportFactory = { _, _, _ in
Issue.record("factory called for local context")
return LocalTransport()
}
let transport = ServerContext.local.makeTransport()
#expect(transport is LocalTransport)
}
// MARK: - HermesLogService remote tail refactor
/// Minimal `ServerTransport` test double: `isRemote == true`, all
/// file I/O throws, `streamLines` returns a scripted sequence of
/// lines. Exists to verify HermesLogService's remote-tail path
/// pumps scripted output into the ring buffer without a real SSH
/// subprocess.
final class ScriptedTransport: ServerTransport, @unchecked Sendable {
public let contextID: ServerID = UUID()
public let isRemote: Bool = true
private let lines: [String]
init(lines: [String]) { self.lines = lines }
func readFile(_ path: String) throws -> Data { throw TransportError.other(message: "N/A") }
func writeFile(_ path: String, data: Data) throws { throw TransportError.other(message: "N/A") }
func fileExists(_ path: String) -> Bool { true }
func stat(_ path: String) -> FileStat? { FileStat(size: 0, mtime: Date(), isDirectory: false) }
func listDirectory(_ path: String) throws -> [String] { [] }
func createDirectory(_ path: String) throws {}
func removeFile(_ path: String) throws {}
func runProcess(executable: String, args: [String], stdin: Data?, timeout: TimeInterval?) throws -> ProcessResult {
// For readLastLines' one-shot tail return all scripted lines joined.
let content = lines.joined(separator: "\n") + "\n"
return ProcessResult(exitCode: 0, stdout: Data(content.utf8), stderr: Data())
}
#if !os(iOS)
func makeProcess(executable: String, args: [String]) -> Process {
// Required by protocol on non-iOS; not exercised in tests below.
Process()
}
#endif
func streamLines(executable: String, args: [String]) -> AsyncThrowingStream<String, Error> {
AsyncThrowingStream { continuation in
Task {
for line in lines {
continuation.yield(line)
}
continuation.finish()
}
}
}
func snapshotSQLite(remotePath: String) throws -> URL { URL(fileURLWithPath: remotePath) }
func watchPaths(_ paths: [String]) -> AsyncStream<WatchEvent> {
AsyncStream { $0.finish() }
}
}
// Note: We can't easily inject the ScriptedTransport into
// HermesLogService directly (it takes a `ServerContext` and constructs
// its transport internally via `context.makeTransport()`). Instead we
// wire the scripted transport through the factory injection point.
@Test func hermesLogServiceRemoteTailPumpsThroughStreamLines() async throws {
let scripted = ScriptedTransport(lines: [
"2026-04-22 12:00:00,001 INFO hermes.agent: starting",
"2026-04-22 12:00:01,002 WARNING hermes.gateway: low disk",
"2026-04-22 12:00:02,003 ERROR hermes.agent: boom",
])
let previous = ServerContext.sshTransportFactory
defer { ServerContext.sshTransportFactory = previous }
ServerContext.sshTransportFactory = { _, _, _ in scripted }
let ctx = ServerContext(
id: UUID(),
displayName: "t",
kind: .ssh(SSHConfig(host: "h"))
)
let service = HermesLogService(context: ctx)
await service.openLog(path: "/fake/agent.log")
defer { Task { await service.closeLog() } }
// Give the pump task a moment to drain the scripted stream.
try await Task.sleep(nanoseconds: 50_000_000)
let entries = await service.readNewLines()
#expect(entries.count == 3)
#expect(entries[0].level == .info)
#expect(entries[1].level == .warning)
#expect(entries[2].level == .error)
#expect(entries[2].message == "boom")
}
@Test func hermesLogServiceReadLastLinesUsesOneShotTail() async {
let scripted = ScriptedTransport(lines: ["x", "y", "z"])
let previous = ServerContext.sshTransportFactory
defer { ServerContext.sshTransportFactory = previous }
ServerContext.sshTransportFactory = { _, _, _ in scripted }
let ctx = ServerContext(
id: UUID(),
displayName: "t",
kind: .ssh(SSHConfig(host: "h"))
)
let service = HermesLogService(context: ctx)
// Doesn't need openLog first for the one-shot, but currentPath
// has to be set openLog does both.
await service.openLog(path: "/fake/agent.log")
defer { Task { await service.closeLog() } }
let entries = await service.readLastLines(count: 100)
#expect(entries.count == 3)
#expect(entries[0].message == "x")
#expect(entries[2].message == "z")
}
// MARK: - sshTransportFactory injection + HermesLogService remote tail
//
// The factory-touching tests that used to live in M3 (3 factory-injection
// tests + 2 HermesLogService remote-tail tests) were moved into
// `M5FeatureVMTests` (the canonical `.serialized` factory suite) in
// v2.5. Co-locating them eliminates the cross-suite race we hit during
// pre-release verification: M3 + M5 each held `.serialized` internally
// but ran in parallel with each other, clobbering the static. The tests
// themselves are unchanged in shape, just relocated.
}
@@ -45,8 +45,10 @@ import Foundation
/// Wrap each test body in a factory override so `ctx.makeTransport()`
/// returns a `LocalTransport` instead of trying to spawn a real SSH
/// subprocess. The `.serialized` suite trait guarantees no other
/// test races on the factory static.
/// subprocess. The `.serialized` suite trait guarantees no other test
/// in *this* suite races on the factory static. Cross-suite races used
/// to bite us when M3TransportTests ran in parallel fixed by moving
/// every factory-touching test into this suite.
@MainActor
private func withLocalTransportFactory<T>(
_ body: @MainActor () async throws -> T
@@ -351,6 +353,168 @@ import Foundation
#expect(remote.contextID == remoteCtx.id)
}
// MARK: - M3 sshTransportFactory injection + HermesLogService remote tail
//
// Moved from M3TransportTests in v2.5. Both suites had `.serialized`
// internally but ran in parallel with each other, clobbering the
// `ServerContext.sshTransportFactory` static. Co-locating them in a
// single `.serialized` suite fixes the race; logic is unchanged.
@Test @MainActor func sshTransportFactoryOverridesDefault() {
// Set up a mock factory that returns a `LocalTransport` regardless
// of the ServerKind easy way to prove the injection point
// routes to our override.
final class CountingBox: @unchecked Sendable {
var count = 0
func bump() { count += 1 }
}
let box = CountingBox()
let previous = ServerContext.sshTransportFactory
defer { ServerContext.sshTransportFactory = previous }
ServerContext.sshTransportFactory = { id, _, _ in
box.bump()
return LocalTransport(contextID: id)
}
let ctx = ServerContext(
id: UUID(),
displayName: "test",
kind: .ssh(SSHConfig(host: "h"))
)
let transport = ctx.makeTransport()
#expect(transport is LocalTransport)
#expect(box.count == 1)
}
@Test @MainActor func sshTransportFactoryNilFallsBackToSSHTransport() {
let previous = ServerContext.sshTransportFactory
defer { ServerContext.sshTransportFactory = previous }
ServerContext.sshTransportFactory = nil
let ctx = ServerContext(
id: UUID(),
displayName: "test",
kind: .ssh(SSHConfig(host: "h"))
)
let transport = ctx.makeTransport()
#expect(transport is SSHTransport)
}
@Test @MainActor func sshTransportFactoryIgnoredForLocalContext() {
let previous = ServerContext.sshTransportFactory
defer { ServerContext.sshTransportFactory = previous }
// Even if set, the factory is ONLY consulted for `.ssh` kinds
// `.local` always gets a `LocalTransport` directly.
ServerContext.sshTransportFactory = { _, _, _ in
Issue.record("factory called for local context")
return LocalTransport()
}
let transport = ServerContext.local.makeTransport()
#expect(transport is LocalTransport)
}
/// Minimal `ServerTransport` test double: `isRemote == true`, all
/// file I/O throws, `streamLines` returns a scripted sequence of
/// lines. Exists to verify HermesLogService's remote-tail path
/// pumps scripted output into the ring buffer without a real SSH
/// subprocess.
final class ScriptedTransport: ServerTransport, @unchecked Sendable {
public let contextID: ServerID = UUID()
public let isRemote: Bool = true
private let lines: [String]
init(lines: [String]) { self.lines = lines }
func readFile(_ path: String) throws -> Data { throw TransportError.other(message: "N/A") }
func writeFile(_ path: String, data: Data) throws { throw TransportError.other(message: "N/A") }
func fileExists(_ path: String) -> Bool { true }
func stat(_ path: String) -> FileStat? { FileStat(size: 0, mtime: Date(), isDirectory: false) }
func listDirectory(_ path: String) throws -> [String] { [] }
func createDirectory(_ path: String) throws {}
func removeFile(_ path: String) throws {}
func runProcess(executable: String, args: [String], stdin: Data?, timeout: TimeInterval?) throws -> ProcessResult {
// For readLastLines' one-shot tail return all scripted lines joined.
let content = lines.joined(separator: "\n") + "\n"
return ProcessResult(exitCode: 0, stdout: Data(content.utf8), stderr: Data())
}
#if !os(iOS)
func makeProcess(executable: String, args: [String]) -> Process {
// Required by protocol on non-iOS; not exercised in tests below.
Process()
}
#endif
func streamLines(executable: String, args: [String]) -> AsyncThrowingStream<String, Error> {
AsyncThrowingStream { continuation in
Task {
for line in lines {
continuation.yield(line)
}
continuation.finish()
}
}
}
func snapshotSQLite(remotePath: String) throws -> URL { URL(fileURLWithPath: remotePath) }
func watchPaths(_ paths: [String]) -> AsyncStream<WatchEvent> {
AsyncStream { $0.finish() }
}
}
@Test @MainActor func hermesLogServiceRemoteTailPumpsThroughStreamLines() async throws {
let scripted = ScriptedTransport(lines: [
"2026-04-22 12:00:00,001 INFO hermes.agent: starting",
"2026-04-22 12:00:01,002 WARNING hermes.gateway: low disk",
"2026-04-22 12:00:02,003 ERROR hermes.agent: boom",
])
let previous = ServerContext.sshTransportFactory
defer { ServerContext.sshTransportFactory = previous }
ServerContext.sshTransportFactory = { _, _, _ in scripted }
let ctx = ServerContext(
id: UUID(),
displayName: "t",
kind: .ssh(SSHConfig(host: "h"))
)
let service = HermesLogService(context: ctx)
await service.openLog(path: "/fake/agent.log")
defer { Task { await service.closeLog() } }
// Give the pump task a moment to drain the scripted stream.
try await Task.sleep(nanoseconds: 50_000_000)
let entries = await service.readNewLines()
#expect(entries.count == 3)
#expect(entries[0].level == .info)
#expect(entries[1].level == .warning)
#expect(entries[2].level == .error)
#expect(entries[2].message == "boom")
}
@Test @MainActor func hermesLogServiceReadLastLinesUsesOneShotTail() async {
let scripted = ScriptedTransport(lines: ["x", "y", "z"])
let previous = ServerContext.sshTransportFactory
defer { ServerContext.sshTransportFactory = previous }
ServerContext.sshTransportFactory = { _, _, _ in scripted }
let ctx = ServerContext(
id: UUID(),
displayName: "t",
kind: .ssh(SSHConfig(host: "h"))
)
let service = HermesLogService(context: ctx)
// Doesn't need openLog first for the one-shot, but currentPath
// has to be set openLog does both.
await service.openLog(path: "/fake/agent.log")
defer { Task { await service.closeLog() } }
let entries = await service.readLastLines(count: 100)
#expect(entries.count == 3)
#expect(entries[0].message == "x")
#expect(entries[2].message == "z")
}
// MARK: - M6 Cron editing (write paths)
//
// Live in this suite (rather than M6ConfigCronTests) because they