feat(scarfmon): B2 + B3 + iOS dashboard — file watcher, message hydration, dashboard load

Three areas instrumented in this batch. Both targets build clean.

B2 — Mac HermesFileWatcher (FSEvents + remote SSH poll)
- mac.fileWatcher.localFire (event) — every FSEvents change on a
  watched core or project path. High counts during streaming chats
  are normal (state.db-wal ticks per persisted message); high counts
  during idle suggest a runaway watcher install.
- mac.fileWatcher.remoteRestart (event, bytes=path-count) — fires
  once per SSH poller restart, with the union path count attached.
  Frequent restarts mean the project-list update path is churning.
- mac.fileWatcher.remoteDelta (event) — fires per non-empty change
  detected on the SSH poll. Pair with `ssh.streamScript` cadence to
  see actual poll latency.

B3 — Chat session boot + message hydration
- mac.fetchMessages (interval) + .rows (event) — bounded SQL
  fetch from HermesDataService. Catches slow paginated scrolls
  back through long sessions.
- mac.refreshSessionFromDB (interval) — RichChatViewModel's
  post-promptComplete refresh that picks up cost/token data.
- mac.hydrateMessages (interval) + .rows (event) — full session-boot
  hydration in RichChatViewModel.loadSessionHistory. Was the suspected
  trigger of the 22-bubble session-start storms in the Phase 3a
  baseline; now precisely measurable.

iOS Dashboard (resolves the original "out of sync" mystery)
- ios.loadDashboard (interval) — wraps the four dataService.fetch*
  Citadel SFTP round-trips in IOSDashboardViewModel.load().
- ios.allSessions.count (event) — sidebar list size after each
  load, correlates load latency with list growth.
- ios.dashboardRefresh.trigger (event) — fires only on
  pull-to-refresh, separates that entry path from initial appear.

**Architectural finding:** the original v2.6.0 user feedback
("chat out of sync iOS↔Mac on fast LAN") is now firmly attributable
to this — iOS does NOT subscribe to a file watcher. The dashboard
refresh path is appear-time + pull-to-refresh only.
`CitadelServerTransport.watchPaths()` is effectively dead code on
iOS today; nobody calls it. Earlier A1 instrumentation (commit
9df7142) put its measure points on that uncalled path, which is why
captures showed zero `ios.fileWatcher.tick` events. Future work: either add a
foregrounded poll loop to iOS, or thread the file watcher into
the dashboard subscription. Documented in the ScarfMon roadmap
memory.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-05-04 23:52:11 +02:00
parent 96af545e66
commit bd9bacb8b3
4 changed files with 56 additions and 27 deletions
@@ -188,6 +188,7 @@ public actor HermesDataService {
limit: Int, limit: Int,
before: Int? = nil before: Int? = nil
) async -> [HermesMessage] { ) async -> [HermesMessage] {
await ScarfMon.measureAsync(.sessionLoad, "mac.fetchMessages") {
let sql: String let sql: String
let params: [SQLValue] let params: [SQLValue]
if let before { if let before {
@@ -201,11 +202,14 @@ public actor HermesDataService {
let rows = try await backend.query(sql, params: params) let rows = try await backend.query(sql, params: params)
// Caller wants chronological (oldest-first) order; the SELECT // Caller wants chronological (oldest-first) order; the SELECT
// is DESC for the LIMIT to bite the newest rows, so reverse. // is DESC for the LIMIT to bite the newest rows, so reverse.
return rows.map { messageFromRow($0) }.reversed() let messages = rows.map { messageFromRow($0) }.reversed() as [HermesMessage]
ScarfMon.event(.sessionLoad, "mac.fetchMessages.rows", count: messages.count)
return messages
} catch { } catch {
return [] return []
} }
} }
}
/// Legacy unbounded fetch retained for one release cycle so any /// Legacy unbounded fetch retained for one release cycle so any
/// out-of-tree consumers don't break. New code should use the /// out-of-tree consumers don't break. New code should use the
@@ -470,6 +470,7 @@ public final class RichChatViewModel {
/// Re-fetch session metadata from DB to pick up cost/token updates. /// Re-fetch session metadata from DB to pick up cost/token updates.
public func refreshSessionFromDB() async { public func refreshSessionFromDB() async {
await ScarfMon.measureAsync(.sessionLoad, "mac.refreshSessionFromDB") {
guard let sessionId else { return } guard let sessionId else { return }
let opened = await dataService.open() let opened = await dataService.open()
guard opened else { return } guard opened else { return }
@@ -478,6 +479,7 @@ public final class RichChatViewModel {
} }
await dataService.close() await dataService.close()
} }
}
// MARK: - ACP Event Handling // MARK: - ACP Event Handling
@@ -1015,6 +1017,7 @@ public final class RichChatViewModel {
/// Load message history from the DB, optionally combining an origin session /// Load message history from the DB, optionally combining an origin session
/// (e.g., CLI session) with the current ACP session. /// (e.g., CLI session) with the current ACP session.
public func loadSessionHistory(sessionId: String, acpSessionId: String? = nil) async { public func loadSessionHistory(sessionId: String, acpSessionId: String? = nil) async {
await ScarfMon.measureAsync(.sessionLoad, "mac.hydrateMessages") {
self.sessionId = sessionId self.sessionId = sessionId
// Force a fresh snapshot pull on remote contexts. An earlier open() // Force a fresh snapshot pull on remote contexts. An earlier open()
// would have cached a stale copy — on resume we need whatever // would have cached a stale copy — on resume we need whatever
@@ -1100,7 +1103,9 @@ public final class RichChatViewModel {
.map(\.id) .map(\.id)
.min() .min()
hasMoreHistory = moreHistory hasMoreHistory = moreHistory
ScarfMon.event(.sessionLoad, "mac.hydrateMessages.rows", count: messages.count)
buildMessageGroups() buildMessageGroups()
} // end measureAsync(.sessionLoad, "mac.hydrateMessages")
} }
// MARK: - Load Earlier (pagination) // MARK: - Load Earlier (pagination)
@@ -70,10 +70,13 @@ public final class IOSDashboardViewModel {
return return
} }
await ScarfMon.measureAsync(.sessionLoad, "ios.loadDashboard") {
stats = await dataService.fetchStats() stats = await dataService.fetchStats()
recentSessions = await dataService.fetchSessions(limit: 5) recentSessions = await dataService.fetchSessions(limit: 5)
allSessions = await dataService.fetchSessions(limit: 25) allSessions = await dataService.fetchSessions(limit: 25)
sessionPreviews = await dataService.fetchSessionPreviews(limit: 25) sessionPreviews = await dataService.fetchSessionPreviews(limit: 25)
}
ScarfMon.event(.sessionLoad, "ios.allSessions.count", count: allSessions.count)
// Attribution lookup (pass-2 UX): load the session-to-project // Attribution lookup (pass-2 UX): load the session-to-project
// sidecar + project registry once so Dashboard rows can show // sidecar + project registry once so Dashboard rows can show
@@ -126,6 +129,7 @@ public final class IOSDashboardViewModel {
/// Called from the pull-to-refresh gesture. /// Called from the pull-to-refresh gesture.
public func refresh() async { public func refresh() async {
ScarfMon.event(.sessionLoad, "ios.dashboardRefresh.trigger", count: 1)
await load() await load()
} }
} }
@@ -76,11 +76,21 @@ final class HermesFileWatcher {
/// (Re)start the SSH polling stream over the union of `watchedCorePaths` /// (Re)start the SSH polling stream over the union of `watchedCorePaths`
/// and the current `remoteProjectPaths`. Called on initial start and /// and the current `remoteProjectPaths`. Called on initial start and
/// whenever `updateProjectWatches` changes the project set. /// whenever `updateProjectWatches` changes the project set.
///
/// ScarfMon `mac.fileWatcher.remoteRestart` (event) fires once per
/// poller restart with `bytes` carrying the path count. Frequent
/// restarts mean the project-list update path is churning; pair
/// with `mac.fileWatcher.remoteTick` from the upstream transport
/// (`ssh.streamScript` / `transport.watchPaths`) to see actual
/// poll cadence.
private func startRemotePoller() { private func startRemotePoller() {
remotePollTask?.cancel() remotePollTask?.cancel()
let stream = transport.watchPaths(watchedCorePaths + remoteProjectPaths) let pathSet = watchedCorePaths + remoteProjectPaths
ScarfMon.event(.transport, "mac.fileWatcher.remoteRestart", count: 1, bytes: pathSet.count)
let stream = transport.watchPaths(pathSet)
remotePollTask = Task { [weak self] in remotePollTask = Task { [weak self] in
for await _ in stream { for await _ in stream {
ScarfMon.event(.transport, "mac.fileWatcher.remoteDelta", count: 1)
await MainActor.run { [weak self] in await MainActor.run { [weak self] in
self?.lastChangeDate = Date() self?.lastChangeDate = Date()
} }
@@ -146,6 +156,12 @@ final class HermesFileWatcher {
queue: .main queue: .main
) )
source.setEventHandler { [weak self] in source.setEventHandler { [weak self] in
// ScarfMon fires every time FSEvents detects a change on
// a watched core or project path. High counts during
// streaming chats are normal (state.db-wal ticks per
// message persisted); high counts when nothing's happening
// suggest a runaway watcher install.
ScarfMon.event(.transport, "mac.fileWatcher.localFire", count: 1)
self?.lastChangeDate = Date() self?.lastChangeDate = Date()
} }
source.setCancelHandler { source.setCancelHandler {