feat(scarfmon): B2 + B3 + iOS dashboard — file watcher, message hydration, dashboard load

Three areas instrumented in this batch. Both targets build clean.

B2 — Mac HermesFileWatcher (FSEvents + remote SSH poll)
- mac.fileWatcher.localFire (event) — every FSEvents change on a
  watched core or project path. High counts during streaming chats
  are normal (state.db-wal ticks per persisted message); high counts
  during idle suggest a runaway watcher install.
- mac.fileWatcher.remoteRestart (event, bytes=path-count) — fires
  once per SSH poller restart, with the union path count attached.
  Frequent restarts mean the project-list update path is churning.
- mac.fileWatcher.remoteDelta (event) — fires per non-empty change
  detected on the SSH poll. Pair with `ssh.streamScript` cadence to
  see actual poll latency.

B3 — Chat session boot + message hydration
- mac.fetchMessages (interval) + .rows (event) — bounded SQL
  fetch from HermesDataService. Catches slow paginated scrolls
  back through long sessions.
- mac.refreshSessionFromDB (interval) — RichChatViewModel's
  post-promptComplete refresh that picks up cost/token data.
- mac.hydrateMessages (interval) + .rows (event) — full session-boot
  hydration in RichChatViewModel.loadSessionHistory. Was the suspected
  trigger of the 22-bubble session-start storms in the Phase 3a
  baseline; now precisely measurable.

iOS Dashboard (resolves the original "out of sync" mystery)
- ios.loadDashboard (interval) — wraps the four dataService.fetch*
  Citadel SFTP round-trips in IOSDashboardViewModel.load().
- ios.allSessions.count (event) — sidebar list size after each
  load, correlates load latency with list growth.
- ios.dashboardRefresh.trigger (event) — fires only on
  pull-to-refresh, separates that entry path from initial appear.

**Architectural finding:** the original v2.6.0 user feedback
("chat out of sync iOS↔Mac on fast LAN") is now firmly attributable
to a single cause: iOS does NOT subscribe to a file watcher. The
dashboard refresh path is appear-time + pull-to-refresh only.
`CitadelServerTransport.watchPaths()` is effectively dead code on
iOS today; nobody calls it. Earlier A1 instrumentation (commit
9df7142) put measure points on it, which is why captures showed
zero `ios.fileWatcher.tick` events. Future work: either add a
foregrounded poll loop to iOS, or thread the file watcher into
the dashboard subscription. Documented in the ScarfMon roadmap
memory.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-05-04 23:52:11 +02:00
parent 96af545e66
commit bd9bacb8b3
4 changed files with 56 additions and 27 deletions
@@ -188,22 +188,26 @@ public actor HermesDataService {
limit: Int,
before: Int? = nil
) async -> [HermesMessage] {
let sql: String
let params: [SQLValue]
if let before {
sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? AND id < ? ORDER BY id DESC LIMIT ?"
params = [.text(sessionId), .integer(Int64(before)), .integer(Int64(limit))]
} else {
sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? ORDER BY id DESC LIMIT ?"
params = [.text(sessionId), .integer(Int64(limit))]
}
do {
let rows = try await backend.query(sql, params: params)
// Caller wants chronological (oldest-first) order; the SELECT
// is DESC for the LIMIT to bite the newest rows, so reverse.
return rows.map { messageFromRow($0) }.reversed()
} catch {
return []
await ScarfMon.measureAsync(.sessionLoad, "mac.fetchMessages") {
let sql: String
let params: [SQLValue]
if let before {
sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? AND id < ? ORDER BY id DESC LIMIT ?"
params = [.text(sessionId), .integer(Int64(before)), .integer(Int64(limit))]
} else {
sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? ORDER BY id DESC LIMIT ?"
params = [.text(sessionId), .integer(Int64(limit))]
}
do {
let rows = try await backend.query(sql, params: params)
// Caller wants chronological (oldest-first) order; the SELECT
// is DESC for the LIMIT to bite the newest rows, so reverse.
let messages = rows.map { messageFromRow($0) }.reversed() as [HermesMessage]
ScarfMon.event(.sessionLoad, "mac.fetchMessages.rows", count: messages.count)
return messages
} catch {
return []
}
}
}
@@ -470,13 +470,15 @@ public final class RichChatViewModel {
/// Re-fetch session metadata from DB to pick up cost/token updates.
public func refreshSessionFromDB() async {
guard let sessionId else { return }
let opened = await dataService.open()
guard opened else { return }
if let session = await dataService.fetchSession(id: sessionId) {
currentSession = session
await ScarfMon.measureAsync(.sessionLoad, "mac.refreshSessionFromDB") {
guard let sessionId else { return }
let opened = await dataService.open()
guard opened else { return }
if let session = await dataService.fetchSession(id: sessionId) {
currentSession = session
}
await dataService.close()
}
await dataService.close()
}
// MARK: - ACP Event Handling
@@ -1015,6 +1017,7 @@ public final class RichChatViewModel {
/// Load message history from the DB, optionally combining an origin session
/// (e.g., CLI session) with the current ACP session.
public func loadSessionHistory(sessionId: String, acpSessionId: String? = nil) async {
await ScarfMon.measureAsync(.sessionLoad, "mac.hydrateMessages") {
self.sessionId = sessionId
// Force a fresh snapshot pull on remote contexts. An earlier open()
// would have cached a stale copy — on resume we need whatever
@@ -1100,7 +1103,9 @@ public final class RichChatViewModel {
.map(\.id)
.min()
hasMoreHistory = moreHistory
ScarfMon.event(.sessionLoad, "mac.hydrateMessages.rows", count: messages.count)
buildMessageGroups()
} // end measureAsync(.sessionLoad, "mac.hydrateMessages")
}
// MARK: - Load Earlier (pagination)
@@ -70,10 +70,13 @@ public final class IOSDashboardViewModel {
return
}
stats = await dataService.fetchStats()
recentSessions = await dataService.fetchSessions(limit: 5)
allSessions = await dataService.fetchSessions(limit: 25)
sessionPreviews = await dataService.fetchSessionPreviews(limit: 25)
await ScarfMon.measureAsync(.sessionLoad, "ios.loadDashboard") {
stats = await dataService.fetchStats()
recentSessions = await dataService.fetchSessions(limit: 5)
allSessions = await dataService.fetchSessions(limit: 25)
sessionPreviews = await dataService.fetchSessionPreviews(limit: 25)
}
ScarfMon.event(.sessionLoad, "ios.allSessions.count", count: allSessions.count)
// Attribution lookup (pass-2 UX): load the session→project
// sidecar + project registry once so Dashboard rows can show
@@ -126,6 +129,7 @@ public final class IOSDashboardViewModel {
/// Called from the pull-to-refresh gesture.
///
/// Records the `ios.dashboardRefresh.trigger` ScarfMon event and then
/// re-runs `load()`.
public func refresh() async {
// Emitted here rather than inside `load()` so that refresh-driven
// loads can be told apart from other entry paths into `load()`.
ScarfMon.event(.sessionLoad, "ios.dashboardRefresh.trigger", count: 1)
await load()
}
}
@@ -76,11 +76,21 @@ final class HermesFileWatcher {
/// (Re)start the SSH polling stream over the union of `watchedCorePaths`
/// and the current `remoteProjectPaths`. Called on initial start and
/// whenever `updateProjectWatches` changes the project set.
///
/// ScarfMon `mac.fileWatcher.remoteRestart` (event) fires once per
/// poller restart with `bytes` carrying the path count. Frequent
/// restarts mean the project-list update path is churning; pair
/// with `mac.fileWatcher.remoteTick` from the upstream transport
/// (`ssh.streamScript` / `transport.watchPaths`) to see actual
/// poll cadence.
private func startRemotePoller() {
remotePollTask?.cancel()
let stream = transport.watchPaths(watchedCorePaths + remoteProjectPaths)
let pathSet = watchedCorePaths + remoteProjectPaths
ScarfMon.event(.transport, "mac.fileWatcher.remoteRestart", count: 1, bytes: pathSet.count)
let stream = transport.watchPaths(pathSet)
remotePollTask = Task { [weak self] in
for await _ in stream {
ScarfMon.event(.transport, "mac.fileWatcher.remoteDelta", count: 1)
await MainActor.run { [weak self] in
self?.lastChangeDate = Date()
}
@@ -146,6 +156,12 @@ final class HermesFileWatcher {
queue: .main
)
source.setEventHandler { [weak self] in
// ScarfMon fires every time FSEvents detects a change on
// a watched core or project path. High counts during
// streaming chats are normal (state.db-wal ticks per
// message persisted); high counts when nothing's happening
// suggest a runaway watcher install.
ScarfMon.event(.transport, "mac.fileWatcher.localFire", count: 1)
self?.lastChangeDate = Date()
}
source.setCancelHandler {