diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift
index 14d080a..6c21d1b 100644
--- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift
+++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift
@@ -27,6 +27,28 @@ public enum QueryDefaults: Sendable {
     public nonisolated static let defaultSilenceThreshold = 200
 }
 
+/// Page sizes for `HermesDataService.fetchMessages(sessionId:limit:before:)`.
+/// Centralized so iOS, Mac, and the polling code paths can pick a
+/// consistent budget — and so we have one knob to retune if perf
+/// concerns shift.
+public enum HistoryPageSize: Sendable {
+    /// Initial chat-history load: covers the vast majority of
+    /// sessions in one fetch while keeping the snapshot read bounded
+    /// for the rare 1000+-message session.
+    public nonisolated static let initial = 200
+    /// Reconnection reconcile against the DB. 200 rows is plenty —
+    /// disconnects don't generate hundreds of unseen messages.
+    public nonisolated static let reconcile = 200
+    /// Mac sessions detail view. Larger to reduce paging UX in the
+    /// desktop browser-style read; the desktop has the screen real
+    /// estate and memory headroom for it.
+    public nonisolated static let macSessionDetail = 500
+    /// Terminal-mode polling refresh. Same 500-row budget as Mac
+    /// detail; covers sessions long enough that the user is actively
+    /// scrolling but bounded to keep each poll tick cheap.
+    public nonisolated static let polling = 500
+}
+
 // MARK: - File Size Formatting
 
 public enum FileSizeUnit: Sendable {
diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift
index 9f14bc4..af76e5b 100644
--- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift
+++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift
@@ -61,6 +61,26 @@ public actor HermesDataService {
     /// instead of an empty Dashboard with no explanation.
     public private(set) var lastOpenError: String?
 
+    /// Modification date of the underlying state.db that backs the
+    /// currently-open connection. For local contexts this tracks the
+    /// live DB's mtime; for remote contexts it's the cached snapshot's
+    /// mtime — which equals "when did we last get fresh data."
+    public private(set) var lastSnapshotMtime: Date?
+
+    /// True when a `snapshotSQLite` pull failed and the open succeeded
+    /// against a previously-cached snapshot instead of a fresh one.
+    /// Views render a "Last updated X ago" affordance when this is set
+    /// alongside `lastOpenError`. Always `false` for local contexts.
+    public private(set) var isUsingStaleSnapshot: Bool = false
+
+    /// Convenience: how long ago the cached snapshot was written, when
+    /// we're using a stale snapshot. `nil` when the snapshot is fresh
+    /// or no mtime could be read.
+    public var staleAge: TimeInterval? {
+        guard isUsingStaleSnapshot, let m = lastSnapshotMtime else { return nil }
+        return Date().timeIntervalSince(m)
+    }
+
     public let context: ServerContext
 
     private let transport: any ServerTransport
@@ -70,6 +90,18 @@ public actor HermesDataService {
     }
 
     public func open() async -> Bool {
+        await openInternal(forceFresh: false)
+    }
+
+    /// Variant that refuses the stale-snapshot fallback. Used by call
+    /// sites that genuinely need post-write consistency — most notably
+    /// the chat session-history reload, where a stale snapshot would
+    /// hide messages the agent just streamed.
+    private func openStrict() async -> Bool {
+        await openInternal(forceFresh: true)
+    }
+
+    private func openInternal(forceFresh: Bool) async -> Bool {
         if db != nil { return true }
         let localPath: String
         if context.isRemote {
@@ -86,10 +118,30 @@
             )
             localPath = url.path
             lastOpenError = nil
+            isUsingStaleSnapshot = false
+            lastSnapshotMtime = mtime(at: url)
         } catch {
-            lastOpenError = humanize(error)
-            Self.logger.warning("snapshotSQLite failed: \(error.localizedDescription, privacy: .public)")
-            return false
+            // Fresh pull failed. If the caller demanded fresh data
+            // (`forceFresh: true`) OR there's no usable cache on
+            // disk, surface the error and bail. Otherwise serve
+            // the cached snapshot with `isUsingStaleSnapshot = true`
+            // so views can render a "Last updated X ago" banner.
+            if !forceFresh,
+               let cached = transport.cachedSnapshotPath,
+               FileManager.default.fileExists(atPath: cached.path)
+            {
+                localPath = cached.path
+                isUsingStaleSnapshot = true
+                lastSnapshotMtime = mtime(at: cached)
+                lastOpenError = humanize(error) // user still sees why it's stale
+                Self.logger.warning(
+                    "Using stale snapshot after pull failure: \(error.localizedDescription, privacy: .public)"
+                )
+            } else {
+                lastOpenError = humanize(error)
+                Self.logger.warning("snapshotSQLite failed: \(error.localizedDescription, privacy: .public)")
+                return false
+            }
         }
         } else {
             localPath = context.paths.stateDB
@@ -97,6 +149,8 @@
                 lastOpenError = "Hermes state database not found at \(localPath)."
                 return false
             }
+            isUsingStaleSnapshot = false
+            lastSnapshotMtime = mtime(at: URL(fileURLWithPath: localPath))
         }
         // Remote snapshots are point-in-time copies that no one writes to;
         // opening them with `immutable=1` tells SQLite to skip WAL/SHM and
@@ -151,17 +205,27 @@
         return desc
     }
 
-    /// Force a fresh snapshot pull + reopen. Used on session-load and in
-    /// any path that needs the UI to reflect writes Hermes just made.
-    /// Without this, remote snapshots would be frozen at the first `open()`
-    /// for the app's lifetime — new messages added to a resumed session
-    /// would never appear because the snapshot was pulled before they were
-    /// written. Local contexts pay essentially nothing: close+reopen on a
-    /// live DB is a no-op.
+    /// Close the current connection and re-open with a fresh snapshot
+    /// pull (when remote). When `forceFresh` is `false` (default) and
+    /// the snapshot pull fails, falls back to the cached snapshot —
+    /// `isUsingStaleSnapshot` is set so views can render a "Last
+    /// updated X ago" banner. Pass `forceFresh: true` from call sites
+    /// that genuinely need post-write consistency (chat session
+    /// history reload), where stale data would hide messages the
+    /// agent just streamed.
     @discardableResult
-    public func refresh() async -> Bool {
+    public func refresh(forceFresh: Bool = false) async -> Bool {
         close()
-        return await open()
+        return await openInternal(forceFresh: forceFresh)
+    }
+
+    /// Read the modification date of a local file. Returns `nil` if
+    /// the file is unreachable or has no mtime metadata. Used to
+    /// stamp `lastSnapshotMtime` so views can show "Last updated
+    /// X ago" without each one duplicating the FileManager dance.
+    private nonisolated func mtime(at url: URL) -> Date? {
+        let attrs = try? FileManager.default.attributesOfItem(atPath: url.path)
+        return attrs?[.modificationDate] as? Date
     }
 
     public func close() {
@@ -294,6 +358,50 @@
         return cols
     }
 
+    /// Bounded message fetch keyed by message id (monotonic per row,
+    /// safer than timestamp-based pagination because streaming chunk
+    /// timestamps can collide). Returns the most recent `limit`
+    /// messages older than `before` (when supplied) in chronological
+    /// (ASC) order ready to display. Pass `before: nil` for the
+    /// initial load — the DB returns the newest `limit` rows.
+    public func fetchMessages(
+        sessionId: String,
+        limit: Int,
+        before: Int? = nil
+    ) -> [HermesMessage] {
+        guard let db else { return [] }
+        let sql: String
+        if before != nil {
+            sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? AND id < ? ORDER BY id DESC LIMIT ?"
+        } else {
+            sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? ORDER BY id DESC LIMIT ?"
+        }
+        var stmt: OpaquePointer?
+        guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [] }
+        defer { sqlite3_finalize(stmt) }
+        sqlite3_bind_text(stmt, 1, sessionId, -1, sqliteTransient)
+        if let before {
+            sqlite3_bind_int(stmt, 2, Int32(before))
+            sqlite3_bind_int(stmt, 3, Int32(limit))
+        } else {
+            sqlite3_bind_int(stmt, 2, Int32(limit))
+        }
+
+        var messages: [HermesMessage] = []
+        while sqlite3_step(stmt) == SQLITE_ROW {
+            messages.append(messageFromRow(stmt!))
+        }
+        // Caller wants chronological (oldest-first) order; the SELECT
+        // is DESC for the LIMIT to bite the newest rows, so reverse.
+        return messages.reversed()
+    }
+
+    /// Legacy unbounded fetch retained for one release cycle so any
+    /// out-of-tree consumers don't break. New code should use the
+    /// bounded `fetchMessages(sessionId:limit:before:)` variant —
+    /// snapshot loads on 1000+-message sessions stall the UI when
+    /// they materialize the whole history at once.
+    @available(*, deprecated, message: "Use fetchMessages(sessionId:limit:before:) instead.")
     public func fetchMessages(sessionId: String) -> [HermesMessage] {
         guard let db else { return [] }
         let sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? ORDER BY timestamp ASC"
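To make the new stale-data surface concrete, here is a minimal sketch of a view that renders the "Last updated X ago" affordance off `staleAge` and `lastOpenError`. The view name, styling, and formatter choice are illustrative, not part of this diff; only `staleAge` / `lastOpenError` come from the code above.

```swift
import SwiftUI

// Hypothetical consumer of HermesDataService.staleAge / lastOpenError.
// The owning view model is assumed to mirror the actor's properties
// into these two values on each refresh.
struct StaleDataBanner: View {
    let staleAge: TimeInterval?
    let lastOpenError: String?

    var body: some View {
        if let staleAge {
            Label("Last updated \(Self.format(staleAge)) ago",
                  systemImage: "clock.arrow.circlepath")
                .font(.caption)
                .help(lastOpenError ?? "") // surface *why* it's stale
        }
    }

    private static func format(_ interval: TimeInterval) -> String {
        let f = DateComponentsFormatter()
        f.allowedUnits = [.day, .hour, .minute]
        f.unitsStyle = .abbreviated
        f.maximumUnitCount = 1 // "2h", not "2h 13m"
        return f.string(from: interval) ?? "a while"
    }
}
```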
diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift
index a4a8b20..3d7049c 100644
--- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift
+++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift
@@ -247,6 +247,11 @@
         URL(fileURLWithPath: remotePath)
     }
 
+    /// Local transport reads the live DB directly — there's no cached
+    /// snapshot to fall back to (and no failure mode where falling back
+    /// would help, since a missing local file is missing both ways).
+    public var cachedSnapshotPath: URL? { nil }
+
     // MARK: - Watching
 
     #if canImport(Darwin)
diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift
index 8316301..3bce092 100644
--- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift
+++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift
@@ -603,6 +603,14 @@ public struct SSHTransport: ServerTransport {
         return URL(fileURLWithPath: localPath)
     }
 
+    /// Path where the most recent successful snapshot was written —
+    /// returned even when the remote is currently unreachable. The
+    /// data service falls back to this when `snapshotSQLite` throws so
+    /// Dashboard / Sessions / Chat-history stay viewable offline.
+    public var cachedSnapshotPath: URL? {
+        URL(fileURLWithPath: snapshotDir + "/state.db")
+    }
+
     // MARK: - Watching
 
     public func watchPaths(_ paths: [String]) -> AsyncStream {
diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/ServerTransport.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/ServerTransport.swift
index affde53..08e75a8 100644
--- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/ServerTransport.swift
+++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/ServerTransport.swift
@@ -90,6 +90,19 @@ public protocol ServerTransport: Sendable {
     /// `~/Library/Caches/scarf//state.db`, returning that URL.
     nonisolated func snapshotSQLite(remotePath: String) throws -> URL
 
+    /// Local filesystem URL where this transport caches its SQLite snapshot,
+    /// returned even when the remote is unreachable. Callers should
+    /// `FileManager.default.fileExists(atPath:)` before reading — the
+    /// transport can't atomically check existence and return the URL
+    /// in one step without TOCTOU. Local transports return `nil`
+    /// (their data is the live DB, not a cache).
+    ///
+    /// Used by `HermesDataService.open()` to fall back to the last
+    /// successful snapshot when a fresh `snapshotSQLite` call fails,
+    /// so the app keeps showing data with a "Last updated X ago"
+    /// affordance instead of a blank screen.
+    nonisolated var cachedSnapshotPath: URL? { get }
+
     // MARK: - Watching
 
     /// Observe changes to a set of paths and yield events when any of them
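The protocol doc above prescribes a caller-side pattern; a short sketch of it in one place (the helper name is made up, the contract is the one documented on `cachedSnapshotPath`):

```swift
import Foundation

// Existence-check the advisory URL before handing it to SQLite, and
// treat a vanished file as "no cache" rather than an error.
func usableCachedSnapshot(of transport: any ServerTransport) -> URL? {
    guard let cached = transport.cachedSnapshotPath,
          FileManager.default.fileExists(atPath: cached.path)
    else { return nil }
    // Still racy (TOCTOU): the open that follows must handle failure.
    return cached
}
```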
diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/ConnectionStatusViewModel.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/ConnectionStatusViewModel.swift
index 74762b4..f0ffefe 100644
--- a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/ConnectionStatusViewModel.swift
+++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/ConnectionStatusViewModel.swift
@@ -16,7 +16,7 @@ public final class ConnectionStatusViewModel {
     #endif
 
     public enum Status: Equatable {
-        /// Healthy: SSH connected AND we can read `~/.hermes/config.yaml`.
+        /// Healthy: SSH connected AND we can read `~/.hermes/state.db`.
         case connected
         /// SSH connects but the follow-up read-access probe failed. Data
         /// views will be empty until this is resolved.
@@ -38,14 +38,17 @@
     /// Specific tier-2 failure mode emitted by the probe script. Used to
     /// drive both the pill copy and the popover hint (issue #53).
     public enum DegradedCause: Equatable {
-        /// `config.yaml` is missing entirely. Most common cause: Hermes
-        /// hasn't run `setup` yet on this remote.
+        /// `state.db` is missing entirely. Most common cause: Hermes
+        /// is installed but no session has run on this remote yet.
+        /// Case name kept as `configMissing` for back-compat with
+        /// callers that pattern-match on it; "config" here is loose
+        /// for "Scarf's required state file."
        case configMissing
        /// `~/.hermes` itself doesn't exist. Hermes isn't installed for
        /// the SSH user on this host.
        case homeMissing
        /// File exists but the SSH user can't read it. Permission /
-        /// ownership mismatch.
+        /// ownership mismatch. Same back-compat note as above.
        case configUnreadable
         /// `~/.hermes/active_profile` points at a non-default Hermes
         /// profile and the configured Hermes home doesn't carry the
@@ -110,10 +113,18 @@
         let hermesHome = context.paths.home
         // Two-tier probe in one SSH round-trip:
         //   tier 1: `true` — raw connectivity / auth / ControlMaster path
-        //   tier 2: `test -r $HERMESHOME/config.yaml` — can we actually
-        //           read the file Dashboard reads on every tick? Green pill
-        //           only if both pass; yellow "degraded" if tier 1 passes
-        //           but tier 2 fails (the exact symptom in issue #19).
+        //   tier 2: `test -r $HERMESHOME/state.db` — can we actually read
+        //           the file Dashboard / Sessions / Activity all hit on
+        //           every tick? Green pill only if both pass.
+        //
+        // Probe historically targeted `config.yaml`, but Hermes v0.11+
+        // doesn't materialize that file eagerly — it ships with sane
+        // defaults and only writes config.yaml when the user actually
+        // changes something. Result: a freshly-installed Hermes that's
+        // running, persisting sessions, and serving Scarf was being
+        // marked "degraded — config missing" indefinitely. `state.db`
+        // is created on first agent run and is the actual surface
+        // Scarf depends on, so we probe that instead.
         // Script emits two lines: TIER1: and TIER2:.
         let homeArg: String
         if hermesHome.hasPrefix("~/") {
@@ -124,22 +135,21 @@
             homeArg = "\"\(hermesHome.replacingOccurrences(of: "\"", with: "\\\""))\""
         }
         // Probe emits a granular `TIER2:1:` code so the pill can
-        // surface a specific hint (issue #53) instead of the prior
-        // collapsed-to-binary "can't read config.yaml". Causes:
+        // surface a specific hint (issue #53). Causes:
         //   no-home   — $H itself doesn't exist
-        //   missing   — config.yaml absent
+        //   missing   — state.db absent (Hermes hasn't been run yet)
         //   perm      — exists but unreadable by SSH user
-        //   profile:  — config missing AND ~/.hermes/active_profile
+        //   profile:  — state.db missing AND ~/.hermes/active_profile
         //               points at a Hermes profile, suggesting Scarf
         //               is reading the wrong dir
         let script = """
         echo TIER1:0
         H=\(homeArg)
-        if [ -r "$H/config.yaml" ]; then
+        if [ -r "$H/state.db" ]; then
             echo TIER2:0
         elif [ ! -d "$H" ]; then
             echo TIER2:1:no-home
-        elif [ ! -e "$H/config.yaml" ]; then
+        elif [ ! -e "$H/state.db" ]; then
             ACTIVE=""
             if [ -r "$HOME/.hermes/active_profile" ]; then
                 ACTIVE=$(head -n1 "$HOME/.hermes/active_profile" 2>/dev/null | tr -d ' \\t\\r\\n')
@@ -263,23 +273,23 @@
             )
         case .configMissing:
             return (
-                "Hermes hasn't been set up yet",
-                "`\(hermesHome)/config.yaml` is missing. Run `hermes setup` (or your first `hermes chat`) on the remote to create it. Scarf will go green automatically once it appears."
+                "Hermes hasn't been run yet",
+                "`\(hermesHome)/state.db` is missing — Hermes creates it on first agent run. Start any session on the remote (e.g. `hermes chat`) and Scarf will go green automatically."
             )
         case .configUnreadable:
             return (
-                "Permission denied on config.yaml",
-                "`\(hermesHome)/config.yaml` exists but the SSH user can't read it. Check ownership: `ls -l \(hermesHome)/config.yaml`. Either run Hermes as the SSH user, `chmod a+r` the file, or SSH as the Hermes user."
+                "Permission denied on state.db",
+                "`\(hermesHome)/state.db` exists but the SSH user can't read it. Check ownership: `ls -l \(hermesHome)/state.db`. Either run Hermes as the SSH user, `chmod a+r` the file, or SSH as the Hermes user."
             )
         case .profileActive(let name):
             return (
                 "Hermes profile \"\(name)\" is active",
-                "The remote is using Hermes profile `\(name)` — its config lives at `~/.hermes/profiles/\(name)/config.yaml`, not `\(hermesHome)/config.yaml`. Either set this server's Hermes home to `~/.hermes/profiles/\(name)` in Manage Servers → Edit, or run `hermes profile use default` on the remote to revert."
+                "The remote is using Hermes profile `\(name)` — its state lives at `~/.hermes/profiles/\(name)/state.db`, not `\(hermesHome)/state.db`. Either set this server's Hermes home to `~/.hermes/profiles/\(name)` in Manage Servers → Edit, or run `hermes profile use default` on the remote to revert."
             )
         case .unknown:
             return (
                 "Can't read Hermes state",
-                "SSH is fine but Scarf can't reach `\(hermesHome)/config.yaml`. Run diagnostics for a full breakdown."
+                "SSH is fine but Scarf can't reach `\(hermesHome)/state.db`. Run diagnostics for a full breakdown."
            )
        }
    }
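For reference, an illustrative decoder for the probe's two-line output. The cause strings mirror the script above; the function itself, and the `profileActive` associated-value label, are assumptions rather than shipped code.

```swift
// Sketch: map "TIER2:1:<cause>" lines onto DegradedCause.
func degradedCause(fromProbeOutput output: String) -> ConnectionStatusViewModel.DegradedCause? {
    for rawLine in output.split(separator: "\n") {
        let line = rawLine.trimmingCharacters(in: .whitespaces)
        guard line.hasPrefix("TIER2:1:") else { continue }
        let code = line.dropFirst("TIER2:1:".count)
        if code == "no-home" { return .homeMissing }
        if code == "missing" { return .configMissing }
        if code == "perm" { return .configUnreadable }
        if code.hasPrefix("profile:") {
            // Label `name:` assumed from the `case .profileActive(let name)` match.
            return .profileActive(name: String(code.dropFirst("profile:".count)))
        }
        return .unknown
    }
    return nil // no TIER2:1: line means tier 2 passed (TIER2:0)
}
```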
diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/RichChatViewModel.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/RichChatViewModel.swift
index 7bd7a62..a3d9077 100644
--- a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/RichChatViewModel.swift
+++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/RichChatViewModel.swift
@@ -339,6 +339,20 @@ public final class RichChatViewModel {
     /// The original CLI session ID when resuming a CLI session via ACP.
     /// Used to combine old CLI messages with new ACP messages.
     public private(set) var originSessionId: String?
+    /// Smallest DB id currently loaded for the *current session* (i.e.
+    /// `sessionId`). Drives `loadEarlier()`: page back with
+    /// `before: oldestLoadedMessageID`. `nil` when nothing has been
+    /// loaded yet or the session has no DB-persisted messages.
+    public private(set) var oldestLoadedMessageID: Int?
+    /// Whether the most recent fetch suggests there are more older
+    /// messages on disk that haven't been loaded into `messages` yet.
+    /// Set to `true` when the initial fetch returned exactly `limit`
+    /// rows (a strong hint the table has more). Drives the "Load
+    /// earlier" button visibility in chat views.
+    public private(set) var hasMoreHistory: Bool = false
+    /// Set while a `loadEarlier()` fetch is in flight so the UI can
+    /// show a spinner and we don't fan out duplicate page requests.
+    public private(set) var isLoadingEarlier: Bool = false
     private var nextLocalId = -1
     private var streamingAssistantText = ""
     private var streamingThinkingText = ""
@@ -382,6 +396,9 @@
         lastKnownFingerprint = nil
         sessionId = nil
         originSessionId = nil
+        oldestLoadedMessageID = nil
+        hasMoreHistory = false
+        isLoadingEarlier = false
         isAgentWorking = false
         userSendPending = false
         resetTimestamp = Date()
@@ -875,12 +892,15 @@
         let opened = await dataService.open()
         guard opened else { return }
 
-        var dbMessages = await dataService.fetchMessages(sessionId: sessionId)
+        // Reconnects don't generate hundreds of unseen messages, so a
+        // 200-row tail is plenty for the merge — and it keeps us from
+        // re-materializing 1000+ message sessions on every reconnect.
+        var dbMessages = await dataService.fetchMessages(sessionId: sessionId, limit: HistoryPageSize.reconcile)
 
         // If we have an origin session (CLI session continued via ACP),
         // include those messages too
         if let origin = originSessionId, origin != sessionId {
-            let originMessages = await dataService.fetchMessages(sessionId: origin)
+            let originMessages = await dataService.fetchMessages(sessionId: origin, limit: HistoryPageSize.reconcile)
             if !originMessages.isEmpty {
                 dbMessages = originMessages + dbMessages
                 dbMessages.sort { ($0.timestamp ?? .distantPast) < ($1.timestamp ?? .distantPast) }
@@ -925,10 +945,18 @@
         // would have cached a stale copy — on resume we need whatever
         // Hermes has actually persisted since then, or the resumed session
         // will show only history up to the moment the snapshot was taken.
-        let opened = await dataService.refresh()
+        // `forceFresh: true` refuses the stale-snapshot fallback the data
+        // service grew in M11 — falling back here would silently hide
+        // messages the agent streamed during the user's offline window.
+        let opened = await dataService.refresh(forceFresh: true)
         guard opened else { return }
 
-        var allMessages = await dataService.fetchMessages(sessionId: sessionId)
+        let pageSize = HistoryPageSize.initial
+        var allMessages = await dataService.fetchMessages(sessionId: sessionId, limit: pageSize)
+        // The DB has more on-disk history when the initial fetch
+        // saturated the limit. The "Load earlier" affordance reads
+        // this flag.
+        var moreHistory = allMessages.count >= pageSize
         let session = await dataService.fetchSession(id: sessionId)
 
         // If the ACP session is different from the origin, load its messages too
@@ -936,10 +964,11 @@
         if let acpId = acpSessionId, acpId != sessionId {
             originSessionId = sessionId
             self.sessionId = acpId
-            let acpMessages = await dataService.fetchMessages(sessionId: acpId)
+            let acpMessages = await dataService.fetchMessages(sessionId: acpId, limit: pageSize)
             if !acpMessages.isEmpty {
                 allMessages.append(contentsOf: acpMessages)
                 allMessages.sort { ($0.timestamp ?? .distantPast) < ($1.timestamp ?? .distantPast) }
+                moreHistory = moreHistory || acpMessages.count >= pageSize
             }
         }
 
@@ -947,6 +976,51 @@
         currentSession = session
         let minId = allMessages.map(\.id).min() ?? 0
         nextLocalId = min(minId - 1, -1)
+        // Track the oldest loaded id from THIS session (not the merged
+        // origin) so `loadEarlier()` pages back through the live ACP
+        // session's history. Cross-session backfill (paging into the
+        // CLI origin) isn't supported in v1 — the merged 2× pageSize
+        // is enough headroom for the dashboard-resume case.
+        let currentSessionId = self.sessionId ?? sessionId
+        oldestLoadedMessageID = allMessages
+            .filter { $0.sessionId == currentSessionId }
+            .map(\.id)
+            .min()
+        hasMoreHistory = moreHistory
         buildMessageGroups()
     }
 
+    // MARK: - Load Earlier (pagination)
+
+    /// Page back through the current session's DB-persisted history
+    /// before `oldestLoadedMessageID` and prepend the page to
+    /// `messages`. Cheap on the SQLite side (`id` is the primary
+    /// key); the cost is the data-service `open()` round-trip on
+    /// remote contexts. `pageSize` defaults to the same 200-row
+    /// budget as the initial load.
+    public func loadEarlier(pageSize: Int = HistoryPageSize.initial) async {
+        guard !isLoadingEarlier, hasMoreHistory else { return }
+        guard let sessionId, let oldest = oldestLoadedMessageID else { return }
+        isLoadingEarlier = true
+        defer { isLoadingEarlier = false }
+
+        let opened = await dataService.open()
+        guard opened else { return }
+
+        let older = await dataService.fetchMessages(
+            sessionId: sessionId,
+            limit: pageSize,
+            before: oldest
+        )
+        guard !older.isEmpty else {
+            hasMoreHistory = false
+            return
+        }
+        messages.insert(contentsOf: older, at: 0)
+        oldestLoadedMessageID = older.first?.id
+        // If this fetch returned fewer than the page size we've hit
+        // the bottom of the table — no further pages worth fetching.
+        hasMoreHistory = older.count >= pageSize
+        buildMessageGroups()
+    }
+
@@ -990,7 +1064,7 @@
         let fingerprint = await dataService.fetchMessageFingerprint(sessionId: sessionId)
 
         if fingerprint != lastKnownFingerprint {
-            let fetched = await dataService.fetchMessages(sessionId: sessionId)
+            let fetched = await dataService.fetchMessages(sessionId: sessionId, limit: HistoryPageSize.polling)
             let session = await dataService.fetchSession(id: sessionId)
 
             lastKnownFingerprint = fetched.isEmpty ? lastKnownFingerprint : fingerprint
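To show how the keyset cursor chains across pages, a small walk-through using only the new API (the drain-everything loop is a sketch; the app pages on demand instead, and `HermesMessage.id` is assumed to be the DB row id, as the view-model code above treats it):

```swift
// Cursor-chaining sketch for fetchMessages(sessionId:limit:before:).
func loadFullHistory(
    from service: HermesDataService,
    sessionId: String,
    pageSize: Int = HistoryPageSize.initial
) async -> [HermesMessage] {
    // Newest page first; each page arrives oldest-first (ASC).
    var history = await service.fetchMessages(sessionId: sessionId, limit: pageSize)
    while history.count >= pageSize, let cursor = history.first?.id {
        // `before:` is exclusive (`id < ?`), so the cursor row is
        // never re-fetched and pages can't overlap.
        let older = await service.fetchMessages(
            sessionId: sessionId,
            limit: pageSize,
            before: cursor
        )
        if older.isEmpty { break }
        history.insert(contentsOf: older, at: 0)
        if older.count < pageSize { break } // short page: top of table
    }
    return history
}
```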
diff --git a/scarf/Packages/ScarfIOS/Sources/ScarfIOS/CitadelServerTransport.swift b/scarf/Packages/ScarfIOS/Sources/ScarfIOS/CitadelServerTransport.swift
index 3ec9357..0bca7bf 100644
--- a/scarf/Packages/ScarfIOS/Sources/ScarfIOS/CitadelServerTransport.swift
+++ b/scarf/Packages/ScarfIOS/Sources/ScarfIOS/CitadelServerTransport.swift
@@ -165,6 +165,15 @@ public final class CitadelServerTransport: ServerTransport, @unchecked Sendable
         try runSync { try await self.asyncSnapshotSQLite(remotePath: remotePath) }
     }
 
+    /// Path where the most recent successful snapshot was written —
+    /// returned even when the SSH connection is currently down. The
+    /// data service falls back to this when `snapshotSQLite` throws so
+    /// Dashboard / Sessions / Chat-history stay viewable while the
+    /// phone is offline.
+    public var cachedSnapshotPath: URL? {
+        snapshotBaseDir.appendingPathComponent("state.db")
+    }
+
     // MARK: - ServerTransport: watching
 
     public func watchPaths(_ paths: [String]) -> AsyncStream {
diff --git a/scarf/Packages/ScarfIOS/Sources/ScarfIOS/NetworkReachabilityService.swift b/scarf/Packages/ScarfIOS/Sources/ScarfIOS/NetworkReachabilityService.swift
new file mode 100644
index 0000000..6db9c68
--- /dev/null
+++ b/scarf/Packages/ScarfIOS/Sources/ScarfIOS/NetworkReachabilityService.swift
@@ -0,0 +1,88 @@
+import Foundation
+import Network
+import Observation
+#if canImport(os)
+import os
+#endif
+
+/// Process-wide reachability monitor wrapping `NWPathMonitor`. Used by
+/// `ChatController` to decide when to attempt a reconnect (on
+/// `.satisfied`) vs. mark the chat offline (on `.unsatisfied`).
+///
+/// Singleton because `NWPathMonitor` is per-process by design — there's
+/// no benefit to instantiating multiple monitors and the cost (a small
+/// background queue per instance) accumulates if every controller
+/// spawns its own.
+///
+/// ## Usage
+///
+/// Don't sample `NWPathMonitor` state directly from a SwiftUI view
+/// body; path lookups go through the monitor's background queue and
+/// can block a `body` re-evaluation. Read the published `isSatisfied`
+/// / observe `transitionTick` instead. Tests and non-iOS callers can
+/// use the no-op default behavior (`isSatisfied` reports `true`).
+@Observable
+@MainActor
+public final class NetworkReachabilityService {
+    public static let shared = NetworkReachabilityService()
+
+    /// `true` when the OS reports a usable network path (any
+    /// interface). Inverted via `!isSatisfied` for "we're offline."
+    public private(set) var isSatisfied: Bool = true
+
+    /// Mirrors `NWPath.isExpensive`. Useful as a hint to UI for not
+    /// auto-fetching big payloads on cellular. Not consumed yet —
+    /// reserved so callers don't have to add another property later.
+    public private(set) var isExpensive: Bool = false
+
+    /// Monotonic counter that bumps every time `isSatisfied` changes.
+    /// Views observe `transitionTick` rather than `isSatisfied` to
+    /// kick a `.onChange` even if the value is the same as before
+    /// (rare but possible during rapid network flapping).
+    public private(set) var transitionTick: Int = 0
+
+    private let monitor = NWPathMonitor()
+    private let queue = DispatchQueue(label: "com.scarf.ios.reachability")
+
+    #if canImport(os)
+    private static let logger = Logger(subsystem: "com.scarf.ios", category: "NetworkReachability")
+    #endif
+
+    private init() {
+        // Seed from the current path synchronously so first reads on
+        // launch don't show "satisfied" while the OS reports otherwise.
+        // `currentPath` is safe here at init (the monitor hasn't been
+        // started yet, no queue handler is firing).
+        let initial = monitor.currentPath
+        self.isSatisfied = (initial.status == .satisfied)
+        self.isExpensive = initial.isExpensive
+
+        monitor.pathUpdateHandler = { [weak self] path in
+            // Bounce back through MainActor — the `Observable`
+            // protocol's published-property invariants require main-
+            // thread mutation. The pathUpdateHandler is invoked on
+            // `queue`, which is a private background queue.
+            Task { @MainActor in
+                guard let self else { return }
+                let satisfied = (path.status == .satisfied)
+                if self.isSatisfied != satisfied {
+                    self.isSatisfied = satisfied
+                    self.transitionTick &+= 1
+                    #if canImport(os)
+                    Self.logger.info(
+                        "Reachability transition: \(satisfied ? "satisfied" : "unsatisfied", privacy: .public)"
+                    )
+                    #endif
+                }
+                self.isExpensive = path.isExpensive
+            }
+        }
+        monitor.start(queue: queue)
+    }
+
+    deinit {
+        // Singleton is process-lifetime; this only runs on shutdown.
+        monitor.cancel()
+    }
+}
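A compact sketch of the intended consumption pattern (the ChatView wiring later in this diff does the same thing; `OfflineBadge` is made up):

```swift
import SwiftUI

struct OfflineBadge: View {
    @State private var isOffline = false

    var body: some View {
        Group {
            if isOffline {
                Label("Offline", systemImage: "wifi.slash")
                    .font(.caption)
            }
        }
        .onAppear { isOffline = !NetworkReachabilityService.shared.isSatisfied }
        // Observe the tick, not `isSatisfied` itself, so the closure
        // fires on every edge even under rapid flapping.
        .onChange(of: NetworkReachabilityService.shared.transitionTick) { _, _ in
            isOffline = !NetworkReachabilityService.shared.isSatisfied
        }
    }
}
```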
diff --git a/scarf/Scarf iOS/App/ScarfGoCoordinator.swift b/scarf/Scarf iOS/App/ScarfGoCoordinator.swift
index cb06184..9bdacc8 100644
--- a/scarf/Scarf iOS/App/ScarfGoCoordinator.swift
+++ b/scarf/Scarf iOS/App/ScarfGoCoordinator.swift
@@ -31,6 +31,28 @@ final class ScarfGoCoordinator {
     /// `AppCoordinator.pendingProjectChat`.
     var pendingProjectChat: String?
 
+    /// Most-recent scene-phase value observed at the WindowGroup
+    /// level. Tab-specific view models (e.g. `ChatController`)
+    /// observe `scenePhaseTick` to react to transitions even when
+    /// they're on a non-foreground tab — `.onChange(of: ScenePhase)`
+    /// alone wouldn't fire for views that aren't on screen.
+    private(set) var scenePhase: ScenePhase = .active
+    private(set) var scenePhaseTick: Int = 0
+    /// Wallclock when we last observed `.background`. Used by tab
+    /// view-models to decide whether a quick `.active` transition is
+    /// worth a full re-verify (long suspensions warrant it; brief
+    /// notification-center peeks don't). `nil` until the first
+    /// background transition.
+    private(set) var lastBackgroundedAt: Date?
+
+    func setScenePhase(_ phase: ScenePhase) {
+        if phase == .background, scenePhase != .background {
+            lastBackgroundedAt = Date()
+        }
+        scenePhase = phase
+        scenePhaseTick &+= 1
+    }
+
     enum Tab: Hashable {
         case dashboard, projects, chat, skills, system
     }
diff --git a/scarf/Scarf iOS/App/ScarfGoTabRoot.swift b/scarf/Scarf iOS/App/ScarfGoTabRoot.swift
index 99ca059..0fe2512 100644
--- a/scarf/Scarf iOS/App/ScarfGoTabRoot.swift
+++ b/scarf/Scarf iOS/App/ScarfGoTabRoot.swift
@@ -36,6 +36,12 @@ struct ScarfGoTabRoot: View {
     /// through here.
     @State private var coordinator = ScarfGoCoordinator()
 
+    /// SwiftUI's `.onChange(of: ScenePhase)` modifier on a non-active
+    /// tab doesn't fire while the tab is unmounted — the coordinator
+    /// is the single source of truth for scene-phase transitions
+    /// across all tabs.
+    @Environment(\.scenePhase) private var scenePhase
+
     var body: some View {
         // The transport factory is keyed by ServerID, so the correct
         // Keychain slot + config is picked automatically. Reuses the
@@ -119,6 +125,12 @@
             // just observes.
             NotificationRouter.shared.coordinator = coordinator
         }
+        // Funnel scene-phase transitions through the coordinator so
+        // tab view-models (notably ChatController) can react even
+        // when their tab isn't currently on-screen.
+        .onChange(of: scenePhase) { _, newPhase in
+            coordinator.setScenePhase(newPhase)
+        }
     }
 }
diff --git a/scarf/Scarf iOS/App/ScarfIOSApp.swift b/scarf/Scarf iOS/App/ScarfIOSApp.swift
index e33c8d8..20977b9 100644
--- a/scarf/Scarf iOS/App/ScarfIOSApp.swift
+++ b/scarf/Scarf iOS/App/ScarfIOSApp.swift
@@ -63,6 +63,13 @@ struct ScarfIOSApp: App {
                 // Hermes gains a push sender.
                 await MainActor.run { NotificationRouter.shared.setUpOnLaunch() }
             }
+            .task {
+                // Drop chat drafts older than 7 days so the
+                // UserDefaults plist doesn't grow unbounded across
+                // years of use. Cheap; UserDefaults is already in
+                // memory by the time we read keys.
+                ChatController.pruneStaleDrafts()
+            }
             // Clamp Dynamic Type at the scene root. ScarfGo is a
             // developer tool that needs more density than Apple's
             // .xxxLarge default, but we still scale from .xSmall
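One way a tab view-model could consume `lastBackgroundedAt` (sketch; the 30-second threshold is an assumption, nothing in this diff sets it):

```swift
import Foundation

extension ScarfGoCoordinator {
    /// True when the suspension was long enough that a resumed tab
    /// should re-verify its connection rather than trust the
    /// pre-suspension state.
    var resumeNeedsReverify: Bool {
        guard let at = lastBackgroundedAt else { return false }
        return Date().timeIntervalSince(at) > 30
    }
}
```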
diff --git a/scarf/Scarf iOS/Chat/ChatView.swift b/scarf/Scarf iOS/Chat/ChatView.swift
index d1de4da..9d61414 100644
--- a/scarf/Scarf iOS/Chat/ChatView.swift
+++ b/scarf/Scarf iOS/Chat/ChatView.swift
@@ -50,6 +50,7 @@ struct ChatView: View {
 
     var body: some View {
         VStack(spacing: 0) {
+            connectionBanner
             errorBanner
             projectContextBar
             messageList
@@ -118,6 +119,23 @@
             coordinator?.pendingProjectChat = nil
             Task { await consumePendingProjectChat(projectPath) }
         }
+        // React to network reachability transitions. The service
+        // updates its `transitionTick` on every `.satisfied <->
+        // .unsatisfied` edge; the `.onChange` here funnels each
+        // edge into ChatController so the reconnect machinery can
+        // suspend on link-down and resume on link-up.
+        .onChange(of: NetworkReachabilityService.shared.transitionTick) { _, _ in
+            Task { await controller.handleReachabilityChange() }
+        }
+        // React to scene-phase transitions (background → active etc).
+        // Source of truth is the coordinator, not `@Environment(\.scenePhase)`,
+        // so the chat tab still picks up phase changes that happened
+        // while it was unmounted (the user is on Dashboard when the
+        // app backgrounds; sees Chat after resume).
+        .onChange(of: coordinator?.scenePhaseTick) { _, _ in
+            guard let phase = coordinator?.scenePhase else { return }
+            Task { await controller.handleScenePhase(phase) }
+        }
         // Deliberately NOT tearing down the ACP session on .onDisappear.
         // `TabView` unmounts tab content when the user switches tabs
         // (disappear fires), but `@State var controller` keeps the
@@ -201,6 +219,9 @@
                     emptyState
                 }
             }
+            if controller.vm.hasMoreHistory {
+                loadEarlierButton
+            }
             ForEach(controller.vm.messages) { msg in
                 MessageBubble(
                     message: msg,
@@ -247,6 +268,37 @@
         .scrollDismissesKeyboard(.interactively)
     }
 
+    /// "Load earlier messages" affordance pinned above the oldest
+    /// loaded bubble. Only rendered when `vm.hasMoreHistory == true`,
+    /// so it disappears organically once the user has paged back to
+    /// the start of the session.
+    @ViewBuilder
+    private var loadEarlierButton: some View {
+        Button {
+            Task { await controller.vm.loadEarlier() }
+        } label: {
+            HStack(spacing: 6) {
+                if controller.vm.isLoadingEarlier {
+                    ProgressView()
+                        .scaleEffect(0.7)
+                } else {
+                    Image(systemName: "arrow.up.circle")
+                        .font(.caption)
+                }
+                Text(controller.vm.isLoadingEarlier ? "Loading earlier…" : "Load earlier messages")
+                    .font(.caption)
+            }
+            .foregroundStyle(ScarfColor.foregroundMuted)
+            .padding(.horizontal, 12)
+            .padding(.vertical, 6)
+            .background(.regularMaterial, in: Capsule())
+        }
+        .buttonStyle(.plain)
+        .disabled(controller.vm.isLoadingEarlier)
+        .frame(maxWidth: .infinity)
+        .padding(.top, 8)
+    }
+
     @ViewBuilder
     private var emptyState: some View {
         VStack(spacing: 8) {
@@ -290,6 +342,58 @@
         .padding(.top, 60)
     }
 
+    /// Top-of-screen banner for transient connection states. `.failed`
+    /// keeps using the existing full-screen overlay (so the user has
+    /// somewhere obvious to tap "Retry"); `.reconnecting` and
+    /// `.offline` are non-modal so the user can keep reading the
+    /// transcript while we work in the background.
+    @ViewBuilder
+    private var connectionBanner: some View {
+        switch controller.state {
+        case .reconnecting(let attempt, let total):
+            connectionBannerStrip(
+                text: "Reconnecting (\(attempt)/\(total))…",
+                tint: ScarfColor.warning,
+                showSpinner: true
+            )
+        case .offline(let reason):
+            connectionBannerStrip(
+                text: reason,
+                tint: ScarfColor.danger,
+                showSpinner: false
+            )
+        default:
+            EmptyView()
+        }
+    }
+
+    private func connectionBannerStrip(
+        text: String,
+        tint: Color,
+        showSpinner: Bool
+    ) -> some View {
+        HStack(spacing: 8) {
+            if showSpinner {
+                ProgressView()
+                    .scaleEffect(0.7)
+                    .tint(tint)
+            } else {
+                Image(systemName: "wifi.slash")
+                    .font(.caption)
+                    .foregroundStyle(tint)
+            }
+            Text(text)
+                .font(.caption)
+                .foregroundStyle(tint)
+            Spacer(minLength: 0)
+        }
+        .padding(.horizontal, 12)
+        .padding(.vertical, 6)
+        .frame(maxWidth: .infinity, alignment: .leading)
+        .background(tint.opacity(0.16))
+        .transition(.move(edge: .top).combined(with: .opacity))
+    }
+
     @ViewBuilder
     /// Soft pill above the composer confirming a non-interruptive
     /// command was received (e.g. `/steer`). Auto-clears via the
@@ -326,6 +430,12 @@
             .onSubmit {
                 Task { await controller.send() }
             }
+            // Persist the half-typed message across app suspensions
+            // and force-quits. Debounced inside `scheduleDraftSave`
+            // so we coalesce per-keystroke writes.
+            .onChange(of: controller.draft) { _, _ in
+                controller.scheduleDraftSave()
+            }
             // Explicit dismiss-keyboard affordance, complementing the
             // interactive scroll-to-dismiss on the message list. iOS
             // shows a keyboard accessory toolbar above the system
@@ -551,6 +661,14 @@
         case idle
         case connecting
         case ready
+        /// Mid-recovery: the SSH exec channel died but the agent on
+        /// the remote may still be running. We're trying to reattach
+        /// via `session/resume` (or `session/load` as a fallback).
+        case reconnecting(attempt: Int, of: Int)
+        /// Network reachability is unsatisfied. Distinct from
+        /// `.failed` so the banner can stay tinted yellow ("we'll
+        /// retry") instead of red ("dead").
+        case offline(reason: String)
         case failed(String)
     }
@@ -574,12 +692,100 @@
     private let context: ServerContext
     private var client: ACPClient?
     private var eventTask: Task?
+    private var healthMonitorTask: Task?
+    private var reconnectTask: Task?
+    private var isHandlingDisconnect = false
+    private var pendingDraftSave: Task?
+
+    /// Session id of the currently-active chat. Saved when state
+    /// reaches `.ready` and cleared on explicit `stop()` so a
+    /// user-initiated disconnect doesn't get auto-reconnected when
+    /// network/scene events fire later.
+    private var lastActiveSessionID: String?
+    /// Optional project working directory of the currently-active
+    /// session. Used as `cwd` on the recovery path so a project-
+    /// scoped session reconnects with the right scope.
+    private var lastProjectPath: String?
+
+    // Reconnect tuning — verbatim from the Mac implementation at
+    // scarf/Features/Chat/ViewModels/ChatViewModel.swift:563-693.
+    private static let maxReconnectAttempts = 5
+    private static let reconnectBaseDelay: UInt64 = 1_000_000_000 // 1s
+    private static let maxReconnectDelay: UInt64 = 16_000_000_000 // 16s
 
     private static let logger = Logger(
         subsystem: "com.scarf.ios",
         category: "ChatController"
     )
 
+    // MARK: - Draft persistence
+
+    private static let draftKeyPrefix = "scarf.chat.draft.v1"
+    private static let draftMaxAge: TimeInterval = 7 * 24 * 60 * 60 // 7 days
+
+    private static func draftKey(serverID: ServerID, sessionID: String?) -> String {
+        // `_no_session` covers the brief connecting window before
+        // `vm.setSessionId` lands. The TextField is disabled in that
+        // window today, so this slot is essentially never written —
+        // but the sentinel is here so the key is always well-formed.
+        "\(draftKeyPrefix).\(serverID.uuidString).\(sessionID ?? "_no_session")"
+    }
+
+    private static func draftTimestampKey(forKey key: String) -> String { key + ".ts" }
+
+    private func saveDraft() {
+        let key = Self.draftKey(serverID: context.id, sessionID: vm.sessionId)
+        let tsKey = Self.draftTimestampKey(forKey: key)
+        if draft.isEmpty {
+            UserDefaults.standard.removeObject(forKey: key)
+            UserDefaults.standard.removeObject(forKey: tsKey)
+        } else {
+            UserDefaults.standard.set(draft, forKey: key)
+            UserDefaults.standard.set(Date().timeIntervalSince1970, forKey: tsKey)
+        }
+    }
+
+    private func loadDraft() {
+        let key = Self.draftKey(serverID: context.id, sessionID: vm.sessionId)
+        if let saved = UserDefaults.standard.string(forKey: key), !saved.isEmpty {
+            draft = saved
+        }
+    }
+
+    private func clearStoredDraft() {
+        let key = Self.draftKey(serverID: context.id, sessionID: vm.sessionId)
+        UserDefaults.standard.removeObject(forKey: key)
+        UserDefaults.standard.removeObject(forKey: Self.draftTimestampKey(forKey: key))
+    }
+
+    /// Debounced draft save. The view layer hooks this off
+    /// `.onChange(of: controller.draft)` so per-keystroke writes are
+    /// coalesced into one UserDefaults flush per ~1s of typing.
+    func scheduleDraftSave() {
+        pendingDraftSave?.cancel()
+        pendingDraftSave = Task { @MainActor [weak self] in
+            try? await Task.sleep(nanoseconds: 1_000_000_000)
+            guard !Task.isCancelled else { return }
+            self?.saveDraft()
+        }
+    }
+
+    /// One-shot janitor invoked at app launch. Removes draft slots
+    /// whose timestamp sidecar predates `draftMaxAge`. Cheap enough
+    /// to call synchronously — UserDefaults is in-memory at runtime.
+    static func pruneStaleDrafts(now: Date = Date()) {
+        let defaults = UserDefaults.standard
+        let cutoff = now.timeIntervalSince1970 - draftMaxAge
+        for key in defaults.dictionaryRepresentation().keys
+            where key.hasPrefix(draftKeyPrefix) && key.hasSuffix(".ts")
+        {
+            guard let ts = defaults.object(forKey: key) as? TimeInterval, ts < cutoff else { continue }
+            let baseKey = String(key.dropLast(3)) // strip ".ts"
+            defaults.removeObject(forKey: baseKey)
+            defaults.removeObject(forKey: key)
+        }
+    }
+
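A worked check of the janitor's contract (hypothetical XCTest, not part of this diff; it assumes `ChatController` is main-actor-isolated like the rest of the controller code, and exercises only the key scheme defined above):

```swift
import XCTest

@MainActor
final class DraftPruneTests: XCTestCase {
    func testPruneRemovesOnlyExpiredSlots() {
        let d = UserDefaults.standard
        let fresh = "scarf.chat.draft.v1.A.session1"
        let stale = "scarf.chat.draft.v1.B.session2"
        let now = Date()

        d.set("keep me", forKey: fresh)
        d.set(now.timeIntervalSince1970, forKey: fresh + ".ts")
        d.set("drop me", forKey: stale)
        d.set(now.timeIntervalSince1970 - 8 * 24 * 60 * 60, forKey: stale + ".ts") // 8 days old

        ChatController.pruneStaleDrafts(now: now)

        XCTAssertEqual(d.string(forKey: fresh), "keep me") // inside the 7-day window
        XCTAssertNil(d.string(forKey: stale))              // slot removed
        XCTAssertNil(d.object(forKey: stale + ".ts"))      // sidecar removed too

        d.removeObject(forKey: fresh)                      // cleanup
        d.removeObject(forKey: fresh + ".ts")
    }
}
```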
     init(context: ServerContext) {
         self.context = context
         self.vm = RichChatViewModel(context: context)
@@ -626,16 +832,10 @@
         // Start streaming ACP events into the view-model BEFORE we
         // send session/new, so the `available_commands_update`
         // notification that the server sends on session init is
-        // captured.
-        let stream = await client.events
-        eventTask = Task { [weak self] in
-            for await event in stream {
-                guard let self else { break }
-                await MainActor.run {
-                    self.vm.handleACPEvent(event)
-                }
-            }
-        }
+        // captured. Health monitor catches socket-level death the
+        // event-stream EOF wouldn't see (e.g., a hung remote read).
+        startACPEventLoop(client: client)
+        startHealthMonitor(client: client)
 
         // Create a fresh ACP session. `cwd` is the remote user's home
         // directory — Hermes defaults to that for tool scoping.
         do {
             let home = await context.resolvedUserHome()
             let sessionId = try await client.newSession(cwd: home)
             vm.setSessionId(sessionId)
+            loadDraft()
             state = .ready
+            lastActiveSessionID = sessionId
+            lastProjectPath = nil
         } catch {
             state = .failed(error.localizedDescription)
             await vm.recordACPFailure(error, client: client)
@@ -661,6 +864,7 @@
         let sessionId = vm.sessionId ?? ""
         guard !sessionId.isEmpty else { return }
         draft = ""
+        clearStoredDraft()
         vm.addUserMessage(text: text)
         // /steer is non-interruptive — the agent is still on its
         // current turn; the guidance applies after the next tool call.
@@ -721,13 +925,283 @@
     /// Stop the current session + tear down the SSH exec channel.
     /// Idempotent.
     func stop() async {
-        eventTask?.cancel()
-        eventTask = nil
+        eventTask?.cancel(); eventTask = nil
+        healthMonitorTask?.cancel(); healthMonitorTask = nil
+        reconnectTask?.cancel(); reconnectTask = nil
         if let client {
             await client.stop()
         }
         client = nil
         state = .idle
+        // Explicit user-initiated disconnect — clear the session
+        // memory so reachability/scenePhase events don't try to
+        // resurrect the dead chat.
+        lastActiveSessionID = nil
+        lastProjectPath = nil
+        isHandlingDisconnect = false
+    }
+
+    // MARK: - Reconnect machinery (Section 1)
+
+    /// Stream ACP events into the view-model. When the stream ends
+    /// without us cancelling it, the channel died; route into the
+    /// reconnect path. Direct port of Mac's `startACPEventLoop`
+    /// (scarf/Features/Chat/ViewModels/ChatViewModel.swift:563).
+    private func startACPEventLoop(client: ACPClient) {
+        eventTask = Task { @MainActor [weak self] in
+            let stream = await client.events
+            for await event in stream {
+                guard !Task.isCancelled else { break }
+                self?.vm.handleACPEvent(event)
+            }
+            // Stream ended — if we weren't explicitly cancelled the
+            // channel died (EOF on stdin/out, write to dead pipe,
+            // SSH socket gone). The Mac caller calls
+            // `handleConnectionDied`; we mirror that.
+            if !Task.isCancelled {
+                self?.handleConnectionDied()
+            }
+        }
+    }
+
+    /// 5-second heartbeat that catches dead channels which don't
+    /// explicitly EOF the stream (e.g., a hung SSH socket waiting
+    /// for the next chunk that never arrives). When `isHealthy`
+    /// returns false, route into the reconnect path. Mirrors Mac's
+    /// `startHealthMonitor`.
+    private func startHealthMonitor(client: ACPClient) {
+        healthMonitorTask = Task { @MainActor [weak self] in
+            while !Task.isCancelled {
+                try? await Task.sleep(nanoseconds: 5_000_000_000)
+                guard !Task.isCancelled else { break }
+                let healthy = await client.isHealthy
+                if !healthy {
+                    self?.handleConnectionDied()
+                    break
+                }
+            }
+        }
+    }
+
+    /// One-stop cleanup + reconnect dispatch. Idempotent — guarded by
+    /// `isHandlingDisconnect` so concurrent triggers (event-stream
+    /// EOF + health monitor + write failure) don't tear down the same
+    /// client twice.
+    private func handleConnectionDied() {
+        guard client != nil, !isHandlingDisconnect else { return }
+        isHandlingDisconnect = true
+        Self.logger.warning("ACP connection died")
+
+        // Capture any in-progress streaming text into a finalized
+        // message before we attempt to merge against the DB. The VM
+        // doesn't add a system "Connection lost" bubble — that would
+        // create a phantom message during reconnect.
+        vm.finalizeOnDisconnect()
+
+        let savedSessionId = vm.sessionId
+
+        // Tear down the dead client. The eventTask will be cancelled
+        // immediately; awaiting `stop()` on the dead client is the
+        // detached fire-and-forget pattern Mac uses (its `Task` block).
+        eventTask?.cancel(); eventTask = nil
+        healthMonitorTask?.cancel(); healthMonitorTask = nil
+        if let dead = client { Task { await dead.stop() } }
+        client = nil
+
+        guard let savedSessionId else {
+            // No session id to resume — surface the failure.
+            state = .failed("Connection lost")
+            isHandlingDisconnect = false
+            return
+        }
+        attemptReconnect(sessionId: savedSessionId)
+    }
+
+    /// React to an iOS scene-phase transition.
+    ///
+    /// `.background`: cancel the keepalive — iOS will suspend the
+    /// socket within ~30s anyway, and fighting it via background
+    /// tasks costs battery for marginal benefit (the agent's work is
+    /// persisted to state.db on the remote, so we recover on resume).
+    ///
+    /// `.active`: if we had a session running before suspension and
+    /// the channel is now unhealthy, route into the reconnect path
+    /// so the user sees fresh state without having to tap anything.
+    func handleScenePhase(_ phase: ScenePhase) async {
+        switch phase {
+        case .background:
+            healthMonitorTask?.cancel(); healthMonitorTask = nil
+        case .active:
+            // No session worth verifying.
+            guard let id = lastActiveSessionID else { return }
+            // Already mid-recovery — let it finish.
+            if case .reconnecting = state { return }
+            await verifyAndResume(sessionId: id)
+        case .inactive:
+            break // brief: control center, banners, split-screen
+        @unknown default:
+            break
+        }
+    }
+
+    /// Probe the existing client's health on resume. If alive,
+    /// just re-arm the heartbeat; if dead, route into the reconnect
+    /// path (which preserves the session id and reconciles against
+    /// the DB).
+    private func verifyAndResume(sessionId: String) async {
+        if let client {
+            if await client.isHealthy {
+                startHealthMonitor(client: client)
+                return
+            }
+        }
+        handleConnectionDied()
+    }
+
+    /// React to a transition in `NetworkReachabilityService`. While
+    /// the device has no network, suppress reconnect attempts (they'd
+    /// just burn the 5-attempt budget against guaranteed failures);
+    /// when the network comes back, kick a fresh cycle if we're
+    /// stuck in `.failed` / `.offline` with a saved session id.
+    func handleReachabilityChange() async {
+        let satisfied = NetworkReachabilityService.shared.isSatisfied
+        if !satisfied {
+            // Stop the in-flight reconnect cycle — every attempt
+            // will fail until the link is back. We'll restart on
+            // the next `.satisfied` edge.
+            reconnectTask?.cancel(); reconnectTask = nil
+            if case .reconnecting = state {
+                state = .offline(reason: "No network")
+            }
+            return
+        }
+        // Network back. If we have a session worth restoring AND
+        // we're currently in a non-recoverable state, kick a fresh
+        // reconnect cycle.
+        guard let id = lastActiveSessionID else { return }
+        switch state {
+        case .offline, .failed:
+            attemptReconnect(sessionId: id)
+        default:
+            break
+        }
+    }
+
+    /// 5-attempt exponential-backoff reconnect targeting the same
+    /// session id. Tries `session/resume` first (correct semantics
+    /// for live recovery), falls back to `session/load` for older
+    /// remotes. NEVER `session/new` — that would lose the agent's
+    /// in-context conversation. After a successful reattach, calls
+    /// `vm.reconcileWithDB` so messages the agent wrote during the
+    /// outage become visible.
+    private func attemptReconnect(sessionId: String) {
+        reconnectTask?.cancel()
+        reconnectTask = Task { @MainActor [weak self] in
+            guard let self else { return }
+
+            for attempt in 1...Self.maxReconnectAttempts {
+                guard !Task.isCancelled else { return }
+                state = .reconnecting(attempt: attempt, of: Self.maxReconnectAttempts)
+
+                // Skip backoff on the first attempt so a quick
+                // recovery (e.g., a momentary SSH socket flap) feels
+                // instant. Subsequent attempts back off 2→4→8→16s.
+                if attempt > 1 {
+                    let delay = min(
+                        Self.reconnectBaseDelay * UInt64(1 << (attempt - 1)),
+                        Self.maxReconnectDelay
+                    )
+                    try? await Task.sleep(nanoseconds: delay)
+                    guard !Task.isCancelled else { return }
+                }
+
+                let client = ACPClient.forIOSApp(
+                    context: context,
+                    keyProvider: {
+                        let store = KeychainSSHKeyStore()
+                        guard let key = try await store.load() else {
+                            throw SSHKeyStoreError.backendFailure(
+                                message: "No SSH key in Keychain — re-run onboarding.",
+                                osStatus: nil
+                            )
+                        }
+                        return key
+                    }
+                )
+
+                do {
+                    try await client.start()
+
+                    // Project-scoped sessions reconnect with their
+                    // project path as cwd; everything else uses the
+                    // remote user's home directory.
+                    let cwd: String
+                    if let path = lastProjectPath {
+                        cwd = path
+                    } else {
+                        cwd = await context.resolvedUserHome()
+                    }
+
+                    let resolvedSessionId: String
+                    do {
+                        resolvedSessionId = try await client.resumeSession(cwd: cwd, sessionId: sessionId)
+                    } catch {
+                        Self.logger.info(
+                            "session/resume failed, trying session/load: \(error.localizedDescription, privacy: .public)"
+                        )
+                        resolvedSessionId = try await client.loadSession(cwd: cwd, sessionId: sessionId)
+                    }
+
+                    // Wire up the new client BEFORE merging messages
+                    // so any streaming chunks that arrive during the
+                    // reconcile land in the right place.
+                    self.client = client
+                    vm.acpStderrProvider = { [weak client] in
+                        await client?.recentStderr ?? ""
+                    }
+                    vm.setSessionId(resolvedSessionId)
+
+                    // Merge in-memory state (any local-only user
+                    // messages typed before the disconnect) with
+                    // whatever Hermes has persisted to state.db
+                    // since we last looked. This is what makes the
+                    // "agent kept working while you were locked"
+                    // case visible to the user.
+                    let countBefore = vm.messages.count
+                    await vm.reconcileWithDB(sessionId: resolvedSessionId)
+                    let added = vm.messages.count - countBefore
+                    if added > 0 {
+                        vm.transientHint = "Resynced \(added) new message\(added == 1 ? "" : "s")."
+                        Task { @MainActor [weak vm] in
+                            try? await Task.sleep(nanoseconds: 4_000_000_000)
+                            if vm?.transientHint?.hasPrefix("Resynced") == true {
+                                vm?.transientHint = nil
+                            }
+                        }
+                    }
+
+                    startACPEventLoop(client: client)
+                    startHealthMonitor(client: client)
+                    state = .ready
+                    lastActiveSessionID = resolvedSessionId
+
+                    isHandlingDisconnect = false
+                    Self.logger.info("Reconnected on attempt \(attempt)")
+                    return
+                } catch {
+                    Self.logger.warning(
+                        "Reconnect attempt \(attempt) failed: \(error.localizedDescription, privacy: .public)"
+                    )
+                    await client.stop()
+                    continue
+                }
+            }
+
+            // Exhausted all attempts. Surface a manual-recovery prompt.
+            guard !Task.isCancelled else { return }
+            state = .failed("Connection lost")
+            isHandlingDisconnect = false
+        }
     }
 
     /// User tapped "New chat". Stop, reset the VM, start again.
@@ -845,15 +1319,8 @@
             return
         }
 
-        let stream = await client.events
-        eventTask = Task { [weak self] in
-            for await event in stream {
-                guard let self else { break }
-                await MainActor.run {
-                    self.vm.handleACPEvent(event)
-                }
-            }
-        }
+        startACPEventLoop(client: client)
+        startHealthMonitor(client: client)
 
         do {
             // Use the project's path as cwd when provided; else the
@@ -866,7 +1333,10 @@
             }
             let sessionId = try await client.newSession(cwd: cwd)
             vm.setSessionId(sessionId)
+            loadDraft()
             state = .ready
+            lastActiveSessionID = sessionId
+            lastProjectPath = projectPath
 
             // If this was a project-scoped session, record the
             // attribution so Dashboard's Sessions tab can render the
@@ -976,15 +1446,8 @@
             return
         }
 
-        let stream = await client.events
-        eventTask = Task { [weak self] in
-            for await event in stream {
-                guard let self else { break }
-                await MainActor.run {
-                    self.vm.handleACPEvent(event)
-                }
-            }
-        }
+        startACPEventLoop(client: client)
+        startHealthMonitor(client: client)
 
         do {
             let home = await context.resolvedUserHome()
@@ -998,6 +1461,7 @@
                 resolvedID = try await client.loadSession(cwd: home, sessionId: sessionID)
             }
             vm.setSessionId(resolvedID)
+            loadDraft()
             // Pull the transcript out of state.db so the user sees
             // everything said up to now. Mirrors the Mac resume flow
             // (scarf/scarf/Features/Chat/ViewModels/ChatViewModel.swift:376).
@@ -1009,6 +1473,8 @@
                 acpSessionId: resolvedID == sessionID ? nil : resolvedID
             )
             state = .ready
+            lastActiveSessionID = resolvedID
+            lastProjectPath = resolved?.path
         } catch {
             state = .failed(error.localizedDescription)
             await vm.recordACPFailure(error, client: client)
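For the record, the delay schedule those three tuning constants produce, as a small pure-function sketch (the function is illustrative; the controller inlines this math):

```swift
// Attempt 1 fires immediately; attempts 2...5 sleep 2s, 4s, 8s, 16s.
// The cap only bites if maxReconnectAttempts ever grows past 5.
func reconnectDelay(forAttempt attempt: Int) -> UInt64 {
    guard attempt > 1 else { return 0 }
    let base: UInt64 = 1_000_000_000  // reconnectBaseDelay (1s)
    let cap: UInt64 = 16_000_000_000  // maxReconnectDelay (16s)
    return min(base * UInt64(1 << (attempt - 1)), cap)
}

// reconnectDelay(forAttempt: 1) == 0
// reconnectDelay(forAttempt: 2) == 2_000_000_000   (2s)
// reconnectDelay(forAttempt: 5) == 16_000_000_000  (16s)
```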
diff --git a/scarf/scarf/Features/Chat/Views/ChatTranscriptPane.swift b/scarf/scarf/Features/Chat/Views/ChatTranscriptPane.swift
index f6877ac..272a566 100644
--- a/scarf/scarf/Features/Chat/Views/ChatTranscriptPane.swift
+++ b/scarf/scarf/Features/Chat/Views/ChatTranscriptPane.swift
@@ -34,7 +34,10 @@ struct ChatTranscriptPane: View {
                 isWorking: richChat.isGenerating,
                 isLoadingSession: chatViewModel.isPreparingSession,
                 scrollTrigger: richChat.scrollTrigger,
-                turnDurations: richChat.turnDurations
+                turnDurations: richChat.turnDurations,
+                hasMoreHistory: richChat.hasMoreHistory,
+                isLoadingEarlier: richChat.isLoadingEarlier,
+                onLoadEarlier: { Task { await richChat.loadEarlier() } }
             )
 
             Divider()
diff --git a/scarf/scarf/Features/Chat/Views/RichChatMessageList.swift b/scarf/scarf/Features/Chat/Views/RichChatMessageList.swift
index a4a21e6..850f947 100644
--- a/scarf/scarf/Features/Chat/Views/RichChatMessageList.swift
+++ b/scarf/scarf/Features/Chat/Views/RichChatMessageList.swift
@@ -15,6 +15,13 @@ struct RichChatMessageList: View {
     /// bubble's metadata footer can render the v2.5 stopwatch pill.
     /// Defaults empty so callers that don't care can omit it.
     var turnDurations: [Int: TimeInterval] = [:]
+    /// Show the "Load earlier messages" button at the top of the
+    /// transcript when the underlying session has more on-disk
+    /// history that hasn't been paged in yet. Hidden by default so
+    /// existing callers who haven't opted in see no UI change.
+    var hasMoreHistory: Bool = false
+    var isLoadingEarlier: Bool = false
+    var onLoadEarlier: (() -> Void)? = nil
 
     /// Scrolling strategy: plain `VStack` (not `LazyVStack`) plus
     /// `.defaultScrollAnchor(.bottom)`.
@@ -57,6 +64,30 @@
                     .transition(.opacity)
             }
 
+            if hasMoreHistory, let onLoadEarlier {
+                Button {
+                    onLoadEarlier()
+                } label: {
+                    HStack(spacing: 6) {
+                        if isLoadingEarlier {
+                            ProgressView().scaleEffect(0.7)
+                        } else {
+                            Image(systemName: "arrow.up.circle")
+                                .font(.caption)
+                        }
+                        Text(isLoadingEarlier ? "Loading earlier…" : "Load earlier messages")
+                            .font(.caption)
+                    }
+                    .padding(.horizontal, 10)
+                    .padding(.vertical, 5)
+                    .background(.regularMaterial, in: Capsule())
+                }
+                .buttonStyle(.plain)
+                .disabled(isLoadingEarlier)
+                .frame(maxWidth: .infinity)
+                .padding(.vertical, 4)
+            }
+
             ForEach(groups) { group in
                 MessageGroupView(group: group, turnDurations: turnDurations)
                     .equatable()
diff --git a/scarf/scarf/Features/Sessions/ViewModels/SessionsViewModel.swift b/scarf/scarf/Features/Sessions/ViewModels/SessionsViewModel.swift
index d7ee42c..1a521e4 100644
--- a/scarf/scarf/Features/Sessions/ViewModels/SessionsViewModel.swift
+++ b/scarf/scarf/Features/Sessions/ViewModels/SessionsViewModel.swift
@@ -114,7 +114,7 @@ final class SessionsViewModel {
 
     func selectSession(_ session: HermesSession) async {
         selectedSession = session
-        messages = await dataService.fetchMessages(sessionId: session.id)
+        messages = await dataService.fetchMessages(sessionId: session.id, limit: HistoryPageSize.macSessionDetail)
         subagentSessions = await dataService.fetchSubagentSessions(parentId: session.id)
     }
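Finally, a hypothetical regression test for the paging invariants the new fetch API promises. `HermesDataServiceFixture` does not exist in the tree; it stands in for "a service opened over a state.db seeded with 450 messages for session `s`, row ids 1...450":

```swift
import XCTest

final class FetchMessagesPagingTests: XCTestCase {
    func testPagesAreChronologicalAndDisjoint() async throws {
        let service = try await HermesDataServiceFixture.make(messageCount: 450)

        let newest = await service.fetchMessages(sessionId: "s", limit: 200)
        XCTAssertEqual(newest.map(\.id), Array(251...450)) // ASC, newest 200

        let older = await service.fetchMessages(sessionId: "s", limit: 200, before: 251)
        XCTAssertEqual(older.map(\.id), Array(51...250))   // strictly older, no overlap

        let tail = await service.fetchMessages(sessionId: "s", limit: 200, before: 51)
        XCTAssertEqual(tail.count, 50)                     // short page => top reached
    }
}
```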