mirror of
https://github.com/awizemann/scarf.git
synced 2026-05-10 18:44:45 +00:00
feat(chat-resilience): iOS reconnect + snapshot fallback + paging + pill fix
Brings iOS chat to parity with Mac's reconnect behavior so a session survives phone-sleep, network handoffs, and SSH socket drops without losing the agent's work — Hermes already persists messages to state.db in real-time, the iOS app just had no resync path. Core changes (shared between Mac and iOS via ScarfCore): - ServerTransport.cachedSnapshotPath: fall back to the cached state.db snapshot when a fresh pull fails. HermesDataService surfaces this via isUsingStaleSnapshot + lastSnapshotMtime so views can render "Last updated X ago." Default opt-in via refresh(forceFresh: false); chat history reload passes forceFresh: true to refuse stale data. - HermesDataService.fetchMessages(sessionId:limit:before:): bounded pagination by id desc. Legacy unbounded overload deprecated. New HistoryPageSize constants centralize the budget. - RichChatViewModel.loadEarlier(): pages back through the current session via oldestLoadedMessageID + hasMoreHistory. iOS-only: - ChatController gains the Mac reconnect machinery: 5-attempt exponential backoff (1→16s) via session/resume → session/load, reconcileWithDB on success, "Resynced N new messages" toast. startACPEventLoop + startHealthMonitor extracted as helpers. - New NetworkReachabilityService (NWPathMonitor singleton). Suspends reconnect attempts while offline; kicks a fresh cycle on link-up. - ScarfGoCoordinator + ScarfGoTabRoot funnel scenePhase transitions to ChatController.handleScenePhase. On .active we verify channel health and reconnect if dead. - Draft persistence: UserDefaults keyed by (serverID, sessionID) survives force-quit. 7-day janitor at app launch. - Connection-state banner: .reconnecting and .offline render slim ScarfDesign-tinted strips above the message list. .failed keeps using the existing full-screen overlay. Bonus fix: - ConnectionStatusViewModel tier-2 probe now checks state.db instead of config.yaml. 
Hermes v0.11+ doesn't materialize config.yaml until the user changes a setting, so a freshly-installed working Hermes was being marked "degraded — config missing" indefinitely. state.db is the file Scarf actually depends on. Out of scope (deferred): APNs push notifications, BGTaskScheduler- based extended-background keepalive, offline write queue. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -27,6 +27,28 @@ public enum QueryDefaults: Sendable {
|
||||
public nonisolated static let defaultSilenceThreshold = 200
|
||||
}
|
||||
|
||||
/// Page sizes for `HermesDataService.fetchMessages(sessionId:limit:before:)`.
|
||||
/// Centralized so iOS, Mac, and the polling code paths can pick a
|
||||
/// consistent budget — and so we have one knob to retune if perf
|
||||
/// concerns shift.
|
||||
public enum HistoryPageSize: Sendable {
|
||||
/// Initial chat-history load: covers the vast majority of
|
||||
/// sessions in one fetch while keeping the snapshot read bounded
|
||||
/// for the rare 1000+-message session.
|
||||
public nonisolated static let initial = 200
|
||||
/// Reconnection reconcile against the DB. 200 rows is plenty —
|
||||
/// disconnects don't generate hundreds of unseen messages.
|
||||
public nonisolated static let reconcile = 200
|
||||
/// Mac sessions detail view. Larger to reduce paging UX in the
|
||||
/// desktop browser-style read; the desktop has the screen real
|
||||
/// estate and memory headroom for it.
|
||||
public nonisolated static let macSessionDetail = 500
|
||||
/// Terminal-mode polling refresh. Same 500-row budget as Mac
|
||||
/// detail; covers sessions long enough that the user is actively
|
||||
/// scrolling but bounded to keep each poll tick cheap.
|
||||
public nonisolated static let polling = 500
|
||||
}
|
||||
|
||||
// MARK: - File Size Formatting
|
||||
|
||||
public enum FileSizeUnit: Sendable {
|
||||
|
||||
@@ -61,6 +61,26 @@ public actor HermesDataService {
|
||||
/// instead of an empty Dashboard with no explanation.
|
||||
public private(set) var lastOpenError: String?
|
||||
|
||||
/// Modification date of the underlying state.db that backs the
|
||||
/// currently-open connection. For local contexts this tracks the
|
||||
/// live DB's mtime; for remote contexts it's the cached snapshot's
|
||||
/// mtime — which equals "when did we last get fresh data."
|
||||
public private(set) var lastSnapshotMtime: Date?
|
||||
|
||||
/// True when a `snapshotSQLite` pull failed and the open succeeded
|
||||
/// against a previously-cached snapshot instead of a fresh one.
|
||||
/// Views render a "Last updated X ago" affordance when this is set
|
||||
/// alongside `lastOpenError`. Always `false` for local contexts.
|
||||
public private(set) var isUsingStaleSnapshot: Bool = false
|
||||
|
||||
/// Convenience: how long ago the cached snapshot was written, when
|
||||
/// we're using a stale snapshot. `nil` when the snapshot is fresh
|
||||
/// or no mtime could be read.
|
||||
public var staleAge: TimeInterval? {
|
||||
guard isUsingStaleSnapshot, let m = lastSnapshotMtime else { return nil }
|
||||
return Date().timeIntervalSince(m)
|
||||
}
|
||||
|
||||
public let context: ServerContext
|
||||
private let transport: any ServerTransport
|
||||
|
||||
@@ -70,6 +90,18 @@ public actor HermesDataService {
|
||||
}
|
||||
|
||||
public func open() async -> Bool {
|
||||
await openInternal(forceFresh: false)
|
||||
}
|
||||
|
||||
/// Variant that refuses the stale-snapshot fallback. Used by call
|
||||
/// sites that genuinely need post-write consistency — most notably
|
||||
/// the chat session-history reload, where a stale snapshot would
|
||||
/// hide messages the agent just streamed.
|
||||
private func openStrict() async -> Bool {
|
||||
await openInternal(forceFresh: true)
|
||||
}
|
||||
|
||||
private func openInternal(forceFresh: Bool) async -> Bool {
|
||||
if db != nil { return true }
|
||||
let localPath: String
|
||||
if context.isRemote {
|
||||
@@ -86,10 +118,30 @@ public actor HermesDataService {
|
||||
)
|
||||
localPath = url.path
|
||||
lastOpenError = nil
|
||||
isUsingStaleSnapshot = false
|
||||
lastSnapshotMtime = mtime(at: url)
|
||||
} catch {
|
||||
lastOpenError = humanize(error)
|
||||
Self.logger.warning("snapshotSQLite failed: \(error.localizedDescription, privacy: .public)")
|
||||
return false
|
||||
// Fresh pull failed. If the caller demanded fresh data
|
||||
// (`forceFresh: true`) OR there's no usable cache on
|
||||
// disk, surface the error and bail. Otherwise serve
|
||||
// the cached snapshot with `isUsingStaleSnapshot = true`
|
||||
// so views can render a "Last updated X ago" banner.
|
||||
if !forceFresh,
|
||||
let cached = transport.cachedSnapshotPath,
|
||||
FileManager.default.fileExists(atPath: cached.path)
|
||||
{
|
||||
localPath = cached.path
|
||||
isUsingStaleSnapshot = true
|
||||
lastSnapshotMtime = mtime(at: cached)
|
||||
lastOpenError = humanize(error) // user still sees why it's stale
|
||||
Self.logger.warning(
|
||||
"Using stale snapshot after pull failure: \(error.localizedDescription, privacy: .public)"
|
||||
)
|
||||
} else {
|
||||
lastOpenError = humanize(error)
|
||||
Self.logger.warning("snapshotSQLite failed: \(error.localizedDescription, privacy: .public)")
|
||||
return false
|
||||
}
|
||||
}
|
||||
} else {
|
||||
localPath = context.paths.stateDB
|
||||
@@ -97,6 +149,8 @@ public actor HermesDataService {
|
||||
lastOpenError = "Hermes state database not found at \(localPath)."
|
||||
return false
|
||||
}
|
||||
isUsingStaleSnapshot = false
|
||||
lastSnapshotMtime = mtime(at: URL(fileURLWithPath: localPath))
|
||||
}
|
||||
// Remote snapshots are point-in-time copies that no one writes to;
|
||||
// opening them with `immutable=1` tells SQLite to skip WAL/SHM and
|
||||
@@ -151,17 +205,27 @@ public actor HermesDataService {
|
||||
return desc
|
||||
}
|
||||
|
||||
/// Force a fresh snapshot pull + reopen. Used on session-load and in
|
||||
/// any path that needs the UI to reflect writes Hermes just made.
|
||||
/// Without this, remote snapshots would be frozen at the first `open()`
|
||||
/// for the app's lifetime — new messages added to a resumed session
|
||||
/// would never appear because the snapshot was pulled before they were
|
||||
/// written. Local contexts pay essentially nothing: close+reopen on a
|
||||
/// live DB is a no-op.
|
||||
/// Close the current connection and re-open with a fresh snapshot
|
||||
/// pull (when remote). When `forceFresh` is `false` (default) and
|
||||
/// the snapshot pull fails, falls back to the cached snapshot —
|
||||
/// `isUsingStaleSnapshot` is set so views can render a "Last
|
||||
/// updated X ago" banner. Pass `forceFresh: true` from call sites
|
||||
/// that genuinely need post-write consistency (chat session
|
||||
/// history reload), where stale data would hide messages the
|
||||
/// agent just streamed.
|
||||
@discardableResult
|
||||
public func refresh() async -> Bool {
|
||||
public func refresh(forceFresh: Bool = false) async -> Bool {
|
||||
close()
|
||||
return await open()
|
||||
return await openInternal(forceFresh: forceFresh)
|
||||
}
|
||||
|
||||
/// Read the modification date of a local file. Returns `nil` if
|
||||
/// the file is unreachable or has no mtime metadata. Used to
|
||||
/// stamp `lastSnapshotMtime` so views can show "Last updated
|
||||
/// X ago" without each one duplicating the FileManager dance.
|
||||
private nonisolated func mtime(at url: URL) -> Date? {
|
||||
let attrs = try? FileManager.default.attributesOfItem(atPath: url.path)
|
||||
return attrs?[.modificationDate] as? Date
|
||||
}
|
||||
|
||||
public func close() {
|
||||
@@ -294,6 +358,50 @@ public actor HermesDataService {
|
||||
return cols
|
||||
}
|
||||
|
||||
/// Bounded message fetch keyed by message id (monotonic per row,
|
||||
/// safer than timestamp-based pagination because streaming chunk
|
||||
/// timestamps can collide). Returns the most recent `limit`
|
||||
/// messages older than `before` (when supplied) in chronological
|
||||
/// (ASC) order ready to display. Pass `before: nil` for the
|
||||
/// initial load — the DB returns the newest `limit` rows.
|
||||
public func fetchMessages(
|
||||
sessionId: String,
|
||||
limit: Int,
|
||||
before: Int? = nil
|
||||
) -> [HermesMessage] {
|
||||
guard let db else { return [] }
|
||||
let sql: String
|
||||
if before != nil {
|
||||
sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? AND id < ? ORDER BY id DESC LIMIT ?"
|
||||
} else {
|
||||
sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? ORDER BY id DESC LIMIT ?"
|
||||
}
|
||||
var stmt: OpaquePointer?
|
||||
guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [] }
|
||||
defer { sqlite3_finalize(stmt) }
|
||||
sqlite3_bind_text(stmt, 1, sessionId, -1, sqliteTransient)
|
||||
if let before {
|
||||
sqlite3_bind_int(stmt, 2, Int32(before))
|
||||
sqlite3_bind_int(stmt, 3, Int32(limit))
|
||||
} else {
|
||||
sqlite3_bind_int(stmt, 2, Int32(limit))
|
||||
}
|
||||
|
||||
var messages: [HermesMessage] = []
|
||||
while sqlite3_step(stmt) == SQLITE_ROW {
|
||||
messages.append(messageFromRow(stmt!))
|
||||
}
|
||||
// Caller wants chronological (oldest-first) order; the SELECT
|
||||
// is DESC for the LIMIT to bite the newest rows, so reverse.
|
||||
return messages.reversed()
|
||||
}
|
||||
|
||||
/// Legacy unbounded fetch retained for one release cycle so any
|
||||
/// out-of-tree consumers don't break. New code should use the
|
||||
/// bounded `fetchMessages(sessionId:limit:before:)` variant —
|
||||
/// snapshot loads on 1000+-message sessions stall the UI when
|
||||
/// they materialize the whole history at once.
|
||||
@available(*, deprecated, message: "Use fetchMessages(sessionId:limit:before:) instead.")
|
||||
public func fetchMessages(sessionId: String) -> [HermesMessage] {
|
||||
guard let db else { return [] }
|
||||
let sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? ORDER BY timestamp ASC"
|
||||
|
||||
@@ -247,6 +247,11 @@ public struct LocalTransport: ServerTransport {
|
||||
URL(fileURLWithPath: remotePath)
|
||||
}
|
||||
|
||||
/// Local transport reads the live DB directly — there's no cached
|
||||
/// snapshot to fall back to (and no failure mode where falling back
|
||||
/// would help, since a missing local file is missing both ways).
|
||||
public var cachedSnapshotPath: URL? { nil }
|
||||
|
||||
// MARK: - Watching
|
||||
|
||||
#if canImport(Darwin)
|
||||
|
||||
@@ -603,6 +603,14 @@ public struct SSHTransport: ServerTransport {
|
||||
return URL(fileURLWithPath: localPath)
|
||||
}
|
||||
|
||||
/// Path where the most recent successful snapshot was written —
|
||||
/// returned even when the remote is currently unreachable. The
|
||||
/// data service falls back to this when `snapshotSQLite` throws so
|
||||
/// Dashboard / Sessions / Chat-history stay viewable offline.
|
||||
public var cachedSnapshotPath: URL? {
|
||||
URL(fileURLWithPath: snapshotDir + "/state.db")
|
||||
}
|
||||
|
||||
// MARK: - Watching
|
||||
|
||||
public func watchPaths(_ paths: [String]) -> AsyncStream<WatchEvent> {
|
||||
|
||||
@@ -90,6 +90,19 @@ public protocol ServerTransport: Sendable {
|
||||
/// `~/Library/Caches/scarf/<serverID>/state.db`, returning that URL.
|
||||
nonisolated func snapshotSQLite(remotePath: String) throws -> URL
|
||||
|
||||
/// Local filesystem URL where this transport caches its SQLite snapshot,
|
||||
/// returned even when the remote is unreachable. Callers should
|
||||
/// `FileManager.default.fileExists(atPath:)` before reading — the
|
||||
/// transport can't atomically check existence and return the URL
|
||||
/// in one step without TOCTOU. Local transports return `nil`
|
||||
/// (their data is the live DB, not a cache).
|
||||
///
|
||||
/// Used by `HermesDataService.open()` to fall back to the last
|
||||
/// successful snapshot when a fresh `snapshotSQLite` call fails,
|
||||
/// so the app keeps showing data with a "Last updated X ago"
|
||||
/// affordance instead of a blank screen.
|
||||
nonisolated var cachedSnapshotPath: URL? { get }
|
||||
|
||||
// MARK: - Watching
|
||||
|
||||
/// Observe changes to a set of paths and yield events when any of them
|
||||
|
||||
+30
-20
@@ -16,7 +16,7 @@ public final class ConnectionStatusViewModel {
|
||||
#endif
|
||||
|
||||
public enum Status: Equatable {
|
||||
/// Healthy: SSH connected AND we can read `~/.hermes/config.yaml`.
|
||||
/// Healthy: SSH connected AND we can read `~/.hermes/state.db`.
|
||||
case connected
|
||||
/// SSH connects but the follow-up read-access probe failed. Data
|
||||
/// views will be empty until this is resolved.
|
||||
@@ -38,14 +38,17 @@ public final class ConnectionStatusViewModel {
|
||||
/// Specific tier-2 failure mode emitted by the probe script. Used to
|
||||
/// drive both the pill copy and the popover hint (issue #53).
|
||||
public enum DegradedCause: Equatable {
|
||||
/// `config.yaml` is missing entirely. Most common cause: Hermes
|
||||
/// hasn't run `setup` yet on this remote.
|
||||
/// `state.db` is missing entirely. Most common cause: Hermes
|
||||
/// is installed but no session has run on this remote yet.
|
||||
/// Case name kept as `configMissing` for back-compat with
|
||||
/// callers that pattern-match on it; "config" here is loose
|
||||
/// for "Scarf's required state file."
|
||||
case configMissing
|
||||
/// `~/.hermes` itself doesn't exist. Hermes isn't installed for
|
||||
/// the SSH user on this host.
|
||||
case homeMissing
|
||||
/// File exists but the SSH user can't read it. Permission /
|
||||
/// ownership mismatch.
|
||||
/// ownership mismatch. Same back-compat note as above.
|
||||
case configUnreadable
|
||||
/// `~/.hermes/active_profile` points at a non-default Hermes
|
||||
/// profile and the configured Hermes home doesn't carry the
|
||||
@@ -110,10 +113,18 @@ public final class ConnectionStatusViewModel {
|
||||
let hermesHome = context.paths.home
|
||||
// Two-tier probe in one SSH round-trip:
|
||||
// tier 1: `true` — raw connectivity / auth / ControlMaster path
|
||||
// tier 2: `test -r $HERMESHOME/config.yaml` — can we actually
|
||||
// read the file Dashboard reads on every tick? Green pill
|
||||
// only if both pass; yellow "degraded" if tier 1 passes
|
||||
// but tier 2 fails (the exact symptom in issue #19).
|
||||
// tier 2: `test -r $HERMESHOME/state.db` — can we actually read
|
||||
// the file Dashboard / Sessions / Activity all hit on
|
||||
// every tick? Green pill only if both pass.
|
||||
//
|
||||
// Probe historically targeted `config.yaml`, but Hermes v0.11+
|
||||
// doesn't materialize that file eagerly — it ships with sane
|
||||
// defaults and only writes config.yaml when the user actually
|
||||
// changes something. Result: a freshly-installed Hermes that's
|
||||
// running, persisting sessions, and serving Scarf was being
|
||||
// marked "degraded — config missing" indefinitely. `state.db`
|
||||
// is created on first agent run and is the actual surface
|
||||
// Scarf depends on, so we probe that instead.
|
||||
// Script emits two lines: TIER1:<exitcode> and TIER2:<exitcode>.
|
||||
let homeArg: String
|
||||
if hermesHome.hasPrefix("~/") {
|
||||
@@ -124,22 +135,21 @@ public final class ConnectionStatusViewModel {
|
||||
homeArg = "\"\(hermesHome.replacingOccurrences(of: "\"", with: "\\\""))\""
|
||||
}
|
||||
// Probe emits a granular `TIER2:1:<cause>` code so the pill can
|
||||
// surface a specific hint (issue #53) instead of the prior
|
||||
// collapsed-to-binary "can't read config.yaml". Causes:
|
||||
// surface a specific hint (issue #53). Causes:
|
||||
// no-home — $H itself doesn't exist
|
||||
// missing — config.yaml absent
|
||||
// missing — state.db absent (Hermes hasn't been run yet)
|
||||
// perm — exists but unreadable by SSH user
|
||||
// profile:<name> — config missing AND ~/.hermes/active_profile
|
||||
// profile:<name> — state.db missing AND ~/.hermes/active_profile
|
||||
// points at a Hermes profile, suggesting Scarf
|
||||
// is reading the wrong dir
|
||||
let script = """
|
||||
echo TIER1:0
|
||||
H=\(homeArg)
|
||||
if [ -r "$H/config.yaml" ]; then
|
||||
if [ -r "$H/state.db" ]; then
|
||||
echo TIER2:0
|
||||
elif [ ! -d "$H" ]; then
|
||||
echo TIER2:1:no-home
|
||||
elif [ ! -e "$H/config.yaml" ]; then
|
||||
elif [ ! -e "$H/state.db" ]; then
|
||||
ACTIVE=""
|
||||
if [ -r "$HOME/.hermes/active_profile" ]; then
|
||||
ACTIVE=$(head -n1 "$HOME/.hermes/active_profile" 2>/dev/null | tr -d ' \\t\\r\\n')
|
||||
@@ -263,23 +273,23 @@ public final class ConnectionStatusViewModel {
|
||||
)
|
||||
case .configMissing:
|
||||
return (
|
||||
"Hermes hasn't been set up yet",
|
||||
"`\(hermesHome)/config.yaml` is missing. Run `hermes setup` (or your first `hermes chat`) on the remote to create it. Scarf will go green automatically once it appears."
|
||||
"Hermes hasn't been run yet",
|
||||
"`\(hermesHome)/state.db` is missing — Hermes creates it on first agent run. Start any session on the remote (e.g. `hermes chat`) and Scarf will go green automatically."
|
||||
)
|
||||
case .configUnreadable:
|
||||
return (
|
||||
"Permission denied on config.yaml",
|
||||
"`\(hermesHome)/config.yaml` exists but the SSH user can't read it. Check ownership: `ls -l \(hermesHome)/config.yaml`. Either run Hermes as the SSH user, `chmod a+r` the file, or SSH as the Hermes user."
|
||||
"Permission denied on state.db",
|
||||
"`\(hermesHome)/state.db` exists but the SSH user can't read it. Check ownership: `ls -l \(hermesHome)/state.db`. Either run Hermes as the SSH user, `chmod a+r` the file, or SSH as the Hermes user."
|
||||
)
|
||||
case .profileActive(let name):
|
||||
return (
|
||||
"Hermes profile \"\(name)\" is active",
|
||||
"The remote is using Hermes profile `\(name)` — its config lives at `~/.hermes/profiles/\(name)/config.yaml`, not `\(hermesHome)/config.yaml`. Either set this server's Hermes home to `~/.hermes/profiles/\(name)` in Manage Servers → Edit, or run `hermes profile use default` on the remote to revert."
|
||||
"The remote is using Hermes profile `\(name)` — its state lives at `~/.hermes/profiles/\(name)/state.db`, not `\(hermesHome)/state.db`. Either set this server's Hermes home to `~/.hermes/profiles/\(name)` in Manage Servers → Edit, or run `hermes profile use default` on the remote to revert."
|
||||
)
|
||||
case .unknown:
|
||||
return (
|
||||
"Can't read Hermes state",
|
||||
"SSH is fine but Scarf can't reach `\(hermesHome)/config.yaml`. Run diagnostics for a full breakdown."
|
||||
"SSH is fine but Scarf can't reach `\(hermesHome)/state.db`. Run diagnostics for a full breakdown."
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -339,6 +339,20 @@ public final class RichChatViewModel {
|
||||
/// The original CLI session ID when resuming a CLI session via ACP.
|
||||
/// Used to combine old CLI messages with new ACP messages.
|
||||
public private(set) var originSessionId: String?
|
||||
/// Smallest DB id currently loaded for the *current session* (i.e.
|
||||
/// `sessionId`). Drives `loadEarlier()`: page back with
|
||||
/// `before: oldestLoadedMessageID`. `nil` when nothing has been
|
||||
/// loaded yet or the session has no DB-persisted messages.
|
||||
public private(set) var oldestLoadedMessageID: Int?
|
||||
/// Whether the most recent fetch suggests there are more older
|
||||
/// messages on disk that haven't been loaded into `messages` yet.
|
||||
/// Set to `true` when the initial fetch returned exactly `limit`
|
||||
/// rows (a strong hint the table has more). Drives the "Load
|
||||
/// earlier" button visibility in chat views.
|
||||
public private(set) var hasMoreHistory: Bool = false
|
||||
/// Set during a `loadEarlier()` fetch so the UI can show a
|
||||
/// spinner and we don't fan out duplicate page requests.
|
||||
public private(set) var isLoadingEarlier: Bool = false
|
||||
private var nextLocalId = -1
|
||||
private var streamingAssistantText = ""
|
||||
private var streamingThinkingText = ""
|
||||
@@ -382,6 +396,9 @@ public final class RichChatViewModel {
|
||||
lastKnownFingerprint = nil
|
||||
sessionId = nil
|
||||
originSessionId = nil
|
||||
oldestLoadedMessageID = nil
|
||||
hasMoreHistory = false
|
||||
isLoadingEarlier = false
|
||||
isAgentWorking = false
|
||||
userSendPending = false
|
||||
resetTimestamp = Date()
|
||||
@@ -875,12 +892,15 @@ public final class RichChatViewModel {
|
||||
let opened = await dataService.open()
|
||||
guard opened else { return }
|
||||
|
||||
var dbMessages = await dataService.fetchMessages(sessionId: sessionId)
|
||||
// Reconnects don't generate hundreds of unseen messages, so a
|
||||
// 200-row tail is plenty for the merge — and it keeps us from
|
||||
// re-materializing 1000+ message sessions on every reconnect.
|
||||
var dbMessages = await dataService.fetchMessages(sessionId: sessionId, limit: HistoryPageSize.reconcile)
|
||||
|
||||
// If we have an origin session (CLI session continued via ACP),
|
||||
// include those messages too
|
||||
if let origin = originSessionId, origin != sessionId {
|
||||
let originMessages = await dataService.fetchMessages(sessionId: origin)
|
||||
let originMessages = await dataService.fetchMessages(sessionId: origin, limit: HistoryPageSize.reconcile)
|
||||
if !originMessages.isEmpty {
|
||||
dbMessages = originMessages + dbMessages
|
||||
dbMessages.sort { ($0.timestamp ?? .distantPast) < ($1.timestamp ?? .distantPast) }
|
||||
@@ -925,10 +945,18 @@ public final class RichChatViewModel {
|
||||
// would have cached a stale copy — on resume we need whatever
|
||||
// Hermes has actually persisted since then, or the resumed session
|
||||
// will show only history up to the moment the snapshot was taken.
|
||||
let opened = await dataService.refresh()
|
||||
// `forceFresh: true` refuses the stale-snapshot fallback the data
|
||||
// service grew in M11 — falling back here would silently hide
|
||||
// messages the agent streamed during the user's offline window.
|
||||
let opened = await dataService.refresh(forceFresh: true)
|
||||
guard opened else { return }
|
||||
|
||||
var allMessages = await dataService.fetchMessages(sessionId: sessionId)
|
||||
let pageSize = HistoryPageSize.initial
|
||||
var allMessages = await dataService.fetchMessages(sessionId: sessionId, limit: pageSize)
|
||||
// The DB has more on-disk history when the initial fetch
|
||||
// saturated the limit. The "Load earlier" affordance reads
|
||||
// this flag.
|
||||
var moreHistory = allMessages.count >= pageSize
|
||||
let session = await dataService.fetchSession(id: sessionId)
|
||||
|
||||
// If the ACP session is different from the origin, load its messages too
|
||||
@@ -936,10 +964,11 @@ public final class RichChatViewModel {
|
||||
if let acpId = acpSessionId, acpId != sessionId {
|
||||
originSessionId = sessionId
|
||||
self.sessionId = acpId
|
||||
let acpMessages = await dataService.fetchMessages(sessionId: acpId)
|
||||
let acpMessages = await dataService.fetchMessages(sessionId: acpId, limit: pageSize)
|
||||
if !acpMessages.isEmpty {
|
||||
allMessages.append(contentsOf: acpMessages)
|
||||
allMessages.sort { ($0.timestamp ?? .distantPast) < ($1.timestamp ?? .distantPast) }
|
||||
moreHistory = moreHistory || acpMessages.count >= pageSize
|
||||
}
|
||||
}
|
||||
|
||||
@@ -947,6 +976,51 @@ public final class RichChatViewModel {
|
||||
currentSession = session
|
||||
let minId = allMessages.map(\.id).min() ?? 0
|
||||
nextLocalId = min(minId - 1, -1)
|
||||
// Track the oldest loaded id from THIS session (not the merged
|
||||
// origin) so `loadEarlier()` pages back through the live ACP
|
||||
// session's history. Cross-session backfill (paging into the
|
||||
// CLI origin) isn't supported in v1 — the merged 2× pageSize
|
||||
// is enough headroom for the dashboard-resume case.
|
||||
let currentSessionId = self.sessionId ?? sessionId
|
||||
oldestLoadedMessageID = allMessages
|
||||
.filter { $0.sessionId == currentSessionId }
|
||||
.map(\.id)
|
||||
.min()
|
||||
hasMoreHistory = moreHistory
|
||||
buildMessageGroups()
|
||||
}
|
||||
|
||||
// MARK: - Load Earlier (pagination)
|
||||
|
||||
/// Page back through the current session's DB-persisted history
|
||||
/// before `oldestLoadedMessageID` and prepend the page to
|
||||
/// `messages`. Cheap on the SQLite side (`id` is the primary
|
||||
/// key); the cost is the data-service `open()` round-trip on
|
||||
/// remote contexts. `pageSize` defaults to the same 200-row
|
||||
/// budget as the initial load.
|
||||
public func loadEarlier(pageSize: Int = HistoryPageSize.initial) async {
|
||||
guard !isLoadingEarlier, hasMoreHistory else { return }
|
||||
guard let sessionId, let oldest = oldestLoadedMessageID else { return }
|
||||
isLoadingEarlier = true
|
||||
defer { isLoadingEarlier = false }
|
||||
|
||||
let opened = await dataService.open()
|
||||
guard opened else { return }
|
||||
|
||||
let older = await dataService.fetchMessages(
|
||||
sessionId: sessionId,
|
||||
limit: pageSize,
|
||||
before: oldest
|
||||
)
|
||||
guard !older.isEmpty else {
|
||||
hasMoreHistory = false
|
||||
return
|
||||
}
|
||||
messages.insert(contentsOf: older, at: 0)
|
||||
oldestLoadedMessageID = older.first?.id
|
||||
// If this fetch returned fewer than the page size we've hit
|
||||
// the bottom of the table — no further pages worth fetching.
|
||||
hasMoreHistory = older.count >= pageSize
|
||||
buildMessageGroups()
|
||||
}
|
||||
|
||||
@@ -990,7 +1064,7 @@ public final class RichChatViewModel {
|
||||
let fingerprint = await dataService.fetchMessageFingerprint(sessionId: sessionId)
|
||||
|
||||
if fingerprint != lastKnownFingerprint {
|
||||
let fetched = await dataService.fetchMessages(sessionId: sessionId)
|
||||
let fetched = await dataService.fetchMessages(sessionId: sessionId, limit: HistoryPageSize.polling)
|
||||
let session = await dataService.fetchSession(id: sessionId)
|
||||
lastKnownFingerprint = fingerprint
|
||||
|
||||
|
||||
@@ -165,6 +165,15 @@ public final class CitadelServerTransport: ServerTransport, @unchecked Sendable
|
||||
try runSync { try await self.asyncSnapshotSQLite(remotePath: remotePath) }
|
||||
}
|
||||
|
||||
/// Path where the most recent successful snapshot was written —
|
||||
/// returned even when the SSH connection is currently down. The
|
||||
/// data service falls back to this when `snapshotSQLite` throws so
|
||||
/// Dashboard / Sessions / Chat-history stay viewable while the
|
||||
/// phone is offline.
|
||||
public var cachedSnapshotPath: URL? {
|
||||
snapshotBaseDir.appendingPathComponent("state.db")
|
||||
}
|
||||
|
||||
// MARK: - ServerTransport: watching
|
||||
|
||||
public func watchPaths(_ paths: [String]) -> AsyncStream<WatchEvent> {
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
import Foundation
|
||||
import Network
|
||||
import Observation
|
||||
#if canImport(os)
|
||||
import os
|
||||
#endif
|
||||
|
||||
/// Process-wide reachability monitor wrapping `NWPathMonitor`. Used by
/// `ChatController` to decide when to attempt a reconnect (on
/// `.satisfied`) vs. mark the chat offline (on `.unsatisfied`).
///
/// Singleton because `NWPathMonitor` is per-process by design — there's
/// no benefit to instantiating multiple monitors and the cost (a small
/// background queue per instance) accumulates if every controller
/// spawns its own.
///
/// ## Usage
///
/// Don't read the published state from a SwiftUI view body — the
/// runtime samples through `NWPathMonitor`'s queue, but a `body`
/// re-evaluation that touches `currentPath` directly would block. Read
/// `isSatisfied` / observe `transitionTick` instead. Tests and
/// non-iOS callers can use the no-op default behavior (`isSatisfied`
/// reports `true`).
@Observable
@MainActor
public final class NetworkReachabilityService {
    public static let shared = NetworkReachabilityService()

    /// `true` when the OS reports a usable network path (any
    /// interface). Inverted via `!isSatisfied` for "we're offline."
    public private(set) var isSatisfied: Bool = true

    /// Mirrors `NWPath.isExpensive`. Useful as a hint to UI for not
    /// auto-fetching big payloads on cellular. Not consumed yet —
    /// reserved so callers don't have to add another property later.
    public private(set) var isExpensive: Bool = false

    /// Monotonic counter that bumps every time `isSatisfied` changes.
    /// Views observe `transitionTick` rather than `isSatisfied` to
    /// kick a `.onChange` even if the value is the same as before
    /// (rare but possible during rapid network flapping).
    public private(set) var transitionTick: Int = 0

    private let monitor = NWPathMonitor()
    private let queue = DispatchQueue(label: "com.scarf.ios.reachability")

    #if canImport(os)
    private static let logger = Logger(subsystem: "com.scarf.ios", category: "NetworkReachability")
    #endif

    private init() {
        // Seed state synchronously from the not-yet-started monitor so
        // the first reads on launch don't claim "satisfied" while the
        // OS reports otherwise. Touching `currentPath` is safe at this
        // point: `start(queue:)` hasn't been called, so no handler can
        // be firing concurrently.
        let seedPath = monitor.currentPath
        isSatisfied = seedPath.status == .satisfied
        isExpensive = seedPath.isExpensive

        monitor.pathUpdateHandler = { [weak self] path in
            // The handler runs on `queue` (a private background queue);
            // hop back onto the main actor because `@Observable`
            // published-property invariants require main-thread mutation.
            Task { @MainActor in
                guard let self else { return }
                let nowSatisfied = path.status == .satisfied
                if nowSatisfied != self.isSatisfied {
                    self.isSatisfied = nowSatisfied
                    self.transitionTick &+= 1
                    #if canImport(os)
                    Self.logger.info(
                        "Reachability transition: \(nowSatisfied ? "satisfied" : "unsatisfied", privacy: .public)"
                    )
                    #endif
                }
                self.isExpensive = path.isExpensive
            }
        }
        monitor.start(queue: queue)
    }

    deinit {
        // The singleton lives for the whole process; this only runs at
        // shutdown.
        monitor.cancel()
    }
}
|
||||
@@ -31,6 +31,28 @@ final class ScarfGoCoordinator {
|
||||
/// `AppCoordinator.pendingProjectChat`.
|
||||
var pendingProjectChat: String?
|
||||
|
||||
/// Most-recent scene-phase value observed at the WindowGroup
|
||||
/// level. Tab-specific view models (e.g. `ChatController`)
|
||||
/// observe `scenePhaseTick` to react to transitions even when
|
||||
/// they're on a non-foreground tab — `.onChange(of: ScenePhase)`
|
||||
/// alone wouldn't fire for views that aren't on screen.
|
||||
private(set) var scenePhase: ScenePhase = .active
|
||||
private(set) var scenePhaseTick: Int = 0
|
||||
/// Wallclock when we last observed `.background`. Used by tab
|
||||
/// view-models to decide whether a quick `.active` transition is
|
||||
/// worth a full re-verify (long suspensions warrant it; brief
|
||||
/// notification-center peeks don't). `nil` until the first
|
||||
/// background transition.
|
||||
private(set) var lastBackgroundedAt: Date?
|
||||
|
||||
/// Record the latest scene phase and bump the change counter so
/// observers fire even if the raw phase value repeats.
func setScenePhase(_ phase: ScenePhase) {
    // Stamp the wallclock only on the edge INTO .background, so a
    // repeated .background report doesn't refresh the timestamp.
    let enteringBackground = (phase == .background) && (scenePhase != .background)
    if enteringBackground {
        lastBackgroundedAt = Date()
    }
    scenePhase = phase
    scenePhaseTick &+= 1
}
|
||||
|
||||
/// Identity of each top-level tab in the iOS tab bar.
enum Tab: Hashable {
    case dashboard
    case projects
    case chat
    case skills
    case system
}
|
||||
|
||||
@@ -36,6 +36,12 @@ struct ScarfGoTabRoot: View {
|
||||
/// through here.
|
||||
@State private var coordinator = ScarfGoCoordinator()
|
||||
|
||||
/// SwiftUI's `.onChange(of: ScenePhase)` modifier on a non-active
|
||||
/// tab doesn't fire while the tab is unmounted — the coordinator
|
||||
/// is the single source of truth for scene-phase transitions
|
||||
/// across all tabs.
|
||||
@Environment(\.scenePhase) private var scenePhase
|
||||
|
||||
var body: some View {
|
||||
// The transport factory is keyed by ServerID, so the correct
|
||||
// Keychain slot + config is picked automatically. Reuses the
|
||||
@@ -119,6 +125,12 @@ struct ScarfGoTabRoot: View {
|
||||
// just observes.
|
||||
NotificationRouter.shared.coordinator = coordinator
|
||||
}
|
||||
// Funnel scene-phase transitions through the coordinator so
|
||||
// tab view-models (notably ChatController) can react even
|
||||
// when their tab isn't currently on-screen.
|
||||
.onChange(of: scenePhase) { _, newPhase in
|
||||
coordinator.setScenePhase(newPhase)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -63,6 +63,13 @@ struct ScarfIOSApp: App {
|
||||
// Hermes gains a push sender.
|
||||
await MainActor.run { NotificationRouter.shared.setUpOnLaunch() }
|
||||
}
|
||||
.task {
|
||||
// Drop chat drafts older than 7 days so the
|
||||
// UserDefaults plist doesn't grow unbounded across
|
||||
// years of use. Cheap; UserDefaults is already in
|
||||
// memory by the time we read keys.
|
||||
ChatController.pruneStaleDrafts()
|
||||
}
|
||||
// Clamp Dynamic Type at the scene root. ScarfGo is a
|
||||
// developer tool that needs more density than Apple's
|
||||
// .xxxLarge default, but we still scale from .xSmall
|
||||
|
||||
@@ -50,6 +50,7 @@ struct ChatView: View {
|
||||
|
||||
var body: some View {
|
||||
VStack(spacing: 0) {
|
||||
connectionBanner
|
||||
errorBanner
|
||||
projectContextBar
|
||||
messageList
|
||||
@@ -118,6 +119,23 @@ struct ChatView: View {
|
||||
coordinator?.pendingProjectChat = nil
|
||||
Task { await consumePendingProjectChat(projectPath) }
|
||||
}
|
||||
// React to network reachability transitions. The service
|
||||
// updates its `transitionTick` on every `.satisfied <->
|
||||
// .unsatisfied` edge; the `.onChange` here funnels each
|
||||
// edge into ChatController so the reconnect machinery can
|
||||
// suspend on link-down and resume on link-up.
|
||||
.onChange(of: NetworkReachabilityService.shared.transitionTick) { _, _ in
|
||||
Task { await controller.handleReachabilityChange() }
|
||||
}
|
||||
// React to scene-phase transitions (background → active etc).
|
||||
// Source of truth is the coordinator, not `@Environment(\.scenePhase)`,
|
||||
// so the chat tab still picks up phase changes that happened
|
||||
// while it was unmounted (the user is on Dashboard when the
|
||||
// app backgrounds; sees Chat after resume).
|
||||
.onChange(of: coordinator?.scenePhaseTick) { _, _ in
|
||||
guard let phase = coordinator?.scenePhase else { return }
|
||||
Task { await controller.handleScenePhase(phase) }
|
||||
}
|
||||
// Deliberately NOT tearing down the ACP session on .onDisappear.
|
||||
// `TabView` unmounts tab content when the user switches tabs
|
||||
// (disappear fires), but `@State var controller` keeps the
|
||||
@@ -201,6 +219,9 @@ struct ChatView: View {
|
||||
emptyState
|
||||
}
|
||||
}
|
||||
if controller.vm.hasMoreHistory {
|
||||
loadEarlierButton
|
||||
}
|
||||
ForEach(controller.vm.messages) { msg in
|
||||
MessageBubble(
|
||||
message: msg,
|
||||
@@ -247,6 +268,37 @@ struct ChatView: View {
|
||||
.scrollDismissesKeyboard(.interactively)
|
||||
}
|
||||
|
||||
/// "Load earlier messages" affordance pinned above the oldest
|
||||
/// loaded bubble. Only rendered when `vm.hasMoreHistory == true`,
|
||||
/// so it disappears organically once the user has paged back to
|
||||
/// the start of the session.
|
||||
@ViewBuilder
private var loadEarlierButton: some View {
    Button {
        Task { await controller.vm.loadEarlier() }
    } label: {
        HStack(spacing: 6) {
            if controller.vm.isLoadingEarlier {
                // Spinner replaces the arrow while a page fetch is
                // in flight.
                ProgressView()
                    .scaleEffect(0.7)
            } else {
                Image(systemName: "arrow.up.circle")
                    .font(.caption)
            }
            Text(controller.vm.isLoadingEarlier ? "Loading earlier…" : "Load earlier messages")
                .font(.caption)
        }
        .foregroundStyle(ScarfColor.foregroundMuted)
        .padding(.horizontal, 12)
        .padding(.vertical, 6)
        .background(.regularMaterial, in: Capsule())
    }
    .buttonStyle(.plain)
    // Disabled (not hidden) during a fetch so the layout doesn't jump
    // and double-taps can't queue overlapping page loads.
    .disabled(controller.vm.isLoadingEarlier)
    .frame(maxWidth: .infinity)
    .padding(.top, 8)
}
|
||||
|
||||
@ViewBuilder
|
||||
private var emptyState: some View {
|
||||
VStack(spacing: 8) {
|
||||
@@ -290,6 +342,58 @@ struct ChatView: View {
|
||||
.padding(.top, 60)
|
||||
}
|
||||
|
||||
/// Top-of-screen banner for transient connection states. `.failed`
|
||||
/// keeps using the existing full-screen overlay (so the user has
|
||||
/// somewhere obvious to tap "Retry"); `.reconnecting` and
|
||||
/// `.offline` are non-modal so the user can keep reading the
|
||||
/// transcript while we work in the background.
|
||||
@ViewBuilder
private var connectionBanner: some View {
    switch controller.state {
    case .reconnecting(let attempt, let total):
        // Warning tint + spinner: recovery is in progress; the user
        // doesn't need to do anything.
        connectionBannerStrip(
            text: "Reconnecting (\(attempt)/\(total))…",
            tint: ScarfColor.warning,
            showSpinner: true
        )
    case .offline(let reason):
        // Danger tint, no spinner: we're parked until reachability
        // comes back.
        connectionBannerStrip(
            text: reason,
            tint: ScarfColor.danger,
            showSpinner: false
        )
    default:
        // .idle / .connecting / .ready / .failed render no strip here;
        // .failed is handled by the full-screen overlay instead.
        EmptyView()
    }
}
|
||||
|
||||
/// Slim, tinted, full-width strip shared by the `.reconnecting` and
/// `.offline` banner cases.
/// - Parameters:
///   - text: Caption shown next to the status icon/spinner.
///   - tint: Accent color applied to the icon, text, and background wash.
///   - showSpinner: `true` renders a spinner (recovery in progress);
///     `false` renders a wifi-slash icon (offline).
private func connectionBannerStrip(
    text: String,
    tint: Color,
    showSpinner: Bool
) -> some View {
    HStack(spacing: 8) {
        if showSpinner {
            ProgressView()
                .scaleEffect(0.7)
                .tint(tint)
        } else {
            Image(systemName: "wifi.slash")
                .font(.caption)
                .foregroundStyle(tint)
        }
        Text(text)
            .font(.caption)
            .foregroundStyle(tint)
        Spacer(minLength: 0)
    }
    .padding(.horizontal, 12)
    .padding(.vertical, 6)
    .frame(maxWidth: .infinity, alignment: .leading)
    // Low-opacity wash keeps the strip readable over any background.
    .background(tint.opacity(0.16))
    .transition(.move(edge: .top).combined(with: .opacity))
}
|
||||
|
||||
@ViewBuilder
|
||||
/// Soft pill above the composer confirming a non-interruptive
|
||||
/// command was received (e.g. `/steer`). Auto-clears via the
|
||||
@@ -326,6 +430,12 @@ struct ChatView: View {
|
||||
.onSubmit {
|
||||
Task { await controller.send() }
|
||||
}
|
||||
// Persist the half-typed message across app suspensions
|
||||
// and force-quits. Debounced inside `scheduleDraftSave`
|
||||
// so we coalesce per-keystroke writes.
|
||||
.onChange(of: controller.draft) { _, _ in
|
||||
controller.scheduleDraftSave()
|
||||
}
|
||||
// Explicit dismiss-keyboard affordance, complementing the
|
||||
// interactive scroll-to-dismiss on the message list. iOS
|
||||
// shows a keyboard accessory toolbar above the system
|
||||
@@ -551,6 +661,14 @@ final class ChatController {
|
||||
case idle
|
||||
case connecting
|
||||
case ready
|
||||
/// Mid-recovery: the SSH exec channel died but the agent on
|
||||
/// the remote may still be running. We're trying to reattach
|
||||
/// via `session/resume` (or `session/load` as a fallback).
|
||||
case reconnecting(attempt: Int, of: Int)
|
||||
/// Network reachability is unsatisfied. Distinct from
|
||||
/// `.failed` so the banner can stay tinted yellow ("we'll
|
||||
/// retry") instead of red ("dead").
|
||||
case offline(reason: String)
|
||||
case failed(String)
|
||||
}
|
||||
|
||||
@@ -574,12 +692,100 @@ final class ChatController {
|
||||
private let context: ServerContext
|
||||
private var client: ACPClient?
|
||||
private var eventTask: Task<Void, Never>?
|
||||
private var healthMonitorTask: Task<Void, Never>?
|
||||
private var reconnectTask: Task<Void, Never>?
|
||||
private var isHandlingDisconnect = false
|
||||
private var pendingDraftSave: Task<Void, Never>?
|
||||
|
||||
/// Session id of the currently-active chat. Saved when state
|
||||
/// reaches `.ready` and cleared on explicit `stop()` so a
|
||||
/// user-initiated disconnect doesn't get auto-reconnected when
|
||||
/// network/scene events fire later.
|
||||
private var lastActiveSessionID: String?
|
||||
/// Optional project working directory of the currently-active
|
||||
/// session. Used as `cwd` on the recovery path so a project-
|
||||
/// scoped session reconnects with the right scope.
|
||||
private var lastProjectPath: String?
|
||||
|
||||
// Reconnect tuning — verbatim from the Mac implementation at
|
||||
// scarf/Features/Chat/ViewModels/ChatViewModel.swift:563-693.
|
||||
private static let maxReconnectAttempts = 5
|
||||
private static let reconnectBaseDelay: UInt64 = 1_000_000_000 // 1s
|
||||
private static let maxReconnectDelay: UInt64 = 16_000_000_000 // 16s
|
||||
|
||||
private static let logger = Logger(
|
||||
subsystem: "com.scarf.ios",
|
||||
category: "ChatController"
|
||||
)
|
||||
|
||||
// MARK: - Draft persistence
|
||||
|
||||
private static let draftKeyPrefix = "scarf.chat.draft.v1"
|
||||
private static let draftMaxAge: TimeInterval = 7 * 24 * 60 * 60 // 7 days
|
||||
|
||||
/// UserDefaults key for the draft belonging to one (server, session)
/// pair. `_no_session` covers the brief connecting window before
/// `vm.setSessionId` lands; the composer is disabled in that window,
/// so that slot is essentially never written — the sentinel just
/// keeps the key well-formed.
private static func draftKey(serverID: ServerID, sessionID: String?) -> String {
    let sessionSlot = sessionID ?? "_no_session"
    return [draftKeyPrefix, serverID.uuidString, sessionSlot].joined(separator: ".")
}
|
||||
|
||||
/// Sidecar key holding the epoch timestamp for a draft slot.
private static func draftTimestampKey(forKey key: String) -> String {
    return key.appending(".ts")
}
|
||||
|
||||
/// Flush the current draft to UserDefaults (with a timestamp sidecar),
/// or clear both slots when the draft is empty.
private func saveDraft() {
    let defaults = UserDefaults.standard
    let key = Self.draftKey(serverID: context.id, sessionID: vm.sessionId)
    let tsKey = Self.draftTimestampKey(forKey: key)
    guard !draft.isEmpty else {
        // Empty draft means "nothing to restore" — remove both the
        // body slot and its timestamp sidecar.
        defaults.removeObject(forKey: key)
        defaults.removeObject(forKey: tsKey)
        return
    }
    defaults.set(draft, forKey: key)
    // The sidecar timestamp feeds the 7-day janitor at launch.
    defaults.set(Date().timeIntervalSince1970, forKey: tsKey)
}
|
||||
|
||||
/// Restore a previously-saved draft for the current (server, session)
/// pair, if any. Leaves `draft` untouched when nothing was stored.
private func loadDraft() {
    let key = Self.draftKey(serverID: context.id, sessionID: vm.sessionId)
    guard let saved = UserDefaults.standard.string(forKey: key),
          !saved.isEmpty
    else { return }
    draft = saved
}
|
||||
|
||||
/// Remove the stored draft (body + timestamp sidecar) for the current
/// (server, session) pair.
private func clearStoredDraft() {
    let defaults = UserDefaults.standard
    let key = Self.draftKey(serverID: context.id, sessionID: vm.sessionId)
    defaults.removeObject(forKey: key)
    defaults.removeObject(forKey: Self.draftTimestampKey(forKey: key))
}
|
||||
|
||||
/// Debounced draft save. The view layer hooks this off
|
||||
/// `.onChange(of: controller.draft)` so per-keystroke writes are
|
||||
/// coalesced into one UserDefaults flush per ~1s of typing.
|
||||
func scheduleDraftSave() {
    // Restarting the task on every keystroke gives classic trailing-
    // edge debounce: only ~1s of quiet triggers the actual flush.
    pendingDraftSave?.cancel()
    pendingDraftSave = Task { @MainActor [weak self] in
        try? await Task.sleep(nanoseconds: 1_000_000_000)
        if Task.isCancelled { return }
        self?.saveDraft()
    }
}
|
||||
|
||||
/// One-shot janitor invoked at app launch. Removes draft slots
|
||||
/// whose timestamp sidecar predates `draftMaxAge`. Cheap enough
|
||||
/// to call synchronously — UserDefaults is in-memory at runtime.
|
||||
static func pruneStaleDrafts(now: Date = Date()) {
    let defaults = UserDefaults.standard
    let cutoff = now.timeIntervalSince1970 - draftMaxAge
    // Snapshot the matching sidecar keys up front so removals below
    // don't mutate the collection we're walking.
    let sidecarKeys = defaults.dictionaryRepresentation().keys.filter {
        $0.hasPrefix(draftKeyPrefix) && $0.hasSuffix(".ts")
    }
    for tsKey in sidecarKeys {
        guard let stamp = defaults.object(forKey: tsKey) as? TimeInterval,
              stamp < cutoff
        else { continue }
        let draftKey = String(tsKey.dropLast(3)) // strip ".ts" sidecar suffix
        defaults.removeObject(forKey: draftKey)
        defaults.removeObject(forKey: tsKey)
    }
}
|
||||
|
||||
init(context: ServerContext) {
|
||||
self.context = context
|
||||
self.vm = RichChatViewModel(context: context)
|
||||
@@ -626,16 +832,10 @@ final class ChatController {
|
||||
// Start streaming ACP events into the view-model BEFORE we
|
||||
// send session/new, so the `available_commands_update`
|
||||
// notification that the server sends on session init is
|
||||
// captured.
|
||||
let stream = await client.events
|
||||
eventTask = Task { [weak self] in
|
||||
for await event in stream {
|
||||
guard let self else { break }
|
||||
await MainActor.run {
|
||||
self.vm.handleACPEvent(event)
|
||||
}
|
||||
}
|
||||
}
|
||||
// captured. Health monitor catches socket-level death the
|
||||
// event-stream EOF wouldn't see (e.g., a hung remote read).
|
||||
startACPEventLoop(client: client)
|
||||
startHealthMonitor(client: client)
|
||||
|
||||
// Create a fresh ACP session. `cwd` is the remote user's home
|
||||
// directory — Hermes defaults to that for tool scoping.
|
||||
@@ -643,7 +843,10 @@ final class ChatController {
|
||||
let home = await context.resolvedUserHome()
|
||||
let sessionId = try await client.newSession(cwd: home)
|
||||
vm.setSessionId(sessionId)
|
||||
loadDraft()
|
||||
state = .ready
|
||||
lastActiveSessionID = sessionId
|
||||
lastProjectPath = nil
|
||||
} catch {
|
||||
state = .failed(error.localizedDescription)
|
||||
await vm.recordACPFailure(error, client: client)
|
||||
@@ -661,6 +864,7 @@ final class ChatController {
|
||||
let sessionId = vm.sessionId ?? ""
|
||||
guard !sessionId.isEmpty else { return }
|
||||
draft = ""
|
||||
clearStoredDraft()
|
||||
vm.addUserMessage(text: text)
|
||||
// /steer is non-interruptive — the agent is still on its
|
||||
// current turn; the guidance applies after the next tool call.
|
||||
@@ -721,13 +925,283 @@ final class ChatController {
|
||||
/// Stop the current session + tear down the SSH exec channel.
|
||||
/// Idempotent.
|
||||
func stop() async {
    // Cancel the background machinery first so nothing races the
    // client teardown below. (Previous revision cancelled eventTask
    // twice — the duplicate lines are removed here.)
    eventTask?.cancel(); eventTask = nil
    healthMonitorTask?.cancel(); healthMonitorTask = nil
    reconnectTask?.cancel(); reconnectTask = nil
    if let client {
        await client.stop()
    }
    client = nil
    state = .idle
    // Explicit user-initiated disconnect — clear the session
    // memory so reachability/scenePhase events don't try to
    // resurrect the dead chat.
    lastActiveSessionID = nil
    lastProjectPath = nil
    isHandlingDisconnect = false
}
|
||||
|
||||
// MARK: - Reconnect machinery (Section 1)
|
||||
|
||||
/// Stream ACP events into the view-model. When the stream ends
|
||||
/// without us cancelling it, the channel died; route into the
|
||||
/// reconnect path. Direct port of Mac's `startACPEventLoop`
|
||||
/// (scarf/Features/Chat/ViewModels/ChatViewModel.swift:563).
|
||||
private func startACPEventLoop(client: ACPClient) {
    // Deliberately `[weak self]` with per-event `self?.` rather than a
    // single `guard let self` — binding self strongly would keep the
    // controller alive for the lifetime of the stream.
    eventTask = Task { @MainActor [weak self] in
        let stream = await client.events
        for await event in stream {
            // Bail between events so a cancelled loop stops promptly
            // without processing stale chunks.
            guard !Task.isCancelled else { break }
            self?.vm.handleACPEvent(event)
        }
        // Stream ended — if we weren't explicitly cancelled the
        // channel died (EOF on stdin/out, write to dead pipe,
        // SSH socket gone). The Mac caller calls
        // `handleConnectionDied`; we mirror that.
        if !Task.isCancelled {
            self?.handleConnectionDied()
        }
    }
}
|
||||
|
||||
/// 5-second heartbeat that catches dead channels which don't
|
||||
/// explicitly EOF the stream (e.g., a hung SSH socket waiting
|
||||
/// for the next chunk that never arrives). When `isHealthy`
|
||||
/// returns false, route into the reconnect path. Mirrors Mac's
|
||||
/// `startHealthMonitor`.
|
||||
private func startHealthMonitor(client: ACPClient) {
    healthMonitorTask = Task { @MainActor [weak self] in
        while !Task.isCancelled {
            // 5s cadence: cheap enough to run continuously, fast
            // enough that a dead channel is noticed before the user
            // tries to type into it.
            try? await Task.sleep(nanoseconds: 5_000_000_000)
            // Re-check after the sleep: cancellation during the wait
            // must not trigger a spurious health probe.
            guard !Task.isCancelled else { break }
            let healthy = await client.isHealthy
            if !healthy {
                // One-shot: hand off to the reconnect path and stop
                // monitoring this (now dead) client.
                self?.handleConnectionDied()
                break
            }
        }
    }
}
|
||||
|
||||
/// One-stop cleanup + reconnect dispatch. Idempotent — guarded by
|
||||
/// `isHandlingDisconnect` so concurrent triggers (event-stream
|
||||
/// EOF + health monitor + write failure) don't tear down the same
|
||||
/// client twice.
|
||||
private func handleConnectionDied() {
    // Idempotence guard: event-stream EOF, the health monitor, and a
    // failed write can all report the same death concurrently.
    guard client != nil, !isHandlingDisconnect else { return }
    isHandlingDisconnect = true
    Self.logger.warning("ACP connection died")

    // Capture any in-progress streaming text into a finalized
    // message before we attempt to merge against the DB. The VM
    // doesn't add a system "Connection lost" bubble — that would
    // create a phantom message during reconnect.
    vm.finalizeOnDisconnect()

    // Snapshot before teardown; the reconnect path targets this id.
    let savedSessionId = vm.sessionId

    // Tear down the dead client. The eventTask will be cancelled
    // immediately; awaiting `stop()` on the dead client is the
    // detached fire-and-forget pattern Mac uses (its `Task` block).
    eventTask?.cancel(); eventTask = nil
    healthMonitorTask?.cancel(); healthMonitorTask = nil
    if let dead = client { Task { await dead.stop() } }
    client = nil

    guard let savedSessionId else {
        // No session id to resume — surface the failure.
        state = .failed("Connection lost")
        isHandlingDisconnect = false
        return
    }
    // `isHandlingDisconnect` stays true; attemptReconnect resets it
    // on success or after exhausting its attempt budget.
    attemptReconnect(sessionId: savedSessionId)
}
|
||||
|
||||
/// React to an iOS scene-phase transition.
|
||||
///
|
||||
/// `.background`: cancel the keepalive — iOS will suspend the
|
||||
/// socket within ~30s anyway, and fighting it via background
|
||||
/// tasks costs battery for marginal benefit (the agent's work is
|
||||
/// persisted to state.db on the remote, so we recover on resume).
|
||||
///
|
||||
/// `.active`: if we had a session running before suspension and
|
||||
/// the channel is now unhealthy, route into the reconnect path
|
||||
/// so the user sees fresh state without having to tap anything.
|
||||
func handleScenePhase(_ phase: ScenePhase) async {
    switch phase {
    case .inactive:
        // Brief interruptions (control center, banners, split-screen):
        // nothing to do.
        break
    case .background:
        // iOS suspends the socket within ~30s regardless; drop the
        // heartbeat instead of fighting the OS. The agent's work is
        // persisted remotely, so resume can recover.
        healthMonitorTask?.cancel()
        healthMonitorTask = nil
    case .active:
        // Nothing worth verifying without a remembered session.
        guard let sessionId = lastActiveSessionID else { return }
        // A recovery cycle is already running — let it finish.
        if case .reconnecting = state { return }
        await verifyAndResume(sessionId: sessionId)
    @unknown default:
        break
    }
}
|
||||
|
||||
/// Probe the existing client's health on resume. If alive,
|
||||
/// just re-arm the heartbeat; if dead, route into the reconnect
|
||||
/// path (which preserves the session id and reconciles against
|
||||
/// the DB).
|
||||
/// Probe the existing client's health on resume: alive → re-arm the
/// heartbeat; dead or absent → route into the reconnect path.
/// NOTE(review): `sessionId` is currently unused — the reconnect path
/// re-derives the id from `vm.sessionId` inside `handleConnectionDied`.
/// Kept for signature stability; confirm whether it should be threaded
/// through instead.
private func verifyAndResume(sessionId: String) async {
    if let live = client, await live.isHealthy {
        // Channel survived the suspension — heartbeat only.
        startHealthMonitor(client: live)
        return
    }
    handleConnectionDied()
}
|
||||
|
||||
/// React to a transition in `NetworkReachabilityService`. While
|
||||
/// the device has no network, suppress reconnect attempts (they'd
|
||||
/// just burn the 5-attempt budget against guaranteed failures);
|
||||
/// when the network comes back, kick a fresh cycle if we're
|
||||
/// stuck in `.failed` / `.offline` with a saved session id.
|
||||
func handleReachabilityChange() async {
    guard NetworkReachabilityService.shared.isSatisfied else {
        // Link down: stop the in-flight cycle — every attempt would
        // fail and burn the 5-attempt budget. A fresh cycle restarts
        // on the next `.satisfied` edge.
        reconnectTask?.cancel()
        reconnectTask = nil
        if case .reconnecting = state {
            state = .offline(reason: "No network")
        }
        return
    }
    // Link back up: only resurrect sessions that are actually stuck
    // in a non-recoverable state.
    guard let sessionId = lastActiveSessionID else { return }
    if case .offline = state {
        attemptReconnect(sessionId: sessionId)
    } else if case .failed = state {
        attemptReconnect(sessionId: sessionId)
    }
}
|
||||
|
||||
/// 5-attempt exponential-backoff reconnect targeting the same
|
||||
/// session id. Tries `session/resume` first (correct semantics
|
||||
/// for live recovery), falls back to `session/load` for older
|
||||
/// remotes. NEVER `session/new` — that would lose the agent's
|
||||
/// in-context conversation. After a successful reattach, calls
|
||||
/// `vm.reconcileWithDB` so messages the agent wrote during the
|
||||
/// outage become visible.
|
||||
private func attemptReconnect(sessionId: String) {
    // Only one reconnect cycle at a time; a new call supersedes any
    // cycle already in flight.
    reconnectTask?.cancel()
    // NOTE(review): if this task is cancelled mid-cycle (e.g. by
    // handleReachabilityChange on link-down), `isHandlingDisconnect`
    // stays true until a later cycle completes — confirm a stranded
    // cancellation can't permanently block handleConnectionDied.
    reconnectTask = Task { @MainActor [weak self] in
        guard let self else { return }

        for attempt in 1...Self.maxReconnectAttempts {
            guard !Task.isCancelled else { return }
            state = .reconnecting(attempt: attempt, of: Self.maxReconnectAttempts)

            // Skip backoff on the first attempt so a quick
            // recovery (e.g., a momentary SSH socket flap) feels
            // instant. Subsequent attempts back off 1→2→4→8→16s.
            if attempt > 1 {
                let delay = min(
                    Self.reconnectBaseDelay * UInt64(1 << (attempt - 1)),
                    Self.maxReconnectDelay
                )
                try? await Task.sleep(nanoseconds: delay)
                guard !Task.isCancelled else { return }
            }

            // Fresh client per attempt; the key comes from the
            // Keychain at connect time, never cached here.
            let client = ACPClient.forIOSApp(
                context: context,
                keyProvider: {
                    let store = KeychainSSHKeyStore()
                    guard let key = try await store.load() else {
                        throw SSHKeyStoreError.backendFailure(
                            message: "No SSH key in Keychain — re-run onboarding.",
                            osStatus: nil
                        )
                    }
                    return key
                }
            )

            do {
                try await client.start()

                // Project-scoped sessions reconnect with their
                // project path as cwd; everything else uses the
                // remote user's home directory.
                let cwd: String
                if let path = lastProjectPath {
                    cwd = path
                } else {
                    cwd = await context.resolvedUserHome()
                }

                // session/resume first (correct live-recovery
                // semantics), session/load as the fallback for older
                // remotes. NEVER session/new — that would lose the
                // agent's in-context conversation.
                let resolvedSessionId: String
                do {
                    resolvedSessionId = try await client.resumeSession(cwd: cwd, sessionId: sessionId)
                } catch {
                    Self.logger.info(
                        "session/resume failed, trying session/load: \(error.localizedDescription, privacy: .public)"
                    )
                    resolvedSessionId = try await client.loadSession(cwd: cwd, sessionId: sessionId)
                }

                // Wire up the new client BEFORE merging messages
                // so any streaming chunks that arrive during the
                // reconcile land in the right place.
                self.client = client
                vm.acpStderrProvider = { [weak client] in
                    await client?.recentStderr ?? ""
                }
                vm.setSessionId(resolvedSessionId)

                // Merge in-memory state (any local-only user
                // messages typed before the disconnect) with
                // whatever Hermes has persisted to state.db
                // since we last looked. This is what makes the
                // "agent kept working while you were locked"
                // case visible to the user.
                let countBefore = vm.messages.count
                await vm.reconcileWithDB(sessionId: resolvedSessionId)
                let added = vm.messages.count - countBefore
                if added > 0 {
                    vm.transientHint = "Resynced \(added) new message\(added == 1 ? "" : "s")."
                    // Auto-clear the toast after 4s, but only if it's
                    // still ours (prefix check guards against a newer
                    // hint having replaced it).
                    Task { @MainActor [weak vm] in
                        try? await Task.sleep(nanoseconds: 4_000_000_000)
                        if vm?.transientHint?.hasPrefix("Resynced") == true {
                            vm?.transientHint = nil
                        }
                    }
                }

                startACPEventLoop(client: client)
                startHealthMonitor(client: client)
                state = .ready
                lastActiveSessionID = resolvedSessionId

                isHandlingDisconnect = false
                Self.logger.info("Reconnected on attempt \(attempt)")
                return
            } catch {
                Self.logger.warning(
                    "Reconnect attempt \(attempt) failed: \(error.localizedDescription, privacy: .public)"
                )
                // Tear down this attempt's client before retrying.
                await client.stop()
                continue
            }
        }

        // Exhausted all attempts. Surface a manual-recovery prompt.
        guard !Task.isCancelled else { return }
        state = .failed("Connection lost")
        isHandlingDisconnect = false
    }
}
|
||||
|
||||
/// User tapped "New chat". Stop, reset the VM, start again.
|
||||
@@ -845,15 +1319,8 @@ final class ChatController {
|
||||
return
|
||||
}
|
||||
|
||||
let stream = await client.events
|
||||
eventTask = Task { [weak self] in
|
||||
for await event in stream {
|
||||
guard let self else { break }
|
||||
await MainActor.run {
|
||||
self.vm.handleACPEvent(event)
|
||||
}
|
||||
}
|
||||
}
|
||||
startACPEventLoop(client: client)
|
||||
startHealthMonitor(client: client)
|
||||
|
||||
do {
|
||||
// Use the project's path as cwd when provided; else the
|
||||
@@ -866,7 +1333,10 @@ final class ChatController {
|
||||
}
|
||||
let sessionId = try await client.newSession(cwd: cwd)
|
||||
vm.setSessionId(sessionId)
|
||||
loadDraft()
|
||||
state = .ready
|
||||
lastActiveSessionID = sessionId
|
||||
lastProjectPath = projectPath
|
||||
|
||||
// If this was a project-scoped session, record the
|
||||
// attribution so Dashboard's Sessions tab can render the
|
||||
@@ -976,15 +1446,8 @@ final class ChatController {
|
||||
return
|
||||
}
|
||||
|
||||
let stream = await client.events
|
||||
eventTask = Task { [weak self] in
|
||||
for await event in stream {
|
||||
guard let self else { break }
|
||||
await MainActor.run {
|
||||
self.vm.handleACPEvent(event)
|
||||
}
|
||||
}
|
||||
}
|
||||
startACPEventLoop(client: client)
|
||||
startHealthMonitor(client: client)
|
||||
|
||||
do {
|
||||
let home = await context.resolvedUserHome()
|
||||
@@ -998,6 +1461,7 @@ final class ChatController {
|
||||
resolvedID = try await client.loadSession(cwd: home, sessionId: sessionID)
|
||||
}
|
||||
vm.setSessionId(resolvedID)
|
||||
loadDraft()
|
||||
// Pull the transcript out of state.db so the user sees
|
||||
// everything said up to now. Mirrors the Mac resume flow
|
||||
// (scarf/scarf/Features/Chat/ViewModels/ChatViewModel.swift:376).
|
||||
@@ -1009,6 +1473,8 @@ final class ChatController {
|
||||
acpSessionId: resolvedID == sessionID ? nil : resolvedID
|
||||
)
|
||||
state = .ready
|
||||
lastActiveSessionID = resolvedID
|
||||
lastProjectPath = resolved?.path
|
||||
} catch {
|
||||
state = .failed(error.localizedDescription)
|
||||
await vm.recordACPFailure(error, client: client)
|
||||
|
||||
@@ -34,7 +34,10 @@ struct ChatTranscriptPane: View {
|
||||
isWorking: richChat.isGenerating,
|
||||
isLoadingSession: chatViewModel.isPreparingSession,
|
||||
scrollTrigger: richChat.scrollTrigger,
|
||||
turnDurations: richChat.turnDurations
|
||||
turnDurations: richChat.turnDurations,
|
||||
hasMoreHistory: richChat.hasMoreHistory,
|
||||
isLoadingEarlier: richChat.isLoadingEarlier,
|
||||
onLoadEarlier: { Task { await richChat.loadEarlier() } }
|
||||
)
|
||||
|
||||
Divider()
|
||||
|
||||
@@ -15,6 +15,13 @@ struct RichChatMessageList: View {
|
||||
/// bubble's metadata footer can render the v2.5 stopwatch pill.
|
||||
/// Defaults empty so callers that don't care can omit it.
|
||||
var turnDurations: [Int: TimeInterval] = [:]
|
||||
/// Show the "Load earlier messages" button at the top of the
|
||||
/// transcript when the underlying session has more on-disk
|
||||
/// history that hasn't been paged in yet. Hidden by default so
|
||||
/// existing callers who haven't opted in see no UI change.
|
||||
var hasMoreHistory: Bool = false
|
||||
var isLoadingEarlier: Bool = false
|
||||
var onLoadEarlier: (() -> Void)? = nil
|
||||
|
||||
/// Scrolling strategy: plain `VStack` (not `LazyVStack`) plus
|
||||
/// `.defaultScrollAnchor(.bottom)`.
|
||||
@@ -57,6 +64,30 @@ struct RichChatMessageList: View {
|
||||
.transition(.opacity)
|
||||
}
|
||||
|
||||
if hasMoreHistory, let onLoadEarlier {
|
||||
Button {
|
||||
onLoadEarlier()
|
||||
} label: {
|
||||
HStack(spacing: 6) {
|
||||
if isLoadingEarlier {
|
||||
ProgressView().scaleEffect(0.7)
|
||||
} else {
|
||||
Image(systemName: "arrow.up.circle")
|
||||
.font(.caption)
|
||||
}
|
||||
Text(isLoadingEarlier ? "Loading earlier…" : "Load earlier messages")
|
||||
.font(.caption)
|
||||
}
|
||||
.padding(.horizontal, 10)
|
||||
.padding(.vertical, 5)
|
||||
.background(.regularMaterial, in: Capsule())
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
.disabled(isLoadingEarlier)
|
||||
.frame(maxWidth: .infinity)
|
||||
.padding(.vertical, 4)
|
||||
}
|
||||
|
||||
ForEach(groups) { group in
|
||||
MessageGroupView(group: group, turnDurations: turnDurations)
|
||||
.equatable()
|
||||
|
||||
@@ -114,7 +114,7 @@ final class SessionsViewModel {
|
||||
|
||||
/// Select a session and load its detail payload.
func selectSession(_ session: HermesSession) async {
    selectedSession = session
    // Bounded fetch only — the previous revision also issued the
    // legacy unbounded fetchMessages call whose result was
    // immediately overwritten (a wasted full-transcript query).
    messages = await dataService.fetchMessages(sessionId: session.id, limit: HistoryPageSize.macSessionDetail)
    subagentSessions = await dataService.fetchSubagentSessions(parentId: session.id)
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user