diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift index 99c1941..e6a43cc 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift @@ -36,10 +36,17 @@ public enum HistoryPageSize: Sendable { /// inside a 30-second `RemoteSQLiteBackend.queryTimeout`.** A /// 157-message session at 200-row page size produced enough /// JSON (with `reasoning_content` for thinking models) to time - /// out at exactly 30 s on a 420 ms-RTT remote, returning empty. - /// 50 rows comfortably fits that envelope. The "Load earlier" + /// out at exactly 30 s on a 420 ms-RTT remote. Dropped to 50, + /// then to 25 in v2.7 after a 160-message session still timed + /// out at 50 — `reasoning_content` for thinking-model turns can + /// run 20–30 KB per row, so 50 rows × 30 KB ≈ 1.5 MB JSON which + /// over a slow SSH channel still trips the 30s budget. Pair + /// with `messageColumnsLight` (excludes `reasoning_content`) + /// so the on-wire payload is small even at this size; the + /// inspector pane lazy-loads via `fetchReasoningContent(for:)` + /// when the user expands a disclosure. The "Load earlier" /// affordance pages back through older messages on demand. - public nonisolated static let initial = 50 + public nonisolated static let initial = 25 /// Reconnection reconcile against the DB. 200 rows is plenty — /// disconnects don't generate hundreds of unseen messages. 
public nonisolated static let reconcile = 200 diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift index 0ebc523..9e48128 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift @@ -142,6 +142,31 @@ public actor HermesDataService { return cols } + /// Same as `messageColumns` but with the `reasoning_content` + /// column omitted. v0.11+ Hermes thinking-model output stores + /// the full chain-of-thought transcript in `reasoning_content`, + /// which on a single message can be 20+ KB of JSON. For a + /// 160-message session that's >1 MB of wire payload — enough + /// to time out a 30s SSH `sqlite3 -json` fetch on a 420ms-RTT + /// remote (perf capture confirmed). The bubble's main body + /// doesn't render reasoning_content directly; the inspector + /// pane does, and the user opens that on demand. So initial + /// fetch can skip it and a follow-up `fetchReasoningContent` + /// can pull it lazily when the inspector opens. + private var messageColumnsLight: String { + var cols = """ + id, session_id, role, content, tool_call_id, tool_calls, + tool_name, timestamp, token_count, finish_reason + """ + if hasV07Schema { + cols += ", reasoning" + } + // v0.11+ `reasoning_content` is intentionally excluded. + // `messageFromRow` defaults it to nil; callers that need it + // call `fetchReasoningContent(for:)` to lazy-load. + return cols + } + // MARK: - Session Queries public func fetchSessions(limit: Int = QueryDefaults.sessionLimit) async -> [HermesSession] { @@ -189,13 +214,19 @@ public actor HermesDataService { before: Int? 
= nil ) async -> [HermesMessage] { await ScarfMon.measureAsync(.sessionLoad, "mac.fetchMessages") { + // Use the lite column set — excludes reasoning_content which + // can be 20+ KB per message on thinking-model sessions and + // was the cause of repeated 30s SSH timeouts on 100+-message + // sessions over 420ms-RTT remote links. The inspector pane + // calls `fetchReasoningContent(for:)` to lazy-load when the + // user opens a message's disclosure. let sql: String let params: [SQLValue] if let before { - sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? AND id < ? ORDER BY id DESC LIMIT ?" + sql = "SELECT \(messageColumnsLight) FROM messages WHERE session_id = ? AND id < ? ORDER BY id DESC LIMIT ?" params = [.text(sessionId), .integer(Int64(before)), .integer(Int64(limit))] } else { - sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? ORDER BY id DESC LIMIT ?" + sql = "SELECT \(messageColumnsLight) FROM messages WHERE session_id = ? ORDER BY id DESC LIMIT ?" params = [.text(sessionId), .integer(Int64(limit))] } do { @@ -211,6 +242,23 @@ public actor HermesDataService { } } + /// Lazy-load the `reasoning_content` for a single message. Called + /// when the user expands the inspector disclosure on a thinking-model + /// reply that has reasoning available (i.e. the message has v0.11 + /// schema). Cheap on a single message — avoids the bulk-fetch + /// payload-size problem that motivated `messageColumnsLight`. + public func fetchReasoningContent(for messageId: Int) async -> String? { + guard hasV011Schema else { return nil } + let sql = "SELECT reasoning_content FROM messages WHERE id = ?" 
+ do { + let rows = try await backend.query(sql, params: [.integer(Int64(messageId))]) + return rows.first?.optionalString(at: 0) + } catch { + Self.logger.warning("fetchReasoningContent failed: \(error.localizedDescription, privacy: .public)") + return nil + } + } + /// Legacy unbounded fetch retained for one release cycle so any /// out-of-tree consumers don't break. New code should use the /// bounded `fetchMessages(sessionId:limit:before:)` variant —