diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift index 99c1941..e6a43cc 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Models/HermesConstants.swift @@ -36,10 +36,17 @@ public enum HistoryPageSize: Sendable { /// inside a 30-second `RemoteSQLiteBackend.queryTimeout`.** A /// 157-message session at 200-row page size produced enough /// JSON (with `reasoning_content` for thinking models) to time - /// out at exactly 30 s on a 420 ms-RTT remote, returning empty. - /// 50 rows comfortably fits that envelope. The "Load earlier" + /// out at exactly 30 s on a 420 ms-RTT remote. Dropped to 50, + /// then to 25 in v2.7 after a 160-message session still timed + /// out at 50 — `reasoning_content` for thinking-model turns can + /// run 20–30 KB per row, so 50 rows × 30 KB ≈ 1.5 MB JSON which + /// over a slow SSH channel still trips the 30s budget. Pair + /// with `messageColumnsLight` (excludes `reasoning_content`) + /// so the on-wire payload is small even at this size; the + /// inspector pane lazy-loads via `fetchReasoningContent(for:)` + /// when the user expands a disclosure. The "Load earlier" /// affordance pages back through older messages on demand. - public nonisolated static let initial = 50 + public nonisolated static let initial = 25 /// Reconnection reconcile against the DB. 200 rows is plenty — /// disconnects don't generate hundreds of unseen messages. 
public nonisolated static let reconcile = 200 diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift index 0ebc523..9e48128 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift @@ -142,6 +142,31 @@ public actor HermesDataService { return cols } + /// Same as `messageColumns` but with the `reasoning_content` + /// column omitted. v0.11+ Hermes thinking-model output stores + /// the full chain-of-thought transcript in `reasoning_content`, + /// which on a single message can be 20+ KB of JSON. For a + /// 160-message session that's >1 MB of wire payload — enough + /// to time out a 30s SSH `sqlite3 -json` fetch on a 420ms-RTT + /// remote (perf capture confirmed). The bubble's main body + /// doesn't render reasoning_content directly; the inspector + /// pane does, and the user opens that on demand. So initial + /// fetch can skip it and a follow-up `fetchReasoningContent` + /// can pull it lazily when the inspector opens. + private var messageColumnsLight: String { + var cols = """ + id, session_id, role, content, tool_call_id, tool_calls, + tool_name, timestamp, token_count, finish_reason + """ + if hasV07Schema { + cols += ", reasoning" + } + // v0.11+ `reasoning_content` is intentionally excluded. + // `messageFromRow` defaults it to nil; callers that need it + // call `fetchReasoningContent(for:)` to lazy-load. + return cols + } + // MARK: - Session Queries public func fetchSessions(limit: Int = QueryDefaults.sessionLimit) async -> [HermesSession] { @@ -189,13 +214,19 @@ public actor HermesDataService { before: Int? 
= nil ) async -> [HermesMessage] { await ScarfMon.measureAsync(.sessionLoad, "mac.fetchMessages") { + // Use the lite column set — excludes reasoning_content which + // can be 20+ KB per message on thinking-model sessions and + // was the cause of repeated 30s SSH timeouts on 100+-message + // sessions over 420ms-RTT remote links. The inspector pane + // calls `fetchReasoningContent(for:)` to lazy-load when the + // user opens a message's disclosure. let sql: String let params: [SQLValue] if let before { - sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? AND id < ? ORDER BY id DESC LIMIT ?" + sql = "SELECT \(messageColumnsLight) FROM messages WHERE session_id = ? AND id < ? ORDER BY id DESC LIMIT ?" params = [.text(sessionId), .integer(Int64(before)), .integer(Int64(limit))] } else { - sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? ORDER BY id DESC LIMIT ?" + sql = "SELECT \(messageColumnsLight) FROM messages WHERE session_id = ? ORDER BY id DESC LIMIT ?" params = [.text(sessionId), .integer(Int64(limit))] } do { @@ -211,6 +242,23 @@ public actor HermesDataService { } } + /// Lazy-load the `reasoning_content` for a single message. Called + /// when the user expands the inspector disclosure on a thinking-model + /// reply that has reasoning available (i.e. the message has v0.11 + /// schema). Cheap on a single message — avoids the bulk-fetch + /// payload-size problem that motivated `messageColumnsLight`. + public func fetchReasoningContent(for messageId: Int) async -> String? { + guard hasV011Schema else { return nil } + let sql = "SELECT reasoning_content FROM messages WHERE id = ?" 
+ do { + let rows = try await backend.query(sql, params: [.integer(Int64(messageId))]) + return rows.first?.optionalString(at: 0) + } catch { + Self.logger.warning("fetchReasoningContent failed: \(error.localizedDescription, privacy: .public)") + return nil + } + } + /// Legacy unbounded fetch retained for one release cycle so any /// out-of-tree consumers don't break. New code should use the /// bounded `fetchMessages(sessionId:limit:before:)` variant —