From 593b4e62cb96292a3821a327fa78242854e46e91 Mon Sep 17 00:00:00 2001 From: Alan Wizemann Date: Mon, 4 May 2026 13:09:06 +0200 Subject: [PATCH] feat(remote): replace SQLite snapshot pipeline with SSH query streaming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The remote-DB pipeline pulled the entire state.db down via scp on every refresh tick. For the issue #74 user (4.87 GB DB) that meant ~7-min per-snapshot wall time even with the size-aware-timeout fix, ~30 GB/hour upload, and data permanently 5–10 minutes stale. This isn't a bug to patch — it's the wrong architecture for any non-trivial remote DB. Replace it with per-query streaming over SSH. Each SQL statement becomes one ssh round-trip running `sqlite3 -readonly -json` against the live remote DB. ControlMaster keeps the channel warm at ~5 ms overhead; sqlite3 cold-start adds ~30–50 ms; total ~50–100 ms per query vs. the old multi-minute snapshot. Bandwidth scales with query result size, not DB size. What changed: * New `HermesQueryBackend` protocol and two implementations: `LocalSQLiteBackend` (libsqlite3 in-process — local performance unchanged) and `RemoteSQLiteBackend` (sqlite3 over SSH per query with batched-statement support for multi-query view loads). * `SQLValue` and `Row` types as the typed boundary between backends and the row parsers. `SQLValueInliner` substitutes `?` placeholders with SQLite-escaped literals for the remote-CLI codepath (local backend keeps real `sqlite3_bind_*`). * `ServerTransport` swaps `snapshotSQLite` + `cachedSnapshotPath` for `streamScript(_:timeout:)`. SSHTransport delegates to the existing `SSHScriptRunner`; CitadelServerTransport (iOS) base64-encodes the script + decodes remotely via Citadel's exec channel since stdin pipes aren't supported there yet. * `HermesDataService` becomes a thin facade — every fetch* method routes through `backend.query(...)`. 
Public API is unchanged for view-model callers; `lastSnapshotMtime`/`isUsingStaleSnapshot`/ `staleAge` removed (had zero UI consumers). * New `dashboardSnapshot()` and `insightsSnapshot(since:)` batched calls turn Dashboard's 4-query and Insights' 5-query view loads into one SSH round-trip each (~80–100 ms total instead of ~280 ms naive). DashboardViewModel and InsightsViewModel updated to use them. * One-time launch migration in `scarfApp` wipes the orphaned `~/Library/Caches/scarf/snapshots/` directory (could be 5 GB+ for the issue #74 user). JSON parsing detail: sqlite3 -json preserves SELECT column order in the raw bytes, but `[String: Any]` from NSJSONSerialization doesn't. The remote backend extracts column ordering by walking the first object's literal bytes — without this, every positional row read (`row.string(at: 0)`) would silently return wrong columns. Tests: 41 new across `SQLValueInlinerTests`, `HermesDataServiceBackendTests` (mock backend) and `RemoteSQLiteBackendTests` (integration via local sqlite3 binary). Full suite 262/262 passing. Builds clean on Mac and iOS. Ships as part of v2.7. 
Refs #74 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Backends/HermesQueryBackend.swift | 113 ++ .../Backends/LocalSQLiteBackend.swift | 254 +++++ .../Backends/RemoteSQLiteBackend.swift | 524 +++++++++ .../Services/Backends/SQLValue.swift | 136 +++ .../Services/Backends/SQLValueInliner.swift | 107 ++ .../Services/HermesDataService.swift | 995 ++++++++---------- .../ScarfCore/Transport/LocalTransport.swift | 35 +- .../ScarfCore/Transport/SSHTransport.swift | 154 +-- .../ScarfCore/Transport/ServerTransport.swift | 38 +- .../ViewModels/InsightsViewModel.swift | 15 +- .../Helpers/MockHermesQueryBackend.swift | 150 +++ .../HermesDataServiceBackendTests.swift | 338 ++++++ .../ScarfCoreTests/M0bTransportTests.swift | 6 - .../ScarfCoreTests/M5FeatureVMTests.swift | 5 +- .../RemoteSQLiteBackendTests.swift | 496 +++++++++ .../ScarfCoreTests/SQLValueInlinerTests.swift | 147 +++ .../ScarfIOS/CitadelServerTransport.swift | 165 ++- .../ViewModels/DashboardViewModel.swift | 21 +- scarf/scarf/scarfApp.swift | 12 + 19 files changed, 2878 insertions(+), 833 deletions(-) create mode 100644 scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/HermesQueryBackend.swift create mode 100644 scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/LocalSQLiteBackend.swift create mode 100644 scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/RemoteSQLiteBackend.swift create mode 100644 scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/SQLValue.swift create mode 100644 scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/SQLValueInliner.swift create mode 100644 scarf/Packages/ScarfCore/Tests/ScarfCoreTests/Helpers/MockHermesQueryBackend.swift create mode 100644 scarf/Packages/ScarfCore/Tests/ScarfCoreTests/HermesDataServiceBackendTests.swift create mode 100644 scarf/Packages/ScarfCore/Tests/ScarfCoreTests/RemoteSQLiteBackendTests.swift create mode 100644 scarf/Packages/ScarfCore/Tests/ScarfCoreTests/SQLValueInlinerTests.swift diff --git 
a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/HermesQueryBackend.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/HermesQueryBackend.swift new file mode 100644 index 0000000..5ef1d7c --- /dev/null +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/HermesQueryBackend.swift @@ -0,0 +1,113 @@ +import Foundation + +/// Pluggable query engine for `HermesDataService`. Two implementations +/// today: +/// +/// * `LocalSQLiteBackend` — opens the local `~/.hermes/state.db` via +/// libsqlite3 and runs queries in-process. Microseconds per query. +/// * `RemoteSQLiteBackend` — invokes `sqlite3 -readonly -json` over an +/// SSH session (ControlMaster keeps the channel warm), parses the +/// JSON response into `Row`s. ~50–100 ms per query. +/// +/// The data service picks one based on `ServerContext.isRemote`. View +/// models are oblivious — they keep calling `await dataService.fetch…` +/// like before. +/// +/// **Why a protocol, not a class hierarchy.** Backends have very +/// different internals (libsqlite3 handles vs. SSH script piping) but +/// the call-site shape is identical. A protocol lets us hand the data +/// service either backend through one stored property without +/// abstract-class ceremony, and keeps the test mock (see +/// `MockHermesQueryBackend` in tests) free of inheritance baggage. +/// +/// **Sendable.** Concrete impls are actors, so they're trivially +/// `Sendable`. The protocol conforms to `Sendable` to satisfy Swift 6 +/// strict-concurrency for the data-service stored property. +public protocol HermesQueryBackend: Sendable { + + /// True iff the connected DB has the v0.7 columns (`reasoning_tokens`, + /// `actual_cost_usd`, `cost_status`, `billing_provider` on + /// `sessions` plus `reasoning` on `messages`). Detected once at + /// `open()` time. 
+ var hasV07Schema: Bool { get async } + + /// True iff the connected DB has the v0.11 columns + /// (`api_call_count` on `sessions`, `reasoning_content` on + /// `messages`). Belt-and-braces: BOTH must be present (a + /// partially-migrated DB stays on the v0.7 path to avoid "no such + /// column" failures). + var hasV011Schema: Bool { get async } + + /// User-presentable error from the most recent `open()` (or the + /// most recent failed query for the remote backend's + /// connectivity-loss codepath). `nil` means everything is healthy. + var lastOpenError: String? { get async } + + /// One-time setup. Local: `sqlite3_open_v2` + `PRAGMA table_info` + /// schema detection. Remote: one SSH round-trip running + /// `sqlite3 --version` plus the two PRAGMA queries. + /// + /// Returns `false` on any failure; detail is in `lastOpenError`. + /// Calling `open()` on an already-open backend is a no-op that + /// returns `true`. + func open() async -> Bool + + /// Local backend: `close()` then `open(forceFresh:)` — re-pulls + /// the SQLite handle so a Hermes-side migration becomes visible. + /// Remote backend: a no-op when `forceFresh: false` (every query + /// is already fresh — there's nothing to refresh). `forceFresh: + /// true` re-runs the schema preflight, covering the rare "user + /// upgraded Hermes on the remote, my schema flags are stale" case. + @discardableResult + func refresh(forceFresh: Bool) async -> Bool + + /// Drop any persistent resources. Idempotent. + func close() async + + /// Run a single SQL statement and collect every row before + /// returning. SQL uses `?` placeholders; `params` is bound + /// positionally (one entry per `?`). + /// + /// Local backend: `sqlite3_prepare_v2` + `sqlite3_bind_*` + + /// `sqlite3_step` loop, materialising each row into a `Row`. + /// Remote backend: inlines params via `SQLValueInliner` to produce + /// a final SQL string, runs `sqlite3 -readonly -json` over SSH, + /// parses the resulting JSON array. 
+ /// + /// Throws `BackendError` on any failure. The data-service façade + /// generally catches and returns empty results to preserve the + /// existing "show empty UI on error" behaviour. + func query(_ sql: String, params: [SQLValue]) async throws -> [Row] + + /// Run several statements in one round-trip, returning each + /// statement's row set in order. Lets multi-query view loads + /// (Dashboard's 4-query pattern, Insights' 5-query pattern) + /// amortise the SSH/sqlite3 cold-start cost. + /// + /// Each `(sql, params)` pair has the same shape as `query` — + /// `?` placeholders bound positionally per pair. + func queryBatch(_ statements: [(sql: String, params: [SQLValue])]) async throws -> [[Row]] +} + +/// Errors that backends raise. Mapped into user-facing messages by the +/// `humanize` helper that lives alongside `HermesDataService`. +public enum BackendError: Error, Sendable, Equatable { + /// Backend is not open — caller should `open()` first. + case notOpen + + /// Connectivity failure (SSH down, ControlMaster dead, transport + /// can't reach the host). Carries a short human-readable reason. + /// Triggers the data-service's `lastOpenError` populate path. + case transport(String) + + /// sqlite3 itself reported an error — non-zero exit, parse failure, + /// schema mismatch. `exitCode` is the sqlite3 process exit (or + /// libsqlite3 result code on the local backend); `stderr` is the + /// sqlite3-emitted message (already user-readable in most cases). + case sqlite(exitCode: Int32, stderr: String) + + /// JSON-parsing failed on remote-backend output. Indicates either a + /// sqlite3 binary that didn't honour `-json`, or output corruption + /// (rare). Carries the first 200 bytes of stdout for diagnostics. 
+ case parseFailure(stdoutHead: String) +} diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/LocalSQLiteBackend.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/LocalSQLiteBackend.swift new file mode 100644 index 0000000..977a00b --- /dev/null +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/LocalSQLiteBackend.swift @@ -0,0 +1,254 @@ +// MARK: - Platform gate +// +// libsqlite3 is a system module on macOS/iOS but not on swift-corelibs +// foundation. Gate the entire backend so ScarfCore still compiles for +// any future Linux target. Apple platforms — the runtime targets — get +// the full implementation. +#if canImport(SQLite3) + +import Foundation +import SQLite3 +#if canImport(os) +import os +#endif + +/// `HermesQueryBackend` that opens a local SQLite file via libsqlite3 +/// and runs queries in-process. Microseconds per query. +/// +/// Used for `ServerContext.local` (the user's own `~/.hermes/state.db`) +/// — the previous behaviour of `HermesDataService` lifted out unchanged. +/// For `.ssh` contexts the data service constructs `RemoteSQLiteBackend` +/// instead. +/// +/// Actor isolation matches the parent `HermesDataService` actor: queries +/// serialise on this backend's executor, and the data service hops once +/// (`await backend.query…`) per public method call. +public actor LocalSQLiteBackend: HermesQueryBackend { + + #if canImport(os) + private static let logger = Logger(subsystem: "com.scarf", category: "LocalSQLiteBackend") + #endif + + private var db: OpaquePointer? + private var openedAtPath: String? + private(set) public var hasV07Schema = false + private(set) public var hasV011Schema = false + private(set) public var lastOpenError: String? 
+ + private let context: ServerContext + + public init(context: ServerContext) { + self.context = context + } + + // MARK: - Lifecycle + + public func open() async -> Bool { + if db != nil { return true } + let path = context.paths.stateDB + guard FileManager.default.fileExists(atPath: path) else { + lastOpenError = "Hermes state database not found at \(path)." + return false + } + let flags: Int32 = SQLITE_OPEN_READONLY | SQLITE_OPEN_NOMUTEX + let rc = sqlite3_open_v2(path, &db, flags, nil) + guard rc == SQLITE_OK else { + let msg: String + if let db { + msg = String(cString: sqlite3_errmsg(db)) + } else { + msg = "sqlite3_open_v2 returned \(rc)" + } + lastOpenError = "Couldn't open state.db: \(msg)" + #if canImport(os) + Self.logger.warning("sqlite3_open_v2 failed (\(rc)) at \(path, privacy: .public): \(msg, privacy: .public)") + #endif + db = nil + return false + } + openedAtPath = path + lastOpenError = nil + detectSchema() + return true + } + + @discardableResult + public func refresh(forceFresh: Bool) async -> Bool { + // Local always close-and-reopen — the file may have been swapped + // by Hermes (rare) or we want to pick up a schema migration. + // `forceFresh` is irrelevant locally; included for protocol + // parity with the remote backend. + await close() + return await open() + } + + public func close() async { + if let db { + sqlite3_close(db) + } + db = nil + openedAtPath = nil + } + + // MARK: - Schema detection + + private func detectSchema() { + guard let db else { return } + + // sessions schema + var stmt: OpaquePointer? 
+ if sqlite3_prepare_v2(db, "PRAGMA table_info(sessions)", -1, &stmt, nil) == SQLITE_OK { + defer { sqlite3_finalize(stmt) } + while sqlite3_step(stmt) == SQLITE_ROW { + if let name = sqlite3_column_text(stmt, 1) { + let column = String(cString: name) + if column == "reasoning_tokens" { + hasV07Schema = true + } + if column == "api_call_count" { + hasV011Schema = true + } + } + } + } + + // messages schema — confirm `reasoning_content` is present too. + // Belt-and-braces: a partially-migrated DB (sessions migrated, + // messages not) shouldn't blow up reads with "no such column". + if hasV011Schema { + var msgStmt: OpaquePointer? + var sawReasoningContent = false + if sqlite3_prepare_v2(db, "PRAGMA table_info(messages)", -1, &msgStmt, nil) == SQLITE_OK { + defer { sqlite3_finalize(msgStmt) } + while sqlite3_step(msgStmt) == SQLITE_ROW { + if let name = sqlite3_column_text(msgStmt, 1), + String(cString: name) == "reasoning_content" { + sawReasoningContent = true + break + } + } + } + if !sawReasoningContent { + hasV011Schema = false + } + } + } + + // MARK: - Queries + + public func query(_ sql: String, params: [SQLValue]) async throws -> [Row] { + guard let db else { throw BackendError.notOpen } + return try executeOne(db: db, sql: sql, params: params) + } + + public func queryBatch(_ statements: [(sql: String, params: [SQLValue])]) async throws -> [[Row]] { + guard let db else { throw BackendError.notOpen } + // Local backend has no SSH/process round-trip cost — running + // sequentially against the open handle is exactly equivalent + // to running each via `query`. The protocol method exists for + // remote-backend amortisation; locally we just satisfy the + // signature. 
+ var out: [[Row]] = [] + out.reserveCapacity(statements.count) + for (sql, params) in statements { + out.append(try executeOne(db: db, sql: sql, params: params)) + } + return out + } + + // MARK: - Internals + + private func executeOne(db: OpaquePointer, sql: String, params: [SQLValue]) throws -> [Row] { + var stmt: OpaquePointer? + let prepRC = sqlite3_prepare_v2(db, sql, -1, &stmt, nil) + guard prepRC == SQLITE_OK, let stmt else { + let msg = String(cString: sqlite3_errmsg(db)) + throw BackendError.sqlite(exitCode: prepRC, stderr: msg) + } + defer { sqlite3_finalize(stmt) } + + for (i, value) in params.enumerated() { + let col = Int32(i + 1) + let rc: Int32 + switch value { + case .null: + rc = sqlite3_bind_null(stmt, col) + case .integer(let n): + rc = sqlite3_bind_int64(stmt, col, n) + case .real(let d): + rc = sqlite3_bind_double(stmt, col, d) + case .text(let s): + rc = sqlite3_bind_text(stmt, col, s, -1, sqliteTransient) + case .blob(let d): + rc = d.withUnsafeBytes { buf -> Int32 in + guard let base = buf.baseAddress else { + return sqlite3_bind_zeroblob(stmt, col, 0) + } + return sqlite3_bind_blob(stmt, col, base, Int32(buf.count), sqliteTransient) + } + } + if rc != SQLITE_OK { + let msg = String(cString: sqlite3_errmsg(db)) + throw BackendError.sqlite(exitCode: rc, stderr: msg) + } + } + + // Build column-name → index map once per result set, lazily on + // first row (sqlite3_column_name needs the prepared stmt; cheap + // either way). For a 0-row result set we still build it so + // callers that read column names from the first hypothetical + // row don't error — though `Row.columnIndex` on an empty + // `[Row]` is moot. + let columnCount = Int(sqlite3_column_count(stmt)) + var columnIndex: [String: Int] = [:] + columnIndex.reserveCapacity(columnCount) + for i in 0.. 
0, let p = sqlite3_column_blob(stmt, col) { + values.append(.blob(Data(bytes: p, count: n))) + } else { + values.append(.blob(Data())) + } + default: + values.append(.null) + } + } + rows.append(Row(values: values, columnIndex: columnIndex)) + } + return rows + } +} + +#endif // canImport(SQLite3) diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/RemoteSQLiteBackend.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/RemoteSQLiteBackend.swift new file mode 100644 index 0000000..07b826b --- /dev/null +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/RemoteSQLiteBackend.swift @@ -0,0 +1,524 @@ +#if canImport(SQLite3) + +import Foundation +#if canImport(os) +import os +#endif + +/// `HermesQueryBackend` that runs `sqlite3 -readonly -json` over an +/// SSH session per query. Replaces the old snapshot-then-open pipeline +/// (issue #74): no full-DB transfers, no local cache, every query +/// against the live remote DB. +/// +/// **Why one round-trip per query is OK.** ControlMaster keeps the SSH +/// session warm — first connect spins up the master socket; subsequent +/// queries reuse it at ~5 ms overhead. sqlite3 cold-start is ~30–50 ms, +/// query execution is sub-millisecond for indexed queries, JSON +/// serialisation is small. End-to-end ~50–100 ms per query, dominated +/// by sqlite3 process spawn. Multi-query view loads (Dashboard, +/// Insights) batch via `queryBatch` — one cold-start, all statements +/// in a single sqlite3 invocation, ~80–100 ms total. +/// +/// **Result format**. `sqlite3 -json` emits one JSON array per +/// statement that returns rows: `[{"col":val,...}, ...]`. Multi-statement +/// scripts emit each array on its own. We separate batched queries +/// with a `SELECT '__SCARF_RS_BEGIN__N' AS marker;` synthesised line so +/// the parser can split on the markers — sqlite3's marker rows +/// preserve order and let us pair each result-set with the originating +/// statement index. 
+public actor RemoteSQLiteBackend: HermesQueryBackend { + + #if canImport(os) + private static let logger = Logger(subsystem: "com.scarf", category: "RemoteSQLiteBackend") + #endif + + private let context: ServerContext + private let transport: any ServerTransport + private(set) public var hasV07Schema = false + private(set) public var hasV011Schema = false + private(set) public var lastOpenError: String? + private var isOpen = false + /// Captured `sqlite3 --version` line from the most recent preflight. + /// Stashed for diagnostic logs and a future "remote sqlite3 too old" + /// error path. + private var sqliteVersion: String? + + /// Per-query timeout for `query`. A healthy query is <100 ms; + /// 15 s is 100× headroom and short enough that a wedged remote + /// doesn't hang the UI for minutes. + private let queryTimeout: TimeInterval = 15 + + /// Preflight timeout. First SSH round-trip may include cold + /// ControlMaster establishment (~1–3 s) plus the schema PRAGMA + /// queries; 30 s is generous. + private let preflightTimeout: TimeInterval = 30 + + /// Marker prefix used to split `queryBatch` result sets. Picked to + /// be very unlikely to collide with a real session_id, role string, + /// or content fragment. + private static let batchMarkerPrefix = "__SCARF_RS_BEGIN__" + + public init(context: ServerContext, transport: any ServerTransport) { + self.context = context + self.transport = transport + } + + // MARK: - Lifecycle + + public func open() async -> Bool { + if isOpen { return true } + let dbPath = context.paths.stateDB + // One SSH round-trip running: + // 1. sqlite3 --version (sanity + capture for diagnostics) + // 2. PRAGMA table_info(sessions) | sessions schema + // 3. PRAGMA table_info(messages) | messages schema + // sqlite3 -json emits two arrays back-to-back for the two PRAGMA + // statements; we parse them as separate result sets. 
+ let preflight = """ + set -e + sqlite3 --version + sqlite3 -readonly -json '\(escape(dbPath))' "PRAGMA table_info(sessions); PRAGMA table_info(messages);" + """ + + do { + let result = try await transport.streamScript(preflight, timeout: preflightTimeout) + if result.exitCode != 0 { + lastOpenError = errorMessage(stderr: result.stderrString, stdout: result.stdoutString, exitCode: result.exitCode) + #if canImport(os) + Self.logger.warning("Remote preflight failed (exit \(result.exitCode)): \(self.lastOpenError ?? "", privacy: .public)") + #endif + return false + } + try parsePreflightOutput(result.stdoutString) + lastOpenError = nil + isOpen = true + #if canImport(os) + Self.logger.info("Remote SQLite backend ready: sqlite3=\(self.sqliteVersion ?? "?", privacy: .public), v0.7=\(self.hasV07Schema), v0.11=\(self.hasV011Schema)") + #endif + return true + } catch { + lastOpenError = error.localizedDescription + #if canImport(os) + Self.logger.warning("Remote preflight transport error: \(error.localizedDescription, privacy: .public)") + #endif + return false + } + } + + @discardableResult + public func refresh(forceFresh: Bool) async -> Bool { + // Streaming queries are always fresh. The watcher tick still + // fires `dataService.refresh()` on every observed file change + // — locally that re-opens the SQLite handle; here it's a + // no-op. `forceFresh: true` is the escape hatch for when the + // user explicitly wants a re-preflight (e.g. they upgraded + // Hermes on the remote). Drop the open state and re-run. + if forceFresh { + isOpen = false + return await open() + } + return isOpen ? 
true : await open() + } + + public func close() async { + isOpen = false + } + + // MARK: - Queries + + public func query(_ sql: String, params: [SQLValue]) async throws -> [Row] { + guard isOpen else { throw BackendError.notOpen } + let inlined = SQLValueInliner.inline(sql, params: params) + let dbPath = context.paths.stateDB + let script = """ + sqlite3 -readonly -json '\(escape(dbPath))' <<'__SCARF_SQL__' + \(inlined) + __SCARF_SQL__ + """ + let result: ProcessResult + do { + result = try await transport.streamScript(script, timeout: queryTimeout) + } catch { + throw BackendError.transport(error.localizedDescription) + } + if result.exitCode != 0 { + throw BackendError.sqlite(exitCode: result.exitCode, stderr: result.stderrString) + } + return try parseSingleResultSet(result.stdoutString) + } + + public func queryBatch(_ statements: [(sql: String, params: [SQLValue])]) async throws -> [[Row]] { + guard isOpen else { throw BackendError.notOpen } + if statements.isEmpty { return [] } + // Build one sqlite3 invocation with marker SELECTs separating + // each statement's result set. `SELECT '__SCARF_RS_BEGIN__N'` + // emits a one-row JSON array we use as a sentinel. + var sqlBlocks: [String] = [] + for (i, stmt) in statements.enumerated() { + let inlined = SQLValueInliner.inline(stmt.sql, params: stmt.params) + // Marker first (so we know which result-set follows even + // if a query returns zero rows — sqlite3 -json prints + // nothing for empty result sets, which would otherwise + // make the parser drift). + sqlBlocks.append("SELECT '\(Self.batchMarkerPrefix)\(i)' AS marker;") + sqlBlocks.append(ensureTrailingSemicolon(inlined)) + } + let combined = sqlBlocks.joined(separator: "\n") + let dbPath = context.paths.stateDB + let script = """ + sqlite3 -readonly -json '\(escape(dbPath))' <<'__SCARF_SQL__' + \(combined) + __SCARF_SQL__ + """ + let result: ProcessResult + do { + // Batched timeout: scale with statement count, capped at + // a comfortable 30 s. 
Most batches are 4–5 statements. + let timeout = min(30, queryTimeout + Double(statements.count) * 2) + result = try await transport.streamScript(script, timeout: timeout) + } catch { + throw BackendError.transport(error.localizedDescription) + } + if result.exitCode != 0 { + throw BackendError.sqlite(exitCode: result.exitCode, stderr: result.stderrString) + } + return try parseBatchResultSets(result.stdoutString, expectedCount: statements.count) + } + + // MARK: - Preflight parsing + + private func parsePreflightOutput(_ stdout: String) throws { + // Expected output: + // + // [] + // [] + let lines = stdout.split(separator: "\n", omittingEmptySubsequences: false) + guard let firstLine = lines.first, !firstLine.isEmpty else { + throw BackendError.parseFailure(stdoutHead: String(stdout.prefix(200))) + } + sqliteVersion = String(firstLine).trimmingCharacters(in: .whitespacesAndNewlines) + + // The remaining lines should contain two JSON arrays. sqlite3 + // -json emits each on its own — though it can wrap long arrays + // across multiple lines. We split on `][` boundaries to be + // robust. Walk the stream looking for two top-level arrays. + let rest = lines.dropFirst().joined(separator: "\n") + let arrays = splitTopLevelJSONArrays(rest) + guard arrays.count >= 2 else { + throw BackendError.parseFailure(stdoutHead: String(stdout.prefix(200))) + } + let sessionsTable = try parseTableInfo(arrays[0]) + let messagesTable = try parseTableInfo(arrays[1]) + + // v0.7: sessions has `reasoning_tokens`. + hasV07Schema = sessionsTable.contains("reasoning_tokens") + // v0.11: BOTH sessions has `api_call_count` AND messages has + // `reasoning_content`. Belt-and-braces against partial migrations. + let sessionsHasV011 = sessionsTable.contains("api_call_count") + let messagesHasV011 = messagesTable.contains("reasoning_content") + hasV011Schema = sessionsHasV011 && messagesHasV011 + } + + /// Extract column names from a `PRAGMA table_info(...)` result set. 
+ private func parseTableInfo(_ json: String) throws -> Set { + guard let data = json.data(using: .utf8), + let arr = try? JSONSerialization.jsonObject(with: data) as? [[String: Any]] else { + throw BackendError.parseFailure(stdoutHead: String(json.prefix(200))) + } + var names: Set = [] + for row in arr { + if let name = row["name"] as? String { + names.insert(name) + } + } + return names + } + + // MARK: - Result-set parsing + + private func parseSingleResultSet(_ stdout: String) throws -> [Row] { + // sqlite3 -json prints nothing for empty result sets, so an + // empty stdout is valid and means "0 rows". + let trimmed = stdout.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed.isEmpty { return [] } + return try rowsFromJSONArray(trimmed) + } + + private func parseBatchResultSets(_ stdout: String, expectedCount: Int) throws -> [[Row]] { + // Scan the output as a sequence of JSON arrays. Each marker + // SELECT emits a one-row array `[{"marker":"__SCARF_RS_BEGIN__N"}]`; + // the following array (if present) is statement N's result set. + let arrays = splitTopLevelJSONArrays(stdout) + var result: [[Row]] = Array(repeating: [], count: expectedCount) + var i = 0 + while i < arrays.count { + let chunk = arrays[i] + // Try to read this chunk as a marker. A marker row is one + // object with exactly the `marker` field. Anything else + // is a real result set (which we attribute to the most + // recent marker we saw). + if let idx = markerIndex(in: chunk) { + // Next array (if any) is this statement's result set. + // If the next array is ALSO a marker, the current + // statement returned zero rows. + let next = i + 1 + if next < arrays.count, markerIndex(in: arrays[next]) == nil { + result[idx] = try rowsFromJSONArray(arrays[next]) + i = next + 1 + } else { + // Empty result set for this statement. + i = next + } + } else { + // Stray array (no preceding marker). Skip — shouldn't + // happen in practice given how we build the script. 
+ i += 1 + } + } + return result + } + + /// If the array's single row is a marker `{"marker":"__SCARF_RS_BEGIN__N"}`, + /// return N. Otherwise nil. + private func markerIndex(in json: String) -> Int? { + guard let data = json.data(using: .utf8), + let arr = try? JSONSerialization.jsonObject(with: data) as? [[String: Any]], + arr.count == 1, + let marker = arr[0]["marker"] as? String, + marker.hasPrefix(Self.batchMarkerPrefix) else { return nil } + let suffix = marker.dropFirst(Self.batchMarkerPrefix.count) + return Int(suffix) + } + + private func rowsFromJSONArray(_ json: String) throws -> [Row] { + guard let data = json.data(using: .utf8), + let arr = try? JSONSerialization.jsonObject(with: data) as? [[String: Any]] else { + throw BackendError.parseFailure(stdoutHead: String(json.prefix(200))) + } + if arr.isEmpty { return [] } + // `[String: Any]` does NOT preserve insertion order on macOS + // (NSDictionary backing). To keep the SELECT column order + // intact — which the data-service row parsers depend on + // (`row.string(at: 0)` for `id`, etc.) — we extract the key + // order from the FIRST object's raw JSON bytes. Subsequent + // rows reuse that key list to look up values by name from + // their parsed dictionaries. + let firstObjectRaw = extractFirstJSONObject(from: json) + let orderedKeys = firstObjectRaw.flatMap(extractKeysInOrder) ?? Array(arr[0].keys) + var columnIndex: [String: Int] = [:] + columnIndex.reserveCapacity(orderedKeys.count) + for (i, k) in orderedKeys.enumerated() { columnIndex[k] = i } + + var rows: [Row] = [] + rows.reserveCapacity(arr.count) + for obj in arr { + var values: [SQLValue] = [] + values.reserveCapacity(orderedKeys.count) + for key in orderedKeys { + values.append(decode(obj[key])) + } + rows.append(Row(values: values, columnIndex: columnIndex)) + } + return rows + } + + /// Extract the substring of the first `{...}` object in a JSON + /// array string. 
Used so we can scan its keys in original order + /// before NSJSONSerialization's hash-table conversion strips the + /// ordering. Tolerates nested objects/arrays via depth tracking. + private func extractFirstJSONObject(from json: String) -> String? { + guard let openIdx = json.firstIndex(of: "{") else { return nil } + var depth = 0 + var inString = false + var escape = false + var i = openIdx + while i < json.endIndex { + let c = json[i] + if inString { + if escape { escape = false } + else if c == "\\" { escape = true } + else if c == "\"" { inString = false } + i = json.index(after: i) + continue + } + switch c { + case "\"": + inString = true + case "{": + depth += 1 + case "}": + depth -= 1 + if depth == 0 { + let end = json.index(after: i) + return String(json[openIdx.. [String] { + var keys: [String] = [] + var i = objectJSON.startIndex + // Skip past the leading `{`. + while i < objectJSON.endIndex, objectJSON[i] != "{" { + i = objectJSON.index(after: i) + } + if i < objectJSON.endIndex { i = objectJSON.index(after: i) } + var depth = 0 + var inString = false + var escape = false + var keyStart: String.Index? + // We're at the start of object body. Looking for `"key":` patterns + // at depth 0. Toggle `expectingKey` after each `:`/`,`. + var expectingKey = true + while i < objectJSON.endIndex { + let c = objectJSON[i] + if inString { + if escape { + escape = false + } else if c == "\\" { + escape = true + } else if c == "\"" { + inString = false + if expectingKey && depth == 0, let start = keyStart { + keys.append(String(objectJSON[start.. SQLValue { + guard let v else { return .null } + if v is NSNull { return .null } + if let n = v as? NSNumber { + // NSJSONSerialization decodes both ints and doubles into + // NSNumber. Distinguish: if it round-trips through Int64 + // unchanged, treat as integer; else real. 
+ // A leading-zero-after-dot Double like 1.0 still has + // .doubleValue == 1.0 and Int64(1.0) == 1, so the round- + // trip check correctly bins integral doubles as integer + // (which sqlite3 -json does too — `1` in JSON, not `1.0`). + let asInt64 = n.int64Value + if Double(asInt64) == n.doubleValue { + return .integer(asInt64) + } + return .real(n.doubleValue) + } + if let s = v as? String { + return .text(s) + } + // Fall-through: stringify whatever it is so we don't lose data + // silently. SQLite -json doesn't emit booleans or nested + // objects from PRAGMA / SELECT outputs in our usage. + return .text(String(describing: v)) + } + + // MARK: - JSON helpers + + /// Walk a string of one or more concatenated JSON arrays at the top + /// level (sqlite3 -json's batched output) and return each array as + /// a separate substring. Tolerates whitespace/newlines between + /// arrays. + private func splitTopLevelJSONArrays(_ s: String) -> [String] { + var out: [String] = [] + var depth = 0 + var inString = false + var escape = false + var start: String.Index? + var i = s.startIndex + while i < s.endIndex { + let c = s[i] + if inString { + if escape { + escape = false + } else if c == "\\" { + escape = true + } else if c == "\"" { + inString = false + } + i = s.index(after: i) + continue + } + switch c { + case "\"": + inString = true + case "[": + if depth == 0 { start = i } + depth += 1 + case "]": + depth -= 1 + if depth == 0, let begin = start { + let end = s.index(after: i) + out.append(String(s[begin.. String { + let trimmed = sql.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed.hasSuffix(";") { return trimmed } + return trimmed + ";" + } + + // MARK: - Quoting + error mapping + + /// Defensive escape for paths embedded in single-quoted shell + /// strings. Real Hermes paths never contain `'`, but doubling the + /// escape doesn't cost anything and keeps us safe against future + /// surprise. 
+ private func escape(_ path: String) -> String { + path.replacingOccurrences(of: "'", with: "'\\''") + } + + /// Translate a non-zero sqlite3 exit into a user-presentable + /// message. Mirrors substrings that `HermesDataService.humanize` + /// keys off so the existing dashboard banner renders correctly. + private func errorMessage(stderr: String, stdout: String, exitCode: Int32) -> String { + let combined = (stderr.isEmpty ? stdout : stderr).trimmingCharacters(in: .whitespacesAndNewlines) + if combined.isEmpty { + return "sqlite3 exited \(exitCode) with no output" + } + return combined + } +} + +#endif // canImport(SQLite3) diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/SQLValue.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/SQLValue.swift new file mode 100644 index 0000000..d82cd8c --- /dev/null +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/SQLValue.swift @@ -0,0 +1,136 @@ +import Foundation + +/// Typed SQLite column value. Mirrors SQLite's storage classes +/// (`SQLITE_NULL`, `SQLITE_INTEGER`, `SQLITE_FLOAT`, `SQLITE_TEXT`, +/// `SQLITE_BLOB`) so both backends — libsqlite3 (`LocalSQLiteBackend`) +/// and remote `sqlite3 -json` parsing (`RemoteSQLiteBackend`) — can +/// produce and consume the same `Row` shape. +/// +/// Used in two places: +/// +/// 1. **Bound parameters**: callers hand `[SQLValue]` to +/// `HermesQueryBackend.query(_:params:)`. The local backend feeds +/// them into `sqlite3_bind_*`; the remote backend inlines them as +/// SQLite literals via `SQLValueInliner.inline(_:into:)`. +/// 2. **Result columns**: each `Row.values` entry is one of these. +/// Parsers (`sessionFromRow`, `messageFromRow` in HermesDataService) +/// read positional accessors like `row.string(at: 3)` to get the +/// typed value. +public enum SQLValue: Sendable, Equatable { + case null + case integer(Int64) + case real(Double) + case text(String) + case blob(Data) +} + +/// One result row from a query. 
Indexable both by position (matching the +/// libsqlite3 `sqlite3_column_*` ergonomics that `HermesDataService`'s +/// existing parsers expect) and by name (more readable for new code). +/// +/// `columnIndex` is built once per result-set, not per row, so the +/// per-row overhead is just the `[SQLValue]` allocation. +public struct Row: Sendable { + /// Ordered column values, indexable by their position in the + /// underlying SELECT. + public let values: [SQLValue] + + /// Column-name → position map. Built once per result-set by the + /// backend, then shared (by reference) across every row in the + /// set. Lookups are case-sensitive — match SQLite's default. + public let columnIndex: [String: Int] + + public init(values: [SQLValue], columnIndex: [String: Int]) { + self.values = values + self.columnIndex = columnIndex + } + + public subscript(_ position: Int) -> SQLValue { + guard position >= 0, position < values.count else { return .null } + return values[position] + } + + public subscript(_ name: String) -> SQLValue { + guard let i = columnIndex[name] else { return .null } + return values[i] + } + + // MARK: - Typed positional accessors + // + // These mirror the `columnText(stmt, i)` / `columnDate(stmt, i)` + // helpers that lived in HermesDataService so the row-parser + // migrations from `OpaquePointer` to `Row` are line-for-line. + + public func string(at i: Int) -> String { + if case .text(let s) = self[i] { return s } + return "" + } + + public func optionalString(at i: Int) -> String? { + switch self[i] { + case .text(let s): return s + case .null: return nil + default: return nil + } + } + + public func int(at i: Int) -> Int { + switch self[i] { + case .integer(let n): return Int(n) + case .real(let d): return Int(d) + case .text(let s): return Int(s) ?? 0 + default: return 0 + } + } + + public func optionalInt(at i: Int) -> Int? 
{ + switch self[i] { + case .integer(let n): return Int(n) + case .real(let d): return Int(d) + case .text(let s): return Int(s) + case .null: return nil + default: return nil + } + } + + public func int64(at i: Int) -> Int64 { + switch self[i] { + case .integer(let n): return n + case .real(let d): return Int64(d) + case .text(let s): return Int64(s) ?? 0 + default: return 0 + } + } + + public func double(at i: Int) -> Double { + switch self[i] { + case .real(let d): return d + case .integer(let n): return Double(n) + case .text(let s): return Double(s) ?? 0 + default: return 0 + } + } + + public func optionalDouble(at i: Int) -> Double? { + switch self[i] { + case .real(let d): return d + case .integer(let n): return Double(n) + case .text(let s): return Double(s) + case .null: return nil + default: return nil + } + } + + /// Interpret the column as a Unix-epoch timestamp (seconds, fractional + /// allowed). Returns `nil` when the column is NULL or unparseable. + /// Mirrors the existing `columnDate` helper exactly. + public func date(at i: Int) -> Date? { + guard let secs = optionalDouble(at: i) else { return nil } + return Date(timeIntervalSince1970: secs) + } + + public func isNull(at i: Int) -> Bool { + if case .null = self[i] { return true } + return false + } +} diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/SQLValueInliner.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/SQLValueInliner.swift new file mode 100644 index 0000000..c9dc0bf --- /dev/null +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/Backends/SQLValueInliner.swift @@ -0,0 +1,107 @@ +import Foundation + +/// Replaces `?` placeholders in a SQL string with SQLite-escaped +/// literal values, in order. 
Used by `RemoteSQLiteBackend` because +/// the `sqlite3` CLI doesn't accept `?`-bound parameters on the +/// command line — it would need stdin `.parameter set @name` dot- +/// commands, which require a multi-line script for every query and +/// add round-trip overhead with no upside for our use case. +/// +/// **Trust model.** This is a literal-encoder for in-tree, trusted +/// callers — every current param source is either an integer (`limit`, +/// `before`, `since.timeIntervalSince1970`), a Hermes-internal ID +/// (UUID-shaped session/tool IDs that come back from the same DB), or +/// a search query that already passes through `sanitizeFTSQuery` in +/// HermesDataService. It is **NOT** a general SQL-injection defense. +/// Don't extend the data-service surface with methods that accept raw +/// untrusted user input as a `.text` param without first validating +/// upstream. The local backend skips inlining entirely (uses +/// `sqlite3_bind_*`) so this only affects the remote path. +/// +/// Escape rules mirror SQLite's literal syntax: +/// * `.null` → `NULL` +/// * `.integer(n)` → `` (no quoting) +/// * `.real(d)` → `%.17g`-formatted (round-trips Double via decimal) +/// * `.text(s)` → `''` +/// * `.blob(d)` → `X''` +public enum SQLValueInliner { + + /// Walk `sql`, replacing each `?` (outside SQL string literals) with + /// the corresponding `params` entry's encoded form. Throws via + /// fatalError if the placeholder count doesn't match `params.count` + /// — a programmer error, not a runtime condition. + /// + /// `?` inside string literals (e.g. `WHERE name = '?'`) is preserved + /// unchanged. We track quote state with a tiny scanner so existing + /// SQL with literal `?` chars in strings doesn't get mis-bound. 
+ public static func inline(_ sql: String, params: [SQLValue]) -> String { + var out = "" + out.reserveCapacity(sql.count + params.count * 16) + var paramIndex = 0 + var inSingleQuote = false + var inDoubleQuote = false + var i = sql.startIndex + while i < sql.endIndex { + let c = sql[i] + if c == "'" && !inDoubleQuote { + // Check for SQL's `''` escape (a doubled single-quote + // INSIDE a string literal stays inside; we don't toggle + // out). The next char being another `'` keeps us in. + let next = sql.index(after: i) + if inSingleQuote && next < sql.endIndex && sql[next] == "'" { + out.append("'") + out.append("'") + i = sql.index(after: next) + continue + } + inSingleQuote.toggle() + out.append(c) + i = sql.index(after: i) + continue + } + if c == "\"" && !inSingleQuote { + inDoubleQuote.toggle() + out.append(c) + i = sql.index(after: i) + continue + } + if c == "?" && !inSingleQuote && !inDoubleQuote { + // Bind placeholder. + if paramIndex >= params.count { + fatalError("SQLValueInliner: more `?` placeholders in SQL than provided params (\(params.count)). SQL: \(sql)") + } + out.append(encode(params[paramIndex])) + paramIndex += 1 + i = sql.index(after: i) + continue + } + out.append(c) + i = sql.index(after: i) + } + if paramIndex != params.count { + fatalError("SQLValueInliner: \(params.count) params provided but only \(paramIndex) `?` placeholders consumed. SQL: \(sql)") + } + return out + } + + /// Encode a single value as a SQLite literal. Public so callers + /// that build SQL strings by hand (rare — prefer `inline`) can + /// reuse the same escape rules. + public static func encode(_ value: SQLValue) -> String { + switch value { + case .null: + return "NULL" + case .integer(let n): + return String(n) + case .real(let d): + // %.17g round-trips a Double precisely as a decimal. 
+ return String(format: "%.17g", d) + case .text(let s): + return "'" + s.replacingOccurrences(of: "'", with: "''") + "'" + case .blob(let d): + // SQLite blob literal: X'' (case-insensitive prefix). + let hex = d.map { String(format: "%02x", $0) }.joined() + return "X'\(hex)'" + } + } +} diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift index af76e5b..15e6234 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift @@ -1,195 +1,100 @@ // MARK: - Platform gate // -// `SQLite3` is a system module on macOS/iOS but not on Linux -// swift-corelibs-foundation. Everything below depends on it heavily, so the -// whole file is gated on `canImport(SQLite3)`. On Linux the types -// (`HermesDataService`, `SnapshotCoordinator`, and helpers) simply don't -// exist — nothing in ScarfCore compiled for Linux references them, so -// there's no downstream breakage. Apple platforms — the only real runtime -// targets — get the full implementation unchanged. +// This file's row-parsing helpers used to lean on libsqlite3 directly +// (`sqlite3_column_*`); after the v2.7 backend split they go through +// the typed `Row` API and don't actually need the SQLite3 module. +// The gate stays for symmetry with the backend files (LocalSQLiteBackend +// imports SQLite3) and to keep ScarfCore's compile target narrow. #if canImport(SQLite3) import Foundation -import SQLite3 #if canImport(os) import os #endif -/// Dedupes concurrent `snapshotSQLite` calls for the same server. When the -/// file watcher ticks, Dashboard + Sessions + Activity (+ Chat's loadHistory) -/// can all ask for a fresh snapshot within the same millisecond — without -/// coordination they each spawn their own `ssh host sqlite3 .backup; scp` -/// round-trip, three parallel backups of the same DB. 
Callers in flight for -/// the same `ServerID` await the first caller's Task and share its result. -public actor SnapshotCoordinator { - public static let shared = SnapshotCoordinator() - private var inFlight: [ServerID: Task] = [:] - - public func snapshot( - remotePath: String, - contextID: ServerID, - transport: any ServerTransport - ) async throws -> URL { - if let existing = inFlight[contextID] { - return try await existing.value - } - let task = Task { - try transport.snapshotSQLite(remotePath: remotePath) - } - inFlight[contextID] = task - defer { inFlight[contextID] = nil } - return try await task.value - } -} - +/// Read-only data service over Hermes's `state.db`. Routes every query +/// through a `HermesQueryBackend`: +/// +/// * `LocalSQLiteBackend` for `ServerContext.local` — opens the live +/// `~/.hermes/state.db` via libsqlite3. Microseconds per query. +/// * `RemoteSQLiteBackend` for `.ssh` contexts — runs `sqlite3 -json` +/// over an SSH session per query (ControlMaster keeps the channel +/// warm). 50–100 ms per query, but no full-DB transfers and always- +/// fresh data, even for multi-GB DBs (issue #74). +/// +/// The split happened in v2.7 to fix the "5 GB state.db means 7-minute +/// snapshots every refresh" issue. Local performance is unchanged; +/// remote bandwidth scales with query result size, not DB size. public actor HermesDataService { private static let logger = Logger(subsystem: "com.scarf", category: "HermesDataService") - private var db: OpaquePointer? - private var hasV07Schema = false - /// True when the connected DB carries the Hermes v2026.4.23+ - /// columns (`sessions.api_call_count`, `messages.reasoning_content`). - /// Detected via PRAGMA table_info in `detectSchema`. Drives - /// optional-column SELECT shape so older DBs keep working. - private var hasV011Schema = false - /// Local filesystem path we last opened. For remote contexts this is - /// the cached snapshot under `~/Library/Caches/scarf/snapshots//`. 
- private var openedAtPath: String? - /// Last error from `open()` / `refresh()`, user-presentable. `nil` means - /// the last attempt succeeded. Views surface this when their own load - /// path fails, so the user sees "Permission denied reading state.db" - /// instead of an empty Dashboard with no explanation. - public private(set) var lastOpenError: String? - - /// Modification date of the underlying state.db that backs the - /// currently-open connection. For local contexts this tracks the - /// live DB's mtime; for remote contexts it's the cached snapshot's - /// mtime — which equals "when did we last get fresh data." - public private(set) var lastSnapshotMtime: Date? - - /// True when a `snapshotSQLite` pull failed and the open succeeded - /// against a previously-cached snapshot instead of a fresh one. - /// Views render a "Last updated X ago" affordance when this is set - /// alongside `lastOpenError`. Always `false` for local contexts. - public private(set) var isUsingStaleSnapshot: Bool = false - - /// Convenience: how long ago the cached snapshot was written, when - /// we're using a stale snapshot. `nil` when the snapshot is fresh - /// or no mtime could be read. - public var staleAge: TimeInterval? { - guard isUsingStaleSnapshot, let m = lastSnapshotMtime else { return nil } - return Date().timeIntervalSince(m) - } - + private let backend: any HermesQueryBackend public let context: ServerContext private let transport: any ServerTransport + /// Cached schema fingerprint, populated on `open()`. Keeps the + /// SELECT-shape builders (`sessionColumns`, `messageColumns`) + /// synchronous — without this they'd `await backend.hasV07Schema` + /// on every call. + private var hasV07Schema = false + private var hasV011Schema = false + + /// Last error from `open()` / `refresh()`, user-presentable. `nil` + /// means the last attempt succeeded. 
Views surface this when their + /// own load path fails, so the user sees "Permission denied + /// reading state.db" instead of an empty Dashboard with no + /// explanation. + public private(set) var lastOpenError: String? + public init(context: ServerContext = .local) { self.context = context self.transport = context.makeTransport() + if context.isRemote { + self.backend = RemoteSQLiteBackend(context: context, transport: self.transport) + } else { + self.backend = LocalSQLiteBackend(context: context) + } } + /// Test seam — inject any `HermesQueryBackend`. Production code + /// should use the `init(context:)` overload. + internal init(context: ServerContext, backend: any HermesQueryBackend) { + self.context = context + self.transport = context.makeTransport() + self.backend = backend + } + + // MARK: - Lifecycle + public func open() async -> Bool { - await openInternal(forceFresh: false) + let ok = await backend.open() + // Cache schema flags — sessionColumns / messageColumns are + // hot paths (called on every fetch* method) and going async + // for them would force every fetch into a multi-await pattern. + hasV07Schema = await backend.hasV07Schema + hasV011Schema = await backend.hasV011Schema + lastOpenError = await backend.lastOpenError + return ok } - /// Variant that refuses the stale-snapshot fallback. Used by call - /// sites that genuinely need post-write consistency — most notably - /// the chat session-history reload, where a stale snapshot would - /// hide messages the agent just streamed. 
- private func openStrict() async -> Bool { - await openInternal(forceFresh: true) + @discardableResult + public func refresh(forceFresh: Bool = false) async -> Bool { + let ok = await backend.refresh(forceFresh: forceFresh) + hasV07Schema = await backend.hasV07Schema + hasV011Schema = await backend.hasV011Schema + lastOpenError = await backend.lastOpenError + return ok } - private func openInternal(forceFresh: Bool) async -> Bool { - if db != nil { return true } - let localPath: String - if context.isRemote { - // Pull a fresh snapshot from the remote host. Uses `sqlite3 - // .backup` on the remote, which is WAL-safe; a plain cp would - // corrupt. Routed through SnapshotCoordinator so concurrent - // view models don't each spawn a parallel SSH backup for the - // same server. - do { - let url = try await SnapshotCoordinator.shared.snapshot( - remotePath: context.paths.stateDB, - contextID: context.id, - transport: transport - ) - localPath = url.path - lastOpenError = nil - isUsingStaleSnapshot = false - lastSnapshotMtime = mtime(at: url) - } catch { - // Fresh pull failed. If the caller demanded fresh data - // (`forceFresh: true`) OR there's no usable cache on - // disk, surface the error and bail. Otherwise serve - // the cached snapshot with `isUsingStaleSnapshot = true` - // so views can render a "Last updated X ago" banner. 
- if !forceFresh, - let cached = transport.cachedSnapshotPath, - FileManager.default.fileExists(atPath: cached.path) - { - localPath = cached.path - isUsingStaleSnapshot = true - lastSnapshotMtime = mtime(at: cached) - lastOpenError = humanize(error) // user still sees why it's stale - Self.logger.warning( - "Using stale snapshot after pull failure: \(error.localizedDescription, privacy: .public)" - ) - } else { - lastOpenError = humanize(error) - Self.logger.warning("snapshotSQLite failed: \(error.localizedDescription, privacy: .public)") - return false - } - } - } else { - localPath = context.paths.stateDB - guard FileManager.default.fileExists(atPath: localPath) else { - lastOpenError = "Hermes state database not found at \(localPath)." - return false - } - isUsingStaleSnapshot = false - lastSnapshotMtime = mtime(at: URL(fileURLWithPath: localPath)) - } - // Remote snapshots are point-in-time copies that no one writes to; - // opening them with `immutable=1` tells SQLite to skip WAL/SHM and - // locking entirely, which is both faster and avoids spurious - // "unable to open database file" errors if the snapshot ever gets - // pulled mid-checkpoint. Local points at the live Hermes DB where - // the process already has WAL enabled in the header, so a plain - // readonly open is the right thing. 
- let flags: Int32 - let openPath: String - if context.isRemote { - openPath = "file:\(localPath)?immutable=1" - flags = SQLITE_OPEN_READONLY | SQLITE_OPEN_NOMUTEX | SQLITE_OPEN_URI - } else { - openPath = localPath - flags = SQLITE_OPEN_READONLY | SQLITE_OPEN_NOMUTEX - } - let result = sqlite3_open_v2(openPath, &db, flags, nil) - guard result == SQLITE_OK else { - let msg: String - if let db { - msg = String(cString: sqlite3_errmsg(db)) - } else { - msg = "sqlite3_open_v2 returned \(result)" - } - lastOpenError = "Couldn't open state.db: \(msg)" - Self.logger.warning("sqlite3_open_v2 failed (\(result)) at \(localPath, privacy: .public): \(msg, privacy: .public)") - db = nil - return false - } - openedAtPath = localPath - lastOpenError = nil - detectSchema() - return true + public func close() async { + await backend.close() } - /// Turn a transport error into the one-line string Dashboard shows. Adds - /// hints for the common "sqlite3 not installed" and "permission denied" - /// cases so users know what to do. + /// Turn a transport / backend error into the one-line string Dashboard + /// shows. Adds hints for the common "sqlite3 not installed" and + /// "permission denied" cases so users know what to do. Mirrors the + /// pre-v2.7 humanise behaviour exactly so existing UI banners + /// continue to render with the same copy. private nonisolated func humanize(_ error: Error) -> String { let desc = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription let lower = desc.lowercased() @@ -199,87 +104,13 @@ public actor HermesDataService { if lower.contains("permission denied") { return "Permission denied reading Hermes state on \(context.displayName). The SSH user may not have read access to ~/.hermes/state.db — try Run Diagnostics." } - if lower.contains("no such file") { + if lower.contains("no such file") || lower.contains("unable to open database file") { return "Hermes state not found at ~/.hermes on \(context.displayName). 
If Hermes is installed elsewhere, set its data directory in Manage Servers." } return desc } - /// Close the current connection and re-open with a fresh snapshot - /// pull (when remote). When `forceFresh` is `false` (default) and - /// the snapshot pull fails, falls back to the cached snapshot — - /// `isUsingStaleSnapshot` is set so views can render a "Last - /// updated X ago" banner. Pass `forceFresh: true` from call sites - /// that genuinely need post-write consistency (chat session - /// history reload), where stale data would hide messages the - /// agent just streamed. - @discardableResult - public func refresh(forceFresh: Bool = false) async -> Bool { - close() - return await openInternal(forceFresh: forceFresh) - } - - /// Read the modification date of a local file. Returns `nil` if - /// the file is unreachable or has no mtime metadata. Used to - /// stamp `lastSnapshotMtime` so views can show "Last updated - /// X ago" without each one duplicating the FileManager dance. - private nonisolated func mtime(at url: URL) -> Date? { - let attrs = try? FileManager.default.attributesOfItem(atPath: url.path) - return attrs?[.modificationDate] as? Date - } - - public func close() { - if let db { - sqlite3_close(db) - } - db = nil - } - - // MARK: - Schema Detection - - private func detectSchema() { - guard let db else { return } - // Sessions schema - var stmt: OpaquePointer? - if sqlite3_prepare_v2(db, "PRAGMA table_info(sessions)", -1, &stmt, nil) == SQLITE_OK { - defer { sqlite3_finalize(stmt) } - while sqlite3_step(stmt) == SQLITE_ROW { - if let name = sqlite3_column_text(stmt, 1) { - let column = String(cString: name) - if column == "reasoning_tokens" { - hasV07Schema = true - } - if column == "api_call_count" { - hasV011Schema = true - } - } - } - } - // Messages schema — confirm `reasoning_content` exists. 
We - // upgrade to v0.11 only if BOTH new columns are present so - // partial-migration DBs (sessions migrated, messages not yet) - // don't trigger a "no such column" runtime error on message - // reads. Belt-and-braces. - if hasV011Schema { - var msgStmt: OpaquePointer? - var sawReasoningContent = false - if sqlite3_prepare_v2(db, "PRAGMA table_info(messages)", -1, &msgStmt, nil) == SQLITE_OK { - defer { sqlite3_finalize(msgStmt) } - while sqlite3_step(msgStmt) == SQLITE_ROW { - if let name = sqlite3_column_text(msgStmt, 1), - String(cString: name) == "reasoning_content" { - sawReasoningContent = true - break - } - } - } - if !sawReasoningContent { - hasV011Schema = false - } - } - } - - // MARK: - Session Queries + // MARK: - Column shapes private var sessionColumns: String { var cols = """ @@ -297,53 +128,6 @@ public actor HermesDataService { return cols } - public func fetchSessions(limit: Int = QueryDefaults.sessionLimit) -> [HermesSession] { - guard let db else { return [] } - let sql = "SELECT \(sessionColumns) FROM sessions WHERE parent_session_id IS NULL ORDER BY started_at DESC LIMIT ?" - var stmt: OpaquePointer? - guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_int(stmt, 1, Int32(limit)) - - var sessions: [HermesSession] = [] - while sqlite3_step(stmt) == SQLITE_ROW { - sessions.append(sessionFromRow(stmt!)) - } - return sessions - } - - public func fetchSessionsInPeriod(since: Date) -> [HermesSession] { - guard let db else { return [] } - let sql = "SELECT \(sessionColumns) FROM sessions WHERE parent_session_id IS NULL AND started_at >= ? ORDER BY started_at DESC" - var stmt: OpaquePointer? 
- guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_double(stmt, 1, since.timeIntervalSince1970) - - var sessions: [HermesSession] = [] - while sqlite3_step(stmt) == SQLITE_ROW { - sessions.append(sessionFromRow(stmt!)) - } - return sessions - } - - public func fetchSubagentSessions(parentId: String) -> [HermesSession] { - guard let db else { return [] } - let sql = "SELECT \(sessionColumns) FROM sessions WHERE parent_session_id = ? ORDER BY started_at ASC" - var stmt: OpaquePointer? - guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_text(stmt, 1, parentId, -1, sqliteTransient) - - var sessions: [HermesSession] = [] - while sqlite3_step(stmt) == SQLITE_ROW { - sessions.append(sessionFromRow(stmt!)) - } - return sessions - } - - // MARK: - Message Queries - private var messageColumns: String { var cols = """ id, session_id, role, content, tool_call_id, tool_calls, @@ -358,6 +142,41 @@ public actor HermesDataService { return cols } + // MARK: - Session Queries + + public func fetchSessions(limit: Int = QueryDefaults.sessionLimit) async -> [HermesSession] { + let sql = "SELECT \(sessionColumns) FROM sessions WHERE parent_session_id IS NULL ORDER BY started_at DESC LIMIT ?" + do { + let rows = try await backend.query(sql, params: [.integer(Int64(limit))]) + return rows.map { sessionFromRow($0) } + } catch { + Self.logger.warning("fetchSessions failed: \(error.localizedDescription, privacy: .public)") + return [] + } + } + + public func fetchSessionsInPeriod(since: Date) async -> [HermesSession] { + let sql = "SELECT \(sessionColumns) FROM sessions WHERE parent_session_id IS NULL AND started_at >= ? 
ORDER BY started_at DESC" + do { + let rows = try await backend.query(sql, params: [.real(since.timeIntervalSince1970)]) + return rows.map { sessionFromRow($0) } + } catch { + return [] + } + } + + public func fetchSubagentSessions(parentId: String) async -> [HermesSession] { + let sql = "SELECT \(sessionColumns) FROM sessions WHERE parent_session_id = ? ORDER BY started_at ASC" + do { + let rows = try await backend.query(sql, params: [.text(parentId)]) + return rows.map { sessionFromRow($0) } + } catch { + return [] + } + } + + // MARK: - Message Queries + /// Bounded message fetch keyed by message id (monotonic per row, /// safer than timestamp-based pagination because streaming chunk /// timestamps can collide). Returns the most recent `limit` @@ -368,57 +187,43 @@ public actor HermesDataService { sessionId: String, limit: Int, before: Int? = nil - ) -> [HermesMessage] { - guard let db else { return [] } + ) async -> [HermesMessage] { let sql: String - if before != nil { + let params: [SQLValue] + if let before { sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? AND id < ? ORDER BY id DESC LIMIT ?" + params = [.text(sessionId), .integer(Int64(before)), .integer(Int64(limit))] } else { sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? ORDER BY id DESC LIMIT ?" + params = [.text(sessionId), .integer(Int64(limit))] } - var stmt: OpaquePointer? - guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_text(stmt, 1, sessionId, -1, sqliteTransient) - if let before { - sqlite3_bind_int(stmt, 2, Int32(before)) - sqlite3_bind_int(stmt, 3, Int32(limit)) - } else { - sqlite3_bind_int(stmt, 2, Int32(limit)) + do { + let rows = try await backend.query(sql, params: params) + // Caller wants chronological (oldest-first) order; the SELECT + // is DESC for the LIMIT to bite the newest rows, so reverse. 
+ return rows.map { messageFromRow($0) }.reversed() + } catch { + return [] } - - var messages: [HermesMessage] = [] - while sqlite3_step(stmt) == SQLITE_ROW { - messages.append(messageFromRow(stmt!)) - } - // Caller wants chronological (oldest-first) order; the SELECT - // is DESC for the LIMIT to bite the newest rows, so reverse. - return messages.reversed() } /// Legacy unbounded fetch retained for one release cycle so any /// out-of-tree consumers don't break. New code should use the /// bounded `fetchMessages(sessionId:limit:before:)` variant — - /// snapshot loads on 1000+-message sessions stall the UI when - /// they materialize the whole history at once. + /// loads on 1000+-message sessions stall the UI when they + /// materialise the whole history at once. @available(*, deprecated, message: "Use fetchMessages(sessionId:limit:before:) instead.") - public func fetchMessages(sessionId: String) -> [HermesMessage] { - guard let db else { return [] } + public func fetchMessages(sessionId: String) async -> [HermesMessage] { let sql = "SELECT \(messageColumns) FROM messages WHERE session_id = ? ORDER BY timestamp ASC" - var stmt: OpaquePointer? 
- guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_text(stmt, 1, sessionId, -1, sqliteTransient) - - var messages: [HermesMessage] = [] - while sqlite3_step(stmt) == SQLITE_ROW { - messages.append(messageFromRow(stmt!)) + do { + let rows = try await backend.query(sql, params: [.text(sessionId)]) + return rows.map { messageFromRow($0) } + } catch { + return [] } - return messages } - public func searchMessages(query: String, limit: Int = QueryDefaults.messageSearchLimit) -> [HermesMessage] { - guard let db else { return [] } + public func searchMessages(query: String, limit: Int = QueryDefaults.messageSearchLimit) async -> [HermesMessage] { let sanitized = sanitizeFTSQuery(query) guard !sanitized.isEmpty else { return [] } var msgCols = "m.id, m.session_id, m.role, m.content, m.tool_call_id, m.tool_calls, m.tool_name, m.timestamp, m.token_count, m.finish_reason" @@ -432,32 +237,26 @@ public actor HermesDataService { ORDER BY rank LIMIT ? """ - var stmt: OpaquePointer? - guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_text(stmt, 1, sanitized, -1, sqliteTransient) - sqlite3_bind_int(stmt, 2, Int32(limit)) - - var messages: [HermesMessage] = [] - while sqlite3_step(stmt) == SQLITE_ROW { - messages.append(messageFromRow(stmt!)) + do { + let rows = try await backend.query(sql, params: [.text(sanitized), .integer(Int64(limit))]) + return rows.map { messageFromRow($0) } + } catch { + return [] } - return messages } - public func fetchToolResult(callId: String) -> String? { - guard let db else { return nil } + public func fetchToolResult(callId: String) async -> String? { let sql = "SELECT content FROM messages WHERE role = 'tool' AND tool_call_id = ? LIMIT 1" - var stmt: OpaquePointer? 
- guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return nil } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_text(stmt, 1, callId, -1, sqliteTransient) - guard sqlite3_step(stmt) == SQLITE_ROW else { return nil } - return columnText(stmt!, 0) + do { + let rows = try await backend.query(sql, params: [.text(callId)]) + guard let first = rows.first else { return nil } + return first.string(at: 0) + } catch { + return nil + } } - public func fetchRecentToolCalls(limit: Int = QueryDefaults.toolCallLimit) -> [HermesMessage] { - guard let db else { return [] } + public func fetchRecentToolCalls(limit: Int = QueryDefaults.toolCallLimit) async -> [HermesMessage] { let sql = """ SELECT \(messageColumns) FROM messages @@ -465,20 +264,15 @@ public actor HermesDataService { ORDER BY timestamp DESC LIMIT ? """ - var stmt: OpaquePointer? - guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_int(stmt, 1, Int32(limit)) - - var messages: [HermesMessage] = [] - while sqlite3_step(stmt) == SQLITE_ROW { - messages.append(messageFromRow(stmt!)) + do { + let rows = try await backend.query(sql, params: [.integer(Int64(limit))]) + return rows.map { messageFromRow($0) } + } catch { + return [] } - return messages } - public func fetchSessionPreviews(limit: Int = QueryDefaults.sessionPreviewLimit) -> [String: String] { - guard let db else { return [:] } + public func fetchSessionPreviews(limit: Int = QueryDefaults.sessionPreviewLimit) async -> [String: String] { let sql = """ SELECT m.session_id, substr(m.content, 1, \(QueryDefaults.previewContentLength)) FROM messages m @@ -491,18 +285,16 @@ public actor HermesDataService { ORDER BY m.timestamp DESC LIMIT ? """ - var stmt: OpaquePointer? 
- guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [:] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_int(stmt, 1, Int32(limit)) - - var previews: [String: String] = [:] - while sqlite3_step(stmt) == SQLITE_ROW { - let sessionId = columnText(stmt!, 0) - let preview = columnText(stmt!, 1) - previews[sessionId] = preview + do { + let rows = try await backend.query(sql, params: [.integer(Int64(limit))]) + var previews: [String: String] = [:] + for row in rows { + previews[row.string(at: 0)] = row.string(at: 1) + } + return previews + } catch { + return [:] } - return previews } // MARK: - Single-Row Queries @@ -515,69 +307,67 @@ public actor HermesDataService { static let empty = MessageFingerprint(count: 0, maxId: 0, maxTimestamp: 0) } - public func fetchMessageFingerprint(sessionId: String) -> MessageFingerprint { - guard let db else { return .empty } + public func fetchMessageFingerprint(sessionId: String) async -> MessageFingerprint { let sql = "SELECT COUNT(*), COALESCE(MAX(id), 0), COALESCE(MAX(timestamp), 0) FROM messages WHERE session_id = ?" - var stmt: OpaquePointer? 
- guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return .empty } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_text(stmt, 1, sessionId, -1, sqliteTransient) - guard sqlite3_step(stmt) == SQLITE_ROW else { return .empty } - return MessageFingerprint( - count: Int(sqlite3_column_int(stmt, 0)), - maxId: Int(sqlite3_column_int(stmt, 1)), - maxTimestamp: sqlite3_column_double(stmt, 2) - ) + do { + let rows = try await backend.query(sql, params: [.text(sessionId)]) + guard let row = rows.first else { return .empty } + return MessageFingerprint( + count: row.int(at: 0), + maxId: row.int(at: 1), + maxTimestamp: row.double(at: 2) + ) + } catch { + return .empty + } } - public func fetchMessageCount(sessionId: String) -> Int { - guard let db else { return 0 } + public func fetchMessageCount(sessionId: String) async -> Int { let sql = "SELECT COUNT(*) FROM messages WHERE session_id = ?" - var stmt: OpaquePointer? - guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return 0 } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_text(stmt, 1, sessionId, -1, sqliteTransient) - guard sqlite3_step(stmt) == SQLITE_ROW else { return 0 } - return Int(sqlite3_column_int(stmt, 0)) + do { + let rows = try await backend.query(sql, params: [.text(sessionId)]) + return rows.first?.int(at: 0) ?? 0 + } catch { + return 0 + } } - public func fetchSession(id: String) -> HermesSession? { - guard let db else { return nil } + public func fetchSession(id: String) async -> HermesSession? { let sql = "SELECT \(sessionColumns) FROM sessions WHERE id = ? LIMIT 1" - var stmt: OpaquePointer? - guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return nil } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_text(stmt, 1, id, -1, sqliteTransient) - guard sqlite3_step(stmt) == SQLITE_ROW else { return nil } - return sessionFromRow(stmt!) 
+ do { + let rows = try await backend.query(sql, params: [.text(id)]) + return rows.first.map { sessionFromRow($0) } + } catch { + return nil + } } - public func fetchMostRecentlyActiveSessionId() -> String? { - guard let db else { return nil } + public func fetchMostRecentlyActiveSessionId() async -> String? { let sql = "SELECT session_id FROM messages ORDER BY timestamp DESC LIMIT 1" - var stmt: OpaquePointer? - guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return nil } - defer { sqlite3_finalize(stmt) } - guard sqlite3_step(stmt) == SQLITE_ROW else { return nil } - return columnText(stmt!, 0) + do { + let rows = try await backend.query(sql, params: []) + return rows.first?.optionalString(at: 0) + } catch { + return nil + } } - public func fetchMostRecentlyStartedSessionId(after: Date? = nil) -> String? { - guard let db else { return nil } + public func fetchMostRecentlyStartedSessionId(after: Date? = nil) async -> String? { let sql: String - if after != nil { + let params: [SQLValue] + if let after { sql = "SELECT id FROM sessions WHERE parent_session_id IS NULL AND started_at > ? ORDER BY started_at DESC LIMIT 1" + params = [.real(after.timeIntervalSince1970)] } else { sql = "SELECT id FROM sessions WHERE parent_session_id IS NULL ORDER BY started_at DESC LIMIT 1" + params = [] } - var stmt: OpaquePointer? 
- guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return nil } - defer { sqlite3_finalize(stmt) } - if let after { - sqlite3_bind_double(stmt, 1, after.timeIntervalSince1970) + do { + let rows = try await backend.query(sql, params: params) + return rows.first?.optionalString(at: 0) + } catch { + return nil } - guard sqlite3_step(stmt) == SQLITE_ROW else { return nil } - return columnText(stmt!, 0) } // MARK: - Stats @@ -619,60 +409,64 @@ public actor HermesDataService { ) } - public func fetchStats() -> SessionStats { - guard let db else { return .empty } - let sql: String + public func fetchStats() async -> SessionStats { + let sql = statsSQL() + do { + let rows = try await backend.query(sql, params: []) + return rows.first.map { statsFromRow($0) } ?? .empty + } catch { + return .empty + } + } + + private func statsSQL() -> String { if hasV07Schema { - sql = """ + return """ SELECT COUNT(*), COALESCE(SUM(message_count),0), COALESCE(SUM(tool_call_count),0), COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0), COALESCE(SUM(estimated_cost_usd),0), COALESCE(SUM(reasoning_tokens),0), COALESCE(SUM(actual_cost_usd),0) FROM sessions """ - } else { - sql = """ - SELECT COUNT(*), COALESCE(SUM(message_count),0), COALESCE(SUM(tool_call_count),0), - COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0), - COALESCE(SUM(estimated_cost_usd),0) - FROM sessions - """ } - var stmt: OpaquePointer? 
- guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return .empty } - defer { sqlite3_finalize(stmt) } - guard sqlite3_step(stmt) == SQLITE_ROW else { return .empty } - return SessionStats( - totalSessions: Int(sqlite3_column_int(stmt, 0)), - totalMessages: Int(sqlite3_column_int(stmt, 1)), - totalToolCalls: Int(sqlite3_column_int(stmt, 2)), - totalInputTokens: Int(sqlite3_column_int(stmt, 3)), - totalOutputTokens: Int(sqlite3_column_int(stmt, 4)), - totalCostUSD: sqlite3_column_double(stmt, 5), - totalReasoningTokens: hasV07Schema ? Int(sqlite3_column_int(stmt, 6)) : 0, - totalActualCostUSD: hasV07Schema ? sqlite3_column_double(stmt, 7) : 0 + return """ + SELECT COUNT(*), COALESCE(SUM(message_count),0), COALESCE(SUM(tool_call_count),0), + COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0), + COALESCE(SUM(estimated_cost_usd),0) + FROM sessions + """ + } + + private func statsFromRow(_ row: Row) -> SessionStats { + SessionStats( + totalSessions: row.int(at: 0), + totalMessages: row.int(at: 1), + totalToolCalls: row.int(at: 2), + totalInputTokens: row.int(at: 3), + totalOutputTokens: row.int(at: 4), + totalCostUSD: row.double(at: 5), + totalReasoningTokens: hasV07Schema ? row.int(at: 6) : 0, + totalActualCostUSD: hasV07Schema ? row.double(at: 7) : 0 ) } // MARK: - Insights Queries - public func fetchUserMessageCount(since: Date) -> Int { - guard let db else { return 0 } + public func fetchUserMessageCount(since: Date) async -> Int { let sql = """ SELECT COUNT(*) FROM messages m JOIN sessions s ON m.session_id = s.id WHERE m.role = 'user' AND s.parent_session_id IS NULL AND s.started_at >= ? """ - var stmt: OpaquePointer? 
- guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return 0 } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_double(stmt, 1, since.timeIntervalSince1970) - guard sqlite3_step(stmt) == SQLITE_ROW else { return 0 } - return Int(sqlite3_column_int(stmt, 0)) + do { + let rows = try await backend.query(sql, params: [.real(since.timeIntervalSince1970)]) + return rows.first?.int(at: 0) ?? 0 + } catch { + return 0 + } } - public func fetchToolUsage(since: Date) -> [(name: String, count: Int)] { - guard let db else { return [] } + public func fetchToolUsage(since: Date) async -> [(name: String, count: Int)] { let sql = """ SELECT m.tool_name, COUNT(*) as cnt FROM messages m @@ -681,62 +475,199 @@ public actor HermesDataService { GROUP BY m.tool_name ORDER BY cnt DESC """ - var stmt: OpaquePointer? - guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_double(stmt, 1, since.timeIntervalSince1970) - - var results: [(name: String, count: Int)] = [] - while sqlite3_step(stmt) == SQLITE_ROW { - let name = columnText(stmt!, 0) - let count = Int(sqlite3_column_int(stmt!, 1)) - results.append((name: name, count: count)) + do { + let rows = try await backend.query(sql, params: [.real(since.timeIntervalSince1970)]) + return rows.map { (name: $0.string(at: 0), count: $0.int(at: 1)) } + } catch { + return [] } - return results } - public func fetchSessionStartHours(since: Date) -> [Int: Int] { - guard let db else { return [:] } + public func fetchSessionStartHours(since: Date) async -> [Int: Int] { let sql = """ SELECT started_at FROM sessions WHERE parent_session_id IS NULL AND started_at >= ? """ - var stmt: OpaquePointer? 
- guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [:] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_double(stmt, 1, since.timeIntervalSince1970) - - var hours: [Int: Int] = [:] - let calendar = Calendar.current - while sqlite3_step(stmt) == SQLITE_ROW { - let ts = sqlite3_column_double(stmt!, 0) - let date = Date(timeIntervalSince1970: ts) - let hour = calendar.component(.hour, from: date) - hours[hour, default: 0] += 1 + do { + let rows = try await backend.query(sql, params: [.real(since.timeIntervalSince1970)]) + var hours: [Int: Int] = [:] + let calendar = Calendar.current + for row in rows { + if let date = row.date(at: 0) { + let hour = calendar.component(.hour, from: date) + hours[hour, default: 0] += 1 + } + } + return hours + } catch { + return [:] } - return hours } - public func fetchSessionDaysOfWeek(since: Date) -> [Int: Int] { - guard let db else { return [:] } + public func fetchSessionDaysOfWeek(since: Date) async -> [Int: Int] { let sql = """ SELECT started_at FROM sessions WHERE parent_session_id IS NULL AND started_at >= ? """ - var stmt: OpaquePointer? 
- guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { return [:] } - defer { sqlite3_finalize(stmt) } - sqlite3_bind_double(stmt, 1, since.timeIntervalSince1970) - - var days: [Int: Int] = [:] - let calendar = Calendar.current - while sqlite3_step(stmt) == SQLITE_ROW { - let ts = sqlite3_column_double(stmt!, 0) - let date = Date(timeIntervalSince1970: ts) - let weekday = (calendar.component(.weekday, from: date) + 5) % 7 // Mon=0 - days[weekday, default: 0] += 1 + do { + let rows = try await backend.query(sql, params: [.real(since.timeIntervalSince1970)]) + var days: [Int: Int] = [:] + let calendar = Calendar.current + for row in rows { + if let date = row.date(at: 0) { + let weekday = (calendar.component(.weekday, from: date) + 5) % 7 // Mon=0 + days[weekday, default: 0] += 1 + } + } + return days + } catch { + return [:] } - return days } + // MARK: - Batched snapshots + + /// Bundle the four queries Dashboard fires on every load into one + /// backend round-trip. For local backends this is just four + /// sequential `query` calls (no perf change). For remote backends + /// it's one SSH round-trip running one sqlite3 invocation, which + /// turns Dashboard's "open" cost from ~280 ms (4 × 70 ms) into + /// ~80–100 ms. 
+ public struct DashboardSnapshot: Sendable { + public let stats: SessionStats + public let recentSessions: [HermesSession] + public let sessionPreviews: [String: String] + public let recentToolCalls: [HermesMessage] + } + + public func dashboardSnapshot( + sessionLimit: Int = 5, + previewLimit: Int = 5, + toolCallLimit: Int = 8 + ) async -> DashboardSnapshot { + let statements: [(sql: String, params: [SQLValue])] = [ + (statsSQL(), []), + ( + "SELECT \(sessionColumns) FROM sessions WHERE parent_session_id IS NULL ORDER BY started_at DESC LIMIT ?", + [.integer(Int64(sessionLimit))] + ), + ( + """ + SELECT m.session_id, substr(m.content, 1, \(QueryDefaults.previewContentLength)) + FROM messages m + INNER JOIN ( + SELECT session_id, MIN(id) as min_id + FROM messages + WHERE role = 'user' AND content <> '' + GROUP BY session_id + ) first ON m.id = first.min_id + ORDER BY m.timestamp DESC + LIMIT ? + """, + [.integer(Int64(previewLimit))] + ), + ( + """ + SELECT \(messageColumns) + FROM messages + WHERE tool_calls IS NOT NULL AND tool_calls != '[]' AND tool_calls != '' + ORDER BY timestamp DESC + LIMIT ? + """, + [.integer(Int64(toolCallLimit))] + ) + ] + do { + let resultSets = try await backend.queryBatch(statements) + let stats = resultSets.first?.first.map { statsFromRow($0) } ?? .empty + let sessions = (resultSets.count > 1 ? resultSets[1] : []).map { sessionFromRow($0) } + var previews: [String: String] = [:] + for row in (resultSets.count > 2 ? resultSets[2] : []) { + previews[row.string(at: 0)] = row.string(at: 1) + } + let toolCalls = (resultSets.count > 3 ? 
resultSets[3] : []).map { messageFromRow($0) } + return DashboardSnapshot( + stats: stats, + recentSessions: sessions, + sessionPreviews: previews, + recentToolCalls: toolCalls + ) + } catch { + Self.logger.warning("dashboardSnapshot failed: \(error.localizedDescription, privacy: .public)") + return DashboardSnapshot( + stats: .empty, + recentSessions: [], + sessionPreviews: [:], + recentToolCalls: [] + ) + } + } + + /// Bundle the queries Insights fires on every load into one + /// backend round-trip — same rationale as `dashboardSnapshot`. + public struct InsightsSnapshot: Sendable { + public let userMessageCount: Int + public let toolUsage: [(name: String, count: Int)] + public let startHours: [Int: Int] + public let daysOfWeek: [Int: Int] + } + + public func insightsSnapshot(since: Date) async -> InsightsSnapshot { + let sinceTs = since.timeIntervalSince1970 + let statements: [(sql: String, params: [SQLValue])] = [ + ( + """ + SELECT COUNT(*) FROM messages m + JOIN sessions s ON m.session_id = s.id + WHERE m.role = 'user' AND s.parent_session_id IS NULL AND s.started_at >= ? + """, + [.real(sinceTs)] + ), + ( + """ + SELECT m.tool_name, COUNT(*) as cnt + FROM messages m + JOIN sessions s ON m.session_id = s.id + WHERE m.tool_name IS NOT NULL AND m.tool_name <> '' AND s.parent_session_id IS NULL AND s.started_at >= ? + GROUP BY m.tool_name + ORDER BY cnt DESC + """, + [.real(sinceTs)] + ), + ( + "SELECT started_at FROM sessions WHERE parent_session_id IS NULL AND started_at >= ?", + [.real(sinceTs)] + ) + ] + do { + let resultSets = try await backend.queryBatch(statements) + let userCount = resultSets.first?.first?.int(at: 0) ?? 0 + let toolUsage = (resultSets.count > 1 ? resultSets[1] : []).map { + (name: $0.string(at: 0), count: $0.int(at: 1)) + } + // The third statement returns timestamps; client-side + // calendar bucketing into hours + days-of-week. 
+ let calendar = Calendar.current + var hours: [Int: Int] = [:] + var days: [Int: Int] = [:] + for row in (resultSets.count > 2 ? resultSets[2] : []) { + guard let date = row.date(at: 0) else { continue } + let hour = calendar.component(.hour, from: date) + hours[hour, default: 0] += 1 + let weekday = (calendar.component(.weekday, from: date) + 5) % 7 + days[weekday, default: 0] += 1 + } + return InsightsSnapshot( + userMessageCount: userCount, + toolUsage: toolUsage, + startHours: hours, + daysOfWeek: days + ) + } catch { + return InsightsSnapshot(userMessageCount: 0, toolUsage: [], startHours: [:], daysOfWeek: [:]) + } + } + + // MARK: - Modification date + public func stateDBModificationDate() -> Date? { // For remote contexts we stat the remote paths. For local it's the // same FileManager lookup as before, just via the transport. @@ -750,59 +681,56 @@ public actor HermesDataService { // MARK: - Row Parsing - private func sessionFromRow(_ stmt: OpaquePointer) -> HermesSession { + private func sessionFromRow(_ row: Row) -> HermesSession { // v0.11 column lives at index 20 (after the 16 base + 4 v0.7 - // columns). Read defensively — old DBs that lack the column + // columns). Reading defensively — old DBs that lack the column // never reach this code path because hasV011Schema gates the // SELECT shape. - let apiCallCount: Int = { - guard hasV011Schema else { return 0 } - return Int(sqlite3_column_int(stmt, 20)) - }() + let apiCallCount: Int = hasV011Schema ? 
row.int(at: 20) : 0 return HermesSession( - id: columnText(stmt, 0), - source: columnText(stmt, 1), - userId: columnOptionalText(stmt, 2), - model: columnOptionalText(stmt, 3), - title: columnOptionalText(stmt, 4), - parentSessionId: columnOptionalText(stmt, 5), - startedAt: columnDate(stmt, 6), - endedAt: columnDate(stmt, 7), - endReason: columnOptionalText(stmt, 8), - messageCount: Int(sqlite3_column_int(stmt, 9)), - toolCallCount: Int(sqlite3_column_int(stmt, 10)), - inputTokens: Int(sqlite3_column_int(stmt, 11)), - outputTokens: Int(sqlite3_column_int(stmt, 12)), - cacheReadTokens: Int(sqlite3_column_int(stmt, 13)), - cacheWriteTokens: Int(sqlite3_column_int(stmt, 14)), - estimatedCostUSD: sqlite3_column_type(stmt, 15) != SQLITE_NULL ? sqlite3_column_double(stmt, 15) : nil, - reasoningTokens: hasV07Schema ? Int(sqlite3_column_int(stmt, 16)) : 0, - actualCostUSD: hasV07Schema && sqlite3_column_type(stmt, 17) != SQLITE_NULL ? sqlite3_column_double(stmt, 17) : nil, - costStatus: hasV07Schema ? columnOptionalText(stmt, 18) : nil, - billingProvider: hasV07Schema ? columnOptionalText(stmt, 19) : nil, + id: row.string(at: 0), + source: row.string(at: 1), + userId: row.optionalString(at: 2), + model: row.optionalString(at: 3), + title: row.optionalString(at: 4), + parentSessionId: row.optionalString(at: 5), + startedAt: row.date(at: 6), + endedAt: row.date(at: 7), + endReason: row.optionalString(at: 8), + messageCount: row.int(at: 9), + toolCallCount: row.int(at: 10), + inputTokens: row.int(at: 11), + outputTokens: row.int(at: 12), + cacheReadTokens: row.int(at: 13), + cacheWriteTokens: row.int(at: 14), + estimatedCostUSD: row.optionalDouble(at: 15), + reasoningTokens: hasV07Schema ? row.int(at: 16) : 0, + actualCostUSD: hasV07Schema ? row.optionalDouble(at: 17) : nil, + costStatus: hasV07Schema ? row.optionalString(at: 18) : nil, + billingProvider: hasV07Schema ? 
row.optionalString(at: 19) : nil, apiCallCount: apiCallCount ) } - private func messageFromRow(_ stmt: OpaquePointer) -> HermesMessage { - let toolCallsJSON = columnOptionalText(stmt, 5) + private func messageFromRow(_ row: Row) -> HermesMessage { + let toolCallsJSON = row.optionalString(at: 5) let toolCalls = parseToolCalls(toolCallsJSON) // reasoning lives at index 10 (v0.7+); reasoning_content at 11 // when v0.11 schema is present. Both columns can carry text // simultaneously — UI prefers `reasoningContent`. - let reasoningContent: String? = hasV011Schema ? columnOptionalText(stmt, 11) : nil + let reasoningContent: String? = hasV011Schema ? row.optionalString(at: 11) : nil return HermesMessage( - id: Int(sqlite3_column_int(stmt, 0)), - sessionId: columnText(stmt, 1), - role: columnText(stmt, 2), - content: columnText(stmt, 3), - toolCallId: columnOptionalText(stmt, 4), + id: row.int(at: 0), + sessionId: row.string(at: 1), + role: row.string(at: 2), + content: row.string(at: 3), + toolCallId: row.optionalString(at: 4), toolCalls: toolCalls, - toolName: columnOptionalText(stmt, 6), - timestamp: columnDate(stmt, 7), - tokenCount: sqlite3_column_type(stmt, 8) != SQLITE_NULL ? Int(sqlite3_column_int(stmt, 8)) : nil, - finishReason: columnOptionalText(stmt, 9), - reasoning: hasV07Schema ? columnOptionalText(stmt, 10) : nil, + toolName: row.optionalString(at: 6), + timestamp: row.date(at: 7), + tokenCount: row.optionalInt(at: 8), + finishReason: row.optionalString(at: 9), + reasoning: hasV07Schema ? row.optionalString(at: 10) : nil, reasoningContent: reasoningContent ) } @@ -818,25 +746,6 @@ public actor HermesDataService { } } - private func columnText(_ stmt: OpaquePointer, _ col: Int32) -> String { - if let cStr = sqlite3_column_text(stmt, col) { - return String(cString: cStr) - } - return "" - } - - private func columnOptionalText(_ stmt: OpaquePointer, _ col: Int32) -> String? 
{ - guard sqlite3_column_type(stmt, col) != SQLITE_NULL, - let cStr = sqlite3_column_text(stmt, col) else { return nil } - return String(cString: cStr) - } - - private func columnDate(_ stmt: OpaquePointer, _ col: Int32) -> Date? { - guard sqlite3_column_type(stmt, col) != SQLITE_NULL else { return nil } - let value = sqlite3_column_double(stmt, col) - return Date(timeIntervalSince1970: value) - } - /// Wraps each whitespace-delimited token in double quotes to prevent FTS5 parse errors /// on terms containing dots, hyphens, or FTS5 operators (e.g., "v0.7.0", "config.yaml"). private func sanitizeFTSQuery(_ raw: String) -> String { diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift index e0733e3..7177114 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift @@ -289,18 +289,35 @@ public struct LocalTransport: ServerTransport { #endif } - // MARK: - SQLite + // MARK: - Script streaming - public func snapshotSQLite(remotePath: String) throws -> URL { - // Local case: no copy needed. Services open the path directly. - URL(fileURLWithPath: remotePath) + /// Run `script` through `/bin/sh -c` locally. Local data path + /// doesn't actually call this in production (the data service + /// hands `LocalSQLiteBackend` the libsqlite3-direct path) — kept + /// for protocol parity and for tooling that wants a uniform + /// "run a script" entry on either context kind. 
+ public func streamScript(_ script: String, timeout: TimeInterval) async throws -> ProcessResult { + #if os(iOS) + throw TransportError.other(message: "LocalTransport.streamScript is unavailable on iOS") + #else + let outcome = await SSHScriptRunner.run( + script: script, + context: ServerContext(id: contextID, displayName: "Local", kind: .local), + timeout: timeout + ) + switch outcome { + case .connectFailure(let reason): + throw TransportError.other(message: reason) + case .completed(let stdout, let stderr, let exitCode): + return ProcessResult( + exitCode: exitCode, + stdout: Data(stdout.utf8), + stderr: Data(stderr.utf8) + ) + } + #endif } - /// Local transport reads the live DB directly — there's no cached - /// snapshot to fall back to (and no failure mode where falling back - /// would help, since a missing local file is missing both ways). - public var cachedSnapshotPath: URL? { nil } - // MARK: - Watching #if canImport(Darwin) diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift index 254448f..23cabd3 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift @@ -620,146 +620,26 @@ public struct SSHTransport: ServerTransport { return env } - // MARK: - SQLite snapshot + // MARK: - Script streaming - public func snapshotSQLite(remotePath: String) throws -> URL { - try? FileManager.default.createDirectory(atPath: snapshotDir, withIntermediateDirectories: true) - let localPath = snapshotDir + "/state.db" - - // Probe remote size up front. Drives both the timeout budget - // (a multi-GB state.db over a slow link can take many minutes — - // the historical hardcoded 120s scp timeout was wildly - // insufficient for users with 5GB+ DBs, issue #74) and a local- - // disk-space pre-flight so we don't fill the user's Mac - // mid-transfer. 
Falls back to base timeouts if stat fails. - let remoteSize = stat(remotePath)?.size ?? 0 - - // Pre-flight: refuse to start if local Caches volume can't hold - // the snapshot plus a 500MB safety margin. Better to fail - // up-front with a clear "out of disk" message than to 90%-fill - // the volume and crash mid-transfer. - if remoteSize > 0, - let attrs = try? FileManager.default.attributesOfFileSystem(forPath: snapshotDir), - let free = (attrs[.systemFreeSize] as? NSNumber)?.int64Value, - free < remoteSize + 500_000_000 { - throw TransportError.fileIO( - path: localPath, - underlying: "Insufficient local disk space: state.db is \(Self.formatBytes(remoteSize)), only \(Self.formatBytes(free)) free in \(snapshotDir)." + /// Pipe `script` to `/bin/sh -s` over the ControlMaster-shared SSH + /// channel. Used by `RemoteSQLiteBackend` to invoke `sqlite3 -json` + /// per query without the per-arg quoting that `runProcess` would + /// apply. Delegates to `SSHScriptRunner` which already implements + /// the ssh-stdin-pipe pattern correctly. + public func streamScript(_ script: String, timeout: TimeInterval) async throws -> ProcessResult { + let context = ServerContext(id: contextID, displayName: displayName, kind: .ssh(config)) + let outcome = await SSHScriptRunner.run(script: script, context: context, timeout: timeout) + switch outcome { + case .connectFailure(let reason): + throw TransportError.other(message: reason) + case .completed(let stdout, let stderr, let exitCode): + return ProcessResult( + exitCode: exitCode, + stdout: Data(stdout.utf8), + stderr: Data(stderr.utf8) ) } - - // Adaptive timeouts. `.backup` is sequential page copy at ~100MB/s - // on a typical SSD, but the resulting file can be huge — give it - // 60s base + 1s per 100MB, capped at 10 minutes. SCP is the real - // bottleneck: 300s base + 0.5s per MB (≈2 MB/s minimum throughput, - // which covers users on slow international links), capped at 1 - // hour. 
A user with a state.db so big it doesn't fit in 1h needs - // a different approach than Scarf can offer (rsync delta, mounted - // FS, etc.). - let backupTimeout: TimeInterval = remoteSize > 0 - ? min(600, 60 + Double(remoteSize) / 100_000_000) - : 60 - let scpTimeout: TimeInterval = remoteSize > 0 - ? min(3600, 300 + Double(remoteSize) / 2_000_000) - : 300 - - // `.backup` is WAL-safe: sqlite takes a consistent snapshot without - // blocking writers. A plain `cp` of a WAL-mode DB could corrupt. - let remoteTmp = "/tmp/scarf-snapshot-\(UUID().uuidString).db" - // sqlite3's `.backup` is a dot-command, not a CLI arg. The whole - // dot-command must be one shell argument (double-quoted) so sqlite3 - // receives it as a single command; the backup path inside it is - // single-quoted so sqlite3 parses it correctly. The DB path is a - // separate shell argument and goes through `remotePathArg` - // (double-quoted, $HOME-aware) so `~/.hermes/state.db` actually - // resolves on the remote. - // - // The second sqlite3 invocation flips the snapshot out of WAL mode - // so the scp'd file is self-contained: `.backup` preserves the - // source's journal_mode in the destination header, so without this - // step the client would need the `-wal`/`-shm` sidecars too, and - // every read would fail with "unable to open database file". - // - // Final shell command on the remote: - // sqlite3 "$HOME/.hermes/state.db" ".backup '/tmp/scarf-snapshot-XYZ.db'" \ - // && sqlite3 '/tmp/scarf-snapshot-XYZ.db' "PRAGMA journal_mode=DELETE;" - let backupScript = #"sqlite3 \#(Self.remotePathArg(remotePath)) ".backup '\#(remoteTmp)'" && sqlite3 '\#(remoteTmp)' "PRAGMA journal_mode=DELETE;" > /dev/null"# - let backup = try runRemoteShell(backupScript, timeout: backupTimeout) - if backup.exitCode != 0 { - throw TransportError.classifySSHFailure(host: config.host, exitCode: backup.exitCode, stderr: backup.stderrString) - } - // scp the backup down. 
scp/sftp expands `~` natively (it goes - // through the SSH file-transfer protocol, not a remote shell), so - // remoteTmp's `/tmp/...` absolute path round-trips as-is. - // `-C` enables gzip compression in transit; SQLite DBs typically - // have lots of empty pages and zero padding so the wire savings - // are 30-50% in practice. - ensureControlDir() - var scpArgs: [String] = [ - "-C", - "-o", "ControlMaster=auto", - "-o", "ControlPath=\(controlDir)/%C", - "-o", "ControlPersist=600", - "-o", "StrictHostKeyChecking=accept-new", - "-o", "LogLevel=QUIET", - "-o", "BatchMode=yes" - ] - if let port = config.port { scpArgs += ["-P", String(port)] } - if let id = config.identityFile, !id.isEmpty { scpArgs += ["-i", id] } - scpArgs.append("\(hostSpec):\(remoteTmp)") - scpArgs.append(localPath) - - do { - let pull = try runLocal(executable: scpBinary, args: scpArgs, stdin: nil, timeout: scpTimeout) - // Best-effort cleanup of remote tmp regardless of outcome. - _ = try? runRemoteShell("rm -f \(Self.remotePathArg(remoteTmp))") - if pull.exitCode != 0 { - // Wipe the partial local file so a subsequent attempt - // doesn't try to open a corrupted SQLite database. - // Otherwise scp's truncate-on-write semantics leave a - // smaller-than-expected `.db` that sqlite_open succeeds - // on but every read returns garbage from. - try? FileManager.default.removeItem(atPath: localPath) - throw TransportError.classifySSHFailure(host: config.host, exitCode: pull.exitCode, stderr: pull.stderrString) - } - } catch let error as TransportError { - _ = try? runRemoteShell("rm -f \(Self.remotePathArg(remoteTmp))") - try? FileManager.default.removeItem(atPath: localPath) - // Rewrite "Command timed out after Ns" into something useful - // when it was the snapshot pull that hit the wall — generic - // timeout message gives the user no clue that the cause was - // a 5GB DB on a slow link. 
- if case .timeout(let secs, _) = error, remoteSize > 0 { - throw TransportError.other( - message: "Snapshot transfer timed out after \(Int(secs))s pulling \(Self.formatBytes(remoteSize)) state.db from \(config.host). Try again on a faster network connection, or reduce the size of the remote state.db." - ) - } - throw error - } catch { - _ = try? runRemoteShell("rm -f \(Self.remotePathArg(remoteTmp))") - try? FileManager.default.removeItem(atPath: localPath) - throw error - } - - return URL(fileURLWithPath: localPath) - } - - /// Human-readable byte count for snapshot-pipeline error messages. - /// Wraps `ByteCountFormatter` for callers that just want - /// `"4.87 GB"` — used in the size-aware timeout / disk-space - /// errors emitted by `snapshotSQLite`. - nonisolated private static func formatBytes(_ bytes: Int64) -> String { - let formatter = ByteCountFormatter() - formatter.countStyle = .file - return formatter.string(fromByteCount: bytes) - } - - /// Path where the most recent successful snapshot was written — - /// returned even when the remote is currently unreachable. The - /// data service falls back to this when `snapshotSQLite` throws so - /// Dashboard / Sessions / Chat-history stay viewable offline. - public var cachedSnapshotPath: URL? { - URL(fileURLWithPath: snapshotDir + "/state.db") } // MARK: - Watching diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/ServerTransport.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/ServerTransport.swift index a2fd544..c229625 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/ServerTransport.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/ServerTransport.swift @@ -96,27 +96,25 @@ public protocol ServerTransport: Sendable { args: [String] ) -> AsyncThrowingStream - // MARK: - SQLite - - /// Return a local filesystem URL pointing at a fresh, consistent copy of - /// the SQLite database at `remotePath`. 
For local transports this is - /// just the remote path unchanged. For SSH transports this performs - /// `sqlite3 .backup` on the remote side and scp's the backup into - /// `~/Library/Caches/scarf//state.db`, returning that URL. - nonisolated func snapshotSQLite(remotePath: String) throws -> URL - - /// Local filesystem URL where this transport caches its SQLite snapshot, - /// returned even when the remote is unreachable. Callers should - /// `FileManager.default.fileExists(atPath:)` before reading — the - /// transport can't atomically check existence and return the URL - /// in one step without TOCTOU. Local transports return `nil` - /// (their data is the live DB, not a cache). + /// Pipe a multi-line shell script through `/bin/sh -s` on the + /// target and return its captured output. The script travels as a + /// single opaque byte stream — no per-line shell interpolation, + /// no per-arg quoting — so `"$VAR"` references, here-docs, and + /// nested quotes survive untouched. /// - /// Used by `HermesDataService.open()` to fall back to the last - /// successful snapshot when a fresh `snapshotSQLite` call fails, - /// so the app keeps showing data with a "Last updated X ago" - /// affordance instead of a blank screen. - nonisolated var cachedSnapshotPath: URL? { get } + /// Replaces the old `snapshotSQLite` + scp pipeline. Used by + /// `RemoteSQLiteBackend` to invoke `sqlite3 -readonly -json` over + /// SSH per query (or per batch). Local transport runs the script + /// in-process via `/bin/sh -c`. SSH transport delegates to + /// `SSHScriptRunner` (ControlMaster-shared channel). Citadel + /// transport (iOS) base64-encodes the script + decodes remotely + /// to skirt Citadel's missing-stdin support. + /// + /// Throws on transport failures (host unreachable, ssh exit 255, + /// timeout). Returns `ProcessResult` with the script's exit code + /// + stdout + stderr on completion — non-zero exit is NOT a + /// throw; callers inspect `exitCode` and decide. 
+ nonisolated func streamScript(_ script: String, timeout: TimeInterval) async throws -> ProcessResult // MARK: - Watching diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/InsightsViewModel.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/InsightsViewModel.swift index 6555616..43085ce 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/InsightsViewModel.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/InsightsViewModel.swift @@ -117,12 +117,19 @@ public final class InsightsViewModel { } let since = period.sinceDate + // The four insights queries (user-message count, tool usage, + // hourly + daily activity histograms) batch through one + // `insightsSnapshot` round-trip. Sessions and session-previews + // stay separate — they're large result sets and stay on their + // own calls. For remote contexts this turns ~5 SSH round-trips + // into 3. sessions = await dataService.fetchSessionsInPeriod(since: since) sessionPreviews = await dataService.fetchSessionPreviews(limit: 500) - userMessageCount = await dataService.fetchUserMessageCount(since: since) - let tools = await dataService.fetchToolUsage(since: since) - hourlyActivity = await dataService.fetchSessionStartHours(since: since) - dailyActivity = await dataService.fetchSessionDaysOfWeek(since: since) + let snapshot = await dataService.insightsSnapshot(since: since) + userMessageCount = snapshot.userMessageCount + let tools = snapshot.toolUsage + hourlyActivity = snapshot.startHours + dailyActivity = snapshot.daysOfWeek await dataService.close() diff --git a/scarf/Packages/ScarfCore/Tests/ScarfCoreTests/Helpers/MockHermesQueryBackend.swift b/scarf/Packages/ScarfCore/Tests/ScarfCoreTests/Helpers/MockHermesQueryBackend.swift new file mode 100644 index 0000000..6bc3337 --- /dev/null +++ b/scarf/Packages/ScarfCore/Tests/ScarfCoreTests/Helpers/MockHermesQueryBackend.swift @@ -0,0 +1,150 @@ +#if canImport(SQLite3) + +import Foundation +@testable import 
/// Test double for `HermesQueryBackend`. Lets the data-service-façade
/// tests assert which SQL gets emitted, with which params, and feed
/// scripted result rows back.
///
/// Implemented as an `actor` to satisfy the protocol's `Sendable`
/// requirement and to mirror how the real backends serialize state.
/// Actors cannot participate in inheritance, so no `final` marker is
/// needed (or meaningful) to prevent subclassing.
actor MockHermesQueryBackend: HermesQueryBackend {

    // MARK: - Knobs

    var openShouldSucceed: Bool = true
    var hasV07Schema: Bool = false
    var hasV011Schema: Bool = false
    var lastOpenError: String? = nil

    /// Map of SQL prefix → rows. Lookup picks the longest matching
    /// prefix, so callers can register both broad ("SELECT") and
    /// narrow ("SELECT id, source FROM sessions") matchers without
    /// the broad one swallowing the narrow one.
    private var scriptedResults: [String: [Row]] = [:]

    /// Map of SQL prefix → backend error to throw instead of returning
    /// rows. Used to test the data-service's error-swallowing paths.
    private var scriptedFailures: [String: BackendError] = [:]

    /// Every `query(_:params:)` call lands here in order — assertion
    /// material for "did the façade emit the SQL we expected".
    private(set) var queryLog: [(sql: String, params: [SQLValue])] = []

    /// Every `queryBatch` call lands here in order, one outer entry
    /// per call, inner entries for each statement in that batch.
    private(set) var batchLog: [[(sql: String, params: [SQLValue])]] = []

    /// Track open/refresh/close lifecycle for a couple of tests that
    /// want to assert "façade really did call open()".
    private(set) var openCallCount = 0
    private(set) var refreshCallCount = 0
    private(set) var closeCallCount = 0

    // MARK: - Knob mutators (called from tests)

    func setOpenShouldSucceed(_ value: Bool) { openShouldSucceed = value }
    func setHasV07Schema(_ value: Bool) { hasV07Schema = value }
    func setHasV011Schema(_ value: Bool) { hasV011Schema = value }
    func setLastOpenError(_ value: String?) { lastOpenError = value }

    /// Build a one-row result keyed on `prefix`. `columns` is the
    /// column-name → position map; `values` must be the same length.
    func _seedRow(forSQLPrefix prefix: String, columns: [String: Int], values: [SQLValue]) {
        let row = Row(values: values, columnIndex: columns)
        scriptedResults[prefix] = [row]
    }

    /// Seed an arbitrary row sequence for queries that share `prefix`.
    func _seedRows(forSQLPrefix prefix: String, _ rows: [Row]) {
        scriptedResults[prefix] = rows
    }

    /// Make `query` throw the specified `error` whenever it sees a SQL
    /// that begins with `prefix`.
    func _seedFailure(forSQLPrefix prefix: String, error: BackendError) {
        scriptedFailures[prefix] = error
    }

    // MARK: - HermesQueryBackend conformance

    func open() async -> Bool {
        openCallCount += 1
        return openShouldSucceed
    }

    @discardableResult
    func refresh(forceFresh: Bool) async -> Bool {
        refreshCallCount += 1
        return openShouldSucceed
    }

    func close() async {
        closeCallCount += 1
    }

    func query(_ sql: String, params: [SQLValue]) async throws -> [Row] {
        queryLog.append((sql: sql, params: params))
        if let failure = longestMatch(in: scriptedFailures, for: sql) {
            throw failure
        }
        return longestMatch(in: scriptedResults, for: sql) ?? []
    }

    func queryBatch(_ statements: [(sql: String, params: [SQLValue])]) async throws -> [[Row]] {
        batchLog.append(statements)
        var out: [[Row]] = []
        out.reserveCapacity(statements.count)
        for stmt in statements {
            if let failure = longestMatch(in: scriptedFailures, for: stmt.sql) {
                throw failure
            }
            out.append(longestMatch(in: scriptedResults, for: stmt.sql) ?? [])
        }
        return out
    }

    // MARK: - Internals

    /// Pick the value registered under the longest prefix `sql` starts
    /// with. One generic helper shared by the rows and failures tables
    /// (previously two copy-pasted loops). Ties between equal-length
    /// prefixes resolve arbitrarily — same undefined-order contract as
    /// before; callers should not register two equal-length matchers
    /// for the same SQL.
    private func longestMatch<Value>(in table: [String: Value], for sql: String) -> Value? {
        table
            .filter { sql.hasPrefix($0.key) }
            .max { $0.key.count < $1.key.count }?
            .value
    }
}
    /// Default 16-column session row matching `sessionColumns` for
    /// the bare base schema (no v0.7 / v0.11 columns).
    private func makeBaseSessionRow(id: String = "s1") -> Row {
        makeRow([
            ("id", .text(id)),
            ("source", .text("acp")),
            ("user_id", .null),
            ("model", .text("gpt-5")),
            ("title", .text("hello")),
            ("parent_session_id", .null),
            ("started_at", .real(1_700_000_000.0)),
            ("ended_at", .null),
            ("end_reason", .null),
            ("message_count", .integer(5)),
            ("tool_call_count", .integer(2)),
            ("input_tokens", .integer(100)),
            ("output_tokens", .integer(200)),
            ("cache_read_tokens", .integer(0)),
            ("cache_write_tokens", .integer(0)),
            ("estimated_cost_usd", .real(0.05))
        ])
    }

    /// 10-column message row matching `messageColumns` for the bare base schema.
    private func makeBaseMessageRow(id: Int, sessionId: String = "s1", timestamp: Double = 1_700_000_001.0) -> Row {
        makeRow([
            ("id", .integer(Int64(id))),
            ("session_id", .text(sessionId)),
            ("role", .text("user")),
            ("content", .text("hi #\(id)")),
            ("tool_call_id", .null),
            ("tool_calls", .null),
            ("tool_name", .null),
            ("timestamp", .real(timestamp)),
            ("token_count", .integer(10)),
            ("finish_reason", .null)
        ])
    }

    /// Use a real `ServerContext.local` so the data service has a
    /// transport to construct (it's never used by these tests — every
    /// I/O path goes through the injected backend).
    private let context: ServerContext = .local

    // MARK: - fetchSessions

    @Test func fetchSessionsEmitsExpectedSQLPrefixAndDefaultLimit() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()

        _ = await service.fetchSessions()

        let log = await mock.queryLog
        #expect(log.count == 1)
        let first = log[0]
        #expect(first.sql.hasPrefix("SELECT id, source"))
        #expect(first.sql.contains("FROM sessions WHERE parent_session_id IS NULL ORDER BY started_at DESC LIMIT ?"))
        // QueryDefaults.sessionLimit == 100.
        #expect(first.params == [.integer(100)])
    }

    @Test func fetchSessionsBareSchemaUsesBaseColumnList() async {
        let mock = MockHermesQueryBackend()
        // Both schema flags off — neither v0.7 nor v0.11 columns selected.
        await mock.setHasV07Schema(false)
        await mock.setHasV011Schema(false)
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()
        _ = await service.fetchSessions()

        let sql = await mock.queryLog[0].sql
        #expect(!sql.contains("reasoning_tokens"))
        #expect(!sql.contains("api_call_count"))
        // Sanity: base columns are still all there.
        #expect(sql.contains("estimated_cost_usd"))
    }

    @Test func fetchSessionsWithV07SchemaIncludesReasoningTokens() async {
        let mock = MockHermesQueryBackend()
        await mock.setHasV07Schema(true)
        await mock.setHasV011Schema(false)
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()
        _ = await service.fetchSessions()

        // v0.7 adds the four cost/reasoning columns but NOT the v0.11
        // api_call_count column.
        let sql = await mock.queryLog[0].sql
        #expect(sql.contains("reasoning_tokens"))
        #expect(sql.contains("actual_cost_usd"))
        #expect(sql.contains("cost_status"))
        #expect(sql.contains("billing_provider"))
        #expect(!sql.contains("api_call_count"))
    }

    @Test func fetchSessionsWithV011SchemaIncludesApiCallCount() async {
        let mock = MockHermesQueryBackend()
        await mock.setHasV07Schema(true)
        await mock.setHasV011Schema(true)
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()
        _ = await service.fetchSessions()

        let sql = await mock.queryLog[0].sql
        #expect(sql.contains("reasoning_tokens"))
        #expect(sql.contains("api_call_count"))
    }

    // MARK: - fetchSession(id:)

    @Test func fetchSessionByIdBindsTextParam() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()

        await mock._seedRow(
            forSQLPrefix: "SELECT id, source",
            columns: makeBaseSessionRow().columnIndex,
            values: makeBaseSessionRow().values
        )

        let session = await service.fetchSession(id: "abc-123")
        #expect(session?.id == "s1") // From the seeded row.

        let log = await mock.queryLog
        #expect(log.count == 1)
        #expect(log[0].sql.contains("FROM sessions WHERE id = ? LIMIT 1"))
        #expect(log[0].params == [.text("abc-123")])
    }

    // MARK: - fetchMessages

    @Test func fetchMessagesWithoutBeforeBindsSessionAndLimit() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()

        _ = await service.fetchMessages(sessionId: "s1", limit: 25, before: nil)

        let log = await mock.queryLog
        #expect(log.count == 1)
        #expect(!log[0].sql.contains("id < ?"))
        #expect(log[0].sql.contains("WHERE session_id = ? ORDER BY id DESC LIMIT ?"))
        #expect(log[0].params == [.text("s1"), .integer(25)])
    }

    @Test func fetchMessagesWithBeforeIncludesIdLessThanClause() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()

        _ = await service.fetchMessages(sessionId: "s1", limit: 25, before: 999)

        let log = await mock.queryLog
        #expect(log.count == 1)
        #expect(log[0].sql.contains("WHERE session_id = ? AND id < ? ORDER BY id DESC LIMIT ?"))
        #expect(log[0].params == [.text("s1"), .integer(999), .integer(25)])
    }

    @Test func fetchMessagesReversesDescResultsToChronological() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()

        // Backend returns DESC (newest first); service should reverse to
        // chronological (oldest first) for display.
        let row3 = makeBaseMessageRow(id: 3, timestamp: 1_700_000_003.0)
        let row2 = makeBaseMessageRow(id: 2, timestamp: 1_700_000_002.0)
        let row1 = makeBaseMessageRow(id: 1, timestamp: 1_700_000_001.0)
        await mock._seedRows(forSQLPrefix: "SELECT id, session_id", [row3, row2, row1])

        let result = await service.fetchMessages(sessionId: "s1", limit: 10, before: nil)
        #expect(result.count == 3)
        #expect(result.map { $0.id } == [1, 2, 3])
    }

    // MARK: - dashboardSnapshot

    @Test func dashboardSnapshotUsesQueryBatchNotIndividualQueries() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()

        _ = await service.dashboardSnapshot()

        // All four dashboard statements must travel in ONE batch —
        // zero entries in the per-query log.
        let queries = await mock.queryLog
        let batches = await mock.batchLog
        #expect(queries.isEmpty)
        #expect(batches.count == 1)
        #expect(batches[0].count == 4)
    }

    @Test func dashboardSnapshotBatchOrderIsStatsRecentSessionsPreviewsToolCalls() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()

        _ = await service.dashboardSnapshot()

        let batches = await mock.batchLog
        #expect(batches.count == 1)
        let stmts = batches[0]
        // 0: stats — selects COUNT(*), SUM(...) from sessions.
        #expect(stmts[0].sql.contains("COUNT(*)"))
        #expect(stmts[0].sql.contains("FROM sessions"))
        // 1: recent sessions — selects session columns with a LIMIT param.
        #expect(stmts[1].sql.hasPrefix("SELECT id, source"))
        #expect(stmts[1].sql.contains("ORDER BY started_at DESC LIMIT ?"))
        // 2: session previews — joins messages with first user message.
        #expect(stmts[2].sql.contains("INNER JOIN"))
        #expect(stmts[2].sql.contains("MIN(id)"))
        // 3: recent tool calls — selects messages WHERE tool_calls IS NOT NULL.
        #expect(stmts[3].sql.contains("WHERE tool_calls IS NOT NULL"))
    }

    @Test func dashboardSnapshotAssemblesDataFromFourResultSets() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()

        // Stats row (6 cols on bare schema). "c0"…"c5" are positional
        // placeholder names — the snapshot assembler reads by index,
        // not by column name.
        let statsRow = makeRow([
            ("c0", .integer(7)),     // totalSessions
            ("c1", .integer(50)),    // totalMessages
            ("c2", .integer(12)),    // totalToolCalls
            ("c3", .integer(1000)),  // totalInputTokens
            ("c4", .integer(2000)),  // totalOutputTokens
            ("c5", .real(1.25))      // totalCostUSD
        ])
        await mock._seedRow(forSQLPrefix: "SELECT COUNT(*),", columns: statsRow.columnIndex, values: statsRow.values)

        // Recent sessions: one base session row.
        await mock._seedRows(forSQLPrefix: "SELECT id, source", [makeBaseSessionRow(id: "sess-A")])

        // Previews: two-column rows (session_id, content slice).
        let p1 = makeRow([("session_id", .text("sess-A")), ("preview", .text("first user msg"))])
        await mock._seedRows(forSQLPrefix: "SELECT m.session_id", [p1])

        // Recent tool calls: one message row with non-empty tool_calls.
        var toolRow = makeBaseMessageRow(id: 99, sessionId: "sess-A")
        // Manually rewrite tool_calls column (idx 5) to non-null/non-empty.
        let toolRowValues: [SQLValue] = [
            .integer(99), .text("sess-A"), .text("assistant"), .text("Calling tool"),
            .null, .text("[{\"id\":\"t1\",\"name\":\"bash\"}]"), .text("bash"),
            .real(1_700_000_010.0), .integer(15), .text("stop")
        ]
        toolRow = Row(values: toolRowValues, columnIndex: toolRow.columnIndex)
        // Both `fetchRecentToolCalls` and the dashboard batch slot start
        // with the same `messageColumns` prefix; match on a shorter
        // common substring that's whitespace-stable across the two
        // SQL builders.
        await mock._seedRows(forSQLPrefix: "SELECT id, session_id, role, content, tool_call_id, tool_calls,\ntool_name", [toolRow])

        let snapshot = await service.dashboardSnapshot()
        #expect(snapshot.stats.totalSessions == 7)
        #expect(snapshot.stats.totalMessages == 50)
        #expect(snapshot.recentSessions.map { $0.id } == ["sess-A"])
        #expect(snapshot.sessionPreviews["sess-A"] == "first user msg")
        #expect(snapshot.recentToolCalls.count == 1)
        #expect(snapshot.recentToolCalls[0].id == 99)
    }

    // MARK: - searchMessages

    @Test func searchMessagesEmptyInputReturnsEmptyAndSkipsBackend() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()

        // Whitespace-only query short-circuits before any SQL is built.
        let result = await service.searchMessages(query: " ")
        #expect(result.isEmpty)

        let log = await mock.queryLog
        #expect(log.isEmpty)
    }

    @Test func searchMessagesWrapsTokensInDoubleQuotes() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()

        _ = await service.searchMessages(query: "config.yaml v0.7.0")

        let log = await mock.queryLog
        #expect(log.count == 1)
        // FTS query is the first param.
        guard case .text(let fts) = log[0].params[0] else {
            Issue.record("Expected first FTS search param to be .text")
            return
        }
        // Each whitespace-delimited token gets wrapped in double-quotes
        // and joined with spaces.
        #expect(fts == "\"config.yaml\" \"v0.7.0\"")
    }

    // MARK: - Error swallowing

    @Test func fetchSessionsReturnsEmptyOnBackendTransportError() async {
        let mock = MockHermesQueryBackend()
        let service = HermesDataService(context: context, backend: mock)
        _ = await service.open()
        await mock._seedFailure(forSQLPrefix: "SELECT id, source", error: .transport("ssh dropped"))

        let result = await service.fetchSessions()
        #expect(result.isEmpty)

        // Sanity: the error reached the backend (the call was made).
        let log = await mock.queryLog
        #expect(log.count == 1)
    }
}

#endif // canImport(SQLite3)
/// Test-only transport that runs the script through `/bin/sh -c` on the
/// local machine. Lets `RemoteSQLiteBackend`'s production codepath
/// (which calls `transport.streamScript`) drive a real local sqlite3
/// invocation against a tmp fixture DB. No SSH, no Citadel — the
/// backend doesn't care how `streamScript` gets its bytes.
private struct LocalSQLite3Transport: ServerTransport {
    let contextID: ServerID
    let isRemote: Bool = false

    init(contextID: ServerID = ServerContext.local.id) {
        self.contextID = contextID
    }

    // MARK: - Straight FileManager passthroughs

    func readFile(_ path: String) throws -> Data {
        try Data(contentsOf: URL(fileURLWithPath: path))
    }
    func writeFile(_ path: String, data: Data) throws {
        try data.write(to: URL(fileURLWithPath: path), options: .atomic)
    }
    func fileExists(_ path: String) -> Bool {
        FileManager.default.fileExists(atPath: path)
    }
    func stat(_ path: String) -> FileStat? {
        guard let attrs = try? FileManager.default.attributesOfItem(atPath: path) else { return nil }
        let size = (attrs[.size] as? Int64) ?? Int64((attrs[.size] as? Int) ?? 0)
        let mtime = (attrs[.modificationDate] as? Date) ?? Date(timeIntervalSince1970: 0)
        let isDir = (attrs[.type] as? FileAttributeType) == .typeDirectory
        return FileStat(size: size, mtime: mtime, isDirectory: isDir)
    }
    func listDirectory(_ path: String) throws -> [String] {
        try FileManager.default.contentsOfDirectory(atPath: path)
    }
    func createDirectory(_ path: String) throws {
        try FileManager.default.createDirectory(atPath: path, withIntermediateDirectories: true)
    }
    func removeFile(_ path: String) throws {
        // Tolerate already-gone paths — tests call this in `defer` cleanup.
        guard FileManager.default.fileExists(atPath: path) else { return }
        try FileManager.default.removeItem(atPath: path)
    }

    func runProcess(executable: String, args: [String], stdin: Data?, timeout: TimeInterval?) throws -> ProcessResult {
        throw TransportError.other(message: "LocalSQLite3Transport.runProcess unused in tests")
    }

    #if !os(iOS)
    func makeProcess(executable: String, args: [String]) -> Process {
        let p = Process()
        p.executableURL = URL(fileURLWithPath: executable)
        p.arguments = args
        return p
    }
    #endif

    // NOTE(review): element/failure types restored to the common
    // line-stream shape — confirm against the `ServerTransport`
    // protocol declaration (the generic parameters were lost in the
    // pasted diff).
    func streamLines(executable: String, args: [String]) -> AsyncThrowingStream<String, Error> {
        AsyncThrowingStream { $0.finish() }
    }

    /// The actual workhorse: feed the script to `/bin/sh -c` so heredocs
    /// and command substitution behave exactly as they would on the
    /// remote end of an SSH session. Captures stdout / stderr / exit
    /// code into a `ProcessResult`.
    ///
    /// NOTE(review): `timeout` is intentionally not enforced here —
    /// this transport only drives fast local fixture queries in tests.
    /// Confirm that is acceptable before reusing outside the test target.
    func streamScript(_ script: String, timeout: TimeInterval) async throws -> ProcessResult {
        try await withCheckedThrowingContinuation { continuation in
            DispatchQueue.global().async {
                let proc = Process()
                proc.executableURL = URL(fileURLWithPath: "/bin/sh")
                proc.arguments = ["-c", script]
                let outPipe = Pipe()
                let errPipe = Pipe()
                proc.standardOutput = outPipe
                proc.standardError = errPipe
                do {
                    try proc.run()
                } catch {
                    continuation.resume(throwing: TransportError.other(
                        message: "Failed to launch /bin/sh: \(error.localizedDescription)"
                    ))
                    return
                }
                // Close our copies of the write ends so readToEnd() can
                // see EOF once the child exits.
                try? outPipe.fileHandleForWriting.close()
                try? errPipe.fileHandleForWriting.close()

                // Drain BOTH pipes BEFORE waiting on the child. The
                // previous order (waitUntilExit first, read after)
                // deadlocks as soon as the script emits more than the
                // ~64 KB pipe buffer: the child blocks in write(2), we
                // block waiting for exit, and neither ever progresses.
                // JSON query results can easily exceed that. stderr is
                // drained on a second queue so a chatty stderr can't
                // wedge the blocking stdout read (and vice versa).
                let group = DispatchGroup()
                let errBox = DispatchQueue(label: "scarf.test.streamScript.stderr")
                var stderrData = Data()
                group.enter()
                DispatchQueue.global().async {
                    let data = (try? errPipe.fileHandleForReading.readToEnd()) ?? Data()
                    errBox.sync { stderrData = data }
                    group.leave()
                }
                let stdout = (try? outPipe.fileHandleForReading.readToEnd()) ?? Data()
                group.wait()
                proc.waitUntilExit()
                let stderr = errBox.sync { stderrData }
                try? outPipe.fileHandleForReading.close()
                try? errPipe.fileHandleForReading.close()
                continuation.resume(returning: ProcessResult(
                    exitCode: proc.terminationStatus,
                    stdout: stdout,
                    stderr: stderr
                ))
            }
        }
    }

    // NOTE(review): element type restored as `[String]` (batched
    // changed paths) — confirm against the protocol declaration; the
    // generic parameter was lost in the pasted diff.
    func watchPaths(_ paths: [String]) -> AsyncStream<[String]> {
        AsyncStream { $0.finish() }
    }
}
+ guard sqlite3_open_v2(url.path, &db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, nil) == SQLITE_OK else { + throw TransportError.other(message: "sqlite3_open_v2 failed") + } + defer { sqlite3_close(db) } + + var sessionsExtra = "" + if addV07Columns { + sessionsExtra += ", reasoning_tokens INTEGER, actual_cost_usd REAL, cost_status TEXT, billing_provider TEXT" + } + if addV011SessionsColumn { + sessionsExtra += ", api_call_count INTEGER" + } + var messagesExtra = "" + if addV011MessagesColumn { + messagesExtra += ", reasoning_content TEXT" + } + + let schema = """ + CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + source TEXT, + user_id TEXT, + model TEXT, + title TEXT, + parent_session_id TEXT, + started_at REAL, + ended_at REAL, + end_reason TEXT, + message_count INTEGER, + tool_call_count INTEGER, + input_tokens INTEGER, + output_tokens INTEGER, + cache_read_tokens INTEGER, + cache_write_tokens INTEGER, + estimated_cost_usd REAL\(sessionsExtra) + ); + INSERT INTO sessions (id, source, user_id, model, title, parent_session_id, started_at, ended_at, end_reason, message_count, tool_call_count, input_tokens, output_tokens, cache_read_tokens, cache_write_tokens, estimated_cost_usd) + VALUES ('s1', 'acp', 'u1', 'gpt-5', 'Test', NULL, 1700000000.0, NULL, NULL, 5, 2, 100, 200, 0, 0, 0.05); + CREATE TABLE messages ( + id INTEGER PRIMARY KEY, + session_id TEXT, + role TEXT, + content TEXT, + tool_call_id TEXT, + tool_calls TEXT, + tool_name TEXT, + timestamp REAL, + token_count INTEGER, + finish_reason TEXT\(messagesExtra) + ); + INSERT INTO messages (id, session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason) + VALUES (1, 's1', 'user', 'hi', NULL, NULL, NULL, 1700000001.0, NULL, NULL); + """ + var errMsg: UnsafeMutablePointer? + let rc = sqlite3_exec(db, schema, nil, nil, &errMsg) + if rc != SQLITE_OK { + let msg = errMsg.flatMap { String(cString: $0) } ?? 
"unknown" + sqlite3_free(errMsg) + throw TransportError.other(message: "sqlite3_exec failed: \(msg)") + } + return url + } + + /// Construct a remote-shaped context whose `paths.stateDB` points at + /// the fixture file. We embed the absolute path under a fake + /// `remoteHome` whose final `/.hermes/state.db` resolves to our + /// real DB on disk. + private func makeFixtureContext(dbURL: URL) -> ServerContext { + // The DB the backend opens is `/state.db`. We point + // `remoteHome` at the parent dir of the fixture file and then + // symlink `state.db` to the fixture so the backend's resolved + // path lands on it. + let parent = dbURL.deletingLastPathComponent() + let stateLink = parent.appendingPathComponent("state.db") + // Replace any prior symlink/file at the canonical "state.db" path. + try? FileManager.default.removeItem(at: stateLink) + try? FileManager.default.createSymbolicLink(at: stateLink, withDestinationURL: dbURL) + return ServerContext( + id: UUID(), + displayName: "fixture", + kind: .ssh(SSHConfig(host: "fake.invalid", remoteHome: parent.path)) + ) + } + + /// Skip the test if /usr/bin/sqlite3 isn't available. Mirrors how + /// other Apple-only tests gate on system tooling. + private func requireSqlite3() throws { + let path = "/usr/bin/sqlite3" + let exists = FileManager.default.isExecutableFile(atPath: path) + try #require(exists, "Test requires /usr/bin/sqlite3") + } + + // MARK: - open() / schema detection + + @Test func openProbesSchemaSuccessfully() async throws { + try requireSqlite3() + let dbURL = try makeFixtureStateDB() + defer { + try? FileManager.default.removeItem(at: dbURL) + try? 
FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + + let opened = await backend.open() + #expect(opened) + let v07 = await backend.hasV07Schema + let v011 = await backend.hasV011Schema + #expect(v07 == false) + #expect(v011 == false) + let err = await backend.lastOpenError + #expect(err == nil) + } + + @Test func openOnV07SchemaDB() async throws { + try requireSqlite3() + let dbURL = try makeFixtureStateDB(addV07Columns: true) + defer { + try? FileManager.default.removeItem(at: dbURL) + try? FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + + let opened = await backend.open() + #expect(opened) + let v07 = await backend.hasV07Schema + let v011 = await backend.hasV011Schema + #expect(v07 == true) + #expect(v011 == false) + } + + @Test func openOnV011SchemaDB() async throws { + try requireSqlite3() + let dbURL = try makeFixtureStateDB( + addV07Columns: true, + addV011SessionsColumn: true, + addV011MessagesColumn: true + ) + defer { + try? FileManager.default.removeItem(at: dbURL) + try? FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + + let opened = await backend.open() + #expect(opened) + let v011 = await backend.hasV011Schema + #expect(v011 == true) + } + + @Test func partialMigrationStaysOnV07() async throws { + try requireSqlite3() + // sessions has api_call_count but messages lacks reasoning_content + // — the belt-and-braces guard should keep hasV011Schema false. 
+ let dbURL = try makeFixtureStateDB( + addV07Columns: true, + addV011SessionsColumn: true, + addV011MessagesColumn: false + ) + defer { + try? FileManager.default.removeItem(at: dbURL) + try? FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + + let opened = await backend.open() + #expect(opened) + let v011 = await backend.hasV011Schema + #expect(v011 == false) + let v07 = await backend.hasV07Schema + #expect(v07 == true) + } + + // MARK: - query() + + @Test func queryReturnsRows() async throws { + try requireSqlite3() + let dbURL = try makeFixtureStateDB() + defer { + try? FileManager.default.removeItem(at: dbURL) + try? FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + _ = await backend.open() + + let rows = try await backend.query("SELECT id FROM sessions", params: []) + #expect(rows.count == 1) + if case .text(let id) = rows[0][0] { + #expect(id == "s1") + } else { + Issue.record("Expected .text id, got \(rows[0][0])") + } + } + + @Test func queryWithIntParam() async throws { + try requireSqlite3() + let dbURL = try makeFixtureStateDB() + defer { + try? FileManager.default.removeItem(at: dbURL) + try? 
FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + _ = await backend.open() + + let rows = try await backend.query( + "SELECT id FROM sessions WHERE message_count >= ?", + params: [.integer(5)] + ) + #expect(rows.count == 1) + } + + @Test func queryWithTextParamEscapesQuotes() async throws { + try requireSqlite3() + let dbURL = try makeFixtureStateDB() + defer { + try? FileManager.default.removeItem(at: dbURL) + try? FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + _ = await backend.open() + + // Injection-shaped value — should be escaped to a harmless literal, + // matching nothing in the fixture. + let rows = try await backend.query( + "SELECT id FROM sessions WHERE id = ?", + params: [.text("s' OR 1=1 --")] + ) + #expect(rows.isEmpty) + } + + @Test func queryEmptyResultSet() async throws { + try requireSqlite3() + let dbURL = try makeFixtureStateDB() + defer { + try? FileManager.default.removeItem(at: dbURL) + try? FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + _ = await backend.open() + + let rows = try await backend.query( + "SELECT id FROM sessions WHERE id = ?", + params: [.text("does-not-exist")] + ) + #expect(rows.isEmpty) + } + + @Test func queryNullValuesPreserved() async throws { + try requireSqlite3() + let dbURL = try makeFixtureStateDB() + defer { + try? FileManager.default.removeItem(at: dbURL) + try? 
FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + _ = await backend.open() + + let rows = try await backend.query( + "SELECT id, ended_at, end_reason FROM sessions WHERE id = ?", + params: [.text("s1")] + ) + #expect(rows.count == 1) + // ended_at and end_reason are NULL in the fixture row. + #expect(rows[0].isNull(at: 1)) + #expect(rows[0].isNull(at: 2)) + } + + // MARK: - queryBatch() + + @Test func queryBatchSplitsResultsCorrectly() async throws { + try requireSqlite3() + let dbURL = try makeFixtureStateDB() + defer { + try? FileManager.default.removeItem(at: dbURL) + try? FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + _ = await backend.open() + + let results = try await backend.queryBatch([ + (sql: "SELECT id FROM sessions", params: []), + (sql: "SELECT id FROM messages WHERE session_id = ?", params: [.text("s1")]), + (sql: "SELECT COUNT(*) FROM sessions", params: []) + ]) + #expect(results.count == 3) + // Slot 0: one session row. + #expect(results[0].count == 1) + if case .text(let sid) = results[0][0][0] { + #expect(sid == "s1") + } else { + Issue.record("Expected .text in slot 0") + } + // Slot 1: one message row. + #expect(results[1].count == 1) + // Slot 2: one count row with integer 1. + #expect(results[2].count == 1) + if case .integer(let n) = results[2][0][0] { + #expect(n == 1) + } else { + Issue.record("Expected .integer in slot 2") + } + } + + @Test func queryBatchHandlesEmptyResultSets() async throws { + try requireSqlite3() + let dbURL = try makeFixtureStateDB() + defer { + try? FileManager.default.removeItem(at: dbURL) + try? 
FileManager.default.removeItem(at: dbURL.deletingLastPathComponent().appendingPathComponent("state.db")) + } + let ctx = makeFixtureContext(dbURL: dbURL) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + _ = await backend.open() + + // Middle statement returns 0 rows; outer slots should still be + // populated correctly. + let results = try await backend.queryBatch([ + (sql: "SELECT id FROM sessions", params: []), + (sql: "SELECT id FROM messages WHERE session_id = ?", params: [.text("does-not-exist")]), + (sql: "SELECT COUNT(*) FROM messages", params: []) + ]) + #expect(results.count == 3) + #expect(results[0].count == 1) + #expect(results[1].isEmpty) + #expect(results[2].count == 1) + } + + // MARK: - Failure paths + + @Test func nonZeroExitThrowsSqliteError() async throws { + try requireSqlite3() + // Point at a parent dir with no state.db symlink — sqlite3 will + // open a brand-new empty DB, so the schema PRAGMAs return empty + // tables. That actually succeeds. Instead, point remoteHome at + // a path under a non-existent directory so sqlite3 can't open + // the file at all. + let nonExistentParent = "/var/empty/scarf-test-no-such-dir-\(UUID().uuidString)" + let ctx = ServerContext( + id: UUID(), + displayName: "broken", + kind: .ssh(SSHConfig(host: "fake.invalid", remoteHome: nonExistentParent)) + ) + let backend = RemoteSQLiteBackend(context: ctx, transport: LocalSQLite3Transport()) + + let opened = await backend.open() + #expect(opened == false) + let err = await backend.lastOpenError + #expect(err != nil) + #expect(!(err ?? 
"").isEmpty) + } +} + +#endif // canImport(SQLite3) diff --git a/scarf/Packages/ScarfCore/Tests/ScarfCoreTests/SQLValueInlinerTests.swift b/scarf/Packages/ScarfCore/Tests/ScarfCoreTests/SQLValueInlinerTests.swift new file mode 100644 index 0000000..53a24f8 --- /dev/null +++ b/scarf/Packages/ScarfCore/Tests/ScarfCoreTests/SQLValueInlinerTests.swift @@ -0,0 +1,147 @@ +import Testing +import Foundation +@testable import ScarfCore + +/// Pure unit tests on `SQLValueInliner.inline(_:params:)` and +/// `SQLValueInliner.encode(_:)`. No backend, no transport, no actor — +/// these are the lexical-substitution rules that drive the remote +/// SQLite backend's `?` → literal pipeline. +@Suite struct SQLValueInlinerTests { + + // MARK: - encode(_:) per SQLValue case + + @Test func encodeNullProducesNULL() { + #expect(SQLValueInliner.encode(.null) == "NULL") + } + + @Test func encodeIntegerProducesUnquotedDigits() { + #expect(SQLValueInliner.encode(.integer(42)) == "42") + #expect(SQLValueInliner.encode(.integer(-7)) == "-7") + #expect(SQLValueInliner.encode(.integer(0)) == "0") + #expect(SQLValueInliner.encode(.integer(Int64.max)) == "9223372036854775807") + } + + @Test func encodeRealUsesPercent17gFormat() { + // %.17g round-trips a Double precisely as decimal. Verify the + // formatted string parses back to the exact same Double. + let original: Double = 3.14 + let encoded = SQLValueInliner.encode(.real(original)) + #expect(encoded == String(format: "%.17g", original)) + // Round-trip: encoded value re-parsed must equal the source. + #expect(Double(encoded) == original) + + // Tricky case: 0.1 + 0.2 has imprecise binary representation. 
+ let imprecise = 0.1 + 0.2 + let encodedImprecise = SQLValueInliner.encode(.real(imprecise)) + #expect(Double(encodedImprecise) == imprecise) + } + + @Test func encodeTextWrapsInSingleQuotes() { + #expect(SQLValueInliner.encode(.text("hi")) == "'hi'") + #expect(SQLValueInliner.encode(.text("")) == "''") + } + + @Test func encodeTextDoublesEmbeddedSingleQuotes() { + // SQL literal escape: `it's` becomes `'it''s'`. + #expect(SQLValueInliner.encode(.text("it's")) == "'it''s'") + // Multiple embedded quotes — each one is doubled. + #expect(SQLValueInliner.encode(.text("a'b'c")) == "'a''b''c'") + // The classic injection-shaped value gets escaped to harmless. + #expect(SQLValueInliner.encode(.text("' OR 1=1 --")) == "''' OR 1=1 --'") + } + + @Test func encodeBlobProducesHexLiteral() { + // Two-byte blob: `X'dead'`. + #expect(SQLValueInliner.encode(.blob(Data([0xde, 0xad]))) == "X'dead'") + // Empty blob: `X''`. + #expect(SQLValueInliner.encode(.blob(Data())) == "X''") + // Lowercase hex, full byte range, with leading zero preserved. + #expect(SQLValueInliner.encode(.blob(Data([0x00, 0x0f, 0xff]))) == "X'000fff'") + } + + // MARK: - inline(_:params:) substitution rules + + @Test func inlineSubstitutesPlaceholdersInOrder() { + let out = SQLValueInliner.inline( + "INSERT INTO t VALUES (?, ?, ?)", + params: [.integer(1), .text("two"), .real(3.0)] + ) + // Order is preserved: integer 1, text 'two', real 3.0. + #expect(out.hasPrefix("INSERT INTO t VALUES (")) + #expect(out.contains("1")) + #expect(out.contains("'two'")) + // Real 3.0 should round-trip via %.17g. + let real3 = String(format: "%.17g", 3.0) + #expect(out.contains(real3)) + } + + @Test func inlineSkipsPlaceholderInsideStringLiteral() { + // The `?` inside `'?'` is part of a string and must not be bound. + // Only the trailing `?` (outside the quotes) consumes the param. + let out = SQLValueInliner.inline( + "WHERE name = '?' AND id = ?", + params: [.integer(7)] + ) + #expect(out == "WHERE name = '?' 
AND id = 7") + } + + @Test func inlineSkipsPlaceholderInsideDoubleQuotedIdentifier() { + // Double-quoted identifiers (column / table names with special chars) + // are also a quoted region — `?` inside them is literal. + let out = SQLValueInliner.inline( + "SELECT \"col?\" FROM t WHERE x = ?", + params: [.integer(1)] + ) + #expect(out == "SELECT \"col?\" FROM t WHERE x = 1") + } + + @Test func inlineHandlesDoubledSingleQuoteEscapeInString() { + // `'it''s ?'` is a single SQL string literal containing `it's ?`. + // The doubled single-quote is the SQL escape for an embedded + // apostrophe — the scanner must NOT toggle out of string state + // at the doubled quote, and the trailing `?` is inside the string. + // No params consumed. + let out = SQLValueInliner.inline( + "WHERE x = 'it''s ?'", + params: [] + ) + #expect(out == "WHERE x = 'it''s ?'") + } + + @Test func inlineSelectShapeMatchesDataServicePattern() { + // Sanity check: the SELECT shape `HermesDataService.fetchSessions` + // generates inlines cleanly for the typical `[.integer(100)]` + // limit param. + let sql = "SELECT id, source FROM sessions WHERE parent_session_id IS NULL ORDER BY started_at DESC LIMIT ?" + let out = SQLValueInliner.inline(sql, params: [.integer(100)]) + #expect(out == "SELECT id, source FROM sessions WHERE parent_session_id IS NULL ORDER BY started_at DESC LIMIT 100") + } + + @Test func inlineWithNoPlaceholdersReturnsInputUnchanged() { + let sql = "SELECT COUNT(*) FROM messages" + #expect(SQLValueInliner.inline(sql, params: []) == sql) + } + + @Test func inlinePreservesAllOtherCharacters() { + // Make sure we're not mangling whitespace, semicolons, parens. + let sql = " SELECT *\n FROM t WHERE id = ? ; " + let out = SQLValueInliner.inline(sql, params: [.integer(5)]) + #expect(out == " SELECT *\n FROM t WHERE id = 5 ; ") + } + + @Test func inlineSubstitutesNullPlaceholder() { + let out = SQLValueInliner.inline( + "UPDATE t SET col = ? 
WHERE id = ?", + params: [.null, .integer(1)] + ) + #expect(out == "UPDATE t SET col = NULL WHERE id = 1") + } + + @Test func inlineSubstitutesBlobPlaceholder() { + let out = SQLValueInliner.inline( + "INSERT INTO t (data) VALUES (?)", + params: [.blob(Data([0x01, 0x02, 0x03]))] + ) + #expect(out == "INSERT INTO t (data) VALUES (X'010203')") + } +} diff --git a/scarf/Packages/ScarfIOS/Sources/ScarfIOS/CitadelServerTransport.swift b/scarf/Packages/ScarfIOS/Sources/ScarfIOS/CitadelServerTransport.swift index 170b86e..5077b9b 100644 --- a/scarf/Packages/ScarfIOS/Sources/ScarfIOS/CitadelServerTransport.swift +++ b/scarf/Packages/ScarfIOS/Sources/ScarfIOS/CitadelServerTransport.swift @@ -58,6 +58,9 @@ public final class CitadelServerTransport: ServerTransport, @unchecked Sendable /// Shared directory under which cached SQLite snapshots land. On /// iOS this maps to `/scarf/snapshots//`. + /// Stable per-server cache directory. Was used by the snapshot + /// pipeline pre-v2.7; kept for the cache-cleanup migration that + /// purges old snapshot files at first launch on the new build. private let snapshotBaseDir: URL /// Actor-serialized access to the one shared `SSHClient`. Opens @@ -159,19 +162,66 @@ public final class CitadelServerTransport: ServerTransport, @unchecked Sendable AsyncThrowingStream { $0.finish() } } - // MARK: - ServerTransport: SQLite snapshot + // MARK: - ServerTransport: script streaming - public func snapshotSQLite(remotePath: String) throws -> URL { - try runSync { try await self.asyncSnapshotSQLite(remotePath: remotePath) } + /// Pipe `script` to `/bin/sh -s` over Citadel's exec channel. + /// + /// **Why base64.** Citadel's `executeCommandStream` doesn't expose + /// stdin in the version we're on, so we can't just open `sh -s` and + /// write the script. 
Instead we encode the script as base64, decode + /// it on the remote inline, and pipe the result into `sh`: + /// + /// printf '%s' '' | base64 -d | /bin/sh + /// + /// `base64 -d` is universally available on Linux/macOS. The base64 + /// blob travels as a single shell-safe argv token, so multi-line + /// scripts with `"$VAR"` references and nested quotes survive + /// untouched — same correctness guarantee as `SSHScriptRunner`'s + /// stdin-pipe approach. + public func streamScript(_ script: String, timeout: TimeInterval) async throws -> ProcessResult { + let scriptBytes = Data(script.utf8) + let b64 = scriptBytes.base64EncodedString() + // Prepend the same PATH guard that `asyncRunProcess` uses so + // base64 + sh resolve on hosts where they live in non-default + // prefixes. Most distros have base64 in /usr/bin but + // homebrew-installed coreutils in /opt/homebrew/bin would + // otherwise be invisible from a stripped-PATH exec channel. + let cmd = "PATH=\"$HOME/.local/bin:/opt/homebrew/bin:/usr/local/bin:$PATH\" " + + "printf '%s' '\(b64)' | base64 -d | /bin/sh" + return try await runScript(cmd, timeout: timeout) } - /// Path where the most recent successful snapshot was written — - /// returned even when the SSH connection is currently down. The - /// data service falls back to this when `snapshotSQLite` throws so - /// Dashboard / Sessions / Chat-history stay viewable while the - /// phone is offline. - public var cachedSnapshotPath: URL? 
{ - snapshotBaseDir.appendingPathComponent("state.db") + private func runScript(_ cmd: String, timeout: TimeInterval) async throws -> ProcessResult { + let client = try await connectionHolder.ssh() + let stream: AsyncThrowingStream + do { + stream = try await client.executeCommandStream(cmd) + } catch { + throw TransportError.other(message: "Failed to start exec stream: \(error.localizedDescription)") + } + var stdout = Data() + var stderr = Data() + var exitCode: Int32 = 0 + do { + for try await chunk in stream { + switch chunk { + case .stdout(var buf): + if let s = buf.readString(length: buf.readableBytes) { + stdout.append(Data(s.utf8)) + } + case .stderr(var buf): + if let s = buf.readString(length: buf.readableBytes) { + stderr.append(Data(s.utf8)) + } + } + } + } catch let failed as SSHClient.CommandFailed { + exitCode = Int32(failed.exitCode) + } catch { + stderr.append(Data(error.localizedDescription.utf8)) + exitCode = -1 + } + return ProcessResult(exitCode: exitCode, stdout: stdout, stderr: stderr) } // MARK: - ServerTransport: watching @@ -397,101 +447,6 @@ public final class CitadelServerTransport: ServerTransport, @unchecked Sendable return ProcessResult(exitCode: exitCode, stdout: stdout, stderr: stderr) } - private func asyncSnapshotSQLite(remotePath: String) async throws -> URL { - // Same flow as SSHTransport: run `sqlite3 .backup` on the remote - // (WAL-safe), flip out of WAL mode on the snapshot, then SFTP - // the backup file down to the local cache. - try? FileManager.default.createDirectory(at: snapshotBaseDir, withIntermediateDirectories: true) - let localURL = snapshotBaseDir.appendingPathComponent("state.db") - let client = try await connectionHolder.ssh() - let remoteTmp = "/tmp/scarf-snapshot-\(UUID().uuidString).db" - // Double-quote paths; $HOME expansion happens inside double quotes. 
- let rewritten = Self.rewriteHomeRelative(remotePath) - - // Prepend the same PATH prefix `asyncRunProcess` uses so `sqlite3` - // resolves on hosts where it lives in /usr/local/bin or - // /opt/homebrew/bin (issue #56). Citadel's bare exec channel - // inherits a stripped PATH (typically `/usr/bin:/bin` on Linux); - // without this, statically-linked or custom-prefix sqlite3 - // installs fail "command not found" at exit 127. - let backupScript = - #"PATH="$HOME/.local/bin:/opt/homebrew/bin:/usr/local/bin:$PATH" "# - + #"sqlite3 "\#(rewritten)" ".backup '\#(remoteTmp)'" && sqlite3 '\#(remoteTmp)' "PRAGMA journal_mode=DELETE;" > /dev/null"# - - // Drive `executeCommandStream` instead of `executeCommand` so we - // capture stderr regardless of exit code (issue #56). Pre-fix - // a non-zero exit threw `CommandFailed` and discarded the buffer - // — surfaced as the unhelpful "Citadel.SSHClient.CommandFailed - // error 1" banner. Now we propagate the real stderr so - // `HermesDataService.humanize` can translate "sqlite3: command - // not found" / "no such file" / "permission denied" into the - // dashboard banner with actionable copy. 
- let stream: AsyncThrowingStream - do { - stream = try await client.executeCommandStream(backupScript) - } catch { - throw NSError( - domain: "CitadelServerTransport", - code: -1, - userInfo: [NSLocalizedDescriptionKey: "Failed to start snapshot stream: \(error.localizedDescription)"] - ) - } - var stdout = Data() - var stderr = Data() - var exitCode: Int32 = 0 - do { - for try await chunk in stream { - switch chunk { - case .stdout(var buf): - if let s = buf.readString(length: buf.readableBytes) { - stdout.append(Data(s.utf8)) - } - case .stderr(var buf): - if let s = buf.readString(length: buf.readableBytes) { - stderr.append(Data(s.utf8)) - } - } - } - } catch let failed as SSHClient.CommandFailed { - exitCode = Int32(failed.exitCode) - } catch { - stderr.append(Data(error.localizedDescription.utf8)) - exitCode = -1 - } - if exitCode != 0 { - // Combine stdout + stderr into the error message — sqlite3 - // sometimes prints "Error: ..." on stdout depending on the - // remote shell. HermesDataService.humanize keys off - // substrings like "sqlite3: command not found", - // "permission denied", "no such file", so as long as one of - // them ends up in the message we get a useful banner. - let messageBytes = stderr.isEmpty ? stdout : stderr - let message = String(data: messageBytes, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - throw NSError( - domain: "CitadelServerTransport", - code: Int(exitCode), - userInfo: [ - NSLocalizedDescriptionKey: message.isEmpty - ? "Snapshot exited \(exitCode) with no output (likely sqlite3 missing on remote)" - : message - ] - ) - } - - // SFTP-download the remote tmp into our local snapshot cache. - let sftp = try await connectionHolder.sftp() - let data: Data = try await sftp.withFile(filePath: remoteTmp, flags: [.read]) { file in - let buf = try await file.readAll() - return Data(buffer: buf) - } - try data.write(to: localURL, options: .atomic) - - // Best-effort cleanup of the remote tmp. - _ = try? 
await client.executeCommand("rm -f '\(remoteTmp)'") - - return localURL - } - // MARK: - Shell helpers /// Minimal shell-argument joiner. Handles spaces + quotes; sufficient diff --git a/scarf/scarf/Features/Dashboard/ViewModels/DashboardViewModel.swift b/scarf/scarf/Features/Dashboard/ViewModels/DashboardViewModel.swift index 5e4d8b0..fab639e 100644 --- a/scarf/scarf/Features/Dashboard/ViewModels/DashboardViewModel.swift +++ b/scarf/scarf/Features/Dashboard/ViewModels/DashboardViewModel.swift @@ -43,16 +43,23 @@ final class DashboardViewModel { func load() async { isLoading = true - // refresh() = close + reopen, forces a fresh remote snapshot. Cheap - // on local (live DB reopen). + // refresh() is essentially free for the streaming remote backend + // (no transfer — every query is fresh) and a cheap reopen for + // local. The four data-service queries below are batched + // through `dashboardSnapshot` so a remote load is one SSH + // round-trip instead of four. let opened = await dataService.refresh() var collectedErrors: [String] = [] if opened { - stats = await dataService.fetchStats() - recentSessions = await dataService.fetchSessions(limit: 5) - sessionPreviews = await dataService.fetchSessionPreviews(limit: 5) - let activityMessages = await dataService.fetchRecentToolCalls(limit: 8) - recentActivity = activityMessages.flatMap { msg in + let snapshot = await dataService.dashboardSnapshot( + sessionLimit: 5, + previewLimit: 5, + toolCallLimit: 8 + ) + stats = snapshot.stats + recentSessions = snapshot.recentSessions + sessionPreviews = snapshot.sessionPreviews + recentActivity = snapshot.recentToolCalls.flatMap { msg in msg.toolCalls.map { call in ActivityEntry( id: call.callId, diff --git a/scarf/scarf/scarfApp.swift b/scarf/scarf/scarfApp.swift index c2ce401..a014739 100644 --- a/scarf/scarf/scarfApp.swift +++ b/scarf/scarf/scarfApp.swift @@ -27,6 +27,18 @@ struct ScarfApp: App { // wasn't running. Cheap: just an `ls` of the snapshots root. 
registry.sweepOrphanCaches() + // v2.7 cache cleanup: the remote-DB pipeline switched from + // "snapshot the whole state.db locally" to "stream queries + // over SSH per call" (issue #74). Old snapshot files for an + // active 5GB-DB user could be 5GB+ on disk, with no live + // codepath that would ever clean them up. Wipe the snapshots + // root once at first launch on the new build. Subsequent + // launches no-op via the UserDefaults flag. + if !UserDefaults.standard.bool(forKey: "scarf.v27.snapshotCacheCleaned") { + try? FileManager.default.removeItem(atPath: SSHTransport.snapshotRootPath()) + UserDefaults.standard.set(true, forKey: "scarf.v27.snapshotCacheCleaned") + } + // Wire ScarfCore's SSHTransport to the Mac-target login-shell env // probe. Without this, `ssh`/`scp` subprocesses spawned from Scarf // can't reach 1Password / Secretive / `.zshrc`-exported ssh-agent