mirror of
https://github.com/awizemann/scarf.git
synced 2026-05-08 02:14:37 +00:00
fix(remote-sqlite): bump query timeout 15s→30s + add in-flight coalescing
Two issues from the perf capture: 1. fetchMessages on a 157-message session timed out at exactly 15.06 s (`mac.fetchMessages` interval = 15,062,646,042 ns), then silently returned 0 rows. The chat appeared empty but the session had data; the timeout was firing before sqlite3 -json could ship the ~50KB payload over a 420 ms-RTT SSH link. Bumped queryTimeout to 30 s. The streamScript transport-level timeout still fires on truly wedged hosts. 2. mac.loadRecentSessions fired twice in parallel at t=960450 + t=960584, finishing 134 ms apart — two independent watcher ticks each spawning a full 3-query SSH load for the same data. Added in-flight request coalescing keyed on the inlined SQL text: when a query with the exact same SQL is already pending, second caller awaits the first task instead of spawning a new subprocess. New ScarfMon event `sqlite.query.coalesced` surfaces hits in traces. Coalescing is surgical — applies to single `query` calls only, not `queryBatch` (different timeout scaling, concurrent-same-batch is rare). Avoids serializing independent work. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+63
-25
@@ -54,10 +54,34 @@ public actor RemoteSQLiteBackend: HermesQueryBackend {
|
||||
/// shell expansion.
|
||||
private var resolvedHome: String?
|
||||
|
||||
/// Per-query timeout for `query`. A healthy query is <100 ms;
|
||||
/// 15 s is 100× headroom and short enough that a wedged remote
|
||||
/// doesn't hang the UI for minutes.
|
||||
private let queryTimeout: TimeInterval = 15
|
||||
/// In-flight query coalescing — keyed on the inlined SQL text,
|
||||
/// value is the Task currently fetching that exact result set.
|
||||
/// When two concurrent callers ask for the same query (common
|
||||
/// pattern: file watcher tick + chat-finalize debounce both
|
||||
/// firing `loadRecentSessions` within ~100 ms), the second
|
||||
/// caller awaits the first call's task instead of spawning a
|
||||
/// fresh SSH subprocess. Cleared on task completion. Drops
|
||||
/// duplicate `mac.loadRecentSessions` traces observed at
|
||||
/// t=960450 / t=960584 in the perf capture (two parallel 3-s
|
||||
/// loads for the same data, finishing 134 ms apart).
|
||||
///
|
||||
/// Coalescing is *only* applied to single `query` calls, not
|
||||
/// `queryBatch` — batches are larger payloads with caller-
|
||||
/// specific timeout scaling, and concurrent callers wanting
|
||||
/// "the same batch" is rare in practice. Keep coalescing
|
||||
/// surgical so we don't accidentally serialize independent
|
||||
/// work that just happens to match.
|
||||
private var inFlightQueries: [String: Task<[Row], Error>] = [:]
|
||||
|
||||
/// Per-query timeout for `query`. Healthy local queries are
|
||||
/// <100 ms; remote ones over 420 ms-RTT SSH amortize one round
|
||||
/// trip per call PLUS the wire payload time. A `fetchMessages`
|
||||
/// over a 157-message session (~50KB JSON encoded) exceeded
|
||||
/// the previous 15 s ceiling, silently returned 0 rows, and the
|
||||
/// chat appeared empty — a worse failure than the wait it was
|
||||
/// guarding against. Bumped to 30 s; the `streamScript`
|
||||
/// transport-level timeout still fires on truly wedged hosts.
|
||||
private let queryTimeout: TimeInterval = 30
|
||||
|
||||
/// Preflight timeout. First SSH round-trip may include cold
|
||||
/// ControlMaster establishment (~1–3 s) plus the schema PRAGMA
|
||||
@@ -149,28 +173,42 @@ public actor RemoteSQLiteBackend: HermesQueryBackend {
|
||||
// MARK: - Queries
|
||||
|
||||
public func query(_ sql: String, params: [SQLValue]) async throws -> [Row] {
|
||||
try await ScarfMon.measureAsync(.sqlite, "query") {
|
||||
guard isOpen else { throw BackendError.notOpen }
|
||||
let inlined = SQLValueInliner.inline(sql, params: params)
|
||||
let dbPath = context.paths.stateDB
|
||||
let script = """
|
||||
sqlite3 -readonly -json \(quoteForRemoteShell(dbPath)) <<'__SCARF_SQL__'
|
||||
\(inlined)
|
||||
__SCARF_SQL__
|
||||
"""
|
||||
let result: ProcessResult
|
||||
do {
|
||||
result = try await transport.streamScript(script, timeout: queryTimeout)
|
||||
} catch {
|
||||
throw BackendError.transport(error.localizedDescription)
|
||||
}
|
||||
if result.exitCode != 0 {
|
||||
throw BackendError.sqlite(exitCode: result.exitCode, stderr: result.stderrString)
|
||||
}
|
||||
let rows = try parseSingleResultSet(result.stdoutString)
|
||||
ScarfMon.event(.sqlite, "query.rows", count: rows.count, bytes: result.stdout.count)
|
||||
return rows
|
||||
guard isOpen else { throw BackendError.notOpen }
|
||||
let inlined = SQLValueInliner.inline(sql, params: params)
|
||||
// In-flight coalescing — if a query with the exact same
|
||||
// inlined SQL is already pending, await its task instead
|
||||
// of spawning a new SSH subprocess. Surfaces in ScarfMon as
|
||||
// a `sqlite.query.coalesced` event so we can see how often
|
||||
// the dedup actually fires in the wild.
|
||||
if let existing = inFlightQueries[inlined] {
|
||||
ScarfMon.event(.sqlite, "query.coalesced", count: 1)
|
||||
return try await existing.value
|
||||
}
|
||||
let task = Task<[Row], Error> { [self] in
|
||||
try await ScarfMon.measureAsync(.sqlite, "query") {
|
||||
let dbPath = context.paths.stateDB
|
||||
let script = """
|
||||
sqlite3 -readonly -json \(quoteForRemoteShell(dbPath)) <<'__SCARF_SQL__'
|
||||
\(inlined)
|
||||
__SCARF_SQL__
|
||||
"""
|
||||
let result: ProcessResult
|
||||
do {
|
||||
result = try await transport.streamScript(script, timeout: queryTimeout)
|
||||
} catch {
|
||||
throw BackendError.transport(error.localizedDescription)
|
||||
}
|
||||
if result.exitCode != 0 {
|
||||
throw BackendError.sqlite(exitCode: result.exitCode, stderr: result.stderrString)
|
||||
}
|
||||
let rows = try parseSingleResultSet(result.stdoutString)
|
||||
ScarfMon.event(.sqlite, "query.rows", count: rows.count, bytes: result.stdout.count)
|
||||
return rows
|
||||
}
|
||||
}
|
||||
inFlightQueries[inlined] = task
|
||||
defer { inFlightQueries[inlined] = nil }
|
||||
return try await task.value
|
||||
}
|
||||
|
||||
public func queryBatch(_ statements: [(sql: String, params: [SQLValue])]) async throws -> [[Row]] {
|
||||
|
||||
Reference in New Issue
Block a user