mirror of
https://github.com/awizemann/scarf.git
synced 2026-05-10 10:36:35 +00:00
fix(remote-sqlite): bump query timeout 15s→30s + add in-flight coalescing
Two issues from the perf capture: 1. fetchMessages on a 157-message session timed out at exactly 15.06 s (`mac.fetchMessages` interval = 15,062,646,042 ns), then silently returned 0 rows. The chat appeared empty but the session had data; the timeout was firing before sqlite3 -json could ship the ~50KB payload over a 420 ms-RTT SSH link. Bumped queryTimeout to 30 s. The streamScript transport-level timeout still fires on truly wedged hosts. 2. mac.loadRecentSessions fired twice in parallel at t=960450 + t=960584, finishing 134 ms apart — two independent watcher ticks each spawning a full 3-query SSH load for the same data. Added in-flight request coalescing keyed on the inlined SQL text: when a query with the exact same SQL is already pending, second caller awaits the first task instead of spawning a new subprocess. New ScarfMon event `sqlite.query.coalesced` surfaces hits in traces. Coalescing is surgical — applies to single `query` calls only, not `queryBatch` (different timeout scaling, concurrent-same-batch is rare). Avoids serializing independent work. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+43
-5
@@ -54,10 +54,34 @@ public actor RemoteSQLiteBackend: HermesQueryBackend {
|
|||||||
/// shell expansion.
|
/// shell expansion.
|
||||||
private var resolvedHome: String?
|
private var resolvedHome: String?
|
||||||
|
|
||||||
/// Per-query timeout for `query`. A healthy query is <100 ms;
|
/// In-flight query coalescing — keyed on the inlined SQL text,
|
||||||
/// 15 s is 100× headroom and short enough that a wedged remote
|
/// value is the Task currently fetching that exact result set.
|
||||||
/// doesn't hang the UI for minutes.
|
/// When two concurrent callers ask for the same query (common
|
||||||
private let queryTimeout: TimeInterval = 15
|
/// pattern: file watcher tick + chat-finalize debounce both
|
||||||
|
/// firing `loadRecentSessions` within ~100 ms), the second
|
||||||
|
/// caller awaits the first call's task instead of spawning a
|
||||||
|
/// fresh SSH subprocess. Cleared on task completion. Drops
|
||||||
|
/// duplicate `mac.loadRecentSessions` traces observed at
|
||||||
|
/// t=960450 / t=960584 in the perf capture (two parallel 3-s
|
||||||
|
/// loads for the same data, finishing 134 ms apart).
|
||||||
|
///
|
||||||
|
/// Coalescing is *only* applied to single `query` calls, not
|
||||||
|
/// `queryBatch` — batches are larger payloads with caller-
|
||||||
|
/// specific timeout scaling, and concurrent callers wanting
|
||||||
|
/// "the same batch" is rare in practice. Keep coalescing
|
||||||
|
/// surgical so we don't accidentally serialize independent
|
||||||
|
/// work that just happens to match.
|
||||||
|
private var inFlightQueries: [String: Task<[Row], Error>] = [:]
|
||||||
|
|
||||||
|
/// Per-query timeout for `query`. Healthy local queries are
|
||||||
|
/// <100 ms; remote ones over 420 ms-RTT SSH amortize one round
|
||||||
|
/// trip per call PLUS the wire payload time. A `fetchMessages`
|
||||||
|
/// over a 157-message session (~50KB JSON encoded) exceeded
|
||||||
|
/// the previous 15 s ceiling, silently returned 0 rows, and the
|
||||||
|
/// chat appeared empty — a worse failure than the wait it was
|
||||||
|
/// guarding against. Bumped to 30 s; the `streamScript`
|
||||||
|
/// transport-level timeout still fires on truly wedged hosts.
|
||||||
|
private let queryTimeout: TimeInterval = 30
|
||||||
|
|
||||||
/// Preflight timeout. First SSH round-trip may include cold
|
/// Preflight timeout. First SSH round-trip may include cold
|
||||||
/// ControlMaster establishment (~1–3 s) plus the schema PRAGMA
|
/// ControlMaster establishment (~1–3 s) plus the schema PRAGMA
|
||||||
@@ -149,9 +173,19 @@ public actor RemoteSQLiteBackend: HermesQueryBackend {
|
|||||||
// MARK: - Queries
|
// MARK: - Queries
|
||||||
|
|
||||||
public func query(_ sql: String, params: [SQLValue]) async throws -> [Row] {
|
public func query(_ sql: String, params: [SQLValue]) async throws -> [Row] {
|
||||||
try await ScarfMon.measureAsync(.sqlite, "query") {
|
|
||||||
guard isOpen else { throw BackendError.notOpen }
|
guard isOpen else { throw BackendError.notOpen }
|
||||||
let inlined = SQLValueInliner.inline(sql, params: params)
|
let inlined = SQLValueInliner.inline(sql, params: params)
|
||||||
|
// In-flight coalescing — if a query with the exact same
|
||||||
|
// inlined SQL is already pending, await its task instead
|
||||||
|
// of spawning a new SSH subprocess. Surfaces in ScarfMon as
|
||||||
|
// a `sqlite.query.coalesced` event so we can see how often
|
||||||
|
// the dedup actually fires in the wild.
|
||||||
|
if let existing = inFlightQueries[inlined] {
|
||||||
|
ScarfMon.event(.sqlite, "query.coalesced", count: 1)
|
||||||
|
return try await existing.value
|
||||||
|
}
|
||||||
|
let task = Task<[Row], Error> { [self] in
|
||||||
|
try await ScarfMon.measureAsync(.sqlite, "query") {
|
||||||
let dbPath = context.paths.stateDB
|
let dbPath = context.paths.stateDB
|
||||||
let script = """
|
let script = """
|
||||||
sqlite3 -readonly -json \(quoteForRemoteShell(dbPath)) <<'__SCARF_SQL__'
|
sqlite3 -readonly -json \(quoteForRemoteShell(dbPath)) <<'__SCARF_SQL__'
|
||||||
@@ -172,6 +206,10 @@ public actor RemoteSQLiteBackend: HermesQueryBackend {
|
|||||||
return rows
|
return rows
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
inFlightQueries[inlined] = task
|
||||||
|
defer { inFlightQueries[inlined] = nil }
|
||||||
|
return try await task.value
|
||||||
|
}
|
||||||
|
|
||||||
public func queryBatch(_ statements: [(sql: String, params: [SQLValue])]) async throws -> [[Row]] {
|
public func queryBatch(_ statements: [(sql: String, params: [SQLValue])]) async throws -> [[Row]] {
|
||||||
try await ScarfMon.measureAsync(.sqlite, "queryBatch") {
|
try await ScarfMon.measureAsync(.sqlite, "queryBatch") {
|
||||||
|
|||||||
Reference in New Issue
Block a user