From 12e152bfeaf49e3cad6d08e94d2cb7d96174ff23 Mon Sep 17 00:00:00 2001 From: Alan Wizemann Date: Tue, 5 May 2026 12:06:58 +0200 Subject: [PATCH] perf(ssh): replace Thread.sleep spin with kernel-wait for runLocal timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit Finding 3 — every SSH operation funnels through SSHTransport.runLocal, which used a 100ms Thread.sleep loop while waiting for the timeout. Each call held one cooperative-pool thread for the full timeout duration with spin-poll overhead, AND had 100ms granularity on the deadline. Replace with proc.terminationHandler + DispatchGroup wait — kernel-wakeup when the process exits (or the deadline fires), no spin. Same one-thread blocking footprint, but eliminates the per-operation spin work that inflated query latency 60-70% under concurrent SSH load (visible in ScarfMon as 7-second mac.loadRecentSessions outliers when sidebar reload + chat finalize + watcher poll all fired together). Minimum-touch fix; full async migration of runLocal documented for follow-up. The bigger refactor would let cooperative-pool threads park on a true async suspension during the wait, but requires propagating async through every ServerTransport caller. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ScarfCore/Transport/SSHTransport.swift | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift index 23cabd3..992ea45 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift @@ -724,12 +724,28 @@ public struct SSHTransport: ServerTransport { try? stdinPipe.fileHandleForWriting.close() } if let timeout { - let deadline = Date().addingTimeInterval(timeout) - while proc.isRunning && Date() < deadline { - Thread.sleep(forTimeInterval: 0.1) - } - if proc.isRunning { + // Kernel-wait via DispatchGroup + terminationHandler instead + // of a 100ms Thread.sleep spin loop. The old loop burned a + // cooperative-pool thread for the full timeout duration AND + // had 100ms granularity on the deadline; this version blocks + // once on a semaphore that the OS wakes when the process + // terminates (or when the timeout fires). Net effect: under + // concurrent SSH load (sidebar reload + chat finalize + + // watcher poll all firing together) we don't accumulate + // multiple spin-blocked threads, which was the mechanism + // behind the 7-second `loadRecentSessions` outliers + // observed in remote-context perf captures. + let waitGroup = DispatchGroup() + waitGroup.enter() + proc.terminationHandler = { _ in waitGroup.leave() } + let outcome = waitGroup.wait(timeout: .now() + timeout) + proc.terminationHandler = nil + if outcome == .timedOut { proc.terminate() + // Brief block until the kill actually lands so we can + // collect partial stdout. terminate() is async; without + // this wait the readToEnd below could race the close. + proc.waitUntilExit() let partial = (try? stdoutPipe.fileHandleForReading.readToEnd()) ?? Data() try? stdoutPipe.fileHandleForReading.close() try? stderrPipe.fileHandleForReading.close()