chore: fd-leak cleanup, os.Logger conversion, status-poll backoff

Pre-release maintenance pass picked up across both targets while
debugging the iOS Browse fix:

- LocalTransport / SSHTransport: close the parent's copy of every
  pipe write end after spawn so EOF reaches the reader once the child
  exits, and explicitly close read ends after draining. Was leaking
  one fd per `runProcess`/`streamLines` invocation under load.
- ProcessACPChannel: also close stdout/stderr write-end fds on
  channel teardown — same pattern, ACP sessions can churn on long
  reconnect loops.
- HermesDataService / HermesLogService / ProjectDashboardService:
  replace remaining `print("[Scarf] ...")` debug statements with
  os.Logger calls (subsystem="com.scarf"), matching the global rule
  that production code uses Logger and `print()` is reserved for
  previews + test helpers.
- ProjectDashboardService / IOSCronViewModel: drop redundant
  `fileExists` guards before `createDirectory` — the operation is
  already mkdir -p across every transport, so the extra round-trip
  was pure latency on remote hosts.
- scarfApp.swift: server-status sidebar probe now uses an exponential
  backoff (10s → 30s → 60s → 120s → 300s) when consecutive probes
  fail, resetting on the first full success. Previously a registered
  remote going unreachable hammered pgrep + gateway_state.json every
  10s indefinitely; now offline servers settle to a 5-min cadence
  while live servers stay snappy.
- Localizable.xcstrings: routine .strings catalog refresh — drops
  stale entries for removed UI strings and picks up the new "Stored
  under quick_commands:..." subtitle wording.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-04-25 16:18:15 +02:00
parent 850fa7a697
commit 63c5d13bec
9 changed files with 128 additions and 34 deletions
@@ -191,6 +191,8 @@ public actor ProcessACPChannel: ACPChannel {
stdinPipe.fileHandleForReading.closeFile() stdinPipe.fileHandleForReading.closeFile()
stdoutPipe.fileHandleForReading.closeFile() stdoutPipe.fileHandleForReading.closeFile()
stderrPipe.fileHandleForReading.closeFile() stderrPipe.fileHandleForReading.closeFile()
stdoutPipe.fileHandleForWriting.closeFile()
stderrPipe.fileHandleForWriting.closeFile()
readerTask?.cancel() readerTask?.cancel()
stderrTask?.cancel() stderrTask?.cancel()
@@ -705,7 +705,7 @@ public actor HermesDataService {
do { do {
return try JSONDecoder().decode([HermesToolCall].self, from: data) return try JSONDecoder().decode([HermesToolCall].self, from: data)
} catch { } catch {
print("[Scarf] Failed to decode tool calls: \(error.localizedDescription)") Self.logger.error("Failed to decode tool calls: \(error.localizedDescription, privacy: .public)")
return [] return []
} }
} }
@@ -1,4 +1,7 @@
import Foundation import Foundation
#if canImport(os)
import os
#endif
public struct LogEntry: Identifiable, Sendable { public struct LogEntry: Identifiable, Sendable {
public let id: Int public let id: Int
@@ -47,6 +50,10 @@ public struct LogEntry: Identifiable, Sendable {
} }
public actor HermesLogService { public actor HermesLogService {
#if canImport(os)
nonisolated private static let logger = Logger(subsystem: "com.scarf", category: "HermesLogService")
#endif
/// Local file handle for local contexts. `nil` when following a remote /// Local file handle for local contexts. `nil` when following a remote
/// log or when no log is open. /// log or when no log is open.
private var localHandle: FileHandle? private var localHandle: FileHandle?
@@ -89,8 +96,8 @@ public actor HermesLogService {
} catch { } catch {
// Transient disconnects / command failures: surface once // Transient disconnects / command failures: surface once
// and stop. Callers typically re-open the log on retry. // and stop. Callers typically re-open the log on retry.
#if DEBUG #if canImport(os)
print("[Scarf] remote tail ended: \(error.localizedDescription)") Self.logger.warning("remote tail ended: \(error.localizedDescription, privacy: .public)")
#endif #endif
} }
} }
@@ -103,7 +110,9 @@ public actor HermesLogService {
do { do {
try localHandle?.close() try localHandle?.close()
} catch { } catch {
print("[Scarf] Failed to close log handle: \(error.localizedDescription)") #if canImport(os)
Self.logger.warning("Failed to close log handle: \(error.localizedDescription, privacy: .public)")
#endif
} }
localHandle = nil localHandle = nil
currentPath = nil currentPath = nil
@@ -38,9 +38,10 @@ public struct ProjectDashboardService: Sendable {
/// choice is now theirs. /// choice is now theirs.
public func saveRegistry(_ registry: ProjectRegistry) throws { public func saveRegistry(_ registry: ProjectRegistry) throws {
let dir = context.paths.scarfDir let dir = context.paths.scarfDir
if !transport.fileExists(dir) { // `createDirectory` is mkdir -p across every transport (Local
try transport.createDirectory(dir) // uses withIntermediateDirectories, SSH/Citadel both ignore
} // "already exists"), so we don't need to fileExists-guard it.
try transport.createDirectory(dir)
let data = try JSONEncoder().encode(registry) let data = try JSONEncoder().encode(registry)
// Pretty-print for readability (agents may read this file). // Pretty-print for readability (agents may read this file).
let writeData: Data let writeData: Data
@@ -62,7 +63,7 @@ public struct ProjectDashboardService: Sendable {
do { do {
return try JSONDecoder().decode(ProjectDashboard.self, from: data) return try JSONDecoder().decode(ProjectDashboard.self, from: data)
} catch { } catch {
print("[Scarf] Failed to decode dashboard for \(project.name): \(error.localizedDescription)") Self.logger.error("Failed to decode dashboard for \(project.name, privacy: .public): \(error.localizedDescription, privacy: .public)")
return nil return nil
} }
} }
@@ -127,6 +127,17 @@ public struct LocalTransport: ServerTransport {
} catch { } catch {
throw TransportError.other(message: "Failed to launch \(executable): \(error.localizedDescription)") throw TransportError.other(message: "Failed to launch \(executable): \(error.localizedDescription)")
} }
// Parent has its own copy of every pipe end after fork. The child
// inherits and uses the writing ends of stdout/stderr and the
// reading end of stdin; the parent must close its own copies of
// those so EOF reaches the parent's reader once the child exits
// (otherwise the kernel keeps each fd open as long as any process
// holds a reference, and we leak fds).
try? stdoutPipe.fileHandleForWriting.close()
try? stderrPipe.fileHandleForWriting.close()
if stdin != nil {
try? stdinPipe.fileHandleForReading.close()
}
if let stdin { if let stdin {
try? stdinPipe.fileHandleForWriting.write(contentsOf: stdin) try? stdinPipe.fileHandleForWriting.write(contentsOf: stdin)
try? stdinPipe.fileHandleForWriting.close() try? stdinPipe.fileHandleForWriting.close()
@@ -189,6 +200,10 @@ public struct LocalTransport: ServerTransport {
continuation.finish(throwing: error) continuation.finish(throwing: error)
return return
} }
// Parent's copy of the writing ends — the child has its
// own; close ours so EOF reaches the reader after exit.
try? outPipe.fileHandleForWriting.close()
try? errPipe.fileHandleForWriting.close()
let handle = outPipe.fileHandleForReading let handle = outPipe.fileHandleForReading
var buffer = Data() var buffer = Data()
while true { while true {
@@ -204,11 +219,18 @@ public struct LocalTransport: ServerTransport {
} }
} }
proc.waitUntilExit() proc.waitUntilExit()
let stderrTail: String
if proc.terminationStatus != 0 { if proc.terminationStatus != 0 {
let stderr = (try? errPipe.fileHandleForReading.readToEnd()) stderrTail = (try? errPipe.fileHandleForReading.readToEnd())
.flatMap { String(data: $0 ?? Data(), encoding: .utf8) } ?? "" .flatMap { String(data: $0 ?? Data(), encoding: .utf8) } ?? ""
} else {
stderrTail = ""
}
try? outPipe.fileHandleForReading.close()
try? errPipe.fileHandleForReading.close()
if proc.terminationStatus != 0 {
continuation.finish(throwing: TransportError.commandFailed( continuation.finish(throwing: TransportError.commandFailed(
exitCode: proc.terminationStatus, stderr: stderr exitCode: proc.terminationStatus, stderr: stderrTail
)) ))
} else { } else {
continuation.finish() continuation.finish()
@@ -474,6 +474,10 @@ public struct SSHTransport: ServerTransport {
continuation.finish(throwing: error) continuation.finish(throwing: error)
return return
} }
// Parent's copy of the writing ends — close ours so EOF
// reaches the reader after the child exits.
try? outPipe.fileHandleForWriting.close()
try? errPipe.fileHandleForWriting.close()
let handle = outPipe.fileHandleForReading let handle = outPipe.fileHandleForReading
var buffer = Data() var buffer = Data()
while true { while true {
@@ -489,11 +493,18 @@ public struct SSHTransport: ServerTransport {
} }
} }
proc.waitUntilExit() proc.waitUntilExit()
let stderrTail: String
if proc.terminationStatus != 0 { if proc.terminationStatus != 0 {
let stderr = (try? errPipe.fileHandleForReading.readToEnd()) stderrTail = (try? errPipe.fileHandleForReading.readToEnd())
.flatMap { String(data: $0 ?? Data(), encoding: .utf8) } ?? "" .flatMap { String(data: $0 ?? Data(), encoding: .utf8) } ?? ""
} else {
stderrTail = ""
}
try? outPipe.fileHandleForReading.close()
try? errPipe.fileHandleForReading.close()
if proc.terminationStatus != 0 {
continuation.finish(throwing: TransportError.classifySSHFailure( continuation.finish(throwing: TransportError.classifySSHFailure(
host: config.host, exitCode: proc.terminationStatus, stderr: stderr host: config.host, exitCode: proc.terminationStatus, stderr: stderrTail
)) ))
} else { } else {
continuation.finish() continuation.finish()
@@ -662,6 +673,13 @@ public struct SSHTransport: ServerTransport {
} catch { } catch {
throw TransportError.other(message: "Failed to launch \(executable): \(error.localizedDescription)") throw TransportError.other(message: "Failed to launch \(executable): \(error.localizedDescription)")
} }
// Parent's copy of the inherited ends — close them so EOF lands
// when the child exits and we don't leak fds.
try? stdoutPipe.fileHandleForWriting.close()
try? stderrPipe.fileHandleForWriting.close()
if stdin != nil {
try? stdinPipe.fileHandleForReading.close()
}
if let stdin { if let stdin {
try? stdinPipe.fileHandleForWriting.write(contentsOf: stdin) try? stdinPipe.fileHandleForWriting.write(contentsOf: stdin)
try? stdinPipe.fileHandleForWriting.close() try? stdinPipe.fileHandleForWriting.close()
@@ -119,10 +119,11 @@ public final class IOSCronViewModel {
let transport = ctx.makeTransport() let transport = ctx.makeTransport()
// Ensure the cron/ directory exists on a fresh // Ensure the cron/ directory exists on a fresh
// Hermes install — this file won't be present. // Hermes install — this file won't be present.
// `createDirectory` is mkdir -p across all transports;
// call unconditionally and let writeFile surface any
// real failure.
let parent = (path as NSString).deletingLastPathComponent let parent = (path as NSString).deletingLastPathComponent
if !transport.fileExists(parent) { try? transport.createDirectory(parent)
try? transport.createDirectory(parent)
}
try transport.writeFile(path, data: data) try transport.writeFile(path, data: data)
return true return true
} catch { } catch {
+6 -7
View File
@@ -12733,9 +12733,6 @@
"New cron job" : { "New cron job" : {
"comment" : "A button that creates a new cron job.", "comment" : "A button that creates a new cron job.",
"isCommentAutoGenerated" : true "isCommentAutoGenerated" : true
},
"New folder name" : {
}, },
"New folder…" : { "New folder…" : {
"comment" : "A label for a new folder name.", "comment" : "A label for a new folder name.",
@@ -15993,10 +15990,6 @@
}, },
"Project folder kept" : { "Project folder kept" : {
},
"Project name" : {
"comment" : "A label for a text field that lets the user enter a project name.",
"isCommentAutoGenerated" : true
}, },
"Project Name" : { "Project Name" : {
"localizations" : { "localizations" : {
@@ -16411,6 +16404,7 @@
} }
}, },
"Quick commands are shell shortcuts hermes exposes in chat as `/command_name`. They live under `quick_commands:` in config.yaml." : { "Quick commands are shell shortcuts hermes exposes in chat as `/command_name`. They live under `quick_commands:` in config.yaml." : {
"extractionState" : "stale",
"localizations" : { "localizations" : {
"de" : { "de" : {
"stringUnit" : { "stringUnit" : {
@@ -20709,6 +20703,7 @@
} }
}, },
"Skills (%lld)" : { "Skills (%lld)" : {
"extractionState" : "stale",
"localizations" : { "localizations" : {
"de" : { "de" : {
"stringUnit" : { "stringUnit" : {
@@ -21529,6 +21524,10 @@
} }
} }
}, },
"Stored under `quick_commands:` in config.yaml." : {
"comment" : "A description of the quick commands feature.",
"isCommentAutoGenerated" : true
},
"Strip the template's begin/end block, preserve everything else in MEMORY.md" : { "Strip the template's begin/end block, preserve everything else in MEMORY.md" : {
"comment" : "A description of the action to remove a memory block.", "comment" : "A description of the action to remove a memory block.",
"isCommentAutoGenerated" : true "isCommentAutoGenerated" : true
+54 -12
View File
@@ -243,14 +243,27 @@ final class ServerLiveStatus: Identifiable {
func startPolling() { func startPolling() {
stopPolling() stopPolling()
// First refresh inline so the icon doesn't flash "stopped" for the
// first 10s after launch.
refresh()
pollTask = Task { [weak self] in pollTask = Task { [weak self] in
// Exponential backoff on consecutive failures. Healthy servers
// poll every 10s. When a registered remote goes unreachable,
// pgrep + gateway_state.json reads fail every tick; without
// backoff that's a log warning + a 5s pgrep timeout every 10s
// for as long as the remote stays down. Reset to 10s on the
// first probe that fully succeeds.
var consecutiveFailures = 0
while !Task.isCancelled { while !Task.isCancelled {
try? await Task.sleep(nanoseconds: 10_000_000_000) let ok = await self?.pollOnce() ?? false
if Task.isCancelled { return } if Task.isCancelled { return }
self?.refresh() consecutiveFailures = ok ? 0 : consecutiveFailures + 1
let delaySec: UInt64
switch consecutiveFailures {
case 0: delaySec = 10
case 1: delaySec = 30
case 2: delaySec = 60
case 3: delaySec = 120
default: delaySec = 300
}
try? await Task.sleep(nanoseconds: delaySec * 1_000_000_000)
} }
} }
} }
@@ -290,15 +303,44 @@ final class ServerLiveStatus: Identifiable {
} }
private func refresh() { private func refresh() {
Task { [weak self] in _ = await self?.pollOnce() }
}
/// Single probe used by both the polling loop (which needs the
/// success/failure signal for backoff) and the fire-and-forget
/// `refresh()` callers (start/stop/restart). Returns `true` only when
/// both the pgrep call AND the gateway_state.json read returned a
/// transport-level success — `.success(nil)` (file missing because
/// hermes is stopped) still counts as a successful probe.
private func pollOnce() async -> Bool {
let svc = fileService let svc = fileService
Task.detached { [weak self] in struct ProbeResult: Sendable {
let running = svc.isHermesRunning() let running: Bool
let gateway = svc.loadGatewayState()?.isRunning ?? false let gatewayRunning: Bool
await MainActor.run { [weak self] in let ok: Bool
self?.hermesRunning = running
self?.gatewayRunning = gateway
}
} }
let probe = await Task.detached { () -> ProbeResult in
let pgrep = svc.hermesPIDResult()
let gateway = svc.loadGatewayStateResult()
let running: Bool
switch pgrep {
case .success(let pid): running = (pid != nil)
case .failure: running = false
}
let gatewayRunning: Bool
switch gateway {
case .success(let state): gatewayRunning = state?.isRunning ?? false
case .failure: gatewayRunning = false
}
let pgrepOK: Bool
if case .failure = pgrep { pgrepOK = false } else { pgrepOK = true }
let gatewayOK: Bool
if case .failure = gateway { gatewayOK = false } else { gatewayOK = true }
return ProbeResult(running: running, gatewayRunning: gatewayRunning, ok: pgrepOK && gatewayOK)
}.value
hermesRunning = probe.running
gatewayRunning = probe.gatewayRunning
return probe.ok
} }
} }