chore: fd-leak cleanup, os.Logger conversion, status-poll backoff

Pre-release maintenance pass picked up across both targets while
debugging the iOS Browse fix:

- LocalTransport / SSHTransport: close the parent's copy of every
  pipe write end after spawn so EOF reaches the reader once the child
  exits, and explicitly close read ends after draining. Was leaking
  one fd per `runProcess`/`streamLines` invocation under load.
- ProcessACPChannel: also close stdout/stderr write-end fds on
  channel teardown — same pattern, ACP sessions can churn on long
  reconnect loops.
- HermesDataService / HermesLogService / ProjectDashboardService:
  replace remaining `print("[Scarf] ...")` debug statements with
  os.Logger calls (subsystem="com.scarf"), matching the global rule
  that production code uses Logger and `print()` is reserved for
  previews + test helpers.
- ProjectDashboardService / IOSCronViewModel: drop redundant
  `fileExists` guards before `createDirectory` — the operation is
  already mkdir -p across every transport, so the extra round-trip
  was pure latency on remote hosts.
- scarfApp.swift: server-status sidebar probe now uses an exponential
  backoff (10s → 30s → 60s → 120s → 300s) when consecutive probes
  fail, resetting on the first full success. Previously a registered
  remote going unreachable hammered pgrep + gateway_state.json every
  10s indefinitely; now offline servers settle to a 5-min cadence
  while live servers stay snappy.
- Localizable.xcstrings: routine .strings catalog refresh — stale
  entries for removed UI strings, picked up new "Stored under
  quick_commands:..." subtitle wording.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-04-25 16:18:15 +02:00
parent 850fa7a697
commit 63c5d13bec
9 changed files with 128 additions and 34 deletions
@@ -191,6 +191,8 @@ public actor ProcessACPChannel: ACPChannel {
stdinPipe.fileHandleForReading.closeFile()
stdoutPipe.fileHandleForReading.closeFile()
stderrPipe.fileHandleForReading.closeFile()
stdoutPipe.fileHandleForWriting.closeFile()
stderrPipe.fileHandleForWriting.closeFile()
readerTask?.cancel()
stderrTask?.cancel()
@@ -705,7 +705,7 @@ public actor HermesDataService {
do {
return try JSONDecoder().decode([HermesToolCall].self, from: data)
} catch {
print("[Scarf] Failed to decode tool calls: \(error.localizedDescription)")
Self.logger.error("Failed to decode tool calls: \(error.localizedDescription, privacy: .public)")
return []
}
}
@@ -1,4 +1,7 @@
import Foundation
#if canImport(os)
import os
#endif
public struct LogEntry: Identifiable, Sendable {
public let id: Int
@@ -47,6 +50,10 @@ public struct LogEntry: Identifiable, Sendable {
}
public actor HermesLogService {
#if canImport(os)
nonisolated private static let logger = Logger(subsystem: "com.scarf", category: "HermesLogService")
#endif
/// Local file handle for local contexts. `nil` when following a remote
/// log or when no log is open.
private var localHandle: FileHandle?
@@ -89,8 +96,8 @@ public actor HermesLogService {
} catch {
// Transient disconnects / command failures: surface once
// and stop. Callers typically re-open the log on retry.
#if DEBUG
print("[Scarf] remote tail ended: \(error.localizedDescription)")
#if canImport(os)
Self.logger.warning("remote tail ended: \(error.localizedDescription, privacy: .public)")
#endif
}
}
@@ -103,7 +110,9 @@ public actor HermesLogService {
do {
try localHandle?.close()
} catch {
print("[Scarf] Failed to close log handle: \(error.localizedDescription)")
#if canImport(os)
Self.logger.warning("Failed to close log handle: \(error.localizedDescription, privacy: .public)")
#endif
}
localHandle = nil
currentPath = nil
@@ -38,9 +38,10 @@ public struct ProjectDashboardService: Sendable {
/// choice is now theirs.
public func saveRegistry(_ registry: ProjectRegistry) throws {
let dir = context.paths.scarfDir
if !transport.fileExists(dir) {
try transport.createDirectory(dir)
}
// `createDirectory` is mkdir -p across every transport (Local
// uses withIntermediateDirectories, SSH/Citadel both ignore
// "already exists"), so we don't need to fileExists-guard it.
try transport.createDirectory(dir)
let data = try JSONEncoder().encode(registry)
// Pretty-print for readability (agents may read this file).
let writeData: Data
@@ -62,7 +63,7 @@ public struct ProjectDashboardService: Sendable {
do {
return try JSONDecoder().decode(ProjectDashboard.self, from: data)
} catch {
print("[Scarf] Failed to decode dashboard for \(project.name): \(error.localizedDescription)")
Self.logger.error("Failed to decode dashboard for \(project.name, privacy: .public): \(error.localizedDescription, privacy: .public)")
return nil
}
}
@@ -127,6 +127,17 @@ public struct LocalTransport: ServerTransport {
} catch {
throw TransportError.other(message: "Failed to launch \(executable): \(error.localizedDescription)")
}
// Parent has its own copy of every pipe end after fork. The child
// inherits and uses the writing ends of stdout/stderr and the
// reading end of stdin; the parent must close its own copies of
// those so EOF reaches the parent's reader once the child exits
// (otherwise the kernel keeps each fd open as long as any process
// holds a reference, and we leak fds).
try? stdoutPipe.fileHandleForWriting.close()
try? stderrPipe.fileHandleForWriting.close()
if stdin != nil {
try? stdinPipe.fileHandleForReading.close()
}
if let stdin {
try? stdinPipe.fileHandleForWriting.write(contentsOf: stdin)
try? stdinPipe.fileHandleForWriting.close()
@@ -189,6 +200,10 @@ public struct LocalTransport: ServerTransport {
continuation.finish(throwing: error)
return
}
// Parent's copy of the writing ends the child has its
// own; close ours so EOF reaches the reader after exit.
try? outPipe.fileHandleForWriting.close()
try? errPipe.fileHandleForWriting.close()
let handle = outPipe.fileHandleForReading
var buffer = Data()
while true {
@@ -204,11 +219,18 @@ public struct LocalTransport: ServerTransport {
}
}
proc.waitUntilExit()
let stderrTail: String
if proc.terminationStatus != 0 {
let stderr = (try? errPipe.fileHandleForReading.readToEnd())
stderrTail = (try? errPipe.fileHandleForReading.readToEnd())
.flatMap { String(data: $0 ?? Data(), encoding: .utf8) } ?? ""
} else {
stderrTail = ""
}
try? outPipe.fileHandleForReading.close()
try? errPipe.fileHandleForReading.close()
if proc.terminationStatus != 0 {
continuation.finish(throwing: TransportError.commandFailed(
exitCode: proc.terminationStatus, stderr: stderr
exitCode: proc.terminationStatus, stderr: stderrTail
))
} else {
continuation.finish()
@@ -474,6 +474,10 @@ public struct SSHTransport: ServerTransport {
continuation.finish(throwing: error)
return
}
// Parent's copy of the writing ends close ours so EOF
// reaches the reader after the child exits.
try? outPipe.fileHandleForWriting.close()
try? errPipe.fileHandleForWriting.close()
let handle = outPipe.fileHandleForReading
var buffer = Data()
while true {
@@ -489,11 +493,18 @@ public struct SSHTransport: ServerTransport {
}
}
proc.waitUntilExit()
let stderrTail: String
if proc.terminationStatus != 0 {
let stderr = (try? errPipe.fileHandleForReading.readToEnd())
stderrTail = (try? errPipe.fileHandleForReading.readToEnd())
.flatMap { String(data: $0 ?? Data(), encoding: .utf8) } ?? ""
} else {
stderrTail = ""
}
try? outPipe.fileHandleForReading.close()
try? errPipe.fileHandleForReading.close()
if proc.terminationStatus != 0 {
continuation.finish(throwing: TransportError.classifySSHFailure(
host: config.host, exitCode: proc.terminationStatus, stderr: stderr
host: config.host, exitCode: proc.terminationStatus, stderr: stderrTail
))
} else {
continuation.finish()
@@ -662,6 +673,13 @@ public struct SSHTransport: ServerTransport {
} catch {
throw TransportError.other(message: "Failed to launch \(executable): \(error.localizedDescription)")
}
// Parent's copy of the inherited ends close so EOF lands when
// the child exits and we don't leak fds.
try? stdoutPipe.fileHandleForWriting.close()
try? stderrPipe.fileHandleForWriting.close()
if stdin != nil {
try? stdinPipe.fileHandleForReading.close()
}
if let stdin {
try? stdinPipe.fileHandleForWriting.write(contentsOf: stdin)
try? stdinPipe.fileHandleForWriting.close()
@@ -119,10 +119,11 @@ public final class IOSCronViewModel {
let transport = ctx.makeTransport()
// Ensure the cron/ directory exists on a fresh
// Hermes install this file won't be present.
// `createDirectory` is mkdir -p across all transports;
// call unconditionally and let writeFile surface any
// real failure.
let parent = (path as NSString).deletingLastPathComponent
if !transport.fileExists(parent) {
try? transport.createDirectory(parent)
}
try? transport.createDirectory(parent)
try transport.writeFile(path, data: data)
return true
} catch {
+6 -7
View File
@@ -12733,9 +12733,6 @@
"New cron job" : {
"comment" : "A button that creates a new cron job.",
"isCommentAutoGenerated" : true
},
"New folder name" : {
},
"New folder…" : {
"comment" : "A label for a new folder name.",
@@ -15993,10 +15990,6 @@
},
"Project folder kept" : {
},
"Project name" : {
"comment" : "A label for a text field that lets the user enter a project name.",
"isCommentAutoGenerated" : true
},
"Project Name" : {
"localizations" : {
@@ -16411,6 +16404,7 @@
}
},
"Quick commands are shell shortcuts hermes exposes in chat as `/command_name`. They live under `quick_commands:` in config.yaml." : {
"extractionState" : "stale",
"localizations" : {
"de" : {
"stringUnit" : {
@@ -20709,6 +20703,7 @@
}
},
"Skills (%lld)" : {
"extractionState" : "stale",
"localizations" : {
"de" : {
"stringUnit" : {
@@ -21529,6 +21524,10 @@
}
}
},
"Stored under `quick_commands:` in config.yaml." : {
"comment" : "A description of the quick commands feature.",
"isCommentAutoGenerated" : true
},
"Strip the template's begin/end block, preserve everything else in MEMORY.md" : {
"comment" : "A description of the action to remove a memory block.",
"isCommentAutoGenerated" : true
+54 -12
View File
@@ -243,14 +243,27 @@ final class ServerLiveStatus: Identifiable {
func startPolling() {
stopPolling()
// First refresh inline so the icon doesn't flash "stopped" for the
// first 10s after launch.
refresh()
pollTask = Task { [weak self] in
// Exponential backoff on consecutive failures. Healthy servers
// poll every 10s. When a registered remote goes unreachable,
// pgrep + gateway_state.json reads fail every tick without
// backoff that's a log warning + a 5s pgrep timeout every 10s
// for as long as the remote stays down. Reset to 10s on the
// first probe that fully succeeds.
var consecutiveFailures = 0
while !Task.isCancelled {
try? await Task.sleep(nanoseconds: 10_000_000_000)
let ok = await self?.pollOnce() ?? false
if Task.isCancelled { return }
self?.refresh()
consecutiveFailures = ok ? 0 : consecutiveFailures + 1
let delaySec: UInt64
switch consecutiveFailures {
case 0: delaySec = 10
case 1: delaySec = 30
case 2: delaySec = 60
case 3: delaySec = 120
default: delaySec = 300
}
try? await Task.sleep(nanoseconds: delaySec * 1_000_000_000)
}
}
}
@@ -290,15 +303,44 @@ final class ServerLiveStatus: Identifiable {
}
private func refresh() {
Task { [weak self] in _ = await self?.pollOnce() }
}
/// Single probe used by both the polling loop (which needs the
/// success/failure signal for backoff) and the fire-and-forget
/// `refresh()` callers (start/stop/restart). Returns `true` only when
/// both the pgrep call AND the gateway_state.json read returned a
/// transport-level success `.success(nil)` (file missing because
/// hermes is stopped) still counts as a successful probe.
private func pollOnce() async -> Bool {
let svc = fileService
Task.detached { [weak self] in
let running = svc.isHermesRunning()
let gateway = svc.loadGatewayState()?.isRunning ?? false
await MainActor.run { [weak self] in
self?.hermesRunning = running
self?.gatewayRunning = gateway
}
struct ProbeResult: Sendable {
let running: Bool
let gatewayRunning: Bool
let ok: Bool
}
let probe = await Task.detached { () -> ProbeResult in
let pgrep = svc.hermesPIDResult()
let gateway = svc.loadGatewayStateResult()
let running: Bool
switch pgrep {
case .success(let pid): running = (pid != nil)
case .failure: running = false
}
let gatewayRunning: Bool
switch gateway {
case .success(let state): gatewayRunning = state?.isRunning ?? false
case .failure: gatewayRunning = false
}
let pgrepOK: Bool
if case .failure = pgrep { pgrepOK = false } else { pgrepOK = true }
let gatewayOK: Bool
if case .failure = gateway { gatewayOK = false } else { gatewayOK = true }
return ProbeResult(running: running, gatewayRunning: gatewayRunning, ok: pgrepOK && gatewayOK)
}.value
hermesRunning = probe.running
gatewayRunning = probe.gatewayRunning
return probe.ok
}
}