From 63c5d13bec66c93fa507af6da1f0f675c0f0dbc7 Mon Sep 17 00:00:00 2001 From: Alan Wizemann Date: Sat, 25 Apr 2026 16:18:15 +0200 Subject: [PATCH] chore: fd-leak cleanup, os.Logger conversion, status-poll backoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-release maintenance pass picked up across both targets while debugging the iOS Browse fix: - LocalTransport / SSHTransport: close the parent's copy of every pipe write end after spawn so EOF reaches the reader once the child exits, and explicitly close read ends after draining. Was leaking one fd per `runProcess`/`streamLines` invocation under load. - ProcessACPChannel: also close stdout/stderr write-end fds on channel teardown — same pattern, ACP sessions can churn on long reconnect loops. - HermesDataService / HermesLogService / ProjectDashboardService: replace remaining `print("[Scarf] ...")` debug statements with os.Logger calls (subsystem="com.scarf"), matching the global rule that production code uses Logger and `print()` is reserved for previews + test helpers. - ProjectDashboardService / IOSCronViewModel: drop redundant `fileExists` guards before `createDirectory` — the operation is already mkdir -p across every transport, so the extra round-trip was pure latency on remote hosts. - scarfApp.swift: server-status sidebar probe now uses an exponential backoff (10s → 30s → 60s → 120s → 300s) when consecutive probes fail, resetting on the first full success. Previously a registered remote going unreachable hammered pgrep + gateway_state.json every 10s indefinitely; now offline servers settle to a 5-min cadence while live servers stay snappy. - Localizable.xcstrings: routine .strings catalog refresh — stale entries for removed UI strings, picked up new "Stored under quick_commands:..." subtitle wording. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ScarfCore/ACP/ProcessACPChannel.swift | 2 + .../Services/HermesDataService.swift | 2 +- .../ScarfCore/Services/HermesLogService.swift | 15 ++++- .../Services/ProjectDashboardService.swift | 9 +-- .../ScarfCore/Transport/LocalTransport.swift | 26 +++++++- .../ScarfCore/Transport/SSHTransport.swift | 22 ++++++- .../ViewModels/IOSCronViewModel.swift | 7 +- scarf/scarf/Localizable.xcstrings | 13 ++-- scarf/scarf/scarfApp.swift | 66 +++++++++++++++---- 9 files changed, 128 insertions(+), 34 deletions(-) diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/ACP/ProcessACPChannel.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/ACP/ProcessACPChannel.swift index fab4fe6..6c65c52 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/ACP/ProcessACPChannel.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/ACP/ProcessACPChannel.swift @@ -191,6 +191,8 @@ public actor ProcessACPChannel: ACPChannel { stdinPipe.fileHandleForReading.closeFile() stdoutPipe.fileHandleForReading.closeFile() stderrPipe.fileHandleForReading.closeFile() + stdoutPipe.fileHandleForWriting.closeFile() + stderrPipe.fileHandleForWriting.closeFile() readerTask?.cancel() stderrTask?.cancel() diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift index 931b2df..9f14bc4 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesDataService.swift @@ -705,7 +705,7 @@ public actor HermesDataService { do { return try JSONDecoder().decode([HermesToolCall].self, from: data) } catch { - print("[Scarf] Failed to decode tool calls: \(error.localizedDescription)") + Self.logger.error("Failed to decode tool calls: \(error.localizedDescription, privacy: .public)") return [] } } diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesLogService.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesLogService.swift index 3bd8861..f084c84 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesLogService.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/HermesLogService.swift @@ -1,4 +1,7 @@ import Foundation +#if canImport(os) +import os +#endif public struct LogEntry: Identifiable, Sendable { public let id: Int @@ -47,6 +50,10 @@ public struct LogEntry: Identifiable, Sendable { } public actor HermesLogService { + #if canImport(os) + nonisolated private static let logger = Logger(subsystem: "com.scarf", category: "HermesLogService") + #endif + /// Local file handle for local contexts. `nil` when following a remote /// log or when no log is open. private var localHandle: FileHandle? @@ -89,8 +96,8 @@ public actor HermesLogService { } catch { // Transient disconnects / command failures: surface once // and stop. Callers typically re-open the log on retry. - #if DEBUG - print("[Scarf] remote tail ended: \(error.localizedDescription)") + #if canImport(os) + Self.logger.warning("remote tail ended: \(error.localizedDescription, privacy: .public)") #endif } } @@ -103,7 +110,9 @@ public actor HermesLogService { do { try localHandle?.close() } catch { - print("[Scarf] Failed to close log handle: \(error.localizedDescription)") + #if canImport(os) + Self.logger.warning("Failed to close log handle: \(error.localizedDescription, privacy: .public)") + #endif } localHandle = nil currentPath = nil diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/ProjectDashboardService.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/ProjectDashboardService.swift index 36998a9..85e14fe 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/ProjectDashboardService.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Services/ProjectDashboardService.swift @@ -38,9 +38,10 @@ public struct ProjectDashboardService: Sendable { /// choice is now theirs. public func saveRegistry(_ registry: ProjectRegistry) throws { let dir = context.paths.scarfDir - if !transport.fileExists(dir) { - try transport.createDirectory(dir) - } + // `createDirectory` is mkdir -p across every transport (Local + // uses withIntermediateDirectories, SSH/Citadel both ignore + // "already exists"), so we don't need to fileExists-guard it. + try transport.createDirectory(dir) let data = try JSONEncoder().encode(registry) // Pretty-print for readability (agents may read this file). let writeData: Data @@ -62,7 +63,7 @@ public struct ProjectDashboardService: Sendable { do { return try JSONDecoder().decode(ProjectDashboard.self, from: data) } catch { - print("[Scarf] Failed to decode dashboard for \(project.name): \(error.localizedDescription)") + Self.logger.error("Failed to decode dashboard for \(project.name, privacy: .public): \(error.localizedDescription, privacy: .public)") return nil } } diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift index cc9fa03..a4a8b20 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/LocalTransport.swift @@ -127,6 +127,17 @@ public struct LocalTransport: ServerTransport { } catch { throw TransportError.other(message: "Failed to launch \(executable): \(error.localizedDescription)") } + // Parent has its own copy of every pipe end after fork. The child + // inherits and uses the writing ends of stdout/stderr and the + // reading end of stdin; the parent must close its own copies of + // those so EOF reaches the parent's reader once the child exits + // (otherwise the kernel keeps each fd open as long as any process + // holds a reference, and we leak fds). + try? stdoutPipe.fileHandleForWriting.close() + try? stderrPipe.fileHandleForWriting.close() + if stdin != nil { + try? stdinPipe.fileHandleForReading.close() + } if let stdin { try? stdinPipe.fileHandleForWriting.write(contentsOf: stdin) try? stdinPipe.fileHandleForWriting.close() @@ -189,6 +200,10 @@ public struct LocalTransport: ServerTransport { continuation.finish(throwing: error) return } + // Parent's copy of the writing ends — the child has its + // own; close ours so EOF reaches the reader after exit. + try? outPipe.fileHandleForWriting.close() + try? errPipe.fileHandleForWriting.close() let handle = outPipe.fileHandleForReading var buffer = Data() while true { @@ -204,11 +219,18 @@ public struct LocalTransport: ServerTransport { } } proc.waitUntilExit() + let stderrTail: String if proc.terminationStatus != 0 { - let stderr = (try? errPipe.fileHandleForReading.readToEnd()) + stderrTail = (try? errPipe.fileHandleForReading.readToEnd()) .flatMap { String(data: $0 ?? Data(), encoding: .utf8) } ?? "" + } else { + stderrTail = "" + } + try? outPipe.fileHandleForReading.close() + try? errPipe.fileHandleForReading.close() + if proc.terminationStatus != 0 { continuation.finish(throwing: TransportError.commandFailed( - exitCode: proc.terminationStatus, stderr: stderr + exitCode: proc.terminationStatus, stderr: stderrTail )) } else { continuation.finish() diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift index 5c87c9c..8316301 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHTransport.swift @@ -474,6 +474,10 @@ public struct SSHTransport: ServerTransport { continuation.finish(throwing: error) return } + // Parent's copy of the writing ends — close ours so EOF + // reaches the reader after the child exits. + try? outPipe.fileHandleForWriting.close() + try? errPipe.fileHandleForWriting.close() let handle = outPipe.fileHandleForReading var buffer = Data() while true { @@ -489,11 +493,18 @@ public struct SSHTransport: ServerTransport { } } proc.waitUntilExit() + let stderrTail: String if proc.terminationStatus != 0 { - let stderr = (try? errPipe.fileHandleForReading.readToEnd()) + stderrTail = (try? errPipe.fileHandleForReading.readToEnd()) .flatMap { String(data: $0 ?? Data(), encoding: .utf8) } ?? "" + } else { + stderrTail = "" + } + try? outPipe.fileHandleForReading.close() + try? errPipe.fileHandleForReading.close() + if proc.terminationStatus != 0 { continuation.finish(throwing: TransportError.classifySSHFailure( - host: config.host, exitCode: proc.terminationStatus, stderr: stderr + host: config.host, exitCode: proc.terminationStatus, stderr: stderrTail )) } else { continuation.finish() @@ -662,6 +673,13 @@ public struct SSHTransport: ServerTransport { } catch { throw TransportError.other(message: "Failed to launch \(executable): \(error.localizedDescription)") } + // Parent's copy of the inherited ends — close so EOF lands when + // the child exits and we don't leak fds. + try? stdoutPipe.fileHandleForWriting.close() + try? stderrPipe.fileHandleForWriting.close() + if stdin != nil { + try? stdinPipe.fileHandleForReading.close() + } if let stdin { try? stdinPipe.fileHandleForWriting.write(contentsOf: stdin) try? stdinPipe.fileHandleForWriting.close() diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/IOSCronViewModel.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/IOSCronViewModel.swift index 53ed66d..cc77aeb 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/IOSCronViewModel.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/IOSCronViewModel.swift @@ -119,10 +119,11 @@ public final class IOSCronViewModel { let transport = ctx.makeTransport() // Ensure the cron/ directory exists — on a fresh // Hermes install this file won't be present. + // `createDirectory` is mkdir -p across all transports; + // call unconditionally and let writeFile surface any + // real failure. let parent = (path as NSString).deletingLastPathComponent - if !transport.fileExists(parent) { - try? transport.createDirectory(parent) - } + try? transport.createDirectory(parent) try transport.writeFile(path, data: data) return true } catch { diff --git a/scarf/scarf/Localizable.xcstrings b/scarf/scarf/Localizable.xcstrings index a99efb5..f36ed09 100644 --- a/scarf/scarf/Localizable.xcstrings +++ b/scarf/scarf/Localizable.xcstrings @@ -12733,9 +12733,6 @@ "New cron job" : { "comment" : "A button that creates a new cron job.", "isCommentAutoGenerated" : true - }, - "New folder name" : { - }, "New folder…" : { "comment" : "A label for a new folder name.", @@ -15993,10 +15990,6 @@ }, "Project folder kept" : { - }, - "Project name" : { - "comment" : "A label for a text field that lets the user enter a project name.", - "isCommentAutoGenerated" : true }, "Project Name" : { "localizations" : { @@ -16411,6 +16404,7 @@ } }, "Quick commands are shell shortcuts hermes exposes in chat as `/command_name`. They live under `quick_commands:` in config.yaml." : { + "extractionState" : "stale", "localizations" : { "de" : { "stringUnit" : { @@ -20709,6 +20703,7 @@ } }, "Skills (%lld)" : { + "extractionState" : "stale", "localizations" : { "de" : { "stringUnit" : { @@ -21529,6 +21524,10 @@ } } }, + "Stored under `quick_commands:` in config.yaml." : { + "comment" : "A description of the quick commands feature.", + "isCommentAutoGenerated" : true + }, "Strip the template's begin/end block, preserve everything else in MEMORY.md" : { "comment" : "A description of the action to remove a memory block.", "isCommentAutoGenerated" : true diff --git a/scarf/scarf/scarfApp.swift b/scarf/scarf/scarfApp.swift index ef5ee7a..5dcbcc2 100644 --- a/scarf/scarf/scarfApp.swift +++ b/scarf/scarf/scarfApp.swift @@ -243,14 +243,27 @@ final class ServerLiveStatus: Identifiable { func startPolling() { stopPolling() - // First refresh inline so the icon doesn't flash "stopped" for the - // first 10s after launch. - refresh() pollTask = Task { [weak self] in + // Exponential backoff on consecutive failures. Healthy servers + // poll every 10s. When a registered remote goes unreachable, + // pgrep + gateway_state.json reads fail every tick — without + // backoff that's a log warning + a 5s pgrep timeout every 10s + // for as long as the remote stays down. Reset to 10s on the + // first probe that fully succeeds. + var consecutiveFailures = 0 while !Task.isCancelled { - try? await Task.sleep(nanoseconds: 10_000_000_000) + let ok = await self?.pollOnce() ?? false if Task.isCancelled { return } - self?.refresh() + consecutiveFailures = ok ? 0 : consecutiveFailures + 1 + let delaySec: UInt64 + switch consecutiveFailures { + case 0: delaySec = 10 + case 1: delaySec = 30 + case 2: delaySec = 60 + case 3: delaySec = 120 + default: delaySec = 300 + } + try? await Task.sleep(nanoseconds: delaySec * 1_000_000_000) } } } @@ -290,15 +303,44 @@ final class ServerLiveStatus: Identifiable { } private func refresh() { + Task { [weak self] in _ = await self?.pollOnce() } + } + + /// Single probe used by both the polling loop (which needs the + /// success/failure signal for backoff) and the fire-and-forget + /// `refresh()` callers (start/stop/restart). Returns `true` only when + /// both the pgrep call AND the gateway_state.json read returned a + /// transport-level success — `.success(nil)` (file missing because + /// hermes is stopped) still counts as a successful probe. + private func pollOnce() async -> Bool { let svc = fileService - Task.detached { [weak self] in - let running = svc.isHermesRunning() - let gateway = svc.loadGatewayState()?.isRunning ?? false - await MainActor.run { [weak self] in - self?.hermesRunning = running - self?.gatewayRunning = gateway - } + struct ProbeResult: Sendable { + let running: Bool + let gatewayRunning: Bool + let ok: Bool } + let probe = await Task.detached { () -> ProbeResult in + let pgrep = svc.hermesPIDResult() + let gateway = svc.loadGatewayStateResult() + let running: Bool + switch pgrep { + case .success(let pid): running = (pid != nil) + case .failure: running = false + } + let gatewayRunning: Bool + switch gateway { + case .success(let state): gatewayRunning = state?.isRunning ?? false + case .failure: gatewayRunning = false + } + let pgrepOK: Bool + if case .failure = pgrep { pgrepOK = false } else { pgrepOK = true } + let gatewayOK: Bool + if case .failure = gateway { gatewayOK = false } else { gatewayOK = true } + return ProbeResult(running: running, gatewayRunning: gatewayRunning, ok: pgrepOK && gatewayOK) + }.value + hermesRunning = probe.running + gatewayRunning = probe.gatewayRunning + return probe.ok } }