mirror of
https://github.com/awizemann/scarf.git
synced 2026-05-08 02:14:37 +00:00
chore: fd-leak cleanup, os.Logger conversion, status-poll backoff
Pre-release maintenance pass picked up across both targets while
debugging the iOS Browse fix:
- LocalTransport / SSHTransport: close the parent's copy of every
pipe write end after spawn so EOF reaches the reader once the child
exits, and explicitly close read ends after draining. Was leaking
one fd per `runProcess`/`streamLines` invocation under load.
- ProcessACPChannel: also close stdout/stderr write-end fds on
channel teardown — same pattern, ACP sessions can churn on long
reconnect loops.
- HermesDataService / HermesLogService / ProjectDashboardService:
replace remaining `print("[Scarf] ...")` debug statements with
os.Logger calls (subsystem="com.scarf"), matching the global rule
that production code uses Logger and `print()` is reserved for
previews + test helpers.
- ProjectDashboardService / IOSCronViewModel: drop redundant
`fileExists` guards before `createDirectory` — the operation is
already mkdir -p across every transport, so the extra round-trip
was pure latency on remote hosts.
- scarfApp.swift: server-status sidebar probe now uses an exponential
backoff (10s → 30s → 60s → 120s → 300s) when consecutive probes
fail, resetting on the first full success. Previously a registered
remote going unreachable hammered pgrep + gateway_state.json every
10s indefinitely; now offline servers settle to a 5-min cadence
while live servers stay snappy.
- Localizable.xcstrings: routine .strings catalog refresh — stale
entries for removed UI strings, picked up new "Stored under
quick_commands:..." subtitle wording.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -191,6 +191,8 @@ public actor ProcessACPChannel: ACPChannel {
|
||||
stdinPipe.fileHandleForReading.closeFile()
|
||||
stdoutPipe.fileHandleForReading.closeFile()
|
||||
stderrPipe.fileHandleForReading.closeFile()
|
||||
stdoutPipe.fileHandleForWriting.closeFile()
|
||||
stderrPipe.fileHandleForWriting.closeFile()
|
||||
|
||||
readerTask?.cancel()
|
||||
stderrTask?.cancel()
|
||||
|
||||
@@ -705,7 +705,7 @@ public actor HermesDataService {
|
||||
do {
|
||||
return try JSONDecoder().decode([HermesToolCall].self, from: data)
|
||||
} catch {
|
||||
print("[Scarf] Failed to decode tool calls: \(error.localizedDescription)")
|
||||
Self.logger.error("Failed to decode tool calls: \(error.localizedDescription, privacy: .public)")
|
||||
return []
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import Foundation
|
||||
#if canImport(os)
|
||||
import os
|
||||
#endif
|
||||
|
||||
public struct LogEntry: Identifiable, Sendable {
|
||||
public let id: Int
|
||||
@@ -47,6 +50,10 @@ public struct LogEntry: Identifiable, Sendable {
|
||||
}
|
||||
|
||||
public actor HermesLogService {
|
||||
#if canImport(os)
|
||||
nonisolated private static let logger = Logger(subsystem: "com.scarf", category: "HermesLogService")
|
||||
#endif
|
||||
|
||||
/// Local file handle for local contexts. `nil` when following a remote
|
||||
/// log or when no log is open.
|
||||
private var localHandle: FileHandle?
|
||||
@@ -89,8 +96,8 @@ public actor HermesLogService {
|
||||
} catch {
|
||||
// Transient disconnects / command failures: surface once
|
||||
// and stop. Callers typically re-open the log on retry.
|
||||
#if DEBUG
|
||||
print("[Scarf] remote tail ended: \(error.localizedDescription)")
|
||||
#if canImport(os)
|
||||
Self.logger.warning("remote tail ended: \(error.localizedDescription, privacy: .public)")
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -103,7 +110,9 @@ public actor HermesLogService {
|
||||
do {
|
||||
try localHandle?.close()
|
||||
} catch {
|
||||
print("[Scarf] Failed to close log handle: \(error.localizedDescription)")
|
||||
#if canImport(os)
|
||||
Self.logger.warning("Failed to close log handle: \(error.localizedDescription, privacy: .public)")
|
||||
#endif
|
||||
}
|
||||
localHandle = nil
|
||||
currentPath = nil
|
||||
|
||||
@@ -38,9 +38,10 @@ public struct ProjectDashboardService: Sendable {
|
||||
/// choice is now theirs.
|
||||
public func saveRegistry(_ registry: ProjectRegistry) throws {
|
||||
let dir = context.paths.scarfDir
|
||||
if !transport.fileExists(dir) {
|
||||
try transport.createDirectory(dir)
|
||||
}
|
||||
// `createDirectory` is mkdir -p across every transport (Local
|
||||
// uses withIntermediateDirectories, SSH/Citadel both ignore
|
||||
// "already exists"), so we don't need to fileExists-guard it.
|
||||
try transport.createDirectory(dir)
|
||||
let data = try JSONEncoder().encode(registry)
|
||||
// Pretty-print for readability (agents may read this file).
|
||||
let writeData: Data
|
||||
@@ -62,7 +63,7 @@ public struct ProjectDashboardService: Sendable {
|
||||
do {
|
||||
return try JSONDecoder().decode(ProjectDashboard.self, from: data)
|
||||
} catch {
|
||||
print("[Scarf] Failed to decode dashboard for \(project.name): \(error.localizedDescription)")
|
||||
Self.logger.error("Failed to decode dashboard for \(project.name, privacy: .public): \(error.localizedDescription, privacy: .public)")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -127,6 +127,17 @@ public struct LocalTransport: ServerTransport {
|
||||
} catch {
|
||||
throw TransportError.other(message: "Failed to launch \(executable): \(error.localizedDescription)")
|
||||
}
|
||||
// Parent has its own copy of every pipe end after fork. The child
|
||||
// inherits and uses the writing ends of stdout/stderr and the
|
||||
// reading end of stdin; the parent must close its own copies of
|
||||
// those so EOF reaches the parent's reader once the child exits
|
||||
// (otherwise the kernel keeps each fd open as long as any process
|
||||
// holds a reference, and we leak fds).
|
||||
try? stdoutPipe.fileHandleForWriting.close()
|
||||
try? stderrPipe.fileHandleForWriting.close()
|
||||
if stdin != nil {
|
||||
try? stdinPipe.fileHandleForReading.close()
|
||||
}
|
||||
if let stdin {
|
||||
try? stdinPipe.fileHandleForWriting.write(contentsOf: stdin)
|
||||
try? stdinPipe.fileHandleForWriting.close()
|
||||
@@ -189,6 +200,10 @@ public struct LocalTransport: ServerTransport {
|
||||
continuation.finish(throwing: error)
|
||||
return
|
||||
}
|
||||
// Parent's copy of the writing ends — the child has its
|
||||
// own; close ours so EOF reaches the reader after exit.
|
||||
try? outPipe.fileHandleForWriting.close()
|
||||
try? errPipe.fileHandleForWriting.close()
|
||||
let handle = outPipe.fileHandleForReading
|
||||
var buffer = Data()
|
||||
while true {
|
||||
@@ -204,11 +219,18 @@ public struct LocalTransport: ServerTransport {
|
||||
}
|
||||
}
|
||||
proc.waitUntilExit()
|
||||
let stderrTail: String
|
||||
if proc.terminationStatus != 0 {
|
||||
let stderr = (try? errPipe.fileHandleForReading.readToEnd())
|
||||
stderrTail = (try? errPipe.fileHandleForReading.readToEnd())
|
||||
.flatMap { String(data: $0 ?? Data(), encoding: .utf8) } ?? ""
|
||||
} else {
|
||||
stderrTail = ""
|
||||
}
|
||||
try? outPipe.fileHandleForReading.close()
|
||||
try? errPipe.fileHandleForReading.close()
|
||||
if proc.terminationStatus != 0 {
|
||||
continuation.finish(throwing: TransportError.commandFailed(
|
||||
exitCode: proc.terminationStatus, stderr: stderr
|
||||
exitCode: proc.terminationStatus, stderr: stderrTail
|
||||
))
|
||||
} else {
|
||||
continuation.finish()
|
||||
|
||||
@@ -474,6 +474,10 @@ public struct SSHTransport: ServerTransport {
|
||||
continuation.finish(throwing: error)
|
||||
return
|
||||
}
|
||||
// Parent's copy of the writing ends — close ours so EOF
|
||||
// reaches the reader after the child exits.
|
||||
try? outPipe.fileHandleForWriting.close()
|
||||
try? errPipe.fileHandleForWriting.close()
|
||||
let handle = outPipe.fileHandleForReading
|
||||
var buffer = Data()
|
||||
while true {
|
||||
@@ -489,11 +493,18 @@ public struct SSHTransport: ServerTransport {
|
||||
}
|
||||
}
|
||||
proc.waitUntilExit()
|
||||
let stderrTail: String
|
||||
if proc.terminationStatus != 0 {
|
||||
let stderr = (try? errPipe.fileHandleForReading.readToEnd())
|
||||
stderrTail = (try? errPipe.fileHandleForReading.readToEnd())
|
||||
.flatMap { String(data: $0 ?? Data(), encoding: .utf8) } ?? ""
|
||||
} else {
|
||||
stderrTail = ""
|
||||
}
|
||||
try? outPipe.fileHandleForReading.close()
|
||||
try? errPipe.fileHandleForReading.close()
|
||||
if proc.terminationStatus != 0 {
|
||||
continuation.finish(throwing: TransportError.classifySSHFailure(
|
||||
host: config.host, exitCode: proc.terminationStatus, stderr: stderr
|
||||
host: config.host, exitCode: proc.terminationStatus, stderr: stderrTail
|
||||
))
|
||||
} else {
|
||||
continuation.finish()
|
||||
@@ -662,6 +673,13 @@ public struct SSHTransport: ServerTransport {
|
||||
} catch {
|
||||
throw TransportError.other(message: "Failed to launch \(executable): \(error.localizedDescription)")
|
||||
}
|
||||
// Parent's copy of the inherited ends — close so EOF lands when
|
||||
// the child exits and we don't leak fds.
|
||||
try? stdoutPipe.fileHandleForWriting.close()
|
||||
try? stderrPipe.fileHandleForWriting.close()
|
||||
if stdin != nil {
|
||||
try? stdinPipe.fileHandleForReading.close()
|
||||
}
|
||||
if let stdin {
|
||||
try? stdinPipe.fileHandleForWriting.write(contentsOf: stdin)
|
||||
try? stdinPipe.fileHandleForWriting.close()
|
||||
|
||||
@@ -119,10 +119,11 @@ public final class IOSCronViewModel {
|
||||
let transport = ctx.makeTransport()
|
||||
// Ensure the cron/ directory exists — on a fresh
|
||||
// Hermes install this file won't be present.
|
||||
// `createDirectory` is mkdir -p across all transports;
|
||||
// call unconditionally and let writeFile surface any
|
||||
// real failure.
|
||||
let parent = (path as NSString).deletingLastPathComponent
|
||||
if !transport.fileExists(parent) {
|
||||
try? transport.createDirectory(parent)
|
||||
}
|
||||
try? transport.createDirectory(parent)
|
||||
try transport.writeFile(path, data: data)
|
||||
return true
|
||||
} catch {
|
||||
|
||||
@@ -12733,9 +12733,6 @@
|
||||
"New cron job" : {
|
||||
"comment" : "A button that creates a new cron job.",
|
||||
"isCommentAutoGenerated" : true
|
||||
},
|
||||
"New folder name" : {
|
||||
|
||||
},
|
||||
"New folder…" : {
|
||||
"comment" : "A label for a new folder name.",
|
||||
@@ -15993,10 +15990,6 @@
|
||||
},
|
||||
"Project folder kept" : {
|
||||
|
||||
},
|
||||
"Project name" : {
|
||||
"comment" : "A label for a text field that lets the user enter a project name.",
|
||||
"isCommentAutoGenerated" : true
|
||||
},
|
||||
"Project Name" : {
|
||||
"localizations" : {
|
||||
@@ -16411,6 +16404,7 @@
|
||||
}
|
||||
},
|
||||
"Quick commands are shell shortcuts hermes exposes in chat as `/command_name`. They live under `quick_commands:` in config.yaml." : {
|
||||
"extractionState" : "stale",
|
||||
"localizations" : {
|
||||
"de" : {
|
||||
"stringUnit" : {
|
||||
@@ -20709,6 +20703,7 @@
|
||||
}
|
||||
},
|
||||
"Skills (%lld)" : {
|
||||
"extractionState" : "stale",
|
||||
"localizations" : {
|
||||
"de" : {
|
||||
"stringUnit" : {
|
||||
@@ -21529,6 +21524,10 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"Stored under `quick_commands:` in config.yaml." : {
|
||||
"comment" : "A description of the quick commands feature.",
|
||||
"isCommentAutoGenerated" : true
|
||||
},
|
||||
"Strip the template's begin/end block, preserve everything else in MEMORY.md" : {
|
||||
"comment" : "A description of the action to remove a memory block.",
|
||||
"isCommentAutoGenerated" : true
|
||||
|
||||
+54
-12
@@ -243,14 +243,27 @@ final class ServerLiveStatus: Identifiable {
|
||||
|
||||
func startPolling() {
|
||||
stopPolling()
|
||||
// First refresh inline so the icon doesn't flash "stopped" for the
|
||||
// first 10s after launch.
|
||||
refresh()
|
||||
pollTask = Task { [weak self] in
|
||||
// Exponential backoff on consecutive failures. Healthy servers
|
||||
// poll every 10s. When a registered remote goes unreachable,
|
||||
// pgrep + gateway_state.json reads fail every tick — without
|
||||
// backoff that's a log warning + a 5s pgrep timeout every 10s
|
||||
// for as long as the remote stays down. Reset to 10s on the
|
||||
// first probe that fully succeeds.
|
||||
var consecutiveFailures = 0
|
||||
while !Task.isCancelled {
|
||||
try? await Task.sleep(nanoseconds: 10_000_000_000)
|
||||
let ok = await self?.pollOnce() ?? false
|
||||
if Task.isCancelled { return }
|
||||
self?.refresh()
|
||||
consecutiveFailures = ok ? 0 : consecutiveFailures + 1
|
||||
let delaySec: UInt64
|
||||
switch consecutiveFailures {
|
||||
case 0: delaySec = 10
|
||||
case 1: delaySec = 30
|
||||
case 2: delaySec = 60
|
||||
case 3: delaySec = 120
|
||||
default: delaySec = 300
|
||||
}
|
||||
try? await Task.sleep(nanoseconds: delaySec * 1_000_000_000)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -290,15 +303,44 @@ final class ServerLiveStatus: Identifiable {
|
||||
}
|
||||
|
||||
private func refresh() {
|
||||
Task { [weak self] in _ = await self?.pollOnce() }
|
||||
}
|
||||
|
||||
/// Single probe used by both the polling loop (which needs the
|
||||
/// success/failure signal for backoff) and the fire-and-forget
|
||||
/// `refresh()` callers (start/stop/restart). Returns `true` only when
|
||||
/// both the pgrep call AND the gateway_state.json read returned a
|
||||
/// transport-level success — `.success(nil)` (file missing because
|
||||
/// hermes is stopped) still counts as a successful probe.
|
||||
private func pollOnce() async -> Bool {
|
||||
let svc = fileService
|
||||
Task.detached { [weak self] in
|
||||
let running = svc.isHermesRunning()
|
||||
let gateway = svc.loadGatewayState()?.isRunning ?? false
|
||||
await MainActor.run { [weak self] in
|
||||
self?.hermesRunning = running
|
||||
self?.gatewayRunning = gateway
|
||||
}
|
||||
struct ProbeResult: Sendable {
|
||||
let running: Bool
|
||||
let gatewayRunning: Bool
|
||||
let ok: Bool
|
||||
}
|
||||
let probe = await Task.detached { () -> ProbeResult in
|
||||
let pgrep = svc.hermesPIDResult()
|
||||
let gateway = svc.loadGatewayStateResult()
|
||||
let running: Bool
|
||||
switch pgrep {
|
||||
case .success(let pid): running = (pid != nil)
|
||||
case .failure: running = false
|
||||
}
|
||||
let gatewayRunning: Bool
|
||||
switch gateway {
|
||||
case .success(let state): gatewayRunning = state?.isRunning ?? false
|
||||
case .failure: gatewayRunning = false
|
||||
}
|
||||
let pgrepOK: Bool
|
||||
if case .failure = pgrep { pgrepOK = false } else { pgrepOK = true }
|
||||
let gatewayOK: Bool
|
||||
if case .failure = gateway { gatewayOK = false } else { gatewayOK = true }
|
||||
return ProbeResult(running: running, gatewayRunning: gatewayRunning, ok: pgrepOK && gatewayOK)
|
||||
}.value
|
||||
hermesRunning = probe.running
|
||||
gatewayRunning = probe.gatewayRunning
|
||||
return probe.ok
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user