From 2e0eb63ea46931dabd9a8d112ca46e55d35d7d77 Mon Sep 17 00:00:00 2001
From: Alan Wizemann
Date: Thu, 7 May 2026 12:08:11 +0200
Subject: [PATCH] fix(health): tighten Hermes gateway pgrep so unrelated commands don't match
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`hermesPIDResult()` was running `pgrep -f hermes`, which matched any
process with "hermes" anywhere in its argv — `hermes acp` chat sessions
Scarf itself spawns, `hermes -z` one-shots, log tails, even this very
file in an editor. The Dashboard "Hermes is running" badge read true
even when the gateway daemon was down.

Narrow the match to the gateway shape specifically. Two alternations
cover both invocation forms used in the wild:

- `python -m hermes_cli.main gateway run …` (the launchctl form)
- `/path/to/hermes gateway run …` (the script-path form)

Verified locally against an actual gateway PID:

    cmd=/Users/.../python -m hermes_cli.main gateway run --replace

The first alternation matches via the `-m hermes_cli.main gateway run`
boundary.

All callers — `stopHermes()`, `DashboardViewModel`, `HealthViewModel`,
`SettingsViewModel`, `scarfApp` — semantically want the gateway PID
specifically, so the narrower match is the right shape, not a behavior
change.

Cherry-picked from #76 with thanks to @unixwzrd for the diagnosis and
the regex.

Co-Authored-By: M S
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 scarf/scarf/Core/Services/HermesFileService.swift | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/scarf/scarf/Core/Services/HermesFileService.swift b/scarf/scarf/Core/Services/HermesFileService.swift
index d00fb43..3938d09 100644
--- a/scarf/scarf/Core/Services/HermesFileService.swift
+++ b/scarf/scarf/Core/Services/HermesFileService.swift
@@ -1233,14 +1233,25 @@ struct HermesFileService: Sendable {
     }
 
     /// Error-surfacing variant.
`.success(nil)` means `pgrep` ran successfully - /// and found no hermes process (Hermes is genuinely not running). + /// and found no Hermes gateway process (Hermes is genuinely not running). /// `.failure` means we couldn't probe at all (pgrep missing, connection /// down, permission issue) — a *different* UX from "not running". + /// + /// The regex narrows the match to the gateway daemon shape so unrelated + /// commands that happen to contain "hermes" — `hermes acp` chat sessions, + /// `hermes -z` one-shots, log tails, README readers — don't get flagged + /// as "Hermes is running" in the dashboard banner. Two alternations cover + /// both invocation forms: the python-module path (`python -m + /// hermes_cli.main gateway run …`) and the script-path form + /// (`/usr/local/bin/hermes gateway run …`). All callers semantically + /// want the gateway PID specifically — `stopHermes()` issues + /// `hermes gateway stop` first and only falls back to killing this + /// PID, and the dashboard health probe only cares about the gateway. nonisolated func hermesPIDResult() -> Result { do { let result = try transport.runProcess( executable: "/usr/bin/pgrep", - args: ["-f", "hermes"], + args: ["-f", #"(^|[[:space:]])-m[[:space:]]+hermes_cli\.main[[:space:]]+gateway[[:space:]]+run([[:space:]]|$)|(^|[[:space:]/])hermes[[:space:]]+gateway[[:space:]]+run([[:space:]]|$)"#], stdin: nil, timeout: 5 )