fix: diagnostics sheet — bypass transport.runProcess for shell script

First-run of diagnostics against a working Mardon returned 0/14 passing
with "(no output)" for every probe — including the trivial "emit
connectivity PASS" that the script emits unconditionally. That meant the
script wasn't executing as written; the parser saw `__END__` but no
probe lines.

Root cause: SSHTransport.runProcess wraps every argument through
`remotePathArg`, which is designed for PATHS (it rewrites `~/` to
`$HOME/` and double-quotes the result with backslash-escapes). Passing
a multi-line shell script with embedded `"$1"` / `"$2"` / `"$3"` and
`printf '\n'` escape sequences through that is corruption — the remote
sh -c receives a scrambled script and silently emits nothing.

TestConnectionProbe already works around this: it builds the ssh argv
directly (ssh host -- /bin/sh -c <script>) so the script travels as a
single opaque argv entry and ssh forwards it to the remote shell
unchanged.

Mirror that approach. RemoteDiagnosticsViewModel.execute now:
- For remote contexts: builds ssh argv directly (ControlMaster-aware,
  uses the same socket as SSHTransport so it's effectively free after
  the first connection), then passes /bin/sh -c <script> as argv.
- For local contexts: spawns /bin/sh -c <script> via Process directly.

Also surfaces raw stdout/stderr/exit-code in a disclosure panel at the
bottom of the sheet, visible only when ALL probes fail. Makes any
future transport-level breakage self-diagnosing: the user sees exactly
what the remote returned, not just "(no output)" rows.

Expose SSHTransport.controlDirPath (already static) as a public helper
so the diagnostics probe reuses the same ControlMaster socket as the
connection itself.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-04-20 13:56:09 -07:00
parent f8069a4481
commit ec03627bcd
2 changed files with 183 additions and 23 deletions
@@ -101,6 +101,13 @@ final class RemoteDiagnosticsViewModel {
private(set) var isRunning: Bool = false private(set) var isRunning: Bool = false
private(set) var startedAt: Date? private(set) var startedAt: Date?
private(set) var finishedAt: Date? private(set) var finishedAt: Date?
/// Raw stdout/stderr from the most recent run, preserved so the UI can
/// surface them in a disclosure panel when things look wrong. This is
/// how we debug cases where the script ran but no probes were parsed
/// (e.g. transport-quoting bugs, dash-vs-bash incompatibilities).
private(set) var rawStdout: String = ""
private(set) var rawStderr: String = ""
private(set) var rawExitCode: Int32 = 0
init(context: ServerContext) { init(context: ServerContext) {
self.context = context self.context = context
@@ -119,12 +126,18 @@ final class RemoteDiagnosticsViewModel {
switch captured { switch captured {
case .connectFailure(let msg): case .connectFailure(let msg):
rawStdout = ""
rawStderr = msg
rawExitCode = -1
probes = [ probes = [
Probe(id: .connectivity, passed: false, detail: msg) Probe(id: .connectivity, passed: false, detail: msg)
] + ProbeID.allCases ] + ProbeID.allCases
.filter { $0 != .connectivity } .filter { $0 != .connectivity }
.map { Probe(id: $0, passed: false, detail: "(skipped — SSH didn't connect)") } .map { Probe(id: $0, passed: false, detail: "(skipped — SSH didn't connect)") }
case .completed(let stdout, let stderr, let exitCode): case .completed(let stdout, let stderr, let exitCode):
rawStdout = stdout
rawStderr = stderr
rawExitCode = exitCode
probes = Self.parse(stdout: stdout, stderr: stderr, exitCode: exitCode) probes = Self.parse(stdout: stdout, stderr: stderr, exitCode: exitCode)
} }
@@ -274,27 +287,137 @@ final class RemoteDiagnosticsViewModel {
} }
private static func execute(script: String, context: ServerContext) async -> Captured { private static func execute(script: String, context: ServerContext) async -> Captured {
let transport = context.makeTransport() // Can't use `transport.runProcess(executable: "/bin/sh", args: ["-c", script])`
return await Task.detached { () -> Captured in // here: SSHTransport.runProcess pipes every argument through
do { // `remotePathArg` (which double-quotes to rewrite `~/` `$HOME/`),
let result = try transport.runProcess( // which mangles a multi-line shell script containing `"$1"`,
executable: "/bin/sh", // nested quotes, and `printf` escape sequences. The result on the
args: ["-c", script], // remote is a scrambled string and every probe fails to emit.
stdin: nil, //
timeout: 30 // Mirror TestConnectionProbe's approach: build the ssh argv
) // directly so the script travels as a single opaque argv entry
return .completed( // that ssh forwards to the remote shell unchanged.
stdout: result.stdoutString, switch context.kind {
stderr: result.stderrString, case .local:
exitCode: result.exitCode return await runLocally(script: script)
) case .ssh(let config):
} catch { return await runOverSSH(script: script, config: config)
let desc = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
return .connectFailure(desc)
} }
}
/// Direct ssh invocation same style as TestConnectionProbe. Uses the
/// connection's ControlMaster socket so this is cheap (~5ms) after the
/// first connection is warm.
private static func runOverSSH(script: String, config: SSHConfig) async -> Captured {
var sshArgv: [String] = [
"-o", "ControlMaster=auto",
"-o", "ControlPath=\(controlDirPath())/%C",
"-o", "ControlPersist=600",
"-o", "ServerAliveInterval=30",
"-o", "ConnectTimeout=10",
"-o", "StrictHostKeyChecking=accept-new",
"-o", "LogLevel=QUIET",
"-o", "BatchMode=yes"
]
if let port = config.port { sshArgv += ["-p", String(port)] }
if let id = config.identityFile, !id.isEmpty {
sshArgv += ["-i", id]
}
let hostSpec: String
if let user = config.user, !user.isEmpty { hostSpec = "\(user)@\(config.host)" }
else { hostSpec = config.host }
sshArgv.append(hostSpec)
sshArgv.append("--")
sshArgv.append("/bin/sh")
sshArgv.append("-c")
sshArgv.append(script)
return await Task.detached { () -> Captured in
let proc = Process()
proc.executableURL = URL(fileURLWithPath: "/usr/bin/ssh")
proc.arguments = sshArgv
// Inherit the shell's SSH_AUTH_SOCK so ssh can reach the
// agent same pattern as SSHTransport + TestConnectionProbe.
var env = ProcessInfo.processInfo.environment
let shellEnv = HermesFileService.enrichedEnvironment()
for key in ["SSH_AUTH_SOCK", "SSH_AGENT_PID"] {
if env[key] == nil, let v = shellEnv[key], !v.isEmpty {
env[key] = v
}
}
proc.environment = env
let stdoutPipe = Pipe()
let stderrPipe = Pipe()
proc.standardOutput = stdoutPipe
proc.standardError = stderrPipe
do {
try proc.run()
} catch {
return .connectFailure("Failed to launch ssh: \(error.localizedDescription)")
}
let deadline = Date().addingTimeInterval(30)
while proc.isRunning && Date() < deadline {
try? await Task.sleep(nanoseconds: 100_000_000)
}
if proc.isRunning {
proc.terminate()
return .connectFailure("Diagnostics timed out after 30s")
}
let out = (try? stdoutPipe.fileHandleForReading.readToEnd()) ?? Data()
let err = (try? stderrPipe.fileHandleForReading.readToEnd()) ?? Data()
return .completed(
stdout: String(data: out, encoding: .utf8) ?? "",
stderr: String(data: err, encoding: .utf8) ?? "",
exitCode: proc.terminationStatus
)
}.value }.value
} }
/// Local Shell invocation runs the diagnostic script against the
/// user's own Mac. Less useful than the remote form (most checks will
/// trivially pass), but lets the same UI work for both contexts.
private static func runLocally(script: String) async -> Captured {
return await Task.detached { () -> Captured in
let proc = Process()
proc.executableURL = URL(fileURLWithPath: "/bin/sh")
proc.arguments = ["-c", script]
let stdoutPipe = Pipe()
let stderrPipe = Pipe()
proc.standardOutput = stdoutPipe
proc.standardError = stderrPipe
do {
try proc.run()
} catch {
return .connectFailure("Failed to launch /bin/sh: \(error.localizedDescription)")
}
let deadline = Date().addingTimeInterval(10)
while proc.isRunning && Date() < deadline {
try? await Task.sleep(nanoseconds: 100_000_000)
}
if proc.isRunning {
proc.terminate()
return .connectFailure("Local diagnostics timed out (should be <1s)")
}
let out = (try? stdoutPipe.fileHandleForReading.readToEnd()) ?? Data()
let err = (try? stderrPipe.fileHandleForReading.readToEnd()) ?? Data()
return .completed(
stdout: String(data: out, encoding: .utf8) ?? "",
stderr: String(data: err, encoding: .utf8) ?? "",
exitCode: proc.terminationStatus
)
}.value
}
/// Same cache directory used by SSHTransport shared so the diagnostic
/// probe reuses the connection's ControlMaster socket when it already
/// exists (no second TCP handshake, no second auth).
private static func controlDirPath() -> String {
SSHTransport.controlDirPath()
}
private static func parse(stdout: String, stderr: String, exitCode: Int32) -> [Probe] { private static func parse(stdout: String, stderr: String, exitCode: Int32) -> [Probe] {
var results: [ProbeID: Probe] = [:] var results: [ProbeID: Probe] = [:]
for line in stdout.split(whereSeparator: { $0 == "\n" || $0 == "\r" }) { for line in stdout.split(whereSeparator: { $0 == "\n" || $0 == "\r" }) {
@@ -128,6 +128,42 @@ struct RemoteDiagnosticsView: View {
} }
private var footer: some View { private var footer: some View {
VStack(alignment: .leading, spacing: 8) {
// Raw-output disclosure. Shown when EVERY probe fails, since
// that's the signature of a script-quoting / transport-level
// issue rather than a real remote problem. Hidden in the normal
// case so it doesn't distract.
if !viewModel.probes.isEmpty, viewModel.passingCount == 0 {
DisclosureGroup("Raw remote output (for debugging)") {
VStack(alignment: .leading, spacing: 6) {
Text("exit code: \(viewModel.rawExitCode)")
.font(.caption.monospaced())
if !viewModel.rawStdout.isEmpty {
Text("stdout:").font(.caption).foregroundStyle(.secondary)
ScrollView {
Text(viewModel.rawStdout)
.font(.system(size: 10, design: .monospaced))
.textSelection(.enabled)
.frame(maxWidth: .infinity, alignment: .leading)
}
.frame(maxHeight: 140)
}
if !viewModel.rawStderr.isEmpty {
Text("stderr:").font(.caption).foregroundStyle(.secondary)
ScrollView {
Text(viewModel.rawStderr)
.font(.system(size: 10, design: .monospaced))
.textSelection(.enabled)
.frame(maxWidth: .infinity, alignment: .leading)
}
.frame(maxHeight: 140)
}
}
.padding(8)
.background(Color.secondary.opacity(0.08), in: RoundedRectangle(cornerRadius: 6))
}
.font(.caption)
}
HStack { HStack {
Text("Scarf runs these over a single SSH session that mirrors the shell your dashboard reads from, so a green row here means Scarf can actually read that file at runtime.") Text("Scarf runs these over a single SSH session that mirrors the shell your dashboard reads from, so a green row here means Scarf can actually read that file at runtime.")
.font(.caption) .font(.caption)
@@ -137,6 +173,7 @@ struct RemoteDiagnosticsView: View {
Button("Done") { dismiss() } Button("Done") { dismiss() }
.keyboardShortcut(.defaultAction) .keyboardShortcut(.defaultAction)
} }
}
.padding(16) .padding(16)
} }