diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHScriptRunner.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHScriptRunner.swift new file mode 100644 index 0000000..f8dae75 --- /dev/null +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/Transport/SSHScriptRunner.swift @@ -0,0 +1,183 @@ +import Foundation + +/// Runs multi-line shell scripts on a server (local or SSH) without +/// going through `ServerTransport.runProcess`. +/// +/// **Why this exists.** `SSHTransport.runProcess` quotes every argument +/// via `remotePathArg` (it rewrites `~/` → `$HOME/`), which is correct +/// for path arguments but mangles a multi-line script containing +/// `"$VAR"` references, nested quotes, and control structures. The +/// remote receives a scrambled string and the script silently +/// produces no useful output. +/// +/// `RemoteDiagnosticsViewModel` originally documented this and worked +/// around it locally. Issue #44 surfaced the same bug for the +/// connection-status pill (multi-line probe script through +/// `runProcess` → tier 2 always reads as failed even when the file +/// is readable, while diagnostics — which used the workaround — +/// reports 14/14 passing). This helper centralises the workaround so +/// any future caller running a script gets it for free. +/// +/// **Approach.** We invoke `/usr/bin/ssh ... -- /bin/sh -s` directly +/// and pipe the script via stdin, so the script travels as a single +/// opaque byte stream that the remote shell parses unchanged. Local +/// contexts skip ssh and just pipe to `/bin/sh -s` — same shape so +/// callers can treat both uniformly. +public enum SSHScriptRunner { + + public enum Outcome: Sendable { + /// Couldn't even reach the remote (process spawn failed, + /// timeout before any output, network refused). Carries the + /// human-readable reason. + case connectFailure(String) + /// Script ran to completion (or until timeout cut it short + /// after producing partial output). Exit code, stdout, stderr + /// are reported as captured. + case completed(stdout: String, stderr: String, exitCode: Int32) + } + + /// Run `script` against the given context. Times out after + /// `timeout` seconds, killing the subprocess if it overruns. + /// + /// **Platforms.** Real implementation is macOS-only — relies on + /// `Foundation.Process` which iOS doesn't ship. iOS callers + /// (ScarfGo) use Citadel-backed SSH transports for their own + /// flows; they never reach this entry point. To keep ScarfCore + /// cross-platform we return a connect failure on non-macOS so + /// the file compiles everywhere. + public static func run(script: String, context: ServerContext, timeout: TimeInterval = 30) async -> Outcome { + #if os(macOS) + switch context.kind { + case .local: + return await runLocally(script: script, timeout: timeout) + case .ssh(let config): + return await runOverSSH(script: script, config: config, timeout: timeout) + } + #else + return .connectFailure("SSHScriptRunner is only available on macOS") + #endif + } + + // MARK: - SSH path + + #if os(macOS) + private static func runOverSSH(script: String, config: SSHConfig, timeout: TimeInterval) async -> Outcome { + var sshArgv: [String] = [ + "-o", "ControlMaster=auto", + "-o", "ControlPath=\(SSHTransport.controlDirPath())/%C", + "-o", "ControlPersist=600", + "-o", "ServerAliveInterval=30", + "-o", "ConnectTimeout=10", + "-o", "StrictHostKeyChecking=accept-new", + "-o", "LogLevel=QUIET", + "-o", "BatchMode=yes", + "-T", // no pty — keep stdin/stdout a clean byte stream + ] + if let port = config.port { sshArgv += ["-p", String(port)] } + if let id = config.identityFile, !id.isEmpty { + sshArgv += ["-i", id] + } + let hostSpec: String + if let user = config.user, !user.isEmpty { hostSpec = "\(user)@\(config.host)" } + else { hostSpec = config.host } + sshArgv.append(hostSpec) + sshArgv.append("--") + sshArgv.append("/bin/sh") + sshArgv.append("-s") // read script from stdin + + return await Task.detached { () -> Outcome in + let proc = Process() + proc.executableURL = URL(fileURLWithPath: "/usr/bin/ssh") + proc.arguments = sshArgv + + // Inherit shell-derived SSH_AUTH_SOCK so ssh-agent reaches. + // Same path SSHTransport uses internally — see + // `environmentEnricher` set at app boot. + var env = ProcessInfo.processInfo.environment + if let enricher = SSHTransport.environmentEnricher { + let shellEnv = enricher() + for key in ["SSH_AUTH_SOCK", "SSH_AGENT_PID"] { + if env[key] == nil, let v = shellEnv[key], !v.isEmpty { + env[key] = v + } + } + } + proc.environment = env + + let stdinPipe = Pipe() + let stdoutPipe = Pipe() + let stderrPipe = Pipe() + proc.standardInput = stdinPipe + proc.standardOutput = stdoutPipe + proc.standardError = stderrPipe + + do { + try proc.run() + } catch { + return .connectFailure("Failed to launch ssh: \(error.localizedDescription)") + } + + if let data = script.data(using: .utf8) { + try? stdinPipe.fileHandleForWriting.write(contentsOf: data) + } + try? stdinPipe.fileHandleForWriting.close() + + let deadline = Date().addingTimeInterval(timeout) + while proc.isRunning && Date() < deadline { + try? await Task.sleep(nanoseconds: 100_000_000) + } + if proc.isRunning { + proc.terminate() + return .connectFailure("Script timed out after \(Int(timeout))s") + } + let out = (try? stdoutPipe.fileHandleForReading.readToEnd()) ?? Data() + let err = (try? stderrPipe.fileHandleForReading.readToEnd()) ?? Data() + // Best-effort fd close — Pipe leaks fd's otherwise. + try? stdoutPipe.fileHandleForReading.close() + try? stderrPipe.fileHandleForReading.close() + return .completed( + stdout: String(data: out, encoding: .utf8) ?? "", + stderr: String(data: err, encoding: .utf8) ?? "", + exitCode: proc.terminationStatus + ) + }.value + } + + // MARK: - Local path + + private static func runLocally(script: String, timeout: TimeInterval) async -> Outcome { + return await Task.detached { () -> Outcome in + let proc = Process() + proc.executableURL = URL(fileURLWithPath: "/bin/sh") + proc.arguments = ["-c", script] + + let stdoutPipe = Pipe() + let stderrPipe = Pipe() + proc.standardOutput = stdoutPipe + proc.standardError = stderrPipe + do { + try proc.run() + } catch { + return .connectFailure("Failed to launch /bin/sh: \(error.localizedDescription)") + } + let deadline = Date().addingTimeInterval(timeout) + while proc.isRunning && Date() < deadline { + try? await Task.sleep(nanoseconds: 100_000_000) + } + if proc.isRunning { + proc.terminate() + return .connectFailure("Script timed out after \(Int(timeout))s") + } + let out = (try? stdoutPipe.fileHandleForReading.readToEnd()) ?? Data() + let err = (try? stderrPipe.fileHandleForReading.readToEnd()) ?? Data() + try? stdoutPipe.fileHandleForReading.close() + try? stderrPipe.fileHandleForReading.close() + return .completed( + stdout: String(data: out, encoding: .utf8) ?? "", + stderr: String(data: err, encoding: .utf8) ?? "", + exitCode: proc.terminationStatus + ) + }.value + } + #endif // os(macOS) +} diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/ConnectionStatusViewModel.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/ConnectionStatusViewModel.swift index 0a4046b..74762b4 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/ConnectionStatusViewModel.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/ConnectionStatusViewModel.swift @@ -70,12 +70,10 @@ public final class ConnectionStatusViewModel { private let consecutiveFailureThreshold = 2 public let context: ServerContext - private let transport: any ServerTransport private var probeTask: Task? public init(context: ServerContext) { self.context = context - self.transport = context.makeTransport() if !context.isRemote { // Local contexts are always considered connected — no network // or auth can fail. @@ -108,7 +106,7 @@ public final class ConnectionStatusViewModel { } private func probeOnce() async { - let snapshot = transport + let snapshot = context let hermesHome = context.paths.home // Two-tier probe in one SSH round-trip: // tier 1: `true` — raw connectivity / auth / ControlMaster path @@ -162,39 +160,38 @@ public final class ConnectionStatusViewModel { case failure(TransportError) } - let outcome: ProbeOutcome = await Task.detached { - do { - let probe = try snapshot.runProcess( - executable: "/bin/sh", - args: ["-c", script], - stdin: nil, - timeout: 10 - ) - guard probe.exitCode == 0 else { - return .failure(.commandFailed(exitCode: probe.exitCode, stderr: probe.stderrString)) + // Issue #44: previously this used `transport.runProcess(executable: + // "/bin/sh", args: ["-c", script])`, which goes through + // SSHTransport's `remotePathArg` quoting. That mangles multi-line + // shell scripts containing `"$VAR"` references and nested + // quotes — the remote received a scrambled string and the if-test + // for config.yaml readability silently failed even when the file + // was readable. Result: 14/14 diagnostics passing AND a stuck + // "Connected — can't read Hermes state" pill, simultaneously, + // because diagnostics had its own runOverSSH workaround. Now + // both paths use SSHScriptRunner so they always agree. + let outcome: ProbeOutcome = await { + let result = await SSHScriptRunner.run(script: script, context: snapshot, timeout: 10) + switch result { + case .connectFailure(let msg): + return .failure(.other(message: msg)) + case .completed(let out, let stderr, let exitCode): + guard exitCode == 0 else { + return .failure(.commandFailed(exitCode: exitCode, stderr: stderr)) } - let out = probe.stdoutString let tier1 = out.contains("TIER1:0") let tier2 = out.contains("TIER2:0") if !tier1 { - // The script itself didn't reach tier 1 — treat as connection failure. return .failure(.commandFailed(exitCode: 1, stderr: out)) } if tier2 { return .connected } - // Connected but tier 2 failed. Parse the granular cause - // code; older remotes that don't emit a tag fall through - // to `.unknown` with a generic hint (issue #53). let cause = Self.parseDegradedCause(stdout: out) let (reason, hint) = Self.describe(cause: cause, hermesHome: hermesHome) return .degraded(reason: reason, hint: hint, cause: cause) - } catch let e as TransportError { - return .failure(e) - } catch { - return .failure(.other(message: error.localizedDescription)) } - }.value + }() switch outcome { case .connected: diff --git a/scarf/scarf/Features/Servers/ViewModels/RemoteDiagnosticsViewModel.swift b/scarf/scarf/Features/Servers/ViewModels/RemoteDiagnosticsViewModel.swift index c70578e..c62c76f 100644 --- a/scarf/scarf/Features/Servers/ViewModels/RemoteDiagnosticsViewModel.swift +++ b/scarf/scarf/Features/Servers/ViewModels/RemoteDiagnosticsViewModel.swift @@ -123,7 +123,11 @@ final class RemoteDiagnosticsViewModel { finishedAt = nil let script = Self.buildScript(hermesHome: context.paths.home) - let captured = await Self.execute(script: script, context: context) + // Use the shared SSHScriptRunner so this view model and the + // ConnectionStatusViewModel pill always agree on what the + // remote sees (issue #44 — the prior local copies of the + // workaround drifted from each other). + let captured = await SSHScriptRunner.run(script: script, context: context, timeout: 30) switch captured { case .connectFailure(let msg): @@ -282,164 +286,6 @@ final class RemoteDiagnosticsViewModel { """# } - enum Captured { - case connectFailure(String) - case completed(stdout: String, stderr: String, exitCode: Int32) - } - - private static func execute(script: String, context: ServerContext) async -> Captured { - // Can't use `transport.runProcess(executable: "/bin/sh", args: ["-c", script])` - // here: SSHTransport.runProcess pipes every argument through - // `remotePathArg` (which double-quotes to rewrite `~/` → `$HOME/`), - // which mangles a multi-line shell script containing `"$1"`, - // nested quotes, and `printf` escape sequences. The result on the - // remote is a scrambled string and every probe fails to emit. - // - // Mirror TestConnectionProbe's approach: build the ssh argv - // directly so the script travels as a single opaque argv entry - // that ssh forwards to the remote shell unchanged. - switch context.kind { - case .local: - return await runLocally(script: script) - case .ssh(let config): - return await runOverSSH(script: script, config: config) - } - } - - /// Direct ssh invocation. Pipes the script into `sh` on stdin rather - /// than passing it as `sh -c