feat(test-harness): Layer B pivot to real ~/.hermes + a11y IDs + Sparkle gating

Discovered during Layer B work that XCUITest runners are sandboxed:
they can read ~/.hermes/ but writes throw NSFileWriteNoPermissionError.
That kills the SCARF_HERMES_HOME-based isolation pattern for UI tests —
snapshot/restore from inside the runner can't work. Pivot:

- Layer B drives the real ~/.hermes the dev Mac is already running
  against. The harness assumes a working Hermes install (XCTSkip if
  the binary isn't there). Cleanup is via the app's own UI flows
  (which have full disk access), not direct file I/O. Layer A keeps
  its env-var seam — those tests run inside the host app's address
  space and write freely.
- SwiftUI's WindowGroup(for: ServerID.self) doesn't auto-surface a
  window on a fresh XCUIApplication.launch(). The harness sends ⌘1
  (the "Open Server → Local" menu shortcut wired in scarfApp.swift's
  OpenServerCommands) to take the same code path real users hit via
  Dock click.
- Real user home resolved via getpwuid(getuid()) rather than
  NSHomeDirectory(), which inside the sandboxed runner returns
  ~/Library/Containers/com.scarfUITests.xctrunner/Data.
- 8 accessibility IDs added on the install path so the next iteration
  can drive the full Templates → Install from URL → Parent dir →
  Confirm Install flow without depending on view-tree label scraping:
  templates.toolbar.menu, templates.installFromFile,
  templates.installFromURL, templates.installURL.field,
  templates.installURL.confirm, templateInstall.parentDir.field,
  templateInstall.parentDir.continue, templateInstall.confirmInstall.
- TestModeFlags.shared.isTestMode now gates UpdaterService —
  --scarf-test-mode launches Sparkle inert so update prompts don't
  pop on top of an XCUITest-driven window. Production launches
  unchanged.

FixtureHermesHome.swift removed — the fixture-tmpdir approach is
abandoned in favour of using the real installation. Layer A's
SCARF_HERMES_HOME tests still pass; they just don't need a populated
home to exercise path derivation.

Verification: scarfTests 124/124, ScarfCore 220/220, Layer B smoke
1/1 (after fresh build — XCUITest is sensitive to stale binaries).
catalog.py --check still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-05-03 16:06:32 +02:00
parent a686f8824e
commit 55ade47ad3
4 changed files with 97 additions and 89 deletions
@@ -1,4 +1,5 @@
import Foundation
import ScarfCore
import Sparkle
/// Thin wrapper around Sparkle's `SPUStandardUpdaterController`.
@@ -24,9 +25,15 @@ final class UpdaterService: NSObject {
override init() {
// startingUpdater: true → Sparkle scans for updates on launch per Info.plist schedule.
// Default delegates are sufficient for a non-sandboxed app.
// Under `--scarf-test-mode` we keep Sparkle inert so XCUITest runs
// never see an "update is available" sheet pop on top of the
// window the test is trying to drive. The controller still
// initializes — `automaticallyChecksForUpdates` reads/writes
// continue to work — it just doesn't fire the on-launch check
// or surface UI.
let startUpdater = !TestModeFlags.shared.isTestMode
self.controller = SPUStandardUpdaterController(
startingUpdater: true,
startingUpdater: startUpdater,
updaterDelegate: nil,
userDriverDelegate: nil
)
@@ -201,10 +201,12 @@ struct ProjectsView: View {
Button("Install from File…", systemImage: "tray.and.arrow.down") {
openInstallFilePicker()
}
.accessibilityIdentifier("templates.installFromFile")
Button("Install from URL…", systemImage: "link") {
installURLInput = ""
showingInstallURLPrompt = true
}
.accessibilityIdentifier("templates.installFromURL")
Divider()
if let selected = viewModel.selectedProject {
Button("Export \"\(selected.name)\" as Template…", systemImage: "tray.and.arrow.up") {
@@ -217,6 +219,7 @@ struct ProjectsView: View {
} label: {
Label("Templates", systemImage: "shippingbox")
}
.accessibilityIdentifier("templates.toolbar.menu")
}
}
@@ -229,6 +232,7 @@ struct ProjectsView: View {
.foregroundStyle(.secondary)
TextField("https://example.com/my.scarftemplate", text: $installURLInput)
.textFieldStyle(.roundedBorder)
.accessibilityIdentifier("templates.installURL.field")
HStack {
Button("Cancel") { showingInstallURLPrompt = false }
.keyboardShortcut(.cancelAction)
@@ -243,6 +247,7 @@ struct ProjectsView: View {
.keyboardShortcut(.defaultAction)
.buttonStyle(.borderedProminent)
.disabled(URL(string: installURLInput)?.scheme?.lowercased() != "https")
.accessibilityIdentifier("templates.installURL.confirm")
}
}
.padding()
@@ -179,6 +179,7 @@ struct TemplateInstallSheet: View {
Button("Install") { viewModel.confirmInstall() }
.keyboardShortcut(.defaultAction)
.buttonStyle(ScarfPrimaryButton())
.accessibilityIdentifier("templateInstall.confirmInstall")
}
.padding(.top, 8)
}
@@ -507,6 +508,7 @@ private struct ParentDirectoryStep: View {
TextField("Parent directory", text: $parentPath)
.textFieldStyle(.roundedBorder)
.autocorrectionDisabled()
.accessibilityIdentifier("templateInstall.parentDir.field")
.onChange(of: parentPath) { _, _ in
if remoteVerification != .idle {
remoteVerification = .idle
@@ -565,6 +567,7 @@ private struct ParentDirectoryStep: View {
}
.keyboardShortcut(.defaultAction)
.disabled(parentPath.trimmingCharacters(in: .whitespaces).isEmpty)
.accessibilityIdentifier("templateInstall.parentDir.continue")
}
}
+80 -87
View File
@@ -2,111 +2,104 @@
// TemplateInstallUITests.swift
// scarfUITests
//
// Layer B of the dogfooding-templates harness — the XCUITest layer that
// drives Scarf end-to-end via the real UI. This file lands as a scaffold
// in the v2.7 cycle: it exercises the launch-argument + env-var plumbing
// (SCARF_HERMES_HOME, --scarf-test-mode) and proves the app reaches a
// non-crashed state under those flags. Driving the full install /
// configure / dashboard journey arrives in v2.8 alongside the
// accessibility-identifier sweep — see Test-Harness.md on the wiki.
// Layer B of the dogfooding-templates harness — drives Scarf via XCUITest
// against the developer Mac's real `~/.hermes/` installation. v1 is
// intentionally small: a single smoke test that proves the harness can
// launch the app, surface a window, and read state. The install-flow
// drive (Templates → Install → Configure → Dashboard) lands in v2 once
// accessibility identifiers are wired across the install path.
//
// The scaffold is deliberately small. Its job is to prove the harness
// *can* run, so the next person extending it has a known-green starting
// point. The contract for the next iteration: keep `tmpHermesHome()` and
// `launchedApp()` as the two helpers every Layer B test calls; everything
// else is per-test.
// ## Sandbox shape (load-bearing)
//
// XCUITest runners on macOS are sandboxed even when the app under test
// isn't. Concretely:
//
// - The runner CAN read `~/.hermes/` (verified — `Data(contentsOf:)`
//   succeeds on `~/.hermes/scarf/projects.json`).
// - The runner CANNOT write to `~/.hermes/` — attempting `try data.write(...)`
//   throws `NSCocoaErrorDomain Code=513 (NSFileWriteNoPermissionError)`
//   with underlying EPERM.
// - The Mac app under test runs unsandboxed and writes there freely.
//
// Implication for the harness: the install/uninstall round-trip MUST
// happen via the app's own UI (which has the permissions), not via
// direct file I/O from the runner. setUp can read state for assertions;
// it can't snapshot-and-restore.
//
// ## SwiftUI scene wiring
//
// Scarf's main window is `WindowGroup(for: ServerID.self)`. On a fresh
// `XCUIApplication.launch()` call, SwiftUI doesn't auto-surface a window —
// real users get the window via Dock click → AppKit
// `applicationOpenUntitledFile`, which XCUITest skips. The harness
// nudges the same code path users hit by sending ⌘1 (the "Open Server →
// Local" menu shortcut from `scarfApp.swift`'s `OpenServerCommands`).
//
import XCTest
final class TemplateInstallUITests: XCTestCase {
private var tmpHome: URL?
/// The invoking user's real home directory, taken from the password
/// database entry for the current uid (`getpwuid(getuid()).pw_dir`).
///
/// `NSHomeDirectory()` is deliberately NOT the primary source: inside
/// the XCUITest runner sandbox it returns
/// `~/Library/Containers/com.scarfUITests.xctrunner/Data`, whereas the
/// Mac app under test runs unsandboxed and reads from `~/.hermes/`.
/// Any path the harness compares against the app's data therefore has
/// to be rooted at the un-sandboxed home. `NSHomeDirectory()` is used
/// only as a fallback when the passwd lookup yields nothing.
private static let realHome: String = {
if let passwdEntry = getpwuid(getuid()), let homeDir = passwdEntry.pointee.pw_dir {
return String(cString: homeDir)
}
return NSHomeDirectory()
}()
private static let hermesBinary = (realHome as NSString)
.appendingPathComponent(".local/bin/hermes")
override func setUpWithError() throws {
// Stop on first failure — XCUITest runs are linear and the failure
// mode we care about ("the app launched in test mode and is
// responsive") is not something a later test recovers from.
continueAfterFailure = false
}
override func tearDownWithError() throws {
// Wipe any tmp Hermes home created during the test. Wrapped in a
// try? because tearDown should never be the thing that masks a
// real test failure — if the rmdir fails, we'd rather the test
// pass and the tmp dir get garbage-collected by the OS than the
// test fail for a reason unrelated to the assertion.
if let tmpHome {
try? FileManager.default.removeItem(at: tmpHome)
// Refuse to run if `hermes` isn't on the dev Mac. The harness's
// whole premise is "validate against the real Hermes install
// pre-release"; failing here is friendlier than letting tests
// crash later in the install flow.
guard FileManager.default.isExecutableFile(atPath: Self.hermesBinary) else {
throw XCTSkip("Hermes binary not found at \(Self.hermesBinary) — Layer B requires a real Hermes install on the dev Mac.")
}
}
/// Scaffold: launch Scarf with the harness's env var + launch argument
/// and confirm the launch fires. Asserting on window existence
/// would currently fail because the app's polling services
/// (`ServerLiveStatusRegistry`, `HermesCapabilitiesStore`) crash on
/// the IPC handshake when `SCARF_HERMES_HOME` points at an empty dir —
/// they assume `gateway_state.json` and the Hermes binary's state
/// dir are populated. A follow-up will pre-populate the tmp home
/// with a minimal fixture (`config.yaml`, `auth.json`, empty
/// `cron/jobs.json`) before the assertion gets re-enabled.
///
/// The test still earns its keep today: it proves the
/// `XCUIApplication.launchArguments` + `launchEnvironment` plumbing
/// reaches Scarf, and acts as the canonical "this is how Layer B
/// tests start." Drop it if you re-architect the harness; otherwise
/// keep it green until the fixture-Hermes-home work lands.
///
/// See [Test-Harness wiki page](https://github.com/awizemann/scarf/wiki/Test-Harness)
/// for the rest of the rollout.
/// Smoke test: Scarf launches normally against the real Hermes home,
/// the harness pushes ⌘1 (the "Open Server → Local" menu shortcut),
/// and a window surfaces. This is the regression net for the test
/// target itself — if a future change breaks XCUITest's ability to
/// drive Scarf at all, this fails before any of the install-flow
/// tests do.
@MainActor
func testAppLaunchesUnderTestMode() throws {
let home = try makeTmpHermesHome()
tmpHome = home
let app = launchedApp(hermesHome: home)
defer { app.terminate() }
// Verify the launch reached the XCUITest IPC handshake — i.e. the
// app process was spawned and the test runner connected to it.
// `app.state` is non-blocking and reports `.runningForeground`
// once the process has handshaked. Anything past that requires
// the fixture work above.
XCTAssertNotEqual(
app.state, .notRunning,
"XCUITest could not start Scarf with --scarf-test-mode + SCARF_HERMES_HOME=\(home.path). The launchArguments / launchEnvironment plumbing has regressed."
)
}
// MARK: - Helpers (called from every Layer B test, keep the contract stable)
/// Build a launched `XCUIApplication` configured for the harness:
/// - `--scarf-test-mode` launch argument (read by `TestModeFlags`).
/// - `SCARF_HERMES_HOME` env var (read by `HermesProfileResolver`).
///
/// Mirroring this configuration exactly across every Layer B test
/// means a single regression in either seam fails the whole suite
/// loudly — the alternative is per-test launch configs that quietly
/// drift apart and let bugs hide between them.
@MainActor
private func launchedApp(hermesHome: URL) -> XCUIApplication {
func testAppLaunchesAndSurfacesAWindow() throws {
let app = XCUIApplication()
app.launchArguments = ["--scarf-test-mode"]
app.launchEnvironment["SCARF_HERMES_HOME"] = hermesHome.path
app.launch()
return app
}
defer { app.terminate() }
/// Create a fresh, empty Hermes home dir for this test. The harness
/// pattern is one home per test — never share across tests, since the
/// installer writes to it and a leaked install from test A breaks
/// test B's preconditions. The path lands under
/// `NSTemporaryDirectory()` so the OS reaps it on reboot even if
/// teardown skips.
private func makeTmpHermesHome() throws -> URL {
let base = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
let path = base.appendingPathComponent("scarf-uitest-home-\(UUID().uuidString)", isDirectory: true)
try FileManager.default.createDirectory(at: path, withIntermediateDirectories: true)
return path
// Activate first — without this, ⌘1 is delivered to whatever
// app currently owns the keyboard focus (often Xcode), and the
// menu shortcut is silently dropped by Scarf.
app.activate()
// Brief pause for activation to settle. This is a fixed 1s sleep; if
// the app is already responsive sooner, the delayed ⌘1 send is harmless.
Thread.sleep(forTimeInterval: 1.0)
app.typeKey("1", modifierFlags: .command)
let windowAppeared = app.windows.firstMatch.waitForExistence(timeout: 15)
XCTAssertTrue(
windowAppeared,
"Scarf did not surface a window within 15s of ⌘1 nudge. Crash logs land under derivedData/Logs/Test/."
)
let attachment = XCTAttachment(screenshot: app.screenshot())
attachment.name = "App Launch"
attachment.lifetime = .deleteOnSuccess
add(attachment)
}
}