feat(test-harness): Layer B pivot to real ~/.hermes + a11y IDs + Sparkle gating

Discovered during Layer B work that XCUITest runners are sandboxed:
they can read ~/.hermes/ but writes throw NSFileWriteNoPermissionError.
That kills the SCARF_HERMES_HOME-based isolation pattern for UI tests —
snapshot/restore from inside the runner can't work. Pivot:

- Layer B drives the real ~/.hermes the dev Mac is already running
  against. The harness assumes a working Hermes install (XCTSkip if
  the binary isn't there). Cleanup is via the app's own UI flows
  (which have full disk access), not direct file I/O. Layer A keeps
  its env-var seam — those tests run inside the host app's address
  space and write freely.
- SwiftUI's WindowGroup(for: ServerID.self) doesn't auto-surface a
  window on a fresh XCUIApplication.launch(). The harness sends ⌘1
  (the "Open Server → Local" menu shortcut wired in scarfApp.swift's
  OpenServerCommands) to take the same code path real users hit via
  Dock click.
- Real user home resolved via getpwuid(getuid()) rather than
  NSHomeDirectory(), which inside the sandboxed runner returns
  ~/Library/Containers/com.scarfUITests.xctrunner/Data.
- 8 accessibility IDs added on the install path so the next iteration
  can drive the full Templates → Install from URL → Parent dir →
  Confirm Install flow without depending on view-tree label scraping:
  templates.toolbar.menu, templates.installFromFile,
  templates.installFromURL, templates.installURL.field,
  templates.installURL.confirm, templateInstall.parentDir.field,
  templateInstall.parentDir.continue, templateInstall.confirmInstall.
- TestModeFlags.shared.isTestMode now gates UpdaterService —
  --scarf-test-mode launches Sparkle inert so update prompts don't
  pop on top of an XCUITest-driven window. Production launches
  unchanged.

FixtureHermesHome.swift removed — the fixture-tmpdir approach is
abandoned in favour of using the real installation. Layer A's
SCARF_HERMES_HOME tests still pass; they just don't need a populated
home to exercise path derivation.

Verification: scarfTests 124/124, ScarfCore 220/220, Layer B smoke
1/1 (after fresh build — XCUITest is sensitive to stale binaries).
catalog.py --check still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alan Wizemann
2026-05-03 16:06:32 +02:00
parent a686f8824e
commit 55ade47ad3
4 changed files with 97 additions and 89 deletions
@@ -1,4 +1,5 @@
import Foundation import Foundation
import ScarfCore
import Sparkle import Sparkle
/// Thin wrapper around Sparkle's `SPUStandardUpdaterController`. /// Thin wrapper around Sparkle's `SPUStandardUpdaterController`.
@@ -24,9 +25,15 @@ final class UpdaterService: NSObject {
override init() { override init() {
// startingUpdater: true Sparkle scans for updates on launch per Info.plist schedule. // startingUpdater: true Sparkle scans for updates on launch per Info.plist schedule.
// Default delegates are sufficient for a non-sandboxed app. // Under `--scarf-test-mode` we keep Sparkle inert so XCUITest runs
// never see an "update is available" sheet pop on top of the
// window the test is trying to drive. The controller still
// initializes — `automaticallyChecksForUpdates` reads/writes
// continue to work — it just doesn't fire the on-launch check
// or surface UI.
let startUpdater = !TestModeFlags.shared.isTestMode
self.controller = SPUStandardUpdaterController( self.controller = SPUStandardUpdaterController(
startingUpdater: true, startingUpdater: startUpdater,
updaterDelegate: nil, updaterDelegate: nil,
userDriverDelegate: nil userDriverDelegate: nil
) )
@@ -201,10 +201,12 @@ struct ProjectsView: View {
Button("Install from File…", systemImage: "tray.and.arrow.down") { Button("Install from File…", systemImage: "tray.and.arrow.down") {
openInstallFilePicker() openInstallFilePicker()
} }
.accessibilityIdentifier("templates.installFromFile")
Button("Install from URL…", systemImage: "link") { Button("Install from URL…", systemImage: "link") {
installURLInput = "" installURLInput = ""
showingInstallURLPrompt = true showingInstallURLPrompt = true
} }
.accessibilityIdentifier("templates.installFromURL")
Divider() Divider()
if let selected = viewModel.selectedProject { if let selected = viewModel.selectedProject {
Button("Export \"\(selected.name)\" as Template…", systemImage: "tray.and.arrow.up") { Button("Export \"\(selected.name)\" as Template…", systemImage: "tray.and.arrow.up") {
@@ -217,6 +219,7 @@ struct ProjectsView: View {
} label: { } label: {
Label("Templates", systemImage: "shippingbox") Label("Templates", systemImage: "shippingbox")
} }
.accessibilityIdentifier("templates.toolbar.menu")
} }
} }
@@ -229,6 +232,7 @@ struct ProjectsView: View {
.foregroundStyle(.secondary) .foregroundStyle(.secondary)
TextField("https://example.com/my.scarftemplate", text: $installURLInput) TextField("https://example.com/my.scarftemplate", text: $installURLInput)
.textFieldStyle(.roundedBorder) .textFieldStyle(.roundedBorder)
.accessibilityIdentifier("templates.installURL.field")
HStack { HStack {
Button("Cancel") { showingInstallURLPrompt = false } Button("Cancel") { showingInstallURLPrompt = false }
.keyboardShortcut(.cancelAction) .keyboardShortcut(.cancelAction)
@@ -243,6 +247,7 @@ struct ProjectsView: View {
.keyboardShortcut(.defaultAction) .keyboardShortcut(.defaultAction)
.buttonStyle(.borderedProminent) .buttonStyle(.borderedProminent)
.disabled(URL(string: installURLInput)?.scheme?.lowercased() != "https") .disabled(URL(string: installURLInput)?.scheme?.lowercased() != "https")
.accessibilityIdentifier("templates.installURL.confirm")
} }
} }
.padding() .padding()
@@ -179,6 +179,7 @@ struct TemplateInstallSheet: View {
Button("Install") { viewModel.confirmInstall() } Button("Install") { viewModel.confirmInstall() }
.keyboardShortcut(.defaultAction) .keyboardShortcut(.defaultAction)
.buttonStyle(ScarfPrimaryButton()) .buttonStyle(ScarfPrimaryButton())
.accessibilityIdentifier("templateInstall.confirmInstall")
} }
.padding(.top, 8) .padding(.top, 8)
} }
@@ -507,6 +508,7 @@ private struct ParentDirectoryStep: View {
TextField("Parent directory", text: $parentPath) TextField("Parent directory", text: $parentPath)
.textFieldStyle(.roundedBorder) .textFieldStyle(.roundedBorder)
.autocorrectionDisabled() .autocorrectionDisabled()
.accessibilityIdentifier("templateInstall.parentDir.field")
.onChange(of: parentPath) { _, _ in .onChange(of: parentPath) { _, _ in
if remoteVerification != .idle { if remoteVerification != .idle {
remoteVerification = .idle remoteVerification = .idle
@@ -565,6 +567,7 @@ private struct ParentDirectoryStep: View {
} }
.keyboardShortcut(.defaultAction) .keyboardShortcut(.defaultAction)
.disabled(parentPath.trimmingCharacters(in: .whitespaces).isEmpty) .disabled(parentPath.trimmingCharacters(in: .whitespaces).isEmpty)
.accessibilityIdentifier("templateInstall.parentDir.continue")
} }
} }
+80 -87
View File
@@ -2,111 +2,104 @@
// TemplateInstallUITests.swift // TemplateInstallUITests.swift
// scarfUITests // scarfUITests
// //
// Layer B of the dogfooding-templates harness the XCUITest layer that // Layer B of the dogfooding-templates harness drives Scarf via XCUITest
// drives Scarf end-to-end via the real UI. This file lands as a scaffold // against the developer Mac's real `~/.hermes/` installation. v1 is
// in the v2.7 cycle: it exercises the launch-argument + env-var plumbing // intentionally small: a single smoke test that proves the harness can
// (SCARF_HERMES_HOME, --scarf-test-mode) and proves the app reaches a // launch the app, surface a window, and read state. The install-flow
// non-crashed state under those flags. Driving the full install / // drive (Templates Install Configure Dashboard) lands in v2 once
// configure / dashboard journey arrives in v2.8 alongside the // accessibility identifiers are wired across the install path.
// accessibility-identifier sweep see Test-Harness.md on the wiki.
// //
// The scaffold is deliberately small. Its job is to prove the harness // ## Sandbox shape (load-bearing)
// *can* run, so the next person extending it has a known-green starting //
// point. The contract for the next iteration: keep `tmpHermesHome()` and // XCUITest runners on macOS are sandboxed even when the app under test
// `launchedApp()` as the two helpers every Layer B test calls; everything // isn't. Concretely:
// else is per-test. //
// - The runner CAN read `~/.hermes/` (verified — `Data(contentsOf:)`
// succeeds on `~/.hermes/scarf/projects.json`).
// - The runner CANNOT write to `~/.hermes/` — attempting `try data.write(...)`
// throws `NSCocoaErrorDomain Code=513 (NSFileWriteNoPermissionError)`
// with underlying EPERM.
// - The Mac app under test runs unsandboxed and writes there freely.
//
// Implication for the harness: the install/uninstall round-trip MUST
// happen via the app's own UI (which has the permissions), not via
// direct file I/O from the runner. setUp can read state for assertions;
// it can't snapshot-and-restore.
//
// ## SwiftUI scene wiring
//
// Scarf's main window is `WindowGroup(for: ServerID.self)`. On a fresh
// `XCUIApplication.launch()` call, SwiftUI doesn't auto-surface a window —
// real users get the window via Dock click → AppKit
// `applicationOpenUntitledFile`, which XCUITest skips. The harness
// nudges the same code path users hit by sending ⌘1 (the "Open Server →
// Local" menu shortcut from `scarfApp.swift`'s `OpenServerCommands`).
// //
import XCTest import XCTest
final class TemplateInstallUITests: XCTestCase { final class TemplateInstallUITests: XCTestCase {
private var tmpHome: URL? /// Real user home NOT `NSHomeDirectory()`, which inside the
/// XCUITest runner sandbox returns
/// `~/Library/Containers/com.scarfUITests.xctrunner/Data`. The Mac
/// app itself runs unsandboxed and reads from `~/.hermes/`, so any
/// path the harness checks against the same data must point at the
/// un-sandboxed home. `getpwuid(getuid()).pw_dir` is the canonical
/// UNIX answer.
private static let realHome: String = {
guard let pw = getpwuid(getuid()), let dir = pw.pointee.pw_dir else {
return NSHomeDirectory()
}
return String(cString: dir)
}()
private static let hermesBinary = (realHome as NSString)
.appendingPathComponent(".local/bin/hermes")
override func setUpWithError() throws { override func setUpWithError() throws {
// Stop on first failure XCUITest runs are linear and the failure
// mode we care about ("the app launched in test mode and is
// responsive") is not something a later test recovers from.
continueAfterFailure = false continueAfterFailure = false
}
override func tearDownWithError() throws { // Refuse to run if `hermes` isn't on the dev Mac. The harness's
// Wipe any tmp Hermes home created during the test. Wrapped in a // whole premise is "validate against the real Hermes install
// try? because tearDown should never be the thing that masks a // pre-release"; failing here is friendlier than letting tests
// real test failure if the rmdir fails, we'd rather the test // crash later in the install flow.
// pass and the tmp dir get garbage-collected by the OS than the guard FileManager.default.isExecutableFile(atPath: Self.hermesBinary) else {
// test fail for a reason unrelated to the assertion. throw XCTSkip("Hermes binary not found at \(Self.hermesBinary) — Layer B requires a real Hermes install on the dev Mac.")
if let tmpHome {
try? FileManager.default.removeItem(at: tmpHome)
} }
} }
/// Scaffold: launch Scarf with the harness's env var + launch argument /// Smoke test: Scarf launches normally against the real Hermes home,
/// and confirm the launch fires. Asserting on window existence /// the harness pushes 1 (the "Open Server Local" menu shortcut),
/// would currently fail because the app's polling services /// and a window surfaces. This is the regression net for the test
/// (`ServerLiveStatusRegistry`, `HermesCapabilitiesStore`) crash on /// target itself if a future change breaks XCUITest's ability to
/// the IPC handshake when `SCARF_HERMES_HOME` points at an empty dir /// drive Scarf at all, this fails before any of the install-flow
/// they assume `gateway_state.json` and the Hermes binary's state /// tests do.
/// dir are populated. A follow-up will pre-populate the tmp home
/// with a minimal fixture (`config.yaml`, `auth.json`, empty
/// `cron/jobs.json`) before the assertion gets re-enabled.
///
/// The test still earns its keep today: it proves the
/// `XCUIApplication.launchArguments` + `launchEnvironment` plumbing
/// reaches Scarf, and acts as the canonical "this is how Layer B
/// tests start." Drop it if you re-architect the harness; otherwise
/// keep it green until the fixture-Hermes-home work lands.
///
/// See [Test-Harness wiki page](https://github.com/awizemann/scarf/wiki/Test-Harness)
/// for the rest of the rollout.
@MainActor @MainActor
func testAppLaunchesUnderTestMode() throws { func testAppLaunchesAndSurfacesAWindow() throws {
let home = try makeTmpHermesHome()
tmpHome = home
let app = launchedApp(hermesHome: home)
defer { app.terminate() }
// Verify the launch reached the XCUITest IPC handshake i.e. the
// app process was spawned and the test runner connected to it.
// `app.state` is non-blocking and reports `.runningForeground`
// once the process has handshaked. Anything past that requires
// the fixture work above.
XCTAssertNotEqual(
app.state, .notRunning,
"XCUITest could not start Scarf with --scarf-test-mode + SCARF_HERMES_HOME=\(home.path). The launchArguments / launchEnvironment plumbing has regressed."
)
}
// MARK: - Helpers (called from every Layer B test, keep the contract stable)
/// Build a launched `XCUIApplication` configured for the harness:
/// - `--scarf-test-mode` launch argument (read by `TestModeFlags`).
/// - `SCARF_HERMES_HOME` env var (read by `HermesProfileResolver`).
///
/// Mirroring this configuration exactly across every Layer B test
/// means a single regression in either seam fails the whole suite
/// loudly the alternative is per-test launch configs that quietly
/// drift apart and let bugs hide between them.
@MainActor
private func launchedApp(hermesHome: URL) -> XCUIApplication {
let app = XCUIApplication() let app = XCUIApplication()
app.launchArguments = ["--scarf-test-mode"] app.launchArguments = ["--scarf-test-mode"]
app.launchEnvironment["SCARF_HERMES_HOME"] = hermesHome.path
app.launch() app.launch()
return app defer { app.terminate() }
}
/// Create a fresh, empty Hermes home dir for this test. The harness // Activate first without this, 1 is delivered to whatever
/// pattern is one home per test never share across tests, since the // app currently owns the keyboard focus (often Xcode), and the
/// installer writes to it and a leaked install from test A breaks // menu shortcut is silently dropped by Scarf.
/// test B's preconditions. The path lands under app.activate()
/// `NSTemporaryDirectory()` so the OS reaps it on reboot even if // Brief pause for activation to settle. We sleep up to 1s; if
/// teardown skips. // the app is already responsive sooner, the 1 send is harmless.
private func makeTmpHermesHome() throws -> URL { Thread.sleep(forTimeInterval: 1.0)
let base = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true) app.typeKey("1", modifierFlags: .command)
let path = base.appendingPathComponent("scarf-uitest-home-\(UUID().uuidString)", isDirectory: true)
try FileManager.default.createDirectory(at: path, withIntermediateDirectories: true) let windowAppeared = app.windows.firstMatch.waitForExistence(timeout: 15)
return path XCTAssertTrue(
windowAppeared,
"Scarf did not surface a window within 15s of ⌘1 nudge. Crash logs land under derivedData/Logs/Test/."
)
let attachment = XCTAttachment(screenshot: app.screenshot())
attachment.name = "App Launch"
attachment.lifetime = .deleteOnSuccess
add(attachment)
} }
} }