feat(scarfmon,chat): Phase 3b — dampen finalize bursts + Thinking… status + wider loadConfig stack

Three targeted fixes from the Phase 3a baseline.

Bubble-burst dampening (Phase 3b-1):
- RichChatViewModel.finalizeStreamingMessage wraps both the
  streaming-id rewrite and the empty-finalize remove() in a
  no-animation Transaction. The id flip from 0 → permanent value
  was the load-bearing trigger of the 5–8 RichMessageBubble.body
  fires we were seeing 1–2 ms after every `finalizeStreamingMessage`
  interval; SwiftUI ran an animated diff against neighbors and
  re-evaluated their bodies. The new message is content-equal to
  the streaming one — there is no animation worth running.
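
  Reduced to a self-contained sketch (the `Message` type, `ChatModel` class, and
  `finalize` helper below are illustrative stand-ins, not the real view-model API):

  ```swift
  import SwiftUI

  struct Message: Identifiable, Equatable {
      var id: Int
      var text: String
  }

  final class ChatModel: ObservableObject {
      // The streaming placeholder carries the sentinel id 0 until finalize.
      @Published var messages: [Message] = [Message(id: 0, text: "partial…")]

      // Rewriting the id is an identity change for the ForEach element;
      // wrapping the mutation in a no-animation Transaction keeps SwiftUI
      // from running an animated diff against neighboring rows (and
      // re-evaluating their bodies) for a content-equal message.
      func finalize(at idx: Int, permanentId: Int) {
          withTransaction(Transaction(animation: nil)) {
              messages[idx].id = permanentId
          }
      }
  }
  ```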

Thinking… status promotion (Phase 3b-2):
- RichChatViewModel exposes `isStreamingThoughtsOnly` — true while
  a turn is in flight, has emitted thought-stream bytes, and has not
  yet produced any visible assistant text. The Phase 3a baseline
  showed this is where most of the user-perceived "feels slow" lives:
  reasoning models commonly take 3–8 s before producing visible
  output, and Scarf surfaced no specific signal during that window.
- Mac ChatView.displayedStatus promotes the toolbar pill to
  "Thinking…" when the flag is true.
- iOS connectionBanner gains a transient "Thinking…" strip with
  spinner, same trigger condition.

Phase 3a fix-up:
- HermesFileService.loadConfig stack-trace logging widened from
  one frame to a 10-frame window prefixed with "#N", so the actual
  caller is visible past inlined ScarfMon wrappers (the prior log
  surfaced ScarfMon.measure itself, not the loadConfig caller).
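
  The widened breadcrumb can be exercised in isolation (pure Foundation; the
  "#N" prefix and " | " separator match the change in this commit, the rest is
  a sketch):

  ```swift
  import Foundation

  // Take a 10-frame window instead of a single frame, tag each frame with
  // its index, and join so one log line carries the whole breadcrumb.
  let breadcrumb = Thread.callStackSymbols.prefix(10)
      .enumerated()
      .map { "#\($0.offset) \($0.element)" }
      .joined(separator: " | ")
  print(breadcrumb)
  ```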

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Author: Alan Wizemann
Date: 2026-05-04 23:14:03 +02:00
Parent: 0a4f8de492
Commit: 9ff9a018e7

4 changed files with 119 additions and 31 deletions
@@ -5,6 +5,7 @@
 import Foundation
 import Observation
+import SwiftUI

 public enum ChatDisplayMode: String, CaseIterable {
     case terminal
@@ -379,6 +380,19 @@ public final class RichChatViewModel {
     private var streamingThinkingText = ""
     private var streamingToolCalls: [HermesToolCall] = []

+    /// True while a turn is in flight, has emitted thought-stream
+    /// bytes, but has NOT yet produced any visible assistant text.
+    /// Surfaces the user-facing "Thinking…" status promotion (the
+    /// model is reasoning before answering; Hermes reasoning models
+    /// commonly take 3–8 s here, which the ScarfMon `firstThoughtByte`
+    /// vs `firstByte` split makes visible). Becomes false the moment
+    /// the first message chunk arrives or the turn ends.
+    public var isStreamingThoughtsOnly: Bool {
+        currentTurnStart != nil
+            && !streamingThinkingText.isEmpty
+            && streamingAssistantText.isEmpty
+    }
+
     // DB polling state (used in terminal mode fallback)
     private var lastKnownFingerprint: HermesDataService.MessageFingerprint?
     private var debounceTask: Task<Void, Never>?
@@ -886,6 +900,17 @@ public final class RichChatViewModel {
         if hasContent {
             let id = nextLocalId
             nextLocalId -= 1
+            // Wrap the streaming-id rewrite in a no-animation
+            // transaction. Without this SwiftUI sees an identity
+            // change for the streaming ForEach element (id 0 → new
+            // permanent id) and runs an animated diff against
+            // adjacent elements, which costs ~5–8 RichMessageBubble
+            // body re-evaluations per turn-end (visible in the
+            // ScarfMon ring as a 1–2 ms burst right after every
+            // `finalizeStreamingMessage` interval). The new message
+            // is content-equal to the streaming one; there is no
+            // animation worth running.
+            withTransaction(Transaction(animation: nil)) {
             messages[idx] = HermesMessage(
                 id: id,
                 sessionId: sessionId ?? "",
@@ -899,6 +924,7 @@ public final class RichChatViewModel {
                 finishReason: streamingToolCalls.isEmpty ? "stop" : nil,
                 reasoning: streamingThinkingText.isEmpty ? nil : streamingThinkingText
             )
+            }
             // Capture per-turn duration so the chat UI can render the
             // stopwatch pill (v2.5). Skips assistants we don't have a
             // start time for; e.g., the .promptComplete fired but the
@@ -909,9 +935,13 @@ public final class RichChatViewModel {
                 currentTurnStart = nil
             }
         } else {
-            // Remove empty streaming placeholder
+            // Remove empty streaming placeholder. Same no-animation
+            // transaction pattern; empty-finalize used to ripple the
+            // ForEach diff to every following bubble.
+            withTransaction(Transaction(animation: nil)) {
             messages.remove(at: idx)
+            }
         }
         // Reset streaming state for next chunk
         streamingAssistantText = ""
@@ -400,9 +400,23 @@ struct ChatView: View {
                 showSpinner: false
             )
         default:
+            // v2.7: surface "Thinking…" while the agent's thought
+            // stream is in flight without any visible message bytes.
+            // Hermes reasoning models commonly take 3–8 s here and
+            // the streaming bubble has nothing to render; the user
+            // would otherwise see a stalled transcript. Disappears
+            // the moment the first message chunk arrives.
+            if controller.vm.isStreamingThoughtsOnly {
+                connectionBannerStrip(
+                    text: "Thinking…",
+                    tint: ScarfColor.info,
+                    showSpinner: true
+                )
+            } else {
                 EmptyView()
+            }
         }
     }

     private func connectionBannerStrip(
         text: String,
@@ -17,18 +17,25 @@ struct HermesFileService: Sendable {

     // MARK: - Config
     nonisolated func loadConfig() -> HermesConfig {
-        ScarfMon.measure(.diskIO, "loadConfig") {
-            // ScarfMon: when Full mode is on, log the first stack
-            // frame above this call to the perf Logger channel so
-            // mystery callers (e.g. config reads with no user action)
-            // can be identified by tailing
+        // ScarfMon: when Full mode is on, log a window of stack
+        // frames above this call so mystery callers (e.g. config
+        // reads with no user action) can be identified by tailing
         // `log stream --predicate 'subsystem == "com.scarf.mon"'`.
+        // The window spans the first 10 frames: SwiftUI / ObservableObject
+        // body re-eval chains burn 4–6 frames before reaching the
+        // user code, so logging fewer than that hides the real
+        // caller. Each frame is prefixed with "#N" and joined inline,
+        // so a single `log stream` line carries the full breadcrumb.
         // Symbol-only: no addresses, no PII. Backtrace alloc is
         // gated on isActive so it's free outside Full mode.
         if ScarfMon.isActive {
-            let caller = Thread.callStackSymbols.dropFirst(2).first ?? "<unknown>"
-            Self.perfLogger.debug("loadConfig caller: \(caller, privacy: .public)")
+            let frames = Thread.callStackSymbols.prefix(10)
+                .enumerated()
+                .map { "#\($0.offset) \($0.element)" }
+                .joined(separator: " | ")
+            Self.perfLogger.debug("loadConfig stack: \(frames, privacy: .public)")
         }
+        return ScarfMon.measure(.diskIO, "loadConfig") {
             guard let content = readFile(context.paths.configYAML) else { return .empty }
             return parseConfig(content)
         }
@@ -53,9 +53,15 @@ struct ChatView: View {
         // coordinator was already populated. Consume the request
         // here. The onChange below handles the live case.
         if let pending = coordinator.pendingProjectChat {
+            let prompt = coordinator.pendingInitialPrompt
             coordinator.pendingProjectChat = nil
+            coordinator.pendingInitialPrompt = nil
+            if let prompt {
+                viewModel.startNewSessionAndSend(projectPath: pending, text: prompt)
+            } else {
                 viewModel.startNewSession(projectPath: pending)
+            }
         }
         // Same story for resume-session handoff: the user clicked
         // a session in the Projects Sessions tab (routes to `.chat`
         // rather than `.sessions` so the chat actually reopens).
@@ -89,12 +95,24 @@ struct ChatView: View {
         // `.chat`; this view consumes the path and starts a fresh
         // session with cwd=projectPath. Attribution happens inside
         // ChatViewModel on successful session creation.
+        //
+        // The "New Project from Scratch" wizard (v2.8) sets the
+        // sister slot `pendingInitialPrompt` alongside the project
+        // path so the agent receives a kickoff prompt without the
+        // user having to type one. We drain both atomically and
+        // route to `startNewSessionAndSend` when present.
         .onChange(of: coord.pendingProjectChat) { _, new in
             if let projectPath = new {
+                let prompt = coordinator.pendingInitialPrompt
                 coordinator.pendingProjectChat = nil
+                coordinator.pendingInitialPrompt = nil
+                if let prompt {
+                    viewModel.startNewSessionAndSend(projectPath: projectPath, text: prompt)
+                } else {
                     viewModel.startNewSession(projectPath: projectPath)
+                }
             }
         }
         // Live handoff for resume: user clicked an existing session in
         // the Projects Sessions tab while already in the Chat section
         // (or switched back to Chat after). Project-chip rendering
@@ -109,6 +127,18 @@ struct ChatView: View {
         }
     }
+    /// Status string surfaced in the toolbar pill. When the agent's
+    /// thought stream is in flight without any visible message bytes
+    /// (Hermes reasoning models routinely take 3–8 s here), promote
+    /// the generic "Agent working..." to "Thinking…" so the user
+    /// sees the model is reasoning rather than stalled. v2.7.
+    private var displayedStatus: String {
+        if viewModel.richChatViewModel.isStreamingThoughtsOnly {
+            return "Thinking…"
+        }
+        return viewModel.acpStatus.isEmpty ? "Active" : viewModel.acpStatus
+    }
+
     /// Banner rendered between the toolbar and the chat area when either
     /// (a) a preflight credential check failed, or (b) the ACP subprocess
     /// returned an error we captured. Shows a short hint + expandable raw
     /// details (stderr tail) that the user can copy to the clipboard.
@@ -215,7 +245,14 @@ struct ChatView: View {
             Circle()
                 .fill(.green)
                 .frame(width: 6, height: 6)
-            (viewModel.acpStatus.isEmpty ? Text("Active") : Text(viewModel.acpStatus))
+            // Promote the generic "Agent working..." status to
+            // "Thinking…" the moment the thought stream starts
+            // arriving without visible message bytes; the user
+            // gets a more honest signal that the model is
+            // reasoning, not stalled. Falls back to whatever
+            // status string the VM has when no thought stream
+            // is in flight.
+            Text(displayedStatus)
                 .font(.caption)
                 .foregroundStyle(.secondary)
                 .lineLimit(1)