From 9ff9a018e79ed159f28ad93c7ac59f4501d653b7 Mon Sep 17 00:00:00 2001 From: Alan Wizemann Date: Mon, 4 May 2026 23:14:03 +0200 Subject: [PATCH] =?UTF-8?q?feat(scarfmon,chat):=20Phase=203b=20=E2=80=94?= =?UTF-8?q?=20dampen=20finalize=20bursts=20+=20Thinking=E2=80=A6=20status?= =?UTF-8?q?=20+=20wider=20loadConfig=20stack?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three targeted fixes from the Phase 3a baseline. Bubble-burst dampening (Phase 3b-1): - RichChatViewModel.finalizeStreamingMessage wraps both the streaming-id rewrite and the empty-finalize remove() in a no-animation Transaction. The id flip from 0 → permanent value was the load-bearing trigger of the 5–8 RichMessageBubble.body fires we were seeing 1–2 ms after every `finalizeStreamingMessage` interval; SwiftUI ran an animated diff against neighbors and re-evaluated their bodies. The new message is content-equal to the streaming one — there is no animation worth running. Thinking… status promotion (Phase 3b-2): - RichChatViewModel exposes `isStreamingThoughtsOnly` — true while a turn is in flight, has emitted thought-stream bytes, and has not yet produced any visible assistant text. The Phase 3a baseline showed this is where most of the user-perceived "feels slow" lives: reasoning models commonly take 3–8 s before producing visible output, and Scarf surfaced no specific signal during that window. - Mac ChatView.displayedStatus promotes the toolbar pill to "Thinking…" when the flag is true. - iOS connectionBanner gains a transient "Thinking…" strip with spinner, same trigger condition. Phase 3a fix-up: - HermesFileService.loadConfig stack-trace logging widened from one frame to a 10-frame window prefixed with "#N", so the actual caller is visible past inlined ScarfMon wrappers (the prior log surfaced ScarfMon.measure itself, not the loadConfig caller). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ViewModels/RichChatViewModel.swift | 60 ++++++++++++++----- scarf/Scarf iOS/Chat/ChatView.swift | 16 ++++- .../Core/Services/HermesFileService.swift | 31 ++++++---- .../scarf/Features/Chat/Views/ChatView.swift | 43 ++++++++++++- 4 files changed, 119 insertions(+), 31 deletions(-) diff --git a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/RichChatViewModel.swift b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/RichChatViewModel.swift index 60905a9..37feac0 100644 --- a/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/RichChatViewModel.swift +++ b/scarf/Packages/ScarfCore/Sources/ScarfCore/ViewModels/RichChatViewModel.swift @@ -5,6 +5,7 @@ import Foundation import Observation +import SwiftUI public enum ChatDisplayMode: String, CaseIterable { case terminal @@ -379,6 +380,19 @@ public final class RichChatViewModel { private var streamingThinkingText = "" private var streamingToolCalls: [HermesToolCall] = [] + /// True while a turn is in flight, has emitted thought-stream + /// bytes, but has NOT yet produced any visible assistant text. + /// Surfaces the user-facing "Thinking…" status promotion (the + /// model is reasoning before answering — Hermes reasoning models + /// commonly take 3–8 s here, which the ScarfMon `firstThoughtByte` + /// vs `firstByte` split makes visible). Becomes false the moment + /// the first message chunk arrives or the turn ends. + public var isStreamingThoughtsOnly: Bool { + currentTurnStart != nil + && !streamingThinkingText.isEmpty + && streamingAssistantText.isEmpty + } + // DB polling state (used in terminal mode fallback) private var lastKnownFingerprint: HermesDataService.MessageFingerprint? private var debounceTask: Task? @@ -886,19 +900,31 @@ public final class RichChatViewModel { if hasContent { let id = nextLocalId nextLocalId -= 1 - messages[idx] = HermesMessage( - id: id, - sessionId: sessionId ?? 
"", - role: "assistant", - content: streamingAssistantText, - toolCallId: nil, - toolCalls: streamingToolCalls, - toolName: nil, - timestamp: Date(), - tokenCount: nil, - finishReason: streamingToolCalls.isEmpty ? "stop" : nil, - reasoning: streamingThinkingText.isEmpty ? nil : streamingThinkingText - ) + // Wrap the streaming-id rewrite in a no-animation + // transaction. Without this SwiftUI sees an identity + // change for the streaming ForEach element (id 0 → new + // permanent id) and runs an animated diff against + // adjacent elements, which costs ~5–8 RichMessageBubble + // body re-evaluations per turn-end (visible in the + // ScarfMon ring as a 1–2 ms burst right after every + // `finalizeStreamingMessage` interval). The new message + // is content-equal to the streaming one — there is no + // animation worth running. + withTransaction(Transaction(animation: nil)) { + messages[idx] = HermesMessage( + id: id, + sessionId: sessionId ?? "", + role: "assistant", + content: streamingAssistantText, + toolCallId: nil, + toolCalls: streamingToolCalls, + toolName: nil, + timestamp: Date(), + tokenCount: nil, + finishReason: streamingToolCalls.isEmpty ? "stop" : nil, + reasoning: streamingThinkingText.isEmpty ? nil : streamingThinkingText + ) + } // Capture per-turn duration so the chat UI can render the // stopwatch pill (v2.5). Skips assistants we don't have a // start time for — e.g., the .promptComplete fired but the @@ -909,8 +935,12 @@ public final class RichChatViewModel { currentTurnStart = nil } } else { - // Remove empty streaming placeholder - messages.remove(at: idx) + // Remove empty streaming placeholder. Same no-animation + // transaction pattern — empty-finalize used to ripple the + // ForEach diff to every following bubble. 
+ withTransaction(Transaction(animation: nil)) { + messages.remove(at: idx) + } } // Reset streaming state for next chunk diff --git a/scarf/Scarf iOS/Chat/ChatView.swift b/scarf/Scarf iOS/Chat/ChatView.swift index 7aafc7c..7d9852f 100644 --- a/scarf/Scarf iOS/Chat/ChatView.swift +++ b/scarf/Scarf iOS/Chat/ChatView.swift @@ -400,7 +400,21 @@ struct ChatView: View { showSpinner: false ) default: - EmptyView() + // v2.7: surface "Thinking…" while the agent's thought + // stream is in flight without any visible message bytes. + // Hermes reasoning models commonly take 3–8 s here and + // the streaming bubble has nothing to render — the user + // would otherwise see a stalled transcript. Disappears + // the moment the first message chunk arrives. + if controller.vm.isStreamingThoughtsOnly { + connectionBannerStrip( + text: "Thinking…", + tint: ScarfColor.info, + showSpinner: true + ) + } else { + EmptyView() + } } } diff --git a/scarf/scarf/Core/Services/HermesFileService.swift b/scarf/scarf/Core/Services/HermesFileService.swift index bfd9abf..d00fb43 100644 --- a/scarf/scarf/Core/Services/HermesFileService.swift +++ b/scarf/scarf/Core/Services/HermesFileService.swift @@ -17,18 +17,25 @@ struct HermesFileService: Sendable { // MARK: - Config nonisolated func loadConfig() -> HermesConfig { - ScarfMon.measure(.diskIO, "loadConfig") { - // ScarfMon — when Full mode is on, log the first stack - // frame above this call to the perf Logger channel so - // mystery callers (e.g. config reads with no user action) - // can be identified by tailing - // `log stream --predicate 'subsystem == "com.scarf.mon"'`. - // Symbol-only — no addresses, no PII. Backtrace alloc is - // gated on isActive so it's free outside Full mode. - if ScarfMon.isActive { - let caller = Thread.callStackSymbols.dropFirst(2).first ?? 
"" - Self.perfLogger.debug("loadConfig caller: \(caller, privacy: .public)") - } + // ScarfMon — when Full mode is on, log a window of stack + // frames at and above this call so mystery callers (e.g. config + // reads with no user action) can be identified by tailing + // `log stream --predicate 'subsystem == "com.scarf.mon"'`. + // The window spans frames 0..9 (`prefix(10)`): SwiftUI / ObservableObject + // body re-eval chains burn 4–6 frames before reaching the + // user code, so a one-frame window hides the real + // caller. Each frame is prefixed with "#N" and the frames are + // joined with " | ", so a single `log stream` line carries the full breadcrumb. + // Symbol-only — no addresses, no PII. Backtrace alloc is + // gated on isActive so it's free outside Full mode. + if ScarfMon.isActive { + let frames = Thread.callStackSymbols.prefix(10) + .enumerated() + .map { "#\($0.offset) \($0.element)" } + .joined(separator: " | ") + Self.perfLogger.debug("loadConfig stack: \(frames, privacy: .public)") + } + return ScarfMon.measure(.diskIO, "loadConfig") { guard let content = readFile(context.paths.configYAML) else { return .empty } return parseConfig(content) } diff --git a/scarf/scarf/Features/Chat/Views/ChatView.swift index aa9b28b..2587393 100644 --- a/scarf/scarf/Features/Chat/Views/ChatView.swift +++ b/scarf/scarf/Features/Chat/Views/ChatView.swift @@ -53,8 +53,14 @@ struct ChatView: View { // coordinator was already populated. Consume the request // here. The onChange below handles the live case.
if let pending = coordinator.pendingProjectChat { + let prompt = coordinator.pendingInitialPrompt coordinator.pendingProjectChat = nil - viewModel.startNewSession(projectPath: pending) + coordinator.pendingInitialPrompt = nil + if let prompt { + viewModel.startNewSessionAndSend(projectPath: pending, text: prompt) + } else { + viewModel.startNewSession(projectPath: pending) + } } // Same story for resume-session handoff: the user clicked // a session in the Projects Sessions tab (routes to `.chat` @@ -89,10 +95,22 @@ struct ChatView: View { // `.chat`; this view consumes the path and starts a fresh // session with cwd=projectPath. Attribution happens inside // ChatViewModel on successful session creation. + // + // The "New Project from Scratch" wizard (v2.8) sets the + // sister slot `pendingInitialPrompt` alongside the project + // path so the agent receives a kickoff prompt without the + // user having to type one. We drain both atomically and + // route to `startNewSessionAndSend` when present. .onChange(of: coord.pendingProjectChat) { _, new in if let projectPath = new { + let prompt = coordinator.pendingInitialPrompt coordinator.pendingProjectChat = nil - viewModel.startNewSession(projectPath: projectPath) + coordinator.pendingInitialPrompt = nil + if let prompt { + viewModel.startNewSessionAndSend(projectPath: projectPath, text: prompt) + } else { + viewModel.startNewSession(projectPath: projectPath) + } } } // Live handoff for resume: user clicked an existing session in @@ -109,6 +127,18 @@ struct ChatView: View { } /// Banner rendered between the toolbar and the chat area when either + /// Status string surfaced in the toolbar pill. When the agent's + /// thought stream is in flight without any visible message bytes + /// (Hermes reasoning models routinely take 3–8 s here), promote + /// the generic "Agent working..." to "Thinking…" so the user + /// sees the model is reasoning rather than stalled. v2.7. 
+ private var displayedStatus: String { + if viewModel.richChatViewModel.isStreamingThoughtsOnly { + return "Thinking…" + } + return viewModel.acpStatus.isEmpty ? "Active" : viewModel.acpStatus + } + /// (a) a preflight credential check failed, or (b) the ACP subprocess /// returned an error we captured. Shows a short hint + expandable raw /// details (stderr tail) that the user can copy to the clipboard. @@ -215,7 +245,14 @@ struct ChatView: View { Circle() .fill(.green) .frame(width: 6, height: 6) - (viewModel.acpStatus.isEmpty ? Text("Active") : Text(viewModel.acpStatus)) + // Promote the generic "Agent working..." status to + // "Thinking…" the moment the thought stream starts + // arriving without visible message bytes — the user + // gets a more honest signal that the model is + // reasoning, not stalled. Falls back to whatever + // status string the VM has when no thought stream + // is in flight. + Text(displayedStatus) .font(.caption) .foregroundStyle(.secondary) .lineLimit(1)