mirror of
https://github.com/awizemann/scarf.git
synced 2026-05-10 10:36:35 +00:00
feat(hermes-v12): ACP multimodal image input on Mac + iOS (Phase C)
Hermes v0.12 advertises `prompt_capabilities.image = true` and accepts
image content blocks in `session/prompt`. This wires a producer flow on
both targets so users can attach images alongside text and have them
routed to the vision-capable model automatically.
Pipeline:
- ChatImageAttachment: Sendable value type holding base64 payload +
thumbnail, MIME type, source filename, and approximate byte count.
- ImageEncoder: detached-only Sendable service that downsamples to
Anthropic's 1568px long-edge cap, JPEG-encodes at q=0.85, and
produces a small inline thumbnail for composer chips. Cross-platform
(NSImage on Mac, UIImage on iOS, JPEG-passthrough on Linux/CI).
- ACPClient.sendPrompt(sessionId:text:images:) overload emits a content
array `[{type: "text"...}, {type: "image", data, mimeType}]` matching
the wire shape in hermes-agent/acp_adapter/server.py. The
zero-arg-images convenience overload preserves the v0.11 wire shape
for any unmodified callers.
Mac UI:
- RichChatInputBar grew an `attachments: [ChatImageAttachment]` state
array, a paperclip button (NSOpenPanel multi-pick), drag-drop and
paste handlers, and a horizontal preview chip strip. The "send"
callback's signature is `(String, [ChatImageAttachment]) -> Void`
threaded through RichChatView -> ChatTranscriptPane -> ChatView ->
ChatViewModel.sendText(text, images:). Image-only prompts are
permitted ("describe this") once at least one attachment is queued.
iOS UI:
- ChatView's composer adopts a paperclip + PhotosPicker flow with the
same chip strip and 5-attachment cap. Attachments live on
ChatController so they survive across PhotosPicker presentations.
loadTransferable(type: Data.self) feeds raw bytes into the same
ImageEncoder; encode work runs detached so MainActor stays
responsive on cellular.
Capability gating:
- Both composers hide the entire attachment surface when
HermesCapabilities.hasACPImagePrompts is false (pre-v0.12 hosts).
No paperclip button, no drop target, no paste accept — the input bar
is byte-for-byte the v0.11 surface against an older Hermes.
Tests: 209 ScarfCore tests pass; both Mac and iOS schemes build clean.
The encoder's pixel work is hard to unit-test at the package level
(no NSImage/UIImage in plain Swift CI) — manual end-to-end testing
is the verification path here.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -266,14 +266,47 @@ public actor ACPClient {
|
|||||||
// MARK: - Messaging
|
// MARK: - Messaging
|
||||||
|
|
||||||
public func sendPrompt(sessionId: String, text: String) async throws -> ACPPromptResult {
|
public func sendPrompt(sessionId: String, text: String) async throws -> ACPPromptResult {
|
||||||
|
try await sendPrompt(sessionId: sessionId, text: text, images: [])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// v0.12+ overload: forward zero or more image attachments alongside
|
||||||
|
/// the user's text. Each attachment becomes a separate
|
||||||
|
/// `ImageContentBlock` in the ACP `prompt` content array — matches
|
||||||
|
/// the shape Hermes' `acp_adapter/server.py` expects (text first,
|
||||||
|
/// then image blocks). Hermes routes the resulting payload to a
|
||||||
|
/// vision-capable model automatically; the producer side only has
|
||||||
|
/// to deliver the bytes.
|
||||||
|
///
|
||||||
|
/// Pre-v0.12 Hermes installs accepted only a single `text` block.
|
||||||
|
/// Callers gate this overload on
|
||||||
|
/// `HermesCapabilitiesStore.capabilities.hasACPImagePrompts` so we
|
||||||
|
/// don't send blocks an older agent would silently drop.
|
||||||
|
public func sendPrompt(
|
||||||
|
sessionId: String,
|
||||||
|
text: String,
|
||||||
|
images: [ChatImageAttachment]
|
||||||
|
) async throws -> ACPPromptResult {
|
||||||
statusMessage = "Sending prompt..."
|
statusMessage = "Sending prompt..."
|
||||||
let messageId = UUID().uuidString
|
let messageId = UUID().uuidString
|
||||||
|
|
||||||
|
// Always include the text block, even when empty — keeps the
|
||||||
|
// server-side text-extraction path stable regardless of whether
|
||||||
|
// the user sent text alongside the image(s).
|
||||||
|
var promptBlocks: [[String: Any]] = [
|
||||||
|
["type": "text", "text": text] as [String: Any],
|
||||||
|
]
|
||||||
|
for image in images {
|
||||||
|
promptBlocks.append([
|
||||||
|
"type": "image",
|
||||||
|
"data": image.base64Data,
|
||||||
|
"mimeType": image.mimeType,
|
||||||
|
] as [String: Any])
|
||||||
|
}
|
||||||
|
|
||||||
let params: [String: AnyCodable] = [
|
let params: [String: AnyCodable] = [
|
||||||
"sessionId": AnyCodable(sessionId),
|
"sessionId": AnyCodable(sessionId),
|
||||||
"messageId": AnyCodable(messageId),
|
"messageId": AnyCodable(messageId),
|
||||||
"prompt": AnyCodable([
|
"prompt": AnyCodable(promptBlocks as [Any]),
|
||||||
["type": "text", "text": text] as [String: Any],
|
|
||||||
] as [Any]),
|
|
||||||
]
|
]
|
||||||
let result = try await sendRequest(method: "session/prompt", params: params)
|
let result = try await sendRequest(method: "session/prompt", params: params)
|
||||||
let dict = result?.dictValue ?? [:]
|
let dict = result?.dictValue ?? [:]
|
||||||
|
|||||||
@@ -0,0 +1,52 @@
|
|||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// One image attached to an outgoing chat prompt.
|
||||||
|
///
|
||||||
|
/// Hermes v0.12 ACP advertises `prompt_capabilities.image = true` and
|
||||||
|
/// accepts content-block arrays in `session/prompt`. Scarf produces these
|
||||||
|
/// blocks from drag-dropped / pasted / picker-selected images. We
|
||||||
|
/// downsample + JPEG-encode at the producer side so the wire payload
|
||||||
|
/// stays under a few hundred kilobytes per image even when the user
|
||||||
|
/// drops a 12 MP screenshot.
|
||||||
|
///
|
||||||
|
/// Constructed via `ImageEncoder.encode(...)`. The store-the-bytes-once
|
||||||
|
/// shape means `RichChatViewModel` can keep the array between turns
|
||||||
|
/// (e.g. while the agent is responding) without holding `NSImage` /
|
||||||
|
/// `UIImage` references that would pin the originals in memory.
|
||||||
|
public struct ChatImageAttachment: Sendable, Equatable, Identifiable {
    /// Stable identity for SwiftUI lists; defaults to a fresh UUID string.
    public let id: String

    /// IANA MIME type, sent as the `mimeType` field of the ACP image block.
    /// `ImageEncoder` always re-encodes to JPEG, so attachments it produces
    /// carry `image/jpeg`.
    public let mimeType: String

    /// Base64 text of the encoded image, with no `data:` URI prefix.
    /// Hermes is expected to add any wrapper its downstream provider needs
    /// (see `_image_block_to_openai_part` in `acp_adapter/server.py`).
    public let base64Data: String

    /// Base64 text of a small preview for the composer's chip strip.
    /// Nil when no thumbnail could be produced (thumbnail encoding is
    /// best-effort, and the non-Apple fallback path never creates one).
    public let thumbnailBase64: String?

    /// Source filename when the ingest path knows it (drag-drop carries
    /// one; paste does not). Intended for the preview chip's tooltip.
    public let filename: String?

    /// Size in bytes of the encoded image before base64 expansion, kept
    /// for the composer's "X images, Y KB" status pill.
    public let approximateByteCount: Int

    /// Memberwise initializer.
    /// - Parameters:
    ///   - id: Identity for list diffing; a new UUID string when omitted.
    ///   - mimeType: IANA MIME type of `base64Data`.
    ///   - base64Data: Base64-encoded image bytes, without a `data:` prefix.
    ///   - thumbnailBase64: Optional base64-encoded preview image.
    ///   - filename: Optional source filename.
    ///   - approximateByteCount: Encoded payload size in bytes.
    public init(
        id: String = UUID().uuidString,
        mimeType: String,
        base64Data: String,
        thumbnailBase64: String?,
        filename: String?,
        approximateByteCount: Int
    ) {
        self.id = id
        self.mimeType = mimeType
        self.base64Data = base64Data
        self.thumbnailBase64 = thumbnailBase64
        self.filename = filename
        self.approximateByteCount = approximateByteCount
    }
}
|
||||||
@@ -0,0 +1,162 @@
|
|||||||
|
import Foundation
|
||||||
|
#if canImport(AppKit)
|
||||||
|
import AppKit
|
||||||
|
#endif
|
||||||
|
#if canImport(UIKit)
|
||||||
|
import UIKit
|
||||||
|
#endif
|
||||||
|
#if canImport(CoreImage)
|
||||||
|
import CoreImage
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Downsamples + base64-encodes user-supplied images for ACP transport.
|
||||||
|
///
|
||||||
|
/// **Why downsample on the producer side.** Hermes happily forwards the
|
||||||
|
/// bytes to a vision model, but a 12 MP screenshot at 4 MB is wasteful
|
||||||
|
/// — it eats 5–6× more tokens than a 1024×1024 thumbnail and gives the
|
||||||
|
/// model no extra signal. Cap the long edge at 1568 px (Anthropic's
|
||||||
|
/// recommended max for Claude vision) and drop quality to JPEG 0.85,
|
||||||
|
/// which keeps screenshot text crisp while landing under ~300 KB per
|
||||||
|
/// image. The 5-image-per-message limit (chosen on the producer side)
|
||||||
|
/// keeps the total prompt payload below ~2 MB.
|
||||||
|
///
|
||||||
|
/// **Why detached.** Image loading + downsampling is CPU-bound. Run only
|
||||||
|
/// from a `Task.detached` context (the encoder type is `Sendable` and
|
||||||
|
/// every method is `nonisolated`). The companion `ChatImageAttachment`
|
||||||
|
/// is a Sendable value type so the result hops back to MainActor cleanly.
|
||||||
|
public struct ImageEncoder: Sendable {
    /// Long-edge pixel cap. 1568 is Anthropic's recommended ceiling for
    /// Claude vision input — past it, the provider downsamples server-side
    /// and we just paid for the extra bytes. Tweak only with vision-model
    /// guidance from Hermes side.
    public static let maxLongEdge: CGFloat = 1568

    /// JPEG quality factor applied to both the payload and the thumbnail.
    public static let jpegQuality: CGFloat = 0.85

    /// Long-edge cap for the inline thumbnail rendered in the composer chip.
    public static let thumbnailLongEdge: CGFloat = 256

    public init() {}

    /// Failures thrown by `encode(rawBytes:sourceFilename:)`.
    public enum EncoderError: Error, LocalizedError {
        /// Only thrown on platforms without AppKit/UIKit when the bytes
        /// do not start with the JPEG signature.
        case unsupportedFormat
        /// The platform image type could not be created from the bytes,
        /// or it reported an empty size.
        case decodeFailed
        /// Rendering or JPEG serialization failed.
        case encodeFailed
        /// The input `Data` was empty.
        case empty

        public var errorDescription: String? {
            switch self {
            case .unsupportedFormat: return "Image format not recognized"
            case .decodeFailed: return "Couldn't decode image data"
            case .encodeFailed: return "Couldn't encode image as JPEG"
            case .empty: return "Image data was empty"
            }
        }
    }

    /// Encode raw bytes (from a paste/drop/picker) into a wire-ready
    /// attachment. Intended to be called off the main actor; the work is
    /// synchronous and CPU-bound.
    ///
    /// - Parameters:
    ///   - rawBytes: Encoded image file contents.
    ///   - sourceFilename: Optional original filename, copied to the result.
    /// - Returns: A JPEG attachment whose long edge is at most `maxLongEdge`
    ///   on AppKit/UIKit platforms; elsewhere the input is passed through
    ///   unchanged when it already looks like a JPEG.
    /// - Throws: `EncoderError.empty`, `.decodeFailed`, `.encodeFailed`, or
    ///   (non-Apple platforms only) `.unsupportedFormat`.
    public nonisolated func encode(
        rawBytes: Data,
        sourceFilename: String? = nil
    ) throws -> ChatImageAttachment {
        guard !rawBytes.isEmpty else { throw EncoderError.empty }

        #if canImport(AppKit)
        guard let nsImage = NSImage(data: rawBytes) else { throw EncoderError.decodeFailed }
        // NOTE(review): `NSImage.size` is in points, so files tagged with a
        // DPI above 72 are treated as their point size here — confirm that
        // this is acceptable for Retina screenshots.
        return try Self.makeAttachment(sourceSize: nsImage.size, filename: sourceFilename) { size in
            try Self.jpegBytes(from: nsImage, size: size)
        }

        #elseif canImport(UIKit)
        guard let uiImage = UIImage(data: rawBytes) else { throw EncoderError.decodeFailed }
        return try Self.makeAttachment(sourceSize: uiImage.size, filename: sourceFilename) { size in
            try Self.jpegBytes(from: uiImage, size: size)
        }

        #else
        // Linux CI / unknown platforms: pass through raw bytes if the
        // input already looks like a JPEG, else refuse. Keeps the
        // package compiling without a hard AppKit/UIKit dep.
        if rawBytes.starts(with: [0xFF, 0xD8]) {
            return ChatImageAttachment(
                mimeType: "image/jpeg",
                base64Data: rawBytes.base64EncodedString(),
                thumbnailBase64: nil,
                filename: sourceFilename,
                approximateByteCount: rawBytes.count
            )
        }
        throw EncoderError.unsupportedFormat
        #endif
    }

    /// Scales `source` down so its longer side is at most `maxLongEdge`,
    /// preserving aspect ratio. Sizes already within the cap are returned
    /// unchanged. Each scaled dimension is clamped to at least 1 so very
    /// elongated images never produce a zero-width or zero-height target.
    nonisolated private static func fittedSize(for source: CGSize, maxLongEdge: CGFloat) -> CGSize {
        let longest = max(source.width, source.height)
        guard longest > maxLongEdge else { return source }
        let scale = maxLongEdge / longest
        return CGSize(
            width: max(1, floor(source.width * scale)),
            height: max(1, floor(source.height * scale))
        )
    }

    #if canImport(AppKit) || canImport(UIKit)
    /// Shared assembly for the AppKit and UIKit paths: renders the payload
    /// and a best-effort thumbnail through `render`, then wraps both in a
    /// `ChatImageAttachment`.
    nonisolated private static func makeAttachment(
        sourceSize: CGSize,
        filename: String?,
        render: (CGSize) throws -> Data
    ) throws -> ChatImageAttachment {
        // A decoded image that reports no area cannot be drawn.
        guard sourceSize.width > 0, sourceSize.height > 0 else {
            throw EncoderError.decodeFailed
        }
        let mainData = try render(fittedSize(for: sourceSize, maxLongEdge: maxLongEdge))
        // The thumbnail is optional; a failure here must not fail the attachment.
        let thumbData = try? render(fittedSize(for: sourceSize, maxLongEdge: thumbnailLongEdge))
        return ChatImageAttachment(
            mimeType: "image/jpeg",
            base64Data: mainData.base64EncodedString(),
            thumbnailBase64: thumbData?.base64EncodedString(),
            filename: filename,
            approximateByteCount: mainData.count
        )
    }
    #endif

    #if canImport(AppKit)
    /// Draws `image` into a bitmap of exactly `size` pixels and returns it
    /// as JPEG data. An explicit `NSBitmapImageRep` is used instead of
    /// `NSImage.lockFocus()` so the output dimensions do not depend on the
    /// scale factor of whatever screen the process is running on.
    nonisolated private static func jpegBytes(from image: NSImage, size: CGSize) throws -> Data {
        guard size.width.isFinite, size.height.isFinite,
              size.width >= 1, size.height >= 1
        else {
            throw EncoderError.encodeFailed
        }
        let pixelsWide = Int(size.width)
        let pixelsHigh = Int(size.height)
        guard let rep = NSBitmapImageRep(
            bitmapDataPlanes: nil,
            pixelsWide: pixelsWide,
            pixelsHigh: pixelsHigh,
            bitsPerSample: 8,
            samplesPerPixel: 4,
            hasAlpha: true,
            isPlanar: false,
            colorSpaceName: .deviceRGB,
            bytesPerRow: 0,
            bitsPerPixel: 0
        ),
            let context = NSGraphicsContext(bitmapImageRep: rep)
        else {
            throw EncoderError.encodeFailed
        }

        // One point per pixel, so the drawing rect maps 1:1 onto the bitmap.
        let canvas = CGRect(x: 0, y: 0, width: pixelsWide, height: pixelsHigh)
        rep.size = canvas.size

        NSGraphicsContext.saveGraphicsState()
        NSGraphicsContext.current = context
        context.imageInterpolation = .high
        // JPEG has no alpha channel; flatten onto white so transparent
        // regions are not left to the encoder's default background.
        NSColor.white.setFill()
        canvas.fill()
        image.draw(in: canvas, from: .zero, operation: .sourceOver, fraction: 1.0)
        context.flushGraphics()
        NSGraphicsContext.restoreGraphicsState()

        guard let data = rep.representation(
            using: .jpeg,
            properties: [.compressionFactor: jpegQuality]
        ) else {
            throw EncoderError.encodeFailed
        }
        return data
    }
    #elseif canImport(UIKit)
    /// Draws `image` into an opaque, scale-1 bitmap of `size` and returns it
    /// as JPEG data.
    nonisolated private static func jpegBytes(from image: UIImage, size: CGSize) throws -> Data {
        guard size.width.isFinite, size.height.isFinite,
              size.width >= 1, size.height >= 1
        else {
            throw EncoderError.encodeFailed
        }
        let format = UIGraphicsImageRendererFormat()
        format.scale = 1
        format.opaque = true
        let canvas = CGRect(origin: .zero, size: size)
        let renderer = UIGraphicsImageRenderer(size: size, format: format)
        let resized = renderer.image { context in
            // Flatten onto white so transparent source pixels do not show
            // the opaque context's default background.
            UIColor.white.setFill()
            context.fill(canvas)
            image.draw(in: canvas)
        }
        guard let data = resized.jpegData(compressionQuality: jpegQuality) else {
            throw EncoderError.encodeFailed
        }
        return data
    }
    #endif
}
|
||||||
@@ -3,6 +3,9 @@ import ScarfCore
|
|||||||
import ScarfIOS
|
import ScarfIOS
|
||||||
import ScarfDesign
|
import ScarfDesign
|
||||||
import os
|
import os
|
||||||
|
#if canImport(PhotosUI)
|
||||||
|
import PhotosUI
|
||||||
|
#endif
|
||||||
|
|
||||||
// The Chat feature on iOS is gated on `canImport(SQLite3)` because
|
// The Chat feature on iOS is gated on `canImport(SQLite3)` because
|
||||||
// `RichChatViewModel` reads session history from `HermesDataService`
|
// `RichChatViewModel` reads session history from `HermesDataService`
|
||||||
@@ -24,9 +27,23 @@ struct ChatView: View {
|
|||||||
|
|
||||||
@Environment(\.scarfGoCoordinator) private var coordinator
|
@Environment(\.scarfGoCoordinator) private var coordinator
|
||||||
@Environment(\.serverContext) private var envContext
|
@Environment(\.serverContext) private var envContext
|
||||||
|
@Environment(\.hermesCapabilities) private var capabilitiesStore
|
||||||
@State private var controller: ChatController
|
@State private var controller: ChatController
|
||||||
@State private var showProjectPicker = false
|
@State private var showProjectPicker = false
|
||||||
@State private var showSlashCommandsSheet = false
|
@State private var showSlashCommandsSheet = false
|
||||||
|
/// PhotosPicker selection. Bridge between SwiftUI's selection
|
||||||
|
/// binding and our `ChatImageAttachment` payload — `loadTransferable`
|
||||||
|
/// produces raw `Data` we then hand to `ImageEncoder`. v0.12+ only.
|
||||||
|
@State private var pickerSelection: [PhotosPickerItem] = []
|
||||||
|
@State private var showPhotoPicker = false
|
||||||
|
@State private var isEncodingAttachment = false
|
||||||
|
@State private var attachmentError: String?
|
||||||
|
|
||||||
|
private static let maxAttachments = 5
|
||||||
|
|
||||||
|
/// True only when a capabilities store is present in the environment and
/// it reports ACP image-prompt support; a missing store counts as "no".
private var supportsImagePrompts: Bool {
    guard let store = capabilitiesStore else { return false }
    return store.capabilities.hasACPImagePrompts
}
|
||||||
/// Drives the composer's keyboard. Bound to the TextField via
|
/// Drives the composer's keyboard. Bound to the TextField via
|
||||||
/// `.focused(...)`; cleared by the scroll-to-dismiss gesture on
|
/// `.focused(...)`; cleared by the scroll-to-dismiss gesture on
|
||||||
/// the message list AND by an explicit keyboard-toolbar button.
|
/// the message list AND by an explicit keyboard-toolbar button.
|
||||||
@@ -431,7 +448,108 @@ struct ChatView: View {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private var composer: some View {
|
private var composer: some View {
|
||||||
|
VStack(alignment: .leading, spacing: 4) {
|
||||||
|
if !controller.attachments.isEmpty || isEncodingAttachment || attachmentError != nil {
|
||||||
|
attachmentStrip
|
||||||
|
}
|
||||||
|
composerRow
|
||||||
|
}
|
||||||
|
.padding(.horizontal, 12)
|
||||||
|
.padding(.vertical, 8)
|
||||||
|
.background(.regularMaterial)
|
||||||
|
#if canImport(PhotosUI)
|
||||||
|
.photosPicker(
|
||||||
|
isPresented: $showPhotoPicker,
|
||||||
|
selection: $pickerSelection,
|
||||||
|
maxSelectionCount: max(0, Self.maxAttachments - controller.attachments.count),
|
||||||
|
matching: .images
|
||||||
|
)
|
||||||
|
.onChange(of: pickerSelection) { _, items in
|
||||||
|
ingestPickerItems(items)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Row rendered above the text field: an encoding spinner while work is in
/// flight, one chip per queued attachment, the latest attachment error,
/// and an "n/max" counter once at least one attachment is queued.
@ViewBuilder
private var attachmentStrip: some View {
    let queued = controller.attachments
    HStack(alignment: .center, spacing: 8) {
        if isEncodingAttachment {
            ProgressView().controlSize(.small)
            Text("Encoding…")
                .font(.caption)
                .foregroundStyle(.secondary)
        }
        ForEach(queued) { item in
            attachmentChip(item)
        }
        if let message = attachmentError {
            Text(message)
                .font(.caption)
                .foregroundStyle(ScarfColor.danger)
        }
        Spacer(minLength: 0)
        if !queued.isEmpty {
            Text("\(queued.count)/\(Self.maxAttachments)")
                .font(.caption2)
                .foregroundStyle(.tertiary)
        }
    }
}
|
||||||
|
|
||||||
|
/// One preview chip: a 32×32 rounded thumbnail plus a remove button that
/// drops the attachment with the matching `id` from the controller.
@ViewBuilder
private func attachmentChip(_ attachment: ChatImageAttachment) -> some View {
    let chipBackground = RoundedRectangle(cornerRadius: 8)
        .fill(ScarfColor.backgroundSecondary)
    HStack(spacing: 4) {
        attachmentChipThumbnail(attachment)
            .frame(width: 32, height: 32)
            .clipShape(RoundedRectangle(cornerRadius: 4))
        Button(action: {
            controller.attachments.removeAll { queued in
                queued.id == attachment.id
            }
        }) {
            Image(systemName: "xmark.circle.fill")
                .foregroundStyle(.secondary)
        }
        .buttonStyle(.plain)
        .accessibilityLabel("Remove attached image")
    }
    .padding(.horizontal, 6)
    .padding(.vertical, 4)
    .background(chipBackground)
}
|
||||||
|
|
||||||
|
/// Thumbnail content for a chip. Decodes the attachment's base64 preview
/// into a `UIImage`; when there is no preview, or it fails to decode,
/// a generic "photo" symbol is shown instead.
@ViewBuilder
private func attachmentChipThumbnail(_ attachment: ChatImageAttachment) -> some View {
    let preview = attachment.thumbnailBase64
        .flatMap { Data(base64Encoded: $0) }
        .flatMap { UIImage(data: $0) }
    if let preview {
        Image(uiImage: preview)
            .resizable()
            .aspectRatio(contentMode: .fill)
    } else {
        Image(systemName: "photo")
            .foregroundStyle(.secondary)
            .frame(maxWidth: .infinity, maxHeight: .infinity)
            .background(ScarfColor.backgroundSecondary)
    }
}
|
||||||
|
|
||||||
|
private var composerRow: some View {
|
||||||
HStack(alignment: .bottom, spacing: 8) {
|
HStack(alignment: .bottom, spacing: 8) {
|
||||||
|
if supportsImagePrompts {
|
||||||
|
Button {
|
||||||
|
showPhotoPicker = true
|
||||||
|
} label: {
|
||||||
|
Image(systemName: "paperclip")
|
||||||
|
.font(.system(size: 22))
|
||||||
|
.foregroundStyle(.secondary)
|
||||||
|
.padding(.bottom, 4)
|
||||||
|
}
|
||||||
|
.buttonStyle(.plain)
|
||||||
|
.disabled(controller.state != .ready || controller.attachments.count >= Self.maxAttachments)
|
||||||
|
.accessibilityLabel("Attach image")
|
||||||
|
}
|
||||||
TextField(
|
TextField(
|
||||||
"Message…",
|
"Message…",
|
||||||
text: $controller.draft,
|
text: $controller.draft,
|
||||||
@@ -480,13 +598,58 @@ struct ChatView: View {
|
|||||||
Image(systemName: "arrow.up.circle.fill")
|
Image(systemName: "arrow.up.circle.fill")
|
||||||
.font(.system(size: 28))
|
.font(.system(size: 28))
|
||||||
}
|
}
|
||||||
.disabled(controller.state != .ready || controller.draft.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty)
|
.disabled(!canSendComposer)
|
||||||
}
|
}
|
||||||
.padding(.horizontal, 12)
|
|
||||||
.padding(.vertical, 8)
|
|
||||||
.background(.regularMaterial)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether the send button is active: the controller must be `.ready`,
/// and there must be something to send — at least one queued attachment
/// or a draft that is not just whitespace. Image-only sends are allowed.
private var canSendComposer: Bool {
    let hasAttachments = !controller.attachments.isEmpty
    let hasText = !controller.draft
        .trimmingCharacters(in: .whitespacesAndNewlines)
        .isEmpty
    return controller.state == .ready && (hasAttachments || hasText)
}
|
||||||
|
|
||||||
|
/// Pull JPEG/PNG bytes out of each PhotosPickerItem and feed them
|
||||||
|
/// through ImageEncoder. Detached so the heavyweight resize +
|
||||||
|
/// JPEG-encode work doesn't block MainActor; the resulting
|
||||||
|
/// attachment hops back to MainActor for state mutation.
|
||||||
|
///
|
||||||
|
/// PhotosPickerItem can deliver `Data` directly via the
|
||||||
|
/// `Transferable` API. After ingestion the binding is reset so a
|
||||||
|
/// follow-up pick triggers `onChange` again.
|
||||||
|
#if canImport(PhotosUI)
|
||||||
|
private func ingestPickerItems(_ items: [PhotosPickerItem]) {
    guard !items.isEmpty else { return }
    // Snapshot the selection, then reset the binding to an empty array
    // right away so that a later pick — even of the same images — changes
    // the binding again and re-triggers `onChange`. The reset itself
    // re-enters this function with an empty array and returns above.
    let snapshot = items
    pickerSelection = []
    isEncodingAttachment = true
    Task { @MainActor in
        defer { isEncodingAttachment = false }
        for item in snapshot {
            guard controller.attachments.count < Self.maxAttachments else { break }
            do {
                guard let data = try await item.loadTransferable(type: Data.self) else { continue }
                // Resize + JPEG-encode off the main actor.
                let attachment = try await Task.detached(priority: .userInitiated) {
                    try ImageEncoder().encode(rawBytes: data, sourceFilename: nil)
                }.value
                // Re-check the cap: other work (for example a second
                // ingestion started while this one was suspended) may have
                // appended attachments during the awaits above.
                guard controller.attachments.count < Self.maxAttachments else { break }
                controller.attachments.append(attachment)
            } catch {
                let message = (error as? LocalizedError)?.errorDescription ?? "Couldn't encode image"
                attachmentError = message
                // Auto-dismiss after 4 s, but only if the banner still
                // shows this message — a newer error keeps its own timer.
                Task { @MainActor in
                    try? await Task.sleep(nanoseconds: 4_000_000_000)
                    if attachmentError == message {
                        attachmentError = nil
                    }
                }
            }
        }
    }
}
|
||||||
|
#endif
|
||||||
|
|
||||||
@State private var showErrorDetails: Bool = false
|
@State private var showErrorDetails: Bool = false
|
||||||
|
|
||||||
/// Inline error banner rendered above the message list when the
|
/// Inline error banner rendered above the message list when the
|
||||||
@@ -696,6 +859,12 @@ final class ChatController {
|
|||||||
var vm: RichChatViewModel
|
var vm: RichChatViewModel
|
||||||
var draft: String = ""
|
var draft: String = ""
|
||||||
|
|
||||||
|
/// v0.12+ image attachments queued to send with the next prompt.
|
||||||
|
/// Capped at 5 by the composer UI; the cap matches the Mac behavior
|
||||||
|
/// and keeps total ACP prompt payload under ~2 MB even on a slow
|
||||||
|
/// cellular link. Emptied by `send()` before the prompt is dispatched.
|
||||||
|
var attachments: [ChatImageAttachment] = []
|
||||||
|
|
||||||
/// Set when chat-start is blocked because the active server's
|
/// Set when chat-start is blocked because the active server's
|
||||||
/// `config.yaml` has no `model.default` / `model.provider`. ChatView
|
/// `config.yaml` has no `model.default` / `model.provider`. ChatView
|
||||||
/// observes this to present an inline "pick a model" sheet — the
|
/// observes this to present an inline "pick a model" sheet — the
|
||||||
@@ -1003,12 +1172,22 @@ final class ChatController {
|
|||||||
func send() async {
|
func send() async {
|
||||||
guard state == .ready, let client else { return }
|
guard state == .ready, let client else { return }
|
||||||
let text = draft.trimmingCharacters(in: .whitespacesAndNewlines)
|
let text = draft.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
guard !text.isEmpty else { return }
|
// v0.12+ allows image-only sends — vision models accept "describe
|
||||||
|
// this" with no text. Bail only when both fields are empty.
|
||||||
|
guard !text.isEmpty || !attachments.isEmpty else { return }
|
||||||
let sessionId = vm.sessionId ?? ""
|
let sessionId = vm.sessionId ?? ""
|
||||||
guard !sessionId.isEmpty else { return }
|
guard !sessionId.isEmpty else { return }
|
||||||
|
let images = attachments
|
||||||
|
attachments = []
|
||||||
draft = ""
|
draft = ""
|
||||||
clearStoredDraft()
|
clearStoredDraft()
|
||||||
|
if !text.isEmpty {
|
||||||
vm.addUserMessage(text: text)
|
vm.addUserMessage(text: text)
|
||||||
|
} else {
|
||||||
|
// Surface an image-only message so the user sees their bubble
|
||||||
|
// even when they didn't type any caption.
|
||||||
|
vm.addUserMessage(text: "[image attached]")
|
||||||
|
}
|
||||||
// /steer is non-interruptive — the agent is still on its
|
// /steer is non-interruptive — the agent is still on its
|
||||||
// current turn; the guidance applies after the next tool call.
|
// current turn; the guidance applies after the next tool call.
|
||||||
// Surface a transient toast confirming the guidance was
|
// Surface a transient toast confirming the guidance was
|
||||||
@@ -1029,7 +1208,7 @@ final class ChatController {
|
|||||||
// literally. v2.5.
|
// literally. v2.5.
|
||||||
let wireText = expandIfProjectScoped(text)
|
let wireText = expandIfProjectScoped(text)
|
||||||
do {
|
do {
|
||||||
_ = try await client.sendPrompt(sessionId: sessionId, text: wireText)
|
_ = try await client.sendPrompt(sessionId: sessionId, text: wireText, images: images)
|
||||||
} catch {
|
} catch {
|
||||||
// The event task may already have surfaced a
|
// The event task may already have surfaced a
|
||||||
// .connectionLost; show the send-time error only if the
|
// .connectionLost; show the send-time error only if the
|
||||||
|
|||||||
@@ -254,14 +254,32 @@ final class ChatViewModel {
|
|||||||
// MARK: - Send Message
|
// MARK: - Send Message
|
||||||
|
|
||||||
func sendText(_ text: String) {
|
func sendText(_ text: String) {
|
||||||
|
sendText(text, images: [])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// v0.12+ overload: forward image attachments alongside the text.
|
||||||
|
/// Empty `images` keeps the legacy v0.11 wire shape; non-empty images
|
||||||
|
/// only flow when `HermesCapabilities.hasACPImagePrompts` is true
|
||||||
|
/// (the input bar gates the attachment UI on the same flag, so a
|
||||||
|
/// non-empty array reaching here means we've already verified the
|
||||||
|
/// agent supports it).
|
||||||
|
///
|
||||||
|
/// Terminal mode drops attachments — there's no way to
|
||||||
|
/// pipe binary content through the TTY. Surface a one-shot warning
|
||||||
|
/// so the user knows.
|
||||||
|
func sendText(_ text: String, images: [ChatImageAttachment]) {
|
||||||
if displayMode == .richChat {
|
if displayMode == .richChat {
|
||||||
if let client = acpClient {
|
if let client = acpClient {
|
||||||
sendViaACP(client: client, text: text)
|
sendViaACP(client: client, text: text, images: images)
|
||||||
} else {
|
} else {
|
||||||
// Auto-start ACP and send the queued message
|
// Auto-start ACP and send the queued message
|
||||||
autoStartACPAndSend(text: text)
|
autoStartACPAndSend(text: text, images: images)
|
||||||
}
|
}
|
||||||
} else if let tv = terminalView {
|
} else if let tv = terminalView {
|
||||||
|
if !images.isEmpty {
|
||||||
|
logger.warning("Terminal-mode chat dropped \(images.count) image attachment(s) — image input only works in ACP rich-chat mode")
|
||||||
|
acpError = "Image attachments require ACP mode (rich chat)."
|
||||||
|
}
|
||||||
sendToTerminal(tv, text: text + "\r")
|
sendToTerminal(tv, text: text + "\r")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -274,7 +292,7 @@ final class ChatViewModel {
|
|||||||
/// user never interacted with; those can be garbage-collected by Hermes
|
/// user never interacted with; those can be garbage-collected by Hermes
|
||||||
/// between the DB read and ACP `session/load`, producing a silent prompt
|
/// between the DB read and ACP `session/load`, producing a silent prompt
|
||||||
/// failure with no UI feedback.
|
/// failure with no UI feedback.
|
||||||
private func autoStartACPAndSend(text: String) {
|
private func autoStartACPAndSend(text: String, images: [ChatImageAttachment] = []) {
|
||||||
// Show the user message immediately
|
// Show the user message immediately
|
||||||
richChatViewModel.addUserMessage(text: text)
|
richChatViewModel.addUserMessage(text: text)
|
||||||
|
|
||||||
@@ -313,7 +331,7 @@ final class ChatViewModel {
|
|||||||
acpStatus = "Connected (\(resolvedSessionId.prefix(12)))"
|
acpStatus = "Connected (\(resolvedSessionId.prefix(12)))"
|
||||||
|
|
||||||
// Now send the queued prompt
|
// Now send the queued prompt
|
||||||
sendViaACP(client: client, text: text)
|
sendViaACP(client: client, text: text, images: images)
|
||||||
} catch {
|
} catch {
|
||||||
acpStatus = "Failed"
|
acpStatus = "Failed"
|
||||||
await recordACPFailure(error, client: client, context: "Auto-start ACP failed")
|
await recordACPFailure(error, client: client, context: "Auto-start ACP failed")
|
||||||
@@ -350,7 +368,7 @@ final class ChatViewModel {
|
|||||||
return ProjectSlashCommandService(context: context).expand(cmd, withArgument: argument)
|
return ProjectSlashCommandService(context: context).expand(cmd, withArgument: argument)
|
||||||
}
|
}
|
||||||
|
|
||||||
private func sendViaACP(client: ACPClient, text: String) {
|
private func sendViaACP(client: ACPClient, text: String, images: [ChatImageAttachment] = []) {
|
||||||
guard let sessionId = richChatViewModel.sessionId else {
|
guard let sessionId = richChatViewModel.sessionId else {
|
||||||
clearACPErrorState()
|
clearACPErrorState()
|
||||||
acpError = "No session ID — cannot send"
|
acpError = "No session ID — cannot send"
|
||||||
@@ -390,7 +408,7 @@ final class ChatViewModel {
|
|||||||
}
|
}
|
||||||
acpPromptTask = Task { @MainActor in
|
acpPromptTask = Task { @MainActor in
|
||||||
do {
|
do {
|
||||||
let result = try await client.sendPrompt(sessionId: sessionId, text: wireText)
|
let result = try await client.sendPrompt(sessionId: sessionId, text: wireText, images: images)
|
||||||
acpStatus = "Ready"
|
acpStatus = "Ready"
|
||||||
richChatViewModel.handleACPEvent(
|
richChatViewModel.handleACPEvent(
|
||||||
.promptComplete(sessionId: sessionId, response: result)
|
.promptComplete(sessionId: sessionId, response: result)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import ScarfDesign
|
|||||||
struct ChatTranscriptPane: View {
|
struct ChatTranscriptPane: View {
|
||||||
@Bindable var richChat: RichChatViewModel
|
@Bindable var richChat: RichChatViewModel
|
||||||
@Bindable var chatViewModel: ChatViewModel
|
@Bindable var chatViewModel: ChatViewModel
|
||||||
var onSend: (String) -> Void
|
var onSend: (String, [ChatImageAttachment]) -> Void
|
||||||
var isEnabled: Bool
|
var isEnabled: Bool
|
||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
|
|||||||
@@ -396,7 +396,7 @@ struct ChatView: View {
|
|||||||
if viewModel.hermesBinaryExists {
|
if viewModel.hermesBinaryExists {
|
||||||
RichChatView(
|
RichChatView(
|
||||||
richChat: viewModel.richChatViewModel,
|
richChat: viewModel.richChatViewModel,
|
||||||
onSend: { viewModel.sendText($0) },
|
onSend: { text, images in viewModel.sendText(text, images: images) },
|
||||||
isEnabled: viewModel.hasActiveProcess || viewModel.hermesBinaryExists
|
isEnabled: viewModel.hasActiveProcess || viewModel.hermesBinaryExists
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -1,20 +1,51 @@
|
|||||||
import SwiftUI
|
import SwiftUI
|
||||||
import ScarfCore
|
import ScarfCore
|
||||||
import ScarfDesign
|
import ScarfDesign
|
||||||
|
import UniformTypeIdentifiers
|
||||||
|
import os
|
||||||
|
#if canImport(AppKit)
|
||||||
|
import AppKit
|
||||||
|
#endif
|
||||||
|
|
||||||
struct RichChatInputBar: View {
|
struct RichChatInputBar: View {
|
||||||
let onSend: (String) -> Void
|
/// Send the user's text and any attached images. Empty `images`
|
||||||
|
/// preserves the v0.11 wire shape; non-empty images are forwarded
|
||||||
|
/// as ACP image content blocks (Hermes v0.12+; the composer hides
|
||||||
|
/// the attachment UI on older hosts).
|
||||||
|
let onSend: (String, [ChatImageAttachment]) -> Void
|
||||||
let isEnabled: Bool
|
let isEnabled: Bool
|
||||||
var commands: [HermesSlashCommand] = []
|
var commands: [HermesSlashCommand] = []
|
||||||
var showCompressButton: Bool = false
|
var showCompressButton: Bool = false
|
||||||
|
|
||||||
|
@Environment(\.hermesCapabilities) private var capabilitiesStore
|
||||||
|
|
||||||
@State private var text = ""
|
@State private var text = ""
|
||||||
@State private var showCompressSheet = false
|
@State private var showCompressSheet = false
|
||||||
@State private var compressFocus = ""
|
@State private var compressFocus = ""
|
||||||
@State private var showMenu = false
|
@State private var showMenu = false
|
||||||
@State private var selectedIndex = 0
|
@State private var selectedIndex = 0
|
||||||
|
@State private var attachments: [ChatImageAttachment] = []
|
||||||
|
/// True while ImageEncoder is decoding/encoding pasted/dropped bytes.
|
||||||
|
/// Renders a small spinner in the preview strip so the user knows
|
||||||
|
/// their drop landed.
|
||||||
|
@State private var isEncodingAttachment = false
|
||||||
|
/// User-visible failure (decode failed, format unsupported). Auto-clears.
|
||||||
|
@State private var attachmentError: String?
|
||||||
@FocusState private var isFocused: Bool
|
@FocusState private var isFocused: Bool
|
||||||
|
|
||||||
|
/// Hard cap matches what Hermes' vision aux model swallows comfortably
|
||||||
|
/// in one prompt. Going higher costs tokens without a quality gain.
|
||||||
|
private static let maxAttachments = 5
|
||||||
|
|
||||||
|
private static let logger = Logger(subsystem: "com.scarf", category: "ChatComposer")
|
||||||
|
|
||||||
|
/// Whether the attachment UI should be offered at all.
///
/// The capabilities store is `nil` until detection finishes; that
/// window (~50ms locally, longer over SSH) is reported as `false`
/// so we never flash an attachment chip a v0.11 host couldn't honor.
private var supportsImagePrompts: Bool {
    let advertised = capabilitiesStore?.capabilities.hasACPImagePrompts
    return advertised ?? false
}
|
||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
VStack(alignment: .leading, spacing: 0) {
|
VStack(alignment: .leading, spacing: 0) {
|
||||||
if showMenu {
|
if showMenu {
|
||||||
@@ -36,6 +67,10 @@ struct RichChatInputBar: View {
|
|||||||
.padding(.top, 8)
|
.padding(.top, 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !attachments.isEmpty || isEncodingAttachment || attachmentError != nil {
|
||||||
|
attachmentStrip
|
||||||
|
}
|
||||||
|
|
||||||
HStack(alignment: .bottom, spacing: ScarfSpace.s2) {
|
HStack(alignment: .bottom, spacing: ScarfSpace.s2) {
|
||||||
if showCompressButton {
|
if showCompressButton {
|
||||||
Button {
|
Button {
|
||||||
@@ -52,6 +87,10 @@ struct RichChatInputBar: View {
|
|||||||
.help("Compress conversation (/compress)")
|
.help("Compress conversation (/compress)")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if supportsImagePrompts {
|
||||||
|
attachmentButton
|
||||||
|
}
|
||||||
|
|
||||||
TextEditor(text: $text)
|
TextEditor(text: $text)
|
||||||
.font(ScarfFont.body)
|
.font(ScarfFont.body)
|
||||||
.scrollContentBackground(.hidden)
|
.scrollContentBackground(.hidden)
|
||||||
@@ -70,7 +109,9 @@ struct RichChatInputBar: View {
|
|||||||
)
|
)
|
||||||
.overlay(alignment: .topLeading) {
|
.overlay(alignment: .topLeading) {
|
||||||
if text.isEmpty {
|
if text.isEmpty {
|
||||||
Text("Message Hermes… / for commands")
|
Text(supportsImagePrompts
|
||||||
|
? "Message Hermes… / for commands · drag images to attach"
|
||||||
|
: "Message Hermes… / for commands")
|
||||||
.scarfStyle(.body)
|
.scarfStyle(.body)
|
||||||
.foregroundStyle(ScarfColor.foregroundFaint)
|
.foregroundStyle(ScarfColor.foregroundFaint)
|
||||||
.padding(.horizontal, 14)
|
.padding(.horizontal, 14)
|
||||||
@@ -78,6 +119,25 @@ struct RichChatInputBar: View {
|
|||||||
.allowsHitTesting(false)
|
.allowsHitTesting(false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Drag-drop image attachments. Receives both file URLs
|
||||||
|
// (from Finder) and raw image bitmap data (from
|
||||||
|
// screenshot tools that drop tiff/png directly).
|
||||||
|
// Capability-gated so v0.11 hosts don't surface a
|
||||||
|
// drop target that does nothing.
|
||||||
|
.onDrop(
|
||||||
|
of: supportsImagePrompts ? [.image, .fileURL] : [],
|
||||||
|
isTargeted: nil
|
||||||
|
) { providers in
|
||||||
|
guard supportsImagePrompts else { return false }
|
||||||
|
ingestProviders(providers)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// Paste from screenshots / browser context menu.
|
||||||
|
// Accepting `Data` keeps us off `NSImage` which would
|
||||||
|
// require AppKit-typed paste. v0.12+ only.
|
||||||
|
.onPasteCommand(of: pasteAcceptedTypes) { providers in
|
||||||
|
ingestProviders(providers)
|
||||||
|
}
|
||||||
.onKeyPress(.upArrow, phases: .down) { _ in
|
.onKeyPress(.upArrow, phases: .down) { _ in
|
||||||
guard showMenu, !filteredCommands.isEmpty else { return .ignored }
|
guard showMenu, !filteredCommands.isEmpty else { return .ignored }
|
||||||
let n = filteredCommands.count
|
let n = filteredCommands.count
|
||||||
@@ -148,6 +208,96 @@ struct RichChatInputBar: View {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Preview row for the attachments queued on this composer.
///
/// Left to right: an "Encoding…" spinner while `isEncodingAttachment`
/// is set, one removable chip per attachment, the pending
/// `attachmentError` (if any), and a trailing `count/max` label that
/// only appears once at least one attachment exists.
@ViewBuilder
private var attachmentStrip: some View {
    HStack(alignment: .center, spacing: ScarfSpace.s2) {
        if isEncodingAttachment {
            ProgressView()
                .controlSize(.small)
            Text("Encoding…")
                .scarfStyle(.caption)
                .foregroundStyle(ScarfColor.foregroundMuted)
        }

        ForEach(attachments) { queued in
            attachmentChip(queued)
        }

        if let message = attachmentError {
            Text(message)
                .scarfStyle(.caption)
                .foregroundStyle(ScarfColor.danger)
        }

        // Pushes the counter to the trailing edge.
        Spacer(minLength: 0)

        if !attachments.isEmpty {
            Text("\(attachments.count)/\(Self.maxAttachments)")
                .scarfStyle(.caption)
                .foregroundStyle(ScarfColor.foregroundFaint)
        }
    }
    .padding(.horizontal, ScarfSpace.s3)
    .padding(.top, ScarfSpace.s2)
}
|
||||||
|
|
||||||
|
/// A single removable attachment chip: a 32×32 clipped thumbnail next
/// to an "x" button that removes the attachment with the matching `id`.
///
/// - Parameter attachment: The queued attachment this chip represents.
@ViewBuilder
private func attachmentChip(_ attachment: ChatImageAttachment) -> some View {
    HStack(spacing: 4) {
        chipThumbnail(for: attachment)
            .frame(width: 32, height: 32)
            .clipShape(RoundedRectangle(cornerRadius: 4))
        Button {
            attachments.removeAll { $0.id == attachment.id }
        } label: {
            Image(systemName: "xmark.circle.fill")
                .font(.system(size: 14))
                .foregroundStyle(ScarfColor.foregroundMuted)
        }
        .buttonStyle(.plain)
        .help(attachment.filename ?? "Image attachment")
        // Icon-only button: give assistive technologies an explicit
        // action name instead of relying on the symbol's default.
        .accessibilityLabel("Remove \(attachment.filename ?? "image attachment")")
    }
    .padding(.horizontal, 6)
    .padding(.vertical, 4)
    .background(
        RoundedRectangle(cornerRadius: ScarfRadius.md)
            .fill(ScarfColor.backgroundTertiary)
    )
}
|
||||||
|
|
||||||
|
/// Render the inline thumbnail for a chip.
///
/// Decodes `thumbnailBase64` into an image when present and valid;
/// otherwise (no thumbnail, undecodable base64, or bytes AppKit can't
/// read) falls back to a generic photo icon. The `NSImage` path is
/// guarded the same way as the file's AppKit import so the view still
/// compiles where AppKit is unavailable.
///
/// - Parameter attachment: The attachment whose thumbnail to show.
@ViewBuilder
private func chipThumbnail(for attachment: ChatImageAttachment) -> some View {
    #if canImport(AppKit)
    if let encoded = attachment.thumbnailBase64,
       let bytes = Data(base64Encoded: encoded),
       let image = NSImage(data: bytes) {
        Image(nsImage: image)
            .resizable()
            .aspectRatio(contentMode: .fill)
    } else {
        thumbnailPlaceholder
    }
    #else
    thumbnailPlaceholder
    #endif
}

/// Generic photo icon used when no thumbnail can be rendered.
private var thumbnailPlaceholder: some View {
    Image(systemName: "photo")
        .foregroundStyle(ScarfColor.foregroundMuted)
        .frame(maxWidth: .infinity, maxHeight: .infinity)
        .background(ScarfColor.backgroundSecondary)
}
|
||||||
|
|
||||||
|
/// Paperclip button that opens the image picker. Disabled while the
/// composer itself is disabled or the attachment cap has been reached.
private var attachmentButton: some View {
    Button {
        presentImagePicker()
    } label: {
        Image(systemName: "paperclip")
            .font(.system(size: 16))
            .foregroundStyle(ScarfColor.foregroundMuted)
            .padding(6)
    }
    .buttonStyle(.plain)
    .disabled(!isEnabled || attachments.count >= Self.maxAttachments)
    .help("Attach image (\(attachments.count)/\(Self.maxAttachments))")
    // Icon-only button: name the action explicitly for assistive
    // technologies; the tooltip above carries the running count.
    .accessibilityLabel("Attach image")
}
|
||||||
|
|
||||||
private var compressSheet: some View {
|
private var compressSheet: some View {
|
||||||
VStack(alignment: .leading, spacing: ScarfSpace.s3) {
|
VStack(alignment: .leading, spacing: ScarfSpace.s3) {
|
||||||
Text("Compress Conversation")
|
Text("Compress Conversation")
|
||||||
@@ -164,7 +314,7 @@ struct RichChatInputBar: View {
|
|||||||
Button("Compress") {
|
Button("Compress") {
|
||||||
let focus = compressFocus.trimmingCharacters(in: .whitespacesAndNewlines)
|
let focus = compressFocus.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
let command = focus.isEmpty ? "/compress" : "/compress \(focus)"
|
let command = focus.isEmpty ? "/compress" : "/compress \(focus)"
|
||||||
onSend(command)
|
onSend(command, [])
|
||||||
showCompressSheet = false
|
showCompressSheet = false
|
||||||
}
|
}
|
||||||
.buttonStyle(ScarfPrimaryButton())
|
.buttonStyle(ScarfPrimaryButton())
|
||||||
@@ -176,7 +326,18 @@ struct RichChatInputBar: View {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private var canSend: Bool {
|
private var canSend: Bool {
|
||||||
isEnabled && !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
|
guard isEnabled else { return false }
|
||||||
|
// Allow sending image-only messages once at least one attachment
|
||||||
|
// exists — vision models accept "describe this" with no text.
|
||||||
|
if !attachments.isEmpty { return true }
|
||||||
|
return !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Uniform types (`UTType`s) accepted for paste. Restricting to image-bearing
|
||||||
|
/// providers stops macOS from offering a paste menu when the user
|
||||||
|
/// has plain text on the clipboard.
|
||||||
|
private var pasteAcceptedTypes: [UTType] {
|
||||||
|
supportsImagePrompts ? [.image, .png, .jpeg, .tiff, .heic] : []
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Show the slash menu only while the user is typing the command token:
|
/// Show the slash menu only while the user is typing the command token:
|
||||||
@@ -224,12 +385,116 @@ struct RichChatInputBar: View {
|
|||||||
|
|
||||||
private func send() {
|
private func send() {
|
||||||
let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
|
let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
guard !trimmed.isEmpty, isEnabled else { return }
|
guard canSend else { return }
|
||||||
onSend(trimmed)
|
onSend(trimmed, attachments)
|
||||||
text = ""
|
text = ""
|
||||||
|
attachments.removeAll()
|
||||||
showMenu = false
|
showMenu = false
|
||||||
selectedIndex = 0
|
selectedIndex = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MARK: - Attachment ingestion
|
||||||
|
|
||||||
|
/// Pull image bytes out of a set of `NSItemProvider`s (drag/drop or
/// paste). Each provider may carry a file URL OR raw image data —
/// `ingestProvider` tries both.
///
/// At most `maxAttachments - attachments.count` providers are
/// ingested. When the composer is already full, or when some of the
/// providers had to be skipped to respect the cap, a transient
/// message is shown so the user knows why images are missing.
///
/// - Parameter providers: Item providers handed over by the drop or
///   paste handler; an empty array is a no-op.
private func ingestProviders(_ providers: [NSItemProvider]) {
    guard !providers.isEmpty else { return }

    let remainingSlots = Self.maxAttachments - attachments.count
    guard remainingSlots > 0 else {
        attachmentError = "Limit of \(Self.maxAttachments) images reached"
        scheduleAttachmentErrorClear()
        return
    }

    // Tell the user when part of the drop is being discarded instead
    // of silently truncating it.
    if providers.count > remainingSlots {
        attachmentError = "Limit of \(Self.maxAttachments) images reached — extra images were skipped"
        scheduleAttachmentErrorClear()
    }

    for provider in providers.prefix(remainingSlots) {
        ingestProvider(provider)
    }
}
|
||||||
|
|
||||||
|
/// Load one provider's payload and hand the bytes to `encode`.
///
/// A file URL wins when the provider offers one, because it yields the
/// original filename for the chip tooltip. Otherwise the first image
/// type the provider conforms to is loaded as raw data. Providers that
/// match neither are ignored. Load failures clear the spinner and show
/// a transient error on the main actor.
private func ingestProvider(_ provider: NSItemProvider) {
    if provider.hasItemConformingToTypeIdentifier(UTType.fileURL.identifier) {
        isEncodingAttachment = true
        provider.loadObject(ofClass: URL.self) { fileURL, _ in
            if let fileURL, let bytes = try? Data(contentsOf: fileURL) {
                encode(data: bytes, filename: fileURL.lastPathComponent)
                return
            }
            Task { @MainActor in
                isEncodingAttachment = false
                attachmentError = "Couldn't read dropped file"
                scheduleAttachmentErrorClear()
            }
        }
        return
    }

    // Probe in the same order as before: generic image first, then
    // the concrete formats.
    let candidateTypes: [UTType] = [.image, .png, .jpeg, .tiff, .heic]
    let matchedIdentifier = candidateTypes
        .map(\.identifier)
        .first { provider.hasItemConformingToTypeIdentifier($0) }
    guard let matchedIdentifier else { return }

    isEncodingAttachment = true
    provider.loadDataRepresentation(forTypeIdentifier: matchedIdentifier) { bytes, _ in
        if let bytes {
            encode(data: bytes, filename: nil)
            return
        }
        Task { @MainActor in
            isEncodingAttachment = false
            attachmentError = "Couldn't decode pasted image"
            scheduleAttachmentErrorClear()
        }
    }
}
|
||||||
|
|
||||||
|
/// Encode raw image bytes off the main actor and queue the result.
///
/// Runs `ImageEncoder` in a detached task, then hops back to the main
/// actor to clear the spinner and either append the attachment or
/// surface the failure as a transient error.
///
/// The attachment cap is re-checked at append time: callers only
/// check it before the asynchronous encode starts, so overlapping
/// drops/picks could otherwise push the queue past `maxAttachments`.
///
/// - Parameters:
///   - data: Raw image bytes as read from disk or the pasteboard.
///   - filename: Original filename when known, `nil` otherwise.
private func encode(data: Data, filename: String?) {
    Task.detached(priority: .userInitiated) {
        do {
            let attachment = try ImageEncoder().encode(rawBytes: data, sourceFilename: filename)
            await MainActor.run {
                isEncodingAttachment = false
                guard attachments.count < Self.maxAttachments else {
                    attachmentError = "Limit of \(Self.maxAttachments) images reached"
                    scheduleAttachmentErrorClear()
                    return
                }
                attachments.append(attachment)
            }
        } catch {
            await MainActor.run {
                isEncodingAttachment = false
                attachmentError = (error as? LocalizedError)?.errorDescription ?? "Couldn't encode image"
                Self.logger.warning("ImageEncoder failed: \(error.localizedDescription, privacy: .public)")
                scheduleAttachmentErrorClear()
            }
        }
    }
}
|
||||||
|
|
||||||
|
/// Clear the currently shown attachment error after four seconds.
///
/// Call this right after assigning `attachmentError`. The message
/// visible at scheduling time is remembered, and the timer only clears
/// the error if that same message is still showing — so a timer
/// started for an older error can't wipe a newer one early.
private func scheduleAttachmentErrorClear() {
    let scheduledMessage = attachmentError
    Task { @MainActor in
        try? await Task.sleep(nanoseconds: 4_000_000_000)
        if attachmentError == scheduledMessage {
            attachmentError = nil
        }
    }
}
|
||||||
|
|
||||||
|
/// Present a modal `NSOpenPanel` for picking images and queue each
/// chosen file for encoding. No-op where AppKit is unavailable.
///
/// Only as many files as there are free attachment slots are
/// ingested. Files that can't be read, and selections that exceed the
/// free slots, are reported through `attachmentError` instead of
/// being skipped silently.
private func presentImagePicker() {
    #if canImport(AppKit)
    let panel = NSOpenPanel()
    panel.allowsMultipleSelection = true
    panel.canChooseDirectories = false
    panel.canChooseFiles = true
    panel.allowedContentTypes = [.image, .png, .jpeg, .tiff, .heic]
    panel.message = "Choose images to attach"
    panel.prompt = "Attach"
    guard panel.runModal() == .OK else { return }

    let urls = panel.urls
    // Clamp at zero: `prefix(_:)` traps on a negative length, and the
    // queue may have filled up while the modal panel was open.
    let remainingSlots = max(0, Self.maxAttachments - attachments.count)

    var unreadableCount = 0
    for url in urls.prefix(remainingSlots) {
        guard let data = try? Data(contentsOf: url) else {
            unreadableCount += 1
            continue
        }
        isEncodingAttachment = true
        encode(data: data, filename: url.lastPathComponent)
    }

    if unreadableCount > 0 {
        attachmentError = "Couldn't read \(unreadableCount) of the selected files"
        scheduleAttachmentErrorClear()
    } else if urls.count > remainingSlots {
        attachmentError = "Limit of \(Self.maxAttachments) images reached — extra images were skipped"
        scheduleAttachmentErrorClear()
    }
    #endif
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private extension Array {
|
private extension Array {
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ import ScarfDesign
|
|||||||
/// can scroll horizontally inside the panes rather than losing them.
|
/// can scroll horizontally inside the panes rather than losing them.
|
||||||
struct RichChatView: View {
|
struct RichChatView: View {
|
||||||
@Bindable var richChat: RichChatViewModel
|
@Bindable var richChat: RichChatViewModel
|
||||||
var onSend: (String) -> Void
|
var onSend: (String, [ChatImageAttachment]) -> Void
|
||||||
var isEnabled: Bool
|
var isEnabled: Bool
|
||||||
@Environment(HermesFileWatcher.self) private var fileWatcher
|
@Environment(HermesFileWatcher.self) private var fileWatcher
|
||||||
@Environment(ChatViewModel.self) private var chatViewModel
|
@Environment(ChatViewModel.self) private var chatViewModel
|
||||||
|
|||||||
Reference in New Issue
Block a user