feat(tools): add structured metadata to tool results

This commit is contained in:
Peter Steinberger 2025-11-15 03:29:04 +00:00
parent 1ed44a9939
commit dc6123adfa
49 changed files with 1491 additions and 383 deletions

View File

@ -69,7 +69,7 @@
"generated_by": "poltergeist",
"project_type": "mixed",
"performance_profile": "balanced",
"generated_at": "2025-11-14T13:16:14.877Z",
"generated_at": "2025-11-15T02:45:36.462Z",
"total_exclusions": 53
}
}

View File

@ -172,10 +172,10 @@ var verbose: Bool { self.runtime?.configuration.verbose ?? self.runtimeOptions.v
private final class EscapeKeyMonitor {
private var source: (any DispatchSourceRead)?
private var originalTermios = termios()
private let handler: @Sendable () -> Void
private let handler: @Sendable () async -> Void
private let queue = DispatchQueue(label: "peekaboo.escape.monitor")
init(handler: @escaping @Sendable () -> Void) {
init(handler: @escaping @Sendable () async -> Void) {
self.handler = handler
}
@ -195,8 +195,8 @@ private final class EscapeKeyMonitor {
let count = read(STDIN_FILENO, &buffer, buffer.count)
guard count > 0 else { return }
if buffer[..<count].contains(0x1B) {
Task { @MainActor in
self.handler()
Task {
await self.handler()
}
}
}
@ -983,7 +983,7 @@ extension AgentCommand {
let cancelMonitor = EscapeKeyMonitor { [runTask] in
if !runTask.isCancelled {
runTask.cancel()
Task { @MainActor in
await MainActor.run {
print("\n\(TerminalColor.yellow)Esc pressed cancelling current run...\(TerminalColor.reset)")
}
}

View File

@ -134,6 +134,7 @@ extension AgentOutputDelegate {
}
let (formatter, toolType) = self.toolFormatter(for: name)
let summary = ToolEventSummary.from(resultJSON: json)
if let toolType, [ToolType.taskCompleted, .needMoreInformation, .needInfo].contains(toolType) {
self.handleCommunicationToolComplete(name: name, toolType: toolType)
@ -143,7 +144,11 @@ extension AgentOutputDelegate {
let success = (json["success"] as? Bool) ?? true
if success {
let resultSummary = self.resultSummary(for: name, json: json, formatter: formatter)
let resultSummary = self.resultSummary(
for: name,
json: json,
formatter: formatter,
summary: summary)
self.handleSuccess(
resultSummary: resultSummary,
durationString: durationString,
@ -326,18 +331,8 @@ extension AgentOutputDelegate {
}
private func successStatusLine(resultSummary: String, durationString: String) -> String {
let statusPrefix = [
" ",
TerminalColor.bgGreen,
TerminalColor.bold,
" ",
AgentDisplayTokens.Status.success,
" ",
TerminalColor.reset
].joined()
guard !resultSummary.isEmpty else {
return "\(statusPrefix)\(durationString)"
if resultSummary.isEmpty {
return " \(durationString)"
}
let summarySegment = [
@ -347,7 +342,7 @@ extension AgentOutputDelegate {
TerminalColor.reset
].joined()
return "\(statusPrefix)\(summarySegment)\(durationString)"
return "\(summarySegment)\(durationString)"
}
private func failureStatusLine(message: String, durationString: String) -> String {
@ -428,11 +423,20 @@ extension AgentOutputDelegate {
return (UnknownToolFormatter(toolName: name), nil)
}
private func resultSummary(for name: String, json: [String: Any], formatter: any ToolFormatter) -> String {
var summary = formatter.formatResultSummary(result: json)
private func resultSummary(
for name: String,
json: [String: Any],
formatter: any ToolFormatter,
summary: ToolEventSummary?
) -> String {
if let summaryText = summary?.shortDescription(toolName: name) {
return summaryText
}
var fallback = formatter.formatResultSummary(result: json)
guard name == "app" else {
return summary
return self.cleanToolPrefix(fallback)
}
if let meta = json["meta"] as? [String: Any],
@ -442,21 +446,21 @@ extension AgentOutputDelegate {
let text = firstContent["text"] as? String {
switch text {
case let value where value.contains("Launched"):
summary = "\(appName) launched"
fallback = "\(appName) launched"
case let value where value.contains("Quit"):
summary = "\(appName) quit"
fallback = "\(appName) quit"
case let value where value.contains("Focused") || value.contains("Switched"):
summary = "\(appName) focused"
fallback = "\(appName) focused"
case let value where value.contains("Hidden"):
summary = "\(appName) hidden"
fallback = "\(appName) hidden"
case let value where value.contains("Unhidden"):
summary = "\(appName) shown"
fallback = "\(appName) shown"
default:
break
}
}
return summary
return self.cleanToolPrefix(fallback)
}
private func handleSuccess(
@ -467,16 +471,11 @@ extension AgentOutputDelegate {
) {
switch self.outputMode {
case .minimal:
if !resultSummary.isEmpty {
print(" OK \(resultSummary)\(durationString)")
} else {
print(" OK\(durationString)")
}
let prefix = resultSummary.isEmpty ? "" : " \(resultSummary)"
print("\(prefix)\(durationString)")
case .verbose:
print(
" \(TerminalColor.green)\(AgentDisplayTokens.Status.success)\(TerminalColor.reset)\(durationString)"
)
print(" \(durationString)")
if let formatted = formatJSON(result) {
print("\(TerminalColor.gray)Result:\(TerminalColor.reset)")
print(formatted)
@ -530,8 +529,9 @@ extension AgentOutputDelegate {
guard self.outputMode != .minimal && self.outputMode != .quiet else { return }
guard let detail = self.primaryResultMessage(from: json) else { return }
let snippet = detail.trimmingCharacters(in: .whitespacesAndNewlines)
guard !snippet.isEmpty else { return }
print("\n \(TerminalColor.gray)\(snippet.prefix(240))\(TerminalColor.reset)")
let sanitized = self.cleanToolPrefix(snippet)
guard !sanitized.isEmpty else { return }
print("\n \(TerminalColor.gray)\(sanitized.prefix(240))\(TerminalColor.reset)")
}
private func primaryResultMessage(from json: [String: Any]) -> String? {

View File

@ -1,10 +1,10 @@
// This file is auto-generated by the build script. Do not edit manually.
enum Version {
static let current = "Peekaboo 3.0.0"
static let gitCommit = "7c99d16e-dirty"
static let gitCommitDate = "2025-11-14 14:34:54 +0000"
static let gitCommit = "b4c088fe-dirty"
static let gitCommitDate = "2025-11-14 19:52:55 +0000"
static let gitBranch = "main"
static let buildDate = "2025-11-14T14:40:52+00:00"
static let buildDate = "2025-11-14T20:57:47+00:00"
static var fullVersion: String {
return "\(current) (\(gitBranch)/\(gitCommit), built: \(buildDate))"

View File

@ -30,14 +30,14 @@ class ToolFormatterBridge {
// Format completed tool call
let resultDict = self.parseArguments(result)
let success = (resultDict["success"] as? Bool) ?? true
let summaryText = ToolEventSummary.from(resultJSON: resultDict)?
.shortDescription(toolName: name) ?? formatter.formatResultSummary(result: resultDict)
if success {
let summary = formatter.formatResultSummary(result: resultDict)
if !summary.isEmpty {
return "\(AgentDisplayTokens.Status.success) \(toolType.displayName): \(summary)"
} else {
return "\(AgentDisplayTokens.Status.success) \(toolType.displayName) completed"
if !summaryText.isEmpty {
return "\(AgentDisplayTokens.Status.success) \(toolType.displayName): \(summaryText)"
}
return "\(AgentDisplayTokens.Status.success) \(toolType.displayName) completed"
} else {
let error = (resultDict["error"] as? String) ?? "Failed"
return "\(AgentDisplayTokens.Status.failure) \(toolType.displayName): \(error)"
@ -79,6 +79,11 @@ class ToolFormatterBridge {
let formatter = ToolFormatterRegistry.shared.formatter(for: toolType)
let resultDict = self.parseArguments(result)
if let summary = ToolEventSummary.from(resultJSON: resultDict)?.shortDescription(toolName: name),
!summary.isEmpty
{
return summary
}
let summary = formatter.formatResultSummary(result: resultDict)
if !summary.isEmpty {
@ -135,8 +140,12 @@ class ToolFormatterBridge {
if let result {
let resultDict = self.parseArguments(result)
let success = (resultDict["success"] as? Bool) ?? true
let summaryText = ToolEventSummary.from(resultJSON: resultDict)?.shortDescription(toolName: name)
if success {
if let summaryText, !summaryText.isEmpty {
return "\(AgentDisplayTokens.Status.success) \(displayName): \(summaryText)"
}
return "\(AgentDisplayTokens.Status.success) \(displayName) completed"
} else {
let error = (resultDict["error"] as? String) ?? "Failed"

View File

@ -4,6 +4,7 @@
//
import Foundation
import PeekabooCore
/// Registry that manages all tool formatters for the Mac app
@MainActor
@ -70,6 +71,12 @@ final class MacToolFormatterRegistry {
return nil
}
if let summary = ToolEventSummary.from(resultJSON: json)?.shortDescription(toolName: toolName),
!summary.isEmpty
{
return summary
}
// Try to get formatter
if let formatter = formatter(for: toolName) {
return formatter.formatResult(toolName: toolName, result: json)

View File

@ -320,7 +320,10 @@ struct AnimationToggleRow: View {
@MainActor
private func previewTyping() async {
let sampleKeys = ["H", "e", "l", "l", "o"]
_ = await self.visualizerCoordinator.showTypingFeedback(keys: sampleKeys, duration: 2.0)
_ = await self.visualizerCoordinator.showTypingFeedback(
keys: sampleKeys,
duration: 2.0,
cadence: .human(wordsPerMinute: 60))
}
@MainActor

View File

@ -70,8 +70,10 @@ struct NewSessionButton: View {
Label("New Session", systemImage: "plus")
.font(.subheadline.weight(.semibold))
.frame(maxWidth: .infinity)
.foregroundStyle(.white.opacity(0.92))
.menuActionCapsule(fillOpacity: 0.16)
})
.buttonStyle(MenuActionButtonStyle())
.buttonStyle(.modern)
}
}
@ -84,8 +86,10 @@ struct ExpandButton: View {
Label("Expand", systemImage: "arrow.up.left.and.arrow.down.right")
.font(.subheadline.weight(.semibold))
.frame(maxWidth: .infinity)
.foregroundStyle(.white.opacity(0.92))
.menuActionCapsule(fillOpacity: 0.16)
})
.buttonStyle(MenuActionButtonStyle())
.buttonStyle(.modern)
}
}
@ -99,41 +103,24 @@ struct QuickActionsView: View {
Button(action: self.onOpenMainWindow, label: {
Label("Open Main Window", systemImage: "rectangle.stack")
.frame(maxWidth: .infinity)
.foregroundStyle(.white.opacity(0.92))
.menuActionCapsule(fillOpacity: 0.16)
})
.buttonStyle(MenuActionButtonStyle())
.buttonStyle(.modern)
Button(action: self.onCreateNewSession, label: {
Label("New Session", systemImage: "plus.circle")
.frame(maxWidth: .infinity)
.foregroundStyle(.white.opacity(0.92))
.menuActionCapsule(fillOpacity: 0.16)
})
.buttonStyle(MenuActionButtonStyle())
.buttonStyle(.modern)
}
}
}
// MARK: - Shared Styling
/// Button style for the menu's action buttons: white-tinted label on a translucent
/// rounded-rectangle background with a drop shadow and a small press animation.
struct MenuActionButtonStyle: ButtonStyle {
// The body is type-erased, so `makeBody` returns `AnyView` rather than an opaque type.
typealias Body = AnyView
/// Builds the styled button from the label supplied in `configuration`.
/// Background opacity, shadow opacity and scale all depend on `configuration.isPressed`.
func makeBody(configuration: Configuration) -> AnyView {
AnyView(
configuration.label
.foregroundStyle(.white.opacity(0.92))
.padding(.vertical, 10)
.padding(.horizontal, 12)
.background(
RoundedRectangle(cornerRadius: 16, style: .continuous)
// Fill is more opaque while pressed (0.24 vs 0.16).
.fill(Color.white.opacity(configuration.isPressed ? 0.24 : 0.16))
.overlay(
RoundedRectangle(cornerRadius: 16, style: .continuous)
.stroke(Color.white.opacity(0.2))))
// Shadow is lighter while pressed (0.1 vs 0.18).
.shadow(color: Color.black.opacity(configuration.isPressed ? 0.1 : 0.18), radius: 12, y: 8)
.scaleEffect(configuration.isPressed ? 0.98 : 1)
// Animate only in response to changes of the pressed state.
.animation(.easeOut(duration: 0.12), value: configuration.isPressed))
}
}
extension View {
fileprivate func menuActionCapsule(fillOpacity: Double) -> some View {
self
@ -145,5 +132,6 @@ extension View {
.overlay(
RoundedRectangle(cornerRadius: 16, style: .continuous)
.stroke(Color.white.opacity(0.15))))
.shadow(color: Color.black.opacity(0.18), radius: 12, y: 8)
}
}

View File

@ -191,7 +191,10 @@ struct VisualizerTestView: View {
func testTypeAnimation() async {
let keys = ["H", "e", "l", "l", "o", "Space", "W", "o", "r", "l", "d"]
_ = await self.coordinator.showTypingFeedback(keys: keys, duration: 3.0)
_ = await self.coordinator.showTypingFeedback(
keys: keys,
duration: 3.0,
cadence: .human(wordsPerMinute: 55))
}
func testScrollAnimation() async {

View File

@ -5,8 +5,10 @@
@preconcurrency import Foundation
import os
import PeekabooAutomation
import PeekabooCore
import PeekabooFoundation
import PeekabooProtocols
#if VISUALIZER_VERBOSE_LOGS
@inline(__always)
@ -129,7 +131,7 @@ final class VisualizerEventReceiver {
case let .annotatedScreenshot(imageData, elements, windowBounds, duration):
await self.coordinator.showAnnotatedScreenshot(
imageData: imageData,
elements: elements,
elements: self.convertDetectedElements(elements),
windowBounds: windowBounds,
duration: duration)
}
@ -142,4 +144,18 @@ final class VisualizerEventReceiver {
private static func parseEventID(from descriptor: String) -> UUID? {
descriptor.split(separator: "|", maxSplits: 1).first.flatMap { UUID(uuidString: String($0)) }
}
/// Re-creates protocol-layer detected elements as automation-layer `DetectedElement`
/// values, copying `id`, `type`, `label`, `value`, `bounds` and `isEnabled` one-to-one.
///
/// - Parameter elements: Elements carried by the annotated-screenshot event.
/// - Returns: The converted elements, in the same order as `elements`.
private func convertDetectedElements(
    _ elements: [PeekabooProtocols.DetectedElement]) -> [PeekabooAutomation.DetectedElement]
{
    var converted: [PeekabooAutomation.DetectedElement] = []
    converted.reserveCapacity(elements.count)
    for source in elements {
        let target = PeekabooAutomation.DetectedElement(
            id: source.id,
            type: source.type,
            label: source.label,
            value: source.value,
            bounds: source.bounds,
            isEnabled: source.isEnabled)
        converted.append(target)
    }
    return converted
}
}

View File

@ -301,7 +301,7 @@ extension PeekabooAgentService {
let toolResult = AgentToolResult.success(toolCallId: toolCall.id, result: result)
await self.sendToolCompletionEvent(
name: toolCall.name,
payload: self.toolResultPayload(from: result),
payload: self.toolResultPayload(from: result, toolName: toolCall.name),
eventHandler: context.eventHandler)
currentMessages.append(ModelMessage(role: .tool, content: [.toolResult(toolResult)]))
return toolResult
@ -337,10 +337,20 @@ extension PeekabooAgentService {
await eventHandler.send(.toolCallCompleted(name: name, result: payload))
}
private func toolResultPayload(from result: AnyAgentToolValue) -> String {
private func toolResultPayload(from result: AnyAgentToolValue, toolName: String) -> String {
do {
let jsonObject = try result.toJSON()
let wrapped: Any = jsonObject is [String: Any] ? jsonObject : ["result": jsonObject]
var wrapped: [String: Any]
if let dict = jsonObject as? [String: Any] {
wrapped = dict
} else {
wrapped = ["result": jsonObject]
}
if let summaryText = self.summaryText(from: wrapped, toolName: toolName) {
wrapped["summary_text"] = summaryText
}
let data = try JSONSerialization.data(withJSONObject: wrapped, options: [])
return String(data: data, encoding: .utf8) ?? "{}"
} catch {
@ -350,6 +360,17 @@ extension PeekabooAgentService {
}
}
/// Extracts the short, human-readable summary embedded in a tool result payload.
///
/// Reads the `meta.summary` dictionary, decodes it as a `ToolEventSummary`, and asks
/// that summary for its short description for `toolName`.
///
/// - Parameters:
///   - payload: The JSON-object form of the tool result.
///   - toolName: Name of the tool; forwarded to `shortDescription(toolName:)`.
/// - Returns: The short description, or `nil` when `meta` or `summary` is missing,
///   is not a dictionary, or cannot be decoded into a `ToolEventSummary`.
private func summaryText(from payload: [String: Any], toolName: String) -> String? {
    guard let meta = payload["meta"] as? [String: Any] else { return nil }
    guard let rawSummary = meta["summary"] as? [String: Any] else { return nil }
    guard let decoded = ToolEventSummary(json: rawSummary) else { return nil }
    return decoded.shortDescription(toolName: toolName)
}
private func toolErrorPayload(from error: any Error) -> String {
let errorDict = ["error": error.localizedDescription]
guard let data = try? JSONSerialization.data(withJSONObject: errorDict, options: []),

View File

@ -111,11 +111,23 @@ public struct AnalyzeTool: MCPTool {
"in \(String(format: "%.2f", duration))s.",
].joined(separator: " ")
let baseMeta: [String: Value] = [
"image_path": .string(imagePath),
"question": .string(question),
"provider": providerType != nil ? .string(providerType!) : .null,
"model": .string(modelName),
"execution_time": .double(duration),
]
let summary = ToolEventSummary(
actionDescription: "Image Analyze",
notes: question)
return ToolResponse(
content: [
.text(analysisText),
.text(timingMessage),
])
],
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
} catch {
self.logger.error("Analysis failed: \(error)")

View File

@ -305,24 +305,26 @@ private struct AppToolActions {
let countLine = "\(AgentDisplayTokens.Status.info) Found \(apps.count) running applications "
+ "in \(self.executionTimeString(from: executionTime))"
let baseMeta: [String: Value] = [
"apps": .array(
apps.map { app in
.object([
"name": .string(app.name),
"bundle_id": app.bundleIdentifier != nil ? .string(app.bundleIdentifier!) : .null,
"process_id": .double(Double(app.processIdentifier)),
"is_active": .bool(app.isActive),
"is_hidden": .bool(app.isHidden),
])
}),
"execution_time": .double(executionTime),
]
let summaryMeta = self.makeSummary(for: nil, action: "List Applications", notes: "Found \(apps.count) apps")
return ToolResponse(
content: [
.text(summary),
.text(countLine),
],
meta: .object([
"apps": .array(
apps.map { app in
.object([
"name": .string(app.name),
"bundle_id": app.bundleIdentifier != nil ? .string(app.bundleIdentifier!) : .null,
"process_id": .double(Double(app.processIdentifier)),
"is_active": .bool(app.isActive),
"is_hidden": .bool(app.isHidden),
])
}),
"execution_time": .double(executionTime),
]))
meta: ToolEventSummary.merge(summary: summaryMeta, into: .object(baseMeta)))
}
// MARK: Helpers
@ -371,15 +373,17 @@ private struct AppToolActions {
message += warningLine
}
let baseMeta: [String: Value] = [
"quit_count": .double(Double(quitCount)),
"failed": .array(failed.map(Value.string)),
"except": .array(excluded.map(Value.string)),
"execution_time": .double(executionTime),
"force": .bool(request.force),
]
let summary = self.makeSummary(for: nil, action: "Quit Applications", notes: "Quit \(quitCount) apps")
return ToolResponse(
content: [.text(message)],
meta: .object([
"quit_count": .double(Double(quitCount)),
"failed": .array(failed.map(Value.string)),
"except": .array(excluded.map(Value.string)),
"execution_time": .double(executionTime),
"force": .bool(request.force),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func buildResponse(
@ -396,24 +400,29 @@ private struct AppToolActions {
]
meta.merge(extraMeta) { $1 }
let summary = self.makeSummary(for: app, action: self.actionDescription(from: message), notes: nil)
return ToolResponse(
content: [.text(message)],
meta: .object(meta))
meta: ToolEventSummary.merge(summary: summary, into: .object(meta)))
}
private func focusResponse(app: ServiceApplicationInfo, startTime: Date, verb: String) -> ToolResponse {
let statusLine = "\(AgentDisplayTokens.Status.success) \(verb) \(app.name) (PID: \(app.processIdentifier))"
let baseMeta: [String: Value] = [
"app_name": .string(app.name),
"process_id": .double(Double(app.processIdentifier)),
"execution_time": .double(self.executionTime(since: startTime)),
]
let summary = self.makeSummary(for: app, action: verb, notes: nil)
return ToolResponse(
content: [.text(statusLine)],
meta: .object([
"app_name": .string(app.name),
"process_id": .double(Double(app.processIdentifier)),
"execution_time": .double(self.executionTime(since: startTime)),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func executionMeta(from startTime: Date) -> Value {
.object(["execution_time": .double(self.executionTime(since: startTime))])
let baseMeta: Value = .object(["execution_time": .double(self.executionTime(since: startTime))])
let summary = self.makeSummary(for: nil, action: "Switch Applications", notes: nil)
return ToolEventSummary.merge(summary: summary, into: baseMeta)
}
private func executionTime(since startTime: Date) -> Double {
@ -428,6 +437,22 @@ private struct AppToolActions {
"\(String(format: "%.2f", interval))s"
}
/// Builds the `ToolEventSummary` attached to app-tool responses.
///
/// - Parameters:
///   - app: The application the action applied to, or `nil` when there is no single app.
///   - action: Text stored as the summary's `actionDescription`.
///   - notes: Optional free-form notes stored on the summary.
/// - Returns: A summary whose `targetApp` is the app's name and whose `elementValue`
///   is the app's bundle identifier; both are `nil` when `app` is `nil`.
private func makeSummary(for app: ServiceApplicationInfo?, action: String, notes: String?) -> ToolEventSummary {
    let appName = app?.name
    let bundleID = app?.bundleIdentifier

    var result = ToolEventSummary(
        targetApp: appName,
        actionDescription: action,
        notes: notes)
    result.elementValue = bundleID
    return result
}
/// Derives an action label from a status message by taking its second
/// space-separated token.
///
/// Runs of spaces are collapsed, because empty tokens are dropped by `split`.
/// NOTE(review): presumably the first token is a status marker and the second the verb — confirm against callers.
///
/// - Parameter message: The status line to inspect.
/// - Returns: The second token, or `"App"` when the message has fewer than two tokens.
private func actionDescription(from message: String) -> String {
    let tokens = message.split(separator: " ")
    if tokens.count > 1 {
        return String(tokens[1])
    }
    return "App"
}
private func identifier(for app: ServiceApplicationInfo) -> String {
if let bundleId = app.bundleIdentifier, !bundleId.isEmpty {
return bundleId

View File

@ -112,13 +112,21 @@ public struct ClickTool: MCPTool {
let element = try await self.requireElement(id: identifier, session: session)
return ClickResolution(
location: element.centerPoint,
elementDescription: element.humanDescription)
elementDescription: element.humanDescription,
targetApp: session.applicationName,
windowTitle: session.windowTitle,
elementRole: element.humanRole,
elementLabel: element.displayLabel)
case let .query(text):
let session = try await self.requireSession(id: request.sessionId)
let element = try await self.findElement(matching: text, session: session)
return ClickResolution(
location: element.centerPoint,
elementDescription: element.humanDescription)
elementDescription: element.humanDescription,
targetApp: session.applicationName,
windowTitle: session.windowTitle,
elementRole: element.humanRole,
elementLabel: element.displayLabel)
}
}
@ -150,9 +158,22 @@ public struct ClickTool: MCPTool {
"clicked_element": resolution.elementDescription.map(Value.string) ?? .null,
]
let summary = ToolEventSummary(
targetApp: resolution.targetApp,
windowTitle: resolution.windowTitle,
elementRole: resolution.elementRole,
elementLabel: resolution.elementLabel,
actionDescription: intent.displayVerb,
coordinates: ToolEventSummary.Coordinates(
x: Double(resolution.location.x),
y: Double(resolution.location.y))
)
let metaValue = ToolEventSummary.merge(summary: summary, into: .object(metaDict))
return ToolResponse(
content: [.text(message)],
meta: .object(metaDict))
meta: metaValue)
}
private func parseCoordinates(_ raw: String) throws -> CGPoint {
@ -232,6 +253,26 @@ private enum ClickRequestTarget {
/// Outcome of resolving a click target: where to click, plus optional context that
/// describes what is being clicked.
private struct ClickResolution {
    /// Screen point that will be clicked.
    let location: CGPoint
    /// Human-readable description of the clicked element, if one was resolved.
    let elementDescription: String?
    /// Name of the application the target belongs to, when known.
    let targetApp: String?
    /// Title of the window the target belongs to, when known.
    let windowTitle: String?
    /// Role text of the resolved element, when known.
    let elementRole: String?
    /// Display label of the resolved element, when known.
    let elementLabel: String?

    /// Creates a resolution. Only `location` and `elementDescription` are required;
    /// every contextual field defaults to `nil`.
    init(
        location: CGPoint,
        elementDescription: String?,
        targetApp: String? = nil,
        windowTitle: String? = nil,
        elementRole: String? = nil,
        elementLabel: String? = nil)
    {
        // Required values.
        self.location = location
        self.elementDescription = elementDescription

        // Optional context.
        self.targetApp = targetApp
        self.windowTitle = windowTitle
        self.elementRole = elementRole
        self.elementLabel = elementLabel
    }
}
private struct ClickIntent {
@ -265,4 +306,12 @@ extension UIElement {
fileprivate var humanDescription: String {
"\(self.role): \(self.title ?? self.label ?? "untitled")"
}
/// Role text used in tool summaries: the element's `roleDescription` when it is
/// non-nil, otherwise its `role`.
fileprivate var humanRole: String? {
    let described = self.roleDescription
    return described ?? self.role
}
/// Label used in tool summaries: the first available of `title`, `label`, then `value`.
fileprivate var displayLabel: String? {
    let titleOrLabel = self.title ?? self.label
    return titleOrLabel ?? self.value
}
}

View File

@ -163,6 +163,10 @@ public struct DialogTool: MCPTool {
let summary =
"\(AgentDisplayTokens.Status.success) Clicked button '\(button)' in " +
"\(Self.formattedDuration(executionTime))s"
let summaryMeta = ToolEventSummary(
targetApp: window,
actionDescription: "Dialog Button",
notes: button)
return self.successResponse(
message: summary,
meta: [
@ -171,7 +175,8 @@ public struct DialogTool: MCPTool {
"success": .bool(result.success),
"execution_time": .double(executionTime),
"details": .object(result.details.mapValues { .string($0) }),
])
],
summary: summaryMeta)
} else {
return ToolResponse
.error("Failed to click button '\(button)': \(result.details["error"] ?? "Unknown error")")
@ -196,6 +201,10 @@ public struct DialogTool: MCPTool {
let message =
"\(AgentDisplayTokens.Status.success) Entered text '\(request.text)' into \(fieldDesc)\(clearSuffix) " +
"in \(Self.formattedDuration(executionTime))s"
let summaryMeta = ToolEventSummary(
targetApp: request.window,
actionDescription: "Dialog Input",
notes: fieldDesc)
return self.successResponse(
message: message,
meta: [
@ -206,7 +215,8 @@ public struct DialogTool: MCPTool {
"success": .bool(result.success),
"execution_time": .double(executionTime),
"details": .object(result.details.mapValues { .string($0) }),
])
],
summary: summaryMeta)
} else {
return ToolResponse.error("Failed to enter text: \(result.details["error"] ?? "Unknown error")")
}
@ -227,6 +237,9 @@ public struct DialogTool: MCPTool {
let summary =
"\(AgentDisplayTokens.Status.success) Selected file '\(selection.path)' " +
"in \(Self.formattedDuration(executionTime))s"
let summaryMeta = ToolEventSummary(
actionDescription: "Dialog File",
notes: selection.filename)
return self.successResponse(
message: summary,
meta: [
@ -237,7 +250,8 @@ public struct DialogTool: MCPTool {
"success": .bool(result.success),
"execution_time": .double(executionTime),
"details": .object(result.details.mapValues { .string($0) }),
])
],
summary: summaryMeta)
} else {
return ToolResponse.error("Failed to select file: \(result.details["error"] ?? "Unknown error")")
}
@ -256,6 +270,10 @@ public struct DialogTool: MCPTool {
let summary =
"\(AgentDisplayTokens.Status.success) Dismissed dialog using \(method) in " +
"\(Self.formattedDuration(executionTime))s"
let summaryMeta = ToolEventSummary(
targetApp: request.window,
actionDescription: "Dialog Dismiss",
notes: method)
return self.successResponse(
message: summary,
meta: [
@ -264,14 +282,15 @@ public struct DialogTool: MCPTool {
"success": .bool(result.success),
"execution_time": .double(executionTime),
"details": .object(result.details.mapValues { .string($0) }),
])
],
summary: summaryMeta)
} else {
return ToolResponse.error("Failed to dismiss dialog: \(result.details["error"] ?? "Unknown error")")
}
}
private func successResponse(message: String, meta: [String: Value]) -> ToolResponse {
ToolResponse(content: [.text(message)], meta: .object(meta))
private func successResponse(message: String, meta: [String: Value], summary: ToolEventSummary) -> ToolResponse {
ToolResponse(content: [.text(message)], meta: ToolEventSummary.merge(summary: summary, into: .object(meta)))
}
static func formattedDuration(_ duration: TimeInterval) -> String {
@ -387,9 +406,13 @@ private struct DialogListFormatter {
let executionTime: TimeInterval
func response() -> ToolResponse {
ToolResponse(
let summary = ToolEventSummary(
targetApp: self.elements.dialogInfo.title,
actionDescription: "List Dialog",
notes: self.elements.dialogInfo.title)
return ToolResponse(
content: [.text(self.renderContent())],
meta: .object(self.metaDictionary()))
meta: ToolEventSummary.merge(summary: summary, into: .object(self.metaDictionary())))
}
private func renderContent() -> String {

View File

@ -116,12 +116,17 @@ public struct DockTool: MCPTool {
let duration = self.formatDuration(executionTime)
let message = "\(AgentDisplayTokens.Status.success) Launched \(app) from dock in \(duration)"
let baseMeta: [String: Value] = [
"app_name": .string(app),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
targetApp: app,
actionDescription: "Dock Launch",
notes: nil)
return ToolResponse(
content: [.text(message)],
meta: .object([
"app_name": .string(app),
"execution_time": .double(executionTime),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func handleRightClick(
@ -144,13 +149,18 @@ public struct DockTool: MCPTool {
}
message += " in \(self.formatDuration(executionTime))"
let baseMeta: [String: Value] = [
"app_name": .string(app),
"menu_item": menuItem != nil ? .string(menuItem!) : .null,
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
targetApp: app,
actionDescription: "Dock Menu",
notes: menuItem ?? "Context menu")
return ToolResponse(
content: [.text(message)],
meta: .object([
"app_name": .string(app),
"menu_item": menuItem != nil ? .string(menuItem!) : .null,
"execution_time": .double(executionTime),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func handleHide(
@ -164,12 +174,14 @@ public struct DockTool: MCPTool {
let duration = self.formatDuration(executionTime)
let message = "\(AgentDisplayTokens.Status.success) Hidden dock (enabled auto-hide) in \(duration)"
let baseMeta: [String: Value] = [
"auto_hide_enabled": .bool(true),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(actionDescription: "Dock Hide", notes: nil)
return ToolResponse(
content: [.text(message)],
meta: .object([
"auto_hide_enabled": .bool(true),
"execution_time": .double(executionTime),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func handleShow(
@ -183,12 +195,14 @@ public struct DockTool: MCPTool {
let duration = self.formatDuration(executionTime)
let message = "\(AgentDisplayTokens.Status.success) Shown dock (disabled auto-hide) in \(duration)"
let baseMeta: [String: Value] = [
"auto_hide_enabled": .bool(false),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(actionDescription: "Dock Show", notes: nil)
return ToolResponse(
content: [.text(message)],
meta: .object([
"auto_hide_enabled": .bool(false),
"execution_time": .double(executionTime),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func handleList(
@ -220,30 +234,34 @@ public struct DockTool: MCPTool {
"""
.trimmingCharacters(in: .whitespacesAndNewlines)
let baseMeta: [String: Value] = [
"dock_item_count": .double(Double(dockItems.count)),
"include_all": .bool(includeAll),
"dock_items": .array(dockItems.map { item in
.object([
"index": .double(Double(item.index)),
"title": .string(item.title),
"item_type": .string(item.itemType.rawValue),
"is_running": item.isRunning != nil ? .bool(item.isRunning!) : .null,
"bundle_identifier": item.bundleIdentifier != nil ? .string(item.bundleIdentifier!) : .null,
"position": item.position != nil ? .object([
"x": .double(Double(item.position!.x)),
"y": .double(Double(item.position!.y)),
]) : .null,
"size": item.size != nil ? .object([
"width": .double(Double(item.size!.width)),
"height": .double(Double(item.size!.height)),
]) : .null,
])
}),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
actionDescription: "Dock List",
notes: "\(dockItems.count) items")
return ToolResponse(
content: [.text(message)],
meta: .object([
"dock_item_count": .double(Double(dockItems.count)),
"include_all": .bool(includeAll),
"dock_items": .array(dockItems.map { item in
.object([
"index": .double(Double(item.index)),
"title": .string(item.title),
"item_type": .string(item.itemType.rawValue),
"is_running": item.isRunning != nil ? .bool(item.isRunning!) : .null,
"bundle_identifier": item.bundleIdentifier != nil ? .string(item.bundleIdentifier!) : .null,
"position": item.position != nil ? .object([
"x": .double(Double(item.position!.x)),
"y": .double(Double(item.position!.y)),
]) : .null,
"size": item.size != nil ? .object([
"width": .double(Double(item.size!.width)),
"height": .double(Double(item.size!.height)),
]) : .null,
])
}),
"execution_time": .double(executionTime),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func formatDuration(_ duration: TimeInterval) -> String {

View File

@ -75,23 +75,23 @@ public struct DragTool: MCPTool {
do {
let startTime = Date()
let (fromPoint, fromDescription) = try await self.resolveLocation(
let fromPoint = try await self.resolveLocation(
target: request.fromTarget,
sessionId: request.sessionId,
parameterName: "from")
let (toPoint, toDescription) = try await self.resolveLocation(
let toPoint = try await self.resolveLocation(
target: request.toTarget,
sessionId: request.sessionId,
parameterName: "to")
guard fromPoint != toPoint else {
guard fromPoint.point != toPoint.point else {
return ToolResponse.error("Start and end points must be different")
}
try await self.focusTargetAppIfNeeded(request: request)
self.logSpaceIntentIfNeeded(request: request)
let distance = hypot(toPoint.x - fromPoint.x, toPoint.y - fromPoint.y)
let distance = hypot(toPoint.point.x - fromPoint.point.x, toPoint.point.y - fromPoint.point.y)
let movement = request.profile.resolveParameters(
smooth: true,
durationOverride: request.durationOverride,
@ -102,8 +102,8 @@ public struct DragTool: MCPTool {
)
try await self.context.automation.drag(
from: fromPoint,
to: toPoint,
from: fromPoint.point,
to: toPoint.point,
duration: movement.duration,
steps: movement.steps,
modifiers: request.modifiers,
@ -111,8 +111,8 @@ public struct DragTool: MCPTool {
let executionTime = Date().timeIntervalSince(startTime)
return self.buildResponse(
from: DragPointDescription(point: fromPoint, description: fromDescription),
to: DragPointDescription(point: toPoint, description: toDescription),
from: fromPoint,
to: toPoint,
movement: movement,
executionTime: executionTime,
request: request)
@ -131,18 +131,24 @@ public struct DragTool: MCPTool {
private func resolveLocation(
target: DragLocationInput,
sessionId: String?,
parameterName: String) async throws -> (CGPoint, String)
parameterName: String) async throws -> DragPointDescription
{
switch target {
case let .coordinates(raw):
let point = try self.parseCoordinates(raw, parameterName: parameterName)
return (point, "(\(Int(point.x)), \(Int(point.y)))")
return DragPointDescription(point: point, description: "(\(Int(point.x)), \(Int(point.y)))")
case let .element(query):
guard let session = await self.getSession(id: sessionId) else {
throw CoordinateParseError(message: "No active session. Run 'see' command first to capture UI state.")
}
if let element = await session.getElement(byId: query) {
return (element.centerPoint, "element \(query) (\(element.humanDescription))")
return DragPointDescription(
point: element.centerPoint,
description: "element \(query) (\(element.humanDescription))",
targetApp: session.applicationName,
windowTitle: session.windowTitle,
elementRole: element.summaryRole,
elementLabel: element.summaryLabel)
}
let elements = await session.uiElements
@ -158,7 +164,13 @@ public struct DragTool: MCPTool {
}
let element = matches.first { $0.isActionable } ?? matches[0]
return (element.centerPoint, element.humanDescription)
return DragPointDescription(
point: element.centerPoint,
description: element.humanDescription,
targetApp: session.applicationName,
windowTitle: session.windowTitle,
elementRole: element.summaryRole,
elementLabel: element.summaryLabel)
}
}
@ -266,7 +278,24 @@ public struct DragTool: MCPTool {
metaData["target_app"] = .string(toApp)
}
return ToolResponse(content: [.text(message)], meta: .object(metaData))
let summary = ToolEventSummary(
targetApp: request.targetApp ?? to.targetApp ?? from.targetApp,
windowTitle: to.windowTitle ?? from.windowTitle,
elementRole: to.elementRole ?? from.elementRole,
elementLabel: to.elementLabel ?? from.elementLabel,
actionDescription: "Drag",
coordinates: ToolEventSummary.Coordinates(
x: Double(to.point.x),
y: Double(to.point.y)),
pointerProfile: movement.profileName,
pointerDistance: Double(distance),
pointerDirection: pointerDirection(from: from.point, to: to.point),
pointerDurationMs: Double(movement.duration),
notes: "from \(from.description) to \(to.description)")
let metaValue = ToolEventSummary.merge(summary: summary, into: .object(metaData))
return ToolResponse(content: [.text(message)], meta: metaValue)
}
private struct CoordinateParseError: Swift.Error {
@ -367,6 +396,26 @@ private struct DragToolError: Swift.Error {
/// One resolved endpoint of a drag, plus the optional context used to build the
/// structured tool summary.
private struct DragPointDescription {
/// Coordinates handed to the automation service for this endpoint.
let point: CGPoint
/// Human-readable text for the endpoint (formatted coordinates or an element description).
let description: String
/// Application name taken from the session when the endpoint was resolved from an element.
let targetApp: String?
/// Window title taken from the session when the endpoint was resolved from an element.
let windowTitle: String?
/// Summary role of the resolved element, if any.
let elementRole: String?
/// Summary label of the resolved element, if any.
let elementLabel: String?
/// Creates an endpoint description.
///
/// The context arguments default to `nil` so endpoints built from raw
/// coordinates only need to supply `point` and `description`.
init(
point: CGPoint,
description: String,
targetApp: String? = nil,
windowTitle: String? = nil,
elementRole: String? = nil,
elementLabel: String? = nil)
{
self.point = point
self.description = description
self.targetApp = targetApp
self.windowTitle = windowTitle
self.elementRole = elementRole
self.elementLabel = elementLabel
}
}
extension UIElement {

View File

@ -85,14 +85,21 @@ public struct HotkeyTool: MCPTool {
let message = "\(AgentDisplayTokens.Status.success) Pressed \(formattedKeys) " +
"(held for \(holdDurationMs)ms) in \(durationText)s"
let baseMeta: Value = .object([
"keys": .string(keys),
"hold_duration": .double(Double(holdDurationMs)),
"execution_time": .double(executionTime),
"formatted_keys": .string(formattedKeys),
])
let summary = ToolEventSummary(
actionDescription: "Hotkey",
waitDurationMs: Double(holdDurationMs),
notes: formattedKeys)
return ToolResponse(
content: [.text(message)],
meta: .object([
"keys": .string(keys),
"hold_duration": .double(Double(holdDurationMs)),
"execution_time": .double(executionTime),
"formatted_keys": .string(formattedKeys),
]))
meta: ToolEventSummary.merge(summary: summary, into: baseMeta))
} catch {
self.logger.error("Hotkey execution failed: \(error)")

View File

@ -156,13 +156,18 @@ extension ImageTool {
let imagePath = try savedFiles.first?.path ?? saveTemporaryImage(firstCapture.imageData)
let analysis = try await analyzeImage(at: imagePath, question: question)
let baseMeta: [String: Value] = [
"model": .string(analysis.modelUsed),
"savedFiles": .array(savedFiles.map { Value.string($0.path) }),
"question": .string(question),
]
let summary = ToolEventSummary(
actionDescription: "Image Analyze",
notes: question)
return ToolResponse.text(
analysis.text,
meta: .object([
"model": .string(analysis.modelUsed),
"savedFiles": .array(savedFiles.map { Value.string($0.path) }),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func buildCaptureResponse(
@ -170,7 +175,19 @@ extension ImageTool {
savedFiles: [MCPSavedFile],
captureResults: [CaptureResult]) -> ToolResponse
{
let meta = Value.object(["savedFiles": .array(savedFiles.map { Value.string($0.path) })])
let baseMeta = Value.object(["savedFiles": .array(savedFiles.map { Value.string($0.path) })])
let captureNote: String
if savedFiles.isEmpty {
captureNote = "Captured image"
} else if savedFiles.count == 1, let label = savedFiles.first?.item_label {
captureNote = label
} else {
captureNote = "Captured \(savedFiles.count) images"
}
let summary = ToolEventSummary(
actionDescription: "Image Capture",
notes: captureNote)
let meta = ToolEventSummary.merge(summary: summary, into: baseMeta)
if format == .data, let capture = captureResults.first, captureResults.count == 1 {
return ToolResponse.image(data: capture.imageData, mimeType: "image/png", meta: meta)

View File

@ -109,7 +109,12 @@ public struct ListTool: MCPTool {
lines.append(activeLine)
}
return ToolResponse.text(lines.joined(separator: "\n"))
let summary = ToolEventSummary(
actionDescription: "List Applications",
notes: "\(apps.count) running")
return ToolResponse.text(
lines.joined(separator: "\n"),
meta: ToolEventSummary.merge(summary: summary, into: nil))
} catch {
return ToolResponse.error("Failed to list applications: \(error.localizedDescription)")
}
@ -197,8 +202,9 @@ public struct ListTool: MCPTool {
sections.append("- Architecture: \(ProcessInfo.processInfo.processorArchitecture)")
let fullStatus = sections.joined(separator: "\n")
let summary = ToolEventSummary(actionDescription: "Server Status", notes: nil)
return ToolResponse.text(fullStatus)
return ToolResponse.text(fullStatus, meta: ToolEventSummary.merge(summary: summary, into: nil))
}
}
@ -271,7 +277,17 @@ private struct WindowListFormatter {
var lines = self.headerLines()
lines.append("")
lines.append(contentsOf: self.windowLines())
return ToolResponse.text(lines.joined(separator: "\n"))
let baseMeta: Value = .object([
"window_count": .int(self.windows.count),
"app": self.appInfo?.name != nil ? .string(self.appInfo!.name) : .string(self.identifier),
])
let summary = ToolEventSummary(
targetApp: self.appInfo?.name ?? self.identifier,
actionDescription: "List Windows",
notes: "\(self.windows.count) windows")
return ToolResponse.text(
lines.joined(separator: "\n"),
meta: ToolEventSummary.merge(summary: summary, into: baseMeta))
}
private func headerLines() -> [String] {

View File

@ -127,12 +127,17 @@ public struct MCPAgentTool: MCPTool {
])
}
let meta = Value.object([
let baseMeta = Value.object([
"sessionCount": .string(String(sessions.count)),
"sessions": .array(sessionsArray),
])
let summaryMeta = ToolEventSummary(
actionDescription: "List agent sessions",
notes: "\(sessions.count) session\(sessions.count == 1 ? "" : "s")")
return ToolResponse.text("Available Sessions:\n\n\(summary)", meta: meta)
return ToolResponse.text(
"Available Sessions:\n\n\(summary)",
meta: ToolEventSummary.merge(summary: summaryMeta, into: baseMeta))
}
private func renderSessionSummaries(_ sessions: [SessionSummary]) -> String {
@ -185,12 +190,17 @@ public struct MCPAgentTool: MCPTool {
}
private func formatResult(result: AgentExecutionResult, input: AgentInput) -> ToolResponse {
let summary = self.summary(for: result)
if input.quiet {
return ToolResponse.text(result.content)
return ToolResponse.text(result.content, meta: ToolEventSummary.merge(summary: summary, into: nil))
}
if input.verbose {
return ToolResponse.text(result.content, meta: self.verboseMetadata(for: result))
let verboseMeta = self.verboseMetadata(for: result)
return ToolResponse.text(
result.content,
meta: ToolEventSummary.merge(summary: summary, into: verboseMeta))
}
var output = result.content
@ -209,8 +219,25 @@ public struct MCPAgentTool: MCPTool {
output += tokensLine
}
let meta = result.sessionId.map { Value.object(["sessionId": .string($0)]) }
return ToolResponse.text(output, meta: meta)
let baseMeta = result.sessionId.map { Value.object(["sessionId": .string($0)]) }
return ToolResponse.text(output, meta: ToolEventSummary.merge(summary: summary, into: baseMeta))
}
/// Condenses an agent run into a `ToolEventSummary`.
///
/// The notes line lists, in order, the model name (when non-empty), the number
/// of tool calls (when greater than zero) and the total token count (when usage
/// is available). Notes are `nil` when none of those apply.
///
/// - Parameter result: The finished agent execution to summarize.
/// - Returns: A summary whose action description is "Agent run".
private func summary(for result: AgentExecutionResult) -> ToolEventSummary {
    let metadata = result.metadata
    let callCount = metadata.toolCallCount

    // Each slot is nil when its detail does not apply; order matches the output order.
    let fragments: [String?] = [
        metadata.modelName.isEmpty ? nil : "Model \(metadata.modelName)",
        callCount > 0 ? "\(callCount) tool call\(callCount == 1 ? "" : "s")" : nil,
        result.usage.map { "\($0.totalTokens) tokens total" },
    ]
    let details = fragments.compactMap { $0 }
    let notes: String? = details.isEmpty ? nil : details.joined(separator: " · ")

    return ToolEventSummary(
        actionDescription: "Agent run",
        notes: notes)
}
private func verboseMetadata(for result: AgentExecutionResult) -> Value {

View File

@ -90,13 +90,18 @@ public struct MenuTool: MCPTool {
let menuStructure = try await self.context.menu.listMenus(for: app)
let formattedOutput = self.formatMenuStructure(menuStructure)
let baseMeta: Value = .object([
"app": .string(menuStructure.application.name),
"total_menus": .int(menuStructure.menus.count),
"total_items": .int(menuStructure.totalItems),
])
let summary = ToolEventSummary(
targetApp: menuStructure.application.name,
actionDescription: "List Menus",
notes: "\(menuStructure.menus.count) menus / \(menuStructure.totalItems) items")
return ToolResponse.text(
formattedOutput,
meta: .object([
"app": .string(menuStructure.application.name),
"total_menus": .int(menuStructure.menus.count),
"total_items": .int(menuStructure.totalItems),
]))
meta: ToolEventSummary.merge(summary: summary, into: baseMeta))
} catch {
return ToolResponse.error("Failed to list menus for app '\(app)': \(error.localizedDescription)")
}
@ -130,12 +135,16 @@ public struct MenuTool: MCPTool {
output += "\(menuInfo.app): \(menuInfo.menuCount) menus, \(menuInfo.itemCount) items\n"
}
let baseMeta: Value = .object([
"total_apps": .int(allMenus.count),
"apps": .array(allMenus.map { .string($0.app) }),
])
let summary = ToolEventSummary(
actionDescription: "List All Menus",
notes: "\(allMenus.count) apps")
return ToolResponse.text(
output,
meta: .object([
"total_apps": .int(allMenus.count),
"apps": .array(allMenus.map { .string($0.app) }),
]))
meta: ToolEventSummary.merge(summary: summary, into: baseMeta))
} catch {
return ToolResponse.error("Failed to list all menus: \(error.localizedDescription)")
}
@ -150,7 +159,13 @@ public struct MenuTool: MCPTool {
if let path = arguments.getString("path") {
do {
try await self.context.menu.clickMenuItem(app: app, itemPath: path)
return ToolResponse.text("\(AgentDisplayTokens.Status.success) Successfully clicked menu item: \(path)")
let summary = ToolEventSummary(
targetApp: app,
actionDescription: "Menu Click",
notes: path)
return ToolResponse.text(
"\(AgentDisplayTokens.Status.success) Successfully clicked menu item: \(path)",
meta: ToolEventSummary.merge(summary: summary, into: nil))
} catch {
return ToolResponse
.error("Failed to click menu item '\(path)' in app '\(app)': \(error.localizedDescription)")
@ -158,7 +173,13 @@ public struct MenuTool: MCPTool {
} else if let item = arguments.getString("item") {
do {
try await self.context.menu.clickMenuItemByName(app: app, itemName: item)
return ToolResponse.text("\(AgentDisplayTokens.Status.success) Successfully clicked menu item: \(item)")
let summary = ToolEventSummary(
targetApp: app,
actionDescription: "Menu Click",
notes: item)
return ToolResponse.text(
"\(AgentDisplayTokens.Status.success) Successfully clicked menu item: \(item)",
meta: ToolEventSummary.merge(summary: summary, into: nil))
} catch {
return ToolResponse
.error("Failed to click menu item '\(item)' in app '\(app)': \(error.localizedDescription)")
@ -176,8 +197,12 @@ public struct MenuTool: MCPTool {
do {
try await self.context.menu.clickMenuExtra(title: title)
return ToolResponse
.text("\(AgentDisplayTokens.Status.success) Successfully clicked system menu extra: \(title)")
let summary = ToolEventSummary(
actionDescription: "Menu Extra",
notes: title)
return ToolResponse.text(
"\(AgentDisplayTokens.Status.success) Successfully clicked system menu extra: \(title)",
meta: ToolEventSummary.merge(summary: summary, into: nil))
} catch {
return ToolResponse.error("Failed to click system menu extra '\(title)': \(error.localizedDescription)")
}

View File

@ -211,11 +211,17 @@ public struct MoveTool: MCPTool {
let location = CGPoint(x: element.frame.midX, y: element.frame.midY)
let label = element.title ?? element.label ?? "untitled"
let summary = "element \(elementId) (\(element.role): \(label))"
return ResolvedMoveTarget(location: location, description: summary)
return ResolvedMoveTarget(
location: location,
description: summary,
targetApp: session.applicationName,
windowTitle: session.windowTitle,
elementRole: element.summaryRole,
elementLabel: element.summaryLabel)
}
}
private func performMovement(to location: CGPoint, request: MoveRequest) async throws -> MovementParameters {
private func performMovement(to location: CGPoint, request: MoveRequest) async throws -> MovementExecution {
let automation = self.context.automation
let currentLocation = CGEvent(source: nil)?.location ?? .zero
let distance = hypot(location.x - currentLocation.x, location.y - currentLocation.y)
@ -236,35 +242,68 @@ public struct MoveTool: MCPTool {
profile: movement.profile
)
}
return movement
return MovementExecution(
parameters: movement,
startPoint: currentLocation,
distance: distance,
direction: pointerDirection(from: currentLocation, to: location)
)
}
private func buildResponse(
target: ResolvedMoveTarget,
movement: MovementParameters,
movement: MovementExecution,
executionTime: TimeInterval) -> ToolResponse
{
var message = "\(AgentDisplayTokens.Status.success) Moved mouse cursor to \(target.description)"
message += " using \(movement.profileName) profile"
if movement.smooth {
message += " (\(movement.duration)ms, \(movement.steps) steps)"
message += " using \(movement.parameters.profileName) profile"
if movement.parameters.smooth {
message += " (\(movement.parameters.duration)ms, \(movement.parameters.steps) steps)"
}
message += " in \(String(format: "%.2f", executionTime))s"
var metaDict: [String: Value] = [
"target_location": .object([
"x": .double(Double(target.location.x)),
"y": .double(Double(target.location.y)),
]),
"target_description": .string(target.description),
"smooth": .bool(movement.parameters.smooth),
"profile": .string(movement.parameters.profileName),
"duration": movement.parameters.smooth ? .double(Double(movement.parameters.duration)) : .null,
"steps": movement.parameters.smooth ? .double(Double(movement.parameters.steps)) : .null,
"execution_time": .double(executionTime),
"distance": .double(Double(movement.distance)),
"start_location": .object([
"x": .double(Double(movement.startPoint.x)),
"y": .double(Double(movement.startPoint.y)),
]),
]
if let direction = movement.direction {
metaDict["direction"] = .string(direction)
}
let summary = ToolEventSummary(
targetApp: target.targetApp,
windowTitle: target.windowTitle,
elementRole: target.elementRole,
elementLabel: target.elementLabel,
actionDescription: "Move cursor",
coordinates: ToolEventSummary.Coordinates(
x: Double(target.location.x),
y: Double(target.location.y)),
pointerProfile: movement.parameters.profileName,
pointerDistance: Double(movement.distance),
pointerDirection: movement.direction,
pointerDurationMs: Double(movement.parameters.duration),
notes: target.description)
let metaValue = ToolEventSummary.merge(summary: summary, into: .object(metaDict))
return ToolResponse(
content: [.text(message)],
meta: .object([
"target_location": .object([
"x": .double(Double(target.location.x)),
"y": .double(Double(target.location.y)),
]),
"target_description": .string(target.description),
"smooth": .bool(movement.smooth),
"profile": .string(movement.profileName),
"duration": movement.smooth ? .double(Double(movement.duration)) : .null,
"steps": movement.smooth ? .double(Double(movement.steps)) : .null,
"execution_time": .double(executionTime),
]))
meta: metaValue)
}
private func getSession(id: String?) async -> UISession? {
@ -307,6 +346,33 @@ private struct MoveRequest {
/// Destination of a cursor move, plus the optional context used to build the
/// structured tool summary.
private struct ResolvedMoveTarget {
/// Point the cursor is moved to.
let location: CGPoint
/// Human-readable description of the destination used in the response message.
let description: String
/// Application name from the session when the target was resolved from an element.
let targetApp: String?
/// Window title from the session when the target was resolved from an element.
let windowTitle: String?
/// Summary role of the resolved element, if any.
let elementRole: String?
/// Summary label of the resolved element, if any.
let elementLabel: String?
/// Creates a move target.
///
/// The context arguments default to `nil` so callers that only know a
/// location and a description can omit them.
init(
location: CGPoint,
description: String,
targetApp: String? = nil,
windowTitle: String? = nil,
elementRole: String? = nil,
elementLabel: String? = nil)
{
self.location = location
self.description = description
self.targetApp = targetApp
self.windowTitle = windowTitle
self.elementRole = elementRole
self.elementLabel = elementLabel
}
}
/// Outcome of `performMovement`: the resolved movement parameters together with
/// the geometry of the move that was carried out.
private struct MovementExecution {
/// Movement parameters (profile, duration, steps, smoothness) used for the move.
let parameters: MovementParameters
/// Cursor location sampled before the move started.
let startPoint: CGPoint
/// Straight-line distance between `startPoint` and the destination.
let distance: CGFloat
/// Label produced by `pointerDirection(from:to:)`; may be `nil`.
let direction: String?
}
private struct MoveToolValidationError: Error {

View File

@ -67,3 +67,13 @@ extension MovementProfileOption {
}
}
}
/// Convenience accessors used when filling in tool event summaries.
extension UIElement {
/// Role text for summaries: `roleDescription` when present, otherwise `role`.
var summaryRole: String? {
self.roleDescription ?? self.role
}
/// Label text for summaries: the first non-nil of `title`, `label` and `value`.
var summaryLabel: String? {
self.title ?? self.label ?? self.value
}
}

View File

@ -63,9 +63,20 @@ public struct PermissionsTool: MCPTool {
// Return error response if required permissions are missing
if !screenRecording {
return ToolResponse.error(responseText)
let summary = ToolEventSummary(actionDescription: "Permissions", notes: "Screen Recording missing")
return ToolResponse.error(responseText, meta: ToolEventSummary.merge(summary: summary, into: nil))
}
return ToolResponse.text(responseText)
let baseMeta: [String: Value] = [
"screen_recording": .bool(screenRecording),
"accessibility": .bool(accessibility),
]
let summary = ToolEventSummary(
actionDescription: "Permissions",
notes: "Screen Recording ✅, Accessibility \(accessibility ? "" : "⚠️")")
return ToolResponse.text(
responseText,
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
}

View File

@ -0,0 +1,17 @@
import CoreGraphics
/// Converts the delta between two points into an eight-way compass label.
///
/// Points are interpreted as screen coordinates in which `y` grows downward, so
/// a positive vertical delta is reported as south and a negative one as north.
/// NOTE(review): this matches top-left-origin global display coordinates such as
/// `CGEvent.location`; confirm that every caller passes points in that space.
///
/// - Parameters:
///   - start: Where the pointer movement begins.
///   - end: Where the pointer movement ends.
/// - Returns: One of "E", "NE", "N", "NW", "W", "SW", "S", "SE", or `nil` when
///   the two points are less than one point apart.
func pointerDirection(from start: CGPoint, to end: CGPoint) -> String? {
    let dx = end.x - start.x
    let dy = end.y - start.y
    // Sub-point movements have no meaningful heading.
    guard hypot(dx, dy) >= 1 else { return nil }

    // Listed counter-clockwise starting at east, one entry per 45° sector.
    let directions = ["E", "NE", "N", "NW", "W", "SW", "S", "SE"]
    // Flip dy so the angle is measured with north pointing up; angle 0 is east.
    let angle = atan2(-dy, dx)
    // angle / 45° rounds to a sector in -4...4; shift by the table size before
    // taking the remainder so negative sectors wrap into 0..<8.
    let sector = Int((angle / (.pi / 4)).rounded())
    return directions[(sector + directions.count) % directions.count]
}

View File

@ -139,13 +139,20 @@ public struct ScrollTool: MCPTool {
let message = "\(AgentDisplayTokens.Status.success) Performed \(scrollDescription) \(request.direction) " +
"(\(request.amount) ticks) \(target.description) in \(duration)"
return ToolResponse.text(message)
let summary = ToolEventSummary(
targetApp: target.appName,
actionDescription: request.smooth ? "Smooth scroll" : "Scroll",
scrollDirection: request.direction.rawValue,
scrollAmount: Double(request.amount),
notes: target.description
)
return ToolResponse.text(message, meta: ToolEventSummary.merge(summary: summary, into: nil))
}
@MainActor
private func resolveTargetDescription(request: ScrollToolRequest) async throws -> ScrollTargetDescription {
guard let elementId = request.elementId else {
return ScrollTargetDescription(elementId: nil, description: "at current mouse position")
return ScrollTargetDescription(elementId: nil, description: "at current mouse position", appName: nil)
}
guard let session = await self.getSession(id: request.sessionId) else {
@ -159,7 +166,10 @@ public struct ScrollTool: MCPTool {
let label = element.title ?? element.label ?? "untitled"
let description = "on \(element.role): \(label)"
return ScrollTargetDescription(elementId: elementId, description: description)
return ScrollTargetDescription(
elementId: elementId,
description: description,
appName: session.applicationName)
}
}
@ -175,6 +185,7 @@ private struct ScrollToolRequest {
/// Describes where a scroll was performed, for the response message and summary.
private struct ScrollTargetDescription {
/// Identifier of the targeted element; `nil` when scrolling at the current mouse position.
let elementId: String?
/// Phrase appended to the response message to describe the target.
let description: String
/// Application name from the session for element-targeted scrolls; `nil` otherwise.
let appName: String?
}
private struct ScrollToolValidationError: Error {

View File

@ -380,19 +380,29 @@ public struct SeeTool: MCPTool {
target: CaptureTarget) async throws -> ToolResponse
{
let finalScreenshot = output.annotatedPath ?? output.screenshotPath
let summary = await buildSummary(
let summaryText = await buildSummary(
session: session,
elements: elements,
screenshotPath: finalScreenshot,
target: target)
var content: [MCP.Tool.Content] = [.text(summary)]
var content: [MCP.Tool.Content] = [.text(summaryText)]
if output.annotate, let annotatedPath = output.annotatedPath {
let imageData = try Data(contentsOf: URL(fileURLWithPath: annotatedPath))
content.append(.image(data: imageData.base64EncodedString(), mimeType: "image/png", metadata: nil))
}
return ToolResponse(content: content, meta: self.makeMetadata(session: session, elements: elements))
let baseMeta = self.makeMetadata(session: session, elements: elements)
var summary = ToolEventSummary(
targetApp: session.applicationName,
windowTitle: session.windowTitle,
actionDescription: "See",
notes: String(describing: target))
summary.captureApp = session.applicationName
summary.captureWindow = session.windowTitle
let mergedMeta = ToolEventSummary.merge(summary: summary, into: baseMeta)
return ToolResponse(content: content, meta: mergedMeta)
}
private func makeMetadata(session: UISession, elements: [UIElement]) -> Value {
@ -538,6 +548,8 @@ actor UISession {
private(set) var uiElements: [UIElement] = []
private(set) var createdAt: Date
private(set) var lastAccessedAt: Date
nonisolated(unsafe) private(set) var cachedApplicationName: String?
nonisolated(unsafe) private(set) var cachedWindowTitle: String?
init() {
self.id = UUID().uuidString
@ -548,6 +560,8 @@ actor UISession {
func setScreenshot(path: String, metadata: CaptureMetadata) {
self.screenshotPath = path
self.screenshotMetadata = metadata
self.cachedApplicationName = metadata.applicationInfo?.name
self.cachedWindowTitle = metadata.windowInfo?.title
self.lastAccessedAt = Date()
}
@ -559,6 +573,14 @@ actor UISession {
func getElement(byId id: String) -> UIElement? {
self.uiElements.first { $0.id == id }
}
/// Application name recorded by the most recent `setScreenshot(path:metadata:)`
/// call, readable without awaiting the actor.
///
/// NOTE(review): the backing `cachedApplicationName` is declared
/// `nonisolated(unsafe)`, so this read is not synchronized with the
/// actor-isolated write in `setScreenshot` — confirm that is acceptable.
nonisolated var applicationName: String? {
self.cachedApplicationName
}
/// Window title recorded by the most recent `setScreenshot(path:metadata:)`
/// call, readable without awaiting the actor.
///
/// NOTE(review): the backing `cachedWindowTitle` is declared
/// `nonisolated(unsafe)`, so this read is not synchronized with the
/// actor-isolated write in `setScreenshot` — confirm that is acceptable.
nonisolated var windowTitle: String? {
self.cachedWindowTitle
}
}
actor UISessionManager {

View File

@ -82,9 +82,15 @@ public struct ShellTool: MCPTool {
}
self.logger.debug("Command completed successfully")
let summary = ToolEventSummary(
command: command,
workingDirectory: FileManager.default.currentDirectoryPath,
notes: nil)
let meta = ToolEventSummary.merge(summary: summary, into: nil)
return ToolResponse(
content: [.text(output)],
isError: false)
isError: false,
meta: meta)
} catch {
self.logger.error("Failed to execute command: \(error.localizedDescription)")
return ToolResponse(

View File

@ -48,9 +48,14 @@ public struct SleepTool: MCPTool {
let actualDuration = Date().timeIntervalSince(startTime) * 1000 // Convert to ms
let seconds = Double(milliseconds) / 1000.0
let summary =
let summaryText =
"\(AgentDisplayTokens.Status.success) Paused for \(seconds)s " +
"(requested: \(milliseconds)ms, actual: \(Int(actualDuration))ms)"
return ToolResponse.text(summary)
let summaryMeta = ToolEventSummary(
actionDescription: "Sleep",
waitDurationMs: actualDuration,
waitReason: nil
)
return ToolResponse.text(summaryText, meta: ToolEventSummary.merge(summary: summaryMeta, into: nil))
}
}

View File

@ -157,12 +157,18 @@ public struct SpaceTool: MCPTool {
output += "\n"
}
let message = output.trimmingCharacters(in: .whitespacesAndNewlines)
let baseMeta: [String: Value] = [
"count": .double(Double(spaces.count)),
"execution_time": .double(executionTime),
"detailed": .bool(detailed),
]
let summary = ToolEventSummary(
actionDescription: "List Spaces",
notes: "\(spaces.count) spaces")
return ToolResponse(
content: [.text(output.trimmingCharacters(in: .whitespacesAndNewlines))],
meta: .object([
"count": .double(Double(spaces.count)),
"execution_time": .double(executionTime),
]))
content: [.text(message)],
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
@MainActor
@ -197,13 +203,17 @@ public struct SpaceTool: MCPTool {
let executionTime = Date().timeIntervalSince(startTime)
let message = self.successMessage("Switched to Space \(spaceNumber)", duration: executionTime)
let baseMeta: [String: Value] = [
"space_number": .double(Double(spaceNumber)),
"space_id": .double(Double(targetSpace.id)),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
actionDescription: "Switch Space",
notes: "Space \(spaceNumber)")
return ToolResponse(
content: [.text(message)],
meta: .object([
"space_number": .double(Double(spaceNumber)),
"space_id": .double(Double(targetSpace.id)),
"execution_time": .double(executionTime),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
@MainActor
@ -255,11 +265,11 @@ public struct SpaceTool: MCPTool {
{
switch action {
case let .list(detailed):
try await self.handleList(service: service, detailed: detailed, startTime: startTime)
return try await self.handleList(service: service, detailed: detailed, startTime: startTime)
case let .switchSpace(spaceNumber):
try await self.handleSwitch(service: service, spaceNumber: spaceNumber, startTime: startTime)
return try await self.handleSwitch(service: service, spaceNumber: spaceNumber, startTime: startTime)
case let .moveWindow(request):
try await self.handleMoveWindow(service: service, request: request, startTime: startTime)
return try await self.handleMoveWindow(service: service, request: request, startTime: startTime)
}
}
@ -381,14 +391,19 @@ extension SpaceTool {
"Moved window '\(windowInfo.title)' to current Space",
duration: executionTime)
let baseMeta: [String: Value] = [
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowID)),
"moved_to_current": .bool(true),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
windowTitle: windowInfo.title,
actionDescription: "Space Move",
notes: "current")
return ToolResponse(
content: [.text(message)],
meta: .object([
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowID)),
"moved_to_current": .bool(true),
"execution_time": .double(executionTime),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
@MainActor
@ -420,15 +435,20 @@ extension SpaceTool {
let body = "Moved window '\(windowInfo.title)' to Space \(targetSpaceNumber)\(followText)"
let message = self.successMessage(body, duration: executionTime)
let baseMeta: [String: Value] = [
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowID)),
"target_space_number": .double(Double(targetSpaceNumber)),
"target_space_id": .double(Double(targetSpace.id)),
"followed": .bool(request.follow),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
windowTitle: windowInfo.title,
actionDescription: "Space Move",
notes: "space \(targetSpaceNumber)")
return ToolResponse(
content: [.text(message)],
meta: .object([
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowID)),
"target_space_number": .double(Double(targetSpaceNumber)),
"target_space_id": .double(Double(targetSpace.id)),
"followed": .bool(request.follow),
"execution_time": .double(executionTime),
]))
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
}

View File

@ -167,23 +167,37 @@ public struct SwipeTool: MCPTool {
with \(movement.steps) steps (\(movement.profileName) profile, distance: \(distanceText)px) in \(durationText)s
"""
let metaDict: [String: Value] = [
"from": .object([
"x": .double(Double(fromPoint.x)),
"y": .double(Double(fromPoint.y)),
]),
"to": .object([
"x": .double(Double(toPoint.x)),
"y": .double(Double(toPoint.y)),
]),
"duration": .double(Double(movement.duration)),
"steps": .double(Double(movement.steps)),
"profile": .string(movement.profileName),
"distance": .double(distance),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
actionDescription: "Swipe",
coordinates: ToolEventSummary.Coordinates(x: Double(toPoint.x), y: Double(toPoint.y)),
pointerProfile: movement.profileName,
pointerDistance: Double(distance),
pointerDirection: pointerDirection(from: fromPoint, to: toPoint),
pointerDurationMs: Double(movement.duration),
notes: "from (\(Int(fromPoint.x)), \(Int(fromPoint.y))) to (\(Int(toPoint.x)), \(Int(toPoint.y)))"
)
let metaValue = ToolEventSummary.merge(summary: summary, into: .object(metaDict))
return ToolResponse(
content: [.text(message)],
meta: .object([
"from": .object([
"x": .double(Double(fromPoint.x)),
"y": .double(Double(fromPoint.y)),
]),
"to": .object([
"x": .double(Double(toPoint.x)),
"y": .double(Double(toPoint.y)),
]),
"duration": .double(Double(movement.duration)),
"steps": .double(Double(movement.steps)),
"profile": .string(movement.profileName),
"distance": .double(distance),
"execution_time": .double(executionTime),
]))
meta: metaValue)
}
private func parseCoordinates(_ coordString: String, parameterName: String) throws -> CGPoint {

View File

@ -129,7 +129,9 @@ public struct TypeTool: MCPTool {
let automation = self.context.automation
let startTime = Date()
try await self.focusIfNeeded(request: request, automation: automation)
let targetContext = try await self.resolveTargetContext(for: request)
try await self.focusIfNeeded(targetContext: targetContext, request: request, automation: automation)
let actions = try self.buildActions(for: request)
let typeResult = try await automation.typeActions(
actions,
@ -141,18 +143,41 @@ public struct TypeTool: MCPTool {
request: request,
executionTime: executionTime,
result: typeResult)
let baseMeta: Value = .object([
"execution_time": .double(executionTime),
"characters_typed": .double(Double(typeResult.totalCharacters)),
])
let summary = self.buildEventSummary(
request: request,
result: typeResult,
targetContext: targetContext)
let mergedMeta = ToolEventSummary.merge(summary: summary, into: baseMeta)
return ToolResponse(
content: [.text(message)],
meta: .object([
"execution_time": .double(executionTime),
"characters_typed": .double(Double(typeResult.totalCharacters)),
]))
meta: mergedMeta)
}
@MainActor
private func focusIfNeeded(request: TypeRequest, automation: any UIAutomationServiceProtocol) async throws {
guard let elementId = request.elementId else { return }
private func focusIfNeeded(
targetContext: TargetElementContext?,
request: TypeRequest,
automation: any UIAutomationServiceProtocol) async throws
{
guard let context = targetContext else { return }
let element = context.element
let clickLocation = CGPoint(x: element.frame.midX, y: element.frame.midY)
try await automation.click(
target: .coordinates(clickLocation),
clickType: .single,
sessionId: request.sessionId)
try await Task.sleep(nanoseconds: 100_000_000)
}
@MainActor
private func resolveTargetContext(for request: TypeRequest) async throws -> TargetElementContext? {
guard let elementId = request.elementId else { return nil }
guard let session = await self.getSession(id: request.sessionId) else {
throw TypeToolValidationError("No active session. Run 'see' command first to capture UI state.")
}
@ -162,12 +187,45 @@ public struct TypeTool: MCPTool {
"Element '\(elementId)' not found in current session. Run 'see' command to update UI state.")
}
let clickLocation = CGPoint(x: element.frame.midX, y: element.frame.midY)
try await automation.click(
target: .coordinates(clickLocation),
clickType: .single,
sessionId: request.sessionId)
try await Task.sleep(nanoseconds: 100_000_000)
return TargetElementContext(session: session, element: element)
}
/// Assembles the `ToolEventSummary` attached to a type-tool response.
///
/// The (truncated) request text is stored in both `elementValue` and `notes`;
/// app, window and element details come from `targetContext` and are `nil`
/// when no target element was resolved. `result` is currently not consulted.
private func buildEventSummary(
    request: TypeRequest,
    result: TypeResult,
    targetContext: TargetElementContext?) -> ToolEventSummary
{
    let preview = self.truncatedText(request.text)
    let session = targetContext?.session
    let element = targetContext?.element

    return ToolEventSummary(
        targetApp: session?.applicationName,
        windowTitle: session?.windowTitle,
        elementRole: element?.summaryRole,
        elementLabel: element?.summaryLabel,
        elementValue: preview,
        actionDescription: self.describeAction(for: request),
        notes: preview)
}
/// Returns `text` shortened to at most `limit` characters for display in summaries.
///
/// - Parameters:
///   - text: The text to shorten. `nil` and empty strings yield `nil`.
///   - limit: Maximum number of characters to keep (default 80). Negative
///     values are treated as zero instead of trapping.
/// - Returns: `text` unchanged when it fits within `limit`; otherwise the first
///   `limit` characters followed by an ellipsis ("…") so readers can tell the
///   value was cut off.
private func truncatedText(_ text: String?, limit: Int = 80) -> String? {
    guard let text, !text.isEmpty else { return nil }
    guard text.count > limit else { return text }
    // `prefix` counts Characters, so multi-scalar graphemes are never split.
    return String(text.prefix(max(0, limit))) + "…"
}
/// Produces the short action label used in the type-tool summary.
///
/// Returns `"Typed"` whenever the request carries non-empty text. Otherwise
/// the requested special keys are listed in a fixed order (tabs, Return,
/// Escape, Delete, Clear Field) separated by `", "`, falling back to `"Type"`
/// when none were requested.
private func describeAction(for request: TypeRequest) -> String {
    guard request.text?.isEmpty ?? true else { return "Typed" }

    let tabLabel: String? = request.tabCount.flatMap { $0 > 0 ? "Tab×\($0)" : nil }
    let candidates: [String?] = [
        tabLabel,
        request.pressReturn ? "Return" : nil,
        request.pressEscape ? "Escape" : nil,
        request.pressDelete ? "Delete" : nil,
        request.clearField ? "Clear Field" : nil,
    ]

    let labels = candidates.compactMap { $0 }
    guard !labels.isEmpty else { return "Type" }
    return labels.joined(separator: ", ")
}
private func buildActions(for request: TypeRequest) throws -> [TypeAction] {
@ -294,3 +352,8 @@ private struct TypeToolValidationError: Error {
let message: String
init(_ message: String) { self.message = message }
}
/// Pairs a UI element with the session it was looked up in, so the type tool
/// can both click the element and describe it in the event summary.
private struct TargetElementContext {
// Session the element belongs to; the summary reads its app name and window title.
let session: UISession
// The element itself; its frame is used for the focus click, its role/label for the summary.
let element: UIElement
}

View File

@ -119,32 +119,43 @@ public struct WindowTool: MCPTool {
switch action {
case .close:
return try await self.handleClose(service: service, target: target, startTime: startTime)
return try await self.handleClose(service: service, target: target, appName: inputs.app, startTime: startTime)
case .minimize:
return try await self.handleMinimize(service: service, target: target, startTime: startTime)
return try await self.handleMinimize(service: service, target: target, appName: inputs.app, startTime: startTime)
case .maximize:
return try await self.handleMaximize(service: service, target: target, startTime: startTime)
return try await self.handleMaximize(service: service, target: target, appName: inputs.app, startTime: startTime)
case .move:
let position = try inputs.requirePosition(for: action)
return try await self.handleMove(service: service, target: target, position: position, startTime: startTime)
return try await self.handleMove(
service: service,
target: target,
appName: inputs.app,
position: position,
startTime: startTime)
case .resize:
let size = try inputs.requireSize(for: action)
return try await self.handleResize(service: service, target: target, size: size, startTime: startTime)
return try await self.handleResize(
service: service,
target: target,
appName: inputs.app,
size: size,
startTime: startTime)
case .setBounds:
let bounds = try inputs.requireBounds()
return try await self.handleSetBounds(
service: service,
target: target,
appName: inputs.app,
bounds: bounds,
startTime: startTime)
case .focus:
return try await self.handleFocus(service: service, target: target, startTime: startTime)
return try await self.handleFocus(service: service, target: target, appName: inputs.app, startTime: startTime)
}
}
@ -153,6 +164,7 @@ public struct WindowTool: MCPTool {
private func handleClose(
service: any WindowManagementServiceProtocol,
target: WindowTarget,
appName: String?,
startTime: Date) async throws -> ToolResponse
{
// Get window info before closing for better reporting
@ -165,22 +177,26 @@ public struct WindowTool: MCPTool {
let executionTime = Date().timeIntervalSince(startTime)
let message = self.successMessage(action: "Closed window '\(windowInfo.title)'", duration: executionTime)
let baseMeta: [String: Value] = [
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
targetApp: appName,
windowTitle: windowInfo.title,
actionDescription: "Window Close",
notes: nil)
return ToolResponse(
content: [
.text(self.successMessage(
action: "Closed window '\(windowInfo.title)'",
duration: executionTime)),
],
meta: .object([
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"execution_time": .double(executionTime),
]))
content: [.text(message)],
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func handleMinimize(
service: any WindowManagementServiceProtocol,
target: WindowTarget,
appName: String?,
startTime: Date) async throws -> ToolResponse
{
// Get window info before minimizing
@ -193,22 +209,26 @@ public struct WindowTool: MCPTool {
let executionTime = Date().timeIntervalSince(startTime)
let message = self.successMessage(action: "Minimized window '\(windowInfo.title)'", duration: executionTime)
let baseMeta: [String: Value] = [
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
targetApp: appName,
windowTitle: windowInfo.title,
actionDescription: "Window Minimize",
notes: nil)
return ToolResponse(
content: [
.text(self.successMessage(
action: "Minimized window '\(windowInfo.title)'",
duration: executionTime)),
],
meta: .object([
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"execution_time": .double(executionTime),
]))
content: [.text(message)],
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func handleMaximize(
service: any WindowManagementServiceProtocol,
target: WindowTarget,
appName: String?,
startTime: Date) async throws -> ToolResponse
{
// Get window info before maximizing
@ -221,22 +241,26 @@ public struct WindowTool: MCPTool {
let executionTime = Date().timeIntervalSince(startTime)
let message = self.successMessage(action: "Maximized window '\(windowInfo.title)'", duration: executionTime)
let baseMeta: [String: Value] = [
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
targetApp: appName,
windowTitle: windowInfo.title,
actionDescription: "Window Maximize",
notes: nil)
return ToolResponse(
content: [
.text(self.successMessage(
action: "Maximized window '\(windowInfo.title)'",
duration: executionTime)),
],
meta: .object([
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"execution_time": .double(executionTime),
]))
content: [.text(message)],
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func handleMove(
service: any WindowManagementServiceProtocol,
target: WindowTarget,
appName: String?,
position: CGPoint,
startTime: Date) async throws -> ToolResponse
{
@ -251,22 +275,29 @@ public struct WindowTool: MCPTool {
let executionTime = Date().timeIntervalSince(startTime)
let detail = "Moved window '\(windowInfo.title)' to (\(Int(position.x)), \(Int(position.y)))"
let message = self.successMessage(action: detail, duration: executionTime)
let baseMeta: [String: Value] = [
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"new_x": .double(Double(position.x)),
"new_y": .double(Double(position.y)),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
targetApp: appName,
windowTitle: windowInfo.title,
actionDescription: "Window Move",
coordinates: ToolEventSummary.Coordinates(x: Double(position.x), y: Double(position.y)),
notes: nil)
return ToolResponse(
content: [
.text(self.successMessage(action: detail, duration: executionTime)),
],
meta: .object([
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"new_x": .double(Double(position.x)),
"new_y": .double(Double(position.y)),
"execution_time": .double(executionTime),
]))
content: [.text(message)],
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func handleResize(
service: any WindowManagementServiceProtocol,
target: WindowTarget,
appName: String?,
size: CGSize,
startTime: Date) async throws -> ToolResponse
{
@ -281,22 +312,28 @@ public struct WindowTool: MCPTool {
let executionTime = Date().timeIntervalSince(startTime)
let detail = "Resized window '\(windowInfo.title)' to \(Int(size.width)) × \(Int(size.height))"
let message = self.successMessage(action: detail, duration: executionTime)
let baseMeta: [String: Value] = [
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"new_width": .double(Double(size.width)),
"new_height": .double(Double(size.height)),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
targetApp: appName,
windowTitle: windowInfo.title,
actionDescription: "Window Resize",
notes: "\(Int(size.width))×\(Int(size.height))")
return ToolResponse(
content: [
.text(self.successMessage(action: detail, duration: executionTime)),
],
meta: .object([
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"new_width": .double(Double(size.width)),
"new_height": .double(Double(size.height)),
"execution_time": .double(executionTime),
]))
content: [.text(message)],
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func handleSetBounds(
service: any WindowManagementServiceProtocol,
target: WindowTarget,
appName: String?,
bounds: CGRect,
startTime: Date) async throws -> ToolResponse
{
@ -312,24 +349,33 @@ public struct WindowTool: MCPTool {
let detail = "Set bounds for window '\(windowInfo.title)' to (\(Int(bounds.origin.x)), "
+ "\(Int(bounds.origin.y)), \(Int(bounds.width)) × \(Int(bounds.height)))"
let message = self.successMessage(action: detail, duration: executionTime)
let baseMeta: [String: Value] = [
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"new_x": .double(Double(bounds.origin.x)),
"new_y": .double(Double(bounds.origin.y)),
"new_width": .double(Double(bounds.width)),
"new_height": .double(Double(bounds.height)),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
targetApp: appName,
windowTitle: windowInfo.title,
actionDescription: "Window Set Bounds",
coordinates: ToolEventSummary.Coordinates(
x: Double(bounds.origin.x),
y: Double(bounds.origin.y)),
notes: "\(Int(bounds.width))×\(Int(bounds.height))")
return ToolResponse(
content: [
.text(self.successMessage(action: detail, duration: executionTime)),
],
meta: .object([
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"new_x": .double(Double(bounds.origin.x)),
"new_y": .double(Double(bounds.origin.y)),
"new_width": .double(Double(bounds.width)),
"new_height": .double(Double(bounds.height)),
"execution_time": .double(executionTime),
]))
content: [.text(message)],
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
private func handleFocus(
service: any WindowManagementServiceProtocol,
target: WindowTarget,
appName: String?,
startTime: Date) async throws -> ToolResponse
{
// Get window info before focusing
@ -342,17 +388,20 @@ public struct WindowTool: MCPTool {
let executionTime = Date().timeIntervalSince(startTime)
let message = self.successMessage(action: "Focused window '\(windowInfo.title)'", duration: executionTime)
let baseMeta: [String: Value] = [
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"execution_time": .double(executionTime),
]
let summary = ToolEventSummary(
targetApp: appName,
windowTitle: windowInfo.title,
actionDescription: "Window Focus",
notes: nil)
return ToolResponse(
content: [
.text(self.successMessage(
action: "Focused window '\(windowInfo.title)'",
duration: executionTime)),
],
meta: .object([
"window_title": .string(windowInfo.title),
"window_id": .double(Double(windowInfo.windowID)),
"execution_time": .double(executionTime),
]))
content: [.text(message)],
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
}
// MARK: - Helper Methods

View File

@ -0,0 +1,214 @@
import Foundation
import MCP
public struct ToolEventSummary: Codable, Sendable {
/// An x/y pair attached to a summary. Either component may be absent.
public struct Coordinates: Codable, Sendable {
public var x: Double?
public var y: Double?
/// Creates a coordinate pair; both components default to `nil`.
public init(x: Double? = nil, y: Double? = nil) {
self.x = x
self.y = y
}
}
// All fields are optional; tools fill in only what applies to them.
// Target context, serialized as "target_app" / "window_title".
public var targetApp: String?
public var windowTitle: String?
// Element details, serialized as "element_role" / "element_label" / "element_value".
public var elementRole: String?
public var elementLabel: String?
public var elementValue: String?
// Serialized under the key "action" (not "action_description").
public var actionDescription: String?
public var coordinates: Coordinates?
// Pointer movement; `shortDescription` prints the distance with a "px" suffix.
public var pointerProfile: String?
public var pointerDistance: Double?
public var pointerDirection: String?
public var pointerDurationMs: Double?
// Scroll details; `shortDescription` prints the amount with a "px" suffix.
public var scrollDirection: String?
public var scrollAmount: Double?
// Shell invocation details.
public var command: String?
public var workingDirectory: String?
// Wait details; the duration is in milliseconds (`shortDescription` divides by 1000 to print seconds).
public var waitDurationMs: Double?
public var waitReason: String?
// Capture context, serialized as "capture_app" / "capture_window".
public var captureApp: String?
public var captureWindow: String?
// Free-form text; used by `shortDescription` only when no higher-priority field is set.
public var notes: String?
/// Creates a summary from individual fields.
///
/// Every parameter defaults to `nil`, so callers pass only the fields that
/// are relevant to their tool; each argument is stored unchanged in the
/// property of the same name.
public init(
targetApp: String? = nil,
windowTitle: String? = nil,
elementRole: String? = nil,
elementLabel: String? = nil,
elementValue: String? = nil,
actionDescription: String? = nil,
coordinates: Coordinates? = nil,
pointerProfile: String? = nil,
pointerDistance: Double? = nil,
pointerDirection: String? = nil,
pointerDurationMs: Double? = nil,
scrollDirection: String? = nil,
scrollAmount: Double? = nil,
command: String? = nil,
workingDirectory: String? = nil,
waitDurationMs: Double? = nil,
waitReason: String? = nil,
captureApp: String? = nil,
captureWindow: String? = nil,
notes: String? = nil)
{
self.targetApp = targetApp
self.windowTitle = windowTitle
self.elementRole = elementRole
self.elementLabel = elementLabel
self.elementValue = elementValue
self.actionDescription = actionDescription
self.coordinates = coordinates
self.pointerProfile = pointerProfile
self.pointerDistance = pointerDistance
self.pointerDirection = pointerDirection
self.pointerDurationMs = pointerDurationMs
self.scrollDirection = scrollDirection
self.scrollAmount = scrollAmount
self.command = command
self.workingDirectory = workingDirectory
self.waitDurationMs = waitDurationMs
self.waitReason = waitReason
self.captureApp = captureApp
self.captureWindow = captureWindow
self.notes = notes
}
/// Serializes the populated fields into a `Value` object keyed by snake_case names.
///
/// Fields that are `nil` are omitted. `coordinates` is emitted only when at
/// least one of its axes is set.
public func toMetaValue() -> Value {
    let textFields: [(key: String, value: String?)] = [
        ("target_app", self.targetApp),
        ("window_title", self.windowTitle),
        ("element_role", self.elementRole),
        ("element_label", self.elementLabel),
        ("element_value", self.elementValue),
        ("action", self.actionDescription),
        ("pointer_profile", self.pointerProfile),
        ("pointer_direction", self.pointerDirection),
        ("scroll_direction", self.scrollDirection),
        ("command", self.command),
        ("working_directory", self.workingDirectory),
        ("wait_reason", self.waitReason),
        ("capture_app", self.captureApp),
        ("capture_window", self.captureWindow),
        ("notes", self.notes),
    ]
    let numericFields: [(key: String, value: Double?)] = [
        ("pointer_distance", self.pointerDistance),
        ("pointer_duration_ms", self.pointerDurationMs),
        ("scroll_amount", self.scrollAmount),
        ("wait_duration_ms", self.waitDurationMs),
    ]

    var payload: [String: Value] = [:]
    for field in textFields {
        if let text = field.value { payload[field.key] = .string(text) }
    }
    for field in numericFields {
        if let number = field.value { payload[field.key] = .double(number) }
    }

    if let point = self.coordinates {
        var axes: [String: Value] = [:]
        if let x = point.x { axes["x"] = .double(x) }
        if let y = point.y { axes["y"] = .double(y) }
        if !axes.isEmpty { payload["coordinates"] = .object(axes) }
    }

    return .object(payload)
}
/// Returns `existingMeta` with the serialized summary stored under the `"summary"` key.
///
/// When `existingMeta` is an object its entries are kept (an existing
/// `"summary"` entry is overwritten); any other value, including `nil`, is
/// replaced by an object that contains only the summary.
public static func merge(summary: ToolEventSummary, into existingMeta: Value?) -> Value {
    let base: [String: Value]
    if case let .object(fields)? = existingMeta {
        base = fields
    } else {
        base = [:]
    }
    let merged = base.merging(["summary": summary.toMetaValue()]) { _, replacement in replacement }
    return .object(merged)
}
/// Rebuilds a summary from a loosely typed dictionary that uses the same keys
/// as `toMetaValue()`.
///
/// Returns `nil` for an empty dictionary. Unknown keys are ignored, and a key
/// whose value has an unexpected type leaves the corresponding field `nil`.
/// Numeric fields accept both `Double` and `Int` payloads: a plain
/// `as? Double` cast yields `nil` for a Swift `Int` stored in `Any`, which
/// would silently drop integer-valued numbers such as `5`.
public init?(json: [String: Any]) {
    guard !json.isEmpty else { return nil }
    self.targetApp = json["target_app"] as? String
    self.windowTitle = json["window_title"] as? String
    self.elementRole = json["element_role"] as? String
    self.elementLabel = json["element_label"] as? String
    self.elementValue = json["element_value"] as? String
    self.actionDescription = json["action"] as? String
    if let coords = json["coordinates"] as? [String: Any] {
        let x = Self.number(from: coords["x"])
        let y = Self.number(from: coords["y"])
        // Keep `coordinates` nil unless at least one axis decoded.
        if x != nil || y != nil {
            self.coordinates = Coordinates(x: x, y: y)
        }
    }
    self.pointerProfile = json["pointer_profile"] as? String
    self.pointerDistance = Self.number(from: json["pointer_distance"])
    self.pointerDirection = json["pointer_direction"] as? String
    self.pointerDurationMs = Self.number(from: json["pointer_duration_ms"])
    self.scrollDirection = json["scroll_direction"] as? String
    self.scrollAmount = Self.number(from: json["scroll_amount"])
    self.command = json["command"] as? String
    self.workingDirectory = json["working_directory"] as? String
    self.waitDurationMs = Self.number(from: json["wait_duration_ms"])
    self.waitReason = json["wait_reason"] as? String
    self.captureApp = json["capture_app"] as? String
    self.captureWindow = json["capture_window"] as? String
    self.notes = json["notes"] as? String
}

/// Coerces a loosely typed JSON number to `Double`, accepting `Double` and `Int` payloads.
private static func number(from value: Any?) -> Double? {
    if let double = value as? Double { return double }
    if let integer = value as? Int { return Double(integer) }
    return nil
}
/// Extracts the summary stored at `meta.summary` inside a tool-result dictionary.
///
/// Returns `nil` when either level is missing, is not a dictionary, or when
/// the summary dictionary is rejected by `init?(json:)`.
public static func from(resultJSON: [String: Any]) -> ToolEventSummary? {
    let meta = resultJSON["meta"] as? [String: Any]
    let rawSummary = meta?["summary"] as? [String: Any]
    return rawSummary.flatMap { ToolEventSummary(json: $0) }
}
/// Builds a one-line, human-readable description of the event.
///
/// The first populated group wins, in this order: shell command, capture
/// context, element label, target app (with action), notes, wait duration,
/// scroll, pointer movement, and finally the bare action description.
///
/// - Parameter toolName: Name of the tool that produced the summary.
///   Currently not consulted when building the text.
/// - Returns: The description, or `nil` when no field is populated.
public func shortDescription(toolName: String) -> String? {
    if let command {
        if let cwd = workingDirectory {
            return "Run `\(command)` in \(cwd)"
        }
        return "Run `\(command)`"
    }
    if let captureApp {
        if let captureWindow {
            return "Captured \(captureApp) · \(captureWindow)"
        }
        return "Captured \(captureApp)"
    }
    if let elementLabel {
        // The role qualifies the label, so it is appended to the label segment
        // ("Sign In (Button)") instead of being joined as its own " · " segment.
        var labelSegment = elementLabel
        if let elementRole {
            labelSegment += " (\(elementRole))"
        }
        var segments: [String] = []
        if let targetApp { segments.append(targetApp) }
        segments.append(labelSegment)
        return segments.joined(separator: " · ")
    }
    if let targetApp, let actionDescription {
        return "\(actionDescription) \(targetApp)"
    }
    if let targetApp {
        return targetApp
    }
    if let notes {
        return notes
    }
    if let waitDurationMs {
        // Stored in milliseconds; shown in seconds with one decimal place.
        let seconds = waitDurationMs / 1000.0
        if let reason = waitReason {
            return String(format: "Wait %.1fs (%@)", seconds, reason)
        }
        return String(format: "Wait %.1fs", seconds)
    }
    if let scrollDirection, let scrollAmount {
        return String(format: "Scrolled %@ %.0f px", scrollDirection, scrollAmount)
    }
    if let pointerDirection, let pointerDistance {
        return String(format: "Pointer %@ %.0f px", pointerDirection, pointerDistance)
    }
    return actionDescription
}
}

View File

@ -0,0 +1,43 @@
import PeekabooAgentRuntime
import PeekabooCore
import Testing
/// Pins the text produced by `ToolEventSummary.shortDescription(toolName:)`
/// for one representative summary per tool family.
@Suite("Tool event summary formatting")
struct ToolEventSummaryTests {
// Command plus working directory renders as "Run `<command>` in <cwd>".
@Test("Shell commands render with working directory")
func shellSummaryUsesWorkingDirectory() {
let summary = ToolEventSummary(
command: "ls -la",
workingDirectory: "/tmp")
#expect(summary.shortDescription(toolName: "shell") == "Run `ls -la` in /tmp")
}
// Expects " · " only between app and label; the role follows the label in parentheses.
@Test("Click actions include target app and role")
func clickSummaryShowsElement() {
let summary = ToolEventSummary(
targetApp: "Google Chrome",
elementRole: "Button",
elementLabel: "Sign In with Email")
#expect(summary.shortDescription(toolName: "click") == "Google Chrome · Sign In with Email (Button)")
}
// 2100 ms is expected to print as seconds with one decimal place ("2.1s").
@Test("Sleep summaries use wait duration and reason")
func sleepSummaryIncludesDuration() {
let summary = ToolEventSummary(
waitDurationMs: 2100,
waitReason: "waiting for UI state")
#expect(summary.shortDescription(toolName: "sleep") == "Wait 2.1s (waiting for UI state)")
}
// Capture app and window are joined with " · " after the "Captured" prefix.
@Test("Screen captures include app and window")
func seeSummaryDescribesCaptureContext() {
let summary = ToolEventSummary(
captureApp: "Google Chrome",
captureWindow: "Grindr Dashboard")
#expect(summary.shortDescription(toolName: "see") == "Captured Google Chrome · Grindr Dashboard")
}
}

View File

@ -0,0 +1,46 @@
import MCP
import PeekabooAgentRuntime
import PeekabooAutomation
import TachikomaMCP
import Testing
/// Executes real tools and checks that their responses carry a
/// `ToolEventSummary` under `meta.summary`.
@Suite("Tool summary emission")
struct ToolSummaryEmissionTests {
    @Test("Shell tool attaches command metadata")
    func shellToolEmitsSummary() async throws {
        let tool = ShellTool()
        let response = try await tool.execute(arguments: ToolArguments(raw: ["command": "echo summary-test"]))
        guard let summary = extractSummary(from: response.meta) else {
            Issue.record("ShellTool response missing summary metadata")
            return
        }
        #expect(summary.command == "echo summary-test")
        #expect(summary.shortDescription(toolName: tool.name) == "Run `echo summary-test`")
    }

    @Test("Sleep tool stores wait duration")
    func sleepToolEmitsSummary() async throws {
        let tool = SleepTool()
        let response = try await tool.execute(arguments: ToolArguments(raw: ["duration": 5]))
        guard let summary = extractSummary(from: response.meta) else {
            Issue.record("SleepTool response missing summary metadata")
            return
        }
        #expect(summary.actionDescription == "Sleep")
        // Require the field to be present: `(waitDurationMs ?? 0) >= 0` also
        // passes when the tool never stored a duration, which is exactly the
        // regression this test is meant to catch.
        let waitDurationMs = try #require(
            summary.waitDurationMs,
            "SleepTool summary is missing waitDurationMs")
        #expect(waitDurationMs >= 0)
    }
}
/// Pulls the `"summary"` entry out of a response's `meta` object and decodes
/// it into a `ToolEventSummary`.
///
/// Returns `nil` when `meta` is not an object, has no `"summary"` entry, the
/// entry does not convert to a `[String: Any]` dictionary, or decoding fails.
private func extractSummary(from meta: Value?) -> ToolEventSummary? {
    guard case let .object(fields)? = meta else { return nil }
    guard let rawSummary = fields["summary"]?.toJSON() as? [String: Any] else { return nil }
    return ToolEventSummary(json: rawSummary)
}

@ -1 +1 @@
Subproject commit 51405dea5811953055ded57cee085caf788ba2a6
Subproject commit 326892a32e93e03df7fb5fefe53c61ebcb6ef4ad

View File

@ -0,0 +1,66 @@
---
summary: 'Refactor tool results so agents can show rich, human-readable summaries'
read_when:
- 'planning tool/agent runtime work'
- 'touching ToolResponse or formatter plumbing'
---
# Tool Result Metadata Refactor Plan
## Current Status
- `ToolEventSummary` struct + helpers live in `ToolEventSummary.swift`; pointer direction math handled in `PointerDirection.swift`.
- Tachikoma MCP adapter now preserves `meta` so summaries flow from tools to CLI/Mac renderers.
- Core UI/system tools (click/drag/move/swipe/scroll/see/shell/sleep/type/hotkey/app/menu/dialog/dock/list/window) populate summaries with human-readable labels instead of internal IDs.
- Permission/Image/Analyze/Space tool paths updated to emit contextual summaries (app name, capture source, question text, etc.).
- MCPAgentTool now emits summaries for session listings and agent runs, completing MCP tool coverage.
- CLI `AgentOutputDelegate` consumes `ToolEventSummary` data, strips legacy `[ok]` glyphs, and falls back to sanitized formatter output only when necessary.
- Mac tool formatter bridge + registry now prioritize `ToolEventSummary` data so timeline rows show the same human-readable summaries as the CLI.
- Added Swift Testing coverage (`ToolEventSummaryTests`, `ToolSummaryEmissionTests`) so shell/sleep summaries and short-description helpers are locked in.
- Streaming pipeline now injects a top-level `summary_text` field into tool completion payloads, giving JSON consumers the same human-readable copy without parsing nested meta blobs.
- Agent output formatters still contain legacy fallbacks; `[ok]` badges remain until we finish Phase 3.
## Next Steps
- Capture CLI/Mac golden transcripts once formatter cleanup lands in CI so we can detect regressions automatically.
## Goals
- Preserve structured context (app name, element label, pointer geometry, shell command, etc.) for every tool call.
- Render concise, human-readable summaries in the CLI/Mac agent views without exposing internal IDs or glyph tokens.
- Eliminate the success `[ok]` badge for normal completions; only show badges/flags on warnings or errors.
- Keep completion tools (`task_completed`, `need_more_information`, `need_info`) flowing through their existing "state" UI without extra summary lines.
## Constraints & Challenges
- `ToolResponse.meta` is currently dropped when converting to `AnyAgentToolValue`; formatters only see whatever plain text the tool returned.
- MCP tools live in `PeekabooAgentRuntime` while the agent runtime/CLI sits elsewhere, so the metadata schema must be shared via Tachikoma types.
- We must not break existing MCP integrations; the new summary data needs a backwards-compatible wire format.
## Phase 1 — Plumbing
1. Introduce a typed `ToolEventSummary` struct (in Tachikoma) with optional fields for app/window, element, coordinates, scroll/move vectors, command strings, durations, etc.
2. Extend `ToolResponse` to carry an optional `summary: ToolEventSummary` (or replace `meta` entirely) and ensure the MCP adapter serializes/deserializes it.
3. Update the agent streaming pipeline (`PeekabooAgentService+Streaming`, `AnyAgentToolValue`, CLI event payloads) so the summary is delivered alongside the existing text result.
## Phase 2 — Tool Implementations
1. Audit every MCP tool (click/type/scroll/see/shell/sleep/window/app/menu/dialog/drag/move/swipe/list/etc.).
2. For each tool, populate `ToolEventSummary` using the context it already has:
- UI tools: `targetApp`, `windowTitle`, `elementLabel`, `elementRole`, `humanizedPosition`.
- Pointer tools: `direction`, `distancePx`, `profile`, `durationMs`.
- Vision tools: `captureApp`, `windowTitle`, `sessionId` (for internal tracing only if we still need it), element counts.
- System tools: `shellCommand`, `workingDirectory`, `sleepMs`, `reason`.
3. Remove raw element IDs (`elem_153`) and replace them with user-facing labels.
## Phase 3 — Formatting & UX
1. Update `ToolFormatter` (and specialized subclasses) to prefer the new summary fields when generating compact/result summaries.
2. Teach `AgentOutputDelegate` to:
- Drop the green `[ok]` marker on success.
- Render geometry in natural language (e.g., `1280×720 anchored top-left on Display 1`).
- Continue showing badges only for warnings/errors.
3. Verify the Mac UI timeline consumes the same summary strings.
## Phase 4 — Verification
- Add unit tests for representative tools ensuring they emit the expected `ToolEventSummary`.
- Record CLI golden outputs (before/after) to confirm we now print sentences like `Click Chrome · Button "Sign In with Email"`.
- Dogfood on Grindr/Wingman workflow to ensure the motivation scenarios look correct end-to-end.
## Open Questions
- Should we completely remove `meta`, or keep it for third-party MCP clients that expect arbitrary dictionaries?
- Do we want localized summaries, or is English-only acceptable for now?
- How do we expose the same summaries via API (e.g., JSON streaming) for downstream automation/telemetry?

View File

@ -33,7 +33,8 @@
"poltergeist:rest": "./scripts/poltergeist-wrapper.sh rest",
"poltergeist:status": "./scripts/poltergeist-wrapper.sh status",
"poltergeist:panel": "./scripts/poltergeist-wrapper.sh status panel",
"poltergeist:logs": "./scripts/poltergeist-wrapper.sh logs"
"poltergeist:logs": "./scripts/poltergeist-wrapper.sh logs",
"oracle": "oracle"
},
"repository": {
"type": "git",
@ -44,5 +45,8 @@
"bugs": {
"url": "https://github.com/steipete/peekaboo/issues"
},
"homepage": "https://github.com/steipete/peekaboo#readme"
"homepage": "https://github.com/steipete/peekaboo#readme",
"devDependencies": {
"@steipete/oracle": "file:../oracle"
}
}

9
pnpm-lock.yaml generated
View File

@ -4,6 +4,13 @@ settings:
autoInstallPeers: true
excludeLinksFromLockfile: false
overrides:
'@steipete/oracle': link:../oracle
importers:
.: {}
.:
devDependencies:
'@steipete/oracle':
specifier: link:../oracle
version: link:../oracle

2
pnpm-workspace.yaml Normal file
View File

@ -0,0 +1,2 @@
overrides:
'@steipete/oracle': link:../oracle

View File

@ -4,6 +4,21 @@
# This script builds the CLI independently of the Node.js MCP server
set -e
set -o pipefail
# Detect xcbeautify once up front; build output is passed through untouched when it is missing.
USE_XCBEAUTIFY=0
if command -v xcbeautify >/dev/null 2>&1; then
    USE_XCBEAUTIFY=1
fi

# Filter stdin through xcbeautify (forwarding any arguments) when it is
# available; otherwise copy stdin to stdout unchanged.
pipe_build_output() {
    if [[ "$USE_XCBEAUTIFY" -ne 1 ]]; then
        cat
        return
    fi
    xcbeautify "$@"
}
# Colors for output
GREEN='\033[0;32m'
@ -17,7 +32,7 @@ cd "$(dirname "$0")/../Apps/CLI"
# Build for release with optimizations
echo -e "${BLUE}Building release version...${NC}"
swift build -c release
swift build -c release 2>&1 | pipe_build_output
# Get the build output path
BUILD_PATH=".build/release/peekaboo"
@ -48,4 +63,4 @@ if [ -f "$BUILD_PATH" ]; then
else
echo -e "${RED}❌ Build failed!${NC}"
exit 1
fi
fi

View File

@ -1,5 +1,6 @@
#!/bin/bash
# Build script for macOS Peekaboo app using xcodebuild
set -o pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
@ -11,6 +12,20 @@ YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
# Detect xcbeautify once up front; build output is passed through untouched when it is missing.
USE_XCBEAUTIFY=0
if command -v xcbeautify >/dev/null 2>&1; then
    USE_XCBEAUTIFY=1
fi

# Filter stdin through xcbeautify (forwarding any arguments) when it is
# available; otherwise copy stdin to stdout unchanged.
pipe_build_output() {
    if [[ "$USE_XCBEAUTIFY" -ne 1 ]]; then
        cat
        return
    fi
    xcbeautify "$@"
}
# Build configuration
WORKSPACE="$PROJECT_ROOT/Apps/Peekaboo.xcworkspace"
SCHEME="Peekaboo"
@ -37,9 +52,10 @@ xcodebuild \
CODE_SIGN_IDENTITY="" \
CODE_SIGNING_REQUIRED=NO \
CODE_SIGN_ENTITLEMENTS="" \
CODE_SIGNING_ALLOWED=NO
CODE_SIGNING_ALLOWED=NO \
2>&1 | pipe_build_output
BUILD_EXIT_CODE=$?
BUILD_EXIT_CODE=${PIPESTATUS[0]}
if [ $BUILD_EXIT_CODE -eq 0 ]; then
echo -e "${GREEN}✅ Build successful${NC}"
@ -52,4 +68,4 @@ if [ $BUILD_EXIT_CODE -eq 0 ]; then
else
echo -e "${RED}❌ Build failed with exit code $BUILD_EXIT_CODE${NC}" >&2
exit $BUILD_EXIT_CODE
fi
fi

View File

@ -1,5 +1,20 @@
#!/bin/bash
set -e
set -o pipefail
# Detect xcbeautify once up front; build output is passed through untouched when it is missing.
USE_XCBEAUTIFY=0
if command -v xcbeautify >/dev/null 2>&1; then
    USE_XCBEAUTIFY=1
fi

# Filter stdin through xcbeautify (forwarding any arguments) when it is
# available; otherwise copy stdin to stdout unchanged.
pipe_build_output() {
    if [[ "$USE_XCBEAUTIFY" -ne 1 ]]; then
        cat
        return
    fi
    xcbeautify "$@"
}
echo "Building Swift CLI..."
@ -7,7 +22,7 @@ echo "Building Swift CLI..."
cd "$(dirname "$0")/../Apps/CLI"
# Build the Swift CLI in release mode
swift build --configuration release
swift build --configuration release 2>&1 | pipe_build_output
# Copy the binary to the root directory
cp .build/release/peekaboo ../peekaboo
@ -15,4 +30,4 @@ cp .build/release/peekaboo ../peekaboo
# Make it executable
chmod +x ../peekaboo
echo "Swift CLI built successfully and copied to ./peekaboo"
echo "Swift CLI built successfully and copied to ./peekaboo"

View File

@ -1,11 +1,26 @@
#!/bin/bash
set -e # Exit immediately if a command exits with a non-zero status.
set -o pipefail
PROJECT_ROOT=$(cd "$(dirname "$0")/.." && pwd)
SWIFT_PROJECT_PATH="$PROJECT_ROOT/Apps/CLI"
FINAL_BINARY_NAME="peekaboo"
FINAL_BINARY_PATH="$PROJECT_ROOT/$FINAL_BINARY_NAME"
# Detect xcbeautify once up front; build output is passed through untouched when it is missing.
USE_XCBEAUTIFY=0
if command -v xcbeautify >/dev/null 2>&1; then
    USE_XCBEAUTIFY=1
fi

# Filter stdin through xcbeautify (forwarding any arguments) when it is
# available; otherwise copy stdin to stdout unchanged.
pipe_build_output() {
    if [[ "$USE_XCBEAUTIFY" -ne 1 ]]; then
        cat
        return
    fi
    xcbeautify "$@"
}
# Swift compiler flags for size optimization
# -Osize: Optimize for binary size.
# -wmo: Whole Module Optimization, allows more aggressive optimizations.
@ -47,7 +62,10 @@ enum Version {
EOF
echo "🏗️ Building for arm64 (Apple Silicon) only..."
(cd "$SWIFT_PROJECT_PATH" && swift build --arch arm64 -c release $SWIFT_OPTIMIZATION_FLAGS)
(
cd "$SWIFT_PROJECT_PATH"
swift build --arch arm64 -c release $SWIFT_OPTIMIZATION_FLAGS 2>&1 | pipe_build_output
)
cp "$SWIFT_PROJECT_PATH/.build/arm64-apple-macosx/release/$FINAL_BINARY_NAME" "$FINAL_BINARY_PATH.tmp"
echo "✅ arm64 build complete"
@ -90,4 +108,4 @@ echo "🔍 Verifying final binary..."
lipo -info "$FINAL_BINARY_PATH"
ls -lh "$FINAL_BINARY_PATH"
echo "🎉 ARM64 binary '$FINAL_BINARY_PATH' created and optimized successfully!"
echo "🎉 ARM64 binary '$FINAL_BINARY_PATH' created and optimized successfully!"

View File

@ -1,9 +1,24 @@
#!/bin/bash
set -e
set -o pipefail
PROJECT_ROOT=$(cd "$(dirname "$0")/.." && pwd)
SWIFT_PROJECT_PATH="$PROJECT_ROOT/Apps/CLI"
# Probe for xcbeautify once at startup; default to "not available".
USE_XCBEAUTIFY=0
if command -v xcbeautify >/dev/null 2>&1; then
    USE_XCBEAUTIFY=1
fi

# pipe_build_output [xcbeautify-args...]
#
# Filter for build output arriving on stdin.
#   - xcbeautify detected:     stdin is handed to xcbeautify, with any
#                              arguments forwarded unchanged.
#   - xcbeautify not detected: stdin is copied to stdout verbatim via cat.
# The exit status is that of whichever command ran.
pipe_build_output() {
    if (( USE_XCBEAUTIFY != 1 )); then
        cat
        return
    fi
    xcbeautify "$@"
}
# Parse arguments
CLEAN_BUILD=false
if [[ "$1" == "--clean" ]]; then
@ -70,7 +85,10 @@ else
echo "🏗️ Building for debug (incremental)..."
fi
(cd "$SWIFT_PROJECT_PATH" && swift build)
(
cd "$SWIFT_PROJECT_PATH"
swift build 2>&1 | pipe_build_output
)
echo "🔏 Code signing the debug binary..."
PROJECT_NAME="peekaboo"

View File

@ -1,5 +1,6 @@
#!/bin/bash
set -e # Exit immediately if a command exits with a non-zero status.
set -o pipefail
PROJECT_ROOT=$(cd "$(dirname "$0")/.." && pwd)
SWIFT_PROJECT_PATH="$PROJECT_ROOT/Apps/CLI"
@ -15,6 +16,20 @@ X86_64_BINARY_TEMP="$PROJECT_ROOT/${FINAL_BINARY_NAME}-x86_64"
# -Xlinker -dead_strip: Remove dead code at the linking stage.
SWIFT_OPTIMIZATION_FLAGS="-Xswiftc -Osize -Xswiftc -wmo -Xlinker -dead_strip"
# Probe for xcbeautify once at startup; default to "not available".
USE_XCBEAUTIFY=0
if command -v xcbeautify >/dev/null 2>&1; then
    USE_XCBEAUTIFY=1
fi

# pipe_build_output [xcbeautify-args...]
#
# Filter for build output arriving on stdin.
#   - xcbeautify detected:     stdin is handed to xcbeautify, with any
#                              arguments forwarded unchanged.
#   - xcbeautify not detected: stdin is copied to stdout verbatim via cat.
# The exit status is that of whichever command ran.
pipe_build_output() {
    if (( USE_XCBEAUTIFY != 1 )); then
        cat
        return
    fi
    xcbeautify "$@"
}
echo "🧹 Cleaning previous build artifacts..."
(cd "$SWIFT_PROJECT_PATH" && swift package reset) || echo "'swift package reset' encountered an issue, attempting rm -rf..."
rm -rf "$SWIFT_PROJECT_PATH/.build"
@ -50,12 +65,18 @@ enum Version {
EOF
echo "🏗️ Building for arm64 (Apple Silicon)..."
(cd "$SWIFT_PROJECT_PATH" && swift build --arch arm64 -c release $SWIFT_OPTIMIZATION_FLAGS)
(
cd "$SWIFT_PROJECT_PATH"
swift build --arch arm64 -c release $SWIFT_OPTIMIZATION_FLAGS 2>&1 | pipe_build_output
)
cp "$SWIFT_PROJECT_PATH/.build/arm64-apple-macosx/release/$FINAL_BINARY_NAME" "$ARM64_BINARY_TEMP"
echo "✅ arm64 build complete: $ARM64_BINARY_TEMP"
echo "🏗️ Building for x86_64 (Intel)..."
(cd "$SWIFT_PROJECT_PATH" && swift build --arch x86_64 -c release $SWIFT_OPTIMIZATION_FLAGS)
(
cd "$SWIFT_PROJECT_PATH"
swift build --arch x86_64 -c release $SWIFT_OPTIMIZATION_FLAGS 2>&1 | pipe_build_output
)
cp "$SWIFT_PROJECT_PATH/.build/x86_64-apple-macosx/release/$FINAL_BINARY_NAME" "$X86_64_BINARY_TEMP"
echo "✅ x86_64 build complete: $X86_64_BINARY_TEMP"
@ -104,4 +125,4 @@ echo "🔍 Verifying final universal binary..."
lipo -info "$FINAL_BINARY_PATH"
ls -lh "$FINAL_BINARY_PATH"
echo "🎉 Universal binary '$FINAL_BINARY_PATH' created and optimized successfully!"
echo "🎉 Universal binary '$FINAL_BINARY_PATH' created and optimized successfully!"

View File

@ -6,6 +6,20 @@ LOG_PATH=${CLI_BUILD_LOG:-/tmp/cli-build.log}
EXIT_PATH=${CLI_BUILD_EXIT:-/tmp/cli-build.exit}
BUILD_PATH=${CLI_BUILD_DIR:-/tmp/peekaboo-cli-build}
# Probe for xcbeautify once at startup; default to "not available".
USE_XCBEAUTIFY=0
if command -v xcbeautify >/dev/null 2>&1; then
    USE_XCBEAUTIFY=1
fi

# pipe_build_output [xcbeautify-args...]
#
# Filter for build output arriving on stdin.
#   - xcbeautify detected:     stdin is handed to xcbeautify, with any
#                              arguments forwarded unchanged.
#   - xcbeautify not detected: stdin is copied to stdout verbatim via cat.
# The exit status is that of whichever command ran.
pipe_build_output() {
    if (( USE_XCBEAUTIFY != 1 )); then
        cat
        return
    fi
    xcbeautify "$@"
}
write_exit_code() {
local status=${1:-$?}
mkdir -p "$(dirname "$EXIT_PATH")"
@ -19,7 +33,7 @@ rm -f "$LOG_PATH" "$EXIT_PATH"
cd "$ROOT_DIR"
set +e
swift build --package-path Apps/CLI --build-path "$BUILD_PATH" "$@" 2>&1 | tee "$LOG_PATH"
swift build --package-path Apps/CLI --build-path "$BUILD_PATH" "$@" 2>&1 | pipe_build_output | tee "$LOG_PATH"
BUILD_STATUS=${PIPESTATUS[0]}
set -e