feat(tools): add structured metadata to tool results
This commit is contained in:
parent
1ed44a9939
commit
dc6123adfa
@ -69,7 +69,7 @@
|
||||
"generated_by": "poltergeist",
|
||||
"project_type": "mixed",
|
||||
"performance_profile": "balanced",
|
||||
"generated_at": "2025-11-14T13:16:14.877Z",
|
||||
"generated_at": "2025-11-15T02:45:36.462Z",
|
||||
"total_exclusions": 53
|
||||
}
|
||||
}
|
||||
@ -172,10 +172,10 @@ var verbose: Bool { self.runtime?.configuration.verbose ?? self.runtimeOptions.v
|
||||
private final class EscapeKeyMonitor {
|
||||
private var source: (any DispatchSourceRead)?
|
||||
private var originalTermios = termios()
|
||||
private let handler: @Sendable () -> Void
|
||||
private let handler: @Sendable () async -> Void
|
||||
private let queue = DispatchQueue(label: "peekaboo.escape.monitor")
|
||||
|
||||
init(handler: @escaping @Sendable () -> Void) {
|
||||
init(handler: @escaping @Sendable () async -> Void) {
|
||||
self.handler = handler
|
||||
}
|
||||
|
||||
@ -195,8 +195,8 @@ private final class EscapeKeyMonitor {
|
||||
let count = read(STDIN_FILENO, &buffer, buffer.count)
|
||||
guard count > 0 else { return }
|
||||
if buffer[..<count].contains(0x1B) {
|
||||
Task { @MainActor in
|
||||
self.handler()
|
||||
Task {
|
||||
await self.handler()
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -983,7 +983,7 @@ extension AgentCommand {
|
||||
let cancelMonitor = EscapeKeyMonitor { [runTask] in
|
||||
if !runTask.isCancelled {
|
||||
runTask.cancel()
|
||||
Task { @MainActor in
|
||||
await MainActor.run {
|
||||
print("\n\(TerminalColor.yellow)Esc pressed – cancelling current run...\(TerminalColor.reset)")
|
||||
}
|
||||
}
|
||||
|
||||
@ -134,6 +134,7 @@ extension AgentOutputDelegate {
|
||||
}
|
||||
|
||||
let (formatter, toolType) = self.toolFormatter(for: name)
|
||||
let summary = ToolEventSummary.from(resultJSON: json)
|
||||
|
||||
if let toolType, [ToolType.taskCompleted, .needMoreInformation, .needInfo].contains(toolType) {
|
||||
self.handleCommunicationToolComplete(name: name, toolType: toolType)
|
||||
@ -143,7 +144,11 @@ extension AgentOutputDelegate {
|
||||
let success = (json["success"] as? Bool) ?? true
|
||||
|
||||
if success {
|
||||
let resultSummary = self.resultSummary(for: name, json: json, formatter: formatter)
|
||||
let resultSummary = self.resultSummary(
|
||||
for: name,
|
||||
json: json,
|
||||
formatter: formatter,
|
||||
summary: summary)
|
||||
self.handleSuccess(
|
||||
resultSummary: resultSummary,
|
||||
durationString: durationString,
|
||||
@ -326,18 +331,8 @@ extension AgentOutputDelegate {
|
||||
}
|
||||
|
||||
private func successStatusLine(resultSummary: String, durationString: String) -> String {
|
||||
let statusPrefix = [
|
||||
" ",
|
||||
TerminalColor.bgGreen,
|
||||
TerminalColor.bold,
|
||||
" ",
|
||||
AgentDisplayTokens.Status.success,
|
||||
" ",
|
||||
TerminalColor.reset
|
||||
].joined()
|
||||
|
||||
guard !resultSummary.isEmpty else {
|
||||
return "\(statusPrefix)\(durationString)"
|
||||
if resultSummary.isEmpty {
|
||||
return " \(durationString)"
|
||||
}
|
||||
|
||||
let summarySegment = [
|
||||
@ -347,7 +342,7 @@ extension AgentOutputDelegate {
|
||||
TerminalColor.reset
|
||||
].joined()
|
||||
|
||||
return "\(statusPrefix)\(summarySegment)\(durationString)"
|
||||
return "\(summarySegment)\(durationString)"
|
||||
}
|
||||
|
||||
private func failureStatusLine(message: String, durationString: String) -> String {
|
||||
@ -428,11 +423,20 @@ extension AgentOutputDelegate {
|
||||
return (UnknownToolFormatter(toolName: name), nil)
|
||||
}
|
||||
|
||||
private func resultSummary(for name: String, json: [String: Any], formatter: any ToolFormatter) -> String {
|
||||
var summary = formatter.formatResultSummary(result: json)
|
||||
private func resultSummary(
|
||||
for name: String,
|
||||
json: [String: Any],
|
||||
formatter: any ToolFormatter,
|
||||
summary: ToolEventSummary?
|
||||
) -> String {
|
||||
if let summaryText = summary?.shortDescription(toolName: name) {
|
||||
return summaryText
|
||||
}
|
||||
|
||||
var fallback = formatter.formatResultSummary(result: json)
|
||||
|
||||
guard name == "app" else {
|
||||
return summary
|
||||
return self.cleanToolPrefix(fallback)
|
||||
}
|
||||
|
||||
if let meta = json["meta"] as? [String: Any],
|
||||
@ -442,21 +446,21 @@ extension AgentOutputDelegate {
|
||||
let text = firstContent["text"] as? String {
|
||||
switch text {
|
||||
case let value where value.contains("Launched"):
|
||||
summary = "→ \(appName) launched"
|
||||
fallback = "→ \(appName) launched"
|
||||
case let value where value.contains("Quit"):
|
||||
summary = "→ \(appName) quit"
|
||||
fallback = "→ \(appName) quit"
|
||||
case let value where value.contains("Focused") || value.contains("Switched"):
|
||||
summary = "→ \(appName) focused"
|
||||
fallback = "→ \(appName) focused"
|
||||
case let value where value.contains("Hidden"):
|
||||
summary = "→ \(appName) hidden"
|
||||
fallback = "→ \(appName) hidden"
|
||||
case let value where value.contains("Unhidden"):
|
||||
summary = "→ \(appName) shown"
|
||||
fallback = "→ \(appName) shown"
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return summary
|
||||
return self.cleanToolPrefix(fallback)
|
||||
}
|
||||
|
||||
private func handleSuccess(
|
||||
@ -467,16 +471,11 @@ extension AgentOutputDelegate {
|
||||
) {
|
||||
switch self.outputMode {
|
||||
case .minimal:
|
||||
if !resultSummary.isEmpty {
|
||||
print(" OK \(resultSummary)\(durationString)")
|
||||
} else {
|
||||
print(" OK\(durationString)")
|
||||
}
|
||||
let prefix = resultSummary.isEmpty ? "" : " \(resultSummary)"
|
||||
print("\(prefix)\(durationString)")
|
||||
|
||||
case .verbose:
|
||||
print(
|
||||
" \(TerminalColor.green)\(AgentDisplayTokens.Status.success)\(TerminalColor.reset)\(durationString)"
|
||||
)
|
||||
print(" \(durationString)")
|
||||
if let formatted = formatJSON(result) {
|
||||
print("\(TerminalColor.gray)Result:\(TerminalColor.reset)")
|
||||
print(formatted)
|
||||
@ -530,8 +529,9 @@ extension AgentOutputDelegate {
|
||||
guard self.outputMode != .minimal && self.outputMode != .quiet else { return }
|
||||
guard let detail = self.primaryResultMessage(from: json) else { return }
|
||||
let snippet = detail.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
guard !snippet.isEmpty else { return }
|
||||
print("\n \(TerminalColor.gray)\(snippet.prefix(240))\(TerminalColor.reset)")
|
||||
let sanitized = self.cleanToolPrefix(snippet)
|
||||
guard !sanitized.isEmpty else { return }
|
||||
print("\n \(TerminalColor.gray)\(sanitized.prefix(240))\(TerminalColor.reset)")
|
||||
}
|
||||
|
||||
private func primaryResultMessage(from json: [String: Any]) -> String? {
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
// This file is auto-generated by the build script. Do not edit manually.
|
||||
enum Version {
|
||||
static let current = "Peekaboo 3.0.0"
|
||||
static let gitCommit = "7c99d16e-dirty"
|
||||
static let gitCommitDate = "2025-11-14 14:34:54 +0000"
|
||||
static let gitCommit = "b4c088fe-dirty"
|
||||
static let gitCommitDate = "2025-11-14 19:52:55 +0000"
|
||||
static let gitBranch = "main"
|
||||
static let buildDate = "2025-11-14T14:40:52+00:00"
|
||||
static let buildDate = "2025-11-14T20:57:47+00:00"
|
||||
|
||||
static var fullVersion: String {
|
||||
return "\(current) (\(gitBranch)/\(gitCommit), built: \(buildDate))"
|
||||
|
||||
@ -30,14 +30,14 @@ class ToolFormatterBridge {
|
||||
// Format completed tool call
|
||||
let resultDict = self.parseArguments(result)
|
||||
let success = (resultDict["success"] as? Bool) ?? true
|
||||
let summaryText = ToolEventSummary.from(resultJSON: resultDict)?
|
||||
.shortDescription(toolName: name) ?? formatter.formatResultSummary(result: resultDict)
|
||||
|
||||
if success {
|
||||
let summary = formatter.formatResultSummary(result: resultDict)
|
||||
if !summary.isEmpty {
|
||||
return "\(AgentDisplayTokens.Status.success) \(toolType.displayName): \(summary)"
|
||||
} else {
|
||||
return "\(AgentDisplayTokens.Status.success) \(toolType.displayName) completed"
|
||||
if !summaryText.isEmpty {
|
||||
return "\(AgentDisplayTokens.Status.success) \(toolType.displayName): \(summaryText)"
|
||||
}
|
||||
return "\(AgentDisplayTokens.Status.success) \(toolType.displayName) completed"
|
||||
} else {
|
||||
let error = (resultDict["error"] as? String) ?? "Failed"
|
||||
return "\(AgentDisplayTokens.Status.failure) \(toolType.displayName): \(error)"
|
||||
@ -79,6 +79,11 @@ class ToolFormatterBridge {
|
||||
|
||||
let formatter = ToolFormatterRegistry.shared.formatter(for: toolType)
|
||||
let resultDict = self.parseArguments(result)
|
||||
if let summary = ToolEventSummary.from(resultJSON: resultDict)?.shortDescription(toolName: name),
|
||||
!summary.isEmpty
|
||||
{
|
||||
return summary
|
||||
}
|
||||
|
||||
let summary = formatter.formatResultSummary(result: resultDict)
|
||||
if !summary.isEmpty {
|
||||
@ -135,8 +140,12 @@ class ToolFormatterBridge {
|
||||
if let result {
|
||||
let resultDict = self.parseArguments(result)
|
||||
let success = (resultDict["success"] as? Bool) ?? true
|
||||
let summaryText = ToolEventSummary.from(resultJSON: resultDict)?.shortDescription(toolName: name)
|
||||
|
||||
if success {
|
||||
if let summaryText, !summaryText.isEmpty {
|
||||
return "\(AgentDisplayTokens.Status.success) \(displayName): \(summaryText)"
|
||||
}
|
||||
return "\(AgentDisplayTokens.Status.success) \(displayName) completed"
|
||||
} else {
|
||||
let error = (resultDict["error"] as? String) ?? "Failed"
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import PeekabooCore
|
||||
|
||||
/// Registry that manages all tool formatters for the Mac app
|
||||
@MainActor
|
||||
@ -70,6 +71,12 @@ final class MacToolFormatterRegistry {
|
||||
return nil
|
||||
}
|
||||
|
||||
if let summary = ToolEventSummary.from(resultJSON: json)?.shortDescription(toolName: toolName),
|
||||
!summary.isEmpty
|
||||
{
|
||||
return summary
|
||||
}
|
||||
|
||||
// Try to get formatter
|
||||
if let formatter = formatter(for: toolName) {
|
||||
return formatter.formatResult(toolName: toolName, result: json)
|
||||
|
||||
@ -320,7 +320,10 @@ struct AnimationToggleRow: View {
|
||||
@MainActor
|
||||
private func previewTyping() async {
|
||||
let sampleKeys = ["H", "e", "l", "l", "o"]
|
||||
_ = await self.visualizerCoordinator.showTypingFeedback(keys: sampleKeys, duration: 2.0)
|
||||
_ = await self.visualizerCoordinator.showTypingFeedback(
|
||||
keys: sampleKeys,
|
||||
duration: 2.0,
|
||||
cadence: .human(wordsPerMinute: 60))
|
||||
}
|
||||
|
||||
@MainActor
|
||||
|
||||
@ -70,8 +70,10 @@ struct NewSessionButton: View {
|
||||
Label("New Session", systemImage: "plus")
|
||||
.font(.subheadline.weight(.semibold))
|
||||
.frame(maxWidth: .infinity)
|
||||
.foregroundStyle(.white.opacity(0.92))
|
||||
.menuActionCapsule(fillOpacity: 0.16)
|
||||
})
|
||||
.buttonStyle(MenuActionButtonStyle())
|
||||
.buttonStyle(.modern)
|
||||
}
|
||||
}
|
||||
|
||||
@ -84,8 +86,10 @@ struct ExpandButton: View {
|
||||
Label("Expand", systemImage: "arrow.up.left.and.arrow.down.right")
|
||||
.font(.subheadline.weight(.semibold))
|
||||
.frame(maxWidth: .infinity)
|
||||
.foregroundStyle(.white.opacity(0.92))
|
||||
.menuActionCapsule(fillOpacity: 0.16)
|
||||
})
|
||||
.buttonStyle(MenuActionButtonStyle())
|
||||
.buttonStyle(.modern)
|
||||
}
|
||||
}
|
||||
|
||||
@ -99,41 +103,24 @@ struct QuickActionsView: View {
|
||||
Button(action: self.onOpenMainWindow, label: {
|
||||
Label("Open Main Window", systemImage: "rectangle.stack")
|
||||
.frame(maxWidth: .infinity)
|
||||
.foregroundStyle(.white.opacity(0.92))
|
||||
.menuActionCapsule(fillOpacity: 0.16)
|
||||
})
|
||||
.buttonStyle(MenuActionButtonStyle())
|
||||
.buttonStyle(.modern)
|
||||
|
||||
Button(action: self.onCreateNewSession, label: {
|
||||
Label("New Session", systemImage: "plus.circle")
|
||||
.frame(maxWidth: .infinity)
|
||||
.foregroundStyle(.white.opacity(0.92))
|
||||
.menuActionCapsule(fillOpacity: 0.16)
|
||||
})
|
||||
.buttonStyle(MenuActionButtonStyle())
|
||||
.buttonStyle(.modern)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Shared Styling
|
||||
|
||||
struct MenuActionButtonStyle: ButtonStyle {
|
||||
typealias Body = AnyView
|
||||
|
||||
func makeBody(configuration: Configuration) -> AnyView {
|
||||
AnyView(
|
||||
configuration.label
|
||||
.foregroundStyle(.white.opacity(0.92))
|
||||
.padding(.vertical, 10)
|
||||
.padding(.horizontal, 12)
|
||||
.background(
|
||||
RoundedRectangle(cornerRadius: 16, style: .continuous)
|
||||
.fill(Color.white.opacity(configuration.isPressed ? 0.24 : 0.16))
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: 16, style: .continuous)
|
||||
.stroke(Color.white.opacity(0.2))))
|
||||
.shadow(color: Color.black.opacity(configuration.isPressed ? 0.1 : 0.18), radius: 12, y: 8)
|
||||
.scaleEffect(configuration.isPressed ? 0.98 : 1)
|
||||
.animation(.easeOut(duration: 0.12), value: configuration.isPressed))
|
||||
}
|
||||
}
|
||||
|
||||
extension View {
|
||||
fileprivate func menuActionCapsule(fillOpacity: Double) -> some View {
|
||||
self
|
||||
@ -145,5 +132,6 @@ extension View {
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: 16, style: .continuous)
|
||||
.stroke(Color.white.opacity(0.15))))
|
||||
.shadow(color: Color.black.opacity(0.18), radius: 12, y: 8)
|
||||
}
|
||||
}
|
||||
|
||||
@ -191,7 +191,10 @@ struct VisualizerTestView: View {
|
||||
|
||||
func testTypeAnimation() async {
|
||||
let keys = ["H", "e", "l", "l", "o", "Space", "W", "o", "r", "l", "d"]
|
||||
_ = await self.coordinator.showTypingFeedback(keys: keys, duration: 3.0)
|
||||
_ = await self.coordinator.showTypingFeedback(
|
||||
keys: keys,
|
||||
duration: 3.0,
|
||||
cadence: .human(wordsPerMinute: 55))
|
||||
}
|
||||
|
||||
func testScrollAnimation() async {
|
||||
|
||||
@ -5,8 +5,10 @@
|
||||
|
||||
@preconcurrency import Foundation
|
||||
import os
|
||||
import PeekabooAutomation
|
||||
import PeekabooCore
|
||||
import PeekabooFoundation
|
||||
import PeekabooProtocols
|
||||
|
||||
#if VISUALIZER_VERBOSE_LOGS
|
||||
@inline(__always)
|
||||
@ -129,7 +131,7 @@ final class VisualizerEventReceiver {
|
||||
case let .annotatedScreenshot(imageData, elements, windowBounds, duration):
|
||||
await self.coordinator.showAnnotatedScreenshot(
|
||||
imageData: imageData,
|
||||
elements: elements,
|
||||
elements: self.convertDetectedElements(elements),
|
||||
windowBounds: windowBounds,
|
||||
duration: duration)
|
||||
}
|
||||
@ -142,4 +144,18 @@ final class VisualizerEventReceiver {
|
||||
private static func parseEventID(from descriptor: String) -> UUID? {
|
||||
descriptor.split(separator: "|", maxSplits: 1).first.flatMap { UUID(uuidString: String($0)) }
|
||||
}
|
||||
|
||||
private func convertDetectedElements(
|
||||
_ elements: [PeekabooProtocols.DetectedElement]) -> [PeekabooAutomation.DetectedElement]
|
||||
{
|
||||
elements.map { element in
|
||||
PeekabooAutomation.DetectedElement(
|
||||
id: element.id,
|
||||
type: element.type,
|
||||
label: element.label,
|
||||
value: element.value,
|
||||
bounds: element.bounds,
|
||||
isEnabled: element.isEnabled)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -301,7 +301,7 @@ extension PeekabooAgentService {
|
||||
let toolResult = AgentToolResult.success(toolCallId: toolCall.id, result: result)
|
||||
await self.sendToolCompletionEvent(
|
||||
name: toolCall.name,
|
||||
payload: self.toolResultPayload(from: result),
|
||||
payload: self.toolResultPayload(from: result, toolName: toolCall.name),
|
||||
eventHandler: context.eventHandler)
|
||||
currentMessages.append(ModelMessage(role: .tool, content: [.toolResult(toolResult)]))
|
||||
return toolResult
|
||||
@ -337,10 +337,20 @@ extension PeekabooAgentService {
|
||||
await eventHandler.send(.toolCallCompleted(name: name, result: payload))
|
||||
}
|
||||
|
||||
private func toolResultPayload(from result: AnyAgentToolValue) -> String {
|
||||
private func toolResultPayload(from result: AnyAgentToolValue, toolName: String) -> String {
|
||||
do {
|
||||
let jsonObject = try result.toJSON()
|
||||
let wrapped: Any = jsonObject is [String: Any] ? jsonObject : ["result": jsonObject]
|
||||
var wrapped: [String: Any]
|
||||
if let dict = jsonObject as? [String: Any] {
|
||||
wrapped = dict
|
||||
} else {
|
||||
wrapped = ["result": jsonObject]
|
||||
}
|
||||
|
||||
if let summaryText = self.summaryText(from: wrapped, toolName: toolName) {
|
||||
wrapped["summary_text"] = summaryText
|
||||
}
|
||||
|
||||
let data = try JSONSerialization.data(withJSONObject: wrapped, options: [])
|
||||
return String(data: data, encoding: .utf8) ?? "{}"
|
||||
} catch {
|
||||
@ -350,6 +360,17 @@ extension PeekabooAgentService {
|
||||
}
|
||||
}
|
||||
|
||||
private func summaryText(from payload: [String: Any], toolName: String) -> String? {
|
||||
guard
|
||||
let meta = payload["meta"] as? [String: Any],
|
||||
let summaryJSON = meta["summary"] as? [String: Any],
|
||||
let summary = ToolEventSummary(json: summaryJSON)
|
||||
else {
|
||||
return nil
|
||||
}
|
||||
return summary.shortDescription(toolName: toolName)
|
||||
}
|
||||
|
||||
private func toolErrorPayload(from error: any Error) -> String {
|
||||
let errorDict = ["error": error.localizedDescription]
|
||||
guard let data = try? JSONSerialization.data(withJSONObject: errorDict, options: []),
|
||||
|
||||
@ -111,11 +111,23 @@ public struct AnalyzeTool: MCPTool {
|
||||
"in \(String(format: "%.2f", duration))s.",
|
||||
].joined(separator: " ")
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"image_path": .string(imagePath),
|
||||
"question": .string(question),
|
||||
"provider": providerType != nil ? .string(providerType!) : .null,
|
||||
"model": .string(modelName),
|
||||
"execution_time": .double(duration),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "Image Analyze",
|
||||
notes: question)
|
||||
|
||||
return ToolResponse(
|
||||
content: [
|
||||
.text(analysisText),
|
||||
.text(timingMessage),
|
||||
])
|
||||
],
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
|
||||
} catch {
|
||||
self.logger.error("Analysis failed: \(error)")
|
||||
|
||||
@ -305,24 +305,26 @@ private struct AppToolActions {
|
||||
let countLine = "\(AgentDisplayTokens.Status.info) Found \(apps.count) running applications "
|
||||
+ "in \(self.executionTimeString(from: executionTime))"
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"apps": .array(
|
||||
apps.map { app in
|
||||
.object([
|
||||
"name": .string(app.name),
|
||||
"bundle_id": app.bundleIdentifier != nil ? .string(app.bundleIdentifier!) : .null,
|
||||
"process_id": .double(Double(app.processIdentifier)),
|
||||
"is_active": .bool(app.isActive),
|
||||
"is_hidden": .bool(app.isHidden),
|
||||
])
|
||||
}),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summaryMeta = self.makeSummary(for: nil, action: "List Applications", notes: "Found \(apps.count) apps")
|
||||
return ToolResponse(
|
||||
content: [
|
||||
.text(summary),
|
||||
.text(countLine),
|
||||
],
|
||||
meta: .object([
|
||||
"apps": .array(
|
||||
apps.map { app in
|
||||
.object([
|
||||
"name": .string(app.name),
|
||||
"bundle_id": app.bundleIdentifier != nil ? .string(app.bundleIdentifier!) : .null,
|
||||
"process_id": .double(Double(app.processIdentifier)),
|
||||
"is_active": .bool(app.isActive),
|
||||
"is_hidden": .bool(app.isHidden),
|
||||
])
|
||||
}),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summaryMeta, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
// MARK: Helpers
|
||||
@ -371,15 +373,17 @@ private struct AppToolActions {
|
||||
message += warningLine
|
||||
}
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"quit_count": .double(Double(quitCount)),
|
||||
"failed": .array(failed.map(Value.string)),
|
||||
"except": .array(excluded.map(Value.string)),
|
||||
"execution_time": .double(executionTime),
|
||||
"force": .bool(request.force),
|
||||
]
|
||||
let summary = self.makeSummary(for: nil, action: "Quit Applications", notes: "Quit \(quitCount) apps")
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"quit_count": .double(Double(quitCount)),
|
||||
"failed": .array(failed.map(Value.string)),
|
||||
"except": .array(excluded.map(Value.string)),
|
||||
"execution_time": .double(executionTime),
|
||||
"force": .bool(request.force),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func buildResponse(
|
||||
@ -396,24 +400,29 @@ private struct AppToolActions {
|
||||
]
|
||||
meta.merge(extraMeta) { $1 }
|
||||
|
||||
let summary = self.makeSummary(for: app, action: self.actionDescription(from: message), notes: nil)
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object(meta))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(meta)))
|
||||
}
|
||||
|
||||
private func focusResponse(app: ServiceApplicationInfo, startTime: Date, verb: String) -> ToolResponse {
|
||||
let statusLine = "\(AgentDisplayTokens.Status.success) \(verb) \(app.name) (PID: \(app.processIdentifier))"
|
||||
let baseMeta: [String: Value] = [
|
||||
"app_name": .string(app.name),
|
||||
"process_id": .double(Double(app.processIdentifier)),
|
||||
"execution_time": .double(self.executionTime(since: startTime)),
|
||||
]
|
||||
let summary = self.makeSummary(for: app, action: verb, notes: nil)
|
||||
return ToolResponse(
|
||||
content: [.text(statusLine)],
|
||||
meta: .object([
|
||||
"app_name": .string(app.name),
|
||||
"process_id": .double(Double(app.processIdentifier)),
|
||||
"execution_time": .double(self.executionTime(since: startTime)),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func executionMeta(from startTime: Date) -> Value {
|
||||
.object(["execution_time": .double(self.executionTime(since: startTime))])
|
||||
let baseMeta: Value = .object(["execution_time": .double(self.executionTime(since: startTime))])
|
||||
let summary = self.makeSummary(for: nil, action: "Switch Applications", notes: nil)
|
||||
return ToolEventSummary.merge(summary: summary, into: baseMeta)
|
||||
}
|
||||
|
||||
private func executionTime(since startTime: Date) -> Double {
|
||||
@ -428,6 +437,22 @@ private struct AppToolActions {
|
||||
"\(String(format: "%.2f", interval))s"
|
||||
}
|
||||
|
||||
private func makeSummary(for app: ServiceApplicationInfo?, action: String, notes: String?) -> ToolEventSummary {
|
||||
var summary = ToolEventSummary(
|
||||
targetApp: app?.name,
|
||||
actionDescription: action,
|
||||
notes: notes)
|
||||
summary.elementValue = app?.bundleIdentifier
|
||||
return summary
|
||||
}
|
||||
|
||||
private func actionDescription(from message: String) -> String {
|
||||
guard let token = message.split(separator: " ").dropFirst().first else {
|
||||
return "App"
|
||||
}
|
||||
return String(token)
|
||||
}
|
||||
|
||||
private func identifier(for app: ServiceApplicationInfo) -> String {
|
||||
if let bundleId = app.bundleIdentifier, !bundleId.isEmpty {
|
||||
return bundleId
|
||||
|
||||
@ -112,13 +112,21 @@ public struct ClickTool: MCPTool {
|
||||
let element = try await self.requireElement(id: identifier, session: session)
|
||||
return ClickResolution(
|
||||
location: element.centerPoint,
|
||||
elementDescription: element.humanDescription)
|
||||
elementDescription: element.humanDescription,
|
||||
targetApp: session.applicationName,
|
||||
windowTitle: session.windowTitle,
|
||||
elementRole: element.humanRole,
|
||||
elementLabel: element.displayLabel)
|
||||
case let .query(text):
|
||||
let session = try await self.requireSession(id: request.sessionId)
|
||||
let element = try await self.findElement(matching: text, session: session)
|
||||
return ClickResolution(
|
||||
location: element.centerPoint,
|
||||
elementDescription: element.humanDescription)
|
||||
elementDescription: element.humanDescription,
|
||||
targetApp: session.applicationName,
|
||||
windowTitle: session.windowTitle,
|
||||
elementRole: element.humanRole,
|
||||
elementLabel: element.displayLabel)
|
||||
}
|
||||
}
|
||||
|
||||
@ -150,9 +158,22 @@ public struct ClickTool: MCPTool {
|
||||
"clicked_element": resolution.elementDescription.map(Value.string) ?? .null,
|
||||
]
|
||||
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: resolution.targetApp,
|
||||
windowTitle: resolution.windowTitle,
|
||||
elementRole: resolution.elementRole,
|
||||
elementLabel: resolution.elementLabel,
|
||||
actionDescription: intent.displayVerb,
|
||||
coordinates: ToolEventSummary.Coordinates(
|
||||
x: Double(resolution.location.x),
|
||||
y: Double(resolution.location.y))
|
||||
)
|
||||
|
||||
let metaValue = ToolEventSummary.merge(summary: summary, into: .object(metaDict))
|
||||
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object(metaDict))
|
||||
meta: metaValue)
|
||||
}
|
||||
|
||||
private func parseCoordinates(_ raw: String) throws -> CGPoint {
|
||||
@ -232,6 +253,26 @@ private enum ClickRequestTarget {
|
||||
private struct ClickResolution {
|
||||
let location: CGPoint
|
||||
let elementDescription: String?
|
||||
let targetApp: String?
|
||||
let windowTitle: String?
|
||||
let elementRole: String?
|
||||
let elementLabel: String?
|
||||
|
||||
init(
|
||||
location: CGPoint,
|
||||
elementDescription: String?,
|
||||
targetApp: String? = nil,
|
||||
windowTitle: String? = nil,
|
||||
elementRole: String? = nil,
|
||||
elementLabel: String? = nil)
|
||||
{
|
||||
self.location = location
|
||||
self.elementDescription = elementDescription
|
||||
self.targetApp = targetApp
|
||||
self.windowTitle = windowTitle
|
||||
self.elementRole = elementRole
|
||||
self.elementLabel = elementLabel
|
||||
}
|
||||
}
|
||||
|
||||
private struct ClickIntent {
|
||||
@ -265,4 +306,12 @@ extension UIElement {
|
||||
fileprivate var humanDescription: String {
|
||||
"\(self.role): \(self.title ?? self.label ?? "untitled")"
|
||||
}
|
||||
|
||||
fileprivate var humanRole: String? {
|
||||
self.roleDescription ?? self.role
|
||||
}
|
||||
|
||||
fileprivate var displayLabel: String? {
|
||||
self.title ?? self.label ?? self.value
|
||||
}
|
||||
}
|
||||
|
||||
@ -163,6 +163,10 @@ public struct DialogTool: MCPTool {
|
||||
let summary =
|
||||
"\(AgentDisplayTokens.Status.success) Clicked button '\(button)' in " +
|
||||
"\(Self.formattedDuration(executionTime))s"
|
||||
let summaryMeta = ToolEventSummary(
|
||||
targetApp: window,
|
||||
actionDescription: "Dialog Button",
|
||||
notes: button)
|
||||
return self.successResponse(
|
||||
message: summary,
|
||||
meta: [
|
||||
@ -171,7 +175,8 @@ public struct DialogTool: MCPTool {
|
||||
"success": .bool(result.success),
|
||||
"execution_time": .double(executionTime),
|
||||
"details": .object(result.details.mapValues { .string($0) }),
|
||||
])
|
||||
],
|
||||
summary: summaryMeta)
|
||||
} else {
|
||||
return ToolResponse
|
||||
.error("Failed to click button '\(button)': \(result.details["error"] ?? "Unknown error")")
|
||||
@ -196,6 +201,10 @@ public struct DialogTool: MCPTool {
|
||||
let message =
|
||||
"\(AgentDisplayTokens.Status.success) Entered text '\(request.text)' into \(fieldDesc)\(clearSuffix) " +
|
||||
"in \(Self.formattedDuration(executionTime))s"
|
||||
let summaryMeta = ToolEventSummary(
|
||||
targetApp: request.window,
|
||||
actionDescription: "Dialog Input",
|
||||
notes: fieldDesc)
|
||||
return self.successResponse(
|
||||
message: message,
|
||||
meta: [
|
||||
@ -206,7 +215,8 @@ public struct DialogTool: MCPTool {
|
||||
"success": .bool(result.success),
|
||||
"execution_time": .double(executionTime),
|
||||
"details": .object(result.details.mapValues { .string($0) }),
|
||||
])
|
||||
],
|
||||
summary: summaryMeta)
|
||||
} else {
|
||||
return ToolResponse.error("Failed to enter text: \(result.details["error"] ?? "Unknown error")")
|
||||
}
|
||||
@ -227,6 +237,9 @@ public struct DialogTool: MCPTool {
|
||||
let summary =
|
||||
"\(AgentDisplayTokens.Status.success) Selected file '\(selection.path)' " +
|
||||
"in \(Self.formattedDuration(executionTime))s"
|
||||
let summaryMeta = ToolEventSummary(
|
||||
actionDescription: "Dialog File",
|
||||
notes: selection.filename)
|
||||
return self.successResponse(
|
||||
message: summary,
|
||||
meta: [
|
||||
@ -237,7 +250,8 @@ public struct DialogTool: MCPTool {
|
||||
"success": .bool(result.success),
|
||||
"execution_time": .double(executionTime),
|
||||
"details": .object(result.details.mapValues { .string($0) }),
|
||||
])
|
||||
],
|
||||
summary: summaryMeta)
|
||||
} else {
|
||||
return ToolResponse.error("Failed to select file: \(result.details["error"] ?? "Unknown error")")
|
||||
}
|
||||
@ -256,6 +270,10 @@ public struct DialogTool: MCPTool {
|
||||
let summary =
|
||||
"\(AgentDisplayTokens.Status.success) Dismissed dialog using \(method) in " +
|
||||
"\(Self.formattedDuration(executionTime))s"
|
||||
let summaryMeta = ToolEventSummary(
|
||||
targetApp: request.window,
|
||||
actionDescription: "Dialog Dismiss",
|
||||
notes: method)
|
||||
return self.successResponse(
|
||||
message: summary,
|
||||
meta: [
|
||||
@ -264,14 +282,15 @@ public struct DialogTool: MCPTool {
|
||||
"success": .bool(result.success),
|
||||
"execution_time": .double(executionTime),
|
||||
"details": .object(result.details.mapValues { .string($0) }),
|
||||
])
|
||||
],
|
||||
summary: summaryMeta)
|
||||
} else {
|
||||
return ToolResponse.error("Failed to dismiss dialog: \(result.details["error"] ?? "Unknown error")")
|
||||
}
|
||||
}
|
||||
|
||||
private func successResponse(message: String, meta: [String: Value]) -> ToolResponse {
|
||||
ToolResponse(content: [.text(message)], meta: .object(meta))
|
||||
private func successResponse(message: String, meta: [String: Value], summary: ToolEventSummary) -> ToolResponse {
|
||||
ToolResponse(content: [.text(message)], meta: ToolEventSummary.merge(summary: summary, into: .object(meta)))
|
||||
}
|
||||
|
||||
static func formattedDuration(_ duration: TimeInterval) -> String {
|
||||
@ -387,9 +406,13 @@ private struct DialogListFormatter {
|
||||
let executionTime: TimeInterval
|
||||
|
||||
func response() -> ToolResponse {
|
||||
ToolResponse(
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: self.elements.dialogInfo.title,
|
||||
actionDescription: "List Dialog",
|
||||
notes: self.elements.dialogInfo.title)
|
||||
return ToolResponse(
|
||||
content: [.text(self.renderContent())],
|
||||
meta: .object(self.metaDictionary()))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(self.metaDictionary())))
|
||||
}
|
||||
|
||||
private func renderContent() -> String {
|
||||
|
||||
@ -116,12 +116,17 @@ public struct DockTool: MCPTool {
|
||||
let duration = self.formatDuration(executionTime)
|
||||
let message = "\(AgentDisplayTokens.Status.success) Launched \(app) from dock in \(duration)"
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"app_name": .string(app),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: app,
|
||||
actionDescription: "Dock Launch",
|
||||
notes: nil)
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"app_name": .string(app),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func handleRightClick(
|
||||
@ -144,13 +149,18 @@ public struct DockTool: MCPTool {
|
||||
}
|
||||
message += " in \(self.formatDuration(executionTime))"
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"app_name": .string(app),
|
||||
"menu_item": menuItem != nil ? .string(menuItem!) : .null,
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: app,
|
||||
actionDescription: "Dock Menu",
|
||||
notes: menuItem ?? "Context menu")
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"app_name": .string(app),
|
||||
"menu_item": menuItem != nil ? .string(menuItem!) : .null,
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func handleHide(
|
||||
@ -164,12 +174,14 @@ public struct DockTool: MCPTool {
|
||||
let duration = self.formatDuration(executionTime)
|
||||
let message = "\(AgentDisplayTokens.Status.success) Hidden dock (enabled auto-hide) in \(duration)"
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"auto_hide_enabled": .bool(true),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(actionDescription: "Dock Hide", notes: nil)
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"auto_hide_enabled": .bool(true),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func handleShow(
|
||||
@ -183,12 +195,14 @@ public struct DockTool: MCPTool {
|
||||
let duration = self.formatDuration(executionTime)
|
||||
let message = "\(AgentDisplayTokens.Status.success) Shown dock (disabled auto-hide) in \(duration)"
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"auto_hide_enabled": .bool(false),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(actionDescription: "Dock Show", notes: nil)
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"auto_hide_enabled": .bool(false),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func handleList(
|
||||
@ -220,30 +234,34 @@ public struct DockTool: MCPTool {
|
||||
"""
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"dock_item_count": .double(Double(dockItems.count)),
|
||||
"include_all": .bool(includeAll),
|
||||
"dock_items": .array(dockItems.map { item in
|
||||
.object([
|
||||
"index": .double(Double(item.index)),
|
||||
"title": .string(item.title),
|
||||
"item_type": .string(item.itemType.rawValue),
|
||||
"is_running": item.isRunning != nil ? .bool(item.isRunning!) : .null,
|
||||
"bundle_identifier": item.bundleIdentifier != nil ? .string(item.bundleIdentifier!) : .null,
|
||||
"position": item.position != nil ? .object([
|
||||
"x": .double(Double(item.position!.x)),
|
||||
"y": .double(Double(item.position!.y)),
|
||||
]) : .null,
|
||||
"size": item.size != nil ? .object([
|
||||
"width": .double(Double(item.size!.width)),
|
||||
"height": .double(Double(item.size!.height)),
|
||||
]) : .null,
|
||||
])
|
||||
}),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "Dock List",
|
||||
notes: "\(dockItems.count) items")
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"dock_item_count": .double(Double(dockItems.count)),
|
||||
"include_all": .bool(includeAll),
|
||||
"dock_items": .array(dockItems.map { item in
|
||||
.object([
|
||||
"index": .double(Double(item.index)),
|
||||
"title": .string(item.title),
|
||||
"item_type": .string(item.itemType.rawValue),
|
||||
"is_running": item.isRunning != nil ? .bool(item.isRunning!) : .null,
|
||||
"bundle_identifier": item.bundleIdentifier != nil ? .string(item.bundleIdentifier!) : .null,
|
||||
"position": item.position != nil ? .object([
|
||||
"x": .double(Double(item.position!.x)),
|
||||
"y": .double(Double(item.position!.y)),
|
||||
]) : .null,
|
||||
"size": item.size != nil ? .object([
|
||||
"width": .double(Double(item.size!.width)),
|
||||
"height": .double(Double(item.size!.height)),
|
||||
]) : .null,
|
||||
])
|
||||
}),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func formatDuration(_ duration: TimeInterval) -> String {
|
||||
|
||||
@ -75,23 +75,23 @@ public struct DragTool: MCPTool {
|
||||
|
||||
do {
|
||||
let startTime = Date()
|
||||
let (fromPoint, fromDescription) = try await self.resolveLocation(
|
||||
let fromPoint = try await self.resolveLocation(
|
||||
target: request.fromTarget,
|
||||
sessionId: request.sessionId,
|
||||
parameterName: "from")
|
||||
let (toPoint, toDescription) = try await self.resolveLocation(
|
||||
let toPoint = try await self.resolveLocation(
|
||||
target: request.toTarget,
|
||||
sessionId: request.sessionId,
|
||||
parameterName: "to")
|
||||
|
||||
guard fromPoint != toPoint else {
|
||||
guard fromPoint.point != toPoint.point else {
|
||||
return ToolResponse.error("Start and end points must be different")
|
||||
}
|
||||
|
||||
try await self.focusTargetAppIfNeeded(request: request)
|
||||
self.logSpaceIntentIfNeeded(request: request)
|
||||
|
||||
let distance = hypot(toPoint.x - fromPoint.x, toPoint.y - fromPoint.y)
|
||||
let distance = hypot(toPoint.point.x - fromPoint.point.x, toPoint.point.y - fromPoint.point.y)
|
||||
let movement = request.profile.resolveParameters(
|
||||
smooth: true,
|
||||
durationOverride: request.durationOverride,
|
||||
@ -102,8 +102,8 @@ public struct DragTool: MCPTool {
|
||||
)
|
||||
|
||||
try await self.context.automation.drag(
|
||||
from: fromPoint,
|
||||
to: toPoint,
|
||||
from: fromPoint.point,
|
||||
to: toPoint.point,
|
||||
duration: movement.duration,
|
||||
steps: movement.steps,
|
||||
modifiers: request.modifiers,
|
||||
@ -111,8 +111,8 @@ public struct DragTool: MCPTool {
|
||||
|
||||
let executionTime = Date().timeIntervalSince(startTime)
|
||||
return self.buildResponse(
|
||||
from: DragPointDescription(point: fromPoint, description: fromDescription),
|
||||
to: DragPointDescription(point: toPoint, description: toDescription),
|
||||
from: fromPoint,
|
||||
to: toPoint,
|
||||
movement: movement,
|
||||
executionTime: executionTime,
|
||||
request: request)
|
||||
@ -131,18 +131,24 @@ public struct DragTool: MCPTool {
|
||||
private func resolveLocation(
|
||||
target: DragLocationInput,
|
||||
sessionId: String?,
|
||||
parameterName: String) async throws -> (CGPoint, String)
|
||||
parameterName: String) async throws -> DragPointDescription
|
||||
{
|
||||
switch target {
|
||||
case let .coordinates(raw):
|
||||
let point = try self.parseCoordinates(raw, parameterName: parameterName)
|
||||
return (point, "(\(Int(point.x)), \(Int(point.y)))")
|
||||
return DragPointDescription(point: point, description: "(\(Int(point.x)), \(Int(point.y)))")
|
||||
case let .element(query):
|
||||
guard let session = await self.getSession(id: sessionId) else {
|
||||
throw CoordinateParseError(message: "No active session. Run 'see' command first to capture UI state.")
|
||||
}
|
||||
if let element = await session.getElement(byId: query) {
|
||||
return (element.centerPoint, "element \(query) (\(element.humanDescription))")
|
||||
return DragPointDescription(
|
||||
point: element.centerPoint,
|
||||
description: "element \(query) (\(element.humanDescription))",
|
||||
targetApp: session.applicationName,
|
||||
windowTitle: session.windowTitle,
|
||||
elementRole: element.summaryRole,
|
||||
elementLabel: element.summaryLabel)
|
||||
}
|
||||
|
||||
let elements = await session.uiElements
|
||||
@ -158,7 +164,13 @@ public struct DragTool: MCPTool {
|
||||
}
|
||||
|
||||
let element = matches.first { $0.isActionable } ?? matches[0]
|
||||
return (element.centerPoint, element.humanDescription)
|
||||
return DragPointDescription(
|
||||
point: element.centerPoint,
|
||||
description: element.humanDescription,
|
||||
targetApp: session.applicationName,
|
||||
windowTitle: session.windowTitle,
|
||||
elementRole: element.summaryRole,
|
||||
elementLabel: element.summaryLabel)
|
||||
}
|
||||
}
|
||||
|
||||
@ -266,7 +278,24 @@ public struct DragTool: MCPTool {
|
||||
metaData["target_app"] = .string(toApp)
|
||||
}
|
||||
|
||||
return ToolResponse(content: [.text(message)], meta: .object(metaData))
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: request.targetApp ?? to.targetApp ?? from.targetApp,
|
||||
windowTitle: to.windowTitle ?? from.windowTitle,
|
||||
elementRole: to.elementRole ?? from.elementRole,
|
||||
elementLabel: to.elementLabel ?? from.elementLabel,
|
||||
actionDescription: "Drag",
|
||||
coordinates: ToolEventSummary.Coordinates(
|
||||
x: Double(to.point.x),
|
||||
y: Double(to.point.y)),
|
||||
pointerProfile: movement.profileName,
|
||||
pointerDistance: Double(distance),
|
||||
pointerDirection: pointerDirection(from: from.point, to: to.point),
|
||||
pointerDurationMs: Double(movement.duration),
|
||||
notes: "from \(from.description) to \(to.description)")
|
||||
|
||||
let metaValue = ToolEventSummary.merge(summary: summary, into: .object(metaData))
|
||||
|
||||
return ToolResponse(content: [.text(message)], meta: metaValue)
|
||||
}
|
||||
|
||||
private struct CoordinateParseError: Swift.Error {
|
||||
@ -367,6 +396,26 @@ private struct DragToolError: Swift.Error {
|
||||
/// One endpoint of a drag: the resolved point plus the context used when reporting it.
///
/// The four optional fields default to `nil`, so an endpoint given as raw coordinates can be
/// built from just `point` and `description`.
private struct DragPointDescription {
    /// Resolved location of the endpoint.
    let point: CGPoint
    /// Human-readable text describing the endpoint.
    let description: String
    /// Optional application, window, and element context for the endpoint.
    let targetApp, windowTitle, elementRole, elementLabel: String?

    /// Creates an endpoint description; every context argument is optional and defaults to `nil`.
    init(
        point: CGPoint,
        description: String,
        targetApp: String? = nil,
        windowTitle: String? = nil,
        elementRole: String? = nil,
        elementLabel: String? = nil)
    {
        (self.point, self.description) = (point, description)
        (self.targetApp, self.windowTitle) = (targetApp, windowTitle)
        (self.elementRole, self.elementLabel) = (elementRole, elementLabel)
    }
}
|
||||
|
||||
extension UIElement {
|
||||
|
||||
@ -85,14 +85,21 @@ public struct HotkeyTool: MCPTool {
|
||||
let message = "\(AgentDisplayTokens.Status.success) Pressed \(formattedKeys) " +
|
||||
"(held for \(holdDurationMs)ms) in \(durationText)s"
|
||||
|
||||
let baseMeta: Value = .object([
|
||||
"keys": .string(keys),
|
||||
"hold_duration": .double(Double(holdDurationMs)),
|
||||
"execution_time": .double(executionTime),
|
||||
"formatted_keys": .string(formattedKeys),
|
||||
])
|
||||
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "Hotkey",
|
||||
waitDurationMs: Double(holdDurationMs),
|
||||
notes: formattedKeys)
|
||||
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"keys": .string(keys),
|
||||
"hold_duration": .double(Double(holdDurationMs)),
|
||||
"execution_time": .double(executionTime),
|
||||
"formatted_keys": .string(formattedKeys),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: baseMeta))
|
||||
|
||||
} catch {
|
||||
self.logger.error("Hotkey execution failed: \(error)")
|
||||
|
||||
@ -156,13 +156,18 @@ extension ImageTool {
|
||||
|
||||
let imagePath = try savedFiles.first?.path ?? saveTemporaryImage(firstCapture.imageData)
|
||||
let analysis = try await analyzeImage(at: imagePath, question: question)
|
||||
let baseMeta: [String: Value] = [
|
||||
"model": .string(analysis.modelUsed),
|
||||
"savedFiles": .array(savedFiles.map { Value.string($0.path) }),
|
||||
"question": .string(question),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "Image Analyze",
|
||||
notes: question)
|
||||
|
||||
return ToolResponse.text(
|
||||
analysis.text,
|
||||
meta: .object([
|
||||
"model": .string(analysis.modelUsed),
|
||||
"savedFiles": .array(savedFiles.map { Value.string($0.path) }),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func buildCaptureResponse(
|
||||
@ -170,7 +175,19 @@ extension ImageTool {
|
||||
savedFiles: [MCPSavedFile],
|
||||
captureResults: [CaptureResult]) -> ToolResponse
|
||||
{
|
||||
let meta = Value.object(["savedFiles": .array(savedFiles.map { Value.string($0.path) })])
|
||||
let baseMeta = Value.object(["savedFiles": .array(savedFiles.map { Value.string($0.path) })])
|
||||
let captureNote: String
|
||||
if savedFiles.isEmpty {
|
||||
captureNote = "Captured image"
|
||||
} else if savedFiles.count == 1, let label = savedFiles.first?.item_label {
|
||||
captureNote = label
|
||||
} else {
|
||||
captureNote = "Captured \(savedFiles.count) images"
|
||||
}
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "Image Capture",
|
||||
notes: captureNote)
|
||||
let meta = ToolEventSummary.merge(summary: summary, into: baseMeta)
|
||||
|
||||
if format == .data, let capture = captureResults.first, captureResults.count == 1 {
|
||||
return ToolResponse.image(data: capture.imageData, mimeType: "image/png", meta: meta)
|
||||
|
||||
@ -109,7 +109,12 @@ public struct ListTool: MCPTool {
|
||||
lines.append(activeLine)
|
||||
}
|
||||
|
||||
return ToolResponse.text(lines.joined(separator: "\n"))
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "List Applications",
|
||||
notes: "\(apps.count) running")
|
||||
return ToolResponse.text(
|
||||
lines.joined(separator: "\n"),
|
||||
meta: ToolEventSummary.merge(summary: summary, into: nil))
|
||||
} catch {
|
||||
return ToolResponse.error("Failed to list applications: \(error.localizedDescription)")
|
||||
}
|
||||
@ -197,8 +202,9 @@ public struct ListTool: MCPTool {
|
||||
sections.append("- Architecture: \(ProcessInfo.processInfo.processorArchitecture)")
|
||||
|
||||
let fullStatus = sections.joined(separator: "\n")
|
||||
let summary = ToolEventSummary(actionDescription: "Server Status", notes: nil)
|
||||
|
||||
return ToolResponse.text(fullStatus)
|
||||
return ToolResponse.text(fullStatus, meta: ToolEventSummary.merge(summary: summary, into: nil))
|
||||
}
|
||||
}
|
||||
|
||||
@ -271,7 +277,17 @@ private struct WindowListFormatter {
|
||||
var lines = self.headerLines()
|
||||
lines.append("")
|
||||
lines.append(contentsOf: self.windowLines())
|
||||
return ToolResponse.text(lines.joined(separator: "\n"))
|
||||
let baseMeta: Value = .object([
|
||||
"window_count": .int(self.windows.count),
|
||||
"app": self.appInfo?.name != nil ? .string(self.appInfo!.name) : .string(self.identifier),
|
||||
])
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: self.appInfo?.name ?? self.identifier,
|
||||
actionDescription: "List Windows",
|
||||
notes: "\(self.windows.count) windows")
|
||||
return ToolResponse.text(
|
||||
lines.joined(separator: "\n"),
|
||||
meta: ToolEventSummary.merge(summary: summary, into: baseMeta))
|
||||
}
|
||||
|
||||
private func headerLines() -> [String] {
|
||||
|
||||
@ -127,12 +127,17 @@ public struct MCPAgentTool: MCPTool {
|
||||
])
|
||||
}
|
||||
|
||||
let meta = Value.object([
|
||||
let baseMeta = Value.object([
|
||||
"sessionCount": .string(String(sessions.count)),
|
||||
"sessions": .array(sessionsArray),
|
||||
])
|
||||
let summaryMeta = ToolEventSummary(
|
||||
actionDescription: "List agent sessions",
|
||||
notes: "\(sessions.count) session\(sessions.count == 1 ? "" : "s")")
|
||||
|
||||
return ToolResponse.text("Available Sessions:\n\n\(summary)", meta: meta)
|
||||
return ToolResponse.text(
|
||||
"Available Sessions:\n\n\(summary)",
|
||||
meta: ToolEventSummary.merge(summary: summaryMeta, into: baseMeta))
|
||||
}
|
||||
|
||||
private func renderSessionSummaries(_ sessions: [SessionSummary]) -> String {
|
||||
@ -185,12 +190,17 @@ public struct MCPAgentTool: MCPTool {
|
||||
}
|
||||
|
||||
private func formatResult(result: AgentExecutionResult, input: AgentInput) -> ToolResponse {
|
||||
let summary = self.summary(for: result)
|
||||
|
||||
if input.quiet {
|
||||
return ToolResponse.text(result.content)
|
||||
return ToolResponse.text(result.content, meta: ToolEventSummary.merge(summary: summary, into: nil))
|
||||
}
|
||||
|
||||
if input.verbose {
|
||||
return ToolResponse.text(result.content, meta: self.verboseMetadata(for: result))
|
||||
let verboseMeta = self.verboseMetadata(for: result)
|
||||
return ToolResponse.text(
|
||||
result.content,
|
||||
meta: ToolEventSummary.merge(summary: summary, into: verboseMeta))
|
||||
}
|
||||
|
||||
var output = result.content
|
||||
@ -209,8 +219,25 @@ public struct MCPAgentTool: MCPTool {
|
||||
output += tokensLine
|
||||
}
|
||||
|
||||
let meta = result.sessionId.map { Value.object(["sessionId": .string($0)]) }
|
||||
return ToolResponse.text(output, meta: meta)
|
||||
let baseMeta = result.sessionId.map { Value.object(["sessionId": .string($0)]) }
|
||||
return ToolResponse.text(output, meta: ToolEventSummary.merge(summary: summary, into: baseMeta))
|
||||
}
|
||||
|
||||
/// Builds the event summary attached to an agent run's response.
///
/// The notes string is assembled from up to three parts, in this order: the model name (when
/// non-empty), the tool-call count (when greater than zero, pluralised), and the total token
/// count (when usage is present). Parts are joined with " · "; with no parts, notes is `nil`.
///
/// - Parameter result: The finished agent execution to summarise.
/// - Returns: A summary with the action description "Agent run".
private func summary(for result: AgentExecutionResult) -> ToolEventSummary {
    let modelPart: String? = result.metadata.modelName.isEmpty
        ? nil
        : "Model \(result.metadata.modelName)"
    let toolCallPart: String? = result.metadata.toolCallCount > 0
        ? "\(result.metadata.toolCallCount) tool call\(result.metadata.toolCallCount == 1 ? "" : "s")"
        : nil
    let tokenPart: String? = result.usage.map { "\($0.totalTokens) tokens total" }

    let parts = [modelPart, toolCallPart, tokenPart].compactMap { $0 }
    let notes: String? = parts.isEmpty ? nil : parts.joined(separator: " · ")

    return ToolEventSummary(actionDescription: "Agent run", notes: notes)
}
|
||||
|
||||
private func verboseMetadata(for result: AgentExecutionResult) -> Value {
|
||||
|
||||
@ -90,13 +90,18 @@ public struct MenuTool: MCPTool {
|
||||
let menuStructure = try await self.context.menu.listMenus(for: app)
|
||||
let formattedOutput = self.formatMenuStructure(menuStructure)
|
||||
|
||||
let baseMeta: Value = .object([
|
||||
"app": .string(menuStructure.application.name),
|
||||
"total_menus": .int(menuStructure.menus.count),
|
||||
"total_items": .int(menuStructure.totalItems),
|
||||
])
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: menuStructure.application.name,
|
||||
actionDescription: "List Menus",
|
||||
notes: "\(menuStructure.menus.count) menus / \(menuStructure.totalItems) items")
|
||||
return ToolResponse.text(
|
||||
formattedOutput,
|
||||
meta: .object([
|
||||
"app": .string(menuStructure.application.name),
|
||||
"total_menus": .int(menuStructure.menus.count),
|
||||
"total_items": .int(menuStructure.totalItems),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: baseMeta))
|
||||
} catch {
|
||||
return ToolResponse.error("Failed to list menus for app '\(app)': \(error.localizedDescription)")
|
||||
}
|
||||
@ -130,12 +135,16 @@ public struct MenuTool: MCPTool {
|
||||
output += "• \(menuInfo.app): \(menuInfo.menuCount) menus, \(menuInfo.itemCount) items\n"
|
||||
}
|
||||
|
||||
let baseMeta: Value = .object([
|
||||
"total_apps": .int(allMenus.count),
|
||||
"apps": .array(allMenus.map { .string($0.app) }),
|
||||
])
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "List All Menus",
|
||||
notes: "\(allMenus.count) apps")
|
||||
return ToolResponse.text(
|
||||
output,
|
||||
meta: .object([
|
||||
"total_apps": .int(allMenus.count),
|
||||
"apps": .array(allMenus.map { .string($0.app) }),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: baseMeta))
|
||||
} catch {
|
||||
return ToolResponse.error("Failed to list all menus: \(error.localizedDescription)")
|
||||
}
|
||||
@ -150,7 +159,13 @@ public struct MenuTool: MCPTool {
|
||||
if let path = arguments.getString("path") {
|
||||
do {
|
||||
try await self.context.menu.clickMenuItem(app: app, itemPath: path)
|
||||
return ToolResponse.text("\(AgentDisplayTokens.Status.success) Successfully clicked menu item: \(path)")
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: app,
|
||||
actionDescription: "Menu Click",
|
||||
notes: path)
|
||||
return ToolResponse.text(
|
||||
"\(AgentDisplayTokens.Status.success) Successfully clicked menu item: \(path)",
|
||||
meta: ToolEventSummary.merge(summary: summary, into: nil))
|
||||
} catch {
|
||||
return ToolResponse
|
||||
.error("Failed to click menu item '\(path)' in app '\(app)': \(error.localizedDescription)")
|
||||
@ -158,7 +173,13 @@ public struct MenuTool: MCPTool {
|
||||
} else if let item = arguments.getString("item") {
|
||||
do {
|
||||
try await self.context.menu.clickMenuItemByName(app: app, itemName: item)
|
||||
return ToolResponse.text("\(AgentDisplayTokens.Status.success) Successfully clicked menu item: \(item)")
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: app,
|
||||
actionDescription: "Menu Click",
|
||||
notes: item)
|
||||
return ToolResponse.text(
|
||||
"\(AgentDisplayTokens.Status.success) Successfully clicked menu item: \(item)",
|
||||
meta: ToolEventSummary.merge(summary: summary, into: nil))
|
||||
} catch {
|
||||
return ToolResponse
|
||||
.error("Failed to click menu item '\(item)' in app '\(app)': \(error.localizedDescription)")
|
||||
@ -176,8 +197,12 @@ public struct MenuTool: MCPTool {
|
||||
|
||||
do {
|
||||
try await self.context.menu.clickMenuExtra(title: title)
|
||||
return ToolResponse
|
||||
.text("\(AgentDisplayTokens.Status.success) Successfully clicked system menu extra: \(title)")
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "Menu Extra",
|
||||
notes: title)
|
||||
return ToolResponse.text(
|
||||
"\(AgentDisplayTokens.Status.success) Successfully clicked system menu extra: \(title)",
|
||||
meta: ToolEventSummary.merge(summary: summary, into: nil))
|
||||
} catch {
|
||||
return ToolResponse.error("Failed to click system menu extra '\(title)': \(error.localizedDescription)")
|
||||
}
|
||||
|
||||
@ -211,11 +211,17 @@ public struct MoveTool: MCPTool {
|
||||
let location = CGPoint(x: element.frame.midX, y: element.frame.midY)
|
||||
let label = element.title ?? element.label ?? "untitled"
|
||||
let summary = "element \(elementId) (\(element.role): \(label))"
|
||||
return ResolvedMoveTarget(location: location, description: summary)
|
||||
return ResolvedMoveTarget(
|
||||
location: location,
|
||||
description: summary,
|
||||
targetApp: session.applicationName,
|
||||
windowTitle: session.windowTitle,
|
||||
elementRole: element.summaryRole,
|
||||
elementLabel: element.summaryLabel)
|
||||
}
|
||||
}
|
||||
|
||||
private func performMovement(to location: CGPoint, request: MoveRequest) async throws -> MovementParameters {
|
||||
private func performMovement(to location: CGPoint, request: MoveRequest) async throws -> MovementExecution {
|
||||
let automation = self.context.automation
|
||||
let currentLocation = CGEvent(source: nil)?.location ?? .zero
|
||||
let distance = hypot(location.x - currentLocation.x, location.y - currentLocation.y)
|
||||
@ -236,35 +242,68 @@ public struct MoveTool: MCPTool {
|
||||
profile: movement.profile
|
||||
)
|
||||
}
|
||||
return movement
|
||||
return MovementExecution(
|
||||
parameters: movement,
|
||||
startPoint: currentLocation,
|
||||
distance: distance,
|
||||
direction: pointerDirection(from: currentLocation, to: location)
|
||||
)
|
||||
}
|
||||
|
||||
/// Builds the tool response for a completed cursor move.
///
/// The message names the target, the movement profile, the smooth-move timing (only when the
/// move was smooth), and the execution time. The metadata records the target and start
/// locations, profile settings, travelled distance and, when available, the direction; it is
/// then passed through `ToolEventSummary.merge` together with a "Move cursor" summary.
///
/// - Parameters:
///   - target: The resolved destination and its descriptive context.
///   - movement: The executed movement: parameters, start point, distance, and direction.
///   - executionTime: Elapsed time of the operation, reported in seconds.
/// - Returns: The assembled `ToolResponse`.
private func buildResponse(
    target: ResolvedMoveTarget,
    movement: MovementExecution,
    executionTime: TimeInterval) -> ToolResponse
{
    var message = "\(AgentDisplayTokens.Status.success) Moved mouse cursor to \(target.description)"
    message += " using \(movement.parameters.profileName) profile"
    if movement.parameters.smooth {
        message += " (\(movement.parameters.duration)ms, \(movement.parameters.steps) steps)"
    }
    message += " in \(String(format: "%.2f", executionTime))s"

    var metaDict: [String: Value] = [
        "target_location": .object([
            "x": .double(Double(target.location.x)),
            "y": .double(Double(target.location.y)),
        ]),
        "target_description": .string(target.description),
        "smooth": .bool(movement.parameters.smooth),
        "profile": .string(movement.parameters.profileName),
        // Duration and step count are only reported for smooth moves.
        "duration": movement.parameters.smooth ? .double(Double(movement.parameters.duration)) : .null,
        "steps": movement.parameters.smooth ? .double(Double(movement.parameters.steps)) : .null,
        "execution_time": .double(executionTime),
        "distance": .double(Double(movement.distance)),
        "start_location": .object([
            "x": .double(Double(movement.startPoint.x)),
            "y": .double(Double(movement.startPoint.y)),
        ]),
    ]

    // The direction key is omitted entirely when no direction was computed.
    if let direction = movement.direction {
        metaDict["direction"] = .string(direction)
    }

    let summary = ToolEventSummary(
        targetApp: target.targetApp,
        windowTitle: target.windowTitle,
        elementRole: target.elementRole,
        elementLabel: target.elementLabel,
        actionDescription: "Move cursor",
        coordinates: ToolEventSummary.Coordinates(
            x: Double(target.location.x),
            y: Double(target.location.y)),
        pointerProfile: movement.parameters.profileName,
        pointerDistance: Double(movement.distance),
        pointerDirection: movement.direction,
        pointerDurationMs: Double(movement.parameters.duration),
        notes: target.description)

    let metaValue = ToolEventSummary.merge(summary: summary, into: .object(metaDict))

    return ToolResponse(
        content: [.text(message)],
        meta: metaValue)
}
|
||||
|
||||
private func getSession(id: String?) async -> UISession? {
|
||||
@ -307,6 +346,33 @@ private struct MoveRequest {
|
||||
/// A resolved cursor destination plus the context used when reporting the move.
///
/// The four optional fields default to `nil`, so a target can be built from just `location`
/// and `description`.
private struct ResolvedMoveTarget {
    /// Point the cursor should move to.
    let location: CGPoint
    /// Human-readable text describing the destination.
    let description: String
    /// Optional application, window, and element context for the destination.
    let targetApp, windowTitle, elementRole, elementLabel: String?

    /// Creates a move target; every context argument is optional and defaults to `nil`.
    init(
        location: CGPoint,
        description: String,
        targetApp: String? = nil,
        windowTitle: String? = nil,
        elementRole: String? = nil,
        elementLabel: String? = nil)
    {
        (self.location, self.description) = (location, description)
        (self.targetApp, self.windowTitle) = (targetApp, windowTitle)
        (self.elementRole, self.elementLabel) = (elementRole, elementLabel)
    }
}
|
||||
|
||||
/// Outcome of a cursor movement: the parameters used plus where the pointer started and how far it went.
private struct MovementExecution {
    /// Movement parameters that were applied.
    let parameters: MovementParameters

    /// Pointer location recorded as the start of the movement.
    let startPoint: CGPoint

    /// Distance recorded for the movement.
    let distance: CGFloat

    /// Direction label for the movement, or `nil` when none was computed.
    let direction: String?
}
|
||||
|
||||
private struct MoveToolValidationError: Error {
|
||||
|
||||
@ -67,3 +67,13 @@ extension MovementProfileOption {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience accessors for describing an element in event summaries.
extension UIElement {
    /// Role text for summaries: the role description when present, otherwise the role.
    var summaryRole: String? {
        return self.roleDescription ?? self.role
    }

    /// Label text for summaries: the first available of title, label, then value.
    var summaryLabel: String? {
        let titleOrLabel = self.title ?? self.label
        return titleOrLabel ?? self.value
    }
}
|
||||
|
||||
@ -63,9 +63,20 @@ public struct PermissionsTool: MCPTool {
|
||||
|
||||
// Return error response if required permissions are missing
|
||||
if !screenRecording {
|
||||
return ToolResponse.error(responseText)
|
||||
let summary = ToolEventSummary(actionDescription: "Permissions", notes: "Screen Recording missing")
|
||||
return ToolResponse.error(responseText, meta: ToolEventSummary.merge(summary: summary, into: nil))
|
||||
}
|
||||
|
||||
return ToolResponse.text(responseText)
|
||||
let baseMeta: [String: Value] = [
|
||||
"screen_recording": .bool(screenRecording),
|
||||
"accessibility": .bool(accessibility),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "Permissions",
|
||||
notes: "Screen Recording ✅, Accessibility \(accessibility ? "✅" : "⚠️")")
|
||||
|
||||
return ToolResponse.text(
|
||||
responseText,
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,17 @@
|
||||
import CoreGraphics
|
||||
|
||||
/// Converts the displacement between two points into an eight-way compass label.
///
/// Points are interpreted with x growing to the right and y growing downward, so a positive
/// horizontal delta yields "E" and a positive vertical delta yields "S".
/// NOTE(review): the y-down convention matches what the previous implementation returned for
/// vertical moves; confirm it matches the coordinate space callers pass in.
///
/// - Parameters:
///   - start: Point the pointer moved from.
///   - end: Point the pointer moved to.
/// - Returns: One of "E", "NE", "N", "NW", "W", "SW", "S", "SE", or `nil` when the two points
///   are less than one unit apart.
func pointerDirection(from start: CGPoint, to end: CGPoint) -> String? {
    let dx = end.x - start.x
    let dy = end.y - start.y
    guard hypot(dx, dy) >= 1 else { return nil }

    // Listed counter-clockwise starting at east, i.e. in order of increasing atan2 angle.
    let directions = ["E", "NE", "N", "NW", "W", "SW", "S", "SE"]

    // Negate dy so that "up" (decreasing y) is a positive angle. No offset is added: angle 0 must
    // land on index 0 ("E"). The earlier `+ .pi` offset rotated the table by 180 degrees.
    let angle = atan2(-dy, dx)

    // Each sector spans 45 degrees; rounding yields a value in -4...4, wrapped into 0..<8.
    let sector = Int((angle / (.pi / 4)).rounded())
    let index = ((sector % directions.count) + directions.count) % directions.count
    return directions[index]
}
|
||||
@ -139,13 +139,20 @@ public struct ScrollTool: MCPTool {
|
||||
let message = "\(AgentDisplayTokens.Status.success) Performed \(scrollDescription) \(request.direction) " +
|
||||
"(\(request.amount) ticks) \(target.description) in \(duration)"
|
||||
|
||||
return ToolResponse.text(message)
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: target.appName,
|
||||
actionDescription: request.smooth ? "Smooth scroll" : "Scroll",
|
||||
scrollDirection: request.direction.rawValue,
|
||||
scrollAmount: Double(request.amount),
|
||||
notes: target.description
|
||||
)
|
||||
return ToolResponse.text(message, meta: ToolEventSummary.merge(summary: summary, into: nil))
|
||||
}
|
||||
|
||||
@MainActor
|
||||
private func resolveTargetDescription(request: ScrollToolRequest) async throws -> ScrollTargetDescription {
|
||||
guard let elementId = request.elementId else {
|
||||
return ScrollTargetDescription(elementId: nil, description: "at current mouse position")
|
||||
return ScrollTargetDescription(elementId: nil, description: "at current mouse position", appName: nil)
|
||||
}
|
||||
|
||||
guard let session = await self.getSession(id: request.sessionId) else {
|
||||
@ -159,7 +166,10 @@ public struct ScrollTool: MCPTool {
|
||||
|
||||
let label = element.title ?? element.label ?? "untitled"
|
||||
let description = "on \(element.role): \(label)"
|
||||
return ScrollTargetDescription(elementId: elementId, description: description)
|
||||
return ScrollTargetDescription(
|
||||
elementId: elementId,
|
||||
description: description,
|
||||
appName: session.applicationName)
|
||||
}
|
||||
}
|
||||
|
||||
@ -175,6 +185,7 @@ private struct ScrollToolRequest {
|
||||
/// Describes where a scroll was directed, for use in the response message and event summary.
private struct ScrollTargetDescription {
    /// Identifier of the targeted element, or `nil` when no element was targeted.
    let elementId: String?

    /// Human-readable phrase describing the scroll target.
    let description: String

    /// Name of the application associated with the target, when known.
    let appName: String?
}
|
||||
|
||||
private struct ScrollToolValidationError: Error {
|
||||
|
||||
@ -380,19 +380,29 @@ public struct SeeTool: MCPTool {
|
||||
target: CaptureTarget) async throws -> ToolResponse
|
||||
{
|
||||
let finalScreenshot = output.annotatedPath ?? output.screenshotPath
|
||||
let summary = await buildSummary(
|
||||
let summaryText = await buildSummary(
|
||||
session: session,
|
||||
elements: elements,
|
||||
screenshotPath: finalScreenshot,
|
||||
target: target)
|
||||
|
||||
var content: [MCP.Tool.Content] = [.text(summary)]
|
||||
var content: [MCP.Tool.Content] = [.text(summaryText)]
|
||||
if output.annotate, let annotatedPath = output.annotatedPath {
|
||||
let imageData = try Data(contentsOf: URL(fileURLWithPath: annotatedPath))
|
||||
content.append(.image(data: imageData.base64EncodedString(), mimeType: "image/png", metadata: nil))
|
||||
}
|
||||
|
||||
return ToolResponse(content: content, meta: self.makeMetadata(session: session, elements: elements))
|
||||
let baseMeta = self.makeMetadata(session: session, elements: elements)
|
||||
var summary = ToolEventSummary(
|
||||
targetApp: session.applicationName,
|
||||
windowTitle: session.windowTitle,
|
||||
actionDescription: "See",
|
||||
notes: String(describing: target))
|
||||
summary.captureApp = session.applicationName
|
||||
summary.captureWindow = session.windowTitle
|
||||
|
||||
let mergedMeta = ToolEventSummary.merge(summary: summary, into: baseMeta)
|
||||
return ToolResponse(content: content, meta: mergedMeta)
|
||||
}
|
||||
|
||||
private func makeMetadata(session: UISession, elements: [UIElement]) -> Value {
|
||||
@ -538,6 +548,8 @@ actor UISession {
|
||||
private(set) var uiElements: [UIElement] = []
|
||||
private(set) var createdAt: Date
|
||||
private(set) var lastAccessedAt: Date
|
||||
nonisolated(unsafe) private(set) var cachedApplicationName: String?
|
||||
nonisolated(unsafe) private(set) var cachedWindowTitle: String?
|
||||
|
||||
init() {
|
||||
self.id = UUID().uuidString
|
||||
@ -548,6 +560,8 @@ actor UISession {
|
||||
/// Records the latest screenshot for this session and refreshes the cached app/window names.
///
/// - Parameters:
///   - path: File path of the captured screenshot.
///   - metadata: Capture metadata; its application name and window title are cached.
func setScreenshot(path: String, metadata: CaptureMetadata) {
    let capturedAppName = metadata.applicationInfo?.name
    let capturedWindowTitle = metadata.windowInfo?.title

    self.screenshotPath = path
    self.screenshotMetadata = metadata

    // NOTE(review): these caches are declared `nonisolated(unsafe)` and are read through
    // nonisolated accessors, so this write is not synchronized with off-actor readers — confirm
    // that is acceptable for summary metadata.
    self.cachedApplicationName = capturedAppName
    self.cachedWindowTitle = capturedWindowTitle

    self.lastAccessedAt = Date()
}
|
||||
|
||||
@ -559,6 +573,14 @@ actor UISession {
|
||||
/// Looks up a stored UI element by identifier.
///
/// - Parameter id: Identifier to match against each element's `id`.
/// - Returns: The first element in `uiElements` whose `id` equals `id`, or `nil` if none matches.
func getElement(byId id: String) -> UIElement? {
    return self.uiElements.first(where: { candidate in
        candidate.id == id
    })
}
|
||||
|
||||
/// Application name cached by the most recent `setScreenshot(path:metadata:)` call, if any.
/// Declared `nonisolated`, so it can be read without awaiting the actor.
nonisolated var applicationName: String? {
    get {
        return self.cachedApplicationName
    }
}
|
||||
|
||||
/// Window title cached by the most recent `setScreenshot(path:metadata:)` call, if any.
/// Declared `nonisolated`, so it can be read without awaiting the actor.
nonisolated var windowTitle: String? {
    get {
        return self.cachedWindowTitle
    }
}
|
||||
}
|
||||
|
||||
actor UISessionManager {
|
||||
|
||||
@ -82,9 +82,15 @@ public struct ShellTool: MCPTool {
|
||||
}
|
||||
|
||||
self.logger.debug("Command completed successfully")
|
||||
let summary = ToolEventSummary(
|
||||
command: command,
|
||||
workingDirectory: FileManager.default.currentDirectoryPath,
|
||||
notes: nil)
|
||||
let meta = ToolEventSummary.merge(summary: summary, into: nil)
|
||||
return ToolResponse(
|
||||
content: [.text(output)],
|
||||
isError: false)
|
||||
isError: false,
|
||||
meta: meta)
|
||||
} catch {
|
||||
self.logger.error("Failed to execute command: \(error.localizedDescription)")
|
||||
return ToolResponse(
|
||||
|
||||
@ -48,9 +48,14 @@ public struct SleepTool: MCPTool {
|
||||
let actualDuration = Date().timeIntervalSince(startTime) * 1000 // Convert to ms
|
||||
let seconds = Double(milliseconds) / 1000.0
|
||||
|
||||
let summary =
|
||||
let summaryText =
|
||||
"\(AgentDisplayTokens.Status.success) Paused for \(seconds)s " +
|
||||
"(requested: \(milliseconds)ms, actual: \(Int(actualDuration))ms)"
|
||||
return ToolResponse.text(summary)
|
||||
let summaryMeta = ToolEventSummary(
|
||||
actionDescription: "Sleep",
|
||||
waitDurationMs: actualDuration,
|
||||
waitReason: nil
|
||||
)
|
||||
return ToolResponse.text(summaryText, meta: ToolEventSummary.merge(summary: summaryMeta, into: nil))
|
||||
}
|
||||
}
|
||||
|
||||
@ -157,12 +157,18 @@ public struct SpaceTool: MCPTool {
|
||||
output += "\n"
|
||||
}
|
||||
|
||||
let message = output.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let baseMeta: [String: Value] = [
|
||||
"count": .double(Double(spaces.count)),
|
||||
"execution_time": .double(executionTime),
|
||||
"detailed": .bool(detailed),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "List Spaces",
|
||||
notes: "\(spaces.count) spaces")
|
||||
return ToolResponse(
|
||||
content: [.text(output.trimmingCharacters(in: .whitespacesAndNewlines))],
|
||||
meta: .object([
|
||||
"count": .double(Double(spaces.count)),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
content: [.text(message)],
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
@MainActor
|
||||
@ -197,13 +203,17 @@ public struct SpaceTool: MCPTool {
|
||||
let executionTime = Date().timeIntervalSince(startTime)
|
||||
let message = self.successMessage("Switched to Space \(spaceNumber)", duration: executionTime)
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"space_number": .double(Double(spaceNumber)),
|
||||
"space_id": .double(Double(targetSpace.id)),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "Switch Space",
|
||||
notes: "Space \(spaceNumber)")
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"space_number": .double(Double(spaceNumber)),
|
||||
"space_id": .double(Double(targetSpace.id)),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
@MainActor
|
||||
@ -255,11 +265,11 @@ public struct SpaceTool: MCPTool {
|
||||
{
|
||||
switch action {
|
||||
case let .list(detailed):
|
||||
try await self.handleList(service: service, detailed: detailed, startTime: startTime)
|
||||
return try await self.handleList(service: service, detailed: detailed, startTime: startTime)
|
||||
case let .switchSpace(spaceNumber):
|
||||
try await self.handleSwitch(service: service, spaceNumber: spaceNumber, startTime: startTime)
|
||||
return try await self.handleSwitch(service: service, spaceNumber: spaceNumber, startTime: startTime)
|
||||
case let .moveWindow(request):
|
||||
try await self.handleMoveWindow(service: service, request: request, startTime: startTime)
|
||||
return try await self.handleMoveWindow(service: service, request: request, startTime: startTime)
|
||||
}
|
||||
}
|
||||
|
||||
@ -381,14 +391,19 @@ extension SpaceTool {
|
||||
"Moved window '\(windowInfo.title)' to current Space",
|
||||
duration: executionTime)
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowID)),
|
||||
"moved_to_current": .bool(true),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
windowTitle: windowInfo.title,
|
||||
actionDescription: "Space Move",
|
||||
notes: "current")
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowID)),
|
||||
"moved_to_current": .bool(true),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
@MainActor
|
||||
@ -420,15 +435,20 @@ extension SpaceTool {
|
||||
let body = "Moved window '\(windowInfo.title)' to Space \(targetSpaceNumber)\(followText)"
|
||||
let message = self.successMessage(body, duration: executionTime)
|
||||
|
||||
let baseMeta: [String: Value] = [
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowID)),
|
||||
"target_space_number": .double(Double(targetSpaceNumber)),
|
||||
"target_space_id": .double(Double(targetSpace.id)),
|
||||
"followed": .bool(request.follow),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
windowTitle: windowInfo.title,
|
||||
actionDescription: "Space Move",
|
||||
notes: "space \(targetSpaceNumber)")
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowID)),
|
||||
"target_space_number": .double(Double(targetSpaceNumber)),
|
||||
"target_space_id": .double(Double(targetSpace.id)),
|
||||
"followed": .bool(request.follow),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
}
|
||||
|
||||
@ -167,23 +167,37 @@ public struct SwipeTool: MCPTool {
|
||||
with \(movement.steps) steps (\(movement.profileName) profile, distance: \(distanceText)px) in \(durationText)s
|
||||
"""
|
||||
|
||||
let metaDict: [String: Value] = [
|
||||
"from": .object([
|
||||
"x": .double(Double(fromPoint.x)),
|
||||
"y": .double(Double(fromPoint.y)),
|
||||
]),
|
||||
"to": .object([
|
||||
"x": .double(Double(toPoint.x)),
|
||||
"y": .double(Double(toPoint.y)),
|
||||
]),
|
||||
"duration": .double(Double(movement.duration)),
|
||||
"steps": .double(Double(movement.steps)),
|
||||
"profile": .string(movement.profileName),
|
||||
"distance": .double(distance),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
|
||||
let summary = ToolEventSummary(
|
||||
actionDescription: "Swipe",
|
||||
coordinates: ToolEventSummary.Coordinates(x: Double(toPoint.x), y: Double(toPoint.y)),
|
||||
pointerProfile: movement.profileName,
|
||||
pointerDistance: Double(distance),
|
||||
pointerDirection: pointerDirection(from: fromPoint, to: toPoint),
|
||||
pointerDurationMs: Double(movement.duration),
|
||||
notes: "from (\(Int(fromPoint.x)), \(Int(fromPoint.y))) to (\(Int(toPoint.x)), \(Int(toPoint.y)))"
|
||||
)
|
||||
|
||||
let metaValue = ToolEventSummary.merge(summary: summary, into: .object(metaDict))
|
||||
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"from": .object([
|
||||
"x": .double(Double(fromPoint.x)),
|
||||
"y": .double(Double(fromPoint.y)),
|
||||
]),
|
||||
"to": .object([
|
||||
"x": .double(Double(toPoint.x)),
|
||||
"y": .double(Double(toPoint.y)),
|
||||
]),
|
||||
"duration": .double(Double(movement.duration)),
|
||||
"steps": .double(Double(movement.steps)),
|
||||
"profile": .string(movement.profileName),
|
||||
"distance": .double(distance),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
meta: metaValue)
|
||||
}
|
||||
|
||||
private func parseCoordinates(_ coordString: String, parameterName: String) throws -> CGPoint {
|
||||
|
||||
@ -129,7 +129,9 @@ public struct TypeTool: MCPTool {
|
||||
let automation = self.context.automation
|
||||
let startTime = Date()
|
||||
|
||||
try await self.focusIfNeeded(request: request, automation: automation)
|
||||
let targetContext = try await self.resolveTargetContext(for: request)
|
||||
|
||||
try await self.focusIfNeeded(targetContext: targetContext, request: request, automation: automation)
|
||||
let actions = try self.buildActions(for: request)
|
||||
let typeResult = try await automation.typeActions(
|
||||
actions,
|
||||
@ -141,18 +143,41 @@ public struct TypeTool: MCPTool {
|
||||
request: request,
|
||||
executionTime: executionTime,
|
||||
result: typeResult)
|
||||
let baseMeta: Value = .object([
|
||||
"execution_time": .double(executionTime),
|
||||
"characters_typed": .double(Double(typeResult.totalCharacters)),
|
||||
])
|
||||
let summary = self.buildEventSummary(
|
||||
request: request,
|
||||
result: typeResult,
|
||||
targetContext: targetContext)
|
||||
let mergedMeta = ToolEventSummary.merge(summary: summary, into: baseMeta)
|
||||
|
||||
return ToolResponse(
|
||||
content: [.text(message)],
|
||||
meta: .object([
|
||||
"execution_time": .double(executionTime),
|
||||
"characters_typed": .double(Double(typeResult.totalCharacters)),
|
||||
]))
|
||||
meta: mergedMeta)
|
||||
}
|
||||
|
||||
@MainActor
|
||||
private func focusIfNeeded(request: TypeRequest, automation: any UIAutomationServiceProtocol) async throws {
|
||||
guard let elementId = request.elementId else { return }
|
||||
private func focusIfNeeded(
|
||||
targetContext: TargetElementContext?,
|
||||
request: TypeRequest,
|
||||
automation: any UIAutomationServiceProtocol) async throws
|
||||
{
|
||||
guard let context = targetContext else { return }
|
||||
|
||||
let element = context.element
|
||||
let clickLocation = CGPoint(x: element.frame.midX, y: element.frame.midY)
|
||||
try await automation.click(
|
||||
target: .coordinates(clickLocation),
|
||||
clickType: .single,
|
||||
sessionId: request.sessionId)
|
||||
try await Task.sleep(nanoseconds: 100_000_000)
|
||||
}
|
||||
|
||||
@MainActor
|
||||
private func resolveTargetContext(for request: TypeRequest) async throws -> TargetElementContext? {
|
||||
guard let elementId = request.elementId else { return nil }
|
||||
guard let session = await self.getSession(id: request.sessionId) else {
|
||||
throw TypeToolValidationError("No active session. Run 'see' command first to capture UI state.")
|
||||
}
|
||||
@ -162,12 +187,45 @@ public struct TypeTool: MCPTool {
|
||||
"Element '\(elementId)' not found in current session. Run 'see' command to update UI state.")
|
||||
}
|
||||
|
||||
let clickLocation = CGPoint(x: element.frame.midX, y: element.frame.midY)
|
||||
try await automation.click(
|
||||
target: .coordinates(clickLocation),
|
||||
clickType: .single,
|
||||
sessionId: request.sessionId)
|
||||
try await Task.sleep(nanoseconds: 100_000_000)
|
||||
return TargetElementContext(session: session, element: element)
|
||||
}
|
||||
|
||||
/// Assembles the structured summary attached to a type-tool response.
///
/// - Parameters:
///   - request: The type request; supplies the typed text and the action description.
///   - result: The automation result. Not read by this method.
///   - targetContext: The resolved session/element pair, or `nil` when no element was targeted.
/// - Returns: A summary whose `elementValue` and `notes` both carry the truncated input text.
private func buildEventSummary(
    request: TypeRequest,
    result: TypeResult,
    targetContext: TargetElementContext?) -> ToolEventSummary
{
    let preview = self.truncatedText(request.text)
    let session = targetContext?.session
    let element = targetContext?.element

    let summary = ToolEventSummary(
        targetApp: session?.applicationName,
        windowTitle: session?.windowTitle,
        elementRole: element?.summaryRole,
        elementLabel: element?.summaryLabel,
        elementValue: preview,
        actionDescription: self.describeAction(for: request),
        notes: preview)
    return summary
}
|
||||
|
||||
/// Shortens `text` to at most `limit` characters for display in tool summaries.
///
/// - Parameters:
///   - text: The raw text. `nil` or an empty string yields `nil`.
///   - limit: Maximum number of characters kept before the ellipsis. Negative values are
///     treated as `0` instead of trapping.
/// - Returns: `text` unchanged when it fits within `limit`; otherwise its first `limit`
///   characters followed by "…".
private func truncatedText(_ text: String?, limit: Int = 80) -> String? {
    guard let text, !text.isEmpty else { return nil }

    // A negative offset from `startIndex` would trap, so clamp before walking the string.
    let maxCharacters = max(0, limit)

    // A single bounded walk replaces the separate `count` pass: `nil` means the string is
    // shorter than the limit, and landing on `endIndex` means it fits exactly.
    guard
        let cutoff = text.index(text.startIndex, offsetBy: maxCharacters, limitedBy: text.endIndex),
        cutoff < text.endIndex
    else {
        return text
    }

    return String(text[..<cutoff]) + "…"
}
|
||||
|
||||
/// Produces the short action label used in the event summary.
///
/// - Parameter request: The type request to describe.
/// - Returns: "Typed" when the request carries non-empty text; otherwise a comma-separated
///   list of the requested key presses, or "Type" when none were requested.
private func describeAction(for request: TypeRequest) -> String {
    if request.text?.isEmpty == false {
        return "Typed"
    }

    var labels: [String] = []
    if let tabs = request.tabCount, tabs > 0 {
        labels.append("Tab×\(tabs)")
    }

    // Order matters: it is the order in which the labels appear in the joined description.
    let keyFlags: [(Bool, String)] = [
        (request.pressReturn, "Return"),
        (request.pressEscape, "Escape"),
        (request.pressDelete, "Delete"),
        (request.clearField, "Clear Field"),
    ]
    for (isRequested, label) in keyFlags where isRequested {
        labels.append(label)
    }

    guard !labels.isEmpty else { return "Type" }
    return labels.joined(separator: ", ")
}
|
||||
|
||||
private func buildActions(for request: TypeRequest) throws -> [TypeAction] {
|
||||
@ -294,3 +352,8 @@ private struct TypeToolValidationError: Error {
|
||||
let message: String
|
||||
init(_ message: String) { self.message = message }
|
||||
}
|
||||
|
||||
/// Pairs a resolved UI element with the session it was found in, so focusing and
/// summary building can share one lookup.
private struct TargetElementContext {
    /// Session that produced `element`.
    let session: UISession

    /// Element targeted by the type request.
    let element: UIElement
}
|
||||
|
||||
@ -119,32 +119,43 @@ public struct WindowTool: MCPTool {
|
||||
|
||||
switch action {
|
||||
case .close:
|
||||
return try await self.handleClose(service: service, target: target, startTime: startTime)
|
||||
return try await self.handleClose(service: service, target: target, appName: inputs.app, startTime: startTime)
|
||||
|
||||
case .minimize:
|
||||
return try await self.handleMinimize(service: service, target: target, startTime: startTime)
|
||||
return try await self.handleMinimize(service: service, target: target, appName: inputs.app, startTime: startTime)
|
||||
|
||||
case .maximize:
|
||||
return try await self.handleMaximize(service: service, target: target, startTime: startTime)
|
||||
return try await self.handleMaximize(service: service, target: target, appName: inputs.app, startTime: startTime)
|
||||
|
||||
case .move:
|
||||
let position = try inputs.requirePosition(for: action)
|
||||
return try await self.handleMove(service: service, target: target, position: position, startTime: startTime)
|
||||
return try await self.handleMove(
|
||||
service: service,
|
||||
target: target,
|
||||
appName: inputs.app,
|
||||
position: position,
|
||||
startTime: startTime)
|
||||
|
||||
case .resize:
|
||||
let size = try inputs.requireSize(for: action)
|
||||
return try await self.handleResize(service: service, target: target, size: size, startTime: startTime)
|
||||
return try await self.handleResize(
|
||||
service: service,
|
||||
target: target,
|
||||
appName: inputs.app,
|
||||
size: size,
|
||||
startTime: startTime)
|
||||
|
||||
case .setBounds:
|
||||
let bounds = try inputs.requireBounds()
|
||||
return try await self.handleSetBounds(
|
||||
service: service,
|
||||
target: target,
|
||||
appName: inputs.app,
|
||||
bounds: bounds,
|
||||
startTime: startTime)
|
||||
|
||||
case .focus:
|
||||
return try await self.handleFocus(service: service, target: target, startTime: startTime)
|
||||
return try await self.handleFocus(service: service, target: target, appName: inputs.app, startTime: startTime)
|
||||
}
|
||||
}
|
||||
|
||||
@ -153,6 +164,7 @@ public struct WindowTool: MCPTool {
|
||||
private func handleClose(
|
||||
service: any WindowManagementServiceProtocol,
|
||||
target: WindowTarget,
|
||||
appName: String?,
|
||||
startTime: Date) async throws -> ToolResponse
|
||||
{
|
||||
// Get window info before closing for better reporting
|
||||
@ -165,22 +177,26 @@ public struct WindowTool: MCPTool {
|
||||
|
||||
let executionTime = Date().timeIntervalSince(startTime)
|
||||
|
||||
let message = self.successMessage(action: "Closed window '\(windowInfo.title)'", duration: executionTime)
|
||||
let baseMeta: [String: Value] = [
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: appName,
|
||||
windowTitle: windowInfo.title,
|
||||
actionDescription: "Window Close",
|
||||
notes: nil)
|
||||
return ToolResponse(
|
||||
content: [
|
||||
.text(self.successMessage(
|
||||
action: "Closed window '\(windowInfo.title)'",
|
||||
duration: executionTime)),
|
||||
],
|
||||
meta: .object([
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
content: [.text(message)],
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func handleMinimize(
|
||||
service: any WindowManagementServiceProtocol,
|
||||
target: WindowTarget,
|
||||
appName: String?,
|
||||
startTime: Date) async throws -> ToolResponse
|
||||
{
|
||||
// Get window info before minimizing
|
||||
@ -193,22 +209,26 @@ public struct WindowTool: MCPTool {
|
||||
|
||||
let executionTime = Date().timeIntervalSince(startTime)
|
||||
|
||||
let message = self.successMessage(action: "Minimized window '\(windowInfo.title)'", duration: executionTime)
|
||||
let baseMeta: [String: Value] = [
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: appName,
|
||||
windowTitle: windowInfo.title,
|
||||
actionDescription: "Window Minimize",
|
||||
notes: nil)
|
||||
return ToolResponse(
|
||||
content: [
|
||||
.text(self.successMessage(
|
||||
action: "Minimized window '\(windowInfo.title)'",
|
||||
duration: executionTime)),
|
||||
],
|
||||
meta: .object([
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
content: [.text(message)],
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func handleMaximize(
|
||||
service: any WindowManagementServiceProtocol,
|
||||
target: WindowTarget,
|
||||
appName: String?,
|
||||
startTime: Date) async throws -> ToolResponse
|
||||
{
|
||||
// Get window info before maximizing
|
||||
@ -221,22 +241,26 @@ public struct WindowTool: MCPTool {
|
||||
|
||||
let executionTime = Date().timeIntervalSince(startTime)
|
||||
|
||||
let message = self.successMessage(action: "Maximized window '\(windowInfo.title)'", duration: executionTime)
|
||||
let baseMeta: [String: Value] = [
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: appName,
|
||||
windowTitle: windowInfo.title,
|
||||
actionDescription: "Window Maximize",
|
||||
notes: nil)
|
||||
return ToolResponse(
|
||||
content: [
|
||||
.text(self.successMessage(
|
||||
action: "Maximized window '\(windowInfo.title)'",
|
||||
duration: executionTime)),
|
||||
],
|
||||
meta: .object([
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
content: [.text(message)],
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func handleMove(
|
||||
service: any WindowManagementServiceProtocol,
|
||||
target: WindowTarget,
|
||||
appName: String?,
|
||||
position: CGPoint,
|
||||
startTime: Date) async throws -> ToolResponse
|
||||
{
|
||||
@ -251,22 +275,29 @@ public struct WindowTool: MCPTool {
|
||||
let executionTime = Date().timeIntervalSince(startTime)
|
||||
|
||||
let detail = "Moved window '\(windowInfo.title)' to (\(Int(position.x)), \(Int(position.y)))"
|
||||
let message = self.successMessage(action: detail, duration: executionTime)
|
||||
let baseMeta: [String: Value] = [
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"new_x": .double(Double(position.x)),
|
||||
"new_y": .double(Double(position.y)),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: appName,
|
||||
windowTitle: windowInfo.title,
|
||||
actionDescription: "Window Move",
|
||||
coordinates: ToolEventSummary.Coordinates(x: Double(position.x), y: Double(position.y)),
|
||||
notes: nil)
|
||||
return ToolResponse(
|
||||
content: [
|
||||
.text(self.successMessage(action: detail, duration: executionTime)),
|
||||
],
|
||||
meta: .object([
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"new_x": .double(Double(position.x)),
|
||||
"new_y": .double(Double(position.y)),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
content: [.text(message)],
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func handleResize(
|
||||
service: any WindowManagementServiceProtocol,
|
||||
target: WindowTarget,
|
||||
appName: String?,
|
||||
size: CGSize,
|
||||
startTime: Date) async throws -> ToolResponse
|
||||
{
|
||||
@ -281,22 +312,28 @@ public struct WindowTool: MCPTool {
|
||||
let executionTime = Date().timeIntervalSince(startTime)
|
||||
|
||||
let detail = "Resized window '\(windowInfo.title)' to \(Int(size.width)) × \(Int(size.height))"
|
||||
let message = self.successMessage(action: detail, duration: executionTime)
|
||||
let baseMeta: [String: Value] = [
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"new_width": .double(Double(size.width)),
|
||||
"new_height": .double(Double(size.height)),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: appName,
|
||||
windowTitle: windowInfo.title,
|
||||
actionDescription: "Window Resize",
|
||||
notes: "\(Int(size.width))×\(Int(size.height))")
|
||||
return ToolResponse(
|
||||
content: [
|
||||
.text(self.successMessage(action: detail, duration: executionTime)),
|
||||
],
|
||||
meta: .object([
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"new_width": .double(Double(size.width)),
|
||||
"new_height": .double(Double(size.height)),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
content: [.text(message)],
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func handleSetBounds(
|
||||
service: any WindowManagementServiceProtocol,
|
||||
target: WindowTarget,
|
||||
appName: String?,
|
||||
bounds: CGRect,
|
||||
startTime: Date) async throws -> ToolResponse
|
||||
{
|
||||
@ -312,24 +349,33 @@ public struct WindowTool: MCPTool {
|
||||
|
||||
let detail = "Set bounds for window '\(windowInfo.title)' to (\(Int(bounds.origin.x)), "
|
||||
+ "\(Int(bounds.origin.y)), \(Int(bounds.width)) × \(Int(bounds.height)))"
|
||||
let message = self.successMessage(action: detail, duration: executionTime)
|
||||
let baseMeta: [String: Value] = [
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"new_x": .double(Double(bounds.origin.x)),
|
||||
"new_y": .double(Double(bounds.origin.y)),
|
||||
"new_width": .double(Double(bounds.width)),
|
||||
"new_height": .double(Double(bounds.height)),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: appName,
|
||||
windowTitle: windowInfo.title,
|
||||
actionDescription: "Window Set Bounds",
|
||||
coordinates: ToolEventSummary.Coordinates(
|
||||
x: Double(bounds.origin.x),
|
||||
y: Double(bounds.origin.y)),
|
||||
notes: "\(Int(bounds.width))×\(Int(bounds.height))")
|
||||
return ToolResponse(
|
||||
content: [
|
||||
.text(self.successMessage(action: detail, duration: executionTime)),
|
||||
],
|
||||
meta: .object([
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"new_x": .double(Double(bounds.origin.x)),
|
||||
"new_y": .double(Double(bounds.origin.y)),
|
||||
"new_width": .double(Double(bounds.width)),
|
||||
"new_height": .double(Double(bounds.height)),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
content: [.text(message)],
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
private func handleFocus(
|
||||
service: any WindowManagementServiceProtocol,
|
||||
target: WindowTarget,
|
||||
appName: String?,
|
||||
startTime: Date) async throws -> ToolResponse
|
||||
{
|
||||
// Get window info before focusing
|
||||
@ -342,17 +388,20 @@ public struct WindowTool: MCPTool {
|
||||
|
||||
let executionTime = Date().timeIntervalSince(startTime)
|
||||
|
||||
let message = self.successMessage(action: "Focused window '\(windowInfo.title)'", duration: executionTime)
|
||||
let baseMeta: [String: Value] = [
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"execution_time": .double(executionTime),
|
||||
]
|
||||
let summary = ToolEventSummary(
|
||||
targetApp: appName,
|
||||
windowTitle: windowInfo.title,
|
||||
actionDescription: "Window Focus",
|
||||
notes: nil)
|
||||
return ToolResponse(
|
||||
content: [
|
||||
.text(self.successMessage(
|
||||
action: "Focused window '\(windowInfo.title)'",
|
||||
duration: executionTime)),
|
||||
],
|
||||
meta: .object([
|
||||
"window_title": .string(windowInfo.title),
|
||||
"window_id": .double(Double(windowInfo.windowID)),
|
||||
"execution_time": .double(executionTime),
|
||||
]))
|
||||
content: [.text(message)],
|
||||
meta: ToolEventSummary.merge(summary: summary, into: .object(baseMeta)))
|
||||
}
|
||||
|
||||
// MARK: - Helper Methods
|
||||
|
||||
@ -0,0 +1,214 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
|
||||
public struct ToolEventSummary: Codable, Sendable {
|
||||
/// A point attached to a tool event; either axis may be absent.
public struct Coordinates: Codable, Sendable {
    /// Horizontal component, if known.
    public var x: Double?

    /// Vertical component, if known.
    public var y: Double?

    /// Creates a coordinate pair; both components default to `nil`.
    public init(x: Double? = nil, y: Double? = nil) {
        (self.x, self.y) = (x, y)
    }
}
|
||||
|
||||
public var targetApp: String?
|
||||
public var windowTitle: String?
|
||||
public var elementRole: String?
|
||||
public var elementLabel: String?
|
||||
public var elementValue: String?
|
||||
public var actionDescription: String?
|
||||
public var coordinates: Coordinates?
|
||||
public var pointerProfile: String?
|
||||
public var pointerDistance: Double?
|
||||
public var pointerDirection: String?
|
||||
public var pointerDurationMs: Double?
|
||||
public var scrollDirection: String?
|
||||
public var scrollAmount: Double?
|
||||
public var command: String?
|
||||
public var workingDirectory: String?
|
||||
public var waitDurationMs: Double?
|
||||
public var waitReason: String?
|
||||
public var captureApp: String?
|
||||
public var captureWindow: String?
|
||||
public var notes: String?
|
||||
|
||||
/// Creates a summary. Every field is optional and defaults to `nil`, so call sites pass
/// only the fields relevant to their tool.
public init(
    targetApp: String? = nil,
    windowTitle: String? = nil,
    elementRole: String? = nil,
    elementLabel: String? = nil,
    elementValue: String? = nil,
    actionDescription: String? = nil,
    coordinates: Coordinates? = nil,
    pointerProfile: String? = nil,
    pointerDistance: Double? = nil,
    pointerDirection: String? = nil,
    pointerDurationMs: Double? = nil,
    scrollDirection: String? = nil,
    scrollAmount: Double? = nil,
    command: String? = nil,
    workingDirectory: String? = nil,
    waitDurationMs: Double? = nil,
    waitReason: String? = nil,
    captureApp: String? = nil,
    captureWindow: String? = nil,
    notes: String? = nil)
{
    // Target and element
    self.targetApp = targetApp
    self.windowTitle = windowTitle
    self.elementRole = elementRole
    self.elementLabel = elementLabel
    self.elementValue = elementValue
    self.actionDescription = actionDescription
    self.coordinates = coordinates

    // Pointer movement
    self.pointerProfile = pointerProfile
    self.pointerDistance = pointerDistance
    self.pointerDirection = pointerDirection
    self.pointerDurationMs = pointerDurationMs

    // Scrolling
    self.scrollDirection = scrollDirection
    self.scrollAmount = scrollAmount

    // Shell
    self.command = command
    self.workingDirectory = workingDirectory

    // Waiting
    self.waitDurationMs = waitDurationMs
    self.waitReason = waitReason

    // Capture
    self.captureApp = captureApp
    self.captureWindow = captureWindow

    self.notes = notes
}
|
||||
|
||||
/// Encodes the summary as an MCP `Value` object using snake_case keys.
///
/// Only non-`nil` fields are emitted. `coordinates` is emitted only when at least one of
/// its components is present.
///
/// - Returns: A `.object` value; it is empty when no field is set.
public func toMetaValue() -> Value {
    let textFields: [(String, String?)] = [
        ("target_app", self.targetApp),
        ("window_title", self.windowTitle),
        ("element_role", self.elementRole),
        ("element_label", self.elementLabel),
        ("element_value", self.elementValue),
        ("action", self.actionDescription),
        ("pointer_profile", self.pointerProfile),
        ("pointer_direction", self.pointerDirection),
        ("scroll_direction", self.scrollDirection),
        ("command", self.command),
        ("working_directory", self.workingDirectory),
        ("wait_reason", self.waitReason),
        ("capture_app", self.captureApp),
        ("capture_window", self.captureWindow),
        ("notes", self.notes),
    ]
    let numericFields: [(String, Double?)] = [
        ("pointer_distance", self.pointerDistance),
        ("pointer_duration_ms", self.pointerDurationMs),
        ("scroll_amount", self.scrollAmount),
        ("wait_duration_ms", self.waitDurationMs),
    ]

    var meta: [String: Value] = [:]
    for (key, text) in textFields {
        guard let text else { continue }
        meta[key] = .string(text)
    }
    for (key, number) in numericFields {
        guard let number else { continue }
        meta[key] = .double(number)
    }

    if let point = self.coordinates {
        var axes: [String: Value] = [:]
        if let x = point.x { axes["x"] = .double(x) }
        if let y = point.y { axes["y"] = .double(y) }
        // Skip the key entirely rather than emitting an empty object.
        if !axes.isEmpty {
            meta["coordinates"] = .object(axes)
        }
    }

    return .object(meta)
}
|
||||
|
||||
/// Returns a metadata object that carries `summary` under the `"summary"` key.
///
/// - Parameters:
///   - summary: The summary to embed.
///   - existingMeta: Metadata to extend. Its entries are kept when it is an `.object`;
///     any other value (including `nil`) is replaced by a fresh object.
/// - Returns: An `.object` value; an existing `"summary"` entry is overwritten.
public static func merge(summary: ToolEventSummary, into existingMeta: Value?) -> Value {
    let encodedSummary = summary.toMetaValue()

    guard case let .object(current)? = existingMeta else {
        return .object(["summary": encodedSummary])
    }

    var merged = current
    merged["summary"] = encodedSummary
    return .object(merged)
}
|
||||
|
||||
/// Creates a summary from a JSON dictionary that uses the same snake_case keys
/// `toMetaValue()` writes.
///
/// Unknown keys are ignored and values of an unexpected type are treated as absent.
/// Numeric fields accept both floating-point and integer-typed values, so a payload such as
/// `"wait_duration_ms": 500` is not dropped just because it was decoded as an `Int`.
///
/// - Parameter json: The decoded summary dictionary.
/// - Returns: `nil` when `json` is empty.
public init?(json: [String: Any]) {
    guard !json.isEmpty else { return nil }

    // Reads a numeric entry, widening integer-typed values to `Double`.
    func number(_ raw: Any?) -> Double? {
        if let value = raw as? Double { return value }
        if let value = raw as? Int { return Double(value) }
        return nil
    }

    self.targetApp = json["target_app"] as? String
    self.windowTitle = json["window_title"] as? String
    self.elementRole = json["element_role"] as? String
    self.elementLabel = json["element_label"] as? String
    self.elementValue = json["element_value"] as? String
    self.actionDescription = json["action"] as? String

    // Coordinates are only materialized when at least one axis is present.
    let point = json["coordinates"] as? [String: Any]
    let x = number(point?["x"])
    let y = number(point?["y"])
    self.coordinates = (x != nil || y != nil) ? Coordinates(x: x, y: y) : nil

    self.pointerProfile = json["pointer_profile"] as? String
    self.pointerDistance = number(json["pointer_distance"])
    self.pointerDirection = json["pointer_direction"] as? String
    self.pointerDurationMs = number(json["pointer_duration_ms"])
    self.scrollDirection = json["scroll_direction"] as? String
    self.scrollAmount = number(json["scroll_amount"])
    self.command = json["command"] as? String
    self.workingDirectory = json["working_directory"] as? String
    self.waitDurationMs = number(json["wait_duration_ms"])
    self.waitReason = json["wait_reason"] as? String
    self.captureApp = json["capture_app"] as? String
    self.captureWindow = json["capture_window"] as? String
    self.notes = json["notes"] as? String
}
|
||||
|
||||
/// Extracts the summary stored at `meta.summary` inside a decoded tool result.
///
/// - Parameter resultJSON: The tool result as a JSON dictionary.
/// - Returns: The parsed summary, or `nil` when `meta` or `meta.summary` is missing, is not
///   a dictionary, or the summary dictionary is rejected by `init?(json:)`.
public static func from(resultJSON: [String: Any]) -> ToolEventSummary? {
    let meta = resultJSON["meta"] as? [String: Any]
    guard let summaryJSON = meta?["summary"] as? [String: Any] else { return nil }
    return ToolEventSummary(json: summaryJSON)
}
|
||||
|
||||
/// Builds a one-line, human-readable description of the tool event.
///
/// The first matching rule wins, checked in this order:
/// 1. shell command (with working directory when known)
/// 2. capture target (app, plus window when known)
/// 3. element label (prefixed by the target app, suffixed by the role in parentheses)
/// 4. action together with target app
/// 5. target app alone
/// 6. notes
/// 7. wait duration (with reason when known)
/// 8. scroll direction and amount
/// 9. pointer direction and distance
/// 10. the bare action description
///
/// - Parameter toolName: Name of the tool that produced the event. It is not consulted by
///   the current rules.
/// - Returns: The description, or `nil` when no rule matches.
public func shortDescription(toolName: String) -> String? {
    if let command {
        guard let workingDirectory else { return "Run `\(command)`" }
        return "Run `\(command)` in \(workingDirectory)"
    }

    if let captureApp {
        guard let captureWindow else { return "Captured \(captureApp)" }
        return "Captured \(captureApp) · \(captureWindow)"
    }

    if let elementLabel {
        // The role qualifies the label, so it is attached to it ("Sign In (Button)") instead
        // of becoming a separate " · "-delimited segment.
        let element = elementRole.map { "\(elementLabel) (\($0))" } ?? elementLabel
        return [targetApp, element].compactMap { $0 }.joined(separator: " · ")
    }

    if let targetApp {
        guard let actionDescription else { return targetApp }
        return "\(actionDescription) – \(targetApp)"
    }

    if let notes {
        return notes
    }

    if let waitDurationMs {
        let seconds = waitDurationMs / 1000.0
        guard let waitReason else { return String(format: "Wait %.1fs", seconds) }
        return String(format: "Wait %.1fs (%@)", seconds, waitReason)
    }

    if let scrollDirection, let scrollAmount {
        return String(format: "Scrolled %@ %.0f px", scrollDirection, scrollAmount)
    }

    if let pointerDirection, let pointerDistance {
        return String(format: "Pointer %@ %.0f px", pointerDirection, pointerDistance)
    }

    return actionDescription
}
|
||||
}
|
||||
@ -0,0 +1,43 @@
|
||||
import PeekabooAgentRuntime
|
||||
import PeekabooCore
|
||||
import Testing
|
||||
|
||||
/// Pins the text produced by `ToolEventSummary.shortDescription(toolName:)` for the main
/// categories of tool events (shell, click, sleep, screen capture).
@Suite("Tool event summary formatting")
struct ToolEventSummaryTests {
    @Test("Shell commands render with working directory")
    func shellSummaryUsesWorkingDirectory() {
        let shell = ToolEventSummary(command: "ls -la", workingDirectory: "/tmp")
        let rendered = shell.shortDescription(toolName: "shell")

        #expect(rendered == "Run `ls -la` in /tmp")
    }

    @Test("Click actions include target app and role")
    func clickSummaryShowsElement() {
        let click = ToolEventSummary(
            targetApp: "Google Chrome",
            elementRole: "Button",
            elementLabel: "Sign In with Email")
        let rendered = click.shortDescription(toolName: "click")

        #expect(rendered == "Google Chrome · Sign In with Email (Button)")
    }

    @Test("Sleep summaries use wait duration and reason")
    func sleepSummaryIncludesDuration() {
        let sleep = ToolEventSummary(waitDurationMs: 2100, waitReason: "waiting for UI state")
        let rendered = sleep.shortDescription(toolName: "sleep")

        #expect(rendered == "Wait 2.1s (waiting for UI state)")
    }

    @Test("Screen captures include app and window")
    func seeSummaryDescribesCaptureContext() {
        let capture = ToolEventSummary(
            captureApp: "Google Chrome",
            captureWindow: "Grindr – Dashboard")
        let rendered = capture.shortDescription(toolName: "see")

        #expect(rendered == "Captured Google Chrome · Grindr – Dashboard")
    }
}
|
||||
@ -0,0 +1,46 @@
|
||||
import MCP
|
||||
import PeekabooAgentRuntime
|
||||
import PeekabooAutomation
|
||||
import TachikomaMCP
|
||||
import Testing
|
||||
|
||||
/// Verifies that tools attach a `ToolEventSummary` to the `meta` of their responses.
@Suite("Tool summary emission")
struct ToolSummaryEmissionTests {
    @Test("Shell tool attaches command metadata")
    func shellToolEmitsSummary() async throws {
        let tool = ShellTool()
        let response = try await tool.execute(arguments: ToolArguments(raw: ["command": "echo summary-test"]))

        // `#require` fails the test immediately when the summary is absent.
        let summary = try #require(
            extractSummary(from: response.meta),
            "ShellTool response missing summary metadata")

        #expect(summary.command == "echo summary-test")
        #expect(summary.shortDescription(toolName: tool.name) == "Run `echo summary-test`")
    }

    @Test("Sleep tool stores wait duration")
    func sleepToolEmitsSummary() async throws {
        let tool = SleepTool()
        let response = try await tool.execute(arguments: ToolArguments(raw: ["duration": 5]))

        let summary = try #require(
            extractSummary(from: response.meta),
            "SleepTool response missing summary metadata")

        #expect(summary.actionDescription == "Sleep")

        // Require the duration to be present: defaulting a missing value to 0 would let the
        // check pass even when the tool never recorded a wait duration.
        let waitDurationMs = try #require(
            summary.waitDurationMs,
            "SleepTool summary missing wait duration")
        #expect(waitDurationMs >= 0)
    }
}
|
||||
|
||||
/// Pulls the `"summary"` entry out of a response's metadata and parses it.
///
/// - Parameter meta: The response metadata, expected to be an `.object`.
/// - Returns: The parsed summary, or `nil` when `meta` is not an object, has no `"summary"`
///   entry, the entry does not convert to a JSON dictionary, or parsing rejects it.
private func extractSummary(from meta: Value?) -> ToolEventSummary? {
    guard case let .object(entries)? = meta else { return nil }
    guard let payload = entries["summary"]?.toJSON() as? [String: Any] else { return nil }
    return ToolEventSummary(json: payload)
}
|
||||
@ -1 +1 @@
|
||||
Subproject commit 51405dea5811953055ded57cee085caf788ba2a6
|
||||
Subproject commit 326892a32e93e03df7fb5fefe53c61ebcb6ef4ad
|
||||
66
docs/refactor/tool-results.md
Normal file
66
docs/refactor/tool-results.md
Normal file
@ -0,0 +1,66 @@
|
||||
---
|
||||
summary: 'Refactor tool results so agents can show rich, human-readable summaries'
|
||||
read_when:
|
||||
- 'planning tool/agent runtime work'
|
||||
- 'touching ToolResponse or formatter plumbing'
|
||||
---
|
||||
|
||||
# Tool Result Metadata Refactor Plan
|
||||
|
||||
## Current Status
|
||||
- `ToolEventSummary` struct + helpers live in `ToolEventSummary.swift`; pointer direction math handled in `PointerDirection.swift`.
|
||||
- Tachikoma MCP adapter now preserves `meta` so summaries flow from tools to CLI/Mac renderers.
|
||||
- Core UI/system tools (click/drag/move/swipe/scroll/see/shell/sleep/type/hotkey/app/menu/dialog/dock/list/window) populate summaries with human-readable labels instead of internal IDs.
|
||||
- Permission/Image/Analyze/Space tool paths updated to emit contextual summaries (app name, capture source, question text, etc.).
|
||||
- MCPAgentTool now emits summaries for session listings and agent runs, completing MCP tool coverage.
|
||||
- CLI `AgentOutputDelegate` consumes `ToolEventSummary` data, strips legacy `[ok]` glyphs, and falls back to sanitized formatter output only when necessary.
|
||||
- Mac tool formatter bridge + registry now prioritize `ToolEventSummary` data so timeline rows show the same human-readable summaries as the CLI.
|
||||
- Added Swift Testing coverage (`ToolEventSummaryTests`, `ToolSummaryEmissionTests`) so shell/sleep summaries and short-description helpers are locked in.
|
||||
- Streaming pipeline now injects a top-level `summary_text` field into tool completion payloads, giving JSON consumers the same human-readable copy without parsing nested meta blobs.
|
||||
- Agent output formatters still contain legacy fallbacks; `[ok]` badges remain until we finish Phase 3.
|
||||
|
||||
## Next Steps
|
||||
- Once the formatter cleanup lands, capture CLI/Mac golden transcripts in CI so regressions are detected automatically.
|
||||
|
||||
## Goals
|
||||
- Preserve structured context (app name, element label, pointer geometry, shell command, etc.) for every tool call.
|
||||
- Render concise, human-readable summaries in the CLI/Mac agent views without exposing internal IDs or glyph tokens.
|
||||
- Eliminate the success `[ok]` badge for normal completions; only show badges/flags on warnings or errors.
|
||||
- Keep completion tools (`task_completed`, `need_more_information`, `need_info`) flowing through their existing "state" UI without extra summary lines.
|
||||
|
||||
## Constraints & Challenges
|
||||
- `ToolResponse.meta` is currently dropped when converting to `AnyAgentToolValue`; formatters only see whatever plain text the tool returned.
|
||||
- MCP tools live in `PeekabooAgentRuntime` while the agent runtime/CLI sits elsewhere, so the metadata schema must be shared via Tachikoma types.
|
||||
- We must not break existing MCP integrations; the new summary data needs a backwards-compatible wire format.
|
||||
|
||||
## Phase 1 – Plumbing
|
||||
1. Introduce a typed `ToolEventSummary` struct (in Tachikoma) with optional fields for app/window, element, coordinates, scroll/move vectors, command strings, durations, etc.
|
||||
2. Extend `ToolResponse` to carry an optional `summary: ToolEventSummary` (or replace `meta` entirely) and ensure the MCP adapter serializes/deserializes it.
|
||||
3. Update the agent streaming pipeline (`PeekabooAgentService+Streaming`, `AnyAgentToolValue`, CLI event payloads) so the summary is delivered alongside the existing text result.
|
||||
|
||||
## Phase 2 – Tool Implementations
|
||||
1. Audit every MCP tool (click/type/scroll/see/shell/sleep/window/app/menu/dialog/drag/move/swipe/list/etc.).
|
||||
2. For each tool, populate `ToolEventSummary` using the context it already has:
|
||||
- UI tools: `targetApp`, `windowTitle`, `elementLabel`, `elementRole`, `humanizedPosition`.
|
||||
- Pointer tools: `direction`, `distancePx`, `profile`, `durationMs`.
|
||||
- Vision tools: `captureApp`, `windowTitle`, `sessionId` (for internal tracing only if we still need it), element counts.
|
||||
- System tools: `shellCommand`, `workingDirectory`, `sleepMs`, `reason`.
|
||||
3. Remove raw element IDs (`elem_153`) and replace them with user-facing labels.
|
||||
|
||||
## Phase 3 – Formatting & UX
|
||||
1. Update `ToolFormatter` (and specialized subclasses) to prefer the new summary fields when generating compact/result summaries.
|
||||
2. Teach `AgentOutputDelegate` to:
|
||||
- Drop the green `[ok]` marker on success.
|
||||
- Render geometry in natural language (e.g., `1280×720 anchored top-left on Display 1`).
|
||||
- Continue showing badges only for warnings/errors.
|
||||
3. Verify the Mac UI timeline consumes the same summary strings.
|
||||
|
||||
## Phase 4 – Verification
|
||||
- Add unit tests for representative tools ensuring they emit the expected `ToolEventSummary`.
|
||||
- Record CLI golden outputs (before/after) to confirm we now print sentences like `Click – Chrome · Button "Sign In with Email"`.
|
||||
- Dogfood on Grindr/Wingman workflow to ensure the motivation scenarios look correct end-to-end.
|
||||
|
||||
## Open Questions
|
||||
- Should we completely remove `meta`, or keep it for third-party MCP clients that expect arbitrary dictionaries?
|
||||
- Do we want localized summaries, or is English-only acceptable for now?
|
||||
- How do we expose the same summaries via API (e.g., JSON streaming) for downstream automation/telemetry?
|
||||
@ -33,7 +33,8 @@
|
||||
"poltergeist:rest": "./scripts/poltergeist-wrapper.sh rest",
|
||||
"poltergeist:status": "./scripts/poltergeist-wrapper.sh status",
|
||||
"poltergeist:panel": "./scripts/poltergeist-wrapper.sh status panel",
|
||||
"poltergeist:logs": "./scripts/poltergeist-wrapper.sh logs"
|
||||
"poltergeist:logs": "./scripts/poltergeist-wrapper.sh logs",
|
||||
"oracle": "oracle"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
@ -44,5 +45,8 @@
|
||||
"bugs": {
|
||||
"url": "https://github.com/steipete/peekaboo/issues"
|
||||
},
|
||||
"homepage": "https://github.com/steipete/peekaboo#readme"
|
||||
"homepage": "https://github.com/steipete/peekaboo#readme",
|
||||
"devDependencies": {
|
||||
"@steipete/oracle": "file:../oracle"
|
||||
}
|
||||
}
|
||||
|
||||
9
pnpm-lock.yaml
generated
9
pnpm-lock.yaml
generated
@ -4,6 +4,13 @@ settings:
|
||||
autoInstallPeers: true
|
||||
excludeLinksFromLockfile: false
|
||||
|
||||
overrides:
|
||||
'@steipete/oracle': link:../oracle
|
||||
|
||||
importers:
|
||||
|
||||
.: {}
|
||||
.:
|
||||
devDependencies:
|
||||
'@steipete/oracle':
|
||||
specifier: link:../oracle
|
||||
version: link:../oracle
|
||||
|
||||
2
pnpm-workspace.yaml
Normal file
2
pnpm-workspace.yaml
Normal file
@ -0,0 +1,2 @@
|
||||
overrides:
|
||||
'@steipete/oracle': link:../oracle
|
||||
@ -4,6 +4,21 @@
|
||||
# This script builds the CLI independently of the Node.js MCP server
|
||||
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
if command -v xcbeautify >/dev/null 2>&1; then
|
||||
USE_XCBEAUTIFY=1
|
||||
else
|
||||
USE_XCBEAUTIFY=0
|
||||
fi
|
||||
|
||||
# Filters build output read from stdin: pretty-prints it through xcbeautify when
# USE_XCBEAUTIFY is 1 (forwarding any arguments), otherwise passes it through unchanged.
pipe_build_output() {
  if ! [[ "$USE_XCBEAUTIFY" -eq 1 ]]; then
    cat
    return
  fi
  xcbeautify "$@"
}
|
||||
|
||||
# Colors for output
|
||||
GREEN='\033[0;32m'
|
||||
@ -17,7 +32,7 @@ cd "$(dirname "$0")/../Apps/CLI"
|
||||
|
||||
# Build for release with optimizations
|
||||
echo -e "${BLUE}Building release version...${NC}"
|
||||
swift build -c release
|
||||
swift build -c release 2>&1 | pipe_build_output
|
||||
|
||||
# Get the build output path
|
||||
BUILD_PATH=".build/release/peekaboo"
|
||||
@ -48,4 +63,4 @@ if [ -f "$BUILD_PATH" ]; then
|
||||
else
|
||||
echo -e "${RED}❌ Build failed!${NC}"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
#!/bin/bash
|
||||
# Build script for macOS Peekaboo app using xcodebuild
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
@ -11,6 +12,20 @@ YELLOW='\033[1;33m'
|
||||
CYAN='\033[0;36m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
if command -v xcbeautify >/dev/null 2>&1; then
|
||||
USE_XCBEAUTIFY=1
|
||||
else
|
||||
USE_XCBEAUTIFY=0
|
||||
fi
|
||||
|
||||
# Filters build output read from stdin: pretty-prints it through xcbeautify when
# USE_XCBEAUTIFY is 1 (forwarding any arguments), otherwise passes it through unchanged.
pipe_build_output() {
  if ! [[ "$USE_XCBEAUTIFY" -eq 1 ]]; then
    cat
    return
  fi
  xcbeautify "$@"
}
|
||||
|
||||
# Build configuration
|
||||
WORKSPACE="$PROJECT_ROOT/Apps/Peekaboo.xcworkspace"
|
||||
SCHEME="Peekaboo"
|
||||
@ -37,9 +52,10 @@ xcodebuild \
|
||||
CODE_SIGN_IDENTITY="" \
|
||||
CODE_SIGNING_REQUIRED=NO \
|
||||
CODE_SIGN_ENTITLEMENTS="" \
|
||||
CODE_SIGNING_ALLOWED=NO
|
||||
CODE_SIGNING_ALLOWED=NO \
|
||||
2>&1 | pipe_build_output
|
||||
|
||||
BUILD_EXIT_CODE=$?
|
||||
BUILD_EXIT_CODE=${PIPESTATUS[0]}
|
||||
|
||||
if [ $BUILD_EXIT_CODE -eq 0 ]; then
|
||||
echo -e "${GREEN}✅ Build successful${NC}"
|
||||
@ -52,4 +68,4 @@ if [ $BUILD_EXIT_CODE -eq 0 ]; then
|
||||
else
|
||||
echo -e "${RED}❌ Build failed with exit code $BUILD_EXIT_CODE${NC}" >&2
|
||||
exit $BUILD_EXIT_CODE
|
||||
fi
|
||||
fi
|
||||
|
||||
@ -1,5 +1,20 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
if command -v xcbeautify >/dev/null 2>&1; then
|
||||
USE_XCBEAUTIFY=1
|
||||
else
|
||||
USE_XCBEAUTIFY=0
|
||||
fi
|
||||
|
||||
# Filters build output read from stdin: pretty-prints it through xcbeautify when
# USE_XCBEAUTIFY is 1 (forwarding any arguments), otherwise passes it through unchanged.
pipe_build_output() {
  if ! [[ "$USE_XCBEAUTIFY" -eq 1 ]]; then
    cat
    return
  fi
  xcbeautify "$@"
}
|
||||
|
||||
echo "Building Swift CLI..."
|
||||
|
||||
@ -7,7 +22,7 @@ echo "Building Swift CLI..."
|
||||
cd "$(dirname "$0")/../Apps/CLI"
|
||||
|
||||
# Build the Swift CLI in release mode
|
||||
swift build --configuration release
|
||||
swift build --configuration release 2>&1 | pipe_build_output
|
||||
|
||||
# Copy the binary to the root directory
|
||||
cp .build/release/peekaboo ../peekaboo
|
||||
@ -15,4 +30,4 @@ cp .build/release/peekaboo ../peekaboo
|
||||
# Make it executable
|
||||
chmod +x ../peekaboo
|
||||
|
||||
echo "Swift CLI built successfully and copied to ./peekaboo"
|
||||
echo "Swift CLI built successfully and copied to ./peekaboo"
|
||||
|
||||
@ -1,11 +1,26 @@
|
||||
#!/bin/bash
|
||||
set -e # Exit immediately if a command exits with a non-zero status.
|
||||
set -o pipefail
|
||||
|
||||
PROJECT_ROOT=$(cd "$(dirname "$0")/.." && pwd)
|
||||
SWIFT_PROJECT_PATH="$PROJECT_ROOT/Apps/CLI"
|
||||
FINAL_BINARY_NAME="peekaboo"
|
||||
FINAL_BINARY_PATH="$PROJECT_ROOT/$FINAL_BINARY_NAME"
|
||||
|
||||
if command -v xcbeautify >/dev/null 2>&1; then
|
||||
USE_XCBEAUTIFY=1
|
||||
else
|
||||
USE_XCBEAUTIFY=0
|
||||
fi
|
||||
|
||||
# Filters build output read from stdin: pretty-prints it through xcbeautify when
# USE_XCBEAUTIFY is 1 (forwarding any arguments), otherwise passes it through unchanged.
pipe_build_output() {
  if ! [[ "$USE_XCBEAUTIFY" -eq 1 ]]; then
    cat
    return
  fi
  xcbeautify "$@"
}
|
||||
|
||||
# Swift compiler flags for size optimization
|
||||
# -Osize: Optimize for binary size.
|
||||
# -wmo: Whole Module Optimization, allows more aggressive optimizations.
|
||||
@ -47,7 +62,10 @@ enum Version {
|
||||
EOF
|
||||
|
||||
echo "🏗️ Building for arm64 (Apple Silicon) only..."
|
||||
(cd "$SWIFT_PROJECT_PATH" && swift build --arch arm64 -c release $SWIFT_OPTIMIZATION_FLAGS)
|
||||
(
|
||||
cd "$SWIFT_PROJECT_PATH"
|
||||
swift build --arch arm64 -c release $SWIFT_OPTIMIZATION_FLAGS 2>&1 | pipe_build_output
|
||||
)
|
||||
cp "$SWIFT_PROJECT_PATH/.build/arm64-apple-macosx/release/$FINAL_BINARY_NAME" "$FINAL_BINARY_PATH.tmp"
|
||||
echo "✅ arm64 build complete"
|
||||
|
||||
@ -90,4 +108,4 @@ echo "🔍 Verifying final binary..."
|
||||
lipo -info "$FINAL_BINARY_PATH"
|
||||
ls -lh "$FINAL_BINARY_PATH"
|
||||
|
||||
echo "🎉 ARM64 binary '$FINAL_BINARY_PATH' created and optimized successfully!"
|
||||
echo "🎉 ARM64 binary '$FINAL_BINARY_PATH' created and optimized successfully!"
|
||||
|
||||
@ -1,9 +1,24 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
PROJECT_ROOT=$(cd "$(dirname "$0")/.." && pwd)
|
||||
SWIFT_PROJECT_PATH="$PROJECT_ROOT/Apps/CLI"
|
||||
|
||||
if command -v xcbeautify >/dev/null 2>&1; then
|
||||
USE_XCBEAUTIFY=1
|
||||
else
|
||||
USE_XCBEAUTIFY=0
|
||||
fi
|
||||
|
||||
# Filters build output read from stdin: pretty-prints it through xcbeautify when
# USE_XCBEAUTIFY is 1 (forwarding any arguments), otherwise passes it through unchanged.
pipe_build_output() {
  if ! [[ "$USE_XCBEAUTIFY" -eq 1 ]]; then
    cat
    return
  fi
  xcbeautify "$@"
}
|
||||
|
||||
# Parse arguments
|
||||
CLEAN_BUILD=false
|
||||
if [[ "$1" == "--clean" ]]; then
|
||||
@ -70,7 +85,10 @@ else
|
||||
echo "🏗️ Building for debug (incremental)..."
|
||||
fi
|
||||
|
||||
(cd "$SWIFT_PROJECT_PATH" && swift build)
|
||||
(
|
||||
cd "$SWIFT_PROJECT_PATH"
|
||||
swift build 2>&1 | pipe_build_output
|
||||
)
|
||||
|
||||
echo "🔏 Code signing the debug binary..."
|
||||
PROJECT_NAME="peekaboo"
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -e # Exit immediately if a command exits with a non-zero status.
|
||||
set -o pipefail
|
||||
|
||||
PROJECT_ROOT=$(cd "$(dirname "$0")/.." && pwd)
|
||||
SWIFT_PROJECT_PATH="$PROJECT_ROOT/Apps/CLI"
|
||||
@ -15,6 +16,20 @@ X86_64_BINARY_TEMP="$PROJECT_ROOT/${FINAL_BINARY_NAME}-x86_64"
|
||||
# -Xlinker -dead_strip: Remove dead code at the linking stage.
|
||||
SWIFT_OPTIMIZATION_FLAGS="-Xswiftc -Osize -Xswiftc -wmo -Xlinker -dead_strip"
|
||||
|
||||
if command -v xcbeautify >/dev/null 2>&1; then
|
||||
USE_XCBEAUTIFY=1
|
||||
else
|
||||
USE_XCBEAUTIFY=0
|
||||
fi
|
||||
|
||||
# Filters build output read from stdin: pretty-prints it through xcbeautify when
# USE_XCBEAUTIFY is 1 (forwarding any arguments), otherwise passes it through unchanged.
pipe_build_output() {
  if ! [[ "$USE_XCBEAUTIFY" -eq 1 ]]; then
    cat
    return
  fi
  xcbeautify "$@"
}
|
||||
|
||||
echo "🧹 Cleaning previous build artifacts..."
|
||||
(cd "$SWIFT_PROJECT_PATH" && swift package reset) || echo "'swift package reset' encountered an issue, attempting rm -rf..."
|
||||
rm -rf "$SWIFT_PROJECT_PATH/.build"
|
||||
@ -50,12 +65,18 @@ enum Version {
|
||||
EOF
|
||||
|
||||
echo "🏗️ Building for arm64 (Apple Silicon)..."
|
||||
(cd "$SWIFT_PROJECT_PATH" && swift build --arch arm64 -c release $SWIFT_OPTIMIZATION_FLAGS)
|
||||
(
|
||||
cd "$SWIFT_PROJECT_PATH"
|
||||
swift build --arch arm64 -c release $SWIFT_OPTIMIZATION_FLAGS 2>&1 | pipe_build_output
|
||||
)
|
||||
cp "$SWIFT_PROJECT_PATH/.build/arm64-apple-macosx/release/$FINAL_BINARY_NAME" "$ARM64_BINARY_TEMP"
|
||||
echo "✅ arm64 build complete: $ARM64_BINARY_TEMP"
|
||||
|
||||
echo "🏗️ Building for x86_64 (Intel)..."
|
||||
(cd "$SWIFT_PROJECT_PATH" && swift build --arch x86_64 -c release $SWIFT_OPTIMIZATION_FLAGS)
|
||||
(
|
||||
cd "$SWIFT_PROJECT_PATH"
|
||||
swift build --arch x86_64 -c release $SWIFT_OPTIMIZATION_FLAGS 2>&1 | pipe_build_output
|
||||
)
|
||||
cp "$SWIFT_PROJECT_PATH/.build/x86_64-apple-macosx/release/$FINAL_BINARY_NAME" "$X86_64_BINARY_TEMP"
|
||||
echo "✅ x86_64 build complete: $X86_64_BINARY_TEMP"
|
||||
|
||||
@ -104,4 +125,4 @@ echo "🔍 Verifying final universal binary..."
|
||||
lipo -info "$FINAL_BINARY_PATH"
|
||||
ls -lh "$FINAL_BINARY_PATH"
|
||||
|
||||
echo "🎉 Universal binary '$FINAL_BINARY_PATH' created and optimized successfully!"
|
||||
echo "🎉 Universal binary '$FINAL_BINARY_PATH' created and optimized successfully!"
|
||||
|
||||
@ -6,6 +6,20 @@ LOG_PATH=${CLI_BUILD_LOG:-/tmp/cli-build.log}
|
||||
EXIT_PATH=${CLI_BUILD_EXIT:-/tmp/cli-build.exit}
|
||||
BUILD_PATH=${CLI_BUILD_DIR:-/tmp/peekaboo-cli-build}
|
||||
|
||||
if command -v xcbeautify >/dev/null 2>&1; then
|
||||
USE_XCBEAUTIFY=1
|
||||
else
|
||||
USE_XCBEAUTIFY=0
|
||||
fi
|
||||
|
||||
# Filters build output read from stdin: pretty-prints it through xcbeautify when
# USE_XCBEAUTIFY is 1 (forwarding any arguments), otherwise passes it through unchanged.
pipe_build_output() {
  if ! [[ "$USE_XCBEAUTIFY" -eq 1 ]]; then
    cat
    return
  fi
  xcbeautify "$@"
}
|
||||
|
||||
write_exit_code() {
|
||||
local status=${1:-$?}
|
||||
mkdir -p "$(dirname "$EXIT_PATH")"
|
||||
@ -19,7 +33,7 @@ rm -f "$LOG_PATH" "$EXIT_PATH"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
set +e
|
||||
swift build --package-path Apps/CLI --build-path "$BUILD_PATH" "$@" 2>&1 | tee "$LOG_PATH"
|
||||
swift build --package-path Apps/CLI --build-path "$BUILD_PATH" "$@" 2>&1 | pipe_build_output | tee "$LOG_PATH"
|
||||
BUILD_STATUS=${PIPESTATUS[0]}
|
||||
set -e
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user