Add AX wrappers and input driver coverage

This commit is contained in:
Peter Steinberger 2025-11-19 01:01:29 +01:00
parent 49da2383ae
commit 89cbe7a274
6 changed files with 522 additions and 0 deletions

View File

@ -0,0 +1,53 @@
import AppKit
/// Value-type handle for a running application together with its root AX element.
///
/// Callers receive the application element up front, so they never need to call
/// `AXUIElementCreateApplication` themselves.
public struct AXApp: Sendable {
    /// The running application this handle was created from.
    public let application: NSRunningApplication
    /// AX application element built from `application.processIdentifier`.
    public let element: Element

    /// Wraps `application` and creates its AX application element.
    public init(_ application: NSRunningApplication) {
        let root = AXUIElementCreateApplication(application.processIdentifier)
        self.application = application
        self.element = Element(root)
    }

    /// Creates a handle from a process identifier.
    ///
    /// Fails (returns `nil`) when `NSRunningApplication(processIdentifier:)` yields nothing.
    public init?(pid: pid_t) {
        guard let running = NSRunningApplication(processIdentifier: pid) else {
            return nil
        }
        self.init(running)
    }

    /// Process identifier of the wrapped application.
    public var pid: pid_t {
        return application.processIdentifier
    }

    /// Bundle identifier of the wrapped application, if it has one.
    public var bundleIdentifier: String? {
        return application.bundleIdentifier
    }

    /// Localized display name of the wrapped application, if available.
    public var localizedName: String? {
        return application.localizedName
    }

    /// Windows reported by the application's AX element.
    public func windows() -> [Element]? {
        return element.windows()
    }

    /// Focused window reported by the application's AX element, if any.
    public func focusedWindow() -> Element? {
        return element.focusedWindow()
    }
}
/// Typed window handle that keeps an AX window element next to the app that owns it.
public struct AXWindowHandle: Sendable {
    /// Application the window belongs to.
    public let app: AXApp
    /// AX element representing the window.
    public let element: Element

    /// Pairs `element` with its owning `app`; no validation is performed.
    public init(app: AXApp, element: Element) {
        self.element = element
        self.app = app
    }

    /// Window title as reported by the element.
    public var title: String? {
        return element.title()
    }

    /// Window frame as reported by the element.
    public var frame: CGRect? {
        return element.frame()
    }

    /// AX role as reported by the element.
    public var role: String? {
        return element.role()
    }

    /// CGWindowID for this AX window, or `nil` when it cannot be resolved.
    ///
    /// A new `AXWindowResolver` is created on every access.
    public var windowID: CGWindowID? {
        let resolver = AXWindowResolver()
        return resolver.windowID(from: element)
    }
}

View File

@ -0,0 +1,96 @@
import ApplicationServices
import Foundation
import os
// MARK: - Element timeout helpers
extension Element {
    /// Returns the element stored in the `mainMenu` attribute, or `nil` when it is absent.
    @MainActor
    public func menuBar() -> Element? {
        if let mainMenu: AXUIElement = attribute(Attribute<AXUIElement>.mainMenu) {
            return Element(mainMenu)
        }
        return nil
    }

    /// Applies an AX messaging timeout to this element so a stuck target cannot hang the caller.
    ///
    /// Failures are logged as warnings and otherwise ignored.
    /// - Parameter timeout: Value forwarded to `AXUIElementSetMessagingTimeout`
    ///   (presumably seconds — confirm against the API documentation).
    @MainActor
    public func setMessagingTimeout(_ timeout: Float) {
        let status = AXUIElementSetMessagingTimeout(underlyingElement, timeout)
        guard status != .success else { return }
        let log = Logger(subsystem: "boo.peekaboo.axorcist", category: "AXTimeout")
        log.warning("Failed to set messaging timeout: \(status.rawValue)")
    }

    /// Fetches this element's windows while a temporary messaging timeout is in effect.
    ///
    /// The timeout is set back to `0` afterwards (presumably "use the default" — confirm).
    @MainActor
    public func windowsWithTimeout(timeout: Float = 2.0) -> [Element]? {
        setMessagingTimeout(timeout)
        let found = windows()
        setMessagingTimeout(0)
        return found
    }

    /// Fetches this element's menu bar while a temporary messaging timeout is in effect.
    ///
    /// The timeout is set back to `0` afterwards (presumably "use the default" — confirm).
    @MainActor
    public func menuBarWithTimeout(timeout: Float = 2.0) -> Element? {
        setMessagingTimeout(timeout)
        let bar = menuBar()
        setMessagingTimeout(0)
        return bar
    }
}
/// Namespace for process-wide AX messaging timeout configuration.
public enum AXTimeoutConfiguration {
    /// Sets the messaging timeout on the system-wide AX element and logs the outcome.
    ///
    /// A failure is logged as a warning; success is logged at info level with the
    /// timeout formatted to two decimal places.
    /// - Parameter timeout: Value forwarded to `AXUIElementSetMessagingTimeout`.
    @MainActor
    public static func setGlobalTimeout(_ timeout: Float) {
        let log = Logger(subsystem: "boo.peekaboo.axorcist", category: "AXTimeout")
        let status = AXUIElementSetMessagingTimeout(AXUIElementCreateSystemWide(), timeout)
        guard status == .success else {
            log.warning("Failed to set global AX timeout: \(status.rawValue)")
            return
        }
        log.info("Set global AX timeout to \(timeout, format: .fixed(precision: 2)) seconds")
    }
}
/// Runs an AX operation up to a fixed number of times, retrying when it throws or yields `nil`.
public struct AXTimeoutWrapper {
    /// Total number of attempts; always at least 1.
    private let maxRetries: Int
    /// Pause, in seconds, after a thrown attempt; always finite and non-negative.
    private let retryDelay: TimeInterval

    /// - Parameters:
    ///   - maxRetries: Total number of attempts. Values below 1 are treated as 1 so the
    ///     operation always runs at least once (a negative value would otherwise trap
    ///     when forming the attempt range).
    ///   - retryDelay: Seconds to wait after a failed attempt. Negative or non-finite
    ///     values are treated as 0 (they would otherwise trap when converted to nanoseconds).
    public init(maxRetries: Int = 3, retryDelay: TimeInterval = 0.5) {
        self.maxRetries = max(1, maxRetries)
        self.retryDelay = retryDelay.isFinite ? max(0, retryDelay) : 0
    }

    /// Executes `operation`, retrying until it produces a non-`nil` value or attempts run out.
    ///
    /// - A non-`nil` result is returned immediately.
    /// - A `nil` result moves on to the next attempt without waiting.
    /// - A thrown error is logged and remembered; every attempt except the last is then
    ///   followed by a `retryDelay` pause.
    ///
    /// - Returns: The first non-`nil` result, or `nil` if no attempt produced one and none threw.
    /// - Throws: The most recent error thrown by `operation` when no attempt succeeded,
    ///   or `CancellationError` if the task is cancelled while waiting between attempts.
    @MainActor
    public func execute<T>(_ operation: () throws -> T?) async throws -> T? {
        let attempts = maxRetries
        // Saturate below UInt64.max so the Double -> UInt64 conversion can never trap.
        let delayNanoseconds = UInt64(min(retryDelay * 1_000_000_000, Double(UInt64.max).nextDown))
        let logger = Logger(subsystem: "boo.peekaboo.axorcist", category: "AXTimeout")
        var lastError: (any Error)?

        for attempt in 1...attempts {
            do {
                if let result = try operation() {
                    return result
                }
            } catch {
                lastError = error
                logger.debug("AX operation failed (attempt \(attempt)/\(attempts)): \(String(describing: error))")
                if attempt < attempts {
                    try await Task.sleep(nanoseconds: delayNanoseconds)
                }
            }
        }

        if let error = lastError {
            throw error
        }
        return nil
    }
}

View File

@ -0,0 +1,134 @@
import AppKit
import ApplicationServices
import CoreGraphics
import Foundation
import os
/// Generic window/App resolution helpers (no product heuristics).
///
/// Bridges between AX window elements and CoreGraphics window IDs, and exposes
/// basic CoreGraphics window metadata.
public final class AXWindowResolver {
    private let logger = Logger(subsystem: "boo.peekaboo.axorcist", category: "AXWindowResolver")

    public init() {}

    // MARK: - CGWindowID extraction

    /// Private API to extract CGWindowID from an AXUIElement.
    @_silgen_name("_AXUIElementGetWindow")
    private func _AXUIElementGetWindow(_ element: AXUIElement, _ windowID: inout CGWindowID) -> AXError

    /// Extract CGWindowID from an AXUIElement window.
    ///
    /// - Returns: The window ID, or `nil` (after logging an error) when the private
    ///   lookup does not report `.success`.
    @MainActor
    public func windowID(from axElement: AXUIElement) -> CGWindowID? {
        var windowID: CGWindowID = 0
        let result = _AXUIElementGetWindow(axElement, &windowID)
        guard result == .success else {
            self.logger.error("Failed to get window ID from AXUIElement, error: \(result.rawValue)")
            return nil
        }
        return windowID
    }

    /// Extract CGWindowID from an AXorcist Element by forwarding its underlying AXUIElement.
    @MainActor
    public func windowID(from element: Element) -> CGWindowID? {
        self.windowID(from: element.underlyingElement)
    }

    // MARK: - Lookup

    /// Find AX window by CGWindowID in a specific app.
    ///
    /// - Returns: The first of the app's AX windows whose resolved ID equals `windowID`,
    ///   or `nil` if the app exposes no windows or none match.
    @MainActor
    public func findWindow(by windowID: CGWindowID, in app: NSRunningApplication) -> Element? {
        let appElement = Element(AXUIElementCreateApplication(app.processIdentifier))
        guard let windows = appElement.windows() else { return nil }
        for window in windows {
            if let currentID = self.windowID(from: window), currentID == windowID {
                return window
            }
        }
        return nil
    }

    /// Find AX window by CGWindowID across running apps.
    ///
    /// Tries the owner reported by CoreGraphics first, then falls back to enumerating
    /// every other running application.
    @MainActor
    public func findWindow(by windowID: CGWindowID) -> (window: Element, app: NSRunningApplication)? {
        // Fast path: CoreGraphics owner lookup.
        var searchedPID: pid_t?
        let options: CGWindowListOption = [.optionIncludingWindow]
        if let windowInfoList = CGWindowListCopyWindowInfo(options, windowID) as? [[String: Any]],
           let ownerPID = windowInfoList.first?[kCGWindowOwnerPID as String] as? pid_t,
           let owner = NSWorkspace.shared.runningApplications
               .first(where: { $0.processIdentifier == ownerPID })
        {
            if let window = self.findWindow(by: windowID, in: owner) {
                return (window, owner)
            }
            // Remember the miss so the fallback does not repeat the same AX query.
            searchedPID = ownerPID
        }

        // Fallback: full AX enumeration (works without Screen Recording permission).
        for app in NSWorkspace.shared.runningApplications {
            if let searchedPID, app.processIdentifier == searchedPID { continue }
            if let window = self.findWindow(by: windowID, in: app) {
                return (window, app)
            }
        }
        return nil
    }

    // MARK: - Window info

    /// Snapshot of CoreGraphics metadata for a single window.
    public struct WindowInfo: Sendable {
        public let windowID: CGWindowID
        public let title: String?
        public let bounds: CGRect
        public let ownerPID: pid_t
        public let applicationName: String?
        public let bundleIdentifier: String?
        public let layer: Int
        public let alpha: CGFloat
    }

    /// Get comprehensive window information using CGWindowID.
    ///
    /// Missing fields fall back to defaults: owner pid `0`, layer `0`, alpha `1.0`,
    /// bounds `.zero`. Application name and bundle identifier come from the running
    /// application whose pid matches the owner, when one is found.
    /// - Returns: `nil` when CoreGraphics returns no entry for `windowID`.
    public func windowInfo(windowID: CGWindowID) -> WindowInfo? {
        let options: CGWindowListOption = [.optionIncludingWindow]
        guard let windowInfoList = CGWindowListCopyWindowInfo(options, windowID) as? [[String: Any]],
              let info = windowInfoList.first
        else {
            return nil
        }

        let ownerPID = info[kCGWindowOwnerPID as String] as? pid_t ?? 0

        // kCGWindowBounds is a CGRect dictionary representation; decode it with the matching API.
        var bounds: CGRect = .zero
        if let boundsDict = info[kCGWindowBounds as String] as? [String: Any],
           let rect = CGRect(dictionaryRepresentation: boundsDict as CFDictionary)
        {
            bounds = rect
        }

        let app = NSWorkspace.shared.runningApplications
            .first(where: { $0.processIdentifier == ownerPID })

        return WindowInfo(
            windowID: windowID,
            title: info[kCGWindowName as String] as? String,
            bounds: bounds,
            ownerPID: ownerPID,
            applicationName: app?.localizedName,
            bundleIdentifier: app?.bundleIdentifier,
            layer: info[kCGWindowLayer as String] as? Int ?? 0,
            alpha: info[kCGWindowAlpha as String] as? CGFloat ?? 1.0)
    }

    // MARK: - Existence

    /// Whether CoreGraphics currently returns an entry for `windowID`.
    public func windowExists(windowID: CGWindowID) -> Bool {
        self.windowInfo(windowID: windowID) != nil
    }
}

View File

@ -0,0 +1,51 @@
import AppKit
import CoreGraphics
import Foundation
import os
/// Generic helpers for discovering running applications.
public enum AppLocator {
    private static let logger = Logger(subsystem: "boo.peekaboo.axorcist", category: "AppLocator")

    /// Find the application that owns the window under the given screen point.
    /// Falls back to the frontmost app if nothing matches.
    ///
    /// - Parameter screenPoint: Point to hit-test against AX window frames. It is used
    ///   as given, so it should be in the same coordinate space as `Element.frame()`
    ///   (assumed to be the global top-left-origin space — confirm against `Element`).
    ///   When `nil`, the current cursor position is used.
    /// - Returns: The frontmost app if one of its windows contains the point; otherwise
    ///   the first visible regular app with a containing window; otherwise the frontmost app.
    @MainActor
    public static func app(at screenPoint: CGPoint? = nil) -> NSRunningApplication? {
        let location = screenPoint ?? Self.currentMouseLocation()
        let frontmost = NSWorkspace.shared.frontmostApplication

        // Prefer frontmost app first (cheap).
        if let front = frontmost, Self.point(location, isInsideWindowOf: front) {
            return front
        }

        // Search other visible apps; the frontmost one was already ruled out above.
        let candidates = NSWorkspace.shared.runningApplications.filter {
            $0.activationPolicy == .regular
                && !$0.isHidden
                && $0.bundleIdentifier != nil
                && $0.processIdentifier != frontmost?.processIdentifier
        }
        for app in candidates {
            if Self.point(location, isInsideWindowOf: app) {
                return app
            }
        }

        // Fallback.
        Self.logger.debug("app(at:): falling back to frontmost \(frontmost?.localizedName ?? "unknown")")
        return frontmost
    }

    /// Current cursor position in the global top-left-origin coordinate space.
    ///
    /// `NSEvent.mouseLocation` has a bottom-left origin, so comparing it directly with
    /// top-left-origin window frames hit-tests the wrong row of the screen. The CGEvent
    /// location is preferred; if no event can be created, the AppKit location is
    /// flipped against the first screen's height.
    @MainActor
    private static func currentMouseLocation() -> CGPoint {
        if let location = CGEvent(source: nil)?.location {
            return location
        }
        let cocoaLocation = NSEvent.mouseLocation
        guard let primaryHeight = NSScreen.screens.first?.frame.height else {
            return cocoaLocation
        }
        return CGPoint(x: cocoaLocation.x, y: primaryHeight - cocoaLocation.y)
    }

    /// Whether any AX window of `app` has a frame containing `point`.
    @MainActor
    private static func point(_ point: CGPoint, isInsideWindowOf app: NSRunningApplication) -> Bool {
        let appElement = Element(AXUIElementCreateApplication(app.processIdentifier))
        guard let windows = appElement.windows() else { return false }
        for window in windows {
            if let frame = window.frame(), frame.contains(point) {
                return true
            }
        }
        return false
    }
}

View File

@ -0,0 +1,167 @@
import AppKit
import CoreGraphics
import Foundation
/// Lightweight, allocation-conscious helpers for synthesizing user input.
///
/// These intentionally stay thin: no logging, no implicit delays beyond what
/// the underlying AX/UI toolkits already impose. Callers (e.g. Peekaboo) can
/// layer heuristics or visualization on top without paying a baseline tax.
public enum InputDriver {
    // MARK: - Mouse

    /// Click at a screen point by forwarding to `Element.clickAt`.
    /// - Throws: Whatever `Element.clickAt` throws.
    @MainActor
    public static func click(
        at point: CGPoint,
        button: MouseButton = .left,
        count: Int = 1) throws
    {
        try Element.clickAt(point, button: button, clickCount: count)
    }

    /// Move mouse to a point (no click).
    /// - Throws: `UIAutomationError.failedToCreateEvent` if the event cannot be created.
    @MainActor
    public static func move(to point: CGPoint) throws {
        let moveEvent = try Self.makeMouseEvent(.mouseMoved, at: point, button: .left)
        moveEvent.post(tap: .cghidEventTap)
    }

    /// Current mouse location (if available).
    public static func currentLocation() -> CGPoint? {
        CGEvent(source: nil)?.location
    }

    /// Cached current location provider to avoid repeated CGEvent creation in tight loops.
    ///
    /// Returns the cached value when present; otherwise samples `currentLocation()`
    /// and stores the result (which may be `nil`) in `cache`.
    public static func cachedLocation(using cache: inout CGPoint?) -> CGPoint? {
        if let cached = cache { return cached }
        let location = Self.currentLocation()
        cache = location
        return location
    }

    /// Press and hold at a point for a duration (simulates force click fallback).
    ///
    /// The mouse-down event carries a pressure value of 2.0. Blocks the calling
    /// thread for `duration` seconds when `duration` is positive.
    /// - Throws: `UIAutomationError.failedToCreateEvent` if either event cannot be
    ///   created; in that case nothing is posted.
    @MainActor
    public static func pressHold(at point: CGPoint, button: MouseButton = .left, duration: TimeInterval) throws {
        let kinds = Self.mouseEventKinds(for: button)
        // Build both events before posting so a failure cannot leave the button held down.
        let down = try Self.makeMouseEvent(kinds.down, at: point, button: kinds.button)
        let up = try Self.makeMouseEvent(kinds.up, at: point, button: kinds.button)

        down.setDoubleValueField(.mouseEventPressure, value: 2.0)
        down.post(tap: .cghidEventTap)
        if duration > 0 {
            Thread.sleep(forTimeInterval: duration)
        }
        up.post(tap: .cghidEventTap)
    }

    /// Drag from `start` to `end` using the given button.
    ///
    /// Posts a mouse-down at `start`, `steps` linearly interpolated drag events
    /// (at least one, the last of which lands on `end`), then a mouse-up at `end`.
    /// Intermediate events that cannot be created are skipped.
    /// - Parameters:
    ///   - steps: Number of interpolated drag events; values below 1 are treated as 1.
    ///   - interStepDelay: Seconds to block after each posted drag event, when positive.
    /// - Throws: `UIAutomationError.failedToCreateEvent` if the down or up event
    ///   cannot be created; in that case nothing is posted.
    @MainActor
    public static func drag(
        from start: CGPoint,
        to end: CGPoint,
        button: MouseButton = .left,
        steps: Int = 20,
        interStepDelay: TimeInterval = 0.0) throws
    {
        let steps = max(1, steps)
        let kinds = Self.mouseEventKinds(for: button)
        // Build both bracketing events before posting so a failure cannot leave the button held down.
        let down = try Self.makeMouseEvent(kinds.down, at: start, button: kinds.button)
        let up = try Self.makeMouseEvent(kinds.up, at: end, button: kinds.button)

        down.post(tap: .cghidEventTap)
        for index in 1...steps {
            let progress = CGFloat(index) / CGFloat(steps)
            let position = CGPoint(
                x: start.x + (end.x - start.x) * progress,
                y: start.y + (end.y - start.y) * progress)
            // The drag event type must match the pressed button.
            guard let dragEvent = try? Self.makeMouseEvent(kinds.dragged, at: position, button: kinds.button)
            else { continue }
            dragEvent.post(tap: .cghidEventTap)
            if interStepDelay > 0 { Thread.sleep(forTimeInterval: interStepDelay) }
        }
        up.post(tap: .cghidEventTap)
    }

    /// Scroll by the given deltas. Positive `deltaY` scrolls up.
    ///
    /// Each delta is divided by 10 and truncated toward zero to obtain the number of
    /// wheel lines posted, so magnitudes below 10 produce no movement on that axis.
    /// Non-finite deltas count as zero and out-of-range values are clamped to `Int32`.
    /// - Parameter point: Optional location assigned to the event before posting.
    /// - Throws: `UIAutomationError.failedToCreateEvent` if the event cannot be created.
    @MainActor
    public static func scroll(
        deltaX: Double = 0,
        deltaY: Double,
        at point: CGPoint? = nil) throws
    {
        let scrollEvent = CGEvent(
            scrollWheelEvent2Source: nil,
            units: .line,
            wheelCount: 2,
            wheel1: Self.wheelLines(from: deltaY),
            wheel2: Self.wheelLines(from: deltaX),
            wheel3: 0)
        guard let event = scrollEvent else { throw UIAutomationError.failedToCreateEvent }
        if let point {
            event.location = point
        }
        event.post(tap: .cghidEventTap)
    }

    // MARK: - Keyboard

    /// Type a string at the current focus by forwarding to `Element.typeText`.
    @MainActor
    public static func type(_ text: String, delayPerCharacter: TimeInterval = 0.0) throws {
        try Element.typeText(text, delay: delayPerCharacter)
    }

    /// Tap a special key (e.g. return, tab) with optional modifiers.
    @MainActor
    public static func tapKey(_ key: SpecialKey, modifiers: CGEventFlags = []) throws {
        try Element.typeKey(key, modifiers: modifiers)
    }

    /// Perform a hotkey chord (e.g. ["cmd","shift","4"]).
    @MainActor
    public static func hotkey(keys: [String], holdDuration: TimeInterval = 0.1) throws {
        try Element.performHotkey(keys: keys, holdDuration: holdDuration)
    }

    // MARK: - Private helpers

    /// Maps a `MouseButton` to its CoreGraphics button and event types.
    /// Any button other than `.left` is treated as the right button.
    @MainActor
    private static func mouseEventKinds(
        for button: MouseButton)
        -> (button: CGMouseButton, down: CGEventType, dragged: CGEventType, up: CGEventType)
    {
        if button == .left {
            return (.left, .leftMouseDown, .leftMouseDragged, .leftMouseUp)
        }
        return (.right, .rightMouseDown, .rightMouseDragged, .rightMouseUp)
    }

    /// Creates a mouse event or throws `UIAutomationError.failedToCreateEvent`.
    @MainActor
    private static func makeMouseEvent(
        _ type: CGEventType,
        at point: CGPoint,
        button: CGMouseButton) throws -> CGEvent
    {
        guard let event = CGEvent(
            mouseEventSource: nil,
            mouseType: type,
            mouseCursorPosition: point,
            mouseButton: button)
        else { throw UIAutomationError.failedToCreateEvent }
        return event
    }

    /// Converts a scroll delta to whole wheel lines (10 units per line, truncated toward zero)
    /// without trapping on non-finite or out-of-range input.
    private static func wheelLines(from delta: Double) -> Int32 {
        let pixelsPerLine: Double = 10
        let lines = delta / pixelsPerLine
        guard lines.isFinite else { return 0 }
        return Int32(max(Double(Int32.min), min(Double(Int32.max), lines)))
    }
}

View File

@ -0,0 +1,21 @@
import CoreGraphics
import Testing
@testable import AXorcist
/// Tests for the cursor-location caching helper on `InputDriver`.
@Suite("InputDriver cursor helpers")
struct InputDriverTests {
    /// A pre-populated cache must be returned as-is and left unchanged.
    @Test("cachedLocation returns cached value when present")
    func cachedLocationUsesCache() {
        let seeded = CGPoint(x: 10, y: 20)
        var cache: CGPoint? = seeded
        let result = InputDriver.cachedLocation(using: &cache)
        #expect(result == seeded)
        #expect(cache == seeded)
    }

    /// An empty cache must end up holding exactly what the call returned.
    @Test("cachedLocation populates cache when empty")
    func cachedLocationPopulatesCache() {
        var cache: CGPoint? = nil
        let result = InputDriver.cachedLocation(using: &cache)
        // If running in CI without UI, location may be nil; just assert cache mirrors result.
        // Compare against the returned value rather than a second live sample, which
        // could differ if the cursor moves between the two reads.
        #expect(cache == result)
        if let first = result {
            // Once populated, later calls must serve the cached point.
            #expect(InputDriver.cachedLocation(using: &cache) == first)
        }
    }
}