diff --git a/Sources/AXorcist/Core/AXApp.swift b/Sources/AXorcist/Core/AXApp.swift
new file mode 100644
index 0000000..1548a22
--- /dev/null
+++ b/Sources/AXorcist/Core/AXApp.swift
@@ -0,0 +1,53 @@
+import AppKit
+
+/// Pairs a running application with the accessibility element created for its process,
+/// so callers never have to call `AXUIElementCreateApplication` themselves.
+public struct AXApp: Sendable {
+    /// The wrapped running application.
+    public let application: NSRunningApplication
+    /// Application-level AX element built from `application.processIdentifier`.
+    public let element: Element
+
+    /// Wraps `application` and creates its application-level AX element.
+    public init(_ application: NSRunningApplication) {
+        let axApplication = AXUIElementCreateApplication(application.processIdentifier)
+        self.application = application
+        self.element = Element(axApplication)
+    }
+
+    /// Failable convenience initializer: yields `nil` when no `NSRunningApplication`
+    /// can be created for `pid`.
+    public init?(pid: pid_t) {
+        if let running = NSRunningApplication(processIdentifier: pid) {
+            self.init(running)
+        } else {
+            return nil
+        }
+    }
+
+    /// Process identifier of the wrapped application.
+    public var pid: pid_t {
+        self.application.processIdentifier
+    }
+
+    /// Bundle identifier reported by the wrapped application, if any.
+    public var bundleIdentifier: String? {
+        self.application.bundleIdentifier
+    }
+
+    /// Localized name reported by the wrapped application, if any.
+    public var localizedName: String? {
+        self.application.localizedName
+    }
+
+    /// Forwards to `element.windows()`.
+    public func windows() -> [Element]? {
+        return self.element.windows()
+    }
+
+    /// Forwards to `element.focusedWindow()`.
+    public func focusedWindow() -> Element? {
+        return self.element.focusedWindow()
+    }
+}
+
+/// Typed window handle that keeps an AX window element together with its owning app.
+public struct AXWindowHandle: Sendable {
+    /// Application that owns the window.
+    public let app: AXApp
+    /// AX element representing the window.
+    public let element: Element
+
+    /// Stores the given app/element pair unchanged.
+    public init(app: AXApp, element: Element) {
+        self.element = element
+        self.app = app
+    }
+
+    /// Forwards to `element.title()`.
+    public var title: String? {
+        self.element.title()
+    }
+
+    /// Forwards to `element.frame()`.
+    public var frame: CGRect? {
+        self.element.frame()
+    }
+
+    /// Forwards to `element.role()`.
+    public var role: String? {
+        self.element.role()
+    }
+
+    /// `CGWindowID` resolved for this window through a freshly created `AXWindowResolver`,
+    /// or `nil` when the resolver cannot produce one.
+    public var windowID: CGWindowID? {
+        let resolver = AXWindowResolver()
+        return resolver.windowID(from: self.element)
+    }
+}
diff --git a/Sources/AXorcist/Core/AXTimeoutPolicy.swift b/Sources/AXorcist/Core/AXTimeoutPolicy.swift
new file mode 100644
index 0000000..4230b08
--- /dev/null
+++ b/Sources/AXorcist/Core/AXTimeoutPolicy.swift
@@ -0,0 +1,96 @@
+import ApplicationServices
+import Foundation
+import os
+
+// MARK: - Element timeout helpers
+
+extension Element {
+    /// Returns the element stored in the `mainMenu` attribute, or `nil` when that
+    /// attribute cannot be read as an `AXUIElement`.
+    @MainActor
+    public func menuBar() -> Element? {
+        if let rawMenuBar: AXUIElement = attribute(Attribute.mainMenu) {
+            return Element(rawMenuBar)
+        }
+        return nil
+    }
+
+    /// Applies an AX messaging timeout to this element so calls cannot hang indefinitely.
+    /// A failure is logged as a warning and otherwise ignored.
+    @MainActor
+    public func setMessagingTimeout(_ timeout: Float) {
+        let status = AXUIElementSetMessagingTimeout(self.underlyingElement, timeout)
+        guard status != .success else { return }
+        Logger(subsystem: "boo.peekaboo.axorcist", category: "AXTimeout")
+            .warning("Failed to set messaging timeout: \(status.rawValue)")
+    }
+
+    /// Reads `windows()` while a temporary messaging timeout is in force.
+    /// The timeout is set back to `0` once the read has finished.
+    @MainActor
+    public func windowsWithTimeout(timeout: Float = 2.0) -> [Element]? {
+        self.setMessagingTimeout(timeout)
+        defer { self.setMessagingTimeout(0) }
+        return self.windows()
+    }
+
+    /// Reads `menuBar()` while a temporary messaging timeout is in force.
+    /// The timeout is set back to `0` once the read has finished.
+    @MainActor
+    public func menuBarWithTimeout(timeout: Float = 2.0) -> Element? {
+        self.setMessagingTimeout(timeout)
+        defer { self.setMessagingTimeout(0) }
+        return self.menuBar()
+    }
+}
+
+/// Global timeout configuration for all AX operations.
+public enum AXTimeoutConfiguration {
+    /// Set the global messaging timeout for all AX operations.
+    @MainActor
+    public static func setGlobalTimeout(_ timeout: Float) {
+        // The timeout is applied to the system-wide element; the outcome is only logged.
+        let systemWide = AXUIElementCreateSystemWide()
+        let error = AXUIElementSetMessagingTimeout(systemWide, timeout)
+        let logger = Logger(subsystem: "boo.peekaboo.axorcist", category: "AXTimeout")
+        if error != .success {
+            logger.warning("Failed to set global AX timeout: \(error.rawValue)")
+        } else {
+            logger.info("Set global AX timeout to \(timeout, format: .fixed(precision: 2)) seconds")
+        }
+    }
+}
+
+/// Wrapper for AX operations with automatic retry on timeout.
+public struct AXTimeoutWrapper {
+    private let maxRetries: Int
+    private let retryDelay: TimeInterval
+
+    public init(maxRetries: Int = 3, retryDelay: TimeInterval = 0.5) {
+        self.maxRetries = maxRetries
+        self.retryDelay = retryDelay
+    }
+
+    /// Execute an AX operation with timeout protection and retry logic.
+    @MainActor
+    public func execute(_ operation: () throws -> T?) async throws -> T? {
+        var lastError: (any Error)?
+
+        for attempt in 0.. AXError
+
+    /// Extract CGWindowID from an AXUIElement window.
+    ///
+    /// - Parameter axElement: The AX window element to resolve.
+    /// - Returns: The window ID written by `_AXUIElementGetWindow`, or `nil` (after
+    ///   logging an error) when that call does not report `.success`.
+    @MainActor
+    public func windowID(from axElement: AXUIElement) -> CGWindowID? {
+        var windowID: CGWindowID = 0
+        let result = _AXUIElementGetWindow(axElement, &windowID)
+        guard result == .success else {
+            self.logger.error("Failed to get window ID from AXUIElement, error: \(result.rawValue)")
+            return nil
+        }
+        return windowID
+    }
+
+    /// Extract CGWindowID from an AXorcist Element.
+    ///
+    /// Convenience overload that unwraps `element.underlyingElement` and forwards to
+    /// `windowID(from:)` for `AXUIElement`.
+    @MainActor
+    public func windowID(from element: Element) -> CGWindowID? {
+        let axElement = element.underlyingElement
+        return self.windowID(from: axElement)
+    }
+
+    // MARK: - Lookup
+
+    /// Find AX window by CGWindowID in a specific app.
+    ///
+    /// - Parameters:
+    ///   - windowID: The CoreGraphics window ID to look for.
+    ///   - app: The application whose AX windows are enumerated.
+    /// - Returns: The first window of `app` whose resolved ID equals `windowID`, or `nil`
+    ///   when the app exposes no windows or none of them matches.
+    @MainActor
+    public func findWindow(by windowID: CGWindowID, in app: NSRunningApplication) -> Element? {
+        let appElement = AXUIElementCreateApplication(app.processIdentifier)
+        let element = Element(appElement)
+        guard let windows = element.windows() else { return nil }
+
+        // `first(where:)` stops at the first match, exactly like the manual loop it replaces.
+        return windows.first { self.windowID(from: $0) == windowID }
+    }
+
+    /// Find AX window by CGWindowID across running apps.
+    ///
+    /// First asks CoreGraphics which process owns the window and searches only that app.
+    /// If that does not produce a match, every other running application is enumerated.
+    ///
+    /// - Parameter windowID: The CoreGraphics window ID to look for.
+    /// - Returns: The matching AX window together with its owning application, or `nil`.
+    @MainActor
+    public func findWindow(by windowID: CGWindowID) -> (window: Element, app: NSRunningApplication)? {
+        // Remember which process the fast path already enumerated, so the fallback does
+        // not issue the same AX queries against it a second time.
+        var searchedPID: pid_t?
+
+        // Fast path: CoreGraphics owner lookup
+        let options: CGWindowListOption = [.optionIncludingWindow]
+        if let windowInfoList = CGWindowListCopyWindowInfo(options, windowID) as? [[String: Any]],
+           let windowInfo = windowInfoList.first,
+           let ownerPID = windowInfo[kCGWindowOwnerPID as String] as? pid_t,
+           let owner = NSRunningApplication(processIdentifier: ownerPID)
+        {
+            if let window = self.findWindow(by: windowID, in: owner) {
+                return (window, owner)
+            }
+            searchedPID = ownerPID
+        }
+
+        // Fallback: full AX enumeration (works without Screen Recording permission).
+        for app in NSWorkspace.shared.runningApplications where app.processIdentifier != searchedPID {
+            if let window = self.findWindow(by: windowID, in: app) {
+                return (window, app)
+            }
+        }
+
+        return nil
+    }
+
+    // MARK: - Window info
+
+    /// Snapshot of the CoreGraphics window-list entry for one window, as assembled by
+    /// `windowInfo(windowID:)`.
+    public struct WindowInfo: Sendable {
+        /// The ID the lookup was performed with.
+        public let windowID: CGWindowID
+        /// Value of `kCGWindowName`, if present.
+        public let title: String?
+        /// Rectangle built from `kCGWindowBounds`; `.zero` when the entry has no bounds.
+        public let bounds: CGRect
+        /// Value of `kCGWindowOwnerPID`; `0` when missing.
+        public let ownerPID: pid_t
+        /// Localized name of the running application matching `ownerPID`, if one was found.
+        public let applicationName: String?
+        /// Bundle identifier of the running application matching `ownerPID`, if one was found.
+        public let bundleIdentifier: String?
+        /// Value of `kCGWindowLayer`; `0` when missing.
+        public let layer: Int
+        /// Value of `kCGWindowAlpha`; `1.0` when missing.
+        public let alpha: CGFloat
+    }
+
+    /// Get comprehensive window information using CGWindowID.
+    ///
+    /// - Parameter windowID: The CoreGraphics window ID to describe.
+    /// - Returns: A `WindowInfo` built from the first window-list entry for `windowID`,
+    ///   or `nil` when CoreGraphics returns no entry.
+    public func windowInfo(windowID: CGWindowID) -> WindowInfo? {
+        let options: CGWindowListOption = [.optionIncludingWindow]
+        guard let windowInfoList = CGWindowListCopyWindowInfo(options, windowID) as? [[String: Any]],
+              let info = windowInfoList.first
+        else {
+            return nil
+        }
+
+        // Missing keys fall back to neutral defaults instead of failing the whole lookup.
+        let title = info[kCGWindowName as String] as? String
+        let ownerPID = info[kCGWindowOwnerPID as String] as? pid_t ?? 0
+        let layer = info[kCGWindowLayer as String] as? Int ?? 0
+        let alpha = info[kCGWindowAlpha as String] as? CGFloat ?? 1.0
+
+        var bounds: CGRect = .zero
+        if let boundsDict = info[kCGWindowBounds as String] as? [String: Any] {
+            bounds = CGRect(
+                x: boundsDict["X"] as? CGFloat ?? 0,
+                y: boundsDict["Y"] as? CGFloat ?? 0,
+                width: boundsDict["Width"] as? CGFloat ?? 0,
+                height: boundsDict["Height"] as? CGFloat ?? 0)
+        }
+
+        let app = NSWorkspace.shared.runningApplications
+            .first(where: { $0.processIdentifier == ownerPID })
+
+        return WindowInfo(
+            windowID: windowID,
+            title: title,
+            bounds: bounds,
+            ownerPID: ownerPID,
+            applicationName: app?.localizedName,
+            bundleIdentifier: app?.bundleIdentifier,
+            layer: layer,
+            alpha: alpha)
+    }
+
+    // MARK: - Existence
+
+    /// Returns `true` when `windowInfo(windowID:)` finds an entry for `windowID`.
+    public func windowExists(windowID: CGWindowID) -> Bool {
+        self.windowInfo(windowID: windowID) != nil
+    }
+}
diff --git a/Sources/AXorcist/Core/AppLocator.swift b/Sources/AXorcist/Core/AppLocator.swift
new file mode 100644
index 0000000..218786e
--- /dev/null
+++ b/Sources/AXorcist/Core/AppLocator.swift
@@ -0,0 +1,51 @@
+import AppKit
+import CoreGraphics
+import Foundation
+import os
+
+/// Generic helpers for discovering running applications.
+public enum AppLocator {
+    private static let logger = Logger(subsystem: "boo.peekaboo.axorcist", category: "AppLocator")
+
+    /// Find the application that owns the window under the given screen point.
+    /// Falls back to the frontmost app if nothing matches.
+    ///
+    /// - Parameter screenPoint: Point to hit-test, expressed in the same coordinate space
+    ///   as the frames returned by `Element.frame()`. When `nil`, the current pointer
+    ///   location in global display (top-left origin) coordinates is used.
+    /// - Returns: The frontmost app if one of its windows contains the point, otherwise
+    ///   the first visible regular app with such a window, otherwise the frontmost app.
+    @MainActor
+    public static func app(at screenPoint: CGPoint? = nil) -> NSRunningApplication? {
+        let location = screenPoint ?? Self.pointerLocationInDisplayCoordinates()
+
+        // Prefer frontmost app first (cheap).
+        let front = NSWorkspace.shared.frontmostApplication
+        if let front = front, Self.point(location, isInsideWindowOf: front) {
+            return front
+        }
+
+        // Search other visible apps. The frontmost one was already tested above, so it
+        // is excluded here to avoid repeating its AX queries.
+        let visibleApps = NSWorkspace.shared.runningApplications.filter {
+            $0.activationPolicy == .regular && !$0.isHidden && $0.bundleIdentifier != nil
+                && $0.processIdentifier != front?.processIdentifier
+        }
+
+        for app in visibleApps {
+            if Self.point(location, isInsideWindowOf: app) {
+                return app
+            }
+        }
+
+        // Fallback.
+        let fallback = NSWorkspace.shared.frontmostApplication
+        Self.logger.debug("app(at:): falling back to frontmost \(fallback?.localizedName ?? "unknown")")
+        return fallback
+    }
+
+    /// Current pointer location in global display coordinates (origin at the top-left of
+    /// the primary display).
+    ///
+    /// `NSEvent.mouseLocation` reports the pointer with a bottom-left origin, so it cannot
+    /// be compared directly with top-left-origin window frames.
+    /// NOTE(review): this assumes `Element.frame()` returns the raw AX position/size,
+    /// which the Accessibility API reports with a top-left origin — confirm.
+    @MainActor
+    private static func pointerLocationInDisplayCoordinates() -> CGPoint {
+        if let location = CGEvent(source: nil)?.location {
+            return location
+        }
+        // Fallback: flip the AppKit point around the height of the primary screen.
+        let cocoaPoint = NSEvent.mouseLocation
+        guard let primaryHeight = NSScreen.screens.first?.frame.height else { return cocoaPoint }
+        return CGPoint(x: cocoaPoint.x, y: primaryHeight - cocoaPoint.y)
+    }
+
+    /// Returns `true` when any AX window of `app` reports a frame containing `point`.
+    /// Apps that expose no windows, and windows without a frame, never match.
+    @MainActor
+    private static func point(_ point: CGPoint, isInsideWindowOf app: NSRunningApplication) -> Bool {
+        let axApp = AXUIElementCreateApplication(app.processIdentifier)
+        let appElement = Element(axApp)
+        guard let windows = appElement.windows() else { return false }
+        return windows.contains { $0.frame()?.contains(point) == true }
+    }
+}
diff --git a/Sources/AXorcist/Core/InputDriver.swift b/Sources/AXorcist/Core/InputDriver.swift
new file mode 100644
index 0000000..eeb9a22
--- /dev/null
+++ b/Sources/AXorcist/Core/InputDriver.swift
@@ -0,0 +1,167 @@
+import AppKit
+import CoreGraphics
+import Foundation
+
+/// Lightweight, allocation-conscious helpers for synthesizing user input.
+///
+/// These intentionally stay thin: no logging, no implicit delays beyond what
+/// the underlying AX/UI toolkits already impose. Callers (e.g. Peekaboo) can
+/// layer heuristics or visualization on top without paying a baseline tax.
+public enum InputDriver {
+    // MARK: - Mouse
+
+    /// Click at a screen point.
+    @MainActor
+    public static func click(
+        at point: CGPoint,
+        button: MouseButton = .left,
+        count: Int = 1) throws
+    {
+        // Thin pass-through: the event synthesis itself lives in `Element.clickAt`.
+        try Element.clickAt(point, button: button, clickCount: count)
+    }
+
+    /// Move mouse to a point (no click)
+    ///
+    /// - Parameter point: Destination of the synthesized `.mouseMoved` event.
+    /// - Throws: `UIAutomationError.failedToCreateEvent` when the event cannot be created.
+    @MainActor
+    public static func move(to point: CGPoint) throws {
+        let moveEvent = try Self.makeMouseEvent(.mouseMoved, at: point, button: .left)
+        moveEvent.post(tap: .cghidEventTap)
+    }
+
+    /// Current mouse location (if available).
+    ///
+    /// - Returns: The `location` of a freshly created `CGEvent`, or `nil` when no event
+    ///   could be created.
+    public static func currentLocation() -> CGPoint? {
+        CGEvent(source: nil)?.location
+    }
+
+    /// Cached current location provider to avoid repeated CGEvent creation in tight loops.
+    ///
+    /// - Parameter cache: Caller-owned storage. A non-`nil` value is returned as-is;
+    ///   otherwise it is filled with the result of `currentLocation()` (which may be `nil`).
+    /// - Returns: The cached or freshly queried location.
+    public static func cachedLocation(using cache: inout CGPoint?) -> CGPoint? {
+        if let cached = cache { return cached }
+        let loc = self.currentLocation()
+        cache = loc
+        return loc
+    }
+
+    /// CoreGraphics button and event types needed to press, drag and release one button.
+    private struct MouseEventKinds {
+        let button: CGMouseButton
+        let down: CGEventType
+        let dragged: CGEventType
+        let up: CGEventType
+    }
+
+    /// Maps a `MouseButton` to its CoreGraphics event types.
+    /// Every button other than `.left` is synthesized as the right button.
+    private static func eventKinds(for button: MouseButton) -> MouseEventKinds {
+        if button == .left {
+            return MouseEventKinds(
+                button: .left,
+                down: .leftMouseDown,
+                dragged: .leftMouseDragged,
+                up: .leftMouseUp)
+        }
+        return MouseEventKinds(
+            button: .right,
+            down: .rightMouseDown,
+            dragged: .rightMouseDragged,
+            up: .rightMouseUp)
+    }
+
+    /// Creates a mouse event of `type` at `point`, throwing when CoreGraphics returns `nil`.
+    private static func makeMouseEvent(
+        _ type: CGEventType,
+        at point: CGPoint,
+        button: CGMouseButton) throws -> CGEvent
+    {
+        guard let event = CGEvent(
+            mouseEventSource: nil,
+            mouseType: type,
+            mouseCursorPosition: point,
+            mouseButton: button)
+        else { throw UIAutomationError.failedToCreateEvent }
+        return event
+    }
+
+    /// Press and hold at a point for a duration (simulates force click fallback).
+    ///
+    /// The calling thread is blocked with `Thread.sleep` for `duration` seconds between
+    /// the press and the release.
+    ///
+    /// - Parameters:
+    ///   - point: Location of both the press and the release.
+    ///   - button: Button to press; anything other than `.left` is sent as the right button.
+    ///   - duration: Hold time in seconds; values `<= 0` release immediately.
+    /// - Throws: `UIAutomationError.failedToCreateEvent` when an event cannot be created.
+    @MainActor
+    public static func pressHold(at point: CGPoint, button: MouseButton = .left, duration: TimeInterval) throws {
+        let kinds = Self.eventKinds(for: button)
+
+        let down = try Self.makeMouseEvent(kinds.down, at: point, button: kinds.button)
+        down.setDoubleValueField(.mouseEventPressure, value: 2.0)
+        down.post(tap: .cghidEventTap)
+
+        if duration > 0 {
+            Thread.sleep(forTimeInterval: duration)
+        }
+
+        let up = try Self.makeMouseEvent(kinds.up, at: point, button: kinds.button)
+        up.post(tap: .cghidEventTap)
+    }
+
+    /// Drag from → to using the given button.
+    ///
+    /// Posts a button-down at `start`, `steps` linearly interpolated dragged events ending
+    /// exactly at `end`, then a button-up at `end`. The dragged events use the type that
+    /// matches the pressed button (`.leftMouseDragged` or `.rightMouseDragged`).
+    ///
+    /// - Parameters:
+    ///   - start: Where the button is pressed.
+    ///   - end: Where the button is released.
+    ///   - button: Button to drag with; anything other than `.left` is sent as the right button.
+    ///   - steps: Number of intermediate dragged events; values below 1 are treated as 1.
+    ///   - interStepDelay: Seconds to block (via `Thread.sleep`) after each dragged event.
+    /// - Throws: `UIAutomationError.failedToCreateEvent` when the down or up event cannot
+    ///   be created.
+    @MainActor
+    public static func drag(
+        from start: CGPoint,
+        to end: CGPoint,
+        button: MouseButton = .left,
+        steps: Int = 20,
+        interStepDelay: TimeInterval = 0.0) throws
+    {
+        let steps = max(1, steps)
+        let kinds = Self.eventKinds(for: button)
+
+        let down = try Self.makeMouseEvent(kinds.down, at: start, button: kinds.button)
+        down.post(tap: .cghidEventTap)
+
+        for i in 1...steps {
+            let t = CGFloat(i) / CGFloat(steps)
+            let pos = CGPoint(
+                x: start.x + (end.x - start.x) * t,
+                y: start.y + (end.y - start.y) * t)
+            // Best effort: a step whose event cannot be created is skipped rather than
+            // aborting the drag while the button is still down.
+            guard let move = CGEvent(
+                mouseEventSource: nil,
+                mouseType: kinds.dragged,
+                mouseCursorPosition: pos,
+                mouseButton: kinds.button)
+            else { continue }
+            move.post(tap: .cghidEventTap)
+            if interStepDelay > 0 { Thread.sleep(forTimeInterval: interStepDelay) }
+        }
+
+        let up = try Self.makeMouseEvent(kinds.up, at: end, button: kinds.button)
+        up.post(tap: .cghidEventTap)
+    }
+
+    /// Scroll by deltas (line-based). Positive `deltaY` scrolls up.
+    ///
+    /// Each delta is divided by 10 and truncated toward zero before being sent in line
+    /// units, so a magnitude below 10 produces no movement on that axis. Non-finite
+    /// deltas are treated as 0 and very large ones are clamped to the `Int32` range.
+    ///
+    /// - Parameters:
+    ///   - deltaX: Horizontal amount, sent as the second wheel.
+    ///   - deltaY: Vertical amount, sent as the first wheel.
+    ///   - point: Optional location assigned to the event before it is posted.
+    /// - Throws: `UIAutomationError.failedToCreateEvent` when the event cannot be created.
+    @MainActor
+    public static func scroll(
+        deltaX: Double = 0,
+        deltaY: Double,
+        at point: CGPoint? = nil) throws
+    {
+        let pixelsPerLine: Double = 10
+
+        // `Int32(_:)` traps on NaN, infinity and out-of-range values, so sanitize first.
+        func wheelLines(_ delta: Double) -> Int32 {
+            let lines = delta / pixelsPerLine
+            guard lines.isFinite else { return 0 }
+            return Int32(min(max(lines, Double(Int32.min)), Double(Int32.max)))
+        }
+
+        let scrollEvent = CGEvent(
+            scrollWheelEvent2Source: nil,
+            units: .line,
+            wheelCount: 2,
+            wheel1: wheelLines(deltaY),
+            wheel2: wheelLines(deltaX),
+            wheel3: 0)
+
+        guard let event = scrollEvent else { throw UIAutomationError.failedToCreateEvent }
+        if let point {
+            event.location = point
+        }
+        event.post(tap: .cghidEventTap)
+    }
+
+    // MARK: - Keyboard
+
+    /// Type a string at the current focus.
+    ///
+    /// Forwards to `Element.typeText(_:delay:)` and rethrows its errors.
+    @MainActor
+    public static func type(_ text: String, delayPerCharacter: TimeInterval = 0.0) throws {
+        try Element.typeText(text, delay: delayPerCharacter)
+    }
+
+    /// Tap a special key (e.g. return, tab) with optional modifiers.
+    ///
+    /// Forwards to `Element.typeKey(_:modifiers:)` and rethrows its errors.
+    @MainActor
+    public static func tapKey(_ key: SpecialKey, modifiers: CGEventFlags = []) throws {
+        try Element.typeKey(key, modifiers: modifiers)
+    }
+
+    /// Perform a hotkey chord (e.g. ["cmd","shift","4"]).
+    @MainActor
+    public static func hotkey(keys: [String], holdDuration: TimeInterval = 0.1) throws {
+        // Thin pass-through: the chord handling lives in `Element.performHotkey`.
+        try Element.performHotkey(keys: keys, holdDuration: holdDuration)
+    }
+}
diff --git a/Tests/AXorcistTests/InputDriverTests.swift b/Tests/AXorcistTests/InputDriverTests.swift
new file mode 100644
index 0000000..2d0ebde
--- /dev/null
+++ b/Tests/AXorcistTests/InputDriverTests.swift
@@ -0,0 +1,21 @@
+import CoreGraphics
+import Testing
+@testable import AXorcist
+
+/// Covers the pure caching behaviour of `InputDriver.cachedLocation(using:)`.
+@Suite("InputDriver cursor helpers")
+struct InputDriverTests {
+    @Test("cachedLocation returns cached value when present")
+    func cachedLocationUsesCache() {
+        let seeded = CGPoint(x: 10, y: 20)
+        var cache: CGPoint? = seeded
+        let result = InputDriver.cachedLocation(using: &cache)
+        #expect(result == seeded)
+        // A cache hit must leave the caller's storage untouched.
+        #expect(cache == seeded)
+    }
+
+    @Test("cachedLocation populates cache when empty")
+    func cachedLocationPopulatesCache() {
+        var cache: CGPoint?
+        let result = InputDriver.cachedLocation(using: &cache)
+        // If running in CI without UI, location may be nil; just assert cache mirrors result.
+        // Compare against the returned value, not a second live query, so the test
+        // cannot fail when the pointer moves between the two reads.
+        #expect(cache == result)
+    }
+}