fix: document element IDs as opaque (#202)
Some checks failed
macOS CI / PeekabooCore build & tests (push) Has been cancelled
Website (GitHub Pages) / build (push) Has been cancelled
macOS CI / Peekaboo CLI build & tests (push) Has been cancelled
macOS CI / Tachikoma build & tests (push) Has been cancelled
macOS CI / Build macOS apps (Peekaboo + Inspector) (push) Has been cancelled
macOS CI / SwiftLint (core + CLI) (push) Has been cancelled
Website (GitHub Pages) / deploy (push) Has been cancelled

This commit is contained in:
Peter Steinberger 2026-06-24 09:11:52 +01:00 committed by GitHub
parent efde5b18ca
commit dda07c245f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
38 changed files with 94 additions and 51 deletions

View File

@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [3.5.3] - 2026-06-13
### Fixed
- Public CLI, agent, MCP, and API guidance now treats runtime element IDs as opaque strings to copy exactly instead of implying role-specific ID shapes. Thanks @coygeek for #194.
- JSON-only `peekaboo see` runs without `--path` now keep required screenshots in snapshot storage instead of leaving files on Desktop or exposing their temporary paths. Thanks @coygeek for #196.
- Background element/query/coordinate clicks now pin actions to the requested process and exact window, reject mismatched window/PID selectors and unverifiable snapshots, invalidate implicit latest snapshots without deleting history, and no longer require Event Synthesizing when Accessibility completes the click.
- App launch, open, and inventory commands now use the selected runtime host, fixing sandboxed LaunchServices failures; launch/open preserve `--no-focus` and caller-relative app paths, relaunch preflights and keeps quit/wait/launch in one daemon-held transaction, build-scoped fallback daemons remain reusable and controllable across native/Rosetta execution and executable upgrades, incompatible legacy hosts no longer force sandboxed local fallback, and inventory ignores unrelated input overrides.

View File

@ -54,7 +54,7 @@ extension ClickCommand: CommanderSignatureProviding {
),
.commandOption(
"on",
help: "Element ID to click (e.g., B1, T2)",
help: "Opaque element ID copied from current see or inspect-ui output",
long: "on"
),
.commandOption(

View File

@ -48,7 +48,7 @@ extension ClickCommand {
💡 Hints:
Run 'peekaboo see' first to capture UI elements
Check that the element ID is correct (e.g., B1, T2)
Copy the opaque element ID exactly from current see or inspect-ui output
Element may have disappeared or changed
"""
}

View File

@ -14,7 +14,7 @@ struct ClickCommand: ErrorHandlingCommand, OutputFormattable, RuntimeOptionsConf
@Option(help: "Snapshot ID, or 'latest' (uses latest if not specified)")
var snapshot: String?
@Option(help: "Element ID to click (e.g., B1, T2)")
@Option(help: "Opaque element ID copied from current see or inspect-ui output")
var on: String?
@Option(name: .customLong("id"), help: "Element ID to click (alias for --on)")

View File

@ -259,11 +259,11 @@ extension DragCommand: ParsableCommand {
Execute click-and-drag operations for moving elements, selecting text, or dragging files.
EXAMPLES:
peekaboo drag --from B1 --to T2
peekaboo drag --from "$SOURCE_ID" --to "$TARGET_ID"
peekaboo drag --from-coords "100,200" --to-coords "400,300"
peekaboo drag --from B1 --to-app Trash
peekaboo drag --from S1 --to-coords "500,250" --duration 2000
peekaboo drag --from T1 --to T5 --modifiers shift
peekaboo drag --from "$SOURCE_ID" --to-app Trash
peekaboo drag --from "$SOURCE_ID" --to-coords "500,250" --duration 2000
peekaboo drag --from "$SOURCE_ID" --to "$TARGET_ID" --modifiers shift
""",
version: "2.0.0",
showHelpOnEmptyInvocation: true

View File

@ -16,7 +16,7 @@ extension MoveCommand: ParsableCommand {
EXAMPLES:
peekaboo move 100,200 # Move to coordinates
peekaboo move --to "Submit Button" # Move to element by text
peekaboo move --on B3 # Move to element by ID
peekaboo move --on "$ELEMENT_ID" # ID copied from current output
peekaboo move 500,300 --smooth # Smooth movement
peekaboo move --center # Move to screen center
@ -84,7 +84,7 @@ extension MoveCommand: CommanderSignatureProviding {
),
.commandOption(
"on",
help: "Element ID to move to (e.g., B1, T2)",
help: "Opaque element ID copied from current see or inspect-ui output",
long: "on"
),
.commandOption(

View File

@ -17,7 +17,7 @@ struct MoveCommand: ErrorHandlingCommand, OutputFormattable {
@Option(help: "Move to element by text/label")
var to: String?
@Option(help: "Element ID to move to (e.g., B1, T2)")
@Option(help: "Opaque element ID copied from current see or inspect-ui output")
var on: String?
@Option(name: .customLong("id"), help: "Element ID to move to (alias for --on)")

View File

@ -116,7 +116,7 @@ extension PerformActionCommand: ParsableCommand {
Invokes an accessibility action without synthesizing a mouse or keyboard event.
EXAMPLES:
peekaboo perform-action --on B1 --action AXPress
peekaboo perform-action --on "$ELEMENT_ID" --action AXPress
peekaboo perform-action --on Stepper --action AXIncrement
""",
showHelpOnEmptyInvocation: true

View File

@ -119,7 +119,7 @@ extension SetValueCommand: ParsableCommand {
Sets a settable accessibility value without synthesizing keystrokes.
EXAMPLES:
peekaboo set-value "hello" --on T1
peekaboo set-value "hello" --on "$ELEMENT_ID"
peekaboo set-value "42" --on "Search"
""",
showHelpOnEmptyInvocation: true

View File

@ -249,20 +249,20 @@ extension SwipeCommand: ParsableCommand {
EXAMPLES:
# Swipe between UI elements
peekaboo swipe --from B1 --to B5 --snapshot 12345
peekaboo swipe --from "$SOURCE_ID" --to "$TARGET_ID" --snapshot "$SNAPSHOT_ID"
# Swipe with coordinates
peekaboo swipe --from-coords 100,200 --to-coords 300,400
# Mixed mode: element to coordinates
peekaboo swipe --from T1 --to-coords 500,300 --duration 1000
peekaboo swipe --from "$SOURCE_ID" --to-coords 500,300 --duration 1000
# Slow swipe for precise gesture
peekaboo swipe --from-coords 50,50 --to-coords 400,400 --duration 2000
USAGE:
You can specify source and destination using either:
- Element IDs from a previous 'see' command
- Opaque element IDs copied from current 'see' or 'inspect-ui' output
- Direct coordinates
- A mix of both

View File

@ -21,6 +21,14 @@ struct CommandHelpRendererTests {
#expect(!help.contains("<alsoText>"))
#expect(!help.contains("<logLevel>"))
}
/// Verifies that the rendered help for the click and move commands contains the
/// phrase "Opaque element ID" and no sample ID shaped like a single B/T/M/S letter
/// followed by digits.
@Test
func `interaction help describes element IDs as opaque`() {
    let renderedHelp = [ClickCommand.helpMessage(), MoveCommand.helpMessage()]
    for text in renderedHelp {
        // Search once so the expectation below reads as a plain nil check.
        let roleShapedID = text.range(of: #"\b[BTMS]\d+\b"#, options: .regularExpression)
        #expect(text.contains("Opaque element ID"))
        #expect(roleShapedID == nil)
    }
}
}
private struct SampleHelpCommand: ParsableCommand {

View File

@ -3,6 +3,7 @@
## [3.5.3] - 2026-06-13
### Fixed
- Public CLI, agent, MCP, and API guidance now treats runtime element IDs as opaque strings to copy exactly instead of implying role-specific ID shapes. Thanks @coygeek for #194.
- Sparkle no longer advertises the unpublished 3.5.3 release whose public app download returns 404; the entry will return through the normal release flow when the release is published. Thanks @bcharleson for #199.
- JSON-only `peekaboo see` runs without `--path` now keep required screenshots in snapshot storage instead of leaving files on Desktop or exposing their temporary paths. Thanks @coygeek for #196.
- Watch captures now honor stop requests during transient ScreenCaptureKit retry backoff instead of waiting out the full delay. Thanks @SebTardif for #193.

View File

@ -78,7 +78,7 @@ public struct DetectedElements: Sendable, Codable {
/// A detected UI element
public struct DetectedElement: Sendable, Codable {
/// Unique identifier (e.g., "B1", "T2")
/// Opaque identifier returned by element detection.
public let id: String
/// Element type

View File

@ -4,7 +4,7 @@ import PeekabooFoundation
/// Target for click operations
public enum ClickTarget: Sendable, Codable {
/// Click on element by ID (e.g., "B1")
/// Click an element by its opaque detected ID.
case elementId(String)
/// Click at specific coordinates

View File

@ -24,7 +24,7 @@ import PeekabooFoundation
*
* // Click by element ID
* try await clickService.click(
* target: .elementId("B1"),
* target: .elementId(detectedElement.id),
* clickType: .single,
* snapshotId: "snapshot_123"
* )

View File

@ -103,7 +103,7 @@ extension UIAutomationService {
*
* ## Click Targeting
* Three targeting modes are supported:
* - **Element ID**: Click on a specific detected element (e.g., "B1", "T3")
* - **Element ID**: Click on a specific element using its opaque detected ID
* - **Query**: Find element by text content or accessibility label
* - **Coordinates**: Click at exact screen coordinates
*
@ -128,7 +128,7 @@ extension UIAutomationService {
* ```swift
* // Click on detected element
* try await automation.click(
* target: .elementId("B1"),
* target: .elementId(detectedElement.id),
* clickType: .single,
* snapshotId: "snapshot_123"
* )

View File

@ -47,7 +47,7 @@ extension UIAutomationService {
* // Type into specific element with clearing
* try await automation.type(
* text: "Hello World!",
* target: "T1",
* target: detectedElement.id,
* clearExisting: true,
* typingDelay: 50,
* snapshotId: "snapshot_123"

View File

@ -27,8 +27,10 @@ import PeekabooFoundation
* )
*
* // Perform automation
* try await automation.click(target: .elementId("B1"), clickType: .single, snapshotId: "snapshot_123")
* try await automation.type(text: "Hello World", target: "T1", clearExisting: true, snapshotId: "snapshot_123")
* try await automation.click(
* target: .elementId(button.id), clickType: .single, snapshotId: "snapshot_123")
* try await automation.type(
* text: "Hello World", target: textField.id, clearExisting: true, snapshotId: "snapshot_123")
* ```
*
* - Important: Requires Screen Recording and Accessibility permissions

View File

@ -30,7 +30,7 @@ public struct ClickTool: MCPTool {
"""),
"on": SchemaBuilder.string(
description: """
Optional. Element ID to click (e.g., B1, T2) from `see` or `inspect_ui` output.
Optional. Opaque element ID copied exactly from current `see` or `inspect_ui` output.
"""),
"coords": SchemaBuilder.string(
description: """

View File

@ -23,7 +23,8 @@ public struct PerformActionTool: MCPTool {
SchemaBuilder.object(
properties: [
"on": SchemaBuilder.string(
description: "Element ID from `see` or `inspect_ui` output, such as B1, or a query string."),
description: "Opaque element ID copied exactly from current `see` or `inspect_ui` output, " +
"or a query string."),
"action": SchemaBuilder.string(
description: "Accessibility action name to invoke, e.g. AXPress, AXShowMenu, AXIncrement."),
"snapshot": SchemaBuilder.string(

View File

@ -50,7 +50,7 @@ struct SeeSummaryBuilder {
lines.append("")
lines.append(contentsOf: self.elementSection())
lines.append("")
lines.append("Use element IDs (B1, T1, etc.) with click, type, and other interaction commands.")
lines.append("Copy opaque element IDs exactly into click, type, and other interaction commands.")
return lines.joined(separator: "\n")
}

View File

@ -21,8 +21,9 @@ public struct SeeTool: MCPTool {
"""
Captures a screenshot of the active UI and generates an element map.
Returns Peekaboo element IDs (B1 for buttons, T1 for text fields, etc.) that can be
used with interaction commands and creates/updates a snapshot that tracks UI state.
Returns opaque Peekaboo element IDs that can be passed unchanged to interaction commands.
Do not infer an element's role or type from the shape of its ID. Creates or updates a
snapshot that tracks UI state.
\(PeekabooMCPVersion.banner) using openai/gpt-5.5
and anthropic/claude-opus-4-8.
"""

View File

@ -23,7 +23,8 @@ public struct SetValueTool: MCPTool {
SchemaBuilder.object(
properties: [
"on": SchemaBuilder.string(
description: "Element ID from `see` or `inspect_ui` output, such as T1, or a query string."),
description: "Opaque element ID copied exactly from current `see` or `inspect_ui` output, " +
"or a query string."),
"value": SchemaBuilder.anyOf(
[
SchemaBuilder.string(),

View File

@ -76,7 +76,7 @@ public enum UIAutomationToolDefinitions {
ParameterDefinition(
name: "on",
type: .string,
description: "Element ID to click (e.g., B1, T2) from `see` or `inspect_ui` output",
description: "Opaque element ID copied exactly from current `see` or `inspect_ui` output",
required: false),
ParameterDefinition(
name: "coords",

View File

@ -58,7 +58,7 @@ public enum ToolRegistry {
EXAMPLE
peekaboo click --foreground --wait-for 1500 --double \"Submit\"
peekaboo click --on B2 --foreground --space-switch
peekaboo click --on "$ELEMENT_ID" --foreground --space-switch
TROUBLESHOOTING
If the element isn't found, refresh the snapshot with a fresh observation (`peekaboo see`

View File

@ -144,6 +144,27 @@ struct AgentToolDescriptionTests {
}
}
/// Checks the guidance text of every agent tool (its description joined with all of
/// its parameter descriptions) for sample IDs shaped like a single B/T/M/S letter
/// followed by digits, then requires the `click` tool's guidance to mention
/// "opaque" (case-insensitive).
@Test
@MainActor
func `Agent tools treat element IDs as opaque`() throws {
    let agentService = try PeekabooAgentService(services: PeekabooServices())
    let tools = agentService.createAgentTools()

    for candidate in tools {
        let fragments = [candidate.description] + candidate.parameters.properties.values.map(\.description)
        let combined = fragments.joined(separator: "\n")
        let roleShapedMatch = combined.range(of: #"\b[BTMS]\d+\b"#, options: .regularExpression)
        #expect(
            roleShapedMatch == nil,
            "Tool '\(candidate.name)' must not imply that element ID shape encodes element role.")
    }

    // When no tool is named "click" this stays nil, which fails the expectation below.
    let clickTool = tools.first { $0.name == "click" }
    let clickGuidance = clickTool.map { tool in
        ([tool.description] + tool.parameters.properties.values.map(\.description)).joined(separator: "\n")
    }
    #expect(clickGuidance?.localizedCaseInsensitiveContains("opaque") == true)
}
@Test
@MainActor
func `Shell tool has quoting examples`() {

View File

@ -41,15 +41,20 @@ Peekaboo brings high-fidelity screen capture, AI analysis, and complete GUI auto
# Capture full screen at Retina scale and save to Desktop
peekaboo image --mode screen --retina --path ~/Desktop/screen.png
# Click a button by label (captures, resolves, and clicks in one go)
peekaboo see --app Safari --json | jq -r '.data.snapshot_id' | read SNAPSHOT
# Capture current UI state, then copy its snapshot and opaque element IDs exactly
peekaboo see --app Safari --json
SNAPSHOT="<snapshot-id>"
TEXT_FIELD_ID="<text-field-id>"
BUTTON_ID="<button-id>"
# Click a button by label
peekaboo click "Reload this page" --snapshot "$SNAPSHOT"
# Directly set a text field value when the accessibility value is settable
peekaboo set-value --on T1 --value "hello" --snapshot "$SNAPSHOT"
peekaboo set-value --on "$TEXT_FIELD_ID" --value "hello" --snapshot "$SNAPSHOT"
# Invoke a named accessibility action on an element
peekaboo perform-action --on B1 --action AXPress --snapshot "$SNAPSHOT"
peekaboo perform-action --on "$BUTTON_ID" --action AXPress --snapshot "$SNAPSHOT"
# Run a natural-language automation
peekaboo agent "Open Notes and create a TODO list with three items"

View File

@ -13,7 +13,7 @@ read_when:
| Flag | Description |
| --- | --- |
| `[query]` | Optional positional text query (case-insensitive substring match). |
| `--on <id>` / `--id <id>` | Target a specific Peekaboo element ID (e.g., `B1`, `T2`). |
| `--on <id>` / `--id <id>` | Target an opaque Peekaboo element ID copied exactly from current `see` or `inspect-ui` output. |
| `--coords x,y` | Click coordinates. With target flags, coordinates are relative to the resolved target window; without target flags, they are global screen coordinates. |
| `--global-coords` | Treat `--coords` as global screen coordinates even when target flags are supplied. |
| `--snapshot <id>` | Reuse a prior snapshot; defaults to `services.snapshots.getMostRecentSnapshot()` when omitted. |
@ -40,8 +40,8 @@ read_when:
## Examples
```bash
# Click the "Send" button (ID from a previous `see` run)
peekaboo click --on B12
# Click the "Send" button using an ID copied from current `see` output
peekaboo click --on "$ELEMENT_ID"
# Fuzzy search + extra wait for a slow dialog using foreground delivery
peekaboo click "Allow" --foreground --wait-for 8000 --space-switch

View File

@ -27,7 +27,7 @@ read_when:
```bash
peekaboo see --app Calculator
peekaboo perform-action --on B7 --action AXPress --snapshot <snapshot-id>
peekaboo perform-action --on "$ELEMENT_ID" --action AXPress --snapshot "$SNAPSHOT_ID"
peekaboo perform-action --on Stepper --action AXIncrement
```

View File

@ -28,7 +28,7 @@ read_when:
```bash
peekaboo see --app TextEdit
peekaboo set-value "hello" --on T1 --snapshot <snapshot-id>
peekaboo set-value "hello" --on "$ELEMENT_ID" --snapshot "$SNAPSHOT_ID"
peekaboo set-value "42" --on "Search"
```

View File

@ -132,7 +132,7 @@ Environment variables:
CLI override:
```bash
peekaboo click --on B1 --input-strategy actionFirst
peekaboo click --on "$ELEMENT_ID" --input-strategy actionFirst
```
## Logging & Troubleshooting

View File

@ -69,7 +69,7 @@ peekaboo type "Hello world"
peekaboo scroll --direction down
peekaboo menu click --app Safari --item "New Tab"
peekaboo hotkey --keys "cmd,s"
peekaboo drag --from B1 --to T2
peekaboo drag --from "$SOURCE_ID" --to "$TARGET_ID"
```
### Default Behavior

View File

@ -106,7 +106,7 @@ Always use `--no-remote --capture-engine cg` for capture commands:
peekaboo see --app Safari --no-remote --capture-engine cg --json
# Click element (doesn't need workaround, but safe to include)
peekaboo click --on B1 --no-remote
peekaboo click --on "$ELEMENT_ID" --no-remote
# Type text (doesn't need workaround, but safe to include)
peekaboo type --text "Hello" --no-remote

View File

@ -29,7 +29,7 @@ Use the `--verbose` or `-v` flag with any command:
```bash
peekaboo see --app Safari --verbose
peekaboo click --on B1 --verbose
peekaboo click --on "$ELEMENT_ID" --verbose
```
### Environment Variable
@ -106,12 +106,12 @@ $ peekaboo see --app Safari --verbose
### Debugging Element Not Found
```bash
$ peekaboo click --on B99 --verbose
$ peekaboo click --on "$ELEMENT_ID" --verbose
[2025-01-06T08:05:24.123Z] VERBOSE [Snapshot]: Resolving snapshot {explicitId=null}
[2025-01-06T08:05:24.124Z] VERBOSE [Snapshot]: Found valid snapshots {count=1, latest=12345}
[2025-01-06T08:05:24.125Z] VERBOSE [ElementSearch]: Looking for element {id=B99, snapshotId=12345}
[2025-01-06T08:05:24.125Z] VERBOSE [ElementSearch]: Looking for element {id=<element-id>, snapshotId=12345}
[2025-01-06T08:05:24.126Z] VERBOSE [ElementSearch]: Loading snapshot map from cache
[2025-01-06T08:05:24.127Z] ERROR [ElementSearch]: Element not found in snapshot {id=B99, availableIds=[B1,B2,B3,T1,T2]}
[2025-01-06T08:05:24.127Z] ERROR [ElementSearch]: Element not found in snapshot {id=<element-id>}
```
### Performance Analysis

View File

@ -253,7 +253,7 @@ The following subsections spell out the concrete steps, required Playground surf
- **Log capture**: `./Apps/Playground/scripts/playground-log.sh -c Click --last 10m --all -o "$LOG_ROOT/click-$(date +%s).log"`.
- **Test cases**:
1. Query-based click: `polter peekaboo -- click "Single Click"` (expect `Click` log + counter increment).
2. ID-based click: `polter peekaboo -- click --on B1 --snapshot <id>` targeting `single-click-button`.
2. ID-based click: copy the opaque ID from current `see` output, then run `polter peekaboo -- click --on "$ELEMENT_ID" --snapshot <id>` targeting `single-click-button`.
3. Coordinate click: `polter peekaboo -- click --coords 400,400 --foreground` hitting the nested area.
4. Coordinate validation: `polter peekaboo -- click --coords , --json-output` should fail with `VALIDATION_ERROR` (no crash).
5. Error path: attempt to click disabled button and confirm descriptive `elementNotFound` guidance.
@ -264,7 +264,7 @@ The following subsections spell out the concrete steps, required Playground surf
- `polter peekaboo -- click "Single Click" --snapshot <legacy snapshot>` succeeded but targeted Ghostty (click hit terminal input); highlighting importance of focusing Playground first.
- `polter peekaboo -- app switch --to Playground` followed by `polter peekaboo -- click --on elem_6 --snapshot 263F8CD6-...` successfully hit the “View Logs” button (Playground log recorded the click).
- Coordinate click `--coords 600,500` succeeded (see log); attempting `--on elem_disabled` produced expected `elementNotFound` error.
- IDs like `B1` are not stable in this build; rely on `elem_*` IDs from the `see` output.
- Element IDs are opaque and unstable; always copy the exact ID from current `see` output.
- **2025-12-17 Controls Fixture add-on**:
- Open “Controls Fixture” via `⌘⌃3`, then drive checkboxes + segmented control by clicking snapshot IDs (`--on elem_…`) captured from `see`.
- **Important**: ControlsView is scrollable; after any `scroll`, re-run `see` before clicking elements further down (otherwise snapshot coordinates can be stale).

View File

@ -190,7 +190,7 @@ Peekaboo.app still respects user-facing toggles via `PeekabooSettings`; the coor
### Element Detection (See) 👁️
- **Effect**: All detected elements briefly highlight
- **Style**: Colored overlays with IDs (B1, T1, etc.)
- **Style**: Colored overlays labeled with opaque element IDs
- **Animation**: Fade in with slight scale
- **Duration**: 2 seconds before fade

View File

@ -19,6 +19,8 @@ const staleCliPatterns = [
[/--label\b/, 'use positional query text or `--on`'],
[/--at\b/, 'use `--coords`'],
[/--ticks\b/, 'use `--amount`'],
[/--(?:on|from|to)\s+[`"']?[BTMS]\d+\b/, 'use an opaque element ID copied from current output'],
[/element IDs?[^\n]*[`"']?[BTMS]\d+\b/i, 'describe element IDs as opaque'],
];
const staleDocsPatterns = [
[/mcp-capture-meta/i, 'remove stale native MCP capture metadata references'],

View File

@ -151,7 +151,7 @@ ruby -e 'h=File.read("skills/peekaboo/SKILL.md").split(/^---\s*$/,3)[1]; keys=h.
! rg -n '^allowed-tools:' skills/peekaboo/SKILL.md
pnpm run build:cli
BIN="$(swift build --package-path Apps/CLI --show-bin-path)/peekaboo"; "$BIN" --version
"$BIN" click --help | rg -- '--foreground|--focus-background|--input-strategy|B1, T2'
"$BIN" click --help | rg -- '--foreground|--focus-background|--input-strategy|Opaque element ID'
"$BIN" see --help | rg -- '--json|--annotate|--app|--no-web-focus'
"$BIN" inspect-ui --help | rg 'inspect_ui|--app-target|--snapshot|--json'
git diff --check -- skills/peekaboo/SKILL.md docs/agent-skill.md docs/commands/see.md docs/automation.md scripts/docs-lint.mjs